Transformers cli clean command (#37657)
* transformers-cli -> transformers * Chat command works with positional argument * update doc references to transformers-cli * doc headers * deepspeed --------- Co-authored-by: Joao Gante <joao@huggingface.co>
This commit is contained in:
@@ -49,9 +49,9 @@ model = AutoModelForCausalLM.from_pretrained("CohereForAI/c4ai-command-r-v01", t
|
||||
messages = [{"role": "user", "content": "How do plants make energy?"}]
|
||||
input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
|
||||
output = model.generate(
|
||||
input_ids,
|
||||
max_new_tokens=100,
|
||||
do_sample=True,
|
||||
input_ids,
|
||||
max_new_tokens=100,
|
||||
do_sample=True,
|
||||
temperature=0.3,
|
||||
cache_implementation="static",
|
||||
)
|
||||
@@ -59,11 +59,11 @@ print(tokenizer.decode(output[0], skip_special_tokens=True))
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
<hfoption id="transformers-cli">
|
||||
<hfoption id="transformers CLI">
|
||||
|
||||
```bash
|
||||
# pip install -U flash-attn --no-build-isolation
|
||||
transformers-cli chat --model_name_or_path CohereForAI/c4ai-command-r-v01 --torch_dtype auto --attn_implementation flash_attention_2
|
||||
transformers chat CohereForAI/c4ai-command-r-v01 --torch_dtype auto --attn_implementation flash_attention_2
|
||||
```
|
||||
|
||||
</hfoption>
|
||||
@@ -85,9 +85,9 @@ model = AutoModelForCausalLM.from_pretrained("CohereForAI/c4ai-command-r-v01", t
|
||||
messages = [{"role": "user", "content": "How do plants make energy?"}]
|
||||
input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
|
||||
output = model.generate(
|
||||
input_ids,
|
||||
max_new_tokens=100,
|
||||
do_sample=True,
|
||||
input_ids,
|
||||
max_new_tokens=100,
|
||||
do_sample=True,
|
||||
temperature=0.3,
|
||||
cache_implementation="static",
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user