Fix doctest more (for docs/source/en) (#30247)
* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
@@ -65,9 +65,9 @@ After conversion, the model and tokenizer can be loaded via:
|
||||
>>> tokenizer = CodeLlamaTokenizer.from_pretrained("codellama/CodeLlama-7b-hf")
|
||||
>>> model = LlamaForCausalLM.from_pretrained("codellama/CodeLlama-7b-hf")
|
||||
>>> PROMPT = '''def remove_non_ascii(s: str) -> str:
|
||||
""" <FILL_ME>
|
||||
return result
|
||||
'''
|
||||
... """ <FILL_ME>
|
||||
... return result
|
||||
... '''
|
||||
>>> input_ids = tokenizer(PROMPT, return_tensors="pt")["input_ids"]
|
||||
>>> generated_ids = model.generate(input_ids, max_new_tokens=128)
|
||||
|
||||
@@ -75,10 +75,10 @@ After conversion, the model and tokenizer can be loaded via:
|
||||
>>> print(PROMPT.replace("<FILL_ME>", filling))
|
||||
def remove_non_ascii(s: str) -> str:
|
||||
""" Remove non-ASCII characters from a string.
|
||||
|
||||
<BLANKLINE>
|
||||
Args:
|
||||
s: The string to remove non-ASCII characters from.
|
||||
|
||||
<BLANKLINE>
|
||||
Returns:
|
||||
The string with non-ASCII characters removed.
|
||||
"""
|
||||
@@ -87,6 +87,7 @@ def remove_non_ascii(s: str) -> str:
|
||||
if ord(c) < 128:
|
||||
result += c
|
||||
return result
|
||||
<BLANKLINE>
|
||||
```
|
||||
|
||||
If you only want the infilled part:
|
||||
|
||||
@@ -92,7 +92,9 @@ Phi-2 has been integrated in the development version (4.37.0.dev) of `transforme
|
||||
>>> outputs = model.generate(**inputs, max_length=30)
|
||||
>>> text = tokenizer.batch_decode(outputs)[0]
|
||||
>>> print(text)
|
||||
'Can you help me write a formal email to a potential business partner proposing a joint venture?\nInput: Company A: ABC Inc.\nCompany B: XYZ Ltd.\nJoint Venture: A new online platform for e-commerce'
|
||||
Can you help me write a formal email to a potential business partner proposing a joint venture?
|
||||
Input: Company A: ABC Inc.
|
||||
Company B
|
||||
```
|
||||
|
||||
### Example :
|
||||
@@ -134,7 +136,7 @@ To load and run a model using Flash Attention 2, refer to the snippet below:
|
||||
>>> from transformers import PhiForCausalLM, AutoTokenizer
|
||||
|
||||
>>> # define the model and tokenizer and push the model and tokens to the GPU.
|
||||
>>> model = PhiForCausalLM.from_pretrained("microsoft/phi-1_5", torch_dtype=torch.float16, attn_implementation="flash_attention_2").to("cuda")
|
||||
>>> model = PhiForCausalLM.from_pretrained("microsoft/phi-1_5", torch_dtype=torch.float16, attn_implementation="flash_attention_2").to("cuda") # doctest: +SKIP
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5")
|
||||
|
||||
>>> # feel free to change the prompt to your liking.
|
||||
@@ -144,9 +146,9 @@ To load and run a model using Flash Attention 2, refer to the snippet below:
|
||||
>>> tokens = tokenizer(prompt, return_tensors="pt").to("cuda")
|
||||
|
||||
>>> # use the model to generate new tokens.
|
||||
>>> generated_output = model.generate(**tokens, use_cache=True, max_new_tokens=10)
|
||||
>>> generated_output = model.generate(**tokens, use_cache=True, max_new_tokens=10) # doctest: +SKIP
|
||||
|
||||
>>> tokenizer.batch_decode(generated_output)[0]
|
||||
>>> tokenizer.batch_decode(generated_output)[0] # doctest: +SKIP
|
||||
'If I were an AI that had just achieved a breakthrough in machine learning, I would be thrilled'
|
||||
```
|
||||
|
||||
|
||||
@@ -37,19 +37,21 @@ We also provide `StableLM Zephyr 3B`, an instruction fine-tuned version of the m
|
||||
The following code snippet demonstrates how to use `StableLM 3B 4E1T` for inference:
|
||||
|
||||
```python
|
||||
>>> from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
>>> from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
|
||||
>>> device = "cuda" # the device to load the model onto
|
||||
|
||||
>>> set_seed(0)
|
||||
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t")
|
||||
>>> model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t")
|
||||
>>> model.to(device)
|
||||
>>> model.to(device) # doctest: +IGNORE_RESULT
|
||||
|
||||
>>> model_inputs = tokenizer("The weather is always wonderful in", return_tensors="pt").to(model.device)
|
||||
|
||||
>>> generated_ids = model.generate(**model_inputs, max_length=32, do_sample=True)
|
||||
>>> responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
|
||||
>>> responses
|
||||
['The weather is always wonderful in Santa Barbara and, for visitors hoping to make the move to our beautiful seaside city, this town offers plenty of great places to...']
|
||||
['The weather is always wonderful in Costa Rica, which makes it a prime destination for retirees. That’s where the Pensionado program comes in, offering']
|
||||
```
|
||||
|
||||
## Combining StableLM and Flash Attention 2
|
||||
@@ -66,19 +68,21 @@ Now, to run the model with Flash Attention 2, refer to the snippet below:
|
||||
|
||||
```python
|
||||
>>> import torch
|
||||
>>> from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
>>> from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
|
||||
>>> device = "cuda" # the device to load the model onto
|
||||
|
||||
>>> set_seed(0)
|
||||
|
||||
>>> tokenizer = AutoTokenizer.from_pretrained("stabilityai/stablelm-3b-4e1t")
|
||||
>>> model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t", torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2")
|
||||
>>> model.to(device)
|
||||
>>> model = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-3b-4e1t", torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2") # doctest: +SKIP
|
||||
>>> model.to(device) # doctest: +SKIP
|
||||
|
||||
>>> model_inputs = tokenizer("The weather is always wonderful in", return_tensors="pt").to(model.device)
|
||||
|
||||
>>> generated_ids = model.generate(**model_inputs, max_length=32, do_sample=True)
|
||||
>>> responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
|
||||
>>> responses
|
||||
['The weather is always wonderful in Santa Barbara and, for visitors hoping to make the move to our beautiful seaside city, this town offers plenty of great places to...']
|
||||
>>> generated_ids = model.generate(**model_inputs, max_length=32, do_sample=True) # doctest: +SKIP
|
||||
>>> responses = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) # doctest: +SKIP
|
||||
>>> responses # doctest: +SKIP
|
||||
['The weather is always wonderful in Costa Rica, which makes it a prime destination for retirees. That’s where the Pensionado program comes in, offering']
|
||||
```
|
||||
|
||||
|
||||
|
||||
@@ -42,11 +42,10 @@ These ready-to-use checkpoints can be downloaded and used via the HuggingFace Hu
|
||||
>>> prompt = "def print_hello_world():"
|
||||
|
||||
>>> model_inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
|
||||
>>> model.to(device)
|
||||
|
||||
>>> generated_ids = model.generate(**model_inputs, max_new_tokens=10, do_sample=False)
|
||||
>>> tokenizer.batch_decode(generated_ids)[0]
|
||||
"def print_hello_world():\n\treturn 'Hello World!'"
|
||||
'def print_hello_world():\n print("Hello World!")\n\ndef print'
|
||||
```
|
||||
|
||||
## Starcoder2Config
|
||||
|
||||
@@ -309,7 +309,7 @@ The predicted tokens will then be placed between the sentinel tokens.
|
||||
>>> sequence_ids = model.generate(input_ids)
|
||||
>>> sequences = tokenizer.batch_decode(sequence_ids)
|
||||
>>> sequences
|
||||
['<pad><extra_id_0> park offers<extra_id_1> the<extra_id_2> park.</s>']
|
||||
['<pad> <extra_id_0> park offers <extra_id_1> the <extra_id_2> park.</s>']
|
||||
```
|
||||
|
||||
## Performance
|
||||
|
||||
Reference in New Issue
Block a user