Update all references to canonical models (#29001)
* Script & Manual edition * Update
This commit is contained in:
@@ -178,7 +178,7 @@ deepspeed --num_gpus=2 your_program.py <normal cl args> --deepspeed ds_config.js
|
||||
```bash
|
||||
deepspeed examples/pytorch/translation/run_translation.py \
|
||||
--deepspeed tests/deepspeed/ds_config_zero3.json \
|
||||
--model_name_or_path t5-small --per_device_train_batch_size 1 \
|
||||
--model_name_or_path google-t5/t5-small --per_device_train_batch_size 1 \
|
||||
--output_dir output_dir --overwrite_output_dir --fp16 \
|
||||
--do_train --max_train_samples 500 --num_train_epochs 1 \
|
||||
--dataset_name wmt16 --dataset_config "ro-en" \
|
||||
@@ -201,7 +201,7 @@ deepspeed examples/pytorch/translation/run_translation.py \
|
||||
```bash
|
||||
deepspeed --num_gpus=1 examples/pytorch/translation/run_translation.py \
|
||||
--deepspeed tests/deepspeed/ds_config_zero2.json \
|
||||
--model_name_or_path t5-small --per_device_train_batch_size 1 \
|
||||
--model_name_or_path google-t5/t5-small --per_device_train_batch_size 1 \
|
||||
--output_dir output_dir --overwrite_output_dir --fp16 \
|
||||
--do_train --max_train_samples 500 --num_train_epochs 1 \
|
||||
--dataset_name wmt16 --dataset_config "ro-en" \
|
||||
@@ -1628,7 +1628,7 @@ from transformers import T5ForConditionalGeneration, T5Config
|
||||
import deepspeed
|
||||
|
||||
with deepspeed.zero.Init():
|
||||
config = T5Config.from_pretrained("t5-small")
|
||||
config = T5Config.from_pretrained("google-t5/t5-small")
|
||||
model = T5ForConditionalGeneration(config)
|
||||
```
|
||||
|
||||
@@ -1640,7 +1640,7 @@ with deepspeed.zero.Init():
|
||||
from transformers import AutoModel, Trainer, TrainingArguments
|
||||
|
||||
training_args = TrainingArguments(..., deepspeed=ds_config)
|
||||
model = AutoModel.from_pretrained("t5-small")
|
||||
model = AutoModel.from_pretrained("google-t5/t5-small")
|
||||
trainer = Trainer(model=model, args=training_args, ...)
|
||||
```
|
||||
|
||||
@@ -1690,7 +1690,7 @@ deepspeed --num_gpus=2 your_program.py <normal cl args> --do_eval --deepspeed ds
|
||||
```bash
|
||||
deepspeed examples/pytorch/translation/run_translation.py \
|
||||
--deepspeed tests/deepspeed/ds_config_zero3.json \
|
||||
--model_name_or_path t5-small --output_dir output_dir \
|
||||
--model_name_or_path google-t5/t5-small --output_dir output_dir \
|
||||
--do_eval --max_eval_samples 50 --warmup_steps 50 \
|
||||
--max_source_length 128 --val_max_target_length 128 \
|
||||
--overwrite_output_dir --per_device_eval_batch_size 4 \
|
||||
@@ -1870,7 +1870,7 @@ import deepspeed
|
||||
ds_config = {...} # deepspeed config object or path to the file
|
||||
# must run before instantiating the model to detect zero 3
|
||||
dschf = HfDeepSpeedConfig(ds_config) # keep this object alive
|
||||
model = AutoModel.from_pretrained("gpt2")
|
||||
model = AutoModel.from_pretrained("openai-community/gpt2")
|
||||
engine = deepspeed.initialize(model=model, config_params=ds_config, ...)
|
||||
```
|
||||
|
||||
@@ -1884,7 +1884,7 @@ import deepspeed
|
||||
ds_config = {...} # deepspeed config object or path to the file
|
||||
# must run before instantiating the model to detect zero 3
|
||||
dschf = HfDeepSpeedConfig(ds_config) # keep this object alive
|
||||
config = AutoConfig.from_pretrained("gpt2")
|
||||
config = AutoConfig.from_pretrained("openai-community/gpt2")
|
||||
model = AutoModel.from_config(config)
|
||||
engine = deepspeed.initialize(model=model, config_params=ds_config, ...)
|
||||
```
|
||||
|
||||
@@ -24,8 +24,8 @@ rendered properly in your Markdown viewer.
|
||||
from transformers import BertTokenizer, BertForSequenceClassification
|
||||
import torch
|
||||
|
||||
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
|
||||
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")
|
||||
tokenizer = BertTokenizer.from_pretrained("google-bert/bert-base-uncased")
|
||||
model = BertForSequenceClassification.from_pretrained("google-bert/bert-base-uncased")
|
||||
|
||||
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
|
||||
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
||||
|
||||
@@ -39,7 +39,7 @@ pipelines是使用模型进行推理的一种简单方法。这些pipelines是
|
||||
如果您想使用 [hub](https://huggingface.co) 上的特定模型,可以忽略任务,如果hub上的模型已经定义了该任务:
|
||||
|
||||
```python
|
||||
>>> pipe = pipeline(model="roberta-large-mnli")
|
||||
>>> pipe = pipeline(model="FacebookAI/roberta-large-mnli")
|
||||
>>> pipe("This restaurant is awesome")
|
||||
[{'label': 'NEUTRAL', 'score': 0.7313136458396912}]
|
||||
```
|
||||
|
||||
@@ -462,7 +462,7 @@ sudo ln -s /usr/bin/g++-7 /usr/local/cuda-10.2/bin/g++
|
||||
export TASK_NAME=mrpc
|
||||
|
||||
python examples/pytorch/text-classification/run_glue.py \
|
||||
--model_name_or_path bert-base-cased \
|
||||
--model_name_or_path google-bert/bert-base-cased \
|
||||
--task_name $TASK_NAME \
|
||||
--do_train \
|
||||
--do_eval \
|
||||
@@ -597,7 +597,7 @@ cd transformers
|
||||
|
||||
accelerate launch \
|
||||
./examples/pytorch/text-classification/run_glue.py \
|
||||
--model_name_or_path bert-base-cased \
|
||||
--model_name_or_path google-bert/bert-base-cased \
|
||||
--task_name $TASK_NAME \
|
||||
--do_train \
|
||||
--do_eval \
|
||||
@@ -622,7 +622,7 @@ accelerate launch --num_processes=2 \
|
||||
--fsdp_sharding_strategy=1 \
|
||||
--fsdp_state_dict_type=FULL_STATE_DICT \
|
||||
./examples/pytorch/text-classification/run_glue.py \
|
||||
--model_name_or_path bert-base-cased \
|
||||
--model_name_or_path google-bert/bert-base-cased \
|
||||
--task_name $TASK_NAME \
|
||||
--do_train \
|
||||
--do_eval \
|
||||
|
||||
Reference in New Issue
Block a user