update the examples, docs and template
This commit is contained in:
@@ -41,7 +41,7 @@ from torch.utils.data import (DataLoader, RandomSampler, SequentialSampler,
|
||||
|
||||
from transformers import (OpenAIGPTDoubleHeadsModel, OpenAIGPTTokenizer,
|
||||
AdamW, cached_path, WEIGHTS_NAME, CONFIG_NAME,
|
||||
WarmupLinearSchedule)
|
||||
get_linear_schedule_with_warmup)
|
||||
|
||||
ROCSTORIES_URL = "https://s3.amazonaws.com/datasets.huggingface.co/ROCStories.tar.gz"
|
||||
|
||||
@@ -211,7 +211,7 @@ def main():
|
||||
{'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
|
||||
]
|
||||
optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
|
||||
scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total)
|
||||
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total)
|
||||
|
||||
if args.do_train:
|
||||
nb_tr_steps, tr_loss, exp_average_loss = 0, 0, None
|
||||
|
||||
@@ -42,7 +42,7 @@ from tqdm import tqdm, trange
|
||||
from transformers import (WEIGHTS_NAME, BertConfig,
|
||||
BertForMultipleChoice, BertTokenizer)
|
||||
|
||||
from transformers import AdamW, WarmupLinearSchedule
|
||||
from transformers import AdamW, get_linear_schedule_with_warmup
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -322,7 +322,7 @@ def train(args, train_dataset, model, tokenizer):
|
||||
{'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
|
||||
]
|
||||
optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
|
||||
scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total)
|
||||
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total)
|
||||
if args.fp16:
|
||||
try:
|
||||
from apex import amp
|
||||
|
||||
@@ -35,7 +35,7 @@ try:
|
||||
except:
|
||||
from tensorboardX import SummaryWriter
|
||||
|
||||
from transformers import WarmupLinearSchedule
|
||||
from transformers import get_linear_schedule_with_warmup
|
||||
|
||||
from utils import logger
|
||||
from lm_seqs_dataset import LmSeqsDataset
|
||||
@@ -137,9 +137,9 @@ class Distiller:
|
||||
betas=(0.9, 0.98))
|
||||
|
||||
warmup_steps = math.ceil(num_train_optimization_steps * params.warmup_prop)
|
||||
self.scheduler = WarmupLinearSchedule(self.optimizer,
|
||||
warmup_steps=warmup_steps,
|
||||
t_total=num_train_optimization_steps)
|
||||
self.scheduler = get_linear_schedule_with_warmup(self.optimizer,
|
||||
num_warmup_steps=warmup_steps,
|
||||
num_training_steps=num_train_optimization_steps)
|
||||
|
||||
if self.fp16:
|
||||
try:
|
||||
|
||||
@@ -46,7 +46,7 @@ from transformers import (WEIGHTS_NAME, BertConfig,
|
||||
XLNetTokenizer,
|
||||
DistilBertConfig, DistilBertForQuestionAnswering, DistilBertTokenizer)
|
||||
|
||||
from transformers import AdamW, WarmupLinearSchedule
|
||||
from transformers import AdamW, get_linear_schedule_with_warmup
|
||||
|
||||
from ..utils_squad import (read_squad_examples, convert_examples_to_features,
|
||||
RawResult, write_predictions,
|
||||
@@ -101,7 +101,7 @@ def train(args, train_dataset, model, tokenizer, teacher=None):
|
||||
{'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
|
||||
]
|
||||
optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
|
||||
scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total)
|
||||
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total)
|
||||
if args.fp16:
|
||||
try:
|
||||
from apex import amp
|
||||
|
||||
@@ -49,7 +49,7 @@ from transformers import (WEIGHTS_NAME, BertConfig,
|
||||
DistilBertForSequenceClassification,
|
||||
DistilBertTokenizer)
|
||||
|
||||
from transformers import AdamW, WarmupLinearSchedule
|
||||
from transformers import AdamW, get_linear_schedule_with_warmup
|
||||
|
||||
from transformers import glue_compute_metrics as compute_metrics
|
||||
from transformers import glue_output_modes as output_modes
|
||||
@@ -100,7 +100,7 @@ def train(args, train_dataset, model, tokenizer):
|
||||
{'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
|
||||
]
|
||||
optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
|
||||
scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total)
|
||||
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total)
|
||||
if args.fp16:
|
||||
try:
|
||||
from apex import amp
|
||||
|
||||
@@ -42,7 +42,7 @@ except:
|
||||
|
||||
from tqdm import tqdm, trange
|
||||
|
||||
from transformers import (WEIGHTS_NAME, AdamW, WarmupLinearSchedule,
|
||||
from transformers import (WEIGHTS_NAME, AdamW, get_linear_schedule_with_warmup,
|
||||
BertConfig, BertForMaskedLM, BertTokenizer,
|
||||
GPT2Config, GPT2LMHeadModel, GPT2Tokenizer,
|
||||
OpenAIGPTConfig, OpenAIGPTLMHeadModel, OpenAIGPTTokenizer,
|
||||
@@ -185,7 +185,7 @@ def train(args, train_dataset, model, tokenizer):
|
||||
{'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
|
||||
]
|
||||
optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
|
||||
scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total)
|
||||
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total)
|
||||
if args.fp16:
|
||||
try:
|
||||
from apex import amp
|
||||
|
||||
@@ -43,7 +43,7 @@ from transformers import (WEIGHTS_NAME, BertConfig,
|
||||
XLNetTokenizer, RobertaConfig,
|
||||
RobertaForMultipleChoice, RobertaTokenizer)
|
||||
|
||||
from transformers import AdamW, WarmupLinearSchedule
|
||||
from transformers import AdamW, get_linear_schedule_with_warmup
|
||||
|
||||
from utils_multiple_choice import (convert_examples_to_features, processors)
|
||||
|
||||
@@ -101,7 +101,7 @@ def train(args, train_dataset, model, tokenizer):
|
||||
{'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
|
||||
]
|
||||
optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
|
||||
scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total)
|
||||
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total)
|
||||
if args.fp16:
|
||||
try:
|
||||
from apex import amp
|
||||
|
||||
@@ -33,7 +33,7 @@ from torch.utils.data.distributed import DistributedSampler
|
||||
from tqdm import tqdm, trange
|
||||
from utils_ner import convert_examples_to_features, get_labels, read_examples_from_file
|
||||
|
||||
from transformers import AdamW, WarmupLinearSchedule
|
||||
from transformers import AdamW, get_linear_schedule_with_warmup
|
||||
from transformers import WEIGHTS_NAME, BertConfig, BertForTokenClassification, BertTokenizer
|
||||
from transformers import RobertaConfig, RobertaForTokenClassification, RobertaTokenizer
|
||||
|
||||
@@ -80,7 +80,7 @@ def train(args, train_dataset, model, tokenizer, labels, pad_token_label_id):
|
||||
{"params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": 0.0}
|
||||
]
|
||||
optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
|
||||
scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total)
|
||||
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total)
|
||||
if args.fp16:
|
||||
try:
|
||||
from apex import amp
|
||||
|
||||
@@ -45,7 +45,7 @@ from transformers import (WEIGHTS_NAME, BertConfig,
|
||||
XLNetTokenizer,
|
||||
DistilBertConfig, DistilBertForQuestionAnswering, DistilBertTokenizer)
|
||||
|
||||
from transformers import AdamW, WarmupLinearSchedule
|
||||
from transformers import AdamW, get_linear_schedule_with_warmup
|
||||
|
||||
from utils_squad import (read_squad_examples, convert_examples_to_features,
|
||||
RawResult, write_predictions,
|
||||
@@ -100,7 +100,7 @@ def train(args, train_dataset, model, tokenizer):
|
||||
{'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
|
||||
]
|
||||
optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
|
||||
scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total)
|
||||
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps, num_training_steps=t_total)
|
||||
if args.fp16:
|
||||
try:
|
||||
from apex import amp
|
||||
|
||||
Reference in New Issue
Block a user