Remove unused variables in examples.
This commit is contained in:
@@ -44,13 +44,10 @@ from transformers import (
|
|||||||
AdamW,
|
AdamW,
|
||||||
OpenAIGPTDoubleHeadsModel,
|
OpenAIGPTDoubleHeadsModel,
|
||||||
OpenAIGPTTokenizer,
|
OpenAIGPTTokenizer,
|
||||||
cached_path,
|
|
||||||
get_linear_schedule_with_warmup,
|
get_linear_schedule_with_warmup,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
ROCSTORIES_URL = "https://s3.amazonaws.com/datasets.huggingface.co/ROCStories.tar.gz"
|
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO
|
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO
|
||||||
)
|
)
|
||||||
@@ -182,9 +179,6 @@ def main():
|
|||||||
model.to(device)
|
model.to(device)
|
||||||
|
|
||||||
# Load and encode the datasets
|
# Load and encode the datasets
|
||||||
if not args.train_dataset and not args.eval_dataset:
|
|
||||||
roc_stories = cached_path(ROCSTORIES_URL)
|
|
||||||
|
|
||||||
def tokenize_and_encode(obj):
|
def tokenize_and_encode(obj):
|
||||||
""" Tokenize and encode a nested object """
|
""" Tokenize and encode a nested object """
|
||||||
if isinstance(obj, str):
|
if isinstance(obj, str):
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ import time
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from transformers import TransfoXLCorpus, TransfoXLLMHeadModel, TransfoXLTokenizer
|
from transformers import TransfoXLCorpus, TransfoXLLMHeadModel
|
||||||
|
|
||||||
|
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
@@ -73,9 +73,7 @@ def main():
|
|||||||
# The pre-processing involves computing word frequencies to prepare the Adaptive input and SoftMax
|
# The pre-processing involves computing word frequencies to prepare the Adaptive input and SoftMax
|
||||||
# and tokenizing the dataset
|
# and tokenizing the dataset
|
||||||
# The pre-processed corpus is a conversion (using the conversion script)
|
# The pre-processed corpus is a conversion (using the conversion script)
|
||||||
tokenizer = TransfoXLTokenizer.from_pretrained(args.model_name)
|
|
||||||
corpus = TransfoXLCorpus.from_pretrained(args.model_name)
|
corpus = TransfoXLCorpus.from_pretrained(args.model_name)
|
||||||
ntokens = len(corpus.vocab)
|
|
||||||
|
|
||||||
va_iter = corpus.get_iterator("valid", args.batch_size, args.tgt_len, device=device, ext_len=args.ext_len)
|
va_iter = corpus.get_iterator("valid", args.batch_size, args.tgt_len, device=device, ext_len=args.ext_len)
|
||||||
te_iter = corpus.get_iterator("test", args.batch_size, args.tgt_len, device=device, ext_len=args.ext_len)
|
te_iter = corpus.get_iterator("test", args.batch_size, args.tgt_len, device=device, ext_len=args.ext_len)
|
||||||
|
|||||||
@@ -141,7 +141,7 @@ def train(args, train_dataset, model, tokenizer):
|
|||||||
|
|
||||||
global_step = 0
|
global_step = 0
|
||||||
tr_loss, logging_loss = 0.0, 0.0
|
tr_loss, logging_loss = 0.0, 0.0
|
||||||
best_dev_acc, best_dev_loss = 0.0, 99999999999.0
|
best_dev_acc = 0.0
|
||||||
best_steps = 0
|
best_steps = 0
|
||||||
model.zero_grad()
|
model.zero_grad()
|
||||||
train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
|
train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
|
||||||
@@ -193,7 +193,6 @@ def train(args, train_dataset, model, tokenizer):
|
|||||||
tb_writer.add_scalar("eval_{}".format(key), value, global_step)
|
tb_writer.add_scalar("eval_{}".format(key), value, global_step)
|
||||||
if results["eval_acc"] > best_dev_acc:
|
if results["eval_acc"] > best_dev_acc:
|
||||||
best_dev_acc = results["eval_acc"]
|
best_dev_acc = results["eval_acc"]
|
||||||
best_dev_loss = results["eval_loss"]
|
|
||||||
best_steps = global_step
|
best_steps = global_step
|
||||||
if args.do_test:
|
if args.do_test:
|
||||||
results_test = evaluate(args, model, tokenizer, test=True)
|
results_test = evaluate(args, model, tokenizer, test=True)
|
||||||
|
|||||||
@@ -446,8 +446,6 @@ class MultiHeadedAttention(nn.Module):
|
|||||||
batch_size = key.size(0)
|
batch_size = key.size(0)
|
||||||
dim_per_head = self.dim_per_head
|
dim_per_head = self.dim_per_head
|
||||||
head_count = self.head_count
|
head_count = self.head_count
|
||||||
key_len = key.size(1)
|
|
||||||
query_len = query.size(1)
|
|
||||||
|
|
||||||
def shape(x):
|
def shape(x):
|
||||||
""" projection """
|
""" projection """
|
||||||
@@ -504,9 +502,6 @@ class MultiHeadedAttention(nn.Module):
|
|||||||
|
|
||||||
query = shape(query)
|
query = shape(query)
|
||||||
|
|
||||||
key_len = key.size(2)
|
|
||||||
query_len = query.size(2)
|
|
||||||
|
|
||||||
# 2) Calculate and scale scores.
|
# 2) Calculate and scale scores.
|
||||||
query = query / math.sqrt(dim_per_head)
|
query = query / math.sqrt(dim_per_head)
|
||||||
scores = torch.matmul(query, key.transpose(2, 3))
|
scores = torch.matmul(query, key.transpose(2, 3))
|
||||||
|
|||||||
Reference in New Issue
Block a user