push fix to training
This commit is contained in:
@@ -154,8 +154,8 @@ def train(args, train_dataset, model, tokenizer):
|
|||||||
|
|
||||||
tr_loss += loss.item()
|
tr_loss += loss.item()
|
||||||
if (step + 1) % args.gradient_accumulation_steps == 0:
|
if (step + 1) % args.gradient_accumulation_steps == 0:
|
||||||
scheduler.step() # Update learning rate schedule
|
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
|
scheduler.step() # Update learning rate schedule
|
||||||
model.zero_grad()
|
model.zero_grad()
|
||||||
global_step += 1
|
global_step += 1
|
||||||
|
|
||||||
|
|||||||
69
examples/run_tf_glue.py
Normal file
69
examples/run_tf_glue.py
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
import tensorflow as tf
|
||||||
|
import tensorflow_datasets
|
||||||
|
from pytorch_transformers import BertTokenizer, BertForSequenceClassification, TFBertForSequenceClassification, glue_convert_examples_to_features
|
||||||
|
|
||||||
|
# Load tokenizer, model, dataset
|
||||||
|
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
|
||||||
|
tf_model = TFBertForSequenceClassification.from_pretrained('bert-base-cased')
|
||||||
|
dataset = tensorflow_datasets.load("glue/mrpc")
|
||||||
|
|
||||||
|
# Prepare dataset for GLUE
|
||||||
|
train_dataset = glue_convert_examples_to_features(dataset['train'], tokenizer, task='mrpc', max_length=128)
|
||||||
|
valid_dataset = glue_convert_examples_to_features(dataset['validation'], tokenizer, task='mrpc', max_length=128)
|
||||||
|
train_dataset = train_dataset.shuffle(100).batch(32).repeat(3)
|
||||||
|
valid_dataset = valid_dataset.batch(64)
|
||||||
|
|
||||||
|
# Compile tf.keras model for training
|
||||||
|
learning_rate = tf.keras.optimizers.schedules.PolynomialDecay(2e-5, 345, end_learning_rate=0)
|
||||||
|
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
|
||||||
|
tf_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08, clipnorm=1.0),
|
||||||
|
loss=loss, metrics=['sparse_categorical_accuracy'])
|
||||||
|
|
||||||
|
# Train and evaluate using tf.keras.Model.fit()
|
||||||
|
tf_model.fit(train_dataset, epochs=3, steps_per_epoch=115, validation_data=valid_dataset, validation_steps=7)
|
||||||
|
|
||||||
|
# Save the model and load it in PyTorch
|
||||||
|
tf_model.save_pretrained('./runs/')
|
||||||
|
pt_model = BertForSequenceClassification.from_pretrained('./runs/')
|
||||||
|
|
||||||
|
# Quickly inspect a few predictions
|
||||||
|
|
||||||
|
|
||||||
|
# Divers
|
||||||
|
import torch
|
||||||
|
|
||||||
|
import tensorflow as tf
|
||||||
|
import tensorflow_datasets
|
||||||
|
from pytorch_transformers import BertTokenizer, BertForSequenceClassification, TFBertForSequenceClassification, glue_convert_examples_to_features
|
||||||
|
|
||||||
|
# Load tokenizer, model, dataset
|
||||||
|
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
|
||||||
|
model = TFBertForSequenceClassification.from_pretrained('bert-base-cased')
|
||||||
|
|
||||||
|
pt_train_dataset = torch.load('../../data/glue_data//MRPC/cached_train_bert-base-cased_128_mrpc')
|
||||||
|
|
||||||
|
def gen():
|
||||||
|
for el in pt_train_dataset:
|
||||||
|
yield ((el.input_ids, el.attention_mask, el.token_type_ids), (el.label,))
|
||||||
|
|
||||||
|
dataset = tf.data.Dataset.from_generator(gen,
|
||||||
|
((tf.int32, tf.int32, tf.int32), (tf.int64,)),
|
||||||
|
((tf.TensorShape([None]), tf.TensorShape([None]), tf.TensorShape([None])),
|
||||||
|
(tf.TensorShape([]),)))
|
||||||
|
|
||||||
|
dataset = dataset.shuffle(100).batch(32)
|
||||||
|
next(iter(dataset))
|
||||||
|
|
||||||
|
learning_rate = tf.keras.optimizers.schedules.PolynomialDecay(2e-5, 345, 0)
|
||||||
|
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
|
||||||
|
model.compile(optimizer=tf.keras.optimizers.Adam(
|
||||||
|
learning_rate=learning_rate,
|
||||||
|
epsilon=1e-08,
|
||||||
|
clipnorm=1.0),
|
||||||
|
loss=loss,
|
||||||
|
metrics=[['sparse_categorical_accuracy']])
|
||||||
|
|
||||||
|
tensorboard_cbk = tf.keras.callbacks.TensorBoard(log_dir='./runs/', update_freq=10, histogram_freq=1)
|
||||||
|
|
||||||
|
# Train model
|
||||||
|
model.fit(dataset, epochs=3, callbacks=[tensorboard_cbk])
|
||||||
@@ -67,6 +67,7 @@ class PretrainedConfig(object):
|
|||||||
output_config_file = os.path.join(save_directory, CONFIG_NAME)
|
output_config_file = os.path.join(save_directory, CONFIG_NAME)
|
||||||
|
|
||||||
self.to_json_file(output_config_file)
|
self.to_json_file(output_config_file)
|
||||||
|
logger.info("Configuration saved in {}".format(output_config_file))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
|
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
import csv
|
import csv
|
||||||
import sys
|
import sys
|
||||||
import copy
|
import copy
|
||||||
|
import json
|
||||||
|
|
||||||
class InputExample(object):
|
class InputExample(object):
|
||||||
"""A single training/test example for simple sequence classification."""
|
"""A single training/test example for simple sequence classification."""
|
||||||
|
|||||||
@@ -132,8 +132,8 @@ class TFPreTrainedModel(tf.keras.Model):
|
|||||||
|
|
||||||
# If we save using the predefined names, we can load using `from_pretrained`
|
# If we save using the predefined names, we can load using `from_pretrained`
|
||||||
output_model_file = os.path.join(save_directory, TF2_WEIGHTS_NAME)
|
output_model_file = os.path.join(save_directory, TF2_WEIGHTS_NAME)
|
||||||
|
|
||||||
self.save_weights(output_model_file)
|
self.save_weights(output_model_file)
|
||||||
|
logger.info("Model weights saved in {}".format(output_model_file))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
|
def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
|
||||||
|
|||||||
@@ -201,8 +201,8 @@ class PreTrainedModel(nn.Module):
|
|||||||
|
|
||||||
# If we save using the predefined names, we can load using `from_pretrained`
|
# If we save using the predefined names, we can load using `from_pretrained`
|
||||||
output_model_file = os.path.join(save_directory, WEIGHTS_NAME)
|
output_model_file = os.path.join(save_directory, WEIGHTS_NAME)
|
||||||
|
|
||||||
torch.save(model_to_save.state_dict(), output_model_file)
|
torch.save(model_to_save.state_dict(), output_model_file)
|
||||||
|
logger.info("Model weights saved in {}".format(output_model_file))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
|
def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
|
||||||
@@ -305,7 +305,7 @@ class PreTrainedModel(nn.Module):
|
|||||||
archive_file = os.path.join(pretrained_model_name_or_path, WEIGHTS_NAME)
|
archive_file = os.path.join(pretrained_model_name_or_path, WEIGHTS_NAME)
|
||||||
else:
|
else:
|
||||||
raise EnvironmentError("Error no file named {} found in directory {}".format(
|
raise EnvironmentError("Error no file named {} found in directory {}".format(
|
||||||
tuple(WEIGHTS_NAME, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME + ".index"),
|
[WEIGHTS_NAME, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME + ".index"],
|
||||||
pretrained_model_name_or_path))
|
pretrained_model_name_or_path))
|
||||||
elif os.path.isfile(pretrained_model_name_or_path):
|
elif os.path.isfile(pretrained_model_name_or_path):
|
||||||
archive_file = pretrained_model_name_or_path
|
archive_file = pretrained_model_name_or_path
|
||||||
|
|||||||
Reference in New Issue
Block a user