diff --git a/examples/run_glue.py b/examples/run_glue.py index 278f5c723a..99821f454d 100644 --- a/examples/run_glue.py +++ b/examples/run_glue.py @@ -154,8 +154,8 @@ def train(args, train_dataset, model, tokenizer): tr_loss += loss.item() if (step + 1) % args.gradient_accumulation_steps == 0: - scheduler.step() # Update learning rate schedule optimizer.step() + scheduler.step() # Update learning rate schedule model.zero_grad() global_step += 1 diff --git a/examples/run_tf_glue.py b/examples/run_tf_glue.py new file mode 100644 index 0000000000..6f59d15286 --- /dev/null +++ b/examples/run_tf_glue.py @@ -0,0 +1,69 @@ +import tensorflow as tf +import tensorflow_datasets +from pytorch_transformers import BertTokenizer, BertForSequenceClassification, TFBertForSequenceClassification, glue_convert_examples_to_features + +# Load tokenizer, model, dataset +tokenizer = BertTokenizer.from_pretrained('bert-base-cased') +tf_model = TFBertForSequenceClassification.from_pretrained('bert-base-cased') +dataset = tensorflow_datasets.load("glue/mrpc") + +# Prepare dataset for GLUE +train_dataset = glue_convert_examples_to_features(dataset['train'], tokenizer, task='mrpc', max_length=128) +valid_dataset = glue_convert_examples_to_features(dataset['validation'], tokenizer, task='mrpc', max_length=128) +train_dataset = train_dataset.shuffle(100).batch(32).repeat(3) +valid_dataset = valid_dataset.batch(64) + +# Compile tf.keras model for training +learning_rate = tf.keras.optimizers.schedules.PolynomialDecay(2e-5, 345, end_learning_rate=0) +loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) +tf_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08, clipnorm=1.0), + loss=loss, metrics=['sparse_categorical_accuracy']) + +# Train and evaluate using tf.keras.Model.fit() +tf_model.fit(train_dataset, epochs=3, steps_per_epoch=115, validation_data=valid_dataset, validation_steps=7) + +# Save the model and load it in PyTorch +tf_model.save_pretrained('./runs/') +pt_model = BertForSequenceClassification.from_pretrained('./runs/') + +# Quickly inspect a few predictions + + +# Divers +import torch + +import tensorflow as tf +import tensorflow_datasets +from pytorch_transformers import BertTokenizer, BertForSequenceClassification, TFBertForSequenceClassification, glue_convert_examples_to_features + +# Load tokenizer, model, dataset +tokenizer = BertTokenizer.from_pretrained('bert-base-cased') +model = TFBertForSequenceClassification.from_pretrained('bert-base-cased') + +pt_train_dataset = torch.load('../../data/glue_data//MRPC/cached_train_bert-base-cased_128_mrpc') + +def gen(): + for el in pt_train_dataset: + yield ((el.input_ids, el.attention_mask, el.token_type_ids), (el.label,)) + +dataset = tf.data.Dataset.from_generator(gen, + ((tf.int32, tf.int32, tf.int32), (tf.int64,)), + ((tf.TensorShape([None]), tf.TensorShape([None]), tf.TensorShape([None])), + (tf.TensorShape([]),))) + +dataset = dataset.shuffle(100).batch(32) +next(iter(dataset)) + +learning_rate = tf.keras.optimizers.schedules.PolynomialDecay(2e-5, 345, 0) +loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) +model.compile(optimizer=tf.keras.optimizers.Adam( + learning_rate=learning_rate, + epsilon=1e-08, + clipnorm=1.0), + loss=loss, + metrics=[['sparse_categorical_accuracy']]) + +tensorboard_cbk = tf.keras.callbacks.TensorBoard(log_dir='./runs/', update_freq=10, histogram_freq=1) + +# Train model +model.fit(dataset, epochs=3, callbacks=[tensorboard_cbk]) diff --git a/pytorch_transformers/configuration_utils.py b/pytorch_transformers/configuration_utils.py index fb1fe82f43..649a94e28c 100644 --- a/pytorch_transformers/configuration_utils.py +++ b/pytorch_transformers/configuration_utils.py @@ -67,6 +67,7 @@ class PretrainedConfig(object): output_config_file = os.path.join(save_directory, CONFIG_NAME) self.to_json_file(output_config_file) + logger.info("Configuration saved in {}".format(output_config_file)) @classmethod def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): diff --git a/pytorch_transformers/data/processors/utils.py b/pytorch_transformers/data/processors/utils.py index ed85f4a1f4..a616372054 100644 --- a/pytorch_transformers/data/processors/utils.py +++ b/pytorch_transformers/data/processors/utils.py @@ -17,6 +17,7 @@ import csv import sys import copy +import json class InputExample(object): """A single training/test example for simple sequence classification.""" diff --git a/pytorch_transformers/modeling_tf_utils.py b/pytorch_transformers/modeling_tf_utils.py index 2186e2d488..21faee6616 100644 --- a/pytorch_transformers/modeling_tf_utils.py +++ b/pytorch_transformers/modeling_tf_utils.py @@ -132,8 +132,8 @@ class TFPreTrainedModel(tf.keras.Model): # If we save using the predefined names, we can load using `from_pretrained` output_model_file = os.path.join(save_directory, TF2_WEIGHTS_NAME) - self.save_weights(output_model_file) + logger.info("Model weights saved in {}".format(output_model_file)) @classmethod def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): diff --git a/pytorch_transformers/modeling_utils.py b/pytorch_transformers/modeling_utils.py index af33c22d6e..00e1156125 100644 --- a/pytorch_transformers/modeling_utils.py +++ b/pytorch_transformers/modeling_utils.py @@ -201,8 +201,8 @@ class PreTrainedModel(nn.Module): # If we save using the predefined names, we can load using `from_pretrained` output_model_file = os.path.join(save_directory, WEIGHTS_NAME) - torch.save(model_to_save.state_dict(), output_model_file) + logger.info("Model weights saved in {}".format(output_model_file)) @classmethod def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): @@ -305,7 +305,7 @@ class PreTrainedModel(nn.Module): archive_file = os.path.join(pretrained_model_name_or_path, WEIGHTS_NAME) else: raise EnvironmentError("Error no file named {} found in directory {}".format( - tuple(WEIGHTS_NAME, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME + ".index"), + [WEIGHTS_NAME, TF2_WEIGHTS_NAME, TF_WEIGHTS_NAME + ".index"], pretrained_model_name_or_path)) elif os.path.isfile(pretrained_model_name_or_path): archive_file = pretrained_model_name_or_path