From 06b4aac9ebab77a0065ec2cab40a8085ad71946f Mon Sep 17 00:00:00 2001 From: Michael Chung <6252325+ArEnSc@users.noreply.github.com> Date: Wed, 13 Apr 2022 09:04:47 -0400 Subject: [PATCH] Add Doc Test for GPT-J (#16507) * Required the values GPTJ unfortunately cannot run the model =) * Added the file to the doc tests * Run Fixup and Style * Fixed with the test versions of gptj. Ran Style and Fixup. * Trigger ci * A Minor Change to License * Fixed spacing added to the benchmark_utils. Then refactored tests to const variables. * Removed strings that were included as default parameters anyways. Co-authored-by: ArEnSc --- src/transformers/models/gptj/modeling_gptj.py | 19 ++++++++++++++++--- utils/documentation_tests.txt | 1 + 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/transformers/models/gptj/modeling_gptj.py b/src/transformers/models/gptj/modeling_gptj.py index 18e3aa6303..d045603038 100755 --- a/src/transformers/models/gptj/modeling_gptj.py +++ b/src/transformers/models/gptj/modeling_gptj.py @@ -36,10 +36,19 @@ from .configuration_gptj import GPTJConfig logger = logging.get_logger(__name__) -_CHECKPOINT_FOR_DOC = "EleutherAI/gpt-j-6B" +_CHECKPOINT_FOR_DOC = "hf-internal-testing/tiny-random-gptj" _CONFIG_FOR_DOC = "GPTJConfig" _TOKENIZER_FOR_DOC = "GPT2Tokenizer" +_CHECKPOINT_FOR_QA = "ydshieh/tiny-random-gptj-for-question-answering" +_QA_EXPECTED_OUTPUT = "' was Jim Henson?Jim Henson was a n'" +_QA_EXPECTED_LOSS = 3.13 + +_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION = "ydshieh/tiny-random-gptj-for-sequence-classification" +_SEQ_CLASS_EXPECTED_OUTPUT = "'LABEL_0'" +_SEQ_CLASS_EXPECTED_LOSS = 0.76 + + GPTJ_PRETRAINED_MODEL_ARCHIVE_LIST = [ "EleutherAI/gpt-j-6B", # See all GPT-J models at https://huggingface.co/models?filter=gptj @@ -892,9 +901,11 @@ class GPTJForSequenceClassification(GPTJPreTrainedModel): @add_start_docstrings_to_model_forward(GPTJ_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATION, output_type=SequenceClassifierOutputWithPast, config_class=_CONFIG_FOR_DOC, + expected_output=_SEQ_CLASS_EXPECTED_OUTPUT, + expected_loss=_SEQ_CLASS_EXPECTED_LOSS, ) def forward( self, @@ -1017,9 +1028,11 @@ class GPTJForQuestionAnswering(GPTJPreTrainedModel): @add_start_docstrings_to_model_forward(GPTJ_INPUTS_DOCSTRING.format("batch_size, sequence_length")) @add_code_sample_docstrings( processor_class=_TOKENIZER_FOR_DOC, - checkpoint=_CHECKPOINT_FOR_DOC, + checkpoint=_CHECKPOINT_FOR_QA, output_type=QuestionAnsweringModelOutput, config_class=_CONFIG_FOR_DOC, + expected_output=_QA_EXPECTED_OUTPUT, + expected_loss=_QA_EXPECTED_LOSS, ) def forward( self, diff --git a/utils/documentation_tests.txt b/utils/documentation_tests.txt index 170076244c..8e96b81e6d 100644 --- a/utils/documentation_tests.txt +++ b/utils/documentation_tests.txt @@ -19,6 +19,7 @@ src/transformers/models/deit/modeling_deit.py src/transformers/models/dpt/modeling_dpt.py src/transformers/models/glpn/modeling_glpn.py src/transformers/models/gpt2/modeling_gpt2.py +src/transformers/models/gptj/modeling_gptj.py src/transformers/models/hubert/modeling_hubert.py src/transformers/models/marian/modeling_marian.py src/transformers/models/mbart/modeling_mbart.py