[RoBERTa] Update run_glue for RoBERTa

This commit is contained in:
Julien Chaumond
2019-08-12 13:49:50 -04:00
parent b3d83d68db
commit 912fdff899
2 changed files with 22 additions and 8 deletions

View File

@@ -390,10 +390,16 @@ class WnliProcessor(DataProcessor):
def convert_examples_to_features(examples, label_list, max_seq_length,
tokenizer, output_mode,
cls_token_at_end=False, pad_on_left=False,
cls_token='[CLS]', sep_token='[SEP]', pad_token=0,
sequence_a_segment_id=0, sequence_b_segment_id=1,
cls_token_segment_id=1, pad_token_segment_id=0,
cls_token_at_end=False,
cls_token='[CLS]',
cls_token_segment_id=1,
sep_token='[SEP]',
sep_token_extra=False,
pad_on_left=False,
pad_token=0,
pad_token_segment_id=0,
sequence_a_segment_id=0,
sequence_b_segment_id=1,
mask_padding_with_zero=True):
""" Loads a data file into a list of `InputBatch`s
`cls_token_at_end` define the location of the CLS token:
@@ -442,6 +448,9 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
# used as as the "sentence vector". Note that this only makes sense because
# the entire model is fine-tuned.
tokens = tokens_a + [sep_token]
if sep_token_extra:
# roberta uses an extra separator b/w pairs of sentences
tokens += [sep_token]
segment_ids = [sequence_a_segment_id] * len(tokens)
if tokens_b: