[RoBERTa] Update run_glue for RoBERTa
This commit is contained in:
@@ -390,10 +390,16 @@ class WnliProcessor(DataProcessor):
|
||||
|
||||
def convert_examples_to_features(examples, label_list, max_seq_length,
|
||||
tokenizer, output_mode,
|
||||
cls_token_at_end=False, pad_on_left=False,
|
||||
cls_token='[CLS]', sep_token='[SEP]', pad_token=0,
|
||||
sequence_a_segment_id=0, sequence_b_segment_id=1,
|
||||
cls_token_segment_id=1, pad_token_segment_id=0,
|
||||
cls_token_at_end=False,
|
||||
cls_token='[CLS]',
|
||||
cls_token_segment_id=1,
|
||||
sep_token='[SEP]',
|
||||
sep_token_extra=False,
|
||||
pad_on_left=False,
|
||||
pad_token=0,
|
||||
pad_token_segment_id=0,
|
||||
sequence_a_segment_id=0,
|
||||
sequence_b_segment_id=1,
|
||||
mask_padding_with_zero=True):
|
||||
""" Loads a data file into a list of `InputBatch`s
|
||||
`cls_token_at_end` defines the location of the CLS token:
|
||||
@@ -442,6 +448,9 @@ def convert_examples_to_features(examples, label_list, max_seq_length,
|
||||
# used as the "sentence vector". Note that this only makes sense because
|
||||
# the entire model is fine-tuned.
|
||||
tokens = tokens_a + [sep_token]
|
||||
if sep_token_extra:
|
||||
# RoBERTa uses an extra separator between pairs of sentences
|
||||
tokens += [sep_token]
|
||||
segment_ids = [sequence_a_segment_id] * len(tokens)
|
||||
|
||||
if tokens_b:
|
||||
|
||||
Reference in New Issue
Block a user