remove convert_to_unicode and printable_text from examples

This commit is contained in:
thomwolf
2018-11-26 23:33:22 +01:00
parent ce37b8e481
commit 32167cdf4b
5 changed files with 19 additions and 32 deletions

View File

@@ -30,7 +30,7 @@ import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from torch.utils.data.distributed import DistributedSampler
from pytorch_pretrained_bert.tokenization import printable_text, convert_to_unicode, BertTokenizer
from pytorch_pretrained_bert.tokenization import BertTokenizer
from pytorch_pretrained_bert.modeling import BertForSequenceClassification
from pytorch_pretrained_bert.optimization import BertAdam
@@ -122,9 +122,9 @@ class MrpcProcessor(DataProcessor):
if i == 0:
continue
guid = "%s-%s" % (set_type, i)
text_a = convert_to_unicode(line[3])
text_b = convert_to_unicode(line[4])
label = convert_to_unicode(line[0])
text_a = line[3]
text_b = line[4]
label = line[0]
examples.append(
InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
return examples
@@ -154,10 +154,10 @@ class MnliProcessor(DataProcessor):
for (i, line) in enumerate(lines):
if i == 0:
continue
guid = "%s-%s" % (set_type, convert_to_unicode(line[0]))
text_a = convert_to_unicode(line[8])
text_b = convert_to_unicode(line[9])
label = convert_to_unicode(line[-1])
guid = "%s-%s" % (set_type, line[0])
text_a = line[8])
text_b = line[9])
label = line[-1]
examples.append(
InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
return examples
@@ -185,8 +185,8 @@ class ColaProcessor(DataProcessor):
examples = []
for (i, line) in enumerate(lines):
guid = "%s-%s" % (set_type, i)
text_a = convert_to_unicode(line[3])
label = convert_to_unicode(line[1])
text_a = line[3]
label = line[1]
examples.append(
InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
return examples
@@ -273,7 +273,7 @@ def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer
logger.info("*** Example ***")
logger.info("guid: %s" % (example.guid))
logger.info("tokens: %s" % " ".join(
[printable_text(x) for x in tokens]))
[str(x) for x in tokens]))
logger.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
logger.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
logger.info(