set encoding to 'utf-8' in calls to open

This commit is contained in:
thomwolf
2018-12-14 13:48:58 +01:00
parent e1eab59aac
commit ae88eb88a4
7 changed files with 13 additions and 11 deletions

View File

@@ -168,7 +168,7 @@ def read_examples(input_file):
"""Read a list of `InputExample`s from an input file."""
examples = []
unique_id = 0
with open(input_file, "r") as reader:
with open(input_file, "r", encoding='utf-8') as reader:
while True:
line = reader.readline()
if not line:

View File

@@ -91,7 +91,7 @@ class DataProcessor(object):
@classmethod
def _read_tsv(cls, input_file, quotechar=None):
"""Reads a tab separated value file."""
with open(input_file, "r") as f:
with open(input_file, "r", encoding='utf-8') as f:
reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
lines = []
for line in reader:
@@ -413,7 +413,8 @@ def main():
n_gpu = 1
# Initializes the distributed backend which will take care of synchronizing nodes/GPUs
torch.distributed.init_process_group(backend='nccl')
logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))
logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
device, n_gpu, bool(args.local_rank != -1), args.fp16))
if args.gradient_accumulation_steps < 1:
raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(

View File

@@ -108,7 +108,7 @@ class InputFeatures(object):
def read_squad_examples(input_file, is_training):
"""Read a SQuAD json file into a list of SquadExample."""
with open(input_file, "r") as reader:
with open(input_file, "r", encoding='utf-8') as reader:
input_data = json.load(reader)["data"]
def is_whitespace(c):
@@ -757,7 +757,7 @@ def main():
n_gpu = 1
# Initializes the distributed backend which will take care of synchronizing nodes/GPUs
torch.distributed.init_process_group(backend='nccl')
logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits trainiing: {}".format(
logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
device, n_gpu, bool(args.local_rank != -1), args.fp16))
if args.gradient_accumulation_steps < 1:

View File

@@ -100,7 +100,7 @@ class InputFeatures(object):
def read_swag_examples(input_file, is_training):
with open(input_file, 'r') as f:
with open(input_file, 'r', encoding='utf-8') as f:
reader = csv.reader(f)
lines = list(reader)
@@ -333,7 +333,8 @@ def main():
n_gpu = 1
# Initializes the distributed backend which will take care of synchronizing nodes/GPUs
torch.distributed.init_process_group(backend='nccl')
logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))
logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
device, n_gpu, bool(args.local_rank != -1), args.fp16))
if args.gradient_accumulation_steps < 1:
raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(