Set encoding to 'utf-8' in calls to open()
This commit is contained in:
@@ -168,7 +168,7 @@ def read_examples(input_file):
|
|||||||
"""Read a list of `InputExample`s from an input file."""
|
"""Read a list of `InputExample`s from an input file."""
|
||||||
examples = []
|
examples = []
|
||||||
unique_id = 0
|
unique_id = 0
|
||||||
with open(input_file, "r") as reader:
|
with open(input_file, "r", encoding='utf-8') as reader:
|
||||||
while True:
|
while True:
|
||||||
line = reader.readline()
|
line = reader.readline()
|
||||||
if not line:
|
if not line:
|
||||||
|
|||||||
@@ -91,7 +91,7 @@ class DataProcessor(object):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def _read_tsv(cls, input_file, quotechar=None):
|
def _read_tsv(cls, input_file, quotechar=None):
|
||||||
"""Reads a tab separated value file."""
|
"""Reads a tab separated value file."""
|
||||||
with open(input_file, "r") as f:
|
with open(input_file, "r", encoding='utf-8') as f:
|
||||||
reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
|
reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
|
||||||
lines = []
|
lines = []
|
||||||
for line in reader:
|
for line in reader:
|
||||||
@@ -413,7 +413,8 @@ def main():
|
|||||||
n_gpu = 1
|
n_gpu = 1
|
||||||
# Initializes the distributed backend which will take care of sychronizing nodes/GPUs
|
# Initializes the distributed backend which will take care of sychronizing nodes/GPUs
|
||||||
torch.distributed.init_process_group(backend='nccl')
|
torch.distributed.init_process_group(backend='nccl')
|
||||||
logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))
|
logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
|
||||||
|
device, n_gpu, bool(args.local_rank != -1), args.fp16))
|
||||||
|
|
||||||
if args.gradient_accumulation_steps < 1:
|
if args.gradient_accumulation_steps < 1:
|
||||||
raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
|
raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
|
||||||
|
|||||||
@@ -108,7 +108,7 @@ class InputFeatures(object):
|
|||||||
|
|
||||||
def read_squad_examples(input_file, is_training):
|
def read_squad_examples(input_file, is_training):
|
||||||
"""Read a SQuAD json file into a list of SquadExample."""
|
"""Read a SQuAD json file into a list of SquadExample."""
|
||||||
with open(input_file, "r") as reader:
|
with open(input_file, "r", encoding='utf-8') as reader:
|
||||||
input_data = json.load(reader)["data"]
|
input_data = json.load(reader)["data"]
|
||||||
|
|
||||||
def is_whitespace(c):
|
def is_whitespace(c):
|
||||||
@@ -757,7 +757,7 @@ def main():
|
|||||||
n_gpu = 1
|
n_gpu = 1
|
||||||
# Initializes the distributed backend which will take care of sychronizing nodes/GPUs
|
# Initializes the distributed backend which will take care of sychronizing nodes/GPUs
|
||||||
torch.distributed.init_process_group(backend='nccl')
|
torch.distributed.init_process_group(backend='nccl')
|
||||||
logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits trainiing: {}".format(
|
logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
|
||||||
device, n_gpu, bool(args.local_rank != -1), args.fp16))
|
device, n_gpu, bool(args.local_rank != -1), args.fp16))
|
||||||
|
|
||||||
if args.gradient_accumulation_steps < 1:
|
if args.gradient_accumulation_steps < 1:
|
||||||
|
|||||||
@@ -100,7 +100,7 @@ class InputFeatures(object):
|
|||||||
|
|
||||||
|
|
||||||
def read_swag_examples(input_file, is_training):
|
def read_swag_examples(input_file, is_training):
|
||||||
with open(input_file, 'r') as f:
|
with open(input_file, 'r', encoding='utf-8') as f:
|
||||||
reader = csv.reader(f)
|
reader = csv.reader(f)
|
||||||
lines = list(reader)
|
lines = list(reader)
|
||||||
|
|
||||||
@@ -333,7 +333,8 @@ def main():
|
|||||||
n_gpu = 1
|
n_gpu = 1
|
||||||
# Initializes the distributed backend which will take care of sychronizing nodes/GPUs
|
# Initializes the distributed backend which will take care of sychronizing nodes/GPUs
|
||||||
torch.distributed.init_process_group(backend='nccl')
|
torch.distributed.init_process_group(backend='nccl')
|
||||||
logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))
|
logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
|
||||||
|
device, n_gpu, bool(args.local_rank != -1), args.fp16))
|
||||||
|
|
||||||
if args.gradient_accumulation_steps < 1:
|
if args.gradient_accumulation_steps < 1:
|
||||||
raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
|
raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
|
||||||
|
|||||||
@@ -227,7 +227,7 @@ def read_set_from_file(filename: str) -> Set[str]:
|
|||||||
Expected file format is one item per line.
|
Expected file format is one item per line.
|
||||||
'''
|
'''
|
||||||
collection = set()
|
collection = set()
|
||||||
with open(filename, 'r') as file_:
|
with open(filename, 'r', encoding='utf-8') as file_:
|
||||||
for line in file_:
|
for line in file_:
|
||||||
collection.add(line.rstrip())
|
collection.add(line.rstrip())
|
||||||
return collection
|
return collection
|
||||||
|
|||||||
@@ -106,7 +106,7 @@ class BertConfig(object):
|
|||||||
initializing all weight matrices.
|
initializing all weight matrices.
|
||||||
"""
|
"""
|
||||||
if isinstance(vocab_size_or_config_json_file, str):
|
if isinstance(vocab_size_or_config_json_file, str):
|
||||||
with open(vocab_size_or_config_json_file, "r") as reader:
|
with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
|
||||||
json_config = json.loads(reader.read())
|
json_config = json.loads(reader.read())
|
||||||
for key, value in json_config.items():
|
for key, value in json_config.items():
|
||||||
self.__dict__[key] = value
|
self.__dict__[key] = value
|
||||||
@@ -137,7 +137,7 @@ class BertConfig(object):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def from_json_file(cls, json_file):
|
def from_json_file(cls, json_file):
|
||||||
"""Constructs a `BertConfig` from a json file of parameters."""
|
"""Constructs a `BertConfig` from a json file of parameters."""
|
||||||
with open(json_file, "r") as reader:
|
with open(json_file, "r", encoding='utf-8') as reader:
|
||||||
text = reader.read()
|
text = reader.read()
|
||||||
return cls.from_dict(json.loads(text))
|
return cls.from_dict(json.loads(text))
|
||||||
|
|
||||||
|
|||||||
2
setup.py
2
setup.py
@@ -41,7 +41,7 @@ setup(
|
|||||||
author="Thomas Wolf, Victor Sanh, Tim Rault, Google AI Language Team Authors",
|
author="Thomas Wolf, Victor Sanh, Tim Rault, Google AI Language Team Authors",
|
||||||
author_email="thomas@huggingface.co",
|
author_email="thomas@huggingface.co",
|
||||||
description="PyTorch version of Google AI BERT model with script to load Google pre-trained models",
|
description="PyTorch version of Google AI BERT model with script to load Google pre-trained models",
|
||||||
long_description=open("README.md", "r").read(),
|
long_description=open("README.md", "r", encoding='utf-8').read(),
|
||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
keywords='BERT NLP deep learning google',
|
keywords='BERT NLP deep learning google',
|
||||||
license='Apache',
|
license='Apache',
|
||||||
|
|||||||
Reference in New Issue
Block a user