Fix TypeError: Object of type int64 is not JSON serializable (#24340)

* Fix TypeError: Object of type int64 is not JSON serializable

* Convert numpy.float64 and numpy.int64 to float and int for json serialization

* Black reformatted examples/pytorch/token-classification/run_ner_no_trainer.py

* make style
This commit is contained in:
Xiaoli Wang
2023-06-27 19:15:49 +08:00
committed by GitHub
parent ac19871ce2
commit 239ace152b
44 changed files with 74 additions and 71 deletions

View File

@@ -28,6 +28,7 @@ from pathlib import Path
import datasets
import evaluate
import numpy as np
import torch
from accelerate import Accelerator
from accelerate.logging import get_logger
@@ -777,6 +778,12 @@ def main():
if args.with_tracking:
all_results.update({"train_loss": total_loss.item() / len(train_dataloader)})
with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
# Convert all float64 & int64 type numbers to float & int for json serialization
for key, value in all_results.items():
if isinstance(value, np.float64):
all_results[key] = float(value)
elif isinstance(value, np.int64):
all_results[key] = int(value)
json.dump(all_results, f)

View File

@@ -60,7 +60,7 @@ class EndOfFunctionCriteria(StoppingCriteria):
decoded_generations = self.tokenizer.batch_decode(input_ids[:, self.start_length :])
done = []
for decoded_generation in decoded_generations:
done.append(any([stop_string in decoded_generation for stop_string in self.eof_strings]))
done.append(any(stop_string in decoded_generation for stop_string in self.eof_strings))
return all(done)

View File

@@ -17,7 +17,7 @@ class FSNERTokenizerUtils(object):
`transformers.tokenization_utils_base.BatchEncoding` dict with additional keys and values for start_token_id, end_token_id and sizes of example lists for each entity type
"""
if isinstance(x, list) and all([isinstance(_x, list) for _x in x]):
if isinstance(x, list) and all(isinstance(_x, list) for _x in x):
d = None
for l in x:
t = self.tokenizer(
@@ -37,7 +37,7 @@ class FSNERTokenizerUtils(object):
d["start_token_id"] = torch.tensor(self.tokenizer.convert_tokens_to_ids("[E]"))
d["end_token_id"] = torch.tensor(self.tokenizer.convert_tokens_to_ids("[/E]"))
elif isinstance(x, list) and all([isinstance(_x, str) for _x in x]):
elif isinstance(x, list) and all(isinstance(_x, str) for _x in x):
d = self.tokenizer(
x,
padding="max_length",

View File

@@ -50,7 +50,7 @@ def _get_single_answer(example):
answer["remove_it"] = False
cols = ["start_token", "end_token", "start_byte", "end_byte", "text"]
if not all([isinstance(answer[k], list) for k in cols]):
if not all(isinstance(answer[k], list) for k in cols):
raise ValueError("Issue in ID", example["id"])
return answer

View File

@@ -610,7 +610,7 @@ def main():
predicted_sequence = [label_list[0]] * len(true_tags)
for _, span, label in sorted(predictions, key=lambda o: o[0], reverse=True):
if all([o == label_list[0] for o in predicted_sequence[span[0] : span[1]]]):
if all(o == label_list[0] for o in predicted_sequence[span[0] : span[1]]):
predicted_sequence[span[0]] = label
if span[1] - span[0] > 1:
predicted_sequence[span[0] + 1 : span[1]] = [label] * (span[1] - span[0] - 1)

View File

@@ -554,8 +554,8 @@ class Matcher(object):
assert thresholds[0] > 0
thresholds.insert(0, -float("inf"))
thresholds.append(float("inf"))
assert all([low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])])
assert all([label_i in [-1, 0, 1] for label_i in labels])
assert all(low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:]))
assert all(label_i in [-1, 0, 1] for label_i in labels)
assert len(labels) == len(thresholds) - 1
self.thresholds = thresholds
self.labels = labels

View File

@@ -554,8 +554,8 @@ class Matcher(object):
assert thresholds[0] > 0
thresholds.insert(0, -float("inf"))
thresholds.append(float("inf"))
assert all([low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])])
assert all([label_i in [-1, 0, 1] for label_i in labels])
assert all(low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:]))
assert all(label_i in [-1, 0, 1] for label_i in labels)
assert len(labels) == len(thresholds) - 1
self.thresholds = thresholds
self.labels = labels