Fix TypeError: Object of type int64 is not JSON serializable (#24340)

* Fix TypeError: Object of type int64 is not JSON serializable * Convert numpy.float64 and numpy.int64 to float and int for json serialization * Black reformatted examples/pytorch/token-classification/run_ner_no_trainer.py * * make style
2023-06-27 19:15:49 +08:00
parent ac19871ce2
commit 239ace152b
44 changed files with 74 additions and 71 deletions
--- a/examples/pytorch/token-classification/run_ner_no_trainer.py
+++ b/examples/pytorch/token-classification/run_ner_no_trainer.py
@@ -28,6 +28,7 @@ from pathlib import Path

 import datasets
 import evaluate
+import numpy as np
 import torch
 from accelerate import Accelerator
 from accelerate.logging import get_logger
@@ -777,6 +778,12 @@ def main():
            if args.with_tracking:
                all_results.update({"train_loss": total_loss.item() / len(train_dataloader)})
            with open(os.path.join(args.output_dir, "all_results.json"), "w") as f:
+                # Convert all float64 & int64 type numbers to float & int for json serialization
+                for key, value in all_results.items():
+                    if isinstance(value, np.float64):
+                        all_results[key] = float(value)
+                    elif isinstance(value, np.int64):
+                        all_results[key] = int(value)
                json.dump(all_results, f)


--- a/examples/research_projects/codeparrot/scripts/human_eval.py
+++ b/examples/research_projects/codeparrot/scripts/human_eval.py
@@ -60,7 +60,7 @@ class EndOfFunctionCriteria(StoppingCriteria):
        decoded_generations = self.tokenizer.batch_decode(input_ids[:, self.start_length :])
        done = []
        for decoded_generation in decoded_generations:
-            done.append(any([stop_string in decoded_generation for stop_string in self.eof_strings]))
+            done.append(any(stop_string in decoded_generation for stop_string in self.eof_strings))
        return all(done)


--- a/examples/research_projects/fsner/src/fsner/tokenizer_utils.py
+++ b/examples/research_projects/fsner/src/fsner/tokenizer_utils.py
@@ -17,7 +17,7 @@ class FSNERTokenizerUtils(object):
            `transformers.tokenization_utils_base.BatchEncoding` dict with additional keys and values for start_token_id, end_token_id and sizes of example lists for each entity type
        """

-        if isinstance(x, list) and all([isinstance(_x, list) for _x in x]):
+        if isinstance(x, list) and all(isinstance(_x, list) for _x in x):
            d = None
            for l in x:
                t = self.tokenizer(
@@ -37,7 +37,7 @@ class FSNERTokenizerUtils(object):
            d["start_token_id"] = torch.tensor(self.tokenizer.convert_tokens_to_ids("[E]"))
            d["end_token_id"] = torch.tensor(self.tokenizer.convert_tokens_to_ids("[/E]"))

-        elif isinstance(x, list) and all([isinstance(_x, str) for _x in x]):
+        elif isinstance(x, list) and all(isinstance(_x, str) for _x in x):
            d = self.tokenizer(
                x,
                padding="max_length",
--- a/examples/research_projects/jax-projects/big_bird/prepare_natural_questions.py
+++ b/examples/research_projects/jax-projects/big_bird/prepare_natural_questions.py
@@ -50,7 +50,7 @@ def _get_single_answer(example):
        answer["remove_it"] = False

    cols = ["start_token", "end_token", "start_byte", "end_byte", "text"]
-    if not all([isinstance(answer[k], list) for k in cols]):
+    if not all(isinstance(answer[k], list) for k in cols):
        raise ValueError("Issue in ID", example["id"])

    return answer
--- a/examples/research_projects/luke/run_luke_ner_no_trainer.py
+++ b/examples/research_projects/luke/run_luke_ner_no_trainer.py
@@ -610,7 +610,7 @@ def main():
            predicted_sequence = [label_list[0]] * len(true_tags)

            for _, span, label in sorted(predictions, key=lambda o: o[0], reverse=True):
-                if all([o == label_list[0] for o in predicted_sequence[span[0] : span[1]]]):
+                if all(o == label_list[0] for o in predicted_sequence[span[0] : span[1]]):
                    predicted_sequence[span[0]] = label
                    if span[1] - span[0] > 1:
                        predicted_sequence[span[0] + 1 : span[1]] = [label] * (span[1] - span[0] - 1)
--- a/examples/research_projects/lxmert/modeling_frcnn.py
+++ b/examples/research_projects/lxmert/modeling_frcnn.py
@@ -554,8 +554,8 @@ class Matcher(object):
        assert thresholds[0] > 0
        thresholds.insert(0, -float("inf"))
        thresholds.append(float("inf"))
-        assert all([low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])])
-        assert all([label_i in [-1, 0, 1] for label_i in labels])
+        assert all(low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:]))
+        assert all(label_i in [-1, 0, 1] for label_i in labels)
        assert len(labels) == len(thresholds) - 1
        self.thresholds = thresholds
        self.labels = labels
--- a/examples/research_projects/visual_bert/modeling_frcnn.py
+++ b/examples/research_projects/visual_bert/modeling_frcnn.py
@@ -554,8 +554,8 @@ class Matcher(object):
        assert thresholds[0] > 0
        thresholds.insert(0, -float("inf"))
        thresholds.append(float("inf"))
-        assert all([low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])])
-        assert all([label_i in [-1, 0, 1] for label_i in labels])
+        assert all(low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:]))
+        assert all(label_i in [-1, 0, 1] for label_i in labels)
        assert len(labels) == len(thresholds) - 1
        self.thresholds = thresholds
        self.labels = labels