Reformat source code with black.

This is the result of:

    $ black --line-length 119 examples templates transformers utils hubconf.py setup.py

There are a lot of fairly long lines in the project. As a consequence, I'm picking the longest widely accepted line length, 119 characters. This is also Thomas' preference, because it allows for explicit variable names, which make the code easier to understand.
2019-12-21 15:46:46 +01:00
parent 63e3827c6b
commit fa84ae26d6
200 changed files with 17452 additions and 12594 deletions
--- a/examples/distillation/scripts/token_counts.py
+++ b/examples/distillation/scripts/token_counts.py
@@ -20,32 +20,36 @@ import argparse
 import pickle
 import logging

-logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
-                    datefmt = '%m/%d/%Y %H:%M:%S',
-                    level = logging.INFO)
+logging.basicConfig(
+    format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO
+)
 logger = logging.getLogger(__name__)

-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description="Token Counts for smoothing the masking probabilities in MLM (cf XLM/word2vec)")
-    parser.add_argument("--data_file", type=str, default="data/dump.bert-base-uncased.pickle",
-                        help="The binarized dataset.")
-    parser.add_argument("--token_counts_dump", type=str, default="data/token_counts.bert-base-uncased.pickle",
-                        help="The dump file.")
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Token Counts for smoothing the masking probabilities in MLM (cf XLM/word2vec)"
+    )
+    parser.add_argument(
+        "--data_file", type=str, default="data/dump.bert-base-uncased.pickle", help="The binarized dataset."
+    )
+    parser.add_argument(
+        "--token_counts_dump", type=str, default="data/token_counts.bert-base-uncased.pickle", help="The dump file."
+    )
    parser.add_argument("--vocab_size", default=30522, type=int)
    args = parser.parse_args()

-    logger.info(f'Loading data from {args.data_file}')
-    with open(args.data_file, 'rb') as fp:
+    logger.info(f"Loading data from {args.data_file}")
+    with open(args.data_file, "rb") as fp:
        data = pickle.load(fp)

-    logger.info('Counting occurences for MLM.')
+    logger.info("Counting occurences for MLM.")
    counter = Counter()
    for tk_ids in data:
        counter.update(tk_ids)
-    counts = [0]*args.vocab_size
+    counts = [0] * args.vocab_size
    for k, v in counter.items():
        counts[k] = v

-    logger.info(f'Dump to {args.token_counts_dump}')
-    with open(args.token_counts_dump, 'wb') as handle:
+    logger.info(f"Dump to {args.token_counts_dump}")
+    with open(args.token_counts_dump, "wb") as handle:
        pickle.dump(counts, handle, protocol=pickle.HIGHEST_PROTOCOL)