[Tokenizer Utils Base] Make pad function more flexible (#9928)

* change tokenizer requirement
* split line
* Correct typo from list to str
* improve style
* make other function pretty as well
* add comment
* correct typo
* add new test
* pass tests for tok without padding token
* Apply suggestions from code review
2021-02-02 10:35:27 +03:00
parent d1b14c9b54
commit 538b3b4607
40 changed files with 187 additions and 107 deletions
--- a/examples/text-classification/run_tf_text_classification.py
+++ b/examples/text-classification/run_tf_text_classification.py
@@ -64,7 +64,7 @@ def get_tfds(
    label_name = features_name.pop(label_column_id)
    label_list = list(set(ds[list(files.keys())[0]][label_name]))
    label2id = {label: i for i, label in enumerate(label_list)}
-    input_names = ["input_ids"] + tokenizer.model_input_names
+    input_names = tokenizer.model_input_names
    transformed_ds = {}

    if len(features_name) == 1: