Black 20 release
This commit is contained in:
@@ -86,7 +86,10 @@ class GlueDataset(Dataset):
|
||||
cached_features_file = os.path.join(
|
||||
cache_dir if cache_dir is not None else args.data_dir,
|
||||
"cached_{}_{}_{}_{}".format(
|
||||
mode.value, tokenizer.__class__.__name__, str(args.max_seq_length), args.task_name,
|
||||
mode.value,
|
||||
tokenizer.__class__.__name__,
|
||||
str(args.max_seq_length),
|
||||
args.task_name,
|
||||
),
|
||||
)
|
||||
label_list = self.processor.get_labels()
|
||||
|
||||
@@ -21,7 +21,11 @@ class TextDataset(Dataset):
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int, overwrite_cache=False,
|
||||
self,
|
||||
tokenizer: PreTrainedTokenizer,
|
||||
file_path: str,
|
||||
block_size: int,
|
||||
overwrite_cache=False,
|
||||
):
|
||||
assert os.path.isfile(file_path), f"Input file path {file_path} not found"
|
||||
|
||||
@@ -29,7 +33,12 @@ class TextDataset(Dataset):
|
||||
|
||||
directory, filename = os.path.split(file_path)
|
||||
cached_features_file = os.path.join(
|
||||
directory, "cached_lm_{}_{}_{}".format(tokenizer.__class__.__name__, str(block_size), filename,),
|
||||
directory,
|
||||
"cached_lm_{}_{}_{}".format(
|
||||
tokenizer.__class__.__name__,
|
||||
str(block_size),
|
||||
filename,
|
||||
),
|
||||
)
|
||||
|
||||
# Make sure only the first process in distributed training processes the dataset,
|
||||
|
||||
@@ -119,7 +119,10 @@ class SquadDataset(Dataset):
|
||||
cached_features_file = os.path.join(
|
||||
cache_dir if cache_dir is not None else args.data_dir,
|
||||
"cached_{}_{}_{}_{}".format(
|
||||
mode.value, tokenizer.__class__.__name__, str(args.max_seq_length), version_tag,
|
||||
mode.value,
|
||||
tokenizer.__class__.__name__,
|
||||
str(args.max_seq_length),
|
||||
version_tag,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -589,10 +589,10 @@ def compute_predictions_log_probs(
|
||||
tokenizer,
|
||||
verbose_logging,
|
||||
):
|
||||
""" XLNet write prediction logic (more complex than Bert's).
|
||||
Write final predictions to the json file and log-odds of null if needed.
|
||||
"""XLNet write prediction logic (more complex than Bert's).
|
||||
Write final predictions to the json file and log-odds of null if needed.
|
||||
|
||||
Requires utils_squad_evaluate.py
|
||||
Requires utils_squad_evaluate.py
|
||||
"""
|
||||
_PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name
|
||||
"PrelimPrediction", ["feature_index", "start_index", "end_index", "start_log_prob", "end_log_prob"]
|
||||
|
||||
@@ -69,7 +69,10 @@ def glue_convert_examples_to_features(
|
||||
if is_tf_available():
|
||||
|
||||
def _tf_glue_convert_examples_to_features(
|
||||
examples: tf.data.Dataset, tokenizer: PreTrainedTokenizer, task=str, max_length: Optional[int] = None,
|
||||
examples: tf.data.Dataset,
|
||||
tokenizer: PreTrainedTokenizer,
|
||||
task=str,
|
||||
max_length: Optional[int] = None,
|
||||
) -> tf.data.Dataset:
|
||||
"""
|
||||
Returns:
|
||||
|
||||
@@ -269,7 +269,9 @@ class SingleSentenceClassificationProcessor(DataProcessor):
|
||||
logger.info("Tokenizing example %d", ex_index)
|
||||
|
||||
input_ids = tokenizer.encode(
|
||||
example.text_a, add_special_tokens=True, max_length=min(max_length, tokenizer.max_len),
|
||||
example.text_a,
|
||||
add_special_tokens=True,
|
||||
max_length=min(max_length, tokenizer.max_len),
|
||||
)
|
||||
all_input_ids.append(input_ids)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user