Add `desc` argument to dataset `map` calls in all examples (#12226)

* add `desc` argument to dataset `map` calls in all examples

* added plm

* suggestions
This commit is contained in:
Bhavitvya Malik
2021-06-18 01:07:31 +05:30
committed by GitHub
parent adb70eda4d
commit e43e11260f
20 changed files with 84 additions and 7 deletions

View File

@@ -1,3 +1,3 @@
seqeval
datasets >= 1.1.3
datasets >= 1.8.0
torch >= 1.3

View File

@@ -42,10 +42,12 @@ from transformers import (
)
from transformers.trainer_utils import get_last_checkpoint
from transformers.utils import check_min_version
from transformers.utils.versions import require_version
# Will error if the minimal version of Transformers is not installed. Remove at your own risk.
check_min_version("4.8.0.dev0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")
logger = logging.getLogger(__name__)
@@ -388,6 +390,7 @@ def main():
batched=True,
num_proc=data_args.preprocessing_num_workers,
load_from_cache_file=not data_args.overwrite_cache,
desc="Running tokenizer on train dataset",
)
if training_args.do_eval:
@@ -401,6 +404,7 @@ def main():
batched=True,
num_proc=data_args.preprocessing_num_workers,
load_from_cache_file=not data_args.overwrite_cache,
desc="Running tokenizer on validation dataset",
)
if training_args.do_predict:
@@ -414,6 +418,7 @@ def main():
batched=True,
num_proc=data_args.preprocessing_num_workers,
load_from_cache_file=not data_args.overwrite_cache,
desc="Running tokenizer on prediction dataset",
)
# Data collator

View File

@@ -45,9 +45,12 @@ from transformers import (
get_scheduler,
set_seed,
)
from transformers.utils.versions import require_version
logger = logging.getLogger(__name__)
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")
# You should update this to your particular problem to have better documentation of `model_type`
MODEL_CONFIG_CLASSES = list(MODEL_MAPPING.keys())
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)
@@ -381,7 +384,10 @@ def main():
return tokenized_inputs
processed_raw_datasets = raw_datasets.map(
tokenize_and_align_labels, batched=True, remove_columns=raw_datasets["train"].column_names
tokenize_and_align_labels,
batched=True,
remove_columns=raw_datasets["train"].column_names,
desc="Running tokenizer on dataset",
)
train_dataset = processed_raw_datasets["train"]