Deprecate old data/metrics functions (#8420)
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
|
import warnings
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import List, Optional, Union
|
from typing import List, Optional, Union
|
||||||
@@ -69,6 +70,12 @@ class GlueDataset(Dataset):
|
|||||||
mode: Union[str, Split] = Split.train,
|
mode: Union[str, Split] = Split.train,
|
||||||
cache_dir: Optional[str] = None,
|
cache_dir: Optional[str] = None,
|
||||||
):
|
):
|
||||||
|
warnings.warn(
|
||||||
|
"This dataset will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets "
|
||||||
|
"library. You can have a look at this example script for pointers: "
|
||||||
|
"https://github.com/huggingface/transformers/blob/master/examples/text-classification/run_glue.py",
|
||||||
|
FutureWarning,
|
||||||
|
)
|
||||||
self.args = args
|
self.args = args
|
||||||
self.processor = glue_processors[args.task_name]()
|
self.processor = glue_processors[args.task_name]()
|
||||||
self.output_mode = glue_output_modes[args.task_name]
|
self.output_mode = glue_output_modes[args.task_name]
|
||||||
|
|||||||
@@ -19,7 +19,8 @@ logger = logging.get_logger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
DEPRECATION_WARNING = (
|
DEPRECATION_WARNING = (
|
||||||
"This dataset will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets library."
|
"This dataset will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets "
|
||||||
|
"library. You can have a look at this example script for pointers: {0}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -36,7 +37,12 @@ class TextDataset(Dataset):
|
|||||||
overwrite_cache=False,
|
overwrite_cache=False,
|
||||||
cache_dir: Optional[str] = None,
|
cache_dir: Optional[str] = None,
|
||||||
):
|
):
|
||||||
warnings.warn(DEPRECATION_WARNING, FutureWarning)
|
warnings.warn(
|
||||||
|
DEPRECATION_WARNING.format(
|
||||||
|
"https://github.com/huggingface/transformers/blob/master/examples/language-modeling/run_mlm.py"
|
||||||
|
),
|
||||||
|
FutureWarning,
|
||||||
|
)
|
||||||
assert os.path.isfile(file_path), f"Input file path {file_path} not found"
|
assert os.path.isfile(file_path), f"Input file path {file_path} not found"
|
||||||
|
|
||||||
block_size = block_size - tokenizer.num_special_tokens_to_add(pair=False)
|
block_size = block_size - tokenizer.num_special_tokens_to_add(pair=False)
|
||||||
@@ -101,7 +107,12 @@ class LineByLineTextDataset(Dataset):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int):
|
def __init__(self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int):
|
||||||
warnings.warn(DEPRECATION_WARNING, FutureWarning)
|
warnings.warn(
|
||||||
|
DEPRECATION_WARNING.format(
|
||||||
|
"https://github.com/huggingface/transformers/blob/master/examples/language-modeling/run_mlm.py"
|
||||||
|
),
|
||||||
|
FutureWarning,
|
||||||
|
)
|
||||||
assert os.path.isfile(file_path), f"Input file path {file_path} not found"
|
assert os.path.isfile(file_path), f"Input file path {file_path} not found"
|
||||||
# Here, we do not cache the features, operating under the assumption
|
# Here, we do not cache the features, operating under the assumption
|
||||||
# that we will soon use fast multithreaded tokenizers from the
|
# that we will soon use fast multithreaded tokenizers from the
|
||||||
@@ -128,7 +139,12 @@ class LineByLineWithRefDataset(Dataset):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int, ref_path: str):
|
def __init__(self, tokenizer: PreTrainedTokenizer, file_path: str, block_size: int, ref_path: str):
|
||||||
warnings.warn(DEPRECATION_WARNING, FutureWarning)
|
warnings.warn(
|
||||||
|
DEPRECATION_WARNING.format(
|
||||||
|
"https://github.com/huggingface/transformers/blob/master/examples/language-modeling/run_mlm_wwm.py"
|
||||||
|
),
|
||||||
|
FutureWarning,
|
||||||
|
)
|
||||||
assert os.path.isfile(file_path), f"Input file path {file_path} not found"
|
assert os.path.isfile(file_path), f"Input file path {file_path} not found"
|
||||||
assert os.path.isfile(ref_path), f"Ref file path {file_path} not found"
|
assert os.path.isfile(ref_path), f"Ref file path {file_path} not found"
|
||||||
# Here, we do not cache the features, operating under the assumption
|
# Here, we do not cache the features, operating under the assumption
|
||||||
@@ -165,7 +181,12 @@ class LineByLineWithSOPTextDataset(Dataset):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, tokenizer: PreTrainedTokenizer, file_dir: str, block_size: int):
|
def __init__(self, tokenizer: PreTrainedTokenizer, file_dir: str, block_size: int):
|
||||||
warnings.warn(DEPRECATION_WARNING, FutureWarning)
|
warnings.warn(
|
||||||
|
DEPRECATION_WARNING.format(
|
||||||
|
"https://github.com/huggingface/transformers/blob/master/examples/language-modeling/run_mlm.py"
|
||||||
|
),
|
||||||
|
FutureWarning,
|
||||||
|
)
|
||||||
assert os.path.isdir(file_dir)
|
assert os.path.isdir(file_dir)
|
||||||
logger.info(f"Creating features from dataset file folder at {file_dir}")
|
logger.info(f"Creating features from dataset file folder at {file_dir}")
|
||||||
self.examples = []
|
self.examples = []
|
||||||
@@ -315,7 +336,12 @@ class TextDatasetForNextSentencePrediction(Dataset):
|
|||||||
short_seq_probability=0.1,
|
short_seq_probability=0.1,
|
||||||
nsp_probability=0.5,
|
nsp_probability=0.5,
|
||||||
):
|
):
|
||||||
warnings.warn(DEPRECATION_WARNING, FutureWarning)
|
warnings.warn(
|
||||||
|
DEPRECATION_WARNING.format(
|
||||||
|
"https://github.com/huggingface/transformers/blob/master/examples/language-modeling/run_mlm.py"
|
||||||
|
),
|
||||||
|
FutureWarning,
|
||||||
|
)
|
||||||
assert os.path.isfile(file_path), f"Input file path {file_path} not found"
|
assert os.path.isfile(file_path), f"Input file path {file_path} not found"
|
||||||
|
|
||||||
self.block_size = block_size - tokenizer.num_special_tokens_to_add(pair=True)
|
self.block_size = block_size - tokenizer.num_special_tokens_to_add(pair=True)
|
||||||
|
|||||||
@@ -14,6 +14,8 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import warnings
|
||||||
|
|
||||||
from ...file_utils import is_sklearn_available, requires_sklearn
|
from ...file_utils import is_sklearn_available, requires_sklearn
|
||||||
|
|
||||||
|
|
||||||
@@ -23,12 +25,21 @@ if is_sklearn_available():
|
|||||||
from scipy.stats import pearsonr, spearmanr
|
from scipy.stats import pearsonr, spearmanr
|
||||||
|
|
||||||
|
|
||||||
|
# Message passed to ``warnings.warn`` by the deprecated metric helpers in this module.
# It has to be a plain string: a trailing comma after the last literal would turn the
# parenthesized expression into a one-element tuple and the warning would display the
# tuple's repr instead of the sentence.
DEPRECATION_WARNING = (
    "This metric will be removed from the library soon, metrics should be handled with the 🤗 Datasets "
    "library. You can have a look at this example script for pointers: "
    "https://github.com/huggingface/transformers/blob/master/examples/text-classification/run_glue.py"
)
|
||||||
|
|
||||||
|
|
||||||
def simple_accuracy(preds, labels):
|
def simple_accuracy(preds, labels):
|
||||||
|
warnings.warn(DEPRECATION_WARNING, FutureWarning)
|
||||||
requires_sklearn(simple_accuracy)
|
requires_sklearn(simple_accuracy)
|
||||||
return (preds == labels).mean()
|
return (preds == labels).mean()
|
||||||
|
|
||||||
|
|
||||||
def acc_and_f1(preds, labels):
|
def acc_and_f1(preds, labels):
|
||||||
|
warnings.warn(DEPRECATION_WARNING, FutureWarning)
|
||||||
requires_sklearn(acc_and_f1)
|
requires_sklearn(acc_and_f1)
|
||||||
acc = simple_accuracy(preds, labels)
|
acc = simple_accuracy(preds, labels)
|
||||||
f1 = f1_score(y_true=labels, y_pred=preds)
|
f1 = f1_score(y_true=labels, y_pred=preds)
|
||||||
@@ -40,6 +51,7 @@ def acc_and_f1(preds, labels):
|
|||||||
|
|
||||||
|
|
||||||
def pearson_and_spearman(preds, labels):
|
def pearson_and_spearman(preds, labels):
|
||||||
|
warnings.warn(DEPRECATION_WARNING, FutureWarning)
|
||||||
requires_sklearn(pearson_and_spearman)
|
requires_sklearn(pearson_and_spearman)
|
||||||
pearson_corr = pearsonr(preds, labels)[0]
|
pearson_corr = pearsonr(preds, labels)[0]
|
||||||
spearman_corr = spearmanr(preds, labels)[0]
|
spearman_corr = spearmanr(preds, labels)[0]
|
||||||
@@ -51,6 +63,7 @@ def pearson_and_spearman(preds, labels):
|
|||||||
|
|
||||||
|
|
||||||
def glue_compute_metrics(task_name, preds, labels):
|
def glue_compute_metrics(task_name, preds, labels):
|
||||||
|
warnings.warn(DEPRECATION_WARNING, FutureWarning)
|
||||||
requires_sklearn(glue_compute_metrics)
|
requires_sklearn(glue_compute_metrics)
|
||||||
assert len(preds) == len(labels), f"Predictions and labels have mismatched lengths {len(preds)} and {len(labels)}"
|
assert len(preds) == len(labels), f"Predictions and labels have mismatched lengths {len(preds)} and {len(labels)}"
|
||||||
if task_name == "cola":
|
if task_name == "cola":
|
||||||
@@ -80,6 +93,7 @@ def glue_compute_metrics(task_name, preds, labels):
|
|||||||
|
|
||||||
|
|
||||||
def xnli_compute_metrics(task_name, preds, labels):
|
def xnli_compute_metrics(task_name, preds, labels):
|
||||||
|
warnings.warn(DEPRECATION_WARNING, FutureWarning)
|
||||||
requires_sklearn(xnli_compute_metrics)
|
requires_sklearn(xnli_compute_metrics)
|
||||||
assert len(preds) == len(labels), f"Predictions and labels have mismatched lengths {len(preds)} and {len(labels)}"
|
assert len(preds) == len(labels), f"Predictions and labels have mismatched lengths {len(preds)} and {len(labels)}"
|
||||||
if task_name == "xnli":
|
if task_name == "xnli":
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
""" GLUE processors and helpers """
|
""" GLUE processors and helpers """
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import warnings
|
||||||
from dataclasses import asdict
|
from dataclasses import asdict
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import List, Optional, Union
|
from typing import List, Optional, Union
|
||||||
@@ -31,6 +32,12 @@ if is_tf_available():
|
|||||||
|
|
||||||
logger = logging.get_logger(__name__)
|
logger = logging.get_logger(__name__)
|
||||||
|
|
||||||
|
DEPRECATION_WARNING = (
|
||||||
|
"This {0} will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets "
|
||||||
|
"library. You can have a look at this example script for pointers: "
|
||||||
|
"https://github.com/huggingface/transformers/blob/master/examples/text-classification/run_glue.py"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def glue_convert_examples_to_features(
|
def glue_convert_examples_to_features(
|
||||||
examples: Union[List[InputExample], "tf.data.Dataset"],
|
examples: Union[List[InputExample], "tf.data.Dataset"],
|
||||||
@@ -57,6 +64,7 @@ def glue_convert_examples_to_features(
|
|||||||
``InputFeatures`` which can be fed to the model.
|
``InputFeatures`` which can be fed to the model.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
warnings.warn(DEPRECATION_WARNING.format("function"), FutureWarning)
|
||||||
if is_tf_available() and isinstance(examples, tf.data.Dataset):
|
if is_tf_available() and isinstance(examples, tf.data.Dataset):
|
||||||
if task is None:
|
if task is None:
|
||||||
raise ValueError("When calling glue_convert_examples_to_features from TF, the task parameter is required.")
|
raise ValueError("When calling glue_convert_examples_to_features from TF, the task parameter is required.")
|
||||||
@@ -162,6 +170,10 @@ class OutputMode(Enum):
|
|||||||
class MrpcProcessor(DataProcessor):
|
class MrpcProcessor(DataProcessor):
|
||||||
"""Processor for the MRPC data set (GLUE version)."""
|
"""Processor for the MRPC data set (GLUE version)."""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
warnings.warn(DEPRECATION_WARNING.format("processor"), FutureWarning)
|
||||||
|
|
||||||
def get_example_from_tensor_dict(self, tensor_dict):
|
def get_example_from_tensor_dict(self, tensor_dict):
|
||||||
"""See base class."""
|
"""See base class."""
|
||||||
return InputExample(
|
return InputExample(
|
||||||
@@ -205,6 +217,10 @@ class MrpcProcessor(DataProcessor):
|
|||||||
class MnliProcessor(DataProcessor):
|
class MnliProcessor(DataProcessor):
|
||||||
"""Processor for the MultiNLI data set (GLUE version)."""
|
"""Processor for the MultiNLI data set (GLUE version)."""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
warnings.warn(DEPRECATION_WARNING.format("processor"), FutureWarning)
|
||||||
|
|
||||||
def get_example_from_tensor_dict(self, tensor_dict):
|
def get_example_from_tensor_dict(self, tensor_dict):
|
||||||
"""See base class."""
|
"""See base class."""
|
||||||
return InputExample(
|
return InputExample(
|
||||||
@@ -247,6 +263,10 @@ class MnliProcessor(DataProcessor):
|
|||||||
class MnliMismatchedProcessor(MnliProcessor):
|
class MnliMismatchedProcessor(MnliProcessor):
|
||||||
"""Processor for the MultiNLI Mismatched data set (GLUE version)."""
|
"""Processor for the MultiNLI Mismatched data set (GLUE version)."""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
    """Initialize the processor, forwarding every argument to ``MnliProcessor``.

    ``MnliProcessor.__init__`` already emits the deprecation ``FutureWarning``, so no
    second warning is issued here; warning again would show the same message twice
    for a single instantiation.
    """
    super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
def get_dev_examples(self, data_dir):
|
def get_dev_examples(self, data_dir):
|
||||||
"""See base class."""
|
"""See base class."""
|
||||||
return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev_mismatched.tsv")), "dev_mismatched")
|
return self._create_examples(self._read_tsv(os.path.join(data_dir, "dev_mismatched.tsv")), "dev_mismatched")
|
||||||
@@ -259,6 +279,10 @@ class MnliMismatchedProcessor(MnliProcessor):
|
|||||||
class ColaProcessor(DataProcessor):
|
class ColaProcessor(DataProcessor):
|
||||||
"""Processor for the CoLA data set (GLUE version)."""
|
"""Processor for the CoLA data set (GLUE version)."""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
warnings.warn(DEPRECATION_WARNING.format("processor"), FutureWarning)
|
||||||
|
|
||||||
def get_example_from_tensor_dict(self, tensor_dict):
|
def get_example_from_tensor_dict(self, tensor_dict):
|
||||||
"""See base class."""
|
"""See base class."""
|
||||||
return InputExample(
|
return InputExample(
|
||||||
@@ -302,6 +326,10 @@ class ColaProcessor(DataProcessor):
|
|||||||
class Sst2Processor(DataProcessor):
|
class Sst2Processor(DataProcessor):
|
||||||
"""Processor for the SST-2 data set (GLUE version)."""
|
"""Processor for the SST-2 data set (GLUE version)."""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
warnings.warn(DEPRECATION_WARNING.format("processor"), FutureWarning)
|
||||||
|
|
||||||
def get_example_from_tensor_dict(self, tensor_dict):
|
def get_example_from_tensor_dict(self, tensor_dict):
|
||||||
"""See base class."""
|
"""See base class."""
|
||||||
return InputExample(
|
return InputExample(
|
||||||
@@ -344,6 +372,10 @@ class Sst2Processor(DataProcessor):
|
|||||||
class StsbProcessor(DataProcessor):
|
class StsbProcessor(DataProcessor):
|
||||||
"""Processor for the STS-B data set (GLUE version)."""
|
"""Processor for the STS-B data set (GLUE version)."""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
warnings.warn(DEPRECATION_WARNING.format("processor"), FutureWarning)
|
||||||
|
|
||||||
def get_example_from_tensor_dict(self, tensor_dict):
|
def get_example_from_tensor_dict(self, tensor_dict):
|
||||||
"""See base class."""
|
"""See base class."""
|
||||||
return InputExample(
|
return InputExample(
|
||||||
@@ -386,6 +418,10 @@ class StsbProcessor(DataProcessor):
|
|||||||
class QqpProcessor(DataProcessor):
|
class QqpProcessor(DataProcessor):
|
||||||
"""Processor for the QQP data set (GLUE version)."""
|
"""Processor for the QQP data set (GLUE version)."""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
warnings.warn(DEPRECATION_WARNING.format("processor"), FutureWarning)
|
||||||
|
|
||||||
def get_example_from_tensor_dict(self, tensor_dict):
|
def get_example_from_tensor_dict(self, tensor_dict):
|
||||||
"""See base class."""
|
"""See base class."""
|
||||||
return InputExample(
|
return InputExample(
|
||||||
@@ -434,6 +470,10 @@ class QqpProcessor(DataProcessor):
|
|||||||
class QnliProcessor(DataProcessor):
|
class QnliProcessor(DataProcessor):
|
||||||
"""Processor for the QNLI data set (GLUE version)."""
|
"""Processor for the QNLI data set (GLUE version)."""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
warnings.warn(DEPRECATION_WARNING.format("processor"), FutureWarning)
|
||||||
|
|
||||||
def get_example_from_tensor_dict(self, tensor_dict):
|
def get_example_from_tensor_dict(self, tensor_dict):
|
||||||
"""See base class."""
|
"""See base class."""
|
||||||
return InputExample(
|
return InputExample(
|
||||||
@@ -476,6 +516,10 @@ class QnliProcessor(DataProcessor):
|
|||||||
class RteProcessor(DataProcessor):
|
class RteProcessor(DataProcessor):
|
||||||
"""Processor for the RTE data set (GLUE version)."""
|
"""Processor for the RTE data set (GLUE version)."""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
warnings.warn(DEPRECATION_WARNING.format("processor"), FutureWarning)
|
||||||
|
|
||||||
def get_example_from_tensor_dict(self, tensor_dict):
|
def get_example_from_tensor_dict(self, tensor_dict):
|
||||||
"""See base class."""
|
"""See base class."""
|
||||||
return InputExample(
|
return InputExample(
|
||||||
@@ -518,6 +562,10 @@ class RteProcessor(DataProcessor):
|
|||||||
class WnliProcessor(DataProcessor):
|
class WnliProcessor(DataProcessor):
|
||||||
"""Processor for the WNLI data set (GLUE version)."""
|
"""Processor for the WNLI data set (GLUE version)."""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
warnings.warn(DEPRECATION_WARNING.format("processor"), FutureWarning)
|
||||||
|
|
||||||
def get_example_from_tensor_dict(self, tensor_dict):
|
def get_example_from_tensor_dict(self, tensor_dict):
|
||||||
"""See base class."""
|
"""See base class."""
|
||||||
return InputExample(
|
return InputExample(
|
||||||
|
|||||||
Reference in New Issue
Block a user