Update feature selection in to_tf_dataset (#21935)
* Update feature selection
* Check compatibility with datasets version
* Checkout from datasets main
This commit is contained in:
@@ -385,12 +385,12 @@ Convert your datasets to the `tf.data.Dataset` format using the [`~datasets.Data
|
||||
```py
|
||||
>>> # converting our train dataset to tf.data.Dataset
|
||||
>>> tf_train_dataset = food["train"].to_tf_dataset(
|
||||
... columns=["pixel_values"], label_cols=["label"], shuffle=True, batch_size=batch_size, collate_fn=data_collator
|
||||
... columns="pixel_values", label_cols="label", shuffle=True, batch_size=batch_size, collate_fn=data_collator
|
||||
... )
|
||||
|
||||
>>> # converting our test dataset to tf.data.Dataset
|
||||
>>> tf_eval_dataset = food["test"].to_tf_dataset(
|
||||
... columns=["pixel_values"], label_cols=["label"], shuffle=True, batch_size=batch_size, collate_fn=data_collator
|
||||
... columns="pixel_values", label_cols="label", shuffle=True, batch_size=batch_size, collate_fn=data_collator
|
||||
... )
|
||||
```
|
||||
|
||||
|
||||
@@ -173,7 +173,7 @@ A continuación, convierte los datasets tokenizados en datasets de TensorFlow co
|
||||
```py
|
||||
>>> tf_train_dataset = small_train_dataset.to_tf_dataset(
|
||||
... columns=["attention_mask", "input_ids", "token_type_ids"],
|
||||
... label_cols=["labels"],
|
||||
... label_cols="labels",
|
||||
... shuffle=True,
|
||||
... collate_fn=data_collator,
|
||||
... batch_size=8,
|
||||
@@ -181,7 +181,7 @@ A continuación, convierte los datasets tokenizados en datasets de TensorFlow co
|
||||
|
||||
>>> tf_validation_dataset = small_eval_dataset.to_tf_dataset(
|
||||
... columns=["attention_mask", "input_ids", "token_type_ids"],
|
||||
... label_cols=["labels"],
|
||||
... label_cols="labels",
|
||||
... shuffle=False,
|
||||
... collate_fn=data_collator,
|
||||
... batch_size=8,
|
||||
|
||||
@@ -205,7 +205,7 @@ Especifique suas entradas em `columns` e seu rótulo em `label_cols`:
|
||||
```py
|
||||
>>> tf_train_dataset = small_train_dataset.to_tf_dataset(
|
||||
... columns=["attention_mask", "input_ids", "token_type_ids"],
|
||||
... label_cols=["labels"],
|
||||
... label_cols="labels",
|
||||
... shuffle=True,
|
||||
... collate_fn=data_collator,
|
||||
... batch_size=8,
|
||||
@@ -213,7 +213,7 @@ Especifique suas entradas em `columns` e seu rótulo em `label_cols`:
|
||||
|
||||
>>> tf_validation_dataset = small_eval_dataset.to_tf_dataset(
|
||||
... columns=["attention_mask", "input_ids", "token_type_ids"],
|
||||
... label_cols=["labels"],
|
||||
... label_cols="labels",
|
||||
... shuffle=False,
|
||||
... collate_fn=data_collator,
|
||||
... batch_size=8,
|
||||
|
||||
@@ -1413,6 +1413,12 @@ class TFPreTrainedModel(tf.keras.Model, TFModelUtilsMixin, TFGenerationMixin, Pu
|
||||
feature_cols = [col for col in output_columns if col in model_inputs and col not in model_labels]
|
||||
label_cols = [col for col in output_columns if col in model_labels]
|
||||
|
||||
# Backwards compatibility for older versions of datasets. Previously, if `columns` or `label_cols`
|
||||
# were a single element list, the returned element spec would be a single element. Now, passing [feature]
|
||||
# will return a dict structure {"feature": feature}, and passing a single string will return a single element.
|
||||
feature_cols = feature_cols[0] if len(feature_cols) == 1 else feature_cols
|
||||
label_cols = label_cols[0] if len(label_cols) == 1 else label_cols
|
||||
|
||||
if drop_remainder is None:
|
||||
drop_remainder = shuffle
|
||||
tf_dataset = dataset.to_tf_dataset(
|
||||
|
||||
Reference in New Issue
Block a user