Remove Roberta Dependencies from XLM Roberta Flax and Tensorflow models (#21047)
* Added flax model code
* Added tf changes
* Missed some
* Added copy comments
* Added style hints
* Fixed copy statements
* Added suggested fixes
* Made some fixes
* Style fixup
* Added necessary copy statements
* Fixing copy statements
* Added more copies
* Final copy fix
* Some bugfixes
* Adding imports to init
* Fixed up all make fixup errors
* Fixed doc errors
* Auto model changes
This commit is contained in:
@@ -146,6 +146,11 @@ A list of official Hugging Face and community (indicated by 🌎) resources to h
|
|||||||
[[autodoc]] TFXLMRobertaModel
|
[[autodoc]] TFXLMRobertaModel
|
||||||
- call
|
- call
|
||||||
|
|
||||||
|
## TFXLMRobertaForCausalLM
|
||||||
|
|
||||||
|
[[autodoc]] TFXLMRobertaForCausalLM
|
||||||
|
- call
|
||||||
|
|
||||||
## TFXLMRobertaForMaskedLM
|
## TFXLMRobertaForMaskedLM
|
||||||
|
|
||||||
[[autodoc]] TFXLMRobertaForMaskedLM
|
[[autodoc]] TFXLMRobertaForMaskedLM
|
||||||
@@ -176,6 +181,11 @@ A list of official Hugging Face and community (indicated by 🌎) resources to h
|
|||||||
[[autodoc]] FlaxXLMRobertaModel
|
[[autodoc]] FlaxXLMRobertaModel
|
||||||
- __call__
|
- __call__
|
||||||
|
|
||||||
|
## FlaxXLMRobertaForCausalLM
|
||||||
|
|
||||||
|
[[autodoc]] FlaxXLMRobertaForCausalLM
|
||||||
|
- __call__
|
||||||
|
|
||||||
## FlaxXLMRobertaForMaskedLM
|
## FlaxXLMRobertaForMaskedLM
|
||||||
|
|
||||||
[[autodoc]] FlaxXLMRobertaForMaskedLM
|
[[autodoc]] FlaxXLMRobertaForMaskedLM
|
||||||
|
|||||||
@@ -3153,12 +3153,14 @@ else:
|
|||||||
_import_structure["models.xlm_roberta"].extend(
|
_import_structure["models.xlm_roberta"].extend(
|
||||||
[
|
[
|
||||||
"TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST",
|
"TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||||
|
"TFXLMRobertaForCausalLM",
|
||||||
"TFXLMRobertaForMaskedLM",
|
"TFXLMRobertaForMaskedLM",
|
||||||
"TFXLMRobertaForMultipleChoice",
|
"TFXLMRobertaForMultipleChoice",
|
||||||
"TFXLMRobertaForQuestionAnswering",
|
"TFXLMRobertaForQuestionAnswering",
|
||||||
"TFXLMRobertaForSequenceClassification",
|
"TFXLMRobertaForSequenceClassification",
|
||||||
"TFXLMRobertaForTokenClassification",
|
"TFXLMRobertaForTokenClassification",
|
||||||
"TFXLMRobertaModel",
|
"TFXLMRobertaModel",
|
||||||
|
"TFXLMRobertaPreTrainedModel",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
_import_structure["models.xlnet"].extend(
|
_import_structure["models.xlnet"].extend(
|
||||||
@@ -3435,12 +3437,15 @@ else:
|
|||||||
)
|
)
|
||||||
_import_structure["models.xlm_roberta"].extend(
|
_import_structure["models.xlm_roberta"].extend(
|
||||||
[
|
[
|
||||||
|
"FLAX_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||||
"FlaxXLMRobertaForMaskedLM",
|
"FlaxXLMRobertaForMaskedLM",
|
||||||
"FlaxXLMRobertaForMultipleChoice",
|
"FlaxXLMRobertaForMultipleChoice",
|
||||||
"FlaxXLMRobertaForQuestionAnswering",
|
"FlaxXLMRobertaForQuestionAnswering",
|
||||||
"FlaxXLMRobertaForSequenceClassification",
|
"FlaxXLMRobertaForSequenceClassification",
|
||||||
"FlaxXLMRobertaForTokenClassification",
|
"FlaxXLMRobertaForTokenClassification",
|
||||||
"FlaxXLMRobertaModel",
|
"FlaxXLMRobertaModel",
|
||||||
|
"FlaxXLMRobertaForCausalLM",
|
||||||
|
"FlaxXLMRobertaPreTrainedModel",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -6022,12 +6027,14 @@ if TYPE_CHECKING:
|
|||||||
)
|
)
|
||||||
from .models.xlm_roberta import (
|
from .models.xlm_roberta import (
|
||||||
TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
|
TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||||
|
TFXLMRobertaForCausalLM,
|
||||||
TFXLMRobertaForMaskedLM,
|
TFXLMRobertaForMaskedLM,
|
||||||
TFXLMRobertaForMultipleChoice,
|
TFXLMRobertaForMultipleChoice,
|
||||||
TFXLMRobertaForQuestionAnswering,
|
TFXLMRobertaForQuestionAnswering,
|
||||||
TFXLMRobertaForSequenceClassification,
|
TFXLMRobertaForSequenceClassification,
|
||||||
TFXLMRobertaForTokenClassification,
|
TFXLMRobertaForTokenClassification,
|
||||||
TFXLMRobertaModel,
|
TFXLMRobertaModel,
|
||||||
|
TFXLMRobertaPreTrainedModel,
|
||||||
)
|
)
|
||||||
from .models.xlnet import (
|
from .models.xlnet import (
|
||||||
TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST,
|
TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||||
@@ -6240,12 +6247,15 @@ if TYPE_CHECKING:
|
|||||||
)
|
)
|
||||||
from .models.xglm import FlaxXGLMForCausalLM, FlaxXGLMModel, FlaxXGLMPreTrainedModel
|
from .models.xglm import FlaxXGLMForCausalLM, FlaxXGLMModel, FlaxXGLMPreTrainedModel
|
||||||
from .models.xlm_roberta import (
|
from .models.xlm_roberta import (
|
||||||
|
FLAX_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||||
|
FlaxXLMRobertaForCausalLM,
|
||||||
FlaxXLMRobertaForMaskedLM,
|
FlaxXLMRobertaForMaskedLM,
|
||||||
FlaxXLMRobertaForMultipleChoice,
|
FlaxXLMRobertaForMultipleChoice,
|
||||||
FlaxXLMRobertaForQuestionAnswering,
|
FlaxXLMRobertaForQuestionAnswering,
|
||||||
FlaxXLMRobertaForSequenceClassification,
|
FlaxXLMRobertaForSequenceClassification,
|
||||||
FlaxXLMRobertaForTokenClassification,
|
FlaxXLMRobertaForTokenClassification,
|
||||||
FlaxXLMRobertaModel,
|
FlaxXLMRobertaModel,
|
||||||
|
FlaxXLMRobertaPreTrainedModel,
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|||||||
@@ -142,6 +142,7 @@ FLAX_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = OrderedDict(
|
|||||||
("roberta", "FlaxRobertaForCausalLM"),
|
("roberta", "FlaxRobertaForCausalLM"),
|
||||||
("roberta-prelayernorm", "FlaxRobertaPreLayerNormForCausalLM"),
|
("roberta-prelayernorm", "FlaxRobertaPreLayerNormForCausalLM"),
|
||||||
("xglm", "FlaxXGLMForCausalLM"),
|
("xglm", "FlaxXGLMForCausalLM"),
|
||||||
|
("xlm-roberta", "FlaxXLMRobertaForCausalLM"),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -180,6 +180,7 @@ TF_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = OrderedDict(
|
|||||||
("transfo-xl", "TFTransfoXLLMHeadModel"),
|
("transfo-xl", "TFTransfoXLLMHeadModel"),
|
||||||
("xglm", "TFXGLMForCausalLM"),
|
("xglm", "TFXGLMForCausalLM"),
|
||||||
("xlm", "TFXLMWithLMHeadModel"),
|
("xlm", "TFXLMWithLMHeadModel"),
|
||||||
|
("xlm-roberta", "TFXLMRobertaForCausalLM"),
|
||||||
("xlnet", "TFXLNetLMHeadModel"),
|
("xlnet", "TFXLNetLMHeadModel"),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -79,12 +79,14 @@ except OptionalDependencyNotAvailable:
|
|||||||
else:
|
else:
|
||||||
_import_structure["modeling_tf_xlm_roberta"] = [
|
_import_structure["modeling_tf_xlm_roberta"] = [
|
||||||
"TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST",
|
"TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||||
|
"TFXLMRobertaForCausalLM",
|
||||||
"TFXLMRobertaForMaskedLM",
|
"TFXLMRobertaForMaskedLM",
|
||||||
"TFXLMRobertaForMultipleChoice",
|
"TFXLMRobertaForMultipleChoice",
|
||||||
"TFXLMRobertaForQuestionAnswering",
|
"TFXLMRobertaForQuestionAnswering",
|
||||||
"TFXLMRobertaForSequenceClassification",
|
"TFXLMRobertaForSequenceClassification",
|
||||||
"TFXLMRobertaForTokenClassification",
|
"TFXLMRobertaForTokenClassification",
|
||||||
"TFXLMRobertaModel",
|
"TFXLMRobertaModel",
|
||||||
|
"TFXLMRobertaPreTrainedModel",
|
||||||
]
|
]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -94,12 +96,15 @@ except OptionalDependencyNotAvailable:
|
|||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
_import_structure["modeling_flax_xlm_roberta"] = [
|
_import_structure["modeling_flax_xlm_roberta"] = [
|
||||||
|
"FLAX_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||||
"FlaxXLMRobertaForMaskedLM",
|
"FlaxXLMRobertaForMaskedLM",
|
||||||
|
"FlaxXLMRobertaForCausalLM",
|
||||||
"FlaxXLMRobertaForMultipleChoice",
|
"FlaxXLMRobertaForMultipleChoice",
|
||||||
"FlaxXLMRobertaForQuestionAnswering",
|
"FlaxXLMRobertaForQuestionAnswering",
|
||||||
"FlaxXLMRobertaForSequenceClassification",
|
"FlaxXLMRobertaForSequenceClassification",
|
||||||
"FlaxXLMRobertaForTokenClassification",
|
"FlaxXLMRobertaForTokenClassification",
|
||||||
"FlaxXLMRobertaModel",
|
"FlaxXLMRobertaModel",
|
||||||
|
"FlaxXLMRobertaPreTrainedModel",
|
||||||
]
|
]
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
@@ -151,12 +156,14 @@ if TYPE_CHECKING:
|
|||||||
else:
|
else:
|
||||||
from .modeling_tf_xlm_roberta import (
|
from .modeling_tf_xlm_roberta import (
|
||||||
TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
|
TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||||
|
TFXLMRobertaForCausalLM,
|
||||||
TFXLMRobertaForMaskedLM,
|
TFXLMRobertaForMaskedLM,
|
||||||
TFXLMRobertaForMultipleChoice,
|
TFXLMRobertaForMultipleChoice,
|
||||||
TFXLMRobertaForQuestionAnswering,
|
TFXLMRobertaForQuestionAnswering,
|
||||||
TFXLMRobertaForSequenceClassification,
|
TFXLMRobertaForSequenceClassification,
|
||||||
TFXLMRobertaForTokenClassification,
|
TFXLMRobertaForTokenClassification,
|
||||||
TFXLMRobertaModel,
|
TFXLMRobertaModel,
|
||||||
|
TFXLMRobertaPreTrainedModel,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -166,12 +173,15 @@ if TYPE_CHECKING:
|
|||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
from .modeling_flax_xlm_roberta import (
|
from .modeling_flax_xlm_roberta import (
|
||||||
|
FLAX_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||||
|
FlaxXLMRobertaForCausalLM,
|
||||||
FlaxXLMRobertaForMaskedLM,
|
FlaxXLMRobertaForMaskedLM,
|
||||||
FlaxXLMRobertaForMultipleChoice,
|
FlaxXLMRobertaForMultipleChoice,
|
||||||
FlaxXLMRobertaForQuestionAnswering,
|
FlaxXLMRobertaForQuestionAnswering,
|
||||||
FlaxXLMRobertaForSequenceClassification,
|
FlaxXLMRobertaForSequenceClassification,
|
||||||
FlaxXLMRobertaForTokenClassification,
|
FlaxXLMRobertaForTokenClassification,
|
||||||
FlaxXLMRobertaModel,
|
FlaxXLMRobertaModel,
|
||||||
|
FlaxXLMRobertaPreTrainedModel,
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -286,7 +286,7 @@ class XLMRobertaSelfAttention(nn.Module):
|
|||||||
return outputs
|
return outputs
|
||||||
|
|
||||||
|
|
||||||
# Copied from transformers.models.roberta.modeling_roberta.RobertaSelfOutput
|
# Copied from transformers.models.roberta.modeling_roberta.RobertaSelfOutput with Roberta->XLMRoberta
|
||||||
class XLMRobertaSelfOutput(nn.Module):
|
class XLMRobertaSelfOutput(nn.Module):
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
@@ -351,7 +351,7 @@ class XLMRobertaAttention(nn.Module):
|
|||||||
return outputs
|
return outputs
|
||||||
|
|
||||||
|
|
||||||
# Copied from transformers.models.roberta.modeling_roberta.RobertaIntermediate
|
# Copied from transformers.models.roberta.modeling_roberta.RobertaIntermediate with Roberta->XLMRoberta
|
||||||
class XLMRobertaIntermediate(nn.Module):
|
class XLMRobertaIntermediate(nn.Module):
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
@@ -367,7 +367,7 @@ class XLMRobertaIntermediate(nn.Module):
|
|||||||
return hidden_states
|
return hidden_states
|
||||||
|
|
||||||
|
|
||||||
# Copied from transformers.models.roberta.modeling_roberta.RobertaOutput
|
# Copied from transformers.models.roberta.modeling_roberta.RobertaOutput with Roberta->XLMRoberta
|
||||||
class XLMRobertaOutput(nn.Module):
|
class XLMRobertaOutput(nn.Module):
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
@@ -567,7 +567,7 @@ class XLMRobertaEncoder(nn.Module):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Copied from transformers.models.roberta.modeling_roberta.RobertaPooler
|
# Copied from transformers.models.roberta.modeling_roberta.RobertaPooler with Roberta->XLMRoberta
|
||||||
class XLMRobertaPooler(nn.Module):
|
class XLMRobertaPooler(nn.Module):
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
@@ -1455,7 +1455,7 @@ class XLMRobertaForTokenClassification(XLMRobertaPreTrainedModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Copied from transformers.models.roberta.modeling_roberta.RobertaClassificationHead
|
# Copied from transformers.models.roberta.modeling_roberta.RobertaClassificationHead with Roberta->XLMRoberta
|
||||||
class XLMRobertaClassificationHead(nn.Module):
|
class XLMRobertaClassificationHead(nn.Module):
|
||||||
"""Head for sentence-level classification tasks."""
|
"""Head for sentence-level classification tasks."""
|
||||||
|
|
||||||
|
|||||||
@@ -1152,6 +1152,16 @@ class FlaxXGLMPreTrainedModel(metaclass=DummyObject):
|
|||||||
requires_backends(self, ["flax"])
|
requires_backends(self, ["flax"])
|
||||||
|
|
||||||
|
|
||||||
|
FLAX_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = None
|
||||||
|
|
||||||
|
|
||||||
|
class FlaxXLMRobertaForCausalLM(metaclass=DummyObject):
|
||||||
|
_backends = ["flax"]
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
requires_backends(self, ["flax"])
|
||||||
|
|
||||||
|
|
||||||
class FlaxXLMRobertaForMaskedLM(metaclass=DummyObject):
|
class FlaxXLMRobertaForMaskedLM(metaclass=DummyObject):
|
||||||
_backends = ["flax"]
|
_backends = ["flax"]
|
||||||
|
|
||||||
@@ -1192,3 +1202,10 @@ class FlaxXLMRobertaModel(metaclass=DummyObject):
|
|||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
requires_backends(self, ["flax"])
|
requires_backends(self, ["flax"])
|
||||||
|
|
||||||
|
|
||||||
|
class FlaxXLMRobertaPreTrainedModel(metaclass=DummyObject):
|
||||||
|
_backends = ["flax"]
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
requires_backends(self, ["flax"])
|
||||||
|
|||||||
@@ -2646,6 +2646,13 @@ class TFXLMWithLMHeadModel(metaclass=DummyObject):
|
|||||||
TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = None
|
TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = None
|
||||||
|
|
||||||
|
|
||||||
|
class TFXLMRobertaForCausalLM(metaclass=DummyObject):
|
||||||
|
_backends = ["tf"]
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
requires_backends(self, ["tf"])
|
||||||
|
|
||||||
|
|
||||||
class TFXLMRobertaForMaskedLM(metaclass=DummyObject):
|
class TFXLMRobertaForMaskedLM(metaclass=DummyObject):
|
||||||
_backends = ["tf"]
|
_backends = ["tf"]
|
||||||
|
|
||||||
@@ -2688,6 +2695,13 @@ class TFXLMRobertaModel(metaclass=DummyObject):
|
|||||||
requires_backends(self, ["tf"])
|
requires_backends(self, ["tf"])
|
||||||
|
|
||||||
|
|
||||||
|
class TFXLMRobertaPreTrainedModel(metaclass=DummyObject):
|
||||||
|
_backends = ["tf"]
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
requires_backends(self, ["tf"])
|
||||||
|
|
||||||
|
|
||||||
TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST = None
|
TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST = None
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user