Remove Roberta Dependencies from XLM Roberta Flax and Tensorflow models (#21047)
* Added flax model code * Added tf changes * missed some * Added copy comments * Added style hints * Fixed copy statements * Added suggested fixes * Made some fixes * Style fixup * Added necessary copy statements * Fixing copy statements * Added more copies * Final copy fix * Some bugfixes * Adding imports to init * Fixed up all make fixup errors * Fixed doc errors * Auto model changes
This commit is contained in:
@@ -146,6 +146,11 @@ A list of official Hugging Face and community (indicated by 🌎) resources to h
|
||||
[[autodoc]] TFXLMRobertaModel
|
||||
- call
|
||||
|
||||
## TFXLMRobertaForCausalLM
|
||||
|
||||
[[autodoc]] TFXLMRobertaForCausalLM
|
||||
- call
|
||||
|
||||
## TFXLMRobertaForMaskedLM
|
||||
|
||||
[[autodoc]] TFXLMRobertaForMaskedLM
|
||||
@@ -176,6 +181,11 @@ A list of official Hugging Face and community (indicated by 🌎) resources to h
|
||||
[[autodoc]] FlaxXLMRobertaModel
|
||||
- __call__
|
||||
|
||||
## FlaxXLMRobertaForCausalLM
|
||||
|
||||
[[autodoc]] FlaxXLMRobertaForCausalLM
|
||||
- __call__
|
||||
|
||||
## FlaxXLMRobertaForMaskedLM
|
||||
|
||||
[[autodoc]] FlaxXLMRobertaForMaskedLM
|
||||
|
||||
@@ -3153,12 +3153,14 @@ else:
|
||||
_import_structure["models.xlm_roberta"].extend(
|
||||
[
|
||||
"TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||
"TFXLMRobertaForCausalLM",
|
||||
"TFXLMRobertaForMaskedLM",
|
||||
"TFXLMRobertaForMultipleChoice",
|
||||
"TFXLMRobertaForQuestionAnswering",
|
||||
"TFXLMRobertaForSequenceClassification",
|
||||
"TFXLMRobertaForTokenClassification",
|
||||
"TFXLMRobertaModel",
|
||||
"TFXLMRobertaPreTrainedModel",
|
||||
]
|
||||
)
|
||||
_import_structure["models.xlnet"].extend(
|
||||
@@ -3435,12 +3437,15 @@ else:
|
||||
)
|
||||
_import_structure["models.xlm_roberta"].extend(
|
||||
[
|
||||
"FLAX_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||
"FlaxXLMRobertaForMaskedLM",
|
||||
"FlaxXLMRobertaForMultipleChoice",
|
||||
"FlaxXLMRobertaForQuestionAnswering",
|
||||
"FlaxXLMRobertaForSequenceClassification",
|
||||
"FlaxXLMRobertaForTokenClassification",
|
||||
"FlaxXLMRobertaModel",
|
||||
"FlaxXLMRobertaForCausalLM",
|
||||
"FlaxXLMRobertaPreTrainedModel",
|
||||
]
|
||||
)
|
||||
|
||||
@@ -6022,12 +6027,14 @@ if TYPE_CHECKING:
|
||||
)
|
||||
from .models.xlm_roberta import (
|
||||
TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
TFXLMRobertaForCausalLM,
|
||||
TFXLMRobertaForMaskedLM,
|
||||
TFXLMRobertaForMultipleChoice,
|
||||
TFXLMRobertaForQuestionAnswering,
|
||||
TFXLMRobertaForSequenceClassification,
|
||||
TFXLMRobertaForTokenClassification,
|
||||
TFXLMRobertaModel,
|
||||
TFXLMRobertaPreTrainedModel,
|
||||
)
|
||||
from .models.xlnet import (
|
||||
TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
@@ -6240,12 +6247,15 @@ if TYPE_CHECKING:
|
||||
)
|
||||
from .models.xglm import FlaxXGLMForCausalLM, FlaxXGLMModel, FlaxXGLMPreTrainedModel
|
||||
from .models.xlm_roberta import (
|
||||
FLAX_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
FlaxXLMRobertaForCausalLM,
|
||||
FlaxXLMRobertaForMaskedLM,
|
||||
FlaxXLMRobertaForMultipleChoice,
|
||||
FlaxXLMRobertaForQuestionAnswering,
|
||||
FlaxXLMRobertaForSequenceClassification,
|
||||
FlaxXLMRobertaForTokenClassification,
|
||||
FlaxXLMRobertaModel,
|
||||
FlaxXLMRobertaPreTrainedModel,
|
||||
)
|
||||
|
||||
else:
|
||||
|
||||
@@ -142,6 +142,7 @@ FLAX_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = OrderedDict(
|
||||
("roberta", "FlaxRobertaForCausalLM"),
|
||||
("roberta-prelayernorm", "FlaxRobertaPreLayerNormForCausalLM"),
|
||||
("xglm", "FlaxXGLMForCausalLM"),
|
||||
("xlm-roberta", "FlaxXLMRobertaForCausalLM"),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@@ -180,6 +180,7 @@ TF_MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = OrderedDict(
|
||||
("transfo-xl", "TFTransfoXLLMHeadModel"),
|
||||
("xglm", "TFXGLMForCausalLM"),
|
||||
("xlm", "TFXLMWithLMHeadModel"),
|
||||
("xlm-roberta", "TFXLMRobertaForCausalLM"),
|
||||
("xlnet", "TFXLNetLMHeadModel"),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -79,12 +79,14 @@ except OptionalDependencyNotAvailable:
|
||||
else:
|
||||
_import_structure["modeling_tf_xlm_roberta"] = [
|
||||
"TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||
"TFXLMRobertaForCausalLM",
|
||||
"TFXLMRobertaForMaskedLM",
|
||||
"TFXLMRobertaForMultipleChoice",
|
||||
"TFXLMRobertaForQuestionAnswering",
|
||||
"TFXLMRobertaForSequenceClassification",
|
||||
"TFXLMRobertaForTokenClassification",
|
||||
"TFXLMRobertaModel",
|
||||
"TFXLMRobertaPreTrainedModel",
|
||||
]
|
||||
|
||||
try:
|
||||
@@ -94,12 +96,15 @@ except OptionalDependencyNotAvailable:
|
||||
pass
|
||||
else:
|
||||
_import_structure["modeling_flax_xlm_roberta"] = [
|
||||
"FLAX_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST",
|
||||
"FlaxXLMRobertaForMaskedLM",
|
||||
"FlaxXLMRobertaForCausalLM",
|
||||
"FlaxXLMRobertaForMultipleChoice",
|
||||
"FlaxXLMRobertaForQuestionAnswering",
|
||||
"FlaxXLMRobertaForSequenceClassification",
|
||||
"FlaxXLMRobertaForTokenClassification",
|
||||
"FlaxXLMRobertaModel",
|
||||
"FlaxXLMRobertaPreTrainedModel",
|
||||
]
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -151,12 +156,14 @@ if TYPE_CHECKING:
|
||||
else:
|
||||
from .modeling_tf_xlm_roberta import (
|
||||
TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
TFXLMRobertaForCausalLM,
|
||||
TFXLMRobertaForMaskedLM,
|
||||
TFXLMRobertaForMultipleChoice,
|
||||
TFXLMRobertaForQuestionAnswering,
|
||||
TFXLMRobertaForSequenceClassification,
|
||||
TFXLMRobertaForTokenClassification,
|
||||
TFXLMRobertaModel,
|
||||
TFXLMRobertaPreTrainedModel,
|
||||
)
|
||||
|
||||
try:
|
||||
@@ -166,12 +173,15 @@ if TYPE_CHECKING:
|
||||
pass
|
||||
else:
|
||||
from .modeling_flax_xlm_roberta import (
|
||||
FLAX_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
|
||||
FlaxXLMRobertaForCausalLM,
|
||||
FlaxXLMRobertaForMaskedLM,
|
||||
FlaxXLMRobertaForMultipleChoice,
|
||||
FlaxXLMRobertaForQuestionAnswering,
|
||||
FlaxXLMRobertaForSequenceClassification,
|
||||
FlaxXLMRobertaForTokenClassification,
|
||||
FlaxXLMRobertaModel,
|
||||
FlaxXLMRobertaPreTrainedModel,
|
||||
)
|
||||
|
||||
else:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -286,7 +286,7 @@ class XLMRobertaSelfAttention(nn.Module):
|
||||
return outputs
|
||||
|
||||
|
||||
# Copied from transformers.models.roberta.modeling_roberta.RobertaSelfOutput
|
||||
# Copied from transformers.models.roberta.modeling_roberta.RobertaSelfOutput with Roberta->XLMRoberta
|
||||
class XLMRobertaSelfOutput(nn.Module):
|
||||
def __init__(self, config):
|
||||
super().__init__()
|
||||
@@ -351,7 +351,7 @@ class XLMRobertaAttention(nn.Module):
|
||||
return outputs
|
||||
|
||||
|
||||
# Copied from transformers.models.roberta.modeling_roberta.RobertaIntermediate
|
||||
# Copied from transformers.models.roberta.modeling_roberta.RobertaIntermediate with Roberta->XLMRoberta
|
||||
class XLMRobertaIntermediate(nn.Module):
|
||||
def __init__(self, config):
|
||||
super().__init__()
|
||||
@@ -367,7 +367,7 @@ class XLMRobertaIntermediate(nn.Module):
|
||||
return hidden_states
|
||||
|
||||
|
||||
# Copied from transformers.models.roberta.modeling_roberta.RobertaOutput
|
||||
# Copied from transformers.models.roberta.modeling_roberta.RobertaOutput with Roberta->XLMRoberta
|
||||
class XLMRobertaOutput(nn.Module):
|
||||
def __init__(self, config):
|
||||
super().__init__()
|
||||
@@ -567,7 +567,7 @@ class XLMRobertaEncoder(nn.Module):
|
||||
)
|
||||
|
||||
|
||||
# Copied from transformers.models.roberta.modeling_roberta.RobertaPooler
|
||||
# Copied from transformers.models.roberta.modeling_roberta.RobertaPooler with Roberta->XLMRoberta
|
||||
class XLMRobertaPooler(nn.Module):
|
||||
def __init__(self, config):
|
||||
super().__init__()
|
||||
@@ -1455,7 +1455,7 @@ class XLMRobertaForTokenClassification(XLMRobertaPreTrainedModel):
|
||||
)
|
||||
|
||||
|
||||
# Copied from transformers.models.roberta.modeling_roberta.RobertaClassificationHead
|
||||
# Copied from transformers.models.roberta.modeling_roberta.RobertaClassificationHead with Roberta->XLMRoberta
|
||||
class XLMRobertaClassificationHead(nn.Module):
|
||||
"""Head for sentence-level classification tasks."""
|
||||
|
||||
|
||||
@@ -1152,6 +1152,16 @@ class FlaxXGLMPreTrainedModel(metaclass=DummyObject):
|
||||
requires_backends(self, ["flax"])
|
||||
|
||||
|
||||
FLAX_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = None
|
||||
|
||||
|
||||
class FlaxXLMRobertaForCausalLM(metaclass=DummyObject):
|
||||
_backends = ["flax"]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
requires_backends(self, ["flax"])
|
||||
|
||||
|
||||
class FlaxXLMRobertaForMaskedLM(metaclass=DummyObject):
|
||||
_backends = ["flax"]
|
||||
|
||||
@@ -1192,3 +1202,10 @@ class FlaxXLMRobertaModel(metaclass=DummyObject):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
requires_backends(self, ["flax"])
|
||||
|
||||
|
||||
class FlaxXLMRobertaPreTrainedModel(metaclass=DummyObject):
|
||||
_backends = ["flax"]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
requires_backends(self, ["flax"])
|
||||
|
||||
@@ -2646,6 +2646,13 @@ class TFXLMWithLMHeadModel(metaclass=DummyObject):
|
||||
TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = None
|
||||
|
||||
|
||||
class TFXLMRobertaForCausalLM(metaclass=DummyObject):
|
||||
_backends = ["tf"]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
requires_backends(self, ["tf"])
|
||||
|
||||
|
||||
class TFXLMRobertaForMaskedLM(metaclass=DummyObject):
|
||||
_backends = ["tf"]
|
||||
|
||||
@@ -2688,6 +2695,13 @@ class TFXLMRobertaModel(metaclass=DummyObject):
|
||||
requires_backends(self, ["tf"])
|
||||
|
||||
|
||||
class TFXLMRobertaPreTrainedModel(metaclass=DummyObject):
|
||||
_backends = ["tf"]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
requires_backends(self, ["tf"])
|
||||
|
||||
|
||||
TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST = None
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user