Make TTS automodels importable (#25595)
* Add auto model for spectrogram/waveform
* Add doc and install
* Add dummy objects
* Did I miss anything?
This commit is contained in:
@@ -330,6 +330,14 @@ The following auto classes are available for the following audio tasks.
 
 [[autodoc]] AutoModelForAudioXVector
 
+### AutoModelForTextToSpectrogram
+
+[[autodoc]] AutoModelForTextToSpectrogram
+
+### AutoModelForTextToWaveform
+
+[[autodoc]] AutoModelForTextToWaveform
+
 ## Multimodal
 
 The following auto classes are available for the following multimodal tasks.
@@ -1135,6 +1135,8 @@ else:
 "AutoModelForSpeechSeq2Seq",
 "AutoModelForTableQuestionAnswering",
 "AutoModelForTextEncoding",
+"AutoModelForTextToSpectrogram",
+"AutoModelForTextToWaveform",
 "AutoModelForTokenClassification",
 "AutoModelForUniversalSegmentation",
 "AutoModelForVideoClassification",
@@ -5050,6 +5052,8 @@ if TYPE_CHECKING:
 AutoModelForSpeechSeq2Seq,
 AutoModelForTableQuestionAnswering,
 AutoModelForTextEncoding,
+AutoModelForTextToSpectrogram,
+AutoModelForTextToWaveform,
 AutoModelForTokenClassification,
 AutoModelForUniversalSegmentation,
 AutoModelForVideoClassification,
@@ -101,6 +101,8 @@ else:
 "AutoModelForSequenceClassification",
 "AutoModelForSpeechSeq2Seq",
 "AutoModelForTableQuestionAnswering",
+"AutoModelForTextToSpectrogram",
+"AutoModelForTextToWaveform",
 "AutoModelForTokenClassification",
 "AutoModelForUniversalSegmentation",
 "AutoModelForVideoClassification",
@@ -280,6 +282,8 @@ if TYPE_CHECKING:
 AutoModelForSpeechSeq2Seq,
 AutoModelForTableQuestionAnswering,
 AutoModelForTextEncoding,
+AutoModelForTextToSpectrogram,
+AutoModelForTextToWaveform,
 AutoModelForTokenClassification,
 AutoModelForUniversalSegmentation,
 AutoModelForVideoClassification,
@@ -742,6 +742,20 @@ class AutoModelForTextEncoding(metaclass=DummyObject):
         requires_backends(self, ["torch"])
 
 
+class AutoModelForTextToSpectrogram(metaclass=DummyObject):
+    _backends = ["torch"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch"])
+
+
+class AutoModelForTextToWaveform(metaclass=DummyObject):
+    _backends = ["torch"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["torch"])
+
+
 class AutoModelForTokenClassification(metaclass=DummyObject):
     _backends = ["torch"]
 
@@ -115,6 +115,7 @@ PIPELINE_TAGS_AND_AUTO_MODELS = [
 ("depth-estimation", "MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES", "AutoModelForDepthEstimation"),
 ("video-classification", "MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES", "AutoModelForVideoClassification"),
 ("mask-generation", "MODEL_FOR_MASK_GENERATION_MAPPING_NAMES", "AutoModelForMaskGeneration"),
+("text-to-audio", "MODEL_FOR_TEXT_TO_SPECTROGRAM_NAMES", "AutoModelForTextToSpectrogram"),
 ("text-to-audio", "MODEL_FOR_TEXT_TO_WAVEFORM_NAMES", "AutoModelForTextToWaveform"),
 ]
 
Reference in New Issue
Block a user