Make TTS automodels importable (#25595)
* Add auto model for spectrogram/waveform
* Add doc and install
* Add dummy objects
* Did I miss anything?
This commit is contained in:
@@ -330,6 +330,14 @@ The following auto classes are available for the following audio tasks.
|
||||
|
||||
[[autodoc]] AutoModelForAudioXVector
|
||||
|
||||
### AutoModelForTextToSpectrogram
|
||||
|
||||
[[autodoc]] AutoModelForTextToSpectrogram
|
||||
|
||||
### AutoModelForTextToWaveform
|
||||
|
||||
[[autodoc]] AutoModelForTextToWaveform
|
||||
|
||||
## Multimodal
|
||||
|
||||
The following auto classes are available for the following multimodal tasks.
|
||||
|
||||
@@ -1135,6 +1135,8 @@ else:
|
||||
"AutoModelForSpeechSeq2Seq",
|
||||
"AutoModelForTableQuestionAnswering",
|
||||
"AutoModelForTextEncoding",
|
||||
"AutoModelForTextToSpectrogram",
|
||||
"AutoModelForTextToWaveform",
|
||||
"AutoModelForTokenClassification",
|
||||
"AutoModelForUniversalSegmentation",
|
||||
"AutoModelForVideoClassification",
|
||||
@@ -5050,6 +5052,8 @@ if TYPE_CHECKING:
|
||||
AutoModelForSpeechSeq2Seq,
|
||||
AutoModelForTableQuestionAnswering,
|
||||
AutoModelForTextEncoding,
|
||||
AutoModelForTextToSpectrogram,
|
||||
AutoModelForTextToWaveform,
|
||||
AutoModelForTokenClassification,
|
||||
AutoModelForUniversalSegmentation,
|
||||
AutoModelForVideoClassification,
|
||||
|
||||
@@ -101,6 +101,8 @@ else:
|
||||
"AutoModelForSequenceClassification",
|
||||
"AutoModelForSpeechSeq2Seq",
|
||||
"AutoModelForTableQuestionAnswering",
|
||||
"AutoModelForTextToSpectrogram",
|
||||
"AutoModelForTextToWaveform",
|
||||
"AutoModelForTokenClassification",
|
||||
"AutoModelForUniversalSegmentation",
|
||||
"AutoModelForVideoClassification",
|
||||
@@ -280,6 +282,8 @@ if TYPE_CHECKING:
|
||||
AutoModelForSpeechSeq2Seq,
|
||||
AutoModelForTableQuestionAnswering,
|
||||
AutoModelForTextEncoding,
|
||||
AutoModelForTextToSpectrogram,
|
||||
AutoModelForTextToWaveform,
|
||||
AutoModelForTokenClassification,
|
||||
AutoModelForUniversalSegmentation,
|
||||
AutoModelForVideoClassification,
|
||||
|
||||
@@ -742,6 +742,20 @@ class AutoModelForTextEncoding(metaclass=DummyObject):
|
||||
requires_backends(self, ["torch"])
|
||||
|
||||
|
||||
class AutoModelForTextToSpectrogram(metaclass=DummyObject):
|
||||
_backends = ["torch"]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
requires_backends(self, ["torch"])
|
||||
|
||||
|
||||
class AutoModelForTextToWaveform(metaclass=DummyObject):
|
||||
_backends = ["torch"]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
requires_backends(self, ["torch"])
|
||||
|
||||
|
||||
class AutoModelForTokenClassification(metaclass=DummyObject):
|
||||
_backends = ["torch"]
|
||||
|
||||
|
||||
@@ -115,6 +115,7 @@ PIPELINE_TAGS_AND_AUTO_MODELS = [
|
||||
("depth-estimation", "MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES", "AutoModelForDepthEstimation"),
|
||||
("video-classification", "MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING_NAMES", "AutoModelForVideoClassification"),
|
||||
("mask-generation", "MODEL_FOR_MASK_GENERATION_MAPPING_NAMES", "AutoModelForMaskGeneration"),
|
||||
("text-to-audio", "MODEL_FOR_TEXT_TO_SPECTROGRAM_NAMES", "AutoModelForTextToSpectrogram"),
|
||||
("text-to-audio", "MODEL_FOR_TEXT_TO_WAVEFORM_NAMES", "AutoModelForTextToWaveform"),
|
||||
]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user