Black preview (#17217)
* Black preview
* Fixup too!
* Fix check copies
* Use the same version as the CI
* Bump black
This commit is contained in:
@@ -535,7 +535,7 @@ class FastAttentionviaLowRankDecomposition(FastAttention):
|
||||
assert key.ndim == value.ndim
|
||||
for ax in axis:
|
||||
if not (query.ndim >= 3 and 1 <= ax < query.ndim - 2):
|
||||
raise ValueError("Attention axis must be between the batch " "axis and the last-two axes.")
|
||||
raise ValueError("Attention axis must be between the batch axis and the last-two axes.")
|
||||
n = key.ndim
|
||||
|
||||
# Constructing projection tensor.
|
||||
|
||||
@@ -98,8 +98,9 @@ class ModelArguments:
|
||||
model_name_or_path: Optional[str] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "The model checkpoint for weights initialization."
|
||||
"Don't set if you want to train a model from scratch."
|
||||
"help": (
|
||||
"The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
|
||||
)
|
||||
},
|
||||
)
|
||||
performer: bool = field(
|
||||
@@ -159,8 +160,10 @@ class DataTrainingArguments:
|
||||
max_seq_length: Optional[int] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"help": "The maximum total input sequence length after tokenization. Sequences longer "
|
||||
"than this will be truncated. Default to the max input length of the model."
|
||||
"help": (
|
||||
"The maximum total input sequence length after tokenization. Sequences longer "
|
||||
"than this will be truncated. Default to the max input length of the model."
|
||||
)
|
||||
},
|
||||
)
|
||||
preprocessing_num_workers: Optional[int] = field(
|
||||
@@ -173,8 +176,10 @@ class DataTrainingArguments:
|
||||
pad_to_max_length: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "Whether to pad all samples to `max_seq_length`. "
|
||||
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
|
||||
"help": (
|
||||
"Whether to pad all samples to `max_seq_length`. "
|
||||
"If False, will pad the samples dynamically when batching to the maximum length in the batch."
|
||||
)
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user