No more Tuple, List, Dict (#38797)

* No more Tuple, List, Dict

* make fixup

* More style fixes

* Docstring fixes with regex replacement

* Trigger tests

* Redo fixes after rebase

* Fix copies

* [test all]

* update

* [test all]

* update

* [test all]

* make style after rebase

* Patch the hf_argparser test

* Patch the hf_argparser test

* style fixes

* style fixes

* style fixes

* Fix docstrings in Cohere test

* [test all]

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
This commit is contained in:
Matt
2025-06-17 19:37:18 +01:00
committed by GitHub
parent a396f4324b
commit 508a704055
1291 changed files with 14906 additions and 14941 deletions

View File

@@ -216,12 +216,12 @@ class Olmo2Attention(OlmoAttention):
def forward(
self,
hidden_states: torch.Tensor,
position_embeddings: Tuple[torch.Tensor, torch.Tensor],
position_embeddings: tuple[torch.Tensor, torch.Tensor],
attention_mask: Optional[torch.Tensor],
past_key_value: Optional[Cache] = None,
cache_position: Optional[torch.LongTensor] = None,
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[tuple[torch.Tensor]]]:
input_shape = hidden_states.shape[:-1]
hidden_shape = (*input_shape, -1, self.head_dim)
@@ -294,9 +294,9 @@ class Olmo2DecoderLayer(OlmoDecoderLayer):
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
) -> tuple[torch.FloatTensor, Optional[tuple[torch.FloatTensor, torch.FloatTensor]]]:
residual = hidden_states
# Self Attention
@@ -494,7 +494,7 @@ class LlamaForCausalLM(nn.Module):
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None,
past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
labels: Optional[torch.LongTensor] = None,
use_cache: Optional[bool] = None,
@@ -520,7 +520,7 @@ class NewModelForCausalLM(LlamaForCausalLM): | class LlamaForCausalLM(nn.M
| input_ids: torch.LongTensor = None,
| attention_mask: Optional[torch.Tensor] = None,
| position_ids: Optional[torch.LongTensor] = None,
| past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = |None,
| past_key_values: Optional[Union[Cache, list[torch.FloatTensor]]] = |None,
| inputs_embeds: Optional[torch.FloatTensor] = None,
| labels: Optional[torch.LongTensor] = None,
| use_cache: Optional[bool] = None,