v4.55.4

Revert "Fix GPT-OSS swiglu_limit not passed in for MXFP4 #40197 "
The cherry-picked commit does not match the changes nor the PR. This reverts commit e75d67ec39.
2025-08-22 14:39:20 +02:00 · 2025-08-22 11:21:18 +02:00 · 2025-08-22 11:20:23 +02:00 · 2025-08-21 11:03:16 +02:00 · 2025-08-18 14:46:54 +02:00 · 2025-08-18 14:45:23 +02:00
6201 changed files with 31469 additions and 149527 deletions
--- a/._.DS_Store
+++ b/._.DS_Store
--- a/._.circleci
+++ b/._.circleci
--- a/._.git
+++ b/._.git
--- a/._.gitattributes
+++ b/._.gitattributes
--- a/._.github
+++ b/._.github
--- a/._.gitignore
+++ b/._.gitignore
--- a/._AGENTS.md
+++ b/._AGENTS.md
--- a/._CITATION.cff
+++ b/._CITATION.cff
--- a/._CODE_OF_CONDUCT.md
+++ b/._CODE_OF_CONDUCT.md
--- a/._ISSUES.md
+++ b/._ISSUES.md
--- a/._LICENSE
+++ b/._LICENSE
--- a/._awesome-transformers.md
+++ b/._awesome-transformers.md
--- a/._benchmark
+++ b/._benchmark
--- a/._docker
+++ b/._docker
--- a/._docs
+++ b/._docs
--- a/._examples
+++ b/._examples
--- a/._i18n
+++ b/._i18n
--- a/._notebooks
+++ b/._notebooks
--- a/._scripts
+++ b/._scripts
--- a/._src
+++ b/._src
--- a/._templates
+++ b/._templates
--- a/._tests
+++ b/._tests
--- a/._utils
+++ b/._utils
--- a/.circleci/._TROUBLESHOOT.md
+++ b/.circleci/._TROUBLESHOOT.md
--- a/.circleci/._config.yml
+++ b/.circleci/._config.yml
--- a/.circleci/._parse_test_outputs.py
+++ b/.circleci/._parse_test_outputs.py
--- a/.circleci/create_circleci_config.py
+++ b/.circleci/create_circleci_config.py
@@ -109,9 +109,7 @@ class CircleCIJob:
                self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
            print(f"Using {self.docker_image} docker image")
        if self.install_steps is None:
-            self.install_steps = ["uv pip install ."]
-        # Use a custom patched pytest to force exit the process at the end, to avoid `Too long with no output (exceeded 10m0s): context deadline exceeded`
-        self.install_steps.append("uv pip install git+https://github.com/ydshieh/pytest.git@8.4.1-ydshieh")
+            self.install_steps = ["uv venv && uv pip install ."]
        if self.pytest_options is None:
            self.pytest_options = {}
        if isinstance(self.tests_to_run, str):
@@ -215,7 +213,7 @@ generate_job = CircleCIJob(
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    # networkx==3.3 (after #36957) cause some issues
    # TODO: remove this once it works directly
-    install_steps=["uv pip install ."],
+    install_steps=["uv venv && uv pip install ."],
    marker="generate",
    parallelism=6,
 )
@@ -252,7 +250,7 @@ examples_torch_job = CircleCIJob(
    additional_env={"OMP_NUM_THREADS": 8},
    docker_image=[{"image":"huggingface/transformers-examples-torch"}],
    # TODO @ArthurZucker remove this once docker is easier to build
-    install_steps=["uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
+    install_steps=["uv venv && uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
    pytest_num_workers=4,
 )

@@ -261,7 +259,7 @@ hub_job = CircleCIJob(
    additional_env={"HUGGINGFACE_CO_STAGING": True},
    docker_image=[{"image":"huggingface/transformers-torch-light"}],
    install_steps=[
-        'uv pip install .',
+        'uv venv && uv pip install .',
        'git config --global user.email "ci@dummy.com"',
        'git config --global user.name "ci"',
    ],
@@ -275,6 +273,7 @@ onnx_job = CircleCIJob(
    "onnx",
    docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
    install_steps=[
+        "uv venv",
        "uv pip install .[testing,sentencepiece,onnxruntime,vision,rjieba]",
    ],
    pytest_options={"k onnx": None},
@@ -304,7 +303,7 @@ non_model_job = CircleCIJob(
    docker_image=[{"image": "huggingface/transformers-torch-light"}],
    # networkx==3.3 (after #36957) cause some issues
    # TODO: remove this once it works directly
-    install_steps=["uv pip install .[serving]"],
+    install_steps=["uv venv && uv pip install .[serving]"],
    marker="not generate",
    parallelism=6,
 )
@@ -322,7 +321,7 @@ doc_test_job = CircleCIJob(
    additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"},
    install_steps=[
        # Add an empty file to keep the test step running correctly even no file is selected to be tested.
-        "uv pip install .",
+        "uv venv && pip install .",
        "touch dummy.py",
        command,
        "cat pr_documentation_tests_temp.txt",
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,13 +1,4 @@
 *.py	eol=lf
 *.rst	eol=lf
 *.md	eol=lf
-*.mdx   eol=lf
-*.model filter=lfs diff=lfs merge=lfs -text
-*.png filter=lfs diff=lfs merge=lfs -text
-*.jpg filter=lfs diff=lfs merge=lfs -text
-*.jpeg filter=lfs diff=lfs merge=lfs -text
-*.gif filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
+*.mdx   eol=lf
--- a/.github/._ISSUE_TEMPLATE
+++ b/.github/._ISSUE_TEMPLATE
--- a/.github/._PULL_REQUEST_TEMPLATE.md
+++ b/.github/._PULL_REQUEST_TEMPLATE.md
--- a/.github/._conda
+++ b/.github/._conda
--- a/.github/._scripts
+++ b/.github/._scripts
--- a/.github/._workflows
+++ b/.github/._workflows
--- a/.github/ISSUE_TEMPLATE/._bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/._bug-report.yml
--- a/.github/ISSUE_TEMPLATE/._config.yml
+++ b/.github/ISSUE_TEMPLATE/._config.yml
--- a/.github/ISSUE_TEMPLATE/._feature-request.yml
+++ b/.github/ISSUE_TEMPLATE/._feature-request.yml
--- a/.github/ISSUE_TEMPLATE/._i18n.md
+++ b/.github/ISSUE_TEMPLATE/._i18n.md
--- a/.github/ISSUE_TEMPLATE/._migration.yml
+++ b/.github/ISSUE_TEMPLATE/._migration.yml
--- a/.github/ISSUE_TEMPLATE/._new-model-addition.yml
+++ b/.github/ISSUE_TEMPLATE/._new-model-addition.yml
--- a/.github/conda/._build.sh
+++ b/.github/conda/._build.sh
--- a/.github/conda/._meta.yaml
+++ b/.github/conda/._meta.yaml
--- a/.github/scripts/._assign_reviewers.py
+++ b/.github/scripts/._assign_reviewers.py
--- a/.github/scripts/._codeowners_for_review_action
+++ b/.github/scripts/._codeowners_for_review_action
--- a/.github/workflows/._TROUBLESHOOT.md
+++ b/.github/workflows/._TROUBLESHOOT.md
--- a/.github/workflows/._add-model-like.yml
+++ b/.github/workflows/._add-model-like.yml
--- a/.github/workflows/._assign-reviewers.yml
+++ b/.github/workflows/._assign-reviewers.yml
--- a/.github/workflows/._build-ci-docker-images.yml
+++ b/.github/workflows/._build-ci-docker-images.yml
--- a/.github/workflows/._build-docker-images.yml
+++ b/.github/workflows/._build-docker-images.yml
--- a/.github/workflows/._build-nightly-ci-docker-images.yml
+++ b/.github/workflows/._build-nightly-ci-docker-images.yml
--- a/.github/workflows/._build-past-ci-docker-images.yml
+++ b/.github/workflows/._build-past-ci-docker-images.yml
--- a/.github/workflows/._check_tiny_models.yml
+++ b/.github/workflows/._check_tiny_models.yml
--- a/.github/workflows/._get-pr-info.yml
+++ b/.github/workflows/._get-pr-info.yml
--- a/.github/workflows/._get-pr-number.yml
+++ b/.github/workflows/._get-pr-number.yml
--- a/.github/workflows/._model_jobs_intel_gaudi.yml
+++ b/.github/workflows/._model_jobs_intel_gaudi.yml
--- a/.github/workflows/._new_model_pr_merged_notification.yml
+++ b/.github/workflows/._new_model_pr_merged_notification.yml
--- a/.github/workflows/._pr-style-bot.yml
+++ b/.github/workflows/._pr-style-bot.yml
--- a/.github/workflows/._push-important-models.yml
+++ b/.github/workflows/._push-important-models.yml
--- a/.github/workflows/._release-conda.yml
+++ b/.github/workflows/._release-conda.yml
--- a/.github/workflows/._self-nightly-past-ci-caller.yml
+++ b/.github/workflows/._self-nightly-past-ci-caller.yml
--- a/.github/workflows/._self-past-caller.yml
+++ b/.github/workflows/._self-past-caller.yml
--- a/.github/workflows/._self-push-amd-mi210-caller.yml
+++ b/.github/workflows/._self-push-amd-mi210-caller.yml
--- a/.github/workflows/._self-push-amd-mi250-caller.yml
+++ b/.github/workflows/._self-push-amd-mi250-caller.yml
--- a/.github/workflows/._self-push-amd.yml
+++ b/.github/workflows/._self-push-amd.yml
--- a/.github/workflows/._self-push-caller.yml
+++ b/.github/workflows/._self-push-caller.yml
--- a/.github/workflows/._self-scheduled-amd-caller.yml
+++ b/.github/workflows/._self-scheduled-amd-caller.yml
--- a/.github/workflows/._self-scheduled-amd-mi250-caller.yml
+++ b/.github/workflows/._self-scheduled-amd-mi250-caller.yml
--- a/.github/workflows/._self-scheduled-intel-gaudi.yml
+++ b/.github/workflows/._self-scheduled-intel-gaudi.yml
--- a/.github/workflows/._self-scheduled-intel-gaudi3-caller.yml
+++ b/.github/workflows/._self-scheduled-intel-gaudi3-caller.yml
--- a/.github/workflows/._ssh-runner.yml
+++ b/.github/workflows/._ssh-runner.yml
--- a/.github/workflows/._stale.yml
+++ b/.github/workflows/._stale.yml
--- a/.github/workflows/._trufflehog.yml
+++ b/.github/workflows/._trufflehog.yml
--- a/.github/workflows/._update_metdata.yml
+++ b/.github/workflows/._update_metdata.yml
--- a/.github/workflows/._upload_pr_documentation.yml
+++ b/.github/workflows/._upload_pr_documentation.yml
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -48,7 +48,7 @@ jobs:

      - name: Run database init script
        run: |
-          psql -f benchmark/utils/init_db.sql
+          psql -f benchmark/init_db.sql
        env:
          PGDATABASE: metrics
          PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
--- a/.github/workflows/check_failed_tests.yml
+++ b/.github/workflows/check_failed_tests.yml
@@ -21,9 +21,6 @@ on:
      report_repo_id:
        required: true
        type: string
-      commit_sha:
-        required: false
-        type: string


 env:
@@ -90,7 +87,7 @@ jobs:
      - name: Update clone
        working-directory: /transformers
        if: ${{ env.process == 'true' }}
-        run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
+        run: git fetch && git checkout ${{ github.sha }}

      - name: Get target commit
        working-directory: /transformers/utils
--- a/.github/workflows/collated-reports.yml
+++ b/.github/workflows/collated-reports.yml
@@ -1,49 +0,0 @@
-name: CI collated reports
-
-on:
-  workflow_call:
-    inputs:
-      job:
-        required: true
-        type: string
-      report_repo_id:
-        required: true
-        type: string
-      machine_type:
-        required: true
-        type: string
-      gpu_name:
-        description: Name of the GPU used for the job. Its enough that the value contains the name of the GPU, e.g. "noise-h100-more-noise". Case insensitive.
-        required: true
-        type: string
-
-jobs:
-  collated_reports:
-    name: Collated reports
-    runs-on: ubuntu-22.04
-    if: always()
-    steps:
-      - uses: actions/checkout@v4
-      - uses: actions/download-artifact@v4
-
-      - name: Collated reports
-        shell: bash
-        env:
-          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
-          CI_SHA: ${{ github.sha }}
-          TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
-        run: |
-          pip install huggingface_hub
-          python3 utils/collated_reports.py                  \
-            --path .                                         \
-            --machine-type ${{ inputs.machine_type }}        \
-            --commit-hash ${{ env.CI_SHA }}                  \
-            --job ${{ inputs.job }}                          \
-            --report-repo-id ${{ inputs.report_repo_id }}    \
-            --gpu-name ${{ inputs.gpu_name }}
-
-      - name: Upload collated reports
-        uses: actions/upload-artifact@v4
-        with:
-          name: collated_reports_${{ env.CI_SHA }}.json
-          path: collated_reports_${{ env.CI_SHA }}.json
--- a/.github/workflows/model_jobs.yml
+++ b/.github/workflows/model_jobs.yml
@@ -18,9 +18,6 @@ on:
      docker:
        required: true
        type: string
-      commit_sha:
-        required: false
-        type: string
      report_name_prefix:
        required: false
        default: run_models_gpu
@@ -73,7 +70,7 @@ jobs:

      - name: Update clone
        working-directory: /transformers
-        run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
+        run: git fetch && git checkout ${{ github.sha }}

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
--- a/.github/workflows/self-nightly-caller.yml
+++ b/.github/workflows/self-nightly-caller.yml
@@ -1,54 +1,43 @@
-name: Nvidia CI with nightly torch
+name: Self-hosted runner (nightly-ci)
+

 on:
  repository_dispatch:
-  # triggered when the daily scheduled Nvidia CI is completed.
-  # This way, we can compare the results more easily.
-  workflow_run:
-    workflows: ["Nvidia CI"]
-    branches: ["main"]
-    types: [completed]
+  schedule:
+    - cron: "17 2 * * *"
  push:
    branches:
-      - run_ci_with_nightly_torch*
-
-# Used for `push` to easily modify the target workflow runs to compare against
-env:
-    prev_workflow_run_id: ""
-    other_workflow_run_id: ""
-
+      - run_nightly_ci*

 jobs:
-  build_nightly_torch_ci_images:
-    name: Build CI Docker Images with nightly torch
+  build_nightly_ci_images:
+    name: Build Nightly CI Docker Images
+    if: (github.event_name == 'schedule') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_nightly_ci'))
    uses: ./.github/workflows/build-nightly-ci-docker-images.yml
    secrets: inherit

-  setup:
-    name: Setup
-    runs-on: ubuntu-22.04
-    steps:
-      - name: Setup
-        run: |
-          mkdir "setup_values"
-          echo "${{ inputs.prev_workflow_run_id || env.prev_workflow_run_id }}" > "setup_values/prev_workflow_run_id.txt"
-          echo "${{ inputs.other_workflow_run_id || env.other_workflow_run_id }}" > "setup_values/other_workflow_run_id.txt"
-
-      - name: Upload artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: setup_values
-          path: setup_values
-
  model-ci:
    name: Model CI
-    needs: build_nightly_torch_ci_images
+    needs: [build_nightly_ci_images]
    uses: ./.github/workflows/self-scheduled.yml
    with:
      job: run_models_gpu
      slack_report_channel: "#transformers-ci-past-future"
+      runner: ci
      docker: huggingface/transformers-all-latest-torch-nightly-gpu
      ci_event: Nightly CI
-      report_repo_id: hf-internal-testing/transformers_daily_ci_with_torch_nightly
-      commit_sha: ${{ github.event.workflow_run.head_sha || github.sha }}
+    secrets: inherit
+
+  deepspeed-ci:
+    name: DeepSpeed CI
+    needs: [build_nightly_ci_images]
+    uses: ./.github/workflows/self-scheduled.yml
+    with:
+      job: run_torch_cuda_extensions_gpu
+      slack_report_channel: "#transformers-ci-past-future"
+      runner: ci
+      # test deepspeed nightly build with the latest release torch
+      docker: huggingface/transformers-pytorch-deepspeed-latest-gpu
+      ci_event: Nightly CI
+      working-directory-prefix: /workspace
    secrets: inherit
--- a/.github/workflows/self-push-amd-mi300-caller.yml
+++ b/.github/workflows/self-push-amd-mi300-caller.yml
@@ -0,0 +1,25 @@
+name: Self-hosted runner (AMD mi300 CI caller)
+
+on:
+  #workflow_run:
+  #  workflows: ["Self-hosted runner (push-caller)"]
+  #  branches: ["main"]
+  #  types: [completed]
+  push:
+    branches:
+      - run_amd_push_ci_caller*
+    paths:
+      - "src/**"
+      - "tests/**"
+      - ".github/**"
+      - "templates/**"
+      - "utils/**"
+
+jobs:
+  run_amd_ci:
+    name: AMD mi300
+    if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci'))))
+    uses: ./.github/workflows/self-push-amd.yml
+    with:
+      gpu_flavor: mi300
+    secrets: inherit
--- a/.github/workflows/self-scheduled-amd-mi300-caller.yml
+++ b/.github/workflows/self-scheduled-amd-mi300-caller.yml
@@ -1,8 +1,8 @@
-name: Self-hosted runner scale set (AMD mi355 scheduled CI caller)
+name: Self-hosted runner scale set (AMD mi300 scheduled CI caller)

 # Note: For every job in this workflow, the name of the runner scale set is finalized in the runner yaml i.e. huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled_arc_scale_set.yaml
-# For example, 1gpu : amd-mi355-ci-1gpu
-#              2gpu : amd-mi355-ci-2gpu
+# For example, 1gpu scale set: amd-mi300-ci-1gpu
+#              2gpu scale set: amd-mi300-ci-2gpu

 on:
  workflow_run:
@@ -20,9 +20,9 @@ jobs:
    with:
      job: run_models_gpu
      slack_report_channel: "#amd-hf-ci"
-      runner_scale_set: amd-mi355-ci
+      runner_scale_set: amd-mi300-ci
      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi355
+      ci_event: Scheduled CI (AMD) - mi300
      report_repo_id: optimum-amd/transformers_daily_ci
    secrets: inherit

@@ -32,9 +32,9 @@ jobs:
    with:
      job: run_pipelines_torch_gpu
      slack_report_channel: "#amd-hf-ci"
-      runner_scale_set: amd-mi355-ci
+      runner_scale_set: amd-mi300-ci
      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi355
+      ci_event: Scheduled CI (AMD) - mi300
      report_repo_id: optimum-amd/transformers_daily_ci
    secrets: inherit

@@ -44,9 +44,9 @@ jobs:
    with:
      job: run_examples_gpu
      slack_report_channel: "#amd-hf-ci"
-      runner_scale_set: amd-mi355-ci
+      runner_scale_set: amd-mi300-ci
      docker: huggingface/transformers-pytorch-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi355
+      ci_event: Scheduled CI (AMD) - mi300
      report_repo_id: optimum-amd/transformers_daily_ci
    secrets: inherit

@@ -56,8 +56,8 @@ jobs:
    with:
      job: run_torch_cuda_extensions_gpu
      slack_report_channel: "#amd-hf-ci"
-      runner_scale_set: amd-mi355-ci
+      runner_scale_set: amd-mi300-ci
      docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
-      ci_event: Scheduled CI (AMD) - mi355
+      ci_event: Scheduled CI (AMD) - mi300
      report_repo_id: optimum-amd/transformers_daily_ci
    secrets: inherit
--- a/.github/workflows/self-scheduled-amd-mi325-caller.yml
+++ b/.github/workflows/self-scheduled-amd-mi325-caller.yml
@@ -24,7 +24,6 @@ jobs:
      docker: huggingface/transformers-pytorch-amd-gpu
      ci_event: Scheduled CI (AMD) - mi325
      report_repo_id: optimum-amd/transformers_daily_ci
-      env_file: /etc/podinfo/gha-gpu-isolation-settings
    secrets: inherit

  torch-pipeline:
@@ -37,7 +36,6 @@ jobs:
      docker: huggingface/transformers-pytorch-amd-gpu
      ci_event: Scheduled CI (AMD) - mi325
      report_repo_id: optimum-amd/transformers_daily_ci
-      env_file: /etc/podinfo/gha-gpu-isolation-settings
    secrets: inherit

  example-ci:
@@ -50,7 +48,6 @@ jobs:
      docker: huggingface/transformers-pytorch-amd-gpu
      ci_event: Scheduled CI (AMD) - mi325
      report_repo_id: optimum-amd/transformers_daily_ci
-      env_file: /etc/podinfo/gha-gpu-isolation-settings
    secrets: inherit

  deepspeed-ci:
@@ -63,5 +60,4 @@ jobs:
      docker: huggingface/transformers-pytorch-deepspeed-amd-gpu
      ci_event: Scheduled CI (AMD) - mi325
      report_repo_id: optimum-amd/transformers_daily_ci
-      env_file: /etc/podinfo/gha-gpu-isolation-settings
    secrets: inherit
--- a/.github/workflows/self-scheduled-caller.yml
+++ b/.github/workflows/self-scheduled-caller.yml
@@ -1,4 +1,5 @@
-name: Nvidia CI
+name: Self-hosted runner (scheduled)
+

 on:
  repository_dispatch:
@@ -6,7 +7,7 @@ on:
    - cron: "17 2 * * *"
  push:
    branches:
-      - run_nvidia_ci*
+      - run_scheduled_ci*
  workflow_dispatch:
    inputs:
      prev_workflow_run_id:
@@ -53,7 +54,6 @@ jobs:
      docker: huggingface/transformers-all-latest-gpu
      ci_event: Daily CI
      report_repo_id: hf-internal-testing/transformers_daily_ci
-      commit_sha: ${{ github.sha }}
    secrets: inherit

  torch-pipeline:
@@ -65,7 +65,6 @@ jobs:
      docker: huggingface/transformers-pytorch-gpu
      ci_event: Daily CI
      report_repo_id: hf-internal-testing/transformers_daily_ci
-      commit_sha: ${{ github.sha }}
    secrets: inherit

  example-ci:
@@ -77,7 +76,6 @@ jobs:
      docker: huggingface/transformers-all-latest-gpu
      ci_event: Daily CI
      report_repo_id: hf-internal-testing/transformers_daily_ci
-      commit_sha: ${{ github.sha }}
    secrets: inherit

  trainer-fsdp-ci:
@@ -89,7 +87,6 @@ jobs:
      docker: huggingface/transformers-all-latest-gpu
      ci_event: Daily CI
      report_repo_id: hf-internal-testing/transformers_daily_ci
-      commit_sha: ${{ github.sha }}
    secrets: inherit

  deepspeed-ci:
@@ -102,7 +99,6 @@ jobs:
      ci_event: Daily CI
      working-directory-prefix: /workspace
      report_repo_id: hf-internal-testing/transformers_daily_ci
-      commit_sha: ${{ github.sha }}
    secrets: inherit

  quantization-ci:
@@ -114,5 +110,4 @@ jobs:
      docker: huggingface/transformers-quantization-latest-gpu
      ci_event: Daily CI
      report_repo_id: hf-internal-testing/transformers_daily_ci
-      commit_sha: ${{ github.sha }}
    secrets: inherit
--- a/.github/workflows/self-scheduled.yml
+++ b/.github/workflows/self-scheduled.yml
@@ -1,4 +1,4 @@
-name: Nvidia CI (job definitions)
+name: Self-hosted runner (scheduled)

 # Note that each job's dependencies go into a corresponding docker file.
 #
@@ -28,9 +28,6 @@ on:
      report_repo_id:
        required: true
        type: string
-      commit_sha:
-        required: false
-        type: string


 env:
@@ -49,8 +46,8 @@ env:

 jobs:
  setup:
-    name: Setup
    if: contains(fromJSON('["run_models_gpu", "run_trainer_and_fsdp_gpu", "run_quantization_torch_gpu"]'), inputs.job)
+    name: Setup
    strategy:
      matrix:
        machine_type: [aws-g5-4xlarge-cache, aws-g5-12xlarge-cache]
@@ -122,7 +119,6 @@ jobs:
      slice_id: ${{ matrix.slice_id }}
      runner_map: ${{ needs.setup.outputs.runner_map }}
      docker: ${{ inputs.docker }}
-      commit_sha: ${{ inputs.commit_sha || github.sha }}
    secrets: inherit

  run_trainer_and_fsdp_gpu:
@@ -141,7 +137,6 @@ jobs:
      slice_id: ${{ matrix.slice_id }}
      runner_map: ${{ needs.setup.outputs.runner_map }}
      docker: ${{ inputs.docker }}
-      commit_sha: ${{ inputs.commit_sha || github.sha }}
      report_name_prefix: run_trainer_and_fsdp_gpu
    secrets: inherit

@@ -160,7 +155,7 @@ jobs:
    steps:
      - name: Update clone
        working-directory: /transformers
-        run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
+        run: git fetch && git checkout ${{ github.sha }}

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
@@ -228,7 +223,7 @@ jobs:
    steps:
      - name: Update clone
        working-directory: /transformers
-        run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
+        run: git fetch && git checkout ${{ github.sha }}

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
@@ -297,7 +292,7 @@ jobs:
    steps:
      - name: Update clone
        working-directory: ${{ inputs.working-directory-prefix }}/transformers
-        run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
+        run: git fetch && git checkout ${{ github.sha }}

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: ${{ inputs.working-directory-prefix }}/transformers
@@ -405,7 +400,7 @@ jobs:

      - name: Update clone
        working-directory: /transformers
-        run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}
+        run: git fetch && git checkout ${{ github.sha }}

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
@@ -469,7 +464,6 @@ jobs:
        uses: actions/checkout@v4
        with:
          fetch-depth: 2
-          ref: ${{ inputs.commit_sha || github.sha }}

      - name: Install transformers
        run: pip install transformers
@@ -524,7 +518,6 @@ jobs:
      quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }}
      ci_event: ${{ inputs.ci_event }}
      report_repo_id: ${{ inputs.report_repo_id }}
-      commit_sha: ${{ inputs.commit_sha || github.sha }}

    secrets: inherit

@@ -535,7 +528,7 @@ jobs:
    uses: ./.github/workflows/check_failed_tests.yml
    with:
      docker: ${{ inputs.docker }}
-      start_sha: ${{ inputs.commit_sha || github.sha }}
+      start_sha: ${{ github.sha }}
      job: ${{ inputs.job }}
      slack_report_channel: ${{ inputs.slack_report_channel }}
      ci_event: ${{ inputs.ci_event }}
--- a/.github/workflows/slack-report.yml
+++ b/.github/workflows/slack-report.yml
@@ -24,10 +24,6 @@ on:
      report_repo_id:
        required: true
        type: string
-      commit_sha:
-        required: false
-        type: string
-

 env:
  TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
@@ -36,7 +32,7 @@ jobs:
  send_results:
    name: Send results to webhook
    runs-on: ubuntu-22.04
-    if: always() && !cancelled()
+    if: always()
    steps:
      - name: Preliminary job status
        shell: bash
@@ -45,10 +41,6 @@ jobs:
          echo "Setup status: ${{ inputs.setup_status }}"

      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 2
-          ref: ${{ inputs.commit_sha || github.sha }}
-
      - uses: actions/download-artifact@v4

      - name: Prepare some setup values
@@ -75,7 +67,7 @@ jobs:
          SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }}
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_EVENT: ${{ inputs.ci_event }}
-          CI_SHA: ${{ inputs.commit_sha || github.sha }}
+          CI_SHA: ${{ github.sha }}
          CI_TEST_JOB: ${{ inputs.job }}
          SETUP_STATUS: ${{ inputs.setup_status }}
          REPORT_REPO_ID: ${{ inputs.report_repo_id }}
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -68,7 +68,8 @@ already reported** (use the search bar on GitHub under Issues). Your issue shoul

 Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so we can quickly resolve it:

-* Your **OS type and version** and **Python**, and **PyTorch** versions when applicable.
+* Your **OS type and version** and **Python**, **PyTorch** and
+  **TensorFlow** versions when applicable.
 * A short, self-contained, code snippet that allows us to reproduce the bug in
  less than 30s.
 * The *full* traceback if an exception is raised.
@@ -164,7 +165,8 @@ You'll need **[Python 3.9](https://github.com/huggingface/transformers/blob/main
   mode with the `-e` flag.

   Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a
-   failure with this command. If that's the case make sure to install Pytorch then do:
+   failure with this command. If that's the case make sure to install the Deep Learning framework you are working with
+   (PyTorch, TensorFlow and/or Flax) then do:

   ```bash
   pip install -e ".[quality]"
--- a/1
+++ b/1
@@ -52,7 +52,6 @@ repo-consistency:
 	python utils/check_doctest_list.py
 	python utils/update_metadata.py --check-only
 	python utils/check_docstrings.py
-	python utils/add_dates.py

 # this target runs checks on all files

--- a/README.md
+++ b/README.md
@@ -147,7 +147,7 @@ chat = [
    {"role": "user", "content": "Hey, can you tell me any fun things to do in New York?"}
 ]

-pipeline = pipeline(task="text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct", dtype=torch.bfloat16, device_map="auto")
+pipeline = pipeline(task="text-generation", model="meta-llama/Meta-Llama-3-8B-Instruct", torch_dtype=torch.bfloat16, device_map="auto")
 response = pipeline(chat, max_new_tokens=512)
 print(response[0]["generated_text"][-1]["content"])
 ```
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -14,7 +14,7 @@ Models uploaded on the Hugging Face Hub come in different formats. We heavily re
 models in the [`safetensors`](https://github.com/huggingface/safetensors) format (which is the default prioritized
 by the transformers library), as developed specifically to prevent arbitrary code execution on your system.

-To avoid loading models from unsafe formats (e.g. [pickle](https://docs.python.org/3/library/pickle.html), you should use the `use_safetensors` parameter. If doing so, in the event that no .safetensors file is present, transformers will error when loading the model.
+To avoid loading models from unsafe formats (e.g. [pickle](https://docs.python.org/3/library/pickle.html)), you should use the `use_safetensors` parameter. If doing so, in the event that no .safetensors file is present, transformers will error when loading the model.

 ### Remote code

--- a/benchmark/._README.md
+++ b/benchmark/._README.md
--- a/benchmark/._init.py
+++ b/benchmark/._init.py
--- a/benchmark/._benchmark.py
+++ b/benchmark/._benchmark.py
--- a/benchmark/._config
+++ b/benchmark/._config
--- a/benchmark/._default.yml
+++ b/benchmark/._default.yml
--- a/benchmark/._grafana_dashboard.json
+++ b/benchmark/._grafana_dashboard.json
--- a/benchmark/._grafana_datasource.yaml
+++ b/benchmark/._grafana_datasource.yaml
--- a/benchmark/._optimum_benchmark_wrapper.py
+++ b/benchmark/._optimum_benchmark_wrapper.py
--- a/benchmark/.gitignore
+++ b/benchmark/.gitignore
@@ -1 +0,0 @@
-benchmark_results/
--- a/benchmark/benches/llama.py
+++ b/benchmark/benches/llama.py
@@ -1,345 +0,0 @@
-# Copyright 2025 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from logging import Logger
-import os
-from threading import Event, Thread
-from time import perf_counter, sleep
-from typing import Optional
-import sys
-
-# Add the parent directory to Python path to import benchmarks_entrypoint
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from benchmarks_entrypoint import MetricsRecorder
-
-import gpustat
-import psutil
-import psycopg2
-
-# Optional heavy ML dependencies - only required when actually running the benchmark
-try:
-    import torch
-    from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, StaticCache
-    TRANSFORMERS_AVAILABLE = True
-except ImportError:
-    TRANSFORMERS_AVAILABLE = False
-    torch = None
-    AutoModelForCausalLM = None
-    AutoTokenizer = None
-    GenerationConfig = None
-    StaticCache = None
-
-os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-os.environ["TOKENIZERS_PARALLELISM"] = "1"
-
-# Only set torch precision if torch is available
-if TRANSFORMERS_AVAILABLE:
-    torch.set_float32_matmul_precision("high")
-
-
-def collect_metrics(benchmark_id, continue_metric_collection, metrics_recorder):
-    p = psutil.Process(os.getpid())
-    while not continue_metric_collection.is_set():
-        with p.oneshot():
-            cpu_util = p.cpu_percent()
-            mem_megabytes = p.memory_info().rss / (1024 * 1024)
-        gpu_stats = gpustat.GPUStatCollection.new_query()
-        gpu_util = gpu_stats[0]["utilization.gpu"]
-        gpu_mem_megabytes = gpu_stats[0]["memory.used"]
-        metrics_recorder.collect_device_measurements(
-            benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes
-        )
-        sleep(0.01)
-
-
-def run_benchmark(
-    logger: Logger, repository: str, branch: str, commit_id: str, commit_msg: str, metrics_recorder=None, num_tokens_to_generate=100
-):
-    # Check if required ML dependencies are available
-    if not TRANSFORMERS_AVAILABLE:
-        logger.error("Transformers and torch are required to run the LLaMA benchmark. Please install them with:")
-        logger.error("pip install torch transformers")
-        logger.error("Skipping LLaMA benchmark due to missing dependencies.")
-        return
-    
-    continue_metric_collection = Event()
-    metrics_thread = None
-    model_id = "meta-llama/Llama-2-7b-hf"
-    
-    # If no metrics_recorder is provided, create one for backward compatibility
-    if metrics_recorder is None:
-        try:
-            metrics_recorder = MetricsRecorder(
-                psycopg2.connect("dbname=metrics"), logger, repository, branch, commit_id, commit_msg, True
-            )
-            should_close_recorder = True
-        except Exception as e:
-            logger.error(f"Failed to create metrics recorder: {e}")
-            return
-    else:
-        should_close_recorder = False
-    try:
-        gpu_stats = gpustat.GPUStatCollection.new_query()
-        gpu_name = gpu_stats[0]["name"]
-        benchmark_id = metrics_recorder.initialise_benchmark({"gpu_name": gpu_name, "model_id": model_id})
-        logger.info(f"running benchmark #{benchmark_id} on {gpu_name} for {model_id}")
-        metrics_thread = Thread(
-            target=collect_metrics,
-            args=[benchmark_id, continue_metric_collection, metrics_recorder],
-        )
-        metrics_thread.start()
-        logger.info("started background thread to fetch device metrics")
-
-        os.environ["TOKENIZERS_PARALLELISM"] = "false"  # silence warnings when compiling
-
-        device = "cuda"
-
-        logger.info("downloading weights")
-        # This is to avoid counting download in model load time measurement
-        model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16)
-        gen_config = GenerationConfig(do_sample=False, top_p=1, temperature=1)
-        logger.info("loading model")
-        start = perf_counter()
-        model = AutoModelForCausalLM.from_pretrained(
-            model_id, dtype=torch.float16, generation_config=gen_config
-        ).eval()
-        model.to(device)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        model_load_time = end - start
-        logger.info(f"loaded model in: {model_load_time}s")
-
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-
-        prompt = "Why dogs are so cute?"
-        inputs = tokenizer(prompt, return_tensors="pt").to(device)
-
-        # Specify the max length (including both the prompt and the response)
-        # When calling `generate` with `cache_implementation="static" later, this is also used to create a `StaticCache` object
-        # with sequence length = `max_length`. The longer the more you will re-use it
-        seq_length = inputs["input_ids"].shape[1]
-        model.generation_config.max_length = seq_length + num_tokens_to_generate
-        batch_size = inputs["input_ids"].shape[0]
-
-        # Copied from the gpt-fast repo
-        def multinomial_sample_one_no_sync(probs_sort):  # Does multinomial sampling without a cuda synchronization
-            q = torch.empty_like(probs_sort).exponential_(1)
-            return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)
-
-        def logits_to_probs(logits, temperature: float = 1.0, top_k: Optional[int] = None):
-            logits = logits / max(temperature, 1e-5)
-
-            if top_k is not None:
-                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
-                pivot = v.select(-1, -1).unsqueeze(-1)
-                logits = torch.where(logits < pivot, -float("Inf"), logits)
-            probs = torch.nn.functional.softmax(logits, dim=-1)
-            return probs
-
-        def sample(logits, temperature: float = 1.0, top_k: Optional[int] = None):
-            probs = logits_to_probs(logits[0, -1], temperature, top_k)
-            idx_next = multinomial_sample_one_no_sync(probs)
-            return idx_next, probs
-
-        # First eager forward pass
-        logger.info("running first eager forward pass")
-        start = perf_counter()
-        outputs = model(**inputs)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        first_eager_fwd_pass_time = end - start
-        logger.info(f"completed first eager forward pass in: {first_eager_fwd_pass_time}s")
-
-        # Second eager forward pass (should be faster)
-        logger.info("running second eager forward pass")
-        start = perf_counter()
-        outputs = model(**inputs)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        second_eager_fwd_pass_time = end - start
-        logger.info(f"completed second eager forward pass in: {second_eager_fwd_pass_time}s")
-
-        # First eager generation
-        logger.info("running first eager generation")
-        start = perf_counter()
-        output = model.generate(**inputs)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        first_eager_generate_time = end - start
-        logger.info(f"completed first eager generation in: {first_eager_generate_time}s")
-        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-        # Second eager generation (should be faster)
-        logger.info("running second eager generation")
-        start = perf_counter()
-        output = model.generate(**inputs)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        second_eager_generate_time = end - start
-        logger.info(f"completed second eager generation in: {second_eager_generate_time}s")
-        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-        logger.info("running generation timing loop")
-
-        input_pos = torch.arange(0, seq_length, device=device)
-        inputs = inputs["input_ids"]
-
-        start = perf_counter()
-        with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
-            logits = model(inputs, position_ids=input_pos).logits
-        next_token, probs = sample(logits, temperature=0.6, top_k=5)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        time_to_first_token = end - start
-
-        input_pos = torch.tensor([seq_length], device=device, dtype=torch.int)
-        next_token = next_token.clone()
-        start = perf_counter()
-        with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
-            logits = model(next_token, position_ids=input_pos).logits
-        next_token, probs = sample(logits, temperature=0.6, top_k=5)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        time_to_second_token = end - start
-
-        input_pos = torch.tensor([seq_length + 1], device=device, dtype=torch.int)
-        next_token = next_token.clone()
-        start = perf_counter()
-        with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
-            logits = model(next_token, position_ids=input_pos).logits
-        next_token, probs = sample(logits, temperature=0.6, top_k=5)
-        torch.cuda.synchronize()
-        end = perf_counter()
-        time_to_third_token = end - start
-
-        logger.info("running longer generation timing loop")
-
-        total_time = 0
-        for i in range(20):
-            input_pos = torch.tensor([seq_length + 2 + i], device=device, dtype=torch.int)
-            next_token = next_token.clone()
-            start = perf_counter()
-            with torch.nn.attention.sdpa_kernel(torch.nn.attention.SDPBackend.MATH):
-                logits = model(next_token, position_ids=input_pos).logits
-            next_token, probs = sample(logits, temperature=0.6, top_k=5)
-            torch.cuda.synchronize()
-            end = perf_counter()
-            total_time += end - start
-
-        mean_time_to_next_token = total_time / 20
-
-        logger.info("running compilation benchmarks")
-
-        # Now compile the model
-        model = torch.compile(model, mode="max-autotune", fullgraph=True)
-
-        # StaticCache for generation
-        with torch.device(device):
-            model.setup_caches(max_batch_size=batch_size, max_seq_len=seq_length + num_tokens_to_generate)
-
-        input_pos = torch.arange(0, seq_length, device=device)
-        inputs = tokenizer(prompt, return_tensors="pt").to(device)["input_ids"]
-
-        logger.info("compiling model")
-
-        model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16, generation_config=gen_config)
-        model.to(device)
-        model = torch.compile(model, mode="max-autotune", fullgraph=True)
-
-        past_key_values = StaticCache(
-            model.config,
-            max_batch_size=batch_size,
-            device=device,
-            dtype=torch.float16,
-            max_cache_len=seq_length + 128,
-        )
-        # 1st call
-        start = perf_counter()
-        output = model.generate(**inputs, past_key_values=past_key_values)
-        end = perf_counter()
-        first_compile_generate_time = end - start
-        logger.info(f"completed first compile generation in: {first_compile_generate_time}s")
-        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-        past_key_values = StaticCache(
-            model.config,
-            max_batch_size=batch_size,
-            device=device,
-            dtype=torch.float16,
-            max_cache_len=seq_length + 128,
-        )
-        # 2nd call
-        start = perf_counter()
-        output = model.generate(**inputs, past_key_values=past_key_values)
-        end = perf_counter()
-        second_compile_generate_time = end - start
-        logger.info(f"completed second compile generation in: {second_compile_generate_time}s")
-        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-        past_key_values = StaticCache(
-            model.config,
-            max_batch_size=batch_size,
-            device=device,
-            dtype=torch.float16,
-            max_cache_len=seq_length + 128,
-        )
-        # 3rd call
-        start = perf_counter()
-        output = model.generate(**inputs, past_key_values=past_key_values)
-        end = perf_counter()
-        third_compile_generate_time = end - start
-        logger.info(f"completed third compile generation in: {third_compile_generate_time}s")
-        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-        past_key_values = StaticCache(
-            model.config,
-            max_batch_size=batch_size,
-            device=device,
-            dtype=torch.float16,
-            max_cache_len=seq_length + 128,
-        )
-        # 4th call
-        start = perf_counter()
-        output = model.generate(**inputs, past_key_values=past_key_values)
-        end = perf_counter()
-        fourth_compile_generate_time = end - start
-        logger.info(f"completed fourth compile generation in: {fourth_compile_generate_time}s")
-        logger.info(f"generated: {tokenizer.batch_decode(output.cpu().tolist())}")
-
-        metrics_recorder.collect_model_measurements(
-            benchmark_id,
-            {
-                "model_load_time": model_load_time,
-                "first_eager_forward_pass_time_secs": first_eager_fwd_pass_time,
-                "second_eager_forward_pass_time_secs": second_eager_fwd_pass_time,
-                "first_eager_generate_time_secs": first_eager_generate_time,
-                "second_eager_generate_time_secs": second_eager_generate_time,
-                "time_to_first_token_secs": time_to_first_token,
-                "time_to_second_token_secs": time_to_second_token,
-                "time_to_third_token_secs": time_to_third_token,
-                "time_to_next_token_mean_secs": mean_time_to_next_token,
-                "first_compile_generate_time_secs": first_compile_generate_time,
-                "second_compile_generate_time_secs": second_compile_generate_time,
-                "third_compile_generate_time_secs": third_compile_generate_time,
-                "fourth_compile_generate_time_secs": fourth_compile_generate_time,
-            },
-        )
-    except Exception as e:
-        logger.error(f"Caught exception: {e}")
-    continue_metric_collection.set()
-    if metrics_thread is not None:
-        metrics_thread.join()
-    
-    # Only close the recorder if we created it locally
-    if should_close_recorder:
-        metrics_recorder.close() 
--- a/benchmark/benchmarks_entrypoint.py
+++ b/benchmark/benchmarks_entrypoint.py
@@ -1,35 +1,15 @@
-# Copyright 2025 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
 import argparse
 import importlib.util
 import logging
 import os
 import sys
-import json
-import uuid
-from datetime import datetime
-from typing import Dict, Tuple, Optional, List
+from typing import Dict, Tuple

-import pandas as pd
+from psycopg2.extensions import register_adapter
+from psycopg2.extras import Json

-try:
-    from psycopg2.extensions import register_adapter
-    from psycopg2.extras import Json
-    register_adapter(dict, Json)
-    PSYCOPG2_AVAILABLE = True
-except ImportError:
-    PSYCOPG2_AVAILABLE = False
+
+register_adapter(dict, Json)


 class ImportModuleException(Exception):
@@ -38,239 +18,61 @@ class ImportModuleException(Exception):

 class MetricsRecorder:
    def __init__(
-        self, connection, logger: logging.Logger, repository: str, branch: str, commit_id: str, commit_msg: str, 
-        collect_csv_data: bool = True
+        self, connection, logger: logging.Logger, repository: str, branch: str, commit_id: str, commit_msg: str
    ):
        self.conn = connection
-        self.use_database = connection is not None
-        if self.use_database:
-            self.conn.autocommit = True
+        self.conn.autocommit = True
        self.logger = logger
        self.repository = repository
        self.branch = branch
        self.commit_id = commit_id
        self.commit_msg = commit_msg
-        self.collect_csv_data = collect_csv_data
-        
-        # For CSV export - store all data in pandas DataFrames (only if CSV collection is enabled)
-        if self.collect_csv_data:
-            # Initialize empty DataFrames with proper schemas
-            self.benchmarks_df = pd.DataFrame(columns=[
-                'benchmark_id', 'repository', 'branch', 'commit_id', 'commit_message', 
-                'metadata', 'created_at'
-            ])
-            self.device_measurements_df = pd.DataFrame(columns=[
-                'benchmark_id', 'cpu_util', 'mem_megabytes', 'gpu_util', 
-                'gpu_mem_megabytes', 'time'
-            ])
-            self.model_measurements_df = pd.DataFrame(columns=[
-                'benchmark_id', 'time', 'model_load_time', 'first_eager_forward_pass_time_secs',
-                'second_eager_forward_pass_time_secs', 'first_eager_generate_time_secs',
-                'second_eager_generate_time_secs', 'time_to_first_token_secs',
-                'time_to_second_token_secs', 'time_to_third_token_secs',
-                'time_to_next_token_mean_secs', 'first_compile_generate_time_secs',
-                'second_compile_generate_time_secs', 'third_compile_generate_time_secs',
-                'fourth_compile_generate_time_secs'
-            ])
-        else:
-            self.benchmarks_df = None
-            self.device_measurements_df = None
-            self.model_measurements_df = None

-    def initialise_benchmark(self, metadata: dict[str, str]) -> str:
+    def initialise_benchmark(self, metadata: dict[str, str]) -> int:
        """
-        Creates a new benchmark, returns the benchmark id (UUID)
+        Creates a new benchmark, returns the benchmark id
        """
-        # Generate a unique UUID for this benchmark
-        benchmark_id = str(uuid.uuid4())
-        
-        if self.use_database:
-            with self.conn.cursor() as cur:
-                cur.execute(
-                    "INSERT INTO benchmarks (benchmark_id, repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s, %s)",
-                    (benchmark_id, self.repository, self.branch, self.commit_id, self.commit_msg, metadata),
-                )
-                self.logger.debug(f"initialised benchmark #{benchmark_id}")
-        
-        # Store benchmark data for CSV export (if enabled)
-        if self.collect_csv_data:
-            # Add row to pandas DataFrame
-            new_row = pd.DataFrame([{
-                'benchmark_id': benchmark_id,
-                'repository': self.repository,
-                'branch': self.branch,
-                'commit_id': self.commit_id,
-                'commit_message': self.commit_msg,
-                'metadata': json.dumps(metadata),
-                'created_at': datetime.utcnow().isoformat()
-            }])
-            self.benchmarks_df = pd.concat([self.benchmarks_df, new_row], ignore_index=True)
-            
-        mode_info = []
-        if self.use_database:
-            mode_info.append("database")
-        if self.collect_csv_data:
-            mode_info.append("CSV")
-        mode_str = " + ".join(mode_info) if mode_info else "no storage"
-        
-        self.logger.debug(f"initialised benchmark #{benchmark_id} ({mode_str} mode)")
-        return benchmark_id
+        # gpu_name: str, model_id: str
+        with self.conn.cursor() as cur:
+            cur.execute(
+                "INSERT INTO benchmarks (repository, branch, commit_id, commit_message, metadata) VALUES (%s, %s, %s, %s, %s) RETURNING benchmark_id",
+                (self.repository, self.branch, self.commit_id, self.commit_msg, metadata),
+            )
+            benchmark_id = cur.fetchone()[0]
+            logger.debug(f"initialised benchmark #{benchmark_id}")
+            return benchmark_id

-    def collect_device_measurements(self, benchmark_id: str, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
+    def collect_device_measurements(self, benchmark_id: int, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes):
        """
        Collect device metrics, such as CPU & GPU usage. These are "static", as in you cannot pass arbitrary arguments to the function.
        """
-        # Store device measurements for CSV export (if enabled)
-        if self.collect_csv_data:
-            # Add row to pandas DataFrame
-            new_row = pd.DataFrame([{
-                'benchmark_id': benchmark_id,
-                'cpu_util': cpu_util,
-                'mem_megabytes': mem_megabytes,
-                'gpu_util': gpu_util,
-                'gpu_mem_megabytes': gpu_mem_megabytes,
-                'time': datetime.utcnow().isoformat()
-            }])
-            self.device_measurements_df = pd.concat([self.device_measurements_df, new_row], ignore_index=True)
-        
-        # Store in database if available
-        if self.use_database:
-            with self.conn.cursor() as cur:
-                cur.execute(
-                    "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
-                    (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
-                )
-            
+        with self.conn.cursor() as cur:
+            cur.execute(
+                "INSERT INTO device_measurements (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes) VALUES (%s, %s, %s, %s, %s)",
+                (benchmark_id, cpu_util, mem_megabytes, gpu_util, gpu_mem_megabytes),
+            )
        self.logger.debug(
-            f"collected device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
+            f"inserted device measurements for benchmark #{benchmark_id} [CPU util: {cpu_util}, mem MBs: {mem_megabytes}, GPU util: {gpu_util}, GPU mem MBs: {gpu_mem_megabytes}]"
        )

-    def collect_model_measurements(self, benchmark_id: str, measurements: dict[str, float]):
-        # Store model measurements for CSV export (if enabled)
-        if self.collect_csv_data:
-            # Add row to pandas DataFrame with flattened measurements
-            row_data = {
-                'benchmark_id': benchmark_id,
-                'time': datetime.utcnow().isoformat()
-            }
-            # Flatten the measurements dict into the row
-            row_data.update(measurements)
-            
-            new_row = pd.DataFrame([row_data])
-            self.model_measurements_df = pd.concat([self.model_measurements_df, new_row], ignore_index=True)
-        
-        # Store in database if available
-        if self.use_database:
-            with self.conn.cursor() as cur:
-                cur.execute(
-                    """
-                    INSERT INTO model_measurements (
-                        benchmark_id,
-                        measurements
-                    ) VALUES (%s, %s)
-                    """,
-                    (
-                        benchmark_id,
-                        measurements,
-                    ),
-                )
-            
-        self.logger.debug(f"collected model measurements for benchmark #{benchmark_id}: {measurements}")
-
-    def export_to_csv(self, output_dir: str = "benchmark_results"):
-        """
-        Export all collected data to CSV files using pandas DataFrames
-        """
-        if not self.collect_csv_data:
-            self.logger.warning("CSV data collection is disabled - no CSV files will be generated")
-            return
-            
-        if not os.path.exists(output_dir):
-            os.makedirs(output_dir)
-            self.logger.info(f"Created output directory: {output_dir}")
-            
-        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        files_created = []
-        
-        # Export using pandas DataFrames
-        self._export_pandas_data(output_dir, timestamp, files_created)
-        
-        self.logger.info(f"CSV export complete! Created {len(files_created)} files in {output_dir}")
-    
-    def _export_pandas_data(self, output_dir: str, timestamp: str, files_created: list):
-        """
-        Export CSV files using pandas DataFrames
-        """
-        # Export benchmarks
-        benchmarks_file = os.path.join(output_dir, f"benchmarks_{timestamp}.csv")
-        self.benchmarks_df.to_csv(benchmarks_file, index=False)
-        files_created.append(benchmarks_file)
-        self.logger.info(f"Exported {len(self.benchmarks_df)} benchmark records to {benchmarks_file}")
-        
-        # Export device measurements  
-        device_file = os.path.join(output_dir, f"device_measurements_{timestamp}.csv")
-        self.device_measurements_df.to_csv(device_file, index=False)
-        files_created.append(device_file)
-        self.logger.info(f"Exported {len(self.device_measurements_df)} device measurement records to {device_file}")
-        
-        # Export model measurements (already flattened)
-        model_file = os.path.join(output_dir, f"model_measurements_{timestamp}.csv")
-        self.model_measurements_df.to_csv(model_file, index=False)
-        files_created.append(model_file)
-        self.logger.info(f"Exported {len(self.model_measurements_df)} model measurement records to {model_file}")
-        
-        # Create comprehensive summary using pandas operations
-        summary_file = os.path.join(output_dir, f"benchmark_summary_{timestamp}.csv")
-        self._create_summary(summary_file)
-        files_created.append(summary_file)
-    
-    def _create_summary(self, summary_file: str):
-        """
-        Create a comprehensive summary CSV using pandas operations
-        """
-        if len(self.benchmarks_df) == 0:
-            # Create empty summary file
-            summary_df = pd.DataFrame()
-            summary_df.to_csv(summary_file, index=False)
-            self.logger.info(f"Created empty benchmark summary at {summary_file}")
-            return
-        
-        # Start with benchmarks as the base
-        summary_df = self.benchmarks_df.copy()
-        
-        # Add model measurements (join on benchmark_id)
-        if len(self.model_measurements_df) > 0:
-            # Drop 'time' column from model measurements to avoid conflicts
-            model_df = self.model_measurements_df.drop(columns=['time'], errors='ignore')
-            summary_df = summary_df.merge(model_df, on='benchmark_id', how='left')
-        
-        # Calculate device measurement aggregates using pandas groupby
-        if len(self.device_measurements_df) > 0:
-            device_agg = self.device_measurements_df.groupby('benchmark_id').agg({
-                'cpu_util': ['mean', 'max', 'std', 'count'],
-                'mem_megabytes': ['mean', 'max', 'std'],
-                'gpu_util': ['mean', 'max', 'std'],
-                'gpu_mem_megabytes': ['mean', 'max', 'std']
-            }).round(3)
-            
-            # Flatten column names
-            device_agg.columns = [f"{col[0]}_{col[1]}" for col in device_agg.columns]
-            device_agg = device_agg.reset_index()
-            
-            # Rename count column to be more descriptive
-            if 'cpu_util_count' in device_agg.columns:
-                device_agg = device_agg.rename(columns={'cpu_util_count': 'device_measurement_count'})
-            
-            # Merge with summary
-            summary_df = summary_df.merge(device_agg, on='benchmark_id', how='left')
-        
-        # Export the comprehensive summary
-        summary_df.to_csv(summary_file, index=False)
-        self.logger.info(f"Created comprehensive benchmark summary with {len(summary_df)} records at {summary_file}")
+    def collect_model_measurements(self, benchmark_id: int, measurements: dict[str, float]):
+        with self.conn.cursor() as cur:
+            cur.execute(
+                """
+                INSERT INTO model_measurements (
+                    benchmark_id,
+                    measurements
+                ) VALUES (%s, %s)
+                """,
+                (
+                    benchmark_id,
+                    measurements,
+                ),
+            )
+        self.logger.debug(f"inserted model measurements for benchmark #{benchmark_id}: {measurements}")

    def close(self):
-        if self.use_database and self.conn:
-            self.conn.close()
+        self.conn.close()


 logger = logging.getLogger(__name__)
@@ -283,7 +85,7 @@ handler.setFormatter(formatter)
 logger.addHandler(handler)


-def parse_arguments() -> tuple[str, str, str, str, bool, str]:
+def parse_arguments() -> tuple[str, str, str, str]:
    """
    Parse command line arguments for the benchmarking CLI.
    """
@@ -312,27 +114,10 @@ def parse_arguments() -> tuple[str, str, str, str, bool, str]:
        type=str,
        help="The commit message associated with the commit, truncated to 70 characters.",
    )
-    
-    parser.add_argument(
-        "--csv",
-        action="store_true",
-        default=False,
-        help="Enable CSV output files generation."
-    )
-    
-    parser.add_argument(
-        "--csv-output-dir",
-        type=str,
-        default="benchmark_results",
-        help="Directory for CSV output files (default: benchmark_results)."
-    )

    args = parser.parse_args()
-    
-    # CSV is disabled by default, only enabled when --csv is used
-    generate_csv = args.csv

-    return args.repository, args.branch, args.commit_id, args.commit_msg, generate_csv, args.csv_output_dir
+    return args.repository, args.branch, args.commit_id, args.commit_msg


 def import_from_path(module_name, file_path):
@@ -346,124 +131,22 @@ def import_from_path(module_name, file_path):
        raise ImportModuleException(f"failed to load python module: {e}")


-def create_database_connection():
-    """
-    Try to create a database connection. Returns None if connection fails.
-    """
-    if not PSYCOPG2_AVAILABLE:
-        logger.warning("psycopg2 not available - running in CSV-only mode")
-        return None
-        
-    try:
-        import psycopg2
-        conn = psycopg2.connect("dbname=metrics")
-        logger.info("Successfully connected to database")
-        return conn
-    except Exception as e:
-        logger.warning(f"Failed to connect to database: {e}. Running in CSV-only mode")
-        return None
-
-
-def create_global_metrics_recorder(repository: str, branch: str, commit_id: str, commit_msg: str, 
-                                   generate_csv: bool = False) -> MetricsRecorder:
-    """
-    Create a global metrics recorder that will be used across all benchmarks.
-    """
-    connection = create_database_connection()
-    recorder = MetricsRecorder(connection, logger, repository, branch, commit_id, commit_msg, generate_csv)
-    
-    # Log the storage mode
-    storage_modes = []
-    if connection is not None:
-        storage_modes.append("database")
-    if generate_csv:
-        storage_modes.append("CSV")
-    
-    if not storage_modes:
-        logger.warning("Running benchmarks with NO data storage (no database connection, CSV disabled)")
-        logger.warning("Use --csv flag to enable CSV output when database is unavailable")
-    else:
-        logger.info(f"Running benchmarks with: {' + '.join(storage_modes)} storage")
-    
-    return recorder
-
-
 if __name__ == "__main__":
    benchmarks_folder_path = os.path.dirname(os.path.realpath(__file__))
-    benches_folder_path = os.path.join(benchmarks_folder_path, "benches")

-    repository, branch, commit_id, commit_msg, generate_csv, csv_output_dir = parse_arguments()
-    
-    # Create a global metrics recorder
-    global_metrics_recorder = create_global_metrics_recorder(repository, branch, commit_id, commit_msg, generate_csv)
-    
-    successful_benchmarks = 0
-    failed_benchmarks = 0
-    
-    # Automatically discover all benchmark modules in benches/ folder
-    benchmark_modules = []
-    
-    if os.path.exists(benches_folder_path):
-        logger.debug(f"Scanning for benchmarks in: {benches_folder_path}")
-        for entry in os.scandir(benches_folder_path):
+    repository, branch, commit_id, commit_msg = parse_arguments()
+
+    for entry in os.scandir(benchmarks_folder_path):
+        try:
            if not entry.name.endswith(".py"):
                continue
-            if entry.name.startswith("__"):  # Skip __init__.py, __pycache__, etc.
+            if entry.path == __file__:
                continue
-                
-            # Check if the file has a run_benchmark function
-            try:
-                logger.debug(f"checking if benches/{entry.name} has run_benchmark function")
-                module = import_from_path(entry.name.split(".")[0], entry.path)
-                if hasattr(module, 'run_benchmark'):
-                    benchmark_modules.append(entry.name)
-                    logger.debug(f"discovered benchmark: {entry.name}")
-                else:
-                    logger.debug(f"skipping {entry.name} - no run_benchmark function found")
-            except Exception as e:
-                logger.debug(f"failed to check benches/{entry.name}: {e}")
-    else:
-        logger.warning(f"Benches directory not found: {benches_folder_path}")
-
-    if benchmark_modules:
-        logger.info(f"Discovered {len(benchmark_modules)} benchmark(s): {benchmark_modules}")
-    else:
-        logger.warning("No benchmark modules found in benches/ directory")
-
-    for module_name in benchmark_modules:
-        module_path = os.path.join(benches_folder_path, module_name)
-        try:
-            logger.debug(f"loading: {module_name}")
-            module = import_from_path(module_name.split(".")[0], module_path)
-            logger.info(f"running benchmarks in: {module_name}")
-            
-            # Check if the module has an updated run_benchmark function that accepts metrics_recorder
-            try:
-                # Try the new signature first
-                module.run_benchmark(logger, repository, branch, commit_id, commit_msg, global_metrics_recorder)
-            except TypeError:
-                # Fall back to the old signature for backward compatibility
-                logger.warning(f"Module {module_name} using old run_benchmark signature - database connection will be created per module")
-                module.run_benchmark(logger, repository, branch, commit_id, commit_msg)
-            
-            successful_benchmarks += 1
+            logger.debug(f"loading: {entry.name}")
+            module = import_from_path(entry.name.split(".")[0], entry.path)
+            logger.info(f"running benchmarks in: {entry.name}")
+            module.run_benchmark(logger, repository, branch, commit_id, commit_msg)
        except ImportModuleException as e:
            logger.error(e)
-            failed_benchmarks += 1
        except Exception as e:
-            logger.error(f"error running benchmarks for {module_name}: {e}")
-            failed_benchmarks += 1
-
-    # Export CSV results at the end (if enabled)
-    try:
-        if generate_csv:
-            global_metrics_recorder.export_to_csv(csv_output_dir)
-            logger.info(f"CSV reports have been generated and saved to the {csv_output_dir} directory")
-        else:
-            logger.info("CSV generation disabled - no CSV files created (use --csv to enable)")
-        
-        logger.info(f"Benchmark run completed. Successful: {successful_benchmarks}, Failed: {failed_benchmarks}")
-    except Exception as e:
-        logger.error(f"Failed to export CSV results: {e}")
-    finally:
-        global_metrics_recorder.close()
+            logger.error(f"error running benchmarks for {entry.name}: {e}")
--- a/benchmark/config/generation.yaml
+++ b/benchmark/config/generation.yaml
@@ -19,7 +19,7 @@ backend:
  model: meta-llama/Llama-2-7b-hf
  cache_implementation: static
  torch_compile: true
-  dtype: float16
+  torch_dtype: float16
  torch_compile_config:
    backend: inductor
    mode: reduce-overhead
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Arthur	d79b2d981f	v4.55.4 Some checks failed Release - Conda / build_and_package (push) Has been cancelled Details Secret Leaks / trufflehog (push) Has been cancelled Details	2025-08-22 14:39:20 +02:00
Arthur	90792b730a	Revert "Fix GPT-OSS swiglu_limit not passed in for MXFP4 #40197 " The cherry-picked commit does not match the changes nor the PR This reverts commit `e75d67ec39`.	2025-08-22 11:21:18 +02:00
Daniel Han	a03df6acd4	Fix GPT-OSS `swiglu_limit` not passed in for MXFP4 (#40197 ) Add swiglu_limit = 7.0	2025-08-22 11:20:23 +02:00
Arthur	170b2708cb	Fixes #40262 Some checks failed Release - Conda / build_and_package (push) Has been cancelled Details Secret Leaks / trufflehog (push) Has been cancelled Details	2025-08-21 11:03:16 +02:00
Arthur	7dbc054e2a	v4.55.3	2025-08-18 14:46:54 +02:00
Zhen	c097a43898	[bugfix] fix flash-attention2 unavailable error for Ascend NPU (#40151 ) * [bugfix] fix flash-attention2 unavailable error for Ascend NPU * remove redundant apply_rotary_emb usage * fix ruff check error * pad_input and unpad_input use same implementation as fa2 * rollback redundant codes * fix ruff check error * optimize fa2 judgement logic	2025-08-18 14:45:23 +02:00
Cyril Vallez	663cbb0d04	[FA2] Fix it finally - revert fa kwargs preparation (#40161 ) revert	2025-08-18 14:44:58 +02:00
Cyril Vallez	c7bd5350f0	Fix fsdp for generic-task models #40191	2025-08-18 14:44:16 +02:00
Lintch	e75d67ec39	Fix GPT-OSS swiglu_limit not passed in for MXFP4 #40197	2025-08-18 14:43:31 +02:00
Manuel de Prada Corral	d7f67d2006	Fix mamba caches (#40203 ) fix mamba models caches inheritance	2025-08-18 14:27:04 +02:00
Arthur	acf295aec3	v4.55.2 Some checks failed Release - Conda / build_and_package (push) Has been cancelled Details Secret Leaks / trufflehog (push) Has been cancelled Details	2025-08-13 20:14:33 +02:00
Arthur Zucker	aaa3169aa2	qfix bad cherry-pick	2025-08-13 18:13:21 +00:00
Arthur	ea2eee0bc8	v4.55.1 Some checks failed Release - Conda / build_and_package (push) Has been cancelled Details Secret Leaks / trufflehog (push) Has been cancelled Details	2025-08-13 10:33:42 +02:00
Quentin Gallouédec	956be23fff	[bugfix] Fix tensor device in Idefics2, Idefics3, and SmolVLM (#39975 ) * [bugfix] ensure correct tensor device in Idefics2, Idefics3, and SmolVLM models * to cuda	2025-08-13 10:33:17 +02:00
Anton Vlasjuk	79a9ffc520	fix merge conlicts	2025-08-13 10:25:20 +02:00
Mohamed Mekkouri	99404c7098	Default to dequantize if cpu in device_map for mxfp4 (#39993 ) * default to dq if cpu * an other check * style * revert some changes	2025-08-13 10:22:01 +02:00
Anton Vlasjuk	0d6908038c	[`GPT Big Code`] Fix attention scaling (#40041 ) * fix * update integration tests * fmt * add regression test	2025-08-13 10:22:01 +02:00
Tsumugii	b8e97fbfd2	fix: resolve triton version check compatibility on windows (#39986 ) * fix: resolve triton version check compatibility on windows * style: remove trailing space * fix: fix typo --------- Co-authored-by: Mohamed Mekkouri <93391238+MekkCyber@users.noreply.github.com>	2025-08-13 10:22:01 +02:00
Laurenz Ruzicka	586b6e693b	Fix missing None default values for Gemma3n model in get_placeholder_mask (#39991 ) (#40024 ) * Fix missing None default values for Gemma3n model in get_placeholder_mask (#39991) * Switched definition of optional from\| None to Optiona[] (Issue #39991) --------- Co-authored-by: Laurenz Ruzicka <Laurenz.Ruzicka@ait.ac.at>	2025-08-13 10:22:01 +02:00
Isotr0py	95ae07d11f	Fix broken image inference for Fuyu model (#39915 ) * fix fuyu Signed-off-by: Isotr0py <2037008807@qq.com> * oops Signed-off-by: Isotr0py <2037008807@qq.com> * run test on GPU Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn> * clean unused Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn> * revert Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn> * add fuyu multimodal test Signed-off-by: Isotr0py <2037008807@qq.com> * fix Signed-off-by: Isotr0py <2037008807@qq.com> --------- Signed-off-by: Isotr0py <2037008807@qq.com> Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>	2025-08-13 10:22:01 +02:00
Shuming Hu	0d9032ae71	Fix missing video inputs for PerceptionLM. (#39971 ) * Fix missing video inputs for PerceptionLM. * Minor fix for vanilla input image (only C,H,W, no tiles dim). * Revert "Minor fix for vanilla input image (only C,H,W, no tiles dim)." This reverts commit 181d87b964e59c4118035a9fd4f530c6e551ba9f.	2025-08-13 10:22:01 +02:00
Raushan Turganbay	1d42803aac	[Idefics] fix device mismatch (#39981 ) fix	2025-08-13 10:22:01 +02:00
Marc Sun	382717e543	remove `triton_kernels` dep with `kernels` instead (#39926 ) * remove dep * style * rm import * fix * style * simplify * style	2025-08-13 10:22:01 +02:00
Matthew Douglas	cc98f42d22	Enable gpt-oss mxfp4 on older hardware (sm75+) (#39940 ) Co-authored-by: Marc Sun <57196510+SunMarc@users.noreply.github.com>	2025-08-13 10:22:01 +02:00
Lintch	d2f7266367	Fix MXFP4 quantizer validation to allow CPU inference with dequantize option (#39953 ) * Fix MXFP4 quantizer validation to enable CPU dequantization Move dequantize check before CUDA availability check to allow CPU inference when quantization_config.dequantize is True. This enables users to run MXFP4 models on CPU by automatically converting them to BF16 format. * Add tests for MXFP4 quantizer CPU dequantization validation * fix: format mxfp4 test file with ruff	2025-08-13 10:22:01 +02:00
Joao Gante	daab2db33f	[CI] post-`GptOss` fixes for green CI (#39929 )	2025-08-07 16:27:00 +02:00
Lysandre	06f8004e5c	Release: v4.55.0 Some checks failed Release - Conda / build_and_package (push) Has been cancelled Details Secret Leaks / trufflehog (push) Has been cancelled Details	2025-08-05 18:09:15 +02:00