Skip to content

Fix failing tests on h100 #2231

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 11 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
  •  
  •  
  •  
Previous commit
Next commit
Merge main
  • Loading branch information
jainapurva committed May 21, 2025
commit 072500abcfbdec15a429eb53160c63fb92534b85
8 changes: 4 additions & 4 deletions .github/scripts/github_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ def gh_fetch_url_and_headers(
):
print(
f"""Rate limit exceeded:
Used: {err.headers['X-RateLimit-Used']}
Limit: {err.headers['X-RateLimit-Limit']}
Remaining: {err.headers['X-RateLimit-Remaining']}
Resets at: {err.headers['x-RateLimit-Reset']}"""
Used: {err.headers["X-RateLimit-Used"]}
Limit: {err.headers["X-RateLimit-Limit"]}
Remaining: {err.headers["X-RateLimit-Remaining"]}
Resets at: {err.headers["x-RateLimit-Reset"]}"""
)
raise

Expand Down
6 changes: 3 additions & 3 deletions .github/scripts/label_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ def gh_get_labels(org: str, repo: str) -> List[str]:
update_labels(labels, info)

last_page = get_last_page_num_from_header(header)
assert (
last_page > 0
), "Error reading header info to determine total number of pages of labels"
assert last_page > 0, (
"Error reading header info to determine total number of pages of labels"
)
for page_number in range(2, last_page + 1): # skip page 1
_, info = request_for_labels(prefix + f"&page={page_number}")
update_labels(labels, info)
Expand Down
6 changes: 3 additions & 3 deletions .github/scripts/trymerge.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ def get_check_run_name_prefix(workflow_run: Any) -> str:
if workflow_run is None:
return ""
else:
return f'{workflow_run["workflow"]["name"]} / '
return f"{workflow_run['workflow']['name']} / "


def is_passing_status(status: Optional[str]) -> bool:
Expand Down Expand Up @@ -538,7 +538,7 @@ def add_conclusions(edges: Any) -> None:
if not isinstance(checkrun_node, dict):
warn(f"Expected dictionary, but got {type(checkrun_node)}")
continue
checkrun_name = f'{get_check_run_name_prefix(workflow_run)}{checkrun_node["name"]}'
checkrun_name = f"{get_check_run_name_prefix(workflow_run)}{checkrun_node['name']}"
existing_checkrun = workflow_obj.jobs.get(checkrun_name)
if existing_checkrun is None or not is_passing_status(
existing_checkrun.status
Expand Down Expand Up @@ -653,7 +653,7 @@ def skip_func(idx: int, candidate: "GitHubPR") -> bool:
if not open_only or not candidate.is_closed():
return False
print(
f"Skipping {idx+1} of {len(rev_list)} PR (#{candidate.pr_num}) as its already been merged"
f"Skipping {idx + 1} of {len(rev_list)} PR (#{candidate.pr_num}) as its already been merged"
)
return True

Expand Down
2 changes: 1 addition & 1 deletion .github/scripts/trymerge_explainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def get_merge_message(
(
"<details><summary>Advanced Debugging</summary>",
"Check the merge workflow status ",
f"<a href=\"{os.getenv('GH_RUN_URL')}\">here</a>",
f'<a href="{os.getenv("GH_RUN_URL")}">here</a>',
"</details>",
)
)
Expand Down
74 changes: 0 additions & 74 deletions .github/workflows/build-wheels_m1.yml

This file was deleted.

87 changes: 0 additions & 87 deletions .github/workflows/build_wheels_aarch64_linux.yml

This file was deleted.

4 changes: 2 additions & 2 deletions .github/workflows/build_wheels_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ jobs:
with-cuda: enable
with-rocm: enable
with-xpu: enable
# please note: excluding 3.13t for aarch64 builds for now
python-versions: '["3.9", "3.10", "3.11", "3.12", "3.13"]'
# Note: if free-threaded python is required add py3.13t here
python-versions: '["3.9"]'

build:
needs: generate-matrix
Expand Down
97 changes: 0 additions & 97 deletions .github/workflows/build_wheels_windows.yml

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/dashboard_perf_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
strategy:
matrix:
torch-spec:
- '--pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124'
- '--pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu126'
steps:
- uses: actions/checkout@v4

Expand Down
7 changes: 3 additions & 4 deletions .github/workflows/float8_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,14 @@ jobs:
include:
- name: SM-89
runs-on: linux.g6.4xlarge.experimental.nvidia.gpu
torch-spec: '--pre torch==2.7.0.dev20250122 --index-url https://download.pytorch.org/whl/nightly/cu124'
torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu126'
gpu-arch-type: "cuda"
gpu-arch-version: "12.4"
gpu-arch-version: "12.6"
- name: H100
runs-on: linux.aws.h100
torch-spec: '--pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu124'
torch-spec: '--pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu126'
gpu-arch-type: "cuda"
gpu-arch-version: "12.4"

permissions:
id-token: write
contents: read
Expand Down
Loading
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.