
Commit 48642c5

finally capture build cache errors!
1 parent d4bb484 commit 48642c5

2 files changed (+107, -20 lines)

src/eval.py

Lines changed: 87 additions & 2 deletions
@@ -5,13 +5,14 @@
 import requests
 import torch
 import torch.nn as nn
-import os
+import os, subprocess
 from pydantic import BaseModel
 import numpy as np
 import random
 import json
 from contextlib import redirect_stdout, redirect_stderr
 from io import StringIO
+import sys
 
 from . import utils
 
@@ -168,6 +169,45 @@ def graceful_eval_cleanup(curr_context: dict, device: torch.device):
 
     # _cleanup_cuda_extensions() # SIMON NOTE: is this necessary?
 
+def build_compile_cache_legacy(
+    custom_model_src: str,
+    verbose: bool = False,
+    build_dir: os.PathLike = None,
+) -> tuple[bool, str, str]:
+    """
+    Try to build the compiled cuda code for a sample and store it in the cache directory
+    Should be able to run on CPUs to do this massively in parallel
+
+    Don't limit ninja to set default number of workers, let it use all the cpu cores possible
+
+    NOTE: currently stdout_buffer does not capture all the compiler warning and failure messages
+    Returns:
+        tuple[bool, str, str]: whether compilation is successful, stdout content, error message
+    """
+    context = {}
+    stdout_buffer = StringIO()
+
+    if verbose:
+        print("[Compilation] Pre-compile custom cuda binaries")
+
+    try:
+        os.environ["TORCH_USE_CUDA_DSA"] = "1"  # compile with device side assertion
+        # sys.stdout.flush()
+
+        # Capture stdout during compilation
+        with redirect_stdout(stdout_buffer), redirect_stderr(stdout_buffer):
+            load_custom_model(custom_model_src, context, build_dir)
+        # sys.stdout.flush()
+
+        if verbose:
+            print(f"[Compilation] Compilation Successful, saved cache at: {build_dir}")
+    except Exception as e:
+        print(f"[Compilation] Failed to compile custom CUDA kernel. Unable to cache, \nError: {e}")
+        return False, stdout_buffer.getvalue(), str(e)
+
+    return True, stdout_buffer.getvalue(), None
+
+
 
 def build_compile_cache(
     custom_model_src: str,
@@ -179,7 +219,7 @@ def build_compile_cache(
     Should be able to run on CPUs to do this massively in parallel
 
     Don't limit ninja to set default number of workers, let it use all the cpu cores possible
-
+    # try to do this with a subprocess
     NOTE: currently stdout_buffer does not capture all the compiler warning and failure messages
     Returns:
         tuple[bool, str]: whether compilation is successful, stdout content as string
@@ -192,10 +232,12 @@ def build_compile_cache(
 
     try:
         os.environ["TORCH_USE_CUDA_DSA"] = "1"  # compile with device side assertion
+        # sys.stdout.flush()
 
         # Capture stdout during compilation
        with redirect_stdout(stdout_buffer), redirect_stderr(stdout_buffer):
            load_custom_model(custom_model_src, context, build_dir)
+        # sys.stdout.flush()
 
         if verbose:
             print(f"[Compilation] Compilation Successful, saved cache at: {build_dir}")
@@ -206,6 +248,49 @@ def build_compile_cache(
     return True, stdout_buffer.getvalue(), None
 
 
+def build_compile_cache_with_capturing(
+    custom_model_src: str,
+    verbose: bool = False,
+    build_dir: os.PathLike = None
+) -> tuple[int, str, str]:
+    """
+    Write a temporary python file to compile the custom model on CPU
+    Captures the return code, stdout, and stderr
+    This works for capturing; build_compile_cache does not
+    """
+    if build_dir:
+        # Add import at the start of the source code
+        custom_model_src = (
+            "import os\n" f"os.environ['TORCH_EXTENSIONS_DIR'] = '{build_dir}'\n"
+        ) + custom_model_src
+
+    kernel_hash = hash(custom_model_src)
+    # tmp is a temp python file we write to for compilation
+    tmp = os.path.join(build_dir, f"tmp_{kernel_hash}.py")
+    os.makedirs(os.path.dirname(tmp), exist_ok=True)
+
+    with open(tmp, "w", encoding="utf-8") as f:
+        f.write(custom_model_src)
+
+    # Execute the temporary Python file and capture output
+    process = subprocess.Popen(['python', tmp], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    stdout, stderr = process.communicate()
+    returncode = process.returncode
+
+    # Clean up temporary file
+    os.remove(tmp)
+
+    if verbose:
+        print("[CPU Precompile] return code: ", returncode)
+        print("[CPU Precompile] stdout: \n", stdout.decode('utf-8'))
+        print("[CPU Precompile] stderr: \n", stderr.decode('utf-8'))
+
+    return returncode, stdout.decode('utf-8'), stderr.decode('utf-8')
+
+
 def eval_kernel_against_ref(
     original_model_src: str,
     custom_model_src: str,
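
Why the subprocess-based variant is what "finally" captures the errors: redirect_stdout/redirect_stderr only swap out the Python-level sys.stdout and sys.stderr objects, while torch's extension build spawns ninja/nvcc as child processes that write directly to the OS-level file descriptors, so their warnings and errors never pass through the redirected Python objects. Running the whole compile in a separate python process and piping its descriptors captures everything. A minimal usage sketch follows; the import path, file names, and cache directory are assumptions rather than part of this commit, and it assumes the model source triggers its CUDA extension build at import time:

from src.eval import build_compile_cache_with_capturing  # assumed import path

# Hypothetical sample: a generated model file that calls
# torch.utils.cpp_extension.load_inline() at import time.
custom_model_src = open("sample_kernel.py").read()

returncode, stdout, stderr = build_compile_cache_with_capturing(
    custom_model_src,
    verbose=True,
    build_dir="/tmp/kernel_build_cache",  # assumed cache location
)
if returncode != 0:
    # nvcc/ninja errors now actually land in stderr instead of vanishing
    print(f"[Precompile] build failed:\n{stderr}")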

src/prompt_constructor.py

Lines changed: 20 additions & 18 deletions
@@ -83,7 +83,7 @@ def prompt_generate_custom_cuda_fewshot_and_template(ref_arch_src: str, shots: l
     Avaliable few shot options to start with:
     - ex_add: pointwise addition
     - ex_fuse_gelu: fused gelu
-    - ex_fuse_mnist2: fused convolutions and relus
+    - ex_mnist2: fused convolutions and relus
     - ex_tiled_matmul: tiled matrix multiplication
     """
     prompt = PROBLEM_STATEMENT_CLEANED
@@ -107,13 +107,13 @@ def prompt_generate_custom_cuda_fewshot_and_template(ref_arch_src: str, shots: l
     example_fuse_gelu_desc = "This given architecture is for a fused gelu: "
 
     # k = 3
-    example_fuse_mnist2 = read_file(
+    example_mnist2 = read_file(
         os.path.join(REPO_TOP_PATH, "src/prompts/few_shot/model_ex_mnist2.py")
     )
-    example_fuse_mnist2_new = read_file(
+    example_mnist2_new = read_file(
         os.path.join(REPO_TOP_PATH, "src/prompts/few_shot/model_new_ex_mnist2.py")
     )
-    exmaple_fuse_mnist2_desc = "This given architecture is for a model with fused convolutions and relus: "
+    exmaple_mnist2_desc = "This given architecture is for a model with fused convolutions and relus: "
 
     # k = 4
     example_tiled_matmul = read_file(
@@ -127,14 +127,14 @@ def prompt_generate_custom_cuda_fewshot_and_template(ref_arch_src: str, shots: l
 
     examples = []
     for s in shots:
-        if s not in ["ex_add", "ex_fuse_gelu", "ex_fuse_mnist2", "ex_tiled_matmul"]:
+        if s not in ["ex_add", "ex_fuse_gelu", "ex_mnist2", "ex_tiled_matmul"]:
             raise ValueError(f"Invalid shot: {s}")
         elif s == "ex_add":
             examples.append((example_add, example_add_new, example_add_desc))
         elif s == "ex_fuse_gelu":
             examples.append((example_fuse_gelu, example_fuse_gelu_new, example_fuse_gelu_desc))
-        elif s == "ex_fuse_mnist2":
-            examples.append((example_fuse_mnist2, example_fuse_mnist2_new, exmaple_fuse_mnist2_desc))
+        elif s == "ex_mnist2":
+            examples.append((example_mnist2, example_mnist2_new, exmaple_mnist2_desc))
         elif s == "ex_tiled_matmul":
             examples.append((example_tiled_matmul, example_tiled_matmul_new, example_tiled_matmul_desc))
 
@@ -171,7 +171,7 @@ def prompt_generate_ex_with_CoT_template(ref_arch_src: str, cot_example: str) ->
     Generate a prompt with a CoT example following a template
     Avaliable CoT examples:
     - ex_fuse_gelu: fused gelu
-    - ex_fuse_mnist2: fused convolutions and relus
+    - ex_mnist2: fused convolutions and relus
     - ex_tiled_matmul: tiled matrix multiplication
     """
@@ -184,7 +184,7 @@ def prompt_generate_ex_with_CoT_template(ref_arch_src: str, cot_example: str) ->
 
     prompt = PROBLEM_STATEMENT_CLEANED
 
-    assert cot_example in ["ex_fuse_gelu", "ex_fuse_mnist2", "ex_tiled_matmul"]
+    assert cot_example in ["ex_fuse_gelu", "ex_mnist2", "ex_tiled_matmul"]
 
     # k = 2
     example_fuse_gelu = read_file(
@@ -199,16 +199,16 @@ def prompt_generate_ex_with_CoT_template(ref_arch_src: str, cot_example: str) ->
     example_fuse_gelu_desc = "This given architecture is for a fused gelu: "
 
     # k = 3
-    example_fuse_mnist2 = read_file(
+    example_mnist2 = read_file(
         os.path.join(REPO_TOP_PATH, "src/prompts/few_shot/model_ex_mnist2.py")
     )
-    example_fuse_mnist2_cot = read_file(
+    example_mnist2_cot = read_file(
         os.path.join(REPO_TOP_PATH, "src/prompts/cot/model_cot_mnist2.py")
     )
-    example_fuse_mnist2_new = read_file(
+    example_mnist2_new = read_file(
         os.path.join(REPO_TOP_PATH, "src/prompts/few_shot/model_new_ex_mnist2.py")
     )
-    exmaple_fuse_mnist2_desc = "This given architecture is for a model with fused convolutions and relus: "
+    exmaple_mnist2_desc = "This given architecture is for a model with fused convolutions and relus: "
 
     # k = 4
     example_tiled_matmul = read_file(
@@ -228,16 +228,18 @@ def prompt_generate_ex_with_CoT_template(ref_arch_src: str, cot_example: str) ->
             cot = example_fuse_gelu_cot
             kernel = example_fuse_gelu_new
             desc = example_fuse_gelu_desc
-        case "ex_fuse_mnist2":
-            base = example_fuse_mnist2
-            cot = example_fuse_mnist2_cot
-            kernel = example_fuse_mnist2_new
-            desc = exmaple_fuse_mnist2_desc
+        case "ex_mnist2":
+            base = example_mnist2
+            cot = example_mnist2_cot
+            kernel = example_mnist2_new
+            desc = exmaple_mnist2_desc
         case "ex_tiled_matmul":
             base = example_tiled_matmul
             cot = example_tiled_matmul_cot
             kernel = example_tiled_matmul_new
             desc = example_tiled_matmul_desc
+        case _:
+            raise ValueError(f"Invalid CoT example: {cot_example} not found in CoT examples")
 
     # construct example with
     # NOTE: we only do one example with CoT for now
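
With the rename, callers now select the mnist2 few-shot and CoT examples via the "ex_mnist2" key; the old "ex_fuse_mnist2" key raises ValueError, and an unknown CoT key now raises a clear ValueError via the new case _ branch instead of leaving base/cot/kernel/desc unbound. A minimal usage sketch; the import path and reference-architecture file are hypothetical:

from src.prompt_constructor import (  # assumed import path
    prompt_generate_custom_cuda_fewshot_and_template,
    prompt_generate_ex_with_CoT_template,
)

ref_arch_src = open("ref_model.py").read()  # hypothetical reference architecture

# Few-shot prompt using the renamed key
prompt = prompt_generate_custom_cuda_fewshot_and_template(
    ref_arch_src, shots=["ex_add", "ex_mnist2"]
)

# CoT prompt; passing "ex_fuse_mnist2" here would now fail the assert
cot_prompt = prompt_generate_ex_with_CoT_template(ref_arch_src, cot_example="ex_mnist2")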
