@@ -22,24 +22,22 @@ BUILD_DIR:= $(ROOT_DIR)/build
# Sources handed to nvcc (device code) and to the host C++ compiler.
FILES_CUDA := $(addprefix $(CSRC)/,ops.cu kernels.cu)
FILES_CPP := $(addprefix $(CSRC)/,common.cpp cpu_ops.cpp pythonInterface.c)

# Default header search path: CUDA toolkit, project sources, conda prefix,
# project headers.
INCLUDE := -I $(CUDA_HOME)/include
INCLUDE += -I $(ROOT_DIR)/csrc
INCLUDE += -I $(CONDA_PREFIX)/include
INCLUDE += -I $(ROOT_DIR)/include

# Header search path used by the cuda10x_nomatmul rule: adds the vendored
# cub checkout and leaves out the conda prefix.
INCLUDE_10x := -I $(CUDA_HOME)/include
INCLUDE_10x += -I $(ROOT_DIR)/csrc
INCLUDE_10x += -I $(ROOT_DIR)/dependencies/cub
INCLUDE_10x += -I $(ROOT_DIR)/include

# CUDA runtime/math libraries, followed by the conda library directory.
LIB := -L $(CUDA_HOME)/lib64
LIB += -lcudart -lcublas -lcublasLt -lcurand -lcusparse
LIB += -L $(CONDA_PREFIX)/lib
2728
# NVIDIA NVCC compilation flags
#
# Baseline -gencode targets shared by the CUDA build rules. The first
# assignment must be `:=`: a bare `+=` on a variable that has not been set in
# this file appends to whatever value was inherited (e.g. from the
# environment) and creates a recursively-expanded variable.
# Maxwell
COMPUTE_CAPABILITY := -gencode arch=compute_50,code=sm_50
COMPUTE_CAPABILITY += -gencode arch=compute_52,code=sm_52
# Pascal
COMPUTE_CAPABILITY += -gencode arch=compute_60,code=sm_60
COMPUTE_CAPABILITY += -gencode arch=compute_61,code=sm_61
# Volta
COMPUTE_CAPABILITY += -gencode arch=compute_70,code=sm_70
COMPUTE_CAPABILITY += -gencode arch=compute_72,code=sm_72
3736
# Kepler targets (sm_35, sm_37). Kept apart from COMPUTE_CAPABILITY; rules
# that want them add $(CC_KEPLER) to the nvcc command line explicitly.
CC_KEPLER := \
  -gencode arch=compute_35,code=sm_35 \
  -gencode arch=compute_37,code=sm_37
4039
# Later versions of CUDA support the new architectures
# Defined with `:=` so the value starts clean instead of appending to an
# inherited (environment) value, which a lone `+=` would do.
CC_CUDA10x := -gencode arch=compute_75,code=sm_75
4442
4543CC_CUDA110 := -gencode arch=compute_75,code=sm_75
@@ -49,37 +47,46 @@ CC_CUDA11x := -gencode arch=compute_75,code=sm_75
4947CC_CUDA11x += -gencode arch=compute_80,code=sm_80
5048CC_CUDA11x += -gencode arch=compute_86,code=sm_86
5149
50+
# -gencode targets (sm_75, sm_80) for the CC_cublasLt110 flag set.
CC_cublasLt110 := \
  -gencode arch=compute_75,code=sm_75 \
  -gencode arch=compute_80,code=sm_80
5453
# -gencode targets (sm_75, sm_80, sm_86) for the CC_cublasLt111 flag set,
# which the cuda12x rule passes to nvcc.
CC_cublasLt111 := \
  -gencode arch=compute_75,code=sm_75 \
  -gencode arch=compute_80,code=sm_80 \
  -gencode arch=compute_86,code=sm_86
5857
# Extra -gencode targets (sm_89, sm_90) added by the cuda12x* rules.
CC_ADA_HOPPER := \
  -gencode arch=compute_89,code=sm_89 \
  -gencode arch=compute_90,code=sm_90
60+
5961
# Default CUDA build. Three steps:
#   1. nvcc -dc    : compile the CUDA sources to relocatable device objects
#   2. nvcc -dlink : device-link ops.o and kernels.o into link.o
#   3. $(GPP)      : link the objects and host sources into the shared library
all: $(ROOT_DIR)/dependencies/cub $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_KEPLER) \
		-Xcompiler '-fPIC' --use_fast_math -Xptxas=-v \
		-dc $(FILES_CUDA) $(INCLUDE) $(LIB) \
		--output-directory $(BUILD_DIR)
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_KEPLER) \
		-Xcompiler '-fPIC' \
		-dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o \
		-o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) \
		$(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o \
		$(FILES_CPP) \
		-o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION).so $(LIB)
6466
# CUDA 9.2 build with cuBLASLt disabled (-D NO_CUBLASLT); writes the
# *_nocublaslt.so library.
#
# The device compile uses $(INCLUDE_10x), matching cuda10x_nomatmul: this rule
# requires the vendored cub checkout as a prerequisite, and $(INCLUDE) does not
# put $(ROOT_DIR)/dependencies/cub on the header search path.
# $(CC_CUDA92) is not referenced: it has no definition in this Makefile and
# would expand to nothing.
cuda92: $(ROOT_DIR)/dependencies/cub $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_KEPLER) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE_10x) $(LIB) --output-directory $(BUILD_DIR) -D NO_CUBLASLT
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_KEPLER) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so $(LIB)
6971
# CUDA 10.x build with cuBLASLt disabled (-D NO_CUBLASLT).
# The device compile uses $(INCLUDE_10x), which puts the vendored cub checkout
# on the header search path; the final host link uses $(INCLUDE).
cuda10x_nomatmul: $(ROOT_DIR)/dependencies/cub $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA10x) $(CC_KEPLER) \
		-Xcompiler '-fPIC' --use_fast_math -Xptxas=-v \
		-dc $(FILES_CUDA) $(INCLUDE_10x) $(LIB) \
		--output-directory $(BUILD_DIR) -D NO_CUBLASLT
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA10x) $(CC_KEPLER) \
		-Xcompiler '-fPIC' \
		-dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o \
		-o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) \
		$(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o \
		$(FILES_CPP) \
		-o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so $(LIB)
7476
# CUDA 11.0 build with cuBLASLt disabled (-D NO_CUBLASLT).
# Targets: baseline list + $(CC_CUDA110) + Kepler.
cuda110_nomatmul: $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA110) $(CC_KEPLER) \
		-Xcompiler '-fPIC' --use_fast_math -Xptxas=-v \
		-dc $(FILES_CUDA) $(INCLUDE) $(LIB) \
		--output-directory $(BUILD_DIR) -D NO_CUBLASLT
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA110) $(CC_KEPLER) \
		-Xcompiler '-fPIC' \
		-dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o \
		-o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) \
		$(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o \
		$(FILES_CPP) \
		-o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so $(LIB)
7981
# CUDA 11.x build with cuBLASLt disabled (-D NO_CUBLASLT).
# Targets: baseline list + $(CC_CUDA11x) + Kepler.
cuda11x_nomatmul: $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA11x) $(CC_KEPLER) \
		-Xcompiler '-fPIC' --use_fast_math -Xptxas=-v \
		-dc $(FILES_CUDA) $(INCLUDE) $(LIB) \
		--output-directory $(BUILD_DIR) -D NO_CUBLASLT
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA11x) $(CC_KEPLER) \
		-Xcompiler '-fPIC' \
		-dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o \
		-o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) \
		$(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o \
		$(FILES_CPP) \
		-o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so $(LIB)
86+
# CUDA 12.x build with cuBLASLt disabled (-D NO_CUBLASLT).
# Targets: baseline list + $(CC_CUDA11x) + $(CC_ADA_HOPPER); unlike the other
# *_nomatmul rules, $(CC_KEPLER) is not passed here.
cuda12x_nomatmul: $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA11x) $(CC_ADA_HOPPER) \
		-Xcompiler '-fPIC' --use_fast_math -Xptxas=-v \
		-dc $(FILES_CUDA) $(INCLUDE) $(LIB) \
		--output-directory $(BUILD_DIR) -D NO_CUBLASLT
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA11x) $(CC_ADA_HOPPER) \
		-Xcompiler '-fPIC' \
		-dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o \
		-o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) \
		$(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o \
		$(FILES_CPP) \
		-o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so $(LIB)
8491
8592cuda110 : $(BUILD_DIR ) env
@@ -92,6 +99,11 @@ cuda11x: $(BUILD_DIR) env
9299 $(NVCC ) $(CC_cublasLt111 ) -Xcompiler ' -fPIC' -dlink $(BUILD_DIR ) /ops.o $(BUILD_DIR ) /kernels.o -o $(BUILD_DIR ) /link.o
93100 $(GPP ) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE ) $(BUILD_DIR ) /ops.o $(BUILD_DIR ) /kernels.o $(BUILD_DIR ) /link.o $(FILES_CPP ) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION ) .so $(LIB )
94101
# CUDA 12.x build with cuBLASLt enabled (no NO_CUBLASLT define).
# Targets: $(CC_cublasLt111) + $(CC_ADA_HOPPER); the baseline
# $(COMPUTE_CAPABILITY) list is not passed.
cuda12x: $(BUILD_DIR) env
	$(NVCC) $(CC_cublasLt111) $(CC_ADA_HOPPER) \
		-Xcompiler '-fPIC' --use_fast_math -Xptxas=-v \
		-dc $(FILES_CUDA) $(INCLUDE) $(LIB) \
		--output-directory $(BUILD_DIR)
	$(NVCC) $(CC_cublasLt111) $(CC_ADA_HOPPER) \
		-Xcompiler '-fPIC' \
		-dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o \
		-o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) \
		$(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o \
		$(FILES_CPP) \
		-o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION).so $(LIB)
106+
# CPU-only build: compiles just the host sources (no -DBUILD_CUDA, no CUDA
# objects or libraries) into libbitsandbytes_cpu.so.
cpuonly: $(BUILD_DIR) env
	$(GPP) -std=c++14 -shared -fPIC \
		-I $(ROOT_DIR)/csrc -I $(ROOT_DIR)/include \
		$(FILES_CPP) \
		-o ./bitsandbytes/libbitsandbytes_cpu.so
97109
0 commit comments