@@ -402,12 +402,26 @@ if (LLAMA_CUBLAS)
402402endif ()
403403
404404if (LLAMA_CUDA)
405- cmake_minimum_required (VERSION 3.17)
405+ cmake_minimum_required (VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES
406406
407407 find_package (CUDAToolkit)
408408 if (CUDAToolkit_FOUND)
409409 message (STATUS "CUDA found" )
410410
411+ if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
412+ # 52 == lowest CUDA 12 standard
413+ # 60 == f16 CUDA intrinsics
414+ # 61 == integer CUDA intrinsics
415+ # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
416+ if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
417+ set (CMAKE_CUDA_ARCHITECTURES "60;61;70" ) # needed for f16 CUDA intrinsics
418+ else ()
419+ set (CMAKE_CUDA_ARCHITECTURES "52;61;70" ) # lowest CUDA 12 standard + lowest for integer intrinsics
420+ #set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work
421+ endif ()
422+ endif ()
423+ message (STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES} " )
424+
411425 enable_language (CUDA)
412426
413427 set (GGML_HEADERS_CUDA ggml-cuda.h)
@@ -472,21 +486,6 @@ if (LLAMA_CUDA)
472486 else ()
473487 set (LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ...
474488 endif ()
475-
476- if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
477- # 52 == lowest CUDA 12 standard
478- # 60 == f16 CUDA intrinsics
479- # 61 == integer CUDA intrinsics
480- # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
481- if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
482- set (CMAKE_CUDA_ARCHITECTURES "60;61;70" ) # needed for f16 CUDA intrinsics
483- else ()
484- set (CMAKE_CUDA_ARCHITECTURES "52;61;70" ) # lowest CUDA 12 standard + lowest for integer intrinsics
485- #set(CMAKE_CUDA_ARCHITECTURES "") # use this to compile much faster, but only F16 models work
486- endif ()
487- endif ()
488- message (STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES} " )
489-
490489 else ()
491490 message (WARNING "CUDA not found" )
492491 endif ()
0 commit comments