[RFC] Redesigned CMake build infrastructure for C++ API #944

Open · wants to merge 13 commits into main
573 changes: 104 additions & 469 deletions CMakeLists.txt

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions cmake/Components.cmake
@@ -0,0 +1,14 @@
# Define the component structure
set(FLASHINFER_COMPONENTS "Headers")

if(FLASHINFER_BUILD_KERNELS)
list(APPEND FLASHINFER_COMPONENTS "Kernels")

if(FLASHINFER_TVM_BINDING)
list(APPEND FLASHINFER_COMPONENTS "TVMBinding")
endif()
endif()

if(FLASHINFER_DISTRIBUTED)
list(APPEND FLASHINFER_COMPONENTS "Distributed")
endif()
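
For orientation, this component list maps directly onto the options defined in cmake/Options.cmake later in this diff. As a sketch (the message() call below is illustrative and not part of this PR), configuring with -DFLASHINFER_BUILD_KERNELS=ON -DFLASHINFER_TVM_BINDING=ON -DFLASHINFER_DISTRIBUTED=ON leaves FLASHINFER_COMPONENTS containing Headers;Kernels;TVMBinding;Distributed, which can be confirmed at configure time:

message(STATUS "Enabled flashinfer components: ${FLASHINFER_COMPONENTS}")
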
142 changes: 142 additions & 0 deletions cmake/Dependencies.cmake
@@ -0,0 +1,142 @@
# === Required Dependencies for Core Functionality ===
find_package(CUDAToolkit REQUIRED)
find_package(Python3 REQUIRED)
if(NOT Python3_FOUND)
message(
FATAL_ERROR
"Python3 not found it is required to generate the kernel sources.")
endif()

find_package(Thrust REQUIRED)

# === Test Dependencies ===
if(FLASHINFER_UNITTESTS)
include(FetchContent)

# Google Test for unit testing
FetchContent_Declare(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG 6910c9d9165801d8827d628cb72eb7ea9dd538c5 # release-1.16.0
FIND_PACKAGE_ARGS NAMES GTest)
FetchContent_MakeAvailable(googletest)
endif()

# === Benchmark Dependencies ===
if(FLASHINFER_CXX_BENCHMARKS)
include(FetchContent)

# NVBench for GPU benchmarking
FetchContent_Declare(
nvbench
GIT_REPOSITORY https://github.com/NVIDIA/nvbench.git
GIT_TAG c03033b50e46748207b27685b1cdfcbe4a2fec59)
FetchContent_MakeAvailable(nvbench)
endif()

# === Boost Dependency for FP16 QK Reductions ===
if(FLASHINFER_GEN_USE_FP16_QK_REDUCTIONS)
include(FetchContent)
set(BOOST_ENABLE_CMAKE ON)
FetchContent_Declare(boost_math
GIT_REPOSITORY https://github.com/boostorg/math.git)
FetchContent_MakeAvailable(boost_math)

set(USE_FP16_QK_REDUCTIONS "true")
message(STATUS "USE_FP16_QK_REDUCTIONS=${USE_FP16_QK_REDUCTIONS}")
else()
set(USE_FP16_QK_REDUCTIONS "false")
message(STATUS "USE_FP16_QK_REDUCTIONS=${USE_FP16_QK_REDUCTIONS}")
endif()

# === Distributed component dependencies ===
if(FLASHINFER_DISTRIBUTED OR FLASHINFER_DIST_UNITTESTS)
include(FetchContent)
FetchContent_Declare(
mscclpp
GIT_REPOSITORY https://github.com/microsoft/mscclpp.git
GIT_TAG 11e62024d3eb190e005b4689f8c8443d91a6c82e)
FetchContent_MakeAvailable(mscclpp)

# Create alias for distributed component
if(NOT TARGET flashinfer::mscclpp)
add_library(flashinfer::mscclpp ALIAS mscclpp)
endif()

# Fetch spdlog for distributed tests (header-only usage)
FetchContent_Declare(
spdlog
GIT_REPOSITORY https://github.com/gabime/spdlog.git
GIT_TAG f355b3d58f7067eee1706ff3c801c2361011f3d5 # release-1.15.1
FIND_PACKAGE_ARGS NAMES spdlog)

# Use Populate instead of MakeAvailable since we only need the headers
FetchContent_Populate(spdlog)

# Set the include directory for later use
set(SPDLOG_INCLUDE_DIR "${spdlog_SOURCE_DIR}/include")
message(STATUS "Using spdlog from ${SPDLOG_INCLUDE_DIR}")

find_package(MPI REQUIRED)
endif()

# === FP8 Dependencies ===
if(FLASHINFER_FP8_TESTS OR FLASHINFER_FP8_BENCHMARKS)
# Verify the CUDA architecture is SM90 (90 or 90a)
if(NOT CMAKE_CUDA_ARCHITECTURES STREQUAL "90"
AND NOT CMAKE_CUDA_ARCHITECTURES STREQUAL "90a")
message(
FATAL_ERROR
"FP8 tests/benchmarks require CMAKE_CUDA_ARCHITECTURES to be 90 or 90a")
endif()

# Find PyTorch which is required for FP8 features
find_package(Torch REQUIRED)
if(NOT Torch_FOUND)
message(
FATAL_ERROR "PyTorch is required for FP8 tests/benchmarks but not found")
endif()
message(STATUS "Found PyTorch: ${TORCH_INCLUDE_DIRS}")

# Fetch Flash Attention repository with specific commit
include(FetchContent)
FetchContent_Declare(
flash_attention
GIT_REPOSITORY https://github.com/Dao-AILab/flash-attention.git
GIT_TAG 29ef580560761838c0e9e82bc0e98d04ba75f949)
FetchContent_Populate(flash_attention)

# Set Flash Attention 3 include directory
set(FA3_INCLUDE_DIR "${flash_attention_SOURCE_DIR}/csrc/flash_attn/hopper")
message(STATUS "Flash Attention 3 source directory: ${FA3_INCLUDE_DIR}")

# Collect the Flash Attention 3 kernel sources used to build the FA3 kernel library
file(GLOB FA3_IMPL_FILES "${FA3_INCLUDE_DIR}/flash_fwd_*.cu")
endif()

# === TVM Binding dependencies ===
if(FLASHINFER_TVM_BINDING)
# Resolve TVM source directory
if(NOT FLASHINFER_TVM_SOURCE_DIR STREQUAL "")
set(TVM_SOURCE_DIR_SET ${FLASHINFER_TVM_SOURCE_DIR})
elseif(DEFINED ENV{TVM_SOURCE_DIR})
set(TVM_SOURCE_DIR_SET $ENV{TVM_SOURCE_DIR})
elseif(DEFINED ENV{TVM_HOME})
set(TVM_SOURCE_DIR_SET $ENV{TVM_HOME})
else()
message(
FATAL_ERROR
"TVM source directory not found. Set FLASHINFER_TVM_SOURCE_DIR.")
endif()
endif()

# === CUTLASS Configuration ===
if(FLASHINFER_CUTLASS_DIR)
list(APPEND CMAKE_PREFIX_PATH ${FLASHINFER_CUTLASS_DIR})

set(CUTLASS_INCLUDE_DIRS ${FLASHINFER_CUTLASS_DIR}/include
${FLASHINFER_CUTLASS_DIR}/tools/util/include)
message(STATUS "Using CUTLASS from ${FLASHINFER_CUTLASS_DIR}")
else()
message(
FATAL_ERROR "FLASHINFER_CUTLASS_DIR must be set to the path of CUTLASS")
endif()
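
To make the FP8 branch above concrete, here is a hedged sketch of how the variables it prepares (FA3_IMPL_FILES, FA3_INCLUDE_DIR, CUTLASS_INCLUDE_DIRS, TORCH_INCLUDE_DIRS) would typically be consumed by a target defined elsewhere in the build; the target name example_fp8_kernels is purely illustrative and not part of this PR:

# Sketch only: consuming the FP8/CUTLASS variables resolved in Dependencies.cmake
add_library(example_fp8_kernels STATIC ${FA3_IMPL_FILES})
target_include_directories(example_fp8_kernels PRIVATE
  ${FA3_INCLUDE_DIR} ${CUTLASS_INCLUDE_DIRS} ${TORCH_INCLUDE_DIRS})
target_link_libraries(example_fp8_kernels PRIVATE CUDA::cudart)

CUDA::cudart is provided by the find_package(CUDAToolkit REQUIRED) call at the top of this file.
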
86 changes: 86 additions & 0 deletions cmake/Options.cmake
@@ -0,0 +1,86 @@
# cmake-format: off
# NOTE:
# a) Do not modify this file to change option values. Options should be
#    configured using either a config.cmake file (refer to the default file
#    inside the cmake folder) or by setting the required -DFLASHINFER_XXX
#    option on the command line.
#
# b) This file should only contain option definitions and should not contain
# any other CMake commands.
#
# c) All new options should be defined here with a default value and a short
# description.
#
# d) Add new options under the appropriate section.

# === COMPONENT OPTIONS ===
flashinfer_option(FLASHINFER_BUILD_KERNELS "Build and install kernel libraries" OFF)
flashinfer_option(FLASHINFER_TVM_BINDING "Build TVM binding support" OFF)
flashinfer_option(FLASHINFER_DISTRIBUTED "Build distributed support" OFF)

# === DATA TYPE OPTIONS ===
flashinfer_option(FLASHINFER_ENABLE_FP8 "Enable FP8 data type support" ON)
flashinfer_option(FLASHINFER_ENABLE_FP8_E4M3 "Enable FP8 E4M3 format specifically" ON)
flashinfer_option(FLASHINFER_ENABLE_FP8_E5M2 "Enable FP8 E5M2 format specifically" ON)
flashinfer_option(FLASHINFER_ENABLE_F16 "Enable F16 data type support" ON)
flashinfer_option(FLASHINFER_ENABLE_BF16 "Enable BF16 data type support" ON)

# === CODE GENERATION OPTIONS ===
flashinfer_option(FLASHINFER_GEN_HEAD_DIMS "Head dimensions to enable" 64 128 256)
flashinfer_option(FLASHINFER_GEN_POS_ENCODING_MODES "Position encoding modes to enable" 0 1 2)
flashinfer_option(FLASHINFER_GEN_MASK_MODES "Mask modes to enable" 0 1 2)
flashinfer_option(FLASHINFER_GEN_USE_FP16_QK_REDUCTIONS "Use FP16 for QK reductions" OFF)
flashinfer_option(FLASHINFER_SM90_ALLOWED_HEAD_DIMS "Allowed head-dim pairs for SM90 kernels" "64,64" "128,128" "256,256" "192,128")

# === BUILD TYPE OPTIONS ===
flashinfer_option(FLASHINFER_UNITTESTS "Build unit tests" OFF)
flashinfer_option(FLASHINFER_CXX_BENCHMARKS "Build benchmarks" OFF)
flashinfer_option(FLASHINFER_DIST_UNITTESTS "Build distributed unit tests" OFF)

# === FEATURE-SPECIFIC TESTS/BENCHMARKS ===
flashinfer_option(FLASHINFER_FP8_TESTS "Build FP8 tests" OFF)
flashinfer_option(FLASHINFER_FP8_BENCHMARKS "Build FP8 benchmarks" OFF)

# === ARCHITECTURE OPTIONS ===
flashinfer_option(FLASHINFER_CUDA_ARCHITECTURES "CUDA architectures to compile for" "")

# === PATH OPTIONS ===
flashinfer_option(FLASHINFER_CUTLASS_DIR "Path to CUTLASS installation" "")
flashinfer_option(FLASHINFER_TVM_SOURCE_DIR "Path to TVM source directory" "")

# === AUTO-DERIVED OPTIONS ===
# Handle CUDA architectures
if(FLASHINFER_CUDA_ARCHITECTURES)
message(STATUS "CMAKE_CUDA_ARCHITECTURES set to ${FLASHINFER_CUDA_ARCHITECTURES}.")
set(CMAKE_CUDA_ARCHITECTURES ${FLASHINFER_CUDA_ARCHITECTURES})
endif()

# Handle automatic enabling of dependent features
if(FLASHINFER_FP8_TESTS)
set(FLASHINFER_UNITTESTS ON CACHE BOOL "Tests enabled for FP8" FORCE)
endif()

if(FLASHINFER_FP8_BENCHMARKS)
set(FLASHINFER_CXX_BENCHMARKS ON CACHE BOOL "Benchmarks enabled for FP8" FORCE)
endif()

if(FLASHINFER_DIST_UNITTESTS)
set(FLASHINFER_UNITTESTS ON CACHE BOOL "Tests enabled for distributed" FORCE)
endif()

if(FLASHINFER_TVM_BINDING AND NOT FLASHINFER_BUILD_KERNELS)
message(FATAL_ERROR "TVM binding requires FLASHINFER_BUILD_KERNELS to be ON")
endif()

if(FLASHINFER_ENABLE_FP8)
# Enable both FP8 formats when FP8 is enabled
set(FLASHINFER_ENABLE_FP8_E4M3 ON CACHE BOOL "Enable FP8 E4M3 format" FORCE)
set(FLASHINFER_ENABLE_FP8_E5M2 ON CACHE BOOL "Enable FP8 E5M2 format" FORCE)
endif()

# Ensure FP8 is enabled for FP8 tests/benchmarks
if(FLASHINFER_FP8_TESTS OR FLASHINFER_FP8_BENCHMARKS)
set(FLASHINFER_ENABLE_FP8 ON CACHE BOOL "FP8 enabled for tests/benchmarks" FORCE)
set(FLASHINFER_ENABLE_FP8_E4M3 ON CACHE BOOL "FP8_E4M3 enabled for tests/benchmarks" FORCE)
endif()
# cmake-format: on
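
Since the NOTE at the top of this file points to a config.cmake file as the preferred way to set options, a minimal sketch of such a file might look like the following; all values are placeholders, and how the file is loaded (for example via cmake -C config.cmake or an include() in the top-level CMakeLists.txt) is not shown in this diff:

# config.cmake sketch (illustrative values only)
set(FLASHINFER_BUILD_KERNELS ON)
set(FLASHINFER_UNITTESTS ON)
set(FLASHINFER_GEN_HEAD_DIMS 64 128 256)
set(FLASHINFER_CUDA_ARCHITECTURES 90a)
set(FLASHINFER_CUTLASS_DIR "/path/to/cutlass")
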
72 changes: 72 additions & 0 deletions cmake/flashinferConfig.cmake.in
@@ -0,0 +1,72 @@
@PACKAGE_INIT@

include(CMakeFindDependencyMacro)
# Required dependencies for FlashInfer headers
find_dependency(CUDAToolkit REQUIRED)

# Optional dependencies based on components
if("Distributed" IN_LIST flashinfer_FIND_COMPONENTS)
find_dependency(MPI REQUIRED)
find_dependency(mscclpp REQUIRED)
endif()

# Define available components passed from main CMakeLists.txt
set(_flashinfer_available_components @FLASHINFER_COMPONENTS@)

# Initialize component found status
foreach(_comp Headers Kernels TVMBinding Distributed)
set(flashinfer_${_comp}_FOUND FALSE)
endforeach()

# Mark available components as found
foreach(_comp ${_flashinfer_available_components})
set(flashinfer_${_comp}_FOUND TRUE)
endforeach()

# Check for requested components
foreach(_comp ${flashinfer_FIND_COMPONENTS})
if(NOT _comp IN_LIST _flashinfer_available_components)
set(flashinfer_FOUND False)
set(flashinfer_NOT_FOUND_MESSAGE "Requested component: ${_comp} is not available")
return()
endif()
endforeach()

# Headers component is always available and included
set(flashinfer_Headers_FOUND TRUE)

# Include kernel targets if available and requested
if(flashinfer_Kernels_FOUND AND
("Kernels" IN_LIST flashinfer_FIND_COMPONENTS OR NOT flashinfer_FIND_COMPONENTS))
include("${CMAKE_CURRENT_LIST_DIR}/libflashinferTargets.cmake" OPTIONAL)
endif()

# Include TVM binding targets if available and requested
if(flashinfer_TVMBinding_FOUND AND
"TVMBinding" IN_LIST flashinfer_FIND_COMPONENTS)
include("${CMAKE_CURRENT_LIST_DIR}/flashinferTVMBindingTargets.cmake" OPTIONAL)
endif()

# Include Distributed targets if available and requested
if(flashinfer_Distributed_FOUND AND
"Distributed" IN_LIST flashinfer_FIND_COMPONENTS)
# Create aliases to make usage consistent
if(NOT TARGET flashinfer::dist)
add_library(flashinfer::dist INTERFACE IMPORTED)
set_target_properties(flashinfer::dist PROPERTIES
INTERFACE_LINK_LIBRARIES mscclpp)
endif()

include("${CMAKE_CURRENT_LIST_DIR}/flashinferDistTargets.cmake")
endif()

# Set include directories
set(FLASHINFER_INCLUDE_DIRS "${PACKAGE_PREFIX_DIR}/include")

# Print status message
if(NOT flashinfer_FIND_QUIETLY)
message(STATUS "Found flashinfer: ${PACKAGE_PREFIX_DIR} (version: ${flashinfer_VERSION})")
message(STATUS "Available components: ${_flashinfer_available_components}")
endif()

check_required_components(flashinfer)
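
For downstream users, a hedged sketch of consuming the installed package could look like the following. FLASHINFER_INCLUDE_DIRS is set by this config file; the target name flashinfer::flashinfer and the executable my_attention_app are hypothetical, since the exported target names live in libflashinferTargets.cmake, which is not shown in this diff:

# Downstream CMakeLists.txt sketch
find_package(flashinfer REQUIRED COMPONENTS Headers Kernels)
add_executable(my_attention_app main.cu)
target_include_directories(my_attention_app PRIVATE ${FLASHINFER_INCLUDE_DIRS})
# Link the exported kernel targets here once their names are confirmed, e.g.:
# target_link_libraries(my_attention_app PRIVATE flashinfer::flashinfer)
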
36 changes: 36 additions & 0 deletions cmake/utils/CalculateSM90HeadDims.cmake
@@ -0,0 +1,36 @@
# Compute the head dims for NVIDIA SM90 devices based on the
# FLASHINFER_GEN_HEAD_DIMS and FLASHINFER_SM90_ALLOWED_HEAD_DIMS options
function(flashinfer_compute_sm90_head_dims)
set(options "")
set(oneValueArgs RESULT)
set(multiValueArgs "")
cmake_parse_arguments(ARG "${options}" "${oneValueArgs}" "${multiValueArgs}"
${ARGN})

if(NOT DEFINED ARG_RESULT)
message(
FATAL_ERROR "flashinfer_compute_sm90_head_dims: missing RESULT argument")
endif()

set(HEAD_DIMS_SM90 "")

foreach(DIM_VAL ${FLASHINFER_GEN_HEAD_DIMS})
string(CONCAT TUPLE_VAL "${DIM_VAL}" "," "${DIM_VAL}")
list(FIND FLASHINFER_SM90_ALLOWED_HEAD_DIMS ${TUPLE_VAL} FOUND_IDX)
if(NOT FOUND_IDX EQUAL -1)
list(APPEND HEAD_DIMS_SM90 ${TUPLE_VAL})
endif()
endforeach()

foreach(TUPLE_VAL ${FLASHINFER_SM90_ALLOWED_HEAD_DIMS})
string(REPLACE "," ";" HEAD_DIMS_LIST ${TUPLE_VAL})
list(GET HEAD_DIMS_LIST 0 K)
list(GET HEAD_DIMS_LIST 1 V)
if(NOT K EQUAL V)
list(APPEND HEAD_DIMS_SM90 ${TUPLE_VAL})
endif()
endforeach()

list(REMOVE_DUPLICATES HEAD_DIMS_SM90)
set(${ARG_RESULT}
${HEAD_DIMS_SM90}
PARENT_SCOPE)
endfunction()
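
A usage sketch, assuming the default option values from cmake/Options.cmake (the message() call is only for illustration):

# Usage sketch with the default option values
set(FLASHINFER_GEN_HEAD_DIMS 64 128 256)
set(FLASHINFER_SM90_ALLOWED_HEAD_DIMS "64,64" "128,128" "256,256" "192,128")
flashinfer_compute_sm90_head_dims(RESULT HEAD_DIMS_SM90)
# Expected result: 64,64;128,128;256,256;192,128
message(STATUS "SM90 head dims: ${HEAD_DIMS_SM90}")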