-
Notifications
You must be signed in to change notification settings - Fork 76
/
Copy pathCMakeLists.txt
172 lines (152 loc) · 6.18 KB
/
CMakeLists.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# Requires CMake 3.19 or newer; policies introduced up to 3.30 are opted in.
cmake_minimum_required(VERSION 3.19...3.30)
project(tokenizers_cpp LANGUAGES C CXX)

# Compile C++ sources as standard C++17: the standard is mandatory and
# compiler-specific extensions are switched off.
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 17)

include(FetchContent)
# ---------------------------------------------------------------------------
# Platform detection: choose the Rust target triple, the extra system
# libraries and the extra environment entries used for the cargo build.
# ---------------------------------------------------------------------------

# Extra flags exported to cargo through the RUSTFLAGS environment variable.
set(TOKENIZERS_CPP_RUST_FLAGS "")
# Rust target triple; when left empty no `--target` argument is passed to cargo.
set(TOKENIZERS_CPP_CARGO_TARGET "")
# Extra link libraries for tokenizers_cpp and for the tokenizers_c interface.
set(TOKENIZERS_CPP_LINK_LIBS "")
set(TOKENIZERS_C_LINK_LIBS "")
# Extra NAME=VALUE environment entries for the cargo invocation.
set(CARGO_EXTRA_ENVS "")

message(STATUS "system-name: ${CMAKE_SYSTEM_NAME}")

if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
  list(APPEND TOKENIZERS_C_LINK_LIBS ${CMAKE_DL_LIBS})
elseif(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
  set(TOKENIZERS_CPP_CARGO_TARGET wasm32-unknown-emscripten)
elseif(CMAKE_SYSTEM_NAME STREQUAL "iOS")
  # Simulator SDKs need a simulator triple; anything else is treated as a device build.
  if(CMAKE_OSX_SYSROOT MATCHES ".*iPhoneSimulator\\.platform.*")
    if(CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
      set(TOKENIZERS_CPP_CARGO_TARGET x86_64-apple-ios)
    else()
      set(TOKENIZERS_CPP_CARGO_TARGET aarch64-apple-ios-sim)
    endif()
  else()
    set(TOKENIZERS_CPP_CARGO_TARGET aarch64-apple-ios)
  endif()
  # Extra frameworks needed by the Rust tokenizer on iOS. REQUIRED (CMake >= 3.18)
  # stops configuration with a clear message instead of letting a
  # "<VAR>-NOTFOUND" value leak into the link line.
  find_library(FOUNDATION_LIB Foundation REQUIRED)
  find_library(SECURITY_LIB Security REQUIRED)
  list(APPEND TOKENIZERS_C_LINK_LIBS ${FOUNDATION_LIB} ${SECURITY_LIB})
elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
  # Only arm64 selects an explicit triple; otherwise cargo is run without --target.
  # NOTE(review): this looks at CMAKE_SYSTEM_PROCESSOR, not CMAKE_OSX_ARCHITECTURES -
  # confirm this is the intended behavior for cross-architecture macOS builds.
  if(CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
    set(TOKENIZERS_CPP_CARGO_TARGET aarch64-apple-darwin)
  endif()
  if(CMAKE_OSX_DEPLOYMENT_TARGET)
    set(CARGO_EXTRA_ENVS
      MACOSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET}
    )
  endif()
elseif(CMAKE_SYSTEM_NAME STREQUAL "Android")
  if(ANDROID_ABI STREQUAL "arm64-v8a")
    set(TOKENIZERS_CPP_CARGO_TARGET aarch64-linux-android)
  elseif(ANDROID_ABI STREQUAL "armeabi-v7a")
    set(TOKENIZERS_CPP_CARGO_TARGET armv7-linux-androideabi)
  elseif(ANDROID_ABI STREQUAL "x86_64")
    set(TOKENIZERS_CPP_CARGO_TARGET x86_64-linux-android)
  elseif(ANDROID_ABI STREQUAL "x86")
    set(TOKENIZERS_CPP_CARGO_TARGET i686-linux-android)
  else()
    # Without a triple the entries below would degenerate to "AR_=", "CC_=" ...
    # and cargo would build for the host, so stop here with a clear message.
    message(FATAL_ERROR
      "tokenizers_cpp: unsupported ANDROID_ABI '${ANDROID_ABI}' "
      "(expected arm64-v8a, armeabi-v7a, x86_64 or x86)")
  endif()
  # The NDK clang wrappers are named after the clang triple. It matches the Rust
  # triple for every ABI except 32-bit ARM, where the NDK uses "armv7a-..." while
  # Rust uses "armv7-...".
  set(TOKENIZERS_CPP_ANDROID_CLANG_TRIPLE ${TOKENIZERS_CPP_CARGO_TARGET})
  if(ANDROID_ABI STREQUAL "armeabi-v7a")
    set(TOKENIZERS_CPP_ANDROID_CLANG_TRIPLE armv7a-linux-androideabi)
  endif()
  # Point the per-target AR/CC/CXX variables at the NDK toolchain binaries.
  set(CARGO_EXTRA_ENVS
    AR_${TOKENIZERS_CPP_CARGO_TARGET}=${ANDROID_TOOLCHAIN_ROOT}/bin/llvm-ar
    CC_${TOKENIZERS_CPP_CARGO_TARGET}=${ANDROID_TOOLCHAIN_ROOT}/bin/${TOKENIZERS_CPP_ANDROID_CLANG_TRIPLE}${ANDROID_NATIVE_API_LEVEL}-clang
    CXX_${TOKENIZERS_CPP_CARGO_TARGET}=${ANDROID_TOOLCHAIN_ROOT}/bin/${TOKENIZERS_CPP_ANDROID_CLANG_TRIPLE}${ANDROID_NATIVE_API_LEVEL}-clang++
  )
elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
  # NOTE(review): always selects the x86_64 MSVC triple, regardless of the
  # target architecture or compiler - confirm for ARM64 / MinGW builds.
  set(TOKENIZERS_CPP_CARGO_TARGET x86_64-pc-windows-msvc)
endif()
# Windows system libraries added to the tokenizers_c link interface.
if(WIN32)
  list(
    APPEND TOKENIZERS_C_LINK_LIBS
    ntdll
    wsock32
    ws2_32
    Bcrypt
    iphlpapi
    userenv
    psapi
  )
endif()
# Arguments appended to `cargo build`.
set(TOKENIZERS_CPP_CARGO_FLAGS "")

# Both directories start at the current build directory. The binary dir is
# then extended below with "<target>/" (if any) and "debug" or "release".
set(TOKENIZERS_CPP_CARGO_TARGET_DIR "${CMAKE_CURRENT_BINARY_DIR}")
set(TOKENIZERS_CPP_CARGO_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}")

# An explicit target triple adds a --target argument and one directory level.
if(NOT TOKENIZERS_CPP_CARGO_TARGET STREQUAL "")
  list(APPEND TOKENIZERS_CPP_CARGO_FLAGS --target ${TOKENIZERS_CPP_CARGO_TARGET})
  string(APPEND TOKENIZERS_CPP_CARGO_BINARY_DIR "/${TOKENIZERS_CPP_CARGO_TARGET}")
endif()

# Every build type other than Debug is built with --release.
# NOTE(review): CMAKE_BUILD_TYPE is empty under multi-config generators, so
# those always take the release branch - confirm that is intended.
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
  list(APPEND TOKENIZERS_CPP_CARGO_FLAGS --release)
  string(APPEND TOKENIZERS_CPP_CARGO_BINARY_DIR "/release")
else()
  string(APPEND TOKENIZERS_CPP_CARGO_BINARY_DIR "/debug")
endif()

# Directory containing this list file; the Rust crate lives in its rust/ subdirectory.
set(TOKENIZERS_CPP_ROOT "${CMAKE_CURRENT_LIST_DIR}")
set(TOKENIZERS_CPP_CARGO_SOURCE_PATH "${TOKENIZERS_CPP_ROOT}/rust")
# MSGPACK_USE_BOOST is declared (default OFF) before the msgpack sub-project is
# added - presumably so that msgpack is configured without Boost; verify
# against msgpack's own CMake files.
option(MSGPACK_USE_BOOST "Use Boost libraried" OFF)
add_subdirectory(msgpack)

# Switch controlling the MLC_ENABLE_SENTENCEPIECE_TOKENIZER compile definition.
option(MLC_ENABLE_SENTENCEPIECE_TOKENIZER "Enable SentencePiece tokenizer" ON)

# Expected path of the static library produced by cargo: MSVC uses
# "tokenizers_c.lib", every other toolchain uses "libtokenizers_c.a".
if(NOT MSVC)
  set(TOKENIZERS_RUST_LIB "${TOKENIZERS_CPP_CARGO_BINARY_DIR}/libtokenizers_c.a")
else()
  set(TOKENIZERS_RUST_LIB "${TOKENIZERS_CPP_CARGO_BINARY_DIR}/tokenizers_c.lib")
endif()

# Public header directory of this project.
set(TOKENIZERS_CPP_INCLUDE "${TOKENIZERS_CPP_ROOT}/include")
# ---------------------------------------------------------------------------
# Locate the cargo executable.
#
# CARGO_HOME resolution order:
#   1. a CMake variable CARGO_HOME, if already defined;
#   2. the CARGO_HOME environment variable, if non-empty;
#   3. "<user home>/.cargo", where the user home is read from USERPROFILE on
#      Windows hosts and from HOME elsewhere.
# The result is only a search hint; the standard find_program locations are
# searched as well.
# ---------------------------------------------------------------------------
if(CMAKE_HOST_WIN32)
  set(USER_HOME "$ENV{USERPROFILE}")
else()
  set(USER_HOME "$ENV{HOME}")
endif()

if(NOT DEFINED CARGO_HOME)
  if("$ENV{CARGO_HOME}" STREQUAL "")
    set(CARGO_HOME "${USER_HOME}/.cargo")
  else()
    set(CARGO_HOME "$ENV{CARGO_HOME}")
  endif()
endif()

find_program(CARGO_EXECUTABLE cargo
  HINTS "${CARGO_HOME}"
  PATH_SUFFIXES "bin")
mark_as_advanced(CARGO_EXECUTABLE)

# Fail at configure time with an actionable message; otherwise the build would
# only fail later, when it tries to run "CARGO_EXECUTABLE-NOTFOUND build".
if(NOT CARGO_EXECUTABLE)
  message(FATAL_ERROR
    "tokenizers_cpp: could not find the 'cargo' executable "
    "(hint directory: '${CARGO_HOME}'). Install the Rust toolchain, or pass "
    "-DCARGO_HOME=<dir> or -DCARGO_EXECUTABLE=<path-to-cargo>.")
endif()
# Build rule for the Rust static library.
#
# Step 1 runs `cargo build` in the Rust source directory. `cmake -E env` sets
# the environment so that the rule is portable (including on Windows).
# Step 2 copies the produced library into the current build directory.
#
# Notes:
# - The former POST_BUILD keyword was dropped. It belongs to the TARGET
#   signature of add_custom_command, is ignored in the OUTPUT signature, and
#   is diagnosed by policy CMP0175 on CMake >= 3.31. The two commands still
#   run in the same order.
# - VERBATIM makes argument escaping identical on every platform. Because of
#   that, RUSTFLAGS is passed as one quoted CMake argument rather than relying
#   on the shell to strip literal quote characters.
add_custom_command(
  OUTPUT "${TOKENIZERS_RUST_LIB}"
  COMMAND
    "${CMAKE_COMMAND}" -E env
    "CARGO_TARGET_DIR=${TOKENIZERS_CPP_CARGO_TARGET_DIR}"
    ${CARGO_EXTRA_ENVS}
    "RUSTFLAGS=${TOKENIZERS_CPP_RUST_FLAGS}"
    "${CARGO_EXECUTABLE}" build ${TOKENIZERS_CPP_CARGO_FLAGS}
  COMMAND
    "${CMAKE_COMMAND}" -E copy
    "${TOKENIZERS_RUST_LIB}" "${CMAKE_CURRENT_BINARY_DIR}"
  WORKING_DIRECTORY "${TOKENIZERS_CPP_CARGO_SOURCE_PATH}"
  COMMENT "Building the Rust tokenizers_c library with cargo"
  VERBATIM
)
# C++ sources of the tokenizers_cpp static library.
set(
  TOKENIZER_CPP_SRCS
  src/sentencepiece_tokenizer.cc
  src/huggingface_tokenizer.cc
  src/rwkv_world_tokenizer.cc
)
add_library(tokenizers_cpp STATIC ${TOKENIZER_CPP_SRCS})

# Headers of the bundled third-party projects are implementation details
# (PRIVATE); only this project's own include directory is exported (PUBLIC).
target_include_directories(tokenizers_cpp PRIVATE sentencepiece/src)
target_include_directories(tokenizers_cpp PRIVATE msgpack/include)
target_include_directories(tokenizers_cpp PUBLIC ${TOKENIZERS_CPP_INCLUDE})

# Evaluate the option as a boolean. A string comparison against "ON" would
# silently drop the definition for equally valid true values such as 1, TRUE,
# YES or on.
if(MLC_ENABLE_SENTENCEPIECE_TOKENIZER)
  target_compile_definitions(tokenizers_cpp PUBLIC MLC_ENABLE_SENTENCEPIECE_TOKENIZER)
endif()

target_link_libraries(tokenizers_cpp PRIVATE msgpack-cxx)
# SentencePiece build switches, both defaulting to OFF.
option(SPM_ENABLE_SHARED "override sentence piece config" OFF)
option(SPM_ENABLE_TCMALLOC "" OFF)

# On iOS, provide set_xcode_property in case the CMake environment does not
# define it. The macro stores XCODE_VALUE in the target property
# XCODE_ATTRIBUTE_<XCODE_PROPERTY>. Per the original note, the sentencepiece
# apps are not shipped on iOS; the library is linked directly instead.
if(CMAKE_SYSTEM_NAME STREQUAL "iOS")
  macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE)
    set_property(
      TARGET ${TARGET}
      PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} ${XCODE_VALUE}
    )
  endmacro()
endif()
# Add the bundled sentencepiece project. EXCLUDE_FROM_ALL keeps its targets
# out of the default build unless another target depends on them.
add_subdirectory(sentencepiece sentencepiece EXCLUDE_FROM_ALL)

# tokenizers_c wraps the cargo-built static library. Listing the library file
# as a source of the INTERFACE target ties the target to the custom command
# that produces the file. The link interface carries the library itself plus
# the platform-specific system libraries.
add_library(tokenizers_c INTERFACE ${TOKENIZERS_RUST_LIB})
target_link_libraries(tokenizers_c INTERFACE ${TOKENIZERS_RUST_LIB} ${TOKENIZERS_C_LINK_LIBS})

# Final link line of the C++ library. The public include directory is already
# attached where tokenizers_cpp is declared, so the duplicate
# target_include_directories() call that used to follow was removed.
target_link_libraries(tokenizers_cpp PRIVATE tokenizers_c sentencepiece-static ${TOKENIZERS_CPP_LINK_LIBS})