Skip to content

Commit 03d7e7e

Browse files
author
Timmy
committed
Merge pull request clMathLibraries#80 from TimmyLiu/master
merge develop branch into master branch
2 parents 7cdf31b + 5a4782e commit 03d7e7e

24 files changed

+857
-52
lines changed

.travis.yml

+3-11
Original file line numberDiff line numberDiff line change
@@ -5,32 +5,24 @@ compiler:
55

66
before_install:
77
- sudo apt-get update -qq
8-
- sudo apt-get install -qq fglrx opencl-headers libboost-program-options-dev libgtest-dev
8+
- sudo apt-get install -qq fglrx opencl-headers libboost-program-options-dev
99
# Uncomment below to help verify the installs above work
1010
# - ls -la /usr/lib/libboost*
1111
# - ls -la /usr/include/boost
12-
# - ls -la /usr/src/gtest
13-
14-
install:
15-
- mkdir -p bin/gTest
16-
- cd bin/gTest
17-
- cmake -DCMAKE_BUILD_TYPE=Release /usr/src/gtest
18-
- make
19-
- sudo mv libg* /usr/lib
2012

2113
before_script:
2214
- cd ${TRAVIS_BUILD_DIR}
2315
- mkdir -p bin/clBLAS
2416
- cd bin/clBLAS
25-
- cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=ON ../../src
17+
- cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DCMAKE_INSTALL_PREFIX:PATH=$PWD/package ../../src
2618

2719
script:
2820
- make install
2921
# - ls -Rla package
3022
# Run a simple test to validate that the build works; CPU device in a VM
3123
- cd package/bin
3224
- export LD_LIBRARY_PATH=${TRAVIS_BUILD_DIR}/bin/clBLAS/package/lib64:${LD_LIBRARY_PATH}
33-
- ./client --cpu
25+
- ./clBLAS-client --cpu
3426

3527
after_success:
3628
- cd ${TRAVIS_BUILD_DIR}/bin/clBLAS

src/CMakeLists.txt

+33-16
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ option( BUILD_PERFORMANCE "Copy the performance scripts that can measure and gra
2323
option( BUILD_SAMPLE "Build the sample programs" OFF )
2424
option( BUILD_CLIENT "Build a command line clBLAS client program with a variety of configurable parameters (dependency on Boost)" OFF )
2525
option( BUILD_KTEST "A command line tool for testing single clBLAS kernel" ON )
26+
option( BUILD_SHARED_LIBS "Build shared libraries" ON )
2627

2728
# By default test-correctness is linked and tested against ACML library.
2829
# However, test-correctness can instead use NETLIB as a reference library
@@ -50,7 +51,7 @@ if( NOT DEFINED clBLAS_VERSION_MAJOR )
5051
endif( )
5152

5253
if( NOT DEFINED clBLAS_VERSION_MINOR )
53-
set( clBLAS_VERSION_MINOR 2 )
54+
set( clBLAS_VERSION_MINOR 4 )
5455
endif( )
5556

5657
if( NOT DEFINED clBLAS_VERSION_PATCH )
@@ -67,7 +68,7 @@ set( CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${PROJECT_SOURCE_DIR} )
6768

6869
# On Windows, it's convenient to change the default install prefix such that it does NOT point to 'Program Files' (permissions problems)
6970
# Need to check out CMAKE_RUNTIME_OUTPUT_DIRECTORY variable, and see if that eliminates the need to modify install path
70-
if( CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT )
71+
if( WIN32 AND CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT )
7172
set( CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/package" CACHE PATH "Install path prefix, prepended onto install directories" FORCE )
7273
endif( )
7374

@@ -84,34 +85,33 @@ set( SUFFIX_BIN_DEFAULT "" )
8485

8586
if(TARGET_PLATFORM EQUAL 32 OR TARGET_PLATFORM EQUAL 64)
8687
set(TARGET_PLATFORM ${TARGET_PLATFORM} CACHE STRING "Target platform type (32-bit or 64-bit)" FORCE)
87-
if( TARGET_PLATFORM EQUAL 64 )
88-
set( SUFFIX_LIB_DEFAULT "64" )
89-
endif( )
9088
else()
9189
if(CMAKE_SIZEOF_VOID_P MATCHES 8)
9290
set(TARGET_PLATFORM "64" CACHE STRING "Target platform type (32-bit or 64-bit)" FORCE)
93-
set( SUFFIX_LIB_DEFAULT "64" )
9491
else()
9592
set(TARGET_PLATFORM "32" CACHE STRING "Target platform type (32-bit or 64-bit)" FORCE)
9693
endif()
9794
endif()
9895

99-
set( SUFFIX_LIB ${SUFFIX_LIB_DEFAULT} CACHE STRING "String to append to 'lib' install path" )
100-
set( SUFFIX_BIN ${SUFFIX_BIN_DEFAULT} CACHE STRING "String to append to 'bin' install path" )
101-
102-
if( MSVC_IDE )
103-
set_property( GLOBAL PROPERTY USE_FOLDERS TRUE )
104-
endif( )
105-
10696
message(STATUS "Target platform: ${TARGET_PLATFORM}-bit")
10797
if(TARGET_PLATFORM EQUAL 32)
10898
set(_arch "x86" INTERNAL)
10999
set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS FALSE)
110100
else()
111101
set(_arch "x86_64" INTERNAL)
112102
set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS TRUE)
103+
if( NOT APPLE )
104+
set( SUFFIX_LIB_DEFAULT "64" )
105+
endif( )
113106
endif()
114107

108+
set( SUFFIX_LIB ${SUFFIX_LIB_DEFAULT} CACHE STRING "String to append to 'lib' install path" )
109+
set( SUFFIX_BIN ${SUFFIX_BIN_DEFAULT} CACHE STRING "String to append to 'bin' install path" )
110+
111+
if( MSVC_IDE )
112+
set_property( GLOBAL PROPERTY USE_FOLDERS TRUE )
113+
endif( )
114+
115115
# add the math library on UNIX-like platforms (e.g. Linux)
116116
if( UNIX )
117117
set(MATH_LIBRARY "m")
@@ -197,9 +197,6 @@ endif( )
197197
# This will define OPENCL_FOUND
198198
find_package( OpenCL )
199199

200-
# Find Google Test package
201-
find_package( GTest )
202-
203200
# Find Boost on the system, and configure the type of boost build we want
204201
set( Boost_USE_MULTITHREADED ON )
205202
set( Boost_USE_STATIC_LIBS ON )
@@ -297,6 +294,26 @@ if( BUILD_TEST )
297294
endif( )
298295
endif( )
299296

297+
if(WIN32)
298+
set(destdir CMake)
299+
else()
300+
set(destdir share/clBLAS)
301+
endif()
302+
string(REGEX REPLACE "[^/]+" ".." reldir "${destdir}")
303+
configure_file(
304+
${CMAKE_CURRENT_SOURCE_DIR}/clBLASConfigVersion.cmake.in
305+
${CMAKE_CURRENT_BINARY_DIR}/clBLASConfigVersion.cmake
306+
@ONLY)
307+
configure_file(
308+
${CMAKE_CURRENT_SOURCE_DIR}/clBLASConfig.cmake.in
309+
${CMAKE_CURRENT_BINARY_DIR}/clBLASConfig.cmake
310+
@ONLY)
311+
install(EXPORT Library DESTINATION ${destdir} FILE clBLASTargets.cmake)
312+
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/clBLASConfigVersion.cmake
313+
${CMAKE_CURRENT_BINARY_DIR}/clBLASConfig.cmake
314+
DESTINATION ${destdir})
315+
316+
300317
# The following code is setting variables to control the behavior of CPack to generate our
301318
if( WIN32 )
302319
set( CPACK_SOURCE_GENERATOR "ZIP" )

src/FindOpenCL.cmake

+6
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ find_path(OPENCL_INCLUDE_DIRS
5656
PATHS
5757
/usr/include
5858
/usr/local/include
59+
/usr/local/cuda/include
60+
/opt/cuda/include
5961
DOC "OpenCL header file path"
6062
)
6163
mark_as_advanced( OPENCL_INCLUDE_DIRS )
@@ -74,6 +76,8 @@ if( LIB64 )
7476
PATH_SUFFIXES x86_64 x64
7577
PATHS
7678
/usr/lib
79+
/usr/local/cuda/lib
80+
/opt/cuda/lib
7781
)
7882
else( )
7983
find_library( OPENCL_LIBRARIES
@@ -86,6 +90,8 @@ else( )
8690
PATH_SUFFIXES x86 Win32
8791
PATHS
8892
/usr/lib
93+
/usr/local/cuda/lib
94+
/opt/cuda/lib
8995
)
9096
endif( )
9197
mark_as_advanced( OPENCL_LIBRARIES )

src/clBLASConfig.cmake.in

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
include(${CMAKE_CURRENT_LIST_DIR}/clBLASTargets.cmake)
2+
get_filename_component(CLBLAS_INCLUDE_DIRS ${CMAKE_CURRENT_LIST_DIR}/@reldir@/include ABSOLUTE)
3+
set(CLBLAS_LIBRARIES clBLAS)

src/clBLASConfigVersion.cmake.in

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# This is a basic version file for the Config-mode of find_package().
2+
# It is used by write_basic_package_version_file() as input file for configure_file()
3+
# to create a version-file which can be installed along a config.cmake file.
4+
#
5+
# The created file sets PACKAGE_VERSION_EXACT if the current version string and
6+
# the requested version string are exactly the same and it sets
7+
# PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version,
8+
# but only if the requested major version is the same as the current one.
9+
# The variable CLBLAS_VERSION must be set before calling configure_file().
10+
11+
12+
set(PACKAGE_VERSION "@CLBLAS_VERSION@")
13+
14+
if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}" )
15+
set(PACKAGE_VERSION_COMPATIBLE FALSE)
16+
else()
17+
18+
if("@CLBLAS_VERSION@" MATCHES "^([0-9]+)\\.")
19+
set(CLBLAS_VERSION_MAJOR "${CMAKE_MATCH_1}")
20+
else()
21+
set(CLBLAS_VERSION_MAJOR "@CLBLAS_VERSION@")
22+
endif()
23+
24+
if("${PACKAGE_FIND_VERSION_MAJOR}" STREQUAL "${CLBLAS_VERSION_MAJOR}")
25+
set(PACKAGE_VERSION_COMPATIBLE TRUE)
26+
else()
27+
set(PACKAGE_VERSION_COMPATIBLE FALSE)
28+
endif()
29+
30+
if( "${PACKAGE_FIND_VERSION}" STREQUAL "${PACKAGE_VERSION}")
31+
set(PACKAGE_VERSION_EXACT TRUE)
32+
endif()
33+
endif()
34+
35+
36+
# if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it:
37+
if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "@CMAKE_SIZEOF_VOID_P@" STREQUAL "")
38+
return()
39+
endif()
40+
41+
# check that the installed version has the same 32/64bit-ness as the one which is currently searching:
42+
if(NOT "${CMAKE_SIZEOF_VOID_P}" STREQUAL "@CMAKE_SIZEOF_VOID_P@")
43+
math(EXPR installedBits "@CMAKE_SIZEOF_VOID_P@ * 8")
44+
set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)")
45+
set(PACKAGE_VERSION_UNSUITABLE TRUE)
46+
endif()

src/client/CMakeLists.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,9 @@ include_directories(
5252

5353
add_executable(client ${CLIENT_SRC} ${CLIENT_HEADER})
5454
target_link_libraries(client ${Boost_LIBRARIES} ${OPENCL_LIBRARIES} clBLAS)
55-
set_target_properties( client PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging" )
55+
set_target_properties( client PROPERTIES
56+
RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/staging"
57+
OUTPUT_NAME clBLAS-client )
5658

5759
add_executable(testPerfWrapper ${WRAPPER_SRC})
5860
target_link_libraries(testPerfWrapper ${Boost_LIBRARIES})

src/include/granulation.h

+2
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ typedef struct PGranularity {
4646
unsigned int wfSize;
4747
/** Record number of work-groups spawned */
4848
unsigned int numWGSpawned[2];
49+
/** Maximum work-group size */
50+
unsigned int maxWorkGroupSize;
4951
} PGranularity;
5052

5153
/**

src/library/CMakeLists.txt

+6-5
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ endif()
279279

280280
include( ExternalProject )
281281
ExternalProject_Add( tplgen
282-
URL "${CMAKE_SOURCE_DIR}/library/tools/tplgen"
282+
URL "${PROJECT_SOURCE_DIR}/library/tools/tplgen"
283283
INSTALL_COMMAND ""
284284
)
285285

@@ -294,7 +294,7 @@ endif()
294294

295295
add_custom_target( GENERATE_CLT
296296
COMMAND ${tplgenBinaryDir}/tplgen -o ${clBLAS_BINARY_DIR}/include ${SRC_CL_TEMPLATES}
297-
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/library/blas/gens/clTemplates
297+
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/library/blas/gens/clTemplates
298298
)
299299

300300
add_dependencies( GENERATE_CLT tplgen )
@@ -307,7 +307,7 @@ if( CMAKE_COMPILER_IS_GNUCC )
307307
DESTINATION lib${SUFFIX_LIB}/pkgconfig )
308308
endif( )
309309

310-
add_library(clBLAS SHARED ${CLBLAS_SOURCES} ${GLOBAL_HEADERS} ${SRC_BLAS_HEADERS} ${SRC_BLAS_GENS_HEADERS})
310+
add_library(clBLAS ${CLBLAS_SOURCES} ${GLOBAL_HEADERS} ${SRC_BLAS_HEADERS} ${SRC_BLAS_GENS_HEADERS})
311311
add_dependencies(clBLAS GENERATE_CLT)
312312
set_target_properties(clBLAS PROPERTIES VERSION ${clBLAS_VERSION})
313313
set_target_properties(clBLAS PROPERTIES SOVERSION ${clBLAS_SOVERSION})
@@ -316,8 +316,9 @@ target_link_libraries(clBLAS ${OPENCL_LIBRARIES} ${MATH_LIBRARY})
316316

317317
# CPack configuration; include the executable into the package
318318
install( TARGETS clBLAS
319-
RUNTIME DESTINATION bin${SUFFIX_BIN}
320-
LIBRARY DESTINATION lib${SUFFIX_LIB}
319+
EXPORT Library
320+
RUNTIME DESTINATION bin${SUFFIX_BIN}
321+
LIBRARY DESTINATION lib${SUFFIX_LIB}
321322
ARCHIVE DESTINATION lib${SUFFIX_LIB}/import
322323
)
323324

src/library/blas/generic/common.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,9 @@ setupBuildOpts(
527527
opts[0] = '\0';
528528

529529
#if !defined NDEBUG
530-
addBuildOpt(opts, BUILD_OPTS_MAXLEN, "-g");
530+
// Nvidia runtime does not appear to support the -g flag, at least in their OpenCL v1.1 runtime
531+
if( target.ident.vendor != VENDOR_NVIDIA )
532+
addBuildOpt( opts, BUILD_OPTS_MAXLEN, "-g" );
531533
#endif /* NDEBUG */
532534

533535
if (target.ident.vendor == VENDOR_NVIDIA &&

src/library/blas/gens/blas_kgen.h

+15
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
#include "tile.h"
5353
#include "fetch.h"
5454

55+
5556
#define BLAS_KGEN_FORMAT 1
5657

5758
#define genInternalLoopEnd(ctx) kgenEndBranch(ctx, NULL)
@@ -539,6 +540,18 @@ sprintfComplexMulUpdate(
539540
bool conjB,
540541
TileMulCore core);
541542

543+
void
544+
sprintfComplexMulUpdate_syr2k_beta0(
545+
Kstring *expr,
546+
const Kstring *dst,
547+
const Kstring *a,
548+
const Kstring *b,
549+
const Kstring *c,
550+
bool isDouble,
551+
bool conjA,
552+
bool conjB,
553+
TileMulCore core);
554+
542555
/**
543556
* @brief Sprintf expression of fast scalar mad
544557
*
@@ -892,4 +905,6 @@ checkGenRestoreTailCoords(
892905
UpdateResultFlags
893906
tailStatusToUpresFlags(TailStatus status);
894907

908+
909+
895910
#endif /* BLAS_KGEN_H_ */

src/library/blas/gens/fetch.c

+4-4
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ sprintfNormalizedBaseCoord(
359359
kstrcpy(kstr, name);
360360
}
361361
else {
362-
ksprintf(kstr, "(%s >> %d)", name, shift);
362+
ksprintf(kstr, "(uint)(%s >> %d)", name, shift);
363363
}
364364
}
365365

@@ -515,7 +515,7 @@ sprintfLeadingDimension(Kstring *ld, const FetchContext *fctx)
515515

516516
shift = findHighestSetBit(fctx->physTile.vecLen);
517517
if (shift != 0) {
518-
ksprintf(ld, "(%s >> %d)", varName, shift);
518+
ksprintf(ld, "(uint)(%s >> %d)", varName, shift);
519519
done = true;
520520
}
521521
}
@@ -564,10 +564,10 @@ sprintfGboundK(Kstring *kstr, const FetchContext *fctx)
564564
}
565565
else {
566566
if (fctx->addrMode & FETCH_ADDR_TAILK_PADD) {
567-
ksprintf(kstr, "((%s + %u) >> %d)", varK, vecLen - 1, shift);
567+
ksprintf(kstr, "(uint)((%s + %u) >> %d)", varK, vecLen - 1, shift);
568568
}
569569
else {
570-
ksprintf(kstr, "(%s >> %d)", varK, shift);
570+
ksprintf(kstr, "(uint)(%s >> %d)", varK, shift);
571571
}
572572
}
573573
}

src/library/blas/gens/gemm.c

+10-3
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ blockGen(
560560
kgenAddStmt(ctx, tmp);
561561
}
562562
else {
563-
sprintf(globalIdB, "get_global_id(%d)", 1-i);
563+
sprintf(globalIdB, "(uint)get_global_id(%d)", 1-i);
564564
}
565565

566566
if (!(isColMajA || isColMajB)) {
@@ -758,7 +758,7 @@ subgGen(
758758
vecLenA = gset.tileA.vecLen;
759759

760760
// channel offset based coordinate
761-
ksprintf(&exprK, "( get_group_id(0)*%lu + k )", staggered/vecLenA*vecLenA);
761+
ksprintf(&exprK, "( (uint)(get_group_id(0))*%lu + k )", staggered/vecLenA*vecLenA);
762762

763763
// starting code generation--------------------------------------------------
764764
pCtx = createKgenContext(pBuf, buflen, true);
@@ -1104,6 +1104,8 @@ blockCheckCalcDecomp(
11041104
int check)
11051105
{
11061106
bool ret = true;
1107+
bool ret_multiple = false;
1108+
int i;
11071109

11081110
DUMMY_ARG_USAGE(subdimsNum);
11091111

@@ -1114,7 +1116,12 @@ blockCheckCalcDecomp(
11141116
minSize = (dtype == TYPE_COMPLEX_DOUBLE) ? 1 : 2;
11151117
ret = decompSanityCheck(subdims, minSize, maxSize, 24, dtype, true);
11161118
ret = ret && (subdims[0].bwidth == subdims[1].bwidth);
1117-
ret = ret && (pgran->wgSize[0] * pgran->wgSize[1] == 64);
1119+
for(i = 0; i < ( (pgran->maxWorkGroupSize) / (pgran->wfSize) ); i++)
1120+
{
1121+
// ret_multiple becomes true if wgSize[0] * wgSize[1] is a non-zero multiple of wfSize that is not bigger than maxWorkGroupSize
1122+
ret_multiple = ret_multiple || ( pgran->wgSize[0] * pgran->wgSize[1] == pgran->wfSize * (i + 1) );
1123+
}
1124+
ret = ret && ret_multiple;
11181125
}
11191126
else {
11201127
calcPgranDedicated(pgran, subdims, 1, 3);

0 commit comments

Comments
 (0)