diff --git a/CMakeLists.txt b/CMakeLists.txt index 73928c4a..d097ed6b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,6 +62,7 @@ if(CMAKE_VERSION VERSION_LESS 3.20) endif() include(CheckIncludeFile) include(CheckCCompilerFlag) +include(CheckSymbolExists) include(GenerateExportHeader) include(CMakePackageConfigHelpers) include(GNUInstallDirs) @@ -168,7 +169,8 @@ target_link_libraries(zone-bench PRIVATE zone) check_include_file(endian.h HAVE_ENDIAN_H) check_include_file(unistd.h HAVE_UNISTD_H) -if(NOT HAVE_UNISTD_H) +check_symbol_exists(getopt unistd.h HAVE_GETOPT) +if(NOT HAVE_UNISTD_H OR NOT HAVE_GETOPT) target_include_directories( zone-bench PRIVATE $) target_sources(zone-bench PRIVATE compat/getopt.c) @@ -222,7 +224,7 @@ foreach(match ${matches}) set(prefix "${CMAKE_MATCH_1}") set(variable "${CMAKE_MATCH_2}") set(suffix "${CMAKE_MATCH_3}") - if(NOT DEFINED ${variable} OR + if(NOT DEFINED ${variable} OR NOT ${variable} OR ${variable} MATCHES "^[Ff][Aa][Ll][Ss][Ee]$" OR ${variable} MATCHES "^[Oo][Ff][Ff]") set(replace "${prefix}/* #undef ${variable} */${suffix}") diff --git a/src/attributes.h b/src/attributes.h index 533bcef9..5ca7e1b4 100644 --- a/src/attributes.h +++ b/src/attributes.h @@ -24,6 +24,12 @@ # define unlikely(params) (params) #else // _MSC_VER +#if defined __has_builtin +# define has_builtin(params) __has_builtin(params) +#else +# define has_builtin(params) (0) +#endif + # if (zone_has_attribute(always_inline) || zone_has_gnuc(3, 1)) && ! defined __NO_INLINE__ // Compilation using GCC 4.2.1 without optimizations fails. // sorry, unimplemented: inlining failed in call to ... @@ -52,7 +58,7 @@ # define no_sanitize_undefined __attribute__((no_sanitize("undefined"))) # elif zone_has_attribute(no_sanitize_undefined) // GCC 4.9.0 added the UndefinedBehaviorSanitizer (ubsan) and the - // no_sanitize_undefined function attribute. + // no_sanitize_undefined function attribute. # define no_sanitize_undefined # else # define no_sanitize_undefined diff --git a/src/bench.c b/src/bench.c index 5d3938d7..79e1525a 100644 --- a/src/bench.c +++ b/src/bench.c @@ -10,7 +10,7 @@ #include #include #include -#if _WIN32 +#if !defined(HAVE_GETOPT) # include "getopt.h" #else # include diff --git a/src/fallback/bits.h b/src/fallback/bits.h index 31857a5f..2bca65a4 100644 --- a/src/fallback/bits.h +++ b/src/fallback/bits.h @@ -29,15 +29,60 @@ static really_inline uint64_t leading_zeroes(uint64_t mask) else return 64; } + #else + static really_inline uint64_t trailing_zeroes(uint64_t mask) { +#if has_builtin(__builtin_ctzll) return (uint64_t)__builtin_ctzll(mask); +#else + // Code by Kim Walish from https://www.chessprogramming.org/BitScan. + // Distributed under CC BY-SA 3.0. + static const uint64_t magic = 0x03f79d71b4cb0a89ull; + const int magictable[64] = { + 0, 47, 1, 56, 48, 27, 2, 60, + 57, 49, 41, 37, 28, 16, 3, 61, + 54, 58, 35, 52, 50, 42, 21, 44, + 38, 32, 29, 23, 17, 11, 4, 62, + 46, 55, 26, 59, 40, 36, 15, 53, + 34, 51, 20, 43, 31, 22, 10, 45, + 25, 39, 14, 33, 19, 30, 9, 24, + 13, 18, 8, 12, 7, 6, 5, 63 + }; + + return magictable[((mask ^ (mask - 1)) * magic) >> 58]; +#endif } static really_inline uint64_t leading_zeroes(uint64_t mask) { +#if has_builtin(__builtin_clzll) return (uint64_t)__builtin_clzll(mask); +#else + // Code by Kim Walish from https://www.chessprogramming.org/BitScan. + // Distributed under CC BY-SA 3.0. + static const uint64_t magic = 0x03f79d71b4cb0a89ull; + const int magictable[64] = { + 63, 16, 62, 7, 15, 36, 61, 3, + 6, 14, 22, 26, 35, 47, 60, 2, + 9, 5, 28, 11, 13, 21, 42, 19, + 25, 31, 34, 40, 46, 52, 59, 1, + 17, 8, 37, 4, 23, 27, 48, 10, + 29, 12, 43, 20, 32, 41, 53, 18, + 38, 24, 49, 30, 44, 33, 54, 39, + 50, 45, 55, 51, 56, 57, 58, 0 + }; + + mask |= mask >> 1; + mask |= mask >> 2; + mask |= mask >> 4; + mask |= mask >> 8; + mask |= mask >> 16; + mask |= mask >> 32; + + return magictable[(mask * magic) >> 58]; +#endif } #endif // _MSC_VER #endif // BITS_H diff --git a/src/generic/base16.h b/src/generic/base16.h index 599d8f3b..3510944b 100644 --- a/src/generic/base16.h +++ b/src/generic/base16.h @@ -173,7 +173,13 @@ static really_inline int base16_stream_decode( // Duff's device again: switch (st.bytes) { +#if defined(__SUNPRO_C) +#pragma error_messages(off, E_STATEMENT_NOT_REACHED) +#endif for (;;) +#if defined(__SUNPRO_C) +#pragma error_messages(default, E_STATEMENT_NOT_REACHED) +#endif { case 0: base16_dec_loop_generic_32(&s, &slen, &o, &olen); diff --git a/src/generic/base64.h b/src/generic/base64.h index a04ba069..ac9ef29f 100644 --- a/src/generic/base64.h +++ b/src/generic/base64.h @@ -575,7 +575,13 @@ static really_inline int base64_stream_decode( // Duff's device again: switch (st.bytes) { +#if defined(__SUNPRO_C) +#pragma error_messages(off, E_STATEMENT_NOT_REACHED) +#endif for (;;) +#if defined(__SUNPRO_C) +#pragma error_messages(default, E_STATEMENT_NOT_REACHED) +#endif { case 0: dec_loop_generic_32(&s, &slen, &o, &olen); diff --git a/src/generic/endian.h b/src/generic/endian.h index 3e1b772c..62f19e23 100644 --- a/src/generic/endian.h +++ b/src/generic/endian.h @@ -86,17 +86,123 @@ #include #endif -#if !defined BYTE_ORDER -#error "missing definition of BYTE_ORDER" +#if !defined(LITTLE_ENDIAN) +# if defined(__ORDER_LITTLE_ENDIAN__) +# define LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__ +# else +# define LITTLE_ENDIAN 1234 +# endif #endif -#if !defined LITTLE_ENDIAN -#error "missing definition of LITTLE_ENDIAN" +#if !defined(BIG_ENDIAN) +# if defined(__ORDER_BIG_ENDIAN__) +# define BIG_ENDIAN __ORDER_BIG_ENDIAN__ +# else +# define BIG_ENDIAN 4321 +# endif #endif -#if !defined BIG_ENDIAN -#error "missing definition of BIG_ENDIAN" +#if !defined(BYTE_ORDER) +# if defined(__BYTE_ORDER__) +# define BYTE_ORDER __BYTE_ORDER__ +# elif defined(__BYTE_ORDER) +# define BYTE_ORDER __BYTE_ORDER +# elif defined(i386) || defined(__i386__) || defined(__i486__) || \ + defined(__i586__) || defined(__i686__) || \ + defined(__x86) || defined(__x86_64) || defined(__x86_64__) || \ + defined(__amd64) || defined(__amd64__) +# define BYTE_ORDER LITTLE_ENDIAN +# elif defined(sparc) || defined(__sparc) || defined(__sparc__) || \ + defined(POWERPC) || defined(mc68000) || defined(sel) +# define BYTE_ORDER BIG_ENDIAN +# else +# error "missing definition of BYTE_ORDER" +# endif #endif + +static really_inline uint16_t bswap16(uint16_t x) +{ + // Copied from src/common/lib/libc/gen/bswap16.c in NetBSD + // Written by Manuel Bouyer . + // Public domain. + return ((x << 8) & 0xff00) | ((x >> 8) & 0x00ff); +} + +static really_inline uint32_t bswap32(uint32_t x) +{ + // Copied from src/common/lib/libc/gen/bswap32.c in NetBSD + // Written by Manuel Bouyer . + // Public domain. + return ( (x << 24) & 0xff000000 ) | + ( (x << 8) & 0x00ff0000 ) | + ( (x >> 8) & 0x0000ff00 ) | + ( (x >> 24) & 0x000000ff ); +} + +static really_inline uint64_t bswap64(uint64_t x) +{ + // Copied from src/common/lib/libc/gen/bswap64.c in NetBSD + // Written by Manuel Bouyer . + // Public domain. + return ( (x << 56) & 0xff00000000000000ull ) | + ( (x << 40) & 0x00ff000000000000ull ) | + ( (x << 24) & 0x0000ff0000000000ull ) | + ( (x << 8) & 0x000000ff00000000ull ) | + ( (x >> 8) & 0x00000000ff000000ull ) | + ( (x >> 24) & 0x0000000000ff0000ull ) | + ( (x >> 40) & 0x000000000000ff00ull ) | + ( (x >> 56) & 0x00000000000000ffull ); +} + +# if BYTE_ORDER == LITTLE_ENDIAN +# define htobe(bits, x) bswap ## bits((x)) +# define htole(bits, x) (x) +# define betoh(bits, x) bswap ## bits((x)) +# define letoh(bits, x) (x) +# else +# define htobe(bits, x) (x) +# define htole(bits, x) bswap ## bits((x)) +# define betoh(bits, x) (x) +# define letoh(bits, x) bswap ## bits((x)) +# endif + +# if !defined htobe16 +# define htobe16(x) htobe(16,(x)) +# endif +# if !defined htobe32 +# define htobe32(x) htobe(32,(x)) +# endif +# if !defined htobe64 +# define htobe64(x) htobe(64,(x)) +# endif +# if !defined htole16 +# define htole16(x) htole(16,(x)) +# endif +# if !defined htole32 +# define htole32(x) htole(32,(x)) +# endif +# if !defined htole64 +# define htole64(x) htole(64,(x)) +# endif + +# if !defined be16toh +# define be16toh(x) betoh(16,(x)) +# endif +# if !defined be32toh +# define be32toh(x) betoh(32,(x)) +# endif +# if !defined be64toh +# define be64toh(x) betoh(64,(x)) +# endif +# if !defined le16toh +# define le16toh(x) letoh(16,(x)) +# endif +# if !defined le32toh +# define le32toh(x) letoh(32,(x)) +# endif +# if !defined le64toh +# define le64toh(x) letoh(64,(x)) +# endif #endif #endif // ENDIAN_H diff --git a/src/generic/types.h b/src/generic/types.h index aa1d6f6a..12170cbf 100644 --- a/src/generic/types.h +++ b/src/generic/types.h @@ -66,15 +66,8 @@ static really_inline int32_t parse_text( #define UNKNOWN_TYPE(code) \ { { { "", 0 }, code }, 0, false, false, { 0, NULL }, check_generic_rr, parse_unknown_rdata } -#if _WIN32 -// FIXME: check functions can be simplified as int32_t is wide enough to -// represent errors and the maximum length of rdata. -#include -typedef SSIZE_T ssize_t; -#endif - nonnull((1,2,3,4)) -static really_inline ssize_t check_bytes( +static really_inline int32_t check_bytes( parser_t *parser, const type_info_t *type, const rdata_info_t *field, @@ -85,7 +78,7 @@ static really_inline ssize_t check_bytes( (void)data; if (length < size) SYNTAX_ERROR(parser, "Missing %s in %s", NAME(field), NAME(type)); - return (ssize_t)size; + return (int32_t)size; } #define check_int8(...) check_bytes(__VA_ARGS__, sizeof(uint8_t)) @@ -101,7 +94,7 @@ static really_inline ssize_t check_bytes( #define check_ilnp64(...) check_bytes(__VA_ARGS__, sizeof(uint64_t)) nonnull((1,2,3,4)) -static really_inline ssize_t check_ttl( +static really_inline int32_t check_ttl( parser_t *parser, const type_info_t *type, const rdata_info_t *field, @@ -123,55 +116,55 @@ static really_inline ssize_t check_ttl( } zone_nonnull((1,2,3,4)) -static really_inline ssize_t check_name( +static really_inline int32_t check_name( parser_t *parser, const type_info_t *type, const rdata_info_t *field, const uint8_t *data, const size_t length) { - size_t label = 0, count = 0; - while (count < length) { + int32_t label = 0, count = 0; + while (count < (int32_t)length) { label = data[count]; count += 1 + label; if (!label) break; } - if (!count || count > length) + if (!count || count > (int32_t)length) SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); - return (ssize_t)count; + return count; } zone_nonnull((1,2,3,4)) -static really_inline ssize_t check_string( +static really_inline int32_t check_string( parser_t *parser, const type_info_t *type, const rdata_info_t *field, const uint8_t *data, const size_t length) { - size_t count; + int32_t count; - if (!length || (count = 1 + (size_t)data[0]) > length) + if (!length || (count = 1 + (size_t)data[0]) > (int32_t)length) SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); - return (ssize_t)count; + return count; } zone_nonnull((1,2,3,4)) -static really_inline ssize_t check_nsec( +static really_inline int32_t check_nsec( parser_t *parser, const type_info_t *type, const rdata_info_t *field, const uint8_t *data, const size_t length) { - size_t count = 0; + int32_t count = 0; int32_t last_window = -1; - while ((count + 2) < length) { + while ((count + 2) < (int32_t)length) { const int32_t window = (int32_t)data[0]; const size_t blocks = (size_t)data[1]; if (window <= last_window) @@ -184,17 +177,17 @@ static really_inline ssize_t check_nsec( last_window = window; } - if (count != length) + if (count != (int32_t)length) SYNTAX_ERROR(parser, "Invalid %s in %s", NAME(field), NAME(type)); - return (ssize_t)count; + return count; } zone_nonnull((1)) -static really_inline int32_t check(size_t *length, ssize_t count) +static really_inline int32_t check(size_t *length, int32_t count) { if (count < 0) - return (int32_t)count; + return count; *length += (size_t)count; return 0; } diff --git a/src/haswell/base32.h b/src/haswell/base32.h index 0e15ac27..d2d44bfc 100644 --- a/src/haswell/base32.h +++ b/src/haswell/base32.h @@ -10,8 +10,6 @@ #define BASE32_H #include -#include // update if we need to support Windows. - ////////////////////////// /// Source: Wojciech Muła, Daniel Lemire, Faster Base64 Encoding and Decoding Using AVX2 Instructions, @@ -44,7 +42,7 @@ static size_t base32hex_avx(uint8_t *dst, const uint8_t *src) { unsigned int m = (unsigned)_mm256_movemask_epi8(check); if (m) { - int length = __builtin_ctz(m); + int length = (int)trailing_zeroes(m); if (length == 0) { break; } diff --git a/src/haswell/bits.h b/src/haswell/bits.h index 6e4ef7de..a9e90c05 100644 --- a/src/haswell/bits.h +++ b/src/haswell/bits.h @@ -14,7 +14,12 @@ #include static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { +#if has_builtin(__builtin_uaddll_overflow) return __builtin_uaddll_overflow(value1, value2, (unsigned long long *)result); +#else + *result = value1 + value2; + return *result < value1; +#endif } static inline uint64_t count_ones(uint64_t bits) { @@ -23,7 +28,7 @@ static inline uint64_t count_ones(uint64_t bits) { no_sanitize_undefined static inline uint64_t trailing_zeroes(uint64_t bits) { - return (uint64_t)__builtin_ctzll(bits); + return (uint64_t)_tzcnt_u64(bits); } // result might be undefined when bits is zero @@ -32,15 +37,25 @@ static inline uint64_t clear_lowest_bit(uint64_t bits) { } static inline uint64_t leading_zeroes(uint64_t bits) { - return (uint64_t)__builtin_clzll(bits); + return (uint64_t)_lzcnt_u64(bits); } static inline uint64_t prefix_xor(const uint64_t bitmask) { + __m128i all_ones = _mm_set1_epi8('\xFF'); + __m128i mask = _mm_set_epi64x(0ULL, (long long)bitmask); +#if defined __SUNPRO_C + // Oracle Developer Studio has issues generating vpclmulqdq + // Oracle Solaris and Intel assembler use the opposite order for source and + // destination operands. See x86 Assemble Language Reference Manual. + __asm volatile ("vpclmulqdq $0,%[all_ones],%[mask],%[mask]" + : [mask] "+x" (mask) + : [all_ones] "x" (all_ones)); +#else // There should be no such thing with a processor supporting avx2 // but not clmul. - __m128i all_ones = _mm_set1_epi8('\xFF'); - __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, (long long)bitmask), all_ones, 0); - return (uint64_t)_mm_cvtsi128_si64(result); + mask = _mm_clmulepi64_si128(mask, all_ones, 0); +#endif + return (uint64_t)_mm_cvtsi128_si64(mask); } #endif // BITS_H diff --git a/src/westmere/base32.h b/src/westmere/base32.h index 981bb577..e049382b 100644 --- a/src/westmere/base32.h +++ b/src/westmere/base32.h @@ -10,7 +10,7 @@ #define BASE32_H #include -#include // update if we need to support Windows. +#include ////////////////////////// @@ -37,7 +37,7 @@ static size_t base32hex_sse(uint8_t *dst, const uint8_t *src) { unsigned int m = (unsigned)_mm_movemask_epi8(check); if (m) { - int length = __builtin_ctz(m); + int length = (int)trailing_zeroes(m); if (length == 0) { break; } diff --git a/src/westmere/bits.h b/src/westmere/bits.h index d9570e98..a73f501f 100644 --- a/src/westmere/bits.h +++ b/src/westmere/bits.h @@ -13,7 +13,12 @@ #include static inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) { +#if has_builtin(__builtin_uaddll_overflow) return __builtin_uaddll_overflow(value1, value2, (unsigned long long *)result); +#else + *result = value1 + value2; + return *result < value1; +#endif } static inline uint64_t count_ones(uint64_t input_num) { @@ -21,8 +26,16 @@ static inline uint64_t count_ones(uint64_t input_num) { } no_sanitize_undefined -static inline uint64_t trailing_zeroes(uint64_t input_num) { - return (uint64_t)__builtin_ctzll(input_num); +static inline uint64_t trailing_zeroes(uint64_t mask) { +#if has_builtin(__builtin_ctzll) + return (uint64_t)__builtin_ctzll(mask); +#else + uint64_t result; + asm("bsfq %[mask], %[result]" + : [result] "=r" (result) + : [mask] "mr" (mask)); + return result; +#endif } // result might be undefined when input_num is zero @@ -30,8 +43,16 @@ static inline uint64_t clear_lowest_bit(uint64_t input_num) { return input_num & (input_num-1); } -static inline uint64_t leading_zeroes(uint64_t input_num) { - return (uint64_t)__builtin_clzll(input_num); +static inline uint64_t leading_zeroes(uint64_t mask) { +#if has_builtin(__builtin_clzll) + return (uint64_t)__builtin_clzll(mask); +#else + uint64_t result; + asm("bsrq %[mask], %[result]" : + [result] "=r" (result) : + [mask] "mr" (mask)); + return 63 ^ (int)result; +#endif } static inline uint64_t prefix_xor(const uint64_t bitmask) { diff --git a/src/westmere/time.h b/src/westmere/time.h index 937e0948..6e183d19 100644 --- a/src/westmere/time.h +++ b/src/westmere/time.h @@ -59,8 +59,13 @@ static bool sse_parse_time(const char *date_string, uint32_t *time_in_second) { // or if months are in the range 12 to 19. __m128i abide_by_limits = _mm_subs_epu8(v, limit); // must be all zero +#if defined __SUNPRO_C + __m128i byteflip = _mm_setr_epi64((__m64){0x0607040502030001ULL}, + (__m64){0x0e0f0c0d0a0b0809ULL}); +#else __m128i byteflip = _mm_setr_epi64((__m64)0x0607040502030001ULL, (__m64)0x0e0f0c0d0a0b0809ULL); +#endif __m128i little_endian = _mm_shuffle_epi8(v, byteflip); __m128i limit16 = _mm_setr_epi16(0x0909, 0x0909, 0x0102, 0x0301, 0x0203, diff --git a/src/zone.c b/src/zone.c index f1051ae6..257d383a 100644 --- a/src/zone.c +++ b/src/zone.c @@ -27,6 +27,8 @@ typedef zone_file_t file_t; #if _MSC_VER # define strcasecmp(s1, s2) _stricmp(s1, s2) # define strncasecmp(s1, s2, n) _strnicmp(s1, s2, n) +#else +#include #endif static const char not_a_file[] = ""; diff --git a/tests/base32.c b/tests/base32.c index c657f6ff..86f78c6e 100644 --- a/tests/base32.c +++ b/tests/base32.c @@ -12,6 +12,7 @@ #include #include "zone.h" +#include "attributes.h" #include "generic/endian.h" static int32_t add_rr( diff --git a/tests/time.c b/tests/time.c index 7dab690b..0183babb 100644 --- a/tests/time.c +++ b/tests/time.c @@ -13,6 +13,7 @@ #include #include "zone.h" +#include "attributes.h" #include "generic/endian.h" static int32_t add_rr(