diff --git a/.github/workflows/linux_blas.yml b/.github/workflows/linux_blas.yml new file mode 100644 index 000000000000..db2480c67bbd --- /dev/null +++ b/.github/workflows/linux_blas.yml @@ -0,0 +1,282 @@ +name: BLAS tests (Linux) + +# This file is meant for testing different BLAS/LAPACK flavors and build +# options on Linux. All other yml files for Linux will only test without BLAS +# (mostly because that's easier and faster to build) or with the same 64-bit +# OpenBLAS build that is used in the wheel jobs. +# +# Jobs and their purpose: +# +# - openblas32_stable_nightly: +# Uses the 32-bit OpenBLAS builds, both the latest stable release +# and a nightly build. +# - openblas_no_pkgconfig_fedora: +# Test OpenBLAS on Fedora. Fedora doesn't ship .pc files for OpenBLAS, +# hence this exercises the "system dependency" detection method. +# - flexiblas_fedora: +# Tests FlexiBLAS (the default on Fedora for its own packages), via +# pkg-config. FlexiBLAS allows runtime switching of BLAS/LAPACK +# libraries, which is a useful capability (not tested in this job). +# - openblas_cmake: +# Tests whether OpenBLAS LP64 is detected correctly when only CMake +# and not pkg-config is installed. +# - netlib-debian: +# Installs libblas/liblapack, which in Debian contains libcblas within +# libblas. +# - netlib-split: +# Installs vanilla Netlib blas/lapack with separate libcblas, which is +# the last option tried in auto-detection. +# - mkl: +# Tests MKL installed from PyPI (because easiest/fastest, if broken) in +# 3 ways: both LP64 and ILP64 via pkg-config, and then using the +# Single Dynamic Library (SDL, or `libmkl_rt`). 
+# - blis: +# Simple test for LP64 via pkg-config +# - atlas: +# Simple test for LP64 via pkg-config + +on: + pull_request: + branches: + - main + - maintenance/** + push: + branches: + - ci-blas + +defaults: + run: + shell: bash + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +permissions: + contents: read # to fetch code (actions/checkout) + +jobs: + # openblas32_stable_nightly: (see linux_meson.yml for scipy-openblas32 test job) + # atlas: (see linux_meson.yml for ATLAS test job) + # accelerate: (see macos_meson.yml) + + openblas_no_pkgconfig_fedora: + #if: "github.repository == 'rgommers/scipy'" + if: false # WORKS + runs-on: ubuntu-latest + container: fedora:39 + name: "OpenBLAS (Fedora, no pkg-config)" # openblas.pc not shipped by Fedora + steps: + - name: Install system dependencies + run: | + dnf install git gcc-gfortran g++ python3-devel openblas-devel -y + + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + submodules: recursive + fetch-depth: 0 + + - name: Install dependencies from PyPI + run: | + pip install -r requirements/build.txt + pip install build + pip install git+https://github.com/numpy/meson.git@main-numpymeson + + - name: Build + run: | + python3 -m build -wnx -Cbuild-dir=build + python3 -m pip install dist/scipy*.whl + + - name: Test + run: | + pip install -r requirements/test.txt + cd tools + pytest --pyargs scipy -n2 -m "not slow" + + flexiblas_fedora: + #if: "github.repository == 'rgommers/scipy'" + if: false # WORKS + runs-on: ubuntu-latest + container: fedora:39 + name: "FlexiBLAS Fedora" # flexiblas.pc is shipped by Fedora + steps: + - name: Install system dependencies + run: | + dnf install git gcc-gfortran g++ python3-devel flexiblas-devel -y + + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + submodules: recursive + fetch-depth: 0 + + - name: Install dependencies from PyPI + run: | + pip 
install -r requirements/build.txt + pip install build + pip install git+https://github.com/numpy/meson.git@main-numpymeson + + - name: Build + run: | + python3 -m build -wnx -Cbuild-dir=build + python3 -m pip install dist/scipy*.whl + + - name: Test + run: | + pip install -r requirements/test.txt + cd tools + pytest --pyargs scipy -n2 -m "not slow" + + + netlib-debian: + #if: "github.repository == 'rgommers/scipy'" + if: false # WORKS + runs-on: ubuntu-latest + name: "Debian libblas/liblapack" + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + submodules: recursive + fetch-depth: 0 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + pip install -r requirements/all.txt + pip install git+https://github.com/numpy/meson.git@main-numpymeson + sudo apt-get install liblapack-dev pkg-config + + - name: Build + run: | + python dev.py build + + - name: Test + run: | + python dev.py test -j2 + + + netlib-split: + #if: "github.repository == 'rgommers/scipy'" + if: false # WORKS + runs-on: ubuntu-latest + container: opensuse/tumbleweed + name: "OpenSUSE Netlib BLAS/LAPACK" + steps: + - name: Install system dependencies + run: | + # No blas.pc on OpenSUSE as of Nov 2023, so no need to install pkg-config. + # If it is needed in the future, use install name `pkgconf-pkg-config` + zypper install -y git gcc-c++ gcc-fortran python3-pip python311 python3-devel blas cblas lapack + + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + submodules: recursive + fetch-depth: 0 + + - name: Install PyPI dependencies + run: | + pip install --break-system-packages -r requirements/build.txt + pip install --break-system-packages git+https://github.com/numpy/meson.git@main-numpymeson + + - name: Build + run: | + pip install . 
--break-system-packages --no-build-isolation -v -Csetup-args=-Dblas=blas -Csetup-args=-Dlapack=lapack + + - name: Test + run: | + pip install --break-system-packages pytest hypothesis pytest-xdist threadpoolctl pooch + cd tools + pytest --pyargs scipy -n2 -m "not slow" + + + mkl: + if: "github.repository == 'rgommers/scipy'" + runs-on: ubuntu-latest + name: "MKL (LP64 split, SDL)" + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + submodules: recursive + fetch-depth: 0 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + pip install -r requirements/all.txt + pip install git+https://github.com/numpy/meson.git@main-numpymeson + pip install "mkl==2023.2.0" "mkl-devel==2023.2.0" + + - name: Repair MKL pkg-config files and symlinks + run: | + # MKL 2023.2 works when installed from conda-forge (except for `-iomp` + # and `-tbb` pkg-config files), Spack, or with the standalone Intel + # installer. The standalone installer is the worst option, since it's + # large and clumsy to install and requires running a setvars.sh script + # before things work. The PyPI MKL packages are broken and need the + # fixes in this step. For details, see + # https://github.com/conda-forge/intel_repack-feedstock/issues/34 + cd $Python3_ROOT_DIR/lib/pkgconfig + sed -i 's/\/intel64//g' mkl*.pc + # add the expected .so -> .so.2 symlinks to fix linking + cd .. 
+ for i in $( ls libmkl*.so.2 ); do ln -s $i ${i%.*}; done + + - name: Build with defaults (LP64) + run: | + pkg-config --libs mkl-dynamic-lp64-seq # check link flags + python dev.py build + + - name: Test + run: python dev.py test + + - name: Build without pkg-config (default options, SDL) + run: | + git clean -xdf > /dev/null + pushd $Python3_ROOT_DIR/lib/pkgconfig + rm mkl*.pc + popd + export MKLROOT=$Python3_ROOT_DIR + python dev.py build + + - name: Test + run: python dev.py test + + + blis: + #if: "github.repository == 'rgommers/scipy'" + if: false # WORKS + runs-on: ubuntu-latest + name: "BLIS" + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + submodules: recursive + fetch-depth: 0 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + pip install -r requirements/all.txt + pip install git+https://github.com/numpy/meson.git@main-numpymeson + sudo apt-get install libblis-dev libopenblas-dev pkg-config + + - name: Add BLIS pkg-config file + run: | + # Needed because blis.pc missing in Debian: + # https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=989076 + # The alternative here would be to use another distro or Miniforge + sudo cp ci/_blis_debian.pc /usr/lib/x86_64-linux-gnu/pkgconfig/blis.pc + # Check if the patch works: + pkg-config --libs blis + pkg-config --cflags blis + + - name: Build + run: python dev.py build -C-Dblas=blis + + - name: Test + run: python dev.py test diff --git a/.github/workflows/linux_mkl_ilp64.yml b/.github/workflows/linux_mkl_ilp64.yml new file mode 100644 index 000000000000..bf35aaaf1e6e --- /dev/null +++ b/.github/workflows/linux_mkl_ilp64.yml @@ -0,0 +1,108 @@ +name: MKL LP64/ILP64 tests (Linux) + +# This file is meant for testing different BLAS/LAPACK flavors and build +# options on Linux. 
All other yml files for Linux will only test without BLAS +# (mostly because that's easier and faster to build) or with the same 64-bit +# OpenBLAS build that is used in the wheel jobs. +# +# Jobs and their purpose: +# +# - mkl: +# Tests MKL installed from PyPI (because easiest/fastest, if broken) in +# 3 ways: both LP64 and ILP64 via pkg-config, and then using the +# Single Dynamic Library (SDL, or `libmkl_rt`). + + +on: + pull_request: + branches: + - main + - maintenance/** + +defaults: + run: + shell: bash + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +permissions: + contents: read # to fetch code (actions/checkout) + +jobs: + + mkl-lp64: + if: "github.repository == 'ev-br/scipy'" + runs-on: ubuntu-latest + name: "MKL (LP64 split, SDL)" + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + submodules: recursive + fetch-depth: 0 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y gfortran + pip install cython numpy pybind11 pythran pytest hypothesis pytest-xdist pooch + pip install -r requirements/dev.txt + pip install git+https://github.com/numpy/meson.git@main-numpymeson + pip install mkl mkl-devel + + - name: Build with defaults (LP64) + run: | + pkg-config --libs mkl-dynamic-lp64-seq # check link flags + python dev.py build + + - name: Test + run: python dev.py test + + - name: Build without pkg-config (default options, SDL) + run: | + git clean -xdf > /dev/null + pushd $Python3_ROOT_DIR/lib/pkgconfig + rm mkl*.pc + popd + export MKLROOT=$Python3_ROOT_DIR + python dev.py build + + - name: Test + run: python dev.py test -j 2 + + + mkl-ilp64: + if: "github.repository == 'ev-br/scipy'" + runs-on: ubuntu-latest + name: "MKL ILP64" + steps: + - uses: 
actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + submodules: recursive + fetch-depth: 0 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y gfortran + pip install cython numpy pybind11 pythran pytest hypothesis pytest-xdist pooch + pip install -r requirements/dev.txt + pip install git+https://github.com/numpy/meson.git@main-numpymeson + pip install mkl mkl-devel + + - name: Build with ILP64 + run: | + pkg-config --libs mkl-dynamic-ilp64-seq # check link flags + python dev.py build -C-Dblas-order=mkl -C-Duse-ilp64=true + + - name: Test + run: python dev.py test -j 2 + + diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index cc2f83189737..fdfce1cf8935 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -4,6 +4,7 @@ on: push: branches: - maintenance/** + - ci-blas pull_request: branches: - main @@ -217,3 +218,38 @@ jobs: pip install pooch pytest hypothesis python dev.py -n test + + accelerate: + name: Accelerate (LP64) + if: "github.repository == 'rgommers/scipy'" + runs-on: macos-13 + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + submodules: recursive + fetch-depth: 0 + + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + with: + python-version: '3.10' + + - uses: maxim-lobanov/setup-xcode@60606e260d2fc5762a71e64e74b2174e8ea3c8bd # v1.6.0 + with: + xcode-version: '14.3' + + - name: Install gfortran with Homebrew + run: | + brew install gfortran + brew list + gfortran --version + + - name: Install dependencies + run: | + pip install -r requirements/all.txt + pip install git+https://github.com/numpy/meson.git@main-numpymeson + + - name: Build against Accelerate (LP64) + run: python dev.py build + + - name: Test + run: python dev.py test -j2 diff --git 
a/ci/_blis_debian.pc b/ci/_blis_debian.pc new file mode 100644 index 000000000000..a4a5c7fbb244 --- /dev/null +++ b/ci/_blis_debian.pc @@ -0,0 +1,8 @@ +libdir=/usr/lib/x86_64-linux-gnu/blis-pthread/ +includedir=/usr/include/x86_64-linux-gnu/blis-pthread + +Name: BLIS +Description: BLAS-like Library Instantiation Software Framework - specific to SciPy CI job, needed until Debian ships blis.pc (see .github/workflows/linux_blas.yml) +Version: 0.9.0-scipy-ci +Libs: -L${libdir} -lblis +Cflags: -I${includedir} diff --git a/meson.build b/meson.build index 371d983a59d1..cdbcc0a5f9ed 100644 --- a/meson.build +++ b/meson.build @@ -9,8 +9,6 @@ project( 'b_ndebug=if-release', 'c_std=c17', 'cpp_std=c++17', - 'blas=openblas', - 'lapack=openblas' ], ) diff --git a/meson.options b/meson.options index 3257cb8a8ff5..096d0aa0f6d8 100644 --- a/meson.options +++ b/meson.options @@ -1,7 +1,17 @@ -option('blas', type: 'string', value: 'openblas', - description: 'option for BLAS library switching') -option('lapack', type: 'string', value: 'openblas', - description: 'option for LAPACK library switching') +option('blas', type: 'string', value: 'auto', + description: 'Option for BLAS library selection. By default, try to find any in the order given by `blas-order`') +option('lapack', type: 'string', value: 'auto', + description: 'Option for LAPACK library selection. By default, try to find any in the order given by `lapack-order`') +option('blas-order', type: 'array', value: ['auto'], + description: 'Order of BLAS libraries to search for. E.g.: mkl,openblas,blis,blas') +option('lapack-order', type: 'array', value: ['auto'], + description: 'Order of LAPACK libraries to search for. 
"""Helpers for building against BLAS/LAPACK libraries with renamed symbols.

The functions here generate preprocessor include files that map plain
BLAS/LAPACK routine names onto prefixed/suffixed ones (e.g. ILP64 builds
exporting ``dgemm_64_``), and create patched copies of Fortran sources that
include those mappings.
"""
import os
import re
import sys
import sysconfig

import numpy as np  # noqa: F401  (kept: pre-existing module-level import)

__all__ = ['blas_ilp64_pre_build_hook', 'generic_pre_build_hook',
           'write_file_content']


# See _fflag_lp64/ilp64 in scipy/meson.build for these
# def get_fcompiler_ilp64_flags():
# def get_fcompiler_macro_include_flags(path):

# Done in scipy/meson.build
# def get_f2py_int64_options():


def _get_build_src_dir():
    """Return the relative directory that receives patched source copies.

    The name follows the ``build/src.<platform>-<major>.<minor>`` layout.
    """
    plat_specifier = ".{}-{}.{}".format(sysconfig.get_platform(),
                                        *sys.version_info[:2])
    return os.path.join('build', 'src' + plat_specifier)


def _blas_ilp64_pre_build_hook(outdir, prefix='', suffix=''):
    """Write the BLAS/LAPACK symbol-renaming include files into `outdir`.

    Two files are (re)written, only when their content changes:
    ``blas64-prefix-defines.inc`` (Fortran) and ``blas64-prefix-defines.h``
    (C; additionally maps the trailing-underscore spellings).

    Parameters
    ----------
    outdir : str
        Directory in which the include files are created.
    prefix : str, optional
        Text prepended to every symbol name.
    suffix : str, optional
        Symbol suffix. If non-empty it must end with ``'_'``; the trailing
        underscore is stripped and the remainder is appended after
        ``<symbol>_``.

    Raises
    ------
    RuntimeError
        If `suffix` is non-empty and does not end with ``'_'``.
    """
    if suffix:
        if not suffix.endswith('_'):
            # Symbol suffix has to end with '_' to be Fortran-compatible
            raise RuntimeError("BLAS/LAPACK has incompatible symbol suffix: "
                               "{!r}".format(suffix))

        suffix = suffix[:-1]

    # When symbol prefix/suffix is present, we have to patch sources

    # Create name-mapping include files
    include_name_f = 'blas64-prefix-defines.inc'
    include_name_c = 'blas64-prefix-defines.h'
    include_fn_f = os.path.join(outdir, include_name_f)
    include_fn_c = os.path.join(outdir, include_name_c)

    defines = []
    for symbol in get_blas_lapack_symbols():
        target = f'{prefix}{symbol}_{suffix}'
        spellings = [symbol, symbol.upper()]

        # Code generation may give source codes with mixed-case names
        for j in (1, 2):
            spellings.append(symbol[:j].lower() + symbol[j:].upper())
            spellings.append(symbol[:j].upper() + symbol[j:].lower())

        defines.extend(f'#define {name} {target}\n' for name in spellings)

    # Join once instead of repeated string concatenation in the loop.
    text = ''.join(defines)
    write_file_content(include_fn_f, text)

    # The C header also needs the variants carrying the Fortran underscore.
    ctext = re.sub(r'^#define (.*) (.*)$', r'#define \1_ \2_', text,
                   flags=re.M)
    write_file_content(include_fn_c, text + "\n" + ctext)

    # Patch sources to include it
    def patch_source(filename, old_text):
        text = f'#include "{include_name_f}"\n'
        text += old_text
        return text

    # TODO: update source file names
    #return generic_pre_build_hook(patch_source_func=patch_source,
    #                              source_fnpart="_blas64")


# Public name advertised in ``__all__``; without it ``from ... import *``
# fails with AttributeError.
blas_ilp64_pre_build_hook = _blas_ilp64_pre_build_hook


def generic_pre_build_hook(cmd, ext, fcompiler_flags, patch_source_func=None,
                           source_fnpart=None):
    """
    Pre-build hook for patching sources.

    Parameters
    ----------
    cmd : distutils.core.Command
        Hook input. Current distutils command (build_clib or build_ext).
        Not used by this implementation.
    ext : dict or numpy.distutils.extension.Extension
        Hook input. Configuration information for library (dict, build_clib)
        or extension (numpy.distutils.extension.Extension, build_ext).
        Its ``sources`` and ``depends`` lists are updated in place.
    fcompiler_flags : dict
        Dictionary of ``{'compiler_name': ['-flag1', ...]}`` containing
        compiler flags to set. Not used by this implementation (see the
        note on ``scipy/meson.build`` at the top of this module).
    patch_source_func : callable, optional
        Function patching sources, see `_generic_patch_sources` below.
        If None, the hook does nothing.
    source_fnpart : str, optional
        String to append to the modified file basename before extension.
        Required when `patch_source_func` is given.

    """
    if patch_source_func is None:
        return

    # Library configs are plain dicts; extensions expose attributes.
    if isinstance(ext, dict):
        sources = ext['sources']
        depends = ext.setdefault('depends', [])
    else:
        sources = ext.sources
        depends = ext.depends

    # Mangle sources: the originals become dependencies of the patched
    # copies, which replace them in the source list (in place).
    depends.extend(sources)
    sources[:] = _generic_patch_sources(sources, patch_source_func,
                                        source_fnpart)


def _generic_patch_sources(filenames, patch_source_func, source_fnpart,
                           root_dir=None):
    """
    Patch Fortran sources, creating new source files.

    Parameters
    ----------
    filenames : list
        List of Fortran source files to patch.
        Files not ending in ``.f`` or ``.f90`` are left unaltered.
    patch_source_func : callable(filename, old_contents) -> new_contents
        Function to apply to file contents, returning new file contents
        as a string.
    source_fnpart : str
        String to append to the modified file basename before extension.
    root_dir : str, optional
        Source root directory. Default: cwd

    Returns
    -------
    new_filenames : list
        List of names of the newly created patched sources.

    Raises
    ------
    ValueError
        If a Fortran source lies neither under the build source directory
        nor under `root_dir`.

    """
    new_filenames = []

    if root_dir is None:
        root_dir = os.getcwd()

    root_dir = os.path.abspath(root_dir)
    src_dir = os.path.join(root_dir, _get_build_src_dir())

    for src in filenames:
        base, ext = os.path.splitext(os.path.basename(src))

        if ext not in ('.f', '.f90'):
            new_filenames.append(src)
            continue

        with open(src) as fsrc:
            text = patch_source_func(src, fsrc.read())

        # Generate useful target directory name under src_dir
        src_path = os.path.abspath(os.path.dirname(src))

        for basedir in [src_dir, root_dir]:
            if os.path.commonpath([src_path, basedir]) == basedir:
                rel_path = os.path.relpath(src_path, basedir)
                break
        else:
            raise ValueError(f"{src!r} not under {root_dir!r}")

        dst = os.path.join(src_dir, rel_path, base + source_fnpart + ext)
        write_file_content(dst, text)

        new_filenames.append(dst)

    return new_filenames


def write_file_content(filename, content):
    """
    Write content to file, but only if it differs from the current one.

    Missing parent directories are created. A bare file name (no directory
    component) is written relative to the current working directory.
    """
    if os.path.isfile(filename):
        with open(filename) as f:
            old_content = f.read()

        if old_content == content:
            return

    dirname = os.path.dirname(filename)
    # dirname is '' for a bare file name; os.makedirs('') would raise.
    if dirname:
        os.makedirs(dirname, exist_ok=True)

    with open(filename, 'w') as f:
        f.write(content)


def get_blas_lapack_symbols():
    """Return a sorted tuple of the BLAS/LAPACK symbol names.

    Names are collected from the Cython BLAS/LAPACK signature files and the
    exclusion lists of the signature generator script in ``../linalg``
    (relative to this file). The result is cached on the function object.
    """
    cached = getattr(get_blas_lapack_symbols, 'cached', None)
    if cached is not None:
        return cached

    # Obtain symbol list from Cython Blas/Lapack interface
    srcdir = os.path.join(os.path.dirname(__file__), os.pardir, 'linalg')

    symbols = []

    # Get symbols from the generated files
    for fn in ['cython_blas_signatures.txt', 'cython_lapack_signatures.txt']:
        with open(os.path.join(srcdir, fn)) as f:
            for line in f:
                m = re.match(r"^\s*[a-z]+\s+([a-z0-9]+)\(", line)
                if m:
                    symbols.append(m.group(1))

    # Get the rest from the generator script
    # (we cannot import it directly here, so use exec)
    # NOTE: this executes a script from the source tree.
    sig_fn = os.path.join(srcdir, '_cython_signature_generator.py')
    with open(sig_fn) as f:
        code = f.read()
    ns = {'__name__': ''}
    exec(code, ns)
    symbols.extend(ns['blas_exclusions'])
    symbols.extend(ns['lapack_exclusions'])

    get_blas_lapack_symbols.cached = tuple(sorted(set(symbols)))
    return get_blas_lapack_symbols.cached
gnu_symbol_visibility: 'hidden', diff --git a/scipy/_build_utils/src/npy_cblas.h b/scipy/_build_utils/src/npy_cblas.h index de65ad903284..d68af2846bab 100644 --- a/scipy/_build_utils/src/npy_cblas.h +++ b/scipy/_build_utils/src/npy_cblas.h @@ -26,6 +26,21 @@ enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; #define CBLAS_INDEX size_t /* this may vary between platforms */ +#ifdef ACCELERATE_NEW_LAPACK + #if __MAC_OS_X_VERSION_MAX_ALLOWED < 130300 + #ifdef HAVE_BLAS_ILP64 + #error "Accelerate ILP64 support is only available with macOS 13.3 SDK or later" + #endif + #else + #define NO_APPEND_FORTRAN + #ifdef HAVE_BLAS_ILP64 + #define BLAS_SYMBOL_SUFFIX $NEWLAPACK$ILP64 + #else + #define BLAS_SYMBOL_SUFFIX $NEWLAPACK + #endif + #endif +#endif + #ifdef NO_APPEND_FORTRAN #define BLAS_FORTRAN_SUFFIX #else @@ -50,7 +65,6 @@ enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; #define BLAS_FUNC_CONCAT(name,prefix,suffix,suffix2) prefix ## name ## suffix ## suffix2 #define BLAS_FUNC_EXPAND(name,prefix,suffix,suffix2) BLAS_FUNC_CONCAT(name,prefix,suffix,suffix2) -#define CBLAS_FUNC(name) BLAS_FUNC_EXPAND(name,BLAS_SYMBOL_PREFIX,,BLAS_SYMBOL_SUFFIX) /* * Use either the OpenBLAS scheme with the `64_` suffix behind the Fortran * compiler symbol mangling, or the MKL scheme (and upcoming @@ -62,6 +76,12 @@ enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; #define BLAS_FUNC(name) BLAS_FUNC_EXPAND(name,BLAS_SYMBOL_PREFIX,BLAS_SYMBOL_SUFFIX,BLAS_FORTRAN_SUFFIX) #endif +/* + * Note that CBLAS doesn't include Fortran compiler symbol mangling, so ends up + * being the same in both schemes + */ +#define CBLAS_FUNC(name) BLAS_FUNC_EXPAND(name,BLAS_SYMBOL_PREFIX,,BLAS_SYMBOL_SUFFIX) + #ifdef HAVE_BLAS_ILP64 #define CBLAS_INT npy_int64 #define CBLAS_INT_MAX NPY_MAX_INT64 diff --git a/scipy/integrate/_ode.py b/scipy/integrate/_ode.py index 1c1fff502c48..ed81ad16b27b 100644 --- a/scipy/integrate/_ode.py +++ b/scipy/integrate/_ode.py @@ -95,7 +95,6 @@ class complex_ode _vode_int_dtype = 
_vode.types.intvar.dtype _lsoda_int_dtype = _lsoda.types.intvar.dtype - # lsoda, vode and zvode are not thread-safe. VODE_LOCK protects both vode and # zvode; they share the `def run` implementation LSODA_LOCK = threading.Lock() diff --git a/scipy/integrate/lsoda.pyf b/scipy/integrate/lsoda.pyf index d09e32a59184..20cc688320f5 100644 --- a/scipy/integrate/lsoda.pyf +++ b/scipy/integrate/lsoda.pyf @@ -23,11 +23,20 @@ python module lsoda__user__routines end python module lsoda__user__routines python module _lsoda + usercode ''' + +#ifdef HAVE_BLAS_ILP64 +typedef npy_int64 F_INT; +#else +typedef int F_INT; +#endif +''' + interface subroutine lsoda(f,neq,y,t,tout,itol,rtol,atol,itask,istate,iopt,rwork,lrw,iwork,liw,jac,jt) ! y1,t,istate = lsoda(f,jac,y0,t0,t1,rtol,atol,itask,istate,rwork,iwork,mf) callstatement (*f2py_func)(cb_f_in_lsoda__user__routines,&neq,y,&t,&tout,&itol,rtol,atol,&itask,&istate,&iopt,rwork,&lrw,iwork,&liw,cb_jac_in_lsoda__user__routines,&jt) - callprotoargument void*,int*,double*,double*,double*,int*,double*,double*,int*,int*,int*,double*,int*,int*,int*,void*,int* + callprotoargument void*,F_INT*,double*,double*,double*,F_INT*,double*,double*,F_INT*,F_INT*,F_INT*,double*,F_INT*,F_INT*,F_INT*,void*,F_INT* use lsoda__user__routines external f external jac diff --git a/scipy/integrate/meson.build b/scipy/integrate/meson.build index 206d504f44b0..d35a6de55cae 100644 --- a/scipy/integrate/meson.build +++ b/scipy/integrate/meson.build @@ -35,66 +35,101 @@ quadpack_test_src = [ 'tests/_test_multivariate.c' ] + +if use_ilp64 + _fflag_intsize = _fflag_ilp64 +else + _fflag_intsize = _fflag_lp64 +endif + + mach_lib = static_library('mach_lib', mach_src, - fortran_args: fortran_ignore_warnings, gnu_symbol_visibility: 'hidden', + fortran_args: [fortran_ignore_warnings, _fflag_intsize], ) + lsoda_lib = static_library('lsoda_lib', lsoda_src, - fortran_args: fortran_ignore_warnings, + fortran_args: [fortran_ignore_warnings, _fflag_intsize], override_options: 
['b_lto=false'], gnu_symbol_visibility: 'hidden', ) vode_lib = static_library('vode_lib', vode_src, - fortran_args: fortran_ignore_warnings, + fortran_args: [fortran_ignore_warnings, _fflag_intsize], override_options: ['b_lto=false'], gnu_symbol_visibility: 'hidden', ) dop_lib = static_library('dop_lib', dop_src, - fortran_args: fortran_ignore_warnings, + fortran_args: [fortran_ignore_warnings, _fflag_intsize], gnu_symbol_visibility: 'hidden', ) py3.extension_module('_quadpack', ['__quadpack.h', '__quadpack.c'], - dependencies: [np_dep, ccallback_dep], + dependencies: [np_dep, ccallback_dep, lapack_ilp64], install: true, subdir: 'scipy/integrate' ) + +_c_args_lsoda = [] +if use_ilp64 + _c_args_lsoda += ['-DHAVE_BLAS_ILP64'] +endif + py3.extension_module('_odepack', '_odepackmodule.c', - fortran_args: fortran_ignore_warnings, + fortran_args: [fortran_ignore_warnings, _fflag_intsize], + c_args: _c_args_lsoda, link_with: [lsoda_lib, mach_lib], link_args: version_link_args, - dependencies: [lapack_dep, np_dep], + dependencies: [lapack_ilp64, np_dep], install: true, link_language: 'fortran', subdir: 'scipy/integrate' ) + +if use_ilp64 + # generator only accepts strings, not files + f2c_map_file = f2py_ilp64_opts[1] + extra_arg = f2py_ilp64_opts[0] + '=' + fs.parent(f2c_map_file) / fs.name(f2c_map_file) + _vodemodule_obj = f2py_gen.process('vode.pyf', extra_args: extra_arg) + _lsodamodule_obj = f2py_gen.process('lsoda.pyf', extra_args: extra_arg) + _dopmodule_obj = f2py_gen.process('dop.pyf', extra_args: extra_arg) + _testodeintmodule_obj = f2py_gen.process('tests/test_odeint_banded.pyf', extra_args: extra_arg) +else + _vodemodule_obj = f2py_gen.process('vode.pyf') + _lsodamodule_obj = f2py_gen.process('lsoda.pyf') + _dopmodule_obj = f2py_gen.process('dop.pyf') + _testodeintmodule_obj = f2py_gen.process('tests/test_odeint_banded.pyf') +endif + + py3.extension_module('_vode', - f2py_gen.process('vode.pyf'), + _vodemodule_obj, link_with: [vode_lib], - c_args: 
[Wno_unused_variable], + fortran_args: [_fflag_intsize], + c_args: [Wno_unused_variable] + _c_args_lsoda, link_args: version_link_args, - dependencies: [lapack_dep, blas_dep, fortranobject_dep], + dependencies: [lapack_ilp64, blas_ilp64, fortranobject_dep], install: true, link_language: 'fortran', subdir: 'scipy/integrate' ) py3.extension_module('_lsoda', - f2py_gen.process('lsoda.pyf'), + _lsodamodule_obj, link_with: [lsoda_lib, mach_lib], - c_args: [Wno_unused_variable], - dependencies: [lapack_dep, fortranobject_dep], + fortran_args: [_fflag_intsize], + c_args: [Wno_unused_variable] + _c_args_lsoda, + dependencies: [lapack_ilp64, fortranobject_dep], link_args: version_link_args, install: true, link_language: 'fortran', @@ -102,9 +137,10 @@ py3.extension_module('_lsoda', ) py3.extension_module('_dop', - f2py_gen.process('dop.pyf'), + _dopmodule_obj, link_with: [dop_lib], - c_args: [Wno_unused_variable], + fortran_args: [_fflag_intsize], + c_args: [Wno_unused_variable] + _c_args_lsoda, dependencies: [fortranobject_dep], link_args: version_link_args, install: true, @@ -121,11 +157,12 @@ py3.extension_module('_test_multivariate', ) py3.extension_module('_test_odeint_banded', - ['tests/banded5x5.f', f2py_gen.process('tests/test_odeint_banded.pyf')], + ['tests/banded5x5.f', _testodeintmodule_obj], link_with: [lsoda_lib, mach_lib], - fortran_args: _fflag_Wno_unused_dummy_argument, + fortran_args: [_fflag_intsize, _fflag_Wno_unused_dummy_argument], + c_args: _c_args_lsoda, link_args: version_link_args, - dependencies: [lapack_dep, fortranobject_dep], + dependencies: [lapack_ilp64, fortranobject_dep], install: true, link_language: 'fortran', subdir: 'scipy/integrate', diff --git a/scipy/integrate/vode.pyf b/scipy/integrate/vode.pyf index 90774653486f..94181009d268 100644 --- a/scipy/integrate/vode.pyf +++ b/scipy/integrate/vode.pyf @@ -50,11 +50,18 @@ python module zvode__user__routines end python module zvode__user__routines python module _vode + usercode ''' +#ifdef 
HAVE_BLAS_ILP64 +typedef npy_int64 F_INT; +#else +typedef int F_INT; +#endif +''' interface subroutine dvode(f,jac,neq,y,t,tout,itol,rtol,atol,itask,istate,iopt,rwork,lrw,iwork,liw,mf,rpar,ipar) ! y1,t,istate = dvode(f,jac,y0,t0,t1,rtol,atol,itask,istate,rwork,iwork,mf) callstatement (*f2py_func)(cb_f_in_dvode__user__routines,&neq,y,&t,&tout,&itol,rtol,atol,&itask,&istate,&iopt,rwork,&lrw,iwork,&liw,cb_jac_in_dvode__user__routines,&mf,&rpar,&ipar) - callprotoargument void*,int*,double*,double*,double*,int*,double*,double*,int*,int*,int*,double*,int*,int*,int*,void*,int*,double*,int* + callprotoargument void*,F_INT*,double*,double*,double*,F_INT*,double*,double*,F_INT*,F_INT*,F_INT*,double*,F_INT*,F_INT*,F_INT*,void*,F_INT*,double*,F_INT* use dvode__user__routines external f external jac @@ -85,7 +92,7 @@ python module _vode subroutine zvode(f,jac,neq,y,t,tout,itol,rtol,atol,itask,istate,iopt,zwork,lzw,rwork,lrw,iwork,liw,mf,rpar,ipar) ! y1,t,istate = zvode(f,jac,y0,t0,t1,rtol,atol,itask,istate,rwork,iwork,mf) callstatement (*f2py_func)(cb_f_in_zvode__user__routines,&neq,y,&t,&tout,&itol,rtol,atol,&itask,&istate,&iopt,zwork,&lzw,rwork,&lrw,iwork,&liw,cb_jac_in_zvode__user__routines,&mf,&rpar,&ipar) - callprotoargument void*,int*,complex_double*,double*,double*,int*,double*,double*,int*,int*,int*,complex_double*,int*,double*,int*,int*,int*,void*,int*,double*,int* + callprotoargument void*,F_INT*,complex_double*,double*,double*,F_INT*,double*,double*,F_INT*,F_INT*,F_INT*,complex_double*,F_INT*,double*,F_INT*,F_INT*,F_INT*,void*,F_INT*,double*,F_INT* use zvode__user__routines external f external jac diff --git a/scipy/interpolate/meson.build b/scipy/interpolate/meson.build index 72e2d20fd1dd..a62c4480382e 100644 --- a/scipy/interpolate/meson.build +++ b/scipy/interpolate/meson.build @@ -87,10 +87,15 @@ fitpack_src = [ 'fitpack/pardtc.f' ] -# TODO: Add flags for 64 bit ints +if use_ilp64 + _fflag_intsize = _fflag_ilp64 +else + _fflag_intsize = _fflag_lp64 +endif + 
fitpack_lib = static_library('fitpack_lib', fitpack_src, - fortran_args: _fflag_Wno_maybe_uninitialized, + fortran_args: [_fflag_Wno_maybe_uninitialized, _fflag_intsize], override_options: ['b_lto=false'], gnu_symbol_visibility: 'hidden', ) @@ -123,7 +128,7 @@ py3.extension_module('_rgi_cython', __fitpack_lib = static_library('__fitpack', ['src/__fitpack.h', 'src/__fitpack.cc'], - dependencies:[lapack, np_dep, py3_dep], + dependencies:[lapack_ilp64, np_dep, py3_dep], ) __fitpack_dep = declare_dependency( @@ -134,7 +139,7 @@ py3.extension_module('_bspl', cython_gen_cpp.process('_bspl.pyx'), cpp_args: cython_cpp_args, include_directories: 'src/', - dependencies: [lapack, np_dep, __fitpack_dep], + dependencies: [np_dep, __fitpack_dep], link_args: version_link_args, install: true, subdir: 'scipy/interpolate' @@ -150,9 +155,18 @@ py3.extension_module('_dierckx', subdir: 'scipy/interpolate' ) -# TODO: Add flags for 64 bit ints -py3.extension_module('_fitpack', +# Build _fitpack and dfitpack extensions: both are FITPACK wrappers. +# XXX: some functions from dfitpack use the F_INT macro defined in dfitpack.pyf, while +# others rely on the build flags only. Consider cleaning this up at some point. 
+ +_c_args_fitpack = [] +if use_ilp64 + _c_args_fitpack += ['-DHAVE_BLAS_ILP64'] +endif + +_fitpack = py3.extension_module('_fitpack', ['src/_fitpackmodule.c'], + c_args: _c_args_fitpack, link_with: [fitpack_lib], include_directories: 'src/', dependencies: np_dep, @@ -162,12 +176,20 @@ py3.extension_module('_fitpack', subdir: 'scipy/interpolate' ) -# TODO: Add flags for 64 bit ints +if use_ilp64 + # generator only accepts strings, not files + f2c_map_file = f2py_ilp64_opts[1] + extra_arg = f2py_ilp64_opts[0] + '=' + fs.parent(f2c_map_file) / fs.name(f2c_map_file) + _dfitpackmodule_obj = f2py_gen.process('src/dfitpack.pyf', extra_args: extra_arg) +else + _dfitpackmodule_obj = f2py_gen.process('src/dfitpack.pyf') +endif + py3.extension_module('_dfitpack', - f2py_gen.process('src/dfitpack.pyf'), - c_args: [Wno_unused_variable], + _dfitpackmodule_obj, + c_args: [Wno_unused_variable] + _c_args_fitpack, link_args: version_link_args, - dependencies: [lapack_dep, fortranobject_dep], + dependencies: [fortranobject_dep], link_with: [fitpack_lib], override_options: ['b_lto=false'], install: true, @@ -175,6 +197,7 @@ py3.extension_module('_dfitpack', subdir: 'scipy/interpolate' ) + if use_pythran py3.extension_module('_rbfinterp_pythran', pythran_gen.process('_rbfinterp_pythran.py'), diff --git a/scipy/interpolate/src/_fitpackmodule.c b/scipy/interpolate/src/_fitpackmodule.c index e569fe93034f..59ebf4600e85 100644 --- a/scipy/interpolate/src/_fitpackmodule.c +++ b/scipy/interpolate/src/_fitpackmodule.c @@ -5,7 +5,7 @@ static PyObject *fitpack_error; -#ifdef HAVE_ILP64 +#ifdef HAVE_BLAS_ILP64 #define F_INT npy_int64 #define F_INT_NPY NPY_INT64 diff --git a/scipy/interpolate/src/dfitpack.pyf b/scipy/interpolate/src/dfitpack.pyf index 829e1cdbbe7c..35b5d0846a73 100644 --- a/scipy/interpolate/src/dfitpack.pyf +++ b/scipy/interpolate/src/dfitpack.pyf @@ -13,7 +13,7 @@ python module _dfitpack ! 
in usercode ''' -#ifdef HAVE_ILP64 +#ifdef HAVE_BLAS_ILP64 typedef npy_int64 F_INT; #else typedef int F_INT; diff --git a/scipy/linalg/meson.build b/scipy/linalg/meson.build index 95925a791cbc..c5abda2be2ae 100644 --- a/scipy/linalg/meson.build +++ b/scipy/linalg/meson.build @@ -50,7 +50,7 @@ linalg_cython_gen = generator(cython, # fblas fblas_module = custom_target('fblas_module', - output: ['_fblasmodule.c'], + output: ['_fblasmodule.c', '_fblas-f2pywrappers.f'], input: 'fblas.pyf.src', command: [generate_f2pymod, '@INPUT@', '-o', '@OUTDIR@'] + f2py_freethreading_arg, depend_files: @@ -66,9 +66,12 @@ fblas_module = custom_target('fblas_module', # LAPACK - we have historically put these in `_fblas`. py3.extension_module('_fblas', fblas_module, + fortran_args: _fflag_lp64, link_args: version_link_args, dependencies: [lapack_dep, blas_dep, fortranobject_dep], + link_with: [g77_abi_wrappers], install: true, + link_language: 'fortran', subdir: 'scipy/linalg' ) @@ -95,14 +98,75 @@ flapack_module = custom_target('flapack_module', py3.extension_module('_flapack', flapack_module, c_args: [Wno_empty_body], + fortran_args: _fflag_lp64, link_args: version_link_args, dependencies: [lapack_dep, blas_dep, fortranobject_dep], install: true, subdir: 'scipy/linalg' ) +# Add _fblas_64 and _flapack_64 if we're building with ILP64 support +# +# NOTE: what happened in the setup.py build was that we were linking LP64 +# libopenblas.so to `_fblas` and ILP64 `libopenblas64_.so` to `_fblas_64` +# and used both at the same time. We never shipped wheels that way, it only +# worked in a CI job. We are re-exporting the LP64 symbols in +# `cython_blas`/`cython_lapack`, so we can't use only ILP64 even if we support +# it in all SciPy code. +# TODO: right now we're only detecting one BLAS library (like NumPy does), but +# we need two blas and two lapack dependency objects here. 
+# The ILP64 CI job in the 1.10.x branch downloads two OpenBLAS tarballs +# and then uses both in the build (search for `Download-OpenBLAS('1')` +# in azure-pipelines.yml if you want to check that). +if use_ilp64 + fblas64_module = custom_target('fblas64_module', + output: ['_fblas_64module.c', '_fblas_64-f2pywrappers.f'], + input: 'fblas_64.pyf.src', + command: [generate_f2pymod, '@INPUT@', '-o', '@OUTDIR@'] + f2py_ilp64_opts, + depend_files: + [ + 'fblas_l1.pyf.src', + 'fblas_l2.pyf.src', + 'fblas_l3.pyf.src', + ] + ) + + py3.extension_module('_fblas_64', + fblas64_module, + #['_fblas_64module.c'], + fortran_args: _fflag_ilp64, + link_args: version_link_args, + dependencies: [lapack_ilp64, blas_ilp64, fortranobject_dep], #g77_abi_wrappers], + link_with: [g77_abi_wrappers], + install: true, + link_language: 'fortran', + subdir: 'scipy/linalg' + ) + + flapack64_module = custom_target('flapack64_module', + output: ['_flapack_64module.c', '_flapack_64-f2pywrappers.f'], + input: 'flapack_64.pyf.src', + command: [generate_f2pymod, '@INPUT@', '-o', '@OUTDIR@'] + f2py_ilp64_opts + ) + + py3.extension_module('_flapack_64', + flapack64_module, + #['_flapack_64module.c'], + c_args: [Wno_empty_body], + fortran_args: _fflag_ilp64, + link_args: version_link_args, + dependencies: [lapack_ilp64, blas_ilp64, fortranobject_dep], #g77_abi_wrappers], + link_with: [g77_abi_wrappers], + install: true, + link_language: 'fortran', + subdir: 'scipy/linalg' + ) +endif + + # TODO: cblas/clapack are built *only* for ATLAS. Why? Is it still needed? 
+ # _decomp_interpolative py3.extension_module('_decomp_interpolative', linalg_cython_gen.process('_decomp_interpolative.pyx'), diff --git a/scipy/linalg/tests/test_batch.py b/scipy/linalg/tests/test_batch.py index 7a32e7a5cd66..7871a31ffa1f 100644 --- a/scipy/linalg/tests/test_batch.py +++ b/scipy/linalg/tests/test_batch.py @@ -441,8 +441,8 @@ def test_solve(self, bdim, dtype): if len(bdim) == 1: x = x[..., np.newaxis] b = b[..., np.newaxis] - assert_allclose(A @ x - b, 0, atol=1.5e-6) - assert_allclose(x, np.linalg.solve(A, b), atol=2e-6) + assert_allclose(A @ x - b, 0, atol=2e-6) + assert_allclose(x, np.linalg.solve(A, b), atol=3e-6) @pytest.mark.parametrize('bdim', [(5,), (5, 4), (2, 3, 5, 4)]) @pytest.mark.parametrize('dtype', floating) @@ -455,8 +455,8 @@ def test_lu_solve(self, bdim, dtype): if len(bdim) == 1: x = x[..., np.newaxis] b = b[..., np.newaxis] - assert_allclose(A @ x - b, 0, atol=1.5e-6) - assert_allclose(x, np.linalg.solve(A, b), atol=2e-6) + assert_allclose(A @ x - b, 0, atol=2e-6) + assert_allclose(x, np.linalg.solve(A, b), atol=3e-6) @pytest.mark.parametrize('l_and_u', [(1, 1), ([2, 1, 0], [0, 1 , 2])]) @pytest.mark.parametrize('bdim', [(5,), (5, 4), (2, 3, 5, 4)]) diff --git a/scipy/meson.build b/scipy/meson.build index b3803a4f4592..cf9d09f00298 100644 --- a/scipy/meson.build +++ b/scipy/meson.build @@ -91,12 +91,15 @@ inc_np = include_directories(incdir_numpy) numpy_nodepr_api = ['-DNPY_NO_DEPRECATED_API=NPY_1_9_API_VERSION'] np_dep = declare_dependency(include_directories: inc_np, compile_args: numpy_nodepr_api) + incdir_f2py = incdir_numpy / '..' / '..' / 'f2py' / 'src' inc_f2py = include_directories(incdir_f2py) fortranobject_c = incdir_f2py / 'fortranobject.c' npymath_path = _incdir_numpy_abs / '..' / 'lib' +npyrandom_path = _incdir_numpy_abs / '..' / '..' 
/ 'random' / 'lib' npymath_lib = cc.find_library('npymath', dirs: npymath_path) +npyrandom_lib = cc.find_library('npyrandom', dirs: npyrandom_path) pybind11_dep = dependency('pybind11', version: '>=2.13.2') @@ -140,7 +143,6 @@ cpp_args_pythran = [ # Share this object across multiple modules. fortranobject_lib = static_library('_fortranobject', fortranobject_c, - c_args: numpy_nodepr_api, dependencies: py3_dep, include_directories: [inc_np, inc_f2py], gnu_symbol_visibility: 'hidden', @@ -218,22 +220,149 @@ endif # 2. targets with #include's (due to no `depend_files` - see feature request # at meson#8295) f2py_gen = generator(generate_f2pymod, - arguments : ['@INPUT@', '-o', '@BUILD_DIR@'] + f2py_freethreading_arg, + arguments : ['@INPUT@', '-o', '@BUILD_DIR@', '@EXTRA_ARGS@'] + f2py_freethreading_arg, output : ['_@BASENAME@module.c', '_@BASENAME@-f2pywrappers.f'], ) -# TODO: 64-bit BLAS and LAPACK -# -# Note that this works as long as BLAS and LAPACK are detected properly via -# pkg-config. By default we look for OpenBLAS, other libraries can be configured via -# `meson configure -Dblas=blas -Dlapack=lapack` (example to build with Netlib -# BLAS and LAPACK). 
-# For MKL and for auto-detecting one of multiple libs, we'll need a custom -# dependency in Meson (like is done for scalapack) - see -# https://github.com/mesonbuild/meson/issues/2835 +# Start of BLAS/LAPACK detection + blas_name = get_option('blas') lapack_name = get_option('lapack') +blas_symbol_suffix = get_option('blas-symbol-suffix') +use_ilp64 = get_option('use-ilp64') +blas_order = get_option('blas-order') +lapack_order = get_option('lapack-order') + +# Detect LP64 (32-bit) BLAS and LAPACK, we always need that +blas_interface = ['interface: lp64'] +if blas_order == ['auto'] + blas_order = [] + if host_machine.system() == 'darwin' + blas_order += 'accelerate' + endif + if host_machine.cpu_family() == 'x86_64' + blas_order += 'mkl' + endif + blas_order += ['openblas', 'flexiblas', 'blis', 'blas'] +endif +if lapack_order == ['auto'] + lapack_order = [] + if host_machine.system() == 'darwin' + lapack_order += 'accelerate' + endif + if host_machine.cpu_family() == 'x86_64' + lapack_order += 'mkl' + endif + lapack_order += ['openblas', 'flexiblas', 'lapack'] +endif + +# MKL-specific options +_threading_opt = get_option('mkl-threading') +if _threading_opt == 'auto' + # Switch default to iomp once conda-forge missing openmp.pc issue is fixed + mkl_opts = ['threading: seq'] +else + mkl_opts = ['threading: ' + _threading_opt] +endif +blas_opts = {'mkl': mkl_opts} +mkl_version_req = '>=2023.0' # see gh-24824 +mkl_may_use_sdl = not use_ilp64 and _threading_opt in ['auto', 'iomp'] + + +# BLAS detection. +# +# First try scipy-openblas, and if found don't look for blas/lapack, we +# know what's inside the scipy-openblas wheels already. 
+if blas_name == 'openblas' or blas_name == 'auto' + blas = dependency('scipy-openblas', method: 'pkg-config', required: false) + if blas.found() + blas_name = 'scipy-openblas' + endif +endif +if blas_name == 'auto' + foreach _name : blas_order + if _name == 'mkl' + blas = dependency('mkl', + modules: blas_interface + mkl_opts, + required: false, # may be required, but we need to emit a custom error message + version: mkl_version_req, + ) + # Insert a second try with MKL, because we may be rejecting older versions + # or missing it because no pkg-config installed. If so, we need to retry + # with MKL SDL, and drop the version constraint (this always worked). + if not blas.found() and mkl_may_use_sdl + blas = dependency('mkl', modules: ['sdl: true'], required: false) + endif + else + blas = dependency(_name, modules: blas_interface, required: false) + endif + if blas.found() + break + endif + endforeach +else + if blas_name == 'mkl' + mkl_uses_sdl = false + blas = dependency('mkl', + modules: blas_interface + mkl_opts, + required: false, + version: mkl_version_req, + ) + # Same deal as above - try again for MKL + if not blas.found() and mkl_may_use_sdl + mkl_uses_sdl = true + blas = dependency('mkl', modules: ['sdl: true'], required: false) + endif + else + blas = dependency(blas_name, modules: blas_interface, required: false) + endif +endif + +if not blas.found() + error('No BLAS library detected! 
SciPy needs one, please install it.') +endif + +_args_blas = [] # note: used for C and C++ via `blas_dep` below +if blas_symbol_suffix == 'auto' + blas_symbol_suffix = blas.get_variable('symbol_suffix', default_value: '') + message(f'BLAS symbol suffix: @blas_symbol_suffix@') +endif +_blas_incdir = [] +if blas_symbol_suffix != '' + _args_blas += ['-DBLAS_SYMBOL_SUFFIX=' + blas_symbol_suffix] + _blas_incdir = ['.'] # for the generated symbol rename shims in the build dir +endif +blas_dep = declare_dependency( + dependencies: [blas], + compile_args: _args_blas, + include_directories: _blas_incdir, +) + +# LAPACK detection +if 'mkl' in blas.name() or blas.name() == 'accelerate' or blas_name == 'scipy-openblas' + # For these libraries we know that they contain LAPACK, and it's desirable to + # use that - no need to run the full detection twice. + lapack = blas +else + if lapack_name == 'auto' + foreach _name : lapack_order + lapack = dependency(_name, modules: ['lapack'] + blas_interface, required: false) + if lapack.found() + break + endif + endforeach + else + lapack = dependency(lapack_name, modules: ['lapack'] + blas_interface, required: false) + endif +endif + +if not lapack.found() + error('No LAPACK library detected! SciPy needs one, please install it.') +endif +lapack_dep = declare_dependency(dependencies: [lapack, blas_dep]) + + macOS13_3_or_later = false if host_machine.system() == 'darwin' @@ -258,27 +387,7 @@ if blas_name == 'accelerate' accelerate_flag = '-a' endif -# First try scipy-openblas, and if found don't look for cblas or lapack, we -# know what's inside the scipy-openblas wheels already. 
-if blas_name == 'openblas' or blas_name == 'auto' - blas = dependency('scipy-openblas', method: 'pkg-config', required: false) - if blas.found() - blas_name = 'scipy-openblas' - generate_blas_wrappers = true - endif -endif -# pkg-config uses a lower-case name while CMake uses a capitalized name, so try -# that too to make the fallback detection with CMake work -if blas_name == 'openblas' - blas = dependency(['openblas', 'OpenBLAS']) -elif blas_name != 'scipy-openblas' # if so, we found it already - blas = dependency(blas_name) -endif -blas_dep = declare_dependency( - dependencies: blas, - compile_args: _args_blas_lapack -) if blas_name == 'blas' # Netlib BLAS has a separate `libcblas.so` which we use directly in the g77 # ABI wrappers, so detect it and error out if we cannot find it. @@ -290,19 +399,6 @@ else cblas = [] endif -if 'mkl' in blas.name() or blas.name().to_lower() == 'accelerate' or blas_name == 'scipy-openblas' - # For these libraries we know that they contain LAPACK, and it's desirable to - # use that - no need to run the full detection twice. - lapack = blas -elif lapack_name == 'openblas' - lapack = dependency(['openblas', 'OpenBLAS']) -else - lapack = dependency(lapack_name) -endif -lapack_dep = declare_dependency( - dependencies: lapack, - compile_args: _args_blas_lapack -) dependency_map = { 'BLAS': blas, @@ -310,29 +406,99 @@ dependency_map = { 'PYBIND11': pybind11_dep, } -# FIXME: conda-forge sets MKL_INTERFACE_LAYER=LP64,GNU, see gh-11812. -# This needs work on gh-16200 to make MKL robust. We should be -# requesting `mkl-dynamic-lp64-seq` here. And then there's work needed -# in general to enable the ILP64 interface (also for OpenBLAS). -uses_mkl = blas.name().to_lower().startswith('mkl') or lapack.name().to_lower().startswith('mkl') -uses_accelerate = blas.name().to_lower().startswith('accelerate') or lapack.name().to_lower().startswith('accelerate') +# NB: from this point on blas_name is e.g. 
'mkl-lp64-dynamic-seq' +blas_name = blas.name() +lapack_name = lapack.name() +uses_mkl = blas_name.to_lower().startswith('mkl') +uses_accelerate = blas_name.to_lower().startswith('accelerate') use_g77_abi = uses_mkl or uses_accelerate or get_option('use-g77-abi') if use_g77_abi g77_abi_wrappers = static_library( 'g77_abi_wrappers', ['_build_utils/src/wrap_g77_abi.c'], - dependencies: [py3_dep, cblas, blas_dep, np_dep], + dependencies: [py3_dep, blas, np_dep], gnu_symbol_visibility: 'hidden', ) else g77_abi_wrappers = static_library( 'dummy_g77_abi_wrappers', ['_build_utils/src/wrap_dummy_g77_abi.c'], - dependencies: [py3_dep, blas_dep, np_dep], + dependencies: [py3_dep, blas, np_dep], gnu_symbol_visibility: 'hidden', ) endif +# Reuse the names, so we ensure we don't lose the arguments wrapped in with +# declare_dependency. Also, avoids changing `dependencies: blas` to blas_dep in other files. +blas = blas_dep +lapack = lapack_dep + +if use_ilp64 + # Okay, we need ILP64 BLAS and LAPACK *in addition to LP64*. So we need to + # detect the ILP64 variants of the found LP64 libraries now. 
+ _args_blas = [] + blas_interface = ['interface: ilp64'] + if blas_name == 'flexiblas' + blas_name = 'flexiblas64' + endif + _args_blas += ['-DHAVE_BLAS_ILP64'] + if 'openblas' in blas_name + _args_blas += ['-DOPENBLAS_ILP64_NAMING_SCHEME'] + endif + + # Run the detection + if uses_mkl + mkl_uses_sdl = false # FIXME, why + if mkl_uses_sdl + mkl_opts = ['sdl: true'] + endif + blas_ilp64 = dependency('mkl', modules: ['interface: ilp64'] + mkl_opts) + lapack_ilp64 = blas_ilp64 + else + if blas_name == 'openblas' + # We cannot allow plain openblas here, that's already the LP64 library and + # will lead to problems (there is, as of now, no combined OpenBLAS build + # with 32 and 64 bit symbols) + blas_name = ['openblas64', 'openblas_ilp64'] + endif + blas_ilp64 = dependency(blas_name, modules: blas_interface) + lapack_ilp64 = dependency(lapack_name, modules: ['lapack'] + blas_interface) + endif + + # Pick up the symbol suffix, it may be auto-detected by Meson and different from LP64 + if blas_symbol_suffix == 'auto' + if blas_name == 'scipy-openblas' + blas_symbol_suffix = '64_' + else + blas_symbol_suffix = blas_ilp64.get_variable('symbol_suffix', default_value: '') + endif + message(f'BLAS symbol suffix (ILP64): @blas_symbol_suffix@') + endif + _blas_incdir = [] + if blas_symbol_suffix != '' + _args_blas += ['-DBLAS_SYMBOL_SUFFIX=' + blas_symbol_suffix] + _blas_incdir = ['.'] + endif + # Declare the ILP64 dependencies + message('BLAS / LAPACK ILP64 detected: ', blas_ilp64.name(), ', ', lapack_ilp64.name()) + + blas_ilp64 = declare_dependency( + dependencies: [blas_ilp64], + compile_args: _args_blas, + include_directories: _blas_incdir, + ) + lapack_ilp64 = declare_dependency(dependencies: [lapack_ilp64, blas_ilp64]) +else + # we're not using ILP64; user will link to the always-available LP64 blas/lapack + # (all users must use preprocessor macros BLAS_NAME to handle the two options) + blas_ilp64 = blas + lapack_ilp64 = lapack + message('LAPACK ILP64 not requested / 
not detected.') +endif + +# End of BLAS/LAPACK handling + + scipy_dir = py3.get_install_dir() / 'scipy' # Generate version.py for sdist @@ -360,6 +526,9 @@ python_sources = [ '__init__.py', '_distributor_init.py', 'conftest.py', + # 'linalg.pxd', + # 'optimize.pxd', + # 'special.pxd', ] if fs.exists('_distributor_init_local.py') @@ -479,13 +648,51 @@ fortran_ignore_warnings = ff.get_supported_arguments( # Intel Fortran (ifort) does not run the preprocessor by default, if Fortran # code uses preprocessor statements, add this compile flag to it. -_fflag_fpp = [] -if ff.get_id() in ['intel-cl', 'intel-llvm-cl'] - if is_windows - _fflag_fpp = ff.get_supported_arguments('/fpp') + +# Gfortran does run the preprocessor for .F files, and PROPACK is the only +# component which needs the preprocessor (unless we need symbol renaming for +# blas_symbol_suffix). +_fflag_preprocess = [] +_gfortran_preprocess = ['-cpp', '-ffree-line-length-none', '-ffixed-line-length-none'] +if ff.has_multi_arguments(_gfortran_preprocess) + _fflag_preprocess = _gfortran_preprocess +else + _fflag_preprocess = ff.first_supported_argument(['-fpp', '/fpp', '-cpp']) +endif + +_fflag_lp64 = [] +_fflag_ilp64 = [] +f2py_ilp64_opts = [] +if use_ilp64 + # Gfortran and Clang use `-fdefault-integer-8` to switch to 64-bit integers by + # default, all other known compilers use `-i8` + _fflag_ilp64 = ff.first_supported_argument(['-fdefault-integer-8', '-i8']) + + # Write out a mapping file for f2py for defaulting to ILP64 + conf_data = configuration_data() + if cc.sizeof('long') == 8 + conf_data.set('int64_name', 'long') + elif cc.sizeof('long long') == 8 + conf_data.set('int64_name', 'long long') else - _fflag_fpp = ff.get_supported_arguments('-fpp') + error('Neither `long` nor `long long` is 64-bit, giving up.') endif + int64_f2cmap = configure_file( + input: '_build_utils/int64.f2cmap.in', + output: 'int64.f2cmap', + configuration: conf_data, + install: false, + ) + f2py_ilp64_opts = ['--f2cmap', 
int64_f2cmap] +endif + +if blas_symbol_suffix != '' + # We need to patch source files that use BLAS/LAPACK symbols. + # In addition, we now need to enable the Fortran preprocessor on all targets + # that depend on BLAS/LAPACK + # Note: this came from `scipy/build_utils/_fortran.py` in the distutils build. + _fflag_lp64 += _fflag_preprocess # TODO: propagate _fflag_lp64 + _fflag_ilp64 += _fflag_preprocess endif # Deal with M_PI & friends; add `use_math_defines` to c_args or cpp_args diff --git a/scipy/odr/meson.build b/scipy/odr/meson.build index c7f53bc2365d..6ed10624194c 100644 --- a/scipy/odr/meson.build +++ b/scipy/odr/meson.build @@ -5,7 +5,10 @@ odrpack = static_library('odrpack', 'odrpack/d_odr.f', 'odrpack/dlunoc.f' ], - fortran_args: _fflag_Wno_conversion, # silence "conversion from REAL(8) to INTEGER(4)" + fortran_args: [ + _fflag_Wno_conversion, # silence "conversion from REAL(8) to INTEGER(4)" + _fflag_ilp64 + ], override_options: ['b_lto=false'], gnu_symbol_visibility: 'hidden', ) @@ -14,7 +17,7 @@ py3.extension_module('__odrpack', '__odrpack.c', link_with: odrpack, link_args: version_link_args, - dependencies: [blas_dep, np_dep], + dependencies: [blas_ilp64, np_dep], install: true, link_language: 'fortran', subdir: 'scipy/odr' diff --git a/scipy/optimize/_trlib/meson.build b/scipy/optimize/_trlib/meson.build index 9b15bb50fd23..734e84964f84 100644 --- a/scipy/optimize/_trlib/meson.build +++ b/scipy/optimize/_trlib/meson.build @@ -12,7 +12,7 @@ py3.extension_module('_trlib', '../../_lib', '../../_build_utils/src' ], - dependencies: [lapack_dep, blas_dep, np_dep], + dependencies: [lapack_ilp64, blas_ilp64, np_dep], link_args: version_link_args, install: true, subdir: 'scipy/optimize/_trlib' diff --git a/scipy/optimize/meson.build b/scipy/optimize/meson.build index 375f4c3aef79..5c183582ef76 100644 --- a/scipy/optimize/meson.build +++ b/scipy/optimize/meson.build @@ -62,6 +62,8 @@ py3.extension_module('_zeros', subdir: 'scipy/optimize' ) + +# TODO: link 
to ILP64 LAPACK py3.extension_module('_lbfgsb', [ '__lbfgsb.h', diff --git a/scipy/sparse/linalg/_eigen/arpack/meson.build b/scipy/sparse/linalg/_eigen/arpack/meson.build index 52c8ab968b22..edb77bc4430e 100644 --- a/scipy/sparse/linalg/_eigen/arpack/meson.build +++ b/scipy/sparse/linalg/_eigen/arpack/meson.build @@ -89,7 +89,7 @@ _suppress_all_warnings = ff.get_supported_arguments('-w') arpack_lib = static_library('arpack_lib', arpack_sources, - fortran_args: [fortran_ignore_warnings, _suppress_all_warnings], + fortran_args: [fortran_ignore_warnings, _suppress_all_warnings, _fflag_ilp64], include_directories: ['ARPACK/SRC'], override_options: ['b_lto=false'], gnu_symbol_visibility: 'hidden', @@ -98,14 +98,16 @@ arpack_lib = static_library('arpack_lib', arpack_module = custom_target('arpack_module', output: ['_arpackmodule.c', '_arpack-f2pywrappers.f'], input: 'arpack.pyf.src', - command: [generate_f2pymod, '@INPUT@', '-o', '@OUTDIR@'] + f2py_freethreading_arg, + command: [generate_f2pymod, '@INPUT@', '-o', '@OUTDIR@'] + f2py_freethreading_arg + f2py_ilp64_opts, ) _arpack = py3.extension_module('_arpack', arpack_module, - link_with: arpack_lib, + link_with: [arpack_lib, g77_abi_wrappers], link_args: version_link_args, - dependencies: [lapack_dep, blas_dep, fortranobject_dep], + dependencies: [lapack_ilp64, blas_ilp64, fortranobject_dep], + fortran_args: _fflag_ilp64, + c_args: ['-DHAVE_BLAS_ILP64'], install: true, link_language: 'fortran', subdir: 'scipy/sparse/linalg/_eigen/arpack' diff --git a/scipy/sparse/linalg/_propack/meson.build b/scipy/sparse/linalg/_propack/meson.build index d33cdc0e7646..b6b5dd94d419 100644 --- a/scipy/sparse/linalg/_propack/meson.build +++ b/scipy/sparse/linalg/_propack/meson.build @@ -94,7 +94,7 @@ foreach ele: elements fortran_ignore_warnings, _fflag_Wno_intrinsic_shadow, _fflag_Wno_uninitialized, - _fflag_fpp, + _fflag_preprocess, ], gnu_symbol_visibility: 'hidden', ) diff --git a/scipy/special/_ellip_harm.pxd 
b/scipy/special/_ellip_harm.pxd index 42f8ae366e8b..939ed1cd50e1 100644 --- a/scipy/special/_ellip_harm.pxd +++ b/scipy/special/_ellip_harm.pxd @@ -34,14 +34,10 @@ from . cimport sf_error from libc.math cimport sqrt, fabs, pow, NAN from libc.stdlib cimport malloc, free +from scipy.linalg.cython_lapack cimport dstevr -cdef extern from "lapack_defs.h": +cdef extern from "npy_cblas.h": ctypedef int CBLAS_INT # actual type defined in the header - void c_dstevr(char *jobz, char *range, CBLAS_INT *n, double *d, double *e, - double *vl, double *vu, CBLAS_INT *il, CBLAS_INT *iu, double *abstol, - CBLAS_INT *m, double *w, double *z, CBLAS_INT *ldz, CBLAS_INT *isuppz, - double *work, CBLAS_INT *lwork, CBLAS_INT *iwork, CBLAS_INT *liwork, - CBLAS_INT *info) nogil @cython.wraparound(False) @@ -162,7 +158,7 @@ cdef inline double* lame_coefficients(double h2, double k2, int n, int p, for i in range(0, size-1): dd[i] = g[i]*ss[i]/ss[i+1] - c_dstevr("V", "I", &size, d, dd, &vl, &vu, &tp, &tp, &tol, &c, w, eigv, + dstevr("V", "I", &size, d, dd, &vl, &vu, &tp, &tp, &tol, &c, w, eigv, &size, isuppz, work, &lwork, iwork, &liwork, &info) if info != 0: diff --git a/scipy/special/lapack_defs.h b/scipy/special/lapack_defs.h deleted file mode 100644 index 0d20ba1ca619..000000000000 --- a/scipy/special/lapack_defs.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Handle different Fortran conventions. 
- */ - -#include "npy_cblas.h" - -extern void BLAS_FUNC(dstevr)(char *jobz, char *range, CBLAS_INT *n, double *d, double *e, - double *vl, double *vu, CBLAS_INT *il, CBLAS_INT *iu, double *abstol, - CBLAS_INT *m, double *w, double *z, CBLAS_INT *ldz, CBLAS_INT *isuppz, - double *work, CBLAS_INT *lwork, CBLAS_INT *iwork, CBLAS_INT *liwork, - CBLAS_INT *info, size_t jobz_len, size_t range_len); - -static void c_dstevr(char *jobz, char *range, CBLAS_INT *n, double *d, double *e, - double *vl, double *vu, CBLAS_INT *il, CBLAS_INT *iu, double *abstol, - CBLAS_INT *m, double *w, double *z, CBLAS_INT *ldz, CBLAS_INT *isuppz, - double *work, CBLAS_INT *lwork, CBLAS_INT *iwork, CBLAS_INT *liwork, CBLAS_INT *info) { - BLAS_FUNC(dstevr)(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, - w, z, ldz, isuppz, work, lwork, iwork, liwork, info, - 1, 1); -} diff --git a/scipy/special/meson.build b/scipy/special/meson.build index 583c29c1f300..b47fcd5684e6 100644 --- a/scipy/special/meson.build +++ b/scipy/special/meson.build @@ -176,7 +176,7 @@ py3.extension_module('_ellip_harm_2', cpp_args: ['-DSP_SPECFUN_ERROR'], include_directories: ['../_lib', '../_build_utils/src'], link_args: version_link_args, - dependencies: [lapack_dep, np_dep], + dependencies: [np_dep], install: true, subdir: 'scipy/special', ) @@ -223,7 +223,6 @@ py3.install_sources( ) # Must use `custom_target`, because `py3.install_sources` does not work with -# generated sources - see https://github.com/mesonbuild/meson/issues/7372 npz_files = [ [ '_data_boost', @@ -259,6 +258,8 @@ foreach npz_file: npz_files ) endforeach + + # Headers for special functions in `xsf` library are included in # both build and install dirs for development purposes. Not public! 
diff --git a/tools/generate_f2pymod.py b/tools/generate_f2pymod.py index e61524cc39b5..ad5fb5e8de6b 100644 --- a/tools/generate_f2pymod.py +++ b/tools/generate_f2pymod.py @@ -9,6 +9,7 @@ import os import re import subprocess +import sys # START OF CODE VENDORED FROM `numpy.distutils.from_template` @@ -266,6 +267,8 @@ def main(): parser.add_argument("--free-threading", action=argparse.BooleanOptionalAction, help="Whether to add --free-threading-compatible") + parser.add_argument("--f2cmap", type=str, + help="Path to the f2cmap file") args = parser.parse_args() if not args.infile.endswith(('.pyf', '.pyf.src', '.f.src')): @@ -290,10 +293,13 @@ def main(): # Now invoke f2py to generate the C API module file if args.infile.endswith(('.pyf.src', '.pyf')): - p = subprocess.Popen( - ['f2py', fname_pyf, '--build-dir', outdir_abs] + nogil_arg, - stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=os.getcwd() - ) + cmd = [sys.executable, '-m', 'numpy.f2py', fname_pyf, + '--build-dir', outdir_abs] + nogil_arg + if args.f2cmap: + cmd += ['--f2cmap', args.f2cmap] + + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, cwd=os.getcwd()) out, err = p.communicate() if not (p.returncode == 0): raise RuntimeError(f"Processing {fname_pyf} with f2py failed!\n"