diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 7767b4c8d34c..e38703903929 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -57,7 +57,6 @@ benchmarks/asv.conf.json @larsoner # CI config .circleci/ @larsoner .github/workflows/ @larsoner @andyfaff -.cirrus.star @larsoner @andyfaff # Doc requirements/doc.txt @tupui diff --git a/.github/label-globs.yml b/.github/label-globs.yml index 18b6e046a254..e19700021f2c 100644 --- a/.github/label-globs.yml +++ b/.github/label-globs.yml @@ -156,7 +156,6 @@ CI: - .circleci/** - .github/workflows/** - ci/** - - .cirrus.star DX: - changed-files: diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index b06323bc4c97..b8faec729b26 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -285,7 +285,7 @@ jobs: ################################################################################# prerelease_deps_coverage_64bit_blas: # TODO: re-enable ILP64 build. - name: Prerelease deps & coverage report, full, py3.11/npMin & py3.11/npPre, dev.py + name: Prerelease deps & coverage report, full, py3.11/npMin & py3.11/npPre, dev.py, SCIPY_ARRAY_API=1 needs: get_commit_message if: > needs.get_commit_message.outputs.message == 1 @@ -362,6 +362,7 @@ jobs: - name: Test SciPy run: | export OPENBLAS_NUM_THREADS=1 + export SCIPY_ARRAY_API=1 python dev.py --no-build test --coverage -j2 --mode full -- --cov --cov-report term-missing ################################################################################# @@ -508,7 +509,6 @@ jobs: if: ${{ matrix.parallel == '1'}} env: # Excluded modules: - # - scipy.special and scipy.stats are waiting on special.errstate being made thread-safe # - scipy.spatial has multiple issues in kdtree/qhull, and gh-20655 is pending. 
TEST_SUBMODULES: >- -t scipy.cluster @@ -527,6 +527,8 @@ jobs: -t scipy.optimize -t scipy.signal -t scipy.sparse + -t scipy.special + -t scipy.stats run: | # Note: only fast tests; full test suite is unlikely to uncover anything more, # and it'll be quite slow with pytest-run-parallel diff --git a/.github/workflows/linux_blas_ilp64.yml b/.github/workflows/linux_blas_ilp64.yml new file mode 100644 index 000000000000..5f4f4b9eda93 --- /dev/null +++ b/.github/workflows/linux_blas_ilp64.yml @@ -0,0 +1,129 @@ +name: BLAS tests (Linux) + +# This file is meant for testing different BLAS/LAPACK flavors and build +# options on Linux. All other yml files for Linux will only test without BLAS +# (mostly because that's easier and faster to build) or with the same 64-bit +# OpenBLAS build that is used in the wheel jobs. +# +# Jobs and their purpose: +# +# - mkl: +# Tests MKL installed from PyPI (because easiest/fastest, if broken) in +# 3 ways: both LP64 and ILP64 via pkg-config, and then using the +# Single Dynamic Library (SDL, or `libmkl_rt`). +# +# - scipy-openblas64: +# Test ILP64-enabled build with scipy-openblas32 and scipy-openblas64. 
+# + +on: + pull_request: + branches: + - main + - maintenance/** + +defaults: + run: + shell: bash + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +permissions: + contents: read # to fetch code (actions/checkout) + +jobs: + + mkl-lp64: + runs-on: ubuntu-latest + name: "MKL LP64" + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + submodules: recursive + fetch-depth: 0 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y gfortran + pip install cython numpy pybind11 pythran pytest hypothesis pytest-xdist pooch + pip install -r requirements/dev.txt + pip install git+https://github.com/numpy/meson.git@main-numpymeson + pip install mkl mkl-devel + + - name: Build with defaults (LP64) + run: | + pkg-config --libs mkl-dynamic-lp64-seq # check link flags + python dev.py build -C-Dblas=mkl + + - name: Test + run: python dev.py test -j 2 + + + mkl-ilp64: + runs-on: ubuntu-latest + name: "MKL ILP64" + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + submodules: recursive + fetch-depth: 0 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y gfortran + pip install cython numpy pybind11 pythran pytest hypothesis pytest-xdist pooch + pip install -r requirements/dev.txt + pip install git+https://github.com/numpy/meson.git@main-numpymeson + pip install mkl mkl-devel + + - name: Build with ILP64 + run: | + pkg-config --libs mkl-dynamic-ilp64-seq # check link flags + python dev.py build -C-Dblas=mkl -C-Duse-ilp64=true + + - name: Test + run: python dev.py test -j 2 + + + scipy-openblas-ilp64: + runs-on: ubuntu-latest + name: 
"scipy-openblas ILP64" + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + submodules: recursive + fetch-depth: 0 + - uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y gfortran + pip install cython numpy pybind11 pythran pytest hypothesis pytest-xdist pooch + pip install -r requirements/dev.txt + pip install git+https://github.com/numpy/meson.git@main-numpymeson + pip install scipy-openblas32 scipy-openblas64 + # dev.py does this for scipy-openblas32 + python -c'import scipy_openblas64 as so64; print(so64.get_pkg_config())' > scipy-openblas64.pc + export PKG_CONFIG_PATH=`pwd` + + - name: Build with ILP64 + run: | + python dev.py build --with-scipy-openblas -C-Duse-ilp64=true + + - name: Test + run: python dev.py test -j 2 + + diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml index cc2f83189737..ed1a529ae0b0 100644 --- a/.github/workflows/macos.yml +++ b/.github/workflows/macos.yml @@ -217,3 +217,4 @@ jobs: pip install pooch pytest hypothesis python dev.py -n test + diff --git a/.github/workflows/macos_blas_ilp64.yml b/.github/workflows/macos_blas_ilp64.yml new file mode 100644 index 000000000000..59164c0f0370 --- /dev/null +++ b/.github/workflows/macos_blas_ilp64.yml @@ -0,0 +1,56 @@ +name: macOS BLAS ILP64 tests + +on: + push: + branches: + - maintenance/** + pull_request: + branches: + - main + - maintenance/** + +permissions: + contents: read # to fetch code (actions/checkout) + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + accelerate: + name: Accelerate (ILP64) + if: "github.repository == 'ev-br/scipy'" + runs-on: macos-15 + strategy: + matrix: + python-version: ["3.11"] + + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + 
submodules: recursive + + - name: Setup Python + uses: actions/setup-python@8d9ed9ac5c53483de85588cdf95a591a75ab9f55 # v5.5.0 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + + - name: Build and Install SciPy + run: | + sudo xcode-select -s /Applications/Xcode_16.app + + git submodule update --init + GFORTRAN_LOC=$(which gfortran-13) + ln -s $GFORTRAN_LOC gfortran + export PATH=$PWD:$PATH + + # Ensure we have gfortran dylib + GFORTRAN_LIB=$(dirname `gfortran --print-file-name libgfortran.dylib`) + export DYLD_LIBRARY_PATH=$GFORTRAN_LIB + + pip install click doit pydevtool rich_click meson cython pythran pybind11 ninja numpy + python dev.py build -C-Dblas=accelerate -C-Duse-ilp64=true + + pip install pooch pytest hypothesis + python dev.py -n test -v diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 825823ba579e..8c8c18f80661 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -216,8 +216,7 @@ jobs: env: CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}* CIBW_ARCHS: ${{ matrix.buildplat[2] }} - CIBW_PRERELEASE_PYTHONS: True - CIBW_FREE_THREADED_SUPPORT: True + CIBW_ENABLE: cpython-freethreading cpython-prerelease - name: Rename macOS wheels if: startsWith( matrix.buildplat[0], 'macos-' ) diff --git a/doc/source/building/blas_lapack.rst b/doc/source/building/blas_lapack.rst index 284c3672bb1e..61e27b0e618e 100644 --- a/doc/source/building/blas_lapack.rst +++ b/doc/source/building/blas_lapack.rst @@ -96,6 +96,44 @@ user wants to override this autodetection mechanism for building against plain $ python -m build -C-Duse-g77-abi=true -Csetup-args=-Dblas=blas -Csetup-args=-Dlapack=lapack +64-bit integer (ILP64) BLAS/LAPACK +---------------------------------- + +Support for ILP64 BLAS and LAPACK is still experimental; at the time of writing +(Apr 2025) it is only available for two BLAS/LAPACK configurations: MKL and +``scipy-openblas``. 
+ +SciPy always requires LP64 (32-bit integer size) BLAS/LAPACK. You can build SciPy +with *additional* ILP64 support. This will result in SciPy requiring both BLAS and +LAPACK variants, where some extensions link to the ILP64 variant, while other +extensions link to the LP64 variant. From Python, choosing the variant is done +through the ``get_blas_funcs`` and ``get_lapack_funcs`` functions:: + + >>> from scipy.linalg.blas import get_blas_funcs + >>> daxpy = get_blas_funcs('axpy', (np.ones(3),), ilp64='preferred') + >>> daxpy.int_dtype + dtype('int64') + +Building with ILP64 support requires several NumPy additions to ``meson``, which have +not been merged to upstream yet:: + + $ pip install git+https://github.com/numpy/meson.git@main-numpymeson + +For a development build with MKL, install the library and its development headers, and +use the ``-Duse-ilp64=true`` command line argument:: + + $ pip install mkl mkl-devel + $ python dev.py build -C-Dblas=mkl -C-Duse-ilp64=true + +For a development build with ``scipy-openblas64``, make sure you have installed both +``scipy-openblas32`` and ``scipy-openblas64``, and generate the pkg-config file +for the ILP64 variant:: + + $ python -c'import scipy_openblas64 as so64; print(so64.get_pkg_config())' > scipy-openblas64.pc + $ export PKG_CONFIG_PATH=`pwd` + $ python dev.py build --with-scipy-openblas -C-Duse-ilp64=true + + Work-in-progress ---------------- diff --git a/doc/source/dev/contributor/continuous_integration.rst b/doc/source/dev/contributor/continuous_integration.rst index 242e5293eee3..389bf957a98e 100644 --- a/doc/source/dev/contributor/continuous_integration.rst +++ b/doc/source/dev/contributor/continuous_integration.rst @@ -56,12 +56,6 @@ CircleCI * ``run_benchmarks``: verify how the changes impact performance * ``refguide_check``: doctests from examples and benchmarks -CirrusCI --------- -* ``Tests``: test suite for specific architecture like - ``musllinux, arm, aarch`` -* ``Wheels``: build and upload some
wheels - .. _skip-ci: Skipping @@ -79,7 +73,6 @@ Skipping CI can be achieved by adding a special text in the commit message: * ``[skip actions]``: will skip GitHub Actions * ``[skip circle]``: will skip CircleCI -* ``[skip cirrus]``: will skip CirrusCI * ``[docs only]``: will skip *all but* the CircleCI checks and the linter * ``[lint only]``: will skip *all but* the linter * ``[skip ci]``: will skip *all* CI @@ -88,7 +81,7 @@ Of course, you can combine these to skip multiple workflows. This skip information should be placed on a new line. In this example, we just updated a ``.rst`` file in the documentation and ask to skip all but the -relevant docs checks (skip Cirrus and GitHub Actions' workflows):: +relevant docs checks (skip GitHub Actions' workflows):: DOC: improve QMCEngine examples. diff --git a/meson.options b/meson.options index 3257cb8a8ff5..b7c3968ebdb0 100644 --- a/meson.options +++ b/meson.options @@ -2,6 +2,15 @@ option('blas', type: 'string', value: 'openblas', description: 'option for BLAS library switching') option('lapack', type: 'string', value: 'openblas', description: 'option for LAPACK library switching') + +# NB ILP64 build is experimental. +# See https://scipy.github.io/devdocs/building/blas_lapack.html for details +option('use-ilp64', type: 'boolean', value: false, + description: 'Use ILP64 (64-bit integer) BLAS and LAPACK interfaces') +option('blas-symbol-suffix', type: 'string', value: 'auto', + description: 'BLAS and LAPACK symbol suffix to use, if any') +option('mkl-threading', type: 'string', value: 'auto', + description: 'MKL threading method, one of: `seq`, `iomp`, `gomp`, `tbb`') option('use-g77-abi', type: 'boolean', value: false, description: 'If set to true, forces using g77 compatibility wrappers ' + 'for LAPACK functions. 
The default is to use gfortran ' + diff --git a/mypy.ini b/mypy.ini index e45c588cadf7..ffc23fb21bb9 100644 --- a/mypy.ini +++ b/mypy.ini @@ -120,7 +120,7 @@ ignore_missing_imports = True [mypy-scipy.optimize._bglu_dense] ignore_missing_imports = True -[mypy-scipy.optimize._slsqp] +[mypy-scipy.optimize._slsqplib] ignore_missing_imports = True [mypy-scipy.interpolate._dfitpack] diff --git a/pyproject.toml b/pyproject.toml index 1d7f207e9dab..1181de3fe34f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,7 +82,7 @@ test = [ "scikit-umfpack", "pooch", "hypothesis>=6.30", - "array-api-strict>=2.3", + "array-api-strict>=2.3.1", "Cython", "meson", 'ninja; sys_platform != "emscripten"', diff --git a/requirements/test.txt b/requirements/test.txt index 666ff489f4ed..78a5873d9038 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -11,7 +11,7 @@ threadpoolctl # scikit-umfpack # circular dependency issues pooch hypothesis>=6.30 -array-api-strict>=2.0,<2.1.1 +array-api-strict>=2.3.1 Cython meson ninja; sys_platform != "emscripten" diff --git a/scipy/_build_utils/int64.f2cmap.in b/scipy/_build_utils/int64.f2cmap.in new file mode 100644 index 000000000000..86ffa326b6e6 --- /dev/null +++ b/scipy/_build_utils/int64.f2cmap.in @@ -0,0 +1 @@ +{'integer': {'': '@int64_name@'}, 'logical': {'': '@int64_name@'}} diff --git a/scipy/_build_utils/src/_blas64_defines.h b/scipy/_build_utils/src/_blas64_defines.h new file mode 100644 index 000000000000..20d9834079ca --- /dev/null +++ b/scipy/_build_utils/src/_blas64_defines.h @@ -0,0 +1,33 @@ +/* + * A common include for fblas_64 and flapack_64 f2py sources. + * + * f2py accounts for the Fortran name mangling (uppercase/lowercase, trailing underscore), + * via its hardcoded F_FUNC define. + * + * For ILP64 variants, we need a more flexible naming scheme, to potentially include + * the _64 or 64_ suffixes. This is what the `BLAS_FUNC` macro from `npy_cblas.h` does.
+ * + * We therefore inject the define into the f2py-generated sources. + */ + +#ifdef F_FUNC +#undef F_FUNC +#endif + +#include "npy_cblas.h" +#define F_FUNC(f, F) BLAS_FUNC(f) + +#ifdef FIX_MKL_2025_ILP64_MISSING_SYMBOL +#define cspr_64_ cspr_64 +#endif + +#define F_INT npy_int64 + + +#ifndef HAVE_BLAS_ILP64 +#error("HAVE_BLAS_ILP64 not defined.") +#endif + +#ifndef BLAS_SYMBOL_SUFFIX +#error("BLAS_SYMBOL_SUFFIX not defined") +#endif diff --git a/scipy/_build_utils/src/npy_cblas.h b/scipy/_build_utils/src/npy_cblas.h index de65ad903284..56c5e05916bd 100644 --- a/scipy/_build_utils/src/npy_cblas.h +++ b/scipy/_build_utils/src/npy_cblas.h @@ -26,6 +26,21 @@ enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; #define CBLAS_INDEX size_t /* this may vary between platforms */ +#ifdef ACCELERATE_NEW_LAPACK + #if __MAC_OS_X_VERSION_MAX_ALLOWED < 130300 + #ifdef HAVE_BLAS_ILP64 + #error "Accelerate ILP64 support is only available with macOS 13.3 SDK or later" + #endif + #else + /* #define NO_APPEND_FORTRAN */ + #ifdef HAVE_BLAS_ILP64 + #define BLAS_SYMBOL_SUFFIX $NEWLAPACK$ILP64 + #else + #define BLAS_SYMBOL_SUFFIX $NEWLAPACK + #endif + #endif +#endif + #ifdef NO_APPEND_FORTRAN #define BLAS_FORTRAN_SUFFIX #else @@ -50,7 +65,6 @@ enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; #define BLAS_FUNC_CONCAT(name,prefix,suffix,suffix2) prefix ## name ## suffix ## suffix2 #define BLAS_FUNC_EXPAND(name,prefix,suffix,suffix2) BLAS_FUNC_CONCAT(name,prefix,suffix,suffix2) -#define CBLAS_FUNC(name) BLAS_FUNC_EXPAND(name,BLAS_SYMBOL_PREFIX,,BLAS_SYMBOL_SUFFIX) /* * Use either the OpenBLAS scheme with the `64_` suffix behind the Fortran * compiler symbol mangling, or the MKL scheme (and upcoming @@ -62,6 +76,12 @@ enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; #define BLAS_FUNC(name) BLAS_FUNC_EXPAND(name,BLAS_SYMBOL_PREFIX,BLAS_SYMBOL_SUFFIX,BLAS_FORTRAN_SUFFIX) #endif +/* + * Note that CBLAS doesn't include Fortran compiler symbol mangling, so ends up + * being the same in both schemes 
+ */ +#define CBLAS_FUNC(name) BLAS_FUNC_EXPAND(name,BLAS_SYMBOL_PREFIX,,BLAS_SYMBOL_SUFFIX) + #ifdef HAVE_BLAS_ILP64 #define CBLAS_INT npy_int64 #define CBLAS_INT_MAX NPY_MAX_INT64 diff --git a/scipy/_build_utils/src/wrap_g77_abi.c b/scipy/_build_utils/src/wrap_g77_abi.c index ac11f9c53c57..f8ff60d5a325 100644 --- a/scipy/_build_utils/src/wrap_g77_abi.c +++ b/scipy/_build_utils/src/wrap_g77_abi.c @@ -25,6 +25,12 @@ return values, struct complex arguments work without segfaulting. #include "npy_cblas.h" #include "fortran_defs.h" +#ifdef HAVE_BLAS_ILP64 +/* NB: this redefines F_FUNC */ +#include "_blas64_defines.h" +#endif + + #ifdef __cplusplus extern "C" { #endif diff --git a/scipy/_lib/_array_api.py b/scipy/_lib/_array_api.py index b7fb0c25ce35..a724ff74144a 100644 --- a/scipy/_lib/_array_api.py +++ b/scipy/_lib/_array_api.py @@ -39,12 +39,13 @@ __all__ = [ '_asarray', 'array_namespace', 'assert_almost_equal', 'assert_array_almost_equal', - 'get_xp_devices', 'default_xp', 'is_lazy_array', 'is_marray', + 'default_xp', 'is_lazy_array', 'is_marray', 'is_array_api_strict', 'is_complex', 'is_cupy', 'is_jax', 'is_numpy', 'is_torch', 'SCIPY_ARRAY_API', 'SCIPY_DEVICE', 'scipy_namespace_for', 'xp_assert_close', 'xp_assert_equal', 'xp_assert_less', 'xp_copy', 'xp_device', 'xp_ravel', 'xp_size', 'xp_unsupported_param_msg', 'xp_vector_norm', 'xp_capabilities', + 'xp_result_type', 'xp_promote' ] @@ -420,42 +421,6 @@ def is_complex(x: Array, xp: ModuleType) -> bool: return xp.isdtype(x.dtype, 'complex floating') -def get_xp_devices(xp: ModuleType) -> list[str] | list[None]: - """Returns a list of available devices for the given namespace.""" - devices: list[str] = [] - if is_torch(xp): - devices += ['cpu'] - import torch # type: ignore[import] - num_cuda = torch.cuda.device_count() - for i in range(0, num_cuda): - devices += [f'cuda:{i}'] - if torch.backends.mps.is_available(): - devices += ['mps'] - return devices - elif is_cupy(xp): - import cupy # type: ignore[import] - 
num_cuda = cupy.cuda.runtime.getDeviceCount() - for i in range(0, num_cuda): - devices += [f'cuda:{i}'] - return devices - elif is_jax(xp): - import jax # type: ignore[import] - num_cpu = jax.device_count(backend='cpu') - for i in range(0, num_cpu): - devices += [f'cpu:{i}'] - num_gpu = jax.device_count(backend='gpu') - for i in range(0, num_gpu): - devices += [f'gpu:{i}'] - num_tpu = jax.device_count(backend='tpu') - for i in range(0, num_tpu): - devices += [f'tpu:{i}'] - return devices - - # given namespace is not known to have a list of available devices; - # return `[None]` so that one can use this in tests for `device=None`. - return [None] - - def scipy_namespace_for(xp: ModuleType) -> ModuleType | None: """Return the `scipy`-like namespace of a non-NumPy backend @@ -513,31 +478,87 @@ def xp_ravel(x: Array, /, *, xp: ModuleType | None = None) -> Array: return xp.reshape(x, (-1,)) -# utility to broadcast arrays and promote to common dtype -def xp_broadcast_promote(*args, ensure_writeable=False, force_floating=False, xp=None): - xp = array_namespace(*args) if xp is None else xp - - args = [(_asarray(arg, subok=True) if arg is not None else arg) for arg in args] +# utility to find common dtype with option to force floating +def xp_result_type(*args, force_floating=False, xp): + """ + Returns the dtype that results from applying type promotion rules + (see Array API Standard Type Promotion Rules) to the arguments. Augments + standard `result_type` in a few ways: + + - There is a `force_floating` argument that ensures that the result type + is floating point, even when all args are integer. + - When a TypeError is raised (e.g. due to an unsupported promotion) + and `force_floating=True`, we define a custom rule: use the result type + of the default float and any other floats passed. See + https://github.com/scipy/scipy/pull/22695/files#r1997905891 + for rationale. 
+ - This function accepts array-like iterables, which are immediately converted + to the namespace's arrays before result type calculation. Consequently, the + result dtype may be different when an argument is `1.` vs `[1.]`. + + Typically, this function will be called shortly after `array_namespace` + on a subset of the arguments passed to `array_namespace`. + """ + args = [(_asarray(arg, subok=True, xp=xp) if np.iterable(arg) else arg) + for arg in args] args_not_none = [arg for arg in args if arg is not None] + if force_floating: + args_not_none.append(1.0) - # determine minimum dtype - default_float = xp.asarray(1.).dtype - dtypes = [arg.dtype for arg in args_not_none] - try: # follow library's prefered mixed promotion rules - dtype = xp.result_type(*dtypes) - if force_floating and xp.isdtype(dtype, 'integral'): - # If we were to add `default_float` before checking whether the result - # type is otherwise integral, we risk promotion from lower float. - dtype = xp.result_type(dtype, default_float) + if is_numpy(xp) and xp.__version__ < '2.0': + # Follow NEP 50 promotion rules anyway + args_not_none = [arg.dtype if getattr(arg, 'size', 0) == 1 else arg + for arg in args_not_none] + return xp.result_type(*args_not_none) + + try: # follow library's preferred promotion rules + return xp.result_type(*args_not_none) except TypeError: # mixed type promotion isn't defined - float_dtypes = [dtype for dtype in dtypes - if not xp.isdtype(dtype, 'integral')] - if float_dtypes: - dtype = xp.result_type(*float_dtypes, default_float) - elif force_floating: - dtype = default_float - else: - dtype = xp.result_type(*dtypes) + if not force_floating: + raise + # use `result_type` of default floating point type and any floats present + # This can be revisited, but right now, the only backends that get here + # are array-api-strict (which is not for production use) and PyTorch + # (due to data-apis/array-api-compat#279). 
+ float_args = [] + for arg in args_not_none: + arg_array = xp.asarray(arg) if np.isscalar(arg) else arg + dtype = getattr(arg_array, 'dtype', arg) + if xp.isdtype(dtype, ('real floating', 'complex floating')): + float_args.append(arg) + return xp.result_type(*float_args, xp_default_dtype(xp)) + + +def xp_promote(*args, broadcast=False, force_floating=False, xp): + """ + Promotes elements of *args to result dtype, ignoring `None`s. + Includes options for forcing promotion to floating point and + broadcasting the arrays, again ignoring `None`s. + Type promotion rules follow `xp_result_type` instead of `xp.result_type`. + + Typically, this function will be called shortly after `array_namespace` + on a subset of the arguments passed to `array_namespace`. + + This function accepts array-like iterables, which are immediately converted + to the namespace's arrays before result type calculation. Consequently, the + result dtype may be different when an argument is `1.` vs `[1.]`. + + See Also + -------- + xp_result_type + """ + args = [(_asarray(arg, subok=True, xp=xp) if np.iterable(arg) else arg) + for arg in args] # solely to prevent double conversion of iterable to array + + dtype = xp_result_type(*args, force_floating=force_floating, xp=xp) + + args = [(_asarray(arg, dtype=dtype, subok=True, xp=xp) if arg is not None else arg) + for arg in args] + + if not broadcast: + return args[0] if len(args)==1 else tuple(args) + + args_not_none = [arg for arg in args if arg is not None] # determine result shape shapes = {arg.shape for arg in args_not_none} @@ -561,12 +582,13 @@ def xp_broadcast_promote(*args, ensure_writeable=False, force_floating=False, xp kwargs = {'subok': True} if is_numpy(xp) else {} arg = xp.broadcast_to(arg, shape, **kwargs) - # convert dtype/copy only if needed - if (arg.dtype != dtype) or ensure_writeable: - arg = xp.astype(arg, dtype, copy=True) + # This is much faster than xp.astype(arg, dtype, copy=False) + if arg.dtype != dtype: + arg = 
xp.astype(arg, dtype) + out.append(arg) - return out + return out[0] if len(out)==1 else tuple(out) def xp_float_to_complex(arr: Array, xp: ModuleType | None = None) -> Array: diff --git a/scipy/_lib/_elementwise_iterative_method.py b/scipy/_lib/_elementwise_iterative_method.py index 05efe86d31c1..c0d5a3d06ae0 100644 --- a/scipy/_lib/_elementwise_iterative_method.py +++ b/scipy/_lib/_elementwise_iterative_method.py @@ -15,7 +15,7 @@ import math import numpy as np from ._util import _RichResult, _call_callback_maybe_halt -from ._array_api import array_namespace, xp_size +from ._array_api import array_namespace, xp_size, xp_result_type _ESIGNERR = -1 _ECONVERR = -2 @@ -82,9 +82,8 @@ def _initialize(func, xs, args, complex_ok=False, preserve_shape=None, xp=None): # and cause failure. # There might be benefit to combining the `xs` into a single array and # calling `func` once on the combined array. For now, keep them separate. + xat = xp_result_type(*xs, force_floating=True, xp=xp) xas = xp.broadcast_arrays(*xs, *args) # broadcast and rename - xat = xp.result_type(*[xa.dtype for xa in xas]) - xat = xp.asarray(1.).dtype if xp.isdtype(xat, "integral") else xat xs, args = xas[:nx], xas[nx:] xs = [xp.asarray(x, dtype=xat) for x in xs] # use copy=False when implemented fs = [xp.asarray(func(x, *args)) for x in xs] diff --git a/scipy/_lib/_util.py b/scipy/_lib/_util.py index 88ecb85c5bfc..2de7552b45d9 100644 --- a/scipy/_lib/_util.py +++ b/scipy/_lib/_util.py @@ -12,11 +12,11 @@ import numpy as np from scipy._lib._array_api import (Array, array_namespace, is_lazy_array, - is_numpy, is_marray, xp_size) + is_numpy, is_marray, xp_size, xp_result_type) from scipy._lib._docscrape import FunctionDoc, Parameter from scipy._lib._sparse import issparse -from numpy.exceptions import AxisError, DTypePromotionError +from numpy.exceptions import AxisError np_long: type @@ -1012,13 +1012,7 @@ def _rng_spawn(rng, n_children): def _get_nan(*data, xp=None): xp = array_namespace(*data) if xp 
is None else xp # Get NaN of appropriate dtype for data - data = [xp.asarray(item) for item in data] - try: - min_float = getattr(xp, 'float16', xp.float32) - dtype = xp.result_type(*data, min_float) # must be at least a float - except DTypePromotionError: - # fallback to float64 - dtype = xp.float64 + dtype = xp_result_type(*data, force_floating=True, xp=xp) res = xp.asarray(xp.nan, dtype=dtype)[()] # whenever mdhaber/marray#89 is resolved, could just return `res` return res.data if is_marray(xp) else res diff --git a/scipy/_lib/array_api_compat b/scipy/_lib/array_api_compat index 8d991b437cdc..621494be1bd8 160000 --- a/scipy/_lib/array_api_compat +++ b/scipy/_lib/array_api_compat @@ -1 +1 @@ -Subproject commit 8d991b437cdcdf2cd91bef33fbfd491a409cb64f +Subproject commit 621494be1bd8682f1d76ae874272c12464953d3d diff --git a/scipy/_lib/array_api_extra b/scipy/_lib/array_api_extra index de481f2cac82..0d26a7462a3f 160000 --- a/scipy/_lib/array_api_extra +++ b/scipy/_lib/array_api_extra @@ -1 +1 @@ -Subproject commit de481f2cac821c2db7ab2a45b83ed29963c2e1eb +Subproject commit 0d26a7462a3fbf5ed9e42e261bdb3b39f25e2faf diff --git a/scipy/_lib/meson.build b/scipy/_lib/meson.build index acf54b58c2cf..01cedf0d94a1 100644 --- a/scipy/_lib/meson.build +++ b/scipy/_lib/meson.build @@ -211,6 +211,7 @@ py3.install_sources( 'array_api_compat/array_api_compat/torch/__init__.py', 'array_api_compat/array_api_compat/torch/_aliases.py', 'array_api_compat/array_api_compat/torch/_info.py', + 'array_api_compat/array_api_compat/torch/_typing.py', 'array_api_compat/array_api_compat/torch/fft.py', 'array_api_compat/array_api_compat/torch/linalg.py', ], diff --git a/scipy/_lib/tests/test_array_api.py b/scipy/_lib/tests/test_array_api.py index d0da4d45135c..663ee0ac0024 100644 --- a/scipy/_lib/tests/test_array_api.py +++ b/scipy/_lib/tests/test_array_api.py @@ -1,14 +1,17 @@ +import re + import numpy as np import pytest from scipy._lib._array_api import ( _GLOBAL_CONFIG, array_namespace, 
_asarray, xp_copy, xp_assert_equal, is_numpy, - np_compat, xp_default_dtype + np_compat, xp_default_dtype, xp_result_type, is_torch ) from scipy._lib import array_api_extra as xpx from scipy._lib._array_api_no_0d import xp_assert_equal as xp_assert_equal_no_0d from scipy._lib.array_api_extra.testing import lazy_xp_function + lazy_xp_function(_asarray, static_argnames=( "dtype", "order", "copy", "xp", "check_finite", "subok")) lazy_xp_function(xp_copy, static_argnames=("xp", )) @@ -225,3 +228,96 @@ def test_check_scalar_no_0d(self, xp): def test_default_dtype(self, xp): assert xp_default_dtype(xp) == xp.asarray(1.).dtype + + +scalars = [1, 1., 1. + 1j] +lists = [[1], [1.], [1. + 1j]] +types = ('int8 int16 int32 int64 ' + 'uint8 uint16 uint32 uint64 ' + 'float32 float64 complex64 complex128').split() +arrays = [np.asarray([1], dtype=getattr(np, t)) for t in types] + + +def convert_type(x, xp): + # Convert NumPy array to xp-array + # Convert string to indicated dtype from xp + # Return Python scalars unchanged + if isinstance(x, np.ndarray): + return xp.asarray(x) + elif isinstance(x, str): + return getattr(xp, x) + return x + + +def is_inexact(x, xp): + # Determine whether `x` is of inexact (real of complex floating) dtype + x = xp.asarray(x) if np.isscalar(x) or isinstance(x, list) else x + dtype = getattr(x, 'dtype', x) + return xp.isdtype(dtype, ('real floating', 'complex floating')) + + +@pytest.mark.parametrize('x', scalars + lists + types + arrays) +@pytest.mark.parametrize('y', scalars + lists + types + arrays) +def test_xp_result_type_no_force(x, y, xp): + # When force_floating==False (default), behavior of `xp_result_type` + # should match that of `xp.result_type` on the same arguments after + # converting lists to arrays of type `xp`. 
+ x = convert_type(x, xp) + y = convert_type(y, xp) + x_ref = xp.asarray(x) if isinstance(x, list) else x + y_ref = xp.asarray(y) if isinstance(y, list) else y + + try: + dtype_ref = xp.result_type(x_ref, y_ref) + expected_error = None + except Exception as e: + expected_error = (type(e), str(e)) + + if expected_error is not None: + with pytest.raises(expected_error[0], match=re.escape(expected_error[1])): + xp_result_type(x, y, xp=xp) + return + + dtype_res = xp_result_type(x, y, xp=xp) + assert dtype_res == dtype_ref + + +@pytest.mark.parametrize('x', scalars + lists + types + arrays) +@pytest.mark.parametrize('y', scalars + lists + types + arrays) +def test_xp_result_type_force_floating(x, y, xp): + # When `force_floating==True`, behavior of `xp_result_type` + # should match that of `xp.result_type` with `1.0` appended to the set of + # arguments (after converting lists to arrays of type `xp`). + # If this raises a `TypeError`, which is the case when the result + # type is not defined by the standard, the result type should be + # the result type of any inexact (real or complex floating) arguments + # and the default floating point type. 
+ if (is_torch(xp) and not(isinstance(x, str) or isinstance(y, str)) + and np.isscalar(x) and np.isscalar(y)): + pytest.skip("See 3/27/2024 comment at data-apis/array-api-compat#277") + + x = convert_type(x, xp) + y = convert_type(y, xp) + x_ref = xp.asarray(x) if isinstance(x, list) else x + y_ref = xp.asarray(y) if isinstance(y, list) else y + + expected_error = None + try: + dtype_ref = xp.result_type(x_ref, y_ref, 1.0) + except TypeError: + args = [] + if is_inexact(x_ref, xp): + args.append(x_ref) + if is_inexact(y_ref, xp): + args.append(y_ref) + dtype_ref = xp.result_type(*args, xp.asarray(1.0)) + except Exception as e: + expected_error = (type(e), str(e)) + + if expected_error is not None: + with pytest.raises(expected_error[0], match=expected_error[1]): + xp_result_type(x, y, xp=xp) + return + + dtype_res = xp_result_type(x, y, force_floating=True, xp=xp) + assert dtype_res == dtype_ref diff --git a/scipy/cluster/hierarchy.py b/scipy/cluster/hierarchy.py index 2fb11dcf0651..56620ad9a8d9 100644 --- a/scipy/cluster/hierarchy.py +++ b/scipy/cluster/hierarchy.py @@ -134,7 +134,8 @@ import numpy as np from . 
import _hierarchy, _optimal_leaf_ordering import scipy.spatial.distance as distance -from scipy._lib._array_api import array_namespace, _asarray, xp_copy, is_jax +from scipy._lib._array_api import (_asarray, array_namespace, is_dask, is_jax, + is_lazy_array, xp_copy) from scipy._lib._disjoint_set import DisjointSet import scipy._lib.array_api_extra as xpx @@ -1005,6 +1006,7 @@ def linkage(y, method='single', metric='euclidean', optimal_ordering=False): """ xp = array_namespace(y) y = _asarray(y, order='C', dtype=xp.float64, xp=xp) + lazy = is_lazy_array(y) if method not in _LINKAGE_METHODS: raise ValueError(f"Invalid method: {method}") @@ -1016,35 +1018,40 @@ def linkage(y, method='single', metric='euclidean', optimal_ordering=False): if y.ndim == 1: distance.is_valid_y(y, throw=True, name='y') elif y.ndim == 2: - if (y.shape[0] == y.shape[1] and np.allclose(np.diag(y), 0) and - xp.all(y >= 0) and np.allclose(y, y.T)): + if (not lazy and y.shape[0] == y.shape[1] + and xp.all(xpx.isclose(xp.linalg.diagonal(y), 0)) + and xp.all(y >= 0) and xp.all(xpx.isclose(y, y.T))): warnings.warn('The symmetric non-negative hollow observation ' 'matrix looks suspiciously like an uncondensed ' 'distance matrix', ClusterWarning, stacklevel=2) y = distance.pdist(y, metric) - y = xp.asarray(y) else: raise ValueError("`y` must be 1 or 2 dimensional.") - if not xp.all(xp.isfinite(y)): + if not lazy and not xp.all(xp.isfinite(y)): raise ValueError("The condensed distance matrix must contain only " "finite values.") - n = int(distance.num_obs_y(y)) + n = distance.num_obs_y(y) method_code = _LINKAGE_METHODS[method] - y = np.asarray(y) - if method == 'single': - result = _hierarchy.mst_single_linkage(y, n) - elif method in ['complete', 'average', 'weighted', 'ward']: - result = _hierarchy.nn_chain(y, n, method_code) - else: - result = _hierarchy.fast_linkage(y, n, method_code) - result = xp.asarray(result) + def cy_linkage(y, validate): + if validate and not np.all(np.isfinite(y)): + raise 
ValueError("The condensed distance matrix must contain only " + "finite values.") + + if method == 'single': + return _hierarchy.mst_single_linkage(y, n) + elif method in ('complete', 'average', 'weighted', 'ward'): + return _hierarchy.nn_chain(y, n, method_code) + else: + return _hierarchy.fast_linkage(y, n, method_code) + + result = xpx.lazy_apply(cy_linkage, y, validate=lazy, + shape=(n - 1, 4), dtype=xp.float64, as_numpy=True) if optimal_ordering: - y = xp.asarray(y) return optimal_leaf_ordering(result, y) else: return result @@ -1514,31 +1521,39 @@ def optimal_leaf_ordering(Z, y, metric='euclidean'): """ xp = array_namespace(Z, y) Z = _asarray(Z, order='C', xp=xp) - is_valid_linkage(Z, throw=True, name='Z') - y = _asarray(y, order='C', dtype=xp.float64, xp=xp) + lazy = is_lazy_array(Z) + _is_valid_linkage(Z, throw=True, name='Z') if y.ndim == 1: distance.is_valid_y(y, throw=True, name='y') elif y.ndim == 2: - if (y.shape[0] == y.shape[1] and np.allclose(np.diag(y), 0) and - np.all(y >= 0) and np.allclose(y, y.T)): + if (not lazy and y.shape[0] == y.shape[1] + and xp.all(xpx.isclose(xp.linalg.diagonal(y), 0)) + and xp.all(y >= 0) and xp.all(xpx.isclose(y, y.T))): warnings.warn('The symmetric non-negative hollow observation ' 'matrix looks suspiciously like an uncondensed ' 'distance matrix', ClusterWarning, stacklevel=2) y = distance.pdist(y, metric) - y = xp.asarray(y) else: raise ValueError("`y` must be 1 or 2 dimensional.") - - if not xp.all(xp.isfinite(y)): + if not lazy and not xp.all(xp.isfinite(y)): raise ValueError("The condensed distance matrix must contain only " "finite values.") - Z = np.asarray(Z) - y = np.asarray(y) - return xp.asarray(_optimal_leaf_ordering.optimal_leaf_ordering(Z, y)) + # The function name is prominently visible on the user-facing Dask dashboard; + # make sure it is meaningful. 
+ def optimal_leaf_ordering_(Z, y, validate): + if validate: + is_valid_linkage(Z, throw=True, name='Z') + if not np.all(np.isfinite(y)): + raise ValueError("The condensed distance matrix must contain only " + "finite values.") + return _optimal_leaf_ordering.optimal_leaf_ordering(Z, y) + + return xpx.lazy_apply(optimal_leaf_ordering_, Z, y, validate=lazy, + shape=Z.shape, dtype=Z.dtype, as_numpy=True) def cophenet(Z, Y=None): @@ -1924,10 +1939,9 @@ def to_mlab_linkage(Z): """ xp = array_namespace(Z) Z = _asarray(Z, order='C', dtype=xp.float64, xp=xp) - Zs = Z.shape - if len(Zs) == 0 or (len(Zs) == 1 and Zs[0] == 0): + if Z.ndim == 0 or (Z.ndim == 1 and Z.shape[0] == 0): return xp_copy(Z, xp=xp) - is_valid_linkage(Z, throw=True, name='Z') + _is_valid_linkage(Z, throw=True, name='Z') return xp.concat((Z[:, :2] + 1.0, Z[:, 2:3]), axis=1) @@ -2012,7 +2026,7 @@ def is_monotonic(Z): """ xp = array_namespace(Z) Z = _asarray(Z, order='c', xp=xp) - is_valid_linkage(Z, throw=True, name='Z') + _is_valid_linkage(Z, throw=True, name='Z') # We expect the i'th value to be greater than its successor. return xp.all(Z[1:, 2] >= Z[:-1, 2]) @@ -2042,7 +2056,13 @@ def is_valid_im(R, warning=False, throw=False, name=None): Returns ------- b : bool - True if the inconsistency matrix is valid. + True if the inconsistency matrix is valid; False otherwise. + + Notes + ----- + *Array API support (experimental):* If the input is a lazy Array (e.g. Dask + or JAX), the return value may be a 0-dimensional bool Array. When warning=True + or throw=True, calling this function materializes the array. 
See Also -------- @@ -2104,10 +2124,17 @@ def is_valid_im(R, warning=False, throw=False, name=None): >>> is_valid_im(R) False + """ + return _is_valid_im(R, warning=warning, throw=throw, name=name, materialize=True) + + +def _is_valid_im(R, warning=False, throw=False, name=None, materialize=False): + """Variant of `is_valid_im` to be called internally by other scipy functions, + which by default does not materialize lazy input arrays (Dask, JAX, etc.) when + warning=True or throw=True. """ xp = array_namespace(R) - R = _asarray(R, order='c', xp=xp) - valid = True + R = _asarray(R, xp=xp) name_str = f"{name!r} " if name else '' try: if R.dtype != xp.float64: @@ -2122,23 +2149,23 @@ def is_valid_im(R, warning=False, throw=False, name=None): if R.shape[0] < 1: raise ValueError(f'Inconsistency matrix {name_str}' 'must have at least one row.') - if xp.any(R[:, 0] < 0): - raise ValueError(f'Inconsistency matrix {name_str}' - 'contains negative link height means.') - if xp.any(R[:, 1] < 0): - raise ValueError(f'Inconsistency matrix {name_str}' - 'contains negative link height standard deviations.') - if xp.any(R[:, 2] < 0): - raise ValueError(f'Inconsistency matrix {name_str}' - 'contains negative link counts.') - except Exception as e: + except (TypeError, ValueError) as e: if throw: raise if warning: _warning(str(e)) - valid = False + return False - return valid + return _lazy_valid_checks( + (xp.any(R[:, 0] < 0), + f'Inconsistency matrix {name_str} contains negative link height means.'), + (xp.any(R[:, 1] < 0), + f'Inconsistency matrix {name_str} contains negative link height standard ' + 'deviations.'), + (xp.any(R[:, 2] < 0), + f'Inconsistency matrix {name_str} contains negative link counts.'), + throw=throw, warning=warning, materialize=materialize, xp=xp + ) def is_valid_linkage(Z, warning=False, throw=False, name=None): @@ -2179,7 +2206,13 @@ def is_valid_linkage(Z, warning=False, throw=False, name=None): Returns ------- b : bool - True if the inconsistency matrix 
is valid. + True if the inconsistency matrix is valid; False otherwise. + + Notes + ----- + *Array API support (experimental):* If the input is a lazy Array (e.g. Dask + or JAX), the return value may be a 0-dimensional bool Array. When warning=True + or throw=True, calling this function materializes the array. See Also -------- @@ -2225,10 +2258,18 @@ def is_valid_linkage(Z, warning=False, throw=False, name=None): >>> is_valid_linkage(Z) False + """ + return _is_valid_linkage(Z, warning=warning, throw=throw, + name=name, materialize=True) + + +def _is_valid_linkage(Z, warning=False, throw=False, name=None, materialize=False): + """Variant of `is_valid_linkage` to be called internally by other scipy functions, + which by default does not materialize lazy input arrays (Dask, JAX, etc.) when + warning=True or throw=True. """ xp = array_namespace(Z) - Z = _asarray(Z, order='c', xp=xp) - valid = True + Z = _asarray(Z, xp=xp) name_str = f"{name!r} " if name else '' try: if Z.dtype != xp.float64: @@ -2241,32 +2282,85 @@ def is_valid_linkage(Z, warning=False, throw=False, name=None): if Z.shape[0] == 0: raise ValueError('Linkage must be computed on at least two ' 'observations.') - n = Z.shape[0] - if n > 1: - if (xp.any(Z[:, 0] < 0) or xp.any(Z[:, 1] < 0)): - raise ValueError(f'Linkage {name_str}contains negative indices.') - if xp.any(Z[:, 2] < 0): - raise ValueError(f'Linkage {name_str}contains negative distances.') - if xp.any(Z[:, 3] < 0): - raise ValueError(f'Linkage {name_str}contains negative counts.') - if xp.any(Z[:, 3] > (Z.shape[0] + 1)): - raise ValueError('Linkage matrix contains excessive observations' - 'in a cluster') - if xp.any( - xp.max(Z[:, :2], axis=1) >= xp.arange(n + 1, 2 * n + 1, dtype=Z.dtype) - ): - raise ValueError(f'Linkage {name_str}uses non-singleton cluster before' - ' it is formed.') - if xpx.nunique(Z[:, :2]) < n * 2: - raise ValueError(f'Linkage {name_str}uses the same cluster more than once.') - except Exception as e: + except 
(TypeError, ValueError) as e: if throw: raise if warning: _warning(str(e)) - valid = False + return False + + n = Z.shape[0] + if n < 2: + return True + + return _lazy_valid_checks( + (xp.any(Z[:, :2] < 0), + f'Linkage {name_str}contains negative indices.'), + (xp.any(Z[:, 2] < 0), + f'Linkage {name_str}contains negative distances.'), + (xp.any(Z[:, 3] < 0), + f'Linkage {name_str}contains negative counts.'), + (xp.any(Z[:, 3] > n + 1), + f'Linkage {name_str}contains excessive observations in a cluster'), + (xp.any(xp.max(Z[:, :2], axis=1) >= xp.arange(n + 1, 2 * n + 1, dtype=Z.dtype)), + f'Linkage {name_str}uses non-singleton cluster before it is formed.'), + (xpx.nunique(Z[:, :2]) < n * 2, + f'Linkage {name_str}uses the same cluster more than once.'), + throw=throw, warning=warning, materialize=materialize, xp=xp + ) + + +def _lazy_valid_checks(*args, throw=False, warning=False, materialize=False, xp): + """Validate a set of conditions on the contents of possibly lazy arrays. - return valid + Parameters + ---------- + args : tuples of (Array, str) + The first element of each tuple must be a 0-dimensional Array + that evaluates to bool; the second element must be the message to convey + if the first element evaluates to True. + throw: bool + Set to True to `raise ValueError(args[i][1])` if `args[i][0]` is True. + warning: bool + Set to True to issue a warning with message `args[i][1]` if `args[i][0]` + is True. + materialize: bool + Set to True to force materialization of lazy arrays when throw=True or + warning=True. If the inputs are lazy and materialize=False, ignore the + `throw` and `warning` flags. + xp: module + Array API namespace + + Returns + ------- + If xp is an eager backend (e.g. numpy) and all conditions are False, return True. + If throw is True, raise. Otherwise, return False. + + If xp is a lazy backend (e.g. Dask or JAX), return a 0-dimensional bool Array. 
+ """ + conds = xp.concat([xp.reshape(cond, (1, )) for cond, _ in args]) + + lazy = is_lazy_array(conds) + if not throw and not warning or (lazy and not materialize): + out = ~xp.any(conds) + return out if lazy else bool(out) + + if is_dask(xp): + # Only materialize the graph once, instead of once per check + conds = conds.compute() + + # Don't call np.asarray(conds), as it would be blocked by the device transfer + # guard on CuPy and PyTorch and the densification guard on Sparse, whereas + # bool() will not. + conds = [bool(cond) for cond in conds] + + for cond, (_, msg) in zip(conds, args): + if throw and cond: + raise ValueError(msg) + elif warning and cond: + warnings.warn(msg, ClusterWarning, stacklevel=3) + + return not any(conds) def num_obs_linkage(Z): @@ -2304,8 +2398,8 @@ def num_obs_linkage(Z): """ xp = array_namespace(Z) Z = _asarray(Z, order='c', xp=xp) - is_valid_linkage(Z, throw=True, name='Z') - return (Z.shape[0] + 1) + _is_valid_linkage(Z, throw=True, name='Z') + return Z.shape[0] + 1 def correspond(Z, Y): @@ -2357,7 +2451,7 @@ def correspond(Z, Y): True """ - is_valid_linkage(Z, throw=True) + _is_valid_linkage(Z, throw=True) distance.is_valid_y(Y, throw=True) xp = array_namespace(Z, Y) Z = _asarray(Z, order='c', xp=xp) @@ -2640,11 +2734,9 @@ def fclusterdata(X, t, criterion='inconsistent', X = _asarray(X, order='C', dtype=xp.float64, xp=xp) if X.ndim != 2: - raise TypeError('The observation matrix X must be an n by m ' - 'array.') + raise TypeError('The observation matrix X must be an n by m array.') Y = distance.pdist(X, metric=metric) - Y = xp.asarray(Y) Z = linkage(Y, method=method) if R is None: R = inconsistent(Z, d=depth) @@ -4121,11 +4213,16 @@ def leaders(Z, T): >>> M array([1, 2, 3, 4], dtype=int32) + Notes + ----- + *Array API support (experimental):* This function returns arrays + with data-dependent shape. In JAX, at the moment of writing this makes it + impossible to execute it inside `@jax.jit`. 
""" xp = array_namespace(Z, T) Z = _asarray(Z, order='C', dtype=xp.float64, xp=xp) T = _asarray(T, order='C', xp=xp) - is_valid_linkage(Z, throw=True, name='Z') + _is_valid_linkage(Z, throw=True, name='Z') if T.dtype != xp.int32: raise TypeError('T must be a 1-D array of dtype int32.') @@ -4133,15 +4230,20 @@ def leaders(Z, T): if T.shape[0] != Z.shape[0] + 1: raise ValueError('Mismatch: len(T)!=Z.shape[0] + 1.') - n_clusters = int(xpx.nunique(T)) - n_obs = int(Z.shape[0] + 1) - L = np.zeros(n_clusters, dtype=np.int32) - M = np.zeros(n_clusters, dtype=np.int32) - Z = np.asarray(Z) - T = np.asarray(T, dtype=np.int32) - s = _hierarchy.leaders(Z, T, L, M, n_clusters, n_obs) - if s >= 0: - raise ValueError('T is not a valid assignment vector. Error found ' - f'when examining linkage node {s} (< 2n-1).') - L, M = xp.asarray(L), xp.asarray(M) - return (L, M) + n_obs = Z.shape[0] + 1 + + def leaders_(Z, T, validate): + if validate: + is_valid_linkage(Z, throw=True, name='Z') + n_clusters = int(xpx.nunique(T)) + L = np.zeros(n_clusters, dtype=np.int32) + M = np.zeros(n_clusters, dtype=np.int32) + s = _hierarchy.leaders(Z, T, L, M, n_clusters, n_obs) + if s >= 0: + raise ValueError('T is not a valid assignment vector. 
Error found ' + f'when examining linkage node {s} (< 2n-1).') + return L, M + + return xpx.lazy_apply(leaders_, Z, T, validate=is_lazy_array(Z), + shape=((None,), (None, )), dtype=(xp.int32, xp.int32), + as_numpy=True) diff --git a/scipy/cluster/tests/test_hierarchy.py b/scipy/cluster/tests/test_hierarchy.py index b4c806b2ac6b..50622ace2f93 100644 --- a/scipy/cluster/tests/test_hierarchy.py +++ b/scipy/cluster/tests/test_hierarchy.py @@ -41,15 +41,16 @@ from scipy.cluster.hierarchy import ( ClusterWarning, linkage, from_mlab_linkage, to_mlab_linkage, num_obs_linkage, inconsistent, cophenet, fclusterdata, fcluster, - is_isomorphic, single, leaders, + is_isomorphic, single, ward, leaders, correspond, is_monotonic, maxdists, maxinconsts, maxRstat, is_valid_linkage, is_valid_im, to_tree, leaves_list, dendrogram, set_link_color_palette, cut_tree, optimal_leaf_ordering, - _order_cluster_tree, _hierarchy, _LINKAGE_METHODS) + _order_cluster_tree, _hierarchy, _EUCLIDEAN_METHODS, _LINKAGE_METHODS) from scipy.spatial.distance import pdist from scipy.cluster._hierarchy import Heap from scipy._lib._array_api import xp_assert_close, xp_assert_equal import scipy._lib.array_api_extra as xpx +from scipy._lib.array_api_extra.testing import lazy_xp_function from threading import Lock @@ -69,24 +70,61 @@ have_matplotlib = False skip_xp_backends = pytest.mark.skip_xp_backends - - +xfail_xp_backends = pytest.mark.xfail_xp_backends +use_linkage = skip_xp_backends(cpu_only=True, exceptions=["jax.numpy"], + reason="linkage() invokes Cython code") + +lazy_xp_function(single) +lazy_xp_function(ward) +lazy_xp_function(linkage, static_argnames=('method', 'metric', 'optimal_ordering')) +lazy_xp_function(cut_tree, static_argnames=('n_clusters', 'height')) +lazy_xp_function(to_tree, jax_jit=False, allow_dask_compute=999, + static_argnames=('rd', )) +lazy_xp_function(optimal_leaf_ordering, static_argnames=('metric',)) +lazy_xp_function(cophenet, jax_jit=False, allow_dask_compute=2) 
+lazy_xp_function(inconsistent, jax_jit=False, allow_dask_compute=2, + static_argnames=('d',)) +lazy_xp_function(from_mlab_linkage, jax_jit=False, allow_dask_compute=2) +lazy_xp_function(to_mlab_linkage, jax_jit=False, allow_dask_compute=1) +lazy_xp_function(is_monotonic) + +# Note: these functions materialize lazy arrays when warning=True or throw=True +lazy_xp_function(is_valid_im, static_argnames=("warning", "throw", "name")) +lazy_xp_function(is_valid_linkage, static_argnames=("warning", "throw", "name")) + +lazy_xp_function(num_obs_linkage) +lazy_xp_function(correspond) +lazy_xp_function(fcluster, jax_jit=False, allow_dask_compute=999, + static_argnames=('criterion', 'depth')) +lazy_xp_function(fclusterdata, jax_jit=False, allow_dask_compute=999, + static_argnames=('criterion', 'metric', 'depth', 'method')) +lazy_xp_function(leaves_list, jax_jit=False, allow_dask_compute=2) +lazy_xp_function(dendrogram, jax_jit=False, allow_dask_compute=999) +lazy_xp_function(is_isomorphic, jax_jit=False, allow_dask_compute=2) +lazy_xp_function(maxdists, jax_jit=False, allow_dask_compute=999) +lazy_xp_function(maxinconsts, jax_jit=False, allow_dask_compute=999) +lazy_xp_function(maxRstat, jax_jit=False, allow_dask_compute=999, + static_argnames=('i',)) + +# Returns data-dependent shape +lazy_xp_function(leaders, jax_jit=False) + + +@use_linkage class TestLinkage: - @skip_xp_backends(cpu_only=True) + @skip_xp_backends("jax.numpy", reason="Can't raise inside jax.pure_callback") def test_linkage_non_finite_elements_in_distance_matrix(self, xp): # Tests linkage(Y) where Y contains a non-finite element (e.g. NaN or Inf). # Exception expected. y = xp.asarray([xp.nan] + [0.0]*5) assert_raises(ValueError, linkage, y) - @skip_xp_backends(cpu_only=True) def test_linkage_empty_distance_matrix(self, xp): # Tests linkage(Y) where Y is a 0x4 linkage matrix. Exception expected. 
y = xp.zeros((0,)) assert_raises(ValueError, linkage, y) - @skip_xp_backends(cpu_only=True) def test_linkage_tdist(self, xp): for method in ['single', 'complete', 'average', 'weighted']: self.check_linkage_tdist(method, xp) @@ -97,7 +135,6 @@ def check_linkage_tdist(self, method, xp): expectedZ = getattr(hierarchy_test_data, 'linkage_ytdist_' + method) xp_assert_close(Z, xp.asarray(expectedZ), atol=1e-10) - @skip_xp_backends(cpu_only=True) def test_linkage_X(self, xp): for method in ['centroid', 'median', 'ward']: self.check_linkage_q(method, xp) @@ -108,12 +145,11 @@ def check_linkage_q(self, method, xp): expectedZ = getattr(hierarchy_test_data, 'linkage_X_' + method) xp_assert_close(Z, xp.asarray(expectedZ), atol=1e-06) - y = scipy.spatial.distance.pdist(hierarchy_test_data.X, - metric="euclidean") - Z = linkage(xp.asarray(y), method) + X = xp.asarray(hierarchy_test_data.X) + y = pdist(X, metric="euclidean") + Z = linkage(y, method) xp_assert_close(Z, xp.asarray(expectedZ), atol=1e-06) - @skip_xp_backends(cpu_only=True) def test_compare_with_trivial(self, xp): rng = np.random.RandomState(0) n = 20 @@ -125,14 +161,13 @@ def test_compare_with_trivial(self, xp): Z = linkage(xp.asarray(d), method) xp_assert_close(Z, xp.asarray(Z_trivial), rtol=1e-14, atol=1e-15) - @skip_xp_backends(cpu_only=True) def test_optimal_leaf_ordering(self, xp): Z = linkage(xp.asarray(hierarchy_test_data.ytdist), optimal_ordering=True) expectedZ = getattr(hierarchy_test_data, 'linkage_ytdist_single_olo') xp_assert_close(Z, xp.asarray(expectedZ), atol=1e-10) -@skip_xp_backends(cpu_only=True) +@use_linkage class TestLinkageTies: _expectations = { @@ -321,9 +356,8 @@ class TestLeaders: def test_leaders_single(self, xp): # Tests leaders using a flat clustering generated by single linkage. 
- X = hierarchy_test_data.Q_X + X = xp.asarray(hierarchy_test_data.Q_X) Y = pdist(X) - Y = xp.asarray(Y) Z = linkage(Y) T = fcluster(Z, criterion='maxclust', t=3) Lright = (xp.asarray([53, 55, 56]), xp.asarray([2, 3, 1])) @@ -373,8 +407,8 @@ def test_is_isomorphic_4B(self, xp): # (3 flat clusters, different labelings, nonisomorphic) a = xp.asarray([1, 2, 3, 3]) b = xp.asarray([1, 3, 2, 3]) - assert is_isomorphic(a, b) is False - assert is_isomorphic(b, a) is False + assert not is_isomorphic(a, b) + assert not is_isomorphic(b, a) def test_is_isomorphic_4C(self, xp): # Tests is_isomorphic on test case #4C @@ -419,20 +453,16 @@ def help_is_isomorphic_randperm(self, nobs, nclusters, noniso=False, nerrors=0, assert is_isomorphic(b, a) == (not noniso) -@skip_xp_backends(cpu_only=True) class TestIsValidLinkage: - def test_is_valid_linkage_various_size(self, xp): - for nrow, ncol, valid in [(2, 5, False), (2, 3, False), - (1, 4, True), (2, 4, True)]: - self.check_is_valid_linkage_various_size(nrow, ncol, valid, xp) - - def check_is_valid_linkage_various_size(self, nrow, ncol, valid, xp): + @pytest.mark.parametrize("nrow, ncol, valid", [(2, 5, False), (2, 3, False), + (1, 4, True), (2, 4, True)]) + def test_is_valid_linkage_various_size(self, nrow, ncol, valid, xp): # Tests is_valid_linkage(Z) with linkage matrices of various sizes Z = xp.asarray([[0, 1, 3.0, 2, 5], [3, 2, 4.0, 3, 3]], dtype=xp.float64) Z = Z[:nrow, :ncol] - assert_(is_valid_linkage(Z) == valid) + xp_assert_equal(is_valid_linkage(Z), valid, check_namespace=False) if not valid: assert_raises(ValueError, is_valid_linkage, Z, throw=True) @@ -440,15 +470,16 @@ def test_is_valid_linkage_int_type(self, xp): # Tests is_valid_linkage(Z) with integer type. 
Z = xp.asarray([[0, 1, 3.0, 2], [3, 2, 4.0, 3]], dtype=xp.int64) - assert_(is_valid_linkage(Z) is False) + xp_assert_equal(is_valid_linkage(Z), False, check_namespace=False) assert_raises(TypeError, is_valid_linkage, Z, throw=True) def test_is_valid_linkage_empty(self, xp): # Tests is_valid_linkage(Z) with empty linkage. Z = xp.zeros((0, 4), dtype=xp.float64) - assert_(is_valid_linkage(Z) is False) + xp_assert_equal(is_valid_linkage(Z), False, check_namespace=False) assert_raises(ValueError, is_valid_linkage, Z, throw=True) + @use_linkage def test_is_valid_linkage_4_and_up(self, xp): # Tests is_valid_linkage(Z) on linkage on observation sets between # sizes 4 and 15 (step size 3). @@ -456,8 +487,9 @@ def test_is_valid_linkage_4_and_up(self, xp): y = np.random.rand(i*(i-1)//2) y = xp.asarray(y) Z = linkage(y) - assert_(is_valid_linkage(Z) is True) + xp_assert_equal(is_valid_linkage(Z), True, check_namespace=False) + @use_linkage def test_is_valid_linkage_4_and_up_neg_index_left(self, xp): # Tests is_valid_linkage(Z) on linkage on observation sets between # sizes 4 and 15 (step size 3) with negative indices (left). @@ -466,9 +498,13 @@ def test_is_valid_linkage_4_and_up_neg_index_left(self, xp): y = xp.asarray(y) Z = linkage(y) Z = xpx.at(Z)[i//2, 0].set(-2) - assert_(is_valid_linkage(Z) is False) - assert_raises(ValueError, is_valid_linkage, Z, throw=True) + xp_assert_equal(is_valid_linkage(Z), False, check_namespace=False) + # Use fully-qualified function name to bypass lazy_xp_function(), + # because `is_valid_*` materializes. + with pytest.raises(ValueError): + scipy.cluster.hierarchy.is_valid_linkage(Z, throw=True) + @use_linkage def test_is_valid_linkage_4_and_up_neg_index_right(self, xp): # Tests is_valid_linkage(Z) on linkage on observation sets between # sizes 4 and 15 (step size 3) with negative indices (right). 
@@ -477,9 +513,12 @@ def test_is_valid_linkage_4_and_up_neg_index_right(self, xp): y = xp.asarray(y) Z = linkage(y) Z = xpx.at(Z)[i//2, 1].set(-2) - assert_(is_valid_linkage(Z) is False) - assert_raises(ValueError, is_valid_linkage, Z, throw=True) + xp_assert_equal(is_valid_linkage(Z), False, check_namespace=False) + with pytest.raises(ValueError): + scipy.cluster.hierarchy.is_valid_linkage(Z, throw=True) + + @use_linkage def test_is_valid_linkage_4_and_up_neg_dist(self, xp): # Tests is_valid_linkage(Z) on linkage on observation sets between # sizes 4 and 15 (step size 3) with negative distances. @@ -488,9 +527,11 @@ def test_is_valid_linkage_4_and_up_neg_dist(self, xp): y = xp.asarray(y) Z = linkage(y) Z = xpx.at(Z)[i//2, 2].set(-0.5) - assert_(is_valid_linkage(Z) is False) - assert_raises(ValueError, is_valid_linkage, Z, throw=True) + xp_assert_equal(is_valid_linkage(Z), False, check_namespace=False) + with pytest.raises(ValueError): + scipy.cluster.hierarchy.is_valid_linkage(Z, throw=True) + @use_linkage def test_is_valid_linkage_4_and_up_neg_counts(self, xp): # Tests is_valid_linkage(Z) on linkage on observation sets between # sizes 4 and 15 (step size 3) with negative counts. @@ -499,40 +540,38 @@ def test_is_valid_linkage_4_and_up_neg_counts(self, xp): y = xp.asarray(y) Z = linkage(y) Z = xpx.at(Z)[i//2, 3].set(-2) - assert_(is_valid_linkage(Z) is False) - assert_raises(ValueError, is_valid_linkage, Z, throw=True) + xp_assert_equal(is_valid_linkage(Z), False, check_namespace=False) + with pytest.raises(ValueError): + scipy.cluster.hierarchy.is_valid_linkage(Z, throw=True) -@skip_xp_backends(cpu_only=True) class TestIsValidInconsistent: def test_is_valid_im_int_type(self, xp): # Tests is_valid_im(R) with integer type. 
R = xp.asarray([[0, 1, 3.0, 2], [3, 2, 4.0, 3]], dtype=xp.int64) - assert_(is_valid_im(R) is False) + xp_assert_equal(is_valid_im(R), False, check_namespace=False) assert_raises(TypeError, is_valid_im, R, throw=True) - def test_is_valid_im_various_size(self, xp): - for nrow, ncol, valid in [(2, 5, False), (2, 3, False), - (1, 4, True), (2, 4, True)]: - self.check_is_valid_im_various_size(nrow, ncol, valid, xp) - - def check_is_valid_im_various_size(self, nrow, ncol, valid, xp): + @pytest.mark.parametrize("nrow, ncol, valid", [(2, 5, False), (2, 3, False), + (1, 4, True), (2, 4, True)]) + def test_is_valid_im_various_size(self, nrow, ncol, valid, xp): # Tests is_valid_im(R) with linkage matrices of various sizes R = xp.asarray([[0, 1, 3.0, 2, 5], [3, 2, 4.0, 3, 3]], dtype=xp.float64) R = R[:nrow, :ncol] - assert_(is_valid_im(R) == valid) + xp_assert_equal(is_valid_im(R), valid, check_namespace=False) if not valid: assert_raises(ValueError, is_valid_im, R, throw=True) def test_is_valid_im_empty(self, xp): # Tests is_valid_im(R) with empty inconsistency matrix. R = xp.zeros((0, 4), dtype=xp.float64) - assert_(is_valid_im(R) is False) + xp_assert_equal(is_valid_im(R), False, check_namespace=False) assert_raises(ValueError, is_valid_im, R, throw=True) + @use_linkage def test_is_valid_im_4_and_up(self, xp): # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15 # (step size 3). @@ -541,8 +580,9 @@ def test_is_valid_im_4_and_up(self, xp): y = xp.asarray(y) Z = linkage(y) R = inconsistent(Z) - assert_(is_valid_im(R) is True) + xp_assert_equal(is_valid_im(R), True, check_namespace=False) + @use_linkage def test_is_valid_im_4_and_up_neg_index_left(self, xp): # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15 # (step size 3) with negative link height means. 
@@ -552,9 +592,13 @@ def test_is_valid_im_4_and_up_neg_index_left(self, xp): Z = linkage(y) R = inconsistent(Z) R = xpx.at(R)[i//2 , 0].set(-2.0) - assert_(is_valid_im(R) is False) - assert_raises(ValueError, is_valid_im, R, throw=True) + xp_assert_equal(is_valid_im(R), False, check_namespace=False) + # Use fully-qualified function name to bypass lazy_xp_function(), + # because `is_valid_*`materializes. + with pytest.raises(ValueError): + scipy.cluster.hierarchy.is_valid_im(R, throw=True) + @use_linkage def test_is_valid_im_4_and_up_neg_index_right(self, xp): # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15 # (step size 3) with negative link height standard deviations. @@ -564,9 +608,11 @@ def test_is_valid_im_4_and_up_neg_index_right(self, xp): Z = linkage(y) R = inconsistent(Z) R = xpx.at(R)[i//2 , 1].set(-2.0) - assert_(is_valid_im(R) is False) - assert_raises(ValueError, is_valid_im, R, throw=True) + xp_assert_equal(is_valid_im(R), False, check_namespace=False) + with pytest.raises(ValueError): + scipy.cluster.hierarchy.is_valid_im(R, throw=True) + @use_linkage def test_is_valid_im_4_and_up_neg_dist(self, xp): # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15 # (step size 3) with negative link counts. @@ -576,13 +622,13 @@ def test_is_valid_im_4_and_up_neg_dist(self, xp): Z = linkage(y) R = inconsistent(Z) R = xpx.at(R)[i//2, 2].set(-0.5) - assert_(is_valid_im(R) is False) - assert_raises(ValueError, is_valid_im, R, throw=True) + xp_assert_equal(is_valid_im(R), False, check_namespace=False) + with pytest.raises(ValueError): + scipy.cluster.hierarchy.is_valid_im(R, throw=True) class TestNumObsLinkage: - @skip_xp_backends(cpu_only=True) def test_num_obs_linkage_empty(self, xp): # Tests num_obs_linkage(Z) with empty linkage. 
Z = xp.zeros((0, 4), dtype=xp.float64) @@ -591,15 +637,15 @@ def test_num_obs_linkage_empty(self, xp): def test_num_obs_linkage_1x4(self, xp): # Tests num_obs_linkage(Z) on linkage over 2 observations. Z = xp.asarray([[0, 1, 3.0, 2]], dtype=xp.float64) - assert_equal(num_obs_linkage(Z), 2) + assert num_obs_linkage(Z) == 2 def test_num_obs_linkage_2x4(self, xp): # Tests num_obs_linkage(Z) on linkage over 3 observations. Z = xp.asarray([[0, 1, 3.0, 2], [3, 2, 4.0, 3]], dtype=xp.float64) - assert_equal(num_obs_linkage(Z), 3) + assert num_obs_linkage(Z) == 3 - @skip_xp_backends(cpu_only=True) + @use_linkage def test_num_obs_linkage_4_and_up(self, xp): # Tests num_obs_linkage(Z) on linkage on observation sets between sizes # 4 and 15 (step size 3). @@ -607,7 +653,16 @@ def test_num_obs_linkage_4_and_up(self, xp): y = np.random.rand(i*(i-1)//2) y = xp.asarray(y) Z = linkage(y) - assert_equal(num_obs_linkage(Z), i) + assert num_obs_linkage(Z) == i + + @use_linkage + def test_num_obs_linkage_multi_matrix(self, xp): + # Tests num_obs_linkage with observation matrices of multiple sizes. + for n in range(2, 10): + X = xp.asarray(np.random.rand(n, 4)) + Y = pdist(X) + Z = linkage(Y) + assert num_obs_linkage(Z) == n @skip_xp_backends(cpu_only=True) @@ -648,7 +703,6 @@ def test_Q_subtree_pre_order(self, xp): rtol=1e-15) -@skip_xp_backends(cpu_only=True) class TestCorrespond: def test_correspond_empty(self, xp): @@ -657,6 +711,7 @@ def test_correspond_empty(self, xp): Z = xp.zeros((0,4), dtype=xp.float64) assert_raises(ValueError, correspond, Z, y) + @use_linkage def test_correspond_2_and_up(self, xp): # Tests correspond(Z, y) on linkage and CDMs over observation sets of # different sizes. @@ -671,6 +726,7 @@ def test_correspond_2_and_up(self, xp): Z = linkage(y) assert_(correspond(Z, y)) + @use_linkage def test_correspond_4_and_up(self, xp): # Tests correspond(Z, y) on linkage and CDMs over observation sets of # different sizes. Correspondence should be false. 
@@ -685,6 +741,7 @@ def test_correspond_4_and_up(self, xp): assert not correspond(Z, y2) assert not correspond(Z2, y) + @use_linkage def test_correspond_4_and_up_2(self, xp): # Tests correspond(Z, y) on linkage and CDMs over observation sets of # different sizes. Correspondence should be false. @@ -699,17 +756,7 @@ def test_correspond_4_and_up_2(self, xp): assert not correspond(Z, y2) assert not correspond(Z2, y) - def test_num_obs_linkage_multi_matrix(self, xp): - # Tests num_obs_linkage with observation matrices of multiple sizes. - for n in range(2, 10): - X = np.random.rand(n, 4) - Y = pdist(X) - Y = xp.asarray(Y) - Z = linkage(Y) - assert_equal(num_obs_linkage(Z), n) - -@skip_xp_backends(cpu_only=True) class TestIsMonotonic: def test_is_monotonic_empty(self, xp): @@ -762,12 +809,14 @@ def test_is_monotonic_3x4_F3(self, xp): [4, 5, 0.2, 4]], dtype=xp.float64) assert not is_monotonic(Z) + @use_linkage def test_is_monotonic_tdist_linkage1(self, xp): # Tests is_monotonic(Z) on clustering generated by single linkage on # tdist data set. Expecting True. Z = linkage(xp.asarray(hierarchy_test_data.ytdist), 'single') assert is_monotonic(Z) + @use_linkage def test_is_monotonic_tdist_linkage2(self, xp): # Tests is_monotonic(Z) on clustering generated by single linkage on # tdist data set. Perturbing. Expecting False. @@ -775,6 +824,7 @@ def test_is_monotonic_tdist_linkage2(self, xp): Z = xpx.at(Z)[2, 2].set(0.0) assert not is_monotonic(Z) + @use_linkage def test_is_monotonic_Q_linkage(self, xp): # Tests is_monotonic(Z) on clustering generated by single linkage on # Q data set. Expecting True. 
@@ -1166,18 +1216,19 @@ def calculate_maximum_inconsistencies(Z, R, k=3, xp=np): @pytest.mark.thread_unsafe -@skip_xp_backends(cpu_only=True) +@use_linkage +@skip_xp_backends(eager_only=True) def test_unsupported_uncondensed_distance_matrix_linkage_warning(xp): assert_warns(ClusterWarning, linkage, xp.asarray([[0, 1], [1, 0]])) def test_euclidean_linkage_value_error(xp): - for method in scipy.cluster.hierarchy._EUCLIDEAN_METHODS: + for method in _EUCLIDEAN_METHODS: assert_raises(ValueError, linkage, xp.asarray([[1, 1], [1, 1]]), method=method, metric='cityblock') -@skip_xp_backends(cpu_only=True) +@use_linkage def test_2x2_linkage(xp): Z1 = linkage(xp.asarray([1]), method='single', metric='euclidean') Z2 = linkage(xp.asarray([[0, 1], [0, 0]]), method='single', metric='euclidean') @@ -1190,7 +1241,7 @@ def test_node_compare(xp): nobs = 50 X = np.random.randn(nobs, 4) X = xp.asarray(X) - Z = scipy.cluster.hierarchy.ward(X) + Z = ward(X) tree = to_tree(Z) assert_(tree > tree.get_left()) assert_(tree.get_right() > tree.get_left()) @@ -1204,7 +1255,7 @@ def test_cut_tree(xp): nobs = 50 X = np.random.randn(nobs, 4) X = xp.asarray(X) - Z = scipy.cluster.hierarchy.ward(X) + Z = ward(X) cutree = cut_tree(Z) # cutree.dtype varies between int32 and int64 over platforms @@ -1275,7 +1326,8 @@ def test_Heap(xp): assert_equal(pair['value'], 10) -@skip_xp_backends(cpu_only=True) +@use_linkage +@skip_xp_backends("jax.numpy", reason="Can't raise inside jax.pure_callback") def test_centroid_neg_distance(xp): # gh-21011 values = xp.asarray([0, 0, -1]) diff --git a/scipy/conftest.py b/scipy/conftest.py index 247a69d8a716..b3fc5a96d8a5 100644 --- a/scipy/conftest.py +++ b/scipy/conftest.py @@ -13,7 +13,8 @@ from scipy._lib._fpumode import get_fpu_mode from scipy._lib._array_api import ( - SCIPY_ARRAY_API, SCIPY_DEVICE, array_namespace, default_xp + SCIPY_ARRAY_API, SCIPY_DEVICE, array_namespace, default_xp, + is_cupy, is_dask, is_jax, ) from scipy._lib._testutils import 
FPUModeChangeWarning from scipy._lib.array_api_extra.testing import patch_lazy_xp_functions @@ -154,7 +155,6 @@ def num_parallel_threads(): try: import torch # type: ignore[import-not-found] xp_available_backends.update({'torch': torch}) - # can use `mps` or `cpu` torch.set_default_device(SCIPY_DEVICE) if SCIPY_DEVICE != "cpu": xp_skip_cpu_only_backends.add('torch') @@ -409,6 +409,41 @@ def skip_or_xfail_xp_backends(request: pytest.FixtureRequest, skip_or_xfail(reason=reason) +@pytest.fixture +def devices(xp): + """Fixture that returns a list of all devices for the backend, plus None. + Used to test input->output device propagation. + + Usage + ----- + from scipy._lib._array_api import xp_device + + def test_device(xp, devices): + for d in devices: + x = xp.asarray(..., device=d) + y = f(x) + assert xp_device(y) == xp_device(x) + """ + if is_cupy(xp): + # CuPy does not support devices other than the current one + # data-apis/array-api-compat#293 + pytest.xfail(reason="data-apis/array-api-compat#293") + if is_dask(xp): + # Skip dummy DASK_DEVICE from array-api-compat, which does not propagate + return ["cpu", None] + if is_jax(xp): + # The .device attribute is not accessible inside jax.jit; the consequence + # (downstream of array-api-compat hacks) is that a non-default device in + # input is not guaranteed to propagate to the output even if the scipy code + # states `device=xp_device(arg)` in all array creation functions. + # While this issue is specific to jax.jit, it would be unnecessarily + # verbose to skip the test for each jit-capable function and run it for + # those that only support eager mode. + pytest.xfail(reason="jax-ml/jax#26000") + + return xp.__array_namespace_info__().devices() + [None] + + # Following the approach of NumPy's conftest.py... # Use a known and persistent tmpdir for hypothesis' caches, which # can be automatically cleared by the OS or user.
diff --git a/scipy/differentiate/_differentiate.py b/scipy/differentiate/_differentiate.py index 010ab1a0f172..8dbf3f755541 100644 --- a/scipy/differentiate/_differentiate.py +++ b/scipy/differentiate/_differentiate.py @@ -3,7 +3,7 @@ import numpy as np import scipy._lib._elementwise_iterative_method as eim from scipy._lib._util import _RichResult -from scipy._lib._array_api import array_namespace, xp_copy +from scipy._lib._array_api import array_namespace, xp_copy, xp_promote _EERRORINCREASE = -1 # used in derivative @@ -906,9 +906,7 @@ def f(x): """ xp = array_namespace(x) - x = xp.asarray(x) - int_dtype = xp.isdtype(x.dtype, 'integral') - x0 = xp.asarray(x, dtype=xp.asarray(1.0).dtype) if int_dtype else x + x0 = xp_promote(x, force_floating=True, xp=xp) if x0.ndim < 1: message = "Argument `x` must be at least 1-D." @@ -1097,9 +1095,9 @@ def hessian(f, x, *, tolerances=None, maxiter=10, rtol = tolerances.get('rtol', None) xp = array_namespace(x) - x = xp.asarray(x) - dtype = x.dtype if not xp.isdtype(x.dtype, 'integral') else xp.asarray(1.).dtype - finfo = xp.finfo(dtype) + x0 = xp_promote(x, force_floating=True, xp=xp) + + finfo = xp.finfo(x0.dtype) rtol = finfo.eps**0.5 if rtol is None else rtol # keep same as `derivative` # tighten the inner tolerance to make the inner error negligible diff --git a/scipy/fft/_basic.py b/scipy/fft/_basic.py index a3fc021c9ef9..8c07ee697b3c 100644 --- a/scipy/fft/_basic.py +++ b/scipy/fft/_basic.py @@ -1166,6 +1166,17 @@ def rfft2(x, s=None, axes=(-2, -1), norm=None, overwrite_x=False, workers=None, This is really just `rfftn` with different default behavior. For more details see `rfftn`. 
+ Examples + -------- + >>> import scipy.fft + >>> import numpy as np + >>> x = np.broadcast_to([1, 0, -1, 0], (4, 4)) + >>> scipy.fft.rfft2(x) + array([[0.+0.j, 8.+0.j, 0.+0.j], + [0.+0.j, 0.+0.j, 0.+0.j], + [0.+0.j, 0.+0.j, 0.+0.j], + [0.+0.j, 0.+0.j, 0.+0.j]]) + """ return (Dispatchable(x, np.ndarray),) @@ -1484,6 +1495,15 @@ def hfft2(x, s=None, axes=(-2, -1), norm=None, overwrite_x=False, workers=None, This is really just `hfftn` with different default behavior. For more details see `hfftn`. + Examples + -------- + >>> import scipy.fft + >>> import numpy as np + >>> x = np.array([[1+0j, 2+0j], [2+0j, 1+0j]]) # Hermitian-symmetric input + >>> scipy.fft.hfft2(x, s=(2, 2)) + array([[ 6., 0.], + [ 0., -2.]]) + """ return (Dispatchable(x, np.ndarray),) diff --git a/scipy/fft/_realtransforms.py b/scipy/fft/_realtransforms.py index 1c7a3d683dd7..b7324f5d9a81 100644 --- a/scipy/fft/_realtransforms.py +++ b/scipy/fft/_realtransforms.py @@ -621,6 +621,19 @@ def dst(x, type=2, n=None, axis=-1, norm=None, overwrite_x=False, workers=None, The (unnormalized) DST-IV is its own inverse, up to a factor :math:`2N`. The orthonormalized DST-IV is exactly its own inverse. + Examples + -------- + Compute the DST of a simple 1D array: + + >>> import numpy as np + >>> from scipy.fft import dst + >>> x = np.array([1, -1, 1, -1]) + >>> dst(x, type=2) + array([0., 0., 0., 8.]) + + This computes the Discrete Sine Transform (DST) of type-II for the input array. + The output contains the transformed values corresponding to the given input sequence. + References ---------- .. 
[1] Wikipedia, "Discrete sine transform", diff --git a/scipy/fft/tests/test_helper.py b/scipy/fft/tests/test_helper.py index 6a5fa58492fa..5492ce844d5d 100644 --- a/scipy/fft/tests/test_helper.py +++ b/scipy/fft/tests/test_helper.py @@ -10,9 +10,7 @@ import pytest import numpy as np import sys -from scipy._lib._array_api import ( - xp_assert_close, get_xp_devices, xp_device -) +from scipy._lib._array_api import xp_assert_close, xp_device from scipy import fft skip_xp_backends = pytest.mark.skip_xp_backends @@ -507,12 +505,7 @@ def test_uneven_dims(self, xp): xp_assert_close(fft.ifftshift(shift_dim_both), freqs) -@skip_xp_backends("cupy", - reason="CuPy has not implemented the `device` param") -@skip_xp_backends("jax.numpy", - reason="JAX has not implemented the `device` param") class TestFFTFreq: - def test_definition(self, xp): x = xp.asarray([0, 1, 2, 3, 4, -4, -3, -2, -1], dtype=xp.float64) x2 = xp.asarray([0, 1, 2, 3, 4, -5, -4, -3, -2, -1], dtype=xp.float64) @@ -531,18 +524,13 @@ def test_definition(self, xp): y = 10 * xp.pi * fft.fftfreq(10, xp.pi, xp=xp) xp_assert_close(y, x2, check_dtype=False) - def test_device(self, xp): - devices = get_xp_devices(xp) + def test_device(self, xp, devices): for d in devices: y = fft.fftfreq(9, xp=xp, device=d) x = xp.empty(0, device=d) assert xp_device(y) == xp_device(x) -@skip_xp_backends("cupy", - reason="CuPy has not implemented the `device` param") -@skip_xp_backends("jax.numpy", - reason="JAX has not implemented the `device` param") class TestRFFTFreq: def test_definition(self, xp): @@ -563,8 +551,7 @@ def test_definition(self, xp): y = 10 * xp.pi * fft.rfftfreq(10, xp.pi, xp=xp) xp_assert_close(y, x2, check_dtype=False) - def test_device(self, xp): - devices = get_xp_devices(xp) + def test_device(self, xp, devices): for d in devices: y = fft.rfftfreq(9, xp=xp, device=d) x = xp.empty(0, device=d) diff --git a/scipy/integrate/_cubature.py b/scipy/integrate/_cubature.py index 3e6d8911d13e..ad806d6c2373 100644 --- 
a/scipy/integrate/_cubature.py +++ b/scipy/integrate/_cubature.py @@ -10,7 +10,7 @@ array_namespace, xp_size, xp_copy, - xp_broadcast_promote + xp_promote ) from scipy._lib._util import MapWrapper @@ -323,7 +323,8 @@ def cubature(f, a, b, *, rule="gk21", rtol=1e-8, atol=0, max_subdivisions=10000, # Convert a and b to arrays and convert each point in points to an array, promoting # each to a common floating dtype. - a, b, *points = xp_broadcast_promote(a, b, *points, force_floating=True) + a, b, *points = xp_promote(a, b, *points, broadcast=True, force_floating=True, + xp=xp) result_dtype = a.dtype if xp_size(a) == 0 or xp_size(b) == 0: diff --git a/scipy/integrate/_quadrature.py b/scipy/integrate/_quadrature.py index 4f4d508216e3..86d70feed7a4 100644 --- a/scipy/integrate/_quadrature.py +++ b/scipy/integrate/_quadrature.py @@ -8,7 +8,7 @@ from scipy.special import roots_legendre from scipy.special import gammaln, logsumexp from scipy._lib._util import _rng_spawn -from scipy._lib._array_api import _asarray, array_namespace, xp_broadcast_promote +from scipy._lib._array_api import _asarray, array_namespace, xp_result_type __all__ = ['fixed_quad', 'romb', @@ -124,7 +124,7 @@ def trapezoid(y, x=None, dx=1.0, axis=-1): # Cannot just use the broadcasted arrays that are returned # because trapezoid does not follow normal broadcasting rules # cf. 
https://github.com/scipy/scipy/pull/21524#issuecomment-2354105942 - result_dtype = xp_broadcast_promote(y, force_floating=True, xp=xp)[0].dtype + result_dtype = xp_result_type(y, force_floating=True, xp=xp) nd = y.ndim slice1 = [slice(None)]*nd slice2 = [slice(None)]*nd diff --git a/scipy/integrate/_tanhsinh.py b/scipy/integrate/_tanhsinh.py index b577511a24f6..f973bcd2c1a7 100644 --- a/scipy/integrate/_tanhsinh.py +++ b/scipy/integrate/_tanhsinh.py @@ -4,7 +4,8 @@ from scipy import special import scipy._lib._elementwise_iterative_method as eim from scipy._lib._util import _RichResult -from scipy._lib._array_api import array_namespace, xp_copy, xp_ravel +from scipy._lib._array_api import (array_namespace, xp_copy, xp_ravel, + xp_promote) __all__ = ['nsum'] @@ -97,8 +98,9 @@ def tanhsinh(f, a, b, *, args=(), log=False, maxlevel=None, minlevel=2, Absolute termination tolerance (default: 0) and relative termination tolerance (default: ``eps**0.75``, where ``eps`` is the precision of the result dtype), respectively. Iteration will stop when - ``res.error < atol + rtol * abs(res.df)``. The error estimate is as - described in [1]_ Section 5. While not theoretically rigorous or + ``res.error < atol`` or ``res.error < res.integral * rtol``. The error + estimate is as described in [1]_ Section 5 but with a lower bound of + ``eps * res.integral``. While not theoretically rigorous or conservative, it is said to work well in practice. Must be non-negative and finite if `log` is False, and must be expressed as the log of a non-negative and finite number if `log` is True. 
@@ -443,9 +445,9 @@ def check_termination(work): stop[i] = True else: # Terminate if convergence criterion is met - work.rerr, work.aerr = _estimate_error(work, xp) - i = ((work.rerr < rtol) | (work.rerr + xp.real(work.Sn) < atol) if log - else (work.rerr < rtol) | (work.rerr * xp.abs(work.Sn) < atol)) + rerr, aerr = _estimate_error(work, xp) + i = (rerr < rtol) | (aerr < atol) + work.aerr = xp.reshape(xp.astype(aerr, work.dtype), work.Sn.shape) work.status[i] = eim._ECONVERGED stop[i] = True @@ -767,22 +769,23 @@ def _estimate_error(work, xp): d2 = xp.real(special.logsumexp(xp.stack([work.Sn, Snm2 + work.pi*1j]), axis=0)) d3 = log_e1 + xp.max(xp.real(work.fjwj), axis=-1) d4 = work.d4 - ds = xp.stack([d1 ** 2 / d2, 2 * d1, d3, d4]) + d5 = log_e1 + xp.real(work.Sn) + temp = xp.where(d1 > -xp.inf, d1 ** 2 / d2, -xp.inf) + ds = xp.stack([temp, 2 * d1, d3, d4, d5]) aerr = xp.max(ds, axis=0) - rerr = xp.maximum(log_e1, aerr - xp.real(work.Sn)) + rerr = aerr - xp.real(work.Sn) else: # Note: explicit computation of log10 of each of these is unnecessary. d1 = xp.abs(work.Sn - Snm1) d2 = xp.abs(work.Sn - Snm2) d3 = e1 * xp.max(xp.abs(work.fjwj), axis=-1) d4 = work.d4 - # If `d1` is 0, no need to warn. This does the right thing. - # with np.errstate(divide='ignore'): - ds = xp.stack([d1**(xp.log(d1)/xp.log(d2)), d1**2, d3, d4]) + d5 = e1 * xp.abs(work.Sn) + temp = xp.where(d1 > 0, d1**(xp.log(d1)/xp.log(d2)), 0) + ds = xp.stack([temp, d1**2, d3, d4, d5]) aerr = xp.max(ds, axis=0) - rerr = xp.maximum(e1, aerr/xp.abs(work.Sn)) + rerr = aerr/xp.abs(work.Sn) - aerr = xp.reshape(xp.astype(aerr, work.dtype), work.Sn.shape) return rerr, aerr @@ -818,14 +821,13 @@ def _tanhsinh_iv(f, a, b, log, maxfun, maxlevel, minlevel, # Input validation and standardization xp = array_namespace(a, b) + a, b = xp_promote(a, b, broadcast=True, force_floating=True, xp=xp) message = '`f` must be callable.' 
if not callable(f): raise ValueError(message) message = 'All elements of `a` and `b` must be real numbers.' - a, b = xp.asarray(a), xp.asarray(b) - a, b = xp.broadcast_arrays(a, b) if (xp.isdtype(a.dtype, 'complex floating') or xp.isdtype(b.dtype, 'complex floating')): raise ValueError(message) @@ -894,16 +896,15 @@ def _tanhsinh_iv(f, a, b, log, maxfun, maxlevel, minlevel, def _nsum_iv(f, a, b, step, args, log, maxterms, tolerances): # Input validation and standardization - xp = array_namespace(a, b) + xp = array_namespace(a, b, step) + a, b, step = xp_promote(a, b, step, broadcast=True, force_floating=True, xp=xp) message = '`f` must be callable.' if not callable(f): raise ValueError(message) message = 'All elements of `a`, `b`, and `step` must be real numbers.' - a, b, step = xp.broadcast_arrays(xp.asarray(a), xp.asarray(b), xp.asarray(step)) - dtype = xp.result_type(a.dtype, b.dtype, step.dtype) - if not xp.isdtype(dtype, 'numeric') or xp.isdtype(dtype, 'complex floating'): + if not xp.isdtype(a.dtype, ('integral', 'real floating')): raise ValueError(message) valid_b = b >= a # NaNs will be False diff --git a/scipy/integrate/lsoda.pyf b/scipy/integrate/lsoda.pyf index d09e32a59184..20cc688320f5 100644 --- a/scipy/integrate/lsoda.pyf +++ b/scipy/integrate/lsoda.pyf @@ -23,11 +23,20 @@ python module lsoda__user__routines end python module lsoda__user__routines python module _lsoda + usercode ''' + +#ifdef HAVE_BLAS_ILP64 +typedef npy_int64 F_INT; +#else +typedef int F_INT; +#endif +''' + interface subroutine lsoda(f,neq,y,t,tout,itol,rtol,atol,itask,istate,iopt,rwork,lrw,iwork,liw,jac,jt) ! 
y1,t,istate = lsoda(f,jac,y0,t0,t1,rtol,atol,itask,istate,rwork,iwork,mf) callstatement (*f2py_func)(cb_f_in_lsoda__user__routines,&neq,y,&t,&tout,&itol,rtol,atol,&itask,&istate,&iopt,rwork,&lrw,iwork,&liw,cb_jac_in_lsoda__user__routines,&jt) - callprotoargument void*,int*,double*,double*,double*,int*,double*,double*,int*,int*,int*,double*,int*,int*,int*,void*,int* + callprotoargument void*,F_INT*,double*,double*,double*,F_INT*,double*,double*,F_INT*,F_INT*,F_INT*,double*,F_INT*,F_INT*,F_INT*,void*,F_INT* use lsoda__user__routines external f external jac diff --git a/scipy/integrate/tests/test_tanhsinh.py b/scipy/integrate/tests/test_tanhsinh.py index 9eeae2519619..aa847a815dd4 100644 --- a/scipy/integrate/tests/test_tanhsinh.py +++ b/scipy/integrate/tests/test_tanhsinh.py @@ -745,6 +745,16 @@ def test_compress_nodes_weights_gh21496(self, xp): x[-1] = 1000 _tanhsinh(np.sin, 1, x) + def test_gh_22681_finite_error(self, xp): + # gh-22681 noted a case in which the error was NaN on some platforms; + # check that this does in fact fail in CI. + a = complex(12, -10) + b = complex(12, 39) + def f(t): + return xp.sin(a * (1 - t) + b * t) + res = _tanhsinh(f, xp.asarray(0.), xp.asarray(1.), atol=0, rtol=0, maxlevel=10) + assert xp.isfinite(res.error) + @pytest.mark.skip_xp_backends('torch', reason='data-apis/array-api-compat#271') @pytest.mark.skip_xp_backends('array_api_strict', reason='No fancy indexing.') diff --git a/scipy/integrate/vode.pyf b/scipy/integrate/vode.pyf index 90774653486f..94181009d268 100644 --- a/scipy/integrate/vode.pyf +++ b/scipy/integrate/vode.pyf @@ -50,11 +50,18 @@ python module zvode__user__routines end python module zvode__user__routines python module _vode + usercode ''' +#ifdef HAVE_BLAS_ILP64 +typedef npy_int64 F_INT; +#else +typedef int F_INT; +#endif +''' interface subroutine dvode(f,jac,neq,y,t,tout,itol,rtol,atol,itask,istate,iopt,rwork,lrw,iwork,liw,mf,rpar,ipar) ! 
y1,t,istate = dvode(f,jac,y0,t0,t1,rtol,atol,itask,istate,rwork,iwork,mf) callstatement (*f2py_func)(cb_f_in_dvode__user__routines,&neq,y,&t,&tout,&itol,rtol,atol,&itask,&istate,&iopt,rwork,&lrw,iwork,&liw,cb_jac_in_dvode__user__routines,&mf,&rpar,&ipar) - callprotoargument void*,int*,double*,double*,double*,int*,double*,double*,int*,int*,int*,double*,int*,int*,int*,void*,int*,double*,int* + callprotoargument void*,F_INT*,double*,double*,double*,F_INT*,double*,double*,F_INT*,F_INT*,F_INT*,double*,F_INT*,F_INT*,F_INT*,void*,F_INT*,double*,F_INT* use dvode__user__routines external f external jac @@ -85,7 +92,7 @@ python module _vode subroutine zvode(f,jac,neq,y,t,tout,itol,rtol,atol,itask,istate,iopt,zwork,lzw,rwork,lrw,iwork,liw,mf,rpar,ipar) ! y1,t,istate = zvode(f,jac,y0,t0,t1,rtol,atol,itask,istate,rwork,iwork,mf) callstatement (*f2py_func)(cb_f_in_zvode__user__routines,&neq,y,&t,&tout,&itol,rtol,atol,&itask,&istate,&iopt,zwork,&lzw,rwork,&lrw,iwork,&liw,cb_jac_in_zvode__user__routines,&mf,&rpar,&ipar) - callprotoargument void*,int*,complex_double*,double*,double*,int*,double*,double*,int*,int*,int*,complex_double*,int*,double*,int*,int*,int*,void*,int*,double*,int* + callprotoargument void*,F_INT*,complex_double*,double*,double*,F_INT*,double*,double*,F_INT*,F_INT*,F_INT*,complex_double*,F_INT*,double*,F_INT*,F_INT*,F_INT*,void*,F_INT*,double*,F_INT* use zvode__user__routines external f external jac diff --git a/scipy/interpolate/_bspl.pyx b/scipy/interpolate/_bspl.pyx deleted file mode 100644 index a47590b8541a..000000000000 --- a/scipy/interpolate/_bspl.pyx +++ /dev/null @@ -1,376 +0,0 @@ -""" -Routines for evaluating and manipulating B-splines. 
- -""" - -import numpy as np -cimport numpy as cnp - -from numpy cimport npy_intp, npy_int64, npy_int32 - -cimport cython -from libc.math cimport NAN - -cnp.import_array() - -cdef extern from "src/__fitpack.h" namespace "fitpack": - void _deBoor_D(const double *t, double x, int k, int ell, int m, double *result - ) noexcept nogil - npy_int64 _find_interval(const double* tptr, npy_int64 len_t, - int k, - double xval, - npy_int64 prev_l, - int extrapolate - ) noexcept nogil - - -#------------------------------------------------------------------------------ -# B-splines -#------------------------------------------------------------------------------ - -@cython.wraparound(False) -@cython.boundscheck(False) -@cython.nonecheck(False) -def evaluate_ndbspline(const double[:, ::1] xi, - const double[:, ::1] t, - const npy_int32[::1] len_t, - const npy_int32[::1] k, - int[::1] nu, - bint extrapolate, - const double[::1] c1r, - npy_intp num_c_tr, - const npy_intp[::1] strides_c1, - const npy_intp[:, ::] indices_k1d, - double[:, ::1] out, - ): - """Evaluate an N-dim tensor product spline or its derivative. - - Parameters - ---------- - xi : ndarray, shape(npoints, ndim) - ``npoints`` values to evaluate the spline at, each value is - a point in an ``ndim``-dimensional space. - t : ndarray, shape(ndim, max_len_t) - Array of knots for each dimension. - This array packs the tuple of knot arrays per dimension into a single - 2D array. The array is ragged (knot lengths may differ), hence - the real knots in dimension ``d`` are ``t[d, :len_t[d]]``. - len_t : ndarray, 1D, shape (ndim,) - Lengths of the knot arrays, per dimension. - k : tuple of ints, len(ndim) - Spline degrees in each dimension. - nu : ndarray of ints, shape(ndim,) - Orders of derivatives to compute, per dimension. - extrapolate : int - Whether to extrapolate out of bounds or return nans. - c1r: ndarray, one-dimensional - Flattened array of coefficients. 
- The original N-dimensional coefficient array ``c`` has shape - ``(n1, ..., nd, ...)`` where each ``ni == len(t[d]) - k[d] - 1``, - and the second "..." represents trailing dimensions of ``c``. - In code, given the C-ordered array ``c``, ``c1r`` is - ``c1 = c.reshape(c.shape[:ndim] + (-1,)); c1r = c1.ravel()`` - num_c_tr : int - The number of elements of ``c1r``, which correspond to the trailing - dimensions of ``c``. In code, this is - ``c1 = c.reshape(c.shape[:ndim] + (-1,)); num_c_tr = c1.shape[-1]``. - strides_c1 : ndarray, one-dimensional - Pre-computed strides of the ``c1`` array. - Note: These are *data* strides, not numpy-style byte strides. - This array is equivalent to - ``[stride // s1.dtype.itemsize for stride in s1.strides]``. - indices_k1d : ndarray, shape((k+1)**ndim, ndim) - Pre-computed mapping between indices for iterating over a flattened - array of shape ``[k[d] + 1) for d in range(ndim)`` and - ndim-dimensional indices of the ``(k+1,)*ndim`` dimensional array. - This is essentially a transposed version of - ``np.unravel_index(np.arange((k+1)**ndim), (k+1,)*ndim)``. - out : ndarray, shape (npoints, num_c_tr) - Output values of the b-spline at given ``xi`` points. - - Notes - ----- - - This function is essentially equivalent to the following: given an - N-dimensional vector ``x = (x1, x2, ..., xN)``, iterate over the - dimensions, form linear combinations of products, - B(x1) * B(x2) * ... B(xN) of (k+1)**N b-splines which are non-zero - at ``x``. - - Since b-splines are localized, the sum has (k+1)**N non-zero elements. 
- - If ``i = (i1, i2, ..., iN)`` is a vector if intervals of the knot - vectors, ``t[d, id] <= xd < t[d, id+1]``, for ``d=1, 2, ..., N``, then - the core loop of this function is nothing but - - ``` - result = 0 - iters = [range(i[d] - self.k[d], i[d] + 1) for d in range(ndim)] - for idx in itertools.product(*iters): - term = self.c[idx] * np.prod([B(x[d], self.k[d], idx[d], self.t[d]) - for d in range(ndim)]) - result += term - ``` - - For efficiency reasons, we iterate over the flattened versions of the - arrays. - - """ - cdef: - npy_intp ndim = len(t) - - # 'intervals': indices for a point in xi into the knot arrays t - npy_intp[::1] i = np.empty(ndim, dtype=np.intp) - - # container for non-zero b-splines at each point in xi - double[:, ::1] b = np.empty((ndim, max(k) + 1), dtype=float) - - const double[::1] xv # an ndim-dimensional input point - double xd # d-th component of x - - const double[::1] td # knots in dimension d - - npy_intp kd # d-th component of k - - npy_intp i_c # index to loop over range(num_c_tr) - npy_intp iflat # index to loop over (k+1)**ndim non-zero terms - npy_intp volume # the number of non-zero terms - const npy_intp[:] idx_b # ndim-dimensional index corresponding to iflat - - int out_of_bounds - npy_intp idx_cflat_base, idx - double factor - double[::1] wrk = np.empty(2*max(k) + 2, dtype=float) - - if xi.shape[1] != ndim: - raise ValueError(f"Expacted data points in {ndim}-D space, got" - f" {xi.shape[1]}-D points.") - - if out.shape[0] != xi.shape[0]: - raise ValueError(f"out and xi are inconsistent: expected" - f" {xi.shape[0]} output values, got" - f" {out.shape[0]}.") - if out.shape[1] != num_c_tr: - raise ValueError(f"out and c are inconsistent: num_c={num_c_tr} " - f" and out.shape[1] = {out.shape[1]}.") - - - with nogil: - # the number of non-zero terms for each point in ``xi``. 
- volume = 1 - for d in range(ndim): - volume *= k[d] + 1 - - ### Iterate over the data points - for j in range(xi.shape[0]): - xv = xi[j, :] - - # For each point, iterate over the dimensions - out_of_bounds = 0 - for d in range(ndim): - td = t[d, :len_t[d]] - xd = xv[d] - kd = k[d] - - # get the location of x[d] in t[d] - i[d] = _find_interval(&td[0], td.shape[0], kd, xd, kd, extrapolate) - - if i[d] < 0: - out_of_bounds = 1 - break - - # compute non-zero b-splines at this value of xd in dimension d - _deBoor_D(&td[0], xd, kd, i[d], nu[d], &wrk[0]) - b[d, :kd+1] = wrk[:kd+1] - - if out_of_bounds: - # xd was nan or extrapolate=False: Fill the output array - # *for this xv value*, and continue to the next xv in xi. - for i_c in range(num_c_tr): - out[j, i_c] = NAN - continue - - for i_c in range(num_c_tr): - out[j, i_c] = 0.0 - - # iterate over the direct products of non-zero b-splines - for iflat in range(volume): - idx_b = indices_k1d[iflat, :] - # The line above is equivalent to - # idx_b = np.unravel_index(iflat, (k+1,)*ndim) - - # From the indices in ``idx_b``, we prepare to index into - # c1.ravel() : for each dimension d, need to shift the index - # by ``i[d] - k[d]`` (see the docstring above). - # - # Since the strides of `c1` are pre-computed, and the array - # is already raveled and is guaranteed to be C-ordered, we only - # need to compute the base index for iterating over ``num_c_tr`` - # elements which represent the trailing dimensions of ``c``. 
- # - # This all is essentially equivalent to iterating over - # idx_cflat = np.ravel_multi_index(tuple(idx_c) + (i_c,), - # c1.shape) - idx_cflat_base = 0 - factor = 1.0 - for d in range(ndim): - factor *= b[d, idx_b[d]] - idx = idx_b[d] + i[d] - k[d] - idx_cflat_base += idx * strides_c1[d] - - ### collect linear combinations of coef * factor - for i_c in range(num_c_tr): - out[j, i_c] = out[j, i_c] + c1r[idx_cflat_base + i_c] * factor - - -@cython.wraparound(False) -@cython.nonecheck(False) -@cython.boundscheck(False) -def _colloc_nd(const double[:, ::1] xvals, - const double[:, ::1] _t, - const npy_int32[::1] len_t, - const npy_int32[::1] k, - const npy_intp[:, ::1] _indices_k1d, - const npy_intp[::1] _cstrides): - """Construct the N-D tensor product collocation matrix as a CSR array. - - In the dense representation, each row of the collocation matrix corresponds - to a data point and contains non-zero b-spline basis functions which are - non-zero at this data point. - - Parameters - ---------- - xvals : ndarray, shape(size, ndim) - Data points. ``xvals[j, :]`` gives the ``j``-th data point as an - ``ndim``-dimensional array. - t : tuple of 1D arrays, length-ndim - Tuple of knot vectors - k : ndarray, shape (ndim,) - Spline degrees - - Returns - ------- - csr_data, csr_indices, csr_indptr - The collocation matrix in the CSR array format. - - Notes - ----- - Algorithm: given `xvals` and the tuple of knots `t`, we construct a tensor - product spline, i.e. a linear combination of - - B(x1; i1, t1) * B(x2; i2, t2) * ... * B(xN; iN, tN) - - - Here ``B(x; i, t)`` is the ``i``-th b-spline defined by the knot vector - ``t`` evaluated at ``x``. 
- - Since ``B`` functions are localized, for each point `(x1, ..., xN)` we - loop over the dimensions, and - - find the location in the knot array, `t[i] <= x < t[i+1]`, - - compute all non-zero `B` values - - place these values into the relevant row - - In the dense representation, the collocation matrix would have had a row per - data point, and each row has the values of the basis elements (i.e., tensor - products of B-splines) evaluated at this data point. Since the matrix is very - sparse (has size = len(x)**ndim, with only (k+1)**ndim non-zero elements per - row), we construct it in the CSR format. - """ - cdef: - npy_intp size = xvals.shape[0] - npy_intp ndim = xvals.shape[1] - - # 'intervals': indices for a point in xi into the knot arrays t - npy_intp[::1] i = np.empty(ndim, dtype=np.intp) - - # container for non-zero b-splines at each point in xi - double[:, ::1] b = np.empty((ndim, max(k) + 1), dtype=float) - - double xd # d-th component of x - const double[::1] td # knots in the dimension d - npy_intp kd # d-th component of k - - npy_intp iflat # index to loop over (k+1)**ndim non-zero terms - npy_intp volume # the number of non-zero terms - - # shifted indices into the data array - npy_intp[::1] idx_c = np.ones(ndim, dtype=np.intp) * (-101) # any sentinel would do, really - npy_intp idx_cflat - - npy_intp[::1] nu = np.zeros(ndim, dtype=np.intp) - - int out_of_bounds - double factor - double[::1] wrk = np.empty(2*max(k) + 2, dtype=float) - - # output - double[::1] csr_data - npy_int64[::1] csr_indices - - int j, d - - # the number of non-zero b-splines for each data point. - volume = 1 - for d in range(ndim): - volume *= k[d] + 1 - - # Allocate the collocation matrix in the CSR format. 
- # If dense, this would have been - # >>> matr = np.zeros((size, max_row_index), dtype=float) - csr_indices = np.empty(shape=(size*volume,), dtype=np.int64) - csr_data = np.empty(shape=(size*volume,), dtype=float) - csr_indptr = np.arange(0, volume*size + 1, volume, dtype=np.int64) - - # ### Iterate over the data points ### - for j in range(size): - xv = xvals[j, :] - - # For each point, iterate over the dimensions - out_of_bounds = 0 - for d in range(ndim): - td = _t[d, :len_t[d]] - xd = xv[d] - kd = k[d] - - # get the location of x[d] in t[d] - i[d] = _find_interval(&td[0], td.shape[0], kd, xd, kd, True) - - if i[d] < 0: - out_of_bounds = 1 - break - - # compute non-zero b-splines at this value of xd in dimension d - _deBoor_D(&td[0], xd, kd, i[d], nu[d], &wrk[0]) - b[d, :kd+1] = wrk[:kd+1] - - if out_of_bounds: - raise ValueError(f"Out of bounds in {d = }, with {xv = }") - - # Iterate over the products of non-zero b-splines and place them - # into the current row of the design matrix - for iflat in range(volume): - # the line below is an unrolled version of - # idx_b = np.unravel_index(iflat, tuple(kd+1 for kd in k)) - idx_b = _indices_k1d[iflat, :] - - factor = 1.0 - idx_cflat = 0 - for d in range(ndim): - factor *= b[d, idx_b[d]] - idx_c[d] = idx_b[d] + i[d] - k[d] - idx_cflat += idx_c[d] * _cstrides[d] - - # The `idx_cflat` computation above is an unrolled version of - # idx_cflat = np.ravel_multi_index(tuple(idx_c), c_shape) - - # Fill the row of the collocation matrix in the CSR format. - # If it were dense, it would have been just - # >>> matr[j, idx_cflat] = factor - - # Each row of the full matrix has `volume` non-zero elements. 
- # Thus the CSR format `indptr` increases in steps of `volume` - csr_indices[j*volume + iflat] = idx_cflat - csr_data[j*volume + iflat] = factor - - return np.asarray(csr_data), np.asarray(csr_indices), csr_indptr - diff --git a/scipy/interpolate/_ndbspline.py b/scipy/interpolate/_ndbspline.py index 51ac566ed5ff..0b2dea8ca5fc 100644 --- a/scipy/interpolate/_ndbspline.py +++ b/scipy/interpolate/_ndbspline.py @@ -5,7 +5,7 @@ from math import prod -from . import _bspl # type: ignore[attr-defined] +from . import _dierckx # type: ignore[attr-defined] import scipy.sparse.linalg as ssl from scipy.sparse import csr_array @@ -139,9 +139,9 @@ def __call__(self, xi, *, nu=None, extrapolate=None): extrapolate = bool(extrapolate) if nu is None: - nu = np.zeros((ndim,), dtype=np.intc) + nu = np.zeros((ndim,), dtype=np.int64) else: - nu = np.asarray(nu, dtype=np.intc) + nu = np.asarray(nu, dtype=np.int64) if nu.ndim != 1 or nu.shape[0] != ndim: raise ValueError( f"invalid number of derivative orders {nu = } for " @@ -173,12 +173,10 @@ def __call__(self, xi, *, nu=None, extrapolate=None): # replacement for np.ravel_multi_index for indexing of `c1`: _strides_c1 = np.asarray([s // c1.dtype.itemsize - for s in c1.strides], dtype=np.intp) + for s in c1.strides], dtype=np.int64) num_c_tr = c1.shape[-1] # # of trailing coefficients - out = np.empty(xi.shape[:-1] + (num_c_tr,), dtype=c1.dtype) - - _bspl.evaluate_ndbspline(xi, + out = _dierckx.evaluate_ndbspline(xi, self._t, self._len_t, self._k, @@ -188,7 +186,7 @@ def __call__(self, xi, *, nu=None, extrapolate=None): num_c_tr, _strides_c1, self._indices_k1d, - out,) + ) out = out.view(self.c.dtype) return out.reshape(xi_shape[:-1] + self.c.shape[ndim:]) @@ -235,15 +233,12 @@ def design_matrix(cls, xvals, t, k, extrapolate=True): # The strides of the coeffs array: the computation is equivalent to # >>> cstrides = [s // 8 for s in np.empty(c_shape).strides] cs = c_shape[1:] + (1,) - cstrides = np.cumprod(cs[::-1], 
dtype=np.intp)[::-1].copy() + cstrides = np.cumprod(cs[::-1], dtype=np.int64)[::-1].copy() # heavy lifting happens here - data, indices, indptr = _bspl._colloc_nd(xvals, - _t, - len_t, - k, - _indices_k1d, - cstrides) + data, indices, indptr = _dierckx._coloc_nd(xvals, + _t, len_t, k, _indices_k1d, cstrides) + return csr_array((data, indices, indptr)) @@ -271,7 +266,7 @@ def _preprocess_inputs(k, t_tpl): # make k a tuple k = (k,)*ndim - k = np.asarray([operator.index(ki) for ki in k], dtype=np.int32) + k = np.asarray([operator.index(ki) for ki in k], dtype=np.int64) if len(k) != ndim: raise ValueError(f"len(t) = {len(t_tpl)} != {len(k) = }.") @@ -305,7 +300,7 @@ def _preprocess_inputs(k, t_tpl): # non-zero b-spline elements shape = tuple(kd + 1 for kd in k) indices = np.unravel_index(np.arange(prod(shape)), shape) - _indices_k1d = np.asarray(indices, dtype=np.intp).T.copy() + _indices_k1d = np.asarray(indices, dtype=np.int64).T.copy() # 5. pack the knots into a single array: # ([1, 2, 3, 4], [5, 6], (7, 8, 9)) --> @@ -318,7 +313,7 @@ def _preprocess_inputs(k, t_tpl): _t.fill(np.nan) for d in range(ndim): _t[d, :len(t_tpl[d])] = t_tpl[d] - len_t = np.asarray(len_t, dtype=np.int32) + len_t = np.asarray(len_t, dtype=np.int64) return k, _indices_k1d, (_t, len_t) diff --git a/scipy/interpolate/meson.build b/scipy/interpolate/meson.build index 72e2d20fd1dd..8223134b05db 100644 --- a/scipy/interpolate/meson.build +++ b/scipy/interpolate/meson.build @@ -87,10 +87,15 @@ fitpack_src = [ 'fitpack/pardtc.f' ] -# TODO: Add flags for 64 bit ints +if use_ilp64 + _fflag_intsize = _fflag_ilp64 +else + _fflag_intsize = _fflag_lp64 +endif + fitpack_lib = static_library('fitpack_lib', fitpack_src, - fortran_args: _fflag_Wno_maybe_uninitialized, + fortran_args: [_fflag_Wno_maybe_uninitialized, _fflag_intsize], override_options: ['b_lto=false'], gnu_symbol_visibility: 'hidden', ) @@ -123,23 +128,13 @@ py3.extension_module('_rgi_cython', __fitpack_lib = static_library('__fitpack', 
['src/__fitpack.h', 'src/__fitpack.cc'], - dependencies:[lapack, np_dep, py3_dep], + dependencies:[lapack_ilp64, np_dep, py3_dep], ) __fitpack_dep = declare_dependency( link_with: __fitpack_lib, ) -py3.extension_module('_bspl', - cython_gen_cpp.process('_bspl.pyx'), - cpp_args: cython_cpp_args, - include_directories: 'src/', - dependencies: [lapack, np_dep, __fitpack_dep], - link_args: version_link_args, - install: true, - subdir: 'scipy/interpolate' -) - py3.extension_module('_dierckx', ['src/_dierckxmodule.cc'], @@ -150,9 +145,14 @@ py3.extension_module('_dierckx', subdir: 'scipy/interpolate' ) -# TODO: Add flags for 64 bit ints +# Build _fitpack and dfitpack extensions: both are FITPACK wrappers. +# XXX: some functions from dfitpack use the F_INT macro defined in dfitpack.pyf, while +# others rely on the build flags only. Consider cleaning this up at some point. + + py3.extension_module('_fitpack', ['src/_fitpackmodule.c'], + c_args: c_flags_ilp64, link_with: [fitpack_lib], include_directories: 'src/', dependencies: np_dep, @@ -162,12 +162,20 @@ py3.extension_module('_fitpack', subdir: 'scipy/interpolate' ) -# TODO: Add flags for 64 bit ints +if use_ilp64 + # generator only accepts strings, not files + f2c_map_file = f2py_ilp64_opts[1] + extra_arg = f2py_ilp64_opts[0] + '=' + fs.parent(f2c_map_file) / fs.name(f2c_map_file) + _dfitpackmodule_obj = f2py_gen.process('src/dfitpack.pyf', extra_args: extra_arg) +else + _dfitpackmodule_obj = f2py_gen.process('src/dfitpack.pyf') +endif + py3.extension_module('_dfitpack', - f2py_gen.process('src/dfitpack.pyf'), - c_args: [Wno_unused_variable], + _dfitpackmodule_obj, + c_args: [Wno_unused_variable] + c_flags_ilp64, link_args: version_link_args, - dependencies: [lapack_dep, fortranobject_dep], + dependencies: [fortranobject_dep], link_with: [fitpack_lib], override_options: ['b_lto=false'], install: true, diff --git a/scipy/interpolate/src/__fitpack.cc b/scipy/interpolate/src/__fitpack.cc index 247f28119fd7..27d9e25c9e9e 
100644 --- a/scipy/interpolate/src/__fitpack.cc +++ b/scipy/interpolate/src/__fitpack.cc @@ -1,4 +1,7 @@ #include +#include +#include +#include #include "__fitpack.h" namespace fitpack{ @@ -215,7 +218,6 @@ data_matrix( /* inputs */ triangularized matrix. This routine MODIFIES `a` & `y` in-place. - */ void qr_reduce(double *aptr, const int64_t m, const int64_t nz, // a(m, nz), packed @@ -426,7 +428,6 @@ fpknot(const double *x_ptr, int64_t m, } - /* * Evaluate the spline function */ @@ -470,7 +471,6 @@ _evaluate_spline( out(ip, jp) += c(interval + a -k, jp) * wrk[a]; } } - } } } @@ -515,6 +515,7 @@ _coloc_matrix(const double *xptr, int64_t m, // x, shape(m,) } } + void norm_eq_lsq(const double *xptr, int64_t m, // x, shape (m,) const double *tptr, int64_t len_t, // t, shape (len_t,) @@ -568,4 +569,234 @@ norm_eq_lsq(const double *xptr, int64_t m, // x, shape (m,) } } + +/*** NDBSpline ***/ + +/* Evaluate an N-dim tensor product spline or its derivative */ +void +_evaluate_ndbspline(const double *xi_ptr, int64_t npts, int64_t ndim, // xi, shape(npts, ndim) + const double *t_ptr, int64_t max_len_t, // t, shape (ndim, max_len_t) + const int64_t *len_t_ptr, // len_t, shape (ndim,) + const int64_t *k_ptr, // k, shape (ndim,) + const int64_t *nu_ptr, // nu, shape (ndim,) + int i_extrap, + const double *c1_ptr, int64_t num_c1, // flattened coefficients + // pre-tabulated helpers for iterating over (k+1)**ndim subarrays + const int64_t *strides_c1_ptr, // shape (ndim,) + const int64_t *indices_k1d_ptr, int64_t num_k1d, // shape (num_k1, ndim) + double *out_ptr, int64_t num_c_tr // out, shape(npts, num_c_tr) +) +{ + auto xi = ConstRealArray2D(xi_ptr, npts, ndim); + auto t = ConstRealArray2D(t_ptr, ndim, max_len_t); + auto len_t = ConstIndexArray1D(len_t_ptr, ndim); + auto k = ConstIndexArray1D(k_ptr, ndim); + auto nu = ConstIndexArray1D(nu_ptr, ndim); + auto c1 = ConstRealArray1D(c1_ptr, num_c1); + auto strides_c1 = ConstIndexArray1D(strides_c1_ptr, ndim); + auto indices_k1d 
= ConstIndexArray2D(indices_k1d_ptr, num_k1d, ndim); + auto out = RealArray2D(out_ptr, npts, num_c_tr); + + // allocate work arrays (small, allocations unlikely to fail) + int64_t max_k = *std::max_element(k_ptr, k_ptr + ndim); + std::vector wrk(2*max_k + 2); + std::vector i(ndim); + + std::vector v_b(ndim * (max_k + 1)); + auto b = RealArray2D(v_b.data(), ndim, max_k + 1); + + // the number of non-zero terms for each point in ``xi`` + int64_t volume = 1; + for (int d=0; d < ndim; d++) { + volume *= k(d) + 1; + } + + // Iterate over the data points + for (int64_t j=0; j < npts; j++){ + + // For each point, iterate over the dimensions + bool out_of_bounds = false; + for(int d=0; d < ndim; d++) { + double xd = xi(j, d); + int64_t kd = k(d); + + // knots in the dimension d + const double *td = t.data + max_len_t*d; + + // get the location of x[d] in td + int64_t i_d = _find_interval(td, len_t(d), kd, xd, kd, i_extrap); + + if (i_d < 0) { + out_of_bounds = true; + break; + } + + // compute non-zero b-splines at this value of xd in dimension d + _deBoor_D(td, xd, kd, i_d, nu(d), wrk.data()); + + for (int s=0; s < kd + 1; s++) { + b(d, s) = wrk[s]; + } + i[d] = i_d; + } // for (d=... + + if (out_of_bounds) { + // xd was nan or extrapolate=False: Fill the output array + // for this data point, xi(j, :), and continue to the next xv in xi. + + for (int i_c=0; i_c < num_c_tr; i_c++) { + out(j, i_c) = std::numeric_limits::quiet_NaN(); + } + continue; + } + + // proceed to combining non-zero terms + for(int i_c=0; i_c < num_c_tr; i_c++) { + out(j, i_c) = 0; + } + + // iterate over the direct product of non-zero b-splines + for (int64_t iflat=0; iflat < volume; iflat++) { + /* `idx_b = indiced_k1d[iflat, :]` assignment is equivalent to + * idx_b = np.unravel_index(iflat, (k+1,)*ndim) + * i.e. `idx_b` would be an ndim-dimensional index corresponding to + * `iflat`. 
+ * + * From the indices in ``idx_b``, we prepare to index into + * c1.ravel() : for each dimension d, need to shift the index + * by ``i[d] - k[d]`` (see the docstring above). + * + * Since the strides of `c1` are pre-computed, and the array + * is already raveled and is guaranteed to be C-ordered, we only + * need to compute the base index for iterating over ``num_c_tr`` + * elements which represent the trailing dimensions of ``c``. + * + * This all is essentially equivalent to iterating over + * idx_cflat = np.ravel_multi_index(tuple(idx_c) + (i_c,), + * c1.shape) + */ + int64_t idx_cflat_base = 0; + double factor = 1.0; + for (int d=0; d < ndim; d++) { + int64_t idx_d = indices_k1d(iflat, d); + factor *= b(d, idx_d); + int64_t idx = idx_d + i[d] - k(d); + idx_cflat_base += idx * strides_c1(d); + } + + // finally, collect linear combinations of coef * factor + for (int i_c=0; i_c < num_c_tr; i_c++) { + out(j, i_c) += c1(idx_cflat_base + i_c) * factor; + } + } + } // for (j=... +} + + +/* + * Construct the N-D tensor product collocation matrix as a CSR array + * Return value is 0 on a normal return, and negative on error: + * if the data point `j` is problematic, return `-j`. 
+ */ +int +_coloc_nd(/* inputs */ + const double *xi_ptr, int64_t npts, int64_t ndim, // xi, shape(npts, ndim) + const double *t_ptr, int64_t max_len_t, // t, shape (ndim, max_len_t) + const int64_t *len_t_ptr, // len_t, shape (ndim,) + const int64_t *k_ptr, // k, shape (ndim,) + /* pre-tabulated helpers for iterating over (k+1)**ndim subarrays */ + const int64_t *indices_k1d_ptr, int64_t num_k1d, // shape (num_k1, ndim) + const int64_t *strides_c1_ptr, // shape (ndim,) + /* outputs */ + int64_t *csr_indices_ptr, int64_t volume, // shape (npts*volume,) + double *csr_data_ptr +) +{ + auto xi = ConstRealArray2D(xi_ptr, npts, ndim); + auto t = ConstRealArray2D(t_ptr, ndim, max_len_t); + auto len_t = ConstIndexArray1D(len_t_ptr, ndim); + auto k = ConstIndexArray1D(k_ptr, ndim); + + auto strides_c1 = ConstIndexArray1D(strides_c1_ptr, ndim); + auto indices_k1d = ConstIndexArray2D(indices_k1d_ptr, num_k1d, ndim); + + auto csr_indices = IndexArray1D(csr_indices_ptr, npts*volume); + auto csr_data = RealArray1D(csr_data_ptr, npts*volume); + + // allocate work arrays (small, allocations unlikely to fail) + int64_t max_k = *std::max_element(k_ptr, k_ptr + ndim); + std::vector wrk(2*max_k + 2); + std::vector i(ndim); + + std::vector v_b(ndim * (max_k + 1)); + auto b = RealArray2D(v_b.data(), ndim, max_k + 1); + + // Iterate over the data points + for (int64_t j=0; j < npts; j++){ + + // For each point, iterate over the dimensions + bool out_of_bounds = false; + for(int d=0; d < ndim; d++) { + double xd = xi(j, d); + int64_t kd = k(d); + + // knots in the dimension d + const double *td = t.data + max_len_t*d; + + // get the location of x[d] in td + int64_t i_d = _find_interval(td, len_t(d), kd, xd, kd, 1); + + if (i_d < 0) { + out_of_bounds = true; + break; + } + + // compute non-zero b-splines at this value of xd in dimension d + _deBoor_D(td, xd, kd, i_d, 0, wrk.data()); + + for (int s=0; s < kd + 1; s++) { + b(d, s) = wrk[s]; + } + i[d] = i_d; + } // for (d=... 
+ + if (out_of_bounds) { + // bail out + return -j; + } + + // Iterate over the products of non-zero b-splines and place them + // into the current row of the design matrix + for (int64_t iflat=0; iflat < volume; iflat++) { + // The `idx_cflat` computation is an unrolled version of + // idx_cflat = np.ravel_multi_index(tuple(idx_c), c_shape) + // + // `_indiced_k1d` array is pre-tabulated such that `idx_d` is a d-th component + // of `idx_b = np.unravel_index(iflat, tuple(kd+1 for kd in k))` + int64_t idx_cflat = 0; + double factor = 1.0; + for (int d=0; d < ndim; d++) { + int64_t idx_d = indices_k1d(iflat, d); + factor *= b(d, idx_d); + int64_t idx = idx_d + i[d] - k(d); + idx_cflat += idx * strides_c1(d); + } + + /* + * Fill the row of the colocation matrix in the CSR format. + * If it were dense, it would have been just + * >>> matr[j, idx_cflat] = factor + * + * Each row of the full matrix has `volume` non-zero elements. + * Thus the CSR format `indptr` increases in steps of `volume` + */ + csr_indices(j*volume + iflat) = idx_cflat; + csr_data(j*volume + iflat) = factor; + } // for (iflat=... + } // for( j=... 
+ + return 0; +} + + } // namespace fitpack diff --git a/scipy/interpolate/src/__fitpack.h b/scipy/interpolate/src/__fitpack.h index c39e2dacb7c8..d803844ae0cf 100644 --- a/scipy/interpolate/src/__fitpack.h +++ b/scipy/interpolate/src/__fitpack.h @@ -101,6 +101,10 @@ typedef Array1D RealArray1D; typedef Array1D ConstRealArray1D; typedef Array2D ConstRealArray2D; +typedef Array1D IndexArray1D; +typedef Array1D ConstIndexArray1D; +typedef Array2D ConstIndexArray2D; + /* @@ -231,4 +235,41 @@ norm_eq_lsq(const double *xptr, int64_t m, // x, shape (m,) double *wrk ); + +/* + * Evaluate an ND spline function + */ +void +_evaluate_ndbspline(/* inputs */ + const double *xi_ptr, int64_t npts, int64_t ndim, // xi, shape(npts, ndim) + const double *t_ptr, int64_t max_len_t, // t, shape (ndim, max_len_t) + const int64_t *len_t_ptr, // len_t, shape (ndim,) + const int64_t *k_ptr, // k, shape (ndim,) + const int64_t *nu_ptr, // nu, shape (ndim,) + int i_extrap, + /* flattened coefficients */ + const double *c1_ptr, int64_t num_c1, + /* pre-tabulated helpers for iterating over (k+1)**ndim subarrays */ + const int64_t *strides_c1_ptr, // shape (ndim,) + const int64_t *indices_k1d_ptr, int64_t num_k1d, // shape (num_k1, ndim) + /* output */ + double *out_ptr, int64_t num_c_tr // out, shape(npts, num_c_tr) +); + + +int +_coloc_nd(/* inputs */ + const double *xi_ptr, int64_t npts, int64_t ndim, // xi, shape(npts, ndim) + const double *t_ptr, int64_t max_len_t, // t, shape (ndim, max_len_t) + const int64_t *len_t_ptr, // len_t, shape (ndim,) + const int64_t *k_ptr, // k, shape (ndim,) + /* pre-tabulated helpers for iterating over (k+1)**ndim subarrays */ + const int64_t *indices_k1d_ptr, int64_t num_k1d, // shape (num_k1, ndim) + const int64_t *strides_c1_ptr, // shape (ndim,) + /* outputs */ + int64_t *csr_indices_ptr, int64_t volume, // shape (npts*volume,) + double *csr_data_ptr +); + + } // namespace fitpack diff --git a/scipy/interpolate/src/_dierckxmodule.cc 
b/scipy/interpolate/src/_dierckxmodule.cc index c74207c69205..6d51f20dca81 100644 --- a/scipy/interpolate/src/_dierckxmodule.cc +++ b/scipy/interpolate/src/_dierckxmodule.cc @@ -706,14 +706,320 @@ py_find_interval(PyObject *self, PyObject *args) PyObject *py_interval = PyLong_FromSsize_t(interval); return py_interval; +} + + +/*** NDBspline ***/ + + +static char doc_evaluate_ndbspline[] = + "Evaluate an N-dim tensor product spline or its derivative.\n" + "\n" + "Parameters\n" + "----------\n" + "xi : ndarray, shape(npoints, ndim)\n" + " ``npoints`` values to evaluate the spline at, each value is\n" + " a point in an ``ndim``-dimensional space.\n" + "t : ndarray, shape(ndim, max_len_t)\n" + " Array of knots for each dimension.\n" + " This array packs the tuple of knot arrays per dimension into a single\n" + " 2D array. The array is ragged (knot lengths may differ), hence\n" + " the real knots in dimension ``d`` are ``t[d, :len_t[d]]``.\n" + "len_t : ndarray, 1D, shape (ndim,)\n" + " Lengths of the knot arrays, per dimension.\n" + "k : tuple of ints, len(ndim)\n" + " Spline degrees in each dimension.\n" + "nu : ndarray of ints, shape(ndim,)\n" + " Orders of derivatives to compute, per dimension.\n" + "extrapolate : int\n" + " Whether to extrapolate out of bounds or return nans.\n" + "c1r: ndarray, one-dimensional\n" + " Flattened array of coefficients.\n" + " The original N-dimensional coefficient array ``c`` has shape\n" + " ``(n1, ..., nd, ...)`` where each ``ni == len(t[d]) - k[d] - 1``,\n" + " and the second '...' represents trailing dimensions of ``c``.\n" + " In code, given the C-ordered array ``c``, ``c1r`` is\n" + " ``c1 = c.reshape(c.shape[:ndim] + (-1,)); c1r = c1.ravel()``\n" + "num_c_tr : int\n" + " The number of elements of ``c1r``, which correspond to the trailing\n" + " dimensions of ``c``. 
In code, this is\n" + " ``c1 = c.reshape(c.shape[:ndim] + (-1,)); num_c_tr = c1.shape[-1]``.\n" + "strides_c1 : ndarray, one-dimensional\n" + " Pre-computed strides of the ``c1`` array.\n" + " Note: These are *data* strides, not numpy-style byte strides.\n" + " This array is equivalent to\n" + " ``[stride // c1.dtype.itemsize for stride in c1.strides]``.\n" + "indices_k1d : ndarray, shape((k+1)**ndim, ndim)\n" + " Pre-computed mapping between indices for iterating over a flattened\n" + " array of shape ``[k[d] + 1 for d in range(ndim)]`` and\n" + " ndim-dimensional indices of the ``(k+1,)*ndim`` dimensional array.\n" + " This is essentially a transposed version of\n" + " ``np.unravel_index(np.arange((k+1)**ndim), (k+1,)*ndim)``.\n" + "\n" + "Returns\n" + "-------\n" + "out : ndarray, shape (npoints, num_c_tr)\n" + " Output values of the b-spline at given ``xi`` points.\n" + "\n" + "Notes\n" + "-----\n" + "\n" + "This function is essentially equivalent to the following: given an\n" + "N-dimensional vector ``x = (x1, x2, ..., xN)``, iterate over the\n" + "dimensions, form linear combinations of products,\n" + "B(x1) * B(x2) * ... 
B(xN) of (k+1)**N b-splines which are non-zero\n" + "at ``x``.\n" + "\n" + "Since b-splines are localized, the sum has (k+1)**N non-zero elements.\n" + "\n" + "If ``i = (i1, i2, ..., iN)`` is a vector if intervals of the knot\n" + "vectors, ``t[d, id] <= xd < t[d, id+1]``, for ``d=1, 2, ..., N``, then\n" + "the core loop of this function is nothing but\n" + "\n" + "```\n" + "result = 0\n" + "iters = [range(i[d] - self.k[d], i[d] + 1) for d in range(ndim)]\n" + "for idx in itertools.product(*iters):\n" + " term = self.c[idx] * np.prod([B(x[d], self.k[d], idx[d], self.t[d])\n" + " for d in range(ndim)])\n" + " result += term\n" + "```\n" + "\n" + "For efficiency reasons, we iterate over the flattened versions of the arrays.\n"; +/* +def evaluate_ndbspline(const double[:, ::1] xi, + const double[:, ::1] t, + const npy_int64[::1] len_t, + const npy_int64[::1] k, + npy_int64[::1] nu, + bint extrapolate, + const double[::1] c1r, + int num_c_tr, + const npy_int64[::1] strides_c1, + const npy_int64[:, ::] indices_k1d, +*/ +static PyObject* +py_evaluate_ndbspline(PyObject *self, PyObject *args) +{ + PyObject *py_xi=NULL; + PyObject *py_t=NULL, *py_c1r=NULL, *py_strides_c1=NULL, *py_indices_k1d=NULL; + + PyObject *py_len_t=NULL, *py_k=NULL, *py_nu=NULL; + int num_c_tr; + int i_extrap; + + if(!PyArg_ParseTuple(args, "OOOOOiOiOO", + &py_xi, &py_t, &py_len_t, &py_k, &py_nu, &i_extrap, + &py_c1r, &num_c_tr, &py_strides_c1, &py_indices_k1d)) { + return NULL; + } + + if (!(check_array(py_xi, 2, NPY_DOUBLE) && + check_array(py_t, 2, NPY_DOUBLE) && + check_array(py_len_t, 1, NPY_INT64) && + check_array(py_k, 1, NPY_INT64) && + check_array(py_nu, 1, NPY_INT64) && + check_array(py_c1r, 1, NPY_DOUBLE) && + check_array(py_strides_c1, 1, NPY_INT64) && + check_array(py_indices_k1d, 2, NPY_INT64))) { + return NULL; + } + PyArrayObject *a_xi = (PyArrayObject *)py_xi; + PyArrayObject *a_t = (PyArrayObject *)py_t; + + PyArrayObject *a_len_t = (PyArrayObject *)py_len_t; + PyArrayObject *a_k = 
(PyArrayObject *)py_k; + PyArrayObject *a_nu = (PyArrayObject *)py_nu; + + PyArrayObject *a_c1r = (PyArrayObject *)py_c1r; + PyArrayObject *a_strides_c1 = (PyArrayObject *)py_strides_c1; + PyArrayObject *a_indices_k1d = (PyArrayObject *)py_indices_k1d; + + // sanity checks + int64_t ndim = PyArray_DIM(a_t, 0); + if (PyArray_DIM(a_xi, 1) != ndim) { + std::string msg = ("Expected data points in " + std::to_string(ndim) + "-D" + " space, got " + std::to_string(PyArray_DIM(a_xi, 1)) + + "-D points."); + PyErr_SetString(PyExc_ValueError, msg.c_str()); + return NULL; + } + + // allocate the output + npy_intp dims[2] = {PyArray_DIM(a_xi, 0), num_c_tr}; + PyArrayObject *a_out = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_DOUBLE); + if (a_out == NULL) { + PyErr_NoMemory(); + return NULL; + } + + // heavy lifting happens here + try { + fitpack::_evaluate_ndbspline( + /* inputs */ + static_cast(PyArray_DATA(a_xi)), PyArray_DIM(a_xi, 0), PyArray_DIM(a_xi, 1), + static_cast(PyArray_DATA(a_t)), PyArray_DIM(a_t, 1), + static_cast(PyArray_DATA(a_len_t)), + static_cast(PyArray_DATA(a_k)), + static_cast(PyArray_DATA(a_nu)), + i_extrap, + /* flattened coefficients */ + static_cast(PyArray_DATA(a_c1r)), PyArray_DIM(a_c1r, 0), + /* tabulated helpers */ + static_cast(PyArray_DATA(a_strides_c1)), + static_cast(PyArray_DATA(a_indices_k1d)), PyArray_DIM(a_indices_k1d, 0), + + /* output */ + static_cast(PyArray_DATA(a_out)), num_c_tr + ); + + return (PyObject *)(a_out); + } + catch (std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + return NULL; + } +} + + +static char doc_coloc_nd[] = + "Construct the N-D tensor product collocation matrix as a CSR array.\n" + "\n" + "In the dense representation, each row of the collocation matrix corresponds\n" + "to a data point and contains non-zero b-spline basis functions which are\n" + "non-zero at this data point.\n" + "\n" + "Parameters\n" + "----------\n" + "xvals : ndarray, shape(size, ndim)\n" + " Data points. 
``xvals[j, :]`` gives the ``j``-th data point as an\n" + " ``ndim``-dimensional array.\n" + "t : tuple of 1D arrays, length-ndim\n" + " Tuple of knot vectors\n" + "k : ndarray, shape (ndim,)\n" + " Spline degrees\n" + "\n" + "Returns\n" + "-------\n" + "csr_data, csr_indices, csr_indptr\n" + " The collocation matrix in the CSR array format.\n" + "\n" + "Notes\n" + "-----\n" + "Algorithm: given `xvals` and the tuple of knots `t`, we construct a tensor\n" + "product spline, i.e. a linear combination of\n" + "\n" + " B(x1; i1, t1) * B(x2; i2, t2) * ... * B(xN; iN, tN)\n" + "\n" + "Here ``B(x; i, t)`` is the ``i``-th b-spline defined by the knot vector\n" + "``t`` evaluated at ``x``.\n" + "\n" + "Since ``B`` functions are localized, for each point `(x1, ..., xN)` we\n" + "loop over the dimensions, and\n" + "- find the location in the knot array, `t[i] <= x < t[i+1]`,\n" + "- compute all non-zero `B` values\n" + "- place these values into the relevant row\n" + "\n" + "In the dense representation, the collocation matrix would have had a row per\n" + "data point, and each row has the values of the basis elements (i.e., tensor\n" + "products of B-splines) evaluated at this data point. 
Since the matrix is very\n" + "sparse (has size = len(x)**ndim, with only (k+1)**ndim non-zero elements per\n" + "row), we construct it in the CSR format.\n"; +/* +def _colloc_nd(const double[:, ::1] xvals, + const double[:, ::1] _t, + const npy_int64[::1] len_t, + const npy_int64[::1] k, + const npy_int64[:, ::1] _indices_k1d, + const npy_int64[::1] _cstrides): +*/ +static PyObject* +py_coloc_nd(PyObject *self, PyObject *args) +{ + PyObject *py_xi, *py_t, *py_len_t, *py_k, *py_indices_k1d, *py_strides; + if(!PyArg_ParseTuple(args, "OOOOOO", + &py_xi, &py_t, &py_len_t, &py_k, + &py_indices_k1d, &py_strides)) { + return NULL; + } + + if (!(check_array(py_xi, 2, NPY_DOUBLE) && + check_array(py_t, 2, NPY_DOUBLE) && + check_array(py_len_t, 1, NPY_INT64) && + check_array(py_k, 1, NPY_INT64) && + check_array(py_indices_k1d, 2, NPY_INT64) && + check_array(py_strides, 1, NPY_INT64))) { + return NULL; + } + PyArrayObject *a_xi = (PyArrayObject *)py_xi; + PyArrayObject *a_t = (PyArrayObject *)py_t; + PyArrayObject *a_len_t = (PyArrayObject *)py_len_t; + PyArrayObject *a_k = (PyArrayObject *)py_k; + PyArrayObject *a_indices_k1d = (PyArrayObject *)py_indices_k1d; + PyArrayObject *a_strides = (PyArrayObject *)py_strides; + + /* allocate the outputs */ + npy_intp npts = PyArray_DIM(a_xi, 0); + npy_intp ndim = PyArray_DIM(a_xi, 1); + + // the number of non-zero b-splines at each data point + npy_intp volume = 1; + int64_t *k_data = static_cast(PyArray_DATA(a_k)); + for (int d=0; d < ndim; d++) { + volume *= k_data[d] + 1; + } + + // Allocate the colocation matrix in the CSR format. 
+ npy_intp dims[1] = {npts*volume}; + PyObject *py_csr_data = PyArray_SimpleNew(1, dims, NPY_DOUBLE); + PyObject *py_csr_indices = PyArray_SimpleNew(1, dims, NPY_INT64); + PyObject *py_csr_indptr = PyArray_Arange(0, volume*npts + 1, volume, NPY_INT64); + + if ((py_csr_data == NULL) || (py_csr_indices == NULL) || (py_csr_indptr == NULL)) { + PyErr_NoMemory(); + return NULL; + } + + PyArrayObject *a_csr_data = (PyArrayObject *)py_csr_data; + PyArrayObject *a_csr_indices = (PyArrayObject *)py_csr_indices; + + // heavy lifting happens here + try { + int status = fitpack::_coloc_nd( + /* inputs */ + static_cast(PyArray_DATA(a_xi)), npts, ndim, + static_cast(PyArray_DATA(a_t)), PyArray_DIM(a_t, 1), + static_cast(PyArray_DATA(a_len_t)), + static_cast(PyArray_DATA(a_k)), + /* tabulated helpers */ + static_cast(PyArray_DATA(a_indices_k1d)), PyArray_DIM(a_indices_k1d, 0), + static_cast(PyArray_DATA(a_strides)), + /* outputs */ + static_cast(PyArray_DATA(a_csr_indices)), volume, + static_cast(PyArray_DATA(a_csr_data)) + ); + if (status < 0) { + std::string mesg = ("Data point " + std::to_string(-status) + " is out of bounds"); + PyErr_SetString(PyExc_ValueError, mesg.c_str()); + } + + return Py_BuildValue("(NNN)", PyArray_Return(a_csr_data), + PyArray_Return(a_csr_indices), + py_csr_indptr + ); + } + catch (std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + return NULL; + } } ///////////////////////////////////// static PyMethodDef DierckxMethods[] = { - //... 
+ /* FITPACK replacement helpers*/ {"fpknot", py_fpknot, METH_VARARGS, "fpknot replacement"}, {"fpback", py_fpback, METH_VARARGS, @@ -722,16 +1028,23 @@ static PyMethodDef DierckxMethods[] = { "row-by-row QR triangularization"}, {"data_matrix", py_data_matrix, METH_VARARGS, "(m, k+1) array of non-zero b-splines"}, - {"_coloc", py_coloc, METH_VARARGS, - doc_coloc}, - {"_norm_eq_lsq", py_norm_eq_lsq, METH_VARARGS, - doc_norm_eq_lsq}, + /* BSpline helpers */ {"evaluate_spline", py_evaluate_spline, METH_VARARGS, doc_evaluate_spline}, {"evaluate_all_bspl", py_evaluate_all_bspl, METH_VARARGS, doc_evaluate_all_bspl}, {"find_interval", py_find_interval, METH_VARARGS, doc_find_interval}, + /* make_{interp,lsq}_spline helpers*/ + {"_coloc", py_coloc, METH_VARARGS, + doc_coloc}, + {"_norm_eq_lsq", py_norm_eq_lsq, METH_VARARGS, + doc_norm_eq_lsq}, + /* NdBSpline helpers */ + {"evaluate_ndbspline", py_evaluate_ndbspline, METH_VARARGS, + doc_evaluate_ndbspline}, + {"_coloc_nd", py_coloc_nd, METH_VARARGS, + doc_coloc_nd}, //... {NULL, NULL, 0, NULL} /* Sentinel */ }; diff --git a/scipy/interpolate/src/_fitpackmodule.c b/scipy/interpolate/src/_fitpackmodule.c index e569fe93034f..59ebf4600e85 100644 --- a/scipy/interpolate/src/_fitpackmodule.c +++ b/scipy/interpolate/src/_fitpackmodule.c @@ -5,7 +5,7 @@ static PyObject *fitpack_error; -#ifdef HAVE_ILP64 +#ifdef HAVE_BLAS_ILP64 #define F_INT npy_int64 #define F_INT_NPY NPY_INT64 diff --git a/scipy/interpolate/src/dfitpack.pyf b/scipy/interpolate/src/dfitpack.pyf index 829e1cdbbe7c..35b5d0846a73 100644 --- a/scipy/interpolate/src/dfitpack.pyf +++ b/scipy/interpolate/src/dfitpack.pyf @@ -13,7 +13,7 @@ python module _dfitpack ! 
in usercode ''' -#ifdef HAVE_ILP64 +#ifdef HAVE_BLAS_ILP64 typedef npy_int64 F_INT; #else typedef int F_INT; diff --git a/scipy/interpolate/tests/test_bsplines.py b/scipy/interpolate/tests/test_bsplines.py index 58e7fe3fb6f1..b6898752be28 100644 --- a/scipy/interpolate/tests/test_bsplines.py +++ b/scipy/interpolate/tests/test_bsplines.py @@ -2377,6 +2377,11 @@ def test_2D_separable(self): xp_assert_close(bspl2(xi), target, atol=1e-14) + # test that a nan in -> nan out + xi = np.asarray(xi) + xi[0, 1] = np.nan + xp_assert_equal(np.isnan(bspl2(xi)), np.asarray([True, False, False])) + # now check on a multidim xi rng = np.random.default_rng(12345) xi = rng.uniform(size=(4, 3, 2)) * 5 @@ -2828,6 +2833,15 @@ def test_2D_mixed(self, k): bspl = make_ndbspl((x, y), values, k=k, solver=ssl.spsolve) xp_assert_close(bspl(xi), values.ravel(), atol=1e-15) + def test_2D_nans(self): + x = np.arange(6) + y = np.arange(6) + 0.5 + y[-1] = np.nan + values = x[:, None]**3 * (y**3 + 2*y)[None, :] + + with assert_raises(ValueError): + make_ndbspl((x, y), values, k=1) + def _get_sample_2d_data(self): # from test_rgi.py::TestIntepN x = np.array([.5, 2., 3., 4., 5.5, 6.]) diff --git a/scipy/interpolate/tests/test_fitpack.py b/scipy/interpolate/tests/test_fitpack.py index d798f0eda4eb..2c30112ea3a1 100644 --- a/scipy/interpolate/tests/test_fitpack.py +++ b/scipy/interpolate/tests/test_fitpack.py @@ -1,5 +1,6 @@ import itertools import os +import sys import numpy as np from scipy._lib._array_api import ( @@ -448,6 +449,7 @@ def test_splprep_segfault(): tck, u = splprep([x, y], task=-1, t=uknots) # here is the crash +@pytest.mark.skipif(sys.platform == 'darwin', reason='XXX: crashes on ILP64 CI, why') def test_bisplev_integer_overflow(): np.random.seed(1) diff --git a/scipy/linalg/fblas_64.pyf.src b/scipy/linalg/fblas_64.pyf.src index 40aa47151dae..fb0f16dc5696 100644 --- a/scipy/linalg/fblas_64.pyf.src +++ b/scipy/linalg/fblas_64.pyf.src @@ -1,9 +1,6 @@ python module _fblas_64 usercode ''' 
-#if defined(BLAS_SYMBOL_PREFIX) || defined(BLAS_SYMBOL_SUFFIX) -#include "blas64-prefix-defines.h" -#endif -#define F_INT npy_int64 +#include "_blas64_defines.h" ''' interface diff --git a/scipy/linalg/fblas_l1.pyf.src b/scipy/linalg/fblas_l1.pyf.src index ccaebcc6b5ca..3fe5c5684b9d 100644 --- a/scipy/linalg/fblas_l1.pyf.src +++ b/scipy/linalg/fblas_l1.pyf.src @@ -136,9 +136,9 @@ subroutine rot(n,x,offx,incx,y,offy,incy,c,s) check(offx>=0 && offx=0 && offy(n-1)*abs(incx)) :: n - check(len(y)-offy>(n-1)*abs(incy)) :: n + n = (len(x)-1-offx)/labs(incx)+1 + check(len(x)-offx>(n-1)*labs(incx)) :: n + check(len(y)-offy>(n-1)*labs(incy)) :: n end subroutine rot @@ -167,9 +167,9 @@ subroutine rotm(n,x,offx,incx,y,offy,incy,param) check(offx>=0 && offx=0 && offy(n-1)*abs(incx)) :: n - check(len(y)-offy>(n-1)*abs(incy)) :: n + n = (len(x)-offx)/labs(incx) + check(len(x)-offx>(n-1)*labs(incx)) :: n + check(len(y)-offy>(n-1)*labs(incy)) :: n end subroutine rotm @@ -188,9 +188,9 @@ subroutine swap(n,x,offx,incx,y,offy,incy) check(offx>=0 && offx=0 && offy(n-1)*abs(incx)) :: n - check(len(y)-offy>(n-1)*abs(incy)) :: n + n = (len(x)-offx)/labs(incx) + check(len(x)-offx>(n-1)*labs(incx)) :: n + check(len(y)-offy>(n-1)*labs(incy)) :: n end subroutine swap @@ -206,8 +206,8 @@ subroutine scal(n,a,x,offx,incx) integer optional, intent(in), check(incx>0||incx<0) :: incx = 1 integer optional, intent(in), depend(x) :: offx=0 check(offx>=0 && offx(n-1)*abs(incx)) :: n + integer optional, intent(in),depend(x,incx,offx) :: n = (len(x)-offx)/labs(incx) + check(len(x)-offx>(n-1)*labs(incx)) :: n end subroutine scal @@ -225,8 +225,8 @@ subroutine scal(n,a,x,offx,incx) integer optional, intent(in),check(incx>0||incx<0) :: incx = 1 integer optional, intent(in),depend(x) :: offx=0 check(offx>=0 && offx(n-1)*abs(incx)) :: n + integer optional, intent(in),depend(x,incx,offx) :: n = (len(x)-offx)/labs(incx) + check(len(x)-offx>(n-1)*labs(incx)) :: n end subroutine scal @@ -246,9 +246,9 @@ 
subroutine copy(n,x,offx,incx,y,offy,incy) check(offx>=0 && offx=0 && offy(n-1)*abs(incx)) :: n - check(len(y)-offy>(n-1)*abs(incy)) :: n + n = (len(x)-offx)/labs(incx) + check(len(x)-offx>(n-1)*labs(incx)) :: n + check(len(y)-offy>(n-1)*labs(incy)) :: n end subroutine copy @@ -269,9 +269,9 @@ subroutine axpy(n,a,x,offx,incx,y,offy,incy) check(offx>=0 && offx=0 && offy(n-1)*abs(incx)) :: n - check(len(y)-offy>(n-1)*abs(incy)) :: n + n = (len(x)-offx)/labs(incx) + check(len(x)-offx>(n-1)*labs(incx)) :: n + check(len(y)-offy>(n-1)*labs(incy)) :: n end subroutine axpy @@ -294,9 +294,9 @@ function sdot(n,x,offx,incx,y,offy,incy) result (xy) check(offx>=0 && offx=0 && offy(n-1)*abs(incx)) :: n - check(len(y)-offy>(n-1)*abs(incy)) :: n + n = (len(x)-offx)/labs(incx) + check(len(x)-offx>(n-1)*labs(incx)) :: n + check(len(y)-offy>(n-1)*labs(incy)) :: n end function sdot @@ -319,9 +319,9 @@ function ddot(n,x,offx,incx,y,offy,incy) result (xy) check(offx>=0 && offx=0 && offy(n-1)*abs(incx)) :: n - check(len(y)-offy>(n-1)*abs(incy)) :: n + n = (len(x)-offx)/labs(incx) + check(len(x)-offx>(n-1)*labs(incx)) :: n + check(len(y)-offy>(n-1)*labs(incy)) :: n end function ddot @@ -345,9 +345,9 @@ subroutine dotu(n,x,offx,incx,y,offy,incy,xy) check(offy>=0 && offy(n-1)*abs(incx)) :: n - check(len(y)-offy>(n-1)*abs(incy)) :: n + :: n = (len(x)-offx)/labs(incx) + check(len(x)-offx>(n-1)*labs(incx)) :: n + check(len(y)-offy>(n-1)*labs(incy)) :: n end subroutine dotu @@ -370,9 +370,9 @@ subroutine dotc(n,x,offx,incx,y,offy,incy,xy) check(offx>=0 && offx=0 && offy(n-1)*abs(incx)) :: n - check(len(y)-offy>(n-1)*abs(incy)) :: n + integer optional,intent(in),depend(x,incx,offx,y,incy,offy) :: n = (len(x)-offx)/labs(incx) + check(len(x)-offx>(n-1)*labs(incx)) :: n + check(len(y)-offy>(n-1)*labs(incy)) :: n end subroutine dotc @@ -393,8 +393,8 @@ function nrm2(n,x,offx,incx) result(n2) integer optional,intent(in),depend(x) :: offx=0 check(offx>=0 && offx(n-1)*abs(incx)) :: n + integer 
optional,intent(in),depend(x,incx,offx) :: n = (len(x)-offx)/labs(incx) + check(len(x)-offx>(n-1)*labs(incx)) :: n end function nrm2 @@ -415,8 +415,8 @@ function nrm2(n,x,offx,incx) result(n2) integer optional,intent(in),depend(x) :: offx=0 check(offx>=0 && offx(n-1)*abs(incx)) :: n + integer optional,intent(in),depend(x,incx,offx) :: n = (len(x)-offx)/labs(incx) + check(len(x)-offx>(n-1)*labs(incx)) :: n end function nrm2 @@ -434,8 +434,8 @@ function asum(n,x,offx,incx) result (s) integer optional, intent(in), check(incx>0||incx<0) :: incx = 1 integer optional, intent(in), depend(x) :: offx=0 check(offx>=0 && offx(n-1)*abs(incx)) :: n + integer optional, intent(in),depend(x,incx,offx) :: n = (len(x)-offx)/labs(incx) + check(len(x)-offx>(n-1)*labs(incx)) :: n end function asum @@ -453,8 +453,8 @@ function asum(n,x,offx,incx) result (s) integer optional, intent(in), check(incx>0||incx<0) :: incx = 1 integer optional, intent(in), depend(x) :: offx=0 check(offx>=0 && offx(n-1)*abs(incx)) :: n + integer optional, intent(in),depend(x,incx,offx) :: n = (len(x)-offx)/labs(incx) + check(len(x)-offx>(n-1)*labs(incx)) :: n end function asum @@ -473,8 +473,8 @@ function iamax(n,x,offx,incx) result(k) integer optional, intent(in), check(incx>0||incx<0) :: incx = 1 integer optional, intent(in), depend(x) :: offx=0 check(offx>=0 && offx(n-1)*abs(incx)) :: n + integer optional, intent(in),depend(x,incx,offx) :: n = (len(x)-offx)/labs(incx) + check(len(x)-offx>(n-1)*labs(incx)) :: n end function iamax diff --git a/scipy/linalg/fblas_l2.pyf.src b/scipy/linalg/fblas_l2.pyf.src index 27b9972c0df7..f74a98200b87 100644 --- a/scipy/linalg/fblas_l2.pyf.src +++ b/scipy/linalg/fblas_l2.pyf.src @@ -34,7 +34,7 @@ subroutine gemv(m,n,alpha,a,x,beta,y,offx,incx,offy,incy,trans,rows,cols dimension(*), intent(in) :: x dimension(ly), intent(in,copy,out), depend(ly),optional :: y integer intent(hide), depend(incy,rows,offy) :: ly = & - (y_capi==Py_None?1+offy+(rows-1)*abs(incy):-1) + 
(y_capi==Py_None?1+offy+(rows-1)*labs(incy):-1) dimension(m,n), intent(in) :: a integer depend(a), intent(hide):: m = shape(a,0) integer depend(a), intent(hide):: n = shape(a,1) @@ -42,11 +42,11 @@ subroutine gemv(m,n,alpha,a,x,beta,y,offx,incx,offy,incy,trans,rows,cols integer optional, intent(in) :: offx=0 integer optional, intent(in) :: offy=0 check(offx>=0 && offxoffx+(cols-1)*abs(incx)) :: x + check(len(x)>offx+(cols-1)*labs(incx)) :: x depend(offx,cols,incx) :: x check(offy>=0 && offyoffy+(rows-1)*abs(incy)) :: y + check(len(y)>offy+(rows-1)*labs(incy)) :: y depend(offy,rows,incy) :: y integer depend(m,n,trans), intent(hide) :: rows = (trans?n:m) @@ -76,7 +76,7 @@ subroutine gbmv(m,n,kl,ku,alpha,a,lda,x,incx,offx,beta,y,incy,offy,trans integer optional, intent(in),check(incx>0||incx<0) :: incx = 1 integer optional, intent(in),check(incy>0||incy<0) :: incy = 1 integer intent(hide),depend(m,n,incy,offy,trans) :: ly = & - (y_capi==Py_None?1+offy+(trans==0?m-1:n-1)*abs(incy):-1) + (y_capi==Py_None?1+offy+(trans==0?m-1:n-1)*labs(incy):-1) integer optional, intent(in) :: offx=0 integer optional, intent(in) :: offy=0 @@ -87,12 +87,12 @@ subroutine gbmv(m,n,kl,ku,alpha,a,lda,x,incx,offx,beta,y,incy,offy,trans dimension(ly), intent(in,out,copy,out=yout),depend(ly),optional :: y check(offy>=0 && offyoffy+(trans==0?m-1:n-1)*abs(incy)) :: y + check(len(y)>offy+(trans==0?m-1:n-1)*labs(incy)) :: y depend(offy,n,incy) :: y dimension(*), intent(in) :: x check(offx>=0 && offxoffx+(trans==0?n-1:m-1)*abs(incx)) :: x + check(len(x)>offx+(trans==0?n-1:m-1)*labs(incx)) :: x depend(offx,n,incx) :: x end subroutine gbmv @@ -115,7 +115,7 @@ subroutine bmv(n,k,alpha,a,lda,x,incx,offx,beta,y,incy,offy,low integer intent(in),depend(lda),check(k>=0&&k<=lda-1) :: k integer optional, intent(in),check(incx>0||incx<0) :: incx = 1 integer optional, intent(in),check(incy>0||incy<0) :: incy = 1 - integer intent(hide),depend(incy,n,offy) :: ly = (y_capi==Py_None?1+offy+(n-1)*abs(incy):-1) + 
integer intent(hide),depend(incy,n,offy) :: ly = (y_capi==Py_None?1+offy+(n-1)*labs(incy):-1) integer optional, intent(in) :: offx=0 integer optional, intent(in) :: offy=0 @@ -126,12 +126,12 @@ subroutine bmv(n,k,alpha,a,lda,x,incx,offx,beta,y,incy,offy,low dimension(ly), intent(in,out,copy,out=yout),depend(ly),optional :: y check(offy>=0 && offyoffy+(n-1)*abs(incy)) :: y + check(len(y)>offy+(n-1)*labs(incy)) :: y depend(offy,n,incy) :: y dimension(*), intent(in) :: x check(offx>=0 && offxoffx+(n-1)*abs(incx)) :: x + check(len(x)>offx+(n-1)*labs(incx)) :: x depend(offx,n,incx) :: x end subroutine bmv @@ -149,7 +149,7 @@ subroutine pmv(n,alpha,ap,x,incx,offx,beta,y,incy,offy,low integer intent(in),check(n>=0) :: n integer optional, intent(in),check(incx>0||incx<0) :: incx = 1 integer optional, intent(in),check(incy>0||incy<0) :: incy = 1 - integer intent(hide),depend(incy,n,offy) :: ly = (y_capi==Py_None?1+offy+(n-1)*abs(incy):-1) + integer intent(hide),depend(incy,n,offy) :: ly = (y_capi==Py_None?1+offy+(n-1)*labs(incy):-1) integer optional, intent(in) :: offx=0 integer optional, intent(in) :: offy=0 @@ -160,12 +160,12 @@ subroutine pmv(n,alpha,ap,x,incx,offx,beta,y,incy,offy,low dimension(ly), intent(in,out,copy,out=yout),depend(ly),optional :: y check(offy>=0 && offyoffy+(n-1)*abs(incy)) :: y + check(len(y)>offy+(n-1)*labs(incy)) :: y depend(offy,n,incy) :: y dimension(*), intent(in) :: x check(offx>=0 && offxoffx+(n-1)*abs(incx)) :: x + check(len(x)>offx+(n-1)*labs(incx)) :: x depend(offx,n,incx) :: x end subroutine pmv @@ -190,18 +190,18 @@ subroutine (n,alpha,a,x,beta,y,offx,incx,offy,incy,lowe dimension(*), intent(in) :: x dimension(ly), intent(in,copy,out),depend(ly),optional :: y integer intent(hide),depend(incy,n,offy) :: ly = & - (y_capi==Py_None?1+offy+(n-1)*abs(incy):-1) + (y_capi==Py_None?1+offy+(n-1)*labs(incy):-1) dimension(n,n), intent(in),check(shape(a,0)==shape(a,1)) :: a integer depend(a), intent(hide):: n = shape(a,0) integer optional, 
intent(in) :: offx=0 integer optional, intent(in) :: offy=0 check(offx>=0 && offxoffx+(n-1)*abs(incx)) :: x + check(len(x)>offx+(n-1)*labs(incx)) :: x depend(offx,n,incx) :: x check(offy>=0 && offyoffy+(n-1)*abs(incy)) :: y + check(len(y)>offy+(n-1)*labs(incy)) :: y depend(offy,n,incy) :: y end subroutine @@ -246,9 +246,9 @@ subroutine r(alpha,x,lower,incx,offx,n,a) integer, intent(in), optional :: offx = 0 integer, intent(in), optional, check(incx>0||incx<0) :: incx = 1 - integer, intent(in), optional :: n = (len(x)-1-offx)/abs(incx)+1 + integer, intent(in), optional :: n = (len(x)-1-offx)/labs(incx)+1 check(n >= 0) :: n - check(n <= (len(x)-1-offx)/abs(incx)+1) :: n + check(n <= (len(x)-1-offx)/labs(incx)+1) :: n depend(x, offx, incx) :: n dimension(n,n), intent(in,copy,out), optional :: a @@ -275,11 +275,11 @@ subroutine r2(alpha,x,y,lower,incx,offx,incy,offy,n,a) integer intent(in), optional, check(incy>0||incy<0) :: incy = 1 integer intent(in), optional :: offy = 0 - integer intent(in), optional :: n = ((len(x)-1-offx)/abs(incx)+1 <= (len(y)-1-offy)/abs(incy)+1 ? (len(x)-1-offx)/abs(incx)+1 : (len(y)-1-offy)/abs(incy)+1) + integer intent(in), optional :: n = ((len(x)-1-offx)/labs(incx)+1 <= (len(y)-1-offy)/labs(incy)+1 ? 
(len(x)-1-offx)/labs(incx)+1 : (len(y)-1-offy)/labs(incy)+1) depend(x,incx,offx,y,incy,offy) :: n check(n>=0) :: n - check(n <= (len(x)-1-offx)/abs(incx)+1) :: n - check(n <= (len(y)-1-offy)/abs(incy)+1) :: n + check(n <= (len(x)-1-offx)/labs(incx)+1) :: n + check(n <= (len(y)-1-offy)/labs(incy)+1) :: n dimension(n,n), intent(in,copy,out), optional :: a depend(incx, offx, x, incy, offy, y, n) :: a @@ -305,7 +305,7 @@ subroutine pr(n,alpha,x,incx,offx,ap,lower) dimension(*), intent(in) :: x check(offx>=0 && offxoffx+(n-1)*abs(incx)) :: x + check(len(x)>offx+(n-1)*labs(incx)) :: x depend(offx,n,incx) :: x dimension(*),depend(n),intent(in,out,copy,out=apu) :: ap @@ -334,12 +334,12 @@ subroutine pr2(n,alpha,x,incx,offx,y,incy,offy,ap,lower) dimension(*), intent(in) :: x check(offx>=0 && offxoffx+(n-1)*abs(incx)) :: x + check(len(x)>offx+(n-1)*labs(incx)) :: x depend(offx,n,incx) :: x dimension(*), intent(in) :: y check(offy>=0 && offyoffy+(n-1)*abs(incy)) :: y + check(len(y)>offy+(n-1)*labs(incy)) :: y depend(offy,n,incy) :: y dimension(*),depend(n),intent(in,out,copy,out=apu) :: ap @@ -374,7 +374,7 @@ subroutine tbsv(n,k,a,lda,x,incx,offx,lower,trans,diag) dimension(*), intent(in,out,copy,out=xout) :: x check(offx>=0 && offxoffx+(n-1)*abs(incx)) :: x + check(len(x)>offx+(n-1)*labs(incx)) :: x depend(offx,n,incx) :: x end subroutine tbsv @@ -405,7 +405,7 @@ subroutine tpsv(n,ap,x,incx,offx,lower,trans,diag) dimension(*), intent(in,out,copy,out=xout) :: x check(offx>=0 && offxoffx+(n-1)*abs(incx)) :: x + check(len(x)>offx+(n-1)*labs(incx)) :: x depend(offx,n,incx) :: x end subroutine tpsv @@ -433,7 +433,7 @@ subroutine trmv(n,a,x,offx,incx,lower,trans,diag) integer optional, intent(in), depend(x) :: offx=0 check(offx>=0 && offxoffx+(n-1)*abs(incx)) :: n + check(len(x)>offx+(n-1)*labs(incx)) :: n depend(x,offx,incx) :: n end subroutine trmv @@ -465,7 +465,7 @@ subroutine trsv(n,a,lda,x,incx,offx,lower,trans,diag) dimension(*), intent(in,out,copy,out=xout) :: x 
check(offx>=0 && offxoffx+(n-1)*abs(incx)) :: x + check(len(x)>offx+(n-1)*labs(incx)) :: x depend(offx,n,incx) :: x end subroutine trsv @@ -495,7 +495,7 @@ subroutine tbmv(n,k,a,lda,x,incx,offx,lower,trans,diag) dimension(*), intent(in,out,copy,out=xout) :: x check(offx>=0 && offxoffx+(n-1)*abs(incx)) :: x + check(len(x)>offx+(n-1)*labs(incx)) :: x depend(offx,n,incx) :: x end subroutine tbmv @@ -523,7 +523,7 @@ subroutine tpmv(n,ap,x,incx,offx,lower,trans,diag) dimension(*), intent(in,out,copy,out=xout) :: x check(offx>=0 && offxoffx+(n-1)*abs(incx)) :: x + check(len(x)>offx+(n-1)*labs(incx)) :: x depend(offx,n,incx) :: x end subroutine tpmv diff --git a/scipy/linalg/flapack_64.pyf.src b/scipy/linalg/flapack_64.pyf.src index 28e276c7030f..d997981bae44 100644 --- a/scipy/linalg/flapack_64.pyf.src +++ b/scipy/linalg/flapack_64.pyf.src @@ -16,10 +16,7 @@ python module _flapack_64 usercode ''' -#if defined(BLAS_SYMBOL_PREFIX) || defined(BLAS_SYMBOL_SUFFIX) -#include "blas64-prefix-defines.h" -#endif -#define F_INT npy_int64 +#include "_blas64_defines.h" ''' interface diff --git a/scipy/linalg/flapack_other.pyf.src b/scipy/linalg/flapack_other.pyf.src index 1db51bbd8363..2e772887c2c8 100644 --- a/scipy/linalg/flapack_other.pyf.src +++ b/scipy/linalg/flapack_other.pyf.src @@ -2107,7 +2107,7 @@ subroutine laswp(n,a,nrows,k1,k2,piv,off,inc,m,npiv) integer optional, intent(in),check(inc>0||inc<0) :: inc = 1 integer optional,intent(in),depend(npiv),check(off>=0 && off(m-1)*abs(inc)) :: m = (len(piv)-off)/abs(inc) + integer intent(hide),depend(npiv,inc,off),check(npiv-off>(m-1)*labs(inc)) :: m = (len(piv)-off)/labs(inc) end subroutine laswp @@ -2227,7 +2227,7 @@ subroutine larf(side,m,n,v,incv,tau,c,ldc,work,lwork) character intent(in), check(side[0]=='L'||side[0]=='R') :: side = 'L' integer intent(in,hide), depend(c) :: m = shape(c,0) integer intent(in,hide), depend(c) :: n = shape(c,1) - intent(in),dimension((side[0]=='L'?(1 + (m-1)*abs(incv)):(1 + 
(n-1)*abs(incv)))),depend(n,m,side,incv) :: v + intent(in),dimension((side[0]=='L'?(1 + (m-1)*labs(incv)):(1 + (n-1)*labs(incv)))),depend(n,m,side,incv) :: v integer intent(in), check(incv>0||incv<0) :: incv = 1 intent(in) :: tau dimension(m,n), intent(in,copy,out) :: c @@ -2258,9 +2258,9 @@ subroutine rot(n,x,offx,incx,y,offy,incy,c,s,lx,ly) integer optional, intent(in), check(incy>0||incy<0) :: incy = 1 integer optional, intent(in), depend(lx), check(offx>=0 && offx=0 && offy(n-1)*abs(incx)) :: n - check(ly-offy>(n-1)*abs(incy)) :: n + integer optional, intent(in), depend(lx,incx,offx,ly,incy,offy) :: n = (lx-1-offx)/labs(incx)+1 + check(lx-offx>(n-1)*labs(incx)) :: n + check(ly-offy>(n-1)*labs(incy)) :: n end subroutine rot subroutine ilaver(major, minor, patch) diff --git a/scipy/linalg/meson.build b/scipy/linalg/meson.build index 95925a791cbc..4df5f8d2d468 100644 --- a/scipy/linalg/meson.build +++ b/scipy/linalg/meson.build @@ -50,7 +50,7 @@ linalg_cython_gen = generator(cython, # fblas fblas_module = custom_target('fblas_module', - output: ['_fblasmodule.c'], + output: ['_fblasmodule.c', '_fblas-f2pywrappers.f'], input: 'fblas.pyf.src', command: [generate_f2pymod, '@INPUT@', '-o', '@OUTDIR@'] + f2py_freethreading_arg, depend_files: @@ -66,8 +66,10 @@ fblas_module = custom_target('fblas_module', # LAPACK - we have historically put these in `_fblas`. 
py3.extension_module('_fblas', fblas_module, + fortran_args: _fflag_lp64, link_args: version_link_args, dependencies: [lapack_dep, blas_dep, fortranobject_dep], + link_with: [g77_abi_wrappers], install: true, subdir: 'scipy/linalg' ) @@ -95,12 +97,74 @@ flapack_module = custom_target('flapack_module', py3.extension_module('_flapack', flapack_module, c_args: [Wno_empty_body], + fortran_args: _fflag_lp64, link_args: version_link_args, dependencies: [lapack_dep, blas_dep, fortranobject_dep], install: true, subdir: 'scipy/linalg' ) +# Add _fblas_64 and _flapack_64 if we're building with ILP64 support +# +# NOTE: what happened in the setup.py build was that we were linking LP64 +# libopenblas.so to `_fblas` and ILP64 `libopenblas64_.so` to `_fblas_64` +# and used both at the same time. We never shipped wheels that way, it only +# worked in a CI job. We are re-exporting the LP64 symbols in +# `cython_blas`/`cython_lapack`, so we can't use only ILP64 even if we support +# it in all SciPy code. +# TODO: right now we're only detecting one BLAS library (like NumPy does), but +# we need two blas and two lapack dependency objects here. +# The ILP64 CI job in the 1.10.x branch downloads two OpenBLAS tarballs +# and then uses both in the build (search for `Download-OpenBLAS('1')) +# in azure-pipelines.yml if you want to check that). 
+if use_ilp64 + fblas64_module = custom_target('fblas64_module', + output: ['_fblas_64module.c', '_fblas_64-f2pywrappers.f'], + input: 'fblas_64.pyf.src', + command: [generate_f2pymod, '@INPUT@', '-o', '@OUTDIR@'] + f2py_ilp64_opts + f2py_freethreading_arg, + depend_files: + [ + 'fblas_l1.pyf.src', + 'fblas_l2.pyf.src', + 'fblas_l3.pyf.src', + ] + ) + + py3.extension_module('_fblas_64', + fblas64_module, + #['_fblas_64module.c'], + fortran_args: _fflag_ilp64, + link_args: version_link_args, + include_directories: ['../_build_utils/src'], # for npy_cblas.h + dependencies: [lapack_ilp64, blas_ilp64, fortranobject_dep], + link_with: [g77_abi_wrappers_ilp64], + install: true, + link_language: 'fortran', + subdir: 'scipy/linalg' + ) + + flapack64_module = custom_target('flapack64_module', + output: ['_flapack_64module.c', '_flapack_64-f2pywrappers.f'], + input: 'flapack_64.pyf.src', + command: [generate_f2pymod, '@INPUT@', '-o', '@OUTDIR@'] + f2py_ilp64_opts + f2py_freethreading_arg, + ) + + py3.extension_module('_flapack_64', + flapack64_module, + #['_flapack_64module.c'], + c_args: [Wno_empty_body], + fortran_args: _fflag_ilp64, + link_args: version_link_args, + include_directories: ['../_build_utils/src'], # for npy_cblas.h + dependencies: [lapack_ilp64, blas_ilp64, fortranobject_dep], + link_with: [g77_abi_wrappers_ilp64], + install: true, + link_language: 'fortran', + subdir: 'scipy/linalg' + ) +endif + + # TODO: cblas/clapack are built *only* for ATLAS. Why? Is it still needed? 
# _decomp_interpolative diff --git a/scipy/linalg/tests/test_batch.py b/scipy/linalg/tests/test_batch.py index 7a32e7a5cd66..7871a31ffa1f 100644 --- a/scipy/linalg/tests/test_batch.py +++ b/scipy/linalg/tests/test_batch.py @@ -441,8 +441,8 @@ def test_solve(self, bdim, dtype): if len(bdim) == 1: x = x[..., np.newaxis] b = b[..., np.newaxis] - assert_allclose(A @ x - b, 0, atol=1.5e-6) - assert_allclose(x, np.linalg.solve(A, b), atol=2e-6) + assert_allclose(A @ x - b, 0, atol=2e-6) + assert_allclose(x, np.linalg.solve(A, b), atol=3e-6) @pytest.mark.parametrize('bdim', [(5,), (5, 4), (2, 3, 5, 4)]) @pytest.mark.parametrize('dtype', floating) @@ -455,8 +455,8 @@ def test_lu_solve(self, bdim, dtype): if len(bdim) == 1: x = x[..., np.newaxis] b = b[..., np.newaxis] - assert_allclose(A @ x - b, 0, atol=1.5e-6) - assert_allclose(x, np.linalg.solve(A, b), atol=2e-6) + assert_allclose(A @ x - b, 0, atol=2e-6) + assert_allclose(x, np.linalg.solve(A, b), atol=3e-6) @pytest.mark.parametrize('l_and_u', [(1, 1), ([2, 1, 0], [0, 1 , 2])]) @pytest.mark.parametrize('bdim', [(5,), (5, 4), (2, 3, 5, 4)]) diff --git a/scipy/meson.build b/scipy/meson.build index b3803a4f4592..218ef4cdacd9 100644 --- a/scipy/meson.build +++ b/scipy/meson.build @@ -218,22 +218,30 @@ endif # 2. targets with #include's (due to no `depend_files` - see feature request # at meson#8295) f2py_gen = generator(generate_f2pymod, - arguments : ['@INPUT@', '-o', '@BUILD_DIR@'] + f2py_freethreading_arg, + arguments : ['@INPUT@', '-o', '@BUILD_DIR@', '@EXTRA_ARGS@'] + f2py_freethreading_arg, output : ['_@BASENAME@module.c', '_@BASENAME@-f2pywrappers.f'], ) -# TODO: 64-bit BLAS and LAPACK -# -# Note that this works as long as BLAS and LAPACK are detected properly via -# pkg-config. By default we look for OpenBLAS, other libraries can be configured via -# `meson configure -Dblas=blas -Dlapack=lapack` (example to build with Netlib -# BLAS and LAPACK). 
-# For MKL and for auto-detecting one of multiple libs, we'll need a custom -# dependency in Meson (like is done for scalapack) - see -# https://github.com/mesonbuild/meson/issues/2835 +# Start of BLAS/LAPACK detection + blas_name = get_option('blas') lapack_name = get_option('lapack') +blas_symbol_suffix = get_option('blas-symbol-suffix') +use_ilp64 = get_option('use-ilp64') + +# MKL-specific options +_threading_opt = get_option('mkl-threading') +if _threading_opt == 'auto' + # Switch default to iomp once conda-forge missing openmp.pc issue is fixed + mkl_opts = ['threading: seq'] +else + mkl_opts = ['threading: ' + _threading_opt] +endif +blas_opts = {'mkl': mkl_opts} +mkl_version_req = '>=2023.0' # see gh-24824 +mkl_may_use_sdl = not use_ilp64 and _threading_opt in ['auto', 'iomp'] + macOS13_3_or_later = false if host_machine.system() == 'darwin' @@ -268,6 +276,7 @@ if blas_name == 'openblas' or blas_name == 'auto' endif endif +# Try any other openblas # pkg-config uses a lower-case name while CMake uses a capitalized name, so try # that too to make the fallback detection with CMake work if blas_name == 'openblas' @@ -275,10 +284,7 @@ if blas_name == 'openblas' elif blas_name != 'scipy-openblas' # if so, we found it already blas = dependency(blas_name) endif -blas_dep = declare_dependency( - dependencies: blas, - compile_args: _args_blas_lapack -) + if blas_name == 'blas' # Netlib BLAS has a separate `libcblas.so` which we use directly in the g77 # ABI wrappers, so detect it and error out if we cannot find it. @@ -290,6 +296,30 @@ else cblas = [] endif +if blas_name == 'mkl' + blas = dependency('mkl', + modules: ['interface: lp64'] + mkl_opts, + required: false, # may be required, but we need to emit a custom error message + version: mkl_version_req, + ) + # Insert a second try with MKL, because we may be rejecting older versions + # or missing it because no pkg-config installed. 
If so, we need to retry + # with MKL SDL, and drop the version constraint (this always worked). + if not blas.found() and mkl_may_use_sdl + blas = dependency('mkl', modules: ['sdl: true'], required: false) + endif +endif + +# fallback BLAS detection +blas_dep = declare_dependency( + dependencies: blas, + compile_args: _args_blas_lapack +) + +if not blas.found() + error('No BLAS library detected! SciPy needs one, please install it.') +endif + if 'mkl' in blas.name() or blas.name().to_lower() == 'accelerate' or blas_name == 'scipy-openblas' # For these libraries we know that they contain LAPACK, and it's desirable to # use that - no need to run the full detection twice. @@ -299,6 +329,11 @@ elif lapack_name == 'openblas' else lapack = dependency(lapack_name) endif + +if not lapack.found() + error('No LAPACK library detected! SciPy needs one, please install it.') +endif + lapack_dep = declare_dependency( dependencies: lapack, compile_args: _args_blas_lapack @@ -310,12 +345,11 @@ dependency_map = { 'PYBIND11': pybind11_dep, } -# FIXME: conda-forge sets MKL_INTERFACE_LAYER=LP64,GNU, see gh-11812. -# This needs work on gh-16200 to make MKL robust. We should be -# requesting `mkl-dynamic-lp64-seq` here. And then there's work needed -# in general to enable the ILP64 interface (also for OpenBLAS). -uses_mkl = blas.name().to_lower().startswith('mkl') or lapack.name().to_lower().startswith('mkl') -uses_accelerate = blas.name().to_lower().startswith('accelerate') or lapack.name().to_lower().startswith('accelerate') +# NB: from this point on blas_name is e.g. 
'mkl-lp64-dynamic-seq' +blas_name = blas.name() +lapack_name = lapack.name() +uses_mkl = blas_name.to_lower().startswith('mkl') +uses_accelerate = blas_name.to_lower().startswith('accelerate') use_g77_abi = uses_mkl or uses_accelerate or get_option('use-g77-abi') if use_g77_abi g77_abi_wrappers = static_library( @@ -333,6 +367,107 @@ else ) endif +# Reuse the names, so we ensure we don't lose the arguments wrapped in with +# declare_dependency. Also, avoids changing `dependencies: blas` to blas_dep in other files. +# XXX: unused, remove? +#blas = blas_dep +#lapack = lapack_dep + +# Run ILP64 BLAS detection, if asked +if use_ilp64 + # Okay, we need ILP64 BLAS and LAPACK *in addition to LP64*. So we need to + # detect the ILP64 variants of the found LP64 libraries now. + _args_blas_ilp64 = ['-DHAVE_BLAS_ILP64'] + c_flags_ilp64 = ['-DHAVE_BLAS_ILP64'] + blas_interface = ['interface: ilp64'] + + if 'openblas' in blas_name + _args_blas_ilp64 += ['-DOPENBLAS_ILP64_NAMING_SCHEME'] + endif + + # Run the detection + if uses_mkl + mkl_uses_sdl = false # FIXME, why + if mkl_uses_sdl + mkl_opts = ['sdl: true'] + endif + blas_ilp64 = dependency('mkl', modules: ['interface: ilp64'] + mkl_opts) + lapack_ilp64 = blas_ilp64 + + _args_blas_ilp64 += [ + '-DBLAS_SYMBOL_SUFFIX=_64', + '-DFIX_MKL_2025_ILP64_MISSING_SYMBOL' + ] + c_flags_ilp64 += ['-DBLAS_SYMBOL_SUFFIX=_64'] + + elif blas_name == 'scipy-openblas' + # scipy_openblas64, a separate library + blas_ilp64 = dependency('scipy-openblas64') + lapack_ilp64 = blas_ilp64 + + if not blas_ilp64.found() + error('scipy-openblas64 not found.') + endif + + elif blas_name == 'accelerate' + blas_ilp64 = dependency(blas_name, modules: blas_interface) + _args_blas_ilp64 += ['-DACCELERATE_NEW_LAPACK'] + else + # XXX: ILP64 detection has only been tested for MKL and scipy-openblas64 + if blas_name == 'openblas' + # We cannot allow plain openblas here, that's already the LP64 library and + # will lead to problems (there is, as of now, no 
combined OpenBLAS build + # with 32 and 64 bit symbols) + blas_name = ['openblas64', 'openblas_ilp64'] + endif + + blas_ilp64 = dependency(blas_name, modules: blas_interface) + lapack_ilp64 = dependency(lapack_name, modules: ['lapack'] + blas_interface) + endif + + # Pick up the symbol suffix, it may be auto-detected by Meson and different from LP64 + if blas_symbol_suffix == 'auto' + if blas_name == 'scipy-openblas' + blas_symbol_suffix = '64_' + else + blas_symbol_suffix = blas_ilp64.get_variable('symbol_suffix', default_value: '') + endif + message(f'BLAS symbol suffix (ILP64): @blas_symbol_suffix@') + endif + _blas_incdir = [] + if blas_symbol_suffix != '' + _args_blas_ilp64 += ['-DBLAS_SYMBOL_SUFFIX=' + blas_symbol_suffix] + _blas_incdir = ['.'] + endif + # Declare the ILP64 dependencies + message('BLAS / LAPACK ILP64 detected: ', blas_ilp64.name(), ', ', lapack_ilp64.name()) + + blas_ilp64 = declare_dependency( + dependencies: [blas_ilp64], + compile_args: _args_blas_ilp64, + include_directories: _blas_incdir, + ) + lapack_ilp64 = declare_dependency(dependencies: [lapack_ilp64, blas_ilp64]) + + g77_abi_wrappers_ilp64 = static_library( + 'g77_abi_wrappers_ilp64', + ['_build_utils/src/wrap_g77_abi.c'], + dependencies: [py3_dep, blas_ilp64, np_dep], + c_args: _args_blas_ilp64, + gnu_symbol_visibility: 'hidden', + ) +else + # we're not using ILP64; user code will link to the always-available LP64 blas/lapack + # (all users must use preprocessor macros BLAS_NAME to handle the two options) + blas_ilp64 = blas + lapack_ilp64 = lapack + c_flags_ilp64 = [] + message('LAPACK ILP64 not requested / not detected.') +endif + +# End of BLAS/LAPACK handling + + scipy_dir = py3.get_install_dir() / 'scipy' # Generate version.py for sdist @@ -380,18 +515,41 @@ _cython_tree = [fs.copyfile('__init__.py')] cython_args = ['-3', '--fast-fail', '--output-file', '@OUTPUT@', '--include-dir', '@BUILD_ROOT@', '@INPUT@'] if cy.version().version_compare('>=3.1.0') cython_args += 
['-Xfreethreading_compatible=True'] + + cython_shared_src = custom_target( + install: false, + output: '_cyutility.c', + command: [ + cython, '-3', '--fast-fail', '-Xfreethreading_compatible=True', + '--generate-shared=' + meson.current_build_dir()/'_cyutility.c' + ], + ) + + cython_shared_module = py3.extension_module('_cyutility', + cython_shared_src, + subdir: 'scipy', + cython_args: cython_args, + install: true, + install_tag: 'python-runtime', + ) + + cython_args += ['--shared=scipy._cyutility'] +else + cython_shared_module = [] endif cython_cplus_args = ['--cplus'] + cython_args cython_gen = generator(cython, arguments : cython_args, output : '@BASENAME@.c', - depends : _cython_tree) + depends : [_cython_tree, cython_shared_module] +) cython_gen_cpp = generator(cython, arguments : cython_cplus_args, output : '@BASENAME@.cpp', - depends : [_cython_tree]) + depends : [_cython_tree, cython_shared_module] +) if use_pythran # TODO: add argument to mark extension modules as safe to run without the GIL, @@ -479,13 +637,51 @@ fortran_ignore_warnings = ff.get_supported_arguments( # Intel Fortran (ifort) does not run the preprocessor by default, if Fortran # code uses preprocessor statements, add this compile flag to it. -_fflag_fpp = [] -if ff.get_id() in ['intel-cl', 'intel-llvm-cl'] - if is_windows - _fflag_fpp = ff.get_supported_arguments('/fpp') + +# Gfortran does run the preprocessor for .F files, and PROPACK is the only +# component which needs the preprocessor (unless we need symbol renaming for +# blas_symbol_suffix). 
+_fflag_preprocess = [] +_gfortran_preprocess = ['-cpp', '-ffree-line-length-none', '-ffixed-line-length-none'] +if ff.has_multi_arguments(_gfortran_preprocess) + _fflag_preprocess = _gfortran_preprocess +else + _fflag_preprocess = ff.first_supported_argument(['-fpp', '/fpp', 'cpp']) +endif + +_fflag_lp64 = [] +_fflag_ilp64 = [] +f2py_ilp64_opts = [] +if use_ilp64 + # Gfortran and Clang use `-fdefault-integer-8` to switch to 64-bit integers by + # default, all other known compilers use `-i8` + _fflag_ilp64 = ff.first_supported_argument(['-fdefault-integer-8', '-i8']) + + # Write out a mapping file for f2py for defaulting to ILP64 + conf_data = configuration_data() + if cc.sizeof('long') == 8 + conf_data.set('int64_name', 'long') + elif cc.sizeof('long long') == 8 + conf_data.set('int64_name', 'long long') else - _fflag_fpp = ff.get_supported_arguments('-fpp') + error('Neither `long` nor `long long` is 64-bit, giving up.') endif + int64_f2cmap = configure_file( + input: '_build_utils/int64.f2cmap.in', + output: 'int64.f2cmap', + configuration: conf_data, + install: false, + ) + f2py_ilp64_opts = ['--f2cmap', int64_f2cmap] +endif + +if blas_symbol_suffix != '' + # We need to patch source files that use BLAS/LAPACK symbols. + # In addition, we now need to enable the Fortran preprocessor on all targets + # that depend on BLAS/LAPACK + # Note: this came from `scipy/build_utils/_fortran.py` in the distutils build. 
+ _fflag_lp64 += _fflag_preprocess # TODO: propagate _fflag_lp64 + _fflag_ilp64 += _fflag_preprocess endif # Deal with M_PI & friends; add `use_math_defines` to c_args or cpp_args @@ -625,6 +821,17 @@ lapack_dep = declare_dependency( link_with: [g77_abi_wrappers, blas_lapack_wrapper_lib] ) +if use_ilp64 + blas_ilp64 = declare_dependency( + dependencies: blas_ilp64, + link_with: [g77_abi_wrappers_ilp64] + ) + lapack_ilp64 = declare_dependency( + dependencies: lapack_ilp64, + link_with: [g77_abi_wrappers_ilp64] + ) +endif + subdir('_lib') subdir('special') subdir('linalg') diff --git a/scipy/ndimage/_filters.py b/scipy/ndimage/_filters.py index 0ee98674ed6d..e26b9329570b 100644 --- a/scipy/ndimage/_filters.py +++ b/scipy/ndimage/_filters.py @@ -36,6 +36,7 @@ import math from scipy._lib._util import normalize_axis_index +from scipy._lib._array_api import array_namespace, is_cupy, xp_size from . import _ni_support from . import _nd_image from . import _ni_docstrings @@ -53,8 +54,10 @@ def _vectorized_filter_iv(input, function, size, footprint, output, mode, cval, origin, axes, batch_memory): + xp = array_namespace(input, footprint, output) + # vectorized_filter input validation and standardization - input = np.asarray(input) + input = xp.asarray(input) if not callable(function): raise ValueError("`function` must be a callable.") @@ -71,12 +74,12 @@ def _vectorized_filter_iv(input, function, size, footprint, output, mode, cval, if size is not None: # If provided, size must be an integer or tuple of integers. 
size = (size,)*input.ndim if np.isscalar(size) else tuple(size) - valid = [np.issubdtype(np.asarray(i).dtype, np.integer) and i > 0 for i in size] + valid = [xp.isdtype(xp.asarray(i).dtype, 'integral') and i > 0 for i in size] if not all(valid): raise ValueError("All elements of `size` must be positive integers.") else: # If provided, `footprint` must be array-like - footprint = np.asarray(footprint, dtype=bool) + footprint = xp.asarray(footprint, dtype=xp.bool) size = footprint.shape def footprinted_function(input, *args, axis=-1, **kwargs): return function(input[..., footprint], *args, axis=-1, **kwargs) @@ -108,7 +111,7 @@ def footprinted_function(input, *args, axis=-1, **kwargs): origin = (0,) * n_axes else: origin = (origin,)*n_axes if np.isscalar(origin) else tuple(origin) - integral = [np.issubdtype(np.asarray(i).dtype, np.integer) for i in origin] + integral = [xp.isdtype(xp.asarray(i).dtype, 'integral') for i in origin] if not all(integral): raise ValueError("All elements of `origin` must be integers.") if not len(origin) == n_axes: @@ -117,7 +120,7 @@ def footprinted_function(input, *args, axis=-1, **kwargs): raise ValueError(message) # mode must be one of the allowed strings, and we should convert it to the - # value required by `np.pad` here. + # value required by `np.pad`/`cp.pad` here. valid_modes = {'reflect', 'constant', 'nearest', 'mirror', 'wrap', 'grid-mirror', 'grid-constant', 'grid-wrap', 'valid'} if mode not in valid_modes: @@ -136,20 +139,20 @@ def footprinted_function(input, *args, axis=-1, **kwargs): raise ValueError("Use of `cval` is compatible only with `mode='constant'`.") # `cval` must be a scalar or "broadcastable" to a tuple with the same - # dimensionality of `input`. (Full input validation done by `np.pad`.) - if not np.issubdtype(np.asarray(cval).dtype, np.number): + # dimensionality of `input`. (Full input validation done by `np.pad`/`cp.pad`.) 
+ if not xp.isdtype(xp.asarray(cval).dtype, 'numeric'): raise ValueError("`cval` must include only numbers.") # `batch_memory` must be a positive number. - temp = np.asarray(batch_memory) - if temp.ndim != 0 or (not np.issubdtype(temp.dtype, np.number)) or temp <= 0: + temp = xp.asarray(batch_memory) + if temp.ndim != 0 or (not xp.isdtype(temp.dtype, 'numeric')) or temp <= 0: raise ValueError("`batch_memory` must be positive number.") # For simplicity, work with `axes` at the end. working_axes = tuple(range(-n_axes, 0)) if axes is not None: - input = np.moveaxis(input, axes, working_axes) - output = (np.moveaxis(output, axes, working_axes) + input = xp.moveaxis(input, axes, working_axes) + output = (xp.moveaxis(output, axes, working_axes) if output is not None else output) # Wrap the function to limit maximum memory usage, deal with `footprint`, @@ -159,7 +162,7 @@ def wrapped_function(view, output=output): kwargs = {'axis': working_axes} if working_axes == (): - return footprinted_function(view, **kwargs) + return footprinted_function(xp.asarray(view), **kwargs) # for now, assume we only have to iterate over zeroth axis chunk_size = math.prod(view.shape[1:]) * view.dtype.itemsize @@ -169,9 +172,9 @@ def wrapped_function(view, output=output): elif slices_per_batch == view.shape[0]: if output is None: - return footprinted_function(view, **kwargs) + return footprinted_function(xp.asarray(view), **kwargs) else: - output[...] = footprinted_function(view, **kwargs) + output[...] = footprinted_function(xp.asarray(view), **kwargs) return output for i in range(0, view.shape[0], slices_per_batch): @@ -179,15 +182,16 @@ def wrapped_function(view, output=output): if output is None: # Look at the dtype before allocating the array. (In a follow-up, we # can also look at the shape to support non-scalar elements.) 
- temp = footprinted_function(view[i:i2], **kwargs) - output = np.empty(view.shape[:-n_axes], dtype=temp.dtype) - output[i:i2] = temp + temp = footprinted_function(xp.asarray(view[i:i2]), **kwargs) + output = xp.empty(view.shape[:-n_axes], dtype=temp.dtype) + output[i:i2, ...] = temp else: - output[i:i2] = footprinted_function(view[i:i2], **kwargs) + output[i:i2, ...] = footprinted_function(xp.asarray(view[i:i2]), + **kwargs) return output return (input, wrapped_function, size, mode, cval, - origin, working_axes, n_axes, n_batch) + origin, working_axes, n_axes, n_batch, xp) @_ni_docstrings.docfiller @@ -405,39 +409,53 @@ def vectorized_filter(input, function, *, size=None, footprint=None, output=None """ # noqa: E501 - (input, function, size, mode, cval, origin, working_axes, n_axes, n_batch + (input, function, size, mode, cval, origin, working_axes, n_axes, n_batch, xp ) = _vectorized_filter_iv(input, function, size, footprint, output, mode, cval, origin, axes, batch_memory) - # `np.pad` raises with these sorts of cases, but the best result is probably - # to return the original array. It could be argued that we should call the - # function on the empty array with `axis=None` just to determine the output + # `np.pad`/`cp.pad` raises with these sorts of cases, but the best result is + # probably to return the original array. It could be argued that we should call + # the function on the empty array with `axis=None` just to determine the output # dtype, but I can also see rationale against that. - if input.size == 0: - return input + if xp_size(input) == 0: + return xp.asarray(input) # This seems to be defined. if input.ndim == 0 and size == (): - return np.asarray(function(input) if footprint is None + return xp.asarray(function(input) if footprint is None else function(input[footprint])) - # Border the image according to `mode` and `offset`. `np.pad` does the work, - # but it uses different names; adjust `mode` accordingly. - # Move this to input validation. 
+ if is_cupy(xp): + # CuPy is the only GPU backend that has `pad` (with all modes) + # and `sliding_window_view`. An enhancement would be to use + # no-copy conversion to CuPy whenever the data is on the GPU. + cp = xp # let there be no ambiguity! + swv = cp.lib.stride_tricks.sliding_window_view + pad = cp.pad + else: + # Try to perform no-copy conversion to NumPy for padding and + # `sliding_window_view`. (If that fails, fine - for now, the only + # GPU backend we support is CuPy.) + swv = np.lib.stride_tricks.sliding_window_view + pad = np.pad + input = np.asarray(input) + cval = np.asarray(cval)[()] if mode == 'constant' else None + + # Border the image according to `mode` and `offset`. if mode != 'valid': kwargs = {'constant_values': cval} if mode == 'constant' else {} borders = tuple((i//2 + j, (i-1)//2 - j) for i, j in zip(size, origin)) - bordered_input = np.pad(input, ((0, 0),)*n_batch + borders, mode=mode, **kwargs) + bordered_input = pad(input, ((0, 0),)*n_batch + borders, mode=mode, **kwargs) else: bordered_input = input # Evaluate function with sliding window view. Function is already wrapped to # manage memory, deal with `footprint`, populate `output`, etc. - view = np.lib.stride_tricks.sliding_window_view(bordered_input, size, working_axes) + view = swv(bordered_input, size, working_axes) res = function(view) # move working_axes back to original positions - return np.moveaxis(res, working_axes, axes) if axes is not None else res + return xp.moveaxis(res, working_axes, axes) if axes is not None else res def _invalid_origin(origin, lenw): @@ -1861,6 +1879,8 @@ def maximum_filter(input, size=None, footprint=None, output=None, A sequence of modes (one per axis) is only supported when the footprint is separable. Otherwise, a single mode string must be provided. 
+ %(nan)s + Examples -------- >>> from scipy import ndimage, datasets diff --git a/scipy/ndimage/_interpolation.py b/scipy/ndimage/_interpolation.py index 8e5ce6bc818d..9527cdce7245 100644 --- a/scipy/ndimage/_interpolation.py +++ b/scipy/ndimage/_interpolation.py @@ -33,6 +33,7 @@ import numpy as np from scipy._lib._util import normalize_axis_index +from scipy._lib import array_api_extra as xpx from scipy import special from . import _ni_support @@ -841,6 +842,12 @@ def zoom(input, zoom, output=None, order=3, mode='constant', cval=0.0, complex_output = np.iscomplexobj(input) output = _ni_support._get_output(output, input, shape=output_shape, complex_output=complex_output) + if all(z == 1 for z in zoom) and prefilter: # early exit for gh-20999 + # zoom 1 means "return original image". If `prefilter=False`, + # `input` is *not* the original image; processing is still needed + # to undo the filter. So we only early exit if `prefilter`. + output = xpx.at(output)[...].set(input) + return output if complex_output: # import under different name to avoid confusion with zoom parameter from scipy.ndimage._interpolation import zoom as _zoom diff --git a/scipy/ndimage/_support_alternative_backends.py b/scipy/ndimage/_support_alternative_backends.py index 88cb6e994932..ad7a2a27a54b 100644 --- a/scipy/ndimage/_support_alternative_backends.py +++ b/scipy/ndimage/_support_alternative_backends.py @@ -23,6 +23,11 @@ def _maybe_convert_arg(arg, xp): return arg +# Some cupyx.scipy.ndimage functions don't exist or are incompatible with +# their SciPy counterparts +CUPY_BLOCKLIST = ['vectorized_filter'] + + def delegate_xp(delegator, module_name): def inner(func): @functools.wraps(func) @@ -30,7 +35,7 @@ def wrapper(*args, **kwds): xp = delegator(*args, **kwds) # try delegating to a cupyx/jax namesake - if is_cupy(xp): + if is_cupy(xp) and func.__name__ not in CUPY_BLOCKLIST: # https://github.com/cupy/cupy/issues/8336 import importlib cupyx_module = 
importlib.import_module(f"cupyx.scipy.{module_name}") diff --git a/scipy/ndimage/tests/test_filters.py b/scipy/ndimage/tests/test_filters.py index 8cf88b5177ac..f2542aebce9a 100644 --- a/scipy/ndimage/tests/test_filters.py +++ b/scipy/ndimage/tests/test_filters.py @@ -15,7 +15,8 @@ xp_assert_close, xp_assert_equal, ) -from scipy._lib._array_api import is_cupy, is_torch, array_namespace +from scipy._lib._array_api import (is_cupy, is_torch, is_dask, is_jax, array_namespace, + is_array_api_strict, xp_copy) from scipy.ndimage._filters import _gaussian_kernel1d from . import types, float_types, complex_types @@ -2766,6 +2767,8 @@ def test_gh_22333(): assert_array_equal(actual, expected) +@pytest.mark.filterwarnings("ignore:The given NumPy array is not writable:UserWarning") +@pytest.mark.skip_xp_backends(cpu_only=True, exceptions=['cupy']) class TestVectorizedFilter: @pytest.mark.parametrize("axes, size", [(None, (3, 4, 5)), ((0, 2), (3, 4)), ((-1,), (5,))]) @@ -2773,45 +2776,65 @@ class TestVectorizedFilter: @pytest.mark.parametrize("mode", ['reflect', 'nearest', 'mirror', 'wrap', 'constant']) @pytest.mark.parametrize("use_output", [False, True]) - def test_against_generic_filter(self, axes, size, origin, mode, use_output): + def test_against_generic_filter(self, axes, size, origin, mode, use_output, xp): rng = np.random.default_rng(435982456983456987356) + if use_output and (is_dask(xp) or is_jax(xp)): + pytest.skip("Requires mutable arrays.") + input = rng.random(size=(11, 12, 13)) input_copy = input.copy() # check that it is not modified - output = np.zeros_like(input) if use_output else None - - kwargs = dict(axes=axes, size=size, origin=origin, mode=mode, output=output) - ref = ndimage.generic_filter(input, np.mean, **kwargs) - res = ndimage.vectorized_filter(input, np.mean, **kwargs) - xp_assert_close(res, ref, atol=1e-15) - if use_output: - xp_assert_equal(output, res) + output = xp.zeros(input.shape) if use_output else None - kwargs.pop('size') - 
kwargs['footprint'] = rng.random(size=size or input.shape) > 0.5 + kwargs = dict(axes=axes, size=size, origin=origin, mode=mode) ref = ndimage.generic_filter(input, np.mean, **kwargs) - res = ndimage.vectorized_filter(input, np.mean, **kwargs) - xp_assert_close(res, ref, atol=1e-15) + kwargs['output'] = output + res = ndimage.vectorized_filter(xp.asarray(input.tolist()), + xp.mean, **kwargs) + xp_assert_close(res, xp.asarray(ref.tolist()), atol=1e-15) if use_output: xp_assert_equal(output, res) - xp_assert_equal(input, input_copy) + if not (is_array_api_strict(xp) or is_dask(xp)): + # currently requires support for [..., mask] indexing + kwargs.pop('size') + kwargs.pop('output') + kwargs['footprint'] = rng.random(size=size or input.shape) > 0.5 + ref = ndimage.generic_filter(input, np.mean, **kwargs) + kwargs['footprint'] = xp.asarray(kwargs['footprint']) + kwargs['output'] = output + res = ndimage.vectorized_filter(xp.asarray(input.tolist()), + xp.mean, **kwargs) + xp_assert_close(res, xp.asarray(ref.tolist()), atol=1e-15) + if use_output: + xp_assert_equal(output, res) + + xp_assert_equal(xp.asarray(input), xp.asarray(input_copy)) @pytest.mark.parametrize("dtype", - [np.uint8, np.uint16, np.uint32, np.uint64, - np.int8, np.int16, np.int32, np.int64, - np.float32, np.float64, np.complex64, np.complex128]) + ["uint8", "uint16", "uint32", "uint64", + "int8", "int16", "int32", "int64", + "float32", "float64", "complex64", "complex128"]) @pytest.mark.parametrize("batch_memory", [1, 16*3, np.inf]) @pytest.mark.parametrize("use_footprint", [False, True]) - def test_dtype_batch_memory(self, dtype, batch_memory, use_footprint): + def test_dtype_batch_memory(self, dtype, batch_memory, use_footprint, xp): rng = np.random.default_rng(435982456983456987356) w = 3 + if is_jax(xp) and not (batch_memory == 1): + pytest.skip("Requires mutable array.") + if is_torch(xp) and dtype in {'uint16', 'uint32', 'uint64'}: + pytest.skip("Needs uint support.") + + dtype = getattr(xp, dtype) 
+ if use_footprint: - footprint = np.asarray([True, False, True]) + if (is_dask(xp) or is_array_api_strict(xp)): + pytest.skip("Requires [..., mask] indexing.") + footprint = xp.asarray([True, False, True]) kwargs = dict(footprint=footprint, batch_memory=batch_memory) else: - footprint = np.asarray([True, True, True]) + footprint = xp.asarray([True, True, True]) kwargs = dict(size=w, batch_memory=batch_memory) # The intent here is to exercise all the code paths involved in `batch_memory` @@ -2821,44 +2844,48 @@ def test_dtype_batch_memory(self, dtype, batch_memory, use_footprint): # *won't* fit. n = 16*3 + 1 input = rng.integers(0, 42, size=(n,)) - input = input + input*1j if np.issubdtype(dtype, np.complexfloating) else input - input = input.astype(dtype) + input = input + input*1j if xp.isdtype(dtype, 'complex floating') else input + input_padded = xp.asarray(np.pad(input, [(1, 1)], mode='symmetric'), + dtype=dtype) + input = xp.asarray(input, dtype=dtype) - input2 = np.pad(input, [(1, 1)], mode='symmetric') - ref = [np.sum(input2[i: i + w][footprint]) for i in range(n)] - sum_dtype = np.sum(input2).dtype + ref = [xp.sum(input_padded[i: i + w][footprint]) for i in range(n)] + sum_dtype = xp.sum(input_padded).dtype message = "`batch_memory` is insufficient for minimum chunk size." 
context = (pytest.raises(ValueError, match=message) if batch_memory == 1 else contextlib.nullcontext()) with context: - res = ndimage.vectorized_filter(input, np.sum, **kwargs) - xp_assert_close(res, np.asarray(ref, dtype=sum_dtype)) + res = ndimage.vectorized_filter(input, xp.sum, **kwargs) + xp_assert_close(res, xp.asarray(ref, dtype=sum_dtype)) assert res.dtype == sum_dtype - output = np.empty_like(input) - res = ndimage.vectorized_filter(input, np.sum, output=output, **kwargs) - xp_assert_close(res, np.asarray(ref, dtype=dtype)) + output = xp.empty_like(input) + res = ndimage.vectorized_filter(input, xp.sum, output=output, **kwargs) + xp_assert_close(res, xp.asarray(ref, dtype=dtype)) assert res.dtype == dtype - def test_mode_valid(self): + def test_mode_valid(self, xp): rng = np.random.default_rng(435982456983456987356) input = rng.random(size=(10, 11)) - input_copy = input.copy() # check that it is not modified + input_xp = xp.asarray(input) + input_xp_copy = xp_copy(input_xp) # check that it is not modified size = (3, 5) - function = np.mean - res = ndimage.vectorized_filter(input, function, size=size, mode='valid') + + res = ndimage.vectorized_filter(input_xp, xp.mean, size=size, mode='valid') + view = np.lib.stride_tricks.sliding_window_view(input, size) - ref = function(view, axis=(-2, -1)) - xp_assert_close(res, ref) - xp_assert_equal(res.shape, input.shape - np.asarray(size) + 1) - xp_assert_equal(input, input_copy) + ref = np.mean(view, axis=(-2, -1)) - def test_input_validation(self): - input = np.ones((10, 10)) - function = np.mean + xp_assert_close(res, xp.asarray(ref)) + assert res.shape == tuple(input.shape - np.asarray(size) + 1) + xp_assert_equal(input_xp, input_xp_copy) + + def test_input_validation(self, xp): + input = xp.ones((10, 10)) + function = xp.mean size = 2 - footprint = np.ones((2, 2)) + footprint = xp.ones((2, 2)) message = "`function` must be a callable." 
with pytest.raises(ValueError, match=message): @@ -2874,7 +2901,7 @@ def test_input_validation(self): message = "All elements of `size` must be positive integers." with pytest.raises(ValueError, match=message): - ndimage.vectorized_filter(input, function, size=(1, None)) + ndimage.vectorized_filter(input, function, size=(1, -1)) with pytest.raises(ValueError, match=message): ndimage.vectorized_filter(input, function, size=0) @@ -2882,7 +2909,7 @@ def test_input_validation(self): with pytest.raises(ValueError, match=message): ndimage.vectorized_filter(input, function, size=(1, 2, 3)) with pytest.raises(ValueError, match=message): - ndimage.vectorized_filter(input, function, footprint=np.ones((2, 2, 2))) + ndimage.vectorized_filter(input, function, footprint=xp.ones((2, 2, 2))) message = "`axes` must be provided if the dimensionality..." with pytest.raises(ValueError, match=message): @@ -2890,7 +2917,7 @@ def test_input_validation(self): message = "All elements of `origin` must be integers" with pytest.raises(ValueError, match=message): - ndimage.vectorized_filter(input, function, size=size, origin=(1, None)) + ndimage.vectorized_filter(input, function, size=size, origin=(1, 1.5)) message = "`origin` must be an integer or tuple of integers with length..." with pytest.raises(ValueError, match=message): @@ -2909,44 +2936,45 @@ def test_input_validation(self): with pytest.raises(ValueError, match=message): ndimage.vectorized_filter(input, function, size=size, mode='valid', cval=1) - message = "`cval` must include only numbers." - with pytest.raises(ValueError, match=message): + other_messages = "|Unsupported|The array_api_strict|new|Value 'a duck'" + message = "`cval` must include only numbers." + other_messages + with pytest.raises((ValueError, TypeError), match=message): ndimage.vectorized_filter(input, function, size=size, - mode='constant', cval='a duck') + mode='constant', cval='a duck') - message = "`batch_memory` must be positive number." 
+ message = "`batch_memory` must be positive number." + other_messages with pytest.raises(ValueError, match=message): ndimage.vectorized_filter(input, function, size=size, batch_memory=0) with pytest.raises(ValueError, match=message): ndimage.vectorized_filter(input, function, size=size, batch_memory=(1, 2)) - with pytest.raises(ValueError, match=message): - ndimage.vectorized_filter(input, function, size=size, - batch_memory="shrubbery") + with pytest.raises((ValueError, TypeError), match=message): + ndimage.vectorized_filter(input, function, size=size, batch_memory="a duck") @pytest.mark.parametrize('shape', [(0,), (1, 0), (0, 1, 0)]) - def test_zero_size(self, shape): - input = np.empty(shape) - res = ndimage.vectorized_filter(input, np.mean, size=1) + def test_zero_size(self, shape, xp): + input = xp.empty(shape) + res = ndimage.vectorized_filter(input, xp.mean, size=1) xp_assert_equal(res, input) - def test_edge_cases(self): + @pytest.mark.filterwarnings("ignore:Mean of empty slice.:RuntimeWarning") + def test_edge_cases(self, xp): rng = np.random.default_rng(4835982345234982) - function = np.mean + function = xp.mean # 0-D input - input = np.asarray(1) - res = ndimage.vectorized_filter(1, function, size=()) - xp_assert_equal(res, np.asarray(function(input, axis=()))) + input = xp.asarray(1.) 
+ res = ndimage.vectorized_filter(input, function, size=()) + xp_assert_equal(res, xp.asarray(function(input, axis=()))) - res = ndimage.vectorized_filter(1, function, footprint=True) - xp_assert_equal(res, np.asarray(function(input[True], axis=()))) + if not (is_array_api_strict(xp) or is_dask(xp)): + res = ndimage.vectorized_filter(input, function, footprint=True) + xp_assert_equal(res, xp.asarray(function(input[True], axis=()))) - with pytest.warns(RuntimeWarning, match="Mean of empty slice."): - res = ndimage.vectorized_filter(1, function, footprint=False) - xp_assert_equal(res, np.asarray(function(input[False], axis=()))) + res = ndimage.vectorized_filter(input, function, footprint=False) + xp_assert_equal(res, xp.asarray(function(input[False], axis=()))) # 1x1 window - input = rng.random((5, 5)) + input = xp.asarray(rng.random((5, 5))) res = ndimage.vectorized_filter(input, function, size=1) xp_assert_equal(res, input) diff --git a/scipy/ndimage/tests/test_interpolation.py b/scipy/ndimage/tests/test_interpolation.py index 08edb9219c8c..03cbcdc4a19c 100644 --- a/scipy/ndimage/tests/test_interpolation.py +++ b/scipy/ndimage/tests/test_interpolation.py @@ -1323,6 +1323,23 @@ def test_zoom_0d_array(self, xp): expected = ndimage.zoom(a, factor) xp_assert_close(actual, expected) + @xfail_xp_backends("cupy", reason="CuPy `zoom` needs similar fix.") + def test_zoom_1_gh20999(self, xp): + # gh-20999 reported that zoom with `zoom=1` (or sequence of ones) + # introduced noise. Check that this is resolved. 
+ x = xp.eye(3) + xp_assert_equal(ndimage.zoom(x, 1), x) + xp_assert_equal(ndimage.zoom(x, (1, 1)), x) + + @xfail_xp_backends("cupy", reason="CuPy `zoom` needs similar fix.") + @skip_xp_backends("jax.numpy", reason="read-only backend") + @xfail_xp_backends("dask.array", reason="numpy round-trip") + def test_zoom_1_gh20999_output(self, xp): + x = xp.eye(3) + output = xp.zeros_like(x) + ndimage.zoom(x, 1, output=output) + xp_assert_equal(output, x) + class TestRotate: diff --git a/scipy/optimize/__nnls.c b/scipy/optimize/__nnls.c new file mode 100644 index 000000000000..480b0e1aa64d --- /dev/null +++ b/scipy/optimize/__nnls.c @@ -0,0 +1,268 @@ +#include "__nnls.h" +#include <math.h> + +/* Algorithm NNLS: NONNEGATIVE LEAST SQUARES +* +* Given an m by n matrix A, an m-vector B, and an n-vector X, compute an +* n-vector X which solves the least squares problem +* +* a * x = b subject to x >= 0 +* +* This is a C translation of the original Fortran code, which was developed by +* Charles L. Lawson and Richard J. Hanson at Jet Propulsion Laboratory +* 1973 JUN 15, and published in the book "SOLVING LEAST SQUARES PROBLEMS", +* Prentice-Hall, 1974. Revised FEB 1995 to accompany reprinting of the book +* (DOI: 10.1137/1.9781611971217) by SIAM. +* +*/ +void +__nnls(const int m, const int n, double* restrict a, double* restrict b, + double* restrict x, double* restrict w, double* restrict zz, + int* restrict indices, const int maxiter, double* rnorm, int* info) +{ + int i = 0, ii = 0, ip = 0, indz = 0, iteration = 0, iz = 0, izmax = 0; + int j = 0, jj = 0, k = 0, one = 1, tmpint = 0; + double tau = 0.0, unorm = 0.0, ztest, alpha, cc, ss, wmax, T, tmp_work; + double pivot = 1.0, pivot2 = 0.0, tmp = 0.0, spacing = 0.0; + *info = 1; + if (m <= 0 || n <= 0) + { + *info = 2; + return; + } + + // Initialize the indices and the solution vector x. + for (i = 0; i < n; i++) { indices[i] = i; } + for (i = 0; i < n; i++) { x[i] = 0.0; } + + // Outer loop + while (indz < (m < n ? 
m : n)) + { + // Compute the dual vector components in set Z. + // Essentially a permuted gemv operation via BLAS ddot, in NumPy notation; + // w[indices[indz:]] = A[indz:m, indices[indz:]].T @ b[indz:m] + for (i = indz; i < n; i++) + { + j = indices[i]; + tmpint = m - indz; + w[j] = ddot_(&tmpint, &a[indz + j*m], &one, &b[indz], &one); + } + + // Find the next linearly independent column that corresponds to the + // largest entry in the dual vector w. + // ==================================================================== + while (1) + { + // Finding the largest w[j] and its index + // izmax, wmax = argmax(w[indices[indz:]]) + wmax = 0.0; + for (k = indz; k < n; k++) + { + j = indices[k]; + if (w[j] > wmax) { wmax = w[j]; izmax = k; } + } + // If wmax <= 0.0, terminate since this is a KKT certificate. + if (wmax <= 0.0) { goto END; } + iz = izmax; + j = indices[iz]; + + // The sign of wmax is OK for j to be moved to set p. Begin the + // transformation and check new diagonal element to avoid near-linear + // dependence. + pivot = a[indz + j*m]; + tmpint = m - indz; + dlarfgp_(&tmpint, &pivot, &a[indz + 1 + j*m], &one, &tau); + + // Compute the norm of a[0:indz, j] to check for linear dependence. + unorm = (indz > 0 ? dnrm2_(&indz, &a[j*m], &one) : 0.0); + // unorm is nonnegative + spacing = (unorm > 0.0 ? nextafter(unorm, 2*unorm) - unorm : 0.0); + + // Test for independence by checking the pivot for zero. + if (fabs(pivot) > 100.0*spacing) + { + // Column j is sufficiently independent. Copy b into zz and solve + // for ztest which is the new prospective value for x[j]. + for (i = 0; i < m; i++) { zz[i] = b[i]; } + tmpint = m - indz; + pivot2 = a[indz + j*m]; + a[indz + j*m] = 1.0; + dlarf_("L", &tmpint, &one, &a[indz + j*m], &one, &tau, &zz[indz], &tmpint, &tmp_work); + // See if ztest is positive. This is from the original F77 code. + // Probably better to use a sign test instead of a division. 
+ ztest = zz[indz] / pivot; + if (ztest > 0.0) + { + break; + } else { + a[indz + j*m] = pivot2; + } + } + // Reject j as a candidate to be moved from set z to set p. + // a(indz,j) is restored, set w(j)=0., and loop back to test dual + // coeffs again. + w[j] = 0.0; + } + // ==================================================================== + + // the index j=indices[iz] has been selected to be moved from set z to + // set p. Update b, update indices, apply householder transformations to + // cols in new set z, zero subdiagonal elements in col j, set w(j)=0. + for (i = 0; i < m; i++) { b[i] = zz[i]; } + indices[iz] = indices[indz]; + indices[indz] = j; + indz++; + // Apply the householder transformation to the remaining columns. + if (indz < n) + { + tmpint = m - indz + 1; + for (k = indz; k < n; k++) + { + jj = indices[k]; + dlarf_("L", &tmpint, &one, &a[indz - 1 + j*m], &one, &tau, &a[indz - 1 + jj*m], &tmpint, &tmp_work); + } + } + // Restore the pivot element into a, zero the subdiagonal elements in col j + a[indz - 1 + j*m] = pivot; + if (indz < m) { for (i = indz; i < m; i++) { a[j*m + i] = 0.0; } } + // Zero the dual coefficient for the column. + w[j] = 0.0; + + // Solve the permuted triangular system, store in zz. + for (k = 0; k < indz; k++) + { + // ip traverses the indices of P set in reverse + ip = indz - 1 - k; + if (k != 0) + { + for (i = 0; i <= ip; i++) + { + zz[i] = zz[i] - a[i + jj*m] * zz[ip + 1]; + } + } + jj = indices[ip]; + zz[ip] = zz[ip] / a[ip + jj*m]; + } + + // ****** Inner loop ****** + while (1) + { + iteration++; + if (iteration >= maxiter) { *info = 3; goto END; } + + // See if all new constrained coefficients are feasible, + // if not compute alpha + alpha = 2.0; + for (ip = 0; ip < indz; ip++) + { + k = indices[ip]; + if (zz[ip] <= 0.0) + { + T = -x[k] / (zz[ip] - x[k]); + if (alpha > T) + { + alpha = T; + jj = ip; + } + } + } + // If all new constrained coefficients are feasible, alpha is still + // 2.0. 
If so exit from secondary loop to main loop. + if (alpha == 2.0) { break; } // Get back to outer loop + + // Otherwise interpolate between old x and zz. + for (ip = 0; ip < indz; ip++) + { + k = indices[ip]; + x[k] = x[k] + alpha*(zz[ip] - x[k]); + } + + // Modify a, b, and the indices to move coefficient i from set p + // to set z. While loop simulates a goto in the original F77 code. + i = indices[jj]; + while (1) + { + x[i] = 0.0; + + if (jj != indz-1) + { + jj++; + for (j = jj; j < indz; j++) + { + ii = indices[j]; + indices[j-1] = ii; + dlartgp_(&a[j-1 + ii*m], &a[j + ii*m], &cc, &ss, &a[j-1 + ii*m]); + a[j + ii*m] = 0.0; + // Apply the Givens rotation to all columns except ii. + // Because the columns are not ordered we do it manually. + for (k = 0; k < n; k++) + { + if (k != ii) + { + tmp = a[j-1 + k*m]; + a[j-1 + k*m] = cc*tmp + ss*a[j + k*m]; + a[j + k*m] = -ss*tmp + cc*a[j + k*m]; + } + } + tmp = b[j-1]; + b[j-1] = cc*tmp + ss*b[j]; + b[j] = -ss*tmp + cc*b[j]; + } + } + indz--; + indices[indz] = i; + + // See if remaining coefficients in set P are feasible + // since determination of alpha guarantees it. If still + // there are infeasible ones, they are due to numerical + // noise. Any that are nonpositive will be set to zero + // and moved from set p to set z. 
+ int nobreak = 0; + for (jj = 0; jj < indz; jj++) + { + i = indices[jj]; + if (x[i] <= 0.0) { break; } + if (jj == indz - 1) { nobreak = 1; } + } + // If for loop completes without break, then leave the while loop + if (nobreak) { break; } + } + + for (i = 0; i < m; i++) { zz[i] = b[i]; } + for (k = 0; k < indz; k++) + { + ip = indz - 1 - k; + if (k != 0) + { + for (i = 0; i <= ip; i++) + { + zz[i] = zz[i] - a[i + jj*m] * zz[ip + 1]; + + } + } + jj = indices[ip]; + zz[ip] = zz[ip] / a[ip + jj*m]; + } + // ****** end of inner loop ****** + } + + // Back in the outer loop + for (k = 0; k < indz; k++) + { + i = indices[k]; + x[i] = zz[k]; + } + // ****** end of outer loop ****** + } +END: + // Compute the residual vector and its norm. + if (indz < m) + { + tmpint = m - indz; + *rnorm = dnrm2_(&tmpint, &b[indz], &one); + } else { + for (i = 0; i < n; i++) { w[i] = 0.0; } + *rnorm = 0.0; + } + return; +} diff --git a/scipy/optimize/__nnls.h b/scipy/optimize/__nnls.h new file mode 100644 index 000000000000..5644e6859eb0 --- /dev/null +++ b/scipy/optimize/__nnls.h @@ -0,0 +1,17 @@ +#ifndef __NNLS_H +#define __NNLS_H +#include + +double ddot_(int* n, double* dx, int* incx, double* dy, int* incy); +void dlarf_(char* side, int* m, int* n, double* v, int* incv, double* tau, double* c, int* ldc, double* work); +void dlarfgp_(int* n, double* alpha, double* x, int* incx, double* tau); +void dlartgp_(double* f, double* g, double* cs, double* sn, double* r); +double dnrm2_(int* n, double* x, int* incx); + +void +__nnls(const int m, const int n, double* restrict a, double* restrict b, + double* restrict x, double* restrict w, double* restrict zz, + int* restrict indices, const int maxiter, double* rnorm, int* info); + + +#endif diff --git a/scipy/optimize/__slsqp.c b/scipy/optimize/__slsqp.c new file mode 100644 index 000000000000..2a9c0716f372 --- /dev/null +++ b/scipy/optimize/__slsqp.c @@ -0,0 +1,1010 @@ +#include "__slsqp.h" + +void __nnls(const int m, const int n, double* 
restrict a, double* restrict b, double* restrict x, double* restrict w, double* restrict zz, int* restrict indices, const int maxiter, double* rnorm, int* info); +static void ldp(int m, int n, double* g, double* h, double* x, double* buffer, int* indices, double* xnorm, int* mode); +static void lsi(int me, int mg, int n, double* e, double* f, double* g, double* h, double* x, double* buffer, int* jw, double* xnorm, int* mode); +static void lsei(int ma, int me, int mg, int n, double* a, double* b, double* e, double* f, double* g, double* h, double* x, double* buffer, int* jw, double* xnorm, int* mode); +static void lsq(int m, int meq, int n, int augment, double aug_weight, double* Lf, double* gradx, double* C, double* d, double* xl, double* xu, double* x, double* y, double* buffer, int* jw, int* mode); +static void ldl_update(int n, double* a, double* z, double sigma, double* w); + +/* + * The main SLSQP function. The function argument naming in the Fortran code is + * exceedingly inconsistent and very difficult to follow. Hence we adopted the + * following naming convention in SLSQP and the nested function arguments: + * + * - funx: The function value at the current point. (1) + * - gradx: The gradient of the function at the current point. (n) + * - C: The equality and inequality constraint normals. (m x n) + * - d: The equality and inequality constraints, (m) + * - xl: The lower bounds on x, (n) + * - xu: The upper bounds on x, (n) + * - sol: The solution vector, (n) + * - mult: The Lagrange multipliers, (m + 2*n + 2) + * - buffer: A buffer to hold various intermediate arrays. + * - indices: An array to hold the indices of the active constraints. 
(m + 2*n + 2) + * + * The buffer size should be greater than: + * n*(n+1)//2 + m + 4*n + 3 # SLSQP + * (n+1)*(n+2) + (n+1)*meq + m + (mineq + 2*n + 2)*(n+1) + 3*n + 3 # LSQ + * mineq + 2n + 2 + 2*meq + (n+1) + (mineq + 3n + 3)*(n + 1 - meq) # LSEI + * (mineq + 2n + 2 + 2)*(n + 2) + mineq + 2n + 2 # LDP + * mineq + 2n + 2 # NNLS + * + * + * If applicable, the following are the problem matrix naming convention: + * + * - A: The coefficient matrix of cost function |Ax - b| + * - b: The RHS of cost function |Ax - b| + * - E: The (E)quality constraint matrix of Ex = f + * - f: The equality constraint RHS of Ex = f + * - G: The inequality constraint matrix of Gx >= h + * - h: The inequality constraint RHS of Gx >= h + * + */ +void +__slsqp_body( + struct SLSQP_vars* S, double* funx, double* restrict gradx, + double* restrict C, double* restrict d, double* restrict sol, + double* restrict mult, double* restrict xl, double* restrict xu, double* buffer, + int* indices) +{ + + int one = 1, lda = (S->m > 0 ? S->m : 1); + int j; + double done = 1.0, dmone = -1.0, alfmin = 0.1; + int n = S->n; + int m = S->m; + int n1 = n + 1; + int n2 = n1*n/2; + + // Chop the buffer for various array pointers. + double* restrict bfgs = &buffer[0]; + double* restrict x0 = &buffer[n2]; + double* restrict mu = &buffer[n2 + n]; + double* restrict s = &buffer[n2 + n + m]; + double* restrict u = &buffer[n2 + n + m + n1]; + double* restrict v = &buffer[n2 + n + m + n1 + n1]; + double* restrict lsq_buffer = &buffer[n2 + n + m + n1 + n1 + n1]; + + // The badlin flag keeps track whether the SQP problem on the current + // iteration was inconsistent or not. + int badlin = 0; + + // Fortran code uses reverse communication for the iterations hence it + // needs to jump back to where it left off. Thus the goto statements are + // kept as is. Fortunately, they do not overlap too much and have a relatively + // clean separation. 
+ if (S->mode == 0) { goto MODE0; } + if (S->mode == -1) { goto MODEM1; } + if (S->mode == 1) { goto MODE1; } + +MODE0: + // We always use inexact line search, since exact search is broken in the + // original Fortran code. + S->exact = 0; // (S->acc < 0.0 ? 1 : 0); + S->acc = fabs(S->acc); + S->tol = 10*S->acc; + S->iter = 0; + S->reset = 0; + for (int i = 0; i < n; i++) { s[i] = 0.0; } + for (int i = 0; i < m; i++) { mu[i] = 0.0; } + +RESET_BFGS: + // Reset the BFGS matrix stored in packed format + S->reset++; + if (S->reset > 5) { goto LABEL255;} + for (int i = 0; i < n2; i++) { bfgs[i] = 0.0; } + j = 0; + for (int i = 0; i < n; i++) + { + bfgs[j] = 1.0; + j += n - i; + } + // 120 + +ITER_START: + // Main iteration: Search direction, steplength, LDL'-update + // 130 + S->mode = 9; + if (S->iter >= S->itermax) { return; } + S->iter++; + + // Search direction as solution of the QP-problem + for (int i = 0; i < n; i++) + { + u[i] = -sol[i] + xl[i] ; + v[i] = -sol[i] + xu[i] ; + } + + S->h4 = 1.0; + // augment and aug_weight are not used and hence 0. + lsq(m, S->meq, n, 0, 0, bfgs, gradx, C, d, u, v, s, mult, lsq_buffer, indices, &S->mode); + + // Augmented problem for inconsistent linearization + + // If it turns out that the original SQP problem is inconsistent, + // disallow termination with convergence on this iteration, + // even if the augmented problem was solved. + badlin = 0; + + // If equality constraints are not full rank and all are equality constrained + // then the problem is inconsistent. + if ((S->mode == 6) && (n == S->meq)) { S->mode = 4;} + + // If inconsistency detected, we augment the problem and try again. + // Fortran code augments the problem matrices by embedding them in larger + // buffers then calls lsq. However, these matrices are then copied into + // another buffer inside lsq hence we can let lsq insert into the second + // buffer without modifying the original matrices. 
The only change lsq needs + // is the weightvalue of the augmented variable which starts at 100 and + // being multiplied by 10 on each iteration. Hence we only pass that value + // with "aug_weight". + if (S->mode == 4) + { + badlin = 1; + // Reset the RHS of the constraints to zero of the augmented system. + for (int i = 0; i < n; i++) { s[i] = 0.0; } + S->h3 = 0.0; + double rho = 100.0; + S->inconsistent = 0; + while (1) + { + lsq(m, S->meq, n, 1, rho, bfgs, gradx, C, d, u, v, s, mult, lsq_buffer, indices, &S->mode); + S->h4 = 1.0 - s[n]; + if (S->mode == 4) + { + rho *= 10.0; + S->inconsistent++; + if (S->inconsistent > 5) { return; } + continue; + } else if (S->mode != 1) { + return; + } + break; + } + } else if (S->mode != 1) { + return; + } + + // Update multipliers for L1-test + for (int i = 0; i < n; i++) { v[i] = gradx[i]; } + dgemv_("T", &m, &n, &dmone, C, &lda, mult, &one, &done, v, &one); + + S->f0 = *funx; + for (int i = 0; i < n; i++) { x0[i] = sol[i]; } + S->gs = ddot_(&n, gradx, &one, s, &one); + S->h1 = fabs(S->gs); + S->h2 = 0.0; + for (int j = 0; j < m; j++) + { + if (j < S->meq) + { + S->h3 = d[j]; + } else { + S->h3 = 0.0; + } + S->h2 = S->h2 + fmax(-d[j], S->h3); + S->h3 = fabs(mult[j]); + mu[j] = fmax(S->h3, (mu[j] + S->h3)/2.0); + S->h1 = S->h1 + S->h3*fabs(d[j]); + } + + // Check convergence + S->mode = 0; + if ((S->h1 < S->acc) && (S->h2 < S->acc) && (!badlin) && (*funx == *funx)) { return; } + S->h1 = 0.0; + for (int j = 0; j < m; j++) + { + if (j < S->meq) + { + S->h3 = d[j]; + } else { + S->h3 = 0.0; + } + S->h1 += mu[j]*fmax(-d[j], S->h3); + } + // 180 + S->t0 = *funx + S->h1; + S->h3 = S->gs - S->h1*S->h4; + S->mode = 8; + if (S->h3 >= 0.0) { goto RESET_BFGS; } + + // Line search with an L1 test function + S->line = 0; + S->alpha = 1.0; + + // Inexact line search +LINE_SEARCH: + + S->line++; + S->h3 = (S->alpha) * (S->h3); + dscal_(&n, &S->alpha, s, &one); + for (int i = 0; i < n; i++) { sol[i] = x0[i]; } + daxpy_(&n, &done, s, &one, 
sol, &one); + + S->mode = 1; + return; + +MODE1: + + S->t = *funx; + for (int j = 0; j < m; j++) + { + if (j < S->meq) + { + S->h1 = d[j]; + } else { + S->h1 = 0.0; + } + S->t = S->t + mu[j]*fmax(-d[j], S->h1); + } + S->h1 = S->t - S->t0; + + if ((S->h1 > (S->h3 / 10.0)) && (S->line <= 10)) + { + S->alpha = fmax(S->h3/(2.0*(S->h3 - S->h1)), alfmin); + goto LINE_SEARCH; + } + + // Check convergence + S->h3 = 0.0; + for (int j = 0; j < m; j++) + { + if (j < S->meq) + { + S->h1 = d[j]; + } else { + S->h1 = 0.0; + } + S->h3 = S->h3 + fmax(-d[j], S->h1); + } + if ( + ((fabs(*funx - S->f0) < S->acc) || (dnrm2_(&n, s, &one) < S->acc)) && + (S->h3 < S->acc) && + (!badlin) && + (*funx == *funx) // To filter for finite entries + ) + { + S->mode = 0; + return; + } else { + S->mode = -1; + } + return; + +LABEL255: + // Check relaxed convergence in case of positive directional derivative + S->h3 = 0.0; + for (int j = 0; j < m; j++) + { + if (j < S->meq) + { + S->h1 = d[j]; + } else { + S->h1 = 0.0; + } + S->h3 = S->h3 + fmax(-d[j], S->h1); + } + if (((fabs(*funx - S->f0) < S->tol) || (dnrm2_(&n, s, &one) < S->tol)) && + (S->h3 < S->tol) && + (!badlin) && + (*funx == *funx) + ) + { + S->mode = 0; + } else { + S->mode = 8; + } + return; + +MODEM1: + + // Call Jacobian at current x + + // Update Cholesky factors of Hessian matrix modified by BFGS formula + // u[i] = gradx[i] - C.T @ mult - v[i] + + for (int i = 0; i < n; i++) { u[i] = gradx[i]; } + dgemv_("T", &m, &n, &dmone, C, &lda, mult, &one, &done, u, &one); + for (int i = 0; i < n; i++) + { + u[i] = u[i] - v[i]; + } + + // L'*S + for (int i = 0; i < n; i++) { v[i] = s[i]; } + dtpmv_("L", "T", "U", &n, bfgs, v, &one); + + // D*L'*S + j = 0; + for (int i = 0; i < n; i++) { + v[i] = bfgs[j]*v[i]; + j += n - i; + } + + // L*D*L'*S + dtpmv_("L", "N", "U", &n, bfgs, v, &one); + + S->h1 = ddot_(&n, s, &one, u, &one); + S->h2 = ddot_(&n, s, &one, v, &one); + S->h3 = 0.2*(S->h2); + if (S->h1 < S->h3) + { + S->h4 = (S->h2 - S->h3) / 
(S->h2 - S->h1); + S->h1 = S->h3; + double tmp_dbl = 1.0 - S->h4; + dscal_(&n, &S->h4, u, &one); + daxpy_(&n, &tmp_dbl, v, &one, u, &one); + } + + // Test for singular update, and reset hessian if so + if ((S->h1 == 0.0) || (S->h2 == 0.0)) { goto RESET_BFGS; } + + ldl_update(n, bfgs, u, 1.0 / S->h1, v); + ldl_update(n, bfgs, v, -1.0 / S->h2, u); + + // End of main iteration + goto ITER_START; + + return; +} + + +/* + * min |A*x - b| + * E*x = f + * G*x >= h + * xl <= x <= xu + * + * Problem data is kept in Lf, gradx, C, d, xl, xu arrays in a rather tedious + * format. C(m, n) is the constraint normals, d(n) is the constraint bounds. + * xl(n) and xu(n) are the lower and upper bounds on x. + * + * Lf is the LDL' factor of the BFGS matrix also holding the diagonal entries. + * + * NaN entries in xl, xu, signify unconstrained variables and hence not included. + * + * The C matrix, for a problem with all x bounds are given and finite, + * broken into E and G as follows: + * + * ┌────┐ ┌────┐ ┌┐ + * meq │ │ │ E │ = ││ f + * │ ─┼────┼> │ ││ + * ┼────┼ └────┘ └┘ + * │ │ ┌────┐ ┌┐ + * │ │ │ │ ││ + * mineq = m - meq │ ─┼────┼> │ ││ + * │ │ │ │ ││ + * │ │ │ │ ││ + * └────┘ │ │ >= ││ + * C ┼────┼ ┼┼ + * n │ I │ ││ xl + * ┼────┼ ┼┼ + * n │ -I │ ││ -xu + * └────┘ └┘ + * G h + * + * A and b are stored in Lf[] in LAPACK packed format where Lf holds a unit, lower + * triangular matrix with diagonal entries are overwritten by the entries of d[] + * and vector and gradx[]. + * + * Lf[] = [d[0], s[1], s[2], . , d[1], s[n + 2], d[2], ...] + * + * interpreted as: + * + * [d[ 0 ], ] + * [s[ 1 ], d[ 1 ], . , ] + * Lf[] = [s[ 2 ], s[n+2], . , ] + * [ . , . , . , d[n-1] ] + * [s[ n ], s[2*n], . , . , d[n]] + * + * Then, the following relations recover the A and b + * + * A = sqrt(d[]) * Lf[]^T + * b = - inv( Lf[] * sqrt(d[]) ) * gradx[] + * + * The solution is returned in x() and the Lagrange multipliers are returned in y(). 
+ * + * For solving the problem in case of a detection of inconsistent linearization, + * see D. Kraft, "A software package for Sequential Quadratic Programming" + * Section 2.2.3 + * + * In the original code, the augmented system is detected by mismatch of certain + * integers which is making things quite unreadable. Here we explicitly pass a + * flag. + * + * Inconsistent linearization augments all arrays to accomodate for the dummy + * variable. The function is still called with the original sizes but the flag + * allows for enlarging the problem and hence the supplied buffer should accomodate + * for this extra space. + * + * The required buffer size is given by: + * (2*(m - meq)*(n + 1)+2)*(n - meq +1) + 2*2*(m-meq)*(n + 1) + 2*(m-meq)*(n + 1) + * + 2*meq + ld + (ld + 2*(m-meq)*(n + 1))*(n - meq) + * + */ +void lsq( + int m, int meq, int n, int augment, double aug_weight, double* restrict Lf, + double* restrict gradx, double* restrict C, double* restrict d, + double* restrict xl, double* restrict xu, double* restrict x, + double* restrict y, double* buffer, int* jw, int* mode) +{ + int one = 1, orign = n; + int mineq = m - meq; + double xnorm = 0.0; + int cursor = 0; + int ld = n; + int n_wG_rows = 0; + + if (augment) { + ld = n + 1; + x[n] = 1.0; + xl[n] = 0.0; + xu[n] = 1.0; + } + + // Recover A and b from Lf and gradx + for (int i = 0; i < (ld+2)*ld; i++) { buffer[i] = 0.0; } + double* restrict wA = buffer; + double* restrict wb = &buffer[ld*(ld+1)]; + + // Depending on augmented, wA is either the full array or the top-left block. + + for (int j = 0; j < n; j++) + { + double diag = sqrt(Lf[cursor++]); // Extract the diagonal value from Lf. + wA[j + j * ld] = diag; // Place the sqrt diagonal. + for (int i = j + 1; i < n; i++) + { + wA[j + i * ld] = Lf[cursor++] * diag; + } + } + + // Compute b = - 1/sqrt(d[]) * inv(Lf[]) * gradx[]. Lf is already in packed format. 
+ for (int i = 0; i < n; i++) { wb[i] = gradx[i]; } + dtpsv_("L", "N", "U", &n, Lf, wb, &one); + cursor = 0; + for (int i = 0; i < n; i++) + { + wb[i] /= -sqrt(Lf[cursor]); + cursor += n - i; + } + + // If augmented, fill in the extra entry in the bottom right corner. + if (augment) { wA[ld*ld - 1] = aug_weight; } + + // If augmented, also increase the number of variables by 1. + if (augment) { n++; } + + // Get the equality constraints if given. + double* restrict wE = &buffer[n*(n+1) + n]; + double* restrict wf = &buffer[n*(n+1) + n + n*meq]; + if (meq > 0) + { + for (int j = 0; j < n-1; j++) + { + for (int i = 0; i < meq; i++) + { + wE[i + j*meq] = C[i + j*m]; + } + } + if (augment) + { + // n is incremented hence all Ceq is now in wE. Add the extra column. + for (int i = 0; i < meq; i++) { wE[i + (n-1)*meq] = -d[i]; } + + } else { + // If not augmented then handle j = n - 1 that is skipped. + for (int i = 0; i < meq; i++) { wE[i + (n-1)*meq] = C[i + (n-1)*m]; } + + } + for (int i = 0; i < meq; i++) { wf[i] = -d[i]; } + } + + // Get the inequality constraints if given. First zero out wG and wh. + double* restrict wG = &buffer[n*(n+1) + n + n*meq + meq]; + double* restrict wh = &buffer[n*(n+1) + n + n*meq + meq + (mineq + 2*n)*ld]; + // Zero out wG and wh + for (int i = 0; i < (mineq + 2*n)*(ld + 1); i++) { wG[i] = 0.0; } + + // Convert the bounds on x to +I and -I blocks in G. + // Augment h by xl and -xu. + // Unbounded constraints are signified by NaN values and they do not appear + // in G and h. Hence there is a nancount tab to keep track of them. + + // We first populate "wh" to get the number of unbounded constraints. That will + // define the unskipped row number of wG. This is different than the original + // Fortran code where the max allocated row number and the actual row number + // of wG has been kept separate and it causes to be sent to every nested + // function call. Instead we form wG and wh once with fixed size. 
+ + int nancount = 0; + int nrow = mineq; + if (m > meq) + { + for (int i = 0; i < mineq; i++) { wh[i] = -d[meq + i]; } + } + for (int i = 0; i < n; i++) + { + if (isnan(xl[i])) + { + nancount++; + } else { + wh[nrow++] = xl[i]; + } + } + for (int i = 0; i < n; i++) + { + if (isnan(xu[i])) + { + nancount++; + } else { + wh[nrow++] = -xu[i]; + } + } + + n_wG_rows = mineq + 2*n - nancount; + + // Now that we know the actual row number of wG, we can finally populate + // the top part with C. + if (m > meq) + { + for (int j = 0; j < orign; j++) + { + for (int i = 0; i < mineq; i++) + { + wG[i + j*n_wG_rows] = C[meq + i + j*m]; + } + } + } + + // If augmented add the extra column. + if (augment) + { + for (int i = 0; i < mineq; i++) + { + wG[i + orign*n_wG_rows] = fmax(-d[meq + i], 0.0); + } + } + + // Reset counter + nrow = mineq; + for (int i = 0; i < n; i++) + { + if (!isnan(xl[i])) + { + wG[nrow + i*n_wG_rows] = 1.0; + nrow++; + } + } + for (int i = 0; i < n; i++) + { + if (!isnan(xu[i])) + { + wG[nrow + i*n_wG_rows] = -1.0; + nrow++; + } + } + + // Assign the remaining part of the buffer to the LSEI problem. + double* restrict lsei_scratch = &wh[mineq + 2*n]; + + lsei(ld, meq, n_wG_rows, n, wA, wb, wE, wf, wG, wh, x, lsei_scratch, jw, &xnorm, mode); + + if (*mode == 1) + { + // Restore the Lagrange multipliers, first equality, then inequality. + for (int i = 0; i < meq; i++) { y[i] = lsei_scratch[i+n_wG_rows]; } + for (int i = 0; i < mineq; i++) { y[meq + i] = lsei_scratch[i]; } + + // Set the user-defined bounds on x to NaN + for (int i = 0; i < 2*n; i++) { y[m + i] = NAN; } + } + + // Clamp the solution, if given, to the finite bound interval + for (int i = 0; i < n; i++) + { + if ((!isnan(xl[i])) && (x[i] < xl[i])) { x[i] = xl[i]; } + else if ((!isnan(xu[i])) && (x[i] > xu[i])) { x[i] = xu[i]; } + } + + return; +} + + +/* + * Solve equality and inequality constrained least squares problem (LSEI) + * min |A*x - b|, subject to E*x = f, G*x >= h. 
+ * + * ma, me, mg : number of rows in A, E, G + * n : number of columns in A, x + * a : matrix A (ma x n) + * b : vector b (ma) + * e : matrix E (me x n) + * f : vector f (me) + * g : matrix G (mg x n) + * h : vector h (mg) + * x : solution vector x (n) + * buffer : work buffer (mg + 2)*(n - me +1) + 3*mg + 2*me + ma + (ma + mg)*(n - me) + * jw : integer work array + * xnorm : norm of the solution + * mode : return code + * + * The buffer pointers that will be used: + * buffer[0] : Lagrange multipliers (mg + me) + * buffer[mg + me] : wb, Modified b vector (ma) + * buffer[mg + me + ma] : tau, Pivots for the RQ decomposition of E (me) + * buffer[mg + 2*me + ma] : Scratch space + * + */ +void +lsei(int ma, int me, int mg, int n, + double* restrict a, double* restrict b, double* restrict e, + double* restrict f, double* restrict g, double* restrict h, + double* restrict x, double* restrict buffer, int* jw, + double* xnorm, int* mode) +{ + int one = 1, nvars = 0, info = 0, lde = 0, ldg = 0; + double done = 1.0, dmone = -1.0, dzero = 0.0, t= 0.0; + const double epsmach = 2.220446049250313e-16; + + for (int i = 0; i < n; i++) { x[i] = 0.0; } + // Return if the problem is over-constrained. + if (me > n) { *mode = 2; return; } + + // [E] [E2 | R] [x ] + // [A] @ Q.T = [A2 | A1] ,and, x is partitioned as x = [--] + // [G] [G2 | G1] [xe] + + // me = 0 skips the equality constraint related computations even though it + // causes aliasing below. The aliased arrays are not referenced in that case. + // Use at least 1 for the leading dimension of E even when me = 0 for LAPACK + // calls. 
+ nvars = (n - me); + double* restrict gmults = &buffer[0]; + double* restrict emults = &buffer[mg]; + double* restrict wb = &buffer[me + mg]; + double* restrict tau = &buffer[me + mg + ma]; + double* restrict a2 = &buffer[mg + 2*me + ma]; + double* restrict g2 = &buffer[mg + 2*me + ma + ma*nvars]; + double* restrict lsi_scratch = &buffer[mg + 2*me + ma + (ma + mg)*nvars]; + + // RQ decomposition of equality constraint data E and application to A, G. + // LAPACK RQ routine dgerq2 forms R on the right. + // dgeqr2 is the unblocked versions of dgeqrf without the memory allocation. + // Use top of the yet unutilized scratch space for throw-away work. + lde = (me > 0 ? me : 1); + ldg = (mg > 0 ? mg : 1); + dgerq2_(&me, &n, e, &lde, tau, lsi_scratch, &info); + + // Right triangularize E and apply Q.T to A and G from the right. + dormr2_("R", "T", &ma, &n, &me, e, &lde, tau, a, &ma, lsi_scratch, &info); + dormr2_("R", "T", &mg, &n, &me, e, &lde, tau, g, &ldg, lsi_scratch, &info); + + // Check the diagonal elements of E for rank deficiency. + for (int i = 0; i < me; i++) + { + if (!(fabs(e[i + (nvars + i)*me]) >= epsmach)) { *mode = 6;return; } + } + // Solve E*x = f and modify b. + // Note: RQ forms R at the right of E instead of [0, 0] position. + for (int i = 0; i < me; i++) { x[nvars + i] = f[i]; } + dtrsv_("U", "N", "N", &me, &e[(nvars)*me], &lde, &x[nvars], &one); + + *mode = 1; + // Zero out the inequality multiplier. + for (int i = 0; i < mg; i++) { gmults[i] = 0.0; } + + // If the problem is fully equality-constrained, revert the basis and return. 
+ if (me == n) { goto ORIGINAL_BASIS; } + + // Compute the modified RHS wb = b - A1*x + // Copy b into wb + for (int i = 0; i < ma; i++) { wb[i] = b[i]; } + // Compute wb -= A1*xe + dgemv_("N", &ma, &me, &dmone, &a[ma*nvars], &ma, &x[nvars], &one, &done, wb, &one); + + // Store the transformed A2 and G2 in the buffer + for (int j = 0; j < nvars; j++) + { + for (int i = 0; i < ma; i++) + { + a2[i + j*ma] = a[i + j*ma]; + } + for (int i = 0; i < mg; i++) + { + g2[i + j*mg] = g[i + j*mg]; + } + } + + if (mg == 0) + { + // No inequality constraints, solve the least squares problem directly. + // We deliberately use the unblocked algorithm to avoid allocation. + int lwork = ma*nvars + 3*nvars + 1; + // Save the RHS for residual computation + double* restrict wb_orig = &lsi_scratch[lwork]; + for (int i = 0; i < ma; i++) { wb_orig[i] = wb[i]; } + + int krank = 0; + t = sqrt(epsmach); + dgelsy_(&ma, &nvars, &one, a2, &ma, wb, &ma, jw, &t, &krank, lsi_scratch, &lwork, &info); + + // Copy the solution to x + for (int i = 0; i < nvars; i++) { x[i] = wb[i]; } + + // Compute the residual and its norm, use a since a2 is overwritten. + dgemv_("N", &ma, &nvars, &done, a, &ma, x, &one, &dmone, wb_orig, &one); + *xnorm = dnrm2_(&ma, wb_orig, &one); + + *mode = 7; + if (krank < nvars) { return; } + *mode = 1; + goto ORIGINAL_BASIS; + } + + // Modify h, and solve the inequality constrained least squares problem. + // h -= G1*xe + dgemv_("N", &mg, &me, &dmone, &g[mg*nvars], &ldg, &x[nvars], &one, &done, h, &one); + + lsi(ma, mg, nvars, a2, wb, g2, h, x, lsi_scratch, jw, xnorm, mode); + + // Copy multipliers from scratch to gmults + for (int i = 0; i < mg; i++) { gmults[i] = lsi_scratch[i]; } + + // If no equality constraints this was an LSI problem all along. + if (me == 0) { return; } + + t = dnrm2_(&me, &x[nvars], &one); + // Modify the norm by adding the equality solution. 
+ *xnorm = hypot(*xnorm, t); + if (*mode != 1) { return; } + +ORIGINAL_BASIS: + // Convert the solution and multipliers to the original basis. + // b = A*x - b (residuals) + dgemv_("N", &ma, &n, &done, a, &ma, x, &one, &dmone, b, &one); + // f = A1^T*b - G1^T*w + dgemv_("T", &ma, &me, &done, &a[nvars*ma], &ma, b, &one, &dzero, f, &one); + dgemv_("T", &mg, &me, &dmone, &g[nvars*mg], &ldg, gmults, &one, &done, f, &one); + + // x = Q.T*x + dormr2_("L", "T", &n, &one, &me, e, &lde, tau, x, &n, lsi_scratch, &info); + + // Solve the triangular system for the equality multipliers, emults. + for (int i = 0; i < me; i++) { emults[i] = f[i]; } + dtrsv_("U", "T", "N", &me, &e[(n - me)*me], &lde, emults, &one); + + return; +} + + +/* + * Solve inequality constrained least squares problem + * min |Ax - b| subject to Gx >= h + * + * A is (ma x n), b is (ma), G is (mg x n), h is (mg), x is (n) + * buffer is at least (mg+2)*(n+1) + 2*mg + * jw is at least (mg) + * xnorm is the 2-norm of the residual vector + * mode is the integer return code + * + * Return codes for mode + * 1: successful computation + * 2: error return because of wrong dimensions + * 3: iteration count exceeded by nnls + * 4: inequality constraints incompatible + * 5: matrix A is not rank n + * +*/ +void +lsi(int ma, int mg, int n, double* restrict a, double* restrict b, double* restrict g, + double* restrict h, double* restrict x, double* restrict buffer, int* jw, + double* xnorm, int* mode) +{ + int one = 1, tmp_int = 0, info = 0; + double done = 1.0, dmone = -1.0, tmp_dbl = 0.0; + const double epsmach = 2.220446049250313e-16; + + // QR decomposition of A and application to b. + // We use the unblocked versions of the LAPACK routines to avoid + // allocating extra "work" memory for the blocked versions. + tmp_int = (ma < n ? 
ma : n); + dgeqr2_(&ma, &n, a, &ma, buffer, &buffer[tmp_int], &info); + + // Compute Q^T b + dorm2r_("L", "T", &ma, &one, &tmp_int, a, &ma, buffer, b, &ma, &buffer[tmp_int], &info); + + // Check the diagonal elements of R for rank deficiency. + *mode = 5; + *xnorm = 0.0; + for (int i = 0; i < tmp_int; i++) { + if (!(fabs(a[i + i*ma]) >= epsmach)) { return; } + } + // Transform G and h to form the LDP problem. + // Solve XR = G where R is the upper triangular matrix from the QR. + // The result is stored in G. + // Note: There is an inherent assumption that ma >= n. This is a bug carried + // over here from the original slsqp implementation. + dtrsm_("R", "U", "N", "N", &mg, &n, &done, a, &ma, g, &mg); + // h = h - Xf + dgemv_("N", &mg, &n, &dmone, g, &mg, b, &one, &done, h, &one); + + // Solve the LDP problem. + ldp(mg, n, g, h, x, buffer, jw, xnorm, mode); + if (*mode != 1) { return; } + + // Convert to the solution of the original problem. + daxpy_(&n, &done, b, &one, x, &one); + dtrsv_("U", "N", "N", &n, a, &ma, x, &one); + + // If any, compute the norm of the tail of b and add to xnorm + tmp_int = ma - n; + tmp_dbl = dnrm2_(&tmp_int, &b[(n + 1 > ma ? 
ma : n + 1) - 1], &one); + *xnorm = hypot(*xnorm, tmp_dbl); + + return; +} + +/* + * Solve least distance problem + * min (1/2)|x|^2 subject to Gx >= h + * + * G is (m x n), h is (m) + * buffer is at least (m+2)*(n+1) + 2*m + * indices is int(n) + * x is (n) + * xnorm is the norm of the solution if succeded + * mode is the return code integer + * + * Mode return values + * 1 : solution found + * 2 : bad input dimensions + * 3 : iteration count exceeded by nnls + * 4 : inequality constraints incompatible + * +*/ +void +ldp(int m, int n, double* restrict g, double* restrict h, double* restrict x, + double* restrict buffer, int* indices, double* xnorm, int* mode) +{ + int one = 1; + double dzero = 0.0, rnorm = 0.0; + // Check for inputs and initialize x + if (n <= 0) { *mode = 2; return; } + for (int i = 0; i < n; i++) { x[i] = 0.0; } + if (m == 0) { *mode = 1; return; } + + // Define pointers for the variables on buffer + double* restrict a = &buffer[0]; + double* restrict b = &buffer[m*(n+1)]; + double* restrict zz = &buffer[(m+1)*(n+1)]; + double* restrict y = &buffer[(m+2)*(n+1)]; + double* restrict w = &buffer[(m+2)*(n+1) + m]; + + // Save the dual problem data into buffer + // dual problem [G^T] [x] = [0] + // [h^T] [1] + + // LHS, G is (m x n), h is (m). Both transposed and stacked into (n+1) x m. + for (int j = 0; j < m; j++) + { + for (int i = 0; i < n; i++) + { + a[i + j*(n+1)] = g[j + i*m]; + } + // Place h in the last row. 
+ a[n + j*(n+1)] = h[j]; + } + // RHS is (n+1) + for (int i = 0; i < n; i++) { b[i] = 0.0; } + b[n] = 1.0; + + // Solve the dual problem + __nnls(n+1, m, a, b, y, w, zz, indices, 3*m, &rnorm, mode); + if (*mode != 1) { return; } + *mode = 4; + if (rnorm <= 0.0) { return; } + + // Solve the primal problem + double fac = 1.0 - ddot_(&m, h, &one, y, &one); + if (!((1.0 + fac) - 1.0 > 0.0)) { return; } + *mode = 1; + fac = 1.0 / fac; + dgemv_("T", &m, &n, &fac, g, &m, y, &one, &dzero, x, &one); + *xnorm = dnrm2_(&n, x, &one); + + // Compute the lagrange multipliers for the primal problem + for (int i = 0; i < m; i++) { buffer[i] = fac*y[i]; } + return; +} + + +/* + * + * Updates the LDL' factors of matrix a by rank-one matrix sigma*z*z' + * n : order of the coefficient matrix a + * a : positive definite matrix of dimension n; only the lower triangle is + * used and is stored column by column as one dimensional array of + * dimension n*(n+1)/2. + * z : vector of dimension n of updating elements + * sigma : scalar factor by which the modifying dyade z*z' is multiplied + * w : working array of dimension n + * + * Uses the composite-t method of fletcher and powell as described in "On the + * modification of LDL' factorizations", DOI:10.1090/S0025-5718-1974-0359297-1 + * + * Implemented by: Dieter Kraft, dfvlr - Institut für Dynamik der Flugsysteme + * D-8031 Oberpfaffenhofen + * + */ +static void +ldl_update(int n, double* restrict a, double* restrict z, double sigma, double* restrict w) +{ + int j, ij = 0; + const double epsmach = 2.220446049250313e-16; + if (sigma == 0.0) { return; } + double alpha, beta, delta, gamma, u, v, tp, t = 1.0 / sigma; + + if (sigma <= 0.0) + { + // Negative update + for (int i = 0; i < n; i++) { w[i] = z[i]; } + for (int i = 0; i < n; i++) + { + v = w[i]; + t = t + v*v/a[ij]; + for (int j = i + 1; j < n; j++) + { + ij++; + w[j] = w[j] - v*a[ij]; + } + ij++; + } + if (t >= 0.0) { t = epsmach / sigma; } + + for (int i = 0; i < n; i++) + { + j = 
n - i - 1; + ij -= i + 1; + u = w[j]; + w[j] = t; + t = t - u*u / a[ij]; + } + } + + // Positive update + for (int i = 0; i < n; i++) + { + v = z[i]; + delta = v / a[ij]; + // sigma == 0.0 is handled at the beginning. + tp = (sigma < 0.0 ? w[i] : t + delta*v); + alpha = tp / t; + a[ij] = alpha*a[ij]; + if (i == n - 1) { return; } + beta = delta / tp; + if (alpha <= 4.0) + { + for (int j = i + 1; j < n; j++) + { + ij++; + z[j] = z[j] - v * a[ij]; + a[ij] = a[ij] + beta * z[j]; + } + } else { + gamma = t / tp; + for (int j = i + 1; j < n; j++) + { + ij++; + u = a[ij]; + a[ij] = gamma * u + beta * z[j]; + z[j] = z[j] - v * u; + } + } + ij++; + t = tp; + } + + return; +} diff --git a/scipy/optimize/__slsqp.h b/scipy/optimize/__slsqp.h new file mode 100644 index 000000000000..fabb8ae514de --- /dev/null +++ b/scipy/optimize/__slsqp.h @@ -0,0 +1,412 @@ +/* + * This file and the accompanying __slsqp.c file are the C translations of the + * Fortran77 code of the SLSQP algorithm for the SciPy project and hence inherits + * SciPy license. The original Fortran code is available at + * http://www.netlib.org/toms/733 written by Dieter Kraft, see: + * + * ALGORITHM 733, COLLECTED ALGORITHMS FROM ACM. + * TRANSACTIONS ON MATHEMATICAL SOFTWARE, + * VOL. 20, NO. 3, SEPTEMBER, 1994, PP. 262-281. + * https://doi.org/10.1145/192115.192124 + * + * + * The original Fortran code is released for use under BSD license, with the + * following statement from the original license holder ACM publications: + * + * https://web.archive.org/web/20170106155705/http://permalink.gmane.org/gmane.comp.python.scientific.devel/6725 + * ------ + * From: Deborah Cotton + * Date: Fri, 14 Sep 2007 12:35:55 -0500 + * Subject: RE: Algorithm License requested + * To: Alan Isaac + * + * Prof. 
Issac, + * + * In that case, then because the author consents to [the ACM] releasing + * the code currently archived at http://www.netlib.org/toms/733 under the + * BSD license, the ACM hereby releases this code under the BSD license. + * + * Regards, + * + * Deborah Cotton, Copyright & Permissions + * ACM Publications + * 2 Penn Plaza, Suite 701** + * New York, NY 10121-0701 + * permissions@acm.org + * 212.869.7440 ext. 652 + * Fax. 212.869.0481 + * ------ +*/ + +#ifndef __SLSQPLIB_H +#define __SLSQPLIB_H + +#define PY_SSIZE_T_CLEAN +#include "Python.h" +#include "numpy/arrayobject.h" + +#define PYERR(errobj,message) {PyErr_SetString(errobj,message); return NULL;} +static PyObject* slsqp_error; + +#include +#include "__nnls.h" + +// BLAS/LAPACK function prototypes used in SLSQP +void daxpy_(int* n, double* sa, double* sx, int* incx, double* sy, int* incy); +double ddot_(int* n, double* dx, int* incx, double* dy, int* incy); +void dgelsy_(int* m, int* n, int* nrhs, double* a, int* lda, double* b, int* ldb, int* jpvt, double* rcond, int* rank, double* work, int* lwork, int* info); +void dgemv_(char* trans, int* m, int* n, double* alpha, double* a, int* lda, double* x, int* incx, double* beta, double* y, int* incy); +void dgeqr2_(int* m, int* n, double* a, int* lda, double* tau, double* work, int* info); +void dgeqrf_(int* m, int* n, double* a, int* lda, double* tau, double* work, double* lwork, int* info); +void dgerq2_(int* m, int* n, double* a, int* lda, double* tau, double* work, int* info); +void dlarf_(char* side, int* m, int* n, double* v, int* incv, double* tau, double* c, int* ldc, double* work); +void dlarfgp_(int* n, double* alpha, double* x, int* incx, double* tau); +void dlartgp_(double* f, double* g, double* cs, double* sn, double* r); +double dnrm2_(int* n, double* x, int* incx); +void dorm2r_(char* side, char* trans, int* m, int* n, int* k, double* a, int* lda, double* tau, double* c, int* ldc, double* work, int* info); +void dormr2_(char* side, char* 
trans, int* m, int* n, int* k, double* a, int* lda, double* tau, double* c, int* ldc, double* work, int* info); +void dscal_(int* n, double* da, double* dx, int* incx); +void dtpmv_(char* uplo, char* trans, char* diag, int* n, double* ap, double* x, int* incx); +void dtpsv_(char* uplo, char* trans, char* diag, int* n, double* ap, double* x, int* incx); +void dtrsm_(char* side, char* uplo, char* transa, char* diag, int* m, int* n, double* alpha, double* a, int* lda, double* b, int* ldb); +void dtrsv_(char* uplo, char* trans, char* diag, int* n, double* a, int* lda, double* x, int* incx); + + +// The SLSQP_vars struct holds the state of the algorithm and passed to Python +// and back such that it is thread-safe. +struct SLSQP_vars { + double acc, alpha, f0, gs, h1, h2, h3, h4, t, t0, tol; + int exact, inconsistent, reset, iter, itermax, line, m, meq, mode, n; +}; + + +void __slsqp_body(struct SLSQP_vars* S, double* funx, double* gradx, double* C, double* d, double* sol, double* mult, double* xl, double* xu, double* buffer, int* indices); + + +static PyObject* +nnls(PyObject* Py_UNUSED(dummy), PyObject* args) { + + int maxiter, info = 0; + PyArrayObject* ap_A=NULL; + PyArrayObject* ap_b=NULL; + double* buffer; + double rnorm; + + // Get the input array + if (!PyArg_ParseTuple(args, + ("O!O!i"), + &PyArray_Type, (PyObject **)&ap_A, + &PyArray_Type, (PyObject **)&ap_b, + &maxiter) + ) + { + return NULL; + } + + // Check for dtype compatibility + if ((PyArray_TYPE(ap_A) != NPY_FLOAT64) || (PyArray_TYPE(ap_b) != NPY_FLOAT64)) + { + PYERR(slsqp_error, "Inputs to nnls must be of type numpy.float64."); + } + + int ndim = PyArray_NDIM(ap_A); // Number of dimensions + if (ndim != 2) + { + PYERR(slsqp_error, "Input array A must be 2D."); + } + npy_intp* shape = PyArray_SHAPE(ap_A); // Array shape + npy_intp m = shape[0]; // Number of rows + npy_intp n = shape[1]; // Number of columns + + int ndim_b = PyArray_NDIM(ap_b); // Number of dimensions + npy_intp* shape_b = 
PyArray_SHAPE(ap_b); // Array shape + if (ndim_b == 1) + { + if (shape_b[0] != m) + { + PYERR(slsqp_error, "Input array b must have the same number of rows as A."); + } + } else if (ndim_b == 2) { + if (shape_b[0] != m) + { + PYERR(slsqp_error, "Input array b must have the same number of rows as A."); + } + if (shape_b[1] != 1) + { + PYERR(slsqp_error, "Input array b must have only one column."); + } + } else { + PYERR(slsqp_error, "Input array b must be 1D or 2D with one column."); + } + + // Allocate memory for the algorithm, + // A is m x n, b is m, x is n, w is n, zz is m + // total m*(n+2) + 2*n + //indices is n + buffer = malloc((m*(n+2) + 3*n)*sizeof(double)); + if (buffer == NULL) + { + PYERR(slsqp_error, "Memory allocation failed."); + } + int *indices = malloc(n*sizeof(int)); + if (indices == NULL) + { + free(buffer); + PYERR(slsqp_error, "Memory allocation failed."); + } + + double* x = &buffer[0]; // Solution vector x (n) + double* a = &buffer[n]; // Matrix A (m x n) + double* b = &buffer[n*m + n]; // Vector b (m) + double* w = &buffer[(n+1)*m + n]; // Vector w (n) + double* zz = &buffer[(n+1)*m + 2*n]; // Vector zz (m) + + npy_intp* restrict strides = PyArray_STRIDES(ap_A); + double* restrict data_A = (double *)PyArray_DATA(ap_A); + npy_intp* restrict stride_b = PyArray_STRIDES(ap_b); + // If b is 2D then pick the stride of the first dimension + npy_intp rc_stride = (ndim_b == 1 ? 
stride_b[0] : stride_b[1]); + double* restrict data_b = (double *)PyArray_DATA(ap_b); + + // Copy the data from the numpy array + for (int j = 0; j < n; j++) { + for (int i = 0; i < m; i++) { + a[i + j*m] = data_A[(j*strides[1] + i*strides[0])/sizeof(double)]; + } + } + for (int i = 0; i < m; i++) + { + b[i] = data_b[(i * rc_stride)/sizeof(double)]; + } + + // Call nnls + __nnls((int)m, (int)n, a, b, x, w, zz, indices, maxiter, &rnorm, &info); + // x is the first n elements of buffer, shrink buffer to n elements + free(indices); + double* mem_ret = realloc(buffer, n*sizeof(double)); + // Very unlikely, but just in case + if (mem_ret == NULL) + { + free(buffer); + PYERR(slsqp_error, "Memory reallocation failed."); + } + npy_intp shape_ret[1] = {n}; + PyArrayObject* ap_ret = (PyArrayObject*)PyArray_SimpleNewFromData(1, shape_ret, NPY_FLOAT64, mem_ret); + // Return the result + return Py_BuildValue("Ndi",PyArray_Return(ap_ret), rnorm, info); + +} + + +static PyObject* +slsqp(PyObject* Py_UNUSED(dummy), PyObject* args) +{ + PyArrayObject *ap_gradx=NULL, *ap_C=NULL, *ap_d=NULL, *ap_mult=NULL; + PyArrayObject *ap_sol =NULL, *ap_xl=NULL, *ap_xu=NULL, *ap_buffer=NULL; + PyArrayObject* ap_indices=NULL; + PyObject* input_dict = NULL; + double funx; + struct SLSQP_vars Vars; + + // The Python input should provide with a dictionary that maps to the struct + // SLSQP_vars. Necessary fields that would make the algorithm change + // behavior are m, meq, n, acc, maxiter, and mode. The rest can be left as zero. + // Changing values mid run is not recommended as they hold the internal state + // of the algorithm. + // The reason why they are returned is to make the algorithm stateless. + + // The required arrays C, d, x, xl, xu, gradx, sol are passed as numpy arrays. + // The remaining arrays are going to be allocated in the buffer. 
+ + if (!PyArg_ParseTuple(args, "O!dO!O!O!O!O!O!O!O!O!", + &PyDict_Type, (PyObject **)&input_dict, + &funx, + &PyArray_Type, (PyObject **)&ap_gradx, + &PyArray_Type, (PyObject **)&ap_C, + &PyArray_Type, (PyObject **)&ap_d, + &PyArray_Type, (PyObject **)&ap_sol, + &PyArray_Type, (PyObject **)&ap_mult, + &PyArray_Type, (PyObject **)&ap_xl, + &PyArray_Type, (PyObject **)&ap_xu, + &PyArray_Type, (PyObject **)&ap_buffer, + &PyArray_Type, (PyObject **)&ap_indices)) + { + return NULL; + } + + // Some helper x macros to pack and unpack the SLSQP_vars struct and + // the Python dictionary. + + #define STRUCT_DOUBLE_FIELD_NAMES X(acc) X(alpha) X(f0) X(gs) X(h1) X(h2) X(h3) X(h4) X(t) X(t0) X(tol) + #define STRUCT_INT_FIELD_NAMES X(exact) X(inconsistent) X(reset) X(iter) X(itermax) X(line) X(m) X(meq) X(mode) X(n) + #define STRUCT_FIELD_NAMES STRUCT_INT_FIELD_NAMES STRUCT_DOUBLE_FIELD_NAMES + + // Parse the dictionary, if the field is not found, raise an error. + // Do it separately for doubles and ints. + // Initialize the struct that will be populated from dict with zeros + #define X(name) Vars.name = 0; + STRUCT_FIELD_NAMES + #undef X + + // PyDict_GetItemString returns a borrowed reference. + #define X(name) \ + PyObject* name##_obj = PyDict_GetItemString(input_dict, #name); \ + if (!name##_obj) { PYERR(slsqp_error, #name " not found in the dictionary."); } \ + Vars.name = PyFloat_AsDouble(name##_obj); + STRUCT_DOUBLE_FIELD_NAMES + #undef X + + #define X(name) \ + PyObject* name##_obj = PyDict_GetItemString(input_dict, #name); \ + if (!name##_obj) { PYERR(slsqp_error, #name " not found in the dictionary."); } \ + Vars.name = (int)PyLong_AsLong(name##_obj); + STRUCT_INT_FIELD_NAMES + #undef X + + // Basic error checks for the numpy arrays. 
+ if ((PyArray_TYPE(ap_C) != NPY_FLOAT64) || (PyArray_TYPE(ap_d) != NPY_FLOAT64) || + (PyArray_TYPE(ap_gradx) != NPY_FLOAT64) || (PyArray_TYPE(ap_sol) != NPY_FLOAT64) || + (PyArray_TYPE(ap_xl) != NPY_FLOAT64) || (PyArray_TYPE(ap_xu) != NPY_FLOAT64) || + (PyArray_TYPE(ap_buffer) != NPY_FLOAT64) || (PyArray_TYPE(ap_indices) != NPY_INT32)) + { + PYERR(slsqp_error, "All inputs to slsqp must be of type numpy.float64, " + "except \"indices\" which must be of numpy.int32."); + } + + // Buffer is 1D hence both F and C contiguous, test with either of them. + if (!PyArray_IS_C_CONTIGUOUS(ap_buffer)) { PYERR(slsqp_error, "Input array buffer must be 1d contiguous."); } + + // Derive the number of variables from the solution vector length. + int ndim_sol = PyArray_NDIM(ap_sol); + npy_intp* shape_sol = PyArray_SHAPE(ap_sol); + int ndim_mult = PyArray_NDIM(ap_mult); + npy_intp* shape_mult = PyArray_SHAPE(ap_mult); + int ndim_C = PyArray_NDIM(ap_C); + int ndim_d = PyArray_NDIM(ap_d); + int ndim_gradx = PyArray_NDIM(ap_gradx); + int ndim_xl = PyArray_NDIM(ap_xl); + int ndim_xu = PyArray_NDIM(ap_xu); + + if (ndim_sol != 1) { PYERR(slsqp_error, "Input array sol must be 1D."); } + if ((int)shape_sol[0] != Vars.n) { PYERR(slsqp_error, "Input array \"sol\" must have at least n elements."); } + if (ndim_mult != 1) { PYERR(slsqp_error, "Input array \"mult\" must be 1D."); } + if ((int)shape_mult[0] != 2*Vars.n + Vars.m + 2) { PYERR(slsqp_error, "Input array \"mult\" must have m + 2*n + 2 elements."); } + if (ndim_C != 2) { PYERR(slsqp_error, "Input array \"C\" must be 2D."); } + if (ndim_d != 1) { PYERR(slsqp_error, "Input array d must be 1D."); } + if (ndim_gradx != 1) { PYERR(slsqp_error, "Input array gradx must be 1D."); } + if (ndim_xl != 1) { PYERR(slsqp_error, "Input array xl must be 1D."); } + if (ndim_xu != 1) { PYERR(slsqp_error, "Input array xu must be 1D."); } + + double* gradx_data = (double*)PyArray_DATA(ap_gradx); + double* C_data = (double*)PyArray_DATA(ap_C); + double* 
d_data = (double*)PyArray_DATA(ap_d); + double* restrict sol_data = (double*)PyArray_DATA(ap_sol); + double* mult_data = (double*)PyArray_DATA(ap_mult); + double* restrict xl_data = (double*)PyArray_DATA(ap_xl); + double* restrict xu_data = (double*)PyArray_DATA(ap_xu); + double* buffer_data = (double*)PyArray_DATA(ap_buffer); + int* indices_data = (int*)PyArray_DATA(ap_indices); + + __slsqp_body(&Vars, &funx, gradx_data, C_data, d_data, sol_data, mult_data, xl_data, xu_data, buffer_data, indices_data); + + // During the intermediate steps, there can be a few ULPs of bound violations, + // hence we clamp the solution if given, to the finite bound values when mode + // is 1 or -1. + if ((Vars.mode == 1) || (Vars.mode == -1)) + { + int n = Vars.n; + for (int i = 0; i < n; i++) + { + if ((!isnan(xl_data[i])) && (sol_data[i] < xl_data[i])) { sol_data[i] = xl_data[i]; } + else if ((!isnan(xu_data[i])) && (sol_data[i] > xu_data[i])) { sol_data[i] = xu_data[i]; } + } + } + + // Map struct variables back to dictionary. + // Py_XXX_FromXXX returns a new reference, hence needs to be decremented. 
+ + #define X(name) do { \ + PyObject* tmp_##name = PyFloat_FromDouble(Vars.name); \ + if ((!tmp_##name) || (PyDict_SetItemString(input_dict, #name, tmp_##name) < 0)) { \ + Py_XDECREF(tmp_##name); \ + PYERR(slsqp_error, "Setting '" #name "' failed."); \ + } \ + Py_DECREF(tmp_##name); \ + } while (0); + STRUCT_DOUBLE_FIELD_NAMES + #undef X + + #define X(name) do { \ + PyObject* tmp_##name = PyLong_FromLong((long)Vars.name); \ + if ((!tmp_##name) || (PyDict_SetItemString(input_dict, #name, tmp_##name) < 0)) { \ + Py_XDECREF(tmp_##name); \ + PYERR(slsqp_error, "Setting '" #name "' failed."); \ + } \ + Py_DECREF(tmp_##name); \ + } while (0); + STRUCT_INT_FIELD_NAMES + #undef X + #undef STRUCT_FIELD_NAMES + #undef STRUCT_INT_FIELD_NAMES + #undef STRUCT_DOUBLE_FIELD_NAMES + + Py_RETURN_NONE; + +}; + + +static char doc_nnls[] = ("Compute the nonnegative least squares solution.\n\n" + " x, info = nnls(A)\n\n"); + + +static char doc_slsqp[] = ( + "Sequential Least Squares Programming (SLSQP) optimizer.\n\n" + " x, info = slsqp(S: dict, funx: np.float64, " + "gradx: NDArray, C: NDarray, d: NDArray, " + "sol: NDArray, xl: NDArray, xu: NDArray, buffer: NDArray, indices: NDArray)" + "\n\n"); + + +// Sentinel terminated method list. 
+static struct PyMethodDef slsqplib_module_methods[] = { + {"nnls", nnls, METH_VARARGS, doc_nnls}, + {"slsqp", slsqp, METH_VARARGS, doc_slsqp}, + {NULL, NULL, 0, NULL} +}; + + +struct PyModuleDef moduledef = { + PyModuleDef_HEAD_INIT, + "_slsqplib", + NULL, + -1, + slsqplib_module_methods, + NULL, + NULL, + NULL, + NULL +}; + + +PyMODINIT_FUNC +PyInit__slsqplib(void) +{ + import_array(); + + PyObject* module = PyModule_Create(&moduledef); + if (module == NULL) { return NULL; } + PyObject* mdict = PyModule_GetDict(module); + if (mdict == NULL) { return NULL; } + slsqp_error = PyErr_NewException("_slsqplib.error", NULL, NULL); + if (slsqp_error == NULL) { return NULL; } + if (PyDict_SetItemString(mdict, "error", slsqp_error)) { return NULL; } + +#if Py_GIL_DISABLED + PyUnstable_Module_SetGIL(module, Py_MOD_GIL_NOT_USED); +#endif + + return module; +} + + +#endif // __SLSQPLIB_H diff --git a/scipy/optimize/_bracket.py b/scipy/optimize/_bracket.py index 1db2c5b49e7a..8bc5ab0dc2f5 100644 --- a/scipy/optimize/_bracket.py +++ b/scipy/optimize/_bracket.py @@ -1,7 +1,7 @@ import numpy as np import scipy._lib._elementwise_iterative_method as eim from scipy._lib._util import _RichResult -from scipy._lib._array_api import array_namespace, xp_ravel, xp_default_dtype +from scipy._lib._array_api import array_namespace, xp_ravel, xp_promote _ELIMITS = -1 # used in _bracket_root _ESTOPONESIDE = 2 # used in _bracket_root @@ -14,13 +14,7 @@ def _bracket_root_iv(func, xl0, xr0, xmin, xmax, factor, args, maxiter): if not np.iterable(args): args = (args,) - xp = array_namespace(xl0) - xl0 = xp.asarray(xl0)[()] - if (not xp.isdtype(xl0.dtype, "numeric") - or xp.isdtype(xl0.dtype, "complex floating")): - raise ValueError('`xl0` must be numeric and real.') - if not xp.isdtype(xl0.dtype, "real floating"): - xl0 = xp.asarray(xl0, dtype=xp_default_dtype(xp)) + xp = array_namespace(xl0, xr0, xmin, xmax, factor, *args) # If xr0 is not supplied, fill with a dummy value for the sake of # 
broadcasting. We need to wait until xmax has been validated to @@ -33,8 +27,11 @@ def _bracket_root_iv(func, xl0, xr0, xmin, xmax, factor, args, maxiter): xmin = -xp.inf if xmin is None else xmin xmax = xp.inf if xmax is None else xmax factor = 2. if factor is None else factor - xl0, xr0, xmin, xmax, factor = xp.broadcast_arrays( - xl0, xp.asarray(xr0), xp.asarray(xmin), xp.asarray(xmax), xp.asarray(factor)) + xl0, xr0, xmin, xmax, factor = xp_promote( + xl0, xr0, xmin, xmax, factor, broadcast=True, force_floating=True, xp=xp) + + if not xp.isdtype(xl0.dtype, ('integral', 'real floating')): + raise ValueError('`xl0` must be numeric and real.') if (not xp.isdtype(xr0.dtype, "numeric") or xp.isdtype(xr0.dtype, "complex floating")): @@ -425,13 +422,7 @@ def _bracket_minimum_iv(func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter): if not np.iterable(args): args = (args,) - xp = array_namespace(xm0) - xm0 = xp.asarray(xm0)[()] - if (not xp.isdtype(xm0.dtype, "numeric") - or xp.isdtype(xm0.dtype, "complex floating")): - raise ValueError('`xm0` must be numeric and real.') - if not xp.isdtype(xm0.dtype, "real floating"): - xm0 = xp.asarray(xm0, dtype=xp_default_dtype(xp)) + xp = array_namespace(xm0, xl0, xr0, xmin, xmax, factor, *args) xmin = -xp.inf if xmin is None else xmin xmax = xp.inf if xmax is None else xmax @@ -450,10 +441,12 @@ def _bracket_minimum_iv(func, xm0, xl0, xr0, xmin, xmax, factor, args, maxiter): xr0_not_supplied = True factor = 2.0 if factor is None else factor - xl0, xm0, xr0, xmin, xmax, factor = xp.broadcast_arrays( - xp.asarray(xl0), xm0, xp.asarray(xr0), xp.asarray(xmin), - xp.asarray(xmax), xp.asarray(factor) - ) + + xm0, xl0, xr0, xmin, xmax, factor = xp_promote( + xm0, xl0, xr0, xmin, xmax, factor, broadcast=True, force_floating=True, xp=xp) + + if not xp.isdtype(xm0.dtype, ('integral', 'real floating')): + raise ValueError('`xm0` must be numeric and real.') if (not xp.isdtype(xl0.dtype, "numeric") or xp.isdtype(xl0.dtype, "complex 
floating")): diff --git a/scipy/optimize/_cython_nnls.pyx b/scipy/optimize/_cython_nnls.pyx deleted file mode 100644 index 53bfb82adf95..000000000000 --- a/scipy/optimize/_cython_nnls.pyx +++ /dev/null @@ -1,216 +0,0 @@ -# cython: boundscheck=False -# cython: initializedcheck=False -# cython: wraparound=False -# cython: cdivision=True -# cython: cpow=True - - -__all__ = ['_nnls'] - -from scipy.linalg.cython_lapack cimport dlarfgp, dlarf, dlartgp -from scipy.linalg.cython_blas cimport dnrm2 -import numpy as np -cimport numpy as cnp -cnp.import_array() - -def _nnls(cnp.ndarray[cnp.float64_t, ndim=2] A_in, - cnp.ndarray[cnp.float64_t, ndim=1] b_in, - int maxiter): - # Make copies of the input to be mutated - cdef cnp.ndarray[cnp.float64_t, ndim=2, mode='c'] A = A_in.copy(order='C') - cdef cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] b = b_in.copy() - - cdef int m = A.shape[0], n = A.shape[1] - cdef int i = 0, ii = 0, ip = 0, iteration = 0, iz = 0, iz1 = 0, izmax = 0 - cdef int j = 0, jj = 0, k = 0 - cdef int col = 0, nrow = 0, nsetp = 0, one = 1, tmpint = 0 - cdef double tau = 0.0, unorm = 0.0, ztest, tmp, alpha, beta, cc, ss, wmax, T - cdef cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] w - cdef cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] x - cdef cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] work - cdef cnp.ndarray[cnp.float64_t, ndim=1, mode='c'] zz - cdef cnp.ndarray[cnp.int32_t, ndim=1, mode='c'] inds - cdef bint skip = False - - inds = cnp.PyArray_Arange(0, n, 1, cnp.NPY_INT32) - w = cnp.PyArray_EMPTY(1, [n], cnp.NPY_FLOAT64, 0) - work = cnp.PyArray_EMPTY(1, [m], cnp.NPY_FLOAT64, 0) - x = cnp.PyArray_ZEROS(1, [n], cnp.NPY_FLOAT64, 0) - zz = cnp.PyArray_EMPTY(1, [m], cnp.NPY_FLOAT64, 0) - - # Quit if all coefficients are already in the solution or if m columns of A - # have been triangularized. 
- while (iz1 < n) and (nsetp < m): - # simulating a goto from col independence check - if skip: - skip = False - else: - w[inds[iz1:]] = b[nrow:] @ A[nrow:, inds[iz1:]] - - # Find the largest w[j] and its index. - wmax = 0.0 - for col in range(iz1, n): - j = inds[col] - if w[j] > wmax: - wmax = w[j] - izmax = col - iz = izmax - j = inds[iz] - - # If wmax <= 0.0, terminate since this is a KKT certificate. - if wmax <= 0.0: - break - - # The sign of wmax is OK for j to be moved to set p. Begin the transformation - # and check new diagonal element to avoid near-linear dependence. - work[nrow:] = A[nrow:, j] - tmpint = m - nrow - # DLARFGP( N, ALPHA, X, INCX, TAU ) - dlarfgp(&tmpint, &work[nrow], &work[nrow+1], &one, &tau) - beta = work[nrow] - work[nrow] = 1. - unorm = 0.0 - if nsetp > 0: - unorm = dnrm2(&nsetp, &A[0, j], &n) - - if ((unorm + abs(beta)*0.01) - unorm) > 0.0: - # Column j is sufficiently independent. Copy b into zz and solve for - # ztest which is the new prospective value for x[j]. - zz[:] = b[:] - # dlarf(SIDE, M, N, V, INCV, TAU, C, LDC, WORK) - dlarf('L', &tmpint, &one, &work[nrow], &one, &tau, - &zz[nrow], &tmpint, &tmp) - ztest = zz[nrow] / beta - - if ztest <= 0.0: - # reject column j as a candidate to be moved from set z to set p. - # Set w[j] to 0.0 and move to the next greatest entry in w. 
- w[j] = 0.0 - skip = True - continue - else: - # Column j is not numerically independent, reject column j - w[j] = 0.0 - skip = True - continue - - # column j accepted - A[nrow, j] = beta - b[:] = zz[:] - inds[iz] = inds[iz1] - inds[iz1] = j - iz1 += 1 - nsetp += 1 - - if iz1 < n: - # Apply the householder trafo to remaining columns - for col in inds[iz1:]: - zz[nrow:] = A[nrow:, col] - dlarf('L', &tmpint, &one, &work[nrow], &one, &tau, - &zz[nrow], &tmpint, &tmp) - A[nrow:, col] = zz[nrow:] - - nrow += 1 - - if nsetp < m-1: - A[nrow:, j] = 0.0 - - w[j] = 0.0 - - # Solve triangular system, store in zz - zz[:] = b[:] - for k in range(nsetp): - ip = nsetp - k - 1 - if k != 0: - for ii in range(ip+1): - zz[ii] -= A[ii, jj]*zz[ip+1] - jj = inds[ip] - zz[ip] /= A[ip, jj] - - # Inner loop - while True: - iteration += 1 - - if iteration == maxiter: - return x, 0.0, -1 - - # See if all new constrained coefficients are feasible - # otherwise compute alpha that should be in [0, 1] - alpha = 2.0 - for ip in range(nsetp): - k = inds[ip] - if zz[ip] <= 0.0: - T = -x[k] / (zz[ip]-x[k]) - if alpha > T: - alpha = T - jj = ip - - # If all new constrained coefficients are feasible - # alpha is still 2 then exit otherwise interpolate - # between old x and zz. - if alpha == 2.0: - break - - x[inds[:nsetp]] *= 1 - alpha - x[inds[:nsetp]] += alpha*zz[:nsetp] - - # Modify A, B, and the indices to move coefficient - # i from set p to set z. 
While loop simulates a goto - i = inds[jj] - while True: - x[i] = 0.0 - - if jj != nsetp: - jj += 1 - for j in range(jj, nsetp): - ii = inds[j] - inds[j-1] = ii - dlartgp(&A[j-1, ii], &A[j, ii], &cc, &ss, &A[j-1, ii]) - A[j, ii] = 0.0 - # Apply Givens rotation to all cols except ii - for col in range(n): - if col != ii: - tmp = A[j-1, col] - A[j-1, col] = cc*tmp + ss*A[j, col] - A[j, col] = -ss*tmp + cc*A[j, col] - - tmp = b[j-1] - b[j-1] = cc*tmp + ss*b[j] - b[j] = -ss*tmp + cc*b[j] - - nrow -= 1 - nsetp -= 1 - iz1 -= 1 - inds[iz1] = i - - # See if remaining coefficients in set P are feasible - # since determination of alpha guarantees it. If still - # there are infeasible ones, they are due to numerical - # noise. Any that are nonpositive will be set to zero - # and moved from set p to set z. - for jj in range(nsetp): - i = inds[jj] - if x[i] <= 0.0: - # numerical noise; back to top of while loop - break - else: - # No break; leave while loop - break - - zz[:] = b[:] - for k in range(nsetp): - ip = nsetp - k - 1 - if k != 0: - for ii in range(ip+1): - zz[ii] -= A[ii, jj]*zz[ip+1] - jj = inds[ip] - zz[ip] /= A[ip, jj] - - # Back to inner loop beginning - - # Back in outer loop - x[inds[:nsetp]] = zz[:nsetp] - - # Back to the outer loop beginning - - return x, np.linalg.norm(b[nrow:]), 0 diff --git a/scipy/optimize/_differentiable_functions.py b/scipy/optimize/_differentiable_functions.py index e990dae1ef15..c4115a1cfeb1 100644 --- a/scipy/optimize/_differentiable_functions.py +++ b/scipy/optimize/_differentiable_functions.py @@ -1,12 +1,11 @@ from collections import namedtuple -from functools import partial import numpy as np import scipy.sparse as sps from ._numdiff import approx_derivative, group_columns from ._hessian_update_strategy import HessianUpdateStrategy from scipy.sparse.linalg import LinearOperator -from scipy._lib._array_api import array_namespace +from scipy._lib._array_api import array_namespace, xp_copy from scipy._lib import array_api_extra as 
xpx from scipy._lib._util import _ScalarFunctionWrapper @@ -406,8 +405,112 @@ def fun_and_grad(self, x): return self.f, self.g -def _VectorFunWrapper(fun, x): - return np.atleast_1d(fun(x)) +class _VectorFunWrapper: + def __init__(self, fun): + self.fun = fun + self.nfev = 0 + + def __call__(self, x): + self.nfev += 1 + return np.atleast_1d(self.fun(x)) + + +class _VectorJacWrapper: + """ + Wrapper class for Jacobian calculation + """ + def __init__( + self, + jac, + fun=None, + finite_diff_options=None, + sparse_jacobian=None + ): + self.fun = fun + self.jac = jac + self.finite_diff_options = finite_diff_options + self.sparse_jacobian = sparse_jacobian + + self.njev = 0 + # number of function evaluations consumed by finite difference + self.nfev = 0 + + def __call__(self, x, f0=None, **kwds): + # Send a copy because the user may overwrite it. + # The user of this class might want `x` to remain unchanged. + if callable(self.jac): + J = self.jac(x) + self.njev += 1 + elif self.jac in FD_METHODS: + J, dct = approx_derivative( + self.fun, + x, + f0=f0, + **self.finite_diff_options, + ) + self.nfev += dct['nfev'] + + if self.sparse_jacobian: + return sps.csr_array(J) + elif sps.issparse(J): + return J.toarray() + elif isinstance(J, LinearOperator): + return J + else: + return np.atleast_2d(J) + + +class _VectorHessWrapper: + """ + Wrapper class for Jacobian calculation + """ + def __init__( + self, + hess, + jac=None, + finite_diff_options=None, + ): + self.jac = jac + self.hess = hess + self.finite_diff_options = finite_diff_options + self.nhev = 0 + # number of jac evaluations consumed by finite difference + self.njev = 0 + + def __call__(self, x, v, J0=None, **kwds): + # Send a copy because the user may overwrite it. + # The user of this class might want `x` to remain unchanged. 
+ if callable(self.hess): + self.nhev += 1 + return self._callable_hess(x, v) + elif self.hess in FD_METHODS: + return self._fd_hess(x, v, J0=J0) + + def _fd_hess(self, x, v, J0=None): + if J0 is None: + J0 = self.jac(x) + self.njev += 1 + + # H will be a LinearOperator + H = approx_derivative(self.jac_dot_v, x, + f0=J0.T.dot(v), + args=(v,), + **self.finite_diff_options) + return H + + def jac_dot_v(self, x, v): + self.njev += 1 + return self.jac(x).T.dot(v) + + def _callable_hess(self, x, v): + H = self.hess(x, v) + + if sps.issparse(H): + return sps.csr_array(H) + elif isinstance(H, LinearOperator): + return H + else: + return np.atleast_2d(np.asarray(H)) class VectorFunction: @@ -429,7 +532,8 @@ class VectorFunction: """ def __init__(self, fun, x0, jac, hess, finite_diff_rel_step=None, finite_diff_jac_sparsity=None, - finite_diff_bounds=None, sparse_jacobian=None, workers=None): + finite_diff_bounds=(-np.inf, np.inf), sparse_jacobian=None, + workers=None): if not callable(jac) and jac not in FD_METHODS: raise ValueError(f"`jac` must be either callable or one of {FD_METHODS}.") @@ -450,14 +554,19 @@ def __init__(self, fun, x0, jac, hess, if xp.isdtype(_x.dtype, "real floating"): _dtype = _x.dtype - # promotes to floating + # store original functions + self._orig_fun = fun + self._orig_jac = jac + self._orig_hess = hess + + # promotes to floating, ensures that it's a copy self.x = xp.astype(_x, _dtype) self.x_dtype = _dtype self.n = self.x.size - self.nfev = 0 - self.njev = 0 - self.nhev = 0 + self._nfev = 0 + self._njev = 0 + self._nhev = 0 self.f_updated = False self.J_updated = False self.H_updated = False @@ -492,125 +601,55 @@ def __init__(self, fun, x0, jac, hess, "be estimated using one of the quasi-Newton " "strategies.") - fun_wrapped = partial(_VectorFunWrapper, fun) - - def update_fun(): - self.nfev += 1 - self.f = fun_wrapped(self.x) - - self._update_fun_impl = update_fun - update_fun() + self.fun_wrapped = _VectorFunWrapper(fun) + self._update_fun() 
self.v = np.zeros_like(self.f) self.m = self.v.size - # Jacobian Evaluation + # Initial Jacobian Evaluation if callable(jac): - self.J = jac(self.x) + self.J = jac(xp_copy(self.x)) self.J_updated = True - self.njev += 1 - - if (sparse_jacobian or - sparse_jacobian is None and sps.issparse(self.J)): - def jac_wrapped(x): - self.njev += 1 - return sps.csr_array(jac(x)) - self.J = sps.csr_array(self.J) - self.sparse_jacobian = True - - elif sps.issparse(self.J): - def jac_wrapped(x): - self.njev += 1 - return jac(x).toarray() - self.J = self.J.toarray() - self.sparse_jacobian = False - - else: - def jac_wrapped(x): - self.njev += 1 - return np.atleast_2d(jac(x)) - self.J = np.atleast_2d(self.J) - self.sparse_jacobian = False - - def update_jac(): - self.J = jac_wrapped(self.x) - + self._njev += 1 elif jac in FD_METHODS: - self.J, dct = approx_derivative(fun_wrapped, self.x, f0=self.f, - **finite_diff_options) + self.J, dct = approx_derivative( + self.fun_wrapped, self.x, f0=self.f, **finite_diff_options + ) self.J_updated = True - self.nfev += dct['nfev'] - - if (sparse_jacobian or - sparse_jacobian is None and sps.issparse(self.J)): - def update_jac(): - self._update_fun() - self.J, dct = sps.csr_array( - approx_derivative(fun_wrapped, self.x, f0=self.f, - **finite_diff_options)) - self.nfev += dct['nfev'] - self.J = sps.csr_array(self.J) - self.sparse_jacobian = True - - elif sps.issparse(self.J): - def update_jac(): - self._update_fun() - self.J, dct = approx_derivative(fun_wrapped, self.x, f0=self.f, - **finite_diff_options).toarray() - self.nfev += dct['nfev'] - self.J = self.J.toarray() - self.sparse_jacobian = False + self._nfev += dct['nfev'] + + self.sparse_jacobian = False + if (sparse_jacobian or + sparse_jacobian is None and sps.issparse(self.J)): + # something truthy was specified for sparse_jacobian, + # or it turns out that the Jacobian was sparse. 
+ self.J = sps.csr_array(self.J) + self.sparse_jacobian = True + elif sps.issparse(self.J): + self.J = self.J.toarray() + elif isinstance(self.J, LinearOperator): + pass + else: + self.J = np.atleast_2d(self.J) - else: - def update_jac(): - self._update_fun() - J, dct = approx_derivative(fun_wrapped, self.x, f0=self.f, - **finite_diff_options) - self.J = np.atleast_2d(J) - self.nfev += dct['nfev'] - self.J = np.atleast_2d(self.J) - self.sparse_jacobian = False + self.jac_wrapped = _VectorJacWrapper( + jac, + fun=self.fun_wrapped, + finite_diff_options=finite_diff_options, + sparse_jacobian=self.sparse_jacobian + ) - self._update_jac_impl = update_jac + self.hess_wrapped = _VectorHessWrapper( + hess, jac=self.jac_wrapped, finite_diff_options=finite_diff_options + ) # Define Hessian - if callable(hess): - self.H = hess(self.x, self.v) - self.H_updated = True - self.nhev += 1 - - if sps.issparse(self.H): - def hess_wrapped(x, v): - self.nhev += 1 - return sps.csr_array(hess(x, v)) - self.H = sps.csr_array(self.H) - - elif isinstance(self.H, LinearOperator): - def hess_wrapped(x, v): - self.nhev += 1 - return hess(x, v) - - else: - def hess_wrapped(x, v): - self.nhev += 1 - return np.atleast_2d(np.asarray(hess(x, v))) - self.H = np.atleast_2d(np.asarray(self.H)) - - def update_hess(): - self.H = hess_wrapped(self.x, self.v) - elif hess in FD_METHODS: - def jac_dot_v(x, v): - return jac_wrapped(x).T.dot(v) - - def update_hess(): - self._update_jac() - self.H = approx_derivative(jac_dot_v, self.x, - f0=self.J.T.dot(self.v), - args=(self.v,), - **finite_diff_options) - - update_hess() + if callable(hess) or hess in FD_METHODS: + self.H = self.hess_wrapped(xp_copy(self.x), self.v, J0=self.J) self.H_updated = True + if callable(hess): + self._nhev += 1 elif isinstance(hess, HessianUpdateStrategy): self.H = hess self.H.initialize(self.n, 'hess') @@ -618,19 +657,26 @@ def update_hess(): self.x_prev = None self.J_prev = None - def update_hess(): - self._update_jac() - # When v 
is updated before x was updated, then x_prev and - # J_prev are None and we need this check. - if self.x_prev is not None and self.J_prev is not None: - delta_x = self.x - self.x_prev - delta_g = self.J.T.dot(self.v) - self.J_prev.T.dot(self.v) - self.H.update(delta_x, delta_g) + @property + def nfev(self): + return self._nfev + self.jac_wrapped.nfev - self._update_hess_impl = update_hess + @property + def njev(self): + return self._njev + self.hess_wrapped.njev - if isinstance(hess, HessianUpdateStrategy): - def update_x(x): + @property + def nhev(self): + return self._nhev + + def _update_v(self, v): + if not np.array_equal(v, self.v): + self.v = v + self.H_updated = False + + def _update_x(self, x): + if not np.array_equal(x, self.x): + if isinstance(self._orig_hess, HessianUpdateStrategy): self._update_jac() self.x_prev = self.x self.J_prev = self.J @@ -640,48 +686,63 @@ def update_x(x): self.J_updated = False self.H_updated = False self._update_hess() - else: - def update_x(x): + else: _x = xpx.atleast_nd(self.xp.asarray(x), ndim=1, xp=self.xp) self.x = self.xp.astype(_x, self.x_dtype) self.f_updated = False self.J_updated = False self.H_updated = False - self._update_x_impl = update_x - - def _update_v(self, v): - if not np.array_equal(v, self.v): - self.v = v - self.H_updated = False - - def _update_x(self, x): - if not np.array_equal(x, self.x): - self._update_x_impl(x) - def _update_fun(self): if not self.f_updated: - self._update_fun_impl() + self.f = self.fun_wrapped(xp_copy(self.x)) + self._nfev += 1 self.f_updated = True def _update_jac(self): if not self.J_updated: - self._update_jac_impl() + if self._orig_jac in FD_METHODS: + # need to update fun to get f0 + self._update_fun() + else: + self._njev += 1 + + self.J = self.jac_wrapped(xp_copy(self.x), f0=self.f) self.J_updated = True def _update_hess(self): if not self.H_updated: - self._update_hess_impl() + if callable(self._orig_hess): + self.H = self.hess_wrapped(xp_copy(self.x), self.v) + self._nhev 
+= 1 + elif self._orig_hess in FD_METHODS: + self._update_jac() + self.H = self.hess_wrapped(xp_copy(self.x), self.v, J0=self.J) + elif isinstance(self._orig_hess, HessianUpdateStrategy): + self._update_jac() + # When v is updated before x was updated, then x_prev and + # J_prev are None and we need this check. + if self.x_prev is not None and self.J_prev is not None: + delta_x = self.x - self.x_prev + delta_g = self.J.T.dot(self.v) - self.J_prev.T.dot(self.v) + self.H.update(delta_x, delta_g) + self.H_updated = True def fun(self, x): self._update_x(x) self._update_fun() - return self.f + # returns a copy so that downstream can't overwrite the + # internal attribute + return xp_copy(self.f) def jac(self, x): self._update_x(x) self._update_jac() + if hasattr(self.J, "astype"): + # returns a copy so that downstream can't overwrite the + # internal attribute. But one can't copy a LinearOperator + return self.J.astype(self.J.dtype) return self.J def hess(self, x, v): @@ -689,6 +750,10 @@ def hess(self, x, v): self._update_v(v) self._update_x(x) self._update_hess() + if hasattr(self.H, "astype"): + # returns a copy so that downstream can't overwrite the + # internal attribute. But one can't copy non-arrays + return self.H.astype(self.H.dtype) return self.H diff --git a/scipy/optimize/_lbfgsb_py.py b/scipy/optimize/_lbfgsb_py.py index 1883b31dbead..2d1f608f8357 100644 --- a/scipy/optimize/_lbfgsb_py.py +++ b/scipy/optimize/_lbfgsb_py.py @@ -323,11 +323,11 @@ def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None, If `jac is None` the absolute step size used for numerical approximation of the jacobian via forward differences. maxfun : int - Maximum number of function evaluations. Note that this function - may violate the limit because of evaluating gradients by numerical - differentiation. + Maximum number of function evaluations before minimization terminates. 
+ Note that this function may violate the limit if the gradients + are evaluated by numerical differentiation. maxiter : int - Maximum number of iterations. + Maximum number of algorithm iterations. iprint : int, optional Deprecated option that previously controlled the text printed on the screen during the problem solution. Now the code does not emit any @@ -359,6 +359,11 @@ def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None, relationship between the two is ``ftol = factr * numpy.finfo(float).eps``. I.e., `factr` multiplies the default machine floating-point precision to arrive at `ftol`. + If the minimization is slow to converge the optimizer may halt if the + total number of function evaluations exceeds `maxfun`, or the number of + algorithm iterations has reached `maxiter` (whichever comes first). If + this is the case then ``result.success=False``, and an appropriate + error message is contained in ``result.message``. """ _check_unknown_options(unknown_options) diff --git a/scipy/optimize/_lsq/dogbox.py b/scipy/optimize/_lsq/dogbox.py index b986929626f2..7694c75d491b 100644 --- a/scipy/optimize/_lsq/dogbox.py +++ b/scipy/optimize/_lsq/dogbox.py @@ -308,7 +308,7 @@ def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale, cost = cost_new - J = jac(x, f) + J = jac(x) njev += 1 if loss_function is not None: diff --git a/scipy/optimize/_lsq/least_squares.py b/scipy/optimize/_lsq/least_squares.py index abd2aeca4073..0e32307b68f9 100644 --- a/scipy/optimize/_lsq/least_squares.py +++ b/scipy/optimize/_lsq/least_squares.py @@ -6,9 +6,11 @@ from scipy.sparse.linalg import LinearOperator from scipy.optimize import _minpack, OptimizeResult -from scipy.optimize._numdiff import approx_derivative, group_columns +from scipy.optimize._differentiable_functions import VectorFunction +from scipy.optimize._numdiff import group_columns from scipy.optimize._minimize import Bounds from scipy._lib._sparse import issparse +from scipy._lib._array_api 
import array_namespace from scipy._lib._util import _workers_wrapper from .trf import trf @@ -41,14 +43,9 @@ } -def call_minpack(fun, x0, jac, ftol, xtol, gtol, max_nfev, x_scale, diff_step): +def call_minpack(fun, x0, jac, ftol, xtol, gtol, max_nfev, x_scale, jac_method=None): n = x0.size - if diff_step is None: - epsfcn = EPS - else: - epsfcn = diff_step**2 - # Compute MINPACK's `diag`, which is inverse of our `x_scale` and # ``x_scale='jac'`` corresponds to ``diag=None``. if isinstance(x_scale, str) and x_scale == 'jac': @@ -60,33 +57,36 @@ def call_minpack(fun, x0, jac, ftol, xtol, gtol, max_nfev, x_scale, diff_step): col_deriv = False factor = 100.0 - if jac is None: - if max_nfev is None: - # n squared to account for Jacobian evaluations. - max_nfev = 100 * n * (n + 1) - x, info, status = _minpack._lmdif( - fun, x0, (), full_output, ftol, xtol, gtol, - max_nfev, epsfcn, factor, diag) - else: - if max_nfev is None: - max_nfev = 100 * n - x, info, status = _minpack._lmder( - fun, jac, x0, (), full_output, col_deriv, - ftol, xtol, gtol, max_nfev, factor, diag) + if max_nfev is None: + max_nfev = 100 * n - f = info['fvec'] + # lmder is typically used for systems with analytic jacobians, with lmdif being + # used if there is only an objective fun (lmdif uses finite differences to estimate + # jacobian). Otherwise they're very similar internally. + # We now do all the finite differencing in VectorFunction, which means we can drop + # lmdif and just use lmder. 
- if callable(jac): - J = jac(x) - else: - J = np.atleast_2d(approx_derivative(fun, x)) + # for sending a copy of x0 into _lmder + xp = array_namespace(x0) + + x, info, status = _minpack._lmder( + fun, jac, xp.astype(x0, x0.dtype), (), full_output, col_deriv, + ftol, xtol, gtol, max_nfev, factor, diag) + + f = info['fvec'] + J = jac(x) cost = 0.5 * np.dot(f, f) g = J.T.dot(f) g_norm = norm(g, ord=np.inf) nfev = info['nfev'] - njev = info.get('njev', None) + if callable(jac_method): + # user supplied a callable ("analytic") jac + njev = info.get('njev', None) + else: + # If there are no analytic jacobian evaluations we need to set `njev=None`. + njev = None status = FROM_MINPACK_TO_COMMON[status] active_mask = np.zeros_like(x0, dtype=int) @@ -242,6 +242,17 @@ def loss_function(f, cost_only=False): return loss_function +class _WrapArgsKwargs: + # Supplies a user function with args and kwargs. + def __init__(self, f, args=(), kwargs=None): + self.f = f + self.args = args + self.kwargs = kwargs or {} + + def __call__(self, x): + return self.f(x, *self.args, **self.kwargs) + + @_workers_wrapper def least_squares( fun, x0, jac='2-point', bounds=(-np.inf, np.inf), method='trf', @@ -286,12 +297,16 @@ def least_squares( twice as many operations as '2-point' (default). The scheme 'cs' uses complex steps, and while potentially the most accurate, it is applicable only when `fun` correctly handles complex inputs and - can be analytically continued to the complex plane. Method 'lm' - always uses the '2-point' scheme. If callable, it is used as + can be analytically continued to the complex plane. If callable, it is used as ``jac(x, *args, **kwargs)`` and should return a good approximation (or the exact value) for the Jacobian as an array_like (np.atleast_2d is applied), a sparse array (csr_array preferred for performance) or a `scipy.sparse.linalg.LinearOperator`. + + .. versionchanged:: 1.16.0 + An ability to use the '3-point', 'cs' keywords with the 'lm' method. 
+ Previously 'lm' was limited to '2-point' and callable. + bounds : 2-tuple of array_like or `Bounds`, optional There are two ways to specify bounds: @@ -390,13 +405,16 @@ def least_squares( no effect with ``loss='linear'``, but for other `loss` values it is of crucial importance. max_nfev : None or int, optional - Maximum number of function evaluations before the termination. - If None (default), the value is chosen automatically: + For all methods this parameter controls the maximum number of function + evaluations used by each method, separate to those used in numerical + approximation of the jacobian. + If None (default), the value is chosen automatically as 100 * n. - * For 'trf' and 'dogbox' : 100 * n. - * For 'lm' : 100 * n if `jac` is callable and 100 * n * (n + 1) - otherwise (because 'lm' counts function calls in Jacobian - estimation). + .. versionchanged:: 1.16.0 + The default for the 'lm' method is changed to 100 * n, for both a callable + and a numerically estimated jacobian. Previously the default when using an + estimated jacobian was 100 * n * (n + 1), because the method included + evaluations used in the estimation. diff_step : None or array_like, optional Determines the relative step size for the finite difference @@ -511,9 +529,14 @@ def least_squares( sequence of strictly feasible iterates and `active_mask` is determined within a tolerance threshold. nfev : int - Number of function evaluations done. Methods 'trf' and 'dogbox' do - not count function calls for numerical Jacobian approximation, as - opposed to 'lm' method. + Number of function evaluations done. This number does not include + the function calls used for numerical Jacobian approximation. + + .. versionchanged:: 1.16.0 + For the 'lm' method the number of function calls used in numerical + Jacobian approximation is no longer included. This is to bring all + methods into line. + njev : int or None Number of Jacobian evaluations done. 
If numerical Jacobian approximation is used in 'lm' method, it is set to None. @@ -541,8 +564,8 @@ def least_squares( Notes ----- - Method 'lm' (Levenberg-Marquardt) calls a wrapper over least-squares - algorithms implemented in MINPACK (lmder, lmdif). It runs the + Method 'lm' (Levenberg-Marquardt) calls a wrapper over a least-squares + algorithm implemented in MINPACK (lmder). It runs the Levenberg-Marquardt algorithm formulated as a trust-region type algorithm. The implementation is based on paper [JJMore]_, it is very robust and efficient with a lot of smart tricks. It should be your first choice @@ -861,15 +884,36 @@ def least_squares( if method == 'trf': x0 = make_strictly_feasible(x0, lb, ub) - if kwargs is None: - kwargs = {} if tr_options is None: tr_options = {} - def fun_wrapped(x): - return np.atleast_1d(fun(x, *args, **kwargs)) - - f0 = fun_wrapped(x0) + ########################################################################### + # assemble VectorFunction + ########################################################################### + # first wrap the args/kwargs + fun_wrapped = _WrapArgsKwargs(fun, args=args, kwargs=kwargs) + jac_wrapped = jac + if callable(jac): + jac_wrapped = _WrapArgsKwargs(jac, args=args, kwargs=kwargs) + + def _dummy_hess(x, *args): + # we don't care about Hessian evaluations + return x + + vector_fun = VectorFunction( + fun_wrapped, + x0, + jac_wrapped, + _dummy_hess, + finite_diff_rel_step=diff_step, + finite_diff_jac_sparsity=jac_sparsity, + finite_diff_bounds=bounds, + workers=workers + ) + ########################################################################### + + f0 = vector_fun.fun(x0) + J0 = vector_fun.jac(x0) if f0.ndim != 1: raise ValueError("`fun` must return at most 1-d array_like. 
" @@ -897,82 +941,46 @@ def fun_wrapped(x): else: initial_cost = 0.5 * np.dot(f0, f0) - if callable(jac): - J0 = jac(x0, *args, **kwargs) - - if issparse(J0): - J0 = J0.tocsr() - - def jac_wrapped(x, _=None): - return jac(x, *args, **kwargs).tocsr() - - elif isinstance(J0, LinearOperator): - def jac_wrapped(x, _=None): - return jac(x, *args, **kwargs) - - else: - J0 = np.atleast_2d(J0) - - def jac_wrapped(x, _=None): - return np.atleast_2d(jac(x, *args, **kwargs)) - - else: # Estimate Jacobian by finite differences. + if not callable(jac): + # Estimate Jacobian by finite differences. if method == 'lm': if jac_sparsity is not None: raise ValueError("method='lm' does not support " "`jac_sparsity`.") - - if jac != '2-point': - warn(f"jac='{jac}' works equivalently to '2-point' for method='lm'.", - stacklevel=2) - - J0 = jac_wrapped = None else: + # this will raise a ValueError if the jac_sparsity isn't correct + _ = check_jac_sparsity(jac_sparsity, m, n) + if jac_sparsity is not None and tr_solver == 'exact': raise ValueError("tr_solver='exact' is incompatible " "with `jac_sparsity`.") - jac_sparsity = check_jac_sparsity(jac_sparsity, m, n) + if J0.shape != (m, n): + raise ValueError( + f"The return value of `jac` has wrong shape: expected {(m, n)}, " + f"actual {J0.shape}." + ) - def jac_wrapped(x, f): - J = approx_derivative(fun, x, rel_step=diff_step, method=jac, - f0=f, bounds=bounds, args=args, - kwargs=kwargs, sparsity=jac_sparsity, - workers=workers) - if J.ndim != 2: # J is guaranteed not sparse. 
- J = np.atleast_2d(J) + if not isinstance(J0, np.ndarray): + if method == 'lm': + raise ValueError("method='lm' works only with dense " + "Jacobian matrices.") - return J + if tr_solver == 'exact': + raise ValueError( + "tr_solver='exact' works only with dense " + "Jacobian matrices.") - J0 = jac_wrapped(x0, f0) + jac_scale = isinstance(x_scale, str) and x_scale == 'jac' + if isinstance(J0, LinearOperator) and jac_scale: + raise ValueError("x_scale='jac' can't be used when `jac` " + "returns LinearOperator.") - if J0 is not None: - if J0.shape != (m, n): - raise ValueError( - f"The return value of `jac` has wrong shape: expected {(m, n)}, " - f"actual {J0.shape}." - ) - - if not isinstance(J0, np.ndarray): - if method == 'lm': - raise ValueError("method='lm' works only with dense " - "Jacobian matrices.") - - if tr_solver == 'exact': - raise ValueError( - "tr_solver='exact' works only with dense " - "Jacobian matrices.") - - jac_scale = isinstance(x_scale, str) and x_scale == 'jac' - if isinstance(J0, LinearOperator) and jac_scale: - raise ValueError("x_scale='jac' can't be used when `jac` " - "returns LinearOperator.") - - if tr_solver is None: - if isinstance(J0, np.ndarray): - tr_solver = 'exact' - else: - tr_solver = 'lsmr' + if tr_solver is None: + if isinstance(J0, np.ndarray): + tr_solver = 'exact' + else: + tr_solver = 'lsmr' # Wrap callback function. 
If callback is None, callback_wrapped also is None callback_wrapped = _wrap_callback(callback) @@ -981,11 +989,11 @@ def jac_wrapped(x, f): if callback is not None: warn("Callback function specified, but not supported with `lm` method.", stacklevel=2) - result = call_minpack(fun_wrapped, x0, jac_wrapped, ftol, xtol, gtol, - max_nfev, x_scale, diff_step) + result = call_minpack(vector_fun.fun, x0, vector_fun.jac, ftol, xtol, gtol, + max_nfev, x_scale, jac_method=jac) elif method == 'trf': - result = trf(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol, xtol, + result = trf(vector_fun.fun, vector_fun.jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale, loss_function, tr_solver, tr_options.copy(), verbose, callback=callback_wrapped) @@ -997,7 +1005,7 @@ def jac_wrapped(x, f): tr_options = tr_options.copy() del tr_options['regularize'] - result = dogbox(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol, + result = dogbox(vector_fun.fun, vector_fun.jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale, loss_function, tr_solver, tr_options, verbose, callback=callback_wrapped) diff --git a/scipy/optimize/_lsq/trf.py b/scipy/optimize/_lsq/trf.py index c72fbfae00f0..f17ec17a68fa 100644 --- a/scipy/optimize/_lsq/trf.py +++ b/scipy/optimize/_lsq/trf.py @@ -373,7 +373,7 @@ def trf_bounds(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, cost = cost_new - J = jac(x, f) + J = jac(x) njev += 1 if loss_function is not None: @@ -548,7 +548,7 @@ def trf_no_bounds(fun, jac, x0, f0, J0, ftol, xtol, gtol, max_nfev, cost = cost_new - J = jac(x, f) + J = jac(x) njev += 1 if loss_function is not None: diff --git a/scipy/optimize/_nnls.py b/scipy/optimize/_nnls.py index ecd12ee2ede5..8b3c79a9fce9 100644 --- a/scipy/optimize/_nnls.py +++ b/scipy/optimize/_nnls.py @@ -1,5 +1,5 @@ import numpy as np -from ._cython_nnls import _nnls +from ._slsqplib import nnls as _nnls from scipy._lib.deprecation import _deprecate_positional_args, _NoValue @@ -72,11 +72,13 @@ def 
nnls(A, b, *, maxiter=None, atol=_NoValue): b = np.asarray_chkfinite(b, dtype=np.float64) if len(A.shape) != 2: - raise ValueError("Expected a two-dimensional array (matrix)" + - f", but the shape of A is {A.shape}") - if len(b.shape) != 1: - raise ValueError("Expected a one-dimensional array (vector)" + - f", but the shape of b is {b.shape}") + raise ValueError(f"Expected a 2D array, but the shape of A is {A.shape}") + + if (b.ndim > 2) or ((b.ndim == 2) and (b.shape[1] != 1)): + raise ValueError("Expected a 1D array,(or 2D with one column), but the," + f" shape of b is {b.shape}") + elif (b.ndim == 2) and (b.shape[1] == 1): + b = b.ravel() m, n = A.shape @@ -88,7 +90,7 @@ def nnls(A, b, *, maxiter=None, atol=_NoValue): if not maxiter: maxiter = 3*n x, rnorm, info = _nnls(A, b, maxiter) - if info == -1: + if info == 3: raise RuntimeError("Maximum number of iterations reached.") return x, rnorm diff --git a/scipy/optimize/_optimize.py b/scipy/optimize/_optimize.py index cc99dac630a5..dffffcd73be4 100644 --- a/scipy/optimize/_optimize.py +++ b/scipy/optimize/_optimize.py @@ -41,7 +41,7 @@ from scipy._lib._util import (MapWrapper, check_random_state, _RichResult, _call_callback_maybe_halt, _transition_to_rng) from scipy.optimize._differentiable_functions import ScalarFunction, FD_METHODS -from scipy._lib._array_api import array_namespace, xp_capabilities +from scipy._lib._array_api import array_namespace, xp_capabilities, xp_promote from scipy._lib import array_api_extra as xpx @@ -382,9 +382,7 @@ def rosen(x): >>> plt.show() """ xp = array_namespace(x) - x = xp.asarray(x) - if xp.isdtype(x.dtype, 'integral'): - x = xp.astype(x, xp.asarray(1.).dtype) + x = xp_promote(x, force_floating=True, xp=xp) r = xp.sum(100.0 * (x[1:] - x[:-1]**2.0)**2.0 + (1 - x[:-1])**2.0, axis=0, dtype=x.dtype) return r @@ -419,9 +417,7 @@ def rosen_der(x): """ xp = array_namespace(x) - x = xp.asarray(x) - if xp.isdtype(x.dtype, 'integral'): - x = xp.astype(x, xp.asarray(1.).dtype) + x = 
xp_promote(x, force_floating=True, xp=xp) xm = x[1:-1] xm_m1 = x[:-2] xm_p1 = x[2:] @@ -465,9 +461,8 @@ def rosen_hess(x): """ xp = array_namespace(x) - x = xpx.atleast_nd(x, ndim=1, xp=xp) - if xp.isdtype(x.dtype, 'integral'): - x = xp.astype(x, xp.asarray(1.).dtype) + x = xp_promote(x, force_floating=True, xp=xp) + H = (xpx.create_diagonal(-400 * x[:-1], offset=1, xp=xp) - xpx.create_diagonal(400 * x[:-1], offset=-1, xp=xp)) diagonal = xp.zeros(x.shape[0], dtype=x.dtype) @@ -510,9 +505,8 @@ def rosen_hess_prod(x, p): """ xp = array_namespace(x, p) + x = xp_promote(x, force_floating=True, xp=xp) x = xpx.atleast_nd(x, ndim=1, xp=xp) - if xp.isdtype(x.dtype, 'integral'): - x = xp.astype(x, xp.asarray(1.).dtype) p = xp.asarray(p, dtype=x.dtype) Hp = xp.zeros(x.shape[0], dtype=x.dtype) Hp[0] = (1200 * x[0]**2 - 400 * x[1] + 2) * p[0] - 400 * x[0] * p[1] diff --git a/scipy/optimize/_slsqp_py.py b/scipy/optimize/_slsqp_py.py index 5c92a76d71f6..42e9fa3fee2e 100644 --- a/scipy/optimize/_slsqp_py.py +++ b/scipy/optimize/_slsqp_py.py @@ -16,9 +16,8 @@ __all__ = ['approx_jacobian', 'fmin_slsqp'] import numpy as np -from scipy.optimize._slsqp import slsqp -from numpy import (zeros, array, linalg, append, concatenate, finfo, - sqrt, vstack, isfinite, atleast_1d) +from ._slsqplib import slsqp +from scipy.linalg import norm as lanorm from ._optimize import (OptimizeResult, _check_unknown_options, _prepare_scalar_function, _clip_x_for_func, _check_clip_x) @@ -26,11 +25,11 @@ from ._constraints import old_bound_to_new, _arr_to_scalar from scipy._lib._array_api import array_namespace from scipy._lib import array_api_extra as xpx - +from numpy.typing import NDArray __docformat__ = "restructuredtext en" -_epsilon = sqrt(finfo(float).eps) +_epsilon = np.sqrt(np.finfo(np.float64).eps) def approx_jacobian(x, func, epsilon, *args): @@ -225,7 +224,11 @@ def _minimize_slsqp(func, x0, args=(), jac=None, bounds=None, Options ------- ftol : float - Precision goal for the value of f in the 
stopping criterion. + Precision target for the value of f in the stopping criterion. This value + controls the final accuracy for checking various optimality conditions; + gradient of the Lagrangian and absolute sum of the constraint violations + should be lower than ``ftol``. Similarly, the computed step size and the + objective function change are checked against this value. Default is 1e-6. eps : float Step size used for numerical approximation of the Jacobian. disp : bool @@ -249,7 +252,6 @@ def _minimize_slsqp(func, x0, args=(), jac=None, bounds=None, """ _check_unknown_options(unknown_options) - iter = maxiter - 1 acc = ftol epsilon = eps @@ -338,26 +340,15 @@ def cjac(x, *args): # Set the parameters that SLSQP will need # meq, mieq: number of equality and inequality constraints - meq = sum(map(len, [atleast_1d(c['fun'](x, *c['args'])) + meq = sum(map(len, [np.atleast_1d(c['fun'](x, *c['args'])) for c in cons['eq']])) - mieq = sum(map(len, [atleast_1d(c['fun'](x, *c['args'])) + mieq = sum(map(len, [np.atleast_1d(c['fun'](x, *c['args'])) for c in cons['ineq']])) # m = The total number of constraints m = meq + mieq - # la = The number of constraints, or 1 if there are no constraints - la = array([1, m]).max() # n = The number of independent variables n = len(x) - # Define the workspaces for SLSQP - n1 = n + 1 - mineq = m - meq + n1 + n1 - len_w = (3*n1+m)*(n1+1)+(n1-meq+1)*(mineq+2) + 2*mineq+(n1+mineq)*(n1-meq) \ - + 2*meq + n1 + ((n+1)*n)//2 + 2*m + 3*n + 3*n1 + 1 - len_jw = mineq - w = zeros(len_w) - jw = zeros(len_jw) - # Decompose bounds into xl and xu if bounds is None or len(bounds) == 0: xl = np.empty(n, dtype=float) @@ -365,8 +356,8 @@ def cjac(x, *args): xl.fill(np.nan) xu.fill(np.nan) else: - bnds = array([(_arr_to_scalar(l), _arr_to_scalar(u)) - for (l, u) in bounds], float) + bnds = np.array([(_arr_to_scalar(lo), _arr_to_scalar(up)) + for (lo, up) in bounds], float) if bnds.shape[0] != n: raise IndexError('SLSQP Error: the length of bounds is not ' 
'compatible with that of x0.') @@ -377,10 +368,10 @@ def cjac(x, *args): if bnderr.any(): raise ValueError("SLSQP Error: lb > ub in bounds " f"{', '.join(str(b) for b in bnderr)}.") - xl, xu = bnds[:, 0], bnds[:, 1] + xl, xu = bnds[:, 0].copy(), bnds[:, 1].copy() - # Mark infinite bounds with nans; the Fortran code understands this - infbnd = ~isfinite(bnds) + # Mark infinite bounds with nans; the C code expects this + infbnd = ~np.isfinite(bnds) xl[infbnd[:, 0]] = np.nan xu[infbnd[:, 1]] = np.nan @@ -393,125 +384,180 @@ def cjac(x, *args): wrapped_fun = _clip_x_for_func(sf.fun, new_bounds) wrapped_grad = _clip_x_for_func(sf.grad, new_bounds) - # Initialize the iteration counter and the mode value - mode = array(0, int) - acc = array(acc, float) - majiter = array(iter, int) - majiter_prev = 0 - - # Initialize internal SLSQP state variables - alpha = array(0, float) - f0 = array(0, float) - gs = array(0, float) - h1 = array(0, float) - h2 = array(0, float) - h3 = array(0, float) - h4 = array(0, float) - t = array(0, float) - t0 = array(0, float) - tol = array(0, float) - iexact = array(0, int) - incons = array(0, int) - ireset = array(0, int) - itermx = array(0, int) - line = array(0, int) - n1 = array(0, int) - n2 = array(0, int) - n3 = array(0, int) + # Initialize internal SLSQP state variables dictionary + # This dictionary is passed to the SLSQP matching the C struct defined as + # + # struct SLSQP_static_vars { + # double acc, alpha, f0, gs, h1, h2, h3, h4, t, t0, tol; + # int exact, inconsistent, reset, iter, itermax, line, mode, meq; + # }; + # + # exact : a dummy variable and should be kept 0 since the underlying code + # always uses an inexact search. + # inconsistent: a boolean set to 1 if the linearized QP is not well-defined + # while the original nonlinear problem is still solvable. Then + # the problem is augmented with a regularizing dummy variable. + # reset: holds the count of resetting bfgs to identity matrix. 
+ # iter : the current iteration count; itermax is the maximum number of iterations. + # line : the current line search iteration. + # mode : the exit mode of the solver. + # alpha, f0, gs, h1, h2, h3, h4, t, t0 : internal variables used by the solver. + # + # The dict holds the intermediate state of the solver. The keys are the same + # as the C struct members and will be modified in-place. + state_dict = { + "acc": acc, + "alpha": 0.0, + "f0": 0.0, + "gs": 0.0, + "h1": 0.0, + "h2": 0.0, + "h3": 0.0, + "h4": 0.0, + "t": 0.0, + "t0": 0.0, + "tol": 10.0*acc, + "exact": 0, + "inconsistent": 0, + "reset": 0, + "iter": 0, + "itermax": maxiter, + "line": 0, + "m": m, + "meq": meq, + "mode": 0, + "n": n + } # Print the header if iprint >= 2 if iprint >= 2: print(f"{'NIT':>5} {'FC':>5} {'OBJFUN':>16} {'GNORM':>16}") + # Internal buffer and int array + indices = np.zeros([max(m + 2*n + 2, 1)], dtype=np.int32) + + # The worst case workspace requirements for the buffer are: + + # n*(n+1)//2 + m + 4*n + 3 # SLSQP + # (n+1)*(n+2) + (n+1)*meq + m + (mineq + 2*n + 2)*(n+1) + 3*n + 3 # LSQ + # mineq + 2n + 2 + 2*meq + (n+1) + (mineq + 3n + 3)*(n + 1 - meq) # LSEI + # (mineq + 2n + 2 + 2)*(n + 2) + mineq + 2n + 2 # LDP + # mineq + 2n + 2 # NNLS + + # Summing these up and simplifying (with the help of sympy) gives the following + buffer_size = ( + n*(n+1)//2 + 3*m*n - (m + 5*n + 7)*meq + 9*m + 8*n*n + 35*n + meq*meq + 28 + ) + # If no inequality constraints are given, top up workspace for the missing + # terms. 
+ if mieq == 0: + buffer_size += 2*n*(n + 1) + buffer = np.zeros(max(buffer_size, 1), dtype=np.float64) + # mode is zero on entry, so call objective, constraints and gradients # there should be no func evaluations here because it's cached from # ScalarFunction fx = wrapped_fun(x) - g = append(wrapped_grad(x), 0.0) - c = _eval_constraint(x, cons) - a = _eval_con_normals(x, cons, la, n, m, meq, mieq) + g = wrapped_grad(x) + + # Allocate the multiplier array both for constraints and user specified + # bounds (extra +2 is for a possible augmented problem). + mult = np.zeros([max(1, m + 2*n + 2)], dtype=np.float64) + + # Allocate the constraints and normals once and repopulate as needed + C = np.zeros([max(1, m), n], dtype=np.float64, order='F') + d = np.zeros([max(1, m)], dtype=np.float64) + _eval_con_normals(C, x, cons, m, meq) + _eval_constraint(d, x, cons, m, meq) - while 1: + iter_prev = 0 + + while True: # Call SLSQP - slsqp(m, meq, x, xl, xu, fx, c, g, a, acc, majiter, mode, w, jw, - alpha, f0, gs, h1, h2, h3, h4, t, t0, tol, - iexact, incons, ireset, itermx, line, - n1, n2, n3) + slsqp(state_dict, fx, g, C, d, x, mult, xl, xu, buffer, indices) - if mode == 1: # objective and constraint evaluation required - fx = wrapped_fun(x) - c = _eval_constraint(x, cons) + if state_dict['mode'] == 1: # objective and constraint evaluation required + fx = sf.fun(x) + _eval_constraint(d, x, cons, m, meq) - if mode == -1: # gradient evaluation required - g = append(wrapped_grad(x), 0.0) - a = _eval_con_normals(x, cons, la, n, m, meq, mieq) + if state_dict['mode'] == -1: # gradient evaluation required + g = sf.grad(x) + _eval_con_normals(C, x, cons, m, meq) - if majiter > majiter_prev: + if state_dict['iter'] > iter_prev: # call callback if major iteration has incremented if callback is not None: callback(np.copy(x)) # Print the status of the current iterate if iprint > 2 if iprint >= 2: - print(f"{majiter:5d} {sf.nfev:5d} {fx:16.6E} {linalg.norm(g):16.6E}") + 
print(f"{state_dict['iter']:5d} {sf.nfev:5d} " + f"{fx:16.6E} {lanorm(g):16.6E}") # If exit mode is not -1 or 1, slsqp has completed - if abs(mode) != 1: + if abs(state_dict['mode']) != 1: break - majiter_prev = int(majiter) + iter_prev = state_dict['iter'] # Optimization loop complete. Print status if requested if iprint >= 1: - print(exit_modes[int(mode)] + " (Exit mode " + str(mode) + ')') + print( + exit_modes[state_dict['mode']] + f" (Exit mode {state_dict['mode']})" + ) print(" Current function value:", fx) - print(" Iterations:", majiter) + print(" Iterations:", state_dict['iter']) print(" Function evaluations:", sf.nfev) print(" Gradient evaluations:", sf.ngev) - return OptimizeResult(x=x, fun=fx, jac=g[:-1], nit=int(majiter), - nfev=sf.nfev, njev=sf.ngev, status=int(mode), - message=exit_modes[int(mode)], success=(mode == 0)) - - -def _eval_constraint(x, cons): - # Compute constraints - if cons['eq']: - c_eq = concatenate([atleast_1d(con['fun'](x, *con['args'])) - for con in cons['eq']]) - else: - c_eq = zeros(0) - - if cons['ineq']: - c_ieq = concatenate([atleast_1d(con['fun'](x, *con['args'])) - for con in cons['ineq']]) - else: - c_ieq = zeros(0) - - # Now combine c_eq and c_ieq into a single matrix - c = concatenate((c_eq, c_ieq)) - return c - - -def _eval_con_normals(x, cons, la, n, m, meq, mieq): - # Compute the normals of the constraints - if cons['eq']: - a_eq = vstack([con['jac'](x, *con['args']) - for con in cons['eq']]) - else: # no equality constraint - a_eq = zeros((meq, n)) - - if cons['ineq']: - a_ieq = vstack([con['jac'](x, *con['args']) - for con in cons['ineq']]) - else: # no inequality constraint - a_ieq = zeros((mieq, n)) - - # Now combine a_eq and a_ieq into a single a matrix - if m == 0: # no constraints - a = zeros((la, n)) - else: - a = vstack((a_eq, a_ieq)) - a = concatenate((a, zeros([la, 1])), 1) - - return a + return OptimizeResult( + x=x, fun=fx, jac=g, nit=state_dict['iter'], nfev=sf.nfev, njev=sf.ngev, + 
status=state_dict['mode'], message=exit_modes[state_dict['mode']], + success=(state_dict['mode'] == 0), multipliers=mult[:m] + ) + +# The following functions modify their first input argument in-place. +def _eval_constraint(d: NDArray, x: NDArray, cons: dict, m: int, meq: int): + if m == 0: + return + + # The reason why we don't use regular increments with a sane for loop is that + # the constraint evaluations do not necessarily return scalars. Their + # output length needs to be taken into account while placing them in d. + + if meq > 0: + row = 0 + for con in cons['eq']: + temp = np.atleast_1d(con['fun'](x, *con['args'])).ravel() + d[row:row + len(temp)] = temp + row += len(temp) + + if m > meq: + row = meq + for con in cons['ineq']: + temp = np.atleast_1d(con['fun'](x, *con['args'])).ravel() + d[row:row + len(temp)] = temp + row += len(temp) + + return + + +def _eval_con_normals(C: NDArray, x: NDArray, cons: dict, m: int, meq: int): + if m == 0: + return + + if meq > 0: + row = 0 + for con in cons['eq']: + temp = np.atleast_2d(con['jac'](x, *con['args'])) + C[row:row + temp.shape[0], :] = temp + row += temp.shape[0] + + if m > meq: + row = meq + for con in cons['ineq']: + temp = np.atleast_2d(con['jac'](x, *con['args'])) + C[row:row + temp.shape[0], :] = temp + row += temp.shape[0] + + return diff --git a/scipy/optimize/meson.build b/scipy/optimize/meson.build index 375f4c3aef79..d32cfe3f5ab2 100644 --- a/scipy/optimize/meson.build +++ b/scipy/optimize/meson.build @@ -62,6 +62,8 @@ py3.extension_module('_zeros', subdir: 'scipy/optimize' ) + +# TODO: link to ILP64 LAPACK py3.extension_module('_lbfgsb', [ '__lbfgsb.h', @@ -85,16 +87,6 @@ py3.extension_module('_moduleTNC', subdir: 'scipy/optimize' ) -py3.extension_module('_slsqp', - [f2py_gen.process('slsqp/slsqp.pyf'), 'slsqp/slsqp_optmz.f'], - fortran_args: fortran_ignore_warnings, - link_args: version_link_args, - dependencies: [fortranobject_dep], - install: true, - link_language: 'fortran', - subdir: 
'scipy/optimize' -) - py3.extension_module('_pava_pybind', ['_pava/pava_pybind.cpp'], include_directories: '_pava', @@ -145,10 +137,14 @@ py3.extension_module('_bglu_dense', subdir: 'scipy/optimize' ) -py3.extension_module('_cython_nnls', - opt_gen.process('_cython_nnls.pyx'), - c_args: cython_c_args, - dependencies: np_dep, +py3.extension_module('_slsqplib', + [ + '__slsqp.h', + '__slsqp.c', + '__nnls.h', + '__nnls.c' + ], + dependencies: [lapack_dep, blas_dep, np_dep], link_args: version_link_args, install: true, subdir: 'scipy/optimize' diff --git a/scipy/optimize/slsqp.py b/scipy/optimize/slsqp.py index c2b77d2eb447..2b79d93a55d3 100644 --- a/scipy/optimize/slsqp.py +++ b/scipy/optimize/slsqp.py @@ -9,7 +9,6 @@ 'OptimizeResult', 'fmin_slsqp', 'slsqp', - 'zeros', ] diff --git a/scipy/optimize/slsqp/slsqp.pyf b/scipy/optimize/slsqp/slsqp.pyf deleted file mode 100644 index 5799a4805a04..000000000000 --- a/scipy/optimize/slsqp/slsqp.pyf +++ /dev/null @@ -1,48 +0,0 @@ -! -*- f90 -*- -! Note: the context of this file is case sensitive. - -python module _slsqp ! in - interface ! in :slsqp - subroutine slsqp(m,meq,la,n,x,xl,xu,f,c,g,a,acc,iter,mode,w,l_w,jw,l_jw,alpha,f0,gs,h1,h2,h3,h4,t,t0,tol,iexact,incons,ireset,itermx,line,n1,n2,n3) ! 
in :slsqp:slsqp_optmz.f - integer :: m - integer :: meq - integer optional,check(len(c)>=la),depend(c) :: la=len(c) - integer optional,check(len(x)>=n),depend(x) :: n=len(x) - double precision dimension(n), intent(inout) :: x - double precision dimension(n),depend(n) :: xl - double precision dimension(n),depend(n) :: xu - double precision :: f - double precision dimension(la) :: c - double precision dimension(n + 1),depend(n) :: g - double precision dimension(la,n + 1),depend(la,n) :: a - double precision, intent(inout) :: acc - integer, intent(inout) :: iter - integer, intent(inout) :: mode - double precision dimension(l_w) :: w - integer optional,check(len(w)>=l_w),depend(w) :: l_w=len(w) - integer dimension(l_jw) :: jw - integer optional,check(len(jw)>=l_jw),depend(jw) :: l_jw=len(jw) - double precision, intent(inout) :: alpha - double precision, intent(inout) :: f0 - double precision, intent(inout) :: gs - double precision, intent(inout) :: h1 - double precision, intent(inout) :: h2 - double precision, intent(inout) :: h3 - double precision, intent(inout) :: h4 - double precision, intent(inout) :: t - double precision, intent(inout) :: t0 - double precision, intent(inout) :: tol - integer, intent(inout) :: iexact - integer, intent(inout) :: incons - integer, intent(inout) :: ireset - integer, intent(inout) :: itermx - integer, intent(inout) :: line - integer, intent(inout) :: n1 - integer, intent(inout) :: n2 - integer, intent(inout) :: n3 - end subroutine slsqp - end interface -end python module slsqp - -! This file was auto-generated with f2py (version:2). -! See http://cens.ioc.ee/projects/f2py2e/ diff --git a/scipy/optimize/slsqp/slsqp_optmz.f b/scipy/optimize/slsqp/slsqp_optmz.f deleted file mode 100644 index b2e00a693e3b..000000000000 --- a/scipy/optimize/slsqp/slsqp_optmz.f +++ /dev/null @@ -1,2196 +0,0 @@ -C -C ALGORITHM 733, COLLECTED ALGORITHMS FROM ACM. -C TRANSACTIONS ON MATHEMATICAL SOFTWARE, -C VOL. 20, NO. 3, SEPTEMBER, 1994, PP. 262-281. 
-C https://doi.org/10.1145/192115.192124 -C -C -C https://web.archive.org/web/20170106155705/http://permalink.gmane.org/gmane.comp.python.scientific.devel/6725 -C ------ -C From: Deborah Cotton -C Date: Fri, 14 Sep 2007 12:35:55 -0500 -C Subject: RE: Algorithm License requested -C To: Alan Isaac -C -C Prof. Issac, -C -C In that case, then because the author consents to [the ACM] releasing -C the code currently archived at http://www.netlib.org/toms/733 under the -C BSD license, the ACM hereby releases this code under the BSD license. -C -C Regards, -C -C Deborah Cotton, Copyright & Permissions -C ACM Publications -C 2 Penn Plaza, Suite 701** -C New York, NY 10121-0701 -C permissions@acm.org -C 212.869.7440 ext. 652 -C Fax. 212.869.0481 -C ------ -C - -************************************************************************ -* optimizer * -************************************************************************ - - SUBROUTINE slsqp (m, meq, la, n, x, xl, xu, f, c, g, a, - * acc, iter, mode, w, l_w, jw, l_jw, - * alpha, f0, gs, h1, h2, h3, h4, t, t0, tol, - * iexact, incons, ireset, itermx, line, - * n1, n2, n3) - -C SLSQP S EQUENTIAL L EAST SQ UARES P ROGRAMMING -C TO SOLVE GENERAL NONLINEAR OPTIMIZATION PROBLEMS - -C*********************************************************************** -C* * -C* * -C* A NONLINEAR PROGRAMMING METHOD WITH * -C* QUADRATIC PROGRAMMING SUBPROBLEMS * -C* * -C* * -C* THIS SUBROUTINE SOLVES THE GENERAL NONLINEAR PROGRAMMING PROBLEM * -C* * -C* MINIMIZE F(X) * -C* * -C* SUBJECT TO C (X) .EQ. 0 , J = 1,...,MEQ * -C* J * -C* * -C* C (X) .GE. 0 , J = MEQ+1,...,M * -C* J * -C* * -C* XL .LE. X .LE. XU , I = 1,...,N. * -C* I I I * -C* * -C* THE ALGORITHM IMPLEMENTS THE METHOD OF HAN AND POWELL * -C* WITH BFGS-UPDATE OF THE B-MATRIX AND L1-TEST FUNCTION * -C* WITHIN THE STEPLENGTH ALGORITHM. 
* -C* * -C* PARAMETER DESCRIPTION: * -C* ( * MEANS THIS PARAMETER WILL BE CHANGED DURING CALCULATION ) * -C* * -C* M IS THE TOTAL NUMBER OF CONSTRAINTS, M .GE. 0 * -C* MEQ IS THE NUMBER OF EQUALITY CONSTRAINTS, MEQ .GE. 0 * -C* LA SEE A, LA .GE. MAX(M,1) * -C* N IS THE NUMBER OF VARIBLES, N .GE. 1 * -C* * X() X() STORES THE CURRENT ITERATE OF THE N VECTOR X * -C* ON ENTRY X() MUST BE INITIALIZED. ON EXIT X() * -C* STORES THE SOLUTION VECTOR X IF MODE = 0. * -C* XL() XL() STORES AN N VECTOR OF LOWER BOUNDS XL TO X. * -C* ELEMENTS MAY BE NAN TO INDICATE NO LOWER BOUND. * -C* XU() XU() STORES AN N VECTOR OF UPPER BOUNDS XU TO X. * -C* ELEMENTS MAY BE NAN TO INDICATE NO UPPER BOUND. * -C* F IS THE VALUE OF THE OBJECTIVE FUNCTION. * -C* C() C() STORES THE M VECTOR C OF CONSTRAINTS, * -C* EQUALITY CONSTRAINTS (IF ANY) FIRST. * -C* DIMENSION OF C MUST BE GREATER OR EQUAL LA, * -C* which must be GREATER OR EQUAL MAX(1,M). * -C* G() G() STORES THE N VECTOR G OF PARTIALS OF THE * -C* OBJECTIVE FUNCTION; DIMENSION OF G MUST BE * -C* GREATER OR EQUAL N+1. * -C* A(),LA,M,N THE LA BY N + 1 ARRAY A() STORES * -C* THE M BY N MATRIX A OF CONSTRAINT NORMALS. * -C* A() HAS FIRST DIMENSIONING PARAMETER LA, * -C* WHICH MUST BE GREATER OR EQUAL MAX(1,M). * -C* F,C,G,A MUST ALL BE SET BY THE USER BEFORE EACH CALL. * -C* * ACC ABS(ACC) CONTROLS THE FINAL ACCURACY. * -C* IF ACC .LT. ZERO AN EXACT LINESEARCH IS PERFORMED,* -C* OTHERWISE AN ARMIJO-TYPE LINESEARCH IS USED. * -C* * ITER PRESCRIBES THE MAXIMUM NUMBER OF ITERATIONS. * -C* ON EXIT ITER INDICATES THE NUMBER OF ITERATIONS. * -C* * MODE MODE CONTROLS CALCULATION: * -C* REVERSE COMMUNICATION IS USED IN THE SENSE THAT * -C* THE PROGRAM IS INITIALIZED BY MODE = 0; THEN IT IS* -C* TO BE CALLED REPEATEDLY BY THE USER UNTIL A RETURN* -C* WITH MODE .NE. IABS(1) TAKES PLACE. 
* -C* IF MODE = -1 GRADIENTS HAVE TO BE CALCULATED, * -C* WHILE WITH MODE = 1 FUNCTIONS HAVE TO BE CALCULATED -C* MODE MUST NOT BE CHANGED BETWEEN SUBSEQUENT CALLS * -C* OF SQP. * -C* EVALUATION MODES: * -C* MODE = -1: GRADIENT EVALUATION, (G&A) * -C* 0: ON ENTRY: INITIALIZATION, (F,G,C&A) * -C* ON EXIT : REQUIRED ACCURACY FOR SOLUTION OBTAINED * -C* 1: FUNCTION EVALUATION, (F&C) * -C* * -C* FAILURE MODES: * -C* 2: NUMBER OF EQUALITY CONSTRAINTS LARGER THAN N * -C* 3: MORE THAN 3*N ITERATIONS IN LSQ SUBPROBLEM * -C* 4: INEQUALITY CONSTRAINTS INCOMPATIBLE * -C* 5: SINGULAR MATRIX E IN LSQ SUBPROBLEM * -C* 6: SINGULAR MATRIX C IN LSQ SUBPROBLEM * -C* 7: RANK-DEFICIENT EQUALITY CONSTRAINT SUBPROBLEM HFTI* -C* 8: POSITIVE DIRECTIONAL DERIVATIVE FOR LINESEARCH * -C* 9: MORE THAN ITER ITERATIONS IN SQP * -C* >=10: WORKING SPACE W OR JW TOO SMALL, * -C* W SHOULD BE ENLARGED TO L_W=MODE/1000 * -C* JW SHOULD BE ENLARGED TO L_JW=MODE-1000*L_W * -C* * W(), L_W W() IS A ONE DIMENSIONAL WORKING SPACE, * -C* THE LENGTH L_W OF WHICH SHOULD BE AT LEAST * -C* (3*N1+M)*(N1+1) for LSQ * -C* +(N1-MEQ+1)*(MINEQ+2) + 2*MINEQ for LSI * -C* +(N1+MINEQ)*(N1-MEQ) + 2*MEQ + N1 for LSEI * -C* + N1*N/2 + 2*M + 3*N + 3*N1 + 1 for SLSQPB * -C* with MINEQ = M - MEQ + 2*N1 & N1 = N+1 * -C* NOTICE: FOR PROPER DIMENSIONING OF W IT IS RECOMMENDED TO * -C* COPY THE FOLLOWING STATEMENTS INTO THE HEAD OF * -C* THE CALLING PROGRAM (AND REMOVE THE COMMENT C) * -c####################################################################### -C INTEGER LEN_W, LEN_JW, M, N, N1, MEQ, MINEQ -C PARAMETER (M=... , MEQ=... , N=... 
) -C PARAMETER (N1= N+1, MINEQ= M-MEQ+N1+N1) -C PARAMETER (LEN_W= -c $ (3*N1+M)*(N1+1) -c $ +(N1-MEQ+1)*(MINEQ+2) + 2*MINEQ -c $ +(N1+MINEQ)*(N1-MEQ) + 2*MEQ + N1 -c $ +(N+1)*N/2 + 2*M + 3*N + 3*N1 + 1, -c $ LEN_JW=MINEQ) -C DOUBLE PRECISION W(LEN_W) -C INTEGER JW(LEN_JW) -c####################################################################### -C* THE FIRST M+N+N*N1/2 ELEMENTS OF W MUST NOT BE * -C* CHANGED BETWEEN SUBSEQUENT CALLS OF SLSQP. * -C* ON RETURN W(1) ... W(M) CONTAIN THE MULTIPLIERS * -C* ASSOCIATED WITH THE GENERAL CONSTRAINTS, WHILE * -C* W(M+1) ... W(M+N(N+1)/2) STORE THE CHOLESKY FACTOR* -C* L*D*L(T) OF THE APPROXIMATE HESSIAN OF THE * -C* LAGRANGIAN COLUMNWISE DENSE AS LOWER TRIANGULAR * -C* UNIT MATRIX L WITH D IN ITS 'DIAGONAL' and * -C* W(M+N(N+1)/2+N+2 ... W(M+N(N+1)/2+N+2+M+2N) * -C* CONTAIN THE MULTIPLIERS ASSOCIATED WITH ALL * -C* ALL CONSTRAINTS OF THE QUADRATIC PROGRAM FINDING * -C* THE SEARCH DIRECTION TO THE SOLUTION X* * -C* * JW(), L_JW JW() IS A ONE DIMENSIONAL INTEGER WORKING SPACE * -C* THE LENGTH L_JW OF WHICH SHOULD BE AT LEAST * -C* MINEQ * -C* with MINEQ = M - MEQ + 2*N1 & N1 = N+1 * -C* * -C* THE USER HAS TO PROVIDE THE FOLLOWING SUBROUTINES: * -C* LDL(N,A,Z,SIG,W) : UPDATE OF THE LDL'-FACTORIZATION. * -C* LINMIN(A,B,F,TOL) : LINESEARCH ALGORITHM IF EXACT = 1 * -C* LSQ(M,MEQ,LA,N,NC,C,D,A,B,XL,XU,X,LAMBDA,W,....) : * -C* * -C* SOLUTION OF THE QUADRATIC PROGRAM * -C* QPSOL IS RECOMMENDED: * -C* PE GILL, W MURRAY, MA SAUNDERS, MH WRIGHT: * -C* USER'S GUIDE FOR SOL/QPSOL: * -C* A FORTRAN PACKAGE FOR QUADRATIC PROGRAMMING, * -C* TECHNICAL REPORT SOL 83-7, JULY 1983 * -C* DEPARTMENT OF OPERATIONS RESEARCH, STANFORD UNIVERSITY * -C* STANFORD, CA 94305 * -C* QPSOL IS THE MOST ROBUST AND EFFICIENT QP-SOLVER * -C* AS IT ALLOWS WARM STARTS WITH PROPER WORKING SETS * -C* * -C* IF IT IS NOT AVAILABLE USE LSEI, A CONSTRAINT LINEAR LEAST * -C* SQUARES SOLVER IMPLEMENTED USING THE SOFTWARE HFTI, LDP, NNLS * -C* FROM C.L. 
LAWSON, R.J.HANSON: SOLVING LEAST SQUARES PROBLEMS, * -C* PRENTICE HALL, ENGLEWOOD CLIFFS, 1974. * -C* LSEI COMES WITH THIS PACKAGE, together with all necessary SR's. * -C* * -C* TOGETHER WITH A COUPLE OF SUBROUTINES FROM BLAS LEVEL 1 * -C* * -C* SQP IS HEAD SUBROUTINE FOR BODY SUBROUTINE SQPBDY * -C* IN WHICH THE ALGORITHM HAS BEEN IMPLEMENTED. * -C* * -C* IMPLEMENTED BY: DIETER KRAFT, DFVLR OBERPFAFFENHOFEN * -C* as described in Dieter Kraft: A Software Package for * -C* Sequential Quadratic Programming * -C* DFVLR-FB 88-28, 1988 * -C* which should be referenced if the user publishes results of SLSQP * -C* * -C* DATE: APRIL - OCTOBER, 1981. * -C* STATUS: DECEMBER, 31-ST, 1984. * -C* STATUS: MARCH , 21-ST, 1987, REVISED TO FORTRAN 77 * -C* STATUS: MARCH , 20-th, 1989, REVISED TO MS-FORTRAN * -C* STATUS: APRIL , 14-th, 1989, HESSE in-line coded * -C* STATUS: FEBRUARY, 28-th, 1991, FORTRAN/2 Version 1.04 * -C* accepts Statement Functions * -C* STATUS: MARCH , 1-st, 1991, tested with SALFORD * -C* FTN77/386 COMPILER VERS 2.40* -C* in protected mode * -C* * -C*********************************************************************** -C* * -C* Copyright 1991: Dieter Kraft, FHM * -C* * -C*********************************************************************** - - INTEGER il, im, ir, is, iter, iu, iv, iw, ix, l_w, l_jw, - * jw(l_jw), la, m, meq, mineq, mode, n - - DOUBLE PRECISION acc, a(la,n+1), c(la), f, g(n+1), - * x(n), xl(n), xu(n), w(l_w) - - INTEGER iexact, incons, ireset, itermx, line, n1, n2, n3 - - DOUBLE PRECISION alpha, f0, gs, h1, h2, h3, h4, t, t0, tol - -c dim(W) = N1*(N1+1) + MEQ*(N1+1) + MINEQ*(N1+1) for LSQ -c +(N1-MEQ+1)*(MINEQ+2) + 2*MINEQ for LSI -c +(N1+MINEQ)*(N1-MEQ) + 2*MEQ + N1 for LSEI -c + N1*N/2 + 2*M + 3*N +3*N1 + 1 for SLSQPB -c with MINEQ = M - MEQ + 2*N1 & N1 = N+1 - -C CHECK LENGTH OF WORKING ARRAYS - - n1 = n+1 - mineq = m-meq+n1+n1 - il = (3*n1+m)*(n1+1) + - .(n1-meq+1)*(mineq+2) + 2*mineq + - .(n1+mineq)*(n1-meq) + 2*meq + - .n1*n/2 + 2*m 
+ 3*n + 4*n1 + 1 - im = MAX(mineq, n1-meq) - IF (l_w .LT. il .OR. l_jw .LT. im) THEN - mode = 1000*MAX(10,il) - mode = mode+MAX(10,im) - RETURN - ENDIF - -C PREPARE DATA FOR CALLING SQPBDY - INITIAL ADDRESSES IN W - - im = 1 - il = im + MAX(1,m) - il = im + la - ix = il + n1*n/2 + 1 - ir = ix + n - is = ir + n + n + MAX(1,m) - is = ir + n + n + la - iu = is + n1 - iv = iu + n1 - iw = iv + n1 - - CALL slsqpb (m, meq, la, n, x, xl, xu, f, c, g, a, acc, iter, - * mode, w(ir), w(il), w(ix), w(im), w(is), w(iu), w(iv), w(iw), jw, - * alpha, f0, gs, h1, h2, h3, h4, t, t0, tol, - * iexact, incons, ireset, itermx, line, - * n1, n2, n3) - - END - - SUBROUTINE slsqpb (m, meq, la, n, x, xl, xu, f, c, g, a, acc, - * iter, mode, r, l, x0, mu, s, u, v, w, iw, - * alpha, f0, gs, h1, h2, h3, h4, t, t0, tol, - * iexact, incons, ireset, itermx, line, - * n1, n2, n3) - -C NONLINEAR PROGRAMMING BY SOLVING SEQUENTIALLY QUADRATIC PROGRAMS - -C - L1 - LINE SEARCH, POSITIVE DEFINITE BFGS UPDATE - - -C BODY SUBROUTINE FOR SLSQP - - INTEGER iw(*), i, iexact, incons, ireset, iter, itermx, - * k, j, la, line, m, meq, mode, n, n1, n2, n3 - LOGICAL badlin - - - DOUBLE PRECISION a(la,n+1), c(la), g(n+1), l((n+1)*(n+2)/2), - * mu(la), r(m+n+n+2), s(n+1), u(n+1), v(n+1), w(*), - * x(n), xl(n), xu(n), x0(n), - * ddot_sl, dnrm2_, linmin, - * acc, alfmin, alpha, f, f0, gs, h1, h2, h3, h4, - * hun, one, t, t0, ten, tol, two, ZERO - -c dim(W) = N1*(N1+1) + MEQ*(N1+1) + MINEQ*(N1+1) for LSQ -c +(N1-MEQ+1)*(MINEQ+2) + 2*MINEQ -c +(N1+MINEQ)*(N1-MEQ) + 2*MEQ + N1 for LSEI -c with MINEQ = M - MEQ + 2*N1 & N1 = N+1 - - DATA ZERO /0.0d0/, one /1.0d0/, alfmin /1.0d-1/, - * hun /1.0d+2/, ten /1.0d+1/, two /2.0d0/ - -C The badlin flag keeps track whether the SQP problem on the current -C iteration was inconsistent or not. - badlin = .false. 
- - IF (mode) 260, 100, 220 - - 100 itermx = iter - IF (acc.GE.ZERO) THEN - iexact = 0 - ELSE - iexact = 1 - ENDIF - acc = ABS(acc) - tol = ten*acc - iter = 0 - ireset = 0 - n1 = n + 1 - n2 = n1*n/2 - n3 = n2 + 1 - s(1) = ZERO - mu(1) = ZERO - CALL dcopy_(n, s(1), 0, s, 1) - CALL dcopy_(m, mu(1), 0, mu, 1) - -C RESET BFGS MATRIX - - 110 ireset = ireset + 1 - IF (ireset.GT.5) GO TO 255 - l(1) = ZERO - CALL dcopy_(n2, l(1), 0, l, 1) - j = 1 - DO 120 i=1,n - l(j) = one - j = j + n1 - i - 120 CONTINUE - -C MAIN ITERATION : SEARCH DIRECTION, STEPLENGTH, LDL'-UPDATE - - 130 iter = iter + 1 - mode = 9 - IF (iter.GT.itermx) GO TO 330 - -C SEARCH DIRECTION AS SOLUTION OF QP - SUBPROBLEM - - CALL dcopy_(n, xl, 1, u, 1) - CALL dcopy_(n, xu, 1, v, 1) - CALL daxpy_sl(n, -one, x, 1, u, 1) - CALL daxpy_sl(n, -one, x, 1, v, 1) - h4 = one - CALL lsq (m, meq, n , n3, la, l, g, a, c, u, v, s, r, w, iw, mode) - -C AUGMENTED PROBLEM FOR INCONSISTENT LINEARIZATION -C -C If it turns out that the original SQP problem is inconsistent, -C disallow termination with convergence on this iteration, -C even if the augmented problem was solved. - - badlin = .false. - IF (mode.EQ.6) THEN - IF (n.EQ.meq) THEN - mode = 4 - ENDIF - ENDIF - IF (mode.EQ.4) THEN - badlin = .true. 
- DO 140 j=1,m - IF (j.LE.meq) THEN - a(j,n1) = -c(j) - ELSE - a(j,n1) = MAX(-c(j),ZERO) - ENDIF - 140 CONTINUE - s(1) = ZERO - CALL dcopy_(n, s(1), 0, s, 1) - h3 = ZERO - g(n1) = ZERO - l(n3) = hun - s(n1) = one - u(n1) = ZERO - v(n1) = one - incons = 0 - 150 CALL lsq (m, meq, n1, n3, la, l, g, a, c, u, v, s, r, - * w, iw, mode) - h4 = one - s(n1) - IF (mode.EQ.4) THEN - l(n3) = ten*l(n3) - incons = incons + 1 - IF (incons.GT.5) GO TO 330 - GOTO 150 - ELSE IF (mode.NE.1) THEN - GOTO 330 - ENDIF - ELSE IF (mode.NE.1) THEN - GOTO 330 - ENDIF - -C UPDATE MULTIPLIERS FOR L1-TEST - - DO 160 i=1,n - v(i) = g(i) - ddot_sl(m,a(1,i),1,r,1) - 160 CONTINUE - f0 = f - CALL dcopy_(n, x, 1, x0, 1) - gs = ddot_sl(n, g, 1, s, 1) - h1 = ABS(gs) - h2 = ZERO - DO 170 j=1,m - IF (j.LE.meq) THEN - h3 = c(j) - ELSE - h3 = ZERO - ENDIF - h2 = h2 + MAX(-c(j),h3) - h3 = ABS(r(j)) - mu(j) = MAX(h3,(mu(j)+h3)/two) - h1 = h1 + h3*ABS(c(j)) - 170 CONTINUE - -C CHECK CONVERGENCE - - mode = 0 - IF (h1.LT.acc .AND. h2.LT.acc .AND. .NOT. badlin - * .AND. f .EQ. f) GO TO 330 - h1 = ZERO - DO 180 j=1,m - IF (j.LE.meq) THEN - h3 = c(j) - ELSE - h3 = ZERO - ENDIF - h1 = h1 + mu(j)*MAX(-c(j),h3) - 180 CONTINUE - t0 = f + h1 - h3 = gs - h1*h4 - mode = 8 - IF (h3.GE.ZERO) GO TO 110 - -C LINE SEARCH WITH AN L1-TESTFUNCTION - - line = 0 - alpha = one - IF (iexact.EQ.1) GOTO 210 - -C INEXACT LINESEARCH - - 190 line = line + 1 - h3 = alpha*h3 - CALL dscal_sl(n, alpha, s, 1) - CALL dcopy_(n, x0, 1, x, 1) - CALL daxpy_sl(n, one, s, 1, x, 1) - mode = 1 - GO TO 330 - 200 IF (h1.LE.h3/ten .OR. 
line.GT.10) GO TO 240 - alpha = MAX(h3/(two*(h3-h1)),alfmin) - GO TO 190 - -C EXACT LINESEARCH - - 210 IF (line.NE.3) THEN - alpha = linmin(line,alfmin,one,t,tol) - CALL dcopy_(n, x0, 1, x, 1) - CALL daxpy_sl(n, alpha, s, 1, x, 1) - mode = 1 - GOTO 330 - ENDIF - CALL dscal_sl(n, alpha, s, 1) - GOTO 240 - -C CALL FUNCTIONS AT CURRENT X - - 220 t = f - DO 230 j=1,m - IF (j.LE.meq) THEN - h1 = c(j) - ELSE - h1 = ZERO - ENDIF - t = t + mu(j)*MAX(-c(j),h1) - 230 CONTINUE - h1 = t - t0 - GOTO (200, 210) iexact+1 - -C CHECK CONVERGENCE - - 240 h3 = ZERO - DO 250 j=1,m - IF (j.LE.meq) THEN - h1 = c(j) - ELSE - h1 = ZERO - ENDIF - h3 = h3 + MAX(-c(j),h1) - 250 CONTINUE - IF ((ABS(f-f0).LT.acc .OR. dnrm2_(n,s,1).LT.acc) .AND. h3.LT.acc - * .AND. .NOT. badlin .AND. f .EQ. f) - * THEN - mode = 0 - ELSE - mode = -1 - ENDIF - GO TO 330 - -C CHECK relaxed CONVERGENCE in case of positive directional derivative - - 255 CONTINUE - h3 = ZERO - DO 256 j=1,m - IF (j.LE.meq) THEN - h1 = c(j) - ELSE - h1 = ZERO - ENDIF - h3 = h3 + MAX(-c(j),h1) - 256 CONTINUE - IF ((ABS(f-f0).LT.tol .OR. dnrm2_(n,s,1).LT.tol) .AND. h3.LT.tol - * .AND. .NOT. badlin .AND. f .EQ. 
f) - * THEN - mode = 0 - ELSE - mode = 8 - ENDIF - GO TO 330 - -C CALL JACOBIAN AT CURRENT X - -C UPDATE CHOLESKY-FACTORS OF HESSIAN MATRIX BY MODIFIED BFGS FORMULA - - 260 DO 270 i=1,n - u(i) = g(i) - ddot_sl(m,a(1,i),1,r,1) - v(i) - 270 CONTINUE - -C L'*S - - k = 0 - DO 290 i=1,n - h1 = ZERO - k = k + 1 - DO 280 j=i+1,n - k = k + 1 - h1 = h1 + l(k)*s(j) - 280 CONTINUE - v(i) = s(i) + h1 - 290 CONTINUE - -C D*L'*S - - k = 1 - DO 300 i=1,n - v(i) = l(k)*v(i) - k = k + n1 - i - 300 CONTINUE - -C L*D*L'*S - - DO 320 i=n,1,-1 - h1 = ZERO - k = i - DO 310 j=1,i - 1 - h1 = h1 + l(k)*v(j) - k = k + n - j - 310 CONTINUE - v(i) = v(i) + h1 - 320 CONTINUE - - h1 = ddot_sl(n,s,1,u,1) - h2 = ddot_sl(n,s,1,v,1) - h3 = 0.2d0*h2 - IF (h1.LT.h3) THEN - h4 = (h2-h3)/(h2-h1) - h1 = h3 - CALL dscal_sl(n, h4, u, 1) - CALL daxpy_sl(n, one-h4, v, 1, u, 1) - ENDIF - IF (h1.EQ.0 .or. h2.EQ.0) THEN -C Singular update: reset hessian. - GO TO 110 - end if - CALL ldl(n, l, u, +one/h1, v) - CALL ldl(n, l, v, -one/h2, u) - -C END OF MAIN ITERATION - - GO TO 130 - -C END OF SLSQPB - - 330 END - - - SUBROUTINE lsq(m,meq,n,nl,la,l,g,a,b,xl,xu,x,y,w,jw,mode) - -C MINIMIZE with respect to X - -C ||E*X - F|| -C 1/2 T -C WITH UPPER TRIANGULAR MATRIX E = +D *L , - -C -1/2 -1 -C AND VECTOR F = -D *L *G, - -C WHERE THE UNIT LOWER TRIDIANGULAR MATRIX L IS STORED COLUMNWISE -C DENSE IN THE N*(N+1)/2 ARRAY L WITH VECTOR D STORED IN ITS -C 'DIAGONAL' THUS SUBSTITUTING THE ONE-ELEMENTS OF L - -C SUBJECT TO - -C A(J)*X - B(J) = 0 , J=1,...,MEQ, -C A(J)*X - B(J) >=0, J=MEQ+1,...,M, -C XL(I) <= X(I) <= XU(I), I=1,...,N, -C ON ENTRY, THE USER HAS TO PROVIDE THE ARRAYS L, G, A, B, XL, XU. 
-C WITH DIMENSIONS: L(N*(N+1)/2), G(N), A(LA,N), B(M), XL(N), XU(N) -C THE WORKING ARRAY W MUST HAVE AT LEAST THE FOLLOWING DIMENSION: -c DIM(W) = (3*N+M)*(N+1) for LSQ -c +(N-MEQ+1)*(MINEQ+2) + 2*MINEQ for LSI -c +(N+MINEQ)*(N-MEQ) + 2*MEQ + N for LSEI -c with MINEQ = M - MEQ + 2*N -C ON RETURN, NO ARRAY WILL BE CHANGED BY THE SUBROUTINE. -C X STORES THE N-DIMENSIONAL SOLUTION VECTOR -C Y STORES THE VECTOR OF LAGRANGE MULTIPLIERS OF DIMENSION -C M+N+N (CONSTRAINTS+LOWER+UPPER BOUNDS) -C MODE IS A SUCCESS-FAILURE FLAG WITH THE FOLLOWING MEANINGS: -C MODE=1: SUCCESSFUL COMPUTATION -C 2: ERROR RETURN BECAUSE OF WRONG DIMENSIONS (N<1) -C 3: ITERATION COUNT EXCEEDED BY NNLS -C 4: INEQUALITY CONSTRAINTS INCOMPATIBLE -C 5: MATRIX E IS NOT OF FULL RANK -C 6: MATRIX C IS NOT OF FULL RANK -C 7: RANK DEFECT IN HFTI - -c coded Dieter Kraft, april 1987 -c revised march 1989 - - - DOUBLE PRECISION l,g,a,b,w,xl,xu,x,y, - . diag,ZERO,one,ddot_sl,xnorm - - INTEGER jw(*),i,ic,id,ie,IF,ig,ih,il,im,ip,iu,iw, - . i1,i2,i3,i4,la,m,meq,mineq,mode,m1,n,nl,n1,n2,n3, - . nancnt,j - - DIMENSION a(la,n), b(la), g(n), l(nl), - . 
w(*), x(n), xl(n), xu(n), y(m+n+n) - - DATA ZERO/0.0d0/, one/1.0d0/ - - n1 = n + 1 - mineq = m - meq - m1 = mineq + n + n - -c determine whether to solve problem -c with inconsistent linerarization (n2=1) -c or not (n2=0) - - n2 = n1*n/2 + 1 - IF (n2.EQ.nl) THEN - n2 = 0 - ELSE - n2 = 1 - ENDIF - n3 = n-n2 - -C RECOVER MATRIX E AND VECTOR F FROM L AND G - - i2 = 1 - i3 = 1 - i4 = 1 - ie = 1 - IF = n*n+1 - DO 10 i=1,n3 - i1 = n1-i - diag = SQRT (l(i2)) - w(i3) = ZERO - CALL dcopy_ (i1 , w(i3), 0, w(i3), 1) - CALL dcopy_ (i1-n2, l(i2), 1, w(i3), n) - CALL dscal_sl (i1-n2, diag, w(i3), n) - w(i3) = diag - w(IF-1+i) = (g(i) - ddot_sl(i-1, w(i4), 1, w(IF), 1))/diag - i2 = i2 + i1 - n2 - i3 = i3 + n1 - i4 = i4 + n - 10 CONTINUE - IF (n2.EQ.1) THEN - w(i3) = l(nl) - w(i4) = ZERO - CALL dcopy_ (n3, w(i4), 0, w(i4), 1) - w(IF-1+n) = ZERO - ENDIF - CALL dscal_sl (n, - one, w(IF), 1) - - ic = IF + n - id = ic + meq*n - - IF (meq .GT. 0) THEN - -C RECOVER MATRIX C FROM UPPER PART OF A - - DO 20 i=1,meq - CALL dcopy_ (n, a(i,1), la, w(ic-1+i), meq) - 20 CONTINUE - -C RECOVER VECTOR D FROM UPPER PART OF B - - CALL dcopy_ (meq, b(1), 1, w(id), 1) - CALL dscal_sl (meq, - one, w(id), 1) - - ENDIF - - ig = id + meq - -C RECOVER MATRIX G FROM LOWER PART OF A -C The matrix G(mineq+2*n,m1) is stored at w(ig) -C Not all rows will be filled if some of the upper/lower -C bounds are unbounded. - - IF (mineq .GT. 0) THEN - - DO 30 i=1,mineq - CALL dcopy_ (n, a(meq+i,1), la, w(ig-1+i), m1) - 30 CONTINUE - - ENDIF - - ih = ig + m1*n - iw = ih + mineq + 2*n - - IF (mineq .GT. 
0) THEN - -C RECOVER H FROM LOWER PART OF B -C The vector H(mineq+2*n) is stored at w(ih) - - CALL dcopy_ (mineq, b(meq+1), 1, w(ih), 1) - CALL dscal_sl (mineq, - one, w(ih), 1) - - ENDIF - -C AUGMENT MATRIX G BY +I AND -I, AND, -C AUGMENT VECTOR H BY XL AND XU -C NaN value indicates no bound - - ip = ig + mineq - il = ih + mineq - nancnt = 0 - - DO 40 i=1,n - if (xl(i).eq.xl(i)) then - w(il) = xl(i) - do 41 j=1,n - w(ip + m1*(j-1)) = 0 - 41 continue - w(ip + m1*(i-1)) = 1 - ip = ip + 1 - il = il + 1 - else - nancnt = nancnt + 1 - end if - 40 CONTINUE - - DO 50 i=1,n - if (xu(i).eq.xu(i)) then - w(il) = -xu(i) - do 51 j=1,n - w(ip + m1*(j-1)) = 0 - 51 continue - w(ip + m1*(i-1)) = -1 - ip = ip + 1 - il = il + 1 - else - nancnt = nancnt + 1 - end if - 50 CONTINUE - - CALL lsei (w(ic), w(id), w(ie), w(IF), w(ig), w(ih), MAX(1,meq), - . meq, n, n, m1, m1-nancnt, n, x, xnorm, w(iw), jw, mode) - - IF (mode .EQ. 1) THEN - -c restore Lagrange multipliers (only for user-defined variables) - - CALL dcopy_ (m, w(iw), 1, y(1), 1) - -c set rest of the multipliers to nan (they are not used) - - IF (n3 .GT. 0) THEN - y(m+1) = 0 - y(m+1) = 0 / y(m+1) - do 60 i=m+2,m+n3+n3 - y(i) = y(m+1) - 60 continue - ENDIF - - ENDIF - call bound(n, x, xl, xu) - -C END OF SUBROUTINE LSQ - - END - - - SUBROUTINE lsei(c,d,e,f,g,h,lc,mc,LE,me,lg,mg,n,x,xnrm,w,jw,mode) - -C FOR MODE=1, THE SUBROUTINE RETURNS THE SOLUTION X OF -C EQUALITY & INEQUALITY CONSTRAINED LEAST SQUARES PROBLEM LSEI : - -C MIN ||E*X - F|| -C X - -C S.T. C*X = D, -C G*X >= H. - -C USING QR DECOMPOSITION & ORTHOGONAL BASIS OF NULLSPACE OF C -C CHAPTER 23.6 OF LAWSON & HANSON: SOLVING LEAST SQUARES PROBLEMS. 
- -C THE FOLLOWING DIMENSIONS OF THE ARRAYS DEFINING THE PROBLEM -C ARE NECESSARY -C DIM(E) : FORMAL (LE,N), ACTUAL (ME,N) -C DIM(F) : FORMAL (LE ), ACTUAL (ME ) -C DIM(C) : FORMAL (LC,N), ACTUAL (MC,N) -C DIM(D) : FORMAL (LC ), ACTUAL (MC ) -C DIM(G) : FORMAL (LG,N), ACTUAL (MG,N) -C DIM(H) : FORMAL (LG ), ACTUAL (MG ) -C DIM(X) : FORMAL (N ), ACTUAL (N ) -C DIM(W) : 2*MC+ME+(ME+MG)*(N-MC) for LSEI -C +(N-MC+1)*(MG+2)+2*MG for LSI -C DIM(JW): MAX(MG,L) -C ON ENTRY, THE USER HAS TO PROVIDE THE ARRAYS C, D, E, F, G, AND H. -C ON RETURN, ALL ARRAYS WILL BE CHANGED BY THE SUBROUTINE. -C X STORES THE SOLUTION VECTOR -C XNORM STORES THE RESIDUUM OF THE SOLUTION IN EUCLIDIAN NORM -C W STORES THE VECTOR OF LAGRANGE MULTIPLIERS IN ITS FIRST -C MC+MG ELEMENTS -C MODE IS A SUCCESS-FAILURE FLAG WITH THE FOLLOWING MEANINGS: -C MODE=1: SUCCESSFUL COMPUTATION -C 2: ERROR RETURN BECAUSE OF WRONG DIMENSIONS (N<1) -C 3: ITERATION COUNT EXCEEDED BY NNLS -C 4: INEQUALITY CONSTRAINTS INCOMPATIBLE -C 5: MATRIX E IS NOT OF FULL RANK -C 6: MATRIX C IS NOT OF FULL RANK -C 7: RANK DEFECT IN HFTI - -C 18.5.1981, DIETER KRAFT, DFVLR OBERPFAFFENHOFEN -C 20.3.1987, DIETER KRAFT, DFVLR OBERPFAFFENHOFEN - - INTEGER jw(*),i,ie,IF,ig,iw,j,k,krank,l,lc,LE,lg, - . mc,mc1,me,mg,mode,n - - DOUBLE PRECISION c(lc,n),e(LE,n),g(lg,n),d(lc),f(LE),h(lg),x(n), - . 
w(*),t,ddot_sl,xnrm,rnorm(1),dnrm2_,epmach,ZERO - DATA epmach/2.22d-16/,ZERO/0.0d+00/ - - mode=2 - IF(mc.GT.n) GOTO 75 - l=n-mc - mc1=mc+1 - iw=(l+1)*(mg+2)+2*mg+mc - ie=iw+mc+1 - IF=ie+me*l - ig=IF+me - -C TRIANGULARIZE C AND APPLY FACTORS TO E AND G - - DO 10 i=1,mc - j=MIN(i+1,lc) - CALL h12(1,i,i+1,n,c(i,1),lc,w(iw+i),c(j,1),lc,1,mc-i) - CALL h12(2,i,i+1,n,c(i,1),lc,w(iw+i),e ,LE,1,me) - 10 CALL h12(2,i,i+1,n,c(i,1),lc,w(iw+i),g ,lg,1,mg) - -C SOLVE C*X=D AND MODIFY F - - mode=6 - DO 15 i=1,mc - IF(ABS(c(i,i)).LT.epmach) GOTO 75 - x(i)=(d(i)-ddot_sl(i-1,c(i,1),lc,x,1))/c(i,i) - 15 CONTINUE - mode=1 - w(mc1) = ZERO - CALL dcopy_ (mg-mc,w(mc1),0,w(mc1),1) - - IF(mc.EQ.n) GOTO 50 - - DO 20 i=1,me - 20 w(IF-1+i)=f(i)-ddot_sl(mc,e(i,1),LE,x,1) - -C STORE TRANSFORMED E & G - - DO 25 i=1,me - 25 CALL dcopy_(l,e(i,mc1),LE,w(ie-1+i),me) - DO 30 i=1,mg - 30 CALL dcopy_(l,g(i,mc1),lg,w(ig-1+i),mg) - - IF(mg.GT.0) GOTO 40 - -C SOLVE LS WITHOUT INEQUALITY CONSTRAINTS - - mode=7 - k=MAX(LE,n) - t=SQRT(epmach) - CALL hfti (w(ie),me,me,l,w(IF),k,1,t,krank,rnorm,w,w(l+1),jw) -C HFTI IS MORE GENERIC, BUT WE ONLY CALL IT WITH NB=1, SO RETRIEVE THE -C SINGLE VALUE WE NEED FROM RNORM HERE - xnrm = rnorm(1) - CALL dcopy_(l,w(IF),1,x(mc1),1) - IF(krank.NE.l) GOTO 75 - mode=1 - GOTO 50 -C MODIFY H AND SOLVE INEQUALITY CONSTRAINED LS PROBLEM - - 40 DO 45 i=1,mg - 45 h(i)=h(i)-ddot_sl(mc,g(i,1),lg,x,1) - CALL lsi - . 
(w(ie),w(IF),w(ig),h,me,me,mg,mg,l,x(mc1),xnrm,w(mc1),jw,mode) - IF(mc.EQ.0) GOTO 75 - t=dnrm2_(mc,x,1) - xnrm=SQRT(xnrm*xnrm+t*t) - IF(mode.NE.1) GOTO 75 - -C SOLUTION OF ORIGINAL PROBLEM AND LAGRANGE MULTIPLIERS - - 50 DO 55 i=1,me - 55 f(i)=ddot_sl(n,e(i,1),LE,x,1)-f(i) - DO 60 i=1,mc - 60 d(i)=ddot_sl(me,e(1,i),1,f,1)-ddot_sl(mg,g(1,i),1,w(mc1),1) - - DO 65 i=mc,1,-1 - 65 CALL h12(2,i,i+1,n,c(i,1),lc,w(iw+i),x,1,1,1) - - DO 70 i=mc,1,-1 - j=MIN(i+1,lc) - w(i)=(d(i)-ddot_sl(mc-i,c(j,i),1,w(j),1))/c(i,i) - 70 CONTINUE - -C END OF SUBROUTINE LSEI - - 75 END - - - SUBROUTINE lsi(e,f,g,h,LE,me,lg,mg,n,x,xnorm,w,jw,mode) - -C FOR MODE=1, THE SUBROUTINE RETURNS THE SOLUTION X OF -C INEQUALITY CONSTRAINED LINEAR LEAST SQUARES PROBLEM: - -C MIN ||E*X-F|| -C X - -C S.T. G*X >= H - -C THE ALGORITHM IS BASED ON QR DECOMPOSITION AS DESCRIBED IN -C CHAPTER 23.5 OF LAWSON & HANSON: SOLVING LEAST SQUARES PROBLEMS - -C THE FOLLOWING DIMENSIONS OF THE ARRAYS DEFINING THE PROBLEM -C ARE NECESSARY -C DIM(E) : FORMAL (LE,N), ACTUAL (ME,N) -C DIM(F) : FORMAL (LE ), ACTUAL (ME ) -C DIM(G) : FORMAL (LG,N), ACTUAL (MG,N) -C DIM(H) : FORMAL (LG ), ACTUAL (MG ) -C DIM(X) : N -C DIM(W) : (N+1)*(MG+2) + 2*MG -C DIM(JW): LG -C ON ENTRY, THE USER HAS TO PROVIDE THE ARRAYS E, F, G, AND H. -C ON RETURN, ALL ARRAYS WILL BE CHANGED BY THE SUBROUTINE. -C X STORES THE SOLUTION VECTOR -C XNORM STORES THE RESIDUUM OF THE SOLUTION IN EUCLIDIAN NORM -C W STORES THE VECTOR OF LAGRANGE MULTIPLIERS IN ITS FIRST -C MG ELEMENTS -C MODE IS A SUCCESS-FAILURE FLAG WITH THE FOLLOWING MEANINGS: -C MODE=1: SUCCESSFUL COMPUTATION -C 2: ERROR RETURN BECAUSE OF WRONG DIMENSIONS (N<1) -C 3: ITERATION COUNT EXCEEDED BY NNLS -C 4: INEQUALITY CONSTRAINTS INCOMPATIBLE -C 5: MATRIX E IS NOT OF FULL RANK - -C 03.01.1980, DIETER KRAFT: CODED -C 20.03.1987, DIETER KRAFT: REVISED TO FORTRAN 77 - - INTEGER i,j,LE,lg,me,mg,mode,n,jw(lg) - - DOUBLE PRECISION e(LE,n),f(LE),g(lg,n),h(lg),x(n),w(*), - . 
ddot_sl,xnorm,dnrm2_,epmach,t,one - DATA epmach/2.22d-16/,one/1.0d+00/ - -C QR-FACTORS OF E AND APPLICATION TO F - - DO 10 i=1,n - j=MIN(i+1,n) - CALL h12(1,i,i+1,me,e(1,i),1,t,e(1,j),1,LE,n-i) - 10 CALL h12(2,i,i+1,me,e(1,i),1,t,f ,1,1 ,1 ) - -C TRANSFORM G AND H TO GET LEAST DISTANCE PROBLEM - - mode=5 - DO 30 i=1,mg - DO 20 j=1,n - IF (.NOT.(ABS(e(j,j)).GE.epmach)) GOTO 50 - 20 g(i,j)=(g(i,j)-ddot_sl(j-1,g(i,1),lg,e(1,j),1))/e(j,j) - 30 h(i)=h(i)-ddot_sl(n,g(i,1),lg,f,1) - -C SOLVE LEAST DISTANCE PROBLEM - - CALL ldp(g,lg,mg,n,h,x,xnorm,w,jw,mode) - IF (mode.NE.1) GOTO 50 - -C SOLUTION OF ORIGINAL PROBLEM - - CALL daxpy_sl(n,one,f,1,x,1) - DO 40 i=n,1,-1 - j=MIN(i+1,n) - 40 x(i)=(x(i)-ddot_sl(n-i,e(i,j),LE,x(j),1))/e(i,i) - j=MIN(n+1,me) - t=dnrm2_(me-n,f(j),1) - xnorm=SQRT(xnorm*xnorm+t*t) - -C END OF SUBROUTINE LSI - - 50 END - - SUBROUTINE ldp(g,mg,m,n,h,x,xnorm,w,INDEX,mode) - -C T -C MINIMIZE 1/2 X X SUBJECT TO G * X >= H. - -C C.L. LAWSON, R.J. HANSON: 'SOLVING LEAST SQUARES PROBLEMS' -C PRENTICE HALL, ENGLEWOOD CLIFFS, NEW JERSEY, 1974. - -C PARAMETER DESCRIPTION: - -C G(),MG,M,N ON ENTRY G() STORES THE M BY N MATRIX OF -C LINEAR INEQUALITY CONSTRAINTS. G() HAS FIRST -C DIMENSIONING PARAMETER MG -C H() ON ENTRY H() STORES THE M VECTOR H REPRESENTING -C THE RIGHT SIDE OF THE INEQUALITY SYSTEM - -C REMARK: G(),H() WILL NOT BE CHANGED DURING CALCULATIONS BY LDP - -C X() ON ENTRY X() NEED NOT BE INITIALIZED. -C ON EXIT X() STORES THE SOLUTION VECTOR X IF MODE=1. 
-C XNORM ON EXIT XNORM STORES THE EUCLIDIAN NORM OF THE -C SOLUTION VECTOR IF COMPUTATION IS SUCCESSFUL -C W() W IS A ONE DIMENSIONAL WORKING SPACE, THE LENGTH -C OF WHICH SHOULD BE AT LEAST (M+2)*(N+1) + 2*M -C ON EXIT W() STORES THE LAGRANGE MULTIPLIERS -C ASSOCIATED WITH THE CONSTRAINTS -C AT THE SOLUTION OF PROBLEM LDP -C INDEX() INDEX() IS A ONE DIMENSIONAL INTEGER WORKING SPACE -C OF LENGTH AT LEAST M -C MODE MODE IS A SUCCESS-FAILURE FLAG WITH THE FOLLOWING -C MEANINGS: -C MODE=1: SUCCESSFUL COMPUTATION -C 2: ERROR RETURN BECAUSE OF WRONG DIMENSIONS (N.LE.0) -C 3: ITERATION COUNT EXCEEDED BY NNLS -C 4: INEQUALITY CONSTRAINTS INCOMPATIBLE - - - DOUBLE PRECISION g,h,x,xnorm,w,u,v, - . ZERO,one,fac,rnorm,dnrm2_,ddot_sl,diff - INTEGER INDEX,i,IF,iw,iwdual,iy,iz,j,m,mg,mode,n,n1 - DIMENSION g(mg,n),h(m),x(n),w(*),INDEX(m) - diff(u,v)= u-v - DATA ZERO,one/0.0d0,1.0d0/ - - mode=2 - IF(n.LE.0) GOTO 50 - -C STATE DUAL PROBLEM - - mode=1 - x(1)=ZERO - CALL dcopy_(n,x(1),0,x,1) - xnorm=ZERO - IF(m.EQ.0) GOTO 50 - iw=0 - DO 20 j=1,m - DO 10 i=1,n - iw=iw+1 - 10 w(iw)=g(j,i) - iw=iw+1 - 20 w(iw)=h(j) - IF=iw+1 - DO 30 i=1,n - iw=iw+1 - 30 w(iw)=ZERO - w(iw+1)=one - n1=n+1 - iz=iw+2 - iy=iz+n1 - iwdual=iy+m - -C SOLVE DUAL PROBLEM - - CALL nnls (w,n1,n1,m,w(IF),w(iy),rnorm,w(iwdual),w(iz),INDEX,mode) - - IF(mode.NE.1) GOTO 50 - mode=4 - IF(rnorm.LE.ZERO) GOTO 50 - -C COMPUTE SOLUTION OF PRIMAL PROBLEM - - fac=one-ddot_sl(m,h,1,w(iy),1) - IF(.NOT.(diff(one+fac,one).GT.ZERO)) GOTO 50 - mode=1 - fac=one/fac - DO 40 j=1,n - 40 x(j)=fac*ddot_sl(m,g(1,j),1,w(iy),1) - xnorm=dnrm2_(n,x,1) - -C COMPUTE LAGRANGE MULTIPLIERS FOR PRIMAL PROBLEM - - w(1)=ZERO - CALL dcopy_(m,w(1),0,w,1) - CALL daxpy_sl(m,fac,w(iy),1,w,1) - -C END OF SUBROUTINE LDP - - 50 END - - - SUBROUTINE nnls (a, mda, m, n, b, x, rnorm, w, z, INDEX, mode) - -C C.L.LAWSON AND R.J.HANSON, JET PROPULSION LABORATORY: -C 'SOLVING LEAST SQUARES PROBLEMS'. 
PRENTICE-HALL.1974 - -C ********** NONNEGATIVE LEAST SQUARES ********** - -C GIVEN AN M BY N MATRIX, A, AND AN M-VECTOR, B, COMPUTE AN -C N-VECTOR, X, WHICH SOLVES THE LEAST SQUARES PROBLEM - -C A*X = B SUBJECT TO X >= 0 - -C A(),MDA,M,N -C MDA IS THE FIRST DIMENSIONING PARAMETER FOR THE ARRAY,A(). -C ON ENTRY A() CONTAINS THE M BY N MATRIX,A. -C ON EXIT A() CONTAINS THE PRODUCT Q*A, -C WHERE Q IS AN M BY M ORTHOGONAL MATRIX GENERATED -C IMPLICITLY BY THIS SUBROUTINE. -C EITHER M>=N OR M= M. EITHER M >= N OR M < N IS PERMITTED. -C THERE IS NO RESTRICTION ON THE RANK OF A. -C THE MATRIX A WILL BE MODIFIED BY THE SUBROUTINE. -C B(*,*),MDB,NB IF NB = 0 THE SUBROUTINE WILL MAKE NO REFERENCE -C TO THE ARRAY B. IF NB > 0 THE ARRAY B() MUST -C INITIALLY CONTAIN THE M x NB MATRIX B OF THE -C THE LEAST SQUARES PROBLEM AX = B AND ON RETURN -C THE ARRAY B() WILL CONTAIN THE N x NB SOLUTION X. -C IF NB>1 THE ARRAY B() MUST BE DOUBLE SUBSCRIPTED -C WITH FIRST DIMENSIONING PARAMETER MDB>=MAX(M,N), -C IF NB=1 THE ARRAY B() MAY BE EITHER SINGLE OR -C DOUBLE SUBSCRIPTED. -C TAU ABSOLUTE TOLERANCE PARAMETER FOR PSEUDORANK -C DETERMINATION, PROVIDED BY THE USER. -C KRANK PSEUDORANK OF A, SET BY THE SUBROUTINE. -C RNORM ON EXIT, RNORM(J) WILL CONTAIN THE EUCLIDIAN -C NORM OF THE RESIDUAL VECTOR FOR THE PROBLEM -C DEFINED BY THE J-TH COLUMN VECTOR OF THE ARRAY B. -C H(), G() ARRAYS OF WORKING SPACE OF LENGTH >= N. -C IP() INTEGER ARRAY OF WORKING SPACE OF LENGTH >= N -C RECORDING PERMUTATION INDICES OF COLUMN VECTORS - - INTEGER i,j,jb,k,kp1,krank,l,ldiag,lmax,m, - . mda,mdb,n,nb,ip(n) - - DOUBLE PRECISION a(mda,n),b(mdb,nb),h(n),g(n),rnorm(nb),factor, - . tau,ZERO,hmax,diff,tmp,ddot_sl,dnrm2_,u,v - diff(u,v)= u-v - DATA ZERO/0.0d0/, factor/1.0d-3/ - - k=0 - ldiag=MIN(m,n) - IF(ldiag.LE.0) GOTO 270 - -C COMPUTE LMAX - - DO 80 j=1,ldiag - IF(j.EQ.1) GOTO 20 - lmax=j - DO 10 l=j,n - h(l)=h(l)-a(j-1,l)**2 - 10 IF(h(l).GT.h(lmax)) lmax=l - IF(diff(hmax+factor*h(lmax),hmax).GT.ZERO) - . 
GOTO 50 - 20 lmax=j - DO 40 l=j,n - h(l)=ZERO - DO 30 i=j,m - 30 h(l)=h(l)+a(i,l)**2 - 40 IF(h(l).GT.h(lmax)) lmax=l - hmax=h(lmax) - -C COLUMN INTERCHANGES IF NEEDED - - 50 ip(j)=lmax - IF(ip(j).EQ.j) GOTO 70 - DO 60 i=1,m - tmp=a(i,j) - a(i,j)=a(i,lmax) - 60 a(i,lmax)=tmp - h(lmax)=h(j) - -C J-TH TRANSFORMATION AND APPLICATION TO A AND B - - 70 i=MIN(j+1,n) - CALL h12(1,j,j+1,m,a(1,j),1,h(j),a(1,i),1,mda,n-j) - 80 CALL h12(2,j,j+1,m,a(1,j),1,h(j),b,1,mdb,nb) - -C DETERMINE PSEUDORANK - - DO 90 j=1,ldiag - 90 IF(ABS(a(j,j)).LE.tau) GOTO 100 - k=ldiag - GOTO 110 - 100 k=j-1 - 110 kp1=k+1 - -C NORM OF RESIDUALS - - DO 130 jb=1,nb - 130 rnorm(jb)=dnrm2_(m-k,b(kp1,jb),1) - IF(k.GT.0) GOTO 160 - DO 150 jb=1,nb - DO 150 i=1,n - 150 b(i,jb)=ZERO - GOTO 270 - 160 IF(k.EQ.n) GOTO 180 - -C HOUSEHOLDER DECOMPOSITION OF FIRST K ROWS - - DO 170 i=k,1,-1 - 170 CALL h12(1,i,kp1,n,a(i,1),mda,g(i),a,mda,1,i-1) - 180 DO 250 jb=1,nb - -C SOLVE K*K TRIANGULAR SYSTEM - - DO 210 i=k,1,-1 - j=MIN(i+1,n) - 210 b(i,jb)=(b(i,jb)-ddot_sl(k-i,a(i,j),mda,b(j,jb),1))/a(i,i) - -C COMPLETE SOLUTION VECTOR - - IF(k.EQ.n) GOTO 240 - DO 220 j=kp1,n - 220 b(j,jb)=ZERO - DO 230 i=1,k - 230 CALL h12(2,i,kp1,n,a(i,1),mda,g(i),b(1,jb),1,mdb,1) - -C REORDER SOLUTION ACCORDING TO PREVIOUS COLUMN INTERCHANGES - - 240 DO 250 j=ldiag,1,-1 - IF(ip(j).EQ.j) GOTO 250 - l=ip(j) - tmp=b(l,jb) - b(l,jb)=b(j,jb) - b(j,jb)=tmp - 250 CONTINUE - 270 krank=k - END - - SUBROUTINE h12 (mode,lpivot,l1,m,u,iue,up,c,ice,icv,ncv) - -C C.L.LAWSON AND R.J.HANSON, JET PROPULSION LABORATORY, 1973 JUN 12 -C TO APPEAR IN 'SOLVING LEAST SQUARES PROBLEMS', PRENTICE-HALL, 1974 - -C CONSTRUCTION AND/OR APPLICATION OF A SINGLE -C HOUSEHOLDER TRANSFORMATION Q = I + U*(U**T)/B - -C MODE = 1 OR 2 TO SELECT ALGORITHM H1 OR H2 . -C LPIVOT IS THE INDEX OF THE PIVOT ELEMENT. -C L1,M IF L1 <= M THE TRANSFORMATION WILL BE CONSTRUCTED TO -C ZERO ELEMENTS INDEXED FROM L1 THROUGH M. -C IF L1 > M THE SUBROUTINE DOES AN IDENTITY TRANSFORMATION. 
-C U(),IUE,UP -C ON ENTRY TO H1 U() STORES THE PIVOT VECTOR. -C IUE IS THE STORAGE INCREMENT BETWEEN ELEMENTS. -C ON EXIT FROM H1 U() AND UP STORE QUANTITIES DEFINING -C THE VECTOR U OF THE HOUSEHOLDER TRANSFORMATION. -C ON ENTRY TO H2 U() AND UP -C SHOULD STORE QUANTITIES PREVIOUSLY COMPUTED BY H1. -C THESE WILL NOT BE MODIFIED BY H2. -C C() ON ENTRY TO H1 OR H2 C() STORES A MATRIX WHICH WILL BE -C REGARDED AS A SET OF VECTORS TO WHICH THE HOUSEHOLDER -C TRANSFORMATION IS TO BE APPLIED. -C ON EXIT C() STORES THE SET OF TRANSFORMED VECTORS. -C ICE STORAGE INCREMENT BETWEEN ELEMENTS OF VECTORS IN C(). -C ICV STORAGE INCREMENT BETWEEN VECTORS IN C(). -C NCV NUMBER OF VECTORS IN C() TO BE TRANSFORMED. -C IF NCV <= 0 NO OPERATIONS WILL BE DONE ON C(). - - INTEGER incr, ice, icv, iue, lpivot, l1, mode, ncv - INTEGER i, i2, i3, i4, j, m - DOUBLE PRECISION u,up,c,cl,clinv,b,sm,one,ZERO - DIMENSION u(iue,*), c(*) - DATA one/1.0d+00/, ZERO/0.0d+00/ - - IF (0.GE.lpivot.OR.lpivot.GE.l1.OR.l1.GT.m) GOTO 80 - cl=ABS(u(1,lpivot)) - IF (mode.EQ.2) GOTO 30 - -C ****** CONSTRUCT THE TRANSFORMATION ****** - - DO 10 j=l1,m - sm=ABS(u(1,j)) - 10 cl=MAX(sm,cl) - IF (cl.LE.ZERO) GOTO 80 - clinv=one/cl - sm=(u(1,lpivot)*clinv)**2 - DO 20 j=l1,m - 20 sm=sm+(u(1,j)*clinv)**2 - cl=cl*SQRT(sm) - IF (u(1,lpivot).GT.ZERO) cl=-cl - up=u(1,lpivot)-cl - u(1,lpivot)=cl - GOTO 40 -C ****** APPLY THE TRANSFORMATION I+U*(U**T)/B TO C ****** - - 30 IF (cl.LE.ZERO) GOTO 80 - 40 IF (ncv.LE.0) GOTO 80 - b=up*u(1,lpivot) - IF (b.GE.ZERO) GOTO 80 - b=one/b - i2=1-icv+ice*(lpivot-1) - incr=ice*(l1-lpivot) - DO 70 j=1,ncv - i2=i2+icv - i3=i2+incr - i4=i3 - sm=c(i2)*up - DO 50 i=l1,m - sm=sm+c(i3)*u(1,i) - 50 i3=i3+ice - IF (sm.EQ.ZERO) GOTO 70 - sm=sm*b - c(i2)=c(i2)+sm*up - DO 60 i=l1,m - c(i4)=c(i4)+sm*u(1,i) - 60 i4=i4+ice - 70 CONTINUE - 80 END - - SUBROUTINE ldl (n,a,z,sigma,w) -C LDL LDL' - RANK-ONE - UPDATE - -C PURPOSE: -C UPDATES THE LDL' FACTORS OF MATRIX A BY RANK-ONE MATRIX -C SIGMA*Z*Z' - -C 
INPUT ARGUMENTS: (* MEANS PARAMETERS ARE CHANGED DURING EXECUTION) -C N : ORDER OF THE COEFFICIENT MATRIX A -C * A : POSITIVE DEFINITE MATRIX OF DIMENSION N; -C ONLY THE LOWER TRIANGLE IS USED AND IS STORED COLUMN BY -C COLUMN AS ONE DIMENSIONAL ARRAY OF DIMENSION N*(N+1)/2. -C * Z : VECTOR OF DIMENSION N OF UPDATING ELEMENTS -C SIGMA : SCALAR FACTOR BY WHICH THE MODIFYING DYADE Z*Z' IS -C MULTIPLIED - -C OUTPUT ARGUMENTS: -C A : UPDATED LDL' FACTORS - -C WORKING ARRAY: -C W : VECTOR OP DIMENSION N (USED ONLY IF SIGMA .LT. ZERO) - -C METHOD: -C THAT OF FLETCHER AND POWELL AS DESCRIBED IN : -C FLETCHER,R.,(1974) ON THE MODIFICATION OF LDL' FACTORIZATION. -C POWELL,M.J.D. MATH.COMPUTATION 28, 1067-1078. - -C IMPLEMENTED BY: -C KRAFT,D., DFVLR - INSTITUT FUER DYNAMIK DER FLUGSYSTEME -C D-8031 OBERPFAFFENHOFEN - -C STATUS: 15. JANUARY 1980 - -C SUBROUTINES REQUIRED: NONE - - INTEGER i, ij, j, n - DOUBLE PRECISION a(*), t, v, w(*), z(*), u, tp, one, beta, four, - * ZERO, alpha, delta, gamma, sigma, epmach - DATA ZERO, one, four, epmach /0.0d0, 1.0d0, 4.0d0, 2.22d-16/ - - IF(sigma.EQ.ZERO) GOTO 280 - ij=1 - t=one/sigma - IF(sigma.GT.ZERO) GOTO 220 -C PREPARE NEGATIVE UPDATE - DO 150 i=1,n - 150 w(i)=z(i) - DO 170 i=1,n - v=w(i) - t=t+v*v/a(ij) - DO 160 j=i+1,n - ij=ij+1 - 160 w(j)=w(j)-v*a(ij) - 170 ij=ij+1 - IF(t.GE.ZERO) t=epmach/sigma - DO 210 i=1,n - j=n+1-i - ij=ij-i - u=w(j) - w(j)=t - 210 t=t-u*u/a(ij) - 220 CONTINUE -C HERE UPDATING BEGINS - DO 270 i=1,n - v=z(i) - delta=v/a(ij) - IF(sigma.LT.ZERO) tp=w(i) - IF(sigma.GT.ZERO) tp=t+delta*v - alpha=tp/t - a(ij)=alpha*a(ij) - IF(i.EQ.n) GOTO 280 - beta=delta/tp - IF(alpha.GT.four) GOTO 240 - DO 230 j=i+1,n - ij=ij+1 - z(j)=z(j)-v*a(ij) - 230 a(ij)=a(ij)+beta*z(j) - GOTO 260 - 240 gamma=t/tp - DO 250 j=i+1,n - ij=ij+1 - u=a(ij) - a(ij)=gamma*u+beta*z(j) - 250 z(j)=z(j)-v*u - 260 ij=ij+1 - 270 t=tp - 280 RETURN -C END OF LDL - END - - DOUBLE PRECISION FUNCTION linmin (mode, ax, bx, f, tol) -C LINMIN LINESEARCH WITHOUT 
DERIVATIVES - -C PURPOSE: - -C TO FIND THE ARGUMENT LINMIN WHERE THE FUNCTION F TAKES IT'S MINIMUM -C ON THE INTERVAL AX, BX. -C COMBINATION OF GOLDEN SECTION AND SUCCESSIVE QUADRATIC INTERPOLATION. - -C INPUT ARGUMENTS: (* MEANS PARAMETERS ARE CHANGED DURING EXECUTION) - -C *MODE SEE OUTPUT ARGUMENTS -C AX LEFT ENDPOINT OF INITIAL INTERVAL -C BX RIGHT ENDPOINT OF INITIAL INTERVAL -C F FUNCTION VALUE AT LINMIN WHICH IS TO BE BROUGHT IN BY -C REVERSE COMMUNICATION CONTROLLED BY MODE -C TOL DESIRED LENGTH OF INTERVAL OF UNCERTAINTY OF FINAL RESULT - -C OUTPUT ARGUMENTS: - -C LINMIN ABSCISSA APPROXIMATING THE POINT WHERE F ATTAINS A MINIMUM -C MODE CONTROLS REVERSE COMMUNICATION -C MUST BE SET TO 0 INITIALLY, RETURNS WITH INTERMEDIATE -C VALUES 1 AND 2 WHICH MUST NOT BE CHANGED BY THE USER, -C ENDS WITH CONVERGENCE WITH VALUE 3. - -C WORKING ARRAY: - -C NONE - -C METHOD: - -C THIS FUNCTION SUBPROGRAM IS A SLIGHTLY MODIFIED VERSION OF THE -C ALGOL 60 PROCEDURE LOCALMIN GIVEN IN -C R.P. BRENT: ALGORITHMS FOR MINIMIZATION WITHOUT DERIVATIVES, -C PRENTICE-HALL (1973). - -C IMPLEMENTED BY: - -C KRAFT, D., DFVLR - INSTITUT FUER DYNAMIK DER FLUGSYSTEME -C D-8031 OBERPFAFFENHOFEN - -C STATUS: 31. AUGUST 1984 - -C SUBROUTINES REQUIRED: NONE - - INTEGER mode - DOUBLE PRECISION f, tol, a, b, c, d, e, p, q, r, u, v, w, x, m, - & fu, fv, fw, fx, eps, tol1, tol2, ZERO, ax, bx - DATA c /0.381966011d0/, eps /1.5d-8/, ZERO /0.0d0/ - -C EPS = SQUARE - ROOT OF MACHINE PRECISION -C C = GOLDEN SECTION RATIO = (3-SQRT(5))/2 - - GOTO (10, 55), mode - -C INITIALIZATION - - a = ax - b = bx - e = ZERO - v = a + c*(b - a) - w = v - x = w - linmin = x - mode = 1 - GOTO 100 - -C MAIN LOOP STARTS HERE - - 10 fx = f - fv = fx - fw = fv - 20 m = 0.5d0*(a + b) - tol1 = eps*ABS(x) + tol - tol2 = tol1 + tol1 - -C TEST CONVERGENCE - - IF (ABS(x - m) .LE. tol2 - 0.5d0*(b - a)) GOTO 90 - r = ZERO - q = r - p = q - IF (ABS(e) .LE. 
tol1) GOTO 30 - -C FIT PARABOLA - - r = (x - w)*(fx - fv) - q = (x - v)*(fx - fw) - p = (x - v)*q - (x - w)*r - q = q - r - q = q + q - IF (q .GT. ZERO) p = -p - IF (q .LT. ZERO) q = -q - r = e - e = d - -C IS PARABOLA ACCEPTABLE - - 30 IF (ABS(p) .GE. 0.5d0*ABS(q*r) .OR. - & p .LE. q*(a - x) .OR. p .GE. q*(b-x)) GOTO 40 - -C PARABOLIC INTERPOLATION STEP - - d = p/q - -C F MUST NOT BE EVALUATED TOO CLOSE TO A OR B - - IF (u - a .LT. tol2) d = SIGN(tol1, m - x) - IF (b - u .LT. tol2) d = SIGN(tol1, m - x) - GOTO 50 - -C GOLDEN SECTION STEP - - 40 IF (x .GE. m) e = a - x - IF (x .LT. m) e = b - x - d = c*e - -C F MUST NOT BE EVALUATED TOO CLOSE TO X - - 50 IF (ABS(d) .LT. tol1) d = SIGN(tol1, d) - u = x + d - linmin = u - mode = 2 - GOTO 100 - 55 fu = f - -C UPDATE A, B, V, W, AND X - - IF (fu .GT. fx) GOTO 60 - IF (u .GE. x) a = x - IF (u .LT. x) b = x - v = w - fv = fw - w = x - fw = fx - x = u - fx = fu - GOTO 85 - 60 IF (u .LT. x) a = u - IF (u .GE. x) b = u - IF (fu .LE. fw .OR. w .EQ. x) GOTO 70 - IF (fu .LE. fv .OR. v .EQ. x .OR. v .EQ. w) GOTO 80 - GOTO 85 - 70 v = w - fv = fw - w = u - fw = fu - GOTO 85 - 80 v = u - fv = fu - 85 GOTO 20 - -C END OF MAIN LOOP - - 90 linmin = x - mode = 3 - 100 RETURN - -C END OF LINMIN - - END - -C## Following a selection from BLAS Level 1 - - SUBROUTINE daxpy_sl(n,da,dx,incx,dy,incy) - -C CONSTANT TIMES A VECTOR PLUS A VECTOR. -C USES UNROLLED LOOPS FOR INCREMENTS EQUAL TO ONE. -C JACK DONGARRA, LINPACK, 3/11/78. - - DOUBLE PRECISION dx(*),dy(*),da - INTEGER i,incx,incy,ix,iy,m,mp1,n - - IF(n.LE.0)RETURN - IF(da.EQ.0.0d0)RETURN - IF(incx.EQ.1.AND.incy.EQ.1)GO TO 20 - -C CODE FOR UNEQUAL INCREMENTS OR EQUAL INCREMENTS -C NOT EQUAL TO 1 - - ix = 1 - iy = 1 - IF(incx.LT.0)ix = (-n+1)*incx + 1 - IF(incy.LT.0)iy = (-n+1)*incy + 1 - DO 10 i = 1,n - dy(iy) = dy(iy) + da*dx(ix) - ix = ix + incx - iy = iy + incy - 10 CONTINUE - RETURN - -C CODE FOR BOTH INCREMENTS EQUAL TO 1 - -C CLEAN-UP LOOP - - 20 m = MOD(n,4) - IF( m .EQ. 
0 ) GO TO 40 - DO 30 i = 1,m - dy(i) = dy(i) + da*dx(i) - 30 CONTINUE - IF( n .LT. 4 ) RETURN - 40 mp1 = m + 1 - DO 50 i = mp1,n,4 - dy(i) = dy(i) + da*dx(i) - dy(i + 1) = dy(i + 1) + da*dx(i + 1) - dy(i + 2) = dy(i + 2) + da*dx(i + 2) - dy(i + 3) = dy(i + 3) + da*dx(i + 3) - 50 CONTINUE - RETURN - END - - SUBROUTINE dcopy_(n,dx,incx,dy,incy) - -C COPIES A VECTOR, X, TO A VECTOR, Y. -C USES UNROLLED LOOPS FOR INCREMENTS EQUAL TO ONE. -C JACK DONGARRA, LINPACK, 3/11/78. - - DOUBLE PRECISION dx(*),dy(*) - INTEGER i,incx,incy,ix,iy,m,mp1,n - - IF(n.LE.0)RETURN - IF(incx.EQ.1.AND.incy.EQ.1)GO TO 20 - -C CODE FOR UNEQUAL INCREMENTS OR EQUAL INCREMENTS -C NOT EQUAL TO 1 - - ix = 1 - iy = 1 - IF(incx.LT.0)ix = (-n+1)*incx + 1 - IF(incy.LT.0)iy = (-n+1)*incy + 1 - DO 10 i = 1,n - dy(iy) = dx(ix) - ix = ix + incx - iy = iy + incy - 10 CONTINUE - RETURN - -C CODE FOR BOTH INCREMENTS EQUAL TO 1 - -C CLEAN-UP LOOP - - 20 m = MOD(n,7) - IF( m .EQ. 0 ) GO TO 40 - DO 30 i = 1,m - dy(i) = dx(i) - 30 CONTINUE - IF( n .LT. 7 ) RETURN - 40 mp1 = m + 1 - DO 50 i = mp1,n,7 - dy(i) = dx(i) - dy(i + 1) = dx(i + 1) - dy(i + 2) = dx(i + 2) - dy(i + 3) = dx(i + 3) - dy(i + 4) = dx(i + 4) - dy(i + 5) = dx(i + 5) - dy(i + 6) = dx(i + 6) - 50 CONTINUE - RETURN - END - - DOUBLE PRECISION FUNCTION ddot_sl(n,dx,incx,dy,incy) - -C FORMS THE DOT PRODUCT OF TWO VECTORS. -C USES UNROLLED LOOPS FOR INCREMENTS EQUAL TO ONE. -C JACK DONGARRA, LINPACK, 3/11/78. - - DOUBLE PRECISION dx(*),dy(*),dtemp - INTEGER i,incx,incy,ix,iy,m,mp1,n - - ddot_sl = 0.0d0 - dtemp = 0.0d0 - IF(n.LE.0)RETURN - IF(incx.EQ.1.AND.incy.EQ.1)GO TO 20 - -C CODE FOR UNEQUAL INCREMENTS OR EQUAL INCREMENTS -C NOT EQUAL TO 1 - - ix = 1 - iy = 1 - IF(incx.LT.0)ix = (-n+1)*incx + 1 - IF(incy.LT.0)iy = (-n+1)*incy + 1 - DO 10 i = 1,n - dtemp = dtemp + dx(ix)*dy(iy) - ix = ix + incx - iy = iy + incy - 10 CONTINUE - ddot_sl = dtemp - RETURN - -C CODE FOR BOTH INCREMENTS EQUAL TO 1 - -C CLEAN-UP LOOP - - 20 m = MOD(n,5) - IF( m .EQ. 
0 ) GO TO 40 - DO 30 i = 1,m - dtemp = dtemp + dx(i)*dy(i) - 30 CONTINUE - IF( n .LT. 5 ) GO TO 60 - 40 mp1 = m + 1 - DO 50 i = mp1,n,5 - dtemp = dtemp + dx(i)*dy(i) + dx(i + 1)*dy(i + 1) + - * dx(i + 2)*dy(i + 2) + dx(i + 3)*dy(i + 3) + dx(i + 4)*dy(i + 4) - 50 CONTINUE - 60 ddot_sl = dtemp - RETURN - END - - DOUBLE PRECISION FUNCTION dnrm1(n,x,i,j) - INTEGER n, i, j, k - DOUBLE PRECISION snormx, sum, x(n), ZERO, one, scale, temp - DATA ZERO/0.0d0/, one/1.0d0/ - -C DNRM1 - COMPUTES THE I-NORM OF A VECTOR -C BETWEEN THE ITH AND THE JTH ELEMENTS - -C INPUT - -C N LENGTH OF VECTOR -C X VECTOR OF LENGTH N -C I INITIAL ELEMENT OF VECTOR TO BE USED -C J FINAL ELEMENT TO USE - -C OUTPUT - -C DNRM1 NORM - - snormx=ZERO - DO 10 k=i,j - 10 snormx=MAX(snormx,ABS(x(k))) - dnrm1 = snormx - IF (snormx.EQ.ZERO) RETURN - scale = snormx - IF (snormx.GE.one) scale=SQRT(snormx) - sum=ZERO - DO 20 k=i,j - temp=ZERO - IF (ABS(x(k))+scale .NE. scale) temp = x(k)/snormx - IF (one+temp.NE.one) sum = sum+temp*temp - 20 CONTINUE - sum=SQRT(sum) - dnrm1=snormx*sum - RETURN - END - - DOUBLE PRECISION FUNCTION dnrm2_ ( n, dx, incx) - INTEGER n, i, j, nn, next, incx - DOUBLE PRECISION dx(*), cutlo, cuthi, hitest, sum, xmax, ZERO, one - DATA ZERO, one /0.0d0, 1.0d0/ - -C EUCLIDEAN NORM OF THE N-VECTOR STORED IN DX() WITH STORAGE -C INCREMENT INCX . -C IF N .LE. 0 RETURN WITH RESULT = 0. -C IF N .GE. 1 THEN INCX MUST BE .GE. 1 - -C C.L.LAWSON, 1978 JAN 08 - -C FOUR PHASE METHOD USING TWO BUILT-IN CONSTANTS THAT ARE -C HOPEFULLY APPLICABLE TO ALL MACHINES. -C CUTLO = MAXIMUM OF SQRT(U/EPS) OVER ALL KNOWN MACHINES. -C CUTHI = MINIMUM OF SQRT(V) OVER ALL KNOWN MACHINES. -C WHERE -C EPS = SMALLEST NO. SUCH THAT EPS + 1. .GT. 1. -C U = SMALLEST POSITIVE NO. (UNDERFLOW LIMIT) -C V = LARGEST NO. (OVERFLOW LIMIT) - -C BRIEF OUTLINE OF ALGORITHM.. - -C PHASE 1 SCANS ZERO COMPONENTS. -C MOVE TO PHASE 2 WHEN A COMPONENT IS NONZERO AND .LE. CUTLO -C MOVE TO PHASE 3 WHEN A COMPONENT IS .GT. 
CUTLO -C MOVE TO PHASE 4 WHEN A COMPONENT IS .GE. CUTHI/M -C WHERE M = N FOR X() REAL AND M = 2*N FOR COMPLEX. - -C VALUES FOR CUTLO AND CUTHI.. -C FROM THE ENVIRONMENTAL PARAMETERS LISTED IN THE IMSL CONVERTER -C DOCUMENT THE LIMITING VALUES ARE AS FOLLOWS.. -C CUTLO, S.P. U/EPS = 2**(-102) FOR HONEYWELL. CLOSE SECONDS ARE -C UNIVAC AND DEC AT 2**(-103) -C THUS CUTLO = 2**(-51) = 4.44089E-16 -C CUTHI, S.P. V = 2**127 FOR UNIVAC, HONEYWELL, AND DEC. -C THUS CUTHI = 2**(63.5) = 1.30438E19 -C CUTLO, D.P. U/EPS = 2**(-67) FOR HONEYWELL AND DEC. -C THUS CUTLO = 2**(-33.5) = 8.23181D-11 -C CUTHI, D.P. SAME AS S.P. CUTHI = 1.30438D19 -C DATA CUTLO, CUTHI / 8.232D-11, 1.304D19 / -C DATA CUTLO, CUTHI / 4.441E-16, 1.304E19 / - DATA cutlo, cuthi / 8.232d-11, 1.304d19 / - - IF(n .GT. 0) GO TO 10 - dnrm2_ = ZERO - GO TO 300 - - 10 assign 30 to next - sum = ZERO - nn = n * incx -C BEGIN MAIN LOOP - i = 1 - 20 GO TO next,(30, 50, 70, 110) - 30 IF( ABS(dx(i)) .GT. cutlo) GO TO 85 - assign 50 to next - xmax = ZERO - -C PHASE 1. SUM IS ZERO - - 50 IF( dx(i) .EQ. ZERO) GO TO 200 - IF( ABS(dx(i)) .GT. cutlo) GO TO 85 - -C PREPARE FOR PHASE 2. - - assign 70 to next - GO TO 105 - -C PREPARE FOR PHASE 4. - - 100 i = j - assign 110 to next - sum = (sum / dx(i)) / dx(i) - 105 xmax = ABS(dx(i)) - GO TO 115 - -C PHASE 2. SUM IS SMALL. -C SCALE TO AVOID DESTRUCTIVE UNDERFLOW. - - 70 IF( ABS(dx(i)) .GT. cutlo ) GO TO 75 - -C COMMON CODE FOR PHASES 2 AND 4. -C IN PHASE 4 SUM IS LARGE. SCALE TO AVOID OVERFLOW. - - 110 IF( ABS(dx(i)) .LE. xmax ) GO TO 115 - sum = one + sum * (xmax / dx(i))**2 - xmax = ABS(dx(i)) - GO TO 200 - - 115 sum = sum + (dx(i)/xmax)**2 - GO TO 200 - -C PREPARE FOR PHASE 3. - - 75 sum = (sum * xmax) * xmax - -C FOR REAL OR D.P. SET HITEST = CUTHI/N -C FOR COMPLEX SET HITEST = CUTHI/(2*N) - - 85 hitest = cuthi/float( n ) - -C PHASE 3. SUM IS MID-RANGE. NO SCALING. - - DO 95 j =i,nn,incx - IF(ABS(dx(j)) .GE. 
hitest) GO TO 100 - 95 sum = sum + dx(j)**2 - dnrm2_ = SQRT( sum ) - GO TO 300 - - 200 CONTINUE - i = i + incx - IF ( i .LE. nn ) GO TO 20 - -C END OF MAIN LOOP. - -C COMPUTE SQUARE ROOT AND ADJUST FOR SCALING. - - dnrm2_ = xmax * SQRT(sum) - 300 CONTINUE - RETURN - END - - SUBROUTINE dsrot (n,dx,incx,dy,incy,c,s) - -C APPLIES A PLANE ROTATION. -C JACK DONGARRA, LINPACK, 3/11/78. - - DOUBLE PRECISION dx(*),dy(*),dtemp,c,s - INTEGER i,incx,incy,ix,iy,n - - IF(n.LE.0)RETURN - IF(incx.EQ.1.AND.incy.EQ.1)GO TO 20 - -C CODE FOR UNEQUAL INCREMENTS OR EQUAL INCREMENTS NOT EQUAL -C TO 1 - - ix = 1 - iy = 1 - IF(incx.LT.0)ix = (-n+1)*incx + 1 - IF(incy.LT.0)iy = (-n+1)*incy + 1 - DO 10 i = 1,n - dtemp = c*dx(ix) + s*dy(iy) - dy(iy) = c*dy(iy) - s*dx(ix) - dx(ix) = dtemp - ix = ix + incx - iy = iy + incy - 10 CONTINUE - RETURN - -C CODE FOR BOTH INCREMENTS EQUAL TO 1 - - 20 DO 30 i = 1,n - dtemp = c*dx(i) + s*dy(i) - dy(i) = c*dy(i) - s*dx(i) - dx(i) = dtemp - 30 CONTINUE - RETURN - END - - SUBROUTINE dsrotg(da,db,c,s) - -C CONSTRUCT GIVENS PLANE ROTATION. -C JACK DONGARRA, LINPACK, 3/11/78. -C MODIFIED 9/27/86. - - DOUBLE PRECISION da,db,c,s,roe,scale,r,z,one,ZERO - DATA one, ZERO /1.0d+00, 0.0d+00/ - - roe = db - IF( ABS(da) .GT. ABS(db) ) roe = da - scale = ABS(da) + ABS(db) - IF( scale .NE. ZERO ) GO TO 10 - c = one - s = ZERO - r = ZERO - GO TO 20 - 10 r = scale*SQRT((da/scale)**2 + (db/scale)**2) - r = SIGN(one,roe)*r - c = da/r - s = db/r - 20 z = s - IF( ABS(c) .GT. ZERO .AND. ABS(c) .LE. s ) z = one/c - da = r - db = z - RETURN - END - - SUBROUTINE dscal_sl(n,da,dx,incx) - -C SCALES A VECTOR BY A CONSTANT. -C USES UNROLLED LOOPS FOR INCREMENT EQUAL TO ONE. -C JACK DONGARRA, LINPACK, 3/11/78. 
- - DOUBLE PRECISION da,dx(*) - INTEGER i,incx,m,mp1,n,nincx - - IF(n.LE.0)RETURN - IF(incx.EQ.1)GO TO 20 - - -C CODE FOR INCREMENT NOT EQUAL TO 1 - - nincx = n*incx - DO 10 i = 1,nincx,incx - dx(i) = da*dx(i) - 10 CONTINUE - RETURN - -C CODE FOR INCREMENT EQUAL TO 1 - -C CLEAN-UP LOOP - - 20 m = MOD(n,5) - IF( m .EQ. 0 ) GO TO 40 - DO 30 i = 1,m - dx(i) = da*dx(i) - 30 CONTINUE - IF( n .LT. 5 ) RETURN - 40 mp1 = m + 1 - DO 50 i = mp1,n,5 - dx(i) = da*dx(i) - dx(i + 1) = da*dx(i + 1) - dx(i + 2) = da*dx(i + 2) - dx(i + 3) = da*dx(i + 3) - dx(i + 4) = da*dx(i + 4) - 50 CONTINUE - RETURN - END - - subroutine bound(n, x, xl, xu) - integer n, i - double precision x(n), xl(n), xu(n) - do i = 1, n -C Note that xl(i) and xu(i) may be NaN to indicate no bound - if(xl(i).eq.xl(i).and.x(i) < xl(i))then - x(i) = xl(i) - else if(xu(i).eq.xu(i).and.x(i) > xu(i))then - x(i) = xu(i) - end if - end do - end subroutine bound diff --git a/scipy/optimize/tests/test_bracket.py b/scipy/optimize/tests/test_bracket.py index ca1b2ced20b1..95996730eca6 100644 --- a/scipy/optimize/tests/test_bracket.py +++ b/scipy/optimize/tests/test_bracket.py @@ -249,13 +249,13 @@ def test_input_validation(self, xp): with pytest.raises(ValueError, match=message): _bracket_root(lambda x: x, -4+1j, 4) with pytest.raises(ValueError, match=message): - _bracket_root(lambda x: x, -4, 'hello') + _bracket_root(lambda x: x, -4, 4+1j) with pytest.raises(ValueError, match=message): - _bracket_root(lambda x: x, -4, 4, xmin=np) + _bracket_root(lambda x: x, -4, 4, xmin=4+1j) with pytest.raises(ValueError, match=message): - _bracket_root(lambda x: x, -4, 4, xmax=object()) + _bracket_root(lambda x: x, -4, 4, xmax=4+1j) with pytest.raises(ValueError, match=message): - _bracket_root(lambda x: x, -4, 4, factor=sum) + _bracket_root(lambda x: x, -4, 4, factor=4+1j) message = "All elements of `factor` must be greater than 1." with pytest.raises(ValueError, match=message): @@ -321,7 +321,7 @@ def f(x): # 2. 
bracket endpoint hits root exactly f.count = 0 - res = _bracket_root(f, xp.asarray(5.), xp.asarray(10.), + res = _bracket_root(f, xp.asarray(5.), xp.asarray(10.), factor=2) assert res.nfev == 4 @@ -330,12 +330,12 @@ def f(x): # 3. bracket limit hits root exactly with np.errstate(over='ignore'): - res = _bracket_root(f, xp.asarray(5.), xp.asarray(10.), + res = _bracket_root(f, xp.asarray(5.), xp.asarray(10.), xmin=0) xp_assert_close(res.xl, xp.asarray(0.), atol=1e-15) with np.errstate(over='ignore'): - res = _bracket_root(f, xp.asarray(-10.), xp.asarray(-5.), + res = _bracket_root(f, xp.asarray(-10.), xp.asarray(-5.), xmax=0) xp_assert_close(res.xr, xp.asarray(0.), atol=1e-15) @@ -552,23 +552,21 @@ def test_input_validation(self, xp): with pytest.raises(ValueError, match=message): _bracket_minimum(lambda x: x**2, xp.asarray(4+1j)) with pytest.raises(ValueError, match=message): - _bracket_minimum(lambda x: x**2, xp.asarray(-4), xl0='hello') + _bracket_minimum(lambda x: x**2, xp.asarray(-4), xl0=4+1j) with pytest.raises(ValueError, match=message): - _bracket_minimum(lambda x: x**2, xp.asarray(-4), - xr0='farcical aquatic ceremony') + _bracket_minimum(lambda x: x**2, xp.asarray(-4), xr0=4+1j) with pytest.raises(ValueError, match=message): - _bracket_minimum(lambda x: x**2, xp.asarray(-4), xmin=np) + _bracket_minimum(lambda x: x**2, xp.asarray(-4), xmin=4+1j) with pytest.raises(ValueError, match=message): - _bracket_minimum(lambda x: x**2, xp.asarray(-4), xmax=object()) + _bracket_minimum(lambda x: x**2, xp.asarray(-4), xmax=4+1j) with pytest.raises(ValueError, match=message): - _bracket_minimum(lambda x: x**2, xp.asarray(-4), factor=sum) + _bracket_minimum(lambda x: x**2, xp.asarray(-4), factor=4+1j) message = "All elements of `factor` must be greater than 1." 
with pytest.raises(ValueError, match=message): _bracket_minimum(lambda x: x, xp.asarray(-4), factor=0.5) - message = "shape mismatch: objects cannot be broadcast" - # raised by `xp.broadcast, but the traceback is readable IMO + message = "Array shapes are incompatible for broadcasting." with pytest.raises(ValueError, match=message): _bracket_minimum(lambda x: x**2, xp.asarray([-2, -3]), xl0=[-3, -4, -5]) @@ -803,8 +801,10 @@ def bracket_minimum_single(xm0, xl0, xr0, xmin, xmax, factor, a): factor = rng.random(size=shape) + 1.5 refs = bracket_minimum_single(xm0, xl0, xr0, xmin, xmax, factor, a).ravel() args = tuple(xp.asarray(arg, dtype=xp.float64) for arg in args) - res = _bracket_minimum(f, xp.asarray(xm0), xl0=xl0, xr0=xr0, xmin=xmin, - xmax=xmax, factor=factor, args=args, maxiter=maxiter) + res = _bracket_minimum(f, xp.asarray(xm0), xl0=xp.asarray(xl0), + xr0=xp.asarray(xr0), xmin=xp.asarray(xmin), + xmax=xp.asarray(xmax), factor=xp.asarray(factor), + args=args, maxiter=maxiter) attrs = ['xl', 'xm', 'xr', 'fl', 'fm', 'fr', 'success', 'nfev', 'nit'] for attr in attrs: diff --git a/scipy/optimize/tests/test_chandrupatla.py b/scipy/optimize/tests/test_chandrupatla.py index 8714362c5f8b..f582aeb9dc06 100644 --- a/scipy/optimize/tests/test_chandrupatla.py +++ b/scipy/optimize/tests/test_chandrupatla.py @@ -282,7 +282,7 @@ def test_convergence(self, xp): # Test that the convergence tolerances behave as expected rng = np.random.default_rng(2585255913088665241) p = xp.asarray(rng.random(size=3)) - bracket = (xp.asarray(-5), xp.asarray(0), xp.asarray(5)) + bracket = (xp.asarray(-5, dtype=xp.float64), xp.asarray(0), xp.asarray(5)) args = (p,) kwargs0 = dict(args=args, xatol=0, xrtol=0, fatol=0, frtol=0) @@ -582,7 +582,8 @@ def f(*args, **kwargs): return self.f(*args, **kwargs) f.f_evals = 0 - res = find_root(f, (xp.asarray(-5.), xp.asarray(5.)), args=args_xp) + bracket = xp.asarray(-5., dtype=xp.float64), xp.asarray(5., dtype=xp.float64) + res = find_root(f, bracket, 
args=args_xp) refs = find_root_single(p).ravel() ref_x = [ref.x for ref in refs] diff --git a/scipy/optimize/tests/test_differentiable_functions.py b/scipy/optimize/tests/test_differentiable_functions.py index 5c16672f7371..1f8f53b794fd 100644 --- a/scipy/optimize/tests/test_differentiable_functions.py +++ b/scipy/optimize/tests/test_differentiable_functions.py @@ -481,8 +481,9 @@ def test_finite_difference_jac(self): assert_array_equal(analit.nfev, nfev) assert_array_equal(ex.njev, njev) assert_array_equal(analit.njev, njev) - approx = VectorFunction(ex.fun, x0, '2-point', ex.hess, None, None, - (-np.inf, np.inf), None) + # create with defaults for the keyword arguments, to + # ensure that the defaults work + approx = VectorFunction(ex.fun, x0, '2-point', ex.hess) nfev += 3 assert_array_equal(ex.nfev, nfev) assert_array_equal(analit.nfev+approx.nfev, nfev) @@ -563,6 +564,24 @@ def test_finite_difference_jac(self): assert_array_almost_equal(f_analit, f_approx) assert_array_almost_equal(J_analit, J_approx) + def test_updating_on_initial_setup(self): + # Check that memoisation works with the freshly created VectorFunction + # On initialization vf.f_updated attribute wasn't being set correctly. 
+ x0 = np.array([2.5, 3.0]) + ex = ExVectorialFunction() + vf = VectorFunction(ex.fun, x0, ex.jac, ex.hess) + assert vf.f_updated + assert vf.nfev == 1 + assert vf.njev == 1 + assert ex.nfev == 1 + assert ex.njev == 1 + vf.fun(x0) + vf.jac(x0) + assert vf.nfev == 1 + assert vf.njev == 1 + assert ex.nfev == 1 + assert ex.njev == 1 + @pytest.mark.fail_slow(5.0) def test_workers(self): x0 = np.array([2.5, 3.0]) @@ -761,6 +780,29 @@ def test_finite_difference_hess_linear_operator(self): assert_array_equal(ex.nhev, nhev) assert_array_equal(analit.nhev+approx.nhev, nhev) + def test_fgh_overlap(self): + # VectorFunction.fun/jac should return copies to internal attributes + ex = ExVectorialFunction() + x0 = np.array([1.0, 0.0]) + + vf = VectorFunction(ex.fun, x0, '3-point', ex.hess, None, None, + (-np.inf, np.inf), None) + f = vf.fun(np.array([1.1, 0.1])) + J = vf.jac([1.1, 0.1]) + assert vf.f is not f + assert vf.J is not J + assert_equal(f, vf.f) + assert_equal(J, vf.J) + + vf = VectorFunction(ex.fun, x0, ex.jac, ex.hess, None, None, + (-np.inf, np.inf), None) + f = vf.fun(np.array([1.1, 0.1])) + J = vf.jac([1.1, 0.1]) + assert vf.f is not f + assert vf.J is not J + assert_equal(f, vf.f) + assert_equal(J, vf.J) + @pytest.mark.thread_unsafe def test_x_storage_overlap(self): # VectorFunction should not store references to arrays, it should @@ -818,6 +860,44 @@ def test_float_size(self): res = vf.jac(x0) assert res.dtype == np.float32 + def test_sparse_analytic_jac(self): + ex = ExVectorialFunction() + x0 = np.array([1.0, 0.0]) + def sparse_adapter(func): + def inner(x): + f_x = func(x) + return csr_array(f_x) + return inner + + # jac(x) returns dense jacobian + vf1 = VectorFunction(ex.fun, x0, ex.jac, ex.hess, None, None, + (-np.inf, np.inf), sparse_jacobian=None) + # jac(x) returns sparse jacobian, but sparse_jacobian=False requests dense + vf2 = VectorFunction(ex.fun, x0, sparse_adapter(ex.jac), ex.hess, None, None, + (-np.inf, np.inf), sparse_jacobian=False) + + res1 = 
vf1.jac(x0 + 1) + res2 = vf2.jac(x0 + 1) + assert_equal(res1, res2) + + def test_sparse_numerical_jac(self): + ex = ExVectorialFunction() + x0 = np.array([1.0, 0.0]) + N = len(x0) + + # normal dense numerical difference + vf1 = VectorFunction(ex.fun, x0, '2-point', ex.hess, None, None, + (-np.inf, np.inf), sparse_jacobian=None) + # use sparse numerical difference, but ask it to be converted to dense + finite_diff_jac_sparsity = csr_array(np.ones((N, N))) + vf2 = VectorFunction(ex.fun, x0, '2-point', ex.hess, None, + finite_diff_jac_sparsity, (-np.inf, np.inf), + sparse_jacobian=False) + + res1 = vf1.jac(x0 + 1) + res2 = vf2.jac(x0 + 1) + assert_equal(res1, res2) + def test_LinearVectorFunction(): A_dense = np.array([ @@ -911,7 +991,6 @@ def test_ScalarFunctionNoReferenceCycle(): platform.python_implementation() == "PyPy", reason="assert_deallocate not available on PyPy" ) -@pytest.mark.xfail(reason="TODO remove reference cycle from VectorFunction") def test_VectorFunctionNoReferenceCycle(): """Regression test for gh-20768.""" ex = ExVectorialFunction() diff --git a/scipy/optimize/tests/test_least_squares.py b/scipy/optimize/tests/test_least_squares.py index 0cfecc20d85f..0b1614b2673c 100644 --- a/scipy/optimize/tests/test_least_squares.py +++ b/scipy/optimize/tests/test_least_squares.py @@ -36,6 +36,15 @@ def fun_rosenbrock(x): return np.array([10 * (x[1] - x[0]**2), (1 - x[0])]) +class Fun_Rosenbrock: + def __init__(self): + self.nfev = 0 + + def __call__(self, x, a=0): + self.nfev += 1 + return fun_rosenbrock(x) + + def jac_rosenbrock(x): return np.array([ [-20 * x[0], 10], @@ -235,19 +244,13 @@ def test_x_scale_options(self): 2.0, x_scale=1.0+2.0j, method=self.method) def test_diff_step(self): - # res1 and res2 should be equivalent. - # res2 and res3 should be different. 
res1 = least_squares(fun_trivial, 2.0, diff_step=1e-1, method=self.method) - res2 = least_squares(fun_trivial, 2.0, diff_step=-1e-1, - method=self.method) res3 = least_squares(fun_trivial, 2.0, diff_step=None, method=self.method) assert_allclose(res1.x, 0, atol=1e-4) - assert_allclose(res2.x, 0, atol=1e-4) assert_allclose(res3.x, 0, atol=1e-4) - assert_equal(res1.x, res2.x) - assert_equal(res1.nfev, res2.nfev) + def test_incorrect_options_usage(self): assert_raises(TypeError, least_squares, fun_trivial, 2.0, @@ -267,7 +270,6 @@ def test_full_result(self): assert_allclose(res.optimality, 0, atol=1e-2) assert_equal(res.active_mask, 0) if self.method == 'lm': - assert_(res.nfev < 30) assert_(res.njev is None) else: assert_(res.nfev < 10) @@ -295,6 +297,17 @@ def test_full_result_single_fev(self): assert_equal(res.status, 0) assert_equal(res.success, 0) + def test_nfev(self): + # checks that the true number of nfev are being consumed + for i in range(1, 3): + rng = np.random.default_rng(128908) + x0 = rng.uniform(size=2) * 10 + ftrivial = Fun_Rosenbrock() + res = least_squares( + ftrivial, x0, jac=jac_rosenbrock, method=self.method, max_nfev=i + ) + assert res.nfev == ftrivial.nfev + def test_rosenbrock(self): x0 = [-2, 1] x_opt = [1, 1] diff --git a/scipy/optimize/tests/test_nnls.py b/scipy/optimize/tests/test_nnls.py index 911ec10c9fd7..7ff42e2de884 100644 --- a/scipy/optimize/tests/test_nnls.py +++ b/scipy/optimize/tests/test_nnls.py @@ -433,6 +433,37 @@ def test_atol_deprecation_warning(self): """Test that using atol parameter triggers deprecation warning""" a = np.array([[1, 0], [1, 0], [0, 1]]) b = np.array([2, 1, 1]) - + with pytest.warns(DeprecationWarning, match="{'atol'}"): nnls(a, b, atol=1e-8) + + def test_2D_singleton_RHS_input(self): + # Test that a 2D singleton RHS input is accepted + A = np.array([[1.0, 0.5, -1.], + [1.0, 0.5, 0.0], + [-1., 0.0, 1.0]]) + b = np.array([[-1.0, 2.0, 2.0]]).T + x, r = nnls(A, b) + assert_allclose(x, np.array([1.0, 2.0, 
3.0])) + assert_allclose(r, 0.0) + + def test_2D_not_singleton_RHS_input_2(self): + # Test that a 2D but not a column vector RHS input is rejected + A = np.array([[1.0, 0.5, -1.], + [1.0, 0.5, 0.0], + [1.0, 0.5, 0.0], + [0.0, 0.0, 1.0]]) + b = np.ones(shape=[4, 2], dtype=np.float64) + with pytest.raises(ValueError, match="Expected a 1D array"): + nnls(A, b) + + def test_gh_22791_32bit(self): + # Scikit-learn got hit by this problem on 32-bit arch. + desired = [0, 0, 1.05617285, 0, 0, 0, 0, 0.23123048, 0, 0, 0, 0.26128651] + rng = np.random.RandomState(42) + n_samples, n_features = 5, 12 + X = rng.randn(n_samples, n_features) + X[:2, :] = 0 + y = rng.randn(n_samples) + coef, _ = nnls(X, y) + assert_allclose(coef, desired) diff --git a/scipy/optimize/tests/test_optimize.py b/scipy/optimize/tests/test_optimize.py index ff4140f65df2..5886ad0b2f48 100644 --- a/scipy/optimize/tests/test_optimize.py +++ b/scipy/optimize/tests/test_optimize.py @@ -3039,7 +3039,7 @@ def test_equal_bounds(method, kwds, bound_type, constraints, callback): # compare the output of a solution with FD vs that of an analytic grad assert res.success - assert_allclose(res.fun, expected.fun, rtol=1.5e-6) + assert_allclose(res.fun, expected.fun, rtol=2e-6) assert_allclose(res.x, expected.x, rtol=5e-4) if fd_needed or kwds['jac'] is False: diff --git a/scipy/optimize/tests/test_slsqp.py b/scipy/optimize/tests/test_slsqp.py index 45216aa296b5..33b87fc14bb5 100644 --- a/scipy/optimize/tests/test_slsqp.py +++ b/scipy/optimize/tests/test_slsqp.py @@ -592,7 +592,6 @@ def target(x): # The problem is infeasible, so it cannot succeed assert not res.success - @pytest.mark.thread_unsafe def test_parameters_stay_within_bounds(self): # gh11403. For some problems the SLSQP Fortran code suggests a step # outside one of the lower/upper bounds. 
When this happens @@ -607,7 +606,40 @@ def test_parameters_stay_within_bounds(self): def f(x): assert (x >= bounds.lb).all() return np.linalg.norm(x) + # The following should not raise any warnings which was the case, with the + # old Fortran code. + res = minimize(f, x0, method='SLSQP', bounds=bounds) + assert res.success + + +def test_slsqp_segfault_wrong_workspace_computation(): + # See gh-14915 + # This problem is not well-defined, however should not cause a segfault. + # The previous F77 workspace computation did not handle only equality- + # constrained problems correctly. + rng = np.random.default_rng(1742651087222879) + x = rng.uniform(size=[22,365]) + target = np.linspace(0.9, 4.0, 50) + + def metric(v, weights): + return [[0, 0],[1, 1]] + + def efficient_metric(v, target): + def metric_a(weights): + return metric(v, weights)[1][0] + + def metric_b(weights, v): + return metric(v, weights)[0][0] + + constraints = ({'type': 'eq', 'fun': lambda x: metric_a(x) - target}, + {'type': 'eq', 'fun': lambda x: np.sum(x) - 1}) + weights = np.array([len(v)*[1./len(v)]])[0] + result = minimize(metric_b, + weights, + args=(v,), + method='SLSQP', + constraints=constraints) + return result + + efficient_metric(x, target) - with pytest.warns(RuntimeWarning, match='x were outside bounds'): - res = minimize(f, x0, method='SLSQP', bounds=bounds) - assert res.success diff --git a/scipy/signal/_short_time_fft.py b/scipy/signal/_short_time_fft.py index d43370a18307..5da4017fce75 100644 --- a/scipy/signal/_short_time_fft.py +++ b/scipy/signal/_short_time_fft.py @@ -381,14 +381,14 @@ class ShortTimeFFT: It is possible to calculate the STFT of signal parts: - >>> p_q = SFT.nearest_k_p(N // 2) - >>> Sx0 = SFT.stft(x[:p_q]) - >>> Sx1 = SFT.stft(x[p_q:]) + >>> N2 = SFT.nearest_k_p(N // 2) + >>> Sx0 = SFT.stft(x[:N2]) + >>> Sx1 = SFT.stft(x[N2:]) When assembling sequential STFT parts together, the overlap needs to be considered: - >>> p0_ub = SFT.upper_border_begin(p_q)[1] - SFT.p_min 
+ >>> p0_ub = SFT.upper_border_begin(N2)[1] - SFT.p_min >>> p1_le = SFT.lower_border_end[1] - SFT.p_min >>> Sx01 = np.hstack((Sx0[:, :p0_ub], ... Sx0[:, p0_ub:] + Sx1[:, :p1_le], @@ -1675,7 +1675,15 @@ def p_min(self) -> int: @lru_cache(maxsize=256) def _post_padding(self, n: int) -> tuple[int, int]: - """Largest signal index and slice index due to padding.""" + """Largest signal index and slice index due to padding. + + Parameters + ---------- + n : int + Number of samples of input signal (must be ≥ half of the window length). + """ + if not (n >= (m2p := self.m_num - self.m_num_mid)): + raise ValueError(f"Parameter n must be >= ceil(m_num/2) = {m2p}!") w2 = self.win.real**2 + self.win.imag**2 # move window to the right until the overlap for t < t[n] vanishes: q1 = n // self.hop # last slice index with t[p1] <= t[n] @@ -1696,6 +1704,11 @@ def k_max(self, n: int) -> int: A detailed example is provided in the :ref:`tutorial_stft_sliding_win` section of the :ref:`user_guide`. + Parameters + ---------- + n : int + Number of samples of input signal (must be ≥ half of the window length). + See Also -------- k_min: The smallest possible signal index. @@ -1797,6 +1810,19 @@ def upper_border_begin(self, n: int) -> tuple[int, int]: A detailed example is given :ref:`tutorial_stft_sliding_win` section of the :ref:`user_guide`. + Parameters + ---------- + n : int + Number of samples of input signal (must be ≥ half of the window length). + + Returns + ------- + k_ub : int + Lowest signal index, where a touching time slice sticks out past the + signal end. + p_ub : int + Lowest index of time slice of which the end sticks out past the signal end. + See Also -------- k_min: The smallest possible signal index. @@ -1808,13 +1834,15 @@ def upper_border_begin(self, n: int) -> tuple[int, int]: p_range: Determine and validate slice index range. ShortTimeFFT: Class this method belongs to. 
""" + if not (n >= (m2p := self.m_num - self.m_num_mid)): + raise ValueError(f"Parameter n must be >= ceil(m_num/2) = {m2p}!") w2 = self.win.real**2 + self.win.imag**2 q2 = n // self.hop + 1 # first t[q] >= t[n] q1 = max((n-self.m_num) // self.hop - 1, -1) # move window left until does not stick out to the right: for q_ in range(q2, q1, -1): k_ = q_ * self.hop + (self.m_num - self.m_num_mid) - if k_ < n or all(w2[n-k_:] == 0): + if k_ <= n or all(w2[n-k_:] == 0): return (q_ + 1) * self.hop - self.m_num_mid, q_ + 1 return 0, 0 # border starts at first slice diff --git a/scipy/signal/_spline_filters.py b/scipy/signal/_spline_filters.py index 920becdffbf7..e3d080cc9cae 100644 --- a/scipy/signal/_spline_filters.py +++ b/scipy/signal/_spline_filters.py @@ -5,7 +5,7 @@ moveaxis, abs, complex64, float32) import numpy as np -from scipy._lib._array_api import array_namespace +from scipy._lib._array_api import array_namespace, xp_promote from scipy._lib._util import normalize_axis_index @@ -714,8 +714,8 @@ def symiirorder1(signal, c0, z1, precision=-1.0): The filtered signal. """ xp = array_namespace(signal) - - # internals of symiirorder1 are numpy-only + signal = xp_promote(signal, force_floating=True, xp=xp) + # This function uses C internals signal = np.asarray(signal) if abs(z1) >= 1: @@ -729,9 +729,6 @@ def symiirorder1(signal, c0, z1, precision=-1.0): signal = signal[None, :] squeeze_dim = True - if np.issubdtype(signal.dtype, np.integer): - signal = signal.astype(np.promote_types(signal.dtype, np.float32)) - y0 = symiirorder1_ic(signal, z1, precision) # Apply first the system 1 / (1 - z1 * z^-1) @@ -797,10 +794,9 @@ def symiirorder2(input, r, omega, precision=-1.0): The filtered signal. 
""" xp = array_namespace(input) - - # internals are numpy-only - input = np.asarray(input) - omega = np.asarray(omega) + input = xp_promote(input, force_floating=True, xp=xp) + # This function uses C internals + input = np.ascontiguousarray(input) if r >= 1.0: raise ValueError('r must be less than 1.0') @@ -808,22 +804,16 @@ def symiirorder2(input, r, omega, precision=-1.0): if input.ndim > 2: raise ValueError('Input must be 1D or 2D') - if not input.flags.c_contiguous: - input = input.copy() - squeeze_dim = False if input.ndim == 1: input = input[None, :] squeeze_dim = True - if np.issubdtype(input.dtype, np.integer): - input = input.astype(np.promote_types(input.dtype, np.float32)) - rsq = r * r - a2 = 2 * r * np.cos(omega) + a2 = 2 * r * math.cos(omega) a3 = -rsq - cs = np.atleast_1d(1 - 2 * r * np.cos(omega) + rsq) - sos = np.atleast_2d(np.r_[cs, 0, 0, 1, -a2, -a3]).astype(input.dtype) + cs = 1 - 2 * r * math.cos(omega) + rsq + sos = np.asarray([cs, 0, 0, 1, -a2, -a3], dtype=input.dtype) # Find the starting (forward) conditions. 
ic_fwd = symiirorder2_ic_fwd(input, r, omega, precision) @@ -831,7 +821,7 @@ def symiirorder2(input, r, omega, precision=-1.0): # Apply first the system cs / (1 - a2 * z^-1 - a3 * z^-2) # Compute the initial conditions in the form expected by sosfilt # coef = np.asarray([[a3, a2], [0, a3]], dtype=input.dtype) - coef = np.r_[a3, a2, 0, a3].reshape(2, 2).astype(input.dtype) + coef = np.asarray([[a3, a2], [0, a3]], dtype=input.dtype) zi = np.matmul(coef, ic_fwd[:, :, None])[:, :, 0] y_fwd, _ = sosfilt(sos, axis_slice(input, 2), zi=zi[None]) diff --git a/scipy/signal/tests/_scipy_spectral_test_shim.py b/scipy/signal/tests/_scipy_spectral_test_shim.py index c23f310bcae4..42d3d830d0e3 100644 --- a/scipy/signal/tests/_scipy_spectral_test_shim.py +++ b/scipy/signal/tests/_scipy_spectral_test_shim.py @@ -103,7 +103,7 @@ def _stft_wrapper(x, fs=1.0, window='hann', nperseg=256, noverlap=None, # This is an edge case where shortTimeFFT returns one more time slice # than the Scipy stft() shorten to remove last time slice: if n % 2 == 1 and nperseg % 2 == 1 and noverlap % 2 == 1: - x = x[..., :axis - 1] + x = x[..., : -1] nadd = (-(x.shape[-1]-nperseg) % nstep) % nperseg zeros_shape = list(x.shape[:-1]) + [nadd] @@ -124,11 +124,8 @@ def _stft_wrapper(x, fs=1.0, window='hann', nperseg=256, noverlap=None, k_off = nperseg // 2 p0 = 0 # ST.lower_border_end[1] + 1 nn = x.shape[axis] if padded else n+k_off+1 - p1 = ST.upper_border_begin(nn)[1] # ST.p_max(n) + 1 - - # This is bad hack to pass the test test_roundtrip_boundary_extension(): - if padded is True and nperseg - noverlap == 1: - p1 -= nperseg // 2 - 1 # the reasoning behind this is not clear to me + # number of frames akin to legacy stft computation + p1 = (x.shape[axis] - nperseg) // nstep + 1 detr = None if detrend is False else detrend Sxx = ST.stft_detrend(x, detr, p0, p1, k_offset=k_off, axis=axis) @@ -136,11 +133,6 @@ def _stft_wrapper(x, fs=1.0, window='hann', nperseg=256, noverlap=None, if x.dtype in (np.float32, 
np.complex64): Sxx = Sxx.astype(np.complex64) - # workaround for test_average_all_segments() - seems to be buggy behavior: - if boundary is None and padded is False: - t, Sxx = t[1:-1], Sxx[..., :-2] - t -= k_off / fs - return ST.f, t, Sxx diff --git a/scipy/signal/tests/test_short_time_fft.py b/scipy/signal/tests/test_short_time_fft.py index 5e7c122c4aa5..afdeec5121eb 100644 --- a/scipy/signal/tests/test_short_time_fft.py +++ b/scipy/signal/tests/test_short_time_fft.py @@ -531,7 +531,11 @@ def test_border_values(): assert SFT.p_max(10) == 4 assert SFT.k_max(10) == 16 assert SFT.upper_border_begin(10) == (4, 2) - + # Raise exceptions: + with pytest.raises(ValueError, match="^Parameter n must be"): + SFT.upper_border_begin(3) + with pytest.raises(ValueError, match="^Parameter n must be"): + SFT._post_padding(3) def test_border_values_exotic(): """Ensure that the border calculations are correct for windows with @@ -541,7 +545,11 @@ def test_border_values_exotic(): assert SFT.lower_border_end == (0, 0) SFT = ShortTimeFFT(np.flip(w), hop=20, fs=1) - assert SFT.upper_border_begin(4) == (0, 0) + assert SFT.upper_border_begin(4) == (16, 1) + assert SFT.upper_border_begin(5) == (16, 1) + assert SFT.upper_border_begin(23) == (36, 2) + assert SFT.upper_border_begin(24) == (36, 2) + assert SFT.upper_border_begin(25) == (36, 2) SFT._hop = -1 # provoke unreachable line with pytest.raises(RuntimeError): diff --git a/scipy/signal/tests/test_splines.py b/scipy/signal/tests/test_splines.py index 0d2afa42e46c..185ce2ba2d65 100644 --- a/scipy/signal/tests/test_splines.py +++ b/scipy/signal/tests/test_splines.py @@ -3,7 +3,7 @@ import numpy as np import pytest import scipy._lib.array_api_extra as xpx -from scipy._lib._array_api import xp_assert_close, is_cupy +from scipy._lib._array_api import is_cupy, xp_assert_close, xp_default_dtype from scipy.signal._spline import ( symiirorder1_ic, symiirorder2_ic_fwd, symiirorder2_ic_bwd) @@ -190,7 +190,6 @@ def test_symiir1_values(self, dtype, 
xp): 0.19982875, 0.20355805, 0.47378628, 0.57232247, 0.51597393, 0.25935107, 0.31438554, 0.41096728, 0.4190693 , 0.25812255, 0.33671467], dtype=res.dtype) - assert res.dtype == dtype atol = {xp.float64: 1e-15, xp.float32: 1e-7}[dtype] xp_assert_close(res, exp_res, atol=atol) @@ -332,10 +331,10 @@ def test_symiir2_initial_bwd(self, dtype, precision, xp): def test_symiir2(self, dtype, precision, xp): dtype = getattr(xp, dtype) - r = xp.asarray(0.5, dtype=dtype) - omega = xp.asarray(xp.pi / 3.0, dtype=dtype) - cs = 1 - 2 * r * xp.cos(omega) + r * r - a2 = 2 * r * xp.cos(omega) + r = 0.5 + omega = math.pi / 3.0 + cs = 1 - 2 * r * math.cos(omega) + r * r + a2 = 2 * r * math.cos(omega) a3 = -r * r n = 100 @@ -367,13 +366,15 @@ def test_symiir2(self, dtype, precision, xp): out = symiirorder2(signal, r, omega, precision) xp_assert_close(out, exp, atol=4e-6, rtol=6e-7) - @skip_xp_backends(cpu_only=True) + @skip_xp_backends(cpu_only=True, exceptions=["cupy"], reason="C internals") @pytest.mark.parametrize('dtyp', ['float32', 'float64']) def test_symiir2_values(self, dtyp, xp): rng = np.random.RandomState(1234) s = rng.uniform(size=16).astype(dtyp) s = xp.asarray(s) - dtyp = getattr(xp, dtyp) + + # cupy returns f64 for f32 inputs + dtype = xp.float64 if is_cupy(xp) else getattr(xp, dtyp) res = symiirorder2(s, 0.1, 0.1, precision=1e-10) @@ -382,13 +383,9 @@ def test_symiir2_values(self, dtyp, xp): [0.26572609, 0.53408018, 0.51032696, 0.72115829, 0.69486885, 0.3649055 , 0.37349478, 0.74165032, 0.89718521, 0.80582483, 0.46758053, 0.51898709, 0.65025605, 0.65394321, 0.45273595, - 0.53539183], dtype=dtyp + 0.53539183], dtype=dtype ) - if not is_cupy(xp): - # cupy returns f64 for f32 inputs - assert res.dtype == dtyp - # The values in SciPy 1.14 agree with those in SciPy 1.9.1 to this # accuracy only. Implementation differences are twofold: # 1. 
boundary conditions are computed differently @@ -397,7 +394,7 @@ def test_symiir2_values(self, dtyp, xp): # test_symiir2_initial_{fwd,bwd} above, so the difference is likely # due to a different way roundoff errors accumulate in the filter. # In that respect, sosfilt is likely doing a better job. - xp_assert_close(res, exp_res, atol=2e-6, check_dtype=False) + xp_assert_close(res, exp_res, atol=2e-6) I1 = xp.asarray(1 + 1j, dtype=xp.result_type(s, xp.complex64)) s = s * I1 @@ -405,7 +402,7 @@ def test_symiir2_values(self, dtyp, xp): with pytest.raises((TypeError, ValueError)): res = symiirorder2(s, 0.5, 0.1) - @skip_xp_backends(cpu_only=True) + @skip_xp_backends(cpu_only=True, exceptions=["cupy"], reason="C internals") @xfail_xp_backends("cupy", reason="cupy does not accept integer arrays") def test_symiir1_integer_input(self, xp): s = xp.where( @@ -413,11 +410,11 @@ def test_symiir1_integer_input(self, xp): xp.asarray(-1), xp.asarray(1), ) - expected = symiirorder1(xp.astype(s, xp.float64), 0.5, 0.5) + expected = symiirorder1(xp.astype(s, xp_default_dtype(xp)), 0.5, 0.5) out = symiirorder1(s, 0.5, 0.5) xp_assert_close(out, expected) - @skip_xp_backends(cpu_only=True) + @skip_xp_backends(cpu_only=True, exceptions=["cupy"], reason="C internals") @xfail_xp_backends("cupy", reason="cupy does not accept integer arrays") def test_symiir2_integer_input(self, xp): s = xp.where( @@ -425,6 +422,6 @@ def test_symiir2_integer_input(self, xp): xp.asarray(-1), xp.asarray(1), ) - expected = symiirorder2(xp.astype(s, xp.float64), 0.5, xp.pi / 3.0) + expected = symiirorder2(xp.astype(s, xp_default_dtype(xp)), 0.5, xp.pi / 3.0) out = symiirorder2(s, 0.5, xp.pi / 3.0) xp_assert_close(out, expected) diff --git a/scipy/signal/tests/test_windows.py b/scipy/signal/tests/test_windows.py index 0a4cf945e8b3..1dad96494b5a 100644 --- a/scipy/signal/tests/test_windows.py +++ b/scipy/signal/tests/test_windows.py @@ -502,6 +502,10 @@ def test_basic(self, xp): class TestKaiserBesselDerived: def 
test_basic(self, xp): + # cover case `M < 1` + w = windows.kaiser_bessel_derived(0.5, beta=4.0, xp=xp) + xp_assert_equal(w, xp.asarray([])) + M = 100 w = windows.kaiser_bessel_derived(M, beta=4.0, xp=xp) w2 = windows.get_window(('kaiser bessel derived', 4.0), diff --git a/scipy/signal/windows/_windows.py b/scipy/signal/windows/_windows.py index d7faeacf1a0e..d7ddde67a2fc 100644 --- a/scipy/signal/windows/_windows.py +++ b/scipy/signal/windows/_windows.py @@ -1379,7 +1379,7 @@ def kaiser_bessel_derived(M, beta, *, sym=True, xp=None, device=None): "shapes" ) elif M < 1: - return xp.array([]) + return xp.asarray([]) elif M % 2: raise ValueError( "Kaiser-Bessel Derived windows are only defined for even number " @@ -2319,8 +2319,8 @@ def _fftautocorr(x): x_fft = sp_fft.rfft(x, use_N, axis=-1) cxy = sp_fft.irfft(x_fft * x_fft.conj(), n=use_N)[:, :N] # Or equivalently (but in most cases slower): - # cxy = xp.array([xp.convolve(xx, yy[::-1], mode='full') - # for xx, yy in zip(x, x)])[:, N-1:2*N-1] + # cxy = xp.asarray([xp.convolve(xx, yy[::-1], mode='full') + # for xx, yy in zip(x, x)])[:, N-1:2*N-1] return cxy diff --git a/scipy/sparse/linalg/_dsolve/_superlu_utils.c b/scipy/sparse/linalg/_dsolve/_superlu_utils.c index 49b928a4312d..63951b7f33f1 100644 --- a/scipy/sparse/linalg/_dsolve/_superlu_utils.c +++ b/scipy/sparse/linalg/_dsolve/_superlu_utils.c @@ -15,18 +15,8 @@ been allocated. (It's ok to FREE unallocated memory)---will be ignored. 
*/ -#ifndef WITH_THREAD -static SuperLUGlobalObject superlu_py_global = {0}; -#endif - static SuperLUGlobalObject *get_tls_global(void) { -#ifndef WITH_THREAD - if (superlu_py_global.memory_dict == NULL) { - superlu_py_global.memory_dict = PyDict_New(); - } - return &superlu_py_global; -#else PyObject *thread_dict; SuperLUGlobalObject *obj; const char *key = "scipy.sparse.linalg._dsolve._superlu.__global_object"; @@ -53,7 +43,6 @@ static SuperLUGlobalObject *get_tls_global(void) PyDict_SetItemString(thread_dict, key, (PyObject *)obj); return obj; -#endif } jmp_buf *superlu_python_jmpbuf(void) diff --git a/scipy/sparse/linalg/_dsolve/tests/test_linsolve.py b/scipy/sparse/linalg/_dsolve/tests/test_linsolve.py index 660e3baae015..e9e101c3166d 100644 --- a/scipy/sparse/linalg/_dsolve/tests/test_linsolve.py +++ b/scipy/sparse/linalg/_dsolve/tests/test_linsolve.py @@ -4,7 +4,6 @@ import numpy as np from numpy import array, finfo, arange, eye, all, unique, ones, dot from numpy.exceptions import ComplexWarning -import numpy.random as random from numpy.testing import ( assert_array_almost_equal, assert_almost_equal, assert_equal, assert_array_equal, assert_, assert_allclose, @@ -61,7 +60,6 @@ def setup_method(self): d = arange(n) + 1 self.n = n self.A = dia_array(((d, 2*d, d[::-1]), (-3, 0, 5)), shape=(n,n)).tocsc() - random.seed(1234) def _check_singular(self): A = csc_array((5,5), dtype='d') @@ -71,7 +69,8 @@ def _check_singular(self): def _check_non_singular(self): # Make a diagonal dominant, to make sure it is not singular n = 5 - a = csc_array(random.rand(n, n)) + rng = np.random.default_rng(14332) + a = csc_array(rng.random((n, n))) b = ones(n) expected = splu(a).solve(b) @@ -113,9 +112,11 @@ def test_factorizes_nonsquare_matrix_with_umfpack(self): def test_call_with_incorrectly_sized_matrix_without_umfpack(self): use_solver(useUmfpack=False) solve = factorized(self.A) - b = random.rand(4) - B = random.rand(4, 3) - BB = random.rand(self.n, 3, 9) + + rng = 
np.random.default_rng(230498) + b = rng.random(4) + B = rng.random((4, 3)) + BB = rng.random((self.n, 3, 9)) with assert_raises(ValueError, match="is of incompatible size"): solve(b) @@ -129,9 +130,11 @@ def test_call_with_incorrectly_sized_matrix_without_umfpack(self): def test_call_with_incorrectly_sized_matrix_with_umfpack(self): use_solver(useUmfpack=True) solve = factorized(self.A) - b = random.rand(4) - B = random.rand(4, 3) - BB = random.rand(self.n, 3, 9) + + rng = np.random.default_rng(643095823) + b = rng.random(4) + B = rng.random((4, 3)) + BB = rng.random((self.n, 3, 9)) # does not raise solve(b) @@ -144,7 +147,8 @@ def test_call_with_incorrectly_sized_matrix_with_umfpack(self): def test_call_with_cast_to_complex_without_umfpack(self): use_solver(useUmfpack=False) solve = factorized(self.A) - b = random.rand(4) + rng = np.random.default_rng(23454) + b = rng.random(4) for t in [np.complex64, np.complex128]: with assert_raises(TypeError, match="Cannot cast array data"): solve(b.astype(t)) @@ -153,7 +157,8 @@ def test_call_with_cast_to_complex_without_umfpack(self): def test_call_with_cast_to_complex_with_umfpack(self): use_solver(useUmfpack=True) solve = factorized(self.A) - b = random.rand(4) + rng = np.random.default_rng(23454) + b = rng.random(4) for t in [np.complex64, np.complex128]: assert_warns(ComplexWarning, solve, b.astype(t)) @@ -246,8 +251,8 @@ def test_bvector_smoketest(self): [1., 0., 1.], [0., 0., 1.]]) As = csc_array(Adense) - random.seed(1234) - x = random.randn(3) + rng = np.random.default_rng(1234) + x = rng.standard_normal(3) b = As@x x2 = spsolve(As, b) @@ -258,8 +263,8 @@ def test_bmatrix_smoketest(self): [1., 0., 1.], [0., 0., 1.]]) As = csc_array(Adense) - random.seed(1234) - x = random.randn(3, 4) + rng = np.random.default_rng(1234) + x = rng.standard_normal((3, 4)) Bdense = As.dot(x) Bs = csc_array(Bdense) x2 = spsolve(As, Bs) @@ -452,7 +457,6 @@ def setup_method(self): d = arange(n) + 1 self.n = n self.A = dia_array(((d, 2*d, 
d[::-1]), (-3, 0, 5)), shape=(n, n)).tocsc() - random.seed(1234) def _smoketest(self, spxlu, check, dtype, idx_dtype): if np.issubdtype(dtype, np.complexfloating): @@ -465,7 +469,7 @@ def _smoketest(self, spxlu, check, dtype, idx_dtype): A.indptr = A.indptr.astype(idx_dtype, copy=False) lu = spxlu(A) - rng = random.RandomState(1234) + rng = np.random.RandomState(1234) # Input shapes for k in [None, 1, 2, self.n, self.n+2]: @@ -552,7 +556,7 @@ def test_spilu_nnz0(self): def test_splu_basic(self): # Test basic splu functionality. n = 30 - rng = random.RandomState(12) + rng = np.random.RandomState(12) a = rng.rand(n, n) a[a < 0.95] = 0 # First test with a singular matrix @@ -572,7 +576,8 @@ def test_splu_basic(self): def test_splu_perm(self): # Test the permutation vectors exposed by splu. n = 30 - a = random.random((n, n)) + rng = np.random.default_rng(1342354) + a = rng.random((n, n)) a[a < 0.95] = 0 # Make a diagonal dominant, to make sure it is not singular a += 4*eye(n) @@ -621,7 +626,8 @@ def test_natural_permc(self, splu_fun, rtol): def test_lu_refcount(self): # Test that we are keeping track of the reference count with splu. 
n = 30 - a = random.random((n, n)) + rng = np.random.default_rng(1342354) + a = rng.random((n, n)) a[a < 0.95] = 0 # Make a diagonal dominant, to make sure it is not singular a += 4*eye(n) @@ -638,14 +644,15 @@ def test_lu_refcount(self): def test_bad_inputs(self): A = self.A.tocsc() + rng = np.random.default_rng(235634) assert_raises(ValueError, splu, A[:,:4]) assert_raises(ValueError, spilu, A[:,:4]) for lu in [splu(A), spilu(A)]: - b = random.rand(42) - B = random.rand(42, 3) - BB = random.rand(self.n, 3, 9) + b = rng.random(42) + B = rng.random((42, 3)) + BB = rng.random((self.n, 3, 9)) assert_raises(ValueError, lu.solve, b) assert_raises(ValueError, lu.solve, B) assert_raises(ValueError, lu.solve, BB) @@ -877,14 +884,14 @@ def random_triangle_matrix(n, lower=True, format="csr", choice_of_A="real"): A = A.tocsr(copy=False) return A - np.random.seed(1234) + rng = np.random.default_rng(1234) A = random_triangle_matrix(n, lower=lower) if choice_of_b == "floats": - b = np.random.rand(n, m) + b = rng.random((n, m)) elif choice_of_b == "ints": - b = np.random.randint(-9, 9, (n, m)) + b = rng.integers(-9, 9, (n, m)) elif choice_of_b == "complexints": - b = np.random.randint(-9, 9, (n, m)) + np.random.randint(-9, 9, (n, m)) * 1j + b = rng.integers(-9, 9, (n, m)) + rng.integers(-9, 9, (n, m)) * 1j else: raise ValueError( "choice_of_b must be 'floats', 'ints', or 'complexints'.") diff --git a/scipy/sparse/linalg/_interface.py b/scipy/sparse/linalg/_interface.py index 7e8a765e2b6c..af8226343fec 100644 --- a/scipy/sparse/linalg/_interface.py +++ b/scipy/sparse/linalg/_interface.py @@ -821,22 +821,22 @@ def _matmat(self, X): def _adjoint(self): if self.__adj is None: - self.__adj = _AdjointMatrixOperator(self) + self.__adj = _AdjointMatrixOperator(self.A) return self.__adj + class _AdjointMatrixOperator(MatrixLinearOperator): - def __init__(self, adjoint): - self.A = adjoint.A.T.conj() - self.__adjoint = adjoint - self.args = (adjoint,) - self.shape = adjoint.shape[1], 
adjoint.shape[0] + def __init__(self, adjoint_array): + self.A = adjoint_array.T.conj() + self.args = (adjoint_array,) + self.shape = adjoint_array.shape[1], adjoint_array.shape[0] @property def dtype(self): - return self.__adjoint.dtype + return self.args[0].dtype def _adjoint(self): - return self.__adjoint + return MatrixLinearOperator(self.args[0]) class IdentityOperator(LinearOperator): diff --git a/scipy/sparse/linalg/_propack/meson.build b/scipy/sparse/linalg/_propack/meson.build index d33cdc0e7646..b6b5dd94d419 100644 --- a/scipy/sparse/linalg/_propack/meson.build +++ b/scipy/sparse/linalg/_propack/meson.build @@ -94,7 +94,7 @@ foreach ele: elements fortran_ignore_warnings, _fflag_Wno_intrinsic_shadow, _fflag_Wno_uninitialized, - _fflag_fpp, + _fflag_preprocess, ], gnu_symbol_visibility: 'hidden', ) diff --git a/scipy/sparse/linalg/tests/test_interface.py b/scipy/sparse/linalg/tests/test_interface.py index 5fc13bc49557..a28b28fad228 100644 --- a/scipy/sparse/linalg/tests/test_interface.py +++ b/scipy/sparse/linalg/tests/test_interface.py @@ -13,6 +13,7 @@ import scipy.sparse.linalg._interface as interface from scipy.sparse._sputils import matrix +from scipy._lib._gcutils import assert_deallocated, IS_PYPY class TestLinearOperator: @@ -524,3 +525,13 @@ def test_sparse_matmat_exception(): A @ np.identity(4) with assert_raises(ValueError): np.identity(4) @ A + + +@pytest.mark.skipif(IS_PYPY, reason="Test not meaningful on PyPy") +def test_MatrixLinearOperator_refcycle(): + # gh-10634 + # Test that MatrixLinearOperator can be automatically garbage collected + A = np.eye(2) + with assert_deallocated(interface.MatrixLinearOperator, A) as op: + op.adjoint() + del op diff --git a/scipy/spatial/distance.py b/scipy/spatial/distance.py index 9b64bbf5388a..8ed75b56d0d6 100644 --- a/scipy/spatial/distance.py +++ b/scipy/spatial/distance.py @@ -107,20 +107,19 @@ import math import warnings -import numpy as np import dataclasses - from collections.abc import Callable from 
functools import partial -from scipy._lib._util import _asarray_validated, _transition_to_rng -from scipy._lib.deprecation import _deprecated -from . import _distance_wrap -from . import _hausdorff -from ..linalg import norm -from ..special import rel_entr +import numpy as np -from . import _distance_pybind +from scipy._lib._array_api import _asarray +from scipy._lib._util import _asarray_validated, _transition_to_rng +from scipy._lib import array_api_extra as xpx +from scipy._lib.deprecation import _deprecated +from scipy.linalg import norm +from scipy.special import rel_entr +from . import _hausdorff, _distance_pybind, _distance_wrap def _copy_array_if_base_present(a): @@ -2293,14 +2292,33 @@ def pdist(X, metric='euclidean', *, out=None, **kwargs): # between all pairs of vectors in X using the distance metric 'abc' but # with a more succinct, verifiable, but less efficient implementation. + X = _asarray(X) + if X.ndim != 2: + raise ValueError('A 2-dimensional array must be passed.') + + n = X.shape[0] + return xpx.lazy_apply(_np_pdist, X, out, + # lazy_apply doesn't support Array kwargs + kwargs.pop('w', None), + kwargs.pop('V', None), + kwargs.pop('VI', None), + # See src/distance_pybind.cpp::pdist + shape=((n * (n - 1)) // 2, ), dtype=X.dtype, + as_numpy=True, metric=metric, **kwargs) + + +def _np_pdist(X, out, w, V, VI, metric='euclidean', **kwargs): + X = _asarray_validated(X, sparse_ok=False, objects_ok=True, mask_ok=True, check_finite=False) + m, n = X.shape - s = X.shape - if len(s) != 2: - raise ValueError('A 2-dimensional array must be passed.') - - m, n = s + if w is not None: + kwargs["w"] = w + if V is not None: + kwargs["V"] = V + if VI is not None: + kwargs["VI"] = VI if callable(metric): mstr = getattr(metric, '__name__', 'UnknownCustomMetric') @@ -2620,7 +2638,7 @@ def is_valid_y(y, warning=False, throw=False, name=None): throw : bool, optional Throws an exception if the variable passed is not a valid condensed distance matrix. 
- name : bool, optional + name : str, optional Used when referencing the offending variable in the warning or exception message. @@ -2648,34 +2666,25 @@ def is_valid_y(y, warning=False, throw=False, name=None): False """ - y = np.asarray(y, order='c') - valid = True + y = _asarray(y) + name_str = f"'{name}' " if name else "" try: if len(y.shape) != 1: - if name: - raise ValueError(f"Condensed distance matrix '{name}' must " - "have shape=1 (i.e. be one-dimensional).") - else: - raise ValueError('Condensed distance matrix must have shape=1 ' - '(i.e. be one-dimensional).') + raise ValueError(f"Condensed distance matrix {name_str}must " + "have shape=1 (i.e. be one-dimensional).") n = y.shape[0] d = int(np.ceil(np.sqrt(n * 2))) if (d * (d - 1) / 2) != n: - if name: - raise ValueError(f"Length n of condensed distance matrix '{name}' " - "must be a binomial coefficient, i.e." - "there must be a k such that (k \\choose 2)=n)!") - else: - raise ValueError('Length n of condensed distance matrix must ' - 'be a binomial coefficient, i.e. there must ' - 'be a k such that (k \\choose 2)=n)!') + raise ValueError(f"Length n of condensed distance matrix {name_str}" + "must be a binomial coefficient, i.e. 
" + "there must be a k such that (k \\choose 2)=n)!") except Exception as e: if throw: raise if warning: warnings.warn(str(e), stacklevel=2) - valid = False - return valid + return False + return True def num_obs_dm(d): @@ -2733,7 +2742,7 @@ def num_obs_y(Y): >>> num_obs_y(Y) 4 """ - Y = np.asarray(Y, order='c') + Y = _asarray(Y) is_valid_y(Y, throw=True, name='Y') k = Y.shape[0] if k == 0: diff --git a/scipy/spatial/tests/test_distance.py b/scipy/spatial/tests/test_distance.py index 774c773ff8bc..472f99394cd3 100644 --- a/scipy/spatial/tests/test_distance.py +++ b/scipy/spatial/tests/test_distance.py @@ -63,6 +63,7 @@ russellrao, seuclidean, sokalmichener, # noqa: F401 sokalsneath, sqeuclidean, yule) from scipy._lib._util import np_long, np_ulong +from scipy.conftest import skip_xp_invalid_arg @pytest.fixture(params=_METRICS_NAMES, scope="session") @@ -1370,6 +1371,7 @@ def test_pdist_canberra_ticket_711(self): right_y = 0.01492537 assert_allclose(pdist_y, right_y, atol=eps, verbose=verbose > 2) + @skip_xp_invalid_arg def test_pdist_custom_notdouble(self): # tests that when using a custom metric the data type is not altered class myclass: @@ -2106,7 +2108,7 @@ def test_Xdist_deprecated_args(metric): pdist(X1, metric, 2.) 
 for arg in ["p", "V", "VI"]: - kwargs = {arg: "foo"} + kwargs = {arg: np.asarray(1.)} if ((arg == "V" and metric == "seuclidean") or (arg == "VI" and metric == "mahalanobis") diff --git a/scipy/spatial/transform/_rotation.pyx b/scipy/spatial/transform/_rotation.pyx index e0f086775a7e..53675734f271 100644 --- a/scipy/spatial/transform/_rotation.pyx +++ b/scipy/spatial/transform/_rotation.pyx @@ -2580,17 +2580,9 @@ cdef class Rotation: raise ValueError("Expected input of shape (3,) or (P, 3), " "got {}.".format(vectors.shape)) - single_vector = False - if vectors.shape == (3,): - single_vector = True - vectors = vectors[None, :] - - matrix = self.as_matrix() - if self._single: - matrix = matrix[None, :, :] - - n_vectors = vectors.shape[0] - n_rotations = len(self._quat) + cdef bint single_vector = vectors.ndim == 1 + cdef Py_ssize_t n_vectors = 1 if single_vector else len(vectors) + cdef Py_ssize_t n_rotations = 1 if self.single else len(self) if n_vectors != 1 and n_rotations != 1 and n_vectors != n_rotations: raise ValueError("Expected equal numbers of rotations and vectors " @@ -2598,15 +2590,25 @@ cdef class Rotation: "{} rotations and {} vectors.".format( n_rotations, n_vectors)) + cdef np.ndarray matrix = self.as_matrix() + if inverse: - result = np.einsum('ikj,ik->ij', matrix, vectors) - else: - result = np.einsum('ijk,ik->ij', matrix, vectors) + matrix = np.swapaxes(matrix, -1, -2) - if self._single and single_vector: - return result[0] - else: - return result + if single_vector: + return np.matmul(matrix, vectors) + + if self.single: + matrix = matrix[None, :, :] + + if n_rotations == 1: + # Single rotation/many vectors, use matmul for speed: The axes argument + # is such that the input arguments don't need to be transposed and the + # output argument is contiguous in memory. 
+ return np.matmul(matrix, vectors, axes=[(-2, -1), (-1, -2), (-1, -2)])[0] + + # for stacks of matrices einsum is faster + return np.einsum('ijk,ik->ij', matrix, vectors) @cython.embedsignature(True) def __mul__(Rotation self, Rotation other): diff --git a/scipy/spatial/transform/tests/test_rotation.py b/scipy/spatial/transform/tests/test_rotation.py index 1c06546e382f..2c91018f7a2b 100644 --- a/scipy/spatial/transform/tests/test_rotation.py +++ b/scipy/spatial/transform/tests/test_rotation.py @@ -5,7 +5,7 @@ from numpy.testing import assert_allclose from scipy.spatial.transform import Rotation, Slerp from scipy.stats import special_ortho_group -from itertools import permutations +from itertools import permutations, product import pickle import copy @@ -1206,6 +1206,23 @@ def test_apply_multiple_rotations_multiple_points(): v_inverse = np.array([[2, -1, 3], [4, 6, -5]]) assert_allclose(r.apply(v, inverse=True), v_inverse) +def test_apply_shapes(): + vector0 = np.array([1.0, 2.0, 3.0]) + vector1 = np.array([vector0]) + vector2 = np.array([vector0, vector0]) + matrix0 = np.identity(3) + matrix1 = np.array([matrix0]) + matrix2 = np.array([matrix0, matrix0]) + + for m, v in product([matrix0, matrix1, matrix2], [vector0, vector1, vector2]): + r = Rotation.from_matrix(m) + shape = v.shape + if not r.single and (v.shape == (3,) or v.shape == (1, 3)): + shape = (len(r), 3) + x = r.apply(v) + assert x.shape == shape + x = r.apply(v, inverse=True) + assert x.shape == shape def test_getitem(): mat = np.empty((2, 3, 3)) diff --git a/scipy/special/_logsumexp.py b/scipy/special/_logsumexp.py index 4bdb31251584..ac34e9b5c3fd 100644 --- a/scipy/special/_logsumexp.py +++ b/scipy/special/_logsumexp.py @@ -1,8 +1,9 @@ import numpy as np from scipy._lib._array_api import ( array_namespace, + xp_device, xp_size, - xp_broadcast_promote, + xp_promote, xp_float_to_complex, ) from scipy._lib import array_api_extra as xpx @@ -104,7 +105,7 @@ def logsumexp(a, axis=None, b=None, 
keepdims=False, return_sign=False): """ xp = array_namespace(a, b) - a, b = xp_broadcast_promote(a, b, ensure_writeable=True, force_floating=True, xp=xp) + a, b = xp_promote(a, b, broadcast=True, force_floating=True, xp=xp) a = xpx.atleast_nd(a, ndim=1, xp=xp) b = xpx.atleast_nd(b, ndim=1, xp=xp) if b is not None else b axis = tuple(range(a.ndim)) if axis is None else axis @@ -115,10 +116,10 @@ def logsumexp(a, axis=None, b=None, keepdims=False, return_sign=False): # delegate edge case handling to the behavior of `xp.log` and `xp.exp`, # which should follow the C99 standard for complex values. b_exp_a = xp.exp(a) if b is None else b * xp.exp(a) - sum = xp.sum(b_exp_a, axis=axis, keepdims=True) - sgn_inf = _sign(sum, xp) if return_sign else None - sum = xp.abs(sum) if return_sign else sum - out_inf = xp.log(sum) + sum_ = xp.sum(b_exp_a, axis=axis, keepdims=True) + sgn_inf = _sign(sum_, xp=xp) if return_sign else None + sum_ = xp.abs(sum_) if return_sign else sum_ + out_inf = xp.log(sum_) with np.errstate(divide='ignore', invalid='ignore'): # log of zero is OK out, sgn = _logsumexp(a, b, axis=axis, return_sign=return_sign, xp=xp) @@ -132,17 +133,17 @@ def logsumexp(a, axis=None, b=None, keepdims=False, return_sign=False): else: shape = np.asarray(a.shape) # NumPy is convenient for shape manipulation shape[axis] = 1 - out = xp.full(tuple(shape), -xp.inf, dtype=a.dtype) + out = xp.full(tuple(shape), -xp.inf, dtype=a.dtype, device=xp_device(a)) sgn = xp.sign(out) if xp.isdtype(out.dtype, 'complex floating'): if return_sign: real = xp.real(sgn) - imag = xp_float_to_complex(_wrap_radians(xp.imag(sgn), xp)) + imag = xp_float_to_complex(_wrap_radians(xp.imag(sgn), xp=xp), xp=xp) sgn = real + imag*1j else: real = xp.real(out) - imag = xp_float_to_complex(_wrap_radians(xp.imag(out), xp)) + imag = xp_float_to_complex(_wrap_radians(xp.imag(out), xp=xp), xp=xp) out = real + imag*1j # Deal with shape details - reducing dimensions and convert 0-D to scalar for NumPy @@ -154,8 
+155,7 @@ def logsumexp(a, axis=None, b=None, keepdims=False, return_sign=False): return (out, sgn) if return_sign else out -def _wrap_radians(x, xp=None): - xp = array_namespace(x) if xp is None else xp +def _wrap_radians(x, *, xp): # Wrap radians to (-pi, pi] interval wrapped = -((-x + xp.pi) % (2 * xp.pi) - xp.pi) # preserve relative precision @@ -163,7 +163,7 @@ def _wrap_radians(x, xp=None): return xp.where(no_wrap, x, wrapped) -def _elements_and_indices_with_max_real(a, axis=-1, xp=None): +def _elements_and_indices_with_max_real(a, *, axis=-1, xp): # This is an array-API compatible `max` function that works something # like `np.max` for complex input. The important part is that it finds # the element with maximum real part. When there are multiple complex values @@ -172,53 +172,51 @@ def _elements_and_indices_with_max_real(a, axis=-1, xp=None): # `take_along_axis`, and even if it did, we would have problems with axis tuples. # Feel free to rewrite! It's ugly, but it's not the purpose of the PR, and # it gets the job done. - xp = array_namespace(a) if xp is None else xp if xp.isdtype(a.dtype, "complex floating"): # select all elements with max real part. real_a = xp.real(a) - max = xp.max(real_a, axis=axis, keepdims=True) - mask = real_a == max + max_ = xp.max(real_a, axis=axis, keepdims=True) + mask = real_a == max_ # Of those, choose one arbitrarily. This is a reasonably # simple, array-API compatible way of doing so that doesn't # have a problem with `axis` being a tuple or None. - i = xp.reshape(xp.arange(xp_size(a)), a.shape) + i = xp.reshape(xp.arange(xp_size(a), device=xp_device(a)), a.shape) i = xpx.at(i, ~mask).set(-1) max_i = xp.max(i, axis=axis, keepdims=True) mask = i == max_i a = xp.where(mask, a, 0.) 
- max = xp.sum(a, axis=axis, dtype=a.dtype, keepdims=True) + max_ = xp.sum(a, axis=axis, dtype=a.dtype, keepdims=True) else: - max = xp.max(a, axis=axis, keepdims=True) - mask = a == max + max_ = xp.max(a, axis=axis, keepdims=True) + mask = a == max_ - return xp.asarray(max), xp.asarray(mask) + return max_, mask -def _sign(x, xp): +def _sign(x, *, xp): return x / xp.where(x == 0, 1., xp.abs(x)) -def _logsumexp(a, b, axis, return_sign, xp): - +def _logsumexp(a, b, *, axis, return_sign, xp): # This has been around for about a decade, so let's consider it a feature: # Even if element of `a` is infinite or NaN, it adds nothing to the sum if # the corresponding weight is zero. if b is not None: - a = xpx.at(a, b == 0).set(-xp.inf) + a = xpx.at(a, b == 0).set(-xp.inf, copy=True) # Find element with maximum real part, since this is what affects the magnitude # of the exponential. Possible enhancement: include log of `b` magnitude in `a`. a_max, i_max = _elements_and_indices_with_max_real(a, axis=axis, xp=xp) # for precision, these terms are separated out of the main sum. - a = xpx.at(a, i_max).set(-xp.inf) + a = xpx.at(a, i_max).set(-xp.inf, copy=True if b is None else None) i_max_dt = xp.astype(i_max, a.dtype) # This is an inefficient way of getting `m` because it is the sum of a sparse # array; however, this is the simplest way I can think of to get the right shape. 
- m = (xp.sum(i_max_dt, axis=axis, keepdims=True, dtype=a.dtype) if b is None - else xp.sum(b * i_max_dt, axis=axis, keepdims=True, dtype=a.dtype)) + b_i_max = i_max_dt if b is None else b * i_max_dt + m = xp.sum(b_i_max, axis=axis, keepdims=True, dtype=a.dtype) # Shift, exponentiate, scale, and sum exp = b * xp.exp(a - a_max) if b is not None else xp.exp(a - a_max) diff --git a/scipy/special/_support_alternative_backends.py b/scipy/special/_support_alternative_backends.py index aa0d8fa011ed..65d98ac92059 100644 --- a/scipy/special/_support_alternative_backends.py +++ b/scipy/special/_support_alternative_backends.py @@ -1,11 +1,12 @@ -import sys import functools import operator import numpy as np from scipy._lib._array_api import ( - array_namespace, scipy_namespace_for, is_numpy, is_marray, SCIPY_ARRAY_API + array_namespace, scipy_namespace_for, is_numpy, is_dask, is_marray, + xp_promote, SCIPY_ARRAY_API ) +import scipy._lib.array_api_extra as xpx from . import _ufuncs # These don't really need to be imported, but otherwise IDEs might not realize # that these are defined in this file / report an error in __init__.py @@ -18,60 +19,68 @@ array_api_compat_prefix = "scipy._lib.array_api_compat" -def get_array_special_func(f_name, xp, n_array_args): - spx = scipy_namespace_for(xp) - f = None +def get_array_special_func(f_name, xp): if is_numpy(xp): - f = getattr(_ufuncs, f_name, None) - elif spx is not None: - f = getattr(spx.special, f_name, None) + return getattr(_ufuncs, f_name) - if f is not None: - return f + spx = scipy_namespace_for(xp) + if spx is not None: + f = getattr(spx.special, f_name, None) + if f is not None: + return f # if generic array-API implementation is available, use that; # otherwise, fall back to NumPy/SciPy if f_name in _generic_implementations: - _f = _generic_implementations[f_name](xp=xp, spx=spx) - if _f is not None: - return _f - - _f = getattr(_ufuncs, f_name, None) - def __f(*args, _f=_f, _xp=xp, **kwargs): - array_args = 
args[:n_array_args] - other_args = args[n_array_args:] - if is_marray(_xp): - data_args = [np.asarray(arg.data) for arg in array_args] - out = _f(*data_args, *other_args, **kwargs) - mask = functools.reduce(operator.or_, (arg.mask for arg in array_args)) - return _xp.asarray(out, mask=mask) - else: - array_args = [np.asarray(arg) for arg in array_args] - out = _f(*array_args, *other_args, **kwargs) - return _xp.asarray(out) - - return __f + f = _generic_implementations[f_name](xp=xp, spx=spx) + if f is not None: + return f + + def f(*args, **kwargs): + if is_marray(xp): + _f = globals()[f_name] # Allow nested wrapping + data_args = [arg.data for arg in args] + out = _f(*data_args, **kwargs) + mask = functools.reduce(operator.or_, (arg.mask for arg in args)) + return xp.asarray(out, mask=mask) + + elif is_dask(xp): + # IMPORTANT: map_blocks works only because all ufuncs in this module + # are elementwise. It would be a grave mistake to apply this to gufuncs + # or any other function with reductions, as they would change their + # output depending on chunking! 
+ + _f = globals()[f_name] # Allow nested wrapping + # Hide dtype kwarg from map_blocks + return xp.map_blocks(functools.partial(_f, **kwargs), *args) + else: + _f = getattr(_ufuncs, f_name) + args = [np.asarray(arg) for arg in args] + out = _f(*args, **kwargs) + return xp.asarray(out) -def _get_shape_dtype(*args, xp): - args = xp.broadcast_arrays(*args) - shape = args[0].shape - dtype = xp.result_type(*args) - if xp.isdtype(dtype, 'integral'): - dtype = xp.float64 - args = [xp.asarray(arg, dtype=dtype) for arg in args] - return args, shape, dtype + return f def _rel_entr(xp, spx): def __rel_entr(x, y, *, xp=xp): - args, shape, dtype = _get_shape_dtype(x, y, xp=xp) - x, y = args - res = xp.full(x.shape, xp.inf, dtype=dtype) - res[(x == 0) & (y >= 0)] = xp.asarray(0, dtype=dtype) - i = (x > 0) & (y > 0) - res[i] = x[i] * (xp.log(x[i]) - xp.log(y[i])) + # https://github.com/data-apis/array-api-extra/issues/160 + mxp = array_namespace(x._meta, y._meta) if is_dask(xp) else xp + x, y = xp_promote(x, y, broadcast=True, force_floating=True, xp=xp) + xy_pos = (x > 0) & (y > 0) + xy_inf = xp.isinf(x) & xp.isinf(y) + res = xpx.apply_where( + xy_pos & ~xy_inf, + (x, y), + # Note: for very large x, this can overflow. + lambda x, y: x * (mxp.log(x) - mxp.log(y)), + fill_value=xp.inf + ) + res = xpx.at(res)[(x == 0) & (y >= 0)].set(0) + res = xpx.at(res)[xp.isnan(x) | xp.isnan(y) | (xy_pos & xy_inf)].set(xp.nan) return res + return __rel_entr @@ -83,14 +92,19 @@ def __xlogy(x, y, *, xp=xp): return __xlogy +def _get_native_func(xp, spx, f_name): + f = getattr(spx.special, f_name, None) if spx else None + if f is None and hasattr(xp, 'special'): + f = getattr(xp.special, f_name, None) + return f + + def _chdtr(xp, spx): # The difference between this and just using `gammainc` # defined by `get_array_special_func` is that if `gammainc` # isn't found, we don't want to use the SciPy version; we'll # return None here and use the SciPy version of `chdtr`. 
- gammainc = getattr(spx.special, 'gammainc', None) if spx else None # noqa: F811 - if gammainc is None and hasattr(xp, 'special'): - gammainc = getattr(xp.special, 'gammainc', None) + gammainc = _get_native_func(xp, spx, 'gammainc') # noqa: F811 if gammainc is None: return None @@ -109,9 +123,7 @@ def _chdtrc(xp, spx): # defined by `get_array_special_func` is that if `gammaincc` # isn't found, we don't want to use the SciPy version; we'll # return None here and use the SciPy version of `chdtrc`. - gammaincc = getattr(spx.special, 'gammaincc', None) if spx else None # noqa: F811 - if gammaincc is None and hasattr(xp, 'special'): - gammaincc = getattr(xp.special, 'gammaincc', None) + gammaincc = _get_native_func(xp, spx, 'gammaincc') # noqa: F811 if gammaincc is None: return None @@ -124,9 +136,7 @@ def __chdtrc(v, x): def _betaincc(xp, spx): - betainc = getattr(spx.special, 'betainc', None) if spx else None # noqa: F811 - if betainc is None and hasattr(xp, 'special'): - betainc = getattr(xp.special, 'betainc', None) + betainc = _get_native_func(xp, spx, 'betainc') # noqa: F811 if betainc is None: return None @@ -137,9 +147,7 @@ def __betaincc(a, b, x): def _stdtr(xp, spx): - betainc = getattr(spx.special, 'betainc', None) if spx else None # noqa: F811 - if betainc is None and hasattr(xp, 'special'): - betainc = getattr(xp.special, 'betainc', None) + betainc = _get_native_func(xp, spx, 'betainc') # noqa: F811 if betainc is None: return None @@ -152,10 +160,7 @@ def __stdtr(df, t): def _stdtrit(xp, spx): - betainc = getattr(spx.special, 'betainc', None) if spx else None # noqa: F811 - if betainc is None and hasattr(xp, 'special'): - betainc = getattr(xp.special, 'betainc', None) - + betainc = _get_native_func(xp, spx, 'betainc') # noqa: F811 # If betainc is not defined, the root-finding would be done with `xp` # despite `stdtr` being evaluated with SciPy/NumPy `stdtr`. Save the # conversions: in this case, just evaluate `stdtrit` with SciPy/NumPy. 
@@ -185,18 +190,19 @@ def fun(t, df, p): return stdtr(df, t) - p # functools.wraps doesn't work because: # 'numpy.ufunc' object has no attribute '__module__' -def support_alternative_backends(f_name, n_array_args): +def support_alternative_backends(f_name): func = getattr(_ufuncs, f_name) @functools.wraps(func) def wrapped(*args, **kwargs): - xp = array_namespace(*args[:n_array_args]) - f = get_array_special_func(f_name, xp, n_array_args) + xp = array_namespace(*args) + f = get_array_special_func(f_name, xp) return f(*args, **kwargs) return wrapped +# function name: number of args (for testing purposes) array_special_func_map = { 'log_ndtr': 1, 'ndtr': 1, @@ -223,10 +229,11 @@ def wrapped(*args, **kwargs): 'stdtrit': 2, } -for f_name, n_array_args in array_special_func_map.items(): - f = (support_alternative_backends(f_name, n_array_args) - if SCIPY_ARRAY_API - else getattr(_ufuncs, f_name)) - sys.modules[__name__].__dict__[f_name] = f +globals().update( + {f_name: support_alternative_backends(f_name) + if SCIPY_ARRAY_API + else getattr(_ufuncs, f_name) + for f_name in array_special_func_map} +) __all__ = list(array_special_func_map) diff --git a/scipy/special/meson.build b/scipy/special/meson.build index 583c29c1f300..e18876588aa6 100644 --- a/scipy/special/meson.build +++ b/scipy/special/meson.build @@ -53,7 +53,7 @@ endif py3.extension_module('_special_ufuncs', ['_special_ufuncs.cpp', '_special_ufuncs_docs.cpp', 'sf_error.cc'], - include_directories: ['../_lib', '../_build_utils/src'], + include_directories: ['..', '../_lib', '../_build_utils/src'], dependencies: [np_dep], link_args: version_link_args, cpp_args: ufuncs_cpp_args, @@ -63,7 +63,7 @@ py3.extension_module('_special_ufuncs', py3.extension_module('_gufuncs', ['_gufuncs.cpp', '_gufuncs_docs.cpp', 'sf_error.cc'], - include_directories: ['../_lib', '../_build_utils/src'], + include_directories: ['..', '../_lib', '../_build_utils/src'], dependencies: [np_dep], link_args: version_link_args, cpp_args: 
ufuncs_cpp_args, @@ -121,7 +121,7 @@ py3.extension_module('_ufuncs', ], c_args: [cython_c_args, Wno_maybe_uninitialized], cpp_args: ['-DSP_SPECFUN_ERROR'], - include_directories: ['../_lib', '../_build_utils/src'], + include_directories: ['..', '../_lib', '../_build_utils/src'], dependencies: [ lapack_dep, npymath_lib, @@ -162,7 +162,7 @@ py3.extension_module('_ufuncs_cxx', uf_cython_gen_cpp.process(cython_special[2]), # _ufuncs_cxx.pyx ], cpp_args: ufuncs_cxx_cpp_args, - include_directories: ['../_lib/boost_math/include', '../_lib', + include_directories: ['..', '../_lib/boost_math/include', '../_lib', '../_build_utils/src'], link_args: version_link_args, dependencies: [np_dep, ellint_dep], @@ -174,7 +174,7 @@ py3.extension_module('_ellip_harm_2', [uf_cython_gen.process('_ellip_harm_2.pyx'), 'sf_error.cc'], c_args: [cython_c_args], cpp_args: ['-DSP_SPECFUN_ERROR'], - include_directories: ['../_lib', '../_build_utils/src'], + include_directories: ['..', '../_lib', '../_build_utils/src'], link_args: version_link_args, dependencies: [lapack_dep, np_dep], install: true, @@ -191,7 +191,7 @@ py3.extension_module('cython_special', ], c_args: [cython_c_args, Wno_maybe_uninitialized], cpp_args: ['-DSP_SPECFUN_ERROR'], - include_directories: ['../_lib', '../_build_utils/src'], + include_directories: ['..', '../_lib', '../_build_utils/src'], link_args: version_link_args, dependencies: [np_dep, npymath_lib, lapack_dep], link_with: cdflib_lib, diff --git a/scipy/special/sf_error.cc b/scipy/special/sf_error.cc index 89e24fe522fc..180f09926772 100644 --- a/scipy/special/sf_error.cc +++ b/scipy/special/sf_error.cc @@ -4,10 +4,11 @@ #include #include +#include "scipy_config.h" #include "sf_error.h" /* If this isn't volatile clang tries to optimize it away */ -static volatile sf_action_t sf_error_actions[] = { +static volatile SCIPY_TLS sf_action_t sf_error_actions[] = { SF_ERROR_IGNORE, /* SF_ERROR_OK */ SF_ERROR_IGNORE, /* SF_ERROR_SINGULAR */ SF_ERROR_IGNORE, /* 
SF_ERROR_UNDERFLOW */ @@ -76,9 +77,7 @@ void sf_error_v(const char *func_name, sf_error_t code, const char *fmt, va_list PyOS_snprintf(msg, 2048, "scipy.special/%s: %s", func_name, sf_error_messages[(int) code]); } -#ifdef WITH_THREAD save = PyGILState_Ensure(); -#endif if (PyErr_Occurred()) { goto skip_warn; @@ -119,11 +118,7 @@ void sf_error_v(const char *func_name, sf_error_t code, const char *fmt, va_list } skip_warn: -#ifdef WITH_THREAD PyGILState_Release(save); -#else - ; -#endif } void sf_error(const char *func_name, sf_error_t code, const char *fmt, ...) { diff --git a/scipy/special/tests/test_logsumexp.py b/scipy/special/tests/test_logsumexp.py index dadb3aab0ddb..31d9e72779d9 100644 --- a/scipy/special/tests/test_logsumexp.py +++ b/scipy/special/tests/test_logsumexp.py @@ -4,19 +4,16 @@ import numpy as np -from scipy._lib._array_api import is_array_api_strict, xp_default_dtype +from scipy._lib._array_api import is_array_api_strict, xp_default_dtype, xp_device from scipy._lib._array_api_no_0d import (xp_assert_equal, xp_assert_close, xp_assert_less) from scipy.special import log_softmax, logsumexp, softmax from scipy.special._logsumexp import _wrap_radians -from scipy.stats.tests.test_stats import skip_xp_backends from scipy._lib.array_api_extra.testing import lazy_xp_function - - dtypes = ['float32', 'float64', 'int32', 'int64', 'complex64', 'complex128'] integral_dtypes = ['int32', 'int64'] @@ -31,7 +28,7 @@ def test_wrap_radians(xp): 0, 1e-300, 1, math.pi, math.pi+1]) ref = xp.asarray([math.pi-1, math.pi, -1, -1e-300, 0, 1e-300, 1, math.pi, -math.pi+1]) - res = _wrap_radians(x, xp) + res = _wrap_radians(x, xp=xp) xp_assert_close(res, ref, atol=0) @@ -184,7 +181,6 @@ def test_array_like(self): desired = np.asarray(1000.0 + math.log(2.0)) xp_assert_close(logsumexp(a), desired) - @skip_xp_backends('array_api_strict', reason='data-apis/array-api-strict#131') @pytest.mark.parametrize('dtype', dtypes) def test_dtypes_a(self, dtype, xp): dtype = getattr(xp, 
dtype) @@ -194,7 +190,6 @@ def test_dtypes_a(self, dtype, xp): desired = xp.asarray(1000.0 + math.log(2.0), dtype=desired_dtype) xp_assert_close(logsumexp(a), desired) - @skip_xp_backends('array_api_strict', reason='data-apis/array-api-strict#131') @pytest.mark.parametrize('dtype_a', dtypes) @pytest.mark.parametrize('dtype_b', dtypes) def test_dtypes_ab(self, dtype_a, dtype_b, xp): @@ -225,7 +220,6 @@ def test_gh18295(self, xp): ref = xp.logaddexp(a[0], a[1]) xp_assert_close(res, ref) - @skip_xp_backends('array_api_strict', reason='data-apis/array-api-strict#131') @pytest.mark.filterwarnings( "ignore:The `numpy.copyto` function is not implemented:FutureWarning:dask" ) @@ -298,6 +292,23 @@ def test_gh22601_infinite_elements(self, x, y, xp): ref = xp.log(xp.sum(xp.exp(xp.asarray([x, y])))) xp_assert_equal(res, ref) + def test_no_writeback(self, xp): + """Test that logsumexp doesn't accidentally write back to its parameters.""" + a = xp.asarray([5., 4.]) + b = xp.asarray([3., 2.]) + logsumexp(a) + logsumexp(a, b=b) + xp_assert_equal(a, xp.asarray([5., 4.])) + xp_assert_equal(b, xp.asarray([3., 2.])) + + @pytest.mark.parametrize("x_raw", [1.0, 1.0j, []]) + def test_device(self, x_raw, xp, devices): + """Test input device propagation to output.""" + for d in devices: + x = xp.asarray(x_raw, device=d) + assert xp_device(logsumexp(x)) == xp_device(x) + assert xp_device(logsumexp(x, b=x)) == xp_device(x) + class TestSoftmax: def test_softmax_fixtures(self, xp): diff --git a/scipy/special/tests/test_sf_error.py b/scipy/special/tests/test_sf_error.py index 2dfe8287ee4f..d86830415fbc 100644 --- a/scipy/special/tests/test_sf_error.py +++ b/scipy/special/tests/test_sf_error.py @@ -2,7 +2,7 @@ import warnings import numpy as np -from numpy.testing import assert_, assert_equal, IS_PYPY +from numpy.testing import assert_, assert_equal, HAS_REFCOUNT import pytest from pytest import raises as assert_raises @@ -73,7 +73,8 @@ def test_seterr(): sc.seterr(**entry_err) 
-@pytest.mark.skipif(IS_PYPY, reason="Test not meaningful on PyPy") +@pytest.mark.thread_unsafe +@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") def test_sf_error_special_refcount(): # Regression test for gh-16233. # Check that the reference count of scipy.special is not increased diff --git a/scipy/special/tests/test_sph_harm.py b/scipy/special/tests/test_sph_harm.py index 310bda00b4d8..d4fa8791ac57 100644 --- a/scipy/special/tests/test_sph_harm.py +++ b/scipy/special/tests/test_sph_harm.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from numpy.testing import assert_allclose, suppress_warnings +from numpy.testing import assert_allclose import scipy.special as sc class TestSphHarm: @@ -47,6 +47,7 @@ def test_all(self, n_max, m_max): np.testing.assert_allclose(y_actual, y_desired, rtol=1e-05) +@pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_first_harmonics(): # Test against explicit representations of the first four # spherical harmonics which use `theta` as the azimuthal angle, @@ -78,9 +79,7 @@ def Y11(theta, phi): theta, phi = np.meshgrid(theta, phi) for harm, m, n in zip(harms, m, n): - with suppress_warnings() as sup: - sup.filter(category=DeprecationWarning) - assert_allclose(sc.sph_harm(m, n, theta, phi), - harm(theta, phi), - rtol=1e-15, atol=1e-15, - err_msg=f"Y^{m}_{n} incorrect") + assert_allclose(sc.sph_harm(m, n, theta, phi), + harm(theta, phi), + rtol=1e-15, atol=1e-15, + err_msg=f"Y^{m}_{n} incorrect") diff --git a/scipy/special/tests/test_support_alternative_backends.py b/scipy/special/tests/test_support_alternative_backends.py index bf19acf5a542..8f24a881344f 100644 --- a/scipy/special/tests/test_support_alternative_backends.py +++ b/scipy/special/tests/test_support_alternative_backends.py @@ -1,27 +1,41 @@ +from types import ModuleType + import pytest +from scipy import special from scipy.special._support_alternative_backends import (get_array_special_func, array_special_func_map) -from scipy import 
special from scipy._lib._array_api_no_0d import xp_assert_close from scipy._lib._array_api import (is_cupy, is_dask, is_jax, is_torch, - is_array_api_strict, SCIPY_DEVICE) + SCIPY_ARRAY_API, SCIPY_DEVICE) from scipy._lib.array_api_compat import numpy as np +from scipy._lib.array_api_extra.testing import lazy_xp_function + + +special_wrapped = ModuleType("special_wrapped") +lazy_xp_modules = [special_wrapped] +for f_name in array_special_func_map: + f = getattr(special, f_name) + setattr(special_wrapped, f_name, f) + lazy_xp_function(f) +@pytest.mark.skipif(not SCIPY_ARRAY_API, reason="Alternative backends must be enabled.") def test_dispatch_to_unrecognized_library(): xp = pytest.importorskip("array_api_strict") - f = get_array_special_func('ndtr', xp=xp, n_array_args=1) + f = get_array_special_func('ndtr', xp=xp) x = [1, 2, 3] res = f(xp.asarray(x)) ref = xp.asarray(special.ndtr(np.asarray(x))) xp_assert_close(res, ref) +@pytest.mark.skipif(not SCIPY_ARRAY_API, + reason="xp_promote won't accept non-numpy objects") @pytest.mark.parametrize('dtype', ['float32', 'float64', 'int64']) def test_rel_entr_generic(dtype): xp = pytest.importorskip("array_api_strict") - f = get_array_special_func('rel_entr', xp=xp, n_array_args=2) + f = get_array_special_func('rel_entr', xp=xp) dtype_np = getattr(np, dtype) dtype_xp = getattr(xp, dtype) x = [-1, 0, 0, 1] @@ -54,15 +68,12 @@ def test_support_alternative_backends(xp, f_name, n_args, dtype, shapes): ): pytest.skip(f"`{f_name}` does not have an array-agnostic implementation " "and cannot delegate to PyTorch.") - if is_dask(xp) and f_name == 'rel_entr': - pytest.skip("boolean index assignment") if is_jax(xp) and f_name == "stdtrit": pytest.skip(f"`{f_name}` requires scipy.optimize support for immutable arrays") - if is_array_api_strict(xp) and f_name == "xlogy": - pytest.skip(f"`{f_name}` needs data-apis/array-api-strict#131 to be resolved") shapes = shapes[:n_args] - f = getattr(special, f_name) + f = getattr(special, f_name) # 
Unwrapped + fw = getattr(special_wrapped, f_name) # Wrapped by lazy_xp_function dtype_np = getattr(np, dtype) dtype_xp = getattr(xp, dtype) @@ -91,7 +102,14 @@ def test_support_alternative_backends(xp, f_name, n_args, dtype, shapes): args_xp = [xp.asarray(arg, dtype=dtype_xp) for arg in args_np] - res = f(*args_xp) + if is_dask(xp): + # We're using map_blocks to dispatch the function to Dask. + # This is the correct thing to do IF all tested functions are elementwise; + # otherwise the output would change depending on chunking. + # Try to trigger bugs related to having multiple chunks. + args_xp = [arg.rechunk(5) for arg in args_xp] + + res = fw(*args_xp) ref = xp.asarray(f(*args_np), dtype=dtype_xp) eps = np.finfo(dtype_np).eps diff --git a/scipy/stats/_continued_fraction.py b/scipy/stats/_continued_fraction.py index 7e02fa66a253..efa0411608ab 100644 --- a/scipy/stats/_continued_fraction.py +++ b/scipy/stats/_continued_fraction.py @@ -1,7 +1,7 @@ import numpy as np from scipy._lib._array_api import ( - array_namespace, xp_ravel, xp_copy, is_torch, xp_default_dtype + array_namespace, xp_ravel, xp_copy, xp_promote ) import scipy._lib._elementwise_iterative_method as eim from scipy._lib._util import _RichResult @@ -29,6 +29,14 @@ def _continued_fraction_iv(a, b, args, tolerances, maxiter, log): if not np.iterable(args): args = (args,) + # Call each callable once to determine namespace and dtypes + a0, b0 = a(0, *args), b(0, *args) + xp = array_namespace(a0, b0, *args) + a0, b0, *args = xp_promote(a0, b0, *args, force_floating=True, broadcast=True, + xp=xp) + shape, dtype = a0.shape, a0.dtype + a0, b0, *args = (xp_ravel(arg) for arg in (a0, b0) + tuple(args)) + tolerances = {} if tolerances is None else tolerances eps = tolerances.get('eps', None) tiny = tolerances.get('tiny', None) @@ -53,7 +61,7 @@ def _continued_fraction_iv(a, b, args, tolerances, maxiter, log): if not isinstance(log, bool): raise ValueError('`log` must be boolean.') - return a, b, args, eps, tiny, 
maxiter, log + return a, b, args, eps, tiny, maxiter, log, a0, b0, shape, dtype, xp def _continued_fraction(a, b, *, args=(), tolerances=None, maxiter=100, log=False): @@ -265,7 +273,7 @@ def _continued_fraction(a, b, *, args=(), tolerances=None, maxiter=100, log=Fals """ res = _continued_fraction_iv(a, b, args, tolerances, maxiter, log) - a, b, args, eps, tiny, maxiter, log = res + a, b, args, eps, tiny, maxiter, log, a0, b0, shape, dtype, xp = res callback = None # don't want to test it, but easy to add later # The EIM framework was designed for the case in where there would @@ -274,7 +282,6 @@ def _continued_fraction(a, b, *, args=(), tolerances=None, maxiter=100, log=Fals # and the first argument is an integer (the number of the term). Rather # than complicate the framework, we wrap the user-provided callables to # make this problem fit within the existing framework. - xp = array_namespace(*args) if args else array_namespace(a(0)) def a(n, *args, a=a): n = int(xp.real(xp_ravel(n))[0]) @@ -287,36 +294,7 @@ def b(n, *args, b=b): def func(n, *args): return xp.stack((a(n, *args), b(n, *args)), axis=-1) - # Initialization - # The EIM framework was written with only one callable in mind. Again, - # rather than complicating the framework, we call its `initialize` function - # on each callable to get the shape and dtype, then we broadcast these - # shapes, compute the result dtype, and broadcast/promote the zeroth terms - # and `*args` to this shape/dtype. - - # `float32` here avoids influencing precision of resulting float type - # patch up promotion: in numpy (int64, float32) -> float64, while in torch - # (int64, float32) -> float32 irrespective of the default_dtype. 
- dt = {'dtype': None - if is_torch(xp) and xp_default_dtype(xp) == xp.float64 - else xp.float32} - zero = xp.asarray(0, **dt) - - temp = eim._initialize(a, (zero,), args, complex_ok=True) - _, _, fs_a, _, shape_a, dtype_a, xp_a = temp - temp = eim._initialize(b, (zero,), args, complex_ok=True) - _, _, fs_b, _, shape_b, dtype_b, xp_b = temp - - xp = array_namespace(fs_a[0], fs_b[0], *args) - - shape = np.broadcast_shapes(shape_a, shape_b) # OK to use NumPy on tuples - dtype = xp.result_type(dtype_a, dtype_b) - an = xp.astype(xp_ravel(xp.broadcast_to(xp.reshape(fs_a[0], shape_a), shape)), dtype) # noqa: E501 - bn = xp.astype(xp_ravel(xp.broadcast_to(xp.reshape(fs_b[0], shape_b), shape)), dtype) # noqa: E501 - args = [xp.astype(xp_ravel(xp.broadcast_to(arg, shape)), dtype) for arg in args] - - status = xp.full_like(an, xp.asarray(eim._EINPROGRESS), - dtype=xp.int32) # in progress + status = xp.full_like(a0, eim._EINPROGRESS, dtype=xp.int32) # in progress nit, nfev = 0, 1 # one function evaluation (per function) performed above maxiter = 100 if maxiter is None else maxiter @@ -331,7 +309,7 @@ def func(n, *args): # "Set f0 and C0 to the value b0 or to tiny if b0=0. Set D0 = 0. zero = -xp.inf if log else 0 - fn = xp.where(bn == zero, tiny, bn) + fn = xp.where(b0 == zero, tiny, b0) Cnm1 = xp_copy(fn) Dnm1 = xp.full_like(fn, zero) diff --git a/scipy/stats/_distribution_infrastructure.py b/scipy/stats/_distribution_infrastructure.py index 6d41162d066d..760501219279 100644 --- a/scipy/stats/_distribution_infrastructure.py +++ b/scipy/stats/_distribution_infrastructure.py @@ -3510,10 +3510,10 @@ def make_distribution(dist): is defined. The preferred interface may change in future SciPy versions, in which case support for an old interface version may be deprecated and eventually removed. 
- parameters : dict - Each key is the name of a parameter, + parameters : dict or tuple + If a dictionary, each key is the name of a parameter, and the corresponding value is either a dictionary or tuple. - If a dictionary, it may have the following items, with default + If the value is a dictionary, it may have the following items, with default values used for entries which aren't present. endpoints : tuple, default: (-inf, inf) @@ -3533,7 +3533,17 @@ def make_distribution(dist): ``endpoints`` tuple above, and should define a subinterval of the domain given by ``endpoints``. - A ``tuple`` value ``(a, b)`` is equivalent to ``{endpoints: (a, b)}``. + A tuple value ``(a, b)`` associated to a key in the ``parameters`` + dictionary is equivalent to ``{endpoints: (a, b)}``. + + Custom distributions with multiple parameterizations can be defined by + having the ``parameters`` attribute be a tuple of dictionaries with + the structure described above. In this case, ``dist``\'s class must also + define a method ``process_parameters`` to map between the different + parameterizations. It must take all parameters from all parameterizations + as optional keyword arguments and return a dictionary mapping parameters to + values, filling in values from other parameterizations using values from + the supplied parameterization. See example. support : dict or tuple A dictionary describing the support of the distribution or a tuple @@ -3548,7 +3558,9 @@ def make_distribution(dist): ``moment``, and ``sample``. If defined, these methods must accept the parameters of the distribution as keyword arguments and also accept any positional-only arguments accepted by - the corresponding method of `ContinuousDistribution`. The ``moment`` method + the corresponding method of `ContinuousDistribution`. + When multiple parameterizations are defined, these methods must accept + all parameters from all parameterizations. 
The ``moment`` method must accept the ``order`` and ``kind`` arguments by position or keyword, but may return ``None`` if a formula is not available for the arguments; in this case, the infrastructure will fall back to a default implementation. The @@ -3577,6 +3589,7 @@ class or its methods for more information. >>> import numpy as np >>> import matplotlib.pyplot as plt >>> from scipy import stats + >>> from scipy import special Create a `ContinuousDistribution` from `scipy.stats.loguniform`. @@ -3651,6 +3664,45 @@ class or its methods for more information. >>> np.isclose(X.cdf(2.1), Y.cdf(2.1)) np.True_ + Create a custom distribution with multiple parameterizations. Here we create a + custom version of the beta distribution that has an alternative parameterization + in terms of the mean ``mu`` and a dispersion parameter ``nu``. + + >>> class MyBeta: + ... @property + ... def __make_distribution_version__(self): + ... return "1.16.0" + ... + ... @property + ... def parameters(self): + ... return ({"a": (0, np.inf), "b": (0, np.inf)}, + ... {"mu": (0, 1), "nu": (0, np.inf)}) + ... + ... def process_parameters(self, a=None, b=None, mu=None, nu=None): + ... if a is not None and b is not None: + ... nu = a + b + ... mu = a / nu + ... else: + ... a = mu * nu + ... b = nu - a + ... return dict(a=a, b=b, mu=mu, nu=nu) + ... + ... @property + ... def support(self): + ... return {'endpoints': (0, 1)} + ... + ... def pdf(self, x, a, b, mu, nu): + ... return special._ufuncs._beta_pdf(x, a, b) + ... + ... def cdf(self, x, a, b, mu, nu): + ... 
return special.betainc(a, b, x) + >>> + >>> MyBeta = stats.make_distribution(MyBeta()) + >>> X = MyBeta(a=2.0, b=2.0) + >>> Y = MyBeta(mu=0.5, nu=4.0) + >>> np.isclose(X.pdf(0.3), Y.pdf(0.3)) + np.True_ + """ if dist in {stats.levy_stable, stats.vonmises}: raise NotImplementedError(f"`{dist.name}` is not supported.") @@ -3795,13 +3847,24 @@ def _get_domain_info(info): def _make_distribution_custom(dist): - parameters = [] - - for name, info in dist.parameters.items(): - domain_info, typical = _get_domain_info(info) - domain = _RealDomain(**domain_info) - param = _RealParameter(name, domain=domain, typical=typical) - parameters.append(param) + dist_parameters = ( + dist.parameters if isinstance(dist.parameters, tuple) else (dist.parameters, ) + ) + parameterizations = [] + for parameterization in dist_parameters: + # The attribute name ``parameters`` appears reasonable from a user-facing + # perspective, but there is a little tension here with the internals. It's + # important to keep in mind that the ``parameters`` attribute in a + # user-created custom distribution specifies ``_parameterizations`` within + # the infrastructure. 
+ parameters = [] + + for name, info in parameterization.items(): + domain_info, typical = _get_domain_info(info) + domain = _RealDomain(**domain_info) + param = _RealParameter(name, domain=domain, typical=typical) + parameters.append(param) + parameterizations.append(_Parameterization(*parameters) if parameters else []) domain_info, _ = _get_domain_info(dist.support) _x_support = _RealDomain(**domain_info) @@ -3809,8 +3872,7 @@ def _make_distribution_custom(dist): repr_str = dist.__class__.__name__ class CustomDistribution(ContinuousDistribution): - _parameterizations = ([_Parameterization(*parameters)] if parameters - else []) + _parameterizations = parameterizations _variable = _x_param def __repr__(self): @@ -3847,6 +3909,13 @@ def _moment_standardized_formula(self, order, **kwargs): CustomDistribution._moment_central_formula = _moment_central_formula CustomDistribution._moment_standardized_formula = _moment_standardized_formula + if hasattr(dist, 'process_parameters'): + setattr( + CustomDistribution, + "_process_parameters", + getattr(dist, "process_parameters") + ) + support_etc = _combine_docs(CustomDistribution, include_examples=False).lstrip() docs = [ f"This class represents `{repr_str}` as a subclass of " diff --git a/scipy/stats/_entropy.py b/scipy/stats/_entropy.py index 34bdb779ab13..12e7c45a0dd7 100644 --- a/scipy/stats/_entropy.py +++ b/scipy/stats/_entropy.py @@ -8,7 +8,7 @@ import numpy as np from scipy import special from ._axis_nan_policy import _axis_nan_policy_factory, _broadcast_arrays -from scipy._lib._array_api import array_namespace +from scipy._lib._array_api import array_namespace, xp_promote from scipy._lib import array_api_extra as xpx __all__ = ['entropy', 'differential_entropy'] @@ -318,9 +318,7 @@ class of statistics based on spacings. 
Scandinavian Journal of """ xp = array_namespace(values) - values = xp.asarray(values) - if xp.isdtype(values.dtype, "integral"): # type: ignore[union-attr] - values = xp.astype(values, xp.asarray(1.).dtype) + values = xp_promote(values, force_floating=True, xp=xp) values = xp.moveaxis(values, axis, -1) n = values.shape[-1] # type: ignore[union-attr] diff --git a/scipy/stats/_morestats.py b/scipy/stats/_morestats.py index 3926856d2510..27575dcd3f9b 100644 --- a/scipy/stats/_morestats.py +++ b/scipy/stats/_morestats.py @@ -16,6 +16,7 @@ array_namespace, xp_size, xp_vector_norm, + xp_promote, ) from ._ansari_swilk_statistics import gscale, swilk @@ -863,8 +864,7 @@ def _log_var(logx, xp): # compute log of variance of x from log(x) logmean = _log_mean(logx) # get complex dtype with component dtypes same as `logx` dtype; - # see data-apis/array-api#841 - dtype = xp.result_type(logx.dtype, xp.complex64) + dtype = xp.result_type(logx.dtype, 1j) pij = xp.full(logx.shape, pi * 1j, dtype=dtype) logxmu = special.logsumexp(xp.stack((logx, logmean + pij)), axis=0) res = (xp.real(xp.asarray(special.logsumexp(2 * logxmu, axis=0))) @@ -896,14 +896,17 @@ def boxcox_llf(lmb, data): Notes ----- - The Box-Cox log-likelihood function is defined here as + The Box-Cox log-likelihood function :math:`l` is defined here as .. math:: - llf = (\lambda - 1) \sum_i(\log(x_i)) - - N/2 \log(\sum_i (y_i - \bar{y})^2 / N), + l = (\lambda - 1) \sum_i^N \log(x_i) - + \frac{N}{2} \log\left(\sum_i^N (y_i - \bar{y})^2 / N\right), - where ``y`` is the Box-Cox transformed input data ``x``. + where :math:`N` is the number of data points ``data`` and :math:`y` is the Box-Cox + transformed input data. + This corresponds to the *profile log-likelihood* of the original data :math:`x` + with some constant terms dropped. 
Examples -------- @@ -953,16 +956,12 @@ def boxcox_llf(lmb, data): """ xp = array_namespace(data) - data = xp.asarray(data) + data = xp_promote(data, force_floating=True, xp=xp) + N = data.shape[0] if N == 0: return xp.nan - dt = data.dtype - if xp.isdtype(dt, 'integral'): - data = xp.asarray(data, dtype=xp.float64) - dt = xp.float64 - logdata = xp.log(data) # Compute the variance of the transformed data. @@ -977,7 +976,7 @@ def boxcox_llf(lmb, data): logvar = _log_var(logx, xp) - 2 * math.log(abs(lmb)) res = (lmb - 1) * xp.sum(logdata, axis=0) - N/2 * logvar - res = xp.astype(res, dt) + res = xp.astype(res, data.dtype, copy=False) res = res[()] if res.ndim == 0 else res return res @@ -1081,10 +1080,15 @@ def boxcox(x, lmbda=None, alpha=None, optimizer=None): Notes ----- - The Box-Cox transform is given by:: + The Box-Cox transform is given by: + + .. math:: - y = (x**lmbda - 1) / lmbda, for lmbda != 0 - log(x), for lmbda = 0 + y = + \begin{cases} + \frac{x^\lambda - 1}{\lambda}, &\text{for } \lambda \neq 0 \\ + \log(x), &\text{for } \lambda = 0 + \end{cases} `boxcox` requires the input data to be positive. Sometimes a Box-Cox transformation provides a shift parameter to achieve this; `boxcox` does @@ -1096,9 +1100,9 @@ def boxcox(x, lmbda=None, alpha=None, optimizer=None): .. math:: - llf(\hat{\lambda}) - llf(\lambda) < \frac{1}{2}\chi^2(1 - \alpha, 1), + l(\hat{\lambda}) - l(\lambda) < \frac{1}{2}\chi^2(1 - \alpha, 1), - with ``llf`` the log-likelihood function and :math:`\chi^2` the chi-squared + with :math:`l` the log-likelihood function and :math:`\chi^2` the chi-squared function. References @@ -1537,12 +1541,24 @@ def yeojohnson(x, lmbda=None): Notes ----- - The Yeo-Johnson transform is given by:: + The Yeo-Johnson transform is given by: + + ..
math:: - y = ((x + 1)**lmbda - 1) / lmbda, for x >= 0, lmbda != 0 - log(x + 1), for x >= 0, lmbda = 0 - -((-x + 1)**(2 - lmbda) - 1) / (2 - lmbda), for x < 0, lmbda != 2 - -log(-x + 1), for x < 0, lmbda = 2 + y = + \begin{cases} + \frac{(x + 1)^\lambda - 1}{\lambda}, + &\text{for } x \geq 0, \lambda \neq 0 + \\ + \log(x + 1), + &\text{for } x \geq 0, \lambda = 0 + \\ + -\frac{(-x + 1)^{2 - \lambda} - 1}{2 - \lambda}, + &\text{for } x < 0, \lambda \neq 2 + \\ + -\log(-x + 1), + &\text{for } x < 0, \lambda = 2 + \end{cases} Unlike `boxcox`, `yeojohnson` does not require the input data to be positive. @@ -1650,15 +1666,18 @@ def yeojohnson_llf(lmb, data): Notes ----- - The Yeo-Johnson log-likelihood function is defined here as + The Yeo-Johnson log-likelihood function :math:`l` is defined here as .. math:: - llf = -N/2 \log(\hat{\sigma}^2) + (\lambda - 1) - \sum_i \text{ sign }(x_i)\log(|x_i| + 1) + l = -\frac{N}{2} \log(\hat{\sigma}^2) + (\lambda - 1) + \sum_i^N \text{sign}(x_i) \log(|x_i| + 1) - where :math:`\hat{\sigma}^2` is estimated variance of the Yeo-Johnson - transformed input data ``x``. + where :math:`N` is the number of data points :math:`x` (``data``) and + :math:`\hat{\sigma}^2` is the estimated variance of the Yeo-Johnson transformed + input data :math:`x`. + This corresponds to the *profile log-likelihood* of the original data :math:`x` + with some constant terms dropped. ..
versionadded:: 1.2.0 @@ -3931,9 +3950,7 @@ def median_test(*samples, ties='below', correction=True, lambda_=1, def _circfuncs_common(samples, period, xp=None): xp = array_namespace(samples) if xp is None else xp - if xp.isdtype(samples.dtype, 'integral'): - dtype = xp.asarray(1.).dtype # get default float type - samples = xp.asarray(samples, dtype=dtype) + samples = xp_promote(samples, force_floating=True, xp=xp) # Recast samples as radians that range between 0 and 2 pi and calculate # the sine and cosine diff --git a/scipy/stats/_multivariate.py b/scipy/stats/_multivariate.py index 0fdb6f63bfcd..bad820cfef12 100644 --- a/scipy/stats/_multivariate.py +++ b/scipy/stats/_multivariate.py @@ -3859,6 +3859,7 @@ class random_correlation_gen(multi_rv_generic): r"""A random correlation matrix. Return a random correlation matrix, given a vector of eigenvalues. + The returned matrix is symmetric positive semidefinite with unit diagonal. The `eigs` keyword specifies the eigenvalues of the correlation matrix, and implies the dimension. @@ -3871,7 +3872,8 @@ class random_correlation_gen(multi_rv_generic): Parameters ---------- eigs : 1d ndarray - Eigenvalues of correlation matrix + Eigenvalues of correlation matrix. All eigenvalues need to be non-negative and + need to sum to the number of eigenvalues. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional If `seed` is None (or `np.random`), the `numpy.random.RandomState` singleton is used. 
diff --git a/scipy/stats/_page_trend_test.py b/scipy/stats/_page_trend_test.py index 87a4d0d17c07..1e11f5ac01fb 100644 --- a/scipy/stats/_page_trend_test.py +++ b/scipy/stats/_page_trend_test.py @@ -1,9 +1,12 @@ +from dataclasses import dataclass from itertools import permutations -import numpy as np import math +import threading + +import numpy as np + from ._continuous_distns import norm import scipy.stats -from dataclasses import dataclass @dataclass @@ -304,6 +307,8 @@ def page_trend_test(data, ranked=False, predicted_ranks=None, method='auto'): method='exact') """ + if not hasattr(_pagel_state, 'state'): + _pagel_state.state = _PageL() # Possible values of the method parameter and the corresponding function # used to evaluate the p value @@ -405,8 +410,8 @@ def _l_p_exact(L, m, n): # [1] uses m, n; [5] uses n, k. # Switch convention here because exact calculation code references [5]. L, n, k = int(L), int(m), int(n) - _pagel_state.set_k(k) - return _pagel_state.sf(L, n) + _pagel_state.state.set_k(k) + return _pagel_state.state.sf(L, n) class _PageL: @@ -476,4 +481,6 @@ def pmf(self, l, n): # Maintain state for faster repeat calls to page_trend_test w/ method='exact' -_pagel_state = _PageL() +# _PageL() is calculated once per thread and stored as an attribute on +# this thread-local variable inside page_trend_test(). 
+_pagel_state = threading.local() diff --git a/scipy/stats/_quantile.py b/scipy/stats/_quantile.py index 7ddfce7e992b..4d2a8778b82c 100644 --- a/scipy/stats/_quantile.py +++ b/scipy/stats/_quantile.py @@ -1,6 +1,6 @@ import numpy as np from scipy.special import betainc -from scipy._lib._array_api import xp_default_dtype, xp_ravel, array_namespace +from scipy._lib._array_api import xp_ravel, array_namespace, xp_promote import scipy._lib.array_api_extra as xpx from scipy.stats._axis_nan_policy import _broadcast_arrays, _contains_nan from scipy.stats._stats_py import _length_nonmasked @@ -8,17 +8,16 @@ def _quantile_iv(x, p, method, axis, nan_policy, keepdims): xp = array_namespace(x, p) - x = xp.asarray(x) - p = xp.asarray(p) - if not xp.isdtype(x.dtype, ('integral', 'real floating')): + if not xp.isdtype(xp.asarray(x).dtype, ('integral', 'real floating')): raise ValueError("`x` must have real dtype.") - if xp.isdtype(x.dtype, 'integral'): - x = xp.astype(x, xp_default_dtype(xp)) - if not xp.isdtype(p.dtype, 'real floating'): + if not xp.isdtype(xp.asarray(p).dtype, 'real floating'): raise ValueError("`p` must have real floating dtype.") + x, p = xp_promote(x, p, force_floating=True, xp=xp) + dtype = x.dtype + axis_none = axis is None ndim = max(x.ndim, p.ndim) if axis_none: @@ -47,10 +46,6 @@ def _quantile_iv(x, p, method, axis, nan_policy, keepdims): message = "If specified, `keepdims` must be True or False." raise ValueError(message) - dtype = xp.result_type(p, x) - x = xp.astype(x, dtype, copy=False) - p = xp.astype(p, dtype, copy=False) - # If data has length zero along `axis`, the result will be an array of NaNs just # as if the data had length 1 along axis and were filled with NaNs. This is treated # naturally below whether `nan_policy` is `'propagate'` or `'omit'`. 
diff --git a/scipy/stats/_resampling.py b/scipy/stats/_resampling.py index 8552f4538427..27b1daf1375f 100644 --- a/scipy/stats/_resampling.py +++ b/scipy/stats/_resampling.py @@ -7,7 +7,7 @@ from scipy._lib._util import (check_random_state, _rename_parameter, rng_integers, _transition_to_rng) -from scipy._lib._array_api import array_namespace, is_numpy +from scipy._lib._array_api import array_namespace, is_numpy, xp_result_type from scipy.special import ndtr, ndtri, comb, factorial from ._common import ConfidenceInterval @@ -698,6 +698,7 @@ def _monte_carlo_test_iv(data, rvs, statistic, vectorized, n_resamples, vectorized = 'axis' in signature xp = array_namespace(*data) + dtype = xp_result_type(*data, force_floating=True, xp=xp) if not vectorized: if is_numpy(xp): @@ -732,10 +733,6 @@ def _monte_carlo_test_iv(data, rvs, statistic, vectorized, n_resamples, if alternative not in alternatives: raise ValueError(f"`alternative` must be in {alternatives}") - # Infer the desired p-value dtype based on the input types - min_float = getattr(xp, 'float16', xp.float32) - dtype = xp.result_type(*data_iv, min_float) - return (data_iv, rvs, statistic_vectorized, vectorized, n_resamples_int, batch_iv, alternative, axis_int, dtype, xp) diff --git a/scipy/stats/_stats_py.py b/scipy/stats/_stats_py.py index cb9551e36069..2f7e8e1af065 100644 --- a/scipy/stats/_stats_py.py +++ b/scipy/stats/_stats_py.py @@ -78,7 +78,7 @@ is_marray, xp_size, xp_vector_norm, - xp_broadcast_promote, + xp_promote, xp_capabilities, xp_ravel, ) @@ -145,16 +145,6 @@ def _chk2_asarray(a, b, axis): return a, b, outaxis -def _convert_common_float(*arrays, xp=None): - xp = array_namespace(*arrays) if xp is None else xp - arrays = [_asarray(array, subok=True) for array in arrays] - dtypes = [(xp.asarray(1.).dtype if xp.isdtype(array.dtype, 'integral') - else array.dtype) for array in arrays] - dtype = xp.result_type(*dtypes) - arrays = [xp.astype(array, dtype, copy=False) for array in arrays] - return arrays[0] 
if len(arrays)==1 else tuple(arrays) - - SignificanceResult = _make_tuple_bunch('SignificanceResult', ['statistic', 'pvalue'], []) # Let's call a SignificanceResult with legacy :correlation" attribute a @@ -630,9 +620,10 @@ def _put_val_to_limits(a, limits, inclusive, val=np.nan, xp=None): mask |= (a < lower_limit) if lower_include else a <= lower_limit if upper_limit is not None: mask |= (a > upper_limit) if upper_include else a >= upper_limit - if xp.all(mask): + lazy = is_lazy_array(mask) + if not lazy and xp.all(mask): raise ValueError("No array values within given limits") - if xp.any(mask): + if lazy or xp.any(mask): a = xp.where(mask, val, a) return a, mask @@ -805,11 +796,15 @@ def tmin(a, lowerlimit=None, axis=0, inclusive=True, nan_policy='propagate'): a, mask = _put_val_to_limits(a, (lowerlimit, None), (inclusive, None), val=max_, xp=xp) - min_ = xp.min(a, axis=axis) - valid = ~xp.all(mask, axis=axis) # At least one element above lowerlimit - # Output dtype is data-dependent - # Possible loss of precision for int types - res = min_ if xp.all(valid) else xp.where(valid, min_, xp.nan) + res = xp.min(a, axis=axis) + invalid = xp.all(mask, axis=axis) # All elements are below lowerlimit + + # For eager backends, output dtype is data-dependent + if is_lazy_array(invalid) or xp.any(invalid): + # Possible loss of precision for int types + res = xp_promote(res, force_floating=True, xp=xp) + res = xp.where(invalid, xp.nan, res) + return res[()] if res.ndim == 0 else res @@ -864,11 +859,15 @@ def tmax(a, upperlimit=None, axis=0, inclusive=True, nan_policy='propagate'): a, mask = _put_val_to_limits(a, (None, upperlimit), (None, inclusive), val=min_, xp=xp) - max_ = xp.max(a, axis=axis) - valid = ~xp.all(mask, axis=axis) # At least one element below upperlimit - # Output dtype is data-dependent - # Possible loss of precision for int types - res = max_ if xp.all(valid) else xp.where(valid, max_, xp.nan) + res = xp.max(a, axis=axis) + invalid = xp.all(mask, axis=axis) 
# All elements are above upperlimit + + # For eager backends, output dtype is data-dependent + if is_lazy_array(invalid) or xp.any(invalid): + # Possible loss of precision for int types + res = xp_promote(res, force_floating=True, xp=xp) + res = xp.where(invalid, xp.nan, res) + return res[()] if res.ndim == 0 else res @@ -1118,10 +1117,7 @@ def moment(a, order=1, axis=0, nan_policy='propagate', *, center=None): xp = array_namespace(a) a, axis = _chk_asarray(a, axis, xp=xp) - if xp.isdtype(a.dtype, 'integral'): - a = xp.asarray(a, dtype=xp.float64) - else: - a = xp.asarray(a) + a = xp_promote(a, force_floating=True, xp=xp) order = xp.asarray(order, dtype=a.dtype) if xp_size(order) == 0: @@ -1158,7 +1154,8 @@ def _demean(a, mean, axis, *, xp, precision_warning=True): # Used in e.g. `_moment`, `_zscore`, `_xp_var`. See gh-15905. a_zero_mean = a - mean - if xp_size(a_zero_mean) == 0: + if (xp_size(a_zero_mean) == 0 or not precision_warning + or is_lazy_array(a_zero_mean)): return a_zero_mean eps = xp.finfo(mean.dtype).eps * 10 @@ -1171,7 +1168,7 @@ def _demean(a, mean, axis, *, xp, precision_warning=True): with np.errstate(invalid='ignore'): precision_loss = xp.any(xp.asarray(rel_diff < eps) & xp.asarray(n > 1)) - if precision_loss and precision_warning: + if precision_loss: message = ("Precision loss occurred in moment calculation due to " "catastrophic cancellation. This occurs when the data " "are nearly identical. 
Results may be unreliable.") @@ -1188,9 +1185,7 @@ def _moment(a, order, axis, *, mean=None, xp=None): """ xp = array_namespace(a) if xp is None else xp - if xp.isdtype(a.dtype, 'integral'): - a = xp.asarray(a, dtype=xp.float64) - + a = xp_promote(a, force_floating=True, xp=xp) dtype = a.dtype # moment of empty array is the same regardless of order @@ -2860,7 +2855,7 @@ def gzscore(a, *, axis=0, ddof=0, nan_policy='propagate'): """ xp = array_namespace(a) - a = _convert_common_float(a, xp=xp) + a = xp_promote(a, force_floating=True, xp=xp) log = ma.log if isinstance(a, ma.MaskedArray) else xp.log return zscore(log(a), axis=axis, ddof=ddof, nan_policy=nan_policy) @@ -2922,7 +2917,7 @@ def zmap(scores, compare, axis=0, ddof=0, nan_policy='propagate'): like_zscore = (scores is compare) xp = array_namespace(scores, compare) - scores, compare = _convert_common_float(scores, compare, xp=xp) + scores, compare = xp_promote(scores, compare, force_floating=True, xp=xp) with warnings.catch_warnings(): if like_zscore: # zscore should not emit SmallSampleWarning @@ -3053,7 +3048,7 @@ def gstd(a, axis=0, ddof=1, *, keepdims=False, nan_policy='propagate'): """ xp = array_namespace(a) - a = xp_broadcast_promote(a, force_floating=True)[0] # just promote to correct float + a = xp_promote(a, force_floating=True, xp=xp) kwargs = dict(axis=axis, correction=ddof, keepdims=keepdims, nan_policy=nan_policy) with np.errstate(invalid='ignore', divide='ignore'): @@ -4652,8 +4647,8 @@ def pearsonr(x, y, *, alternative='two-sided', method=None, axis=0): """ xp = array_namespace(x, y) - x = xp.asarray(x) - y = xp.asarray(y) + x, y = xp_promote(x, y, force_floating=True, xp=xp) + dtype = x.dtype if not is_numpy(xp) and method is not None: method = 'invalid' @@ -4693,10 +4688,6 @@ def pearsonr(x, y, *, alternative='two-sided', method=None, axis=0): y = xp.moveaxis(y, axis, -1) axis = -1 - dtype = xp.result_type(x.dtype, y.dtype) - if xp.isdtype(dtype, "integral"): - dtype = xp.asarray(1.).dtype - 
if xp.isdtype(dtype, "complex floating"): raise ValueError('This function does not support complex data') @@ -6753,11 +6744,7 @@ def ttest_ind(a, b, *, axis=0, equal_var=True, nan_policy='propagate', """ xp = array_namespace(a, b) - default_float = xp.asarray(1.).dtype - if xp.isdtype(a.dtype, 'integral'): - a = xp.astype(a, default_float) - if xp.isdtype(b.dtype, 'integral'): - b = xp.astype(b, default_float) + a, b = xp_promote(a, b, force_floating=True, xp=xp) if axis is None: a, b, axis = xp_ravel(a), xp_ravel(b), 0 @@ -7355,8 +7342,8 @@ def power_divergence(f_obs, f_exp=None, ddof=0, axis=0, lambda_=None): def _power_divergence(f_obs, f_exp, ddof, axis, lambda_, sum_check=True): - xp = array_namespace(f_obs) - default_float = xp.asarray(1.).dtype + xp = array_namespace(f_obs, f_exp) + f_obs, f_exp = xp_promote(f_obs, f_exp, force_floating=True, xp=xp) # Convert the input argument `lambda_` to a numerical value. if isinstance(lambda_, str): @@ -7368,16 +7355,9 @@ def _power_divergence(f_obs, f_exp, ddof, axis, lambda_, sum_check=True): elif lambda_ is None: lambda_ = 1 - f_obs = xp.asarray(f_obs) - dtype = default_float if xp.isdtype(f_obs.dtype, 'integral') else f_obs.dtype - f_obs = xp.asarray(f_obs, dtype=dtype) - f_obs_float = xp.asarray(f_obs, dtype=xp.float64) - if f_exp is not None: - f_exp = xp.asarray(f_exp) - dtype = default_float if xp.isdtype(f_exp.dtype, 'integral') else f_exp.dtype - f_exp = xp.asarray(f_exp, dtype=dtype) - + # not sure why we force to float64, but not going to touch it + f_obs_float = xp.asarray(f_obs, dtype=xp.float64) bshape = _broadcast_shapes((f_obs_float.shape, f_exp.shape)) f_obs_float = xp.broadcast_to(f_obs_float, bshape) f_exp = xp.broadcast_to(f_exp, bshape) @@ -9051,8 +9031,8 @@ def combine_pvalues(pvalues, method='fisher', weights=None, *, axis=0): """ xp = array_namespace(pvalues, weights) - pvalues, weights = xp_broadcast_promote(pvalues, weights, - force_floating=True, xp=xp) + pvalues, weights = 
xp_promote(pvalues, weights, broadcast=True, + force_floating=True, xp=xp) if xp_size(pvalues) == 0: # This is really only needed for *testing* _axis_nan_policy decorator @@ -10914,7 +10894,7 @@ def _xp_mean(x, /, *, axis=None, weights=None, keepdims=False, nan_policy='propa or (weights is not None and xp_size(weights) == 0)): return gmean(x, weights=weights, axis=axis, keepdims=keepdims) - x, weights = xp_broadcast_promote(x, weights, force_floating=True) + x, weights = xp_promote(x, weights, broadcast=True, force_floating=True, xp=xp) if weights is not None: x, weights = _share_masks(x, weights, xp=xp) @@ -10938,11 +10918,12 @@ def _xp_mean(x, /, *, axis=None, weights=None, keepdims=False, nan_policy='propa # appear in `x` or `weights`. Emit warning if there is an all-NaN slice. # Test nan_policy before the implicit call to bool(contains_nan) # to avoid raising on lazy xps on the default nan_policy='propagate' - if nan_policy == 'omit' and contains_nan: + lazy = is_lazy_array(x) + if nan_policy == 'omit' and (lazy or contains_nan): nan_mask = xp.isnan(x) if weights is not None: nan_mask |= xp.isnan(weights) - if xp.any(xp.all(nan_mask, axis=axis)): + if not lazy and xp.any(xp.all(nan_mask, axis=axis)): message = (too_small_1d_omit if (x.ndim == 1 or axis is None) else too_small_nd_omit) warnings.warn(message, SmallSampleWarning, stacklevel=2) diff --git a/scipy/stats/tests/test_continued_fraction.py b/scipy/stats/tests/test_continued_fraction.py index c3b9ae5cbd5d..ac7ba3280169 100644 --- a/scipy/stats/tests/test_continued_fraction.py +++ b/scipy/stats/tests/test_continued_fraction.py @@ -19,23 +19,25 @@ class TestContinuedFraction: p = rng.uniform(1, 10, size=10) def a1(self, n, x=1.5): - xp = array_namespace(x) if n == 0: y = 0*x elif n == 1: y = x else: y = -x**2 - return xp.asarray(y, dtype=x.dtype) + if np.isscalar(y) and np.__version__ < "2.0": + y = np.full_like(x, y) # preserve dtype pre NEP 50 + return y def b1(self, n, x=1.5): - xp = array_namespace(x) 
if n == 0: y = 0*x else: one = x/x # gets array of correct type, dtype, and shape y = one * (2*n - 1) - return xp.asarray(y, dtype=x.dtype) + if np.isscalar(y) and np.__version__ < "2.0": + y = np.full_like(x, y) # preserve dtype pre NEP 50 + return y def log_a1(self, n, x): xp = array_namespace(x) @@ -45,7 +47,7 @@ def log_a1(self, n, x): y = xp.log(x) else: y = 2 * xp.log(x) + math.pi * 1j - return xp.asarray(y, dtype=x.dtype) + return y def log_b1(self, n, x): xp = array_namespace(x) @@ -54,7 +56,7 @@ def log_b1(self, n, x): else: one = x - x # gets array of correct type, dtype, and shape y = one + math.log(2 * n - 1) - return xp.asarray(y, dtype=x.dtype) + return y def test_input_validation(self, xp): a1 = self.a1 diff --git a/scipy/stats/tests/test_continuous.py b/scipy/stats/tests/test_continuous.py index e8e4eeeabb27..94f0fd4e1f8a 100644 --- a/scipy/stats/tests/test_continuous.py +++ b/scipy/stats/tests/test_continuous.py @@ -10,6 +10,7 @@ from hypothesis import strategies, given, reproduce_failure, settings # noqa: F401 import hypothesis.extra.numpy as npst +from scipy import special from scipy import stats from scipy.stats._fit import _kolmogorov_smirnov from scipy.stats._ksstats import kolmogn @@ -1272,6 +1273,70 @@ def cdf(self, x, *, c, mu, sigma): assert_allclose(X1.icdf(p), X2.icdf(p)) assert_allclose(X1.iccdf(p), X2.iccdf(p)) + @pytest.mark.parametrize("a", [0.5, np.asarray([0.5, 1.0, 2.0, 4.0, 8.0])]) + @pytest.mark.parametrize("b", [0.5, np.asarray([0.5, 1.0, 2.0, 4.0, 8.0])]) + def test_custom_multiple_parameterizations(self, a, b): + rng = np.random.default_rng(7548723590230982) + class MyBeta: + @property + def __make_distribution_version__(self): + return "1.16.0" + + @property + def parameters(self): + return ( + {"a": (0, np.inf), "b": (0, np.inf)}, + {"mu": (0, 1), "nu": (0, np.inf)}, + ) + + def process_parameters(self, a=None, b=None, mu=None, nu=None): + if a is not None and b is not None and mu is None and nu is None: + nu = a + b + mu = 
a / nu + else: + a = mu * nu + b = nu - a + return {"a": a, "b": b, "mu": mu, "nu": nu} + + @property + def support(self): + return {'endpoints': (0, 1)} + + def pdf(self, x, a, b, mu, nu): + return special._ufuncs._beta_pdf(x, a, b) + + def cdf(self, x, a, b, mu, nu): + return special.betainc(a, b, x) + + Beta = stats.make_distribution(stats.beta) + MyBeta = stats.make_distribution(MyBeta()) + + mu = a / (a + b) + nu = a + b + + X = MyBeta(a=a, b=b) + Y = MyBeta(mu=mu, nu=nu) + Z = Beta(a=a, b=b) + + x = Z.sample(shape=10, rng=rng) + p = Z.cdf(x) + + assert_allclose(X.support(), Z.support()) + assert_allclose(X.median(), Z.median()) + assert_allclose(X.pdf(x), Z.pdf(x)) + assert_allclose(X.cdf(x), Z.cdf(x)) + assert_allclose(X.ccdf(x), Z.ccdf(x)) + assert_allclose(X.icdf(p), Z.icdf(p)) + assert_allclose(X.iccdf(p), Z.iccdf(p)) + + assert_allclose(Y.support(), Z.support()) + assert_allclose(Y.median(), Z.median()) + assert_allclose(Y.pdf(x), Z.pdf(x)) + assert_allclose(Y.cdf(x), Z.cdf(x)) + assert_allclose(Y.ccdf(x), Z.ccdf(x)) + assert_allclose(Y.icdf(p), Z.icdf(p)) + assert_allclose(Y.iccdf(p), Z.iccdf(p)) + def test_input_validation(self): message = '`levy_stable` is not supported.' 
with pytest.raises(NotImplementedError, match=message): diff --git a/scipy/stats/tests/test_marray.py b/scipy/stats/tests/test_marray.py index df6644ab1ff2..ffb3121fcf35 100644 --- a/scipy/stats/tests/test_marray.py +++ b/scipy/stats/tests/test_marray.py @@ -3,7 +3,7 @@ from scipy import stats from scipy._lib._array_api import xp_assert_close, xp_assert_equal -from scipy.stats._stats_py import _xp_mean, _xp_var +from scipy.stats._stats_py import _xp_mean, _xp_var, _length_nonmasked from scipy.stats._axis_nan_policy import _axis_nan_policy_factory @@ -38,6 +38,7 @@ def get_arrays(n_arrays, *, dtype='float64', xp=np, shape=(7, 8), seed=849121654 @skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711') @skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.") +@skip_backend('torch', reason="marray#99") @pytest.mark.parametrize('fun, kwargs', [(stats.gmean, {}), (stats.hmean, {}), (stats.pmean, {'p': 2})]) @@ -51,6 +52,7 @@ def test_xmean(fun, kwargs, axis, xp): @skip_backend('dask.array', reason='Arrays need `device` attribute: dask/dask#11711') @skip_backend('jax.numpy', reason="JAX doesn't allow item assignment.") +@skip_backend('torch', reason="marray#99") @pytest.mark.parametrize('axis', [0, 1, None]) @pytest.mark.parametrize('keepdims', [False, True]) def test_xp_mean(axis, keepdims, xp): @@ -272,3 +274,16 @@ def test_ttest_ind_from_stats(xp): xp_assert_close(res.pvalue.mask, mask) assert res.statistic.shape == shape assert res.pvalue.shape == shape + +def test_length_nonmasked_marray_iterable_axis_raises(): + xp = marray._get_namespace(np) + + data = [[1.0, 2.0], [3.0, 4.0]] + mask = [[False, False], [True, False]] + marr = xp.asarray(data, mask=mask) + + # Axis tuples are not currently supported for MArray input. + # This test can be removed after support is added. 
+ with pytest.raises(NotImplementedError, + match="`axis` must be an integer or None for use with `MArray`"): + _length_nonmasked(marr, axis=(0, 1), xp=xp) diff --git a/scipy/stats/tests/test_morestats.py b/scipy/stats/tests/test_morestats.py index 9a8209eb7831..bb1d8b5d5a2b 100644 --- a/scipy/stats/tests/test_morestats.py +++ b/scipy/stats/tests/test_morestats.py @@ -2035,7 +2035,7 @@ def test_gh_6873(self, xp): xp_assert_close(llf, xp.asarray(-17.93934208579061)) def test_instability_gh20021(self, xp): - data = xp.asarray([2003, 1950, 1997, 2000, 2009]) + data = xp.asarray([2003, 1950, 1997, 2000, 2009], dtype=xp.float64) llf = stats.boxcox_llf(1e-8, data) # The expected value was computed with mpsci, set mpmath.mp.dps=100 # expect float64 output for integer input diff --git a/scipy/stats/tests/test_quantile.py b/scipy/stats/tests/test_quantile.py index b181f305b53d..744d9e2cea29 100644 --- a/scipy/stats/tests/test_quantile.py +++ b/scipy/stats/tests/test_quantile.py @@ -2,7 +2,7 @@ import numpy as np from scipy import stats -from scipy._lib._array_api import xp_default_dtype, is_numpy, is_torch +from scipy._lib._array_api import xp_default_dtype, is_numpy, is_torch, SCIPY_ARRAY_API from scipy._lib._array_api_no_0d import xp_assert_close, xp_assert_equal from scipy._lib._util import _apply_over_batch @@ -133,6 +133,8 @@ def test_against_reference(self, axis, keepdims, nan_policy, dtype, method, xp): if is_torch(xp): pytest.skip("sum_cpu not implemented for UInt64, see " "data-apis/array-api-compat#242") + if not SCIPY_ARRAY_API: + pytest.skip("MArray is only available if SCIPY_ARRAY_API=1") marray = pytest.importorskip('marray') kwargs = dict(axis=axis, keepdims=keepdims, method=method) mxp = marray._get_namespace(xp) @@ -160,7 +162,11 @@ def test_integer_input_output_dtype(self, xp): ([[], []], 0.5, np.full(2, np.nan), {'axis': -1}), ([[], []], 0.5, np.zeros((0,)), {'axis': 0, 'keepdims': False}), ([[], []], 0.5, np.zeros((1, 0)), {'axis': 0, 'keepdims': True}), 
- ([], [0.5, 0.6], np.full(2, np.nan), {}),]) + ([], [0.5, 0.6], np.full(2, np.nan), {}), + (np.arange(1, 28).reshape((3, 3, 3)), 0.5, [[[14.]]], + {'axis': None, 'keepdims': True}), + ([[1, 2], [3, 4]], [0.25, 0.5, 0.75], [[1.75, 2.5, 3.25]], + {'axis': None, 'keepdims': True}),]) def test_edge_cases(self, x, p, ref, kwargs, xp): default_dtype = xp_default_dtype(xp) x, p, ref = xp.asarray(x), xp.asarray(p), xp.asarray(ref, dtype=default_dtype) diff --git a/scipy/stats/tests/test_stats.py b/scipy/stats/tests/test_stats.py index 2dfd19d8e1b8..77e96e3beddf 100644 --- a/scipy/stats/tests/test_stats.py +++ b/scipy/stats/tests/test_stats.py @@ -44,7 +44,8 @@ is_torch, xp_default_dtype, xp_size, SCIPY_ARRAY_API, make_skip_xp_backends) from scipy._lib._array_api_no_0d import xp_assert_close, xp_assert_equal -from scipy._lib import array_api_extra as xpx +import scipy._lib.array_api_extra as xpx +from scipy._lib.array_api_extra.testing import lazy_xp_function skip_xp_backends = pytest.mark.skip_xp_backends boolean_index_skip_reason = 'JAX/Dask arrays do not support boolean assignment.' 
@@ -74,15 +75,20 @@ TINY = array([1e-12,2e-12,3e-12,4e-12,5e-12,6e-12,7e-12,8e-12,9e-12], float) ROUND = array([0.5,1.5,2.5,3.5,4.5,5.5,6.5,7.5,8.5], float) +lazy_xp_modules = [stats] +lazy_xp_function(stats.tmean, static_argnames=("inclusive", "axis")) +lazy_xp_function(stats.tvar, static_argnames=("inclusive", "axis", "ddof")) +lazy_xp_function(stats.tstd, static_argnames=("inclusive", "axis", "ddof")) +lazy_xp_function(stats.tsem, static_argnames=("inclusive", "axis", "ddof")) +lazy_xp_function(stats.tmin, static_argnames=("inclusive", "axis")) +lazy_xp_function(stats.tmax, static_argnames=("inclusive", "axis")) + class TestTrimmedStats: # TODO: write these tests to handle missing values properly dprec = np.finfo(np.float64).precision @make_skip_xp_backends(stats.tmean) - @pytest.mark.filterwarnings( - "ignore:invalid value encountered in divide:RuntimeWarning:dask" - ) def test_tmean(self, xp): default_dtype = xp_default_dtype(xp) x = xp.asarray(X, dtype=default_dtype) @@ -172,19 +178,17 @@ def test_tstd(self, xp): xp_assert_close(y, xp.std(x, correction=1)) @make_skip_xp_backends(stats.tmin) - @pytest.mark.xfail_xp_backends("array_api_strict", - reason="broadcast int dtype vs. xp.nan") def test_tmin(self, xp): - x = xp.arange(10) - xp_assert_equal(stats.tmin(x), xp.asarray(0)) - xp_assert_equal(stats.tmin(x, lowerlimit=0), xp.asarray(0)) - xp_assert_equal(stats.tmin(x, lowerlimit=0, inclusive=False), xp.asarray(1)) + x = xp.arange(10.) 
+ xp_assert_equal(stats.tmin(x), xp.asarray(0.)) + xp_assert_equal(stats.tmin(x, lowerlimit=0), xp.asarray(0.)) + xp_assert_equal(stats.tmin(x, lowerlimit=0, inclusive=False), xp.asarray(1.)) x = xp.reshape(x, (5, 2)) xp_assert_equal(stats.tmin(x, lowerlimit=0, inclusive=False), - xp.asarray([2, 1])) - xp_assert_equal(stats.tmin(x, axis=1), xp.asarray([0, 2, 4, 6, 8])) - xp_assert_equal(stats.tmin(x, axis=None), xp.asarray(0)) + xp.asarray([2., 1.])) + xp_assert_equal(stats.tmin(x, axis=1), xp.asarray([0., 2., 4., 6., 8.])) + xp_assert_equal(stats.tmin(x, axis=None), xp.asarray(0.)) x = xpx.at(xp.arange(10.), 9).set(xp.nan) xp_assert_equal(stats.tmin(x), xp.asarray(xp.nan)) @@ -213,19 +217,17 @@ def test_tmin_scalar_and_nanpolicy(self, xp): stats.tmin(x, nan_policy='foobar') @make_skip_xp_backends(stats.tmax) - @pytest.mark.xfail_xp_backends("array_api_strict", - reason="broadcast int dtype vs. xp.nan") def test_tmax(self, xp): - x = xp.arange(10) - xp_assert_equal(stats.tmax(x), xp.asarray(9)) - xp_assert_equal(stats.tmax(x, upperlimit=9), xp.asarray(9)) - xp_assert_equal(stats.tmax(x, upperlimit=9, inclusive=False), xp.asarray(8)) + x = xp.arange(10.) 
+ xp_assert_equal(stats.tmax(x), xp.asarray(9.)) + xp_assert_equal(stats.tmax(x, upperlimit=9), xp.asarray(9.)) + xp_assert_equal(stats.tmax(x, upperlimit=9, inclusive=False), xp.asarray(8.)) x = xp.reshape(x, (5, 2)) xp_assert_equal(stats.tmax(x, upperlimit=9, inclusive=False), - xp.asarray([8, 7])) - xp_assert_equal(stats.tmax(x, axis=1), xp.asarray([1, 3, 5, 7, 9])) - xp_assert_equal(stats.tmax(x, axis=None), xp.asarray(9)) + xp.asarray([8., 7.])) + xp_assert_equal(stats.tmax(x, axis=1), xp.asarray([1., 3., 5., 7., 9.])) + xp_assert_equal(stats.tmax(x, axis=None), xp.asarray(9.)) x = xpx.at(xp.arange(10.), 9).set(xp.nan) xp_assert_equal(stats.tmax(x), xp.asarray(xp.nan)) @@ -255,6 +257,29 @@ def test_tmax_scalar_and_nanpolicy(self, xp): with assert_raises(ValueError, match=msg): stats.tmax(x, nan_policy='foobar') + @make_skip_xp_backends(stats.tmin, stats.tmax) + def test_tmin_tmax_int_dtype(self, xp): + x = xp.reshape(xp.arange(10, dtype=xp.int16), (2, 5)).T + + # When tmin/tmax don't need to inject any NaNs, + # retain the input dtype. Dask/JAX can't inspect + # the data so they always return float. + expect_dtype = xp_default_dtype(xp) if is_lazy_array(x) else x.dtype + xp_assert_equal(stats.tmin(x), xp.asarray([0, 5], dtype=expect_dtype)) + xp_assert_equal(stats.tmax(x), xp.asarray([4, 9], dtype=expect_dtype)) + + # When they do inject NaNs, all backends behave the same. 
+ xp_assert_equal(stats.tmin(x, lowerlimit=6), xp.asarray([xp.nan, 6.])) + xp_assert_equal(stats.tmax(x, upperlimit=3), xp.asarray([3., xp.nan])) + + @skip_xp_backends(eager_only=True, reason="Only with data-dependent output dtype") + @make_skip_xp_backends(stats.tmin, stats.tmax) + def test_gh_22626(self, xp): + # Test that `tmin`/`tmax` returns exact result with outrageously large integers + x = xp.arange(2**62, 2**62+10) + xp_assert_equal(stats.tmin(x[None, :]), x) + xp_assert_equal(stats.tmax(x[None, :]), x) + @make_skip_xp_backends(stats.tsem) def test_tsem(self, xp): x = xp.asarray(X.tolist()) # use default dtype of xp @@ -264,13 +289,6 @@ def test_tsem(self, xp): xp_assert_close(y, xp.std(y_ref, correction=1) / xp_size(y_ref)**0.5) xp_assert_close(stats.tsem(x, limits=[-1, 10]), stats.tsem(x, limits=None)) - @make_skip_xp_backends(stats.tmax, stats.tmin) - def test_gh_22626(self, xp): - # Test that `tmin`/`tmax` returns exact result with outrageously large integers - x = xp.arange(2**62, 2**62+10) - xp_assert_equal(stats.tmin(x[None, :]), x) - xp_assert_equal(stats.tmax(x[None, :]), x) - class TestPearsonrWilkinson: """ W.II.D. Compute a correlation matrix on all the variables. @@ -2111,15 +2129,15 @@ def wkq(x, y, rank, weigher, add): def weigher(x): return 1. / (x + 1) - np.random.seed(42) + rng = np.random.default_rng(42) for s in range(3,10): a = [] # Generate rankings with ties for i in range(s): a += [i]*i b = list(a) - np.random.shuffle(a) - np.random.shuffle(b) + rng.shuffle(a) + rng.shuffle(b) # First pass: use element indices as ranks rank = np.arange(len(a), dtype=np.intp) for _ in range(2): @@ -2128,7 +2146,7 @@ def weigher(x): actual = stats.weightedtau(a, b, rank, weigher, add).statistic assert_approx_equal(expected, actual) # Second pass: use a random rank - np.random.shuffle(rank) + rng.shuffle(rank) class TestFindRepeats: @@ -2746,7 +2764,7 @@ def test_gh16955(self, nan_policy): # was deprecated, so check for the appropriate error. 
my_dtype = np.dtype([('asdf', np.uint8), ('qwer', np.float64, (3,))]) test = np.zeros(10, dtype=my_dtype) - message = "Argument `a` is not....|An argument has dtype..." + message = "Argument `a` is not....|An argument has dtype...|The DType..." with pytest.raises(TypeError, match=message): stats.mode(test, nan_policy=nan_policy) @@ -3025,7 +3043,11 @@ def test_zscore_nan_raise(self, xp): def test_zscore_constant_input_1d(self, xp): x = xp.asarray([-0.087] * 3) - with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): + warn_ctx = ( + contextlib.nullcontext() if is_lazy_array(x) + else pytest.warns(RuntimeWarning, match="Precision loss occurred...")) + + with warn_ctx: z = stats.zscore(x) xp_assert_equal(z, xp.full(x.shape, xp.nan)) @@ -3036,12 +3058,16 @@ def test_zscore_constant_input_1d(self, xp): def test_zscore_constant_input_2d(self, xp): x = xp.asarray([[10.0, 10.0, 10.0, 10.0], [10.0, 11.0, 12.0, 13.0]]) - with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): + warn_ctx = ( + contextlib.nullcontext() if is_lazy_array(x) + else pytest.warns(RuntimeWarning, match="Precision loss occurred...")) + + with warn_ctx: z0 = stats.zscore(x, axis=0) xp_assert_close(z0, xp.asarray([[xp.nan, -1.0, -1.0, -1.0], [xp.nan, 1.0, 1.0, 1.0]])) - with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): + with warn_ctx: z1 = stats.zscore(x, axis=1) xp_assert_equal(z1, xp.stack([xp.asarray([xp.nan, xp.nan, xp.nan, xp.nan]), stats.zscore(x[1, :])])) @@ -3050,7 +3076,7 @@ def test_zscore_constant_input_2d(self, xp): xp_assert_equal(z, xp.reshape(stats.zscore(xp.reshape(x, (-1,))), x.shape)) y = xp.ones((3, 6)) - with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): + with warn_ctx: z = stats.zscore(y, axis=None) xp_assert_equal(z, xp.full(y.shape, xp.asarray(xp.nan))) @@ -3061,14 +3087,17 @@ def test_zscore_constant_input_2d_nan_policy_omit(self, xp): [10.0, 12.0, xp.nan, 10.0]]) s = (3/2)**0.5 s2 = 2**0.5 + warn_ctx = ( 
+ contextlib.nullcontext() if is_lazy_array(x) + else pytest.warns(RuntimeWarning, match="Precision loss occurred...")) - with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): + with warn_ctx: z0 = stats.zscore(x, nan_policy='omit', axis=0) xp_assert_close(z0, xp.asarray([[xp.nan, -s, -1.0, xp.nan], [xp.nan, 0, 1.0, xp.nan], [xp.nan, s, xp.nan, xp.nan]])) - with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): + with warn_ctx: z1 = stats.zscore(x, nan_policy='omit', axis=1) xp_assert_close(z1, xp.asarray([[xp.nan, xp.nan, xp.nan, xp.nan], [-s, 0, s, xp.nan], @@ -3145,7 +3174,11 @@ def test_degenerate_input(self, xp): scores = xp.arange(3) compare = xp.ones(3) ref = xp.asarray([-xp.inf, xp.nan, xp.inf]) - with pytest.warns(RuntimeWarning, match="Precision loss occurred..."): + warn_ctx = ( + contextlib.nullcontext() if is_lazy_array(scores) + else pytest.warns(RuntimeWarning, match="Precision loss occurred...")) + + with warn_ctx: res = stats.zmap(scores, compare) xp_assert_equal(res, ref) @@ -3698,19 +3731,29 @@ def test_skew_propagate_nan(self, xp): def test_skew_constant_value(self, xp): # Skewness of a constant input should be NaN (gh-16061) - with pytest.warns(RuntimeWarning, match="Precision loss occurred"): - a = xp.asarray([-0.27829495]*10) # xp.repeat not currently available + a = xp.asarray([-0.27829495]*10) # xp.repeat not currently available + warn_ctx = ( + contextlib.nullcontext() if is_lazy_array(a) + else pytest.warns(RuntimeWarning, match="Precision loss occurred...")) + + with warn_ctx: xp_assert_equal(stats.skew(a), xp.asarray(xp.nan)) + with warn_ctx: xp_assert_equal(stats.skew(a*2.**50), xp.asarray(xp.nan)) + with warn_ctx: xp_assert_equal(stats.skew(a/2.**50), xp.asarray(xp.nan)) + with warn_ctx: xp_assert_equal(stats.skew(a, bias=False), xp.asarray(xp.nan)) - # # similarly, from gh-11086: - a = xp.asarray([14.3]*7) + # # similarly, from gh-11086: + a = xp.asarray([14.3]*7) + with warn_ctx: 
xp_assert_equal(stats.skew(a), xp.asarray(xp.nan)) - a = 1. + xp.arange(-3., 4)*1e-16 + a = 1. + xp.arange(-3., 4)*1e-16 + with warn_ctx: xp_assert_equal(stats.skew(a), xp.asarray(xp.nan)) + @skip_xp_backends(eager_only=True) def test_precision_loss_gh15554(self, xp): # gh-15554 was one of several issues that have reported problems with # constant or near-constant input. We can't always fix these, but @@ -3721,7 +3764,6 @@ def test_precision_loss_gh15554(self, xp): a[:, 0] = 1.01 stats.skew(xp.asarray(a)) - @pytest.mark.skip_xp_backends('dask.array', reason=boolean_index_skip_reason) @pytest.mark.parametrize('axis', [-1, 0, 2, None]) @pytest.mark.parametrize('bias', [False, True]) def test_vectorization(self, xp, axis, bias): @@ -3810,13 +3852,19 @@ def test_kurtosis_propagate_nan(self): def test_kurtosis_constant_value(self, xp): # Kurtosis of a constant input should be NaN (gh-16061) a = xp.asarray([-0.27829495]*10) - with pytest.warns(RuntimeWarning, match="Precision loss occurred"): + warn_ctx = ( + contextlib.nullcontext() if is_lazy_array(a) + else pytest.warns(RuntimeWarning, match="Precision loss occurred...")) + + with warn_ctx: assert xp.isnan(stats.kurtosis(a, fisher=False)) + with warn_ctx: assert xp.isnan(stats.kurtosis(a * float(2**50), fisher=False)) + with warn_ctx: assert xp.isnan(stats.kurtosis(a / float(2**50), fisher=False)) + with warn_ctx: assert xp.isnan(stats.kurtosis(a, fisher=False, bias=False)) - @pytest.mark.skip_xp_backends('dask.array', reason=boolean_index_skip_reason) @pytest.mark.parametrize('axis', [-1, 0, 2, None]) @pytest.mark.parametrize('bias', [False, True]) @pytest.mark.parametrize('fisher', [False, True]) @@ -5702,8 +5750,9 @@ def test_ttest_many_dims(self, kwds, equal_var): def test_nans_on_axis(self, kwds, axis): # confirm that with `nan_policy='propagate'`, NaN results are returned # on the correct location - a = np.random.randint(10, size=(5, 3, 10)).astype('float') - b = np.random.randint(10, size=(5, 3, 
10)).astype('float') + rng = np.random.default_rng(363836384995579937222) + a = rng.integers(10, size=(5, 3, 10)).astype('float') + b = rng.integers(10, size=(5, 3, 10)).astype('float') # set some indices in `a` and `b` to be `np.nan`. a[0][2][3] = np.nan b[2][0][6] = np.nan @@ -6061,8 +6110,13 @@ def test_ttest_ind_zero_division(self, xp): # test zero division problem x = xp.zeros(3) y = xp.ones(3) - with pytest.warns(RuntimeWarning, match="Precision loss occurred"): + warn_ctx = ( + contextlib.nullcontext() if is_lazy_array(x) + else pytest.warns(RuntimeWarning, match="Precision loss occurred...")) + + with warn_ctx: t, p = stats.ttest_ind(x, y, equal_var=False) + xp_assert_equal(t, xp.asarray(-xp.inf)) xp_assert_equal(p, xp.asarray(0.)) @@ -7276,16 +7330,17 @@ def test_1d_numeric_array_like_input(self, xp): assert_allclose(gstd_actual, self.gstd_array_1d) @skip_xp_invalid_arg - def test_raises_value_error_non_numeric_input(self, xp): - # this is raised by NumPy, but it's quite interpretable - with pytest.raises(TypeError, match="ufunc 'log' not supported"): + def test_raises_error_non_numeric_input(self, xp): + message = "could not convert string to float|The DType..." + with pytest.raises((ValueError, TypeError), match=message): stats.gstd('You cannot take the logarithm of a string.') - @skip_xp_backends(eager_only=True) + @pytest.mark.filterwarnings("ignore:divide by zero encountered:RuntimeWarning:dask") + @pytest.mark.filterwarnings("ignore:invalid value encountered:RuntimeWarning:dask") @pytest.mark.parametrize('bad_value', (0, -1, np.inf, np.nan)) def test_returns_nan_invalid_value(self, bad_value, xp): x = xp.asarray(self.array_1d + [bad_value]) - if np.isfinite(bad_value): + if np.isfinite(bad_value) and not is_lazy_array(x): message = "The geometric standard deviation is only defined..." 
with pytest.warns(RuntimeWarning, match=message): res = stats.gstd(x) @@ -9488,6 +9543,11 @@ def test_nan_policy(self, xp): ref = xp.mean(x[~mask]) xp_assert_close(res, ref) + @skip_xp_backends(eager_only=True) + def test_nan_policy_warns(self, xp): + x = xp.arange(10.) + x = xp.where(x == 3, xp.nan, x) + # Check for warning if omitting NaNs causes empty slice message = 'After omitting NaNs...' with pytest.warns(RuntimeWarning, match=message): @@ -9547,7 +9607,6 @@ def test_complex_gh22404(self, xp): xp_assert_close(res, xp.asarray(ref)) -@pytest.mark.skip_xp_backends('dask.array', reason=boolean_index_skip_reason) class TestXP_Var: @pytest.mark.parametrize('axis', [None, 1, -1, (-2, 2)]) @pytest.mark.parametrize('keepdims', [False, True]) @@ -9596,6 +9655,11 @@ def test_nan_policy(self, xp): ref = xp.var(x[~mask]) xp_assert_close(res, ref) + @skip_xp_backends(eager_only=True) + def test_nan_policy_warns(self, xp): + x = xp.arange(10.) + x = xp.where(x == 3, xp.nan, x) + # Check for warning if omitting NaNs causes empty slice message = 'After omitting NaNs...' 
with pytest.warns(RuntimeWarning, match=message): @@ -9627,17 +9691,18 @@ def test_empty(self, xp): ref = xp.asarray([]) xp_assert_equal(res, ref) + @pytest.mark.filterwarnings( + "ignore:overflow encountered in reduce:RuntimeWarning" + ) # Overflow occurs for float32 input def test_dtype(self, xp): max = xp.finfo(xp.float32).max x_np = np.asarray([max, max/2], dtype=np.float32) x_xp = xp.asarray(x_np) - # Overflow occurs for float32 input - with np.errstate(over='ignore'): - res = _xp_var(x_xp) - ref = np.var(x_np) - np.testing.assert_equal(ref, np.inf) - xp_assert_close(res, xp.asarray(ref)) + res = _xp_var(x_xp) + ref = np.var(x_np) + np.testing.assert_equal(ref, np.inf) + xp_assert_close(res, xp.asarray(ref)) # correct result is returned if `float64` is used res = _xp_var(x_xp, dtype=xp.float64) diff --git a/scipy/stats/tests/test_variation.py b/scipy/stats/tests/test_variation.py index 2381a6b38e95..5fc906530a07 100644 --- a/scipy/stats/tests/test_variation.py +++ b/scipy/stats/tests/test_variation.py @@ -172,9 +172,9 @@ def test_neg_inf_nan(self, xp): reason='`nan_policy` only supports NumPy backend') @pytest.mark.parametrize("nan_policy", ['propagate', 'omit']) def test_combined_edge_cases(self, nan_policy, xp): - x = xp.array([[0, 10, xp.nan, 1], - [0, -5, xp.nan, 2], - [0, -5, xp.nan, 3]]) + x = xp.asarray([[0, 10, xp.nan, 1], + [0, -5, xp.nan, 2], + [0, -5, xp.nan, 3]]) if nan_policy == 'omit': with pytest.warns(SmallSampleWarning, match=too_small_nd_omit): y = variation(x, axis=0, nan_policy=nan_policy) diff --git a/tools/generate_f2pymod.py b/tools/generate_f2pymod.py index e61524cc39b5..aaedad2bd26f 100644 --- a/tools/generate_f2pymod.py +++ b/tools/generate_f2pymod.py @@ -9,6 +9,7 @@ import os import re import subprocess +import sys # START OF CODE VENDORED FROM `numpy.distutils.from_template` @@ -266,6 +267,8 @@ def main(): parser.add_argument("--free-threading", action=argparse.BooleanOptionalAction, help="Whether to add 
--free-threading-compatible") + parser.add_argument("--f2cmap", type=str, + help="Path to the f2cmap file") args = parser.parse_args() if not args.infile.endswith(('.pyf', '.pyf.src', '.f.src')): @@ -290,10 +293,13 @@ def main(): # Now invoke f2py to generate the C API module file if args.infile.endswith(('.pyf.src', '.pyf')): - p = subprocess.Popen( - ['f2py', fname_pyf, '--build-dir', outdir_abs] + nogil_arg, - stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=os.getcwd() - ) + cmd = [sys.executable, '-m', 'numpy.f2py', fname_pyf, + '--build-dir', outdir_abs] + nogil_arg + if args.f2cmap: + cmd += ['--f2cmap', args.f2cmap] + + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE, cwd=os.getcwd()) out, err = p.communicate() if not (p.returncode == 0): raise RuntimeError(f"Processing {fname_pyf} with f2py failed!\n"