forked from 170010011/fr
481 lines
11 KiB
Python
481 lines
11 KiB
Python
|
"""
|
||
|
Low-level BLAS functions (:mod:`scipy.linalg.blas`)
|
||
|
===================================================
|
||
|
|
||
|
This module contains low-level functions from the BLAS library.
|
||
|
|
||
|
.. versionadded:: 0.12.0
|
||
|
|
||
|
.. note::
|
||
|
|
||
|
The common ``overwrite_<>`` option in many routines, allows the
|
||
|
input arrays to be overwritten to avoid extra memory allocation.
|
||
|
However this requires the array to satisfy two conditions
|
||
|
which are memory order and the data type to match exactly the
|
||
|
order and the type expected by the routine.
|
||
|
|
||
|
As an example, if you pass a double precision float array to any
|
||
|
``S....`` routine which expects single precision arguments, f2py
|
||
|
will create an intermediate array to match the argument types and
|
||
|
overwriting will be performed on that intermediate array.
|
||
|
|
||
|
Similarly, if a C-contiguous array is passed, f2py will pass a
|
||
|
FORTRAN-contiguous array internally. Please make sure that these
|
||
|
details are satisfied. More information can be found in the f2py
|
||
|
documentation.
|
||
|
|
||
|
.. warning::
|
||
|
|
||
|
These functions do little to no error checking.
|
||
|
It is possible to cause crashes by mis-using them,
|
||
|
so prefer using the higher-level routines in `scipy.linalg`.
|
||
|
|
||
|
Finding functions
|
||
|
-----------------
|
||
|
|
||
|
.. autosummary::
|
||
|
:toctree: generated/
|
||
|
|
||
|
get_blas_funcs
|
||
|
find_best_blas_type
|
||
|
|
||
|
BLAS Level 1 functions
|
||
|
----------------------
|
||
|
|
||
|
.. autosummary::
|
||
|
:toctree: generated/
|
||
|
|
||
|
caxpy
|
||
|
ccopy
|
||
|
cdotc
|
||
|
cdotu
|
||
|
crotg
|
||
|
cscal
|
||
|
csrot
|
||
|
csscal
|
||
|
cswap
|
||
|
dasum
|
||
|
daxpy
|
||
|
dcopy
|
||
|
ddot
|
||
|
dnrm2
|
||
|
drot
|
||
|
drotg
|
||
|
drotm
|
||
|
drotmg
|
||
|
dscal
|
||
|
dswap
|
||
|
dzasum
|
||
|
dznrm2
|
||
|
icamax
|
||
|
idamax
|
||
|
isamax
|
||
|
izamax
|
||
|
sasum
|
||
|
saxpy
|
||
|
scasum
|
||
|
scnrm2
|
||
|
scopy
|
||
|
sdot
|
||
|
snrm2
|
||
|
srot
|
||
|
srotg
|
||
|
srotm
|
||
|
srotmg
|
||
|
sscal
|
||
|
sswap
|
||
|
zaxpy
|
||
|
zcopy
|
||
|
zdotc
|
||
|
zdotu
|
||
|
zdrot
|
||
|
zdscal
|
||
|
zrotg
|
||
|
zscal
|
||
|
zswap
|
||
|
|
||
|
BLAS Level 2 functions
|
||
|
----------------------
|
||
|
|
||
|
.. autosummary::
|
||
|
:toctree: generated/
|
||
|
|
||
|
sgbmv
|
||
|
sgemv
|
||
|
sger
|
||
|
ssbmv
|
||
|
sspr
|
||
|
sspr2
|
||
|
ssymv
|
||
|
ssyr
|
||
|
ssyr2
|
||
|
stbmv
|
||
|
stpsv
|
||
|
strmv
|
||
|
strsv
|
||
|
dgbmv
|
||
|
dgemv
|
||
|
dger
|
||
|
dsbmv
|
||
|
dspr
|
||
|
dspr2
|
||
|
dsymv
|
||
|
dsyr
|
||
|
dsyr2
|
||
|
dtbmv
|
||
|
dtpsv
|
||
|
dtrmv
|
||
|
dtrsv
|
||
|
cgbmv
|
||
|
cgemv
|
||
|
cgerc
|
||
|
cgeru
|
||
|
chbmv
|
||
|
chemv
|
||
|
cher
|
||
|
cher2
|
||
|
chpmv
|
||
|
chpr
|
||
|
chpr2
|
||
|
ctbmv
|
||
|
ctbsv
|
||
|
ctpmv
|
||
|
ctpsv
|
||
|
ctrmv
|
||
|
ctrsv
|
||
|
csyr
|
||
|
zgbmv
|
||
|
zgemv
|
||
|
zgerc
|
||
|
zgeru
|
||
|
zhbmv
|
||
|
zhemv
|
||
|
zher
|
||
|
zher2
|
||
|
zhpmv
|
||
|
zhpr
|
||
|
zhpr2
|
||
|
ztbmv
|
||
|
ztbsv
|
||
|
ztpmv
|
||
|
ztrmv
|
||
|
ztrsv
|
||
|
zsyr
|
||
|
|
||
|
BLAS Level 3 functions
|
||
|
----------------------
|
||
|
|
||
|
.. autosummary::
|
||
|
:toctree: generated/
|
||
|
|
||
|
sgemm
|
||
|
ssymm
|
||
|
ssyr2k
|
||
|
ssyrk
|
||
|
strmm
|
||
|
strsm
|
||
|
dgemm
|
||
|
dsymm
|
||
|
dsyr2k
|
||
|
dsyrk
|
||
|
dtrmm
|
||
|
dtrsm
|
||
|
cgemm
|
||
|
chemm
|
||
|
cher2k
|
||
|
cherk
|
||
|
csymm
|
||
|
csyr2k
|
||
|
csyrk
|
||
|
ctrmm
|
||
|
ctrsm
|
||
|
zgemm
|
||
|
zhemm
|
||
|
zher2k
|
||
|
zherk
|
||
|
zsymm
|
||
|
zsyr2k
|
||
|
zsyrk
|
||
|
ztrmm
|
||
|
ztrsm
|
||
|
|
||
|
"""
|
||
|
#
|
||
|
# Author: Pearu Peterson, March 2002
|
||
|
# refactoring by Fabian Pedregosa, March 2010
|
||
|
#
|
||
|
|
||
|
__all__ = ['get_blas_funcs', 'find_best_blas_type']
|
||
|
|
||
|
import numpy as _np
|
||
|
import functools
|
||
|
|
||
|
from scipy.linalg import _fblas
|
||
|
try:
|
||
|
from scipy.linalg import _cblas
|
||
|
except ImportError:
|
||
|
_cblas = None
|
||
|
|
||
|
try:
|
||
|
from scipy.linalg import _fblas_64
|
||
|
HAS_ILP64 = True
|
||
|
except ImportError:
|
||
|
HAS_ILP64 = False
|
||
|
_fblas_64 = None
|
||
|
|
||
|
# Expose all functions (only fblas --- cblas is an implementation detail)
|
||
|
empty_module = None
|
||
|
from scipy.linalg._fblas import *
|
||
|
del empty_module
|
||
|
|
||
|
# all numeric dtypes '?bBhHiIlLqQefdgFDGO' that are safe to be converted to
|
||
|
|
||
|
# single precision float : '?bBhH!!!!!!ef!!!!!!'
|
||
|
# double precision float : '?bBhHiIlLqQefdg!!!!'
|
||
|
# single precision complex : '?bBhH!!!!!!ef!!F!!!'
|
||
|
# double precision complex : '?bBhHiIlLqQefdgFDG!'
|
||
|
|
||
|
_type_score = {x: 1 for x in '?bBhHef'}
|
||
|
_type_score.update({x: 2 for x in 'iIlLqQd'})
|
||
|
|
||
|
# Handle float128(g) and complex256(G) separately in case non-Windows systems.
|
||
|
# On Windows, the values will be rewritten to the same key with the same value.
|
||
|
_type_score.update({'F': 3, 'D': 4, 'g': 2, 'G': 4})
|
||
|
|
||
|
# Final mapping to the actual prefixes and dtypes
|
||
|
_type_conv = {1: ('s', _np.dtype('float32')),
|
||
|
2: ('d', _np.dtype('float64')),
|
||
|
3: ('c', _np.dtype('complex64')),
|
||
|
4: ('z', _np.dtype('complex128'))}
|
||
|
|
||
|
# some convenience alias for complex functions
|
||
|
_blas_alias = {'cnrm2': 'scnrm2', 'znrm2': 'dznrm2',
|
||
|
'cdot': 'cdotc', 'zdot': 'zdotc',
|
||
|
'cger': 'cgerc', 'zger': 'zgerc',
|
||
|
'sdotc': 'sdot', 'sdotu': 'sdot',
|
||
|
'ddotc': 'ddot', 'ddotu': 'ddot'}
|
||
|
|
||
|
|
||
|
def find_best_blas_type(arrays=(), dtype=None):
|
||
|
"""Find best-matching BLAS/LAPACK type.
|
||
|
|
||
|
Arrays are used to determine the optimal prefix of BLAS routines.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
arrays : sequence of ndarrays, optional
|
||
|
Arrays can be given to determine optimal prefix of BLAS
|
||
|
routines. If not given, double-precision routines will be
|
||
|
used, otherwise the most generic type in arrays will be used.
|
||
|
dtype : str or dtype, optional
|
||
|
Data-type specifier. Not used if `arrays` is non-empty.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
prefix : str
|
||
|
BLAS/LAPACK prefix character.
|
||
|
dtype : dtype
|
||
|
Inferred Numpy data type.
|
||
|
prefer_fortran : bool
|
||
|
Whether to prefer Fortran order routines over C order.
|
||
|
|
||
|
Examples
|
||
|
--------
|
||
|
>>> import scipy.linalg.blas as bla
|
||
|
>>> a = np.random.rand(10,15)
|
||
|
>>> b = np.asfortranarray(a) # Change the memory layout order
|
||
|
>>> bla.find_best_blas_type((a,))
|
||
|
('d', dtype('float64'), False)
|
||
|
>>> bla.find_best_blas_type((a*1j,))
|
||
|
('z', dtype('complex128'), False)
|
||
|
>>> bla.find_best_blas_type((b,))
|
||
|
('d', dtype('float64'), True)
|
||
|
|
||
|
"""
|
||
|
dtype = _np.dtype(dtype)
|
||
|
max_score = _type_score.get(dtype.char, 5)
|
||
|
prefer_fortran = False
|
||
|
|
||
|
if arrays:
|
||
|
# In most cases, single element is passed through, quicker route
|
||
|
if len(arrays) == 1:
|
||
|
max_score = _type_score.get(arrays[0].dtype.char, 5)
|
||
|
prefer_fortran = arrays[0].flags['FORTRAN']
|
||
|
else:
|
||
|
# use the most generic type in arrays
|
||
|
scores = [_type_score.get(x.dtype.char, 5) for x in arrays]
|
||
|
max_score = max(scores)
|
||
|
ind_max_score = scores.index(max_score)
|
||
|
# safe upcasting for mix of float64 and complex64 --> prefix 'z'
|
||
|
if max_score == 3 and (2 in scores):
|
||
|
max_score = 4
|
||
|
|
||
|
if arrays[ind_max_score].flags['FORTRAN']:
|
||
|
# prefer Fortran for leading array with column major order
|
||
|
prefer_fortran = True
|
||
|
|
||
|
# Get the LAPACK prefix and the corresponding dtype if not fall back
|
||
|
# to 'd' and double precision float.
|
||
|
prefix, dtype = _type_conv.get(max_score, ('d', _np.dtype('float64')))
|
||
|
|
||
|
return prefix, dtype, prefer_fortran
|
||
|
|
||
|
|
||
|
def _get_funcs(names, arrays, dtype,
|
||
|
lib_name, fmodule, cmodule,
|
||
|
fmodule_name, cmodule_name, alias,
|
||
|
ilp64=False):
|
||
|
"""
|
||
|
Return available BLAS/LAPACK functions.
|
||
|
|
||
|
Used also in lapack.py. See get_blas_funcs for docstring.
|
||
|
"""
|
||
|
|
||
|
funcs = []
|
||
|
unpack = False
|
||
|
dtype = _np.dtype(dtype)
|
||
|
module1 = (cmodule, cmodule_name)
|
||
|
module2 = (fmodule, fmodule_name)
|
||
|
|
||
|
if isinstance(names, str):
|
||
|
names = (names,)
|
||
|
unpack = True
|
||
|
|
||
|
prefix, dtype, prefer_fortran = find_best_blas_type(arrays, dtype)
|
||
|
|
||
|
if prefer_fortran:
|
||
|
module1, module2 = module2, module1
|
||
|
|
||
|
for name in names:
|
||
|
func_name = prefix + name
|
||
|
func_name = alias.get(func_name, func_name)
|
||
|
func = getattr(module1[0], func_name, None)
|
||
|
module_name = module1[1]
|
||
|
if func is None:
|
||
|
func = getattr(module2[0], func_name, None)
|
||
|
module_name = module2[1]
|
||
|
if func is None:
|
||
|
raise ValueError(
|
||
|
'%s function %s could not be found' % (lib_name, func_name))
|
||
|
func.module_name, func.typecode = module_name, prefix
|
||
|
func.dtype = dtype
|
||
|
if not ilp64:
|
||
|
func.int_dtype = _np.dtype(_np.intc)
|
||
|
else:
|
||
|
func.int_dtype = _np.dtype(_np.int64)
|
||
|
func.prefix = prefix # Backward compatibility
|
||
|
funcs.append(func)
|
||
|
|
||
|
if unpack:
|
||
|
return funcs[0]
|
||
|
else:
|
||
|
return funcs
|
||
|
|
||
|
|
||
|
def _memoize_get_funcs(func):
|
||
|
"""
|
||
|
Memoized fast path for _get_funcs instances
|
||
|
"""
|
||
|
memo = {}
|
||
|
func.memo = memo
|
||
|
|
||
|
@functools.wraps(func)
|
||
|
def getter(names, arrays=(), dtype=None, ilp64=False):
|
||
|
key = (names, dtype, ilp64)
|
||
|
for array in arrays:
|
||
|
# cf. find_blas_funcs
|
||
|
key += (array.dtype.char, array.flags.fortran)
|
||
|
|
||
|
try:
|
||
|
value = memo.get(key)
|
||
|
except TypeError:
|
||
|
# unhashable key etc.
|
||
|
key = None
|
||
|
value = None
|
||
|
|
||
|
if value is not None:
|
||
|
return value
|
||
|
|
||
|
value = func(names, arrays, dtype, ilp64)
|
||
|
|
||
|
if key is not None:
|
||
|
memo[key] = value
|
||
|
|
||
|
return value
|
||
|
|
||
|
return getter
|
||
|
|
||
|
|
||
|
@_memoize_get_funcs
|
||
|
def get_blas_funcs(names, arrays=(), dtype=None, ilp64=False):
|
||
|
"""Return available BLAS function objects from names.
|
||
|
|
||
|
Arrays are used to determine the optimal prefix of BLAS routines.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
names : str or sequence of str
|
||
|
Name(s) of BLAS functions without type prefix.
|
||
|
|
||
|
arrays : sequence of ndarrays, optional
|
||
|
Arrays can be given to determine optimal prefix of BLAS
|
||
|
routines. If not given, double-precision routines will be
|
||
|
used, otherwise the most generic type in arrays will be used.
|
||
|
|
||
|
dtype : str or dtype, optional
|
||
|
Data-type specifier. Not used if `arrays` is non-empty.
|
||
|
|
||
|
ilp64 : {True, False, 'preferred'}, optional
|
||
|
Whether to return ILP64 routine variant.
|
||
|
Choosing 'preferred' returns ILP64 routine if available,
|
||
|
and otherwise the 32-bit routine. Default: False
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
funcs : list
|
||
|
List containing the found function(s).
|
||
|
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
This routine automatically chooses between Fortran/C
|
||
|
interfaces. Fortran code is used whenever possible for arrays with
|
||
|
column major order. In all other cases, C code is preferred.
|
||
|
|
||
|
In BLAS, the naming convention is that all functions start with a
|
||
|
type prefix, which depends on the type of the principal
|
||
|
matrix. These can be one of {'s', 'd', 'c', 'z'} for the NumPy
|
||
|
types {float32, float64, complex64, complex128} respectively.
|
||
|
The code and the dtype are stored in attributes `typecode` and `dtype`
|
||
|
of the returned functions.
|
||
|
|
||
|
Examples
|
||
|
--------
|
||
|
>>> import scipy.linalg as LA
|
||
|
>>> a = np.random.rand(3,2)
|
||
|
>>> x_gemv = LA.get_blas_funcs('gemv', (a,))
|
||
|
>>> x_gemv.typecode
|
||
|
'd'
|
||
|
>>> x_gemv = LA.get_blas_funcs('gemv',(a*1j,))
|
||
|
>>> x_gemv.typecode
|
||
|
'z'
|
||
|
|
||
|
"""
|
||
|
if isinstance(ilp64, str):
|
||
|
if ilp64 == 'preferred':
|
||
|
ilp64 = HAS_ILP64
|
||
|
else:
|
||
|
raise ValueError("Invalid value for 'ilp64'")
|
||
|
|
||
|
if not ilp64:
|
||
|
return _get_funcs(names, arrays, dtype,
|
||
|
"BLAS", _fblas, _cblas, "fblas", "cblas",
|
||
|
_blas_alias, ilp64=False)
|
||
|
else:
|
||
|
if not HAS_ILP64:
|
||
|
raise RuntimeError("BLAS ILP64 routine requested, but Scipy "
|
||
|
"compiled only with 32-bit BLAS")
|
||
|
return _get_funcs(names, arrays, dtype,
|
||
|
"BLAS", _fblas_64, None, "fblas_64", None,
|
||
|
_blas_alias, ilp64=True)
|