import numpy as np
import mceq_config as config
from MCEq.misc import info
def solv_numpy(nsteps, dX, rho_inv, int_m, dec_m, phi, grid_idcs):
    """:mod:`numpy` implementation of forward-euler integration.

    Args:
        nsteps (int): number of integration steps
        dX (numpy.array[nsteps]): vector of step-sizes :math:`\\Delta X_i` in g/cm**2
        rho_inv (numpy.array[nsteps]): vector of density values :math:`\\frac{1}{\\rho(X_i)}`
        int_m (numpy.array): interaction matrix :eq:`int_matrix` in dense or sparse representation
        dec_m (numpy.array): decay matrix :eq:`dec_matrix` in dense or sparse representation
        phi (numpy.array): initial state vector :math:`\\Phi(X_0)`
        grid_idcs (list): indices at which longitudinal solutions have to be saved

    Returns:
        tuple(numpy.array, numpy.array): state vector :math:`\\Phi(X_{nsteps})` after
        integration and the array of saved grid solutions
    """
grid_sol = []
grid_step = 0
imc = int_m
dmc = dec_m
dxc = dX
ric = rho_inv
phc = phi
dXaccum = 0.
from time import time
start = time()
for step in range(nsteps):
phc += (imc.dot(phc) + dmc.dot(ric[step] * phc)) * dxc[step]
dXaccum += dxc[step]
if (grid_idcs and grid_step < len(grid_idcs)
and grid_idcs[grid_step] == step):
grid_sol.append(np.copy(phc))
grid_step += 1
info(
2, "Performance: {0:6.2f}ms/iteration".format(1e3 * (time() - start) /
float(nsteps)))
return phc, np.array(grid_sol)
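# A minimal usage sketch of solv_numpy with toy 2x2 matrices standing in for
# the real interaction and decay matrices; all values below are illustrative
# assumptions, not physical inputs. Wrapped in a function so that it does not
# execute at import time.
def _example_solv_numpy():
    from scipy.sparse import csr_matrix
    n = 2
    int_m = csr_matrix(-1e-3 * np.eye(n))  # toy interaction matrix
    dec_m = csr_matrix(-1e-4 * np.eye(n))  # toy decay matrix
    nsteps = 100
    dX = np.full(nsteps, 1.0)    # step sizes Delta X_i in g/cm**2
    rho_inv = np.ones(nsteps)    # 1/rho(X_i) for a constant-density toy medium
    phi0 = np.ones(n)            # initial state vector Phi(X_0)
    # Request intermediate solutions at steps 49 and 99
    return solv_numpy(nsteps, dX, rho_inv, int_m, dec_m, phi0, [49, 99])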
class CUDASparseContext(object):
"""This class handles the transfer between CPU and GPU memory, and the calling
of GPU kernels. Initialized by :class:`MCEq.core.MCEqRun` and used by
:func:`solv_CUDA_sparse`.
"""
def __init__(self, int_m, dec_m, device_id=0):
if config.cuda_fp_precision == 32:
self.fl_pr = np.float32
elif config.cuda_fp_precision == 64:
self.fl_pr = np.float64
else:
raise Exception(
"CUDASparseContext(): Unknown precision specified.")
# Setup GPU stuff and upload data to it
try:
import cupy as cp
import cupyx.scipy as cpx
self.cp = cp
self.cpx = cpx
self.cubl = cp.cuda.cublas
        except ImportError:
            raise Exception("CUDASparseContext(): CuPy libraries not " +
                            "installed.\nCan not use GPU.")
cp.cuda.Device(config.cuda_gpu_id).use()
self.cubl_handle = self.cubl.create()
self.set_matrices(int_m, dec_m)
    def set_matrices(self, int_m, dec_m):
        """Upload sparse matrices to GPU memory."""
self.cu_int_m = self.cpx.sparse.csr_matrix(int_m, dtype=self.fl_pr)
self.cu_dec_m = self.cpx.sparse.csr_matrix(dec_m, dtype=self.fl_pr)
self.cu_delta_phi = self.cp.zeros(self.cu_int_m.shape[0],
dtype=self.fl_pr)
    def alloc_grid_sol(self, dim, nsols):
        """Allocates memory for intermediate solutions if a grid solution is requested."""
self.curr_sol_idx = 0
self.grid_sol = self.cp.zeros((nsols, dim))
    def dump_sol(self):
        """Saves current solution to a new index in grid solution memory."""
self.cp.copyto(self.grid_sol[self.curr_sol_idx, :], self.cu_curr_phi)
self.curr_sol_idx += 1
    def get_gridsol(self):
"""Downloads grid solution to main memory."""
return self.cp.asnumpy(self.grid_sol)
    def set_phi(self, phi):
"""Uploads initial condition to GPU memory."""
self.cu_curr_phi = self.cp.asarray(phi, dtype=self.fl_pr)
    def get_phi(self):
"""Downloads current solution from GPU memory."""
return self.cp.asnumpy(self.cu_curr_phi)
    def solve_step(self, rho_inv, dX):
        """Makes one solver step on GPU using cuSPARSE (BLAS)."""
        # delta_phi = int_m.dot(phi)
        self.cp.cusparse.csrmv(a=self.cu_int_m,
                               x=self.cu_curr_phi,
                               y=self.cu_delta_phi,
                               alpha=1.,
                               beta=0.)
        # delta_phi = rho_inv * dec_m.dot(phi) + delta_phi
        self.cp.cusparse.csrmv(a=self.cu_dec_m,
                               x=self.cu_curr_phi,
                               y=self.cu_delta_phi,
                               alpha=rho_inv,
                               beta=1.)
        # phi = dX * delta_phi + phi; use the axpy variant that matches the
        # configured floating point precision (saxpy is single precision only)
        axpy = (self.cubl.saxpy
                if self.fl_pr is np.float32 else self.cubl.daxpy)
        axpy(self.cubl_handle, self.cu_delta_phi.shape[0], dX,
             self.cu_delta_phi.data.ptr, 1,
             self.cu_curr_phi.data.ptr, 1)
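# For orientation: one solve_step call performs the same forward-euler update
# as the inner loop of solv_numpy,
#
#     phi += (int_m.dot(phi) + rho_inv * dec_m.dot(phi)) * dX
#
# just executed on the GPU with sparse BLAS kernels.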
def solv_CUDA_sparse(nsteps, dX, rho_inv, context, phi, grid_idcs):
    """`NVIDIA CUDA cuSPARSE <https://developer.nvidia.com/cusparse>`_ implementation
    of forward-euler integration.

    Function requires a working :mod:`cupy` installation.

    Args:
        nsteps (int): number of integration steps
        dX (numpy.array[nsteps]): vector of step-sizes :math:`\\Delta X_i` in g/cm**2
        rho_inv (numpy.array[nsteps]): vector of density values :math:`\\frac{1}{\\rho(X_i)}`
        context (CUDASparseContext): GPU context holding the interaction and decay matrices
        phi (numpy.array): initial state vector :math:`\\Phi(X_0)`
        grid_idcs (list): indices at which longitudinal solutions have to be saved

    Returns:
        tuple(numpy.array, numpy.array): state vector :math:`\\Phi(X_{nsteps})` after
        integration and the array of saved grid solutions
    """
c = context
c.set_phi(phi)
grid_step = 0
from time import time
start = time()
if len(grid_idcs) > 0:
c.alloc_grid_sol(phi.shape[0], len(grid_idcs))
for step in range(nsteps):
c.solve_step(rho_inv[step], dX[step])
if (grid_idcs and grid_step < len(grid_idcs)
and grid_idcs[grid_step] == step):
c.dump_sol()
grid_step += 1
info(
2, "Performance: {0:6.2f}ms/iteration".format(1e3 * (time() - start) /
float(nsteps)))
    return c.get_phi(), (c.get_gridsol() if len(grid_idcs) > 0 else [])
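# A hedged usage sketch for the GPU path: requires a working CuPy installation
# and a CUDA-capable device. int_m, dec_m, dX, rho_inv and phi0 are the same
# kind of toy stand-ins as in the solv_numpy example above.
def _example_solv_CUDA_sparse(nsteps, dX, rho_inv, int_m, dec_m, phi0):
    ctx = CUDASparseContext(int_m, dec_m)
    return solv_CUDA_sparse(nsteps, dX, rho_inv, ctx, phi0, [])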
def solv_MKL_sparse(nsteps, dX, rho_inv, int_m, dec_m, phi, grid_idcs):
    """`Intel MKL sparse BLAS
    <https://software.intel.com/en-us/articles/intel-mkl-sparse-blas-overview?language=en>`_
    implementation of forward-euler integration.

    Function requires that the path to the MKL runtime library ``libmkl_rt.[so/dylib]``
    is defined in the config file.

    Args:
nsteps (int): number of integration steps
dX (numpy.array[nsteps]): vector of step-sizes :math:`\\Delta X_i` in g/cm**2
rho_inv (numpy.array[nsteps]): vector of density values :math:`\\frac{1}{\\rho(X_i)}`
int_m (numpy.array): interaction matrix :eq:`int_matrix` in dense or sparse representation
dec_m (numpy.array): decay matrix :eq:`dec_matrix` in dense or sparse representation
phi (numpy.array): initial state vector :math:`\\Phi(X_0)`
grid_idcs (list): indices at which longitudinal solutions have to be saved.
    Returns:
        tuple(numpy.array, numpy.array): state vector :math:`\\Phi(X_{nsteps})` after
        integration and the array of saved grid solutions
"""
    from ctypes import c_int, c_char, POINTER, byref
    from mceq_config import mkl

    # Use double precision throughout for the MKL routines
    from ctypes import c_double as fl_pr
    # sparse CSR-matrix x dense vector
    gemv = mkl.mkl_dcsrmv
    # dense vector + dense vector
    axpy = mkl.cblas_daxpy
    np_fl = np.float64
# Prepare CTYPES pointers for MKL sparse CSR BLAS
int_m_data = int_m.data.ctypes.data_as(POINTER(fl_pr))
int_m_ci = int_m.indices.ctypes.data_as(POINTER(c_int))
int_m_pb = int_m.indptr[:-1].ctypes.data_as(POINTER(c_int))
int_m_pe = int_m.indptr[1:].ctypes.data_as(POINTER(c_int))
dec_m_data = dec_m.data.ctypes.data_as(POINTER(fl_pr))
dec_m_ci = dec_m.indices.ctypes.data_as(POINTER(c_int))
dec_m_pb = dec_m.indptr[:-1].ctypes.data_as(POINTER(c_int))
dec_m_pe = dec_m.indptr[1:].ctypes.data_as(POINTER(c_int))
npphi = np.copy(phi).astype(np_fl)
phi = npphi.ctypes.data_as(POINTER(fl_pr))
npdelta_phi = np.zeros_like(npphi)
delta_phi = npdelta_phi.ctypes.data_as(POINTER(fl_pr))
    trans = c_char(b'n')
    # MKL matrix descriptor: 'G' = general matrix, 'C' = zero-based (C-style) indexing
    npmatd = np.chararray(6)
    npmatd[0] = b'G'
    npmatd[3] = b'C'
    matdsc = npmatd.ctypes.data_as(POINTER(c_char))
m = c_int(int_m.shape[0])
cdzero = fl_pr(0.)
cdone = fl_pr(1.)
cione = c_int(1)
grid_step = 0
grid_sol = []
from time import time
start = time()
for step in range(nsteps):
# delta_phi = int_m.dot(phi)
gemv(byref(trans), byref(m), byref(m), byref(cdone),
matdsc, int_m_data, int_m_ci, int_m_pb, int_m_pe, phi,
byref(cdzero), delta_phi)
# delta_phi = rho_inv * dec_m.dot(phi) + delta_phi
gemv(byref(trans), byref(m), byref(m), byref(fl_pr(rho_inv[step])),
matdsc, dec_m_data, dec_m_ci, dec_m_pb, dec_m_pe, phi,
byref(cdone), delta_phi)
# phi = delta_phi * dX + phi
axpy(m, fl_pr(dX[step]), delta_phi, cione, phi, cione)
if (grid_idcs and grid_step < len(grid_idcs)
and grid_idcs[grid_step] == step):
grid_sol.append(np.copy(npphi))
grid_step += 1
info(
2, "Performance: {0:6.2f}ms/iteration".format(1e3 * (time() - start) /
float(nsteps)))
return npphi, np.asarray(grid_sol)
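# A hedged sketch of how the `mkl` handle imported above can come into being:
# mceq_config resolves the path to the MKL runtime from its configuration and
# loads it via ctypes. The literal file name below is an illustrative
# assumption; the real path is platform and installation dependent.
def _example_load_mkl():
    import ctypes
    return ctypes.cdll.LoadLibrary("libmkl_rt.so")  # .dylib on macOS, .dll on Windows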
# # TODO: Debug this and transition to BDF
# def _odepack(dXstep=.1,
#              initial_depth=0.0,
#              int_grid=None,
#              grid_var='X',
#              *args,
#              **kwargs):
#     """Solves the transport equations with solvers from ODEPACK.
#
#     Args:
#         dXstep (float): external step size (adaptive solvers make more steps internally)
#         initial_depth (float): starting depth in g/cm**2
#         int_grid (list): list of depths at which results are recorded
#         grid_var (str): Can be depth `X` or something else (currently only `X` supported)
#     """
#     from scipy.integrate import ode
#     ri = self.density_model.r_X2rho
#
#     if config.enable_muon_energy_loss:
#         raise NotImplementedError(
#             'Energy loss not implemented for this solver.')
#
#     # Functional to solve
#     def dPhi_dX(X, phi, *args):
#         return self.int_m.dot(phi) + self.dec_m.dot(ri(X) * phi)
#
#     # Jacobian doesn't work with sparse matrices, and any precision
#     # or speed advantage disappears if used with dense algebra
#     def jac(X, phi, *args):
#         return (self.int_m + self.dec_m * ri(X)).todense()
#
#     # Initial condition
#     phi0 = np.copy(self.phi0)
#
#     # Initialize variables
#     grid_sol = []
#
#     # Setup solver
#     r = ode(dPhi_dX).set_integrator(
#         with_jacobian=False, **config.ode_params)
#
#     if int_grid is not None:
#         initial_depth = int_grid[0]
#         int_grid = int_grid[1:]
#         max_X = int_grid[-1]
#         grid_sol.append(phi0)
#     else:
#         max_X = self.density_model.max_X
#
#     info(1, 'your X-grid is shorter than the material',
#          condition=max_X < self.density_model.max_X)
#     info(1, 'your X-grid exceeds the dimensions of the material',
#          condition=max_X > self.density_model.max_X)
#
#     # Initial value
#     r.set_initial_value(phi0, initial_depth)
#     info(2, 'initial depth: {0:3.2e}, maximal depth {1:}'.format(
#         initial_depth, max_X))
#
#     start = time()
#     if int_grid is None:
#         i = 0
#         while r.successful() and (r.t + dXstep) < max_X - 1:
#             info(5, "Solving at depth X =", r.t, condition=(i % 5000) == 0)
#             r.integrate(r.t + dXstep)
#             i += 1
#         # Do last step to make sure the rational number max_X is reached
#         if r.t < max_X:
#             r.integrate(max_X)
#     else:
#         for i, Xi in enumerate(int_grid):
#             info(5, 'integrating at X =', Xi, condition=i % 10 == 0)
#             while r.successful() and (r.t + dXstep) < Xi:
#                 r.integrate(r.t + dXstep)
#             # Make sure the integrator arrives at requested step
#             r.integrate(Xi)
#             # Store the solution on grid
#             grid_sol.append(r.y)
#
#     info(2, 'time elapsed during integration: {0} sec'.format(time() - start))
#
#     self.solution = r.y
#     self.grid_sol = grid_sol
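# The TODO above asks for a transition to BDF. A minimal, hedged sketch of the
# same transport equation solved with scipy's stiff BDF integrator, reusing
# toy matrices like those in the solv_numpy example; `rho_inv_of_X` is a
# hypothetical stand-in for density_model.r_X2rho.
def _example_bdf(int_m, dec_m, phi0, max_X):
    from scipy.integrate import solve_ivp

    def rho_inv_of_X(X):
        return 1.0  # constant-density toy medium

    def dPhi_dX(X, phi):
        # Right-hand side of dPhi/dX = (int_m + 1/rho(X) * dec_m) Phi
        return int_m.dot(phi) + rho_inv_of_X(X) * dec_m.dot(phi)

    return solve_ivp(dPhi_dX, (0.0, max_X), phi0, method="BDF")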