AdaptiveOpticsControl
cudacomp.c File Reference

CUDA functions wrapper. More...

Functions

static int clock_gettime (int clk_id, struct mach_timespec *t)
 
int_fast8_t CUDACOMP_test_cli ()
 
int_fast8_t CUDACOMP_MatMatMult_testPseudoInverse_cli ()
 
int_fast8_t CUDACOMP_magma_compute_SVDpseudoInverse_SVD_cli ()
 
int_fast8_t CUDACOMP_magma_compute_SVDpseudoInverse_cli ()
 
int_fast8_t CUDACOMP_Coeff2Map_Loop_cli ()
 
int_fast8_t CUDACOMP_Coeff2Map_offset_Loop_cli ()
 
int_fast8_t CUDACOMP_extractModesLoop_cli ()
 
int_fast8_t init_cudacomp ()
 Initialize cudacomp module and command line interface. More...
 
int_fast8_t CUDACOMP_init ()
 Initialize CUDA and MAGMA. More...
 
int_fast8_t GPUcomp_test (long NBact, long NBmodes, long WFSsize, long GPUcnt)
 
void matrixMulCPU (float *cMat, float *wfsVec, float *dmVec, int M, int N)
 CPU-based matrix vector multiplication. More...
 
void * compute_function (void *ptr)
 
int GPUloadCmat (int index)
 
int GPU_loop_MultMat_setup (int index, const char *IDcontrM_name, const char *IDwfsim_name, const char *IDoutdmmodes_name, long NBGPUs, int *GPUdevice, int orientation, int USEsem, int initWFSref, long loopnb)
 
int GPU_loop_MultMat_execute (int index, int_fast8_t *status, int_fast8_t *GPUstatus, float alpha, float beta, int timing)
 
int GPU_loop_MultMat_free (int index)
 
long CUDACOMP_MatMatMult_testPseudoInverse (const char *IDmatA_name, const char *IDmatAinv_name, const char *IDmatOut_name)
 Test pseudo inverse. More...
 
int CUDACOMP_magma_compute_SVDpseudoInverse_SVD (const char *ID_Rmatrix_name, const char *ID_Cmatrix_name, double SVDeps, long MaxNBmodes, const char *ID_VTmatrix_name)
 Compute pseudoinverse using MAGMA-based SVD. More...
 
int CUDACOMP_magma_compute_SVDpseudoInverse (const char *ID_Rmatrix_name, const char *ID_Cmatrix_name, double SVDeps, long MaxNBmodes, const char *ID_VTmatrix_name, int LOOPmode)
 Computes matrix pseudo-inverse (AT A)^-1 AT, using eigenvector/eigenvalue decomposition of AT A. More...
 
int GPU_SVD_computeControlMatrix (int device, const char *ID_Rmatrix_name, const char *ID_Cmatrix_name, double SVDeps, const char *ID_VTmatrix_name)
 
int CUDACOMP_Coeff2Map_Loop (const char *IDmodes_name, const char *IDcoeff_name, int GPUindex, const char *IDoutmap_name, int offsetmode, const char *IDoffset_name)
 
int CUDACOMP_extractModesLoop (const char *in_stream, const char *intot_stream, const char *IDmodes_name, const char *IDrefin_name, const char *IDrefout_name, const char *IDmodes_val_name, int GPUindex, int PROCESS, int TRACEMODE, int MODENORM, int insem, int axmode, long twait)
 extract mode coefficients from data stream More...
 

Variables

int FORCESEMINIT = 1
 
DATA data
 System includes. More...
 
pid_t CLIPID
 important directories and info More...
 
static struct timespec tnow
 
static struct timespec tdiff
 
static double tdiffv
 
static int IDtimerinit = 0
 
static long IDtiming = -1
 
static int deviceCount
 
GPUMATMULTCONF gpumatmultconf [20]
 
static cudaError_t error
 
static cublasStatus_t stat
 
static float cublasSgemv_alpha = 1.0
 
static float cublasSgemv_beta = 0.0
 
static int INIT_MAGMA = 0
 
static magma_queue_t magmaqueue
 
static long MAGMAloop_iter = 0
 
static double * magma_h_A
 
static double * magma_d_A
 
static double * magma_d_AtA
 
static double * magma_h_AtA
 
static double * magma_w1
 
static double * magma_h_R
 
static double * magma_h_work
 
static double * magma_d_VT1
 
static double * magma_h_VT1
 
static double * magma_d_M2
 
static double * magma_d_Ainv
 
static double * magma_h_Ainv
 
static double * magma_h_M2
 
static float * magmaf_h_A
 
static float * magmaf_d_A
 
static float * magmaf_d_AtA
 
static float * magmaf_h_AtA
 
static float * magmaf_w1
 
static float * magmaf_h_R
 
static float * magmaf_h_work
 
static float * magmaf_d_VT1
 
static float * magmaf_h_VT1
 
static float * magmaf_d_M2
 
static float * magmaf_d_Ainv
 
static float * magmaf_h_Ainv
 
static float * magmaf_h_M2
 
static magma_int_t magma_aux_iwork [1]
 
static magma_int_t magma_lwork
 
static magma_int_t magma_liwork
 
static magma_int_t * magma_iwork
 

Detailed Description

CUDA functions wrapper.

Also uses MAGMA library

Author
O. Guyon
Date
3 Jul 2017
Bug:
MAGMA execution can hang on dsyevd routine. This seems to be a MAGMA issue.

Function Documentation

static int clock_gettime ( int  clk_id,
struct mach_timespec *  t 
)
static
void* compute_function ( void *  ptr)
int CUDACOMP_Coeff2Map_Loop ( const char *  IDmodes_name,
const char *  IDcoeff_name,
int  GPUindex,
const char *  IDoutmap_name,
int  offsetmode,
const char *  IDoffset_name 
)
int_fast8_t CUDACOMP_Coeff2Map_Loop_cli ( )
int_fast8_t CUDACOMP_Coeff2Map_offset_Loop_cli ( )
int CUDACOMP_extractModesLoop ( const char *  in_stream,
const char *  intot_stream,
const char *  IDmodes_name,
const char *  IDrefin_name,
const char *  IDrefout_name,
const char *  IDmodes_val_name,
int  GPUindex,
int  PROCESS,
int  TRACEMODE,
int  MODENORM,
int  insem,
int  axmode,
long  twait 
)

extract mode coefficients from data stream

int_fast8_t CUDACOMP_extractModesLoop_cli ( )
int_fast8_t CUDACOMP_init ( )

Initialize CUDA and MAGMA.

Finds CUDA devices Initializes CUDA and MAGMA libraries

Returns
number of CUDA devices found
int CUDACOMP_magma_compute_SVDpseudoInverse ( const char *  ID_Rmatrix_name,
const char *  ID_Cmatrix_name,
double  SVDeps,
long  MaxNBmodes,
const char *  ID_VTmatrix_name,
int  LOOPmode 
)

Computes matrix pseudo-inverse (AT A)^-1 AT, using eigenvector/eigenvalue decomposition of AT A.

Computes pseuso inverse of M x N matrix

When used with AOloopControl module: N: number of actuators M: number of sensors assumes M>N

use LOOPmode = 1 for computing the same size SVD, same input and output location

Notations: AT is transpose of A A+ is pseudo inverse of A

Computes pseudo-inverse : A+ = (AT A)^-1 AT Inverse of AT A is computed by SVD

SVD: A = U S V^T U are eigenvectors of A A^T V are eigenvectors of A^T A, computed at step 4 below

Linear algebra reminder: equivalence between (AT A)^-1 AT and V S^-1 UT

Definition of pseudoinverse: A+ = (AT A)^-1 AT singular value decomposition of A = U S VT A+ = ( V S UT U S VT )^-1 V S UT Since U is unitary, UT U = Id -> A+ = ( V S^2 VT )^-1 V S UT A+ = VT^-1 S^-2 V^-1 V S UT A+ = V S^-1 UT

Main steps:

STEP 1 : Fill input data into magmaf_h_A on host

STEP 2 : Copy input data to GPU -> magmaf_d_A (MxN matrix on device)

STEP 3 : Compute trans(A) x A : magmaf_d_A x magmaf_d_A -> magmaf_d_AtA (NxN matrix on device)

STEP 4 : Compute eigenvalues and eigenvectors of A^T A -> magmaf_d_AtA (NxN matrix on device) Calls magma_ssyevd_gpu : Compute the eigenvalues and optionally eigenvectors of a symmetric real matrix in single precision, GPU interface, big matrix. This function computes in single precision all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A defined on the device. The first parameter can take the values MagmaVec,'V' or MagmaNoVec,'N' and answers the question whether the eigenvectors are desired. If the eigenvectors are desired, it uses a divide and conquer algorithm. The symmetric matrix A can be stored in lower (MagmaLower,'L') or upper (MagmaUpper,'U') mode. If the eigenvectors are desired, then on exit A contains orthonormal eigenvectors. The eigenvalues are stored in an array w

STEP 5 : Set eigenvalue limit

STEP 6 : Write eigenvectors to V^T matrix

STEP 7 : Write eigenvectors/eigenvalue to magma_h_VT1 if eigenvalue > limit Copy to magma_d_VT1

STEP 8 : Compute M2 = VT1 VT. M2 is (AT A)^-1

STEP 9 : Compute Ainv = M2 A. This is the pseudo inverse

Note
SVDeps^2 is applied as a limit to the eigenvectors of AT A, which are equal to the squares of the singular values of A, so this is equivalent to applying SVDeps as a limit on the singular values of A
When used to compute AO control matrix, N=number of actuators/modes, M=number of WFS elements
EIGENVALUES are good to about 1e-6 of peak eigenvalue in single precision, much better with double precision
2.5x faster in single precision

Timing tests

< 1 if timing test ON, 0 otherwise

< Timers

< Times in sec

< 1 if single precision, 0 if double precision

The input matrix can be a 2D or a 3D image

If 2D image : M = xsize N = ysize

If 3D image : M = xsize*ysize N = ysize

Note that a tall input matrix (M>N) will appear short if viewed as an image. There is a 90 deg rotation between image and matrix

MAGMA uses column-major matrices. For matrix A with dimension (M,N), element A(i,j) is A[ j*M + i] i = 0 ... M j = 0 ... N

each column (N=cst) of A is a z=cst slice of image Rmatrix

each column (N=cst) of A is a line (y=cst) of Rmatrix (90 deg rotation)

Initialize MAGMA if needed

memory is only allocated on first pass

if pseudo-inverse is only computed once, these arrays can be freed

w1 values are the EIGENVALUES of AT A Note: w1 values are the SQUARE of the singular values of A

int_fast8_t CUDACOMP_magma_compute_SVDpseudoInverse_cli ( )
int CUDACOMP_magma_compute_SVDpseudoInverse_SVD ( const char *  ID_Rmatrix_name,
const char *  ID_Cmatrix_name,
double  SVDeps,
long  MaxNBmodes,
const char *  ID_VTmatrix_name 
)

Compute pseudoinverse using MAGMA-based SVD.

int_fast8_t CUDACOMP_magma_compute_SVDpseudoInverse_SVD_cli ( )
long CUDACOMP_MatMatMult_testPseudoInverse ( const char *  IDmatA_name,
const char *  IDmatAinv_name,
const char *  IDmatOut_name 
)

Test pseudo inverse.

IDmatA is an image loaded as a M x N matrix IDmatAinv is an image loaded as a M x M matrix, representing the transpose of the pseudo inverse of IDmatA

The input matrices can be 2D or a 3D images

If 2D image : IDmatA M = xsize IDmatA N = ysize

If 3D image : IDmatA M = xsize*ysize IDmatA N = ysize

MAGMA uses column-major matrices. For matrix A with dimension (M,N), element A(i,j) is A[ j*M + i] i = 0 ... M j = 0 ... N

each column (N=cst) of A is a z=cst slice of image Rmatrix

each column (N=cst) of A is a line (y=cst) of Rmatrix (90 deg rotation)

Initialize MAGAM if needed

load matA in h_A -> d_A

load matAinv in h_Ainv -> d_Ainv

int_fast8_t CUDACOMP_MatMatMult_testPseudoInverse_cli ( )
int_fast8_t CUDACOMP_test_cli ( )
int GPU_loop_MultMat_execute ( int  index,
int_fast8_t *  status,
int_fast8_t *  GPUstatus,
float  alpha,
float  beta,
int  timing 
)
int GPU_loop_MultMat_free ( int  index)
int GPU_loop_MultMat_setup ( int  index,
const char *  IDcontrM_name,
const char *  IDwfsim_name,
const char *  IDoutdmmodes_name,
long  NBGPUs,
int *  GPUdevice,
int  orientation,
int  USEsem,
int  initWFSref,
long  loopnb 
)

setup matrix multiplication using multiple GPUs

Load Control Matrix

Load Input vectors

int GPU_SVD_computeControlMatrix ( int  device,
const char *  ID_Rmatrix_name,
const char *  ID_Cmatrix_name,
double  SVDeps,
const char *  ID_VTmatrix_name 
)
int_fast8_t GPUcomp_test ( long  NBact,
long  NBmodes,
long  WFSsize,
long  GPUcnt 
)
int GPUloadCmat ( int  index)
int_fast8_t init_cudacomp ( )

Initialize cudacomp module and command line interface.

void matrixMulCPU ( float *  cMat,
float *  wfsVec,
float *  dmVec,
int  M,
int  N 
)

CPU-based matrix vector multiplication.

Variable Documentation

pid_t CLIPID

important directories and info

float cublasSgemv_alpha = 1.0
static
float cublasSgemv_beta = 0.0
static
DATA data

System includes.

External libraries

int deviceCount
static
cudaError_t error
static
int FORCESEMINIT = 1
GPUMATMULTCONF gpumatmultconf[20]
int IDtimerinit = 0
static
long IDtiming = -1
static
int INIT_MAGMA = 0
static
magma_int_t magma_aux_iwork[1]
static
double* magma_d_A
static
double* magma_d_Ainv
static
double* magma_d_AtA
static
double* magma_d_M2
static
double* magma_d_VT1
static
double* magma_h_A
static
double* magma_h_Ainv
static
double* magma_h_AtA
static
double* magma_h_M2
static
double* magma_h_R
static
double* magma_h_VT1
static
double* magma_h_work
static
magma_int_t* magma_iwork
static
magma_int_t magma_liwork
static
magma_int_t magma_lwork
static
double* magma_w1
static
float* magmaf_d_A
static
float* magmaf_d_Ainv
static
float* magmaf_d_AtA
static
float* magmaf_d_M2
static
float* magmaf_d_VT1
static
float* magmaf_h_A
static
float* magmaf_h_Ainv
static
float* magmaf_h_AtA
static
float* magmaf_h_M2
static
float* magmaf_h_R
static
float* magmaf_h_VT1
static
float* magmaf_h_work
static
float* magmaf_w1
static
long MAGMAloop_iter = 0
static
magma_queue_t magmaqueue
static
cublasStatus_t stat
static
struct timespec tdiff
static
double tdiffv
static
struct timespec tnow
static