Function prototypes for CUDA/MAGMA wrapper. More...

Data Structures
struct	THDATA

struct	GPUMATMULTCONF
	This structure holds the GPU computation setup for matrix multiplication. More...

Functions
1. INITIALIZATION
Initialization functions
int_fast8_t	init_cudacomp ()
	Initialize cudacomp module and command line interface. More...

int_fast8_t	CUDACOMP_init ()
	Initialize CUDA and MAGMA. More...

int_fast8_t	GPUcomp_test (long NBact, long NBmodes, long WFSsize, long GPUcnt)

2. LOW-LEVEL MATRIX VECTOR MULTIPLICATION FUNCTIONS
Multi-GPU Matrix Vector multiplication
void	matrixMulCPU (float cMat, float wfsVec, float *dmVec, int M, int N)
	CPU-based matrix vector multiplication. More...

void *	compute_function (void *ptr)

int	GPUloadCmat (int index)

int	GPU_loop_MultMat_setup (int index, const char IDcontrM_name, const char IDwfsim_name, const char IDoutdmmodes_name, long NBGPUs, int GPUdevice, int orientation, int USEsem, int initWFSref, long loopnb)

int	GPU_loop_MultMat_execute (int index, int_fast8_t status, int_fast8_t GPUstatus, float alpha, float beta, int timing)

int	GPU_loop_MultMat_free (int index)

3. SINGULAR VALUE DECOMPOSITION, PSEUDO-INVERSE
Call to MAGMA SVD
long	CUDACOMP_MatMatMult_testPseudoInverse (const char IDmatA_name, const char IDmatAinv_name, const char *IDmatOut_name)
	Test pseudo inverse. More...

int	CUDACOMP_magma_compute_SVDpseudoInverse_SVD (const char ID_Rmatrix_name, const char ID_Cmatrix_name, double SVDeps, long MaxNBmodes, const char *ID_VTmatrix_name)
	Compute pseudoinverse using MAGMA-based SVD. More...

int	CUDACOMP_magma_compute_SVDpseudoInverse (const char ID_Rmatrix_name, const char ID_Cmatrix_name, double SVDeps, long MaxNBmodes, const char *ID_VTmatrix_name, int LOOPmode)
	Computes matrix pseudo-inverse (AT A)^-1 AT, using eigenvector/eigenvalue decomposition of AT A. More...

int	GPU_SVD_computeControlMatrix (int device, const char ID_Rmatrix_name, const char ID_Cmatrix_name, double SVDeps, const char *ID_VTmatrix_name)

4. HIGH LEVEL FUNCTIONS
int	CUDACOMP_Coeff2Map_Loop (const char IDmodes_name, const char IDcoeff_name, int GPUindex, const char IDoutmap_name, int offsetmode, const char IDoffset_name)

int	CUDACOMP_extractModesLoop (const char in_stream, const char intot_stream, const char IDmodes_name, const char IDrefin_name, const char IDrefout_name, const char IDmodes_val_name, int GPUindex, int PROCESS, int TRACEMODE, int MODENORM, int insem, int axmode, long twait)
	extract mode coefficients from data stream More...

Detailed Description

Function prototypes for CUDA/MAGMA wrapper.

Also uses MAGMA library

Author: O. Guyon

Date: 3 Jul 2017

Bug:: Magma can hang on magma_dsyevd_gpu

Function Documentation

void* compute_function ( void * ptr )

int CUDACOMP_Coeff2Map_Loop	(	const char *	IDmodes_name,
		const char *	IDcoeff_name,
		int	GPUindex,
		const char *	IDoutmap_name,
		int	offsetmode,
		const char *	IDoffset_name
	)

int CUDACOMP_extractModesLoop	(	const char *	in_stream,
		const char *	intot_stream,
		const char *	IDmodes_name,
		const char *	IDrefin_name,
		const char *	IDrefout_name,
		const char *	IDmodes_val_name,
		int	GPUindex,
		int	PROCESS,
		int	TRACEMODE,
		int	MODENORM,
		int	insem,
		int	axmode,
		long	twait
	)

extract mode coefficients from data stream

modes need to be orthogonal single GPU computation

Parameters

[in]	in_stream	input stream
[in]	intot_stream	[optional] input normalization stream
[in]	IDmodes_name	Modes
[in]	IDrefin_name	[optional] input reference - to be subtracted
[in]	IDrefout_name	[optional] output reference - to be added
[out]	IDmodes_val_name	ouput stream
[in]	GPUindex	GPU index
[in]	PROCESS	1 if postprocessing
[in]	TRACEMODE	1 if writing trace
[in]	MODENORM	1 if input modes should be normalized
[in]	insem	input semaphore index
[in]	axmode	0 for normal mode extraction, 1 for expansion
[in]	twait	if >0, insert time wait [us] at each iteration

Note: IMPORTANT: if IDmodes_val_name exits, use it and do not compute it; if IDrefout_name exists, match output image size to IDrefout_name

int_fast8_t CUDACOMP_init ( )

Initialize CUDA and MAGMA.

Finds CUDA devices Initializes CUDA and MAGMA libraries

Returns: number of CUDA devices found

int CUDACOMP_magma_compute_SVDpseudoInverse	(	const char *	ID_Rmatrix_name,
		const char *	ID_Cmatrix_name,
		double	SVDeps,
		long	MaxNBmodes,
		const char *	ID_VTmatrix_name,
		int	LOOPmode
	)

Computes matrix pseudo-inverse (AT A)^-1 AT, using eigenvector/eigenvalue decomposition of AT A.

Regularization (eigenvalue cutoff) is set by parameters SVDeps and MaxNBmodes Both contraints are applied, so that the number of modes is the minimum of both constraints

Parameters

[in]	ID_Rmatrix_name	Input data matrix, can be 2D or 3D
[out]	ID_Cmatrix_name	Pseudoinverse (result)
[in]	SVDeps	SVD eigenvalue limit for pseudoinverse
[in]	MaxNBmodes	Maximum number of modes kept
[out]	ID_VTmatrix_name	VT output matrix
[in]	LOOPmode	if set to 1, repeat routine as input data is updated

Returns: void

Note: When called by AOloopControl module to compute control matrix, N = number of actuators, M = number of sensors

Warning: Requires M>N (tall matrix)

Computes pseuso inverse of M x N matrix

When used with AOloopControl module: N: number of actuators M: number of sensors assumes M>N

use LOOPmode = 1 for computing the same size SVD, same input and output location

Notations: AT is transpose of A A+ is pseudo inverse of A

Computes pseudo-inverse : A+ = (AT A)^-1 AT Inverse of AT A is computed by SVD

SVD: A = U S V^T U are eigenvectors of A A^T V are eigenvectors of A^T A, computed at step 4 below

Linear algebra reminder: equivalence between (AT A)^-1 AT and V S^-1 UT

Definition of pseudoinverse: A+ = (AT A)^-1 AT singular value decomposition of A = U S VT A+ = ( V S UT U S VT )^-1 V S UT Since U is unitary, UT U = Id -> A+ = ( V S^2 VT )^-1 V S UT A+ = VT^-1 S^-2 V^-1 V S UT A+ = V S^-1 UT

Main steps:

STEP 1 : Fill input data into magmaf_h_A on host

STEP 2 : Copy input data to GPU -> magmaf_d_A (MxN matrix on device)

STEP 3 : Compute trans(A) x A : magmaf_d_A x magmaf_d_A -> magmaf_d_AtA (NxN matrix on device)

STEP 4 : Compute eigenvalues and eigenvectors of A^T A -> magmaf_d_AtA (NxN matrix on device) Calls magma_ssyevd_gpu : Compute the eigenvalues and optionally eigenvectors of a symmetric real matrix in single precision, GPU interface, big matrix. This function computes in single precision all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A defined on the device. The first parameter can take the values MagmaVec,'V' or MagmaNoVec,'N' and answers the question whether the eigenvectors are desired. If the eigenvectors are desired, it uses a divide and conquer algorithm. The symmetric matrix A can be stored in lower (MagmaLower,'L') or upper (MagmaUpper,'U') mode. If the eigenvectors are desired, then on exit A contains orthonormal eigenvectors. The eigenvalues are stored in an array w

STEP 5 : Set eigenvalue limit

STEP 6 : Write eigenvectors to V^T matrix

STEP 7 : Write eigenvectors/eigenvalue to magma_h_VT1 if eigenvalue > limit Copy to magma_d_VT1

STEP 8 : Compute M2 = VT1 VT. M2 is (AT A)^-1

STEP 9 : Compute Ainv = M2 A. This is the pseudo inverse

Note: SVDeps^2 is applied as a limit to the eigenvectors of AT A, which are equal to the squares of the singular values of A, so this is equivalent to applying SVDeps as a limit on the singular values of A; When used to compute AO control matrix, N=number of actuators/modes, M=number of WFS elements; EIGENVALUES are good to about 1e-6 of peak eigenvalue in single precision, much better with double precision; 2.5x faster in single precision

Timing tests

< 1 if timing test ON, 0 otherwise

< Timers

< Times in sec

< 1 if single precision, 0 if double precision

The input matrix can be a 2D or a 3D image

If 2D image : M = xsize N = ysize

If 3D image : M = xsize*ysize N = ysize

Note that a tall input matrix (M>N) will appear short if viewed as an image. There is a 90 deg rotation between image and matrix

MAGMA uses column-major matrices. For matrix A with dimension (M,N), element A(i,j) is A[ j*M + i] i = 0 ... M j = 0 ... N

each column (N=cst) of A is a z=cst slice of image Rmatrix

each column (N=cst) of A is a line (y=cst) of Rmatrix (90 deg rotation)

Initialize MAGMA if needed

memory is only allocated on first pass

if pseudo-inverse is only computed once, these arrays can be freed

w1 values are the EIGENVALUES of AT A Note: w1 values are the SQUARE of the singular values of A

int CUDACOMP_magma_compute_SVDpseudoInverse_SVD	(	const char *	ID_Rmatrix_name,
		const char *	ID_Cmatrix_name,
		double	SVDeps,
		long	MaxNBmodes,
		const char *	ID_VTmatrix_name
	)

Compute pseudoinverse using MAGMA-based SVD.

long CUDACOMP_MatMatMult_testPseudoInverse	(	const char *	IDmatA_name,
		const char *	IDmatAinv_name,
		const char *	IDmatOut_name
	)

Test pseudo inverse.

IDmatA is an image loaded as a M x N matrix IDmatAinv is an image loaded as a M x M matrix, representing the transpose of the pseudo inverse of IDmatA

The input matrices can be 2D or a 3D images

If 2D image : IDmatA M = xsize IDmatA N = ysize

If 3D image : IDmatA M = xsize*ysize IDmatA N = ysize

MAGMA uses column-major matrices. For matrix A with dimension (M,N), element A(i,j) is A[ j*M + i] i = 0 ... M j = 0 ... N

each column (N=cst) of A is a z=cst slice of image Rmatrix

each column (N=cst) of A is a line (y=cst) of Rmatrix (90 deg rotation)

Initialize MAGAM if needed

load matA in h_A -> d_A

load matAinv in h_Ainv -> d_Ainv

int GPU_loop_MultMat_execute	(	int	index,
		int_fast8_t *	status,
		int_fast8_t *	GPUstatus,
		float	alpha,
		float	beta,
		int	timing
	)

int GPU_loop_MultMat_free ( int index )

int GPU_loop_MultMat_setup	(	int	index,
		const char *	IDcontrM_name,
		const char *	IDwfsim_name,
		const char *	IDoutdmmodes_name,
		long	NBGPUs,
		int *	GPUdevice,
		int	orientation,
		int	USEsem,
		int	initWFSref,
		long	loopnb
	)

setup matrix multiplication using multiple GPUs

Load Control Matrix

Load Input vectors

int GPU_SVD_computeControlMatrix	(	int	device,
		const char *	ID_Rmatrix_name,
		const char *	ID_Cmatrix_name,
		double	SVDeps,
		const char *	ID_VTmatrix_name
	)

int_fast8_t GPUcomp_test	(	long	NBact,
		long	NBmodes,
		long	WFSsize,
		long	GPUcnt
	)

int GPUloadCmat ( int index )

int_fast8_t init_cudacomp ( )

Initialize cudacomp module and command line interface.

void matrixMulCPU	(	float *	cMat,
		float *	wfsVec,
		float *	dmVec,
		int	M,
		int	N
	)

CPU-based matrix vector multiplication.

Data Structures

Functions

Detailed Description

Function Documentation