CUDA functions wrapper. More...

Functions
static int	clock_gettime (int clk_id, struct mach_timespec *t)

int_fast8_t	CUDACOMP_test_cli ()

int_fast8_t	CUDACOMP_MatMatMult_testPseudoInverse_cli ()

int_fast8_t	CUDACOMP_magma_compute_SVDpseudoInverse_SVD_cli ()

int_fast8_t	CUDACOMP_magma_compute_SVDpseudoInverse_cli ()

int_fast8_t	CUDACOMP_Coeff2Map_Loop_cli ()

int_fast8_t	CUDACOMP_Coeff2Map_offset_Loop_cli ()

int_fast8_t	CUDACOMP_extractModesLoop_cli ()

int_fast8_t	init_cudacomp ()
	Initialize cudacomp module and command line interface. More...

int_fast8_t	CUDACOMP_init ()
	Initialize CUDA and MAGMA. More...

int_fast8_t	GPUcomp_test (long NBact, long NBmodes, long WFSsize, long GPUcnt)

void	matrixMulCPU (float cMat, float wfsVec, float *dmVec, int M, int N)
	CPU-based matrix vector multiplication. More...

void *	compute_function (void *ptr)

int	GPUloadCmat (int index)

int	GPU_loop_MultMat_setup (int index, const char IDcontrM_name, const char IDwfsim_name, const char IDoutdmmodes_name, long NBGPUs, int GPUdevice, int orientation, int USEsem, int initWFSref, long loopnb)

int	GPU_loop_MultMat_execute (int index, int_fast8_t status, int_fast8_t GPUstatus, float alpha, float beta, int timing)

int	GPU_loop_MultMat_free (int index)

long	CUDACOMP_MatMatMult_testPseudoInverse (const char IDmatA_name, const char IDmatAinv_name, const char *IDmatOut_name)
	Test pseudo inverse. More...

int	CUDACOMP_magma_compute_SVDpseudoInverse_SVD (const char ID_Rmatrix_name, const char ID_Cmatrix_name, double SVDeps, long MaxNBmodes, const char *ID_VTmatrix_name)
	Compute pseudoinverse using MAGMA-based SVD. More...

int	CUDACOMP_magma_compute_SVDpseudoInverse (const char ID_Rmatrix_name, const char ID_Cmatrix_name, double SVDeps, long MaxNBmodes, const char *ID_VTmatrix_name, int LOOPmode)
	Computes matrix pseudo-inverse (AT A)^-1 AT, using eigenvector/eigenvalue decomposition of AT A. More...

int	GPU_SVD_computeControlMatrix (int device, const char ID_Rmatrix_name, const char ID_Cmatrix_name, double SVDeps, const char *ID_VTmatrix_name)

int	CUDACOMP_Coeff2Map_Loop (const char IDmodes_name, const char IDcoeff_name, int GPUindex, const char IDoutmap_name, int offsetmode, const char IDoffset_name)

int	CUDACOMP_extractModesLoop (const char in_stream, const char intot_stream, const char IDmodes_name, const char IDrefin_name, const char IDrefout_name, const char IDmodes_val_name, int GPUindex, int PROCESS, int TRACEMODE, int MODENORM, int insem, int axmode, long twait)
	extract mode coefficients from data stream More...

Variables
int	FORCESEMINIT = 1

DATA	data
	System includes. More...

pid_t	CLIPID
	important directories and info More...

static struct timespec	tnow

static struct timespec	tdiff

static double	tdiffv

static int	IDtimerinit = 0

static long	IDtiming = -1

static int	deviceCount

GPUMATMULTCONF	gpumatmultconf [20]

static cudaError_t	error

static cublasStatus_t	stat

static float	cublasSgemv_alpha = 1.0

static float	cublasSgemv_beta = 0.0

static int	INIT_MAGMA = 0

static magma_queue_t	magmaqueue

static long	MAGMAloop_iter = 0

static double *	magma_h_A

static double *	magma_d_A

static double *	magma_d_AtA

static double *	magma_h_AtA

static double *	magma_w1

static double *	magma_h_R

static double *	magma_h_work

static double *	magma_d_VT1

static double *	magma_h_VT1

static double *	magma_d_M2

static double *	magma_d_Ainv

static double *	magma_h_Ainv

static double *	magma_h_M2

static float *	magmaf_h_A

static float *	magmaf_d_A

static float *	magmaf_d_AtA

static float *	magmaf_h_AtA

static float *	magmaf_w1

static float *	magmaf_h_R

static float *	magmaf_h_work

static float *	magmaf_d_VT1

static float *	magmaf_h_VT1

static float *	magmaf_d_M2

static float *	magmaf_d_Ainv

static float *	magmaf_h_Ainv

static float *	magmaf_h_M2

static magma_int_t	magma_aux_iwork [1]

static magma_int_t	magma_lwork

static magma_int_t	magma_liwork

static magma_int_t *	magma_iwork

Detailed Description

CUDA functions wrapper.

Also uses MAGMA library

Author: O. Guyon

Date: 3 Jul 2017

Bug:: MAGMA execution can hang on dsyevd routine. This seems to be a MAGMA issue.

Function Documentation

static int clock_gettime	(	int	clk_id,
		struct mach_timespec *	t
	)

static

void* compute_function ( void * ptr )

int CUDACOMP_Coeff2Map_Loop	(	const char *	IDmodes_name,
		const char *	IDcoeff_name,
		int	GPUindex,
		const char *	IDoutmap_name,
		int	offsetmode,
		const char *	IDoffset_name
	)

int_fast8_t CUDACOMP_Coeff2Map_Loop_cli ( )

int_fast8_t CUDACOMP_Coeff2Map_offset_Loop_cli ( )

int CUDACOMP_extractModesLoop	(	const char *	in_stream,
		const char *	intot_stream,
		const char *	IDmodes_name,
		const char *	IDrefin_name,
		const char *	IDrefout_name,
		const char *	IDmodes_val_name,
		int	GPUindex,
		int	PROCESS,
		int	TRACEMODE,
		int	MODENORM,
		int	insem,
		int	axmode,
		long	twait
	)

extract mode coefficients from data stream

int_fast8_t CUDACOMP_extractModesLoop_cli ( )

int_fast8_t CUDACOMP_init ( )

Initialize CUDA and MAGMA.

Finds CUDA devices Initializes CUDA and MAGMA libraries

Returns: number of CUDA devices found

int CUDACOMP_magma_compute_SVDpseudoInverse	(	const char *	ID_Rmatrix_name,
		const char *	ID_Cmatrix_name,
		double	SVDeps,
		long	MaxNBmodes,
		const char *	ID_VTmatrix_name,
		int	LOOPmode
	)

Computes matrix pseudo-inverse (AT A)^-1 AT, using eigenvector/eigenvalue decomposition of AT A.

Computes pseuso inverse of M x N matrix

When used with AOloopControl module: N: number of actuators M: number of sensors assumes M>N

use LOOPmode = 1 for computing the same size SVD, same input and output location

Notations: AT is transpose of A A+ is pseudo inverse of A

Computes pseudo-inverse : A+ = (AT A)^-1 AT Inverse of AT A is computed by SVD

SVD: A = U S V^T U are eigenvectors of A A^T V are eigenvectors of A^T A, computed at step 4 below

Linear algebra reminder: equivalence between (AT A)^-1 AT and V S^-1 UT

Definition of pseudoinverse: A+ = (AT A)^-1 AT singular value decomposition of A = U S VT A+ = ( V S UT U S VT )^-1 V S UT Since U is unitary, UT U = Id -> A+ = ( V S^2 VT )^-1 V S UT A+ = VT^-1 S^-2 V^-1 V S UT A+ = V S^-1 UT

Main steps:

STEP 1 : Fill input data into magmaf_h_A on host

STEP 2 : Copy input data to GPU -> magmaf_d_A (MxN matrix on device)

STEP 3 : Compute trans(A) x A : magmaf_d_A x magmaf_d_A -> magmaf_d_AtA (NxN matrix on device)

STEP 4 : Compute eigenvalues and eigenvectors of A^T A -> magmaf_d_AtA (NxN matrix on device) Calls magma_ssyevd_gpu : Compute the eigenvalues and optionally eigenvectors of a symmetric real matrix in single precision, GPU interface, big matrix. This function computes in single precision all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A defined on the device. The first parameter can take the values MagmaVec,'V' or MagmaNoVec,'N' and answers the question whether the eigenvectors are desired. If the eigenvectors are desired, it uses a divide and conquer algorithm. The symmetric matrix A can be stored in lower (MagmaLower,'L') or upper (MagmaUpper,'U') mode. If the eigenvectors are desired, then on exit A contains orthonormal eigenvectors. The eigenvalues are stored in an array w

STEP 5 : Set eigenvalue limit

STEP 6 : Write eigenvectors to V^T matrix

STEP 7 : Write eigenvectors/eigenvalue to magma_h_VT1 if eigenvalue > limit Copy to magma_d_VT1

STEP 8 : Compute M2 = VT1 VT. M2 is (AT A)^-1

STEP 9 : Compute Ainv = M2 A. This is the pseudo inverse

Note: SVDeps^2 is applied as a limit to the eigenvectors of AT A, which are equal to the squares of the singular values of A, so this is equivalent to applying SVDeps as a limit on the singular values of A; When used to compute AO control matrix, N=number of actuators/modes, M=number of WFS elements; EIGENVALUES are good to about 1e-6 of peak eigenvalue in single precision, much better with double precision; 2.5x faster in single precision

Timing tests

< 1 if timing test ON, 0 otherwise

< Timers

< Times in sec

< 1 if single precision, 0 if double precision

The input matrix can be a 2D or a 3D image

If 2D image : M = xsize N = ysize

If 3D image : M = xsize*ysize N = ysize

Note that a tall input matrix (M>N) will appear short if viewed as an image. There is a 90 deg rotation between image and matrix

MAGMA uses column-major matrices. For matrix A with dimension (M,N), element A(i,j) is A[ j*M + i] i = 0 ... M j = 0 ... N

each column (N=cst) of A is a z=cst slice of image Rmatrix

each column (N=cst) of A is a line (y=cst) of Rmatrix (90 deg rotation)

Initialize MAGMA if needed

memory is only allocated on first pass

if pseudo-inverse is only computed once, these arrays can be freed

w1 values are the EIGENVALUES of AT A Note: w1 values are the SQUARE of the singular values of A

int_fast8_t CUDACOMP_magma_compute_SVDpseudoInverse_cli ( )

int CUDACOMP_magma_compute_SVDpseudoInverse_SVD	(	const char *	ID_Rmatrix_name,
		const char *	ID_Cmatrix_name,
		double	SVDeps,
		long	MaxNBmodes,
		const char *	ID_VTmatrix_name
	)

Compute pseudoinverse using MAGMA-based SVD.

int_fast8_t CUDACOMP_magma_compute_SVDpseudoInverse_SVD_cli ( )

long CUDACOMP_MatMatMult_testPseudoInverse	(	const char *	IDmatA_name,
		const char *	IDmatAinv_name,
		const char *	IDmatOut_name
	)

Test pseudo inverse.

IDmatA is an image loaded as a M x N matrix IDmatAinv is an image loaded as a M x M matrix, representing the transpose of the pseudo inverse of IDmatA

The input matrices can be 2D or a 3D images

If 2D image : IDmatA M = xsize IDmatA N = ysize

If 3D image : IDmatA M = xsize*ysize IDmatA N = ysize

MAGMA uses column-major matrices. For matrix A with dimension (M,N), element A(i,j) is A[ j*M + i] i = 0 ... M j = 0 ... N

each column (N=cst) of A is a z=cst slice of image Rmatrix

each column (N=cst) of A is a line (y=cst) of Rmatrix (90 deg rotation)

Initialize MAGAM if needed

load matA in h_A -> d_A

load matAinv in h_Ainv -> d_Ainv

int_fast8_t CUDACOMP_MatMatMult_testPseudoInverse_cli ( )

int_fast8_t CUDACOMP_test_cli ( )

int GPU_loop_MultMat_execute	(	int	index,
		int_fast8_t *	status,
		int_fast8_t *	GPUstatus,
		float	alpha,
		float	beta,
		int	timing
	)

int GPU_loop_MultMat_free ( int index )

int GPU_loop_MultMat_setup	(	int	index,
		const char *	IDcontrM_name,
		const char *	IDwfsim_name,
		const char *	IDoutdmmodes_name,
		long	NBGPUs,
		int *	GPUdevice,
		int	orientation,
		int	USEsem,
		int	initWFSref,
		long	loopnb
	)

setup matrix multiplication using multiple GPUs

Load Control Matrix

Load Input vectors

int GPU_SVD_computeControlMatrix	(	int	device,
		const char *	ID_Rmatrix_name,
		const char *	ID_Cmatrix_name,
		double	SVDeps,
		const char *	ID_VTmatrix_name
	)

int_fast8_t GPUcomp_test	(	long	NBact,
		long	NBmodes,
		long	WFSsize,
		long	GPUcnt
	)

int GPUloadCmat ( int index )

int_fast8_t init_cudacomp ( )

Initialize cudacomp module and command line interface.

void matrixMulCPU	(	float *	cMat,
		float *	wfsVec,
		float *	dmVec,
		int	M,
		int	N
	)

CPU-based matrix vector multiplication.

Variable Documentation

pid_t CLIPID

important directories and info

float cublasSgemv_alpha = 1.0

static

float cublasSgemv_beta = 0.0

static

DATA data

System includes.

External libraries

int deviceCount

static

cudaError_t error

static

int FORCESEMINIT = 1

GPUMATMULTCONF gpumatmultconf[20]

int IDtimerinit = 0

static

long IDtiming = -1

static

int INIT_MAGMA = 0

static

magma_int_t magma_aux_iwork[1]

static

double* magma_d_A

static

double* magma_d_Ainv

static

double* magma_d_AtA

static

double* magma_d_M2

static

double* magma_d_VT1

static

double* magma_h_A

static

double* magma_h_Ainv

static

double* magma_h_AtA

static

double* magma_h_M2

static

double* magma_h_R

static

double* magma_h_VT1

static

double* magma_h_work

static

magma_int_t* magma_iwork

static

magma_int_t magma_liwork

static

magma_int_t magma_lwork

static

double* magma_w1

static

float* magmaf_d_A

static

float* magmaf_d_Ainv

static

float* magmaf_d_AtA

static

float* magmaf_d_M2

static

float* magmaf_d_VT1

static

float* magmaf_h_A

static

float* magmaf_h_Ainv

static

float* magmaf_h_AtA

static

float* magmaf_h_M2

static

float* magmaf_h_R

static

float* magmaf_h_VT1

static

float* magmaf_h_work

static

float* magmaf_w1

static

long MAGMAloop_iter = 0

static

magma_queue_t magmaqueue

static

cublasStatus_t stat

static

struct timespec tdiff

static

double tdiffv

static

struct timespec tnow

static

Functions

Variables

Detailed Description

Function Documentation

Variable Documentation