#include <stdio.h>

/* Emulated use of an NVIDIA C routine from Fortran */

/*
 * cublasSgemm performs one of the matrix-matrix operations
 *
 *     C := alpha*op( A )*op( B ) + beta*C,
 *
 * where op( X ) is one of
 *
 *     op( X ) = X   or   op( X ) = X',
 *
 * alpha and beta are scalars, and A, B and C are matrices, with op( A )
 * an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
 *
 * All matrices use column-oriented (Fortran) storage: element (i,j),
 * counted from zero, lives at offset i + ld*j.
 */

/* Return nonzero when a transpose flag ('T', 't', 'C' or 'c') is given.
 * Any other character is treated as "no transpose". */
static int sgemm_is_trans(char flag)
{
    return flag == 'T' || flag == 't' || flag == 'C' || flag == 'c';
}

/* Offset of zero-based element (row, col) in column-major storage with
 * leading dimension ld; computed in size_t so ld*col cannot overflow int. */
static size_t sgemm_at(int row, int col, int ld)
{
    return (size_t)row + (size_t)ld * (size_t)col;
}

/* Print a stored rows-by-cols matrix column by column, using 1-based
 * indices in the output. */
static void sgemm_print(char name, const float *X, int rows, int cols, int ld)
{
    int i, j;

    for (j = 0; j < cols; j++) {
        for (i = 0; i < rows; i++) {
            printf("Value of i,j and %c(i,j) = %d, %d, %e\n",
                   name, i + 1, j + 1, X[sgemm_at(i, j, ld)]);
        }
    }
}

/*
 * Parameters:
 *   transa, transb  transpose flags for A and B (see sgemm_is_trans)
 *   m, n, k         op(A) is m by k, op(B) is k by n, C is m by n
 *   alpha, beta     scalar multipliers
 *   A, lda          stored A (m by k, or k by m when transposed) and its
 *                   leading dimension
 *   B, ldb          stored B (k by n, or n by k when transposed) and its
 *                   leading dimension
 *   C, ldc          m by n matrix, updated in place, and its leading dimension
 *
 * The arguments, the stored inputs and the resulting C are printed to stdout.
 */
void cublasSgemm (char transa, char transb, int m, int n, int k,
                  float alpha, const float *A, int lda,
                  const float *B, int ldb, float beta, float *C, int ldc)
{
    const int ta = sgemm_is_trans(transa);
    const int tb = sgemm_is_trans(transb);
    int i, j, p;

    // Print out input arguments that show a correct interface.
    printf(" Values of adjoint flags TRANSA, TRANSB = %c,%c\n", transa, transb);
    printf(" Values of sizes m,n,k,lda,ldb and ldc = %d,%d,%d,%d,%d,%d\n",
           m, n, k, lda, ldb, ldc);
    printf(" Values of alpha, beta = %e, %e\n", alpha, beta);

    // Print out input arrays A,B and their column-oriented storage.
    // A transposed operand is stored with its dimensions swapped.
    sgemm_print('A', A, ta ? k : m, ta ? m : k, lda);
    printf("\n");
    sgemm_print('B', B, tb ? n : k, tb ? k : n, ldb);

    /* Multiplication of op(A) and op(B) */
    printf("\n");
    for (i = 0; i < m; i++) {
        for (j = 0; j < n; j++) {
            float *c = &C[sgemm_at(i, j, ldc)];
            /* When beta is zero C is write-only: do not let NaN/Inf or
             * uninitialised input values leak into the result. */
            float acc = (beta == 0.0f) ? 0.0f : beta * *c;

            for (p = 0; p < k; p++) {
                const float a = ta ? A[sgemm_at(p, i, lda)]
                                   : A[sgemm_at(i, p, lda)];
                const float b = tb ? B[sgemm_at(j, p, ldb)]
                                   : B[sgemm_at(p, j, ldb)];
                acc = acc + alpha * a * b;
            }
            *c = acc;
        }
    }

    printf("\n");
    sgemm_print('C', C, m, n, ldc);
}