/******************************************************** * matrix_serial_std.c * * A Collection of matrix multiplication Algorithms * (Matrix_A X Matrix_B) => Matrix_C * * Compile Using: * >gcc -Werror -Wall matrix_serial_std.c -o matrix_serial_std.out * or for Bebugging use * >gcc -DDEBUG -Werror -Wall matrix_serial_std.c -o matrix_serial_dbg_std.out * or for Verification of the Results Including execution time comparison between block and simple code * >gcc -DVERIFY -Werror -Wall matrix_serial_std.c -o matrix_serial_verify_std.out * Execution: ./matrix_serial_verify_std.out 1024 32 4 * Performance Evaluation: * perf stat -e cycles -e instructions -e cache-references -e cache-misses ./matrix_serial_dbg_block.out * ************************************************************** * Last Modified: 21/10/2014 (For EPL370, CS, UCY) * Author: Petros Panayi, PhD (email: petrosp@cs.ucy.ac.cy) * **************************************************************/ #include #include #include //#include "support.h" /* Print and Matrix in an appropriate form. */ void printMatrix(int size, unsigned long int * M[]){ int row, column; for(row = 0; row < size; row ++){ for (column = 0; column < size; column++) printf("%lu \t",M[row][column]); printf("\n");} } /* Fill in matrices A and B with Values and initialise MC to ZERO */ void initMatrices(int size, unsigned long int ** MA, unsigned long int ** MB, unsigned long int ** MC) { unsigned long int row, column; srand(time(NULL)); for (row = 0; row < size; row++) { for (column = 0; column < size; column++) { #ifdef DEBUG MA[row][column] = row + column + 1; #else MA[row][column] = rand(); #endif } } for (row = 0; row < size; row++) { for (column = 0; column < size; column++) { #ifdef DEBUG MB[row][column] = 1; #else MB[row][column] = rand(); #endif } } for (row = 0; row < size; row++) { for (column = 0; column < size; column++) { MC[row][column] = 0; } } printf("MATRIX A is ready.\n"); #ifdef DEBUG printMatrix(size, MA); #endif printf("MATRIX B is ready.\n"); #ifdef DEBUG printMatrix(size, MB); #endif printf("MATRIX C is ready.\n"); #ifdef DEBUG printMatrix(size, MC); #endif } /* Block Matrix Multiplication Algorithm */ void block_matmul(int matrixSize, int blockSize, unsigned long int ** MA, unsigned long int ** MB, unsigned long int ** MC){ int MC_row, i, j, MC_column, MC_block_row, MC_block_col, num_of_blocks; int row, column; // startTime(); /* Process Matrix, by row, column. */ num_of_blocks = matrixSize/blockSize; for (row = 0; row < matrixSize; row++) for (column = 0; column < matrixSize; column++) MC[row][column] = 0; //For All the output Blocks for(MC_block_row = 0; MC_block_row < matrixSize; MC_block_row+= blockSize) for (MC_block_col = 0; MC_block_col < matrixSize; MC_block_col+=blockSize) // For Each of the OUTPUT Blocks Visit Number of Blocks time for(i = 0; i < num_of_blocks; i++){ //For each of the Elements in the the OUTPUT BLOCK for(MC_row = MC_block_row; MC_row < MC_block_row+blockSize; MC_row++) for (MC_column = MC_block_col; MC_column < MC_block_col+blockSize; MC_column++) // For The Input Blocks for(j = 0; j < blockSize; j++) MC[MC_row][MC_column] += MA[MC_row][(i*blockSize)+j] * MB[(i*blockSize)+j][MC_column] ; #ifdef DEBUG printf("MATRIX: The resulting matrix C is;\n"); printMatrix(matrixSize, MC); #endif } // stopTime(); // elapsedTime(); } /* Simple Matrix Multiplication Algorithm */ void matmul_simple(int size, unsigned long int ** MA, unsigned long int ** MB, unsigned long int ** MC){ int row, column, position; // startTime(); /* Process Matrix, by row, column. */ for(row = 0; row < size; row++) for (column = 0; column < size; column++) { MC[row][column] = 0; for(position = 0; position < size; position++) MC[row][column] += MA[row][position] * MB[position][column] ; } // stopTime(); // elapsedTime(); } /* Simple Matrix Multiplication Algorithm */ void matmul_reverse(int size, unsigned long int ** MA, unsigned long int ** MB, unsigned long int ** MC){ int row, column, position; // startTime(); for (row = 0; row < size; row++) for (column = 0; column < size; column++) MC[row][column] = 0; /* Process Matrix, by row, column. */ for(row = 0; row < size; row++) for(position = 0; position < size; position++){ for (column = 0; column < size; column++) MC[row][column] += MA[row][position] * MB[position][column] ; } // stopTime(); // elapsedTime(); } /* Simple Matrix Multiplication Algorithm */ void matmul_tmp(int size, unsigned long int ** MA, unsigned long int ** MB, unsigned long int ** MC){ int row, column, position; unsigned long int tmp; // startTime(); for(row = 0; row < size; row++) for (column = 0; column < size; column++) { tmp = 0; for(position = 0; position < size; position++) tmp += MA[row][position] * MB[position][column] ; MC[row][column] = tmp; } // stopTime(); // elapsedTime(); } unsigned long int verificationCode(int size, unsigned long int * M[]){ int row, column; unsigned long int verificationSum = 0; for(row = 0; row < size; row ++) for (column = 0; column < size; column++) verificationSum += M[row][column]; return verificationSum; } /* * Main: */ extern int main(int argc, char **argv) { int row, matrixSize=1024, blockSize=10, numberOfBlocks = 2, algorithm = 4; unsigned long int ** MA; unsigned long int ** MB; unsigned long int ** MC; if (argc < 3){ printf("Use: matrix_serial_dbg_block.out [MatMul Algorithm=4]\n"); printf("EXAMPLE: matrix_serial_dbg_block.out 1024 32 4"); return 0; }else{ matrixSize = atoi(argv[1]); numberOfBlocks = atoi(argv[2]); algorithm = atoi(argv[3]); blockSize = matrixSize/numberOfBlocks; if (matrixSize%numberOfBlocks != 0){ printf("The Number of Blocks must devide the blocksize \n"); return 0; } } #ifdef DEBUG printf("Number of Arguments %d\n",argc); printf("Square Matrix Dimention: %d. Block Size = %d.\n",matrixSize,blockSize); #endif MA = (unsigned long int **)malloc(sizeof(unsigned long int*)*matrixSize); MB = (unsigned long int **)malloc(sizeof(unsigned long int*)*matrixSize); MC = (unsigned long int **)malloc(sizeof(unsigned long int*)*matrixSize); for (row = 0; row < matrixSize; row++) { MA[row] = (unsigned long int *) malloc(sizeof(unsigned long int)*matrixSize); MB[row] = (unsigned long int *) malloc(sizeof(unsigned long int)*matrixSize); MC[row] = (unsigned long int *) malloc(sizeof(unsigned long int)*matrixSize); } /* Initialize the matrices A and B. */ initMatrices(matrixSize, MA, MB, MC); if (algorithm == 1) { printf ("Running Simple MatMul Algorithm\n"); matmul_simple(matrixSize, MA, MB, MC); }else if (algorithm == 2){ printf ("Running tmp MatMul Algorithm\n"); matmul_tmp(matrixSize, MA, MB, MC); }else if (algorithm == 3){ printf ("Running reverse MatMul Algorithm\n"); matmul_reverse(matrixSize, MA, MB, MC); }else if (algorithm == 4){ printf ("Running Block MatMul Algorithm\n"); block_matmul(matrixSize, blockSize, MA, MB, MC); }else { printf ("Running Block MatMul Algorithm\n"); block_matmul(matrixSize, blockSize, MA, MB, MC); } #ifdef VERIFY printf("VerificationCode: %lu\n",verificationCode(matrixSize, MC)); #endif #ifdef DEBUG printf("MATRIX: The resulting matrix C is;\n"); printMatrix(matrixSize, MC); #endif #ifdef VERIFY matmul_simple(matrixSize, MA, MB, MC); printf("VerificationCode: %lu\n",verificationCode(matrixSize, MC)); matmul_tmp(matrixSize, MA, MB, MC); printf("VerificationCode: %lu\n",verificationCode(matrixSize, MC)); matmul_reverse(matrixSize, MA, MB, MC); printf("VerificationCode: %lu\n",verificationCode(matrixSize, MC)); block_matmul(matrixSize, blockSize, MA, MB, MC); printf("VerificationCode: %lu\n",verificationCode(matrixSize, MC)); #endif return 0; }