#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include "mympi.h"

typedef int Element_type;

#define MPI_TYPE  MPI_INT
#define REORDER   1

int main( int argc, char *argv[] )
{
    Element_type **A;          /* matrix to be multiplied        */
    Element_type *A_storage;   /* backing storage for matrix     */
    Element_type *X_local;     /* local portion of vector X      */
    Element_type *B_sums;      /* result vector A*X = B          */
    Element_type *B_partial;   /* subblock of result vector      */
    Element_type *input_vec;   /* input vector read from file    */
    int    nrows;          // total number of rows in input matrix
    int    ncols;          // total number of columns in input matrix
    int    id;             // process rank in MPI_COMM_WORLD
    int    p;              // number of processes in MPI_COMM_WORLD
    int    i, j;           // loop indices
    int    error;          // error exit value of calls
    int    nlocal_rows;    // number of rows belonging to this process
    int    nlocal_cols;    // number of cols belonging to this process
    int    grid_id;        // rank of process within Cartesian grid
    int    row_id;         // rank of process within its row subgroup
    int    grid_size[2];   // grid dimensions
    int    grid_coords[2]; // process's coordinates in Cartesian grid
    MPI_Comm grid_comm;    // Cartesian communicator for grid of procs
    MPI_Comm row_comm;     // communicator for all processes in a grid row
    MPI_Comm col_comm;     // communicator for all processes in a grid col
    int   *send_count;     // array of counts for MPI_Scatterv function
    int   *send_offset;    // array of offsets for MPI_Scatterv function
    int    periodic[2];    // array of wraparound flags when creating grid
    int    nrows_X;        // number of rows of X vector local to process
    FILE  *file;           // for opening input files

    MPI_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_WORLD, &id );
    MPI_Comm_size( MPI_COMM_WORLD, &p );

    grid_size[0] = 0;      // Let MPI choose the grid dimensions
    grid_size[1] = 0;
    MPI_Dims_create( p, 2, grid_size );

    periodic[0] = 0;       // No wraparound
    periodic[1] = 0;       // No wraparound
    MPI_Cart_create( MPI_COMM_WORLD, 2, grid_size, periodic, REORDER, &grid_comm );

    MPI_Comm_rank( grid_comm, &grid_id );                    // rank in Cartesian grid
    MPI_Cart_coords( grid_comm, grid_id, 2, grid_coords );   // coordinates in grid

    MPI_Comm_split( grid_comm, grid_coords[0], grid_coords[1], &row_comm );
    MPI_Comm_split( grid_comm, grid_coords[1], grid_coords[0], &col_comm );

    read_and_distribute_2dblock_matrix( argv[1], (void*) &A, (void*) &A_storage,
            MPI_TYPE, &nrows, &ncols, &error, grid_comm );
    if ( 0 != error )
        MPI_Abort( MPI_COMM_WORLD, OPEN_FILE_ERROR );

    collect_and_print_2dblock_matrix( (void**) A, MPI_TYPE, nrows, ncols, grid_comm );

    nlocal_rows = size_of_block( grid_coords[0], nrows, grid_size[0] );
    nlocal_cols = size_of_block( grid_coords[1], ncols, grid_size[1] );

    /* Processes in the first grid row handle the input vector; only the
       process in grid column 0 actually opens and reads the file. */
    if ( 0 == grid_coords[0] ) {
        MPI_Comm_rank( row_comm, &row_id );
        if ( 0 == row_id ) {   /* we could also have just checked the column coordinate */
            file = fopen( argv[2], "r" );
            if ( NULL == file ) {
                MPI_Abort( MPI_COMM_WORLD, MPI_ERR_FILE );
            }
            else {
                int temp;
                fread( &temp, sizeof(int), 1, file );
                /* Make sure vector length matches matrix width. */
                if ( temp != ncols ) {
                    fprintf( stderr, "Vector size and matrix size unmatched.\n" );
                    MPI_Abort( MPI_COMM_WORLD, OPEN_FILE_ERROR );
                }
                input_vec = (Element_type*) malloc( ncols * sizeof(Element_type) );
                if ( NULL == input_vec ) {
                    fprintf( stderr, "Could not allocate more storage.\n" );
                    MPI_Abort( MPI_COMM_WORLD, MPI_ERR_NO_MEM );
                }
                fread( input_vec, sizeof(Element_type), ncols, file );
            }
        }
    }

    nrows_X = size_of_block( grid_coords[1], ncols, grid_size[1] );
    X_local = (Element_type*) malloc( nrows_X * sizeof(Element_type) );
    if ( NULL == X_local ) {
        fprintf( stderr, "Could not allocate more storage.\n" );
        MPI_Abort( MPI_COMM_WORLD, MPI_ERR_NO_MEM );
    }

    /* Scatter the input vector across the processes of the first grid row. */
    if ( 0 == grid_coords[0] ) {
        if ( grid_size[1] > 1 ) {
            send_count  = (int*) malloc( grid_size[1] * sizeof(int) );
            send_offset = (int*) malloc( grid_size[1] * sizeof(int) );
            if ( NULL == send_count || NULL == send_offset ) {
                fprintf( stderr, "Could not allocate more storage.\n" );
                MPI_Abort( MPI_COMM_WORLD, MPI_ERR_NO_MEM );
            }
            init_communication_arrays( grid_size[1], ncols, send_count, send_offset );
            MPI_Scatterv( input_vec, send_count, send_offset, MPI_TYPE,
                          X_local, nrows_X, MPI_TYPE, 0, row_comm );
        }
        else
            for ( i = 0; i < ncols; i++ )
                X_local[i] = input_vec[i];
    }

    /* Broadcast each piece of the vector down its grid column. */
    MPI_Bcast( X_local, nrows_X, MPI_TYPE, 0, col_comm );

    B_partial = (Element_type*) malloc( nlocal_rows * sizeof(Element_type) );
    B_sums    = (Element_type*) malloc( nlocal_rows * sizeof(Element_type) );
    if ( NULL == B_sums || NULL == B_partial ) {
        fprintf( stderr, "Could not allocate more storage.\n" );
        MPI_Abort( MPI_COMM_WORLD, MPI_ERR_NO_MEM );
    }

    /* Each process multiplies its block of A by its piece of X. */
    for ( i = 0; i < nlocal_rows; i++ ) {
        B_partial[i] = 0;
        for ( j = 0; j < nlocal_cols; j++ )
            B_partial[i] += A[i][j] * X_local[j];
    }

    /* Sum the partial products across each grid row; the results end up
       on the processes in grid column 0. */
    MPI_Reduce( B_partial, B_sums, nlocal_rows, MPI_TYPE, MPI_SUM, 0, row_comm );

    if ( grid_coords[1] == 0 )
        print_block_vector( B_sums, MPI_TYPE, nrows, col_comm );

    MPI_Finalize();
    return 0;
}
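The helper functions size_of_block and init_communication_arrays are declared in mympi.h and are not shown in this listing. The sketch below is only a guess at their behavior, inferred from the call sites above: it assumes the common block decomposition in which process id of p owns elements floor(id*n/p) through floor((id+1)*n/p) - 1, so the counts and offsets it produces match what the MPI_Scatterv call expects. The actual implementations in mympi.h may differ.

/* Sketch (not the actual mympi.h code): number of elements owned by process
   'id' when 'ntotal' elements are divided in blocks among 'p' processes. */
int size_of_block( int id, int ntotal, int p )
{
    return ( (id + 1) * ntotal ) / p - ( id * ntotal ) / p;
}

/* Sketch: fill the count and offset arrays for MPI_Scatterv so that process
   'id' receives size_of_block(id, ntotal, p) elements starting at element
   (id * ntotal) / p. */
void init_communication_arrays( int p, int ntotal, int *count, int *offset )
{
    int id;
    for ( id = 0; id < p; id++ ) {
        count[id]  = size_of_block( id, ntotal, p );
        offset[id] = ( id * ntotal ) / p;
    }
}

With this decomposition the blocks are contiguous (offset[id+1] equals offset[id] + count[id]), which is what the MPI_Scatterv call in the listing relies on when it distributes input_vec across the first grid row.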