/* @TITLE "Shared-memory Matrix multiply" */ /* * Shared memory matrix multiply example * p4 version * * David Kotz 1991 */ #include #include "p4.h" #include "dfk.h" #include "timing.h" #include "divide.h" char *progname; static void InitRow(); static void MultiplyRow(); static void PrintMatrix(); char *alloc(); int N; /* the size of each square matrix */ float **A, **B, **C; /* the matrices */ p4_barrier_monitor_t *halt; /* a barrier between key parts of program */ boolean printmatrix; /* should we print the matrix? */ main(argc, argv) int argc; char **argv; { p4_initenv(&argc, argv); /* Get arguments */ progname = argv[0]; if (argc < 2) { fprintf(stderr, "usage: %s [p4opts] N [print]\n", progname); exit(-1); } N = atoi(argv[1]); if (N <= 1) { fprintf(stderr, "N=%d; should be > 1. (N=atoi(%s))\n", progname, N, argv[1]); exit(-1); } printmatrix = (argc >= 3); /* INITIALIZE */ A = (float **)alloc2d(N, N, sizeof(float)); B = (float **)alloc2d(N, N, sizeof(float)); C = (float **)alloc2d(N, N, sizeof(float)); if (!(A && B && C)) { fprintf(stderr, "%s: Not enough memory.\n", progname); exit(1); } halt = (p4_barrier_monitor_t *) alloc(sizeof(p4_barrier_monitor_t)); if (halt == NULL) { fprintf(stderr, "%s: Not enough memory.\n", progname); exit(1); } p4_barrier_init(halt); /* GENERATE PROCESSES */ /* all new procs will share values of previously-set variables: */ /* N, halt, A, B, C, printmatrix */ p4_create_procgroup(); slave(); /* we now act just like the others */ p4_wait_for_end(); } /* @SUBTITLE "slave: worker tasks" */ /* this is also executed by the master */ slave() { TICS start; /* start time */ int nprocs; /* total number of procs */ int mynode; /* this process's number */ int firstrow, lastrow; /* the rows for this proc to compute */ double time; /* elapsed time for multiply */ int row; /* the row in action */ int rowsperproc, leftover; /* division of rows over procs, and remainder */ mynode = p4_get_my_id(); nprocs = p4_num_total_slaves()+1; if (!divide_problem(mynode, nprocs, N, &firstrow, &lastrow)) return; p4_dprintfl(5, "Proc %d doing rows %d through %d\n", mynode, firstrow, lastrow); /* Initialize each row of both matrices */ for (row = firstrow; row <= lastrow; row++) InitRow(row); p4_barrier(halt, nprocs); if (printmatrix && mynode == 0) { PrintMatrix("A", A, N, N); PrintMatrix("B", B, N, N); } p4_barrier(halt, nprocs); /* now compute the value of each result location */ /* N^3 flops */ if (mynode == 0) { (void) timer_init(); start = timer_get(); } for (row = firstrow; row <= lastrow; row++) MultiplyRow(row); p4_barrier(halt, nprocs); if (mynode == 0) { time = (timer_get() - start) * SECperTIC; if (printmatrix) PrintMatrix("C", C, N, N); printf("%d %g %g %d\n", nprocs, time, N*N*N/time, N); } } /* @SUBTITLE "InitRow: initialize one row" */ static void InitRow(row) int row; /* the row we are to do */ { int col; /* the column number */ float *Arow = A[row]; /* a row of A */ float *Brow = B[row]; /* a row of B */ srandom((int)(row * p4_clock())); for (col = 0; col < N; col++) { Arow[col] = (random() % 10000 - 5000) / 50.; Brow[col] = (random() % 10000 - 5000) / 50.; } } /* @SUBTITLE "Multiply: compute one output row" */ static void MultiplyRow(row) int row; /* the row we are to compute */ { int col; /* the column we are computing */ int i; /* the loop variable */ double sum; /* the running sum */ float *Arow; /* a row of A */ Arow = A[row]; /* this loop takes N*N flops */ for (col = 0; col < N; col++) { /* this loop takes N flops */ for (i = 0, sum = 0.0; i < N; i++) { /* this is one flop */ sum += Arow[i] * B[i][col]; } C[row][col] = sum; } } /* @SUBTITLE "PrintMatrix: print out a matrix" */ static void PrintMatrix(name, A, m, n) char *name; float **A; /* the matrix A[m][n] */ int m,n; { int i,j; printf("\nMatrix %s[%d][%d]:\n", name, m, n); for (i = 0; i < m; i++) { printf("row %d\n", i); for (j = 0; j < n; j++) { printf("%g\t", A[i][j]); } printf("\n"); } } /* @SUBTITLE "alloc" */ /* define alloc, used by alloc2d, to get shared memory */ char * alloc(size) int size; { return(p4_shmalloc(size)); }