/* @TITLE "Message-passing Matrix multiply" */ /* * Message-passing matrix multiply example * SLAVE * * David Kotz 1991 */ #include #include "p4.h" #include "dfk.h" #include "broadcast.h" #include "timing.h" #include "mpmatrix.h" static void Init(); static double DotProduct(); static void PrintMatrix(); static char *alloc(); void do_one_row(); static void gettoken(), sendtoken(); /* @SUBTITLE "slave: worker tasks" */ slave() { int N; /* the size of each square matrix */ int nprocs; /* total number of procs */ int mynode; /* this process's number */ int length; /* length of message */ struct problem_s *message; /* the message */ boolean printmatrix; /* should we print the matrix? */ mynode = p4_get_my_id(); nprocs = p4_num_total_slaves()+1; /* Get problem description from master */ broadcast(mynode, nprocs, &message, &length); N = message->N; printmatrix = message->printmatrix; p4_msg_free(message); if (N == nprocs) /* we can only handle simple cases */ do_one_row(nprocs, mynode, printmatrix, N); } /* @SUBTITLE "do_one_row: compute one row of the solution" */ /* this is executed by all processes */ void do_one_row(nprocs, mynode, printmatrix, N) { int row = mynode; /* the row we are handling */ int col = row; /* the current column we are handling */ TICS start; /* for measuring time */ int from; /* address of sending process */ int len; /* length of arriving message */ float *A, *B, *C; /* one row or column of each matrix */ /* INITIALIZE */ /* We assume that the data is already nicely distributed */ /* One row of A and of C per proc; One col of B per proc. */ A = (float *)alloc(N * sizeof(float)); /* a row of A */ B = (float *)alloc(N * sizeof(float)); /* a col of B */ C = (float *)alloc(N * sizeof(float)); /* a row of C */ /* Initialize our row of A, and our col of B */ Init(row, A, B, N); if (printmatrix) { PrintMatrix(mynode, nprocs, "A", A, row, N, N); PrintMatrix(mynode, nprocs, "B", B, row, N, N); } /* We time just the matrix computation itself. */ if (mynode == 0) { (void) timer_init(); start = timer_get(); } /* go through all columns, sent around from process to process */ do { p4_dprintfl(2, "computing row %d, col %d\n", row, col); C[col] = DotProduct(A, B, N); /* send this column to the next processor */ p4_send(col, (mynode+1) % nprocs, B, N*sizeof(float)); if (col == row) free(B); /* first time: B was alloc'd */ else p4_msg_free(B); /* other times: B was a message */ /* receive a different column from another processor */ col = -1; from = -1; p4_recv(&col, &from, &B, &len); /* col has a new value now */ } while (col != row); p4_msg_free(B); /* B was a message: must be freed */ /* stop timer */ if (mynode == 0) { float time = (timer_get() - start) * SECperTIC; /* N^3 flops were done above */ printf("%d %g %g %d\n", nprocs, time, N*N*N/time, N); } if (printmatrix) PrintMatrix(mynode, nprocs, "C", C, row, N, N); } /* @SUBTITLE "InitRow: initialize one row" */ static void Init(row, A, B, N) float *A, *B; /* two vectors of length N */ int row; /* the row we are to do */ int N; /* size of matrix */ { int i; /* the column/row number */ srandom(row); /* randomize */ for (i = 0; i < N; i++) { A[i] = (random() % 10000 - 5000) / 50.; B[i] = (random() % 10000 - 5000) / 50.; } } /* @SUBTITLE "MultiplyRow: compute one output location" */ static double DotProduct(A, B, N) float *A, *B; /* two vectors of length N */ int N; /* length of vectors */ { int i; /* the loop variable */ double sum; /* the running sum */ /* this loop takes N flops */ for (i = 0, sum = 0.0; i < N; i++) { /* this is one flop: add and multiply */ sum += A[i] * B[i]; } return(sum); } /* @SUBTITLE "PrintMatrix: print out a matrix" */ /* This is called in parallel. We have to pass a token around. */ static void PrintMatrix(node, nprocs, name, A, row, m, n) int node; /* my node number */ int nprocs; /* number of procs */ char *name; float *A; /* the row A[n] */ int row; /* the row number we have */ int m,n; { int i; if (node == 0) { /* MASTER */ printf("\nMatrix %s[%d][%d]:\n", name, m, n); } else { /* SLAVE */ /* Wait for token to arrive */ gettoken(); } printf("row %d\n", row); for (i = 0; i < n; i++) { printf("%g\t", A[i]); } printf("\n"); /* Pass token on to next process */ sendtoken((node+1) % nprocs); if (node == 0) gettoken(); /* wait for token to come back to us */ } /* @SUBTITLE "send/gettoken" */ static void sendtoken(node) int node; { int dummy; p4_send(0, node, &dummy, sizeof(dummy)); } static void gettoken() { int type = 0; int from = -1; int len; char *message; p4_recv(&type, &from, &message, &len); p4_msg_free(message); } /* @SUBTITLE "alloc" */ static char * alloc(size) int size; { char *p; p = malloc(size); if (p == NULL) { fprintf("Out of memory: request was %d bytes.\n", size); abort(); } return(p); }