/*
 * refMPIJacobi2D.v2.1.c
 * MPI reference code, manually developed and optimized
 * Stencil code: Jacobi 2D for the heat equation, implemented as a 2D cellular automaton.
 *
 * Allocates only the memory needed for the local part
 * Asynchronous sends and receives
 *
 * v2.1
 * (c) 2007-2015, Arturo Gonzalez-Escribano
 */

/*
 * <license>
 *
 * Hitmap v1.2
 *
 * This software is provided to enhance knowledge and encourage progress in the scientific
 * community. It should be used only for research and educational purposes. Any reproduction
 * or use for commercial purpose, public redistribution, in source or binary forms, with or
 * without modifications, is NOT ALLOWED without the previous authorization of the copyright
 * holder. The origin of this software must not be misrepresented; you must not claim that you
 * wrote the original software. If you use this software for any purpose (e.g. publication),
 * a reference to the software package and the authors must be included.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Copyright (c) 2007-2015, Trasgo Group, Universidad de Valladolid.
 * All rights reserved.
 *
 * More information on http://trasgo.infor.uva.es/
 *
 * </license>
 */
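
/*
 * Build/run sketch (assuming a standard MPI toolchain; the flags and process
 * count below are illustrative, not part of the original package):
 *   mpicc -O2 -o jacobi2d refMPIJacobi2D.v2.1.c -lm
 *   mpirun -np 4 ./jacobi2d 1024 1024 100
 */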

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <mpi.h>


#define mpiTestError(ok,cad) \
    if ( ok != MPI_SUCCESS ) { \
        fprintf(stderr,"RunTime-Error, Rank %d: %s - %d\n", mpiRank, cad, ok);\
        exit(-1); \
    }


int mpiRank;
int mpiNProc;
double clock, clockReduce;

/*
 * UTILITY: DECOMPOSE A NUMBER INTO 2 FACTORS AS NEAR AS POSSIBLE TO THE SQUARE ROOT
 */
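/* Example: factors2D(12) starts at low = high = 3 (product 9), grows high to 4,
 * and returns {4,3}; for a prime numProc the search degenerates to {numProc,1} */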
int * factors2D(int numProc) {
    int *result;
    int low, high;
    int product;

    /* 1. ALLOCATE MEMORY FOR result */
    result = (int *)calloc(2, sizeof(int));
    if ( result == NULL ) {
        fprintf(stderr,"RunTime-Error, Rank %d: Allocating factors2D result\n", mpiRank);
        exit(-1);
    }

    /* 2. START AS NEAR AS POSSIBLE TO THE SQUARE ROOT */
    low = high = (int)sqrt((double)numProc);
    product = low*high;

    /* 3. MOVE UP/DOWN TO SEARCH THE NEAREST FACTORS */
    while ( product != numProc ) {
        if (product < numProc) {
            high++;
            product = product + low;
        }
        else {
            low--;
            if (low==1) { high=numProc; product=numProc; }
            else product = product - high;
        }
    }

    /* 4. RETURN RESULT */
    result[0] = high;
    result[1] = low;
    return result;
}


/*
 * MAIN PROGRAM
 */
int main(int argc, char *argv[]) {
    int ok;
    int i,j;
#ifdef DEBUG
    setbuf(stderr, NULL);
    setbuf(stdout, NULL);
#endif

    /* 0. READ PARAMETERS */
    int rows, columns, numIter;

    if ( argc != 4 ) {
        fprintf(stderr, "\nUsage: %s <numRows> <numColumns> <numIterations>\n", argv[0]);
        exit(EXIT_FAILURE);
    }
    rows = atoi( argv[1] );
    columns = atoi( argv[2] );
    numIter = atoi( argv[3] );


    /* 1. INITIALIZE MPI */
    ok = MPI_Init( &argc, &argv);
    mpiTestError( ok, "Initializing MPI" );

    /* 2. GET BASIC GROUP PARAMETERS */
    MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank);
    MPI_Comm_size(MPI_COMM_WORLD, &mpiNProc);

    /* 3. START CLOCK */
    MPI_Barrier( MPI_COMM_WORLD );
    clock = MPI_Wtime();

    /* 4. CARTESIAN TOPOLOGY */
    MPI_Comm cartesianComm;
    int *procs = factors2D( mpiNProc );

    /* 4.1. CHECK: NO SUPPORT FOR PARTITIONING FEWER ELEMENTS THAN PROCS IN A DIMENSION */
    if ( procs[0] > rows-2 || procs[1] > columns-2 ) {
        if ( mpiRank == 0 ) fprintf(stderr, "Error: This code does not support partitioning fewer rows/columns than processors in that axis\n");
        MPI_Finalize();
        exit(EXIT_FAILURE);
    }

    int periods[2] = { 0, 0 };
    ok = MPI_Cart_create(MPI_COMM_WORLD, 2, procs, periods, 0, &cartesianComm);
    mpiTestError( ok, "Creating cartesian topology" );

    int myCoords[2];
    ok = MPI_Cart_coords(cartesianComm, mpiRank, 2, myCoords);
    mpiTestError( ok, "Getting cartesian coordinates" );

    int rankUp=-1000, rankDown=-1000, rankLeft=-1000, rankRight=-1000;
    int foo;

    if ( myCoords[0] != 0 ) {
        ok = MPI_Cart_shift(cartesianComm, 0, -1, &foo, &rankUp);
        mpiTestError( ok, "Shifting cartesian coordinates up" );
    }

    if ( myCoords[0] != procs[0]-1 ) {
        ok = MPI_Cart_shift(cartesianComm, 0, +1, &foo, &rankDown);
        mpiTestError( ok, "Shifting cartesian coordinates down" );
    }

    if ( myCoords[1] != 0 ) {
        ok = MPI_Cart_shift(cartesianComm, 1, -1, &foo, &rankLeft);
        mpiTestError( ok, "Shifting cartesian coordinates left" );
    }

    if ( myCoords[1] != procs[1]-1 ) {
        ok = MPI_Cart_shift(cartesianComm, 1, +1, &foo, &rankRight);
        mpiTestError( ok, "Shifting cartesian coordinates right" );
    }

#ifdef DEBUG
    fprintf(stderr,"CTRL RANKS: %d (%d,%d,%d,%d)\n", mpiRank, rankUp, rankDown, rankLeft, rankRight);
#endif

    /* 5. COMPUTE THE LOCAL PART OF THE MATRIX */
    double procRatio[2];
    procRatio[0] = (double)(rows-2)/procs[0];
    procRatio[1] = (double)(columns-2)/procs[1];

    int begin[2];
    int end[2];

#ifdef DEBUG
    fprintf(stderr,"CTRL %d, myCoords %d,%d\n", mpiRank, myCoords[0], myCoords[1]);
#endif

    begin[0] = 1 + (int)floor( myCoords[0]*procRatio[0] );
    end[0] = 1 + (int)floor( (myCoords[0]+((rows > procs[0]) ? 1 : 0))*procRatio[0] - ((rows > procs[0]) ? 1 : 0) );
    begin[1] = 1 + (int)floor( myCoords[1]*procRatio[1] );
    end[1] = 1 + (int)floor( (myCoords[1]+((columns > procs[1]) ? 1 : 0))*procRatio[1] - ((columns > procs[1]) ? 1 : 0) );
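    /* Example: with rows == 10 and procs[0] == 2, procRatio[0] == 4.0, so
       coordinate 0 owns interior rows [1:4] and coordinate 1 owns rows [5:8]
       (global rows 0 and 9 are the fixed boundary) */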

#ifdef DEBUG
    fprintf(stderr,"CTRL %d, matrix[%d:%d][%d:%d]\n", mpiRank, begin[0], end[0], begin[1], end[1]);
#endif

    /* 6. DEFINE MAIN DATA STRUCTURES */
    int size[2] = { end[0]-begin[0]+1+2, end[1]-begin[1]+1+2 };
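    /* (owned block extent plus one halo/boundary row and column on each side) */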
    double *matrix;
    double *matrixCopy;
    matrix = (double *)calloc( (size_t)(size[0] * size[1]), sizeof(double) );
    matrixCopy = (double *)calloc( (size_t)(size[0] * size[1]), sizeof(double) );

    if ( matrix==NULL || matrixCopy==NULL) {
        fprintf(stderr,"RunTime-Error, Rank %d: Allocating data structures\n", mpiRank);
        exit(2);
    }

#define elementGlobal(mat, idx1, idx2) (mat[(idx1)*columns+(idx2)])
#define element(mat, idx1, idx2) (mat[(idx1)*size[1]+(idx2)])
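/* Row-major accessors: elementGlobal strides by the full matrix width (columns);
 * element strides by the local, halo-included width (size[1]) */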

    /* 7. INITIAL MATRIX VALUES */
    for (i=1; i<size[0]; i++) {
        for (j=1; j<size[1]; j++) {
            element(matrix, i, j) = 0.0;
        }
    }
    /* 8. FIXED BOUNDARY VALUES */
    if ( myCoords[1] == 0 ) {
        for (i=0; i<size[0]; i++) {
            element(matrix, i, 0) = 3.0;
            element(matrixCopy, i, 0) = 3.0;
        }
    }
    if ( myCoords[1] == procs[1]-1 ) {
        for (i=0; i<size[0]; i++) {
            element(matrix, i, size[1]-1) = 4.0;
            element(matrixCopy, i, size[1]-1) = 4.0;
        }
    }
    if ( myCoords[0] == 0 ) {
        for (i=0; i<size[1]; i++) {
            element(matrix, 0, i) = 1.0;
            element(matrixCopy, 0, i) = 1.0;
        }
    }
    if ( myCoords[0] == procs[0]-1 ) {
        for (i=0; i<size[1]; i++) {
            element(matrix, size[0]-1, i) = 2.0;
            element(matrixCopy, size[0]-1, i) = 2.0;
        }
    }

    /* 9. COMMIT MPI TYPES */
    MPI_Datatype verticalBorderType;
    ok = MPI_Type_hvector( size[0]-2, 1, (int)sizeof(double)*size[1], MPI_DOUBLE, &verticalBorderType );
    mpiTestError( ok, "Creating the columns type" );
    ok = MPI_Type_commit( &verticalBorderType );
    mpiTestError( ok, "Committing the columns type" );
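    /* verticalBorderType picks one double from each of the size[0]-2 interior rows,
       with a byte stride of one full local row: a single column of the local block */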

    MPI_Request request[8] = { MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL,
                MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL, MPI_REQUEST_NULL };
    MPI_Status statusV[8];
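    /* Slots of ranks without a given neighbor stay MPI_REQUEST_NULL, which
       MPI_Waitall ignores, so every rank can wait on all 8 slots below */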

    /* 10. LOOP: UNROLLED ONE ITERATION TO AVOID EXTRA COMM. STAGE */
    int loopIndex;
    for (loopIndex = 0; loopIndex < numIter-1; loopIndex++) {

        /* 11. COMPUTE LOCAL PART */
        for (i=1; i<size[0]-1; i++) {
            for (j=1; j<size[1]-1; j++) {
                element( matrix,i,j ) = ( element( matrixCopy,i-1,j ) + element( matrixCopy,i+1,j )
                        + element( matrixCopy,i,j-1 ) + element( matrixCopy,i,j+1 ) ) / 4;
            }
        }

        /* 12. COMMUNICATE BORDERS: SEND MATRIX VALUES */
        if ( myCoords[0] != 0 ) {
            ok = MPI_Isend( &element(matrix, 1, 1), size[1]-2, MPI_DOUBLE, rankUp, 0, MPI_COMM_WORLD, &request[0] );
            mpiTestError( ok, "Send Up" );
        }
        if ( myCoords[0] != procs[0]-1 ) {
            ok = MPI_Isend( &element(matrix, size[0]-2, 1), size[1]-2, MPI_DOUBLE, rankDown, 1, MPI_COMM_WORLD, &request[1] );
            mpiTestError( ok, "Send Down" );
        }
        if ( myCoords[1] != 0 ) {
            ok = MPI_Isend( &element(matrix, 1, 1), 1, verticalBorderType, rankLeft, 2, MPI_COMM_WORLD, &request[2] );
            mpiTestError( ok, "Send Left" );
        }
        if ( myCoords[1] != procs[1]-1 ) {
            ok = MPI_Isend( &element(matrix, 1, size[1]-2), 1, verticalBorderType, rankRight, 3, MPI_COMM_WORLD, &request[3] );
            mpiTestError( ok, "Send Right" );
        }

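        /* The Isends read from matrix and the Irecvs below write only the halo
           rows/columns of matrixCopy, so the interior copy in between overlaps
           safely with the communications until the Waitall */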
        /* 13. LOCAL SHADOW COPY (WITHOUT BORDERS) */
        for (i=1; i<size[0]-1; i++) {
            for (j=1; j<size[1]-1; j++) {
                element( matrixCopy,i,j ) = element( matrix,i,j );
            }
        }

        /* 14. COMMUNICATE BORDERS: RECV IN MATRIX COPY */
        if ( myCoords[0] != procs[0]-1 ) {
            ok = MPI_Irecv( &element(matrixCopy, size[0]-1, 1), size[1]-2, MPI_DOUBLE, rankDown, 0, MPI_COMM_WORLD, &request[4] );
            mpiTestError( ok, "Recv from Down" );
        }
        if ( myCoords[0] != 0 ) {
            ok = MPI_Irecv( &element(matrixCopy, 0, 1), size[1]-2, MPI_DOUBLE, rankUp, 1, MPI_COMM_WORLD, &request[5] );
            mpiTestError( ok, "Recv from Up" );
        }
        if ( myCoords[1] != procs[1]-1 ) {
            ok = MPI_Irecv( &element(matrixCopy, 1, size[1]-1), 1, verticalBorderType, rankRight, 2, MPI_COMM_WORLD, &request[6] );
            mpiTestError( ok, "Recv from Right" );
        }
        if ( myCoords[1] != 0 ) {
            ok = MPI_Irecv( &element(matrixCopy, 1, 0), 1, verticalBorderType, rankLeft, 3, MPI_COMM_WORLD, &request[7] );
            mpiTestError( ok, "Recv from Left" );
        }

        /* 15. END COMMUNICATIONS */
        MPI_Waitall( 8, request, statusV );

    } /* END LOOP */

    // UNROLLED LOOP: LAST ITERATION WITHOUT COMMUNICATION
    /* 16. COMPUTE LOCAL PART */
    for (i=1; i<size[0]-1; i++) {
        for (j=1; j<size[1]-1; j++) {
            element( matrix,i,j ) = ( element( matrixCopy,i-1,j ) + element( matrixCopy,i+1,j )
                    + element( matrixCopy,i,j-1 ) + element( matrixCopy,i,j+1 ) ) / 4;
        }
    }

    /* 17. FREE COMMUNICATION PATTERN RESOURCES */
    ok = MPI_Type_free( &verticalBorderType );
    mpiTestError( ok, "Freeing vertical border type" );

#ifdef DEBUG
    for (i=1; i<size[0]-1; i++) {
        for (j=1; j<size[1]-1; j++) {
            fprintf(stderr,"M[%d][%d] %lf\n", i,j, element(matrix,i,j));
        }
    }
#endif

    double *matrixResult = NULL;

    /* 18. COMMUNICATE ALL LOCAL PIECES */
    if ( mpiRank == 0 ) {
        matrixResult = (double *)calloc( (size_t)(rows * columns), sizeof(double) );
        if ( matrixResult == NULL ) {
            fprintf(stderr,"RunTime-Error, Rank %d: Allocating result structure\n", mpiRank);
            exit(2);
        }

        /* 18.0. COPY MY OWN RESULTS */
        for (i=0; i<size[0]; i++) {
            for (j=0; j<size[1]; j++) {
                elementGlobal( matrixResult,i,j ) = element( matrix,i,j );
            }
        }

        /* 18.1. RANK 0 RECEIVES ALL */
        int rank;
        for ( rank=1; rank<mpiNProc; rank++ ) {
            /* COMPUTE THE POSITION OF THE REMOTE LOCAL PART TO BE RECEIVED */
            int remoteCoords[2];
            ok = MPI_Cart_coords(cartesianComm, rank, 2, remoteCoords);
            mpiTestError( ok, "Getting remote cartesian coordinates in end comm." );

            int remoteBegin[2];
            int remoteEnd[2];

            remoteBegin[0] = 1 + (int)floor( remoteCoords[0]*procRatio[0] );
            remoteEnd[0] = 1 + (int)floor( (remoteCoords[0]+((rows > procs[0]) ? 1 : 0))*procRatio[0] - ((rows > procs[0]) ? 1 : 0) );
            remoteBegin[1] = 1 + (int)floor( remoteCoords[1]*procRatio[1] );
            remoteEnd[1] = 1 + (int)floor( (remoteCoords[1]+((columns > procs[1]) ? 1 : 0))*procRatio[1] - ((columns > procs[1]) ? 1 : 0) );

            // INCLUDE BORDERS
            if ( remoteBegin[0] == 1 ) remoteBegin[0]--;
            if ( remoteBegin[1] == 1 ) remoteBegin[1]--;
            if ( remoteEnd[0] == rows-2 ) remoteEnd[0]++;
            if ( remoteEnd[1] == columns-2 ) remoteEnd[1]++;
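            /* Edge blocks also own the fixed global boundary rows/columns,
               so their rectangles are widened to include them */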

#ifdef DEBUG
            fprintf(stderr,"CTRL RemoteMatrix %d, [%d:%d][%d:%d]\n", rank, remoteBegin[0], remoteEnd[0], remoteBegin[1], remoteEnd[1]);
#endif
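            /* remoteType: one contiguous row segment of the remote rectangle per block,
               strided by a full global row in bytes, so the Recv lands in place in matrixResult */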
            MPI_Status status;
            MPI_Datatype remoteType;
            ok = MPI_Type_hvector( remoteEnd[0]-remoteBegin[0]+1, remoteEnd[1]-remoteBegin[1]+1, (MPI_Aint)((int)sizeof(double)*columns), MPI_DOUBLE, &remoteType );
            mpiTestError( ok, "Getting remote type in end comm." );
            ok = MPI_Type_commit( &remoteType );
            mpiTestError( ok, "Committing the remote type in end comm." );

            ok = MPI_Recv( &elementGlobal( matrixResult,remoteBegin[0],remoteBegin[1] ), 1, remoteType, rank, rank, MPI_COMM_WORLD, &status );
            mpiTestError( ok, "Receiving remote parts in end comm." );

            ok = MPI_Type_free( &remoteType );
            mpiTestError( ok, "Freeing remote type in end comm." );
        }
    }
    else {
        /* 18.2. OTHERS SEND THE LOCAL PART */
#ifdef DEBUG
        fprintf(stderr,"CTRL LocalMatrix %d, [%d:%d][%d:%d]\n", mpiRank, begin[0], end[0], begin[1], end[1]);
#endif

        // INCLUDE BORDERS
        int localBegin[2];
        int localEnd[2];
        localBegin[0] = ( begin[0] == 1 ) ? 0 : 1;
        localBegin[1] = ( begin[1] == 1 ) ? 0 : 1;
        localEnd[0] = ( end[0] == rows-2 ) ? size[0]-1 : size[0]-2;
        localEnd[1] = ( end[1] == columns-2 ) ? size[1]-1 : size[1]-2;

        MPI_Datatype localType;
        ok = MPI_Type_hvector( localEnd[0]-localBegin[0]+1, localEnd[1]-localBegin[1]+1, (int)sizeof(double)*size[1], MPI_DOUBLE, &localType );
        mpiTestError( ok, "Getting local type in end comm." );
        ok = MPI_Type_commit( &localType );
        mpiTestError( ok, "Committing the local type in end comm." );

        ok = MPI_Send( &element( matrix,localBegin[0],localBegin[1] ), 1, localType, 0, mpiRank, MPI_COMM_WORLD );
        mpiTestError( ok, "Sending local part in end comm." );

        ok = MPI_Type_free( &localType );
        mpiTestError( ok, "Freeing local type in end comm." );
    }

    /* 19. STOP AND PRINT CLOCK */
    clock = MPI_Wtime() - clock;
    MPI_Reduce( &clock, &clockReduce, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD );
    if (mpiRank==0) printf("Clock_%s: %14.6lf\n", "mainClock", clockReduce);

    /* 20. WRITE RESULT IN FILE */
#ifdef WRITE_RESULT
    if ( mpiRank == 0 ) {
        FILE *f;
        if ((f=fopen("Result.out.dtxt","w")) == NULL) {
            fprintf(stderr,"Error: Impossible to open output file\n");
            exit(-1);
        }
        for (i=0; i<rows; i++) {
            for (j=0; j<columns; j++) {
                fprintf(f,"%014.4lf", elementGlobal(matrixResult,i,j) );
                fprintf(f,"\n");
            }
        }
        fclose(f);
    }
#endif


    /* 21. END PROGRAM */
    free( procs );
    free( matrix );
    free( matrixCopy );
    if ( mpiRank == 0 ) free( matrixResult );
    MPI_Comm_free( &cartesianComm );
    MPI_Finalize();

    /* 22. RETURN CORRECT */
    return 0;
}