Hitmap 1.3
 All Data Structures Namespaces Files Functions Variables Typedefs Friends Macros Groups Pages
mg.c
Go to the documentation of this file.
1 
11 /*
12  * <license>
13  *
14  * Hitmap v1.2
15  *
16  * This software is provided to enhance knowledge and encourage progress in the scientific
17  * community. It should be used only for research and educational purposes. Any reproduction
18  * or use for commercial purpose, public redistribution, in source or binary forms, with or
19  * without modifications, is NOT ALLOWED without the previous authorization of the copyright
20  * holder. The origin of this software must not be misrepresented; you must not claim that you
21  * wrote the original software. If you use this software for any purpose (e.g. publication),
22  * a reference to the software package and the authors must be included.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" AND ANY
25  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
26  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
27  * THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
28  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * Copyright (c) 2007-2015, Trasgo Group, Universidad de Valladolid.
35  * All rights reserved.
36  *
37  * More information on http://trasgo.infor.uva.es/
38  *
39  * </license>
40 */
41 
42 /* INCLUDES */
43 #include <hitmap.h>
44 #include <math.h>
45 #include <time.h>
46 #include "c_randi8.h"
47 
48 
49 /* ACTIVE OPTIMIZATIONS: The same as NAS version */
50 #define OPT_ZU
51 #define OPT_OPERATOR_A
52 #define OPT_OPERATOR_S
53 #define OPT_RESID_BUFFER
54 #define OPT_PSINV_BUFFER
55 #define OPT_RPRJ3_BUFFER
56 #define OPT_INTERP_BUFFER
57 
58 
59 /* DEBUG */
60 //#define DEBUG
61 
62 /* CLASS NAMES */
63 #define S 'S'
64 #define A 'A'
65 #define B 'B'
66 #define C 'C'
67 #define D 'D'
68 #define E 'E'
69 
70 /* DEFAULT PROBLEM CLASS */
71 #ifndef CLASS
72 # define CLASS S
73 #endif
74 
82 #if CLASS == S
83 # define k_num 5
84 # define iterations 4
85  int problem_size[3] = {32,32,32};
86  double verify_value = 0.5307707005734e-04;
87 #endif
88 
89 #if CLASS == A
90 # define k_num 8
91 # define iterations 4
92  int problem_size[3] = {256,256,256};
93  double verify_value = 0.2433365309069e-05;
94 #endif
95 
96 #if CLASS == B
97 # define k_num 8
98 # define iterations 20
99  int problem_size[3] = {256,256,256};
100  double verify_value = 0.1800564401355e-05;
101 #endif
102 
103 #if CLASS == C
104 # define k_num 9
105 # define iterations 20
106  int problem_size[3] = {512,512,512};
107  double verify_value = 0.5706732285740e-06;
108 #endif
109 
110 #if CLASS == D
111 # define k_num 10
112 # define iterations 50
113  int problem_size[3] = {1024,1024,1024};
114  double verify_value = 0.1583275060440e-09;
115 #endif
116 
117 #if CLASS == E
118 # define k_num 11
119 # define iterations 50
120  int problem_size[3] = {2048,2048,2048};
121  double verify_value = 0.5630442584711e-10;
122 #endif
123 
125 #define k_max (k_num-1)
126 
127 /* MAX VERIFY DIFERENCE */
128 #define epsilon 1e-8
129 
130 /* OPERATORS */
132 double operatorA[4] = {-8.0/3.0, 0.0, 1.0/6.0, 1.0/12.0};
134 double operatorP[4] = {1.0/2.0, 1.0/4.0, 1.0/8.0, 1.0/16.0};
135 #if CLASS == S || CLASS == A
136 
137 double operatorS[4] = {-3.0/8.0, 1.0/32.0, -1.0/64.0, 0.0};
138 #else
139 
140 double operatorS[4] = {-3.0/17.0, 1.0/33.0, -1.0/61.0, 0.0};
141 #endif
142 
143 double operatorQ[4] = {1.0, 1.0/2.0, 1.0/4.0, 1.0/8.0};
144 
145 /* Number of +1 or -1 values in grid v */
146 #define mm 10
147 
148 /* TOPOLOGY AND LAYOUT FOR EACH LEVEL */
151 
152 /* HITMAP DOUBLE TILE */
153 hit_tileNewType(double);
154 
155 /* BLOCK SHAPES */
157 
158 /* GLOBAL CUBIC GRID */
160 
161 /* BLOCK TILES (include border data) */
162 HitTile_double block_u;
163 HitTile_double block_v;
164 HitTile_double block_z[k_num];
165 HitTile_double block_r[k_num];
166 
167 /* COMMUNICATION PATTERNS */
172 
173 /* HITMAP REDUCE OPERATIONS */
177 
178 /* HITMAP CLOCKS */
182 
184 #define swap(a,b,c){ \
185  (c) = (a); \
186  (a) = (b); \
187  (b) = (c); \
188 }
189 
191 #define tag(dir) (100 * (dir + 10*dim))
192 
193 
194 #ifdef DEBUG
195 /* Macros to sequentialize debug parallel operations */
196 #define ALL_SEQUENTIAL_BEGIN(var) \
197  struct timespec var ## ts = {0,1000000}; \
198  fflush(stdout); \
199  int var = 0; \
200  for(var=-1;var<hit_NProcs+1;var++){ \
201  if(hit_Rank == var){
202 #define ALL_SEQUENTIAL_END(var) \
203  } else { \
204  nanosleep(&(var ## ts),NULL); \
205  } \
206  fflush(stdout); \
207  MPI_Barrier(*hit_Comm); \
208  }
209 #endif
210 
211 
212 /* FUNCTION PROTOTYPES */
213 void setup();
214 void warming_up();
215 void zero3(HitTile_double tile);
216 void zran3(HitTile_double matrix);
217 double power(double a, int n);
218 void bubble(double ten[mm][2], int ten_index[mm][2][3],int ind);
219 void resid(HitTile_double m_u, HitTile_double m_v, HitTile_double m_r, int k);
220 void apply_correction(HitTile_double u, HitTile_double r, int k);
221 void v_cycle(HitTile_double r, HitTile_double z, int k);
222 void rprj3(HitTile_double m_ro, HitTile_double m_r, int k);
223 void interp(HitTile_double m_zo, HitTile_double m_z, int k);
224 void psinv(HitTile_double m_z, HitTile_double m_r, int k);
225 double norm2u3(HitTile_double m_r,int k);
226 void free_resources();
227 void add(HitTile_double m_x, HitTile_double m_y);
228 
229 /* OUTPUT FUNCIONS */
230 #define printfRootInternal(...) { if( hit_Rank == 0 ) { printf(__VA_ARGS__); fflush(stdout); }}
231 #define printfRoot(...) printfRootInternal(__VA_ARGS__)
232 
233 /* DEBUG OUTPUT FUNCIONS */
234 #ifdef DEBUG
235 #define printfAllDebug(...) printfAllInternal(__VA_ARGS__)
236 #define printfRootDebug(...) printfRootInternal(__VA_ARGS__)
237 #define printfDebug(...) printf(__VA_ARGS__)
238 void printfAllInternal( const char* format, ... );
239 #else
240 #define printfAllDebug(...)
241 #define printfRootDebug(...)
242 #define printfDebug(...)
243 #endif
244 
245 
249 int main(int argc, char ** argv){
250 
251  /* Initialize hitmap communication */
252  hit_comInit(&argc,&argv);
253  printfRootDebug("Initialize Hitmap\n");
254 
255 
256  // Check that nprocs = 2^x
257  int j;
258  int sum = 0;
259  int nprocs = hit_NProcs;
260  for( j=0; (size_t)j < sizeof(int)*8 ; j++ ){
261  sum += (nprocs >> j) & 1;
262  }
263  if(sum != 1){
264  printfRoot("\nThe number of processors must be a power of two\n\n");
265  hit_comFinalize();
266  exit(EXIT_FAILURE);
267  }
268 
269 
270 
271 #ifdef DEBUG
272  printfRoot("DEBUG\n");
273 #endif
274 
276 
277  hit_clockStart(initClk);
278  hit_clockStart(setupClk);
279  /* Setup function */
280  setup();
281  hit_clockStop(setupClk);
282 
283 
284  /* Warming up iteration */
285  warming_up();
286 
287  /* Init u and v grids*/
288  zero3(block_u);
289  zran3(block_v);
290 
291  hit_clockStop(initClk);
292  hit_comBarrier(layout[k_max]);
293  hit_clockStart(processClk);
294 
295  /* Main loop */
296  int i;
297  for (i = 0; i < iterations; i++) {
298  printfRoot(" * ITERATION %d *\n",i);
299  resid(block_u, block_v, block_r[k_max], k_max);
300  apply_correction(block_u, block_r[k_max], k_max);
301  }
302  resid(block_u, block_v, block_r[k_max], k_max);
303 
304  /* Calculate L2 Norm */
305  double result = norm2u3(block_r[k_max],k_max);
306 
307  hit_clockStop(processClk);
308 
309  hit_clockReduce( layout[k_max], setupClk );
310  hit_clockReduce( layout[k_max], initClk );
311  hit_clockReduce( layout[k_max], processClk );
312 
313  /* Print results */
314  if( fabs( result - verify_value ) <= epsilon ){
315  printfRoot("VERIFICATION SUCCESSFUL\n");
316  printfRoot("\tL2 Norm is: %20.12e\n",result);
317  printfRoot("\tThe correct L2 Norm is %20.12e\n",verify_value);
318  printfRoot("\tError: %20.12e\n",result-verify_value);
319  } else {
320  printfRoot("VERIFICATION FAILED\n");
321  printfRoot("\tL2 Norm is: %20.12e\n",result);
322  printfRoot("\tThe correct L2 Norm is: %20.12e\n",verify_value);
323  printfRoot("\tError: %20.12e\n",result-verify_value);
324  }
325  printfRoot("Setup time: %12.3f\n",hit_clockGetMaxSeconds(setupClk));
326  printfRoot("Init time: %12.3f\n",hit_clockGetMaxSeconds(initClk));
327  printfRoot("Process time: %12.2f\n",hit_clockGetMaxSeconds(processClk));
328 
329  // Free the resources
330  free_resources();
331 
332  printfRootDebug("Finalize hitmap\n");
333  hit_comFinalize();
334 
335  return 0;
336 }
337 
338 
339 
343 void setup(){
344 
345  printfRootDebug(" = SETUP = \n");
346 
347  HitSig sig0, sig1, sig2;
348  HitShape blockEx;
349  int k, dim;
350 
351  /* 3D Array Topology */
352  topology = hit_topology(plug_topArray3D);
353 
354  /* Problem size at current level */
355  int current_size[3];
356  current_size[0] = problem_size[0];
357  current_size[1] = problem_size[1];
358  current_size[2] = problem_size[2];
359 
360  /* Loop through all levels */
361  for(k=k_num-1;k>=0;k--){
362 
363  printfRootDebug("* Allocating grids for level: %d\n",k);
364 
365  /* Create shape for global cubic grid */
366  sig0 = hit_sig(0,current_size[0]-1,1);
367  sig1 = hit_sig(0,current_size[1]-1,1);
368  sig2 = hit_sig(0,current_size[2]-1,1);
369  grid[k] = hit_shape(3,sig0,sig1,sig2);
370 
371  /* Layout for the k level */
372  layout[k] = hit_layout(plug_layBlocksL,topology,grid[k]);
373  hit_layWrapNeighbors(&(layout[k]));
374 
375  /* My block at level k */
376  block[k] = hit_layShape(layout[k]);
377 
378 #ifdef DEBUG
379  if(hit_shapeCmp(block[k],HIT_SHAPE_NULL)){
380  printfAllDebug(" I have: nothing\n");
381  } else{
382  printfAllDebug("I have: [%d-%d,%d-%d,%d-%d]\n",
383  hit_shapeSig(block[k],0).begin,
384  hit_shapeSig(block[k],0).end,
385  hit_shapeSig(block[k],1).begin,
386  hit_shapeSig(block[k],1).end,
387  hit_shapeSig(block[k],2).begin,
388  hit_shapeSig(block[k],2).end);
389  }
390 #endif
391 
392  /* Calculate the block with the borders */
393  blockEx = hit_shapeExpand(block[k],3,1);
394 
395  /* Allocates for the top level: u, v, r and z. */
396  if(k == k_max){
397 
398  printfRootDebug(" -> Allocating blocks u, v, r, z\n");
399 
400  hit_tileDomainShapeAlloc(&block_v,double,blockEx);
401 
402  hit_tileDomainShapeAlloc(&(block_r[k]),double,blockEx);
403  hit_tileDomainShapeAlloc(&(block_z[k]),double,blockEx);
404 #ifdef OPT_ZU
405  // OPT_ZU: Using this optimization, grid u and grid z are the same at max level
406  block_u = block_z[k];
407 #else
408  hit_tileDomainShapeAlloc(&block_u,double,blockEx);
409 #endif
410 
411  /* Allocates for other levels: r and z. */
412  } else {
413 
414  printfRootDebug(" -> Allocating blocks r, z\n");
415 
416  hit_tileDomainShapeAlloc(&(block_r[k]),double,blockEx);
417  hit_tileDomainShapeAlloc(&(block_z[k]),double,blockEx);
418  }
419 
420 
421  /* Communication patterns */
422  printfRootDebug("* Creating Comm Pattern for level: %d\n",k);
423  if(k == k_max)
424  pattern_v = hit_pattern( HIT_PAT_ORDERED );
425  pattern_r[k] = hit_pattern( HIT_PAT_ORDERED );
426  pattern_z[k] = hit_pattern( HIT_PAT_ORDERED );
427  pattern_z_ex[k] = hit_pattern( HIT_PAT_ORDERED );
428 
429  /* Border exchange */
430  for(dim=0;dim<3;dim++){
431 
432  /* Neighbor location */
433  HitRanks nbr_l = hit_layNeighbor(layout[k],dim,-1);
434  HitRanks nbr_r = hit_layNeighbor(layout[k],dim,+1);
435 
436  /* Shape for the face to Give and to Take */
437  HitShape faceGive_l, faceGive_r, faceTake_l, faceTake_r;
438 
439  faceGive_l = hit_shapeBorder(block[k],dim,HIT_SHAPE_BEGIN,0);
440  faceGive_r = hit_shapeBorder(block[k],dim,HIT_SHAPE_END,0);
441 
442  faceTake_l = hit_shapeBorder(block[k],dim,HIT_SHAPE_BEGIN,1);
443  faceTake_r = hit_shapeBorder(block[k],dim,HIT_SHAPE_END,1);
444 
445  faceGive_l = hit_shapeExpand(faceGive_l,dim,1);
446  faceGive_r = hit_shapeExpand(faceGive_r,dim,1);
447  faceTake_l = hit_shapeExpand(faceTake_l,dim,1);
448  faceTake_r = hit_shapeExpand(faceTake_r,dim,1);
449 
450  printfAllDebug("Face Give l, dim %d: [%d-%d,%d-%d,%d-%d]\n",dim,
451  hit_shapeSig(faceGive_l,0).begin,
452  hit_shapeSig(faceGive_l,0).end,
453  hit_shapeSig(faceGive_l,1).begin,
454  hit_shapeSig(faceGive_l,1).end,
455  hit_shapeSig(faceGive_l,2).begin,
456  hit_shapeSig(faceGive_l,2).end);
457 
458  printfAllDebug("Face Give r, dim %d: [%d-%d,%d-%d,%d-%d]\n",dim,
459  hit_shapeSig(faceGive_r,0).begin,
460  hit_shapeSig(faceGive_r,0).end,
461  hit_shapeSig(faceGive_r,1).begin,
462  hit_shapeSig(faceGive_r,1).end,
463  hit_shapeSig(faceGive_r,2).begin,
464  hit_shapeSig(faceGive_r,2).end);
465 
466  printfAllDebug("Face Take l, dim %d: [%d-%d,%d-%d,%d-%d]\n",dim,
467  hit_shapeSig(faceTake_l,0).begin,
468  hit_shapeSig(faceTake_l,0).end,
469  hit_shapeSig(faceTake_l,1).begin,
470  hit_shapeSig(faceTake_l,1).end,
471  hit_shapeSig(faceTake_l,2).begin,
472  hit_shapeSig(faceTake_l,2).end);
473 
474  printfAllDebug("Face Take r, dim %d: [%d-%d,%d-%d,%d-%d]\n",dim,
475  hit_shapeSig(faceTake_r,0).begin,
476  hit_shapeSig(faceTake_r,0).end,
477  hit_shapeSig(faceTake_r,1).begin,
478  hit_shapeSig(faceTake_r,1).end,
479  hit_shapeSig(faceTake_r,2).begin,
480  hit_shapeSig(faceTake_r,2).end);
481 
482  /* Default Communications Patterns */
483  hit_patternAdd( &pattern_r[k],
484  hit_comSendRecvSelectTag(layout[k], nbr_l, &block_r[k], faceGive_l, HIT_COM_ARRAYCOORDS, nbr_r, &block_r[k], faceTake_r, HIT_COM_ARRAYCOORDS, HIT_DOUBLE,tag(0)));
485  hit_patternAdd( &pattern_r[k],
486  hit_comSendRecvSelectTag(layout[k], nbr_r, &block_r[k], faceGive_r, HIT_COM_ARRAYCOORDS, nbr_l, &block_r[k], faceTake_l, HIT_COM_ARRAYCOORDS, HIT_DOUBLE,tag(1)));
487 
488  hit_patternAdd( &pattern_z[k],
489  hit_comSendRecvSelectTag(layout[k], nbr_l, &block_z[k], faceGive_l, HIT_COM_ARRAYCOORDS, nbr_r, &block_z[k], faceTake_r, HIT_COM_ARRAYCOORDS, HIT_DOUBLE,tag(0)));
490  hit_patternAdd( &pattern_z[k],
491  hit_comSendRecvSelectTag(layout[k], nbr_r, &block_z[k], faceGive_r, HIT_COM_ARRAYCOORDS, nbr_l, &block_z[k], faceTake_l, HIT_COM_ARRAYCOORDS, HIT_DOUBLE,tag(1)));
492 
493  if(k == k_max){
494  hit_patternAdd( &pattern_v,
495  hit_comSendRecvSelectTag(layout[k], nbr_l, &block_v, faceGive_l, HIT_COM_ARRAYCOORDS, nbr_r, &block_v, faceTake_r, HIT_COM_ARRAYCOORDS, HIT_DOUBLE,tag(0)));
496  hit_patternAdd( &pattern_v,
497  hit_comSendRecvSelectTag(layout[k], nbr_r, &block_v, faceGive_r, HIT_COM_ARRAYCOORDS, nbr_l, &block_v, faceTake_l, HIT_COM_ARRAYCOORDS, HIT_DOUBLE,tag(1)));
498 
499  } else {
500 
501  /* Neighbor when there are inactive processors */
502  HitRanks nbr_l_ex = hit_layNeighbor(layout[k+1],dim,-1);
503  HitRanks nbr_r_ex = hit_layNeighbor(layout[k+1],dim,+1);
504 
505  /* Neighbor shapes at this and next level */
506  HitShape shape = hit_layShapeOther(layout[k], nbr_l_ex);
507  HitShape shape2 = hit_layShapeOther(layout[k+1], nbr_l_ex);
508 
509 
510 #ifdef DEBUG
511  ALL_SEQUENTIAL_BEGIN(seq)
512 #endif
513 
514 
515 
516 
517  /* This processor has to GIVE data to rebuild a inactive processor */
518  if (hit_sigCard(hit_shapeSig(block[k+1],dim)) == 1 && hit_sigCard(hit_shapeSig(shape,dim)) == 0 && hit_sigCard(hit_shapeSig(shape2,dim)) == 1) {
519  printfDebug("p%d: GIVE, level %d, dim %d\n",HIT_TOPOLOGY_INFO.selfRank,k+1,dim);
520 
521  HitShape faceGive_l_ex = hit_shapeExpand(block[k+1],3,1);
522  HitShape faceGive_r_ex = hit_shapeExpand(block[k+1],3,1);
523 
524  hit_shapeSig(faceGive_l_ex,dim).end -= 1;
525  hit_shapeSig(faceGive_r_ex,dim).begin += 1;
526  hit_shapeSig(faceGive_r_ex,dim).end -= 1;
527 
528  printfDebug("Face Give l ex, dim %d: [%d-%d,%d-%d,%d-%d] Cards[%d,%d,%d]\n",dim,
529  hit_shapeSig(faceGive_l_ex,0).begin,
530  hit_shapeSig(faceGive_l_ex,0).end,
531  hit_shapeSig(faceGive_l_ex,1).begin,
532  hit_shapeSig(faceGive_l_ex,1).end,
533  hit_shapeSig(faceGive_l_ex,2).begin,
534  hit_shapeSig(faceGive_l_ex,2).end,
535  hit_sigCard(hit_shapeSig(faceGive_l_ex,0)),
536  hit_sigCard(hit_shapeSig(faceGive_l_ex,1)),
537  hit_sigCard(hit_shapeSig(faceGive_l_ex,2)));
538 
539  printfDebug("Face Give r ex, dim %d: [%d-%d,%d-%d,%d-%d] Cards[%d,%d,%d]\n",dim,
540  hit_shapeSig(faceGive_r_ex,0).begin,
541  hit_shapeSig(faceGive_r_ex,0).end,
542  hit_shapeSig(faceGive_r_ex,1).begin,
543  hit_shapeSig(faceGive_r_ex,1).end,
544  hit_shapeSig(faceGive_r_ex,2).begin,
545  hit_shapeSig(faceGive_r_ex,2).end,
546  hit_sigCard(hit_shapeSig(faceGive_r_ex,0)),
547  hit_sigCard(hit_shapeSig(faceGive_r_ex,1)),
548  hit_sigCard(hit_shapeSig(faceGive_r_ex,2)));
549 
550  hit_patternAdd( &pattern_z_ex[k+1],
551  hit_comSendSelectTag(layout[k+1], nbr_l_ex, &block_z[k+1], faceGive_l_ex, HIT_COM_ARRAYCOORDS, HIT_DOUBLE,tag(0)));
552  hit_patternAdd( &pattern_z_ex[k+1],
553  hit_comSendSelectTag(layout[k+1], nbr_r_ex, &block_z[k+1], faceGive_r_ex, HIT_COM_ARRAYCOORDS, HIT_DOUBLE,tag(1)));
554 
555 
556  }
557  /* This processor has to TAKE data to rebuild himself because it is inactive */
558  if (hit_sigCard(hit_shapeSig(block[k],dim)) == 0 && hit_sigCard(hit_shapeSig(shape,dim)) == 1 && hit_sigCard(hit_shapeSig(shape2,dim)) == 1) {
559  printfDebug("p%d: TAKE, level %d, dim %d\n",HIT_TOPOLOGY_INFO.selfRank,k+1,dim);
560 
561  HitShape faceTake_l_ex = hit_shapeExpand(block[k+1],3,1);
562  HitShape faceTake_r_ex = hit_shapeExpand(block[k+1],3,1);
563 
564  hit_shapeSig(faceTake_l_ex,dim).end -= 2;
565  hit_shapeSig(faceTake_r_ex,dim).begin += 1;
566 
567  printfDebug("Face Take l ex, dim %d: [%d-%d,%d-%d,%d-%d] Cards[%d,%d,%d]\n",dim,
568  hit_shapeSig(faceTake_l_ex,0).begin,
569  hit_shapeSig(faceTake_l_ex,0).end,
570  hit_shapeSig(faceTake_l_ex,1).begin,
571  hit_shapeSig(faceTake_l_ex,1).end,
572  hit_shapeSig(faceTake_l_ex,2).begin,
573  hit_shapeSig(faceTake_l_ex,2).end,
574  hit_sigCard(hit_shapeSig(faceTake_l_ex,0)),
575  hit_sigCard(hit_shapeSig(faceTake_l_ex,1)),
576  hit_sigCard(hit_shapeSig(faceTake_l_ex,2)));
577 
578  printfDebug("Face Take r ex, dim %d: [%d-%d,%d-%d,%d-%d] Cards[%d,%d,%d]\n",dim,
579  hit_shapeSig(faceTake_r_ex,0).begin,
580  hit_shapeSig(faceTake_r_ex,0).end,
581  hit_shapeSig(faceTake_r_ex,1).begin,
582  hit_shapeSig(faceTake_r_ex,1).end,
583  hit_shapeSig(faceTake_r_ex,2).begin,
584  hit_shapeSig(faceTake_r_ex,2).end,
585  hit_sigCard(hit_shapeSig(faceTake_r_ex,0)),
586  hit_sigCard(hit_shapeSig(faceTake_r_ex,1)),
587  hit_sigCard(hit_shapeSig(faceTake_r_ex,2)));
588 
589  hit_patternAdd( &pattern_z_ex[k+1],
590  hit_comRecvSelectTag(layout[k+1], nbr_l_ex, &block_z[k+1], faceTake_l_ex, HIT_COM_ARRAYCOORDS, HIT_DOUBLE,tag(1)));
591  hit_patternAdd( &pattern_z_ex[k+1],
592  hit_comRecvSelectTag(layout[k+1], nbr_r_ex, &block_z[k+1], faceTake_r_ex, HIT_COM_ARRAYCOORDS, HIT_DOUBLE,tag(0)));
593  }
594 #ifdef DEBUG
595  ALL_SEQUENTIAL_END(seq)
596 #endif
597 
598  }
599  }
600 
601  /* Problem size of the next level */
602  current_size[0] /= 2;
603  current_size[1] /= 2;
604  current_size[2] /= 2;
605  }
606 
607  /* Hit collective functions */
611 }
612 
613 
614 
619 void zero3(HitTile_double tile){
620 
621  double aux=0.0;
622  hit_tileFill( &tile, &aux );
623 
624 }
625 
626 
630 void zran3(HitTile_double matrix){
631 
632  printfRootDebug(" = ZRAN3 = \n");
633 
634 
635  int b1 = hit_tileArray2Tile(matrix,0,hit_shapeSig(block[k_max],0).begin);
636  int b2 = hit_tileArray2Tile(matrix,1,hit_shapeSig(block[k_max],1).begin);
637  int b3 = hit_tileArray2Tile(matrix,2,hit_shapeSig(block[k_max],2).begin);
638 
639  int n1 = hit_sigCard(hit_shapeSig(block[k_max],0));
640  int n2 = hit_sigCard(hit_shapeSig(block[k_max],1));
641  int n3 = hit_sigCard(hit_shapeSig(block[k_max],2));
642 
643  int e1 = b1 + n1;
644  int e2 = b2 + n2;
645  int e3 = b3 + n3;
646 
647 #undef m
648 #define m(a,b,c) hit_tileElemAtNoStride3(matrix,a,b,c)
649 
650 #define A0 pow(5.0,13)
651 #define x 314159265.e0
652 
653 
654  /*
655  * a1 = A0^{ dim grid z }
656  * a2 = A0^{ dim griz y * dim grid z }
657  */
658  double a1 = power( A0, hit_sigCard(hit_shapeSig(grid[k_max],2)) );
659  double a2 = power( A0, hit_sigCard(hit_shapeSig(grid[k_max],1)) * hit_sigCard(hit_shapeSig(grid[k_max],2)) );
660 
661  /* Clean the block */
662  zero3(matrix);
663 
664  /* First item of the block in a linear mode */
665  int i = hit_shapeSig(block[k_max],2).begin + hit_sigCard(hit_shapeSig(grid[k_max],2)) *
666  (hit_shapeSig(block[k_max],1).begin + hit_sigCard(hit_shapeSig(grid[k_max],1)) * (hit_shapeSig(block[k_max],0).begin ));
667 
668  printfAllDebug("My first item is: %d\n",i);
669 
670  // ai = a^i
671  double ai = power( A0, i );
672 
673  double x0 = x;
674  int rdummy = (int) randlc( &x0, ai );
675 
676  int i1,i2,i3;
677  for(i1=b1;i1<e1;i1++){
678 
679  double x1 = x0;
680  for(i2=b2;i2<e2;i2++){
681 
682  double xx = x1;
683  /* Fill the vector (i1,i2,b3) with n3 pseudo-random numbers */
684  vranlc(n3,&xx,A0,&m(i1,i2,b3));
685  rdummy = (int) randlc(&x1,a1);
686  }
687  rdummy = (int) randlc(&x0,a2);
688  }
689  // @arturo Ago 2015: Avoid a warning of variable initialized but not used
690  (void)rdummy;
691 
692  /* Arrays to keep the +1 and -1 values */
693  double ten[mm][2];
694  int ten_index[mm][2][3];
695 
696  /* Start ten and ten_index with 0 */
697  for(i=0;i<mm;i++){
698 
699  ten[i][1] = 0;
700  ten_index[i][1][0] = 0;
701  ten_index[i][1][1] = 0;
702  ten_index[i][1][2] = 0;
703 
704  ten[i][0] = 1;
705  ten_index[i][0][0] = 0;
706  ten_index[i][0][1] = 0;
707  ten_index[i][0][2] = 0;
708  }
709 
710  /* Find +1 and -1 values */
711  for (i1 = b1; i1 < e1; i1++) {
712  for (i2 = b2; i2 < e2; i2++) {
713  for (i3 = b3; i3 < e3; i3++) {
714 
715  if (m(i1,i2,i3) > ten[0][1]) {
716 
717  ten[0][1] = m(i1,i2,i3);
718  ten_index[0][1][0] = i1;
719  ten_index[0][1][1] = i2;
720  ten_index[0][1][2] = i3;
721 
722  bubble(ten, ten_index, 1);
723  }
724 
725  if (m(i1,i2,i3) < ten[0][0]) {
726 
727  ten[0][0] = m(i1,i2,i3);
728  ten_index[0][0][0] = i1;
729  ten_index[0][0][1] = i2;
730  ten_index[0][0][2] = i3;
731 
732  bubble(ten, ten_index, 0);
733  }
734  }
735  }
736  }
737 
738 
739 #ifdef DEBUG
740  char a_aux[1024];
741  char * aux;
742  aux = a_aux;
743  int j;
744  for(j=0;j<mm;j++){
745  aux += sprintf(aux,"[%d,%d,%d](%f) ",
746  hit_tileTile2Array(matrix,0,ten_index[j][1][0]),
747  hit_tileTile2Array(matrix,0,ten_index[j][1][1]),
748  hit_tileTile2Array(matrix,0,ten_index[j][1][2]),
749  m(ten_index[j][0][0],ten_index[j][0][1],ten_index[j][0][2]));
750  }
751  printfAllDebug("My -1 are: %s\n",a_aux);
752  aux = a_aux;
753  for(j=0;j<mm;j++){
754  aux += sprintf(aux,"[%d,%d,%d](%f) ",
755  hit_tileTile2Array(matrix,0,ten_index[j][1][0]),
756  hit_tileTile2Array(matrix,0,ten_index[j][1][1]),
757  hit_tileTile2Array(matrix,0,ten_index[j][1][2]),
758  m(ten_index[j][1][0],ten_index[j][1][1],ten_index[j][1][2]));
759  }
760  printfAllDebug("My +1 are: %s\n",a_aux);
761 #endif
762 
763 
764  i1 = mm-1;
765  int i0 = mm-1;
766  HitTile_double temp, best;
767 
768  hit_tileDomainAlloc(&temp, double, 1,1);
769  hit_tileDomainAlloc(&best, double, 1,1);
770  HitCom com_max = hit_comReduce(layout[k_max], HIT_RANKS_NULL, &best, &temp, HIT_DOUBLE, op_max);
771  HitCom com_min = hit_comReduce(layout[k_max], HIT_RANKS_NULL, &best, &temp, HIT_DOUBLE, op_min);
772 
773 
774  for (i = mm - 1; i >= 0; i--) {
775 
776  hit_tileElemAt(best,1,0) = m(ten_index[i1][1][0],ten_index[i1][1][1],ten_index[i1][1][2]);
777  hit_comDo(&com_max);
778  if (hit_tileElemAt(best,1,0) == hit_tileElemAt(temp,1,0)) {
779  i1--;
780  }
781  ten[i][1] = hit_tileElemAt(best,1,0);
782 
783  hit_tileElemAt(best,1,0) = m(ten_index[i0][0][0],ten_index[i0][0][1],ten_index[i0][0][2]);
784  hit_comDo(&com_min);
785  if (hit_tileElemAt(best,1,0) == hit_tileElemAt(temp,1,0)) {
786  i0--;
787  }
788  ten[i][0] = hit_tileElemAt(best,1,0);
789  }
790 
791  int m1 = i1 + 1;
792  int m0 = i0 + 1;
793 
794  zero3(matrix);
795 
796  for (i = mm - 1; i >= m1; i--)
797  m(ten_index[i][1][0],ten_index[i][1][1],ten_index[i][1][2]) = +1.0;
798  for (i = mm - 1; i >= m0; i--)
799  m(ten_index[i][0][0],ten_index[i][0][1],ten_index[i][0][2]) = -1.0;
800 
801 
802  printfAllDebug("I have %d +1s\n",mm-m1);
803  printfAllDebug("I have %d -1s\n",mm-m0);
804 
805  hit_tileFree(best);
806  hit_tileFree(temp);
807  hit_comFree(com_max);
808  hit_comFree(com_min);
809 
810  hit_patternDo(pattern_v);
811 
812 }
813 
814 
815 
821 double power(double a, int n){
822 
823  double aj;
824  int nj;
825  double rdummy;
826 
827  double powerv = 1.0;
828 
829  nj = n;
830  aj = a;
831 
832  while (nj != 0) {
833  if( (nj%2) == 1 ) rdummy = randlc( &powerv, aj );
834  rdummy = randlc( &aj, aj );
835  nj = nj/2;
836  }
837  // @arturo Ago 2015: Avoid a warning of variable initialized but not used
838  (void)rdummy;
839 
840  return powerv;
841 }
842 
843 
844 
845 
846 void bubble(double ten[mm][2], int ten_index[mm][2][3],int ind){
847 
848  double temp;
849  int i, temp_index;
850 
851  if(ind == 1){
852 
853  for(i=0;i<mm-1;i++){
854  if( ten[i][ind] > ten[i+1][ind] ){
855  swap(ten[i+1][ind],ten[i][ind],temp);
856  swap(ten_index[i+1][ind][0],ten_index[i][ind][0],temp_index);
857  swap(ten_index[i+1][ind][1],ten_index[i][ind][1],temp_index);
858  swap(ten_index[i+1][ind][2],ten_index[i][ind][2],temp_index);
859  } else {
860  return;
861  }
862  }
863 
864  } else {
865 
866  for(i=0;i<mm-1;i++){
867  if( ten[i][ind] < ten[i+1][ind] ) {
868  swap(ten[i+1][ind],ten[i][ind],temp);
869  swap(ten_index[i+1][ind][0],ten_index[i][ind][0],temp_index);
870  swap(ten_index[i+1][ind][1],ten_index[i][ind][1],temp_index);
871  swap(ten_index[i+1][ind][2],ten_index[i][ind][2],temp_index);
872 
873  } else {
874  return;
875  }
876  }
877 
878  }
879 
880 }
881 
882 
886 void resid(HitTile_double m_u, HitTile_double m_v, HitTile_double m_r, int k){
887 
888  printfRootDebug(" = RESID =, level %d \n",k);
889 
890  /* Note: I suppose that all the HitTile have the same shape */
891  int b1 = hit_tileArray2Tile(m_u,0,hit_shapeSig(block[k],0).begin);
892  int b2 = hit_tileArray2Tile(m_u,1,hit_shapeSig(block[k],1).begin);
893  int b3 = hit_tileArray2Tile(m_u,2,hit_shapeSig(block[k],2).begin);
894 
895  int n1 = hit_sigCard(hit_shapeSig(block[k],0));
896  int n2 = hit_sigCard(hit_shapeSig(block[k],1));
897  int n3 = hit_sigCard(hit_shapeSig(block[k],2));
898 
899  int e1 = b1 + n1;
900  int e2 = b2 + n2;
901  int e3 = b3 + n3;
902 
903 #undef u
904 #undef v
905 #undef r
906 #define u(a,b,c) hit_tileElemAtNoStride3(m_u,a,b,c)
907 #define v(a,b,c) hit_tileElemAtNoStride3(m_v,a,b,c)
908 #define r(a,b,c) hit_tileElemAtNoStride3(m_r,a,b,c)
909 
910 #ifdef OPT_RESID_BUFFER
911 
912 
913  double temp1[n3+2];
914  double temp2[n3+2];
915 
916 
917  int i1,i2,i3;
918 
919  for(i1=b1;i1<e1;i1++){
920  for(i2=b2;i2<e2;i2++){
921  for(i3=b3-1;i3<e3+1;i3++){
922  temp1[i3] = u(i1-1, i2, i3) + u(i1+1, i2, i3)
923  + u( i1,i2-1, i3) + u( i1,i2+1, i3);
924  temp2[i3] = u(i1-1,i2-1, i3) + u(i1+1,i2-1, i3)
925  + u(i1-1,i2+1, i3) + u(i1+1,i2+1, i3);
926  }
927  for(i3=b3;i3<e3;i3++){
928 
929  r(i1,i2,i3) = v(i1,i2,i3) - (
930  operatorA[0] * u(i1,i2,i3)
931 #ifndef OPT_OPERATOR_A
932  + operatorA[1] * (temp1[i3] + u(i1,i2,i3-1) + u(i1,i2,i3+1))
933 #endif
934  + operatorA[2] * (temp2[i3] + temp1[i3-1] + temp1[i3+1])
935  + operatorA[3] * (temp2[i3-1] + temp2[i3+1])
936  );
937 
938  }
939  }
940  }
941 
942 #else
943 
944  int i1,i2,i3;
945 
946  for(i1=1;i1<e1;i1++){
947  for(i2=1;i2<e2;i2++){
948  for(i3=1;i3<e3;i3++){
949 
950  r(i1,i2,i3) = v(i1,i2,i3) - (
951  + operatorA[0] * u(i1,i2,i3)
952 #ifndef OPT_OPERATOR_A
953  + operatorA[1] * ( u(i1-1, i2, i3) + u(i1+1, i2, i3)
954  + u( i1,i2-1, i3) + u( i1,i2+1, i3)
955  + u( i1, i2,i3-1) + u( i1, i2,i3+1))
956 #endif
957  + operatorA[2] * ( u(i1-1,i2-1, i3) + u(i1-1,i2+1, i3)
958  + u(i1-1, i2,i3-1) + u(i1-1, i2,i3+1)
959  + u(i1+1,i2-1, i3) + u(i1+1,i2+1, i3)
960  + u(i1+1, i2,i3-1) + u(i1+1, i2,i3+1)
961  + u( i1,i2-1,i3-1) + u( i1,i2-1,i3+1)
962  + u( i1,i2+1,i3-1) + u( i1,i2+1,i3+1))
963 
964  + operatorA[3] * ( u(i1-1,i2-1,i3-1) + u(i1-1,i2-1,i3+1)
965  + u(i1-1,i2+1,i3-1) + u(i1-1,i2+1,i3+1)
966  + u(i1+1,i2-1,i3-1) + u(i1+1,i2-1,i3+1)
967  + u(i1+1,i2+1,i3-1) + u(i1+1,i2+1,i3+1))
968  );
969  }
970  }
971  }
972 #endif
973 
974 
975  hit_patternDo( pattern_r[k]);
976 }
977 
978 
979 
980 
981 
982 void apply_correction(HitTile_double u, HitTile_double r, int k){
983 
984  printfRootDebug(" = APPLY CORRECTION =, level %d \n",k);
985 
986 
987 #ifdef OPT_ZU
988 
989 #define zu u
990 
991  HitTile_double r_1 = block_r[k - 1];
992  HitTile_double z_1 = block_z[k - 1];
993 
994  // Same V-cycle till level k-1
995  rprj3(r,r_1,k);
996 
997 
998  v_cycle(r_1,z_1,k-1);
999 
1000  // Not call zero at level k (zu = z_k+u)
1001  interp(z_1,zu,k-1);
1002 
1003  // resid and psinv as normal
1004  resid(zu,block_v,r,k);
1005  psinv(zu,r,k);
1006 
1007 #else
1008 
1009  HitTile_double z = block_z[k];
1010 
1011  // Apply the V-cycle to obtain z
1012  v_cycle(r,z,k);
1013 
1014  // Add the correction z to u
1015  add(u,z);
1016 
1017 #endif
1018 
1019 }
1020 
1021 
1022 /*
1023  * x += t
1024  */
1025 void add(HitTile_double m_x, HitTile_double m_y){
1026 
1027 #undef x
1028 #undef y
1029 #define x(a,b,c) hit_tileElemAtNoStride3(m_x,a,b,c)
1030 #define y(a,b,c) hit_tileElemAtNoStride3(m_y,a,b,c)
1031 
1032  int i1,i2,i3;
1033 
1034  for(i1=0;i1<hit_tileDimCard(m_x,0);i1++)
1035  for(i2=0;i2<hit_tileDimCard(m_x,1);i2++)
1036  for(i3=0;i3<hit_tileDimCard(m_x,2);i3++)
1037  x(i1,i2,i3) += y(i1,i2,i3);
1038 
1039 }
1040 
1041 
1042 
1043 void v_cycle(HitTile_double r, HitTile_double z, int k){
1044 
1045  HitTile_double r_1 = block_r[k-1];
1046  HitTile_double z_1 = block_z[k-1];
1047 
1048  if(k > 0){
1049 
1050  rprj3(r,r_1,k);
1051 
1052 
1053  v_cycle(r_1,z_1,k-1);
1054 
1055  zero3(z);
1056  interp(z_1,z,k-1);
1057 
1058  resid(z,r,r,k);
1059 
1060  psinv(z,r,k);
1061 
1062  } else {
1063 
1064  // z_1 = S r_1
1065  zero3(z);
1066  psinv(z,r,0);
1067  }
1068 }
1069 
1070 /* r_k-1 = P r_k */
1071 void rprj3(HitTile_double m_ro, HitTile_double m_r, int k){
1072 
1073  printfRootDebug(" = RPRJ3 =, level %d -> %d \n",k,k-1);
1074 
1075  //int b1o = hit_tileArray2Tile(m_ro,0,hit_shapeSig(block[k],0).begin);
1076  //int b2o = hit_tileArray2Tile(m_ro,1,hit_shapeSig(block[k],1).begin);
1077  //int b3o = hit_tileArray2Tile(m_ro,2,hit_shapeSig(block[k],2).begin);
1078 
1079  int n1o = hit_sigCard(hit_shapeSig(block[k],0));
1080  int n2o = hit_sigCard(hit_shapeSig(block[k],1));
1081  int n3o = hit_sigCard(hit_shapeSig(block[k],2));
1082 
1083  //int e1o = b1o + n1o;
1084  //int e2o = b2o + n2o;
1085  //int e3o = b3o + n3o;
1086 
1087  k--;
1088 
1089  int b1 = hit_tileArray2Tile(m_r,0,hit_shapeSig(block[k],0).begin);
1090  int b2 = hit_tileArray2Tile(m_r,1,hit_shapeSig(block[k],1).begin);
1091  int b3 = hit_tileArray2Tile(m_r,2,hit_shapeSig(block[k],2).begin);
1092 
1093  int n1 = hit_sigCard(hit_shapeSig(block[k],0));
1094  int n2 = hit_sigCard(hit_shapeSig(block[k],1));
1095  int n3 = hit_sigCard(hit_shapeSig(block[k],2));
1096 
1097 
1098  int e1 = b1 + n1;
1099  int e2 = b2 + n2;
1100  int e3 = b3 + n3;
1101 
1102 #undef ro
1103 #undef r
1104 #define ro(a,b,c) hit_tileElemAtNoStride3(m_ro,a,b,c)
1105 #define r(a,b,c) hit_tileElemAtNoStride3(m_r,a,b,c)
1106 
1107  /* Indices */
1108  int i1o,i2o,i3o;
1109  int i1,i2,i3;
1110 
1111  /* Offsets */
1112  int d1,d2,d3;
1113 
1114  if(n1o==1){
1115  d1 = 1;
1116  } else {
1117  d1 = 0;
1118  }
1119  if(n2o==1){
1120  d2 = 1;
1121  } else {
1122  d2 = 0;
1123  }
1124  if(n3o==1){
1125  d3 = 1;
1126  } else {
1127  d3 = 0;
1128  }
1129 
1130 
1131 
1132 
1133 #ifdef OPT_RPRJ3_BUFFER
1134 
1135 
1136  double temp_x1[n3o+2];
1137  double temp_y1[n3o+2];
1138 
1139  double x2, y2;
1140 
1141  for(i1=b1;i1<e1;i1++){
1142  i1o = i1 * 2 - d1;
1143  for(i2=b2;i2<e2;i2++){
1144  i2o = i2 * 2 - d2;
1145 
1146  for(i3=b3;i3<e3+1;i3++){
1147  i3o = i3 * 2 - d3;
1148  temp_x1[i3o-1] = ro( i1o,i2o-1,i3o-1) + ro( i1o,i2o+1,i3o-1)
1149  + ro(i1o-1, i2o,i3o-1) + ro(i1o+1, i2o,i3o-1);
1150  temp_y1[i3o-1] = ro(i1o-1,i2o-1,i3o-1) + ro(i1o+1,i2o-1,i3o-1)
1151  + ro(i1o-1,i2o+1,i3o-1) + ro(i1o+1,i2o+1,i3o-1);
1152  }
1153  for(i3=b3;i3<e3;i3++){
1154  i3o = i3 * 2 - d3;
1155 
1156  y2 = ro(i1o-1,i2o-1, i3o) + ro(i1o+1,i2o-1, i3o)
1157  + ro(i1o-1,i2o+1, i3o) + ro(i1o+1,i2o+1, i3o);
1158  x2 = ro( i1o,i2o-1, i3o) + ro(i1o, i2o+1, i3o)
1159  + ro(i1o-1, i2o, i3o) + ro(i1o+1, i2o, i3o);
1160 
1161  r(i1,i2,i3) =
1162  + operatorP[0] * ro(i1o,i2o,i3o)
1163  + operatorP[1] * (ro(i1o,i2o,i3o-1) + ro(i1o,i2o,i3o+1) +x2)
1164  + operatorP[2] * ( temp_x1[i3o-1] + temp_x1[i3o+1] + y2)
1165  + operatorP[3] * ( temp_y1[i3o-1] + temp_y1[i3o+1] );
1166  }
1167  }
1168  }
1169 
1170 
1171 #else
1172 
1173 
1174 for(i1=b1;i1<e1;i1++){
1175  i1o = i1 * 2 - d1;
1176 
1177  for(i2=b2;i2<e2;i2++){
1178  i2o = i2 * 2 - d2;
1179 
1180  for(i3=b3;i3<e3;i3++){
1181  i3o = i3 * 2 - d3;
1182 
1183  r(i1,i2,i3) = (
1184  + operatorP[0] * ro(i1o,i2o,i3o)
1185  + operatorP[1] *
1186  ( ro(i1o-1, i2o, i3o) + ro(i1o+1, i2o, i3o)
1187  + ro( i1o,i2o-1, i3o) + ro( i1o,i2o+1, i3o)
1188  + ro( i1o, i2o,i3o-1) + ro( i1o, i2o,i3o+1))
1189  + operatorP[2] *
1190  ( ro(i1o-1,i2o-1, i3o) + ro(i1o-1,i2o+1, i3o)
1191  + ro(i1o-1, i2o,i3o-1) + ro(i1o-1, i2o,i3o+1)
1192  + ro(i1o+1,i2o-1, i3o) + ro(i1o+1,i2o+1, i3o)
1193  + ro(i1o+1, i2o,i3o-1) + ro(i1o+1, i2o,i3o+1)
1194  + ro( i1o,i2o-1,i3o-1) + ro( i1o,i2o-1,i3o+1)
1195  + ro( i1o,i2o+1,i3o-1) + ro( i1o,i2o+1,i3o+1))
1196  + operatorP[3] *
1197  ( ro(i1o-1,i2o-1,i3o-1) + ro(i1o-1,i2o-1,i3o+1)
1198  + ro(i1o-1,i2o+1,i3o-1) + ro(i1o-1,i2o+1,i3o+1)
1199  + ro(i1o+1,i2o-1,i3o-1) + ro(i1o+1,i2o-1,i3o+1)
1200  + ro(i1o+1,i2o+1,i3o-1) + ro(i1o+1,i2o+1,i3o+1))
1201  );
1202 
1203  }
1204  }
1205 }
1206 
1207 
1208 
1209 #endif
1210 
1211 
1212  hit_patternDo(pattern_r[k]);
1213 
1214 
1215 }
1216 
1217 
1218 
1219 void interp(HitTile_double m_zo, HitTile_double m_z, int k){
1220 
1221 
1222  printfRootDebug(" = INTERP =, level %d -> %d \n",k,k+1);
1223 
1224  int b1o = hit_tileArray2Tile(m_zo,0,hit_shapeSig(block[k],0).begin);
1225  int b2o = hit_tileArray2Tile(m_zo,1,hit_shapeSig(block[k],1).begin);
1226  int b3o = hit_tileArray2Tile(m_zo,2,hit_shapeSig(block[k],2).begin);
1227 
1228  int n1o = hit_sigCard(hit_shapeSig(block[k],0));
1229  int n2o = hit_sigCard(hit_shapeSig(block[k],1));
1230  int n3o = hit_sigCard(hit_shapeSig(block[k],2));
1231 
1232  int e1o = b1o + n1o;
1233  int e2o = b2o + n2o;
1234  int e3o = b3o + n3o;
1235 
1236  k++;
1237 
1238  //int b1 = hit_tileArray2Tile(m_z,0,hit_shapeSig(block[k],0).begin);
1239  //int b2 = hit_tileArray2Tile(m_z,1,hit_shapeSig(block[k],1).begin);
1240  //int b3 = hit_tileArray2Tile(m_z,2,hit_shapeSig(block[k],2).begin);
1241 
1242  int n1 = hit_sigCard(hit_shapeSig(block[k],0));
1243  int n2 = hit_sigCard(hit_shapeSig(block[k],1));
1244  int n3 = hit_sigCard(hit_shapeSig(block[k],2));
1245 
1246  //int e1 = b1 + n1;
1247  //int e2 = b2 + n2;
1248  //int e3 = b3 + n3;
1249 
1250 
1251 
1252 # undef zo
1253 # undef z
1254 #define zo(a,b,c) hit_tileElemAtNoStride3(m_zo,a,b,c)
1255 #define z(a,b,c) hit_tileElemAtNoStride3(m_z,a,b,c)
1256 
1257  int i1o,i2o,i3o;
1258  int i1,i2,i3;
1259 
1260 
1261 
1262 // Inactive if
1263 if( layout[k-1].active ){
1264 
1265 
1266 #ifdef OPT_INTERP_BUFFER
1267 
1268 
1269  double temp1[n3o+2];
1270  double temp2[n3o+2];
1271  double temp3[n3o+2];
1272 
1273 
1274  if( n1o != 1 && n2o != 1 && n3o != 1 ){
1275 
1276  for(i1o=b1o-1;i1o<e1o;i1o++){
1277  i1 = i1o * 2;
1278 
1279  for(i2o=b2o-1;i2o<e2o;i2o++){
1280  i2 = i2o * 2;
1281 
1282  for(i3o=b3o-1;i3o<e3o+1;i3o++){
1283  temp1[i3o] = zo( i1o,i2o+1, i3o) + zo( i1o, i2o, i3o);
1284  temp2[i3o] = zo(i1o+1, i2o, i3o) + zo( i1o, i2o, i3o);
1285  temp3[i3o] = zo(i1o+1,i2o+1, i3o) + zo(i1o+1, i2o, i3o)
1286  + temp1[i3o];
1287  }
1288  for(i3o=b3o-1;i3o<e3o;i3o++){
1289  i3 = i3o * 2;
1290  z(i1,i2,i3) += operatorQ[0] * zo( i1o, i2o, i3o);
1291  z(i1,i2,i3+1) += operatorQ[1] *
1292  (zo( i1o, i2o,i3o+1) + zo( i1o, i2o, i3o));
1293  }
1294  for(i3o=b3o-1;i3o<e3o;i3o++){
1295  i3 = i3o * 2;
1296  z(i1,i2+1,i3) += operatorQ[1] * temp1[i3o];
1297  z(i1,i2+1,i3+1) += operatorQ[2] *
1298  ( temp1[i3o] + temp1[i3o+1 ] );
1299  }
1300  for(i3o=b3o-1;i3o<e3o;i3o++){
1301  i3 = i3o * 2;
1302  z(i1+1,i2,i3) += operatorQ[1] * temp2[i3o];
1303  z(i1+1,i2,i3+1) += operatorQ[2] *
1304  ( temp2[i3o] + temp2[i3o+1] );
1305  }
1306  for(i3o=b3o-1;i3o<e3o;i3o++){
1307  i3 = i3o * 2;
1308  z(i1+1,i2+1,i3) += operatorQ[2] * temp3[i3o];
1309  z(i1+1,i2+1,i3+1) += operatorQ[3] *
1310  ( temp3[i3o] + temp3[i3o+1]) ;
1311  }
1312  }
1313  }
1314 #else
1315 
1316 
1317 
1318  if( n1o != 1 && n2o != 1 && n3o != 1 ){
1319 
1320  for(i1o=b1o-1;i1o<e1o;i1o++){
1321  i1 = i1o * 2;
1322 
1323  for(i2o=b2o-1;i2o<e2o;i2o++){
1324  i2 = i2o * 2;
1325 
1326 
1327  for(i3o=b3o-1;i3o<e3o;i3o++){
1328  i3 = i3o * 2;
1329  z(i1,i2,i3) += operatorQ[0] * zo( i1o, i2o, i3o);
1330  z(i1,i2,i3+1) += operatorQ[1] *
1331  (zo( i1o, i2o,i3o+1) + zo( i1o, i2o, i3o));
1332  }
1333  for(i3o=b3o-1;i3o<e3o;i3o++){
1334  i3 = i3o * 2;
1335  z(i1,i2+1,i3) += operatorQ[1] *
1336  ( zo( i1o,i2o+1, i3o) + zo( i1o, i2o, i3o));
1337  z(i1,i2+1,i3+1) += operatorQ[2] *
1338  ( zo( i1o,i2o+1, i3o) + zo( i1o, i2o, i3o)
1339  + zo( i1o,i2o+1, i3o+1) + zo( i1o, i2o, i3o+1) );
1340  }
1341  for(i3o=b3o-1;i3o<e3o;i3o++){
1342  i3 = i3o * 2;
1343  z(i1+1,i2,i3) += operatorQ[1] *
1344  ( zo(i1o+1, i2o, i3o) + zo( i1o, i2o, i3o));
1345  z(i1+1,i2,i3+1) += operatorQ[2] *
1346  ( zo(i1o+1, i2o, i3o) + zo( i1o, i2o, i3o)
1347  + zo(i1o+1, i2o, i3o+1) + zo( i1o, i2o, i3o+1) );
1348  }
1349  for(i3o=b3o-1;i3o<e3o;i3o++){
1350  i3 = i3o * 2;
1351  z(i1+1,i2+1,i3) += operatorQ[2] *
1352  ( zo(i1o+1,i2o+1, i3o) + zo(i1o+1, i2o, i3o)
1353  + zo( i1o,i2o+1, i3o) + zo( i1o, i2o, i3o));
1354  z(i1+1,i2+1,i3+1) += operatorQ[3] *
1355  ( zo(i1o+1,i2o+1, i3o) + zo(i1o+1, i2o, i3o)
1356  + zo( i1o,i2o+1, i3o) + zo( i1o, i2o, i3o)
1357  + zo(i1o+1,i2o+1, i3o+1) + zo(i1o+1, i2o, i3o+1)
1358  + zo( i1o,i2o+1, i3o+1) + zo( i1o, i2o, i3o+1)) ;
1359  }
1360  }
1361  }
1362 
1363 
1364 #endif
1365 
1366 
1367  } else {
1368 
1369  // dx are the offset for the first element
1370  // tx for the second one
1371  int d1,d2,d3;
1372  int t1,t2,t3;
1373 
1374  if(n1==1){
1375  d1 = 1;
1376  t1 = 1;
1377  } else {
1378  d1 = 0;
1379  t1 = 0;
1380  }
1381  if(n2==1){
1382  d2 = 1;
1383  t2 = 1;
1384  } else {
1385  d2 = 0;
1386  t2 = 0;
1387  }
1388  if(n3==1){
1389  d3 = 1;
1390  t3 = 1;
1391  } else {
1392  d3 = 0;
1393  t3 = 0;
1394  }
1395 
1396  for(i1o=b1o-1+t1;i1o<e1o;i1o++){
1397  i1 = i1o * 2 - d1;
1398 
1399  for(i2o=b2o-1+t2;i2o<e2o;i2o++){
1400  i2 = i2o * 2 - d2;
1401 
1402  for(i3o=b3o-1+t3;i3o<e3o;i3o++){
1403  i3 = i3o * 2 - d3;
1404  z(i1,i2,i3) += operatorQ[0] * zo( i1o ,i2o, i3o);
1405  }
1406  for(i3o=b3o-1;i3o<e3o;i3o++){
1407  i3 = i3o * 2 - d3;
1408  z(i1,i2,i3+1) += operatorQ[1] *
1409  (zo( i1o, i2o,i3o+1) + zo( i1o, i2o, i3o));
1410  }
1411  }
1412  for(i2o=b2o-1;i2o<e2o;i2o++){
1413  i2 = i2o * 2 - d2;
1414 
1415  for(i3o=b3o-1+t3;i3o<e3o;i3o++){
1416  i3 = i3o * 2 - d3;
1417  z(i1,i2+1,i3) += operatorQ[1] *
1418  ( zo( i1o,i2o+1, i3o) + zo( i1o, i2o, i3o));
1419  }
1420  for(i3o=b3o-1;i3o<e3o;i3o++){
1421  i3 = i3o * 2 - d3;
1422  z(i1,i2+1,i3+1) += operatorQ[2] *
1423  ( zo( i1o,i2o+1,i3o+1) + zo( i1o, i2o,i3o+1)
1424  + zo( i1o,i2o+1, i3o) + zo( i1o, i2o, i3o));
1425  }
1426  }
1427  }
1428  for(i1o=b1o-1;i1o<e1o;i1o++){
1429  i1 = i1o * 2 - d1;
1430 
1431  for(i2o=b2o-1+t2;i2o<e2o;i2o++){
1432  i2 = i2o * 2 - d2;
1433 
1434  for(i3o=b3o-1+t3;i3o<e3o;i3o++){
1435  i3 = i3o * 2 - d3;
1436  z(i1+1,i2,i3) += operatorQ[1] *
1437  (zo(i1o+1, i2o, i3o) + zo( i1o, i2o, i3o));
1438  }
1439  for(i3o=b3o-1;i3o<e3o;i3o++){
1440  i3 = i3o * 2 - d3;
1441  z(i1+1,i2,i3+1) += operatorQ[2] *
1442  ( zo(i1o+1, i2o,i3o+1) + zo(i1o+1, i2o, i3o)
1443  + zo( i1o, i2o,i3o+1) + zo( i1o, i2o, i3o));
1444  }
1445  }
1446  for(i2o=b2o-1;i2o<e2o;i2o++){
1447  i2 = i2o * 2 - d2;
1448  for(i3o=b3o-1+t3;i3o<e3o;i3o++){
1449  i3 = i3o * 2 - d3;
1450  z(i1+1,i2+1,i3) += operatorQ[2] *
1451  ( zo(i1o+1,i2o+1, i3o) + zo(i1o+1, i2o, i3o)
1452  + zo( i1o,i2o+1, i3o) + zo( i1o, i2o, i3o));
1453  }
1454  for(i3o=b3o-1;i3o<e3o;i3o++){
1455  i3 = i3o * 2 - d3;
1456  z(i1+1,i2+1,i3+1) += operatorQ[3] *
1457  ( zo(i1o+1,i2o+1,i3o+1) + zo(i1o+1, i2o,i3o+1)
1458  + zo(i1o+1,i2o+1, i3o) + zo(i1o+1, i2o, i3o)
1459  + zo( i1o,i2o+1,i3o+1) + zo( i1o, i2o,i3o+1)
1460  + zo( i1o,i2o+1, i3o) + zo( i1o, i2o, i3o));
1461  }
1462  }
1463  }
1464  }
1465 
1466 
1467 // Inactive if
1468 }
1469 
1470  hit_patternDo( pattern_z_ex[k]);
1471 }
1472 
1473 
1474 
1475 void psinv(HitTile_double m_z, HitTile_double m_r, int k){
1476 
1477  printfRootDebug(" = PSINV =, level %d\n",k);
1478 
1479  int b1 = hit_tileArray2Tile(m_z,0,hit_shapeSig(block[k],0).begin);
1480  int b2 = hit_tileArray2Tile(m_z,1,hit_shapeSig(block[k],1).begin);
1481  int b3 = hit_tileArray2Tile(m_z,2,hit_shapeSig(block[k],2).begin);
1482 
1483  int n1 = hit_sigCard(hit_shapeSig(block[k],0));
1484  int n2 = hit_sigCard(hit_shapeSig(block[k],1));
1485  int n3 = hit_sigCard(hit_shapeSig(block[k],2));
1486 
1487  int e1 = b1 + n1;
1488  int e2 = b2 + n2;
1489  int e3 = b3 + n3;
1490 
1491 #undef z
1492 #undef r
1493 #define z(a,b,c) hit_tileElemAtNoStride3(m_z,a,b,c)
1494 #define r(a,b,c) hit_tileElemAtNoStride3(m_r,a,b,c)
1495 
1496  int i1,i2,i3;
1497 
1498  #ifdef OPT_PSINV_BUFFER
1499 
1500 
1501  double temp1[n3+2];
1502  double temp2[n3+2];
1503 
1504  for(i1=b1;i1<e1;i1++){
1505  for(i2=b2;i2<e2;i2++){
1506  for(i3=b3-1;i3<e3+1;i3++){
1507  temp1[i3] = r(i1-1, i2, i3) + r(i1+1, i2, i3)
1508  + r( i1,i2-1, i3) + r( i1,i2+1, i3);
1509  temp2[i3] = r(i1-1,i2-1, i3) + r(i1+1,i2-1, i3)
1510  + r(i1-1,i2+1, i3) + r(i1+1,i2+1, i3);
1511  }
1512  for(i3=b3;i3<e3;i3++){
1513 
1514  z(i1,i2,i3) = z(i1,i2,i3) + (
1515  operatorS[0] * r(i1,i2,i3)
1516  + operatorS[1] * (temp1[i3] + r(i1,i2,i3-1) + r(i1,i2,i3+1))
1517  + operatorS[2] * (temp2[i3] + temp1[i3-1] + temp1[i3+1])
1518  #ifndef OPT_OPERATOR_S
1519  + operatorS[3] * (temp2[i3-1] + temp2[i3+1])
1520  #endif
1521  );
1522  }
1523  }
1524  }
1525 
1526  #else
1527 
1528  for(i1=b1;i1<e1;i1++){
1529  for(i2=b2;i2<e2;i2++){
1530  for(i3=b3;i3<e3;i3++){
1531 
1532  z(i1,i2,i3) = z(i1,i2,i3) + (
1533  + operatorS[0] * r(i1,i2,i3)
1534  + operatorS[1] * ( r(i1-1, i2, i3) + r(i1+1, i2, i3)
1535  + r( i1,i2-1, i3) + r( i1,i2+1, i3)
1536  + r( i1, i2,i3-1) + r( i1, i2,i3+1))
1537  + operatorS[2] * ( r(i1-1,i2-1, i3) + r(i1-1,i2+1, i3)
1538  + r(i1-1, i2,i3-1) + r(i1-1, i2,i3+1)
1539  + r(i1+1,i2-1, i3) + r(i1+1,i2+1, i3)
1540  + r(i1+1, i2,i3-1) + r(i1+1, i2,i3+1)
1541  + r( i1,i2-1,i3-1) + r( i1,i2-1,i3+1)
1542  + r( i1,i2+1,i3-1) + r( i1,i2+1,i3+1))
1543  #ifndef OP_OPERATOR_S
1544  + operatorS[3] * ( r(i1-1,i2-1,i3-1) + r(i1-1,i2-1,i3+1)
1545  + r(i1-1,i2+1,i3-1) + r(i1-1,i2+1,i3+1)
1546  + r(i1+1,i2-1,i3-1) + r(i1+1,i2-1,i3+1)
1547  + r(i1+1,i2+1,i3-1) + r(i1+1,i2+1,i3+1))
1548  #endif
1549  );
1550  }
1551  }
1552  }
1553 
1554  #endif
1555 
1556  hit_patternDo( pattern_z[k]);
1557 }
1558 
1559 
1560 
1561 double norm2u3(HitTile_double m_r,int k){
1562 
1563  printfRootDebug(" = NORM2U3 =, level %d\n",k);
1564 
1565  int b1 = hit_tileArray2Tile(m_r,0,hit_shapeSig(block[k],0).begin);
1566  int b2 = hit_tileArray2Tile(m_r,1,hit_shapeSig(block[k],1).begin);
1567  int b3 = hit_tileArray2Tile(m_r,2,hit_shapeSig(block[k],2).begin);
1568 
1569  int n1 = hit_sigCard(hit_shapeSig(block[k],0));
1570  int n2 = hit_sigCard(hit_shapeSig(block[k],1));
1571  int n3 = hit_sigCard(hit_shapeSig(block[k],2));
1572 
1573  int e1 = b1 + n1;
1574  int e2 = b2 + n2;
1575  int e3 = b3 + n3;
1576 
1577 #undef r
1578 #define r(a,b,c) hit_tileElemAtNoStride3(m_r,a,b,c)
1579 
1580  HitTile_double sum, acum_sum;
1581  double res = 0.0;
1582  hit_tileDomainAlloc(&sum, double, 1,1);
1583  hit_tileDomainAlloc(&acum_sum, double, 1,1);
1584 
1585  int i3, i2, i1;
1586 
1587  hit_tileElemAt(sum,1,0) = 0;
1588  for(i1=b1;i1<e1;i1++){
1589  for(i2=b2;i2<e2;i2++){
1590  for(i3=b3;i3<e3;i3++){
1591  hit_tileElemAt(sum,1,0) += pow(r(i1,i2,i3),2);
1592  }
1593  }
1594  }
1595 
1596  HitRanks root = {{0,0,0,-1}};
1597  HitCom com_sum = hit_comReduce(layout[k_max], root, &sum, &acum_sum, HIT_DOUBLE, op_sum);
1598  hit_comDo(&com_sum);
1599  hit_comFree(com_sum);
1600 
1601  if( hit_Rank == 0 ){
1602  res = hit_tileElemAt(acum_sum,1,0) / (problem_size[0] * problem_size[1] * problem_size[2]);
1603  res = sqrt( res );
1604  }
1605 
1606  hit_tileFree(sum);
1607  hit_tileFree(acum_sum);
1608 
1609  return res;
1610 }
1611 
1615 void warming_up(){
1616 
1617  zero3(block_u);
1618  zran3(block_v);
1619  resid(block_u,block_v,block_r[k_max],k_max);
1620  apply_correction(block_u,block_r[k_max],k_max);
1621 }
1622 
1623 
1625 
1626  printfRootDebug(" = FREE RESOURCES = \n");
1627 
1628  int k;
1629 
1630  /* Loop through all levels */
1631  for (k = k_num - 1; k >= 0; k--) {
1632 
1633  /* Free Layout */
1634  hit_layFree( layout[k] );
1635 
1636  /* Free for the top level: u, v, r and z. */
1637  if (k == k_max) {
1638 
1639  printfRootDebug(" -> Freeing blocks u, v, r, z\n");
1640 
1643 
1644  hit_tileFree(block_r[k]);
1645 #ifndef OPT_ZU
1646  hit_tileFree(block_z[k]);
1647 #endif
1648 
1649  /* Allocates for other levels: r and z. */
1650  } else {
1651 
1652  printfRootDebug(" -> Freeing blocks r, z\n");
1653 
1654  hit_tileFree(block_r[k]);
1655  hit_tileFree(block_z[k]);
1656  }
1657 
1658  /* Communication patterns */
1659  printfRootDebug("* Freeing Comm Pattern for level: %d\n",k);
1660  if (k == k_max)
1661  hit_patternFree(&pattern_v);
1662  hit_patternFree(&pattern_r[k]);
1663  hit_patternFree(&pattern_z[k]);
1664  hit_patternFree(&pattern_z_ex[k]);
1665 
1666  }
1667 
1668  /* Free Hit collective functions */
1672 
1673  /* Free topology */
1674  hit_topFree( topology );
1675 }
1676 
1677 
1678 #ifdef DEBUG
1679 void printBlock(HitTile_double matrix, int k, int d){
1680 
1681  int b1 = hit_tileArray2Tile(matrix,0,hit_shapeSig(block[k],0).begin);
1682  int b2 = hit_tileArray2Tile(matrix,1,hit_shapeSig(block[k],1).begin);
1683  int b3 = hit_tileArray2Tile(matrix,2,hit_shapeSig(block[k],2).begin);
1684 
1685  int n1 = hit_sigCard(hit_shapeSig(block[k],0));
1686  int n2 = hit_sigCard(hit_shapeSig(block[k],1));
1687  int n3 = hit_sigCard(hit_shapeSig(block[k],2));
1688 
1689  int e1 = b1 + n1;
1690  int e2 = b2 + n2;
1691  int e3 = b3 + n3;
1692 
1693 #undef m
1694 #define m(a,b,c) hit_tileElemAtNoStride3(matrix,a,b,c)
1695 
1696  ALL_SEQUENTIAL_BEGIN(iii)
1697 
1698  if( !(n1==0 || n2 == 0 || n3 == 0)){
1699 
1701  int i3, i2, i1;
1702  printf("\n(%d) =============== Block ================ (%d,%d,%d)\n",HIT_TOPOLOGY_INFO.selfRank,n1,n2,n3);
1703 
1704  for(i1=b1-d;i1<e1+d;i1++){
1705  for(i2=b2-d;i2<e2+d;i2++){
1706  for(i3=b3-d;i3<e3+d;i3++){
1707  printf("%.2f\t ",m(i1,i2,i3)*100);
1708  }
1709  printf("\n");
1710  }
1711  printf("=======================================\n");
1712  }
1714  }
1715 
1716  ALL_SEQUENTIAL_END(iii)
1717 
1718 }
1719 
1720 
1721 void printfAllInternal( const char* format, ... ) {
1722 
1723  ALL_SEQUENTIAL_BEGIN(i)
1724 
1725  fprintf( stdout, "%2d: ",i);
1726  va_list args;
1727  va_start( args, format );
1728  vfprintf( stdout, format, args );
1729  va_end( args );
1730 
1731  ALL_SEQUENTIAL_END(i)
1732  }
1733 
1734 
1735 void fill3(HitTile_double matrix,int k){
1736 
1737  int b1 = hit_shapeSig(block[k],0).begin;
1738  int b2 = hit_shapeSig(block[k],1).begin;
1739  int b3 = hit_shapeSig(block[k],2).begin;
1740 
1741 
1742  int e1 = hit_shapeSig(block[k],0).end;
1743  int e2 = hit_shapeSig(block[k],1).end;
1744  int e3 = hit_shapeSig(block[k],2).end;
1745 
1746 
1747 #undef m
1748 #define m(a,b,c) hit_tileElemAtArrayCoords3(matrix,a,b,c)
1749 
1750  int i1,i2,i3;
1751 
1752  for(i1=b1;i1<=e1;i1++)
1753  for(i2=b2;i2<=e2;i2++)
1754  for(i3=b3;i3<=e3;i3++)
1755  m(i1,i2,i3) = (100 * (i1+1) + 10 * (i2+1) + (i3+1)) / 100.0;
1756 
1757 }
1758 
1759 #endif
1760 
void add(HitTile_double m_x, HitTile_double m_y)
Definition: mg.c:1025
int problem_size[3]
Definition: mg.c:85
#define hit_shape(nd,...)
Definition: hit_sshape.h:175
MPI_Op HitOp
Definition: hit_com.h:118
HitShape hit_shapeBorder(HitShape shape, int dim, int position, int offset)
Definition: hit_shape.c:241
HitShape block[k_num]
Definition: mg.c:156
void interp(HitTile_double m_zo, HitTile_double m_z, int k)
Definition: mg.c:1219
void resid(HitTile_double m_u, HitTile_double m_v, HitTile_double m_r, int k)
Definition: mg.c:886
#define hit_layShape(lay)
Definition: hit_layout.h:650
#define hit_tileNewType(baseType)
Definition: hit_tile.h:163
void warming_up()
Definition: mg.c:1615
void hit_comOpSumDouble(void *, void *, int *, HitType *)
Definition: hit_com.c:2665
HitOp op_sum
Definition: mg.c:176
#define hit_comOpFree(operation)
Definition: hit_com.h:1048
#define hit_clockSynchronizeAll()
Definition: hit_utils.h:55
#define hit_tileFill(var, value)
Definition: hit_tile.h:390
HitShape hit_shapeExpand(HitShape shape, int dims, int offset)
Definition: hit_shape.c:210
#define hit_Rank
Definition: hit_com.h:140
double verify_value
Definition: mg.c:86
#define zu
double norm2u3(HitTile_double m_r, int k)
Definition: mg.c:1561
#define hit_tileElemAt(var, ndims,...)
Definition: hit_tile.h:519
#define ro(a, b, c)
HitPTopology * HIT_TOPOLOGY_INFO
Definition: hit_topology.c:60
void hit_comFree(HitCom issue)
Definition: hit_com.c:1995
HitTile_double block_z[k_num]
Definition: mg.c:164
HitCom hit_comSendRecvSelectTag(HitLayout lay, HitRanks sendTo, const void *tilePSend, HitShape selectionSend, int modeSelectSend, HitRanks receiveFrom, const void *tilePRecv, HitShape selectionRecv, int modeSelectRecv, HitType baseType, int tag)
Definition: hit_com.c:450
void v_cycle(HitTile_double r, HitTile_double z, int k)
Definition: mg.c:1043
void hit_comDo(HitCom *issue)
Definition: hit_com.c:2408
void psinv(HitTile_double m_z, HitTile_double m_r, int k)
Definition: mg.c:1475
int hit_shapeCmp(HitShape sh1, HitShape sh2)
Definition: hit_shape.c:109
#define y(a, b, c)
void vranlc(int N, double *X, double A, Y)
Definition: c_randi8.c:26
#define hit_tileDomainAlloc(newVarP, baseType, numDims,...)
Definition: hit_tile.h:336
#define printfDebug(...)
Definition: mg.c:242
HitLayout layout[k_num]
Definition: mg.c:150
HitShape grid[k_num]
Definition: mg.c:159
#define z(a, b, c)
#define hit_topology(name,...)
Definition: hit_topology.h:308
#define hit_layShapeOther(lay, ranks)
Definition: hit_layout.h:732
HitRanks HIT_RANKS_NULL
Definition: hit_topology.c:68
#define hit_sigCard(sig)
Definition: hit_sig.h:162
Definition: hit_sig.h:79
#define printfRootDebug(...)
Definition: mg.c:241
void apply_correction(HitTile_double u, HitTile_double r, int k)
Definition: mg.c:982
#define hit_clockGetMaxSeconds(c)
Definition: hit_utils.h:191
#define mm
Definition: mg.c:146
#define r(a, b, c)
HitTile_double block_r[k_num]
Definition: mg.c:165
#define k_num
Definition: mg.c:118
void bubble(double ten[mm][2], int ten_index[mm][2][3], int ind)
Definition: mg.c:846
#define epsilon
Definition: mg.c:128
#define HIT_COM_ARRAYCOORDS
Definition: hit_com.h:345
#define u(a, b, c)
void hit_patternAdd(HitPattern *pattern, HitCom comm)
Definition: hit_pattern.c:61
void setup()
Definition: mg.c:343
#define hit_NProcs
Definition: hit_com.h:142
void zran3(HitTile_double matrix)
Definition: mg.c:630
#define hit_tileArray2Tile(var, dim, pos)
Definition: hit_tile.h:875
#define hit_tileDimCard(var, dim)
Definition: hit_tile.h:750
#define zo(a, b, c)
#define hit_tileDomainShapeAlloc(newVarP, baseType, shape)
Definition: hit_tile.h:354
HitOp op_max
Definition: mg.c:174
#define x
HitClock initClk
Definition: mg.c:180
double operatorS[4]
Definition: mg.c:137
void zero3(HitTile_double tile)
Definition: mg.c:619
#define A0
HitOp op_min
Definition: mg.c:175
void hit_topFree(HitTopology topo)
Definition: hit_topology.c:129
#define hit_comOp(function, operation)
Definition: hit_com.h:1036
double power(double a, int n)
Definition: mg.c:821
HitPattern pattern_v
Definition: mg.c:168
void rprj3(HitTile_double m_ro, HitTile_double m_r, int k)
Definition: mg.c:1071
void hit_layWrapNeighbors(HitLayout *lay)
Definition: hit_layout.c:1593
double operatorP[4]
Definition: mg.c:134
#define HIT_PAT_ORDERED
Definition: hit_pattern.h:79
void hit_patternFree(HitPattern *pattern)
Definition: hit_pattern.c:94
#define hit_tileTile2Array(var, dim, pos)
Definition: hit_tile.h:864
HitShape HIT_SHAPE_NULL
Definition: hit_shape.c:60
void hit_comInit(int *pargc, char **pargv[])
Definition: hit_com.c:111
HitShape shape
HitTile_double block_v
Definition: mg.c:163
void hit_comOpMaxDouble(void *, void *, int *, HitType *)
Definition: hit_com.c:2671
#define printfRoot(...)
Definition: mg.c:231
HitPattern pattern_r[k_num]
Definition: mg.c:169
HitPattern pattern_z[k_num]
Definition: mg.c:170
void hit_layFree(HitLayout lay)
Definition: hit_layout.c:2251
#define hit_clockStop(c)
Definition: hit_utils.h:109
int main(int argc, char *argv[])
Definition: cannonAsync.c:62
#define iterations
Definition: mg.c:119
#define m(a, b, c)
HitPattern pattern_z_ex[k_num]
Definition: mg.c:171
#define swap(a, b, c)
Definition: mg.c:184
#define hit_layNeighbor(lay, dim, shift)
Definition: hit_layout.h:722
#define hit_comReduce(lay, root, tilePSend, tilePRecv, baseType, operation)
Definition: hit_com.h:725
HitClock setupClk
Definition: mg.c:179
double operatorQ[4]
Definition: mg.c:143
HitTile_double block_u
Definition: mg.c:162
#define hit_comBarrier(lay)
Definition: hit_com.h:1027
double randlc(double *X, double *A)
Definition: c_randlc.c:40
#define v(a, b, c)
#define hit_layout(name, topo,...)
Definition: hit_layout.h:415
#define hit_shapeSig(shape, dim)
Definition: hit_sshape.h:400
#define hit_comSendSelectTag(lay, sendTo, tileP, selection, modeSelect, baseType, tag)
Definition: hit_com.h:419
#define hit_tileFree(var)
Definition: hit_tile.h:369
#define tag(dir)
Definition: mg.c:191
HitClock processClk
Definition: mg.c:181
double operatorA[4]
Definition: mg.c:132
#define HIT_DOUBLE
Definition: hit_com.h:78
HitTopology topology
Definition: is.c:194
void free_resources()
Definition: mg.c:1624
#define hit_patternDo(pattern)
Definition: hit_pattern.h:157
#define hit_comRecvSelectTag(lay, receiveFrom, tileP, selection, modeSelect, baseType, tag)
Definition: hit_com.h:466
void hit_comFinalize()
Definition: hit_com.c:159
#define HIT_SHAPE_END
Definition: hit_sshape.h:522
void hit_comOpMinDouble(void *, void *, int *, HitType *)
Definition: hit_com.c:2677
#define hit_clockReduce(lay, c)
Definition: hit_utils.h:139
#define k_max
Definition: mg.c:125
#define hit_clockStart(c)
Definition: hit_utils.h:87
#define printfAllDebug(...)
Definition: mg.c:240
#define HIT_SHAPE_BEGIN
Definition: hit_sshape.h:515