51 #define OPT_OPERATOR_A
52 #define OPT_OPERATOR_S
53 #define OPT_RESID_BUFFER
54 #define OPT_PSINV_BUFFER
55 #define OPT_RPRJ3_BUFFER
56 #define OPT_INTERP_BUFFER
98 # define iterations 20
105 # define iterations 20
112 # define iterations 50
119 # define iterations 50
125 #define k_max (k_num-1)
132 double operatorA[4] = {-8.0/3.0, 0.0, 1.0/6.0, 1.0/12.0};
134 double operatorP[4] = {1.0/2.0, 1.0/4.0, 1.0/8.0, 1.0/16.0};
135 #if CLASS == S || CLASS == A
137 double operatorS[4] = {-3.0/8.0, 1.0/32.0, -1.0/64.0, 0.0};
140 double operatorS[4] = {-3.0/17.0, 1.0/33.0, -1.0/61.0, 0.0};
184 #define swap(a,b,c){ \
191 #define tag(dir) (100 * (dir + 10*dim))
196 #define ALL_SEQUENTIAL_BEGIN(var) \
197 struct timespec var ## ts = {0,1000000}; \
200 for(var=-1;var<hit_NProcs+1;var++){ \
202 #define ALL_SEQUENTIAL_END(var) \
204 nanosleep(&(var ## ts),NULL); \
207 MPI_Barrier(*hit_Comm); \
215 void zero3(HitTile_double tile);
216 void zran3(HitTile_double matrix);
217 double power(
double a,
int n);
218 void bubble(
double ten[
mm][2],
int ten_index[mm][2][3],
int ind);
219 void resid(HitTile_double m_u, HitTile_double m_v, HitTile_double m_r,
int k);
221 void v_cycle(HitTile_double
r, HitTile_double
z,
int k);
222 void rprj3(HitTile_double m_ro, HitTile_double m_r,
int k);
223 void interp(HitTile_double m_zo, HitTile_double m_z,
int k);
224 void psinv(HitTile_double m_z, HitTile_double m_r,
int k);
225 double norm2u3(HitTile_double m_r,
int k);
227 void add(HitTile_double m_x, HitTile_double m_y);
230 #define printfRootInternal(...) { if( hit_Rank == 0 ) { printf(__VA_ARGS__); fflush(stdout); }}
231 #define printfRoot(...) printfRootInternal(__VA_ARGS__)
235 #define printfAllDebug(...) printfAllInternal(__VA_ARGS__)
236 #define printfRootDebug(...) printfRootInternal(__VA_ARGS__)
237 #define printfDebug(...) printf(__VA_ARGS__)
238 void printfAllInternal(
const char* format, ... );
240 #define printfAllDebug(...)
241 #define printfRootDebug(...)
242 #define printfDebug(...)
249 int main(
int argc,
char ** argv){
260 for( j=0; (size_t)j <
sizeof(
int)*8 ; j++ ){
261 sum += (nprocs >> j) & 1;
264 printfRoot(
"\nThe number of processors must be a power of two\n\n");
361 for(k=
k_num-1;k>=0;k--){
366 sig0 = hit_sig(0,current_size[0]-1,1);
367 sig1 = hit_sig(0,current_size[1]-1,1);
368 sig2 = hit_sig(0,current_size[2]-1,1);
372 layout[k] =
hit_layout(plug_layBlocksL,topology,grid[k]);
430 for(dim=0;dim<3;dim++){
437 HitShape faceGive_l, faceGive_r, faceTake_l, faceTake_r;
484 hit_comSendRecvSelectTag(layout[k], nbr_l, &
block_r[k], faceGive_l,
HIT_COM_ARRAYCOORDS, nbr_r, &
block_r[k], faceTake_r,
HIT_COM_ARRAYCOORDS,
HIT_DOUBLE,
tag(0)));
486 hit_comSendRecvSelectTag(layout[k], nbr_r, &
block_r[k], faceGive_r,
HIT_COM_ARRAYCOORDS, nbr_l, &
block_r[k], faceTake_l,
HIT_COM_ARRAYCOORDS,
HIT_DOUBLE,
tag(1)));
489 hit_comSendRecvSelectTag(layout[k], nbr_l, &
block_z[k], faceGive_l,
HIT_COM_ARRAYCOORDS, nbr_r, &
block_z[k], faceTake_r,
HIT_COM_ARRAYCOORDS,
HIT_DOUBLE,
tag(0)));
491 hit_comSendRecvSelectTag(layout[k], nbr_r, &
block_z[k], faceGive_r,
HIT_COM_ARRAYCOORDS, nbr_l, &
block_z[k], faceTake_l,
HIT_COM_ARRAYCOORDS,
HIT_DOUBLE,
tag(1)));
495 hit_comSendRecvSelectTag(layout[k], nbr_l, &
block_v, faceGive_l,
HIT_COM_ARRAYCOORDS, nbr_r, &
block_v, faceTake_r,
HIT_COM_ARRAYCOORDS,
HIT_DOUBLE,
tag(0)));
497 hit_comSendRecvSelectTag(layout[k], nbr_r, &
block_v, faceGive_r,
HIT_COM_ARRAYCOORDS, nbr_l, &
block_v, faceTake_l,
HIT_COM_ARRAYCOORDS,
HIT_DOUBLE,
tag(1)));
511 ALL_SEQUENTIAL_BEGIN(seq)
528 printfDebug(
"Face Give l ex, dim %d: [%d-%d,%d-%d,%d-%d] Cards[%d,%d,%d]\n",dim,
539 printfDebug(
"Face Give r ex, dim %d: [%d-%d,%d-%d,%d-%d] Cards[%d,%d,%d]\n",dim,
567 printfDebug(
"Face Take l ex, dim %d: [%d-%d,%d-%d,%d-%d] Cards[%d,%d,%d]\n",dim,
578 printfDebug(
"Face Take r ex, dim %d: [%d-%d,%d-%d,%d-%d] Cards[%d,%d,%d]\n",dim,
595 ALL_SEQUENTIAL_END(seq)
602 current_size[0] /= 2;
603 current_size[1] /= 2;
604 current_size[2] /= 2;
648 #define m(a,b,c) hit_tileElemAtNoStride3(matrix,a,b,c)
650 #define A0 pow(5.0,13)
651 #define x 314159265.e0
674 int rdummy = (int)
randlc( &x0, ai );
677 for(i1=b1;i1<e1;i1++){
680 for(i2=b2;i2<e2;i2++){
685 rdummy = (int)
randlc(&x1,a1);
687 rdummy = (int)
randlc(&x0,a2);
694 int ten_index[
mm][2][3];
700 ten_index[i][1][0] = 0;
701 ten_index[i][1][1] = 0;
702 ten_index[i][1][2] = 0;
705 ten_index[i][0][0] = 0;
706 ten_index[i][0][1] = 0;
707 ten_index[i][0][2] = 0;
711 for (i1 = b1; i1 < e1; i1++) {
712 for (i2 = b2; i2 < e2; i2++) {
713 for (i3 = b3; i3 < e3; i3++) {
715 if (
m(i1,i2,i3) > ten[0][1]) {
717 ten[0][1] =
m(i1,i2,i3);
718 ten_index[0][1][0] = i1;
719 ten_index[0][1][1] = i2;
720 ten_index[0][1][2] = i3;
722 bubble(ten, ten_index, 1);
725 if (
m(i1,i2,i3) < ten[0][0]) {
727 ten[0][0] =
m(i1,i2,i3);
728 ten_index[0][0][0] = i1;
729 ten_index[0][0][1] = i2;
730 ten_index[0][0][2] = i3;
732 bubble(ten, ten_index, 0);
745 aux += sprintf(aux,
"[%d,%d,%d](%f) ",
749 m(ten_index[j][0][0],ten_index[j][0][1],ten_index[j][0][2]));
754 aux += sprintf(aux,
"[%d,%d,%d](%f) ",
758 m(ten_index[j][1][0],ten_index[j][1][1],ten_index[j][1][2]));
766 HitTile_double temp, best;
774 for (i = mm - 1; i >= 0; i--) {
776 hit_tileElemAt(best,1,0) =
m(ten_index[i1][1][0],ten_index[i1][1][1],ten_index[i1][1][2]);
783 hit_tileElemAt(best,1,0) =
m(ten_index[i0][0][0],ten_index[i0][0][1],ten_index[i0][0][2]);
796 for (i = mm - 1; i >= m1; i--)
797 m(ten_index[i][1][0],ten_index[i][1][1],ten_index[i][1][2]) = +1.0;
798 for (i = mm - 1; i >= m0; i--)
799 m(ten_index[i][0][0],ten_index[i][0][1],ten_index[i][0][2]) = -1.0;
833 if( (nj%2) == 1 ) rdummy =
randlc( &powerv, aj );
834 rdummy =
randlc( &aj, aj );
846 void bubble(
double ten[
mm][2],
int ten_index[mm][2][3],
int ind){
854 if( ten[i][ind] > ten[i+1][ind] ){
855 swap(ten[i+1][ind],ten[i][ind],temp);
856 swap(ten_index[i+1][ind][0],ten_index[i][ind][0],temp_index);
857 swap(ten_index[i+1][ind][1],ten_index[i][ind][1],temp_index);
858 swap(ten_index[i+1][ind][2],ten_index[i][ind][2],temp_index);
867 if( ten[i][ind] < ten[i+1][ind] ) {
868 swap(ten[i+1][ind],ten[i][ind],temp);
869 swap(ten_index[i+1][ind][0],ten_index[i][ind][0],temp_index);
870 swap(ten_index[i+1][ind][1],ten_index[i][ind][1],temp_index);
871 swap(ten_index[i+1][ind][2],ten_index[i][ind][2],temp_index);
886 void resid(HitTile_double m_u, HitTile_double m_v, HitTile_double m_r,
int k){
906 #define u(a,b,c) hit_tileElemAtNoStride3(m_u,a,b,c)
907 #define v(a,b,c) hit_tileElemAtNoStride3(m_v,a,b,c)
908 #define r(a,b,c) hit_tileElemAtNoStride3(m_r,a,b,c)
910 #ifdef OPT_RESID_BUFFER
919 for(i1=b1;i1<e1;i1++){
920 for(i2=b2;i2<e2;i2++){
921 for(i3=b3-1;i3<e3+1;i3++){
922 temp1[i3] =
u(i1-1, i2, i3) +
u(i1+1, i2, i3)
923 +
u( i1,i2-1, i3) +
u( i1,i2+1, i3);
924 temp2[i3] =
u(i1-1,i2-1, i3) +
u(i1+1,i2-1, i3)
925 +
u(i1-1,i2+1, i3) +
u(i1+1,i2+1, i3);
927 for(i3=b3;i3<e3;i3++){
929 r(i1,i2,i3) =
v(i1,i2,i3) - (
931 #ifndef OPT_OPERATOR_A
932 +
operatorA[1] * (temp1[i3] +
u(i1,i2,i3-1) +
u(i1,i2,i3+1))
934 +
operatorA[2] * (temp2[i3] + temp1[i3-1] + temp1[i3+1])
935 +
operatorA[3] * (temp2[i3-1] + temp2[i3+1])
946 for(i1=1;i1<e1;i1++){
947 for(i2=1;i2<e2;i2++){
948 for(i3=1;i3<e3;i3++){
950 r(i1,i2,i3) =
v(i1,i2,i3) - (
952 #ifndef OPT_OPERATOR_A
953 +
operatorA[1] * (
u(i1-1, i2, i3) +
u(i1+1, i2, i3)
954 +
u( i1,i2-1, i3) +
u( i1,i2+1, i3)
955 +
u( i1, i2,i3-1) +
u( i1, i2,i3+1))
957 +
operatorA[2] * (
u(i1-1,i2-1, i3) +
u(i1-1,i2+1, i3)
958 +
u(i1-1, i2,i3-1) +
u(i1-1, i2,i3+1)
959 +
u(i1+1,i2-1, i3) +
u(i1+1,i2+1, i3)
960 +
u(i1+1, i2,i3-1) +
u(i1+1, i2,i3+1)
961 +
u( i1,i2-1,i3-1) +
u( i1,i2-1,i3+1)
962 +
u( i1,i2+1,i3-1) +
u( i1,i2+1,i3+1))
964 +
operatorA[3] * (
u(i1-1,i2-1,i3-1) +
u(i1-1,i2-1,i3+1)
965 +
u(i1-1,i2+1,i3-1) +
u(i1-1,i2+1,i3+1)
966 +
u(i1+1,i2-1,i3-1) +
u(i1+1,i2-1,i3+1)
967 +
u(i1+1,i2+1,i3-1) +
u(i1+1,i2+1,i3+1))
991 HitTile_double r_1 =
block_r[k - 1];
992 HitTile_double z_1 =
block_z[k - 1];
1025 void add(HitTile_double m_x, HitTile_double m_y){
1029 #define x(a,b,c) hit_tileElemAtNoStride3(m_x,a,b,c)
1030 #define y(a,b,c) hit_tileElemAtNoStride3(m_y,a,b,c)
1037 x(i1,i2,i3) +=
y(i1,i2,i3);
1045 HitTile_double r_1 =
block_r[k-1];
1046 HitTile_double z_1 =
block_z[k-1];
1071 void rprj3(HitTile_double m_ro, HitTile_double m_r,
int k){
1104 #define ro(a,b,c) hit_tileElemAtNoStride3(m_ro,a,b,c)
1105 #define r(a,b,c) hit_tileElemAtNoStride3(m_r,a,b,c)
1133 #ifdef OPT_RPRJ3_BUFFER
1136 double temp_x1[n3o+2];
1137 double temp_y1[n3o+2];
1141 for(i1=b1;i1<e1;i1++){
1143 for(i2=b2;i2<e2;i2++){
1146 for(i3=b3;i3<e3+1;i3++){
1148 temp_x1[i3o-1] =
ro( i1o,i2o-1,i3o-1) +
ro( i1o,i2o+1,i3o-1)
1149 +
ro(i1o-1, i2o,i3o-1) +
ro(i1o+1, i2o,i3o-1);
1150 temp_y1[i3o-1] =
ro(i1o-1,i2o-1,i3o-1) +
ro(i1o+1,i2o-1,i3o-1)
1151 +
ro(i1o-1,i2o+1,i3o-1) +
ro(i1o+1,i2o+1,i3o-1);
1153 for(i3=b3;i3<e3;i3++){
1156 y2 =
ro(i1o-1,i2o-1, i3o) +
ro(i1o+1,i2o-1, i3o)
1157 +
ro(i1o-1,i2o+1, i3o) +
ro(i1o+1,i2o+1, i3o);
1158 x2 =
ro( i1o,i2o-1, i3o) +
ro(i1o, i2o+1, i3o)
1159 +
ro(i1o-1, i2o, i3o) +
ro(i1o+1, i2o, i3o);
1163 +
operatorP[1] * (
ro(i1o,i2o,i3o-1) +
ro(i1o,i2o,i3o+1) +x2)
1164 +
operatorP[2] * ( temp_x1[i3o-1] + temp_x1[i3o+1] + y2)
1165 +
operatorP[3] * ( temp_y1[i3o-1] + temp_y1[i3o+1] );
1174 for(i1=b1;i1<e1;i1++){
1177 for(i2=b2;i2<e2;i2++){
1180 for(i3=b3;i3<e3;i3++){
1186 (
ro(i1o-1, i2o, i3o) +
ro(i1o+1, i2o, i3o)
1187 +
ro( i1o,i2o-1, i3o) +
ro( i1o,i2o+1, i3o)
1188 +
ro( i1o, i2o,i3o-1) +
ro( i1o, i2o,i3o+1))
1190 (
ro(i1o-1,i2o-1, i3o) +
ro(i1o-1,i2o+1, i3o)
1191 +
ro(i1o-1, i2o,i3o-1) +
ro(i1o-1, i2o,i3o+1)
1192 +
ro(i1o+1,i2o-1, i3o) +
ro(i1o+1,i2o+1, i3o)
1193 +
ro(i1o+1, i2o,i3o-1) +
ro(i1o+1, i2o,i3o+1)
1194 +
ro( i1o,i2o-1,i3o-1) +
ro( i1o,i2o-1,i3o+1)
1195 +
ro( i1o,i2o+1,i3o-1) +
ro( i1o,i2o+1,i3o+1))
1197 (
ro(i1o-1,i2o-1,i3o-1) +
ro(i1o-1,i2o-1,i3o+1)
1198 +
ro(i1o-1,i2o+1,i3o-1) +
ro(i1o-1,i2o+1,i3o+1)
1199 +
ro(i1o+1,i2o-1,i3o-1) +
ro(i1o+1,i2o-1,i3o+1)
1200 +
ro(i1o+1,i2o+1,i3o-1) +
ro(i1o+1,i2o+1,i3o+1))
1219 void interp(HitTile_double m_zo, HitTile_double m_z,
int k){
1232 int e1o = b1o + n1o;
1233 int e2o = b2o + n2o;
1234 int e3o = b3o + n3o;
1254 #define zo(a,b,c) hit_tileElemAtNoStride3(m_zo,a,b,c)
1255 #define z(a,b,c) hit_tileElemAtNoStride3(m_z,a,b,c)
1263 if( layout[k-1].active ){
1266 #ifdef OPT_INTERP_BUFFER
1269 double temp1[n3o+2];
1270 double temp2[n3o+2];
1271 double temp3[n3o+2];
1274 if( n1o != 1 && n2o != 1 && n3o != 1 ){
1276 for(i1o=b1o-1;i1o<e1o;i1o++){
1279 for(i2o=b2o-1;i2o<e2o;i2o++){
1282 for(i3o=b3o-1;i3o<e3o+1;i3o++){
1283 temp1[i3o] =
zo( i1o,i2o+1, i3o) +
zo( i1o, i2o, i3o);
1284 temp2[i3o] =
zo(i1o+1, i2o, i3o) +
zo( i1o, i2o, i3o);
1285 temp3[i3o] =
zo(i1o+1,i2o+1, i3o) +
zo(i1o+1, i2o, i3o)
1288 for(i3o=b3o-1;i3o<e3o;i3o++){
1292 (
zo( i1o, i2o,i3o+1) +
zo( i1o, i2o, i3o));
1294 for(i3o=b3o-1;i3o<e3o;i3o++){
1298 ( temp1[i3o] + temp1[i3o+1 ] );
1300 for(i3o=b3o-1;i3o<e3o;i3o++){
1304 ( temp2[i3o] + temp2[i3o+1] );
1306 for(i3o=b3o-1;i3o<e3o;i3o++){
1308 z(i1+1,i2+1,i3) +=
operatorQ[2] * temp3[i3o];
1310 ( temp3[i3o] + temp3[i3o+1]) ;
1318 if( n1o != 1 && n2o != 1 && n3o != 1 ){
1320 for(i1o=b1o-1;i1o<e1o;i1o++){
1323 for(i2o=b2o-1;i2o<e2o;i2o++){
1327 for(i3o=b3o-1;i3o<e3o;i3o++){
1331 (
zo( i1o, i2o,i3o+1) +
zo( i1o, i2o, i3o));
1333 for(i3o=b3o-1;i3o<e3o;i3o++){
1336 (
zo( i1o,i2o+1, i3o) +
zo( i1o, i2o, i3o));
1338 (
zo( i1o,i2o+1, i3o) +
zo( i1o, i2o, i3o)
1339 +
zo( i1o,i2o+1, i3o+1) +
zo( i1o, i2o, i3o+1) );
1341 for(i3o=b3o-1;i3o<e3o;i3o++){
1344 (
zo(i1o+1, i2o, i3o) +
zo( i1o, i2o, i3o));
1346 (
zo(i1o+1, i2o, i3o) +
zo( i1o, i2o, i3o)
1347 +
zo(i1o+1, i2o, i3o+1) +
zo( i1o, i2o, i3o+1) );
1349 for(i3o=b3o-1;i3o<e3o;i3o++){
1352 (
zo(i1o+1,i2o+1, i3o) +
zo(i1o+1, i2o, i3o)
1353 +
zo( i1o,i2o+1, i3o) +
zo( i1o, i2o, i3o));
1355 (
zo(i1o+1,i2o+1, i3o) +
zo(i1o+1, i2o, i3o)
1356 +
zo( i1o,i2o+1, i3o) +
zo( i1o, i2o, i3o)
1357 +
zo(i1o+1,i2o+1, i3o+1) +
zo(i1o+1, i2o, i3o+1)
1358 +
zo( i1o,i2o+1, i3o+1) +
zo( i1o, i2o, i3o+1)) ;
1396 for(i1o=b1o-1+t1;i1o<e1o;i1o++){
1399 for(i2o=b2o-1+t2;i2o<e2o;i2o++){
1402 for(i3o=b3o-1+t3;i3o<e3o;i3o++){
1406 for(i3o=b3o-1;i3o<e3o;i3o++){
1409 (
zo( i1o, i2o,i3o+1) +
zo( i1o, i2o, i3o));
1412 for(i2o=b2o-1;i2o<e2o;i2o++){
1415 for(i3o=b3o-1+t3;i3o<e3o;i3o++){
1418 (
zo( i1o,i2o+1, i3o) +
zo( i1o, i2o, i3o));
1420 for(i3o=b3o-1;i3o<e3o;i3o++){
1423 (
zo( i1o,i2o+1,i3o+1) +
zo( i1o, i2o,i3o+1)
1424 +
zo( i1o,i2o+1, i3o) +
zo( i1o, i2o, i3o));
1428 for(i1o=b1o-1;i1o<e1o;i1o++){
1431 for(i2o=b2o-1+t2;i2o<e2o;i2o++){
1434 for(i3o=b3o-1+t3;i3o<e3o;i3o++){
1437 (
zo(i1o+1, i2o, i3o) +
zo( i1o, i2o, i3o));
1439 for(i3o=b3o-1;i3o<e3o;i3o++){
1442 (
zo(i1o+1, i2o,i3o+1) +
zo(i1o+1, i2o, i3o)
1443 +
zo( i1o, i2o,i3o+1) +
zo( i1o, i2o, i3o));
1446 for(i2o=b2o-1;i2o<e2o;i2o++){
1448 for(i3o=b3o-1+t3;i3o<e3o;i3o++){
1451 (
zo(i1o+1,i2o+1, i3o) +
zo(i1o+1, i2o, i3o)
1452 +
zo( i1o,i2o+1, i3o) +
zo( i1o, i2o, i3o));
1454 for(i3o=b3o-1;i3o<e3o;i3o++){
1457 (
zo(i1o+1,i2o+1,i3o+1) +
zo(i1o+1, i2o,i3o+1)
1458 +
zo(i1o+1,i2o+1, i3o) +
zo(i1o+1, i2o, i3o)
1459 +
zo( i1o,i2o+1,i3o+1) +
zo( i1o, i2o,i3o+1)
1460 +
zo( i1o,i2o+1, i3o) +
zo( i1o, i2o, i3o));
1475 void psinv(HitTile_double m_z, HitTile_double m_r,
int k){
1493 #define z(a,b,c) hit_tileElemAtNoStride3(m_z,a,b,c)
1494 #define r(a,b,c) hit_tileElemAtNoStride3(m_r,a,b,c)
1498 #ifdef OPT_PSINV_BUFFER
1504 for(i1=b1;i1<e1;i1++){
1505 for(i2=b2;i2<e2;i2++){
1506 for(i3=b3-1;i3<e3+1;i3++){
1507 temp1[i3] =
r(i1-1, i2, i3) +
r(i1+1, i2, i3)
1508 +
r( i1,i2-1, i3) +
r( i1,i2+1, i3);
1509 temp2[i3] =
r(i1-1,i2-1, i3) +
r(i1+1,i2-1, i3)
1510 +
r(i1-1,i2+1, i3) +
r(i1+1,i2+1, i3);
1512 for(i3=b3;i3<e3;i3++){
1514 z(i1,i2,i3) =
z(i1,i2,i3) + (
1516 +
operatorS[1] * (temp1[i3] +
r(i1,i2,i3-1) +
r(i1,i2,i3+1))
1517 +
operatorS[2] * (temp2[i3] + temp1[i3-1] + temp1[i3+1])
1518 #ifndef OPT_OPERATOR_S
1519 +
operatorS[3] * (temp2[i3-1] + temp2[i3+1])
1528 for(i1=b1;i1<e1;i1++){
1529 for(i2=b2;i2<e2;i2++){
1530 for(i3=b3;i3<e3;i3++){
1532 z(i1,i2,i3) =
z(i1,i2,i3) + (
1534 +
operatorS[1] * (
r(i1-1, i2, i3) +
r(i1+1, i2, i3)
1535 +
r( i1,i2-1, i3) +
r( i1,i2+1, i3)
1536 +
r( i1, i2,i3-1) +
r( i1, i2,i3+1))
1537 +
operatorS[2] * (
r(i1-1,i2-1, i3) +
r(i1-1,i2+1, i3)
1538 +
r(i1-1, i2,i3-1) +
r(i1-1, i2,i3+1)
1539 +
r(i1+1,i2-1, i3) +
r(i1+1,i2+1, i3)
1540 +
r(i1+1, i2,i3-1) +
r(i1+1, i2,i3+1)
1541 +
r( i1,i2-1,i3-1) +
r( i1,i2-1,i3+1)
1542 +
r( i1,i2+1,i3-1) +
r( i1,i2+1,i3+1))
1543 #ifndef OP_OPERATOR_S
1544 +
operatorS[3] * (
r(i1-1,i2-1,i3-1) +
r(i1-1,i2-1,i3+1)
1545 +
r(i1-1,i2+1,i3-1) +
r(i1-1,i2+1,i3+1)
1546 +
r(i1+1,i2-1,i3-1) +
r(i1+1,i2-1,i3+1)
1547 +
r(i1+1,i2+1,i3-1) +
r(i1+1,i2+1,i3+1))
1578 #define r(a,b,c) hit_tileElemAtNoStride3(m_r,a,b,c)
1580 HitTile_double sum, acum_sum;
1588 for(i1=b1;i1<e1;i1++){
1589 for(i2=b2;i2<e2;i2++){
1590 for(i3=b3;i3<e3;i3++){
1631 for (k =
k_num - 1; k >= 0; k--) {
1679 void printBlock(HitTile_double matrix,
int k,
int d){
1694 #define m(a,b,c) hit_tileElemAtNoStride3(matrix,a,b,c)
1696 ALL_SEQUENTIAL_BEGIN(iii)
1698 if( !(n1==0 || n2 == 0 || n3 == 0)){
1702 printf(
"\n(%d) =============== Block ================ (%d,%d,%d)\n",
HIT_TOPOLOGY_INFO.selfRank,n1,n2,n3);
1704 for(i1=b1-d;i1<e1+d;i1++){
1705 for(i2=b2-d;i2<e2+d;i2++){
1706 for(i3=b3-d;i3<e3+d;i3++){
1707 printf(
"%.2f\t ",
m(i1,i2,i3)*100);
1711 printf(
"=======================================\n");
1716 ALL_SEQUENTIAL_END(iii)
1721 void printfAllInternal(
const char* format, ... ) {
1723 ALL_SEQUENTIAL_BEGIN(i)
1725 fprintf( stdout, "%2d: ",i);
1727 va_start( args, format );
1728 vfprintf( stdout, format, args );
1731 ALL_SEQUENTIAL_END(i)
1735 void fill3(HitTile_double matrix,
int k){
1748 #define m(a,b,c) hit_tileElemAtArrayCoords3(matrix,a,b,c)
1752 for(i1=b1;i1<=e1;i1++)
1753 for(i2=b2;i2<=e2;i2++)
1754 for(i3=b3;i3<=e3;i3++)
1755 m(i1,i2,i3) = (100 * (i1+1) + 10 * (i2+1) + (i3+1)) / 100.0;
void add(HitTile_double m_x, HitTile_double m_y)
#define hit_shape(nd,...)
HitShape hit_shapeBorder(HitShape shape, int dim, int position, int offset)
void interp(HitTile_double m_zo, HitTile_double m_z, int k)
void resid(HitTile_double m_u, HitTile_double m_v, HitTile_double m_r, int k)
#define hit_layShape(lay)
#define hit_tileNewType(baseType)
void hit_comOpSumDouble(void *, void *, int *, HitType *)
#define hit_comOpFree(operation)
#define hit_clockSynchronizeAll()
#define hit_tileFill(var, value)
HitShape hit_shapeExpand(HitShape shape, int dims, int offset)
double norm2u3(HitTile_double m_r, int k)
#define hit_tileElemAt(var, ndims,...)
HitPTopology * HIT_TOPOLOGY_INFO
void hit_comFree(HitCom issue)
HitTile_double block_z[k_num]
HitCom hit_comSendRecvSelectTag(HitLayout lay, HitRanks sendTo, const void *tilePSend, HitShape selectionSend, int modeSelectSend, HitRanks receiveFrom, const void *tilePRecv, HitShape selectionRecv, int modeSelectRecv, HitType baseType, int tag)
void v_cycle(HitTile_double r, HitTile_double z, int k)
void hit_comDo(HitCom *issue)
void psinv(HitTile_double m_z, HitTile_double m_r, int k)
int hit_shapeCmp(HitShape sh1, HitShape sh2)
void vranlc(int N, double *X, double A, Y)
#define hit_tileDomainAlloc(newVarP, baseType, numDims,...)
#define hit_topology(name,...)
#define hit_layShapeOther(lay, ranks)
#define printfRootDebug(...)
void apply_correction(HitTile_double u, HitTile_double r, int k)
#define hit_clockGetMaxSeconds(c)
HitTile_double block_r[k_num]
void bubble(double ten[mm][2], int ten_index[mm][2][3], int ind)
#define HIT_COM_ARRAYCOORDS
void hit_patternAdd(HitPattern *pattern, HitCom comm)
void zran3(HitTile_double matrix)
#define hit_tileArray2Tile(var, dim, pos)
#define hit_tileDimCard(var, dim)
#define hit_tileDomainShapeAlloc(newVarP, baseType, shape)
void zero3(HitTile_double tile)
void hit_topFree(HitTopology topo)
#define hit_comOp(function, operation)
double power(double a, int n)
void rprj3(HitTile_double m_ro, HitTile_double m_r, int k)
void hit_layWrapNeighbors(HitLayout *lay)
void hit_patternFree(HitPattern *pattern)
#define hit_tileTile2Array(var, dim, pos)
void hit_comInit(int *pargc, char **pargv[])
void hit_comOpMaxDouble(void *, void *, int *, HitType *)
HitPattern pattern_r[k_num]
HitPattern pattern_z[k_num]
void hit_layFree(HitLayout lay)
int main(int argc, char *argv[])
HitPattern pattern_z_ex[k_num]
#define hit_layNeighbor(lay, dim, shift)
#define hit_comReduce(lay, root, tilePSend, tilePRecv, baseType, operation)
#define hit_comBarrier(lay)
double randlc(double *X, double *A)
#define hit_layout(name, topo,...)
#define hit_shapeSig(shape, dim)
#define hit_comSendSelectTag(lay, sendTo, tileP, selection, modeSelect, baseType, tag)
#define hit_tileFree(var)
#define hit_patternDo(pattern)
#define hit_comRecvSelectTag(lay, receiveFrom, tileP, selection, modeSelect, baseType, tag)
void hit_comOpMinDouble(void *, void *, int *, HitType *)
#define hit_clockReduce(lay, c)
#define hit_clockStart(c)
#define printfAllDebug(...)