Hitmap 1.3
refMPIluBack.c
1 /*
2 * refMPIluBack.c
3 * MPI reference code manually developed and optimized
4 * Solving an equation system: LU factorization + Back Substitution
5 * Both algorithms use parallel block-cyclic data distribution
6 *
7 * C implementation of the corresponding SCALAPACK algorithms
8 *
9 * v1.1
10 * (c) 2010, Carlos de Blas Carton
11 * (c) 2015, Arturo Gonzalez-Escribano
12 */
13 
14 /*
15  * <license>
16  *
17  * Hitmap v1.2
18  *
19  * This software is provided to enhance knowledge and encourage progress in the scientific
20  * community. It should be used only for research and educational purposes. Any reproduction
21  * or use for commercial purpose, public redistribution, in source or binary forms, with or
22  * without modifications, is NOT ALLOWED without the previous authorization of the copyright
23  * holder. The origin of this software must not be misrepresented; you must not claim that you
24  * wrote the original software. If you use this software for any purpose (e.g. publication),
25  * a reference to the software package and the authors must be included.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" AND ANY
28  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
30  * THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
34  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36  *
37  * Copyright (c) 2007-2015, Trasgo Group, Universidad de Valladolid.
38  * All rights reserved.
39  *
40  * More information on http://trasgo.infor.uva.es/
41  *
42  * </license>
43 */
44 
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <time.h>
49 #include <sys/time.h>
50 #include <math.h>
51 #include <mpi.h>
52 #include <unistd.h>
53 
54 #define BC_OK 0
55 #define BC_ERROR -1
56 #define min(a,b) (((a)<(b))?(a):(b))
57 #define max(a,b) (((a)>(b))?(a):(b))
58 #define TRUE 1
59 #define FALSE 0
60 
61 typedef struct mat {
62  int dim1;
63  int dim2;
64  int blocksize1;
65  int blocksize2;
66  int blocksize;
67  int nblock1;
68  int nblock2;
69  int last1;
70  int last2;
71  int mydim1;
72  int mydim2;
73  int mynblock1;
74  int mynblock2;
75  int mylast1;
76  int mylast2;
77  double ** mat;
78 } MATRIX;
79 
80 int initialize(int*, char ***);
81 int freeAll();
82 int initializeMat(MATRIX *, int, int, int, int, int*, int*);
83 int initializeVec(MATRIX *, int, int, int*);
84 int freeMat (MATRIX *);
85 int randomMat(MATRIX *, int, double, double);
86 
87 int print(MATRIX *, const char *);
88 int printVec(MATRIX *, const char *);
89 
90 int initializeClock();
91 int stopClock();
92 int printClock();
93 
94 void factorizeMat(MATRIX *, int, int);
95 void solveMat(MATRIX *, MATRIX *, int);
96 
97 #define lastsize(iproc,nproc,nblock,lastsize,blocksize) ((iproc==((nblock-1)%nproc))?lastsize:blocksize)
98 #define ismine(coordb,coordp,numproc) ((coordb%numproc)==coordp)
99 
100 #define localMatrixBlockAt(m,i,j) m->mat[((i)/mydata.numproccol)*m->mynblock2+((j)/mydata.numprocrow)]
101 #define matrixBlockAt(m,i,j) m->mat[(i)*m->mynblock2+(j)]
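/*
 * Block-cyclic indexing helpers:
 *  - ismine(coordb,coordp,numproc): global block coordinate coordb belongs to the
 *    process whose grid coordinate coordp equals coordb % numproc (cyclic ownership).
 *  - localMatrixBlockAt(m,i,j): maps GLOBAL block coordinates (i,j) of a block owned
 *    by the calling process to its pointer in the local block array, using the local
 *    indices i/mydata.numproccol and j/mydata.numprocrow.
 *  - matrixBlockAt(m,i,j): indexes with LOCAL block coordinates, stored row-major as
 *    i*mynblock2+j.
 *  Example: with numproccol==2, global block row 4 is owned by grid row 4%2==0 and is
 *  that process' local block row 4/2==2.
 */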
102 
103 double mytime, ** buffer2, ** buffer;
104 int dimmat1, dimmat2, dimbloq1, dimbloq2, *sizes;
105 MPI_Aint *addresses;
106 
107 MPI_Status estado;
108 MPI_Request recibo;
109 MPI_Datatype TBLOQUEM,TBLOQUEV, *types;
110 MPI_Op sumavec;
111 
112 //process information
113 typedef struct proc {
114  int myrow;
115  int mycolumn;
116  int mynumber;
117  int numproc;
118  int numprocrow;
119  int numproccol;
120  MPI_Comm comfilas;
121  MPI_Comm comcolumnas;
122 } PROC;
123 
123 
124 PROC mydata;
125 
126 //reduce operation
127 void vectorAdd (double *in, double *inout, int *len, MPI_Datatype* type) {
128  (void)len;
129  int i,size;
130  MPI_Type_size(*type,&size);
131  size/=(int)sizeof(double);
132  for(i=0;i<size;i++) inout[i]+=in[i];
133 }
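// Note: the element count is derived from the size of the incoming derived datatype
// (e.g. TBLOQUEV, a contiguous run of doubles) rather than from *len, because the
// reduction is invoked with count 1 on that derived type.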
134 
135 //clock utilities
136 int initializeClock() {
137  mytime=MPI_Wtime();
138  return BC_OK;
139 }
140 
141 int stopClock() {
142  mytime = MPI_Wtime() - mytime;
143  return BC_OK;
144 }
145 
146 int printClock() {
147  double tpored;
148  MPI_Reduce(&mytime,&tpored,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
149  // @arturo 2015: Clock output format similar to other examples
150  // if (mydata.myrow==0 && mydata.mycolumn==0) {
151  if ( mydata.mynumber == 0 ) {
152  //printf("%d\t%d\t%d\t%d\t%d\t%d\t%d\t%lf\n",mydata.numproc,mydata.numprocrow,mydata.numproccol,dimmat1,dimmat2,dimbloq1,dimbloq2,tpored);
153  printf("Clock_mainClock Max: %lf\n", tpored );
154  }
155  return BC_OK;
156 }
157 
158 //initialization
159 int initialize (int * argc, char ***argv) {
160 int low,high,product;
161  MPI_Init(argc,argv);
162 
163  MPI_Comm_size(MPI_COMM_WORLD,&mydata.numproc);
164  MPI_Comm_rank(MPI_COMM_WORLD,&mydata.mynumber);
165 
166  low = high = (int)sqrt((double)mydata.numproc);
167  product = low*high;
168  /* MOVE UP/DOWN TO SEARCH THE NEAREST FACTORS */
169  while ( product != mydata.numproc ) {
170  if (product < mydata.numproc) {
171  high++;
172  product += low;
173  }
174  else {
175  low--;
176  if (low==1) high=product=mydata.numproc;
177  else product -= high;
178  }
179  }
180  mydata.numproccol=high;
181  mydata.numprocrow=low;
182 
183  return BC_OK;
184 }
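// The factor search above picks the process grid closest to a square: e.g. 12
// processes give low=3, high=4, i.e. numprocrow=3 and numproccol=4; for a prime
// number of processes it falls back to a 1 x numproc grid.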
185 
186 int numroc(int iproc, int nproc, int nblock, int blocksize, int last) {
187 //computes the number of rows or columns in the local part of a matrix owned by a given processor
188  int res,bloqextra;
189  res=((nblock-1)/nproc)*blocksize;
190  bloqextra=(nblock-1)%nproc;
191  if(iproc<bloqextra){
192  res+=blocksize;
193  }
194  else if(iproc==bloqextra){
195  if (last==0) return res+blocksize;
196  else res+=last;
197  }
198  return res;
199 }
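// Worked example: nblock=10 blocks of blocksize=4 rows, last=2, nproc=3.
// Process 0 owns blocks 0,3,6,9 and gets ((10-1)/3)*4 + last = 12+2 = 14 rows;
// processes 1 and 2 own three full blocks each and get 12 rows.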
200 
201 int numbroc(int iproc, int nproc, int nblock) {
202 //computes the number of rows or columns of blocks in the local part of a matrix owned by a given processor
203  int res,bloqextra;
204  res=(nblock/nproc);
205  bloqextra=nblock%nproc;
206  if(iproc<bloqextra){
207  res++;
208  }
209  return res;
210 }
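// Worked example: nblock=10, nproc=3 gives 10/3+1 = 4 blocks for process 0
// (blocks 0,3,6,9) and 3 blocks each for processes 1 and 2.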
211 
212 //resources release
213 int freeAll() {
214  int i;
215  MPI_Comm_free(&mydata.comfilas);
216  MPI_Comm_free(&mydata.comcolumnas);
217  MPI_Type_free(&TBLOQUEV);
218  MPI_Type_free(&TBLOQUEM);
219  MPI_Op_free(&sumavec);
220  for(i=0;i<numbroc(mydata.myrow,mydata.numproccol,(dimmat1-1)/dimbloq1+1);i++) free(buffer[i]);
221  for(i=0;i<numbroc(mydata.mycolumn,mydata.numprocrow,(dimmat2-1)/dimbloq2+1);i++) free(buffer2[i]);
222  return BC_OK;
223 }
224 
225 //matrix resources release
226 int freeMat (MATRIX * mat) {
227  int i,j;
228  for(i=0;i<mat->mynblock1;i++) for(j=0;j<mat->mynblock2;j++) free(mat->mat[i*mat->mynblock2+j]);
229  free(mat->mat);
230  return BC_OK;
231 }
232 
233 
234 int initializeMat(MATRIX * mat, int n1, int n2, int tb1, int tb2, int * nb1, int * nb2) {
235  //initializes matrix structure
236  int i,j;
237  dimmat1=n1;
238  dimmat2=n2;
239  dimbloq1=tb1;
240  dimbloq2=tb2;
241 
242  MPI_Comm_split(MPI_COMM_WORLD,mydata.mynumber%mydata.numproccol,mydata.mynumber,&mydata.comfilas);
243  MPI_Comm_split(MPI_COMM_WORLD,mydata.mynumber/mydata.numproccol,mydata.mynumber,&mydata.comcolumnas);
244 
245  MPI_Comm_rank(mydata.comfilas,&mydata.mycolumn);
246  MPI_Comm_rank(mydata.comcolumnas,&mydata.myrow);
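 // Process grid: rank r gets coordinates myrow = r % numproccol (its rank inside
 // comcolumnas) and mycolumn = r / numproccol (its rank inside comfilas). Block rows
 // are dealt cyclically over myrow and block columns over mycolumn; comfilas connects
 // the processes of one grid row and comcolumnas those of one grid column.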
247 
248  mat->dim1=n1;
249  mat->dim2=n2;
250  mat->blocksize1=tb1;
251  mat->blocksize2=tb2;
252  mat->blocksize=tb1*tb2;
253  mat->nblock1=(n1-1)/tb1 + 1; //ceil
254  mat->nblock2=(n2-1)/tb2 + 1; //ceil
255  if ((mat->last1 = (n1 % tb1))==0) mat->last1=tb1;
256  if ((mat->last2 = (n2 % tb2))==0) mat->last2=tb2;
257  if (nb1!=NULL) *nb1=mat->nblock1;
258  if (nb2!=NULL) *nb2=mat->nblock2;
259 
260  mat->mydim1=numroc(mydata.myrow,mydata.numproccol,mat->nblock1,mat->blocksize1,mat->last1);
261  mat->mydim2=numroc(mydata.mycolumn,mydata.numprocrow,mat->nblock2,mat->blocksize2,mat->last2);
262  mat->mynblock1=numbroc(mydata.myrow,mydata.numproccol,mat->nblock1);
263  mat->mynblock2=numbroc(mydata.mycolumn,mydata.numprocrow,mat->nblock2);
264  mat->mylast1=lastsize(mydata.myrow,mydata.numproccol,mat->nblock1,mat->last1,mat->blocksize1);
265  mat->mylast2=lastsize(mydata.mycolumn,mydata.numprocrow,mat->nblock2,mat->last2,mat->blocksize2);
266 
267  //we reserve space for entire blocks: padding with 0's
268  mat->mat=calloc((size_t)(mat->mynblock1*mat->mynblock2),sizeof(double *));
269 
270  for(i=0;i<mat->mynblock1;i++) {
271  for(j=0;j<mat->mynblock2;j++) {
272  matrixBlockAt(mat,i,j)=calloc((size_t)(mat->blocksize1*mat->blocksize2),sizeof(double));
273  }
274  }
275 
276  buffer=calloc((size_t)mat->mynblock1,sizeof(double*));
277  for(j=0;j<mat->mynblock1;j++) {
278  buffer[j]=calloc((size_t)(mat->blocksize1*mat->blocksize2),sizeof(double));
279  }
280 
281 
282  buffer2=calloc((size_t)mat->mynblock2,sizeof(double*));
283  for(j=0;j<mat->mynblock2;j++) {
284  buffer2[j]=calloc((size_t)(mat->blocksize1*mat->blocksize2),sizeof(double));
285  }
286 
287  MPI_Type_contiguous(tb1*tb2,MPI_DOUBLE,&TBLOQUEM);
288  MPI_Type_commit(&TBLOQUEM);
289 
290  sizes = calloc((size_t)max(mat->mynblock1,mat->mynblock2),sizeof(int));
291  addresses = calloc((size_t)max(mat->mynblock1,mat->mynblock2),sizeof(MPI_Aint));
292  types = calloc((size_t)max(mat->mynblock1,mat->mynblock2),sizeof(MPI_Datatype));
293  for(j=0;j<max(mat->mynblock1,mat->mynblock2);j++) {types[j]=TBLOQUEM; sizes[j]=1;}
294 
295  return BC_OK;
296 }
297 
298 int initializeVec (MATRIX * mat, int n, int tb, int * nb) {
299  //initializes vector structure
300  int i,j;
301  mat->dim1=1;
302  mat->dim2=n;
303  mat->blocksize1=1;
304  mat->blocksize2=tb;
305  mat->blocksize=tb;
306  mat->nblock1=1;
307  mat->nblock2=(n-1)/tb + 1; //ceil
308  mat->last1 = 1;
309  if ((mat->last2 = (n % tb))==0) mat->last2=tb;
310  if (nb!=NULL) *nb=mat->nblock1;
311 
312  mat->mydim1=numroc(mydata.myrow,mydata.numproccol,mat->nblock1,mat->blocksize1,mat->last1);
313  mat->mydim2=numroc(mydata.mycolumn,mydata.numprocrow,mat->nblock2,mat->blocksize2,mat->last2);
314  mat->mynblock1=numbroc(mydata.myrow,mydata.numproccol,mat->nblock1);
315  mat->mynblock2=numbroc(mydata.mycolumn,mydata.numprocrow,mat->nblock2);
316  mat->mylast1=lastsize(mydata.myrow,mydata.numproccol,mat->nblock1,mat->last1,mat->blocksize1);
317  mat->mylast2=lastsize(mydata.mycolumn,mydata.numprocrow,mat->nblock2,mat->last2,mat->blocksize2);
318 
319  //we reserve space for entire blocks: padding with 0's
320  mat->mat=calloc((size_t)(mat->mynblock1*mat->mynblock2),sizeof(double *));
321 
322  for(i=0;i<mat->mynblock1;i++) {
323  for(j=0;j<mat->mynblock2;j++) {
324  matrixBlockAt(mat,i,j)=calloc((size_t)(mat->blocksize1*mat->blocksize2),sizeof(double));
325  }
326  }
327 
328  MPI_Type_contiguous(tb,MPI_DOUBLE,&TBLOQUEV);
329  MPI_Type_commit(&TBLOQUEV);
330 
331  MPI_Op_create((MPI_User_function*)vectorAdd,1,&sumavec);
332 
333  return BC_OK;
334 }
335 
336 int randomMat(MATRIX * mat, int seed, double min, double max) {
337  //random initialization of a matrix using a seed
338  int i,j,k,l,tam1,tam2,mio;
339  double * punteroM;
340 
341  if (min>=max) return BC_ERROR;
342  srand48((long)seed);
343  for(i=0;i<mat->nblock1;i++) {
344  tam1=(i<mat->nblock1-1)?mat->blocksize1:mat->last1;
345  mio=ismine(i,mydata.myrow,mydata.numproccol);
346  for(k=0;k<tam1;k++) {
347  for(j=0;j<mat->nblock2;j++) {
348  tam2=(j<mat->nblock2-1)?mat->blocksize2:mat->last2;
349  if (ismine(j,mydata.mycolumn,mydata.numprocrow) && mio) {
350  punteroM=localMatrixBlockAt(mat,i,j);
351  for (l=0;l<tam2;l++) punteroM[k*mat->blocksize2+l]=drand48()*(max-min)+min;
352  }
353  else {
354  for (l=0;l<tam2;l++) drand48();
355  }
356  }
357  }
358  }
359  return BC_OK;
360 }
361 
362 int print(MATRIX * mat, const char * matrixName) {
363  //prints a matrix
364  int i,j,k,l,mio,tam1,tam2,offset1;
365  // int offset2;
366  double * buf,*punteroM;
367 
368  // @arturo 2015: Output compatible with check script
369  MPI_Barrier( MPI_COMM_WORLD );
370  FILE *f = fopen( matrixName, "w+" );
371  if ( f == NULL ) {
372  fprintf(stderr,"Error: Opening matrix file %s\n", matrixName );
373  MPI_Finalize();
374  exit( EXIT_FAILURE );
375  }
376  MPI_Barrier( MPI_COMM_WORLD );
377 
378  if (mydata.myrow==0 && mydata.mycolumn==0) {
379  // @arturo 2015: Output compatible with check script
380  // putc(10,stdout);
381  buf=calloc((size_t)mat->blocksize2,sizeof(double));
382 
383  for(i=0;i<mat->nblock1;i++) {
384  tam1=(i<mat->nblock1-1)?mat->blocksize1:mat->last1;
385  mio=ismine(i,0,mydata.numproccol);
386  offset1=i*mat->blocksize1;
387  for(k=0;k<tam1;k++) {
388  for(j=0;j<mat->nblock2;j++) {
389  tam2=(j<mat->nblock2-1)?mat->blocksize2:mat->last2;
390  //offset2=j*mat->blocksize2;
391  if (ismine(j,0,mydata.numprocrow) && mio) {
392  punteroM=localMatrixBlockAt(mat,i,j);
393  for (l=0;l<tam2;l++)
394  // @arturo 2015: Output compatible with check script
395  //fprintf(f,"%s(%d,%d)=%f\n",matrixName,offset1,offset2+l,punteroM[l+k*mat->blocksize2]);
396  fprintf(f,"%016.6lf\n",punteroM[l+k*mat->blocksize2]);
397  }
398  else {
399  MPI_Recv(buf,tam2,MPI_DOUBLE,i%mydata.numproccol+(j%mydata.numprocrow)*mydata.numproccol,MPI_ANY_TAG,MPI_COMM_WORLD,&estado);
400  for (l=0;l<tam2;l++)
401  // @arturo 2015: Output compatible with check script
402  //fprintf(f,"%s(%d,%d)=%f\n",matrixName,offset1,offset2+l,buf[l]);
403  fprintf(f,"%016.6lf\n",buf[l]);
404  }
405  }
406  offset1++;
407  // @arturo 2015: Output compatible with check script
408  //putc(10,stdout);
409  }
410  }
411  free(buf);
412  // @arturo 2015: Output compatible with check script
413  //fflush(stdout);
414  fclose(f);
415  }
416  else {
417  for(i=0;i<mat->nblock1;i++) {
418  tam1=(i<mat->nblock1-1)?mat->blocksize1:mat->last1;
419  mio=ismine(i,mydata.myrow,mydata.numproccol);
420  for(k=0;k<tam1;k++) {
421  for(j=0;j<mat->nblock2;j++) {
422  tam2=(j<mat->nblock2-1)?mat->blocksize2:mat->last2;
423  if (ismine(j,mydata.mycolumn,mydata.numprocrow) && mio) {
424  punteroM=localMatrixBlockAt(mat,i,j);
425  MPI_Send(&punteroM[k*mat->blocksize2],tam2,MPI_DOUBLE,0,0,MPI_COMM_WORLD);
426  }
427  }
428  }
429  }
430  }
431  MPI_Barrier(MPI_COMM_WORLD);
432  return BC_OK;
433 }
434 
435 
436 int printVec(MATRIX * mat, const char * matrixName) {
437  //prints a vector
438  int j,l,tam2;
439  // int offset2;
440  double * buf,*punteroM;
441 
442  // @arturo 2015: Output compatible with check script
443  MPI_Barrier( MPI_COMM_WORLD );
444  FILE *f = fopen( matrixName, "w+" );
445  if ( f == NULL ) {
446  fprintf(stderr,"Error: Opening vector file %s\n", matrixName );
447  MPI_Finalize();
448  exit( EXIT_FAILURE );
449  }
450  MPI_Barrier( MPI_COMM_WORLD );
451 
452  if (mydata.myrow==0 && mydata.mycolumn==0) {
453  // @arturo 2015: Output compatible with check script
454  //putc(10,stdout);
455  buf=calloc((size_t)mat->blocksize2,sizeof(double));
456 
457  for(j=0;j<mat->nblock2;j++) {
458  tam2=(j<mat->nblock2-1)?mat->blocksize2:mat->last2;
459  //offset2=j*mat->blocksize2;
460  if (ismine(j,0,mydata.numprocrow)) {
461  punteroM=localMatrixBlockAt(mat,0,j);
462  for (l=0;l<tam2;l++)
463  // @arturo 2015: Output compatible with check script
464  //fprintf(f,"%s(%d,%d)=%f\n\n",matrixName,offset2+l,0,punteroM[l]);
465  fprintf(f,"%016.6lf\n",punteroM[l]);
466  }
467  else {
468  MPI_Recv(buf,tam2,MPI_DOUBLE,(j%mydata.numprocrow)*mydata.numproccol,MPI_ANY_TAG,MPI_COMM_WORLD,&estado);
469  for (l=0;l<tam2;l++)
470  // @arturo 2015: Output compatible with check script
471  //fprintf(f,"%s(%d,%d)=%f\n\n",matrixName,offset2+l,0,buf[l]);
472  fprintf(f,"%016.6lf\n",buf[l]);
473  }
474  }
475  free(buf);
476  // @arturo 2015: Output compatible with check script
477  //fflush(stdout);
478  fclose(f);
479  }
480  else if (mydata.myrow==0) {
481  for(j=0;j<mat->nblock2;j++) {
482  tam2=(j<mat->nblock2-1)?mat->blocksize2:mat->last2;
483  if (ismine(j,mydata.mycolumn,mydata.numprocrow)) {
484  punteroM=localMatrixBlockAt(mat,0,j);
485  MPI_Send(punteroM,tam2,MPI_DOUBLE,0,0,MPI_COMM_WORLD);
486  }
487  }
488  }
489  MPI_Barrier(MPI_COMM_WORLD);
490  return BC_OK;
491 }
492 
493 void strsmu(int m, int n, double * a, double * b, int lda, int ldb) {
494 //solves AX=B / A MxM matrix, X and B MxN matrices
495 //A is upper triangular
496 //solution in B
497 //requires m,n>=0
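// In-place back substitution, column by column of B:
//   x(k,j) = ( b(k,j) - sum_{i>k} a(k,i)*x(i,j) ) / a(k,k),  k = m-1 .. 0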
498 
499  int j,k,i;
500  double temp;
501 
502  if (m==0 || n==0) return;
503 
504  for(k=m-1;k>=0;k--) {
505  for(i=m-1;i>k;i--) {
506  if((temp=-a[k*lda+i])!=0.0) {
507  for (j=0;j<n;j++) {
508 #ifdef DEBUG
509 printf("RUT: STRSMU, PROC %d;%d ",mydata.myrow,mydata.mycolumn);
510 printf("%lf -= %lf*%lf = %lf\n",b[k*ldb+j],-temp,b[i*ldb+j],b[k*ldb+j]+temp*b[i*ldb+j]);
511 #endif
512  b[k*ldb+j]+=temp*b[i*ldb+j];
513  }
514  }
515  }
516  temp=a[k*lda+k];
517  for(j=0;j<n;j++) {
518 #ifdef DEBUG
519 printf("RUT: STRSMU, PROC %d;%d ",mydata.myrow,mydata.mycolumn);
520 printf("%lf /= %lf = %lf\n",b[k*ldb+j],temp,b[k*ldb+j]/temp);
521 #endif
522  b[k*ldb+j]/=temp;
523  }
524  }
525 }
526 
527 void strsml(int m, int n, double * a, double * b, int lda, int ldb) {
528 //solves AX=B / A MxM matrix, X and B MxN matrices
529 //A is lower triangular with 1's in the diagonal
530 //solution in B
531 //requires m,n>=0
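// In-place forward substitution (unit diagonal, so no division):
//   x(k,j) = b(k,j) - sum_{i<k} a(k,i)*x(i,j),  k = 0 .. m-1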
532 
533  int j,k,i;
534  double temp;
535 
536  if (m==0 || n==0) return;
537 
538  for(k=0;k<m;k++) {
539  for(i=0;i<k;i++) {
540  if((temp=-a[k*lda+i])!=0.0) {
541  for (j=0;j<n;j++) {
542 #ifdef DEBUG
543 printf("RUT: STRSML, PROC %d;%d ",mydata.myrow,mydata.mycolumn);
544 printf("%lf -= %lf*%lf = %lf\n",b[k*ldb+j],-temp,b[i*ldb+j],b[k*ldb+j]+temp*b[i*ldb+j]);
545 #endif
546  b[k*ldb+j]+=temp*b[i*ldb+j];
547  }
548  }
549  }
550  }
551 }
552 
553 void strsml2(int m, int n, double * a, double * b, int lda, int ldb) {
554 //solves XA=B / A NxN matrix, X and B MxN matrices
555 //A is upper triangular (only its upper triangular part is referenced)
556 //solution in B
557 //requires m,n>=0
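// In-place forward elimination over the columns of B:
//   x(j,k) = ( b(j,k) - sum_{l<k} x(j,l)*a(l,k) ) / a(k,k),  k = 0 .. n-1
// Only the upper triangular part of a (row k, columns k..n-1) is referenced.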
558 
559  int j,k,i;
560  double temp;
561 
562  if (m==0 || n==0) return;
563 
564  for(k=0;k<n;k++) {
565  for(j=0;j<m;j++) {
566  if (b[j*ldb+k]!=0.0) {
567 #ifdef DEBUG
568 printf("RUT: STRSML2, PROC %d;%d ",mydata.myrow,mydata.mycolumn);
569 printf("%lf /= %lf = %lf\n",b[j*ldb+k],a[k*lda+k],b[j*ldb+k]/a[k*lda+k]);
570 #endif
571  b[j*ldb+k]/=a[k*lda+k];
572  temp=-b[j*ldb+k];
573  for(i=k+1;i<n;i++) {
574 #ifdef DEBUG
575 printf("RUT: STRSML2, PROC %d;%d ",mydata.myrow,mydata.mycolumn);
576 printf("%lf -= %lf*%lf = %lf\n",b[j*ldb+i],-temp,a[k*lda+i],b[j*ldb+i]+temp*a[k*lda+i]);
577 #endif
578  b[j*ldb+i]+=temp*a[k*lda+i];
579  }
580  }
581  }
582  }
583 }
584 
585 void sgemm (int m, int n, int k, double * a, double * b, double * c, int lda, int ldb, int ldc) {
586 //computes C=C-AB / A MxK matrix, B KxN matrix and C MxN matrix
587 //requires m,n,k>=0
588  double temp;
589  int i,j,l;
590 
591  if (n==0 || m==0 || k==0) return;
592 
593  for(i=0;i<m;i++) {
594  for(l=0;l<k;l++) {
595  if((temp=-a[i*lda+l])!=0.0) {
596  for(j=0;j<n;j++) {
597 #ifdef DEBUG
598 printf("RUT: SGEMM, PROC %d;%d ",mydata.myrow,mydata.mycolumn);
599 printf("%lf -= %lf*%lf = %lf\n",c[i*ldc+j],-temp,b[l*ldb+j],c[i*ldc+j]+temp*b[l*ldb+j]);
600 #endif
601  c[i*ldc+j]+=temp*b[l*ldb+j];
602  }
603  }
604  }
605  }
606 }
607 
609 
610 void factorizeMat(MATRIX * mat, int m, int n) {
611  //factorizes A in L*U
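 // Overall structure (right-looking block LU without pivoting): the loop visits the
 // min(m,n) diagonal blocks. At step i the diagonal block lives on grid position
 // (p1,p2), at local block coordinates (b1,b2) of its owner. The owner factorizes it,
 // scales its column panel with strsml2, and the row/column panels are broadcast
 // along comcolumnas/comfilas using hindexed datatypes built over the non-contiguous
 // local block pointers; every process then updates its trailing blocks with sgemm.
 // A zero diagonal entry is only reported, not cured by pivoting.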
612 
613  int i,j,k,ii,jj,min,tam1,tam2,tam3,p1,p2,b1,b2,bb2,bb1,bini;
614  double *b, *c, temp, *buf;
615  min=min(m,n);
616  MPI_Datatype tbloqfil, tbloqfilrcv, tbloqcol, tbloqcolrcv, tcol;
617 
618  p1=0;
619  p2=0;
620  b1=0;
621  b2=0;
622 
623  buf=calloc((size_t)mat->blocksize1,sizeof(double));
624 
625  #define lda mat->blocksize2
626 
627  for(i=0;i<min;i++) {
628  bb1=(mydata.myrow>p1)?b1:b1+1;
629  bb2=(mydata.mycolumn>p2)?b2:b2+1;
630 
631  tam1=(i<mat->nblock1-1)?mat->blocksize1:mat->last1;
632  if(bb2<mat->mynblock2) {
633  if (b1<mat->mynblock1) {
634  for(j=bb2;j<mat->mynblock2;j++) {
635  MPI_Address(matrixBlockAt(mat,b1,j),&addresses[j]);
636  }
637  for(j=mat->mynblock2-1;j>=bb2;j--) {
638  addresses[j]-=addresses[bb2];
639  }
640  MPI_Type_create_hindexed(mat->mynblock2-bb2,sizes,&addresses[bb2],TBLOQUEM,&tbloqfil);
641  MPI_Type_commit(&tbloqfil);
642  }
643  for(j=bb2;j<mat->mynblock2;j++) {
644  MPI_Address(buffer2[j],&addresses[j]);
645  }
646  for(j=mat->mynblock2-1;j>=bb2;j--) {
647  addresses[j]-=addresses[bb2];
648  }
649  MPI_Type_create_hindexed(mat->mynblock2-bb2,sizes,&addresses[bb2],TBLOQUEM,&tbloqfilrcv);
650  MPI_Type_commit(&tbloqfilrcv);
651  }
652  if(bb1<mat->mynblock1) {
653  if (b2<mat->mynblock2) {
654 
655  for(j=bb1;j<mat->mynblock1;j++) {
656  MPI_Address(matrixBlockAt(mat,j,b2),&addresses[j]);
657  }
658  for(j=mat->mynblock1-1;j>=bb1;j--) {
659  addresses[j]-=addresses[bb1];
660  }
661  MPI_Type_create_hindexed(mat->mynblock1-bb1,sizes,&addresses[bb1],TBLOQUEM,&tbloqcol);
662  MPI_Type_commit(&tbloqcol);
663  }
664  for(j=bb1;j<mat->mynblock1;j++) {
665  MPI_Address(buffer[j],&addresses[j]);
666  }
667  for(j=mat->mynblock1-1;j>=bb1;j--) {
668  addresses[j]-=addresses[bb1];
669  }
670  MPI_Type_create_hindexed(mat->mynblock1-bb1,sizes,&addresses[bb1],TBLOQUEM,&tbloqcolrcv);
671  MPI_Type_commit(&tbloqcolrcv);
672  }
674  //process with diagonal block//
676  if (mydata.myrow==p1 && mydata.mycolumn==p2) {
677  //factorize diagonal block and blocks on its right
678  b=matrixBlockAt(mat,b1,b2);
679 
680  for(k=0;k<tam1-1;k++){
681  if (b[k*lda+k]==0.0) {
682  fprintf(stderr,"0.0 in position %d,%d\n",k+i*mat->blocksize1,k+i*mat->blocksize1);
683  }
684  for(ii=k+1;ii<tam1;ii++) {
685  //L multiplier
686  if(b[ii*lda+k]!=0.0) {
687 #ifdef DEBUG
688 printf("RUT: FACT, PROC %d;%d ",mydata.myrow,mydata.mycolumn);
689 printf("%lf /= %lf = %lf\n",b[ii*lda+k],b[k*lda+k],b[ii*lda+k]/b[k*lda+k]);
690 #endif
691  b[ii*lda+k]/=b[k*lda+k];
692  }
693  }
694  MPI_Type_vector(tam1-k-1,1,lda,MPI_DOUBLE,&tcol);
695  MPI_Type_commit(&tcol);
696  MPI_Bcast(&b[(k+1)*lda+k],1,tcol,p2,mydata.comfilas);
697  //update trailing matrix
698  for(ii=k+1;ii<tam1;ii++) {
699  if((temp=-b[ii*lda+k])!=0.0) {
700  tam3=(b2<mat->nblock2-1)?mat->blocksize2:mat->last2;
701  for(jj=k+1;jj<tam3;jj++) {
702 #ifdef DEBUG
703 printf("RUT: FACT, PROC %d;%d ",mydata.myrow,mydata.mycolumn);
704 printf("%lf -= %lf*%lf = %lf\n",b[ii*lda+jj],-temp,b[k*lda+jj],b[ii*lda+jj]+temp*b[k*lda+jj]);
705 #endif
706  b[ii*lda+jj]+=temp*b[k*lda+jj];
707  }
708  for(j=b2+1;j<mat->mynblock2;j++) {
709  tam3=(j<mat->mynblock2-1)?mat->blocksize2:mat->mylast2;
710  c=matrixBlockAt(mat,b1,j);
711  for(jj=0;jj<tam3;jj++) {
712 #ifdef DEBUG
713 printf("RUT: FACT, PROC %d;%d ",mydata.myrow,mydata.mycolumn);
714 printf("%lf -= %lf*%lf = %lf\n",c[ii*lda+jj],-temp,c[k*lda+jj],c[ii*lda+jj]+temp*c[k*lda+jj]);
715 #endif
716  c[ii*lda+jj]+=temp*c[k*lda+jj];
717  }
718  }
719  }
720  }
721  MPI_Type_free(&tcol);
722  }
723  if (b[(tam1-1)*lda+tam1-1]==0.0) {
724  fprintf(stderr,"0.0 in position %d,%d\n",tam1-1+i*mat->blocksize1,tam1-1+i*mat->blocksize1);
725  }
726 
728 
729  //sending of diagonal block
730  MPI_Bcast(b,1,TBLOQUEM,p1,mydata.comcolumnas);
731 
732  //column blocks behind diagonal block
733  for(j=b1+1;j<mat->mynblock1;j++) {
734  //a(j,i)/=a(i,i)
735  tam2=(j<mat->mynblock1-1)?mat->blocksize1:mat->mylast1;
736  c=matrixBlockAt(mat,j,b2);
737  strsml2(tam2,tam1,b,c,mat->blocksize2,mat->blocksize2);
738  }
739 
741 
742  if(b2+1<mat->mynblock2) {
743  //sending of row blocks
744  MPI_Bcast(matrixBlockAt(mat,b1,b2+1),1,tbloqfil,p1,mydata.comcolumnas);
745  }
746  if(b1+1<mat->mynblock1) {
747  //sending of column blocks
748  MPI_Bcast(matrixBlockAt(mat,b1+1,b2),1,tbloqcol,p2,mydata.comfilas);
749  }
750 
751  //rest of blocks
752  //column blocks
753  for(j=b1+1;j<mat->mynblock1;j++) {
754  tam2=(j<mat->mynblock1-1)?mat->blocksize1:mat->mylast1;
755  c=matrixBlockAt(mat,j,b2);
756  //row blocks
757  for(k=b2+1;k<mat->mynblock2;k++) {
758  //a(j,k)-=a(j,i)*a(i,k)
759  tam3=(k<mat->mynblock2-1)?mat->blocksize2:mat->mylast2;
760  sgemm(tam2,tam3,tam1,c,matrixBlockAt(mat,b1,k),matrixBlockAt(mat,j,k),mat->blocksize2,mat->blocksize2,mat->blocksize2);
761  }
762  }
763  }
765  //processes with blocks below diagonal block//
767  else if (mydata.mycolumn==p2) {
768  //reception of diagonal blocks
769  MPI_Bcast(buffer2[b2],1,TBLOQUEM,p1,mydata.comcolumnas);
770 
771  //column blocks below diagonal block
772  b=buffer2[b2];
773  for(j=bb1;j<mat->mynblock1;j++) {
774  //a(j,i)/=a(i,i)
775  tam2=(j<mat->mynblock1-1)?mat->blocksize1:mat->mylast1;
776  strsml2(tam2,tam1,b,matrixBlockAt(mat,j,b2),mat->blocksize2,mat->blocksize2);
777  }
778 
780  //reception of row blocks
781  if (b2+1<mat->mynblock2) {
782  MPI_Bcast(buffer2[b2+1],1,tbloqfilrcv,p1,mydata.comcolumnas);
783  }
784 
785  //sending of column blocks
786  if(bb1<mat->mynblock1){
787  MPI_Bcast(matrixBlockAt(mat,bb1,b2),1,tbloqcol,p2,mydata.comfilas);
788  }
789 
790  //rest of blocks
791  //column blocks
792  for(j=bb1;j<mat->mynblock1;j++) {
793  tam2=(j<mat->mynblock1-1)?mat->blocksize1:mat->mylast1;
794  c=matrixBlockAt(mat,j,b2);
795  //row blocks
796  for(k=b2+1;k<mat->mynblock2;k++) {
797  //a(j,k)-=a(j,i)*a(i,k)
798  tam3=(k<mat->mynblock2-1)?mat->blocksize2:mat->mylast2;
799  sgemm(tam2,tam3,tam1,c,buffer2[k],matrixBlockAt(mat,j,k),mat->blocksize2,mat->blocksize2,mat->blocksize2);
800  }
801  }
802  }
804  //processes with blocks on the right of diagonal block//
806  else if (mydata.myrow==p1) {
807  bini=(mydata.mycolumn>p2)?b2:b2+1;
808  //factorize blocks on the right of diagonal block
809  for(k=0;k<tam1-1;k++) {
810  //reception of column
811  MPI_Type_contiguous(tam1-k-1,MPI_DOUBLE,&tcol);
812  MPI_Type_commit(&tcol);
813  MPI_Bcast(buf,1,tcol,p2,mydata.comfilas);
814  //update trailing matrix
815  for(ii=k+1;ii<tam1;ii++) {
816  //L multiplier
817  if((temp=-buf[ii-k-1])!=0.0) {
818  for(j=bini;j<mat->mynblock2;j++) {
819  c=matrixBlockAt(mat,b1,j);
820  tam3=(j<mat->mynblock2-1)?mat->blocksize2:mat->mylast2;
821  for(jj=0;jj<tam3;jj++) {
822 #ifdef DEBUG
823 printf("RUT: FACT, PROC %d;%d ",mydata.myrow,mydata.mycolumn);
824 printf("%lf -= %lf*%lf = %lf\n",c[ii*lda+jj],-temp,c[k*lda+jj],c[ii*lda+jj]+temp*c[k*lda+jj]);
825 #endif
826  c[ii*lda+jj]+=temp*c[k*lda+jj];
827  }
828  }
829  }
830  }
831  MPI_Type_free(&tcol);
832  }
833 
835  if(bb2<mat->mynblock2) {
836  //sending of row blocks
837  MPI_Bcast(matrixBlockAt(mat,b1,bb2),1,tbloqfil,p1,mydata.comcolumnas);
838  }
839  if(b1+1<mat->mynblock1) {
840  //reception of column blocks
841  MPI_Bcast(buffer[b1+1],1,tbloqcolrcv,p2,mydata.comfilas);
842  }
843  //rest of blocks
844  //column blocks
845  for(j=b1+1;j<mat->mynblock1;j++) {
846  tam2=(j<mat->mynblock1-1)?mat->blocksize1:mat->mylast1;
847  c=buffer[j];
848  //row blocks
849  for(k=bb2;k<mat->mynblock2;k++) {
850  //a(j,k)-=a(j,i)*a(i,k)
851  tam3=(k<mat->mynblock2-1)?mat->blocksize2:mat->mylast2;
852  sgemm(tam2,tam3,tam1,c,matrixBlockAt(mat,b1,k),matrixBlockAt(mat,j,k),mat->blocksize2,mat->blocksize2,mat->blocksize2);
853  }
854  }
855  }
857  //rest of processes//
859  else {
860  //reception of row blocks
861  if(bb2<mat->mynblock2) {
862  MPI_Bcast(buffer2[bb2],1,tbloqfilrcv,p1,mydata.comcolumnas);
863  }
864  if(bb1<mat->mynblock1) {
865  //reception of column blocks
866  MPI_Bcast(buffer[bb1],1,tbloqcolrcv,p2,mydata.comfilas);
867  }
868  //column blocks
869  for(j=bb1;j<mat->mynblock1;j++) {
870  tam2=(j<mat->mynblock1-1)?mat->blocksize1:mat->mylast1;
871  c=buffer[j];
872  //row blocks
873  for(k=bb2;k<mat->mynblock2;k++) {
874  //a(j,k)-=a(j,i)*a(i,k)
875  tam3=(k<mat->mynblock2-1)?mat->blocksize2:mat->mylast2;
876  sgemm(tam2,tam3,tam1,c,buffer2[k],matrixBlockAt(mat,j,k),mat->blocksize2,mat->blocksize2,mat->blocksize2);
877  }
878  }
879  }
880  if(bb2<mat->mynblock2) {
881  if (b1<mat->mynblock1) MPI_Type_free(&tbloqfil);
882  MPI_Type_free(&tbloqfilrcv);
883  }
884  if(bb1<mat->mynblock1) {
885  if (b2<mat->mynblock2) MPI_Type_free(&tbloqcol);
886  MPI_Type_free(&tbloqcolrcv);
887  }
888  p1=(p1+1)%mydata.numproccol;
889  p2=(p2+1)%mydata.numprocrow;
890  if (p1==0) b1++;
891  if (p2==0) b2++;
892  }
893  free(buf);
894 }
895 
896 void solveMat(MATRIX * mat, MATRIX * matb, int n) {
897  //solves the system Ax=B
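 // Two sweeps over the n block columns of the right-hand side:
 //  1) forward substitution Ly=b, left to right, using strsml on the diagonal blocks
 //     (L has unit diagonal);
 //  2) back substitution Ux=y, right to left, using strsmu.
 // Only grid row 0 (myrow==0) holds vector blocks; those processes receive the matrix
 // row panels they need from grid row p1 of their column (over comcolumnas), build
 // partial products with sgemm, and combine them with MPI_Reduce over comfilas using
 // the user-defined operation sumavec.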
898 
899  int p1,p2,b1,b2,bb2,i,j,tam1,tam2;
900  double *b,*c,*temp;
901  MPI_Datatype tbloqfil, tbloqfilrcv;
902 
903  p1=0;
904  p2=0;
905  b1=0;
906  b2=0;
907 
908  temp=calloc((size_t)matb->blocksize2,sizeof(double));
909 
910  //solves Ly=b
911  for(i=0;i<n;i++) {
912  //MPI type creation
913  tam1=(i<mat->nblock1-1)?mat->blocksize1:mat->last1;
914  bb2=(mydata.mycolumn>p2)?b2-1:b2;
915  if(bb2>=0) {
916  if (b1<mat->mynblock1) {
917  for(j=0;j<=bb2;j++) {
918  MPI_Address(matrixBlockAt(mat,b1,j),&addresses[j]);
919  }
920  for(j=bb2;j>=0;j--) {
921  addresses[j]-=addresses[0];
922  }
923  MPI_Type_create_hindexed(bb2+1,sizes,addresses,TBLOQUEM,&tbloqfil);
924  MPI_Type_commit(&tbloqfil);
925  }
926  for(j=0;j<=bb2;j++) {
927  MPI_Address(buffer2[j],&addresses[j]);
928  }
929  for(j=bb2;j>=0;j--) {
930  addresses[j]-=addresses[0];
931  }
932  MPI_Type_create_hindexed(bb2+1,sizes,addresses,TBLOQUEM,&tbloqfilrcv);
933  MPI_Type_commit(&tbloqfilrcv);
934  }
935 
937  //process with the i-th block of the vector//
939  if (mydata.myrow==0 && mydata.mycolumn==p2) {
940  b=matrixBlockAt(matb,0,b2);
941  if (p1==0) { //blocks of matrix are also mine
942  c=matrixBlockAt(mat,b1,b2);
943  for(j=b2-1;j>=0;j--) {
944  //b(i)-=b(j)*a(i,j)
945  sgemm(tam1,1,matb->blocksize2,matrixBlockAt(mat,b1,j),matrixBlockAt(matb,0,j),b,mat->blocksize2,1,1);
946  }
947  }
948  else {
949  //matrix blocks reception
950  if(b2>=0) {
951  MPI_Recv(buffer2[0],1,tbloqfilrcv,p1,i,mydata.comcolumnas,&estado);
952  }
953  c=buffer2[b2];
954  for(j=b2-1;j>=0;j--) {
955  //b(i)-=b(j)*a(i,j)
956  tam2=(j<matb->mynblock1-1)?matb->blocksize1:matb->mylast1;
957  sgemm(tam1,1,matb->blocksize2,buffer2[j],matrixBlockAt(matb,0,j),b,mat->blocksize2,1,1);
958  }
959  }
960  //Reduction of partial results
961  for(j=0;j<matb->blocksize2;j++) temp[j]=b[j];
962  MPI_Reduce(temp,b,1,TBLOQUEV,sumavec,p2,mydata.comfilas);
963  strsml(tam1,1,c,b,mat->blocksize2,1);
964  }
966  //processes with other blocks of the vector//
968  else if (mydata.myrow==0) { //mydata.mycolumn/=p2
969  //accumulator block initialized to 0
970  for(j=0;j<matb->blocksize2;j++) temp[j]=0.0;
971  if (p1==0) { //blocks of matrix are also mine
972  for(j=bb2;j>=0;j--) {
973  //b(i)-=b(j)*a(i,j)
974  sgemm(tam1,1,matb->blocksize2,matrixBlockAt(mat,b1,j),matrixBlockAt(matb,0,j),temp,mat->blocksize2,1,1);
975  }
976  }
977  else {
978  //matrix blocks reception
979  if(bb2>=0) {
980  MPI_Recv(buffer2[0],1,tbloqfilrcv,p1,i,mydata.comcolumnas,&estado);
981  }
982  for(j=bb2;j>=0;j--) {
983  //b(i)-=b(j)*a(i,j)
984  sgemm(tam1,1,matb->blocksize2,buffer2[j],matrixBlockAt(matb,0,j),temp,mat->blocksize2,1,1);
985  }
986  }
987  //sending of partial result
988  MPI_Reduce(temp,NULL,1,TBLOQUEV,sumavec,p2,mydata.comfilas);
989  }
991  //processes with blocks in the i-th block row of the matrix//
993  else if (mydata.myrow==p1) {
994  //p2!=0
995  if(bb2>=0) {
996  //block sending
997  MPI_Send(matrixBlockAt(mat,b1,0),1,tbloqfil,0,i,mydata.comcolumnas);
998  }
999  }
1000  if(bb2>=0) {
1001  if (b1<mat->mynblock1) MPI_Type_free(&tbloqfil);
1002  MPI_Type_free(&tbloqfilrcv);
1003  }
1004  p1=(p1+1)%mydata.numproccol;
1005  p2=(p2+1)%mydata.numprocrow;
1006  if (p1==0) b1++;
1007  if (p2==0) b2++;
1008  }
1009  p1=(p1==0)?(mydata.numproccol-1):(p1-1);
1010  p2=(p2==0)?(mydata.numprocrow-1):(p2-1);
1011  if (p1==mydata.numproccol-1) b1--;
1012  if (p2==mydata.numprocrow-1) b2--;
1013  //solves Ux=y
1014  for(i=n-1;i>=0;i--) {
1015  //MPI type creation
1016  bb2=(mydata.mycolumn>=p2)?b2:b2+1;
1017  tam1=(i<mat->nblock1-1)?mat->blocksize1:mat->last1;
1018  if(bb2<mat->mynblock2) {
1019  if (b1<mat->mynblock1) {
1020  for(j=bb2;j<mat->mynblock2;j++) {
1021  MPI_Address(matrixBlockAt(mat,b1,j),&addresses[j]);
1022  }
1023  for(j=mat->mynblock2-1;j>=bb2;j--) {
1024  addresses[j]-=addresses[bb2];
1025  }
1026  MPI_Type_create_hindexed(mat->mynblock2-bb2,sizes,&addresses[bb2],TBLOQUEM,&tbloqfil);
1027  MPI_Type_commit(&tbloqfil);
1028  }
1029  for(j=bb2;j<mat->mynblock2;j++) {
1030  MPI_Address(buffer2[j],&addresses[j]);
1031  }
1032  for(j=mat->mynblock2-1;j>=bb2;j--) {
1033  addresses[j]-=addresses[bb2];
1034  }
1035  MPI_Type_create_hindexed(mat->mynblock2-bb2,sizes,&addresses[bb2],TBLOQUEM,&tbloqfilrcv);
1036  MPI_Type_commit(&tbloqfilrcv);
1037  }
1038 
1040  //process with the i-th block of the vector//
1042  if (mydata.myrow==0 && mydata.mycolumn==p2) {
1043  b=matrixBlockAt(matb,0,b2);
1044  if (p1==0) { //blocks of matrix are also mine
1045  for(j=b2+1;j<matb->mynblock2;j++) {
1046  //b(i)-=b(j)*a(i,j)
1047  tam2=(j<matb->mynblock2-1)?matb->blocksize2:matb->mylast2;
1048  sgemm(tam1,1,tam2,matrixBlockAt(mat,b1,j),matrixBlockAt(matb,0,j),b,mat->blocksize2,1,1);
1049  }
1050  c=matrixBlockAt(mat,b1,b2);
1051  }
1052  else {
1053  //matrix blocks reception
1054  if(b2<matb->mynblock2) {
1055  MPI_Recv(buffer2[b2],1,tbloqfilrcv,p1,i,mydata.comcolumnas,&estado);
1056  }
1057  c=buffer2[b2];
1058  for(j=b2+1;j<matb->mynblock2;j++) {
1059  //b(i)-=b(j)*a(i,j)
1060  tam2=(j<matb->mynblock2-1)?matb->blocksize2:matb->mylast2;
1061  sgemm(tam1,1,tam2,buffer2[j],matrixBlockAt(matb,0,j),b,mat->blocksize2,1,1);
1062  }
1063  }
1064  //Reduction of partial results
1065  for(j=0;j<matb->blocksize2;j++) temp[j]=b[j];
1066  MPI_Reduce(temp,b,1,TBLOQUEV,sumavec,p2,mydata.comfilas);
1067  strsmu(tam1,1,c,b,mat->blocksize2,1);
1068  }
1070  //processes with other blocks of the vector//
1072  else if (mydata.myrow==0) { //mydata.mycolumn/=p2
1073  //accumulator block initialized to 0
1074  for(j=0;j<matb->blocksize2;j++) temp[j]=0.0;
1075  if (p1==0) { //blocks of matrix are also mine
1076  for(j=bb2;j<matb->mynblock2;j++) {
1077  //b(i)-=b(j)*a(i,j)
1078  tam2=(j<matb->mynblock2-1)?matb->blocksize2:matb->mylast2;
1079  sgemm(tam1,1,tam2,matrixBlockAt(mat,b1,j),matrixBlockAt(matb,0,j),temp,mat->blocksize2,1,1);
1080  }
1081  }
1082  else {
1083  //matrix blocks reception
1084  if(bb2<matb->mynblock2) {
1085  MPI_Recv(buffer2[bb2],1,tbloqfilrcv,p1,i,mydata.comcolumnas,&estado);
1086  }
1087 
1088  for(j=bb2;j<matb->mynblock2;j++) {
1089  //b(i)-=b(j)*a(i,j)
1090  tam2=(j<matb->mynblock2-1)?matb->blocksize2:matb->mylast2;
1091  sgemm(tam1,1,tam2,buffer2[j],matrixBlockAt(matb,0,j),temp,mat->blocksize2,1,1);
1092  }
1093  }
1094  //sending of partial result
1095  MPI_Reduce(temp,NULL,1,TBLOQUEV,sumavec,p2,mydata.comfilas);
1096  }
1098  //processes with blocks in the i-th block row of the matrix//
1100  else if (mydata.myrow==p1) {
1101  //p2!=0
1102  //block sending
1103  if(bb2<mat->mynblock2) {
1104  MPI_Send(matrixBlockAt(mat,b1,bb2),1,tbloqfil,0,i,mydata.comcolumnas);
1105  }
1106  }
1107  if(bb2<mat->mynblock2) {
1108  if (b1<mat->mynblock1) MPI_Type_free(&tbloqfil);
1109  MPI_Type_free(&tbloqfilrcv);
1110  }
1111  p1=(p1==0)?(mydata.numproccol-1):(p1-1);
1112  p2=(p2==0)?(mydata.numprocrow-1):(p2-1);
1113  if (p1==mydata.numproccol-1) b1--;
1114  if (p2==mydata.numprocrow-1) b2--;
1115  }
1116  free(temp);
1117 }
1118 
1119 // Main
1120 int main(int argc, char** argv) {
1121 
1122  int n,bs1,nblocks,mblocks;
1123  MATRIX a,b;
1124 
1125  initialize(&argc,&argv);
1126 
1127  // @arturo 2015: Check parameters
1128  if ( argc < 3 ) {
1129  fprintf(stderr, "Usage: %s <matrixSize> <blockSize>\n", argv[0]);
1130  MPI_Finalize();
1131  exit( EXIT_FAILURE );
1132  }
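 // Typical invocation (assuming the compiled binary is named refMPIluBack):
 //   mpirun -np 4 ./refMPIluBack 1024 64
 // solves a 1024x1024 system with 64x64 blocks on a 2x2 process grid; compiling with
 // -DWRITE_DATA additionally dumps A, B, LU and X to *.dtxt files.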
1133 
1134  n=atoi(argv[1]);
1135  bs1=atoi(argv[2]);
1136 
1137  initializeMat(&a,n,n,bs1,bs1,&mblocks,&nblocks);
1138  initializeVec(&b,n,bs1,NULL);
1139  randomMat(&a,0xCAFE,-50.0,50.0);
1140  randomMat(&b,0xBECA,-50.0,50.0);
1141 
1142 #ifdef WRITE_DATA
1143  print(&a,"A.dtxt");
1144  printVec(&b,"B.dtxt");
1145 #endif
1146 
1147  initializeClock();
1148 
1149  factorizeMat(&a,mblocks,nblocks);
1150  solveMat(&a,&b,min(nblocks,mblocks));
1151 
1152  stopClock();
1153 
1154 // @arturo 2015: All output should be before or after the measured section
1155 #ifdef WRITE_DATA
1156  print(&a,"LU.dtxt");
1157 #endif
1158 #ifdef WRITE_DATA
1159  printVec(&b,"X.dtxt");
1160 #endif
1161  freeMat(&a);
1162  freeMat(&b);
1163  freeAll();
1164 
1165  printClock();
1166 
1167  MPI_Finalize();
1168  return 0;
1169 }