56 #define min(a,b) (((a)<(b))?(a):(b))
57 #define max(a,b) (((a)>(b))?(a):(b))
97 #define lastsize(iproc,nproc,nblock,lastsize,blocksize) ((iproc==((nblock-1)%nproc))?lastsize:blocksize)
98 #define ismine(coordb,coordp,numproc) ((coordb%numproc)==coordp)
100 #define localMatrixBlockAt(m,i,j) m->mat[((i)/mydata.numproccol)*m->mynblock2+((j)/mydata.numprocrow)]
101 #define matrixBlockAt(m,i,j) m->mat[(i)*m->mynblock2+(j)]
127 void vectorAdd (
double *in,
double *inout,
int *len, MPI_Datatype* type) {
130 MPI_Type_size(*type,&size);
131 size/=(int)
sizeof(
double);
132 for(i=0;i<
size;i++) inout[i]+=in[i];
148 MPI_Reduce(&
mytime,&tpored,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);
153 printf(
"Clock_mainClock Max: %lf\n", tpored );
160 int low,high,product;
163 MPI_Comm_size(MPI_COMM_WORLD,&mydata.
numproc);
164 MPI_Comm_rank(MPI_COMM_WORLD,&mydata.
mynumber);
166 low = high = (int)sqrt((
double)mydata.
numproc);
169 while ( product != mydata.
numproc ) {
170 if (product < mydata.
numproc) {
176 if (low==1) high=product=mydata.
numproc;
177 else product -= high;
186 int numroc(
int iproc,
int nproc,
int nblock,
int blocksize,
int last) {
189 res=((nblock-1)/nproc)*blocksize;
190 bloqextra=(nblock-1)%nproc;
194 else if(iproc==bloqextra){
195 if (last==0)
return res+blocksize;
201 int numbroc(
int iproc,
int nproc,
int nblock) {
205 bloqextra=nblock%nproc;
255 if ((mat->
last1 = (n1 % tb1))==0) mat->
last1=tb1;
256 if ((mat->
last2 = (n2 % tb2))==0) mat->
last2=tb2;
257 if (nb1!=NULL) *nb1=mat->
nblock1;
258 if (nb2!=NULL) *nb2=mat->
nblock2;
287 MPI_Type_contiguous(tb1*tb2,MPI_DOUBLE,&
TBLOQUEM);
309 if ((mat->
last2 = (n % tb))==0) mat->
last2=tb;
310 if (nb!=NULL) *nb=mat->
nblock1;
328 MPI_Type_contiguous(tb,MPI_DOUBLE,&
TBLOQUEV);
338 int i,j,k,l,tam1,tam2,mio;
346 for(k=0;k<tam1;k++) {
354 for (l=0;l<tam2;l++)
drand48();
364 int i,j,k,l,mio,tam1,tam2,offset1;
366 double * buf,*punteroM;
369 MPI_Barrier( MPI_COMM_WORLD );
370 FILE *
f = fopen( matrixName,
"w+" );
372 fprintf(stderr,
"Error: Opening matrix file %s\n", matrixName );
374 exit( EXIT_FAILURE );
376 MPI_Barrier( MPI_COMM_WORLD );
381 buf=calloc((
size_t)mat->
blocksize2,
sizeof(
double));
387 for(k=0;k<tam1;k++) {
396 fprintf(f,
"%016.6lf\n",punteroM[l+k*mat->
blocksize2]);
403 fprintf(f,
"%016.6lf\n",buf[l]);
420 for(k=0;k<tam1;k++) {
425 MPI_Send(&punteroM[k*mat->
blocksize2],tam2,MPI_DOUBLE,0,0,MPI_COMM_WORLD);
431 MPI_Barrier(MPI_COMM_WORLD);
440 double * buf,*punteroM;
443 MPI_Barrier( MPI_COMM_WORLD );
444 FILE *
f = fopen( matrixName,
"w+" );
446 fprintf(stderr,
"Error: Opening vector file %s\n", matrixName );
448 exit( EXIT_FAILURE );
450 MPI_Barrier( MPI_COMM_WORLD );
455 buf=calloc((
size_t)mat->
blocksize2,
sizeof(
double));
465 fprintf(f,
"%016.6lf\n",punteroM[l]);
472 fprintf(f,
"%016.6lf\n",buf[l]);
480 else if (mydata.
myrow==0) {
485 MPI_Send(punteroM,tam2,MPI_DOUBLE,0,0,MPI_COMM_WORLD);
489 MPI_Barrier(MPI_COMM_WORLD);
493 void strsmu(
int m,
int n,
double * a,
double * b,
int lda,
int ldb) {
502 if (m==0 || n==0)
return;
504 for(k=m-1;k>=0;k--) {
506 if((temp=-a[k*lda+i])!=0.0) {
509 printf(
"RUT: STRSMU, PROC %d;%d ",mydata.
myrow,mydata.
mycolumn);
510 printf(
"%lf -= %lf*%lf = %lf\n",b[k*ldb+j],-temp,b[i*ldb+j],b[k*ldb+j]+temp*b[i*ldb+j]);
512 b[k*ldb+j]+=temp*b[i*ldb+j];
519 printf(
"RUT: STRSMU, PROC %d;%d ",mydata.
myrow,mydata.
mycolumn);
520 printf(
"%lf /= %lf = %lf\n",b[k*ldb+j],temp,b[k*ldb+j]/temp);
527 void strsml(
int m,
int n,
double * a,
double * b,
int lda,
int ldb) {
536 if (m==0 || n==0)
return;
540 if((temp=-a[k*lda+i])!=0.0) {
543 printf(
"RUT: STRSML, PROC %d;%d ",mydata.
myrow,mydata.
mycolumn);
544 printf(
"%lf -= %lf*%lf = %lf\n",b[k*ldb+j],-temp,b[i*ldb+j],b[k*ldb+j]+temp*b[i*ldb+j]);
546 b[k*ldb+j]+=temp*b[i*ldb+j];
553 void strsml2(
int m,
int n,
double * a,
double * b,
int lda,
int ldb) {
562 if (m==0 || n==0)
return;
566 if (b[j*ldb+k]!=0.0) {
568 printf(
"RUT: STRSML2, PROC %d;%d ",mydata.
myrow,mydata.
mycolumn);
569 printf(
"%lf /= %lf = %lf\n",b[j*ldb+k],a[k*lda+k],b[j*ldb+k]/a[k*lda+k]);
571 b[j*ldb+k]/=a[k*lda+k];
575 printf(
"RUT: STRSML2, PROC %d;%d ",mydata.
myrow,mydata.
mycolumn);
576 printf(
"%lf -= %lf*%lf = %lf\n",b[j*ldb+i],-temp,a[k*lda+i],b[j*ldb+i]+temp*a[k*lda+i]);
578 b[j*ldb+i]+=temp*a[k*lda+i];
585 void sgemm (
int m,
int n,
int k,
double * a,
double * b,
double * c,
int lda,
int ldb,
int ldc) {
591 if (n==0 || m==0 || k==0)
return;
595 if((temp=-a[i*lda+l])!=0.0) {
598 printf(
"RUT: SGEMM, PROC %d;%d ",mydata.
myrow,mydata.
mycolumn);
599 printf(
"%lf -= %lf*%lf = %lf\n",c[i*ldc+j],-temp,b[l*ldb+j],c[i*ldc+j]+temp*b[l*ldb+j]);
601 c[i*ldc+j]+=temp*b[l*ldb+j];
613 int i,j,k,ii,jj,
min,tam1,tam2,tam3,
p1,
p2,b1,b2,bb2,bb1,bini;
614 double *b, *c, temp, *buf;
616 MPI_Datatype tbloqfil, tbloqfilrcv, tbloqcol, tbloqcolrcv, tcol;
623 buf=calloc((
size_t)mat->
blocksize1,
sizeof(
double));
625 #define lda mat->blocksize2
632 if(bb2<mat->mynblock2) {
633 if (b1<mat->mynblock1) {
641 MPI_Type_commit(&tbloqfil);
650 MPI_Type_commit(&tbloqfilrcv);
652 if(bb1<mat->mynblock1) {
653 if (b2<mat->mynblock2) {
662 MPI_Type_commit(&tbloqcol);
671 MPI_Type_commit(&tbloqcolrcv);
680 for(k=0;k<tam1-1;k++){
681 if (b[k*
lda+k]==0.0) {
684 for(ii=k+1;ii<tam1;ii++) {
686 if(b[ii*
lda+k]!=0.0) {
688 printf(
"RUT: FACT, PROC %d;%d ",mydata.
myrow,mydata.
mycolumn);
689 printf(
"%lf /= %lf = %lf\n",b[ii*
lda+k],b[k*
lda+k],b[ii*
lda+k]/b[k*
lda+k]);
694 MPI_Type_vector(tam1-k-1,1,
lda,MPI_DOUBLE,&tcol);
695 MPI_Type_commit(&tcol);
696 MPI_Bcast(&b[(k+1)*
lda+k],1,tcol,p2,mydata.
comfilas);
698 for(ii=k+1;ii<tam1;ii++) {
699 if((temp=-b[ii*
lda+k])!=0.0) {
701 for(jj=k+1;jj<tam3;jj++) {
703 printf(
"RUT: FACT, PROC %d;%d ",mydata.
myrow,mydata.
mycolumn);
704 printf(
"%lf -= %lf*%lf = %lf\n",b[ii*
lda+jj],-temp,b[k*
lda+jj],b[ii*
lda+jj]+temp*b[k*
lda+jj]);
706 b[ii*
lda+jj]+=temp*b[k*
lda+jj];
711 for(jj=0;jj<tam3;jj++) {
713 printf(
"RUT: FACT, PROC %d;%d ",mydata.
myrow,mydata.
mycolumn);
714 printf(
"%lf -= %lf*%lf = %lf\n",c[ii*
lda+jj],-temp,c[k*
lda+jj],c[ii*
lda+jj]+temp*c[k*
lda+jj]);
716 c[ii*
lda+jj]+=temp*c[k*
lda+jj];
721 MPI_Type_free(&tcol);
723 if (b[(tam1-1)*
lda+tam1-1]==0.0) {
724 fprintf(stderr,
"Hay un 0.0 en la posicion %d,%d\n",tam1-1+i*mat->
blocksize1,tam1-1+i*mat->
blocksize1);
760 sgemm(tam2,tam3,tam1,c,
matrixBlockAt(mat,b1,k),
matrixBlockAt(mat,j,k),mat->
blocksize2,mat->
blocksize2,mat->
blocksize2);
786 if(bb1<mat->mynblock1){
799 sgemm(tam2,tam3,tam1,c,
buffer2[k],
matrixBlockAt(mat,j,k),mat->
blocksize2,mat->
blocksize2,mat->
blocksize2);
806 else if (mydata.
myrow==p1) {
809 for(k=0;k<tam1-1;k++) {
811 MPI_Type_contiguous(tam1-k-1,MPI_DOUBLE,&tcol);
812 MPI_Type_commit(&tcol);
813 MPI_Bcast(buf,1,tcol,p2,mydata.
comfilas);
815 for(ii=k+1;ii<tam1;ii++) {
817 if((temp=-buf[ii-k-1])!=0.0) {
821 for(jj=0;jj<tam3;jj++) {
823 printf(
"RUT: FACT, PROC %d;%d ",mydata.
myrow,mydata.
mycolumn);
824 printf(
"%lf -= %lf*%lf = %lf\n",c[ii*
lda+jj],-temp,c[k*
lda+jj],c[ii*
lda+jj]+temp*c[k*
lda+jj]);
826 c[ii*
lda+jj]+=temp*c[k*
lda+jj];
831 MPI_Type_free(&tcol);
835 if(bb2<mat->mynblock2) {
849 for(k=bb2;k<mat->mynblock2;k++) {
852 sgemm(tam2,tam3,tam1,c,
matrixBlockAt(mat,b1,k),
matrixBlockAt(mat,j,k),mat->
blocksize2,mat->
blocksize2,mat->
blocksize2);
861 if(bb2<mat->mynblock2) {
864 if(bb1<mat->mynblock1) {
873 for(k=bb2;k<mat->mynblock2;k++) {
876 sgemm(tam2,tam3,tam1,c,
buffer2[k],
matrixBlockAt(mat,j,k),mat->
blocksize2,mat->
blocksize2,mat->
blocksize2);
880 if(bb2<mat->mynblock2) {
881 if (b1<mat->mynblock1) MPI_Type_free(&tbloqfil);
882 MPI_Type_free(&tbloqfilrcv);
884 if(bb1<mat->mynblock1) {
885 if (b2<mat->mynblock2) MPI_Type_free(&tbloqcol);
886 MPI_Type_free(&tbloqcolrcv);
899 int p1,
p2,b1,b2,bb2,i,j,tam1,tam2;
901 MPI_Datatype tbloqfil, tbloqfilrcv;
908 temp=calloc((
size_t)matb->
blocksize2,
sizeof(
double));
916 if (b1<mat->mynblock1) {
917 for(j=0;j<=bb2;j++) {
920 for(j=bb2;j>=0;j--) {
924 MPI_Type_commit(&tbloqfil);
926 for(j=0;j<=bb2;j++) {
929 for(j=bb2;j>=0;j--) {
933 MPI_Type_commit(&tbloqfilrcv);
943 for(j=b2-1;j>=0;j--) {
945 sgemm(tam1,1,matb->
blocksize2,
matrixBlockAt(mat,b1,j),
matrixBlockAt(matb,0,j),b,mat->
blocksize2,1,1);
954 for(j=b2-1;j>=0;j--) {
957 sgemm(tam1,1,matb->
blocksize2,
buffer2[j],
matrixBlockAt(matb,0,j),b,mat->
blocksize2,1,1);
968 else if (mydata.
myrow==0) {
972 for(j=bb2;j>=0;j--) {
974 sgemm(tam1,1,matb->
blocksize2,
matrixBlockAt(mat,b1,j),
matrixBlockAt(matb,0,j),temp,mat->
blocksize2,1,1);
982 for(j=bb2;j>=0;j--) {
984 sgemm(tam1,1,matb->
blocksize2,
buffer2[j],
matrixBlockAt(matb,0,j),temp,mat->
blocksize2,1,1);
993 else if (mydata.
myrow==p1) {
1001 if (b1<mat->mynblock1) MPI_Type_free(&tbloqfil);
1002 MPI_Type_free(&tbloqfilrcv);
1014 for(i=n-1;i>=0;i--) {
1018 if(bb2<mat->mynblock2) {
1019 if (b1<mat->mynblock1) {
1027 MPI_Type_commit(&tbloqfil);
1036 MPI_Type_commit(&tbloqfilrcv);
1048 sgemm(tam1,1,tam2,
matrixBlockAt(mat,b1,j),
matrixBlockAt(matb,0,j),b,mat->
blocksize2,1,1);
1054 if(b2<matb->mynblock2) {
1061 sgemm(tam1,1,tam2,
buffer2[j],
matrixBlockAt(matb,0,j),b,mat->
blocksize2,1,1);
1065 for(j=0;j<matb->
blocksize2;j++) temp[j]=b[j];
1072 else if (mydata.
myrow==0) {
1079 sgemm(tam1,1,tam2,
matrixBlockAt(mat,b1,j),
matrixBlockAt(matb,0,j),temp,mat->
blocksize2,1,1);
1084 if(bb2<matb->mynblock2) {
1091 sgemm(tam1,1,tam2,
buffer2[j],
matrixBlockAt(matb,0,j),temp,mat->
blocksize2,1,1);
1100 else if (mydata.
myrow==p1) {
1103 if(bb2<mat->mynblock2) {
1107 if(bb2<mat->mynblock2) {
1108 if (b1<mat->mynblock1) MPI_Type_free(&tbloqfil);
1109 MPI_Type_free(&tbloqfilrcv);
1122 int n,bs1,nblocks,mblocks;
1129 fprintf(stderr,
"Usage: %s <matrixSize> <blockSize>\n", argv[0]);
1131 exit( EXIT_FAILURE );
1156 print(&a,
"LU.dtxt");
void randomMat(HitBlockTile selection, int n1, int n2, int seed, double min, double max)
#define matrixBlockAt(m, i, j)
void vectorAdd(double *in, double *inout, int *len, MPI_Datatype *type)
int initializeMat(MATRIX *, int, int, int, int, int *, int *)
int numbroc(int iproc, int nproc, int nblock)
void print(HitBlockTile selection, const char *fileName)
int initializeVec(MATRIX *, int, int, int *)
#define lastsize(iproc, nproc, nblock, lastsize, blocksize)
void factorizeMat(HitLayout, HitBlockTile)
#define ismine(coordb, coordp, numproc)
void solveMat(HitLayout, HitBlockTile, HitBlockTile)
void sgemm(int m, int n, int k, double *a, double *b, double *c, int lda, int ldb, int ldc)
int main(int argc, char *argv[])
int initialize(int *, char ***)
#define localMatrixBlockAt(m, i, j)
void strsmu(int m, int n, double *a, double *b, int lda, int ldb)
void strsml2(HitTile_double a, HitTile_double b)
void strsml(int m, int n, double *a, double *b, int lda, int ldb)
int printVec(MATRIX *, const char *)
int numroc(int iproc, int nproc, int nblock, int blocksize, int last)