File: | mat/impls/aij/mpi/mpimatmatmult.c |
Warning: | line 641, column 13 Array access (from variable 'pi_oth') results in a null pointer dereference |
[?] Use j/k keys for keyboard navigation
1 | ||||
2 | /* | |||
3 | Defines matrix-matrix product routines for pairs of MPIAIJ matrices | |||
4 | C = A * B | |||
5 | */ | |||
6 | #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ | |||
7 | #include <../src/mat/utils/freespace.h> | |||
8 | #include <../src/mat/impls/aij/mpi/mpiaij.h> | |||
9 | #include <petscbt.h> | |||
10 | #include <../src/mat/impls/dense/mpi/mpidense.h> | |||
11 | #include <petsc/private/vecimpl.h> | |||
12 | #include <petsc/private/vecscatterimpl.h> | |||
13 | ||||
14 | #if defined(PETSC_HAVE_HYPRE) | |||
15 | PETSC_INTERNextern __attribute__((visibility ("hidden"))) PetscErrorCode MatMatMultSymbolic_AIJ_AIJ_wHYPRE(Mat,Mat,PetscReal,Mat*); | |||
16 | #endif | |||
17 | ||||
18 | PETSC_INTERNextern __attribute__((visibility ("hidden"))) PetscErrorCode MatMatMult_MPIAIJ_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill, Mat *C) | |||
19 | { | |||
20 | PetscErrorCode ierr; | |||
21 | #if defined(PETSC_HAVE_HYPRE) | |||
22 | const char *algTypes[4] = {"scalable","nonscalable","seqmpi","hypre"}; | |||
23 | PetscInt nalg = 4; | |||
24 | #else | |||
25 | const char *algTypes[3] = {"scalable","nonscalable","seqmpi"}; | |||
26 | PetscInt nalg = 3; | |||
27 | #endif | |||
28 | PetscInt alg = 1; /* set nonscalable algorithm as default */ | |||
29 | MPI_Comm comm; | |||
30 | PetscBool flg; | |||
31 | ||||
32 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 32; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
33 | if (scall == MAT_INITIAL_MATRIX) { | |||
34 | ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),34,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
35 | if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend)return PetscError(comm,35,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,60,PETSC_ERROR_INITIAL,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)" ,A->cmap->rstart,A->cmap->rend,B->rmap->rstart ,B->rmap->rend); | |||
36 | ||||
37 | ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatMatMult","Mat")0; do { PetscOptionItems PetscOptionsObjectBase; PetscOptionItems *PetscOptionsObject = &PetscOptionsObjectBase; PetscMemzero (PetscOptionsObject,sizeof(PetscOptionItems)); for (PetscOptionsObject ->count=(PetscOptionsPublish?-1:1); PetscOptionsObject-> count<2; PetscOptionsObject->count++) { PetscErrorCode _5_ierr = PetscOptionsBegin_Private(PetscOptionsObject,PetscObjectComm ((PetscObject)A),((PetscObject)A)->prefix,"MatMatMult","Mat" );do {if (__builtin_expect(!!(_5_ierr),0)) return PetscError( ((MPI_Comm)0x44000001),37,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_5_ierr,PETSC_ERROR_REPEAT," ");} while (0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),37,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
38 | ierr = PetscOptionsEList("-matmatmult_via","Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[1],&alg,&flg)PetscOptionsEList_Private(PetscOptionsObject,"-matmatmult_via" ,"Algorithmic approach","MatMatMult",algTypes,nalg,algTypes[1 ],&alg,&flg);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),38,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
39 | ierr = PetscOptionsEnd()_5_ierr = PetscOptionsEnd_Private(PetscOptionsObject);do {if ( __builtin_expect(!!(_5_ierr),0)) return PetscError(((MPI_Comm )0x44000001),39,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_5_ierr,PETSC_ERROR_REPEAT," ");} while (0);}} while (0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),39,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
40 | ||||
41 | if (!flg && B->cmap->N > 100000) { /* may switch to scalable algorithm as default */ | |||
42 | MatInfo Ainfo,Binfo; | |||
43 | PetscInt nz_local; | |||
44 | PetscBool alg_scalable_loc=PETSC_FALSE,alg_scalable; | |||
45 | ||||
46 | ierr = MatGetInfo(A,MAT_LOCAL,&Ainfo);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),46,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
47 | ierr = MatGetInfo(B,MAT_LOCAL,&Binfo);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),47,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
48 | nz_local = (PetscInt)(Ainfo.nz_allocated + Binfo.nz_allocated); | |||
49 | ||||
50 | if (B->cmap->N > fill*nz_local) alg_scalable_loc = PETSC_TRUE; | |||
51 | ierr = MPIU_Allreduce(&alg_scalable_loc,&alg_scalable,1,MPIU_BOOL,MPI_LOR,comm)(PetscAllreduceBarrierCheck(comm,1,51,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((petsc_allreduce_ct += PetscMPIParallelComm((comm)),0) || MPI_Allreduce((&alg_scalable_loc),(&alg_scalable),(1 ),(MPIU_BOOL),((MPI_Op)(0x58000007)),(comm))));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),51,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
52 | ||||
53 | if (alg_scalable) { | |||
54 | alg = 0; /* scalable algorithm would 50% slower than nonscalable algorithm */ | |||
55 | ierr = PetscInfo2(B,"Use scalable algorithm, BN %D, fill*nz_allocated %g\n",B->cmap->N,fill*nz_local)PetscInfo_Private(__func__,B,"Use scalable algorithm, BN %D, fill*nz_allocated %g\n" ,B->cmap->N,fill*nz_local);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),55,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
56 | } | |||
57 | } | |||
58 | ||||
59 | ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultSymbolic].active) ? (*PetscLogPLB)((MAT_MatMultSymbolic ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),59,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
60 | switch (alg) { | |||
61 | case 1: | |||
62 | ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ_nonscalable(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),62,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
63 | break; | |||
64 | case 2: | |||
65 | ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ_seqMPI(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),65,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
66 | break; | |||
67 | #if defined(PETSC_HAVE_HYPRE) | |||
68 | case 3: | |||
69 | ierr = MatMatMultSymbolic_AIJ_AIJ_wHYPRE(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),69,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
70 | break; | |||
71 | #endif | |||
72 | default: | |||
73 | ierr = MatMatMultSymbolic_MPIAIJ_MPIAIJ(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),73,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
74 | break; | |||
75 | } | |||
76 | ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0)(((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultSymbolic].active) ? (*PetscLogPLE)((MAT_MatMultSymbolic ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),76,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
77 | ||||
78 | if (alg == 0 || alg == 1) { | |||
79 | Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*C)->data; | |||
80 | Mat_APMPI *ap = c->ap; | |||
81 | ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)(*C)),((PetscObject)(*C))->prefix,"MatFreeIntermediateDataStructures","Mat")0; do { PetscOptionItems PetscOptionsObjectBase; PetscOptionItems *PetscOptionsObject = &PetscOptionsObjectBase; PetscMemzero (PetscOptionsObject,sizeof(PetscOptionItems)); for (PetscOptionsObject ->count=(PetscOptionsPublish?-1:1); PetscOptionsObject-> count<2; PetscOptionsObject->count++) { PetscErrorCode _5_ierr = PetscOptionsBegin_Private(PetscOptionsObject,PetscObjectComm ((PetscObject)(*C)),((PetscObject)(*C))->prefix,"MatFreeIntermediateDataStructures" ,"Mat");do {if (__builtin_expect(!!(_5_ierr),0)) return PetscError (((MPI_Comm)0x44000001),81,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_5_ierr,PETSC_ERROR_REPEAT," ");} while (0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),81,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
82 | ap->freestruct = PETSC_FALSE; | |||
83 | ierr = PetscOptionsBool("-mat_freeintermediatedatastructures","Free intermediate data structures", "MatFreeIntermediateDataStructures",ap->freestruct,&ap->freestruct, NULL)PetscOptionsBool_Private(PetscOptionsObject,"-mat_freeintermediatedatastructures" ,"Free intermediate data structures","MatFreeIntermediateDataStructures" ,ap->freestruct,&ap->freestruct,((void*)0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),83,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
84 | ierr = PetscOptionsEnd()_5_ierr = PetscOptionsEnd_Private(PetscOptionsObject);do {if ( __builtin_expect(!!(_5_ierr),0)) return PetscError(((MPI_Comm )0x44000001),84,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_5_ierr,PETSC_ERROR_REPEAT," ");} while (0);}} while (0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),84,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
85 | } | |||
86 | } | |||
87 | ||||
88 | ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultNumeric].active) ? (*PetscLogPLB)((MAT_MatMultNumeric ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),88,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
89 | ierr = (*(*C)->ops->matmultnumeric)(A,B,*C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),89,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
90 | ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0)(((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultNumeric].active) ? (*PetscLogPLE)((MAT_MatMultNumeric ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),90,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
91 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
92 | } | |||
93 | ||||
94 | PetscErrorCode MatDestroy_MPIAIJ_MatMatMult(Mat A) | |||
95 | { | |||
96 | PetscErrorCode ierr; | |||
97 | Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; | |||
98 | Mat_APMPI *ptap = a->ap; | |||
99 | ||||
100 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 100; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
101 | ierr = PetscFree2(ptap->startsj_s,ptap->startsj_r)PetscFreeA(2,101,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,&(ptap->startsj_s),&(ptap->startsj_r));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),101,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
102 | ierr = PetscFree(ptap->bufa)((*PetscTrFree)((void*)(ptap->bufa),102,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((ptap->bufa) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),102,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
103 | ierr = MatDestroy(&ptap->P_loc);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),103,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
104 | ierr = MatDestroy(&ptap->P_oth);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),104,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
105 | ierr = MatDestroy(&ptap->Pt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),105,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
106 | ierr = PetscFree(ptap->api)((*PetscTrFree)((void*)(ptap->api),106,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((ptap->api) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),106,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
107 | ierr = PetscFree(ptap->apj)((*PetscTrFree)((void*)(ptap->apj),107,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((ptap->apj) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),107,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
108 | ierr = PetscFree(ptap->apa)((*PetscTrFree)((void*)(ptap->apa),108,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((ptap->apa) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),108,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
109 | ierr = ptap->destroy(A);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),109,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
110 | ierr = PetscFree(ptap)((*PetscTrFree)((void*)(ptap),110,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((ptap) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),110,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
111 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
112 | } | |||
113 | ||||
114 | PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ_nonscalable(Mat A,Mat P,Mat C) | |||
115 | { | |||
116 | PetscErrorCode ierr; | |||
117 | Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*c=(Mat_MPIAIJ*)C->data; | |||
118 | Mat_SeqAIJ *ad =(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; | |||
119 | Mat_SeqAIJ *cd =(Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data; | |||
120 | PetscScalar *cda=cd->a,*coa=co->a; | |||
121 | Mat_SeqAIJ *p_loc,*p_oth; | |||
122 | PetscScalar *apa,*ca; | |||
123 | PetscInt cm =C->rmap->n; | |||
124 | Mat_APMPI *ptap=c->ap; | |||
125 | PetscInt *api,*apj,*apJ,i,k; | |||
126 | PetscInt cstart=C->cmap->rstart; | |||
127 | PetscInt cdnz,conz,k0,k1; | |||
128 | MPI_Comm comm; | |||
129 | PetscMPIInt size; | |||
130 | ||||
131 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 131; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
132 | ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),132,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
133 | ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),133,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
134 | ||||
135 | if (!ptap->P_oth && size>1) SETERRQ(comm,PETSC_ERR_ARG_WRONGSTATE,"AP cannot be reused. Do not call MatFreeIntermediateDataStructures() or use '-mat_freeintermediatedatastructures'")return PetscError(comm,135,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,73,PETSC_ERROR_INITIAL,"AP cannot be reused. Do not call MatFreeIntermediateDataStructures() or use '-mat_freeintermediatedatastructures'" ); | |||
136 | ||||
137 | /* 1) get P_oth = ptap->P_oth and P_loc = ptap->P_loc */ | |||
138 | /*-----------------------------------------------------*/ | |||
139 | /* update numerical values of P_oth and P_loc */ | |||
140 | ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_REUSE_MATRIX,&ptap->startsj_s,&ptap->startsj_r,&ptap->bufa,&ptap->P_oth);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),140,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
141 | ierr = MatMPIAIJGetLocalMat(P,MAT_REUSE_MATRIX,&ptap->P_loc);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),141,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
142 | ||||
143 | /* 2) compute numeric C_loc = A_loc*P = Ad*P_loc + Ao*P_oth */ | |||
144 | /*----------------------------------------------------------*/ | |||
145 | /* get data from symbolic products */ | |||
146 | p_loc = (Mat_SeqAIJ*)(ptap->P_loc)->data; | |||
147 | p_oth = NULL((void*)0); | |||
148 | if (size >1) { | |||
149 | p_oth = (Mat_SeqAIJ*)(ptap->P_oth)->data; | |||
150 | } | |||
151 | ||||
152 | /* get apa for storing dense row A[i,:]*P */ | |||
153 | apa = ptap->apa; | |||
154 | ||||
155 | api = ptap->api; | |||
156 | apj = ptap->apj; | |||
157 | for (i=0; i<cm; i++) { | |||
158 | /* compute apa = A[i,:]*P */ | |||
159 | AProw_nonscalable(i,ad,ao,p_loc,p_oth,apa){ PetscInt _anz,_pnz,_j,_k,*_ai,*_aj,_row,*_pi,*_pj; PetscScalar *_aa,_valtmp,*_pa; _ai = ad->i; _anz = _ai[i+1] - _ai[i]; _aj = ad->j + _ai[i]; _aa = ad->a + _ai[i]; for (_j=0; _j<_anz; _j++) { _row = _aj[_j]; _pi = p_loc->i; _pnz = _pi[_row+1] - _pi[_row]; _pj = p_loc->j + _pi[_row]; _pa = p_loc->a + _pi[_row]; _valtmp = _aa[_j]; for (_k=0; _k< _pnz; _k++) { apa[_pj[_k]] += _valtmp*_pa[_k]; } (void)PetscLogFlops (2.0*_pnz); } if (p_oth){ _ai = ao->i; _anz = _ai[i+1] - _ai [i]; _aj = ao->j + _ai[i]; _aa = ao->a + _ai[i]; for (_j =0; _j<_anz; _j++) { _row = _aj[_j]; _pi = p_oth->i; _pnz = _pi[_row+1] - _pi[_row]; _pj = p_oth->j + _pi[_row]; _pa = p_oth->a + _pi[_row]; _valtmp = _aa[_j]; for (_k=0; _k< _pnz; _k++) { apa[_pj[_k]] += _valtmp*_pa[_k]; } (void)PetscLogFlops (2.0*_pnz); } }}; | |||
160 | ||||
161 | /* set values in C */ | |||
162 | apJ = apj + api[i]; | |||
163 | cdnz = cd->i[i+1] - cd->i[i]; | |||
164 | conz = co->i[i+1] - co->i[i]; | |||
165 | ||||
166 | /* 1st off-diagoanl part of C */ | |||
167 | ca = coa + co->i[i]; | |||
168 | k = 0; | |||
169 | for (k0=0; k0<conz; k0++) { | |||
170 | if (apJ[k] >= cstart) break; | |||
171 | ca[k0] = apa[apJ[k]]; | |||
172 | apa[apJ[k++]] = 0.0; | |||
173 | } | |||
174 | ||||
175 | /* diagonal part of C */ | |||
176 | ca = cda + cd->i[i]; | |||
177 | for (k1=0; k1<cdnz; k1++) { | |||
178 | ca[k1] = apa[apJ[k]]; | |||
179 | apa[apJ[k++]] = 0.0; | |||
180 | } | |||
181 | ||||
182 | /* 2nd off-diagoanl part of C */ | |||
183 | ca = coa + co->i[i]; | |||
184 | for (; k0<conz; k0++) { | |||
185 | ca[k0] = apa[apJ[k]]; | |||
186 | apa[apJ[k++]] = 0.0; | |||
187 | } | |||
188 | } | |||
189 | ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),189,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
190 | ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),190,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
191 | ||||
192 | if (ptap->freestruct) { | |||
193 | ierr = MatFreeIntermediateDataStructures(C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),193,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
194 | } | |||
195 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
196 | } | |||
197 | ||||
198 | PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ_nonscalable(Mat A,Mat P,PetscReal fill,Mat *C) | |||
199 | { | |||
200 | PetscErrorCode ierr; | |||
201 | MPI_Comm comm; | |||
202 | PetscMPIInt size; | |||
203 | Mat Cmpi; | |||
204 | Mat_APMPI *ptap; | |||
205 | PetscFreeSpaceList free_space=NULL((void*)0),current_space=NULL((void*)0); | |||
206 | Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*c; | |||
207 | Mat_SeqAIJ *ad =(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*p_loc,*p_oth; | |||
208 | PetscInt *pi_loc,*pj_loc,*pi_oth,*pj_oth,*dnz,*onz; | |||
209 | PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,rstart=A->rmap->rstart; | |||
210 | PetscInt *lnk,i,pnz,row,*api,*apj,*Jptr,apnz,nspacedouble=0,j,nzi; | |||
211 | PetscInt am=A->rmap->n,pN=P->cmap->N,pn=P->cmap->n,pm=P->rmap->n; | |||
212 | PetscBT lnkbt; | |||
213 | PetscReal afill; | |||
214 | MatType mtype; | |||
215 | ||||
216 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 216; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
217 | ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),217,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
218 | ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),218,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
219 | ||||
220 | /* create struct Mat_APMPI and attached it to C later */ | |||
221 | ierr = PetscNew(&ptap)PetscMallocA(1,PETSC_TRUE,221,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(1)*sizeof(**((&ptap))),((&ptap)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),221,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
222 | ||||
223 | /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ | |||
224 | ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&ptap->startsj_s,&ptap->startsj_r,&ptap->bufa,&ptap->P_oth);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),224,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
225 | ||||
226 | /* get P_loc by taking all local rows of P */ | |||
227 | ierr = MatMPIAIJGetLocalMat(P,MAT_INITIAL_MATRIX,&ptap->P_loc);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),227,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
228 | ||||
229 | p_loc = (Mat_SeqAIJ*)(ptap->P_loc)->data; | |||
230 | pi_loc = p_loc->i; pj_loc = p_loc->j; | |||
231 | if (size > 1) { | |||
232 | p_oth = (Mat_SeqAIJ*)(ptap->P_oth)->data; | |||
233 | pi_oth = p_oth->i; pj_oth = p_oth->j; | |||
234 | } else { | |||
235 | p_oth = NULL((void*)0); | |||
236 | pi_oth = NULL((void*)0); pj_oth = NULL((void*)0); | |||
237 | } | |||
238 | ||||
239 | /* first, compute symbolic AP = A_loc*P = A_diag*P_loc + A_off*P_oth */ | |||
240 | /*-------------------------------------------------------------------*/ | |||
241 | ierr = PetscMalloc1(am+2,&api)PetscMallocA(1,PETSC_FALSE,241,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(am+2)*sizeof(**(&api)),(&api));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),241,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
242 | ptap->api = api; | |||
243 | api[0] = 0; | |||
244 | ||||
245 | /* create and initialize a linked list */ | |||
246 | ierr = PetscLLCondensedCreate(pN,pN,&lnk,&lnkbt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),246,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
247 | ||||
248 | /* Initial FreeSpace size is fill*(nnz(A)+nnz(P)) */ | |||
249 | ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(adi[am],PetscIntSumTruncate(aoi[am],pi_loc[pm]))),&free_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),249,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
250 | current_space = free_space; | |||
251 | ||||
252 | ierr = MatPreallocateInitialize(comm,am,pn,dnz,onz)0; do { PetscErrorCode _4_ierr; PetscInt __nrows = (am),__ncols = (pn),__rstart,__start,__end; _4_ierr = PetscMallocA(2,PETSC_TRUE ,252,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)((size_t)__nrows)*sizeof(**(&dnz)),(&dnz),(size_t )((size_t)__nrows)*sizeof(**(&onz)),(&onz));do {if (__builtin_expect (!!(_4_ierr),0)) return PetscError(((MPI_Comm)0x44000001),252 ,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0); __start = 0; __end = __start; _4_ierr = MPI_Scan(&__ncols,&__end,1,((MPI_Datatype )0x4c000405),(MPI_Op)(0x58000003),comm);do {if (__builtin_expect (!!(_4_ierr),0)) return PetscError(((MPI_Comm)0x44000001),252 ,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0); __start = __end - __ncols; _4_ierr = MPI_Scan(&__nrows,&__rstart,1,( (MPI_Datatype)0x4c000405),(MPI_Op)(0x58000003),comm);do {if ( __builtin_expect(!!(_4_ierr),0)) return PetscError(((MPI_Comm )0x44000001),252,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0); __rstart = __rstart - __nrows; do { } while(0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),252,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
253 | for (i=0; i<am; i++) { | |||
254 | /* diagonal portion of A */ | |||
255 | nzi = adi[i+1] - adi[i]; | |||
256 | for (j=0; j<nzi; j++) { | |||
257 | row = *adj++; | |||
258 | pnz = pi_loc[row+1] - pi_loc[row]; | |||
259 | Jptr = pj_loc + pi_loc[row]; | |||
260 | /* add non-zero cols of P into the sorted linked list lnk */ | |||
261 | ierr = PetscLLCondensedAddSorted(pnz,Jptr,lnk,lnkbt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),261,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
262 | } | |||
263 | /* off-diagonal portion of A */ | |||
264 | nzi = aoi[i+1] - aoi[i]; | |||
265 | for (j=0; j<nzi; j++) { | |||
266 | row = *aoj++; | |||
267 | pnz = pi_oth[row+1] - pi_oth[row]; | |||
268 | Jptr = pj_oth + pi_oth[row]; | |||
269 | ierr = PetscLLCondensedAddSorted(pnz,Jptr,lnk,lnkbt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),269,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
270 | } | |||
271 | ||||
272 | apnz = lnk[0]; | |||
273 | api[i+1] = api[i] + apnz; | |||
274 | ||||
275 | /* if free space is not available, double the total space in the list */ | |||
276 | if (current_space->local_remaining<apnz) { | |||
277 | ierr = PetscFreeSpaceGet(PetscIntSumTruncate(apnz,current_space->total_array_size),¤t_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),277,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
278 | nspacedouble++; | |||
279 | } | |||
280 | ||||
281 | /* Copy data into free space, then initialize lnk */ | |||
282 | ierr = PetscLLCondensedClean(pN,apnz,current_space->array,lnk,lnkbt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),282,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
283 | ierr = MatPreallocateSet(i+rstart,apnz,current_space->array,dnz,onz)0;do { PetscInt __i; if (i+rstart < __rstart) return PetscError (((MPI_Comm)0x44000001),283,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,63,PETSC_ERROR_INITIAL,"Trying to set preallocation for row %D less than first local row %D" ,i+rstart,__rstart); if (i+rstart >= __rstart+__nrows) return PetscError(((MPI_Comm)0x44000001),283,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,63,PETSC_ERROR_INITIAL,"Trying to set preallocation for row %D greater than last local row %D" ,i+rstart,__rstart+__nrows-1); for (__i=0; __i<apnz; __i++ ) { if ((current_space->array)[__i] < __start || (current_space ->array)[__i] >= __end) onz[i+rstart - __rstart]++; else if (dnz[i+rstart - __rstart] < __ncols) dnz[i+rstart - __rstart ]++; }} while (0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),283,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
284 | ||||
285 | current_space->array += apnz; | |||
286 | current_space->local_used += apnz; | |||
287 | current_space->local_remaining -= apnz; | |||
288 | } | |||
289 | ||||
290 | /* Allocate space for apj, initialize apj, and */ | |||
291 | /* destroy list of free space and other temporary array(s) */ | |||
292 | ierr = PetscMalloc1(api[am]+1,&ptap->apj)PetscMallocA(1,PETSC_FALSE,292,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(api[am]+1)*sizeof(**(&ptap->apj)),(&ptap ->apj));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),292,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
293 | apj = ptap->apj; | |||
294 | ierr = PetscFreeSpaceContiguous(&free_space,ptap->apj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),294,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
295 | ierr = PetscLLDestroy(lnk,lnkbt)(((*PetscTrFree)((void*)(lnk),295,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((lnk) = 0,0)) || PetscBTDestroy(&(lnkbt)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),295,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
296 | ||||
297 | /* malloc apa to store dense row A[i,:]*P */ | |||
298 | ierr = PetscCalloc1(pN,&ptap->apa)PetscMallocA(1,PETSC_TRUE,298,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(pN)*sizeof(**(&ptap->apa)),(&ptap->apa ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),298,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
299 | ||||
300 | /* create and assemble symbolic parallel matrix Cmpi */ | |||
301 | /*----------------------------------------------------*/ | |||
302 | ierr = MatCreate(comm,&Cmpi);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),302,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
303 | ierr = MatSetSizes(Cmpi,am,pn,PETSC_DETERMINE-1,PETSC_DETERMINE-1);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),303,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
304 | ierr = MatSetBlockSizesFromMats(Cmpi,A,P);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),304,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
305 | ||||
306 | ierr = MatGetType(A,&mtype);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),306,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
307 | ierr = MatSetType(Cmpi,mtype);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),307,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
308 | ierr = MatMPIAIJSetPreallocation(Cmpi,0,dnz,0,onz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),308,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
309 | ||||
310 | ierr = MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Cmpi, apj, api);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),310,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
311 | ierr = MatAssemblyBegin(Cmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),311,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
312 | ierr = MatAssemblyEnd(Cmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),312,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
313 | ierr = MatPreallocateFinalize(dnz,onz)0;_4_ierr = PetscFreeA(2,313,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,&(dnz),&(onz));do {if (__builtin_expect(!!(_4_ierr), 0)) return PetscError(((MPI_Comm)0x44000001),313,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),313,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
314 | ||||
315 | ptap->destroy = Cmpi->ops->destroy; | |||
316 | ptap->duplicate = Cmpi->ops->duplicate; | |||
317 | Cmpi->ops->matmultnumeric = MatMatMultNumeric_MPIAIJ_MPIAIJ_nonscalable; | |||
318 | Cmpi->ops->destroy = MatDestroy_MPIAIJ_MatMatMult; | |||
319 | Cmpi->ops->freeintermediatedatastructures = MatFreeIntermediateDataStructures_MPIAIJ_AP; | |||
320 | ||||
321 | /* attach the supporting struct to Cmpi for reuse */ | |||
322 | c = (Mat_MPIAIJ*)Cmpi->data; | |||
323 | c->ap = ptap; | |||
324 | ||||
325 | *C = Cmpi; | |||
326 | ||||
327 | /* set MatInfo */ | |||
328 | afill = (PetscReal)api[am]/(adi[am]+aoi[am]+pi_loc[pm]+1) + 1.e-5; | |||
329 | if (afill < 1.0) afill = 1.0; | |||
330 | Cmpi->info.mallocs = nspacedouble; | |||
331 | Cmpi->info.fill_ratio_given = fill; | |||
332 | Cmpi->info.fill_ratio_needed = afill; | |||
333 | ||||
334 | #if defined(PETSC_USE_INFO1) | |||
335 | if (api[am]) { | |||
336 | ierr = PetscInfo3(Cmpi,"Reallocs %D; Fill ratio: given %g needed %g.\n",nspacedouble,(double)fill,(double)afill)PetscInfo_Private(__func__,Cmpi,"Reallocs %D; Fill ratio: given %g needed %g.\n" ,nspacedouble,(double)fill,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),336,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
337 | ierr = PetscInfo1(Cmpi,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)PetscInfo_Private(__func__,Cmpi,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n" ,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),337,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
338 | } else { | |||
339 | ierr = PetscInfo(Cmpi,"Empty matrix product\n")PetscInfo_Private(__func__,Cmpi,"Empty matrix product\n");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),339,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
340 | } | |||
341 | #endif | |||
342 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
343 | } | |||
344 | ||||
345 | PETSC_INTERNextern __attribute__((visibility ("hidden"))) PetscErrorCode MatMatMult_MPIAIJ_MPIDense(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) | |||
346 | { | |||
347 | PetscErrorCode ierr; | |||
348 | ||||
349 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 349; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
350 | if (scall == MAT_INITIAL_MATRIX) { | |||
351 | ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultSymbolic].active) ? (*PetscLogPLB)((MAT_MatMultSymbolic ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),351,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
352 | ierr = MatMatMultSymbolic_MPIAIJ_MPIDense(A,B,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),352,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
353 | ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0)(((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultSymbolic].active) ? (*PetscLogPLE)((MAT_MatMultSymbolic ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),353,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
354 | } | |||
355 | ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultNumeric].active) ? (*PetscLogPLB)((MAT_MatMultNumeric ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),355,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
356 | ierr = MatMatMultNumeric_MPIAIJ_MPIDense(A,B,*C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),356,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
357 | ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0)(((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_MatMultNumeric].active) ? (*PetscLogPLE)((MAT_MatMultNumeric ),0,(PetscObject)(A),(PetscObject)(B),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),357,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
358 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
359 | } | |||
360 | ||||
361 | typedef struct { | |||
362 | Mat workB; | |||
363 | PetscScalar *rvalues,*svalues; | |||
364 | MPI_Request *rwaits,*swaits; | |||
365 | } MPIAIJ_MPIDense; | |||
366 | ||||
367 | PetscErrorCode MatMPIAIJ_MPIDenseDestroy(void *ctx) | |||
368 | { | |||
369 | MPIAIJ_MPIDense *contents = (MPIAIJ_MPIDense*) ctx; | |||
370 | PetscErrorCode ierr; | |||
371 | ||||
372 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 372; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
373 | ierr = MatDestroy(&contents->workB);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),373,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
374 | ierr = PetscFree4(contents->rvalues,contents->svalues,contents->rwaits,contents->swaits)PetscFreeA(4,374,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,&(contents->rvalues),&(contents->svalues),& (contents->rwaits),&(contents->swaits));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),374,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
375 | ierr = PetscFree(contents)((*PetscTrFree)((void*)(contents),375,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((contents) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),375,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
376 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
377 | } | |||
378 | ||||
379 | /* | |||
380 | This is a "dummy function" that handles the case where matrix C was created as a dense matrix | |||
381 | directly by the user and passed to MatMatMult() with the MAT_REUSE_MATRIX option | |||
382 | ||||
383 | It is the same as MatMatMultSymbolic_MPIAIJ_MPIDense() except does not create C | |||
384 | */ | |||
385 | PetscErrorCode MatMatMultNumeric_MPIDense(Mat A,Mat B,Mat C) | |||
386 | { | |||
387 | PetscErrorCode ierr; | |||
388 | PetscBool flg; | |||
389 | Mat_MPIAIJ *aij = (Mat_MPIAIJ*) A->data; | |||
390 | PetscInt nz = aij->B->cmap->n,to_n,to_entries,from_n,from_entries; | |||
391 | PetscContainer container; | |||
392 | MPIAIJ_MPIDense *contents; | |||
393 | VecScatter ctx = aij->Mvctx; | |||
394 | ||||
395 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 395; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
396 | ierr = PetscObjectTypeCompare((PetscObject)B,MATMPIDENSE"mpidense",&flg);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),396,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
397 | if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Second matrix must be mpidense")return PetscError(((MPI_Comm)0x44000001),397,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,62,PETSC_ERROR_INITIAL,"Second matrix must be mpidense"); | |||
398 | ||||
399 | /* Handle case where where user provided the final C matrix rather than calling MatMatMult() with MAT_INITIAL_MATRIX*/ | |||
400 | ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ"mpiaij",&flg);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),400,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
401 | if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"First matrix must be MPIAIJ")return PetscError(((MPI_Comm)0x44000001),401,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,62,PETSC_ERROR_INITIAL,"First matrix must be MPIAIJ"); | |||
402 | ||||
403 | C->ops->matmultnumeric = MatMatMultNumeric_MPIAIJ_MPIDense; | |||
404 | ||||
405 | ierr = PetscNew(&contents)PetscMallocA(1,PETSC_TRUE,405,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(1)*sizeof(**((&contents))),((&contents)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),405,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
406 | /* Create work matrix used to store off processor rows of B needed for local product */ | |||
407 | ierr = MatCreateSeqDense(PETSC_COMM_SELF((MPI_Comm)0x44000001),nz,B->cmap->N,NULL((void*)0),&contents->workB);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),407,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
408 | /* Create work arrays needed */ | |||
409 | ierr = VecScatterGetRemoteCount_Private(ctx,PETSC_TRUE/*send*/,&to_n,&to_entries);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),409,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
410 | ierr = VecScatterGetRemoteCount_Private(ctx,PETSC_FALSE/*recv*/,&from_n,&from_entries);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),410,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
411 | ierr = PetscMalloc4(B->cmap->N*from_entries,&contents->rvalues,B->cmap->N*to_entries,&contents->svalues,from_n,&contents->rwaits,to_n,&contents->swaits)PetscMallocA(4,PETSC_FALSE,411,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(B->cmap->N*from_entries)*sizeof(**(&contents ->rvalues)),(&contents->rvalues),(size_t)(B->cmap ->N*to_entries)*sizeof(**(&contents->svalues)),(& contents->svalues),(size_t)(from_n)*sizeof(**(&contents ->rwaits)),(&contents->rwaits),(size_t)(to_n)*sizeof (**(&contents->swaits)),(&contents->swaits));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),411,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
412 | ||||
413 | ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A),&container);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),413,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
414 | ierr = PetscContainerSetPointer(container,contents);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),414,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
415 | ierr = PetscContainerSetUserDestroy(container,MatMPIAIJ_MPIDenseDestroy);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),415,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
416 | ierr = PetscObjectCompose((PetscObject)C,"workB",(PetscObject)container);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),416,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
417 | ierr = PetscContainerDestroy(&container);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),417,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
418 | ||||
419 | ierr = (*C->ops->matmultnumeric)(A,B,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),419,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
420 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
421 | } | |||
422 | ||||
423 | PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C) | |||
424 | { | |||
425 | PetscErrorCode ierr; | |||
426 | Mat_MPIAIJ *aij = (Mat_MPIAIJ*) A->data; | |||
427 | PetscInt nz = aij->B->cmap->n,to_n,to_entries,from_n,from_entries; | |||
428 | PetscContainer container; | |||
429 | MPIAIJ_MPIDense *contents; | |||
430 | VecScatter ctx = aij->Mvctx; | |||
431 | PetscInt m = A->rmap->n,n=B->cmap->n; | |||
432 | ||||
433 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 433; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
434 | ierr = MatCreate(PetscObjectComm((PetscObject)B),C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),434,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
435 | ierr = MatSetSizes(*C,m,n,A->rmap->N,B->cmap->N);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),435,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
436 | ierr = MatSetBlockSizesFromMats(*C,A,B);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),436,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
437 | ierr = MatSetType(*C,MATMPIDENSE"mpidense");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),437,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
438 | ierr = MatMPIDenseSetPreallocation(*C,NULL((void*)0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),438,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
439 | ierr = MatAssemblyBegin(*C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),439,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
440 | ierr = MatAssemblyEnd(*C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),440,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
441 | ||||
442 | (*C)->ops->matmultnumeric = MatMatMultNumeric_MPIAIJ_MPIDense; | |||
443 | ||||
444 | ierr = PetscNew(&contents)PetscMallocA(1,PETSC_TRUE,444,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(1)*sizeof(**((&contents))),((&contents)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),444,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
445 | /* Create work matrix used to store off processor rows of B needed for local product */ | |||
446 | ierr = MatCreateSeqDense(PETSC_COMM_SELF((MPI_Comm)0x44000001),nz,B->cmap->N,NULL((void*)0),&contents->workB);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),446,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
447 | /* Create work arrays needed */ | |||
448 | ierr = VecScatterGetRemoteCount_Private(ctx,PETSC_TRUE/*send*/,&to_n,&to_entries);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),448,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
449 | ierr = VecScatterGetRemoteCount_Private(ctx,PETSC_FALSE/*recv*/,&from_n,&from_entries);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),449,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
450 | ierr = PetscMalloc4(B->cmap->N*from_entries,&contents->rvalues,B->cmap->N*to_entries,&contents->svalues,from_n,&contents->rwaits,to_n,&contents->swaits)PetscMallocA(4,PETSC_FALSE,450,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(B->cmap->N*from_entries)*sizeof(**(&contents ->rvalues)),(&contents->rvalues),(size_t)(B->cmap ->N*to_entries)*sizeof(**(&contents->svalues)),(& contents->svalues),(size_t)(from_n)*sizeof(**(&contents ->rwaits)),(&contents->rwaits),(size_t)(to_n)*sizeof (**(&contents->swaits)),(&contents->swaits));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),450,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
451 | ||||
452 | ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A),&container);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),452,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
453 | ierr = PetscContainerSetPointer(container,contents);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),453,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
454 | ierr = PetscContainerSetUserDestroy(container,MatMPIAIJ_MPIDenseDestroy);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),454,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
455 | ierr = PetscObjectCompose((PetscObject)(*C),"workB",(PetscObject)container);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),455,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
456 | ierr = PetscContainerDestroy(&container);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),456,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
457 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
458 | } | |||
459 | ||||
460 | /* | |||
461 | Performs an efficient scatter on the rows of B needed by this process; this is | |||
462 | a modification of the VecScatterBegin_() routines. | |||
463 | */ | |||
464 | PetscErrorCode MatMPIDenseScatter(Mat A,Mat B,Mat C,Mat *outworkB) | |||
465 | { | |||
466 | Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; | |||
467 | PetscErrorCode ierr; | |||
468 | const PetscScalar *b; | |||
469 | PetscScalar *w,*svalues,*rvalues; | |||
470 | VecScatter ctx = aij->Mvctx; | |||
471 | PetscInt i,j,k; | |||
472 | const PetscInt *sindices,*sstarts,*rindices,*rstarts; | |||
473 | const PetscMPIInt *sprocs,*rprocs; | |||
474 | PetscInt nsends,nrecvs,nrecvs2; | |||
475 | MPI_Request *swaits,*rwaits; | |||
476 | MPI_Comm comm; | |||
477 | PetscMPIInt tag = ((PetscObject)ctx)->tag,ncols = B->cmap->N, nrows = aij->B->cmap->n,imdex,nrowsB = B->rmap->n,nsends_mpi,nrecvs_mpi; | |||
478 | MPI_Status status; | |||
479 | MPIAIJ_MPIDense *contents; | |||
480 | PetscContainer container; | |||
481 | Mat workB; | |||
482 | ||||
483 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 483; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
484 | ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),484,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
485 | ierr = PetscObjectQuery((PetscObject)C,"workB",(PetscObject*)&container);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),485,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
486 | if (!container) SETERRQ(comm,PETSC_ERR_PLIB,"Container does not exist")return PetscError(comm,486,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,77,PETSC_ERROR_INITIAL,"Container does not exist"); | |||
487 | ierr = PetscContainerGetPointer(container,(void**)&contents);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),487,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
488 | ||||
489 | workB = *outworkB = contents->workB; | |||
490 | if (nrows != workB->rmap->n) SETERRQ2(comm,PETSC_ERR_PLIB,"Number of rows of workB %D not equal to columns of aij->B %D",nrows,workB->cmap->n)return PetscError(comm,490,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,77,PETSC_ERROR_INITIAL,"Number of rows of workB %D not equal to columns of aij->B %D" ,nrows,workB->cmap->n); | |||
491 | ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&sindices,&sprocs,NULL((void*)0)/*bs*/);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),491,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
492 | ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,&rindices,&rprocs,NULL((void*)0)/*bs*/);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),492,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
493 | ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),493,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
494 | ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),494,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
495 | svalues = contents->svalues; | |||
496 | rvalues = contents->rvalues; | |||
497 | swaits = contents->swaits; | |||
498 | rwaits = contents->rwaits; | |||
499 | ||||
500 | ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),500,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
501 | ierr = MatDenseGetArray(workB,&w);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),501,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
502 | ||||
503 | for (i=0; i<nrecvs; i++) { | |||
504 | ierr = MPI_Irecv(rvalues+ncols*(rstarts[i]-rstarts[0]),ncols*(rstarts[i+1]-rstarts[i]),MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i)((petsc_irecv_ct++,0) || PetscMPITypeSize((ncols*(rstarts[i+1 ]-rstarts[i])),(((MPI_Datatype)0x4c00080b)),&(petsc_irecv_len )) || MPI_Irecv((rvalues+ncols*(rstarts[i]-rstarts[0])),(ncols *(rstarts[i+1]-rstarts[i])),(((MPI_Datatype)0x4c00080b)),(rprocs [i]),(tag),(comm),(rwaits+i)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),504,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
505 | } | |||
506 | ||||
507 | for (i=0; i<nsends; i++) { | |||
508 | /* pack a message at a time */ | |||
509 | for (j=0; j<sstarts[i+1]-sstarts[i]; j++) { | |||
510 | for (k=0; k<ncols; k++) { | |||
511 | svalues[ncols*(sstarts[i]-sstarts[0]+j) + k] = b[sindices[sstarts[i]+j] + nrowsB*k]; | |||
512 | } | |||
513 | } | |||
514 | ierr = MPI_Isend(svalues+ncols*(sstarts[i]-sstarts[0]),ncols*(sstarts[i+1]-sstarts[i]),MPIU_SCALAR,sprocs[i],tag,comm,swaits+i)((petsc_isend_ct++,0) || PetscMPITypeSize((ncols*(sstarts[i+1 ]-sstarts[i])),(((MPI_Datatype)0x4c00080b)),&(petsc_isend_len )) || MPI_Isend((svalues+ncols*(sstarts[i]-sstarts[0])),(ncols *(sstarts[i+1]-sstarts[i])),(((MPI_Datatype)0x4c00080b)),(sprocs [i]),(tag),(comm),(swaits+i)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),514,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
515 | } | |||
516 | ||||
517 | nrecvs2 = nrecvs; | |||
518 | while (nrecvs2) { | |||
519 | ierr = MPI_Waitany(nrecvs_mpi,rwaits,&imdex,&status)((petsc_wait_any_ct++,petsc_sum_of_waits_ct++,0) || MPI_Waitany ((nrecvs_mpi),(rwaits),(&imdex),(&status)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),519,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
520 | nrecvs2--; | |||
521 | /* unpack a message at a time */ | |||
522 | for (j=0; j<rstarts[imdex+1]-rstarts[imdex]; j++) { | |||
523 | for (k=0; k<ncols; k++) { | |||
524 | w[rindices[rstarts[imdex]+j] + nrows*k] = rvalues[ncols*(rstarts[imdex]-rstarts[0]+j) + k]; | |||
525 | } | |||
526 | } | |||
527 | } | |||
528 | if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE)((petsc_wait_all_ct++,petsc_sum_of_waits_ct += (PetscLogDouble ) (nsends_mpi),0) || MPI_Waitall((nsends_mpi),(swaits),((MPI_Status *)1)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),528,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0);} | |||
529 | ||||
530 | ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&sindices,&sprocs,NULL((void*)0)/*bs*/);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),530,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
531 | ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,&rindices,&rprocs,NULL((void*)0)/*bs*/);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),531,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
532 | ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),532,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
533 | ierr = MatDenseRestoreArray(workB,&w);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),533,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
534 | ierr = MatAssemblyBegin(workB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),534,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
535 | ierr = MatAssemblyEnd(workB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),535,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
536 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
537 | ||||
538 | } | |||
539 | extern PetscErrorCode MatMatMultNumericAdd_SeqAIJ_SeqDense(Mat,Mat,Mat); | |||
540 | ||||
541 | PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIDense(Mat A,Mat B,Mat C) | |||
542 | { | |||
543 | PetscErrorCode ierr; | |||
544 | Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; | |||
545 | Mat_MPIDense *bdense = (Mat_MPIDense*)B->data; | |||
546 | Mat_MPIDense *cdense = (Mat_MPIDense*)C->data; | |||
547 | Mat workB; | |||
548 | ||||
549 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 549; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
550 | /* diagonal block of A times all local rows of B*/ | |||
551 | ierr = MatMatMultNumeric_SeqAIJ_SeqDense(aij->A,bdense->A,cdense->A);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),551,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
552 | ||||
553 | /* get off processor parts of B needed to complete the product */ | |||
554 | ierr = MatMPIDenseScatter(A,B,C,&workB);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),554,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
555 | ||||
556 | /* off-diagonal block of A times nonlocal rows of B */ | |||
557 | ierr = MatMatMultNumericAdd_SeqAIJ_SeqDense(aij->B,workB,cdense->A);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),557,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
558 | ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),558,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
559 | ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),559,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
560 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
561 | } | |||
562 | ||||
563 | PetscErrorCode MatMatMultNumeric_MPIAIJ_MPIAIJ(Mat A,Mat P,Mat C) | |||
564 | { | |||
565 | PetscErrorCode ierr; | |||
566 | Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data,*c=(Mat_MPIAIJ*)C->data; | |||
567 | Mat_SeqAIJ *ad = (Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data; | |||
568 | Mat_SeqAIJ *cd = (Mat_SeqAIJ*)(c->A)->data,*co=(Mat_SeqAIJ*)(c->B)->data; | |||
569 | PetscInt *adi = ad->i,*adj,*aoi=ao->i,*aoj; | |||
570 | PetscScalar *ada,*aoa,*cda=cd->a,*coa=co->a; | |||
571 | Mat_SeqAIJ *p_loc,*p_oth; | |||
572 | PetscInt *pi_loc,*pj_loc,*pi_oth,*pj_oth,*pj; | |||
573 | PetscScalar *pa_loc,*pa_oth,*pa,valtmp,*ca; | |||
574 | PetscInt cm = C->rmap->n,anz,pnz; | |||
575 | Mat_APMPI *ptap = c->ap; | |||
576 | PetscScalar *apa_sparse; | |||
577 | PetscInt *api,*apj,*apJ,i,j,k,row; | |||
578 | PetscInt cstart = C->cmap->rstart; | |||
579 | PetscInt cdnz,conz,k0,k1,nextp; | |||
580 | MPI_Comm comm; | |||
581 | PetscMPIInt size; | |||
582 | ||||
583 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 583; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
584 | ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),584,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
585 | ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),585,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
586 | ||||
587 | if (!ptap->P_oth && size>1) { | |||
| ||||
588 | SETERRQ(comm,PETSC_ERR_ARG_WRONGSTATE,"AP cannot be reused. Do not call MatFreeIntermediateDataStructures() or use '-mat_freeintermediatedatastructures'")return PetscError(comm,588,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,73,PETSC_ERROR_INITIAL,"AP cannot be reused. Do not call MatFreeIntermediateDataStructures() or use '-mat_freeintermediatedatastructures'" ); | |||
589 | } | |||
590 | apa_sparse = ptap->apa; | |||
591 | ||||
592 | /* 1) get P_oth = ptap->P_oth and P_loc = ptap->P_loc */ | |||
593 | /*-----------------------------------------------------*/ | |||
594 | /* update numerical values of P_oth and P_loc */ | |||
595 | ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_REUSE_MATRIX,&ptap->startsj_s,&ptap->startsj_r,&ptap->bufa,&ptap->P_oth);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),595,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
596 | ierr = MatMPIAIJGetLocalMat(P,MAT_REUSE_MATRIX,&ptap->P_loc);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),596,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
597 | ||||
598 | /* 2) compute numeric C_loc = A_loc*P = Ad*P_loc + Ao*P_oth */ | |||
599 | /*----------------------------------------------------------*/ | |||
600 | /* get data from symbolic products */ | |||
601 | p_loc = (Mat_SeqAIJ*)(ptap->P_loc)->data; | |||
602 | pi_loc = p_loc->i; pj_loc = p_loc->j; pa_loc = p_loc->a; | |||
603 | if (size >1) { | |||
604 | p_oth = (Mat_SeqAIJ*)(ptap->P_oth)->data; | |||
605 | pi_oth = p_oth->i; pj_oth = p_oth->j; pa_oth = p_oth->a; | |||
606 | } else { | |||
607 | p_oth = NULL((void*)0); pi_oth = NULL((void*)0); pj_oth = NULL((void*)0); pa_oth = NULL((void*)0); | |||
608 | } | |||
609 | ||||
610 | api = ptap->api; | |||
611 | apj = ptap->apj; | |||
612 | for (i=0; i<cm; i++) { | |||
613 | apJ = apj + api[i]; | |||
614 | ||||
615 | /* diagonal portion of A */ | |||
616 | anz = adi[i+1] - adi[i]; | |||
617 | adj = ad->j + adi[i]; | |||
618 | ada = ad->a + adi[i]; | |||
619 | for (j=0; j<anz; j++) { | |||
620 | row = adj[j]; | |||
621 | pnz = pi_loc[row+1] - pi_loc[row]; | |||
622 | pj = pj_loc + pi_loc[row]; | |||
623 | pa = pa_loc + pi_loc[row]; | |||
624 | /* perform sparse axpy */ | |||
625 | valtmp = ada[j]; | |||
626 | nextp = 0; | |||
627 | for (k=0; nextp<pnz; k++) { | |||
628 | if (apJ[k] == pj[nextp]) { /* column of AP == column of P */ | |||
629 | apa_sparse[k] += valtmp*pa[nextp++]; | |||
630 | } | |||
631 | } | |||
632 | ierr = PetscLogFlops(2.0*pnz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),632,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
633 | } | |||
634 | ||||
635 | /* off-diagonal portion of A */ | |||
636 | anz = aoi[i+1] - aoi[i]; | |||
637 | aoj = ao->j + aoi[i]; | |||
638 | aoa = ao->a + aoi[i]; | |||
639 | for (j=0; j<anz; j++) { | |||
640 | row = aoj[j]; | |||
641 | pnz = pi_oth[row+1] - pi_oth[row]; | |||
| ||||
642 | pj = pj_oth + pi_oth[row]; | |||
643 | pa = pa_oth + pi_oth[row]; | |||
644 | /* perform sparse axpy */ | |||
645 | valtmp = aoa[j]; | |||
646 | nextp = 0; | |||
647 | for (k=0; nextp<pnz; k++) { | |||
648 | if (apJ[k] == pj[nextp]) { /* column of AP == column of P */ | |||
649 | apa_sparse[k] += valtmp*pa[nextp++]; | |||
650 | } | |||
651 | } | |||
652 | ierr = PetscLogFlops(2.0*pnz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),652,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
653 | } | |||
654 | ||||
655 | /* set values in C */ | |||
656 | cdnz = cd->i[i+1] - cd->i[i]; | |||
657 | conz = co->i[i+1] - co->i[i]; | |||
658 | ||||
659 | /* 1st off-diagoanl part of C */ | |||
660 | ca = coa + co->i[i]; | |||
661 | k = 0; | |||
662 | for (k0=0; k0<conz; k0++) { | |||
663 | if (apJ[k] >= cstart) break; | |||
664 | ca[k0] = apa_sparse[k]; | |||
665 | apa_sparse[k] = 0.0; | |||
666 | k++; | |||
667 | } | |||
668 | ||||
669 | /* diagonal part of C */ | |||
670 | ca = cda + cd->i[i]; | |||
671 | for (k1=0; k1<cdnz; k1++) { | |||
672 | ca[k1] = apa_sparse[k]; | |||
673 | apa_sparse[k] = 0.0; | |||
674 | k++; | |||
675 | } | |||
676 | ||||
677 | /* 2nd off-diagoanl part of C */ | |||
678 | ca = coa + co->i[i]; | |||
679 | for (; k0<conz; k0++) { | |||
680 | ca[k0] = apa_sparse[k]; | |||
681 | apa_sparse[k] = 0.0; | |||
682 | k++; | |||
683 | } | |||
684 | } | |||
685 | ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),685,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
686 | ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),686,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
687 | ||||
688 | if (ptap->freestruct) { | |||
689 | ierr = MatFreeIntermediateDataStructures(C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),689,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
690 | } | |||
691 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
692 | } | |||
693 | ||||
694 | /* same as MatMatMultSymbolic_MPIAIJ_MPIAIJ_nonscalable(), except using LLCondensed to avoid O(BN) memory requirement */ | |||
695 | PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ(Mat A,Mat P,PetscReal fill,Mat *C) | |||
696 | { | |||
697 | PetscErrorCode ierr; | |||
698 | MPI_Comm comm; | |||
699 | PetscMPIInt size; | |||
700 | Mat Cmpi; | |||
701 | Mat_APMPI *ptap; | |||
702 | PetscFreeSpaceList free_space = NULL((void*)0),current_space=NULL((void*)0); | |||
703 | Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data,*c; | |||
704 | Mat_SeqAIJ *ad = (Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*p_loc,*p_oth; | |||
705 | PetscInt *pi_loc,*pj_loc,*pi_oth,*pj_oth,*dnz,*onz; | |||
706 | PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,*aoj=ao->j,rstart=A->rmap->rstart; | |||
707 | PetscInt i,pnz,row,*api,*apj,*Jptr,apnz,nspacedouble=0,j,nzi,*lnk,apnz_max=0; | |||
708 | PetscInt am=A->rmap->n,pn=P->cmap->n,pm=P->rmap->n,lsize=pn+20; | |||
709 | PetscReal afill; | |||
710 | MatType mtype; | |||
711 | ||||
712 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 712; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
713 | ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),713,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
714 | ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),714,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
715 | ||||
716 | /* create struct Mat_APMPI and attached it to C later */ | |||
717 | ierr = PetscNew(&ptap)PetscMallocA(1,PETSC_TRUE,717,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(1)*sizeof(**((&ptap))),((&ptap)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),717,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
718 | ||||
719 | /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ | |||
720 | ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&ptap->startsj_s,&ptap->startsj_r,&ptap->bufa,&ptap->P_oth);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),720,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
721 | ||||
722 | /* get P_loc by taking all local rows of P */ | |||
723 | ierr = MatMPIAIJGetLocalMat(P,MAT_INITIAL_MATRIX,&ptap->P_loc);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),723,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
724 | ||||
725 | p_loc = (Mat_SeqAIJ*)(ptap->P_loc)->data; | |||
726 | pi_loc = p_loc->i; pj_loc = p_loc->j; | |||
727 | if (size > 1) { | |||
728 | p_oth = (Mat_SeqAIJ*)(ptap->P_oth)->data; | |||
729 | pi_oth = p_oth->i; pj_oth = p_oth->j; | |||
730 | } else { | |||
731 | p_oth = NULL((void*)0); | |||
732 | pi_oth = NULL((void*)0); pj_oth = NULL((void*)0); | |||
733 | } | |||
734 | ||||
735 | /* first, compute symbolic AP = A_loc*P = A_diag*P_loc + A_off*P_oth */ | |||
736 | /*-------------------------------------------------------------------*/ | |||
737 | ierr = PetscMalloc1(am+2,&api)PetscMallocA(1,PETSC_FALSE,737,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(am+2)*sizeof(**(&api)),(&api));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),737,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
738 | ptap->api = api; | |||
739 | api[0] = 0; | |||
740 | ||||
741 | ierr = PetscLLCondensedCreate_Scalable(lsize,&lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),741,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
742 | ||||
743 | /* Initial FreeSpace size is fill*(nnz(A)+nnz(P)) */ | |||
744 | ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(adi[am],PetscIntSumTruncate(aoi[am],pi_loc[pm]))),&free_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),744,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
745 | current_space = free_space; | |||
746 | ierr = MatPreallocateInitialize(comm,am,pn,dnz,onz)0; do { PetscErrorCode _4_ierr; PetscInt __nrows = (am),__ncols = (pn),__rstart,__start,__end; _4_ierr = PetscMallocA(2,PETSC_TRUE ,746,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)((size_t)__nrows)*sizeof(**(&dnz)),(&dnz),(size_t )((size_t)__nrows)*sizeof(**(&onz)),(&onz));do {if (__builtin_expect (!!(_4_ierr),0)) return PetscError(((MPI_Comm)0x44000001),746 ,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0); __start = 0; __end = __start; _4_ierr = MPI_Scan(&__ncols,&__end,1,((MPI_Datatype )0x4c000405),(MPI_Op)(0x58000003),comm);do {if (__builtin_expect (!!(_4_ierr),0)) return PetscError(((MPI_Comm)0x44000001),746 ,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0); __start = __end - __ncols; _4_ierr = MPI_Scan(&__nrows,&__rstart,1,( (MPI_Datatype)0x4c000405),(MPI_Op)(0x58000003),comm);do {if ( __builtin_expect(!!(_4_ierr),0)) return PetscError(((MPI_Comm )0x44000001),746,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0); __rstart = __rstart - __nrows; do { } while(0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),746,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
747 | for (i=0; i<am; i++) { | |||
748 | /* diagonal portion of A */ | |||
749 | nzi = adi[i+1] - adi[i]; | |||
750 | for (j=0; j<nzi; j++) { | |||
751 | row = *adj++; | |||
752 | pnz = pi_loc[row+1] - pi_loc[row]; | |||
753 | Jptr = pj_loc + pi_loc[row]; | |||
754 | /* Expand list if it is not long enough */ | |||
755 | if (pnz+apnz_max > lsize) { | |||
756 | lsize = pnz+apnz_max; | |||
757 | ierr = PetscLLCondensedExpand_Scalable(lsize, &lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),757,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
758 | } | |||
759 | /* add non-zero cols of P into the sorted linked list lnk */ | |||
760 | ierr = PetscLLCondensedAddSorted_Scalable(pnz,Jptr,lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),760,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
761 | apnz = *lnk; /* The first element in the list is the number of items in the list */ | |||
762 | api[i+1] = api[i] + apnz; | |||
763 | if (apnz > apnz_max) apnz_max = apnz; | |||
764 | } | |||
765 | /* off-diagonal portion of A */ | |||
766 | nzi = aoi[i+1] - aoi[i]; | |||
767 | for (j=0; j<nzi; j++) { | |||
768 | row = *aoj++; | |||
769 | pnz = pi_oth[row+1] - pi_oth[row]; | |||
770 | Jptr = pj_oth + pi_oth[row]; | |||
771 | /* Expand list if it is not long enough */ | |||
772 | if (pnz+apnz_max > lsize) { | |||
773 | lsize = pnz + apnz_max; | |||
774 | ierr = PetscLLCondensedExpand_Scalable(lsize, &lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),774,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
775 | } | |||
776 | /* add non-zero cols of P into the sorted linked list lnk */ | |||
777 | ierr = PetscLLCondensedAddSorted_Scalable(pnz,Jptr,lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),777,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
778 | apnz = *lnk; /* The first element in the list is the number of items in the list */ | |||
779 | api[i+1] = api[i] + apnz; | |||
780 | if (apnz > apnz_max) apnz_max = apnz; | |||
781 | } | |||
782 | apnz = *lnk; | |||
783 | api[i+1] = api[i] + apnz; | |||
784 | if (apnz > apnz_max) apnz_max = apnz; | |||
785 | ||||
786 | /* if free space is not available, double the total space in the list */ | |||
787 | if (current_space->local_remaining<apnz) { | |||
788 | ierr = PetscFreeSpaceGet(PetscIntSumTruncate(apnz,current_space->total_array_size),¤t_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),788,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
789 | nspacedouble++; | |||
790 | } | |||
791 | ||||
792 | /* Copy data into free space, then initialize lnk */ | |||
793 | ierr = PetscLLCondensedClean_Scalable(apnz,current_space->array,lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),793,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
794 | ierr = MatPreallocateSet(i+rstart,apnz,current_space->array,dnz,onz)0;do { PetscInt __i; if (i+rstart < __rstart) return PetscError (((MPI_Comm)0x44000001),794,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,63,PETSC_ERROR_INITIAL,"Trying to set preallocation for row %D less than first local row %D" ,i+rstart,__rstart); if (i+rstart >= __rstart+__nrows) return PetscError(((MPI_Comm)0x44000001),794,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,63,PETSC_ERROR_INITIAL,"Trying to set preallocation for row %D greater than last local row %D" ,i+rstart,__rstart+__nrows-1); for (__i=0; __i<apnz; __i++ ) { if ((current_space->array)[__i] < __start || (current_space ->array)[__i] >= __end) onz[i+rstart - __rstart]++; else if (dnz[i+rstart - __rstart] < __ncols) dnz[i+rstart - __rstart ]++; }} while (0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),794,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
795 | ||||
796 | current_space->array += apnz; | |||
797 | current_space->local_used += apnz; | |||
798 | current_space->local_remaining -= apnz; | |||
799 | } | |||
800 | ||||
801 | /* Allocate space for apj, initialize apj, and */ | |||
802 | /* destroy list of free space and other temporary array(s) */ | |||
803 | ierr = PetscMalloc1(api[am]+1,&ptap->apj)PetscMallocA(1,PETSC_FALSE,803,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(api[am]+1)*sizeof(**(&ptap->apj)),(&ptap ->apj));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),803,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
804 | apj = ptap->apj; | |||
805 | ierr = PetscFreeSpaceContiguous(&free_space,ptap->apj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),805,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
806 | ierr = PetscLLCondensedDestroy_Scalable(lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),806,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
807 | ||||
808 | /* create and assemble symbolic parallel matrix Cmpi */ | |||
809 | /*----------------------------------------------------*/ | |||
810 | ierr = MatCreate(comm,&Cmpi);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),810,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
811 | ierr = MatSetSizes(Cmpi,am,pn,PETSC_DETERMINE-1,PETSC_DETERMINE-1);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),811,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
812 | ierr = MatSetBlockSizesFromMats(Cmpi,A,P);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),812,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
813 | ierr = MatGetType(A,&mtype);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),813,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
814 | ierr = MatSetType(Cmpi,mtype);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),814,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
815 | ierr = MatMPIAIJSetPreallocation(Cmpi,0,dnz,0,onz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),815,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
816 | ||||
817 | /* malloc apa for assembly Cmpi */ | |||
818 | ierr = PetscCalloc1(apnz_max,&ptap->apa)PetscMallocA(1,PETSC_TRUE,818,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(apnz_max)*sizeof(**(&ptap->apa)),(&ptap-> apa));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),818,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
819 | ||||
820 | ierr = MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Cmpi, apj, api);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),820,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
821 | ierr = MatAssemblyBegin(Cmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),821,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
822 | ierr = MatAssemblyEnd(Cmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),822,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
823 | ierr = MatPreallocateFinalize(dnz,onz)0;_4_ierr = PetscFreeA(2,823,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,&(dnz),&(onz));do {if (__builtin_expect(!!(_4_ierr), 0)) return PetscError(((MPI_Comm)0x44000001),823,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),823,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
824 | ||||
825 | ptap->destroy = Cmpi->ops->destroy; | |||
826 | ptap->duplicate = Cmpi->ops->duplicate; | |||
827 | Cmpi->ops->matmultnumeric = MatMatMultNumeric_MPIAIJ_MPIAIJ; | |||
828 | Cmpi->ops->destroy = MatDestroy_MPIAIJ_MatMatMult; | |||
829 | Cmpi->ops->freeintermediatedatastructures = MatFreeIntermediateDataStructures_MPIAIJ_AP; | |||
830 | ||||
831 | /* attach the supporting struct to Cmpi for reuse */ | |||
832 | c = (Mat_MPIAIJ*)Cmpi->data; | |||
833 | c->ap = ptap; | |||
834 | *C = Cmpi; | |||
835 | ||||
836 | /* set MatInfo */ | |||
837 | afill = (PetscReal)api[am]/(adi[am]+aoi[am]+pi_loc[pm]+1) + 1.e-5; | |||
838 | if (afill < 1.0) afill = 1.0; | |||
839 | Cmpi->info.mallocs = nspacedouble; | |||
840 | Cmpi->info.fill_ratio_given = fill; | |||
841 | Cmpi->info.fill_ratio_needed = afill; | |||
842 | ||||
843 | #if defined(PETSC_USE_INFO1) | |||
844 | if (api[am]) { | |||
845 | ierr = PetscInfo3(Cmpi,"Reallocs %D; Fill ratio: given %g needed %g.\n",nspacedouble,(double)fill,(double)afill)PetscInfo_Private(__func__,Cmpi,"Reallocs %D; Fill ratio: given %g needed %g.\n" ,nspacedouble,(double)fill,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),845,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
846 | ierr = PetscInfo1(Cmpi,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)PetscInfo_Private(__func__,Cmpi,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n" ,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),846,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
847 | } else { | |||
848 | ierr = PetscInfo(Cmpi,"Empty matrix product\n")PetscInfo_Private(__func__,Cmpi,"Empty matrix product\n");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),848,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
849 | } | |||
850 | #endif | |||
851 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
852 | } | |||
853 | ||||
/*
   Merge3SortedArrays - Merges three ascending index arrays into a single
   ascending array. Needed by the seqMPI matrix-matrix multiplication.

   Input Parameters:
+  size1, in1 - number of entries and entries of the first sorted array
.  size2, in2 - number of entries and entries of the second sorted array
-  size3, in3 - number of entries and entries of the third sorted array

   Output Parameters:
+  size4 - number of entries written to out
-  out   - the merged entries; the caller must provide room for up to
           size1+size2+size3 entries

   Notes:
   An index that sits at the head of more than one input array at the same
   time is written to out only once, so duplicates across the inputs show up
   a single time in the output.

   The counters are PetscInt rather than int so that they have the same width
   as the sizes they are compared against and as *size4.
*/
static void Merge3SortedArrays(PetscInt size1, PetscInt *in1,
                               PetscInt size2, PetscInt *in2,
                               PetscInt size3, PetscInt *in3,
                               PetscInt *size4, PetscInt *out)
{
  PetscInt i = 0, j = 0, k = 0, l = 0;

  /* Each pass emits the smallest head among the arrays that still have entries */
  while (i < size1 || j < size2 || k < size3) {
    PetscInt head;

    if (i < size1) {
      head = in1[i];
      if (j < size2 && in2[j] < head) head = in2[j];
      if (k < size3 && in3[k] < head) head = in3[k];
    } else if (j < size2) {
      head = in2[j];
      if (k < size3 && in3[k] < head) head = in3[k];
    } else {
      head = in3[k];
    }

    /* Step past the emitted value in every array that currently shows it, */
    /* which is what collapses duplicates across the three inputs          */
    if (i < size1 && in1[i] == head) i++;
    if (j < size2 && in2[j] == head) j++;
    if (k < size3 && in3[k] == head) k++;

    out[l++] = head;
  }

  *size4 = l;
}
940 | ||||
941 | /* This matrix-matrix multiplication algorithm divides the multiplication into three multiplications and */ | |||
942 | /* adds up the products. Two of these three multiplications are performed with existing (sequential) */ | |||
943 | /* matrix-matrix multiplications. */ | |||
944 | PetscErrorCode MatMatMultSymbolic_MPIAIJ_MPIAIJ_seqMPI(Mat A, Mat P, PetscReal fill, Mat *C) | |||
945 | { | |||
946 | PetscErrorCode ierr; | |||
947 | MPI_Comm comm; | |||
948 | PetscMPIInt size; | |||
949 | Mat Cmpi; | |||
950 | Mat_APMPI *ptap; | |||
951 | PetscFreeSpaceList free_space_diag=NULL((void*)0), current_space=NULL((void*)0); | |||
952 | Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data; | |||
953 | Mat_SeqAIJ *ad =(Mat_SeqAIJ*)(a->A)->data,*ao=(Mat_SeqAIJ*)(a->B)->data,*p_loc; | |||
954 | Mat_MPIAIJ *p =(Mat_MPIAIJ*)P->data; | |||
955 | Mat_MPIAIJ *c; | |||
956 | Mat_SeqAIJ *adpd_seq, *p_off, *aopoth_seq; | |||
957 | PetscInt adponz, adpdnz; | |||
958 | PetscInt *pi_loc,*dnz,*onz; | |||
959 | PetscInt *adi=ad->i,*adj=ad->j,*aoi=ao->i,rstart=A->rmap->rstart; | |||
960 | PetscInt *lnk,i, i1=0,pnz,row,*adpoi,*adpoj, *api, *adpoJ, *aopJ, *apJ,*Jptr, aopnz, nspacedouble=0,j,nzi, | |||
961 | *apj,apnz, *adpdi, *adpdj, *adpdJ, *poff_i, *poff_j, *j_temp, *aopothi, *aopothj; | |||
962 | PetscInt am=A->rmap->n,pN=P->cmap->N,pn=P->cmap->n,pm=P->rmap->n, p_colstart, p_colend; | |||
963 | PetscBT lnkbt; | |||
964 | PetscReal afill; | |||
965 | PetscMPIInt rank; | |||
966 | Mat adpd, aopoth; | |||
967 | MatType mtype; | |||
968 | const char *prefix; | |||
969 | ||||
970 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 970; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
971 | ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),971,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
972 | ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),972,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
973 | ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),973,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
974 | ierr = MatGetOwnershipRangeColumn(P, &p_colstart, &p_colend); CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),974,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
975 | ||||
976 | /* create struct Mat_APMPI and attach it to C later */ | |||
977 | ierr = PetscNew(&ptap)PetscMallocA(1,PETSC_TRUE,977,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(1)*sizeof(**((&ptap))),((&ptap)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),977,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
978 | ||||
979 | /* get P_oth by taking rows of P (= non-zero cols of local A) from other processors */ | |||
980 | ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&ptap->startsj_s,&ptap->startsj_r,&ptap->bufa,&ptap->P_oth);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),980,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
981 | ||||
982 | /* get P_loc by taking all local rows of P */ | |||
983 | ierr = MatMPIAIJGetLocalMat(P,MAT_INITIAL_MATRIX,&ptap->P_loc);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),983,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
984 | ||||
985 | ||||
986 | p_loc = (Mat_SeqAIJ*)(ptap->P_loc)->data; | |||
987 | pi_loc = p_loc->i; | |||
988 | ||||
989 | /* Allocate memory for the i arrays of the matrices A*P, A_diag*P_off and A_offd * P */ | |||
990 | ierr = PetscMalloc1(am+2,&api)PetscMallocA(1,PETSC_FALSE,990,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(am+2)*sizeof(**(&api)),(&api));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),990,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
991 | ierr = PetscMalloc1(am+2,&adpoi)PetscMallocA(1,PETSC_FALSE,991,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(am+2)*sizeof(**(&adpoi)),(&adpoi));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),991,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
992 | ||||
993 | adpoi[0] = 0; | |||
994 | ptap->api = api; | |||
995 | api[0] = 0; | |||
996 | ||||
997 | /* create and initialize a linked list, will be used for both A_diag * P_loc_off and A_offd * P_oth */ | |||
998 | ierr = PetscLLCondensedCreate(pN,pN,&lnk,&lnkbt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),998,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
999 | ierr = MatPreallocateInitialize(comm,am,pn,dnz,onz)0; do { PetscErrorCode _4_ierr; PetscInt __nrows = (am),__ncols = (pn),__rstart,__start,__end; _4_ierr = PetscMallocA(2,PETSC_TRUE ,999,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)((size_t)__nrows)*sizeof(**(&dnz)),(&dnz),(size_t )((size_t)__nrows)*sizeof(**(&onz)),(&onz));do {if (__builtin_expect (!!(_4_ierr),0)) return PetscError(((MPI_Comm)0x44000001),999 ,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0); __start = 0; __end = __start; _4_ierr = MPI_Scan(&__ncols,&__end,1,((MPI_Datatype )0x4c000405),(MPI_Op)(0x58000003),comm);do {if (__builtin_expect (!!(_4_ierr),0)) return PetscError(((MPI_Comm)0x44000001),999 ,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0); __start = __end - __ncols; _4_ierr = MPI_Scan(&__nrows,&__rstart,1,( (MPI_Datatype)0x4c000405),(MPI_Op)(0x58000003),comm);do {if ( __builtin_expect(!!(_4_ierr),0)) return PetscError(((MPI_Comm )0x44000001),999,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0); __rstart = __rstart - __nrows; do { } while(0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),999,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1000 | ||||
1001 | /* Symbolic calc of A_loc_diag * P_loc_diag */ | |||
1002 | ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1002,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1003 | ierr = MatSetOptionsPrefix(a->A,prefix);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1003,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1004 | ierr = MatAppendOptionsPrefix(a->A,"inner_diag_");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1004,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1005 | ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(a->A, p->A, fill, &adpd);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1005,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1006 | adpd_seq = (Mat_SeqAIJ*)((adpd)->data); | |||
1007 | adpdi = adpd_seq->i; adpdj = adpd_seq->j; | |||
1008 | p_off = (Mat_SeqAIJ*)((p->B)->data); | |||
1009 | poff_i = p_off->i; poff_j = p_off->j; | |||
1010 | ||||
1011 | /* j_temp stores indices of a result row before they are added to the linked list */ | |||
1012 | ierr = PetscMalloc1(pN+2,&j_temp)PetscMallocA(1,PETSC_FALSE,1012,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(pN+2)*sizeof(**(&j_temp)),(&j_temp));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1012,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1013 | ||||
1014 | ||||
1015 | /* Symbolic calc of the A_diag * p_loc_off */ | |||
1016 | /* Initial FreeSpace size is fill*(nnz(A)+nnz(P)) */ | |||
1017 | ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,PetscIntSumTruncate(adi[am],PetscIntSumTruncate(aoi[am],pi_loc[pm]))),&free_space_diag);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1017,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1018 | current_space = free_space_diag; | |||
1019 | ||||
1020 | for (i=0; i<am; i++) { | |||
1021 | /* A_diag * P_loc_off */ | |||
1022 | nzi = adi[i+1] - adi[i]; | |||
1023 | for (j=0; j<nzi; j++) { | |||
1024 | row = *adj++; | |||
1025 | pnz = poff_i[row+1] - poff_i[row]; | |||
1026 | Jptr = poff_j + poff_i[row]; | |||
1027 | for(i1 = 0; i1 < pnz; i1++) { | |||
1028 | j_temp[i1] = p->garray[Jptr[i1]]; | |||
1029 | } | |||
1030 | /* add non-zero cols of P into the sorted linked list lnk */ | |||
1031 | ierr = PetscLLCondensedAddSorted(pnz,j_temp,lnk,lnkbt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1031,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1032 | } | |||
1033 | ||||
1034 | adponz = lnk[0]; | |||
1035 | adpoi[i+1] = adpoi[i] + adponz; | |||
1036 | ||||
1037 | /* if free space is not available, double the total space in the list */ | |||
1038 | if (current_space->local_remaining<adponz) { | |||
1039 | ierr = PetscFreeSpaceGet(PetscIntSumTruncate(adponz,current_space->total_array_size),&current_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1039,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1040 | nspacedouble++; | |||
1041 | } | |||
1042 | ||||
1043 | /* Copy data into free space, then initialize lnk */ | |||
1044 | ierr = PetscLLCondensedClean(pN,adponz,current_space->array,lnk,lnkbt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1044,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1045 | ||||
1046 | current_space->array += adponz; | |||
1047 | current_space->local_used += adponz; | |||
1048 | current_space->local_remaining -= adponz; | |||
1049 | } | |||
1050 | ||||
1051 | /* Symbolic calc of A_off * P_oth */ | |||
1052 | ierr = MatSetOptionsPrefix(a->B,prefix);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1052,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1053 | ierr = MatAppendOptionsPrefix(a->B,"inner_offdiag_");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1053,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1054 | ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(a->B, ptap->P_oth, fill, &aopoth);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1054,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1055 | aopoth_seq = (Mat_SeqAIJ*)((aopoth)->data); | |||
1056 | aopothi = aopoth_seq->i; aopothj = aopoth_seq->j; | |||
1057 | ||||
1058 | /* Allocate space for apj, adpj, aopj, ... */ | |||
1059 | /* destroy lists of free space and other temporary array(s) */ | |||
1060 | ||||
1061 | ierr = PetscMalloc1(aopothi[am] + adpoi[am] + adpdi[am]+2, &ptap->apj)PetscMallocA(1,PETSC_FALSE,1061,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(aopothi[am] + adpoi[am] + adpdi[am]+2)*sizeof(**(& ptap->apj)),(&ptap->apj));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1061,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1062 | ierr = PetscMalloc1(adpoi[am]+2, &adpoj)PetscMallocA(1,PETSC_FALSE,1062,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(adpoi[am]+2)*sizeof(**(&adpoj)),(&adpoj));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1062,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1063 | ||||
1064 | /* Copy from linked list to j-array */ | |||
1065 | ierr = PetscFreeSpaceContiguous(&free_space_diag,adpoj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1065,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1066 | ierr = PetscLLDestroy(lnk,lnkbt)(((*PetscTrFree)((void*)(lnk),1066,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((lnk) = 0,0)) || PetscBTDestroy(&(lnkbt)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1066,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1067 | ||||
1068 | adpoJ = adpoj; | |||
1069 | adpdJ = adpdj; | |||
1070 | aopJ = aopothj; | |||
1071 | apj = ptap->apj; | |||
1072 | apJ = apj; /* still empty */ | |||
1073 | ||||
1074 | /* Merge j-arrays of A_off * P, A_diag * P_loc_off, and */ | |||
1075 | /* A_diag * P_loc_diag to get A*P */ | |||
1076 | for (i = 0; i < am; i++) { | |||
1077 | aopnz = aopothi[i+1] - aopothi[i]; | |||
1078 | adponz = adpoi[i+1] - adpoi[i]; | |||
1079 | adpdnz = adpdi[i+1] - adpdi[i]; | |||
1080 | ||||
1081 | /* Correct indices from A_diag*P_diag */ | |||
1082 | for(i1 = 0; i1 < adpdnz; i1++) { | |||
1083 | adpdJ[i1] += p_colstart; | |||
1084 | } | |||
1085 | /* Merge j-arrays of A_diag * P_loc_off and A_diag * P_loc_diag and A_off * P_oth */ | |||
1086 | Merge3SortedArrays(adponz, adpoJ, adpdnz, adpdJ, aopnz, aopJ, &apnz, apJ); | |||
1087 | ierr = MatPreallocateSet(i+rstart, apnz, apJ, dnz, onz)0;do { PetscInt __i; if (i+rstart < __rstart) return PetscError (((MPI_Comm)0x44000001),1087,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,63,PETSC_ERROR_INITIAL,"Trying to set preallocation for row %D less than first local row %D" ,i+rstart,__rstart); if (i+rstart >= __rstart+__nrows) return PetscError(((MPI_Comm)0x44000001),1087,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,63,PETSC_ERROR_INITIAL,"Trying to set preallocation for row %D greater than last local row %D" ,i+rstart,__rstart+__nrows-1); for (__i=0; __i<apnz; __i++ ) { if ((apJ)[__i] < __start || (apJ)[__i] >= __end) onz [i+rstart - __rstart]++; else if (dnz[i+rstart - __rstart] < __ncols) dnz[i+rstart - __rstart]++; }} while (0); CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1087,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1088 | ||||
1089 | aopJ += aopnz; | |||
1090 | adpoJ += adponz; | |||
1091 | adpdJ += adpdnz; | |||
1092 | apJ += apnz; | |||
1093 | api[i+1] = api[i] + apnz; | |||
1094 | } | |||
1095 | ||||
1096 | /* malloc apa to store dense row A[i,:]*P */ | |||
1097 | ierr = PetscCalloc1(pN+2,&ptap->apa)PetscMallocA(1,PETSC_TRUE,1097,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(pN+2)*sizeof(**(&ptap->apa)),(&ptap-> apa));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1097,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1098 | ||||
1099 | /* create and assemble symbolic parallel matrix Cmpi */ | |||
1100 | ierr = MatCreate(comm,&Cmpi);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1100,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1101 | ierr = MatSetSizes(Cmpi,am,pn,PETSC_DETERMINE-1,PETSC_DETERMINE-1);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1101,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1102 | ierr = MatSetBlockSizesFromMats(Cmpi,A,P);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1102,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1103 | ierr = MatGetType(A,&mtype);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1103,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1104 | ierr = MatSetType(Cmpi,mtype);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1104,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1105 | ierr = MatMPIAIJSetPreallocation(Cmpi,0,dnz,0,onz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1105,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1106 | ||||
1107 | ||||
1108 | ierr = MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Cmpi, apj, api);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1108,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1109 | ierr = MatAssemblyBegin(Cmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1109,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1110 | ierr = MatAssemblyEnd(Cmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1110,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1111 | ierr = MatPreallocateFinalize(dnz,onz)0;_4_ierr = PetscFreeA(2,1111,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,&(dnz),&(onz));do {if (__builtin_expect(!!(_4_ierr), 0)) return PetscError(((MPI_Comm)0x44000001),1111,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1111,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1112 | ||||
1113 | ||||
1114 | ptap->destroy = Cmpi->ops->destroy; | |||
1115 | ptap->duplicate = Cmpi->ops->duplicate; | |||
1116 | Cmpi->ops->matmultnumeric = MatMatMultNumeric_MPIAIJ_MPIAIJ_nonscalable; | |||
1117 | Cmpi->ops->destroy = MatDestroy_MPIAIJ_MatMatMult; | |||
1118 | ||||
1119 | /* attach the supporting struct to Cmpi for reuse */ | |||
1120 | c = (Mat_MPIAIJ*)Cmpi->data; | |||
1121 | c->ap = ptap; | |||
1122 | *C = Cmpi; | |||
1123 | ||||
1124 | /* set MatInfo */ | |||
1125 | afill = (PetscReal)api[am]/(adi[am]+aoi[am]+pi_loc[pm]+1) + 1.e-5; | |||
1126 | if (afill < 1.0) afill = 1.0; | |||
1127 | Cmpi->info.mallocs = nspacedouble; | |||
1128 | Cmpi->info.fill_ratio_given = fill; | |||
1129 | Cmpi->info.fill_ratio_needed = afill; | |||
1130 | ||||
1131 | #if defined(PETSC_USE_INFO1) | |||
1132 | if (api[am]) { | |||
1133 | ierr = PetscInfo3(Cmpi,"Reallocs %D; Fill ratio: given %g needed %g.\n",nspacedouble,(double)fill,(double)afill)PetscInfo_Private(__func__,Cmpi,"Reallocs %D; Fill ratio: given %g needed %g.\n" ,nspacedouble,(double)fill,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1133,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1134 | ierr = PetscInfo1(Cmpi,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n",(double)afill)PetscInfo_Private(__func__,Cmpi,"Use MatMatMult(A,B,MatReuse,%g,&C) for best performance.;\n" ,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1134,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1135 | } else { | |||
1136 | ierr = PetscInfo(Cmpi,"Empty matrix product\n")PetscInfo_Private(__func__,Cmpi,"Empty matrix product\n");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1136,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1137 | } | |||
1138 | #endif | |||
1139 | ||||
1140 | ierr = MatDestroy(&aopoth);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1140,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1141 | ierr = MatDestroy(&adpd);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1141,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1142 | ierr = PetscFree(j_temp)((*PetscTrFree)((void*)(j_temp),1142,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((j_temp) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1142,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1143 | ierr = PetscFree(adpoj)((*PetscTrFree)((void*)(adpoj),1143,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((adpoj) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1143,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1144 | ierr = PetscFree(adpoi)((*PetscTrFree)((void*)(adpoi),1144,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((adpoi) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1144,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1145 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1146 | } | |||
1147 | ||||
1148 | ||||
1149 | /*-------------------------------------------------------------------------*/ | |||
1150 | PetscErrorCode MatTransposeMatMult_MPIAIJ_MPIAIJ(Mat P,Mat A,MatReuse scall,PetscReal fill,Mat *C) | |||
1151 | { | |||
1152 | PetscErrorCode ierr; | |||
1153 | const char *algTypes[3] = {"scalable","nonscalable","matmatmult"}; | |||
1154 | PetscInt aN=A->cmap->N,alg=1; /* set default algorithm */ | |||
1155 | PetscBool flg; | |||
1156 | ||||
1157 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 1157; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1158 | if (scall == MAT_INITIAL_MATRIX) { | |||
1159 | ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MatTransposeMatMult","Mat")0; do { PetscOptionItems PetscOptionsObjectBase; PetscOptionItems *PetscOptionsObject = &PetscOptionsObjectBase; PetscMemzero (PetscOptionsObject,sizeof(PetscOptionItems)); for (PetscOptionsObject ->count=(PetscOptionsPublish?-1:1); PetscOptionsObject-> count<2; PetscOptionsObject->count++) { PetscErrorCode _5_ierr = PetscOptionsBegin_Private(PetscOptionsObject,PetscObjectComm ((PetscObject)A),((PetscObject)A)->prefix,"MatTransposeMatMult" ,"Mat");do {if (__builtin_expect(!!(_5_ierr),0)) return PetscError (((MPI_Comm)0x44000001),1159,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_5_ierr,PETSC_ERROR_REPEAT," ");} while (0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1159,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1160 | ierr = PetscOptionsEList("-mattransposematmult_via","Algorithmic approach","MatTransposeMatMult",algTypes,3,algTypes[1],&alg,&flg)PetscOptionsEList_Private(PetscOptionsObject,"-mattransposematmult_via" ,"Algorithmic approach","MatTransposeMatMult",algTypes,3,algTypes [1],&alg,&flg);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1160,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1161 | ierr = PetscOptionsEnd()_5_ierr = PetscOptionsEnd_Private(PetscOptionsObject);do {if ( __builtin_expect(!!(_5_ierr),0)) return PetscError(((MPI_Comm )0x44000001),1161,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_5_ierr,PETSC_ERROR_REPEAT," ");} while (0);}} while (0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1161,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1162 | ||||
1163 | ierr = PetscLogEventBegin(MAT_TransposeMatMultSymbolic,P,A,0,0)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_TransposeMatMultSymbolic].active) ? (*PetscLogPLB)((MAT_TransposeMatMultSymbolic ),0,(PetscObject)(P),(PetscObject)(A),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1163,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1164 | switch (alg) { | |||
1165 | case 1: | |||
1166 | if (!flg && aN > 100000) { /* may switch to scalable algorithm as default */ | |||
1167 | MatInfo Ainfo,Pinfo; | |||
1168 | PetscInt nz_local; | |||
1169 | PetscBool alg_scalable_loc=PETSC_FALSE,alg_scalable; | |||
1170 | MPI_Comm comm; | |||
1171 | ||||
1172 | ierr = MatGetInfo(A,MAT_LOCAL,&Ainfo);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1172,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1173 | ierr = MatGetInfo(P,MAT_LOCAL,&Pinfo);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1173,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1174 | nz_local = (PetscInt)(Ainfo.nz_allocated + Pinfo.nz_allocated); /* estimated local nonzero entries */ | |||
1175 | ||||
1176 | if (aN > fill*nz_local) alg_scalable_loc = PETSC_TRUE; | |||
1177 | ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1177,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1178 | ierr = MPIU_Allreduce(&alg_scalable_loc,&alg_scalable,1,MPIU_BOOL,MPI_LOR,comm)(PetscAllreduceBarrierCheck(comm,1,1178,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((petsc_allreduce_ct += PetscMPIParallelComm((comm)),0) || MPI_Allreduce((&alg_scalable_loc),(&alg_scalable),(1 ),(MPIU_BOOL),((MPI_Op)(0x58000007)),(comm))));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1178,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1179 | ||||
1180 | if (alg_scalable) { | |||
1181 | alg = 0; /* scalable algorithm would slower than nonscalable algorithm */ | |||
1182 | ierr = MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ(P,A,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1182,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1183 | break; | |||
1184 | } | |||
1185 | } | |||
1186 | ierr = MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ_nonscalable(P,A,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1186,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1187 | break; | |||
1188 | case 2: | |||
1189 | { | |||
1190 | Mat Pt; | |||
1191 | Mat_APMPI *ptap; | |||
1192 | Mat_MPIAIJ *c; | |||
1193 | ierr = MatTranspose(P,MAT_INITIAL_MATRIX,&Pt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1193,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1194 | ierr = MatMatMult(Pt,A,MAT_INITIAL_MATRIX,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1194,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1195 | c = (Mat_MPIAIJ*)(*C)->data; | |||
1196 | ptap = c->ap; | |||
1197 | if (ptap) { | |||
1198 | ptap->Pt = Pt; | |||
1199 | (*C)->ops->freeintermediatedatastructures = MatFreeIntermediateDataStructures_MPIAIJ_AP; | |||
1200 | } | |||
1201 | (*C)->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ_matmatmult; | |||
1202 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1203 | } | |||
1204 | break; | |||
1205 | default: /* scalable algorithm */ | |||
1206 | ierr = MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ(P,A,fill,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1206,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1207 | break; | |||
1208 | } | |||
1209 | ierr = PetscLogEventEnd(MAT_TransposeMatMultSymbolic,P,A,0,0)(((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_TransposeMatMultSymbolic].active) ? (*PetscLogPLE)((MAT_TransposeMatMultSymbolic ),0,(PetscObject)(P),(PetscObject)(A),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1209,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1210 | ||||
1211 | { | |||
1212 | Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*C)->data; | |||
1213 | Mat_APMPI *ap = c->ap; | |||
1214 | ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)(*C)),((PetscObject)(*C))->prefix,"MatFreeIntermediateDataStructures","Mat")0; do { PetscOptionItems PetscOptionsObjectBase; PetscOptionItems *PetscOptionsObject = &PetscOptionsObjectBase; PetscMemzero (PetscOptionsObject,sizeof(PetscOptionItems)); for (PetscOptionsObject ->count=(PetscOptionsPublish?-1:1); PetscOptionsObject-> count<2; PetscOptionsObject->count++) { PetscErrorCode _5_ierr = PetscOptionsBegin_Private(PetscOptionsObject,PetscObjectComm ((PetscObject)(*C)),((PetscObject)(*C))->prefix,"MatFreeIntermediateDataStructures" ,"Mat");do {if (__builtin_expect(!!(_5_ierr),0)) return PetscError (((MPI_Comm)0x44000001),1214,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_5_ierr,PETSC_ERROR_REPEAT," ");} while (0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1214,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1215 | ap->freestruct = PETSC_FALSE; | |||
1216 | ierr = PetscOptionsBool("-mat_freeintermediatedatastructures","Free intermediate data structures", "MatFreeIntermediateDataStructures",ap->freestruct,&ap->freestruct, NULL)PetscOptionsBool_Private(PetscOptionsObject,"-mat_freeintermediatedatastructures" ,"Free intermediate data structures","MatFreeIntermediateDataStructures" ,ap->freestruct,&ap->freestruct,((void*)0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1216,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1217 | ierr = PetscOptionsEnd()_5_ierr = PetscOptionsEnd_Private(PetscOptionsObject);do {if ( __builtin_expect(!!(_5_ierr),0)) return PetscError(((MPI_Comm )0x44000001),1217,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_5_ierr,PETSC_ERROR_REPEAT," ");} while (0);}} while (0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1217,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1218 | } | |||
1219 | } | |||
1220 | ||||
1221 | ierr = PetscLogEventBegin(MAT_TransposeMatMultNumeric,P,A,0,0)(((PetscLogPLB && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_TransposeMatMultNumeric].active) ? (*PetscLogPLB)((MAT_TransposeMatMultNumeric ),0,(PetscObject)(P),(PetscObject)(A),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1221,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1222 | ierr = (*(*C)->ops->mattransposemultnumeric)(P,A,*C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1222,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1223 | ierr = PetscLogEventEnd(MAT_TransposeMatMultNumeric,P,A,0,0)(((PetscLogPLE && petsc_stageLog->stageInfo[petsc_stageLog ->curStage].perfInfo.active && petsc_stageLog-> stageInfo[petsc_stageLog->curStage].eventLog->eventInfo [MAT_TransposeMatMultNumeric].active) ? (*PetscLogPLE)((MAT_TransposeMatMultNumeric ),0,(PetscObject)(P),(PetscObject)(A),(PetscObject)(0),(PetscObject )(0)) : 0 ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1223,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1224 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1225 | } | |||
1226 | ||||
1227 | /* This routine only works when scall=MAT_REUSE_MATRIX! */ | |||
1228 | PetscErrorCode MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ_matmatmult(Mat P,Mat A,Mat C) | |||
1229 | { | |||
1230 | PetscErrorCode ierr; | |||
1231 | Mat_MPIAIJ *c=(Mat_MPIAIJ*)C->data; | |||
1232 | Mat_APMPI *ptap= c->ap; | |||
1233 | Mat Pt; | |||
1234 | ||||
1235 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 1235; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1236 | if (!ptap->Pt) { | |||
1237 | MPI_Comm comm; | |||
1238 | ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1238,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1239 | SETERRQ(comm,PETSC_ERR_ARG_WRONGSTATE,"PtA cannot be reused. Do not call MatFreeIntermediateDataStructures() or use '-mat_freeintermediatedatastructures'")return PetscError(comm,1239,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,73,PETSC_ERROR_INITIAL,"PtA cannot be reused. Do not call MatFreeIntermediateDataStructures() or use '-mat_freeintermediatedatastructures'" ); | |||
1240 | } | |||
1241 | ||||
1242 | Pt=ptap->Pt; | |||
1243 | ierr = MatTranspose(P,MAT_REUSE_MATRIX,&Pt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1243,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1244 | ierr = MatMatMultNumeric(Pt,A,C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1244,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1245 | ||||
1246 | /* supporting struct ptap consumes almost same amount of memory as C=PtAP, release it if C will not be updated by A and P */ | |||
1247 | if (ptap->freestruct) { | |||
1248 | ierr = MatFreeIntermediateDataStructures(C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1248,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1249 | } | |||
1250 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1251 | } | |||
1252 | ||||
1253 | /* This routine is modified from MatPtAPSymbolic_MPIAIJ_MPIAIJ() */ | |||
1254 | PetscErrorCode MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ_nonscalable(Mat P,Mat A,PetscReal fill,Mat *C) | |||
1255 | { | |||
1256 | PetscErrorCode ierr; | |||
1257 | Mat_APMPI *ptap; | |||
1258 | Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data,*c; | |||
1259 | MPI_Comm comm; | |||
1260 | PetscMPIInt size,rank; | |||
1261 | Mat Cmpi; | |||
1262 | PetscFreeSpaceList free_space=NULL((void*)0),current_space=NULL((void*)0); | |||
1263 | PetscInt pn=P->cmap->n,aN=A->cmap->N,an=A->cmap->n; | |||
1264 | PetscInt *lnk,i,k,nsend; | |||
1265 | PetscBT lnkbt; | |||
1266 | PetscMPIInt tagi,tagj,*len_si,*len_s,*len_ri,icompleted=0,nrecv; | |||
1267 | PetscInt **buf_rj,**buf_ri,**buf_ri_k; | |||
1268 | PetscInt len,proc,*dnz,*onz,*owners,nzi; | |||
1269 | PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci; | |||
1270 | MPI_Request *swaits,*rwaits; | |||
1271 | MPI_Status *sstatus,rstatus; | |||
1272 | PetscLayout rowmap; | |||
1273 | PetscInt *owners_co,*coi,*coj; /* i and j array of (p->B)^T*A*P - used in the communication */ | |||
1274 | PetscMPIInt *len_r,*id_r; /* array of length of comm->size, store send/recv matrix values */ | |||
1275 | PetscInt *Jptr,*prmap=p->garray,con,j,Crmax; | |||
1276 | Mat_SeqAIJ *a_loc,*c_loc,*c_oth; | |||
1277 | PetscTable ta; | |||
1278 | MatType mtype; | |||
1279 | const char *prefix; | |||
1280 | ||||
1281 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 1281; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1282 | ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1282,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1283 | ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1283,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1284 | ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1284,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1285 | ||||
1286 | /* create symbolic parallel matrix Cmpi */ | |||
1287 | ierr = MatCreate(comm,&Cmpi);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1287,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1288 | ierr = MatGetType(A,&mtype);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1288,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1289 | ierr = MatSetType(Cmpi,mtype);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1289,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1290 | ||||
1291 | Cmpi->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ_nonscalable; | |||
1292 | ||||
1293 | /* create struct Mat_APMPI and attached it to C later */ | |||
1294 | ierr = PetscNew(&ptap)PetscMallocA(1,PETSC_TRUE,1294,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(1)*sizeof(**((&ptap))),((&ptap)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1294,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1295 | ptap->reuse = MAT_INITIAL_MATRIX; | |||
1296 | ||||
1297 | /* (0) compute Rd = Pd^T, Ro = Po^T */ | |||
1298 | /* --------------------------------- */ | |||
1299 | ierr = MatTranspose_SeqAIJ(p->A,MAT_INITIAL_MATRIX,&ptap->Rd);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1299,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1300 | ierr = MatTranspose_SeqAIJ(p->B,MAT_INITIAL_MATRIX,&ptap->Ro);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1300,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1301 | ||||
1302 | /* (1) compute symbolic A_loc */ | |||
1303 | /* ---------------------------*/ | |||
1304 | ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&ptap->A_loc);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1304,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1305 | ||||
1306 | /* (2-1) compute symbolic C_oth = Ro*A_loc */ | |||
1307 | /* ------------------------------------ */ | |||
1308 | ierr = MatGetOptionsPrefix(A,&prefix);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1308,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1309 | ierr = MatSetOptionsPrefix(ptap->Ro,prefix);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1309,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1310 | ierr = MatAppendOptionsPrefix(ptap->Ro,"inner_offdiag_");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1310,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1311 | ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(ptap->Ro,ptap->A_loc,fill,&ptap->C_oth);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1311,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1312 | ||||
1313 | /* (3) send coj of C_oth to other processors */ | |||
1314 | /* ------------------------------------------ */ | |||
1315 | /* determine row ownership */ | |||
1316 | ierr = PetscLayoutCreate(comm,&rowmap);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1316,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1317 | rowmap->n = pn; | |||
1318 | rowmap->bs = 1; | |||
1319 | ierr = PetscLayoutSetUp(rowmap);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1319,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1320 | owners = rowmap->range; | |||
1321 | ||||
1322 | /* determine the number of messages to send, their lengths */ | |||
1323 | ierr = PetscMalloc4(size,&len_s,size,&len_si,size,&sstatus,size+2,&owners_co)PetscMallocA(4,PETSC_FALSE,1323,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(size)*sizeof(**(&len_s)),(&len_s),(size_t)( size)*sizeof(**(&len_si)),(&len_si),(size_t)(size)*sizeof (**(&sstatus)),(&sstatus),(size_t)(size+2)*sizeof(**( &owners_co)),(&owners_co));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1323,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1324 | ierr = PetscArrayzero(len_s,size)PetscMemzero(len_s,(size)*sizeof(*(len_s)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1324,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1325 | ierr = PetscArrayzero(len_si,size)PetscMemzero(len_si,(size)*sizeof(*(len_si)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1325,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1326 | ||||
1327 | c_oth = (Mat_SeqAIJ*)ptap->C_oth->data; | |||
1328 | coi = c_oth->i; coj = c_oth->j; | |||
1329 | con = ptap->C_oth->rmap->n; | |||
1330 | proc = 0; | |||
1331 | for (i=0; i<con; i++) { | |||
1332 | while (prmap[i] >= owners[proc+1]) proc++; | |||
1333 | len_si[proc]++; /* num of rows in Co(=Pt*A) to be sent to [proc] */ | |||
1334 | len_s[proc] += coi[i+1] - coi[i]; /* num of nonzeros in Co to be sent to [proc] */ | |||
1335 | } | |||
1336 | ||||
1337 | len = 0; /* max length of buf_si[], see (4) */ | |||
1338 | owners_co[0] = 0; | |||
1339 | nsend = 0; | |||
1340 | for (proc=0; proc<size; proc++) { | |||
1341 | owners_co[proc+1] = owners_co[proc] + len_si[proc]; | |||
1342 | if (len_s[proc]) { | |||
1343 | nsend++; | |||
1344 | len_si[proc] = 2*(len_si[proc] + 1); /* length of buf_si to be sent to [proc] */ | |||
1345 | len += len_si[proc]; | |||
1346 | } | |||
1347 | } | |||
1348 | ||||
1349 | /* determine the number and length of messages to receive for coi and coj */ | |||
1350 | ierr = PetscGatherNumberOfMessages(comm,NULL((void*)0),len_s,&nrecv);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1350,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1351 | ierr = PetscGatherMessageLengths2(comm,nsend,nrecv,len_s,len_si,&id_r,&len_r,&len_ri);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1351,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1352 | ||||
1353 | /* post the Irecv and Isend of coj */ | |||
1354 | ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1354,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1355 | ierr = PetscPostIrecvInt(comm,tagj,nrecv,id_r,len_r,&buf_rj,&rwaits);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1355,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1356 | ierr = PetscMalloc1(nsend+1,&swaits)PetscMallocA(1,PETSC_FALSE,1356,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(nsend+1)*sizeof(**(&swaits)),(&swaits));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1356,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1357 | for (proc=0, k=0; proc<size; proc++) { | |||
1358 | if (!len_s[proc]) continue; | |||
1359 | i = owners_co[proc]; | |||
1360 | ierr = MPI_Isend(coj+coi[i],len_s[proc],MPIU_INT,proc,tagj,comm,swaits+k)((petsc_isend_ct++,0) || PetscMPITypeSize((len_s[proc]),(((MPI_Datatype )0x4c000405)),&(petsc_isend_len)) || MPI_Isend((coj+coi[i ]),(len_s[proc]),(((MPI_Datatype)0x4c000405)),(proc),(tagj),( comm),(swaits+k)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1360,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1361 | k++; | |||
1362 | } | |||
1363 | ||||
1364 | /* (2-2) compute symbolic C_loc = Rd*A_loc */ | |||
1365 | /* ---------------------------------------- */ | |||
1366 | ierr = MatSetOptionsPrefix(ptap->Rd,prefix);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1366,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1367 | ierr = MatAppendOptionsPrefix(ptap->Rd,"inner_diag_");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1367,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1368 | ierr = MatMatMultSymbolic_SeqAIJ_SeqAIJ(ptap->Rd,ptap->A_loc,fill,&ptap->C_loc);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1368,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1369 | c_loc = (Mat_SeqAIJ*)ptap->C_loc->data; | |||
1370 | ||||
1371 | /* receives coj are complete */ | |||
1372 | for (i=0; i<nrecv; i++) { | |||
1373 | ierr = MPI_Waitany(nrecv,rwaits,&icompleted,&rstatus)((petsc_wait_any_ct++,petsc_sum_of_waits_ct++,0) || MPI_Waitany ((nrecv),(rwaits),(&icompleted),(&rstatus)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1373,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1374 | } | |||
1375 | ierr = PetscFree(rwaits)((*PetscTrFree)((void*)(rwaits),1375,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((rwaits) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1375,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1376 | if (nsend) {ierr = MPI_Waitall(nsend,swaits,sstatus)((petsc_wait_all_ct++,petsc_sum_of_waits_ct += (PetscLogDouble ) (nsend),0) || MPI_Waitall((nsend),(swaits),(sstatus)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1376,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0);} | |||
1377 | ||||
1378 | /* add received column indices into ta to update Crmax */ | |||
1379 | a_loc = (Mat_SeqAIJ*)(ptap->A_loc)->data; | |||
1380 | ||||
1381 | /* create and initialize a linked list */ | |||
1382 | ierr = PetscTableCreate(an,aN,&ta);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1382,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); /* for compute Crmax */ | |||
1383 | MatRowMergeMax_SeqAIJ(a_loc,ptap->A_loc->rmap->N,ta){ PetscInt _j,_row,_nz,*_col; if (a_loc) { for (_row=0; _row< ptap->A_loc->rmap->N; _row++) { _nz = a_loc->i[_row +1] - a_loc->i[_row]; for (_j=0; _j<_nz; _j++) { _col = _j + a_loc->j + a_loc->i[_row]; PetscTableAdd(ta,*_col +1,1,INSERT_VALUES); } } } }; | |||
1384 | ||||
1385 | for (k=0; k<nrecv; k++) {/* k-th received message */ | |||
1386 | Jptr = buf_rj[k]; | |||
1387 | for (j=0; j<len_r[k]; j++) { | |||
1388 | ierr = PetscTableAdd(ta,*(Jptr+j)+1,1,INSERT_VALUES);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1388,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1389 | } | |||
1390 | } | |||
1391 | ierr = PetscTableGetCount(ta,&Crmax);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1391,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1392 | ierr = PetscTableDestroy(&ta);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1392,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1393 | ||||
1394 | /* (4) send and recv coi */ | |||
1395 | /*-----------------------*/ | |||
1396 | ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1396,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1397 | ierr = PetscPostIrecvInt(comm,tagi,nrecv,id_r,len_ri,&buf_ri,&rwaits);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1397,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1398 | ierr = PetscMalloc1(len+1,&buf_s)PetscMallocA(1,PETSC_FALSE,1398,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(len+1)*sizeof(**(&buf_s)),(&buf_s));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1398,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1399 | buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ | |||
1400 | for (proc=0,k=0; proc<size; proc++) { | |||
1401 | if (!len_s[proc]) continue; | |||
1402 | /* form outgoing message for i-structure: | |||
1403 | buf_si[0]: nrows to be sent | |||
1404 | [1:nrows]: row index (global) | |||
1405 | [nrows+1:2*nrows+1]: i-structure index | |||
1406 | */ | |||
1407 | /*-------------------------------------------*/ | |||
1408 | nrows = len_si[proc]/2 - 1; /* num of rows in Co to be sent to [proc] */ | |||
1409 | buf_si_i = buf_si + nrows+1; | |||
1410 | buf_si[0] = nrows; | |||
1411 | buf_si_i[0] = 0; | |||
1412 | nrows = 0; | |||
1413 | for (i=owners_co[proc]; i<owners_co[proc+1]; i++) { | |||
1414 | nzi = coi[i+1] - coi[i]; | |||
1415 | buf_si_i[nrows+1] = buf_si_i[nrows] + nzi; /* i-structure */ | |||
1416 | buf_si[nrows+1] = prmap[i] -owners[proc]; /* local row index */ | |||
1417 | nrows++; | |||
1418 | } | |||
1419 | ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,swaits+k)((petsc_isend_ct++,0) || PetscMPITypeSize((len_si[proc]),(((MPI_Datatype )0x4c000405)),&(petsc_isend_len)) || MPI_Isend((buf_si),( len_si[proc]),(((MPI_Datatype)0x4c000405)),(proc),(tagi),(comm ),(swaits+k)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1419,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1420 | k++; | |||
1421 | buf_si += len_si[proc]; | |||
1422 | } | |||
1423 | for (i=0; i<nrecv; i++) { | |||
1424 | ierr = MPI_Waitany(nrecv,rwaits,&icompleted,&rstatus)((petsc_wait_any_ct++,petsc_sum_of_waits_ct++,0) || MPI_Waitany ((nrecv),(rwaits),(&icompleted),(&rstatus)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1424,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1425 | } | |||
1426 | ierr = PetscFree(rwaits)((*PetscTrFree)((void*)(rwaits),1426,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((rwaits) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1426,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1427 | if (nsend) {ierr = MPI_Waitall(nsend,swaits,sstatus)((petsc_wait_all_ct++,petsc_sum_of_waits_ct += (PetscLogDouble ) (nsend),0) || MPI_Waitall((nsend),(swaits),(sstatus)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1427,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0);} | |||
1428 | ||||
1429 | ierr = PetscFree4(len_s,len_si,sstatus,owners_co)PetscFreeA(4,1429,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,&(len_s),&(len_si),&(sstatus),&(owners_co));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1429,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1430 | ierr = PetscFree(len_ri)((*PetscTrFree)((void*)(len_ri),1430,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((len_ri) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1430,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1431 | ierr = PetscFree(swaits)((*PetscTrFree)((void*)(swaits),1431,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((swaits) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1431,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1432 | ierr = PetscFree(buf_s)((*PetscTrFree)((void*)(buf_s),1432,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((buf_s) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1432,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1433 | ||||
1434 | /* (5) compute the local portion of Cmpi */ | |||
1435 | /* ------------------------------------------ */ | |||
1436 | /* set initial free space to be Crmax, sufficient for holding nonzeros in each row of Cmpi */ | |||
1437 | ierr = PetscFreeSpaceGet(Crmax,&free_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1437,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1438 | current_space = free_space; | |||
1439 | ||||
1440 | ierr = PetscMalloc3(nrecv,&buf_ri_k,nrecv,&nextrow,nrecv,&nextci)PetscMallocA(3,PETSC_FALSE,1440,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(nrecv)*sizeof(**(&buf_ri_k)),(&buf_ri_k),(size_t )(nrecv)*sizeof(**(&nextrow)),(&nextrow),(size_t)(nrecv )*sizeof(**(&nextci)),(&nextci));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1440,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1441 | for (k=0; k<nrecv; k++) { | |||
1442 | buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ | |||
1443 | nrows = *buf_ri_k[k]; | |||
1444 | nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ | |||
1445 | nextci[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ | |||
1446 | } | |||
1447 | ||||
1448 | ierr = MatPreallocateInitialize(comm,pn,an,dnz,onz)0; do { PetscErrorCode _4_ierr; PetscInt __nrows = (pn),__ncols = (an),__rstart,__start,__end; _4_ierr = PetscMallocA(2,PETSC_TRUE ,1448,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)((size_t)__nrows)*sizeof(**(&dnz)),(&dnz),(size_t )((size_t)__nrows)*sizeof(**(&onz)),(&onz));do {if (__builtin_expect (!!(_4_ierr),0)) return PetscError(((MPI_Comm)0x44000001),1448 ,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0); __start = 0; __end = __start; _4_ierr = MPI_Scan(&__ncols,&__end,1,((MPI_Datatype )0x4c000405),(MPI_Op)(0x58000003),comm);do {if (__builtin_expect (!!(_4_ierr),0)) return PetscError(((MPI_Comm)0x44000001),1448 ,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0); __start = __end - __ncols; _4_ierr = MPI_Scan(&__nrows,&__rstart,1,( (MPI_Datatype)0x4c000405),(MPI_Op)(0x58000003),comm);do {if ( __builtin_expect(!!(_4_ierr),0)) return PetscError(((MPI_Comm )0x44000001),1448,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0); __rstart = __rstart - __nrows; do { } while(0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1448,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1449 | ierr = PetscLLCondensedCreate(Crmax,aN,&lnk,&lnkbt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1449,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1450 | for (i=0; i<pn; i++) { | |||
1451 | /* add C_loc into Cmpi */ | |||
1452 | nzi = c_loc->i[i+1] - c_loc->i[i]; | |||
1453 | Jptr = c_loc->j + c_loc->i[i]; | |||
1454 | ierr = PetscLLCondensedAddSorted(nzi,Jptr,lnk,lnkbt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1454,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1455 | ||||
1456 | /* add received col data into lnk */ | |||
1457 | for (k=0; k<nrecv; k++) { /* k-th received message */ | |||
1458 | if (i == *nextrow[k]) { /* i-th row */ | |||
1459 | nzi = *(nextci[k]+1) - *nextci[k]; | |||
1460 | Jptr = buf_rj[k] + *nextci[k]; | |||
1461 | ierr = PetscLLCondensedAddSorted(nzi,Jptr,lnk,lnkbt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1461,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1462 | nextrow[k]++; nextci[k]++; | |||
1463 | } | |||
1464 | } | |||
1465 | nzi = lnk[0]; | |||
1466 | ||||
1467 | /* copy data into free space, then initialize lnk */ | |||
1468 | ierr = PetscLLCondensedClean(aN,nzi,current_space->array,lnk,lnkbt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1468,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1469 | ierr = MatPreallocateSet(i+owners[rank],nzi,current_space->array,dnz,onz)0;do { PetscInt __i; if (i+owners[rank] < __rstart) return PetscError(((MPI_Comm)0x44000001),1469,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,63,PETSC_ERROR_INITIAL,"Trying to set preallocation for row %D less than first local row %D" ,i+owners[rank],__rstart); if (i+owners[rank] >= __rstart+ __nrows) return PetscError(((MPI_Comm)0x44000001),1469,__func__ ,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,63,PETSC_ERROR_INITIAL,"Trying to set preallocation for row %D greater than last local row %D" ,i+owners[rank],__rstart+__nrows-1); for (__i=0; __i<nzi; __i ++) { if ((current_space->array)[__i] < __start || (current_space ->array)[__i] >= __end) onz[i+owners[rank] - __rstart]++ ; else if (dnz[i+owners[rank] - __rstart] < __ncols) dnz[i +owners[rank] - __rstart]++; }} while (0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1469,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1470 | } | |||
1471 | ierr = PetscFree3(buf_ri_k,nextrow,nextci)PetscFreeA(3,1471,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,&(buf_ri_k),&(nextrow),&(nextci));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1471,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1472 | ierr = PetscLLDestroy(lnk,lnkbt)(((*PetscTrFree)((void*)(lnk),1472,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((lnk) = 0,0)) || PetscBTDestroy(&(lnkbt)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1472,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1473 | ierr = PetscFreeSpaceDestroy(free_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1473,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1474 | ||||
1475 | /* local sizes and preallocation */ | |||
1476 | ierr = MatSetSizes(Cmpi,pn,an,PETSC_DETERMINE-1,PETSC_DETERMINE-1);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1476,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1477 | if (P->cmap->bs > 0) {ierr = PetscLayoutSetBlockSize(Cmpi->rmap,P->cmap->bs);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1477,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0);} | |||
1478 | if (A->cmap->bs > 0) {ierr = PetscLayoutSetBlockSize(Cmpi->cmap,A->cmap->bs);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1478,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0);} | |||
1479 | ierr = MatMPIAIJSetPreallocation(Cmpi,0,dnz,0,onz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1479,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1480 | ierr = MatPreallocateFinalize(dnz,onz)0;_4_ierr = PetscFreeA(2,1480,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,&(dnz),&(onz));do {if (__builtin_expect(!!(_4_ierr), 0)) return PetscError(((MPI_Comm)0x44000001),1480,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1480,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1481 | ||||
1482 | /* members in merge */ | |||
1483 | ierr = PetscFree(id_r)((*PetscTrFree)((void*)(id_r),1483,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((id_r) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1483,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1484 | ierr = PetscFree(len_r)((*PetscTrFree)((void*)(len_r),1484,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((len_r) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1484,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1485 | ierr = PetscFree(buf_ri[0])((*PetscTrFree)((void*)(buf_ri[0]),1485,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((buf_ri[0]) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1485,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1486 | ierr = PetscFree(buf_ri)((*PetscTrFree)((void*)(buf_ri),1486,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((buf_ri) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1486,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1487 | ierr = PetscFree(buf_rj[0])((*PetscTrFree)((void*)(buf_rj[0]),1487,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((buf_rj[0]) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1487,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1488 | ierr = PetscFree(buf_rj)((*PetscTrFree)((void*)(buf_rj),1488,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((buf_rj) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1488,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1489 | ierr = PetscLayoutDestroy(&rowmap);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1489,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1490 | ||||
1491 | /* attach the supporting struct to Cmpi for reuse */ | |||
1492 | c = (Mat_MPIAIJ*)Cmpi->data; | |||
1493 | c->ap = ptap; | |||
1494 | ptap->destroy = Cmpi->ops->destroy; | |||
1495 | ||||
1496 | /* Cmpi is not ready for use - assembly will be done by MatPtAPNumeric() */ | |||
1497 | Cmpi->assembled = PETSC_FALSE; | |||
1498 | Cmpi->ops->destroy = MatDestroy_MPIAIJ_PtAP; | |||
1499 | Cmpi->ops->freeintermediatedatastructures = MatFreeIntermediateDataStructures_MPIAIJ_AP; | |||
1500 | ||||
1501 | *C = Cmpi; | |||
1502 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1503 | } | |||
1504 | ||||
/*
   MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ_nonscalable - numeric phase that fills the
   values of C from P and A, using the supporting struct stored in C (c->ap).

   Parameters:
     P - matrix whose local blocks p->A and p->B are transposed into ptap->Rd and ptap->Ro
     A - matrix whose local part is gathered into ptap->A_loc
     C - product matrix; it is zeroed, filled with ADD_VALUES and assembled here

   Steps, as performed below:
     1) fail with PETSC_ERR_ARG_WRONGSTATE when ptap->A_loc is NULL
     2) zero C
     3) when ptap->reuse == MAT_REUSE_MATRIX, refresh Rd, Ro and A_loc
     4) run the matmultnumeric op of C_loc (Rd*A_loc) and of C_oth (Ro*A_loc)
     5) add the rows of C_loc and C_oth into C, then assemble C
     6) set ptap->reuse to MAT_REUSE_MATRIX
     7) when ptap->freestruct is set, call MatFreeIntermediateDataStructures(C)

   Returns 0 on success; any nonzero error code from a callee is propagated by CHKERRQ.

   NOTE(review): c->ap is dereferenced without a NULL check; presumably the symbolic
   phase always attaches it before this routine is called - confirm against the caller.
*/
1505 | PetscErrorCode MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ_nonscalable(Mat P,Mat A,Mat C) | |||
1506 | { | |||
1507 | PetscErrorCode ierr; | |||
1508 | Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data; | |||
1509 | Mat_SeqAIJ *c_seq; | |||
1510 | Mat_APMPI *ptap = c->ap; | |||
1511 | Mat A_loc,C_loc,C_oth; | |||
1512 | PetscInt i,rstart,rend,cm,ncols,row; | |||
1513 | const PetscInt *cols; | |||
1514 | const PetscScalar *vals; | |||
1515 | ||||
1516 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 1516; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
/* A NULL ptap->A_loc is treated as an unusable state; going by the error text below, this happens after the intermediate data has been freed */
1517 | if (!ptap->A_loc) { | |||
1518 | MPI_Comm comm; | |||
1519 | ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1519,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1520 | SETERRQ(comm,PETSC_ERR_ARG_WRONGSTATE,"PtA cannot be reused. Do not call MatFreeIntermediateDataStructures() or use '-mat_freeintermediatedatastructures'")return PetscError(comm,1520,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,73,PETSC_ERROR_INITIAL,"PtA cannot be reused. Do not call MatFreeIntermediateDataStructures() or use '-mat_freeintermediatedatastructures'" ); | |||
1521 | } | |||
1522 | ||||
1523 | ierr = MatZeroEntries(C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1523,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1524 | ||||
/* ptap->reuse is set to MAT_REUSE_MATRIX at the end of this routine, so this refresh runs on every call after one has completed; its initial value is set outside this function (presumably by the symbolic phase - TODO confirm) */
1525 | if (ptap->reuse == MAT_REUSE_MATRIX) { | |||
1526 | /* These matrices are obtained in MatTransposeMatMultSymbolic() */ | |||
1527 | /* 1) get R = Pd^T, Ro = Po^T */ | |||
1528 | /*----------------------------*/ | |||
1529 | ierr = MatTranspose_SeqAIJ(p->A,MAT_REUSE_MATRIX,&ptap->Rd);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1529,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1530 | ierr = MatTranspose_SeqAIJ(p->B,MAT_REUSE_MATRIX,&ptap->Ro);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1530,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1531 | ||||
1532 | /* 2) compute numeric A_loc */ | |||
1533 | /*--------------------------*/ | |||
1534 | ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&ptap->A_loc);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1534,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1535 | } | |||
1536 | ||||
1537 | /* 3) C_loc = Rd*A_loc, C_oth = Ro*A_loc */ | |||
1538 | A_loc = ptap->A_loc; | |||
1539 | ierr = ((ptap->C_loc)->ops->matmultnumeric)(ptap->Rd,A_loc,ptap->C_loc);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1539,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1540 | ierr = ((ptap->C_oth)->ops->matmultnumeric)(ptap->Ro,A_loc,ptap->C_oth);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1540,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1541 | C_loc = ptap->C_loc; | |||
1542 | C_oth = ptap->C_oth; | |||
1543 | ||||
1544 | /* add C_loc and C_oth (Co) to C; only rstart is used below, rend is fetched but not read */ | |||
1545 | ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1545,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1546 | ||||
1547 | /* C_loc -> C */ | |||
1548 | cm = C_loc->rmap->N; | |||
1549 | c_seq = (Mat_SeqAIJ*)C_loc->data; | |||
1550 | cols = c_seq->j; | |||
1551 | vals = c_seq->a; | |||
/* cols and vals walk the j and a arrays of C_loc row by row; row i of C_loc is added to row rstart+i of C */
1552 | for (i=0; i<cm; i++) { | |||
1553 | ncols = c_seq->i[i+1] - c_seq->i[i]; | |||
1554 | row = rstart + i; | |||
1555 | ierr = MatSetValues(C,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1555,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1556 | cols += ncols; vals += ncols; | |||
1557 | } | |||
1558 | ||||
1559 | /* Co -> C, off-processor part */ | |||
1560 | cm = C_oth->rmap->N; | |||
1561 | c_seq = (Mat_SeqAIJ*)C_oth->data; | |||
1562 | cols = c_seq->j; | |||
1563 | vals = c_seq->a; | |||
/* same row walk as above, but the target row of C for row i of C_oth is taken from p->garray[i] */
1564 | for (i=0; i<cm; i++) { | |||
1565 | ncols = c_seq->i[i+1] - c_seq->i[i]; | |||
1566 | row = p->garray[i]; | |||
1567 | ierr = MatSetValues(C,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1567,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1568 | cols += ncols; vals += ncols; | |||
1569 | } | |||
1570 | ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1570,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1571 | ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1571,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1572 | ||||
/* mark the struct so that the next call takes the refresh branch near the top of this routine */
1573 | ptap->reuse = MAT_REUSE_MATRIX; | |||
1574 | ||||
1575 | /* supporting struct ptap consumes almost same amount of memory as C=PtAP, release it if C will not be updated by A and P */ | |||
1576 | if (ptap->freestruct) { | |||
1577 | ierr = MatFreeIntermediateDataStructures(C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1577,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1578 | } | |||
1579 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1580 | } | |||
1581 | ||||
1582 | PetscErrorCode MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ(Mat P,Mat A,Mat C) | |||
1583 | { | |||
1584 | PetscErrorCode ierr; | |||
1585 | Mat_Merge_SeqsToMPI *merge; | |||
1586 | Mat_MPIAIJ *p =(Mat_MPIAIJ*)P->data,*c=(Mat_MPIAIJ*)C->data; | |||
1587 | Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; | |||
1588 | Mat_APMPI *ptap; | |||
1589 | PetscInt *adj; | |||
1590 | PetscInt i,j,k,anz,pnz,row,*cj,nexta; | |||
1591 | MatScalar *ada,*ca,valtmp; | |||
1592 | PetscInt am =A->rmap->n,cm=C->rmap->n,pon=(p->B)->cmap->n; | |||
1593 | MPI_Comm comm; | |||
1594 | PetscMPIInt size,rank,taga,*len_s; | |||
1595 | PetscInt *owners,proc,nrows,**buf_ri_k,**nextrow,**nextci; | |||
1596 | PetscInt **buf_ri,**buf_rj; | |||
1597 | PetscInt cnz=0,*bj_i,*bi,*bj,bnz,nextcj; /* bi,bj,ba: local array of C(mpi mat) */ | |||
1598 | MPI_Request *s_waits,*r_waits; | |||
1599 | MPI_Status *status; | |||
1600 | MatScalar **abuf_r,*ba_i,*pA,*coa,*ba; | |||
1601 | PetscInt *ai,*aj,*coi,*coj,*poJ,*pdJ; | |||
1602 | Mat A_loc; | |||
1603 | Mat_SeqAIJ *a_loc; | |||
1604 | ||||
1605 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 1605; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1606 | ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1606,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1607 | ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1607,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1608 | ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1608,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1609 | ||||
1610 | ptap = c->ap; | |||
1611 | if (!ptap->A_loc) SETERRQ(comm,PETSC_ERR_ARG_WRONGSTATE,"PtA cannot be reused. Do not call MatFreeIntermediateDataStructures() or use '-mat_freeintermediatedatastructures'")return PetscError(comm,1611,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,73,PETSC_ERROR_INITIAL,"PtA cannot be reused. Do not call MatFreeIntermediateDataStructures() or use '-mat_freeintermediatedatastructures'" ); | |||
1612 | merge = ptap->merge; | |||
1613 | ||||
1614 | /* 2) compute numeric C_seq = P_loc^T*A_loc */ | |||
1615 | /*------------------------------------------*/ | |||
1616 | /* get data from symbolic products */ | |||
1617 | coi = merge->coi; coj = merge->coj; | |||
1618 | ierr = PetscCalloc1(coi[pon]+1,&coa)PetscMallocA(1,PETSC_TRUE,1618,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(coi[pon]+1)*sizeof(**(&coa)),(&coa));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1618,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1619 | bi = merge->bi; bj = merge->bj; | |||
1620 | owners = merge->rowmap->range; | |||
1621 | ierr = PetscCalloc1(bi[cm]+1,&ba)PetscMallocA(1,PETSC_TRUE,1621,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(bi[cm]+1)*sizeof(**(&ba)),(&ba));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1621,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1622 | ||||
1623 | /* get A_loc by taking all local rows of A */ | |||
1624 | A_loc = ptap->A_loc; | |||
1625 | ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1625,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1626 | a_loc = (Mat_SeqAIJ*)(A_loc)->data; | |||
1627 | ai = a_loc->i; | |||
1628 | aj = a_loc->j; | |||
1629 | ||||
1630 | for (i=0; i<am; i++) { | |||
1631 | anz = ai[i+1] - ai[i]; | |||
1632 | adj = aj + ai[i]; | |||
1633 | ada = a_loc->a + ai[i]; | |||
1634 | ||||
1635 | /* 2-b) Compute Cseq = P_loc[i,:]^T*A[i,:] using outer product */ | |||
1636 | /*-------------------------------------------------------------*/ | |||
1637 | /* put the value into Co=(p->B)^T*A (off-diagonal part, send to others) */ | |||
1638 | pnz = po->i[i+1] - po->i[i]; | |||
1639 | poJ = po->j + po->i[i]; | |||
1640 | pA = po->a + po->i[i]; | |||
1641 | for (j=0; j<pnz; j++) { | |||
1642 | row = poJ[j]; | |||
1643 | cj = coj + coi[row]; | |||
1644 | ca = coa + coi[row]; | |||
1645 | /* perform sparse axpy */ | |||
1646 | nexta = 0; | |||
1647 | valtmp = pA[j]; | |||
1648 | for (k=0; nexta<anz; k++) { | |||
1649 | if (cj[k] == adj[nexta]) { | |||
1650 | ca[k] += valtmp*ada[nexta]; | |||
1651 | nexta++; | |||
1652 | } | |||
1653 | } | |||
1654 | ierr = PetscLogFlops(2.0*anz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1654,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1655 | } | |||
1656 | ||||
1657 | /* put the value into Cd (diagonal part) */ | |||
1658 | pnz = pd->i[i+1] - pd->i[i]; | |||
1659 | pdJ = pd->j + pd->i[i]; | |||
1660 | pA = pd->a + pd->i[i]; | |||
1661 | for (j=0; j<pnz; j++) { | |||
1662 | row = pdJ[j]; | |||
1663 | cj = bj + bi[row]; | |||
1664 | ca = ba + bi[row]; | |||
1665 | /* perform sparse axpy */ | |||
1666 | nexta = 0; | |||
1667 | valtmp = pA[j]; | |||
1668 | for (k=0; nexta<anz; k++) { | |||
1669 | if (cj[k] == adj[nexta]) { | |||
1670 | ca[k] += valtmp*ada[nexta]; | |||
1671 | nexta++; | |||
1672 | } | |||
1673 | } | |||
1674 | ierr = PetscLogFlops(2.0*anz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1674,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1675 | } | |||
1676 | } | |||
1677 | ||||
1678 | /* 3) send and recv matrix values coa */ | |||
1679 | /*------------------------------------*/ | |||
1680 | buf_ri = merge->buf_ri; | |||
1681 | buf_rj = merge->buf_rj; | |||
1682 | len_s = merge->len_s; | |||
1683 | ierr = PetscCommGetNewTag(comm,&taga);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1683,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1684 | ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1684,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1685 | ||||
1686 | ierr = PetscMalloc2(merge->nsend+1,&s_waits,size,&status)PetscMallocA(2,PETSC_FALSE,1686,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(merge->nsend+1)*sizeof(**(&s_waits)),(&s_waits ),(size_t)(size)*sizeof(**(&status)),(&status));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1686,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1687 | for (proc=0,k=0; proc<size; proc++) { | |||
1688 | if (!len_s[proc]) continue; | |||
1689 | i = merge->owners_co[proc]; | |||
1690 | ierr = MPI_Isend(coa+coi[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k)((petsc_isend_ct++,0) || PetscMPITypeSize((len_s[proc]),(((MPI_Datatype )0x4c00080b)),&(petsc_isend_len)) || MPI_Isend((coa+coi[i ]),(len_s[proc]),(((MPI_Datatype)0x4c00080b)),(proc),(taga),( comm),(s_waits+k)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1690,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1691 | k++; | |||
1692 | } | |||
1693 | if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status)((petsc_wait_all_ct++,petsc_sum_of_waits_ct += (PetscLogDouble ) (merge->nrecv),0) || MPI_Waitall((merge->nrecv),(r_waits ),(status)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1693,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0);} | |||
1694 | if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status)((petsc_wait_all_ct++,petsc_sum_of_waits_ct += (PetscLogDouble ) (merge->nsend),0) || MPI_Waitall((merge->nsend),(s_waits ),(status)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1694,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0);} | |||
1695 | ||||
1696 | ierr = PetscFree2(s_waits,status)PetscFreeA(2,1696,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,&(s_waits),&(status));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1696,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1697 | ierr = PetscFree(r_waits)((*PetscTrFree)((void*)(r_waits),1697,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((r_waits) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1697,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1698 | ierr = PetscFree(coa)((*PetscTrFree)((void*)(coa),1698,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((coa) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1698,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1699 | ||||
1700 | /* 4) insert local Cseq and received values into Cmpi */ | |||
1701 | /*----------------------------------------------------*/ | |||
1702 | ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextci)PetscMallocA(3,PETSC_FALSE,1702,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(merge->nrecv)*sizeof(**(&buf_ri_k)),(&buf_ri_k ),(size_t)(merge->nrecv)*sizeof(**(&nextrow)),(&nextrow ),(size_t)(merge->nrecv)*sizeof(**(&nextci)),(&nextci ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1702,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1703 | for (k=0; k<merge->nrecv; k++) { | |||
1704 | buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ | |||
1705 | nrows = *(buf_ri_k[k]); | |||
1706 | nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ | |||
1707 | nextci[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ | |||
1708 | } | |||
1709 | ||||
1710 | for (i=0; i<cm; i++) { | |||
1711 | row = owners[rank] + i; /* global row index of C_seq */ | |||
1712 | bj_i = bj + bi[i]; /* col indices of the i-th row of C */ | |||
1713 | ba_i = ba + bi[i]; | |||
1714 | bnz = bi[i+1] - bi[i]; | |||
1715 | /* add received vals into ba */ | |||
1716 | for (k=0; k<merge->nrecv; k++) { /* k-th received message */ | |||
1717 | /* i-th row */ | |||
1718 | if (i == *nextrow[k]) { | |||
1719 | cnz = *(nextci[k]+1) - *nextci[k]; | |||
1720 | cj = buf_rj[k] + *(nextci[k]); | |||
1721 | ca = abuf_r[k] + *(nextci[k]); | |||
1722 | nextcj = 0; | |||
1723 | for (j=0; nextcj<cnz; j++) { | |||
1724 | if (bj_i[j] == cj[nextcj]) { /* bcol == ccol */ | |||
1725 | ba_i[j] += ca[nextcj++]; | |||
1726 | } | |||
1727 | } | |||
1728 | nextrow[k]++; nextci[k]++; | |||
1729 | ierr = PetscLogFlops(2.0*cnz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1729,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1730 | } | |||
1731 | } | |||
1732 | ierr = MatSetValues(C,1,&row,bnz,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1732,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1733 | } | |||
1734 | ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1734,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1735 | ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1735,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1736 | ||||
1737 | ierr = PetscFree(ba)((*PetscTrFree)((void*)(ba),1737,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((ba) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1737,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1738 | ierr = PetscFree(abuf_r[0])((*PetscTrFree)((void*)(abuf_r[0]),1738,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((abuf_r[0]) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1738,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1739 | ierr = PetscFree(abuf_r)((*PetscTrFree)((void*)(abuf_r),1739,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((abuf_r) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1739,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1740 | ierr = PetscFree3(buf_ri_k,nextrow,nextci)PetscFreeA(3,1740,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,&(buf_ri_k),&(nextrow),&(nextci));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1740,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1741 | ||||
1742 | if (ptap->freestruct) { | |||
1743 | ierr = MatFreeIntermediateDataStructures(C);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1743,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1744 | } | |||
1745 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
1746 | } | |||
1747 | ||||
1748 | PetscErrorCode MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ(Mat P,Mat A,PetscReal fill,Mat *C) | |||
1749 | { | |||
1750 | PetscErrorCode ierr; | |||
1751 | Mat Cmpi,A_loc,POt,PDt; | |||
1752 | Mat_APMPI *ptap; | |||
1753 | PetscFreeSpaceList free_space=NULL((void*)0),current_space=NULL((void*)0); | |||
1754 | Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data,*a=(Mat_MPIAIJ*)A->data,*c; | |||
1755 | PetscInt *pdti,*pdtj,*poti,*potj,*ptJ; | |||
1756 | PetscInt nnz; | |||
1757 | PetscInt *lnk,*owners_co,*coi,*coj,i,k,pnz,row; | |||
1758 | PetscInt am =A->rmap->n,pn=P->cmap->n; | |||
1759 | MPI_Comm comm; | |||
1760 | PetscMPIInt size,rank,tagi,tagj,*len_si,*len_s,*len_ri; | |||
1761 | PetscInt **buf_rj,**buf_ri,**buf_ri_k; | |||
1762 | PetscInt len,proc,*dnz,*onz,*owners; | |||
1763 | PetscInt nzi,*bi,*bj; | |||
1764 | PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextci; | |||
1765 | MPI_Request *swaits,*rwaits; | |||
1766 | MPI_Status *sstatus,rstatus; | |||
1767 | Mat_Merge_SeqsToMPI *merge; | |||
1768 | PetscInt *ai,*aj,*Jptr,anz,*prmap=p->garray,pon,nspacedouble=0,j; | |||
1769 | PetscReal afill =1.0,afill_tmp; | |||
1770 | PetscInt rstart = P->cmap->rstart,rmax,aN=A->cmap->N,Armax; | |||
1771 | Mat_SeqAIJ *a_loc,*pdt,*pot; | |||
1772 | PetscTable ta; | |||
1773 | MatType mtype; | |||
1774 | ||||
1775 | PetscFunctionBegindo { do { ; if (petscstack && (petscstack->currentsize < 64)) { petscstack->function[petscstack->currentsize ] = __func__; petscstack->file[petscstack->currentsize] = "/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ; petscstack->line[petscstack->currentsize] = 1775; petscstack ->petscroutine[petscstack->currentsize] = PETSC_TRUE; petscstack ->currentsize++; } if (petscstack) { petscstack->hotdepth += (PETSC_FALSE || petscstack->hotdepth); } ; } while (0) ; ; } while (0); | |||
1776 | ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1776,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1777 | /* check if matrix local sizes are compatible */ | |||
1778 | if (A->rmap->rstart != P->rmap->rstart || A->rmap->rend != P->rmap->rend) SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != P (%D,%D)",A->rmap->rstart,A->rmap->rend,P->rmap->rstart,P->rmap->rend)return PetscError(comm,1778,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,60,PETSC_ERROR_INITIAL,"Matrix local dimensions are incompatible, A (%D, %D) != P (%D,%D)" ,A->rmap->rstart,A->rmap->rend,P->rmap->rstart ,P->rmap->rend); | |||
1779 | ||||
1780 | ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1780,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1781 | ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1781,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1782 | ||||
1783 | /* create struct Mat_APMPI and attached it to C later */ | |||
1784 | ierr = PetscNew(&ptap)PetscMallocA(1,PETSC_TRUE,1784,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(1)*sizeof(**((&ptap))),((&ptap)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1784,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1785 | ||||
1786 | /* get A_loc by taking all local rows of A */ | |||
1787 | ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1787,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1788 | ||||
1789 | ptap->A_loc = A_loc; | |||
1790 | a_loc = (Mat_SeqAIJ*)(A_loc)->data; | |||
1791 | ai = a_loc->i; | |||
1792 | aj = a_loc->j; | |||
1793 | ||||
1794 | /* determine symbolic Co=(p->B)^T*A - send to others */ | |||
1795 | /*----------------------------------------------------*/ | |||
1796 | ierr = MatTransposeSymbolic_SeqAIJ(p->A,&PDt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1796,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1797 | pdt = (Mat_SeqAIJ*)PDt->data; | |||
1798 | pdti = pdt->i; pdtj = pdt->j; | |||
1799 | ||||
1800 | ierr = MatTransposeSymbolic_SeqAIJ(p->B,&POt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1800,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1801 | pot = (Mat_SeqAIJ*)POt->data; | |||
1802 | poti = pot->i; potj = pot->j; | |||
1803 | ||||
1804 | /* then, compute symbolic Co = (p->B)^T*A */ | |||
1805 | pon = (p->B)->cmap->n; /* total num of rows to be sent to other processors | |||
1806 | >= (num of nonzero rows of C_seq) - pn */ | |||
1807 | ierr = PetscMalloc1(pon+1,&coi)PetscMallocA(1,PETSC_FALSE,1807,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(pon+1)*sizeof(**(&coi)),(&coi));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1807,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1808 | coi[0] = 0; | |||
1809 | ||||
1810 | /* set initial free space to be fill*(nnz(p->B) + nnz(A)) */ | |||
1811 | nnz = PetscRealIntMultTruncate(fill,PetscIntSumTruncate(poti[pon],ai[am])); | |||
1812 | ierr = PetscFreeSpaceGet(nnz,&free_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1812,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1813 | current_space = free_space; | |||
1814 | ||||
1815 | /* create and initialize a linked list */ | |||
1816 | ierr = PetscTableCreate(A->cmap->n + a->B->cmap->N,aN,&ta);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1816,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1817 | MatRowMergeMax_SeqAIJ(a_loc,am,ta){ PetscInt _j,_row,_nz,*_col; if (a_loc) { for (_row=0; _row< am; _row++) { _nz = a_loc->i[_row+1] - a_loc->i[_row]; for (_j=0; _j<_nz; _j++) { _col = _j + a_loc->j + a_loc-> i[_row]; PetscTableAdd(ta,*_col+1,1,INSERT_VALUES); } } } }; | |||
1818 | ierr = PetscTableGetCount(ta,&Armax);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1818,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1819 | ||||
1820 | ierr = PetscLLCondensedCreate_Scalable(Armax,&lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1820,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1821 | ||||
1822 | for (i=0; i<pon; i++) { | |||
1823 | pnz = poti[i+1] - poti[i]; | |||
1824 | ptJ = potj + poti[i]; | |||
1825 | for (j=0; j<pnz; j++) { | |||
1826 | row = ptJ[j]; /* row of A_loc == col of Pot */ | |||
1827 | anz = ai[row+1] - ai[row]; | |||
1828 | Jptr = aj + ai[row]; | |||
1829 | /* add non-zero cols of AP into the sorted linked list lnk */ | |||
1830 | ierr = PetscLLCondensedAddSorted_Scalable(anz,Jptr,lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1830,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1831 | } | |||
1832 | nnz = lnk[0]; | |||
1833 | ||||
1834 | /* If free space is not available, double the total space in the list */ | |||
1835 | if (current_space->local_remaining<nnz) { | |||
1836 | ierr = PetscFreeSpaceGet(PetscIntSumTruncate(nnz,current_space->total_array_size),¤t_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1836,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1837 | nspacedouble++; | |||
1838 | } | |||
1839 | ||||
1840 | /* Copy data into free space, and zero out denserows */ | |||
1841 | ierr = PetscLLCondensedClean_Scalable(nnz,current_space->array,lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1841,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1842 | ||||
1843 | current_space->array += nnz; | |||
1844 | current_space->local_used += nnz; | |||
1845 | current_space->local_remaining -= nnz; | |||
1846 | ||||
1847 | coi[i+1] = coi[i] + nnz; | |||
1848 | } | |||
1849 | ||||
1850 | ierr = PetscMalloc1(coi[pon]+1,&coj)PetscMallocA(1,PETSC_FALSE,1850,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(coi[pon]+1)*sizeof(**(&coj)),(&coj));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1850,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1851 | ierr = PetscFreeSpaceContiguous(&free_space,coj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1851,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1852 | ierr = PetscLLCondensedDestroy_Scalable(lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1852,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); /* must destroy to get a new one for C */ | |||
1853 | ||||
1854 | afill_tmp = (PetscReal)coi[pon]/(poti[pon] + ai[am]+1); | |||
1855 | if (afill_tmp > afill) afill = afill_tmp; | |||
1856 | ||||
1857 | /* send j-array (coj) of Co to other processors */ | |||
1858 | /*----------------------------------------------*/ | |||
1859 | /* determine row ownership */ | |||
1860 | ierr = PetscNew(&merge)PetscMallocA(1,PETSC_TRUE,1860,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(1)*sizeof(**((&merge))),((&merge)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1860,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1861 | ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1861,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1862 | ||||
1863 | merge->rowmap->n = pn; | |||
1864 | merge->rowmap->bs = 1; | |||
1865 | ||||
1866 | ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1866,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1867 | owners = merge->rowmap->range; | |||
1868 | ||||
1869 | /* determine the number of messages to send, their lengths */ | |||
1870 | ierr = PetscCalloc1(size,&len_si)PetscMallocA(1,PETSC_TRUE,1870,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(size)*sizeof(**(&len_si)),(&len_si));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1870,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1871 | ierr = PetscCalloc1(size,&merge->len_s)PetscMallocA(1,PETSC_TRUE,1871,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(size)*sizeof(**(&merge->len_s)),(&merge-> len_s));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1871,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1872 | ||||
1873 | len_s = merge->len_s; | |||
1874 | merge->nsend = 0; | |||
1875 | ||||
1876 | ierr = PetscMalloc1(size+2,&owners_co)PetscMallocA(1,PETSC_FALSE,1876,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(size+2)*sizeof(**(&owners_co)),(&owners_co) );CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1876,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1877 | ||||
1878 | proc = 0; | |||
1879 | for (i=0; i<pon; i++) { | |||
1880 | while (prmap[i] >= owners[proc+1]) proc++; | |||
1881 | len_si[proc]++; /* num of rows in Co to be sent to [proc] */ | |||
1882 | len_s[proc] += coi[i+1] - coi[i]; | |||
1883 | } | |||
1884 | ||||
1885 | len = 0; /* max length of buf_si[] */ | |||
1886 | owners_co[0] = 0; | |||
1887 | for (proc=0; proc<size; proc++) { | |||
1888 | owners_co[proc+1] = owners_co[proc] + len_si[proc]; | |||
1889 | if (len_si[proc]) { | |||
1890 | merge->nsend++; | |||
1891 | len_si[proc] = 2*(len_si[proc] + 1); | |||
1892 | len += len_si[proc]; | |||
1893 | } | |||
1894 | } | |||
1895 | ||||
1896 | /* determine the number and length of messages to receive for coi and coj */ | |||
1897 | ierr = PetscGatherNumberOfMessages(comm,NULL((void*)0),len_s,&merge->nrecv);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1897,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1898 | ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1898,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1899 | ||||
1900 | /* post the Irecv and Isend of coj */ | |||
1901 | ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1901,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1902 | ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rwaits);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1902,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1903 | ierr = PetscMalloc1(merge->nsend+1,&swaits)PetscMallocA(1,PETSC_FALSE,1903,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(merge->nsend+1)*sizeof(**(&swaits)),(&swaits ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1903,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1904 | for (proc=0, k=0; proc<size; proc++) { | |||
1905 | if (!len_s[proc]) continue; | |||
1906 | i = owners_co[proc]; | |||
1907 | ierr = MPI_Isend(coj+coi[i],len_s[proc],MPIU_INT,proc,tagj,comm,swaits+k)((petsc_isend_ct++,0) || PetscMPITypeSize((len_s[proc]),(((MPI_Datatype )0x4c000405)),&(petsc_isend_len)) || MPI_Isend((coj+coi[i ]),(len_s[proc]),(((MPI_Datatype)0x4c000405)),(proc),(tagj),( comm),(swaits+k)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1907,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1908 | k++; | |||
1909 | } | |||
1910 | ||||
1911 | /* receives and sends of coj are complete */ | |||
1912 | ierr = PetscMalloc1(size,&sstatus)PetscMallocA(1,PETSC_FALSE,1912,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(size)*sizeof(**(&sstatus)),(&sstatus));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1912,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1913 | for (i=0; i<merge->nrecv; i++) { | |||
1914 | PetscMPIInt icompleted; | |||
1915 | ierr = MPI_Waitany(merge->nrecv,rwaits,&icompleted,&rstatus)((petsc_wait_any_ct++,petsc_sum_of_waits_ct++,0) || MPI_Waitany ((merge->nrecv),(rwaits),(&icompleted),(&rstatus)) );CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1915,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1916 | } | |||
1917 | ierr = PetscFree(rwaits)((*PetscTrFree)((void*)(rwaits),1917,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((rwaits) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1917,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1918 | if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,swaits,sstatus)((petsc_wait_all_ct++,petsc_sum_of_waits_ct += (PetscLogDouble ) (merge->nsend),0) || MPI_Waitall((merge->nsend),(swaits ),(sstatus)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1918,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0);} | |||
1919 | ||||
1920 | /* add received column indices into table to update Armax */ | |||
1921 | /* Armax can be as large as aN if a P[row,:] is dense, see src/ksp/ksp/examples/tutorials/ex56.c! */ | |||
1922 | for (k=0; k<merge->nrecv; k++) {/* k-th received message */ | |||
1923 | Jptr = buf_rj[k]; | |||
1924 | for (j=0; j<merge->len_r[k]; j++) { | |||
1925 | ierr = PetscTableAdd(ta,*(Jptr+j)+1,1,INSERT_VALUES);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1925,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1926 | } | |||
1927 | } | |||
1928 | ierr = PetscTableGetCount(ta,&Armax);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1928,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1929 | /* printf("Armax %d, an %d + Bn %d = %d, aN %d\n",Armax,A->cmap->n,a->B->cmap->N,A->cmap->n+a->B->cmap->N,aN); */ | |||
1930 | ||||
1931 | /* send and recv coi */ | |||
1932 | /*-------------------*/ | |||
1933 | ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1933,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1934 | ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&rwaits);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1934,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1935 | ierr = PetscMalloc1(len+1,&buf_s)PetscMallocA(1,PETSC_FALSE,1935,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(len+1)*sizeof(**(&buf_s)),(&buf_s));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1935,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1936 | buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ | |||
1937 | for (proc=0,k=0; proc<size; proc++) { | |||
1938 | if (!len_s[proc]) continue; | |||
1939 | /* form outgoing message for i-structure: | |||
1940 | buf_si[0]: nrows to be sent | |||
1941 | [1:nrows]: row index (global) | |||
1942 | [nrows+1:2*nrows+1]: i-structure index | |||
1943 | */ | |||
1944 | /*-------------------------------------------*/ | |||
1945 | nrows = len_si[proc]/2 - 1; | |||
1946 | buf_si_i = buf_si + nrows+1; | |||
1947 | buf_si[0] = nrows; | |||
1948 | buf_si_i[0] = 0; | |||
1949 | nrows = 0; | |||
1950 | for (i=owners_co[proc]; i<owners_co[proc+1]; i++) { | |||
1951 | nzi = coi[i+1] - coi[i]; | |||
1952 | buf_si_i[nrows+1] = buf_si_i[nrows] + nzi; /* i-structure */ | |||
1953 | buf_si[nrows+1] = prmap[i] -owners[proc]; /* local row index */ | |||
1954 | nrows++; | |||
1955 | } | |||
1956 | ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,swaits+k)((petsc_isend_ct++,0) || PetscMPITypeSize((len_si[proc]),(((MPI_Datatype )0x4c000405)),&(petsc_isend_len)) || MPI_Isend((buf_si),( len_si[proc]),(((MPI_Datatype)0x4c000405)),(proc),(tagi),(comm ),(swaits+k)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1956,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1957 | k++; | |||
1958 | buf_si += len_si[proc]; | |||
1959 | } | |||
1960 | i = merge->nrecv; | |||
1961 | while (i--) { | |||
1962 | PetscMPIInt icompleted; | |||
1963 | ierr = MPI_Waitany(merge->nrecv,rwaits,&icompleted,&rstatus)((petsc_wait_any_ct++,petsc_sum_of_waits_ct++,0) || MPI_Waitany ((merge->nrecv),(rwaits),(&icompleted),(&rstatus)) );CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1963,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1964 | } | |||
1965 | ierr = PetscFree(rwaits)((*PetscTrFree)((void*)(rwaits),1965,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((rwaits) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1965,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1966 | if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,swaits,sstatus)((petsc_wait_all_ct++,petsc_sum_of_waits_ct += (PetscLogDouble ) (merge->nsend),0) || MPI_Waitall((merge->nsend),(swaits ),(sstatus)));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1966,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0);} | |||
1967 | ierr = PetscFree(len_si)((*PetscTrFree)((void*)(len_si),1967,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((len_si) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1967,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1968 | ierr = PetscFree(len_ri)((*PetscTrFree)((void*)(len_ri),1968,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((len_ri) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1968,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1969 | ierr = PetscFree(swaits)((*PetscTrFree)((void*)(swaits),1969,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((swaits) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1969,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1970 | ierr = PetscFree(sstatus)((*PetscTrFree)((void*)(sstatus),1970,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((sstatus) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1970,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1971 | ierr = PetscFree(buf_s)((*PetscTrFree)((void*)(buf_s),1971,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ) || ((buf_s) = 0,0));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1971,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1972 | ||||
1973 | /* compute the local portion of C (mpi mat) */ | |||
1974 | /*------------------------------------------*/ | |||
1975 | /* allocate bi array and free space for accumulating nonzero column info */ | |||
1976 | ierr = PetscMalloc1(pn+1,&bi)PetscMallocA(1,PETSC_FALSE,1976,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(pn+1)*sizeof(**(&bi)),(&bi));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1976,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1977 | bi[0] = 0; | |||
1978 | ||||
1979 | /* set initial free space to be fill*(nnz(P) + nnz(AP)) */ | |||
1980 | nnz = PetscRealIntMultTruncate(fill,PetscIntSumTruncate(pdti[pn],PetscIntSumTruncate(poti[pon],ai[am]))); | |||
1981 | ierr = PetscFreeSpaceGet(nnz,&free_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1981,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1982 | current_space = free_space; | |||
1983 | ||||
1984 | ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextci)PetscMallocA(3,PETSC_FALSE,1984,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(merge->nrecv)*sizeof(**(&buf_ri_k)),(&buf_ri_k ),(size_t)(merge->nrecv)*sizeof(**(&nextrow)),(&nextrow ),(size_t)(merge->nrecv)*sizeof(**(&nextci)),(&nextci ));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1984,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1985 | for (k=0; k<merge->nrecv; k++) { | |||
1986 | buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ | |||
1987 | nrows = *buf_ri_k[k]; | |||
1988 | nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ | |||
1989 | nextci[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */ | |||
1990 | } | |||
1991 | ||||
1992 | ierr = PetscLLCondensedCreate_Scalable(Armax,&lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1992,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1993 | ierr = MatPreallocateInitialize(comm,pn,A->cmap->n,dnz,onz)0; do { PetscErrorCode _4_ierr; PetscInt __nrows = (pn),__ncols = (A->cmap->n),__rstart,__start,__end; _4_ierr = PetscMallocA (2,PETSC_TRUE,1993,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)((size_t)__nrows)*sizeof(**(&dnz)),(&dnz),(size_t )((size_t)__nrows)*sizeof(**(&onz)),(&onz));do {if (__builtin_expect (!!(_4_ierr),0)) return PetscError(((MPI_Comm)0x44000001),1993 ,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0); __start = 0; __end = __start; _4_ierr = MPI_Scan(&__ncols,&__end,1,((MPI_Datatype )0x4c000405),(MPI_Op)(0x58000003),comm);do {if (__builtin_expect (!!(_4_ierr),0)) return PetscError(((MPI_Comm)0x44000001),1993 ,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0); __start = __end - __ncols; _4_ierr = MPI_Scan(&__nrows,&__rstart,1,( (MPI_Datatype)0x4c000405),(MPI_Op)(0x58000003),comm);do {if ( __builtin_expect(!!(_4_ierr),0)) return PetscError(((MPI_Comm )0x44000001),1993,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0); __rstart = __rstart - __nrows; do { } while(0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),1993,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
1994 | rmax = 0; | |||
1995 | for (i=0; i<pn; i++) { | |||
1996 | /* add pdt[i,:]*AP into lnk */ | |||
1997 | pnz = pdti[i+1] - pdti[i]; | |||
1998 | ptJ = pdtj + pdti[i]; | |||
1999 | for (j=0; j<pnz; j++) { | |||
2000 | row = ptJ[j]; /* row of AP == col of Pt */ | |||
2001 | anz = ai[row+1] - ai[row]; | |||
2002 | Jptr = aj + ai[row]; | |||
2003 | /* add non-zero cols of AP into the sorted linked list lnk */ | |||
2004 | ierr = PetscLLCondensedAddSorted_Scalable(anz,Jptr,lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2004,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2005 | } | |||
2006 | ||||
2007 | /* add received col data into lnk */ | |||
2008 | for (k=0; k<merge->nrecv; k++) { /* k-th received message */ | |||
2009 | if (i == *nextrow[k]) { /* i-th row */ | |||
2010 | nzi = *(nextci[k]+1) - *nextci[k]; | |||
2011 | Jptr = buf_rj[k] + *nextci[k]; | |||
2012 | ierr = PetscLLCondensedAddSorted_Scalable(nzi,Jptr,lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2012,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2013 | nextrow[k]++; nextci[k]++; | |||
2014 | } | |||
2015 | } | |||
2016 | nnz = lnk[0]; | |||
2017 | ||||
2018 | /* if free space is not available, make more free space */ | |||
2019 | if (current_space->local_remaining<nnz) { | |||
2020 | ierr = PetscFreeSpaceGet(PetscIntSumTruncate(nnz,current_space->total_array_size),&current_space);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2020,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2021 | nspacedouble++; | |||
2022 | } | |||
2023 | /* copy data into free space, then initialize lnk */ | |||
2024 | ierr = PetscLLCondensedClean_Scalable(nnz,current_space->array,lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2024,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2025 | ierr = MatPreallocateSet(i+owners[rank],nnz,current_space->array,dnz,onz)0;do { PetscInt __i; if (i+owners[rank] < __rstart) return PetscError(((MPI_Comm)0x44000001),2025,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,63,PETSC_ERROR_INITIAL,"Trying to set preallocation for row %D less than first local row %D" ,i+owners[rank],__rstart); if (i+owners[rank] >= __rstart+ __nrows) return PetscError(((MPI_Comm)0x44000001),2025,__func__ ,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,63,PETSC_ERROR_INITIAL,"Trying to set preallocation for row %D greater than last local row %D" ,i+owners[rank],__rstart+__nrows-1); for (__i=0; __i<nnz; __i ++) { if ((current_space->array)[__i] < __start || (current_space ->array)[__i] >= __end) onz[i+owners[rank] - __rstart]++ ; else if (dnz[i+owners[rank] - __rstart] < __ncols) dnz[i +owners[rank] - __rstart]++; }} while (0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2025,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2026 | ||||
2027 | current_space->array += nnz; | |||
2028 | current_space->local_used += nnz; | |||
2029 | current_space->local_remaining -= nnz; | |||
2030 | ||||
2031 | bi[i+1] = bi[i] + nnz; | |||
2032 | if (nnz > rmax) rmax = nnz; | |||
2033 | } | |||
2034 | ierr = PetscFree3(buf_ri_k,nextrow,nextci)PetscFreeA(3,2034,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,&(buf_ri_k),&(nextrow),&(nextci));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2034,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2035 | ||||
2036 | ierr = PetscMalloc1(bi[pn]+1,&bj)PetscMallocA(1,PETSC_FALSE,2036,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,(size_t)(bi[pn]+1)*sizeof(**(&bj)),(&bj));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2036,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2037 | ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2037,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2038 | afill_tmp = (PetscReal)bi[pn]/(pdti[pn] + poti[pon] + ai[am]+1); | |||
2039 | if (afill_tmp > afill) afill = afill_tmp; | |||
2040 | ierr = PetscLLCondensedDestroy_Scalable(lnk);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2040,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2041 | ierr = PetscTableDestroy(&ta);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2041,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2042 | ||||
2043 | ierr = MatDestroy(&POt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2043,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2044 | ierr = MatDestroy(&PDt);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2044,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2045 | ||||
2046 | /* create symbolic parallel matrix Cmpi - why cannot be assembled in Numeric part */ | |||
2047 | /*----------------------------------------------------------------------------------*/ | |||
2048 | ierr = MatCreate(comm,&Cmpi);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2048,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2049 | ierr = MatSetSizes(Cmpi,pn,A->cmap->n,PETSC_DETERMINE-1,PETSC_DETERMINE-1);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2049,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2050 | ierr = MatSetBlockSizes(Cmpi,PetscAbs(P->cmap->bs)(((P->cmap->bs) >= 0) ? (P->cmap->bs) : (-(P-> cmap->bs))),PetscAbs(A->cmap->bs)(((A->cmap->bs) >= 0) ? (A->cmap->bs) : (-(A-> cmap->bs))));CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2050,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2051 | ierr = MatGetType(A,&mtype);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2051,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2052 | ierr = MatSetType(Cmpi,mtype);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2052,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2053 | ierr = MatMPIAIJSetPreallocation(Cmpi,0,dnz,0,onz);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2053,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2054 | ierr = MatPreallocateFinalize(dnz,onz)0;_4_ierr = PetscFreeA(2,2054,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,&(dnz),&(onz));do {if (__builtin_expect(!!(_4_ierr), 0)) return PetscError(((MPI_Comm)0x44000001),2054,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,_4_ierr,PETSC_ERROR_REPEAT," ");} while (0);} while(0);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2054,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2055 | ierr = MatSetBlockSize(Cmpi,1);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2055,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2056 | for (i=0; i<pn; i++) { | |||
2057 | row = i + rstart; | |||
2058 | nnz = bi[i+1] - bi[i]; | |||
2059 | Jptr = bj + bi[i]; | |||
2060 | ierr = MatSetValues(Cmpi,1,&row,nnz,Jptr,NULL((void*)0),INSERT_VALUES);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2060,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2061 | } | |||
2062 | ierr = MatAssemblyBegin(Cmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2062,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2063 | ierr = MatAssemblyEnd(Cmpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2063,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2064 | merge->bi = bi; | |||
2065 | merge->bj = bj; | |||
2066 | merge->coi = coi; | |||
2067 | merge->coj = coj; | |||
2068 | merge->buf_ri = buf_ri; | |||
2069 | merge->buf_rj = buf_rj; | |||
2070 | merge->owners_co = owners_co; | |||
2071 | ||||
2072 | /* attach the supporting struct to Cmpi for reuse */ | |||
2073 | c = (Mat_MPIAIJ*)Cmpi->data; | |||
2074 | ||||
2075 | c->ap = ptap; | |||
2076 | ptap->api = NULL((void*)0); | |||
2077 | ptap->apj = NULL((void*)0); | |||
2078 | ptap->merge = merge; | |||
2079 | ptap->apa = NULL((void*)0); | |||
2080 | ptap->destroy = Cmpi->ops->destroy; | |||
2081 | ptap->duplicate = Cmpi->ops->duplicate; | |||
2082 | ||||
2083 | Cmpi->ops->mattransposemultnumeric = MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ; | |||
2084 | Cmpi->ops->destroy = MatDestroy_MPIAIJ_PtAP; | |||
2085 | Cmpi->ops->freeintermediatedatastructures = MatFreeIntermediateDataStructures_MPIAIJ_AP; | |||
2086 | ||||
2087 | *C = Cmpi; | |||
2088 | #if defined(PETSC_USE_INFO1) | |||
2089 | if (bi[pn] != 0) { | |||
2090 | ierr = PetscInfo3(Cmpi,"Reallocs %D; Fill ratio: given %g needed %g.\n",nspacedouble,(double)fill,(double)afill)PetscInfo_Private(__func__,Cmpi,"Reallocs %D; Fill ratio: given %g needed %g.\n" ,nspacedouble,(double)fill,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2090,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2091 | ierr = PetscInfo1(Cmpi,"Use MatTransposeMatMult(A,B,MatReuse,%g,&C) for best performance.\n",(double)afill)PetscInfo_Private(__func__,Cmpi,"Use MatTransposeMatMult(A,B,MatReuse,%g,&C) for best performance.\n" ,(double)afill);CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2091,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2092 | } else { | |||
2093 | ierr = PetscInfo(Cmpi,"Empty matrix product\n")PetscInfo_Private(__func__,Cmpi,"Empty matrix product\n");CHKERRQ(ierr)do {if (__builtin_expect(!!(ierr),0)) return PetscError(((MPI_Comm )0x44000001),2093,__func__,"/sandbox/petsc/petsc.next-tmp/src/mat/impls/aij/mpi/mpimatmatmult.c" ,ierr,PETSC_ERROR_REPEAT," ");} while (0); | |||
2094 | } | |||
2095 | #endif | |||
2096 | PetscFunctionReturn(0)do { do { ; if (petscstack && petscstack->currentsize > 0) { petscstack->currentsize--; petscstack->function [petscstack->currentsize] = 0; petscstack->file[petscstack ->currentsize] = 0; petscstack->line[petscstack->currentsize ] = 0; petscstack->petscroutine[petscstack->currentsize ] = PETSC_FALSE; } if (petscstack) { petscstack->hotdepth = (((petscstack->hotdepth-1)<(0)) ? (0) : (petscstack-> hotdepth-1)); } ; } while (0); return(0);} while (0); | |||
2097 | } |