Actual source code: matstash.c

 2:  #include src/mat/matimpl.h

  4: /*
  5:        The input to the stash is ALWAYS in MatScalar precision, and the 
  6:     internal storage and output is also in MatScalar.
  7: */
  8: #define DEFAULT_STASH_SIZE   10000

 10: /*
 11:   MatStashCreate_Private - Creates a stash,currently used for all the parallel 
 12:   matrix implementations. The stash is where elements of a matrix destined 
 13:   to be stored on other processors are kept until matrix assembly is done.

 15:   This is a simple minded stash. Simply adds entries to end of stash.

 17:   Input Parameters:
 18:   comm - communicator, required for scatters.
 19:   bs   - stash block size. used when stashing blocks of values

 21:   Output Parameters:
 22:   stash    - the newly created stash
 23: */
 26: PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
 27: {
 29:   PetscInt       max,*opt,nopt;
 30:   PetscTruth     flg;

 33:   /* Require 2 tags,get the second using PetscCommGetNewTag() */
 34:   stash->comm = comm;
 35:   PetscCommGetNewTag(stash->comm,&stash->tag1);
 36:   PetscCommGetNewTag(stash->comm,&stash->tag2);
 37:   MPI_Comm_size(stash->comm,&stash->size);
 38:   MPI_Comm_rank(stash->comm,&stash->rank);

 40:   nopt = stash->size;
 41:   PetscMalloc(nopt*sizeof(PetscInt),&opt);
 42:   PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);
 43:   if (flg) {
 44:     if (nopt == 1)                max = opt[0];
 45:     else if (nopt == stash->size) max = opt[stash->rank];
 46:     else if (stash->rank < nopt)  max = opt[stash->rank];
 47:     else                          max = 0; /* Use default */
 48:     stash->umax = max;
 49:   } else {
 50:     stash->umax = 0;
 51:   }
 52:   PetscFree(opt);
 53:   if (bs <= 0) bs = 1;

 55:   stash->bs       = bs;
 56:   stash->nmax     = 0;
 57:   stash->oldnmax  = 0;
 58:   stash->n        = 0;
 59:   stash->reallocs = -1;
 60:   stash->idx      = 0;
 61:   stash->idy      = 0;
 62:   stash->array    = 0;

 64:   stash->send_waits  = 0;
 65:   stash->recv_waits  = 0;
 66:   stash->send_status = 0;
 67:   stash->nsends      = 0;
 68:   stash->nrecvs      = 0;
 69:   stash->svalues     = 0;
 70:   stash->rvalues     = 0;
 71:   stash->rmax        = 0;
 72:   stash->nprocs      = 0;
 73:   stash->nprocessed  = 0;
 74:   return(0);
 75: }

 77: /* 
 78:    MatStashDestroy_Private - Destroy the stash
 79: */
 82: PetscErrorCode MatStashDestroy_Private(MatStash *stash)
 83: {

 87:   if (stash->array) {
 88:     PetscFree(stash->array);
 89:     stash->array = 0;
 90:   }
 91:   return(0);
 92: }

 94: /* 
 95:    MatStashScatterEnd_Private - This is called as the fial stage of
 96:    scatter. The final stages of messagepassing is done here, and
 97:    all the memory used for messagepassing is cleanedu up. This
 98:    routine also resets the stash, and deallocates the memory used
 99:    for the stash. It also keeps track of the current memory usage
100:    so that the same value can be used the next time through.
101: */
104: PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
105: {
107:   int         nsends=stash->nsends,bs2,oldnmax;
108:   MPI_Status  *send_status;

111:   /* wait on sends */
112:   if (nsends) {
113:     PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
114:     MPI_Waitall(2*nsends,stash->send_waits,send_status);
115:     PetscFree(send_status);
116:   }

118:   /* Now update nmaxold to be app 10% more than max n used, this way the
119:      wastage of space is reduced the next time this stash is used.
120:      Also update the oldmax, only if it increases */
121:   if (stash->n) {
122:     bs2      = stash->bs*stash->bs;
123:     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
124:     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
125:   }

127:   stash->nmax       = 0;
128:   stash->n          = 0;
129:   stash->reallocs   = -1;
130:   stash->rmax       = 0;
131:   stash->nprocessed = 0;

133:   if (stash->array) {
134:     PetscFree(stash->array);
135:     stash->array = 0;
136:     stash->idx   = 0;
137:     stash->idy   = 0;
138:   }
139:   if (stash->send_waits) {
140:     PetscFree(stash->send_waits);
141:     stash->send_waits = 0;
142:   }
143:   if (stash->recv_waits) {
144:     PetscFree(stash->recv_waits);
145:     stash->recv_waits = 0;
146:   }
147:   if (stash->svalues) {
148:     PetscFree(stash->svalues);
149:     stash->svalues = 0;
150:   }
151:   if (stash->rvalues) {
152:     PetscFree(stash->rvalues);
153:     stash->rvalues = 0;
154:   }
155:   if (stash->nprocs) {
156:     PetscFree(stash->nprocs);
157:     stash->nprocs = 0;
158:   }

160:   return(0);
161: }

163: /* 
164:    MatStashGetInfo_Private - Gets the relavant statistics of the stash

166:    Input Parameters:
167:    stash    - the stash
168:    nstash   - the size of the stash. Indicates the number of values stored.
169:    reallocs - the number of additional mallocs incurred.
170:    
171: */
174: PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
175: {
176:   PetscInt bs2 = stash->bs*stash->bs;

179:   if (nstash) *nstash   = stash->n*bs2;
180:   if (reallocs) {
181:     if (stash->reallocs < 0) *reallocs = 0;
182:     else                     *reallocs = stash->reallocs;
183:   }
184:   return(0);
185: }


188: /* 
189:    MatStashSetInitialSize_Private - Sets the initial size of the stash

191:    Input Parameters:
192:    stash  - the stash
193:    max    - the value that is used as the max size of the stash. 
194:             this value is used while allocating memory.
195: */
198: PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
199: {
201:   stash->umax = max;
202:   return(0);
203: }

205: /* MatStashExpand_Private - Expand the stash. This function is called
206:    when the space in the stash is not sufficient to add the new values
207:    being inserted into the stash.
208:    
209:    Input Parameters:
210:    stash - the stash
211:    incr  - the minimum increase requested
212:    
213:    Notes: 
214:    This routine doubles the currently used memory. 
215:  */
218: static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
219: {
221:   PetscInt       *n_idx,*n_idy,newnmax,bs2;
222:   MatScalar *n_array;

225:   /* allocate a larger stash */
226:   bs2     = stash->bs*stash->bs;
227:   if (!stash->oldnmax && !stash->nmax) { /* new stash */
228:     if (stash->umax)                  newnmax = stash->umax/bs2;
229:     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
230:   } else if (!stash->nmax) { /* resuing stash */
231:     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
232:     else                              newnmax = stash->oldnmax/bs2;
233:   } else                              newnmax = stash->nmax*2;
234:   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;

236:   PetscMalloc((newnmax)*(2*sizeof(PetscInt)+bs2*sizeof(MatScalar)),&n_array);
237:   n_idx = (PetscInt*)(n_array + bs2*newnmax);
238:   n_idy = (PetscInt*)(n_idx + newnmax);
239:   PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));
240:   PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));
241:   PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(PetscInt));
242:   if (stash->array) {PetscFree(stash->array);}
243:   stash->array   = n_array;
244:   stash->idx     = n_idx;
245:   stash->idy     = n_idy;
246:   stash->nmax    = newnmax;
247:   stash->reallocs++;
248:   return(0);
249: }
250: /*
251:   MatStashValuesRow_Private - inserts values into the stash. This function
252:   expects the values to be roworiented. Multiple columns belong to the same row
253:   can be inserted with a single call to this function.

255:   Input Parameters:
256:   stash  - the stash
257:   row    - the global row correspoiding to the values
258:   n      - the number of elements inserted. All elements belong to the above row.
259:   idxn   - the global column indices corresponding to each of the values.
260:   values - the values inserted
261: */
264: PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
265: {
267:   PetscInt i;

270:   /* Check and see if we have sufficient memory */
271:   if ((stash->n + n) > stash->nmax) {
272:     MatStashExpand_Private(stash,n);
273:   }
274:   for (i=0; i<n; i++) {
275:     stash->idx[stash->n]   = row;
276:     stash->idy[stash->n]   = idxn[i];
277:     stash->array[stash->n] = values[i];
278:     stash->n++;
279:   }
280:   return(0);
281: }
282: /*
283:   MatStashValuesCol_Private - inserts values into the stash. This function
284:   expects the values to be columnoriented. Multiple columns belong to the same row
285:   can be inserted with a single call to this function.

287:   Input Parameters:
288:   stash   - the stash
289:   row     - the global row correspoiding to the values
290:   n       - the number of elements inserted. All elements belong to the above row.
291:   idxn    - the global column indices corresponding to each of the values.
292:   values  - the values inserted
293:   stepval - the consecutive values are sepated by a distance of stepval.
294:             this happens because the input is columnoriented.
295: */
298: PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
299: {
301:   PetscInt i;

304:   /* Check and see if we have sufficient memory */
305:   if ((stash->n + n) > stash->nmax) {
306:     MatStashExpand_Private(stash,n);
307:   }
308:   for (i=0; i<n; i++) {
309:     stash->idx[stash->n]   = row;
310:     stash->idy[stash->n]   = idxn[i];
311:     stash->array[stash->n] = values[i*stepval];
312:     stash->n++;
313:   }
314:   return(0);
315: }

317: /*
318:   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 
319:   This function expects the values to be roworiented. Multiple columns belong 
320:   to the same block-row can be inserted with a single call to this function.
321:   This function extracts the sub-block of values based on the dimensions of
322:   the original input block, and the row,col values corresponding to the blocks.

324:   Input Parameters:
325:   stash  - the stash
326:   row    - the global block-row correspoiding to the values
327:   n      - the number of elements inserted. All elements belong to the above row.
328:   idxn   - the global block-column indices corresponding to each of the blocks of 
329:            values. Each block is of size bs*bs.
330:   values - the values inserted
331:   rmax   - the number of block-rows in the original block.
332:   cmax   - the number of block-columsn on the original block.
333:   idx    - the index of the current block-row in the original block.
334: */
337: PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
338: {
340:   PetscInt i,j,k,bs2,bs=stash->bs;
341:   const MatScalar *vals;
342:   MatScalar       *array;

345:   bs2 = bs*bs;
346:   if ((stash->n+n) > stash->nmax) {
347:     MatStashExpand_Private(stash,n);
348:   }
349:   for (i=0; i<n; i++) {
350:     stash->idx[stash->n]   = row;
351:     stash->idy[stash->n] = idxn[i];
352:     /* Now copy over the block of values. Store the values column oriented.
353:        This enables inserting multiple blocks belonging to a row with a single
354:        funtion call */
355:     array = stash->array + bs2*stash->n;
356:     vals  = values + idx*bs2*n + bs*i;
357:     for (j=0; j<bs; j++) {
358:       for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
359:       array += 1;
360:       vals  += cmax*bs;
361:     }
362:     stash->n++;
363:   }
364:   return(0);
365: }

367: /*
368:   MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 
369:   This function expects the values to be roworiented. Multiple columns belong 
370:   to the same block-row can be inserted with a single call to this function.
371:   This function extracts the sub-block of values based on the dimensions of
372:   the original input block, and the row,col values corresponding to the blocks.

374:   Input Parameters:
375:   stash  - the stash
376:   row    - the global block-row correspoiding to the values
377:   n      - the number of elements inserted. All elements belong to the above row.
378:   idxn   - the global block-column indices corresponding to each of the blocks of 
379:            values. Each block is of size bs*bs.
380:   values - the values inserted
381:   rmax   - the number of block-rows in the original block.
382:   cmax   - the number of block-columsn on the original block.
383:   idx    - the index of the current block-row in the original block.
384: */
387: PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
388: {
390:   PetscInt i,j,k,bs2,bs=stash->bs;
391:   const MatScalar *vals;
392:   MatScalar       *array;

395:   bs2 = bs*bs;
396:   if ((stash->n+n) > stash->nmax) {
397:     MatStashExpand_Private(stash,n);
398:   }
399:   for (i=0; i<n; i++) {
400:     stash->idx[stash->n]   = row;
401:     stash->idy[stash->n] = idxn[i];
402:     /* Now copy over the block of values. Store the values column oriented.
403:      This enables inserting multiple blocks belonging to a row with a single
404:      funtion call */
405:     array = stash->array + bs2*stash->n;
406:     vals  = values + idx*bs + bs2*rmax*i;
407:     for (j=0; j<bs; j++) {
408:       for (k=0; k<bs; k++) {array[k] = vals[k];}
409:       array += bs;
410:       vals  += rmax*bs;
411:     }
412:     stash->n++;
413:   }
414:   return(0);
415: }
416: /*
417:   MatStashScatterBegin_Private - Initiates the transfer of values to the
418:   correct owners. This function goes through the stash, and check the
419:   owners of each stashed value, and sends the values off to the owner
420:   processors.

422:   Input Parameters:
423:   stash  - the stash
424:   owners - an array of size 'no-of-procs' which gives the ownership range
425:            for each node.

427:   Notes: The 'owners' array in the cased of the blocked-stash has the 
428:   ranges specified blocked global indices, and for the regular stash in
429:   the proper global indices.
430: */
433: PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
434: {
435:   PetscInt         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
436:   PetscInt         size=stash->size,*nprocs,nsends,nreceives;
438:   PetscInt         nmax,count,*sindices,*rindices,i,j,idx;
439:   MatScalar   *rvalues,*svalues;
440:   MPI_Comm    comm = stash->comm;
441:   MPI_Request *send_waits,*recv_waits;


445:   bs2   = stash->bs*stash->bs;
446:   /*  first count number of contributors to each processor */
447:   PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
448:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
449:   PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);

451:   for (i=0; i<stash->n; i++) {
452:     idx = stash->idx[i];
453:     for (j=0; j<size; j++) {
454:       if (idx >= owners[j] && idx < owners[j+1]) {
455:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
456:       }
457:     }
458:   }
459:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
460: 
461:   /* inform other processors of number of messages and max length*/
462:   PetscMaxSum(comm,nprocs,&nmax,&nreceives);

464:   /* post receives: 
465:      since we don't know how long each individual message is we 
466:      allocate the largest needed buffer for each receive. Potentially 
467:      this is a lot of wasted space.
468:   */
469:   PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&rvalues);
470:   rindices = (PetscInt*)(rvalues + bs2*nreceives*nmax);
471:   PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);
472:   for (i=0,count=0; i<nreceives; i++) {
473:     MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_MATSCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
474:     MPI_Irecv(rindices+2*nmax*i,2*nmax,MPIU_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);
475:   }

477:   /* do sends:
478:       1) starts[i] gives the starting index in svalues for stuff going to 
479:          the ith processor
480:   */
481:   PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);
482:   sindices = (PetscInt*)(svalues + bs2*stash->n);
483:   PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);
484:   PetscMalloc(2*size*sizeof(PetscInt),&startv);
485:   starti   = startv + size;
486:   /* use 2 sends the first with all_a, the next with all_i and all_j */
487:   startv[0]  = 0; starti[0] = 0;
488:   for (i=1; i<size; i++) {
489:     startv[i] = startv[i-1] + nprocs[2*i-2];
490:     starti[i] = starti[i-1] + nprocs[2*i-2]*2;
491:   }
492:   for (i=0; i<stash->n; i++) {
493:     j = owner[i];
494:     if (bs2 == 1) {
495:       svalues[startv[j]]              = stash->array[i];
496:     } else {
497:       PetscInt       k;
498:       MatScalar *buf1,*buf2;
499:       buf1 = svalues+bs2*startv[j];
500:       buf2 = stash->array+bs2*i;
501:       for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
502:     }
503:     sindices[starti[j]]               = stash->idx[i];
504:     sindices[starti[j]+nprocs[2*j]]   = stash->idy[i];
505:     startv[j]++;
506:     starti[j]++;
507:   }
508:   startv[0] = 0;
509:   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nprocs[2*i-2];}
510:   for (i=0,count=0; i<size; i++) {
511:     if (nprocs[2*i+1]) {
512:       MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[2*i],MPIU_MATSCALAR,i,tag1,comm,send_waits+count++);
513:       MPI_Isend(sindices+2*startv[i],2*nprocs[2*i],MPIU_INT,i,tag2,comm,send_waits+count++);
514:     }
515:   }
516:   PetscFree(owner);
517:   PetscFree(startv);
518:   /* This memory is reused in scatter end  for a different purpose*/
519:   for (i=0; i<2*size; i++) nprocs[i] = -1;
520:   stash->nprocs      = nprocs;

522:   stash->svalues    = svalues;    stash->rvalues    = rvalues;
523:   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
524:   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
525:   stash->rmax       = nmax;
526:   return(0);
527: }

529: /* 
530:    MatStashScatterGetMesg_Private - This function waits on the receives posted 
531:    in the function MatStashScatterBegin_Private() and returns one message at 
532:    a time to the calling function. If no messages are left, it indicates this
533:    by setting flg = 0, else it sets flg = 1.

535:    Input Parameters:
536:    stash - the stash

538:    Output Parameters:
539:    nvals - the number of entries in the current message.
540:    rows  - an array of row indices (or blocked indices) corresponding to the values
541:    cols  - an array of columnindices (or blocked indices) corresponding to the values
542:    vals  - the values
543:    flg   - 0 indicates no more message left, and the current call has no values associated.
544:            1 indicates that the current call successfully received a message, and the
545:              other output parameters nvals,rows,cols,vals are set appropriately.
546: */
549: PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
550: {
552:   PetscMPIInt    i;
553:   PetscInt       *flg_v,i1,i2,*rindices,bs2;
554:   MPI_Status     recv_status;
555:   PetscTruth     match_found = PETSC_FALSE;


559:   *flg = 0; /* When a message is discovered this is reset to 1 */
560:   /* Return if no more messages to process */
561:   if (stash->nprocessed == stash->nrecvs) { return(0); }

563:   flg_v = stash->nprocs;
564:   bs2   = stash->bs*stash->bs;
565:   /* If a matching pair of receieves are found, process them, and return the data to
566:      the calling function. Until then keep receiving messages */
567:   while (!match_found) {
568:     MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
569:     /* Now pack the received message into a structure which is useable by others */
570:     if (i % 2) {
571:       MPI_Get_count(&recv_status,MPIU_INT,nvals);
572:       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
573:       *nvals = *nvals/2; /* This message has both row indices and col indices */
574:     } else {
575:       MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);
576:       flg_v[2*recv_status.MPI_SOURCE] = i/2;
577:       *nvals = *nvals/bs2;
578:     }
579: 
580:     /* Check if we have both the messages from this proc */
581:     i1 = flg_v[2*recv_status.MPI_SOURCE];
582:     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
583:     if (i1 != -1 && i2 != -1) {
584:       rindices    = (PetscInt*)(stash->rvalues + bs2*stash->rmax*stash->nrecvs);
585:       *rows       = rindices + 2*i2*stash->rmax;
586:       *cols       = *rows + *nvals;
587:       *vals       = stash->rvalues + i1*bs2*stash->rmax;
588:       *flg        = 1;
589:       stash->nprocessed ++;
590:       match_found = PETSC_TRUE;
591:     }
592:   }
593:   return(0);
594: }