Actual source code: vecstash.c

  1: /*$Id: vecstash.c,v 1.26 2001/03/23 23:21:18 balay Exp $*/

 3:  #include "src/vec/vecimpl.h"

  5: #define DEFAULT_STASH_SIZE   100

  7: /*
  8:   VecStashCreate_Private - Creates a stash, currently used by all the parallel 
  9:   vector implementations. The stash is where elements of a vector destined 
 10:   to be stored on other processors are kept until vector assembly is done.

 12:   This is a simple-minded stash that simply appends entries to the end of the stash.

 14:   Input Parameters:
 15:   comm - communicator, required for scatters.
 16:   bs   - stash block size, used when stashing blocks of values

 18:   Output Parameters:
 19:   stash    - the newly created stash
 20: */
 21: int VecStashCreate_Private(MPI_Comm comm,int bs,VecStash *stash)
 22: {
 23:   int        ierr,max,*opt,nopt;
 24:   PetscTruth flg;

 27:   /* Require 2 tags, get the second using PetscCommGetNewTag() */
 28:   stash->comm = comm;
 29:   PetscCommGetNewTag(stash->comm,&stash->tag1);
 30:   PetscCommGetNewTag(stash->comm,&stash->tag2);
 31:   MPI_Comm_size(stash->comm,&stash->size);
 32:   MPI_Comm_rank(stash->comm,&stash->rank);

 34:   nopt = stash->size;
 35:   PetscMalloc(nopt*sizeof(int),&opt);
 36:   PetscOptionsGetIntArray(PETSC_NULL,"-vecstash_initial_size",opt,&nopt,&flg);
 37:   if (flg) {
 38:     if (nopt == 1)                max = opt[0];
 39:     else if (nopt == stash->size) max = opt[stash->rank];
 40:     else if (stash->rank < nopt)  max = opt[stash->rank];
 41:     else                          max = 0; /* use default */
 42:     stash->umax = max;
 43:   } else {
 44:     stash->umax = 0;
 45:   }
 46:   PetscFree(opt);

 48:   if (bs <= 0) bs = 1;

 50:   stash->bs       = bs;
 51:   stash->nmax     = 0;
 52:   stash->oldnmax  = 0;
 53:   stash->n        = 0;
 54:   stash->reallocs = -1;
 55:   stash->idx      = 0;
 56:   stash->array    = 0;

 58:   stash->send_waits  = 0;
 59:   stash->recv_waits  = 0;
 60:   stash->send_status = 0;
 61:   stash->nsends      = 0;
 62:   stash->nrecvs      = 0;
 63:   stash->svalues     = 0;
 64:   stash->rvalues     = 0;
 65:   stash->rmax        = 0;
 66:   stash->nprocs      = 0;
 67:   stash->nprocessed  = 0;
 68:   return(0);
 69: }
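
For context, here is a minimal sketch, not part of vecstash.c, of how a parallel vector constructor might call the routine above; the helper name VecCreateStashes_Sketch and the idea of pairing a block-size-1 stash with a blocked stash are assumptions for illustration only.

static int VecCreateStashes_Sketch(MPI_Comm comm,int bs,VecStash *stash,VecStash *bstash)
{
  int ierr;
  /* hypothetical caller: one stash for individual values (block size 1)
     and one for blocks of 'bs' values, as the bs parameter above suggests */
  ierr = VecStashCreate_Private(comm,1,stash);   if (ierr) return ierr;
  ierr = VecStashCreate_Private(comm,bs,bstash); if (ierr) return ierr;
  return 0;
}

The initial sizes can then be tuned at run time with -vecstash_initial_size, given either as a single value used by every process or as one value per process, as the option handling above shows.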

 71: /* 
 72:    VecStashDestroy_Private - Destroy the stash
 73: */
 74: int VecStashDestroy_Private(VecStash *stash)
 75: {

 79:   if (stash->array) {
 80:     PetscFree(stash->array);
 81:     stash->array = 0;
 82:   }
 83:   return(0);
 84: }

 86: /* 
 87:    VecStashScatterEnd_Private - This is called as the final stage of the
 88:    scatter. The final stages of message passing are done here, and
 89:    all the memory used for message passing is cleaned up. This
 90:    routine also resets the stash and deallocates the memory used
 91:    for the stash. It also keeps track of the current memory usage
 92:    so that the same value can be used the next time through.
 93: */
 94: int VecStashScatterEnd_Private(VecStash *stash)
 95: {
 96:   int         nsends=stash->nsends,ierr,oldnmax;
 97:   MPI_Status  *send_status;

100:   /* wait on sends */
101:   if (nsends) {
102:     PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
103:     MPI_Waitall(2*nsends,stash->send_waits,send_status);
104:     PetscFree(send_status);
105:   }

107:   /* Now update oldnmax to be approximately 10% more than the current n, so that
108:      the wasted space is reduced the next time this stash is used.
109:      Only update oldnmax if the new value is larger */
110:   if (stash->n) {
111:     oldnmax  = ((int)(stash->n * 1.1) + 5)*stash->bs;
112:     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
113:   }

115:   stash->nmax       = 0;
116:   stash->n          = 0;
117:   stash->reallocs   = -1;
118:   stash->rmax       = 0;
119:   stash->nprocessed = 0;

121:   if (stash->array) {
122:     PetscFree(stash->array);
123:     stash->array = 0;
124:     stash->idx   = 0;
125:   }
126:   if (stash->send_waits) {
127:     PetscFree(stash->send_waits);
128:     stash->send_waits = 0;
129:   }
130:   if (stash->recv_waits) {
131:     PetscFree(stash->recv_waits);
132:     stash->recv_waits = 0;
133:   }
134:   if (stash->svalues) {
135:     PetscFree(stash->svalues);
136:     stash->svalues = 0;
137:   }
138:   if (stash->rvalues) {
139:     PetscFree(stash->rvalues);
140:     stash->rvalues = 0;
141:   }
142:   if (stash->nprocs) {
143:     PetscFree(stash->nprocs);
144:     stash->nprocs = 0;
145:   }

147:   return(0);
148: }
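
As a worked example of the retention heuristic above (an illustration, not from the source): if the previous assembly stashed stash->n = 100 entries with bs = 2, the candidate value is ((int)(100*1.1)+5)*2 = 230, and stash->oldnmax is raised to 230 only if it was previously smaller.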

150: /* 
151:    VecStashGetInfo_Private - Gets the relevant statistics of the stash

153:    Input Parameter:
154:    stash    - the stash

155:    Output Parameters:
156:    nstash   - the size of the stash
157:    reallocs - the number of additional mallocs incurred.
158: */
159: int VecStashGetInfo_Private(VecStash *stash,int *nstash,int *reallocs)
160: {

163:   *nstash   = stash->n*stash->bs;
164:   if (stash->reallocs < 0) *reallocs = 0;
165:   else                     *reallocs = stash->reallocs;

167:   return(0);
168: }


171: /* 
172:    VecStashSetInitialSize_Private - Sets the initial size of the stash

174:    Input Parameters:
175:    stash  - the stash
176:    max    - the value used as the maximum size of the stash;
177:             this value is used when allocating memory. It specifies
178:             the number of values stored, even for the block stash.
179: */
180: int VecStashSetInitialSize_Private(VecStash *stash,int max)
181: {
183:   stash->umax = max;
184:   return(0);
185: }

187: /* VecStashExpand_Private - Expand the stash. This function is called
188:    when the space in the stash is not sufficient to add the new values
189:    being inserted into the stash.
190:    
191:    Input Parameters:
192:    stash - the stash
193:    incr  - the minimum increase requested
194:    
195:    Notes: 
196:    This routine doubles the currently used memory. 
197: */
198: int VecStashExpand_Private(VecStash *stash,int incr)
199: {
200:   int    *n_idx,newnmax,bs=stash->bs,ierr;
201:   Scalar *n_array;

204:   /* allocate a larger stash. */
205:   if (!stash->oldnmax && !stash->nmax) { /* new stash */
206:     if (stash->umax)                  newnmax = stash->umax/bs;
207:     else                              newnmax = DEFAULT_STASH_SIZE/bs;
208:   } else if (!stash->nmax) { /* reusing stash */
209:     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs;
210:     else                              newnmax = stash->oldnmax/bs;
211:   } else                              newnmax = stash->nmax*2;

213:   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;

215:   ierr  = PetscMalloc((newnmax)*(sizeof(int)+bs*sizeof(Scalar)),&n_array);
216:   n_idx = (int*)(n_array + bs*newnmax);
217:   ierr  = PetscMemcpy(n_array,stash->array,bs*stash->nmax*sizeof(Scalar));
218:   ierr  = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));
219:   if (stash->array) {PetscFree(stash->array);}
220:   stash->array   = n_array;
221:   stash->idx     = n_idx;
222:   stash->nmax    = newnmax;
223:   stash->reallocs++;
224:   return(0);
225: }
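
A minimal sketch, not part of vecstash.c, of how an insertion routine might rely on VecStashExpand_Private; the helper name VecStashAddValue_Sketch is hypothetical and, for simplicity, assumes bs == 1 (the block stash stores bs values per entry).

static int VecStashAddValue_Sketch(VecStash *stash,int row,Scalar value)
{
  int ierr;
  /* hypothetical helper: grow the stash only when it is full, then append one entry */
  if (stash->n == stash->nmax) {
    ierr = VecStashExpand_Private(stash,1); if (ierr) return ierr;
  }
  stash->array[stash->n] = value;
  stash->idx[stash->n]   = row;
  stash->n++;
  return 0;
}
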
226: /*
227:   VecStashScatterBegin_Private - Initiates the transfer of values to the
228:   correct owners. This function goes through the stash, checks the
229:   owner of each stashed value, and sends the values off to the owner
230:   processors.

232:   Input Parameters:
233:   stash  - the stash
234:   owners - an array of size 'no-of-procs'+1 which gives the ownership
235:            range for each processor.

237:   Notes: In the case of the blocked stash, the ranges in the 'owners' array
238:   are specified in terms of blocked global indices; for the regular stash
239:   they are in the usual global indices.
240: */
241: int VecStashScatterBegin_Private(VecStash *stash,int *owners)
242: {
243:   int         *owner,*start,tag1=stash->tag1,tag2=stash->tag2;
244:   int         rank=stash->rank,size=stash->size,*nprocs,*procs,nsends,nreceives;
245:   int         nmax,*work,count,ierr,*sindices,*rindices,i,j,idx,bs=stash->bs;
246:   Scalar      *rvalues,*svalues;
247:   MPI_Comm    comm = stash->comm;
248:   MPI_Request *send_waits,*recv_waits;


252:   /*  first count number of contributors to each processor */
253:   PetscMalloc(2*size*sizeof(int),&nprocs);
254:   procs  = nprocs + size;
255:   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));
256:   ierr   = PetscMalloc((stash->n+1)*sizeof(int),&owner);

258:   for (i=0; i<stash->n; i++) {
259:     idx = stash->idx[i];
260:     for (j=0; j<size; j++) {
261:       if (idx >= owners[j] && idx < owners[j+1]) {
262:         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
263:       }
264:     }
265:   }
266:   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
267: 
268:   /* inform other processors of number of messages and max length*/
269:   ierr      = PetscMalloc(2*size*sizeof(int),&work);
270:   ierr      = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);
271:   nmax      = work[rank];
272:   nreceives = work[size+rank];
273:   ierr      = PetscFree(work);
274:   /* post receives: 
275:      since we don't know how long each individual message is we 
276:      allocate the largest needed buffer for each receive. Potentially 
277:      this is a lot of wasted space.
278:   */
279:   ierr     = PetscMalloc((nreceives+1)*(nmax+1)*(bs*sizeof(Scalar)+sizeof(int)),&rvalues);
280:   rindices = (int*)(rvalues + bs*nreceives*nmax);
281:   ierr     = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);
282:   for (i=0,count=0; i<nreceives; i++) {
283:     MPI_Irecv(rvalues+bs*nmax*i,bs*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
284:     MPI_Irecv(rindices+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);
285:   }

287:   /* do sends:
288:       1) start[i] gives the starting index in svalues for stuff going to 
289:          the ith processor
290:   */
291:   ierr       = PetscMalloc((stash->n+1)*(bs*sizeof(Scalar)+sizeof(int)),&svalues);
292:   sindices   = (int*)(svalues + bs*stash->n);
293:   PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);
294:   PetscMalloc(size*sizeof(int),&start);
295:   /* use 2 sends: the first with all the values, the next with all the indices */
296:   start[0] = 0;
297:   for (i=1; i<size; i++) {
298:     start[i] = start[i-1] + nprocs[i-1];
299:   }
300:   for (i=0; i<stash->n; i++) {
301:     j = owner[i];
302:     if (bs == 1) {
303:       svalues[start[j]] = stash->array[i];
304:     } else {
305:       PetscMemcpy(svalues+bs*start[j],stash->array+bs*i,bs*sizeof(Scalar));
306:     }
307:     sindices[start[j]]  = stash->idx[i];
308:     start[j]++;
309:   }
310:   start[0] = 0;
311:   for (i=1; i<size; i++) { start[i] = start[i-1] + nprocs[i-1];}
312:   for (i=0,count=0; i<size; i++) {
313:     if (procs[i]) {
314:       MPI_Isend(svalues+bs*start[i],bs*nprocs[i],MPIU_SCALAR,i,tag1,comm,send_waits+count++);
315:       MPI_Isend(sindices+start[i],nprocs[i],MPI_INT,i,tag2,comm,send_waits+count++);
316:     }
317:   }
318:   PetscFree(owner);
319:   PetscFree(start);
320:   /* This memory is reused in scatter end for a different purpose */
321:   for (i=0; i<2*size; i++) nprocs[i] = -1;
322:   stash->nprocs      = nprocs;

324:   stash->svalues    = svalues;    stash->rvalues    = rvalues;
325:   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
326:   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
327:   stash->rmax       = nmax;
328:   return(0);
329: }
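
As a concrete illustration of the 'owners' array (an example, not from the source): for a vector of global length 10 on 3 processes with local sizes 4, 3 and 3, owners = {0, 4, 7, 10}; a stashed index idx = 5 satisfies owners[1] <= 5 < owners[2], so the entry is counted for, and later sent to, process 1.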

331: /* 
332:    VecStashScatterGetMesg_Private - This function waits on the receives posted 
333:    in the function VecStashScatterBegin_Private() and returns one message at 
334:    a time to the calling function. If no messages are left, it indicates this
335:    by setting flg = 0, else it sets flg = 1.

337:    Input Parameters:
338:    stash - the stash

340:    Output Parameters:
341:    nvals - the number of entries in the current message.
342:    rows  - an array of row indices (or blocked indices) corresponding to the values
343:    vals  - the values
344:    flg   - 0 indicates no more messages are left, and the current call has no values associated with it;
345:            1 indicates that the current call successfully received a message, and the
346:              other output parameters nvals, rows and vals are set appropriately.
348: */
349: int VecStashScatterGetMesg_Private(VecStash *stash,int *nvals,int **rows,Scalar **vals,int *flg)
350: {
351:   int         i,ierr,size=stash->size,*flg_v,*flg_i;
352:   int         i1,i2,*rindices,bs=stash->bs;
353:   MPI_Status  recv_status;
354:   PetscTruth  match_found = PETSC_FALSE;


358:   *flg = 0; /* When a message is discovered this is reset to 1 */
359:   /* Return if no more messages to process */
360:   if (stash->nprocessed == stash->nrecvs) { return(0); }

362:   flg_v = stash->nprocs;
363:   flg_i = flg_v + size;
364:   /* If a matching pair of receives is found, process them, and return the data to
365:      the calling function. Until then keep receiving messages */
366:   while (!match_found) {
367:     MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
368:     /* Now pack the received message into a structure which is usable by others */
369:     if (i % 2) {
370:       MPI_Get_count(&recv_status,MPI_INT,nvals);
371:       flg_i[recv_status.MPI_SOURCE] = i/2;
372:     } else {
373:       MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);
374:       flg_v[recv_status.MPI_SOURCE] = i/2;
375:       *nvals = *nvals/bs;
376:     }
377: 
378:     /* Check if we have received both messages from this processor */
379:     i1 = flg_v[recv_status.MPI_SOURCE];
380:     i2 = flg_i[recv_status.MPI_SOURCE];
381:     if (i1 != -1 && i2 != -1) {
382:       rindices    = (int*)(stash->rvalues + bs*stash->rmax*stash->nrecvs);
383:       *rows       = rindices + i2*stash->rmax;
384:       *vals       = stash->rvalues + i1*bs*stash->rmax;
385:       *flg        = 1;
386:       stash->nprocessed ++;
387:       match_found = PETSC_TRUE;
388:     }
389:   }
390:   return(0);
391: }
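
To close, a minimal sketch, not part of vecstash.c, of how an assembly routine might drive the three scatter routines together: start the scatter, drain messages one at a time, then clean up. The names VecStashAssemble_Sketch, ownership and localarray, the additive accumulation, and the assumption bs == 1 are all for illustration only.

static int VecStashAssemble_Sketch(VecStash *stash,int *ownership,Scalar *localarray)
{
  int    ierr,nvals,*rows,flg,i;
  int    rstart = ownership[stash->rank];  /* first global index owned by this process */
  Scalar *vals;

  ierr = VecStashScatterBegin_Private(stash,ownership); if (ierr) return ierr;
  ierr = VecStashScatterGetMesg_Private(stash,&nvals,&rows,&vals,&flg); if (ierr) return ierr;
  while (flg) {
    for (i=0; i<nvals; i++) {
      /* accumulate each received value into the local part of the vector */
      localarray[rows[i]-rstart] += vals[i];
    }
    ierr = VecStashScatterGetMesg_Private(stash,&nvals,&rows,&vals,&flg); if (ierr) return ierr;
  }
  ierr = VecStashScatterEnd_Private(stash); if (ierr) return ierr;
  return 0;
}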