Actual source code: matstash.c
#include "src/mat/matimpl.h"
/*
  The input to the stash is ALWAYS in MatScalar precision, and the
  internal storage and output are also in MatScalar.
*/
#define DEFAULT_STASH_SIZE 10000
/*
   MatStashCreate_Private - Creates a stash, currently used for all the parallel
   matrix implementations. The stash is where elements of a matrix destined
   to be stored on other processors are kept until matrix assembly is done.

   This is a simple-minded stash that simply adds entries to the end of the stash.

   Input Parameters:
   comm - communicator, required for scatters.
   bs   - stash block size, used when stashing blocks of values

   Output Parameter:
   stash - the newly created stash
*/
PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
{
  PetscInt   max,*opt,nopt;
  PetscTruth flg;

  /* Require 2 tags, get the second using PetscCommGetNewTag() */
  stash->comm = comm;
  PetscCommGetNewTag(stash->comm,&stash->tag1);
  PetscCommGetNewTag(stash->comm,&stash->tag2);
  MPI_Comm_size(stash->comm,&stash->size);
  MPI_Comm_rank(stash->comm,&stash->rank);

  nopt = stash->size;
  PetscMalloc(nopt*sizeof(PetscInt),&opt);
  PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);
  if (flg) {
    if (nopt == 1)                 max = opt[0];
    else if (nopt == stash->size)  max = opt[stash->rank];
    else if (stash->rank < nopt)   max = opt[stash->rank];
    else                           max = 0; /* use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  PetscFree(opt);
  if (bs <= 0) bs = 1;

  stash->bs          = bs;
  stash->nmax        = 0;
  stash->oldnmax     = 0;
  stash->n           = 0;
  stash->reallocs    = -1;
  stash->idx         = 0;
  stash->idy         = 0;
  stash->array       = 0;

  stash->send_waits  = 0;
  stash->recv_waits  = 0;
  stash->send_status = 0;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = 0;
  stash->rvalues     = 0;
  stash->rmax        = 0;
  stash->nprocs      = 0;
  stash->nprocessed  = 0;
  return(0);
}
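
/*
   Illustrative note (not part of the original source): the -matstash_initial_size
   option read above may be given either as a single value, applied on every rank,
   or as one value per rank (comma-separated).  For a hypothetical program ex1 run
   on 4 processes, one might use

       ./ex1 -matstash_initial_size 10000
       ./ex1 -matstash_initial_size 5000,5000,20000,5000

   where the second form gives rank 2 a larger initial stash.  Ranks beyond the end
   of a short list fall back to the default size.
*/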
/*
   MatStashDestroy_Private - Destroy the stash
*/
PetscErrorCode MatStashDestroy_Private(MatStash *stash)
{
  if (stash->array) {
    PetscFree(stash->array);
    stash->array = 0;
  }
  return(0);
}
/*
   MatStashScatterEnd_Private - This is called as the final stage of the
   scatter. The final stages of message passing are done here, and
   all the memory used for message passing is cleaned up. This
   routine also resets the stash, and deallocates the memory used
   for the stash. It also keeps track of the current memory usage
   so that the same value can be used the next time through.
*/
PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
{
  int        nsends=stash->nsends,bs2,oldnmax;
  MPI_Status *send_status;

  /* wait on sends */
  if (nsends) {
    PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
    MPI_Waitall(2*nsends,stash->send_waits,send_status);
    PetscFree(send_status);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used, so that
     the wastage of space is reduced the next time this stash is used.
     Also update oldnmax only if it increases */
  if (stash->n) {
    bs2     = stash->bs*stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->rmax       = 0;
  stash->nprocessed = 0;

  if (stash->array) {
    PetscFree(stash->array);
    stash->array = 0;
    stash->idx   = 0;
    stash->idy   = 0;
  }
  if (stash->send_waits) {
    PetscFree(stash->send_waits);
    stash->send_waits = 0;
  }
  if (stash->recv_waits) {
    PetscFree(stash->recv_waits);
    stash->recv_waits = 0;
  }
  if (stash->svalues) {
    PetscFree(stash->svalues);
    stash->svalues = 0;
  }
  if (stash->rvalues) {
    PetscFree(stash->rvalues);
    stash->rvalues = 0;
  }
  if (stash->nprocs) {
    PetscFree(stash->nprocs);
    stash->nprocs = 0;
  }

  return(0);
}
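
/*
   Worked example (added for illustration, not in the original source): if a stash
   with block size bs = 2 held n = 1000 entries during the assembly that just
   finished, then bs2 = 4 and the heuristic above sets

       oldnmax = ((int)(1000 * 1.1) + 5) * 4 = 4420

   scalar values, so the first expansion of the next assembly sizes the stash for
   oldnmax/bs2 = 1105 blocked entries, roughly 10% more than were actually needed
   this time, avoiding most reallocations on later assemblies.
*/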
/*
   MatStashGetInfo_Private - Gets the relevant statistics of the stash

   Input Parameter:
   stash    - the stash

   Output Parameters:
   nstash   - the size of the stash, i.e. the number of values stored.
   reallocs - the number of additional mallocs incurred.
*/
PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  PetscInt bs2 = stash->bs*stash->bs;

  if (nstash) *nstash = stash->n*bs2;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else                     *reallocs = stash->reallocs;
  }
  return(0);
}
/*
   MatStashSetInitialSize_Private - Sets the initial size of the stash

   Input Parameters:
   stash - the stash
   max   - the value that is used as the max size of the stash.
           This value is used while allocating memory.
*/
PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
{
  stash->umax = max;
  return(0);
}
/* MatStashExpand_Private - Expand the stash. This function is called
   when the space in the stash is not sufficient to add the new values
   being inserted into the stash.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine roughly doubles the currently allocated memory.
*/
static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
{
  PetscInt  *n_idx,*n_idy,newnmax,bs2;
  MatScalar *n_array;

  /* allocate a larger stash */
  bs2 = stash->bs*stash->bs;
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax) newnmax = stash->umax/bs2;
    else             newnmax = DEFAULT_STASH_SIZE/bs2;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
    else                              newnmax = stash->oldnmax/bs2;
  } else newnmax = stash->nmax*2;
  if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;

  PetscMalloc((newnmax)*(2*sizeof(PetscInt)+bs2*sizeof(MatScalar)),&n_array);
  n_idx = (PetscInt*)(n_array + bs2*newnmax);
  n_idy = (PetscInt*)(n_idx + newnmax);
  PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));
  PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));
  PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(PetscInt));
  if (stash->array) {PetscFree(stash->array);}
  stash->array = n_array;
  stash->idx   = n_idx;
  stash->idy   = n_idy;
  stash->nmax  = newnmax;
  stash->reallocs++;
  return(0);
}
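
/*
   Illustrative note (not part of the original source): for a brand-new scalar stash
   (bs = 1, no -matstash_initial_size given), the first call above allocates room
   for DEFAULT_STASH_SIZE = 10000 entries, and each later expansion doubles that
   (20000, 40000, ...).  Note also that array, idx and idy live in a single malloc'd
   block laid out as [bs2*newnmax MatScalars][newnmax row indices][newnmax column
   indices], which is why only stash->array is ever freed.
*/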
/*
   MatStashValuesRow_Private - inserts values into the stash. This function
   expects the values to be row-oriented. Multiple columns belonging to the same
   row can be inserted with a single call to this function.

   Input Parameters:
   stash  - the stash
   row    - the global row corresponding to the values
   n      - the number of elements inserted. All elements belong to the above row.
   idxn   - the global column indices corresponding to each of the values.
   values - the values inserted
*/
PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
{
  PetscInt i;

  /* Check and see if we have sufficient memory */
  if ((stash->n + n) > stash->nmax) {
    MatStashExpand_Private(stash,n);
  }
  for (i=0; i<n; i++) {
    stash->idx[stash->n]   = row;
    stash->idy[stash->n]   = idxn[i];
    stash->array[stash->n] = values[i];
    stash->n++;
  }
  return(0);
}
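
/*
   Illustrative sketch (not part of the original source): a parallel matrix format
   typically calls MatStashValuesRow_Private() from its MatSetValues routine when a
   row is not owned locally.  A rough sketch, with hypothetical names rstart, rend
   (the local ownership range) and v (a row-major buffer of m rows by n columns):

       if (row < rstart || row >= rend) {
         MatStashValuesRow_Private(&mat->stash,row,n,idxn,v+i*n);
       }

   The whole logical row i (n column indices plus n values) is recorded with a
   single call and forwarded to the owning process at assembly time.
*/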
/*
   MatStashValuesCol_Private - inserts values into the stash. This function
   expects the values to be column-oriented. Multiple columns belonging to the same
   row can be inserted with a single call to this function.

   Input Parameters:
   stash   - the stash
   row     - the global row corresponding to the values
   n       - the number of elements inserted. All elements belong to the above row.
   idxn    - the global column indices corresponding to each of the values.
   values  - the values inserted
   stepval - consecutive values are separated by a distance of stepval.
             This happens because the input is column-oriented.
*/
PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
{
  PetscInt i;

  /* Check and see if we have sufficient memory */
  if ((stash->n + n) > stash->nmax) {
    MatStashExpand_Private(stash,n);
  }
  for (i=0; i<n; i++) {
    stash->idx[stash->n]   = row;
    stash->idy[stash->n]   = idxn[i];
    stash->array[stash->n] = values[i*stepval];
    stash->n++;
  }
  return(0);
}
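
/*
   Illustrative note (not part of the original source): if the caller holds an
   m-by-n dense block of values in column-major (Fortran-style) order, the entries
   of one logical row are m scalars apart in memory, so the natural call for row r
   of that block would be roughly

       MatStashValuesCol_Private(stash,grow,n,gcols,values+r,m);

   where grow and gcols are the (hypothetical) global row and column indices; the
   loop above then picks values[r], values[r+m], values[r+2*m], ... for that row.
*/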
/*
   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
   This function expects the values to be row-oriented. Multiple columns belonging
   to the same block-row can be inserted with a single call to this function.
   This function extracts the sub-block of values based on the dimensions of
   the original input block, and the row,col values corresponding to the blocks.

   Input Parameters:
   stash  - the stash
   row    - the global block-row corresponding to the values
   n      - the number of elements inserted. All elements belong to the above row.
   idxn   - the global block-column indices corresponding to each of the blocks of
            values. Each block is of size bs*bs.
   values - the values inserted
   rmax   - the number of block-rows in the original block.
   cmax   - the number of block-columns in the original block.
   idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscInt        i,j,k,bs2,bs=stash->bs;
  const MatScalar *vals;
  MatScalar       *array;

  bs2 = bs*bs;
  if ((stash->n+n) > stash->nmax) {
    MatStashExpand_Private(stash,n);
  }
  for (i=0; i<n; i++) {
    stash->idx[stash->n] = row;
    stash->idy[stash->n] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = stash->array + bs2*stash->n;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
      array += 1;
      vals  += cmax*bs;
    }
    stash->n++;
  }
  return(0);
}
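
/*
   Worked example (added for illustration, not in the original source): with bs = 2
   the loop above transposes each row-oriented input block into the column-oriented
   storage used by the stash.  For an input block

       a b
       c d

   point-row j = 0 writes a and b to offsets 0 and 2, and point-row j = 1 writes c
   and d to offsets 1 and 3, so the block is stored as a,c,b,d (column-major),
   matching the layout produced by MatStashValuesColBlocked_Private() below.
*/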
/*
   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
   This function expects the values to be column-oriented. Multiple columns belonging
   to the same block-row can be inserted with a single call to this function.
   This function extracts the sub-block of values based on the dimensions of
   the original input block, and the row,col values corresponding to the blocks.

   Input Parameters:
   stash  - the stash
   row    - the global block-row corresponding to the values
   n      - the number of elements inserted. All elements belong to the above row.
   idxn   - the global block-column indices corresponding to each of the blocks of
            values. Each block is of size bs*bs.
   values - the values inserted
   rmax   - the number of block-rows in the original block.
   cmax   - the number of block-columns in the original block.
   idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscInt        i,j,k,bs2,bs=stash->bs;
  const MatScalar *vals;
  MatScalar       *array;

  bs2 = bs*bs;
  if ((stash->n+n) > stash->nmax) {
    MatStashExpand_Private(stash,n);
  }
  for (i=0; i<n; i++) {
    stash->idx[stash->n] = row;
    stash->idy[stash->n] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = stash->array + bs2*stash->n;
    vals  = values + idx*bs + bs2*rmax*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) {array[k] = vals[k];}
      array += bs;
      vals  += rmax*bs;
    }
    stash->n++;
  }
  return(0);
}
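
/*
   Illustrative note (not part of the original source): here the input is already
   column-major with a leading dimension of rmax*bs point rows, so each column of
   the requested bs-by-bs block is contiguous (vals[0..bs-1]) and is copied straight
   into the stash; no transposition is needed, unlike the row-oriented variant above.
*/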
/*
   MatStashScatterBegin_Private - Initiates the transfer of values to the
   correct owners. This function goes through the stash, checks the
   owner of each stashed value, and sends the values off to the owner
   processors.

   Input Parameters:
   stash  - the stash
   owners - an array of size 'no-of-procs' which gives the ownership range
            for each node.

   Notes: In the case of the blocked stash the 'owners' array specifies the
   ranges in blocked global indices, and for the regular stash in the regular
   global indices.
*/
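
/*
   Illustrative note (not part of the original source): 'owners' is the usual PETSc
   ownership-range array, where rank j owns indices owners[j] up to owners[j+1]-1.
   For example, on 3 processes owning 34, 33 and 33 rows respectively one would
   have owners = {0,34,67,100}, and the test below assigns a stashed row index idx
   to the unique rank j with owners[j] <= idx < owners[j+1].
*/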
PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
{
  PetscInt    *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt    size=stash->size,*nprocs,nsends,nreceives;
  PetscInt    nmax,count,*sindices,*rindices,i,j,idx;
  MatScalar   *rvalues,*svalues;
  MPI_Comm    comm = stash->comm;
  MPI_Request *send_waits,*recv_waits;

  bs2 = stash->bs*stash->bs;
  /* first count number of contributors to each processor */
  PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
  PetscMemzero(nprocs,2*size*sizeof(PetscInt));
  PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);

  for (i=0; i<stash->n; i++) {
    idx = stash->idx[i];
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  PetscMaxSum(comm,nprocs,&nmax,&nreceives);

  /* post receives:
     since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.
  */
  PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&rvalues);
  rindices = (PetscInt*)(rvalues + bs2*nreceives*nmax);
  PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);
  for (i=0,count=0; i<nreceives; i++) {
    MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_MATSCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
    MPI_Irecv(rindices+2*nmax*i,2*nmax,MPIU_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);
  sindices = (PetscInt*)(svalues + bs2*stash->n);
  PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);
  PetscMalloc(2*size*sizeof(PetscInt),&startv);
  starti = startv + size;
  /* use 2 sends: the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nprocs[2*i-2];
    starti[i] = starti[i-1] + nprocs[2*i-2]*2;
  }
  for (i=0; i<stash->n; i++) {
    j = owner[i];
    if (bs2 == 1) {
      svalues[startv[j]] = stash->array[i];
    } else {
      PetscInt  k;
      MatScalar *buf1,*buf2;
      buf1 = svalues+bs2*startv[j];
      buf2 = stash->array+bs2*i;
      for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
    }
    sindices[starti[j]]             = stash->idx[i];
    sindices[starti[j]+nprocs[2*j]] = stash->idy[i];
    startv[j]++;
    starti[j]++;
  }
  startv[0] = 0;
  for (i=1; i<size; i++) { startv[i] = startv[i-1] + nprocs[2*i-2];}
  for (i=0,count=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[2*i],MPIU_MATSCALAR,i,tag1,comm,send_waits+count++);
      MPI_Isend(sindices+2*startv[i],2*nprocs[2*i],MPIU_INT,i,tag2,comm,send_waits+count++);
    }
  }
  PetscFree(owner);
  PetscFree(startv);
  /* This memory is reused in scatter end for a different purpose */
  for (i=0; i<2*size; i++) nprocs[i] = -1;
  stash->nprocs = nprocs;

  stash->svalues    = svalues;    stash->rvalues    = rvalues;
  stash->nsends     = nsends;     stash->nrecvs     = nreceives;
  stash->send_waits = send_waits; stash->recv_waits = recv_waits;
  stash->rmax       = nmax;
  return(0);
}
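
/*
   Illustrative note (not part of the original source): each destination rank is sent
   two messages, matching the two receives posted above: one of nprocs[2*j] blocked
   values (bs2*nprocs[2*j] scalars) on tag1, and one of 2*nprocs[2*j] integers on
   tag2 laid out as

       [ all row indices for rank j | all column indices for rank j ]

   so on the receiving side the first half of the index message gives the rows and
   the second half the columns (see MatStashScatterGetMesg_Private() below).
*/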
/*
   MatStashScatterGetMesg_Private - This function waits on the receives posted
   in the function MatStashScatterBegin_Private() and returns one message at
   a time to the calling function. If no messages are left, it indicates this
   by setting flg = 0; else it sets flg = 1.

   Input Parameter:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message.
   rows  - an array of row indices (or blocked indices) corresponding to the values
   cols  - an array of column indices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more messages are left, and the current call has no values associated.
           1 indicates that the current call successfully received a message, and the
           other output parameters nvals,rows,cols,vals are set appropriately.
*/
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,MatScalar **vals,PetscInt *flg)
{
  PetscMPIInt i;
  PetscInt    *flg_v,i1,i2,*rindices,bs2;
  MPI_Status  recv_status;
  PetscTruth  match_found = PETSC_FALSE;

  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) { return(0); }

  flg_v = stash->nprocs;
  bs2   = stash->bs*stash->bs;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      MPI_Get_count(&recv_status,MPIU_INT,nvals);
      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
      *nvals = *nvals/2; /* This message has both row indices and col indices */
    } else {
      MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);
      flg_v[2*recv_status.MPI_SOURCE] = i/2;
      *nvals = *nvals/bs2;
    }

    /* Check if we have both the messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      rindices = (PetscInt*)(stash->rvalues + bs2*stash->rmax*stash->nrecvs);
      *rows    = rindices + 2*i2*stash->rmax;
      *cols    = *rows + *nvals;
      *vals    = stash->rvalues + i1*bs2*stash->rmax;
      *flg     = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  return(0);
}
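
/*
   Illustrative sketch (not part of the original source) of how matrix assembly code
   might use these routines together; the variable names below are hypothetical
   placeholders for whatever the calling matrix implementation uses:

       MatStashScatterBegin_Private(&mat->stash,owners);
       while (1) {
         MatStashScatterGetMesg_Private(&mat->stash,&nvals,&rows,&cols,&vals,&flg);
         if (!flg) break;
         ... insert the nvals received (rows[k],cols[k],vals[k]) entries locally ...
       }
       MatStashScatterEnd_Private(&mat->stash);

   Each successful call hands back one complete message (all entries sent by one
   remote process); the loop ends after stash->nrecvs messages have been processed.
*/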