Actual source code: matstash.c
1: /*$Id: matstash.c,v 1.50 2001/03/23 23:22:45 balay Exp $*/
3: #include src/mat/matimpl.h
5: /*
6: The input to the stash is ALWAYS in MatScalar precision, and the
7: internal storage and output is also in MatScalar.
8: */
9: #define DEFAULT_STASH_SIZE 10000
11: /*
12: MatStashCreate_Private - Creates a stash,currently used for all the parallel
13: matrix implementations. The stash is where elements of a matrix destined
14: to be stored on other processors are kept until matrix assembly is done.
16: This is a simple minded stash. Simply adds entries to end of stash.
18: Input Parameters:
19: comm - communicator, required for scatters.
20: bs - stash block size. used when stashing blocks of values
22: Output Parameters:
23: stash - the newly created stash
24: */
25: #undef __FUNCT__
27: int MatStashCreate_Private(MPI_Comm comm,int bs,MatStash *stash)
28: {
29: int ierr,max,*opt,nopt;
30: PetscTruth flg;
33: /* Require 2 tags,get the second using PetscCommGetNewTag() */
34: stash->comm = comm;
35: PetscCommGetNewTag(stash->comm,&stash->tag1);
36: PetscCommGetNewTag(stash->comm,&stash->tag2);
37: MPI_Comm_size(stash->comm,&stash->size);
38: MPI_Comm_rank(stash->comm,&stash->rank);
40: nopt = stash->size;
41: PetscMalloc(nopt*sizeof(int),&opt);
42: PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);
43: if (flg) {
44: if (nopt == 1) max = opt[0];
45: else if (nopt == stash->size) max = opt[stash->rank];
46: else if (stash->rank < nopt) max = opt[stash->rank];
47: else max = 0; /* Use default */
48: stash->umax = max;
49: } else {
50: stash->umax = 0;
51: }
52: PetscFree(opt);
53: if (bs <= 0) bs = 1;
55: stash->bs = bs;
56: stash->nmax = 0;
57: stash->oldnmax = 0;
58: stash->n = 0;
59: stash->reallocs = -1;
60: stash->idx = 0;
61: stash->idy = 0;
62: stash->array = 0;
64: stash->send_waits = 0;
65: stash->recv_waits = 0;
66: stash->send_status = 0;
67: stash->nsends = 0;
68: stash->nrecvs = 0;
69: stash->svalues = 0;
70: stash->rvalues = 0;
71: stash->rmax = 0;
72: stash->nprocs = 0;
73: stash->nprocessed = 0;
74: return(0);
75: }
77: /*
78: MatStashDestroy_Private - Destroy the stash
79: */
80: #undef __FUNCT__
82: int MatStashDestroy_Private(MatStash *stash)
83: {
87: if (stash->array) {
88: PetscFree(stash->array);
89: stash->array = 0;
90: }
91: return(0);
92: }
94: /*
95: MatStashScatterEnd_Private - This is called as the fial stage of
96: scatter. The final stages of messagepassing is done here, and
97: all the memory used for messagepassing is cleanedu up. This
98: routine also resets the stash, and deallocates the memory used
99: for the stash. It also keeps track of the current memory usage
100: so that the same value can be used the next time through.
101: */
102: #undef __FUNCT__
104: int MatStashScatterEnd_Private(MatStash *stash)
105: {
106: int nsends=stash->nsends,ierr,bs2,oldnmax;
107: MPI_Status *send_status;
110: /* wait on sends */
111: if (nsends) {
112: PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
113: MPI_Waitall(2*nsends,stash->send_waits,send_status);
114: PetscFree(send_status);
115: }
117: /* Now update nmaxold to be app 10% more than max n used, this way the
118: wastage of space is reduced the next time this stash is used.
119: Also update the oldmax, only if it increases */
120: if (stash->n) {
121: bs2 = stash->bs*stash->bs;
122: oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
123: if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
124: }
126: stash->nmax = 0;
127: stash->n = 0;
128: stash->reallocs = -1;
129: stash->rmax = 0;
130: stash->nprocessed = 0;
132: if (stash->array) {
133: ierr = PetscFree(stash->array);
134: stash->array = 0;
135: stash->idx = 0;
136: stash->idy = 0;
137: }
138: if (stash->send_waits) {
139: PetscFree(stash->send_waits);
140: stash->send_waits = 0;
141: }
142: if (stash->recv_waits) {
143: PetscFree(stash->recv_waits);
144: stash->recv_waits = 0;
145: }
146: if (stash->svalues) {
147: PetscFree(stash->svalues);
148: stash->svalues = 0;
149: }
150: if (stash->rvalues) {
151: PetscFree(stash->rvalues);
152: stash->rvalues = 0;
153: }
154: if (stash->nprocs) {
155: PetscFree(stash->nprocs);
156: stash->nprocs = 0;
157: }
159: return(0);
160: }
162: /*
163: MatStashGetInfo_Private - Gets the relavant statistics of the stash
165: Input Parameters:
166: stash - the stash
167: nstash - the size of the stash. Indicates the number of values stored.
168: reallocs - the number of additional mallocs incurred.
169:
170: */
171: #undef __FUNCT__
173: int MatStashGetInfo_Private(MatStash *stash,int *nstash,int *reallocs)
174: {
175: int bs2 = stash->bs*stash->bs;
178: *nstash = stash->n*bs2;
179: if (stash->reallocs < 0) *reallocs = 0;
180: else *reallocs = stash->reallocs;
181: return(0);
182: }
185: /*
186: MatStashSetInitialSize_Private - Sets the initial size of the stash
188: Input Parameters:
189: stash - the stash
190: max - the value that is used as the max size of the stash.
191: this value is used while allocating memory.
192: */
193: #undef __FUNCT__
195: int MatStashSetInitialSize_Private(MatStash *stash,int max)
196: {
198: stash->umax = max;
199: return(0);
200: }
202: /* MatStashExpand_Private - Expand the stash. This function is called
203: when the space in the stash is not sufficient to add the new values
204: being inserted into the stash.
205:
206: Input Parameters:
207: stash - the stash
208: incr - the minimum increase requested
209:
210: Notes:
211: This routine doubles the currently used memory.
212: */
213: #undef __FUNCT__
215: static int MatStashExpand_Private(MatStash *stash,int incr)
216: {
217: int *n_idx,*n_idy,newnmax,bs2,ierr;
218: MatScalar *n_array;
221: /* allocate a larger stash */
222: bs2 = stash->bs*stash->bs;
223: if (!stash->oldnmax && !stash->nmax) { /* new stash */
224: if (stash->umax) newnmax = stash->umax/bs2;
225: else newnmax = DEFAULT_STASH_SIZE/bs2;
226: } else if (!stash->nmax) { /* resuing stash */
227: if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
228: else newnmax = stash->oldnmax/bs2;
229: } else newnmax = stash->nmax*2;
230: if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;
232: ierr = PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(MatScalar)),&n_array);
233: n_idx = (int*)(n_array + bs2*newnmax);
234: n_idy = (int*)(n_idx + newnmax);
235: ierr = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));
236: ierr = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));
237: ierr = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));
238: if (stash->array) {PetscFree(stash->array);}
239: stash->array = n_array;
240: stash->idx = n_idx;
241: stash->idy = n_idy;
242: stash->nmax = newnmax;
243: stash->reallocs++;
244: return(0);
245: }
246: /*
247: MatStashValuesRow_Private - inserts values into the stash. This function
248: expects the values to be roworiented. Multiple columns belong to the same row
249: can be inserted with a single call to this function.
251: Input Parameters:
252: stash - the stash
253: row - the global row correspoiding to the values
254: n - the number of elements inserted. All elements belong to the above row.
255: idxn - the global column indices corresponding to each of the values.
256: values - the values inserted
257: */
258: #undef __FUNCT__
260: int MatStashValuesRow_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values)
261: {
262: int ierr,i;
265: /* Check and see if we have sufficient memory */
266: if ((stash->n + n) > stash->nmax) {
267: MatStashExpand_Private(stash,n);
268: }
269: for (i=0; i<n; i++) {
270: stash->idx[stash->n] = row;
271: stash->idy[stash->n] = idxn[i];
272: stash->array[stash->n] = values[i];
273: stash->n++;
274: }
275: return(0);
276: }
277: /*
278: MatStashValuesCol_Private - inserts values into the stash. This function
279: expects the values to be columnoriented. Multiple columns belong to the same row
280: can be inserted with a single call to this function.
282: Input Parameters:
283: stash - the stash
284: row - the global row correspoiding to the values
285: n - the number of elements inserted. All elements belong to the above row.
286: idxn - the global column indices corresponding to each of the values.
287: values - the values inserted
288: stepval - the consecutive values are sepated by a distance of stepval.
289: this happens because the input is columnoriented.
290: */
291: #undef __FUNCT__
293: int MatStashValuesCol_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int stepval)
294: {
295: int ierr,i;
298: /* Check and see if we have sufficient memory */
299: if ((stash->n + n) > stash->nmax) {
300: MatStashExpand_Private(stash,n);
301: }
302: for (i=0; i<n; i++) {
303: stash->idx[stash->n] = row;
304: stash->idy[stash->n] = idxn[i];
305: stash->array[stash->n] = values[i*stepval];
306: stash->n++;
307: }
308: return(0);
309: }
311: /*
312: MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
313: This function expects the values to be roworiented. Multiple columns belong
314: to the same block-row can be inserted with a single call to this function.
315: This function extracts the sub-block of values based on the dimensions of
316: the original input block, and the row,col values corresponding to the blocks.
318: Input Parameters:
319: stash - the stash
320: row - the global block-row correspoiding to the values
321: n - the number of elements inserted. All elements belong to the above row.
322: idxn - the global block-column indices corresponding to each of the blocks of
323: values. Each block is of size bs*bs.
324: values - the values inserted
325: rmax - the number of block-rows in the original block.
326: cmax - the number of block-columsn on the original block.
327: idx - the index of the current block-row in the original block.
328: */
329: #undef __FUNCT__
331: int MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int rmax,int cmax,int idx)
332: {
333: int ierr,i,j,k,bs2,bs=stash->bs;
334: MatScalar *vals,*array;
337: bs2 = bs*bs;
338: if ((stash->n+n) > stash->nmax) {
339: MatStashExpand_Private(stash,n);
340: }
341: for (i=0; i<n; i++) {
342: stash->idx[stash->n] = row;
343: stash->idy[stash->n] = idxn[i];
344: /* Now copy over the block of values. Store the values column oriented.
345: This enables inserting multiple blocks belonging to a row with a single
346: funtion call */
347: array = stash->array + bs2*stash->n;
348: vals = values + idx*bs2*n + bs*i;
349: for (j=0; j<bs; j++) {
350: for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
351: array += 1;
352: vals += cmax*bs;
353: }
354: stash->n++;
355: }
356: return(0);
357: }
359: /*
360: MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
361: This function expects the values to be roworiented. Multiple columns belong
362: to the same block-row can be inserted with a single call to this function.
363: This function extracts the sub-block of values based on the dimensions of
364: the original input block, and the row,col values corresponding to the blocks.
366: Input Parameters:
367: stash - the stash
368: row - the global block-row correspoiding to the values
369: n - the number of elements inserted. All elements belong to the above row.
370: idxn - the global block-column indices corresponding to each of the blocks of
371: values. Each block is of size bs*bs.
372: values - the values inserted
373: rmax - the number of block-rows in the original block.
374: cmax - the number of block-columsn on the original block.
375: idx - the index of the current block-row in the original block.
376: */
377: #undef __FUNCT__
379: int MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,int *idxn,MatScalar *values,int rmax,int cmax,int idx)
380: {
381: int ierr,i,j,k,bs2,bs=stash->bs;
382: MatScalar *vals,*array;
385: bs2 = bs*bs;
386: if ((stash->n+n) > stash->nmax) {
387: MatStashExpand_Private(stash,n);
388: }
389: for (i=0; i<n; i++) {
390: stash->idx[stash->n] = row;
391: stash->idy[stash->n] = idxn[i];
392: /* Now copy over the block of values. Store the values column oriented.
393: This enables inserting multiple blocks belonging to a row with a single
394: funtion call */
395: array = stash->array + bs2*stash->n;
396: vals = values + idx*bs + bs2*rmax*i;
397: for (j=0; j<bs; j++) {
398: for (k=0; k<bs; k++) {array[k] = vals[k];}
399: array += bs;
400: vals += rmax*bs;
401: }
402: stash->n++;
403: }
404: return(0);
405: }
406: /*
407: MatStashScatterBegin_Private - Initiates the transfer of values to the
408: correct owners. This function goes through the stash, and check the
409: owners of each stashed value, and sends the values off to the owner
410: processors.
412: Input Parameters:
413: stash - the stash
414: owners - an array of size 'no-of-procs' which gives the ownership range
415: for each node.
417: Notes: The 'owners' array in the cased of the blocked-stash has the
418: ranges specified blocked global indices, and for the regular stash in
419: the proper global indices.
420: */
421: #undef __FUNCT__
423: int MatStashScatterBegin_Private(MatStash *stash,int *owners)
424: {
425: int *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
426: int size=stash->size,*nprocs,nsends,nreceives;
427: int nmax,count,ierr,*sindices,*rindices,i,j,idx;
428: MatScalar *rvalues,*svalues;
429: MPI_Comm comm = stash->comm;
430: MPI_Request *send_waits,*recv_waits;
434: bs2 = stash->bs*stash->bs;
435: /* first count number of contributors to each processor */
436: ierr = PetscMalloc(2*size*sizeof(int),&nprocs);
437: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
438: ierr = PetscMalloc((stash->n+1)*sizeof(int),&owner);
440: for (i=0; i<stash->n; i++) {
441: idx = stash->idx[i];
442: for (j=0; j<size; j++) {
443: if (idx >= owners[j] && idx < owners[j+1]) {
444: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
445: }
446: }
447: }
448: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
449:
450: /* inform other processors of number of messages and max length*/
451: PetscMaxSum(comm,nprocs,&nmax,&nreceives);
453: /* post receives:
454: since we don't know how long each individual message is we
455: allocate the largest needed buffer for each receive. Potentially
456: this is a lot of wasted space.
457: */
458: ierr = PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)),&rvalues);
459: rindices = (int*)(rvalues + bs2*nreceives*nmax);
460: ierr = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);
461: for (i=0,count=0; i<nreceives; i++) {
462: MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_MATSCALAR,MPI_ANY_SOURCE,tag1,comm,
463: recv_waits+count++);
464: MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);
465: }
467: /* do sends:
468: 1) starts[i] gives the starting index in svalues for stuff going to
469: the ith processor
470: */
471: ierr = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)),&svalues);
472: sindices = (int*)(svalues + bs2*stash->n);
473: ierr = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);
474: ierr = PetscMalloc(2*size*sizeof(int),&startv);
475: starti = startv + size;
476: /* use 2 sends the first with all_a, the next with all_i and all_j */
477: startv[0] = 0; starti[0] = 0;
478: for (i=1; i<size; i++) {
479: startv[i] = startv[i-1] + nprocs[2*i-2];
480: starti[i] = starti[i-1] + nprocs[2*i-2]*2;
481: }
482: for (i=0; i<stash->n; i++) {
483: j = owner[i];
484: if (bs2 == 1) {
485: svalues[startv[j]] = stash->array[i];
486: } else {
487: int k;
488: MatScalar *buf1,*buf2;
489: buf1 = svalues+bs2*startv[j];
490: buf2 = stash->array+bs2*i;
491: for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
492: }
493: sindices[starti[j]] = stash->idx[i];
494: sindices[starti[j]+nprocs[2*j]] = stash->idy[i];
495: startv[j]++;
496: starti[j]++;
497: }
498: startv[0] = 0;
499: for (i=1; i<size; i++) { startv[i] = startv[i-1] + nprocs[2*i-2];}
500: for (i=0,count=0; i<size; i++) {
501: if (nprocs[2*i+1]) {
502: MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[2*i],MPIU_MATSCALAR,i,tag1,comm,
503: send_waits+count++);
504: MPI_Isend(sindices+2*startv[i],2*nprocs[2*i],MPI_INT,i,tag2,comm,
505: send_waits+count++);
506: }
507: }
508: PetscFree(owner);
509: PetscFree(startv);
510: /* This memory is reused in scatter end for a different purpose*/
511: for (i=0; i<2*size; i++) nprocs[i] = -1;
512: stash->nprocs = nprocs;
514: stash->svalues = svalues; stash->rvalues = rvalues;
515: stash->nsends = nsends; stash->nrecvs = nreceives;
516: stash->send_waits = send_waits; stash->recv_waits = recv_waits;
517: stash->rmax = nmax;
518: return(0);
519: }
521: /*
522: MatStashScatterGetMesg_Private - This function waits on the receives posted
523: in the function MatStashScatterBegin_Private() and returns one message at
524: a time to the calling function. If no messages are left, it indicates this
525: by setting flg = 0, else it sets flg = 1.
527: Input Parameters:
528: stash - the stash
530: Output Parameters:
531: nvals - the number of entries in the current message.
532: rows - an array of row indices (or blocked indices) corresponding to the values
533: cols - an array of columnindices (or blocked indices) corresponding to the values
534: vals - the values
535: flg - 0 indicates no more message left, and the current call has no values associated.
536: 1 indicates that the current call successfully received a message, and the
537: other output parameters nvals,rows,cols,vals are set appropriately.
538: */
539: #undef __FUNCT__
541: int MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,MatScalar **vals,int *flg)
542: {
543: int i,ierr,*flg_v,i1,i2,*rindices,bs2;
544: MPI_Status recv_status;
545: PetscTruth match_found = PETSC_FALSE;
549: *flg = 0; /* When a message is discovered this is reset to 1 */
550: /* Return if no more messages to process */
551: if (stash->nprocessed == stash->nrecvs) { return(0); }
553: flg_v = stash->nprocs;
554: bs2 = stash->bs*stash->bs;
555: /* If a matching pair of receieves are found, process them, and return the data to
556: the calling function. Until then keep receiving messages */
557: while (!match_found) {
558: MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
559: /* Now pack the received message into a structure which is useable by others */
560: if (i % 2) {
561: MPI_Get_count(&recv_status,MPI_INT,nvals);
562: flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
563: *nvals = *nvals/2; /* This message has both row indices and col indices */
564: } else {
565: MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);
566: flg_v[2*recv_status.MPI_SOURCE] = i/2;
567: *nvals = *nvals/bs2;
568: }
569:
570: /* Check if we have both the messages from this proc */
571: i1 = flg_v[2*recv_status.MPI_SOURCE];
572: i2 = flg_v[2*recv_status.MPI_SOURCE+1];
573: if (i1 != -1 && i2 != -1) {
574: rindices = (int*)(stash->rvalues + bs2*stash->rmax*stash->nrecvs);
575: *rows = rindices + 2*i2*stash->rmax;
576: *cols = *rows + *nvals;
577: *vals = stash->rvalues + i1*bs2*stash->rmax;
578: *flg = 1;
579: stash->nprocessed ++;
580: match_found = PETSC_TRUE;
581: }
582: }
583: return(0);
584: }