SuperLU_DIST  4.0
superlu_dist on CPU and GPU clusters
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
superlu_defs.h
Go to the documentation of this file.
1 
12 #ifndef __SUPERLU_DEFS /* allow multiple inclusions */
13 #define __SUPERLU_DEFS
14 
15 /*
16  * File name: superlu_defs.h
17  * Purpose: Definitions which are precision-neutral
18  */
19 #ifdef _CRAY
20 #include <fortran.h>
21 #include <string.h>
22 #endif
23 #include <mpi.h>
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <limits.h>
27 
/*
 * Define my integer size int_t, together with mpi_int_t, the MPI datatype
 * handle used when int_t values are communicated.  The two definitions in
 * each branch must describe the same C type.
 */
#ifdef _CRAY
typedef short int_t;
/*#undef int Revert back to int of default size. */
#define mpi_int_t MPI_SHORT
#elif defined (_LONGINT)
typedef long long int int_t;
/* MPI_LONG describes C `long`, which may be narrower than `long long int`;
   MPI_LONG_LONG_INT is the datatype that matches the typedef above. */
#define mpi_int_t MPI_LONG_LONG_INT
#else /* Default */
typedef int int_t;
#define mpi_int_t MPI_INT
#endif
40 
41 #include "superlu_enum_consts.h"
42 #include "Cnames.h"
43 #include "supermatrix.h"
44 #include "util_dist.h"
45 #include "psymbfact.h"
46 
47 
48 /***********************************************************************
49  * Constants
50  ***********************************************************************/
51 /*
52  * For each block column of L, the index[] array contains both the row
53  * subscripts and the integers describing the size of the blocks.
54  * The organization of index[] looks like:
55  *
56  * [ BLOCK COLUMN HEADER (size BC_HEADER)
57  * number of blocks
58  * number of row subscripts, i.e., LDA of nzval[]
59  * BLOCK 0 <----
60  * BLOCK DESCRIPTOR (of size LB_DESCRIPTOR) |
61  * block number (global) |
62  * number of full rows in the block |
63  * actual row subscripts |
64  * BLOCK 1 | Repeat ...
65  * BLOCK DESCRIPTOR | number of blocks
66  * block number (global) |
67  * number of full rows in the block |
68  * actual row subscripts |
69  * . |
70  * . |
71  * . <----
72  * ]
73  *
74  * For each block row of U, the organization of index[] looks like:
75  *
76  * [ BLOCK ROW HEADER (of size BR_HEADER)
77  * number of blocks
78  * number of entries in nzval[]
79  * number of entries in index[]
80  * BLOCK 0 <----
81  * BLOCK DESCRIPTOR (of size UB_DESCRIPTOR) |
82  * block number (global) |
83  * number of nonzeros in the block |
84  * actual fstnz subscripts |
85  * BLOCK 1 | Repeat ...
86  * BLOCK DESCRIPTOR | number of blocks
87  * block number (global) |
88  * number of nonzeros in the block |
89  * actual fstnz subscripts |
90  * . |
91  * . |
92  * . <----
93  * ]
94  *
95  */
96 #define BC_HEADER 2
97 #define LB_DESCRIPTOR 2
98 #define BR_HEADER 3
99 #define UB_DESCRIPTOR 2
100 #define NBUFFERS 5
101 
102 /*
103  * Communication tags
104  */
/*
 * Return the mpi_tag assuming 5 pairs of communications and MPI_TAG_UB >= 5.
 * The result is reduced modulo tag_ub, which must be in scope where the
 * macro is used.
 * For each supernodal column "num", the five communications are:
 *   0,1: for sending L to "right"
 *   2,3: for sending off-diagonal blocks of U "down"
 *   4  : for sending the diagonal block down (in pxgstrf2)
 */
#define SLU_MPI_TAG(id,num) ( (5*(num)+(id)) % tag_ub )
111 
112  /* For numeric factorization. */
113 #if 0
114 #define NTAGS 10000
115 #else
116 #define NTAGS INT_MAX
117 #endif
118 #define UjROW 10
119 #define UkSUB 11
120 #define UkVAL 12
121 #define LkSUB 13
122 #define LkVAL 14
123 #define LkkDIAG 15
124  /* For triangular solves. */
125 #define XK_H 2 /* The header preceding each X block. */
126 #define LSUM_H 2 /* The header preceding each MOD block. */
127 #define GSUM 20
128 #define Xk 21
129 #define Yk 22
130 #define LSUM 23
131 
132 /*
133  * Communication scopes
134  */
135 #define COMM_ALL 100
136 #define COMM_COLUMN 101
137 #define COMM_ROW 102
138 
139 /*
140  * Matrix distribution for sparse matrix-vector multiplication
141  */
142 #define SUPER_LINEAR 11
143 #define SUPER_BLOCK 12
144 
145 /*
146  * No of marker arrays used in the symbolic factorization, each of size n
147  */
148 #define NO_MARKER 3
149 
150 
151 
152 /***********************************************************************
153  * Macros
154  ***********************************************************************/
/*
 * NOTE(review): IAM() expands to a braced block whose final item, `rank}`,
 * has no terminating semicolon, so it cannot be used as written and yields
 * no value.  It is kept verbatim here; a portable replacement would be a
 * small function that returns the rank.
 */
#define IAM(comm) { int rank; MPI_Comm_rank ( comm, &rank ); rank};

/* Coordinates of process `iam` in a grid numbered row by row
   (see PNUM below: process (i,j) has number i*npcol + j). */
#define MYROW(iam,grid) ( (iam) / (grid)->npcol )
#define MYCOL(iam,grid) ( (iam) % (grid)->npcol )

/* The next three macros read the supno[] / xsup[] arrays that are in scope
   at the point of use. */
#define BlockNum(i) ( supno[(i)] )
#define FstBlockC(bnum) ( xsup[(bnum)] )
#define SuperSize(bnum) ( xsup[(bnum)+1]-xsup[(bnum)] )

#define LBi(bnum,grid) ( (bnum)/(grid)->nprow )/* Global to local block rowwise */
#define LBj(bnum,grid) ( (bnum)/(grid)->npcol )/* Global to local block columnwise*/
#define PROW(bnum,grid) ( (bnum) % (grid)->nprow )
#define PCOL(bnum,grid) ( (bnum) % (grid)->npcol )
#define PNUM(i,j,grid) ( (i)*(grid)->npcol + (j) ) /* Process number at coord(i,j) */

/* Integer ceiling of a/b.  Both arguments are evaluated more than once, so
   they must be free of side effects. */
#define CEILING(a,b) ( ((a)%(b)) ? ((a)/(b) + 1) : ((a)/(b)) )
/*
 * For triangular solves.
 * These macros read nrhs and ilsum[] (and the header sizes XK_H / LSUM_H)
 * from the scope in which they are used.
 */
/* Loop header iterating i over 0 .. nrhs-1; follow it with the loop body. */
#define RHS_ITERATE(i) \
    for ((i) = 0; (i) < nrhs; ++(i))
/* Offset of block i: ilsum[i]*nrhs values plus (i+1) headers of XK_H each.
   Fully parenthesized so it can be used inside larger expressions. */
#define X_BLK(i) \
    ( ilsum[(i)] * nrhs + ((i)+1) * XK_H )
/* Same layout as X_BLK, with headers of size LSUM_H. */
#define LSUM_BLK(i) \
    ( ilsum[(i)] * nrhs + ((i)+1) * LSUM_H )
174 
/* Map the generic timer name onto the SuperLU_DIST implementation. */
#define SuperLU_timer_ SuperLU_timer_dist_
/* Base-2 logarithm of x, computed with log10().  The argument is
   parenthesized so the cast applies to the whole expression, not just to
   its first operand. */
#define LOG2(x) (log10((double) (x)) / log10(2.0))
177 
178 
179 #if ( VAMPIR>=1 )
180 #define VT_TRACEON VT_traceon()
181 #define VT_TRACEOFF VT_traceoff()
182 #else
183 #define VT_TRACEON
184 #define VT_TRACEOFF
185 #endif
186 
187 
188 /***********************************************************************
189  * New data types
190  ***********************************************************************/
191 
192 /*
193  * Define the 2D mapping of matrix blocks to process grid.
194  *
195  * Process grid:
196  * Processes are numbered (0 : P-1).
197  * P = Pr x Pc, where Pr, Pc are the number of process rows and columns.
198  * (pr,pc) is the coordinate of IAM; 0 <= pr < Pr, 0 <= pc < Pc.
199  *
200  * Matrix blocks:
201  * Matrix is partitioned according to supernode partitions, both
202  * column and row-wise.
203  * The k-th block column (row) contains columns (rows) (s:t), where
204  * s=xsup[k], t=xsup[k+1]-1.
205  * Block A(I,J) contains
206  * rows from (xsup[I]:xsup[I+1]-1) and
207  * columns from (xsup[J]:xsup[J+1]-1)
208  *
209  * Mapping of matrix entry (i,j) to matrix block (I,J):
210  * (I,J) = ( supno[i], supno[j] )
211  *
212  * Mapping of matrix block (I,J) to process grid (pr,pc):
213  * (pr,pc) = ( MOD(I,NPROW), MOD(J,NPCOL) )
214  *
215  * (xsup[nsupers],supno[n]) are replicated on all processors.
216  *
217  */
218 
219 /*-- Communication subgroup */
220 typedef struct {
221  MPI_Comm comm; /* MPI communicator */
222  int Np; /* number of processes */
223  int Iam; /* my process number */
225 
226 /*-- Process grid definition */
227 typedef struct {
228  MPI_Comm comm; /* MPI communicator */
229  superlu_scope_t rscp; /* row scope */
230  superlu_scope_t cscp; /* column scope */
231  int iam; /* my process number in this scope */
232  int_t nprow; /* number of process rows */
233  int_t npcol; /* number of process columns */
234 } gridinfo_t;
235 
236 
237 /*
238  *-- The structures are determined by SYMBFACT and used thereafter.
239  *
240  * (xsup,supno) describes mapping between supernode and column:
241  * xsup[s] is the leading column of the s-th supernode.
242  * supno[i] is the supernode no to which column i belongs;
243  * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12)
244  * xsup 0 1 2 4 7 12
245  * Note: dfs will be performed on supernode rep. relative to the new
246  * row pivoting ordering
247  *
248  * This is allocated during symbolic factorization SYMBFACT.
249  */
250 typedef struct {
253 } Glu_persist_t;
254 
255 /*
256  *-- The structures are determined by SYMBFACT and used by DDISTRIBUTE.
257  *
258  * (xlsub,lsub): lsub[*] contains the compressed subscript of
259  * rectangular supernodes; xlsub[j] points to the starting
260  * location of the j-th column in lsub[*]. Note that xlsub
261  * is indexed by column.
262  * Storage: original row subscripts
263  *
264  * During the course of sparse LU factorization, we also use
265  * (xlsub,lsub) for the purpose of symmetric pruning. For each
266  * supernode {s,s+1,...,t=s+r} with first column s and last
267  * column t, the subscript set
268  * lsub[j], j=xlsub[s], .., xlsub[s+1]-1
269  * is the structure of column s (i.e. structure of this supernode).
270  * It is used for the storage of numerical values.
271  * Furthermore,
272  * lsub[j], j=xlsub[t], .., xlsub[t+1]-1
273  * is the structure of the last column t of this supernode.
274  * It is for the purpose of symmetric pruning. Therefore, the
275  * structural subscripts can be rearranged without making physical
276  * interchanges among the numerical values.
277  *
278  * However, if the supernode has only one column, then we
279  * only keep one set of subscripts. For any subscript interchange
280  * performed, similar interchange must be done on the numerical
281  * values.
282  *
283  * The last column structures (for pruning) will be removed
284  * after the numerical LU factorization phase.
285  *
286  * (xusub,usub): xusub[i] points to the starting location of column i
287  * in usub[]. For each U-segment, only the row index of first nonzero
288  * is stored in usub[].
289  *
290  * Each U column consists of a number of full segments. Each full segment
291  * starts from a leading nonzero, running up to the supernode (block)
292  * boundary. (Recall that the column-wise supernode partition is also
293  * imposed on the rows.) Because the segment is full, we don't store all
294  * the row indices. Instead, only the leading nonzero index is stored.
295  * The rest can be found together with xsup/supno pair.
296  * For example,
297  * xusub[j+1] - xusub[j] = number of segments in column j.
298  * for any i in usub[],
299  * supno[i] = block number in which i belongs to
300  * xsup[supno[i]+1] = first row of the next block
301  * The nonzeros of this segment are:
302  * i, i+1 ... xsup[supno[i]+1]-1 (only i is stored in usub[])
303  *
304  */
305 typedef struct {
306  int_t *lsub; /* compressed L subscripts */
308  int_t *usub; /* compressed U subscripts */
310  int_t nzlmax; /* current max size of lsub */
311  int_t nzumax; /* " " " usub */
312  LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
313  int_t *llvl; /* keep track of level in L for level-based ILU */
314  int_t *ulvl; /* keep track of level in U for level-based ILU */
316 
317 
318 /*
319  *-- The structure used to store matrix A of the linear system and
320  * several vectors describing the transformations done to matrix A.
321  *
322  * A (SuperMatrix*)
323  * Matrix A in A*X=B, of dimension (A->nrow, A->ncol).
324  * The number of linear equations is A->nrow. The type of A can be:
325  * Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
326  *
327  * DiagScale (DiagScale_t)
328  * Specifies the form of equilibration that was done.
329  * = NOEQUIL: No equilibration.
330  * = ROW: Row equilibration, i.e., A was premultiplied by diag(R).
331  * = COL: Column equilibration, i.e., A was postmultiplied by diag(C).
332  * = BOTH: Both row and column equilibration, i.e., A was replaced
333  * by diag(R)*A*diag(C).
334  *
335  * R double*, dimension (A->nrow)
336  * The row scale factors for A.
337  * If DiagScale = ROW or BOTH, A is multiplied on the left by diag(R).
338  * If DiagScale = NOEQUIL or COL, R is not defined.
339  *
340  * C double*, dimension (A->ncol)
341  * The column scale factors for A.
342  * If DiagScale = COL or BOTH, A is multiplied on the right by diag(C).
343  * If DiagScale = NOEQUIL or ROW, C is not defined.
344  *
345  * perm_r (int*) dimension (A->nrow)
346  * Row permutation vector which defines the permutation matrix Pr,
347  * perm_r[i] = j means row i of A is in position j in Pr*A.
348  *
349  * perm_c (int*) dimension (A->ncol)
350  * Column permutation vector, which defines the
351  * permutation matrix Pc; perm_c[i] = j means column i of A is
352  * in position j in A*Pc.
353  *
354  */
355 typedef struct {
357  double *R;
358  double *C;
362 
363 /*
364  *-- This contains the options used to control the solution process.
365  *
366  * Fact (fact_t)
367  * Specifies whether or not the factored form of the matrix
368  * A is supplied on entry, and if not, how the matrix A should
369  * be factorized.
370  * = DOFACT: The matrix A will be factorized from scratch, and the
371  * factors will be stored in L and U.
372  * = SamePattern: The matrix A will be factorized assuming
373  * that a factorization of a matrix with the same sparsity
374  * pattern was performed prior to this one. Therefore, this
375  * factorization will reuse column permutation vector
376  * ScalePermstruct->perm_c and the column elimination tree
377  * LUstruct->etree.
378  * = SamePattern_SameRowPerm: The matrix A will be factorized
379  * assuming that a factorization of a matrix with the same
380  * sparsity pattern and similar numerical values was performed
381  * prior to this one. Therefore, this factorization will reuse
382  * both row and column scaling factors R and C, both row and
383  * column permutation vectors perm_r and perm_c, and the
384  * data structure set up from the previous symbolic factorization.
385  * = FACTORED: On entry, L, U, perm_r and perm_c contain the
386  * factored form of A. If DiagScale is not NOEQUIL, the matrix
387  * A has been equilibrated with scaling factors R and C.
388  *
389  * Equil (yes_no_t)
390  * Specifies whether to equilibrate the system (scale A's rows and
391  * columns to have unit norm).
392  *
393  * ColPerm (colperm_t)
394  * Specifies what type of column permutation to use to reduce fill.
395  * = NATURAL: use the natural ordering
396  * = MMD_ATA: use minimum degree ordering on structure of A'*A
397  * = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A
398  * = COLAMD: use approximate minimum degree column ordering
399  * = MY_PERMC: use the ordering specified by the user
400  *
401  * Trans (trans_t)
402  * Specifies the form of the system of equations:
403  * = NOTRANS: A * X = B (No transpose)
404  * = TRANS: A**T * X = B (Transpose)
405  * = CONJ: A**H * X = B (Conjugate transpose)
406  *
407  * IterRefine (IterRefine_t)
408  * Specifies whether to perform iterative refinement.
409  * = NO: no iterative refinement
410  * = SINGLE: perform iterative refinement in single precision
411  * = DOUBLE: perform iterative refinement in double precision
412  * = EXTRA: perform iterative refinement in extra precision
413  *
414  * DiagPivotThresh (double, in [0.0, 1.0]) (only for serial SuperLU)
415  * Specifies the threshold used for a diagonal entry to be an
416  * acceptable pivot.
417  *
418  * SymmetricMode (yes_no_t) (only for serial SuperLU)
419  * Specifies whether to use symmetric mode. Symmetric mode gives
420  * preference to diagonal pivots, and uses an (A'+A)-based column
421  * permutation algorithm.
422  *
423  * PivotGrowth (yes_no_t) (only for serial SuperLU)
424  * Specifies whether to compute the reciprocal pivot growth.
425  *
426  * ConditionNumber (yes_no_t) (only for serial SuperLU)
427  * Specifies whether to compute the reciprocal condition number.
428  *
429  * RowPerm (rowperm_t) (only for SuperLU_DIST or ILU in serial SuperLU)
430  * Specifies whether to permute rows of the original matrix.
431  * = NO: not to permute the rows
432  * = LargeDiag: make the diagonal large relative to the off-diagonal
433  * = MY_PERMR: use the permutation given by the user
434  *
435  * ILU_DropRule (int) (only for serial SuperLU)
436  * Specifies the dropping rule:
437  * = DROP_BASIC: Basic dropping rule, supernodal based ILUTP(tau).
438  * = DROP_PROWS: Supernodal based ILUTP(p,tau), p = gamma * nnz(A)/n.
439  * = DROP_COLUMN: Variant of ILUTP(p,tau), for j-th column,
440  * p = gamma * nnz(A(:,j)).
441  * = DROP_AREA: Variation of ILUTP, for j-th column, use
442  * nnz(F(:,1:j)) / nnz(A(:,1:j)) to control memory.
443  * = DROP_DYNAMIC: Modify the threshold tau during factorization:
444  * If nnz(L(:,1:j)) / nnz(A(:,1:j)) > gamma
445  * tau_L(j) := MIN(tau_0, tau_L(j-1) * 2);
446  * Otherwise
447  * tau_L(j) := MAX(tau_0, tau_L(j-1) / 2);
448  * tau_U(j) uses the similar rule.
449  * NOTE: the thresholds used by L and U are separate.
450  * = DROP_INTERP: Compute the second dropping threshold by
451  * interpolation instead of sorting (default).
452  * In this case, the actual fill ratio is not
453  * guaranteed to be smaller than gamma.
454  * Note: DROP_PROWS, DROP_COLUMN and DROP_AREA are mutually exclusive.
455  * ( Default: DROP_BASIC | DROP_AREA )
456  *
457  * ILU_DropTol (double) (only for serial SuperLU)
458  * numerical threshold for dropping.
459  *
460  * ILU_FillFactor (double) (only for serial SuperLU)
461  * Gamma in the secondary dropping.
462  *
463  * ILU_Norm (norm_t) (only for serial SuperLU)
464  * Specify which norm to use to measure the row size in a
465  * supernode: infinity-norm, 1-norm, or 2-norm.
466  *
467  * ILU_FillTol (double) (only for serial SuperLU)
468  * numerical threshold for zero pivot perturbation.
469  *
470  * ILU_MILU (milu_t) (only for serial SuperLU)
471  * Specifies which version of MILU to use.
472  *
473  * ILU_MILU_Dim (double)
474  * Dimension of the PDE if available.
475  *
476  * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST)
477  * Specifies whether to replace the tiny diagonals by
478  * sqrt(epsilon)*||A|| during LU factorization.
479  *
480  * SolveInitialized (yes_no_t) (only for SuperLU_DIST)
481  * Specifies whether the initialization has been performed to the
482  * triangular solve.
483  *
484  * RefineInitialized (yes_no_t) (only for SuperLU_DIST)
485  * Specifies whether the initialization has been performed to the
486  * sparse matrix-vector multiplication routine needed in iterative
487  * refinement.
488  *
489  * num_lookaheads (int) (only for SuperLU_DIST)
490  * Specifies the number of levels in the look-ahead factorization
491  *
492  * lookahead_etree (yes_no_t) (only for SuperLU_DIST)
493  * Specifies whether to use the elimination tree computed from the
494  * serial symbolic factorization to perform scheduling.
495  *
496  * SymPattern (yes_no_t) (only for SuperLU_DIST)
497  * Gives the scheduling algorithm a hint whether the matrix
498  * would have symmetric pattern.
499  *
500  */
501 typedef struct {
513  double ILU_DropTol; /* threshold for dropping */
514  double ILU_FillFactor; /* gamma in the secondary dropping */
515  norm_t ILU_Norm; /* infinity-norm, 1-norm, or 2-norm */
516  double ILU_FillTol; /* threshold for zero pivot perturbation */
518  double ILU_MILU_Dim; /* Dimension of PDE (if available) */
520  yes_no_t ReplaceTinyPivot; /* used in SuperLU_DIST */
524  int nnzL, nnzU; /* used to store nnzs for now */
525  int num_lookaheads; /* num of levels in look-ahead */
526  yes_no_t lookahead_etree; /* use etree computed from the
527  serial symbolic factorization */
528  yes_no_t SymPattern; /* symmetric factorization */
530 
531 typedef struct {
532  float for_lu;
533  float total;
535  long long int nnzL, nnzU;
536 } mem_usage_t;
537 
538 /*
539  *-- The new structures added in the hybrid CUDA + OpenMP + MPI code.
540  */
541 typedef struct {
546 
547 } Ublock_info_t;
548 
549 typedef struct {
553 } Remain_info_t;
554 
/* Node record: two integer fields and an untyped link pointer. */
typedef struct
{
    int   id;
    int   key;
    void *next;  /* NOTE(review): presumably points at another etree_node -- confirm */
} etree_node;
560 
/* Two integers stored together: `ind` and `val`. */
struct pair
{
    int ind;
    int val;
};
566 
570 /***********************************************************************
571  * Function prototypes
572  ***********************************************************************/
573 
574 #ifdef __cplusplus
575 extern "C" {
576 #endif
577 
579 extern void superlu_gridinit(MPI_Comm, int_t, int_t, gridinfo_t *);
580 extern void superlu_gridmap(MPI_Comm, int_t, int_t, int_t [], int_t,
581  gridinfo_t *);
582 extern void superlu_gridexit(gridinfo_t *);
592  SuperMatrix*);
593 extern int sp_symetree_dist(int_t *, int_t *, int_t *, int_t, int_t *);
594 extern int sp_coletree_dist (int_t *, int_t *, int_t *, int_t, int_t, int_t *);
595 extern void get_perm_c_dist(int_t, int_t, SuperMatrix *, int_t *);
596 extern void at_plus_a_dist(const int_t, const int_t, int_t *, int_t *,
597  int_t *, int_t **, int_t **);
598 extern int genmmd_dist_(int_t *, int_t *, int_t *a,
599  int_t *, int_t *, int_t *, int_t *,
600  int_t *, int_t *, int_t *, int_t *, int_t *);
601 extern void bcast_tree(void *, int, MPI_Datatype, int, int,
602  gridinfo_t *, int, int *);
603 extern int_t symbfact(superlu_options_t *, int, SuperMatrix *, int_t *,
605 extern int_t symbfact_SubInit(fact_t, void *, int_t, int_t, int_t, int_t,
608  Glu_freeable_t *);
610 extern void countnz_dist (const int_t, int_t *,
611  long long int *, long long int *,
613 extern long long int fixupL_dist (const int_t, const int_t *, Glu_persist_t *,
614  Glu_freeable_t *);
615 extern int_t *TreePostorder_dist (int_t, int_t *);
616 extern float slamch_(const char *);
617 extern double dlamch_(const char *);
618 extern void *superlu_malloc_dist (size_t);
619 extern void superlu_free_dist (void*);
620 extern int_t *intMalloc_dist (int_t);
621 extern int_t *intCalloc_dist (int_t);
622 extern int_t mc64id_dist(int_t *);
623 
624 /* Auxiliary routines */
625 extern double SuperLU_timer_ ();
626 extern void superlu_abort_and_exit_dist(char *);
627 extern int_t sp_ienv_dist (int_t);
628 extern int lsame_ (char *, char *);
629 extern int xerbla_ (char *, int *);
630 extern void ifill_dist (int_t *, int_t, int_t);
631 extern void super_stats_dist (int_t, int_t *);
632 extern void ScalePermstructInit(const int_t, const int_t,
635 extern void get_diag_procs(int_t, Glu_persist_t *, gridinfo_t *, int_t *,
636  int_t **, int_t **);
638 extern int xerbla_ (char *, int *);
639 extern void pxerbla (char *, gridinfo_t *, int_t);
640 extern void PStatInit(SuperLUStat_t *);
641 extern void PStatFree(SuperLUStat_t *);
643 extern void log_memory(long long, SuperLUStat_t *);
644 extern void print_memorylog(SuperLUStat_t *, char *);
645 
646 /* Prototypes for parallel symbolic factorization */
647 extern float symbfact_dist
648 (int, int, SuperMatrix *, int_t *, int_t *, int_t *, int_t *,
649  Pslu_freeable_t *, MPI_Comm *, MPI_Comm *, mem_usage_t *);
650 
651 /* Get the column permutation using parmetis */
652 extern float get_perm_c_parmetis
653 (SuperMatrix *, int_t *, int_t *, int, int,
654  int_t **, int_t **, gridinfo_t *, MPI_Comm *);
655 
656 /* Auxiliary routines for memory expansions used during
657  the parallel symbolic factorization routine */
658 
662 
666 
670 
673 
674 #ifdef GPU_ACC /* GPU related */
675 extern void gemm_division_cpu_gpu (int *, int *, int *, int,
676  int, int, int *, int);
677 #endif
678 
679 /* Routines for debugging */
680 extern void print_panel_seg_dist(int_t, int_t, int_t, int_t, int_t *, int_t *);
681 extern void check_repfnz_dist(int_t, int_t, int_t, int_t *);
682 extern int_t CheckZeroDiagonal(int_t, int_t *, int_t *, int_t *);
683 extern void PrintDouble5(char *, int_t, double *);
684 extern void PrintInt10(char *, int_t, int_t *);
685 extern int file_PrintInt10(FILE *, char *, int, int *);
686 extern int file_PrintLong10(FILE *, char *, int_t, int_t *);
687 
688 #ifdef __cplusplus
689  }
690 #endif
691 
692 #endif /* __SUPERLU_DEFS */
IterRefine_t
Definition: superlu_enum_consts.h:23
void print_options_dist(superlu_options_t *)
Print the options setting.
Definition: util.c:348
double dlamch_(char *cmach)
Definition: dlamch.c:59
void Destroy_CompRowLoc_Matrix_dist(SuperMatrix *)
Definition: util.c:51
yes_no_t PivotGrowth
Definition: superlu_defs.h:509
float get_perm_c_parmetis(SuperMatrix *A, int_t *perm_r, int_t *perm_c, int nprocs_i, int noDomains, int_t **sizes, int_t **fstVtxSep, gridinfo_t *grid, MPI_Comm *metis_comm)
Definition: get_perm_c_parmetis.c:91
int_t rukp
Definition: superlu_defs.h:542
Definition: superlu_defs.h:541
yes_no_t
Definition: superlu_enum_consts.h:16
LU_space_t MemModel
Definition: superlu_defs.h:312
Definition: superlu_defs.h:555
int_t nzumax
Definition: superlu_defs.h:311
Definition: psymbfact.h:95
int_t sp_ienv_dist(int_t ispec)
Purpose
Definition: sp_ienv.c:59
Definition: superlu_defs.h:250
rowperm_t RowPerm
Definition: superlu_defs.h:511
Definition: psymbfact.h:47
long long int fixupL_dist(const int_t, const int_t *, Glu_persist_t *, Glu_freeable_t *)
Definition: util.c:290
void bcast_tree(void *buf, int count, MPI_Datatype dtype, int root, int tag, gridinfo_t *grid, int scope, int *recvcnt)
Definition: comm.c:62
yes_no_t SymmetricMode
Definition: superlu_defs.h:508
void print_sp_ienv_dist(superlu_options_t *)
Print the blocking parameters.
Definition: util.c:369
trans_t
Definition: superlu_enum_consts.h:21
colperm_t ColPerm
Definition: superlu_defs.h:504
void get_diag_procs(int_t, Glu_persist_t *, gridinfo_t *, int_t *, int_t **, int_t **)
Definition: util.c:699
Definition: superlu_defs.h:501
Header for utilities.
int_t FullRow
Definition: superlu_defs.h:552
int ILU_DropRule
Definition: superlu_defs.h:512
yes_no_t SymPattern
Definition: superlu_defs.h:528
int_t mc64id_dist(int_t *icntl)
Definition: mc64ad_dist.c:57
void log_memory(long long, SuperLUStat_t *)
Definition: util.c:943
int val
Definition: superlu_defs.h:564
void set_default_options_dist(superlu_options_t *)
Set the default values for the options argument.
Definition: util.c:328
int_t symbfact_SubFree(Glu_freeable_t *Glu_freeable)
Definition: memory.c:403
Local information on vertices distribution.
Definition: psymbfact.h:129
enum constants header file
float for_lu
Definition: superlu_defs.h:532
Definition: util_dist.h:72
int_t * intCalloc_dist(int_t n)
Definition: memory.c:150
void check_repfnz_dist(int_t, int_t, int_t, int_t *)
Check whether repfnz[] == EMPTY after reset.
Definition: util.c:780
yes_no_t ReplaceTinyPivot
Definition: superlu_defs.h:520
double ILU_FillFactor
Definition: superlu_defs.h:514
Definition: supermatrix.h:44
int sp_symetree_dist(int_t *acolst, int_t *acolend, int_t *arow, int_t n, int_t *parent)
Symmetric elimination tree.
Definition: etree.c:146
int file_PrintLong10(FILE *, char *, int_t, int_t *)
Definition: util.c:818
void superlu_gridinit(MPI_Comm, int_t, int_t, gridinfo_t *)
All processes in the MPI communicator must call this routine.
Definition: superlu_grid.c:18
void super_stats_dist(int_t, int_t *)
Definition: util.c:740
fact_t
Definition: superlu_enum_consts.h:17
int_t lptr
Definition: superlu_defs.h:550
void ScalePermstructInit(const int_t, const int_t, ScalePermstruct_t *)
Allocate storage in ScalePermstruct.
Definition: util.c:162
MemType
Definition: superlu_enum_consts.h:24
statistics collected during parallel symbolic factorization
Definition: psymbfact.h:183
milu_t ILU_MILU
Definition: superlu_defs.h:517
Definitions for parallel symbolic factorization routine.
int num_lookaheads
Definition: superlu_defs.h:525
void pxerbla(char *srname, gridinfo_t *grid, int_t info)
Definition: pxerbla.c:13
int nnzU
Definition: superlu_defs.h:524
int_t full_u_cols
Definition: superlu_defs.h:545
Definition: superlu_defs.h:220
int_t * supno
Definition: superlu_defs.h:252
norm_t
Definition: superlu_enum_consts.h:27
MPI_Comm comm
Definition: superlu_defs.h:221
fact_t Fact
Definition: superlu_defs.h:502
Macro definitions.
Matrix type definitions.
yes_no_t Equil
Definition: superlu_defs.h:503
DiagScale_t
Definition: superlu_enum_consts.h:22
int_t * intMalloc_dist(int_t n)
Definition: memory.c:143
int_t psymbfact_LUXpand_RL(int_t iam, int_t n, int_t vtxXp, int_t next, int_t len_texp, int_t mem_type, Pslu_freeable_t *Pslu_freeable, Llu_symbfact_t *Llu_symbfact, vtcsInfo_symbfact_t *VInfo, psymbfact_stat_t *PS)
Definition: psymbfact_util.c:372
void PrintDouble5(char *name, int_t len, double *x)
Definition: dutil.c:342
yes_no_t ParSymbFact
Definition: superlu_defs.h:519
int_t * perm_c
Definition: superlu_defs.h:360
void superlu_abort_and_exit_dist(char *msg)
Definition: memory.c:35
int_t ib
Definition: superlu_defs.h:551
int file_PrintInt10(FILE *, char *, int, int *)
Definition: util.c:805
void print_panel_seg_dist(int_t, int_t, int_t, int_t, int_t *, int_t *)
Diagnostic print of segment info after panel_dfs().
Definition: util.c:553
void ifill_dist(int_t *, int_t, int_t)
Fills an integer array with a given value.
Definition: util.c:691
int genmmd_dist_(int_t *neqns, int_t *xadj, int_t *adjncy, int_t *invp, int_t *perm, int_t *delta, int_t *dhead, int_t *qsize, int_t *llist, int_t *marker, int_t *maxint, int_t *nofsub)
Definition: mmd.c:54
int_t * xusub
Definition: superlu_defs.h:309
Definition: superlu_defs.h:355
norm_t ILU_Norm
Definition: superlu_defs.h:515
double ILU_DropTol
Definition: superlu_defs.h:513
rowperm_t
Definition: superlu_enum_consts.h:18
yes_no_t ConditionNumber
Definition: superlu_defs.h:510
int_t QuerySpace_dist(int_t n, int_t lsub_size, Glu_freeable_t *Glu_freeable, mem_usage_t *mem_usage)
Definition: memory.c:541
void superlu_gridmap(MPI_Comm, int_t, int_t, int_t[], int_t, gridinfo_t *)
All processes in the MPI communicator must call this routine.
Definition: superlu_grid.c:48
milu_t
Definition: superlu_enum_consts.h:28
int ind
Definition: superlu_defs.h:563
void * next
Definition: superlu_defs.h:558
double DiagPivotThresh
Definition: superlu_defs.h:507
void sp_colorder(superlu_options_t *options, SuperMatrix *A, int_t *perm_c, int_t *etree, SuperMatrix *AC)
Definition: sp_colorder.c:70
int_t psymbfact_prLUXpand(int_t iam, int_t min_new_len, int mem_type, Llu_symbfact_t *Llu_symbfact, psymbfact_stat_t *PS)
Definition: psymbfact_util.c:490
Definition: superlu_defs.h:227
double ILU_MILU_Dim
Definition: superlu_defs.h:518
int lsame_(char *ca, char *cb)
Definition: lsame.c:32
void Destroy_SuperNode_Matrix_dist(SuperMatrix *)
Definition: util.c:70
int_t * xsup
Definition: superlu_defs.h:251
float slamch_(char *cmach)
Definition: slamch.c:60
void Destroy_SuperMatrix_Store_dist(SuperMatrix *)
Deallocate the structure pointing to the actual storage of the matrix.
Definition: util.c:35
int Iam
Definition: superlu_defs.h:223
int_t psymbfact_LUXpand(int_t iam, int_t n, int_t fstVtxLvl_loc, int_t vtxXp, int_t *p_next, int_t min_new_len, int_t mem_type, int_t rout_type, int_t free_prev_mem, Pslu_freeable_t *Pslu_freeable, Llu_symbfact_t *Llu_symbfact, vtcsInfo_symbfact_t *VInfo, psymbfact_stat_t *PS)
Definition: psymbfact_util.c:201
void * superlu_malloc_dist(size_t size)
Definition: memory.c:105
Definition: superlu_defs.h:305
int_t * ulvl
Definition: superlu_defs.h:314
void Destroy_CompRow_Matrix_dist(SuperMatrix *)
Definition: util.c:61
Definition: superlu_defs.h:549
int key
Definition: superlu_defs.h:557
void superlu_gridexit(gridinfo_t *)
Definition: superlu_grid.c:157
int_t * xlsub
Definition: superlu_defs.h:307
void Destroy_CompCol_Matrix_dist(SuperMatrix *)
Definition: util.c:41
void ScalePermstructFree(ScalePermstruct_t *)
Deallocate ScalePermstruct.
Definition: util.c:173
int_t npcol
Definition: superlu_defs.h:233
double * R
Definition: superlu_defs.h:357
void countnz_dist(const int_t, int_t *, long long int *, long long int *, Glu_persist_t *, Glu_freeable_t *)
Definition: util.c:230
superlu_scope_t cscp
Definition: superlu_defs.h:230
double ILU_FillTol
Definition: superlu_defs.h:516
trans_t Trans
Definition: superlu_defs.h:505
yes_no_t RefineInitialized
Definition: superlu_defs.h:522
MPI_Comm comm
Definition: superlu_defs.h:228
void print_memorylog(SuperLUStat_t *, char *)
Definition: util.c:951
void PStatFree(SuperLUStat_t *)
Definition: util.c:683
int_t nprow
Definition: superlu_defs.h:232
int_t * usub
Definition: superlu_defs.h:308
#define SuperLU_timer_
Definition: superlu_defs.h:175
float total
Definition: superlu_defs.h:533
int_t psymbfact_LUXpandMem(int_t iam, int_t n, int_t vtxXp, int_t next, int_t min_new_len, int_t mem_type, int_t rout_type, int_t free_prev_mem, Pslu_freeable_t *Pslu_freeable, Llu_symbfact_t *Llu_symbfact, vtcsInfo_symbfact_t *VInfo, psymbfact_stat_t *PS)
Definition: psymbfact_util.c:81
int_t expansions
Definition: superlu_defs.h:534
double * C
Definition: superlu_defs.h:358
int int_t
Definition: superlu_defs.h:37
void Destroy_CompCol_Permuted_dist(SuperMatrix *)
A is of type Stype==NCP.
Definition: util.c:83
int_t CheckZeroDiagonal(int_t, int_t *, int_t *, int_t *)
Definition: util.c:832
Definition: superlu_defs.h:561
DiagScale_t DiagScale
Definition: superlu_defs.h:356
void PrintInt10(char *, int_t, int_t *)
Definition: util.c:793
void PStatPrint(superlu_options_t *, SuperLUStat_t *, gridinfo_t *)
Definition: util.c:584
yes_no_t PrintStat
Definition: superlu_defs.h:523
int_t symbfact_SubXpand(int_t n, int_t jcol, int_t next, MemType mem_type, int_t *maxlen, Glu_freeable_t *Glu_freeable)
Definition: memory.c:357
int iam
Definition: superlu_defs.h:231
LU_space_t
Definition: superlu_enum_consts.h:26
int_t symbfact_SubInit(fact_t fact, void *work, int_t lwork, int_t m, int_t n, int_t annz, Glu_persist_t *Glu_persist, Glu_freeable_t *Glu_freeable)
Definition: memory.c:228
yes_no_t SolveInitialized
Definition: superlu_defs.h:521
void superlu_free_dist(void *addr)
Definition: memory.c:112
int_t * perm_r
Definition: superlu_defs.h:359
int_t symbfact(superlu_options_t *, int, SuperMatrix *, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *)
Definition: symbfact.c:72
int sp_coletree_dist(int_t *acolst, int_t *acolend, int_t *arow, int_t nr, int_t nc, int_t *parent)
Nonsymmetric elimination tree.
Definition: etree.c:213
void at_plus_a_dist(const int_t n, const int_t nz, int_t *colptr, int_t *rowind, int_t *bnz, int_t **b_colptr, int_t **b_rowind)
Definition: get_perm_c.c:254
int Np
Definition: superlu_defs.h:222
int_t jb
Definition: superlu_defs.h:544
yes_no_t lookahead_etree
Definition: superlu_defs.h:526
int xerbla_(char *, int *)
Definition: xerbla.c:42
int_t nzlmax
Definition: superlu_defs.h:310
Definition: superlu_defs.h:531
colperm_t
Definition: superlu_enum_consts.h:19
int_t * llvl
Definition: superlu_defs.h:313
superlu_scope_t rscp
Definition: superlu_defs.h:229
void get_perm_c_dist(int_t pnum, int_t ispec, SuperMatrix *A, int_t *perm_c)
Definition: get_perm_c.c:417
void PStatInit(SuperLUStat_t *)
Definition: util.c:568
long long int nnzU
Definition: superlu_defs.h:535
int_t * lsub
Definition: superlu_defs.h:306
float symbfact_dist(int nprocs_num, int nprocs_symb, SuperMatrix *A, int_t *perm_c, int_t *perm_r, int_t *sizes, int_t *fstVtxSep, Pslu_freeable_t *Pslu_freeable, MPI_Comm *num_comm, MPI_Comm *symb_comm, mem_usage_t *symb_mem_usage)
Definition: psymbfact.c:131
int_t * TreePostorder_dist(int_t n, int_t *parent)
Definition: etree.c:383
int_t iukp
Definition: superlu_defs.h:543
IterRefine_t IterRefine
Definition: superlu_defs.h:506