|
#define | mpi_int_t MPI_INT |
|
#define | BC_HEADER 2 |
|
#define | LB_DESCRIPTOR 2 |
|
#define | BR_HEADER 3 |
|
#define | UB_DESCRIPTOR 2 |
|
#define | NBUFFERS 5 |
|
#define | SLU_MPI_TAG(id, num) ( (5*(num)+id) % tag_ub ) |
|
#define | NTAGS INT_MAX |
|
#define | UjROW 10 |
|
#define | UkSUB 11 |
|
#define | UkVAL 12 |
|
#define | LkSUB 13 |
|
#define | LkVAL 14 |
|
#define | LkkDIAG 15 |
|
#define | XK_H 2 /* The header preceding each X block. */ |
|
#define | LSUM_H 2 /* The header preceding each MOD block. */ |
|
#define | GSUM 20 |
|
#define | Xk 21 |
|
#define | Yk 22 |
|
#define | LSUM 23 |
|
#define | COMM_ALL 100 |
|
#define | COMM_COLUMN 101 |
|
#define | COMM_ROW 102 |
|
#define | SUPER_LINEAR 11 |
|
#define | SUPER_BLOCK 12 |
|
#define | NO_MARKER 3 |
|
#define | IAM(comm) { int rank; MPI_Comm_rank ( comm, &rank ); rank}; |
|
#define | MYROW(iam, grid) ( (iam) / grid->npcol ) |
|
#define | MYCOL(iam, grid) ( (iam) % grid->npcol ) |
|
#define | BlockNum(i) ( supno[i] ) |
|
#define | FstBlockC(bnum) ( xsup[bnum] ) |
|
#define | SuperSize(bnum) ( xsup[bnum+1]-xsup[bnum] ) |
|
#define | LBi(bnum, grid) ( (bnum)/grid->nprow )/* Global to local block rowwise */ |
|
#define | LBj(bnum, grid) ( (bnum)/grid->npcol )/* Global to local block columnwise*/ |
|
#define | PROW(bnum, grid) ( (bnum) % grid->nprow ) |
|
#define | PCOL(bnum, grid) ( (bnum) % grid->npcol ) |
|
#define | PNUM(i, j, grid) ( (i)*grid->npcol + j ) /* Process number at coord(i,j) */ |
|
#define | CEILING(a, b) ( ((a)%(b)) ? ((a)/(b) + 1) : ((a)/(b)) ) |
|
#define | RHS_ITERATE(i) for (i = 0; i < nrhs; ++i) |
|
#define | X_BLK(i) ilsum[i] * nrhs + (i+1) * XK_H |
|
#define | LSUM_BLK(i) ilsum[i] * nrhs + (i+1) * LSUM_H |
|
#define | SuperLU_timer_ SuperLU_timer_dist_ |
|
#define | LOG2(x) (log10((double) x) / log10(2.0)) |
|
#define | VT_TRACEON |
|
#define | VT_TRACEOFF |
|
|
void | set_default_options_dist (superlu_options_t *) |
| Set the default values for the options argument. More...
|
|
void | superlu_gridinit (MPI_Comm, int_t, int_t, gridinfo_t *) |
| All processes in the MPI communicator must call this routine. More...
|
|
void | superlu_gridmap (MPI_Comm, int_t, int_t, int_t[], int_t, gridinfo_t *) |
| All processes in the MPI communicator must call this routine. More...
|
|
void | superlu_gridexit (gridinfo_t *) |
|
void | print_options_dist (superlu_options_t *) |
| Print the options setting. More...
|
|
void | print_sp_ienv_dist (superlu_options_t *) |
| Print the blocking parameters. More...
|
|
void | Destroy_CompCol_Matrix_dist (SuperMatrix *) |
|
void | Destroy_SuperNode_Matrix_dist (SuperMatrix *) |
|
void | Destroy_SuperMatrix_Store_dist (SuperMatrix *) |
| Deallocate the structure pointing to the actual storage of the matrix. More...
|
|
void | Destroy_CompCol_Permuted_dist (SuperMatrix *) |
| A is of type Stype==NCP. More...
|
|
void | Destroy_CompRowLoc_Matrix_dist (SuperMatrix *) |
|
void | Destroy_CompRow_Matrix_dist (SuperMatrix *) |
|
void | sp_colorder (superlu_options_t *, SuperMatrix *, int_t *, int_t *, SuperMatrix *) |
|
int | sp_symetree_dist (int_t *, int_t *, int_t *, int_t, int_t *) |
| Symmetric elimination tree. More...
|
|
int | sp_coletree_dist (int_t *, int_t *, int_t *, int_t, int_t, int_t *) |
| Nonsymmetric elimination tree. More...
|
|
void | get_perm_c_dist (int_t, int_t, SuperMatrix *, int_t *) |
|
void | at_plus_a_dist (const int_t, const int_t, int_t *, int_t *, int_t *, int_t **, int_t **) |
|
int | genmmd_dist_ (int_t *, int_t *, int_t *a, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *, int_t *) |
|
void | bcast_tree (void *, int, MPI_Datatype, int, int, gridinfo_t *, int, int *) |
|
int_t | symbfact (superlu_options_t *, int, SuperMatrix *, int_t *, int_t *, Glu_persist_t *, Glu_freeable_t *) |
|
int_t | symbfact_SubInit (fact_t, void *, int_t, int_t, int_t, int_t, Glu_persist_t *, Glu_freeable_t *) |
|
int_t | symbfact_SubXpand (int_t, int_t, int_t, MemType, int_t *, Glu_freeable_t *) |
|
int_t | symbfact_SubFree (Glu_freeable_t *) |
|
void | countnz_dist (const int_t, int_t *, long long int *, long long int *, Glu_persist_t *, Glu_freeable_t *) |
|
long long int | fixupL_dist (const int_t, const int_t *, Glu_persist_t *, Glu_freeable_t *) |
|
int_t * | TreePostorder_dist (int_t, int_t *) |
|
float | slamch_ (const char *) |
|
double | dlamch_ (const char *) |
|
void * | superlu_malloc_dist (size_t) |
|
void | superlu_free_dist (void *) |
|
int_t * | intMalloc_dist (int_t) |
|
int_t * | intCalloc_dist (int_t) |
|
int_t | mc64id_dist (int_t *) |
|
double | SuperLU_timer_ () |
|
void | superlu_abort_and_exit_dist (char *) |
|
int_t | sp_ienv_dist (int_t) |
| Purpose
More...
|
|
int | lsame_ (char *, char *) |
|
int | xerbla_ (char *, int *) |
|
void | ifill_dist (int_t *, int_t, int_t) |
| Fills an integer array with a given value. More...
|
|
void | super_stats_dist (int_t, int_t *) |
|
void | ScalePermstructInit (const int_t, const int_t, ScalePermstruct_t *) |
| Allocate storage in ScalePermstruct. More...
|
|
void | ScalePermstructFree (ScalePermstruct_t *) |
| Deallocate ScalePermstruct. More...
|
|
void | get_diag_procs (int_t, Glu_persist_t *, gridinfo_t *, int_t *, int_t **, int_t **) |
|
int_t | QuerySpace_dist (int_t, int_t, Glu_freeable_t *, mem_usage_t *) |
|
void | pxerbla (char *, gridinfo_t *, int_t) |
|
void | PStatInit (SuperLUStat_t *) |
|
void | PStatFree (SuperLUStat_t *) |
|
void | PStatPrint (superlu_options_t *, SuperLUStat_t *, gridinfo_t *) |
|
void | log_memory (long long, SuperLUStat_t *) |
|
void | print_memorylog (SuperLUStat_t *, char *) |
|
float | symbfact_dist (int, int, SuperMatrix *, int_t *, int_t *, int_t *, int_t *, Pslu_freeable_t *, MPI_Comm *, MPI_Comm *, mem_usage_t *) |
|
float | get_perm_c_parmetis (SuperMatrix *, int_t *, int_t *, int, int, int_t **, int_t **, gridinfo_t *, MPI_Comm *) |
|
int_t | psymbfact_LUXpandMem (int_t, int_t, int_t, int_t, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *) |
|
int_t | psymbfact_LUXpand (int_t, int_t, int_t, int_t, int_t *, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *) |
|
int_t | psymbfact_LUXpand_RL (int_t, int_t, int_t, int_t, int_t, int_t, Pslu_freeable_t *, Llu_symbfact_t *, vtcsInfo_symbfact_t *, psymbfact_stat_t *) |
|
int_t | psymbfact_prLUXpand (int_t, int_t, int, Llu_symbfact_t *, psymbfact_stat_t *) |
|
void | print_panel_seg_dist (int_t, int_t, int_t, int_t, int_t *, int_t *) |
| Diagnostic print of segment info after panel_dfs(). More...
|
|
void | check_repfnz_dist (int_t, int_t, int_t, int_t *) |
| Check whether repfnz[] == EMPTY after reset. More...
|
|
int_t | CheckZeroDiagonal (int_t, int_t *, int_t *, int_t *) |
|
void | PrintDouble5 (char *, int_t, double *) |
|
void | PrintInt10 (char *, int_t, int_t *) |
|
int | file_PrintInt10 (FILE *, char *, int, int *) |
|
int | file_PrintLong10 (FILE *, char *, int_t, int_t *) |
|
Definitions which are precision-neutral.
– Distributed SuperLU routine (version 2.2) –
Lawrence Berkeley National Lab, Univ. of California Berkeley.
November 1, 2007
February 20, 2008
void bcast_tree |
( |
void * |
buf, |
|
|
int |
count, |
|
|
MPI_Datatype |
dtype, |
|
|
int |
root, |
|
|
int |
tag, |
|
|
gridinfo_t * |
grid, |
|
|
int |
scope, |
|
|
int * |
recvcnt |
|
) |
| |
Purpose
Broadcast an array of dtype numbers. The communication pattern
is a tree with number of branches equal to NBRANCHES.
The process ranks are between 0 and Np-1.
The following two pairs of graphs give different ways of viewing the same
algorithm. The first pair shows the trees as they should be visualized
when examining the algorithm. The second pair are isomorphic graphs
of the first, which show the actual pattern of data movement.
Note that a tree broadcast with NBRANCHES = 2 is isomorphic with a
hypercube broadcast (however, it does not require the number of nodes
to be a power of two to work).
    TREE BROADCAST, NBRANCHES = 2     *    TREE BROADCAST, NBRANCHES = 3
    root=2
i=4   &______________                 *
      |              \                *       root=2
i=2   &______         &______         * i=3     &______________________
      |      \        |      \        *         |          \           \
i=1   &__     &__     &__     &__     * i=1     &______     &______     &__
      |  \    |  \    |  \    |  \    *         |  \   \    |  \   \    |  \
      2   3   4   5   6   7   0   1   *         2   3   4   5   6   7   0   1
         ISOMORPHIC GRAPHS OF ABOVE, SHOWN IN MORE FAMILIAR TERMS:
               2                                           2
      _________|_________                       ___________|____________
     /         |         \                     /           |      |     \
    6          4          3                   5            0      3      4
   / \         |                             / \           |
  0   7        5                            6   7          1
  |
  1
Arguments
scope
Purpose
GET_PERM_C_DIST obtains a permutation matrix Pc, by applying the multiple
minimum degree ordering code by Joseph Liu to matrix A'*A or A+A',
or using approximate minimum degree column ordering by Davis et al.
The LU factorization of A*Pc tends to have less fill than the LU
factorization of A.
Arguments
ispec (input) colperm_t
Specifies what type of column permutation to use to reduce fill.
= NATURAL: natural ordering (i.e., Pc = I)
= MMD_AT_PLUS_A: minimum degree ordering on structure of A'+A
= MMD_ATA: minimum degree ordering on structure of A'*A
= METIS_AT_PLUS_A: MeTis on A'+A
A (input) SuperMatrix*
Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
of the linear equations is A->nrow. Currently, the type of A
can be: Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
In the future, more general A can be handled.
perm_c (output) int*
Column permutation vector of size A->ncol, which defines the
permutation matrix Pc; perm_c[i] = j means column i of A is
in position j in A*Pc.
Purpose
GET_PERM_C_PARMETIS obtains a permutation matrix Pc, by applying a
graph partitioning algorithm to the symmetrized graph A+A'. The
multilevel graph partitioning algorithm used is the
ParMETIS_V3_NodeND routine available in the parallel graph
partitioning package parMETIS.
The number of independent sub-domains noDomains computed by this
algorithm has to be a power of 2. Hence noDomains is the largest
power of 2 that does not exceed nprocs_i, where nprocs_i = nprow
* npcol is the number of processors used in SuperLU_DIST.
Arguments
A (input) SuperMatrix*
Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
of the linear equations is A->nrow. Matrix A is distributed
in NRformat_loc format.
perm_r (input) int_t*
Row permutation vector of size A->nrow, which defines the
permutation matrix Pr; perm_r[i] = j means row i of A is in
position j in Pr*A.
perm_c (output) int_t*
Column permutation vector of size A->ncol, which defines the
permutation matrix Pc; perm_c[i] = j means column i of A is
in position j in A*Pc.
nprocs_i (input) int*
Number of processors the input matrix is distributed on in a block
row format. It corresponds to number of processors used in
SuperLU_DIST.
noDomains (input) int*, must be power of 2
Number of independent domains to be computed by the graph
partitioning algorithm. ( noDomains <= nprocs_i )
sizes (output) int_t**, of size 2 * noDomains
Returns pointer to an array containing the number of nodes
for each sub-domain and each separator. Separators are stored
from left to right.
Memory for the array is allocated in this routine.
fstVtxSep (output) int_t**, of size 2 * noDomains
Returns pointer to an array containing first node for each
sub-domain and each separator.
Memory for the array is allocated in this routine.
Return value
< 0, number of bytes allocated on return from the symbolic factorization.
> 0, number of bytes allocated when out of memory.
Purpose
sp_colorder() permutes the columns of the original matrix. It performs
the following steps:
1. Apply column permutation perm_c[] to A's column pointers to form AC;
2. If options->Fact = DOFACT, then
(1) Compute column elimination tree etree[] of AC'AC;
(2) Post order etree[] to get a postordered elimination tree etree[],
and a postorder permutation post[];
(3) Apply post[] permutation to columns of AC;
(4) Overwrite perm_c[] with the product perm_c * post.
Arguments
options (input) superlu_options_t*
Specifies whether or not the elimination tree will be re-used.
If options->Fact == DOFACT, this means first time factor A,
etree is computed and output.
Otherwise, re-factor A, etree is input, unchanged on exit.
A (input) SuperMatrix*
Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
of the linear equations is A->nrow. Currently, the type of A can be:
Stype = SLU_NC or SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE.
In the future, more general A can be handled.
perm_c (input/output) int*
Column permutation vector of size A->ncol, which defines the
permutation matrix Pc; perm_c[i] = j means column i of A is
in position j in A*Pc.
If options->Fact == DOFACT, perm_c is both input and output.
On output, it is changed according to a postorder of etree.
Otherwise, perm_c is input.
etree (input/output) int*
Elimination tree of Pc*(A'+A)*Pc', dimension A->ncol.
If options->Fact == DOFACT, etree is an output argument,
otherwise it is an input argument.
Note: etree is a vector of parent pointers for a forest whose
vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
AC (output) SuperMatrix*
The resulting matrix after applying the column permutation
perm_c[] to matrix A. The type of AC can be:
Stype = SLU_NCP; Dtype = A->Dtype; Mtype = SLU_GE.
Purpose
sp_ienv_dist() is inquired to choose machine-dependent parameters for the
local environment. See ISPEC for a description of the parameters.
This version provides a set of parameters which should give good,
but not optimal, performance on many of the currently available
computers. Users are encouraged to modify this subroutine to set
the tuning parameters for their particular machine using the option
and problem size information in the arguments.
Arguments
=========
ISPEC (input) int
Specifies the parameter to be returned as the value of SP_IENV_DIST.
= 1: the panel size w; a panel consists of w consecutive
columns of matrix A in the process of Gaussian elimination.
The best value depends on the machine's cache characteristics.
= 2: the relaxation parameter relax; if the number of
nodes (columns) in a subtree of the elimination tree is less
than relax, this subtree is considered as one supernode,
regardless of their row structures.
= 3: the maximum size for a supernode, which must be greater
than or equal to relaxation parameter (see case 2);
= 4: the minimum row dimension for 2-D blocking to be used;
= 5: the minimum column dimension for 2-D blocking to be used;
= 6: the estimated fill factor for the adjacency structures
of L and U, compared with A;
(SP_IENV_DIST) (output) int
>= 0: the value of the parameter specified by ISPEC
< 0: if SP_IENV_DIST = -k, the k-th argument had an illegal value.
Purpose
sp_ienv_dist() is inquired to choose machine-dependent parameters for the
local environment. See ISPEC for a description of the parameters.
This version provides a set of parameters which should give good,
but not optimal, performance on many of the currently available
computers. Users are encouraged to modify this subroutine to set
the tuning parameters for their particular machine using the option
and problem size information in the arguments.
Arguments
ISPEC (input) int
Specifies the parameter to be returned as the value of SP_IENV_DIST.
= 1: the panel size w; a panel consists of w consecutive
columns of matrix A in the process of Gaussian elimination.
The best value depends on the machine's cache characteristics.
= 2: the relaxation parameter relax; if the number of
nodes (columns) in a subtree of the elimination tree is less
than relax, this subtree is considered as one supernode,
regardless of their row structures.
= 3: the maximum size for a supernode, which must be greater
than or equal to relaxation parameter (see case 2);
= 4: the minimum row dimension for 2-D blocking to be used;
= 5: the minimum column dimension for 2-D blocking to be used;
= 6: the estimated fill factor for the adjacency structures
of L and U, compared with A;
(SP_IENV_DIST) (output) int
>= 0: the value of the parameter specified by ISPEC
< 0: if SP_IENV_DIST = -k, the k-th argument had an illegal value.
Symmetric elimination tree.
p = spsymetree (A);
Find the elimination tree for symmetric matrix A.
This uses Liu's algorithm, and runs in time O(nz*log n).
Input:
Square sparse matrix A. No check is made for symmetry;
elements below and on the diagonal are ignored.
Numeric values are ignored, so any explicit zeros are
treated as nonzero.
Output:
Integer array of parents representing the etree, with n
meaning a root of the elimination forest.
Note:
This routine uses only the upper triangle, while sparse
Cholesky (as in spchol.c) uses only the lower. Matlab's
dense Cholesky uses only the upper. This routine could
be modified to use the lower triangle either by transposing
the matrix or by traversing it by rows with auxiliary
pointer and link arrays.
John R. Gilbert, Xerox, 10 Dec 1990
Based on code by JRG dated 1987, 1988, and 1990.
Modified by X.S. Li, November 1999.
float symbfact_dist |
( |
int |
, |
|
|
int |
, |
|
|
SuperMatrix * |
, |
|
|
int_t * |
, |
|
|
int_t * |
, |
|
|
int_t * |
, |
|
|
int_t * |
, |
|
|
Pslu_freeable_t * |
, |
|
|
MPI_Comm * |
, |
|
|
MPI_Comm * |
, |
|
|
mem_usage_t * |
|
|
) |
| |
Purpose
=======
symbfact_dist() performs symbolic factorization of matrix A suitable
for performing the supernodal Gaussian elimination with no pivoting (GENP).
This routine computes the structure of one column of L and one row of U
at a time. It uses:
o distributed input matrix
o supernodes
o symmetric structure pruning
Arguments
=========
nprocs_num (input) int
Number of processors SuperLU_DIST is executed on, and the input
matrix is distributed on.
nprocs_symb (input) int
Number of processors on which the symbolic factorization is
performed. It is equal to the number of independent domains
identified in the graph partitioning algorithm executed
previously and has to be a power of 2. It corresponds to
number of leaves in the separator tree.
A (input) SuperMatrix*
Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The
number of the linear equations is A->nrow. Matrix A is
distributed in NRformat_loc format.
Matrix A is not yet permuted by perm_c.
perm_c (input) int_t*
Column permutation vector of size A->ncol, which defines the
permutation matrix Pc; perm_c[i] = j means column i of A is
in position j in A*Pc.
perm_r (input) int_t*
Row permutation vector of size A->nrow, which defines the
permutation matrix Pr; perm_r[i] = j means row i of A is
in position j in Pr*A.
sizes (input) int_t*
Contains the number of vertices in each separator.
fstVtxSep (input) int_t*
Contains first vertex for each separator.
Pslu_freeable (output) Pslu_freeable_t*
Returns the local L and U structure, and global to local
information on the indexing of the vertices. Contains all
the information necessary for performing the data
distribution towards the numeric factorization.
num_comm (input) MPI_Comm*
Communicator for numerical factorization
symb_comm (input) MPI_Comm*
Communicator for symbolic factorization
symb_mem_usage (input) mem_usage_t *
Statistics on memory usage.
Return value
============
< 0, number of bytes allocated on return from the symbolic factorization.
> 0, number of bytes allocated when out of memory.
Sketch of the algorithm
=======================
Distribute the vertices on the processors using a subtree to
subcube algorithm.
Redistribute the structure of the input matrix A according to the
subtree to subcube computed previously for the symbolic
factorization routine. This implies in particular a distribution
from nprocs_num processors to nprocs_symb processors.
Perform symbolic factorization guided by the separator tree provided by
a graph partitioning algorithm. The symbolic factorization uses a
combined left-looking, right-looking approach.