SuperLU_DIST  4.0
superlu_dist on CPU and GPU clusters
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
Macros | Functions
pdgstrf.c File Reference

Performs LU factorization in parallel. More...

#include <math.h>
#include "omp.h"
#include "superlu_ddefs.h"
#include "dscatter.c"
#include "dlook_ahead_update.c"
#include "dSchCompUdt-2Ddynamic.c"

Macros

#define PHI_FRAMEWORK
 
#define PDGSTRF2   pdgstrf2_trsm
 
#define PDGSTRS2   pdgstrs2_omp
 
#define ISORT   /* Note: qsort() has bug on Mac */
 

Functions

void PDGSTRF2 (superlu_options_t *, int_t, int_t, double, Glu_persist_t *, gridinfo_t *, LocalLU_t *, MPI_Request *, int, SuperLUStat_t *, int *)
 
void PDGSTRS2 (int_t, int_t, Glu_persist_t *, gridinfo_t *, LocalLU_t *, SuperLUStat_t *)
 
void isort (int_t N, int_t *ARRAY1, int_t *ARRAY2)
 
void isort1 (int_t N, int_t *ARRAY)
 
int get_thread_per_process ()
 
int get_mic_offload ()
 
int_t get_max_buffer_size ()
 
int_t get_cublas_nb ()
 
int_t get_num_cuda_streams ()
 
int_t get_mnk_dgemm ()
 
int AssignMic (int my_rank)
 

Detailed Description

Performs LU factorization in parallel.

– Distributed SuperLU routine (version 4.0) –
Lawrence Berkeley National Lab, Univ. of California Berkeley.
October 1, 2014
Modified:
    September 1, 1999
    February 7, 2001  use MPI_Isend/MPI_Irecv
    October 15, 2008  latency-reducing panel factorization
    July    12, 2011  static scheduling and arbitrary look-ahead
    March   13, 2013  change NTAGS to MPI_TAG_UB value
Sketch of the algorithm 


The following relations hold:
    * A_kk = L_kk * U_kk
    * L_ik = A_ik * U_kk^(-1)
    * U_kj = L_kk^(-1) * A_kj


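    ----------------------------------
    |   |                            |
    ----|-----------------------------
    |   | \ U_kk|                    |
    |   |   \   |        U_kj        |
    |   |L_kk \ |         ||         |
    ----|-------|---------||----------
    |   |       |         \/         |
    |   |       |                    |
    |   | L_ik ==>       A_ij        |
    |   |       |                    |
    ----------------------------------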


Handle the first block of columns separately.
    * Factor diagonal and subdiagonal blocks and test for exact
      singularity. ( pdgstrf2(0), one column at a time )
    * Compute block row of U
    * Update trailing matrix
Loop over the remaining blocks of columns.
  mycol = MYCOL( iam, grid );
  myrow = MYROW( iam, grid );
  N = nsupers;
  For (k = 1; k < N; ++k) {
      krow = PROW( k, grid );
      kcol = PCOL( k, grid );
      Pkk = PNUM( krow, kcol, grid );
    * Factor diagonal and subdiagonal blocks and test for exact
      singularity.
      if ( mycol == kcol ) {
          pdgstrf2(k), one column at a time
      }
    * Parallel triangular solve
      if ( iam == Pkk ) multicast L_k,k to this process row;
      if ( myrow == krow && mycol != kcol ) {
         Recv L_k,k from process Pkk;
         for (j = k+1; j < N; ++j)
             if ( PCOL( j, grid ) == mycol && A_k,j != 0 )
                U_k,j = L_k,k \ A_k,j;
      }
    * Parallel rank-k update
      if ( myrow == krow ) multicast U_k,k+1:N to this process column;
      if ( mycol == kcol ) multicast L_k+1:N,k to this process row;
      if ( myrow != krow ) {
         Pkj = PNUM( krow, mycol, grid );
         Recv U_k,k+1:N from process Pkj;
      }
      if ( mycol != kcol ) {
         Pik = PNUM( myrow, kcol, grid );
         Recv L_k+1:N,k from process Pik;
      }
      for (j = k+1; j < N; ++j) {
         for (i = k+1; i < N; ++i)
             if ( myrow == PROW( i, grid ) && mycol == PCOL( j, grid )
                  && L_i,k != 0 && U_k,j != 0 )
                A_i,j = A_i,j - L_i,k * U_k,j;
      }
 }
 

Macro Definition Documentation

#define ISORT   /* Note: qsort() has bug on Mac */
#define PDGSTRF2   pdgstrf2_trsm
#define PDGSTRS2   pdgstrs2_omp
#define PHI_FRAMEWORK

Function Documentation

int AssignMic ( int  my_rank)
int_t get_cublas_nb ( )
int_t get_max_buffer_size ( )
int get_mic_offload ( )
int_t get_mnk_dgemm ( )
int_t get_num_cuda_streams ( )
int get_thread_per_process ( )
void isort ( int_t  N,
int_t * ARRAY1,
int_t * ARRAY2 
)
void isort1 ( int_t  N,
int_t * ARRAY 
)
void PDGSTRF2 ( superlu_options_t *  ,
int_t  ,
int_t  ,
double  ,
Glu_persist_t *  ,
gridinfo_t *  ,
LocalLU_t *  ,
MPI_Request *  ,
int  ,
SuperLUStat_t *  ,
int *   
)
void PDGSTRS2 ( int_t  ,
int_t  ,
Glu_persist_t *  ,
gridinfo_t *  ,
LocalLU_t *  ,
SuperLUStat_t *   
)