• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

sst/elements/genericProc/programs/MTGL/mtgl/SMVkernel.h

Go to the documentation of this file.
00001 /*  _________________________________________________________________________
00002  *
00003  *  MTGL: The MultiThreaded Graph Library
00004  *  Copyright (c) 2008 Sandia Corporation.
00005  *  This software is distributed under the BSD License.
00006  *  Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
00007  *  the U.S. Government retains certain rights in this software.
00008  *  For more information, see the README file in the top MTGL directory.
00009  *  _________________________________________________________________________
00010  */
00011 
00012 /****************************************************************************/
00013 /*! \file SMVkernel.h
00014 
00015     \brief Thread-safe sparse matrix data structures and algorithms.
00016 
00017     \author Robert Heaphy
00018 
00019     \date 2005
00020 */
00021 /****************************************************************************/
00022 
00023 #ifndef MTGL_SMVKERNEL_H
00024 #define MTGL_SMVKERNEL_H
00025 
00026 #include <cstdio>
00027 #include <cstdlib>
00028 #include <cmath>
00029 
00030 #include <mtgl/util.hpp>
00031 
00032 // Disable the warning about unknown pragmas.
00033 #ifndef __MTA__
00034 #pragma warning ( disable : 4068 )
00035 #endif
00036 
00037 #ifdef __MTA__
00038  #define BRAKET <size_type, T>
00039  #include <sys/mta_task.h>
00040  #include <machine/runtime.h>
00041 #else
00042  #define BRAKET <size_type, T>
00043 #endif
00044 
00045 // Current ISSUE: error handling is erratic.
00046 // Current ISSUE: look for NOT IMPLEMENTED and implement.
00047 
// Base classes-forward declarations.
template <typename size_type, typename T> class VectorBase;
template <typename size_type, typename T> class MatrixBase;

// Derived classes-forward declarations.
template <typename size_type, typename T> class DenseVector;

template <typename size_type, typename T> class SparseMatrixCSR;
template <typename size_type, typename T> class SparseMatrixCSC;
template <typename size_type, typename T> class SparseMatrixCOO;

// Forward declarations of the free function templates.  They must be visible
// before the class definitions because the classes befriend specific
// specializations of them (e.g. "operator* <size_type, T>").  Each function is
// declared exactly once here.

// Sparse matrix * dense vector products.
template <typename size_type, typename T>
DenseVector<size_type, T>
operator*(const SparseMatrixCSR<size_type, T>& a,
          const DenseVector<size_type, T>& b);

template <typename size_type, typename T>
DenseVector<size_type, T>
operator*(const SparseMatrixCSC<size_type, T>&,
           const DenseVector<size_type, T>&);

template <typename size_type, typename T>
DenseVector<size_type, T>
operator*(const SparseMatrixCOO<size_type, T>&,
          const DenseVector<size_type, T>&);

// Scalar * dense vector products.
template <typename size_type, typename T>
DenseVector<size_type, T>
operator*(size_type const, const DenseVector<size_type, T>&);

template <typename size_type, typename T>
DenseVector<size_type, T>
operator*(double const, const DenseVector<size_type, T>&);

// Scalar * CSR matrix products.
template <typename size_type, typename T>
SparseMatrixCSR<size_type, T>
operator*(size_type const, const SparseMatrixCSR<size_type, T>&);

template <typename size_type, typename T>
SparseMatrixCSR<size_type, T>
operator* (double const, const SparseMatrixCSR<size_type, T>&);

// Dense vector dot product.
template <typename size_type, typename T>
T
operator*(const DenseVector<size_type, T>&, const DenseVector<size_type, T>&);

// Diagonal of a CSR matrix, returned as a dense vector.
template <typename size_type, typename T>
DenseVector<size_type, T>
diagonal(const SparseMatrixCSR<size_type, T>& a);

// Transposed CSR matrix * dense vector product.
template <typename size_type, typename T>
DenseVector<size_type, T>
Transpose_SMVm(const SparseMatrixCSR<size_type, T>&,
               const DenseVector<size_type, T>&);
00111 
00112 /***********************  Base Classes  ****************************/
00113 
/*! \brief Base class that owns a malloc()-allocated array of T.

    Holds the element count (length) and the element buffer (values).  The
    buffer is released with free() by clear() and by the destructor.

    NOTE(review): no copy-assignment operator is defined here, so the
    compiler-generated one copies the pointer; derived classes are expected to
    provide their own (DenseVector does).
*/
template <typename size_type, typename T> class VectorBase {
protected:
  size_type length;                   // Number of elements in values.
  T* values;                          // Vector elements.

public:
  // Creates an empty vector with no storage.
  VectorBase() : length(0), values(0)  {}

  // Creates a vector of n elements, each set to T().
  // Terminates the program if the allocation fails.
  VectorBase (size_type const n) : length(n), values(0)
  {
    this->values = (T*) malloc (n * sizeof(T));

    // malloc(0) may legitimately return NULL, so only a failed non-empty
    // request is an error.
    if (this->values == NULL && n > 0)
    {
      fprintf(stderr, "error: out of memory in VectorBase\n");
      exit(1);
    }

    T* const this_values = this->values;    // Force MTA to place on stack.
    T const zero = T();                     // Force MTA to place on stack.
    size_type const finish = this->length;  // Force MTA to place on stack.

    #pragma mta assert parallel
    for (size_type i = 0; i < finish; i++) this_values[i] = zero;
  }

  // Deep-copies the elements of a into freshly allocated storage.
  // Terminates the program if the allocation fails.
  VectorBase (const VectorBase<size_type, T>& a) : length(a.length), values(0)
  {
    this->values = (T*) malloc (a.length * sizeof(T));

    if (this->values == NULL && a.length > 0)
    {
      fprintf(stderr, "error: out of memory in VectorBase\n");
      exit(1);
    }

    size_type const stop = a.length;       // Force MTA to place on stack.
    T* const this_values = this->values;   // Force MTA to place on stack.
    T* const a_values = a.values;          // Force MTA to place on stack.

    #pragma mta assert parallel
    for (size_type i = 0; i < stop; i++) this_values[i] = a_values[i];
  }

  // Releases the element buffer.
  ~VectorBase()
  {
    if (this->values) free (this->values);
    this->values = NULL;
  }

  // Releases the element buffer and resets the vector to the empty state.
  // The length is reset together with the pointer so that later loops and
  // bounds checks never index the released buffer.
  void clear()
  {
    if (this->values) free(this->values);
    this->values = NULL;
    this->length = 0;
  }

  // Declared only; no definition is visible in this file section.
  void VectorPrint (char const* name) const;
};
00160 
/*! \brief Base class holding the dimensions and raw storage of a sparse
           matrix.

    values holds the non-zero entries and index holds a format-specific offset
    array that derived classes allocate.  data_owned records whether the
    destructor should free() those two buffers.
*/
template <typename size_type, typename T> class MatrixBase {
protected:
  size_type nRow, nCol, nNonZero;  // Number of rows, columns, and non zeros.
  T* values;                       // Non-zero matrix elements.
  size_type* index;                // Generally used for sparse storage index.
  bool data_owned;                 // True if values / index are freed here.

public:
  // Creates an empty, owning matrix with no storage.
  MatrixBase() : nRow(0), nCol(0), nNonZero(0), values(0), index(0),
                 data_owned(true) {}

  // Creates a row x col matrix with room for count non-zeros.
  //
  // When own is true, a values buffer of count elements is allocated and
  // element i is initialised to i (the original placeholder fill).  When own
  // is false no buffer is allocated: the destructor would never free it, so
  // allocating one could only leak.  values stays null until the derived
  // class attaches external storage.
  MatrixBase(size_type const row, size_type const col,
             size_type const count, bool own = true) :
    nRow(row), nCol(col), nNonZero(count), values(0), index(0),
    data_owned(own)
  {
    if (!own) return;

    values = (T*) malloc (count * sizeof(T));

    // malloc(0) may legitimately return NULL; only a failed non-empty
    // request is an error.
    if (values == 0 && count > 0)
    {
      fprintf(stderr, "error: out of memory in MatrixBase\n");
      exit(1);
    }

    T* const this_values = this->values;      // Force MTA to place on stack.
    size_type const finish = this->nNonZero;

    #pragma mta assert parallel
    for (size_type i = 0; i < finish; i++) this_values[i] = i;
  }

  // Frees values and index if this object owns them, then nulls both.
  ~MatrixBase()
  {
    if (data_owned && this->values) free(this->values);
    this->values = 0;

    if (data_owned && this->index) free(this->index);
    this->index  = 0;
  }

  // Deep-copies the dimensions and the values array of a.  The copy always
  // owns its storage.  index is left null; derived classes copy it.
  MatrixBase(const MatrixBase<size_type, T>& a) :
    nRow(a.nRow), nCol(a.nCol), nNonZero(a.nNonZero),
    values(0), index(0), data_owned(true)        // not debugged!
  {
    this->values = (T*) malloc (a.nNonZero * sizeof (T));

    if (this->values == 0 && a.nNonZero > 0)
    {
      fprintf(stderr, "error: out of memory in MatrixBase\n");
      exit(1);
    }

    T* const this_values = this->values;  // Force MTA to place on stack.
    T* const a_values = a.values;         // Force MTA to place on stack.
    size_type const stop = a.nNonZero;

    #pragma mta assert parallel
    for (size_type i = 0; i < stop; i++) this_values[i] = a_values[i];
  }

  // Declared only; no definition is visible in this file section.
  void MatrixPrint (char const* name);

  // Returns the number of stored non-zero entries.
  size_type num_nonzero() const { return this->nNonZero; }

  // Installs new_values as the values array and returns the previous pointer.
  // Nothing is freed here; the caller takes over the returned buffer.
  T* swap_values(T* new_values)
  {
    T* tmp = values;
    values = new_values;
    return tmp;
  }

  // Returns the raw index array (may be null).
  size_type* get_index() const { return index; }
};
00221 
00222 /***************************  DenseVector *************************************/
00223 
00224 template <typename size_type, typename T = double>
00225 class DenseVector : public VectorBase<size_type, T> {
00226 public:
00227   DenseVector() {}
00228   DenseVector (size_type const n) : VectorBase<size_type, T> (n) {}
00229   DenseVector (const DenseVector<size_type, T>& a) :
00230     VectorBase<size_type, T> (a) {}
00231 
00232   ~DenseVector() {}
00233 
00234   DenseVector<size_type, T>& operator=(const DenseVector<size_type, T>& a)
00235   {
00236     if (this->length != a.length) printf("DenseVector copy assignment error\n");
00237 
00238     if (this != &a)
00239     {
00240       size_type const stop = this->length;   // Force MTA to place on stack.
00241       T*  const this_values = this->values;  // Force MTA to place on stack.
00242       T*  const a_values = a.values;         // Force MTA to place on stack.
00243 
00244       #pragma mta assert no dependence
00245       for (size_type i = 0; i < stop; i++) this_values[i] = a_values[i];
00246     }
00247 
00248     return *this;
00249   }
00250 
00251   const T operator[](size_type idx)
00252   {
00253     if (idx < 0 || idx > this->length)
00254     {
00255       fprintf(stderr, "error: index out of bounds in DenseVector\n");
00256       exit(1);
00257     }
00258 
00259     return(this->values[idx]);
00260   }
00261 
00262   DenseVector<size_type, T>& operator+=(const DenseVector<size_type, T>& a)
00263   {
00264     size_type const stop = this->length;  // Force MTA to place on stack.
00265     T* const this_values = this->values;  // Force MTA to place on stack.
00266     T* const a_values = a.values;         // Force MTA to place on stack.
00267 
00268     #pragma mta assert parallel
00269     for (size_type i = 0; i < stop; i++)
00270     {
00271 //      safe_incr(this_values[i], a_values[i]);
00272       this_values[i] += a_values[i];
00273     }
00274 
00275     return *this;
00276   }
00277 
00278   DenseVector<size_type, T>& operator-=(const DenseVector<size_type, T>& a)
00279   {
00280     size_type const stop = this->length;    // Force MTA to place on stack.
00281 //    T* const this_values = this->values;    // Force MTA to place on stack.
00282 //    T* const a_values = a.values;           // Force MTA to place on stack.
00283 
00284     #pragma mta assert parallel
00285     for (size_type i = 0; i < stop; i++)
00286     {
00287       // safe_incr(this->values[i], -a.values[i]);
00288       this->values[i] += -a.values[i];
00289     }
00290 
00291     return *this;
00292   }
00293 
00294   DenseVector<size_type, T>& operator() ()
00295   {
00296     size_type const stop = this->length;  // Force MTA to place on stack.
00297     T* const this_values = this->values;  // Force MTA to place on stack.
00298     T const zero = T();
00299 
00300     #pragma mta assert no dependence
00301     for (size_type i = 0; i < stop; i++) this_values[i] = zero;
00302 
00303     return *this;
00304   }
00305 
00306   T norm2 () const
00307   {
00308     T temp = T();
00309     size_type const stop = this->length;
00310     T* const this_values = this->values;  // Force MTA to place on stack.
00311 
00312     #pragma mta assert parallel
00313     for (size_type i = 0; i < stop; i++)
00314     {
00315       // safe_incr(temp, this_values[i] * this_values[i]);
00316       temp += this_values[i] * this_values[i];
00317     }
00318 
00319     return (T) sqrt(temp);
00320   }
00321 
00322   T norm_inf () const
00323   {
00324     T temp = T();
00325     size_type const stop = this->length;
00326     T* const this_values = this->values;  // Force MTA to place on stack.
00327 
00328     #pragma mta assert parallel
00329     for (size_type i = 0; i < stop; i++)
00330     {
00331       if (fabs(this_values[i]) > temp) temp = (T) fabs(this_values[i]);
00332     }
00333 
00334     return temp;
00335   }
00336 
00337   size_type VectorLength() const { return this->length; }
00338 
00339   friend DenseVector<size_type, T>
00340   operator* BRAKET (const SparseMatrixCSR<size_type, T>&,
00341                     const DenseVector<size_type, T>&);
00342 
00343   DenseVector<size_type, T> friend
00344   operator* BRAKET (const SparseMatrixCSC<size_type, T>&,
00345                     const DenseVector<size_type, T>&);
00346 
00347   DenseVector<size_type, T> friend
00348   operator* BRAKET (const SparseMatrixCOO<size_type, T>&,
00349                     const DenseVector<size_type, T>&);
00350 
00351   DenseVector<size_type, T> friend
00352   operator* BRAKET (size_type const, const DenseVector<size_type, T>&);
00353 
00354   DenseVector<size_type, T> friend
00355   operator* BRAKET (double const, const DenseVector<size_type, T>&);
00356 
00357   T friend
00358   operator* BRAKET (const DenseVector<size_type, T>&,
00359                     const DenseVector<size_type, T>&);
00360 
00361   DenseVector<size_type, T> friend
00362   diagonal BRAKET (const SparseMatrixCSR<size_type, T>&);
00363 
00364   DenseVector<size_type, T> friend
00365   Transpose_SMVm BRAKET (const SparseMatrixCSR<size_type, T>&,
00366                          const DenseVector<size_type, T>&);
00367 
00368   // Approximate solver, asolve, derived from  asolve.c found with linbcg.c
00369   // in "Numerical Recipes in C", second edition, Press, Vetterling, Teukolsky,
00370   // Flannery, pp 86-89.
00371   DenseVector<size_type, T> asolve(DenseVector<size_type, T>& a)
00372   {
00373     #pragma mta trace "asolve(DenseVector) start"
00374     DenseVector<size_type, T> temp(this->length);
00375 
00376     size_type const stop = this->length;
00377     T* const temp_values = temp.values;   // Force MTA to place on stack.
00378     T* const this_values = this->values;  // Force MTA to place on stack.
00379     T* const a_values = a.values;         // Force MTA to place on stack.
00380 
00381     #pragma mta assert no dependence
00382     for (size_type i = 0; i < stop; i++)
00383     {
00384       T const zero = T();
00385       temp_values[i] = (this_values[i] != zero) ?
00386                        a_values[i] / this_values[i] : a_values[i];
00387     }
00388 
00389     #pragma mta trace "asolve(DenseVector) stop"
00390 
00391     return temp;
00392   }
00393 
00394   // The following methods are for development and may be removed / modified.
00395   /* DenseVector::fill() */
00396   void fill(T const* const val)
00397   {
00398     size_type const stop = this->length;
00399     T* const this_values = this->values;  // Force MTA to place on stack.
00400 
00401     #pragma mta assert no dependence
00402     for (size_type i = 0; i < stop; i++) this_values[i] = val[i];
00403   }
00404 
00405   void VectorPrint(char const* name)
00406   {
00407     printf("DenseVector Print %s length %d\n", name, this->length);
00408 
00409     size_type minimum = (this->length < 10) ? this->length : 10;
00410 
00411     printf("DenseVector Values: ");
00412     for (size_type i = 0; i < minimum; i++)
00413     {
00414       printf("%g, ", this->values[i]);
00415     }
00416     printf("\n");
00417   }
00418 };
00419 
00420 template <typename size_type, typename T> DenseVector<size_type, T> inline
00421 operator*(size_type const a, const DenseVector<size_type, T>& b)
00422 {
00423   #pragma mta trace "operator* (int, DenseVector) start"
00424 
00425   DenseVector<size_type, T> temp (b.length);
00426 
00427   size_type const stop = b.length;
00428   T* const temp_values = temp.values;  // Force MTA to place on stack.
00429   T* const b_values = b.values;        // Force MTA to place on stack.
00430 
00431   #pragma mta assert no dependence
00432   for (size_type i = 0; i < stop; i++) temp_values[i] = a * b_values[i];
00433 
00434   #pragma mta trace "operator* (int, DenseVector) stop"
00435 
00436   return temp;
00437 }
00438 
00439 template <typename size_type, typename T> DenseVector<size_type, T> inline
00440 operator*(double const a, const DenseVector<size_type, T>& b)
00441 {
00442   DenseVector<size_type, T> temp (b.length);
00443 
00444   size_type const stop = b.length;
00445   T* const temp_values = temp.values;  // Force MTA to place on stack.
00446   T* const b_values = b.values;        // Force MTA to place on stack.
00447 
00448   #pragma mta assert no dependence
00449   for (size_type i = 0; i < stop; i++) temp_values[i] = a * b_values[i];
00450 
00451   return temp;
00452 }
00453 
00454 template <typename size_type, typename T> T inline
00455 operator* (const DenseVector<size_type, T>& a,
00456            const DenseVector<size_type, T>& b)
00457 {
00458   T temp = T();
00459   size_type const stop = a.length;
00460   T* const a_values = a.values;   // Force MTA to place on stack.
00461   T* const b_values = b.values;   // Force MTA to place on stack.
00462 
00463   #pragma mta assert parallel
00464   for (size_type i = 0; i < stop; i++)
00465   {
00466     //safe_incr(temp, a_values[i] * b_values[i]) ;
00467     temp += a_values[i] * b_values[i];
00468   }
00469 
00470   return temp;
00471 }
00472 
00473 template <typename size_type, typename T>
00474 DenseVector<size_type, T>
00475 operator-(const DenseVector<size_type, T>& a,
00476           const DenseVector<size_type, T>& b)
00477 {
00478   DenseVector<size_type, T> temp(a);
00479   temp -= b;
00480   return temp;
00481 }
00482 
00483 template <typename size_type, typename T>
00484 DenseVector<size_type, T>
00485 operator+(const DenseVector<size_type, T>& a,
00486           const DenseVector<size_type, T>& b)
00487 {
00488   DenseVector<size_type, T> temp(a);
00489   temp += b;
00490   return temp;
00491 }
00492 
00493 /***************************** CSR  SparseMatrix ******************************/
00494 /* CSR : Compressed Sparse Row                                                */
00495 template <typename size_type, typename T = double>
00496 class SparseMatrixCSR : public MatrixBase<size_type, T> {
00497 private:
00498   size_type* columns;
00499 
00500 public:
00501   typedef size_type* column_iterator;
00502   typedef T* value_iterator;
00503 
00504   SparseMatrixCSR() : MatrixBase<size_type, T>(), columns(0) {}
00505 
00506   // SparseMatrixCSR Constructor #1.
00507   SparseMatrixCSR(size_type const row, size_type const col,
00508                   size_type const count) :
00509     MatrixBase<size_type, T> (row, col, count, true)
00510   {
00511     this->columns = (size_type*) malloc(count * sizeof(size_type));
00512 
00513     this->index = (size_type*) malloc((this->nRow + 1) * sizeof(size_type));
00514 
00515     // Force MTA to place on stack.
00516     size_type* const this_columns = this->columns;
00517     size_type const finish = this->nNonZero;
00518 
00519     #pragma mta assert parallel
00520     for (size_type i = 0; i < finish; i++) this_columns[i] = 0;
00521 
00522     // Force MTA to place on stack.
00523     size_type* const this_index = this->index;
00524     size_type const stop = this->nRow + 1;
00525 
00526     #pragma mta assert parallel
00527     for (size_type i = 0; i < stop; i++) this_index[i] = 0;
00528   }
00529 
00530   // SparseMatrixCSR Constructor #2.
00531   SparseMatrixCSR(const size_type row, const size_type col,
00532                   const size_type count, size_type* indx, T* val,
00533                   size_type*  cols) :
00534     MatrixBase<size_type, T>(row, col, count, false),
00535     MatrixBase<size_type, T>::index(indx), columns(cols) {}
00536 
00537   // SparseMatrixCSR Copy Constructor.
00538   SparseMatrixCSR(const SparseMatrixCSR<size_type, T>& a) :
00539     MatrixBase<size_type, T> (a)
00540   {
00541     columns = (size_type*) malloc(this->nNonZero * sizeof (size_type));
00542 
00543     this->index  = (size_type*) malloc((this->nRow + 1) * sizeof (size_type));
00544 
00545     size_type const stop = this->nNonZero;
00546 
00547     // Force MTA to place on stack.
00548     size_type* const this_columns = this->columns;
00549 
00550     size_type* const a_columns = a.columns;  // Force MTA to place on stack.
00551     size_type* const a_index = a.index;      // Force MTA to place on stack.
00552 
00553     #pragma mta assert parallel
00554     for (size_type i = 0; i < stop; i++) this_columns[i] = a_columns[i];
00555 
00556     size_type* const this_index = this->index;  // Force MTA to place on stack.
00557     size_type const end = this->nRow + 1;       // Force MTA to place on stack.
00558 
00559     #pragma mta assert parallel
00560     for (size_type i = 0; i < end; i++) this_index [i] = a_index[i];
00561   }
00562 
00563   ~SparseMatrixCSR()
00564   {
00565     if (this->data_owned && this->columns) free (this->columns);
00566     if (this->data_owned && this->index) free (this->index);
00567 
00568     this->columns = 0;
00569     this->index   = 0;
00570   }
00571 
00572   size_type* get_index() const { return this->index; }
00573 
00574   void init(const size_type row, const size_type col, const size_type count,
00575             size_type* indx, T* vals, size_type*  cols)
00576   {
00577     clear();
00578 
00579     this->nRow = row;
00580     this->nCol = col;
00581     this->nNonZero = count;
00582     this->index = indx;
00583     this->columns = cols;
00584     this->values = vals;
00585     this->data_owned = false;
00586   }
00587 
00588   void clear()
00589   {
00590     if (this->data_owned && this->values) free(this->values);
00591     this->values = 0;
00592 
00593     if (this->data_owned && this->index) free(this->index);
00594     this->index  = 0;
00595   }
00596 
00597   SparseMatrixCSR<size_type, T>&
00598   operator=(const SparseMatrixCSR<size_type, T>& a)
00599   {
00600     if (this != &a)
00601     {
00602       this->nRow = a.nRow;
00603       this->nCol = a.nCol;
00604       this->nNonZero = a.nNonZero;
00605 
00606       if (columns) free (columns);
00607       columns = (size_type*) malloc(this->nNonZero * sizeof(size_type));
00608 
00609       if (this->index) free (this->index);
00610       this->index = (size_type*) malloc((this->nRow + 1) * sizeof(size_type));
00611 
00612       if (this->values) free (this->values);
00613       this->values = (T*) malloc(this->nNonZero * sizeof(T));
00614 
00615       // Force MTA to place on stack.
00616       size_type const stop = this->nNonZero;
00617       size_type* const this_columns = this->columns;
00618       T* const this_values = this->values;
00619       size_type* const a_columns = a.columns;
00620       T* const a_values = a.values;
00621 
00622       #pragma mta assert parallel
00623       for (size_type i = 0; i < stop; i++)
00624       {
00625         this_columns[i] = a_columns[i];
00626         this_values[i] = a_values[i];
00627       }
00628 
00629       // Force MTA to place on stack.
00630       size_type const end = this->nRow + 1;
00631       size_type* const this_index = this->index;
00632       size_type* const a_index = a.index;
00633 
00634       #pragma mta assert parallel
00635       for (size_type i = 0; i < end; i++) this_index[i] = a_index[i];
00636     }
00637 
00638     return *this;
00639   }
00640 
00641   SparseMatrixCSR<size_type, T>&
00642   operator=(const SparseMatrixCSC<size_type, T>& a)
00643   {
00644     printf("NOT IMPLEMENTED Converting from CSC to CSR\n");
00645     return *this;
00646   }
00647 
00648   DenseVector<size_type, T> friend
00649   operator* <size_type, T> (const SparseMatrixCSR<size_type, T>&,
00650                             const DenseVector<size_type, T>&);
00651 
00652   SparseMatrixCSR<size_type, T> friend
00653   operator* <size_type, T> (double const,
00654                             const SparseMatrixCSR<size_type, T>&);
00655 
00656   SparseMatrixCSR<size_type, T> friend
00657   operator* <size_type, T> (size_type const,
00658                             const SparseMatrixCSR<size_type, T>&);
00659 
00660   DenseVector<size_type, T> friend
00661   diagonal BRAKET (const SparseMatrixCSR<size_type, T>&);
00662 
00663   DenseVector<size_type, T> friend
00664   Transpose_SMVm BRAKET (const SparseMatrixCSR<size_type, T>&,
00665                          const DenseVector<size_type, T>&);
00666 
00667   /* SparseMatrixCSR fill()
00668    *
00669    */
00670   void fill(size_type const* const indx, T const* const val,
00671             size_type const* const cols)
00672   {
00673     // Force MTA to place on stack.
00674     size_type const stop = this->nRow;
00675     size_type* const this_index = this->index;
00676 
00677     #pragma mta assert no dependence
00678     for (size_type rows = 0; rows < stop; rows++) this_index[rows] = indx[rows];
00679 
00680     this_index[this->nRow] = this->nNonZero;
00681 
00682     // Force MTA to place on stack.
00683     size_type const end = this->nNonZero;
00684     T* const this_values = this->values;
00685     size_type* const this_columns = this->columns;
00686 
00687     #pragma mta assert no dependence
00688     for (size_type i = 0; i < end; i++)
00689     {
00690       this_values[i]  = val[i];
00691       this_columns[i] = cols[i];
00692     }
00693   }
00694 
00695   size_type col_index(size_type row) const { return this->index[row]; }
00696 
00697   T* col_values_begin(size_type row)
00698   {
00699     if ((row >= 0) && (row < this->nRow))
00700     {
00701       return &this->values[this->index[row]];
00702     }
00703     else
00704     {
00705       return 0;
00706     }
00707   }
00708 
00709   T* col_values_end(size_type row)
00710   {
00711     size_type ind = row + 1;
00712 
00713     if ((ind >= 0) && (ind <= this->nRow))
00714     {
00715       return &this->values[this->index[ind]];
00716     }
00717     else
00718     {
00719       return 0;
00720     }
00721   }
00722 
00723   size_type* col_indices_begin(size_type row)
00724   {
00725     if ((row >= 0) && (row < this->nRow))
00726     {
00727       return &this->columns[this->index[row]];
00728     }
00729     else
00730     {
00731       return 0;
00732     }
00733   }
00734 
00735   size_type column(size_type j)
00736   {
00737     //if ((j >= 0) && (row < this->nCol))
00738     return this->columns[j];
00739     //else
00740     //  return 0;
00741   }
00742 
00743   size_type* col_indices_end(size_type row)
00744   {
00745     size_type ind = row + 1;
00746 
00747     if ((ind >= 0) && (ind <= this->nRow))
00748     {
00749       return &this->columns[this->index[ind]];
00750     }
00751     else
00752     {
00753       return 0;
00754     }
00755   }
00756 
00757   void MatrixPrint (char const* name) const
00758   {
00759     printf("SparseMatrixCSR Print %s row %d col %d\n",
00760            name, this->nRow, this->nCol);
00761   }
00762 
00763   size_type MatrixRows() const { return this->nRow; }
00764   size_type MatrixCols() const { return this->nCol; }
00765 };
00766 
00767 template <typename size_type, typename T>
00768 DenseVector<size_type, T>
00769 operator*(const SparseMatrixCSR<size_type, T>& a,
00770           const DenseVector<size_type, T>& b)
00771 {
00772   #pragma mta trace "operator* (SparseMatrixCSR, DenseVector) start"
00773 
00774   if (a.nCol != b.length)
00775   {
00776     printf("INCOMPATIBLE SparseMatrixCSR * DenseVector multiplication\n");
00777     exit(1);
00778   }
00779 
00780   DenseVector<size_type, T> temp(b.length);
00781   T* const temp_values = temp.values;   // force MTA to place on stack
00782   T const zero = T();
00783   size_type const finish = temp.length;
00784 
00785   #pragma mta assert parallel
00786   for (size_type i = 0; i < finish; i++) temp_values[i] = zero;
00787 
00788 #ifdef __MTA__
00789   size_type starttimer = mta_get_clock(0);
00790 #endif
00791 
00792   size_type const stop = a.nRow;
00793   size_type* const a_index = a.index;      // Force MTA to place on stack.
00794   T* const a_values = a.values;            // Force MTA to place on stack.
00795   T* const b_values = b.values;            // Force MTA to place on stack.
00796   size_type* const a_columns = a.columns;  // Force MTA to place on stack.
00797 
00798   #pragma mta assert parallel
00799   for (size_type row = 0; row < stop; row++)
00800   {
00801     #pragma mta trace "next_row"
00802 
00803     size_type const start  = a_index[row];
00804     size_type const finish = a_index[row + 1];
00805 
00806     for (size_type i = start; i < finish; i++)
00807     {
00808       temp_values[row] += a_values[i] * b_values[ a_columns[i] ];
00809     }
00810   }
00811 
00812 #ifdef __MTA__
00813   size_type stoptimer = mta_get_clock(starttimer);
00814   // printf("MVm total time %g\n", stoptimer/220000000.0);
00815 #endif
00816 
00817   #pragma mta trace "operator* (SparseMatrixCSR, DenseVector) stop"
00818 
00819   return temp;
00820 }
00821 
00822 template <typename size_type, typename T>
00823 SparseMatrixCSR<size_type, T>
00824 operator*(size_type const a, const SparseMatrixCSR<size_type, T>& b)
00825 {
00826   SparseMatrixCSR<size_type, T> temp (b.nRow, b.nCol, b.nNonZero);
00827 
00828   size_type const stop = b.nNonZero;
00829   T* const temp_values = temp.values;   // Force MTA to place on stack.
00830   T* const b_values = b.values;         // Force MTA to place on stack.
00831 
00832   #pragma mta assert no dependence
00833   for (size_type i = 0; i < stop; i++) temp_values[i] = a * b_values[i];
00834 
00835   return temp;
00836 }
00837 
00838 template <typename size_type, typename T> SparseMatrixCSR<size_type, T>
00839 operator*(double const a, const SparseMatrixCSR<size_type, T>& b)
00840 {
00841   SparseMatrixCSR<size_type, T> temp (b.nRow, b.nCol, b.nNonZero);
00842 
00843   size_type const stop = b.nNonZero;
00844   T* temp_values = temp.values;   // Force MTA to place on stack.
00845   T* b_values = b.values;         // Force MTA to place on stack.
00846 
00847   #pragma mta assert no dependence
00848   for (size_type i = 0; i < stop; i++) temp_values[i] = a * b_values[i];
00849 
00850   return temp;
00851 }
00852 
00853 template <typename size_type, typename T> DenseVector<size_type, T> diagonal (const SparseMatrixCSR<size_type, T>& a)
00854 {
00855   #pragma mta trace "diagonal(SparseMatrixCSR) start"
00856   if (a.nRow != a.nCol)
00857   {
00858     printf("diagonal called on non square matrix\n");
00859     //printf("nRow=%d, nCol=%d\n", a.nRow, a.nCol);
00860     exit(1);
00861   }
00862 
00863   DenseVector<size_type, T> temp(a.nRow);
00864   temp();
00865 
00866   size_type const finish = a.nRow;
00867   size_type* const a_index = a.index;      // Force MTA to place on stack.
00868   size_type* const a_columns = a.columns;  // Force MTA to place on stack.
00869   T* const a_values = a.values;            // Force MTA to place on stack.
00870   T* const temp_values = temp.values;      // Force MTA to place on stack.
00871 
00872   #pragma mta assert parallel
00873   #pragma mta loop future
00874   for (size_type row = 0; row < finish; row++)
00875   {
00876     size_type const start = a_index[row];
00877     size_type const stop  = a_index[row + 1];
00878 
00879     #pragma mta assert parallel
00880     for (size_type i = start; i < stop; i++)
00881     {
00882       if (row == a_columns[i])
00883       {
00884         temp_values[row] = a_values[i];
00885 #ifndef __MTA__
00886         break;
00887 #endif
00888       }
00889     }
00890   }
00891 
00892   #pragma mta trace "diagonal(SparseMatrixCSR) stop"
00893 
00894   return temp;
00895 }
00896 
00897 template <typename size_type, typename T>
00898 DenseVector<size_type, T>
00899 Transpose_SMVm (const SparseMatrixCSR<size_type, T>& a,
00900                 const DenseVector<size_type, T>& b)
00901 {
00902   #pragma mta trace "Transpose_SMVm start"
00903   if (a.nCol != b.length)
00904   {
00905     printf("INCOMPATIBLE Transpose (SparseMatrixCSR) * DenseVector "
00906            "multiplication\n");
00907     exit(1);
00908   }
00909 
00910   DenseVector<size_type, T> temp(b.length);
00911   T* const temp_values = temp.values;      // Force MTA to place on stack.
00912   T* const a_values = a.values;            // Force MTA to place on stack.
00913   T* const b_values = b.values;            // Force MTA to place on stack.
00914   size_type* const a_index = a.index;      // Force MTA to place on stack.
00915   size_type* const a_columns = a.columns;  // Force MTA to place on stack.
00916   size_type const stop = temp.length;
00917 
00918   #pragma mta assert no dependence
00919   for (size_type i = 0; i < stop; i++)
00920   {
00921     temp_values[i] = T();
00922   }
00923 
00924   size_type const finish = temp.length;
00925 
00926   #pragma mta assert parallel
00927   for (size_type row = 0; row < finish; row++)
00928   {
00929     size_type const start = a_index[row];     // Force MTA to place on stack.
00930     size_type const stop = a_index[row + 1];  // Force MTA to place on stack.
00931 
00932     for (size_type i = start; i < stop; i++)
00933     {
00934       T temp_i = mt_readfe(temp_values[ a_columns[i] ]);
00935       temp_i += a_values[i] * b_values[row];
00936       mt_write(temp_values[ a_columns[i] ], temp_i);
00937       //mt_incr(temp_values[a_columns[i]], a_values[i] * b_values[row]);
00938       //temp_values[ a_columns[i] ] += a_values[i] * b_values[row];
00939     }
00940   }
00941 
00942   #pragma mta trace "Transpose_SMVm start"
00943 
00944   return temp;
00945 }
00946 
00947 /***************************** CSC  SparseMatrix ******************************/
00948 
00949 template <typename size_type, typename T = double>
00950 class SparseMatrixCSC : MatrixBase<size_type, T> {
00951 private:
00952   size_type* rows;
00953 
00954 public:
00955   SparseMatrixCSC<size_type, T> () : MatrixBase<size_type, T> ()
00956   { rows = 0; }
00957 
00958   SparseMatrixCSC<size_type, T>(size_type const row, size_type const col,
00959                                 size_type const count) :
00960     MatrixBase<size_type, T> (row, col, count)
00961   {
00962     rows  = (size_type*) malloc (count * sizeof(size_type));
00963 
00964     this->index = (size_type*) malloc ((this->nCol + 1) * sizeof(size_type));
00965   }
00966 
00967   SparseMatrixCSC<size_type, T>(const SparseMatrixCSC<size_type, T>&a) :
00968     MatrixBase<size_type, T> (a)
00969   {
00970     rows = (size_type*) malloc(this->nNonZero * sizeof(size_type));
00971 
00972     this->index = (size_type*) malloc((this->nCol + 1) * sizeof(size_type));
00973 
00974     size_type* const a_rows = a.rows;         // Force MTA to place on stack.
00975     size_type* const this_rows = this->rows;  // Force MTA to place on stack.
00976     size_type const stop = this->nNonZero;
00977 
00978     #pragma mta assert parallel
00979     for (size_type i = 0; i < stop; i++) this_rows[i] = a_rows[i];
00980 
00981     size_type* const this_index = this->index;  // Force MTA to place on stack.
00982     size_type* const a_index = a.index;         // Force MTA to place on stack.
00983     size_type const end = this->nCol + 1;
00984 
00985     #pragma mta assert parallel
00986     for (size_type i = 0; i < end; i++) this_index[i] = a_index[i];
00987   }
00988 
00989   ~SparseMatrixCSC<size_type, T> ()
00990   {
00991     if (this->data_owned && this->rows) free (this->rows);
00992     if (this->data_owned && this->index) free (this->index);
00993 
00994     this->rows  = 0;
00995     this->index = 0;
00996   }
00997 
00998   SparseMatrixCSC<size_type, T>&
00999   operator=(const SparseMatrixCSR<size_type, T>& a)
01000   {
01001     printf("NOT IMPLEMENTED: Converting from CSR to CSC\n");
01002     return *this;
01003   }
01004 
01005   SparseMatrixCSC<size_type, T>&
01006   operator=(const SparseMatrixCSC<size_type, T>& a)
01007   {
01008     if (this != &a)
01009     {
01010       this->nRow = a.nRow;
01011       this->nCol = a.nCol;
01012       this->nNonZero = a.nNonZero;
01013 
01014       size_type* const this_rows = this->rows;  // Force MTA to place on stack.
01015       size_type* const a_rows = a.rows;         // Force MTA to place on stack.
01016       T* const this_values = this->values;      // Force MTA to place on stack.
01017       T* const a_values = a.values;             // Force MTA to place on stack.
01018       size_type const stop = this->nNonZero;
01019 
01020       #pragma mta assert no dependence
01021       for (size_type i = 0; i < stop; i++)
01022       {
01023         this_rows[i]   = a_rows[i];
01024         this_values[i] = a_values[i];
01025       }
01026 
01027       size_type* const this_index = this->index; // Force MTA to place on stack.
01028       size_type* const a_index = a.index;        // Force MTA to place on stack.
01029       size_type const end = this->nCol + 1;
01030 
01031       #pragma mta assert no dependence
01032       for (size_type i = 0; i < end; i++) this_index[i] = a_index[i];
01033     }
01034 
01035     return *this;
01036   }
01037 
01038   SparseMatrixCSC<size_type, T>& operator()(SparseMatrixCSR<size_type, T>&);
01039 
01040   friend DenseVector<size_type, T>
01041   operator* <size_type, T> (const SparseMatrixCSC<size_type, T>&,
01042                             const DenseVector<size_type, T>&);
01043 
01044   void MatrixPrint (char const* name)
01045   {
01046     printf("SparseMatrixCSC Print %s row %d col %d\n",
01047            name, this->nRow, this->nCol);
01048   }
01049 
01050   size_type MatrixRows() const { return this->nRow; }
01051   size_type MatrixCols() const { return this->nCol; }
01052 };
01053 
01054 template <typename size_type, typename T> DenseVector<size_type, T>
01055 operator*(const SparseMatrixCSC<size_type, T>& a,
01056           const DenseVector<size_type, T>& b)
01057 {
01058   if (a.nCol != b.length)
01059   {
01060     printf("INCOMPATIBLE SparseMatrixCSC * DenseVector multiplication\n");
01061     exit(1);
01062   }
01063 
01064   DenseVector<size_type, T> temp(b.length);
01065   T* const temp_values = temp.values;   // force MTA to place on stack
01066   size_type const istop = temp.length;
01067   T const zero = T();
01068 
01069   #pragma mta assert parallel
01070   for (size_type i = 0; i < istop; i++) temp_values[i] = zero;
01071 
01072   size_type const colstop = a.nCol;
01073   size_type* const a_index = a.index;  // Force MTA to place on stack.
01074   size_type* const a_rows = a.rows;    // Force MTA to place on stack.
01075   T* const a_values = a.values;        // Force MTA to place on stack.
01076   T* const b_values = b.values;        // Force MTA to place on stack.
01077 
01078   #pragma mta assert parallel
01079   #pragma mta loop future
01080   for (size_type col = 0; col < colstop; col++)
01081   {
01082     size_type const start = a_index[col];
01083     size_type const stop  = a_index[col + 1];
01084 
01085     #pragma mta assert parallel
01086     for (size_type i = start; i < stop; i++)
01087     {
01088       //mt_incr(temp_values[a_rows[i]], a_values[i] * b_values[a_rows[i]]);
01089       temp_values[ a_rows[i] ] += a_values[i] * b_values[ a_rows[i] ];
01090     }
01091   }
01092 
01093   return temp;
01094 }
01095 
01096 /***************************** COO  SparseMatrix ******************************/
01097 /* COO : */
01098 
01099 template <typename size_type, typename T = double>
01100 class SparseMatrixCOO : MatrixBase<size_type, T> {
01101 private:
01102   size_type* columns;
01103   size_type* rows;
01104 
01105 public:
01106   SparseMatrixCOO() : MatrixBase<size_type, T> ()
01107   {
01108     this->columns = 0;
01109     this->rows    = 0;
01110   }
01111 
01112   SparseMatrixCOO(size_type const row, size_type const col,
01113                   size_type const nnz) :
01114     MatrixBase<size_type, T> (row, col, nnz)
01115   {
01116     rows = (size_type*) malloc (nnz * sizeof(size_type));
01117     columns = (size_type*) malloc (nnz * sizeof(size_type));
01118   }
01119 
01120   SparseMatrixCOO (SparseMatrixCOO<size_type, T>& a) :
01121     MatrixBase<size_type, T>(a)
01122   {
01123     rows = (size_type*) malloc(a.nNonZero * sizeof(size_type));
01124     columns = (size_type*) malloc(a.nNonZero * sizeof(size_type));
01125 
01126     size_type const stop = this->nNonZero;
01127     T* const this_values = this->values;       // MTA -to force onto stack.
01128     T* const a_values = a.values;              // MTA -to force onto stack.
01129     size_type* const this_rows = this->rows;        // MTA -to force onto stack.
01130     size_type* const this_columns = this->columns;  // MTA -to force onto stack.
01131     size_type* const a_rows = a.rows;               // MTA -to force onto stack.
01132     size_type* const a_columns = a.columns;         // MTA -to force onto stack.
01133 
01134     #pragma mta assert parallel
01135     for (size_type i = 0; i < stop; i++)
01136     {
01137       this_values[i] = a_values[i];
01138       this_rows[i] = a_rows[i];
01139       this_columns[i] = a_columns[i];
01140     }
01141   }
01142 
01143   ~SparseMatrixCOO()
01144   {
01145     if (this->columns) free (this->columns);
01146     if (this->index) free (this->index);
01147     if (this->rows) free (this->rows);
01148 
01149     this->columns = 0;
01150     this->index = 0;
01151     this->rows = 0;
01152   }
01153 
01154   SparseMatrixCOO<size_type, T>&
01155   operator=(const SparseMatrixCOO<size_type, T>& a)
01156   {
01157     if (this != &a)
01158     {
01159       this->nRow = a.nRow;
01160       this->nCol = a.nCol;
01161       this->nNonZero = a.nNonZero;
01162       this->index = 0;
01163 
01164       if (rows) free(rows);
01165 
01166       rows = (size_type*) malloc(a.nNonZero * sizeof(size_type));
01167 
01168       if (columns) free(columns);
01169 
01170       columns = (size_type*) malloc(a.nNonZero * sizeof(size_type));
01171 
01172       if (this->values) free(this->values);
01173 
01174       this->values = (size_type*) malloc(a.nNonZero * sizeof(size_type));
01175 
01176       // MTA -to force onto stack.
01177       size_type const stop = this->nNonZero;
01178       T* const this_values = this->values;
01179       T* const a_values = a.values;
01180       size_type* const this_rows = this->rows;
01181       size_type* const this_columns = this->columns;
01182       size_type* const a_rows = a.rows;
01183       size_type* const a_columns = a.columns;
01184 
01185       #pragma mta assert no dependence
01186       for (size_type i = 0; i < stop; i++)
01187       {
01188         this_values[i]  = a_values[i];
01189         this_rows[i]    = a_rows[i];
01190         this_columns[i] = a_columns[i];
01191       }
01192     }
01193 
01194     return *this;
01195   }
01196 
01197   SparseMatrixCOO<size_type, T>&
01198   operator=(const SparseMatrixCSR<size_type, T>& a)
01199   {
01200     printf("NOT IMPLEMENTED: Converting from CSR to COO\n");
01201     return *this;
01202   }
01203 
01204   SparseMatrixCOO<size_type, T>&
01205   operator=(const SparseMatrixCSC<size_type, T>& a)
01206   {
01207     printf("NOT IMPLEMENTED: Converting from CSC to COO\n");
01208     return *this;
01209   }
01210 
01211   SparseMatrixCOO<size_type, T>&
01212   operator()(const SparseMatrixCSR<size_type, T>&);
01213 
01214   SparseMatrixCOO<size_type, T>&
01215   operator()(const SparseMatrixCSC<size_type, T>&);
01216 
01217   DenseVector<size_type, T> friend
01218   operator* <size_type, T> (const SparseMatrixCOO<size_type, T>&,
01219                             const DenseVector<size_type, T>&);
01220 
01221   void MatrixPrint (char const* name)
01222   {
01223     printf("SparseMatrixCOO Print %s row $d col %d\n", name,
01224            this->nRow, this->nCol);
01225   }
01226 };
01227 
01228 template <typename size_type, typename T>
01229 DenseVector<size_type, T>
01230 operator*(const SparseMatrixCOO<size_type, T>& a,
01231           const DenseVector<size_type, T>& b)
01232 {
01233   if (a.nCol != b.length)
01234   {
01235     printf("INCOMPATIBLE SparseMatrixCoo * DenseVector multiplication\n");
01236     exit(1);
01237   }
01238 
01239   DenseVector<size_type, T> temp(b.length);
01240   size_type const stop = temp.length;
01241   T* const temp_values = temp.values;
01242   T const zero = T();
01243 
01244   #pragma mta assert parallel
01245   for (size_type i = 0; i < stop; i++) temp_values[i] = zero;
01246 
01247   size_type const end = a.nNonZero;
01248   T* const a_values = a.values;            // MTA -to force onto stack.
01249   T* const b_values = b.values;            // MTA -to force onto stack.
01250   size_type* const a_rows = a.rows;        // MTA -to force onto stack.
01251   size_type* const b_rows = b.rows;        // MTA -to force onto stack.
01252   size_type* const a_columns = a.columns;  // MTA -to force onto stack.
01253 
01254   #pragma mta assert parallel
01255   for (size_type i = 0; i < end; i++)
01256   {
01257     mt_inc (temp_values[a_rows[i]], a_values[i] * b_values[a_columns[i]]);
01258   }
01259 
01260   return temp;
01261 }
01262 
01263 /* biconjugate gradient solver derived from linbcg.c in "Numerical Recipes */
01264 /* in C", second edition, Press, Vetterling, Teukolsky, Flannery, pp 86-89 */
01265 template <typename size_type, typename T>
01266 DenseVector<size_type, T>& linbcg (const SparseMatrixCSR<size_type, T>& A,
01267                                    DenseVector<size_type, T>& x,
01268                                    const DenseVector<size_type, T>& b,
01269                                    size_type const itermax,
01270                                    T& err,
01271                                    T const tol)
01272 {
01273   #pragma mta trace "linbcg start"
01274 
01275   size_type const length = A.MatrixRows();
01276   double const bnorm = b.norm2();
01277   double ak = 0, akden = 0, bk = 0, bknum = 0, bkden = 0;
01278 
01279   DenseVector<size_type, T> p(length), pp(length);
01280   DenseVector<size_type, T> r(length), rr(length);
01281   DenseVector<size_type, T> z(length), zz(length);
01282   DenseVector<size_type, T> d = diagonal(A);
01283 
01284   r = b - (A * x);
01285   z = d.asolve (r);
01286   rr = r;
01287 
01288   for (size_type iter = 0; iter < itermax; iter++)
01289   {
01290     zz = d.asolve (rr);
01291 
01292     bknum = z * rr;
01293 
01294     if (iter == 0)
01295     {
01296       p  = z;
01297       pp = zz;
01298     }
01299     else
01300     {
01301       bk = bknum / bkden;
01302       p  = (bk * p)  + z;
01303       pp = (bk * pp) + zz;
01304     }
01305 
01306     bkden =  bknum;
01307 
01308     z = A * p;
01309     akden = z * pp;
01310     ak = bknum / akden;
01311 
01312     zz = Transpose_SMVm(A, pp);
01313 
01314     x += (ak * p);
01315     r -= (ak * z);
01316     rr -= (ak * zz);
01317 
01318     z = d.asolve(r);
01319     err = r.norm2() / bnorm;
01320 
01321     if (err < tol) break;
01322   }
01323 
01324   #pragma mta trace "linbcg stop"
01325 
01326   return x;
01327 }
01328 
01329 #endif

Generated on Fri Oct 22 2010 11:02:23 for SST by  doxygen 1.7.1