00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef MTGL_SMVKERNEL_H
00024 #define MTGL_SMVKERNEL_H
00025
00026 #include <cstdio>
00027 #include <cstdlib>
00028 #include <cmath>
00029
00030 #include <mtgl/util.hpp>
00031
00032
00033 #ifndef __MTA__
00034 #pragma warning ( disable : 4068 )
00035 #endif
00036
00037 #ifdef __MTA__
00038 #define BRAKET <size_type, T>
00039 #include <sys/mta_task.h>
00040 #include <machine/runtime.h>
00041 #else
00042 #define BRAKET <size_type, T>
00043 #endif
00044
00045
00046
00047
00048
// ---------------------------------------------------------------------------
// Forward declarations.
//
// The class templates and the free function templates below are declared
// ahead of their definitions so that the classes can name specific
// specializations of the functions as friends.
// ---------------------------------------------------------------------------

// Storage base classes.
template <typename size_type, typename T> class VectorBase;
template <typename size_type, typename T> class MatrixBase;

// Dense vector.
template <typename size_type, typename T> class DenseVector;

// Sparse matrix formats.
template <typename size_type, typename T> class SparseMatrixCSR;
template <typename size_type, typename T> class SparseMatrixCSC;
template <typename size_type, typename T> class SparseMatrixCOO;

// Sparse matrix (CSR) * dense vector.
template <typename size_type, typename T>
DenseVector<size_type, T>
operator*(const SparseMatrixCSR<size_type, T>& a,
          const DenseVector<size_type, T>& b);

// Diagonal of a square CSR matrix as a dense vector.
template <typename size_type, typename T>
DenseVector<size_type, T>
diagonal(const SparseMatrixCSR<size_type, T>& a);

// Transposed CSR matrix * dense vector.
template <typename size_type, typename T>
DenseVector<size_type, T>
Transpose_SMVm(const SparseMatrixCSR<size_type, T>&,
               const DenseVector<size_type, T>&);

// Sparse matrix (CSC) * dense vector.
template <typename size_type, typename T>
DenseVector<size_type, T>
operator*(const SparseMatrixCSC<size_type, T>&,
          const DenseVector<size_type, T>&);

// Sparse matrix (COO) * dense vector.
template <typename size_type, typename T>
DenseVector<size_type, T>
operator*(const SparseMatrixCOO<size_type, T>&,
          const DenseVector<size_type, T>&);

// Scalar * dense vector.
template <typename size_type, typename T>
DenseVector<size_type, T>
operator*(size_type const, const DenseVector<size_type, T>&);

template <typename size_type, typename T>
DenseVector<size_type, T>
operator*(double const, const DenseVector<size_type, T>&);

// Scalar * CSR matrix.
template <typename size_type, typename T>
SparseMatrixCSR<size_type, T>
operator*(size_type const, const SparseMatrixCSR<size_type, T>&);

template <typename size_type, typename T>
SparseMatrixCSR<size_type, T>
operator* (double const, const SparseMatrixCSR<size_type, T>&);

// Dot product of two dense vectors.
template <typename size_type, typename T>
T
operator*(const DenseVector<size_type, T>&, const DenseVector<size_type, T>&);
00111
00112
00113
// Base class that owns a malloc'ed array of `length` elements of type T.
//
// The class manages the buffer itself: it is allocated in the constructors,
// deep-copied on copy construction and copy assignment, and released in the
// destructor or by clear().
template <typename size_type, typename T> class VectorBase {
protected:
  size_type length;   // Number of elements in `values`.
  T* values;          // Owned storage; 0 when the vector is empty/cleared.

public:
  // Creates an empty vector with no storage.
  VectorBase() : length(0), values(0) {}

  // Creates a vector of n elements, each set to T().
  VectorBase (size_type const n) : length(n)
  {
    this->values = (T*) malloc (n * sizeof(T));

    T* const this_values = this->values;
    T const zero = T();
    size_type const finish = this->length;

    #pragma mta assert parallel
    for (size_type i = 0; i < finish; i++) this_values[i] = zero;
  }

  // Deep-copies the elements of a into freshly allocated storage.
  VectorBase (const VectorBase<size_type, T>& a) : length(a.length)
  {
    this->values = (T*) malloc (a.length * sizeof(T));

    size_type const stop = a.length;
    T* const this_values = this->values;
    T* const a_values = a.values;

    #pragma mta assert parallel
    for (size_type i = 0; i < stop; i++) this_values[i] = a_values[i];
  }

  // Deep-copy assignment.  Without it the implicitly generated operator
  // would copy the raw pointer, leaving two objects that both free the same
  // buffer in their destructors.
  VectorBase<size_type, T>& operator=(const VectorBase<size_type, T>& a)
  {
    if (this != &a)
    {
      size_type const stop = a.length;
      T* const fresh = (T*) malloc (stop * sizeof(T));
      T* const a_values = a.values;

      #pragma mta assert parallel
      for (size_type i = 0; i < stop; i++) fresh[i] = a_values[i];

      // Release the old buffer only after the copy has been made.
      if (this->values) free (this->values);

      this->values = fresh;
      this->length = stop;
    }

    return *this;
  }

  // Releases the storage.
  ~VectorBase()
  {
    if (this->values) free (this->values);
    this->values = 0;
  }

  // Releases the storage and resets the vector to the empty state.  The
  // length is reset together with the pointer so that later loops bounded by
  // `length` do not walk a null buffer.
  void clear()
  {
    if (this->values) free(this->values);
    this->values = 0;
    this->length = 0;
  }

  // Declared here; no definition is visible in this header section.
  void VectorPrint (char const* name) const;
};
00160
// Base class shared by the sparse matrix formats.
//
// Holds the matrix dimensions, the number of stored entries, the array of
// stored values and one index array whose length and meaning are defined by
// the derived format.  `data_owned` records whether the destructor is
// allowed to free `values` and `index`.
template <typename size_type, typename T> class MatrixBase {
protected:
  size_type nRow, nCol, nNonZero;
  T* values;          // nNonZero stored entries.
  size_type* index;   // Format-specific index array (allocated by derived classes).
  bool data_owned;    // True when this object may free values/index.

public:
  // Creates an empty matrix with no storage.
  MatrixBase() : nRow(0), nCol(0), nNonZero(0), values(0), index(0),
                 data_owned(true) {}

  // Creates a row x col matrix with room for `count` stored values.  The
  // index array is left null for the derived class to set up.
  //
  // NOTE(review): the values are initialized to their own position
  // (values[i] = i), not to zero -- confirm this is intended.
  // NOTE(review): the value buffer is allocated even when own == false, in
  // which case the destructor will not free it; the caller can retrieve it
  // through swap_values().
  MatrixBase(size_type const row, size_type const col,
             size_type const count, bool own = true) :
    nRow(row), nCol(col), nNonZero(count), index(0), data_owned(own)
  {
    values = (T*) malloc (count * sizeof(T));

    T* const this_values = this->values;
    size_type const finish = this->nNonZero;

    #pragma mta assert parallel
    for (size_type i = 0; i < finish; i++) this_values[i] = i;
  }

  // Frees values and index when they are owned.
  ~MatrixBase()
  {
    if (data_owned && this->values) free(this->values);
    this->values = 0;

    if (data_owned && this->index) free(this->index);
    this->index = 0;
  }

  // Copies the dimensions and deep-copies the values of a.  The index array
  // is NOT copied (its length is only known to the derived format); it is
  // left null.  The copy always owns its data.
  MatrixBase(const MatrixBase<size_type, T>& a) :
    nRow(a.nRow), nCol(a.nCol), nNonZero(a.nNonZero),
    index(0), data_owned(true)
  {
    this->values = (T*) malloc (a.nNonZero * sizeof (T));
    T* const this_values = this->values;
    T* const a_values = a.values;
    size_type const stop = a.nNonZero;

    #pragma mta assert parallel
    for (size_type i = 0; i < stop; i++) this_values[i] = a_values[i];
  }

  // Copy assignment with the same semantics as the copy constructor: the
  // values are deep-copied, the index array is released (if owned) and left
  // null, and the result owns its data.  Without this operator the implicit
  // one would share raw pointers between two owners and double-free them.
  MatrixBase<size_type, T>& operator=(const MatrixBase<size_type, T>& a)
  {
    if (this != &a)
    {
      size_type const stop = a.nNonZero;
      T* const fresh = (T*) malloc (stop * sizeof (T));
      T* const a_values = a.values;

      #pragma mta assert parallel
      for (size_type i = 0; i < stop; i++) fresh[i] = a_values[i];

      // Release only what this object owns.
      if (data_owned && this->values) free(this->values);
      if (data_owned && this->index) free(this->index);

      this->nRow = a.nRow;
      this->nCol = a.nCol;
      this->nNonZero = stop;
      this->values = fresh;
      this->index = 0;
      this->data_owned = true;
    }

    return *this;
  }

  // Declared here; no definition is visible in this header section.
  void MatrixPrint (char const* name);

  // Number of stored entries.
  size_type num_nonzero() const { return this->nNonZero; }

  // Installs new_values as the value array and returns the previous one.
  // Nothing is freed; the caller becomes responsible for the returned
  // pointer.
  T* swap_values(T* new_values)
  {
    T* tmp = values;
    values = new_values;
    return tmp;
  }

  // Raw access to the format-specific index array.
  size_type* get_index() const { return index; }
};
00221
00222
00223
00224 template <typename size_type, typename T = double>
00225 class DenseVector : public VectorBase<size_type, T> {
00226 public:
00227 DenseVector() {}
00228 DenseVector (size_type const n) : VectorBase<size_type, T> (n) {}
00229 DenseVector (const DenseVector<size_type, T>& a) :
00230 VectorBase<size_type, T> (a) {}
00231
00232 ~DenseVector() {}
00233
00234 DenseVector<size_type, T>& operator=(const DenseVector<size_type, T>& a)
00235 {
00236 if (this->length != a.length) printf("DenseVector copy assignment error\n");
00237
00238 if (this != &a)
00239 {
00240 size_type const stop = this->length;
00241 T* const this_values = this->values;
00242 T* const a_values = a.values;
00243
00244 #pragma mta assert no dependence
00245 for (size_type i = 0; i < stop; i++) this_values[i] = a_values[i];
00246 }
00247
00248 return *this;
00249 }
00250
00251 const T operator[](size_type idx)
00252 {
00253 if (idx < 0 || idx > this->length)
00254 {
00255 fprintf(stderr, "error: index out of bounds in DenseVector\n");
00256 exit(1);
00257 }
00258
00259 return(this->values[idx]);
00260 }
00261
00262 DenseVector<size_type, T>& operator+=(const DenseVector<size_type, T>& a)
00263 {
00264 size_type const stop = this->length;
00265 T* const this_values = this->values;
00266 T* const a_values = a.values;
00267
00268 #pragma mta assert parallel
00269 for (size_type i = 0; i < stop; i++)
00270 {
00271
00272 this_values[i] += a_values[i];
00273 }
00274
00275 return *this;
00276 }
00277
00278 DenseVector<size_type, T>& operator-=(const DenseVector<size_type, T>& a)
00279 {
00280 size_type const stop = this->length;
00281
00282
00283
00284 #pragma mta assert parallel
00285 for (size_type i = 0; i < stop; i++)
00286 {
00287
00288 this->values[i] += -a.values[i];
00289 }
00290
00291 return *this;
00292 }
00293
00294 DenseVector<size_type, T>& operator() ()
00295 {
00296 size_type const stop = this->length;
00297 T* const this_values = this->values;
00298 T const zero = T();
00299
00300 #pragma mta assert no dependence
00301 for (size_type i = 0; i < stop; i++) this_values[i] = zero;
00302
00303 return *this;
00304 }
00305
00306 T norm2 () const
00307 {
00308 T temp = T();
00309 size_type const stop = this->length;
00310 T* const this_values = this->values;
00311
00312 #pragma mta assert parallel
00313 for (size_type i = 0; i < stop; i++)
00314 {
00315
00316 temp += this_values[i] * this_values[i];
00317 }
00318
00319 return (T) sqrt(temp);
00320 }
00321
00322 T norm_inf () const
00323 {
00324 T temp = T();
00325 size_type const stop = this->length;
00326 T* const this_values = this->values;
00327
00328 #pragma mta assert parallel
00329 for (size_type i = 0; i < stop; i++)
00330 {
00331 if (fabs(this_values[i]) > temp) temp = (T) fabs(this_values[i]);
00332 }
00333
00334 return temp;
00335 }
00336
00337 size_type VectorLength() const { return this->length; }
00338
00339 friend DenseVector<size_type, T>
00340 operator* BRAKET (const SparseMatrixCSR<size_type, T>&,
00341 const DenseVector<size_type, T>&);
00342
00343 DenseVector<size_type, T> friend
00344 operator* BRAKET (const SparseMatrixCSC<size_type, T>&,
00345 const DenseVector<size_type, T>&);
00346
00347 DenseVector<size_type, T> friend
00348 operator* BRAKET (const SparseMatrixCOO<size_type, T>&,
00349 const DenseVector<size_type, T>&);
00350
00351 DenseVector<size_type, T> friend
00352 operator* BRAKET (size_type const, const DenseVector<size_type, T>&);
00353
00354 DenseVector<size_type, T> friend
00355 operator* BRAKET (double const, const DenseVector<size_type, T>&);
00356
00357 T friend
00358 operator* BRAKET (const DenseVector<size_type, T>&,
00359 const DenseVector<size_type, T>&);
00360
00361 DenseVector<size_type, T> friend
00362 diagonal BRAKET (const SparseMatrixCSR<size_type, T>&);
00363
00364 DenseVector<size_type, T> friend
00365 Transpose_SMVm BRAKET (const SparseMatrixCSR<size_type, T>&,
00366 const DenseVector<size_type, T>&);
00367
00368
00369
00370
00371 DenseVector<size_type, T> asolve(DenseVector<size_type, T>& a)
00372 {
00373 #pragma mta trace "asolve(DenseVector) start"
00374 DenseVector<size_type, T> temp(this->length);
00375
00376 size_type const stop = this->length;
00377 T* const temp_values = temp.values;
00378 T* const this_values = this->values;
00379 T* const a_values = a.values;
00380
00381 #pragma mta assert no dependence
00382 for (size_type i = 0; i < stop; i++)
00383 {
00384 T const zero = T();
00385 temp_values[i] = (this_values[i] != zero) ?
00386 a_values[i] / this_values[i] : a_values[i];
00387 }
00388
00389 #pragma mta trace "asolve(DenseVector) stop"
00390
00391 return temp;
00392 }
00393
00394
00395
00396 void fill(T const* const val)
00397 {
00398 size_type const stop = this->length;
00399 T* const this_values = this->values;
00400
00401 #pragma mta assert no dependence
00402 for (size_type i = 0; i < stop; i++) this_values[i] = val[i];
00403 }
00404
00405 void VectorPrint(char const* name)
00406 {
00407 printf("DenseVector Print %s length %d\n", name, this->length);
00408
00409 size_type minimum = (this->length < 10) ? this->length : 10;
00410
00411 printf("DenseVector Values: ");
00412 for (size_type i = 0; i < minimum; i++)
00413 {
00414 printf("%g, ", this->values[i]);
00415 }
00416 printf("\n");
00417 }
00418 };
00419
00420 template <typename size_type, typename T> DenseVector<size_type, T> inline
00421 operator*(size_type const a, const DenseVector<size_type, T>& b)
00422 {
00423 #pragma mta trace "operator* (int, DenseVector) start"
00424
00425 DenseVector<size_type, T> temp (b.length);
00426
00427 size_type const stop = b.length;
00428 T* const temp_values = temp.values;
00429 T* const b_values = b.values;
00430
00431 #pragma mta assert no dependence
00432 for (size_type i = 0; i < stop; i++) temp_values[i] = a * b_values[i];
00433
00434 #pragma mta trace "operator* (int, DenseVector) stop"
00435
00436 return temp;
00437 }
00438
00439 template <typename size_type, typename T> DenseVector<size_type, T> inline
00440 operator*(double const a, const DenseVector<size_type, T>& b)
00441 {
00442 DenseVector<size_type, T> temp (b.length);
00443
00444 size_type const stop = b.length;
00445 T* const temp_values = temp.values;
00446 T* const b_values = b.values;
00447
00448 #pragma mta assert no dependence
00449 for (size_type i = 0; i < stop; i++) temp_values[i] = a * b_values[i];
00450
00451 return temp;
00452 }
00453
00454 template <typename size_type, typename T> T inline
00455 operator* (const DenseVector<size_type, T>& a,
00456 const DenseVector<size_type, T>& b)
00457 {
00458 T temp = T();
00459 size_type const stop = a.length;
00460 T* const a_values = a.values;
00461 T* const b_values = b.values;
00462
00463 #pragma mta assert parallel
00464 for (size_type i = 0; i < stop; i++)
00465 {
00466
00467 temp += a_values[i] * b_values[i];
00468 }
00469
00470 return temp;
00471 }
00472
00473 template <typename size_type, typename T>
00474 DenseVector<size_type, T>
00475 operator-(const DenseVector<size_type, T>& a,
00476 const DenseVector<size_type, T>& b)
00477 {
00478 DenseVector<size_type, T> temp(a);
00479 temp -= b;
00480 return temp;
00481 }
00482
00483 template <typename size_type, typename T>
00484 DenseVector<size_type, T>
00485 operator+(const DenseVector<size_type, T>& a,
00486 const DenseVector<size_type, T>& b)
00487 {
00488 DenseVector<size_type, T> temp(a);
00489 temp += b;
00490 return temp;
00491 }
00492
00493
00494
00495 template <typename size_type, typename T = double>
00496 class SparseMatrixCSR : public MatrixBase<size_type, T> {
00497 private:
00498 size_type* columns;
00499
00500 public:
00501 typedef size_type* column_iterator;
00502 typedef T* value_iterator;
00503
00504 SparseMatrixCSR() : MatrixBase<size_type, T>(), columns(0) {}
00505
00506
00507 SparseMatrixCSR(size_type const row, size_type const col,
00508 size_type const count) :
00509 MatrixBase<size_type, T> (row, col, count, true)
00510 {
00511 this->columns = (size_type*) malloc(count * sizeof(size_type));
00512
00513 this->index = (size_type*) malloc((this->nRow + 1) * sizeof(size_type));
00514
00515
00516 size_type* const this_columns = this->columns;
00517 size_type const finish = this->nNonZero;
00518
00519 #pragma mta assert parallel
00520 for (size_type i = 0; i < finish; i++) this_columns[i] = 0;
00521
00522
00523 size_type* const this_index = this->index;
00524 size_type const stop = this->nRow + 1;
00525
00526 #pragma mta assert parallel
00527 for (size_type i = 0; i < stop; i++) this_index[i] = 0;
00528 }
00529
00530
00531 SparseMatrixCSR(const size_type row, const size_type col,
00532 const size_type count, size_type* indx, T* val,
00533 size_type* cols) :
00534 MatrixBase<size_type, T>(row, col, count, false),
00535 MatrixBase<size_type, T>::index(indx), columns(cols) {}
00536
00537
00538 SparseMatrixCSR(const SparseMatrixCSR<size_type, T>& a) :
00539 MatrixBase<size_type, T> (a)
00540 {
00541 columns = (size_type*) malloc(this->nNonZero * sizeof (size_type));
00542
00543 this->index = (size_type*) malloc((this->nRow + 1) * sizeof (size_type));
00544
00545 size_type const stop = this->nNonZero;
00546
00547
00548 size_type* const this_columns = this->columns;
00549
00550 size_type* const a_columns = a.columns;
00551 size_type* const a_index = a.index;
00552
00553 #pragma mta assert parallel
00554 for (size_type i = 0; i < stop; i++) this_columns[i] = a_columns[i];
00555
00556 size_type* const this_index = this->index;
00557 size_type const end = this->nRow + 1;
00558
00559 #pragma mta assert parallel
00560 for (size_type i = 0; i < end; i++) this_index [i] = a_index[i];
00561 }
00562
00563 ~SparseMatrixCSR()
00564 {
00565 if (this->data_owned && this->columns) free (this->columns);
00566 if (this->data_owned && this->index) free (this->index);
00567
00568 this->columns = 0;
00569 this->index = 0;
00570 }
00571
00572 size_type* get_index() const { return this->index; }
00573
00574 void init(const size_type row, const size_type col, const size_type count,
00575 size_type* indx, T* vals, size_type* cols)
00576 {
00577 clear();
00578
00579 this->nRow = row;
00580 this->nCol = col;
00581 this->nNonZero = count;
00582 this->index = indx;
00583 this->columns = cols;
00584 this->values = vals;
00585 this->data_owned = false;
00586 }
00587
00588 void clear()
00589 {
00590 if (this->data_owned && this->values) free(this->values);
00591 this->values = 0;
00592
00593 if (this->data_owned && this->index) free(this->index);
00594 this->index = 0;
00595 }
00596
00597 SparseMatrixCSR<size_type, T>&
00598 operator=(const SparseMatrixCSR<size_type, T>& a)
00599 {
00600 if (this != &a)
00601 {
00602 this->nRow = a.nRow;
00603 this->nCol = a.nCol;
00604 this->nNonZero = a.nNonZero;
00605
00606 if (columns) free (columns);
00607 columns = (size_type*) malloc(this->nNonZero * sizeof(size_type));
00608
00609 if (this->index) free (this->index);
00610 this->index = (size_type*) malloc((this->nRow + 1) * sizeof(size_type));
00611
00612 if (this->values) free (this->values);
00613 this->values = (T*) malloc(this->nNonZero * sizeof(T));
00614
00615
00616 size_type const stop = this->nNonZero;
00617 size_type* const this_columns = this->columns;
00618 T* const this_values = this->values;
00619 size_type* const a_columns = a.columns;
00620 T* const a_values = a.values;
00621
00622 #pragma mta assert parallel
00623 for (size_type i = 0; i < stop; i++)
00624 {
00625 this_columns[i] = a_columns[i];
00626 this_values[i] = a_values[i];
00627 }
00628
00629
00630 size_type const end = this->nRow + 1;
00631 size_type* const this_index = this->index;
00632 size_type* const a_index = a.index;
00633
00634 #pragma mta assert parallel
00635 for (size_type i = 0; i < end; i++) this_index[i] = a_index[i];
00636 }
00637
00638 return *this;
00639 }
00640
00641 SparseMatrixCSR<size_type, T>&
00642 operator=(const SparseMatrixCSC<size_type, T>& a)
00643 {
00644 printf("NOT IMPLEMENTED Converting from CSC to CSR\n");
00645 return *this;
00646 }
00647
00648 DenseVector<size_type, T> friend
00649 operator* <size_type, T> (const SparseMatrixCSR<size_type, T>&,
00650 const DenseVector<size_type, T>&);
00651
00652 SparseMatrixCSR<size_type, T> friend
00653 operator* <size_type, T> (double const,
00654 const SparseMatrixCSR<size_type, T>&);
00655
00656 SparseMatrixCSR<size_type, T> friend
00657 operator* <size_type, T> (size_type const,
00658 const SparseMatrixCSR<size_type, T>&);
00659
00660 DenseVector<size_type, T> friend
00661 diagonal BRAKET (const SparseMatrixCSR<size_type, T>&);
00662
00663 DenseVector<size_type, T> friend
00664 Transpose_SMVm BRAKET (const SparseMatrixCSR<size_type, T>&,
00665 const DenseVector<size_type, T>&);
00666
00667
00668
00669
00670 void fill(size_type const* const indx, T const* const val,
00671 size_type const* const cols)
00672 {
00673
00674 size_type const stop = this->nRow;
00675 size_type* const this_index = this->index;
00676
00677 #pragma mta assert no dependence
00678 for (size_type rows = 0; rows < stop; rows++) this_index[rows] = indx[rows];
00679
00680 this_index[this->nRow] = this->nNonZero;
00681
00682
00683 size_type const end = this->nNonZero;
00684 T* const this_values = this->values;
00685 size_type* const this_columns = this->columns;
00686
00687 #pragma mta assert no dependence
00688 for (size_type i = 0; i < end; i++)
00689 {
00690 this_values[i] = val[i];
00691 this_columns[i] = cols[i];
00692 }
00693 }
00694
00695 size_type col_index(size_type row) const { return this->index[row]; }
00696
00697 T* col_values_begin(size_type row)
00698 {
00699 if ((row >= 0) && (row < this->nRow))
00700 {
00701 return &this->values[this->index[row]];
00702 }
00703 else
00704 {
00705 return 0;
00706 }
00707 }
00708
00709 T* col_values_end(size_type row)
00710 {
00711 size_type ind = row + 1;
00712
00713 if ((ind >= 0) && (ind <= this->nRow))
00714 {
00715 return &this->values[this->index[ind]];
00716 }
00717 else
00718 {
00719 return 0;
00720 }
00721 }
00722
00723 size_type* col_indices_begin(size_type row)
00724 {
00725 if ((row >= 0) && (row < this->nRow))
00726 {
00727 return &this->columns[this->index[row]];
00728 }
00729 else
00730 {
00731 return 0;
00732 }
00733 }
00734
00735 size_type column(size_type j)
00736 {
00737
00738 return this->columns[j];
00739
00740
00741 }
00742
00743 size_type* col_indices_end(size_type row)
00744 {
00745 size_type ind = row + 1;
00746
00747 if ((ind >= 0) && (ind <= this->nRow))
00748 {
00749 return &this->columns[this->index[ind]];
00750 }
00751 else
00752 {
00753 return 0;
00754 }
00755 }
00756
00757 void MatrixPrint (char const* name) const
00758 {
00759 printf("SparseMatrixCSR Print %s row %d col %d\n",
00760 name, this->nRow, this->nCol);
00761 }
00762
00763 size_type MatrixRows() const { return this->nRow; }
00764 size_type MatrixCols() const { return this->nCol; }
00765 };
00766
00767 template <typename size_type, typename T>
00768 DenseVector<size_type, T>
00769 operator*(const SparseMatrixCSR<size_type, T>& a,
00770 const DenseVector<size_type, T>& b)
00771 {
00772 #pragma mta trace "operator* (SparseMatrixCSR, DenseVector) start"
00773
00774 if (a.nCol != b.length)
00775 {
00776 printf("INCOMPATIBLE SparseMatrixCSR * DenseVector multiplication\n");
00777 exit(1);
00778 }
00779
00780 DenseVector<size_type, T> temp(b.length);
00781 T* const temp_values = temp.values;
00782 T const zero = T();
00783 size_type const finish = temp.length;
00784
00785 #pragma mta assert parallel
00786 for (size_type i = 0; i < finish; i++) temp_values[i] = zero;
00787
00788 #ifdef __MTA__
00789 size_type starttimer = mta_get_clock(0);
00790 #endif
00791
00792 size_type const stop = a.nRow;
00793 size_type* const a_index = a.index;
00794 T* const a_values = a.values;
00795 T* const b_values = b.values;
00796 size_type* const a_columns = a.columns;
00797
00798 #pragma mta assert parallel
00799 for (size_type row = 0; row < stop; row++)
00800 {
00801 #pragma mta trace "next_row"
00802
00803 size_type const start = a_index[row];
00804 size_type const finish = a_index[row + 1];
00805
00806 for (size_type i = start; i < finish; i++)
00807 {
00808 temp_values[row] += a_values[i] * b_values[ a_columns[i] ];
00809 }
00810 }
00811
00812 #ifdef __MTA__
00813 size_type stoptimer = mta_get_clock(starttimer);
00814
00815 #endif
00816
00817 #pragma mta trace "operator* (SparseMatrixCSR, DenseVector) stop"
00818
00819 return temp;
00820 }
00821
00822 template <typename size_type, typename T>
00823 SparseMatrixCSR<size_type, T>
00824 operator*(size_type const a, const SparseMatrixCSR<size_type, T>& b)
00825 {
00826 SparseMatrixCSR<size_type, T> temp (b.nRow, b.nCol, b.nNonZero);
00827
00828 size_type const stop = b.nNonZero;
00829 T* const temp_values = temp.values;
00830 T* const b_values = b.values;
00831
00832 #pragma mta assert no dependence
00833 for (size_type i = 0; i < stop; i++) temp_values[i] = a * b_values[i];
00834
00835 return temp;
00836 }
00837
00838 template <typename size_type, typename T> SparseMatrixCSR<size_type, T>
00839 operator*(double const a, const SparseMatrixCSR<size_type, T>& b)
00840 {
00841 SparseMatrixCSR<size_type, T> temp (b.nRow, b.nCol, b.nNonZero);
00842
00843 size_type const stop = b.nNonZero;
00844 T* temp_values = temp.values;
00845 T* b_values = b.values;
00846
00847 #pragma mta assert no dependence
00848 for (size_type i = 0; i < stop; i++) temp_values[i] = a * b_values[i];
00849
00850 return temp;
00851 }
00852
00853 template <typename size_type, typename T> DenseVector<size_type, T> diagonal (const SparseMatrixCSR<size_type, T>& a)
00854 {
00855 #pragma mta trace "diagonal(SparseMatrixCSR) start"
00856 if (a.nRow != a.nCol)
00857 {
00858 printf("diagonal called on non square matrix\n");
00859
00860 exit(1);
00861 }
00862
00863 DenseVector<size_type, T> temp(a.nRow);
00864 temp();
00865
00866 size_type const finish = a.nRow;
00867 size_type* const a_index = a.index;
00868 size_type* const a_columns = a.columns;
00869 T* const a_values = a.values;
00870 T* const temp_values = temp.values;
00871
00872 #pragma mta assert parallel
00873 #pragma mta loop future
00874 for (size_type row = 0; row < finish; row++)
00875 {
00876 size_type const start = a_index[row];
00877 size_type const stop = a_index[row + 1];
00878
00879 #pragma mta assert parallel
00880 for (size_type i = start; i < stop; i++)
00881 {
00882 if (row == a_columns[i])
00883 {
00884 temp_values[row] = a_values[i];
00885 #ifndef __MTA__
00886 break;
00887 #endif
00888 }
00889 }
00890 }
00891
00892 #pragma mta trace "diagonal(SparseMatrixCSR) stop"
00893
00894 return temp;
00895 }
00896
00897 template <typename size_type, typename T>
00898 DenseVector<size_type, T>
00899 Transpose_SMVm (const SparseMatrixCSR<size_type, T>& a,
00900 const DenseVector<size_type, T>& b)
00901 {
00902 #pragma mta trace "Transpose_SMVm start"
00903 if (a.nCol != b.length)
00904 {
00905 printf("INCOMPATIBLE Transpose (SparseMatrixCSR) * DenseVector "
00906 "multiplication\n");
00907 exit(1);
00908 }
00909
00910 DenseVector<size_type, T> temp(b.length);
00911 T* const temp_values = temp.values;
00912 T* const a_values = a.values;
00913 T* const b_values = b.values;
00914 size_type* const a_index = a.index;
00915 size_type* const a_columns = a.columns;
00916 size_type const stop = temp.length;
00917
00918 #pragma mta assert no dependence
00919 for (size_type i = 0; i < stop; i++)
00920 {
00921 temp_values[i] = T();
00922 }
00923
00924 size_type const finish = temp.length;
00925
00926 #pragma mta assert parallel
00927 for (size_type row = 0; row < finish; row++)
00928 {
00929 size_type const start = a_index[row];
00930 size_type const stop = a_index[row + 1];
00931
00932 for (size_type i = start; i < stop; i++)
00933 {
00934 T temp_i = mt_readfe(temp_values[ a_columns[i] ]);
00935 temp_i += a_values[i] * b_values[row];
00936 mt_write(temp_values[ a_columns[i] ], temp_i);
00937
00938
00939 }
00940 }
00941
00942 #pragma mta trace "Transpose_SMVm start"
00943
00944 return temp;
00945 }
00946
00947
00948
00949 template <typename size_type, typename T = double>
00950 class SparseMatrixCSC : MatrixBase<size_type, T> {
00951 private:
00952 size_type* rows;
00953
00954 public:
00955 SparseMatrixCSC<size_type, T> () : MatrixBase<size_type, T> ()
00956 { rows = 0; }
00957
00958 SparseMatrixCSC<size_type, T>(size_type const row, size_type const col,
00959 size_type const count) :
00960 MatrixBase<size_type, T> (row, col, count)
00961 {
00962 rows = (size_type*) malloc (count * sizeof(size_type));
00963
00964 this->index = (size_type*) malloc ((this->nCol + 1) * sizeof(size_type));
00965 }
00966
00967 SparseMatrixCSC<size_type, T>(const SparseMatrixCSC<size_type, T>&a) :
00968 MatrixBase<size_type, T> (a)
00969 {
00970 rows = (size_type*) malloc(this->nNonZero * sizeof(size_type));
00971
00972 this->index = (size_type*) malloc((this->nCol + 1) * sizeof(size_type));
00973
00974 size_type* const a_rows = a.rows;
00975 size_type* const this_rows = this->rows;
00976 size_type const stop = this->nNonZero;
00977
00978 #pragma mta assert parallel
00979 for (size_type i = 0; i < stop; i++) this_rows[i] = a_rows[i];
00980
00981 size_type* const this_index = this->index;
00982 size_type* const a_index = a.index;
00983 size_type const end = this->nCol + 1;
00984
00985 #pragma mta assert parallel
00986 for (size_type i = 0; i < end; i++) this_index[i] = a_index[i];
00987 }
00988
00989 ~SparseMatrixCSC<size_type, T> ()
00990 {
00991 if (this->data_owned && this->rows) free (this->rows);
00992 if (this->data_owned && this->index) free (this->index);
00993
00994 this->rows = 0;
00995 this->index = 0;
00996 }
00997
00998 SparseMatrixCSC<size_type, T>&
00999 operator=(const SparseMatrixCSR<size_type, T>& a)
01000 {
01001 printf("NOT IMPLEMENTED: Converting from CSR to CSC\n");
01002 return *this;
01003 }
01004
01005 SparseMatrixCSC<size_type, T>&
01006 operator=(const SparseMatrixCSC<size_type, T>& a)
01007 {
01008 if (this != &a)
01009 {
01010 this->nRow = a.nRow;
01011 this->nCol = a.nCol;
01012 this->nNonZero = a.nNonZero;
01013
01014 size_type* const this_rows = this->rows;
01015 size_type* const a_rows = a.rows;
01016 T* const this_values = this->values;
01017 T* const a_values = a.values;
01018 size_type const stop = this->nNonZero;
01019
01020 #pragma mta assert no dependence
01021 for (size_type i = 0; i < stop; i++)
01022 {
01023 this_rows[i] = a_rows[i];
01024 this_values[i] = a_values[i];
01025 }
01026
01027 size_type* const this_index = this->index;
01028 size_type* const a_index = a.index;
01029 size_type const end = this->nCol + 1;
01030
01031 #pragma mta assert no dependence
01032 for (size_type i = 0; i < end; i++) this_index[i] = a_index[i];
01033 }
01034
01035 return *this;
01036 }
01037
01038 SparseMatrixCSC<size_type, T>& operator()(SparseMatrixCSR<size_type, T>&);
01039
01040 friend DenseVector<size_type, T>
01041 operator* <size_type, T> (const SparseMatrixCSC<size_type, T>&,
01042 const DenseVector<size_type, T>&);
01043
01044 void MatrixPrint (char const* name)
01045 {
01046 printf("SparseMatrixCSC Print %s row %d col %d\n",
01047 name, this->nRow, this->nCol);
01048 }
01049
01050 size_type MatrixRows() const { return this->nRow; }
01051 size_type MatrixCols() const { return this->nCol; }
01052 };
01053
01054 template <typename size_type, typename T> DenseVector<size_type, T>
01055 operator*(const SparseMatrixCSC<size_type, T>& a,
01056 const DenseVector<size_type, T>& b)
01057 {
01058 if (a.nCol != b.length)
01059 {
01060 printf("INCOMPATIBLE SparseMatrixCSC * DenseVector multiplication\n");
01061 exit(1);
01062 }
01063
01064 DenseVector<size_type, T> temp(b.length);
01065 T* const temp_values = temp.values;
01066 size_type const istop = temp.length;
01067 T const zero = T();
01068
01069 #pragma mta assert parallel
01070 for (size_type i = 0; i < istop; i++) temp_values[i] = zero;
01071
01072 size_type const colstop = a.nCol;
01073 size_type* const a_index = a.index;
01074 size_type* const a_rows = a.rows;
01075 T* const a_values = a.values;
01076 T* const b_values = b.values;
01077
01078 #pragma mta assert parallel
01079 #pragma mta loop future
01080 for (size_type col = 0; col < colstop; col++)
01081 {
01082 size_type const start = a_index[col];
01083 size_type const stop = a_index[col + 1];
01084
01085 #pragma mta assert parallel
01086 for (size_type i = start; i < stop; i++)
01087 {
01088
01089 temp_values[ a_rows[i] ] += a_values[i] * b_values[ a_rows[i] ];
01090 }
01091 }
01092
01093 return temp;
01094 }
01095
01096
01097
01098
01099 template <typename size_type, typename T = double>
01100 class SparseMatrixCOO : MatrixBase<size_type, T> {
01101 private:
01102 size_type* columns;
01103 size_type* rows;
01104
01105 public:
01106 SparseMatrixCOO() : MatrixBase<size_type, T> ()
01107 {
01108 this->columns = 0;
01109 this->rows = 0;
01110 }
01111
01112 SparseMatrixCOO(size_type const row, size_type const col,
01113 size_type const nnz) :
01114 MatrixBase<size_type, T> (row, col, nnz)
01115 {
01116 rows = (size_type*) malloc (nnz * sizeof(size_type));
01117 columns = (size_type*) malloc (nnz * sizeof(size_type));
01118 }
01119
01120 SparseMatrixCOO (SparseMatrixCOO<size_type, T>& a) :
01121 MatrixBase<size_type, T>(a)
01122 {
01123 rows = (size_type*) malloc(a.nNonZero * sizeof(size_type));
01124 columns = (size_type*) malloc(a.nNonZero * sizeof(size_type));
01125
01126 size_type const stop = this->nNonZero;
01127 T* const this_values = this->values;
01128 T* const a_values = a.values;
01129 size_type* const this_rows = this->rows;
01130 size_type* const this_columns = this->columns;
01131 size_type* const a_rows = a.rows;
01132 size_type* const a_columns = a.columns;
01133
01134 #pragma mta assert parallel
01135 for (size_type i = 0; i < stop; i++)
01136 {
01137 this_values[i] = a_values[i];
01138 this_rows[i] = a_rows[i];
01139 this_columns[i] = a_columns[i];
01140 }
01141 }
01142
01143 ~SparseMatrixCOO()
01144 {
01145 if (this->columns) free (this->columns);
01146 if (this->index) free (this->index);
01147 if (this->rows) free (this->rows);
01148
01149 this->columns = 0;
01150 this->index = 0;
01151 this->rows = 0;
01152 }
01153
01154 SparseMatrixCOO<size_type, T>&
01155 operator=(const SparseMatrixCOO<size_type, T>& a)
01156 {
01157 if (this != &a)
01158 {
01159 this->nRow = a.nRow;
01160 this->nCol = a.nCol;
01161 this->nNonZero = a.nNonZero;
01162 this->index = 0;
01163
01164 if (rows) free(rows);
01165
01166 rows = (size_type*) malloc(a.nNonZero * sizeof(size_type));
01167
01168 if (columns) free(columns);
01169
01170 columns = (size_type*) malloc(a.nNonZero * sizeof(size_type));
01171
01172 if (this->values) free(this->values);
01173
01174 this->values = (size_type*) malloc(a.nNonZero * sizeof(size_type));
01175
01176
01177 size_type const stop = this->nNonZero;
01178 T* const this_values = this->values;
01179 T* const a_values = a.values;
01180 size_type* const this_rows = this->rows;
01181 size_type* const this_columns = this->columns;
01182 size_type* const a_rows = a.rows;
01183 size_type* const a_columns = a.columns;
01184
01185 #pragma mta assert no dependence
01186 for (size_type i = 0; i < stop; i++)
01187 {
01188 this_values[i] = a_values[i];
01189 this_rows[i] = a_rows[i];
01190 this_columns[i] = a_columns[i];
01191 }
01192 }
01193
01194 return *this;
01195 }
01196
01197 SparseMatrixCOO<size_type, T>&
01198 operator=(const SparseMatrixCSR<size_type, T>& a)
01199 {
01200 printf("NOT IMPLEMENTED: Converting from CSR to COO\n");
01201 return *this;
01202 }
01203
01204 SparseMatrixCOO<size_type, T>&
01205 operator=(const SparseMatrixCSC<size_type, T>& a)
01206 {
01207 printf("NOT IMPLEMENTED: Converting from CSC to COO\n");
01208 return *this;
01209 }
01210
01211 SparseMatrixCOO<size_type, T>&
01212 operator()(const SparseMatrixCSR<size_type, T>&);
01213
01214 SparseMatrixCOO<size_type, T>&
01215 operator()(const SparseMatrixCSC<size_type, T>&);
01216
01217 DenseVector<size_type, T> friend
01218 operator* <size_type, T> (const SparseMatrixCOO<size_type, T>&,
01219 const DenseVector<size_type, T>&);
01220
01221 void MatrixPrint (char const* name)
01222 {
01223 printf("SparseMatrixCOO Print %s row $d col %d\n", name,
01224 this->nRow, this->nCol);
01225 }
01226 };
01227
01228 template <typename size_type, typename T>
01229 DenseVector<size_type, T>
01230 operator*(const SparseMatrixCOO<size_type, T>& a,
01231 const DenseVector<size_type, T>& b)
01232 {
01233 if (a.nCol != b.length)
01234 {
01235 printf("INCOMPATIBLE SparseMatrixCoo * DenseVector multiplication\n");
01236 exit(1);
01237 }
01238
01239 DenseVector<size_type, T> temp(b.length);
01240 size_type const stop = temp.length;
01241 T* const temp_values = temp.values;
01242 T const zero = T();
01243
01244 #pragma mta assert parallel
01245 for (size_type i = 0; i < stop; i++) temp_values[i] = zero;
01246
01247 size_type const end = a.nNonZero;
01248 T* const a_values = a.values;
01249 T* const b_values = b.values;
01250 size_type* const a_rows = a.rows;
01251 size_type* const b_rows = b.rows;
01252 size_type* const a_columns = a.columns;
01253
01254 #pragma mta assert parallel
01255 for (size_type i = 0; i < end; i++)
01256 {
01257 mt_inc (temp_values[a_rows[i]], a_values[i] * b_values[a_columns[i]]);
01258 }
01259
01260 return temp;
01261 }
01262
01263
01264
01265 template <typename size_type, typename T>
01266 DenseVector<size_type, T>& linbcg (const SparseMatrixCSR<size_type, T>& A,
01267 DenseVector<size_type, T>& x,
01268 const DenseVector<size_type, T>& b,
01269 size_type const itermax,
01270 T& err,
01271 T const tol)
01272 {
01273 #pragma mta trace "linbcg start"
01274
01275 size_type const length = A.MatrixRows();
01276 double const bnorm = b.norm2();
01277 double ak = 0, akden = 0, bk = 0, bknum = 0, bkden = 0;
01278
01279 DenseVector<size_type, T> p(length), pp(length);
01280 DenseVector<size_type, T> r(length), rr(length);
01281 DenseVector<size_type, T> z(length), zz(length);
01282 DenseVector<size_type, T> d = diagonal(A);
01283
01284 r = b - (A * x);
01285 z = d.asolve (r);
01286 rr = r;
01287
01288 for (size_type iter = 0; iter < itermax; iter++)
01289 {
01290 zz = d.asolve (rr);
01291
01292 bknum = z * rr;
01293
01294 if (iter == 0)
01295 {
01296 p = z;
01297 pp = zz;
01298 }
01299 else
01300 {
01301 bk = bknum / bkden;
01302 p = (bk * p) + z;
01303 pp = (bk * pp) + zz;
01304 }
01305
01306 bkden = bknum;
01307
01308 z = A * p;
01309 akden = z * pp;
01310 ak = bknum / akden;
01311
01312 zz = Transpose_SMVm(A, pp);
01313
01314 x += (ak * p);
01315 r -= (ak * z);
01316 rr -= (ak * zz);
01317
01318 z = d.asolve(r);
01319 err = r.norm2() / bnorm;
01320
01321 if (err < tol) break;
01322 }
01323
01324 #pragma mta trace "linbcg stop"
01325
01326 return x;
01327 }
01328
01329 #endif