fathom/moab-docs/tstt__perf__binding_8cpp_source.html

00001 /** TSTT Mesh Interface brick mesh performance test
00002  *
00003  * This program tests TSTT mesh interface functions used to create a square structured
00004  * mesh.  Boilerplate taken from tstt mesh interface test in MOAB and performance test in MOAB
00005  *
00006  */
00007
00008 // Different platforms follow different conventions for usage
00009 #ifndef _WIN32
00010 #include <sys/resource.h>
00011 #endif
00012 #ifdef SOLARIS
00013 extern "C" int getrusage( int, struct rusage* );
00014 #ifndef RUSAGE_SELF
00015 #include </usr/ucbinclude/sys/rusage.h>
00016 #endif
00017 #endif
00018
00019 #include <cstdlib>
00020 #include <cstdio>
00021 #include <iostream>
00022
00023 #include <iostream>
00024 #include <cassert>
00025 #include "iMesh.h"
00026
00027 // needed to get the proper size for handles
00028
00029 using namespace std;
00030
00031 double LENGTH = 1.0;
00032
00033 // forward declare some functions
00034 void query_elem_to_vert( iMesh_Instance mesh );
00035 void query_vert_to_elem( iMesh_Instance mesh );
00036 void print_time( const bool print_em, double& tot_time, double& utime, double& stime, long& imem, long& rmem );
00037 void build_connect( const int nelem, const int vstart, int*& connect );
00038 void testB( iMesh_Instance mesh, const int nelem, const double* coords, int* connect );
00039 void testC( iMesh_Instance mesh, const int nelem, const double* coords );
00040 void compute_edge( double* start, const int nelem, const double xint, const int stride );
00041 void compute_face( double* a, const int nelem, const double xint, const int stride1, const int stride2 );
00042 void build_coords( const int nelem, double*& coords );
00043 void build_connect( const int nelem, const int vstart, int*& connect );
00044
00045 int main( int argc, char* argv[] )
00046 {
00047     int nelem = 20;
00048     if( argc < 3 )
00049     {
00050         std::cout << "Usage: " << argv[0] << " <ints_per_side> <A|B|C>" << std::endl;
00051         return 1;
00052     }
00053
00054     char which_test = '\0';
00055
00056     sscanf( argv[1], "%d", &nelem );
00057     sscanf( argv[2], "%c", &which_test );
00058
00059     if( which_test != 'B' && which_test != 'C' )
00060     {
00061         std::cout << "Must indicate B or C for test." << std::endl;
00062         return 1;
00063     }
00064
00065     std::cout << "number of elements: " << nelem << "; test " << which_test << std::endl;
00066
00067     // initialize the data in native format
00068
00069     // pre-build the coords array
00070     double* coords;
00071     build_coords( nelem, coords );
00072     assert( NULL != coords );
00073
00074     // test B: create mesh using bulk interface
00075
00076     // create an implementation
00077     iMesh_Instance mesh;
00078     int result;
00079     iMesh_newMesh( NULL, &mesh, &result, 0 );
00080     int* connect = NULL;
00081
00082     if( iBase_SUCCESS != result )
00083     {
00084         cerr << "Couldn't create mesh instance." << endl;
00085         iMesh_dtor( mesh, &result );
00086         return 1;
00087     }
00088
00089     iMesh_setGeometricDimension( mesh, 3, &result );
00090     if( iBase_SUCCESS != result )
00091     {
00092         cerr << "Couldn't set geometric dimension." << endl;
00093         iMesh_dtor( mesh, &result );
00094         return 1;
00095     }
00096
00097     switch( which_test )
00098     {
00099         case 'B':
00100             build_connect( nelem, 1, connect );
00101
00102             // test B: create mesh using bulk interface
00103             testB( mesh, nelem, coords, connect );
00104             break;
00105
00106         case 'C':
00107             // test C: create mesh using individual interface
00108             testC( mesh, nelem, coords );
00109             break;
00110     }
00111
00112     free( coords );
00113
00114     return 0;
00115 }
00116
00117 void testB( iMesh_Instance mesh, const int nelem, const double* coords, int* connect )
00118 {
00119     double utime, stime, ttime0, ttime1, ttime2, ttime3, ttime4;
00120     long imem0, rmem0, imem1, rmem1, imem2, rmem2, imem3, rmem3, imem4, rmem4;
00121
00122     print_time( false, ttime0, utime, stime, imem0, rmem0 );
00123     int num_verts = ( nelem + 1 ) * ( nelem + 1 ) * ( nelem + 1 );
00124     int num_elems = nelem * nelem * nelem;
00125
00126     // create vertices as a block; initialize to NULL so allocation is done in interface
00127     iBase_EntityHandle* vertices = NULL;
00128     int vertices_allocated       = 0, vertices_size;
00129     int result;
00130     iMesh_createVtxArr( mesh, num_verts, iBase_BLOCKED, coords, 3 * num_verts, &vertices, &vertices_allocated,
00131                         &vertices_size, &result );
00132     if( iBase_SUCCESS != result )
00133     {
00134         cerr << "Couldn't create vertices in bulk call" << endl;
00135         free( vertices );
00136         return;
00137     }
00138
00139     // need to explicitly fill connectivity array, since we don't know
00140     // the format of entity handles
00141     int nconnect                     = 8 * num_elems;
00142     iBase_EntityHandle* sidl_connect = (iBase_EntityHandle*)malloc( nconnect * sizeof( iBase_EntityHandle ) );
00143
00144     for( int i = 0; i < nconnect; i++ )
00145     {
00146         // use connect[i]-1 because we used starting vertex index (vstart) of 1
00147         assert( connect[i] - 1 < num_verts );
00148         sidl_connect[i] = vertices[connect[i] - 1];
00149     }
00150
00151     // no longer need vertices and connect arrays, free here to reduce overall peak memory usage
00152     free( vertices );
00153     free( connect );
00154
00155     // create the entities
00156     iBase_EntityHandle* new_hexes = NULL;
00157     int new_hexes_allocated       = 0, new_hexes_size;
00158     int* status                   = NULL;
00159     int status_allocated          = 0, status_size;
00160
00161     iMesh_createEntArr( mesh, iMesh_HEXAHEDRON, sidl_connect, nconnect, &new_hexes, &new_hexes_allocated,
00162                         &new_hexes_size, &status, &status_allocated, &status_size, &result );
00163     if( iBase_SUCCESS != result )
00164     {
00165         cerr << "Couldn't create hex elements in bulk call" << endl;
00166         free( new_hexes );
00167         free( status );
00168         free( sidl_connect );
00169         return;
00170     }
00171
00172     print_time( false, ttime1, utime, stime, imem1, rmem1 );
00173
00174     free( status );
00175     free( new_hexes );
00176     free( sidl_connect );
00177
00178     // query the mesh 2 ways
00179     query_elem_to_vert( mesh );
00180
00181     print_time( false, ttime2, utime, stime, imem2, rmem2 );
00182
00183     query_vert_to_elem( mesh );
00184
00185     print_time( false, ttime3, utime, stime, imem3, rmem3 );
00186
00187     iMesh_dtor( mesh, &result );
00188
00189     print_time( false, ttime4, utime, stime, imem4, rmem4 );
00190
00191     std::cout << "TSTTb/MOAB_ucd_blocked: nelem, construct, e_to_v, v_to_e, after_dtor = " << nelem << ", "
00192               << ttime1 - ttime0 << ", " << ttime2 - ttime1 << ", " << ttime3 - ttime2 << ", " << ttime4 - ttime3
00193               << " seconds" << std::endl;
00194     std::cout << "TSTTb/MOAB_ucd_blocked_memory_(rss): initial, after_construction, e-v, v-e, after_dtor:" << rmem0
00195               << ", " << rmem1 << ", " << rmem2 << ", " << rmem3 << ", " << rmem4 << " kb" << std::endl;
00196 }
00197
00198 void testC( iMesh_Instance mesh, const int nelem, const double* coords )
00199 {
00200     double utime, stime, ttime0, ttime1, ttime2, ttime3, ttime4;
00201     long imem0, rmem0, imem1, rmem1, imem2, rmem2, imem3, rmem3, imem4, rmem4;
00202     print_time( false, ttime0, utime, stime, imem0, rmem0 );
00203
00204     // need some dimensions
00205     int numv      = nelem + 1;
00206     int numv_sq   = numv * numv;
00207     int num_verts = numv * numv * numv;
00208 #define VINDEX( i, j, k ) ( ( i ) + ( (j)*numv ) + ( (k)*numv_sq ) )
00209
00210     // array to hold vertices created individually
00211     iBase_EntityHandle* sidl_vertices = (iBase_EntityHandle*)malloc( num_verts * sizeof( iBase_EntityHandle ) );
00212     int result;
00213
00214     for( int i = 0; i < num_verts; i++ )
00215     {
00216         // create the vertex
00217         iMesh_createVtx( mesh, coords[i], coords[i + num_verts], coords[i + 2 * num_verts], sidl_vertices + i,
00218                          &result );
00219         if( iBase_SUCCESS != result )
00220         {
00221             cerr << "Couldn't create vertex in individual call" << endl;
00222             return;
00223         }
00224     }
00225
00226     iBase_EntityHandle tmp_conn[8], new_hex;
00227
00228     for( int i = 0; i < nelem; i++ )
00229     {
00230         for( int j = 0; j < nelem; j++ )
00231         {
00232             for( int k = 0; k < nelem; k++ )
00233             {
00234                 int vijk    = VINDEX( i, j, k );
00235                 tmp_conn[0] = sidl_vertices[vijk];
00236                 tmp_conn[1] = sidl_vertices[vijk + 1];
00237                 tmp_conn[2] = sidl_vertices[vijk + 1 + numv];
00238                 tmp_conn[3] = sidl_vertices[vijk + numv];
00239                 tmp_conn[4] = sidl_vertices[vijk + numv * numv];
00240                 tmp_conn[5] = sidl_vertices[vijk + 1 + numv * numv];
00241                 tmp_conn[6] = sidl_vertices[vijk + 1 + numv + numv * numv];
00242                 tmp_conn[7] = sidl_vertices[vijk + numv + numv * numv];
00243
00244                 // create the entity
00245
00246                 int status;
00247                 iMesh_createEnt( mesh, iMesh_HEXAHEDRON, tmp_conn, 8, &new_hex, &status, &result );
00248                 if( iBase_SUCCESS != result )
00249                 {
00250                     cerr << "Couldn't create hex element in individual call" << endl;
00251                     // return;  do not return if errors, as we would leak memory
00252                 }
00253             }
00254         }
00255     }
00256
00257     print_time( false, ttime1, utime, stime, imem1, rmem1 );
00258
00259     free( sidl_vertices );
00260
00261     // query the mesh 2 ways
00262     query_elem_to_vert( mesh );
00263
00264     print_time( false, ttime2, utime, stime, imem2, rmem2 );
00265
00266     query_vert_to_elem( mesh );
00267
00268     print_time( false, ttime3, utime, stime, imem3, rmem3 );
00269
00270     iMesh_dtor( mesh, &result );
00271
00272     print_time( false, ttime4, utime, stime, imem4, rmem4 );
00273
00274     std::cout << "TSTTb/MOAB_ucd_indiv: nelem, construct, e_to_v, v_to_e, after_dtor = " << nelem << ", "
00275               << ttime1 - ttime0 << ", " << ttime2 - ttime1 << ", " << ttime3 - ttime2 << ", " << ttime4 - ttime3
00276               << " seconds" << std::endl;
00277     std::cout << "TSTTb/MOAB_ucd_indiv_memory_(rss): initial, after_construction, e-v, v-e, after_dtor:" << rmem0
00278               << ", " << rmem1 << ", " << rmem2 << ", " << rmem3 << ", " << rmem4 << " kb" << std::endl;
00279 }
00280
00281 void query_elem_to_vert( iMesh_Instance mesh )
00282 {
00283     iBase_EntityHandle* all_hexes = NULL;
00284     int all_hexes_size, all_hexes_allocated = 0;
00285
00286     // get all the hex elements
00287     int success;
00288     iBase_EntitySetHandle root_set;
00289     iMesh_getRootSet( mesh, &root_set, &success );
00290     if( iBase_SUCCESS != success )
00291     {
00292         cerr << "Couldn't get root set." << endl;
00293         return;
00294     }
00295
00296     iMesh_getEntities( mesh, root_set, iBase_REGION, iMesh_HEXAHEDRON, &all_hexes, &all_hexes_allocated,
00297                        &all_hexes_size, &success );
00298     if( iBase_SUCCESS != success )
00299     {
00300         cerr << "Couldn't get all hex elements in query_mesh" << endl;
00301         return;
00302     }
00303
00304     // now loop over elements
00305     iBase_EntityHandle* dum_connect = NULL;
00306     int dum_connect_allocated       = 0, dum_connect_size;
00307     double* dum_coords              = NULL;
00308     int dum_coords_size, dum_coords_allocated = 0;
00309     int order;
00310     iMesh_getDfltStorage( mesh, &order, &success );
00311     if( iBase_SUCCESS != success ) return;
00312
00313     for( int i = 0; i < all_hexes_size; i++ )
00314     {
00315         // get the connectivity of this element; will allocate space on 1st iteration,
00316         // but will have correct size on subsequent ones
00317         iMesh_getEntAdj( mesh, all_hexes[i], iBase_VERTEX, &dum_connect, &dum_connect_allocated, &dum_connect_size,
00318                          &success );
00319
00320         if( iBase_SUCCESS == success )
00321         {
00322             // get vertex coordinates; ; will allocate space on 1st iteration,
00323             // but will have correct size on subsequent ones
00324             iMesh_getVtxArrCoords( mesh, dum_connect, dum_connect_size, order, &dum_coords, &dum_coords_allocated,
00325                                    &dum_coords_size, &success );
00326
00327             double centroid[3] = { 0.0, 0.0, 0.0 };
00328             if( order == iBase_BLOCKED )
00329             {
00330                 for( int j = 0; j < 8; j++ )
00331                 {
00332                     centroid[0] += dum_coords[j];
00333                     centroid[1] += dum_coords[8 + j];
00334                     centroid[2] += dum_coords[16 + j];
00335                 }
00336             }
00337             else
00338             {
00339                 for( int j = 0; j < 8; j++ )
00340                 {
00341                     centroid[0] += dum_coords[3 * j];
00342                     centroid[1] += dum_coords[3 * j + 1];
00343                     centroid[2] += dum_coords[3 * j + 2];
00344                 }
00345             }
00346         }
00347
00348         if( iBase_SUCCESS != success )
00349         {
00350             cerr << "Problem getting connectivity or vertex coords." << endl;
00351             return;
00352         }
00353     }
00354
00355     free( all_hexes );
00356     free( dum_connect );
00357     free( dum_coords );
00358 }
00359
00360 void query_vert_to_elem( iMesh_Instance mesh )
00361 {
00362     iBase_EntityHandle* all_verts = NULL;
00363     int all_verts_allocated       = 0, all_verts_size;
00364
00365     iBase_EntitySetHandle root_set;
00366     int success;
00367     iMesh_getRootSet( mesh, &root_set, &success );
00368     if( iBase_SUCCESS != success )
00369     {
00370         cerr << "Couldn't get root set." << endl;
00371         return;
00372     }
00373
00374     // get all the vertices elements
00375     iMesh_getEntities( mesh, root_set, iBase_VERTEX, iMesh_POINT, &all_verts, &all_verts_allocated, &all_verts_size,
00376                        &success );
00377     if( iBase_SUCCESS != success )
00378     {
00379         cerr << "Couldn't get all vertices in query_vert_to_elem" << endl;
00380         return;
00381     }
00382
00383     // for this mesh, should never be more than 8 hexes connected to a vertex
00384     iBase_EntityHandle* dum_hexes = (iBase_EntityHandle*)calloc( 8, sizeof( iBase_EntityHandle ) );
00385
00386     int dum_hexes_allocated = 8, dum_hexes_size;
00387
00388     // now loop over vertices
00389     for( int i = 0; i < all_verts_size; i++ )
00390     {
00391
00392         // get the connectivity of this element; will have to allocate space on every
00393         // iteration, since size can vary
00394         iMesh_getEntAdj( mesh, all_verts[i], iBase_REGION, &dum_hexes, &dum_hexes_allocated, &dum_hexes_size,
00395                          &success );
00396         if( iBase_SUCCESS != success )
00397         {
00398             cerr << "Problem getting connectivity or vertex coords." << endl;
00399             // do not return early, as we would leak memory
00400         }
00401     }
00402
00403     free( dum_hexes );
00404     free( all_verts );
00405 }
00406
00407 void print_time( const bool print_em, double& tot_time, double& utime, double& stime, long& imem, long& rmem )
00408 {
00409     // Disabling this for windows
00410     // Different platforms follow different conventions for usage
00411 #ifndef _WIN32  // Windows does not have rusage
00412     struct rusage r_usage;
00413     getrusage( RUSAGE_SELF, &r_usage );
00414     utime    = (double)r_usage.ru_utime.tv_sec + ( (double)r_usage.ru_utime.tv_usec / 1.e6 );
00415     stime    = (double)r_usage.ru_stime.tv_sec + ( (double)r_usage.ru_stime.tv_usec / 1.e6 );
00416     tot_time = utime + stime;
00417     if( print_em )
00418         std::cout << "User, system, total time = " << utime << ", " << stime << ", " << tot_time << std::endl;
00419
00420 #ifndef LINUX
00421     imem = r_usage.ru_idrss;
00422     rmem = r_usage.ru_maxrss;
00423     std::cout << "Max resident set size = " << r_usage.ru_maxrss << " kbytes" << std::endl;
00424     std::cout << "Int resident set size = " << r_usage.ru_idrss << " kbytes" << std::endl;
00425 #else
00426     imem = rmem = 0;
00427     system( "ps o args,drs,rss | grep perf | grep -v grep" );  // RedHat 9.0 doesnt fill in actual
00428                                                                // memory data
00429 #endif
00430     // delete [] hex_array;
00431 #endif  // if not on windows
00432 }
00433
00434 void compute_edge( double* start, const int nelem, const double xint, const int stride )
00435 {
00436     for( int i = 1; i < nelem; i++ )
00437     {
00438         start[i * stride]                     = start[0] + i * xint;
00439         start[nelem + 1 + i * stride]         = start[nelem + 1] + i * xint;
00440         start[2 * ( nelem + 1 ) + i * stride] = start[2 * ( nelem + 1 )] + i * xint;
00441     }
00442 }
00443
00444 void compute_face( double* a, const int nelem, const double xint, const int stride1, const int stride2 )
00445 {
00446     // 2D TFI on a face starting at a, with strides stride1 in ada and stride2 in tse
00447     for( int j = 1; j < nelem; j++ )
00448     {
00449         double tse = j * xint;
00450         for( int i = 1; i < nelem; i++ )
00451         {
00452             double ada = i * xint;
00453
00454             a[i * stride1 + j * stride2] =
00455                 ( 1.0 - ada ) * a[i * stride1] + ada * a[i * stride1 + nelem * stride2] +
00456                 ( 1.0 - tse ) * a[j * stride2] + tse * a[j * stride2 + nelem * stride1] -
00457                 ( 1.0 - tse ) * ( 1.0 - ada ) * a[0] - ( 1.0 - tse ) * ada * a[nelem * stride1] -
00458                 tse * ( 1.0 - ada ) * a[nelem * stride2] - tse * ada * a[nelem * ( stride1 + stride2 )];
00459             a[nelem + 1 + i * stride1 + j * stride2] =
00460                 ( 1.0 - ada ) * a[nelem + 1 + i * stride1] + ada * a[nelem + 1 + i * stride1 + nelem * stride2] +
00461                 ( 1.0 - tse ) * a[nelem + 1 + j * stride2] + tse * a[nelem + 1 + j * stride2 + nelem * stride1] -
00462                 ( 1.0 - tse ) * ( 1.0 - ada ) * a[nelem + 1 + 0] -
00463                 ( 1.0 - tse ) * ada * a[nelem + 1 + nelem * stride1] -
00464                 tse * ( 1.0 - ada ) * a[nelem + 1 + nelem * stride2] -
00465                 tse * ada * a[nelem + 1 + nelem * ( stride1 + stride2 )];
00466             a[2 * ( nelem + 1 ) + i * stride1 + j * stride2] =
00467                 ( 1.0 - ada ) * a[2 * ( nelem + 1 ) + i * stride1] +
00468                 ada * a[2 * ( nelem + 1 ) + i * stride1 + nelem * stride2] +
00469                 ( 1.0 - tse ) * a[2 * ( nelem + 1 ) + j * stride2] +
00470                 tse * a[2 * ( nelem + 1 ) + j * stride2 + nelem * stride1] -
00471                 ( 1.0 - tse ) * ( 1.0 - ada ) * a[2 * ( nelem + 1 ) + 0] -
00472                 ( 1.0 - tse ) * ada * a[2 * ( nelem + 1 ) + nelem * stride1] -
00473                 tse * ( 1.0 - ada ) * a[2 * ( nelem + 1 ) + nelem * stride2] -
00474                 tse * ada * a[2 * ( nelem + 1 ) + nelem * ( stride1 + stride2 )];
00475         }
00476     }
00477 }
00478
00479 void build_coords( const int nelem, double*& coords )
00480 {
00481     double ttime0, ttime1, utime1, stime1;
00482     long imem, rmem;
00483     print_time( false, ttime0, utime1, stime1, imem, rmem );
00484
00485     // allocate the memory
00486     int numv     = nelem + 1;
00487     int numv_sq  = numv * numv;
00488     int tot_numv = numv * numv * numv;
00489     coords       = (double*)malloc( 3 * tot_numv * sizeof( double ) );
00490
00491 // use FORTRAN-like indexing
00492 #define VINDEX( i, j, k ) ( ( i ) + ( (j)*numv ) + ( (k)*numv_sq ) )
00493     int idx;
00494     double scale1, scale2, scale3;
00495     // use these to prevent optimization on 1-scale, etc (real map wouldn't have
00496     // all these equal)
00497     scale1 = LENGTH / nelem;
00498     scale2 = LENGTH / nelem;
00499     scale3 = LENGTH / nelem;
00500
00501 #ifdef REALTFI
00502     // use a real TFI xform to compute coordinates
00503     // compute edges
00504     // i (stride=1)
00505     compute_edge( &coords[VINDEX( 0, 0, 0 )], nelem, scale1, 1 );
00506     compute_edge( &coords[VINDEX( 0, nelem, 0 )], nelem, scale1, 1 );
00507     compute_edge( &coords[VINDEX( 0, 0, nelem )], nelem, scale1, 1 );
00508     compute_edge( &coords[VINDEX( 0, nelem, nelem )], nelem, scale1, 1 );
00509     // j (stride=numv)
00510     compute_edge( &coords[VINDEX( 0, 0, 0 )], nelem, scale1, numv );
00511     compute_edge( &coords[VINDEX( nelem, 0, 0 )], nelem, scale1, numv );
00512     compute_edge( &coords[VINDEX( 0, 0, nelem )], nelem, scale1, numv );
00513     compute_edge( &coords[VINDEX( nelem, 0, nelem )], nelem, scale1, numv );
00514     // k (stride=numv^2)
00515     compute_edge( &coords[VINDEX( 0, 0, 0 )], nelem, scale1, numv_sq );
00516     compute_edge( &coords[VINDEX( nelem, 0, 0 )], nelem, scale1, numv_sq );
00517     compute_edge( &coords[VINDEX( 0, nelem, 0 )], nelem, scale1, numv_sq );
00518     compute_edge( &coords[VINDEX( nelem, nelem, 0 )], nelem, scale1, numv_sq );
00519
00520     // compute faces
00521     // i=0, nelem
00522     compute_face( &coords[VINDEX( 0, 0, 0 )], nelem, scale1, numv, numv_sq );
00523     compute_face( &coords[VINDEX( nelem, 0, 0 )], nelem, scale1, numv, numv_sq );
00524     // j=0, nelem
00525     compute_face( &coords[VINDEX( 0, 0, 0 )], nelem, scale1, 1, numv_sq );
00526     compute_face( &coords[VINDEX( 0, nelem, 0 )], nelem, scale1, 1, numv_sq );
00527     // k=0, nelem
00528     compute_face( &coords[VINDEX( 0, 0, 0 )], nelem, scale1, 1, numv );
00529     compute_face( &coords[VINDEX( 0, 0, nelem )], nelem, scale1, 1, numv );
00530
00531     // initialize corner indices
00532     int i000 = VINDEX( 0, 0, 0 );
00533     int ia00 = VINDEX( nelem, 0, 0 );
00534     int i0t0 = VINDEX( 0, nelem, 0 );
00535     int iat0 = VINDEX( nelem, nelem, 0 );
00536     int i00g = VINDEX( 0, 0, nelem );
00537     int ia0g = VINDEX( nelem, 0, nelem );
00538     int i0tg = VINDEX( 0, nelem, nelem );
00539     int iatg = VINDEX( nelem, nelem, nelem );
00540     double cX, cY, cZ;
00541     int adaInts   = nelem;
00542     int tseInts   = nelem;
00543     int gammaInts = nelem;
00544
00545     for( int i = 1; i < nelem; i++ )
00546     {
00547         for( int j = 1; j < nelem; j++ )
00548         {
00549             for( int k = 1; k < nelem; k++ )
00550             {
00551                 // idx = VINDEX(i,j,k);
00552                 double tse   = i * scale1;
00553                 double ada   = j * scale2;
00554                 double gamma = k * scale3;
00555                 double tm1   = 1.0 - tse;
00556                 double am1   = 1.0 - ada;
00557                 double gm1   = 1.0 - gamma;
00558
00559                 cX = gm1 * ( am1 * ( tm1 * coords[i000] + tse * coords[i0t0] ) +
00560                              ada * ( tm1 * coords[ia00] + tse * coords[iat0] ) ) +
00561                      gamma * ( am1 * ( tm1 * coords[i00g] + tse * coords[i0tg] ) +
00562                                ada * ( tm1 * coords[ia0g] + tse * coords[iatg] ) );
00563
00564                 cY = gm1 * ( am1 * ( tm1 * coords[i000] + tse * coords[i0t0] ) +
00565                              ada * ( tm1 * coords[ia00] + tse * coords[iat0] ) ) +
00566                      gamma * ( am1 * ( tm1 * coords[i00g] + tse * coords[i0tg] ) +
00567                                ada * ( tm1 * coords[ia0g] + tse * coords[iatg] ) );
00568
00569                 cZ = gm1 * ( am1 * ( tm1 * coords[i000] + tse * coords[i0t0] ) +
00570                              ada * ( tm1 * coords[ia00] + tse * coords[iat0] ) ) +
00571                      gamma * ( am1 * ( tm1 * coords[i00g] + tse * coords[i0tg] ) +
00572                                ada * ( tm1 * coords[ia0g] + tse * coords[iatg] ) );
00573
00574                 double* ai0k = &coords[VINDEX( k, 0, i )];
00575                 double* aiak = &coords[VINDEX( k, adaInts, i )];
00576                 double* a0jk = &coords[VINDEX( k, j, 0 )];
00577                 double* atjk = &coords[VINDEX( k, j, tseInts )];
00578                 double* aij0 = &coords[VINDEX( 0, j, i )];
00579                 double* aijg = &coords[VINDEX( gammaInts, j, i )];
00580
00581                 coords[VINDEX( i, j, k )] = ( am1 * ai0k[0] + ada * aiak[0] + tm1 * a0jk[0] + tse * atjk[0] +
00582                                               gm1 * aij0[0] + gamma * aijg[0] ) /
00583                                                 2.0 -
00584                                             cX / 2.0;
00585
00586                 coords[nelem + 1 + VINDEX( i, j, k )] =
00587                     ( am1 * ai0k[nelem + 1] + ada * aiak[nelem + 1] + tm1 * a0jk[nelem + 1] + tse * atjk[nelem + 1] +
00588                       gm1 * aij0[nelem + 1] + gamma * aijg[nelem + 1] ) /
00589                         2.0 -
00590                     cY / 2.0;
00591
00592                 coords[2 * ( nelem + 1 ) + VINDEX( i, j, k )] =
00593                     ( am1 * ai0k[2 * ( nelem + 1 )] + ada * aiak[2 * ( nelem + 1 )] + tm1 * a0jk[2 * ( nelem + 1 )] +
00594                       tse * atjk[2 * ( nelem + 1 )] + gm1 * aij0[2 * ( nelem + 1 )] +
00595                       gamma * aijg[2 * ( nelem + 1 )] ) /
00596                         2.0 -
00597                     cZ / 2.0;
00598             }
00599         }
00600     }
00601
00602 #else
00603     for( int i = 0; i < numv; i++ )
00604     {
00605         for( int j = 0; j < numv; j++ )
00606         {
00607             for( int k = 0; k < numv; k++ )
00608             {
00609                 idx = VINDEX( i, j, k );
00610                 // blocked coordinate ordering
00611                 coords[idx]                = i * scale1;
00612                 coords[tot_numv + idx]     = j * scale2;
00613                 coords[2 * tot_numv + idx] = k * scale3;
00614             }
00615         }
00616     }
00617 #endif
00618
00619     print_time( false, ttime1, utime1, stime1, imem, rmem );
00620     std::cout << "TSTTbinding/MOAB: TFI time = " << ttime1 - ttime0 << " sec" << std::endl;
00621 }
00622
00623 void build_connect( const int nelem, const int vstart, int*& connect )
00624 {
00625     // allocate the memory
00626     int nume_tot = nelem * nelem * nelem;
00627     connect      = (int*)malloc( 8 * nume_tot * sizeof( int ) );
00628
00629     int vijk;
00630     int numv    = nelem + 1;
00631     int numv_sq = numv * numv;
00632     int idx     = 0;
00633     for( int i = 0; i < nelem; i++ )
00634     {
00635         for( int j = 0; j < nelem; j++ )
00636         {
00637             for( int k = 0; k < nelem; k++ )
00638             {
00639                 vijk           = vstart + VINDEX( i, j, k );
00640                 connect[idx++] = vijk;
00641                 connect[idx++] = vijk + 1;
00642                 connect[idx++] = vijk + 1 + numv;
00643                 connect[idx++] = vijk + numv;
00644                 connect[idx++] = vijk + numv * numv;
00645                 connect[idx++] = vijk + 1 + numv * numv;
00646                 connect[idx++] = vijk + 1 + numv + numv * numv;
00647                 connect[idx++] = vijk + numv + numv * numv;
00648                 assert( i <= numv * numv * numv );
00649             }
00650         }
00651     }
00652 }