MOAB: Mesh Oriented datABase  (version 5.3.0)
findpt.c
Go to the documentation of this file.
00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <stdarg.h>
00004 #include <math.h> /* for cos, fabs */
00005 #include <float.h>
00006 #include <string.h> /* for memcpy */
00007 
00008 #include "moab/FindPtFuncs.h"
00009 
00010 const unsigned opt_no_constraints_2 = 3 + 1;
00011 const unsigned opt_no_constraints_3 = 9 + 3 + 1;
00012 
00013 /*--------------------------------------------------------------------------
00014    Lobatto Polynomial Bounds
00015 
00016    Needed inputs are the Gauss-Lobatto quadrature nodes and weights:
00017      unsigned nr = ..., ns = ...;
00018      realType zr[nr], wr[nr];
00019      realType zs[ns], ws[ns];
00020 
00021      lobatto_nodes(zr,nr); lobatto_weights(zr,wr,nr);
00022      lobatto_nodes(zs,ns); lobatto_weights(zs,ws,ns);
00023 
00024    The number of points in the constructed piecewise (bi-)linear bounds
00025    is a parameter; more points give tighter bounds
00026 
00027      unsigned mr = 2*nr, ms = 2*ns;
00028 
00029    The necessary setup is accomplished via:
00030      lob_bnd_base b_data_r;
00031      lob_bnd_ext  e_data_s;
00032 
00033      lob_bnd_base_alloc(&b_data_r,nr,mr);
00034      lob_bnd_base_setup(&b_data_r,zr,wr);
00035      lob_bnd_ext_alloc(&e_data_s,ns,ms);
00036      lob_bnd_ext_setup(&e_data_s,zs,ws);
00037 
00038    Bounds may then be computed via:
00039      realType work1r[2*mr], work1s[2*ms], work2[2*mr + 2*mr*ns + 2*mr*ms];
00040      realType ur[nr], us[ns];    // 1-d polynomials on the zr[] and zs[] nodes
00041      realType u[ns][nr];         // 2-d polynomial on zr[] (x) zs[]
00042      realType bound[2];          // = { min, max } (to be computed)
00043 
00044      lob_bnd_1(&b_data_r  ,ur,bound,work1r); // compute bounds on ur
00045      lob_bnd_1(&e_data_s.b,us,bound,work1s); // compute bounds on us
00046      lob_bnd_2(&b_data_r, &e_data_s,
00047                (const double*)&u[0][0],bound,work2); // compute bounds on u
00048    The above routines access the zr,zs arrays passed to *_setup
00049      (so do not delete them between calls)
00050 
00051    Memory allocated in *_setup is freed with
00052      lob_bnd_base_free(&b_data_r);
00053      lob_bnd_ext_free(&e_data_s);
00054 
00055   --------------------------------------------------------------------------*/
00056 
00057 typedef struct
00058 {
00059     unsigned  n; /* number of Lobatto nodes in input */
00060     unsigned  m; /* number of Chebyshev nodes used to calculate bounds */
00061     realType *Q0, *Q1; /* Q0[n], Q1[n] -- first two rows of change of basis matrix
00062                       from Lobatto node Lagrangian to Legendre */
00063     const realType* z; /* z[n] -- external; Lobatto nodes */
00064     realType*       h; /* h[m] -- Chebyshev nodes */
00065     realType *      uv, *ov; /* uv[n][m], ov[n][m] --
00066                               uv[j][:] is a piecewise linear function in the nodal
00067                                        basis with nodes h[m] that is everywhere less
00068                                        than or equal to the jth Lagrangian basis
00069                                        function (with the Lobatto nodes z[n])
00070                               ov[j][:] is everywhere greater than or equal */
00071 } lob_bnd_base;
00072 
00073 typedef struct
00074 {
00075     lob_bnd_base b;
00076     realType *   uvp, *uvn, *ovp, *ovn; /* [n][m] -- uv and ov split into
00077                                          positive and negative parts */
00078 } lob_bnd_ext;
00079 
00080 static void lob_bnd_base_alloc( lob_bnd_base* p, unsigned n, unsigned m )
00081 {
00082     p->n = n, p->m = m;
00083     p->Q0 = tmalloc( realType, 2 * n + m + 2 * n * m );
00084     p->Q1 = p->Q0 + n;
00085     p->h = p->Q1 + n;
00086     p->uv = p->h + m;
00087     p->ov = p->uv + n * m;
00088 }
00089 
00090 static void lob_bnd_base_free( lob_bnd_base* p )
00091 {
00092     free( p->Q0 );
00093 }
00094 
00095 static void lob_bnd_ext_alloc( lob_bnd_ext* p, unsigned n, unsigned m )
00096 {
00097     p->b.n = n, p->b.m = m;
00098     p->b.Q0 = tmalloc( realType, 2 * n + m + 6 * n * m );
00099     p->b.Q1 = p->b.Q0 + n;
00100     p->b.h = p->b.Q1 + n;
00101     p->b.uv = p->b.h + m;
00102     p->b.ov = p->b.uv + n * m;
00103     p->uvp = p->b.ov + n * m;
00104     p->uvn = p->uvp + n * m;
00105     p->ovp = p->uvn + n * m;
00106     p->ovn = p->ovp + n * m;
00107 }
00108 
00109 static void lob_bnd_ext_free( lob_bnd_ext* p )
00110 {
00111     free( p->b.Q0 );
00112 }
00113 
00114 static void lob_bnd_base_setup( lob_bnd_base* p, const realType* z, const realType* w )
00115 {
00116     unsigned  i, j, m = p->m, n = p->n, mm = 2 * m - 1;
00117     realType *q = tmalloc( realType, ( 2 * n + 1 ) * mm + 6 * n ), *J = q + mm, *D = J + n * mm, *work = D + n * mm;
00118     p->z = z;
00119     for( i = 0; i < n; ++i )
00120         p->Q0[ i ] = w[ i ] / 2, p->Q1[ i ] = 3 * p->Q0[ i ] * z[ i ];
00121     p->h[ 0 ] = -1, p->h[ m - 1 ] = 1;
00122     for( j = 1; j < m - 1; ++j )
00123         p->h[ j ] = mbcos( ( m - j - 1 ) * MOAB_POLY_PI / ( m - 1 ) );
00124     for( j = 0; j < m - 1; ++j )
00125         q[ 2 * j ] = p->h[ j ], q[ 2 * j + 1 ] = ( p->h[ j ] + p->h[ j + 1 ] ) / 2;
00126     q[ mm - 1 ] = p->h[ m - 1 ];
00127     lagrange_weights_deriv( z, n, q, mm, J, D, work );
00128     for( i = 0; i < n; ++i )
00129     {
00130         realType *uv = p->uv + i * m, *ov = p->ov + i * m;
00131         ov[ 0 ] = uv[ 0 ] = J[ i ];
00132         ov[ m - 1 ] = uv[ m - 1 ] = J[ ( mm - 1 ) * n + i ];
00133         for( j = 1; j < m - 1; ++j )
00134         {
00135             unsigned jj = 2 * j;
00136             realType c0 = J[ ( jj - 1 ) * n + i ] + ( q[ jj ] - q[ jj - 1 ] ) * D[ ( jj - 1 ) * n + i ];
00137             realType c1 = J[ ( jj + 1 ) * n + i ] + ( q[ jj ] - q[ jj + 1 ] ) * D[ ( jj + 1 ) * n + i ];
00138             realType c2 = J[ jj * n + i ];
00139             rminmax_3( &uv[ j ], &ov[ j ], c0, c1, c2 );
00140         }
00141     }
00142     free( q );
00143 }
00144 
00145 static void lob_bnd_ext_setup( lob_bnd_ext* p, const realType* z, const realType* w )
00146 {
00147     unsigned i, mn = p->b.m * p->b.n;
00148     lob_bnd_base_setup( &p->b, z, w );
00149     for( i = 0; i < mn; ++i )
00150     {
00151         realType uvi = p->b.uv[ i ], ovi = p->b.ov[ i ];
00152         p->uvp[ i ] = p->uvn[ i ] = p->ovp[ i ] = p->ovn[ i ] = 0;
00153         if( uvi > 0 )
00154             p->uvp[ i ] = uvi;
00155         else
00156             p->uvn[ i ] = uvi;
00157         if( ovi > 0 )
00158             p->ovp[ i ] = ovi;
00159         else
00160             p->ovn[ i ] = ovi;
00161     }
00162 }
00163 
00164 static void lob_bnd_lines( const lob_bnd_base* p, const realType* u, realType* a, realType* b )
00165 {
00166     unsigned        i, j;
00167     realType        a0 = 0, a1 = 0;
00168     const realType *uv = p->uv, *ov = p->ov;
00169     for( i = 0; i < p->n; ++i )
00170         a0 += p->Q0[ i ] * u[ i ], a1 += p->Q1[ i ] * u[ i ];
00171     for( j = 0; j < p->m; ++j )
00172         b[ j ] = a[ j ] = a0 + a1 * p->h[ j ];
00173     for( i = 0; i < p->n; ++i )
00174     {
00175         realType w = u[ i ] - ( a0 + a1 * p->z[ i ] );
00176         if( w >= 0 )
00177             for( j = 0; j < p->m; ++j )
00178                 a[ j ] += w * ( *uv++ ), b[ j ] += w * ( *ov++ );
00179         else
00180             for( j = 0; j < p->m; ++j )
00181                 a[ j ] += w * ( *ov++ ), b[ j ] += w * ( *uv++ );
00182     }
00183 }
00184 
00185 /* work holds p->m * 2 doubles */
00186 static void lob_bnd_1( const lob_bnd_base* p, const realType* u, realType bnd[ 2 ], realType* work )
00187 {
00188     unsigned  j;
00189     realType *a = work, *b = work + p->m;
00190     lob_bnd_lines( p, u, a, b );
00191     bnd[ 0 ] = a[ 0 ], bnd[ 1 ] = b[ 0 ];
00192     for( j = 1; j < p->m; ++j )
00193     {
00194         if( a[ j ] < bnd[ 0 ] ) bnd[ 0 ] = a[ j ];
00195         if( b[ j ] > bnd[ 1 ] ) bnd[ 1 ] = b[ j ];
00196     }
00197 }
00198 
00199 /* work holds 2*mr + 2*mr*ns + 2*mr*ms doubles */
00200 static void lob_bnd_2( const lob_bnd_base* pr, const lob_bnd_ext* ps, const realType* u, realType bnd[ 2 ],
00201                        realType* work )
00202 {
00203     unsigned  nr = pr->n, mr = pr->m, ns = ps->b.n, ms = ps->b.m;
00204     realType *a0 = work, *a1 = a0 + mr, *ar_ = a1 + mr, *ar = ar_, *br_ = ar + mr * ns, *br = br_, *a_ = br + mr * ns,
00205              *a = a_, *b_ = a + mr * ms, *b = b_, *uvp, *ovp, *uvn, *ovn;
00206     realType b0, b1;
00207     unsigned i, j, k;
00208     for( i = 0; i < mr; ++i )
00209         a0[ i ] = a1[ i ] = 0;
00210     for( j = 0; j < ns; ++j, u += nr )
00211     {
00212         realType q0 = ps->b.Q0[ j ], q1 = ps->b.Q1[ j ];
00213         lob_bnd_lines( pr, u, ar, br );
00214         for( i = 0; i < mr; ++i )
00215         {
00216             realType t = ( *ar++ + *br++ ) / 2;
00217             a0[ i ] += q0 * t, a1[ i ] += q1 * t;
00218         }
00219     }
00220     for( i = 0; i < mr; ++i )
00221     {
00222         realType a0i = a0[ i ], a1i = a1[ i ];
00223         for( k = 0; k < ms; ++k )
00224             *b++ = *a++ = a0i + a1i * ps->b.h[ k ];
00225     }
00226     ar = ar_, br = br_;
00227     uvp = ps->uvp, ovp = ps->ovp, uvn = ps->uvn, ovn = ps->ovn;
00228     for( j = 0; j < ns; ++j, uvp += ms, uvn += ms, ovp += ms, ovn += ms )
00229     {
00230         realType zj = ps->b.z[ j ];
00231         a = a_, b = b_;
00232         for( i = 0; i < mr; ++i )
00233         {
00234             realType t = a0[ i ] + a1[ i ] * zj;
00235             realType uw = *ar++ - t;
00236             realType ow = *br++ - t;
00237             if( uw >= 0 ) /* 0  <= uw <= ow */
00238                 for( k = 0; k < ms; ++k )
00239                     *a++ += uw * uvp[ k ] + ow * uvn[ k ], *b++ += ow * ovp[ k ] + uw * ovn[ k ];
00240             else if( ow <= 0 ) /* uw <= ow <= 0  */
00241                 for( k = 0; k < ms; ++k )
00242                     *a++ += uw * ovp[ k ] + ow * ovn[ k ], *b++ += ow * uvp[ k ] + uw * uvn[ k ];
00243             else /* uw <  0  <  ow */
00244                 for( k = 0; k < ms; ++k )
00245                     *a++ += uw * ovp[ k ] + ow * uvn[ k ], *b++ += ow * ovp[ k ] + uw * uvn[ k ];
00246         }
00247     }
00248     b0 = a_[ 0 ], b1 = b_[ 0 ];
00249     for( i = 1; i < mr * ms; ++i )
00250     {
00251         if( a_[ i ] < b0 ) b0 = a_[ i ];
00252         if( b_[ i ] > b1 ) b1 = b_[ i ];
00253     }
00254     bnd[ 0 ] = b0, bnd[ 1 ] = b1;
00255 }
00256 
00257 /*--------------------------------------------------------------------------
00258    Small Matrix Inverse
00259   --------------------------------------------------------------------------*/
00260 
00261 static void mat_inv_2( const realType A[ 4 ], realType inv[ 4 ] )
00262 {
00263     const realType idet = 1 / ( A[ 0 ] * A[ 3 ] - A[ 1 ] * A[ 2 ] );
00264     inv[ 0 ] = idet * A[ 3 ];
00265     inv[ 1 ] = -( idet * A[ 1 ] );
00266     inv[ 2 ] = -( idet * A[ 2 ] );
00267     inv[ 3 ] = idet * A[ 0 ];
00268 }
00269 
00270 static void mat_inv_3( const realType A[ 9 ], realType inv[ 9 ] )
00271 {
00272     const realType a = A[ 4 ] * A[ 8 ] - A[ 5 ] * A[ 7 ], b = A[ 5 ] * A[ 6 ] - A[ 3 ] * A[ 8 ],
00273                    c = A[ 3 ] * A[ 7 ] - A[ 4 ] * A[ 6 ], idet = 1 / ( A[ 0 ] * a + A[ 1 ] * b + A[ 2 ] * c );
00274     inv[ 0 ] = idet * a;
00275     inv[ 1 ] = idet * ( A[ 2 ] * A[ 7 ] - A[ 1 ] * A[ 8 ] );
00276     inv[ 2 ] = idet * ( A[ 1 ] * A[ 5 ] - A[ 2 ] * A[ 4 ] );
00277     inv[ 3 ] = idet * b;
00278     inv[ 4 ] = idet * ( A[ 0 ] * A[ 8 ] - A[ 2 ] * A[ 6 ] );
00279     inv[ 5 ] = idet * ( A[ 2 ] * A[ 3 ] - A[ 0 ] * A[ 5 ] );
00280     inv[ 6 ] = idet * c;
00281     inv[ 7 ] = idet * ( A[ 1 ] * A[ 6 ] - A[ 0 ] * A[ 7 ] );
00282     inv[ 8 ] = idet * ( A[ 0 ] * A[ 4 ] - A[ 1 ] * A[ 3 ] );
00283 }
00284 
00285 static void mat_app_2r( realType y[ 2 ], const realType A[ 4 ], const realType x[ 2 ] )
00286 {
00287     y[ 0 ] = A[ 0 ] * x[ 0 ] + A[ 1 ] * x[ 1 ];
00288     y[ 1 ] = A[ 2 ] * x[ 0 ] + A[ 3 ] * x[ 1 ];
00289 }
00290 
00291 static void mat_app_2c( realType y[ 2 ], const realType A[ 4 ], const realType x[ 2 ] )
00292 {
00293     y[ 0 ] = A[ 0 ] * x[ 0 ] + A[ 2 ] * x[ 1 ];
00294     y[ 1 ] = A[ 1 ] * x[ 0 ] + A[ 3 ] * x[ 1 ];
00295 }
00296 
00297 static void mat_app_3r( realType y[ 3 ], const realType A[ 9 ], const realType x[ 3 ] )
00298 {
00299     y[ 0 ] = A[ 0 ] * x[ 0 ] + A[ 1 ] * x[ 1 ] + A[ 2 ] * x[ 2 ];
00300     y[ 1 ] = A[ 3 ] * x[ 0 ] + A[ 4 ] * x[ 1 ] + A[ 5 ] * x[ 2 ];
00301     y[ 2 ] = A[ 6 ] * x[ 0 ] + A[ 7 ] * x[ 1 ] + A[ 8 ] * x[ 2 ];
00302 }
00303 
00304 static void mat_app_3c( realType y[ 3 ], const realType A[ 9 ], const realType x[ 3 ] )
00305 {
00306     y[ 0 ] = A[ 0 ] * x[ 0 ] + A[ 3 ] * x[ 1 ] + A[ 6 ] * x[ 2 ];
00307     y[ 1 ] = A[ 1 ] * x[ 0 ] + A[ 4 ] * x[ 1 ] + A[ 7 ] * x[ 2 ];
00308     y[ 2 ] = A[ 2 ] * x[ 0 ] + A[ 5 ] * x[ 1 ] + A[ 8 ] * x[ 2 ];
00309 }
00310 
00311 static void tinyla_solve_2( realType x[ 2 ], const realType A[ 4 ], const realType b[ 2 ] )
00312 {
00313     realType inv[ 4 ];
00314     mat_inv_2( A, inv );
00315     mat_app_2r( x, inv, b );
00316 }
00317 
00318 static void tinyla_solve_3( realType x[ 3 ], const realType A[ 9 ], const realType b[ 3 ] )
00319 {
00320     realType inv[ 9 ];
00321     mat_inv_3( A, inv );
00322     mat_app_3r( x, inv, b );
00323 }
00324 
00325 /* solve
00326    A[0] x0 + A[2] x1 = b0,
00327    A[2] x0 + A[1] x1 = b1
00328 */
00329 static void tinyla_solve_sym_2( realType* x0, realType* x1, const realType A[ 3 ], realType b0, realType b1 )
00330 {
00331     const realType idet = 1 / ( A[ 0 ] * A[ 1 ] - A[ 2 ] * A[ 2 ] );
00332     *x0 = idet * ( A[ 1 ] * b0 - A[ 2 ] * b1 );
00333     *x1 = idet * ( A[ 0 ] * b1 - A[ 2 ] * b0 );
00334 }
00335 
00336 /*--------------------------------------------------------------------------
00337    Oriented Bounding Box
00338 
00339    Suffixes on names are _2 for 2-d and _3 for 3-d
00340 
00341    Needed inputs are the Gauss-Lobatto quadrature nodes and weights:
00342      unsigned nr = ..., ns = ...;
00343      realType zr[nr], wr[nr];
00344      realType zs[ns], ws[ns];
00345 
00346      lobatto_nodes(zr,nr); lobatto_weights(zr,wr,nr);
00347      lobatto_nodes(zs,ns); lobatto_weights(zs,ws,ns);
00348 
00349    The number of points in the constructed piecewise (bi-)linear bounds
00350    for the boundaries is a parameter; more points give tighter bounds
00351 
00352      unsigned mr = 2*nr, ms = 2*ns;
00353 
00354    Bounding boxes are increased by a relative amount as a parameter
00355 
00356      realType tol = 0.01;
00357 
00358    Setup is accomplished via:
00359 
00360      const realType *z[2] = {zr,zs}, *w[2] = {wr,ws};
00361      const unsigned n[2] = {nr,ns}, m[2] = {mr,ms};
00362      obbox_data_2 *data = obbox_setup_2(z,w,n,m);
00363 
00364    Bounding box data may then be computed:
00365 
00366      obbox_2 box;                 // will store bounding box information
00367      realType xm[ns][nr], ym[ns][nr]; // x, y coordinates of the element nodes
00368 
00369      obbox_calc_2(data, tol, (const realType *)&xm[0][0],
00370                              (const realType *)&ym[0][0], &box);
00371 
00372    A point may be tested:
00373 
00374      const realType x[2]; // point to test
00375      realType r[2];
00376 
00377      if( obbox_axis_test_2(&box, x) )
00378        ... // x failed axis-aligned bounding box test
00379 
00380      if( obbox_test_2(&box, x, r) )
00381        ... // x failed oriented bounding box test
00382      else
00383        ... // r suitable as initial guess for parametric coords
00384 
00385    Once all bounding box information has been computed
00386 
00387      obbox_free_2(data);
00388 
00389    to free the memory allocated with obbox_setup_2.
00390 
00391   --------------------------------------------------------------------------*/
00392 
00393 typedef struct
00394 {
00395     lob_bnd_base dr, ds;
00396     realType *   Jr0, *Dr0, *Js0, *Ds0, *work;
00397 } obbox_data_2;
00398 
00399 typedef struct
00400 {
00401     lob_bnd_base dr;
00402     lob_bnd_ext  ds, dt;
00403     realType *   Jr0, *Dr0, *Js0, *Ds0, *Jt0, *Dt0, *work;
00404 } obbox_data_3;
00405 
00406 static void obbox_data_alloc_2( obbox_data_2* p, const unsigned n[ 2 ], const unsigned m[ 2 ] )
00407 {
00408     const unsigned max_npm = umax_2( n[ 0 ] + m[ 0 ], n[ 1 ] + m[ 1 ] );
00409     lob_bnd_base_alloc( &p->dr, n[ 0 ], m[ 0 ] );
00410     lob_bnd_base_alloc( &p->ds, n[ 1 ], m[ 1 ] );
00411     p->Jr0 = tmalloc( realType, 2 * n[ 0 ] + 2 * n[ 1 ] + 2 * max_npm );
00412     p->Dr0 = p->Jr0 + n[ 0 ];
00413     p->Js0 = p->Dr0 + n[ 0 ];
00414     p->Ds0 = p->Js0 + n[ 1 ];
00415     p->work = p->Ds0 + n[ 1 ];
00416 }
00417 
00418 static void obbox_data_free_2( obbox_data_2* p )
00419 {
00420     lob_bnd_base_free( &p->dr );
00421     lob_bnd_base_free( &p->ds );
00422     free( p->Jr0 );
00423 }
00424 
00425 static void obbox_data_alloc_3( obbox_data_3* p, const unsigned n[ 3 ], const unsigned m[ 3 ] )
00426 {
00427     const unsigned wk1 = 3 * n[ 0 ] * n[ 1 ] + 2 * m[ 0 ] + 2 * m[ 0 ] * n[ 1 ] + 2 * m[ 0 ] * m[ 1 ];
00428     const unsigned wk2 = 3 * n[ 0 ] * n[ 2 ] + 2 * m[ 0 ] + 2 * m[ 0 ] * n[ 2 ] + 2 * m[ 0 ] * m[ 2 ];
00429     const unsigned wk3 = 3 * n[ 1 ] * n[ 2 ] + 2 * m[ 1 ] + 2 * m[ 1 ] * n[ 2 ] + 2 * m[ 1 ] * m[ 2 ];
00430     const unsigned wk_max = umax_3( wk1, wk2, wk3 );
00431     lob_bnd_base_alloc( &p->dr, n[ 0 ], m[ 0 ] );
00432     lob_bnd_ext_alloc( &p->ds, n[ 1 ], m[ 1 ] );
00433     lob_bnd_ext_alloc( &p->dt, n[ 2 ], m[ 2 ] );
00434     p->Jr0 = tmalloc( realType, 2 * n[ 0 ] + 2 * n[ 1 ] + 2 * n[ 2 ] + wk_max );
00435     p->Dr0 = p->Jr0 + n[ 0 ];
00436     p->Js0 = p->Dr0 + n[ 0 ];
00437     p->Ds0 = p->Js0 + n[ 1 ];
00438     p->Jt0 = p->Ds0 + n[ 1 ];
00439     p->Dt0 = p->Jt0 + n[ 2 ];
00440     p->work = p->Dt0 + n[ 2 ];
00441 }
00442 
00443 static void obbox_data_free_3( obbox_data_3* p )
00444 {
00445     lob_bnd_base_free( &p->dr );
00446     lob_bnd_ext_free( &p->ds );
00447     lob_bnd_ext_free( &p->dt );
00448     free( p->Jr0 );
00449 }
00450 
00451 static obbox_data_2* obbox_setup_2( const realType* const z[ 2 ], const realType* const w[ 2 ], const unsigned n[ 2 ],
00452                                     const unsigned m[ 2 ] )
00453 {
00454     const realType zero = 0;
00455     realType*      work;
00456     obbox_data_2*  p = tmalloc( obbox_data_2, 1 );
00457     obbox_data_alloc_2( p, n, m );
00458     lob_bnd_base_setup( &p->dr, z[ 0 ], w[ 0 ] );
00459     lob_bnd_base_setup( &p->ds, z[ 1 ], w[ 1 ] );
00460     work = tmalloc( realType, 6 * umax_2( n[ 0 ], n[ 1 ] ) );
00461     lagrange_weights_deriv( z[ 0 ], n[ 0 ], &zero, 1, p->Jr0, p->Dr0, work );
00462     lagrange_weights_deriv( z[ 1 ], n[ 1 ], &zero, 1, p->Js0, p->Ds0, work );
00463     free( work );
00464     return p;
00465 }
00466 
00467 static obbox_data_3* obbox_setup_3( const realType* const z[ 3 ], const realType* const w[ 3 ], const unsigned n[ 3 ],
00468                                     const unsigned m[ 3 ] )
00469 {
00470     const realType zero = 0;
00471     realType*      work;
00472     obbox_data_3*  p = tmalloc( obbox_data_3, 1 );
00473     obbox_data_alloc_3( p, n, m );
00474     lob_bnd_base_setup( &p->dr, z[ 0 ], w[ 0 ] );
00475     lob_bnd_ext_setup( &p->ds, z[ 1 ], w[ 1 ] );
00476     lob_bnd_ext_setup( &p->dt, z[ 2 ], w[ 2 ] );
00477     work = tmalloc( realType, 6 * umax_3( n[ 0 ], n[ 1 ], n[ 2 ] ) );
00478     lagrange_weights_deriv( z[ 0 ], n[ 0 ], &zero, 1, p->Jr0, p->Dr0, work );
00479     lagrange_weights_deriv( z[ 1 ], n[ 1 ], &zero, 1, p->Js0, p->Ds0, work );
00480     lagrange_weights_deriv( z[ 2 ], n[ 2 ], &zero, 1, p->Jt0, p->Dt0, work );
00481     free( work );
00482     return p;
00483 }
00484 
00485 static void obbox_free_2( obbox_data_2* p )
00486 {
00487     obbox_data_free_2( p );
00488     free( p );
00489 }
00490 
00491 static void obbox_free_3( obbox_data_3* p )
00492 {
00493     obbox_data_free_3( p );
00494     free( p );
00495 }
00496 
00497 int obbox_axis_test_2( const obbox_2* p, const realType x[ 2 ] )
00498 {
00499     return ( x[ 0 ] < p->axis_bnd[ 0 ] || x[ 0 ] > p->axis_bnd[ 1 ] || x[ 1 ] < p->axis_bnd[ 2 ] ||
00500              x[ 1 ] > p->axis_bnd[ 3 ] );
00501 }
00502 
00503 int obbox_axis_test_3( const obbox_3* p, const realType x[ 3 ] )
00504 {
00505     return ( x[ 0 ] < p->axis_bnd[ 0 ] || x[ 0 ] > p->axis_bnd[ 1 ] || x[ 1 ] < p->axis_bnd[ 2 ] ||
00506              x[ 1 ] > p->axis_bnd[ 3 ] || x[ 2 ] < p->axis_bnd[ 4 ] || x[ 2 ] > p->axis_bnd[ 5 ] );
00507 }
00508 
00509 int obbox_test_2( const obbox_2* p, const realType x[ 2 ], realType r[ 2 ] )
00510 {
00511     realType xt[ 2 ];
00512     xt[ 0 ] = x[ 0 ] - p->x[ 0 ];
00513     xt[ 1 ] = x[ 1 ] - p->x[ 1 ];
00514     r[ 0 ] = p->A[ 0 ] * xt[ 0 ] + p->A[ 1 ] * xt[ 1 ];
00515     if( mbabs( r[ 0 ] ) > 1 ) return 1;
00516     r[ 1 ] = p->A[ 2 ] * xt[ 0 ] + p->A[ 3 ] * xt[ 1 ];
00517     return mbabs( r[ 1 ] ) > 1;
00518 }
00519 
00520 int obbox_test_3( const obbox_3* p, const realType x[ 3 ], realType r[ 3 ] )
00521 {
00522     realType xt[ 3 ];
00523     xt[ 0 ] = x[ 0 ] - p->x[ 0 ];
00524     xt[ 1 ] = x[ 1 ] - p->x[ 1 ];
00525     xt[ 2 ] = x[ 2 ] - p->x[ 2 ];
00526     r[ 0 ] = p->A[ 0 ] * xt[ 0 ] + p->A[ 1 ] * xt[ 1 ] + p->A[ 2 ] * xt[ 2 ];
00527     if( mbabs( r[ 0 ] ) > 1 ) return 1;
00528     r[ 1 ] = p->A[ 3 ] * xt[ 0 ] + p->A[ 4 ] * xt[ 1 ] + p->A[ 5 ] * xt[ 2 ];
00529     if( mbabs( r[ 1 ] ) > 1 ) return 1;
00530     r[ 2 ] = p->A[ 6 ] * xt[ 0 ] + p->A[ 7 ] * xt[ 1 ] + p->A[ 8 ] * xt[ 2 ];
00531     return mbabs( r[ 2 ] ) > 1;
00532 }
00533 
00534 void obbox_calc_tfm_2( const realType* x, const realType* y, unsigned n, unsigned s, const realType c0[ 2 ],
00535                        const realType A[ 4 ], realType* u )
00536 {
00537     unsigned  i;
00538     realType* v = u + n;
00539     for( i = 0; i < n; ++i, x += s, y += s )
00540     {
00541         const realType xt = *x - c0[ 0 ], yt = *y - c0[ 1 ];
00542         u[ i ] = A[ 0 ] * xt + A[ 1 ] * yt;
00543         v[ i ] = A[ 2 ] * xt + A[ 3 ] * yt;
00544     }
00545 }
00546 
00547 void obbox_calc_tfm_3( const realType* x, const realType* y, const realType* z, unsigned nr, unsigned sr, unsigned ns,
00548                        unsigned ss, const realType c0[ 3 ], const realType A[ 9 ], realType* u )
00549 {
00550     unsigned  i, j;
00551     realType *v = u + nr * ns, *w = v + nr * ns;
00552     for( j = 0; j < ns; ++j, x += ss, y += ss, z += ss )
00553     {
00554         for( i = 0; i < nr; ++i, x += sr, y += sr, z += sr )
00555         {
00556             const realType xt = *x - c0[ 0 ], yt = *y - c0[ 1 ], zt = *z - c0[ 2 ];
00557             *u++ = A[ 0 ] * xt + A[ 1 ] * yt + A[ 2 ] * zt;
00558             *v++ = A[ 3 ] * xt + A[ 4 ] * yt + A[ 5 ] * zt;
00559             *w++ = A[ 6 ] * xt + A[ 7 ] * yt + A[ 8 ] * zt;
00560         }
00561     }
00562 }
00563 
00564 void obbox_merge_2( realType* b, const realType* ob )
00565 {
00566     if( ob[ 0 ] < b[ 0 ] ) b[ 0 ] = ob[ 0 ];
00567     if( ob[ 1 ] > b[ 1 ] ) b[ 1 ] = ob[ 1 ];
00568     if( ob[ 2 ] < b[ 2 ] ) b[ 2 ] = ob[ 2 ];
00569     if( ob[ 3 ] > b[ 3 ] ) b[ 3 ] = ob[ 3 ];
00570 }
00571 
00572 void obbox_merge_3( realType* b, const realType* ob )
00573 {
00574     if( ob[ 0 ] < b[ 0 ] ) b[ 0 ] = ob[ 0 ];
00575     if( ob[ 1 ] > b[ 1 ] ) b[ 1 ] = ob[ 1 ];
00576     if( ob[ 2 ] < b[ 2 ] ) b[ 2 ] = ob[ 2 ];
00577     if( ob[ 3 ] > b[ 3 ] ) b[ 3 ] = ob[ 3 ];
00578     if( ob[ 4 ] < b[ 4 ] ) b[ 4 ] = ob[ 4 ];
00579     if( ob[ 5 ] > b[ 5 ] ) b[ 5 ] = ob[ 5 ];
00580 }
00581 
00582 /* work holds 2*n + 2*m realTypes */
00583 void obbox_side_2( const realType* x, const realType* y, unsigned n, unsigned s, const realType c0[ 2 ],
00584                    const realType A[ 4 ], realType* work, const lob_bnd_base* lbd, realType bnd[ 4 ] )
00585 {
00586     obbox_calc_tfm_2( x, y, n, s, c0, A, work );
00587     lob_bnd_1( lbd, work, bnd, work + 2 * n );
00588     lob_bnd_1( lbd, work + n, bnd + 2, work + 2 * n );
00589 }
00590 
00591 /* work holds 3*nr*ns + 2*mr + 2*mr*ns + 2*mr*ms realTypes */
00592 void obbox_side_3( const realType* x, const realType* y, const realType* z, unsigned nr, unsigned sr, unsigned ns,
00593                    unsigned ss, const realType c0[ 3 ], const realType A[ 9 ], realType* work, const lob_bnd_base* dr,
00594                    const lob_bnd_ext* ds, realType bnd[ 6 ] )
00595 {
00596     obbox_calc_tfm_3( x, y, z, nr, sr, ns, ss, c0, A, work );
00597     lob_bnd_2( dr, ds, work, bnd, work + 3 * nr * ns );
00598     lob_bnd_2( dr, ds, work + nr * ns, bnd + 2, work + 3 * nr * ns );
00599     lob_bnd_2( dr, ds, work + 2 * nr * ns, bnd + 4, work + 3 * nr * ns );
00600 }
00601 
00602 /* return bounds on u = A (x - c0)
00603    bnd[0] <= u_0 <= bnd[1]
00604    bnd[2] <= u_1 <= bnd[3] */
00605 void obbox_bnd_2( const obbox_data_2* p, const realType* x, const realType* y, const realType c0[ 2 ],
00606                   const realType A[ 4 ], realType bnd[ 4 ] )
00607 {
00608     unsigned i, nr = p->dr.n, ns = p->ds.n;
00609     realType obnd[ 4 ];
00610 
00611     i = nr * ( ns - 1 );
00612     obbox_side_2( x, y, nr, 1, c0, A, p->work, &p->dr, bnd );
00613     obbox_side_2( x + i, y + i, nr, 1, c0, A, p->work, &p->dr, obnd );
00614     obbox_merge_2( bnd, obnd );
00615 
00616     i = nr - 1;
00617     obbox_side_2( x, y, ns, nr, c0, A, p->work, &p->ds, obnd );
00618     obbox_merge_2( bnd, obnd );
00619     obbox_side_2( x + i, y + i, nr, nr, c0, A, p->work, &p->ds, obnd );
00620     obbox_merge_2( bnd, obnd );
00621 }
00622 
00623 /* return bounds on u = A (x - c0)
00624    bnd[0] <= u_0 <= bnd[1]
00625    bnd[2] <= u_1 <= bnd[3]
00626    bnd[4] <= u_2 <= bnd[5] */
00627 void obbox_bnd_3( const obbox_data_3* p, const realType* x, const realType* y, const realType* z,
00628                   const realType c0[ 3 ], const realType A[ 9 ], realType bnd[ 6 ] )
00629 {
00630     unsigned i, nr = p->dr.n, ns = p->ds.b.n, nt = p->dt.b.n;
00631     realType obnd[ 6 ];
00632 
00633     i = nr * ns * ( nt - 1 );
00634     obbox_side_3( x, y, z, nr, 1, ns, 0, c0, A, p->work, &p->dr, &p->ds, bnd );
00635     obbox_side_3( x + i, y + i, z + i, nr, 1, ns, 0, c0, A, p->work, &p->dr, &p->ds, obnd );
00636     obbox_merge_3( bnd, obnd );
00637 
00638     i = nr * ( ns - 1 );
00639     obbox_side_3( x, y, z, nr, 1, nt, i, c0, A, p->work, &p->dr, &p->dt, obnd );
00640     obbox_merge_3( bnd, obnd );
00641     obbox_side_3( x + i, y + i, z + i, nr, 1, nt, i, c0, A, p->work, &p->dr, &p->dt, obnd );
00642     obbox_merge_3( bnd, obnd );
00643 
00644     i = nr - 1;
00645     obbox_side_3( x, y, z, ns, nr, nt, 0, c0, A, p->work, &p->ds.b, &p->dt, obnd );
00646     obbox_merge_3( bnd, obnd );
00647     obbox_side_3( x + i, y + i, z + i, ns, nr, nt, 0, c0, A, p->work, &p->ds.b, &p->dt, obnd );
00648     obbox_merge_3( bnd, obnd );
00649 }
00650 
00651 void obbox_calc_2( const obbox_data_2* p, realType tol, const realType* x, const realType* y, obbox_2* b )
00652 {
00653     const realType zero[ 2 ] = { 0, 0 }, id[ 4 ] = { 1, 0, 0, 1 };
00654     realType       c0[ 2 ], jac[ 4 ], inv[ 4 ], bnd[ 4 ], u0[ 2 ], d[ 2 ];
00655 
00656     obbox_bnd_2( p, x, y, zero, id, b->axis_bnd );
00657     d[ 0 ] = b->axis_bnd[ 1 ] - b->axis_bnd[ 0 ];
00658     d[ 1 ] = b->axis_bnd[ 3 ] - b->axis_bnd[ 2 ];
00659     b->axis_bnd[ 0 ] -= tol * d[ 0 ], b->axis_bnd[ 1 ] += tol * d[ 0 ];
00660     b->axis_bnd[ 2 ] -= tol * d[ 1 ], b->axis_bnd[ 3 ] += tol * d[ 1 ];
00661 
00662     c0[ 0 ] = tensor_ig2( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.n, x, jac, p->work );
00663     c0[ 1 ] = tensor_ig2( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.n, y, jac + 2, p->work );
00664     mat_inv_2( jac, inv );
00665 
00666     obbox_bnd_2( p, x, y, c0, inv, bnd );
00667 
00668     u0[ 0 ] = ( bnd[ 0 ] + bnd[ 1 ] ) / 2;
00669     u0[ 1 ] = ( bnd[ 2 ] + bnd[ 3 ] ) / 2;
00670     d[ 0 ] = 2 / ( ( 1 + tol ) * ( bnd[ 1 ] - bnd[ 0 ] ) );
00671     d[ 1 ] = 2 / ( ( 1 + tol ) * ( bnd[ 3 ] - bnd[ 2 ] ) );
00672     b->x[ 0 ] = c0[ 0 ] + jac[ 0 ] * u0[ 0 ] + jac[ 1 ] * u0[ 1 ];
00673     b->x[ 1 ] = c0[ 1 ] + jac[ 2 ] * u0[ 0 ] + jac[ 3 ] * u0[ 1 ];
00674     b->A[ 0 ] = d[ 0 ] * inv[ 0 ], b->A[ 1 ] = d[ 0 ] * inv[ 1 ];
00675     b->A[ 2 ] = d[ 1 ] * inv[ 2 ], b->A[ 3 ] = d[ 1 ] * inv[ 3 ];
00676 }
00677 
00678 void obbox_calc_3( const obbox_data_3* p, realType tol, const realType* x, const realType* y, const realType* z,
00679                    obbox_3* b )
00680 {
00681     const realType zero[ 3 ] = { 0, 0 }, id[ 9 ] = { 1, 0, 0, 0, 1, 0, 0, 0, 1 };
00682     realType       c0[ 3 ], jac[ 9 ], inv[ 9 ], bnd[ 6 ], u0[ 3 ], d[ 3 ];
00683 
00684     obbox_bnd_3( p, x, y, z, zero, id, b->axis_bnd );
00685     d[ 0 ] = b->axis_bnd[ 1 ] - b->axis_bnd[ 0 ];
00686     d[ 1 ] = b->axis_bnd[ 3 ] - b->axis_bnd[ 2 ];
00687     d[ 2 ] = b->axis_bnd[ 5 ] - b->axis_bnd[ 4 ];
00688     b->axis_bnd[ 0 ] -= tol * d[ 0 ], b->axis_bnd[ 1 ] += tol * d[ 0 ];
00689     b->axis_bnd[ 2 ] -= tol * d[ 1 ], b->axis_bnd[ 3 ] += tol * d[ 1 ];
00690     b->axis_bnd[ 4 ] -= tol * d[ 2 ], b->axis_bnd[ 5 ] += tol * d[ 2 ];
00691 
00692     c0[ 0 ] =
00693         tensor_ig3( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.b.n, p->Jt0, p->Dt0, p->dt.b.n, x, jac, p->work );
00694     c0[ 1 ] = tensor_ig3( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.b.n, p->Jt0, p->Dt0, p->dt.b.n, y, jac + 3,
00695                           p->work );
00696     c0[ 2 ] = tensor_ig3( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.b.n, p->Jt0, p->Dt0, p->dt.b.n, z, jac + 6,
00697                           p->work );
00698     mat_inv_3( jac, inv );
00699 
00700     obbox_bnd_3( p, x, y, z, c0, inv, bnd );
00701 
00702     u0[ 0 ] = ( bnd[ 0 ] + bnd[ 1 ] ) / 2;
00703     u0[ 1 ] = ( bnd[ 2 ] + bnd[ 3 ] ) / 2;
00704     u0[ 2 ] = ( bnd[ 4 ] + bnd[ 5 ] ) / 2;
00705     d[ 0 ] = 2 / ( ( 1 + tol ) * ( bnd[ 1 ] - bnd[ 0 ] ) );
00706     d[ 1 ] = 2 / ( ( 1 + tol ) * ( bnd[ 3 ] - bnd[ 2 ] ) );
00707     d[ 2 ] = 2 / ( ( 1 + tol ) * ( bnd[ 5 ] - bnd[ 4 ] ) );
00708     b->x[ 0 ] = c0[ 0 ] + jac[ 0 ] * u0[ 0 ] + jac[ 1 ] * u0[ 1 ] + jac[ 2 ] * u0[ 2 ];
00709     b->x[ 1 ] = c0[ 1 ] + jac[ 3 ] * u0[ 0 ] + jac[ 4 ] * u0[ 1 ] + jac[ 5 ] * u0[ 2 ];
00710     b->x[ 2 ] = c0[ 2 ] + jac[ 6 ] * u0[ 0 ] + jac[ 7 ] * u0[ 1 ] + jac[ 8 ] * u0[ 2 ];
00711     b->A[ 0 ] = d[ 0 ] * inv[ 0 ], b->A[ 1 ] = d[ 0 ] * inv[ 1 ], b->A[ 2 ] = d[ 0 ] * inv[ 2 ];
00712     b->A[ 3 ] = d[ 1 ] * inv[ 3 ], b->A[ 4 ] = d[ 1 ] * inv[ 4 ], b->A[ 5 ] = d[ 1 ] * inv[ 5 ];
00713     b->A[ 6 ] = d[ 2 ] * inv[ 6 ], b->A[ 7 ] = d[ 2 ] * inv[ 7 ], b->A[ 8 ] = d[ 2 ] * inv[ 8 ];
00714 }
00715 
00716 /*--------------------------------------------------------------------------
00717    Point to Possible Elements Hashing
00718 
00719    Initializing the data:
00720      unsigned nel;        // number of elements
00721      const unsigned n[3]; // number of nodes in r, s, t directions
00722      const realType *xm[3];   // n[0]*n[1]*n[2]*nel x,y,z coordinates
00723      realType tol = 0.01;     // how far point is allowed to be outside element
00724                           //   relative to element size
00725      unsigned max_size = n[0]*n[1]*n[2]*nel; // maximum size of hash table
00726 
00727      hash_data_3 data;
00728      hash_build_3(&data, xm, n, nel, max_size, tol);
00729 
00730    Using the data:
00731      realType x[3];   // point to find
00732 
00733      unsigned index = hash_index_3(&data, x);
00734      unsigned i, b = data.offset[index], e = data.offset[index+1];
00735 
00736      // point may be in elements
00737      //   data.offset[b], data.offset[b+1], ... , data.offset[e-1]
00738      //
00739      // list has maximum size data.max (e.g., e-b <= data.max)
00740 
00741      for(i=b; i!=e; ++i) {
00742        unsigned el = data.offset[i];
00743        const obbox_3 *obb = &data.obb[el]; // bounding box data for element el
00744        ...
00745      }
00746 
00747    When done:
00748      hash_free_3(&data);
00749 
00750   --------------------------------------------------------------------------*/
00751 
00752 static int ifloor( realType x )
00753 {
00754     /*
00755     int y = x;
00756     return (double)y > x ? y-1 : y;
00757     */
00758     return mbfloor( x );
00759 }
00760 
00761 static int iceil( realType x )
00762 {
00763     /*
00764     int y = x;
00765     return (double)y < x ? y+1 : y;
00766     */
00767     return mbceil( x );
00768 }
00769 
00770 static unsigned hash_index_helper( realType low, realType fac, unsigned n, realType x )
00771 {
00772     const int i = ifloor( ( x - low ) * fac );
00773     if( i < 0 ) return 0;
00774     return umin_2( i, n - 1 );
00775 }
00776 
00777 static uint hash_index_2( const hash_data_2* p, const realType x[ 2 ] )
00778 {
00779     const unsigned n = p->hash_n;
00780     return (uint)hash_index_helper( p->bnd[ 2 ], p->fac[ 1 ], n, x[ 1 ] ) * n +
00781            hash_index_helper( p->bnd[ 0 ], p->fac[ 0 ], n, x[ 0 ] );
00782 }
00783 
00784 static uint hash_index_3( const hash_data_3* p, const realType x[ 3 ] )
00785 {
00786     const unsigned n = p->hash_n;
00787     return ( (uint)hash_index_helper( p->bnd[ 4 ], p->fac[ 2 ], n, x[ 2 ] ) * n +
00788              hash_index_helper( p->bnd[ 2 ], p->fac[ 1 ], n, x[ 1 ] ) ) *
00789                n +
00790            hash_index_helper( p->bnd[ 0 ], p->fac[ 0 ], n, x[ 0 ] );
00791 }
00792 
00793 static void hash_setfac_2( hash_data_2* p, unsigned n )
00794 {
00795     p->hash_n = n;
00796     p->fac[ 0 ] = n / ( p->bnd[ 1 ] - p->bnd[ 0 ] );
00797     p->fac[ 1 ] = n / ( p->bnd[ 3 ] - p->bnd[ 2 ] );
00798 }
00799 
00800 static void hash_setfac_3( hash_data_3* p, unsigned n )
00801 {
00802     p->hash_n = n;
00803     p->fac[ 0 ] = n / ( p->bnd[ 1 ] - p->bnd[ 0 ] );
00804     p->fac[ 1 ] = n / ( p->bnd[ 3 ] - p->bnd[ 2 ] );
00805     p->fac[ 2 ] = n / ( p->bnd[ 5 ] - p->bnd[ 4 ] );
00806 }
00807 
00808 static void hash_range_2( const hash_data_2* p, uint i, unsigned d, unsigned* ia, unsigned* ib )
00809 {
00810     const realType a = p->obb[ i ].axis_bnd[ d * 2 ];
00811     const realType b = p->obb[ i ].axis_bnd[ d * 2 + 1 ];
00812     const int      i0 = ifloor( ( a - p->bnd[ d * 2 ] ) * p->fac[ d ] );
00813     const unsigned i1 = iceil( ( b - p->bnd[ d * 2 ] ) * p->fac[ d ] );
00814     *ia = imax_2( 0, i0 );
00815     *ib = imin_2( i1, p->hash_n );
00816     if( *ib == *ia ) ++( *ib );
00817 }
00818 
00819 static void hash_range_3( const hash_data_3* p, uint i, unsigned d, unsigned* ia, unsigned* ib )
00820 {
00821     const realType a = p->obb[ i ].axis_bnd[ d * 2 ];
00822     const realType b = p->obb[ i ].axis_bnd[ d * 2 + 1 ];
00823     const int      i0 = ifloor( ( a - p->bnd[ d * 2 ] ) * p->fac[ d ] );
00824     const unsigned i1 = iceil( ( b - p->bnd[ d * 2 ] ) * p->fac[ d ] );
00825     *ia = imax_2( 0, i0 );
00826     *ib = imin_2( i1, p->hash_n );
00827     if( *ib == *ia ) ++( *ib );
00828 }
00829 
00830 static uint hash_count_2( hash_data_2* p, uint nel, unsigned n )
00831 {
00832     uint i, count = 0;
00833     hash_setfac_2( p, n );
00834     for( i = 0; i < nel; ++i )
00835     {
00836         unsigned ia, ib;
00837         uint     ci;
00838         hash_range_2( p, i, 0, &ia, &ib );
00839         ci = ib - ia;
00840         hash_range_2( p, i, 1, &ia, &ib );
00841         ci *= ib - ia;
00842         count += ci;
00843     }
00844     return count;
00845 }
00846 
00847 static uint hash_count_3( hash_data_3* p, uint nel, unsigned n )
00848 {
00849     uint i, count = 0;
00850     hash_setfac_3( p, n );
00851     for( i = 0; i < nel; ++i )
00852     {
00853         unsigned ia, ib;
00854         uint     ci;
00855         hash_range_3( p, i, 0, &ia, &ib );
00856         ci = ib - ia;
00857         hash_range_3( p, i, 1, &ia, &ib );
00858         ci *= ib - ia;
00859         hash_range_3( p, i, 2, &ia, &ib );
00860         ci *= ib - ia;
00861         count += ci;
00862     }
00863     return count;
00864 }
00865 
00866 static uint hash_opt_size_2( hash_data_2* p, uint nel, uint max_size )
00867 {
00868     unsigned nl = 1, nu = ceil( sqrt( max_size - nel ) );
00869     uint     size_low = 2 + nel;
00870     while( nu - nl > 1 )
00871     {
00872         unsigned nm = nl + ( nu - nl ) / 2;
00873         uint     size = (uint)nm * nm + 1 + hash_count_2( p, nel, nm );
00874         if( size <= max_size )
00875             nl = nm, size_low = size;
00876         else
00877             nu = nm;
00878     }
00879     hash_setfac_2( p, nl );
00880     return size_low;
00881 }
00882 
00883 static uint hash_opt_size_3( hash_data_3* p, uint nel, uint max_size )
00884 {
00885     unsigned nl = 1, nu = ceil( pow( max_size - nel, 1.0 / 3 ) );
00886     uint     size_low = 2 + nel;
00887     while( nu - nl > 1 )
00888     {
00889         unsigned nm = nl + ( nu - nl ) / 2;
00890         uint     size = (uint)nm * nm * nm + 1 + hash_count_3( p, nel, nm );
00891         if( size <= max_size )
00892             nl = nm, size_low = size;
00893         else
00894             nu = nm;
00895     }
00896     hash_setfac_3( p, nl );
00897     return size_low;
00898 }
00899 
00900 static void hash_getbb_2( hash_data_2* p, const realType* const elx[ 2 ], const unsigned n[ 2 ], uint nel,
00901                           realType tol )
00902 {
00903     obbox_data_2*   data;
00904     realType *      z[ 2 ], *w[ 2 ];
00905     uint            i;
00906     unsigned        d;
00907     const unsigned  nn = n[ 0 ] * n[ 1 ];
00908     unsigned int    m[ 2 ];
00909     const realType* x[ 2 ];
00910 
00911     for( i = 0; i < 2; ++i )
00912     {
00913         x[ i ] = elx[ i ];
00914         m[ i ] = 2 * n[ i ];
00915     }
00916 
00917     z[ 0 ] = tmalloc( realType, 2 * ( n[ 0 ] + n[ 1 ] ) );
00918     w[ 0 ] = z[ 0 ] + n[ 0 ];
00919     z[ 1 ] = w[ 0 ] + n[ 0 ], w[ 1 ] = z[ 1 ] + n[ 1 ];
00920     for( d = 0; d < 2; ++d )
00921         lobatto_nodes( z[ d ], n[ d ] ), lobatto_weights( z[ d ], w[ d ], n[ d ] );
00922     data = obbox_setup_2( (const realType* const*)z, (const realType* const*)w, n, m );
00923     obbox_calc_2( data, tol, x[ 0 ], x[ 1 ], &p->obb[ 0 ] );
00924     memcpy( &p->bnd[ 0 ], (const realType*)&p->obb[ 0 ].axis_bnd[ 0 ], 4 * sizeof( realType ) );
00925     for( i = 0; i < nel; ++i, x[ 0 ] += nn, x[ 1 ] += nn )
00926     {
00927         obbox_calc_2( data, tol, x[ 0 ], x[ 1 ], &p->obb[ i ] );
00928         obbox_merge_2( &p->bnd[ 0 ], (const realType*)&p->obb[ i ].axis_bnd[ 0 ] );
00929     }
00930     obbox_free_2( data );
00931     free( z[ 0 ] );
00932 }
00933 
00934 static void hash_getbb_3( hash_data_3* p, const realType* const elx[ 3 ], const unsigned n[ 3 ], uint nel,
00935                           realType tol )
00936 {
00937     obbox_data_3*   data;
00938     const realType* x[ 3 ];
00939     realType *      z[ 3 ], *w[ 3 ];
00940     uint            i;
00941     unsigned        d;
00942     const unsigned  nn = n[ 0 ] * n[ 1 ] * n[ 2 ];
00943     unsigned int    m[ 3 ];
00944 
00945     for( i = 0; i < 3; ++i )
00946     {
00947         x[ i ] = elx[ i ];
00948         m[ i ] = 2 * n[ i ];
00949     }
00950 
00951     z[ 0 ] = tmalloc( realType, 2 * ( n[ 0 ] + n[ 1 ] + n[ 2 ] ) );
00952     w[ 0 ] = z[ 0 ] + n[ 0 ];
00953     for( d = 1; d < 3; ++d )
00954         z[ d ] = w[ d - 1 ] + n[ d - 1 ], w[ d ] = z[ d ] + n[ d ];
00955     for( d = 0; d < 3; ++d )
00956         lobatto_nodes( z[ d ], n[ d ] ), lobatto_weights( z[ d ], w[ d ], n[ d ] );
00957     data = obbox_setup_3( (const realType* const*)z, (const realType* const*)w, n, m );
00958     obbox_calc_3( data, tol, x[ 0 ], x[ 1 ], x[ 2 ], &p->obb[ 0 ] );
00959     memcpy( &p->bnd[ 0 ], (const realType*)&p->obb[ 0 ].axis_bnd[ 0 ], 6 * sizeof( realType ) );
00960     for( i = 0; i < nel; ++i, x[ 0 ] += nn, x[ 1 ] += nn, x[ 2 ] += nn )
00961     {
00962         obbox_calc_3( data, tol, x[ 0 ], x[ 1 ], x[ 2 ], &p->obb[ i ] );
00963         obbox_merge_3( &p->bnd[ 0 ], (const realType*)&p->obb[ i ].axis_bnd[ 0 ] );
00964     }
00965     obbox_free_3( data );
00966     free( z[ 0 ] );
00967 }
00968 
00969 static void hash_build_2( hash_data_2* p, const realType* const x[ 2 ], const unsigned n[ 2 ], uint nel,
00970                           uint max_hash_size, realType tol )
00971 {
00972     uint      i, el, size, hn2, sum;
00973     unsigned  hn;
00974     unsigned* count;
00975     p->obb = tmalloc( obbox_2, nel );
00976     hash_getbb_2( p, x, n, nel, tol );
00977     size = hash_opt_size_2( p, nel, max_hash_size );
00978     p->offset = tmalloc( uint, size );
00979     hn = p->hash_n;
00980     hn2 = (uint)hn * hn;
00981     count = tcalloc( unsigned, hn2 );
00982     for( el = 0; el < nel; ++el )
00983     {
00984         unsigned ia, ib, j, ja, jb;
00985         hash_range_2( p, el, 0, &ia, &ib );
00986         hash_range_2( p, el, 1, &ja, &jb );
00987         for( j = ja; j < jb; ++j )
00988             for( i = ia; i < ib; ++i )
00989                 ++count[ (uint)j * hn + i ];
00990     }
00991     sum = hn2 + 1, p->max = count[ 0 ];
00992     p->offset[ 0 ] = sum;
00993     for( i = 0; i < hn2; ++i )
00994     {
00995         if( count[ i ] > p->max ) p->max = count[ i ];
00996         sum += count[ i ];
00997         p->offset[ i + 1 ] = sum;
00998     }
00999     for( el = 0; el < nel; ++el )
01000     {
01001         unsigned ia, ib, j, ja, jb;
01002         hash_range_2( p, el, 0, &ia, &ib );
01003         hash_range_2( p, el, 1, &ja, &jb );
01004         for( j = ja; j < jb; ++j )
01005             for( i = ia; i < ib; ++i )
01006             {
01007                 uint arrindex = (uint)j * hn + i;
01008                 p->offset[ p->offset[ arrindex + 1 ] - count[ arrindex ] ] = el;
01009                 --count[ arrindex ];
01010             }
01011     }
01012     free( count );
01013 }
01014 
01015 static void hash_build_3( hash_data_3* p, const realType* const x[ 3 ], const unsigned n[ 3 ], uint nel,
01016                           uint max_hash_size, realType tol )
01017 {
01018     uint      i, el, size, hn3, sum;
01019     unsigned  hn;
01020     unsigned* count;
01021     p->obb = tmalloc( obbox_3, nel );
01022     hash_getbb_3( p, x, n, nel, tol );
01023     size = hash_opt_size_3( p, nel, max_hash_size );
01024     p->offset = tmalloc( uint, size );
01025     hn = p->hash_n;
01026     hn3 = (uint)hn * hn * hn;
01027     count = tcalloc( unsigned, hn3 );
01028     for( el = 0; el < nel; ++el )
01029     {
01030         unsigned ia, ib, j, ja, jb, k, ka, kb;
01031         hash_range_3( p, el, 0, &ia, &ib );
01032         hash_range_3( p, el, 1, &ja, &jb );
01033         hash_range_3( p, el, 2, &ka, &kb );
01034         for( k = ka; k < kb; ++k )
01035             for( j = ja; j < jb; ++j )
01036                 for( i = ia; i < ib; ++i )
01037                     ++count[ ( (uint)k * hn + j ) * hn + i ];
01038     }
01039     sum = hn3 + 1, p->max = count[ 0 ];
01040     p->offset[ 0 ] = sum;
01041     for( i = 0; i < hn3; ++i )
01042     {
01043         if( count[ i ] > p->max ) p->max = count[ i ];
01044         sum += count[ i ];
01045         p->offset[ i + 1 ] = sum;
01046     }
01047     for( el = 0; el < nel; ++el )
01048     {
01049         unsigned ia, ib, j, ja, jb, k, ka, kb;
01050         hash_range_3( p, el, 0, &ia, &ib );
01051         hash_range_3( p, el, 1, &ja, &jb );
01052         hash_range_3( p, el, 2, &ka, &kb );
01053         for( k = ka; k < kb; ++k )
01054             for( j = ja; j < jb; ++j )
01055                 for( i = ia; i < ib; ++i )
01056                 {
01057                     uint arrindex = ( (uint)k * hn + j ) * hn + i;
01058                     p->offset[ p->offset[ arrindex + 1 ] - count[ arrindex ] ] = el;
01059                     --count[ arrindex ];
01060                 }
01061     }
01062     free( count );
01063 }
01064 
01065 static void hash_free_2( hash_data_2* p )
01066 {
01067     free( p->obb );
01068     free( p->offset );
01069 }
01070 
01071 static void hash_free_3( hash_data_3* p )
01072 {
01073     free( p->obb );
01074     free( p->offset );
01075 }
01076 
01077 /*--------------------------------------------------------------------------
01078    Optimization algorithm to find a point within an element
01079 
01080    Given x(r)  (as values of x,y,z at all Lobatto nodes) and x_star,
01081    find the r that minimizes || x_star - x(r) ||_2
01082 
01083    As a minimization problem, the Newton step is
01084 
01085                __ 3
01086      [ J^T J - >_ d=1  resid_d H_d ] dr = J^t resid
01087 
01088    where resid = x_star - x(r), J = [ dx_i/dr_j ],
01089    and H_d = [ d^2 x_d/dr_i dr_j ].
01090 
01091    This is the appropriate step to take whenever constraints are active,
01092    and the current iterate is on a boundary of the element. When the current
01093    iterate is inside, J is square ( dim r = dim x ), resid will become small,
01094    and the step
01095 
01096      J dr = resid
01097 
01098    may be used instead, still giving quadratic convergence.
01099 
01100 
01101    Names use a _3 suffix for 3-d and _2 for 2-d.
01102    The routines require an initialized lagrange_data array as input:
01103      unsigned d, n[3] = { ... };
01104      realType *z[3] = { tmalloc(realType, n[0]), ... };
01105      for(d=0;d<3;++d) lobatto_nodes(z[d],n[d]);
01106 
01107      lagrange_data ld[3];
01108      for(d=0;d<3;++d) lagrange_setup(&ld[d],z[d],n[d]);
01109 
01110    Initialization:
01111      opt_data_3 data;
01112      opt_alloc_3(&data, ld);
01113 
01114    Use:
01115      const realType *xm[3];  // 3 pointers, each to n[0]*n[1]*n[2] realTypes
01116                          //   giving the nodal x, y, or z coordinates
01117 
01118      const realType x_star[3] = { ... };  // point to find
01119      realType r[3] = { 0,0,0 };           // initial guess with
01120      unsigned c = opt_no_constraints_3;   //   these constraints active
01121 
01122      realType dist = opt_findpt_3(&data,xm,x_star,r,&c);
01123      // minimizer is r with constraints c; 2-norm of resid is dist
01124 
01125    Clean-up:
01126      opt_free_3(&data);
01127 
01128      for(d=0;d<3;++d) lagrange_free(&ld[d]);
01129      for(d=0;d<3;++d) free(z[d]);
01130 
01131    The constraint number works as follows. Let cr be the constraints
01132    on the r variable:
01133       cr = 0       r fixed at -1
01134       cr = 1       r not fixed
01135       cr = 2       r fixed at 1
01136    Then the constraint number is (ct*3+cs)*3+cr
01137 
01138   --------------------------------------------------------------------------*/
01139 
01140 /* how many directions are constrained? */
01141 static const char opt_constr_num_2[ 9 ] = { 2, 1, 2, 1, 0, 1, 2, 1, 2 };
01142 static const char opt_constr_num_3[ 27 ] = { 3, 2, 3, 2, 1, 2, 3, 2, 3, 2, 1, 2, 1, 0,
01143                                              1, 2, 1, 2, 3, 2, 3, 2, 1, 2, 3, 2, 3 };
01144 
01145 /* which direction is constrained? */
01146 /* static const char opt_constr_dir_2[9] = {-1, 1,-1,  0,-1, 0, -1, 1,-1}; */
01147 static const char opt_constr_dir_3[ 27 ] = { -1, -1, -1, -1, 2,  -1, -1, -1, -1, -1, 1,  -1, 0, -1,
01148                                              0,  -1, 1,  -1, -1, -1, -1, -1, 2,  -1, -1, -1, -1 };
01149 
01150 /* which direction is not constrained? */
01151 static const char opt_constr_not[ 27 ] = { -1, 0, -1, 1, -1, 1, -1, 0, -1, 2, -1, 2, -1, -1,
01152                                            -1, 2, -1, 2, -1, 0, -1, 1, -1, 1, -1, 0, -1 };
01153 
01154 static const char opt_constr_wide[ 27 ] = { 0x00, 0x01, 0x02, 0x04, 0x05, 0x06, 0x08, 0x09, 0x0a,
01155                                             0x10, 0x11, 0x12, 0x14, 0x15, 0x16, 0x18, 0x19, 0x1a,
01156                                             0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a };
01157 
01158 static const unsigned opt_other1_3[ 3 ] = { 1, 0, 0 }, opt_other2_3[ 3 ] = { 2, 2, 1 };
01159 
01160 static unsigned opt_constr( unsigned constraints, unsigned d )
01161 {
01162     return ( opt_constr_wide[ constraints ] >> ( d * 2 ) ) & 3;
01163 }
01164 
01165 static void opt_constr_unpack_2( unsigned constraints, unsigned* c )
01166 {
01167     const char cw = opt_constr_wide[ constraints ];
01168     c[ 0 ] = cw & 3;
01169     c[ 1 ] = cw >> 2;
01170 }
01171 
01172 static void opt_constr_unpack_3( unsigned constraints, unsigned* c )
01173 {
01174     const char cw = opt_constr_wide[ constraints ];
01175     c[ 0 ] = cw & 3;
01176     c[ 1 ] = ( cw >> 2 ) & 3;
01177     c[ 2 ] = cw >> 4;
01178 }
01179 
01180 static unsigned opt_constr_pack_2( const unsigned* c )
01181 {
01182     return c[ 1 ] * 3 + c[ 0 ];
01183 }
01184 
01185 static unsigned opt_constr_pack_3( const unsigned* c )
01186 {
01187     return ( c[ 2 ] * 3 + c[ 1 ] ) * 3 + c[ 0 ];
01188 }
01189 
01190 /*--------------------------------------------------------------------------
01191 
01192    3 - D
01193 
01194   --------------------------------------------------------------------------*/
01195 
01196 void opt_alloc_3( opt_data_3* p, lagrange_data* ld )
01197 {
01198     const unsigned nr = ld[ 0 ].n, ns = ld[ 1 ].n, nt = ld[ 2 ].n, nf = umax_3( nr * ns, nr * nt, ns * nt ),
01199                    ne = umax_3( nr, ns, nt ), nw = 2 * ns * nt + 3 * ns;
01200     p->size[ 0 ] = 1;
01201     p->size[ 1 ] = nr;
01202     p->size[ 2 ] = nr * ns;
01203     p->size[ 3 ] = p->size[ 2 ] * nt;
01204     p->ld = ld;
01205     p->work = tmalloc( realType, 6 * nf + 9 * ne + nw );
01206     p->fd.x[ 0 ] = p->work + nw;
01207     p->fd.x[ 1 ] = p->fd.x[ 0 ] + nf;
01208     p->fd.x[ 2 ] = p->fd.x[ 1 ] + nf;
01209     p->fd.fdn[ 0 ] = p->fd.x[ 2 ] + nf;
01210     p->fd.fdn[ 1 ] = p->fd.fdn[ 0 ] + nf;
01211     p->fd.fdn[ 2 ] = p->fd.fdn[ 1 ] + nf;
01212     p->ed.x[ 0 ] = p->fd.fdn[ 2 ] + nf;
01213     p->ed.x[ 1 ] = p->ed.x[ 0 ] + ne;
01214     p->ed.x[ 2 ] = p->ed.x[ 1 ] + ne;
01215     p->ed.fd1[ 0 ] = p->ed.x[ 2 ] + ne;
01216     p->ed.fd1[ 1 ] = p->ed.fd1[ 0 ] + ne;
01217     p->ed.fd1[ 2 ] = p->ed.fd1[ 1 ] + ne;
01218     p->ed.fd2[ 0 ] = p->ed.fd1[ 2 ] + ne;
01219     p->ed.fd2[ 1 ] = p->ed.fd2[ 0 ] + ne;
01220     p->ed.fd2[ 2 ] = p->ed.fd2[ 1 ] + ne;
01221 }
01222 
01223 void opt_free_3( opt_data_3* p )
01224 {
01225     free( p->work );
01226 }
01227 
01228 static void opt_vol_set_3( opt_data_3* p, const realType r[ 3 ] )
01229 {
01230     lagrange_1( &p->ld[ 0 ], r[ 0 ] );
01231     lagrange_1( &p->ld[ 1 ], r[ 1 ] );
01232     lagrange_1( &p->ld[ 2 ], r[ 2 ] );
01233 }
01234 
01235 /* work holds 2*ns*nt + 3*ns realTypes */
01236 static void opt_vol_intp_3( opt_data_3* p )
01237 {
01238     unsigned             d;
01239     const lagrange_data* ld = p->ld;
01240 
01241     for( d = 0; d < 3; ++d )
01242         p->x[ d ] = tensor_ig3( ld[ 0 ].J, ld[ 0 ].D, ld[ 0 ].n, ld[ 1 ].J, ld[ 1 ].D, ld[ 1 ].n, ld[ 2 ].J, ld[ 2 ].D,
01243                                 ld[ 2 ].n, p->elx[ d ], &p->jac[ d * 3 ], p->work );
01244 }
01245 
01246 void opt_vol_set_intp_3( opt_data_3* p, const realType r[ 3 ] )
01247 {
01248     opt_vol_set_3( p, r );
01249     opt_vol_intp_3( p );
01250 }
01251 
01252 static void opt_face_proj_3( opt_data_3* p )
01253 {
01254     unsigned       d, off = 0;
01255     const unsigned dn = p->fd.dn, d1 = p->fd.d1, d2 = p->fd.d2, so = p->size[ d2 ] - p->size[ d1 + 1 ],
01256                    s1 = p->size[ d1 ], sn = p->size[ dn ], n1 = p->ld[ d1 ].n, n2 = p->ld[ d2 ].n, nn = p->ld[ dn ].n;
01257     const realType* D = p->ld[ dn ].D_z0;
01258     if( opt_constr( p->fd.constraints, dn ) == 2 ) off = p->size[ dn + 1 ] - p->size[ dn ], D = p->ld[ dn ].D_zn;
01259     for( d = 0; d < 3; ++d )
01260     {
01261         unsigned        i, j, k, arrindex = 0;
01262         const realType* in = p->elx[ d ] + off;
01263         for( j = n2; j; --j, in += so )
01264             for( i = n1; i; --i, ++arrindex, in += s1 )
01265             {
01266                 const realType* ind = in - off;
01267                 realType*       fdn = &p->fd.fdn[ d ][ arrindex ];
01268                 p->fd.x[ d ][ arrindex ] = *in;
01269                 *fdn = 0;
01270                 for( k = 0; k < nn; ++k, ind += sn )
01271                     *fdn += *ind * D[ k ];
01272             }
01273     }
01274 }
01275 
01276 static void opt_face_set_3( opt_data_3* p, const realType r[ 3 ], unsigned constr )
01277 {
01278     if( p->fd.constraints != constr )
01279     {
01280         p->fd.constraints = constr;
01281         p->fd.dn = opt_constr_dir_3[ constr ];
01282         p->fd.d1 = opt_other1_3[ p->fd.dn ];
01283         p->fd.d2 = opt_other2_3[ p->fd.dn ];
01284         opt_face_proj_3( p );
01285     }
01286     lagrange_1( &p->ld[ p->fd.d1 ], r[ p->fd.d1 ] );
01287     lagrange_1( &p->ld[ p->fd.d2 ], r[ p->fd.d2 ] );
01288 }
01289 
01290 /* work holds 2*ld[d2].n realTypes */
01291 static void opt_face_intp_3( opt_data_3* p )
01292 {
01293     unsigned        d;
01294     const unsigned  dn = p->fd.dn, d1 = p->fd.d1, d2 = p->fd.d2, n1 = p->ld[ d1 ].n, n2 = p->ld[ d2 ].n;
01295     const realType *J1 = p->ld[ d1 ].J, *J2 = p->ld[ d2 ].J, *D1 = p->ld[ d1 ].D, *D2 = p->ld[ d2 ].D;
01296 
01297     for( d = 0; d < 3; ++d )
01298     {
01299         realType g[ 2 ];
01300         p->x[ d ] = tensor_ig2( J1, D1, n1, J2, D2, n2, p->fd.x[ d ], &g[ 0 ], p->work );
01301         p->jac[ d * 3 + d1 ] = g[ 0 ];
01302         p->jac[ d * 3 + d2 ] = g[ 1 ];
01303         p->jac[ d * 3 + dn ] = tensor_i2( J1, n1, J2, n2, p->fd.fdn[ d ], p->work );
01304     }
01305 }
01306 
01307 static void opt_face_set_intp_3( opt_data_3* p, const realType r[ 3 ], unsigned constr )
01308 {
01309     opt_face_set_3( p, r, constr );
01310     opt_face_intp_3( p );
01311 }
01312 
01313 static void opt_face_hess_3( opt_data_3* p, realType hess[ 9 ] )
01314 {
01315     unsigned        d;
01316     const unsigned  d1 = p->fd.d1, d2 = p->fd.d2, n1 = p->ld[ d1 ].n, n2 = p->ld[ d2 ].n;
01317     const realType *J1 = p->ld[ d1 ].J, *J2 = p->ld[ d2 ].J, *D1 = p->ld[ d1 ].D, *D2 = p->ld[ d2 ].D,
01318                    *S1 = p->ld[ d1 ].D2, *S2 = p->ld[ d2 ].D2;
01319 
01320     lagrange_2u( &p->ld[ d1 ] );
01321     lagrange_2u( &p->ld[ d2 ] );
01322 
01323     for( d = 0; d < 3; ++d )
01324     {
01325         (void)tensor_ig2( J1, S1, n1, J2, S2, n2, p->fd.x[ d ], hess + d * 3, p->work );
01326         hess[ d * 3 + 0 ] = tensor_i2( S1, n1, J2, n2, p->fd.x[ d ], p->work );
01327         hess[ d * 3 + 1 ] = tensor_i2( J1, n1, S2, n2, p->fd.x[ d ], p->work );
01328         hess[ d * 3 + 2 ] = tensor_i2( D1, n1, D2, n2, p->fd.x[ d ], p->work );
01329     }
01330 }
01331 
01332 static void opt_edge_proj_3( opt_data_3* p )
01333 {
01334     unsigned       d, off, off1 = 0, off2 = 0;
01335     const unsigned de = p->ed.de, d1 = p->ed.d1, d2 = p->ed.d2, se = p->size[ de ], s1 = p->size[ d1 ],
01336                    s2 = p->size[ d2 ], ne = p->ld[ de ].n, n1 = p->ld[ d1 ].n, n2 = p->ld[ d2 ].n;
01337     const realType *fD1, *fD2;
01338     if( opt_constr( p->ed.constraints, d1 ) == 0 )
01339         fD1 = p->ld[ d1 ].D_z0;
01340     else
01341         fD1 = p->ld[ d1 ].D_zn, off1 = p->size[ d1 + 1 ] - p->size[ d1 ];
01342     if( opt_constr( p->ed.constraints, d2 ) == 0 )
01343         fD2 = p->ld[ d2 ].D_z0;
01344     else
01345         fD2 = p->ld[ d2 ].D_zn, off2 = p->size[ d2 + 1 ] - p->size[ d2 ];
01346     off = off1 + off2;
01347     for( d = 0; d < 3; ++d )
01348     {
01349         unsigned        i, j;
01350         const realType* in = p->elx[ d ] + off;
01351         for( i = 0; i < ne; ++i, in += se )
01352         {
01353             const realType *in1 = in - off1, *in2 = in - off2;
01354             realType *      fd1 = &p->ed.fd1[ d ][ i ], *fd2 = &p->ed.fd2[ d ][ i ];
01355             p->ed.x[ d ][ i ] = *in;
01356             *fd1 = *fd2 = 0;
01357             for( j = 0; j < n1; ++j, in1 += s1 )
01358                 *fd1 += *in1 * fD1[ j ];
01359             for( j = 0; j < n2; ++j, in2 += s2 )
01360                 *fd2 += *in2 * fD2[ j ];
01361         }
01362     }
01363 }
01364 
01365 static void opt_edge_set_3( opt_data_3* p, const realType r[ 3 ], unsigned constr )
01366 {
01367     if( p->ed.constraints != constr )
01368     {
01369         p->ed.constraints = constr;
01370         p->ed.de = opt_constr_not[ constr ];
01371         p->ed.d1 = opt_other1_3[ p->ed.de ];
01372         p->ed.d2 = opt_other2_3[ p->ed.de ];
01373         opt_edge_proj_3( p );
01374     }
01375     lagrange_1( &p->ld[ p->ed.de ], r[ p->ed.de ] );
01376 }
01377 
01378 static void opt_edge_intp_3( opt_data_3* p )
01379 {
01380     unsigned        d;
01381     const unsigned  de = p->ed.de, d1 = p->ed.d1, d2 = p->ed.d2, n = p->ld[ de ].n;
01382     const realType *J = p->ld[ de ].J, *D = p->ld[ de ].D;
01383 
01384     for( d = 0; d < 3; ++d )
01385     {
01386         p->x[ d ] = tensor_ig1( J, D, n, p->ed.x[ d ], &p->jac[ d * 3 + de ] );
01387         p->jac[ d * 3 + d1 ] = tensor_i1( J, n, p->ed.fd1[ d ] );
01388         p->jac[ d * 3 + d2 ] = tensor_i1( J, n, p->ed.fd2[ d ] );
01389     }
01390 }
01391 
01392 static void opt_edge_set_intp_3( opt_data_3* p, const realType r[ 3 ], unsigned constr )
01393 {
01394     opt_edge_set_3( p, r, constr );
01395     opt_edge_intp_3( p );
01396 }
01397 
01398 static void opt_edge_hess_3( opt_data_3* p, realType hess[ 3 ] )
01399 {
01400     unsigned        d;
01401     const unsigned  de = p->ed.de, n = p->ld[ de ].n;
01402     const realType* D2 = p->ld[ de ].D2;
01403     lagrange_2u( &p->ld[ de ] );
01404     for( d = 0; d < 3; ++d )
01405         hess[ d ] = tensor_i1( D2, n, p->ed.x[ d ] );
01406 }
01407 
01408 static void opt_point_proj_3( opt_data_3* p )
01409 {
01410     unsigned        off[ 3 ], offt, d, c[ 3 ];
01411     const realType* fD[ 3 ];
01412     opt_constr_unpack_3( p->pd.constraints, c );
01413     for( d = 0; d < 3; ++d )
01414         if( c[ d ] == 0 )
01415             fD[ d ] = p->ld[ d ].D_z0, off[ d ] = 0;
01416         else
01417             fD[ d ] = p->ld[ d ].D_zn, off[ d ] = p->size[ d + 1 ] - p->size[ d ];
01418     offt = off[ 0 ] + off[ 1 ] + off[ 2 ];
01419     for( d = 0; d < 9; ++d )
01420         p->pd.jac[ d ] = 0;
01421     for( d = 0; d < 3; ++d )
01422     {
01423         unsigned i, j;
01424         p->pd.x[ d ] = p->elx[ d ][ offt ];
01425         for( i = 0; i < 3; ++i )
01426         {
01427             const realType* in = p->elx[ d ] + offt - off[ i ];
01428             for( j = 0; j < p->ld[ i ].n; ++j, in += p->size[ i ] )
01429                 p->pd.jac[ d * 3 + i ] += *in * fD[ i ][ j ];
01430         }
01431     }
01432 }
01433 
01434 static void opt_point_set_3( opt_data_3* p, unsigned constr )
01435 {
01436     if( p->pd.constraints != constr )
01437     {
01438         p->pd.constraints = constr;
01439         opt_point_proj_3( p );
01440     }
01441 }
01442 
01443 static void opt_point_intp_3( opt_data_3* p )
01444 {
01445     memcpy( p->x, p->pd.x, 3 * sizeof( realType ) );
01446     memcpy( p->jac, p->pd.jac, 9 * sizeof( realType ) );
01447 }
01448 
01449 static void opt_point_set_intp_3( opt_data_3* p, unsigned constr )
01450 {
01451     opt_point_set_3( p, constr );
01452     opt_point_intp_3( p );
01453 }
01454 
01455 #define DIAGNOSTICS 0
01456 
01457 double opt_findpt_3( opt_data_3* p, const realType* const elx[ 3 ], const realType xstar[ 3 ], realType r[ 3 ],
01458                      unsigned* constr )
01459 {
01460     realType dr[ 3 ], resid[ 3 ], steep[ 3 ];
01461 
01462     unsigned c = *constr, ac, d, cc[ 3 ], step = 0;
01463 
01464     p->elx[ 0 ] = elx[ 0 ], p->elx[ 1 ] = elx[ 1 ], p->elx[ 2 ] = elx[ 2 ];
01465 
01466     p->fd.constraints = opt_no_constraints_3;
01467     p->ed.constraints = opt_no_constraints_3;
01468     p->pd.constraints = opt_no_constraints_3;
01469 
01470 #if DIAGNOSTICS
01471     printf( "opt_findpt: xstar = %g, %g, %g\n", xstar[ 0 ], xstar[ 1 ], xstar[ 2 ] );
01472 #endif
01473 
01474     do
01475     {
01476         ++step;
01477         if( step == 50 ) /*fail("%s: opt_findpt_3 did not converge\n",__FILE__);*/
01478             return 1.e+30;
01479 #if DIAGNOSTICS
01480         printf( "  iteration %u\n", step );
01481         printf( "    %d constraint(s) active\n", (int)opt_constr_num_3[ c ] );
01482 #endif
01483         /* update face/edge/point data if necessary,
01484            and evaluate x(r) as well as the jacobian */
01485         switch( opt_constr_num_3[ c ] )
01486         {
01487             case 0:
01488                 opt_vol_set_intp_3( p, r );
01489                 break;
01490             case 1:
01491                 opt_face_set_intp_3( p, r, c );
01492                 break;
01493             case 2:
01494                 opt_edge_set_intp_3( p, r, c );
01495                 break;
01496             case 3:
01497                 opt_point_set_intp_3( p, c );
01498                 break;
01499         }
01500 #if DIAGNOSTICS
01501         printf( "    r = %g, %g, %g\n", r[ 0 ], r[ 1 ], r[ 2 ] );
01502         printf( "    x = %g, %g, %g\n", p->x[ 0 ], p->x[ 1 ], p->x[ 2 ] );
01503 #endif
01504         /* compute residual */
01505         for( d = 0; d < 3; ++d )
01506             resid[ d ] = xstar[ d ] - p->x[ d ];
01507 #if DIAGNOSTICS
01508         printf( "    resid = %g, %g, %g\n", resid[ 0 ], resid[ 1 ], resid[ 2 ] );
01509         printf( "    2-norm = %g\n", r2norm_3( resid[ 0 ], resid[ 1 ], resid[ 2 ] ) );
01510 #endif
01511         /* check constraints against steepest descent direction */
01512         ac = c;
01513         if( opt_constr_num_3[ c ] )
01514         {
01515             opt_constr_unpack_3( c, cc );
01516             mat_app_3c( steep, p->jac, resid ); /* steepest descent = J^T r */
01517 #if DIAGNOSTICS
01518             printf( "    steepest descent = %g, %g, %g\n", steep[ 0 ], steep[ 1 ], steep[ 2 ] );
01519 #endif
01520             for( d = 0; d < 3; ++d )
01521                 if( ( cc[ d ] == 0 && steep[ d ] > 0 ) || ( cc[ d ] == 2 && steep[ d ] < 0 ) ) cc[ d ] = 1;
01522             ac = opt_constr_pack_3( cc );
01523         }
01524         /* update face/edge/point data if necessary */
01525         if( ac != c )
01526         {
01527             c = ac;
01528 #if DIAGNOSTICS
01529             printf( "    relaxed to %d constraints\n", (int)opt_constr_num_3[ c ] );
01530 #endif
01531             switch( opt_constr_num_3[ c ] )
01532             {
01533                 case 1:
01534                     opt_face_set_3( p, r, c );
01535                     break;
01536                 case 2:
01537                     opt_edge_set_3( p, r, c );
01538                     break;
01539                 case 3:
01540                     opt_point_set_3( p, c );
01541                     break;
01542             }
01543         }
01544         /* compute Newton step */
01545         switch( opt_constr_num_3[ c ] )
01546         {
01547             case 0:
01548                 tinyla_solve_3( dr, p->jac, resid );
01549                 break;
01550             case 1: {
01551                 const unsigned  dn = p->fd.dn, d1 = p->fd.d1, d2 = p->fd.d2;
01552                 realType        A[ 4 ], H[ 9 ];
01553                 const realType* J = p->jac;
01554                 opt_face_hess_3( p, H );
01555                 A[ 0 ] = J[ d1 ] * J[ d1 ] + J[ 3 + d1 ] * J[ 3 + d1 ] + J[ 6 + d1 ] * J[ 6 + d1 ];
01556                 A[ 1 ] = J[ d2 ] * J[ d2 ] + J[ 3 + d2 ] * J[ 3 + d2 ] + J[ 6 + d2 ] * J[ 6 + d2 ];
01557                 A[ 2 ] = J[ d1 ] * J[ d2 ] + J[ 3 + d1 ] * J[ 3 + d2 ] + J[ 6 + d1 ] * J[ 6 + d2 ];
01558                 A[ 0 ] -= resid[ 0 ] * H[ 0 ] + resid[ 1 ] * H[ 3 ] + resid[ 2 ] * H[ 6 ];
01559                 A[ 1 ] -= resid[ 0 ] * H[ 1 ] + resid[ 1 ] * H[ 4 ] + resid[ 2 ] * H[ 7 ];
01560                 A[ 2 ] -= resid[ 0 ] * H[ 2 ] + resid[ 1 ] * H[ 5 ] + resid[ 2 ] * H[ 8 ];
01561                 tinyla_solve_sym_2( &dr[ d1 ], &dr[ d2 ], A, steep[ d1 ], steep[ d2 ] );
01562                 dr[ dn ] = 0;
01563             }
01564             break;
01565             case 2: {
01566                 const unsigned  de = p->ed.de, d1 = p->ed.d1, d2 = p->ed.d2;
01567                 realType        fac, H[ 3 ];
01568                 const realType* J = p->jac + de;
01569                 opt_edge_hess_3( p, H );
01570                 fac = J[ 0 ] * J[ 0 ] + J[ 3 ] * J[ 3 ] + J[ 6 ] * J[ 6 ] -
01571                       ( resid[ 0 ] * H[ 0 ] + resid[ 1 ] * H[ 1 ] + resid[ 2 ] * H[ 2 ] );
01572                 dr[ de ] = steep[ de ] / fac;
01573                 dr[ d1 ] = 0, dr[ d2 ] = 0;
01574             }
01575             break;
01576             case 3:
01577                 dr[ 0 ] = dr[ 1 ] = dr[ 2 ] = 0;
01578                 break;
01579         }
01580 #if DIAGNOSTICS
01581         printf( "    dr = %g, %g, %g\n", dr[ 0 ], dr[ 1 ], dr[ 2 ] );
01582 #endif
01583         /* project new iteration onto [-1,1]^3 */
01584         opt_constr_unpack_3( c, cc );
01585         for( d = 0; d < 3; ++d )
01586         {
01587             if( cc[ d ] != 1 ) continue;
01588             r[ d ] += dr[ d ];
01589             if( r[ d ] <= -1 )
01590                 dr[ d ] -= r[ d ] + 1, r[ d ] = -1, cc[ d ] = 0;
01591             else if( r[ d ] >= 1 )
01592                 dr[ d ] -= r[ d ] - 1, r[ d ] = 1, cc[ d ] = 2;
01593         }
01594         c = opt_constr_pack_3( cc );
01595     } while( r1norm_3( dr[ 0 ], dr[ 1 ], dr[ 2 ] ) > 3 * MOAB_POLY_EPS );
01596     *constr = c;
01597 #if 0
01598   printf("opt_findpt_3 converged in %u iterations\n", step);
01599 #endif
01600     return r2norm_3( resid[ 0 ], resid[ 1 ], resid[ 2 ] );
01601 }
01602 
01603 #undef DIAGNOSTICS
01604 
01605 void opt_alloc_2( opt_data_2* p, lagrange_data* ld )
01606 {
01607     const unsigned nr = ld[ 0 ].n, ns = ld[ 1 ].n, ne = umax_2( nr, ns ), nw = 2 * ns;
01608     p->size[ 0 ] = 1;
01609     p->size[ 1 ] = nr;
01610     p->size[ 2 ] = nr * ns;
01611     p->ld = ld;
01612     p->work = tmalloc( realType, 4 * ne + nw );
01613     p->ed.x[ 0 ] = p->work + nw;
01614     p->ed.x[ 1 ] = p->ed.x[ 0 ] + ne;
01615     p->ed.fd1[ 0 ] = p->ed.x[ 1 ] + ne;
01616     p->ed.fd1[ 1 ] = p->ed.fd1[ 0 ] + ne;
01617 }
01618 
01619 void opt_free_2( opt_data_2* p )
01620 {
01621     free( p->work );
01622 }
01623 
01624 static void opt_area_set_2( opt_data_2* p, const realType r[ 2 ] )
01625 {
01626     lagrange_1( &p->ld[ 0 ], r[ 0 ] );
01627     lagrange_1( &p->ld[ 1 ], r[ 1 ] );
01628 }
01629 
01630 /* work holds 2*ns realTypes */
01631 static void opt_area_intp_2( opt_data_2* p )
01632 {
01633     unsigned             d;
01634     const lagrange_data* ld = p->ld;
01635 
01636     for( d = 0; d < 2; ++d )
01637         p->x[ d ] = tensor_ig2( ld[ 0 ].J, ld[ 0 ].D, ld[ 0 ].n, ld[ 1 ].J, ld[ 1 ].D, ld[ 1 ].n, p->elx[ d ],
01638                                 &p->jac[ d * 2 ], p->work );
01639 }
01640 
01641 static void opt_area_set_intp_2( opt_data_2* p, const realType r[ 2 ] )
01642 {
01643     opt_area_set_2( p, r );
01644     opt_area_intp_2( p );
01645 }
01646 
01647 static void opt_edge_proj_2( opt_data_2* p )
01648 {
01649     unsigned       d, off = 0;
01650     const unsigned de = p->ed.de, d1 = p->ed.d1, se = p->size[ de ], s1 = p->size[ d1 ], ne = p->ld[ de ].n,
01651                    n1 = p->ld[ d1 ].n;
01652     const realType* fD1;
01653     if( opt_constr( p->ed.constraints, d1 ) == 0 )
01654         fD1 = p->ld[ d1 ].D_z0;
01655     else
01656         fD1 = p->ld[ d1 ].D_zn, off = p->size[ d1 + 1 ] - p->size[ d1 ];
01657     for( d = 0; d < 2; ++d )
01658     {
01659         unsigned        i, j;
01660         const realType* in = p->elx[ d ] + off;
01661         for( i = 0; i < ne; ++i, in += se )
01662         {
01663             const realType* in1 = in - off;
01664             realType*       fd1 = &p->ed.fd1[ d ][ i ];
01665             p->ed.x[ d ][ i ] = *in;
01666             *fd1 = 0;
01667             for( j = 0; j < n1; ++j, in1 += s1 )
01668                 *fd1 += *in1 * fD1[ j ];
01669         }
01670     }
01671 }
01672 
01673 static void opt_edge_set_2( opt_data_2* p, const realType r[ 2 ], unsigned constr )
01674 {
01675     if( p->ed.constraints != constr )
01676     {
01677         p->ed.constraints = constr;
01678         p->ed.de = opt_constr_not[ constr ];
01679         p->ed.d1 = 1 - p->ed.de;
01680         opt_edge_proj_2( p );
01681     }
01682     lagrange_1( &p->ld[ p->ed.de ], r[ p->ed.de ] );
01683 }
01684 
01685 static void opt_edge_intp_2( opt_data_2* p )
01686 {
01687     unsigned        d;
01688     const unsigned  de = p->ed.de, d1 = p->ed.d1, n = p->ld[ de ].n;
01689     const realType *J = p->ld[ de ].J, *D = p->ld[ de ].D;
01690     for( d = 0; d < 2; ++d )
01691     {
01692         p->x[ d ] = tensor_ig1( J, D, n, p->ed.x[ d ], &p->jac[ d * 2 + de ] );
01693         p->jac[ d * 2 + d1 ] = tensor_i1( J, n, p->ed.fd1[ d ] );
01694     }
01695 }
01696 
01697 static void opt_edge_set_intp_2( opt_data_2* p, const realType r[ 2 ], unsigned constr )
01698 {
01699     opt_edge_set_2( p, r, constr );
01700     opt_edge_intp_2( p );
01701 }
01702 
01703 static void opt_edge_hess_2( opt_data_2* p, realType hess[ 2 ] )
01704 {
01705     unsigned        d;
01706     const unsigned  de = p->ed.de, n = p->ld[ de ].n;
01707     const realType* D2 = p->ld[ de ].D2;
01708     lagrange_2u( &p->ld[ de ] );
01709     for( d = 0; d < 2; ++d )
01710         hess[ d ] = tensor_i1( D2, n, p->ed.x[ d ] );
01711 }
01712 
01713 static void opt_point_proj_2( opt_data_2* p )
01714 {
01715     unsigned        off[ 2 ], offt, d, c[ 2 ];
01716     const realType* fD[ 2 ];
01717     opt_constr_unpack_2( p->pd.constraints, c );
01718     for( d = 0; d < 2; ++d )
01719         if( c[ d ] == 0 )
01720             fD[ d ] = p->ld[ d ].D_z0, off[ d ] = 0;
01721         else
01722             fD[ d ] = p->ld[ d ].D_zn, off[ d ] = p->size[ d + 1 ] - p->size[ d ];
01723     offt = off[ 0 ] + off[ 1 ];
01724     for( d = 0; d < 4; ++d )
01725         p->pd.jac[ d ] = 0;
01726     for( d = 0; d < 2; ++d )
01727     {
01728         unsigned i, j;
01729         p->pd.x[ d ] = p->elx[ d ][ offt ];
01730         for( i = 0; i < 2; ++i )
01731         {
01732             const realType* in = p->elx[ d ] + offt - off[ i ];
01733             for( j = 0; j < p->ld[ i ].n; ++j, in += p->size[ i ] )
01734                 p->pd.jac[ d * 2 + i ] += *in * fD[ i ][ j ];
01735         }
01736     }
01737 }
01738 
01739 static void opt_point_set_2( opt_data_2* p, unsigned constr )
01740 {
01741     if( p->pd.constraints != constr )
01742     {
01743         p->pd.constraints = constr;
01744         opt_point_proj_2( p );
01745     }
01746 }
01747 
01748 static void opt_point_intp_2( opt_data_2* p )
01749 {
01750     memcpy( p->x, p->pd.x, 2 * sizeof( realType ) );
01751     memcpy( p->jac, p->pd.jac, 4 * sizeof( realType ) );
01752 }
01753 
01754 static void opt_point_set_intp_2( opt_data_2* p, unsigned constr )
01755 {
01756     opt_point_set_2( p, constr );
01757     opt_point_intp_2( p );
01758 }
01759 
01760 #define DIAGNOSTICS 0
01761 
01762 double opt_findpt_2( opt_data_2* p, const realType* const elx[ 2 ], const realType xstar[ 2 ], realType r[ 2 ],
01763                      unsigned* constr )
01764 {
01765     realType dr[ 2 ], resid[ 2 ], steep[ 2 ];
01766 
01767     unsigned c = *constr, ac, d, cc[ 2 ], step = 0;
01768 
01769     p->elx[ 0 ] = elx[ 0 ], p->elx[ 1 ] = elx[ 1 ];
01770 
01771     p->ed.constraints = opt_no_constraints_2;
01772     p->pd.constraints = opt_no_constraints_2;
01773 
01774 #if DIAGNOSTICS
01775     printf( "opt_findpt: xstar = %g, %g\n", xstar[ 0 ], xstar[ 1 ] );
01776 #endif
01777 
01778     do
01779     {
01780         ++step;
01781         if( step == 50 ) /*fail("%s: opt_findpt_2 did not converge\n",__FILE__);*/
01782             return 1.e+30;
01783 #if DIAGNOSTICS
01784         printf( "  iteration %u\n", step );
01785         printf( "    %d constraint(s) active\n", (int)opt_constr_num_2[ c ] );
01786 #endif
01787         /* update face/edge/point data if necessary,
01788            and evaluate x(r) as well as the jacobian */
01789         switch( opt_constr_num_2[ c ] )
01790         {
01791             case 0:
01792                 opt_area_set_intp_2( p, r );
01793                 break;
01794             case 1:
01795                 opt_edge_set_intp_2( p, r, c );
01796                 break;
01797             case 2:
01798                 opt_point_set_intp_2( p, c );
01799                 break;
01800         }
01801 #if DIAGNOSTICS
01802         printf( "    r = %g, %g\n", r[ 0 ], r[ 1 ] );
01803         printf( "    x = %g, %g\n", p->x[ 0 ], p->x[ 1 ] );
01804 #endif
01805         /* compute residual */
01806         for( d = 0; d < 2; ++d )
01807             resid[ d ] = xstar[ d ] - p->x[ d ];
01808 #if DIAGNOSTICS
01809         printf( "    resid = %g, %g\n", resid[ 0 ], resid[ 1 ] );
01810         printf( "    2-norm = %g\n", r2norm_2( resid[ 0 ], resid[ 1 ] ) );
01811 #endif
01812         /* check constraints against steepest descent direction */
01813         ac = c;
01814         if( opt_constr_num_2[ c ] )
01815         {
01816             opt_constr_unpack_2( c, cc );
01817             mat_app_2c( steep, p->jac, resid ); /* steepest descent = J^T r */
01818 #if DIAGNOSTICS
01819             printf( "    steepest descent = %g, %g\n", steep[ 0 ], steep[ 1 ] );
01820 #endif
01821             for( d = 0; d < 2; ++d )
01822                 if( ( cc[ d ] == 0 && steep[ d ] > 0 ) || ( cc[ d ] == 2 && steep[ d ] < 0 ) ) cc[ d ] = 1;
01823             ac = opt_constr_pack_2( cc );
01824         }
01825         /* update face/edge/point data if necessary */
01826         if( ac != c )
01827         {
01828             c = ac;
01829 #if DIAGNOSTICS
01830             printf( "    relaxed to %d constraints\n", (int)opt_constr_num_2[ c ] );
01831 #endif
01832             switch( opt_constr_num_2[ c ] )
01833             {
01834                 case 1:
01835                     opt_edge_set_2( p, r, c );
01836                     break;
01837                 case 2:
01838                     opt_point_set_2( p, c );
01839                     break;
01840             }
01841         }
01842         /* compute Newton step */
01843         switch( opt_constr_num_2[ c ] )
01844         {
01845             case 0:
01846                 tinyla_solve_2( dr, p->jac, resid );
01847                 break;
01848             case 1: {
01849                 const unsigned  de = p->ed.de, d1 = p->ed.d1;
01850                 realType        fac, H[ 2 ];
01851                 const realType* J = p->jac + de;
01852                 opt_edge_hess_2( p, H );
01853                 fac = J[ 0 ] * J[ 0 ] + J[ 2 ] * J[ 2 ] - ( resid[ 0 ] * H[ 0 ] + resid[ 1 ] * H[ 1 ] );
01854                 dr[ de ] = steep[ de ] / fac;
01855                 dr[ d1 ] = 0;
01856             }
01857             break;
01858             case 2:
01859                 dr[ 0 ] = dr[ 1 ] = 0;
01860                 break;
01861         }
01862 #if DIAGNOSTICS
01863         printf( "    dr = %g, %g\n", dr[ 0 ], dr[ 1 ] );
01864 #endif
01865         /* project new iteration onto [-1,1]^2 */
01866         opt_constr_unpack_2( c, cc );
01867         for( d = 0; d < 2; ++d )
01868         {
01869             if( cc[ d ] != 1 ) continue;
01870             r[ d ] += dr[ d ];
01871             if( r[ d ] <= -1 )
01872                 dr[ d ] -= r[ d ] + 1, r[ d ] = -1, cc[ d ] = 0;
01873             else if( r[ d ] >= 1 )
01874                 dr[ d ] -= r[ d ] - 1, r[ d ] = 1, cc[ d ] = 2;
01875         }
01876         c = opt_constr_pack_2( cc );
01877     } while( r1norm_2( dr[ 0 ], dr[ 1 ] ) > 2 * MOAB_POLY_EPS );
01878     *constr = c;
01879     return r2norm_2( resid[ 0 ], resid[ 1 ] );
01880 }
01881 
01882 #undef DIAGNOSTICS
01883 
01884 /*--------------------------------------------------------------------------
01885    Point Finding  (interface/top-level)
01886 
01887    Initializing the data:
01888      unsigned nel;        // number of elements
01889      const unsigned n[3]; // number of nodes in r, s, t directions
01890      const realType *xm[3];   // n[0]*n[1]*n[2]*nel x,y,z coordinates
01891      realType tol = 0.01;     // how far point is allowed to be outside element
01892                           //   relative to element size
01893      unsigned max_size = n[0]*n[1]*n[2]*nel; // maximum size of hash table
01894 
01895      findpt_data_3 *data = findpt_setup_3(xm,n,nel,max_size,tol);
01896 
01897    Using the data:
01898      realType x[3] = { ... };   // point to find
01899      int el; // element number
01900      realType r[3]; // parametric coordinates
01901      int guess = 0; // do we use (el,r,s,t) as an initial guess?
01902      int code; // 0 : normal, -1 : outside all elements,
01903                // 1 : border, or outside but within tolerance
01904      realType dist; // distance in xyz space from returned (el,r,s,t) to given
01905                 // (x,y,z)
01906 
01907      code = findpt_3(data, x, guess, &el, r, &dist);
01908 
01909    When done:
01910      findpt_free_3(&data);
01911 
01912   --------------------------------------------------------------------------*/
01913 
01914 /* heap sort on A[0:n-1] with key A[i]->dist
01915    precondition: n!=0 */
01916 static void findpt_list_sort( findpt_listel** A, unsigned n )
01917 {
01918     unsigned i;
01919     --A; /* make A have a base index of 1 */
01920     /* build heap */
01921     for( i = 2; i <= n; ++i )
01922     {
01923         findpt_listel* item = A[ i ];
01924         unsigned       hole = i, parent = hole >> 1;
01925         if( A[ parent ]->dist >= item->dist ) continue;
01926         do
01927         {
01928             A[ hole ] = A[ parent ];
01929             hole = parent;
01930             parent >>= 1;
01931         } while( parent && A[ parent ]->dist < item->dist );
01932         A[ hole ] = item;
01933     }
01934     /* extract */
01935     for( i = n - 1; i; --i )
01936     {
01937         findpt_listel* item = A[ i + 1 ];
01938         unsigned       hole = 1;
01939         A[ i + 1 ] = A[ 1 ];
01940         for( ;; )
01941         {
01942             unsigned ch = hole << 1, r = ch + 1;
01943             if( r <= i && A[ ch ]->dist < A[ r ]->dist ) ch = r;
01944             if( ch > i || item->dist >= A[ ch ]->dist ) break;
01945             A[ hole ] = A[ ch ];
01946             hole = ch;
01947         }
01948         A[ hole ] = item;
01949     }
01950 }
01951 
01952 findpt_data_2* findpt_setup_2( const realType* const xw[ 2 ], const unsigned n[ 2 ], uint nel, uint max_hash_size,
01953                                realType bbox_tol )
01954 {
01955     unsigned       d;
01956     findpt_data_2* p = tmalloc( findpt_data_2, 1 );
01957 
01958     p->hash = tmalloc( hash_data_2, 1 );
01959     p->od = tmalloc( opt_data_2, 1 );
01960 
01961     for( d = 0; d < 2; ++d )
01962         p->xw[ d ] = xw[ d ];
01963     p->nptel = n[ 0 ] * n[ 1 ];
01964 
01965     hash_build_2( p->hash, xw, n, nel, max_hash_size, bbox_tol );
01966 
01967     for( d = 0; d < 2; ++d )
01968     {
01969         p->z[ d ] = tmalloc( realType, n[ d ] );
01970         lobatto_nodes( p->z[ d ], n[ d ] );
01971         lagrange_setup( &p->ld[ d ], p->z[ d ], n[ d ] );
01972     }
01973 
01974     p->list = tmalloc( findpt_listel, p->hash->max );
01975     p->sorted = tmalloc( findpt_listel*, p->hash->max );
01976 
01977     opt_alloc_2( p->od, p->ld );
01978     p->od_work = p->od->work;
01979 
01980     return p;
01981 }
01982 
01983 findpt_data_3* findpt_setup_3( const realType* const xw[ 3 ], const unsigned n[ 3 ], uint nel, uint max_hash_size,
01984                                realType bbox_tol )
01985 {
01986     unsigned       d;
01987     findpt_data_3* p = tmalloc( findpt_data_3, 1 );
01988 
01989     p->hash = tmalloc( hash_data_3, 1 );
01990     p->od = tmalloc( opt_data_3, 1 );
01991 
01992     for( d = 0; d < 3; ++d )
01993         p->xw[ d ] = xw[ d ];
01994     p->nptel = n[ 0 ] * n[ 1 ] * n[ 2 ];
01995 
01996     hash_build_3( p->hash, xw, n, nel, max_hash_size, bbox_tol );
01997 
01998     for( d = 0; d < 3; ++d )
01999     {
02000         p->z[ d ] = tmalloc( realType, n[ d ] );
02001         lobatto_nodes( p->z[ d ], n[ d ] );
02002         lagrange_setup( &p->ld[ d ], p->z[ d ], n[ d ] );
02003     }
02004 
02005     p->list = tmalloc( findpt_listel, p->hash->max );
02006     p->sorted = tmalloc( findpt_listel*, p->hash->max );
02007 
02008     opt_alloc_3( p->od, p->ld );
02009     p->od_work = p->od->work;
02010 
02011     return p;
02012 }
02013 
02014 void findpt_free_2( findpt_data_2* p )
02015 {
02016     unsigned d;
02017     opt_free_2( p->od );
02018     free( p->od );
02019     hash_free_2( p->hash );
02020     free( p->hash );
02021     free( p->list );
02022     free( p->sorted );
02023     for( d = 0; d < 2; ++d )
02024         free( p->z[ d ] );
02025     free( p );
02026 }
02027 
02028 void findpt_free_3( findpt_data_3* p )
02029 {
02030     unsigned d;
02031     opt_free_3( p->od );
02032     free( p->od );
02033     hash_free_3( p->hash );
02034     free( p->hash );
02035     free( p->list );
02036     free( p->sorted );
02037     for( d = 0; d < 3; ++d )
02038         free( p->z[ d ] );
02039     free( p );
02040 }
02041 
02042 const realType* findpt_allbnd_2( const findpt_data_2* p )
02043 {
02044     return p->hash->bnd;
02045 }
02046 
02047 const realType* findpt_allbnd_3( const findpt_data_3* p )
02048 {
02049     return p->hash->bnd;
02050 }
02051 
02052 static void findpt_hash_2( findpt_data_2* p, const realType x[ 2 ] )
02053 {
02054     findpt_listel *list = p->list, **sorted = p->sorted;
02055     const uint     hi = hash_index_2( p->hash, x );
02056     const uint*    offset = p->hash->offset;
02057     uint           i;
02058     const uint     b = offset[ hi ], e = offset[ hi + 1 ];
02059     for( i = b; i != e; ++i )
02060     {
02061         const uint     el = offset[ i ];
02062         realType*      r = &list->r[ 0 ];
02063         const obbox_2* obb = &p->hash->obb[ el ];
02064         if( obbox_axis_test_2( obb, x ) ) continue;
02065         if( obbox_test_2( obb, x, r ) ) continue;
02066         list->el = el;
02067         list->dist = r1norm_2( r[ 0 ], r[ 1 ] );
02068         *sorted++ = list++;
02069     }
02070     p->end = sorted;
02071     if( p->end != p->sorted ) findpt_list_sort( p->sorted, p->end - p->sorted );
02072 }
02073 
02074 static void findpt_hash_3( findpt_data_3* p, const realType x[ 3 ] )
02075 {
02076     findpt_listel *list = p->list, **sorted = p->sorted;
02077     const uint     hi = hash_index_3( p->hash, x );
02078     const uint*    offset = p->hash->offset;
02079     uint           i;
02080     const uint     b = offset[ hi ], e = offset[ hi + 1 ];
02081     for( i = b; i != e; ++i )
02082     {
02083         const uint     el = offset[ i ];
02084         realType*      r = &list->r[ 0 ];
02085         const obbox_3* obb = &p->hash->obb[ el ];
02086         if( obbox_axis_test_3( obb, x ) ) continue;
02087         if( obbox_test_3( obb, x, r ) ) continue;
02088         list->el = el;
02089         list->dist = r1norm_3( r[ 0 ], r[ 1 ], r[ 2 ] );
02090         *sorted++ = list++;
02091     }
02092     p->end = sorted;
02093     if( p->end != p->sorted ) findpt_list_sort( p->sorted, p->end - p->sorted );
02094 }
02095 
02096 static int findpt_guess_2( findpt_data_2* p, const realType x[ 2 ], uint el, realType r[ 2 ], realType* dist )
02097 {
02098     const uint      arrindex = p->nptel * el;
02099     const realType* elx[ 2 ];
02100     realType        g[ 2 ];
02101     unsigned        c = opt_no_constraints_2;
02102     const obbox_2*  obb = &p->hash->obb[ el ];
02103     elx[ 0 ] = p->xw[ 0 ] + arrindex;
02104     elx[ 1 ] = p->xw[ 1 ] + arrindex;
02105     if( obbox_axis_test_2( obb, x ) || obbox_test_2( obb, x, g ) ) return 0;
02106     *dist = opt_findpt_2( p->od, elx, x, r, &c );
02107     return c == opt_no_constraints_2;
02108 }
02109 
02110 static int findpt_guess_3( findpt_data_3* p, const realType x[ 3 ], uint el, realType r[ 3 ], realType* dist )
02111 {
02112     const uint      arrindex = p->nptel * el;
02113     const realType* elx[ 3 ];
02114     realType        g[ 3 ];
02115     unsigned        c = opt_no_constraints_3;
02116     const obbox_3*  obb = &p->hash->obb[ el ];
02117     elx[ 0 ] = p->xw[ 0 ] + arrindex;
02118     elx[ 1 ] = p->xw[ 1 ] + arrindex;
02119     elx[ 2 ] = p->xw[ 2 ] + arrindex;
02120     if( obbox_axis_test_3( obb, x ) || obbox_test_3( obb, x, g ) ) return 0;
02121     *dist = opt_findpt_3( p->od, elx, x, r, &c );
02122     return c == opt_no_constraints_3;
02123 }
02124 
02125 #define DIAGNOSTICS 0
02126 
02127 static int findpt_pass_2( findpt_data_2* p, const realType x[ 2 ], uint* el, realType r[ 2 ], realType* dist_min )
02128 {
02129     findpt_listel** qq = p->sorted;
02130     const realType* bnd;
02131     realType        dist;
02132     do
02133     {
02134         findpt_listel*  q = *qq;
02135         const uint      arrindex = p->nptel * q->el;
02136         const realType* elx[ 2 ];
02137         unsigned        c = opt_no_constraints_2;
02138         elx[ 0 ] = p->xw[ 0 ] + arrindex;
02139         elx[ 1 ] = p->xw[ 1 ] + arrindex;
02140         dist = opt_findpt_2( p->od, elx, x, q->r, &c );
02141         if( qq == p->sorted || dist < *dist_min || c == opt_no_constraints_2 )
02142         {
02143             *dist_min = dist;
02144             *el = q->el;
02145             memcpy( r, q->r, 2 * sizeof( realType ) );
02146             if( c == opt_no_constraints_2 ) return 0;
02147         }
02148     } while( ++qq != p->end );
02149     bnd = p->hash->obb[ *el ].axis_bnd;
02150     return *dist_min > r2norm_2( bnd[ 1 ] - bnd[ 0 ], bnd[ 3 ] - bnd[ 2 ] ) ? -1 : 1;
02151 }
02152 
02153 static int findpt_pass_3( findpt_data_3* p, const realType x[ 3 ], uint* el, realType r[ 3 ], realType* dist_min )
02154 {
02155     findpt_listel** qq = p->sorted;
02156     const realType* bnd;
02157     realType        dist;
02158     do
02159     {
02160         findpt_listel*  q = *qq;
02161         const uint      arrindex = p->nptel * q->el;
02162         const realType* elx[ 3 ];
02163         unsigned        c = opt_no_constraints_3;
02164         elx[ 0 ] = p->xw[ 0 ] + arrindex;
02165         elx[ 1 ] = p->xw[ 1 ] + arrindex;
02166         elx[ 2 ] = p->xw[ 2 ] + arrindex;
02167         dist = opt_findpt_3( p->od, elx, x, q->r, &c );
02168         if( qq == p->sorted || dist < *dist_min || c == opt_no_constraints_3 )
02169         {
02170             *dist_min = dist;
02171             *el = q->el;
02172             memcpy( r, q->r, 3 * sizeof( realType ) );
02173             if( c == opt_no_constraints_3 )
02174             {
02175 #if DIAGNOSTICS
02176                 printf( "point found in element #%d\n", qq - p->sorted );
02177 #endif
02178                 return 0;
02179             }
02180         }
02181     } while( ++qq != p->end );
02182     bnd = p->hash->obb[ *el ].axis_bnd;
02183     return *dist_min > r2norm_3( bnd[ 1 ] - bnd[ 0 ], bnd[ 3 ] - bnd[ 2 ], bnd[ 5 ] - bnd[ 4 ] ) ? -1 : 1;
02184 }
02185 
02186 int findpt_2( findpt_data_2* p, const realType x[ 2 ], int guess, uint* el, realType r[ 2 ], realType* dist )
02187 {
02188     if( guess && findpt_guess_2( p, x, *el, r, dist ) ) return 0;
02189     findpt_hash_2( p, x );
02190     if( p->sorted == p->end ) return -1;
02191     return findpt_pass_2( p, x, el, r, dist );
02192 }
02193 
02194 int findpt_3( findpt_data_3* p, const realType x[ 3 ], int guess, uint* el, realType r[ 3 ], realType* dist )
02195 {
02196     if( guess && findpt_guess_3( p, x, *el, r, dist ) ) return 0;
02197     findpt_hash_3( p, x );
02198 #if DIAGNOSTICS
02199     printf( "hashing leaves %d elements to consider\n", p->end - p->sorted );
02200 #endif
02201     if( p->sorted == p->end ) return -1;
02202     return findpt_pass_3( p, x, el, r, dist );
02203 }
02204 
02205 void findpt_weights_2( findpt_data_2* p, const realType r[ 2 ] )
02206 {
02207     lagrange_0( &p->ld[ 0 ], r[ 0 ] );
02208     lagrange_0( &p->ld[ 1 ], r[ 1 ] );
02209 }
02210 
02211 void findpt_weights_3( findpt_data_3* p, const realType r[ 3 ] )
02212 {
02213     lagrange_0( &p->ld[ 0 ], r[ 0 ] );
02214     lagrange_0( &p->ld[ 1 ], r[ 1 ] );
02215     lagrange_0( &p->ld[ 2 ], r[ 2 ] );
02216 }
02217 
02218 double findpt_eval_2( findpt_data_2* p, const realType* u )
02219 {
02220     return tensor_i2( p->ld[ 0 ].J, p->ld[ 0 ].n, p->ld[ 1 ].J, p->ld[ 1 ].n, u, p->od_work );
02221 }
02222 
02223 double findpt_eval_3( findpt_data_3* p, const realType* u )
02224 {
02225     return tensor_i3( p->ld[ 0 ].J, p->ld[ 0 ].n, p->ld[ 1 ].J, p->ld[ 1 ].n, p->ld[ 2 ].J, p->ld[ 2 ].n, u,
02226                       p->od_work );
02227 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines