fathom/moab-docs/findpt_8c_source.html

00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <stdarg.h>
00004 #include <math.h> /* for cos, fabs */
00005 #include <float.h>
00006 #include <string.h> /* for memcpy */
00007
00008 #include "moab/FindPtFuncs.h"
00009
00010 const unsigned opt_no_constraints_2 = 3 + 1;
00011 const unsigned opt_no_constraints_3 = 9 + 3 + 1;
00012
00013 /*--------------------------------------------------------------------------
00014    Lobatto Polynomial Bounds
00015
00016    Needed inputs are the Gauss-Lobatto quadrature nodes and weights:
00017      unsigned nr = ..., ns = ...;
00018      realType zr[nr], wr[nr];
00019      realType zs[ns], ws[ns];
00020
00021      lobatto_nodes(zr,nr); lobatto_weights(zr,wr,nr);
00022      lobatto_nodes(zs,ns); lobatto_weights(zs,ws,ns);
00023
00024    The number of points in the constructed piecewise (bi-)linear bounds
00025    is a parameter; more points give tighter bounds
00026
00027      unsigned mr = 2*nr, ms = 2*ns;
00028
00029    The necessary setup is accomplished via:
00030      lob_bnd_base b_data_r;
00031      lob_bnd_ext  e_data_s;
00032
00033      lob_bnd_base_alloc(&b_data_r,nr,mr);
00034      lob_bnd_base_setup(&b_data_r,zr,wr);
00035      lob_bnd_ext_alloc(&e_data_s,ns,ms);
00036      lob_bnd_ext_setup(&e_data_s,zs,ws);
00037
00038    Bounds may then be computed via:
00039      realType work1r[2*mr], work1s[2*ms], work2[2*mr + 2*mr*ns + 2*mr*ms];
00040      realType ur[nr], us[ns];    // 1-d polynomials on the zr[] and zs[] nodes
00041      realType u[ns][nr];         // 2-d polynomial on zr[] (x) zs[]
00042      realType bound[2];          // = { min, max } (to be computed)
00043
00044      lob_bnd_1(&b_data_r  ,ur,bound,work1r); // compute bounds on ur
00045      lob_bnd_1(&e_data_s.b,us,bound,work1s); // compute bounds on us
00046      lob_bnd_2(&b_data_r, &e_data_s,
00047                (const double*)&u[0][0],bound,work2); // compute bounds on u
00048    The above routines access the zr,zs arrays passed to *_setup
00049      (so do not delete them between calls)
00050
00051    Memory allocated in *_setup is freed with
00052      lob_bnd_base_free(&b_data_r);
00053      lob_bnd_ext_free(&e_data_s);
00054
00055   --------------------------------------------------------------------------*/
00056
00057 typedef struct
00058 {
00059     unsigned n;        /* number of Lobatto nodes in input */
00060     unsigned m;        /* number of Chebyshev nodes used to calculate bounds */
00061     realType *Q0, *Q1; /* Q0[n], Q1[n] -- first two rows of change of basis matrix
00062                       from Lobatto node Lagrangian to Legendre */
00063     const realType* z; /* z[n] -- external; Lobatto nodes */
00064     realType* h;       /* h[m] -- Chebyshev nodes */
00065     realType *uv, *ov; /* uv[n][m], ov[n][m] --
00066                               uv[j][:] is a piecewise linear function in the nodal
00067                                        basis with nodes h[m] that is everywhere less
00068                                        than or equal to the jth Lagrangian basis
00069                                        function (with the Lobatto nodes z[n])
00070                               ov[j][:] is everywhere greater than or equal */
00071 } lob_bnd_base;
00072
00073 typedef struct
00074 {
00075     lob_bnd_base b;
00076     realType *uvp, *uvn, *ovp, *ovn; /* [n][m] -- uv and ov split into
00077                                          positive and negative parts */
00078 } lob_bnd_ext;
00079
00080 static void lob_bnd_base_alloc( lob_bnd_base* p, unsigned n, unsigned m )
00081 {
00082     p->n = n, p->m = m;
00083     p->Q0 = tmalloc( realType, 2 * n + m + 2 * n * m );
00084     p->Q1 = p->Q0 + n;
00085     p->h  = p->Q1 + n;
00086     p->uv = p->h + m;
00087     p->ov = p->uv + n * m;
00088 }
00089
00090 static void lob_bnd_base_free( lob_bnd_base* p )
00091 {
00092     free( p->Q0 );
00093 }
00094
00095 static void lob_bnd_ext_alloc( lob_bnd_ext* p, unsigned n, unsigned m )
00096 {
00097     p->b.n = n, p->b.m = m;
00098     p->b.Q0 = tmalloc( realType, 2 * n + m + 6 * n * m );
00099     p->b.Q1 = p->b.Q0 + n;
00100     p->b.h  = p->b.Q1 + n;
00101     p->b.uv = p->b.h + m;
00102     p->b.ov = p->b.uv + n * m;
00103     p->uvp  = p->b.ov + n * m;
00104     p->uvn  = p->uvp + n * m;
00105     p->ovp  = p->uvn + n * m;
00106     p->ovn  = p->ovp + n * m;
00107 }
00108
00109 static void lob_bnd_ext_free( lob_bnd_ext* p )
00110 {
00111     free( p->b.Q0 );
00112 }
00113
00114 static void lob_bnd_base_setup( lob_bnd_base* p, const realType* z, const realType* w )
00115 {
00116     unsigned i, j, m = p->m, n = p->n, mm = 2 * m - 1;
00117     realType *q = tmalloc( realType, ( 2 * n + 1 ) * mm + 6 * n ), *J = q + mm, *D = J + n * mm, *work = D + n * mm;
00118     p->z = z;
00119     for( i = 0; i < n; ++i )
00120         p->Q0[i] = w[i] / 2, p->Q1[i] = 3 * p->Q0[i] * z[i];
00121     p->h[0] = -1, p->h[m - 1] = 1;
00122     for( j = 1; j < m - 1; ++j )
00123         p->h[j] = mbcos( ( m - j - 1 ) * MOAB_POLY_PI / ( m - 1 ) );
00124     for( j = 0; j < m - 1; ++j )
00125         q[2 * j] = p->h[j], q[2 * j + 1] = ( p->h[j] + p->h[j + 1] ) / 2;
00126     q[mm - 1] = p->h[m - 1];
00127     lagrange_weights_deriv( z, n, q, mm, J, D, work );
00128     for( i = 0; i < n; ++i )
00129     {
00130         realType *uv = p->uv + i * m, *ov = p->ov + i * m;
00131         ov[0] = uv[0] = J[i];
00132         ov[m - 1] = uv[m - 1] = J[( mm - 1 ) * n + i];
00133         for( j = 1; j < m - 1; ++j )
00134         {
00135             unsigned jj = 2 * j;
00136             realType c0 = J[( jj - 1 ) * n + i] + ( q[jj] - q[jj - 1] ) * D[( jj - 1 ) * n + i];
00137             realType c1 = J[( jj + 1 ) * n + i] + ( q[jj] - q[jj + 1] ) * D[( jj + 1 ) * n + i];
00138             realType c2 = J[jj * n + i];
00139             rminmax_3( &uv[j], &ov[j], c0, c1, c2 );
00140         }
00141     }
00142     free( q );
00143 }
00144
00145 static void lob_bnd_ext_setup( lob_bnd_ext* p, const realType* z, const realType* w )
00146 {
00147     unsigned i, mn = p->b.m * p->b.n;
00148     lob_bnd_base_setup( &p->b, z, w );
00149     for( i = 0; i < mn; ++i )
00150     {
00151         realType uvi = p->b.uv[i], ovi = p->b.ov[i];
00152         p->uvp[i] = p->uvn[i] = p->ovp[i] = p->ovn[i] = 0;
00153         if( uvi > 0 )
00154             p->uvp[i] = uvi;
00155         else
00156             p->uvn[i] = uvi;
00157         if( ovi > 0 )
00158             p->ovp[i] = ovi;
00159         else
00160             p->ovn[i] = ovi;
00161     }
00162 }
00163
00164 static void lob_bnd_lines( const lob_bnd_base* p, const realType* u, realType* a, realType* b )
00165 {
00166     unsigned i, j;
00167     realType a0 = 0, a1 = 0;
00168     const realType *uv = p->uv, *ov = p->ov;
00169     for( i = 0; i < p->n; ++i )
00170         a0 += p->Q0[i] * u[i], a1 += p->Q1[i] * u[i];
00171     for( j = 0; j < p->m; ++j )
00172         b[j] = a[j] = a0 + a1 * p->h[j];
00173     for( i = 0; i < p->n; ++i )
00174     {
00175         realType w = u[i] - ( a0 + a1 * p->z[i] );
00176         if( w >= 0 )
00177             for( j = 0; j < p->m; ++j )
00178                 a[j] += w * ( *uv++ ), b[j] += w * ( *ov++ );
00179         else
00180             for( j = 0; j < p->m; ++j )
00181                 a[j] += w * ( *ov++ ), b[j] += w * ( *uv++ );
00182     }
00183 }
00184
00185 /* work holds p->m * 2 doubles */
00186 static void lob_bnd_1( const lob_bnd_base* p, const realType* u, realType bnd[2], realType* work )
00187 {
00188     unsigned j;
00189     realType *a = work, *b = work + p->m;
00190     lob_bnd_lines( p, u, a, b );
00191     bnd[0] = a[0], bnd[1] = b[0];
00192     for( j = 1; j < p->m; ++j )
00193     {
00194         if( a[j] < bnd[0] ) bnd[0] = a[j];
00195         if( b[j] > bnd[1] ) bnd[1] = b[j];
00196     }
00197 }
00198
00199 /* work holds 2*mr + 2*mr*ns + 2*mr*ms doubles */
00200 static void lob_bnd_2( const lob_bnd_base* pr,
00201                        const lob_bnd_ext* ps,
00202                        const realType* u,
00203                        realType bnd[2],
00204                        realType* work )
00205 {
00206     unsigned nr = pr->n, mr = pr->m, ns = ps->b.n, ms = ps->b.m;
00207     realType *a0 = work, *a1 = a0 + mr, *ar_ = a1 + mr, *ar = ar_, *br_ = ar + mr * ns, *br = br_, *a_ = br + mr * ns,
00208              *a = a_, *b_ = a + mr * ms, *b = b_, *uvp, *ovp, *uvn, *ovn;
00209     realType b0, b1;
00210     unsigned i, j, k;
00211     for( i = 0; i < mr; ++i )
00212         a0[i] = a1[i] = 0;
00213     for( j = 0; j < ns; ++j, u += nr )
00214     {
00215         realType q0 = ps->b.Q0[j], q1 = ps->b.Q1[j];
00216         lob_bnd_lines( pr, u, ar, br );
00217         for( i = 0; i < mr; ++i )
00218         {
00219             realType t = ( *ar++ + *br++ ) / 2;
00220             a0[i] += q0 * t, a1[i] += q1 * t;
00221         }
00222     }
00223     for( i = 0; i < mr; ++i )
00224     {
00225         realType a0i = a0[i], a1i = a1[i];
00226         for( k = 0; k < ms; ++k )
00227             *b++ = *a++ = a0i + a1i * ps->b.h[k];
00228     }
00229     ar = ar_, br = br_;
00230     uvp = ps->uvp, ovp = ps->ovp, uvn = ps->uvn, ovn = ps->ovn;
00231     for( j = 0; j < ns; ++j, uvp += ms, uvn += ms, ovp += ms, ovn += ms )
00232     {
00233         realType zj = ps->b.z[j];
00234         a = a_, b = b_;
00235         for( i = 0; i < mr; ++i )
00236         {
00237             realType t  = a0[i] + a1[i] * zj;
00238             realType uw = *ar++ - t;
00239             realType ow = *br++ - t;
00240             if( uw >= 0 ) /* 0  <= uw <= ow */
00241                 for( k = 0; k < ms; ++k )
00242                     *a++ += uw * uvp[k] + ow * uvn[k], *b++ += ow * ovp[k] + uw * ovn[k];
00243             else if( ow <= 0 ) /* uw <= ow <= 0  */
00244                 for( k = 0; k < ms; ++k )
00245                     *a++ += uw * ovp[k] + ow * ovn[k], *b++ += ow * uvp[k] + uw * uvn[k];
00246             else /* uw <  0  <  ow */
00247                 for( k = 0; k < ms; ++k )
00248                     *a++ += uw * ovp[k] + ow * uvn[k], *b++ += ow * ovp[k] + uw * uvn[k];
00249         }
00250     }
00251     b0 = a_[0], b1 = b_[0];
00252     for( i = 1; i < mr * ms; ++i )
00253     {
00254         if( a_[i] < b0 ) b0 = a_[i];
00255         if( b_[i] > b1 ) b1 = b_[i];
00256     }
00257     bnd[0] = b0, bnd[1] = b1;
00258 }
00259
00260 /*--------------------------------------------------------------------------
00261    Small Matrix Inverse
00262   --------------------------------------------------------------------------*/
00263
00264 static void mat_inv_2( const realType A[4], realType inv[4] )
00265 {
00266     const realType idet = 1 / ( A[0] * A[3] - A[1] * A[2] );
00267     inv[0]              = idet * A[3];
00268     inv[1]              = -( idet * A[1] );
00269     inv[2]              = -( idet * A[2] );
00270     inv[3]              = idet * A[0];
00271 }
00272
00273 static void mat_inv_3( const realType A[9], realType inv[9] )
00274 {
00275     const realType a = A[4] * A[8] - A[5] * A[7], b = A[5] * A[6] - A[3] * A[8], c = A[3] * A[7] - A[4] * A[6],
00276                    idet = 1 / ( A[0] * a + A[1] * b + A[2] * c );
00277     inv[0]              = idet * a;
00278     inv[1]              = idet * ( A[2] * A[7] - A[1] * A[8] );
00279     inv[2]              = idet * ( A[1] * A[5] - A[2] * A[4] );
00280     inv[3]              = idet * b;
00281     inv[4]              = idet * ( A[0] * A[8] - A[2] * A[6] );
00282     inv[5]              = idet * ( A[2] * A[3] - A[0] * A[5] );
00283     inv[6]              = idet * c;
00284     inv[7]              = idet * ( A[1] * A[6] - A[0] * A[7] );
00285     inv[8]              = idet * ( A[0] * A[4] - A[1] * A[3] );
00286 }
00287
00288 static void mat_app_2r( realType y[2], const realType A[4], const realType x[2] )
00289 {
00290     y[0] = A[0] * x[0] + A[1] * x[1];
00291     y[1] = A[2] * x[0] + A[3] * x[1];
00292 }
00293
00294 static void mat_app_2c( realType y[2], const realType A[4], const realType x[2] )
00295 {
00296     y[0] = A[0] * x[0] + A[2] * x[1];
00297     y[1] = A[1] * x[0] + A[3] * x[1];
00298 }
00299
00300 static void mat_app_3r( realType y[3], const realType A[9], const realType x[3] )
00301 {
00302     y[0] = A[0] * x[0] + A[1] * x[1] + A[2] * x[2];
00303     y[1] = A[3] * x[0] + A[4] * x[1] + A[5] * x[2];
00304     y[2] = A[6] * x[0] + A[7] * x[1] + A[8] * x[2];
00305 }
00306
00307 static void mat_app_3c( realType y[3], const realType A[9], const realType x[3] )
00308 {
00309     y[0] = A[0] * x[0] + A[3] * x[1] + A[6] * x[2];
00310     y[1] = A[1] * x[0] + A[4] * x[1] + A[7] * x[2];
00311     y[2] = A[2] * x[0] + A[5] * x[1] + A[8] * x[2];
00312 }
00313
00314 static void tinyla_solve_2( realType x[2], const realType A[4], const realType b[2] )
00315 {
00316     realType inv[4];
00317     mat_inv_2( A, inv );
00318     mat_app_2r( x, inv, b );
00319 }
00320
00321 static void tinyla_solve_3( realType x[3], const realType A[9], const realType b[3] )
00322 {
00323     realType inv[9];
00324     mat_inv_3( A, inv );
00325     mat_app_3r( x, inv, b );
00326 }
00327
00328 /* solve
00329    A[0] x0 + A[2] x1 = b0,
00330    A[2] x0 + A[1] x1 = b1
00331 */
00332 static void tinyla_solve_sym_2( realType* x0, realType* x1, const realType A[3], realType b0, realType b1 )
00333 {
00334     const realType idet = 1 / ( A[0] * A[1] - A[2] * A[2] );
00335     *x0                 = idet * ( A[1] * b0 - A[2] * b1 );
00336     *x1                 = idet * ( A[0] * b1 - A[2] * b0 );
00337 }
00338
00339 /*--------------------------------------------------------------------------
00340    Oriented Bounding Box
00341
00342    Suffixes on names are _2 for 2-d and _3 for 3-d
00343
00344    Needed inputs are the Gauss-Lobatto quadrature nodes and weights:
00345      unsigned nr = ..., ns = ...;
00346      realType zr[nr], wr[nr];
00347      realType zs[ns], ws[ns];
00348
00349      lobatto_nodes(zr,nr); lobatto_weights(zr,wr,nr);
00350      lobatto_nodes(zs,ns); lobatto_weights(zs,ws,ns);
00351
00352    The number of points in the constructed piecewise (bi-)linear bounds
00353    for the boundaries is a parameter; more points give tighter bounds
00354
00355      unsigned mr = 2*nr, ms = 2*ns;
00356
00357    Bounding boxes are increased by a relative amount as a parameter
00358
00359      realType tol = 0.01;
00360
00361    Setup is accomplished via:
00362
00363      const realType *z[2] = {zr,zs}, *w[2] = {wr,ws};
00364      const unsigned n[2] = {nr,ns}, m[2] = {mr,ms};
00365      obbox_data_2 *data = obbox_setup_2(z,w,n,m);
00366
00367    Bounding box data may then be computed:
00368
00369      obbox_2 box;                 // will store bounding box information
00370      realType xm[ns][nr], ym[ns][nr]; // x, y coordinates of the element nodes
00371
00372      obbox_calc_2(data, tol, (const realType *)&xm[0][0],
00373                              (const realType *)&ym[0][0], &box);
00374
00375    A point may be tested:
00376
00377      const realType x[2]; // point to test
00378      realType r[2];
00379
00380      if( obbox_axis_test_2(&box, x) )
00381        ... // x failed axis-aligned bounding box test
00382
00383      if( obbox_test_2(&box, x, r) )
00384        ... // x failed oriented bounding box test
00385      else
00386        ... // r suitable as initial guess for parametric coords
00387
00388    Once all bounding box information has been computed
00389
00390      obbox_free_2(data);
00391
00392    to free the memory allocated with obbox_setup_2.
00393
00394   --------------------------------------------------------------------------*/
00395
00396 typedef struct
00397 {
00398     lob_bnd_base dr, ds;
00399     realType *Jr0, *Dr0, *Js0, *Ds0, *work;
00400 } obbox_data_2;
00401
00402 typedef struct
00403 {
00404     lob_bnd_base dr;
00405     lob_bnd_ext ds, dt;
00406     realType *Jr0, *Dr0, *Js0, *Ds0, *Jt0, *Dt0, *work;
00407 } obbox_data_3;
00408
00409 static void obbox_data_alloc_2( obbox_data_2* p, const unsigned n[2], const unsigned m[2] )
00410 {
00411     const unsigned max_npm = umax_2( n[0] + m[0], n[1] + m[1] );
00412     lob_bnd_base_alloc( &p->dr, n[0], m[0] );
00413     lob_bnd_base_alloc( &p->ds, n[1], m[1] );
00414     p->Jr0  = tmalloc( realType, 2 * n[0] + 2 * n[1] + 2 * max_npm );
00415     p->Dr0  = p->Jr0 + n[0];
00416     p->Js0  = p->Dr0 + n[0];
00417     p->Ds0  = p->Js0 + n[1];
00418     p->work = p->Ds0 + n[1];
00419 }
00420
00421 static void obbox_data_free_2( obbox_data_2* p )
00422 {
00423     lob_bnd_base_free( &p->dr );
00424     lob_bnd_base_free( &p->ds );
00425     free( p->Jr0 );
00426 }
00427
00428 static void obbox_data_alloc_3( obbox_data_3* p, const unsigned n[3], const unsigned m[3] )
00429 {
00430     const unsigned wk1    = 3 * n[0] * n[1] + 2 * m[0] + 2 * m[0] * n[1] + 2 * m[0] * m[1];
00431     const unsigned wk2    = 3 * n[0] * n[2] + 2 * m[0] + 2 * m[0] * n[2] + 2 * m[0] * m[2];
00432     const unsigned wk3    = 3 * n[1] * n[2] + 2 * m[1] + 2 * m[1] * n[2] + 2 * m[1] * m[2];
00433     const unsigned wk_max = umax_3( wk1, wk2, wk3 );
00434     lob_bnd_base_alloc( &p->dr, n[0], m[0] );
00435     lob_bnd_ext_alloc( &p->ds, n[1], m[1] );
00436     lob_bnd_ext_alloc( &p->dt, n[2], m[2] );
00437     p->Jr0  = tmalloc( realType, 2 * n[0] + 2 * n[1] + 2 * n[2] + wk_max );
00438     p->Dr0  = p->Jr0 + n[0];
00439     p->Js0  = p->Dr0 + n[0];
00440     p->Ds0  = p->Js0 + n[1];
00441     p->Jt0  = p->Ds0 + n[1];
00442     p->Dt0  = p->Jt0 + n[2];
00443     p->work = p->Dt0 + n[2];
00444 }
00445
00446 static void obbox_data_free_3( obbox_data_3* p )
00447 {
00448     lob_bnd_base_free( &p->dr );
00449     lob_bnd_ext_free( &p->ds );
00450     lob_bnd_ext_free( &p->dt );
00451     free( p->Jr0 );
00452 }
00453
00454 static obbox_data_2* obbox_setup_2( const realType* const z[2],
00455                                     const realType* const w[2],
00456                                     const unsigned n[2],
00457                                     const unsigned m[2] )
00458 {
00459     const realType zero = 0;
00460     realType* work;
00461     obbox_data_2* p = tmalloc( obbox_data_2, 1 );
00462     obbox_data_alloc_2( p, n, m );
00463     lob_bnd_base_setup( &p->dr, z[0], w[0] );
00464     lob_bnd_base_setup( &p->ds, z[1], w[1] );
00465     work = tmalloc( realType, 6 * umax_2( n[0], n[1] ) );
00466     lagrange_weights_deriv( z[0], n[0], &zero, 1, p->Jr0, p->Dr0, work );
00467     lagrange_weights_deriv( z[1], n[1], &zero, 1, p->Js0, p->Ds0, work );
00468     free( work );
00469     return p;
00470 }
00471
00472 static obbox_data_3* obbox_setup_3( const realType* const z[3],
00473                                     const realType* const w[3],
00474                                     const unsigned n[3],
00475                                     const unsigned m[3] )
00476 {
00477     const realType zero = 0;
00478     realType* work;
00479     obbox_data_3* p = tmalloc( obbox_data_3, 1 );
00480     obbox_data_alloc_3( p, n, m );
00481     lob_bnd_base_setup( &p->dr, z[0], w[0] );
00482     lob_bnd_ext_setup( &p->ds, z[1], w[1] );
00483     lob_bnd_ext_setup( &p->dt, z[2], w[2] );
00484     work = tmalloc( realType, 6 * umax_3( n[0], n[1], n[2] ) );
00485     lagrange_weights_deriv( z[0], n[0], &zero, 1, p->Jr0, p->Dr0, work );
00486     lagrange_weights_deriv( z[1], n[1], &zero, 1, p->Js0, p->Ds0, work );
00487     lagrange_weights_deriv( z[2], n[2], &zero, 1, p->Jt0, p->Dt0, work );
00488     free( work );
00489     return p;
00490 }
00491
00492 static void obbox_free_2( obbox_data_2* p )
00493 {
00494     obbox_data_free_2( p );
00495     free( p );
00496 }
00497
00498 static void obbox_free_3( obbox_data_3* p )
00499 {
00500     obbox_data_free_3( p );
00501     free( p );
00502 }
00503
00504 int obbox_axis_test_2( const obbox_2* p, const realType x[2] )
00505 {
00506     return ( x[0] < p->axis_bnd[0] || x[0] > p->axis_bnd[1] || x[1] < p->axis_bnd[2] || x[1] > p->axis_bnd[3] );
00507 }
00508
00509 int obbox_axis_test_3( const obbox_3* p, const realType x[3] )
00510 {
00511     return ( x[0] < p->axis_bnd[0] || x[0] > p->axis_bnd[1] || x[1] < p->axis_bnd[2] || x[1] > p->axis_bnd[3] ||
00512              x[2] < p->axis_bnd[4] || x[2] > p->axis_bnd[5] );
00513 }
00514
00515 int obbox_test_2( const obbox_2* p, const realType x[2], realType r[2] )
00516 {
00517     realType xt[2];
00518     xt[0] = x[0] - p->x[0];
00519     xt[1] = x[1] - p->x[1];
00520     r[0]  = p->A[0] * xt[0] + p->A[1] * xt[1];
00521     if( mbabs( r[0] ) > 1 ) return 1;
00522     r[1] = p->A[2] * xt[0] + p->A[3] * xt[1];
00523     return mbabs( r[1] ) > 1;
00524 }
00525
00526 int obbox_test_3( const obbox_3* p, const realType x[3], realType r[3] )
00527 {
00528     realType xt[3];
00529     xt[0] = x[0] - p->x[0];
00530     xt[1] = x[1] - p->x[1];
00531     xt[2] = x[2] - p->x[2];
00532     r[0]  = p->A[0] * xt[0] + p->A[1] * xt[1] + p->A[2] * xt[2];
00533     if( mbabs( r[0] ) > 1 ) return 1;
00534     r[1] = p->A[3] * xt[0] + p->A[4] * xt[1] + p->A[5] * xt[2];
00535     if( mbabs( r[1] ) > 1 ) return 1;
00536     r[2] = p->A[6] * xt[0] + p->A[7] * xt[1] + p->A[8] * xt[2];
00537     return mbabs( r[2] ) > 1;
00538 }
00539
00540 void obbox_calc_tfm_2( const realType* x,
00541                        const realType* y,
00542                        unsigned n,
00543                        unsigned s,
00544                        const realType c0[2],
00545                        const realType A[4],
00546                        realType* u )
00547 {
00548     unsigned i;
00549     realType* v = u + n;
00550     for( i = 0; i < n; ++i, x += s, y += s )
00551     {
00552         const realType xt = *x - c0[0], yt = *y - c0[1];
00553         u[i] = A[0] * xt + A[1] * yt;
00554         v[i] = A[2] * xt + A[3] * yt;
00555     }
00556 }
00557
00558 void obbox_calc_tfm_3( const realType* x,
00559                        const realType* y,
00560                        const realType* z,
00561                        unsigned nr,
00562                        unsigned sr,
00563                        unsigned ns,
00564                        unsigned ss,
00565                        const realType c0[3],
00566                        const realType A[9],
00567                        realType* u )
00568 {
00569     unsigned i, j;
00570     realType *v = u + nr * ns, *w = v + nr * ns;
00571     for( j = 0; j < ns; ++j, x += ss, y += ss, z += ss )
00572     {
00573         for( i = 0; i < nr; ++i, x += sr, y += sr, z += sr )
00574         {
00575             const realType xt = *x - c0[0], yt = *y - c0[1], zt = *z - c0[2];
00576             *u++ = A[0] * xt + A[1] * yt + A[2] * zt;
00577             *v++ = A[3] * xt + A[4] * yt + A[5] * zt;
00578             *w++ = A[6] * xt + A[7] * yt + A[8] * zt;
00579         }
00580     }
00581 }
00582
00583 void obbox_merge_2( realType* b, const realType* ob )
00584 {
00585     if( ob[0] < b[0] ) b[0] = ob[0];
00586     if( ob[1] > b[1] ) b[1] = ob[1];
00587     if( ob[2] < b[2] ) b[2] = ob[2];
00588     if( ob[3] > b[3] ) b[3] = ob[3];
00589 }
00590
00591 void obbox_merge_3( realType* b, const realType* ob )
00592 {
00593     if( ob[0] < b[0] ) b[0] = ob[0];
00594     if( ob[1] > b[1] ) b[1] = ob[1];
00595     if( ob[2] < b[2] ) b[2] = ob[2];
00596     if( ob[3] > b[3] ) b[3] = ob[3];
00597     if( ob[4] < b[4] ) b[4] = ob[4];
00598     if( ob[5] > b[5] ) b[5] = ob[5];
00599 }
00600
00601 /* work holds 2*n + 2*m realTypes */
00602 void obbox_side_2( const realType* x,
00603                    const realType* y,
00604                    unsigned n,
00605                    unsigned s,
00606                    const realType c0[2],
00607                    const realType A[4],
00608                    realType* work,
00609                    const lob_bnd_base* lbd,
00610                    realType bnd[4] )
00611 {
00612     obbox_calc_tfm_2( x, y, n, s, c0, A, work );
00613     lob_bnd_1( lbd, work, bnd, work + 2 * n );
00614     lob_bnd_1( lbd, work + n, bnd + 2, work + 2 * n );
00615 }
00616
00617 /* work holds 3*nr*ns + 2*mr + 2*mr*ns + 2*mr*ms realTypes */
00618 void obbox_side_3( const realType* x,
00619                    const realType* y,
00620                    const realType* z,
00621                    unsigned nr,
00622                    unsigned sr,
00623                    unsigned ns,
00624                    unsigned ss,
00625                    const realType c0[3],
00626                    const realType A[9],
00627                    realType* work,
00628                    const lob_bnd_base* dr,
00629                    const lob_bnd_ext* ds,
00630                    realType bnd[6] )
00631 {
00632     obbox_calc_tfm_3( x, y, z, nr, sr, ns, ss, c0, A, work );
00633     lob_bnd_2( dr, ds, work, bnd, work + 3 * nr * ns );
00634     lob_bnd_2( dr, ds, work + nr * ns, bnd + 2, work + 3 * nr * ns );
00635     lob_bnd_2( dr, ds, work + 2 * nr * ns, bnd + 4, work + 3 * nr * ns );
00636 }
00637
00638 /* return bounds on u = A (x - c0)
00639    bnd[0] <= u_0 <= bnd[1]
00640    bnd[2] <= u_1 <= bnd[3] */
00641 void obbox_bnd_2( const obbox_data_2* p,
00642                   const realType* x,
00643                   const realType* y,
00644                   const realType c0[2],
00645                   const realType A[4],
00646                   realType bnd[4] )
00647 {
00648     unsigned i, nr = p->dr.n, ns = p->ds.n;
00649     realType obnd[4];
00650
00651     i = nr * ( ns - 1 );
00652     obbox_side_2( x, y, nr, 1, c0, A, p->work, &p->dr, bnd );
00653     obbox_side_2( x + i, y + i, nr, 1, c0, A, p->work, &p->dr, obnd );
00654     obbox_merge_2( bnd, obnd );
00655
00656     i = nr - 1;
00657     obbox_side_2( x, y, ns, nr, c0, A, p->work, &p->ds, obnd );
00658     obbox_merge_2( bnd, obnd );
00659     obbox_side_2( x + i, y + i, nr, nr, c0, A, p->work, &p->ds, obnd );
00660     obbox_merge_2( bnd, obnd );
00661 }
00662
00663 /* return bounds on u = A (x - c0)
00664    bnd[0] <= u_0 <= bnd[1]
00665    bnd[2] <= u_1 <= bnd[3]
00666    bnd[4] <= u_2 <= bnd[5] */
00667 void obbox_bnd_3( const obbox_data_3* p,
00668                   const realType* x,
00669                   const realType* y,
00670                   const realType* z,
00671                   const realType c0[3],
00672                   const realType A[9],
00673                   realType bnd[6] )
00674 {
00675     unsigned i, nr = p->dr.n, ns = p->ds.b.n, nt = p->dt.b.n;
00676     realType obnd[6];
00677
00678     i = nr * ns * ( nt - 1 );
00679     obbox_side_3( x, y, z, nr, 1, ns, 0, c0, A, p->work, &p->dr, &p->ds, bnd );
00680     obbox_side_3( x + i, y + i, z + i, nr, 1, ns, 0, c0, A, p->work, &p->dr, &p->ds, obnd );
00681     obbox_merge_3( bnd, obnd );
00682
00683     i = nr * ( ns - 1 );
00684     obbox_side_3( x, y, z, nr, 1, nt, i, c0, A, p->work, &p->dr, &p->dt, obnd );
00685     obbox_merge_3( bnd, obnd );
00686     obbox_side_3( x + i, y + i, z + i, nr, 1, nt, i, c0, A, p->work, &p->dr, &p->dt, obnd );
00687     obbox_merge_3( bnd, obnd );
00688
00689     i = nr - 1;
00690     obbox_side_3( x, y, z, ns, nr, nt, 0, c0, A, p->work, &p->ds.b, &p->dt, obnd );
00691     obbox_merge_3( bnd, obnd );
00692     obbox_side_3( x + i, y + i, z + i, ns, nr, nt, 0, c0, A, p->work, &p->ds.b, &p->dt, obnd );
00693     obbox_merge_3( bnd, obnd );
00694 }
00695
00696 void obbox_calc_2( const obbox_data_2* p, realType tol, const realType* x, const realType* y, obbox_2* b )
00697 {
00698     const realType zero[2] = { 0, 0 }, id[4] = { 1, 0, 0, 1 };
00699     realType c0[2], jac[4], inv[4], bnd[4], u0[2], d[2];
00700
00701     obbox_bnd_2( p, x, y, zero, id, b->axis_bnd );
00702     d[0] = b->axis_bnd[1] - b->axis_bnd[0];
00703     d[1] = b->axis_bnd[3] - b->axis_bnd[2];
00704     b->axis_bnd[0] -= tol * d[0], b->axis_bnd[1] += tol * d[0];
00705     b->axis_bnd[2] -= tol * d[1], b->axis_bnd[3] += tol * d[1];
00706
00707     c0[0] = tensor_ig2( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.n, x, jac, p->work );
00708     c0[1] = tensor_ig2( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.n, y, jac + 2, p->work );
00709     mat_inv_2( jac, inv );
00710
00711     obbox_bnd_2( p, x, y, c0, inv, bnd );
00712
00713     u0[0]   = ( bnd[0] + bnd[1] ) / 2;
00714     u0[1]   = ( bnd[2] + bnd[3] ) / 2;
00715     d[0]    = 2 / ( ( 1 + tol ) * ( bnd[1] - bnd[0] ) );
00716     d[1]    = 2 / ( ( 1 + tol ) * ( bnd[3] - bnd[2] ) );
00717     b->x[0] = c0[0] + jac[0] * u0[0] + jac[1] * u0[1];
00718     b->x[1] = c0[1] + jac[2] * u0[0] + jac[3] * u0[1];
00719     b->A[0] = d[0] * inv[0], b->A[1] = d[0] * inv[1];
00720     b->A[2] = d[1] * inv[2], b->A[3] = d[1] * inv[3];
00721 }
00722
00723 void obbox_calc_3( const obbox_data_3* p,
00724                    realType tol,
00725                    const realType* x,
00726                    const realType* y,
00727                    const realType* z,
00728                    obbox_3* b )
00729 {
00730     const realType zero[3] = { 0, 0 }, id[9] = { 1, 0, 0, 0, 1, 0, 0, 0, 1 };
00731     realType c0[3], jac[9], inv[9], bnd[6], u0[3], d[3];
00732
00733     obbox_bnd_3( p, x, y, z, zero, id, b->axis_bnd );
00734     d[0] = b->axis_bnd[1] - b->axis_bnd[0];
00735     d[1] = b->axis_bnd[3] - b->axis_bnd[2];
00736     d[2] = b->axis_bnd[5] - b->axis_bnd[4];
00737     b->axis_bnd[0] -= tol * d[0], b->axis_bnd[1] += tol * d[0];
00738     b->axis_bnd[2] -= tol * d[1], b->axis_bnd[3] += tol * d[1];
00739     b->axis_bnd[4] -= tol * d[2], b->axis_bnd[5] += tol * d[2];
00740
00741     c0[0] =
00742         tensor_ig3( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.b.n, p->Jt0, p->Dt0, p->dt.b.n, x, jac, p->work );
00743     c0[1] = tensor_ig3( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.b.n, p->Jt0, p->Dt0, p->dt.b.n, y, jac + 3,
00744                         p->work );
00745     c0[2] = tensor_ig3( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.b.n, p->Jt0, p->Dt0, p->dt.b.n, z, jac + 6,
00746                         p->work );
00747     mat_inv_3( jac, inv );
00748
00749     obbox_bnd_3( p, x, y, z, c0, inv, bnd );
00750
00751     u0[0]   = ( bnd[0] + bnd[1] ) / 2;
00752     u0[1]   = ( bnd[2] + bnd[3] ) / 2;
00753     u0[2]   = ( bnd[4] + bnd[5] ) / 2;
00754     d[0]    = 2 / ( ( 1 + tol ) * ( bnd[1] - bnd[0] ) );
00755     d[1]    = 2 / ( ( 1 + tol ) * ( bnd[3] - bnd[2] ) );
00756     d[2]    = 2 / ( ( 1 + tol ) * ( bnd[5] - bnd[4] ) );
00757     b->x[0] = c0[0] + jac[0] * u0[0] + jac[1] * u0[1] + jac[2] * u0[2];
00758     b->x[1] = c0[1] + jac[3] * u0[0] + jac[4] * u0[1] + jac[5] * u0[2];
00759     b->x[2] = c0[2] + jac[6] * u0[0] + jac[7] * u0[1] + jac[8] * u0[2];
00760     b->A[0] = d[0] * inv[0], b->A[1] = d[0] * inv[1], b->A[2] = d[0] * inv[2];
00761     b->A[3] = d[1] * inv[3], b->A[4] = d[1] * inv[4], b->A[5] = d[1] * inv[5];
00762     b->A[6] = d[2] * inv[6], b->A[7] = d[2] * inv[7], b->A[8] = d[2] * inv[8];
00763 }
00764
00765 /*--------------------------------------------------------------------------
00766    Point to Possible Elements Hashing
00767
00768    Initializing the data:
00769      unsigned nel;        // number of elements
00770      const unsigned n[3]; // number of nodes in r, s, t directions
00771      const realType *xm[3];   // n[0]*n[1]*n[2]*nel x,y,z coordinates
00772      realType tol = 0.01;     // how far point is allowed to be outside element
00773                           //   relative to element size
00774      unsigned max_size = n[0]*n[1]*n[2]*nel; // maximum size of hash table
00775
00776      hash_data_3 data;
00777      hash_build_3(&data, xm, n, nel, max_size, tol);
00778
00779    Using the data:
00780      realType x[3];   // point to find
00781
00782      unsigned index = hash_index_3(&data, x);
00783      unsigned i, b = data.offset[index], e = data.offset[index+1];
00784
00785      // point may be in elements
00786      //   data.offset[b], data.offset[b+1], ... , data.offset[e-1]
00787      //
00788      // list has maximum size data.max (e.g., e-b <= data.max)
00789
00790      for(i=b; i!=e; ++i) {
00791        unsigned el = data.offset[i];
00792        const obbox_3 *obb = &data.obb[el]; // bounding box data for element el
00793        ...
00794      }
00795
00796    When done:
00797      hash_free_3(&data);
00798
00799   --------------------------------------------------------------------------*/
00800
00801 static int ifloor( realType x )
00802 {
00803     /*
00804     int y = x;
00805     return (double)y > x ? y-1 : y;
00806     */
00807     return mbfloor( x );
00808 }
00809
00810 static int iceil( realType x )
00811 {
00812     /*
00813     int y = x;
00814     return (double)y < x ? y+1 : y;
00815     */
00816     return mbceil( x );
00817 }
00818
00819 static unsigned hash_index_helper( realType low, realType fac, unsigned n, realType x )
00820 {
00821     const int i = ifloor( ( x - low ) * fac );
00822     if( i < 0 ) return 0;
00823     return umin_2( i, n - 1 );
00824 }
00825
00826 static uint hash_index_2( const hash_data_2* p, const realType x[2] )
00827 {
00828     const unsigned n = p->hash_n;
00829     return (uint)hash_index_helper( p->bnd[2], p->fac[1], n, x[1] ) * n +
00830            hash_index_helper( p->bnd[0], p->fac[0], n, x[0] );
00831 }
00832
00833 static uint hash_index_3( const hash_data_3* p, const realType x[3] )
00834 {
00835     const unsigned n = p->hash_n;
00836     return ( (uint)hash_index_helper( p->bnd[4], p->fac[2], n, x[2] ) * n +
00837              hash_index_helper( p->bnd[2], p->fac[1], n, x[1] ) ) *
00838                n +
00839            hash_index_helper( p->bnd[0], p->fac[0], n, x[0] );
00840 }
00841
00842 static void hash_setfac_2( hash_data_2* p, unsigned n )
00843 {
00844     p->hash_n = n;
00845     p->fac[0] = n / ( p->bnd[1] - p->bnd[0] );
00846     p->fac[1] = n / ( p->bnd[3] - p->bnd[2] );
00847 }
00848
00849 static void hash_setfac_3( hash_data_3* p, unsigned n )
00850 {
00851     p->hash_n = n;
00852     p->fac[0] = n / ( p->bnd[1] - p->bnd[0] );
00853     p->fac[1] = n / ( p->bnd[3] - p->bnd[2] );
00854     p->fac[2] = n / ( p->bnd[5] - p->bnd[4] );
00855 }
00856
00857 static void hash_range_2( const hash_data_2* p, uint i, unsigned d, unsigned* ia, unsigned* ib )
00858 {
00859     const realType a  = p->obb[i].axis_bnd[d * 2];
00860     const realType b  = p->obb[i].axis_bnd[d * 2 + 1];
00861     const int i0      = ifloor( ( a - p->bnd[d * 2] ) * p->fac[d] );
00862     const unsigned i1 = iceil( ( b - p->bnd[d * 2] ) * p->fac[d] );
00863     *ia               = imax_2( 0, i0 );
00864     *ib               = imin_2( i1, p->hash_n );
00865     if( *ib == *ia ) ++( *ib );
00866 }
00867
00868 static void hash_range_3( const hash_data_3* p, uint i, unsigned d, unsigned* ia, unsigned* ib )
00869 {
00870     const realType a  = p->obb[i].axis_bnd[d * 2];
00871     const realType b  = p->obb[i].axis_bnd[d * 2 + 1];
00872     const int i0      = ifloor( ( a - p->bnd[d * 2] ) * p->fac[d] );
00873     const unsigned i1 = iceil( ( b - p->bnd[d * 2] ) * p->fac[d] );
00874     *ia               = imax_2( 0, i0 );
00875     *ib               = imin_2( i1, p->hash_n );
00876     if( *ib == *ia ) ++( *ib );
00877 }
00878
00879 static uint hash_count_2( hash_data_2* p, uint nel, unsigned n )
00880 {
00881     uint i, count = 0;
00882     hash_setfac_2( p, n );
00883     for( i = 0; i < nel; ++i )
00884     {
00885         unsigned ia, ib;
00886         uint ci;
00887         hash_range_2( p, i, 0, &ia, &ib );
00888         ci = ib - ia;
00889         hash_range_2( p, i, 1, &ia, &ib );
00890         ci *= ib - ia;
00891         count += ci;
00892     }
00893     return count;
00894 }
00895
00896 static uint hash_count_3( hash_data_3* p, uint nel, unsigned n )
00897 {
00898     uint i, count = 0;
00899     hash_setfac_3( p, n );
00900     for( i = 0; i < nel; ++i )
00901     {
00902         unsigned ia, ib;
00903         uint ci;
00904         hash_range_3( p, i, 0, &ia, &ib );
00905         ci = ib - ia;
00906         hash_range_3( p, i, 1, &ia, &ib );
00907         ci *= ib - ia;
00908         hash_range_3( p, i, 2, &ia, &ib );
00909         ci *= ib - ia;
00910         count += ci;
00911     }
00912     return count;
00913 }
00914
00915 static uint hash_opt_size_2( hash_data_2* p, uint nel, uint max_size )
00916 {
00917     unsigned nl = 1, nu = ceil( sqrt( max_size - nel ) );
00918     uint size_low = 2 + nel;
00919     while( nu - nl > 1 )
00920     {
00921         unsigned nm = nl + ( nu - nl ) / 2;
00922         uint size   = (uint)nm * nm + 1 + hash_count_2( p, nel, nm );
00923         if( size <= max_size )
00924             nl = nm, size_low = size;
00925         else
00926             nu = nm;
00927     }
00928     hash_setfac_2( p, nl );
00929     return size_low;
00930 }
00931
00932 static uint hash_opt_size_3( hash_data_3* p, uint nel, uint max_size )
00933 {
00934     unsigned nl = 1, nu = ceil( pow( max_size - nel, 1.0 / 3 ) );
00935     uint size_low = 2 + nel;
00936     while( nu - nl > 1 )
00937     {
00938         unsigned nm = nl + ( nu - nl ) / 2;
00939         uint size   = (uint)nm * nm * nm + 1 + hash_count_3( p, nel, nm );
00940         if( size <= max_size )
00941             nl = nm, size_low = size;
00942         else
00943             nu = nm;
00944     }
00945     hash_setfac_3( p, nl );
00946     return size_low;
00947 }
00948
00949 static void hash_getbb_2( hash_data_2* p, const realType* const elx[2], const unsigned n[2], uint nel, realType tol )
00950 {
00951     obbox_data_2* data;
00952     realType *z[2], *w[2];
00953     uint i;
00954     unsigned d;
00955     const unsigned nn = n[0] * n[1];
00956     unsigned int m[2];
00957     const realType* x[2];
00958
00959     for( i = 0; i < 2; ++i )
00960     {
00961         x[i] = elx[i];
00962         m[i] = 2 * n[i];
00963     }
00964
00965     z[0] = tmalloc( realType, 2 * ( n[0] + n[1] ) );
00966     w[0] = z[0] + n[0];
00967     z[1] = w[0] + n[0], w[1] = z[1] + n[1];
00968     for( d = 0; d < 2; ++d )
00969         lobatto_nodes( z[d], n[d] ), lobatto_weights( z[d], w[d], n[d] );
00970     data = obbox_setup_2( (const realType* const*)z, (const realType* const*)w, n, m );
00971     obbox_calc_2( data, tol, x[0], x[1], &p->obb[0] );
00972     memcpy( &p->bnd[0], (const realType*)&p->obb[0].axis_bnd[0], 4 * sizeof( realType ) );
00973     for( i = 0; i < nel; ++i, x[0] += nn, x[1] += nn )
00974     {
00975         obbox_calc_2( data, tol, x[0], x[1], &p->obb[i] );
00976         obbox_merge_2( &p->bnd[0], (const realType*)&p->obb[i].axis_bnd[0] );
00977     }
00978     obbox_free_2( data );
00979     free( z[0] );
00980 }
00981
00982 static void hash_getbb_3( hash_data_3* p, const realType* const elx[3], const unsigned n[3], uint nel, realType tol )
00983 {
00984     obbox_data_3* data;
00985     const realType* x[3];
00986     realType *z[3], *w[3];
00987     uint i;
00988     unsigned d;
00989     const unsigned nn = n[0] * n[1] * n[2];
00990     unsigned int m[3];
00991
00992     for( i = 0; i < 3; ++i )
00993     {
00994         x[i] = elx[i];
00995         m[i] = 2 * n[i];
00996     }
00997
00998     z[0] = tmalloc( realType, 2 * ( n[0] + n[1] + n[2] ) );
00999     w[0] = z[0] + n[0];
01000     for( d = 1; d < 3; ++d )
01001         z[d] = w[d - 1] + n[d - 1], w[d] = z[d] + n[d];
01002     for( d = 0; d < 3; ++d )
01003         lobatto_nodes( z[d], n[d] ), lobatto_weights( z[d], w[d], n[d] );
01004     data = obbox_setup_3( (const realType* const*)z, (const realType* const*)w, n, m );
01005     obbox_calc_3( data, tol, x[0], x[1], x[2], &p->obb[0] );
01006     memcpy( &p->bnd[0], (const realType*)&p->obb[0].axis_bnd[0], 6 * sizeof( realType ) );
01007     for( i = 0; i < nel; ++i, x[0] += nn, x[1] += nn, x[2] += nn )
01008     {
01009         obbox_calc_3( data, tol, x[0], x[1], x[2], &p->obb[i] );
01010         obbox_merge_3( &p->bnd[0], (const realType*)&p->obb[i].axis_bnd[0] );
01011     }
01012     obbox_free_3( data );
01013     free( z[0] );
01014 }
01015
01016 static void hash_build_2( hash_data_2* p,
01017                           const realType* const x[2],
01018                           const unsigned n[2],
01019                           uint nel,
01020                           uint max_hash_size,
01021                           realType tol )
01022 {
01023     uint i, el, size, hn2, sum;
01024     unsigned hn;
01025     unsigned* count;
01026     p->obb = tmalloc( obbox_2, nel );
01027     hash_getbb_2( p, x, n, nel, tol );
01028     size      = hash_opt_size_2( p, nel, max_hash_size );
01029     p->offset = tmalloc( uint, size );
01030     hn        = p->hash_n;
01031     hn2       = (uint)hn * hn;
01032     count     = tcalloc( unsigned, hn2 );
01033     for( el = 0; el < nel; ++el )
01034     {
01035         unsigned ia, ib, j, ja, jb;
01036         hash_range_2( p, el, 0, &ia, &ib );
01037         hash_range_2( p, el, 1, &ja, &jb );
01038         for( j = ja; j < jb; ++j )
01039             for( i = ia; i < ib; ++i )
01040                 ++count[(uint)j * hn + i];
01041     }
01042     sum = hn2 + 1, p->max = count[0];
01043     p->offset[0] = sum;
01044     for( i = 0; i < hn2; ++i )
01045     {
01046         if( count[i] > p->max ) p->max = count[i];
01047         sum += count[i];
01048         p->offset[i + 1] = sum;
01049     }
01050     for( el = 0; el < nel; ++el )
01051     {
01052         unsigned ia, ib, j, ja, jb;
01053         hash_range_2( p, el, 0, &ia, &ib );
01054         hash_range_2( p, el, 1, &ja, &jb );
01055         for( j = ja; j < jb; ++j )
01056             for( i = ia; i < ib; ++i )
01057             {
01058                 uint arrindex                                        = (uint)j * hn + i;
01059                 p->offset[p->offset[arrindex + 1] - count[arrindex]] = el;
01060                 --count[arrindex];
01061             }
01062     }
01063     free( count );
01064 }
01065
01066 static void hash_build_3( hash_data_3* p,
01067                           const realType* const x[3],
01068                           const unsigned n[3],
01069                           uint nel,
01070                           uint max_hash_size,
01071                           realType tol )
01072 {
01073     uint i, el, size, hn3, sum;
01074     unsigned hn;
01075     unsigned* count;
01076     p->obb = tmalloc( obbox_3, nel );
01077     hash_getbb_3( p, x, n, nel, tol );
01078     size      = hash_opt_size_3( p, nel, max_hash_size );
01079     p->offset = tmalloc( uint, size );
01080     hn        = p->hash_n;
01081     hn3       = (uint)hn * hn * hn;
01082     count     = tcalloc( unsigned, hn3 );
01083     for( el = 0; el < nel; ++el )
01084     {
01085         unsigned ia, ib, j, ja, jb, k, ka, kb;
01086         hash_range_3( p, el, 0, &ia, &ib );
01087         hash_range_3( p, el, 1, &ja, &jb );
01088         hash_range_3( p, el, 2, &ka, &kb );
01089         for( k = ka; k < kb; ++k )
01090             for( j = ja; j < jb; ++j )
01091                 for( i = ia; i < ib; ++i )
01092                     ++count[( (uint)k * hn + j ) * hn + i];
01093     }
01094     sum = hn3 + 1, p->max = count[0];
01095     p->offset[0] = sum;
01096     for( i = 0; i < hn3; ++i )
01097     {
01098         if( count[i] > p->max ) p->max = count[i];
01099         sum += count[i];
01100         p->offset[i + 1] = sum;
01101     }
01102     for( el = 0; el < nel; ++el )
01103     {
01104         unsigned ia, ib, j, ja, jb, k, ka, kb;
01105         hash_range_3( p, el, 0, &ia, &ib );
01106         hash_range_3( p, el, 1, &ja, &jb );
01107         hash_range_3( p, el, 2, &ka, &kb );
01108         for( k = ka; k < kb; ++k )
01109             for( j = ja; j < jb; ++j )
01110                 for( i = ia; i < ib; ++i )
01111                 {
01112                     uint arrindex                                        = ( (uint)k * hn + j ) * hn + i;
01113                     p->offset[p->offset[arrindex + 1] - count[arrindex]] = el;
01114                     --count[arrindex];
01115                 }
01116     }
01117     free( count );
01118 }
01119
01120 static void hash_free_2( hash_data_2* p )
01121 {
01122     free( p->obb );
01123     free( p->offset );
01124 }
01125
01126 static void hash_free_3( hash_data_3* p )
01127 {
01128     free( p->obb );
01129     free( p->offset );
01130 }
01131
01132 /*--------------------------------------------------------------------------
01133    Optimization algorithm to find a point within an element
01134
01135    Given x(r)  (as values of x,y,z at all Lobatto nodes) and x_star,
01136    find the r that minimizes || x_star - x(r) ||_2
01137
01138    As a minimization problem, the Newton step is
01139
01140                __ 3
01141      [ J^T J - >_ d=1  resid_d H_d ] dr = J^t resid
01142
01143    where resid = x_star - x(r), J = [ dx_i/dr_j ],
01144    and H_d = [ d^2 x_d/dr_i dr_j ].
01145
01146    This is the appropriate step to take whenever constraints are active,
01147    and the current iterate is on a boundary of the element. When the current
01148    iterate is inside, J is square ( dim r = dim x ), resid will become small,
01149    and the step
01150
01151      J dr = resid
01152
01153    may be used instead, still giving quadratic convergence.
01154
01155
01156    Names use a _3 suffix for 3-d and _2 for 2-d.
01157    The routines require an initialized lagrange_data array as input:
01158      unsigned d, n[3] = { ... };
01159      realType *z[3] = { tmalloc(realType, n[0]), ... };
01160      for(d=0;d<3;++d) lobatto_nodes(z[d],n[d]);
01161
01162      lagrange_data ld[3];
01163      for(d=0;d<3;++d) lagrange_setup(&ld[d],z[d],n[d]);
01164
01165    Initialization:
01166      opt_data_3 data;
01167      opt_alloc_3(&data, ld);
01168
01169    Use:
01170      const realType *xm[3];  // 3 pointers, each to n[0]*n[1]*n[2] realTypes
01171                          //   giving the nodal x, y, or z coordinates
01172
01173      const realType x_star[3] = { ... };  // point to find
01174      realType r[3] = { 0,0,0 };           // initial guess with
01175      unsigned c = opt_no_constraints_3;   //   these constraints active
01176
01177      realType dist = opt_findpt_3(&data,xm,x_star,r,&c);
01178      // minimizer is r with constraints c; 2-norm of resid is dist
01179
01180    Clean-up:
01181      opt_free_3(&data);
01182
01183      for(d=0;d<3;++d) lagrange_free(&ld[d]);
01184      for(d=0;d<3;++d) free(z[d]);
01185
01186    The constraint number works as follows. Let cr be the constraints
01187    on the r variable:
01188       cr = 0       r fixed at -1
01189       cr = 1       r not fixed
01190       cr = 2       r fixed at 1
01191    Then the constraint number is (ct*3+cs)*3+cr
01192
01193   --------------------------------------------------------------------------*/
01194
01195 /* how many directions are constrained? */
01196 static const char opt_constr_num_2[9]  = { 2, 1, 2, 1, 0, 1, 2, 1, 2 };
01197 static const char opt_constr_num_3[27] = { 3, 2, 3, 2, 1, 2, 3, 2, 3, 2, 1, 2, 1, 0,
01198                                            1, 2, 1, 2, 3, 2, 3, 2, 1, 2, 3, 2, 3 };
01199
01200 /* which direction is constrained? */
01201 /* static const char opt_constr_dir_2[9] = {-1, 1,-1,  0,-1, 0, -1, 1,-1}; */
01202 static const char opt_constr_dir_3[27] = { -1, -1, -1, -1, 2,  -1, -1, -1, -1, -1, 1,  -1, 0, -1,
01203                                            0,  -1, 1,  -1, -1, -1, -1, -1, 2,  -1, -1, -1, -1 };
01204
01205 /* which direction is not constrained? */
01206 static const char opt_constr_not[27] = { -1, 0, -1, 1, -1, 1, -1, 0, -1, 2, -1, 2, -1, -1,
01207                                          -1, 2, -1, 2, -1, 0, -1, 1, -1, 1, -1, 0, -1 };
01208
01209 static const char opt_constr_wide[27] = { 0x00, 0x01, 0x02, 0x04, 0x05, 0x06, 0x08, 0x09, 0x0a,
01210                                           0x10, 0x11, 0x12, 0x14, 0x15, 0x16, 0x18, 0x19, 0x1a,
01211                                           0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a };
01212
01213 static const unsigned opt_other1_3[3] = { 1, 0, 0 }, opt_other2_3[3] = { 2, 2, 1 };
01214
01215 static unsigned opt_constr( unsigned constraints, unsigned d )
01216 {
01217     return ( opt_constr_wide[constraints] >> ( d * 2 ) ) & 3;
01218 }
01219
01220 static void opt_constr_unpack_2( unsigned constraints, unsigned* c )
01221 {
01222     const char cw = opt_constr_wide[constraints];
01223     c[0]          = cw & 3;
01224     c[1]          = cw >> 2;
01225 }
01226
01227 static void opt_constr_unpack_3( unsigned constraints, unsigned* c )
01228 {
01229     const char cw = opt_constr_wide[constraints];
01230     c[0]          = cw & 3;
01231     c[1]          = ( cw >> 2 ) & 3;
01232     c[2]          = cw >> 4;
01233 }
01234
01235 static unsigned opt_constr_pack_2( const unsigned* c )
01236 {
01237     return c[1] * 3 + c[0];
01238 }
01239
01240 static unsigned opt_constr_pack_3( const unsigned* c )
01241 {
01242     return ( c[2] * 3 + c[1] ) * 3 + c[0];
01243 }
01244
01245 /*--------------------------------------------------------------------------
01246
01247    3 - D
01248
01249   --------------------------------------------------------------------------*/
01250
01251 void opt_alloc_3( opt_data_3* p, lagrange_data* ld )
01252 {
01253     const unsigned nr = ld[0].n, ns = ld[1].n, nt = ld[2].n, nf = umax_3( nr * ns, nr * nt, ns * nt ),
01254                    ne = umax_3( nr, ns, nt ), nw = 2 * ns * nt + 3 * ns;
01255     p->size[0]   = 1;
01256     p->size[1]   = nr;
01257     p->size[2]   = nr * ns;
01258     p->size[3]   = p->size[2] * nt;
01259     p->ld        = ld;
01260     p->work      = tmalloc( realType, 6 * nf + 9 * ne + nw );
01261     p->fd.x[0]   = p->work + nw;
01262     p->fd.x[1]   = p->fd.x[0] + nf;
01263     p->fd.x[2]   = p->fd.x[1] + nf;
01264     p->fd.fdn[0] = p->fd.x[2] + nf;
01265     p->fd.fdn[1] = p->fd.fdn[0] + nf;
01266     p->fd.fdn[2] = p->fd.fdn[1] + nf;
01267     p->ed.x[0]   = p->fd.fdn[2] + nf;
01268     p->ed.x[1]   = p->ed.x[0] + ne;
01269     p->ed.x[2]   = p->ed.x[1] + ne;
01270     p->ed.fd1[0] = p->ed.x[2] + ne;
01271     p->ed.fd1[1] = p->ed.fd1[0] + ne;
01272     p->ed.fd1[2] = p->ed.fd1[1] + ne;
01273     p->ed.fd2[0] = p->ed.fd1[2] + ne;
01274     p->ed.fd2[1] = p->ed.fd2[0] + ne;
01275     p->ed.fd2[2] = p->ed.fd2[1] + ne;
01276 }
01277
01278 void opt_free_3( opt_data_3* p )
01279 {
01280     free( p->work );
01281 }
01282
01283 static void opt_vol_set_3( opt_data_3* p, const realType r[3] )
01284 {
01285     lagrange_1( &p->ld[0], r[0] );
01286     lagrange_1( &p->ld[1], r[1] );
01287     lagrange_1( &p->ld[2], r[2] );
01288 }
01289
01290 /* work holds 2*ns*nt + 3*ns realTypes */
01291 static void opt_vol_intp_3( opt_data_3* p )
01292 {
01293     unsigned d;
01294     const lagrange_data* ld = p->ld;
01295
01296     for( d = 0; d < 3; ++d )
01297         p->x[d] = tensor_ig3( ld[0].J, ld[0].D, ld[0].n, ld[1].J, ld[1].D, ld[1].n, ld[2].J, ld[2].D, ld[2].n,
01298                               p->elx[d], &p->jac[d * 3], p->work );
01299 }
01300
01301 void opt_vol_set_intp_3( opt_data_3* p, const realType r[3] )
01302 {
01303     opt_vol_set_3( p, r );
01304     opt_vol_intp_3( p );
01305 }
01306
01307 static void opt_face_proj_3( opt_data_3* p )
01308 {
01309     unsigned d, off = 0;
01310     const unsigned dn = p->fd.dn, d1 = p->fd.d1, d2 = p->fd.d2, so = p->size[d2] - p->size[d1 + 1], s1 = p->size[d1],
01311                    sn = p->size[dn], n1 = p->ld[d1].n, n2 = p->ld[d2].n, nn = p->ld[dn].n;
01312     const realType* D = p->ld[dn].D_z0;
01313     if( opt_constr( p->fd.constraints, dn ) == 2 ) off = p->size[dn + 1] - p->size[dn], D = p->ld[dn].D_zn;
01314     for( d = 0; d < 3; ++d )
01315     {
01316         unsigned i, j, k, arrindex = 0;
01317         const realType* in = p->elx[d] + off;
01318         for( j = n2; j; --j, in += so )
01319             for( i = n1; i; --i, ++arrindex, in += s1 )
01320             {
01321                 const realType* ind  = in - off;
01322                 realType* fdn        = &p->fd.fdn[d][arrindex];
01323                 p->fd.x[d][arrindex] = *in;
01324                 *fdn                 = 0;
01325                 for( k = 0; k < nn; ++k, ind += sn )
01326                     *fdn += *ind * D[k];
01327             }
01328     }
01329 }
01330
01331 static void opt_face_set_3( opt_data_3* p, const realType r[3], unsigned constr )
01332 {
01333     if( p->fd.constraints != constr )
01334     {
01335         p->fd.constraints = constr;
01336         p->fd.dn          = opt_constr_dir_3[constr];
01337         p->fd.d1          = opt_other1_3[p->fd.dn];
01338         p->fd.d2          = opt_other2_3[p->fd.dn];
01339         opt_face_proj_3( p );
01340     }
01341     lagrange_1( &p->ld[p->fd.d1], r[p->fd.d1] );
01342     lagrange_1( &p->ld[p->fd.d2], r[p->fd.d2] );
01343 }
01344
01345 /* work holds 2*ld[d2].n realTypes */
01346 static void opt_face_intp_3( opt_data_3* p )
01347 {
01348     unsigned d;
01349     const unsigned dn = p->fd.dn, d1 = p->fd.d1, d2 = p->fd.d2, n1 = p->ld[d1].n, n2 = p->ld[d2].n;
01350     const realType *J1 = p->ld[d1].J, *J2 = p->ld[d2].J, *D1 = p->ld[d1].D, *D2 = p->ld[d2].D;
01351
01352     for( d = 0; d < 3; ++d )
01353     {
01354         realType g[2];
01355         p->x[d]            = tensor_ig2( J1, D1, n1, J2, D2, n2, p->fd.x[d], &g[0], p->work );
01356         p->jac[d * 3 + d1] = g[0];
01357         p->jac[d * 3 + d2] = g[1];
01358         p->jac[d * 3 + dn] = tensor_i2( J1, n1, J2, n2, p->fd.fdn[d], p->work );
01359     }
01360 }
01361
01362 static void opt_face_set_intp_3( opt_data_3* p, const realType r[3], unsigned constr )
01363 {
01364     opt_face_set_3( p, r, constr );
01365     opt_face_intp_3( p );
01366 }
01367
01368 static void opt_face_hess_3( opt_data_3* p, realType hess[9] )
01369 {
01370     unsigned d;
01371     const unsigned d1 = p->fd.d1, d2 = p->fd.d2, n1 = p->ld[d1].n, n2 = p->ld[d2].n;
01372     const realType *J1 = p->ld[d1].J, *J2 = p->ld[d2].J, *D1 = p->ld[d1].D, *D2 = p->ld[d2].D, *S1 = p->ld[d1].D2,
01373                    *S2 = p->ld[d2].D2;
01374
01375     lagrange_2u( &p->ld[d1] );
01376     lagrange_2u( &p->ld[d2] );
01377
01378     for( d = 0; d < 3; ++d )
01379     {
01380         (void)tensor_ig2( J1, S1, n1, J2, S2, n2, p->fd.x[d], hess + d * 3, p->work );
01381         hess[d * 3 + 0] = tensor_i2( S1, n1, J2, n2, p->fd.x[d], p->work );
01382         hess[d * 3 + 1] = tensor_i2( J1, n1, S2, n2, p->fd.x[d], p->work );
01383         hess[d * 3 + 2] = tensor_i2( D1, n1, D2, n2, p->fd.x[d], p->work );
01384     }
01385 }
01386
01387 static void opt_edge_proj_3( opt_data_3* p )
01388 {
01389     unsigned d, off, off1 = 0, off2 = 0;
01390     const unsigned de = p->ed.de, d1 = p->ed.d1, d2 = p->ed.d2, se = p->size[de], s1 = p->size[d1], s2 = p->size[d2],
01391                    ne = p->ld[de].n, n1 = p->ld[d1].n, n2 = p->ld[d2].n;
01392     const realType *fD1, *fD2;
01393     if( opt_constr( p->ed.constraints, d1 ) == 0 )
01394         fD1 = p->ld[d1].D_z0;
01395     else
01396         fD1 = p->ld[d1].D_zn, off1 = p->size[d1 + 1] - p->size[d1];
01397     if( opt_constr( p->ed.constraints, d2 ) == 0 )
01398         fD2 = p->ld[d2].D_z0;
01399     else
01400         fD2 = p->ld[d2].D_zn, off2 = p->size[d2 + 1] - p->size[d2];
01401     off = off1 + off2;
01402     for( d = 0; d < 3; ++d )
01403     {
01404         unsigned i, j;
01405         const realType* in = p->elx[d] + off;
01406         for( i = 0; i < ne; ++i, in += se )
01407         {
01408             const realType *in1 = in - off1, *in2 = in - off2;
01409             realType *fd1 = &p->ed.fd1[d][i], *fd2 = &p->ed.fd2[d][i];
01410             p->ed.x[d][i] = *in;
01411             *fd1 = *fd2 = 0;
01412             for( j = 0; j < n1; ++j, in1 += s1 )
01413                 *fd1 += *in1 * fD1[j];
01414             for( j = 0; j < n2; ++j, in2 += s2 )
01415                 *fd2 += *in2 * fD2[j];
01416         }
01417     }
01418 }
01419
01420 static void opt_edge_set_3( opt_data_3* p, const realType r[3], unsigned constr )
01421 {
01422     if( p->ed.constraints != constr )
01423     {
01424         p->ed.constraints = constr;
01425         p->ed.de          = opt_constr_not[constr];
01426         p->ed.d1          = opt_other1_3[p->ed.de];
01427         p->ed.d2          = opt_other2_3[p->ed.de];
01428         opt_edge_proj_3( p );
01429     }
01430     lagrange_1( &p->ld[p->ed.de], r[p->ed.de] );
01431 }
01432
01433 static void opt_edge_intp_3( opt_data_3* p )
01434 {
01435     unsigned d;
01436     const unsigned de = p->ed.de, d1 = p->ed.d1, d2 = p->ed.d2, n = p->ld[de].n;
01437     const realType *J = p->ld[de].J, *D = p->ld[de].D;
01438
01439     for( d = 0; d < 3; ++d )
01440     {
01441         p->x[d]            = tensor_ig1( J, D, n, p->ed.x[d], &p->jac[d * 3 + de] );
01442         p->jac[d * 3 + d1] = tensor_i1( J, n, p->ed.fd1[d] );
01443         p->jac[d * 3 + d2] = tensor_i1( J, n, p->ed.fd2[d] );
01444     }
01445 }
01446
01447 static void opt_edge_set_intp_3( opt_data_3* p, const realType r[3], unsigned constr )
01448 {
01449     opt_edge_set_3( p, r, constr );
01450     opt_edge_intp_3( p );
01451 }
01452
01453 static void opt_edge_hess_3( opt_data_3* p, realType hess[3] )
01454 {
01455     unsigned d;
01456     const unsigned de = p->ed.de, n = p->ld[de].n;
01457     const realType* D2 = p->ld[de].D2;
01458     lagrange_2u( &p->ld[de] );
01459     for( d = 0; d < 3; ++d )
01460         hess[d] = tensor_i1( D2, n, p->ed.x[d] );
01461 }
01462
01463 static void opt_point_proj_3( opt_data_3* p )
01464 {
01465     unsigned off[3], offt, d, c[3];
01466     const realType* fD[3];
01467     opt_constr_unpack_3( p->pd.constraints, c );
01468     for( d = 0; d < 3; ++d )
01469         if( c[d] == 0 )
01470             fD[d] = p->ld[d].D_z0, off[d] = 0;
01471         else
01472             fD[d] = p->ld[d].D_zn, off[d] = p->size[d + 1] - p->size[d];
01473     offt = off[0] + off[1] + off[2];
01474     for( d = 0; d < 9; ++d )
01475         p->pd.jac[d] = 0;
01476     for( d = 0; d < 3; ++d )
01477     {
01478         unsigned i, j;
01479         p->pd.x[d] = p->elx[d][offt];
01480         for( i = 0; i < 3; ++i )
01481         {
01482             const realType* in = p->elx[d] + offt - off[i];
01483             for( j = 0; j < p->ld[i].n; ++j, in += p->size[i] )
01484                 p->pd.jac[d * 3 + i] += *in * fD[i][j];
01485         }
01486     }
01487 }
01488
01489 static void opt_point_set_3( opt_data_3* p, unsigned constr )
01490 {
01491     if( p->pd.constraints != constr )
01492     {
01493         p->pd.constraints = constr;
01494         opt_point_proj_3( p );
01495     }
01496 }
01497
01498 static void opt_point_intp_3( opt_data_3* p )
01499 {
01500     memcpy( p->x, p->pd.x, 3 * sizeof( realType ) );
01501     memcpy( p->jac, p->pd.jac, 9 * sizeof( realType ) );
01502 }
01503
01504 static void opt_point_set_intp_3( opt_data_3* p, unsigned constr )
01505 {
01506     opt_point_set_3( p, constr );
01507     opt_point_intp_3( p );
01508 }
01509
01510 #define DIAGNOSTICS 0
01511
01512 double opt_findpt_3( opt_data_3* p,
01513                      const realType* const elx[3],
01514                      const realType xstar[3],
01515                      realType r[3],
01516                      unsigned* constr )
01517 {
01518     realType dr[3], resid[3], steep[3];
01519
01520     unsigned c = *constr, ac, d, cc[3], step = 0;
01521
01522     p->elx[0] = elx[0], p->elx[1] = elx[1], p->elx[2] = elx[2];
01523
01524     p->fd.constraints = opt_no_constraints_3;
01525     p->ed.constraints = opt_no_constraints_3;
01526     p->pd.constraints = opt_no_constraints_3;
01527
01528 #if DIAGNOSTICS
01529     printf( "opt_findpt: xstar = %g, %g, %g\n", xstar[0], xstar[1], xstar[2] );
01530 #endif
01531
01532     do
01533     {
01534         ++step;
01535         if( step == 50 ) /*fail("%s: opt_findpt_3 did not converge\n",__FILE__);*/
01536             return 1.e+30;
01537 #if DIAGNOSTICS
01538         printf( "  iteration %u\n", step );
01539         printf( "    %d constraint(s) active\n", (int)opt_constr_num_3[c] );
01540 #endif
01541         /* update face/edge/point data if necessary,
01542            and evaluate x(r) as well as the jacobian */
01543         switch( opt_constr_num_3[c] )
01544         {
01545             case 0:
01546                 opt_vol_set_intp_3( p, r );
01547                 break;
01548             case 1:
01549                 opt_face_set_intp_3( p, r, c );
01550                 break;
01551             case 2:
01552                 opt_edge_set_intp_3( p, r, c );
01553                 break;
01554             case 3:
01555                 opt_point_set_intp_3( p, c );
01556                 break;
01557         }
01558 #if DIAGNOSTICS
01559         printf( "    r = %g, %g, %g\n", r[0], r[1], r[2] );
01560         printf( "    x = %g, %g, %g\n", p->x[0], p->x[1], p->x[2] );
01561 #endif
01562         /* compute residual */
01563         for( d = 0; d < 3; ++d )
01564             resid[d] = xstar[d] - p->x[d];
01565 #if DIAGNOSTICS
01566         printf( "    resid = %g, %g, %g\n", resid[0], resid[1], resid[2] );
01567         printf( "    2-norm = %g\n", r2norm_3( resid[0], resid[1], resid[2] ) );
01568 #endif
01569         /* check constraints against steepest descent direction */
01570         ac = c;
01571         if( opt_constr_num_3[c] )
01572         {
01573             opt_constr_unpack_3( c, cc );
01574             mat_app_3c( steep, p->jac, resid ); /* steepest descent = J^T r */
01575 #if DIAGNOSTICS
01576             printf( "    steepest descent = %g, %g, %g\n", steep[0], steep[1], steep[2] );
01577 #endif
01578             for( d = 0; d < 3; ++d )
01579                 if( ( cc[d] == 0 && steep[d] > 0 ) || ( cc[d] == 2 && steep[d] < 0 ) ) cc[d] = 1;
01580             ac = opt_constr_pack_3( cc );
01581         }
01582         /* update face/edge/point data if necessary */
01583         if( ac != c )
01584         {
01585             c = ac;
01586 #if DIAGNOSTICS
01587             printf( "    relaxed to %d constraints\n", (int)opt_constr_num_3[c] );
01588 #endif
01589             switch( opt_constr_num_3[c] )
01590             {
01591                 case 1:
01592                     opt_face_set_3( p, r, c );
01593                     break;
01594                 case 2:
01595                     opt_edge_set_3( p, r, c );
01596                     break;
01597                 case 3:
01598                     opt_point_set_3( p, c );
01599                     break;
01600             }
01601         }
01602         /* compute Newton step */
01603         switch( opt_constr_num_3[c] )
01604         {
01605             case 0:
01606                 tinyla_solve_3( dr, p->jac, resid );
01607                 break;
01608             case 1: {
01609                 const unsigned dn = p->fd.dn, d1 = p->fd.d1, d2 = p->fd.d2;
01610                 realType A[4], H[9];
01611                 const realType* J = p->jac;
01612                 opt_face_hess_3( p, H );
01613                 A[0] = J[d1] * J[d1] + J[3 + d1] * J[3 + d1] + J[6 + d1] * J[6 + d1];
01614                 A[1] = J[d2] * J[d2] + J[3 + d2] * J[3 + d2] + J[6 + d2] * J[6 + d2];
01615                 A[2] = J[d1] * J[d2] + J[3 + d1] * J[3 + d2] + J[6 + d1] * J[6 + d2];
01616                 A[0] -= resid[0] * H[0] + resid[1] * H[3] + resid[2] * H[6];
01617                 A[1] -= resid[0] * H[1] + resid[1] * H[4] + resid[2] * H[7];
01618                 A[2] -= resid[0] * H[2] + resid[1] * H[5] + resid[2] * H[8];
01619                 tinyla_solve_sym_2( &dr[d1], &dr[d2], A, steep[d1], steep[d2] );
01620                 dr[dn] = 0;
01621             }
01622             break;
01623             case 2: {
01624                 const unsigned de = p->ed.de, d1 = p->ed.d1, d2 = p->ed.d2;
01625                 realType fac, H[3];
01626                 const realType* J = p->jac + de;
01627                 opt_edge_hess_3( p, H );
01628                 fac = J[0] * J[0] + J[3] * J[3] + J[6] * J[6] - ( resid[0] * H[0] + resid[1] * H[1] + resid[2] * H[2] );
01629                 dr[de] = steep[de] / fac;
01630                 dr[d1] = 0, dr[d2] = 0;
01631             }
01632             break;
01633             case 3:
01634                 dr[0] = dr[1] = dr[2] = 0;
01635                 break;
01636         }
01637 #if DIAGNOSTICS
01638         printf( "    dr = %g, %g, %g\n", dr[0], dr[1], dr[2] );
01639 #endif
01640         /* project new iteration onto [-1,1]^3 */
01641         opt_constr_unpack_3( c, cc );
01642         for( d = 0; d < 3; ++d )
01643         {
01644             if( cc[d] != 1 ) continue;
01645             r[d] += dr[d];
01646             if( r[d] <= -1 )
01647                 dr[d] -= r[d] + 1, r[d] = -1, cc[d] = 0;
01648             else if( r[d] >= 1 )
01649                 dr[d] -= r[d] - 1, r[d] = 1, cc[d] = 2;
01650         }
01651         c = opt_constr_pack_3( cc );
01652     } while( r1norm_3( dr[0], dr[1], dr[2] ) > 3 * MOAB_POLY_EPS );
01653     *constr = c;
01654 #if 0
01655   printf("opt_findpt_3 converged in %u iterations\n", step);
01656 #endif
01657     return r2norm_3( resid[0], resid[1], resid[2] );
01658 }
01659
01660 #undef DIAGNOSTICS
01661
01662 void opt_alloc_2( opt_data_2* p, lagrange_data* ld )
01663 {
01664     const unsigned nr = ld[0].n, ns = ld[1].n, ne = umax_2( nr, ns ), nw = 2 * ns;
01665     p->size[0]   = 1;
01666     p->size[1]   = nr;
01667     p->size[2]   = nr * ns;
01668     p->ld        = ld;
01669     p->work      = tmalloc( realType, 4 * ne + nw );
01670     p->ed.x[0]   = p->work + nw;
01671     p->ed.x[1]   = p->ed.x[0] + ne;
01672     p->ed.fd1[0] = p->ed.x[1] + ne;
01673     p->ed.fd1[1] = p->ed.fd1[0] + ne;
01674 }
01675
01676 void opt_free_2( opt_data_2* p )
01677 {
01678     free( p->work );
01679 }
01680
01681 static void opt_area_set_2( opt_data_2* p, const realType r[2] )
01682 {
01683     lagrange_1( &p->ld[0], r[0] );
01684     lagrange_1( &p->ld[1], r[1] );
01685 }
01686
01687 /* work holds 2*ns realTypes */
01688 static void opt_area_intp_2( opt_data_2* p )
01689 {
01690     unsigned d;
01691     const lagrange_data* ld = p->ld;
01692
01693     for( d = 0; d < 2; ++d )
01694         p->x[d] =
01695             tensor_ig2( ld[0].J, ld[0].D, ld[0].n, ld[1].J, ld[1].D, ld[1].n, p->elx[d], &p->jac[d * 2], p->work );
01696 }
01697
01698 static void opt_area_set_intp_2( opt_data_2* p, const realType r[2] )
01699 {
01700     opt_area_set_2( p, r );
01701     opt_area_intp_2( p );
01702 }
01703
01704 static void opt_edge_proj_2( opt_data_2* p )
01705 {
01706     unsigned d, off = 0;
01707     const unsigned de = p->ed.de, d1 = p->ed.d1, se = p->size[de], s1 = p->size[d1], ne = p->ld[de].n, n1 = p->ld[d1].n;
01708     const realType* fD1;
01709     if( opt_constr( p->ed.constraints, d1 ) == 0 )
01710         fD1 = p->ld[d1].D_z0;
01711     else
01712         fD1 = p->ld[d1].D_zn, off = p->size[d1 + 1] - p->size[d1];
01713     for( d = 0; d < 2; ++d )
01714     {
01715         unsigned i, j;
01716         const realType* in = p->elx[d] + off;
01717         for( i = 0; i < ne; ++i, in += se )
01718         {
01719             const realType* in1 = in - off;
01720             realType* fd1       = &p->ed.fd1[d][i];
01721             p->ed.x[d][i]       = *in;
01722             *fd1                = 0;
01723             for( j = 0; j < n1; ++j, in1 += s1 )
01724                 *fd1 += *in1 * fD1[j];
01725         }
01726     }
01727 }
01728
01729 static void opt_edge_set_2( opt_data_2* p, const realType r[2], unsigned constr )
01730 {
01731     if( p->ed.constraints != constr )
01732     {
01733         p->ed.constraints = constr;
01734         p->ed.de          = opt_constr_not[constr];
01735         p->ed.d1          = 1 - p->ed.de;
01736         opt_edge_proj_2( p );
01737     }
01738     lagrange_1( &p->ld[p->ed.de], r[p->ed.de] );
01739 }
01740
01741 static void opt_edge_intp_2( opt_data_2* p )
01742 {
01743     unsigned d;
01744     const unsigned de = p->ed.de, d1 = p->ed.d1, n = p->ld[de].n;
01745     const realType *J = p->ld[de].J, *D = p->ld[de].D;
01746     for( d = 0; d < 2; ++d )
01747     {
01748         p->x[d]            = tensor_ig1( J, D, n, p->ed.x[d], &p->jac[d * 2 + de] );
01749         p->jac[d * 2 + d1] = tensor_i1( J, n, p->ed.fd1[d] );
01750     }
01751 }
01752
01753 static void opt_edge_set_intp_2( opt_data_2* p, const realType r[2], unsigned constr )
01754 {
01755     opt_edge_set_2( p, r, constr );
01756     opt_edge_intp_2( p );
01757 }
01758
01759 static void opt_edge_hess_2( opt_data_2* p, realType hess[2] )
01760 {
01761     unsigned d;
01762     const unsigned de = p->ed.de, n = p->ld[de].n;
01763     const realType* D2 = p->ld[de].D2;
01764     lagrange_2u( &p->ld[de] );
01765     for( d = 0; d < 2; ++d )
01766         hess[d] = tensor_i1( D2, n, p->ed.x[d] );
01767 }
01768
01769 static void opt_point_proj_2( opt_data_2* p )
01770 {
01771     unsigned off[2], offt, d, c[2];
01772     const realType* fD[2];
01773     opt_constr_unpack_2( p->pd.constraints, c );
01774     for( d = 0; d < 2; ++d )
01775         if( c[d] == 0 )
01776             fD[d] = p->ld[d].D_z0, off[d] = 0;
01777         else
01778             fD[d] = p->ld[d].D_zn, off[d] = p->size[d + 1] - p->size[d];
01779     offt = off[0] + off[1];
01780     for( d = 0; d < 4; ++d )
01781         p->pd.jac[d] = 0;
01782     for( d = 0; d < 2; ++d )
01783     {
01784         unsigned i, j;
01785         p->pd.x[d] = p->elx[d][offt];
01786         for( i = 0; i < 2; ++i )
01787         {
01788             const realType* in = p->elx[d] + offt - off[i];
01789             for( j = 0; j < p->ld[i].n; ++j, in += p->size[i] )
01790                 p->pd.jac[d * 2 + i] += *in * fD[i][j];
01791         }
01792     }
01793 }
01794
01795 static void opt_point_set_2( opt_data_2* p, unsigned constr )
01796 {
01797     if( p->pd.constraints != constr )
01798     {
01799         p->pd.constraints = constr;
01800         opt_point_proj_2( p );
01801     }
01802 }
01803
01804 static void opt_point_intp_2( opt_data_2* p )
01805 {
01806     memcpy( p->x, p->pd.x, 2 * sizeof( realType ) );
01807     memcpy( p->jac, p->pd.jac, 4 * sizeof( realType ) );
01808 }
01809
01810 static void opt_point_set_intp_2( opt_data_2* p, unsigned constr )
01811 {
01812     opt_point_set_2( p, constr );
01813     opt_point_intp_2( p );
01814 }
01815
01816 #define DIAGNOSTICS 0
01817
01818 double opt_findpt_2( opt_data_2* p,
01819                      const realType* const elx[2],
01820                      const realType xstar[2],
01821                      realType r[2],
01822                      unsigned* constr )
01823 {
01824     realType dr[2], resid[2], steep[2];
01825
01826     unsigned c = *constr, ac, d, cc[2], step = 0;
01827
01828     p->elx[0] = elx[0], p->elx[1] = elx[1];
01829
01830     p->ed.constraints = opt_no_constraints_2;
01831     p->pd.constraints = opt_no_constraints_2;
01832
01833 #if DIAGNOSTICS
01834     printf( "opt_findpt: xstar = %g, %g\n", xstar[0], xstar[1] );
01835 #endif
01836
01837     do
01838     {
01839         ++step;
01840         if( step == 50 ) /*fail("%s: opt_findpt_2 did not converge\n",__FILE__);*/
01841             return 1.e+30;
01842 #if DIAGNOSTICS
01843         printf( "  iteration %u\n", step );
01844         printf( "    %d constraint(s) active\n", (int)opt_constr_num_2[c] );
01845 #endif
01846         /* update face/edge/point data if necessary,
01847            and evaluate x(r) as well as the jacobian */
01848         switch( opt_constr_num_2[c] )
01849         {
01850             case 0:
01851                 opt_area_set_intp_2( p, r );
01852                 break;
01853             case 1:
01854                 opt_edge_set_intp_2( p, r, c );
01855                 break;
01856             case 2:
01857                 opt_point_set_intp_2( p, c );
01858                 break;
01859         }
01860 #if DIAGNOSTICS
01861         printf( "    r = %g, %g\n", r[0], r[1] );
01862         printf( "    x = %g, %g\n", p->x[0], p->x[1] );
01863 #endif
01864         /* compute residual */
01865         for( d = 0; d < 2; ++d )
01866             resid[d] = xstar[d] - p->x[d];
01867 #if DIAGNOSTICS
01868         printf( "    resid = %g, %g\n", resid[0], resid[1] );
01869         printf( "    2-norm = %g\n", r2norm_2( resid[0], resid[1] ) );
01870 #endif
01871         /* check constraints against steepest descent direction */
01872         ac = c;
01873         if( opt_constr_num_2[c] )
01874         {
01875             opt_constr_unpack_2( c, cc );
01876             mat_app_2c( steep, p->jac, resid ); /* steepest descent = J^T r */
01877 #if DIAGNOSTICS
01878             printf( "    steepest descent = %g, %g\n", steep[0], steep[1] );
01879 #endif
01880             for( d = 0; d < 2; ++d )
01881                 if( ( cc[d] == 0 && steep[d] > 0 ) || ( cc[d] == 2 && steep[d] < 0 ) ) cc[d] = 1;
01882             ac = opt_constr_pack_2( cc );
01883         }
01884         /* update face/edge/point data if necessary */
01885         if( ac != c )
01886         {
01887             c = ac;
01888 #if DIAGNOSTICS
01889             printf( "    relaxed to %d constraints\n", (int)opt_constr_num_2[c] );
01890 #endif
01891             switch( opt_constr_num_2[c] )
01892             {
01893                 case 1:
01894                     opt_edge_set_2( p, r, c );
01895                     break;
01896                 case 2:
01897                     opt_point_set_2( p, c );
01898                     break;
01899             }
01900         }
01901         /* compute Newton step */
01902         switch( opt_constr_num_2[c] )
01903         {
01904             case 0:
01905                 tinyla_solve_2( dr, p->jac, resid );
01906                 break;
01907             case 1: {
01908                 const unsigned de = p->ed.de, d1 = p->ed.d1;
01909                 realType fac, H[2];
01910                 const realType* J = p->jac + de;
01911                 opt_edge_hess_2( p, H );
01912                 fac    = J[0] * J[0] + J[2] * J[2] - ( resid[0] * H[0] + resid[1] * H[1] );
01913                 dr[de] = steep[de] / fac;
01914                 dr[d1] = 0;
01915             }
01916             break;
01917             case 2:
01918                 dr[0] = dr[1] = 0;
01919                 break;
01920         }
01921 #if DIAGNOSTICS
01922         printf( "    dr = %g, %g\n", dr[0], dr[1] );
01923 #endif
01924         /* project new iteration onto [-1,1]^2 */
01925         opt_constr_unpack_2( c, cc );
01926         for( d = 0; d < 2; ++d )
01927         {
01928             if( cc[d] != 1 ) continue;
01929             r[d] += dr[d];
01930             if( r[d] <= -1 )
01931                 dr[d] -= r[d] + 1, r[d] = -1, cc[d] = 0;
01932             else if( r[d] >= 1 )
01933                 dr[d] -= r[d] - 1, r[d] = 1, cc[d] = 2;
01934         }
01935         c = opt_constr_pack_2( cc );
01936     } while( r1norm_2( dr[0], dr[1] ) > 2 * MOAB_POLY_EPS );
01937     *constr = c;
01938     return r2norm_2( resid[0], resid[1] );
01939 }
01940
01941 #undef DIAGNOSTICS
01942
01943 /*--------------------------------------------------------------------------
01944    Point Finding  (interface/top-level)
01945
01946    Initializing the data:
01947      unsigned nel;        // number of elements
01948      const unsigned n[3]; // number of nodes in r, s, t directions
01949      const realType *xm[3];   // n[0]*n[1]*n[2]*nel x,y,z coordinates
01950      realType tol = 0.01;     // how far point is allowed to be outside element
01951                           //   relative to element size
01952      unsigned max_size = n[0]*n[1]*n[2]*nel; // maximum size of hash table
01953
01954      findpt_data_3 *data = findpt_setup_3(xm,n,nel,max_size,tol);
01955
01956    Using the data:
01957      realType x[3] = { ... };   // point to find
01958      int el; // element number
01959      realType r[3]; // parametric coordinates
01960      int guess = 0; // do we use (el,r,s,t) as an initial guess?
01961      int code; // 0 : normal, -1 : outside all elements,
01962                // 1 : border, or outside but within tolerance
01963      realType dist; // distance in xyz space from returned (el,r,s,t) to given
01964                 // (x,y,z)
01965
01966      code = findpt_3(data, x, guess, &el, r, &dist);
01967
01968    When done:
01969      findpt_free_3(&data);
01970
01971   --------------------------------------------------------------------------*/
01972
01973 /* heap sort on A[0:n-1] with key A[i]->dist
01974    precondition: n!=0 */
01975 static void findpt_list_sort( findpt_listel** A, unsigned n )
01976 {
01977     unsigned i;
01978     --A; /* make A have a base index of 1 */
01979     /* build heap */
01980     for( i = 2; i <= n; ++i )
01981     {
01982         findpt_listel* item = A[i];
01983         unsigned hole = i, parent = hole >> 1;
01984         if( A[parent]->dist >= item->dist ) continue;
01985         do
01986         {
01987             A[hole] = A[parent];
01988             hole    = parent;
01989             parent >>= 1;
01990         } while( parent && A[parent]->dist < item->dist );
01991         A[hole] = item;
01992     }
01993     /* extract */
01994     for( i = n - 1; i; --i )
01995     {
01996         findpt_listel* item = A[i + 1];
01997         unsigned hole       = 1;
01998         A[i + 1]            = A[1];
01999         for( ;; )
02000         {
02001             unsigned ch = hole << 1, r = ch + 1;
02002             if( r <= i && A[ch]->dist < A[r]->dist ) ch = r;
02003             if( ch > i || item->dist >= A[ch]->dist ) break;
02004             A[hole] = A[ch];
02005             hole    = ch;
02006         }
02007         A[hole] = item;
02008     }
02009 }
02010
02011 findpt_data_2* findpt_setup_2( const realType* const xw[2],
02012                                const unsigned n[2],
02013                                uint nel,
02014                                uint max_hash_size,
02015                                realType bbox_tol )
02016 {
02017     unsigned d;
02018     findpt_data_2* p = tmalloc( findpt_data_2, 1 );
02019
02020     p->hash = tmalloc( hash_data_2, 1 );
02021     p->od   = tmalloc( opt_data_2, 1 );
02022
02023     for( d = 0; d < 2; ++d )
02024         p->xw[d] = xw[d];
02025     p->nptel = n[0] * n[1];
02026
02027     hash_build_2( p->hash, xw, n, nel, max_hash_size, bbox_tol );
02028
02029     for( d = 0; d < 2; ++d )
02030     {
02031         p->z[d] = tmalloc( realType, n[d] );
02032         lobatto_nodes( p->z[d], n[d] );
02033         lagrange_setup( &p->ld[d], p->z[d], n[d] );
02034     }
02035
02036     p->list   = tmalloc( findpt_listel, p->hash->max );
02037     p->sorted = tmalloc( findpt_listel*, p->hash->max );
02038
02039     opt_alloc_2( p->od, p->ld );
02040     p->od_work = p->od->work;
02041
02042     return p;
02043 }
02044
02045 findpt_data_3* findpt_setup_3( const realType* const xw[3],
02046                                const unsigned n[3],
02047                                uint nel,
02048                                uint max_hash_size,
02049                                realType bbox_tol )
02050 {
02051     unsigned d;
02052     findpt_data_3* p = tmalloc( findpt_data_3, 1 );
02053
02054     p->hash = tmalloc( hash_data_3, 1 );
02055     p->od   = tmalloc( opt_data_3, 1 );
02056
02057     for( d = 0; d < 3; ++d )
02058         p->xw[d] = xw[d];
02059     p->nptel = n[0] * n[1] * n[2];
02060
02061     hash_build_3( p->hash, xw, n, nel, max_hash_size, bbox_tol );
02062
02063     for( d = 0; d < 3; ++d )
02064     {
02065         p->z[d] = tmalloc( realType, n[d] );
02066         lobatto_nodes( p->z[d], n[d] );
02067         lagrange_setup( &p->ld[d], p->z[d], n[d] );
02068     }
02069
02070     p->list   = tmalloc( findpt_listel, p->hash->max );
02071     p->sorted = tmalloc( findpt_listel*, p->hash->max );
02072
02073     opt_alloc_3( p->od, p->ld );
02074     p->od_work = p->od->work;
02075
02076     return p;
02077 }
02078
02079 void findpt_free_2( findpt_data_2* p )
02080 {
02081     unsigned d;
02082     opt_free_2( p->od );
02083     free( p->od );
02084     hash_free_2( p->hash );
02085     free( p->hash );
02086     free( p->list );
02087     free( p->sorted );
02088     for( d = 0; d < 2; ++d )
02089         free( p->z[d] );
02090     free( p );
02091 }
02092
02093 void findpt_free_3( findpt_data_3* p )
02094 {
02095     unsigned d;
02096     opt_free_3( p->od );
02097     free( p->od );
02098     hash_free_3( p->hash );
02099     free( p->hash );
02100     free( p->list );
02101     free( p->sorted );
02102     for( d = 0; d < 3; ++d )
02103         free( p->z[d] );
02104     free( p );
02105 }
02106
02107 const realType* findpt_allbnd_2( const findpt_data_2* p )
02108 {
02109     return p->hash->bnd;
02110 }
02111
02112 const realType* findpt_allbnd_3( const findpt_data_3* p )
02113 {
02114     return p->hash->bnd;
02115 }
02116
02117 static void findpt_hash_2( findpt_data_2* p, const realType x[2] )
02118 {
02119     findpt_listel *list = p->list, **sorted = p->sorted;
02120     const uint hi      = hash_index_2( p->hash, x );
02121     const uint* offset = p->hash->offset;
02122     uint i;
02123     const uint b = offset[hi], e = offset[hi + 1];
02124     for( i = b; i != e; ++i )
02125     {
02126         const uint el      = offset[i];
02127         realType* r        = &list->r[0];
02128         const obbox_2* obb = &p->hash->obb[el];
02129         if( obbox_axis_test_2( obb, x ) ) continue;
02130         if( obbox_test_2( obb, x, r ) ) continue;
02131         list->el   = el;
02132         list->dist = r1norm_2( r[0], r[1] );
02133         *sorted++  = list++;
02134     }
02135     p->end = sorted;
02136     if( p->end != p->sorted ) findpt_list_sort( p->sorted, p->end - p->sorted );
02137 }
02138
02139 static void findpt_hash_3( findpt_data_3* p, const realType x[3] )
02140 {
02141     findpt_listel *list = p->list, **sorted = p->sorted;
02142     const uint hi      = hash_index_3( p->hash, x );
02143     const uint* offset = p->hash->offset;
02144     uint i;
02145     const uint b = offset[hi], e = offset[hi + 1];
02146     for( i = b; i != e; ++i )
02147     {
02148         const uint el      = offset[i];
02149         realType* r        = &list->r[0];
02150         const obbox_3* obb = &p->hash->obb[el];
02151         if( obbox_axis_test_3( obb, x ) ) continue;
02152         if( obbox_test_3( obb, x, r ) ) continue;
02153         list->el   = el;
02154         list->dist = r1norm_3( r[0], r[1], r[2] );
02155         *sorted++  = list++;
02156     }
02157     p->end = sorted;
02158     if( p->end != p->sorted ) findpt_list_sort( p->sorted, p->end - p->sorted );
02159 }
02160
02161 static int findpt_guess_2( findpt_data_2* p, const realType x[2], uint el, realType r[2], realType* dist )
02162 {
02163     const uint arrindex = p->nptel * el;
02164     const realType* elx[2];
02165     realType g[2];
02166     unsigned c         = opt_no_constraints_2;
02167     const obbox_2* obb = &p->hash->obb[el];
02168     elx[0]             = p->xw[0] + arrindex;
02169     elx[1]             = p->xw[1] + arrindex;
02170     if( obbox_axis_test_2( obb, x ) || obbox_test_2( obb, x, g ) ) return 0;
02171     *dist = opt_findpt_2( p->od, elx, x, r, &c );
02172     return c == opt_no_constraints_2;
02173 }
02174
02175 static int findpt_guess_3( findpt_data_3* p, const realType x[3], uint el, realType r[3], realType* dist )
02176 {
02177     const uint arrindex = p->nptel * el;
02178     const realType* elx[3];
02179     realType g[3];
02180     unsigned c         = opt_no_constraints_3;
02181     const obbox_3* obb = &p->hash->obb[el];
02182     elx[0]             = p->xw[0] + arrindex;
02183     elx[1]             = p->xw[1] + arrindex;
02184     elx[2]             = p->xw[2] + arrindex;
02185     if( obbox_axis_test_3( obb, x ) || obbox_test_3( obb, x, g ) ) return 0;
02186     *dist = opt_findpt_3( p->od, elx, x, r, &c );
02187     return c == opt_no_constraints_3;
02188 }
02189
02190 #define DIAGNOSTICS 0
02191
02192 static int findpt_pass_2( findpt_data_2* p, const realType x[2], uint* el, realType r[2], realType* dist_min )
02193 {
02194     findpt_listel** qq = p->sorted;
02195     const realType* bnd;
02196     realType dist;
02197     do
02198     {
02199         findpt_listel* q    = *qq;
02200         const uint arrindex = p->nptel * q->el;
02201         const realType* elx[2];
02202         unsigned c = opt_no_constraints_2;
02203         elx[0]     = p->xw[0] + arrindex;
02204         elx[1]     = p->xw[1] + arrindex;
02205         dist       = opt_findpt_2( p->od, elx, x, q->r, &c );
02206         if( qq == p->sorted || dist < *dist_min || c == opt_no_constraints_2 )
02207         {
02208             *dist_min = dist;
02209             *el       = q->el;
02210             memcpy( r, q->r, 2 * sizeof( realType ) );
02211             if( c == opt_no_constraints_2 ) return 0;
02212         }
02213     } while( ++qq != p->end );
02214     bnd = p->hash->obb[*el].axis_bnd;
02215     return *dist_min > r2norm_2( bnd[1] - bnd[0], bnd[3] - bnd[2] ) ? -1 : 1;
02216 }
02217
02218 static int findpt_pass_3( findpt_data_3* p, const realType x[3], uint* el, realType r[3], realType* dist_min )
02219 {
02220     findpt_listel** qq = p->sorted;
02221     const realType* bnd;
02222     realType dist;
02223     do
02224     {
02225         findpt_listel* q    = *qq;
02226         const uint arrindex = p->nptel * q->el;
02227         const realType* elx[3];
02228         unsigned c = opt_no_constraints_3;
02229         elx[0]     = p->xw[0] + arrindex;
02230         elx[1]     = p->xw[1] + arrindex;
02231         elx[2]     = p->xw[2] + arrindex;
02232         dist       = opt_findpt_3( p->od, elx, x, q->r, &c );
02233         if( qq == p->sorted || dist < *dist_min || c == opt_no_constraints_3 )
02234         {
02235             *dist_min = dist;
02236             *el       = q->el;
02237             memcpy( r, q->r, 3 * sizeof( realType ) );
02238             if( c == opt_no_constraints_3 )
02239             {
02240 #if DIAGNOSTICS
02241                 printf( "point found in element #%d\n", qq - p->sorted );
02242 #endif
02243                 return 0;
02244             }
02245         }
02246     } while( ++qq != p->end );
02247     bnd = p->hash->obb[*el].axis_bnd;
02248     return *dist_min > r2norm_3( bnd[1] - bnd[0], bnd[3] - bnd[2], bnd[5] - bnd[4] ) ? -1 : 1;
02249 }
02250
02251 int findpt_2( findpt_data_2* p, const realType x[2], int guess, uint* el, realType r[2], realType* dist )
02252 {
02253     if( guess && findpt_guess_2( p, x, *el, r, dist ) ) return 0;
02254     findpt_hash_2( p, x );
02255     if( p->sorted == p->end ) return -1;
02256     return findpt_pass_2( p, x, el, r, dist );
02257 }
02258
02259 int findpt_3( findpt_data_3* p, const realType x[3], int guess, uint* el, realType r[3], realType* dist )
02260 {
02261     if( guess && findpt_guess_3( p, x, *el, r, dist ) ) return 0;
02262     findpt_hash_3( p, x );
02263 #if DIAGNOSTICS
02264     printf( "hashing leaves %d elements to consider\n", p->end - p->sorted );
02265 #endif
02266     if( p->sorted == p->end ) return -1;
02267     return findpt_pass_3( p, x, el, r, dist );
02268 }
02269
02270 void findpt_weights_2( findpt_data_2* p, const realType r[2] )
02271 {
02272     lagrange_0( &p->ld[0], r[0] );
02273     lagrange_0( &p->ld[1], r[1] );
02274 }
02275
02276 void findpt_weights_3( findpt_data_3* p, const realType r[3] )
02277 {
02278     lagrange_0( &p->ld[0], r[0] );
02279     lagrange_0( &p->ld[1], r[1] );
02280     lagrange_0( &p->ld[2], r[2] );
02281 }
02282
02283 double findpt_eval_2( findpt_data_2* p, const realType* u )
02284 {
02285     return tensor_i2( p->ld[0].J, p->ld[0].n, p->ld[1].J, p->ld[1].n, u, p->od_work );
02286 }
02287
02288 double findpt_eval_3( findpt_data_3* p, const realType* u )
02289 {
02290     return tensor_i3( p->ld[0].J, p->ld[0].n, p->ld[1].J, p->ld[1].n, p->ld[2].J, p->ld[2].n, u, p->od_work );
02291 }