![]() |
Mesh Oriented datABase
(version 5.4.1)
Array-based unstructured mesh datastructure
|
00001 #include
00002 #include
00003 #include
00004 #include /* for cos, fabs */
00005 #include
00006 #include /* for memcpy */
00007
00008 #include "moab/FindPtFuncs.h"
00009
00010 const unsigned opt_no_constraints_2 = 3 + 1;
00011 const unsigned opt_no_constraints_3 = 9 + 3 + 1;
00012
00013 /*--------------------------------------------------------------------------
00014 Lobatto Polynomial Bounds
00015
00016 Needed inputs are the Gauss-Lobatto quadrature nodes and weights:
00017 unsigned nr = ..., ns = ...;
00018 realType zr[nr], wr[nr];
00019 realType zs[ns], ws[ns];
00020
00021 lobatto_nodes(zr,nr); lobatto_weights(zr,wr,nr);
00022 lobatto_nodes(zs,ns); lobatto_weights(zs,ws,ns);
00023
00024 The number of points in the constructed piecewise (bi-)linear bounds
00025 is a parameter; more points give tighter bounds
00026
00027 unsigned mr = 2*nr, ms = 2*ns;
00028
00029 The necessary setup is accomplished via:
00030 lob_bnd_base b_data_r;
00031 lob_bnd_ext e_data_s;
00032
00033 lob_bnd_base_alloc(&b_data_r,nr,mr);
00034 lob_bnd_base_setup(&b_data_r,zr,wr);
00035 lob_bnd_ext_alloc(&e_data_s,ns,ms);
00036 lob_bnd_ext_setup(&e_data_s,zs,ws);
00037
00038 Bounds may then be computed via:
00039 realType work1r[2*mr], work1s[2*ms], work2[2*mr + 2*mr*ns + 2*mr*ms];
00040 realType ur[nr], us[ns]; // 1-d polynomials on the zr[] and zs[] nodes
00041 realType u[ns][nr]; // 2-d polynomial on zr[] (x) zs[]
00042 realType bound[2]; // = { min, max } (to be computed)
00043
00044 lob_bnd_1(&b_data_r ,ur,bound,work1r); // compute bounds on ur
00045 lob_bnd_1(&e_data_s.b,us,bound,work1s); // compute bounds on us
00046 lob_bnd_2(&b_data_r, &e_data_s,
00047 (const double*)&u[0][0],bound,work2); // compute bounds on u
00048 The above routines access the zr,zs arrays passed to *_setup
00049 (so do not delete them between calls)
00050
00051 Memory allocated in *_setup is freed with
00052 lob_bnd_base_free(&b_data_r);
00053 lob_bnd_ext_free(&e_data_s);
00054
00055 --------------------------------------------------------------------------*/
00056
00057 typedef struct
00058 {
00059 unsigned n; /* number of Lobatto nodes in input */
00060 unsigned m; /* number of Chebyshev nodes used to calculate bounds */
00061 realType *Q0, *Q1; /* Q0[n], Q1[n] -- first two rows of change of basis matrix
00062 from Lobatto node Lagrangian to Legendre */
00063 const realType* z; /* z[n] -- external; Lobatto nodes */
00064 realType* h; /* h[m] -- Chebyshev nodes */
00065 realType *uv, *ov; /* uv[n][m], ov[n][m] --
00066 uv[j][:] is a piecewise linear function in the nodal
00067 basis with nodes h[m] that is everywhere less
00068 than or equal to the jth Lagrangian basis
00069 function (with the Lobatto nodes z[n])
00070 ov[j][:] is everywhere greater than or equal */
00071 } lob_bnd_base;
00072
00073 typedef struct
00074 {
00075 lob_bnd_base b;
00076 realType *uvp, *uvn, *ovp, *ovn; /* [n][m] -- uv and ov split into
00077 positive and negative parts */
00078 } lob_bnd_ext;
00079
00080 static void lob_bnd_base_alloc( lob_bnd_base* p, unsigned n, unsigned m )
00081 {
00082 p->n = n, p->m = m;
00083 p->Q0 = tmalloc( realType, 2 * n + m + 2 * n * m );
00084 p->Q1 = p->Q0 + n;
00085 p->h = p->Q1 + n;
00086 p->uv = p->h + m;
00087 p->ov = p->uv + n * m;
00088 }
00089
00090 static void lob_bnd_base_free( lob_bnd_base* p )
00091 {
00092 free( p->Q0 );
00093 }
00094
00095 static void lob_bnd_ext_alloc( lob_bnd_ext* p, unsigned n, unsigned m )
00096 {
00097 p->b.n = n, p->b.m = m;
00098 p->b.Q0 = tmalloc( realType, 2 * n + m + 6 * n * m );
00099 p->b.Q1 = p->b.Q0 + n;
00100 p->b.h = p->b.Q1 + n;
00101 p->b.uv = p->b.h + m;
00102 p->b.ov = p->b.uv + n * m;
00103 p->uvp = p->b.ov + n * m;
00104 p->uvn = p->uvp + n * m;
00105 p->ovp = p->uvn + n * m;
00106 p->ovn = p->ovp + n * m;
00107 }
00108
00109 static void lob_bnd_ext_free( lob_bnd_ext* p )
00110 {
00111 free( p->b.Q0 );
00112 }
00113
00114 static void lob_bnd_base_setup( lob_bnd_base* p, const realType* z, const realType* w )
00115 {
00116 unsigned i, j, m = p->m, n = p->n, mm = 2 * m - 1;
00117 realType *q = tmalloc( realType, ( 2 * n + 1 ) * mm + 6 * n ), *J = q + mm, *D = J + n * mm, *work = D + n * mm;
00118 p->z = z;
00119 for( i = 0; i < n; ++i )
00120 p->Q0[i] = w[i] / 2, p->Q1[i] = 3 * p->Q0[i] * z[i];
00121 p->h[0] = -1, p->h[m - 1] = 1;
00122 for( j = 1; j < m - 1; ++j )
00123 p->h[j] = mbcos( ( m - j - 1 ) * MOAB_POLY_PI / ( m - 1 ) );
00124 for( j = 0; j < m - 1; ++j )
00125 q[2 * j] = p->h[j], q[2 * j + 1] = ( p->h[j] + p->h[j + 1] ) / 2;
00126 q[mm - 1] = p->h[m - 1];
00127 lagrange_weights_deriv( z, n, q, mm, J, D, work );
00128 for( i = 0; i < n; ++i )
00129 {
00130 realType *uv = p->uv + i * m, *ov = p->ov + i * m;
00131 ov[0] = uv[0] = J[i];
00132 ov[m - 1] = uv[m - 1] = J[( mm - 1 ) * n + i];
00133 for( j = 1; j < m - 1; ++j )
00134 {
00135 unsigned jj = 2 * j;
00136 realType c0 = J[( jj - 1 ) * n + i] + ( q[jj] - q[jj - 1] ) * D[( jj - 1 ) * n + i];
00137 realType c1 = J[( jj + 1 ) * n + i] + ( q[jj] - q[jj + 1] ) * D[( jj + 1 ) * n + i];
00138 realType c2 = J[jj * n + i];
00139 rminmax_3( &uv[j], &ov[j], c0, c1, c2 );
00140 }
00141 }
00142 free( q );
00143 }
00144
00145 static void lob_bnd_ext_setup( lob_bnd_ext* p, const realType* z, const realType* w )
00146 {
00147 unsigned i, mn = p->b.m * p->b.n;
00148 lob_bnd_base_setup( &p->b, z, w );
00149 for( i = 0; i < mn; ++i )
00150 {
00151 realType uvi = p->b.uv[i], ovi = p->b.ov[i];
00152 p->uvp[i] = p->uvn[i] = p->ovp[i] = p->ovn[i] = 0;
00153 if( uvi > 0 )
00154 p->uvp[i] = uvi;
00155 else
00156 p->uvn[i] = uvi;
00157 if( ovi > 0 )
00158 p->ovp[i] = ovi;
00159 else
00160 p->ovn[i] = ovi;
00161 }
00162 }
00163
00164 static void lob_bnd_lines( const lob_bnd_base* p, const realType* u, realType* a, realType* b )
00165 {
00166 unsigned i, j;
00167 realType a0 = 0, a1 = 0;
00168 const realType *uv = p->uv, *ov = p->ov;
00169 for( i = 0; i < p->n; ++i )
00170 a0 += p->Q0[i] * u[i], a1 += p->Q1[i] * u[i];
00171 for( j = 0; j < p->m; ++j )
00172 b[j] = a[j] = a0 + a1 * p->h[j];
00173 for( i = 0; i < p->n; ++i )
00174 {
00175 realType w = u[i] - ( a0 + a1 * p->z[i] );
00176 if( w >= 0 )
00177 for( j = 0; j < p->m; ++j )
00178 a[j] += w * ( *uv++ ), b[j] += w * ( *ov++ );
00179 else
00180 for( j = 0; j < p->m; ++j )
00181 a[j] += w * ( *ov++ ), b[j] += w * ( *uv++ );
00182 }
00183 }
00184
00185 /* work holds p->m * 2 doubles */
00186 static void lob_bnd_1( const lob_bnd_base* p, const realType* u, realType bnd[2], realType* work )
00187 {
00188 unsigned j;
00189 realType *a = work, *b = work + p->m;
00190 lob_bnd_lines( p, u, a, b );
00191 bnd[0] = a[0], bnd[1] = b[0];
00192 for( j = 1; j < p->m; ++j )
00193 {
00194 if( a[j] < bnd[0] ) bnd[0] = a[j];
00195 if( b[j] > bnd[1] ) bnd[1] = b[j];
00196 }
00197 }
00198
00199 /* work holds 2*mr + 2*mr*ns + 2*mr*ms doubles */
00200 static void lob_bnd_2( const lob_bnd_base* pr,
00201 const lob_bnd_ext* ps,
00202 const realType* u,
00203 realType bnd[2],
00204 realType* work )
00205 {
00206 unsigned nr = pr->n, mr = pr->m, ns = ps->b.n, ms = ps->b.m;
00207 realType *a0 = work, *a1 = a0 + mr, *ar_ = a1 + mr, *ar = ar_, *br_ = ar + mr * ns, *br = br_, *a_ = br + mr * ns,
00208 *a = a_, *b_ = a + mr * ms, *b = b_, *uvp, *ovp, *uvn, *ovn;
00209 realType b0, b1;
00210 unsigned i, j, k;
00211 for( i = 0; i < mr; ++i )
00212 a0[i] = a1[i] = 0;
00213 for( j = 0; j < ns; ++j, u += nr )
00214 {
00215 realType q0 = ps->b.Q0[j], q1 = ps->b.Q1[j];
00216 lob_bnd_lines( pr, u, ar, br );
00217 for( i = 0; i < mr; ++i )
00218 {
00219 realType t = ( *ar++ + *br++ ) / 2;
00220 a0[i] += q0 * t, a1[i] += q1 * t;
00221 }
00222 }
00223 for( i = 0; i < mr; ++i )
00224 {
00225 realType a0i = a0[i], a1i = a1[i];
00226 for( k = 0; k < ms; ++k )
00227 *b++ = *a++ = a0i + a1i * ps->b.h[k];
00228 }
00229 ar = ar_, br = br_;
00230 uvp = ps->uvp, ovp = ps->ovp, uvn = ps->uvn, ovn = ps->ovn;
00231 for( j = 0; j < ns; ++j, uvp += ms, uvn += ms, ovp += ms, ovn += ms )
00232 {
00233 realType zj = ps->b.z[j];
00234 a = a_, b = b_;
00235 for( i = 0; i < mr; ++i )
00236 {
00237 realType t = a0[i] + a1[i] * zj;
00238 realType uw = *ar++ - t;
00239 realType ow = *br++ - t;
00240 if( uw >= 0 ) /* 0 <= uw <= ow */
00241 for( k = 0; k < ms; ++k )
00242 *a++ += uw * uvp[k] + ow * uvn[k], *b++ += ow * ovp[k] + uw * ovn[k];
00243 else if( ow <= 0 ) /* uw <= ow <= 0 */
00244 for( k = 0; k < ms; ++k )
00245 *a++ += uw * ovp[k] + ow * ovn[k], *b++ += ow * uvp[k] + uw * uvn[k];
00246 else /* uw < 0 < ow */
00247 for( k = 0; k < ms; ++k )
00248 *a++ += uw * ovp[k] + ow * uvn[k], *b++ += ow * ovp[k] + uw * uvn[k];
00249 }
00250 }
00251 b0 = a_[0], b1 = b_[0];
00252 for( i = 1; i < mr * ms; ++i )
00253 {
00254 if( a_[i] < b0 ) b0 = a_[i];
00255 if( b_[i] > b1 ) b1 = b_[i];
00256 }
00257 bnd[0] = b0, bnd[1] = b1;
00258 }
00259
00260 /*--------------------------------------------------------------------------
00261 Small Matrix Inverse
00262 --------------------------------------------------------------------------*/
00263
00264 static void mat_inv_2( const realType A[4], realType inv[4] )
00265 {
00266 const realType idet = 1 / ( A[0] * A[3] - A[1] * A[2] );
00267 inv[0] = idet * A[3];
00268 inv[1] = -( idet * A[1] );
00269 inv[2] = -( idet * A[2] );
00270 inv[3] = idet * A[0];
00271 }
00272
00273 static void mat_inv_3( const realType A[9], realType inv[9] )
00274 {
00275 const realType a = A[4] * A[8] - A[5] * A[7], b = A[5] * A[6] - A[3] * A[8], c = A[3] * A[7] - A[4] * A[6],
00276 idet = 1 / ( A[0] * a + A[1] * b + A[2] * c );
00277 inv[0] = idet * a;
00278 inv[1] = idet * ( A[2] * A[7] - A[1] * A[8] );
00279 inv[2] = idet * ( A[1] * A[5] - A[2] * A[4] );
00280 inv[3] = idet * b;
00281 inv[4] = idet * ( A[0] * A[8] - A[2] * A[6] );
00282 inv[5] = idet * ( A[2] * A[3] - A[0] * A[5] );
00283 inv[6] = idet * c;
00284 inv[7] = idet * ( A[1] * A[6] - A[0] * A[7] );
00285 inv[8] = idet * ( A[0] * A[4] - A[1] * A[3] );
00286 }
00287
00288 static void mat_app_2r( realType y[2], const realType A[4], const realType x[2] )
00289 {
00290 y[0] = A[0] * x[0] + A[1] * x[1];
00291 y[1] = A[2] * x[0] + A[3] * x[1];
00292 }
00293
00294 static void mat_app_2c( realType y[2], const realType A[4], const realType x[2] )
00295 {
00296 y[0] = A[0] * x[0] + A[2] * x[1];
00297 y[1] = A[1] * x[0] + A[3] * x[1];
00298 }
00299
00300 static void mat_app_3r( realType y[3], const realType A[9], const realType x[3] )
00301 {
00302 y[0] = A[0] * x[0] + A[1] * x[1] + A[2] * x[2];
00303 y[1] = A[3] * x[0] + A[4] * x[1] + A[5] * x[2];
00304 y[2] = A[6] * x[0] + A[7] * x[1] + A[8] * x[2];
00305 }
00306
00307 static void mat_app_3c( realType y[3], const realType A[9], const realType x[3] )
00308 {
00309 y[0] = A[0] * x[0] + A[3] * x[1] + A[6] * x[2];
00310 y[1] = A[1] * x[0] + A[4] * x[1] + A[7] * x[2];
00311 y[2] = A[2] * x[0] + A[5] * x[1] + A[8] * x[2];
00312 }
00313
00314 static void tinyla_solve_2( realType x[2], const realType A[4], const realType b[2] )
00315 {
00316 realType inv[4];
00317 mat_inv_2( A, inv );
00318 mat_app_2r( x, inv, b );
00319 }
00320
00321 static void tinyla_solve_3( realType x[3], const realType A[9], const realType b[3] )
00322 {
00323 realType inv[9];
00324 mat_inv_3( A, inv );
00325 mat_app_3r( x, inv, b );
00326 }
00327
00328 /* solve
00329 A[0] x0 + A[2] x1 = b0,
00330 A[2] x0 + A[1] x1 = b1
00331 */
00332 static void tinyla_solve_sym_2( realType* x0, realType* x1, const realType A[3], realType b0, realType b1 )
00333 {
00334 const realType idet = 1 / ( A[0] * A[1] - A[2] * A[2] );
00335 *x0 = idet * ( A[1] * b0 - A[2] * b1 );
00336 *x1 = idet * ( A[0] * b1 - A[2] * b0 );
00337 }
00338
00339 /*--------------------------------------------------------------------------
00340 Oriented Bounding Box
00341
00342 Suffixes on names are _2 for 2-d and _3 for 3-d
00343
00344 Needed inputs are the Gauss-Lobatto quadrature nodes and weights:
00345 unsigned nr = ..., ns = ...;
00346 realType zr[nr], wr[nr];
00347 realType zs[ns], ws[ns];
00348
00349 lobatto_nodes(zr,nr); lobatto_weights(zr,wr,nr);
00350 lobatto_nodes(zs,ns); lobatto_weights(zs,ws,ns);
00351
00352 The number of points in the constructed piecewise (bi-)linear bounds
00353 for the boundaries is a parameter; more points give tighter bounds
00354
00355 unsigned mr = 2*nr, ms = 2*ns;
00356
00357 Bounding boxes are increased by a relative amount as a parameter
00358
00359 realType tol = 0.01;
00360
00361 Setup is accomplished via:
00362
00363 const realType *z[2] = {zr,zs}, *w[2] = {wr,ws};
00364 const unsigned n[2] = {nr,ns}, m[2] = {mr,ms};
00365 obbox_data_2 *data = obbox_setup_2(z,w,n,m);
00366
00367 Bounding box data may then be computed:
00368
00369 obbox_2 box; // will store bounding box information
00370 realType xm[ns][nr], ym[ns][nr]; // x, y coordinates of the element nodes
00371
00372 obbox_calc_2(data, tol, (const realType *)&xm[0][0],
00373 (const realType *)&ym[0][0], &box);
00374
00375 A point may be tested:
00376
00377 const realType x[2]; // point to test
00378 realType r[2];
00379
00380 if( obbox_axis_test_2(&box, x) )
00381 ... // x failed axis-aligned bounding box test
00382
00383 if( obbox_test_2(&box, x, r) )
00384 ... // x failed oriented bounding box test
00385 else
00386 ... // r suitable as initial guess for parametric coords
00387
00388 Once all bounding box information has been computed
00389
00390 obbox_free_2(data);
00391
00392 to free the memory allocated with obbox_setup_2.
00393
00394 --------------------------------------------------------------------------*/
00395
00396 typedef struct
00397 {
00398 lob_bnd_base dr, ds;
00399 realType *Jr0, *Dr0, *Js0, *Ds0, *work;
00400 } obbox_data_2;
00401
00402 typedef struct
00403 {
00404 lob_bnd_base dr;
00405 lob_bnd_ext ds, dt;
00406 realType *Jr0, *Dr0, *Js0, *Ds0, *Jt0, *Dt0, *work;
00407 } obbox_data_3;
00408
00409 static void obbox_data_alloc_2( obbox_data_2* p, const unsigned n[2], const unsigned m[2] )
00410 {
00411 const unsigned max_npm = umax_2( n[0] + m[0], n[1] + m[1] );
00412 lob_bnd_base_alloc( &p->dr, n[0], m[0] );
00413 lob_bnd_base_alloc( &p->ds, n[1], m[1] );
00414 p->Jr0 = tmalloc( realType, 2 * n[0] + 2 * n[1] + 2 * max_npm );
00415 p->Dr0 = p->Jr0 + n[0];
00416 p->Js0 = p->Dr0 + n[0];
00417 p->Ds0 = p->Js0 + n[1];
00418 p->work = p->Ds0 + n[1];
00419 }
00420
00421 static void obbox_data_free_2( obbox_data_2* p )
00422 {
00423 lob_bnd_base_free( &p->dr );
00424 lob_bnd_base_free( &p->ds );
00425 free( p->Jr0 );
00426 }
00427
00428 static void obbox_data_alloc_3( obbox_data_3* p, const unsigned n[3], const unsigned m[3] )
00429 {
00430 const unsigned wk1 = 3 * n[0] * n[1] + 2 * m[0] + 2 * m[0] * n[1] + 2 * m[0] * m[1];
00431 const unsigned wk2 = 3 * n[0] * n[2] + 2 * m[0] + 2 * m[0] * n[2] + 2 * m[0] * m[2];
00432 const unsigned wk3 = 3 * n[1] * n[2] + 2 * m[1] + 2 * m[1] * n[2] + 2 * m[1] * m[2];
00433 const unsigned wk_max = umax_3( wk1, wk2, wk3 );
00434 lob_bnd_base_alloc( &p->dr, n[0], m[0] );
00435 lob_bnd_ext_alloc( &p->ds, n[1], m[1] );
00436 lob_bnd_ext_alloc( &p->dt, n[2], m[2] );
00437 p->Jr0 = tmalloc( realType, 2 * n[0] + 2 * n[1] + 2 * n[2] + wk_max );
00438 p->Dr0 = p->Jr0 + n[0];
00439 p->Js0 = p->Dr0 + n[0];
00440 p->Ds0 = p->Js0 + n[1];
00441 p->Jt0 = p->Ds0 + n[1];
00442 p->Dt0 = p->Jt0 + n[2];
00443 p->work = p->Dt0 + n[2];
00444 }
00445
00446 static void obbox_data_free_3( obbox_data_3* p )
00447 {
00448 lob_bnd_base_free( &p->dr );
00449 lob_bnd_ext_free( &p->ds );
00450 lob_bnd_ext_free( &p->dt );
00451 free( p->Jr0 );
00452 }
00453
00454 static obbox_data_2* obbox_setup_2( const realType* const z[2],
00455 const realType* const w[2],
00456 const unsigned n[2],
00457 const unsigned m[2] )
00458 {
00459 const realType zero = 0;
00460 realType* work;
00461 obbox_data_2* p = tmalloc( obbox_data_2, 1 );
00462 obbox_data_alloc_2( p, n, m );
00463 lob_bnd_base_setup( &p->dr, z[0], w[0] );
00464 lob_bnd_base_setup( &p->ds, z[1], w[1] );
00465 work = tmalloc( realType, 6 * umax_2( n[0], n[1] ) );
00466 lagrange_weights_deriv( z[0], n[0], &zero, 1, p->Jr0, p->Dr0, work );
00467 lagrange_weights_deriv( z[1], n[1], &zero, 1, p->Js0, p->Ds0, work );
00468 free( work );
00469 return p;
00470 }
00471
00472 static obbox_data_3* obbox_setup_3( const realType* const z[3],
00473 const realType* const w[3],
00474 const unsigned n[3],
00475 const unsigned m[3] )
00476 {
00477 const realType zero = 0;
00478 realType* work;
00479 obbox_data_3* p = tmalloc( obbox_data_3, 1 );
00480 obbox_data_alloc_3( p, n, m );
00481 lob_bnd_base_setup( &p->dr, z[0], w[0] );
00482 lob_bnd_ext_setup( &p->ds, z[1], w[1] );
00483 lob_bnd_ext_setup( &p->dt, z[2], w[2] );
00484 work = tmalloc( realType, 6 * umax_3( n[0], n[1], n[2] ) );
00485 lagrange_weights_deriv( z[0], n[0], &zero, 1, p->Jr0, p->Dr0, work );
00486 lagrange_weights_deriv( z[1], n[1], &zero, 1, p->Js0, p->Ds0, work );
00487 lagrange_weights_deriv( z[2], n[2], &zero, 1, p->Jt0, p->Dt0, work );
00488 free( work );
00489 return p;
00490 }
00491
00492 static void obbox_free_2( obbox_data_2* p )
00493 {
00494 obbox_data_free_2( p );
00495 free( p );
00496 }
00497
00498 static void obbox_free_3( obbox_data_3* p )
00499 {
00500 obbox_data_free_3( p );
00501 free( p );
00502 }
00503
00504 int obbox_axis_test_2( const obbox_2* p, const realType x[2] )
00505 {
00506 return ( x[0] < p->axis_bnd[0] || x[0] > p->axis_bnd[1] || x[1] < p->axis_bnd[2] || x[1] > p->axis_bnd[3] );
00507 }
00508
00509 int obbox_axis_test_3( const obbox_3* p, const realType x[3] )
00510 {
00511 return ( x[0] < p->axis_bnd[0] || x[0] > p->axis_bnd[1] || x[1] < p->axis_bnd[2] || x[1] > p->axis_bnd[3] ||
00512 x[2] < p->axis_bnd[4] || x[2] > p->axis_bnd[5] );
00513 }
00514
00515 int obbox_test_2( const obbox_2* p, const realType x[2], realType r[2] )
00516 {
00517 realType xt[2];
00518 xt[0] = x[0] - p->x[0];
00519 xt[1] = x[1] - p->x[1];
00520 r[0] = p->A[0] * xt[0] + p->A[1] * xt[1];
00521 if( mbabs( r[0] ) > 1 ) return 1;
00522 r[1] = p->A[2] * xt[0] + p->A[3] * xt[1];
00523 return mbabs( r[1] ) > 1;
00524 }
00525
00526 int obbox_test_3( const obbox_3* p, const realType x[3], realType r[3] )
00527 {
00528 realType xt[3];
00529 xt[0] = x[0] - p->x[0];
00530 xt[1] = x[1] - p->x[1];
00531 xt[2] = x[2] - p->x[2];
00532 r[0] = p->A[0] * xt[0] + p->A[1] * xt[1] + p->A[2] * xt[2];
00533 if( mbabs( r[0] ) > 1 ) return 1;
00534 r[1] = p->A[3] * xt[0] + p->A[4] * xt[1] + p->A[5] * xt[2];
00535 if( mbabs( r[1] ) > 1 ) return 1;
00536 r[2] = p->A[6] * xt[0] + p->A[7] * xt[1] + p->A[8] * xt[2];
00537 return mbabs( r[2] ) > 1;
00538 }
00539
00540 void obbox_calc_tfm_2( const realType* x,
00541 const realType* y,
00542 unsigned n,
00543 unsigned s,
00544 const realType c0[2],
00545 const realType A[4],
00546 realType* u )
00547 {
00548 unsigned i;
00549 realType* v = u + n;
00550 for( i = 0; i < n; ++i, x += s, y += s )
00551 {
00552 const realType xt = *x - c0[0], yt = *y - c0[1];
00553 u[i] = A[0] * xt + A[1] * yt;
00554 v[i] = A[2] * xt + A[3] * yt;
00555 }
00556 }
00557
00558 void obbox_calc_tfm_3( const realType* x,
00559 const realType* y,
00560 const realType* z,
00561 unsigned nr,
00562 unsigned sr,
00563 unsigned ns,
00564 unsigned ss,
00565 const realType c0[3],
00566 const realType A[9],
00567 realType* u )
00568 {
00569 unsigned i, j;
00570 realType *v = u + nr * ns, *w = v + nr * ns;
00571 for( j = 0; j < ns; ++j, x += ss, y += ss, z += ss )
00572 {
00573 for( i = 0; i < nr; ++i, x += sr, y += sr, z += sr )
00574 {
00575 const realType xt = *x - c0[0], yt = *y - c0[1], zt = *z - c0[2];
00576 *u++ = A[0] * xt + A[1] * yt + A[2] * zt;
00577 *v++ = A[3] * xt + A[4] * yt + A[5] * zt;
00578 *w++ = A[6] * xt + A[7] * yt + A[8] * zt;
00579 }
00580 }
00581 }
00582
00583 void obbox_merge_2( realType* b, const realType* ob )
00584 {
00585 if( ob[0] < b[0] ) b[0] = ob[0];
00586 if( ob[1] > b[1] ) b[1] = ob[1];
00587 if( ob[2] < b[2] ) b[2] = ob[2];
00588 if( ob[3] > b[3] ) b[3] = ob[3];
00589 }
00590
00591 void obbox_merge_3( realType* b, const realType* ob )
00592 {
00593 if( ob[0] < b[0] ) b[0] = ob[0];
00594 if( ob[1] > b[1] ) b[1] = ob[1];
00595 if( ob[2] < b[2] ) b[2] = ob[2];
00596 if( ob[3] > b[3] ) b[3] = ob[3];
00597 if( ob[4] < b[4] ) b[4] = ob[4];
00598 if( ob[5] > b[5] ) b[5] = ob[5];
00599 }
00600
00601 /* work holds 2*n + 2*m realTypes */
00602 void obbox_side_2( const realType* x,
00603 const realType* y,
00604 unsigned n,
00605 unsigned s,
00606 const realType c0[2],
00607 const realType A[4],
00608 realType* work,
00609 const lob_bnd_base* lbd,
00610 realType bnd[4] )
00611 {
00612 obbox_calc_tfm_2( x, y, n, s, c0, A, work );
00613 lob_bnd_1( lbd, work, bnd, work + 2 * n );
00614 lob_bnd_1( lbd, work + n, bnd + 2, work + 2 * n );
00615 }
00616
00617 /* work holds 3*nr*ns + 2*mr + 2*mr*ns + 2*mr*ms realTypes */
00618 void obbox_side_3( const realType* x,
00619 const realType* y,
00620 const realType* z,
00621 unsigned nr,
00622 unsigned sr,
00623 unsigned ns,
00624 unsigned ss,
00625 const realType c0[3],
00626 const realType A[9],
00627 realType* work,
00628 const lob_bnd_base* dr,
00629 const lob_bnd_ext* ds,
00630 realType bnd[6] )
00631 {
00632 obbox_calc_tfm_3( x, y, z, nr, sr, ns, ss, c0, A, work );
00633 lob_bnd_2( dr, ds, work, bnd, work + 3 * nr * ns );
00634 lob_bnd_2( dr, ds, work + nr * ns, bnd + 2, work + 3 * nr * ns );
00635 lob_bnd_2( dr, ds, work + 2 * nr * ns, bnd + 4, work + 3 * nr * ns );
00636 }
00637
00638 /* return bounds on u = A (x - c0)
00639 bnd[0] <= u_0 <= bnd[1]
00640 bnd[2] <= u_1 <= bnd[3] */
00641 void obbox_bnd_2( const obbox_data_2* p,
00642 const realType* x,
00643 const realType* y,
00644 const realType c0[2],
00645 const realType A[4],
00646 realType bnd[4] )
00647 {
00648 unsigned i, nr = p->dr.n, ns = p->ds.n;
00649 realType obnd[4];
00650
00651 i = nr * ( ns - 1 );
00652 obbox_side_2( x, y, nr, 1, c0, A, p->work, &p->dr, bnd );
00653 obbox_side_2( x + i, y + i, nr, 1, c0, A, p->work, &p->dr, obnd );
00654 obbox_merge_2( bnd, obnd );
00655
00656 i = nr - 1;
00657 obbox_side_2( x, y, ns, nr, c0, A, p->work, &p->ds, obnd );
00658 obbox_merge_2( bnd, obnd );
00659 obbox_side_2( x + i, y + i, nr, nr, c0, A, p->work, &p->ds, obnd );
00660 obbox_merge_2( bnd, obnd );
00661 }
00662
00663 /* return bounds on u = A (x - c0)
00664 bnd[0] <= u_0 <= bnd[1]
00665 bnd[2] <= u_1 <= bnd[3]
00666 bnd[4] <= u_2 <= bnd[5] */
00667 void obbox_bnd_3( const obbox_data_3* p,
00668 const realType* x,
00669 const realType* y,
00670 const realType* z,
00671 const realType c0[3],
00672 const realType A[9],
00673 realType bnd[6] )
00674 {
00675 unsigned i, nr = p->dr.n, ns = p->ds.b.n, nt = p->dt.b.n;
00676 realType obnd[6];
00677
00678 i = nr * ns * ( nt - 1 );
00679 obbox_side_3( x, y, z, nr, 1, ns, 0, c0, A, p->work, &p->dr, &p->ds, bnd );
00680 obbox_side_3( x + i, y + i, z + i, nr, 1, ns, 0, c0, A, p->work, &p->dr, &p->ds, obnd );
00681 obbox_merge_3( bnd, obnd );
00682
00683 i = nr * ( ns - 1 );
00684 obbox_side_3( x, y, z, nr, 1, nt, i, c0, A, p->work, &p->dr, &p->dt, obnd );
00685 obbox_merge_3( bnd, obnd );
00686 obbox_side_3( x + i, y + i, z + i, nr, 1, nt, i, c0, A, p->work, &p->dr, &p->dt, obnd );
00687 obbox_merge_3( bnd, obnd );
00688
00689 i = nr - 1;
00690 obbox_side_3( x, y, z, ns, nr, nt, 0, c0, A, p->work, &p->ds.b, &p->dt, obnd );
00691 obbox_merge_3( bnd, obnd );
00692 obbox_side_3( x + i, y + i, z + i, ns, nr, nt, 0, c0, A, p->work, &p->ds.b, &p->dt, obnd );
00693 obbox_merge_3( bnd, obnd );
00694 }
00695
00696 void obbox_calc_2( const obbox_data_2* p, realType tol, const realType* x, const realType* y, obbox_2* b )
00697 {
00698 const realType zero[2] = { 0, 0 }, id[4] = { 1, 0, 0, 1 };
00699 realType c0[2], jac[4], inv[4], bnd[4], u0[2], d[2];
00700
00701 obbox_bnd_2( p, x, y, zero, id, b->axis_bnd );
00702 d[0] = b->axis_bnd[1] - b->axis_bnd[0];
00703 d[1] = b->axis_bnd[3] - b->axis_bnd[2];
00704 b->axis_bnd[0] -= tol * d[0], b->axis_bnd[1] += tol * d[0];
00705 b->axis_bnd[2] -= tol * d[1], b->axis_bnd[3] += tol * d[1];
00706
00707 c0[0] = tensor_ig2( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.n, x, jac, p->work );
00708 c0[1] = tensor_ig2( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.n, y, jac + 2, p->work );
00709 mat_inv_2( jac, inv );
00710
00711 obbox_bnd_2( p, x, y, c0, inv, bnd );
00712
00713 u0[0] = ( bnd[0] + bnd[1] ) / 2;
00714 u0[1] = ( bnd[2] + bnd[3] ) / 2;
00715 d[0] = 2 / ( ( 1 + tol ) * ( bnd[1] - bnd[0] ) );
00716 d[1] = 2 / ( ( 1 + tol ) * ( bnd[3] - bnd[2] ) );
00717 b->x[0] = c0[0] + jac[0] * u0[0] + jac[1] * u0[1];
00718 b->x[1] = c0[1] + jac[2] * u0[0] + jac[3] * u0[1];
00719 b->A[0] = d[0] * inv[0], b->A[1] = d[0] * inv[1];
00720 b->A[2] = d[1] * inv[2], b->A[3] = d[1] * inv[3];
00721 }
00722
00723 void obbox_calc_3( const obbox_data_3* p,
00724 realType tol,
00725 const realType* x,
00726 const realType* y,
00727 const realType* z,
00728 obbox_3* b )
00729 {
00730 const realType zero[3] = { 0, 0 }, id[9] = { 1, 0, 0, 0, 1, 0, 0, 0, 1 };
00731 realType c0[3], jac[9], inv[9], bnd[6], u0[3], d[3];
00732
00733 obbox_bnd_3( p, x, y, z, zero, id, b->axis_bnd );
00734 d[0] = b->axis_bnd[1] - b->axis_bnd[0];
00735 d[1] = b->axis_bnd[3] - b->axis_bnd[2];
00736 d[2] = b->axis_bnd[5] - b->axis_bnd[4];
00737 b->axis_bnd[0] -= tol * d[0], b->axis_bnd[1] += tol * d[0];
00738 b->axis_bnd[2] -= tol * d[1], b->axis_bnd[3] += tol * d[1];
00739 b->axis_bnd[4] -= tol * d[2], b->axis_bnd[5] += tol * d[2];
00740
00741 c0[0] =
00742 tensor_ig3( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.b.n, p->Jt0, p->Dt0, p->dt.b.n, x, jac, p->work );
00743 c0[1] = tensor_ig3( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.b.n, p->Jt0, p->Dt0, p->dt.b.n, y, jac + 3,
00744 p->work );
00745 c0[2] = tensor_ig3( p->Jr0, p->Dr0, p->dr.n, p->Js0, p->Ds0, p->ds.b.n, p->Jt0, p->Dt0, p->dt.b.n, z, jac + 6,
00746 p->work );
00747 mat_inv_3( jac, inv );
00748
00749 obbox_bnd_3( p, x, y, z, c0, inv, bnd );
00750
00751 u0[0] = ( bnd[0] + bnd[1] ) / 2;
00752 u0[1] = ( bnd[2] + bnd[3] ) / 2;
00753 u0[2] = ( bnd[4] + bnd[5] ) / 2;
00754 d[0] = 2 / ( ( 1 + tol ) * ( bnd[1] - bnd[0] ) );
00755 d[1] = 2 / ( ( 1 + tol ) * ( bnd[3] - bnd[2] ) );
00756 d[2] = 2 / ( ( 1 + tol ) * ( bnd[5] - bnd[4] ) );
00757 b->x[0] = c0[0] + jac[0] * u0[0] + jac[1] * u0[1] + jac[2] * u0[2];
00758 b->x[1] = c0[1] + jac[3] * u0[0] + jac[4] * u0[1] + jac[5] * u0[2];
00759 b->x[2] = c0[2] + jac[6] * u0[0] + jac[7] * u0[1] + jac[8] * u0[2];
00760 b->A[0] = d[0] * inv[0], b->A[1] = d[0] * inv[1], b->A[2] = d[0] * inv[2];
00761 b->A[3] = d[1] * inv[3], b->A[4] = d[1] * inv[4], b->A[5] = d[1] * inv[5];
00762 b->A[6] = d[2] * inv[6], b->A[7] = d[2] * inv[7], b->A[8] = d[2] * inv[8];
00763 }
00764
00765 /*--------------------------------------------------------------------------
00766 Point to Possible Elements Hashing
00767
00768 Initializing the data:
00769 unsigned nel; // number of elements
00770 const unsigned n[3]; // number of nodes in r, s, t directions
00771 const realType *xm[3]; // n[0]*n[1]*n[2]*nel x,y,z coordinates
00772 realType tol = 0.01; // how far point is allowed to be outside element
00773 // relative to element size
00774 unsigned max_size = n[0]*n[1]*n[2]*nel; // maximum size of hash table
00775
00776 hash_data_3 data;
00777 hash_build_3(&data, xm, n, nel, max_size, tol);
00778
00779 Using the data:
00780 realType x[3]; // point to find
00781
00782 unsigned index = hash_index_3(&data, x);
00783 unsigned i, b = data.offset[index], e = data.offset[index+1];
00784
00785 // point may be in elements
00786 // data.offset[b], data.offset[b+1], ... , data.offset[e-1]
00787 //
00788 // list has maximum size data.max (e.g., e-b <= data.max)
00789
00790 for(i=b; i!=e; ++i) {
00791 unsigned el = data.offset[i];
00792 const obbox_3 *obb = &data.obb[el]; // bounding box data for element el
00793 ...
00794 }
00795
00796 When done:
00797 hash_free_3(&data);
00798
00799 --------------------------------------------------------------------------*/
00800
00801 static int ifloor( realType x )
00802 {
00803 /*
00804 int y = x;
00805 return (double)y > x ? y-1 : y;
00806 */
00807 return mbfloor( x );
00808 }
00809
00810 static int iceil( realType x )
00811 {
00812 /*
00813 int y = x;
00814 return (double)y < x ? y+1 : y;
00815 */
00816 return mbceil( x );
00817 }
00818
00819 static unsigned hash_index_helper( realType low, realType fac, unsigned n, realType x )
00820 {
00821 const int i = ifloor( ( x - low ) * fac );
00822 if( i < 0 ) return 0;
00823 return umin_2( i, n - 1 );
00824 }
00825
00826 static uint hash_index_2( const hash_data_2* p, const realType x[2] )
00827 {
00828 const unsigned n = p->hash_n;
00829 return (uint)hash_index_helper( p->bnd[2], p->fac[1], n, x[1] ) * n +
00830 hash_index_helper( p->bnd[0], p->fac[0], n, x[0] );
00831 }
00832
00833 static uint hash_index_3( const hash_data_3* p, const realType x[3] )
00834 {
00835 const unsigned n = p->hash_n;
00836 return ( (uint)hash_index_helper( p->bnd[4], p->fac[2], n, x[2] ) * n +
00837 hash_index_helper( p->bnd[2], p->fac[1], n, x[1] ) ) *
00838 n +
00839 hash_index_helper( p->bnd[0], p->fac[0], n, x[0] );
00840 }
00841
00842 static void hash_setfac_2( hash_data_2* p, unsigned n )
00843 {
00844 p->hash_n = n;
00845 p->fac[0] = n / ( p->bnd[1] - p->bnd[0] );
00846 p->fac[1] = n / ( p->bnd[3] - p->bnd[2] );
00847 }
00848
00849 static void hash_setfac_3( hash_data_3* p, unsigned n )
00850 {
00851 p->hash_n = n;
00852 p->fac[0] = n / ( p->bnd[1] - p->bnd[0] );
00853 p->fac[1] = n / ( p->bnd[3] - p->bnd[2] );
00854 p->fac[2] = n / ( p->bnd[5] - p->bnd[4] );
00855 }
00856
00857 static void hash_range_2( const hash_data_2* p, uint i, unsigned d, unsigned* ia, unsigned* ib )
00858 {
00859 const realType a = p->obb[i].axis_bnd[d * 2];
00860 const realType b = p->obb[i].axis_bnd[d * 2 + 1];
00861 const int i0 = ifloor( ( a - p->bnd[d * 2] ) * p->fac[d] );
00862 const unsigned i1 = iceil( ( b - p->bnd[d * 2] ) * p->fac[d] );
00863 *ia = imax_2( 0, i0 );
00864 *ib = imin_2( i1, p->hash_n );
00865 if( *ib == *ia ) ++( *ib );
00866 }
00867
00868 static void hash_range_3( const hash_data_3* p, uint i, unsigned d, unsigned* ia, unsigned* ib )
00869 {
00870 const realType a = p->obb[i].axis_bnd[d * 2];
00871 const realType b = p->obb[i].axis_bnd[d * 2 + 1];
00872 const int i0 = ifloor( ( a - p->bnd[d * 2] ) * p->fac[d] );
00873 const unsigned i1 = iceil( ( b - p->bnd[d * 2] ) * p->fac[d] );
00874 *ia = imax_2( 0, i0 );
00875 *ib = imin_2( i1, p->hash_n );
00876 if( *ib == *ia ) ++( *ib );
00877 }
00878
00879 static uint hash_count_2( hash_data_2* p, uint nel, unsigned n )
00880 {
00881 uint i, count = 0;
00882 hash_setfac_2( p, n );
00883 for( i = 0; i < nel; ++i )
00884 {
00885 unsigned ia, ib;
00886 uint ci;
00887 hash_range_2( p, i, 0, &ia, &ib );
00888 ci = ib - ia;
00889 hash_range_2( p, i, 1, &ia, &ib );
00890 ci *= ib - ia;
00891 count += ci;
00892 }
00893 return count;
00894 }
00895
00896 static uint hash_count_3( hash_data_3* p, uint nel, unsigned n )
00897 {
00898 uint i, count = 0;
00899 hash_setfac_3( p, n );
00900 for( i = 0; i < nel; ++i )
00901 {
00902 unsigned ia, ib;
00903 uint ci;
00904 hash_range_3( p, i, 0, &ia, &ib );
00905 ci = ib - ia;
00906 hash_range_3( p, i, 1, &ia, &ib );
00907 ci *= ib - ia;
00908 hash_range_3( p, i, 2, &ia, &ib );
00909 ci *= ib - ia;
00910 count += ci;
00911 }
00912 return count;
00913 }
00914
00915 static uint hash_opt_size_2( hash_data_2* p, uint nel, uint max_size )
00916 {
00917 unsigned nl = 1, nu = ceil( sqrt( max_size - nel ) );
00918 uint size_low = 2 + nel;
00919 while( nu - nl > 1 )
00920 {
00921 unsigned nm = nl + ( nu - nl ) / 2;
00922 uint size = (uint)nm * nm + 1 + hash_count_2( p, nel, nm );
00923 if( size <= max_size )
00924 nl = nm, size_low = size;
00925 else
00926 nu = nm;
00927 }
00928 hash_setfac_2( p, nl );
00929 return size_low;
00930 }
00931
00932 static uint hash_opt_size_3( hash_data_3* p, uint nel, uint max_size )
00933 {
00934 unsigned nl = 1, nu = ceil( pow( max_size - nel, 1.0 / 3 ) );
00935 uint size_low = 2 + nel;
00936 while( nu - nl > 1 )
00937 {
00938 unsigned nm = nl + ( nu - nl ) / 2;
00939 uint size = (uint)nm * nm * nm + 1 + hash_count_3( p, nel, nm );
00940 if( size <= max_size )
00941 nl = nm, size_low = size;
00942 else
00943 nu = nm;
00944 }
00945 hash_setfac_3( p, nl );
00946 return size_low;
00947 }
00948
00949 static void hash_getbb_2( hash_data_2* p, const realType* const elx[2], const unsigned n[2], uint nel, realType tol )
00950 {
00951 obbox_data_2* data;
00952 realType *z[2], *w[2];
00953 uint i;
00954 unsigned d;
00955 const unsigned nn = n[0] * n[1];
00956 unsigned int m[2];
00957 const realType* x[2];
00958
00959 for( i = 0; i < 2; ++i )
00960 {
00961 x[i] = elx[i];
00962 m[i] = 2 * n[i];
00963 }
00964
00965 z[0] = tmalloc( realType, 2 * ( n[0] + n[1] ) );
00966 w[0] = z[0] + n[0];
00967 z[1] = w[0] + n[0], w[1] = z[1] + n[1];
00968 for( d = 0; d < 2; ++d )
00969 lobatto_nodes( z[d], n[d] ), lobatto_weights( z[d], w[d], n[d] );
00970 data = obbox_setup_2( (const realType* const*)z, (const realType* const*)w, n, m );
00971 obbox_calc_2( data, tol, x[0], x[1], &p->obb[0] );
00972 memcpy( &p->bnd[0], (const realType*)&p->obb[0].axis_bnd[0], 4 * sizeof( realType ) );
00973 for( i = 0; i < nel; ++i, x[0] += nn, x[1] += nn )
00974 {
00975 obbox_calc_2( data, tol, x[0], x[1], &p->obb[i] );
00976 obbox_merge_2( &p->bnd[0], (const realType*)&p->obb[i].axis_bnd[0] );
00977 }
00978 obbox_free_2( data );
00979 free( z[0] );
00980 }
00981
00982 static void hash_getbb_3( hash_data_3* p, const realType* const elx[3], const unsigned n[3], uint nel, realType tol )
00983 {
00984 obbox_data_3* data;
00985 const realType* x[3];
00986 realType *z[3], *w[3];
00987 uint i;
00988 unsigned d;
00989 const unsigned nn = n[0] * n[1] * n[2];
00990 unsigned int m[3];
00991
00992 for( i = 0; i < 3; ++i )
00993 {
00994 x[i] = elx[i];
00995 m[i] = 2 * n[i];
00996 }
00997
00998 z[0] = tmalloc( realType, 2 * ( n[0] + n[1] + n[2] ) );
00999 w[0] = z[0] + n[0];
01000 for( d = 1; d < 3; ++d )
01001 z[d] = w[d - 1] + n[d - 1], w[d] = z[d] + n[d];
01002 for( d = 0; d < 3; ++d )
01003 lobatto_nodes( z[d], n[d] ), lobatto_weights( z[d], w[d], n[d] );
01004 data = obbox_setup_3( (const realType* const*)z, (const realType* const*)w, n, m );
01005 obbox_calc_3( data, tol, x[0], x[1], x[2], &p->obb[0] );
01006 memcpy( &p->bnd[0], (const realType*)&p->obb[0].axis_bnd[0], 6 * sizeof( realType ) );
01007 for( i = 0; i < nel; ++i, x[0] += nn, x[1] += nn, x[2] += nn )
01008 {
01009 obbox_calc_3( data, tol, x[0], x[1], x[2], &p->obb[i] );
01010 obbox_merge_3( &p->bnd[0], (const realType*)&p->obb[i].axis_bnd[0] );
01011 }
01012 obbox_free_3( data );
01013 free( z[0] );
01014 }
01015
01016 static void hash_build_2( hash_data_2* p,
01017 const realType* const x[2],
01018 const unsigned n[2],
01019 uint nel,
01020 uint max_hash_size,
01021 realType tol )
01022 {
01023 uint i, el, size, hn2, sum;
01024 unsigned hn;
01025 unsigned* count;
01026 p->obb = tmalloc( obbox_2, nel );
01027 hash_getbb_2( p, x, n, nel, tol );
01028 size = hash_opt_size_2( p, nel, max_hash_size );
01029 p->offset = tmalloc( uint, size );
01030 hn = p->hash_n;
01031 hn2 = (uint)hn * hn;
01032 count = tcalloc( unsigned, hn2 );
01033 for( el = 0; el < nel; ++el )
01034 {
01035 unsigned ia, ib, j, ja, jb;
01036 hash_range_2( p, el, 0, &ia, &ib );
01037 hash_range_2( p, el, 1, &ja, &jb );
01038 for( j = ja; j < jb; ++j )
01039 for( i = ia; i < ib; ++i )
01040 ++count[(uint)j * hn + i];
01041 }
01042 sum = hn2 + 1, p->max = count[0];
01043 p->offset[0] = sum;
01044 for( i = 0; i < hn2; ++i )
01045 {
01046 if( count[i] > p->max ) p->max = count[i];
01047 sum += count[i];
01048 p->offset[i + 1] = sum;
01049 }
01050 for( el = 0; el < nel; ++el )
01051 {
01052 unsigned ia, ib, j, ja, jb;
01053 hash_range_2( p, el, 0, &ia, &ib );
01054 hash_range_2( p, el, 1, &ja, &jb );
01055 for( j = ja; j < jb; ++j )
01056 for( i = ia; i < ib; ++i )
01057 {
01058 uint arrindex = (uint)j * hn + i;
01059 p->offset[p->offset[arrindex + 1] - count[arrindex]] = el;
01060 --count[arrindex];
01061 }
01062 }
01063 free( count );
01064 }
01065
01066 static void hash_build_3( hash_data_3* p,
01067 const realType* const x[3],
01068 const unsigned n[3],
01069 uint nel,
01070 uint max_hash_size,
01071 realType tol )
01072 {
01073 uint i, el, size, hn3, sum;
01074 unsigned hn;
01075 unsigned* count;
01076 p->obb = tmalloc( obbox_3, nel );
01077 hash_getbb_3( p, x, n, nel, tol );
01078 size = hash_opt_size_3( p, nel, max_hash_size );
01079 p->offset = tmalloc( uint, size );
01080 hn = p->hash_n;
01081 hn3 = (uint)hn * hn * hn;
01082 count = tcalloc( unsigned, hn3 );
01083 for( el = 0; el < nel; ++el )
01084 {
01085 unsigned ia, ib, j, ja, jb, k, ka, kb;
01086 hash_range_3( p, el, 0, &ia, &ib );
01087 hash_range_3( p, el, 1, &ja, &jb );
01088 hash_range_3( p, el, 2, &ka, &kb );
01089 for( k = ka; k < kb; ++k )
01090 for( j = ja; j < jb; ++j )
01091 for( i = ia; i < ib; ++i )
01092 ++count[( (uint)k * hn + j ) * hn + i];
01093 }
01094 sum = hn3 + 1, p->max = count[0];
01095 p->offset[0] = sum;
01096 for( i = 0; i < hn3; ++i )
01097 {
01098 if( count[i] > p->max ) p->max = count[i];
01099 sum += count[i];
01100 p->offset[i + 1] = sum;
01101 }
01102 for( el = 0; el < nel; ++el )
01103 {
01104 unsigned ia, ib, j, ja, jb, k, ka, kb;
01105 hash_range_3( p, el, 0, &ia, &ib );
01106 hash_range_3( p, el, 1, &ja, &jb );
01107 hash_range_3( p, el, 2, &ka, &kb );
01108 for( k = ka; k < kb; ++k )
01109 for( j = ja; j < jb; ++j )
01110 for( i = ia; i < ib; ++i )
01111 {
01112 uint arrindex = ( (uint)k * hn + j ) * hn + i;
01113 p->offset[p->offset[arrindex + 1] - count[arrindex]] = el;
01114 --count[arrindex];
01115 }
01116 }
01117 free( count );
01118 }
01119
01120 static void hash_free_2( hash_data_2* p )
01121 {
01122 free( p->obb );
01123 free( p->offset );
01124 }
01125
01126 static void hash_free_3( hash_data_3* p )
01127 {
01128 free( p->obb );
01129 free( p->offset );
01130 }
01131
01132 /*--------------------------------------------------------------------------
01133 Optimization algorithm to find a point within an element
01134
01135 Given x(r) (as values of x,y,z at all Lobatto nodes) and x_star,
01136 find the r that minimizes || x_star - x(r) ||_2
01137
01138 As a minimization problem, the Newton step is
01139
01140 __ 3
01141 [ J^T J - >_ d=1 resid_d H_d ] dr = J^t resid
01142
01143 where resid = x_star - x(r), J = [ dx_i/dr_j ],
01144 and H_d = [ d^2 x_d/dr_i dr_j ].
01145
01146 This is the appropriate step to take whenever constraints are active,
01147 and the current iterate is on a boundary of the element. When the current
01148 iterate is inside, J is square ( dim r = dim x ), resid will become small,
01149 and the step
01150
01151 J dr = resid
01152
01153 may be used instead, still giving quadratic convergence.
01154
01155
01156 Names use a _3 suffix for 3-d and _2 for 2-d.
01157 The routines require an initialized lagrange_data array as input:
01158 unsigned d, n[3] = { ... };
01159 realType *z[3] = { tmalloc(realType, n[0]), ... };
01160 for(d=0;d<3;++d) lobatto_nodes(z[d],n[d]);
01161
01162 lagrange_data ld[3];
01163 for(d=0;d<3;++d) lagrange_setup(&ld[d],z[d],n[d]);
01164
01165 Initialization:
01166 opt_data_3 data;
01167 opt_alloc_3(&data, ld);
01168
01169 Use:
01170 const realType *xm[3]; // 3 pointers, each to n[0]*n[1]*n[2] realTypes
01171 // giving the nodal x, y, or z coordinates
01172
01173 const realType x_star[3] = { ... }; // point to find
01174 realType r[3] = { 0,0,0 }; // initial guess with
01175 unsigned c = opt_no_constraints_3; // these constraints active
01176
01177 realType dist = opt_findpt_3(&data,xm,x_star,r,&c);
01178 // minimizer is r with constraints c; 2-norm of resid is dist
01179
01180 Clean-up:
01181 opt_free_3(&data);
01182
01183 for(d=0;d<3;++d) lagrange_free(&ld[d]);
01184 for(d=0;d<3;++d) free(z[d]);
01185
01186 The constraint number works as follows. Let cr be the constraints
01187 on the r variable:
01188 cr = 0 r fixed at -1
01189 cr = 1 r not fixed
01190 cr = 2 r fixed at 1
01191 Then the constraint number is (ct*3+cs)*3+cr
01192
01193 --------------------------------------------------------------------------*/
01194
01195 /* how many directions are constrained? */
01196 static const char opt_constr_num_2[9] = { 2, 1, 2, 1, 0, 1, 2, 1, 2 };
01197 static const char opt_constr_num_3[27] = { 3, 2, 3, 2, 1, 2, 3, 2, 3, 2, 1, 2, 1, 0,
01198 1, 2, 1, 2, 3, 2, 3, 2, 1, 2, 3, 2, 3 };
01199
01200 /* which direction is constrained? */
01201 /* static const char opt_constr_dir_2[9] = {-1, 1,-1, 0,-1, 0, -1, 1,-1}; */
01202 static const char opt_constr_dir_3[27] = { -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, 1, -1, 0, -1,
01203 0, -1, 1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1 };
01204
01205 /* which direction is not constrained? */
01206 static const char opt_constr_not[27] = { -1, 0, -1, 1, -1, 1, -1, 0, -1, 2, -1, 2, -1, -1,
01207 -1, 2, -1, 2, -1, 0, -1, 1, -1, 1, -1, 0, -1 };
01208
01209 static const char opt_constr_wide[27] = { 0x00, 0x01, 0x02, 0x04, 0x05, 0x06, 0x08, 0x09, 0x0a,
01210 0x10, 0x11, 0x12, 0x14, 0x15, 0x16, 0x18, 0x19, 0x1a,
01211 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a };
01212
01213 static const unsigned opt_other1_3[3] = { 1, 0, 0 }, opt_other2_3[3] = { 2, 2, 1 };
01214
01215 static unsigned opt_constr( unsigned constraints, unsigned d )
01216 {
01217 return ( opt_constr_wide[constraints] >> ( d * 2 ) ) & 3;
01218 }
01219
01220 static void opt_constr_unpack_2( unsigned constraints, unsigned* c )
01221 {
01222 const char cw = opt_constr_wide[constraints];
01223 c[0] = cw & 3;
01224 c[1] = cw >> 2;
01225 }
01226
01227 static void opt_constr_unpack_3( unsigned constraints, unsigned* c )
01228 {
01229 const char cw = opt_constr_wide[constraints];
01230 c[0] = cw & 3;
01231 c[1] = ( cw >> 2 ) & 3;
01232 c[2] = cw >> 4;
01233 }
01234
01235 static unsigned opt_constr_pack_2( const unsigned* c )
01236 {
01237 return c[1] * 3 + c[0];
01238 }
01239
01240 static unsigned opt_constr_pack_3( const unsigned* c )
01241 {
01242 return ( c[2] * 3 + c[1] ) * 3 + c[0];
01243 }
01244
01245 /*--------------------------------------------------------------------------
01246
01247 3 - D
01248
01249 --------------------------------------------------------------------------*/
01250
01251 void opt_alloc_3( opt_data_3* p, lagrange_data* ld )
01252 {
01253 const unsigned nr = ld[0].n, ns = ld[1].n, nt = ld[2].n, nf = umax_3( nr * ns, nr * nt, ns * nt ),
01254 ne = umax_3( nr, ns, nt ), nw = 2 * ns * nt + 3 * ns;
01255 p->size[0] = 1;
01256 p->size[1] = nr;
01257 p->size[2] = nr * ns;
01258 p->size[3] = p->size[2] * nt;
01259 p->ld = ld;
01260 p->work = tmalloc( realType, 6 * nf + 9 * ne + nw );
01261 p->fd.x[0] = p->work + nw;
01262 p->fd.x[1] = p->fd.x[0] + nf;
01263 p->fd.x[2] = p->fd.x[1] + nf;
01264 p->fd.fdn[0] = p->fd.x[2] + nf;
01265 p->fd.fdn[1] = p->fd.fdn[0] + nf;
01266 p->fd.fdn[2] = p->fd.fdn[1] + nf;
01267 p->ed.x[0] = p->fd.fdn[2] + nf;
01268 p->ed.x[1] = p->ed.x[0] + ne;
01269 p->ed.x[2] = p->ed.x[1] + ne;
01270 p->ed.fd1[0] = p->ed.x[2] + ne;
01271 p->ed.fd1[1] = p->ed.fd1[0] + ne;
01272 p->ed.fd1[2] = p->ed.fd1[1] + ne;
01273 p->ed.fd2[0] = p->ed.fd1[2] + ne;
01274 p->ed.fd2[1] = p->ed.fd2[0] + ne;
01275 p->ed.fd2[2] = p->ed.fd2[1] + ne;
01276 }
01277
01278 void opt_free_3( opt_data_3* p )
01279 {
01280 free( p->work );
01281 }
01282
01283 static void opt_vol_set_3( opt_data_3* p, const realType r[3] )
01284 {
01285 lagrange_1( &p->ld[0], r[0] );
01286 lagrange_1( &p->ld[1], r[1] );
01287 lagrange_1( &p->ld[2], r[2] );
01288 }
01289
01290 /* work holds 2*ns*nt + 3*ns realTypes */
01291 static void opt_vol_intp_3( opt_data_3* p )
01292 {
01293 unsigned d;
01294 const lagrange_data* ld = p->ld;
01295
01296 for( d = 0; d < 3; ++d )
01297 p->x[d] = tensor_ig3( ld[0].J, ld[0].D, ld[0].n, ld[1].J, ld[1].D, ld[1].n, ld[2].J, ld[2].D, ld[2].n,
01298 p->elx[d], &p->jac[d * 3], p->work );
01299 }
01300
01301 void opt_vol_set_intp_3( opt_data_3* p, const realType r[3] )
01302 {
01303 opt_vol_set_3( p, r );
01304 opt_vol_intp_3( p );
01305 }
01306
01307 static void opt_face_proj_3( opt_data_3* p )
01308 {
01309 unsigned d, off = 0;
01310 const unsigned dn = p->fd.dn, d1 = p->fd.d1, d2 = p->fd.d2, so = p->size[d2] - p->size[d1 + 1], s1 = p->size[d1],
01311 sn = p->size[dn], n1 = p->ld[d1].n, n2 = p->ld[d2].n, nn = p->ld[dn].n;
01312 const realType* D = p->ld[dn].D_z0;
01313 if( opt_constr( p->fd.constraints, dn ) == 2 ) off = p->size[dn + 1] - p->size[dn], D = p->ld[dn].D_zn;
01314 for( d = 0; d < 3; ++d )
01315 {
01316 unsigned i, j, k, arrindex = 0;
01317 const realType* in = p->elx[d] + off;
01318 for( j = n2; j; --j, in += so )
01319 for( i = n1; i; --i, ++arrindex, in += s1 )
01320 {
01321 const realType* ind = in - off;
01322 realType* fdn = &p->fd.fdn[d][arrindex];
01323 p->fd.x[d][arrindex] = *in;
01324 *fdn = 0;
01325 for( k = 0; k < nn; ++k, ind += sn )
01326 *fdn += *ind * D[k];
01327 }
01328 }
01329 }
01330
01331 static void opt_face_set_3( opt_data_3* p, const realType r[3], unsigned constr )
01332 {
01333 if( p->fd.constraints != constr )
01334 {
01335 p->fd.constraints = constr;
01336 p->fd.dn = opt_constr_dir_3[constr];
01337 p->fd.d1 = opt_other1_3[p->fd.dn];
01338 p->fd.d2 = opt_other2_3[p->fd.dn];
01339 opt_face_proj_3( p );
01340 }
01341 lagrange_1( &p->ld[p->fd.d1], r[p->fd.d1] );
01342 lagrange_1( &p->ld[p->fd.d2], r[p->fd.d2] );
01343 }
01344
01345 /* work holds 2*ld[d2].n realTypes */
01346 static void opt_face_intp_3( opt_data_3* p )
01347 {
01348 unsigned d;
01349 const unsigned dn = p->fd.dn, d1 = p->fd.d1, d2 = p->fd.d2, n1 = p->ld[d1].n, n2 = p->ld[d2].n;
01350 const realType *J1 = p->ld[d1].J, *J2 = p->ld[d2].J, *D1 = p->ld[d1].D, *D2 = p->ld[d2].D;
01351
01352 for( d = 0; d < 3; ++d )
01353 {
01354 realType g[2];
01355 p->x[d] = tensor_ig2( J1, D1, n1, J2, D2, n2, p->fd.x[d], &g[0], p->work );
01356 p->jac[d * 3 + d1] = g[0];
01357 p->jac[d * 3 + d2] = g[1];
01358 p->jac[d * 3 + dn] = tensor_i2( J1, n1, J2, n2, p->fd.fdn[d], p->work );
01359 }
01360 }
01361
01362 static void opt_face_set_intp_3( opt_data_3* p, const realType r[3], unsigned constr )
01363 {
01364 opt_face_set_3( p, r, constr );
01365 opt_face_intp_3( p );
01366 }
01367
01368 static void opt_face_hess_3( opt_data_3* p, realType hess[9] )
01369 {
01370 unsigned d;
01371 const unsigned d1 = p->fd.d1, d2 = p->fd.d2, n1 = p->ld[d1].n, n2 = p->ld[d2].n;
01372 const realType *J1 = p->ld[d1].J, *J2 = p->ld[d2].J, *D1 = p->ld[d1].D, *D2 = p->ld[d2].D, *S1 = p->ld[d1].D2,
01373 *S2 = p->ld[d2].D2;
01374
01375 lagrange_2u( &p->ld[d1] );
01376 lagrange_2u( &p->ld[d2] );
01377
01378 for( d = 0; d < 3; ++d )
01379 {
01380 (void)tensor_ig2( J1, S1, n1, J2, S2, n2, p->fd.x[d], hess + d * 3, p->work );
01381 hess[d * 3 + 0] = tensor_i2( S1, n1, J2, n2, p->fd.x[d], p->work );
01382 hess[d * 3 + 1] = tensor_i2( J1, n1, S2, n2, p->fd.x[d], p->work );
01383 hess[d * 3 + 2] = tensor_i2( D1, n1, D2, n2, p->fd.x[d], p->work );
01384 }
01385 }
01386
01387 static void opt_edge_proj_3( opt_data_3* p )
01388 {
01389 unsigned d, off, off1 = 0, off2 = 0;
01390 const unsigned de = p->ed.de, d1 = p->ed.d1, d2 = p->ed.d2, se = p->size[de], s1 = p->size[d1], s2 = p->size[d2],
01391 ne = p->ld[de].n, n1 = p->ld[d1].n, n2 = p->ld[d2].n;
01392 const realType *fD1, *fD2;
01393 if( opt_constr( p->ed.constraints, d1 ) == 0 )
01394 fD1 = p->ld[d1].D_z0;
01395 else
01396 fD1 = p->ld[d1].D_zn, off1 = p->size[d1 + 1] - p->size[d1];
01397 if( opt_constr( p->ed.constraints, d2 ) == 0 )
01398 fD2 = p->ld[d2].D_z0;
01399 else
01400 fD2 = p->ld[d2].D_zn, off2 = p->size[d2 + 1] - p->size[d2];
01401 off = off1 + off2;
01402 for( d = 0; d < 3; ++d )
01403 {
01404 unsigned i, j;
01405 const realType* in = p->elx[d] + off;
01406 for( i = 0; i < ne; ++i, in += se )
01407 {
01408 const realType *in1 = in - off1, *in2 = in - off2;
01409 realType *fd1 = &p->ed.fd1[d][i], *fd2 = &p->ed.fd2[d][i];
01410 p->ed.x[d][i] = *in;
01411 *fd1 = *fd2 = 0;
01412 for( j = 0; j < n1; ++j, in1 += s1 )
01413 *fd1 += *in1 * fD1[j];
01414 for( j = 0; j < n2; ++j, in2 += s2 )
01415 *fd2 += *in2 * fD2[j];
01416 }
01417 }
01418 }
01419
01420 static void opt_edge_set_3( opt_data_3* p, const realType r[3], unsigned constr )
01421 {
01422 if( p->ed.constraints != constr )
01423 {
01424 p->ed.constraints = constr;
01425 p->ed.de = opt_constr_not[constr];
01426 p->ed.d1 = opt_other1_3[p->ed.de];
01427 p->ed.d2 = opt_other2_3[p->ed.de];
01428 opt_edge_proj_3( p );
01429 }
01430 lagrange_1( &p->ld[p->ed.de], r[p->ed.de] );
01431 }
01432
01433 static void opt_edge_intp_3( opt_data_3* p )
01434 {
01435 unsigned d;
01436 const unsigned de = p->ed.de, d1 = p->ed.d1, d2 = p->ed.d2, n = p->ld[de].n;
01437 const realType *J = p->ld[de].J, *D = p->ld[de].D;
01438
01439 for( d = 0; d < 3; ++d )
01440 {
01441 p->x[d] = tensor_ig1( J, D, n, p->ed.x[d], &p->jac[d * 3 + de] );
01442 p->jac[d * 3 + d1] = tensor_i1( J, n, p->ed.fd1[d] );
01443 p->jac[d * 3 + d2] = tensor_i1( J, n, p->ed.fd2[d] );
01444 }
01445 }
01446
01447 static void opt_edge_set_intp_3( opt_data_3* p, const realType r[3], unsigned constr )
01448 {
01449 opt_edge_set_3( p, r, constr );
01450 opt_edge_intp_3( p );
01451 }
01452
01453 static void opt_edge_hess_3( opt_data_3* p, realType hess[3] )
01454 {
01455 unsigned d;
01456 const unsigned de = p->ed.de, n = p->ld[de].n;
01457 const realType* D2 = p->ld[de].D2;
01458 lagrange_2u( &p->ld[de] );
01459 for( d = 0; d < 3; ++d )
01460 hess[d] = tensor_i1( D2, n, p->ed.x[d] );
01461 }
01462
01463 static void opt_point_proj_3( opt_data_3* p )
01464 {
01465 unsigned off[3], offt, d, c[3];
01466 const realType* fD[3];
01467 opt_constr_unpack_3( p->pd.constraints, c );
01468 for( d = 0; d < 3; ++d )
01469 if( c[d] == 0 )
01470 fD[d] = p->ld[d].D_z0, off[d] = 0;
01471 else
01472 fD[d] = p->ld[d].D_zn, off[d] = p->size[d + 1] - p->size[d];
01473 offt = off[0] + off[1] + off[2];
01474 for( d = 0; d < 9; ++d )
01475 p->pd.jac[d] = 0;
01476 for( d = 0; d < 3; ++d )
01477 {
01478 unsigned i, j;
01479 p->pd.x[d] = p->elx[d][offt];
01480 for( i = 0; i < 3; ++i )
01481 {
01482 const realType* in = p->elx[d] + offt - off[i];
01483 for( j = 0; j < p->ld[i].n; ++j, in += p->size[i] )
01484 p->pd.jac[d * 3 + i] += *in * fD[i][j];
01485 }
01486 }
01487 }
01488
01489 static void opt_point_set_3( opt_data_3* p, unsigned constr )
01490 {
01491 if( p->pd.constraints != constr )
01492 {
01493 p->pd.constraints = constr;
01494 opt_point_proj_3( p );
01495 }
01496 }
01497
01498 static void opt_point_intp_3( opt_data_3* p )
01499 {
01500 memcpy( p->x, p->pd.x, 3 * sizeof( realType ) );
01501 memcpy( p->jac, p->pd.jac, 9 * sizeof( realType ) );
01502 }
01503
01504 static void opt_point_set_intp_3( opt_data_3* p, unsigned constr )
01505 {
01506 opt_point_set_3( p, constr );
01507 opt_point_intp_3( p );
01508 }
01509
01510 #define DIAGNOSTICS 0
01511
01512 double opt_findpt_3( opt_data_3* p,
01513 const realType* const elx[3],
01514 const realType xstar[3],
01515 realType r[3],
01516 unsigned* constr )
01517 {
01518 realType dr[3], resid[3], steep[3];
01519
01520 unsigned c = *constr, ac, d, cc[3], step = 0;
01521
01522 p->elx[0] = elx[0], p->elx[1] = elx[1], p->elx[2] = elx[2];
01523
01524 p->fd.constraints = opt_no_constraints_3;
01525 p->ed.constraints = opt_no_constraints_3;
01526 p->pd.constraints = opt_no_constraints_3;
01527
01528 #if DIAGNOSTICS
01529 printf( "opt_findpt: xstar = %g, %g, %g\n", xstar[0], xstar[1], xstar[2] );
01530 #endif
01531
01532 do
01533 {
01534 ++step;
01535 if( step == 50 ) /*fail("%s: opt_findpt_3 did not converge\n",__FILE__);*/
01536 return 1.e+30;
01537 #if DIAGNOSTICS
01538 printf( " iteration %u\n", step );
01539 printf( " %d constraint(s) active\n", (int)opt_constr_num_3[c] );
01540 #endif
01541 /* update face/edge/point data if necessary,
01542 and evaluate x(r) as well as the jacobian */
01543 switch( opt_constr_num_3[c] )
01544 {
01545 case 0:
01546 opt_vol_set_intp_3( p, r );
01547 break;
01548 case 1:
01549 opt_face_set_intp_3( p, r, c );
01550 break;
01551 case 2:
01552 opt_edge_set_intp_3( p, r, c );
01553 break;
01554 case 3:
01555 opt_point_set_intp_3( p, c );
01556 break;
01557 }
01558 #if DIAGNOSTICS
01559 printf( " r = %g, %g, %g\n", r[0], r[1], r[2] );
01560 printf( " x = %g, %g, %g\n", p->x[0], p->x[1], p->x[2] );
01561 #endif
01562 /* compute residual */
01563 for( d = 0; d < 3; ++d )
01564 resid[d] = xstar[d] - p->x[d];
01565 #if DIAGNOSTICS
01566 printf( " resid = %g, %g, %g\n", resid[0], resid[1], resid[2] );
01567 printf( " 2-norm = %g\n", r2norm_3( resid[0], resid[1], resid[2] ) );
01568 #endif
01569 /* check constraints against steepest descent direction */
01570 ac = c;
01571 if( opt_constr_num_3[c] )
01572 {
01573 opt_constr_unpack_3( c, cc );
01574 mat_app_3c( steep, p->jac, resid ); /* steepest descent = J^T r */
01575 #if DIAGNOSTICS
01576 printf( " steepest descent = %g, %g, %g\n", steep[0], steep[1], steep[2] );
01577 #endif
01578 for( d = 0; d < 3; ++d )
01579 if( ( cc[d] == 0 && steep[d] > 0 ) || ( cc[d] == 2 && steep[d] < 0 ) ) cc[d] = 1;
01580 ac = opt_constr_pack_3( cc );
01581 }
01582 /* update face/edge/point data if necessary */
01583 if( ac != c )
01584 {
01585 c = ac;
01586 #if DIAGNOSTICS
01587 printf( " relaxed to %d constraints\n", (int)opt_constr_num_3[c] );
01588 #endif
01589 switch( opt_constr_num_3[c] )
01590 {
01591 case 1:
01592 opt_face_set_3( p, r, c );
01593 break;
01594 case 2:
01595 opt_edge_set_3( p, r, c );
01596 break;
01597 case 3:
01598 opt_point_set_3( p, c );
01599 break;
01600 }
01601 }
01602 /* compute Newton step */
01603 switch( opt_constr_num_3[c] )
01604 {
01605 case 0:
01606 tinyla_solve_3( dr, p->jac, resid );
01607 break;
01608 case 1: {
01609 const unsigned dn = p->fd.dn, d1 = p->fd.d1, d2 = p->fd.d2;
01610 realType A[4], H[9];
01611 const realType* J = p->jac;
01612 opt_face_hess_3( p, H );
01613 A[0] = J[d1] * J[d1] + J[3 + d1] * J[3 + d1] + J[6 + d1] * J[6 + d1];
01614 A[1] = J[d2] * J[d2] + J[3 + d2] * J[3 + d2] + J[6 + d2] * J[6 + d2];
01615 A[2] = J[d1] * J[d2] + J[3 + d1] * J[3 + d2] + J[6 + d1] * J[6 + d2];
01616 A[0] -= resid[0] * H[0] + resid[1] * H[3] + resid[2] * H[6];
01617 A[1] -= resid[0] * H[1] + resid[1] * H[4] + resid[2] * H[7];
01618 A[2] -= resid[0] * H[2] + resid[1] * H[5] + resid[2] * H[8];
01619 tinyla_solve_sym_2( &dr[d1], &dr[d2], A, steep[d1], steep[d2] );
01620 dr[dn] = 0;
01621 }
01622 break;
01623 case 2: {
01624 const unsigned de = p->ed.de, d1 = p->ed.d1, d2 = p->ed.d2;
01625 realType fac, H[3];
01626 const realType* J = p->jac + de;
01627 opt_edge_hess_3( p, H );
01628 fac = J[0] * J[0] + J[3] * J[3] + J[6] * J[6] - ( resid[0] * H[0] + resid[1] * H[1] + resid[2] * H[2] );
01629 dr[de] = steep[de] / fac;
01630 dr[d1] = 0, dr[d2] = 0;
01631 }
01632 break;
01633 case 3:
01634 dr[0] = dr[1] = dr[2] = 0;
01635 break;
01636 }
01637 #if DIAGNOSTICS
01638 printf( " dr = %g, %g, %g\n", dr[0], dr[1], dr[2] );
01639 #endif
01640 /* project new iteration onto [-1,1]^3 */
01641 opt_constr_unpack_3( c, cc );
01642 for( d = 0; d < 3; ++d )
01643 {
01644 if( cc[d] != 1 ) continue;
01645 r[d] += dr[d];
01646 if( r[d] <= -1 )
01647 dr[d] -= r[d] + 1, r[d] = -1, cc[d] = 0;
01648 else if( r[d] >= 1 )
01649 dr[d] -= r[d] - 1, r[d] = 1, cc[d] = 2;
01650 }
01651 c = opt_constr_pack_3( cc );
01652 } while( r1norm_3( dr[0], dr[1], dr[2] ) > 3 * MOAB_POLY_EPS );
01653 *constr = c;
01654 #if 0
01655 printf("opt_findpt_3 converged in %u iterations\n", step);
01656 #endif
01657 return r2norm_3( resid[0], resid[1], resid[2] );
01658 }
01659
01660 #undef DIAGNOSTICS
01661
01662 void opt_alloc_2( opt_data_2* p, lagrange_data* ld )
01663 {
01664 const unsigned nr = ld[0].n, ns = ld[1].n, ne = umax_2( nr, ns ), nw = 2 * ns;
01665 p->size[0] = 1;
01666 p->size[1] = nr;
01667 p->size[2] = nr * ns;
01668 p->ld = ld;
01669 p->work = tmalloc( realType, 4 * ne + nw );
01670 p->ed.x[0] = p->work + nw;
01671 p->ed.x[1] = p->ed.x[0] + ne;
01672 p->ed.fd1[0] = p->ed.x[1] + ne;
01673 p->ed.fd1[1] = p->ed.fd1[0] + ne;
01674 }
01675
01676 void opt_free_2( opt_data_2* p )
01677 {
01678 free( p->work );
01679 }
01680
01681 static void opt_area_set_2( opt_data_2* p, const realType r[2] )
01682 {
01683 lagrange_1( &p->ld[0], r[0] );
01684 lagrange_1( &p->ld[1], r[1] );
01685 }
01686
01687 /* work holds 2*ns realTypes */
01688 static void opt_area_intp_2( opt_data_2* p )
01689 {
01690 unsigned d;
01691 const lagrange_data* ld = p->ld;
01692
01693 for( d = 0; d < 2; ++d )
01694 p->x[d] =
01695 tensor_ig2( ld[0].J, ld[0].D, ld[0].n, ld[1].J, ld[1].D, ld[1].n, p->elx[d], &p->jac[d * 2], p->work );
01696 }
01697
01698 static void opt_area_set_intp_2( opt_data_2* p, const realType r[2] )
01699 {
01700 opt_area_set_2( p, r );
01701 opt_area_intp_2( p );
01702 }
01703
01704 static void opt_edge_proj_2( opt_data_2* p )
01705 {
01706 unsigned d, off = 0;
01707 const unsigned de = p->ed.de, d1 = p->ed.d1, se = p->size[de], s1 = p->size[d1], ne = p->ld[de].n, n1 = p->ld[d1].n;
01708 const realType* fD1;
01709 if( opt_constr( p->ed.constraints, d1 ) == 0 )
01710 fD1 = p->ld[d1].D_z0;
01711 else
01712 fD1 = p->ld[d1].D_zn, off = p->size[d1 + 1] - p->size[d1];
01713 for( d = 0; d < 2; ++d )
01714 {
01715 unsigned i, j;
01716 const realType* in = p->elx[d] + off;
01717 for( i = 0; i < ne; ++i, in += se )
01718 {
01719 const realType* in1 = in - off;
01720 realType* fd1 = &p->ed.fd1[d][i];
01721 p->ed.x[d][i] = *in;
01722 *fd1 = 0;
01723 for( j = 0; j < n1; ++j, in1 += s1 )
01724 *fd1 += *in1 * fD1[j];
01725 }
01726 }
01727 }
01728
01729 static void opt_edge_set_2( opt_data_2* p, const realType r[2], unsigned constr )
01730 {
01731 if( p->ed.constraints != constr )
01732 {
01733 p->ed.constraints = constr;
01734 p->ed.de = opt_constr_not[constr];
01735 p->ed.d1 = 1 - p->ed.de;
01736 opt_edge_proj_2( p );
01737 }
01738 lagrange_1( &p->ld[p->ed.de], r[p->ed.de] );
01739 }
01740
01741 static void opt_edge_intp_2( opt_data_2* p )
01742 {
01743 unsigned d;
01744 const unsigned de = p->ed.de, d1 = p->ed.d1, n = p->ld[de].n;
01745 const realType *J = p->ld[de].J, *D = p->ld[de].D;
01746 for( d = 0; d < 2; ++d )
01747 {
01748 p->x[d] = tensor_ig1( J, D, n, p->ed.x[d], &p->jac[d * 2 + de] );
01749 p->jac[d * 2 + d1] = tensor_i1( J, n, p->ed.fd1[d] );
01750 }
01751 }
01752
01753 static void opt_edge_set_intp_2( opt_data_2* p, const realType r[2], unsigned constr )
01754 {
01755 opt_edge_set_2( p, r, constr );
01756 opt_edge_intp_2( p );
01757 }
01758
01759 static void opt_edge_hess_2( opt_data_2* p, realType hess[2] )
01760 {
01761 unsigned d;
01762 const unsigned de = p->ed.de, n = p->ld[de].n;
01763 const realType* D2 = p->ld[de].D2;
01764 lagrange_2u( &p->ld[de] );
01765 for( d = 0; d < 2; ++d )
01766 hess[d] = tensor_i1( D2, n, p->ed.x[d] );
01767 }
01768
01769 static void opt_point_proj_2( opt_data_2* p )
01770 {
01771 unsigned off[2], offt, d, c[2];
01772 const realType* fD[2];
01773 opt_constr_unpack_2( p->pd.constraints, c );
01774 for( d = 0; d < 2; ++d )
01775 if( c[d] == 0 )
01776 fD[d] = p->ld[d].D_z0, off[d] = 0;
01777 else
01778 fD[d] = p->ld[d].D_zn, off[d] = p->size[d + 1] - p->size[d];
01779 offt = off[0] + off[1];
01780 for( d = 0; d < 4; ++d )
01781 p->pd.jac[d] = 0;
01782 for( d = 0; d < 2; ++d )
01783 {
01784 unsigned i, j;
01785 p->pd.x[d] = p->elx[d][offt];
01786 for( i = 0; i < 2; ++i )
01787 {
01788 const realType* in = p->elx[d] + offt - off[i];
01789 for( j = 0; j < p->ld[i].n; ++j, in += p->size[i] )
01790 p->pd.jac[d * 2 + i] += *in * fD[i][j];
01791 }
01792 }
01793 }
01794
01795 static void opt_point_set_2( opt_data_2* p, unsigned constr )
01796 {
01797 if( p->pd.constraints != constr )
01798 {
01799 p->pd.constraints = constr;
01800 opt_point_proj_2( p );
01801 }
01802 }
01803
01804 static void opt_point_intp_2( opt_data_2* p )
01805 {
01806 memcpy( p->x, p->pd.x, 2 * sizeof( realType ) );
01807 memcpy( p->jac, p->pd.jac, 4 * sizeof( realType ) );
01808 }
01809
01810 static void opt_point_set_intp_2( opt_data_2* p, unsigned constr )
01811 {
01812 opt_point_set_2( p, constr );
01813 opt_point_intp_2( p );
01814 }
01815
01816 #define DIAGNOSTICS 0
01817
01818 double opt_findpt_2( opt_data_2* p,
01819 const realType* const elx[2],
01820 const realType xstar[2],
01821 realType r[2],
01822 unsigned* constr )
01823 {
01824 realType dr[2], resid[2], steep[2];
01825
01826 unsigned c = *constr, ac, d, cc[2], step = 0;
01827
01828 p->elx[0] = elx[0], p->elx[1] = elx[1];
01829
01830 p->ed.constraints = opt_no_constraints_2;
01831 p->pd.constraints = opt_no_constraints_2;
01832
01833 #if DIAGNOSTICS
01834 printf( "opt_findpt: xstar = %g, %g\n", xstar[0], xstar[1] );
01835 #endif
01836
01837 do
01838 {
01839 ++step;
01840 if( step == 50 ) /*fail("%s: opt_findpt_2 did not converge\n",__FILE__);*/
01841 return 1.e+30;
01842 #if DIAGNOSTICS
01843 printf( " iteration %u\n", step );
01844 printf( " %d constraint(s) active\n", (int)opt_constr_num_2[c] );
01845 #endif
01846 /* update face/edge/point data if necessary,
01847 and evaluate x(r) as well as the jacobian */
01848 switch( opt_constr_num_2[c] )
01849 {
01850 case 0:
01851 opt_area_set_intp_2( p, r );
01852 break;
01853 case 1:
01854 opt_edge_set_intp_2( p, r, c );
01855 break;
01856 case 2:
01857 opt_point_set_intp_2( p, c );
01858 break;
01859 }
01860 #if DIAGNOSTICS
01861 printf( " r = %g, %g\n", r[0], r[1] );
01862 printf( " x = %g, %g\n", p->x[0], p->x[1] );
01863 #endif
01864 /* compute residual */
01865 for( d = 0; d < 2; ++d )
01866 resid[d] = xstar[d] - p->x[d];
01867 #if DIAGNOSTICS
01868 printf( " resid = %g, %g\n", resid[0], resid[1] );
01869 printf( " 2-norm = %g\n", r2norm_2( resid[0], resid[1] ) );
01870 #endif
01871 /* check constraints against steepest descent direction */
01872 ac = c;
01873 if( opt_constr_num_2[c] )
01874 {
01875 opt_constr_unpack_2( c, cc );
01876 mat_app_2c( steep, p->jac, resid ); /* steepest descent = J^T r */
01877 #if DIAGNOSTICS
01878 printf( " steepest descent = %g, %g\n", steep[0], steep[1] );
01879 #endif
01880 for( d = 0; d < 2; ++d )
01881 if( ( cc[d] == 0 && steep[d] > 0 ) || ( cc[d] == 2 && steep[d] < 0 ) ) cc[d] = 1;
01882 ac = opt_constr_pack_2( cc );
01883 }
01884 /* update face/edge/point data if necessary */
01885 if( ac != c )
01886 {
01887 c = ac;
01888 #if DIAGNOSTICS
01889 printf( " relaxed to %d constraints\n", (int)opt_constr_num_2[c] );
01890 #endif
01891 switch( opt_constr_num_2[c] )
01892 {
01893 case 1:
01894 opt_edge_set_2( p, r, c );
01895 break;
01896 case 2:
01897 opt_point_set_2( p, c );
01898 break;
01899 }
01900 }
01901 /* compute Newton step */
01902 switch( opt_constr_num_2[c] )
01903 {
01904 case 0:
01905 tinyla_solve_2( dr, p->jac, resid );
01906 break;
01907 case 1: {
01908 const unsigned de = p->ed.de, d1 = p->ed.d1;
01909 realType fac, H[2];
01910 const realType* J = p->jac + de;
01911 opt_edge_hess_2( p, H );
01912 fac = J[0] * J[0] + J[2] * J[2] - ( resid[0] * H[0] + resid[1] * H[1] );
01913 dr[de] = steep[de] / fac;
01914 dr[d1] = 0;
01915 }
01916 break;
01917 case 2:
01918 dr[0] = dr[1] = 0;
01919 break;
01920 }
01921 #if DIAGNOSTICS
01922 printf( " dr = %g, %g\n", dr[0], dr[1] );
01923 #endif
01924 /* project new iteration onto [-1,1]^2 */
01925 opt_constr_unpack_2( c, cc );
01926 for( d = 0; d < 2; ++d )
01927 {
01928 if( cc[d] != 1 ) continue;
01929 r[d] += dr[d];
01930 if( r[d] <= -1 )
01931 dr[d] -= r[d] + 1, r[d] = -1, cc[d] = 0;
01932 else if( r[d] >= 1 )
01933 dr[d] -= r[d] - 1, r[d] = 1, cc[d] = 2;
01934 }
01935 c = opt_constr_pack_2( cc );
01936 } while( r1norm_2( dr[0], dr[1] ) > 2 * MOAB_POLY_EPS );
01937 *constr = c;
01938 return r2norm_2( resid[0], resid[1] );
01939 }
01940
01941 #undef DIAGNOSTICS
01942
01943 /*--------------------------------------------------------------------------
01944 Point Finding (interface/top-level)
01945
01946 Initializing the data:
01947 unsigned nel; // number of elements
01948 const unsigned n[3]; // number of nodes in r, s, t directions
01949 const realType *xm[3]; // n[0]*n[1]*n[2]*nel x,y,z coordinates
01950 realType tol = 0.01; // how far point is allowed to be outside element
01951 // relative to element size
01952 unsigned max_size = n[0]*n[1]*n[2]*nel; // maximum size of hash table
01953
01954 findpt_data_3 *data = findpt_setup_3(xm,n,nel,max_size,tol);
01955
01956 Using the data:
01957 realType x[3] = { ... }; // point to find
01958 int el; // element number
01959 realType r[3]; // parametric coordinates
01960 int guess = 0; // do we use (el,r,s,t) as an initial guess?
01961 int code; // 0 : normal, -1 : outside all elements,
01962 // 1 : border, or outside but within tolerance
01963 realType dist; // distance in xyz space from returned (el,r,s,t) to given
01964 // (x,y,z)
01965
01966 code = findpt_3(data, x, guess, &el, r, &dist);
01967
01968 When done:
01969 findpt_free_3(&data);
01970
01971 --------------------------------------------------------------------------*/
01972
01973 /* heap sort on A[0:n-1] with key A[i]->dist
01974 precondition: n!=0 */
01975 static void findpt_list_sort( findpt_listel** A, unsigned n )
01976 {
01977 unsigned i;
01978 --A; /* make A have a base index of 1 */
01979 /* build heap */
01980 for( i = 2; i <= n; ++i )
01981 {
01982 findpt_listel* item = A[i];
01983 unsigned hole = i, parent = hole >> 1;
01984 if( A[parent]->dist >= item->dist ) continue;
01985 do
01986 {
01987 A[hole] = A[parent];
01988 hole = parent;
01989 parent >>= 1;
01990 } while( parent && A[parent]->dist < item->dist );
01991 A[hole] = item;
01992 }
01993 /* extract */
01994 for( i = n - 1; i; --i )
01995 {
01996 findpt_listel* item = A[i + 1];
01997 unsigned hole = 1;
01998 A[i + 1] = A[1];
01999 for( ;; )
02000 {
02001 unsigned ch = hole << 1, r = ch + 1;
02002 if( r <= i && A[ch]->dist < A[r]->dist ) ch = r;
02003 if( ch > i || item->dist >= A[ch]->dist ) break;
02004 A[hole] = A[ch];
02005 hole = ch;
02006 }
02007 A[hole] = item;
02008 }
02009 }
02010
02011 findpt_data_2* findpt_setup_2( const realType* const xw[2],
02012 const unsigned n[2],
02013 uint nel,
02014 uint max_hash_size,
02015 realType bbox_tol )
02016 {
02017 unsigned d;
02018 findpt_data_2* p = tmalloc( findpt_data_2, 1 );
02019
02020 p->hash = tmalloc( hash_data_2, 1 );
02021 p->od = tmalloc( opt_data_2, 1 );
02022
02023 for( d = 0; d < 2; ++d )
02024 p->xw[d] = xw[d];
02025 p->nptel = n[0] * n[1];
02026
02027 hash_build_2( p->hash, xw, n, nel, max_hash_size, bbox_tol );
02028
02029 for( d = 0; d < 2; ++d )
02030 {
02031 p->z[d] = tmalloc( realType, n[d] );
02032 lobatto_nodes( p->z[d], n[d] );
02033 lagrange_setup( &p->ld[d], p->z[d], n[d] );
02034 }
02035
02036 p->list = tmalloc( findpt_listel, p->hash->max );
02037 p->sorted = tmalloc( findpt_listel*, p->hash->max );
02038
02039 opt_alloc_2( p->od, p->ld );
02040 p->od_work = p->od->work;
02041
02042 return p;
02043 }
02044
02045 findpt_data_3* findpt_setup_3( const realType* const xw[3],
02046 const unsigned n[3],
02047 uint nel,
02048 uint max_hash_size,
02049 realType bbox_tol )
02050 {
02051 unsigned d;
02052 findpt_data_3* p = tmalloc( findpt_data_3, 1 );
02053
02054 p->hash = tmalloc( hash_data_3, 1 );
02055 p->od = tmalloc( opt_data_3, 1 );
02056
02057 for( d = 0; d < 3; ++d )
02058 p->xw[d] = xw[d];
02059 p->nptel = n[0] * n[1] * n[2];
02060
02061 hash_build_3( p->hash, xw, n, nel, max_hash_size, bbox_tol );
02062
02063 for( d = 0; d < 3; ++d )
02064 {
02065 p->z[d] = tmalloc( realType, n[d] );
02066 lobatto_nodes( p->z[d], n[d] );
02067 lagrange_setup( &p->ld[d], p->z[d], n[d] );
02068 }
02069
02070 p->list = tmalloc( findpt_listel, p->hash->max );
02071 p->sorted = tmalloc( findpt_listel*, p->hash->max );
02072
02073 opt_alloc_3( p->od, p->ld );
02074 p->od_work = p->od->work;
02075
02076 return p;
02077 }
02078
02079 void findpt_free_2( findpt_data_2* p )
02080 {
02081 unsigned d;
02082 opt_free_2( p->od );
02083 free( p->od );
02084 hash_free_2( p->hash );
02085 free( p->hash );
02086 free( p->list );
02087 free( p->sorted );
02088 for( d = 0; d < 2; ++d )
02089 free( p->z[d] );
02090 free( p );
02091 }
02092
02093 void findpt_free_3( findpt_data_3* p )
02094 {
02095 unsigned d;
02096 opt_free_3( p->od );
02097 free( p->od );
02098 hash_free_3( p->hash );
02099 free( p->hash );
02100 free( p->list );
02101 free( p->sorted );
02102 for( d = 0; d < 3; ++d )
02103 free( p->z[d] );
02104 free( p );
02105 }
02106
02107 const realType* findpt_allbnd_2( const findpt_data_2* p )
02108 {
02109 return p->hash->bnd;
02110 }
02111
02112 const realType* findpt_allbnd_3( const findpt_data_3* p )
02113 {
02114 return p->hash->bnd;
02115 }
02116
02117 static void findpt_hash_2( findpt_data_2* p, const realType x[2] )
02118 {
02119 findpt_listel *list = p->list, **sorted = p->sorted;
02120 const uint hi = hash_index_2( p->hash, x );
02121 const uint* offset = p->hash->offset;
02122 uint i;
02123 const uint b = offset[hi], e = offset[hi + 1];
02124 for( i = b; i != e; ++i )
02125 {
02126 const uint el = offset[i];
02127 realType* r = &list->r[0];
02128 const obbox_2* obb = &p->hash->obb[el];
02129 if( obbox_axis_test_2( obb, x ) ) continue;
02130 if( obbox_test_2( obb, x, r ) ) continue;
02131 list->el = el;
02132 list->dist = r1norm_2( r[0], r[1] );
02133 *sorted++ = list++;
02134 }
02135 p->end = sorted;
02136 if( p->end != p->sorted ) findpt_list_sort( p->sorted, p->end - p->sorted );
02137 }
02138
02139 static void findpt_hash_3( findpt_data_3* p, const realType x[3] )
02140 {
02141 findpt_listel *list = p->list, **sorted = p->sorted;
02142 const uint hi = hash_index_3( p->hash, x );
02143 const uint* offset = p->hash->offset;
02144 uint i;
02145 const uint b = offset[hi], e = offset[hi + 1];
02146 for( i = b; i != e; ++i )
02147 {
02148 const uint el = offset[i];
02149 realType* r = &list->r[0];
02150 const obbox_3* obb = &p->hash->obb[el];
02151 if( obbox_axis_test_3( obb, x ) ) continue;
02152 if( obbox_test_3( obb, x, r ) ) continue;
02153 list->el = el;
02154 list->dist = r1norm_3( r[0], r[1], r[2] );
02155 *sorted++ = list++;
02156 }
02157 p->end = sorted;
02158 if( p->end != p->sorted ) findpt_list_sort( p->sorted, p->end - p->sorted );
02159 }
02160
02161 static int findpt_guess_2( findpt_data_2* p, const realType x[2], uint el, realType r[2], realType* dist )
02162 {
02163 const uint arrindex = p->nptel * el;
02164 const realType* elx[2];
02165 realType g[2];
02166 unsigned c = opt_no_constraints_2;
02167 const obbox_2* obb = &p->hash->obb[el];
02168 elx[0] = p->xw[0] + arrindex;
02169 elx[1] = p->xw[1] + arrindex;
02170 if( obbox_axis_test_2( obb, x ) || obbox_test_2( obb, x, g ) ) return 0;
02171 *dist = opt_findpt_2( p->od, elx, x, r, &c );
02172 return c == opt_no_constraints_2;
02173 }
02174
02175 static int findpt_guess_3( findpt_data_3* p, const realType x[3], uint el, realType r[3], realType* dist )
02176 {
02177 const uint arrindex = p->nptel * el;
02178 const realType* elx[3];
02179 realType g[3];
02180 unsigned c = opt_no_constraints_3;
02181 const obbox_3* obb = &p->hash->obb[el];
02182 elx[0] = p->xw[0] + arrindex;
02183 elx[1] = p->xw[1] + arrindex;
02184 elx[2] = p->xw[2] + arrindex;
02185 if( obbox_axis_test_3( obb, x ) || obbox_test_3( obb, x, g ) ) return 0;
02186 *dist = opt_findpt_3( p->od, elx, x, r, &c );
02187 return c == opt_no_constraints_3;
02188 }
02189
02190 #define DIAGNOSTICS 0
02191
02192 static int findpt_pass_2( findpt_data_2* p, const realType x[2], uint* el, realType r[2], realType* dist_min )
02193 {
02194 findpt_listel** qq = p->sorted;
02195 const realType* bnd;
02196 realType dist;
02197 do
02198 {
02199 findpt_listel* q = *qq;
02200 const uint arrindex = p->nptel * q->el;
02201 const realType* elx[2];
02202 unsigned c = opt_no_constraints_2;
02203 elx[0] = p->xw[0] + arrindex;
02204 elx[1] = p->xw[1] + arrindex;
02205 dist = opt_findpt_2( p->od, elx, x, q->r, &c );
02206 if( qq == p->sorted || dist < *dist_min || c == opt_no_constraints_2 )
02207 {
02208 *dist_min = dist;
02209 *el = q->el;
02210 memcpy( r, q->r, 2 * sizeof( realType ) );
02211 if( c == opt_no_constraints_2 ) return 0;
02212 }
02213 } while( ++qq != p->end );
02214 bnd = p->hash->obb[*el].axis_bnd;
02215 return *dist_min > r2norm_2( bnd[1] - bnd[0], bnd[3] - bnd[2] ) ? -1 : 1;
02216 }
02217
02218 static int findpt_pass_3( findpt_data_3* p, const realType x[3], uint* el, realType r[3], realType* dist_min )
02219 {
02220 findpt_listel** qq = p->sorted;
02221 const realType* bnd;
02222 realType dist;
02223 do
02224 {
02225 findpt_listel* q = *qq;
02226 const uint arrindex = p->nptel * q->el;
02227 const realType* elx[3];
02228 unsigned c = opt_no_constraints_3;
02229 elx[0] = p->xw[0] + arrindex;
02230 elx[1] = p->xw[1] + arrindex;
02231 elx[2] = p->xw[2] + arrindex;
02232 dist = opt_findpt_3( p->od, elx, x, q->r, &c );
02233 if( qq == p->sorted || dist < *dist_min || c == opt_no_constraints_3 )
02234 {
02235 *dist_min = dist;
02236 *el = q->el;
02237 memcpy( r, q->r, 3 * sizeof( realType ) );
02238 if( c == opt_no_constraints_3 )
02239 {
02240 #if DIAGNOSTICS
02241 printf( "point found in element #%d\n", qq - p->sorted );
02242 #endif
02243 return 0;
02244 }
02245 }
02246 } while( ++qq != p->end );
02247 bnd = p->hash->obb[*el].axis_bnd;
02248 return *dist_min > r2norm_3( bnd[1] - bnd[0], bnd[3] - bnd[2], bnd[5] - bnd[4] ) ? -1 : 1;
02249 }
02250
02251 int findpt_2( findpt_data_2* p, const realType x[2], int guess, uint* el, realType r[2], realType* dist )
02252 {
02253 if( guess && findpt_guess_2( p, x, *el, r, dist ) ) return 0;
02254 findpt_hash_2( p, x );
02255 if( p->sorted == p->end ) return -1;
02256 return findpt_pass_2( p, x, el, r, dist );
02257 }
02258
02259 int findpt_3( findpt_data_3* p, const realType x[3], int guess, uint* el, realType r[3], realType* dist )
02260 {
02261 if( guess && findpt_guess_3( p, x, *el, r, dist ) ) return 0;
02262 findpt_hash_3( p, x );
02263 #if DIAGNOSTICS
02264 printf( "hashing leaves %d elements to consider\n", p->end - p->sorted );
02265 #endif
02266 if( p->sorted == p->end ) return -1;
02267 return findpt_pass_3( p, x, el, r, dist );
02268 }
02269
02270 void findpt_weights_2( findpt_data_2* p, const realType r[2] )
02271 {
02272 lagrange_0( &p->ld[0], r[0] );
02273 lagrange_0( &p->ld[1], r[1] );
02274 }
02275
02276 void findpt_weights_3( findpt_data_3* p, const realType r[3] )
02277 {
02278 lagrange_0( &p->ld[0], r[0] );
02279 lagrange_0( &p->ld[1], r[1] );
02280 lagrange_0( &p->ld[2], r[2] );
02281 }
02282
02283 double findpt_eval_2( findpt_data_2* p, const realType* u )
02284 {
02285 return tensor_i2( p->ld[0].J, p->ld[0].n, p->ld[1].J, p->ld[1].n, u, p->od_work );
02286 }
02287
02288 double findpt_eval_3( findpt_data_3* p, const realType* u )
02289 {
02290 return tensor_i3( p->ld[0].J, p->ld[0].n, p->ld[1].J, p->ld[1].n, p->ld[2].J, p->ld[2].n, u, p->od_work );
02291 }