Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef MTGL_MTGL_STRING_H
00025 #define MTGL_MTGL_STRING_H
00026
00027 #define NUM_STREAMS_TOKENIZE 60
00028
#include <cctype>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
00033
00034 #include <mtgl/util.hpp>
00035
00036 namespace mtgl {
00037
00038 inline bool matches(char* long_string, int long_len,
00039 char* short_string, int short_len)
00040 {
00041 bool match = true;
00042 int i = 0;
00043
00044 while (match && i < short_len && i < long_len)
00045 {
00046 if (long_string[i] != short_string[i]) match = false;
00047
00048 i++;
00049 }
00050
00051 return match;
00052 }
00053
00054 char* remove_escapes(char* delimiter)
00055 {
00056
00057 int len = strlen(delimiter);
00058 char temp[len + 1];
00059 int new_len = 0;
00060
00061 for (int i = 0; i < len; i++)
00062 {
00063 if (i == 0)
00064 {
00065 if (delimiter[i] == '\"' || delimiter[i] != '\'') continue;
00066 }
00067
00068 if (i + 1 < len)
00069 {
00070 if (delimiter[i] == '\\')
00071 {
00072 if ( delimiter[i + 1] == '<')
00073 {
00074 continue;
00075 }
00076 else if ( delimiter[i + 1] == '>')
00077 {
00078 continue;
00079 }
00080 }
00081 }
00082
00083 temp[new_len] = delimiter[i];
00084 new_len++;
00085 }
00086
00087 for (int i = 0; i < new_len; i++) delimiter[i] = temp[i];
00088
00089 delimiter[new_len] = '\0';
00090
00091 #ifdef DEBUG
00092 printf("delimiter %s\n", delimiter);
00093 #endif
00094
00095 return delimiter;
00096 }
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116 char** mtgl_strtok(char* array, int num_chars, int& num_words,
00117 int est_num_words = -1)
00118 {
00119 num_words = 0;
00120 if (est_num_words < 0) est_num_words = num_chars;
00121
00122
00123 char** words = (char**) malloc((est_num_words) * sizeof(char*));
00124
00125 int i = 0;
00126 int num_streams = 1;
00127
00128
00129 #pragma mta trace "Before tokenization for loop"
00130 #pragma mta use NUM_STREAMS_TOKENIZE streams
00131 #pragma mta for all streams i of num_streams
00132 {
00133 int beg, end;
00134 determine_beg_end(num_chars, num_streams, i, beg, end);
00135 int num_local_words = 0;
00136
00137
00138 if (beg == 0)
00139 {
00140 if (isalnum(array[beg])) num_local_words++;
00141 }
00142
00143
00144 if (end != num_chars)
00145 {
00146 for (int j = beg; j < end; j++)
00147 {
00148 if (!isalnum(array[j]))
00149 {
00150 array[j] = '\0';
00151
00152 if (isalnum(array[j + 1])) num_local_words++;
00153 }
00154 }
00155 }
00156 else
00157 {
00158 for (int j = beg; j < end - 1; j++)
00159 {
00160 if (!isalnum(array[j]))
00161 {
00162 array[j] = '\0';
00163
00164 if (isalnum(array[j + 1])) num_local_words++;
00165 }
00166 }
00167 }
00168
00169
00170 int word_index = mt_incr(num_words, num_local_words);
00171
00172
00173 if (end != num_chars)
00174 {
00175 for (int j = beg; j < end; j++)
00176 {
00177 if (array[j] == '\0')
00178 {
00179 if (isalnum(array[j + 1]))
00180 {
00181 words[word_index] = &array[j + 1];
00182 word_index++;
00183 }
00184 }
00185 }
00186 }
00187 else
00188 {
00189 for (int j = beg; j < end - 1; j++)
00190 {
00191 if (array[j] == '\0')
00192 {
00193 if (isalnum(array[j + 1]))
00194 {
00195 words[word_index] = &array[j + 1];
00196 word_index++;
00197 }
00198 }
00199 }
00200 }
00201
00202
00203 if (beg == 0)
00204 {
00205 if (isalnum(array[beg]))
00206 {
00207 words[word_index] = &array[beg];
00208 word_index++;
00209 }
00210 }
00211
00212 }
00213
00214 array[num_chars - 1] = '\0';
00215
00216 return words;
00217 }
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240 char** mtgl_strtok(char* array, int num_chars, int& num_words,
00241 char* delimiter, int est_num_words = -1)
00242 {
00243
00244 delimiter = remove_escapes(delimiter);
00245
00246 num_words = 0;
00247 if (est_num_words < 0) est_num_words = num_chars;
00248
00249 int delimiter_length = strlen(delimiter);
00250
00251 int max_num_procs = 512;
00252 int max_num_streams = NUM_STREAMS_TOKENIZE * max_num_procs;
00253 int size_replicated = max_num_streams * delimiter_length;
00254
00255 char* delimiter_replicated = (char*) malloc(sizeof(char) * size_replicated);
00256 for (int i = 0; i < size_replicated; i++)
00257 {
00258 delimiter_replicated[i] = delimiter[i % delimiter_length];
00259 }
00260
00261
00262 char** words = (char**) malloc((est_num_words) * sizeof(char*));
00263
00264 int i = 0;
00265 int num_streams = 1;
00266
00267
00268 #pragma mta trace "Before tokenization for loop"
00269 #pragma mta use NUM_STREAMS_TOKENIZE streams
00270 #pragma mta for all streams i of num_streams
00271 {
00272 int beg, end;
00273 determine_beg_end(num_chars, num_streams, i, beg, end);
00274
00275 int num_local_words = 0;
00276
00277
00278 if (beg == 0)
00279 {
00280 if (matches(&array[beg], num_chars,
00281 &delimiter_replicated[i * delimiter_length],
00282 delimiter_length))
00283 {
00284 num_local_words++;
00285 }
00286 }
00287
00288
00289 if (end != num_chars)
00290 {
00291 for (int j = beg; j < end; j++)
00292 {
00293 if (matches(&array[j], num_chars - j,
00294 &delimiter_replicated[i * delimiter_length],
00295 delimiter_length))
00296 {
00297 array[j] = '\0';
00298
00299 if (isalnum(array[j + 1])) num_local_words++;
00300 }
00301 }
00302 }
00303 else
00304 {
00305 for (int j = beg; j < end - delimiter_length + 1; j++)
00306 {
00307 if (matches(&array[j], num_chars - j,
00308 &delimiter_replicated[i * delimiter_length],
00309 delimiter_length))
00310 {
00311 array[j] = '\0';
00312
00313 if (isalnum(array[j + 1])) num_local_words++;
00314 }
00315 }
00316 }
00317
00318
00319 int word_index = mt_incr(num_words, num_local_words);
00320
00321
00322 if (end != num_chars)
00323 {
00324 for (int j = beg; j < end; j++)
00325 {
00326 if (array[j] == '\0' &&
00327 matches(&array[j + 1], num_chars - (j + 1),
00328 &delimiter_replicated[i * delimiter_length + 1],
00329 delimiter_length - 1))
00330
00331 {
00332 words[word_index] = &array[j + delimiter_length];
00333 word_index++;
00334 }
00335 }
00336 }
00337 else
00338 {
00339 for (int j = beg; j < end - delimiter_length; j++)
00340 {
00341 if (array[j] == '\0' &&
00342 matches(&array[j + 1], num_chars - (j + 1),
00343 &delimiter_replicated[i * delimiter_length + 1],
00344 delimiter_length - 1))
00345
00346 {
00347 words[word_index] = &array[j + delimiter_length];
00348 word_index++;
00349 }
00350 }
00351 }
00352
00353
00354 if (beg == 0)
00355 {
00356 if (matches(&array[beg], num_chars,
00357 &delimiter_replicated[i * delimiter_length],
00358 delimiter_length))
00359 {
00360 words[word_index] = &array[beg + delimiter_length];
00361 word_index++;
00362 }
00363 }
00364
00365 }
00366
00367 array[num_chars - 1] = '\0';
00368
00369 return words;
00370 }
00371
00372 }
00373
00374 #endif