NFFT  3.5.3
fastsum.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
25 #include "config.h"
26 
27 #include <stdlib.h>
28 #include <math.h>
29 #ifdef HAVE_COMPLEX_H
30 #include <complex.h>
31 #endif
32 
33 #include "nfft3.h"
34 #include "fastsum.h"
35 #include "infft.h"
36 
37 // Required for test if (ths->k == one_over_x)
38 #include "kernels.h"
39 
46 static int max_i(int a, int b)
47 {
48  return a >= b ? a : b;
49 }
50 
52 static R fak(int n)
53 {
54  if (n <= 1)
55  return K(1.0);
56  else
57  return (R)(n) * fak(n - 1);
58 }
59 
61 static R binom(int n, int m)
62 {
63  return fak(n) / fak(m) / fak(n - m);
64 }
65 
67 static R BasisPoly(int m, int r, R xx)
68 {
69  int k;
70  R sum = K(0.0);
71 
72  for (k = 0; k <= m - r; k++)
73  {
74  sum += binom(m + k, k) * POW((xx + K(1.0)) / K(2.0), (R) k);
75  }
76  return sum * POW((xx + K(1.0)), (R) r) * POW(K(1.0) - xx, (R) (m + 1))
77  / (R)(1 << (m + 1)) / fak(r); /* 1<<(m+1) = 2^(m+1) */
78 }
79 
81 C regkern(kernel k, R xx, int p, const R *param, R a, R b)
82 {
83  int r;
84  C sum = K(0.0);
85 
86  if (xx < -K(0.5))
87  xx = -K(0.5);
88  if (xx > K(0.5))
89  xx = K(0.5);
90  if ((xx >= -K(0.5) + b && xx <= -a) || (xx >= a && xx <= K(0.5) - b))
91  {
92  return k(xx, 0, param);
93  }
94  else if (xx < -K(0.5) + b)
95  {
96  sum = (k(-K(0.5), 0, param) + k(K(0.5), 0, param)) / K(2.0)
97  * BasisPoly(p - 1, 0, K(2.0) * xx / b + (K(1.0) - b) / b);
98  for (r = 0; r < p; r++)
99  {
100  sum += POW(-b / K(2.0), (R) r) * k(-K(0.5) + b, r, param)
101  * BasisPoly(p - 1, r, -K(2.0) * xx / b + (b - K(1.0)) / b);
102  }
103  return sum;
104  }
105  else if ((xx > -a) && (xx < a))
106  {
107  for (r = 0; r < p; r++)
108  {
109  sum +=
110  POW(a, (R) r)
111  * (k(-a, r, param) * BasisPoly(p - 1, r, xx / a)
112  + k(a, r, param) * BasisPoly(p - 1, r, -xx / a)
113  * (r & 1 ? -1 : 1));
114  }
115  return sum;
116  }
117  else if (xx > K(0.5) - b)
118  {
119  sum = (k(-K(0.5), 0, param) + k(K(0.5), 0, param)) / K(2.0)
120  * BasisPoly(p - 1, 0, -K(2.0) * xx / b + (K(1.0) - b) / b);
121  for (r = 0; r < p; r++)
122  {
123  sum += POW(b / K(2.0), (R) r) * k(K(0.5) - b, r, param)
124  * BasisPoly(p - 1, r, K(2.0) * xx / b - (K(1.0) - b) / b);
125  }
126  return sum;
127  }
128  return k(xx, 0, param);
129 }
130 
134 static C regkern1(kernel k, R xx, int p, const R *param, R a, R b)
135 {
136  int r;
137  C sum = K(0.0);
138 
139  if (xx < -K(0.5))
140  xx = -K(0.5);
141  if (xx > K(0.5))
142  xx = K(0.5);
143  if ((xx >= -K(0.5) + b && xx <= -a) || (xx >= a && xx <= K(0.5) - b))
144  {
145  return k(xx, 0, param);
146  }
147  else if ((xx > -a) && (xx < a))
148  {
149  for (r = 0; r < p; r++)
150  {
151  sum +=
152  POW(a, (R) r)
153  * (k(-a, r, param) * BasisPoly(p - 1, r, xx / a)
154  + k(a, r, param) * BasisPoly(p - 1, r, -xx / a)
155  * (r & 1 ? -1 : 1));
156  }
157  return sum;
158  }
159  else if (xx < -K(0.5) + b)
160  {
161  for (r = 0; r < p; r++)
162  {
163  sum += POW(b, (R) r)
164  * (k(K(0.5) - b, r, param) * BasisPoly(p - 1, r, (xx + K(0.5)) / b)
165  + k(-K(0.5) + b, r, param) * BasisPoly(p - 1, r, -(xx + K(0.5)) / b)
166  * (r & 1 ? -1 : 1));
167  }
168  return sum;
169  }
170  else if (xx > K(0.5) - b)
171  {
172  for (r = 0; r < p; r++)
173  {
174  sum += POW(b, (R) r)
175  * (k(K(0.5) - b, r, param) * BasisPoly(p - 1, r, (xx - K(0.5)) / b)
176  + k(-K(0.5) + b, r, param) * BasisPoly(p - 1, r, -(xx - K(0.5)) / b)
177  * (r & 1 ? -1 : 1));
178  }
179  return sum;
180  }
181  return k(xx, 0, param);
182 }
183 
185 //static C regkern2(kernel k, R xx, int p, const R *param, R a, R b)
186 //{
187 // int r;
188 // C sum = K(0.0);
189 //
190 // xx = FABS(xx);
191 //
192 // if (xx > K(0.5))
193 // {
194 // for (r = 0; r < p; r++)
195 // {
196 // sum += POW(b, (R) r) * k(K(0.5) - b, r, param)
197 // * (BasisPoly(p - 1, r, 0) + BasisPoly(p - 1, r, 0));
198 // }
199 // return sum;
200 // }
201 // else if ((a <= xx) && (xx <= K(0.5) - b))
202 // {
203 // return k(xx, 0, param);
204 // }
205 // else if (xx < a)
206 // {
207 // for (r = 0; r < p; r++)
208 // {
209 // sum += POW(-a, (R) r) * k(a, r, param)
210 // * (BasisPoly(p - 1, r, xx / a) + BasisPoly(p - 1, r, -xx / a));
211 // }
212 // return sum;
213 // }
214 // else if ((K(0.5) - b < xx) && (xx <= K(0.5)))
215 // {
216 // for (r = 0; r < p; r++)
217 // {
218 // sum += POW(b, (R) r) * k(K(0.5) - b, r, param)
219 // * (BasisPoly(p - 1, r, (xx - K(0.5)) / b)
220 // + BasisPoly(p - 1, r, -(xx - K(0.5)) / b));
221 // }
222 // return sum;
223 // }
224 // return K(0.0);
225 //}
226 
230 static C regkern3(kernel k, R xx, int p, const R *param, R a, R b)
231 {
232  int r;
233  C sum = K(0.0);
234 
235  xx = FABS(xx);
236 
237  if (xx >= K(0.5))
238  {
239  /*return kern(typ,c,0,K(0.5));*/
240  xx = K(0.5);
241  }
242  /* else */
243  if ((a <= xx) && (xx <= K(0.5) - b))
244  {
245  return k(xx, 0, param);
246  }
247  else if (xx < a)
248  {
249  for (r = 0; r < p; r++)
250  {
251  sum += POW(-a, (R) r) * k(a, r, param)
252  * (BasisPoly(p - 1, r, xx / a) + BasisPoly(p - 1, r, -xx / a));
253  }
254  /*sum=kern(typ,c,0,xx); */
255  return sum;
256  }
257  else if ((K(0.5) - b < xx) && (xx <= K(0.5)))
258  {
259  sum = k(K(0.5), 0, param) * BasisPoly(p - 1, 0, -K(2.0) * xx / b + (K(1.0) - b) / b);
260  /* sum=regkern2(typ,c,p,a,b, K(0.5))*BasisPoly(p-1,0,-K(2.0)*xx/b+(K(1.0)-b)/b); */
261  for (r = 0; r < p; r++)
262  {
263  sum += POW(b / K(2.0), (R) r) * k(K(0.5) - b, r, param)
264  * BasisPoly(p - 1, r, K(2.0) * xx / b - (K(1.0) - b) / b);
265  }
266  return sum;
267  }
268  return K(0.0);
269 }
270 
272 //static C linintkern(const R x, const C *Add, const int Ad, const R a)
273 //{
274 // R c, c1, c3;
275 // int r;
276 // C f1, f2;
277 //
278 // c = x * Ad / a;
279 // r = (int)(LRINT(c));
280 // r = abs(r);
281 // f1 = Add[r];
282 // f2 = Add[r + 1];
283 // c = FABS(c);
284 // c1 = c - r;
285 // c3 = c1 - K(1.0);
286 // return (-f1 * c3 + f2 * c1);
287 //}
288 //
289 //static C quadrintkern(const R x, const C *Add, const int Ad, const R a)
290 //{
291 // R c, c1, c2, c3;
292 // int r;
293 // C f0, f1, f2;
294 //
295 // c = x * Ad / a;
296 // r = (int)(LRINT(c));
297 // r = abs(r);
298 // if (r == 0)
299 // {
300 // f0 = Add[r + 1];
301 // f1 = Add[r];
302 // f2 = Add[r + 1];
303 // }
304 // else
305 // {
306 // f0 = Add[r - 1];
307 // f1 = Add[r];
308 // f2 = Add[r + 1];
309 // }
310 // c = FABS(c);
311 // c1 = c - r;
312 // c2 = c1 + K(1.0);
313 // c3 = c1 - K(1.0);
314 // return (f0 * c1 * c3 / K(2.0) - f1 * c2 * c3 + f2 * c2 * c1 / K(2.0));
315 //}
316 
318 C kubintkern(const R x, const C *Add, const int Ad, const R a)
319 {
320  R c, c1, c2, c3, c4;
321  int r;
322  C f0, f1, f2, f3;
323  c = x * (R)(Ad) / a;
324  r = (int)(LRINT(c));
325  r = abs(r);
326  if (r == 0)
327  {
328  f0 = Add[r + 1];
329  f1 = Add[r];
330  f2 = Add[r + 1];
331  f3 = Add[r + 2];
332  }
333  else
334  {
335  f0 = Add[r - 1];
336  f1 = Add[r];
337  f2 = Add[r + 1];
338  f3 = Add[r + 2];
339  }
340  c = FABS(c);
341  c1 = c - (R)(r);
342  c2 = c1 + K(1.0);
343  c3 = c1 - K(1.0);
344  c4 = c1 - K(2.0);
345  /* return(-f0*(c-r)*(c-r-K(1.0))*(c-r-K(2.0))/K(6.0)+f1*(c-r+K(1.0))*(c-r-K(1.0))*(c-r-K(2.0))/2-
346  f2*(c-r+K(1.0))*(c-r)*(c-r-K(2.0))/2+f3*(c-r+K(1.0))*(c-r)*(c-r-K(1.0))/K(6.0)); */
347  return (-f0 * c1 * c3 * c4 / K(6.0) + f1 * c2 * c3 * c4 / K(2.0)
348  - f2 * c2 * c1 * c4 / K(2.0) + f3 * c2 * c1 * c3 / K(6.0));
349 }
350 
352 static C kubintkern1(const R x, const C *Add, const int Ad, const R a)
353 {
354  R c, c1, c2, c3, c4;
355  int r;
356  C f0, f1, f2, f3;
357  Add += 2;
358  c = (x + a) * (R)(Ad) / K(2.0) / a;
359  r = (int)(LRINT(c));
360  r = abs(r);
361  /*if (r==0) {f0=Add[r];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
362  else */
363  {
364  f0 = Add[r - 1];
365  f1 = Add[r];
366  f2 = Add[r + 1];
367  f3 = Add[r + 2];
368  }
369  c = FABS(c);
370  c1 = c - (R)(r);
371  c2 = c1 + K(1.0);
372  c3 = c1 - K(1.0);
373  c4 = c1 - K(2.0);
374  /* return(-f0*(c-r)*(c-r-K(1.0))*(c-r-K(2.0))/K(6.0)+f1*(c-r+K(1.0))*(c-r-K(1.0))*(c-r-K(2.0))/2-
375  f2*(c-r+K(1.0))*(c-r)*(c-r-K(2.0))/2+f3*(c-r+K(1.0))*(c-r)*(c-r-K(1.0))/K(6.0)); */
376  return (-f0 * c1 * c3 * c4 / K(6.0) + f1 * c2 * c3 * c4 / K(2.0)
377  - f2 * c2 * c1 * c4 / K(2.0) + f3 * c2 * c1 * c3 / K(6.0));
378 }
379 
381 static void quicksort(int d, int t, R *x, C *alpha, int *permutation_x_alpha, int N)
382 {
383  int lpos = 0;
384  int rpos = N - 1;
385  /*R pivot=x[((N-1)/2)*d+t];*/
386  R pivot = x[(N / 2) * d + t];
387 
388  int k;
389  R temp1;
390  C temp2;
391  int temp_int;
392 
393  while (lpos <= rpos)
394  {
395  while (x[lpos * d + t] < pivot)
396  lpos++;
397  while (x[rpos * d + t] > pivot)
398  rpos--;
399  if (lpos <= rpos)
400  {
401  for (k = 0; k < d; k++)
402  {
403  temp1 = x[lpos * d + k];
404  x[lpos * d + k] = x[rpos * d + k];
405  x[rpos * d + k] = temp1;
406  }
407  temp2 = alpha[lpos];
408  alpha[lpos] = alpha[rpos];
409  alpha[rpos] = temp2;
410 
411  if (permutation_x_alpha)
412  {
413  temp_int = permutation_x_alpha[lpos];
414  permutation_x_alpha[lpos] = permutation_x_alpha[rpos];
415  permutation_x_alpha[rpos] = temp_int;
416  }
417 
418  lpos++;
419  rpos--;
420  }
421  }
422  if (0 < rpos)
423  quicksort(d, t, x, alpha, permutation_x_alpha, rpos + 1);
424  if (lpos < N - 1)
425  quicksort(d, t, x + lpos * d, alpha + lpos, permutation_x_alpha ? permutation_x_alpha + lpos : NULL, N - lpos);
426 }
427 
429 static void BuildBox(fastsum_plan *ths)
430 {
431  int t, l;
432  int *box_index;
433  R val[ths->d];
434 
435  box_index = (int *) NFFT(malloc)((size_t)(ths->box_count) * sizeof(int));
436  for (t = 0; t < ths->box_count; t++)
437  box_index[t] = 0;
438 
439  for (l = 0; l < ths->N_total; l++)
440  {
441  int ind = 0;
442  for (t = 0; t < ths->d; t++)
443  {
444  val[t] = ths->x[ths->d * l + t] + K(0.25) - ths->eps_B / K(2.0);
445  ind *= ths->box_count_per_dim;
446  ind += (int) (val[t] / ths->eps_I);
447  }
448  box_index[ind]++;
449  }
450 
451  ths->box_offset[0] = 0;
452  for (t = 1; t <= ths->box_count; t++)
453  {
454  ths->box_offset[t] = ths->box_offset[t - 1] + box_index[t - 1];
455  box_index[t - 1] = ths->box_offset[t - 1];
456  }
457 
458  for (l = 0; l < ths->N_total; l++)
459  {
460  int ind = 0;
461  for (t = 0; t < ths->d; t++)
462  {
463  val[t] = ths->x[ths->d * l + t] + K(0.25) - ths->eps_B / K(2.0);
464  ind *= ths->box_count_per_dim;
465  ind += (int) (val[t] / ths->eps_I);
466  }
467 
468  ths->box_alpha[box_index[ind]] = ths->alpha[l];
469 
470  for (t = 0; t < ths->d; t++)
471  {
472  ths->box_x[ths->d * box_index[ind] + t] = ths->x[ths->d * l + t];
473  }
474  box_index[ind]++;
475  }
476  NFFT(free)(box_index);
477 }
478 
480 static inline C calc_SearchBox(int d, R *y, R *x, C *alpha, int start,
481  int end_lt, const C *Add, const int Ad, int p, R a, const kernel k,
482  const R *param, const unsigned flags)
483 {
484  C result = K(0.0);
485 
486  int m, l;
487  R r;
488 
489  for (m = start; m < end_lt; m++)
490  {
491  if (d == 1)
492  {
493  r = y[0] - x[m];
494  }
495  else
496  {
497  r = K(0.0);
498  for (l = 0; l < d; l++)
499  r += (y[l] - x[m * d + l]) * (y[l] - x[m * d + l]);
500  r = SQRT(r);
501  }
502  if (FABS(r) < a)
503  {
504  result += alpha[m] * k(r, 0, param); /* alpha*(kern-regkern) */
505  if (d == 1)
506  {
507  if (flags & EXACT_NEARFIELD)
508  result -= alpha[m] * regkern1(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in 1D) */
509  else
510  result -= alpha[m] * kubintkern1(r, Add, Ad, a); /* spline approximation */
511  }
512  else
513  {
514  if (flags & EXACT_NEARFIELD)
515  result -= alpha[m] * regkern(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in dD) */
516  else
517 #if defined(NF_KUB)
518  result -= alpha[m] * kubintkern(r, Add, Ad, a); /* spline approximation */
519 #elif defined(NF_QUADR)
520  result -= alpha[m]*quadrintkern(r,Add,Ad,a); /* spline approximation */
521 #elif defined(NF_LIN)
522  result -= alpha[m]*linintkern(r,Add,Ad,a); /* spline approximation */
523 #else
524 #error define interpolation method
525 #endif
526  }
527  }
528  }
529  return result;
530 }
531 
533 static C SearchBox(R *y, fastsum_plan *ths)
534 {
535  C val = K(0.0);
536  int t;
537  int y_multiind[ths->d];
538  int multiindex[ths->d];
539  int y_ind;
540 
541  for (t = 0; t < ths->d; t++)
542  {
543  y_multiind[t] = (int)(LRINT((y[t] + K(0.25) - ths->eps_B / K(2.0)) / ths->eps_I));
544  }
545 
546  if (ths->d == 1)
547  {
548  for (y_ind = max_i(0, y_multiind[0] - 1);
549  y_ind < ths->box_count_per_dim && y_ind <= y_multiind[0] + 1; y_ind++)
550  {
551  val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha,
552  ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->Add, ths->Ad,
553  ths->p, ths->eps_I, ths->k, ths->kernel_param, ths->flags);
554  }
555  }
556  else if (ths->d == 2)
557  {
558  for (multiindex[0] = max_i(0, y_multiind[0] - 1);
559  multiindex[0] < ths->box_count_per_dim
560  && multiindex[0] <= y_multiind[0] + 1; multiindex[0]++)
561  for (multiindex[1] = max_i(0, y_multiind[1] - 1);
562  multiindex[1] < ths->box_count_per_dim
563  && multiindex[1] <= y_multiind[1] + 1; multiindex[1]++)
564  {
565  y_ind = (ths->box_count_per_dim * multiindex[0]) + multiindex[1];
566  val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha,
567  ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->Add,
568  ths->Ad, ths->p, ths->eps_I, ths->k, ths->kernel_param, ths->flags);
569  }
570  }
571  else if (ths->d == 3)
572  {
573  for (multiindex[0] = max_i(0, y_multiind[0] - 1);
574  multiindex[0] < ths->box_count_per_dim
575  && multiindex[0] <= y_multiind[0] + 1; multiindex[0]++)
576  for (multiindex[1] = max_i(0, y_multiind[1] - 1);
577  multiindex[1] < ths->box_count_per_dim
578  && multiindex[1] <= y_multiind[1] + 1; multiindex[1]++)
579  for (multiindex[2] = max_i(0, y_multiind[2] - 1);
580  multiindex[2] < ths->box_count_per_dim
581  && multiindex[2] <= y_multiind[2] + 1; multiindex[2]++)
582  {
583  y_ind = ((ths->box_count_per_dim * multiindex[0]) + multiindex[1])
584  * ths->box_count_per_dim + multiindex[2];
585  val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha,
586  ths->box_offset[y_ind], ths->box_offset[y_ind + 1], ths->Add,
587  ths->Ad, ths->p, ths->eps_I, ths->k, ths->kernel_param,
588  ths->flags);
589  }
590  }
591  else
592  {
593  return 0.0/0.0; //exit(EXIT_FAILURE);
594  }
595  return val;
596 }
597 
599 static void BuildTree(int d, int t, R *x, C *alpha, int *permutation_x_alpha, int N)
600 {
601  if (N > 1)
602  {
603  int m = N / 2;
604 
605  quicksort(d, t, x, alpha, permutation_x_alpha, N);
606 
607  BuildTree(d, (t + 1) % d, x, alpha, permutation_x_alpha, m);
608  BuildTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), permutation_x_alpha ? permutation_x_alpha + (m + 1) : NULL, N - m - 1);
609  }
610 }
611 
613 static C SearchTree(const int d, const int t, const R *x, const C *alpha,
614  const R *xmin, const R *xmax, const int N, const kernel k, const R *param,
615  const int Ad, const C *Add, const int p, const unsigned flags)
616 {
617  if (N == 0)
618  {
619  return K(0.0);
620  }
621  else
622  {
623  int m = N / 2;
624  R Min = xmin[t];
625  R Max = xmax[t];
626  R Median = x[m * d + t];
627  R a = FABS(Max - Min) / 2;
628  int l;
629  int E = 0;
630  R r;
631 
632  if (Min > Median)
633  return SearchTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), xmin,
634  xmax, N - m - 1, k, param, Ad, Add, p, flags);
635  else if (Max < Median)
636  return SearchTree(d, (t + 1) % d, x, alpha, xmin, xmax, m, k, param, Ad,
637  Add, p, flags);
638  else
639  {
640  C result = K(0.0);
641  E = 0;
642 
643  for (l = 0; l < d; l++)
644  {
645  if (x[m * d + l] > xmin[l] && x[m * d + l] < xmax[l])
646  E++;
647  }
648 
649  if (E == d)
650  {
651  if (d == 1)
652  {
653  r = xmin[0] + a - x[m]; /* remember: xmin+a = y */
654  }
655  else
656  {
657  r = K(0.0);
658  for (l = 0; l < d; l++)
659  r += (xmin[l] + a - x[m * d + l]) * (xmin[l] + a - x[m * d + l]); /* remember: xmin+a = y */
660  r = SQRT(r);
661  }
662  if (FABS(r) < a)
663  {
664  result += alpha[m] * k(r, 0, param); /* alpha*(kern-regkern) */
665  if (d == 1)
666  {
667  if (flags & EXACT_NEARFIELD)
668  result -= alpha[m] * regkern1(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in 1D) */
669  else
670  result -= alpha[m] * kubintkern1(r, Add, Ad, a); /* spline approximation */
671  }
672  else
673  {
674  if (flags & EXACT_NEARFIELD)
675  result -= alpha[m] * regkern(k, r, p, param, a, K(1.0) / K(16.0)); /* exact value (in dD) */
676  else
677 #if defined(NF_KUB)
678  result -= alpha[m] * kubintkern(r, Add, Ad, a); /* spline approximation */
679 #elif defined(NF_QUADR)
680  result -= alpha[m]*quadrintkern(r,Add,Ad,a); /* spline approximation */
681 #elif defined(NF_LIN)
682  result -= alpha[m]*linintkern(r,Add,Ad,a); /* spline approximation */
683 #else
684 #error define interpolation method
685 #endif
686  }
687  }
688  }
689  result += SearchTree(d, (t + 1) % d, x + (m + 1) * d, alpha + (m + 1), xmin,
690  xmax, N - m - 1, k, param, Ad, Add, p, flags);
691  result += SearchTree(d, (t + 1) % d, x, alpha, xmin, xmax, m, k, param, Ad, Add,
692  p, flags);
693  return result;
694  }
695  }
696 }
697 
698 static void fastsum_precompute_kernel(fastsum_plan *ths)
699 {
700  int j, k, t;
701  INT N[ths->d];
702  int n_total;
703 #ifdef MEASURE_TIME
704  ticks t0, t1;
705 #endif
706 
707  ths->MEASURE_TIME_t[0] = K(0.0);
708 
709 #ifdef MEASURE_TIME
710  t0 = getticks();
711 #endif
713  if (ths->eps_I > 0.0 && !(ths->flags & EXACT_NEARFIELD))
714  {
715  if (ths->d == 1)
716 #ifdef _OPENMP
717  #pragma omp parallel for default(shared) private(k)
718 #endif
719  for (k = -ths->Ad / 2 - 2; k <= ths->Ad / 2 + 2; k++)
720  ths->Add[k + ths->Ad / 2 + 2] = regkern1(ths->k,
721  ths->eps_I * (R) k / (R)(ths->Ad) * K(2.0), ths->p, ths->kernel_param,
722  ths->eps_I, ths->eps_B);
723  else
724 #ifdef _OPENMP
725  #pragma omp parallel for default(shared) private(k)
726 #endif
727  for (k = 0; k <= ths->Ad + 2; k++)
728  ths->Add[k] = regkern3(ths->k, ths->eps_I * (R) k / (R)(ths->Ad), ths->p,
729  ths->kernel_param, ths->eps_I, ths->eps_B);
730  }
731 #ifdef MEASURE_TIME
732  t1 = getticks();
733  ths->MEASURE_TIME_t[0] += NFFT(elapsed_seconds)(t1,t0);
734 #endif
735 
736 #ifdef MEASURE_TIME
737  t0 = getticks();
738 #endif
740  n_total = 1;
741  for (t = 0; t < ths->d; t++)
742  n_total *= ths->n;
743 
744 #ifdef _OPENMP
745  #pragma omp parallel for default(shared) private(j,k,t)
746 #endif
747  for (j = 0; j < n_total; j++)
748  {
749  if (ths->d == 1)
750  ths->b[j] = regkern1(ths->k, (R) - (j / (R)(ths->n) - K(0.5)), ths->p,
751  ths->kernel_param, ths->eps_I, ths->eps_B) / (R)(n_total);
752  else
753  {
754  k = j;
755  ths->b[j] = K(0.0);
756  for (t = 0; t < ths->d; t++)
757  {
758  ths->b[j] += ((R) (k % (ths->n)) / (R)(ths->n) - K(0.5))
759  * ((R) (k % (ths->n)) / (R)(ths->n) - K(0.5));
760  k = k / (ths->n);
761  }
762  ths->b[j] = regkern3(ths->k, SQRT(CREAL(ths->b[j])), ths->p, ths->kernel_param,
763  ths->eps_I, ths->eps_B) / (R)(n_total);
764  }
765  }
766 
767  for (t = 0; t < ths->d; t++)
768  N[t] = ths->n;
769 
770  NFFT(fftshift_complex)(ths->b, (int)(ths->d), N);
771  FFTW(execute)(ths->fft_plan);
772  NFFT(fftshift_complex)(ths->b, (int)(ths->d), N);
773 #ifdef MEASURE_TIME
774  t1 = getticks();
775  ths->MEASURE_TIME_t[0] += nfft_elapsed_seconds(t1,t0);
776 #endif
777 }
778 
779 void fastsum_init_guru_kernel(fastsum_plan *ths, int d, kernel k, R *param,
780  unsigned flags, int nn, int p, R eps_I, R eps_B)
781 {
782  int t;
783  int N[d];
784  int n_total;
785 #ifdef _OPENMP
786  int nthreads = NFFT(get_num_threads)();
787 #endif
788 
789  ths->d = d;
790 
791  ths->k = k;
792  ths->kernel_param = param;
793 
794  ths->flags = flags;
795 
796  ths->p = p;
797  ths->eps_I = eps_I; /* =(R)ths->p/(R)nn; */
798  ths->eps_B = eps_B; /* =K(1.0)/K(16.0); */
801  if (ths->eps_I > 0.0 && !(ths->flags & EXACT_NEARFIELD))
802  {
803  if (ths->d == 1)
804  {
805  ths->Ad = 4 * (ths->p) * (ths->p);
806  ths->Add = (C *) NFFT(malloc)((size_t)(ths->Ad + 5) * (sizeof(C)));
807  }
808  else
809  {
810  if (ths->k == one_over_x)
811  {
812  R delta = K(1e-8);
813  switch (p)
814  {
815  case 2:
816  delta = K(1e-3);
817  break;
818  case 3:
819  delta = K(1e-4);
820  break;
821  case 4:
822  delta = K(1e-5);
823  break;
824  case 5:
825  delta = K(1e-6);
826  break;
827  case 6:
828  delta = K(1e-6);
829  break;
830  case 7:
831  delta = K(1e-7);
832  break;
833  default:
834  delta = K(1e-8);
835  }
836 
837 #if defined(NF_KUB)
838  ths->Ad = max_i(10, (int)(LRINT(CEIL(K(1.4) / POW(delta, K(1.0) / K(4.0))))));
839  ths->Add = (C *) NFFT(malloc)((size_t)(ths->Ad + 3) * (sizeof(C)));
840 #elif defined(NF_QUADR)
841  ths->Ad = (int)(LRINT(CEIL(K(2.2)/POW(delta,K(1.0)/K(3.0)))));
842  ths->Add = (C *)NFFT(malloc)((size_t)(ths->Ad+3)*(sizeof(C)));
843 #elif defined(NF_LIN)
844  ths->Ad = (int)(LRINT(CEIL(K(1.7)/pow(delta,K(1.0)/K(2.0)))));
845  ths->Add = (C *)NFFT(malloc)((size_t)(ths->Ad+3)*(sizeof(C)));
846 #else
847 #error define NF_LIN or NF_QUADR or NF_KUB
848 #endif
849  }
850  else
851  {
852  ths->Ad = 2 * (ths->p) * (ths->p);
853  ths->Add = (C *) NFFT(malloc)((size_t)(ths->Ad + 3) * (sizeof(C)));
854  }
855  } /* multi-dimensional case */
856  } /* !EXACT_NEARFIELD == spline approximation in near field AND eps_I > 0 */
857 
858  ths->n = nn;
859  for (t = 0; t < d; t++)
860  {
861  N[t] = nn;
862  }
863 
865  n_total = 1;
866  for (t = 0; t < d; t++)
867  n_total *= nn;
868 
869  ths->b = (C*) NFFT(malloc)((size_t)(n_total) * sizeof(C));
870  ths->f_hat = (C*) NFFT(malloc)((size_t)(n_total) * sizeof(C));
871 #ifdef _OPENMP
872  #pragma omp critical (nfft_omp_critical_fftw_plan)
873  {
874  FFTW(plan_with_nthreads)(nthreads);
875 #endif
876 
877  ths->fft_plan = FFTW(plan_dft)(d, N, ths->b, ths->b, FFTW_FORWARD,
878  FFTW_ESTIMATE);
879 
880 #ifdef _OPENMP
881 }
882 #endif
883 
884  fastsum_precompute_kernel(ths);
885 }
886 
887 void fastsum_init_guru_source_nodes(fastsum_plan *ths, int N_total, int nn_oversampled, int m)
888 {
889  int t;
890  int N[ths->d], n[ths->d];
891  unsigned sort_flags_adjoint = 0U;
892 
893  if (ths->d > 1)
894  {
895 #ifdef _OPENMP
896  sort_flags_adjoint = NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT;
897 #else
898  sort_flags_adjoint = NFFT_SORT_NODES;
899 #endif
900  }
901 
902  ths->N_total = N_total;
903 
904  ths->x = (R *) NFFT(malloc)((size_t)(ths->d * N_total) * (sizeof(R)));
905  ths->alpha = (C *) NFFT(malloc)((size_t)(N_total) * (sizeof(C)));
906 
908  for (t = 0; t < ths->d; t++)
909  {
910  N[t] = ths->n;
911  n[t] = nn_oversampled;
912  }
913 
914  NFFT(init_guru)(&(ths->mv1), ths->d, N, N_total, n, m,
915  sort_flags_adjoint |
916  PRE_PHI_HUT | PRE_PSI | /*MALLOC_X | MALLOC_F_HAT | MALLOC_F |*/ FFTW_INIT
917  | ((ths->d == 1) ? FFT_OUT_OF_PLACE : 0U),
918  FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
919  ths->mv1.x = ths->x;
920  ths->mv1.f = ths->alpha;
921  ths->mv1.f_hat = ths->f_hat;
922 
923  ths->box_offset = NULL;
924  ths->box_alpha = NULL;
925  ths->box_x = NULL;
926  ths->permutation_x_alpha = NULL;
927 
928  if (ths->flags & NEARFIELD_BOXES)
929  {
930  if (ths->eps_I > 0.0)
931  {
932  ths->box_count_per_dim = (int)(LRINT(FLOOR((K(0.5) - ths->eps_B) / ths->eps_I))) + 1;
933  ths->box_count = 1;
934  for (t = 0; t < ths->d; t++)
935  ths->box_count *= ths->box_count_per_dim;
936 
937  ths->box_offset = (int *) NFFT(malloc)((size_t)(ths->box_count + 1) * sizeof(int));
938 
939  ths->box_alpha = (C *) NFFT(malloc)((size_t)(ths->N_total) * (sizeof(C)));
940 
941  ths->box_x = (R *) NFFT(malloc)((size_t)(ths->d * ths->N_total) * sizeof(R));
942  } /* eps_I > 0 */
943  } /* NEARFIELD_BOXES */
944  else
945  {
946  if ((ths->flags & STORE_PERMUTATION_X_ALPHA) && (ths->eps_I > 0.0))
947  {
948  ths->permutation_x_alpha = (int *) NFFT(malloc)((size_t)(ths->N_total) * (sizeof(int)));
949  for (int i=0; i<ths->N_total; i++)
950  ths->permutation_x_alpha[i] = i;
951  }
952  } /* search tree */
953 }
954 
955 void fastsum_init_guru_target_nodes(fastsum_plan *ths, int M_total, int nn_oversampled, int m)
956 {
957  int t;
958  int N[ths->d], n[ths->d];
959  unsigned sort_flags_trafo = 0U;
960 
961  if (ths->d > 1)
962  sort_flags_trafo = NFFT_SORT_NODES;
963 
964  ths->M_total = M_total;
965 
966  ths->y = (R *) NFFT(malloc)((size_t)(ths->d * M_total) * (sizeof(R)));
967  ths->f = (C *) NFFT(malloc)((size_t)(M_total) * (sizeof(C)));
968 
970  for (t = 0; t < ths->d; t++)
971  {
972  N[t] = ths->n;
973  n[t] = nn_oversampled;
974  }
975 
976  NFFT(init_guru)(&(ths->mv2), ths->d, N, M_total, n, m,
977  sort_flags_trafo |
978  PRE_PHI_HUT | PRE_PSI | /*MALLOC_X | MALLOC_F_HAT | MALLOC_F |*/ FFTW_INIT
979  | ((ths->d == 1) ? FFT_OUT_OF_PLACE : 0U),
980  FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
981  ths->mv2.x = ths->y;
982  ths->mv2.f = ths->f;
983  ths->mv2.f_hat = ths->f_hat;
984 }
985 
987 void fastsum_init_guru(fastsum_plan *ths, int d, int N_total, int M_total,
988  kernel k, R *param, unsigned flags, int nn, int m, int p, R eps_I, R eps_B)
989 {
990  fastsum_init_guru_kernel(ths, d, k, param, flags, nn, p, eps_I, eps_B);
991  fastsum_init_guru_source_nodes(ths, N_total, 2*nn, m);
992  fastsum_init_guru_target_nodes(ths, M_total, 2*nn, m);
993 }
994 
997 {
998  NFFT(free)(ths->x);
999  NFFT(free)(ths->alpha);
1000 
1001  NFFT(finalize)(&(ths->mv1));
1002 
1003  if (ths->flags & NEARFIELD_BOXES)
1004  {
1005  if (ths->eps_I > 0.0)
1006  {
1007  NFFT(free)(ths->box_offset);
1008  NFFT(free)(ths->box_alpha);
1009  NFFT(free)(ths->box_x);
1010  }
1011  } /* NEARFIELD_BOXES */
1012  else
1013  {
1014  if (ths->permutation_x_alpha)
1015  NFFT(free)(ths->permutation_x_alpha);
1016  } /* search tree */
1017 }
1018 
1021 {
1022  NFFT(free)(ths->y);
1023  NFFT(free)(ths->f);
1024 
1025  NFFT(finalize)(&(ths->mv2));
1026 }
1027 
1030 {
1031  if (ths->eps_I > 0.0 && !(ths->flags & EXACT_NEARFIELD))
1032  NFFT(free)(ths->Add);
1033 
1034 #ifdef _OPENMP
1035  #pragma omp critical (nfft_omp_critical_fftw_plan)
1036  {
1037 #endif
1038  FFTW(destroy_plan)(ths->fft_plan);
1039 #ifdef _OPENMP
1040 }
1041 #endif
1042 
1043  NFFT(free)(ths->b);
1044  NFFT(free)(ths->f_hat);
1045 }
1046 
1049 {
1053 }
1054 
1057 {
1058  int j, k;
1059  int t;
1060  R r;
1061 
1062 #ifdef _OPENMP
1063  #pragma omp parallel for default(shared) private(j,k,t,r)
1064 #endif
1065  for (j = 0; j < ths->M_total; j++)
1066  {
1067  ths->f[j] = K(0.0);
1068  for (k = 0; k < ths->N_total; k++)
1069  {
1070  if (ths->d == 1)
1071  r = ths->y[j] - ths->x[k];
1072  else
1073  {
1074  r = K(0.0);
1075  for (t = 0; t < ths->d; t++)
1076  r += (ths->y[j * ths->d + t] - ths->x[k * ths->d + t])
1077  * (ths->y[j * ths->d + t] - ths->x[k * ths->d + t]);
1078  r = SQRT(r);
1079  }
1080  ths->f[j] += ths->alpha[k] * ths->k(r, 0, ths->kernel_param);
1081  }
1082  }
1083 }
1084 
1087 {
1088 #ifdef MEASURE_TIME
1089  ticks t0, t1;
1090 #endif
1091 
1092  ths->MEASURE_TIME_t[1] = K(0.0);
1093  ths->MEASURE_TIME_t[3] = K(0.0);
1094 
1095 #ifdef MEASURE_TIME
1096  t0 = getticks();
1097 #endif
1098 
1099  if (ths->eps_I > 0.0)
1100  {
1101  if (ths->flags & NEARFIELD_BOXES)
1102  BuildBox(ths);
1103  else
1105  BuildTree(ths->d, 0, ths->x, ths->alpha, ths->permutation_x_alpha, ths->N_total);
1106  } /* eps_I > 0 */
1107 
1108 #ifdef MEASURE_TIME
1109  t1 = getticks();
1110  ths->MEASURE_TIME_t[3] += nfft_elapsed_seconds(t1,t0);
1111 #endif
1112 
1113 #ifdef MEASURE_TIME
1114  t0 = getticks();
1115 #endif
1117 // for (k = 0; k < ths->mv1.M_total; k++)
1118 // for (t = 0; t < ths->mv1.d; t++)
1119 // ths->mv1.x[ths->mv1.d * k + t] = -ths->x[ths->mv1.d * k + t]; /* note the factor -1 for transposed transform instead of adjoint*/
1120 
1122  if (ths->mv1.flags & PRE_LIN_PSI)
1123  NFFT(precompute_lin_psi)(&(ths->mv1));
1124 
1125  if (ths->mv1.flags & PRE_PSI)
1126  NFFT(precompute_psi)(&(ths->mv1));
1127 
1128  if (ths->mv1.flags & PRE_FULL_PSI)
1129  NFFT(precompute_full_psi)(&(ths->mv1));
1130 #ifdef MEASURE_TIME
1131  t1 = getticks();
1132  ths->MEASURE_TIME_t[1] += nfft_elapsed_seconds(t1,t0);
1133 #endif
1134 
1135 // /** init Fourier coefficients */
1136 // for (k = 0; k < ths->mv1.M_total; k++)
1137 // ths->mv1.f[k] = ths->alpha[k];
1138 }
1139 
1142 {
1143 #ifdef MEASURE_TIME
1144  ticks t0, t1;
1145 #endif
1146 
1147  ths->MEASURE_TIME_t[2] = K(0.0);
1148 
1149 #ifdef MEASURE_TIME
1150  t0 = getticks();
1151 #endif
1153 // for (j = 0; j < ths->mv2.M_total; j++)
1154 // for (t = 0; t < ths->mv2.d; t++)
1155 // ths->mv2.x[ths->mv2.d * j + t] = -ths->y[ths->mv2.d * j + t]; /* note the factor -1 for conjugated transform instead of standard*/
1156 
1158  if (ths->mv2.flags & PRE_LIN_PSI)
1159  NFFT(precompute_lin_psi)(&(ths->mv2));
1160 
1161  if (ths->mv2.flags & PRE_PSI)
1162  NFFT(precompute_psi)(&(ths->mv2));
1163 
1164  if (ths->mv2.flags & PRE_FULL_PSI)
1165  NFFT(precompute_full_psi)(&(ths->mv2));
1166 #ifdef MEASURE_TIME
1167  t1 = getticks();
1168  ths->MEASURE_TIME_t[2] += NFFT(elapsed_seconds)(t1,t0);
1169 #endif
1170 }
1171 
1174 {
1177 }
1178 
1181 {
1182  int j, k, t;
1183 #ifdef MEASURE_TIME
1184  ticks t0, t1;
1185 #endif
1186 
1187  ths->MEASURE_TIME_t[4] = K(0.0);
1188  ths->MEASURE_TIME_t[5] = K(0.0);
1189  ths->MEASURE_TIME_t[6] = K(0.0);
1190  ths->MEASURE_TIME_t[7] = K(0.0);
1191 
1192 #ifdef MEASURE_TIME
1193  t0 = getticks();
1194 #endif
1196  NFFT(adjoint)(&(ths->mv1));
1197 #ifdef MEASURE_TIME
1198  t1 = getticks();
1199  ths->MEASURE_TIME_t[4] += NFFT(elapsed_seconds)(t1,t0);
1200 #endif
1201 
1202 #ifdef MEASURE_TIME
1203  t0 = getticks();
1204 #endif
1206 #ifdef _OPENMP
1207  #pragma omp parallel for default(shared) private(k)
1208 #endif
1209  for (k = 0; k < ths->mv2.N_total; k++)
1210  ths->mv2.f_hat[k] = ths->b[k] * ths->mv1.f_hat[k];
1211 #ifdef MEASURE_TIME
1212  t1 = getticks();
1213  ths->MEASURE_TIME_t[5] += nfft_elapsed_seconds(t1,t0);
1214 #endif
1215 
1216 #ifdef MEASURE_TIME
1217  t0 = getticks();
1218 #endif
1220  NFFT(trafo)(&(ths->mv2));
1221 #ifdef MEASURE_TIME
1222  t1 = getticks();
1223  ths->MEASURE_TIME_t[6] += nfft_elapsed_seconds(t1,t0);
1224 #endif
1225 
1226 #ifdef MEASURE_TIME
1227  t0 = getticks();
1228 #endif
1229 
1231 #ifdef _OPENMP
1232  #pragma omp parallel for default(shared) private(j)
1233 #endif
1234  for (j = 0; j < ths->M_total; j++)
1235  ths->f[j] = ths->mv2.f[j];
1236 
1237  if (ths->eps_I > 0.0)
1238  {
1240  #ifdef _OPENMP
1241  #pragma omp parallel for default(shared) private(j,k,t)
1242  #endif
1243  for (j = 0; j < ths->M_total; j++)
1244  {
1245  R ymin[ths->d], ymax[ths->d];
1247  if (ths->flags & NEARFIELD_BOXES)
1248  ths->f[j] += SearchBox(ths->y + ths->d * j, ths);
1249  else
1250  {
1251  for (t = 0; t < ths->d; t++)
1252  {
1253  ymin[t] = ths->y[ths->d * j + t] - ths->eps_I;
1254  ymax[t] = ths->y[ths->d * j + t] + ths->eps_I;
1255  }
1256  ths->f[j]
1257  += SearchTree(ths->d, 0, ths->x, ths->alpha, ymin, ymax, ths->N_total,
1258  ths->k, ths->kernel_param, ths->Ad, ths->Add, ths->p, ths->flags);
1259  }
1260  }
1261  }
1262 
1263 #ifdef MEASURE_TIME
1264  t1 = getticks();
1265  ths->MEASURE_TIME_t[7] += NFFT(elapsed_seconds)(t1,t0);
1266 #endif
1267 }
1268 /* \} */
1269 
1270 /* fastsum.c */
Header file for the fast NFFT-based summation algorithm.
int N_total
number of source knots
Definition: fastsum.h:88
void fastsum_precompute(fastsum_plan *ths)
precomputation for fastsum
Definition: fastsum.c:1173
static void BuildTree(int d, int t, R *x, C *alpha, int *permutation_x_alpha, int N)
recursive sort of source knots dimension by dimension to get tree structure
Definition: fastsum.c:599
int M_total
number of target knots
Definition: fastsum.h:89
void fastsum_init_guru_target_nodes(fastsum_plan *ths, int M_total, int nn_oversampled, int m)
initialize target nodes dependent part of fast summation plan
Definition: fastsum.c:955
static R binom(int n, int m)
binomial coefficient
Definition: fastsum.c:61
int p
degree of smoothness of regularization
Definition: fastsum.h:112
int n
FS__ - fast summation.
Definition: fastsum.h:108
int Ad
near field
Definition: fastsum.h:120
R * kernel_param
parameters for kernel function
Definition: fastsum.h:98
#define STORE_PERMUTATION_X_ALPHA
If this flag is set, and eps_I > 0.0 and NEARFIELD_BOXES is not set, then the vector permutation_x_al...
Definition: fastsum.h:79
void fastsum_finalize_target_nodes(fastsum_plan *ths)
finalization of fastsum plan
Definition: fastsum.c:1020
void fastsum_init_guru(fastsum_plan *ths, int d, int N_total, int M_total, kernel k, R *param, unsigned flags, int nn, int m, int p, R eps_I, R eps_B)
initialization of fastsum plan
Definition: fastsum.c:987
R * x
source knots in d-ball with radius 1/4-eps_b/2
Definition: fastsum.h:94
void fastsum_init_guru_kernel(fastsum_plan *ths, int d, kernel k, R *param, unsigned flags, int nn, int p, R eps_I, R eps_B)
initialize node independent part of fast summation plan
Definition: fastsum.c:779
static C kubintkern1(const R x, const C *Add, const int Ad, const R a)
cubic spline interpolation in near field with arbitrary kernels
Definition: fastsum.c:352
static int max_i(int a, int b)
max
Definition: fastsum.c:46
C * f_hat
Fourier coefficients of nfft plans.
Definition: fastsum.h:110
static C regkern1(kernel k, R xx, int p, const R *param, R a, R b)
regularized kernel with K_I arbitrary and K_B periodized (used in 1D)
Definition: fastsum.c:134
R MEASURE_TIME_t[8]
Measured time for each step if MEASURE_TIME is set.
Definition: fastsum.h:134
C * b
expansion coefficients
Definition: fastsum.h:109
static void BuildBox(fastsum_plan *ths)
initialize box-based search data structures
Definition: fastsum.c:429
C * f
target evaluations
Definition: fastsum.h:92
C * alpha
source coefficients
Definition: fastsum.h:91
static C SearchTree(const int d, const int t, const R *x, const C *alpha, const R *xmin, const R *xmax, const int N, const kernel k, const R *param, const int Ad, const C *Add, const int p, const unsigned flags)
fast search in tree of source knots for near field computation
Definition: fastsum.c:613
R eps_B
outer boundary
Definition: fastsum.h:114
static C SearchBox(R *y, fastsum_plan *ths)
box-based near field correction
Definition: fastsum.c:533
void fastsum_precompute_target_nodes(fastsum_plan *ths)
precomputation for fastsum
Definition: fastsum.c:1141
static C regkern3(kernel k, R xx, int p, const R *param, R a, R b)
regularized kernel for even kernels with K_I even and K_B mirrored
Definition: fastsum.c:230
void fastsum_precompute_source_nodes(fastsum_plan *ths)
precomputation for fastsum
Definition: fastsum.c:1086
void fastsum_trafo(fastsum_plan *ths)
fast NFFT-based summation
Definition: fastsum.c:1180
void fastsum_exact(fastsum_plan *ths)
direct computation of sums
Definition: fastsum.c:1056
kernel k
kernel function
Definition: fastsum.h:97
void fastsum_finalize_kernel(fastsum_plan *ths)
finalization of fastsum plan
Definition: fastsum.c:1029
static R fak(int n)
factorial
Definition: fastsum.c:52
unsigned flags
flags precomp.
Definition: fastsum.h:100
void fastsum_finalize(fastsum_plan *ths)
finalization of fastsum plan
Definition: fastsum.c:1048
C * Add
spline values
Definition: fastsum.h:121
int * permutation_x_alpha
permutation vector of source nodes if STORE_PERMUTATION_X_ALPHA is set
Definition: fastsum.h:132
#define EXACT_NEARFIELD
Constant symbols.
Definition: fastsum.h:73
C regkern(kernel k, R xx, int p, const R *param, R a, R b)
regularized kernel with K_I arbitrary and K_B smooth to zero
Definition: fastsum.c:81
static void quicksort(int d, int t, R *x, C *alpha, int *permutation_x_alpha, int N)
quicksort algorithm for source knots and associated coefficients
Definition: fastsum.c:381
static C calc_SearchBox(int d, R *y, R *x, C *alpha, int start, int end_lt, const C *Add, const int Ad, int p, R a, const kernel k, const R *param, const unsigned flags)
inner computation function for box-based near field correction
Definition: fastsum.c:480
int d
api
Definition: fastsum.h:86
void fastsum_init_guru_source_nodes(fastsum_plan *ths, int N_total, int nn_oversampled, int m)
initialize source nodes dependent part of fast summation plan
Definition: fastsum.c:887
C one_over_x(R x, int der, const R *param)
K(x) = 1/x.
Definition: kernels.c:233
void fastsum_finalize_source_nodes(fastsum_plan *ths)
finalization of fastsum plan
Definition: fastsum.c:996
static R BasisPoly(int m, int r, R xx)
basis polynomial for regularized kernel
Definition: fastsum.c:67
C kubintkern(const R x, const C *Add, const int Ad, const R a)
linear spline interpolation in near field with even kernels
Definition: fastsum.c:318
R * y
target knots in d-ball with radius 1/4-eps_b/2
Definition: fastsum.h:95
R eps_I
inner boundary
Definition: fastsum.h:113
#define PRE_FULL_PSI
Definition: nfft3.h:186
#define FFT_OUT_OF_PLACE
Definition: nfft3.h:190
#define PRE_PSI
Definition: nfft3.h:185
#define PRE_LIN_PSI
Definition: nfft3.h:183
#define FFTW_INIT
Definition: nfft3.h:191
#define PRE_PHI_HUT
Definition: nfft3.h:181
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
Internal header file for auxiliary definitions and functions.
Header file with predefined kernels for the fast summation algorithm.
Header file for the nfft3 library.
plan for fast summation algorithm
Definition: fastsum.h:83