NFFT  3.5.3
nfsft.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002, 2019 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
25 #include "config.h"
26 
27 /* Include standard C headers. */
28 #include <math.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #ifdef HAVE_COMPLEX_H
32 #include <complex.h>
33 #endif
34 
35 #ifdef _OPENMP
36 #include <omp.h>
37 #endif
38 
39 /* Include NFFT3 utilities header. */
40 
41 /* Include NFFT3 library header. */
42 #include "nfft3.h"
43 
44 #include "infft.h"
45 
46 /* Include private associated Legendre functions header. */
47 #include "legendre.h"
48 
49 /* Include private API header. */
50 #include "api.h"
51 
52 #ifdef _OPENMP
53 #include "../fpt/fpt.h"
54 #endif
55 
65 #define NFSFT_DEFAULT_NFFT_CUTOFF 6
66 
72 #define NFSFT_DEFAULT_THRESHOLD 1000
73 
79 #define NFSFT_BREAK_EVEN 5
80 
87 #ifdef _OPENMP
88  static struct nfsft_wisdom wisdom = {false,0U,-1,-1,0,0,0,0,0,0};
89 #else
90  static struct nfsft_wisdom wisdom = {false,0U,-1,-1,0,0,0,0,0};
91 #endif
92 
115 static inline void c2e(nfsft_plan *plan)
116 {
117  int k;
118  int n;
119  double _Complex last;
120  double _Complex act;
121  double _Complex *xp;
122  double _Complex *xm;
123  int low;
124  int up;
125  int lowe;
126  int upe;
128  /* Set the first row to order to zero since it is unused. */
129  memset(plan->f_hat_intern,0U,(2*plan->N+2)*sizeof(double _Complex));
130 
131  /* Determine lower and upper bounds for loop processing even terms. */
132  lowe = -plan->N + (plan->N%2);
133  upe = -lowe;
134 
135  /* Process even terms. */
136  for (n = lowe; n <= upe; n += 2)
137  {
138  /* Compute new coefficients \f$\left(c_k^n\right)_{k=-M,\ldots,M}\f$ from
139  * old coefficients $\left(b_k^n\right)_{k=0,\ldots,M}$. */
140  xm = &(plan->f_hat_intern[NFSFT_INDEX(-1,n,plan)]);
141  xp = &(plan->f_hat_intern[NFSFT_INDEX(+1,n,plan)]);
142  for(k = 1; k <= plan->N; k++)
143  {
144  *xp *= 0.5;
145  *xm-- = *xp++;
146  }
147  /* Set the first coefficient in the array corresponding to this order to
148  * zero since it is unused. */
149  *xm = 0.0;
150  }
151 
152  /* Determine lower and upper bounds for loop processing odd terms. */
153  low = -plan->N + (1-plan->N%2);
154  up = -low;
155 
156  /* Process odd terms incorporating the additional sine term
157  * \f$\sin \vartheta\f$. */
158  for (n = low; n <= up; n += 2)
159  {
160  /* Compute new coefficients \f$\left(c_k^n\right)_{k=-M,\ldots,M}\f$ from
161  * old coefficients $\left(b_k^n\right)_{k=0,\ldots,M-1}$ incorporating
162  * the additional term \f$\sin \vartheta\f$. */
163  plan->f_hat_intern[NFSFT_INDEX(0,n,plan)] *= 2.0;
164  xp = &(plan->f_hat_intern[NFSFT_INDEX(-plan->N-1,n,plan)]);
165  /* Set the first coefficient in the array corresponding to this order to zero
166  * since it is unused. */
167  *xp++ = 0.0;
168  xm = &(plan->f_hat_intern[NFSFT_INDEX(plan->N,n,plan)]);
169  last = *xm;
170  *xm = 0.5 * _Complex_I * (0.5*xm[-1]);
171  *xp++ = -(*xm--);
172  for (k = plan->N-1; k > 0; k--)
173  {
174  act = *xm;
175  *xm = 0.5 * _Complex_I * (0.5*(xm[-1] - last));
176  *xp++ = -(*xm--);
177  last = act;
178  }
179  *xm = 0.0;
180  }
181 }
182 
193 static inline void c2e_transposed(nfsft_plan *plan)
194 {
195  int k;
196  int n;
197  double _Complex last;
198  double _Complex act;
199  double _Complex *xp;
200  double _Complex *xm;
201  int low;
202  int up;
203  int lowe;
204  int upe;
206  /* Determine lower and upper bounds for loop processing even terms. */
207  lowe = -plan->N + (plan->N%2);
208  upe = -lowe;
209 
210  /* Process even terms. */
211  for (n = lowe; n <= upe; n += 2)
212  {
213  /* Compute new coefficients \f$\left(b_k^n\right)_{k=0,\ldots,M}\f$ from
214  * old coefficients $\left(c_k^n\right)_{k=-M,\ldots,M}$. */
215  xm = &(plan->f_hat[NFSFT_INDEX(-1,n,plan)]);
216  xp = &(plan->f_hat[NFSFT_INDEX(+1,n,plan)]);
217  for(k = 1; k <= plan->N; k++)
218  {
219  *xp += *xm--;
220  *xp++ *= 0.5;;
221  }
222  }
223 
224  /* Determine lower and upper bounds for loop processing odd terms. */
225  low = -plan->N + (1-plan->N%2);
226  up = -low;
227 
228  /* Process odd terms. */
229  for (n = low; n <= up; n += 2)
230  {
231  /* Compute new coefficients \f$\left(b_k^n\right)_{k=0,\ldots,M-1}\f$ from
232  * old coefficients $\left(c_k^n\right)_{k=0,\ldots,M-1}$. */
233  xm = &(plan->f_hat[NFSFT_INDEX(-1,n,plan)]);
234  xp = &(plan->f_hat[NFSFT_INDEX(+1,n,plan)]);
235  for(k = 1; k <= plan->N; k++)
236  {
237  *xp++ -= *xm--;
238  }
239 
240  plan->f_hat[NFSFT_INDEX(0,n,plan)] =
241  -0.25*_Complex_I*plan->f_hat[NFSFT_INDEX(1,n,plan)];
242  last = plan->f_hat[NFSFT_INDEX(1,n,plan)];
243  plan->f_hat[NFSFT_INDEX(1,n,plan)] =
244  -0.25*_Complex_I*plan->f_hat[NFSFT_INDEX(2,n,plan)];
245 
246  xp = &(plan->f_hat[NFSFT_INDEX(2,n,plan)]);
247  for (k = 2; k < plan->N; k++)
248  {
249  act = *xp;
250  *xp = -0.25 * _Complex_I * (xp[1] - last);
251  xp++;
252  last = act;
253  }
254  *xp = 0.25 * _Complex_I * last;
255 
256  plan->f_hat[NFSFT_INDEX(0,n,plan)] *= 2.0;
257  }
258 }
259 
260 void nfsft_init(nfsft_plan *plan, int N, int M)
261 {
262  /* Call nfsft_init_advanced with default flags. */
265 }
266 
267 void nfsft_init_advanced(nfsft_plan* plan, int N, int M,
268  unsigned int flags)
269 {
270  /* Call nfsft_init_guru with the flags and default NFFT cut-off. */
271  nfsft_init_guru(plan, N, M, flags, PRE_PHI_HUT | PRE_PSI | FFTW_INIT | NFFT_OMP_BLOCKWISE_ADJOINT,
273 }
274 
275 void nfsft_init_guru(nfsft_plan *plan, int N, int M, unsigned int flags,
276  unsigned int nfft_flags, int nfft_cutoff)
277 {
278  int *nfft_size; /*< NFFT size */
279  int *fftw_size; /*< FFTW size */
280 
281  /* Save the flags in the plan. */
282  plan->flags = flags;
283 
284  /* Save the bandwidth N and the number of samples M in the plan. */
285  plan->N = N;
286  plan->M_total = M;
287 
288  /* M is fixed for FSFT algorithm */
289  if (plan->flags & NFSFT_EQUISPACED)
290  plan->M_total = (2*plan->N+2)*(plan->N+2);
291 
292  /* Calculate the next greater power of two with respect to the bandwidth N
293  * and the corresponding exponent. */
294  //next_power_of_2_exp_int(plan->N,&plan->NPT,&plan->t);
295 
296  /* Save length of array of Fourier coefficients. Owing to the data layout the
297  * length is (2N+2)(2N+2) */
298  plan->N_total = (2*plan->N+2)*(2*plan->N+2);
299 
300  /* Allocate memory for auxiliary array of spherical Fourier coefficients,
301  * if necessary. */
302  if (plan->flags & NFSFT_PRESERVE_F_HAT)
303  {
304  plan->f_hat_intern = (double _Complex*) nfft_malloc(plan->N_total*
305  sizeof(double _Complex));
306  }
307 
308  /* Allocate memory for spherical Fourier coefficients, if necessary. */
309  if (plan->flags & NFSFT_MALLOC_F_HAT)
310  {
311  plan->f_hat = (double _Complex*) nfft_malloc(plan->N_total*
312  sizeof(double _Complex));
313  }
314 
315  /* Allocate memory for samples, if necessary. */
316  if (plan->flags & NFSFT_MALLOC_F)
317  {
318  plan->f = (double _Complex*) nfft_malloc(plan->M_total*sizeof(double _Complex));
319  }
320 
321  /* Allocate memory for nodes, if necessary. */
322  if (plan->flags & NFSFT_MALLOC_X)
323  {
324  plan->x = (double*) nfft_malloc(plan->M_total*2*sizeof(double));
325  if (plan->flags & NFSFT_EQUISPACED)
326  /* Set equispaced nodes. This way also trafo_direct works correctly. */
327  for (int i=0; i<2*plan->N+2; i++)
328  for (int j=0; j<plan->N+2; j++)
329  {
330  plan->x[2*(i*(plan->N+1) + j)] = ((double)i-plan->N-1)/(2.0*plan->N+2);
331  plan->x[2*(i*(plan->N+1) + j) + 1] = ((double)j)/(2.0*plan->N+2);
332  }
333  }
334 
335  /* Check if fast algorithm is activated. */
336  if ((plan->flags & NFSFT_NO_FAST_ALGORITHM) || (plan->flags & NFSFT_EQUISPACED))
337  {
338  }
339  else
340  {
341  nfft_size = (int*)nfft_malloc(2*sizeof(int));
342  fftw_size = (int*)nfft_malloc(2*sizeof(int));
343 
345  nfft_size[0] = 2*plan->N+2;
346  nfft_size[1] = 2*plan->N+2;
347  fftw_size[0] = 4*plan->N;
348  fftw_size[1] = 4*plan->N;
349 
351  nfft_init_guru(&plan->plan_nfft, 2, nfft_size, plan->M_total, fftw_size,
352  nfft_cutoff, nfft_flags,
353  FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
354 
355  /* Assign angle array. */
356  plan->plan_nfft.x = plan->x;
357  /* Assign result array. */
358  plan->plan_nfft.f = plan->f;
359  /* Assign Fourier coefficients array. */
360  plan->plan_nfft.f_hat = plan->f_hat;
361 
364  /* Precompute. */
365  //nfft_precompute_one_psi(&plan->plan_nfft);
366 
367  /* Free auxilliary arrays. */
368  nfft_free(nfft_size);
369  nfft_free(fftw_size);
370  }
371 
372  plan->mv_trafo = (void (*) (void* ))nfsft_trafo;
373  plan->mv_adjoint = (void (*) (void* ))nfsft_adjoint;
374 }
375 
376 void nfsft_precompute(int N, double kappa, unsigned int nfsft_flags,
377  unsigned int fpt_flags)
378 {
379  int n; /*< The order n */
380 
381  /* Check if already initialized. */
382  if (wisdom.initialized == true)
383  {
384  return;
385  }
386 
387 #ifdef _OPENMP
388  #pragma omp parallel default(shared)
389  {
390  int nthreads = omp_get_num_threads();
391  #pragma omp single
392  {
393  wisdom.nthreads = nthreads;
394  }
395  }
396 #endif
397 
398  /* Save the precomputation flags. */
399  wisdom.flags = nfsft_flags;
400 
401  /* Compute and save N_max = 2^{\ceil{log_2 N}} as next greater
402  * power of two with respect to N. */
403  X(next_power_of_2_exp_int)(N,&wisdom.N_MAX,&wisdom.T_MAX);
404 
405  /* Check, if precomputation for direct algorithms needs to be performed. */
406  if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
407  {
408  wisdom.alpha = NULL;
409  wisdom.beta = NULL;
410  wisdom.gamma = NULL;
411  }
412  else
413  {
414  /* Allocate memory for three-term recursion coefficients. */
415  wisdom.alpha = (double*) nfft_malloc((wisdom.N_MAX+1)*(wisdom.N_MAX+2)*
416  sizeof(double));
417  wisdom.beta = (double*) nfft_malloc((wisdom.N_MAX+1)*(wisdom.N_MAX+2)*
418  sizeof(double));
419  wisdom.gamma = (double*) nfft_malloc((wisdom.N_MAX+1)*(wisdom.N_MAX+2)*
420  sizeof(double));
422  /* Compute three-term recurrence coefficients alpha_k^n, beta_k^n, and
423  * gamma_k^n. */
427  }
428 
429  /* Check, if precomputation for fast algorithms needs to be performed. */
430  if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
431  {
432  }
433  else if (wisdom.N_MAX >= NFSFT_BREAK_EVEN)
434  {
435  /* Precompute data for DPT/FPT. */
436 
437  /* Check, if recursion coefficients have already been calculated. */
438  if (wisdom.alpha != NULL)
439  {
440 #ifdef _OPENMP
441  #pragma omp parallel default(shared) private(n)
442  {
443  int nthreads = omp_get_num_threads();
444  int threadid = omp_get_thread_num();
445  #pragma omp single
446  {
447  wisdom.nthreads = nthreads;
448  wisdom.set_threads = (fpt_set*) nfft_malloc(nthreads*sizeof(fpt_set));
449  }
450 
451  if (threadid == 0)
452  wisdom.set_threads[threadid] = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
453  fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA);
454  else
455  wisdom.set_threads[threadid] = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
456  fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA | FPT_NO_INIT_FPT_DATA);
457 
458  #pragma omp barrier
459 
460  if (threadid != 0)
461  wisdom.set_threads[threadid]->dpt = wisdom.set_threads[0]->dpt;
462 
463  /* Perform the first part of precomputation which contains most allocations in one thread */
464  #pragma omp master
465  {
466  for (int n = 0; n <= wisdom.N_MAX; n++)
467  fpt_precompute_1(wisdom.set_threads[0],n,n);
468  }
469  #pragma omp barrier
470 
471  #pragma omp for private(n) schedule(dynamic)
472  for (n = 0; n <= wisdom.N_MAX; n++)
473  fpt_precompute_2(wisdom.set_threads[threadid],n,&wisdom.alpha[ROW(n)],
474  &wisdom.beta[ROW(n)], &wisdom.gamma[ROW(n)],n,kappa);
475  }
476 #else
477  /* Use the recursion coefficients to precompute FPT data using persistent
478  * arrays. */
480  fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA);
481  for (n = 0; n <= wisdom.N_MAX; n++)
482  {
483  /*fprintf(stderr,"%d\n",n);
484  fflush(stderr);*/
485  /* Precompute data for FPT transformation for order n. */
486  fpt_precompute(wisdom.set,n,&wisdom.alpha[ROW(n)],&wisdom.beta[ROW(n)],
487  &wisdom.gamma[ROW(n)],n,kappa);
488  }
489 #endif
490  }
491  else
492  {
493 #ifdef _OPENMP
494  #pragma omp parallel default(shared) private(n)
495  {
496  double *alpha, *beta, *gamma;
497  int nthreads = omp_get_num_threads();
498  int threadid = omp_get_thread_num();
499  #pragma omp single
500  {
501  wisdom.nthreads = nthreads;
502  wisdom.set_threads = (fpt_set*) nfft_malloc(nthreads*sizeof(fpt_set));
503  }
504 
505  alpha = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
506  beta = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
507  gamma = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
508 
509  if (threadid == 0)
510  wisdom.set_threads[threadid] = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
511  fpt_flags | FPT_AL_SYMMETRY);
512  else
513  wisdom.set_threads[threadid] = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
514  fpt_flags | FPT_AL_SYMMETRY | FPT_NO_INIT_FPT_DATA);
515 
516  #pragma omp barrier
517 
518  if (threadid != 0)
519  wisdom.set_threads[threadid]->dpt = wisdom.set_threads[0]->dpt;
520 
521  /* Perform the first part of precomputation which contains most allocations in one thread */
522  #pragma omp master
523  {
524  for (int n = 0; n <= wisdom.N_MAX; n++)
525  fpt_precompute_1(wisdom.set_threads[0],n,n);
526  }
527  #pragma omp barrier
528 
529  #pragma omp for private(n) schedule(dynamic)
530  for (n = 0; n <= wisdom.N_MAX; n++)
531  {
532  alpha_al_row(alpha,wisdom.N_MAX,n);
533  beta_al_row(beta,wisdom.N_MAX,n);
534  gamma_al_row(gamma,wisdom.N_MAX,n);
535 
536  /* Precompute data for FPT transformation for order n. */
537  fpt_precompute_2(wisdom.set_threads[threadid],n,alpha,beta,gamma,n,
538  kappa);
539  }
540  /* Free auxilliary arrays. */
541  nfft_free(alpha);
542  nfft_free(beta);
543  nfft_free(gamma);
544  }
545 #else
546  /* Allocate memory for three-term recursion coefficients. */
547  double *alpha, *beta, *gamma;
548  alpha = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
549  beta = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
550  gamma = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
552  fpt_flags | FPT_AL_SYMMETRY);
553  for (n = 0; n <= wisdom.N_MAX; n++)
554  {
555  /*fprintf(stderr,"%d NO_DIRECT\n",n);
556  fflush(stderr);*/
557  /* Compute three-term recurrence coefficients alpha_k^n, beta_k^n, and
558  * gamma_k^n. */
559  alpha_al_row(alpha,wisdom.N_MAX,n);
560  beta_al_row(beta,wisdom.N_MAX,n);
561  gamma_al_row(gamma,wisdom.N_MAX,n);
562 
563  /* Precompute data for FPT transformation for order n. */
565  kappa);
566  }
567  /* Free auxilliary arrays. */
568  nfft_free(alpha);
569  nfft_free(beta);
570  nfft_free(gamma);
571 #endif
572  }
573  }
574 
575  /* Wisdom has been initialised. */
576  wisdom.initialized = true;
577 }
578 
579 void nfsft_forget(void)
580 {
581  /* Check if wisdom has been initialised. */
582  if (wisdom.initialized == false)
583  {
584  /* Nothing to do. */
585  return;
586  }
587 
588  /* Check, if precomputation for direct algorithms has been performed. */
589  if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
590  {
591  }
592  else
593  {
594  /* Free arrays holding three-term recurrence coefficients. */
595  nfft_free(wisdom.alpha);
596  nfft_free(wisdom.beta);
597  nfft_free(wisdom.gamma);
598  wisdom.alpha = NULL;
599  wisdom.beta = NULL;
600  wisdom.gamma = NULL;
601  }
602 
603  /* Check, if precomputation for fast algorithms has been performed. */
604  if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
605  {
606  }
607  else if (wisdom.N_MAX >= NFSFT_BREAK_EVEN)
608  {
609 #ifdef _OPENMP
610  int k;
611  for (k = 0; k < wisdom.nthreads; k++)
612  fpt_finalize(wisdom.set_threads[k]);
613  nfft_free(wisdom.set_threads);
614 #else
615  /* Free precomputed data for FPT transformation. */
616  fpt_finalize(wisdom.set);
617 #endif
618  }
619 
620  /* Wisdom is now uninitialised. */
621  wisdom.initialized = false;
622 }
623 
624 
626 {
627  if (!plan)
628  return;
629 
630  if (!(plan->flags & NFSFT_NO_FAST_ALGORITHM) && !(plan->flags & NFSFT_EQUISPACED))
631  {
632  /* Finalise the nfft plan. */
633  nfft_finalize(&plan->plan_nfft);
634  }
635 
636  /* De-allocate memory for auxilliary array of spherical Fourier coefficients,
637  * if neccesary. */
638  if (plan->flags & NFSFT_PRESERVE_F_HAT)
639  {
640  nfft_free(plan->f_hat_intern);
641  }
642 
643  /* De-allocate memory for spherical Fourier coefficients, if necessary. */
644  if (plan->flags & NFSFT_MALLOC_F_HAT)
645  {
646  //fprintf(stderr,"deallocating f_hat\n");
647  nfft_free(plan->f_hat);
648  }
649 
650  /* De-allocate memory for samples, if necessary. */
651  if (plan->flags & NFSFT_MALLOC_F)
652  {
653  //fprintf(stderr,"deallocating f\n");
654  nfft_free(plan->f);
655  }
656 
657  /* De-allocate memory for nodes, if necessary. */
658  if (plan->flags & NFSFT_MALLOC_X)
659  {
660  //fprintf(stderr,"deallocating x\n");
661  nfft_free(plan->x);
662  }
663 }
664 
665 static void nfsft_set_f_nan(nfsft_plan *plan)
666 {
667  int m;
668  double nan_value = nan("");
669  for (m = 0; m < plan->M_total; m++)
670  plan->f[m] = nan_value;
671 }
672 
673 void nfsft_trafo_direct(nfsft_plan *plan)
674 {
675  int m; /*< The node index */
676  int k; /*< The degree k */
677  int n; /*< The order n */
678  int n_abs; /*< The absolute value of the order n, ie n_abs = |n| */
679  double *alpha; /*< Pointer to current three-term recurrence
680  coefficient alpha_k^n for associated Legendre
681  functions P_k^n */
682  double *gamma; /*< Pointer to current three-term recurrence
683  coefficient beta_k^n for associated Legendre
684  functions P_k^n */
685  double _Complex *a; /*< Pointer to auxiliary array for Clenshaw algor. */
686  double stheta; /*< Current angle theta for Clenshaw algorithm */
687  double sphi; /*< Current angle phi for Clenshaw algorithm */
688 
689 #ifdef MEASURE_TIME
690  plan->MEASURE_TIME_t[0] = 0.0;
691  plan->MEASURE_TIME_t[1] = 0.0;
692  plan->MEASURE_TIME_t[2] = 0.0;
693 #endif
694 
695  if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
696  {
697  nfsft_set_f_nan(plan);
698  return;
699  }
700 
701  /* Copy spherical Fourier coefficients, if necessary. */
702  if (plan->flags & NFSFT_PRESERVE_F_HAT)
703  {
704  memcpy(plan->f_hat_intern,plan->f_hat,plan->N_total*
705  sizeof(double _Complex));
706  }
707  else
708  {
709  plan->f_hat_intern = plan->f_hat;
710  }
711 
712  /* Check, if we compute with L^2-normalized spherical harmonics. If so,
713  * multiply spherical Fourier coefficients with corresponding normalization
714  * weight. */
715  if (plan->flags & NFSFT_NORMALIZED)
716  {
717  /* Traverse Fourier coefficients array. */
718 #ifdef _OPENMP
719  #pragma omp parallel for default(shared) private(k,n) schedule(dynamic)
720 #endif
721  for (k = 0; k <= plan->N; k++)
722  {
723  for (n = -k; n <= k; n++)
724  {
725  /* Multiply with normalization weight. */
726  plan->f_hat_intern[NFSFT_INDEX(k,n,plan)] *=
727  sqrt((2*k+1)/(4.0*KPI));
728  }
729  }
730  }
731 
732  /* Distinguish by bandwidth M. */
733  if (plan->N == 0)
734  {
735  /* N = 0 */
736 
737  /* Constant function */
738  for (m = 0; m < plan->M_total; m++)
739  {
740  plan->f[m] = plan->f_hat_intern[NFSFT_INDEX(0,0,plan)];
741  }
742  }
743  else
744  {
745  /* N > 0 */
746 
747  /* Evaluate
748  * \sum_{k=0}^N \sum_{n=-k}^k a_k^n P_k^{|n|}(cos theta_m) e^{i n phi_m}
749  * = \sum_{n=-N}^N \sum_{k=|n|}^N a_k^n P_k^{|n|}(cos theta_m)
750  * e^{i n phi_m}.
751  */
752 #ifdef _OPENMP
753  #pragma omp parallel for default(shared) private(m,stheta,sphi,n,a,n_abs,alpha,gamma,k)
754 #endif
755  for (m = 0; m < plan->M_total; m++)
756  {
757  /* Scale angle theta from [0,1/2] to [0,pi] and apply cosine. */
758  stheta = cos(2.0*KPI*plan->x[2*m+1]);
759  /* Scale angle phi from [-1/2,1/2] to [-pi,pi]. */
760  sphi = 2.0*KPI*plan->x[2*m];
761 
762  /* Initialize result for current node. */
763  plan->f[m] = 0.0;
764 
765  /* For n = -N,...,N, evaluate
766  * b_n := \sum_{k=|n|}^N a_k^n P_k^{|n|}(cos theta_m)
767  * using Clenshaw's algorithm.
768  */
769  for (n = -plan->N; n <= plan->N; n++)
770  {
771  /* Get pointer to Fourier coefficients vector for current order n. */
772  a = &(plan->f_hat_intern[NFSFT_INDEX(0,n,plan)]);
773 
774  /* Take absolute value of n. */
775  n_abs = abs(n);
776 
777  /* Get pointers to three-term recurrence coefficients arrays. */
778  alpha = &(wisdom.alpha[ROW(n_abs)]);
779  gamma = &(wisdom.gamma[ROW(n_abs)]);
780 
781  if (plan->N > 1024)
782  {
783  /* Clenshaw's algorithm */
784  long double _Complex it2 = a[plan->N];
785  long double _Complex it1 = a[plan->N-1];
786  for (k = plan->N; k > n_abs + 1; k--)
787  {
788  long double _Complex temp = a[k-2] + it2 * gamma[k];
789  it2 = it1 + it2 * alpha[k] * stheta;
790  it1 = temp;
791  }
792 
793  /* Compute final step if neccesary. */
794  if (n_abs < plan->N)
795  {
796  it2 = it1 + it2 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
797  }
798 
799  /* Compute final result by multiplying the fixed part
800  * gamma_|n| (1-cos^2(theta))^{|n|/2}
801  * for order n and the exponential part
802  * e^{i n phi}
803  * and add the result to f_m.
804  */
805  long double _Complex result = it2 * wisdom.gamma[ROW(n_abs)] *
806  powl(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
807 
808  plan->f[m] += result;
809  }
810  else
811  {
812  /* Clenshaw's algorithm */
813  double _Complex it2 = a[plan->N];
814  double _Complex it1 = a[plan->N-1];
815  for (k = plan->N; k > n_abs + 1; k--)
816  {
817  double _Complex temp = a[k-2] + it2 * gamma[k];
818  it2 = it1 + it2 * alpha[k] * stheta;
819  it1 = temp;
820  }
821 
822  /* Compute final step if neccesary. */
823  if (n_abs < plan->N)
824  {
825  it2 = it1 + it2 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
826  }
827 
828  /* Compute final result by multiplying the fixed part
829  * gamma_|n| (1-cos^2(theta))^{|n|/2}
830  * for order n and the exponential part
831  * e^{i n phi}
832  * and add the result to f_m.
833  */
834  plan->f[m] += it2 * wisdom.gamma[ROW(n_abs)] *
835  pow(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
836  }
837  }
838 
839  /* Write result f_m for current node to array f. */
840 // plan->f[m] = f_m;
841  }
842  }
843 }
844 
845 static void nfsft_set_f_hat_nan(nfsft_plan *plan)
846 {
847  int k, n;
848  double nan_value = nan("");
849  for (k = 0; k <= plan->N; k++)
850  for (n = -k; n <= k; n++)
851  plan->f_hat[NFSFT_INDEX(k,n,plan)] = nan_value;
852 }
853 
854 void nfsft_adjoint_direct(nfsft_plan *plan)
855 {
856  int m; /*< The node index */
857  int k; /*< The degree k */
858  int n; /*< The order n */
859  int n_abs; /*< The absolute value of the order n, ie n_abs = |n| */
860  double *alpha; /*< Pointer to current three-term recurrence
861  coefficient alpha_k^n for associated Legendre
862  functions P_k^n */
863  double *gamma; /*< Pointer to current three-term recurrence
864  coefficient beta_k^n for associated Legendre
865  functions P_k^n */
866 
867 #ifdef MEASURE_TIME
868  plan->MEASURE_TIME_t[0] = 0.0;
869  plan->MEASURE_TIME_t[1] = 0.0;
870  plan->MEASURE_TIME_t[2] = 0.0;
871 #endif
872 
873  if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
874  {
875  nfsft_set_f_hat_nan(plan);
876  return;
877  }
878 
879  /* Initialise spherical Fourier coefficients array with zeros. */
880  memset(plan->f_hat,0U,plan->N_total*sizeof(double _Complex));
881 
882  /* Distinguish by bandwidth N. */
883  if (plan->N == 0)
884  {
885  /* N == 0 */
886 
887  /* Constant function */
888  for (m = 0; m < plan->M_total; m++)
889  {
890  plan->f_hat[NFSFT_INDEX(0,0,plan)] += plan->f[m];
891  }
892  }
893  else
894  {
895  /* N > 0 */
896 
897 #ifdef _OPENMP
898  /* Traverse all orders n. */
899  #pragma omp parallel for default(shared) private(n,n_abs,alpha,gamma,m,k) schedule(dynamic)
900  for (n = -plan->N; n <= plan->N; n++)
901  {
902  /* Take absolute value of n. */
903  n_abs = abs(n);
904 
905  /* Get pointers to three-term recurrence coefficients arrays. */
906  alpha = &(wisdom.alpha[ROW(n_abs)]);
907  gamma = &(wisdom.gamma[ROW(n_abs)]);
908 
909  /* Traverse all nodes. */
910  for (m = 0; m < plan->M_total; m++)
911  {
912  /* Scale angle theta from [0,1/2] to [0,pi] and apply cosine. */
913  double stheta = cos(2.0*KPI*plan->x[2*m+1]);
914  /* Scale angle phi from [-1/2,1/2] to [-pi,pi]. */
915  double sphi = 2.0*KPI*plan->x[2*m];
916 
917  /* Transposed Clenshaw algorithm */
918  if (plan->N > 1024)
919  {
920  /* Initial step */
921  long double _Complex it1 = plan->f[m] * wisdom.gamma[ROW(n_abs)] *
922  powl(1 - stheta * stheta, 0.5*n_abs) * cexpl(-_Complex_I*n*sphi);
923  plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
924  long double _Complex it2 = 0.0;
925 
926  if (n_abs < plan->N)
927  {
928  it2 = it1 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
929  plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
930  }
931 
932  /* Loop for transposed Clenshaw algorithm */
933  for (k = n_abs+2; k <= plan->N; k++)
934  {
935  long double _Complex temp = it2;
936  it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
937  it1 = temp;
938  plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
939  }
940  }
941  else
942  {
943  /* Initial step */
944  double _Complex it1 = plan->f[m] * wisdom.gamma[ROW(n_abs)] *
945  pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
946  plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
947  double _Complex it2 = 0.0;
948 
949  if (n_abs < plan->N)
950  {
951  it2 = it1 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
952  plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
953  }
954 
955  /* Loop for transposed Clenshaw algorithm */
956  for (k = n_abs+2; k <= plan->N; k++)
957  {
958  double _Complex temp = it2;
959  it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
960  it1 = temp;
961  plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
962  }
963  }
964  }
965  }
966 #else
967  /* Traverse all nodes. */
968  for (m = 0; m < plan->M_total; m++)
969  {
970  /* Scale angle theta from [0,1/2] to [0,pi] and apply cosine. */
971  double stheta = cos(2.0*KPI*plan->x[2*m+1]);
972  /* Scale angle phi from [-1/2,1/2] to [-pi,pi]. */
973  double sphi = 2.0*KPI*plan->x[2*m];
974 
975  /* Traverse all orders n. */
976  for (n = -plan->N; n <= plan->N; n++)
977  {
978  /* Take absolute value of n. */
979  n_abs = abs(n);
980 
981  /* Get pointers to three-term recurrence coefficients arrays. */
982  alpha = &(wisdom.alpha[ROW(n_abs)]);
983  gamma = &(wisdom.gamma[ROW(n_abs)]);
984 
985  /* Transposed Clenshaw algorithm */
986  if (plan->N > 1024)
987  {
988  /* Initial step */
989  long double _Complex it1 = plan->f[m] * wisdom.gamma[ROW(n_abs)] *
990  powl(1 - stheta * stheta, 0.5*n_abs) * cexpl(-_Complex_I*n*sphi);
991  plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
992  long double _Complex it2 = 0.0;
993 
994  if (n_abs < plan->N)
995  {
996  it2 = it1 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
997  plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
998  }
999 
1000  /* Loop for transposed Clenshaw algorithm */
1001  for (k = n_abs+2; k <= plan->N; k++)
1002  {
1003  long double _Complex temp = it2;
1004  it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
1005  it1 = temp;
1006  plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
1007  }
1008  }
1009  else
1010  {
1011  /* Initial step */
1012  double _Complex it1 = plan->f[m] * wisdom.gamma[ROW(n_abs)] *
1013  pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
1014  plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
1015  double _Complex it2 = 0.0;
1016 
1017  if (n_abs < plan->N)
1018  {
1019  it2 = it1 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
1020  plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
1021  }
1022 
1023  /* Loop for transposed Clenshaw algorithm */
1024  for (k = n_abs+2; k <= plan->N; k++)
1025  {
1026  double _Complex temp = it2;
1027  it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
1028  it1 = temp;
1029  plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
1030  }
1031  }
1032  }
1033  }
1034 #endif
1035  }
1036 
1037  /* Check, if we compute with L^2-normalized spherical harmonics. If so,
1038  * multiply spherical Fourier coefficients with corresponding normalization
1039  * weight. */
1040  if (plan->flags & NFSFT_NORMALIZED)
1041  {
1042  /* Traverse Fourier coefficients array. */
1043 #ifdef _OPENMP
1044  #pragma omp parallel for default(shared) private(k,n) schedule(dynamic)
1045 #endif
1046  for (k = 0; k <= plan->N; k++)
1047  {
1048  for (n = -k; n <= k; n++)
1049  {
1050  /* Multiply with normalization weight. */
1051  plan->f_hat[NFSFT_INDEX(k,n,plan)] *=
1052  sqrt((2*k+1)/(4.0*KPI));
1053  }
1054  }
1055  }
1056 
1057  /* Set unused coefficients to zero. */
1058  if (plan->flags & NFSFT_ZERO_F_HAT)
1059  {
1060  for (n = -plan->N; n <= plan->N+1; n++)
1061  {
1062  memset(&plan->f_hat[NFSFT_INDEX(-plan->N-1,n,plan)],0U,
1063  (plan->N+1+abs(n))*sizeof(double _Complex));
1064  }
1065  }
1066 }
1067 
1069 {
1070  int k; /*< The degree k */
1071  int n; /*< The order n */
1072 #ifdef MEASURE_TIME
1073  ticks t0, t1;
1074 #endif
1075  #ifdef DEBUG
1076  double t, t_pre, t_nfft, t_fpt, t_c2e, t_norm;
1077  t_pre = 0.0;
1078  t_norm = 0.0;
1079  t_fpt = 0.0;
1080  t_c2e = 0.0;
1081  t_nfft = 0.0;
1082  #endif
1083 
1084 #ifdef MEASURE_TIME
1085  plan->MEASURE_TIME_t[0] = 0.0;
1086  plan->MEASURE_TIME_t[1] = 0.0;
1087  plan->MEASURE_TIME_t[2] = 0.0;
1088 #endif
1089 
1090  if ((wisdom.flags & NFSFT_NO_FAST_ALGORITHM) || (plan->flags & NFSFT_NO_FAST_ALGORITHM))
1091  {
1092  nfsft_set_f_nan(plan);
1093  return;
1094  }
1095 
1096  /* Check, if precomputation was done and that the bandwidth N is not too
1097  * big.
1098  */
1099  if (wisdom.initialized == 0 || plan->N > wisdom.N_MAX)
1100  {
1101  nfsft_set_f_nan(plan);
1102  return;
1103  }
1104 
1105  /* Check, if slow transformation should be used due to small bandwidth. */
1106  if (plan->N < NFSFT_BREAK_EVEN)
1107  {
1108  /* Use NDSFT. */
1109  nfsft_trafo_direct(plan);
1110  }
1111 
1112  /* Check for correct value of the bandwidth N. */
1113  else if (plan->N <= wisdom.N_MAX)
1114  {
1115  /* Copy spherical Fourier coefficients, if necessary. */
1116  if (plan->flags & NFSFT_PRESERVE_F_HAT)
1117  {
1118  memcpy(plan->f_hat_intern,plan->f_hat,plan->N_total*
1119  sizeof(double _Complex));
1120  }
1121  else
1122  {
1123  plan->f_hat_intern = plan->f_hat;
1124  }
1125 
1126  /* Propagate pointer values to the internal NFFT plan to assure
1127  * consistency. Pointers may have been modified externally.
1128  */
1129  if (!(plan->flags & NFSFT_EQUISPACED))
1130  {
1131  plan->plan_nfft.x = plan->x;
1132  plan->plan_nfft.f = plan->f;
1133  plan->plan_nfft.f_hat = plan->f_hat_intern;
1134  }
1135 
1136  /* Check, if we compute with L^2-normalized spherical harmonics. If so,
1137  * multiply spherical Fourier coefficients with corresponding normalization
1138  * weight. */
1139  if (plan->flags & NFSFT_NORMALIZED)
1140  {
1141  /* Traverse Fourier coefficients array. */
1142 #ifdef _OPENMP
1143  #pragma omp parallel for default(shared) private(k,n) schedule(dynamic)
1144 #endif
1145  for (k = 0; k <= plan->N; k++)
1146  {
1147  for (n = -k; n <= k; n++)
1148  {
1149  /* Multiply with normalization weight. */
1150  plan->f_hat_intern[NFSFT_INDEX(k,n,plan)] *=
1151  sqrt((2*k+1)/(4.0*KPI));
1152  }
1153  }
1154  }
1155 
1156 #ifdef MEASURE_TIME
1157  t0 = getticks();
1158 #endif
1159  /* Check, which polynomial transform algorithm should be used. */
1160  if (plan->flags & NFSFT_USE_DPT)
1161  {
1162 #ifdef _OPENMP
1163  n = 0;
1164  fpt_trafo_direct(wisdom.set_threads[0],abs(n),
1165  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1166  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1167  plan->N,0U);
1168 
1169  int n_abs;
1170  #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic)
1171  for (n_abs = 1; n_abs <= plan->N; n_abs++)
1172  {
1173  int threadid = omp_get_thread_num();
1174  n = -n_abs;
1175  fpt_trafo_direct(wisdom.set_threads[threadid],abs(n),
1176  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1177  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1178  plan->N,0U);
1179  n = n_abs;
1180  fpt_trafo_direct(wisdom.set_threads[threadid],abs(n),
1181  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1182  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1183  plan->N,0U);
1184  }
1185 #else
1186  /* Use direct discrete polynomial transform DPT. */
1187  for (n = -plan->N; n <= plan->N; n++)
1188  {
1189  //fprintf(stderr,"nfsft_trafo: n = %d\n",n);
1190  //fflush(stderr);
1191  fpt_trafo_direct(wisdom.set,abs(n),
1192  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1193  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1194  plan->N,0U);
1195  }
1196 #endif
1197  }
1198  else
1199  {
1200 #ifdef _OPENMP
1201  n = 0;
1202  fpt_trafo(wisdom.set_threads[0],abs(n),
1203  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1204  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1205  plan->N,0U);
1206 
1207  int n_abs;
1208  #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic)
1209  for (n_abs = 1; n_abs <= plan->N; n_abs++)
1210  {
1211  int threadid = omp_get_thread_num();
1212  n = -n_abs;
1213  fpt_trafo(wisdom.set_threads[threadid],abs(n),
1214  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1215  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1216  plan->N,0U);
1217  n = n_abs;
1218  fpt_trafo(wisdom.set_threads[threadid],abs(n),
1219  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1220  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1221  plan->N,0U);
1222  }
1223 #else
1224  /* Use fast polynomial transform FPT. */
1225  for (n = -plan->N; n <= plan->N; n++)
1226  {
1227  //fprintf(stderr,"nfsft_trafo: n = %d\n",n);
1228  //fflush(stderr);
1229  fpt_trafo(wisdom.set,abs(n),
1230  &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1231  &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1232  plan->N,0U);
1233  }
1234 #endif
1235  }
1236 #ifdef MEASURE_TIME
1237  t1 = getticks();
1238  plan->MEASURE_TIME_t[0] = nfft_elapsed_seconds(t1,t0);
1239 #endif
1240 
1241 #ifdef MEASURE_TIME
1242  t0 = getticks();
1243 #endif
1244  /* Convert Chebyshev coefficients to Fourier coefficients. */
1245  c2e(plan);
1246 #ifdef MEASURE_TIME
1247  t1 = getticks();
1248  plan->MEASURE_TIME_t[1] = nfft_elapsed_seconds(t1,t0);
1249 #endif
1250 
1251 #ifdef MEASURE_TIME
1252  t0 = getticks();
1253 #endif
1254  if (plan->flags & NFSFT_EQUISPACED)
1255  {
1256  /* Algorithm for equispaced nodes.
1257  * plan_nfft is not initialized if NFSFT_EQUISPACED is set. */
1258 
1259 #ifdef _OPENMP
1260  INT nthreads = Y(get_num_threads)();
1261 #endif
1262 
1263  int N[2];
1264  N[0] = 2*plan->N+2;
1265  N[1] = 2*plan->N+2;
1266  fftw_plan plan_fftw;
1267 
1268  for (int j=0; j<N[0]; j++)
1269  for (int k=0; k<N[1]; k++)
1270  if ((j+k)%2)
1271  plan->f_hat_intern[j*N[1]+k] *= -1;
1272 // f_hat[j*N[1]+k] = plan->f_hat_intern[j*N[1]+k] * CEXP(II*KPI*(j+k));
1273 
1274 #ifdef _OPENMP
1275 #pragma omp critical (nfft_omp_critical_fftw_plan)
1276  {
1277  FFTW(plan_with_nthreads)(nthreads);
1278 #endif
1279  plan_fftw = fftw_plan_dft(2, N, plan->f_hat_intern, plan->f_hat_intern, FFTW_FORWARD, FFTW_ESTIMATE);
1280 #ifdef _OPENMP
1281  }
1282 #endif
1283  fftw_execute(plan_fftw);
1284  for (int j=0; j<N[0]; j++)
1285 // for (int k=j%2-1; k<N[1]; k+=2)
1286 // plan->f[j*N[1]+k] *= -1;
1287  for (int k=N[1]/2; k<N[1]+1; k++)
1288  plan->f[j*(N[1]/2+1)+(k-N[1]/2)] = plan->f_hat_intern[j*N[1]+k%N[1]] * ((j+k)%2 ? -1 : 1);
1289 // plan->f[j*N[1]+k] *= CEXP(II*KPI*(j-N[0]/2 + k-N[1]/2));
1290 #ifdef _OPENMP
1291 #pragma omp critical (nfft_omp_critical_fftw_plan)
1292 #endif
1293  fftw_destroy_plan(plan_fftw);
1294  }
1295  /* Check, which nonequispaced discrete Fourier transform algorithm should
1296  * be used.
1297  */
1298  else if (plan->flags & NFSFT_USE_NDFT)
1299  {
1300  /* Use NDFT. */
1301  nfft_trafo_direct(&plan->plan_nfft);
1302  }
1303  else
1304  {
1305  /* Use NFFT. */
1306  //fprintf(stderr,"nfsft_adjoint: nfft_trafo\n");
1307  nfft_trafo_2d(&plan->plan_nfft);
1308  }
1309 #ifdef MEASURE_TIME
1310  t1 = getticks();
1311  plan->MEASURE_TIME_t[2] = nfft_elapsed_seconds(t1,t0);
1312 #endif
1313  }
1314 }
1315 
1317 {
1318  int k; /*< The degree k */
1319  int n; /*< The order n */
1320 #ifdef MEASURE_TIME
1321  ticks t0, t1;
1322 #endif
1323 
1324 #ifdef MEASURE_TIME
1325  plan->MEASURE_TIME_t[0] = 0.0;
1326  plan->MEASURE_TIME_t[1] = 0.0;
1327  plan->MEASURE_TIME_t[2] = 0.0;
1328 #endif
1329 
1330  if ((wisdom.flags & NFSFT_NO_FAST_ALGORITHM) || (plan->flags & NFSFT_NO_FAST_ALGORITHM))
1331  {
1332  nfsft_set_f_hat_nan(plan);
1333  return;
1334  }
1335 
1336  /* Check, if precomputation was done and that the bandwidth N is not too
1337  * big.
1338  */
1339  if (wisdom.initialized == 0 || plan->N > wisdom.N_MAX)
1340  {
1341  nfsft_set_f_hat_nan(plan);
1342  return;
1343  }
1344 
1345  /* Check, if slow transformation should be used due to small bandwidth. */
1346  if (plan->N < NFSFT_BREAK_EVEN)
1347  {
1348  /* Use adjoint NDSFT. */
1349  nfsft_adjoint_direct(plan);
1350  }
1351  /* Check for correct value of the bandwidth N. */
1352  else if (plan->N <= wisdom.N_MAX)
1353  {
1354  //fprintf(stderr,"nfsft_adjoint: Starting\n");
1355  //fflush(stderr);
1356  /* Propagate pointer values to the internal NFFT plan to assure
1357  * consistency. Pointers may have been modified externally.
1358  */
1359  if (!(plan->flags & NFSFT_EQUISPACED))
1360  {
1361  plan->plan_nfft.x = plan->x;
1362  plan->plan_nfft.f = plan->f;
1363  plan->plan_nfft.f_hat = plan->f_hat;
1364  }
1365 
1366 #ifdef MEASURE_TIME
1367  t0 = getticks();
1368 #endif
1369  if (plan->flags & NFSFT_EQUISPACED)
1370  {
1371  /* Algorithm for equispaced nodes.
1372  * plan_nfft is not initialized if NFSFT_EQUISPACED is set. */
1373  int N[2];
1374  N[0] = 2*plan->N+2;
1375  N[1] = 2*plan->N+2;
1376 
1377  for (int j=0; j<N[0]; j++)
1378  {
1379  for (int k=0; k<N[1]/2+1; k++)
1380  plan->f_hat[j*N[1]+k] = 0;
1381  for (int k=N[1]/2; k<N[1]+1; k++)
1382  plan->f_hat[j*N[1]+k%N[1]] = plan->f[j*(N[1]/2+1)+k-N[1]/2] * ((j+k)%2 ? -1 : 1);
1383  }
1384  fftw_plan plan_fftw = FFTW(plan_dft)(2, N, plan->f_hat, plan->f_hat, FFTW_BACKWARD, FFTW_ESTIMATE);
1385  fftw_execute(plan_fftw);
1386  for (int j=0; j<N[0]; j++)
1387  for (int k=0; k<N[1]; k++)
1388  if ((j+k)%2)
1389  plan->f_hat[j*N[1]+k] *= -1;
1390  fftw_destroy_plan(plan_fftw);
1391  }
1392  /* Check, which adjoint nonequispaced discrete Fourier transform algorithm
1393  * should be used.
1394  */
1395  else if (plan->flags & NFSFT_USE_NDFT)
1396  {
1397  //fprintf(stderr,"nfsft_adjoint: Executing nfft_adjoint_direct\n");
1398  //fflush(stderr);
1399  /* Use adjoint NDFT. */
1400  nfft_adjoint_direct(&plan->plan_nfft);
1401  }
1402  else
1403  {
1404  //fprintf(stderr,"nfsft_adjoint: Executing nfft_adjoint\n");
1405  //fflush(stderr);
1406  //fprintf(stderr,"nfsft_adjoint: nfft_adjoint\n");
1407  /* Use adjoint NFFT. */
1408  nfft_adjoint_2d(&plan->plan_nfft);
1409  }
1410 #ifdef MEASURE_TIME
1411  t1 = getticks();
1412  plan->MEASURE_TIME_t[2] = nfft_elapsed_seconds(t1,t0);
1413 #endif
1414 
1415  //fprintf(stderr,"nfsft_adjoint: Executing c2e_transposed\n");
1416  //fflush(stderr);
1417 #ifdef MEASURE_TIME
1418  t0 = getticks();
1419 #endif
1420  /* Convert Fourier coefficients to Chebyshev coefficients. */
1421  c2e_transposed(plan);
1422 #ifdef MEASURE_TIME
1423  t1 = getticks();
1424  plan->MEASURE_TIME_t[1] = nfft_elapsed_seconds(t1,t0);
1425 #endif
1426 
1427 #ifdef MEASURE_TIME
1428  t0 = getticks();
1429 #endif
1430  /* Check, which transposed polynomial transform algorithm should be used */
1431  if (plan->flags & NFSFT_USE_DPT)
1432  {
1433 #ifdef _OPENMP
1434  n = 0;
1435  fpt_transposed_direct(wisdom.set_threads[0],abs(n),
1436  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1437  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1438  plan->N,0U);
1439 
1440  int n_abs;
1441  #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic)
1442  for (n_abs = 1; n_abs <= plan->N; n_abs++)
1443  {
1444  int threadid = omp_get_thread_num();
1445  n = -n_abs;
1446  fpt_transposed_direct(wisdom.set_threads[threadid],abs(n),
1447  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1448  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1449  plan->N,0U);
1450  n = n_abs;
1451  fpt_transposed_direct(wisdom.set_threads[threadid],abs(n),
1452  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1453  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1454  plan->N,0U);
1455  }
1456 #else
1457  /* Use transposed DPT. */
1458  for (n = -plan->N; n <= plan->N; n++)
1459  {
1460  //fprintf(stderr,"nfsft_adjoint: Executing dpt_transposed\n");
1461  //fflush(stderr);
1462  fpt_transposed_direct(wisdom.set,abs(n),
1463  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1464  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1465  plan->N,0U);
1466  }
1467 #endif
1468  }
1469  else
1470  {
1471 #ifdef _OPENMP
1472  n = 0;
1473  fpt_transposed(wisdom.set_threads[0],abs(n),
1474  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1475  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1476  plan->N,0U);
1477 
1478  int n_abs;
1479  #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic)
1480  for (n_abs = 1; n_abs <= plan->N; n_abs++)
1481  {
1482  int threadid = omp_get_thread_num();
1483  n = -n_abs;
1484  fpt_transposed(wisdom.set_threads[threadid],abs(n),
1485  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1486  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1487  plan->N,0U);
1488  n = n_abs;
1489  fpt_transposed(wisdom.set_threads[threadid],abs(n),
1490  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1491  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1492  plan->N,0U);
1493  }
1494 #else
1495  //fprintf(stderr,"nfsft_adjoint: fpt_transposed\n");
1496  /* Use transposed FPT. */
1497  for (n = -plan->N; n <= plan->N; n++)
1498  {
1499  //fprintf(stderr,"nfsft_adjoint: Executing fpt_transposed\n");
1500  //fflush(stderr);
1501  fpt_transposed(wisdom.set,abs(n),
1502  &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1503  &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1504  plan->N,0U);
1505  }
1506 #endif
1507  }
1508 #ifdef MEASURE_TIME
1509  t1 = getticks();
1510  plan->MEASURE_TIME_t[0] = nfft_elapsed_seconds(t1,t0);
1511 #endif
1512 
1513  /* Check, if we compute with L^2-normalized spherical harmonics. If so,
1514  * multiply spherical Fourier coefficients with corresponding normalization
1515  * weight. */
1516  if (plan->flags & NFSFT_NORMALIZED)
1517  {
1518  //fprintf(stderr,"nfsft_adjoint: Normalizing\n");
1519  //fflush(stderr);
1520  /* Traverse Fourier coefficients array. */
1521 #ifdef _OPENMP
1522  #pragma omp parallel for default(shared) private(k,n) schedule(dynamic)
1523 #endif
1524  for (k = 0; k <= plan->N; k++)
1525  {
1526  for (n = -k; n <= k; n++)
1527  {
1528  /* Multiply with normalization weight. */
1529  plan->f_hat[NFSFT_INDEX(k,n,plan)] *=
1530  sqrt((2*k+1)/(4.0*KPI));
1531  }
1532  }
1533  }
1534 
1535  /* Set unused coefficients to zero. */
1536  if (plan->flags & NFSFT_ZERO_F_HAT)
1537  {
1538  //fprintf(stderr,"nfsft_adjoint: Setting to zero\n");
1539  //fflush(stderr);
1540  for (n = -plan->N; n <= plan->N+1; n++)
1541  {
1542  memset(&plan->f_hat[NFSFT_INDEX(-plan->N-1,n,plan)],0U,
1543  (plan->N+1+abs(n))*sizeof(double _Complex));
1544  }
1545  }
1546  //fprintf(stderr,"nfsft_adjoint: Finished\n");
1547  //fflush(stderr);
1548  }
1549 }
1550 
1551 void nfsft_precompute_x(nfsft_plan *plan)
1552 {
1553  if ((plan->flags & NFSFT_NO_FAST_ALGORITHM) || (plan->flags & NFSFT_EQUISPACED))
1554  return;
1555 
1556  /* Pass angle array to NFFT plan. */
1557  plan->plan_nfft.x = plan->x;
1558 
1559  /* Precompute. */
1560  if (plan->plan_nfft.flags & PRE_ONE_PSI)
1561  nfft_precompute_one_psi(&plan->plan_nfft);
1562 }
1563 /* \} */
#define FPT_PERSISTENT_DATA
If set, TODO complete comment.
Definition: nfft3.h:619
*We expand this macro for each supported precision * X
Definition: nfft3.h:99
void fpt_transposed(fpt_set set, const int m, double _Complex *x, double _Complex *y, const int k_end, const unsigned int flags)
Definition: fpt.c:1740
#define FPT_AL_SYMMETRY
If set, TODO complete comment.
Definition: nfft3.h:624
fpt_set fpt_init(const int M, const int t, const unsigned int flags)
Definition: fpt.c:795
void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta, double *gam, int k_start, const double threshold)
Definition: fpt.c:1307
#define PRE_ONE_PSI
Definition: nfft3.h:194
#define PRE_PSI
Definition: nfft3.h:185
#define FFTW_INIT
Definition: nfft3.h:191
#define PRE_PHI_HUT
Definition: nfft3.h:181
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
#define NFSFT_MALLOC_X
Definition: nfft3.h:564
static struct nfsft_wisdom wisdom
The global wisdom structure for precomputed data.
Definition: nfsft.c:90
static void c2e_transposed(nfsft_plan *plan)
Transposed version of the function c2e.
Definition: nfsft.c:193
double * gamma
Precomputed recursion coefficients /f$\gamma_k^n/f$ for /f$k = 0,/ldots, N_{\text{max}}; n=-k,...
fpt_set set
Structure for discrete polynomial transform (DPT)
#define NFSFT_DEFAULT_NFFT_CUTOFF
The default NFFT cutoff parameter.
Definition: nfsft.c:65
void nfsft_init_advanced(nfsft_plan *plan, int N, int M, unsigned int flags)
Definition: nfsft.c:267
void nfsft_forget(void)
Definition: nfsft.c:579
static void c2e(nfsft_plan *plan)
Converts coefficients with , from a linear combination of Chebyshev polynomials.
Definition: nfsft.c:115
#define NFSFT_BREAK_EVEN
The break-even bandwidth .
Definition: nfsft.c:79
void nfsft_trafo(nfsft_plan *plan)
Definition: nfsft.c:1068
#define NFSFT_NORMALIZED
Definition: nfft3.h:561
void nfsft_init(nfsft_plan *plan, int N, int M)
Definition: nfsft.c:260
void alpha_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
Definition: legendre.c:89
#define NFSFT_USE_DPT
Definition: nfft3.h:563
double * beta
Precomputed recursion coefficients /f$\beta_k^n/f$ for /f$k = 0,/ldots, N_{\text{max}}; n=-k,...
double * alpha
Precomputed recursion coefficients /f$\alpha_k^n/f$ for /f$k = 0,/ldots, N_{\text{max}}; n=-k,...
#define NFSFT_ZERO_F_HAT
Definition: nfft3.h:578
void nfsft_adjoint(nfsft_plan *plan)
Definition: nfsft.c:1316
void gamma_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
Definition: legendre.c:107
#define NFSFT_INDEX(k, n, plan)
Definition: nfft3.h:581
#define NFSFT_NO_DIRECT_ALGORITHM
Definition: nfft3.h:576
#define NFSFT_NO_FAST_ALGORITHM
Definition: nfft3.h:577
void nfsft_finalize(nfsft_plan *plan)
Definition: nfsft.c:625
#define NFSFT_MALLOC_F_HAT
Definition: nfft3.h:565
#define NFSFT_USE_NDFT
Definition: nfft3.h:562
bool initialized
Indicates wether the structure has been initialized.
void nfsft_precompute(int N, double kappa, unsigned int nfsft_flags, unsigned int fpt_flags)
Definition: nfsft.c:376
int N_MAX
Stores precomputation flags.
#define NFSFT_EQUISPACED
Definition: nfft3.h:573
#define NFSFT_PRESERVE_F_HAT
Definition: nfft3.h:567
int T_MAX
The logarithm /f$t = \log_2 N_{\text{max}}/f$ of the maximum bandwidth.
void beta_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
Definition: legendre.c:98
#define NFSFT_MALLOC_F
Definition: nfft3.h:566
Internal header file for auxiliary definitions and functions.
Header file with internal API of the NFSFT module.
Header file for the nfft3 library.
Wisdom structure.