53 #include "../fpt/fpt.h"
65 #define NFSFT_DEFAULT_NFFT_CUTOFF 6
72 #define NFSFT_DEFAULT_THRESHOLD 1000
79 #define NFSFT_BREAK_EVEN 5
119 double _Complex last;
129 memset(plan->f_hat_intern,0U,(2*plan->N+2)*
sizeof(
double _Complex));
132 lowe = -plan->N + (plan->N%2);
136 for (n = lowe; n <= upe; n += 2)
140 xm = &(plan->f_hat_intern[
NFSFT_INDEX(-1,n,plan)]);
141 xp = &(plan->f_hat_intern[
NFSFT_INDEX(+1,n,plan)]);
142 for(k = 1; k <= plan->N; k++)
153 low = -plan->N + (1-plan->N%2);
158 for (n = low; n <= up; n += 2)
164 xp = &(plan->f_hat_intern[
NFSFT_INDEX(-plan->N-1,n,plan)]);
168 xm = &(plan->f_hat_intern[
NFSFT_INDEX(plan->N,n,plan)]);
170 *xm = 0.5 * _Complex_I * (0.5*xm[-1]);
172 for (k = plan->N-1; k > 0; k--)
175 *xm = 0.5 * _Complex_I * (0.5*(xm[-1] - last));
197 double _Complex last;
207 lowe = -plan->N + (plan->N%2);
211 for (n = lowe; n <= upe; n += 2)
217 for(k = 1; k <= plan->N; k++)
225 low = -plan->N + (1-plan->N%2);
229 for (n = low; n <= up; n += 2)
235 for(k = 1; k <= plan->N; k++)
241 -0.25*_Complex_I*plan->f_hat[
NFSFT_INDEX(1,n,plan)];
244 -0.25*_Complex_I*plan->f_hat[
NFSFT_INDEX(2,n,plan)];
247 for (k = 2; k < plan->N; k++)
250 *xp = -0.25 * _Complex_I * (xp[1] - last);
254 *xp = 0.25 * _Complex_I * last;
275 void nfsft_init_guru(
nfsft_plan *plan,
int N,
int M,
unsigned int flags,
276 unsigned int nfft_flags,
int nfft_cutoff)
290 plan->M_total = (2*plan->N+2)*(plan->N+2);
298 plan->N_total = (2*plan->N+2)*(2*plan->N+2);
304 plan->f_hat_intern = (
double _Complex*) nfft_malloc(plan->N_total*
305 sizeof(
double _Complex));
311 plan->f_hat = (
double _Complex*) nfft_malloc(plan->N_total*
312 sizeof(
double _Complex));
318 plan->f = (
double _Complex*) nfft_malloc(plan->M_total*
sizeof(
double _Complex));
324 plan->x = (
double*) nfft_malloc(plan->M_total*2*
sizeof(
double));
327 for (
int i=0; i<2*plan->N+2; i++)
328 for (
int j=0; j<plan->N+2; j++)
330 plan->x[2*(i*(plan->N+1) + j)] = ((double)i-plan->N-1)/(2.0*plan->N+2);
331 plan->x[2*(i*(plan->N+1) + j) + 1] = ((double)j)/(2.0*plan->N+2);
341 nfft_size = (
int*)nfft_malloc(2*
sizeof(
int));
342 fftw_size = (
int*)nfft_malloc(2*
sizeof(
int));
345 nfft_size[0] = 2*plan->N+2;
346 nfft_size[1] = 2*plan->N+2;
347 fftw_size[0] = 4*plan->N;
348 fftw_size[1] = 4*plan->N;
351 nfft_init_guru(&plan->plan_nfft, 2, nfft_size, plan->M_total, fftw_size,
352 nfft_cutoff, nfft_flags,
353 FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
356 plan->plan_nfft.x = plan->x;
358 plan->plan_nfft.f = plan->f;
360 plan->plan_nfft.f_hat = plan->f_hat;
368 nfft_free(nfft_size);
369 nfft_free(fftw_size);
377 unsigned int fpt_flags)
388 #pragma omp parallel default(shared)
390 int nthreads = omp_get_num_threads();
393 wisdom.nthreads = nthreads;
399 wisdom.flags = nfsft_flags;
441 #pragma omp parallel default(shared) private(n)
443 int nthreads = omp_get_num_threads();
444 int threadid = omp_get_thread_num();
447 wisdom.nthreads = nthreads;
448 wisdom.set_threads = (fpt_set*) nfft_malloc(nthreads*
sizeof(fpt_set));
461 wisdom.set_threads[threadid]->dpt =
wisdom.set_threads[0]->dpt;
467 fpt_precompute_1(
wisdom.set_threads[0],n,n);
471 #pragma omp for private(n) schedule(dynamic)
494 #pragma omp parallel default(shared) private(n)
497 int nthreads = omp_get_num_threads();
498 int threadid = omp_get_thread_num();
501 wisdom.nthreads = nthreads;
502 wisdom.set_threads = (fpt_set*) nfft_malloc(nthreads*
sizeof(fpt_set));
519 wisdom.set_threads[threadid]->dpt =
wisdom.set_threads[0]->dpt;
525 fpt_precompute_1(
wisdom.set_threads[0],n,n);
529 #pragma omp for private(n) schedule(dynamic)
611 for (k = 0; k <
wisdom.nthreads; k++)
612 fpt_finalize(
wisdom.set_threads[k]);
613 nfft_free(
wisdom.set_threads);
633 nfft_finalize(&plan->plan_nfft);
640 nfft_free(plan->f_hat_intern);
647 nfft_free(plan->f_hat);
668 double nan_value = nan(
"");
669 for (m = 0; m < plan->M_total; m++)
670 plan->f[m] = nan_value;
690 plan->MEASURE_TIME_t[0] = 0.0;
691 plan->MEASURE_TIME_t[1] = 0.0;
692 plan->MEASURE_TIME_t[2] = 0.0;
697 nfsft_set_f_nan(plan);
704 memcpy(plan->f_hat_intern,plan->f_hat,plan->N_total*
705 sizeof(
double _Complex));
709 plan->f_hat_intern = plan->f_hat;
719 #pragma omp parallel for default(shared) private(k,n) schedule(dynamic)
721 for (k = 0; k <= plan->N; k++)
723 for (n = -k; n <= k; n++)
727 sqrt((2*k+1)/(4.0*KPI));
738 for (m = 0; m < plan->M_total; m++)
740 plan->f[m] = plan->f_hat_intern[
NFSFT_INDEX(0,0,plan)];
753 #pragma omp parallel for default(shared) private(m,stheta,sphi,n,a,n_abs,alpha,gamma,k)
755 for (m = 0; m < plan->M_total; m++)
758 stheta = cos(2.0*KPI*plan->x[2*m+1]);
760 sphi = 2.0*KPI*plan->x[2*m];
769 for (n = -plan->N; n <= plan->N; n++)
784 long double _Complex it2 = a[plan->N];
785 long double _Complex it1 = a[plan->N-1];
786 for (k = plan->N; k > n_abs + 1; k--)
788 long double _Complex temp = a[k-2] + it2 *
gamma[k];
789 it2 = it1 + it2 *
alpha[k] * stheta;
796 it2 = it1 + it2 *
wisdom.
alpha[ROWK(n_abs)+1] * stheta;
805 long double _Complex result = it2 *
wisdom.
gamma[ROW(n_abs)] *
806 powl(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
808 plan->f[m] += result;
813 double _Complex it2 = a[plan->N];
814 double _Complex it1 = a[plan->N-1];
815 for (k = plan->N; k > n_abs + 1; k--)
817 double _Complex temp = a[k-2] + it2 *
gamma[k];
818 it2 = it1 + it2 *
alpha[k] * stheta;
825 it2 = it1 + it2 *
wisdom.
alpha[ROWK(n_abs)+1] * stheta;
835 pow(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
845 static void nfsft_set_f_hat_nan(
nfsft_plan *plan)
848 double nan_value = nan(
"");
849 for (k = 0; k <= plan->N; k++)
850 for (n = -k; n <= k; n++)
868 plan->MEASURE_TIME_t[0] = 0.0;
869 plan->MEASURE_TIME_t[1] = 0.0;
870 plan->MEASURE_TIME_t[2] = 0.0;
875 nfsft_set_f_hat_nan(plan);
880 memset(plan->f_hat,0U,plan->N_total*
sizeof(
double _Complex));
888 for (m = 0; m < plan->M_total; m++)
899 #pragma omp parallel for default(shared) private(n,n_abs,alpha,gamma,m,k) schedule(dynamic)
900 for (n = -plan->N; n <= plan->N; n++)
910 for (m = 0; m < plan->M_total; m++)
913 double stheta = cos(2.0*KPI*plan->x[2*m+1]);
915 double sphi = 2.0*KPI*plan->x[2*m];
921 long double _Complex it1 = plan->f[m] *
wisdom.
gamma[ROW(n_abs)] *
922 powl(1 - stheta * stheta, 0.5*n_abs) * cexpl(-_Complex_I*n*sphi);
924 long double _Complex it2 = 0.0;
933 for (k = n_abs+2; k <= plan->N; k++)
935 long double _Complex temp = it2;
936 it2 =
alpha[k] * stheta * it2 +
gamma[k] * it1;
944 double _Complex it1 = plan->f[m] *
wisdom.
gamma[ROW(n_abs)] *
945 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
947 double _Complex it2 = 0.0;
956 for (k = n_abs+2; k <= plan->N; k++)
958 double _Complex temp = it2;
959 it2 =
alpha[k] * stheta * it2 +
gamma[k] * it1;
968 for (m = 0; m < plan->M_total; m++)
971 double stheta = cos(2.0*KPI*plan->x[2*m+1]);
973 double sphi = 2.0*KPI*plan->x[2*m];
976 for (n = -plan->N; n <= plan->N; n++)
989 long double _Complex it1 = plan->f[m] *
wisdom.
gamma[ROW(n_abs)] *
990 powl(1 - stheta * stheta, 0.5*n_abs) * cexpl(-_Complex_I*n*sphi);
992 long double _Complex it2 = 0.0;
1001 for (k = n_abs+2; k <= plan->N; k++)
1003 long double _Complex temp = it2;
1004 it2 =
alpha[k] * stheta * it2 +
gamma[k] * it1;
1012 double _Complex it1 = plan->f[m] *
wisdom.
gamma[ROW(n_abs)] *
1013 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
1015 double _Complex it2 = 0.0;
1017 if (n_abs < plan->N)
1024 for (k = n_abs+2; k <= plan->N; k++)
1026 double _Complex temp = it2;
1027 it2 =
alpha[k] * stheta * it2 +
gamma[k] * it1;
1044 #pragma omp parallel for default(shared) private(k,n) schedule(dynamic)
1046 for (k = 0; k <= plan->N; k++)
1048 for (n = -k; n <= k; n++)
1052 sqrt((2*k+1)/(4.0*KPI));
1060 for (n = -plan->N; n <= plan->N+1; n++)
1062 memset(&plan->f_hat[
NFSFT_INDEX(-plan->N-1,n,plan)],0U,
1063 (plan->N+1+abs(n))*
sizeof(
double _Complex));
1076 double t, t_pre, t_nfft, t_fpt, t_c2e, t_norm;
1085 plan->MEASURE_TIME_t[0] = 0.0;
1086 plan->MEASURE_TIME_t[1] = 0.0;
1087 plan->MEASURE_TIME_t[2] = 0.0;
1092 nfsft_set_f_nan(plan);
1101 nfsft_set_f_nan(plan);
1109 nfsft_trafo_direct(plan);
1118 memcpy(plan->f_hat_intern,plan->f_hat,plan->N_total*
1119 sizeof(
double _Complex));
1123 plan->f_hat_intern = plan->f_hat;
1131 plan->plan_nfft.x = plan->x;
1132 plan->plan_nfft.f = plan->f;
1133 plan->plan_nfft.f_hat = plan->f_hat_intern;
1143 #pragma omp parallel for default(shared) private(k,n) schedule(dynamic)
1145 for (k = 0; k <= plan->N; k++)
1147 for (n = -k; n <= k; n++)
1151 sqrt((2*k+1)/(4.0*KPI));
1164 fpt_trafo_direct(
wisdom.set_threads[0],abs(n),
1170 #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic)
1171 for (n_abs = 1; n_abs <= plan->N; n_abs++)
1173 int threadid = omp_get_thread_num();
1175 fpt_trafo_direct(
wisdom.set_threads[threadid],abs(n),
1180 fpt_trafo_direct(
wisdom.set_threads[threadid],abs(n),
1187 for (n = -plan->N; n <= plan->N; n++)
1202 fpt_trafo(
wisdom.set_threads[0],abs(n),
1208 #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic)
1209 for (n_abs = 1; n_abs <= plan->N; n_abs++)
1211 int threadid = omp_get_thread_num();
1213 fpt_trafo(
wisdom.set_threads[threadid],abs(n),
1218 fpt_trafo(
wisdom.set_threads[threadid],abs(n),
1225 for (n = -plan->N; n <= plan->N; n++)
1260 INT nthreads = Y(get_num_threads)();
1266 fftw_plan plan_fftw;
1268 for (
int j=0; j<N[0]; j++)
1269 for (
int k=0; k<N[1]; k++)
1271 plan->f_hat_intern[j*N[1]+k] *= -1;
1275 #pragma omp critical (nfft_omp_critical_fftw_plan)
1277 FFTW(plan_with_nthreads)(nthreads);
1279 plan_fftw = fftw_plan_dft(2, N, plan->f_hat_intern, plan->f_hat_intern, FFTW_FORWARD, FFTW_ESTIMATE);
1283 fftw_execute(plan_fftw);
1284 for (
int j=0; j<N[0]; j++)
1287 for (
int k=N[1]/2; k<N[1]+1; k++)
1288 plan->f[j*(N[1]/2+1)+(k-N[1]/2)] = plan->f_hat_intern[j*N[1]+k%N[1]] * ((j+k)%2 ? -1 : 1);
1291 #pragma omp critical (nfft_omp_critical_fftw_plan)
1293 fftw_destroy_plan(plan_fftw);
1301 nfft_trafo_direct(&plan->plan_nfft);
1307 nfft_trafo_2d(&plan->plan_nfft);
1325 plan->MEASURE_TIME_t[0] = 0.0;
1326 plan->MEASURE_TIME_t[1] = 0.0;
1327 plan->MEASURE_TIME_t[2] = 0.0;
1332 nfsft_set_f_hat_nan(plan);
1341 nfsft_set_f_hat_nan(plan);
1349 nfsft_adjoint_direct(plan);
1361 plan->plan_nfft.x = plan->x;
1362 plan->plan_nfft.f = plan->f;
1363 plan->plan_nfft.f_hat = plan->f_hat;
1377 for (
int j=0; j<N[0]; j++)
1379 for (
int k=0; k<N[1]/2+1; k++)
1380 plan->f_hat[j*N[1]+k] = 0;
1381 for (
int k=N[1]/2; k<N[1]+1; k++)
1382 plan->f_hat[j*N[1]+k%N[1]] = plan->f[j*(N[1]/2+1)+k-N[1]/2] * ((j+k)%2 ? -1 : 1);
1384 fftw_plan plan_fftw = FFTW(plan_dft)(2, N, plan->f_hat, plan->f_hat, FFTW_BACKWARD, FFTW_ESTIMATE);
1385 fftw_execute(plan_fftw);
1386 for (
int j=0; j<N[0]; j++)
1387 for (
int k=0; k<N[1]; k++)
1389 plan->f_hat[j*N[1]+k] *= -1;
1390 fftw_destroy_plan(plan_fftw);
1400 nfft_adjoint_direct(&plan->plan_nfft);
1408 nfft_adjoint_2d(&plan->plan_nfft);
1435 fpt_transposed_direct(
wisdom.set_threads[0],abs(n),
1441 #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic)
1442 for (n_abs = 1; n_abs <= plan->N; n_abs++)
1444 int threadid = omp_get_thread_num();
1446 fpt_transposed_direct(
wisdom.set_threads[threadid],abs(n),
1451 fpt_transposed_direct(
wisdom.set_threads[threadid],abs(n),
1458 for (n = -plan->N; n <= plan->N; n++)
1462 fpt_transposed_direct(
wisdom.
set,abs(n),
1479 #pragma omp parallel for default(shared) private(n_abs,n) num_threads(wisdom.nthreads) schedule(dynamic)
1480 for (n_abs = 1; n_abs <= plan->N; n_abs++)
1482 int threadid = omp_get_thread_num();
1497 for (n = -plan->N; n <= plan->N; n++)
1522 #pragma omp parallel for default(shared) private(k,n) schedule(dynamic)
1524 for (k = 0; k <= plan->N; k++)
1526 for (n = -k; n <= k; n++)
1530 sqrt((2*k+1)/(4.0*KPI));
1540 for (n = -plan->N; n <= plan->N+1; n++)
1542 memset(&plan->f_hat[
NFSFT_INDEX(-plan->N-1,n,plan)],0U,
1543 (plan->N+1+abs(n))*
sizeof(
double _Complex));
1557 plan->plan_nfft.x = plan->x;
1561 nfft_precompute_one_psi(&plan->plan_nfft);
#define FPT_PERSISTENT_DATA
If set, TODO complete comment.
*We expand this macro for each supported precision * X
void fpt_transposed(fpt_set set, const int m, double _Complex *x, double _Complex *y, const int k_end, const unsigned int flags)
#define FPT_AL_SYMMETRY
If set, TODO complete comment.
fpt_set fpt_init(const int M, const int t, const unsigned int flags)
void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta, double *gam, int k_start, const double threshold)
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
static struct nfsft_wisdom wisdom
The global wisdom structure for precomputed data.
static void c2e_transposed(nfsft_plan *plan)
Transposed version of the function c2e.
double * gamma
Precomputed recursion coefficients /f$\gamma_k^n/f$ for /f$k = 0,/ldots, N_{\text{max}}; n=-k,...
fpt_set set
Structure for discrete polynomial transform (DPT)
#define NFSFT_DEFAULT_NFFT_CUTOFF
The default NFFT cutoff parameter.
void nfsft_init_advanced(nfsft_plan *plan, int N, int M, unsigned int flags)
static void c2e(nfsft_plan *plan)
Converts coefficients with , from a linear combination of Chebyshev polynomials.
#define NFSFT_BREAK_EVEN
The break-even bandwidth .
void nfsft_trafo(nfsft_plan *plan)
void nfsft_init(nfsft_plan *plan, int N, int M)
void alpha_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
double * beta
Precomputed recursion coefficients /f$\beta_k^n/f$ for /f$k = 0,/ldots, N_{\text{max}}; n=-k,...
double * alpha
Precomputed recursion coefficients /f$\alpha_k^n/f$ for /f$k = 0,/ldots, N_{\text{max}}; n=-k,...
void nfsft_adjoint(nfsft_plan *plan)
void gamma_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
#define NFSFT_INDEX(k, n, plan)
#define NFSFT_NO_DIRECT_ALGORITHM
#define NFSFT_NO_FAST_ALGORITHM
void nfsft_finalize(nfsft_plan *plan)
#define NFSFT_MALLOC_F_HAT
bool initialized
Indicates wether the structure has been initialized.
void nfsft_precompute(int N, double kappa, unsigned int nfsft_flags, unsigned int fpt_flags)
int N_MAX
Stores precomputation flags.
#define NFSFT_PRESERVE_F_HAT
int T_MAX
The logarithm /f$t = \log_2 N_{\text{max}}/f$ of the maximum bandwidth.
void beta_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
Internal header file for auxiliary definitions and functions.
Header file with internal API of the NFSFT module.
Header file for the nfft3 library.