#undef FPT_CLENSHAW_USE_ONLY_LONG_DOUBLE

#define K_START_TILDE(x,y) (MAX(MIN(x,y-2),0))

#define K_END_TILDE(x,y) MIN(x,y-1)

#define FIRST_L(x,y) (LRINT(floor((x)/(double)y)))

#define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)y))-1)

#define N_TILDE(y) (y-1)

#define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))

#define FPT_BREAK_EVEN 4
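
/*
 * The kernels below implement the componentwise operations used by the
 * cascade steps: abuvxpwy computes u[l] = a*(b*v[l]*x[l] + w[l]*y[l]) for
 * real weight vectors v, w and complex data x, y. The *_symmetric variants
 * read only the first n/2 weights and mirror them (with signs S1, S2) over
 * the second half; the *_1/*_2 variants additionally fold a diagonal factor
 * (1+xx[l]) into, respectively out of, the weights.
 */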
static inline void abuvxpwy(double a, double b, double _Complex* u, double _Complex* x,
  double* v, double _Complex* y, double* w, int n)
{
  int l;
  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
  double *v_ptr = v, *w_ptr = w;
  for (l = 0; l < n; l++)
    *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
}
#define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
  double* v, double _Complex* y, double* w, int n) \
{ \
  const int n2 = n>>1; \
  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
  double *v_ptr = v, *w_ptr = w; \
  for (l = 0; l < n2; l++) \
    *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
  v_ptr--; w_ptr--; /* mirror the weights for the second half */ \
  for (l = 0; l < n2; l++) \
    *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
}

ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
#define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
  double* v, double _Complex* y, int n, double *xx) \
{ \
  const int n2 = n>>1; \
  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
  double *v_ptr = v, *xx_ptr = xx; \
  for (l = 0; l < n2; l++, v_ptr++) \
    *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \
  v_ptr--; /* mirror the weights for the second half */ \
  for (l = 0; l < n2; l++, v_ptr--) \
    *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \
}

ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
#define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
  double _Complex* y, double* w, int n, double *xx) \
{ \
  const int n2 = n>>1; \
  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
  double *w_ptr = w, *xx_ptr = xx; \
  for (l = 0; l < n2; l++, w_ptr++) \
    *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \
  w_ptr--; /* mirror the weights for the second half */ \
  for (l = 0; l < n2; l++, w_ptr--) \
    *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \
}

ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
static inline void auvxpwy(double a, double _Complex* u, double _Complex* x, double* v,
  double _Complex* y, double* w, int n)
{
  int l;
  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
  double *v_ptr = v, *w_ptr = w;
  for (l = n; l > 0; l--)
    *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
}
static inline void auvxpwy_symmetric(double a, double _Complex* u, double _Complex* x,
  double* v, double _Complex* y, double* w, int n)
{
  const int n2 = n>>1;
  int l;
  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
  double *v_ptr = v, *w_ptr = w;
  for (l = n2; l > 0; l--)
    *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
  v_ptr--; w_ptr--; /* mirror the weights for the second half */
  for (l = n2; l > 0; l--)
    *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
}
static inline void auvxpwy_symmetric_1(double a, double _Complex* u, double _Complex* x,
  double* v, double _Complex* y, double* w, int n, double *xx)
{
  const int n2 = n>>1;
  int l;
  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
  double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
  for (l = n2; l > 0; l--, xx_ptr++)
    *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
  v_ptr--; w_ptr--; /* mirror the weights for the second half */
  for (l = n2; l > 0; l--, xx_ptr++)
    *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
}
static inline void auvxpwy_symmetric_2(double a, double _Complex* u, double _Complex* x,
  double* v, double _Complex* y, double* w, int n, double *xx)
{
  const int n2 = n>>1;
  int l;
  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
  double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
  for (l = n2; l > 0; l--, xx_ptr++)
    *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
  v_ptr--; w_ptr--; /* mirror the weights for the second half */
  for (l = n2; l > 0; l--, xx_ptr++)
    *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
}
#define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
static inline void NAME(double _Complex *a, double _Complex *b, double *a11, double *a12, \
  double *a21, double *a22, double g, int tau, fpt_set set) \
{ \
  /* The length of the coefficient arrays. */ \
  int length = 1<<(tau+1); \
  /* Normalization for the DCT-III/DCT-II pair below. */ \
  double norm = 1.0/(length<<1); \
  \
  /* Compute function values from Chebyshev coefficients using a DCT-III. */ \
  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
  \
  if (g == 0.0) \
  { \
    /* The recurrence factor is zero: only the second row contributes. */ \
    M2_FUNCTION(norm,b,b,a22,a,a21,length); \
  } \
  else \
  { \
    /* Perform the multiplication for both rows. */ \
    M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
    M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \
    memcpy(b,set->z,length*sizeof(double _Complex)); \
    /* Compute Chebyshev coefficients using a DCT-II. */ \
    fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
  } \
  \
  /* Compute Chebyshev coefficients using a DCT-II. */ \
  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
}

FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
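
/*
 * One cascade step in a nutshell: a DCT-III takes the two coefficient
 * vectors a and b to function values at Chebyshev nodes, the sampled 2x2
 * matrix (a11 a12; a21 a22) is applied pointwise (scaled by norm and the
 * recurrence factor g), and a DCT-II transforms back, i.e. schematically
 *
 *   (a,b) <- DCT-II( U(x) .* DCT-III(a,b) ) / (2*length).
 *
 * When g == 0 the first row vanishes, so only b is updated.
 */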
static inline void fpt_do_step_symmetric_u(double _Complex *a, double _Complex *b,
  double *a11, double *a12, double *a21, double *a22, double *x,
  double gam, int tau, fpt_set set)
{
  /* The length of the coefficient arrays. */
  int length = 1<<(tau+1);
  /* Normalization for the DCT-III/DCT-II pair below. */
  double norm = 1.0/(length<<1);

  UNUSED(a21); UNUSED(a22);

  /* Compute function values from Chebyshev coefficients using a DCT-III. */
  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);

  if (gam == 0.0)
  {
    /* Only the second row contributes. */
    auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
  }
  else
  {
    /* Perform the multiplication for both rows. */
    auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
    auvxpwy_symmetric(norm*gam,a,a,a11,b,a12,length);
    memcpy(b,set->z,length*sizeof(double _Complex));
    /* Compute Chebyshev coefficients using a DCT-II. */
    fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
  }

  /* Compute Chebyshev coefficients using a DCT-II. */
  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
}
static inline void fpt_do_step_symmetric_l(double _Complex *a, double _Complex *b,
  double *a11, double *a12, double *a21, double *a22, double *x,
  double gam, int tau, fpt_set set)
{
  /* The length of the coefficient arrays. */
  int length = 1<<(tau+1);
  /* Normalization for the DCT-III/DCT-II pair below. */
  double norm = 1.0/(length<<1);

  UNUSED(a11); UNUSED(a12);

  /* Compute function values from Chebyshev coefficients using a DCT-III. */
  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);

  if (gam == 0.0)
  {
    /* Only the second row contributes. */
    auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
  }
  else
  {
    /* Perform the multiplication for both rows. */
    auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
    auvxpwy_symmetric_2(norm*gam,a,a,a21,b,a22,length,x);
    memcpy(b,set->z,length*sizeof(double _Complex));
    /* Compute Chebyshev coefficients using a DCT-II. */
    fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
  }

  /* Compute Chebyshev coefficients using a DCT-II. */
  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
}
#define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
static inline void NAME(double _Complex *a, double _Complex *b, double *a11, \
  double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \
{ \
  /* The length of the coefficient arrays. */ \
  int length = 1<<(tau+1); \
  /* Normalization for the DCT-III/DCT-II pair below. */ \
  double norm = 1.0/(length<<1); \
  \
  /* Compute function values from Chebyshev coefficients using a DCT-III. */ \
  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
  \
  /* Apply the transposed matrix to both rows. */ \
  M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \
  M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \
  memcpy(a,set->z,length*sizeof(double _Complex)); \
  \
  /* Compute Chebyshev coefficients using a DCT-II. */ \
  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
}

FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
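
/*
 * The transposed step applies the transposed matrix U(x)^T with the roles
 * of input and output rows exchanged; both results are accumulated into
 * set->z and b and copied back, again framed by a DCT-III/DCT-II pair.
 * The _u/_l variants below use only the upper row (a11, a12) or the lower
 * row (a21, a22), matching the half-size matrices precomputed for
 * stabilized symmetric steps.
 */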
static inline void fpt_do_step_t_symmetric_u(double _Complex *a,
  double _Complex *b, double *a11, double *a12, double *x,
  double gam, int tau, fpt_set set)
{
  /* The length of the coefficient arrays. */
  int length = 1<<(tau+1);
  /* Normalization for the DCT-III/DCT-II pair below. */
  double norm = 1.0/(length<<1);

  /* Compute function values from Chebyshev coefficients using a DCT-III. */
  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);

  /* Apply the transposed upper row. */
  abuvxpwy_symmetric1_1(norm,gam,set->z,a,a11,b,length,x);
  abuvxpwy_symmetric1_2(norm,gam,b,a,a12,b,length,x);
  memcpy(a,set->z,length*sizeof(double _Complex));

  /* Compute Chebyshev coefficients using a DCT-II. */
  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
}
static inline void fpt_do_step_t_symmetric_l(double _Complex *a,
  double _Complex *b, double *a21, double *a22, double *x,
  double gam, int tau, fpt_set set)
{
  /* The length of the coefficient arrays. */
  int length = 1<<(tau+1);
  /* Normalization for the DCT-III/DCT-II pair below. */
  double norm = 1.0/(length<<1);

  /* Compute function values from Chebyshev coefficients using a DCT-III. */
  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);

  /* Apply the transposed lower row. */
  abuvxpwy_symmetric2_2(norm,gam,set->z,a,b,a21,length,x);
  abuvxpwy_symmetric2_1(norm,gam,b,a,b,a22,length,x);
  memcpy(a,set->z,length*sizeof(double _Complex));

  /* Compute Chebyshev coefficients using a DCT-II. */
  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
}
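
/*
 * The following routines evaluate polynomials P_k defined by a three-term
 * recurrence with coefficient arrays alpha, beta, gam via the Clenshaw
 * algorithm. As a minimal self-contained illustration (not library code,
 * hypothetical names): for Chebyshev polynomials the recurrence factors are
 * 2x and -1, so T_5(0.3) can be computed from the coefficient vector
 * c[] = e_5 by the backward recurrence
 *
 *   double b1 = 0.0, b2 = 0.0, xv = 0.3;
 *   double c[6] = {0.0, 0.0, 0.0, 0.0, 0.0, 1.0};
 *   for (int k = 5; k >= 1; k--)
 *   {
 *     double t = c[k] + 2.0*xv*b1 - b2; // b_k = c_k + 2x*b_{k+1} - b_{k+2}
 *     b2 = b1;
 *     b1 = t;
 *   }
 *   double T5 = c[0] + xv*b1 - b2;      // = cos(5*acos(0.3)) ~= 0.99888
 */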
static void eval_clenshaw(const double *x, double *y, int size, int k,
  const double *alpha, const double *beta, const double *gam)
{
  /* Evaluate the degree-k polynomial given by the three-term recurrence
   * coefficients (alpha, beta, gam) at the nodes x[0],...,x[size-1] by the
   * Clenshaw algorithm. */
  int i, j;
  const double *x_act = x;
  double *y_act = y;
  const double *alpha_act, *beta_act, *gamma_act;

  for (i = 0; i < size; i++)
  {
    double x_val_act = *x_act;

    if (k == 0)
      *y_act = 1.0;
    else
    {
      alpha_act = &(alpha[k]);
      beta_act = &(beta[k]);
      gamma_act = &(gam[k]);
#ifdef FPT_CLENSHAW_USE_ONLY_LONG_DOUBLE
      long double a = 1.0, b = 0.0;
      for (j = k; j > 1; j--)
      {
        long double a_old = a;
        a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
        b = a_old*(*gamma_act);
        alpha_act--; beta_act--; gamma_act--;
      }
      *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
#else
      double a = 1.0, b = 0.0;
      /* Iterate in double precision while the values stay well inside range. */
      for (j = k; j > 1 && fabs(a) < 1e247; j--)
      {
        double a_old = a;
        a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
        b = a_old*(*gamma_act);
        alpha_act--; beta_act--; gamma_act--;
      }
      if (j <= 1)
        *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
      else
      {
        /* Near overflow: continue the recurrence in long double. */
        long double a_ld = a;
        long double b_ld = b;
        for (; j > 1; j--)
        {
          long double a_old = a_ld;
          a_ld = b_ld + a_old*((*alpha_act)*x_val_act+(*beta_act));
          b_ld = a_old*(*gamma_act);
          alpha_act--; beta_act--; gamma_act--;
        }
        *y_act = (a_ld*((*alpha_act)*x_val_act+(*beta_act))+b_ld);
      }
#endif
    }
    x_act++;
    y_act++;
  }
}
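
/*
 * Note on the fabs(a) < 1e247 guard: for high degrees the recurrence values
 * can overflow double precision. The bound lies well below DBL_MAX so the
 * loop stops with headroom to spare, and the remaining iterations are
 * carried out in long double, which typically has a wider exponent range
 * (e.g. the x86 80-bit format).
 */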
static void eval_clenshaw2(const double *x, double *z, double *y, int size1, int size,
  int k, const double *alpha, const double *beta, const double *gam)
{
  /* Evaluate the degree-k polynomial at the nodes x[0],...,x[size-1] by the
   * Clenshaw algorithm; for the first size1 nodes, the value of the
   * second-to-last recurrence step is additionally stored in z. */
  int i, j;
  double a, b, x_val_act, a_old;
  const double *x_act;
  double *y_act, *z_act;
  const double *alpha_act, *beta_act, *gamma_act;

  /* Traverse all nodes. */
  x_act = x;
  y_act = y;
  z_act = z;
  for (i = 0; i < size; i++)
  {
    a = 1.0;
    b = 0.0;
    x_val_act = *x_act;

    if (k == 0)
    {
      *y_act = 1.0;
      *z_act = 0.0;
    }
    else
    {
      alpha_act = &(alpha[k]);
      beta_act = &(beta[k]);
      gamma_act = &(gam[k]);
      for (j = k; j > 1; j--)
      {
        a_old = a;
        a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
        b = a_old*(*gamma_act);
        alpha_act--; beta_act--; gamma_act--;
      }
      if (i < size1)
        *z_act = a;
      *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
    }

    x_act++;
    y_act++;
    z_act++;
  }
}
static int eval_clenshaw_thresh2(const double *x, double *z, double *y, int size,
  int k, const double *alpha, const double *beta, const double *gam,
  const double threshold)
{
  /* Evaluate as in eval_clenshaw2, but return 1 as soon as a computed value
   * exceeds the threshold in absolute value, and 0 otherwise. */
  int i, j;
  const double *x_act;
  double *y_act, *z_act;
  const double *alpha_act, *beta_act, *gamma_act;
  const double threshold_abs = fabs(threshold);

  /* Traverse all nodes. */
  x_act = x;
  y_act = y;
  z_act = z;
  for (i = 0; i < size; i++)
  {
    double x_val_act = *x_act;

    alpha_act = &(alpha[k]);
    beta_act = &(beta[k]);
    gamma_act = &(gam[k]);
#ifdef FPT_CLENSHAW_USE_ONLY_LONG_DOUBLE
    long double a = 1.0, b = 0.0;
    for (j = k; j > 1; j--)
    {
      long double a_old = a;
      a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
      b = a_old*(*gamma_act);
      alpha_act--; beta_act--; gamma_act--;
    }
    *z_act = a;
    *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
#else
    double a = 1.0, b = 0.0;
    /* Iterate in double precision while the values stay well inside range. */
    for (j = k; j > 1 && fabs(a) < 1e247; j--)
    {
      double a_old = a;
      a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
      b = a_old*(*gamma_act);
      alpha_act--; beta_act--; gamma_act--;
    }
    if (j <= 1)
    {
      *z_act = a;
      *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
    }
    else
    {
      /* Near overflow: continue the recurrence in long double. */
      long double a_ld = a;
      long double b_ld = b;
      for (; j > 1; j--)
      {
        long double a_old = a_ld;
        a_ld = b_ld + a_old*((*alpha_act)*x_val_act+(*beta_act));
        b_ld = a_old*(*gamma_act);
        alpha_act--; beta_act--; gamma_act--;
      }
      *z_act = a_ld;
      *y_act = (a_ld*((*alpha_act)*x_val_act+(*beta_act))+b_ld);
    }
#endif
    if (fabs(*y_act) > threshold_abs)
      return 1;
    x_act++;
    y_act++;
    z_act++;
  }
  return 0;
}
static inline void eval_sum_clenshaw_fast(const int N, const int M,
  const double _Complex *a, const double *x, double _Complex *y,
  const double *alpha, const double *beta, const double *gam,
  const double lambda)
{
  int j, k;

  if (N == 0)
  {
    for (j = 0; j <= M; j++)
      y[j] = lambda * a[0];
  }
  else
  {
    for (j = 0; j <= M; j++)
    {
      double xc = x[j];
#ifdef FPT_CLENSHAW_USE_ONLY_LONG_DOUBLE
      long double _Complex tmp1 = a[N-1];
      long double _Complex tmp2 = a[N];
      for (k = N-1; k > 0; k--)
      {
        long double _Complex tmp3 = a[k-1] + tmp2 * gam[k];
        tmp2 *= (alpha[k] * xc + beta[k]);
        tmp2 += tmp1;
        tmp1 = tmp3;
      }
      tmp2 *= (alpha[0] * xc + beta[0]);
      y[j] = lambda * (tmp2 + tmp1);
#else
      double _Complex tmp1 = a[N-1];
      double _Complex tmp2 = a[N];
      /* Iterate in double precision while the values stay well inside range. */
      for (k = N-1; k > 0 && fabs(creal(tmp2)) < 1e247 && fabs(cimag(tmp2)) < 1e247; k--)
      {
        double _Complex tmp3 = a[k-1] + tmp2 * gam[k];
        tmp2 *= (alpha[k] * xc + beta[k]);
        tmp2 += tmp1;
        tmp1 = tmp3;
      }
      if (k == 0)
      {
        tmp2 *= (alpha[0] * xc + beta[0]);
        y[j] = lambda * (tmp2 + tmp1);
      }
      else
      {
        /* Near overflow: continue the recurrence in long double. */
        long double _Complex tmp1_ld = tmp1;
        long double _Complex tmp2_ld = tmp2;
        for (; k > 0; k--)
        {
          long double _Complex tmp3_ld = a[k-1] + tmp2_ld * gam[k];
          tmp2_ld *= (alpha[k] * xc + beta[k]);
          tmp2_ld += tmp1_ld;
          tmp1_ld = tmp3_ld;
        }
        tmp2_ld *= (alpha[0] * xc + beta[0]);
        y[j] = lambda * (tmp2_ld + tmp1_ld);
      }
#endif
    }
  }
}
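
/*
 * eval_sum_clenshaw_fast computes y[j] = lambda * sum_{k=0}^{N} a[k] P_k(x[j])
 * for all nodes x[j]; the transposed variants below accumulate the adjoint
 * sums a[k] = lambda * sum_{j=0}^{M} y[j] P_k(x[j]) by running the
 * recurrence forwards per node.
 */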
static void eval_sum_clenshaw_transposed(int N, int M, double _Complex *a, double *x,
  double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gam,
  double lambda)
{
  int j, k;
  double _Complex* it1 = temp;
  double _Complex* it2 = y;
  double _Complex aux;

  /* Scale the input and accumulate the zeroth coefficient. */
  a[0] = 0.0;
  for (j = 0; j <= M; j++)
  {
    it2[j] = lambda * y[j];
    a[0] += it2[j];
  }

  if (N > 0)
  {
    /* First recurrence step. */
    a[1] = 0.0;
    for (j = 0; j <= M; j++)
    {
      it1[j] = it2[j];
      it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
      a[1] += it2[j];
    }

    /* Remaining recurrence steps. */
    for (k = 2; k <= N; k++)
    {
      a[k] = 0.0;
      for (j = 0; j <= M; j++)
      {
        aux = it1[j];
        it1[j] = it2[j];
        it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gam[k-1] * aux;
        a[k] += it2[j];
      }
    }
  }
}
static void eval_sum_clenshaw_transposed_ld(int N, int M, double _Complex *a, double *x,
  double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gam,
  double lambda)
{
  /* Long double variant of eval_sum_clenshaw_transposed: the recurrence is
   * carried per node in extended precision. */
  int j, k;

  /* Clear the output coefficients. */
  for (k = 0; k <= N; k++)
    a[k] = 0.0;

  /* Accumulate the zeroth coefficient. */
  for (j = 0; j <= M; j++)
    a[0] += lambda * y[j];

  if (N > 0)
  {
    for (j = 0; j <= M; j++)
    {
      long double _Complex it2 = lambda * y[j];

      long double _Complex it1 = it2;
      it2 = it2 * (alpha[0] * x[j] + beta[0]);
      a[1] += it2;

      for (k = 2; k <= N; k++)
      {
        long double _Complex aux = it1;
        it1 = it2;
        it2 = it2*(alpha[k-1] * x[j] + beta[k-1]) + gam[k-1] * aux;
        a[k] += it2;
      }
    }
  }
}
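
/*
 * fpt_init allocates an fpt_set holding everything the cascades share:
 * Chebyshev node vectors for every level (xcvecs), FFTW DCT-II/DCT-III
 * plans for the lengths 4, 8, ..., 2^t, work buffers of length 2N, and,
 * unless FPT_NO_INIT_FPT_DATA is set, the per-transform slots set->dpt[m]
 * that fpt_precompute_1/fpt_precompute_2 fill in later.
 */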
fpt_set fpt_init(const int M, const int t, const unsigned int flags)
{
  /* Polynomial length, cascade level, and loop indices. */
  int plength, tau, m, k;
#ifdef _OPENMP
  int nthreads = X(get_num_threads)();
#endif

  /* Allocate and initialize the set. */
  fpt_set set = (fpt_set) nfft_malloc(sizeof(struct fpt_set_s_));
  set->flags = flags;
  set->M = M;
  set->t = t;
  set->N = 1<<t;

  if (!(flags & FPT_NO_INIT_FPT_DATA))
  {
    /* Allocate memory for the M transforms and mark them uninitialized. */
    set->dpt = (fpt_data*) nfft_malloc(M*sizeof(fpt_data));
    for (m = 0; m < set->M; m++)
      set->dpt[m].precomputed = false;
  }

  /* Create the Chebyshev node arrays, one per cascade level. */
  set->xcvecs = (double**) nfft_malloc((set->t)*sizeof(double*));
  plength = 4;
  for (tau = 1; tau < t+1; tau++)
  {
    set->xcvecs[tau-1] = (double*) nfft_malloc(plength*sizeof(double));
    for (k = 0; k < plength; k++)
    {
      set->xcvecs[tau-1][k] = cos(((k+0.5)*KPI)/plength);
    }
    plength = plength << 1;
  }

  /* Allocate the work arrays. */
  set->work = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
  set->result = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));

  /* Create the FFTW plans for the forward transforms (DCT-II). */
  set->plans_dct2 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t));
  set->kindsr = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
  set->kindsr[0] = FFTW_REDFT10;
  set->kindsr[1] = FFTW_REDFT10;
  for (tau = 0, plength = 4; tau < set->t; tau++, plength<<=1)
  {
#ifdef _OPENMP
#pragma omp critical (nfft_omp_critical_fftw_plan)
#endif
    {
#ifdef _OPENMP
      fftw_plan_with_nthreads(nthreads);
#endif
      set->plans_dct2[tau] =
        fftw_plan_many_r2r(1, &plength, 2, (double*)set->work, NULL,
          2, 1, (double*)set->result, NULL, 2, 1, set->kindsr,
          0U);
    }
  }

  /* Create the FFTW plans for the backward transforms (DCT-III). */
  set->plans_dct3 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t));
  set->kinds = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
  set->kinds[0] = FFTW_REDFT01;
  set->kinds[1] = FFTW_REDFT01;
  for (tau = 0, plength = 4; tau < set->t; tau++, plength<<=1)
  {
#ifdef _OPENMP
#pragma omp critical (nfft_omp_critical_fftw_plan)
#endif
    {
#ifdef _OPENMP
      fftw_plan_with_nthreads(nthreads);
#endif
      set->plans_dct3[tau] =
        fftw_plan_many_r2r(1, &plength, 2, (double*)set->work, NULL,
          2, 1, (double*)set->result, NULL, 2, 1, set->kinds,
          0U);
    }
  }
  nfft_free(set->kindsr);
  nfft_free(set->kinds);

  /* Allocate the auxiliary vectors. */
  set->vec3 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
  set->vec4 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
  set->z = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));

  /* Allocate the buffers for the direct (slow) algorithm. */
  set->xc_slow = (double*) nfft_malloc((set->N+1)*sizeof(double));
  set->temp = (double _Complex*) nfft_malloc((set->N+1)*sizeof(double _Complex));

  if (!(flags & FPT_NO_INIT_FPT_DATA))
  {
    for (m = 0; m < set->M; m++)
    {
      /* ... */
    }
  }

  return set;
}
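
/*
 * fpt_precompute_1 is the allocation pass for transform number m: it walks
 * the cascade levels tau = 1, ..., t-1, determines the block range
 * [firstl, lastl] on each level, and allocates the sample array
 * steps[tau][l].a (four blocks of clength entries, one per matrix entry)
 * together with the coefficient buffer data->_alpha. The numerical contents
 * are filled in by fpt_precompute_2.
 */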
void fpt_precompute_1(fpt_set set, const int m, int k_start)
{
  int tau;      /* Cascade level                          */
  int l;        /* Block index                            */
  int plength;  /* Polynomial length at the current level */
  int clength;  /* Length of the matrix entry vectors     */
  int firstl;   /* First block index at the current level */
  int lastl;    /* Last block index at the current level  */
  int k_start_tilde;
  int N_tilde;
  fpt_data *data;

  /* Get the pointer to the data for transform number m. */
  data = &(set->dpt[m]);

  /* Nothing to do if the step arrays already exist. */
  if (data->steps != NULL)
    return;

  data->k_start = k_start;

  /* Allocate the buffer for the recurrence tail coefficients. */
  data->alphaN = (double*) nfft_malloc(3*(set->t-1)*sizeof(double));
  /* ... */

  k_start_tilde = K_START_TILDE(k_start,set->N);
  N_tilde = N_TILDE(set->N);

  /* Allocate the cascade step arrays, level by level. */
  /* ... */
  plength = 4;
  for (tau = 1; tau < set->t; tau++)
  {
    /* Compute the first and last block index at this level. */
    firstl = FIRST_L(k_start_tilde,plength);
    lastl = LAST_L(N_tilde,plength);
    /* ... */

    for (l = firstl; l <= lastl; l++)
    {
      /* Allocate the four matrix entry vectors for this block. */
      clength = plength/2;
      /* ... */
      data->steps[tau][l].a = (double*) nfft_malloc(sizeof(double)*clength*4);
    }

    plength = plength << 1;
  }

  /* Allocate the buffer for the recurrence coefficients. */
  data->_alpha = (double*) nfft_malloc(3*(set->N+1)*sizeof(double));
}
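
/*
 * fpt_precompute_2 samples the cascade matrices by Clenshaw evaluation and
 * checks each block against the threshold: eval_clenshaw_thresh2 returns
 * nonzero as soon as an intermediate value exceeds it, in which case the
 * block is resampled at the full stabilization length N_stab (the next
 * power of two >= (l+1)*2^(tau+1)) and recorded via the ts/Ns fields for
 * the slow path taken in fpt_trafo.
 */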
void fpt_precompute_2(fpt_set set, const int m, double *alpha, double *beta,
  double *gam, int k_start, const double threshold)
{
  int tau;          /* Cascade level                                  */
  int l;            /* Block index                                    */
  int plength;      /* Polynomial length at the current level         */
  int degree;       /* Polynomial degree at the current level         */
  int firstl;       /* First block index at the current level         */
  int lastl;        /* Last block index at the current level          */
  int plength_stab; /* Polynomial length in stabilized steps          */
  int degree_stab;  /* Polynomial degree in stabilized steps          */
  int clength;      /* Length of the matrix entry vectors             */
  int clength_1;
  int clength_2;
  int t_stab;
  int N_stab;
  int k_start_tilde;
  int N_tilde;
  int needstab;     /* Whether the current block needs stabilization  */
  fpt_data *data;
  const double *calpha;
  const double *cbeta;
  const double *cgamma;

  /* Get the pointer to the data for transform number m. */
  data = &(set->dpt[m]);

  /* Nothing to do if this transform is already precomputed. */
  if ((data->steps != NULL) && (data->precomputed))
    return;

  /* Store the recurrence coefficients needed to extend a cascade of
   * length 2^tau by its last term. */
  for (tau = 2; tau <= set->t; tau++)
  {
    data->alphaN[tau-2] = alpha[1<<tau];
    data->betaN[tau-2] = beta[1<<tau];
    data->gammaN[tau-2] = gam[1<<tau];
  }

  k_start_tilde = K_START_TILDE(k_start,set->N);
  N_tilde = N_TILDE(set->N);

  /* Sample the matrices for all levels and blocks. */
  plength = 4;
  for (tau = 1; tau < set->t; tau++)
  {
    /* Degree of the polynomials at the current level. */
    degree = plength>>1;
    /* Compute the first and last block index at this level. */
    firstl = FIRST_L(k_start_tilde,plength);
    lastl = LAST_L(N_tilde,plength);

    for (l = firstl; l <= lastl; l++)
    {
      clength = plength/2;

      /* Pointers to the recurrence coefficients for this block. */
      calpha = &(alpha[plength*l+1+1]);
      cbeta = &(beta[plength*l+1+1]);
      cgamma = &(gam[plength*l+1+1]);

      double *a11 = data->steps[tau][l].a;
      double *a12 = a11+clength;
      double *a21 = a12+clength;
      double *a22 = a21+clength;

      if (set->flags & FPT_NO_STABILIZATION)
      {
        /* Sample the matrix entries without threshold checking. */
        eval_clenshaw2(set->xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
          cgamma);
        eval_clenshaw2(set->xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
          cgamma);
        needstab = 0;
      }
      else
      {
        needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a11, a21, clength,
          degree-1, calpha, cbeta, cgamma, threshold);
        if (needstab == 0)
          needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a12, a22, clength,
            degree, calpha, cbeta, cgamma, threshold);
      }

      if (needstab == 0)
      {
        /* No stabilization: store the recurrence factor for this block. */
        data->steps[tau][l].g = gam[plength*l+1+1];
      }
      else
      {
        /* Stabilize: resample the matrices at the full length. */
        degree_stab = degree*(2*l+1);
        X(next_power_of_2_exp_int)((l+1)*(1<<(tau+1)),&N_stab,&t_stab);

        /* The old arrays are too small. */
        nfft_free(data->steps[tau][l].a);

        plength_stab = N_stab;

        if (set->flags & FPT_AL_SYMMETRY)
        {
          if (m <= 1)
          {
            clength_1 = plength_stab;
            clength_2 = plength_stab;
            data->steps[tau][l].a = (double*) nfft_malloc(sizeof(double)*(clength_1*2+clength_2*2));
            a11 = data->steps[tau][l].a;
            a12 = a11+clength_1;
            a21 = a12+clength_1;
            a22 = a21+clength_2;
            calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
            eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1,
              clength_2, degree_stab-1, calpha, cbeta, cgamma);
            eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1,
              clength_2, degree_stab+0, calpha, cbeta, cgamma);
          }
          else if (m%2 == 0)
          {
            /* Even m: only the upper row is needed. */
            clength = plength_stab/2;
            data->steps[tau][l].a = (double*) nfft_malloc(sizeof(double)*clength*2);
            a11 = data->steps[tau][l].a;
            a12 = a11+clength;
            calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
            eval_clenshaw(set->xcvecs[t_stab-2], a11, clength,
              degree_stab-2, calpha, cbeta, cgamma);
            eval_clenshaw(set->xcvecs[t_stab-2], a12, clength,
              degree_stab-1, calpha, cbeta, cgamma);
          }
          else
          {
            /* Odd m: only the lower row is needed. */
            clength = plength_stab/2;
            data->steps[tau][l].a = (double*) nfft_malloc(sizeof(double)*clength*2);
            a21 = data->steps[tau][l].a;
            a22 = a21+clength;
            calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
            eval_clenshaw(set->xcvecs[t_stab-2], a21, clength,
              degree_stab-1,calpha, cbeta, cgamma);
            eval_clenshaw(set->xcvecs[t_stab-2], a22, clength,
              degree_stab+0, calpha, cbeta, cgamma);
          }
        }
        else
        {
          clength_1 = plength_stab;
          clength_2 = plength_stab;
          data->steps[tau][l].a = (double*) nfft_malloc(sizeof(double)*(clength_1*2+clength_2*2));
          a11 = data->steps[tau][l].a;
          a12 = a11+clength_1;
          a21 = a12+clength_1;
          a22 = a21+clength_2;
          calpha = &(alpha[2]);
          cbeta = &(beta[2]);
          cgamma = &(gam[2]);
          eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
            calpha, cbeta, cgamma);
          eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
            calpha, cbeta, cgamma);
        }

        data->steps[tau][l].g = gam[1+1];
        data->steps[tau][l].ts = t_stab;
        data->steps[tau][l].Ns = N_stab;
      }
    }
    plength = plength << 1;
  }
  data->precomputed = true;

  if (set->flags & FPT_PERSISTENT_DATA)
  {
    /* Keep pointers to the caller's coefficient arrays. */
    data->_alpha = (double*) alpha;
    data->_beta = (double*) beta;
    data->_gamma = (double*) gam;
  }
  else
  {
    /* Copy the coefficients into the buffers allocated in fpt_precompute_1. */
    memcpy(data->_alpha,alpha,(set->N+1)*sizeof(double));
    memcpy(data->_beta,beta,(set->N+1)*sizeof(double));
    memcpy(data->_gamma,gam,(set->N+1)*sizeof(double));
  }
}
void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta,
  double *gam, int k_start, const double threshold)
{
  fpt_precompute_1(set, m, k_start);
  fpt_precompute_2(set, m, alpha, beta, gam, k_start, threshold);
}
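
/*
 * Typical usage of the public API (a sketch with hypothetical data arrays;
 * error handling and the coefficient conventions of the target basis are
 * omitted -- see the library documentation):
 *
 *   int t = 10, N = 1 << t;
 *   fpt_set set = fpt_init(1, t, 0U);
 *   // alpha, beta, gam: three-term recurrence coefficients of the source
 *   // basis; x, y: complex coefficient arrays of length N+1.
 *   fpt_precompute(set, 0, alpha, beta, gam, 0, 1000.0);
 *   fpt_trafo(set, 0, x, y, N, 0U);  // x: input coefficients, y: output
 *   fpt_finalize(set);
 */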
void fpt_trafo_direct(fpt_set set, const int m, const double _Complex *x,
  double _Complex *y, const int k_end, const unsigned int flags)
{
  int j;
  int Nk; /* Transform length, next power of two >= k_end+1 */
  int tk; /* Exponent of Nk                                 */
  double norm;
  fpt_data *data = &(set->dpt[m]);

  X(next_power_of_2_exp_int)(k_end+1,&Nk,&tk);
  norm = 1.0/Nk;

  if (flags & FPT_FUNCTION_VALUES)
  {
    /* Fill the array with Chebyshev nodes. */
    for (j = 0; j <= k_end; j++)
    {
      set->xc_slow[j] = cos((KPI*(j+0.5))/(k_end+1));
    }

    memset(set->result,0U,data->k_start*sizeof(double _Complex));
    memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));

    eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
      y, &data->_alpha[1], &data->_beta[1], &data->_gamma[1], data->gamma_m1);
  }
  else
  {
    memset(set->temp,0U,data->k_start*sizeof(double _Complex));
    memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));

    eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
      set->result, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
      data->gamma_m1);

    fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result,
      (double*)set->result);

    set->result[0] *= 0.5;
    for (j = 0; j < Nk; j++)
    {
      set->result[j] *= norm;
    }

    memcpy(y,set->result,(k_end+1)*sizeof(double _Complex));
  }
}
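
/*
 * fpt_trafo is the fast path: the input coefficients are spread over blocks
 * of length plength = 4 in set->work, and every level tau merges
 * neighbouring blocks with one cascade step, doubling plength until
 * plength = Nk. Stabilized blocks leave the cascade and are accumulated
 * directly into set->result at their full length; both contributions are
 * added before the final diagonal correction with alpha_0, beta_0 and
 * gamma_m1.
 */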
void fpt_trafo(fpt_set set, const int m, const double _Complex *x,
  double _Complex *y, const int k_end, const unsigned int flags)
{
  /* Get the transform data. */
  fpt_data *data = &(set->dpt[m]);
  int Nk;           /* Transform length, next power of two >= k_end */
  int tk;           /* Exponent of Nk                               */
  int k_start_tilde, k_end_tilde;
  int tau;          /* Cascade level                                */
  int firstl, lastl;
  int l, k;
  int plength;      /* Polynomial length at the current level       */
  int plength_stab; /* Polynomial length in stabilized steps        */
  int t_stab;
  fpt_step *step;
  fftw_plan plan = NULL;
  int length = k_end+1;
  fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};

  /* Pointers into the work array and the input. */
  double _Complex *work_ptr;
  const double _Complex *x_ptr;

  /* Use the direct algorithm for small transforms. */
  if (k_end < FPT_BREAK_EVEN)
  {
    fpt_trafo_direct(set, m, x, y, k_end, flags);
    return;
  }

  X(next_power_of_2_exp_int)(k_end,&Nk,&tk);
  k_start_tilde = K_START_TILDE(data->k_start,Nk);
  k_end_tilde = K_END_TILDE(k_end,Nk);

  /* Plan a final DCT-III if function values are requested. */
  if (flags & FPT_FUNCTION_VALUES)
  {
#ifdef _OPENMP
    int nthreads = X(get_num_threads)();
#pragma omp critical (nfft_omp_critical_fftw_plan)
    {
      fftw_plan_with_nthreads(nthreads);
#endif
      plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
        (double*)set->work, NULL, 2, 1, kinds, 0U);
#ifdef _OPENMP
    }
#endif
  }

  /* Initialize the working arrays. */
  memset(set->result,0U,2*Nk*sizeof(double _Complex));

  /* Set the first 2*data->k_start coefficients to zero. */
  memset(set->work,0U,2*data->k_start*sizeof(double _Complex));

  work_ptr = &set->work[2*data->k_start];
  x_ptr = x;

  for (k = 0; k <= k_end_tilde-data->k_start; k++)
  {
    *work_ptr++ = *x_ptr++;
    *work_ptr++ = K(0.0);
  }

  /* Set the remaining coefficients to zero. */
  memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double _Complex));

  /* If the last coefficient was clipped, fold it back in via the
   * three-term recurrence. */
  if (k_end == Nk)
  {
    set->work[2*(Nk-2)] += data->gammaN[tk-2]*x[Nk-data->k_start];
    set->work[2*(Nk-1)] += data->betaN[tk-2]*x[Nk-data->k_start];
    set->work[2*(Nk-1)+1] = data->alphaN[tk-2]*x[Nk-data->k_start];
  }

  /* Compute the remaining steps. */
  plength = 4;
  for (tau = 1; tau < tk; tau++)
  {
    /* Compute the first and last block index at this level. */
    firstl = FIRST_L(k_start_tilde,plength);
    lastl = LAST_L(k_end_tilde,plength);

    /* Compute the cascade step for all blocks. */
    for (l = firstl; l <= lastl; l++)
    {
      /* Copy the second pair of polynomial coefficients. */
      memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double _Complex));
      memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double _Complex));
      memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double _Complex));
      memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double _Complex));

      /* Shuffle the first pair. */
      memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double _Complex));
      memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double _Complex));
      memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double _Complex));

      /* Get the pointer to the precomputed data for this step. */
      step = &(data->steps[tau][l]);

      /* Check if the step is stable. */
      if (step->stable)
      {
        /* Check if we can use a symmetrized step. */
        if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
        {
          int clength = 1<<(tau);
          double *a11 = step->a;
          double *a12 = a11+clength;
          double *a21 = a12+clength;
          double *a22 = a21+clength;
          /* Multiply the third and fourth polynomial with the matrix U. */
          fpt_do_step_symmetric(set->vec3, set->vec4, a11,
            a12, a21, a22, step->g, tau, set);
        }
        else
        {
          int clength = 1<<(tau+1);
          double *a11 = step->a;
          double *a12 = a11+clength;
          double *a21 = a12+clength;
          double *a22 = a21+clength;
          /* Multiply the third and fourth polynomial with the matrix U. */
          fpt_do_step(set->vec3, set->vec4, a11, a12,
            a21, a22, step->g, tau, set);
        }

        if (step->g != 0.0)
        {
          for (k = 0; k < plength; k++)
          {
            set->work[plength*2*l+k] += set->vec3[k];
          }
        }
        for (k = 0; k < plength; k++)
        {
          set->work[plength*(2*l+1)+k] += set->vec4[k];
        }
      }
      else
      {
        /* The step is stabilized: use the full-length matrices. */
        plength_stab = step->Ns;
        t_stab = step->ts;

        /* Zero-pad the vectors to the stabilization length. */
        memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
        memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));

        if (set->flags & FPT_AL_SYMMETRY)
        {
          if (m <= 1)
          {
            int clength_1 = plength_stab;
            int clength_2 = plength_stab;
            double *a11 = step->a;
            double *a12 = a11+clength_1;
            double *a21 = a12+clength_1;
            double *a22 = a21+clength_2;
            fpt_do_step_symmetric(set->vec3, set->vec4, a11, a12,
              a21, a22, step->g, t_stab-1, set);
          }
          else if (m%2 == 0)
          {
            int clength = plength_stab/2;
            double *a11 = step->a;
            double *a12 = a11+clength;
            fpt_do_step_symmetric_u(set->vec3, set->vec4, a11, a12,
              NULL, NULL,
              set->xcvecs[t_stab-2], step->g, t_stab-1, set);
          }
          else
          {
            int clength = plength_stab/2;
            double *a21 = step->a;
            double *a22 = a21+clength;
            fpt_do_step_symmetric_l(set->vec3, set->vec4,
              NULL, NULL, a21,
              a22, set->xcvecs[t_stab-2], step->g, t_stab-1, set);
          }
        }
        else
        {
          int clength_1 = plength_stab;
          int clength_2 = plength_stab;
          double *a11 = step->a;
          double *a12 = a11+clength_1;
          double *a21 = a12+clength_1;
          double *a22 = a21+clength_2;
          fpt_do_step(set->vec3, set->vec4, a11, a12,
            a21, a22, step->g, t_stab-1, set);
        }

        if (step->g != 0.0)
        {
          for (k = 0; k < plength_stab; k++)
          {
            set->result[k] += set->vec3[k];
          }
        }

        for (k = 0; k < plength_stab; k++)
        {
          set->result[Nk+k] += set->vec4[k];
        }
      }
    }
    /* Double the polynomial length for the next level. */
    plength = plength<<1;
  }

  /* Add the cascade output to the contributions collected from the
   * stabilized steps. */
  for (k = 0; k < 2*Nk; k++)
  {
    set->result[k] += set->work[k];
  }

  /* The final step: the diagonal correction with alpha_0, beta_0, gamma_m1. */
  y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] +
    data->alpha_0*set->result[Nk+1]*0.5);
  y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+
    data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
  y[k_end-1] = data->gamma_m1*(set->result[k_end-1] +
    data->beta_0*set->result[Nk+k_end-1] +
    data->alpha_0*set->result[Nk+k_end-2]*0.5);
  y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]);
  for (k = 2; k <= k_end-2; k++)
  {
    y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] +
      data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
  }

  if (flags & FPT_FUNCTION_VALUES)
  {
    /* Transform the Chebyshev coefficients to function values. */
    fftw_execute_r2r(plan,(double*)y,(double*)y);
#ifdef _OPENMP
#pragma omp critical (nfft_omp_critical_fftw_plan)
#endif
    fftw_destroy_plan(plan);
    for (k = 0; k <= k_end; k++)
    {
      y[k] *= 0.5;
    }
  }
}
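
/*
 * The transposed transforms traverse the same cascade in reverse order with
 * the transposed step macros (the DCT-II and DCT-III swap roles), so that
 * fpt_transposed computes, up to rounding, the adjoint of fpt_trafo.
 */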
void fpt_transposed_direct(fpt_set set, const int m, double _Complex *x,
  double _Complex *y, const int k_end, const unsigned int flags)
{
  int j;
  int Nk; /* Transform length, next power of two >= k_end+1 */
  int tk; /* Exponent of Nk                                 */
  double norm;
  fpt_data *data = &(set->dpt[m]);

  X(next_power_of_2_exp_int)(k_end+1,&Nk,&tk);
  norm = 1.0/Nk;

  if (flags & FPT_FUNCTION_VALUES)
  {
    /* Fill the array with Chebyshev nodes. */
    for (j = 0; j <= k_end; j++)
    {
      set->xc_slow[j] = cos((KPI*(j+0.5))/(k_end+1));
    }

    eval_sum_clenshaw_transposed(k_end, k_end, set->temp, set->xc_slow,
      y, set->work, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
      data->gamma_m1);

    memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*
      sizeof(double _Complex));
  }
  else
  {
    memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
    memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double _Complex));

    for (j = 0; j < Nk; j++)
    {
      set->result[j] *= norm;
    }

    fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result,
      (double*)set->result);

    eval_sum_clenshaw_transposed_ld(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
      set->result, set->work, &data->_alpha[1], &data->_beta[1],
      &data->_gamma[1], data->gamma_m1);

    memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double _Complex));
  }
}
void fpt_transposed(fpt_set set, const int m, double _Complex *x,
  double _Complex *y, const int k_end, const unsigned int flags)
{
  /* Get the transform data. */
  fpt_data *data = &(set->dpt[m]);
  int Nk, tk;
  int k_start_tilde, k_end_tilde;
  int tau, firstl, lastl, l, k;
  int plength, plength_stab, t_stab;
  fpt_step *step;
  fftw_plan plan = NULL;
  int length = k_end+1;
  fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};

  /* Use the direct algorithm for small transforms. */
  if (k_end < FPT_BREAK_EVEN)
  {
    fpt_transposed_direct(set, m, x, y, k_end, flags);
    return;
  }

  X(next_power_of_2_exp_int)(k_end,&Nk,&tk);
  k_start_tilde = K_START_TILDE(data->k_start,Nk);
  k_end_tilde = K_END_TILDE(k_end,Nk);

  if (flags & FPT_FUNCTION_VALUES)
  {
#ifdef _OPENMP
    int nthreads = X(get_num_threads)();
#pragma omp critical (nfft_omp_critical_fftw_plan)
    {
      fftw_plan_with_nthreads(nthreads);
#endif
      plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
        (double*)set->work, NULL, 2, 1, kinds, 0U);
#ifdef _OPENMP
    }
#endif

    fftw_execute_r2r(plan,(double*)y,(double*)set->result);
#ifdef _OPENMP
#pragma omp critical (nfft_omp_critical_fftw_plan)
#endif
    fftw_destroy_plan(plan);
    for (k = 0; k <= k_end; k++)
    {
      set->result[k] *= 0.5;
    }
  }
  else
  {
    memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
  }

  /* Follow the cascade backwards. */
  memset(set->work,0U,2*Nk*sizeof(double _Complex));

  /* The transposed final step. */
  for (k = 0; k <= k_end; k++)
  {
    set->work[k] = data->gamma_m1*set->result[k];
  }

  set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] +
    data->alpha_0*set->result[1]);
  for (k = 1; k < k_end; k++)
  {
    set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] +
      data->alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
  }

  memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double _Complex));

  /* ... */

  memcpy(set->result,set->work,2*Nk*sizeof(double _Complex));

  /* Compute the remaining steps in reverse order. */
  plength = Nk;
  for (tau = tk-1; tau >= 1; tau--)
  {
    /* Compute the first and last block index at this level. */
    firstl = FIRST_L(k_start_tilde,plength);
    lastl = LAST_L(k_end_tilde,plength);

    /* Compute the transposed cascade step for all blocks. */
    for (l = firstl; l <= lastl; l++)
    {
      /* Copy the coefficient pairs. */
      memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double _Complex));
      memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double _Complex));

      memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
        (plength/2)*sizeof(double _Complex));

      /* Get the pointer to the precomputed data for this step. */
      step = &(data->steps[tau][l]);

      /* Check if the step is stable. */
      if (step->stable)
      {
        if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
        {
          int clength = 1<<(tau);
          double *a11 = step->a;
          double *a12 = a11+clength;
          double *a21 = a12+clength;
          double *a22 = a21+clength;
          fpt_do_step_t_symmetric(set->vec3, set->vec4, a11, a12,
            a21, a22, step->g, tau, set);
        }
        else
        {
          int clength = 1<<(tau+1);
          double *a11 = step->a;
          double *a12 = a11+clength;
          double *a21 = a12+clength;
          double *a22 = a21+clength;
          fpt_do_step_t(set->vec3, set->vec4, a11, a12,
            a21, a22, step->g, tau, set);
        }
        memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double _Complex));

        for (k = 0; k < plength; k++)
        {
          set->work[plength*(4*l+2)/2+k] = set->vec3[k];
        }
      }
      else
      {
        /* The step is stabilized: use the full-length matrices. */
        plength_stab = step->Ns;
        t_stab = step->ts;

        memcpy(set->vec3,set->result,plength_stab*sizeof(double _Complex));
        memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double _Complex));

        if (set->flags & FPT_AL_SYMMETRY)
        {
          if (m <= 1)
          {
            int clength_1 = plength_stab;
            int clength_2 = plength_stab;
            double *a11 = step->a;
            double *a12 = a11+clength_1;
            double *a21 = a12+clength_1;
            double *a22 = a21+clength_2;
            fpt_do_step_t_symmetric(set->vec3, set->vec4, a11, a12,
              a21, a22, step->g, t_stab-1, set);
          }
          else if (m%2 == 0)
          {
            int clength = plength_stab/2;
            double *a11 = step->a;
            double *a12 = a11+clength;
            fpt_do_step_t_symmetric_u(set->vec3, set->vec4, a11, a12,
              set->xcvecs[t_stab-2], step->g, t_stab-1, set);
          }
          else
          {
            int clength = plength_stab/2;
            double *a21 = step->a;
            double *a22 = a21+clength;
            fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
              a21, a22, set->xcvecs[t_stab-2], step->g, t_stab-1, set);
          }
        }
        else
        {
          int clength_1 = plength_stab;
          int clength_2 = plength_stab;
          double *a11 = step->a;
          double *a12 = a11+clength_1;
          double *a21 = a12+clength_1;
          double *a22 = a21+clength_2;
          fpt_do_step_t(set->vec3, set->vec4, a11, a12,
            a21, a22, step->g, t_stab-1, set);
        }

        memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double _Complex));

        for (k = 0; k < plength; k++)
        {
          set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
        }
      }
    }
    /* Halve the polynomial length for the next level. */
    plength = plength>>1;
  }

  /* Copy the result to the output. */
  for (k = 0; k <= k_end_tilde-data->k_start; k++)
  {
    x[k] = set->work[2*(data->k_start+k)];
  }
  if (k_end == Nk)
  {
    x[Nk-data->k_start] =
        data->gammaN[tk-2]*set->work[2*(Nk-2)]
      + data->betaN[tk-2] *set->work[2*(Nk-1)]
      + data->alphaN[tk-2]*set->work[2*(Nk-1)+1];
  }
}
void fpt_finalize(fpt_set set)
{
  int tau;
  int l;
  int m;
  int k_start_tilde, N_tilde;
  int firstl, lastl;
  int plength;
  fpt_data *data;
  const int M = set->M;

  /* Free the per-transform data. */
  if (!(set->flags & FPT_NO_INIT_FPT_DATA))
  {
    for (m = 0; m < M; m++)
    {
      data = &(set->dpt[m]);
      if (data->steps != NULL)
      {
        k_start_tilde = K_START_TILDE(data->k_start,set->N);
        N_tilde = N_TILDE(set->N);
        plength = 4;
        for (tau = 1; tau < set->t; tau++)
        {
          /* Compute the first and last block index at this level. */
          firstl = FIRST_L(k_start_tilde,plength);
          lastl = LAST_L(N_tilde,plength);

          /* Free the cascade step arrays. */
          for (l = firstl; l <= lastl; l++)
          {
            if (data->steps[tau][l].a != NULL)
            {
              nfft_free(data->steps[tau][l].a);
              data->steps[tau][l].a = NULL;
            }
          }
          nfft_free(data->steps[tau]);
          data->steps[tau] = NULL;
          plength = plength<<1;
        }
        nfft_free(data->steps);
        data->steps = NULL;
      }

      /* Free the recurrence coefficient buffer. */
      if (!(set->flags & FPT_PERSISTENT_DATA))
      {
        if (data->_alpha != NULL)
        {
          nfft_free(data->_alpha);
          data->_alpha = NULL;
        }
      }
    }

    nfft_free(set->dpt);
  }

  /* Free the Chebyshev node arrays. */
  for (tau = 1; tau < set->t+1; tau++)
  {
    nfft_free(set->xcvecs[tau-1]);
    set->xcvecs[tau-1] = NULL;
  }
  nfft_free(set->xcvecs);

  /* Free the work arrays. */
  nfft_free(set->work);
  nfft_free(set->result);

  /* Destroy the FFTW plans. */
  for (tau = 0; tau < set->t; tau++)
  {
#ifdef _OPENMP
#pragma omp critical (nfft_omp_critical_fftw_plan)
#endif
    {
      fftw_destroy_plan(set->plans_dct3[tau]);
      fftw_destroy_plan(set->plans_dct2[tau]);
    }
    set->plans_dct3[tau] = NULL;
    set->plans_dct2[tau] = NULL;
  }
  nfft_free(set->plans_dct3);
  nfft_free(set->plans_dct2);
  set->plans_dct3 = NULL;
  set->plans_dct2 = NULL;

  /* Free the auxiliary vectors. */
  nfft_free(set->vec3);
  nfft_free(set->vec4);
  nfft_free(set->z);

  /* Free the buffers for the direct algorithm. */
  nfft_free(set->xc_slow);
  set->xc_slow = NULL;
  nfft_free(set->temp);
  set->temp = NULL;

  /* Free the set itself. */
  nfft_free(set);
}