NFFT  3.5.3
fastsum_benchomp_detail.c
1 /*
2  * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 #include <stdio.h>
20 #include <math.h>
21 #include <string.h>
22 #include <stdlib.h>
23 #include <complex.h>
24 
25 #include "nfft3.h"
26 #include "infft.h"
27 #include "fastsum.h"
28 #include "kernels.h"
29 #ifdef _OPENMP
30 #include <omp.h>
31 #endif
32 
33 int bench_openmp(FILE *infile, int n, int m, int p,
34  C (*kernel)(R, int, const R *), R c, R eps_I, R eps_B)
35 {
36  fastsum_plan my_fastsum_plan;
37  int d, L, M;
38  int t, j;
39  R re, im;
40  R r_max = K(0.25) - my_fastsum_plan.eps_B / K(2.0);
41  ticks t0, t1;
42  R tt_total;
43 
44  fscanf(infile, "%d %d %d", &d, &L, &M);
45 
46 #ifdef _OPENMP
47  FFTW(import_wisdom_from_filename)("fastsum_benchomp_detail_threads.plan");
48 #else
49  FFTW(import_wisdom_from_filename)("fastsum_benchomp_detail_single.plan");
50 #endif
51 
52  fastsum_init_guru(&my_fastsum_plan, d, L, M, kernel, &c, NEARFIELD_BOXES, n,
53  m, p, eps_I, eps_B);
54 
55 #ifdef _OPENMP
56  FFTW(export_wisdom_to_filename)("fastsum_benchomp_detail_threads.plan");
57 #else
58  FFTW(export_wisdom_to_filename)("fastsum_benchomp_detail_single.plan");
59 #endif
60 
61  for (j = 0; j < L; j++)
62  {
63  for (t = 0; t < d; t++)
64  {
65  R v;
66  fscanf(infile, __FR__, &v);
67  my_fastsum_plan.x[d * j + t] = v * r_max;
68  }
69  }
70 
71  for (j = 0; j < L; j++)
72  {
73  fscanf(infile, __FR__ " " __FR__, &re, &im);
74  my_fastsum_plan.alpha[j] = re + II * im;
75  }
76 
77  for (j = 0; j < M; j++)
78  {
79  for (t = 0; t < d; t++)
80  {
81  R v;
82  fscanf(infile, __FR__, &v);
83  my_fastsum_plan.y[d * j + t] = v * r_max;
84  }
85  }
86 
88  t0 = getticks();
89  fastsum_precompute(&my_fastsum_plan);
90 
92  fastsum_trafo(&my_fastsum_plan);
93  t1 = getticks();
94  tt_total = NFFT(elapsed_seconds)(t1, t0);
95 
96 #ifndef MEASURE_TIME
97  my_fastsum_plan.MEASURE_TIME_t[0] = K(0.0);
98  my_fastsum_plan.MEASURE_TIME_t[1] = K(0.0);
99  my_fastsum_plan.MEASURE_TIME_t[2] = K(0.0);
100  my_fastsum_plan.MEASURE_TIME_t[3] = K(0.0);
101  my_fastsum_plan.MEASURE_TIME_t[4] = K(0.0);
102  my_fastsum_plan.MEASURE_TIME_t[5] = K(0.0);
103  my_fastsum_plan.MEASURE_TIME_t[6] = K(0.0);
104  my_fastsum_plan.MEASURE_TIME_t[7] = K(0.0);
105  my_fastsum_plan.mv1.MEASURE_TIME_t[0] = K(0.0);
106  my_fastsum_plan.mv1.MEASURE_TIME_t[2] = K(0.0);
107  my_fastsum_plan.mv2.MEASURE_TIME_t[0] = K(0.0);
108  my_fastsum_plan.mv2.MEASURE_TIME_t[2] = K(0.0);
109 #endif
110 #ifndef MEASURE_TIME_FFTW
111  my_fastsum_plan.mv1.MEASURE_TIME_t[1] = K(0.0);
112  my_fastsum_plan.mv2.MEASURE_TIME_t[1] = K(0.0);
113 #endif
114 
115  printf(
116  "%.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ " %.6" __FES__ " %.6" __FES__ " %6" __FES__ "\n",
117  my_fastsum_plan.MEASURE_TIME_t[0], my_fastsum_plan.MEASURE_TIME_t[1],
118  my_fastsum_plan.MEASURE_TIME_t[2], my_fastsum_plan.MEASURE_TIME_t[3],
119  my_fastsum_plan.MEASURE_TIME_t[4], my_fastsum_plan.MEASURE_TIME_t[5],
120  my_fastsum_plan.MEASURE_TIME_t[6], my_fastsum_plan.MEASURE_TIME_t[7],
121  tt_total - my_fastsum_plan.MEASURE_TIME_t[0]
122  - my_fastsum_plan.MEASURE_TIME_t[1]
123  - my_fastsum_plan.MEASURE_TIME_t[2]
124  - my_fastsum_plan.MEASURE_TIME_t[3]
125  - my_fastsum_plan.MEASURE_TIME_t[4]
126  - my_fastsum_plan.MEASURE_TIME_t[5]
127  - my_fastsum_plan.MEASURE_TIME_t[6]
128  - my_fastsum_plan.MEASURE_TIME_t[7], tt_total,
129  my_fastsum_plan.mv1.MEASURE_TIME_t[0],
130  my_fastsum_plan.mv1.MEASURE_TIME_t[1],
131  my_fastsum_plan.mv1.MEASURE_TIME_t[2],
132  my_fastsum_plan.mv2.MEASURE_TIME_t[0],
133  my_fastsum_plan.mv2.MEASURE_TIME_t[1],
134  my_fastsum_plan.mv2.MEASURE_TIME_t[2]);
135 
136  fastsum_finalize(&my_fastsum_plan);
137 
138  return 0;
139 }
140 
141 int main(int argc, char **argv)
142 {
143  int n;
144  int m;
145  int p;
146  char *s;
147  C (*kernel)(R, int, const R *);
148  R c;
149  R eps_I;
150  R eps_B;
152 #ifdef _OPENMP
153  int nthreads;
154 
155  if (argc != 9)
156  return EXIT_FAILURE;
157 
158  nthreads = atoi(argv[8]);
159  FFTW(init_threads)();
160  omp_set_num_threads(nthreads);
161 #else
162  if (argc != 8)
163  return EXIT_FAILURE;
164 #endif
165 
166  n = atoi(argv[1]);
167  m = atoi(argv[2]);
168  p = atoi(argv[3]);
169  s = argv[4];
170  c = atof(argv[5]);
171  eps_I = (R)(atof(argv[6]));
172  eps_B = (R)(atof(argv[7]));
173  if (strcmp(s, "gaussian") == 0)
174  kernel = gaussian;
175  else if (strcmp(s, "multiquadric") == 0)
176  kernel = multiquadric;
177  else if (strcmp(s, "inverse_multiquadric") == 0)
178  kernel = inverse_multiquadric;
179  else if (strcmp(s, "logarithm") == 0)
180  kernel = logarithm;
181  else if (strcmp(s, "thinplate_spline") == 0)
182  kernel = thinplate_spline;
183  else if (strcmp(s, "one_over_square") == 0)
184  kernel = one_over_square;
185  else if (strcmp(s, "one_over_modulus") == 0)
186  kernel = one_over_modulus;
187  else if (strcmp(s, "one_over_x") == 0)
188  kernel = one_over_x;
189  else if (strcmp(s, "inverse_multiquadric3") == 0)
190  kernel = inverse_multiquadric3;
191  else if (strcmp(s, "sinc_kernel") == 0)
192  kernel = sinc_kernel;
193  else if (strcmp(s, "cosc") == 0)
194  kernel = cosc;
195  else if (strcmp(s, "cot") == 0)
196  kernel = kcot;
197  else if (strcmp(s, "one_over_cube") == 0)
198  kernel = one_over_cube;
199  else if (strcmp(s, "log_sin") == 0)
200  kernel = log_sin;
201  else
202  {
203  s = "multiquadric";
204  kernel = multiquadric;
205  }
206 
207  bench_openmp(stdin, n, m, p, kernel, c, eps_I, eps_B);
208 
209  return EXIT_SUCCESS;
210 }
211 
Header file for the fast NFFT-based summation algorithm.
void fastsum_precompute(fastsum_plan *ths)
precomputation for fastsum
Definition: fastsum.c:1173
C inverse_multiquadric(R x, int der, const R *param)
K(x)=1/sqrt(x^2+c^2)
Definition: kernels.c:90
C logarithm(R x, int der, const R *param)
K(x)=log |x|.
Definition: kernels.c:116
C multiquadric(R x, int der, const R *param)
K(x)=sqrt(x^2+c^2)
Definition: kernels.c:64
void fastsum_init_guru(fastsum_plan *ths, int d, int N_total, int M_total, kernel k, R *param, unsigned flags, int nn, int m, int p, R eps_I, R eps_B)
initialization of fastsum plan
Definition: fastsum.c:987
R * x
source knots in d-ball with radius 1/4-eps_b/2
Definition: fastsum.h:94
C one_over_cube(R x, int der, const R *param)
K(x) = 1/x^3.
Definition: kernels.c:374
C one_over_square(R x, int der, const R *param)
K(x) = 1/x^2.
Definition: kernels.c:177
C sinc_kernel(R x, int der, const R *param)
K(x) = sin(cx)/x.
Definition: kernels.c:287
C kcot(R x, int der, const R *param)
K(x) = cot(cx)
Definition: kernels.c:346
R MEASURE_TIME_t[8]
Measured time for each step if MEASURE_TIME is set.
Definition: fastsum.h:134
C one_over_modulus(R x, int der, const R *param)
K(x) = 1/|x|.
Definition: kernels.c:205
C * alpha
source coefficients
Definition: fastsum.h:91
R eps_B
outer boundary
Definition: fastsum.h:114
C thinplate_spline(R x, int der, const R *param)
K(x) = x^2 log |x|.
Definition: kernels.c:149
void fastsum_trafo(fastsum_plan *ths)
fast NFFT-based summation
Definition: fastsum.c:1180
void fastsum_finalize(fastsum_plan *ths)
finalization of fastsum plan
Definition: fastsum.c:1048
C inverse_multiquadric3(R x, int der, const R *param)
K(x) = 1/sqrt(x^2+c^2)^3.
Definition: kernels.c:261
C gaussian(R x, int der, const R *param)
K(x)=exp(-x^2/c^2)
Definition: kernels.c:38
C one_over_x(R x, int der, const R *param)
K(x) = 1/x.
Definition: kernels.c:233
C cosc(R x, int der, const R *param)
K(x) = cos(cx)/x.
Definition: kernels.c:314
R * y
target knots in d-ball with radius 1/4-eps_b/2
Definition: fastsum.h:95
C log_sin(R x, int der, const R *param)
K(x) = log(|sin(cx)|)
Definition: kernels.c:402
Internal header file for auxiliary definitions and functions.
Header file with predefined kernels for the fast summation algorithm.
Header file for the nfft3 library.
plan for fast summation algorithm
Definition: fastsum.h:83