ViennaCL - The Vienna Computing Library  1.5.2
fft.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_FFT_HPP
2 #define VIENNACL_FFT_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include <viennacl/vector.hpp>
26 #include <viennacl/matrix.hpp>
27 
29 
30 #include <cmath>
31 
32 #include <stdexcept>
33 
34 namespace viennacl
35 {
36  namespace detail
37  {
38  namespace fft
39  {
41 
42  namespace FFT_DATA_ORDER {
43  enum DATA_ORDER {
46  };
47  }
48  }
49  }
50 }
51 
53 namespace viennacl
54 {
55  namespace detail
56  {
57  namespace fft
58  {
59 
60  inline bool is_radix2(vcl_size_t data_size) {
61  return !((data_size > 2) && (data_size & (data_size - 1)));
62 
63  }
64 
65  inline vcl_size_t next_power_2(vcl_size_t n) {
66  n = n - 1;
67 
68  vcl_size_t power = 1;
69 
70  while(power < sizeof(vcl_size_t) * 8) {
71  n = n | (n >> power);
72  power *= 2;
73  }
74 
75  return n + 1;
76  }
77 
78  inline vcl_size_t num_bits(vcl_size_t size)
79  {
80  vcl_size_t bits_datasize = 0;
81  vcl_size_t ds = 1;
82 
83  while(ds < size)
84  {
85  ds = ds << 1;
86  bits_datasize++;
87  }
88 
89  return bits_datasize;
90  }
91 
92 
99  template<class SCALARTYPE>
100  void direct(const viennacl::ocl::handle<cl_mem>& in,
102  vcl_size_t size,
104  vcl_size_t batch_num,
105  SCALARTYPE sign = -1.0f,
107  )
108  {
109  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(in.context());
111 
113  if (data_order == FFT_DATA_ORDER::COL_MAJOR)
114  {
117  }
118  else
120  viennacl::ocl::kernel& kernel = ctx.get_kernel(program_string, "fft_direct");
121  viennacl::ocl::enqueue(kernel(in, out, static_cast<cl_uint>(size), static_cast<cl_uint>(stride), static_cast<cl_uint>(batch_num), sign));
122  }
123 
124  /*
125  * This function performs reorder of input data. Indexes are sorted in bit-reversal order.
126  * Such reordering should be done before in-place FFT.
127  */
128  template <typename SCALARTYPE>
129  void reorder(const viennacl::ocl::handle<cl_mem>& in,
130  vcl_size_t size,
131  vcl_size_t stride,
132  vcl_size_t bits_datasize,
133  vcl_size_t batch_num,
135  )
136  {
137  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(in.context());
139 
141  if (data_order == FFT_DATA_ORDER::COL_MAJOR)
142  {
145  }
146  else
148 
149  viennacl::ocl::kernel& kernel = ctx.get_kernel(program_string, "fft_reorder");
150  viennacl::ocl::enqueue(kernel(in,
151  static_cast<cl_uint>(bits_datasize),
152  static_cast<cl_uint>(size),
153  static_cast<cl_uint>(stride),
154  static_cast<cl_uint>(batch_num)
155  )
156  );
157  }
158 
166  template<class SCALARTYPE>
167  void radix2(const viennacl::ocl::handle<cl_mem>& in,
168  vcl_size_t size,
169  vcl_size_t stride,
170  vcl_size_t batch_num,
171  SCALARTYPE sign = -1.0f,
173  )
174  {
175  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(in.context());
177 
178  assert(batch_num != 0);
179  assert(is_radix2(size));
180 
182  if (data_order == FFT_DATA_ORDER::COL_MAJOR)
183  {
186  }
187  else
189 
190  vcl_size_t bits_datasize = num_bits(size);
191 
192  if(size <= MAX_LOCAL_POINTS_NUM)
193  {
194  viennacl::ocl::kernel& kernel = ctx.get_kernel(program_string, "fft_radix2_local");
195  viennacl::ocl::enqueue(kernel(in,
196  viennacl::ocl::local_mem((size * 4) * sizeof(SCALARTYPE)),
197  static_cast<cl_uint>(bits_datasize),
198  static_cast<cl_uint>(size),
199  static_cast<cl_uint>(stride),
200  static_cast<cl_uint>(batch_num),
201  sign));
202  }
203  else
204  {
205  reorder<SCALARTYPE>(in, size, stride, bits_datasize, batch_num);
206 
207  for(vcl_size_t step = 0; step < bits_datasize; step++)
208  {
209  viennacl::ocl::kernel& kernel = ctx.get_kernel(program_string, "fft_radix2");
210  viennacl::ocl::enqueue(kernel(in,
211  static_cast<cl_uint>(step),
212  static_cast<cl_uint>(bits_datasize),
213  static_cast<cl_uint>(size),
214  static_cast<cl_uint>(stride),
215  static_cast<cl_uint>(batch_num),
216  sign));
217  }
218 
219  }
220  }
221 
229  template<class SCALARTYPE, unsigned int ALIGNMENT>
230  void bluestein(viennacl::vector<SCALARTYPE, ALIGNMENT>& in,
232  vcl_size_t /*batch_num*/)
233  {
234  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(in).context());
236 
237  vcl_size_t size = in.size() >> 1;
238  vcl_size_t ext_size = next_power_2(2 * size - 1);
239 
242 
244 
245  {
247  viennacl::ocl::enqueue(kernel(
248  A,
249  B,
250  static_cast<cl_uint>(ext_size)
251  ));
252 
253  }
254  {
256  viennacl::ocl::enqueue(kernel(
257  in,
258  A,
259  B,
260  static_cast<cl_uint>(size),
261  static_cast<cl_uint>(ext_size)
262  ));
263  }
264 
266 
267  {
269  viennacl::ocl::enqueue(kernel(
270  Z,
271  out,
272  static_cast<cl_uint>(size)
273  ));
274  }
275  }
276 
277  template<class SCALARTYPE, unsigned int ALIGNMENT>
278  void multiply(viennacl::vector<SCALARTYPE, ALIGNMENT> const & input1,
281  {
282  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input1).context());
284  vcl_size_t size = input1.size() >> 1;
286  viennacl::ocl::enqueue(kernel(input1, input2, output, static_cast<cl_uint>(size)));
287  }
288 
289  template<class SCALARTYPE, unsigned int ALIGNMENT>
291  {
292  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input).context());
294 
296  vcl_size_t size = input.size() >> 1;
297  SCALARTYPE norm_factor = static_cast<SCALARTYPE>(size);
298  viennacl::ocl::enqueue(kernel(input, static_cast<cl_uint>(size), norm_factor));
299  }
300 
301  template<class SCALARTYPE, unsigned int ALIGNMENT>
303  {
304  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input).context());
306 
308  viennacl::ocl::enqueue(kernel(input,
309  static_cast<cl_uint>(input.internal_size1()),
310  static_cast<cl_uint>(input.internal_size2()) >> 1));
311  }
312 
313  template<class SCALARTYPE, unsigned int ALIGNMENT>
316  {
317  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(input).context());
319 
321  viennacl::ocl::enqueue(kernel(input,
322  output,
323  static_cast<cl_uint>(input.internal_size1()),
324  static_cast<cl_uint>(input.internal_size2() >> 1))
325  );
326  }
327 
328  template<class SCALARTYPE>
329  void real_to_complex(viennacl::vector_base<SCALARTYPE> const & in,
331  vcl_size_t size)
332  {
333  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(in).context());
336  viennacl::ocl::enqueue(kernel(in, out, static_cast<cl_uint>(size)));
337  }
338 
339  template<class SCALARTYPE>
340  void complex_to_real(viennacl::vector_base<SCALARTYPE> const & in,
342  vcl_size_t size)
343  {
344  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(in).context());
347  viennacl::ocl::enqueue(kernel(in, out, static_cast<cl_uint>(size)));
348  }
349 
350  template<class SCALARTYPE>
351  void reverse(viennacl::vector_base<SCALARTYPE>& in)
352  {
353  viennacl::ocl::context & ctx = const_cast<viennacl::ocl::context &>(viennacl::traits::opencl_handle(in).context());
355  vcl_size_t size = in.size();
357  viennacl::ocl::enqueue(kernel(in, static_cast<cl_uint>(size)));
358  }
359 
360 
361  } //namespace fft
362  } //namespace detail
363 
371  template<class SCALARTYPE, unsigned int ALIGNMENT>
372  void inplace_fft(viennacl::vector<SCALARTYPE, ALIGNMENT>& input,
373  vcl_size_t batch_num = 1,
374  SCALARTYPE sign = -1.0)
375  {
376  vcl_size_t size = (input.size() >> 1) / batch_num;
377 
378  if(!viennacl::detail::fft::is_radix2(size))
379  {
381  viennacl::detail::fft::direct(viennacl::traits::opencl_handle(input),
382  viennacl::traits::opencl_handle(output),
383  size,
384  size,
385  batch_num,
386  sign);
387 
388  viennacl::copy(output, input);
389  } else {
390  viennacl::detail::fft::radix2(viennacl::traits::opencl_handle(input), size, size, batch_num, sign);
391  }
392  }
393 
402  template<class SCALARTYPE, unsigned int ALIGNMENT>
405  vcl_size_t batch_num = 1,
406  SCALARTYPE sign = -1.0
407  )
408  {
409  vcl_size_t size = (input.size() >> 1) / batch_num;
410 
411  if(viennacl::detail::fft::is_radix2(size))
412  {
413  viennacl::copy(input, output);
414  viennacl::detail::fft::radix2(viennacl::traits::opencl_handle(output), size, size, batch_num, sign);
415  } else {
416  viennacl::detail::fft::direct(viennacl::traits::opencl_handle(input),
417  viennacl::traits::opencl_handle(output),
418  size,
419  size,
420  batch_num,
421  sign);
422  }
423  }
424 
431  template<class SCALARTYPE, unsigned int ALIGNMENT>
433  SCALARTYPE sign = -1.0)
434  {
435  vcl_size_t rows_num = input.size1();
436  vcl_size_t cols_num = input.size2() >> 1;
437 
438  vcl_size_t cols_int = input.internal_size2() >> 1;
439 
440  // batch with rows
441  if(viennacl::detail::fft::is_radix2(cols_num))
442  {
443  viennacl::detail::fft::radix2(viennacl::traits::opencl_handle(input), cols_num, cols_int, rows_num, sign, viennacl::detail::fft::FFT_DATA_ORDER::ROW_MAJOR);
444  }
445  else
446  {
448 
449  viennacl::detail::fft::direct(viennacl::traits::opencl_handle(input),
450  viennacl::traits::opencl_handle(output),
451  cols_num,
452  cols_int,
453  rows_num,
454  sign,
456  );
457 
458  input = output;
459  }
460 
461  // batch with cols
462  if (viennacl::detail::fft::is_radix2(rows_num)) {
463  viennacl::detail::fft::radix2(viennacl::traits::opencl_handle(input), rows_num, cols_int, cols_num, sign, viennacl::detail::fft::FFT_DATA_ORDER::COL_MAJOR);
464  } else {
466 
467  viennacl::detail::fft::direct(viennacl::traits::opencl_handle(input),
468  viennacl::traits::opencl_handle(output),
469  rows_num,
470  cols_int,
471  cols_num,
472  sign,
474 
475  input = output;
476  }
477 
478  }
479 
487  template<class SCALARTYPE, unsigned int ALIGNMENT>
490  SCALARTYPE sign = -1.0)
491  {
492  vcl_size_t rows_num = input.size1();
493  vcl_size_t cols_num = input.size2() >> 1;
494 
495  vcl_size_t cols_int = input.internal_size2() >> 1;
496 
497  // batch with rows
498  if(viennacl::detail::fft::is_radix2(cols_num))
499  {
500  output = input;
501  viennacl::detail::fft::radix2(viennacl::traits::opencl_handle(output), cols_num, cols_int, rows_num, sign, viennacl::detail::fft::FFT_DATA_ORDER::ROW_MAJOR);
502  }
503  else
504  {
505  viennacl::detail::fft::direct(viennacl::traits::opencl_handle(input),
506  viennacl::traits::opencl_handle(output),
507  cols_num,
508  cols_int,
509  rows_num,
510  sign,
512  );
513  }
514 
515  // batch with cols
516  if(viennacl::detail::fft::is_radix2(rows_num))
517  {
518  viennacl::detail::fft::radix2(viennacl::traits::opencl_handle(output), rows_num, cols_int, cols_num, sign, viennacl::detail::fft::FFT_DATA_ORDER::COL_MAJOR);
519  }
520  else
521  {
523  tmp = output;
524 
525  viennacl::detail::fft::direct(viennacl::traits::opencl_handle(tmp),
526  viennacl::traits::opencl_handle(output),
527  rows_num,
528  cols_int,
529  cols_num,
530  sign,
532  }
533  }
534 
544  template<class SCALARTYPE, unsigned int ALIGNMENT>
545  void inplace_ifft(viennacl::vector<SCALARTYPE, ALIGNMENT>& input,
546  vcl_size_t batch_num = 1)
547  {
548  viennacl::inplace_fft(input, batch_num, SCALARTYPE(1.0));
550  }
551 
562  template<class SCALARTYPE, unsigned int ALIGNMENT>
565  vcl_size_t batch_num = 1
566  )
567  {
568  viennacl::fft(input, output, batch_num, SCALARTYPE(1.0));
570  }
571 
572  namespace linalg
573  {
583  template<class SCALARTYPE, unsigned int ALIGNMENT>
584  void convolve(viennacl::vector<SCALARTYPE, ALIGNMENT>& input1,
587  )
588  {
589  assert(input1.size() == input2.size());
590  assert(input1.size() == output.size());
591  //temporal arrays
595 
596  // align input arrays to equal size
597  // FFT of input data
598  viennacl::fft(input1, tmp1);
599  viennacl::fft(input2, tmp2);
600 
601  // multiplication of input data
602  viennacl::detail::fft::multiply(tmp1, tmp2, tmp3);
603  // inverse FFT of input data
604  viennacl::ifft(tmp3, output);
605  }
606 
616  template<class SCALARTYPE, unsigned int ALIGNMENT>
620  )
621  {
622  assert(input1.size() == input2.size());
623  assert(input1.size() == output.size());
624 
625  viennacl::inplace_fft(input1);
626  viennacl::inplace_fft(input2);
627 
628  viennacl::detail::fft::multiply(input1, input2, output);
629 
630  viennacl::inplace_ifft(output);
631  }
632  }
633 } //namespace linalg
634 
636 #endif
std::vector< IndexT > reorder(std::vector< std::map< IndexT, ValueT > > const &matrix, cuthill_mckee_tag)
Function for the calculation of a node number permutation to reduce the bandwidth of an incidence mat...
Definition: cuthill_mckee.hpp:364
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:470
OpenCL kernel file for FFT operations.
std::size_t vcl_size_t
Definition: forwards.h:58
viennacl::ocl::context const & context() const
Definition: handle.hpp:191
size_type size2() const
Returns the number of columns.
Definition: matrix.hpp:627
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Implementation of the dense matrix class.
Main kernel class for generating OpenCL kernels for the fast Fourier transform.
Definition: fft.hpp:243
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
A dense matrix class.
Definition: forwards.h:293
static void init(viennacl::ocl::context &ctx)
Definition: fft.hpp:250
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:46
static void init(viennacl::ocl::context &ctx)
Definition: matrix.hpp:884
void normalize(VectorType &x, vcl_size_t size)
Definition: qr-method-common.hpp:87
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix.hpp:649
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:48
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
void copy(std::vector< SCALARTYPE > &cpu_vec, circulant_matrix< SCALARTYPE, ALIGNMENT > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Definition: circulant_matrix.hpp:150
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector.hpp:837
void convolve_i(viennacl::vector< SCALARTYPE, ALIGNMENT > &input1, viennacl::vector< SCALARTYPE, ALIGNMENT > &input2, viennacl::vector< SCALARTYPE, ALIGNMENT > &output)
void transpose(MatrixType &A)
Definition: qr-method-common.hpp:107
A vector class representing a linear memory sequence on the GPU. Inspired by boost::numeric::ublas::v...
Definition: forwards.h:208
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
const vcl_size_t MAX_LOCAL_POINTS_NUM
Definition: fft.hpp:40
static std::string program_name()
Definition: matrix.hpp:879
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix.hpp:647
size_type size1() const
Returns the number of rows.
Definition: matrix.hpp:625
SCALARTYPE sign(SCALARTYPE val)
Definition: qr-method-common.hpp:71