1 #ifndef VIENNACL_FFT_HPP
2 #define VIENNACL_FFT_HPP
42 namespace FFT_DATA_ORDER {
61 return !((data_size > 2) && (data_size & (data_size - 1)));
99 template<
class SCALARTYPE>
105 SCALARTYPE
sign = -1.0f,
121 viennacl::ocl::enqueue(kernel(in, out, static_cast<cl_uint>(size), static_cast<cl_uint>(stride), static_cast<cl_uint>(batch_num),
sign));
128 template <
typename SCALARTYPE>
151 static_cast<cl_uint>(bits_datasize),
152 static_cast<cl_uint>(size),
153 static_cast<cl_uint>(stride),
154 static_cast<cl_uint>(batch_num)
166 template<
class SCALARTYPE>
171 SCALARTYPE
sign = -1.0f,
178 assert(batch_num != 0);
179 assert(is_radix2(size));
197 static_cast<cl_uint>(bits_datasize),
198 static_cast<cl_uint>(size),
199 static_cast<cl_uint>(stride),
200 static_cast<cl_uint>(batch_num),
205 reorder<SCALARTYPE>(in,
size,
stride, bits_datasize, batch_num);
207 for(
vcl_size_t step = 0; step < bits_datasize; step++)
211 static_cast<cl_uint>(step),
212 static_cast<cl_uint>(bits_datasize),
213 static_cast<cl_uint>(size),
214 static_cast<cl_uint>(stride),
215 static_cast<cl_uint>(batch_num),
229 template<
class SCALARTYPE,
unsigned int ALIGNMENT>
238 vcl_size_t ext_size = next_power_2(2 * size - 1);
250 static_cast<cl_uint>(ext_size)
260 static_cast<cl_uint>(size),
261 static_cast<cl_uint>(ext_size)
272 static_cast<cl_uint>(size)
277 template<
class SCALARTYPE,
unsigned int ALIGNMENT>
289 template<
class SCALARTYPE,
unsigned int ALIGNMENT>
297 SCALARTYPE norm_factor =
static_cast<SCALARTYPE
>(
size);
301 template<
class SCALARTYPE,
unsigned int ALIGNMENT>
313 template<
class SCALARTYPE,
unsigned int ALIGNMENT>
328 template<
class SCALARTYPE>
339 template<
class SCALARTYPE>
350 template<
class SCALARTYPE>
371 template<
class SCALARTYPE,
unsigned int ALIGNMENT>
374 SCALARTYPE
sign = -1.0)
378 if(!viennacl::detail::fft::is_radix2(size))
381 viennacl::detail::fft::direct(viennacl::traits::opencl_handle(input),
382 viennacl::traits::opencl_handle(output),
390 viennacl::detail::fft::radix2(viennacl::traits::opencl_handle(input), size, size, batch_num,
sign);
402 template<
class SCALARTYPE,
unsigned int ALIGNMENT>
406 SCALARTYPE
sign = -1.0
411 if(viennacl::detail::fft::is_radix2(size))
414 viennacl::detail::fft::radix2(viennacl::traits::opencl_handle(output), size, size, batch_num,
sign);
416 viennacl::detail::fft::direct(viennacl::traits::opencl_handle(input),
417 viennacl::traits::opencl_handle(output),
431 template<
class SCALARTYPE,
unsigned int ALIGNMENT>
433 SCALARTYPE
sign = -1.0)
441 if(viennacl::detail::fft::is_radix2(cols_num))
449 viennacl::detail::fft::direct(viennacl::traits::opencl_handle(input),
450 viennacl::traits::opencl_handle(output),
462 if (viennacl::detail::fft::is_radix2(rows_num)) {
467 viennacl::detail::fft::direct(viennacl::traits::opencl_handle(input),
468 viennacl::traits::opencl_handle(output),
487 template<
class SCALARTYPE,
unsigned int ALIGNMENT>
490 SCALARTYPE
sign = -1.0)
498 if(viennacl::detail::fft::is_radix2(cols_num))
505 viennacl::detail::fft::direct(viennacl::traits::opencl_handle(input),
506 viennacl::traits::opencl_handle(output),
516 if(viennacl::detail::fft::is_radix2(rows_num))
525 viennacl::detail::fft::direct(viennacl::traits::opencl_handle(tmp),
526 viennacl::traits::opencl_handle(output),
544 template<
class SCALARTYPE,
unsigned int ALIGNMENT>
548 viennacl::inplace_fft(input, batch_num, SCALARTYPE(1.0));
562 template<
class SCALARTYPE,
unsigned int ALIGNMENT>
568 viennacl::fft(input, output, batch_num, SCALARTYPE(1.0));
583 template<
class SCALARTYPE,
unsigned int ALIGNMENT>
589 assert(input1.
size() == input2.
size());
590 assert(input1.
size() == output.
size());
598 viennacl::fft(input1, tmp1);
599 viennacl::fft(input2, tmp2);
602 viennacl::detail::fft::multiply(tmp1, tmp2, tmp3);
604 viennacl::ifft(tmp3, output);
616 template<
class SCALARTYPE,
unsigned int ALIGNMENT>
622 assert(input1.
size() == input2.
size());
623 assert(input1.
size() == output.
size());
625 viennacl::inplace_fft(input1);
626 viennacl::inplace_fft(input2);
628 viennacl::detail::fft::multiply(input1, input2, output);
630 viennacl::inplace_ifft(output);
std::vector< IndexT > reorder(std::vector< std::map< IndexT, ValueT > > const &matrix, cuthill_mckee_tag)
Function for the calculation of a node number permutation to reduce the bandwidth of an incidence matrix by the Cuthill-McKee algorithm.
Definition: cuthill_mckee.hpp:364
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:470
OpenCL kernel file for FFT operations.
std::size_t vcl_size_t
Definition: forwards.h:58
viennacl::ocl::context const & context() const
Definition: handle.hpp:191
size_type size2() const
Returns the number of columns.
Definition: matrix.hpp:627
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Implementation of the dense matrix class.
Main kernel class for generating OpenCL kernels for the fast Fourier transform.
Definition: fft.hpp:243
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
A dense matrix class.
Definition: forwards.h:293
static void init(viennacl::ocl::context &ctx)
Definition: fft.hpp:250
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:46
static void init(viennacl::ocl::context &ctx)
Definition: matrix.hpp:884
void normalize(VectorType &x, vcl_size_t size)
Definition: qr-method-common.hpp:87
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only.
Definition: matrix.hpp:649
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:48
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
void copy(std::vector< SCALARTYPE > &cpu_vec, circulant_matrix< SCALARTYPE, ALIGNMENT > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU).
Definition: circulant_matrix.hpp:150
size_type size() const
Returns the length of the vector (cf. std::vector)
Definition: vector.hpp:837
void convolve_i(viennacl::vector< SCALARTYPE, ALIGNMENT > &input1, viennacl::vector< SCALARTYPE, ALIGNMENT > &input2, viennacl::vector< SCALARTYPE, ALIGNMENT > &output)
void transpose(MatrixType &A)
Definition: qr-method-common.hpp:107
A vector class representing a linear memory sequence on the GPU. Inspired by boost::numeric::ublas::vector.
Definition: forwards.h:208
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
const vcl_size_t MAX_LOCAL_POINTS_NUM
Definition: fft.hpp:40
DATA_ORDER
Definition: fft.hpp:43
static std::string program_name()
Definition: matrix.hpp:879
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only.
Definition: matrix.hpp:647
size_type size1() const
Returns the number of rows.
Definition: matrix.hpp:625
SCALARTYPE sign(SCALARTYPE val)
Definition: qr-method-common.hpp:71