1 #ifndef VIENNACL_LINALG_QR_METHOD_HPP_
2 #define VIENNACL_LINALG_QR_METHOD_HPP_
27 #include <boost/numeric/ublas/vector.hpp>
28 #include <boost/numeric/ublas/matrix.hpp>
40 template<
typename MatrixType,
typename VectorType>
50 typedef typename MatrixType::value_type ScalarType;
62 static_cast<cl_uint>(matrix.size1()),
63 static_cast<cl_uint>(matrix.internal_size2()),
64 static_cast<cl_uint>(l),
65 static_cast<cl_uint
>(m - 1)
73 template <
typename SCALARTYPE,
unsigned int ALIGNMENT>
75 boost::numeric::ublas::vector<SCALARTYPE> & d,
76 boost::numeric::ublas::vector<SCALARTYPE> & e)
78 int n =
static_cast<int>(Q.
size1());
80 boost::numeric::ublas::vector<SCALARTYPE> cs(n), ss(n);
83 for (
int i = 1; i < n; i++)
90 SCALARTYPE eps = 2 *
static_cast<SCALARTYPE
>(EPS);
92 for (
int l = 0; l < n; l++)
95 tst1 = std::max<SCALARTYPE>(tst1, std::fabs(d(l)) + std::fabs(e(l)));
99 if (std::fabs(e(m)) <= eps * tst1)
114 SCALARTYPE p = (d(l + 1) - g) / (2 * e(l));
115 SCALARTYPE r = pythag<SCALARTYPE>(p, 1);
121 d(l) = e(l) / (p + r);
122 d(l + 1) = e(l) * (p + r);
123 SCALARTYPE dl1 = d(l + 1);
124 SCALARTYPE h = g - d(l);
125 for (
int i = l + 2; i < n; i++)
137 SCALARTYPE el1 = e(l + 1);
140 for (
int i = m - 1; i >= l; i--)
151 p = c * d(i) - s * g;
152 d(i + 1) = h + s * (c * g + s * d(i));
158 p = -s * s2 * c3 * el1 * e(l) / dl1;
171 while (std::fabs(e(l)) > eps * tst1);
178 template <
typename SCALARTYPE,
typename MatrixT>
192 static_cast<cl_uint>(A.internal_size1()),
193 static_cast<cl_uint>(n),
194 static_cast<cl_uint
>(last_n),
200 template <
typename SCALARTYPE,
typename MatrixT>
202 const std::vector<SCALARTYPE>& buf,
218 static_cast<cl_uint>(A.internal_size1()),
220 static_cast<cl_uint>(m),
221 static_cast<cl_uint
>(n),
222 static_cast<cl_uint>(last_n)
226 template <
typename SCALARTYPE,
typename MatrixT>
234 for (
int i = 0; i < last_n; i++)
236 SCALARTYPE v_in = A(i, n);
237 SCALARTYPE z = A(i, n - 1);
238 A(i, n - 1) = q * z + p * v_in;
239 A(i, n) = q * v_in - p * z;
243 template <
typename SCALARTYPE,
typename MatrixT>
245 const std::vector<SCALARTYPE>& buf,
252 for (
int i = 0; i < last_i; i++)
254 int start_k = is_triangular?std::max(i + 1, m):m;
256 SCALARTYPE* a_row = A.row(i);
258 SCALARTYPE a_ik = a_row[start_k];
259 SCALARTYPE a_ik_1 = 0;
260 SCALARTYPE a_ik_2 = 0;
263 a_ik_1 = a_row[start_k + 1];
265 for(
int k = start_k; k < n; k++)
267 bool notlast = (k != n - 1);
269 SCALARTYPE p = buf[5 * k] * a_ik + buf[5 * k + 1] * a_ik_1;
273 a_ik_2 = a_row[k + 2];
274 p = p + buf[5 * k + 2] * a_ik_2;
275 a_ik_2 = a_ik_2 - p * buf[5 * k + 4];
279 a_ik_1 = a_ik_1 - p * buf[5 * k + 3];
291 template <
typename SCALARTYPE>
302 data.resize(internal_size * internal_size);
307 return data[i * internal_size_ + j];
312 return &
data[i * internal_size_];
334 template <
typename SCALARTYPE,
unsigned int ALIGNMENT>
337 boost::numeric::ublas::vector<SCALARTYPE>& d,
338 boost::numeric::ublas::vector<SCALARTYPE>& e)
342 int nn =
static_cast<int>(vcl_H.
size1());
346 std::vector<SCALARTYPE> buf(5 * nn);
354 SCALARTYPE eps = 2 *
static_cast<SCALARTYPE
>(EPS);
355 SCALARTYPE exshift = 0;
366 SCALARTYPE out1, out2;
370 for (
int i = 0; i < nn; i++)
372 for (
int j = std::max(i - 1, 0); j < nn; j++)
373 norm = norm + std::fabs(H(i, j));
384 s = std::fabs(H(l - 1, l - 1)) + std::fabs(H(l, l));
385 if (s == 0) s = norm;
386 if (std::fabs(H(l, l - 1)) < eps * s)
396 H(n, n) = H(n, n) + exshift;
405 w = H(n, n - 1) * H(n - 1, n);
406 p = (H(n - 1, n - 1) - H(n, n)) / 2;
408 z =
static_cast<SCALARTYPE
>(std::sqrt(std::fabs(q)));
409 H(n, n) = H(n, n) + exshift;
410 H(n - 1, n - 1) = H(n - 1, n - 1) + exshift;
416 z = (p >= 0) ? (p + z) : (p - z);
424 s = std::fabs(x) + std::fabs(z);
427 r =
static_cast<SCALARTYPE
>(std::sqrt(p * p + q * q));
432 for (
int j = n - 1; j < nn; j++)
434 SCALARTYPE h_nj = H(n, j);
436 H(n - 1, j) = q * z + p * h_nj;
437 H(n, j) = q * h_nj - p * z;
466 w = H(n, n - 1) * H(n - 1, n);
473 for (
int i = 0; i <= n; i++)
476 s = std::fabs(H(n, n - 1)) + std::fabs(H(n - 1, n - 2));
477 x = y = SCALARTYPE(0.75) * s;
478 w = SCALARTYPE(-0.4375) * s * s;
488 s =
static_cast<SCALARTYPE
>(std::sqrt(s));
490 s = x - w / ((y - x) / 2 + s);
491 for (
int i = 0; i <= n; i++)
494 x = y = w = SCALARTYPE(0.964);
504 SCALARTYPE h_m1_m1 = H(m + 1, m + 1);
508 p = (r * s - w) / H(m + 1, m) + H(m, m + 1);
509 q = h_m1_m1 - z - r - s;
511 s = std::fabs(p) + std::fabs(q) + std::fabs(r);
517 if (std::fabs(H(m, m - 1)) * (std::fabs(q) + std::fabs(r)) < eps * (std::fabs(p) * (std::fabs(H(m - 1, m - 1)) + std::fabs(z) + std::fabs(h_m1_m1))))
522 for (
int i = m + 2; i <= n; i++)
530 for (
int k = m; k < n; k++)
532 bool notlast = (k != n - 1);
537 r = (notlast ? H(k + 2, k - 1) : 0);
538 x = std::fabs(p) + std::fabs(q) + std::fabs(r);
549 s =
static_cast<SCALARTYPE
>(std::sqrt(p * p + q * q + r * r));
555 H(k, k - 1) = -s * x;
558 H(k, k - 1) = -H(k, k - 1);
574 SCALARTYPE* a_row_k = H.
row(k);
575 SCALARTYPE* a_row_k_1 = H.row(k + 1);
576 SCALARTYPE* a_row_k_2 = H.row(k + 2);
578 for (
int j = k; j < nn; j++)
580 SCALARTYPE h_kj = a_row_k[j];
581 SCALARTYPE h_k1_j = a_row_k_1[j];
583 p = h_kj + q * h_k1_j;
586 SCALARTYPE h_k2_j = a_row_k_2[j];
588 a_row_k_2[j] = h_k2_j - p * z;
591 a_row_k[j] = h_kj - p * x;
592 a_row_k_1[j] = h_k1_j - p * y;
599 for (
int i = k; i < std::min(nn, k + 4); i++)
601 p = x * H(i, k) + y * H(i, k + 1);
604 p = p + z * H(i, k + 2);
605 H(i, k + 2) = H(i, k + 2) - p * r;
608 H(i, k) = H(i, k) - p;
609 H(i, k + 1) = H(i, k + 1) - p * q;
638 for (n = nn - 1; n >= 0; n--)
648 for (
int i = n - 1; i >= 0; i--)
652 for (
int j = l; j <= n; j++)
653 r = r + H(i, j) * H(j, n);
665 H(i, n) = (w != 0) ? (-r / w) : (-r / (eps * norm));
672 q = (d(i) - p) * (d(i) - p) + e(i) * e(i);
673 t = (x * s - z * r) / q;
675 H(i + 1, n) = (std::fabs(x) > std::fabs(z)) ? ((-r - w * t) / x) : ((-s - y * t) / z);
679 t = std::fabs(H(i, n));
680 if ((eps * t) * t > 1)
681 for (
int j = i; j <= n; j++)
692 if (std::fabs(H(n, n - 1)) > std::fabs(H(n - 1, n)))
694 H(n - 1, n - 1) = q / H(n, n - 1);
695 H(n - 1, n) = -(H(n, n) - p) / H(n, n - 1);
699 cdiv<SCALARTYPE>(0, -H(n - 1, n), H(n - 1, n - 1) - p, q, out1, out2);
701 H(n - 1, n - 1) = out1;
707 for (
int i = n - 2; i >= 0; i--)
709 SCALARTYPE ra, sa, vr, vi;
712 for (
int j = l; j <= n; j++)
714 SCALARTYPE h_ij = H(i, j);
715 ra = ra + h_ij * H(j, n - 1);
716 sa = sa + h_ij * H(j, n);
732 cdiv<SCALARTYPE>(-ra, -sa, w, q, out1, out2);
741 vr = (d(i) - p) * (d(i) - p) + e(i) * e(i) - q * q;
742 vi = (d(i) - p) * 2 * q;
743 if ( (vr == 0) && (vi == 0) )
744 vr = eps * norm * (std::fabs(w) + std::fabs(q) + std::fabs(x) + std::fabs(y) + std::fabs(z));
746 cdiv<SCALARTYPE>(x * r - z * ra + q * sa, x * s - z * sa - q * ra, vr, vi, out1, out2);
752 if (std::fabs(x) > (std::fabs(z) + std::fabs(q)))
754 H(i + 1, n - 1) = (-ra - w * H(i, n - 1) + q * H(i, n)) / x;
755 H(i + 1, n) = (-sa - w * H(i, n) - q * H(i, n - 1)) / x;
759 cdiv<SCALARTYPE>(-r - y * H(i, n - 1), -s - y * H(i, n), z, q, out1, out2);
761 H(i + 1, n - 1) = out1;
767 t = std::max(std::fabs(H(i, n - 1)), std::fabs(H(i, n)));
768 if ((eps * t) * t > 1)
770 for (
int j = i; j <= n; j++)
789 template <
typename SCALARTYPE,
unsigned int ALIGNMENT>
798 if(start + 2 >= A.
size1())
809 static_cast<cl_uint>(start + 1),
810 static_cast<cl_uint>(start),
811 static_cast<cl_uint>(A.
size1()),
812 static_cast<cl_uint>(A.
size2()),
824 static_cast<cl_uint>(0),
825 static_cast<cl_uint>(0),
826 static_cast<cl_uint>(A.
size1()),
827 static_cast<cl_uint>(A.
size2()),
839 static_cast<cl_uint>(A.
size1()),
840 static_cast<cl_uint>(A.
size2()),
849 template <
typename SCALARTYPE,
typename F,
unsigned int ALIGNMENT>
864 template <
typename SCALARTYPE,
typename F,
unsigned int ALIGNMENT>
867 boost::numeric::ublas::vector<SCALARTYPE> & D,
868 boost::numeric::ublas::vector<SCALARTYPE> & E,
869 bool is_symmetric =
true)
873 assert(A.
size1() == A.
size2() && bool(
"Input matrix must be square for QR method!"));
904 boost::numeric::ublas::matrix<float> eigen_values(A.
size1(), A.
size1());
905 eigen_values.clear();
909 if(std::fabs(E(i)) < EPS)
911 eigen_values(i, i) = D(i);
915 eigen_values(i, i) = D(i);
916 eigen_values(i, i + 1) = E(i);
917 eigen_values(i + 1, i) = -E(i);
918 eigen_values(i + 1, i + 1) = D(i);
923 copy(eigen_values, A);
928 template <
typename SCALARTYPE,
typename F,
unsigned int ALIGNMENT>
931 boost::numeric::ublas::vector<SCALARTYPE>& D,
932 boost::numeric::ublas::vector<SCALARTYPE>& E
938 template <
typename SCALARTYPE,
typename F,
unsigned int ALIGNMENT>
941 boost::numeric::ublas::vector<SCALARTYPE>& D
944 boost::numeric::ublas::vector<SCALARTYPE> E(A.
size1());
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:470
std::size_t vcl_size_t
Definition: forwards.h:58
viennacl::enable_if< viennacl::is_any_sparse_matrix< M1 >::value, matrix_expression< const M1, const M1, op_trans > >::type trans(const M1 &mat)
Returns an expression template class representing a transposed matrix.
Definition: sparse_matrix_operations.hpp:330
void final_iter_update(MatrixT &A, int n, int last_n, SCALARTYPE q, SCALARTYPE p)
Definition: qr-method.hpp:227
Represents a vector consisting of 1 at a given index and zeros otherwise. To be used as an initialize...
Definition: forwards.h:299
FastMatrix()
Definition: qr-method.hpp:295
const std::string SVD_HOUSEHOLDER_UPDATE_A_RIGHT_KERNEL
Definition: qr-method-common.hpp:44
SCALARTYPE * end()
Definition: qr-method.hpp:320
Internal helper class representing a row-major dense matrix used for the QR method for the purpose of...
Definition: qr-method.hpp:292
void tridiagonal_reduction(viennacl::matrix< SCALARTYPE, F, ALIGNMENT > &A, viennacl::matrix< SCALARTYPE, F, ALIGNMENT > &Q)
Definition: qr-method.hpp:850
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
size_type size2() const
Returns the number of columns.
Definition: matrix.hpp:627
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Implementation of the dense matrix class.
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
void tql2(viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &Q, boost::numeric::ublas::vector< SCALARTYPE > &d, boost::numeric::ublas::vector< SCALARTYPE > &e)
Definition: qr-method.hpp:74
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
void qr_method(viennacl::matrix< SCALARTYPE, F, ALIGNMENT > &A, viennacl::matrix< SCALARTYPE, F, ALIGNMENT > &Q, boost::numeric::ublas::vector< SCALARTYPE > &D, boost::numeric::ublas::vector< SCALARTYPE > &E, bool is_symmetric=true)
Definition: qr-method.hpp:865
A dense matrix class.
Definition: forwards.h:293
SCALARTYPE * begin()
Definition: qr-method.hpp:315
void prepare_householder_vector(viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &A, viennacl::vector< SCALARTYPE, ALIGNMENT > &D, vcl_size_t size, vcl_size_t row_start, vcl_size_t col_start, vcl_size_t start, bool is_column)
Definition: qr-method-common.hpp:173
bool householder_twoside(viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &A, viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &Q, viennacl::vector< SCALARTYPE, ALIGNMENT > &D, vcl_size_t start)
Definition: qr-method.hpp:790
void hqr2(viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &vcl_H, viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &V, boost::numeric::ublas::vector< SCALARTYPE > &d, boost::numeric::ublas::vector< SCALARTYPE > &e)
Definition: qr-method.hpp:335
vcl_size_t internal_size(vector_base< NumericT > const &vec)
Helper routine for obtaining the buffer length of a ViennaCL vector.
Definition: size.hpp:268
FastMatrix(vcl_size_t sz, vcl_size_t internal_size)
Definition: qr-method.hpp:300
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix.hpp:649
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
VectorT prod(std::vector< std::vector< T, A1 >, A2 > const &matrix, VectorT const &vector)
Definition: prod.hpp:91
static void init(viennacl::ocl::context &ctx)
Definition: svd.hpp:510
SCALARTYPE & operator()(int i, int j)
Definition: qr-method.hpp:305
Common routines used for the QR method and SVD. Experimental.
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:48
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
Main kernel class for generating OpenCL kernels for singular value decomposition of dense matrices...
Definition: svd.hpp:503
const std::string SVD_HOUSEHOLDER_UPDATE_QL_KERNEL
Definition: qr-method-common.hpp:45
const std::string SVD_FINAL_ITER_UPDATE_KERNEL
Definition: qr-method-common.hpp:53
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:43
void copy(std::vector< SCALARTYPE > &cpu_vec, circulant_matrix< SCALARTYPE, ALIGNMENT > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Definition: circulant_matrix.hpp:150
SCALARTYPE * row(int i)
Definition: qr-method.hpp:310
void qr_method_nsm(viennacl::matrix< SCALARTYPE, F, ALIGNMENT > &A, viennacl::matrix< SCALARTYPE, F, ALIGNMENT > &Q, boost::numeric::ublas::vector< SCALARTYPE > &D, boost::numeric::ublas::vector< SCALARTYPE > &E)
Definition: qr-method.hpp:929
void final_iter_update_gpu(MatrixT &A, int n, int last_n, SCALARTYPE q, SCALARTYPE p)
Definition: qr-method.hpp:179
std::vector< SCALARTYPE > data
Definition: qr-method.hpp:325
const std::string SVD_GIVENS_NEXT_KERNEL
Definition: qr-method-common.hpp:52
void bidiag_pack(viennacl::matrix< SCALARTYPE, row_major, ALIGNMENT > &A, VectorType &dh, VectorType &sh)
Definition: qr-method-common.hpp:197
SCALARTYPE pythag(SCALARTYPE a, SCALARTYPE b)
Definition: qr-method-common.hpp:65
const std::string SVD_HOUSEHOLDER_UPDATE_A_LEFT_KERNEL
Definition: qr-method-common.hpp:43
void transpose(MatrixType &A)
Definition: qr-method-common.hpp:107
A vector class representing a linear memory sequence on the GPU. Inspired by boost::numeric::ublas::v...
Definition: forwards.h:208
void update_float_QR_column(MatrixT &A, const std::vector< SCALARTYPE > &buf, int m, int n, int last_i, bool is_triangular)
Definition: qr-method.hpp:244
const std::string SVD_UPDATE_QR_COLUMN_KERNEL
Definition: qr-method-common.hpp:54
T::ERROR_CANNOT_DEDUCE_CPU_SCALAR_TYPE_FOR_T type
Definition: result_of.hpp:276
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void givens_next(MatrixType &matrix, VectorType &tmp1, VectorType &tmp2, int l, int m)
Definition: qr-method.hpp:41
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:759
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:750
void qr_method_sym(viennacl::matrix< SCALARTYPE, F, ALIGNMENT > &A, viennacl::matrix< SCALARTYPE, F, ALIGNMENT > &Q, boost::numeric::ublas::vector< SCALARTYPE > &D)
Definition: qr-method.hpp:939
void update_float_QR_column_gpu(MatrixT &A, const std::vector< SCALARTYPE > &buf, viennacl::vector< SCALARTYPE > &buf_vcl, int m, int n, int last_n, bool)
Definition: qr-method.hpp:201
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)
STL-like transfer of a GPU vector to the CPU. The cpu type is assumed to reside in a linear piece of ...
Definition: vector.hpp:1262
size_type size1() const
Returns the number of rows.
Definition: matrix.hpp:625