ViennaCL - The Vienna Computing Library  1.5.2
qr.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_DETAIL_SPAI_QR_HPP
2 #define VIENNACL_LINALG_DETAIL_SPAI_QR_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
27 #include <utility>
28 #include <iostream>
29 #include <fstream>
30 #include <string>
31 #include <algorithm>
32 #include <vector>
33 #include <math.h>
34 #include <cmath>
35 #include <sstream>
36 #include "viennacl/ocl/backend.hpp"
37 #include "boost/numeric/ublas/vector.hpp"
38 #include "boost/numeric/ublas/matrix.hpp"
39 #include "boost/numeric/ublas/matrix_proxy.hpp"
40 #include "boost/numeric/ublas/storage.hpp"
41 #include "boost/numeric/ublas/io.hpp"
42 #include "boost/numeric/ublas/matrix_expression.hpp"
43 #include "boost/numeric/ublas/detail/matrix_assign.hpp"
44 
45 #include "viennacl/vector.hpp"
46 #include "viennacl/matrix.hpp"
47 
51 
52 namespace viennacl
53 {
54  namespace linalg
55  {
56  namespace detail
57  {
58  namespace spai
59  {
60 
61  //********** DEBUG FUNCTIONS *****************//
62  template< typename T, typename InputIterator>
63  void Print(std::ostream& ostr, InputIterator it_begin, InputIterator it_end){
64  //std::ostream_iterator<int> it_os(ostr, delimiter);
65  std::string delimiters = " ";
66  std::copy(it_begin, it_end, std::ostream_iterator<T>(ostr, delimiters.c_str()));
67  ostr<<std::endl;
68  }
69 
70  template<typename VectorType, typename MatrixType>
71  void write_to_block(VectorType& con_A_I_J, unsigned int start_ind, const std::vector<unsigned int>& I, const std::vector<unsigned int>& J, MatrixType& m){
72  m.resize(I.size(), J.size(), false);
73  for(vcl_size_t i = 0; i < J.size(); ++i){
74  for(vcl_size_t j = 0; j < I.size(); ++j){
75  m(j,i) = con_A_I_J[start_ind + i*I.size() + j];
76  }
77  }
78  }
79 
80  template<typename VectorType>
81  void print_continious_matrix(VectorType& con_A_I_J, std::vector<cl_uint>& blocks_ind,
82  const std::vector<std::vector<unsigned int> >& g_I, const std::vector<std::vector<unsigned int> >& g_J){
83  typedef typename VectorType::value_type ScalarType;
84  std::vector<boost::numeric::ublas::matrix<ScalarType> > com_A_I_J(g_I.size());
85  for(vcl_size_t i = 0; i < g_I.size(); ++i){
86  write_to_block( con_A_I_J, blocks_ind[i], g_I[i], g_J[i], com_A_I_J[i]);
87  std::cout<<com_A_I_J[i]<<std::endl;
88  }
89  }
90  template<typename VectorType>
91  void print_continious_vector(VectorType& con_v, std::vector<cl_uint>& block_ind, const std::vector<std::vector<unsigned int> >& g_J){
92  typedef typename VectorType::value_type ScalarType;
93  std::vector<boost::numeric::ublas::vector<ScalarType> > com_v(g_J.size());
94  //Print<ScalarType>(std::cout, con_v.begin(), con_v.end());
95  for(vcl_size_t i = 0; i < g_J.size(); ++i){
96  com_v[i].resize(g_J[i].size());
97  for(vcl_size_t j = 0; j < g_J[i].size(); ++j){
98  com_v[i](j) = con_v[block_ind[i] + j];
99  }
100  std::cout<<com_v[i]<<std::endl;
101  }
102  }
103 
105 
112  inline void compute_blocks_size(const std::vector<std::vector<unsigned int> >& g_I, const std::vector<std::vector<unsigned int> >& g_J,
113  unsigned int& sz, std::vector<cl_uint>& blocks_ind, std::vector<cl_uint>& matrix_dims)
114  {
115  sz = 0;
116  for(vcl_size_t i = 0; i < g_I.size(); ++i){
117  sz += static_cast<unsigned int>(g_I[i].size()*g_J[i].size());
118  matrix_dims[2*i] = static_cast<cl_uint>(g_I[i].size());
119  matrix_dims[2*i + 1] = static_cast<cl_uint>(g_J[i].size());
120  blocks_ind[i+1] = blocks_ind[i] + static_cast<cl_uint>(g_I[i].size()*g_J[i].size());
121 
122  }
123  }
128  template <typename SizeType>
129  void get_size(const std::vector<std::vector<SizeType> >& inds, SizeType & size){
130  size = 0;
131  for (vcl_size_t i = 0; i < inds.size(); ++i) {
132  size += static_cast<unsigned int>(inds[i].size());
133  }
134  }
135 
140  template <typename SizeType>
141  void init_start_inds(const std::vector<std::vector<SizeType> >& inds, std::vector<cl_uint>& start_inds){
142  for(vcl_size_t i = 0; i < inds.size(); ++i){
143  start_inds[i+1] = start_inds[i] + static_cast<cl_uint>(inds[i].size());
144  }
145  }
146 
147  //************************************* QR FUNCTIONS ***************************************//
153  template<typename MatrixType, typename ScalarType>
154  void dot_prod(const MatrixType& A, unsigned int beg_ind, ScalarType& res){
155  res = static_cast<ScalarType>(0);
156  for(vcl_size_t i = beg_ind; i < A.size1(); ++i){
157  res += A(i, beg_ind-1)*A(i, beg_ind-1);
158  }
159  }
167  template<typename MatrixType, typename VectorType, typename ScalarType>
168  void custom_inner_prod(const MatrixType& A, const VectorType& v, unsigned int col_ind, unsigned int start_ind, ScalarType& res){
169  res = static_cast<ScalarType>(0);
170  for(unsigned int i = start_ind; i < static_cast<unsigned int>(A.size1()); ++i){
171  res += A(i, col_ind)*v(i);
172  }
173  }
174 
180  template<typename MatrixType, typename VectorType>
181  void copy_vector(const MatrixType & A, VectorType & v, const unsigned int beg_ind){
182  for(unsigned int i = beg_ind; i < static_cast<unsigned int>(A.size1()); ++i){
183  v(i) = A( i, beg_ind-1);
184  }
185  }
186 
187  //householder reflection c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.210
194  template<typename MatrixType, typename VectorType, typename ScalarType>
195  void householder_vector(const MatrixType& A, unsigned int j, VectorType& v, ScalarType& b)
196  {
197  ScalarType sg;
198  //
199  dot_prod(A, j+1, sg);
200  copy_vector(A, v, j+1);
201  ScalarType mu;
202  v(j) = static_cast<ScalarType>(1.0);
203  if(sg == 0){
204  b = 0;
205  }
206  else{
207  mu = std::sqrt(A(j,j)*A(j, j) + sg);
208  if(A(j, j) <= 0){
209  v(j) = A(j, j) - mu;
210  }else{
211  v(j) = -sg/(A(j, j) + mu);
212  }
213  b = 2*(v(j)*v(j))/(sg + v(j)*v(j));
214  v = v/v(j);
215  }
216  }
223  template<typename MatrixType, typename VectorType, typename ScalarType>
224  void apply_householder_reflection(MatrixType& A, unsigned int iter_cnt, VectorType& v, ScalarType b)
225  {
226  //update every column of matrix A
227  ScalarType in_prod_res;
228  for(unsigned int i = iter_cnt; i < static_cast<unsigned int>(A.size2()); ++i){
229  //update each column in a fashion: ai = ai - b*v*(v'*ai)
230  custom_inner_prod(A, v, i, iter_cnt, in_prod_res);
231  for(unsigned int j = iter_cnt; j < static_cast<unsigned int>(A.size1()); ++j){
232  A(j, i) -= b*in_prod_res*v(j);
233  }
234  }
235  }
236 
242  template<typename MatrixType, typename VectorType>
243  void store_householder_vector(MatrixType& A, unsigned int ind, VectorType& v)
244  {
245  for(unsigned int i = ind; i < static_cast<unsigned int>(A.size1()); ++i){
246  A(i, ind-1) = v(i);
247  }
248  }
249 
250 
251  //QR algorithm
256  template<typename MatrixType, typename VectorType>
257  void single_qr(MatrixType& R, VectorType& b_v)
258  {
259  typedef typename MatrixType::value_type ScalarType;
260  if((R.size1() > 0) && (R.size2() > 0)){
261  VectorType v = (VectorType)boost::numeric::ublas::zero_vector<ScalarType>(R.size1());
262  b_v = (VectorType)boost::numeric::ublas::zero_vector<ScalarType>(R.size2());
263  for(unsigned int i = 0; i < static_cast<unsigned int>(R.size2()); ++i){
264  householder_vector(R, i, v, b_v[i]);
265  apply_householder_reflection(R, i, v, b_v[i]);
266  if(i < R.size1()) store_householder_vector(R, i+1, v);
267  }
268  }
269  }
270 
271  //********************** HELP FUNCTIONS FOR GPU-based QR factorization *************************//
272  /* * @brief Reading from text file into string
273  * @param file_name file name
274  * @param kernel_source string that contains file
275 
276  void read_kernel_from_file(std::string& file_name, std::string& kernel_source){
277  std::ifstream ifs(file_name.c_str(), std::ifstream::in);
278 
279  if (!ifs)
280  std::cerr << "WARNING: Cannot open file " << file_name << std::endl;
281 
282  std::string line;
283  std::ostringstream ost;
284  while (std::getline(ifs, line)) {
285  ost<<line<<std::endl;
286  }
287  kernel_source = ost.str();
288  }*/
289 
294  template <typename SizeType>
295  void get_max_block_size(const std::vector<std::vector<SizeType> >& inds, SizeType & max_size)
296  {
297  max_size = 0;
298  for(vcl_size_t i = 0; i < inds.size(); ++i){
299  if(inds[i].size() > max_size){
300  max_size = static_cast<SizeType>(inds[i].size());
301  }
302  }
303  }
304 
311  template<typename MatrixType, typename VectorType, typename ScalarType>
312  void custom_dot_prod(const MatrixType& A, const VectorType& v, unsigned int ind, ScalarType& res)
313  {
314  res = static_cast<ScalarType>(0);
315  for(unsigned int j = ind; j < A.size1(); ++j){
316  if(j == ind){
317  res += v(j);
318  }else{
319  res += A(j, ind)*v(j);
320  }
321  }
322  }
323 
329  template<typename MatrixType, typename VectorType>
330  void apply_q_trans_vec(const MatrixType& R, const VectorType& b_v, VectorType& y)
331  {
332  typedef typename MatrixType::value_type ScalarType;
333  ScalarType inn_prod = static_cast<ScalarType>(0);
334  for(vcl_size_t i = 0; i < R.size2(); ++i){
335  custom_dot_prod(R, y, static_cast<unsigned int>(i), inn_prod);
336  for(vcl_size_t j = i; j < R.size1(); ++j){
337  if(i == j){
338  y(j) -= b_v(i)*inn_prod;
339  }
340  else{
341  y(j) -= b_v(i)*inn_prod*R(j,i);
342  }
343  }
344  }
345  }
346 
352  template<typename MatrixType, typename VectorType>
353  void apply_q_trans_mat(const MatrixType& R, const VectorType& b_v, MatrixType& A)
354  {
355  VectorType tmp_v;
356  for(vcl_size_t i = 0; i < A.size2(); ++i){
357  tmp_v = (VectorType)column(A,i);
358  apply_q_trans_vec(R, b_v, tmp_v);
359  column(A,i) = tmp_v;
360  }
361  }
362 
363  //parallel QR for GPU
373  template<typename ScalarType>
374  void block_qr(std::vector<std::vector<unsigned int> >& g_I,
375  std::vector<std::vector<unsigned int> >& g_J,
376  block_matrix& g_A_I_J_vcl,
377  block_vector& g_bv_vcl,
378  std::vector<cl_uint>& g_is_update,
379  viennacl::context ctx)
380  {
381  viennacl::ocl::context & opencl_ctx = const_cast<viennacl::ocl::context &>(ctx.opencl_context());
382 
383  //typedef typename MatrixType::value_type ScalarType;
384  unsigned int bv_size = 0;
385  unsigned int v_size = 0;
386  //set up arguments for GPU
387  //find maximum size of rows/columns
388  unsigned int local_r_n = 0;
389  unsigned int local_c_n = 0;
390  //find max size for blocks
391  get_max_block_size(g_I, local_r_n);
392  get_max_block_size(g_J, local_c_n);
393  //get size
394  get_size(g_J, bv_size);
395  get_size(g_I, v_size);
396  //get start indices
397  std::vector<cl_uint> start_bv_inds(g_I.size() + 1, 0);
398  std::vector<cl_uint> start_v_inds(g_I.size() + 1, 0);
399  init_start_inds(g_J, start_bv_inds);
400  init_start_inds(g_I, start_v_inds);
401  //init arrays
402  std::vector<ScalarType> b_v(bv_size, static_cast<ScalarType>(0));
403  std::vector<ScalarType> v(v_size, static_cast<ScalarType>(0));
404  //call qr program
405  block_vector v_vcl;
406 
407  g_bv_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
408  static_cast<unsigned int>(sizeof(ScalarType)*bv_size),
409  &(b_v[0]));
410 
411  v_vcl.handle() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
412  static_cast<unsigned int>(sizeof(ScalarType)*v_size),
413  &(v[0]));
414  //the same as j_start_inds
415  g_bv_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
416  static_cast<unsigned int>(sizeof(cl_uint)*g_I.size()),
417  &(start_bv_inds[0]));
418 
419  v_vcl.handle1() = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
420  static_cast<unsigned int>(sizeof(cl_uint)*g_I.size()),
421  &(start_v_inds[0]));
422  viennacl::ocl::handle<cl_mem> g_is_update_vcl = opencl_ctx.create_memory(CL_MEM_READ_WRITE,
423  static_cast<unsigned int>(sizeof(cl_uint)*g_is_update.size()),
424  &(g_is_update[0]));
425  //local memory
426  //viennacl::ocl::enqueue(k(vcl_vec, size, viennacl::ocl::local_mem(sizeof(SCALARTYPE) * k.local_work_size()), temp));
429  qr_kernel.local_work_size(0, local_c_n);
430  qr_kernel.global_work_size(0, local_c_n*256);
431  viennacl::ocl::enqueue(qr_kernel(g_A_I_J_vcl.handle(), g_A_I_J_vcl.handle1(), g_bv_vcl.handle(),
432  v_vcl.handle(), g_A_I_J_vcl.handle2(),
433  g_bv_vcl.handle1(), v_vcl.handle1(), g_is_update_vcl,
434  viennacl::ocl::local_mem(static_cast<unsigned int>(sizeof(ScalarType)*(local_r_n*local_c_n))),
435  static_cast<cl_uint>(g_I.size())));
436 
437  }
438  }
439  }
440  }
441 }
442 #endif
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:470
Main kernel class for generating OpenCL kernels for the sparse approximate inverse preconditioners...
Definition: spai.hpp:570
void apply_q_trans_mat(const MatrixType &R, const VectorType &b_v, MatrixType &A)
Multiplication of Q'*A, where Q is in implicit for lower part of R and vector of betas - b_v...
Definition: qr.hpp:353
viennacl::ocl::handle< cl_mem > & handle1()
Return handle to start indices.
Definition: block_vector.hpp:60
std::size_t vcl_size_t
Definition: forwards.h:58
Represents a contigious matrices on GPU.
Definition: block_matrix.hpp:49
viennacl::ocl::handle< cl_mem > & handle()
Returns a handle to the elements.
Definition: block_matrix.hpp:56
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Implementation of the dense matrix class.
OpenCL kernel file for sparse approximate inverse operations.
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
viennacl::ocl::handle< cl_mem > & handle2()
Returns a handle to the start indices of matrix.
Definition: block_matrix.hpp:64
viennacl::ocl::handle< cl_mem > & handle()
Return handle to the elements.
Definition: block_vector.hpp:56
void apply_householder_reflection(MatrixType &A, unsigned int iter_cnt, VectorType &v, ScalarType b)
Inplace application of Householder vector to a matrix A.
Definition: qr.hpp:224
void custom_dot_prod(const MatrixType &A, const VectorType &v, unsigned int ind, ScalarType &res)
Dot_prod(column(A, ind), v) starting from index ind+1.
Definition: qr.hpp:312
void write_to_block(VectorType &con_A_I_J, unsigned int start_ind, const std::vector< unsigned int > &I, const std::vector< unsigned int > &J, MatrixType &m)
Definition: qr.hpp:71
void get_max_block_size(const std::vector< std::vector< SizeType > > &inds, SizeType &max_size)
Getting max size of rows/columns from container of index set.
Definition: qr.hpp:295
void store_householder_vector(MatrixType &A, unsigned int ind, VectorType &v)
Storage of vector v in column(A, ind), starting from ind-1 index of a column.
Definition: qr.hpp:243
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
Implementation of a bunch of (small) matrices on GPU. Experimental.
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:48
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
viennacl::ocl::handle< cl_mem > create_memory(cl_mem_flags flags, unsigned int size, void *ptr=NULL) const
Creates a memory buffer within the context.
Definition: context.hpp:199
Implementation of a bunch of vectors on GPU. Experimental.
void copy(std::vector< SCALARTYPE > &cpu_vec, circulant_matrix< SCALARTYPE, ALIGNMENT > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Definition: circulant_matrix.hpp:150
void get_size(const std::vector< std::vector< SizeType > > &inds, SizeType &size)
Computes size of particular container of index set.
Definition: qr.hpp:129
void init_start_inds(const std::vector< std::vector< SizeType > > &inds, std::vector< cl_uint > &start_inds)
Initializes start indices of particular index set.
Definition: qr.hpp:141
void apply_q_trans_vec(const MatrixType &R, const VectorType &b_v, VectorType &y)
Recovery Q from matrix R and vector of betas b_v.
Definition: qr.hpp:330
void print_continious_vector(VectorType &con_v, std::vector< cl_uint > &block_ind, const std::vector< std::vector< unsigned int > > &g_J)
Definition: qr.hpp:91
void Print(std::ostream &ostr, InputIterator it_begin, InputIterator it_end)
Definition: qr.hpp:63
void compute_blocks_size(const std::vector< std::vector< unsigned int > > &g_I, const std::vector< std::vector< unsigned int > > &g_J, unsigned int &sz, std::vector< cl_uint > &blocks_ind, std::vector< cl_uint > &matrix_dims)
**************************************** BLOCK FUNCTIONS ************************************// ...
Definition: qr.hpp:112
Implementations of the OpenCL backend, where all contexts are stored in.
void dot_prod(const MatrixType &A, unsigned int beg_ind, ScalarType &res)
Dot prod of particular column of martix A with it's self starting at a certain index beg_ind...
Definition: qr.hpp:154
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
Represents a contigious vector on GPU.
Definition: block_vector.hpp:49
void single_qr(MatrixType &R, VectorType &b_v)
Inplace QR factorization via Householder reflections c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.224.
Definition: qr.hpp:257
void block_qr(std::vector< std::vector< unsigned int > > &g_I, std::vector< std::vector< unsigned int > > &g_J, block_matrix &g_A_I_J_vcl, block_vector &g_bv_vcl, std::vector< cl_uint > &g_is_update, viennacl::context ctx)
Inplace QR factorization via Householder reflections c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.224 performed on GPU.
Definition: qr.hpp:374
void custom_inner_prod(const MatrixType &A, const VectorType &v, unsigned int col_ind, unsigned int start_ind, ScalarType &res)
Dot prod of particular matrix column with arbitrary vector: A(:, col_ind)
Definition: qr.hpp:168
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:759
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_column > column(const matrix_base< NumericT, F > &A, unsigned int j)
Definition: matrix.hpp:918
static void init(viennacl::ocl::context &ctx)
Definition: spai.hpp:577
void copy_vector(const MatrixType &A, VectorType &v, const unsigned int beg_ind)
Copying part of matrix column.
Definition: qr.hpp:181
viennacl::ocl::handle< cl_mem > & handle1()
Returns a handle to the matrix dimensions.
Definition: block_matrix.hpp:60
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:750
void print_continious_matrix(VectorType &con_A_I_J, std::vector< cl_uint > &blocks_ind, const std::vector< std::vector< unsigned int > > &g_I, const std::vector< std::vector< unsigned int > > &g_J)
Definition: qr.hpp:81
void householder_vector(const MatrixType &A, unsigned int j, VectorType &v, ScalarType &b)
Coputation of Householder vector, householder reflection c.f. Gene H. Golub, Charles F...
Definition: qr.hpp:195