ViennaCL - The Vienna Computing Library  1.5.2
hyb_matrix.hpp
#ifndef VIENNACL_HYB_MATRIX_HPP_
#define VIENNACL_HYB_MATRIX_HPP_

/* =========================================================================
   Copyright (c) 2010-2014, Institute for Microelectronics,
                            Institute for Analysis and Scientific Computing,
                            TU Wien.
   Portions of this software are copyright by UChicago Argonne, LLC.

                            -----------------
                  ViennaCL - The Vienna Computing Library
                            -----------------

   Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at

   (A list of authors and contributors can be found in the PDF manual)

   License:         MIT (X11), see file LICENSE in the base directory
============================================================================= */

/** @file viennacl/hyb_matrix.hpp
    @brief Implementation of the hyb_matrix class (sparse matrix stored in a hybrid ELL/CSR format)
*/

#include "viennacl/forwards.h"
#include "viennacl/vector.hpp"

#include "viennacl/tools/tools.hpp"

#include "viennacl/linalg/sparse_matrix_operations.hpp"

namespace viennacl
{
  /** @brief Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros. */
  template<typename SCALARTYPE, unsigned int ALIGNMENT /* see forwards.h for default argument */>
  class hyb_matrix
  {
    public:
      typedef viennacl::backend::mem_handle                                                              handle_type;
      typedef scalar<typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT<SCALARTYPE>::ResultType>   value_type;

      hyb_matrix() : csr_threshold_(SCALARTYPE(0.8)), rows_(0), cols_(0) {}

      hyb_matrix(viennacl::context ctx) : csr_threshold_(SCALARTYPE(0.8)), rows_(0), cols_(0)
      {
        ell_coords_.switch_active_handle_id(ctx.memory_type());
        ell_elements_.switch_active_handle_id(ctx.memory_type());

        csr_rows_.switch_active_handle_id(ctx.memory_type());
        csr_cols_.switch_active_handle_id(ctx.memory_type());
        csr_elements_.switch_active_handle_id(ctx.memory_type());

#ifdef VIENNACL_WITH_OPENCL
        if (ctx.memory_type() == OPENCL_MEMORY)
        {
          ell_coords_.opencl_handle().context(ctx.opencl_context());
          ell_elements_.opencl_handle().context(ctx.opencl_context());

          csr_rows_.opencl_handle().context(ctx.opencl_context());
          csr_cols_.opencl_handle().context(ctx.opencl_context());
          csr_elements_.opencl_handle().context(ctx.opencl_context());
        }
#endif
      }

      SCALARTYPE csr_threshold() const { return csr_threshold_; }
      void csr_threshold(SCALARTYPE thr) { csr_threshold_ = thr; }

      vcl_size_t internal_size1() const { return viennacl::tools::align_to_multiple<vcl_size_t>(rows_, ALIGNMENT); }
      vcl_size_t internal_size2() const { return viennacl::tools::align_to_multiple<vcl_size_t>(cols_, ALIGNMENT); }

      vcl_size_t size1() const { return rows_; }
      vcl_size_t size2() const { return cols_; }

      vcl_size_t internal_ellnnz() const { return viennacl::tools::align_to_multiple<vcl_size_t>(ellnnz_, ALIGNMENT); }
      vcl_size_t ell_nnz() const { return ellnnz_; }
      vcl_size_t csr_nnz() const { return csrnnz_; }

      const handle_type & handle() const { return ell_elements_; }
      const handle_type & handle2() const { return ell_coords_; }
      const handle_type & handle3() const { return csr_rows_; }
      const handle_type & handle4() const { return csr_cols_; }
      const handle_type & handle5() const { return csr_elements_; }

    public:
#if defined(_MSC_VER) && _MSC_VER < 1500      //Visual Studio 2005 needs special treatment
      template <typename CPU_MATRIX>
      friend void copy(const CPU_MATRIX & cpu_matrix, hyb_matrix & gpu_matrix);
#else
      template <typename CPU_MATRIX, typename T, unsigned int ALIGN>
      friend void copy(const CPU_MATRIX & cpu_matrix, hyb_matrix<T, ALIGN> & gpu_matrix);
#endif

    private:
      SCALARTYPE csr_threshold_;
      vcl_size_t rows_;
      vcl_size_t cols_;
      vcl_size_t ellnnz_;
      vcl_size_t csrnnz_;

      handle_type ell_coords_;    // ELL column indices
      handle_type ell_elements_;  // ELL nonzero entries

      handle_type csr_rows_;
      handle_type csr_cols_;
      handle_type csr_elements_;
  };

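  // Illustrative usage sketch (not part of the original file): constructing a
  // hyb_matrix in a particular context and tuning the ELL/CSR split. The
  // csr_threshold is the fraction of rows that the ELL part should cover
  // completely; entries of longer rows spill over into the CSR part.
  //
  //   viennacl::context ctx(viennacl::MAIN_MEMORY);   // or an OpenCL/CUDA context
  //   viennacl::hyb_matrix<float> A(ctx);
  //   A.csr_threshold(0.9f);                          // cover 90% of the rows with the ELL part
  //
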
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const CPU_MATRIX & cpu_matrix, hyb_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix)
  {
    assert( (gpu_matrix.size1() == 0 || viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
    assert( (gpu_matrix.size2() == 0 || viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

    if (cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0)
    {
      //determine max capacity for row
      vcl_size_t max_entries_per_row = 0;
      std::vector<vcl_size_t> hist_entries(cpu_matrix.size1() + 1, 0);

      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
      {
        vcl_size_t num_entries = 0;
        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
        {
          ++num_entries;
        }

        hist_entries[num_entries] += 1;
        max_entries_per_row = std::max(max_entries_per_row, num_entries);
      }

      vcl_size_t sum = 0;
      for (vcl_size_t ind = 0; ind <= max_entries_per_row; ind++)
      {
        sum += hist_entries[ind];

        if (sum >= gpu_matrix.csr_threshold() * cpu_matrix.size1())
        {
          max_entries_per_row = ind;
          break;
        }
      }

      //setup GPU matrix
      gpu_matrix.ellnnz_ = max_entries_per_row;
      gpu_matrix.rows_ = cpu_matrix.size1();
      gpu_matrix.cols_ = cpu_matrix.size2();

      vcl_size_t nnz = gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz();

      viennacl::backend::typesafe_host_array<unsigned int> ell_coords(gpu_matrix.ell_coords_, nnz);
      viennacl::backend::typesafe_host_array<unsigned int> csr_rows(gpu_matrix.csr_rows_, cpu_matrix.size1() + 1);
      std::vector<unsigned int> csr_cols;

      std::vector<SCALARTYPE> ell_elements(nnz);
      std::vector<SCALARTYPE> csr_elements;

      vcl_size_t csr_index = 0;

      for (typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
      {
        vcl_size_t data_index = 0;

        csr_rows.set(row_it.index1(), csr_index);

        for (typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
        {
          if (data_index < max_entries_per_row)
          {
            ell_coords.set(gpu_matrix.internal_size1() * data_index + col_it.index1(), col_it.index2());
            ell_elements[gpu_matrix.internal_size1() * data_index + col_it.index1()] = *col_it;
          }
          else
          {
            csr_cols.push_back(static_cast<unsigned int>(col_it.index2()));
            csr_elements.push_back(*col_it);

            csr_index++;
          }

          data_index++;
        }

      }

      if (csr_cols.empty())
      {
        csr_cols.push_back(0);
        csr_elements.push_back(0);
      }

      csr_rows.set(csr_rows.size() - 1, csr_index);

      gpu_matrix.csrnnz_ = csr_cols.size();

      viennacl::backend::typesafe_host_array<unsigned int> csr_cols_for_gpu(gpu_matrix.csr_cols_, csr_cols.size());
      for (vcl_size_t i = 0; i < csr_cols.size(); ++i)
        csr_cols_for_gpu.set(i, csr_cols[i]);

      viennacl::backend::memory_create(gpu_matrix.ell_coords_, ell_coords.raw_size(), traits::context(gpu_matrix.ell_coords_), ell_coords.get());
      viennacl::backend::memory_create(gpu_matrix.ell_elements_, sizeof(SCALARTYPE) * ell_elements.size(), traits::context(gpu_matrix.ell_elements_), &(ell_elements[0]));

      viennacl::backend::memory_create(gpu_matrix.csr_rows_, csr_rows.raw_size(), traits::context(gpu_matrix.csr_rows_), csr_rows.get());
      viennacl::backend::memory_create(gpu_matrix.csr_cols_, csr_cols_for_gpu.raw_size(), traits::context(gpu_matrix.csr_cols_), csr_cols_for_gpu.get());
      viennacl::backend::memory_create(gpu_matrix.csr_elements_, sizeof(SCALARTYPE) * csr_elements.size(), traits::context(gpu_matrix.csr_elements_), &(csr_elements[0]));
    }
  }

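  // Illustrative sketch of the host-to-device transfer above (not part of the
  // original file). Any host type exposing the uBLAS-style iterator interface
  // (begin1()/end1(), index1()/index2()) works; Boost.uBLAS is assumed here:
  //
  //   boost::numeric::ublas::compressed_matrix<float> host_A(1000, 1000);
  //   host_A(0, 0) = 1.0f;
  //   host_A(1, 5) = 2.0f;
  //
  //   viennacl::hyb_matrix<float> device_A;
  //   viennacl::copy(host_A, device_A);   // dispatches to the copy() defined above
  //
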
  template <typename CPU_MATRIX, typename SCALARTYPE, unsigned int ALIGNMENT>
  void copy(const hyb_matrix<SCALARTYPE, ALIGNMENT> & gpu_matrix, CPU_MATRIX & cpu_matrix)
  {
    assert( (viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) && bool("Size mismatch") );
    assert( (viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) && bool("Size mismatch") );

    if (gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)
    {
      std::vector<SCALARTYPE> ell_elements(gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz());
      viennacl::backend::typesafe_host_array<unsigned int> ell_coords(gpu_matrix.handle2(), gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz());

      std::vector<SCALARTYPE> csr_elements(gpu_matrix.csr_nnz());
      viennacl::backend::typesafe_host_array<unsigned int> csr_rows(gpu_matrix.handle3(), gpu_matrix.size1() + 1);
      viennacl::backend::typesafe_host_array<unsigned int> csr_cols(gpu_matrix.handle4(), gpu_matrix.csr_nnz());

      viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE) * ell_elements.size(), &(ell_elements[0]));
      viennacl::backend::memory_read(gpu_matrix.handle2(), 0, ell_coords.raw_size(), ell_coords.get());
      viennacl::backend::memory_read(gpu_matrix.handle3(), 0, csr_rows.raw_size(), csr_rows.get());
      viennacl::backend::memory_read(gpu_matrix.handle4(), 0, csr_cols.raw_size(), csr_cols.get());
      viennacl::backend::memory_read(gpu_matrix.handle5(), 0, sizeof(SCALARTYPE) * csr_elements.size(), &(csr_elements[0]));


      for (vcl_size_t row = 0; row < gpu_matrix.size1(); row++)
      {
        for (vcl_size_t ind = 0; ind < gpu_matrix.internal_ellnnz(); ind++)
        {
          vcl_size_t offset = gpu_matrix.internal_size1() * ind + row;

          if (ell_elements[offset] == static_cast<SCALARTYPE>(0.0))
          {
            continue;
          }

          if (ell_coords[offset] >= gpu_matrix.size2())
          {
            std::cerr << "ViennaCL encountered invalid data " << offset << " " << ind << " " << row << " " << ell_coords[offset] << " " << gpu_matrix.size2() << std::endl;
            return;
          }

          cpu_matrix(row, ell_coords[offset]) = ell_elements[offset];
        }

        for (vcl_size_t ind = csr_rows[row]; ind < csr_rows[row + 1]; ind++)
        {
          if (csr_elements[ind] == static_cast<SCALARTYPE>(0.0))
          {
            continue;
          }

          if (csr_cols[ind] >= gpu_matrix.size2())
          {
            std::cerr << "ViennaCL encountered invalid data " << std::endl;
            return;
          }

          cpu_matrix(row, csr_cols[ind]) = csr_elements[ind];
        }
      }
    }
  }

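  // Illustrative sketch of the device-to-host transfer above (not part of the
  // original file); the host matrix must already have matching dimensions:
  //
  //   boost::numeric::ublas::compressed_matrix<float> host_B(device_A.size1(), device_A.size2());
  //   viennacl::copy(device_A, host_B);
  //
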

  //
  // Specify available operations:
  //

  namespace linalg
  {
    namespace detail
    {
      // x = A * y
      template <typename T, unsigned int A>
      struct op_executor<vector_base<T>, op_assign, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
        {
          // check for the special case x = A * x
          if (viennacl::traits::handle(lhs) == viennacl::traits::handle(rhs.rhs()))
          {
            viennacl::vector<T> temp(lhs);
            viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
            lhs = temp;
          }
          else
            viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
        }
      };

      // x += A * y
      template <typename T, unsigned int A>
      struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(lhs);
          viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
          lhs += temp;
        }
      };

      // x -= A * y
      template <typename T, unsigned int A>
      struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(lhs);
          viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), temp);
          lhs -= temp;
        }
      };


      // x = A * vec_op
      template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
      struct op_executor<vector_base<T>, op_assign, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
          viennacl::linalg::prod_impl(rhs.lhs(), temp, lhs);
        }
      };

      // x += A * vec_op
      template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
      struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
          viennacl::vector<T> temp_result(lhs);
          viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
          lhs += temp_result;
        }
      };

      // x -= A * vec_op
      template <typename T, unsigned int A, typename LHS, typename RHS, typename OP>
      struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
      {
        static void apply(vector_base<T> & lhs, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> const & rhs)
        {
          viennacl::vector<T> temp(rhs.rhs(), viennacl::traits::context(rhs));
          viennacl::vector<T> temp_result(lhs);
          viennacl::linalg::prod_impl(rhs.lhs(), temp, temp_result);
          lhs -= temp_result;
        }
      };

    } // namespace detail
  } // namespace linalg
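
  // Illustrative sketch of what the op_executor specializations above enable
  // (not part of the original file); viennacl/linalg/prod.hpp provides prod():
  //
  //   viennacl::vector<float> x(device_A.size2()), y(device_A.size1());
  //   y  = viennacl::linalg::prod(device_A, x);   // "x = A * y" executor (op_assign)
  //   y += viennacl::linalg::prod(device_A, x);   // op_inplace_add executor
  //   x  = viennacl::linalg::prod(device_A, x);   // in-place case handled via a temporary
  //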

}

#endif