1 #ifndef VIENNACL_HYB_MATRIX_HPP_
2 #define VIENNACL_HYB_MATRIX_HPP_
37 template<
typename SCALARTYPE,
unsigned int ALIGNMENT >
44 hyb_matrix() : csr_threshold_(SCALARTYPE(0.8)), rows_(0), cols_(0) {}
55 #ifdef VIENNACL_WITH_OPENCL
58 ell_coords_.opencl_handle().context(ctx.opencl_context());
59 ell_elements_.opencl_handle().context(ctx.opencl_context());
61 csr_rows_.opencl_handle().context(ctx.opencl_context());
62 csr_cols_.opencl_handle().context(ctx.opencl_context());
63 csr_elements_.opencl_handle().context(ctx.opencl_context());
81 const handle_type &
handle()
const {
return ell_elements_; }
82 const handle_type &
handle2()
const {
return ell_coords_; }
83 const handle_type &
handle3()
const {
return csr_rows_; }
84 const handle_type &
handle4()
const {
return csr_cols_; }
85 const handle_type &
handle5()
const {
return csr_elements_; }
88 #if defined(_MSC_VER) && _MSC_VER < 1500 //Visual Studio 2005 needs special treatment
89 template <
typename CPU_MATRIX>
90 friend void copy(
const CPU_MATRIX & cpu_matrix,
hyb_matrix & gpu_matrix );
92 template <
typename CPU_MATRIX,
typename T,
unsigned int ALIGN>
97 SCALARTYPE csr_threshold_;
103 handle_type ell_coords_;
104 handle_type ell_elements_;
106 handle_type csr_rows_;
107 handle_type csr_cols_;
108 handle_type csr_elements_;
111 template <
typename CPU_MATRIX,
typename SCALARTYPE,
unsigned int ALIGNMENT>
114 assert( (gpu_matrix.size1() == 0 ||
viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1()) &&
bool(
"Size mismatch") );
115 assert( (gpu_matrix.size2() == 0 ||
viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2()) &&
bool(
"Size mismatch") );
117 if(cpu_matrix.size1() > 0 && cpu_matrix.size2() > 0)
121 std::vector<vcl_size_t> hist_entries(cpu_matrix.size1() + 1, 0);
123 for (
typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
126 for (
typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
131 hist_entries[num_entries] += 1;
132 max_entries_per_row = std::max(max_entries_per_row, num_entries);
136 for(
vcl_size_t ind = 0; ind <= max_entries_per_row; ind++)
138 sum += hist_entries[ind];
140 if(sum >= gpu_matrix.csr_threshold() * cpu_matrix.size1())
142 max_entries_per_row = ind;
148 gpu_matrix.ellnnz_ = max_entries_per_row;
149 gpu_matrix.rows_ = cpu_matrix.size1();
150 gpu_matrix.cols_ = cpu_matrix.size2();
152 vcl_size_t nnz = gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz();
156 std::vector<unsigned int> csr_cols;
158 std::vector<SCALARTYPE> ell_elements(nnz);
159 std::vector<SCALARTYPE> csr_elements;
163 for (
typename CPU_MATRIX::const_iterator1 row_it = cpu_matrix.begin1(); row_it != cpu_matrix.end1(); ++row_it)
167 csr_rows.set(row_it.index1(), csr_index);
169 for (
typename CPU_MATRIX::const_iterator2 col_it = row_it.begin(); col_it != row_it.end(); ++col_it)
171 if(data_index < max_entries_per_row)
173 ell_coords.
set(gpu_matrix.internal_size1() * data_index + col_it.index1(), col_it.index2());
174 ell_elements[gpu_matrix.internal_size1() * data_index + col_it.index1()] = *col_it;
178 csr_cols.push_back(static_cast<unsigned int>(col_it.index2()));
179 csr_elements.push_back(*col_it);
191 csr_cols.push_back(0);
192 csr_elements.push_back(0);
195 csr_rows.
set(csr_rows.size() - 1, csr_index);
197 gpu_matrix.csrnnz_ = csr_cols.
size();
201 csr_cols_for_gpu.
set(i, csr_cols[i]);
212 template <
typename CPU_MATRIX,
typename SCALARTYPE,
unsigned int ALIGNMENT>
218 if(gpu_matrix.size1() > 0 && gpu_matrix.size2() > 0)
220 std::vector<SCALARTYPE> ell_elements(gpu_matrix.internal_size1() * gpu_matrix.internal_ellnnz());
223 std::vector<SCALARTYPE> csr_elements(gpu_matrix.csr_nnz());
236 for(
vcl_size_t ind = 0; ind < gpu_matrix.internal_ellnnz(); ind++)
240 if(ell_elements[offset] == static_cast<SCALARTYPE>(0.0))
245 if(ell_coords[offset] >= gpu_matrix.size2())
247 std::cerr <<
"ViennaCL encountered invalid data " << offset <<
" " << ind <<
" " << row <<
" " << ell_coords[offset] <<
" " << gpu_matrix.size2() << std::endl;
251 cpu_matrix(row, ell_coords[offset]) = ell_elements[offset];
256 if(csr_elements[ind] == static_cast<SCALARTYPE>(0.0))
261 if(csr_cols[ind] >= gpu_matrix.size2())
263 std::cerr <<
"ViennaCL encountered invalid data " << std::endl;
267 cpu_matrix(
row, csr_cols[ind]) = csr_elements[ind];
285 template <
typename T,
unsigned int A>
286 struct op_executor<vector_base<T>, op_assign, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> >
288 static void apply(vector_base<T> & lhs, vector_expression<
const hyb_matrix<T, A>,
const vector_base<T>, op_prod>
const & rhs)
302 template <
typename T,
unsigned int A>
303 struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> >
305 static void apply(vector_base<T> & lhs, vector_expression<
const hyb_matrix<T, A>,
const vector_base<T>, op_prod>
const & rhs)
313 template <
typename T,
unsigned int A>
314 struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const hyb_matrix<T, A>, const vector_base<T>, op_prod> >
316 static void apply(vector_base<T> & lhs, vector_expression<
const hyb_matrix<T, A>,
const vector_base<T>, op_prod>
const & rhs)
326 template <
typename T,
unsigned int A,
typename LHS,
typename RHS,
typename OP>
327 struct op_executor<vector_base<T>, op_assign, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
329 static void apply(vector_base<T> & lhs, vector_expression<
const hyb_matrix<T, A>,
const vector_expression<const LHS, const RHS, OP>, op_prod>
const & rhs)
337 template <
typename T,
unsigned int A,
typename LHS,
typename RHS,
typename OP>
338 struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
340 static void apply(vector_base<T> & lhs, vector_expression<
const hyb_matrix<T, A>,
const vector_expression<const LHS, const RHS, OP>, op_prod>
const & rhs)
350 template <
typename T,
unsigned int A,
typename LHS,
typename RHS,
typename OP>
351 struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const hyb_matrix<T, A>, const vector_expression<const LHS, const RHS, OP>, op_prod> >
353 static void apply(vector_base<T> & lhs, vector_expression<
const hyb_matrix<T, A>,
const vector_expression<const LHS, const RHS, OP>, op_prod>
const & rhs)
vcl_size_t ell_nnz() const
Definition: hyb_matrix.hpp:78
scalar< typename viennacl::tools::CHECK_SCALAR_TEMPLATE_ARGUMENT< SCALARTYPE >::ResultType > value_type
Definition: hyb_matrix.hpp:42
vcl_size_t internal_size2() const
Definition: hyb_matrix.hpp:72
viennacl::backend::mem_handle handle_type
Definition: hyb_matrix.hpp:41
Helper class implementing an array on the host. Default case: No conversion necessary.
Definition: util.hpp:95
std::size_t vcl_size_t
Definition: forwards.h:58
const handle_type & handle3() const
Definition: hyb_matrix.hpp:83
vcl_size_t size() const
Definition: util.hpp:163
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:172
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
This file provides the forward declarations for the main types used within ViennaCL.
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM.
Definition: memory.hpp:261
vcl_size_t raw_size() const
Definition: util.hpp:158
Sparse matrix class using a hybrid format composed of the ELL and CSR format for storing the nonzeros...
Definition: forwards.h:321
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:245
hyb_matrix()
Definition: hyb_matrix.hpp:44
void set(vcl_size_t index, U value)
Definition: util.hpp:145
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
vcl_size_t internal_size1() const
Definition: hyb_matrix.hpp:71
const handle_type & handle2() const
Definition: hyb_matrix.hpp:82
friend void copy(const CPU_MATRIX &cpu_matrix, hyb_matrix< T, ALIGN > &gpu_matrix)
Definition: forwards.h:480
vcl_size_t internal_ellnnz() const
Definition: hyb_matrix.hpp:77
const handle_type & handle4() const
Definition: hyb_matrix.hpp:84
hyb_matrix(viennacl::context ctx)
Definition: hyb_matrix.hpp:46
const handle_type & handle5() const
Definition: hyb_matrix.hpp:85
Implementations of operations using sparse matrices.
void csr_threshold(SCALARTYPE thr)
Definition: hyb_matrix.hpp:69
vcl_size_t size2() const
Definition: hyb_matrix.hpp:75
void copy(std::vector< SCALARTYPE > &cpu_vec, circulant_matrix< SCALARTYPE, ALIGNMENT > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Definition: circulant_matrix.hpp:150
void * get()
Definition: util.hpp:150
viennacl::memory_types memory_type() const
Definition: context.hpp:76
const handle_type & handle() const
Definition: hyb_matrix.hpp:81
vcl_size_t csr_nnz() const
Definition: hyb_matrix.hpp:79
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:910
A vector class representing a linear memory sequence on the GPU. Inspired by boost::numeric::ublas::v...
Definition: forwards.h:208
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:41
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
vcl_size_t size1() const
Definition: hyb_matrix.hpp:74
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
Definition: mem_handle.hpp:62
void prod_impl(const matrix_base< NumericT, F > &mat, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
Definition: matrix_operations.hpp:350
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
Definition: memory.hpp:87
void switch_active_handle_id(memory_types new_id)
Switches the currently active handle. If no support for that backend is provided, an exception is thr...
Definition: mem_handle.hpp:94
viennacl::backend::mem_handle & handle(T &obj)
Returns the generic memory handle of an object. Non-const version.
Definition: handle.hpp:41
SCALARTYPE csr_threshold() const
Definition: hyb_matrix.hpp:68