1 #ifndef VIENNACL_LINALG_DETAIL_SPAI_SPAI_DYNAMIC_HPP
2 #define VIENNACL_LINALG_DETAIL_SPAI_SPAI_DYNAMIC_HPP
38 #include "boost/numeric/ublas/vector.hpp"
39 #include "boost/numeric/ublas/matrix.hpp"
40 #include "boost/numeric/ublas/matrix_proxy.hpp"
41 #include "boost/numeric/ublas/vector_proxy.hpp"
42 #include "boost/numeric/ublas/storage.hpp"
43 #include "boost/numeric/ublas/io.hpp"
44 #include "boost/numeric/ublas/lu.hpp"
45 #include "boost/numeric/ublas/triangular.hpp"
46 #include "boost/numeric/ublas/matrix_expression.hpp"
79 template <
typename T1,
typename T2>
80 bool operator()(std::pair<T1, T2>
const & left, std::pair<T1, T2>
const & right)
82 return static_cast<double>(left.second) > static_cast<double>(right.second);
92 template<
typename MatrixType>
93 void composeNewR(
const MatrixType& A,
const MatrixType& R_n, MatrixType& R)
95 typedef typename MatrixType::value_type ScalarType;
96 vcl_size_t row_n = R_n.size1() - (A.size1() - R.size2());
97 MatrixType C = boost::numeric::ublas::zero_matrix<ScalarType>(R.size1() + row_n, R.size2() + A.size2());
102 R.size2() + A.size2())) +=
105 if(R_n.size1() > 0 && R_n.size2() > 0)
115 template<
typename VectorType>
118 typedef typename VectorType::value_type ScalarType;
119 VectorType w = boost::numeric::ublas::zero_vector<ScalarType>(v.size() + v_n.size());
129 template<
typename SparseVectorType,
typename ScalarType>
132 for(
typename SparseVectorType::const_iterator vec_it = v.begin(); vec_it != v.end(); ++vec_it)
133 norm += (vec_it->second)*(vec_it->second);
135 norm = std::sqrt(norm);
143 template<
typename SparseVectorType,
typename ScalarType>
144 void sparse_inner_prod(
const SparseVectorType& v1,
const SparseVectorType& v2, ScalarType& res_v)
146 typename SparseVectorType::const_iterator v_it1 = v1.begin();
147 typename SparseVectorType::const_iterator v_it2 = v2.begin();
148 while((v_it1 != v1.end())&&(v_it2 != v2.end()))
150 if(v_it1->first == v_it2->first)
152 res_v += (v_it1->second)*(v_it2->second);
156 else if(v_it1->first < v_it2->first)
170 template <
typename SparseVectorType,
typename ScalarType>
172 const SparseVectorType& res,
173 std::vector<unsigned int>& J,
174 std::vector<unsigned int>& J_u,
177 std::vector<std::pair<unsigned int, ScalarType> > p;
179 ScalarType inprod, norm2;
181 for(
typename SparseVectorType::const_iterator res_it = res.begin(); res_it != res.end(); ++res_it)
188 p.push_back(std::pair<unsigned int, ScalarType>(res_it->first, (inprod*inprod)/(norm2*norm2)));
193 while ((cur_size < J.size())&&(p.size() > 0))
195 J_u.push_back(p[0].first);
200 return (cur_size > 0);
209 template<
typename SparseVectorType>
210 void buildNewRowSet(
const std::vector<SparseVectorType>& A_v_c,
const std::vector<unsigned int>& I,
211 const std::vector<unsigned int>& J_n, std::vector<unsigned int>& I_n)
215 for(
typename SparseVectorType::const_iterator col_it = A_v_c[J_n[i]].begin(); col_it!=A_v_c[J_n[i]].end(); ++col_it)
218 I_n.push_back(col_it->first);
228 template<
typename MatrixType>
231 typedef typename MatrixType::value_type ScalarType;
232 vcl_size_t row_n1 = A_I_J_u.size1() - A_I_J.size2();
236 MatrixType C = boost::numeric::ublas::zero_matrix<ScalarType>(row_n, col_n);
257 template<
typename SparseMatrixType,
typename SparseVectorType,
typename DenseMatrixType,
typename VectorType>
258 void block_update(
const SparseMatrixType& A,
const std::vector<SparseVectorType>& A_v_c,
259 std::vector<SparseVectorType>& g_res,
260 std::vector<bool>& g_is_update,
261 std::vector<std::vector<unsigned int> >& g_I,
262 std::vector<std::vector<unsigned int> >& g_J,
263 std::vector<VectorType>& g_b_v,
264 std::vector<DenseMatrixType>& g_A_I_J,
267 typedef typename DenseMatrixType::value_type ScalarType;
269 std::vector<std::vector<unsigned int> > g_J_u(g_J.size());
271 std::vector<std::vector<unsigned int> > g_I_u(g_J.size());
273 std::vector<DenseMatrixType> g_A_I_J_u(g_J.size());
275 std::vector<DenseMatrixType> g_A_I_u_J_u(g_J.size());
277 std::vector<VectorType> g_b_v_u(g_J.size());
278 #ifdef VIENNACL_WITH_OPENMP
279 #pragma omp parallel for
281 for(
long i = 0; i < static_cast<long>(g_J.size()); ++i)
285 if(buildAugmentedIndexSet<SparseVectorType, ScalarType>(A_v_c, g_res[i], g_J[i], g_J_u[i], tag))
299 composeNewR(g_A_I_J_u[i], g_A_I_u_J_u[i], g_A_I_J[i]);
302 g_J[i].insert(g_J[i].end(), g_J_u[i].begin(), g_J_u[i].end());
303 g_I[i].insert(g_I[i].end(), g_I_u[i].begin(), g_I_u[i].end());
307 g_is_update[i] =
false;
325 template<
typename ScalarType>
327 const std::vector<std::vector<unsigned int> >& g_I,
331 std::vector<cl_uint>& g_is_update,
335 unsigned int local_r_n = 0;
336 unsigned int local_c_n = 0;
337 unsigned int sz_blocks = 0;
341 std::vector<cl_uint> matrix_dims(g_I.size()*2,
static_cast<cl_uint
>(0));
342 std::vector<cl_uint> blocks_ind(g_I.size() + 1,
static_cast<cl_uint
>(0));
347 static_cast<unsigned int>(
sizeof(cl_uint)*(g_is_update.size())),
357 static_cast<cl_uint
>(g_I.size())));
366 template <
typename SizeType>
367 void assemble_qr_row_inds(
const std::vector<std::vector<SizeType> >& g_I,
const std::vector<std::vector<SizeType> > g_J,
368 const std::vector<std::vector<SizeType> >& g_I_u,
369 std::vector<std::vector<SizeType> >& g_I_q)
371 #ifdef VIENNACL_WITH_OPENMP
372 #pragma omp parallel for
374 for(
long i = 0; i < static_cast<long>(g_I.size()); ++i)
377 g_I_q[i].push_back(g_I[i][j]);
379 for(
vcl_size_t j = 0; j < g_I_u[i].size(); ++j)
380 g_I_q[i].push_back(g_I_u[i][j]);
398 template<
typename ScalarType>
400 const std::vector<std::vector<unsigned int> >& g_J,
401 const std::vector<std::vector<unsigned int> >& g_I,
402 const std::vector<std::vector<unsigned int> >& g_J_u,
403 const std::vector<std::vector<unsigned int> >& g_I_u,
404 std::vector<std::vector<unsigned int> >& g_I_q,
408 std::vector<cl_uint>& g_is_update,
409 const bool is_empty_block,
416 unsigned int sz_blocks;
417 std::vector<cl_uint> matrix_dims(g_I.size()*2,
static_cast<cl_uint
>(0));
418 std::vector<cl_uint> blocks_ind(g_I.size() + 1,
static_cast<cl_uint
>(0));
420 std::vector<ScalarType> con_A_I_J_q(sz_blocks, static_cast<ScalarType>(0));
425 static_cast<unsigned int>(
sizeof(ScalarType)*sz_blocks),
430 static_cast<unsigned int>(
sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size())),
435 static_cast<unsigned int>(
sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size() + 1)),
440 static_cast<unsigned int>(
sizeof(cl_uint)*(g_is_update.size())),
460 static_cast<unsigned int>(g_I.size())));
472 static_cast<unsigned int>(g_I.size())));
490 template<
typename ScalarType>
491 void assemble_r(std::vector<std::vector<unsigned int> >& g_I, std::vector<std::vector<unsigned int> >& g_J,
497 std::vector<cl_uint>& g_is_update,
501 std::vector<cl_uint> matrix_dims(g_I.size()*2,
static_cast<cl_uint
>(0));
502 std::vector<cl_uint> blocks_ind(g_I.size() + 1,
static_cast<cl_uint
>(0));
503 std::vector<cl_uint> start_bv_r_inds(g_I.size() + 1, 0);
504 unsigned int sz_blocks, bv_size;
508 std::vector<ScalarType> con_A_I_J_r(sz_blocks, static_cast<ScalarType>(0));
509 std::vector<ScalarType> b_v_r(bv_size, static_cast<ScalarType>(0));
514 static_cast<unsigned int>(
sizeof(ScalarType)*sz_blocks),
519 static_cast<unsigned int>(
sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size())),
524 static_cast<unsigned int>(
sizeof(cl_uint)*2*static_cast<unsigned int>(g_I.size() + 1)),
529 static_cast<unsigned int>(
sizeof(ScalarType)*bv_size),
534 static_cast<unsigned int>(
sizeof(cl_uint)*(g_I.size() + 1)),
535 &(start_bv_r_inds[0]));
539 static_cast<unsigned int>(
sizeof(cl_uint)*(g_is_update.size())),
550 g_is_update_vcl,
static_cast<cl_uint
>(g_I.size())));
554 bv_assembly_kernel.global_work_size(0, 256);
558 static_cast<cl_uint
>(g_I.size())));
578 template<
typename ScalarType,
unsigned int MAT_ALIGNMENT,
typename SparseVectorType>
580 std::vector<cl_uint>& g_is_update,
581 std::vector<SparseVectorType>& g_res,
582 std::vector<std::vector<unsigned int> >& g_J,
583 std::vector<std::vector<unsigned int> >& g_I,
590 std::vector<std::vector<unsigned int> > g_J_u(g_J.size());
592 std::vector<std::vector<unsigned int> > g_I_u(g_J.size());
594 std::vector<std::vector<unsigned int> > g_I_q(g_J.size());
602 #ifdef VIENNACL_WITH_OPENMP
603 #pragma omp parallel for
605 for(
long i = 0; i < static_cast<long>(g_J.size()); ++i)
609 if(buildAugmentedIndexSet<SparseVectorType, ScalarType>(A_v_c, g_res[i], g_J[i], g_J_u[i], tag))
614 block_assembly(A, g_J_u, g_I, g_A_I_J_u_vcl, g_is_update, is_empty_block);
616 block_q_multiplication<ScalarType>(g_J_u, g_I, g_A_I_J_vcl, g_bv_vcl, g_A_I_J_u_vcl, g_is_update, ctx);
618 block_assembly(A, g_J_u, g_I_u, g_A_I_u_J_u_vcl, g_is_update, is_empty_block);
619 assemble_qr_block<ScalarType>(g_J, g_I, g_J_u, g_I_u, g_I_q, g_A_I_J_u_vcl, g_A_I_J_vcl.handle1(),
620 g_A_I_u_J_u_vcl, g_is_update, is_empty_block, ctx);
622 block_qr<ScalarType>(g_I_q, g_J_u, g_A_I_u_J_u_vcl, g_bv_u_vcl, g_is_update, ctx);
624 #ifdef VIENNACL_WITH_OPENMP
625 #pragma omp parallel for
627 for(
long i = 0; i < static_cast<long>(g_J.size()); ++i)
629 g_J[i].insert(g_J[i].end(), g_J_u[i].begin(), g_J_u[i].end());
630 g_I[i].insert(g_I[i].end(), g_I_u[i].begin(), g_I_u[i].end());
632 assemble_r<ScalarType>(g_I, g_J, g_A_I_J_vcl, g_A_I_J_u_vcl, g_A_I_u_J_u_vcl, g_bv_vcl, g_bv_u_vcl, g_is_update, ctx);
viennacl::ocl::kernel & get_kernel(std::string const &program_name, std::string const &kernel_name)
Convenience function for retrieving the kernel of a program directly from the context.
Definition: context.hpp:470
Main kernel class for generating OpenCL kernels for the sparse approximate inverse preconditioners...
Definition: spai.hpp:570
void apply_q_trans_mat(const MatrixType &R, const VectorType &b_v, MatrixType &A)
Multiplication of Q'*A, where Q is in implicit for lower part of R and vector of betas - b_v...
Definition: qr.hpp:353
viennacl::ocl::handle< cl_mem > & handle1()
Return handle to start indices.
Definition: block_vector.hpp:60
std::size_t vcl_size_t
Definition: forwards.h:58
viennacl::ocl::context const & context() const
Definition: handle.hpp:191
Represents a contigious matrices on GPU.
Definition: block_matrix.hpp:49
void sparse_norm_2(const SparseVectorType &v, ScalarType &norm)
Computation of Euclidean norm for sparse vector.
Definition: spai-dynamic.hpp:130
Implementations of dense matrix related operations including matrix-vector products.
Helper functor for comparing std::pair<> based on the second member.
Definition: spai-dynamic.hpp:77
viennacl::ocl::handle< cl_mem > & handle()
Returns a handle to the elements.
Definition: block_matrix.hpp:56
Generic interface for matrix-vector and matrix-matrix products. See viennacl/linalg/vector_operations...
Represents an OpenCL kernel within ViennaCL.
Definition: kernel.hpp:59
Implementation of the dense matrix class.
OpenCL kernel file for sparse approximate inverse operations.
matrix_range< MatrixType > project(MatrixType &A, viennacl::range const &r1, viennacl::range const &r2)
Definition: matrix_proxy.hpp:251
void block_update(const SparseMatrixType &A, const std::vector< SparseVectorType > &A_v_c, std::vector< SparseVectorType > &g_res, std::vector< bool > &g_is_update, std::vector< std::vector< unsigned int > > &g_I, std::vector< std::vector< unsigned int > > &g_J, std::vector< VectorType > &g_b_v, std::vector< DenseMatrixType > &g_A_I_J, spai_tag const &tag)
CPU-based dynamic update for SPAI preconditioner.
Definition: spai-dynamic.hpp:258
bool isInIndexSet(const std::vector< SizeType > &J, SizeType ind)
Determines if element ind is in set {J}.
Definition: spai-static.hpp:71
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
viennacl::ocl::handle< cl_mem > & handle2()
Returns a handle to the start indices of matrix.
Definition: block_matrix.hpp:64
viennacl::ocl::handle< cl_mem > & handle()
Return handle to the elements.
Definition: block_vector.hpp:56
void composeNewVector(const VectorType &v_n, VectorType &v)
Composition of new vector of coefficients beta from QR factorizations(necessary for Q recovery) ...
Definition: spai-dynamic.hpp:116
void assemble_qr_block(const std::vector< std::vector< unsigned int > > &g_J, const std::vector< std::vector< unsigned int > > &g_I, const std::vector< std::vector< unsigned int > > &g_J_u, const std::vector< std::vector< unsigned int > > &g_I_u, std::vector< std::vector< unsigned int > > &g_I_q, block_matrix &g_A_I_J_u_vcl, viennacl::ocl::handle< cl_mem > &matrix_dimensions, block_matrix &g_A_I_u_J_u_vcl, std::vector< cl_uint > &g_is_update, const bool is_empty_block, viennacl::context ctx)
Performs assembly for new QR block.
Definition: spai-dynamic.hpp:399
Main implementation of SPAI (not FSPAI). Experimental.
A tag for SPAI Contains values for the algorithm. Must be passed to spai_precond constructor.
Definition: spai_tag.hpp:63
Generic interface for the computation of inner products. See viennacl/linalg/vector_operations.hpp for implementations.
void get_max_block_size(const std::vector< std::vector< SizeType > > &inds, SizeType &max_size)
Getting max size of rows/columns from container of index set.
Definition: qr.hpp:295
bool buildAugmentedIndexSet(const std::vector< SparseVectorType > &A_v_c, const SparseVectorType &res, std::vector< unsigned int > &J, std::vector< unsigned int > &J_u, const spai_tag &tag)
Building a new set of column indices J_u, cf. Kallischko dissertation p.31.
Definition: spai-dynamic.hpp:171
void composeNewR(const MatrixType &A, const MatrixType &R_n, MatrixType &R)
Composition of new matrix R, that is going to be used in Least Square problem solving.
Definition: spai-dynamic.hpp:93
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
void buildNewRowSet(const std::vector< SparseVectorType > &A_v_c, const std::vector< unsigned int > &I, const std::vector< unsigned int > &J_n, std::vector< unsigned int > &I_n)
Building a new indices to current set of row indices I_n, cf. Kallischko dissertation p...
Definition: spai-dynamic.hpp:210
Implementation of a bunch of (small) matrices on GPU. Experimental.
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
void enqueue(KernelType &k, viennacl::ocl::command_queue const &queue)
Enqueues a kernel in the provided queue.
Definition: enqueue.hpp:48
A class representing local (shared) OpenCL memory. Typically used as kernel argument.
Definition: local_mem.hpp:33
viennacl::ocl::handle< cl_mem > create_memory(cl_mem_flags flags, unsigned int size, void *ptr=NULL) const
Creates a memory buffer within the context.
Definition: context.hpp:199
void initProjectSubMatrix(const SparseMatrixType &A_in, const std::vector< unsigned int > &J, std::vector< unsigned int > &I, DenseMatrixType &A_out)
Initializes a dense matrix from a sparse one.
Definition: spai.hpp:143
Implementation of a simultaneous QR factorization of multiple matrices. Experimental.
Implementations of incomplete factorization preconditioners. Convenience header file.
Implementation of a static SPAI. Experimental.
void assemble_r(std::vector< std::vector< unsigned int > > &g_I, std::vector< std::vector< unsigned int > > &g_J, block_matrix &g_A_I_J_vcl, block_matrix &g_A_I_J_u_vcl, block_matrix &g_A_I_u_J_u_vcl, block_vector &g_bv_vcl, block_vector &g_bv_vcl_u, std::vector< cl_uint > &g_is_update, viennacl::context ctx)
Performs assembly for new R matrix on GPU.
Definition: spai-dynamic.hpp:491
Implementation of the compressed_matrix class.
Implementation of a bunch of vectors on GPU. Experimental.
Implementations of operations using sparse matrices.
void block_assembly(const viennacl::compressed_matrix< ScalarType, MAT_ALIGNMENT > &A, const std::vector< std::vector< unsigned int > > &g_J, const std::vector< std::vector< unsigned int > > &g_I, block_matrix &g_A_I_J_vcl, std::vector< cl_uint > &g_is_update, bool &is_empty_block)
Assembly of blocks on GPU by a gived set of row indices: g_I and column indices: g_J.
Definition: spai.hpp:468
void get_size(const std::vector< std::vector< SizeType > > &inds, SizeType &size)
Computes size of particular container of index set.
Definition: qr.hpp:129
Implementation of the spai tag holding SPAI configuration parameters. Experimental.
void init_start_inds(const std::vector< std::vector< SizeType > > &inds, std::vector< cl_uint > &start_inds)
Initializes start indices of particular index set.
Definition: qr.hpp:141
The conjugate gradient method is implemented here.
double getResidualThreshold() const
Definition: spai_tag.hpp:88
void compute_blocks_size(const std::vector< std::vector< unsigned int > > &g_I, const std::vector< std::vector< unsigned int > > &g_J, unsigned int &sz, std::vector< cl_uint > &blocks_ind, std::vector< cl_uint > &matrix_dims)
**************************************** BLOCK FUNCTIONS ************************************// ...
Definition: qr.hpp:112
Implementations of the OpenCL backend, where all contexts are stored in.
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:41
Represents a contigious vector on GPU.
Definition: block_vector.hpp:49
void sparse_inner_prod(const SparseVectorType &v1, const SparseVectorType &v2, ScalarType &res_v)
Dot product of two sparse vectors.
Definition: spai-dynamic.hpp:144
void QRBlockComposition(const MatrixType &A_I_J, const MatrixType &A_I_J_u, MatrixType &A_I_u_J_u)
Composition of new block for QR factorization cf. Kallischko dissertation p.82, figure 4...
Definition: spai-dynamic.hpp:229
void single_qr(MatrixType &R, VectorType &b_v)
Inplace QR factorization via Householder reflections c.f. Gene H. Golub, Charles F. Van Loan "Matrix Computations" 3rd edition p.224.
Definition: qr.hpp:257
basic_range range
Definition: forwards.h:339
size_type global_work_size(int index=0) const
Returns the global work size at the respective dimension.
Definition: kernel.hpp:759
void block_q_multiplication(const std::vector< std::vector< unsigned int > > &g_J_u, const std::vector< std::vector< unsigned int > > &g_I, block_matrix &g_A_I_J_vcl, block_vector &g_bv_vcl, block_matrix &g_A_I_J_u_vcl, std::vector< cl_uint > &g_is_update, viennacl::context ctx)
Performs multiplication Q'*A(I, \tilde J) on GPU.
Definition: spai-dynamic.hpp:326
static void init(viennacl::ocl::context &ctx)
Definition: spai.hpp:577
viennacl::ocl::handle< cl_mem > & handle1()
Returns a handle to the matrix dimensions.
Definition: block_matrix.hpp:60
size_type local_work_size(int index=0) const
Returns the local work size at the respective dimension.
Definition: kernel.hpp:750
Implementation of the ViennaCL scalar class.
bool operator()(std::pair< T1, T2 > const &left, std::pair< T1, T2 > const &right)
Definition: spai-dynamic.hpp:80
void assemble_qr_row_inds(const std::vector< std::vector< SizeType > > &g_I, const std::vector< std::vector< SizeType > > g_J, const std::vector< std::vector< SizeType > > &g_I_u, std::vector< std::vector< SizeType > > &g_I_q)
Assembly of container of index row sets: I_q, row indices for new "QR block".
Definition: spai-dynamic.hpp:367