ViennaCL - The Vienna Computing Library  1.5.2
matrix.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_MATRIX_HPP_
2 #define VIENNACL_MATRIX_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
26 #include "viennacl/scalar.hpp"
27 #include "viennacl/vector.hpp"
30 #include "viennacl/tools/tools.hpp"
34 //#include "viennacl/rand/utils.hpp"
36 
37 namespace viennacl
38 {
43  template<typename SCALARTYPE>
44  class implicit_matrix_base
45  {
46  protected:
48  implicit_matrix_base(size_type size1, size_type size2, std::pair<SCALARTYPE, bool> value, bool diag) : size1_(size1), size2_(size2), value_(value), diag_(diag){ }
49  public:
50  typedef SCALARTYPE const & const_reference;
51  typedef SCALARTYPE cpu_value_type;
52 
53  size_type size1() const { return size1_; }
54  size_type size2() const { return size2_; }
55 
56  SCALARTYPE value() const { return value_.first; }
57  bool is_value_static( ) const { return value_.second; }
58  bool diag() const { return diag_; }
59 
60  const_reference operator()(size_type i, size_type j) const {
61  if(diag_) return (i == j) ? value_.first : 0;
62  return value_.first;
63  }
64 
65  protected:
66  size_type size1_;
67  size_type size2_;
68  std::pair<SCALARTYPE, bool> value_;
69  bool diag_;
70  };
71 
72  //
73  // Initializer types
74  //
76  template <typename SCALARTYPE>
77  class identity_matrix
78  {
79  public:
81  typedef SCALARTYPE const & const_reference;
82 
83  identity_matrix(size_type s, viennacl::context ctx = viennacl::context()) : size_(s), diag_(1), off_diag_(0), ctx_(ctx) {}
84 
85  size_type size1() const { return size_; }
86  size_type size2() const { return size_; }
87  const_reference operator()(size_type i, size_type j) const { return (i == j) ? diag_ : off_diag_; }
88 
89  viennacl::context context() const { return ctx_; }
90 
91  private:
92  size_type size_;
93  SCALARTYPE diag_;
94  SCALARTYPE off_diag_;
95  viennacl::context ctx_;
96  };
97 
98 
100  template <typename SCALARTYPE>
101  class zero_matrix
102  {
103  public:
105  typedef SCALARTYPE const & const_reference;
106 
107  zero_matrix(size_type s1, size_type s2, viennacl::context ctx = viennacl::context()) : size1_(s1), size2_(s2), val_(0), ctx_(ctx) {}
108 
109  size_type size1() const { return size1_; }
110  size_type size2() const { return size2_; }
111  const_reference operator()(size_type /*i*/, size_type /*j*/) const { return val_; }
112 
113  viennacl::context context() const { return ctx_; }
114 
115  private:
116  size_type size1_;
117  size_type size2_;
118  SCALARTYPE val_;
119  viennacl::context ctx_;
120  };
121 
122 
124  template <typename SCALARTYPE>
125  class scalar_matrix
126  {
127  public:
129  typedef SCALARTYPE const & const_reference;
130 
131  scalar_matrix(size_type s1, size_type s2, const_reference val, viennacl::context ctx = viennacl::context()) : size1_(s1), size2_(s2), value_(val), ctx_(ctx) {}
132 
133  size_type size1() const { return size1_; }
134  size_type size2() const { return size2_; }
135  const_reference operator()(size_type /*i*/, size_type /*j*/) const { return value_; }
136 
137  viennacl::context context() const { return ctx_; }
138 
139  private:
140  size_type size1_;
141  size_type size2_;
142  SCALARTYPE value_;
143  viennacl::context ctx_;
144  };
145 
146 
147 
148 //#ifdef VIENNACL_WITH_OPENCL
149 // template<class SCALARTYPE, class DISTRIBUTION>
150 // rand::random_matrix_t<SCALARTYPE, DISTRIBUTION> random_matrix(unsigned int size1, unsigned int size2, DISTRIBUTION const & distribution){
151 // return rand::random_matrix_t<SCALARTYPE,DISTRIBUTION>(size1,size2,distribution);
152 // }
153 //#endif
154 
161  template <typename LHS, typename RHS, typename OP>
162  class matrix_expression
163  {
164  typedef typename viennacl::result_of::reference_if_nonscalar<LHS>::type lhs_reference_type;
165  typedef typename viennacl::result_of::reference_if_nonscalar<RHS>::type rhs_reference_type;
166 
167  public:
169 
170  matrix_expression(LHS & lhs, RHS & rhs) : lhs_(lhs), rhs_(rhs) {}
171 
174  LHS & lhs() const { return lhs_; }
177  RHS & rhs() const { return rhs_; }
178 
182 
183  private:
185  lhs_reference_type lhs_;
187  rhs_reference_type rhs_;
188  };
189 
190 
192  struct row_iteration {};
193 
195  struct col_iteration {};
196 
197  //STL-like iterator. TODO: STL-compliance...
199  template <typename ROWCOL, typename MATRIXTYPE>
201  {
203  public:
204  typedef typename MATRIXTYPE::value_type value_type;
205 
206  matrix_iterator(MATRIXTYPE & mat,
207  vcl_size_t start_row,
208  vcl_size_t start_col) : mat_(mat), row_(start_row), col_(start_col) {}
209 
210  value_type operator*(void) { return mat_(row_, col_); }
211  self_type & operator++(void) { viennacl::tools::MATRIX_ITERATOR_INCREMENTER<ROWCOL, MATRIXTYPE>::apply(mat_, row_, col_); return *this; }
212  self_type operator++(int) { self_type tmp = *this; ++(*this); return tmp; }
213 
214  bool operator==(self_type const & other) { return (row_ == other.row_) && (col_ == other.col_); }
215  bool operator!=(self_type const & other) { return !(*this == other); }
216 
217  vcl_size_t index1() { return row_; }
218  vcl_size_t index2() { return col_; }
219 
220  MATRIXTYPE & operator()(void) const { return mat_; }
221 
222  private:
223  MATRIXTYPE & mat_;
224  vcl_size_t row_;
225  vcl_size_t col_;
226  };
227 
228 
235  template <class SCALARTYPE, typename F, typename SizeType /* see forwards.h for default type */, typename DistanceType /* see forwards.h for default type */>
236  class matrix_base
237  {
238  typedef matrix_base<SCALARTYPE, F, SizeType, DistanceType> self_type;
239  public:
240 
244  typedef SCALARTYPE cpu_value_type;
245  typedef SizeType size_type;
246  typedef DistanceType difference_type;
249  typedef typename F::orientation_category orientation_category;
250 
251  static const size_type alignment = 128;
252 
253 
255  explicit matrix_base() : size1_(0), size2_(0), start1_(0), start2_(0), stride1_(1), stride2_(1), internal_size1_(0), internal_size2_(0) {}
256 
263  explicit matrix_base(size_type rows, size_type columns, viennacl::context ctx = viennacl::context())
264  : size1_(rows), size2_(columns), start1_(0), start2_(0), stride1_(1), stride2_(1),
265  internal_size1_(viennacl::tools::align_to_multiple<size_type>(rows, alignment)),
266  internal_size2_(viennacl::tools::align_to_multiple<size_type>(columns, alignment))
267  {
268  if (rows > 0 && columns > 0)
269  {
270  viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), ctx);
271  clear();
272  }
273  }
274 
275 
278  size_type mat_size1, size_type mat_start1, difference_type mat_stride1, size_type mat_internal_size1,
279  size_type mat_size2, size_type mat_start2, difference_type mat_stride2, size_type mat_internal_size2)
280  : size1_(mat_size1), size2_(mat_size2),
281  start1_(mat_start1), start2_(mat_start2),
282  stride1_(mat_stride1), stride2_(mat_stride2),
283  internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2),
284  elements_(h) {}
285 
286  template <typename LHS, typename RHS, typename OP>
288  size1_(viennacl::traits::size1(proxy)), size2_(viennacl::traits::size2(proxy)), start1_(0), start2_(0), stride1_(1), stride2_(1),
289  internal_size1_(viennacl::tools::align_to_multiple<size_type>(size1_, alignment)),
290  internal_size2_(viennacl::tools::align_to_multiple<size_type>(size2_, alignment))
291  {
293  if (internal_size() > 0)
294  {
295  viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), viennacl::traits::context(proxy));
296  clear();
297  self_type::operator=(proxy);
298  }
299  }
300 
301  // CUDA or host memory:
302  explicit matrix_base(SCALARTYPE * ptr_to_mem, viennacl::memory_types mem_type,
303  size_type mat_size1, size_type mat_start1, difference_type mat_stride1, size_type mat_internal_size1,
304  size_type mat_size2, size_type mat_start2, difference_type mat_stride2, size_type mat_internal_size2)
305  : size1_(mat_size1), size2_(mat_size2),
306  start1_(mat_start1), start2_(mat_start2),
307  stride1_(mat_stride1), stride2_(mat_stride2),
308  internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2)
309  {
310  if (mem_type == viennacl::CUDA_MEMORY)
311  {
312 #ifdef VIENNACL_WITH_CUDA
314  elements_.cuda_handle().reset(reinterpret_cast<char*>(ptr_to_mem));
315  elements_.cuda_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
316 #else
318 #endif
319  }
320  else if (mem_type == viennacl::MAIN_MEMORY)
321  {
323  elements_.ram_handle().reset(reinterpret_cast<char*>(ptr_to_mem));
324  elements_.ram_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
325  }
326 
327  elements_.raw_size(sizeof(SCALARTYPE) * internal_size());
328  }
329 
330 #ifdef VIENNACL_WITH_OPENCL
331  explicit matrix_base(cl_mem mem, size_type rows, size_type columns, viennacl::context ctx = viennacl::context())
332  : size1_(rows), size2_(columns),
333  start1_(0), start2_(0),
334  stride1_(1), stride2_(1),
335  internal_size1_(rows), internal_size2_(columns)
336  {
338  elements_.opencl_handle() = mem;
339  elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
340  elements_.opencl_handle().context(ctx.opencl_context());
341  elements_.raw_size(sizeof(SCALARTYPE)*internal_size());
342  }
343 
344  explicit matrix_base(cl_mem mem, viennacl::context ctx,
345  size_type mat_size1, size_type mat_start1, difference_type mat_stride1, size_type mat_internal_size1,
346  size_type mat_size2, size_type mat_start2, difference_type mat_stride2, size_type mat_internal_size2)
347  : size1_(mat_size1), size2_(mat_size2),
348  start1_(mat_start1), start2_(mat_start2),
349  stride1_(mat_stride1), stride2_(mat_stride2),
350  internal_size1_(mat_internal_size1), internal_size2_(mat_internal_size2)
351  {
353  elements_.opencl_handle() = mem;
354  elements_.opencl_handle().inc(); //prevents that the user-provided memory is deleted once the vector object is destroyed.
355  elements_.opencl_handle().context(ctx.opencl_context());
356  elements_.raw_size(sizeof(SCALARTYPE)*internal_size());
357  }
358 #endif
359 
360 
361  self_type & operator=(const self_type & other) //enables implicit conversions
362  {
363  if (internal_size() == 0)
364  {
365  if (other.internal_size() == 0)
366  return *this;
367  resize(other.size1(), other.size2(), false);
368  }
369 
370  viennacl::linalg::am(*this,
371  other, cpu_value_type(1.0), 1, false, false);
372  return *this;
373  }
374 
376  /*template<class DISTRIBUTION>
377  matrix(rand::random_matrix_t<SCALARTYPE, DISTRIBUTION> const & m) : rows_(m.size1), columns_(m.size2)
378  {
379  if (internal_size() > 0)
380  {
381  viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size());
382  rand::buffer_dumper<SCALARTYPE, DISTRIBUTION>::dump(elements_,m.distribution,0,internal_size());
383  }
384  }*/
385 
386 
387 
392  template <typename LHS, typename RHS, typename OP>
394  {
395  assert( (viennacl::traits::size1(proxy) == size1() || size1() == 0)
396  && (viennacl::traits::size2(proxy) == size2() || size2() == 0)
397  && bool("Incompatible matrix sizes!"));
398 
399  if (internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0)
400  {
401  size1_ = viennacl::traits::size1(proxy);
402  size2_ = viennacl::traits::size2(proxy);
403  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment);
404  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment);
405  viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), viennacl::traits::context(proxy));
406  if (size1_ != internal_size1_ || size2_ != internal_size2_)
407  clear();
408  }
409 
410  if (internal_size() > 0)
412 
413  return *this;
414  }
415 
416 
417  // A = trans(B). Currently achieved in CPU memory
418  self_type & operator=(const matrix_expression< const self_type,
419  const self_type,
420  op_trans> & proxy)
421  {
422  assert( (handle() != proxy.lhs().handle()) && bool("Self-assignment of matrix transpose not implemented"));
423  assert( ( (proxy.lhs().size1() == size2()) || (size2() == 0) ) && bool("Matrix dimensions do not match!"));
424  assert( ( (proxy.lhs().size2() == size1()) || (size1() == 0) ) && bool("Matrix dimensions do not match!"));
425 
426  if (internal_size() == 0 && viennacl::traits::size1(proxy) > 0 && viennacl::traits::size2(proxy) > 0)
427  {
428  size1_ = viennacl::traits::size1(proxy);
429  size2_ = viennacl::traits::size2(proxy);
430  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment);
431  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment);
432  }
433 
434  std::vector<SCALARTYPE> temp(proxy.lhs().internal_size());
435 
436  viennacl::backend::memory_read(proxy.lhs().handle(), 0, sizeof(SCALARTYPE)*proxy.lhs().internal_size(), &(temp[0]));
437 
438  // now transpose it
439  std::vector<SCALARTYPE> temp_trans(internal_size());
440 
441  for (vcl_size_t i=0; i<proxy.lhs().size1(); ++i)
442  for (vcl_size_t j=0; j<proxy.lhs().size2(); ++j)
443  temp_trans[F::mem_index(start2() + stride2() * j,
444  start1() + stride1() * i,
446  = temp[F::mem_index(proxy.lhs().start1() + proxy.lhs().stride1() * i,
447  proxy.lhs().start2() + proxy.lhs().stride2() * j,
448  proxy.lhs().internal_size1(), proxy.lhs().internal_size2())];
449 
450  // write back
451  viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), viennacl::traits::context(proxy), &(temp_trans[0]));
452 
453  return *this;
454  }
455 
456  template <typename LHS, typename RHS, typename OP>
458  {
459  assert( (viennacl::traits::size1(proxy) == size1())
460  && (viennacl::traits::size2(proxy) == size2())
461  && bool("Incompatible matrix sizes!"));
462  assert( (size1() > 0) && bool("Vector not yet initialized!") );
463  assert( (size2() > 0) && bool("Vector not yet initialized!") );
464 
466 
467  return *this;
468  }
469 
470  template <typename LHS, typename RHS, typename OP>
472  {
473  assert( (viennacl::traits::size1(proxy) == size1())
474  && (viennacl::traits::size2(proxy) == size2())
475  && bool("Incompatible matrix sizes!"));
476  assert( (size1() > 0) && bool("Vector not yet initialized!") );
477  assert( (size2() > 0) && bool("Vector not yet initialized!") );
478 
480 
481  return *this;
482  }
483 
486  {
487  assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
488  assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
489 
490  if (internal_size() == 0)
491  {
492  size1_ = m.size1();
493  size2_ = m.size2();
494  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment);
495  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment);
496  if (internal_size() > 0)
497  {
498  viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), m.context());
499  clear();
500  }
501  }
502  else
503  viennacl::linalg::matrix_assign(*this, SCALARTYPE(0));
504 
505  if (internal_size() > 0)
507 
508  return *this;
509  }
510 
512  self_type & operator = (zero_matrix<SCALARTYPE> const & m)
513  {
514  assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
515  assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
516 
517  if (internal_size() == 0)
518  {
519  size1_ = m.size1();
520  size2_ = m.size2();
521  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment);
522  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment);
523  if (internal_size() > 0)
524  {
525  viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), m.context());
526  clear();
527  }
528  }
529  else
530  viennacl::linalg::matrix_assign(*this, SCALARTYPE(0));
531 
532  return *this;
533  }
534 
536  self_type & operator = (scalar_matrix<SCALARTYPE> const & m)
537  {
538  assert( (m.size1() == size1_ || size1_ == 0) && bool("Size mismatch!") );
539  assert( (m.size2() == size2_ || size2_ == 0) && bool("Size mismatch!") );
540 
541  if (internal_size() == 0)
542  {
543  size1_ = m.size1();
544  size2_ = m.size2();
545  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment);
546  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment);
547  if (internal_size() > 0)
548  {
549  viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), m.context());
550  clear();
551  }
552  }
553 
554  if (internal_size() > 0)
555  {
556  viennacl::linalg::matrix_assign(*this, m(0,0));
557  }
558 
559  return *this;
560  }
561 
562 
563  //read-write access to an element of the matrix/matrix_range/matrix_slice
566  entry_proxy<SCALARTYPE> operator()(size_type row_index, size_type col_index)
567  {
568  return entry_proxy<SCALARTYPE>(F::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
569  }
570 
573  const_entry_proxy<SCALARTYPE> operator()(size_type row_index, size_type col_index) const
574  {
575  return const_entry_proxy<SCALARTYPE>(F::mem_index(start1_ + stride1_ * row_index, start2_ + stride2_ * col_index, internal_size1(), internal_size2()), elements_);
576  }
577 
578  //
579  // Operator overloads for enabling implicit conversions:
580  //
581  self_type & operator += (const self_type & other)
582  {
584  *this, SCALARTYPE(1.0), 1, false, false,
585  other, SCALARTYPE(1.0), 1, false, false);
586  return *this;
587  }
588 
589  self_type & operator -= (const self_type & other)
590  {
592  *this, SCALARTYPE(1.0), 1, false, false,
593  other, SCALARTYPE(1.0), 1, false, true);
594  return *this;
595  }
596 
599  self_type & operator *= (SCALARTYPE val)
600  {
601  //viennacl::linalg::inplace_mult(*this, val);
602  viennacl::linalg::am(*this,
603  *this, val, 1, false, false);
604  return *this;
605  }
606 
609  self_type & operator /= (SCALARTYPE val)
610  {
611  //viennacl::linalg::inplace_mult(*this, static_cast<SCALARTYPE>(1) / val);
612  viennacl::linalg::am(*this,
613  *this, val, 1, true, false);
614  return *this;
615  }
616 
617 
620  {
622  }
623 
625  size_type size1() const { return size1_;}
627  size_type size2() const { return size2_; }
628 
630  size_type start1() const { return start1_;}
632  size_type start2() const { return start2_; }
633 
635  size_type stride1() const { return stride1_;}
637  size_type stride2() const { return stride2_; }
638 
640  void clear()
641  {
642  viennacl::linalg::matrix_assign(*this, SCALARTYPE(0), true);
643  }
644 
645 
647  size_type internal_size1() const { return internal_size1_; }
649  size_type internal_size2() const { return internal_size2_; }
651  size_type internal_size() const { return internal_size1() * internal_size2(); }
652 
654  handle_type & handle() { return elements_; }
656  const handle_type & handle() const { return elements_; }
657 
658 
660  {
661  return elements_.get_active_handle_id();
662  }
663 
664  protected:
665 
667  {
668  elements_ = h;
669  }
670 
672  {
673  viennacl::backend::switch_memory_context<SCALARTYPE>(elements_, new_ctx);
674  }
675 
676 
684  void resize(size_type rows, size_type columns, bool preserve = true)
685  {
686  assert( (rows > 0 && columns > 0) && bool("Check failed in matrix::resize(): Number of rows and columns must be positive!"));
687 
688  if (preserve && internal_size() > 0)
689  {
690  //get old entries:
691  std::vector< SCALARTYPE > old_entries(internal_size());
692  viennacl::backend::memory_read(elements_, 0, sizeof(SCALARTYPE)*internal_size(), &(old_entries[0]));
693 
694  //set up entries of new matrix:
695  std::vector< SCALARTYPE > new_entries( viennacl::tools::align_to_multiple<vcl_size_t>(rows, alignment)
696  * viennacl::tools::align_to_multiple<vcl_size_t>(columns, alignment));
697  for (size_type i=0; i<rows; ++i)
698  {
699  if (i >= size1_)
700  continue;
701 
702  for (size_type j=0; j<columns; ++j)
703  {
704  if (j >= size2_)
705  continue;
706  new_entries[F::mem_index(i, j, viennacl::tools::align_to_multiple<vcl_size_t>(rows, alignment), viennacl::tools::align_to_multiple<vcl_size_t>(columns, alignment))]
707  = old_entries[F::mem_index(i, j, internal_size1(), internal_size2())];
708  }
709  }
710 
711  //copy new entries to GPU:
712  size1_ = rows;
713  size2_ = columns;
714  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment);
715  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment);
716  viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*new_entries.size(), viennacl::traits::context(elements_), &(new_entries[0]));
717  }
718  else //discard old entries:
719  {
720  size1_ = rows;
721  size2_ = columns;
722  internal_size1_ = viennacl::tools::align_to_multiple<size_type>(size1_, alignment);
723  internal_size2_ = viennacl::tools::align_to_multiple<size_type>(size2_, alignment);
724 
725  viennacl::backend::memory_create(elements_, sizeof(SCALARTYPE)*internal_size(), viennacl::traits::context(elements_));
726  clear();
727  }
728  }
729 
730  private:
731  size_type size1_;
732  size_type size2_;
733  size_type start1_;
734  size_type start2_;
735  difference_type stride1_;
736  difference_type stride2_;
737  size_type internal_size1_;
738  size_type internal_size2_;
739  handle_type elements_;
740  }; //matrix
741 
742 
743 
750  template <class SCALARTYPE, typename F, unsigned int ALIGNMENT>
751  class matrix : public matrix_base<SCALARTYPE, F>
752  {
753  typedef matrix<SCALARTYPE, F, ALIGNMENT> self_type;
754  typedef matrix_base<SCALARTYPE, F> base_type;
755  public:
756  typedef typename base_type::size_type size_type;
757 
759  explicit matrix() : base_type() {}
760 
767  explicit matrix(size_type rows, size_type columns, viennacl::context ctx = viennacl::context()) : base_type(rows, columns, ctx) {}
768 
769 #ifdef VIENNACL_WITH_OPENCL
770  explicit matrix(cl_mem mem, size_type rows, size_type columns) : base_type(mem, rows, columns) {}
771 #endif
772 
773  template <typename LHS, typename RHS, typename OP>
774  matrix(matrix_expression< LHS, RHS, OP> const & proxy) : base_type(proxy) {}
775 
777  matrix(identity_matrix<SCALARTYPE> const & m) : base_type(m.size1(), m.size2(), m.context())
778  {
779  if (base_type::internal_size() > 0)
781  }
782 
784  matrix(zero_matrix<SCALARTYPE> const & m) : base_type(m.size1(), m.size2(), m.context())
785  {
786  if (base_type::internal_size() > 0)
788  }
789 
791  matrix(scalar_matrix<SCALARTYPE> const & m) : base_type(m.size1(), m.size2(), m.context())
792  {
793  if (base_type::internal_size() > 0)
795  }
796 
797  matrix(const base_type & other) : base_type(other.size1(), other.size2(), viennacl::traits::context(other))
798  {
799  base_type::operator=(other);
800  }
801 
802 
803  //copy constructor:
804  matrix(const self_type & other) : base_type(other.size1(), other.size2(), viennacl::traits::context(other))
805  {
806  base_type::operator=(other);
807  }
808 
809 
810  /*template <typename M1>
811  self_type & operator=(const matrix_expression< const M1, const M1, op_trans> & proxy)
812  {
813  self_type temp(proxy.lhs());
814  *this = trans(temp);
815  return *this;
816  }*/
817 
818  using base_type::operator=;
819 
827  void resize(size_type rows, size_type columns, bool preserve = true)
828  {
829  base_type::resize(rows, columns, preserve);
830  }
831 
832  }; //matrix
833 
834 
835 
841  template<class SCALARTYPE, typename F>
842  std::ostream & operator<<(std::ostream & s, const matrix_base<SCALARTYPE, F> & gpu_matrix)
843  {
844  typedef typename matrix_base<SCALARTYPE, F>::size_type size_type;
845 
846  std::vector<SCALARTYPE> tmp(gpu_matrix.internal_size());
847  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE) * gpu_matrix.internal_size(), &(tmp[0]));
848 
849  s << "[" << gpu_matrix.size1() << "," << gpu_matrix.size2() << "]";
850 
851  s << "(";
852  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
853  {
854  s << "(";
855  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
856  {
857  s << tmp[F::mem_index(i * gpu_matrix.stride1() + gpu_matrix.start1(), j * gpu_matrix.stride2() + gpu_matrix.start2(), gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
858  if (j < gpu_matrix.size2() - 1)
859  s << ",";
860  }
861  s << ")";
862  if (i < gpu_matrix.size1() - 1)
863  s << ",";
864  }
865  s << ")";
866  return s;
867  }
868 
874  template<typename LHS, typename RHS, typename OP>
875  std::ostream & operator<<(std::ostream & s, const matrix_expression<LHS, RHS, OP> & expr)
876  {
878 
879  matrix<ScalarType> temp = expr;
880  s << temp;
881  return s;
882  }
883 
885  template<typename NumericT, typename F>
886  matrix_expression< const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_trans>
887  trans(const matrix_base<NumericT, F> & mat)
888  {
889  return matrix_expression< const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_trans>(mat, mat);
890  }
891 
892  //diag():
893  template<typename NumericT, typename F>
894  vector_expression< const matrix_base<NumericT, F>, const int, op_matrix_diag>
895  diag(const matrix_base<NumericT, F> & A, int k = 0)
896  {
898  }
899 
900  template<typename NumericT>
901  matrix_expression< const vector_base<NumericT>, const int, op_vector_diag>
902  diag(const vector_base<NumericT> & v, int k = 0)
903  {
905  }
906 
907  // row():
908  template<typename NumericT, typename F>
909  vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_row>
910  row(const matrix_base<NumericT, F> & A, unsigned int i)
911  {
912  return vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_row>(A, i);
913  }
914 
915  // column():
916  template<typename NumericT, typename F>
917  vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_column>
918  column(const matrix_base<NumericT, F> & A, unsigned int j)
919  {
920  return vector_expression< const matrix_base<NumericT, F>, const unsigned int, op_column>(A, j);
921  }
922 
924 
925  //
926  //cpu to gpu, generic type:
927  //
933  template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
934  void copy(const CPU_MATRIX & cpu_matrix,
935  matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix )
936  {
937  typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type;
938 
939  //std::cout << "Copying CPU_MATRIX!" << std::endl;
940  //std::cout << "Size at begin: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl;
941  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
942  {
943  gpu_matrix.resize(cpu_matrix.size1(),
944  cpu_matrix.size2(), false);
945  }
946 
947  assert( (gpu_matrix.size1() == cpu_matrix.size1()) && (gpu_matrix.size2() == cpu_matrix.size2()) && bool("Matrix dimensions mismatch.") );
948 
949  std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
950  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
951  {
952  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
953  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
954  }
955 
956  viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0]));
957  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
958  //std::cout << "Size at end: " << gpu_matrix.size1() << ", " << gpu_matrix.size2() << std::endl;
959  }
960 
961  //
962  //cpu to gpu, STL type:
963  //
969  template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT>
970  void copy(const std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix,
971  matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix )
972  {
973  typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type;
974 
975  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
976  {
977  gpu_matrix.resize(cpu_matrix.size(),
978  cpu_matrix[0].size(),
979  false);
980  }
981 
982  assert( (gpu_matrix.size1() == cpu_matrix.size()) && bool("Matrix dimensions mismatch.") );
983 
984  std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
985  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
986  {
987  assert( (gpu_matrix.size2() == cpu_matrix[i].size()) && bool("Matrix dimensions mismatch.") );
988 
989  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
990  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
991  }
992 
993  viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0]));
994  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
995  }
996 
997 
998  //
999  //cpu to gpu, another STL type:
1000  //
1007  template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
1008  void fast_copy(SCALARTYPE * cpu_matrix_begin,
1009  SCALARTYPE * cpu_matrix_end,
1010  matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
1011  {
1012  viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * (cpu_matrix_end - cpu_matrix_begin), viennacl::traits::context(gpu_matrix), cpu_matrix_begin);
1013  /*gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE,
1014  sizeof(SCALARTYPE) * (cpu_matrix_end - cpu_matrix_begin),
1015  cpu_matrix_begin);*/
1016  }
1017 
1018 
1019  #ifdef VIENNACL_WITH_EIGEN
1020 
1025  template <typename F, unsigned int ALIGNMENT>
1026  void copy(const Eigen::MatrixXf & cpu_matrix,
1027  matrix<float, F, ALIGNMENT> & gpu_matrix)
1028  {
1029  typedef typename matrix<float, F, ALIGNMENT>::size_type size_type;
1030 
1031  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
1032  {
1033  gpu_matrix.resize(cpu_matrix.rows(),
1034  cpu_matrix.cols(),
1035  false);
1036  }
1037  else
1038  {
1039  assert( (gpu_matrix.size1() == static_cast<vcl_size_t>(cpu_matrix.rows()))
1040  && (gpu_matrix.size2() == static_cast<vcl_size_t>(cpu_matrix.cols()))
1041  && bool("matrix size mismatch")
1042  );
1043  }
1044 
1045  std::vector<float> data(gpu_matrix.internal_size());
1046  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1047  {
1048  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1049  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
1050  }
1051 
1052  viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(float) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0]));
1053  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
1054  }
1055 
1061  template <typename F, unsigned int ALIGNMENT>
1062  void copy(const Eigen::MatrixXd & cpu_matrix,
1063  matrix<double, F, ALIGNMENT> & gpu_matrix)
1064  {
1065  typedef typename matrix<double, F, ALIGNMENT>::size_type size_type;
1066 
1067  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
1068  {
1069  gpu_matrix.resize(cpu_matrix.rows(),
1070  cpu_matrix.cols(),
1071  false);
1072  }
1073  else
1074  {
1075  assert( (gpu_matrix.size1() == static_cast<vcl_size_t>(cpu_matrix.rows()))
1076  && (gpu_matrix.size2() == static_cast<vcl_size_t>(cpu_matrix.cols()))
1077  && bool("matrix size mismatch")
1078  );
1079  }
1080 
1081  std::vector<double> data(gpu_matrix.internal_size());
1082  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1083  {
1084  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1085  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix(i,j);
1086  }
1087 
1088  viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(double) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0]));
1089  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
1090  }
1091  #endif
1092 
1093  #ifdef VIENNACL_WITH_MTL4
1094 
1099  template <typename SCALARTYPE, typename T, typename F, unsigned int ALIGNMENT>
1100  void copy(const mtl::dense2D<SCALARTYPE, T>& cpu_matrix,
1101  matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix)
1102  {
1103  typedef typename matrix<SCALARTYPE, F, ALIGNMENT>::size_type size_type;
1104 
1105  if (gpu_matrix.size1() == 0 || gpu_matrix.size2() == 0)
1106  {
1107  gpu_matrix.resize(cpu_matrix.num_rows(),
1108  cpu_matrix.num_cols(),
1109  false);
1110  }
1111  else
1112  {
1113  assert( (gpu_matrix.size1() == cpu_matrix.num_rows())
1114  && (gpu_matrix.size2() == cpu_matrix.num_cols())
1115  && bool("matrix size mismatch")
1116  );
1117  }
1118 
1119  std::vector<SCALARTYPE> data(gpu_matrix.internal_size());
1120  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1121  {
1122  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1123  data[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())] = cpu_matrix[i][j];
1124  }
1125 
1126  viennacl::backend::memory_create(gpu_matrix.handle(), sizeof(SCALARTYPE) * data.size(), viennacl::traits::context(gpu_matrix), &(data[0]));
1127  //gpu_matrix.elements_ = viennacl::ocl::current_context().create_memory(CL_MEM_READ_WRITE, data);
1128  }
1129  #endif
1130 
1131 
1132 
1133 
1134  //
1135  //gpu to cpu, generic type
1136  //
1142  template <typename CPU_MATRIX, typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
1143  void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
1144  CPU_MATRIX & cpu_matrix )
1145  {
1146  typedef typename matrix<float, F, ALIGNMENT>::size_type size_type;
1147 
1148  if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
1149  {
1150  assert( viennacl::traits::size1(cpu_matrix) == gpu_matrix.size1() && bool("Matrix dimensions mismatch: rows"));
1151 
1152  std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size());
1153  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0]));
1154 
1155  //now copy entries to cpu_matrix:
1156  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1157  {
1158  assert( viennacl::traits::size2(cpu_matrix) == gpu_matrix.size2() && bool("Matrix dimensions mismatch: columns"));
1159  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1160  cpu_matrix(i,j) = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
1161  }
1162  }
1163  }
1164 
1165  //gpu to cpu, STL type
1171  template <typename SCALARTYPE, typename A1, typename A2, typename F, unsigned int ALIGNMENT>
1172  void copy(const matrix<SCALARTYPE, F, ALIGNMENT> & gpu_matrix,
1173  std::vector< std::vector<SCALARTYPE, A1>, A2> & cpu_matrix)
1174  {
1175  typedef typename matrix<float, F, ALIGNMENT>::size_type size_type;
1176 
1177  if ( (gpu_matrix.size1() > 0) && (gpu_matrix.size2() > 0) )
1178  {
1179  assert( (cpu_matrix.size() == gpu_matrix.size1()) && bool("Matrix dimensions mismatch: rows"));
1180 
1181  std::vector<SCALARTYPE> temp_buffer(gpu_matrix.internal_size());
1182  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), &(temp_buffer[0]));
1183 
1184  //now copy entries to cpu_matrix:
1185  for (size_type i = 0; i < gpu_matrix.size1(); ++i)
1186  {
1187  assert( (cpu_matrix[i].size() == gpu_matrix.size2()) && bool("Matrix dimensions mismatch: columns"));
1188 
1189  for (size_type j = 0; j < gpu_matrix.size2(); ++j)
1190  cpu_matrix[i][j] = temp_buffer[F::mem_index(i, j, gpu_matrix.internal_size1(), gpu_matrix.internal_size2())];
1191  }
1192  }
1193  }
1194 
1195  //gpu to cpu, STL type
1201  template <typename SCALARTYPE, typename F, unsigned int ALIGNMENT>
1203  SCALARTYPE * cpu_matrix_begin)
1204  {
1205  viennacl::backend::memory_read(gpu_matrix.handle(), 0, sizeof(SCALARTYPE)*gpu_matrix.internal_size(), cpu_matrix_begin);
1206  }
1207 
1208 
1209 
1211 
1212 
1213  // operator +
1215  template <typename LHS1, typename RHS1, typename OP1,
1216  typename LHS2, typename RHS2, typename OP2>
1217  matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1218  const matrix_expression<const LHS2, const RHS2, OP2>,
1219  op_add>
1221  matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
1222  {
1223  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1224  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1225  && bool("Incompatible matrix sizes!"));
1226  return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1227  const matrix_expression<const LHS2, const RHS2, OP2>,
1228  op_add>(proxy1, proxy2);
1229  }
1230 
1231  template <typename LHS1, typename RHS1, typename OP1,
1232  typename NumericT, typename F>
1233  matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1234  const matrix_base<NumericT, F>,
1235  op_add>
1237  matrix_base<NumericT, F> const & proxy2)
1238  {
1239  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1240  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1241  && bool("Incompatible matrix sizes!"));
1242  return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1243  const matrix_base<NumericT, F>,
1244  op_add>(proxy1, proxy2);
1245  }
1246 
1247  template <typename NumericT, typename F,
1248  typename LHS2, typename RHS2, typename OP2>
1249  matrix_expression< const matrix_base<NumericT, F>,
1250  const matrix_expression<const LHS2, const RHS2, OP2>,
1251  op_add>
1252  operator + (matrix_base<NumericT, F> const & proxy1,
1253  matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
1254  {
1255  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1256  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1257  && bool("Incompatible matrix sizes!"));
1258  return matrix_expression< const matrix_base<NumericT, F>,
1259  const matrix_expression<const LHS2, const RHS2, OP2>,
1260  op_add>(proxy1, proxy2);
1261  }
1262 
1264  template <typename NumericT, typename F>
1265  matrix_expression< const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_add >
1266  operator + (const matrix_base<NumericT, F> & m1, const matrix_base<NumericT, F> & m2)
1267  {
1268  return matrix_expression< const matrix_base<NumericT, F>,
1269  const matrix_base<NumericT, F>,
1270  op_add > (m1, m2);
1271  }
1272 
1273 
1274  // operator -
1275  template <typename LHS1, typename RHS1, typename OP1,
1276  typename LHS2, typename RHS2, typename OP2>
1277  matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1278  const matrix_expression<const LHS2, const RHS2, OP2>,
1279  op_sub>
1281  matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
1282  {
1283  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1284  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1285  && bool("Incompatible matrix sizes!"));
1286  return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1287  const matrix_expression<const LHS2, const RHS2, OP2>,
1288  op_sub>(proxy1, proxy2);
1289  }
1290 
1291  template <typename LHS1, typename RHS1, typename OP1,
1292  typename NumericT, typename F>
1293  matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1294  const matrix_base<NumericT, F>,
1295  op_sub>
1297  matrix_base<NumericT, F> const & proxy2)
1298  {
1299  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1300  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1301  && bool("Incompatible matrix sizes!"));
1302  return matrix_expression< const matrix_expression<const LHS1, const RHS1, OP1>,
1303  const matrix_base<NumericT, F>,
1304  op_sub>(proxy1, proxy2);
1305  }
1306 
1307  template <typename NumericT, typename F,
1308  typename LHS2, typename RHS2, typename OP2>
1309  matrix_expression< const matrix_base<NumericT, F>,
1310  const matrix_expression<const LHS2, const RHS2, OP2>,
1311  op_sub>
1312  operator - (matrix_base<NumericT, F> const & proxy1,
1313  matrix_expression<const LHS2, const RHS2, OP2> const & proxy2)
1314  {
1315  assert( (viennacl::traits::size1(proxy1) == viennacl::traits::size1(proxy2))
1316  && (viennacl::traits::size2(proxy1) == viennacl::traits::size2(proxy2))
1317  && bool("Incompatible matrix sizes!"));
1318  return matrix_expression< const matrix_base<NumericT, F>,
1319  const matrix_expression<const LHS2, const RHS2, OP2>,
1320  op_sub>(proxy1, proxy2);
1321  }
1322 
1324  template <typename NumericT, typename F>
1325  matrix_expression< const matrix_base<NumericT, F>, const matrix_base<NumericT, F>, op_sub >
1326  operator - (const matrix_base<NumericT, F> & m1, const matrix_base<NumericT, F> & m2)
1327  {
1328  return matrix_expression< const matrix_base<NumericT, F>,
1329  const matrix_base<NumericT, F>,
1330  op_sub > (m1, m2);
1331  }
1332 
1333 
1334 
1335  // operator *
1341  template <typename S1, typename NumericT, typename F>
1343  matrix_expression< const matrix_base<NumericT, F>, const S1, op_mult>
1344  >::type
1345  operator * (S1 const & value, matrix_base<NumericT, F> const & m1)
1346  {
1347  return matrix_expression< const matrix_base<NumericT, F>, const S1, op_mult>(m1, value);
1348  }
1349 
1350 
1356  template <typename LHS, typename RHS, typename OP, typename S1>
1358  matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult> >::type
1360  S1 const & val)
1361  {
1362  return matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult>(proxy, val);
1363  }
1364 
1365 
1371  template <typename S1, typename LHS, typename RHS, typename OP>
1373  matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult> >::type
1374  operator * (S1 const & val,
1375  matrix_expression< LHS, RHS, OP> const & proxy)
1376  {
1377  return matrix_expression< const matrix_expression< LHS, RHS, OP>, const S1, op_mult>(proxy, val);
1378  }
1379 
1382  template <typename NumericT, typename F, typename S1>
1384  matrix_expression< const matrix_base<NumericT, F>, const S1, op_mult> >::type
1385  operator * (matrix_base<NumericT, F> const & m1, S1 const & s1)
1386  {
1387  return matrix_expression< const matrix_base<NumericT, F>, const S1, op_mult>(m1, s1);
1388  }
1389 
1390 
1391  // operator *=
1392 
1395  template <typename NumericT, typename F, typename S1>
1397  matrix_base<NumericT, F> &
1398  >::type
1399  operator *= (matrix_base<NumericT, F> & m1, S1 const & gpu_val)
1400  {
1401  //viennacl::linalg::inplace_mult(*this, gpu_val);
1403  m1, gpu_val, 1, false, (viennacl::is_flip_sign_scalar<S1>::value ? true : false));
1404  return m1;
1405  }
1406 
1407 
1408  // operator /
1409 
1410 
1416  template <typename LHS, typename RHS, typename OP, typename S1>
1418  matrix_expression< const matrix_expression<const LHS, const RHS, OP>, const S1, op_div> >::type
1420  S1 const & val)
1421  {
1422  return matrix_expression< const matrix_expression<const LHS, const RHS, OP>, const S1, op_div>(proxy, val);
1423  }
1424 
1425 
1428  template <typename NumericT, typename F, typename S1>
1430  matrix_expression< const matrix_base<NumericT, F>, const S1, op_div> >::type
1431  operator / (matrix_base<NumericT, F> const & m1, S1 const & s1)
1432  {
1433  return matrix_expression< const matrix_base<NumericT, F>, const S1, op_div>(m1, s1);
1434  }
1435 
1436 
1437  // operator /=
1438 
1441  template <typename NumericT, typename F, typename S1>
1443  matrix_base<NumericT, F> &
1444  >::type
1445  operator /= (matrix_base<NumericT, F> & m1, S1 const & gpu_val)
1446  {
1447  //viennacl::linalg::inplace_divide(*this, gpu_val);
1449  m1, gpu_val, 1, true, (viennacl::is_flip_sign_scalar<S1>::value ? true : false));
1450  return m1;
1451  }
1452 
1453 
1454 
1455 
1456 
1457  // outer_prod(v1, v2) * val;
1458  template <typename NumericT, typename S1>
1461  const S1,
1462  op_mult>
1463  >::type
1464  operator*(const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy,
1465  const S1 & val)
1466  {
1467  return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1468  const S1,
1469  op_mult>(proxy, val);
1470  }
1471 
1472  template <typename NumericT, typename S1>
1474  viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1475  const NumericT,
1476  op_mult>
1477  >::type
1478  operator*(const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy,
1479  const S1 & val)
1480  {
1481  return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1482  const NumericT,
1483  op_mult>(proxy, NumericT(val));
1484  }
1485 
1486  // val * outer_prod(v1, v2);
1487  template <typename NumericT, typename S1>
1489  viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1490  const S1,
1491  op_mult>
1492  >::type
1493  operator*(const S1 & val,
1494  const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy)
1495  {
1496  return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1497  const S1,
1498  op_mult>(proxy, val);
1499  }
1500 
1501  template<typename NumericT, typename S1>
1503  viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1504  const NumericT,
1505  op_mult>
1506  >::type
1507  operator*(const S1 & val,
1508  const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod> & proxy)
1509  {
1510  return viennacl::matrix_expression< const viennacl::matrix_expression< const vector_base<NumericT>, const vector_base<NumericT>, op_prod>,
1511  const NumericT,
1512  op_mult>(proxy, NumericT(val));
1513  }
1514 
1515 
1516 
1517  //
1518  // Specify available operations:
1519  //
1520 
1523  namespace linalg
1524  {
1525  namespace detail
1526  {
1527 
1528  // x = y
1529  template <typename T, typename F>
1530  struct op_executor<matrix_base<T, F>, op_assign, matrix_base<T, F> >
1531  {
1532  static void apply(matrix_base<T, F> & lhs, matrix_base<T, F> const & rhs)
1533  {
1534  viennacl::linalg::am(lhs, rhs, T(1), 1, false, false);
1535  }
1536  };
1537 
1538  // x += y
1539  template <typename T, typename F>
1540  struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_base<T, F> >
1541  {
1542  static void apply(matrix_base<T, F> & lhs, matrix_base<T, F> const & rhs)
1543  {
1544  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, false);
1545  }
1546  };
1547 
1548  // x -= y
1549  template <typename T, typename F>
1550  struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_base<T, F> >
1551  {
1552  static void apply(matrix_base<T, F> & lhs, matrix_base<T, F> const & rhs)
1553  {
1554  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, rhs, T(1), 1, false, true);
1555  }
1556  };
1557 
1559 
1560 
1561  // x = alpha * y
1562  template <typename T, typename F, typename ScalarType>
1563  struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> >
1564  {
1565  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> const & proxy)
1566  {
1567  viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, false, false);
1568  }
1569  };
1570 
1571  // x += alpha * y
1572  template <typename T, typename F, typename ScalarType>
1573  struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> >
1574  {
1575  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> const & proxy)
1576  {
1577  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, false);
1578  }
1579  };
1580 
1581  // x -= alpha * y
1582  template <typename T, typename F, typename ScalarType>
1583  struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> >
1584  {
1585  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult> const & proxy)
1586  {
1587  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, false, true);
1588  }
1589  };
1590 
1591 
1593 
1594  // x = alpha * vec_expr
1595  template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType>
1596  struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
1597  {
1598  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
1599  {
1600  matrix<T, F> temp(proxy.lhs());
1601  lhs = temp * proxy.rhs();
1602  }
1603  };
1604 
1605  // x += alpha * vec_expr
1606  template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType>
1607  struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
1608  {
1609  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
1610  {
1611  matrix<T, F> temp(proxy.lhs());
1612  lhs += temp * proxy.rhs();
1613  }
1614  };
1615 
1616  // x -= alpha * vec_expr
1617  template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType>
1618  struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> >
1619  {
1620  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_mult> const & proxy)
1621  {
1622  matrix<T, F> temp(proxy.lhs());
1623  lhs -= temp * proxy.rhs();
1624  }
1625  };
1626 
1627 
1629 
1630  // x = y / alpha
1631  template <typename T, typename F, typename ScalarType>
1632  struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> >
1633  {
1634  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> const & proxy)
1635  {
1636  viennacl::linalg::am(lhs, proxy.lhs(), proxy.rhs(), 1, true, false);
1637  }
1638  };
1639 
1640  // x += y / alpha
1641  template <typename T, typename F, typename ScalarType>
1642  struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> >
1643  {
1644  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> const & proxy)
1645  {
1646  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, false);
1647  }
1648  };
1649 
1650  // x -= y / alpha
1651  template <typename T, typename F, typename ScalarType>
1652  struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> >
1653  {
1654  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const ScalarType, op_div> const & proxy)
1655  {
1656  viennacl::linalg::ambm(lhs, lhs, T(1), 1, false, false, proxy.lhs(), proxy.rhs(), 1, true, true);
1657  }
1658  };
1659 
1660 
1662 
1663  // x = vec_expr / alpha
1664  template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType>
1665  struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
1666  {
1667  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
1668  {
1669  matrix<T, F> temp(proxy.lhs());
1670  lhs = temp / proxy.rhs();
1671  }
1672  };
1673 
1674  // x += vec_expr / alpha
1675  template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType>
1676  struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
1677  {
1678  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
1679  {
1680  matrix<T, F> temp(proxy.lhs());
1681  lhs += temp / proxy.rhs();
1682  }
1683  };
1684 
1685  // x -= vec_expr / alpha
1686  template <typename T, typename F, typename LHS, typename RHS, typename OP, typename ScalarType>
1687  struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> >
1688  {
1689  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS, const RHS, OP>, const ScalarType, op_div> const & proxy)
1690  {
1691  matrix<T, F> temp(proxy.lhs());
1692  lhs -= temp / proxy.rhs();
1693  }
1694  };
1695 
1696 
1697 
1698  // generic x = vec_expr1 + vec_expr2:
1699  template <typename T, typename F, typename LHS, typename RHS>
1700  struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const LHS, const RHS, op_add> >
1701  {
1702  // generic x = vec_expr1 + vec_expr2:
1703  template <typename LHS1, typename RHS1>
1704  static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
1705  {
1706  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
1707  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
1708 
1709  if (op_aliasing_lhs || op_aliasing_rhs)
1710  {
1711  matrix_base<T, F> temp(proxy.lhs());
1712  op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
1713  lhs = temp;
1714  }
1715  else
1716  {
1717  op_executor<matrix_base<T, F>, op_assign, LHS>::apply(lhs, proxy.lhs());
1718  op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
1719  }
1720  }
1721 
1722  // x = y + z
1723  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_add> const & proxy)
1724  {
1726  proxy.lhs(), T(1), 1, false, false,
1727  proxy.rhs(), T(1), 1, false, false);
1728  }
1729 
1730  // x = alpha * y + z
1731  template <typename ScalarType>
1732  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
1733  const matrix_base<T, F>,
1734  op_add> const & proxy)
1735  {
1737  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
1738  proxy.rhs(), T(1), 1, false, false);
1739  }
1740 
1741  // x = y / alpha + z
1742  template <typename ScalarType>
1743  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
1744  const matrix_base<T, F>,
1745  op_add> const & proxy)
1746  {
1748  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
1749  proxy.rhs(), T(1), 1, false, false);
1750  }
1751 
1752  // x = y + beta * z
1753  template <typename ScalarType>
1754  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
1755  const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
1756  op_add> const & proxy)
1757  {
1759  proxy.lhs(), T(1), 1, false, false,
1760  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
1761  }
1762 
1763  // x = y + z / beta
1764  template <typename ScalarType>
1765  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
1766  const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
1767  op_add> const & proxy)
1768  {
1770  proxy.lhs(), T(1), 1, false, false,
1771  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
1772  }
1773 
1774  // x = alpha * y + beta * z
1775  template <typename ScalarType1, typename ScalarType2>
1776  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
1777  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
1778  op_add> const & proxy)
1779  {
1781  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
1782  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
1783  }
1784 
1785  // x = alpha * y + z / beta
1786  template <typename ScalarType1, typename ScalarType2>
1787  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
1788  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
1789  op_add> const & proxy)
1790  {
1792  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
1793  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
1794  }
1795 
1796  // x = y / alpha + beta * z
1797  template <typename ScalarType1, typename ScalarType2>
1798  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
1799  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
1800  op_add> const & proxy)
1801  {
1803  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
1804  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
1805  }
1806 
1807  // x = y / alpha + z / beta
1808  template <typename ScalarType1, typename ScalarType2>
1809  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
1810  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
1811  op_add> const & proxy)
1812  {
1814  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
1815  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
1816  }
1817  };
1818 
1819  // dense = sparse * dense
1820  template <typename T, typename F1, typename LHS, typename RHS>
1821  struct op_executor<matrix_base<T, F1>, op_assign, matrix_expression<const LHS, const RHS, op_prod> >
1822  {
1823  template < typename SparseMatrixType, typename F2 >
1824  static void apply(matrix_base<T, F1> & lhs, matrix_expression<const SparseMatrixType,
1826  viennacl::op_prod> const & proxy)
1827  {
1828  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs);
1829  }
1830 
1831  // dense = sparse * trans(dense)
1832  template < typename SparseMatrixType, typename F2 >
1833  static void apply(matrix_base<T, F1> & lhs, matrix_expression<const SparseMatrixType,
1837  viennacl::op_prod> const & proxy)
1838  {
1839  viennacl::linalg::prod_impl(proxy.lhs(), proxy.rhs(), lhs);
1840  }
1841 
1842  };
1843 
1844  // generic x += vec_expr1 + vec_expr2:
1845  template <typename T, typename F, typename LHS, typename RHS>
1846  struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const LHS, const RHS, op_add> >
1847  {
1848  // generic x += vec_expr1 + vec_expr2:
1849  template <typename LHS1, typename RHS1>
1850  static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
1851  {
1852  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
1853  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
1854 
1855  if (op_aliasing_lhs || op_aliasing_rhs)
1856  {
1857  matrix_base<T, F> temp(proxy.lhs());
1858  op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
1859  lhs += temp;
1860  }
1861  else
1862  {
1863  op_executor<matrix_base<T, F>, op_inplace_add, LHS>::apply(lhs, proxy.lhs());
1864  op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
1865  }
1866  }
1867 
1868  // x += y + z
1869  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_add> const & proxy)
1870  {
1872  proxy.lhs(), T(1), 1, false, false,
1873  proxy.rhs(), T(1), 1, false, false);
1874  }
1875 
1876  // x += alpha * y + z
1877  template <typename ScalarType>
1878  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
1879  const matrix_base<T, F>,
1880  op_add> const & proxy)
1881  {
1883  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
1884  proxy.rhs(), T(1), 1, false, false);
1885  }
1886 
1887  // x += y / alpha + z
1888  template <typename ScalarType>
1889  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
1890  const matrix_base<T, F>,
1891  op_add> const & proxy)
1892  {
1894  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
1895  proxy.rhs(), T(1), 1, false, false);
1896  }
1897 
1898  // x += y + beta * z
1899  template <typename ScalarType>
1900  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
1901  const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
1902  op_add> const & proxy)
1903  {
1905  proxy.lhs(), T(1), 1, false, false,
1906  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
1907  }
1908 
1909  // x += y + z / beta
1910  template <typename ScalarType>
1911  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
1912  const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
1913  op_add> const & proxy)
1914  {
1916  proxy.lhs(), T(1), 1, false, false,
1917  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
1918  }
1919 
1920  // x += alpha * y + beta * z
1921  template <typename ScalarType1, typename ScalarType2>
1922  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
1923  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
1924  op_add> const & proxy)
1925  {
1927  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
1928  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
1929  }
1930 
1931  // x += alpha * y + z / beta
1932  template <typename ScalarType1, typename ScalarType2>
1933  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
1934  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
1935  op_add> const & proxy)
1936  {
1938  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
1939  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
1940  }
1941 
1942  // x += y / alpha + beta * z
1943  template <typename ScalarType1, typename ScalarType2>
1944  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
1945  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
1946  op_add> const & proxy)
1947  {
1949  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
1950  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
1951  }
1952 
1953  // x += y / alpha + z / beta
1954  template <typename ScalarType1, typename ScalarType2>
1955  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
1956  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
1957  op_add> const & proxy)
1958  {
1960  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
1961  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
1962  }
1963  };
1964 
1965 
1966 
1967  // generic x -= vec_expr1 + vec_expr2:
1968  template <typename T, typename F, typename LHS, typename RHS>
1969  struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_add> >
1970  {
1971  // generic x -= vec_expr1 + vec_expr2:
1972  template <typename LHS1, typename RHS1>
1973  static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_add> const & proxy)
1974  {
1975  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
1976  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
1977 
1978  if (op_aliasing_lhs || op_aliasing_rhs)
1979  {
1980  matrix_base<T, F> temp(proxy.lhs());
1981  op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(temp, proxy.rhs());
1982  lhs -= temp;
1983  }
1984  else
1985  {
1986  op_executor<matrix_base<T, F>, op_inplace_sub, LHS>::apply(lhs, proxy.lhs());
1987  op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
1988  }
1989  }
1990 
1991  // x -= y + z
1992  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_add> const & proxy)
1993  {
1995  proxy.lhs(), T(1), 1, false, true,
1996  proxy.rhs(), T(1), 1, false, true);
1997  }
1998 
1999  // x -= alpha * y + z
2000  template <typename ScalarType>
2001  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
2002  const matrix_base<T, F>,
2003  op_add> const & proxy)
2004  {
2006  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2007  proxy.rhs(), T(1), 1, false, true);
2008  }
2009 
2010  // x -= y / alpha + z
2011  template <typename ScalarType>
2012  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
2013  const matrix_base<T, F>,
2014  op_add> const & proxy)
2015  {
2017  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2018  proxy.rhs(), T(1), 1, false, true);
2019  }
2020 
2021  // x -= y + beta * z
2022  template <typename ScalarType>
2023  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
2024  const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
2025  op_add> const & proxy)
2026  {
2028  proxy.lhs(), T(1), 1, false, true,
2029  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2030  }
2031 
2032  // x -= y + z / beta
2033  template <typename ScalarType>
2034  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
2035  const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
2036  op_add> const & proxy)
2037  {
2039  proxy.lhs(), T(1), 1, false, true,
2040  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2041  }
2042 
2043  // x -= alpha * y + beta * z
2044  template <typename ScalarType1, typename ScalarType2>
2045  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
2046  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
2047  op_add> const & proxy)
2048  {
2050  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2051  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2052  }
2053 
2054  // x -= alpha * y + z / beta
2055  template <typename ScalarType1, typename ScalarType2>
2056  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
2057  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
2058  op_add> const & proxy)
2059  {
2061  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2062  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2063  }
2064 
2065  // x -= y / alpha + beta * z
2066  template <typename ScalarType1, typename ScalarType2>
2067  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
2068  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
2069  op_add> const & proxy)
2070  {
2072  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2073  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2074  }
2075 
2076  // x -= y / alpha + z / beta
2077  template <typename ScalarType1, typename ScalarType2>
2078  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
2079  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
2080  op_add> const & proxy)
2081  {
2083  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2084  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2085  }
2086  };
2087 
2088 
2089 
2091 
2092 
2093 
2094  // generic x = vec_expr1 - vec_expr2:
2095  template <typename T, typename F, typename LHS, typename RHS>
2096  struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const LHS, const RHS, op_sub> >
2097  {
2098  // generic x = vec_expr1 - vec_expr2:
2099  template <typename LHS1, typename RHS1>
2100  static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
2101  {
2102  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2103  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2104 
2105  if (op_aliasing_lhs || op_aliasing_rhs)
2106  {
2107  matrix_base<T, F> temp(proxy.lhs());
2108  op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
2109  lhs = temp;
2110  }
2111  else
2112  {
2113  op_executor<matrix_base<T, F>, op_assign, LHS>::apply(lhs, proxy.lhs());
2114  op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
2115  }
2116  }
2117 
2118  // x = y - z
2119  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_sub> const & proxy)
2120  {
2122  proxy.lhs(), T(1), 1, false, false,
2123  proxy.rhs(), T(1), 1, false, true);
2124  }
2125 
2126  // x = alpha * y - z
2127  template <typename ScalarType>
2128  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
2129  const matrix_base<T, F>,
2130  op_sub> const & proxy)
2131  {
2133  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2134  proxy.rhs(), T(1), 1, false, true);
2135  }
2136 
2137  // x = y / alpha - z
2138  template <typename ScalarType>
2139  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
2140  const matrix_base<T, F>,
2141  op_sub> const & proxy)
2142  {
2144  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2145  proxy.rhs(), T(1), 1, false, true);
2146  }
2147 
2148  // x = y - beta * z
2149  template <typename ScalarType>
2150  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
2151  const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
2152  op_sub> const & proxy)
2153  {
2155  proxy.lhs(), T(1), 1, false, false,
2156  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2157  }
2158 
2159  // x = y - z / beta
2160  template <typename ScalarType>
2161  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
2162  const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
2163  op_sub> const & proxy)
2164  {
2166  proxy.lhs(), T(1), 1, false, false,
2167  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2168  }
2169 
2170  // x = alpha * y - beta * z
2171  template <typename ScalarType1, typename ScalarType2>
2172  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
2173  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
2174  op_sub> const & proxy)
2175  {
2177  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2178  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2179  }
2180 
2181  // x = alpha * y - z / beta
2182  template <typename ScalarType1, typename ScalarType2>
2183  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
2184  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
2185  op_sub> const & proxy)
2186  {
2188  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2189  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2190  }
2191 
2192  // x = y / alpha - beta * z
2193  template <typename ScalarType1, typename ScalarType2>
2194  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
2195  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
2196  op_sub> const & proxy)
2197  {
2199  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2200  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2201  }
2202 
2203  // x = y / alpha - z / beta
2204  template <typename ScalarType1, typename ScalarType2>
2205  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
2206  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
2207  op_sub> const & proxy)
2208  {
2210  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2211  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2212  }
2213  };
2214 
2215 
2216  // generic x += vec_expr1 - vec_expr2:
2217  template <typename T, typename F, typename LHS, typename RHS>
2218  struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const LHS, const RHS, op_sub> >
2219  {
2220  // generic x += vec_expr1 - vec_expr2:
2221  template <typename LHS1, typename RHS1>
2222  static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
2223  {
2224  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2225  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2226 
2227  if (op_aliasing_lhs || op_aliasing_rhs)
2228  {
2229  matrix_base<T, F> temp(proxy.lhs());
2230  op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
2231  lhs += temp;
2232  }
2233  else
2234  {
2235  op_executor<matrix_base<T, F>, op_inplace_add, LHS>::apply(lhs, proxy.lhs());
2236  op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(lhs, proxy.rhs());
2237  }
2238  }
2239 
2240  // x += y - z
2241  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_sub> const & proxy)
2242  {
2244  proxy.lhs(), T(1), 1, false, false,
2245  proxy.rhs(), T(1), 1, false, true);
2246  }
2247 
2248  // x += alpha * y - z
2249  template <typename ScalarType>
2250  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
2251  const matrix_base<T, F>,
2252  op_sub> const & proxy)
2253  {
2255  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2256  proxy.rhs(), T(1), 1, false, true);
2257  }
2258 
2259  // x += y / alpha - z
2260  template <typename ScalarType>
2261  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
2262  const matrix_base<T, F>,
2263  op_sub> const & proxy)
2264  {
2266  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2267  proxy.rhs(), T(1), 1, false, true);
2268  }
2269 
2270  // x += y - beta * z
2271  template <typename ScalarType>
2272  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
2273  const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
2274  op_sub> const & proxy)
2275  {
2277  proxy.lhs(), T(1), 1, false, false,
2278  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2279  }
2280 
2281  // x += y - z / beta
2282  template <typename ScalarType>
2283  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
2284  const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
2285  op_sub> const & proxy)
2286  {
2288  proxy.lhs(), T(1), 1, false, false,
2289  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2290  }
2291 
2292  // x += alpha * y - beta * z
2293  template <typename ScalarType1, typename ScalarType2>
2294  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
2295  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
2296  op_sub> const & proxy)
2297  {
2299  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2300  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2301  }
2302 
2303  // x += alpha * y - z / beta
2304  template <typename ScalarType1, typename ScalarType2>
2305  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
2306  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
2307  op_sub> const & proxy)
2308  {
2310  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, false,
2311  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2312  }
2313 
2314  // x += y / alpha - beta * z
2315  template <typename ScalarType1, typename ScalarType2>
2316  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
2317  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
2318  op_sub> const & proxy)
2319  {
2321  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2322  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, true);
2323  }
2324 
2325  // x += y / alpha - z / beta
2326  template <typename ScalarType1, typename ScalarType2>
2327  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
2328  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
2329  op_sub> const & proxy)
2330  {
2332  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, false,
2333  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, true);
2334  }
2335  };
2336 
2337 
2338 
2339  // generic x -= vec_expr1 - vec_expr2:
2340  template <typename T, typename F, typename LHS, typename RHS>
2341  struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_sub> >
2342  {
2343  // generic x -= vec_expr1 - vec_expr2:
2344  template <typename LHS1, typename RHS1>
2345  static void apply(matrix_base<T, F> & lhs, matrix_expression<const LHS1, const RHS1, op_sub> const & proxy)
2346  {
2347  bool op_aliasing_lhs = op_aliasing(lhs, proxy.lhs());
2348  bool op_aliasing_rhs = op_aliasing(lhs, proxy.rhs());
2349 
2350  if (op_aliasing_lhs || op_aliasing_rhs)
2351  {
2352  matrix_base<T, F> temp(proxy.lhs());
2353  op_executor<matrix_base<T, F>, op_inplace_sub, RHS>::apply(temp, proxy.rhs());
2354  lhs -= temp;
2355  }
2356  else
2357  {
2358  op_executor<matrix_base<T, F>, op_inplace_sub, LHS>::apply(lhs, proxy.lhs());
2359  op_executor<matrix_base<T, F>, op_inplace_add, RHS>::apply(lhs, proxy.rhs());
2360  }
2361  }
2362 
2363  // x -= y - z
2364  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_sub> const & proxy)
2365  {
2367  proxy.lhs(), T(1), 1, false, true,
2368  proxy.rhs(), T(1), 1, false, false);
2369  }
2370 
2371  // x -= alpha * y - z
2372  template <typename ScalarType>
2373  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
2374  const matrix_base<T, F>,
2375  op_sub> const & proxy)
2376  {
2378  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2379  proxy.rhs(), T(1), 1, false, false);
2380  }
2381 
2382  // x -= y / alpha - z
2383  template <typename ScalarType>
2384  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
2385  const matrix_base<T, F>,
2386  op_sub> const & proxy)
2387  {
2389  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2390  proxy.rhs(), T(1), 1, false, false);
2391  }
2392 
2393  // x -= y - beta * z
2394  template <typename ScalarType>
2395  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
2396  const matrix_expression<const matrix_base<T, F>, const ScalarType, op_mult>,
2397  op_sub> const & proxy)
2398  {
2400  proxy.lhs(), T(1), 1, false, true,
2401  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2402  }
2403 
2404  // x -= y - z / beta
2405  template <typename ScalarType>
2406  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>,
2407  const matrix_expression<const matrix_base<T, F>, const ScalarType, op_div>,
2408  op_sub> const & proxy)
2409  {
2411  proxy.lhs(), T(1), 1, false, true,
2412  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2413  }
2414 
2415  // x -= alpha * y - beta * z
2416  template <typename ScalarType1, typename ScalarType2>
2417  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
2418  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
2419  op_sub> const & proxy)
2420  {
2422  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2423  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2424  }
2425 
2426  // x -= alpha * y - z / beta
2427  template <typename ScalarType1, typename ScalarType2>
2428  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_mult>,
2429  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
2430  op_sub> const & proxy)
2431  {
2433  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, false, true,
2434  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2435  }
2436 
2437  // x -= y / alpha - beta * z
2438  template <typename ScalarType1, typename ScalarType2>
2439  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
2440  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_mult>,
2441  op_sub> const & proxy)
2442  {
2444  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2445  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, false, false);
2446  }
2447 
2448  // x -= y / alpha - z / beta
2449  template <typename ScalarType1, typename ScalarType2>
2450  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F>, const ScalarType1, op_div>,
2451  const matrix_expression<const matrix_base<T, F>, const ScalarType2, op_div>,
2452  op_sub> const & proxy)
2453  {
2455  proxy.lhs().lhs(), proxy.lhs().rhs(), 1, true, true,
2456  proxy.rhs().lhs(), proxy.rhs().rhs(), 1, true, false);
2457  }
2458  };
2459 
2460 
2462 
2463  template <typename T, typename F, typename LHS>
2464  struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const LHS, const int, op_vector_diag> >
2465  {
2466  static void apply(matrix_base<T, F> & lhs, matrix_expression<const vector_base<T>, const int, op_vector_diag> const & proxy)
2467  {
2468  viennacl::linalg::matrix_diag_from_vector(proxy.lhs(), proxy.rhs(), lhs);
2469  }
2470  };
2471 
2472 
2473  template <typename T, typename LHS>
2474  struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const int, op_matrix_diag> >
2475  {
2476  template <typename F>
2477  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const int, op_matrix_diag> const & proxy)
2478  {
2479  viennacl::linalg::matrix_diag_to_vector(proxy.lhs(), proxy.rhs(), lhs);
2480  }
2481  };
2482 
2483  template <typename T, typename LHS>
2484  struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const unsigned int, op_row> >
2485  {
2486  template <typename F>
2487  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const unsigned int, op_row> const & proxy)
2488  {
2489  viennacl::linalg::matrix_row(proxy.lhs(), proxy.rhs(), lhs);
2490  }
2491  };
2492 
2493 
2494  template <typename T, typename LHS>
2495  struct op_executor<vector_base<T>, op_assign, vector_expression<const LHS, const unsigned int, op_column> >
2496  {
2497  template <typename F>
2498  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const unsigned int, op_column> const & proxy)
2499  {
2500  viennacl::linalg::matrix_column(proxy.lhs(), proxy.rhs(), lhs);
2501  }
2502  };
2503 
2504 
2506 
2507  // generic x = mat_expr1 .* mat_expr2:
2508  template <typename T, typename F, typename LHS, typename RHS, typename OP>
2509  struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
2510  {
2511  // x = y .* z
2512  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy)
2513  {
2514  viennacl::linalg::element_op(lhs, proxy);
2515  }
2516 
2517  // x = y .* mat_expr
2518  template <typename LHS2, typename RHS2, typename OP2>
2519  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
2520  {
2521  matrix<T, F> temp(proxy.rhs());
2522  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(proxy.lhs(), temp));
2523  }
2524 
2525  // x = mat_expr .* z
2526  template <typename LHS1, typename RHS1, typename OP1>
2527  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy)
2528  {
2529  matrix<T, F> temp(proxy.lhs());
2530  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp, proxy.rhs()));
2531  }
2532 
2533  // x = mat_expr .* mat_expr
2534  template <typename LHS1, typename RHS1, typename OP1,
2535  typename LHS2, typename RHS2, typename OP2>
2536  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
2537  const matrix_expression<const LHS2, const RHS2, OP2>,
2538  op_element_binary<OP> > const & proxy)
2539  {
2540  matrix<T, F> temp1(proxy.lhs());
2541  matrix<T, F> temp2(proxy.rhs());
2542  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp1, temp2));
2543  }
2544  };
2545 
2546  // generic x += mat_expr .* mat_expr:
2547  template <typename T, typename F, typename LHS, typename RHS, typename OP>
2548  struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
2549  {
2550  // x += y .* z
2551  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy)
2552  {
2553  viennacl::matrix<T, F> temp(proxy);
2554  lhs += temp;
2555  }
2556 
2557  // x += y .* mat_expr
2558  template <typename LHS2, typename RHS2, typename OP2>
2559  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
2560  {
2561  matrix<T, F> temp(proxy.rhs());
2562  matrix<T, F> temp2(temp.size1(), temp.size2());
2563  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(proxy.lhs(), temp));
2564  lhs += temp2;
2565  }
2566 
2567  // x += mat_expr .* z
2568  template <typename LHS1, typename RHS1, typename OP1>
2569  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy)
2570  {
2571  matrix<T, F> temp(proxy.lhs());
2572  matrix<T, F> temp2(temp.size1(), temp.size2());
2573  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp, proxy.rhs()));
2574  lhs += temp2;
2575  }
2576 
2577  // x += mat_expr .* mat_expr
2578  template <typename LHS1, typename RHS1, typename OP1,
2579  typename LHS2, typename RHS2, typename OP2>
2580  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
2581  const matrix_expression<const LHS2, const RHS2, OP2>,
2582  op_element_binary<OP> > const & proxy)
2583  {
2584  matrix<T, F> temp1(proxy.lhs());
2585  matrix<T, F> temp2(proxy.rhs());
2586  matrix<T, F> temp3(temp1.size1(), temp1.size2());
2587  viennacl::linalg::element_op(temp3, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp1, temp2));
2588  lhs += temp3;
2589  }
2590  };
2591 
2592  // generic x -= mat_expr1 .* mat_expr2:
2593  template <typename T, typename F, typename LHS, typename RHS, typename OP>
2594  struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_element_binary<OP> > >
2595  {
2596 
2597  // x -= y .* z
2598  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy)
2599  {
2600  viennacl::matrix<T, F> temp(proxy);
2601  lhs -= temp;
2602  }
2603 
2604  // x -= y .* mat_expr
2605  template <typename LHS2, typename RHS2, typename OP2>
2606  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_expression<const LHS2, const RHS2, OP2>, op_element_binary<OP> > const & proxy)
2607  {
2608  matrix<T, F> temp(proxy.rhs());
2609  matrix<T, F> temp2(temp.size1(), temp.size2());
2610  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(proxy.lhs(), temp));
2611  lhs -= temp2;
2612  }
2613 
2614  // x -= mat_expr .* z
2615  template <typename LHS1, typename RHS1, typename OP1>
2616  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>, const matrix_base<T, F>, op_element_binary<OP> > const & proxy)
2617  {
2618  matrix<T, F> temp(proxy.lhs());
2619  matrix<T, F> temp2(temp.size1(), temp.size2());
2620  viennacl::linalg::element_op(temp2, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp, proxy.rhs()));
2621  lhs -= temp2;
2622  }
2623 
2624  // x -= mat_expr .* mat_expr
2625  template <typename LHS1, typename RHS1, typename OP1,
2626  typename LHS2, typename RHS2, typename OP2>
2627  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS1, const RHS1, OP1>,
2628  const matrix_expression<const LHS2, const RHS2, OP2>,
2629  op_element_binary<OP> > const & proxy)
2630  {
2631  matrix<T, F> temp1(proxy.lhs());
2632  matrix<T, F> temp2(proxy.rhs());
2633  matrix<T, F> temp3(temp1.size1(), temp1.size2());
2634  viennacl::linalg::element_op(temp3, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_binary<OP> >(temp1, temp2));
2635  lhs -= temp3;
2636  }
2637  };
2638 
2640 
2641  template <typename T, typename F, typename LHS, typename RHS, typename OP>
2642  struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
2643  {
2644  // x = OP(y)
2645  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> > const & proxy)
2646  {
2647  viennacl::linalg::element_op(lhs, proxy);
2648  }
2649 
2650  // x = OP(vec_expr)
2651  template <typename LHS2, typename RHS2, typename OP2>
2652  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
2653  const matrix_expression<const LHS2, const RHS2, OP2>,
2654  op_element_unary<OP> > const & proxy)
2655  {
2656  matrix<T, F> temp(proxy.rhs());
2657  viennacl::linalg::element_op(lhs, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> >(temp, temp));
2658  }
2659  };
2660 
2661  template <typename T, typename F, typename LHS, typename RHS, typename OP>
2662  struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
2663  {
2664  // x += OP(y)
2665  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> > const & proxy)
2666  {
2667  matrix<T, F> temp(proxy);
2668  lhs += temp;
2669  }
2670 
2671  // x += OP(vec_expr)
2672  template <typename LHS2, typename RHS2, typename OP2>
2673  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
2674  const matrix_expression<const LHS2, const RHS2, OP2>,
2675  op_element_unary<OP> > const & proxy)
2676  {
2677  matrix<T, F> temp(proxy.rhs());
2678  viennacl::linalg::element_op(temp, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> >(temp, temp)); // inplace operation is safe here
2679  lhs += temp;
2680  }
2681  };
2682 
2683  template <typename T, typename F, typename LHS, typename RHS, typename OP>
2684  struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const LHS, const RHS, op_element_unary<OP> > >
2685  {
2686  // x -= OP(y)
2687  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> > const & proxy)
2688  {
2689  matrix<T, F> temp(proxy);
2690  lhs -= temp;
2691  }
2692 
2693  // x -= OP(vec_expr)
2694  template <typename LHS2, typename RHS2, typename OP2>
2695  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const LHS2, const RHS2, OP2>,
2696  const matrix_expression<const LHS2, const RHS2, OP2>,
2697  op_element_unary<OP> > const & proxy)
2698  {
2699  matrix<T, F> temp(proxy.rhs());
2700  viennacl::linalg::element_op(temp, viennacl::matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_element_unary<OP> >(temp, temp)); // inplace operation is safe here
2701  lhs -= temp;
2702  }
2703  };
2704 
2705 
2706 
2708 
2709  // C = A * B
2710  template <typename T, typename F, typename F1, typename F2>
2711  struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> >
2712  {
2713  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> const & rhs)
2714  {
2715  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
2716  }
2717  };
2718 
2719  // C = A * B^T
2720  template <typename T, typename F, typename F1, typename F2>
2721  struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_base<T, F1>,
2722  const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
2723  op_mat_mat_prod> >
2724  {
2725  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>,
2726  const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
2727  op_mat_mat_prod> const & rhs)
2728  {
2729  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
2730  }
2731  };
2732 
2733  // C = A^T * B
2734  template <typename T, typename F, typename F1, typename F2>
2735  struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
2736  const matrix_base<T, F2>,
2737  op_mat_mat_prod> >
2738  {
2739  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
2740  const matrix_base<T, F2>,
2741  op_mat_mat_prod> const & rhs)
2742  {
2743  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
2744  }
2745  };
2746 
2747  // C = A^T * B^T
2748  template <typename T, typename F, typename F1, typename F2>
2749  struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
2750  const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
2751  op_mat_mat_prod> >
2752  {
2753  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
2754  const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
2755  op_mat_mat_prod> const & rhs)
2756  {
2757  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(0));
2758  }
2759  };
2760 
2761 
2762  // C += A * B
2763  template <typename T, typename F, typename F1, typename F2>
2764  struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> >
2765  {
2766  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> const & rhs)
2767  {
2768  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
2769  }
2770  };
2771 
2772  // C += A * B^T
2773  template <typename T, typename F, typename F1, typename F2>
2774  struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_base<T, F1>,
2775  const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
2776  op_mat_mat_prod> >
2777  {
2778  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>,
2779  const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
2780  op_mat_mat_prod> const & rhs)
2781  {
2782  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
2783  }
2784  };
2785 
2786  // C += A^T * B
2787  template <typename T, typename F, typename F1, typename F2>
2788  struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
2789  const matrix_base<T, F2>,
2790  op_mat_mat_prod> >
2791  {
2792  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
2793  const matrix_base<T, F2>,
2794  op_mat_mat_prod> const & rhs)
2795  {
2796  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
2797  }
2798  };
2799 
2800  // C += A^T * B^T
2801  template <typename T, typename F, typename F1, typename F2>
2802  struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
2803  const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
2804  op_mat_mat_prod> >
2805  {
2806  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
2807  const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
2808  op_mat_mat_prod> const & rhs)
2809  {
2810  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(1.0), T(1.0));
2811  }
2812  };
2813 
2814 
2815  // C -= A * B
2816  template <typename T, typename F, typename F1, typename F2>
2817  struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> >
2818  {
2819  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F2>, op_mat_mat_prod> const & rhs)
2820  {
2821  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
2822  }
2823  };
2824 
2825  // C -= A * B^T
2826  template <typename T, typename F, typename F1, typename F2>
2827  struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_base<T, F1>,
2828  const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
2829  op_mat_mat_prod> >
2830  {
2831  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_base<T, F1>,
2832  const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
2833  op_mat_mat_prod> const & rhs)
2834  {
2835  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
2836  }
2837  };
2838 
2839  // C -= A^T * B
2840  template <typename T, typename F, typename F1, typename F2>
2841  struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
2842  const matrix_base<T, F2>,
2843  op_mat_mat_prod> >
2844  {
2845  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
2846  const matrix_base<T, F2>,
2847  op_mat_mat_prod> const & rhs)
2848  {
2849  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
2850  }
2851  };
2852 
2853  // C -= A^T * B^T
2854  template <typename T, typename F, typename F1, typename F2>
2855  struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
2856  const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
2857  op_mat_mat_prod> >
2858  {
2859  static void apply(matrix_base<T, F> & lhs, matrix_expression<const matrix_expression<const matrix_base<T, F1>, const matrix_base<T, F1>, op_trans>,
2860  const matrix_expression<const matrix_base<T, F2>, const matrix_base<T, F2>, op_trans>,
2861  op_mat_mat_prod> const & rhs)
2862  {
2863  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs, T(-1.0), T(1.0));
2864  }
2865  };
2866 
2868 
2869  // y = A * x
2870  template <typename T, typename F>
2871  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> >
2872  {
2873  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> const & rhs)
2874  {
2875  // check for x = A * x
2876  if (op_aliasing(lhs, rhs.rhs()))
2877  {
2878  vector_base<T> temp(rhs);
2879  lhs = temp;
2880  }
2881  else
2882  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
2883  }
2884  };
2885 
2886  // y = A^T * x
2887  template <typename T, typename F>
2888  struct op_executor<vector_base<T>, op_assign, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>,
2889  const vector_base<T>,
2890  op_prod> >
2891  {
2892  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>,
2893  const vector_base<T>,
2894  op_prod> const & rhs)
2895  {
2896  // check for x = A^T * x
2897  if (op_aliasing(lhs, rhs.rhs()))
2898  {
2899  vector_base<T> temp(rhs);
2900  lhs = temp;
2901  }
2902  else
2903  viennacl::linalg::prod_impl(rhs.lhs(), rhs.rhs(), lhs);
2904  }
2905  };
2906 
2907 
2908  // y += A * x
2909  template <typename T, typename F>
2910  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> >
2911  {
2912  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> const & rhs)
2913  {
2914  vector_base<T> temp(rhs);
2915  lhs += temp;
2916  }
2917  };
2918 
2919  // y += A^T * x
2920  template <typename T, typename F>
2921  struct op_executor<vector_base<T>, op_inplace_add, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>,
2922  const vector_base<T>,
2923  op_prod> >
2924  {
2925  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>,
2926  const vector_base<T>,
2927  op_prod> const & rhs)
2928  {
2929  vector_base<T> temp(rhs);
2930  lhs += temp;
2931  }
2932  };
2933 
2934 
2935  // y -= A * x
2936  template <typename T, typename F>
2937  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> >
2938  {
2939  static void apply(vector_base<T> & lhs, vector_expression<const matrix_base<T, F>, const vector_base<T>, op_prod> const & rhs)
2940  {
2941  vector_base<T> temp(rhs);
2942  lhs -= temp;
2943  }
2944  };
2945 
2946  // y -= A^T * x
2947  template <typename T, typename F>
2948  struct op_executor<vector_base<T>, op_inplace_sub, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>,
2949  const vector_base<T>,
2950  op_prod> >
2951  {
2952  static void apply(vector_base<T> & lhs, vector_expression<const matrix_expression<const matrix_base<T, F>, const matrix_base<T, F>, op_trans>,
2953  const vector_base<T>,
2954  op_prod> const & rhs)
2955  {
2956  vector_base<T> temp(rhs);
2957  lhs -= temp;
2958  }
2959  };
2960 
2961 
2962 
2964 
2965  // A = v1 * v2^T
2966  template <typename T, typename F>
2967  struct op_executor<matrix_base<T, F>, op_assign, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
2968  {
2969  static void apply(matrix_base<T, F> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
2970  {
2971  lhs.clear();
2972  viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, false, rhs.lhs(), rhs.rhs());
2973  }
2974  };
2975 
2976  // A = alpha * v1 * v2^T
2977  template <typename T, typename F, typename ScalarType>
2978  struct op_executor<matrix_base<T, F>, op_assign, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
2979  const ScalarType,
2980  op_mult> >
2981  {
2982  static void apply(matrix_base<T, F> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
2983  const ScalarType,
2984  op_mult> const & rhs)
2985  {
2986  lhs.clear();
2987  viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, false, rhs.lhs().lhs(), rhs.lhs().rhs());
2988  }
2989  };
2990 
2991  // A += v1 * v2^T
2992  template <typename T, typename F>
2993  struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
2994  {
2995  static void apply(matrix_base<T, F> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
2996  {
2997  viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, false, rhs.lhs(), rhs.rhs());
2998  }
2999  };
3000 
3001  // A += alpha * v1 * v2^T
3002  template <typename T, typename F, typename ScalarType>
3003  struct op_executor<matrix_base<T, F>, op_inplace_add, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3004  const ScalarType,
3005  op_mult> >
3006  {
3007  static void apply(matrix_base<T, F> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3008  const ScalarType,
3009  op_mult> const & rhs)
3010  {
3011  viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, false, rhs.lhs().lhs(), rhs.lhs().rhs());
3012  }
3013  };
3014 
3015  // A -= v1 * v2^T
3016  template <typename T, typename F>
3017  struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> >
3018  {
3019  static void apply(matrix_base<T, F> & lhs, matrix_expression<const vector_base<T>, const vector_base<T>, op_prod> const & rhs)
3020  {
3021  viennacl::linalg::scaled_rank_1_update(lhs, T(1.0), 1, false, true, rhs.lhs(), rhs.rhs());
3022  }
3023  };
3024 
3025  // A -= alpha * v1 * v2^T
3026  template <typename T, typename F, typename ScalarType>
3027  struct op_executor<matrix_base<T, F>, op_inplace_sub, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3028  const ScalarType,
3029  op_mult> >
3030  {
3031  static void apply(matrix_base<T, F> & lhs, matrix_expression< const matrix_expression<const vector_base<T>, const vector_base<T>, op_prod>,
3032  const ScalarType,
3033  op_mult> const & rhs)
3034  {
3035  viennacl::linalg::scaled_rank_1_update(lhs, rhs.rhs(), 1, false, true, rhs.lhs().lhs(), rhs.lhs().rhs());
3036  }
3037  };
3038 
3039 
3040  } // namespace detail
3041 
3042  } // namespace linalg
3043 
3046 } //namespace viennacl
3047 
3048 #endif
matrix_iterator< col_iteration, self_type > iterator2
Definition: matrix.hpp:242
Simple enable-if variant that uses the SFINAE pattern.
Definition: enable_if.hpp:29
viennacl::enable_if< viennacl::is_any_scalar< S1 >::value, matrix_expression< const matrix_expression< const LHS, const RHS, OP >, const S1, op_div > >::type operator/(matrix_expression< const LHS, const RHS, OP > const &proxy, S1 const &val)
Operator overload for the division of a matrix expression by a scalar from the right, e.g. (beta * m1) / alpha. Here, beta * m1 is wrapped into a matrix_expression and then divided by alpha.
Definition: matrix.hpp:1419
viennacl::memory_types memory_domain() const
Definition: matrix.hpp:659
void ambm_m(matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT, F > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Definition: matrix_operations.hpp:118
A tag class representing multiplication by a scalar.
Definition: forwards.h:74
static vcl_size_t size2(LHS &, RHS &rhs)
Definition: matrix_size_deducer.hpp:51
SizeType size_type
Definition: matrix.hpp:245
std::size_t vcl_size_t
Definition: forwards.h:58
size_type internal_size() const
Returns the total amount of allocated memory in multiples of sizeof(SCALARTYPE)
Definition: matrix.hpp:651
matrix_base(matrix_expression< const LHS, const RHS, OP > const &proxy)
Definition: matrix.hpp:287
Definition: forwards.h:498
bool diag_
Definition: matrix.hpp:69
vcl_size_t index1()
Definition: matrix.hpp:217
self_type & operator=(const matrix_expression< const LHS, const RHS, OP > &proxy)
Creates the matrix from the supplied random matrix.
Definition: matrix.hpp:393
const_reference operator()(size_type i, size_type j) const
Definition: matrix.hpp:87
SCALARTYPE const & const_reference
Definition: matrix.hpp:50
viennacl::enable_if< viennacl::is_any_sparse_matrix< M1 >::value, matrix_expression< const M1, const M1, op_trans > >::type trans(const M1 &mat)
Returns an expression template class representing a transposed matrix.
Definition: sparse_matrix_operations.hpp:330
Represents a vector consisting of 1 at a given index and zeros otherwise. To be used as an initialize...
Definition: forwards.h:299
ram_handle_type & ram_handle()
Returns the handle to a buffer in CPU RAM. NULL is returned if no such buffer has been allocated...
Definition: mem_handle.hpp:72
matrix(identity_matrix< SCALARTYPE > const &m)
Creates the matrix from the supplied identity matrix.
Definition: matrix.hpp:777
vcl_size_t index2()
Definition: matrix.hpp:218
Worker class for decomposing expression templates.
Definition: op_executor.hpp:79
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Definition: entry_proxy.hpp:178
Implementations of dense matrix related operations including matrix-vector products.
viennacl::context context() const
Definition: matrix.hpp:89
size_type size1_
Definition: matrix.hpp:66
size_type size2() const
Returns the number of columns.
Definition: matrix.hpp:627
Helper struct for checking whether a type represents a sign flip on a viennacl::scalar<> ...
Definition: forwards.h:377
This class represents a single scalar value on the GPU and behaves mostly like a built-in scalar type...
Definition: forwards.h:172
void matrix_diag_to_vector(const matrix_base< NumericT, F > &A, int k, vector_base< NumericT > &v)
Dispatcher interface for v = diag(A, k)
Definition: matrix_operations.hpp:231
Various little tools used here and there in ViennaCL.
A tag class representing the extraction of a matrix column to a vector.
Definition: forwards.h:147
matrix_expression< const self_type, const SCALARTYPE, op_mult > operator-() const
Sign flip for the matrix. Emulated to be equivalent to -1.0 * matrix.
Definition: matrix.hpp:619
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
void am(matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha)
Definition: matrix_operations.hpp:55
vcl_size_t size2() const
Definition: matrix.hpp:181
A tag class representing a matrix given by a vector placed on a certain (off-)diagonal.
Definition: forwards.h:141
void ambm(matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT, F > const &mat3, ScalarType2 const &beta, vcl_size_t len_beta, bool reciprocal_beta, bool flip_sign_beta)
Definition: matrix_operations.hpp:83
SCALARTYPE const & const_reference
Definition: matrix.hpp:81
A tag class representing subtraction.
Definition: forwards.h:72
Represents a vector consisting of scalars 's' only, i.e. v[i] = s for all i. To be used as an initial...
Definition: forwards.h:305
matrix_base(size_type rows, size_type columns, viennacl::context ctx=viennacl::context())
Creates the matrix with the given dimensions.
Definition: matrix.hpp:263
A tag indicating iteration along increasing row index of a matrix.
Definition: matrix.hpp:192
size_type stride1() const
Returns the number of rows.
Definition: matrix.hpp:635
F orientation_functor
Definition: matrix.hpp:248
A dense matrix class.
Definition: forwards.h:293
matrix(size_type rows, size_type columns, viennacl::context ctx=viennacl::context())
Creates the matrix with the given dimensions.
Definition: matrix.hpp:767
Common base class for dense vectors, vector ranges, and vector slices.
Definition: forwards.h:205
viennacl::context context() const
Definition: matrix.hpp:113
This file provides the forward declarations for the main types used within ViennaCL.
A tag class representing division.
Definition: forwards.h:80
void memory_read(mem_handle const &src_buffer, vcl_size_t src_offset, vcl_size_t bytes_to_read, void *ptr, bool async=false)
Reads data from a buffer back to main RAM.
Definition: memory.hpp:261
void matrix_row(const matrix_base< NumericT, F > &A, unsigned int i, vector_base< NumericT > &v)
Definition: matrix_operations.hpp:256
size_type start2() const
Returns the number of columns.
Definition: matrix.hpp:632
const_reference operator()(size_type i, size_type j) const
Definition: matrix.hpp:60
RHS & rhs() const
Get right hand side operand.
Definition: matrix.hpp:177
LHS & lhs() const
Get left hand side operand.
Definition: matrix.hpp:174
size_type size1() const
Definition: matrix.hpp:85
void matrix_assign(matrix_base< NumericT, F > &mat, NumericT s, bool clear=false)
Definition: matrix_operations.hpp:152
implicit_matrix_base(size_type size1, size_type size2, std::pair< SCALARTYPE, bool > value, bool diag)
Definition: matrix.hpp:48
viennacl::enable_if< viennacl::is_scalar< S1 >::value, matrix_base< NumericT, F > & >::type operator/=(matrix_base< NumericT, F > &m1, S1 const &gpu_val)
Scales a matrix by a GPU scalar value.
Definition: matrix.hpp:1445
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:245
F::orientation_category orientation_category
Definition: matrix.hpp:249
Definition: forwards.h:481
static const size_type alignment
Definition: matrix.hpp:251
memory_types
Definition: forwards.h:476
value_type operator*(void)
Definition: matrix.hpp:210
bool operator==(self_type const &other)
Definition: matrix.hpp:214
MATRIXTYPE::value_type value_type
Definition: matrix.hpp:204
size_type stride2() const
Returns the number of columns.
Definition: matrix.hpp:637
A tag class representing the (off-)diagonal of a matrix.
Definition: forwards.h:138
std::pair< SCALARTYPE, bool > value_
Definition: matrix.hpp:68
Represents a generic 'context' similar to an OpenCL context, but is backend-agnostic and thus also su...
Definition: context.hpp:39
memory_types get_active_handle_id() const
Returns an ID for the currently active memory buffer. Other memory buffers might contain old or no da...
Definition: mem_handle.hpp:91
size_type internal_size2() const
Returns the internal number of columns. Usually required for launching OpenCL kernels only...
Definition: matrix.hpp:649
A dense matrix class.
Definition: forwards.h:290
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
const_reference operator()(size_type, size_type) const
Definition: matrix.hpp:111
viennacl::enable_if< viennacl::is_any_scalar< S1 >::value, matrix_expression< const matrix_base< NumericT, F >, const S1, op_mult > >::type operator*(S1 const &value, matrix_base< NumericT, F > const &m1)
Operator overload for the expression alpha * m1, where alpha is a host scalar (float or double) and m...
Definition: matrix.hpp:1345
Obtain the cpu scalar type from a type, including a GPU type like viennacl::scalar ...
Definition: tools.hpp:207
self_type & operator-=(const matrix_expression< const LHS, const RHS, OP > &proxy)
Definition: matrix.hpp:471
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
A proxy class for a single element of a vector or matrix. This proxy should not be noticed by end-use...
Definition: forwards.h:178
self_type & operator/=(SCALARTYPE val)
Scales this matrix by a CPU scalar value.
Definition: matrix.hpp:609
Definition: forwards.h:480
SCALARTYPE const & const_reference
Definition: matrix.hpp:105
size_type size2() const
Definition: matrix.hpp:86
void scaled_rank_1_update(matrix_base< NumericT, F > &mat1, S1 const &alpha, vcl_size_t len_alpha, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
Definition: matrix_operations.hpp:748
self_type & operator=(const self_type &other)
Definition: matrix.hpp:361
size_type size1() const
Definition: matrix.hpp:53
bool operator!=(self_type const &other)
Definition: matrix.hpp:215
SCALARTYPE cpu_value_type
Definition: matrix.hpp:51
const handle_type & handle() const
Returns the OpenCL handle, const-version.
Definition: matrix.hpp:656
self_type & operator++(void)
Definition: matrix.hpp:211
matrix(matrix_expression< LHS, RHS, OP > const &proxy)
Definition: matrix.hpp:774
Implementations of operations using sparse matrices.
bool diag() const
Definition: matrix.hpp:58
matrix_iterator(MATRIXTYPE &mat, vcl_size_t start_row, vcl_size_t start_col)
Definition: matrix.hpp:206
A tag class representing addition.
Definition: forwards.h:70
identity_matrix(size_type s, viennacl::context ctx=viennacl::context())
Definition: matrix.hpp:83
matrix(scalar_matrix< SCALARTYPE > const &m)
Creates the matrix from the supplied scalar matrix.
Definition: matrix.hpp:791
Represents a vector consisting of zeros only. To be used as an initializer for viennacl::vector, vector_range, or vector_slize only.
Definition: forwards.h:302
An expression template class that represents a binary operation that yields a vector.
Definition: forwards.h:181
void copy(std::vector< SCALARTYPE > &cpu_vec, circulant_matrix< SCALARTYPE, ALIGNMENT > &gpu_mat)
Copies a circulant matrix from the std::vector to the OpenCL device (either GPU or multi-core CPU) ...
Definition: circulant_matrix.hpp:150
static vcl_size_t size1(LHS &lhs, RHS &)
Definition: matrix_size_deducer.hpp:50
DistanceType difference_type
Definition: matrix.hpp:246
void element_op(matrix_base< T, F > &A, matrix_expression< const matrix_base< T, F >, const matrix_base< T, F >, OP > const &proxy)
Implementation of the element-wise operation A = B .* C and A = B ./ C for matrices (using MATLAB syn...
Definition: matrix_operations.hpp:598
size_type size1() const
Definition: matrix.hpp:109
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:283
bool is_value_static() const
Definition: matrix.hpp:57
vcl_size_t size_type
Definition: matrix.hpp:128
viennacl::memory_types active_handle_id(T const &obj)
Returns an ID for the currently active memory domain of an object.
Definition: handle.hpp:201
handle_type & handle()
Returns the OpenCL handle, non-const-version.
Definition: matrix.hpp:654
vcl_size_t size_type
Definition: matrix.hpp:168
matrix(const self_type &other)
Definition: matrix.hpp:804
viennacl::vector< NumericT > operator-(const vector_base< NumericT > &v1, const vector_expression< const matrix_base< NumericT, F >, const vector_base< NumericT >, op_prod > &proxy)
Implementation of the operation 'result = v1 - A * v2', where A is a matrix.
Definition: matrix_operations.hpp:858
viennacl::context context() const
Definition: matrix.hpp:137
scalar< SCALARTYPE > value_type
Definition: matrix.hpp:243
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:910
matrix(zero_matrix< SCALARTYPE > const &m)
Creates the matrix from the supplied zero matrix.
Definition: matrix.hpp:784
matrix_expression(LHS &lhs, RHS &rhs)
Definition: matrix.hpp:170
size_type size2_
Definition: matrix.hpp:67
self_type & operator+=(const matrix_expression< const LHS, const RHS, OP > &proxy)
Definition: matrix.hpp:457
void resize(size_type rows, size_type columns, bool preserve=true)
Resizes the matrix. Existing entries can be preserved, but.
Definition: matrix.hpp:684
matrix()
The default constructor. Does not allocate any memory.
Definition: matrix.hpp:759
vcl_size_t size1() const
Returns the size of the result vector.
Definition: matrix.hpp:180
size_type start1() const
Returns the number of rows.
Definition: matrix.hpp:630
vector_expression< const matrix_base< NumericT, F >, const int, op_matrix_diag > diag(const matrix_base< NumericT, F > &A, int k=0)
Definition: matrix.hpp:895
A tag class representing matrix-vector products and element-wise multiplications. ...
Definition: forwards.h:76
vcl_size_t raw_size() const
Returns the number of bytes of the currently active buffer.
Definition: mem_handle.hpp:203
viennacl::context context(T const &t)
Returns an ID for the currently active memory domain of an object.
Definition: context.hpp:41
vcl_size_t size_type
Definition: matrix.hpp:47
void matrix_column(const matrix_base< NumericT, F > &A, unsigned int j, vector_base< NumericT > &v)
Definition: matrix_operations.hpp:281
viennacl::backend::mem_handle handle_type
Definition: matrix.hpp:247
INT_TYPE align_to_multiple(INT_TYPE to_reach, INT_TYPE base)
Rounds an integer to the next multiple of another integer.
Definition: tools.hpp:127
uBLAS-like iterator class for iterating over the entries of a dense matrix.
Definition: matrix.hpp:200
matrix_base()
The default constructor. Does not allocate any memory.
Definition: matrix.hpp:255
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
self_type & operator*=(SCALARTYPE val)
Scales a matrix by a CPU scalar value.
Definition: matrix.hpp:599
viennacl::enable_if< viennacl::is_scalar< S1 >::value, matrix_base< NumericT, F > & >::type operator*=(matrix_base< NumericT, F > &m1, S1 const &gpu_val)
Scales a matrix by a GPU scalar value.
Definition: matrix.hpp:1399
void inc()
Definition: shared_ptr.hpp:140
void clear()
Resets all entries to zero.
Definition: matrix.hpp:640
void resize(size_type rows, size_type columns, bool preserve=true)
Resizes the matrix. Existing entries can optionally be preserved.
Definition: matrix.hpp:827
Definition: forwards.h:479
void matrix_diag_from_vector(const vector_base< NumericT > &v, int k, matrix_base< NumericT, F > &A)
Dispatcher interface for A = diag(v, k)
Definition: matrix_operations.hpp:205
matrix_iterator< row_iteration, self_type > iterator1
Definition: matrix.hpp:241
base_type::size_type size_type
Definition: matrix.hpp:756
SCALARTYPE const & const_reference
Definition: matrix.hpp:129
Main abstraction class for multiple memory domains. Represents a buffer in either main RAM...
Definition: mem_handle.hpp:62
A tag class representing transposed matrices.
Definition: forwards.h:165
Helper implementations that deduce the dimensions of the supplied matrix-valued expressions.
vcl_size_t size_type
Definition: matrix.hpp:104
static void apply(const MATRIXTYPE &, unsigned int &, unsigned int &)
Definition: forwards.h:523
matrix_base(SCALARTYPE *ptr_to_mem, viennacl::memory_types mem_type, size_type mat_size1, size_type mat_start1, difference_type mat_stride1, size_type mat_internal_size1, size_type mat_size2, size_type mat_start2, difference_type mat_stride2, size_type mat_internal_size2)
Definition: matrix.hpp:302
size_type size2() const
Definition: matrix.hpp:110
matrix(const base_type &other)
Definition: matrix.hpp:797
size_type size1() const
Definition: matrix.hpp:133
entry_proxy< SCALARTYPE > operator()(size_type row_index, size_type col_index)
Read-write access to a single element of the matrix/matrix_range/matrix_slice.
Definition: matrix.hpp:566
void prod_impl(const matrix_base< NumericT, F > &mat, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
Definition: matrix_operations.hpp:350
void memory_create(mem_handle &handle, vcl_size_t size_in_bytes, viennacl::context const &ctx, const void *host_ptr=NULL)
Creates an array of the specified size. If the second argument is provided, the buffer is initialized...
Definition: memory.hpp:87
MATRIXTYPE & operator()(void) const
Definition: matrix.hpp:220
void reset()
Definition: shared_ptr.hpp:108
scalar_matrix(size_type s1, size_type s2, const_reference val, viennacl::context ctx=viennacl::context())
Definition: matrix.hpp:131
void switch_active_handle_id(memory_types new_id)
Switches the currently active handle. If no support for that backend is provided, an exception is thr...
Definition: mem_handle.hpp:94
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_column > column(const matrix_base< NumericT, F > &A, unsigned int j)
Definition: matrix.hpp:918
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
SCALARTYPE cpu_value_type
Definition: matrix.hpp:244
A tag class representing the extraction of a matrix row to a vector.
Definition: forwards.h:144
const_entry_proxy< SCALARTYPE > operator()(size_type row_index, size_type col_index) const
Read access to a single element of the matrix/matrix_range/matrix_slice.
Definition: matrix.hpp:573
void set_handle(viennacl::backend::mem_handle const &h)
Definition: matrix.hpp:666
size_type internal_size1() const
Returns the internal number of rows. Usually required for launching OpenCL kernels only...
Definition: matrix.hpp:647
bool op_aliasing(vector_base< T > const &, B const &)
Definition: op_executor.hpp:35
Implementation of the ViennaCL scalar class.
self_type & operator=(const matrix_expression< const self_type, const self_type, op_trans > &proxy)
Definition: matrix.hpp:418
zero_matrix(size_type s1, size_type s2, viennacl::context ctx=viennacl::context())
Definition: matrix.hpp:107
self_type operator++(int)
Definition: matrix.hpp:212
void fast_copy(const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_begin, const const_vector_iterator< SCALARTYPE, ALIGNMENT > &gpu_end, CPU_ITERATOR cpu_begin)
STL-like transfer of a GPU vector to the CPU. The cpu type is assumed to reside in a linear piece of ...
Definition: vector.hpp:1262
viennacl::vector< NumericT > operator+(const vector_base< NumericT > &v1, const vector_expression< const matrix_base< NumericT, F >, const vector_base< NumericT >, op_prod > &proxy)
Implementation of the operation 'result = v1 + A * v2', where A is a matrix.
Definition: matrix_operations.hpp:840
size_type size2() const
Definition: matrix.hpp:134
A collection of compile time type deductions.
SCALARTYPE value() const
Definition: matrix.hpp:56
size_type size1() const
Returns the number of rows.
Definition: matrix.hpp:625
void matrix_diagonal_assign(matrix_base< NumericT, F > &mat, NumericT s)
Definition: matrix_operations.hpp:178
size_type size2() const
Definition: matrix.hpp:54
void switch_memory_context(viennacl::context new_ctx)
Definition: matrix.hpp:671
A tag indicating iteration along increasing columns index of a matrix.
Definition: matrix.hpp:195
Simple enable-if variant that uses the SFINAE pattern.
vcl_size_t size_type
Definition: matrix.hpp:80
const_reference operator()(size_type, size_type) const
Definition: matrix.hpp:135
matrix_base(viennacl::backend::mem_handle &h, size_type mat_size1, size_type mat_start1, difference_type mat_stride1, size_type mat_internal_size1, size_type mat_size2, size_type mat_start2, difference_type mat_stride2, size_type mat_internal_size2)
Constructor for creating a matrix_range or matrix_stride from some other matrix/matrix_range/matrix_s...
Definition: matrix.hpp:277