ViennaCL - The Vienna Computing Library  1.5.2
matrix_operations.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_HOST_BASED_MATRIX_OPERATIONS_HPP_
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
25 #include "viennacl/forwards.h"
26 #include "viennacl/scalar.hpp"
27 #include "viennacl/vector.hpp"
29 #include "viennacl/tools/tools.hpp"
33 #include "viennacl/traits/size.hpp"
39 
40 namespace viennacl
41 {
42  namespace linalg
43  {
44  namespace host_based
45  {
46 
47  //
48  // Introductory note: By convention, all dimensions are already checked in the dispatcher frontend. No need to double-check again in here!
49  //
50 
51  template <typename NumericT, typename F, typename ScalarType1>
53  matrix_base<NumericT, F> const & mat2, ScalarType1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha)
54  {
55  typedef NumericT value_type;
56 
57  value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
58  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
59 
60  value_type data_alpha = alpha;
61  if (flip_sign_alpha)
62  data_alpha = -data_alpha;
63 
64  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
65  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
68  vcl_size_t A_size1 = viennacl::traits::size1(mat1);
69  vcl_size_t A_size2 = viennacl::traits::size2(mat1);
70  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
71  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);
72 
73  vcl_size_t B_start1 = viennacl::traits::start1(mat2);
74  vcl_size_t B_start2 = viennacl::traits::start2(mat2);
77  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2);
78  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2);
79 
80  detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
81  detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
82  //typedef typename detail::majority_struct_for_orientation<typename M1::orientation_category>::type index_generator_A;
83  //typedef typename detail::majority_struct_for_orientation<typename M2::orientation_category>::type index_generator_B;
84 
85  if (detail::is_row_major(typename F::orientation_category()))
86  {
87  if (reciprocal_alpha)
88  {
89 #ifdef VIENNACL_WITH_OPENMP
90  #pragma omp parallel for
91 #endif
92  for (long row = 0; row < static_cast<long>(A_size1); ++row)
93  for (long col = 0; col < static_cast<long>(A_size2); ++col)
94  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha;
95  }
96  else
97  {
98 #ifdef VIENNACL_WITH_OPENMP
99  #pragma omp parallel for
100 #endif
101  for (long row = 0; row < static_cast<long>(A_size1); ++row)
102  for (long col = 0; col < static_cast<long>(A_size2); ++col)
103  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha;
104  }
105  }
106  else
107  {
108  if (reciprocal_alpha)
109  {
110 #ifdef VIENNACL_WITH_OPENMP
111  #pragma omp parallel for
112 #endif
113  for (long col = 0; col < static_cast<long>(A_size2); ++col)
114  for (long row = 0; row < static_cast<long>(A_size1); ++row)
115  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha;
116  }
117  else
118  {
119 #ifdef VIENNACL_WITH_OPENMP
120  #pragma omp parallel for
121 #endif
122  for (long col = 0; col < static_cast<long>(A_size2); ++col)
123  for (long row = 0; row < static_cast<long>(A_size1); ++row)
124  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha;
125  }
126  }
127  }
128 
129 
130  template <typename NumericT, typename F,
131  typename ScalarType1, typename ScalarType2>
133  matrix_base<NumericT, F> const & mat2, ScalarType1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
134  matrix_base<NumericT, F> const & mat3, ScalarType2 const & beta, vcl_size_t /*len_beta*/, bool reciprocal_beta, bool flip_sign_beta)
135  {
136  typedef NumericT value_type;
137 
138  value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
139  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
140  value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3);
141 
142  value_type data_alpha = alpha;
143  if (flip_sign_alpha)
144  data_alpha = -data_alpha;
145 
146  value_type data_beta = beta;
147  if (flip_sign_beta)
148  data_beta = -data_beta;
149 
150  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
151  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
152  vcl_size_t A_inc1 = viennacl::traits::stride1(mat1);
153  vcl_size_t A_inc2 = viennacl::traits::stride2(mat1);
154  vcl_size_t A_size1 = viennacl::traits::size1(mat1);
155  vcl_size_t A_size2 = viennacl::traits::size2(mat1);
156  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
157  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);
158 
159  vcl_size_t B_start1 = viennacl::traits::start1(mat2);
160  vcl_size_t B_start2 = viennacl::traits::start2(mat2);
161  vcl_size_t B_inc1 = viennacl::traits::stride1(mat2);
162  vcl_size_t B_inc2 = viennacl::traits::stride2(mat2);
163  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2);
164  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2);
165 
166  vcl_size_t C_start1 = viennacl::traits::start1(mat3);
167  vcl_size_t C_start2 = viennacl::traits::start2(mat3);
168  vcl_size_t C_inc1 = viennacl::traits::stride1(mat3);
169  vcl_size_t C_inc2 = viennacl::traits::stride2(mat3);
170  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(mat3);
171  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(mat3);
172 
173  detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
174  detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
175  detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
176 
177  if (detail::is_row_major(typename F::orientation_category()))
178  {
179  if (reciprocal_alpha && reciprocal_beta)
180  {
181 #ifdef VIENNACL_WITH_OPENMP
182  #pragma omp parallel for
183 #endif
184  for (long row = 0; row < static_cast<long>(A_size1); ++row)
185  for (long col = 0; col < static_cast<long>(A_size2); ++col)
186  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
187  }
188  else if (reciprocal_alpha && !reciprocal_beta)
189  {
190 #ifdef VIENNACL_WITH_OPENMP
191  #pragma omp parallel for
192 #endif
193  for (long row = 0; row < static_cast<long>(A_size1); ++row)
194  for (long col = 0; col < static_cast<long>(A_size2); ++col)
195  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
196  }
197  else if (!reciprocal_alpha && reciprocal_beta)
198  {
199 #ifdef VIENNACL_WITH_OPENMP
200  #pragma omp parallel for
201 #endif
202  for (long row = 0; row < static_cast<long>(A_size1); ++row)
203  for (long col = 0; col < static_cast<long>(A_size2); ++col)
204  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
205  }
206  else if (!reciprocal_alpha && !reciprocal_beta)
207  {
208 #ifdef VIENNACL_WITH_OPENMP
209  #pragma omp parallel for
210 #endif
211  for (long row = 0; row < static_cast<long>(A_size1); ++row)
212  for (long col = 0; col < static_cast<long>(A_size2); ++col)
213  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
214  }
215  }
216  else
217  {
218  if (reciprocal_alpha && reciprocal_beta)
219  {
220 #ifdef VIENNACL_WITH_OPENMP
221  #pragma omp parallel for
222 #endif
223  for (long col = 0; col < static_cast<long>(A_size2); ++col)
224  for (long row = 0; row < static_cast<long>(A_size1); ++row)
225  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
226  }
227  else if (reciprocal_alpha && !reciprocal_beta)
228  {
229 #ifdef VIENNACL_WITH_OPENMP
230  #pragma omp parallel for
231 #endif
232  for (long col = 0; col < static_cast<long>(A_size2); ++col)
233  for (long row = 0; row < static_cast<long>(A_size1); ++row)
234  wrapper_A(row, col) = wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
235  }
236  else if (!reciprocal_alpha && reciprocal_beta)
237  {
238 #ifdef VIENNACL_WITH_OPENMP
239  #pragma omp parallel for
240 #endif
241  for (long col = 0; col < static_cast<long>(A_size2); ++col)
242  for (long row = 0; row < static_cast<long>(A_size1); ++row)
243  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
244  }
245  else if (!reciprocal_alpha && !reciprocal_beta)
246  {
247 #ifdef VIENNACL_WITH_OPENMP
248  #pragma omp parallel for
249 #endif
250  for (long col = 0; col < static_cast<long>(A_size2); ++col)
251  for (long row = 0; row < static_cast<long>(A_size1); ++row)
252  wrapper_A(row, col) = wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
253  }
254  }
255 
256  }
257 
258 
259  template <typename NumericT, typename F,
260  typename ScalarType1, typename ScalarType2>
262  matrix_base<NumericT, F> const & mat2, ScalarType1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
263  matrix_base<NumericT, F> const & mat3, ScalarType2 const & beta, vcl_size_t /*len_beta*/, bool reciprocal_beta, bool flip_sign_beta)
264  {
265  typedef NumericT value_type;
266 
267  value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
268  value_type const * data_B = detail::extract_raw_pointer<value_type>(mat2);
269  value_type const * data_C = detail::extract_raw_pointer<value_type>(mat3);
270 
271  value_type data_alpha = alpha;
272  if (flip_sign_alpha)
273  data_alpha = -data_alpha;
274 
275  value_type data_beta = beta;
276  if (flip_sign_beta)
277  data_beta = -data_beta;
278 
279  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
280  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
281  vcl_size_t A_inc1 = viennacl::traits::stride1(mat1);
282  vcl_size_t A_inc2 = viennacl::traits::stride2(mat1);
283  vcl_size_t A_size1 = viennacl::traits::size1(mat1);
284  vcl_size_t A_size2 = viennacl::traits::size2(mat1);
285  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
286  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);
287 
288  vcl_size_t B_start1 = viennacl::traits::start1(mat2);
289  vcl_size_t B_start2 = viennacl::traits::start2(mat2);
290  vcl_size_t B_inc1 = viennacl::traits::stride1(mat2);
291  vcl_size_t B_inc2 = viennacl::traits::stride2(mat2);
292  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(mat2);
293  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(mat2);
294 
295  vcl_size_t C_start1 = viennacl::traits::start1(mat3);
296  vcl_size_t C_start2 = viennacl::traits::start2(mat3);
297  vcl_size_t C_inc1 = viennacl::traits::stride1(mat3);
298  vcl_size_t C_inc2 = viennacl::traits::stride2(mat3);
299  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(mat3);
300  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(mat3);
301 
302  detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
303  detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
304  detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
305 
306  //typedef typename detail::majority_struct_for_orientation<typename M1::orientation_category>::type index_generator_A;
307  //typedef typename detail::majority_struct_for_orientation<typename M2::orientation_category>::type index_generator_B;
308  //typedef typename detail::majority_struct_for_orientation<typename M3::orientation_category>::type index_generator_C;
309 
310  if (detail::is_row_major(typename F::orientation_category()))
311  {
312  if (reciprocal_alpha && reciprocal_beta)
313  {
314 #ifdef VIENNACL_WITH_OPENMP
315  #pragma omp parallel for
316 #endif
317  for (long row = 0; row < static_cast<long>(A_size1); ++row)
318  for (long col = 0; col < static_cast<long>(A_size2); ++col)
319  wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
320  }
321  else if (reciprocal_alpha && !reciprocal_beta)
322  {
323 #ifdef VIENNACL_WITH_OPENMP
324  #pragma omp parallel for
325 #endif
326  for (long row = 0; row < static_cast<long>(A_size1); ++row)
327  for (long col = 0; col < static_cast<long>(A_size2); ++col)
328  wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
329  }
330  else if (!reciprocal_alpha && reciprocal_beta)
331  {
332 #ifdef VIENNACL_WITH_OPENMP
333  #pragma omp parallel for
334 #endif
335  for (long row = 0; row < static_cast<long>(A_size1); ++row)
336  for (long col = 0; col < static_cast<long>(A_size2); ++col)
337  wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
338  }
339  else if (!reciprocal_alpha && !reciprocal_beta)
340  {
341 #ifdef VIENNACL_WITH_OPENMP
342  #pragma omp parallel for
343 #endif
344  for (long row = 0; row < static_cast<long>(A_size1); ++row)
345  for (long col = 0; col < static_cast<long>(A_size2); ++col)
346  wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
347  }
348  }
349  else
350  {
351  if (reciprocal_alpha && reciprocal_beta)
352  {
353 #ifdef VIENNACL_WITH_OPENMP
354  #pragma omp parallel for
355 #endif
356  for (long col = 0; col < static_cast<long>(A_size2); ++col)
357  for (long row = 0; row < static_cast<long>(A_size1); ++row)
358  wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) / data_beta;
359  }
360  else if (reciprocal_alpha && !reciprocal_beta)
361  {
362 #ifdef VIENNACL_WITH_OPENMP
363  #pragma omp parallel for
364 #endif
365  for (long col = 0; col < static_cast<long>(A_size2); ++col)
366  for (long row = 0; row < static_cast<long>(A_size1); ++row)
367  wrapper_A(row, col) += wrapper_B(row, col) / data_alpha + wrapper_C(row, col) * data_beta;
368  }
369  else if (!reciprocal_alpha && reciprocal_beta)
370  {
371 #ifdef VIENNACL_WITH_OPENMP
372  #pragma omp parallel for
373 #endif
374  for (long col = 0; col < static_cast<long>(A_size2); ++col)
375  for (long row = 0; row < static_cast<long>(A_size1); ++row)
376  wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) / data_beta;
377  }
378  else if (!reciprocal_alpha && !reciprocal_beta)
379  {
380 #ifdef VIENNACL_WITH_OPENMP
381  #pragma omp parallel for
382 #endif
383  for (long col = 0; col < static_cast<long>(A_size2); ++col)
384  for (long row = 0; row < static_cast<long>(A_size1); ++row)
385  wrapper_A(row, col) += wrapper_B(row, col) * data_alpha + wrapper_C(row, col) * data_beta;
386  }
387  }
388 
389  }
390 
391 
392 
393 
394  template <typename NumericT, typename F>
395  void matrix_assign(matrix_base<NumericT, F> & mat, NumericT s, bool clear = false)
396  {
397  typedef NumericT value_type;
398 
399  value_type * data_A = detail::extract_raw_pointer<value_type>(mat);
400  value_type alpha = static_cast<value_type>(s);
401 
402  vcl_size_t A_start1 = viennacl::traits::start1(mat);
403  vcl_size_t A_start2 = viennacl::traits::start2(mat);
408  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
409  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
410 
411  detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
412 
413  if (detail::is_row_major(typename F::orientation_category()))
414  {
415 #ifdef VIENNACL_WITH_OPENMP
416  #pragma omp parallel for
417 #endif
418  for (long row = 0; row < static_cast<long>(A_size1); ++row)
419  for (long col = 0; col < static_cast<long>(A_size2); ++col)
420  wrapper_A(row, col) = alpha;
421  //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]
422  // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha;
423  }
424  else
425  {
426 #ifdef VIENNACL_WITH_OPENMP
427  #pragma omp parallel for
428 #endif
429  for (long col = 0; col < static_cast<long>(A_size2); ++col)
430  for (long row = 0; row < static_cast<long>(A_size1); ++row)
431  wrapper_A(row, col) = alpha;
432  //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]
433  // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha;
434  }
435  }
436 
437 
438 
439  template <typename NumericT, typename F>
441  {
442  typedef NumericT value_type;
443 
444  value_type * data_A = detail::extract_raw_pointer<value_type>(mat);
445  value_type alpha = static_cast<value_type>(s);
446 
447  vcl_size_t A_start1 = viennacl::traits::start1(mat);
448  vcl_size_t A_start2 = viennacl::traits::start2(mat);
451  vcl_size_t A_size1 = viennacl::traits::size1(mat);
452  //vcl_size_t A_size2 = viennacl::traits::size2(mat);
453  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
454  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
455 
456  detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
457 
458 #ifdef VIENNACL_WITH_OPENMP
459  #pragma omp parallel for
460 #endif
461  for (long row = 0; row < static_cast<long>(A_size1); ++row)
462  wrapper_A(row, row) = alpha;
463  }
464 
465  template <typename NumericT, typename F>
467  {
468  typedef NumericT value_type;
469 
470  value_type *data_A = detail::extract_raw_pointer<value_type>(mat);
471  value_type const *data_vec = detail::extract_raw_pointer<value_type>(vec);
472 
473  vcl_size_t A_start1 = viennacl::traits::start1(mat);
474  vcl_size_t A_start2 = viennacl::traits::start2(mat);
477  //vcl_size_t A_size1 = viennacl::traits::size1(mat);
478  //vcl_size_t A_size2 = viennacl::traits::size2(mat);
479  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
480  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
481 
482  vcl_size_t v_start = viennacl::traits::start(vec);
484  vcl_size_t v_size = viennacl::traits::size(vec);
485 
486  detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
487 
488  vcl_size_t row_start = 0;
489  vcl_size_t col_start = 0;
490 
491  if (k >= 0)
492  col_start = static_cast<vcl_size_t>(k);
493  else
494  row_start = static_cast<vcl_size_t>(-k);
495 
496  matrix_assign(mat, NumericT(0));
497 
498  for (vcl_size_t i = 0; i < v_size; ++i)
499  wrapper_A(row_start + i, col_start + i) = data_vec[v_start + i * v_inc];
500 
501  }
502 
503  template <typename NumericT, typename F>
505  {
506  typedef NumericT value_type;
507 
508  value_type const *data_A = detail::extract_raw_pointer<value_type>(mat);
509  value_type *data_vec = detail::extract_raw_pointer<value_type>(vec);
510 
511  vcl_size_t A_start1 = viennacl::traits::start1(mat);
512  vcl_size_t A_start2 = viennacl::traits::start2(mat);
515  //vcl_size_t A_size1 = viennacl::traits::size1(mat);
516  //vcl_size_t A_size2 = viennacl::traits::size2(mat);
517  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
518  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
519 
520  vcl_size_t v_start = viennacl::traits::start(vec);
522  vcl_size_t v_size = viennacl::traits::size(vec);
523 
524  detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
525 
526  vcl_size_t row_start = 0;
527  vcl_size_t col_start = 0;
528 
529  if (k >= 0)
530  col_start = static_cast<vcl_size_t>(k);
531  else
532  row_start = static_cast<vcl_size_t>(-k);
533 
534  for (vcl_size_t i = 0; i < v_size; ++i)
535  data_vec[v_start + i * v_inc] = wrapper_A(row_start + i, col_start + i);
536  }
537 
538  template <typename NumericT, typename F>
539  void matrix_row(const matrix_base<NumericT, F> & mat, unsigned int i, vector_base<NumericT> & vec)
540  {
541  typedef NumericT value_type;
542 
543  value_type const *data_A = detail::extract_raw_pointer<value_type>(mat);
544  value_type *data_vec = detail::extract_raw_pointer<value_type>(vec);
545 
546  vcl_size_t A_start1 = viennacl::traits::start1(mat);
547  vcl_size_t A_start2 = viennacl::traits::start2(mat);
550  //vcl_size_t A_size1 = viennacl::traits::size1(mat);
551  //vcl_size_t A_size2 = viennacl::traits::size2(mat);
552  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
553  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
554 
555  vcl_size_t v_start = viennacl::traits::start(vec);
557  vcl_size_t v_size = viennacl::traits::size(vec);
558 
559  detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
560 
561  for (vcl_size_t j = 0; j < v_size; ++j)
562  data_vec[v_start + j * v_inc] = wrapper_A(i, j);
563  }
564 
565  template <typename NumericT, typename F>
566  void matrix_column(const matrix_base<NumericT, F> & mat, unsigned int j, vector_base<NumericT> & vec)
567  {
568  typedef NumericT value_type;
569 
570  value_type const *data_A = detail::extract_raw_pointer<value_type>(mat);
571  value_type *data_vec = detail::extract_raw_pointer<value_type>(vec);
572 
573  vcl_size_t A_start1 = viennacl::traits::start1(mat);
574  vcl_size_t A_start2 = viennacl::traits::start2(mat);
577  //vcl_size_t A_size1 = viennacl::traits::size1(mat);
578  //vcl_size_t A_size2 = viennacl::traits::size2(mat);
579  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
580  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
581 
582  vcl_size_t v_start = viennacl::traits::start(vec);
584  vcl_size_t v_size = viennacl::traits::size(vec);
585 
586  detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
587 
588  for (vcl_size_t i = 0; i < v_size; ++i)
589  data_vec[v_start + i * v_inc] = wrapper_A(i, j);
590  }
591 
592  //
594  //
595 
596  // Binary operations A = B .* C and A = B ./ C
597 
603  template <typename NumericT, typename F, typename OP>
606  {
607  typedef NumericT value_type;
609 
610  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
611  value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());
612  value_type const * data_C = detail::extract_raw_pointer<value_type>(proxy.rhs());
613 
614  vcl_size_t A_start1 = viennacl::traits::start1(A);
615  vcl_size_t A_start2 = viennacl::traits::start2(A);
618  vcl_size_t A_size1 = viennacl::traits::size1(A);
619  vcl_size_t A_size2 = viennacl::traits::size2(A);
620  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
621  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
622 
623  vcl_size_t B_start1 = viennacl::traits::start1(proxy.lhs());
624  vcl_size_t B_start2 = viennacl::traits::start2(proxy.lhs());
625  vcl_size_t B_inc1 = viennacl::traits::stride1(proxy.lhs());
626  vcl_size_t B_inc2 = viennacl::traits::stride2(proxy.lhs());
627  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(proxy.lhs());
628  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(proxy.lhs());
629 
630  vcl_size_t C_start1 = viennacl::traits::start1(proxy.rhs());
631  vcl_size_t C_start2 = viennacl::traits::start2(proxy.rhs());
632  vcl_size_t C_inc1 = viennacl::traits::stride1(proxy.rhs());
633  vcl_size_t C_inc2 = viennacl::traits::stride2(proxy.rhs());
634  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(proxy.rhs());
635  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(proxy.rhs());
636 
637  detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
638  detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
639  detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
640 
641  if (detail::is_row_major(typename F::orientation_category()))
642  {
643 #ifdef VIENNACL_WITH_OPENMP
644  #pragma omp parallel for
645 #endif
646  for (long row = 0; row < static_cast<long>(A_size1); ++row)
647  for (long col = 0; col < static_cast<long>(A_size2); ++col)
648  OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col));
649  //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]
650  // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha
651  // + data_C[index_generator_C::mem_index(row * C_inc1 + C_start1, col * C_inc2 + C_start2, C_internal_size1, C_internal_size2)] * beta;
652  }
653  else
654  {
655 #ifdef VIENNACL_WITH_OPENMP
656  #pragma omp parallel for
657 #endif
658  for (long col = 0; col < static_cast<long>(A_size2); ++col)
659  for (long row = 0; row < static_cast<long>(A_size1); ++row)
660  OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col), wrapper_C(row, col));
661 
662  //data_A[index_generator_A::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)]
663  // = data_B[index_generator_B::mem_index(row * B_inc1 + B_start1, col * B_inc2 + B_start2, B_internal_size1, B_internal_size2)] * alpha
664  // + data_C[index_generator_C::mem_index(row * C_inc1 + C_start1, col * C_inc2 + C_start2, C_internal_size1, C_internal_size2)] * beta;
665  }
666  }
667 
668  // Unary operations
669 
670  // A = op(B)
671  template <typename NumericT, typename F, typename OP>
674  {
675  typedef NumericT value_type;
677 
678  value_type * data_A = detail::extract_raw_pointer<value_type>(A);
679  value_type const * data_B = detail::extract_raw_pointer<value_type>(proxy.lhs());
680 
681  vcl_size_t A_start1 = viennacl::traits::start1(A);
682  vcl_size_t A_start2 = viennacl::traits::start2(A);
685  vcl_size_t A_size1 = viennacl::traits::size1(A);
686  vcl_size_t A_size2 = viennacl::traits::size2(A);
687  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
688  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
689 
690  vcl_size_t B_start1 = viennacl::traits::start1(proxy.lhs());
691  vcl_size_t B_start2 = viennacl::traits::start2(proxy.lhs());
692  vcl_size_t B_inc1 = viennacl::traits::stride1(proxy.lhs());
693  vcl_size_t B_inc2 = viennacl::traits::stride2(proxy.lhs());
694  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(proxy.lhs());
695  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(proxy.lhs());
696 
697  detail::matrix_array_wrapper<value_type, typename F::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
698  detail::matrix_array_wrapper<value_type const, typename F::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
699 
700  if (detail::is_row_major(typename F::orientation_category()))
701  {
702 #ifdef VIENNACL_WITH_OPENMP
703  #pragma omp parallel for
704 #endif
705  for (long row = 0; row < static_cast<long>(A_size1); ++row)
706  for (long col = 0; col < static_cast<long>(A_size2); ++col)
707  OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col));
708  }
709  else
710  {
711 #ifdef VIENNACL_WITH_OPENMP
712  #pragma omp parallel for
713 #endif
714  for (long col = 0; col < static_cast<long>(A_size2); ++col)
715  for (long row = 0; row < static_cast<long>(A_size1); ++row)
716  OpFunctor::apply(wrapper_A(row, col), wrapper_B(row, col));
717  }
718  }
719 
720 
721 
722  //
724  //
725 
726  // A * x
727 
736  template <typename NumericT, typename F>
738  const vector_base<NumericT> & vec,
739  vector_base<NumericT> & result)
740  {
741  typedef NumericT value_type;
742 
743  value_type const * data_A = detail::extract_raw_pointer<value_type>(mat);
744  value_type const * data_x = detail::extract_raw_pointer<value_type>(vec);
745  value_type * data_result = detail::extract_raw_pointer<value_type>(result);
746 
747  vcl_size_t A_start1 = viennacl::traits::start1(mat);
748  vcl_size_t A_start2 = viennacl::traits::start2(mat);
751  vcl_size_t A_size1 = viennacl::traits::size1(mat);
752  vcl_size_t A_size2 = viennacl::traits::size2(mat);
753  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat);
754  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat);
755 
758 
760  vcl_size_t inc2 = viennacl::traits::stride(result);
761 
762  if (detail::is_row_major(typename F::orientation_category()))
763  {
764 #ifdef VIENNACL_WITH_OPENMP
765  #pragma omp parallel for
766 #endif
767  for (long row = 0; row < static_cast<long>(A_size1); ++row)
768  {
769  value_type temp = 0;
770  for (vcl_size_t col = 0; col < A_size2; ++col)
771  temp += data_A[viennacl::row_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];
772 
773  data_result[row * inc2 + start2] = temp;
774  }
775  }
776  else
777  {
778  {
779  value_type temp = data_x[start1];
780  for (vcl_size_t row = 0; row < A_size1; ++row)
781  data_result[row * inc2 + start2] = data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, A_start2, A_internal_size1, A_internal_size2)] * temp;
782  }
783  for (vcl_size_t col = 1; col < A_size2; ++col) //run through matrix sequentially
784  {
785  value_type temp = data_x[col * inc1 + start1];
786  for (vcl_size_t row = 0; row < A_size1; ++row)
787  data_result[row * inc2 + start2] += data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * temp;
788  }
789  }
790  }
791 
792 
793  // trans(A) * x
794 
803  template <typename NumericT, typename F>
805  const vector_base<NumericT> & vec,
806  vector_base<NumericT> & result)
807  {
808  typedef NumericT value_type;
809 
810  value_type const * data_A = detail::extract_raw_pointer<value_type>(mat_trans.lhs());
811  value_type const * data_x = detail::extract_raw_pointer<value_type>(vec);
812  value_type * data_result = detail::extract_raw_pointer<value_type>(result);
813 
814  vcl_size_t A_start1 = viennacl::traits::start1(mat_trans.lhs());
815  vcl_size_t A_start2 = viennacl::traits::start2(mat_trans.lhs());
816  vcl_size_t A_inc1 = viennacl::traits::stride1(mat_trans.lhs());
817  vcl_size_t A_inc2 = viennacl::traits::stride2(mat_trans.lhs());
818  vcl_size_t A_size1 = viennacl::traits::size1(mat_trans.lhs());
819  vcl_size_t A_size2 = viennacl::traits::size2(mat_trans.lhs());
820  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat_trans.lhs());
821  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat_trans.lhs());
822 
825 
827  vcl_size_t inc2 = viennacl::traits::stride(result);
828 
829  if (detail::is_row_major(typename F::orientation_category()))
830  {
831  {
832  value_type temp = data_x[start1];
833  for (vcl_size_t row = 0; row < A_size2; ++row)
834  data_result[row * inc2 + start2] = data_A[viennacl::row_major::mem_index(A_start1, row * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * temp;
835  }
836 
837  for (vcl_size_t col = 1; col < A_size1; ++col) //run through matrix sequentially
838  {
839  value_type temp = data_x[col * inc1 + start1];
840  for (vcl_size_t row = 0; row < A_size2; ++row)
841  {
842  data_result[row * inc2 + start2] += data_A[viennacl::row_major::mem_index(col * A_inc1 + A_start1, row * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * temp;
843  }
844  }
845  }
846  else
847  {
848 #ifdef VIENNACL_WITH_OPENMP
849  #pragma omp parallel for
850 #endif
851  for (long row = 0; row < static_cast<long>(A_size2); ++row)
852  {
853  value_type temp = 0;
854  for (vcl_size_t col = 0; col < A_size1; ++col)
855  temp += data_A[viennacl::column_major::mem_index(col * A_inc1 + A_start1, row * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] * data_x[col * inc1 + start1];
856 
857  data_result[row * inc2 + start2] = temp;
858  }
859  }
860  }
861 
862 
863  //
865  //
866 
867  namespace detail
868  {
869  template <typename A, typename B, typename C, typename NumericT>
870  void prod(A & a, B & b, C & c,
871  vcl_size_t C_size1, vcl_size_t C_size2, vcl_size_t A_size2,
872  NumericT alpha, NumericT beta)
873  {
874 #ifdef VIENNACL_WITH_OPENMP
875  #pragma omp parallel for
876 #endif
877  for (long i=0; i<static_cast<long>(C_size1); ++i)
878  {
879  for (vcl_size_t j=0; j<C_size2; ++j)
880  {
881  NumericT temp = 0;
882  for (vcl_size_t k=0; k<A_size2; ++k)
883  temp += a(i, k) * b(k, j);
884 
885  temp *= alpha;
886  if (beta != 0)
887  temp += beta * c(i,j);
888  c(i,j) = temp;
889  }
890  }
891  }
892 
893  }
894 
900  template <typename NumericT, typename F1, typename F2, typename F3, typename ScalarType >
902  const matrix_base<NumericT, F2> & B,
904  ScalarType alpha,
905  ScalarType beta)
906  {
907  typedef NumericT value_type;
908 
909  value_type const * data_A = detail::extract_raw_pointer<value_type>(A);
910  value_type const * data_B = detail::extract_raw_pointer<value_type>(B);
911  value_type * data_C = detail::extract_raw_pointer<value_type>(C);
912 
913  vcl_size_t A_start1 = viennacl::traits::start1(A);
914  vcl_size_t A_start2 = viennacl::traits::start2(A);
917  vcl_size_t A_size2 = viennacl::traits::size2(A);
918  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
919  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
920 
921  vcl_size_t B_start1 = viennacl::traits::start1(B);
922  vcl_size_t B_start2 = viennacl::traits::start2(B);
925  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(B);
926  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(B);
927 
928  vcl_size_t C_start1 = viennacl::traits::start1(C);
929  vcl_size_t C_start2 = viennacl::traits::start2(C);
932  vcl_size_t C_size1 = viennacl::traits::size1(C);
933  vcl_size_t C_size2 = viennacl::traits::size2(C);
934  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(C);
935  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(C);
936 
937  detail::matrix_array_wrapper<value_type const, typename F1::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
938  detail::matrix_array_wrapper<value_type const, typename F2::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
939  detail::matrix_array_wrapper<value_type, typename F3::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
940 
941  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
942  }
943 
944 
945 
951  template <typename NumericT, typename F1, typename F2, typename F3, typename ScalarType >
954  op_trans> & A,
955  const matrix_base<NumericT, F2> & B,
957  ScalarType alpha,
958  ScalarType beta)
959  {
960  typedef NumericT value_type;
961 
962  value_type const * data_A = detail::extract_raw_pointer<value_type>(A.lhs());
963  value_type const * data_B = detail::extract_raw_pointer<value_type>(B);
964  value_type * data_C = detail::extract_raw_pointer<value_type>(C);
965 
966  vcl_size_t A_start1 = viennacl::traits::start1(A.lhs());
967  vcl_size_t A_start2 = viennacl::traits::start2(A.lhs());
968  vcl_size_t A_inc1 = viennacl::traits::stride1(A.lhs());
969  vcl_size_t A_inc2 = viennacl::traits::stride2(A.lhs());
970  vcl_size_t A_size1 = viennacl::traits::size1(A.lhs());
971  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A.lhs());
972  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A.lhs());
973 
974  vcl_size_t B_start1 = viennacl::traits::start1(B);
975  vcl_size_t B_start2 = viennacl::traits::start2(B);
978  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(B);
979  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(B);
980 
981  vcl_size_t C_start1 = viennacl::traits::start1(C);
982  vcl_size_t C_start2 = viennacl::traits::start2(C);
985  vcl_size_t C_size1 = viennacl::traits::size1(C);
986  vcl_size_t C_size2 = viennacl::traits::size2(C);
987  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(C);
988  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(C);
989 
990  detail::matrix_array_wrapper<value_type const, typename F1::orientation_category, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
991  detail::matrix_array_wrapper<value_type const, typename F2::orientation_category, false> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
992  detail::matrix_array_wrapper<value_type, typename F3::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
993 
994  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
995  }
996 
997 
998 
999 
1005  template <typename NumericT, typename F1, typename F2, typename F3, typename ScalarType >
1009  ScalarType alpha,
1010  ScalarType beta)
1011  {
1012  typedef NumericT value_type;
1013 
1014  value_type const * data_A = detail::extract_raw_pointer<value_type>(A);
1015  value_type const * data_B = detail::extract_raw_pointer<value_type>(B.lhs());
1016  value_type * data_C = detail::extract_raw_pointer<value_type>(C);
1017 
1018  vcl_size_t A_start1 = viennacl::traits::start1(A);
1019  vcl_size_t A_start2 = viennacl::traits::start2(A);
1022  vcl_size_t A_size2 = viennacl::traits::size2(A);
1023  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A);
1024  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A);
1025 
1026  vcl_size_t B_start1 = viennacl::traits::start1(B.lhs());
1027  vcl_size_t B_start2 = viennacl::traits::start2(B.lhs());
1028  vcl_size_t B_inc1 = viennacl::traits::stride1(B.lhs());
1029  vcl_size_t B_inc2 = viennacl::traits::stride2(B.lhs());
1030  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(B.lhs());
1031  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(B.lhs());
1032 
1033  vcl_size_t C_start1 = viennacl::traits::start1(C);
1034  vcl_size_t C_start2 = viennacl::traits::start2(C);
1037  vcl_size_t C_size1 = viennacl::traits::size1(C);
1038  vcl_size_t C_size2 = viennacl::traits::size2(C);
1039  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(C);
1040  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(C);
1041 
1042  detail::matrix_array_wrapper<value_type const, typename F1::orientation_category, false> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1043  detail::matrix_array_wrapper<value_type const, typename F2::orientation_category, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1044  detail::matrix_array_wrapper<value_type, typename F3::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1045 
1046  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size2, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1047  }
1048 
1049 
1050 
1056  template <typename NumericT, typename F1, typename F2, typename F3, typename ScalarType >
1060  ScalarType alpha,
1061  ScalarType beta)
1062  {
1063  typedef NumericT value_type;
1064 
1065  value_type const * data_A = detail::extract_raw_pointer<value_type>(A.lhs());
1066  value_type const * data_B = detail::extract_raw_pointer<value_type>(B.lhs());
1067  value_type * data_C = detail::extract_raw_pointer<value_type>(C);
1068 
1069  vcl_size_t A_start1 = viennacl::traits::start1(A.lhs());
1070  vcl_size_t A_start2 = viennacl::traits::start2(A.lhs());
1071  vcl_size_t A_inc1 = viennacl::traits::stride1(A.lhs());
1072  vcl_size_t A_inc2 = viennacl::traits::stride2(A.lhs());
1073  vcl_size_t A_size1 = viennacl::traits::size1(A.lhs());
1074  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(A.lhs());
1075  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(A.lhs());
1076 
1077  vcl_size_t B_start1 = viennacl::traits::start1(B.lhs());
1078  vcl_size_t B_start2 = viennacl::traits::start2(B.lhs());
1079  vcl_size_t B_inc1 = viennacl::traits::stride1(B.lhs());
1080  vcl_size_t B_inc2 = viennacl::traits::stride2(B.lhs());
1081  vcl_size_t B_internal_size1 = viennacl::traits::internal_size1(B.lhs());
1082  vcl_size_t B_internal_size2 = viennacl::traits::internal_size2(B.lhs());
1083 
1084  vcl_size_t C_start1 = viennacl::traits::start1(C);
1085  vcl_size_t C_start2 = viennacl::traits::start2(C);
1088  vcl_size_t C_size1 = viennacl::traits::size1(C);
1089  vcl_size_t C_size2 = viennacl::traits::size2(C);
1090  vcl_size_t C_internal_size1 = viennacl::traits::internal_size1(C);
1091  vcl_size_t C_internal_size2 = viennacl::traits::internal_size2(C);
1092 
1093  detail::matrix_array_wrapper<value_type const, typename F1::orientation_category, true> wrapper_A(data_A, A_start1, A_start2, A_inc1, A_inc2, A_internal_size1, A_internal_size2);
1094  detail::matrix_array_wrapper<value_type const, typename F2::orientation_category, true> wrapper_B(data_B, B_start1, B_start2, B_inc1, B_inc2, B_internal_size1, B_internal_size2);
1095  detail::matrix_array_wrapper<value_type, typename F3::orientation_category, false> wrapper_C(data_C, C_start1, C_start2, C_inc1, C_inc2, C_internal_size1, C_internal_size2);
1096 
1097  detail::prod(wrapper_A, wrapper_B, wrapper_C, C_size1, C_size2, A_size1, static_cast<value_type>(alpha), static_cast<value_type>(beta));
1098  }
1099 
1100 
1101 
1102 
1103  //
1105  //
1106 
1107 
1119  template <typename NumericT, typename F, typename S1>
1121  S1 const & alpha, vcl_size_t /*len_alpha*/, bool reciprocal_alpha, bool flip_sign_alpha,
1122  const vector_base<NumericT> & vec1,
1123  const vector_base<NumericT> & vec2)
1124  {
1125  typedef NumericT value_type;
1126 
1127  value_type * data_A = detail::extract_raw_pointer<value_type>(mat1);
1128  value_type const * data_v1 = detail::extract_raw_pointer<value_type>(vec1);
1129  value_type const * data_v2 = detail::extract_raw_pointer<value_type>(vec2);
1130 
1131  vcl_size_t A_start1 = viennacl::traits::start1(mat1);
1132  vcl_size_t A_start2 = viennacl::traits::start2(mat1);
1133  vcl_size_t A_inc1 = viennacl::traits::stride1(mat1);
1134  vcl_size_t A_inc2 = viennacl::traits::stride2(mat1);
1135  vcl_size_t A_size1 = viennacl::traits::size1(mat1);
1136  vcl_size_t A_size2 = viennacl::traits::size2(mat1);
1137  vcl_size_t A_internal_size1 = viennacl::traits::internal_size1(mat1);
1138  vcl_size_t A_internal_size2 = viennacl::traits::internal_size2(mat1);
1139 
1141  vcl_size_t inc1 = viennacl::traits::stride(vec1);
1142 
1144  vcl_size_t inc2 = viennacl::traits::stride(vec2);
1145 
1146  value_type data_alpha = alpha;
1147  if (flip_sign_alpha)
1148  data_alpha = -data_alpha;
1149  if (reciprocal_alpha)
1150  data_alpha = static_cast<value_type>(1) / data_alpha;
1151 
1152  if (detail::is_row_major(typename F::orientation_category()))
1153  {
1154  for (vcl_size_t row = 0; row < A_size1; ++row)
1155  {
1156  value_type value_v1 = data_alpha * data_v1[row * inc1 + start1];
1157  for (vcl_size_t col = 0; col < A_size2; ++col)
1158  data_A[viennacl::row_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += value_v1 * data_v2[col * inc2 + start2];
1159  }
1160  }
1161  else
1162  {
1163  for (vcl_size_t col = 0; col < A_size2; ++col) //run through matrix sequentially
1164  {
1165  value_type value_v2 = data_alpha * data_v2[col * inc2 + start2];
1166  for (vcl_size_t row = 0; row < A_size1; ++row)
1167  data_A[viennacl::column_major::mem_index(row * A_inc1 + A_start1, col * A_inc2 + A_start2, A_internal_size1, A_internal_size2)] += data_v1[row * inc1 + start1] * value_v2;
1168  }
1169  }
1170  }
1171 
1172  } // namespace host_based
1173  } //namespace linalg
1174 } //namespace viennacl
1175 
1176 
1177 #endif
bool is_row_major(viennacl::row_major_tag)
Definition: common.hpp:73
std::size_t vcl_size_t
Definition: forwards.h:58
void matrix_column(const matrix_base< NumericT, F > &mat, unsigned int j, vector_base< NumericT > &vec)
Definition: matrix_operations.hpp:566
result_of::size_type< matrix_base< NumericT, F > >::type stride2(matrix_base< NumericT, F > const &s)
Definition: stride.hpp:68
void scaled_rank_1_update(matrix_base< NumericT, F > &mat1, S1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, const vector_base< NumericT > &vec1, const vector_base< NumericT > &vec2)
The implementation of the operation mat += alpha * vec1 * vec2^T, i.e. a scaled rank 1 update...
Definition: matrix_operations.hpp:1120
Generic size and resize functionality for different vector and matrix types.
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t, vcl_size_t num_cols)
Returns the memory offset for entry (i,j) of a dense matrix.
Definition: forwards.h:256
void prod(A &a, B &b, C &c, vcl_size_t C_size1, vcl_size_t C_size2, vcl_size_t A_size2, NumericT alpha, NumericT beta)
Definition: matrix_operations.hpp:870
Extracts the underlying OpenCL start index handle from a vector, a matrix, an expression etc...
Various little tools used here and there in ViennaCL.
void matrix_row(const matrix_base< NumericT, F > &mat, unsigned int i, vector_base< NumericT > &vec)
Definition: matrix_operations.hpp:539
void element_op(matrix_base< NumericT, F > &A, matrix_expression< const matrix_base< NumericT, F >, const matrix_base< NumericT, F >, op_element_binary< OP > > const &proxy)
Implementation of the element-wise operations A = B .* C and A = B ./ C (using MATLAB syntax) ...
Definition: matrix_operations.hpp:604
vcl_size_t size1(MatrixType const &mat)
Generic routine for obtaining the number of rows of a matrix (ViennaCL, uBLAS, etc.)
Definition: size.hpp:216
void matrix_assign(matrix_base< NumericT, F > &mat, NumericT s, bool clear=false)
Definition: matrix_operations.hpp:395
Worker class for decomposing expression templates.
Definition: op_applier.hpp:43
result_of::size_type< viennacl::vector_base< T > >::type stride(viennacl::vector_base< T > const &s)
Definition: stride.hpp:46
Common base class for dense vectors, vector ranges, and vector slices.
Definition: forwards.h:205
void clear(VectorType &vec)
Generic routine for setting all entries of a vector to zero. This is the version for non-ViennaCL obj...
Definition: clear.hpp:57
This file provides the forward declarations for the main types used within ViennaCL.
result_of::size_type< T >::type start1(T const &obj)
Definition: start.hpp:64
Determines row and column increments for matrices and matrix proxies.
void prod_impl(const matrix_base< NumericT, F > &mat, const vector_base< NumericT > &vec, vector_base< NumericT > &result)
Carries out matrix-vector multiplication.
Definition: matrix_operations.hpp:737
result_of::size_type< MatrixType >::type size2(MatrixType const &mat)
Generic routine for obtaining the number of columns of a matrix (ViennaCL, uBLAS, etc...
Definition: size.hpp:245
void matrix_diag_from_vector(const vector_base< NumericT > &vec, int k, matrix_base< NumericT, F > &mat)
Definition: matrix_operations.hpp:466
A dense matrix class.
Definition: forwards.h:290
void matrix_diag_to_vector(const matrix_base< NumericT, F > &mat, int k, vector_base< NumericT > &vec)
Definition: matrix_operations.hpp:504
result_of::size_type< matrix_base< NumericT, F > >::type stride1(matrix_base< NumericT, F > const &s)
Definition: stride.hpp:57
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
vcl_size_t size(VectorType const &vec)
Generic routine for obtaining the size of a vector (ViennaCL, uBLAS, etc.)
Definition: size.hpp:144
result_of::size_type< T >::type start2(T const &obj)
Definition: start.hpp:83
Helper array for accessing a strided submatrix embedded in a larger matrix.
Definition: common.hpp:100
result_of::size_type< T >::type start(T const &obj)
Definition: start.hpp:43
static vcl_size_t mem_index(vcl_size_t i, vcl_size_t j, vcl_size_t num_rows, vcl_size_t)
Returns the memory offset for entry (i,j) of a dense matrix.
Definition: forwards.h:273
Expression template class for representing a tree of expressions which ultimately result in a matrix...
Definition: forwards.h:283
vcl_size_t internal_size2(matrix_base< NumericT, F > const &mat)
Helper routine for obtaining the internal number of entries per column of a ViennaCL matrix...
Definition: size.hpp:287
Proxy classes for vectors.
vector_expression< const matrix_base< NumericT, F >, const unsigned int, op_row > row(const matrix_base< NumericT, F > &A, unsigned int i)
Definition: matrix.hpp:910
All the predicates used within ViennaCL. Checks for expressions to be vectors, etc.
Common routines for single-threaded or OpenMP-enabled execution on CPU.
The vector type with operator-overloads and proxy classes is defined here. Linear algebra operations ...
void am(matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha)
Definition: matrix_operations.hpp:52
A tag class representing transposed matrices.
Definition: forwards.h:165
A tag class representing element-wise binary operations (like multiplication) on vectors or matrices...
Definition: forwards.h:86
void matrix_diagonal_assign(matrix_base< NumericT, F > &mat, NumericT s)
Definition: matrix_operations.hpp:440
vcl_size_t internal_size1(matrix_base< NumericT, F > const &mat)
Helper routine for obtaining the internal number of entries per row of a ViennaCL matrix...
Definition: size.hpp:279
Extracts the underlying OpenCL handle from a vector, a matrix, an expression etc. ...
Defines the action of certain unary and binary operators and its arguments (for host execution)...
A tag class representing element-wise unary operations (like sin()) on vectors or matrices...
Definition: forwards.h:90
Implementation of the ViennaCL scalar class.
void ambm_m(matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT, F > const &mat3, ScalarType2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
Definition: matrix_operations.hpp:261
A collection of compile time type deductions.
void ambm(matrix_base< NumericT, F > &mat1, matrix_base< NumericT, F > const &mat2, ScalarType1 const &alpha, vcl_size_t, bool reciprocal_alpha, bool flip_sign_alpha, matrix_base< NumericT, F > const &mat3, ScalarType2 const &beta, vcl_size_t, bool reciprocal_beta, bool flip_sign_beta)
Definition: matrix_operations.hpp:132
Simple enable-if variant that uses the SFINAE pattern.