1 #ifndef VIENNACL_LINALG_OPENCL_KERNELS_VECTOR_HPP
2 #define VIENNACL_LINALG_OPENCL_KERNELS_VECTOR_HPP
42 template <
typename StringType>
45 source.append(
" for (unsigned int i = get_global_id(0); i < size1.z; i += get_global_size(0)) \n");
48 source.append(
" vec1[i*size1.y+size1.x] "); source.append(cfg.
assign_op); source.append(
" vec2[i*size2.y+size2.x] ");
50 source.append(
"* alpha ");
52 source.append(
"/ alpha ");
55 source.append(
"+ vec3[i*size3.y+size3.x] ");
57 source.append(
"* beta");
59 source.append(
"/ beta");
64 source.append(
" vec1[i] "); source.append(cfg.
assign_op); source.append(
" vec2[i] ");
66 source.append(
"* alpha ");
68 source.append(
"/ alpha ");
71 source.append(
"+ vec3[i] ");
73 source.append(
"* beta");
75 source.append(
"/ beta");
78 source.append(
"; \n");
81 template <
typename StringType>
84 source.append(
"__kernel void av");
91 source.append(
"_cpu");
93 source.append(
"_gpu");
96 source.append(
"_cpu");
98 source.append(
"_gpu");
99 source.append(
"( \n");
100 source.append(
" __global "); source.append(numeric_string); source.append(
" * vec1, \n");
101 source.append(
" uint4 size1, \n");
102 source.append(
" \n");
105 source.append(
" "); source.append(numeric_string); source.append(
" fac2, \n");
109 source.append(
" __global "); source.append(numeric_string); source.append(
" * fac2, \n");
111 source.append(
" unsigned int options2, \n");
112 source.append(
" __global const "); source.append(numeric_string); source.append(
" * vec2, \n");
113 source.append(
" uint4 size2");
117 source.append(
", \n\n");
120 source.append(
" "); source.append(numeric_string); source.append(
" fac3, \n");
124 source.append(
" __global "); source.append(numeric_string); source.append(
" * fac3, \n");
126 source.append(
" unsigned int options3, \n");
127 source.append(
" __global const "); source.append(numeric_string); source.append(
" * vec3, \n");
128 source.append(
" uint4 size3 \n");
130 source.append(
") { \n");
134 source.append(
" "); source.append(numeric_string); source.append(
" alpha = fac2; \n");
138 source.append(
" "); source.append(numeric_string); source.append(
" alpha = fac2[0]; \n");
140 source.append(
" if (options2 & (1 << 0)) \n");
141 source.append(
" alpha = -alpha; \n");
142 source.append(
" \n");
146 source.append(
" "); source.append(numeric_string); source.append(
" beta = fac3; \n");
150 source.append(
" "); source.append(numeric_string); source.append(
" beta = fac3[0]; \n");
154 source.append(
" if (options3 & (1 << 0)) \n");
155 source.append(
" beta = -beta; \n");
156 source.append(
" \n");
158 source.append(
" if (options2 & (1 << 1)) { \n");
161 source.append(
" if (options3 & (1 << 1)) {\n");
163 source.append(
" } else {\n");
165 source.append(
" } \n");
169 source.append(
" } else { \n");
172 source.append(
" if (options3 & (1 << 1)) {\n");
174 source.append(
" } else {\n");
176 source.append(
" } \n");
180 source.append(
" } \n");
181 source.append(
"} \n");
184 template <
typename StringType>
210 template <
typename StringType>
213 source.append(
"__kernel void plane_rotation( \n");
214 source.append(
" __global "); source.append(numeric_string); source.append(
" * vec1, \n");
215 source.append(
" unsigned int start1, \n");
216 source.append(
" unsigned int inc1, \n");
217 source.append(
" unsigned int size1, \n");
218 source.append(
" __global "); source.append(numeric_string); source.append(
" * vec2, \n");
219 source.append(
" unsigned int start2, \n");
220 source.append(
" unsigned int inc2, \n");
221 source.append(
" unsigned int size2, \n");
222 source.append(
" "); source.append(numeric_string); source.append(
" alpha, \n");
223 source.append(
" "); source.append(numeric_string); source.append(
" beta) \n");
224 source.append(
"{ \n");
225 source.append(
" "); source.append(numeric_string); source.append(
" tmp1 = 0; \n");
226 source.append(
" "); source.append(numeric_string); source.append(
" tmp2 = 0; \n");
227 source.append(
" \n");
228 source.append(
" for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0)) \n");
229 source.append(
" { \n");
230 source.append(
" tmp1 = vec1[i*inc1+start1]; \n");
231 source.append(
" tmp2 = vec2[i*inc2+start2]; \n");
232 source.append(
" \n");
233 source.append(
" vec1[i*inc1+start1] = alpha * tmp1 + beta * tmp2; \n");
234 source.append(
" vec2[i*inc2+start2] = alpha * tmp2 - beta * tmp1; \n");
235 source.append(
" } \n");
236 source.append(
" \n");
237 source.append(
"} \n");
240 template <
typename StringType>
243 source.append(
"__kernel void swap( \n");
244 source.append(
" __global "); source.append(numeric_string); source.append(
" * vec1, \n");
245 source.append(
" unsigned int start1, \n");
246 source.append(
" unsigned int inc1, \n");
247 source.append(
" unsigned int size1, \n");
248 source.append(
" __global "); source.append(numeric_string); source.append(
" * vec2, \n");
249 source.append(
" unsigned int start2, \n");
250 source.append(
" unsigned int inc2, \n");
251 source.append(
" unsigned int size2 \n");
252 source.append(
" ) \n");
253 source.append(
"{ \n");
254 source.append(
" "); source.append(numeric_string); source.append(
" tmp; \n");
255 source.append(
" for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0)) \n");
256 source.append(
" { \n");
257 source.append(
" tmp = vec2[i*inc2+start2]; \n");
258 source.append(
" vec2[i*inc2+start2] = vec1[i*inc1+start1]; \n");
259 source.append(
" vec1[i*inc1+start1] = tmp; \n");
260 source.append(
" } \n");
261 source.append(
"} \n");
264 template <
typename StringType>
267 source.append(
"__kernel void assign_cpu( \n");
268 source.append(
" __global "); source.append(numeric_string); source.append(
" * vec1, \n");
269 source.append(
" unsigned int start1, \n");
270 source.append(
" unsigned int inc1, \n");
271 source.append(
" unsigned int size1, \n");
272 source.append(
" unsigned int internal_size1, \n");
273 source.append(
" "); source.append(numeric_string); source.append(
" alpha) \n");
274 source.append(
"{ \n");
275 source.append(
" for (unsigned int i = get_global_id(0); i < internal_size1; i += get_global_size(0)) \n");
276 source.append(
" vec1[i*inc1+start1] = (i < size1) ? alpha : 0; \n");
277 source.append(
"} \n");
281 template <
typename StringType>
284 std::stringstream ss;
286 std::string vector_num_string = ss.str();
288 source.append(
"__kernel void inner_prod"); source.append(vector_num_string); source.append(
"( \n");
289 source.append(
" __global const "); source.append(numeric_string); source.append(
" * x, \n");
290 source.append(
" uint4 params_x, \n");
295 source.append(
" __global const "); source.append(numeric_string); source.append(
" * y"); source.append(ss.str()); source.append(
", \n");
296 source.append(
" uint4 params_y"); source.append(ss.str()); source.append(
", \n");
298 source.append(
" __local "); source.append(numeric_string); source.append(
" * tmp_buffer, \n");
299 source.append(
" __global "); source.append(numeric_string); source.append(
" * group_buffer) \n");
300 source.append(
"{ \n");
301 source.append(
" unsigned int entries_per_thread = (params_x.z - 1) / get_global_size(0) + 1; \n");
302 source.append(
" unsigned int vec_start_index = get_group_id(0) * get_local_size(0) * entries_per_thread; \n");
303 source.append(
" unsigned int vec_stop_index = min((unsigned int)((get_group_id(0) + 1) * get_local_size(0) * entries_per_thread), params_x.z); \n");
310 source.append(
" "); source.append(numeric_string); source.append(
" tmp"); source.append(ss.str()); source.append(
" = 0; \n");
312 source.append(
" for (unsigned int i = vec_start_index + get_local_id(0); i < vec_stop_index; i += get_local_size(0)) { \n");
313 source.append(
" "); source.append(numeric_string); source.append(
" val_x = x[i*params_x.y + params_x.x]; \n");
318 source.append(
" tmp"); source.append(ss.str()); source.append(
" += val_x * y"); source.append(ss.str()); source.append(
"[i * params_y"); source.append(ss.str()); source.append(
".y + params_y"); source.append(ss.str()); source.append(
".x]; \n");
320 source.append(
" } \n");
325 source.append(
" tmp_buffer[get_local_id(0) + "); source.append(ss.str()); source.append(
" * get_local_size(0)] = tmp"); source.append(ss.str()); source.append(
"; \n");
329 source.append(
" for (unsigned int stride = get_local_size(0)/2; stride > 0; stride /= 2) \n");
330 source.append(
" { \n");
331 source.append(
" barrier(CLK_LOCAL_MEM_FENCE); \n");
332 source.append(
" if (get_local_id(0) < stride) { \n");
337 source.append(
" tmp_buffer[get_local_id(0) + "); source.append(ss.str()); source.append(
" * get_local_size(0)] += tmp_buffer[get_local_id(0) + "); source.append(ss.str()); source.append(
" * get_local_size(0) + stride]; \n");
339 source.append(
" } \n");
340 source.append(
" } \n");
341 source.append(
" barrier(CLK_LOCAL_MEM_FENCE); \n");
343 source.append(
" if (get_local_id(0) == 0) { \n");
348 source.append(
" group_buffer[get_group_id(0) + "); source.append(ss.str()); source.append(
" * get_num_groups(0)] = tmp_buffer["); source.append(ss.str()); source.append(
" * get_local_size(0)]; \n");
350 source.append(
" } \n");
351 source.append(
"} \n");
355 template <
typename StringType>
358 bool is_float_or_double = (numeric_string ==
"float" || numeric_string ==
"double");
360 source.append(numeric_string); source.append(
" impl_norm( \n");
361 source.append(
" __global const "); source.append(numeric_string); source.append(
" * vec, \n");
362 source.append(
" unsigned int start1, \n");
363 source.append(
" unsigned int inc1, \n");
364 source.append(
" unsigned int size1, \n");
365 source.append(
" unsigned int norm_selector, \n");
366 source.append(
" __local "); source.append(numeric_string); source.append(
" * tmp_buffer) \n");
367 source.append(
"{ \n");
368 source.append(
" "); source.append(numeric_string); source.append(
" tmp = 0; \n");
369 source.append(
" if (norm_selector == 1) \n");
370 source.append(
" { \n");
371 source.append(
" for (unsigned int i = get_local_id(0); i < size1; i += get_local_size(0)) \n");
372 if (is_float_or_double)
373 source.append(
" tmp += fabs(vec[i*inc1 + start1]); \n");
375 source.append(
" tmp += abs(vec[i*inc1 + start1]); \n");
376 source.append(
" } \n");
377 source.append(
" else if (norm_selector == 2) \n");
378 source.append(
" { \n");
379 source.append(
" "); source.append(numeric_string); source.append(
" vec_entry = 0; \n");
380 source.append(
" for (unsigned int i = get_local_id(0); i < size1; i += get_local_size(0)) \n");
381 source.append(
" { \n");
382 source.append(
" vec_entry = vec[i*inc1 + start1]; \n");
383 source.append(
" tmp += vec_entry * vec_entry; \n");
384 source.append(
" } \n");
385 source.append(
" } \n");
386 source.append(
" else if (norm_selector == 0) \n");
387 source.append(
" { \n");
388 source.append(
" for (unsigned int i = get_local_id(0); i < size1; i += get_local_size(0)) \n");
389 if (is_float_or_double)
390 source.append(
" tmp = fmax(fabs(vec[i*inc1 + start1]), tmp); \n");
393 source.append(
" tmp = max(("); source.append(numeric_string); source.append(
")abs(vec[i*inc1 + start1]), tmp); \n");
395 source.append(
" } \n");
397 source.append(
" tmp_buffer[get_local_id(0)] = tmp; \n");
399 source.append(
" if (norm_selector > 0) \n");
400 source.append(
" { \n");
401 source.append(
" for (unsigned int stride = get_local_size(0)/2; stride > 0; stride /= 2) \n");
402 source.append(
" { \n");
403 source.append(
" barrier(CLK_LOCAL_MEM_FENCE); \n");
404 source.append(
" if (get_local_id(0) < stride) \n");
405 source.append(
" tmp_buffer[get_local_id(0)] += tmp_buffer[get_local_id(0)+stride]; \n");
406 source.append(
" } \n");
407 source.append(
" return tmp_buffer[0]; \n");
408 source.append(
" } \n");
411 source.append(
" for (unsigned int stride = get_local_size(0)/2; stride > 0; stride /= 2) \n");
412 source.append(
" { \n");
413 source.append(
" barrier(CLK_LOCAL_MEM_FENCE); \n");
414 source.append(
" if (get_local_id(0) < stride) \n");
415 if (is_float_or_double)
416 source.append(
" tmp_buffer[get_local_id(0)] = fmax(tmp_buffer[get_local_id(0)], tmp_buffer[get_local_id(0)+stride]); \n");
418 source.append(
" tmp_buffer[get_local_id(0)] = max(tmp_buffer[get_local_id(0)], tmp_buffer[get_local_id(0)+stride]); \n");
419 source.append(
" } \n");
421 source.append(
" return tmp_buffer[0]; \n");
422 source.append(
"}; \n");
424 source.append(
"__kernel void norm( \n");
425 source.append(
" __global const "); source.append(numeric_string); source.append(
" * vec, \n");
426 source.append(
" unsigned int start1, \n");
427 source.append(
" unsigned int inc1, \n");
428 source.append(
" unsigned int size1, \n");
429 source.append(
" unsigned int norm_selector, \n");
430 source.append(
" __local "); source.append(numeric_string); source.append(
" * tmp_buffer, \n");
431 source.append(
" __global "); source.append(numeric_string); source.append(
" * group_buffer) \n");
432 source.append(
"{ \n");
433 source.append(
" "); source.append(numeric_string); source.append(
" tmp = impl_norm(vec, \n");
434 source.append(
" ( get_group_id(0) * size1) / get_num_groups(0) * inc1 + start1, \n");
435 source.append(
" inc1, \n");
436 source.append(
" ( (1 + get_group_id(0)) * size1) / get_num_groups(0) \n");
437 source.append(
" - ( get_group_id(0) * size1) / get_num_groups(0), \n");
438 source.append(
" norm_selector, \n");
439 source.append(
" tmp_buffer); \n");
441 source.append(
" if (get_local_id(0) == 0) \n");
442 source.append(
" group_buffer[get_group_id(0)] = tmp; \n");
443 source.append(
"} \n");
447 template <
typename StringType>
451 source.append(
"__kernel void sum_inner_prod( \n");
452 source.append(
" __global "); source.append(numeric_string); source.append(
" * vec1, \n");
453 source.append(
" __local "); source.append(numeric_string); source.append(
" * tmp_buffer, \n");
454 source.append(
" __global "); source.append(numeric_string); source.append(
" * result, \n");
455 source.append(
" unsigned int start_result, \n");
456 source.append(
" unsigned int inc_result) \n");
457 source.append(
"{ \n");
458 source.append(
" tmp_buffer[get_local_id(0)] = vec1[get_global_id(0)]; \n");
460 source.append(
" for (unsigned int stride = get_local_size(0)/2; stride > 0; stride /= 2) \n");
461 source.append(
" { \n");
462 source.append(
" barrier(CLK_LOCAL_MEM_FENCE); \n");
463 source.append(
" if (get_local_id(0) < stride) \n");
464 source.append(
" tmp_buffer[get_local_id(0)] += tmp_buffer[get_local_id(0) + stride]; \n");
465 source.append(
" } \n");
466 source.append(
" barrier(CLK_LOCAL_MEM_FENCE); \n");
468 source.append(
" if (get_local_id(0) == 0) \n");
469 source.append(
" result[start_result + inc_result * get_group_id(0)] = tmp_buffer[0]; \n");
470 source.append(
"} \n");
474 template <
typename StringType>
475 void generate_sum(StringType & source, std::string
const & numeric_string)
478 source.append(
"__kernel void sum( \n");
479 source.append(
" __global "); source.append(numeric_string); source.append(
" * vec1, \n");
480 source.append(
" unsigned int start1, \n");
481 source.append(
" unsigned int inc1, \n");
482 source.append(
" unsigned int size1, \n");
483 source.append(
" unsigned int option, \n");
484 source.append(
" __local "); source.append(numeric_string); source.append(
" * tmp_buffer, \n");
485 source.append(
" __global "); source.append(numeric_string); source.append(
" * result) \n");
486 source.append(
"{ \n");
487 source.append(
" "); source.append(numeric_string); source.append(
" thread_sum = 0; \n");
488 source.append(
" "); source.append(numeric_string); source.append(
" tmp = 0; \n");
489 source.append(
" for (unsigned int i = get_local_id(0); i<size1; i += get_local_size(0)) \n");
490 source.append(
" { \n");
491 source.append(
" if (option > 0) \n");
492 source.append(
" thread_sum += vec1[i*inc1+start1]; \n");
493 source.append(
" else \n");
494 source.append(
" { \n");
495 source.append(
" tmp = vec1[i*inc1+start1]; \n");
496 source.append(
" tmp = (tmp < 0) ? -tmp : tmp; \n");
497 source.append(
" thread_sum = (thread_sum > tmp) ? thread_sum : tmp; \n");
498 source.append(
" } \n");
499 source.append(
" } \n");
501 source.append(
" tmp_buffer[get_local_id(0)] = thread_sum; \n");
503 source.append(
" for (unsigned int stride = get_local_size(0)/2; stride > 0; stride /= 2) \n");
504 source.append(
" { \n");
505 source.append(
" barrier(CLK_LOCAL_MEM_FENCE); \n");
506 source.append(
" if (get_local_id(0) < stride) \n");
507 source.append(
" { \n");
508 source.append(
" if (option > 0) \n");
509 source.append(
" tmp_buffer[get_local_id(0)] += tmp_buffer[get_local_id(0) + stride]; \n");
510 source.append(
" else \n");
511 source.append(
" tmp_buffer[get_local_id(0)] = (tmp_buffer[get_local_id(0)] > tmp_buffer[get_local_id(0) + stride]) ? tmp_buffer[get_local_id(0)] : tmp_buffer[get_local_id(0) + stride]; \n");
512 source.append(
" } \n");
513 source.append(
" } \n");
514 source.append(
" barrier(CLK_LOCAL_MEM_FENCE); \n");
516 source.append(
" if (get_global_id(0) == 0) \n");
517 source.append(
" { \n");
518 if (numeric_string ==
"float" || numeric_string ==
"double")
520 source.append(
" if (option == 2) \n");
521 source.append(
" *result = sqrt(tmp_buffer[0]); \n");
522 source.append(
" else \n");
524 source.append(
" *result = tmp_buffer[0]; \n");
525 source.append(
" } \n");
526 source.append(
"} \n");
530 template <
typename StringType>
534 source.append(
"unsigned int index_norm_inf_impl( \n");
535 source.append(
" __global const "); source.append(numeric_string); source.append(
" * vec, \n");
536 source.append(
" unsigned int start1, \n");
537 source.append(
" unsigned int inc1, \n");
538 source.append(
" unsigned int size1, \n");
539 source.append(
" __local "); source.append(numeric_string); source.append(
" * entry_buffer, \n");
540 source.append(
" __local unsigned int * index_buffer) \n");
541 source.append(
"{ \n");
543 source.append(
" "); source.append(numeric_string); source.append(
" cur_max = 0; \n");
544 source.append(
" "); source.append(numeric_string); source.append(
" tmp; \n");
545 source.append(
" for (unsigned int i = get_global_id(0); i < size1; i += get_global_size(0)) \n");
546 source.append(
" { \n");
547 if (numeric_string ==
"float" || numeric_string ==
"double")
548 source.append(
" tmp = fabs(vec[i*inc1+start1]); \n");
550 source.append(
" tmp = abs(vec[i*inc1+start1]); \n");
551 source.append(
" if (cur_max < tmp) \n");
552 source.append(
" { \n");
553 source.append(
" entry_buffer[get_global_id(0)] = tmp; \n");
554 source.append(
" index_buffer[get_global_id(0)] = i; \n");
555 source.append(
" cur_max = tmp; \n");
556 source.append(
" } \n");
557 source.append(
" } \n");
560 source.append(
" for (unsigned int stride = get_global_size(0)/2; stride > 0; stride /= 2) \n");
561 source.append(
" { \n");
562 source.append(
" barrier(CLK_LOCAL_MEM_FENCE); \n");
563 source.append(
" if (get_global_id(0) < stride) \n");
564 source.append(
" { \n");
566 source.append(
" if (entry_buffer[get_global_id(0)] < entry_buffer[get_global_id(0)+stride]) \n");
567 source.append(
" { \n");
568 source.append(
" index_buffer[get_global_id(0)] = index_buffer[get_global_id(0)+stride]; \n");
569 source.append(
" entry_buffer[get_global_id(0)] = entry_buffer[get_global_id(0)+stride]; \n");
570 source.append(
" } \n");
571 source.append(
" } \n");
572 source.append(
" } \n");
573 source.append(
" \n");
574 source.append(
" return index_buffer[0]; \n");
575 source.append(
"} \n");
577 source.append(
"__kernel void index_norm_inf( \n");
578 source.append(
" __global "); source.append(numeric_string); source.append(
" * vec, \n");
579 source.append(
" unsigned int start1, \n");
580 source.append(
" unsigned int inc1, \n");
581 source.append(
" unsigned int size1, \n");
582 source.append(
" __local "); source.append(numeric_string); source.append(
" * entry_buffer, \n");
583 source.append(
" __local unsigned int * index_buffer, \n");
584 source.append(
" __global unsigned int * result) \n");
585 source.append(
"{ \n");
586 source.append(
" entry_buffer[get_global_id(0)] = 0; \n");
587 source.append(
" index_buffer[get_global_id(0)] = 0; \n");
588 source.append(
" unsigned int tmp = index_norm_inf_impl(vec, start1, inc1, size1, entry_buffer, index_buffer); \n");
589 source.append(
" if (get_global_id(0) == 0) *result = tmp; \n");
590 source.append(
"} \n");
599 template <
class TYPE>
612 static std::map<cl_context, bool> init_done;
616 source.reserve(8192);
618 viennacl::ocl::append_double_precision_pragma<TYPE>(ctx, source);
634 #ifdef VIENNACL_BUILD_INFO
635 std::cout <<
"Creating program " << prog_name << std::endl;
637 ctx.add_program(source, prog_name);
638 init_done[ctx.handle().get()] =
true;
645 template <
class TYPE>
658 static std::map<cl_context, bool> init_done;
662 source.reserve(8192);
664 viennacl::ocl::append_double_precision_pragma<TYPE>(ctx, source);
674 #ifdef VIENNACL_BUILD_INFO
675 std::cout <<
"Creating program " << prog_name << std::endl;
677 ctx.add_program(source, prog_name);
678 init_done[ctx.handle().get()] =
true;
avbv_scalar_type a
Definition: vector.hpp:37
std::size_t vcl_size_t
Definition: forwards.h:58
void generate_inner_prod(StringType &source, std::string const &numeric_string, vcl_size_t vector_num)
Definition: vector.hpp:282
void generate_index_norm_inf(StringType &source, std::string const &numeric_string)
Definition: vector.hpp:531
avbv_scalar_type
Enumeration for the scalar type in avbv-like operations.
Definition: vector.hpp:23
void generate_vector_swap(StringType &source, std::string const &numeric_string)
Definition: vector.hpp:241
Manages an OpenCL context and provides the respective convenience functions for creating buffers...
Definition: context.hpp:51
static std::string program_name()
Definition: vector.hpp:602
Main kernel class for generating OpenCL kernels for multiple inner products on/with viennacl::vector<...
Definition: vector.hpp:646
void generate_assign_cpu(StringType &source, std::string const &numeric_string, bool is_row_major)
Definition: matrix.hpp:257
Provides OpenCL-related utilities.
avbv_config()
Definition: vector.hpp:33
void generate_plane_rotation(StringType &source, std::string const &numeric_string)
Definition: vector.hpp:211
std::string assign_op
Definition: vector.hpp:36
const OCL_TYPE & get() const
Definition: handle.hpp:189
const viennacl::ocl::handle< cl_context > & handle() const
Returns the context handle.
Definition: context.hpp:476
avbv_scalar_type b
Definition: vector.hpp:38
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
void generate_avbv(StringType &source, std::string const &numeric_string)
Definition: vector.hpp:185
Definition: vector.hpp:25
void generate_sum(StringType &source, std::string const &numeric_string)
Definition: vector.hpp:475
Definition: vector.hpp:26
bool with_stride_and_range
Definition: vector.hpp:35
Configuration struct for generating OpenCL kernels for linear combinations of vectors.
Definition: vector.hpp:31
static void apply(viennacl::ocl::context const &)
Definition: utils.hpp:40
void generate_avbv_impl(StringType &source, std::string const &numeric_string, avbv_config const &cfg)
Definition: vector.hpp:82
void generate_avbv_impl2(StringType &source, std::string const &, avbv_config const &cfg, bool mult_alpha, bool mult_beta)
Definition: vector.hpp:43
void generate_inner_prod_sum(StringType &source, std::string const &numeric_string)
Definition: vector.hpp:448
Representation of an OpenCL kernel in ViennaCL.
void generate_norm(StringType &source, std::string const &numeric_string)
Definition: vector.hpp:356
static std::string program_name()
Definition: vector.hpp:648
static void init(viennacl::ocl::context &ctx)
Definition: vector.hpp:607
Helper class for converting a type to its string representation.
Definition: utils.hpp:57
Definition: vector.hpp:27
static void init(viennacl::ocl::context &ctx)
Definition: vector.hpp:653
Main kernel class for generating OpenCL kernels for operations on/with viennacl::vector<> without inv...
Definition: vector.hpp:600