ViennaCL - The Vienna Computing Library  1.5.2
profiles.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_GENERATOR_PROFILES_HPP
2 #define VIENNACL_GENERATOR_PROFILES_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2014, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8  Portions of this software are copyright by UChicago Argonne, LLC.
9 
10  -----------------
11  ViennaCL - The Vienna Computing Library
12  -----------------
13 
14  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15 
16  (A list of authors and contributors can be found in the PDF manual)
17 
18  License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 
27 #include <map>
28 
29 #include "viennacl/ocl/device.hpp"
30 
32 
34 
40 
41 namespace viennacl{
42 
43  namespace generator{
44 
45  namespace profiles{
46 
47  typedef cl_uint vendor_id_type;
48  typedef cl_device_type device_type;
49 
50  typedef std::string device_name_type;
52 
/** @brief Helper struct wrapping a std::map. Used to avoid type length explosion.
 *
 * Each level of the profile database derives from an instantiation of this
 * wrapper, so the nested map levels get short struct names.
 *
 * @tparam KeyT    Key type of the wrapped map
 * @tparam ValueT  Mapped type of the wrapped map
 */
template<class KeyT, class ValueT>
struct map_wrapper
{
  /** @brief Type of the wrapped map */
  typedef std::map<KeyT, ValueT> map_type;

  /** @brief The wrapped map; accessed directly for find()/end() style lookups */
  map_type map;

  /** @brief Forwards to std::map::operator[], i.e. default-inserts a value if the key is not present yet.
   *
   * @param key  Key to look up (and insert if absent)
   * @return Reference to the value stored under key
   */
  ValueT & operator[](KeyT const & key)
  {
    return map[key];
  }
};
60 
62  struct expression_map : public map_wrapper<expression_key_type, profile_base_ptr>{ };
63 
65  struct device_name_map : public map_wrapper<device_name_type, expression_map>{ };
66 
68  struct device_architecture_map : public map_wrapper<viennacl::ocl::device_architecture_family, device_name_map>{ };
69 
71  struct device_type_map : public map_wrapper<device_type,device_architecture_map>{ };
72 
74  struct database_type : public map_wrapper<vendor_id_type, device_type_map>{ };
75 
77  inline void set_generation_default_to(database_type & map, vendor_id_type vendor_id, viennacl::ocl::device_architecture_family family, expression_key_type expression, std::string const & device_name){
78  map[vendor_id][CL_DEVICE_TYPE_GPU][family][""][expression] = map[vendor_id][CL_DEVICE_TYPE_GPU][family][device_name][expression];
79  }
80 
82  inline void set_all_generation_default_to(database_type & map, vendor_id_type vendor_id, viennacl::ocl::device_architecture_family family, std::string const & device_name){
83  set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_SAXPY_TYPE,4),device_name);
84  set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_SAXPY_TYPE,4),device_name);
85  set_generation_default_to(map,vendor_id,family,std::make_pair(SCALAR_REDUCE_TYPE,4),device_name);
86  set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_REDUCE_Nx_TYPE,4),device_name);
87  set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_REDUCE_Tx_TYPE,4),device_name);
88  set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_NN_TYPE,4),device_name);
89  set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_TN_TYPE,4),device_name);
90  set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_NT_TYPE,4),device_name);
91  set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_TT_TYPE,4),device_name);
92 
93  set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_SAXPY_TYPE,8),device_name);
94  set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_SAXPY_TYPE,8),device_name);
95  set_generation_default_to(map,vendor_id,family,std::make_pair(SCALAR_REDUCE_TYPE,8),device_name);
96  set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_REDUCE_Nx_TYPE,8),device_name);
97  set_generation_default_to(map,vendor_id,family,std::make_pair(VECTOR_REDUCE_Tx_TYPE,8),device_name);
98  set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_NN_TYPE,8),device_name);
99  set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_TN_TYPE,8),device_name);
100  set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_NT_TYPE,8),device_name);
101  set_generation_default_to(map,vendor_id,family,std::make_pair(MATRIX_PRODUCT_TT_TYPE,8),device_name);
102  }
103 
105  static database_type init_database(){
106  database_type map;
107 
108  /*---------------------------*/
109  /* GPU Defaults */
110  /*---------------------------*/
111  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,128,128,true));
112  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
113  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(1, 128, 128, true));
114  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1, 1, 256, 32));
115  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1, 1, 256, 32));
116  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0));
117  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0));
118  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0));
119  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0));
120 
121 
122  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(1,128,128,true));
123  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
124  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(1, 128, 128, true));
125  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1, 1, 256, 32));
126  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1, 1, 256, 32));
127  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0));
128  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0));
129  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0));
130  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,32,8,4,4,4,1,0));
131 
132  /*---------------------------*/
133  /* CPU Defaults */
134  /*---------------------------*/
135  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(8,16,256,true));
136  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
137  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(8,8,512,true));
138  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,2,1,8));
139  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,16,8,8));
140  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
141  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
142  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
143  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
144 
145 
146  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(8,16,32,true));
147  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
148  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(8,8,512,true));
149  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,1,8));
150  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,8,16,16));
151  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
152  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
153  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
154  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_CPU][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
155 
156 
157  /*---------------------------*/
158  /* ACCELERATOR Defaults */
159  /*---------------------------*/
160  //same as CPU for now
161  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(8,16,256,true));
162  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
163  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(8,8,512,true));
164  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,2,1,8));
165  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,16,8,8));
166  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
167  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
168  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
169  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
170 
171 
172  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(8,16,32,true));
173  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
174  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(8,8,512,true));
175  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,1,8));
176  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,8,16,16));
177  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
178  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
179  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
180  map[viennacl::ocl::unknown_id][CL_DEVICE_TYPE_ACCELERATOR][viennacl::ocl::UNKNOWN][""][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,16,64,1,8,4,32,0,0));
181 
182 
183 
184  /*---------------------------*/
185  /* AMD */
186  /*---------------------------*/
187 
188  //Evergreen
189 
190  //Cypress
191  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,4,64,true));
192  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
193  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(8,128,128,true));
194  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,1,256,1024));
195  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,32,8,256));
196  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(4,4,64,16,4,4,8,1,0));
197  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(4,4,64,16,4,4,8,1,0));
198  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(4,8,64,16,4,4,8,1,0));
199  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(4,8,128,8,8,4,4,0,0));
200 
201  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(2,1,64,true));
202  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
203  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(2,256,64,true));
204  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,256,1024));
205  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,64,4,256));
206  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(2,16,32,16,2,2,8,0,0));
207  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(2,4,64,32,4,2,2,0,0));
208  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(4,2,64,32,8,8,4,0,0));
209  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Evergreen]["Cypress"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(2,16,64,8,2,2,4,0,0));
210 
211  //Default
212  set_all_generation_default_to(map,viennacl::ocl::amd_id,viennacl::ocl::Evergreen,"Cypress");
213 
214 
215  //Southern Islands
216  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,4,64,true));
217  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
218  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(8,128,128,true));
219  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,1,256,1024));
220  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,32,8,256));
221  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(4,8,128,32,4,4,4,1,0));
222  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,64,16,4,2,8,1,0));
223  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(4,16,64,16,4,4,8,1,0));
224  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(4,16,64,16,4,4,8,1,0));
225 
226  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(2,1,64,true));
227  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
228  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(2,256,64,true));
229  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,256,1024));
230  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,64,4,256));
231  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(2,4,128,64,4,2,2,1,0));
232  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(2,2,128,32,4,2,2,0,0));
233  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(2,8,128,32,2,2,2,1,0));
234  map[viennacl::ocl::amd_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::SouthernIslands]["Tahiti"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(2,8,128,32,2,2,2,1,0));
235 
236 
237  //Default
238  set_all_generation_default_to(map,viennacl::ocl::amd_id,viennacl::ocl::SouthernIslands,"Tahiti");
239 
240 
241  /*---------------------------*/
242  /* NVidia */
243  /*---------------------------*/
244 
245  //-----Fermi
246 
247  //Geforce GTX 470
248  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,1,256,true));
249  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
250  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(4,64,512,true));
251  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,1,256,1024));
252  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,64,4,64));
253  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(1,2,64,64,8,4,2,1,0));
254  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,16,4,4,8,0,0));
255  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,4,128,32,4,8,4,1,0));
256  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,4,32,16,8,4,8,0,0));
257 
258  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(2,1,64,true));
259  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(2,16,16,16,16,true));
260  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(2,64,512,true));
261  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,128,1024));
262  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,16,32,1024));
263  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,64,32,2,2,8,1,0));
264  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,64,128,4,2,2,8,0,1));
265  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(1,4,128,32,4,8,4,1,0));
266  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Fermi]["GeForce GTX 470"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,4,32,16,8,4,8,0,0));
267 
268  //default
269  set_all_generation_default_to(map,viennacl::ocl::nvidia_id,viennacl::ocl::Fermi,"GeForce GTX 470");
270 
271  //Geforce GTX 680
272  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_SAXPY_TYPE,4)] = profile_base_ptr(new vector_saxpy(1,1,256,true));
273  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_SAXPY_TYPE,4)] = profile_base_ptr(new matrix_saxpy(1,16,16,16,16,true));
274  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(SCALAR_REDUCE_TYPE,4)] = profile_base_ptr(new scalar_reduction(4,64,512,true));
275  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,1,256,1024));
276  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,4)] = profile_base_ptr(new vector_reduction(1,64,4,64));
277  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,4)] = profile_base_ptr(new matrix_product(1,2,64,64,8,4,2,1,0));
278  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,4)] = profile_base_ptr(new matrix_product(1,8,32,16,4,4,8,0,0));
279  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,4)] = profile_base_ptr(new matrix_product(1,4,128,32,4,8,4,1,0));
280  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,4)] = profile_base_ptr(new matrix_product(1,4,32,16,8,4,8,0,0));
281 
282  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_SAXPY_TYPE,8)] = profile_base_ptr(new vector_saxpy(2,1,64,true));
283  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_SAXPY_TYPE,8)] = profile_base_ptr(new matrix_saxpy(2,16,16,16,16,true));
284  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(SCALAR_REDUCE_TYPE,8)] = profile_base_ptr(new scalar_reduction(2,64,512,true));
285  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_REDUCE_Nx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,1,128,1024));
286  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(VECTOR_REDUCE_Tx_TYPE,8)] = profile_base_ptr(new vector_reduction(1,16,32,1024));
287  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_NN_TYPE,8)] = profile_base_ptr(new matrix_product(1,8,64,32,2,2,8,1,0));
288  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_TN_TYPE,8)] = profile_base_ptr(new matrix_product(1,64,128,4,2,2,8,0,1));
289  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_NT_TYPE,8)] = profile_base_ptr(new matrix_product(1,4,128,32,4,8,4,1,0));
290  map[viennacl::ocl::nvidia_id][CL_DEVICE_TYPE_GPU][viennacl::ocl::Kepler]["GeForce GTX 680"][std::make_pair(MATRIX_PRODUCT_TT_TYPE,8)] = profile_base_ptr(new matrix_product(1,4,32,16,8,4,8,0,0));
291 
292  //default
293  set_all_generation_default_to(map,viennacl::ocl::nvidia_id,viennacl::ocl::Kepler,"GeForce GTX 680");
294 
295 
296  return map;
297  }
298  static database_type database = init_database();
299 
301  static profile_base * handle_failure(viennacl::ocl::device const & device, expression_descriptor const & descriptor, tools::shared_ptr<profile_base> const & profile){
302  //Returns default if the profile is invalid
303  if(profile->is_invalid(device, descriptor.scalartype_size))
304  return at(at(at(at(at(database.map, viennacl::ocl::unknown_id).map, device.type()).map, viennacl::ocl::UNKNOWN).map, std::string("")).map, descriptor.make_key()).get();
305  return profile.get();
306  }
307 
309  static profile_base * get(viennacl::ocl::device const & device, expression_descriptor const & descriptor){
310  device_type dev_type = device.type();
311  vendor_id_type vendor_id = device.vendor_id();
312  viennacl::ocl::device_architecture_family device_architecture = device.architecture_family();
313  std::string const & device_name = device.name();
314  expression_key_type expression_key = descriptor.make_key();
315 
316  //std::cout << "Looking up vendor ID..." << std::endl;
317  /*-Vendor ID-*/
318  database_type::map_type::iterator vendor_it = database.map.find(vendor_id);
319  //Vendor not recognized => global default:
320  if(vendor_it==database.map.end())
321  return handle_failure(device, descriptor, at(at(at(at(at(database.map, viennacl::ocl::unknown_id).map, dev_type).map, viennacl::ocl::UNKNOWN).map, std::string("")).map, expression_key));
322 
323  /*-Device Type-*/
324  //std::cout << "Looking up device type..." << std::endl;
325  device_type_map::map_type::iterator device_type_it = vendor_it->second.map.find(dev_type);
326  //Device type not recognized for this vendor => global default
327  if(device_type_it==vendor_it->second.map.end())
328  return handle_failure(device, descriptor, at(at(at(at(at(database.map, viennacl::ocl::unknown_id).map, dev_type).map, viennacl::ocl::UNKNOWN).map, std::string("")).map, expression_key));
329 
330  /*-Device Architecture-*/
331  //std::cout << "Looking up device architecture..." << std::endl;
332  device_architecture_map::map_type::iterator architecture_it = device_type_it->second.map.find(device_architecture);
333  if(architecture_it==device_type_it->second.map.end())
334  return handle_failure(device, descriptor, at(at(at(at(at(database.map, viennacl::ocl::unknown_id).map, dev_type).map, viennacl::ocl::UNKNOWN).map, std::string("")).map, expression_key));
335 
336  /*-Device Name-*/
337  //std::cout << "Looking up device name..." << std::endl;
338  device_name_map::map_type::iterator device_name_it = architecture_it->second.map.find(device_name);
339  //Name not found => Vendor default
340  if(device_name_it==architecture_it->second.map.end())
341  return handle_failure(device, descriptor, at(at(at(at(at(database.map, vendor_id).map, dev_type).map, device_architecture).map, std::string("")).map, expression_key));
342 
343  //std::cout << "Looking up expression name.." << std::endl;
344  /*-Expression-*/
345  expression_map::map_type::iterator expression_it = device_name_it->second.map.find(expression_key);
346  //Expression not found => Vendor default
347  if(expression_it==device_name_it->second.map.end())
348  return handle_failure(device, descriptor, at(at(at(at(at(database.map, vendor_id).map, dev_type).map, device_architecture).map, std::string("")).map, expression_key));
349 
350  //std::cout << "Device found in the database! Getting profile..." << std::endl;
351  //Everything okay. Return specific profile//
352  return handle_failure(device, descriptor, at(at(at(at(at(database.map, vendor_id).map, dev_type).map, device_architecture).map, std::string("")).map, expression_key));
353  }
354 
355  }
356 
357  }
358 
359 }
360 
361 
362 #endif
363 
Represents device_type->device_arch in the map hierarchy vendor->device_type->device_arch->device->ex...
Definition: profiles.hpp:71
Definition: device_utils.hpp:54
Definition: device_utils.hpp:55
Represents an OpenCL device within ViennaCL.
Kernel generation class for matrix-matrix products.
Definition: matrix_product.hpp:44
OpenCL kernel generation class for matrix expressions of AXPY type, i.e. A = alpha * B + beta * C...
Definition: saxpy.hpp:117
void set_generation_default_to(database_type &map, vendor_id_type vendor_id, viennacl::ocl::device_architecture_family family, expression_key_type expression, std::string const &device_name)
Set a default of a generation to a particular device for a particular operation.
Definition: profiles.hpp:77
void set_all_generation_default_to(database_type &map, vendor_id_type vendor_id, viennacl::ocl::device_architecture_family family, std::string const &device_name)
Set a default of a generation to a particular device for all operations.
Definition: profiles.hpp:82
cl_device_type device_type
Definition: profiles.hpp:48
cl_uint vendor_id_type
Definition: profiles.hpp:47
viennacl::tools::shared_ptr< profile_base > profile_base_ptr
Definition: profiles.hpp:51
T * get() const
Definition: shared_ptr.hpp:134
Definition: device_utils.hpp:62
Represents expression->profile in the map hierarchy vendor->device_type->device_arch->device->expression->profile.
Definition: profiles.hpp:62
A class representing a compute device (e.g. a GPU)
Definition: device.hpp:49
ValueT const & at(std::map< KeyT, ValueT > const &map, KeyT const &key)
Emulation of C++11's .at() member for std::map<>
Definition: forwards.h:97
device_architecture_family
Definition: device_utils.hpp:51
Implementation of a shared pointer class (cf. std::shared_ptr, boost::shared_ptr). Will be used until C++11 is widely available.
Main namespace in ViennaCL. Holds all the basic types such as vector, matrix, etc. and defines operations upon them.
Definition: cpu_ram.hpp:29
OpenCL kernel generation class for vector expressions of AXPY type, i.e. x = alpha * y + beta * z...
Definition: saxpy.hpp:44
Definition: device_utils.hpp:58
Base classes for the profiles.
Helper struct for wrapping a std::map. Used to avoid type length explosion when u...
Definition: profiles.hpp:55
Definition: device_utils.hpp:60
std::map< KeyType, ValueType > map_type
Definition: profiles.hpp:56
Represents device->expression in the map hierarchy vendor->device_type->device_arch->device->expression->profile.
Definition: profiles.hpp:65
A shared pointer class similar to boost::shared_ptr. Reimplemented in order to avoid a Boost-dependen...
Definition: shared_ptr.hpp:83
Forward declarations.
Kernel template for the matrix product operation.
Kernel template for the vector reduction operation.
OpenCL kernel template for reductions resulting in a vector. Example: Computing the row norms of a ma...
Definition: vector_reduction.hpp:44
Kernel template for the saxpy-like operation.
std::pair< expression_type, vcl_size_t > expression_key_type
Definition: forwards.h:81
cl_device_type type() const
The OpenCL device type.
Definition: device.hpp:893
map_type map
Definition: profiles.hpp:57
Represents vendor->device_type in the map hierarchy vendor->device_type->device_arch->device->expression->profile.
Definition: profiles.hpp:74
ValueType & operator[](KeyType const &key)
Definition: profiles.hpp:58
Represents device_arch->device in the map hierarchy vendor->device_type->device_arch->device->expression->profile.
Definition: profiles.hpp:68
std::string device_name_type
Definition: profiles.hpp:50
OpenCL kernel generation template for scalar reduction operations such as s = norm_2(x).
Definition: scalar_reduction.hpp:45
Kernel template for the scalar reduction operation.