blitz Version 0.9
|
/***************************************************************************
 * blitz/tuning.h      Platform-specific code tuning
 *
 * $Id: tuning.h,v 1.4 2003/01/14 11:29:18 patricg Exp $
 *
 * Copyright (C) 1997-2001 Todd Veldhuizen <tveldhui@oonumerics.org>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * Suggestions:          blitz-dev@oonumerics.org
 * Bugs:                 blitz-bugs@oonumerics.org
 *
 * For more information, please see the Blitz++ Home Page:
 *    http://oonumerics.org/blitz/
 *
 ***************************************************************************/

#ifndef BZ_TUNING_H
#define BZ_TUNING_H

/*
 * This header contains only preprocessor configuration: it sets the
 * default state of the BZ_* tuning switches, applies per-compiler
 * overrides, and derives a few helper macros (_bz_inline1, _bz_inline2,
 * _bz_inline_et, BZ_ETPARM, BZ_NO_PROPAGATE) from those switches.
 */

// These estimates should be conservative (i.e. underestimate the
// cache sizes).
#define BZ_L1_CACHE_ESTIMATED_SIZE    8192
#define BZ_L2_CACHE_ESTIMATED_SIZE    65536

/*
 * Default tuning switches.  A switch is "on" when it is #define'd and
 * "off" when it is #undef'ed; the platform-specific sections further
 * down may override these defaults.
 */
#undef  BZ_PARTIAL_LOOP_UNROLL
#define BZ_PASS_EXPR_BY_VALUE
#undef  BZ_PTR_INC_FASTER_THAN_INDIRECTION
#define BZ_MANUAL_VECEXPR_COPY_CONSTRUCTOR
#undef  BZ_KCC_COPY_PROPAGATION_KLUDGE
#undef  BZ_ALTERNATE_FORWARD_BACKWARD_TRAVERSALS
#undef  BZ_ARRAY_EXPR_PASS_INDEX_BY_VALUE
#define BZ_INLINE_GROUP1
#define BZ_INLINE_GROUP2
#define BZ_COLLAPSE_LOOPS
#define BZ_USE_FAST_READ_ARRAY_EXPR
#define BZ_ARRAY_EXPR_USE_COMMON_STRIDE
#undef  BZ_ARRAY_SPACE_FILLING_TRAVERSAL
#undef  BZ_ARRAY_FAST_TRAVERSAL_UNROLL
#undef  BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
#undef  BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#define BZ_ARRAY_2D_STENCIL_TILING
#define BZ_ARRAY_2D_STENCIL_TILE_SIZE       128
#undef  BZ_INTERLACE_ARRAYS
#undef  BZ_ALIGN_BLOCKS_ON_CACHELINE_BOUNDARY
#define BZ_FAST_COMPILE

// BZ_NEW_EXPRESSION_TEMPLATES is enabled unless the user opts out by
// defining BZ_DISABLE_NEW_ET before this header is included.
#ifndef BZ_DISABLE_NEW_ET
 #define BZ_NEW_EXPRESSION_TEMPLATES
#endif

// BZ_FAST_COMPILE is defined unconditionally above, so this section is
// always active as the file stands: BZ_ETPARM(X) becomes "const X&" and
// _bz_inline_et expands to nothing (see the derived macros below).
#ifdef BZ_FAST_COMPILE
 #define BZ_ETPARMS_CONSTREF
 #define BZ_NO_INLINE_ET
#endif

/*
 * Platform-specific tuning
 */

#ifdef _CRAYT3E
 // The backend compiler on the T3E does a better job of
 // loop unrolling.
 #undef BZ_PARTIAL_LOOP_UNROLL
 #undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
 #undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#endif

#ifdef __GNUC__
 // The egcs compiler does a good job of loop unrolling, if
 // -funroll-loops is used.
 #undef BZ_PARTIAL_LOOP_UNROLL
 #undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
 #undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#endif

// User-level opt-out for the KCC copy-propagation kludge.
#ifdef BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE
 #undef BZ_KCC_COPY_PROPAGATION_KLUDGE
#endif

/*
 * Derived helper macros.
 *
 * _bz_inline1 / _bz_inline2 expand to "inline" when the matching
 * BZ_INLINE_GROUPn switch is defined, and to nothing otherwise.
 */
#ifdef BZ_INLINE_GROUP1
 #define _bz_inline1 inline
#else
 #define _bz_inline1
#endif

#ifdef BZ_INLINE_GROUP2
 #define _bz_inline2 inline
#else
 #define _bz_inline2
#endif

// _bz_inline_et expands to "inline" unless BZ_NO_INLINE_ET is defined.
#ifdef BZ_NO_INLINE_ET
 #define _bz_inline_et
#else
 #define _bz_inline_et inline
#endif

// BZ_ETPARM(X) spells the parameter type X either as a const reference
// (BZ_ETPARMS_CONSTREF defined) or as a plain by-value type.
#ifdef BZ_ETPARMS_CONSTREF
 #define BZ_ETPARM(X) const X&
#else
 #define BZ_ETPARM(X) X
#endif

#ifdef __DECCXX
 // The DEC cxx compiler has problems with loop unrolling
 // because of aliasing.  Loop unrolling and anti-aliasing
 // is done by Blitz++.

 #define BZ_PARTIAL_LOOP_UNROLL
 #define BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
 #define BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#endif

/*
 * BZ_NO_PROPAGATE(X) prevents the compiler from performing
 * copy propagation on a variable.  This is used for loop
 * unrolling to prevent KAI C++ from rearranging the
 * ordering of memory accesses.
 *
 * By default the macro is the identity: BZ_NO_PROPAGATE(X) expands to X.
 * Only when compiling with KAI C++ (__KCC) and with BZ_USE_NO_PROPAGATE
 * defined is it replaced by a call to __kai_apply.
 */

#define BZ_NO_PROPAGATE(X)   X

#ifdef __KCC
#ifdef BZ_USE_NO_PROPAGATE
    extern "C" int __kai_apply(const char*, ...);

    // #undef takes the bare macro name only; writing a parameter list
    // after it is ill-formed and is diagnosed by conforming preprocessors.
    #undef  BZ_NO_PROPAGATE
    // The argument is parenthesized so that '&' applies to the whole
    // argument expression rather than just to its first operand.
    #define BZ_NO_PROPAGATE(X)  __kai_apply("(%a)",&(X))
#endif
#endif

#endif // BZ_TUNING_H