tesseract 4.1.1
Loading...
Searching...
No Matches
simddetect.cpp
Go to the documentation of this file.
1
2// File: simddetect.cpp
3// Description: Architecture detector.
4// Author: Stefan Weil (based on code from Ray Smith)
5//
6// (C) Copyright 2014, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
17
#include <cstring>  // for strcmp
#include <numeric>  // for std::inner_product
#include "simddetect.h"
#include "dotproduct.h"
#include "intsimdmatrix.h"  // for IntSimdMatrix
#include "params.h"         // for STRING_VAR
#include "tprintf.h"        // for tprintf
24
// CPUID based runtime detection is only needed (and only compiled) when at
// least one SIMD specific implementation was enabled at build time.
#if defined(AVX) || defined(AVX2) || defined(FMA) || defined(SSE4_1)
# define HAS_CPUID
#endif

// Pull in the compiler specific header which provides the CPUID helpers.
#if defined(HAS_CPUID)
#if defined(__GNUC__)
# include <cpuid.h>
#elif defined(_WIN32)
# include <intrin.h>
#endif
#endif
36
37namespace tesseract {
38
39// Computes and returns the dot product of the two n-vectors u and v.
40// Note: because the order of addition is different among the different dot
41// product functions, the results can (and do) vary slightly (although they
42// agree to within about 4e-15). This produces different results when running
43// training, despite all random inputs being precisely equal.
44// To get consistent results, use just one of these dot product functions.
45// On a test multi-layer network, serial is 57% slower than SSE, and AVX
46// is about 8% faster than SSE. This suggests that the time is memory
47// bandwidth constrained and could benefit from holding the reused vector
48// in AVX registers.
50
51static STRING_VAR(dotproduct, "auto",
52 "Function used for calculation of dot product");
53
54SIMDDetect SIMDDetect::detector;
55
56// If true, then AVX has been detected.
57bool SIMDDetect::avx_available_;
58bool SIMDDetect::avx2_available_;
59bool SIMDDetect::avx512F_available_;
60bool SIMDDetect::avx512BW_available_;
61// If true, then FMA has been detected.
62bool SIMDDetect::fma_available_;
63// If true, then SSe4.1 has been detected.
64bool SIMDDetect::sse_available_;
65
// Computes and returns the dot product of the two n-vectors u and v.
// Portable fallback without any SIMD instructions. The products are
// accumulated strictly from index 0 upwards; the summation order is kept
// fixed on purpose because it determines the exact floating point result.
// Returns 0.0 for n <= 0.
static double DotProductGeneric(const double* u, const double* v, int n) {
  double total = 0.0;
  for (int k = 0; k < n; ++k) {
    total += u[k] * v[k];
  }
  return total;
}
72
// Computes and returns the dot product of the two n-vectors u and v
// using std::inner_product with a double accumulator starting at 0.0.
static double DotProductStdInnerProduct(const double* u, const double* v, int n) {
  return std::inner_product(u, u + n, v, 0.0);
}
77
78static void SetDotProduct(DotProductFunction f, const IntSimdMatrix* m = nullptr) {
79 DotProduct = f;
81}
82
83// Constructor.
84// Tests the architecture in a system-dependent way to detect AVX, SSE and
85// any other available SIMD equipment.
86// __GNUC__ is also defined by compilers that include GNU extensions such as
87// clang.
88SIMDDetect::SIMDDetect() {
89 // The fallback is a generic dot product calculation.
90 SetDotProduct(DotProductGeneric);
91
92#if defined(HAS_CPUID)
93#if defined(__GNUC__)
94 unsigned int eax, ebx, ecx, edx;
95 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) != 0) {
96 // Note that these tests all use hex because the older compilers don't have
97 // the newer flags.
98#if defined(SSE4_1)
99 sse_available_ = (ecx & 0x00080000) != 0;
100#endif
101#if defined(FMA)
102 fma_available_ = (ecx & 0x00001000) != 0;
103#endif
104#if defined(AVX)
105 avx_available_ = (ecx & 0x10000000) != 0;
106 if (avx_available_) {
107 // There is supposed to be a __get_cpuid_count function, but this is all
108 // there is in my cpuid.h. It is a macro for an asm statement and cannot
109 // be used inside an if.
110 __cpuid_count(7, 0, eax, ebx, ecx, edx);
111 avx2_available_ = (ebx & 0x00000020) != 0;
112 avx512F_available_ = (ebx & 0x00010000) != 0;
113 avx512BW_available_ = (ebx & 0x40000000) != 0;
114 }
115#endif
116 }
117# elif defined(_WIN32)
118 int cpuInfo[4];
119 int max_function_id;
120 __cpuid(cpuInfo, 0);
121 max_function_id = cpuInfo[0];
122 if (max_function_id >= 1) {
123 __cpuid(cpuInfo, 1);
124#if defined(SSE4_1)
125 sse_available_ = (cpuInfo[2] & 0x00080000) != 0;
126#endif
127#if defined(AVX) || defined(AVX2) || defined(FMA)
128 if ((cpuInfo[2] & 0x08000000) && ((_xgetbv(0) & 6) == 6)) {
129 // OSXSAVE bit is set, XMM state and YMM state are fine.
130#if defined(FMA)
131 fma_available_ = (cpuInfo[2] & 0x00001000) != 0;
132#endif
133#if defined(AVX)
134 avx_available_ = (cpuInfo[2] & 0x10000000) != 0;
135#endif
136#if defined(AVX2)
137 if (max_function_id >= 7) {
138 __cpuid(cpuInfo, 7);
139 avx2_available_ = (cpuInfo[1] & 0x00000020) != 0;
140 avx512F_available_ = (cpuInfo[1] & 0x00010000) != 0;
141 avx512BW_available_ = (cpuInfo[1] & 0x40000000) != 0;
142 }
143#endif
144 }
145#endif
146 }
147#else
148#error "I don't know how to test for SIMD with this compiler"
149#endif
150#endif
151
152 // Select code for calculation of dot product based on autodetection.
153 if (false) {
154 // This is a dummy to support conditional compilation.
155#if defined(AVX2)
156 } else if (avx2_available_) {
157 // AVX2 detected.
159#endif
160#if defined(AVX)
161 } else if (avx_available_) {
162 // AVX detected.
164#endif
165#if defined(SSE4_1)
166 } else if (sse_available_) {
167 // SSE detected.
169#endif
170 }
171}
172
174 // Select code for calculation of dot product based on the
175 // value of the config variable if that value is not empty.
176 const char* dotproduct_method = "generic";
177 if (!strcmp(dotproduct.string(), "auto")) {
178 // Automatic detection. Nothing to be done.
179 } else if (!strcmp(dotproduct.string(), "generic")) {
180 // Generic code selected by config variable.
181 SetDotProduct(DotProductGeneric);
182 dotproduct_method = "generic";
183 } else if (!strcmp(dotproduct.string(), "native")) {
184 // Native optimized code selected by config variable.
185 SetDotProduct(DotProductNative);
186 dotproduct_method = "native";
187#if defined(AVX2)
188 } else if (!strcmp(dotproduct.string(), "avx2")) {
189 // AVX2 selected by config variable.
191 dotproduct_method = "avx2";
192#endif
193#if defined(AVX)
194 } else if (!strcmp(dotproduct.string(), "avx")) {
195 // AVX selected by config variable.
197 dotproduct_method = "avx";
198#endif
199#if defined(FMA)
200 } else if (!strcmp(dotproduct.string(), "fma")) {
201 // FMA selected by config variable.
203 dotproduct_method = "fma";
204#endif
205#if defined(SSE4_1)
206 } else if (!strcmp(dotproduct.string(), "sse")) {
207 // SSE selected by config variable.
209 dotproduct_method = "sse";
210#endif
211 } else if (!strcmp(dotproduct.string(), "std::inner_product")) {
212 // std::inner_product selected by config variable.
213 SetDotProduct(DotProductStdInnerProduct);
214 dotproduct_method = "std::inner_product";
215 } else {
216 // Unsupported value of config variable.
217 tprintf("Warning, ignoring unsupported config variable value: dotproduct=%s\n",
218 dotproduct.string());
219 tprintf("Support values for dotproduct: auto generic native"
220#if defined(AVX)
221 " avx"
222#endif
223#if defined(SSE4_1)
224 " sse"
225#endif
226 " std::inner_product.\n");
227 }
228
229 dotproduct.set_value(dotproduct_method);
230}
231
232} // namespace tesseract
#define STRING_VAR(name, val, comment)
Definition: params.h:309
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
double(*)(const double *, const double *, int) DotProductFunction
Definition: simddetect.h:25
DotProductFunction DotProduct
Definition: simddetect.cpp:49
double DotProductAVX(const double *u, const double *v, int n)
double DotProductNative(const double *u, const double *v, int n)
Definition: dotproduct.cpp:22
double DotProductFMA(const double *u, const double *v, int n)
double DotProductSSE(const double *u, const double *v, int n)
static const IntSimdMatrix intSimdMatrixAVX2
static const IntSimdMatrix * intSimdMatrix
static const IntSimdMatrix intSimdMatrixSSE
static TESS_API void Update()
Definition: simddetect.cpp:173