tesseract 4.1.1
Loading...
Searching...
No Matches
oclkernels.h
Go to the documentation of this file.
1// Licensed under the Apache License, Version 2.0 (the "License");
2// you may not use this file except in compliance with the License.
3// You may obtain a copy of the License at
4// http://www.apache.org/licenses/LICENSE-2.0
5// Unless required by applicable law or agreed to in writing, software
6// distributed under the License is distributed on an "AS IS" BASIS,
7// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
8// See the License for the specific language governing permissions and
9// limitations under the License.
10
11#ifndef TESSERACT_OPENCL_OCLKERNELS_H_
12#define TESSERACT_OPENCL_OCLKERNELS_H_
13
14#ifndef USE_EXTERNAL_KERNEL
15#define KERNEL(...) #__VA_ARGS__ "\n"
16// Double precision is a default of spreadsheets
17// cl_khr_fp64: Khronos extension
18// cl_amd_fp64: AMD extension
19// use build option outside to define fp_t
21static const char* kernel_src = KERNEL(
22\n#ifdef KHR_DP_EXTENSION\n
23\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n
24\n#elif AMD_DP_EXTENSION\n
25\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n
26\n#else\n
27\n#endif\n
28__kernel void composeRGBPixel(__global uint *tiffdata, int w, int h,int wpl, __global uint *output)
29{
30 int i = get_global_id(1);
31 int j = get_global_id(0);
32 int tiffword,rval,gval,bval;
33
34 //Ignore the excess
35 if ((i >= h) || (j >= w))
36 return;
37
38 tiffword = tiffdata[i * w + j];
39 rval = ((tiffword) & 0xff);
40 gval = (((tiffword) >> 8) & 0xff);
41 bval = (((tiffword) >> 16) & 0xff);
42 output[i*wpl+j] = (rval << (8 * (sizeof(uint) - 1 - 0))) | (gval << (8 * (sizeof(uint) - 1 - 1))) | (bval << (8 * (sizeof(uint) - 1 - 2)));
43}
44)
45
46KERNEL(
47\n__kernel void pixSubtract_inplace(__global int *dword, __global int *sword,
48 const int wpl, const int h)
49{
50 const unsigned int row = get_global_id(1);
51 const unsigned int col = get_global_id(0);
52 const unsigned int pos = row * wpl + col;
53
54 //Ignore the execss
55 if (row >= h || col >= wpl)
56 return;
57
58 *(dword + pos) &= ~(*(sword + pos));
59}\n
60)
61
62KERNEL(
63\n__kernel void morphoDilateHor_5x5(__global int *sword,__global int *dword,
64 const int wpl, const int h)
65{
66 const unsigned int pos = get_global_id(0);
67 unsigned int prevword, nextword, currword,tempword;
68 unsigned int destword;
69 const int col = pos % wpl;
70
71 //Ignore the execss
72 if (pos >= (wpl * h))
73 return;
74
75
76 currword = *(sword + pos);
77 destword = currword;
78
79 //Handle boundary conditions
80 if(col==0)
81 prevword=0;
82 else
83 prevword = *(sword + pos - 1);
84
85 if(col==(wpl - 1))
86 nextword=0;
87 else
88 nextword = *(sword + pos + 1);
89
90 //Loop unrolled
91
92 //1 bit to left and 1 bit to right
93 //Get the max value on LHS of every pixel
94 tempword = (prevword << (31)) | ((currword >> 1));
95 destword |= tempword;
96 //Get max value on RHS of every pixel
97 tempword = (currword << 1) | (nextword >> (31));
98 destword |= tempword;
99
100 //2 bit to left and 2 bit to right
101 //Get the max value on LHS of every pixel
102 tempword = (prevword << (30)) | ((currword >> 2));
103 destword |= tempword;
104 //Get max value on RHS of every pixel
105 tempword = (currword << 2) | (nextword >> (30));
106 destword |= tempword;
107
108
109 *(dword + pos) = destword;
110
111}\n
112)
113
114KERNEL(
115\n__kernel void morphoDilateVer_5x5(__global int *sword,__global int *dword,
116 const int wpl, const int h)
117{
118 const int col = get_global_id(0);
119 const int row = get_global_id(1);
120 const unsigned int pos = row * wpl + col;
121 unsigned int tempword;
122 unsigned int destword;
123 int i;
124
125 //Ignore the execss
126 if (row >= h || col >= wpl)
127 return;
128
129 destword = *(sword + pos);
130
131 //2 words above
132 i = (row - 2) < 0 ? row : (row - 2);
133 tempword = *(sword + i*wpl + col);
134 destword |= tempword;
135
136 //1 word above
137 i = (row - 1) < 0 ? row : (row - 1);
138 tempword = *(sword + i*wpl + col);
139 destword |= tempword;
140
141 //1 word below
142 i = (row >= (h - 1)) ? row : (row + 1);
143 tempword = *(sword + i*wpl + col);
144 destword |= tempword;
145
146 //2 words below
147 i = (row >= (h - 2)) ? row : (row + 2);
148 tempword = *(sword + i*wpl + col);
149 destword |= tempword;
150
151 *(dword + pos) = destword;
152}\n
153)
154
155KERNEL(
156\n__kernel void morphoDilateHor(__global int *sword,__global int *dword,const int xp, const int xn, const int wpl, const int h)
157{
158 const int col = get_global_id(0);
159 const int row = get_global_id(1);
160 const unsigned int pos = row * wpl + col;
161 unsigned int parbitsxp, parbitsxn, nwords;
162 unsigned int destword, tempword, lastword, currword;
163 unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
164 int i, j, siter, eiter;
165
166 //Ignore the execss
167 if (pos >= (wpl*h) || (xn < 1 && xp < 1))
168 return;
169
170 currword = *(sword + pos);
171 destword = currword;
172
173 parbitsxp = xp & 31;
174 parbitsxn = xn & 31;
175 nwords = xp >> 5;
176
177 if (parbitsxp > 0)
178 nwords += 1;
179 else
180 parbitsxp = 31;
181
182 siter = (col - nwords);
183 eiter = (col + nwords);
184
185 //Get prev word
186 if (col==0)
187 firstword = 0x0;
188 else
189 firstword = *(sword + pos - 1);
190
191 //Get next word
192 if (col == (wpl - 1))
193 secondword = 0x0;
194 else
195 secondword = *(sword + pos + 1);
196
197 //Last partial bits on either side
198 for (i = 1; i <= parbitsxp; i++)
199 {
200 //Get the max value on LHS of every pixel
201 tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0x0 : (firstword << (32-i)) | ((currword >> i));
202
203 destword |= tempword;
204
205 //Get max value on RHS of every pixel
206 tempword = (currword << i) | (secondword >> (32 - i));
207 destword |= tempword;
208 }
209
210 //Return if halfwidth <= 1 word
211 if (nwords == 1)
212 {
213 if (xn == 32)
214 {
215 destword |= firstword;
216 }
217 if (xp == 32)
218 {
219 destword |= secondword;
220 }
221
222 *(dword + pos) = destword;
223 return;
224 }
225
226 if (siter < 0)
227 firstword = 0x0;
228 else
229 firstword = *(sword + row*wpl + siter);
230
231 if (eiter >= wpl)
232 lastword = 0x0;
233 else
234 lastword = *(sword + row*wpl + eiter);
235
236 for (i = 1; i < nwords; i++)
237 {
238 //Gets LHS words
239 if ((siter + i) < 0)
240 secondword = 0x0;
241 else
242 secondword = *(sword + row*wpl + siter + i);
243
244 lprevword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
245
246 firstword = secondword;
247
248 if ((siter + i + 1) < 0)
249 secondword = 0x0;
250 else
251 secondword = *(sword + row*wpl + siter + i + 1);
252
253 lnextword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
254
255 //Gets RHS words
256 if ((eiter - i) >= wpl)
257 firstword = 0x0;
258 else
259 firstword = *(sword + row*wpl + eiter - i);
260
261 rnextword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
262
263 lastword = firstword;
264 if ((eiter - i - 1) >= wpl)
265 firstword = 0x0;
266 else
267 firstword = *(sword + row*wpl + eiter - i - 1);
268
269 rprevword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
270
271 for (j = 1; j < 32; j++)
272 {
273 //OR LHS full words
274 tempword = (lprevword << j) | (lnextword >> (32 - j));
275 destword |= tempword;
276
277 //OR RHS full words
278 tempword = (rprevword << j) | (rnextword >> (32 - j));
279 destword |= tempword;
280 }
281
282 destword |= lprevword;
283 destword |= lnextword;
284 destword |= rprevword;
285 destword |= rnextword;
286
287 lastword = firstword;
288 firstword = secondword;
289 }
290
291 *(dword + pos) = destword;
292}\n
293)
294
295KERNEL(
296\n__kernel void morphoDilateHor_32word(__global int *sword,__global int *dword,
297 const int halfwidth,
298 const int wpl, const int h,
299 const char isEven)
300{
301 const int col = get_global_id(0);
302 const int row = get_global_id(1);
303 const unsigned int pos = row * wpl + col;
304 unsigned int prevword, nextword, currword,tempword;
305 unsigned int destword;
306 int i;
307
308 //Ignore the execss
309 if (pos >= (wpl * h))
310 return;
311
312 currword = *(sword + pos);
313 destword = currword;
314
315 //Handle boundary conditions
316 if(col==0)
317 prevword=0;
318 else
319 prevword = *(sword + pos - 1);
320
321 if(col==(wpl - 1))
322 nextword=0;
323 else
324 nextword = *(sword + pos + 1);
325
326 for (i = 1; i <= halfwidth; i++)
327 {
328 //Get the max value on LHS of every pixel
329 if (i == halfwidth && isEven)
330 {
331 tempword = 0x0;
332 }
333 else
334 {
335 tempword = (prevword << (32-i)) | ((currword >> i));
336 }
337
338 destword |= tempword;
339
340 //Get max value on RHS of every pixel
341 tempword = (currword << i) | (nextword >> (32 - i));
342
343 destword |= tempword;
344 }
345
346 *(dword + pos) = destword;
347}\n
348)
349
350KERNEL(
351\n__kernel void morphoDilateVer(__global int *sword,__global int *dword,
352 const int yp,
353 const int wpl, const int h,
354 const int yn)
355{
356 const int col = get_global_id(0);
357 const int row = get_global_id(1);
358 const unsigned int pos = row * wpl + col;
359 unsigned int tempword;
360 unsigned int destword;
361 int i, siter, eiter;
362
363 //Ignore the execss
364 if (row >= h || col >= wpl)
365 return;
366
367 destword = *(sword + pos);
368
369 //Set start position and end position considering the boundary conditions
370 siter = (row - yn) < 0 ? 0 : (row - yn);
371 eiter = (row >= (h - yp)) ? (h - 1) : (row + yp);
372
373 for (i = siter; i <= eiter; i++)
374 {
375 tempword = *(sword + i*wpl + col);
376
377 destword |= tempword;
378 }
379
380 *(dword + pos) = destword;
381}\n
382)
383
384KERNEL(
385\n__kernel void morphoErodeHor_5x5(__global int *sword,__global int *dword,
386 const int wpl, const int h)
387{
388 const unsigned int pos = get_global_id(0);
389 unsigned int prevword, nextword, currword,tempword;
390 unsigned int destword;
391 const int col = pos % wpl;
392
393 //Ignore the execss
394 if (pos >= (wpl * h))
395 return;
396
397 currword = *(sword + pos);
398 destword = currword;
399
400 //Handle boundary conditions
401 if(col==0)
402 prevword=0xffffffff;
403 else
404 prevword = *(sword + pos - 1);
405
406 if(col==(wpl - 1))
407 nextword=0xffffffff;
408 else
409 nextword = *(sword + pos + 1);
410
411 //Loop unrolled
412
413 //1 bit to left and 1 bit to right
414 //Get the min value on LHS of every pixel
415 tempword = (prevword << (31)) | ((currword >> 1));
416 destword &= tempword;
417 //Get min value on RHS of every pixel
418 tempword = (currword << 1) | (nextword >> (31));
419 destword &= tempword;
420
421 //2 bit to left and 2 bit to right
422 //Get the min value on LHS of every pixel
423 tempword = (prevword << (30)) | ((currword >> 2));
424 destword &= tempword;
425 //Get min value on RHS of every pixel
426 tempword = (currword << 2) | (nextword >> (30));
427 destword &= tempword;
428
429
430 *(dword + pos) = destword;
431
432}\n
433)
434
435KERNEL(
436\n__kernel void morphoErodeVer_5x5(__global int *sword,__global int *dword,
437 const int wpl, const int h,
438 const int fwmask, const int lwmask)
439{
440 const int col = get_global_id(0);
441 const int row = get_global_id(1);
442 const unsigned int pos = row * wpl + col;
443 unsigned int tempword;
444 unsigned int destword;
445 int i;
446
447 //Ignore the execss
448 if (row >= h || col >= wpl)
449 return;
450
451 destword = *(sword + pos);
452
453 if (row < 2 || row >= (h - 2))
454 {
455 destword = 0x0;
456 }
457 else
458 {
459 //2 words above
460 //i = (row - 2) < 0 ? row : (row - 2);
461 i = (row - 2);
462 tempword = *(sword + i*wpl + col);
463 destword &= tempword;
464
465 //1 word above
466 //i = (row - 1) < 0 ? row : (row - 1);
467 i = (row - 1);
468 tempword = *(sword + i*wpl + col);
469 destword &= tempword;
470
471 //1 word below
472 //i = (row >= (h - 1)) ? row : (row + 1);
473 i = (row + 1);
474 tempword = *(sword + i*wpl + col);
475 destword &= tempword;
476
477 //2 words below
478 //i = (row >= (h - 2)) ? row : (row + 2);
479 i = (row + 2);
480 tempword = *(sword + i*wpl + col);
481 destword &= tempword;
482
483 if (col == 0)
484 {
485 destword &= fwmask;
486 }
487 if (col == (wpl - 1))
488 {
489 destword &= lwmask;
490 }
491 }
492
493
494 *(dword + pos) = destword;
495}\n
496)
497
498KERNEL(
499\n__kernel void morphoErodeHor(__global int *sword,__global int *dword, const int xp, const int xn, const int wpl,
500 const int h, const char isAsymmetric, const int rwmask, const int lwmask)
501{
502 const int col = get_global_id(0);
503 const int row = get_global_id(1);
504 const unsigned int pos = row * wpl + col;
505 unsigned int parbitsxp, parbitsxn, nwords;
506 unsigned int destword, tempword, lastword, currword;
507 unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
508 int i, j, siter, eiter;
509
510 //Ignore the execss
511 if (pos >= (wpl*h) || (xn < 1 && xp < 1))
512 return;
513
514 currword = *(sword + pos);
515 destword = currword;
516
517 parbitsxp = xp & 31;
518 parbitsxn = xn & 31;
519 nwords = xp >> 5;
520
521 if (parbitsxp > 0)
522 nwords += 1;
523 else
524 parbitsxp = 31;
525
526 siter = (col - nwords);
527 eiter = (col + nwords);
528
529 //Get prev word
530 if (col==0)
531 firstword = 0xffffffff;
532 else
533 firstword = *(sword + pos - 1);
534
535 //Get next word
536 if (col == (wpl - 1))
537 secondword = 0xffffffff;
538 else
539 secondword = *(sword + pos + 1);
540
541 //Last partial bits on either side
542 for (i = 1; i <= parbitsxp; i++)
543 {
544 //Get the max value on LHS of every pixel
545 tempword = (firstword << (32-i)) | ((currword >> i));
546 destword &= tempword;
547
548 //Get max value on RHS of every pixel
549 tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0xffffffff : (currword << i) | (secondword >> (32 - i));
550
551 //tempword = (currword << i) | (secondword >> (32 - i));
552 destword &= tempword;
553 }
554
555 //Return if halfwidth <= 1 word
556 if (nwords == 1)
557 {
558 if (xp == 32)
559 {
560 destword &= firstword;
561 }
562 if (xn == 32)
563 {
564 destword &= secondword;
565 }
566
567 //Clear boundary pixels
568 if (isAsymmetric)
569 {
570 if (col == 0)
571 destword &= rwmask;
572 if (col == (wpl - 1))
573 destword &= lwmask;
574 }
575
576 *(dword + pos) = destword;
577 return;
578 }
579
580 if (siter < 0)
581 firstword = 0xffffffff;
582 else
583 firstword = *(sword + row*wpl + siter);
584
585 if (eiter >= wpl)
586 lastword = 0xffffffff;
587 else
588 lastword = *(sword + row*wpl + eiter);
589
590
591 for (i = 1; i < nwords; i++)
592 {
593 //Gets LHS words
594 if ((siter + i) < 0)
595 secondword = 0xffffffff;
596 else
597 secondword = *(sword + row*wpl + siter + i);
598
599 lprevword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
600
601 firstword = secondword;
602
603 if ((siter + i + 1) < 0)
604 secondword = 0xffffffff;
605 else
606 secondword = *(sword + row*wpl + siter + i + 1);
607
608 lnextword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
609
610 //Gets RHS words
611 if ((eiter - i) >= wpl)
612 firstword = 0xffffffff;
613 else
614 firstword = *(sword + row*wpl + eiter - i);
615
616 rnextword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
617
618 lastword = firstword;
619 if ((eiter - i - 1) >= wpl)
620 firstword = 0xffffffff;
621 else
622 firstword = *(sword + row*wpl + eiter - i - 1);
623
624 rprevword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
625
626 for (j = 0; j < 32; j++)
627 {
628 //OR LHS full words
629 tempword = (lprevword << j) | (lnextword >> (32 - j));
630 destword &= tempword;
631
632 //OR RHS full words
633 tempword = (rprevword << j) | (rnextword >> (32 - j));
634 destword &= tempword;
635 }
636
637 destword &= lprevword;
638 destword &= lnextword;
639 destword &= rprevword;
640 destword &= rnextword;
641
642 lastword = firstword;
643 firstword = secondword;
644 }
645
646 if (isAsymmetric)
647 {
648 //Clear boundary pixels
649 if (col < (nwords - 1))
650 destword = 0x0;
651 else if (col == (nwords - 1))
652 destword &= rwmask;
653 else if (col > (wpl - nwords))
654 destword = 0x0;
655 else if (col == (wpl - nwords))
656 destword &= lwmask;
657 }
658
659 *(dword + pos) = destword;
660}\n
661)
662
663KERNEL(
664\n__kernel void morphoErodeHor_32word(__global int *sword,__global int *dword,
665 const int halfwidth, const int wpl,
666 const int h, const char clearBoundPixH,
667 const int rwmask, const int lwmask,
668 const char isEven)
669{
670 const int col = get_global_id(0);
671 const int row = get_global_id(1);
672 const unsigned int pos = row * wpl + col;
673 unsigned int prevword, nextword, currword,tempword, destword;
674 int i;
675
676 //Ignore the execss
677 if (pos >= (wpl * h))
678 return;
679
680 currword = *(sword + pos);
681 destword = currword;
682
683 //Handle boundary conditions
684 if(col==0)
685 prevword=0xffffffff;
686 else
687 prevword = *(sword + pos - 1);
688
689 if(col==(wpl - 1))
690 nextword=0xffffffff;
691 else
692 nextword = *(sword + pos + 1);
693
694 for (i = 1; i <= halfwidth; i++)
695 {
696 //Get the min value on LHS of every pixel
697 tempword = (prevword << (32-i)) | ((currword >> i));
698
699 destword &= tempword;
700
701 //Get min value on RHS of every pixel
702 if (i == halfwidth && isEven)
703 {
704 tempword = 0xffffffff;
705 }
706 else
707 {
708 tempword = (currword << i) | (nextword >> (32 - i));
709 }
710
711 destword &= tempword;
712 }
713
714 if (clearBoundPixH)
715 {
716 if (col == 0)
717 {
718 destword &= rwmask;
719 }
720 else if (col == (wpl - 1))
721 {
722 destword &= lwmask;
723 }
724 }
725
726 *(dword + pos) = destword;
727}\n
728)
729
730KERNEL(
731\n__kernel void morphoErodeVer(__global int *sword,__global int *dword,
732 const int yp,
733 const int wpl, const int h,
734 const char clearBoundPixV, const int yn)
735{
736 const int col = get_global_id(0);
737 const int row = get_global_id(1);
738 const unsigned int pos = row * wpl + col;
739 unsigned int tempword, destword;
740 int i, siter, eiter;
741
742 //Ignore the execss
743 if (row >= h || col >= wpl)
744 return;
745
746 destword = *(sword + pos);
747
748 //Set start position and end position considering the boundary conditions
749 siter = (row - yp) < 0 ? 0 : (row - yp);
750 eiter = (row >= (h - yn)) ? (h - 1) : (row + yn);
751
752 for (i = siter; i <= eiter; i++)
753 {
754 tempword = *(sword + i*wpl + col);
755
756 destword &= tempword;
757 }
758
759 //Clear boundary pixels
760 if (clearBoundPixV && ((row < yp) || ((h - row) <= yn)))
761 {
762 destword = 0x0;
763 }
764
765 *(dword + pos) = destword;
766}\n
767)
768
769// HistogramRect Kernel: Accumulate
770// assumes 4 channels, i.e., bytes_per_pixel = 4
771// assumes number of pixels is multiple of 8
772// data is laid out as
773// ch0 ch1 ...
774// bin0 bin1 bin2... bin0...
775// rpt0,1,2...256 rpt0,1,2...
776KERNEL(
777\n#define HIST_REDUNDANCY 256\n
778\n#define GROUP_SIZE 256\n
779\n#define HIST_SIZE 256\n
780\n#define NUM_CHANNELS 4\n
781\n#define HR_UNROLL_SIZE 8 \n
782\n#define HR_UNROLL_TYPE uchar8 \n
783
784__attribute__((reqd_work_group_size(256, 1, 1)))
785__kernel
786void kernel_HistogramRectAllChannels(
787 __global const uchar8 *data,
788 uint numPixels,
789 __global uint *histBuffer) {
790
791 // declare variables
792 uchar8 pixels;
793 int threadOffset = get_global_id(0)%HIST_REDUNDANCY;
794
795 // for each pixel/channel, accumulate in global memory
796 for (uint pc = get_global_id(0); pc < numPixels*NUM_CHANNELS/HR_UNROLL_SIZE; pc += get_global_size(0)) {
797 pixels = data[pc];
798 // channel bin thread
799 atomic_inc(&histBuffer[0*HIST_SIZE*HIST_REDUNDANCY + pixels.s0*HIST_REDUNDANCY + threadOffset]); // ch0
800 atomic_inc(&histBuffer[0*HIST_SIZE*HIST_REDUNDANCY + pixels.s4*HIST_REDUNDANCY + threadOffset]); // ch0
801 atomic_inc(&histBuffer[1*HIST_SIZE*HIST_REDUNDANCY + pixels.s1*HIST_REDUNDANCY + threadOffset]); // ch1
802 atomic_inc(&histBuffer[1*HIST_SIZE*HIST_REDUNDANCY + pixels.s5*HIST_REDUNDANCY + threadOffset]); // ch1
803 atomic_inc(&histBuffer[2*HIST_SIZE*HIST_REDUNDANCY + pixels.s2*HIST_REDUNDANCY + threadOffset]); // ch2
804 atomic_inc(&histBuffer[2*HIST_SIZE*HIST_REDUNDANCY + pixels.s6*HIST_REDUNDANCY + threadOffset]); // ch2
805 atomic_inc(&histBuffer[3*HIST_SIZE*HIST_REDUNDANCY + pixels.s3*HIST_REDUNDANCY + threadOffset]); // ch3
806 atomic_inc(&histBuffer[3*HIST_SIZE*HIST_REDUNDANCY + pixels.s7*HIST_REDUNDANCY + threadOffset]); // ch3
807 }
808}
809)
810
811KERNEL(
812// NUM_CHANNELS = 1
813__attribute__((reqd_work_group_size(256, 1, 1)))
814__kernel
815void kernel_HistogramRectOneChannel(
816 __global const uchar8 *data,
817 uint numPixels,
818 __global uint *histBuffer) {
819
820 // declare variables
821 uchar8 pixels;
822 int threadOffset = get_global_id(0)%HIST_REDUNDANCY;
823
824 // for each pixel/channel, accumulate in global memory
825 for (uint pc = get_global_id(0); pc < numPixels/HR_UNROLL_SIZE; pc += get_global_size(0)) {
826 pixels = data[pc];
827 // bin thread
828 atomic_inc(&histBuffer[pixels.s0*HIST_REDUNDANCY + threadOffset]);
829 atomic_inc(&histBuffer[pixels.s1*HIST_REDUNDANCY + threadOffset]);
830 atomic_inc(&histBuffer[pixels.s2*HIST_REDUNDANCY + threadOffset]);
831 atomic_inc(&histBuffer[pixels.s3*HIST_REDUNDANCY + threadOffset]);
832 atomic_inc(&histBuffer[pixels.s4*HIST_REDUNDANCY + threadOffset]);
833 atomic_inc(&histBuffer[pixels.s5*HIST_REDUNDANCY + threadOffset]);
834 atomic_inc(&histBuffer[pixels.s6*HIST_REDUNDANCY + threadOffset]);
835 atomic_inc(&histBuffer[pixels.s7*HIST_REDUNDANCY + threadOffset]);
836 }
837}
838)
839
840// HistogramRect Kernel: Reduction
841// only supports 4 channels
842// each work group handles a single channel of a single histogram bin
843KERNEL(
844__attribute__((reqd_work_group_size(256, 1, 1)))
845__kernel
846void kernel_HistogramRectAllChannelsReduction(
847 int n, // unused pixel redundancy
848 __global uint *histBuffer,
849 __global int* histResult) {
850
851 // declare variables
852 int channel = get_group_id(0)/HIST_SIZE;
853 int bin = get_group_id(0)%HIST_SIZE;
854 int value = 0;
855
856 // accumulate in register
857 for (uint i = get_local_id(0); i < HIST_REDUNDANCY; i+=GROUP_SIZE) {
858 value += histBuffer[ channel*HIST_SIZE*HIST_REDUNDANCY+bin*HIST_REDUNDANCY+i];
859 }
860
861 // reduction in local memory
862 __local int localHist[GROUP_SIZE];
863 localHist[get_local_id(0)] = value;
864 barrier(CLK_LOCAL_MEM_FENCE);
865 for (int stride = GROUP_SIZE/2; stride >= 1; stride /= 2) {
866 if (get_local_id(0) < stride) {
867 value = localHist[ get_local_id(0)+stride];
868 }
869 barrier(CLK_LOCAL_MEM_FENCE);
870 if (get_local_id(0) < stride) {
871 localHist[ get_local_id(0)] += value;
872 }
873 barrier(CLK_LOCAL_MEM_FENCE);
874 }
875
876 // write reduction to final result
877 if (get_local_id(0) == 0) {
878 histResult[get_group_id(0)] = localHist[0];
879 }
880} // kernel_HistogramRectAllChannels
881)
882
883
884KERNEL(
885// NUM_CHANNELS = 1
886__attribute__((reqd_work_group_size(256, 1, 1)))
887__kernel
888void kernel_HistogramRectOneChannelReduction(
889 int n, // unused pixel redundancy
890 __global uint *histBuffer,
891 __global int* histResult) {
892
893 // declare variables
894 // int channel = get_group_id(0)/HIST_SIZE;
895 int bin = get_group_id(0)%HIST_SIZE;
896 int value = 0;
897
898 // accumulate in register
899 for (int i = get_local_id(0); i < HIST_REDUNDANCY; i+=GROUP_SIZE) {
900 value += histBuffer[ bin*HIST_REDUNDANCY+i];
901 }
902
903 // reduction in local memory
904 __local int localHist[GROUP_SIZE];
905 localHist[get_local_id(0)] = value;
906 barrier(CLK_LOCAL_MEM_FENCE);
907 for (int stride = GROUP_SIZE/2; stride >= 1; stride /= 2) {
908 if (get_local_id(0) < stride) {
909 value = localHist[ get_local_id(0)+stride];
910 }
911 barrier(CLK_LOCAL_MEM_FENCE);
912 if (get_local_id(0) < stride) {
913 localHist[ get_local_id(0)] += value;
914 }
915 barrier(CLK_LOCAL_MEM_FENCE);
916 }
917
918 // write reduction to final result
919 if (get_local_id(0) == 0) {
920 histResult[get_group_id(0)] = localHist[0];
921 }
922} // kernel_HistogramRectOneChannelReduction
923)
924
925// ThresholdRectToPix Kernel
926// only supports 4 channels
927// imageData is input image (24-bits/pixel)
928// pix is output image (1-bit/pixel)
929KERNEL(
930\n#define CHAR_VEC_WIDTH 4 \n
931\n#define PIXELS_PER_WORD 32 \n
932\n#define PIXELS_PER_BURST 8 \n
933\n#define BURSTS_PER_WORD (PIXELS_PER_WORD/PIXELS_PER_BURST) \n
934 typedef union {
935 uchar s[PIXELS_PER_BURST*NUM_CHANNELS];
936 uchar4 v[(PIXELS_PER_BURST*NUM_CHANNELS)/CHAR_VEC_WIDTH];
937 } charVec;
938
939__attribute__((reqd_work_group_size(256, 1, 1)))
940__kernel
941void kernel_ThresholdRectToPix(
942 __global const uchar4 *imageData,
943 int height,
944 int width,
945 int wpl, // words per line
946 __global int *thresholds,
947 __global int *hi_values,
948 __global int *pix) {
949
950 // declare variables
951 int pThresholds[NUM_CHANNELS];
952 int pHi_Values[NUM_CHANNELS];
953 for (int i = 0; i < NUM_CHANNELS; i++) {
954 pThresholds[i] = thresholds[i];
955 pHi_Values[i] = hi_values[i];
956 }
957
958 // for each word (32 pixels) in output image
959 for (uint w = get_global_id(0); w < wpl*height; w += get_global_size(0)) {
960 unsigned int word = 0; // all bits start at zero
961 // for each burst in word
962 for (int b = 0; b < BURSTS_PER_WORD; b++) {
963 // load burst
964 charVec pixels;
965 int offset = (w / wpl) * width;
966 offset += (w % wpl) * PIXELS_PER_WORD;
967 offset += b * PIXELS_PER_BURST;
968
969 for (int i = 0; i < PIXELS_PER_BURST; ++i)
970 pixels.v[i] = imageData[offset + i];
971
972 // for each pixel in burst
973 for (int p = 0; p < PIXELS_PER_BURST; p++) {
974 for (int c = 0; c < NUM_CHANNELS; c++) {
975 unsigned char pixChan = pixels.s[p*NUM_CHANNELS + c];
976 if (pHi_Values[c] >= 0 && (pixChan > pThresholds[c]) == (pHi_Values[c] == 0)) {
977 const uint kTopBit = 0x80000000;
978 word |= (kTopBit >> ((b*PIXELS_PER_BURST+p)&31));
979 }
980 }
981 }
982 }
983 pix[w] = word;
984 }
985}
986
987\n#define CHAR_VEC_WIDTH 8 \n
988\n#define PIXELS_PER_WORD 32 \n
989\n#define PIXELS_PER_BURST 8 \n
990\n#define BURSTS_PER_WORD (PIXELS_PER_WORD/PIXELS_PER_BURST) \n
991 typedef union {
992 uchar s[PIXELS_PER_BURST*1];
993 uchar8 v[(PIXELS_PER_BURST*1)/CHAR_VEC_WIDTH];
994 } charVec1;
995
996__attribute__((reqd_work_group_size(256, 1, 1)))
997__kernel
998void kernel_ThresholdRectToPix_OneChan(
999 __global const uchar8 *imageData,
1000 int height,
1001 int width,
1002 int wpl, // words per line of output image
1003 __global int *thresholds,
1004 __global int *hi_values,
1005 __global int *pix) {
1006
1007 // declare variables
1008 int pThresholds[1];
1009 int pHi_Values[1];
1010 for (int i = 0; i < 1; i++) {
1011 pThresholds[i] = thresholds[i];
1012 pHi_Values[i] = hi_values[i];
1013 }
1014
1015 // for each word (32 pixels) in output image
1016 for (uint w = get_global_id(0); w < wpl*height; w += get_global_size(0)) {
1017 unsigned int word = 0; // all bits start at zero
1018
1019 // for each burst in word
1020 for (int b = 0; b < BURSTS_PER_WORD; b++) {
1021
1022 // load burst
1023 charVec1 pixels;
1024 // for each char8 in burst
1025 pixels.v[0] = imageData[
1026 w*BURSTS_PER_WORD
1027 + b
1028 + 0 ];
1029
1030 // for each pixel in burst
1031 for (int p = 0; p < PIXELS_PER_BURST; p++) {
1032
1033 //int littleEndianIdx = p ^ 3;
1034 //int bigEndianIdx = p;
1035 int idx =
1036\n#ifdef __ENDIAN_LITTLE__\n
1037 p ^ 3;
1038\n#else\n
1039 p;
1040\n#endif\n
1041 unsigned char pixChan = pixels.s[idx];
1042 if (pHi_Values[0] >= 0 && (pixChan > pThresholds[0]) == (pHi_Values[0] == 0)) {
1043 const uint kTopBit = 0x80000000;
1044 word |= (kTopBit >> ((b*PIXELS_PER_BURST+p)&31));
1045 }
1046 }
1047 }
1048 pix[w] = word;
1049 }
1050}
1051)
1052
1053 ; // close char*
1054
1055#endif // USE_EXTERNAL_KERNEL
1056#endif // TESSERACT_OPENCL_OCLKERNELS_H_
1057/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
#define KERNEL(...)
Definition: oclkernels.h:15