O slideshow foi denunciado.
Utilizamos seu perfil e dados de atividades no LinkedIn para personalizar e exibir anúncios mais relevantes. Altere suas preferências de anúncios quando desejar.

Prelude to halide_public

1.215 visualizações

Publicada em

Halide勉強会 @フィックスターズの資料です

Publicada em: Software
  • Seja o primeiro a comentar

Prelude to halide_public

  1. 1. . 1. 8 1 21 10 8 1 21 10
  2. 2. è t : : è ln a F è pu ke i – rsx • Fg o h V P LN • .. da k N LP LN – I • 1 S ke iG )0)2 2H v LP M E A ( A 8A C
  3. 3. è 21 . F è 21 . 21 . CA è 2 0. 2 .2 F è 21 . CA 2 0. 2 .2 è 1 8 81 22 1 08 . . .
  4. 4. 3 23 02102 0 .. 2 3 2 3 2
  5. 5. è Vlo s / – DLLI, D FEA F GC H C è – . / W – re hn gb pR sa W • re hn • i sa U gb pR sa – iR xU PdRlv S • iR , N+ -:6 8 0: 64 4/4- 2 2 N CHG • t , 6 4: . . 8I G. 8I G2 6 LF 6 L F ) O ( + 1EN L .H IH LEHG -FF ECDL M A [1] J.Ragan-Kelley, et al, Halide: A Language and Compiler for Optimizing Parallelism, Locality, and Recomputation in Image Processing Pipelines, PLDI 2013
  6. 6. ) ) ) ( ( è iL a v lD – x A – iC H • a n e • gdLr s h iC H • FLo L è 10 D a l – t – 5202. L p 0 5 5 5 0 2 11 50. 5 58
  7. 7. èd a è a S – c – 0 6 C e – F – A 8 6 2 2 6 21 00 .6
  8. 8. 2 7 2 02102 0 .. 2 2 2 Func conv3x3(Func in) { Func f; Var x, y; RDom r(-1, 3, -1, 3); Func clamped = BoundaryConditions::constant_exterior(in, 0); f(x, y) = sum(clamped(x+r.x, y+r.y)); return f; }
  9. 9. 2 2 12 10 1 . 1 2 1 2 1 Func conv3x3(Func in) { Func f; Var x, y; RDom r(-1, 3, -1, 3); Func clamped = BoundaryConditions::constant_exterior(in, 0); f(x, y) = sum(clamped(x+r.x, y+r.y)); return f; } 8
  10. 10. 2 ) ( ,2 89- 8 -92 33 20198 8 . Func conv3x3(Func in) { Func f; Var x, y; RDom r(-1, 3, -1, 3); Func clamped = BoundaryConditions::constant_exterior(in, 0); f(x, y) = sum(clamped(x+r.x, y+r.y)); return f; } F C A
  11. 11. 2 8 (. ) 1 21 ) .10 . Func conv3x3(Func in) { Func f; Var x, y; RDom r(-1, 3, -1, 3); Func clamped = BoundaryConditions::constant_exterior(in, 0); f(x, y) = sum(clamped(x+r.x, y+r.y)); return f; } F C A 2) .0
  12. 12. 2 2 02102 0 .. 2 2 2 Func conv3x3(Func in) { Func f; Var x, y; RDom r(-1, 3, -1, 3); Func clamped = BoundaryConditions::constant_exterior(in, 0); f(x, y) = sum(clamped(x+r.x, y+r.y)); return f; } A 8 C
  13. 13. 1 è c M [ sc – A • v dhon r sgt Ra – • Iuhr • d Ie p mIpe iI è n mx ic T ]y l T – cS Pa ] – n mx ic S Pa T H F C A ) A A C ( A C A8 8A 8 0 2 . 8C ( C C 6 G 16 8 , 8 8 A 68 8 8 (). 1 0( ,
  14. 14. 3 23 02102 0 .. 2 3 2 3 2 Func blur_x, blur_y; Var x, y; blur_x(x, y) = in(x, y) + in(x+1, y); blur_y(x, y) = (blur_x(x, y) + blur_x(x, y+1)) / 4; for (int y=0; y<height; y++) { for (int x=0; x<width; x++) { blur_x[y][x] = in[y][x] + in[y][x+1]; } } for (int y=0; y<height; y++) { for (int x=0; x<width; x++) { blur_y[y][x] = (blur_x[y][x] + blur_x[y+1][x]) / 4; } } for (int y=0; y<height; y++) { for (int x=0; x<width; x++) { blur_x[0][x] = in[y][x] + in[y][x+1]; blur_x[1][x] = in[y+1][x] + in[y+1][x+1]; } for (int x=0; x<width; x++) { blur_y[y][x] = (blur_x[0][x] + blur_x[1][x]) / 4; } } for (int y=0; y<height; y++) { for (int x=0; x<width; x++) { blur_y[y][x] = (in[y][x] + in[y][x+1] + in[y+1][x] + in[y+1][x+1]) / 4; } } 8
  15. 15. 0 4 0 2 11 0. 8 Func box_filter_3x3(Func in) { Func blurx, blury; Var x, y, xi, yi; blurx(x, y) = (in(x-1, y) + in(x, y) + in(x+1, y))/3; blury(x, y) = (blurx(x, y-1) + blurx(x, y) + blurx(x, y+1))/3; if (get_target().has_gpu_feature()) { blury.gpu_tile(x, y, xi, yi, 32, 8); blurx.compute_at(blury, x); } else { blury.tile(x, y, xi, yi, 256, 32).vectorize(xi, 8).parallel(y); blurx.compute_at(blury, x).store_at(blury, x).vectorize(x, 8); } return blury; } F ltd ltd è oC rltAgveC – C xhA PU G èP sp gve ltai – P Cgve n ltd
  16. 16. è1A )5 – 00 IaO VU N 8 o pn è 5/ C ) A C C – ghN c m I elNo è 2 G – .2 . 2 CC D P C FM 8 N idT I H GCD C ) A D ( DC C 8
  17. 17. è – 1 68 8 2-.81 7. . /2 8 78 1 2-. 2 8 78 76 7 2 7201 8 7.8.7:.-
  18. 18. 7 2 02102 0 .. 2 2 2
  19. 19. è) 0 a d C eg èov – s hrF eg – FH i • A a FH plA – tn ( 8 2 2 21 00 .
  20. 20. 8 98 8 2 1 1 10 f(x, y) = in(x, y) + 1; produce f$1 { let f.y.loop_max = f.y.max let f.y.loop_min = f.y.min let f.y.loop_extent = ((f.y.max + 1) - f.y.min) let f.x.loop_max = f.x.max let f.x.loop_min = f.x.min let f.x.loop_extent = ((f.x.max + 1) - f.x.min) for (f.y, f.y.loop_min, f.y.loop_extent) { for (f.x, f.x.loop_min, f.x.loop_extent) { f(f.x, f.y) = in(f.x, f.y) + 1 } } } 28 9 1 8 1 10 01 .
  21. 21. M & FG EF )CEDCE G CB E GF E F EI . 00 1 ) FH8F GF 0C E B (CHB F B E B GG B B 2 CDG A L G CB 0CCD 2 EG G CB ) )C A B G CB . 2EC E A 2 FF BEC B GCE L G CB
  22. 22. èIRVisitor IRGraphVisitor – R a 0 0 • rt a • IRGraphVisitor d e V C a – n ol s C i V èIRMutator – C rt FV 0 0 – vh V A i V è p VA I gx V 0 0 2 11 0./ 8
  23. 23. è A8 C 2 12 10 1 . 1 2 1 2 1 class PrintLoops : public IRVisitor { using IRVisitor::visit; void visit(const For *op) { std::cout << op->name << std::endl; IRVisitor::visit(op); } }; void print_loops(Stmt s) { PrintLoops v; s.accept(v); }
  24. 24. èF AI 0 3 3 032 11 0. 8 class PrintLoops : public IRVisitor { using IRVisitor::visit; void visit(const For *op) { std::cout << op->name << std::endl; IRVisitor::visit(op); } }; void print_loops(Stmt s) { PrintLoops v; s.accept(v); } 0 0 3 CA CA
  25. 25. è 8 4 2 02102 4 0 .. 2 4 2 2 class PrintLoops : public IRVisitor { using IRVisitor::visit; void visit(const For *op) { std::cout << op->name << std::endl; IRVisitor::visit(op); } }; void print_loops(Stmt s) { PrintLoops v; s.accept(v); } 02C8 4 A8 8F
  26. 26. è F 8 . 5 1 21 5.10 . 5 class PrintLoops : public IRVisitor { using IRVisitor::visit; void visit(const For *op) { std::cout << op->name << std::endl; IRVisitor::visit(op); } }; void print_loops(Stmt s) { PrintLoops v; s.accept(v); } 1 F C A F
  27. 27. è F SCL A 0 8 6 6 6 80 11 60. 8 6 6 class PrintLoops : public IRVisitor { using IRVisitor::visit; void visit(const For *op) { std::cout << op->name << std::endl; IRVisitor::visit(op); } }; void print_loops(Stmt s) { PrintLoops v; s.accept(v); } 828C 60 8 A C P
  28. 28. 7 2 02102 0 .. 2 2 2
  29. 29. è o A A è hAg C n n e adli – C A Ag – n – n A 8 A F – n A A 2 12 10 1 . 1 2 1 2 1
  30. 30. ) )( ) è bnky ) )( ) – p • p • p • p s p PC • p dg PC – p x p t S • 1 8 1 . 1 S – l o aedv 01 i r P – t crh pv F M è +8 10 801 , 8+ u A 8 98 8 2 1 1 10
  31. 31. 8 Ss 8 . 1 1 c A vu S – • 1 . 48 1 8 S n – • 1 . 48 1 8 c . 1 1 ha S n – • V d V S n vu l t S P i IC Sx a S 310 54 2 48 c i or mpSiC A e F g S P 4 . 8 8 . 48 55 423 1 1 10
  32. 32. è 1 8 0 3 ed è 1 : ! F 1 8 :18 "# I GP – ! 1 : – $% 1 8 0 3 • D a VS – &% 21 .1 1: • D ed D a bc C 8 1 ) 1 8 ( 8 A "# = $ D# ×($# , &# , 1). ≥ 0}
  33. 33. è AC – 1 8 1 8 • !"# = (&, () – 8. 1 1 • *"# = (+, ,) – 1 8 1 8 P • 0 ≤ & < + (⇒ 0 ≤ & ≤ + − 1) • 0 ≤ ( < , (⇒ 0 ≤ ( ≤ , − 1) 8 8 8 23 1 1 10 G I F for (y=0; y<H; y++) for (x=0; x<W; x++) { S1: s = 0; //S1 for (ky=0; ky<KS; ky++) for (kx=0; kx<KS; kx++) S2: s += src[y+ky][x+kx] * kernel[ky][kx]; S3: dst[y][x] = s >> t; }
  34. 34. è AC – 1 8 1 8 • !"# = (&, (, )&, )() – 8. 1 1 • +"# = (,, -, ./) – 1 8 1 8 P • 0 ≤ & < , (⇒ 0 ≤ ( ≤ , − 1) • 0 ≤ ( < - (⇒ 0 ≤ & ≤ - − 1) • 0 ≤ )& < ., (⇒ 0 ≤ )& ≤ ., − 1) • 0 ≤ )( < .- (⇒ 0 ≤ )( ≤ .- − 1) 8 8 8 23 1 1 10 G I F for (y=0; y<H; y++) for (x=0; x<W; x++) { S1: s = 0; //S1 for (ky=0; ky<KS; ky++) for (kx=0; kx<KS; kx++) S2: s += src[y+ky][x+kx] * kernel[ky][kx]; S3: dst[y][x] = s >> t; }
  35. 35. è 2 C A D"# C è D"$ C 8 . 4 34 13213 .10 3. 4 3 4 3 %"# = { (, * | 1 0 0 0 0 −1 0 1 0 −1 0 1 0 0 0 0 −1 0 1 −1 ( * H W 1 ≥ 2} %"$ = (, *, 4(, 4* 1 0 0 0 0 0 0 0 −1 0 1 0 0 0 0 −1 0 1 0 0 0 0 0 0 0 −1 0 1 0 0 0 −1 0 0 1 0 0 0 0 0 0 0 −1 0 0 0 1 −1 0 0 0 1 0 0 0 0 0 0 0 −1 0 0 0 −1 ( * 4( 4* H W KS 1 ≥ 2} %"# F for (y=0; y<H; y++) for (x=0; x<W; x++) { S1: s = 0; //S1 for (ky=0; ky<KS; ky++) for (kx=0; kx<KS; kx++) S2: s += src[y+ky][x+kx] * kernel[ky][kx]; S3: dst[y][x] = s >> t; } O W-1 H-1 y x
  36. 36. è 050 F VC è 050 ! 0 3 ,0. "# A .20 3 1 . 3 $# ("# ) F S è$#' "#' ≪ $#) "#) ⇒ (!+, "#') (!-, "#)) I – (≪: A ) 3 8 3 312 0 0 0 $# ("# ) = Θ# ×("# , 2, 1)4
  37. 37. è F d 6 e è . e N F 6 a – S F g S è T S P AC 2 8 2 6 201 . 3 3 for (y=0; y<H; y++) for (x=0; x<W; x++) { S1: s = 0; //S1 for (ky=0; ky<KS; ky++) for (kx=0; kx<KS; kx++) S2: s += src[y+ky][x+kx] * kernel[ky][kx]; S3: dst[y][x] = s >> t; } !"# $, & = 0, $, 0, &, 0 ) !"* $, &, +$, +& = 0, $, 0, &, 1, +$, 0, +&, 0 ) !"- $, & = 0, $ 0, &, 2 ) T S P T S PF
  38. 38. è3 CFΘ A 85 5 2 32 21 00 . 5 5 7 "#$ %, ' = 0, %, 1, ', 0 + = 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 % ' H W 1 "#. /01 = 0, %, 0, ', 1, 2%, 0, 2' + = 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 % ' 2% 2' H W KS 1
  39. 39. è 0 0 FA IS 3 0 è 0 0 ! 0 3 ,0. "# FA I 0 ..0 . 3 $("# ) M C I 3 8 3 312 0 0 0 $("# ) = F×("# , +# , 1)-
  40. 40. 8 . 3 13213 .10 3. 3 3 !"#$%& ', ), *', *) = ' + *', ) + *) - = 1 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 ' ) *' *) H W KS 1 !"45$6 ', ) = ', ) - = 1 0 0 0 0 0 1 0 0 0 ' ) H W 1 9 for (y=0; y<H; y++) for (x=0; x<W; x++) { S1: s = 0; //S1 for (ky=0; ky<KS; ky++) for (kx=0; kx<KS; kx++) S2: s += src[y+ky][x+kx] * kernel[ky][kx]; S3: dst[y][x] = s >> t; }
  41. 41. è 8102. 82 0 2 AF C AF è S 8102. 82 0 2 !"# $" – % • 0: 4 0 2 11 0. !"# $" = %×!"($")
  42. 42. ) ( 4 2 11 . 8 !"# $, & = 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 $ & N M 1 !"#, $, & = 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 - 0 0 0 0 0 0 0 0 0 $ & N M 1 for (i=0; i<N; i++) for (j=0; j<M; j++) S1(i, j); for (j=0; j<M; j++) for (i=0; i<N; i++) S1(i, j); . = 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 × F A 0 C
  43. 43. è 2 28 D CVihFh I 2 8 .20 e S I – e F 2 gc è 2 28 ! " 2 281280 , 421 #$%& P dL a A 8 4 2 2 21 #$%& = ()*, ),) .$ .& /$%& ×()$, )&, 1$, 1&, 1)& ≥ ≥ = 4}
  44. 44. C 1 . 48 8 .4 F D – C 1 . 48 8 .4 4 . 8 8 . 48 423 1 1 10 1 1 01 8 310 . for (i=0; i<N; i++) { S: s[i] = 0; for (j=0; j<M; j++) { T: s[i] = s[i] + a[i][j] * x[j]; } } AI !"#$ = 1 0 0 0 0 0 −1 0 0 1 0 0 0 1 0 0 0 0 0 −1 0 1 0 0 0 0 1 0 0 0 0 0 −1 0 1 0 1 −1 0 0 0 0 )" )$ *$ + M 1 ≥ ≥ ≥ ≥ ≥ ≥ = 0
  45. 45. A 0 CD – 0 – 0 8 44 12 0 0 0 080 0 . 4 20 for (i=0; i<N; i++) { S: s[i] = 0; for (j=0; j<M; j++) { T: s[i] = s[i] + a[i][j] * x[j]; } } I F !"#$ = 1 0 0 0 0 0 −1 0 0 1 0 0 0 1 0 0 0 0 0 −1 0 1 0 0 0 0 1 0 0 0 0 0 −1 0 1 0 1 −1 0 0 0 0 )" )$ *$ + M 1 ≥ ≥ ≥ ≥ ≥ ≥ = 0
  46. 46. D. 2 5 8 58 F – 2 5 8 58 – . 2 5 8 58 D. 2 A 002 80 5 8 S P – !: #$ = #& 5 5 8 5 4 2 2 21 '$(& = 1 0 0 0 0 0 −1 0 0 1 0 0 0 1 0 0 0 0 0 −1 0 1 0 0 0 0 1 0 0 0 0 0 −1 0 1 0 1 −1 0 0 0 0 #$ #& ,& - M 1 ≥ ≥ ≥ ≥ ≥ ≥ = 0 for (i=0; i<N; i++) { S: s[i] = 0; for (j=0; j<M; j++) { T: s[i] = s[i] + a[i][j] * x[j]; } } MIC 2 281280A A421
  47. 47. 0 èP 6 4. 4 1 . !"#$ %"#$ DC & L 0 F – D & L 0 F A 2 8 8 6 2 4 201 8 8 . %"#$ = &(" )" − &($ )$ ≫ 0 !"#$ = 1 0 0 0 0 0 −1 0 0 1 0 0 0 1 0 0 0 0 0 −1 0 1 0 0 0 0 1 0 0 0 0 0 −1 0 1 0 1 −1 0 0 0 0 ." .$ /$ 0 M 1 ≥ ≥ ≥ ≥ ≥ ≥ = 0
  48. 48. 8 .74 4 1 21 .10 . 4 4
  49. 49. è yit – 0 F8 2 / ro sm • - 2 2 • 148 C C 4 • / F 44 AA C 4 – x eHagl kc dHe • vb e uM kph – -0uM . 2 F P S y n L I è H – 8 A 8C 4 2: 2 C 2 F8 2 A 2 A 2 8 A A D
  50. 50. èF I A – 0 8 4 20/9.4 /04 HC è . 4 .9 98 9. 44 9 12 90 09 0/ Func matmul(Func a, Func b, int size) { Func c; Var i, j; RDom k(0, size); c(i, j) = 0; c(i, j) += a(k, j) * b(i, k); return c; } for (c$3.s0.j, c$3.s0.j.loop_min, c$3.s0.j.loop_extent) { for (c$3.s0.i, c$3.s0.i.loop_min, c$3.s0.i.loop_extent) { c$3(c$3.s0.i, c$3.s0.j) = 0 } } for (c$3.s1.j, c$3.s1.j.loop_min, c$3.s1.j.loop_extent) { for (c$3.s1.i, c$3.s1.i.loop_min, c$3.s1.i.loop_extent) { for (c$3.s1.r78$x, 0, 100) { c$3(c$3.s1.i, c$3.s1.j) = (c$3(c$3.s1.i, c$3.s1.j) + (a$3(c$3.s1.r78$x, c$3.s1.j)*b$3(c$3.s1.i, c$3.s1.r78$x))) } } } .4 /0 .4 /0
  51. 51. 2 02102 0 .. 2 2 25 Building polyhedral models... Iteration Sets := (c$3.s0.j, c$3.s0.i) Domain := [c$3.s0.j.loop_min, ((c$3.s0.j.loop_min + c$3.s0.j.loop_extent) + -1)], [c$3.s0.i.loop_min, ((c$3.s0.i.loop_min + c$3.s0.i.loop_extent) + -1)] Schedule := (2, c$3.s0.j, 0, c$3.s0.i, 0) Provides := c$3 := (c$3.s0.i, c$3.s0.j) : (c$3.s0.i, c$3.s0.j) Iteration Sets := (c$3.s1.j, c$3.s1.i, c$3.s1.r78$x) Domain := [c$3.s1.j.loop_min, ((c$3.s1.j.loop_min + c$3.s1.j.loop_extent) + -1)], [c$3.s1.i.loop_min, ((c$3.s1.i.loop_min + c$3.s1.i.loop_extent) + -1)], [0, 99] Schedule := (3, c$3.s1.j, 0, c$3.s1.i, 0, c$3.s1.r78$x, 0) Provides := c$3 := (c$3.s1.i, c$3.s1.j) : (c$3.s1.i, c$3.s1.j) Calls := c$3 := (c$3.s1.i, c$3.s1.j) : (c$3.s1.i, c$3.s1.j) a$3 := (c$3.s1.r78$x, c$3.s1.j) : (c$3.s1.r78$x, c$3.s1.j) b$3 := (c$3.s1.i, c$3.s1.r78$x) : (c$3.s1.i, c$3.s1.r78$x)
  52. 52. 0 5 5 5 0 2 11 50. 5 58 for (c$3.s0.j, c$3.s0.j.loop_min, c$3.s0.j.loop_extent) { for (c$3.s0.i, c$3.s0.i.loop_min, c$3.s0.i.loop_extent) { c$3(c$3.s0.i, c$3.s0.j) = 0 } } for (c$3.s1.j, c$3.s1.j.loop_min, c$3.s1.j.loop_extent) { for (c$3.s1.i, c$3.s1.i.loop_min, c$3.s1.i.loop_extent) { for (c$3.s1.r78$x, 0, 100) { c$3(c$3.s1.i, c$3.s1.j) = (c$3(c$3.s1.i, c$3.s1.j) + (a$3(c$3.s1.r78$x, c$3.s1.j)*b$3(c$3.s1.i, c$3.s1.r78$x))) } } } parallel (c$3.s0.j, c$3.s0.j.loop_min, c$3.s0.j.loop_extent) { for (c$3.s0.i, c$3.s0.i.loop_min, c$3.s0.i.loop_extent) { c$3(c$3.s0.i, c$3.s0.j) = 0 } } parallel (c$3.s1.j, c$3.s1.j.loop_min, c$3.s1.j.loop_extent) { for (c$3.s1.i, c$3.s1.i.loop_min, c$3.s1.i.loop_extent) { for (c$3.s1.r78$x, 0, 100) { c$3(c$3.s1.i, c$3.s1.j) = (c$3(c$3.s1.i, c$3.s1.j) + (a$3(c$3.s1.r78$x, c$3.s1.j)*b$3(c$3.s1.i, c$3.s1.r78$x))) } } } 10 10
  53. 53. è 2 5 2 2 8 8 5 5 25 201 8 8 . Building polyhedral models... Iteration Sets := (f.s1.r4$x) Domain := [2, 99] Schedule := (1, f.s1.r4$x, 0) Provides := f := (f.s1.r4$x) : (f.s1.r4$x) Calls := f := ((f.s1.r4$x + -2)) : (f.s1.r4$x) f := ((f.s1.r4$x + -1)) : (f.s1.r4$x) Flow: f(f.s1.r4$x) -> f((f.s1.r4$x + -2)) : (=, -, =) Flow: f(f.s1.r4$x) -> f((f.s1.r4$x + -1)) : (=, -, =) f(x) = x; f(r.x) = f(r.x-2) + f(r.x-1); for (f.s0.x, f.s0.x.loop_min, f.s0.x.loop_extent) { f(f.s0.x) = f.s0.x } for (f.s1.r4$x, 2, 98) { f(f.s1.r4$x) = (f((f.s1.r4$x + -2)) + f((f.s1.r4$x + -1))) } 2. 2. FAC
  54. 54. 8 . 5 3 13213 5.10 3. 5 3 3
  55. 55. è 5401 – odHag il n D . – r L rAp – H rA v R S Ia è 5401 – Rtxs ehC a F eh – 4 8 4 55 42 1 1 10

×