The Battle for Wesnoth  1.17.17+dev
xbrz.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2014 - 2018 by Chris Beck <render787@gmail.com>
3  Part of the Battle for Wesnoth Project https://www.wesnoth.org/
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; either version 2 of the License, or
7  (at your option) any later version.
8  This program is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY.
10  See the COPYING file for more details.
11 
12  This is a derivative work of the xBRZ component of the HqMAME project
13  by Zenju. The original Licensing statement follows, indented with //
14  The primary changes are syntactic, to make it compile with C99+Boost,
15  and functional, to make it handle an alpha channel in the image in a
16  manner proper for SDL.
17 
18  It is not possible to extend the MAME 'special exception' to all of
19  the Battle for Wesnoth project, however, the special exception is
20  granted for my derivative forms of this work.
21 */
22 
23 // ****************************************************************************
24 // * This file is part of the HqMAME project. It is distributed under *
25 // * GNU General Public License: http://www.gnu.org/licenses/gpl.html *
26 // * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved *
27 // * *
28 // * Additionally and as a special exception, the author gives permission *
29 // * to link the code of this program with the MAME library (or with modified *
30 // * versions of MAME that use the same license as MAME), and distribute *
31 // * linked combinations including the two. You must obey the GNU General *
32 // * Public License in all respects for all of the code used other than MAME. *
33 // * If you modify this file, you may extend this exception to your version *
34 // * of the file, but you are not obligated to do so. If you do not wish to *
35 // * do so, delete this exception statement from your version. *
36 // ****************************************************************************
37 
38 #include "xBRZ/xbrz.hpp"
39 #include "config.hpp"
40 #include <cassert>
41 #include <cmath>
42 #include <algorithm>
43 
44 #include <functional>
45 
46 namespace
47 {
//extract byte number N of a packed 32-bit value (N = 0 is the least significant byte)
template <uint32_t N> inline
unsigned char getByte(uint32_t val)
{
    const uint32_t shifted = val >> (8 * N);
    return static_cast<unsigned char>(shifted & 0xff);
}

//channel accessors: red is byte 2, green is byte 1, blue is byte 0
inline unsigned char getRed(uint32_t val)
{
    return getByte<2>(val);
}

inline unsigned char getGreen(uint32_t val)
{
    return getByte<1>(val);
}

inline unsigned char getBlue(uint32_t val)
{
    return getByte<0>(val);
}
54 
//magnitude of "value": negates it when it compares less than zero, otherwise returns it unchanged
template <class T> inline
T abs(T value)
{
    //static_assert(std::is_signed<T>::value, "");
    if (value < 0)
        return -value;
    return value;
}
61 
//channel masks of a packed 32-bit pixel: alpha in the top byte, then red, green, blue
const uint32_t redMask   = 0xff0000;
const uint32_t greenMask = 0x00ff00;
const uint32_t blueMask  = 0x0000ff;
const uint32_t alphaMask = 0xff000000;

//Blend color "col" over destination "dst" with opacity N / M.
//
// - a fully transparent "col" (alpha byte 0) leaves "dst" untouched
// - a fully transparent "dst" is simply replaced by "col"
// - otherwise every channel, alpha included, becomes (col * N + dst * (M - N)) / M
//
//Note: I had to change this to perform alpha compositing -- xbrz assumes there is no alpha channel
//(and sets it to zero when it blends), our sprites have alpha however.
template <unsigned int N, unsigned int M> inline
void alphaBlend(uint32_t& dst, uint32_t col)
{
    //the in-place channel arithmetic below only fits into 32 bit while these hold
    static_assert(N < 256, "possible overflow of (col & redMask) * N");
    static_assert(M < 256, "possible overflow of (col & redMask) * N + (dst & redMask) * (M - N)");
    static_assert(0 < N && N < M, "opacity N / M must be a proper fraction");

    const uint32_t col_alpha = col >> 24; // & with alphaMask is unnecessary
    if (!col_alpha)
        return;

    const uint32_t dst_alpha = dst >> 24;
    if (!dst_alpha)
    {
        dst = col;
        return;
    }

    //TODO: Figure out if there's some way to combine the multiplicative approach with the "averaged alpha", and to feed back the
    // alpha into the colors, without making it all very slow. Current approach looks okay, but I think shadows could be better,
    // also I think some units are getting 'black outlines' now because their black pixels with 0 alpha (background) are getting
    // averaged with their foreground.
    // For the analytical "over" operator see http://en.wikipedia.org/wiki/Alpha_compositing#Analytical_derivation_of_the_over_operator

    dst = (redMask   & (((col & redMask  ) * N + (dst & redMask  ) * (M - N)) / M)) | //this works because 8 upper bits are free
          (greenMask & (((col & greenMask) * N + (dst & greenMask) * (M - N)) / M)) |
          (blueMask  & (((col & blueMask ) * N + (dst & blueMask ) * (M - N)) / M)) |
          (alphaMask & (((col_alpha * N + dst_alpha * (M - N)) / M) << 24)); // need to downshift and upshift because of overflow
}
137 
138 
139 //inline
140 //double fastSqrt(double n)
141 //{
142 // __asm //speeds up xBRZ by about 9% compared to std::sqrt
143 // {
144 // fld n
145 // fsqrt
146 // }
147 //}
148 //
149 
#if 0
//disabled: floating-point blend of the three color channels, pix1 weighted by "alpha" and pix2 by (1 - alpha)
inline
uint32_t alphaBlend2(uint32_t pix1, uint32_t pix2, double alpha)
{
    const uint32_t red   = static_cast<uint32_t>((pix1 & redMask  ) * alpha + (pix2 & redMask  ) * (1 - alpha));
    const uint32_t green = static_cast<uint32_t>((pix1 & greenMask) * alpha + (pix2 & greenMask) * (1 - alpha));
    const uint32_t blue  = static_cast<uint32_t>((pix1 & blueMask ) * alpha + (pix2 & blueMask ) * (1 - alpha));

    return (redMask & red) | (greenMask & green) | (blueMask & blue);
}
#endif
159 
//move a pixel pointer by a distance given in bytes rather than in pixels
uint32_t* byteAdvance(uint32_t* ptr, int bytes)
{
    char* raw = reinterpret_cast<char*>(ptr);
    return reinterpret_cast<uint32_t*>(raw + bytes);
}

const uint32_t* byteAdvance(const uint32_t* ptr, int bytes)
{
    const char* raw = reinterpret_cast<const char*>(ptr);
    return reinterpret_cast<const uint32_t*>(raw + bytes);
}


//write "col" into a blockWidth x blockHeight rectangle starting at "trg"; consecutive rows are "pitch" bytes apart
inline
void fillBlock(uint32_t* trg, int pitch, uint32_t col, int blockWidth, int blockHeight)
{
    uint32_t* row = trg;
    for (int rowsLeft = blockHeight; rowsLeft > 0; --rowsLeft)
    {
        int x = 0;
        while (x < blockWidth)
        {
            row[x] = col;
            ++x;
        }
        row = byteAdvance(row, pitch);
    }
}

//square variant: fills an n x n block
inline
void fillBlock(uint32_t* trg, int pitch, uint32_t col, int n)
{
    fillBlock(trg, pitch, col, n, n);
}
178 
179 
//FORCE_INLINE: strongest inlining request the compiler offers, plain "inline" as fallback
#if defined(_MSC_VER)
    #define FORCE_INLINE __forceinline
#elif defined(__GNUC__)
    #define FORCE_INLINE __attribute__((always_inline)) inline
#else
    #define FORCE_INLINE inline
#endif
187 
188 
enum RotationDegree //clock-wise
{
    ROT_0   = 0,
    ROT_90  = 1,
    ROT_180 = 2,
    ROT_270 = 3
};

//Compile-time lookup: for element (I, J) = (row, col) of a rotated N x N matrix,
//I_old / J_old are the coordinates of that element before the rotation.
template <RotationDegree rotDeg, size_t I, size_t J, size_t N>
struct MatrixRotation;

//no rotation: coordinates map onto themselves (terminates the recursion below)
template <size_t I, size_t J, size_t N>
struct MatrixRotation<ROT_0, I, J, N>
{
    static constexpr size_t I_old = I;
    static constexpr size_t J_old = J;
};

//any other rotation is expressed as one quarter turn applied to the previous rotation step
template <RotationDegree rotDeg, size_t I, size_t J, size_t N>
struct MatrixRotation
{
private:
    using Previous = MatrixRotation<static_cast<RotationDegree>(rotDeg - 1), I, J, N>;

public:
    static constexpr size_t I_old = N - 1 - Previous::J_old; //old coordinates before rotation!
    static constexpr size_t J_old = Previous::I_old;
};
214 
215 
216 template <size_t N, RotationDegree rotDeg>
217 class OutputMatrix
218 {
219 public:
220  OutputMatrix(uint32_t* out, int outWidth) : //access matrix area, top-left at position "out" for image with given width
221  out_(out),
222  outWidth_(outWidth) {}
223 
224  template <size_t I, size_t J>
225  uint32_t& ref() const
226  {
227  static const size_t I_old = MatrixRotation<rotDeg, I, J, N>::I_old;
228  static const size_t J_old = MatrixRotation<rotDeg, I, J, N>::J_old;
229  return *(out_ + J_old + I_old * outWidth_);
230  }
231 
232 private:
233  uint32_t* out_;
234  const int outWidth_;
235 };
236 
237 
//value multiplied by itself
template <class T> inline
T square(T value)
{
    return value * value;
}
240 
241 
242 /*
243 inline
244 void rgbtoLuv(uint32_t c, double& L, double& u, double& v)
245 {
246  //http://www.easyrgb.com/index.php?X=MATH&H=02#text2
247  double r = getRed (c) / 255.0;
248  double g = getGreen(c) / 255.0;
249  double b = getBlue (c) / 255.0;
250 
251  if ( r > 0.04045 )
252  r = std::pow(( ( r + 0.055 ) / 1.055 ) , 2.4);
253  else
254  r /= 12.92;
255  if ( g > 0.04045 )
256  g = std::pow(( ( g + 0.055 ) / 1.055 ) , 2.4);
257  else
258  g /= 12.92;
259  if ( b > 0.04045 )
260  b = std::pow(( ( b + 0.055 ) / 1.055 ) , 2.4);
261  else
262  b /= 12.92;
263 
264  r *= 100;
265  g *= 100;
266  b *= 100;
267 
268  double x = 0.4124564 * r + 0.3575761 * g + 0.1804375 * b;
269  double y = 0.2126729 * r + 0.7151522 * g + 0.0721750 * b;
270  double z = 0.0193339 * r + 0.1191920 * g + 0.9503041 * b;
271  //---------------------
272  double var_U = 4 * x / ( x + 15 * y + 3 * z );
273  double var_V = 9 * y / ( x + 15 * y + 3 * z );
274  double var_Y = y / 100;
275 
276  if ( var_Y > 0.008856 ) var_Y = std::pow(var_Y , 1.0/3 );
277  else var_Y = 7.787 * var_Y + 16.0 / 116;
278 
279  const double ref_X = 95.047; //Observer= 2 (degrees), Illuminant= D65
280  const double ref_Y = 100.000;
281  const double ref_Z = 108.883;
282 
283  const double ref_U = ( 4 * ref_X ) / ( ref_X + ( 15 * ref_Y ) + ( 3 * ref_Z ) );
284  const double ref_V = ( 9 * ref_Y ) / ( ref_X + ( 15 * ref_Y ) + ( 3 * ref_Z ) );
285 
286  L = ( 116 * var_Y ) - 16;
287  u = 13 * L * ( var_U - ref_U );
288  v = 13 * L * ( var_V - ref_V );
289 }
290 */
291 
#if 0
//disabled: convert a packed RGB pixel to CIE L*a*b* (observer 2 degrees, illuminant D65)
// L receives the lightness, A and B the two chroma components, each truncated to the given integer type
inline
void rgbtoLab(uint32_t c, unsigned char& L, signed char& A, signed char& B)
{
    //code: http://www.easyrgb.com/index.php?X=MATH
    //test: http://www.workwithcolor.com/color-converter-01.htm
    //------RGB to XYZ------
    double r = getRed  (c) / 255.0;
    double g = getGreen(c) / 255.0;
    double b = getBlue (c) / 255.0;

    //undo the sRGB gamma per channel (each result has to go back into its own channel)
    r = r > 0.04045 ? std::pow(( r + 0.055 ) / 1.055, 2.4) : r / 12.92;
    g = g > 0.04045 ? std::pow(( g + 0.055 ) / 1.055, 2.4) : g / 12.92;
    b = b > 0.04045 ? std::pow(( b + 0.055 ) / 1.055, 2.4) : b / 12.92;

    r *= 100;
    g *= 100;
    b *= 100;

    double x = 0.4124564 * r + 0.3575761 * g + 0.1804375 * b;
    double y = 0.2126729 * r + 0.7151522 * g + 0.0721750 * b;
    double z = 0.0193339 * r + 0.1191920 * g + 0.9503041 * b;
    //------XYZ to Lab------
    const double refX = 95.047;  //
    const double refY = 100.000; //Observer= 2 (degrees), Illuminant= D65
    const double refZ = 108.883; //
    double var_X = x / refX;
    double var_Y = y / refY;
    double var_Z = z / refZ;

    var_X = var_X > 0.008856 ? std::pow(var_X, 1.0 / 3) : 7.787 * var_X + 4.0 / 29;
    var_Y = var_Y > 0.008856 ? std::pow(var_Y, 1.0 / 3) : 7.787 * var_Y + 4.0 / 29;
    var_Z = var_Z > 0.008856 ? std::pow(var_Z, 1.0 / 3) : 7.787 * var_Z + 4.0 / 29;

    L = static_cast<unsigned char>(116 * var_Y - 16);
    A = static_cast<  signed char>(500 * (var_X - var_Y));
    B = static_cast<  signed char>(200 * (var_Y - var_Z));
}
#endif
331 
#if 0
//disabled: color distance as Delta E / CIE76, i.e. the euclidean distance of both pixels in L*a*b* space
inline
double distLAB(uint32_t pix1, uint32_t pix2)
{
    unsigned char L1 = 0; //[0, 100]
    signed char   a1 = 0; //[-128, 127]
    signed char   b1 = 0; //[-128, 127]
    unsigned char L2 = 0;
    signed char   a2 = 0;
    signed char   b2 = 0;

    rgbtoLab(pix1, L1, a1, b1);
    rgbtoLab(pix2, L2, a2, b2);

    //http://www.easyrgb.com/index.php?X=DELT
    const double deltaL = 1.0 * L1 - L2;
    const double deltaA = 1.0 * a1 - a2;
    const double deltaB = 1.0 * b1 - b2;

    return std::sqrt(square(deltaL) +
                     square(deltaA) +
                     square(deltaB));
}
#endif
355 
356 /*
357 inline
358 void rgbtoHsl(uint32_t c, double& h, double& s, double& l)
359 {
360  //http://www.easyrgb.com/index.php?X=MATH&H=18#text18
361  const int r = getRed (c);
362  const int g = getGreen(c);
363  const int b = getBlue (c);
364 
365  const int varMin = numeric::min(r, g, b);
366  const int varMax = numeric::max(r, g, b);
367  const int delMax = varMax - varMin;
368 
369  l = (varMax + varMin) / 2.0 / 255.0;
370 
371  if (delMax == 0) //gray, no chroma...
372  {
373  h = 0;
374  s = 0;
375  }
376  else
377  {
378  s = l < 0.5 ?
379  delMax / (1.0 * varMax + varMin) :
380  delMax / (2.0 * 255 - varMax - varMin);
381 
382  double delR = ((varMax - r) / 6.0 + delMax / 2.0) / delMax;
383  double delG = ((varMax - g) / 6.0 + delMax / 2.0) / delMax;
384  double delB = ((varMax - b) / 6.0 + delMax / 2.0) / delMax;
385 
386  if (r == varMax)
387  h = delB - delG;
388  else if (g == varMax)
389  h = 1 / 3.0 + delR - delB;
390  else if (b == varMax)
391  h = 2 / 3.0 + delG - delR;
392 
393  if (h < 0)
394  h += 1;
395  if (h > 1)
396  h -= 1;
397  }
398 }
399 
400 inline
401 double distHSL(uint32_t pix1, uint32_t pix2, double lightningWeight)
402 {
403  double h1 = 0;
404  double s1 = 0;
405  double l1 = 0;
406  rgbtoHsl(pix1, h1, s1, l1);
407  double h2 = 0;
408  double s2 = 0;
409  double l2 = 0;
410  rgbtoHsl(pix2, h2, s2, l2);
411 
412  //HSL is in cylindrical coordinates where L represents height, S radius, H angle,
413  //however we interpret the cylinder as a bi-conic solid with top/bottom radius 0, middle radius 1
414  assert(0 <= h1 && h1 <= 1);
415  assert(0 <= h2 && h2 <= 1);
416 
417  double r1 = l1 < 0.5 ?
418  l1 * 2 :
419  2 - l1 * 2;
420 
421  double x1 = r1 * s1 * std::cos(h1 * 2 * numeric::pi);
422  double y1 = r1 * s1 * std::sin(h1 * 2 * numeric::pi);
423  double z1 = l1;
424 
425  double r2 = l2 < 0.5 ?
426  l2 * 2 :
427  2 - l2 * 2;
428 
429  double x2 = r2 * s2 * std::cos(h2 * 2 * numeric::pi);
430  double y2 = r2 * s2 * std::sin(h2 * 2 * numeric::pi);
431  double z2 = l2;
432 
433  return 255 * std::sqrt(square(x1 - x2) + square(y1 - y2) + square(lightningWeight * (z1 - z2)));
434 }
435 */
436 
#if 0
//disabled: plain euclidean distance of two pixels in RGB space
inline
double distRGB(uint32_t pix1, uint32_t pix2)
{
    const double deltaR = static_cast<int>(getRed  (pix1)) - getRed  (pix2);
    const double deltaG = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
    const double deltaB = static_cast<int>(getBlue (pix1)) - getBlue (pix2);

    const double sumOfSquares = square(deltaR) + square(deltaG) + square(deltaB);
    return std::sqrt(sumOfSquares);
}
#endif
449 
#if 0
//disabled: RGB distance whose red and blue weights depend on the mean red level
//non-linear rgb: http://www.compuphase.com/cmetric.htm
inline
double distNonLinearRGB(uint32_t pix1, uint32_t pix2)
{
    const double deltaR = static_cast<int>(getRed  (pix1)) - getRed  (pix2);
    const double deltaG = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
    const double deltaB = static_cast<int>(getBlue (pix1)) - getBlue (pix2);

    const double meanRed = (static_cast<double>(getRed(pix1)) + getRed(pix2)) / 2;

    return std::sqrt((2 + meanRed / 255) * square(deltaR) + 4 * square(deltaG) + (2 + (255 - meanRed) / 255) * square(deltaB));
}
#endif
463 
464 inline
465 double distYCbCr(uint32_t pix1, uint32_t pix2, double lumaWeight)
466 {
467  //http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
468  //YCbCr conversion is a matrix multiplication => take advantage of linearity by subtracting first!
469  const int r_diff = static_cast<int>(getRed (pix1)) - getRed (pix2); //we may delay division by 255 to after matrix multiplication
470  const int g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2); //
471  const int b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2); //subtraction for int is noticeable faster than for double!
472 
473  const double k_b = 0.0722; //ITU-R BT.709 conversion
474  const double k_r = 0.2126; //
475  const double k_g = 1 - k_b - k_r;
476 
477  const double scale_b = 0.5 / (1 - k_b);
478  const double scale_r = 0.5 / (1 - k_r);
479 
480  const double y = k_r * r_diff + k_g * g_diff + k_b * b_diff; //[!], analog YCbCr!
481  const double c_b = scale_b * (b_diff - y);
482  const double c_r = scale_r * (r_diff - y);
483 
484  //we skip division by 255 to have similar range like other distance functions
485  return std::sqrt(square(lumaWeight * y) + square(c_b) + square(c_r));
486 }
487 
#if 0
//disabled: color distance in YUV space with the luminance difference scaled by "luminanceWeight"
inline
double distYUV(uint32_t pix1, uint32_t pix2, double luminanceWeight)
{
    //perf: it's not worthwhile to buffer the YUV-conversion, the direct code is faster by ~ 6%
    //RGB -> YUV is essentially a matrix multiplication, so the RGB difference can be taken before converting
    const double deltaR = static_cast<int>(getRed  (pix1)) - getRed  (pix2);
    const double deltaG = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
    const double deltaB = static_cast<int>(getBlue (pix1)) - getBlue (pix2);

    //http://en.wikipedia.org/wiki/YUV#Conversion_to.2Ffrom_RGB
    const double w_b = 0.114;
    const double w_r = 0.299;
    const double w_g = 1 - w_r - w_b;

    const double u_max = 0.436;
    const double v_max = 0.615;

    const double scale_u = u_max / (1 - w_b);
    const double scale_v = v_max / (1 - w_r);

    const double y = w_r * deltaR + w_g * deltaG + w_b * deltaB; //value range: 255 * [-1, 1]
    const double u = scale_u * (deltaB - y);                     //value range: 255 * 2 * u_max * [-1, 1]
    const double v = scale_v * (deltaR - y);                     //value range: 255 * 2 * v_max * [-1, 1]

#ifndef NDEBUG
    const double eps = 0.5;
#endif
    assert(std::abs(y) <= 255 + eps);
    assert(std::abs(u) <= 255 * 2 * u_max + eps);
    assert(std::abs(v) <= 255 * 2 * v_max + eps);

    return std::sqrt(square(luminanceWeight * y) + square(u) + square(v));
}
#endif
523 
524 inline
525 double colorDist(uint32_t pix1, uint32_t pix2, double luminanceWeight)
526 {
527  if (pix1 == pix2) //about 8% perf boost
528  return 0;
529 
530  //return distHSL(pix1, pix2, luminanceWeight);
531  //return distRGB(pix1, pix2);
532  //return distLAB(pix1, pix2);
533  //return distNonLinearRGB(pix1, pix2);
534  //return distYUV(pix1, pix2, luminanceWeight);
535 
536  return distYCbCr(pix1, pix2, luminanceWeight);
537 }
538 
539 
enum BlendType
{
    BLEND_NONE     = 0,
    BLEND_NORMAL   = 1, //a normal indication to blend
    BLEND_DOMINANT = 2, //a strong indication to blend
    //attention: BlendType must fit into the value range of 2 bit!!!
};

//blend decision for each of the four pixels F, G, J, K surrounding one evaluated corner; all start out as BLEND_NONE
struct BlendResult
{
    BlendType blend_f = BLEND_NONE;
    BlendType blend_g = BLEND_NONE;
    BlendType blend_j = BLEND_NONE;
    BlendType blend_k = BLEND_NONE;

    BlendResult() {}
};
556 
557 
//4 x 4 pixel neighbourhood for the preprocessing step, one member per cell in row-major order; all cells start at 0
struct Kernel_4x4
{
    uint32_t a = 0, b = 0, c = 0, d = 0; //first row
    uint32_t e = 0, f = 0, g = 0, h = 0; //second row
    uint32_t i = 0, j = 0, k = 0, l = 0; //third row
    uint32_t m = 0, n = 0, o = 0, p = 0; //fourth row

    Kernel_4x4() {}
};
568 
569 /*
570 input kernel area naming convention:
571 -----------------
572 | A | B | C | D |
573 ----|---|---|---|
574 | E | F | G | H | //evaluate the four corners between F, G, J, K
575 ----|---|---|---| //input pixel is at position F
576 | I | J | K | L |
577 ----|---|---|---|
578 | M | N | O | P |
579 -----------------
580 */
581 FORCE_INLINE //detect blend direction
582 BlendResult preProcessCorners(const Kernel_4x4& ker, const xbrz::ScalerCfg& cfg) //result: F, G, J, K corners of "GradientType"
583 {
584  BlendResult result;
585 
586  if ((ker.f == ker.g &&
587  ker.j == ker.k) ||
588  (ker.f == ker.j &&
589  ker.g == ker.k))
590  return result;
591 
592  auto dist = [&cfg](uint32_t col1, uint32_t col2) { return colorDist(col1, col2, cfg.luminanceWeight_); };
593 
594  const int weight = 4;
595  double jg = dist(ker.i, ker.f) + dist(ker.f, ker.c) + dist(ker.n, ker.k) + dist(ker.k, ker.h) + weight * dist(ker.j, ker.g);
596  double fk = dist(ker.e, ker.j) + dist(ker.j, ker.o) + dist(ker.b, ker.g) + dist(ker.g, ker.l) + weight * dist(ker.f, ker.k);
597 
598  if (jg < fk) //test sample: 70% of values max(jg, fk) / min(jg, fk) are between 1.1 and 3.7 with median being 1.8
599  {
600  const bool dominantGradient = cfg.dominantDirectionThreshold * jg < fk;
601  if (ker.f != ker.g && ker.f != ker.j)
602  result.blend_f = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
603 
604  if (ker.k != ker.j && ker.k != ker.g)
605  result.blend_k = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
606  }
607  else if (fk < jg)
608  {
609  const bool dominantGradient = cfg.dominantDirectionThreshold * fk < jg;
610  if (ker.j != ker.f && ker.j != ker.k)
611  result.blend_j = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
612 
613  if (ker.g != ker.f && ker.g != ker.k)
614  result.blend_g = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
615  }
616  return result;
617 }
618 
//3 x 3 pixel neighbourhood, one member per cell in row-major order; all cells start at 0
struct Kernel_3x3
{
    uint32_t a = 0, b = 0, c = 0; //first row
    uint32_t d = 0, e = 0, f = 0; //second row
    uint32_t g = 0, h = 0, i = 0; //third row

    Kernel_3x3() {}
};
628 
//Kernel_3x3 accessors get_<x><rotDeg>(ker), generated with the DEF_GETTER macro.
//One-argument form: primary template, returns field "x" of the kernel for every rotDeg.
//NOTE(review): no invocation of the one-argument form is visible in this listing, and the rotated sections
//below only show the b and c mappings, although scalePixel uses get_a .. get_i - the inner line numbering
//skips here, so lines appear to be missing; confirm against the upstream file.
629 #define DEF_GETTER(x) template <RotationDegree rotDeg> uint32_t inline get_##x(const Kernel_3x3& ker) { return ker.x; }
630 //we cannot and NEED NOT write "ker.##x" since ## concatenates preprocessor tokens but "." is not a token
634 #undef DEF_GETTER
635 
//Two-argument form: explicit specialization for ROT_90, get_<x> returns field "y" instead.
//The mapping for "a" is commented out in the original.
636 #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_90>(const Kernel_3x3& ker) { return ker.y; }
637 /*DEF_GETTER(a, g)*/ DEF_GETTER(b, d) DEF_GETTER(c, a)
640 #undef DEF_GETTER
641 
//same scheme for ROT_180
642 #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_180>(const Kernel_3x3& ker) { return ker.y; }
643 /*DEF_GETTER(a, i)*/ DEF_GETTER(b, h) DEF_GETTER(c, g)
646 #undef DEF_GETTER
647 
//same scheme for ROT_270
648 #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_270>(const Kernel_3x3& ker) { return ker.y; }
649 /*DEF_GETTER(a, c)*/ DEF_GETTER(b, f) DEF_GETTER(c, i)
652 #undef DEF_GETTER
653 
654 //compress four blend types into a single byte
655 //inline BlendType getTopL (unsigned char b) { return static_cast<BlendType>(0x3 & b); }
656 inline BlendType getTopR (unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 2)); }
657 inline BlendType getBottomR(unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 4)); }
658 inline BlendType getBottomL(unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 6)); }
659 
660 inline void setTopL (unsigned char& b, BlendType bt) { b |= bt; } //buffer is assumed to be initialized before preprocessing!
661 inline void setTopR (unsigned char& b, BlendType bt) { b |= (bt << 2); }
662 inline void setBottomR(unsigned char& b, BlendType bt) { b |= (bt << 4); }
663 inline void setBottomL(unsigned char& b, BlendType bt) { b |= (bt << 6); }
664 
665 inline bool blendingNeeded(unsigned char b) { return b != 0; }
666 
667 template <RotationDegree rotDeg> inline
668 unsigned char rotateBlendInfo(unsigned char b) { return b; }
669 template <> inline unsigned char rotateBlendInfo<ROT_90 >(unsigned char b) { return ((b << 2) | (b >> 6)) & 0xff; }
670 template <> inline unsigned char rotateBlendInfo<ROT_180>(unsigned char b) { return ((b << 4) | (b >> 4)) & 0xff; }
671 template <> inline unsigned char rotateBlendInfo<ROT_270>(unsigned char b) { return ((b << 6) | (b >> 2)) & 0xff; }
672 
673 
#ifndef NDEBUG
//debug builds only: source coordinates to watch; breakIntoDebugger is raised by scaleImage while that pixel is processed
int  debugPixelX{-1};
int  debugPixelY{84};
bool breakIntoDebugger{false};
#endif
679 
680 /*
681 input kernel area naming convention:
682 -------------
683 | A | B | C |
684 ----|---|---|
685 | D | E | F | //input pixel is at position E
686 ----|---|---|
687 | G | H | I |
688 -------------
689 */
690 template <class Scaler, RotationDegree rotDeg>
691 FORCE_INLINE //perf: quite worth it!
692 void scalePixel(const Kernel_3x3& ker,
693  uint32_t* target, int trgWidth,
694  unsigned char blendInfo, //result of preprocessing all four corners of pixel "e"
695  const xbrz::ScalerCfg& cfg)
696 {
697 #define a get_a<rotDeg>(ker)
698 #define b get_b<rotDeg>(ker)
699 #define c get_c<rotDeg>(ker)
700 #define d get_d<rotDeg>(ker)
701 #define e get_e<rotDeg>(ker)
702 #define f get_f<rotDeg>(ker)
703 #define g get_g<rotDeg>(ker)
704 #define h get_h<rotDeg>(ker)
705 #define i get_i<rotDeg>(ker)
706 
707 #ifndef NDEBUG
708  (void) breakIntoDebugger;
709  //if (breakIntoDebugger)
710  // __debugbreak(); //__asm int 3;
711 #endif
712 
713  const unsigned char blend = rotateBlendInfo<rotDeg>(blendInfo);
714 
715  if (getBottomR(blend) >= BLEND_NORMAL)
716  {
717  auto eq = [&cfg](uint32_t col1, uint32_t col2) { return colorDist(col1, col2, cfg.luminanceWeight_) < cfg.equalColorTolerance_; };
718 
719  auto dist = [&cfg](uint32_t col1, uint32_t col2) { return colorDist(col1, col2, cfg.luminanceWeight_); };
720 
721  const uint32_t px = dist(e, f) <= dist(e, h) ? f : h; //choose most similar color
722 
723  OutputMatrix<Scaler::scale, rotDeg> out(target, trgWidth);
724 
725  bool doLineBlend = true;
726  {
727  if (getBottomR(blend) >= BLEND_DOMINANT)
728  doLineBlend = true;
729 
730  //make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes
731  else if (getTopR(blend) != BLEND_NONE && !eq(e, g)) //but support double-blending for 90 (degrees) corners
732  doLineBlend = false;
733  else if (getBottomL(blend) != BLEND_NONE && !eq(e, c))
734  doLineBlend = false;
735 
736  //no full blending for L-shapes; blend corner only (handles "mario mushroom eyes")
737  else if (eq(g, h) && eq(h , i) && eq(i, f) && eq(f, c) && !eq(e, i))
738  doLineBlend = false;
739 
740  else doLineBlend = true;
741  }
742 
743  if (doLineBlend)
744  {
745  const double fg = dist(f, g); //test sample: 70% of values max(fg, hc) / min(fg, hc) are between 1.1 and 3.7 with median being 1.9
746  const double hc = dist(h, c); //
747 
748  const bool haveShallowLine = cfg.steepDirectionThreshold * fg <= hc && e != g && d != g;
749  const bool haveSteepLine = cfg.steepDirectionThreshold * hc <= fg && e != c && b != c;
750 
751  if (haveShallowLine)
752  {
753  if (haveSteepLine)
754  Scaler::blendLineSteepAndShallow(px, out);
755  else
756  Scaler::blendLineShallow(px, out);
757  }
758  else
759  {
760  if (haveSteepLine)
761  Scaler::blendLineSteep(px, out);
762  else
763  Scaler::blendLineDiagonal(px,out);
764  }
765  }
766  else
767  Scaler::blendCorner(px, out);
768  }
769 
770 #undef a
771 #undef b
772 #undef c
773 #undef d
774 #undef e
775 #undef f
776 #undef g
777 #undef h
778 #undef i
779 }
780 
781 
//Scale the source rows [yFirst, yLast) of a srcWidth x srcHeight image into "trg" by the factor Scaler::scale.
//
// src                pixels of the source image, srcWidth pixels per row
// trg                target image, srcWidth * Scaler::scale pixels per row
// cfg                thresholds and weights handed on to preProcessCorners() and scalePixel()
// yFirst, yLast      requested row stripe; clamped to [0, srcHeight], an empty stripe or srcWidth <= 0 is a no-op
//
//Per source pixel the function first classifies the corner to its bottom-right (preProcessCorners), then fills
//the Scaler::scale x Scaler::scale output block with the plain source color, and finally, if any of the four
//corners of the pixel was flagged, blends them by calling scalePixel once per rotation.
//The corner flags found for the row below are carried over in a srcWidth bytes buffer that is placed in the
//last bytes of the target area belonging to this stripe.
782 template <class Scaler> //scaler policy: see "Scaler2x" reference implementation
783 void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)
784 {
785  yFirst = std::max(yFirst, 0);
786  yLast = std::min(yLast, srcHeight);
787  if (yFirst >= yLast || srcWidth <= 0)
788  return;
789 
790  const int trgWidth = srcWidth * Scaler::scale;
791 
792  //"use" space at the end of the image as temporary buffer for "on the fly preprocessing": we even could use larger area of
793  //"sizeof(uint32_t) * srcWidth * (yLast - yFirst)" bytes without risk of accidental overwriting before accessing
794  const int bufferSize = srcWidth;
795  unsigned char* preProcBuffer = reinterpret_cast<unsigned char*>(trg + yLast * Scaler::scale * trgWidth) - bufferSize;
796  std::fill(preProcBuffer, preProcBuffer + bufferSize, static_cast<unsigned char>(0));
797  //static_assert(BLEND_NONE == 0, "");
798 
799  //initialize preprocessing buffer for first row: detect upper left and right corner blending
800  //this cannot be optimized for adjacent processing stripes; we must not allow for a memory race condition!
801  if (yFirst > 0)
802  {
803  const int y = yFirst - 1;
804 
 //row pointers for y - 1 .. y + 2, clamped to the first / last source row
805  const uint32_t* s_m1 = src + srcWidth * std::max(y - 1, 0);
806  const uint32_t* s_0 = src + srcWidth * y; //center line
807  const uint32_t* s_p1 = src + srcWidth * std::min(y + 1, srcHeight - 1);
808  const uint32_t* s_p2 = src + srcWidth * std::min(y + 2, srcHeight - 1);
809 
810  for (int x = 0; x < srcWidth; ++x)
811  {
 //column indices x - 1 .. x + 2, clamped to the first / last source column
812  const int x_m1 = std::max(x - 1, 0);
813  const int x_p1 = std::min(x + 1, srcWidth - 1);
814  const int x_p2 = std::min(x + 2, srcWidth - 1);
815 
816  Kernel_4x4 ker; //perf: initialization is negligible
817  ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
818  ker.b = s_m1[x];
819  ker.c = s_m1[x_p1];
820  ker.d = s_m1[x_p2];
821 
822  ker.e = s_0[x_m1];
823  ker.f = s_0[x];
824  ker.g = s_0[x_p1];
825  ker.h = s_0[x_p2];
826 
827  ker.i = s_p1[x_m1];
828  ker.j = s_p1[x];
829  ker.k = s_p1[x_p1];
830  ker.l = s_p1[x_p2];
831 
832  ker.m = s_p2[x_m1];
833  ker.n = s_p2[x];
834  ker.o = s_p2[x_p1];
835  ker.p = s_p2[x_p2];
836 
837  const BlendResult res = preProcessCorners(ker, cfg);
838  /*
839  preprocessing blend result:
840  ---------
841  | F | G | //evaluate corner between F, G, J, K
842  ----|---| //input pixel is at position F
843  | J | K |
844  ---------
845  */
 //only the results for J and K (row yFirst) are kept: they become the top corners of the first processed row
846  setTopR(preProcBuffer[x], res.blend_j);
847 
848  if (x + 1 < srcWidth)
849  setTopL(preProcBuffer[x + 1], res.blend_k);
850  }
851  }
852  //------------------------------------------------------------------------------------
853 
854  for (int y = yFirst; y < yLast; ++y)
855  {
856  uint32_t* out = trg + Scaler::scale * y * trgWidth; //consider MT "striped" access
857 
858  const uint32_t* s_m1 = src + srcWidth * std::max(y - 1, 0);
859  const uint32_t* s_0 = src + srcWidth * y; //center line
860  const uint32_t* s_p1 = src + srcWidth * std::min(y + 1, srcHeight - 1);
861  const uint32_t* s_p2 = src + srcWidth * std::min(y + 2, srcHeight - 1);
862 
863  unsigned char blend_xy1 = 0; //corner blending for current (x, y + 1) position
864 
865  for (int x = 0; x < srcWidth; ++x, out += Scaler::scale)
866  {
867 #ifndef NDEBUG
868  breakIntoDebugger = debugPixelX == x && debugPixelY == y;
869 #endif
870  //all those bounds checks have only insignificant impact on performance!
871  const int x_m1 = std::max(x - 1, 0); //perf: prefer array indexing to additional pointers!
872  const int x_p1 = std::min(x + 1, srcWidth - 1);
873  const int x_p2 = std::min(x + 2, srcWidth - 1);
874 
875  //evaluate the four corners on bottom-right of current pixel
876  unsigned char blend_xy = 0; //for current (x, y) position
877  {
878  Kernel_4x4 ker; //perf: initialization is negligible
879  ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
880  ker.b = s_m1[x];
881  ker.c = s_m1[x_p1];
882  ker.d = s_m1[x_p2];
883 
884  ker.e = s_0[x_m1];
885  ker.f = s_0[x];
886  ker.g = s_0[x_p1];
887  ker.h = s_0[x_p2];
888 
889  ker.i = s_p1[x_m1];
890  ker.j = s_p1[x];
891  ker.k = s_p1[x_p1];
892  ker.l = s_p1[x_p2];
893 
894  ker.m = s_p2[x_m1];
895  ker.n = s_p2[x];
896  ker.o = s_p2[x_p1];
897  ker.p = s_p2[x_p2];
898 
899  const BlendResult res = preProcessCorners(ker, cfg);
900  /*
901  preprocessing blend result:
902  ---------
903  | F | G | // evaluate corner between F, G, J, K
904  ----|---| // current input pixel is at position F
905  | J | K |
906  ---------
907  */
 //the buffer slot holds the corners of (x, y) found so far; add the bottom-right one
908  blend_xy = preProcBuffer[x];
909  setBottomR(blend_xy, res.blend_f); //all four corners of (x, y) have been determined at this point due to processing sequence!
910 
911  setTopR(blend_xy1, res.blend_j); //set 2nd known corner for (x, y + 1)
912  preProcBuffer[x] = blend_xy1; //store on current buffer position for use on next row
913 
914  blend_xy1 = 0;
915  setTopL(blend_xy1, res.blend_k); //set 1st known corner for (x + 1, y + 1) and buffer for use on next column
916 
917  if (x + 1 < srcWidth) //set 3rd known corner for (x + 1, y)
918  setBottomL(preProcBuffer[x + 1], res.blend_g);
919  }
920 
921  //fill block of size scale * scale with the given color
922  fillBlock(out, trgWidth * sizeof(uint32_t), s_0[x], Scaler::scale); //place *after* preprocessing step, to not overwrite the results while processing the last pixel!
923 
924  //blend four corners of current pixel
925  if (blendingNeeded(blend_xy)) //good 20% perf-improvement
926  {
927  Kernel_3x3 ker; //perf: initialization is negligible
928 
929  ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
930  ker.b = s_m1[x];
931  ker.c = s_m1[x_p1];
932 
933  ker.d = s_0[x_m1];
934  ker.e = s_0[x];
935  ker.f = s_0[x_p1];
936 
937  ker.g = s_p1[x_m1];
938  ker.h = s_p1[x];
939  ker.i = s_p1[x_p1];
940 
 //one call per rotation: each call handles the corner that its rotation maps to bottom-right
941  scalePixel<Scaler, ROT_0 >(ker, out, trgWidth, blend_xy, cfg);
942  scalePixel<Scaler, ROT_90 >(ker, out, trgWidth, blend_xy, cfg);
943  scalePixel<Scaler, ROT_180>(ker, out, trgWidth, blend_xy, cfg);
944  scalePixel<Scaler, ROT_270>(ker, out, trgWidth, blend_xy, cfg);
945  }
946  }
947  }
948 }
949 
950 
951 struct Scaler2x
952 {
953  static const int scale = 2;
954 
955  template <class OutputMatrix>
956  static void blendLineShallow(uint32_t col, OutputMatrix& out)
957  {
958  alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
959  alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
960  }
961 
962  template <class OutputMatrix>
963  static void blendLineSteep(uint32_t col, OutputMatrix& out)
964  {
965  alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
966  alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
967  }
968 
969  template <class OutputMatrix>
970  static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
971  {
972  alphaBlend<1, 4>(out.template ref<1, 0>(), col);
973  alphaBlend<1, 4>(out.template ref<0, 1>(), col);
974  alphaBlend<5, 6>(out.template ref<1, 1>(), col); //[!] fixes 7/8 used in xBR
975  }
976 
977  template <class OutputMatrix>
978  static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
979  {
980  alphaBlend<1, 2>(out.template ref<1, 1>(), col);
981  }
982 
983  template <class OutputMatrix>
984  static void blendCorner(uint32_t col, OutputMatrix& out)
985  {
986  //model a round corner
987  alphaBlend<21, 100>(out.template ref<1, 1>(), col); //exact: 1 - pi/4 = 0.2146018366
988  }
989 };
990 
991 
992 struct Scaler3x
993 {
994  static const int scale = 3;
995 
996  template <class OutputMatrix>
997  static void blendLineShallow(uint32_t col, OutputMatrix& out)
998  {
999  alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
1000  alphaBlend<1, 4>(out.template ref<scale - 2, 2>(), col);
1001 
1002  alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
1003  out.template ref<scale - 1, 2>() = col;
1004  }
1005 
1006  template <class OutputMatrix>
1007  static void blendLineSteep(uint32_t col, OutputMatrix& out)
1008  {
1009  alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
1010  alphaBlend<1, 4>(out.template ref<2, scale - 2>(), col);
1011 
1012  alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
1013  out.template ref<2, scale - 1>() = col;
1014  }
1015 
1016  template <class OutputMatrix>
1017  static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
1018  {
1019  alphaBlend<1, 4>(out.template ref<2, 0>(), col);
1020  alphaBlend<1, 4>(out.template ref<0, 2>(), col);
1021  alphaBlend<3, 4>(out.template ref<2, 1>(), col);
1022  alphaBlend<3, 4>(out.template ref<1, 2>(), col);
1023  out.template ref<2, 2>() = col;
1024  }
1025 
1026  template <class OutputMatrix>
1027  static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
1028  {
1029  alphaBlend<1, 8>(out.template ref<1, 2>(), col);
1030  alphaBlend<1, 8>(out.template ref<2, 1>(), col);
1031  alphaBlend<7, 8>(out.template ref<2, 2>(), col);
1032  }
1033 
1034  template <class OutputMatrix>
1035  static void blendCorner(uint32_t col, OutputMatrix& out)
1036  {
1037  //model a round corner
1038  alphaBlend<45, 100>(out.template ref<2, 2>(), col); //exact: 0.4545939598
1039  //alphaBlend<14, 1000>(out.template ref<2, 1>(), col); //0.01413008627 -> negligible
1040  //alphaBlend<14, 1000>(out.template ref<1, 2>(), col); //0.01413008627
1041  }
1042 };
1043 
1044 
1045 struct Scaler4x
1046 {
1047  static const int scale = 4;
1048 
1049  template <class OutputMatrix>
1050  static void blendLineShallow(uint32_t col, OutputMatrix& out)
1051  {
1052  alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
1053  alphaBlend<1, 4>(out.template ref<scale - 2, 2>(), col);
1054 
1055  alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
1056  alphaBlend<3, 4>(out.template ref<scale - 2, 3>(), col);
1057 
1058  out.template ref<scale - 1, 2>() = col;
1059  out.template ref<scale - 1, 3>() = col;
1060  }
1061 
1062  template <class OutputMatrix>
1063  static void blendLineSteep(uint32_t col, OutputMatrix& out)
1064  {
1065  alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
1066  alphaBlend<1, 4>(out.template ref<2, scale - 2>(), col);
1067 
1068  alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
1069  alphaBlend<3, 4>(out.template ref<3, scale - 2>(), col);
1070 
1071  out.template ref<2, scale - 1>() = col;
1072  out.template ref<3, scale - 1>() = col;
1073  }
1074 
1075  template <class OutputMatrix>
1076  static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
1077  {
1078  alphaBlend<3, 4>(out.template ref<3, 1>(), col);
1079  alphaBlend<3, 4>(out.template ref<1, 3>(), col);
1080  alphaBlend<1, 4>(out.template ref<3, 0>(), col);
1081  alphaBlend<1, 4>(out.template ref<0, 3>(), col);
1082  alphaBlend<1, 3>(out.template ref<2, 2>(), col); //[!] fixes 1/4 used in xBR
1083  out.template ref<3, 3>() = out.template ref<3, 2>() = out.template ref<2, 3>() = col;
1084  }
1085 
1086  template <class OutputMatrix>
1087  static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
1088  {
1089  alphaBlend<1, 2>(out.template ref<scale - 1, scale / 2 >(), col);
1090  alphaBlend<1, 2>(out.template ref<scale - 2, scale / 2 + 1>(), col);
1091  out.template ref<scale - 1, scale - 1>() = col;
1092  }
1093 
1094  template <class OutputMatrix>
1095  static void blendCorner(uint32_t col, OutputMatrix& out)
1096  {
1097  //model a round corner
1098  alphaBlend<68, 100>(out.template ref<3, 3>(), col); //exact: 0.6848532563
1099  alphaBlend< 9, 100>(out.template ref<3, 2>(), col); //0.08677704501
1100  alphaBlend< 9, 100>(out.template ref<2, 3>(), col); //0.08677704501
1101  }
1102 };
1103 
1104 
1105 struct Scaler5x
1106 {
1107  static const int scale = 5;
1108 
1109  template <class OutputMatrix>
1110  static void blendLineShallow(uint32_t col, OutputMatrix& out)
1111  {
1112  alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
1113  alphaBlend<1, 4>(out.template ref<scale - 2, 2>(), col);
1114  alphaBlend<1, 4>(out.template ref<scale - 3, 4>(), col);
1115 
1116  alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
1117  alphaBlend<3, 4>(out.template ref<scale - 2, 3>(), col);
1118 
1119  out.template ref<scale - 1, 2>() = col;
1120  out.template ref<scale - 1, 3>() = col;
1121  out.template ref<scale - 1, 4>() = col;
1122  out.template ref<scale - 2, 4>() = col;
1123  }
1124 
1125  template <class OutputMatrix>
1126  static void blendLineSteep(uint32_t col, OutputMatrix& out)
1127  {
1128  alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
1129  alphaBlend<1, 4>(out.template ref<2, scale - 2>(), col);
1130  alphaBlend<1, 4>(out.template ref<4, scale - 3>(), col);
1131 
1132  alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
1133  alphaBlend<3, 4>(out.template ref<3, scale - 2>(), col);
1134 
1135  out.template ref<2, scale - 1>() = col;
1136  out.template ref<3, scale - 1>() = col;
1137  out.template ref<4, scale - 1>() = col;
1138  out.template ref<4, scale - 2>() = col;
1139  }
1140 
1141  template <class OutputMatrix>
1142  static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
1143  {
1144  alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
1145  alphaBlend<1, 4>(out.template ref<2, scale - 2>(), col);
1146  alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
1147 
1148  alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
1149  alphaBlend<1, 4>(out.template ref<scale - 2, 2>(), col);
1150  alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
1151 
1152  out.template ref<2, scale - 1>() = col;
1153  out.template ref<3, scale - 1>() = col;
1154 
1155  out.template ref<scale - 1, 2>() = col;
1156  out.template ref<scale - 1, 3>() = col;
1157 
1158  out.template ref<4, scale - 1>() = col;
1159 
1160  alphaBlend<2, 3>(out.template ref<3, 3>(), col);
1161  }
1162 
1163  template <class OutputMatrix>
1164  static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
1165  {
1166  alphaBlend<1, 8>(out.template ref<scale - 1, scale / 2 >(), col);
1167  alphaBlend<1, 8>(out.template ref<scale - 2, scale / 2 + 1>(), col);
1168  alphaBlend<1, 8>(out.template ref<scale - 3, scale / 2 + 2>(), col);
1169 
1170  alphaBlend<7, 8>(out.template ref<4, 3>(), col);
1171  alphaBlend<7, 8>(out.template ref<3, 4>(), col);
1172 
1173  out.template ref<4, 4>() = col;
1174  }
1175 
1176  template <class OutputMatrix>
1177  static void blendCorner(uint32_t col, OutputMatrix& out)
1178  {
1179  //model a round corner
1180  alphaBlend<86, 100>(out.template ref<4, 4>(), col); //exact: 0.8631434088
1181  alphaBlend<23, 100>(out.template ref<4, 3>(), col); //0.2306749731
1182  alphaBlend<23, 100>(out.template ref<3, 4>(), col); //0.2306749731
1183  //alphaBlend<8, 1000>(out.template ref<4, 2>(), col); //0.008384061834 -> negligible
1184  //alphaBlend<8, 1000>(out.template ref<2, 4>(), col); //0.008384061834
1185  }
1186 };
1187 }
1188 
1189 
1190 void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)
1191 {
1192  switch (factor)
1193  {
1194  case 2:
1195  return scaleImage<Scaler2x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
1196  case 3:
1197  return scaleImage<Scaler3x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
1198  case 4:
1199  return scaleImage<Scaler4x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
1200  case 5:
1201  return scaleImage<Scaler5x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
1202  }
1203  assert(false);
1204 }
1205 
1206 
1207 bool xbrz::equalColor(uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance)
1208 {
1209  return colorDist(col1, col2, luminanceWeight) < equalColorTolerance;
1210 }
1211 
1212 
1213 void xbrz::nearestNeighborScale(const uint32_t* src, int srcWidth, int srcHeight, int srcPitch,
1214  uint32_t* trg, int trgWidth, int trgHeight, int trgPitch,
1215  SliceType st, int yFirst, int yLast)
1216 {
1217  if (srcPitch < srcWidth * static_cast<int>(sizeof(uint32_t)) ||
1218  trgPitch < trgWidth * static_cast<int>(sizeof(uint32_t)))
1219  {
1220  assert(false);
1221  return;
1222  }
1223 
1224  switch (st)
1225  {
1226  case NN_SCALE_SLICE_SOURCE:
1227  //nearest-neighbor (going over source image - fast for upscaling, since source is read only once
1228  yFirst = std::max(yFirst, 0);
1229  yLast = std::min(yLast, srcHeight);
1230  if (yFirst >= yLast || trgWidth <= 0 || trgHeight <= 0) return;
1231 
1232  for (int y = yFirst; y < yLast; ++y)
1233  {
1234  //mathematically: ySrc = floor(srcHeight * yTrg / trgHeight)
1235  // => search for integers in: [ySrc, ySrc + 1) * trgHeight / srcHeight
1236 
1237  //keep within for loop to support MT input slices!
1238  const int yTrg_first = ( y * trgHeight + srcHeight - 1) / srcHeight; //=ceil(y * trgHeight / srcHeight)
1239  const int yTrg_last = ((y + 1) * trgHeight + srcHeight - 1) / srcHeight; //=ceil(((y + 1) * trgHeight) / srcHeight)
1240  const int blockHeight = yTrg_last - yTrg_first;
1241 
1242  if (blockHeight > 0)
1243  {
1244  const uint32_t* srcLine = byteAdvance(src, y * srcPitch);
1245  uint32_t* trgLine = byteAdvance(trg, yTrg_first * trgPitch);
1246  int xTrg_first = 0;
1247 
1248  for (int x = 0; x < srcWidth; ++x)
1249  {
1250  int xTrg_last = ((x + 1) * trgWidth + srcWidth - 1) / srcWidth;
1251  const int blockWidth = xTrg_last - xTrg_first;
1252  if (blockWidth > 0)
1253  {
1254  xTrg_first = xTrg_last;
1255  fillBlock(trgLine, trgPitch, srcLine[x], blockWidth, blockHeight);
1256  trgLine += blockWidth;
1257  }
1258  }
1259  }
1260  }
1261  break;
1262 
1263  case NN_SCALE_SLICE_TARGET:
1264  //nearest-neighbor (going over target image - slow for upscaling, since source is read multiple times missing out on cache! Fast for similar image sizes!)
1265  yFirst = std::max(yFirst, 0);
1266  yLast = std::min(yLast, trgHeight);
1267  if (yFirst >= yLast || srcHeight <= 0 || srcWidth <= 0) return;
1268 
1269  for (int y = yFirst; y < yLast; ++y)
1270  {
1271  uint32_t* trgLine = byteAdvance(trg, y * trgPitch);
1272  const int ySrc = srcHeight * y / trgHeight;
1273  const uint32_t* srcLine = byteAdvance(src, ySrc * srcPitch);
1274  for (int x = 0; x < trgWidth; ++x)
1275  {
1276  const int xSrc = srcWidth * x / trgWidth;
1277  trgLine[x] = srcLine[xSrc];
1278  }
1279  }
1280  break;
1281  }
1282 }
void fill(const SDL_Rect &rect, uint8_t r, uint8_t g, uint8_t b, uint8_t a)
Fill an area with the given colour.
Definition: draw.cpp:41
bool equalColor(uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance)
Definition: xbrz.cpp:1207
SliceType
Definition: xbrz.hpp:83
@ NN_SCALE_SLICE_TARGET
Definition: xbrz.hpp:85
@ NN_SCALE_SLICE_SOURCE
Definition: xbrz.hpp:84
void nearestNeighborScale(const uint32_t *src, int srcWidth, int srcHeight, uint32_t *trg, int trgWidth, int trgHeight)
Definition: xbrz.hpp:100
void scale(size_t factor, const uint32_t *src, uint32_t *trg, int srcWidth, int srcHeight, const ScalerCfg &cfg=ScalerCfg(), int yFirst=0, int yLast=std::numeric_limits< int >::max())
Definition: xbrz.cpp:1190
double luminanceWeight_
Definition: config.hpp:54
double steepDirectionThreshold
Definition: config.hpp:57
double dominantDirectionThreshold
Definition: config.hpp:56
double equalColorTolerance_
Definition: config.hpp:55
mock_party p
static map_location::DIRECTION n
#define i
#define g
#define d
#define e
#define DEF_GETTER(x)
Definition: xbrz.cpp:629
#define h
#define f
#define a
#define c
#define b
#define FORCE_INLINE
Definition: xbrz.cpp:185