xbrz.cpp


SUBMITTED BY: Guest

DATE: Jan. 20, 2014, 5:36 p.m.

FORMAT: C++

SIZE: 46.5 kB

HITS: 1011

  1. // ****************************************************************************
  2. // * This file is part of the HqMAME project. It is distributed under *
  3. // * GNU General Public License: http://www.gnu.org/licenses/gpl.html *
  4. // * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved *
  5. // * *
  6. // * Additionally and as a special exception, the author gives permission *
  7. // * to link the code of this program with the MAME library (or with modified *
  8. // * versions of MAME that use the same license as MAME), and distribute *
  9. // * linked combinations including the two. You must obey the GNU General *
  10. // * Public License in all respects for all of the code used other than MAME. *
  11. // * If you modify this file, you may extend this exception to your version *
  12. // * of the file, but you are not obligated to do so. If you do not wish to *
  13. // * do so, delete this exception statement from your version. *
  14. // ****************************************************************************
  15. #include "xbrz.h"
  16. #include <cassert>
  17. #include <algorithm>
  18. namespace
  19. {
  20. template <uint32_t N> inline
  21. unsigned char getByte(uint32_t val) { return static_cast<unsigned char>((val >> (8 * N)) & 0xff); }
  22. inline unsigned char getRed (uint32_t val) { return getByte<2>(val); }
  23. inline unsigned char getGreen(uint32_t val) { return getByte<1>(val); }
  24. inline unsigned char getBlue (uint32_t val) { return getByte<0>(val); }
  25. template <class T> inline
  26. T abs(T value)
  27. {
  28. static_assert(std::is_signed<T>::value, "");
  29. return value < 0 ? -value : value;
  30. }
  31. const uint32_t redMask = 0xff0000;
  32. const uint32_t greenMask = 0x00ff00;
  33. const uint32_t blueMask = 0x0000ff;
  34. template <unsigned int N, unsigned int M> inline
  35. void alphaBlend(uint32_t& dst, uint32_t col) //blend color over destination with opacity N / M
  36. {
  37. static_assert(N < 256, "possible overflow of (col & redMask) * N");
  38. static_assert(M < 256, "possible overflow of (col & redMask ) * N + (dst & redMask ) * (M - N)");
  39. static_assert(0 < N && N < M, "");
  40. dst = (redMask & ((col & redMask ) * N + (dst & redMask ) * (M - N)) / M) | //this works because 8 upper bits are free
  41. (greenMask & ((col & greenMask) * N + (dst & greenMask) * (M - N)) / M) |
  42. (blueMask & ((col & blueMask ) * N + (dst & blueMask ) * (M - N)) / M);
  43. }
  44. //inline
  45. //double fastSqrt(double n)
  46. //{
  47. // __asm //speeds up xBRZ by about 9% compared to std::sqrt which internally uses the same assembler instructions but adds some "fluff"
  48. // {
  49. // fld n
  50. // fsqrt
  51. // }
  52. //}
  53. //
  54. inline
  55. uint32_t alphaBlend2(uint32_t pix1, uint32_t pix2, double alpha)
  56. {
  57. return (redMask & static_cast<uint32_t>((pix1 & redMask ) * alpha + (pix2 & redMask ) * (1 - alpha))) |
  58. (greenMask & static_cast<uint32_t>((pix1 & greenMask) * alpha + (pix2 & greenMask) * (1 - alpha))) |
  59. (blueMask & static_cast<uint32_t>((pix1 & blueMask ) * alpha + (pix2 & blueMask ) * (1 - alpha)));
  60. }
  61. uint32_t* byteAdvance( uint32_t* ptr, int bytes) { return reinterpret_cast< uint32_t*>(reinterpret_cast< char*>(ptr) + bytes); }
  62. const uint32_t* byteAdvance(const uint32_t* ptr, int bytes) { return reinterpret_cast<const uint32_t*>(reinterpret_cast<const char*>(ptr) + bytes); }
  63. //fill block with the given color
  64. inline
  65. void fillBlock(uint32_t* trg, int pitch, uint32_t col, int blockWidth, int blockHeight)
  66. {
  67. //for (int y = 0; y < blockHeight; ++y, trg = byteAdvance(trg, pitch))
  68. // std::fill(trg, trg + blockWidth, col);
  69. for (int y = 0; y < blockHeight; ++y, trg = byteAdvance(trg, pitch))
  70. for (int x = 0; x < blockWidth; ++x)
  71. trg[x] = col;
  72. }
  73. inline
  74. void fillBlock(uint32_t* trg, int pitch, uint32_t col, int n) { fillBlock(trg, pitch, col, n, n); }
  75. #ifdef _MSC_VER
  76. #define FORCE_INLINE __forceinline
  77. #elif defined __GNUC__
  78. #define FORCE_INLINE __attribute__((always_inline)) inline
  79. #else
  80. #define FORCE_INLINE inline
  81. #endif
  82. enum RotationDegree //clock-wise
  83. {
  84. ROT_0,
  85. ROT_90,
  86. ROT_180,
  87. ROT_270
  88. };
  89. //calculate input matrix coordinates after rotation at compile time
  90. template <RotationDegree rotDeg, size_t I, size_t J, size_t N>
  91. struct MatrixRotation;
  92. template <size_t I, size_t J, size_t N>
  93. struct MatrixRotation<ROT_0, I, J, N>
  94. {
  95. static const size_t I_old = I;
  96. static const size_t J_old = J;
  97. };
  98. template <RotationDegree rotDeg, size_t I, size_t J, size_t N> //(i, j) = (row, col) indices, N = size of (square) matrix
  99. struct MatrixRotation
  100. {
  101. static const size_t I_old = N - 1 - MatrixRotation<static_cast<RotationDegree>(rotDeg - 1), I, J, N>::J_old; //old coordinates before rotation!
  102. static const size_t J_old = MatrixRotation<static_cast<RotationDegree>(rotDeg - 1), I, J, N>::I_old; //
  103. };
  104. template <size_t N, RotationDegree rotDeg>
  105. class OutputMatrix
  106. {
  107. public:
  108. OutputMatrix(uint32_t* out, int outWidth) : //access matrix area, top-left at position "out" for image with given width
  109. out_(out),
  110. outWidth_(outWidth) {}
  111. template <size_t I, size_t J>
  112. uint32_t& ref() const
  113. {
  114. static const size_t I_old = MatrixRotation<rotDeg, I, J, N>::I_old;
  115. static const size_t J_old = MatrixRotation<rotDeg, I, J, N>::J_old;
  116. return *(out_ + J_old + I_old * outWidth_);
  117. }
  118. private:
  119. uint32_t* out_;
  120. const int outWidth_;
  121. };
  122. template <class T> inline
  123. T square(T value) { return value * value; }
  124. /*
  125. inline
  126. void rgbtoLuv(uint32_t c, double& L, double& u, double& v)
  127. {
  128. //http://www.easyrgb.com/index.php?X=MATH&H=02#text2
  129. double r = getRed (c) / 255.0;
  130. double g = getGreen(c) / 255.0;
  131. double b = getBlue (c) / 255.0;
  132. if ( r > 0.04045 )
  133. r = std::pow(( ( r + 0.055 ) / 1.055 ) , 2.4);
  134. else
  135. r /= 12.92;
  136. if ( g > 0.04045 )
  137. g = std::pow(( ( g + 0.055 ) / 1.055 ) , 2.4);
  138. else
  139. g /= 12.92;
  140. if ( b > 0.04045 )
  141. b = std::pow(( ( b + 0.055 ) / 1.055 ) , 2.4);
  142. else
  143. b /= 12.92;
  144. r *= 100;
  145. g *= 100;
  146. b *= 100;
  147. double x = 0.4124564 * r + 0.3575761 * g + 0.1804375 * b;
  148. double y = 0.2126729 * r + 0.7151522 * g + 0.0721750 * b;
  149. double z = 0.0193339 * r + 0.1191920 * g + 0.9503041 * b;
  150. //---------------------
  151. double var_U = 4 * x / ( x + 15 * y + 3 * z );
  152. double var_V = 9 * y / ( x + 15 * y + 3 * z );
  153. double var_Y = y / 100;
  154. if ( var_Y > 0.008856 ) var_Y = std::pow(var_Y , 1.0/3 );
  155. else var_Y = 7.787 * var_Y + 16.0 / 116;
  156. const double ref_X = 95.047; //Observer= 2°, Illuminant= D65
  157. const double ref_Y = 100.000;
  158. const double ref_Z = 108.883;
  159. const double ref_U = ( 4 * ref_X ) / ( ref_X + ( 15 * ref_Y ) + ( 3 * ref_Z ) );
  160. const double ref_V = ( 9 * ref_Y ) / ( ref_X + ( 15 * ref_Y ) + ( 3 * ref_Z ) );
  161. L = ( 116 * var_Y ) - 16;
  162. u = 13 * L * ( var_U - ref_U );
  163. v = 13 * L * ( var_V - ref_V );
  164. }
  165. */
  166. inline
  167. void rgbtoLab(uint32_t c, unsigned char& L, signed char& A, signed char& B)
  168. {
  169. //code: http://www.easyrgb.com/index.php?X=MATH
  170. //test: http://www.workwithcolor.com/color-converter-01.htm
  171. //------RGB to XYZ------
  172. double r = getRed (c) / 255.0;
  173. double g = getGreen(c) / 255.0;
  174. double b = getBlue (c) / 255.0;
  175. r = r > 0.04045 ? std::pow(( r + 0.055 ) / 1.055, 2.4) : r / 12.92;
  176. r = g > 0.04045 ? std::pow(( g + 0.055 ) / 1.055, 2.4) : g / 12.92;
  177. r = b > 0.04045 ? std::pow(( b + 0.055 ) / 1.055, 2.4) : b / 12.92;
  178. r *= 100;
  179. g *= 100;
  180. b *= 100;
  181. double x = 0.4124564 * r + 0.3575761 * g + 0.1804375 * b;
  182. double y = 0.2126729 * r + 0.7151522 * g + 0.0721750 * b;
  183. double z = 0.0193339 * r + 0.1191920 * g + 0.9503041 * b;
  184. //------XYZ to Lab------
  185. const double refX = 95.047; //
  186. const double refY = 100.000; //Observer= 2°, Illuminant= D65
  187. const double refZ = 108.883; //
  188. double var_X = x / refX;
  189. double var_Y = y / refY;
  190. double var_Z = z / refZ;
  191. var_X = var_X > 0.008856 ? std::pow(var_X, 1.0 / 3) : 7.787 * var_X + 4.0 / 29;
  192. var_Y = var_Y > 0.008856 ? std::pow(var_Y, 1.0 / 3) : 7.787 * var_Y + 4.0 / 29;
  193. var_Z = var_Z > 0.008856 ? std::pow(var_Z, 1.0 / 3) : 7.787 * var_Z + 4.0 / 29;
  194. L = static_cast<unsigned char>(116 * var_Y - 16);
  195. A = static_cast< signed char>(500 * (var_X - var_Y));
  196. B = static_cast< signed char>(200 * (var_Y - var_Z));
  197. };
  198. inline
  199. double distLAB(uint32_t pix1, uint32_t pix2)
  200. {
  201. unsigned char L1 = 0; //[0, 100]
  202. signed char a1 = 0; //[-128, 127]
  203. signed char b1 = 0; //[-128, 127]
  204. rgbtoLab(pix1, L1, a1, b1);
  205. unsigned char L2 = 0;
  206. signed char a2 = 0;
  207. signed char b2 = 0;
  208. rgbtoLab(pix2, L2, a2, b2);
  209. //-----------------------------
  210. //http://www.easyrgb.com/index.php?X=DELT
  211. //Delta E/CIE76
  212. return std::sqrt(square(1.0 * L1 - L2) +
  213. square(1.0 * a1 - a2) +
  214. square(1.0 * b1 - b2));
  215. }
  216. /*
  217. inline
  218. void rgbtoHsl(uint32_t c, double& h, double& s, double& l)
  219. {
  220. //http://www.easyrgb.com/index.php?X=MATH&H=18#text18
  221. const int r = getRed (c);
  222. const int g = getGreen(c);
  223. const int b = getBlue (c);
  224. const int varMin = numeric::min(r, g, b);
  225. const int varMax = numeric::max(r, g, b);
  226. const int delMax = varMax - varMin;
  227. l = (varMax + varMin) / 2.0 / 255.0;
  228. if (delMax == 0) //gray, no chroma...
  229. {
  230. h = 0;
  231. s = 0;
  232. }
  233. else
  234. {
  235. s = l < 0.5 ?
  236. delMax / (1.0 * varMax + varMin) :
  237. delMax / (2.0 * 255 - varMax - varMin);
  238. double delR = ((varMax - r) / 6.0 + delMax / 2.0) / delMax;
  239. double delG = ((varMax - g) / 6.0 + delMax / 2.0) / delMax;
  240. double delB = ((varMax - b) / 6.0 + delMax / 2.0) / delMax;
  241. if (r == varMax)
  242. h = delB - delG;
  243. else if (g == varMax)
  244. h = 1 / 3.0 + delR - delB;
  245. else if (b == varMax)
  246. h = 2 / 3.0 + delG - delR;
  247. if (h < 0)
  248. h += 1;
  249. if (h > 1)
  250. h -= 1;
  251. }
  252. }
  253. inline
  254. double distHSL(uint32_t pix1, uint32_t pix2, double lightningWeight)
  255. {
  256. double h1 = 0;
  257. double s1 = 0;
  258. double l1 = 0;
  259. rgbtoHsl(pix1, h1, s1, l1);
  260. double h2 = 0;
  261. double s2 = 0;
  262. double l2 = 0;
  263. rgbtoHsl(pix2, h2, s2, l2);
  264. //HSL is in cylindric coordinatates where L represents height, S radius, H angle,
  265. //however we interpret the cylinder as a bi-conic solid with top/bottom radius 0, middle radius 1
  266. assert(0 <= h1 && h1 <= 1);
  267. assert(0 <= h2 && h2 <= 1);
  268. double r1 = l1 < 0.5 ?
  269. l1 * 2 :
  270. 2 - l1 * 2;
  271. double x1 = r1 * s1 * std::cos(h1 * 2 * numeric::pi);
  272. double y1 = r1 * s1 * std::sin(h1 * 2 * numeric::pi);
  273. double z1 = l1;
  274. double r2 = l2 < 0.5 ?
  275. l2 * 2 :
  276. 2 - l2 * 2;
  277. double x2 = r2 * s2 * std::cos(h2 * 2 * numeric::pi);
  278. double y2 = r2 * s2 * std::sin(h2 * 2 * numeric::pi);
  279. double z2 = l2;
  280. return 255 * std::sqrt(square(x1 - x2) + square(y1 - y2) + square(lightningWeight * (z1 - z2)));
  281. }
  282. */
  283. inline
  284. double distRGB(uint32_t pix1, uint32_t pix2)
  285. {
  286. const double r_diff = static_cast<int>(getRed (pix1)) - getRed (pix2);
  287. const double g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
  288. const double b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2);
  289. //euklidean RGB distance
  290. return std::sqrt(square(r_diff) + square(g_diff) + square(b_diff));
  291. }
  292. inline
  293. double distNonLinearRGB(uint32_t pix1, uint32_t pix2)
  294. {
  295. //non-linear rgb: http://www.compuphase.com/cmetric.htm
  296. const double r_diff = static_cast<int>(getRed (pix1)) - getRed (pix2);
  297. const double g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
  298. const double b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2);
  299. const double r_avg = (static_cast<double>(getRed(pix1)) + getRed(pix2)) / 2;
  300. return std::sqrt((2 + r_avg / 255) * square(r_diff) + 4 * square(g_diff) + (2 + (255 - r_avg) / 255) * square(b_diff));
  301. }
  302. inline
  303. double distYCbCr(uint32_t pix1, uint32_t pix2, double lumaWeight)
  304. {
  305. //http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
  306. //YCbCr conversion is a matrix multiplication => take advantage of linearity by subtracting first!
  307. const int r_diff = static_cast<int>(getRed (pix1)) - getRed (pix2); //we may delay division by 255 to after matrix multiplication
  308. const int g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2); //
  309. const int b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2); //substraction for int is noticeable faster than for double!
  310. const double k_b = 0.0722; //ITU-R BT.709 conversion
  311. const double k_r = 0.2126; //
  312. const double k_g = 1 - k_b - k_r;
  313. const double scale_b = 0.5 / (1 - k_b);
  314. const double scale_r = 0.5 / (1 - k_r);
  315. const double y = k_r * r_diff + k_g * g_diff + k_b * b_diff; //[!], analog YCbCr!
  316. const double c_b = scale_b * (b_diff - y);
  317. const double c_r = scale_r * (r_diff - y);
  318. //we skip division by 255 to have similar range like other distance functions
  319. return std::sqrt(square(lumaWeight * y) + square(c_b) + square(c_r));
  320. }
  321. inline
  322. double distYUV(uint32_t pix1, uint32_t pix2, double luminanceWeight)
  323. {
  324. //perf: it's not worthwhile to buffer the YUV-conversion, the direct code is faster by ~ 6%
  325. //since RGB -> YUV conversion is essentially a matrix multiplication, we can calculate the RGB diff before the conversion (distributive property)
  326. const double r_diff = static_cast<int>(getRed (pix1)) - getRed (pix2);
  327. const double g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
  328. const double b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2);
  329. //http://en.wikipedia.org/wiki/YUV#Conversion_to.2Ffrom_RGB
  330. const double w_b = 0.114;
  331. const double w_r = 0.299;
  332. const double w_g = 1 - w_r - w_b;
  333. const double u_max = 0.436;
  334. const double v_max = 0.615;
  335. const double scale_u = u_max / (1 - w_b);
  336. const double scale_v = v_max / (1 - w_r);
  337. double y = w_r * r_diff + w_g * g_diff + w_b * b_diff;//value range: 255 * [-1, 1]
  338. double u = scale_u * (b_diff - y); //value range: 255 * 2 * u_max * [-1, 1]
  339. double v = scale_v * (r_diff - y); //value range: 255 * 2 * v_max * [-1, 1]
  340. #ifndef NDEBUG
  341. const double eps = 0.5;
  342. #endif
  343. assert(std::abs(y) <= 255 + eps);
  344. assert(std::abs(u) <= 255 * 2 * u_max + eps);
  345. assert(std::abs(v) <= 255 * 2 * v_max + eps);
  346. return std::sqrt(square(luminanceWeight * y) + square(u) + square(v));
  347. }
  348. inline
  349. double colorDist(uint32_t pix1, uint32_t pix2, double luminanceWeight)
  350. {
  351. if (pix1 == pix2) //about 8% perf boost
  352. return 0;
  353. //return distHSL(pix1, pix2, luminanceWeight);
  354. //return distRGB(pix1, pix2);
  355. //return distLAB(pix1, pix2);
  356. //return distNonLinearRGB(pix1, pix2);
  357. //return distYUV(pix1, pix2, luminanceWeight);
  358. return distYCbCr(pix1, pix2, luminanceWeight);
  359. }
  360. inline
  361. bool equalColor(uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance)
  362. {
  363. return colorDist(col1, col2, luminanceWeight) < equalColorTolerance;
  364. }
  365. enum BlendType
  366. {
  367. BLEND_NONE = 0,
  368. BLEND_NORMAL, //a normal indication to blend
  369. BLEND_DOMINANT, //a strong indication to blend
  370. //attention: BlendType must fit into the value range of 2 bit!!!
  371. };
  372. struct BlendResult
  373. {
  374. BlendType
  375. /**/blend_f, blend_g,
  376. /**/blend_j, blend_k;
  377. };
  378. struct Kernel_4x4 //kernel for preprocessing step
  379. {
  380. uint32_t
  381. /**/a, b, c, d,
  382. /**/e, f, g, h,
  383. /**/i, j, k, l,
  384. /**/m, n, o, p;
  385. };
  386. /*
  387. input kernel area naming convention:
  388. -----------------
  389. | A | B | C | D |
  390. ----|---|---|---|
  391. | E | F | G | H | //evalute the four corners between F, G, J, K
  392. ----|---|---|---| //input pixel is at position F
  393. | I | J | K | L |
  394. ----|---|---|---|
  395. | M | N | O | P |
  396. -----------------
  397. */
  398. FORCE_INLINE //detect blend direction
  399. BlendResult preProcessCorners(const Kernel_4x4& ker, const xbrz::ScalerCfg& cfg) //result: F, G, J, K corners of "GradientType"
  400. {
  401. BlendResult result = {};
  402. if ((ker.f == ker.g &&
  403. ker.j == ker.k) ||
  404. (ker.f == ker.j &&
  405. ker.g == ker.k))
  406. return result;
  407. auto dist = [&](uint32_t col1, uint32_t col2) { return colorDist(col1, col2, cfg.luminanceWeight_); };
  408. const int weight = 4;
  409. double jg = dist(ker.i, ker.f) + dist(ker.f, ker.c) + dist(ker.n, ker.k) + dist(ker.k, ker.h) + weight * dist(ker.j, ker.g);
  410. double fk = dist(ker.e, ker.j) + dist(ker.j, ker.o) + dist(ker.b, ker.g) + dist(ker.g, ker.l) + weight * dist(ker.f, ker.k);
  411. if (jg < fk) //test sample: 70% of values max(jg, fk) / min(jg, fk) are between 1.1 and 3.7 with median being 1.8
  412. {
  413. const bool dominantGradient = cfg.dominantDirectionThreshold * jg < fk;
  414. if (ker.f != ker.g && ker.f != ker.j)
  415. result.blend_f = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
  416. if (ker.k != ker.j && ker.k != ker.g)
  417. result.blend_k = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
  418. }
  419. else if (fk < jg)
  420. {
  421. const bool dominantGradient = cfg.dominantDirectionThreshold * fk < jg;
  422. if (ker.j != ker.f && ker.j != ker.k)
  423. result.blend_j = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
  424. if (ker.g != ker.f && ker.g != ker.k)
  425. result.blend_g = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
  426. }
  427. return result;
  428. }
  429. struct Kernel_3x3
  430. {
  431. uint32_t
  432. /**/a, b, c,
  433. /**/d, e, f,
  434. /**/g, h, i;
  435. };
  436. #define DEF_GETTER(x) template <RotationDegree rotDeg> uint32_t inline get_##x(const Kernel_3x3& ker) { return ker.x; }
  437. //we cannot and NEED NOT write "ker.##x" since ## concatenates preprocessor tokens but "." is not a token
  438. DEF_GETTER(a) DEF_GETTER(b) DEF_GETTER(c)
  439. DEF_GETTER(d) DEF_GETTER(e) DEF_GETTER(f)
  440. DEF_GETTER(g) DEF_GETTER(h) DEF_GETTER(i)
  441. #undef DEF_GETTER
  442. #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_90>(const Kernel_3x3& ker) { return ker.y; }
  443. DEF_GETTER(a, g) DEF_GETTER(b, d) DEF_GETTER(c, a)
  444. DEF_GETTER(d, h) DEF_GETTER(e, e) DEF_GETTER(f, b)
  445. DEF_GETTER(g, i) DEF_GETTER(h, f) DEF_GETTER(i, c)
  446. #undef DEF_GETTER
  447. #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_180>(const Kernel_3x3& ker) { return ker.y; }
  448. DEF_GETTER(a, i) DEF_GETTER(b, h) DEF_GETTER(c, g)
  449. DEF_GETTER(d, f) DEF_GETTER(e, e) DEF_GETTER(f, d)
  450. DEF_GETTER(g, c) DEF_GETTER(h, b) DEF_GETTER(i, a)
  451. #undef DEF_GETTER
  452. #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_270>(const Kernel_3x3& ker) { return ker.y; }
  453. DEF_GETTER(a, c) DEF_GETTER(b, f) DEF_GETTER(c, i)
  454. DEF_GETTER(d, b) DEF_GETTER(e, e) DEF_GETTER(f, h)
  455. DEF_GETTER(g, a) DEF_GETTER(h, d) DEF_GETTER(i, g)
  456. #undef DEF_GETTER
  457. //compress four blend types into a single byte
  458. inline BlendType getTopL (unsigned char b) { return static_cast<BlendType>(0x3 & b); }
  459. inline BlendType getTopR (unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 2)); }
  460. inline BlendType getBottomR(unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 4)); }
  461. inline BlendType getBottomL(unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 6)); }
  462. inline void setTopL (unsigned char& b, BlendType bt) { b |= bt; } //buffer is assumed to be initialized before preprocessing!
  463. inline void setTopR (unsigned char& b, BlendType bt) { b |= (bt << 2); }
  464. inline void setBottomR(unsigned char& b, BlendType bt) { b |= (bt << 4); }
  465. inline void setBottomL(unsigned char& b, BlendType bt) { b |= (bt << 6); }
  466. inline bool blendingNeeded(unsigned char b) { return b != 0; }
  467. template <RotationDegree rotDeg> inline
  468. unsigned char rotateBlendInfo(unsigned char b) { return b; }
  469. template <> inline unsigned char rotateBlendInfo<ROT_90 >(unsigned char b) { return ((b << 2) | (b >> 6)) & 0xff; }
  470. template <> inline unsigned char rotateBlendInfo<ROT_180>(unsigned char b) { return ((b << 4) | (b >> 4)) & 0xff; }
  471. template <> inline unsigned char rotateBlendInfo<ROT_270>(unsigned char b) { return ((b << 6) | (b >> 2)) & 0xff; }
  472. #ifndef NDEBUG
  473. int debugPixelX = -1;
  474. int debugPixelY = 84;
  475. bool breakIntoDebugger = false;
  476. #endif
  477. /*
  478. input kernel area naming convention:
  479. -------------
  480. | A | B | C |
  481. ----|---|---|
  482. | D | E | F | //input pixel is at position E
  483. ----|---|---|
  484. | G | H | I |
  485. -------------
  486. */
  487. template <class Scaler, RotationDegree rotDeg>
  488. FORCE_INLINE //perf: quite worth it!
  489. void scalePixel(const Kernel_3x3& ker,
  490. uint32_t* target, int trgWidth,
  491. unsigned char blendInfo, //result of preprocessing all four corners of pixel "e"
  492. const xbrz::ScalerCfg& cfg)
  493. {
  494. #define a get_a<rotDeg>(ker)
  495. #define b get_b<rotDeg>(ker)
  496. #define c get_c<rotDeg>(ker)
  497. #define d get_d<rotDeg>(ker)
  498. #define e get_e<rotDeg>(ker)
  499. #define f get_f<rotDeg>(ker)
  500. #define g get_g<rotDeg>(ker)
  501. #define h get_h<rotDeg>(ker)
  502. #define i get_i<rotDeg>(ker)
  503. #ifndef NDEBUG
  504. if (breakIntoDebugger)
  505. __debugbreak(); //__asm int 3;
  506. #endif
  507. const unsigned char blend = rotateBlendInfo<rotDeg>(blendInfo);
  508. if (getBottomR(blend) >= BLEND_NORMAL)
  509. {
  510. auto eq = [&](uint32_t col1, uint32_t col2) { return equalColor(col1, col2, cfg.luminanceWeight_, cfg.equalColorTolerance_); };
  511. auto dist = [&](uint32_t col1, uint32_t col2) { return colorDist(col1, col2, cfg.luminanceWeight_); };
  512. const bool doLineBlend = [&]() -> bool
  513. {
  514. if (getBottomR(blend) >= BLEND_DOMINANT)
  515. return true;
  516. //make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes
  517. if (getTopR(blend) != BLEND_NONE && !eq(e, g)) //but support double-blending for 90° corners
  518. return false;
  519. if (getBottomL(blend) != BLEND_NONE && !eq(e, c))
  520. return false;
  521. //no full blending for L-shapes; blend corner only (handles "mario mushroom eyes")
  522. if (eq(g, h) && eq(h , i) && eq(i, f) && eq(f, c) && !eq(e, i))
  523. return false;
  524. return true;
  525. }();
  526. const uint32_t px = dist(e, f) <= dist(e, h) ? f : h; //choose most similar color
  527. OutputMatrix<Scaler::scale, rotDeg> out(target, trgWidth);
  528. if (doLineBlend)
  529. {
  530. const double fg = dist(f, g); //test sample: 70% of values max(fg, hc) / min(fg, hc) are between 1.1 and 3.7 with median being 1.9
  531. const double hc = dist(h, c); //
  532. const bool haveShallowLine = cfg.steepDirectionThreshold * fg <= hc && e != g && d != g;
  533. const bool haveSteepLine = cfg.steepDirectionThreshold * hc <= fg && e != c && b != c;
  534. if (haveShallowLine)
  535. {
  536. if (haveSteepLine)
  537. Scaler::blendLineSteepAndShallow(px, out);
  538. else
  539. Scaler::blendLineShallow(px, out);
  540. }
  541. else
  542. {
  543. if (haveSteepLine)
  544. Scaler::blendLineSteep(px, out);
  545. else
  546. Scaler::blendLineDiagonal(px,out);
  547. }
  548. }
  549. else
  550. Scaler::blendCorner(px, out);
  551. }
  552. #undef a
  553. #undef b
  554. #undef c
  555. #undef d
  556. #undef e
  557. #undef f
  558. #undef g
  559. #undef h
  560. #undef i
  561. }
  562. template <class Scaler> //scaler policy: see "Scaler2x" reference implementation
  563. void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)
  564. {
  565. yFirst = std::max(yFirst, 0);
  566. yLast = std::min(yLast, srcHeight);
  567. if (yFirst >= yLast || srcWidth <= 0)
  568. return;
  569. const int trgWidth = srcWidth * Scaler::scale;
  570. //"use" space at the end of the image as temporary buffer for "on the fly preprocessing": we even could use larger area of
  571. //"sizeof(uint32_t) * srcWidth * (yLast - yFirst)" bytes without risk of accidental overwriting before accessing
  572. const int bufferSize = srcWidth;
  573. unsigned char* preProcBuffer = reinterpret_cast<unsigned char*>(trg + yLast * Scaler::scale * trgWidth) - bufferSize;
  574. std::fill(preProcBuffer, preProcBuffer + bufferSize, 0);
  575. static_assert(BLEND_NONE == 0, "");
  576. //initialize preprocessing buffer for first row: detect upper left and right corner blending
  577. //this cannot be optimized for adjacent processing stripes; we must not allow for a memory race condition!
  578. if (yFirst > 0)
  579. {
  580. const int y = yFirst - 1;
  581. const uint32_t* s_m1 = src + srcWidth * std::max(y - 1, 0);
  582. const uint32_t* s_0 = src + srcWidth * y; //center line
  583. const uint32_t* s_p1 = src + srcWidth * std::min(y + 1, srcHeight - 1);
  584. const uint32_t* s_p2 = src + srcWidth * std::min(y + 2, srcHeight - 1);
  585. for (int x = 0; x < srcWidth; ++x)
  586. {
  587. const int x_m1 = std::max(x - 1, 0);
  588. const int x_p1 = std::min(x + 1, srcWidth - 1);
  589. const int x_p2 = std::min(x + 2, srcWidth - 1);
  590. Kernel_4x4 ker = {}; //perf: initialization is negligable
  591. ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
  592. ker.b = s_m1[x];
  593. ker.c = s_m1[x_p1];
  594. ker.d = s_m1[x_p2];
  595. ker.e = s_0[x_m1];
  596. ker.f = s_0[x];
  597. ker.g = s_0[x_p1];
  598. ker.h = s_0[x_p2];
  599. ker.i = s_p1[x_m1];
  600. ker.j = s_p1[x];
  601. ker.k = s_p1[x_p1];
  602. ker.l = s_p1[x_p2];
  603. ker.m = s_p2[x_m1];
  604. ker.n = s_p2[x];
  605. ker.o = s_p2[x_p1];
  606. ker.p = s_p2[x_p2];
  607. const BlendResult res = preProcessCorners(ker, cfg);
  608. /*
  609. preprocessing blend result:
  610. ---------
  611. | F | G | //evalute corner between F, G, J, K
  612. ----|---| //input pixel is at position F
  613. | J | K |
  614. ---------
  615. */
  616. setTopR(preProcBuffer[x], res.blend_j);
  617. if (x + 1 < srcWidth)
  618. setTopL(preProcBuffer[x + 1], res.blend_k);
  619. }
  620. }
  621. //------------------------------------------------------------------------------------
  622. for (int y = yFirst; y < yLast; ++y)
  623. {
  624. uint32_t* out = trg + Scaler::scale * y * trgWidth; //consider MT "striped" access
  625. const uint32_t* s_m1 = src + srcWidth * std::max(y - 1, 0);
  626. const uint32_t* s_0 = src + srcWidth * y; //center line
  627. const uint32_t* s_p1 = src + srcWidth * std::min(y + 1, srcHeight - 1);
  628. const uint32_t* s_p2 = src + srcWidth * std::min(y + 2, srcHeight - 1);
  629. unsigned char blend_xy1 = 0; //corner blending for current (x, y + 1) position
  630. for (int x = 0; x < srcWidth; ++x, out += Scaler::scale)
  631. {
  632. #ifndef NDEBUG
  633. breakIntoDebugger = debugPixelX == x && debugPixelY == y;
  634. #endif
  635. //all those bounds checks have only insignificant impact on performance!
  636. const int x_m1 = std::max(x - 1, 0); //perf: prefer array indexing to additional pointers!
  637. const int x_p1 = std::min(x + 1, srcWidth - 1);
  638. const int x_p2 = std::min(x + 2, srcWidth - 1);
  639. //evaluate the four corners on bottom-right of current pixel
  640. unsigned char blend_xy = 0; //for current (x, y) position
  641. {
  642. Kernel_4x4 ker = {}; //perf: initialization is negligable
  643. ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
  644. ker.b = s_m1[x];
  645. ker.c = s_m1[x_p1];
  646. ker.d = s_m1[x_p2];
  647. ker.e = s_0[x_m1];
  648. ker.f = s_0[x];
  649. ker.g = s_0[x_p1];
  650. ker.h = s_0[x_p2];
  651. ker.i = s_p1[x_m1];
  652. ker.j = s_p1[x];
  653. ker.k = s_p1[x_p1];
  654. ker.l = s_p1[x_p2];
  655. ker.m = s_p2[x_m1];
  656. ker.n = s_p2[x];
  657. ker.o = s_p2[x_p1];
  658. ker.p = s_p2[x_p2];
  659. const BlendResult res = preProcessCorners(ker, cfg);
  660. /*
  661. preprocessing blend result:
  662. ---------
  663. | F | G | //evalute corner between F, G, J, K
  664. ----|---| //current input pixel is at position F
  665. | J | K |
  666. ---------
  667. */
  668. blend_xy = preProcBuffer[x];
  669. setBottomR(blend_xy, res.blend_f); //all four corners of (x, y) have been determined at this point due to processing sequence!
  670. setTopR(blend_xy1, res.blend_j); //set 2nd known corner for (x, y + 1)
  671. preProcBuffer[x] = blend_xy1; //store on current buffer position for use on next row
  672. blend_xy1 = 0;
  673. setTopL(blend_xy1, res.blend_k); //set 1st known corner for (x + 1, y + 1) and buffer for use on next column
  674. if (x + 1 < srcWidth) //set 3rd known corner for (x + 1, y)
  675. setBottomL(preProcBuffer[x + 1], res.blend_g);
  676. }
  677. //fill block of size scale * scale with the given color
  678. fillBlock(out, trgWidth * sizeof(uint32_t), s_0[x], Scaler::scale); //place *after* preprocessing step, to not overwrite the results while processing the the last pixel!
  679. //blend four corners of current pixel
  680. if (blendingNeeded(blend_xy)) //good 20% perf-improvement
  681. {
  682. Kernel_3x3 ker = {}; //perf: initialization is negligable
  683. ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
  684. ker.b = s_m1[x];
  685. ker.c = s_m1[x_p1];
  686. ker.d = s_0[x_m1];
  687. ker.e = s_0[x];
  688. ker.f = s_0[x_p1];
  689. ker.g = s_p1[x_m1];
  690. ker.h = s_p1[x];
  691. ker.i = s_p1[x_p1];
  692. scalePixel<Scaler, ROT_0 >(ker, out, trgWidth, blend_xy, cfg);
  693. scalePixel<Scaler, ROT_90 >(ker, out, trgWidth, blend_xy, cfg);
  694. scalePixel<Scaler, ROT_180>(ker, out, trgWidth, blend_xy, cfg);
  695. scalePixel<Scaler, ROT_270>(ker, out, trgWidth, blend_xy, cfg);
  696. }
  697. }
  698. }
  699. }
  700. struct Scaler2x
  701. {
  702. static const int scale = 2;
  703. template <class OutputMatrix>
  704. static void blendLineShallow(uint32_t col, OutputMatrix& out)
  705. {
  706. alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
  707. alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
  708. }
  709. template <class OutputMatrix>
  710. static void blendLineSteep(uint32_t col, OutputMatrix& out)
  711. {
  712. alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
  713. alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
  714. }
  715. template <class OutputMatrix>
  716. static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
  717. {
  718. alphaBlend<1, 4>(out.template ref<1, 0>(), col);
  719. alphaBlend<1, 4>(out.template ref<0, 1>(), col);
  720. alphaBlend<5, 6>(out.template ref<1, 1>(), col); //[!] fixes 7/8 used in xBR
  721. }
  722. template <class OutputMatrix>
  723. static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
  724. {
  725. alphaBlend<1, 2>(out.template ref<1, 1>(), col);
  726. }
  727. template <class OutputMatrix>
  728. static void blendCorner(uint32_t col, OutputMatrix& out)
  729. {
  730. //model a round corner
  731. alphaBlend<21, 100>(out.template ref<1, 1>(), col); //exact: 1 - pi/4 = 0.2146018366
  732. }
  733. };
  734. struct Scaler3x
  735. {
  736. static const int scale = 3;
  737. template <class OutputMatrix>
  738. static void blendLineShallow(uint32_t col, OutputMatrix& out)
  739. {
  740. alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
  741. alphaBlend<1, 4>(out.template ref<scale - 2, 2>(), col);
  742. alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
  743. out.template ref<scale - 1, 2>() = col;
  744. }
  745. template <class OutputMatrix>
  746. static void blendLineSteep(uint32_t col, OutputMatrix& out)
  747. {
  748. alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
  749. alphaBlend<1, 4>(out.template ref<2, scale - 2>(), col);
  750. alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
  751. out.template ref<2, scale - 1>() = col;
  752. }
  753. template <class OutputMatrix>
  754. static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
  755. {
  756. alphaBlend<1, 4>(out.template ref<2, 0>(), col);
  757. alphaBlend<1, 4>(out.template ref<0, 2>(), col);
  758. alphaBlend<3, 4>(out.template ref<2, 1>(), col);
  759. alphaBlend<3, 4>(out.template ref<1, 2>(), col);
  760. out.template ref<2, 2>() = col;
  761. }
  762. template <class OutputMatrix>
  763. static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
  764. {
  765. alphaBlend<1, 8>(out.template ref<1, 2>(), col);
  766. alphaBlend<1, 8>(out.template ref<2, 1>(), col);
  767. alphaBlend<7, 8>(out.template ref<2, 2>(), col);
  768. }
  769. template <class OutputMatrix>
  770. static void blendCorner(uint32_t col, OutputMatrix& out)
  771. {
  772. //model a round corner
  773. alphaBlend<45, 100>(out.template ref<2, 2>(), col); //exact: 0.4545939598
  774. //alphaBlend<14, 1000>(out.template ref<2, 1>(), col); //0.01413008627 -> negligable
  775. //alphaBlend<14, 1000>(out.template ref<1, 2>(), col); //0.01413008627
  776. }
  777. };
  778. struct Scaler4x
  779. {
  780. static const int scale = 4;
  781. template <class OutputMatrix>
  782. static void blendLineShallow(uint32_t col, OutputMatrix& out)
  783. {
  784. alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
  785. alphaBlend<1, 4>(out.template ref<scale - 2, 2>(), col);
  786. alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
  787. alphaBlend<3, 4>(out.template ref<scale - 2, 3>(), col);
  788. out.template ref<scale - 1, 2>() = col;
  789. out.template ref<scale - 1, 3>() = col;
  790. }
  791. template <class OutputMatrix>
  792. static void blendLineSteep(uint32_t col, OutputMatrix& out)
  793. {
  794. alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
  795. alphaBlend<1, 4>(out.template ref<2, scale - 2>(), col);
  796. alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
  797. alphaBlend<3, 4>(out.template ref<3, scale - 2>(), col);
  798. out.template ref<2, scale - 1>() = col;
  799. out.template ref<3, scale - 1>() = col;
  800. }
  801. template <class OutputMatrix>
  802. static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
  803. {
  804. alphaBlend<3, 4>(out.template ref<3, 1>(), col);
  805. alphaBlend<3, 4>(out.template ref<1, 3>(), col);
  806. alphaBlend<1, 4>(out.template ref<3, 0>(), col);
  807. alphaBlend<1, 4>(out.template ref<0, 3>(), col);
  808. alphaBlend<1, 3>(out.template ref<2, 2>(), col); //[!] fixes 1/4 used in xBR
  809. out.template ref<3, 3>() = out.template ref<3, 2>() = out.template ref<2, 3>() = col;
  810. }
  811. template <class OutputMatrix>
  812. static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
  813. {
  814. alphaBlend<1, 2>(out.template ref<scale - 1, scale / 2 >(), col);
  815. alphaBlend<1, 2>(out.template ref<scale - 2, scale / 2 + 1>(), col);
  816. out.template ref<scale - 1, scale - 1>() = col;
  817. }
  818. template <class OutputMatrix>
  819. static void blendCorner(uint32_t col, OutputMatrix& out)
  820. {
  821. //model a round corner
  822. alphaBlend<68, 100>(out.template ref<3, 3>(), col); //exact: 0.6848532563
  823. alphaBlend< 9, 100>(out.template ref<3, 2>(), col); //0.08677704501
  824. alphaBlend< 9, 100>(out.template ref<2, 3>(), col); //0.08677704501
  825. }
  826. };
  827. struct Scaler5x
  828. {
  829. static const int scale = 5;
  830. template <class OutputMatrix>
  831. static void blendLineShallow(uint32_t col, OutputMatrix& out)
  832. {
  833. alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
  834. alphaBlend<1, 4>(out.template ref<scale - 2, 2>(), col);
  835. alphaBlend<1, 4>(out.template ref<scale - 3, 4>(), col);
  836. alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
  837. alphaBlend<3, 4>(out.template ref<scale - 2, 3>(), col);
  838. out.template ref<scale - 1, 2>() = col;
  839. out.template ref<scale - 1, 3>() = col;
  840. out.template ref<scale - 1, 4>() = col;
  841. out.template ref<scale - 2, 4>() = col;
  842. }
  843. template <class OutputMatrix>
  844. static void blendLineSteep(uint32_t col, OutputMatrix& out)
  845. {
  846. alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
  847. alphaBlend<1, 4>(out.template ref<2, scale - 2>(), col);
  848. alphaBlend<1, 4>(out.template ref<4, scale - 3>(), col);
  849. alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
  850. alphaBlend<3, 4>(out.template ref<3, scale - 2>(), col);
  851. out.template ref<2, scale - 1>() = col;
  852. out.template ref<3, scale - 1>() = col;
  853. out.template ref<4, scale - 1>() = col;
  854. out.template ref<4, scale - 2>() = col;
  855. }
  856. template <class OutputMatrix>
  857. static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
  858. {
  859. alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
  860. alphaBlend<1, 4>(out.template ref<2, scale - 2>(), col);
  861. alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
  862. alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
  863. alphaBlend<1, 4>(out.template ref<scale - 2, 2>(), col);
  864. alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
  865. out.template ref<2, scale - 1>() = col;
  866. out.template ref<3, scale - 1>() = col;
  867. out.template ref<scale - 1, 2>() = col;
  868. out.template ref<scale - 1, 3>() = col;
  869. out.template ref<4, scale - 1>() = col;
  870. alphaBlend<2, 3>(out.template ref<3, 3>(), col);
  871. }
  872. template <class OutputMatrix>
  873. static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
  874. {
  875. alphaBlend<1, 8>(out.template ref<scale - 1, scale / 2 >(), col);
  876. alphaBlend<1, 8>(out.template ref<scale - 2, scale / 2 + 1>(), col);
  877. alphaBlend<1, 8>(out.template ref<scale - 3, scale / 2 + 2>(), col);
  878. alphaBlend<7, 8>(out.template ref<4, 3>(), col);
  879. alphaBlend<7, 8>(out.template ref<3, 4>(), col);
  880. out.template ref<4, 4>() = col;
  881. }
  882. template <class OutputMatrix>
  883. static void blendCorner(uint32_t col, OutputMatrix& out)
  884. {
  885. //model a round corner
  886. alphaBlend<86, 100>(out.template ref<4, 4>(), col); //exact: 0.8631434088
  887. alphaBlend<23, 100>(out.template ref<4, 3>(), col); //0.2306749731
  888. alphaBlend<23, 100>(out.template ref<3, 4>(), col); //0.2306749731
  889. //alphaBlend<8, 1000>(out.template ref<4, 2>(), col); //0.008384061834 -> negligable
  890. //alphaBlend<8, 1000>(out.template ref<2, 4>(), col); //0.008384061834
  891. }
  892. };
  893. }
  894. void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)
  895. {
  896. switch (factor)
  897. {
  898. case 2:
  899. return scaleImage<Scaler2x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
  900. case 3:
  901. return scaleImage<Scaler3x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
  902. case 4:
  903. return scaleImage<Scaler4x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
  904. case 5:
  905. return scaleImage<Scaler5x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
  906. }
  907. assert(false);
  908. }
  909. bool xbrz::equalColorTest(uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance)
  910. {
  911. return equalColor(col1, col2, luminanceWeight, equalColorTolerance);
  912. }
  913. void xbrz::nearestNeighborScale(const uint32_t* src, int srcWidth, int srcHeight, int srcPitch,
  914. uint32_t* trg, int trgWidth, int trgHeight, int trgPitch,
  915. SliceType st, int yFirst, int yLast)
  916. {
  917. if (srcPitch < srcWidth * static_cast<int>(sizeof(uint32_t)) ||
  918. trgPitch < trgWidth * static_cast<int>(sizeof(uint32_t)))
  919. {
  920. assert(false);
  921. return;
  922. }
  923. switch (st)
  924. {
  925. case NN_SCALE_SLICE_SOURCE:
  926. //nearest-neighbor (going over source image - fast for upscaling, since source is read only once
  927. yFirst = std::max(yFirst, 0);
  928. yLast = std::min(yLast, srcHeight);
  929. if (yFirst >= yLast || trgWidth <= 0 || trgHeight <= 0) return;
  930. for (int y = yFirst; y < yLast; ++y)
  931. {
  932. //mathematically: ySrc = floor(srcHeight * yTrg / trgHeight)
  933. // => search for integers in: [ySrc, ySrc + 1) * trgHeight / srcHeight
  934. //keep within for loop to support MT input slices!
  935. const int yTrg_first = ( y * trgHeight + srcHeight - 1) / srcHeight; //=ceil(y * trgHeight / srcHeight)
  936. const int yTrg_last = ((y + 1) * trgHeight + srcHeight - 1) / srcHeight; //=ceil(((y + 1) * trgHeight) / srcHeight)
  937. const int blockHeight = yTrg_last - yTrg_first;
  938. if (blockHeight > 0)
  939. {
  940. const uint32_t* srcLine = byteAdvance(src, y * srcPitch);
  941. uint32_t* trgLine = byteAdvance(trg, yTrg_first * trgPitch);
  942. int xTrg_first = 0;
  943. for (int x = 0; x < srcWidth; ++x)
  944. {
  945. int xTrg_last = ((x + 1) * trgWidth + srcWidth - 1) / srcWidth;
  946. const int blockWidth = xTrg_last - xTrg_first;
  947. if (blockWidth > 0)
  948. {
  949. xTrg_first = xTrg_last;
  950. fillBlock(trgLine, trgPitch, srcLine[x], blockWidth, blockHeight);
  951. trgLine += blockWidth;
  952. }
  953. }
  954. }
  955. }
  956. break;
  957. case NN_SCALE_SLICE_TARGET:
  958. //nearest-neighbor (going over target image - slow for upscaling, since source is read multiple times missing out on cache! Fast for similar image sizes!)
  959. yFirst = std::max(yFirst, 0);
  960. yLast = std::min(yLast, trgHeight);
  961. if (yFirst >= yLast || srcHeight <= 0 || srcWidth <= 0) return;
  962. for (int y = yFirst; y < yLast; ++y)
  963. {
  964. uint32_t* trgLine = byteAdvance(trg, y * trgPitch);
  965. const int ySrc = srcHeight * y / trgHeight;
  966. const uint32_t* srcLine = byteAdvance(src, ySrc * srcPitch);
  967. for (int x = 0; x < trgWidth; ++x)
  968. {
  969. const int xSrc = srcWidth * x / trgWidth;
  970. trgLine[x] = srcLine[xSrc];
  971. }
  972. }
  973. break;
  974. }
  975. }

comments powered by Disqus