@@ -19,6 +19,7 @@
Loading
19 19
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
*/
21 21
22 +
#include "rawspeedconfig.h" // for HAVE_OPENMP
22 23
#include "interpolators/Cr2sRawInterpolator.h"
23 24
#include "common/Array2DRef.h"            // for Array2DRef
24 25
#include "common/Common.h"                // for clampBits
@@ -51,6 +52,13 @@
Loading
51 52
    p->Cr = data[1];
52 53
  }
53 54
55 +
  // Copies the chroma components (Cb and Cr) of pSrc into *p.
  // No other member of *p is written. p must be non-null (asserted).
  inline static void CopyCbCr(YCbCr* p, const YCbCr& pSrc) {
56 +
    assert(p);
57 +
58 +
    p->Cb = pSrc.Cb;
59 +
    p->Cr = pSrc.Cr;
60 +
  }
61 +
54 62
  YCbCr() = default;
55 63
56 64
  inline void signExtend() {
@@ -84,20 +92,37 @@
Loading
84 92
  }
85 93
};
86 94
87 -
// NOTE: Thread safe.
88 -
template <int version>
89 -
inline void Cr2sRawInterpolator::interpolate_422_row(int row) {
95 +
template <int version> void Cr2sRawInterpolator::interpolate_422_row(int row) {
90 96
  const Array2DRef<uint16_t> out(mRaw->getU16DataAsUncroppedArray2DRef());
91 97
92 -
  assert(out.width >= 6);
93 -
  assert(out.width % 6 == 0);
94 -
95 -
  int numPixels = out.width / 3;
96 -
  auto inCol = [](int pixel) {
97 -
    assert(pixel % 2 == 0);
98 -
    return 4 * pixel / 2;
98 +
  constexpr int InputComponentsPerMCU = 4;
99 +
  constexpr int PixelsPerMCU = 2;
100 +
  constexpr int YsPerMCU = PixelsPerMCU;
101 +
  constexpr int ComponentsPerPixel = 3;
102 +
  constexpr int OutputComponentsPerMCU = ComponentsPerPixel * PixelsPerMCU;
103 +
104 +
  assert(input.width % InputComponentsPerMCU == 0);
105 +
  int numMCUs = input.width / InputComponentsPerMCU;
106 +
  assert(numMCUs > 1);
107 +
108 +
  using MCUTy = std::array<YCbCr, PixelsPerMCU>;
109 +
110 +
  auto LoadMCU = [&](int MCUIdx) {
111 +
    MCUTy MCU;
112 +
    for (int YIdx = 0; YIdx < PixelsPerMCU; ++YIdx)
113 +
      YCbCr::LoadY(&MCU[YIdx],
114 +
                   &input(row, InputComponentsPerMCU * MCUIdx + YIdx));
115 +
    YCbCr::LoadCbCr(&MCU[0],
116 +
                    &input(row, InputComponentsPerMCU * MCUIdx + YsPerMCU));
117 +
    return MCU;
118 +
  };
119 +
  auto StoreMCU = [&](const MCUTy& MCU, int MCUIdx) {
120 +
    for (int Pixel = 0; Pixel < PixelsPerMCU; ++Pixel) {
121 +
      YUV_TO_RGB<version>(MCU[Pixel],
122 +
                          &out(row, OutputComponentsPerMCU * MCUIdx +
123 +
                                        ComponentsPerPixel * Pixel));
124 +
    }
99 125
  };
100 -
  auto outCol = [](int pixel) { return 3 * pixel; };
101 126
102 127
  // The packed input format is:
103 128
  //   p0 p1 p0 p0     p2 p3 p2 p2
@@ -111,34 +136,29 @@
Loading
111 136
  // for last (odd) pixel of the line,  just keep Cb/Cr from previous pixel
112 137
  // see http://lclevy.free.fr/cr2/#sraw
113 138
114 -
  int pixel;
115 -
  for (pixel = 0; pixel < numPixels - 2; pixel += 2) {
116 -
    assert(pixel + 4 <= numPixels);
117 -
    assert(pixel % 2 == 0);
118 -
119 -
    // load, process and output first pixel, which is full
120 -
    YCbCr p0;
121 -
    YCbCr::LoadY(&p0, &input(row, inCol(pixel) + 0));
122 -
    YCbCr::LoadCbCr(&p0, &input(row, inCol(pixel) + 2));
123 -
    p0.process(hue);
124 -
    YUV_TO_RGB<version>(p0, &out(row, outCol(pixel)));
125 -
126 -
    // load Y from second pixel, Cb/Cr need to be interpolated
127 -
    YCbCr p;
128 -
    YCbCr::LoadY(&p, &input(row, inCol(pixel) + 1));
129 -
130 -
    // load Cb/Cr from third pixel, which is full
131 -
    YCbCr p1;
132 -
    YCbCr::LoadCbCr(&p1, &input(row, inCol(pixel + 2) + 2));
133 -
    p1.process(hue);
134 -
135 -
    // and finally, interpolate and output the middle pixel
136 -
    p.interpolateCbCr(p0, p1);
137 -
    YUV_TO_RGB<version>(p, &out(row, outCol(pixel + 1)));
139 +
  int MCUIdx;
140 +
  // Process all MCU's except the last one.
141 +
  for (MCUIdx = 0; MCUIdx < numMCUs - 1; ++MCUIdx) {
142 +
    assert(MCUIdx + 1 <= numMCUs);
143 +
144 +
    // For 4:2:2, one MCU encodes 2 pixels, and odd pixels need interpolation,
145 +
    // so we need to load three pixels, and thus we must load 2 MCU's.
146 +
    std::array<MCUTy, 2> MCUs;
147 +
    for (size_t SubMCUIdx = 0; SubMCUIdx < MCUs.size(); ++SubMCUIdx)
148 +
      MCUs[SubMCUIdx] = LoadMCU(MCUIdx + SubMCUIdx);
149 +
150 +
    // Process first pixel, which is full
151 +
    MCUs[0][0].process(hue);
152 +
    // Process third pixel, which is, again, full
153 +
    MCUs[1][0].process(hue);
154 +
    // Interpolate the middle pixel, for which only the Y was known.
155 +
    MCUs[0][1].interpolateCbCr(MCUs[0][0], MCUs[1][0]);
156 +
157 +
    // And finally, store the first MCU, i.e. first two pixels.
158 +
    StoreMCU(MCUs[0], MCUIdx);
138 159
  }
139 160
140 -
  assert(pixel + 2 == numPixels);
141 -
  assert(pixel % 2 == 0);
161 +
  assert(MCUIdx + 1 == numMCUs);
142 162
143 163
  // Last two pixels, the packed input format is:
144 164
  //      p0 p1 p0 p0
@@ -147,44 +167,69 @@
Loading
147 167
  //      p0             p1
148 168
  //  .. [ Y1 Cb  Cr  ] [ Y2 ... ... ]
149 169
150 -
  // load, process and output first pixel, which is full
151 -
  YCbCr p;
152 -
  YCbCr::LoadY(&p, &input(row, inCol(pixel) + 0));
153 -
  YCbCr::LoadCbCr(&p, &input(row, inCol(pixel) + 2));
154 -
  p.process(hue);
155 -
  YUV_TO_RGB<version>(p, &out(row, outCol(pixel)));
170 +
  MCUTy MCU = LoadMCU(MCUIdx);
156 171
157 -
  // load Y from second pixel, keep Cb/Cr from previous pixel, and output
158 -
  YCbCr::LoadY(&p, &input(row, inCol(pixel) + 1));
159 -
  YUV_TO_RGB<version>(p, &out(row, outCol(pixel + 1)));
172 +
  MCU[0].process(hue);
173 +
  YCbCr::CopyCbCr(&MCU[1], MCU[0]);
174 +
175 +
  StoreMCU(MCU, MCUIdx);
160 176
}
161 177
162 -
template <int version> inline void Cr2sRawInterpolator::interpolate_422() {
178 +
// Runs the 4:2:2 interpolation over the whole image by calling
// interpolate_422_row<version>() once for every row in [0, out.height),
// sequentially and in increasing row order.
// The output width and height are asserted to be non-zero.
template <int version> void Cr2sRawInterpolator::interpolate_422() {
163 179
  const Array2DRef<uint16_t> out(mRaw->getU16DataAsUncroppedArray2DRef());
164 180
  assert(out.width > 0);
165 181
  assert(out.height > 0);
166 182
183 +
  // Benchmarking suggests that for real-world usage, it is not beneficial to
184 +
  // parallelize this, and in fact leads to worse performance.
167 185
  for (int row = 0; row < out.height; row++)
168 186
    interpolate_422_row<version>(row);
169 187
}
170 188
171 -
// NOTE: Not thread safe, since it writes inplace.
172 -
template <int version>
173 -
inline void Cr2sRawInterpolator::interpolate_420_row(int row) {
189 +
template <int version> void Cr2sRawInterpolator::interpolate_420_row(int row) {
174 190
  const Array2DRef<uint16_t> out(mRaw->getU16DataAsUncroppedArray2DRef());
175 191
176 -
  assert(row + 4 <= out.height);
177 -
  assert(row % 2 == 0);
178 -
179 -
  assert(out.width >= 6);
180 -
  assert(out.width % 6 == 0);
181 -
182 -
  int numPixels = out.width / 3;
183 -
  auto inCol = [](int pixel) {
184 -
    assert(pixel % 2 == 0);
185 -
    return 6 * pixel / 2;
192 +
  constexpr int X_S_F = 2;
193 +
  constexpr int Y_S_F = 2;
194 +
  constexpr int PixelsPerMCU = X_S_F * Y_S_F;
195 +
  constexpr int InputComponentsPerMCU = 2 + PixelsPerMCU;
196 +
197 +
  constexpr int YsPerMCU = PixelsPerMCU;
198 +
  constexpr int ComponentsPerPixel = 3;
199 +
  constexpr int OutputComponentsPerMCU = ComponentsPerPixel * PixelsPerMCU;
200 +
201 +
  assert(input.width % InputComponentsPerMCU == 0);
202 +
  int numMCUs = input.width / InputComponentsPerMCU;
203 +
  assert(numMCUs > 1);
204 +
205 +
  using MCUTy = std::array<std::array<YCbCr, X_S_F>, Y_S_F>;
206 +
207 +
  auto LoadMCU = [&](int Row, int MCUIdx) __attribute__((always_inline)) {
208 +
    MCUTy MCU;
209 +
    for (int MCURow = 0; MCURow < Y_S_F; ++MCURow) {
210 +
      for (int MCUCol = 0; MCUCol < X_S_F; ++MCUCol) {
211 +
        YCbCr::LoadY(&MCU[MCURow][MCUCol],
212 +
                     &input(Row, InputComponentsPerMCU * MCUIdx +
213 +
                                     X_S_F * MCURow + MCUCol));
214 +
      }
215 +
    }
216 +
    YCbCr::LoadCbCr(&MCU[0][0],
217 +
                    &input(Row, InputComponentsPerMCU * MCUIdx + YsPerMCU));
218 +
    return MCU;
219 +
  };
220 +
  auto StoreMCU = [&](const MCUTy& MCU, int MCUIdx, int Row)
221 +
      __attribute__((always_inline)) {
222 +
    for (int MCURow = 0; MCURow < Y_S_F; ++MCURow) {
223 +
      for (int MCUCol = 0; MCUCol < X_S_F; ++MCUCol) {
224 +
        YUV_TO_RGB<version>(
225 +
            MCU[MCURow][MCUCol],
226 +
            &out(2 * Row + MCURow, ((OutputComponentsPerMCU * MCUIdx) / Y_S_F) +
227 +
                                       ComponentsPerPixel * MCUCol));
228 +
      }
229 +
    }
186 230
  };
187 -
  auto outCol = [](int pixel) { return 3 * pixel; };
231 +
232 +
  assert(row + 1 <= input.height);
188 233
189 234
  // The packed input format is:
190 235
  //          p0 p1 p2 p3 p0 p0     p4 p5 p6 p7 p4 p4
@@ -222,62 +267,47 @@
Loading
222 267
  //           .. .   .       .. .   .       .. .   .
223 268
  // see http://lclevy.free.fr/cr2/#sraw
224 269
225 -
  int pixel;
226 -
  for (pixel = 0; pixel < numPixels - 2; pixel += 2) {
227 -
    assert(pixel + 4 <= numPixels);
228 -
    assert(pixel % 2 == 0);
229 -
230 -
    // load, process and output first pixel of first row, which is full
231 -
    YCbCr p0;
232 -
    YCbCr::LoadY(&p0, &input(row / 2, inCol(pixel)));
233 -
    YCbCr::LoadCbCr(&p0, &input(row / 2, inCol(pixel) + 4));
234 -
    p0.process(hue);
235 -
    YUV_TO_RGB<version>(p0, &out(row, outCol(pixel)));
236 -
237 -
    // load Y from second pixel of first row
238 -
    YCbCr ph;
239 -
    YCbCr::LoadY(&ph, &input(row / 2, inCol(pixel) + 1));
240 -
241 -
    // load Cb/Cr from third pixel of first row
242 -
    YCbCr p1;
243 -
    YCbCr::LoadCbCr(&p1, &input(row / 2, inCol(pixel + 2) + 4));
244 -
    p1.process(hue);
245 -
246 -
    // and finally, interpolate and output the middle pixel of first row
247 -
    ph.interpolateCbCr(p0, p1);
248 -
    YUV_TO_RGB<version>(ph, &out(row, outCol(pixel + 1)));
249 -
250 -
    // load Y from first pixel of second row
251 -
    YCbCr pv;
252 -
    YCbCr::LoadY(&pv, &input(row / 2, inCol(pixel) + 2));
253 -
254 -
    // load Cb/Cr from first pixel of third row
255 -
    YCbCr p2;
256 -
    YCbCr::LoadCbCr(&p2, &input((row / 2) + 1, inCol(pixel) + 4));
257 -
    p2.process(hue);
258 -
259 -
    // and finally, interpolate and output the first pixel of second row
260 -
    pv.interpolateCbCr(p0, p2);
261 -
    YUV_TO_RGB<version>(pv, &out(row + 1, outCol(pixel)));
262 -
263 -
    // load Y from second pixel of second row
264 -
    YCbCr p;
265 -
    YCbCr::LoadY(&p, &input(row / 2, inCol(pixel) + 3));
266 -
267 -
    // load Cb/Cr from third pixel of third row
268 -
    YCbCr p3;
269 -
    YCbCr::LoadCbCr(&p3, &input((row / 2) + 1, inCol(pixel + 2) + 4));
270 -
    p3.process(hue);
271 -
272 -
    // and finally, interpolate and output the second pixel of second row
273 -
    // NOTE: we interpolate 4 full pixels here, located on diagonals
274 -
    // dcraw interpolates from already interpolated pixels
275 -
    p.interpolateCbCr(p0, p1, p2, p3);
276 -
    YUV_TO_RGB<version>(p, &out(row + 1, outCol(pixel + 1)));
270 +
  int MCUIdx;
271 +
  for (MCUIdx = 0; MCUIdx < numMCUs - 1; ++MCUIdx) {
272 +
    assert(MCUIdx + 1 <= numMCUs);
273 +
274 +
    // For 4:2:0, one MCU encodes 4 pixels (2x2), and odd pixels need
275 +
    // interpolation, so we need to load eight pixels,
276 +
    // and thus we must load 4 MCU's.
277 +
    std::array<std::array<MCUTy, 2>, 2> MCUs;
278 +
    for (int Row = 0; Row < 2; ++Row)
279 +
      for (int Col = 0; Col < 2; ++Col)
280 +
        MCUs[Row][Col] = LoadMCU(row + Row, MCUIdx + Col);
281 +
282 +
    // Process first pixels of MCU's, which are full
283 +
    for (int Row = 0; Row < 2; ++Row)
284 +
      for (int Col = 0; Col < 2; ++Col)
285 +
        MCUs[Row][Col][0][0].process(hue);
286 +
287 +
    // Interpolate the middle pixel of first row.
288 +
    MCUs[0][0][0][1].interpolateCbCr(MCUs[0][0][0][0], MCUs[0][1][0][0]);
289 +
290 +
    // Interpolate the first pixel of second row.
291 +
    MCUs[0][0][1][0].interpolateCbCr(MCUs[0][0][0][0], MCUs[1][0][0][0]);
292 +
293 +
    // Interpolate the second pixel of second row.
294 +
    MCUs[0][0][1][1].interpolateCbCr(MCUs[0][0][0][0], MCUs[0][1][0][0],
295 +
                                     MCUs[1][0][0][0], MCUs[1][1][0][0]);
296 +
297 +
    // FIXME: we should instead simply interpolate odd pixels on even rows
298 +
    //        and then even pixels on odd rows, as specified in the standard.
299 +
    // for (int Row = 0; Row < 2; ++Row)
300 +
    //   MCUs[Row][0][0][1].interpolateCbCr(MCUs[Row][0][0][0],
301 +
    //                                      MCUs[Row][1][0][0]);
302 +
    // for (int Col = 0; Col < 2; ++Col)
303 +
    //   MCUs[0][0][1][Col].interpolateCbCr(MCUs[0][0][0][Col],
304 +
    //                                      MCUs[1][0][0][Col]);
305 +
306 +
    // And finally, store the first MCU, i.e. first two pixels on two rows.
307 +
    StoreMCU(MCUs[0][0], MCUIdx, row);
277 308
  }
278 309
279 -
  assert(pixel + 2 == numPixels);
280 -
  assert(pixel % 2 == 0);
310 +
  assert(MCUIdx + 1 == numMCUs);
281 311
282 312
  // Last two pixels of the lines, the packed input format is:
283 313
  //              p0 p1 p2 p3 p0 p0
@@ -292,60 +322,74 @@
Loading
292 322
  //  row 3: ... [ Y3 ... ... ] [ Y4 ... ... ]
293 323
  //               .. .   .       .. .   .
294 324
295 -
  // load, process and output first pixel of first row, which is full
296 -
  YCbCr p0;
297 -
  YCbCr::LoadY(&p0, &input(row / 2, inCol(pixel)));
298 -
  YCbCr::LoadCbCr(&p0, &input(row / 2, inCol(pixel) + 4));
299 -
  p0.process(hue);
300 -
  YUV_TO_RGB<version>(p0, &out(row, outCol(pixel)));
301 -
302 -
  // keep Cb/Cr from first pixel of first row
303 -
  // load Y from second pixel of first row, output
304 -
  YCbCr::LoadY(&p0, &input(row / 2, inCol(pixel) + 1));
305 -
  YUV_TO_RGB<version>(p0, &out(row, outCol(pixel + 1)));
306 -
307 -
  // load Y from first pixel of second row
308 -
  YCbCr pv;
309 -
  YCbCr::LoadY(&pv, &input(row / 2, inCol(pixel) + 2));
310 -
311 -
  // load Cb/Cr from first pixel of third row
312 -
  YCbCr p2;
313 -
  YCbCr::LoadCbCr(&p2, &input((row / 2) + 1, inCol(pixel) + 4));
314 -
  p2.process(hue);
315 -
316 -
  // and finally, interpolate and output the first pixel of second row
317 -
  pv.interpolateCbCr(p0, p2);
318 -
  YUV_TO_RGB<version>(pv, &out(row + 1, outCol(pixel)));
319 -
320 -
  // keep Cb/Cr from first pixel of second row
321 -
  // load Y from second pixel of second row, output
322 -
  YCbCr::LoadY(&pv, &input(row / 2, inCol(pixel) + 3));
323 -
  YUV_TO_RGB<version>(pv, &out(row + 1, outCol(pixel + 1)));
324 -
}
325 +
  std::array<MCUTy, 2> MCUs;
326 +
  for (int Row = 0; Row < 2; ++Row)
327 +
    MCUs[Row] = LoadMCU(row + Row, MCUIdx);
325 328
326 -
// NOTE: Not thread safe, since it writes inplace.
327 -
template <int version> inline void Cr2sRawInterpolator::interpolate_420() {
328 -
  const Array2DRef<uint16_t> out(mRaw->getU16DataAsUncroppedArray2DRef());
329 +
  for (int Row = 0; Row < 2; ++Row)
330 +
    MCUs[Row][0][0].process(hue);
329 331
330 -
  assert(out.width >= 6);
331 -
  assert(out.width % 6 == 0);
332 +
  MCUs[0][1][0].interpolateCbCr(MCUs[0][0][0], MCUs[1][0][0]);
333 +
334 +
  for (int Row = 0; Row < 2; ++Row)
335 +
    YCbCr::CopyCbCr(&MCUs[0][Row][1], MCUs[0][Row][0]);
336 +
337 +
  StoreMCU(MCUs[0], MCUIdx, row);
338 +
}
332 339
333 -
  assert(out.height >= 2);
334 -
  assert(out.height % 2 == 0);
340 +
template <int version> void Cr2sRawInterpolator::interpolate_420() {
341 +
  const Array2DRef<uint16_t> out(mRaw->getU16DataAsUncroppedArray2DRef());
335 342
336 -
  int numPixels = out.width / 3;
337 -
  auto inCol = [](int pixel) {
338 -
    assert(pixel % 2 == 0);
339 -
    return 6 * pixel / 2;
343 +
  constexpr int X_S_F = 2;
344 +
  constexpr int Y_S_F = 2;
345 +
  constexpr int PixelsPerMCU = X_S_F * Y_S_F;
346 +
  constexpr int InputComponentsPerMCU = 2 + PixelsPerMCU;
347 +
348 +
  constexpr int YsPerMCU = PixelsPerMCU;
349 +
  constexpr int ComponentsPerPixel = 3;
350 +
  constexpr int OutputComponentsPerMCU = ComponentsPerPixel * PixelsPerMCU;
351 +
352 +
  assert(input.width % InputComponentsPerMCU == 0);
353 +
  int numMCUs = input.width / InputComponentsPerMCU;
354 +
  assert(numMCUs > 1);
355 +
356 +
  using MCUTy = std::array<std::array<YCbCr, X_S_F>, Y_S_F>;
357 +
358 +
  auto LoadMCU = [&](int Row, int MCUIdx) __attribute__((always_inline)) {
359 +
    MCUTy MCU;
360 +
    for (int MCURow = 0; MCURow < Y_S_F; ++MCURow) {
361 +
      for (int MCUCol = 0; MCUCol < X_S_F; ++MCUCol) {
362 +
        YCbCr::LoadY(&MCU[MCURow][MCUCol],
363 +
                     &input(Row, InputComponentsPerMCU * MCUIdx +
364 +
                                     X_S_F * MCURow + MCUCol));
365 +
      }
366 +
    }
367 +
    YCbCr::LoadCbCr(&MCU[0][0],
368 +
                    &input(Row, InputComponentsPerMCU * MCUIdx + YsPerMCU));
369 +
    return MCU;
370 +
  };
371 +
  auto StoreMCU = [&](const MCUTy& MCU, int MCUIdx, int Row)
372 +
      __attribute__((always_inline)) {
373 +
    for (int MCURow = 0; MCURow < Y_S_F; ++MCURow) {
374 +
      for (int MCUCol = 0; MCUCol < X_S_F; ++MCUCol) {
375 +
        YUV_TO_RGB<version>(
376 +
            MCU[MCURow][MCUCol],
377 +
            &out(2 * Row + MCURow, ((OutputComponentsPerMCU * MCUIdx) / Y_S_F) +
378 +
                                       ComponentsPerPixel * MCUCol));
379 +
      }
380 +
    }
340 381
  };
341 -
  auto outCol = [](int pixel) { return 3 * pixel; };
342 382
343 383
  int row;
344 -
  for (row = 0; row < out.height - 2; row += 2)
384 +
#ifdef HAVE_OPENMP
385 +
#pragma omp parallel for default(none) schedule(static)                        \
386 +
    num_threads(rawspeed_get_number_of_processor_cores())                      \
387 +
        OMPFIRSTPRIVATECLAUSE(out) lastprivate(row)
388 +
#endif
389 +
  for (row = 0; row < input.height - 1; ++row)
345 390
    interpolate_420_row<version>(row);
346 391
347 -
  assert(row + 2 == out.height);
348 -
  assert(row % 2 == 0);
392 +
  assert(row + 1 == input.height);
349 393
350 394
  // Last two lines, the packed input format is:
351 395
  //          p0 p1 p2 p3 p0 p0     p4 p5 p6 p7 p4 p4
@@ -357,47 +401,36 @@
Loading
357 401
  //  row 0: [ Y1 Cb  Cr  ] [ Y2 ... ... ] [ Y1 Cb  Cr  ] [ Y2 ... ... ] ...
358 402
  //  row 1: [ Y3 ... ... ] [ Y4 ... ... ] [ Y3 ... ... ] [ Y4 ... ... ] ...
359 403
360 -
  int pixel;
361 -
  for (pixel = 0; pixel < numPixels - 2; pixel += 2) {
362 -
    assert(pixel + 4 <= numPixels);
363 -
    assert(pixel % 2 == 0);
364 -
365 -
    // load, process and output first pixel of first row, which is full
366 -
    YCbCr p0;
367 -
    YCbCr::LoadY(&p0, &input(row / 2, inCol(pixel)));
368 -
    YCbCr::LoadCbCr(&p0, &input(row / 2, inCol(pixel) + 4));
369 -
    p0.process(hue);
370 -
    YUV_TO_RGB<version>(p0, &out(row, outCol(pixel)));
371 -
372 -
    // load Y from second pixel of first row
373 -
    YCbCr ph;
374 -
    YCbCr::LoadY(&ph, &input(row / 2, inCol(pixel) + 1));
375 -
376 -
    // load Cb/Cr from third pixel of first row
377 -
    YCbCr p1;
378 -
    YCbCr::LoadCbCr(&p1, &input(row / 2, inCol(pixel + 2) + 4));
379 -
    p1.process(hue);
380 -
381 -
    // and finally, interpolate and output the middle pixel of first row
382 -
    ph.interpolateCbCr(p0, p1);
383 -
    YUV_TO_RGB<version>(ph, &out(row, outCol(pixel + 1)));
384 -
385 -
    // keep Cb/Cr from first pixel of first row
386 -
    // load Y from first pixel of second row; and output
387 -
    YCbCr::LoadY(&p0, &input(row / 2, inCol(pixel) + 2));
388 -
    YUV_TO_RGB<version>(p0, &out(row + 1, outCol(pixel)));
389 -
390 -
    // keep Cb/Cr from second pixel of first row
391 -
    // load Y from second pixel of second row; and output
392 -
    YCbCr::LoadY(&ph, &input(row / 2, inCol(pixel) + 3));
393 -
    YUV_TO_RGB<version>(ph, &out(row + 1, outCol(pixel + 1)));
404 +
  int MCUIdx;
405 +
  for (MCUIdx = 0; MCUIdx < numMCUs - 1; ++MCUIdx) {
406 +
    assert(MCUIdx + 1 < numMCUs);
407 +
408 +
    // For 4:2:0, one MCU encodes 4 pixels (2x2), and odd pixels need
409 +
    // interpolation, so we need to load eight pixels,
410 +
    // and thus we must load 4 MCU's.
411 +
    std::array<std::array<MCUTy, 2>, 1> MCUs;
412 +
    for (int Row = 0; Row < 1; ++Row)
413 +
      for (int Col = 0; Col < 2; ++Col)
414 +
        MCUs[Row][Col] = LoadMCU(row + Row, MCUIdx + Col);
415 +
416 +
    // Process first pixels of MCU's, which are full
417 +
    for (int Row = 0; Row < 1; ++Row)
418 +
      for (int Col = 0; Col < 2; ++Col)
419 +
        MCUs[Row][Col][0][0].process(hue);
420 +
421 +
    // Interpolate the middle pixel of first row.
422 +
    MCUs[0][0][0][1].interpolateCbCr(MCUs[0][0][0][0], MCUs[0][1][0][0]);
423 +
424 +
    // Copy Cb/Cr to the first two pixels of second row from the two pixels
425 +
    // of first row.
426 +
    for (int Col = 0; Col < 2; ++Col)
427 +
      YCbCr::CopyCbCr(&MCUs[0][0][1][Col], MCUs[0][0][0][Col]);
428 +
429 +
    // And finally, store the first MCU, i.e. first two pixels on two rows.
430 +
    StoreMCU(MCUs[0][0], MCUIdx, row);
394 431
  }
395 432
396 -
  assert(row + 2 == out.height);
397 -
  assert(row % 2 == 0);
398 -
399 -
  assert(pixel + 2 == numPixels);
400 -
  assert(pixel % 2 == 0);
433 +
  assert(MCUIdx + 1 == numMCUs);
401 434
402 435
  // Last two pixels of last two lines, the packed input format is:
403 436
  //              p0 p1 p2 p3 p0 p0
@@ -409,26 +442,16 @@
Loading
409 442
  //  row 0:  ... [ Y1 Cb  Cr  ] [ Y2 ... ... ]
410 443
  //  row 1:  ... [ Y3 ... ... ] [ Y4 ... ... ]
411 444
412 -
  // load, process and output first pixel of first row, which is full
413 -
  YCbCr p;
414 -
  YCbCr::LoadY(&p, &input(row / 2, inCol(pixel)));
415 -
  YCbCr::LoadCbCr(&p, &input(row / 2, inCol(pixel) + 4));
416 -
  p.process(hue);
417 -
  YUV_TO_RGB<version>(p, &out(row, outCol(pixel)));
418 -
419 -
  // rest keeps Cb/Cr from this original pixel, because rest only have Y
445 +
  MCUTy MCU = LoadMCU(row, MCUIdx);
420 446
421 -
  // load Y from second pixel of first row, and output
422 -
  YCbCr::LoadY(&p, &input(row / 2, inCol(pixel) + 1));
423 -
  YUV_TO_RGB<version>(p, &out(row, outCol(pixel + 1)));
447 +
  MCU[0][0].process(hue);
424 448
425 -
  // load Y from first pixel of second row, and output
426 -
  YCbCr::LoadY(&p, &input(row / 2, inCol(pixel) + 2));
427 -
  YUV_TO_RGB<version>(p, &out(row + 1, outCol(pixel)));
449 +
  // Distribute the same Cb/Cr to all four pixels.
450 +
  for (int Row = 0; Row < 2; ++Row)
451 +
    for (int Col = 0; Col < 2; ++Col)
452 +
      YCbCr::CopyCbCr(&MCU[Row][Col], MCU[0][0]);
428 453
429 -
  // load Y from second pixel of second row, and output
430 -
  YCbCr::LoadY(&p, &input(row / 2, inCol(pixel) + 3));
431 -
  YUV_TO_RGB<version>(p, &out(row + 1, outCol(pixel + 1)));
454 +
  StoreMCU(MCU, MCUIdx, row);
432 455
}
433 456
434 457
inline void Cr2sRawInterpolator::STORE_RGB(uint16_t* X, int r, int g, int b) {

@@ -49,11 +49,11 @@
Loading
49 49
50 50
  static inline void STORE_RGB(uint16_t* X, int r, int g, int b);
51 51
52 -
  template <int version> inline void interpolate_422_row(int row);
53 -
  template <int version> inline void interpolate_422();
52 +
  template <int version> void interpolate_422_row(int row);
53 +
  template <int version> void interpolate_422();
54 54
55 -
  template <int version> inline void interpolate_420_row(int row);
56 -
  template <int version> inline void interpolate_420();
55 +
  template <int version> void interpolate_420_row(int row);
56 +
  template <int version> void interpolate_420();
57 57
};
58 58
59 59
} // namespace rawspeed
Files Coverage
fuzz 1.10%
src 58.88%
test/librawspeed 62.64%
Project Totals (214 files) 57.18%
1232.4
CXX=g++-10
PLATFORM=linux
TRAVIS_OS_NAME=linux
integration rpu_u
1232.4
CXX=g++-10
PLATFORM=linux
TRAVIS_OS_NAME=linux
unittests
1
codecov:
2
  notify:
3
    require_ci_to_pass: true
4
coverage:
5
  precision: 2
6
  range: "0...100"
7
  round: down
8
  status:
9
    changes: false
10
    patch: false
11
    project: false
12
  ignore:
13
    - test/.*
14
  notify:
15
    irc:
16
      default:
17
        server: "chat.freenode.net"
18
        channel: "#rawspeed"
19
parsers:
20
  gcov:
21
    branch_detection:
22
      conditional: true
23
      loop: true
24
      macro: false
25
      method: false
26
comment:
27
  behavior: default
28
  layout: header, diff
29
  require_changes: false
Sunburst
The innermost circle is the entire project; moving away from the center are folders and then, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project, followed by folders and finally individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block represent the number of statements and the coverage, respectively.
Loading