OpenJPH
Open-source implementation of JPEG2000 Part-15
ojph_transform.cpp
Go to the documentation of this file.
1 //***************************************************************************/
2 // This software is released under the 2-Clause BSD license, included
3 // below.
4 //
5 // Copyright (c) 2019, Aous Naman
6 // Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7 // Copyright (c) 2019, The University of New South Wales, Australia
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
12 //
13 // 1. Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
15 //
16 // 2. Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23 // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //***************************************************************************/
32 // This file is part of the OpenJPH software implementation.
33 // File: ojph_transform.cpp
34 // Author: Aous Naman
35 // Date: 28 August 2019
36 //***************************************************************************/
37 
38 #include <cstdio>
39 
40 #include "ojph_arch.h"
41 #include "ojph_mem.h"
42 #include "ojph_transform.h"
43 #include "ojph_transform_local.h"
44 
45 namespace ojph {
46  struct line_buf;
47 
48  namespace local {
49 
51  // Reversible functions
53 
56  (const line_buf* src1, const line_buf* src2, line_buf *dst,
57  ui32 repeat)
59 
62  (const line_buf* src1, const line_buf* src2, line_buf *dst,
63  ui32 repeat)
65 
68  (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
70 
73  (const line_buf* src1, const line_buf* src2, line_buf *dst,
74  ui32 repeat)
76 
79  (const line_buf* src1, const line_buf* src2, line_buf *dst,
80  ui32 repeat)
82 
85  (line_buf* dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even)
87 
89  // Irreversible functions
91 
94  (const line_buf* src1, const line_buf* src2, line_buf *dst,
95  int step_num, ui32 repeat)
97 
100  (const line_buf *src, line_buf *dst, bool L_analysis_or_H_synthesis,
101  ui32 repeat)
103 
106  (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
108 
111  (line_buf* src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
113 
116 
119  {
121  return;
122 
133 
134 #ifndef OJPH_DISABLE_INTEL_SIMD
135  int level = cpu_ext_level();
136 
137  if (level >= 2)
138  {
143  }
144 
145  if (level >= 3)
146  {
153  }
154 
155  if (level >= 7)
156  {
161  }
162 
163  if (level >= 8)
164  {
171  }
172 #endif
173 
175  }
176 
178  const float LIFTING_FACTORS::steps[8] =
179  {
180  -1.586134342059924f, -0.052980118572961f, +0.882911075530934f,
181  +0.443506852043971f,
182  +1.586134342059924f, +0.052980118572961f, -0.882911075530934f,
183  -0.443506852043971f
184  };
185  const float LIFTING_FACTORS::K = 1.230174104914001f;
186  const float LIFTING_FACTORS::K_inv = (float)(1.0 / 1.230174104914001);
187 
190  const line_buf* line_src2,
191  line_buf *line_dst, ui32 repeat)
192  {
193  si32 *dst = line_dst->i32;
194  const si32 *src1 = line_src1->i32, *src2 = line_src2->i32;
195  for (ui32 i = repeat; i > 0; --i)
196  *dst++ -= (*src1++ + *src2++) >> 1;
197  }
198 
200  void gen_rev_vert_wvlt_fwd_update(const line_buf* line_src1,
201  const line_buf* line_src2,
202  line_buf *line_dst, ui32 repeat)
203  {
204  si32 *dst = line_dst->i32;
205  const si32 *src1 = line_src1->i32, *src2 = line_src2->i32;
206  for (ui32 i = repeat; i > 0; --i)
207  *dst++ += (*src1++ + *src2++ + 2) >> 2;
208  }
209 
211  void gen_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst,
212  line_buf *line_hdst, ui32 width, bool even)
213  {
214  if (width > 1)
215  {
216  si32 *src = line_src->i32;
217  si32 *ldst = line_ldst->i32, *hdst = line_hdst->i32;
218 
219  const ui32 L_width = (width + (even ? 1 : 0)) >> 1;
220  const ui32 H_width = (width + (even ? 0 : 1)) >> 1;
221 
222  // extension
223  src[-1] = src[1];
224  src[width] = src[width-2];
225  // predict
226  const si32* sp = src + (even ? 1 : 0);
227  si32 *dph = hdst;
228  for (ui32 i = H_width; i > 0; --i, sp+=2)
229  *dph++ = sp[0] - ((sp[-1] + sp[1]) >> 1);
230 
231  // extension
232  hdst[-1] = hdst[0];
233  hdst[H_width] = hdst[H_width-1];
234  // update
235  sp = src + (even ? 0 : 1);
236  const si32* sph = hdst + (even ? 0 : 1);
237  si32 *dpl = ldst;
238  for (ui32 i = L_width; i > 0; --i, sp+=2, sph++)
239  *dpl++ = *sp + ((2 + sph[-1] + sph[0]) >> 2);
240  }
241  else
242  {
243  if (even)
244  line_ldst->i32[0] = line_src->i32[0];
245  else
246  line_hdst->i32[0] = line_src->i32[0] << 1;
247  }
248  }
249 
252  const line_buf* line_src2,
253  line_buf *line_dst, ui32 repeat)
254  {
255  si32 *dst = line_dst->i32;
256  const si32 *src1 = line_src1->i32, *src2 = line_src2->i32;
257  for (ui32 i = repeat; i > 0; --i)
258  *dst++ += (*src1++ + *src2++) >> 1;
259  }
260 
262  void gen_rev_vert_wvlt_bwd_update(const line_buf* line_src1,
263  const line_buf* line_src2,
264  line_buf *line_dst, ui32 repeat)
265  {
266  si32 *dst = line_dst->i32;
267  const si32 *src1 = line_src1->i32, *src2 = line_src2->i32;
268  for (ui32 i = repeat; i > 0; --i)
269  *dst++ -= (2 + *src1++ + *src2++) >> 2;
270  }
271 
273  void gen_rev_horz_wvlt_bwd_tx(line_buf* line_dst, line_buf *line_lsrc,
274  line_buf *line_hsrc, ui32 width, bool even)
275  {
276  if (width > 1)
277  {
278  si32 *lsrc = line_lsrc->i32, *hsrc = line_hsrc->i32;
279  si32 *dst = line_dst->i32;
280 
281  const ui32 L_width = (width + (even ? 1 : 0)) >> 1;
282  const ui32 H_width = (width + (even ? 0 : 1)) >> 1;
283 
284  // extension
285  hsrc[-1] = hsrc[0];
286  hsrc[H_width] = hsrc[H_width-1];
287  //inverse update
288  const si32 *sph = hsrc + (even ? 0 : 1);
289  si32 *spl = lsrc;
290  for (ui32 i = L_width; i > 0; --i, sph++, spl++)
291  *spl -= ((2 + sph[-1] + sph[0]) >> 2);
292 
293  // extension
294  lsrc[-1] = lsrc[0];
295  lsrc[L_width] = lsrc[L_width - 1];
296  // inverse predict and combine
297  si32 *dp = dst + (even ? 0 : -1);
298  spl = lsrc + (even ? 0 : -1);
299  sph = hsrc;
300  for (ui32 i = L_width + (even ? 0 : 1); i > 0; --i, spl++, sph++)
301  {
302  *dp++ = *spl;
303  *dp++ = *sph + ((spl[0] + spl[1]) >> 1);
304  }
305  }
306  else
307  {
308  if (even)
309  line_dst->i32[0] = line_lsrc->i32[0];
310  else
311  line_dst->i32[0] = line_hsrc->i32[0] >> 1;
312  }
313  }
314 
315 
317  void gen_irrev_vert_wvlt_step(const line_buf* line_src1,
318  const line_buf* line_src2,
319  line_buf *line_dst,
320  int step_num, ui32 repeat)
321  {
322  float *dst = line_dst->f32;
323  const float *src1 = line_src1->f32, *src2 = line_src2->f32;
324  float factor = LIFTING_FACTORS::steps[step_num];
325  for (ui32 i = repeat; i > 0; --i)
326  *dst++ += factor * (*src1++ + *src2++);
327  }
328 
330  void gen_irrev_vert_wvlt_K(const line_buf* line_src,
331  line_buf* line_dst,
332  bool L_analysis_or_H_synthesis, ui32 repeat)
333  {
334  float *dst = line_dst->f32;
335  const float *src = line_src->f32;
336  float factor = LIFTING_FACTORS::K_inv;
337  factor = L_analysis_or_H_synthesis ? factor : LIFTING_FACTORS::K;
338  for (ui32 i = repeat; i > 0; --i)
339  *dst++ = *src++ * factor;
340  }
341 
342 
345  line_buf *line_ldst,
346  line_buf *line_hdst,
347  ui32 width, bool even)
348  {
349  if (width > 1)
350  {
351  float *src = line_src->f32;
352  float *ldst = line_ldst->f32, *hdst = line_hdst->f32;
353 
354  const ui32 L_width = (width + (even ? 1 : 0)) >> 1;
355  const ui32 H_width = (width + (even ? 0 : 1)) >> 1;
356 
357  //extension
358  src[-1] = src[1];
359  src[width] = src[width-2];
360  // predict
361  float factor = LIFTING_FACTORS::steps[0];
362  const float* sp = src + (even ? 1 : 0);
363  float *dph = hdst;
364  for (ui32 i = H_width; i > 0; --i, sp+=2)
365  *dph++ = sp[0] + factor * (sp[-1] + sp[1]);
366 
367  // extension
368  hdst[-1] = hdst[0];
369  hdst[H_width] = hdst[H_width-1];
370  // update
371  factor = LIFTING_FACTORS::steps[1];
372  sp = src + (even ? 0 : 1);
373  const float* sph = hdst + (even ? 0 : 1);
374  float *dpl = ldst;
375  for (ui32 i = L_width; i > 0; --i, sp+=2, sph++)
376  *dpl++ = sp[0] + factor * (sph[-1] + sph[0]);
377 
378  //extension
379  ldst[-1] = ldst[0];
380  ldst[L_width] = ldst[L_width-1];
381  //predict
382  factor = LIFTING_FACTORS::steps[2];
383  const float* spl = ldst + (even ? 1 : 0);
384  dph = hdst;
385  for (ui32 i = H_width; i > 0; --i, spl++)
386  *dph++ += factor * (spl[-1] + spl[0]);
387 
388  // extension
389  hdst[-1] = hdst[0];
390  hdst[H_width] = hdst[H_width-1];
391  // update
392  factor = LIFTING_FACTORS::steps[3];
393  sph = hdst + (even ? 0 : 1);
394  dpl = ldst;
395  for (ui32 i = L_width; i > 0; --i, sph++)
396  *dpl++ += factor * (sph[-1] + sph[0]);
397 
398  //multipliers
399  float *dp = ldst;
400  for (ui32 i = L_width; i > 0; --i, dp++)
401  *dp *= LIFTING_FACTORS::K_inv;
402  dp = hdst;
403  for (ui32 i = H_width; i > 0; --i, dp++)
404  *dp *= LIFTING_FACTORS::K;
405  }
406  else
407  {
408  if (even)
409  line_ldst->f32[0] = line_src->f32[0];
410  else
411  line_hdst->f32[0] = line_src->f32[0] + line_src->f32[0];
412  }
413  }
414 
416  void gen_irrev_horz_wvlt_bwd_tx(line_buf* line_dst, line_buf *line_lsrc,
417  line_buf *line_hsrc, ui32 width,
418  bool even)
419  {
420  if (width > 1)
421  {
422  float *lsrc = line_lsrc->f32, *hsrc = line_hsrc->f32;
423  float *dst = line_dst->f32;
424 
425  const ui32 L_width = (width + (even ? 1 : 0)) >> 1;
426  const ui32 H_width = (width + (even ? 0 : 1)) >> 1;
427 
428  //multipliers
429  float *dp = lsrc;
430  for (ui32 i = L_width; i > 0; --i, dp++)
431  *dp *= LIFTING_FACTORS::K;
432  dp = hsrc;
433  for (ui32 i = H_width; i > 0; --i, dp++)
434  *dp *= LIFTING_FACTORS::K_inv;
435 
436  //extension
437  hsrc[-1] = hsrc[0];
438  hsrc[H_width] = hsrc[H_width-1];
439  //inverse update
440  float factor = LIFTING_FACTORS::steps[7];
441  const float *sph = hsrc + (even ? 0 : 1);
442  float *dpl = lsrc;
443  for (ui32 i = L_width; i > 0; --i, dpl++, sph++)
444  *dpl += factor * (sph[-1] + sph[0]);
445 
446  //extension
447  lsrc[-1] = lsrc[0];
448  lsrc[L_width] = lsrc[L_width-1];
449  //inverse perdict
450  factor = LIFTING_FACTORS::steps[6];
451  const float *spl = lsrc + (even ? 0 : -1);
452  float *dph = hsrc;
453  for (ui32 i = H_width; i > 0; --i, dph++, spl++)
454  *dph += factor * (spl[0] + spl[1]);
455 
456  //extension
457  hsrc[-1] = hsrc[0];
458  hsrc[H_width] = hsrc[H_width-1];
459  //inverse update
460  factor = LIFTING_FACTORS::steps[5];
461  sph = hsrc + (even ? 0 : 1);
462  dpl = lsrc;
463  for (ui32 i = L_width; i > 0; --i, dpl++, sph++)
464  *dpl += factor * (sph[-1] + sph[0]);
465 
466  //extension
467  lsrc[-1] = lsrc[0];
468  lsrc[L_width] = lsrc[L_width-1];
469  //inverse perdict and combine
470  factor = LIFTING_FACTORS::steps[4];
471  dp = dst + (even ? 0 : -1);
472  spl = lsrc + (even ? 0 : -1);
473  sph = hsrc;
474  for (ui32 i = L_width+(even?0:1); i > 0; --i, spl++, sph++)
475  {
476  *dp++ = *spl;
477  *dp++ = *sph + factor * (spl[0] + spl[1]);
478  }
479  }
480  else
481  {
482  if (even)
483  line_dst->f32[0] = line_lsrc->f32[0];
484  else
485  line_dst->f32[0] = line_hsrc->f32[0] * 0.5f;
486  }
487  }
488  }
489 }
void avx2_rev_vert_wvlt_fwd_update(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void sse_irrev_horz_wvlt_bwd_tx(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void gen_rev_vert_wvlt_bwd_update(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void gen_rev_vert_wvlt_fwd_predict(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void sse2_rev_horz_wvlt_fwd_tx(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void avx_irrev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, line_buf *line_hsrc, ui32 width, bool even)
void avx2_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void gen_rev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, line_buf *line_hsrc, ui32 width, bool even)
void(* irrev_horz_wvlt_fwd_tx)(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void avx2_rev_vert_wvlt_fwd_predict(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void avx_irrev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void sse2_rev_vert_wvlt_fwd_update(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void(* rev_horz_wvlt_bwd_tx)(line_buf *dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even)
void sse2_rev_horz_wvlt_bwd_tx(line_buf *dst, line_buf *lsrc, line_buf *hsrc, ui32 width, bool even)
void gen_rev_vert_wvlt_fwd_update(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
static bool wavelet_transform_functions_initialized
void avx_irrev_vert_wvlt_K(const line_buf *line_src, line_buf *line_dst, bool L_analysis_or_H_synthesis, ui32 repeat)
void avx2_rev_vert_wvlt_bwd_predict(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void init_wavelet_transform_functions()
void(* rev_vert_wvlt_fwd_update)(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void avx_irrev_vert_wvlt_step(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, int step_num, ui32 repeat)
void sse_irrev_horz_wvlt_fwd_tx(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void sse2_rev_vert_wvlt_bwd_predict(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void gen_rev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void(* rev_horz_wvlt_fwd_tx)(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void(* irrev_vert_wvlt_step)(const line_buf *src1, const line_buf *src2, line_buf *dst, int step_num, ui32 repeat)
void gen_irrev_vert_wvlt_K(const line_buf *line_src, line_buf *line_dst, bool L_analysis_or_H_synthesis, ui32 repeat)
void gen_irrev_horz_wvlt_fwd_tx(line_buf *line_src, line_buf *line_ldst, line_buf *line_hdst, ui32 width, bool even)
void sse2_rev_vert_wvlt_fwd_predict(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void(* rev_vert_wvlt_bwd_update)(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void avx2_rev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, line_buf *line_hsrc, ui32 width, bool even)
void gen_irrev_horz_wvlt_bwd_tx(line_buf *line_dst, line_buf *line_lsrc, line_buf *line_hsrc, ui32 width, bool even)
void(* rev_vert_wvlt_bwd_predict)(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void(* rev_vert_wvlt_fwd_predict)(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
void(* irrev_horz_wvlt_bwd_tx)(line_buf *src, line_buf *ldst, line_buf *hdst, ui32 width, bool even)
void sse_irrev_vert_wvlt_K(const line_buf *src, line_buf *dst, bool L_analysis_or_H_synthesis, ui32 repeat)
void(* irrev_vert_wvlt_K)(const line_buf *src, line_buf *dst, bool L_analysis_or_H_synthesis, ui32 repeat)
void sse_irrev_vert_wvlt_step(const line_buf *src1, const line_buf *src2, line_buf *dst, int step_num, ui32 repeat)
void gen_rev_vert_wvlt_bwd_predict(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void avx2_rev_vert_wvlt_bwd_update(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, ui32 repeat)
void gen_irrev_vert_wvlt_step(const line_buf *line_src1, const line_buf *line_src2, line_buf *line_dst, int step_num, ui32 repeat)
void sse2_rev_vert_wvlt_bwd_update(const line_buf *src1, const line_buf *src2, line_buf *dst, ui32 repeat)
int cpu_ext_level()
Definition: ojph_arch.cpp:170
int32_t si32
Definition: ojph_defs.h:55
uint32_t ui32
Definition: ojph_defs.h:54
float * f32
Definition: ojph_mem.h:156
si32 * i32
Definition: ojph_mem.h:155