OpenJPH
Open-source implementation of JPEG2000 Part-15
ojph_codestream_avx2.cpp
Go to the documentation of this file.
1 //***************************************************************************/
2 // This software is released under the 2-Clause BSD license, included
3 // below.
4 //
5 // Copyright (c) 2022, Aous Naman
6 // Copyright (c) 2022, Kakadu Software Pty Ltd, Australia
7 // Copyright (c) 2022, The University of New South Wales, Australia
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
12 //
13 // 1. Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
15 //
16 // 2. Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23 // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //***************************************************************************/
32 // This file is part of the OpenJPH software implementation.
33 // File: ojph_codestream_avx2.cpp
34 // Author: Aous Naman
35 // Date: 15 May 2022
36 //***************************************************************************/
37 
38 #include <immintrin.h>
39 #include "ojph_defs.h"
40 
41 namespace ojph {
42  namespace local {
43 
46  {
47  __m128i x0 = _mm_loadu_si128((__m128i*)address);
48  __m128i x1 = _mm_loadu_si128((__m128i*)address + 1);
49  x0 = _mm_or_si128(x0, x1);
50  x1 = _mm_shuffle_epi32(x0, 0xEE); // x1 = x0[2,3,2,3]
51  x0 = _mm_or_si128(x0, x1);
52  x1 = _mm_shuffle_epi32(x0, 0x55); // x1 = x0[1,1,1,1]
53  x0 = _mm_or_si128(x0, x1);
54  ui32 t = (ui32)_mm_extract_epi32(x0, 0);
55  return t;
56  }
57 
59  void avx2_rev_tx_to_cb(const void *sp, ui32 *dp, ui32 K_max,
60  float delta_inv, ui32 count, ui32* max_val)
61  {
62  ojph_unused(delta_inv);
63 
64  // convert to sign and magnitude and keep max_val
65  ui32 shift = 31 - K_max;
66  __m256i m0 = _mm256_set1_epi32((int)0x80000000);
67  __m256i tmax = _mm256_loadu_si256((__m256i*)max_val);
68  __m256i *p = (__m256i*)sp;
69  for (ui32 i = 0; i < count; i += 8, p += 1, dp += 8)
70  {
71  __m256i v = _mm256_loadu_si256(p);
72  __m256i sign = _mm256_and_si256(v, m0);
73  __m256i val = _mm256_abs_epi32(v);
74  val = _mm256_slli_epi32(val, (int)shift);
75  tmax = _mm256_or_si256(tmax, val);
76  val = _mm256_or_si256(val, sign);
77  _mm256_storeu_si256((__m256i*)dp, val);
78  }
79  _mm256_storeu_si256((__m256i*)max_val, tmax);
80  }
81 
83  void avx2_irv_tx_to_cb(const void *sp, ui32 *dp, ui32 K_max,
84  float delta_inv, ui32 count, ui32* max_val)
85  {
86  ojph_unused(K_max);
87 
88  //quantize and convert to sign and magnitude and keep max_val
89  __m256 d = _mm256_set1_ps(delta_inv);
90  __m256i m0 = _mm256_set1_epi32((int)0x80000000);
91  __m256i tmax = _mm256_loadu_si256((__m256i*)max_val);
92  float *p = (float*)sp;
93 
94  for (ui32 i = 0; i < count; i += 8, p += 8, dp += 8)
95  {
96  __m256 vf = _mm256_loadu_ps(p);
97  vf = _mm256_mul_ps(vf, d); // multiply
98  __m256i val = _mm256_cvtps_epi32(vf); // convert to int
99  __m256i sign = _mm256_and_si256(val, m0); // get sign
100  val = _mm256_abs_epi32(val);
101  tmax = _mm256_or_si256(tmax, val);
102  val = _mm256_or_si256(val, sign);
103  _mm256_storeu_si256((__m256i*)dp, val);
104  }
105  _mm256_storeu_si256((__m256i*)max_val, tmax);
106  }
107 
109  void avx2_rev_tx_from_cb(const ui32 *sp, void *dp, ui32 K_max,
110  float delta, ui32 count)
111  {
112  ojph_unused(delta);
113  ui32 shift = 31 - K_max;
114  __m256i m1 = _mm256_set1_epi32(0x7FFFFFFF);
115  si32 *p = (si32*)dp;
116  for (ui32 i = 0; i < count; i += 8, sp += 8, p += 8)
117  {
118  __m256i v = _mm256_load_si256((__m256i*)sp);
119  __m256i val = _mm256_and_si256(v, m1);
120  val = _mm256_srli_epi32(val, (int)shift);
121  val = _mm256_sign_epi32(val, v);
122  _mm256_storeu_si256((__m256i*)p, val);
123  }
124  }
125 
127  void avx2_irv_tx_from_cb(const ui32 *sp, void *dp, ui32 K_max,
128  float delta, ui32 count)
129  {
130  ojph_unused(K_max);
131  __m256i m1 = _mm256_set1_epi32(0x7FFFFFFF);
132  __m256 d = _mm256_set1_ps(delta);
133  float *p = (float*)dp;
134  for (ui32 i = 0; i < count; i += 8, sp += 8, p += 8)
135  {
136  __m256i v = _mm256_load_si256((__m256i*)sp);
137  __m256i vali = _mm256_and_si256(v, m1);
138  __m256 valf = _mm256_cvtepi32_ps(vali);
139  valf = _mm256_mul_ps(valf, d);
140  __m256i sign = _mm256_andnot_si256(m1, v);
141  valf = _mm256_or_ps(valf, _mm256_castsi256_ps(sign));
142  _mm256_storeu_ps(p, valf);
143  }
144  }
145  }
146 }
ui32 avx2_find_max_val(ui32 *address)
void avx2_irv_tx_from_cb(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void avx2_rev_tx_to_cb(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
void avx2_rev_tx_from_cb(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void avx2_irv_tx_to_cb(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
int32_t si32
Definition: ojph_defs.h:55
uint32_t ui32
Definition: ojph_defs.h:54
#define ojph_unused(x)
Definition: ojph_defs.h:78