OpenJPH
Open-source implementation of JPEG2000 Part-15
ojph_block_decoder.cpp
Go to the documentation of this file.
1 //***************************************************************************/
2 // This software is released under the 2-Clause BSD license, included
3 // below.
4 //
5 // Copyright (c) 2019, Aous Naman
6 // Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7 // Copyright (c) 2019, The University of New South Wales, Australia
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
12 //
13 // 1. Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
15 //
16 // 2. Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23 // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //***************************************************************************/
32 // This file is part of the OpenJPH software implementation.
33 // File: ojph_block_decoder.cpp
34 // Author: Aous Naman
35 // Date: 13 May 2022
36 //***************************************************************************/
37 
38 //***************************************************************************/
43 #include <string>
44 #include <iostream>
45 
46 #include <cassert>
47 #include <cstring>
48 #include "ojph_block_common.h"
49 #include "ojph_block_decoder.h"
50 #include "ojph_arch.h"
51 #include "ojph_message.h"
52 
53 namespace ojph {
54  namespace local {
55 
56  //************************************************************************/
63  struct dec_mel_st {
64  dec_mel_st() : data(NULL), tmp(0), bits(0), size(0), unstuff(false),
65  k(0), num_runs(0), runs(0)
66  {}
67  // data decoding machinary
68  ui8* data;
70  int bits;
71  int size;
72  bool unstuff;
73  int k;
74 
75  // queue of decoded runs
76  int num_runs;
78  };
79 
80  //************************************************************************/
92  static inline
93  void mel_read(dec_mel_st *melp)
94  {
95  if (melp->bits > 32) //there are enough bits in the tmp variable
96  return; // return without reading new data
97 
98  ui32 val = 0xFFFFFFFF; // feed in 0xFF if buffer is exhausted
99  if (melp->size > 4) { // if there is data in the MEL segment
100  val = *(ui32*)melp->data; // read 32 bits from MEL data
101  melp->data += 4; // advance pointer
102  melp->size -= 4; // reduce counter
103  }
104  else if (melp->size > 0)
105  { // 4 or less
106  int i = 0;
107  while (melp->size > 1) {
108  ui32 v = *melp->data++; // read one byte at a time
109  ui32 m = ~(0xFFu << i); // mask of location
110  val = (val & m) | (v << i);// put one byte in its correct location
111  --melp->size;
112  i += 8;
113  }
114  // size equal to 1
115  ui32 v = *melp->data++; // the one before the last is different
116  v |= 0xF; // MEL and VLC segments can overlap
117  ui32 m = ~(0xFFu << i);
118  val = (val & m) | (v << i);
119  --melp->size;
120  }
121 
122  // next we unstuff them before adding them to the buffer
123  int bits = 32 - melp->unstuff; // number of bits in val, subtract 1 if
124  // the previously read byte requires
125  // unstuffing
126 
127  // data is unstuffed and accumulated in t
128  // bits has the number of bits in t
129  ui32 t = val & 0xFF;
130  bool unstuff = ((val & 0xFF) == 0xFF); // true if we need unstuffing
131  bits -= unstuff; // there is one less bit in t if unstuffing is needed
132  t = t << (8 - unstuff); // move up to make room for the next byte
133 
134  //this is a repeat of the above
135  t |= (val>>8) & 0xFF;
136  unstuff = (((val >> 8) & 0xFF) == 0xFF);
137  bits -= unstuff;
138  t = t << (8 - unstuff);
139 
140  t |= (val>>16) & 0xFF;
141  unstuff = (((val >> 16) & 0xFF) == 0xFF);
142  bits -= unstuff;
143  t = t << (8 - unstuff);
144 
145  t |= (val>>24) & 0xFF;
146  melp->unstuff = (((val >> 24) & 0xFF) == 0xFF);
147 
148  // move t to tmp, and push the result all the way up, so we read from
149  // the MSB
150  melp->tmp |= ((ui64)t) << (64 - bits - melp->bits);
151  melp->bits += bits; //increment the number of bits in tmp
152  }
153 
154  //************************************************************************/
169  static inline
170  void mel_decode(dec_mel_st *melp)
171  {
172  static const int mel_exp[13] = { //MEL exponents
173  0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5
174  };
175 
176  if (melp->bits < 6) // if there are less than 6 bits in tmp
177  mel_read(melp); // then read from the MEL bitstream
178  // 6 bits is the largest decodable MEL cwd
179 
180  //repeat so long that there is enough decodable bits in tmp,
181  // and the runs store is not full (num_runs < 8)
182  while (melp->bits >= 6 && melp->num_runs < 8)
183  {
184  int eval = mel_exp[melp->k]; // number of bits associated with state
185  int run = 0;
186  if (melp->tmp & (1ull<<63)) //The next bit to decode (stored in MSB)
187  { //one is found
188  run = 1 << eval;
189  run--; // consecutive runs of 0 events - 1
190  melp->k = melp->k + 1 < 12 ? melp->k + 1 : 12;//increment, max is 12
191  melp->tmp <<= 1; // consume one bit from tmp
192  melp->bits -= 1;
193  run = run << 1; // a stretch of zeros not terminating in one
194  }
195  else
196  { //0 is found
197  run = (int)(melp->tmp >> (63 - eval)) & ((1 << eval) - 1);
198  melp->k = melp->k - 1 > 0 ? melp->k - 1 : 0; //decrement, min is 0
199  melp->tmp <<= eval + 1; //consume eval + 1 bits (max is 6)
200  melp->bits -= eval + 1;
201  run = (run << 1) + 1; // a stretch of zeros terminating with one
202  }
203  eval = melp->num_runs * 7; // 7 bits per run
204  melp->runs &= ~((ui64)0x3F << eval); // 6 bits are sufficient
205  melp->runs |= ((ui64)run) << eval; // store the value in runs
206  melp->num_runs++; // increment count
207  }
208  }
209 
210  //************************************************************************/
220  static inline
221  void mel_init(dec_mel_st *melp, ui8* bbuf, int lcup, int scup)
222  {
223  melp->data = bbuf + lcup - scup; // move the pointer to the start of MEL
224  melp->bits = 0; // 0 bits in tmp
225  melp->tmp = 0; //
226  melp->unstuff = false; // no unstuffing
227  melp->size = scup - 1; // size is the length of MEL+VLC-1
228  melp->k = 0; // 0 for state
229  melp->num_runs = 0; // num_runs is 0
230  melp->runs = 0; //
231 
232  //This code is borrowed; original is for a different architecture
233  //These few lines take care of the case where data is not at a multiple
234  // of 4 boundary. It reads 1,2,3 up to 4 bytes from the MEL segment
235  int num = 4 - (int)(intptr_t(melp->data) & 0x3);
236  for (int i = 0; i < num; ++i) { // this code is similar to mel_read
237  assert(melp->unstuff == false || melp->data[0] <= 0x8F);
238  ui64 d = (melp->size > 0) ? *melp->data : 0xFF;//if buffer is consumed
239  //set data to 0xFF
240  if (melp->size == 1) d |= 0xF; //if this is MEL+VLC-1, set LSBs to 0xF
241  // see the standard
242  melp->data += melp->size-- > 0; //increment if the end is not reached
243  int d_bits = 8 - melp->unstuff; //if unstuffing is needed, reduce by 1
244  melp->tmp = (melp->tmp << d_bits) | d; //store bits in tmp
245  melp->bits += d_bits; //increment tmp by number of bits
246  melp->unstuff = ((d & 0xFF) == 0xFF); //true of next byte needs
247  //unstuffing
248  }
249  melp->tmp <<= (64 - melp->bits); //push all the way up so the first bit
250  // is the MSB
251  }
252 
253  //************************************************************************/
259  static inline
261  {
262  if (melp->num_runs == 0) //if no runs, decode more bit from MEL segment
263  mel_decode(melp);
264 
265  int t = melp->runs & 0x7F; //retrieve one run
266  melp->runs >>= 7; // remove the retrieved run
267  melp->num_runs--;
268  return t; // return run
269  }
270 
271  //************************************************************************/
275  struct rev_struct {
276  rev_struct() : data(NULL), tmp(0), bits(0), size(0), unstuff(false)
277  {}
278  //storage
282  int size;
283  bool unstuff;
285  };
286 
287  //************************************************************************/
307  static inline
308  void rev_read(rev_struct *vlcp)
309  {
310  //process 4 bytes at a time
311  if (vlcp->bits > 32) // if there are more than 32 bits in tmp, then
312  return; // reading 32 bits can overflow vlcp->tmp
313  ui32 val = 0;
314  //the next line (the if statement) needs to be tested first
315  if (vlcp->size > 3) // if there are more than 3 bytes left in VLC
316  {
317  // (vlcp->data - 3) move pointer back to read 32 bits at once
318  val = *(ui32*)(vlcp->data - 3); // then read 32 bits
319  vlcp->data -= 4; // move data pointer back by 4
320  vlcp->size -= 4; // reduce available byte by 4
321  }
322  else if (vlcp->size > 0)
323  { // 4 or less
324  int i = 24;
325  while (vlcp->size > 0) {
326  ui32 v = *vlcp->data--; // read one byte at a time
327  val |= (v << i); // put byte in its correct location
328  --vlcp->size;
329  i -= 8;
330  }
331  }
332 
333  //accumulate in tmp, number of bits in tmp are stored in bits
334  ui32 tmp = val >> 24; //start with the MSB byte
335  ui32 bits;
336 
337  // test unstuff (previous byte is >0x8F), and this byte is 0x7F
338  bits = 8 - ((vlcp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
339  bool unstuff = (val >> 24) > 0x8F; //this is for the next byte
340 
341  tmp |= ((val >> 16) & 0xFF) << bits; //process the next byte
342  bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
343  unstuff = ((val >> 16) & 0xFF) > 0x8F;
344 
345  tmp |= ((val >> 8) & 0xFF) << bits;
346  bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
347  unstuff = ((val >> 8) & 0xFF) > 0x8F;
348 
349  tmp |= (val & 0xFF) << bits;
350  bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
351  unstuff = (val & 0xFF) > 0x8F;
352 
353  // now move the read and unstuffed bits into vlcp->tmp
354  vlcp->tmp |= (ui64)tmp << vlcp->bits;
355  vlcp->bits += bits;
356  vlcp->unstuff = unstuff; // this for the next read
357  }
358 
359  //************************************************************************/
373  static inline
374  void rev_init(rev_struct *vlcp, ui8* data, int lcup, int scup)
375  {
376  //first byte has only the upper 4 bits
377  vlcp->data = data + lcup - 2;
378 
379  //size can not be larger than this, in fact it should be smaller
380  vlcp->size = scup - 2;
381 
382  ui32 d = *vlcp->data--; // read one byte (this is a half byte)
383  vlcp->tmp = d >> 4; // both initialize and set
384  vlcp->bits = 4 - ((vlcp->tmp & 7) == 7); //check standard
385  vlcp->unstuff = (d | 0xF) > 0x8F; //this is useful for the next byte
386 
387  //This code is designed for an architecture that read address should
388  // align to the read size (address multiple of 4 if read size is 4)
389  //These few lines take care of the case where data is not at a multiple
390  // of 4 boundary. It reads 1,2,3 up to 4 bytes from the VLC bitstream.
391  // To read 32 bits, read from (vlcp->data - 3)
392  int num = 1 + (int)(intptr_t(vlcp->data) & 0x3);
393  int tnum = num < vlcp->size ? num : vlcp->size;
394  for (int i = 0; i < tnum; ++i) {
395  ui64 d;
396  d = *vlcp->data--; // read one byte and move read pointer
397  //check if the last byte was >0x8F (unstuff == true) and this is 0x7F
398  ui32 d_bits = 8 - ((vlcp->unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
399  vlcp->tmp |= d << vlcp->bits; // move data to vlcp->tmp
400  vlcp->bits += d_bits;
401  vlcp->unstuff = d > 0x8F; // for next byte
402  }
403  vlcp->size -= tnum;
404  rev_read(vlcp); // read another 32 buts
405  }
406 
407  //************************************************************************/
414  static inline
416  {
417  if (vlcp->bits < 32) // if there are less then 32 bits, read more
418  {
419  rev_read(vlcp); // read 32 bits, but unstuffing might reduce this
420  if (vlcp->bits < 32)// if there is still space in vlcp->tmp for 32 bits
421  rev_read(vlcp); // read another 32
422  }
423  return (ui32)vlcp->tmp; // return the head (bottom-most) of vlcp->tmp
424  }
425 
426  //************************************************************************/
432  static inline
433  ui32 rev_advance(rev_struct *vlcp, ui32 num_bits)
434  {
435  assert(num_bits <= vlcp->bits); // vlcp->tmp must have more than num_bits
436  vlcp->tmp >>= num_bits; // remove bits
437  vlcp->bits -= num_bits; // decrement the number of bits
438  return (ui32)vlcp->tmp;
439  }
440 
441  //************************************************************************/
452  static inline
454  {
455  //process 4 bytes at a time
456  if (mrp->bits > 32)
457  return;
458  ui32 val = 0;
459  if (mrp->size > 3) // If there are 3 byte or more
460  { // (mrp->data - 3) move pointer back to read 32 bits at once
461  val = *(ui32*)(mrp->data - 3); // read 32 bits
462  mrp->data -= 4; // move back pointer
463  mrp->size -= 4; // reduce count
464  }
465  else if (mrp->size > 0)
466  {
467  int i = 24;
468  while (mrp->size > 0) {
469  ui32 v = *mrp->data--; // read one byte at a time
470  val |= (v << i); // put byte in its correct location
471  --mrp->size;
472  i -= 8;
473  }
474  }
475 
476  //accumulate in tmp, and keep count in bits
477  ui32 bits, tmp = val >> 24;
478 
479  //test if the last byte > 0x8F (unstuff must be true) and this is 0x7F
480  bits = 8 - ((mrp->unstuff && (((val >> 24) & 0x7F) == 0x7F)) ? 1 : 0);
481  bool unstuff = (val >> 24) > 0x8F;
482 
483  //process the next byte
484  tmp |= ((val >> 16) & 0xFF) << bits;
485  bits += 8 - ((unstuff && (((val >> 16) & 0x7F) == 0x7F)) ? 1 : 0);
486  unstuff = ((val >> 16) & 0xFF) > 0x8F;
487 
488  tmp |= ((val >> 8) & 0xFF) << bits;
489  bits += 8 - ((unstuff && (((val >> 8) & 0x7F) == 0x7F)) ? 1 : 0);
490  unstuff = ((val >> 8) & 0xFF) > 0x8F;
491 
492  tmp |= (val & 0xFF) << bits;
493  bits += 8 - ((unstuff && ((val & 0x7F) == 0x7F)) ? 1 : 0);
494  unstuff = (val & 0xFF) > 0x8F;
495 
496  mrp->tmp |= (ui64)tmp << mrp->bits; // move data to mrp pointer
497  mrp->bits += bits;
498  mrp->unstuff = unstuff; // next byte
499  }
500 
501  //************************************************************************/
516  static inline
517  void rev_init_mrp(rev_struct *mrp, ui8* data, int lcup, int len2)
518  {
519  mrp->data = data + lcup + len2 - 1;
520  mrp->size = len2;
521  mrp->unstuff = true;
522  mrp->bits = 0;
523  mrp->tmp = 0;
524 
525  //This code is designed for an architecture that read address should
526  // align to the read size (address multiple of 4 if read size is 4)
527  //These few lines take care of the case where data is not at a multiple
528  // of 4 boundary. It reads 1,2,3 up to 4 bytes from the MRP stream
529  int num = 1 + (int)(intptr_t(mrp->data) & 0x3);
530  for (int i = 0; i < num; ++i) {
531  ui64 d;
532  //read a byte, 0 if no more data
533  d = (mrp->size-- > 0) ? *mrp->data-- : 0;
534  //check if unstuffing is needed
535  ui32 d_bits = 8 - ((mrp->unstuff && ((d & 0x7F) == 0x7F)) ? 1 : 0);
536  mrp->tmp |= d << mrp->bits; // move data to vlcp->tmp
537  mrp->bits += d_bits;
538  mrp->unstuff = d > 0x8F; // for next byte
539  }
540  rev_read_mrp(mrp);
541  }
542 
543  //************************************************************************/
550  static inline
552  {
553  if (mrp->bits < 32) // if there are less than 32 bits in mrp->tmp
554  {
555  rev_read_mrp(mrp); // read 30-32 bits from mrp
556  if (mrp->bits < 32) // if there is a space of 32 bits
557  rev_read_mrp(mrp); // read more
558  }
559  return (ui32)mrp->tmp; // return the head of mrp->tmp
560  }
561 
562  //************************************************************************/
568  static inline
570  {
571  assert(num_bits <= mrp->bits); // we must not consume more than mrp->bits
572  mrp->tmp >>= num_bits; // discard the lowest num_bits bits
573  mrp->bits -= num_bits;
574  return (ui32)mrp->tmp; // return data after consumption
575  }
576 
577  //************************************************************************/
581  struct frwd_struct {
582  const ui8* data;
586  int size;
587  };
588 
589  //************************************************************************/
607  template<int X>
608  static inline
610  {
611  assert(msp->bits <= 32); // assert that there is a space for 32 bits
612 
613  ui32 val = 0;
614  if (msp->size > 3) {
615  val = *(ui32*)msp->data; // read 32 bits
616  msp->data += 4; // increment pointer
617  msp->size -= 4; // reduce size
618  }
619  else if (msp->size > 0)
620  {
621  int i = 0;
622  val = X != 0 ? 0xFFFFFFFFu : 0;
623  while (msp->size > 0) {
624  ui32 v = *msp->data++; // read one byte at a time
625  ui32 m = ~(0xFFu << i); // mask of location
626  val = (val & m) | (v << i);// put one byte in its correct location
627  --msp->size;
628  i += 8;
629  }
630  }
631  else
632  val = X != 0 ? 0xFFFFFFFFu : 0;
633 
634  // we accumulate in t and keep a count of the number of bits in bits
635  ui32 bits = 8 - msp->unstuff;
636  ui32 t = val & 0xFF;
637  bool unstuff = ((val & 0xFF) == 0xFF); // Do we need unstuffing next?
638 
639  t |= ((val >> 8) & 0xFF) << bits;
640  bits += 8 - unstuff;
641  unstuff = (((val >> 8) & 0xFF) == 0xFF);
642 
643  t |= ((val >> 16) & 0xFF) << bits;
644  bits += 8 - unstuff;
645  unstuff = (((val >> 16) & 0xFF) == 0xFF);
646 
647  t |= ((val >> 24) & 0xFF) << bits;
648  bits += 8 - unstuff;
649  msp->unstuff = (((val >> 24) & 0xFF) == 0xFF); // for next byte
650 
651  msp->tmp |= ((ui64)t) << msp->bits; // move data to msp->tmp
652  msp->bits += bits;
653  }
654 
655  //************************************************************************/
664  template<int X>
665  static inline
666  void frwd_init(frwd_struct *msp, const ui8* data, int size)
667  {
668  msp->data = data;
669  msp->tmp = 0;
670  msp->bits = 0;
671  msp->unstuff = 0;
672  msp->size = size;
673 
674  //This code is designed for an architecture that read address should
675  // align to the read size (address multiple of 4 if read size is 4)
676  //These few lines take care of the case where data is not at a multiple
677  // of 4 boundary. It reads 1,2,3 up to 4 bytes from the bitstream
678  int num = 4 - (int)(intptr_t(msp->data) & 0x3);
679  for (int i = 0; i < num; ++i)
680  {
681  ui64 d;
682  //read a byte if the buffer is not exhausted, otherwise set it to X
683  d = msp->size-- > 0 ? *msp->data++ : X;
684  msp->tmp |= (d << msp->bits); // store data in msp->tmp
685  msp->bits += 8 - msp->unstuff; // number of bits added to msp->tmp
686  msp->unstuff = ((d & 0xFF) == 0xFF); // unstuffing for next byte
687  }
688  frwd_read<X>(msp); // read 32 bits more
689  }
690 
691  //************************************************************************/
697  static inline
698  void frwd_advance(frwd_struct *msp, ui32 num_bits)
699  {
700  assert(num_bits <= msp->bits);
701  msp->tmp >>= num_bits; // consume num_bits
702  msp->bits -= num_bits;
703  }
704 
705  //************************************************************************/
712  template<int X>
713  static inline
715  {
716  if (msp->bits < 32)
717  {
718  frwd_read<X>(msp);
719  if (msp->bits < 32) //need to test
720  frwd_read<X>(msp);
721  }
722  return (ui32)msp->tmp;
723  }
724 
725  //************************************************************************/
742  bool ojph_decode_codeblock(ui8* coded_data, ui32* decoded_data,
743  ui32 missing_msbs, ui32 num_passes,
744  ui32 lengths1, ui32 lengths2,
745  ui32 width, ui32 height, ui32 stride,
746  bool stripe_causal)
747  {
748  static bool insufficient_precision = false;
749  static bool modify_code = false;
750  static bool truncate_spp_mrp = false;
751 
752  if (num_passes > 1 && lengths2 == 0)
753  {
754  OJPH_WARN(0x00010001, "A malformed codeblock that has more than "
755  "one coding pass, but zero length for "
756  "2nd and potential 3rd pass.\n");
757  num_passes = 1;
758  }
759 
760  if (num_passes > 3)
761  {
762  OJPH_WARN(0x00010002, "We do not support more than 3 coding passes; "
763  "This codeblocks has %d passes.\n",
764  num_passes);
765  return false;
766  }
767 
768  if (missing_msbs > 30) // p < 0
769  {
770  if (insufficient_precision == false)
771  {
772  insufficient_precision = true;
773  OJPH_WARN(0x00010003, "32 bits are not enough to decode this "
774  "codeblock. This message will not be "
775  "displayed again.\n");
776  }
777  return false;
778  }
779  else if (missing_msbs == 30) // p == 0
780  { // not enough precision to decode and set the bin center to 1
781  if (modify_code == false) {
782  modify_code = true;
783  OJPH_WARN(0x00010004, "Not enough precision to decode the cleanup "
784  "pass. The code can be modified to support "
785  "this case. This message will not be "
786  "displayed again.\n");
787  }
788  return false; // 32 bits are not enough to decode this
789  }
790  else if (missing_msbs == 29) // if p is 1, then num_passes must be 1
791  {
792  if (num_passes > 1) {
793  num_passes = 1;
794  if (truncate_spp_mrp == false) {
795  truncate_spp_mrp = true;
796  OJPH_WARN(0x00010005, "Not enough precision to decode the SgnProp "
797  "nor MagRef passes; both will be skipped. "
798  "This message will not be displayed "
799  "again.\n");
800  }
801  }
802  }
803  ui32 p = 30 - missing_msbs; // The least significant bitplane for CUP
804  // There is a way to handle the case of p == 0, but a different path
805  // is required
806 
807  if (lengths1 < 2)
808  {
809  OJPH_WARN(0x00010006, "Wrong codeblock length.\n");
810  return false;
811  }
812 
813  // read scup and fix the bytes there
814  int lcup, scup;
815  lcup = (int)lengths1; // length of CUP
816  //scup is the length of MEL + VLC
817  scup = (((int)coded_data[lcup-1]) << 4) + (coded_data[lcup-2] & 0xF);
818  if (scup < 2 || scup > lcup || scup > 4079) //something is wrong
819  return false;
820 
821  // The temporary storage scratch holds two types of data in an
822  // interleaved fashion. The interleaving allows us to use one
823  // memory pointer.
824  // We have one entry for a decoded VLC code, and one entry for UVLC.
825  // Entries are 16 bits each, corresponding to one quad,
826  // but since we want to use XMM registers of the SSE family
827  // of SIMD; we allocated 16 bytes or more per quad row; that is,
828  // the width is no smaller than 16 bytes (or 8 entries), and the
829  // height is 512 quads
830  // Each VLC entry contains, in the following order, starting
831  // from MSB
832  // e_k (4bits), e_1 (4bits), rho (4bits), useless for step 2 (4bits)
833  // Each entry in UVLC contains u_q
834  // One extra row to handle the case of SPP propagating downwards
835  // when codeblock width is 4
836  ui16 scratch[8 * 513] = {0}; // 8 kB
837 
838  // We need an extra two entries (one inf and one u_q) beyond
839  // the last column.
840  // If the block width is 4 (2 quads), then we use sstr of 8
841  // (enough for 4 quads). If width is 8 (4 quads) we use
842  // sstr is 16 (enough for 8 quads). For a width of 16 (8
843  // quads), we use 24 (enough for 12 quads).
844  ui32 sstr = ((width + 2u) + 7u) & ~7u; // multiples of 8
845 
846  ui32 mmsbp2 = missing_msbs + 2;
847 
848  // The cleanup pass is decoded in two steps; in step one,
849  // the VLC and MEL segments are decoded, generating a record that
850  // has 2 bytes per quad. The 2 bytes contain, u, rho, e^1 & e^k.
851  // This information should be sufficient for the next step.
852  // In step 2, we decode the MagSgn segment.
853 
854  // step 1 decoding VLC and MEL segments
855  {
856  // init structures
857  dec_mel_st mel;
858  mel_init(&mel, coded_data, lcup, scup);
859  rev_struct vlc;
860  rev_init(&vlc, coded_data, lcup, scup);
861 
862  int run = mel_get_run(&mel); // decode runs of events from MEL bitstrm
863  // data represented as runs of 0 events
864  // See mel_decode description
865 
866  ui32 vlc_val;
867  ui32 c_q = 0;
868  ui16 *sp = scratch;
869  //initial quad row
870  for (ui32 x = 0; x < width; sp += 4)
871  {
872  // decode VLC
874 
875  // first quad
876  vlc_val = rev_fetch(&vlc);
877 
878  //decode VLC using the context c_q and the head of VLC bitstream
879  ui16 t0 = vlc_tbl0[ c_q + (vlc_val & 0x7F) ];
880 
881  // if context is zero, use one MEL event
882  if (c_q == 0) //zero context
883  {
884  run -= 2; //subtract 2, since events number if multiplied by 2
885 
886  // Is the run terminated in 1? if so, use decoded VLC code,
887  // otherwise, discard decoded data, since we will decoded again
888  // using a different context
889  t0 = (run == -1) ? t0 : 0;
890 
891  // is run -1 or -2? this means a run has been consumed
892  if (run < 0)
893  run = mel_get_run(&mel); // get another run
894  }
895  //run -= (c_q == 0) ? 2 : 0;
896  //t0 = (c_q != 0 || run == -1) ? t0 : 0;
897  //if (run < 0)
898  // run = mel_get_run(&mel); // get another run
899  sp[0] = t0;
900  x += 2;
901 
902  // prepare context for the next quad; eqn. 1 in ITU T.814
903  c_q = ((t0 & 0x10U) << 3) | ((t0 & 0xE0U) << 2);
904 
905  //remove data from vlc stream (0 bits are removed if vlc is not used)
906  vlc_val = rev_advance(&vlc, t0 & 0x7);
907 
908  //second quad
909  ui16 t1 = 0;
910 
911  //decode VLC using the context c_q and the head of VLC bitstream
912  t1 = vlc_tbl0[c_q + (vlc_val & 0x7F)];
913 
914  // if context is zero, use one MEL event
915  if (c_q == 0 && x < width) //zero context
916  {
917  run -= 2; //subtract 2, since events number if multiplied by 2
918 
919  // if event is 0, discard decoded t1
920  t1 = (run == -1) ? t1 : 0;
921 
922  if (run < 0) // have we consumed all events in a run
923  run = mel_get_run(&mel); // if yes, then get another run
924  }
925  t1 = x < width ? t1 : 0;
926  //run -= (c_q == 0 && x < width) ? 2 : 0;
927  //t1 = (c_q != 0 || run == -1) ? t1 : 0;
928  //if (run < 0)
929  // run = mel_get_run(&mel); // get another run
930  sp[2] = t1;
931  x += 2;
932 
933  //prepare context for the next quad, eqn. 1 in ITU T.814
934  c_q = ((t1 & 0x10U) << 3) | ((t1 & 0xE0U) << 2);
935 
936  //remove data from vlc stream, if qinf is not used, cwdlen is 0
937  vlc_val = rev_advance(&vlc, t1 & 0x7);
938 
939  // decode u
941  // uvlc_mode is made up of u_offset bits from the quad pair
942  ui32 uvlc_mode = ((t0 & 0x8U) << 3) | ((t1 & 0x8U) << 4);
943  if (uvlc_mode == 0xc0)// if both u_offset are set, get an event from
944  { // the MEL run of events
945  run -= 2; //subtract 2, since events number if multiplied by 2
946 
947  uvlc_mode += (run == -1) ? 0x40 : 0; // increment uvlc_mode by
948  // is 0x40
949 
950  if (run < 0)//if run is consumed (run is -1 or -2), get another run
951  run = mel_get_run(&mel);
952  }
953  //run -= (uvlc_mode == 0xc0) ? 2 : 0;
954  //uvlc_mode += (uvlc_mode == 0xc0 && run == -1) ? 0x40 : 0;
955  //if (run < 0)
956  // run = mel_get_run(&mel); // get another run
957 
958  //decode uvlc_mode to get u for both quads
959  ui32 uvlc_entry = uvlc_tbl0[uvlc_mode + (vlc_val & 0x3F)];
960  //remove total prefix length
961  vlc_val = rev_advance(&vlc, uvlc_entry & 0x7);
962  uvlc_entry >>= 3;
963  //extract suffixes for quad 0 and 1
964  ui32 len = uvlc_entry & 0xF; //suffix length for 2 quads
965  ui32 tmp = vlc_val & ((1 << len) - 1); //suffix value for 2 quads
966  vlc_val = rev_advance(&vlc, len);
967  uvlc_entry >>= 4;
968  // quad 0 length
969  len = uvlc_entry & 0x7; // quad 0 suffix length
970  uvlc_entry >>= 3;
971  ui16 u_q = (ui16)(1 + (uvlc_entry&7) + (tmp&~(0xFFU<<len)));//kap. 1
972  sp[1] = u_q;
973  u_q = (ui16)(1 + (uvlc_entry >> 3) + (tmp >> len)); //kappa == 1
974  sp[3]= u_q;
975  }
976  sp[0] = sp[1] = 0;
977 
978  //non initial quad rows
979  for (ui32 y = 2; y < height; y += 2)
980  {
981  c_q = 0; // context
982  ui16 *sp = scratch + (y >> 1) * sstr; // this row of quads
983 
984  for (ui32 x = 0; x < width; sp += 4)
985  {
986  // decode VLC
988 
989  // sigma_q (n, ne, nf)
990  c_q |= ((sp[0 - (si32)sstr] & 0xA0U) << 2);
991  c_q |= ((sp[2 - (si32)sstr] & 0x20U) << 4);
992 
993  // first quad
994  vlc_val = rev_fetch(&vlc);
995 
996  //decode VLC using the context c_q and the head of VLC bitstream
997  ui16 t0 = vlc_tbl1[ c_q + (vlc_val & 0x7F) ];
998 
999  // if context is zero, use one MEL event
1000  if (c_q == 0) //zero context
1001  {
1002  run -= 2; //subtract 2, since events number is multiplied by 2
1003 
1004  // Is the run terminated in 1? if so, use decoded VLC code,
1005  // otherwise, discard decoded data, since we will decoded again
1006  // using a different context
1007  t0 = (run == -1) ? t0 : 0;
1008 
1009  // is run -1 or -2? this means a run has been consumed
1010  if (run < 0)
1011  run = mel_get_run(&mel); // get another run
1012  }
1013  //run -= (c_q == 0) ? 2 : 0;
1014  //t0 = (c_q != 0 || run == -1) ? t0 : 0;
1015  //if (run < 0)
1016  // run = mel_get_run(&mel); // get another run
1017  sp[0] = t0;
1018  x += 2;
1019 
1020  // prepare context for the next quad; eqn. 2 in ITU T.814
1021  // sigma_q (w, sw)
1022  c_q = ((t0 & 0x40U) << 2) | ((t0 & 0x80U) << 1);
1023  // sigma_q (nw)
1024  c_q |= sp[0 - (si32)sstr] & 0x80;
1025  // sigma_q (n, ne, nf)
1026  c_q |= ((sp[2 - (si32)sstr] & 0xA0U) << 2);
1027  c_q |= ((sp[4 - (si32)sstr] & 0x20U) << 4);
1028 
1029  //remove data from vlc stream (0 bits are removed if vlc is unused)
1030  vlc_val = rev_advance(&vlc, t0 & 0x7);
1031 
1032  //second quad
1033  ui16 t1 = 0;
1034 
1035  //decode VLC using the context c_q and the head of VLC bitstream
1036  t1 = vlc_tbl1[ c_q + (vlc_val & 0x7F)];
1037 
1038  // if context is zero, use one MEL event
1039  if (c_q == 0 && x < width) //zero context
1040  {
1041  run -= 2; //subtract 2, since events number if multiplied by 2
1042 
1043  // if event is 0, discard decoded t1
1044  t1 = (run == -1) ? t1 : 0;
1045 
1046  if (run < 0) // have we consumed all events in a run
1047  run = mel_get_run(&mel); // if yes, then get another run
1048  }
1049  t1 = x < width ? t1 : 0;
1050  //run -= (c_q == 0 && x < width) ? 2 : 0;
1051  //t1 = (c_q != 0 || run == -1) ? t1 : 0;
1052  //if (run < 0)
1053  // run = mel_get_run(&mel); // get another run
1054  sp[2] = t1;
1055  x += 2;
1056 
1057  // partial c_q, will be completed when we process the next quad
1058  // sigma_q (w, sw)
1059  c_q = ((t1 & 0x40U) << 2) | ((t1 & 0x80U) << 1);
1060  // sigma_q (nw)
1061  c_q |= sp[2 - (si32)sstr] & 0x80;
1062 
1063  //remove data from vlc stream, if qinf is not used, cwdlen is 0
1064  vlc_val = rev_advance(&vlc, t1 & 0x7);
1065 
1066  // decode u
1068  // uvlc_mode is made up of u_offset bits from the quad pair
1069  ui32 uvlc_mode = ((t0 & 0x8U) << 3) | ((t1 & 0x8U) << 4);
1070  ui32 uvlc_entry = uvlc_tbl1[uvlc_mode + (vlc_val & 0x3F)];
1071  //remove total prefix length
1072  vlc_val = rev_advance(&vlc, uvlc_entry & 0x7);
1073  uvlc_entry >>= 3;
1074  //extract suffixes for quad 0 and 1
1075  ui32 len = uvlc_entry & 0xF; //suffix length for 2 quads
1076  ui32 tmp = vlc_val & ((1 << len) - 1); //suffix value for 2 quads
1077  vlc_val = rev_advance(&vlc, len);
1078  uvlc_entry >>= 4;
1079  // quad 0 length
1080  len = uvlc_entry & 0x7; // quad 0 suffix length
1081  uvlc_entry >>= 3;
1082  ui16 u_q = (ui16)((uvlc_entry & 7) + (tmp & ~(0xFU << len))); //u_q
1083  sp[1] = u_q;
1084  u_q = (ui16)((uvlc_entry >> 3) + (tmp >> len)); // u_q
1085  sp[3] = u_q;
1086  }
1087  sp[0] = sp[1] = 0;
1088  }
1089  }
1090 
1091  // step2 we decode magsgn
1092  {
1093  // We allocate a scratch row for storing v_n values.
1094  // We have 512 quads horizontally.
1095  // We need an extra entry to handle the case of vp[1]
1096  // when vp is at the last column.
1097  // Here, we allocate 4 instead of 1 to make the buffer size
1098  // a multipled of 16 bytes.
1099  const int v_n_size = 512 + 4;
1100  ui32 v_n_scratch[v_n_size] = {0}; // 2+ kB
1101 
1102  frwd_struct magsgn;
1103  frwd_init<0xFF>(&magsgn, coded_data, lcup - scup);
1104 
1105  ui16 *sp = scratch;
1106  ui32 *vp = v_n_scratch;
1107  ui32 *dp = decoded_data;
1108 
1109  ui32 prev_v_n = 0;
1110  for (ui32 x = 0; x < width; sp += 2, ++vp)
1111  {
1112  ui32 inf = sp[0];
1113  ui32 U_q = sp[1];
1114  if (U_q > mmsbp2)
1115  return false;
1116 
1117  ui32 v_n;
1118  ui32 val = 0;
1119  ui32 bit = 0;
1120  if (inf & (1 << (4 + bit)))
1121  {
1122  //get 32 bits of magsgn data
1123  ui32 ms_val = frwd_fetch<0xFF>(&magsgn);
1124  ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1125  frwd_advance(&magsgn, m_n); //consume m_n
1126 
1127  val = ms_val << 31; // get sign bit
1128  v_n = ms_val & ((1 << m_n) - 1); // keep only m_n bits
1129  v_n |= ((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1130  v_n |= 1; // add center of bin
1131  //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1132  //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1133  val |= (v_n + 2) << (p - 1);
1134  }
1135  dp[0] = val;
1136 
1137  v_n = 0;
1138  val = 0;
1139  bit = 1;
1140  if (inf & (1 << (4 + bit)))
1141  {
1142  //get 32 bits of magsgn data
1143  ui32 ms_val = frwd_fetch<0xFF>(&magsgn);
1144  ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1145  frwd_advance(&magsgn, m_n); //consume m_n
1146 
1147  val = ms_val << 31; // get sign bit
1148  v_n = ms_val & ((1 << m_n) - 1); // keep only m_n bits
1149  v_n |= ((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1150  v_n |= 1; // add center of bin
1151  //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1152  //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1153  val |= (v_n + 2) << (p - 1);
1154  }
1155  dp[stride] = val;
1156  vp[0] = prev_v_n | v_n;
1157  prev_v_n = 0;
1158  ++dp;
1159  if (++x >= width)
1160  { ++vp; break; }
1161 
1162  val = 0;
1163  bit = 2;
1164  if (inf & (1 << (4 + bit)))
1165  {
1166  //get 32 bits of magsgn data
1167  ui32 ms_val = frwd_fetch<0xFF>(&magsgn);
1168  ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1169  frwd_advance(&magsgn, m_n); //consume m_n
1170 
1171  val = ms_val << 31; // get sign bit
1172  v_n = ms_val & ((1 << m_n) - 1); // keep only m_n bits
1173  v_n |= ((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1174  v_n |= 1; // add center of bin
1175  //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1176  //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1177  val |= (v_n + 2) << (p - 1);
1178  }
1179  dp[0] = val;
1180 
1181  v_n = 0;
1182  val = 0;
1183  bit = 3;
1184  if (inf & (1 << (4 + bit)))
1185  {
1186  //get 32 bits of magsgn data
1187  ui32 ms_val = frwd_fetch<0xFF>(&magsgn);
1188  ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1189  frwd_advance(&magsgn, m_n); //consume m_n
1190 
1191  val = ms_val << 31; // get sign bit
1192  v_n = ms_val & ((1 << m_n) - 1); // keep only m_n bits
1193  v_n |= ((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1194  v_n |= 1; // add center of bin
1195  //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1196  //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1197  val |= (v_n + 2) << (p - 1);
1198  }
1199  dp[stride] = val;
1200  prev_v_n = v_n;
1201  ++dp;
1202  ++x;
1203  }
1204  vp[0] = prev_v_n;
1205 
1206  for (ui32 y = 2; y < height; y += 2)
1207  {
1208  ui16 *sp = scratch + (y >> 1) * sstr;
1209  ui32 *vp = v_n_scratch;
1210  ui32 *dp = decoded_data + y * stride;
1211 
1212  prev_v_n = 0;
1213  for (ui32 x = 0; x < width; sp += 2, ++vp)
1214  {
1215  ui32 inf = sp[0];
1216  ui32 u_q = sp[1];
1217 
1218  ui32 gamma = inf & 0xF0; gamma &= gamma - 0x10; //is gamma_q 1?
1219  ui32 emax = vp[0] | vp[1];
1220  emax = 31 - count_leading_zeros(emax | 2); // emax - 1
1221  ui32 kappa = gamma ? emax : 1;
1222 
1223  ui32 U_q = u_q + kappa;
1224  if (U_q > mmsbp2)
1225  return false;
1226 
1227  ui32 v_n;
1228  ui32 val = 0;
1229  ui32 bit = 0;
1230  if (inf & (1 << (4 + bit)))
1231  {
1232  //get 32 bits of magsgn data
1233  ui32 ms_val = frwd_fetch<0xFF>(&magsgn);
1234  ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1235  frwd_advance(&magsgn, m_n); //consume m_n
1236 
1237  val = ms_val << 31; // get sign bit
1238  v_n = ms_val & ((1 << m_n) - 1); // keep only m_n bits
1239  v_n |= ((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1240  v_n |= 1; // add center of bin
1241  //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1242  //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1243  val |= (v_n + 2) << (p - 1);
1244  }
1245  dp[0] = val;
1246 
1247  v_n = 0;
1248  val = 0;
1249  bit = 1;
1250  if (inf & (1 << (4 + bit)))
1251  {
1252  //get 32 bits of magsgn data
1253  ui32 ms_val = frwd_fetch<0xFF>(&magsgn);
1254  ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1255  frwd_advance(&magsgn, m_n); //consume m_n
1256 
1257  val = ms_val << 31; // get sign bit
1258  v_n = ms_val & ((1 << m_n) - 1); // keep only m_n bits
1259  v_n |= ((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1260  v_n |= 1; // add center of bin
1261  //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1262  //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1263  val |= (v_n + 2) << (p - 1);
1264  }
1265  dp[stride] = val;
1266  vp[0] = prev_v_n | v_n;
1267  prev_v_n = 0;
1268  ++dp;
1269  if (++x >= width)
1270  { ++vp; break; }
1271 
1272  val = 0;
1273  bit = 2;
1274  if (inf & (1 << (4 + bit)))
1275  {
1276  //get 32 bits of magsgn data
1277  ui32 ms_val = frwd_fetch<0xFF>(&magsgn);
1278  ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1279  frwd_advance(&magsgn, m_n); //consume m_n
1280 
1281  val = ms_val << 31; // get sign bit
1282  v_n = ms_val & ((1 << m_n) - 1); // keep only m_n bits
1283  v_n |= ((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1284  v_n |= 1; // add center of bin
1285  //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1286  //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1287  val |= (v_n + 2) << (p - 1);
1288  }
1289  dp[0] = val;
1290 
1291  v_n = 0;
1292  val = 0;
1293  bit = 3;
1294  if (inf & (1 << (4 + bit)))
1295  {
1296  //get 32 bits of magsgn data
1297  ui32 ms_val = frwd_fetch<0xFF>(&magsgn);
1298  ui32 m_n = U_q - ((inf >> (12 + bit)) & 1); // remove e_k
1299  frwd_advance(&magsgn, m_n); //consume m_n
1300 
1301  val = ms_val << 31; // get sign bit
1302  v_n = ms_val & ((1 << m_n) - 1); // keep only m_n bits
1303  v_n |= ((inf >> (8 + bit)) & 1) << m_n; // add EMB e_1 as MSB
1304  v_n |= 1; // add center of bin
1305  //v_n now has 2 * (\mu - 1) + 0.5 with correct sign bit
1306  //add 2 to make it 2*\mu+0.5, shift it up to missing MSBs
1307  val |= (v_n + 2) << (p - 1);
1308  }
1309  dp[stride] = val;
1310  prev_v_n = v_n;
1311  ++dp;
1312  ++x;
1313  }
1314  vp[0] = prev_v_n;
1315  }
1316  }
1317 
1318  if (num_passes > 1)
1319  {
1320  // We use scratch again, we can divide it into multiple regions
1321  // sigma holds all the significant samples, and it cannot
1322  // be modified after it is set. it will be used during the
1323  // Magnitude Refinement Pass
1324  ui16* const sigma = scratch;
1325 
1326  ui32 mstr = (width + 3u) >> 2; // divide by 4, since each
1327  // ui16 contains 4 columns
1328  mstr = ((mstr + 2u) + 7u) & ~7u; // multiples of 8
1329 
1330  // We re-arrange quad significance, where each 4 consecutive
1331  // bits represent one quad, into column significance, where,
1332  // each 4 consequtive bits represent one column of 4 rows
1333  {
1334  ui32 y;
1335  for (y = 0; y < height; y += 4)
1336  {
1337  ui16* sp = scratch + (y >> 1) * sstr;
1338  ui16* dp = sigma + (y >> 2) * mstr;
1339  for (ui32 x = 0; x < width; x += 4, sp += 4, ++dp) {
1340  ui32 t0 = 0, t1 = 0;
1341  t0 = ((sp[0 ] & 0x30u) >> 4) | ((sp[0 ] & 0xC0u) >> 2);
1342  t0 |= ((sp[2 ] & 0x30u) << 4) | ((sp[2 ] & 0xC0u) << 6);
1343  t1 = ((sp[0+sstr] & 0x30u) >> 2) | ((sp[0+sstr] & 0xC0u) );
1344  t1 |= ((sp[2+sstr] & 0x30u) << 6) | ((sp[2+sstr] & 0xC0u) << 8);
1345  dp[0] = (ui16)(t0 | t1);
1346  }
1347  dp[0] = 0; // set an extra entry on the right with 0
1348  }
1349  {
1350  // reset one row after the codeblock
1351  ui16* dp = sigma + (y >> 2) * mstr;
1352  for (ui32 x = 0; x < width; x += 4, ++dp)
1353  dp[0] = 0;
1354  dp[0] = 0; // set an extra entry on the right with 0
1355  }
1356  }
1357 
1358  // We perform Significance Propagation Pass here
1359  {
1360  // This stores significance information of the previous
1361  // 4 rows. Significance information in this array includes
1362  // all signicant samples in bitplane p - 1; that is,
1363  // significant samples for bitplane p (discovered during the
1364  // cleanup pass and stored in sigma) and samples that have recently
1365  // became significant (during the SPP) in bitplane p-1.
1366  // We store enough for the widest row, containing 1024 columns,
1367  // which is equivalent to 256 of ui16, since each stores 4 columns.
1368  // We add an extra 8 entries, just in case we need more
1369  ui16 prev_row_sig[256 + 8] = {0}; // 528 Bytes
1370 
1371  frwd_struct sigprop;
1372  frwd_init<0>(&sigprop, coded_data + lengths1, (int)lengths2);
1373 
1374  for (ui32 y = 0; y < height; y += 4)
1375  {
1376  ui32 pattern = 0xFFFFu; // a pattern needed samples
1377  if (height - y < 4) {
1378  pattern = 0x7777u;
1379  if (height - y < 3) {
1380  pattern = 0x3333u;
1381  if (height - y < 2)
1382  pattern = 0x1111u;
1383  }
1384  }
1385 
1386  // prev holds sign. info. for the previous quad, together
1387  // with the rows on top of it and below it.
1388  ui32 prev = 0;
1389  ui16 *prev_sig = prev_row_sig;
1390  ui16 *cur_sig = sigma + (y >> 2) * mstr;
1391  ui32 *dpp = decoded_data + y * stride;
1392  for (ui32 x = 0; x < width; x += 4, ++cur_sig, ++prev_sig)
1393  {
1394  // only rows and columns inside the stripe are included
1395  si32 s = (si32)x + 4 - (si32)width;
1396  s = ojph_max(s, 0);
1397  pattern = pattern >> (s * 4);
1398 
1399  // We first find locations that need to be tested (potential
1400  // SPP members); these location will end up in mbr
1401  // In each iteration, we produce 16 bits because cwd can have
1402  // up to 16 bits of significance information, followed by the
1403  // corresponding 16 bits of sign information; therefore, it is
1404  // sufficient to fetch 32 bit data per loop.
1405 
1406  // Althougth we are interested in 16 bits only, we load 32 bits.
1407  // For the 16 bits we are producing, we need the next 4 bits --
1408  // We need data for at least 5 columns out of 8.
1409  // Therefore loading 32 bits is easier than loading 16 bits
1410  // twice.
1411  ui32 ps = *(ui32*)prev_sig;
1412  ui32 ns = *(ui32*)(cur_sig + mstr);
1413  ui32 u = (ps & 0x88888888) >> 3; // the row on top
1414  if (!stripe_causal)
1415  u |= (ns & 0x11111111) << 3; // the row below
1416 
1417  ui32 cs = *(ui32*)cur_sig;
1418  // vertical integration
1419  ui32 mbr = cs; // this sig. info.
1420  mbr |= (cs & 0x77777777) << 1; //above neighbors
1421  mbr |= (cs & 0xEEEEEEEE) >> 1; //below neighbors
1422  mbr |= u;
1423  // horizontal integration
1424  ui32 t = mbr;
1425  mbr |= t << 4; // neighbors on the left
1426  mbr |= t >> 4; // neighbors on the right
1427  mbr |= prev >> 12; // significance of previous group
1428 
1429  // remove outside samples, and already significant samples
1430  mbr &= pattern;
1431  mbr &= ~cs;
1432 
1433  // find samples that become significant during the SPP
1434  ui32 new_sig = mbr;
1435  if (new_sig)
1436  {
1437  ui32 cwd = frwd_fetch<0>(&sigprop);
1438 
1439  ui32 cnt = 0;
1440  ui32 col_mask = 0xFu;
1441  ui32 inv_sig = ~cs & pattern;
1442  for (int i = 0; i < 16; i += 4, col_mask <<= 4)
1443  {
1444  if ((col_mask & new_sig) == 0)
1445  continue;
1446 
1447  //scan one column
1448  ui32 sample_mask = 0x1111u & col_mask;
1449  if (new_sig & sample_mask)
1450  {
1451  new_sig &= ~sample_mask;
1452  if (cwd & 1)
1453  {
1454  ui32 t = 0x33u << i;
1455  new_sig |= t & inv_sig;
1456  }
1457  cwd >>= 1; ++cnt;
1458  }
1459 
1460  sample_mask <<= 1;
1461  if (new_sig & sample_mask)
1462  {
1463  new_sig &= ~sample_mask;
1464  if (cwd & 1)
1465  {
1466  ui32 t = 0x76u << i;
1467  new_sig |= t & inv_sig;
1468  }
1469  cwd >>= 1; ++cnt;
1470  }
1471 
1472  sample_mask <<= 1;
1473  if (new_sig & sample_mask)
1474  {
1475  new_sig &= ~sample_mask;
1476  if (cwd & 1)
1477  {
1478  ui32 t = 0xECu << i;
1479  new_sig |= t & inv_sig;
1480  }
1481  cwd >>= 1; ++cnt;
1482  }
1483 
1484  sample_mask <<= 1;
1485  if (new_sig & sample_mask)
1486  {
1487  new_sig &= ~sample_mask;
1488  if (cwd & 1)
1489  {
1490  ui32 t = 0xC8u << i;
1491  new_sig |= t & inv_sig;
1492  }
1493  cwd >>= 1; ++cnt;
1494  }
1495  }
1496 
1497  if (new_sig)
1498  {
1499  // new_sig has newly-discovered sig. samples during SPP
1500  // find the signs and update decoded_data
1501  ui32 *dp = dpp + x;
1502  ui32 val = 3u << (p - 2);
1503  col_mask = 0xFu;
1504  for (int i = 0; i < 4; ++i, ++dp, col_mask <<= 4)
1505  {
1506  if ((col_mask & new_sig) == 0)
1507  continue;
1508 
1509  //scan 4 signs
1510  ui32 sample_mask = 0x1111u & col_mask;
1511  if (new_sig & sample_mask)
1512  {
1513  assert(dp[0] == 0);
1514  dp[0] = (cwd << 31) | val;
1515  cwd >>= 1; ++cnt;
1516  }
1517 
1518  sample_mask += sample_mask;
1519  if (new_sig & sample_mask)
1520  {
1521  assert(dp[stride] == 0);
1522  dp[stride] = (cwd << 31) | val;
1523  cwd >>= 1; ++cnt;
1524  }
1525 
1526  sample_mask += sample_mask;
1527  if (new_sig & sample_mask)
1528  {
1529  assert(dp[2 * stride] == 0);
1530  dp[2 * stride] = (cwd << 31) | val;
1531  cwd >>= 1; ++cnt;
1532  }
1533 
1534  sample_mask += sample_mask;
1535  if (new_sig & sample_mask)
1536  {
1537  assert(dp[3 * stride] == 0);
1538  dp[3 * stride] = (cwd << 31) | val;
1539  cwd >>= 1; ++cnt;
1540  }
1541  }
1542  }
1543  frwd_advance(&sigprop, cnt);
1544  }
1545 
1546  new_sig |= cs;
1547  *prev_sig = (ui16)(new_sig);
1548 
1549  // vertical integration for the new sig. info.
1550  t = new_sig;
1551  new_sig |= (t & 0x7777) << 1; //above neighbors
1552  new_sig |= (t & 0xEEEE) >> 1; //below neighbors
1553  // add sig. info. from the row on top and below
1554  prev = new_sig | u;
1555  // we need only the bits in 0xF000
1556  prev &= 0xF000;
1557  }
1558  }
1559  }
1560 
1561  // We perform Magnitude Refinement Pass here
1562  if (num_passes > 2)
1563  {
1564  rev_struct magref;
1565  rev_init_mrp(&magref, coded_data, (int)lengths1, (int)lengths2);
1566 
1567  for (ui32 y = 0; y < height; y += 4)
1568  {
1569  ui32 *cur_sig = (ui32*)(sigma + (y >> 2) * mstr);
1570  ui32 *dpp = decoded_data + y * stride;
1571  ui32 half = 1 << (p - 2);
1572  for (ui32 i = 0; i < width; i += 8)
1573  {
1574  //Process one entry from sigma array at a time
1575  // Each nibble (4 bits) in the sigma array represents 4 rows,
1576  // and the 32 bits contain 8 columns
1577  ui32 cwd = rev_fetch_mrp(&magref); // get 32 bit data
1578  ui32 sig = *cur_sig++; // 32 bit that will be processed now
1579  ui32 col_mask = 0xFu; // a mask for a column in sig
1580  if (sig) // if any of the 32 bits are set
1581  {
1582  for (int j = 0; j < 8; ++j) //one column at a time
1583  {
1584  if (sig & col_mask) // lowest nibble
1585  {
1586  ui32 *dp = dpp + i + j; // next column in decoded samples
1587  ui32 sample_mask = 0x11111111u & col_mask; //LSB
1588 
1589  for (int k = 0; k < 4; ++k) {
1590  if (sig & sample_mask) //if LSB is set
1591  {
1592  assert(dp[0] != 0); // decoded value cannot be zero
1593  assert((dp[0] & half) == 0); // no half
1594  ui32 sym = cwd & 1; // get it value
1595  sym = (1 - sym) << (p - 1); // previous center of bin
1596  sym |= half; // put half the center of bin
1597  dp[0] ^= sym; // remove old bin center and put new
1598  cwd >>= 1; // consume word
1599  }
1600  sample_mask += sample_mask; //next row
1601  dp += stride; // next samples row
1602  }
1603  }
1604  col_mask <<= 4; //next column
1605  }
1606  }
1607  // consume data according to the number of bits set
1608  rev_advance_mrp(&magref, population_count(sig));
1609  }
1610  }
1611  }
1612  }
1613  return true;
1614  }
1615  }
1616 }
ui16 uvlc_tbl0[256+64]
uvlc_tbl0 contains decoding information for initial row of quads
ui16 uvlc_tbl1[256]
uvlc_tbl1 contains decoding information for non-initial row of quads
ui16 vlc_tbl1[1024]
vlc_tbl1 contains decoding information for non-initial row of quads
ui16 vlc_tbl0[1024]
vlc_tbl0 contains decoding information for initial row of quads
static ui32 rev_fetch(rev_struct *vlcp)
Retrieves 32 bits from the head of a rev_struct structure.
static void rev_init_mrp(rev_struct *mrp, ui8 *data, int lcup, int len2)
Initializes rev_struct structure for MRP segment, and reads a number of bytes such that the next 32 b...
static void mel_read(dec_mel_st *melp)
Reads and unstuffs the MEL bitstream.
static void frwd_advance(frwd_struct *msp, ui32 num_bits)
Consume num_bits bits from the bitstream of frwd_struct.
static void rev_read_mrp(rev_struct *mrp)
Reads and unstuffs from rev_struct.
static ui32 rev_fetch_mrp(rev_struct *mrp)
Retrieves 32 bits from the head of a rev_struct structure.
static ui32 rev_advance_mrp(rev_struct *mrp, ui32 num_bits)
Consumes num_bits from a rev_struct structure.
static void rev_read(rev_struct *vlcp)
Reads and unstuffs data from a backward-growing segment.
static int mel_get_run(dec_mel_st *melp)
Retrieves one run from dec_mel_st; if there are no runs stored, the MEL segment is decoded.
static void rev_init(rev_struct *vlcp, ui8 *data, int lcup, int scup)
Initializes the rev_struct structure and reads a few bytes to move the read address to a multiple of 4.
static void mel_init(dec_mel_st *melp, ui8 *bbuf, int lcup, int scup)
Initializes a dec_mel_st structure for MEL decoding and reads some bytes in order to get the read addre...
static ui32 rev_advance(rev_struct *vlcp, ui32 num_bits)
Consumes num_bits from a rev_struct structure.
static void frwd_read(frwd_struct *msp)
Reads and unstuffs 32 bits from a forward-growing bitstream.
static ui32 frwd_fetch(frwd_struct *msp)
Fetches 32 bits from the frwd_struct bitstream.
static void frwd_init(frwd_struct *msp, const ui8 *data, int size)
Initializes the frwd_struct structure and reads some bytes.
bool ojph_decode_codeblock(ui8 *coded_data, ui32 *decoded_data, ui32 missing_msbs, ui32 num_passes, ui32 lengths1, ui32 lengths2, ui32 width, ui32 height, ui32 stride, bool stripe_causal)
Decodes one codeblock, processing the cleanup, significance propagation, and magnitude refinement pa...
static void mel_decode(dec_mel_st *melp)
Decodes unstuffed MEL segment bits stored in tmp to runs.
uint64_t ui64
Definition: ojph_defs.h:56
uint16_t ui16
Definition: ojph_defs.h:52
static ui32 population_count(ui32 val)
Definition: ojph_arch.h:89
static ui32 count_leading_zeros(ui32 val)
Definition: ojph_arch.h:109
int32_t si32
Definition: ojph_defs.h:55
uint32_t ui32
Definition: ojph_defs.h:54
uint8_t ui8
Definition: ojph_defs.h:50
#define ojph_max(a, b)
Definition: ojph_defs.h:73
#define OJPH_WARN(t,...)
Definition: ojph_message.h:128
MEL state structure for reading and decoding the MEL bitstream.
bool unstuff
true if the next bit needs to be unstuffed
int num_runs
number of decoded runs left in runs (maximum 8)
int size
number of bytes in MEL code
ui8 * data
the address of data (or bitstream)
int k
state of MEL decoder
int bits
number of bits stored in tmp
ui64 tmp
temporary buffer for read data
ui64 runs
runs of decoded MEL codewords (7 bits/run)
State structure for reading and unstuffing of forward-growing bitstreams; these are: MagSgn and SPP b...
const ui8 * data
pointer to bitstream
ui32 bits
number of bits stored in tmp
ui64 tmp
temporary buffer of read data
ui32 unstuff
1 if a bit needs to be unstuffed from next byte
A structure for reading and unstuffing a segment that grows backward, such as VLC and MRP.
ui32 bits
number of bits stored in tmp
int size
number of bytes left
ui8 * data
pointer to where to read data
ui64 tmp
temporary buffer of read data