SeqAn3  3.2.0-rc.1
The Modern C++ library for sequence analysis.
bz2_ostream.hpp
1 // bzip2stream Library License:
2 // --------------------------
3 //
4 // The zlib/libpng License Copyright (c) 2003 Jonathan de Halleux.
5 //
6 // This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
7 //
8 // Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
9 //
10 // 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 //
12 // 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 //
14 // 3. This notice may not be removed or altered from any source distribution
15 //
16 // Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003
17 // Altered bzip2_stream header
18 // Author: Hannes Hauswedell <hannes.hauswedell@fu-berlin.de>
19 
20 #pragma once
21 
22 #include <algorithm>
23 #include <cstdint>
24 #include <cstring>
25 #include <iostream>
26 #include <vector>
27 
28 #if !defined(SEQAN3_HAS_BZIP2) && !defined(SEQAN3_HEADER_TEST)
29 #error "This file cannot be used when building without BZIP2-support."
30 #endif // !defined(SEQAN3_HAS_BZIP2) && !defined(SEQAN3_HEADER_TEST)
31 
32 #if defined(SEQAN3_HAS_BZIP2)
33 
34 #define BZ_NO_STDIO
35 #include <bzlib.h>
36 
37 namespace seqan3::contrib
38 {
39 
40 // --------------------------------------------------------------------------
41 // Class basic_bz2_ostreambuf
42 // --------------------------------------------------------------------------
43 
// Default number of elements used by basic_bz2_ostream for the buffers of its
// stream buffer. `inline constexpr` gives the header a single definition
// shared by all translation units instead of one internal copy per TU.
inline constexpr size_t BZ2_OUTPUT_DEFAULT_BUFFER_SIZE = 4096;
45 
46 template<
47  typename Elem,
48  typename Tr = std::char_traits<Elem>,
49  typename ElemA = std::allocator<Elem>,
50  typename ByteT = char,
51  typename ByteAT = std::allocator<ByteT>
52 >
53 class basic_bz2_ostreambuf :
54  public std::basic_streambuf<Elem, Tr>
55 {
56 public:
57  typedef std::basic_streambuf< Elem, Tr > basic_streambuf_type;
58  typedef std::basic_ostream<Elem, Tr>& ostream_reference;
59  typedef ElemA char_allocator_type;
60  typedef ByteT byte_type;
61  typedef ByteAT byte_allocator_type;
62  typedef byte_type* byte_buffer_type;
63  typedef typename Tr::char_type char_type;
64  typedef typename Tr::int_type int_type;
65  typedef std::vector<byte_type, byte_allocator_type > byte_vector_type;
66  typedef std::vector<char_type, char_allocator_type > char_vector_type;
67 
68  using basic_streambuf_type::epptr;
69  using basic_streambuf_type::pbase;
70  using basic_streambuf_type::pptr;
71 
72  basic_bz2_ostreambuf(
73  ostream_reference ostream_,
74  size_t block_size_100k_ ,
75  size_t verbosity_ ,
76  size_t work_factor_,
77  size_t buffer_size_
78  );
79 
80  ~basic_bz2_ostreambuf();
81 
82  int sync ();
83  int_type overflow (int_type c);
84 
85  std::streamsize flush(int flush_mode);
86  int get_zerr() const
87  { return m_err;};
88  uint64_t get_in_size() const
89  {
90  return ((uint64_t)m_bzip2_stream.total_in_hi32 << 32)
91  + m_bzip2_stream.total_in_lo32;
92  }
93  uint64_t get_out_size() const
94  {
95  return ((uint64_t)m_bzip2_stream.total_out_hi32 << 32)
96  + m_bzip2_stream.total_out_lo32;
97  }
98 private:
99  bool bzip2_to_stream( char_type*, std::streamsize);
100  size_t fill_input_buffer();
101 
102  ostream_reference m_ostream;
103  bz_stream m_bzip2_stream;
104  int m_err;
105  byte_vector_type m_output_buffer;
106  char_vector_type m_buffer;
107 };
108 
109 // --------------------------------------------------------------------------
110 // Class basic_bz2_ostreambuf implementation
111 // --------------------------------------------------------------------------
112 
113 template<
114  typename Elem,
115  typename Tr,
116  typename ElemA,
117  typename ByteT,
118  typename ByteAT
119 >
120 basic_bz2_ostreambuf<
121  Elem,Tr,ElemA,ByteT,ByteAT
122  >:: basic_bz2_ostreambuf(
123  ostream_reference ostream_,
124  size_t block_size_100k_,
125  size_t verbosity_,
126  size_t work_factor_,
127  size_t buffer_size_
128  )
129 :
130  m_ostream(ostream_),
131  m_output_buffer(buffer_size_,0),
132  m_buffer(buffer_size_,0)
133 {
134  m_bzip2_stream.bzalloc=NULL;
135  m_bzip2_stream.bzfree=NULL;
136 
137  m_bzip2_stream.next_in=NULL;
138  m_bzip2_stream.avail_in=0;
139  m_bzip2_stream.avail_out=0;
140  m_bzip2_stream.next_out=NULL;
141 
142  m_err=BZ2_bzCompressInit(
143  &m_bzip2_stream,
144  std::min( 9, static_cast<int>(block_size_100k_) ),
145  std::min( 4, static_cast<int>(verbosity_) ),
146  std::min( 250, static_cast<int>(work_factor_) )
147  );
148 
149  this->setp( &(m_buffer[0]), &(m_buffer[m_buffer.size()-1]));
150 }
151 
152 template<
153  typename Elem,
154  typename Tr,
155  typename ElemA,
156  typename ByteT,
157  typename ByteAT
158 >
159 basic_bz2_ostreambuf<
160  Elem,Tr,ElemA,ByteT,ByteAT
161  >::~basic_bz2_ostreambuf()
162 {
163  flush(BZ_FINISH);
164  m_ostream.flush();
165  m_err=BZ2_bzCompressEnd(&m_bzip2_stream);
166 }
167 
168 template<
169  typename Elem,
170  typename Tr,
171  typename ElemA,
172  typename ByteT,
173  typename ByteAT
174 >
175 int basic_bz2_ostreambuf<
176  Elem,Tr,ElemA,ByteT,ByteAT
177  >::sync ()
178 {
179  if ( this->pptr() && this->pptr() > this->pbase())
180  {
181  int c = overflow( EOF);
182 
183  if ( c == EOF)
184  return -1;
185  }
186 
187  return 0;
188 }
189 
190 template<
191  typename Elem,
192  typename Tr,
193  typename ElemA,
194  typename ByteT,
195  typename ByteAT
196 >
197 typename basic_bz2_ostreambuf<
198  Elem,Tr,ElemA,ByteT,ByteAT
199  >::int_type
200  basic_bz2_ostreambuf<
201  Elem,Tr,ElemA,ByteT,ByteAT
202  >::overflow (
203  typename basic_bz2_ostreambuf<
204  Elem,Tr,ElemA,ByteT,ByteAT
205  >::int_type c
206  )
207 {
208  int w = static_cast<int>(this->pptr() - this->pbase());
209  if (c != EOF) {
210  *this->pptr() = c;
211  ++w;
212  }
213  if ( bzip2_to_stream( this->pbase(), w)) {
214  this->setp( this->pbase(), this->epptr());
215  return c;
216  } else
217  return EOF;
218 }
219 
220 template<
221  typename Elem,
222  typename Tr,
223  typename ElemA,
224  typename ByteT,
225  typename ByteAT
226 >
227 bool basic_bz2_ostreambuf<
228  Elem,Tr,ElemA,ByteT,ByteAT
229  >::bzip2_to_stream(
230  typename basic_bz2_ostreambuf<
231  Elem,Tr,ElemA,ByteT,ByteAT
232  >::char_type* buffer_,
233  std::streamsize buffer_size_
234  )
235 {
236  std::streamsize written_byte_size=0, total_written_byte_size = 0;
237 
238  m_bzip2_stream.next_in=(byte_buffer_type)buffer_;
239  m_bzip2_stream.avail_in=buffer_size_*sizeof(char_type);
240  m_bzip2_stream.avail_out=static_cast<unsigned int>(m_output_buffer.size());
241  m_bzip2_stream.next_out=&(m_output_buffer[0]);
242  size_t remainder=0;
243 
244  do
245  {
246  m_err = BZ2_bzCompress (&m_bzip2_stream, BZ_RUN );
247 
248  if (m_err == BZ_RUN_OK || m_err == BZ_STREAM_END)
249  {
250  written_byte_size= static_cast<std::streamsize>(m_output_buffer.size()) - m_bzip2_stream.avail_out;
251  total_written_byte_size+=written_byte_size;
252  // output buffer is full, dumping to ostream
253  m_ostream.write(
254  (const char_type*) &(m_output_buffer[0]),
255  static_cast<std::streamsize>( written_byte_size/sizeof(char_type) )
256  );
257 
258  // checking if some bytes were not written.
259  if ( (remainder = written_byte_size%sizeof(char_type))!=0)
260  {
261  // copy to the beginning of the stream
262  std::memmove(
263  &(m_output_buffer[0]),
264  &(m_output_buffer[written_byte_size-remainder]),
265  remainder);
266 
267  }
268 
269  m_bzip2_stream.avail_out=static_cast<unsigned int>(m_output_buffer.size()-remainder);
270  m_bzip2_stream.next_out=&m_output_buffer[remainder];
271  }
272  }
273  while (m_bzip2_stream.avail_in != 0 && m_err == BZ_RUN_OK);
274 
275  return m_err == BZ_RUN_OK || m_err == BZ_FLUSH_OK;
276 }
277 
278 template<
279  typename Elem,
280  typename Tr,
281  typename ElemA,
282  typename ByteT,
283  typename ByteAT
284 >
285 std::streamsize basic_bz2_ostreambuf<
286  Elem,Tr,ElemA,ByteT,ByteAT
287  >::flush(int flush_mode)
288 {
289  std::streamsize written_byte_size=0, total_written_byte_size=0;
290 
291  int const buffer_size = static_cast< int >( pptr() - pbase() ); // amount of data currently in buffer
292 
293  m_bzip2_stream.next_in=(byte_buffer_type)pbase();
294  m_bzip2_stream.avail_in=static_cast< unsigned int >(buffer_size*sizeof(char_type));
295  m_bzip2_stream.avail_out=static_cast< unsigned int >(m_output_buffer.size());
296  m_bzip2_stream.next_out=&(m_output_buffer[0]);
297  size_t remainder=0;
298 
299  do
300  {
301  m_err = BZ2_bzCompress (&m_bzip2_stream, flush_mode);
302  if (m_err == BZ_FINISH_OK || m_err == BZ_STREAM_END)
303  {
304  written_byte_size=
305  static_cast<std::streamsize>(m_output_buffer.size())
306  - m_bzip2_stream.avail_out;
307  total_written_byte_size+=written_byte_size;
308  // output buffer is full, dumping to ostream
309  m_ostream.write(
310  (const char_type*) &(m_output_buffer[0]),
311  static_cast<std::streamsize>( written_byte_size/sizeof(char_type)*sizeof(char) )
312  );
313 
314  // checking if some bytes were not written.
315  if ( (remainder = written_byte_size%sizeof(char_type))!=0)
316  {
317  // copy to the beginning of the stream
318  std::memmove(
319  &(m_output_buffer[0]),
320  &(m_output_buffer[written_byte_size-remainder]),
321  remainder);
322 
323  }
324 
325  m_bzip2_stream.avail_out=static_cast<unsigned int>(m_output_buffer.size()-remainder);
326  m_bzip2_stream.next_out=&(m_output_buffer[remainder]);
327  }
328  } while (m_err == BZ_FINISH_OK);
329 
330  m_ostream.flush();
331 
332  return total_written_byte_size;
333 }
334 
335 // --------------------------------------------------------------------------
336 // Class basic_bz2_ostreambase
337 // --------------------------------------------------------------------------
338 
339 template<
340  typename Elem,
341  typename Tr = std::char_traits<Elem>,
342  typename ElemA = std::allocator<Elem>,
343  typename ByteT = char,
344  typename ByteAT = std::allocator<ByteT>
345 >
346 class basic_bz2_ostreambase : virtual public std::basic_ios<Elem,Tr>
347 {
348 public:
349  typedef std::basic_ostream<Elem, Tr>& ostream_reference;
350  typedef basic_bz2_ostreambuf<
351  Elem,Tr,ElemA,ByteT,ByteAT> bzip2_streambuf_type;
352 
353  basic_bz2_ostreambase(
354  ostream_reference ostream_,
355  size_t block_size_100k_ ,
356  size_t verbosity_ ,
357  size_t work_factor_,
358  size_t buffer_size_
359  )
360  : m_buf(ostream_,block_size_100k_, verbosity_, work_factor_, buffer_size_)
361  {
362  this->init(&m_buf );
363  };
364 
365  bzip2_streambuf_type* rdbuf() { return &m_buf; };
366 
367 private:
368  bzip2_streambuf_type m_buf;
369 };
370 
371 // --------------------------------------------------------------------------
372 // Class basic_bz2_ostream
373 // --------------------------------------------------------------------------
374 
375 template<
376  typename Elem,
377  typename Tr = std::char_traits<Elem>,
378  typename ElemA = std::allocator<Elem>,
379  typename ByteT = char,
380  typename ByteAT = std::allocator<ByteT>
381 >
382 class basic_bz2_ostream :
383  public basic_bz2_ostreambase<Elem,Tr,ElemA,ByteT,ByteAT>,
384  public std::basic_ostream<Elem,Tr>
385 {
386 public:
387  typedef basic_bz2_ostreambase<
388  Elem,Tr,ElemA,ByteT,ByteAT> bzip2_ostreambase_type;
389  typedef std::basic_ostream<Elem,Tr> ostream_type;
390  typedef ostream_type& ostream_reference;
391 
392  basic_bz2_ostream(
393  ostream_reference ostream_,
394  size_t block_size_100k_ = 9,
395  size_t verbosity_ = 0,
396  size_t work_factor_ = 30,
397  size_t buffer_size_ = BZ2_OUTPUT_DEFAULT_BUFFER_SIZE
398  )
399  :
400  bzip2_ostreambase_type(ostream_,block_size_100k_, verbosity_, work_factor_,buffer_size_),
401  ostream_type(bzip2_ostreambase_type::rdbuf())
402  {
403 
404  };
405 
406  basic_bz2_ostream& add_header();
407  basic_bz2_ostream& zflush()
408  {
409  this->flush(); this->rdbuf()->flush(); return *this;
410  };
411 
412 #ifdef _WIN32
413 private:
414  void _Add_vtordisp1() { } // Required to avoid VC++ warning C4250
415  void _Add_vtordisp2() { } // Required to avoid VC++ warning C4250
416 #endif
417 };
418 
419 // --------------------------------------------------------------------------
420 // Typedefs
421 // --------------------------------------------------------------------------
422 
423 typedef basic_bz2_ostream<char> bz2_ostream;
424 typedef basic_bz2_ostream<wchar_t> bz2_wostream;
425 
426 } // namespace seqan3::contrib
427 
428 #endif // defined(SEQAN3_HAS_BZIP2)
T flush(T... args)
T init(T... args)
typename stream::int_type int_type
Declares the associated int type.
typename stream::char_type char_type
Declares the associated char type.
T memmove(T... args)
T min(T... args)
T rdbuf(T... args)
T remainder(T... args)