dlvhex
2.5.0
|
00001 #ifndef BMENCODING_H__INCLUDED__ 00002 #define BMENCODING_H__INCLUDED__ 00003 /* 00004 Copyright(c) 2002-2009 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com) 00005 00006 00007 Permission is hereby granted, free of charge, to any person 00008 obtaining a copy of this software and associated documentation 00009 files (the "Software"), to deal in the Software without restriction, 00010 including without limitation the rights to use, copy, modify, merge, 00011 publish, distribute, sublicense, and/or sell copies of the Software, 00012 and to permit persons to whom the Software is furnished to do so, 00013 subject to the following conditions: 00014 00015 The above copyright notice and this permission notice shall be included 00016 in all copies or substantial portions of the Software. 00017 00018 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 00019 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 00020 OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 00021 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 00022 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 00023 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 00024 OTHER DEALINGS IN THE SOFTWARE. 00025 00026 00027 For more information please visit: http://bmagic.sourceforge.net 00028 00029 */ 00030 00031 #include <memory.h> 00032 #include "bmutil.h" 00033 00034 namespace bm 00035 { 00036 00037 00038 // ---------------------------------------------------------------- 00045 class encoder 00046 { 00047 public: 00048 typedef unsigned char* position_type; 00049 public: 00050 encoder(unsigned char* buf, unsigned size); 00051 void put_8(unsigned char c); 00052 void put_16(bm::short_t s); 00053 void put_16(const bm::short_t* s, unsigned count); 00054 void put_32(bm::word_t w); 00055 void put_32(const bm::word_t* w, unsigned count); 00056 void put_prefixed_array_32(unsigned char c, 00057 const bm::word_t* w, unsigned count); 00058 void put_prefixed_array_16(unsigned char c, 00059 const bm::short_t* s, unsigned count, 00060 bool encode_count); 00061 unsigned size() const; 00062 unsigned char* get_pos() const; 00063 void set_pos(unsigned char* buf_pos); 00064 private: 00065 unsigned char* buf_; 00066 unsigned char* start_; 00067 unsigned int size_; 00068 }; 00069 00070 // ---------------------------------------------------------------- 00074 class decoder_base 00075 { 00076 public: 00077 decoder_base(const unsigned char* buf) { buf_ = start_ = buf; } 00079 BMFORCEINLINE unsigned char get_8() { return *buf_++; } 00081 BMFORCEINLINE 00082 unsigned size() const { return (unsigned)(buf_ - start_); } 00084 BMFORCEINLINE 00085 void seek(int delta) { buf_ += delta; } 00086 protected: 00087 const unsigned char* buf_; 00088 const unsigned char* start_; 00089 }; 00090 00091 00092 // ---------------------------------------------------------------- 00097 class decoder : public decoder_base 00098 { 00099 public: 00100 decoder(const unsigned char* buf); 00101 bm::short_t get_16(); 00102 bm::word_t get_32(); 00103 void get_32(bm::word_t* w, unsigned count); 00104 void get_16(bm::short_t* s, unsigned count); 00105 }; 00106 00107 // ---------------------------------------------------------------- 00114 typedef decoder decoder_big_endian; 00115 00116 00117 // ---------------------------------------------------------------- 00124 class decoder_little_endian : public decoder_base 00125 { 00126 public: 00127 decoder_little_endian(const unsigned char* buf); 00128 bm::short_t get_16(); 00129 bm::word_t get_32(); 00130 void get_32(bm::word_t* w, unsigned count); 00131 void get_16(bm::short_t* s, unsigned count); 00132 }; 00133 00134 00140 template<class TEncoder> 00141 class bit_out 00142 { 00143 public: 00144 bit_out(TEncoder& dest) 00145 : dest_(dest), used_bits_(0), accum_(0) 00146 {} 00147 00148 ~bit_out() 00149 { 00150 if (used_bits_) 00151 dest_.put_32(accum_); 00152 } 00153 00154 void put_bit(unsigned value) 00155 { 00156 BM_ASSERT(value <= 1); 00157 accum_ |= (value << used_bits_); 00158 if (++used_bits_ == (sizeof(accum_) * 8)) 00159 flush_accum(); 00160 } 00161 00162 void put_bits(unsigned value, unsigned count) 00163 { 00164 unsigned used = used_bits_; 00165 unsigned acc = accum_; 00166 00167 { 00168 unsigned mask = ~0; 00169 mask >>= (sizeof(accum_) * 8) - count; 00170 value &= mask; 00171 } 00172 for (;count;) 00173 { 00174 acc |= value << used; 00175 00176 unsigned free_bits = (sizeof(accum_) * 8) - used; 00177 if (count <= free_bits) 00178 { 00179 used += count; 00180 break; 00181 } 00182 else 00183 { 00184 value >>= free_bits; 00185 count -= free_bits; 00186 dest_.put_32(acc); 00187 acc = used = 0; 00188 continue; 00189 } 00190 } 00191 used_bits_ = used; 00192 accum_ = acc; 00193 } 00194 00195 void put_zero_bit() 00196 { 00197 if (++used_bits_ == (sizeof(accum_) * 8)) 00198 flush_accum(); 00199 } 00200 00201 void put_zero_bits(register unsigned count) 00202 { 00203 register unsigned used = used_bits_; 00204 unsigned free_bits = (sizeof(accum_) * 8) - used; 00205 if (count >= free_bits) 00206 { 00207 flush_accum(); 00208 count -= free_bits; 00209 used = 0; 00210 00211 for ( ;count >= sizeof(accum_) * 8; count -= sizeof(accum_) * 8) 00212 { 00213 dest_.put_32(0); 00214 } 00215 used += count; 00216 } 00217 else 00218 { 00219 used += count; 00220 } 00221 accum_ |= (1 << used); 00222 if (++used == (sizeof(accum_) * 8)) 00223 flush_accum(); 00224 else 00225 used_bits_ = used; 00226 } 00227 00228 00229 void gamma(unsigned value) 00230 { 00231 BM_ASSERT(value); 00232 00233 unsigned logv = 00234 #if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER)) 00235 bm::bsr_asm32(value); 00236 #else 00237 bm::ilog2_LUT(value); 00238 #endif 00239 00240 // Put zeroes + 1 bit 00241 00242 unsigned used = used_bits_; 00243 unsigned acc = accum_; 00244 const unsigned acc_bits = (sizeof(acc) * 8); 00245 unsigned free_bits = acc_bits - used; 00246 00247 { 00248 unsigned count = logv; 00249 if (count >= free_bits) 00250 { 00251 dest_.put_32(acc); 00252 acc = used ^= used; 00253 count -= free_bits; 00254 00255 for ( ;count >= acc_bits; count -= acc_bits) 00256 { 00257 dest_.put_32(0); 00258 } 00259 used += count; 00260 } 00261 else 00262 { 00263 used += count; 00264 } 00265 acc |= (1 << used); 00266 if (++used == acc_bits) 00267 { 00268 dest_.put_32(acc); 00269 acc = used ^= used; 00270 } 00271 } 00272 00273 // Put the value bits 00274 // 00275 { 00276 unsigned mask = (~0u); 00277 mask >>= acc_bits - logv; 00278 value &= mask; 00279 } 00280 for (;logv;) 00281 { 00282 acc |= value << used; 00283 free_bits = acc_bits - used; 00284 if (logv <= free_bits) 00285 { 00286 used += logv; 00287 break; 00288 } 00289 else 00290 { 00291 value >>= free_bits; 00292 logv -= free_bits; 00293 dest_.put_32(acc); 00294 acc = used ^= used; 00295 continue; 00296 } 00297 } // for 00298 00299 used_bits_ = used; 00300 accum_ = acc; 00301 } 00302 00303 00304 void flush() 00305 { 00306 if (used_bits_) 00307 flush_accum(); 00308 } 00309 00310 private: 00311 void flush_accum() 00312 { 00313 dest_.put_32(accum_); 00314 used_bits_ = accum_ = 0; 00315 } 00316 private: 00317 bit_out(const bit_out&); 00318 bit_out& operator=(const bit_out&); 00319 00320 private: 00321 TEncoder& dest_; 00322 unsigned used_bits_; 00323 unsigned accum_; 00324 }; 00325 00326 00332 template<class TDecoder> 00333 class bit_in 00334 { 00335 public: 00336 bit_in(TDecoder& decoder) 00337 : src_(decoder), 00338 used_bits_(sizeof(accum_) * 8), 00339 accum_(0) 00340 { 00341 } 00342 00343 unsigned gamma() 00344 { 00345 unsigned acc = accum_; 00346 unsigned used = used_bits_; 00347 00348 if (used == (sizeof(acc) * 8)) 00349 { 00350 acc = src_.get_32(); 00351 used ^= used; 00352 } 00353 unsigned zero_bits = 0; 00354 while (true) 00355 { 00356 if (acc == 0) 00357 { 00358 zero_bits += (sizeof(acc) * 8) - used; 00359 used = 0; 00360 acc = src_.get_32(); 00361 continue; 00362 } 00363 unsigned first_bit_idx = 00364 #if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER)) 00365 bm::bsf_asm32(acc); 00366 #else 00367 bm::bit_scan_fwd(acc); 00368 #endif 00369 acc >>= first_bit_idx; 00370 zero_bits += first_bit_idx; 00371 used += first_bit_idx; 00372 break; 00373 } // while 00374 00375 // eat the border bit 00376 // 00377 if (used == (sizeof(acc) * 8)) 00378 { 00379 acc = src_.get_32(); 00380 used = 1; 00381 } 00382 else 00383 { 00384 ++used; 00385 } 00386 acc >>= 1; 00387 00388 // get the value 00389 unsigned current; 00390 00391 unsigned free_bits = (sizeof(acc) * 8) - used; 00392 if (zero_bits <= free_bits) 00393 { 00394 take_accum: 00395 current = 00396 (acc & block_set_table<true>::_left[zero_bits]) | (1 << zero_bits); 00397 acc >>= zero_bits; 00398 used += zero_bits; 00399 goto ret; 00400 } 00401 00402 if (used == (sizeof(acc) * 8)) 00403 { 00404 acc = src_.get_32(); 00405 used ^= used; 00406 goto take_accum; 00407 } 00408 00409 // take the part 00410 current = acc; 00411 // read the next word 00412 acc = src_.get_32(); 00413 used = zero_bits - free_bits; 00414 current |= 00415 ((acc & block_set_table<true>::_left[used]) << free_bits) | 00416 (1 << zero_bits); 00417 00418 acc >>= used; 00419 ret: 00420 accum_ = acc; 00421 used_bits_ = used; 00422 return current; 00423 } 00424 00425 00426 private: 00427 bit_in(const bit_in&); 00428 bit_in& operator=(const bit_in&); 00429 private: 00430 TDecoder& src_; 00431 unsigned used_bits_; 00432 unsigned accum_; 00433 }; 00434 00435 00439 template<typename T, typename TBitIO> 00440 class gamma_encoder 00441 { 00442 public: 00443 gamma_encoder(TBitIO& bout) : bout_(bout) 00444 {} 00445 00449 BMFORCEINLINE 00450 void operator()(T value) 00451 { 00452 bout_.gamma(value); 00453 } 00454 private: 00455 gamma_encoder(const gamma_encoder&); 00456 gamma_encoder& operator=(const gamma_encoder&); 00457 private: 00458 TBitIO& bout_; 00459 }; 00460 00461 00465 template<typename T, typename TBitIO> 00466 class gamma_decoder 00467 { 00468 public: 00469 gamma_decoder(TBitIO& bin) : bin_(bin) 00470 {} 00471 00475 void start() 00476 {} 00477 00481 void stop() 00482 {} 00483 00487 T operator()(void) 00488 { 00489 return (T)bin_.gamma(); 00490 } 00491 private: 00492 gamma_decoder(const gamma_decoder&); 00493 gamma_decoder& operator=(const gamma_decoder&); 00494 private: 00495 TBitIO& bin_; 00496 }; 00497 00498 00499 // ---------------------------------------------------------------- 00500 // Implementation details. 00501 // ---------------------------------------------------------------- 00502 00509 inline encoder::encoder(unsigned char* buf, unsigned size) 00510 : buf_(buf), start_(buf), size_(size) 00511 { 00512 } 00516 inline void encoder::put_prefixed_array_32(unsigned char c, 00517 const bm::word_t* w, 00518 unsigned count) 00519 { 00520 put_8(c); 00521 put_32(w, count); 00522 } 00523 00527 inline void encoder::put_prefixed_array_16(unsigned char c, 00528 const bm::short_t* s, 00529 unsigned count, 00530 bool encode_count) 00531 { 00532 put_8(c); 00533 if (encode_count) 00534 put_16((bm::short_t) count); 00535 put_16(s, count); 00536 } 00537 00538 00544 BMFORCEINLINE void encoder::put_8(unsigned char c) 00545 { 00546 *buf_++ = c; 00547 } 00548 00554 BMFORCEINLINE void encoder::put_16(bm::short_t s) 00555 { 00556 #if (BM_UNALIGNED_ACCESS_OK == 1) 00557 *((bm::short_t*)buf_) = s; 00558 buf_ += sizeof(s); 00559 #else 00560 *buf_++ = (unsigned char) s; 00561 s >>= 8; 00562 *buf_++ = (unsigned char) s; 00563 #endif 00564 } 00565 00569 inline void encoder::put_16(const bm::short_t* s, unsigned count) 00570 { 00571 #if (BM_UNALIGNED_ACCESS_OK == 1) 00572 unsigned short* buf = (unsigned short*)buf_; 00573 const bm::short_t* s_end = s + count; 00574 do 00575 { 00576 *buf++ = *s++; 00577 } while (s < s_end); 00578 00579 buf_ = (unsigned char*)buf; 00580 #else 00581 unsigned char* buf = buf_; 00582 const bm::short_t* s_end = s + count; 00583 do 00584 { 00585 bm::short_t w16 = *s++; 00586 unsigned char a = (unsigned char) w16; 00587 unsigned char b = (unsigned char) (w16 >> 8); 00588 00589 *buf++ = a; 00590 *buf++ = b; 00591 00592 } while (s < s_end); 00593 00594 buf_ = (unsigned char*)buf; 00595 #endif 00596 } 00597 00598 00603 inline unsigned encoder::size() const 00604 { 00605 return (unsigned)(buf_ - start_); 00606 } 00607 00611 inline encoder::position_type encoder::get_pos() const 00612 { 00613 return buf_; 00614 } 00615 00619 inline void encoder::set_pos(encoder::position_type buf_pos) 00620 { 00621 buf_ = buf_pos; 00622 } 00623 00624 00630 BMFORCEINLINE void encoder::put_32(bm::word_t w) 00631 { 00632 #if (BM_UNALIGNED_ACCESS_OK == 1) 00633 *((bm::word_t*) buf_) = w; 00634 buf_ += sizeof(w); 00635 #else 00636 *buf_++ = (unsigned char) w; 00637 *buf_++ = (unsigned char) (w >> 8); 00638 *buf_++ = (unsigned char) (w >> 16); 00639 *buf_++ = (unsigned char) (w >> 24); 00640 #endif 00641 } 00642 00646 inline 00647 void encoder::put_32(const bm::word_t* w, unsigned count) 00648 { 00649 #if (BM_UNALIGNED_ACCESS_OK == 1) 00650 bm::word_t* buf = (bm::word_t*)buf_; 00651 const bm::word_t* w_end = w + count; 00652 do 00653 { 00654 *buf++ = *w++; 00655 } while (w < w_end); 00656 00657 buf_ = (unsigned char*)buf; 00658 #else 00659 unsigned char* buf = buf_; 00660 const bm::word_t* w_end = w + count; 00661 do 00662 { 00663 bm::word_t w32 = *w++; 00664 unsigned char a = (unsigned char) w32; 00665 unsigned char b = (unsigned char) (w32 >> 8); 00666 unsigned char c = (unsigned char) (w32 >> 16); 00667 unsigned char d = (unsigned char) (w32 >> 24); 00668 00669 *buf++ = a; 00670 *buf++ = b; 00671 *buf++ = c; 00672 *buf++ = d; 00673 } while (w < w_end); 00674 00675 buf_ = (unsigned char*)buf; 00676 #endif 00677 } 00678 00679 00680 // --------------------------------------------------------------------- 00681 00687 inline decoder::decoder(const unsigned char* buf) 00688 : decoder_base(buf) 00689 { 00690 } 00691 00696 BMFORCEINLINE bm::short_t decoder::get_16() 00697 { 00698 #if (BM_UNALIGNED_ACCESS_OK == 1) 00699 bm::short_t a = *((bm::short_t*)buf_); 00700 #else 00701 bm::short_t a = (bm::short_t)(buf_[0] + ((bm::short_t)buf_[1] << 8)); 00702 #endif 00703 buf_ += sizeof(a); 00704 return a; 00705 } 00706 00711 BMFORCEINLINE bm::word_t decoder::get_32() 00712 { 00713 #if (BM_UNALIGNED_ACCESS_OK == 1) 00714 bm::word_t a = *((bm::word_t*)buf_); 00715 #else 00716 bm::word_t a = buf_[0]+ ((unsigned)buf_[1] << 8) + 00717 ((unsigned)buf_[2] << 16) + ((unsigned)buf_[3] << 24); 00718 #endif 00719 buf_+=sizeof(a); 00720 return a; 00721 } 00722 00723 00730 inline void decoder::get_32(bm::word_t* w, unsigned count) 00731 { 00732 if (!w) 00733 { 00734 seek(count * 4); 00735 return; 00736 } 00737 #if (BM_UNALIGNED_ACCESS_OK == 1) 00738 memcpy(w, buf_, count * sizeof(bm::word_t)); 00739 seek(count * 4); 00740 return; 00741 #else 00742 const unsigned char* buf = buf_; 00743 const bm::word_t* w_end = w + count; 00744 do 00745 { 00746 bm::word_t a = buf[0]+ ((unsigned)buf[1] << 8) + 00747 ((unsigned)buf[2] << 16) + ((unsigned)buf[3] << 24); 00748 *w++ = a; 00749 buf += sizeof(a); 00750 } while (w < w_end); 00751 buf_ = (unsigned char*)buf; 00752 #endif 00753 } 00754 00761 inline void decoder::get_16(bm::short_t* s, unsigned count) 00762 { 00763 if (!s) 00764 { 00765 seek(count * 2); 00766 return; 00767 } 00768 #if (BM_UNALIGNED_ACCESS_OK == 1) 00769 const bm::short_t* buf = (bm::short_t*)buf_; 00770 const bm::short_t* s_end = s + count; 00771 do 00772 { 00773 *s++ = *buf++; 00774 } while (s < s_end); 00775 #else 00776 const unsigned char* buf = buf_; 00777 const bm::short_t* s_end = s + count; 00778 do 00779 { 00780 bm::short_t a = (bm::short_t)(buf[0] + ((bm::short_t)buf[1] << 8)); 00781 *s++ = a; 00782 buf += sizeof(a); 00783 } while (s < s_end); 00784 #endif 00785 buf_ = (unsigned char*)buf; 00786 } 00787 00788 00789 00790 // --------------------------------------------------------------------- 00791 00792 inline decoder_little_endian::decoder_little_endian(const unsigned char* buf) 00793 : decoder_base(buf) 00794 { 00795 } 00796 00797 BMFORCEINLINE bm::short_t decoder_little_endian::get_16() 00798 { 00799 bm::short_t a = ((bm::short_t)buf_[0] << 8) + ((bm::short_t)buf_[1]); 00800 buf_ += sizeof(a); 00801 return a; 00802 } 00803 00804 BMFORCEINLINE bm::word_t decoder_little_endian::get_32() 00805 { 00806 bm::word_t a = ((unsigned)buf_[0] << 24)+ ((unsigned)buf_[1] << 16) + 00807 ((unsigned)buf_[2] << 8) + ((unsigned)buf_[3]); 00808 buf_+=sizeof(a); 00809 return a; 00810 } 00811 00812 inline void decoder_little_endian::get_32(bm::word_t* w, unsigned count) 00813 { 00814 if (!w) 00815 { 00816 seek(count * 4); 00817 return; 00818 } 00819 00820 const unsigned char* buf = buf_; 00821 const bm::word_t* w_end = w + count; 00822 do 00823 { 00824 bm::word_t a = ((unsigned)buf[0] << 24)+ ((unsigned)buf[1] << 16) + 00825 ((unsigned)buf[2] << 8) + ((unsigned)buf[3]); 00826 *w++ = a; 00827 buf += sizeof(a); 00828 } while (w < w_end); 00829 buf_ = (unsigned char*)buf; 00830 } 00831 00832 inline void decoder_little_endian::get_16(bm::short_t* s, unsigned count) 00833 { 00834 if (!s) 00835 { 00836 seek(count * 2); 00837 return; 00838 } 00839 00840 const unsigned char* buf = buf_; 00841 const bm::short_t* s_end = s + count; 00842 do 00843 { 00844 bm::short_t a = ((bm::short_t)buf[0] << 8) + ((bm::short_t)buf[1]); 00845 *s++ = a; 00846 buf += sizeof(a); 00847 } while (s < s_end); 00848 buf_ = (unsigned char*)buf; 00849 } 00850 00851 00852 } // namespace bm 00853 00854 #endif