dlvhex
2.5.0
|
00001 #ifndef BMSERIAL__H__INCLUDED__ 00002 #define BMSERIAL__H__INCLUDED__ 00003 /* 00004 Copyright(c) 2002-2010 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com) 00005 00006 Permission is hereby granted, free of charge, to any person 00007 obtaining a copy of this software and associated documentation 00008 files (the "Software"), to deal in the Software without restriction, 00009 including without limitation the rights to use, copy, modify, merge, 00010 publish, distribute, sublicense, and/or sell copies of the Software, 00011 and to permit persons to whom the Software is furnished to do so, 00012 subject to the following conditions: 00013 00014 The above copyright notice and this permission notice shall be included 00015 in all copies or substantial portions of the Software. 00016 00017 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 00018 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 00019 OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 00020 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 00021 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 00022 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 00023 OTHER DEALINGS IN THE SOFTWARE. 00024 00025 For more information please visit: http://bmagic.sourceforge.net 00026 00027 */ 00028 00029 00036 #ifndef BM__H__INCLUDED__ 00037 #define BM__H__INCLUDED__ 00038 00039 #include "bm.h" 00040 00041 #endif 00042 00043 #ifdef _MSC_VER 00044 #pragma warning( push ) 00045 #pragma warning( disable : 4311 4312 4127) 00046 #endif 00047 00048 00049 00050 #include "encoding.h" 00051 #include "bmdef.h" 00052 #include "bmfunc.h" 00053 #include "bmtrans.h" 00054 #include "bmalgo_impl.h" 00055 #include "bmutil.h" 00056 00057 //#include "bmgamma.h" 00058 00059 00060 namespace bm 00061 { 00062 00063 00064 // Serialization stream markup constants 00065 00066 00067 const unsigned char set_block_end = 0; 00068 const unsigned char set_block_1zero = 1; 00069 const unsigned char set_block_1one = 2; 00070 const unsigned char set_block_8zero = 3; 00071 const unsigned char set_block_8one = 4; 00072 const unsigned char set_block_16zero = 5; 00073 const unsigned char set_block_16one = 6; 00074 const unsigned char set_block_32zero = 7; 00075 const unsigned char set_block_32one = 8; 00076 const unsigned char set_block_azero = 9; 00077 const unsigned char set_block_aone = 10; 00078 const unsigned char set_block_bit = 11; 00079 const unsigned char set_block_sgapbit = 12; 00080 const unsigned char set_block_sgapgap = 13; 00081 const unsigned char set_block_gap = 14; 00082 const unsigned char set_block_gapbit = 15; 00083 const unsigned char set_block_arrbit = 16; 00084 const unsigned char set_block_bit_interval = 17; 00085 const unsigned char set_block_arrgap = 18; 00086 const unsigned char set_block_bit_1bit = 19; 00087 const unsigned char set_block_gap_egamma = 20; 00088 const unsigned char set_block_arrgap_egamma = 21; 00089 const unsigned char set_block_bit_0runs = 22; 00090 const unsigned char set_block_arrgap_egamma_inv = 23; 00091 const unsigned char set_block_arrgap_inv = 24; 00092 00093 00096 enum serialization_header_mask { 00097 BM_HM_DEFAULT = 1, 00098 BM_HM_RESIZE = (1 << 1), 00099 BM_HM_ID_LIST = (1 << 2), 00100 BM_HM_NO_BO = (1 << 3), 00101 BM_HM_NO_GAPL = (1 << 4) 00102 }; 00103 00104 00105 00106 #define SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO) \ 00107 if (nb == 1) \ 00108 enc.put_8(B_1ZERO); \ 00109 else if (nb < 256) \ 00110 { \ 00111 enc.put_8(B_8ZERO); \ 00112 enc.put_8((unsigned char)nb); \ 00113 } \ 00114 else if (nb < 65536) \ 00115 { \ 00116 enc.put_8(B_16ZERO); \ 00117 enc.put_16((unsigned short)nb); \ 00118 } \ 00119 else \ 00120 {\ 00121 enc.put_8(B_32ZERO); \ 00122 enc.put_32(nb); \ 00123 } 00124 00125 00126 #define BM_SET_ONE_BLOCKS(x) \ 00127 {\ 00128 unsigned end_block = i + x; \ 00129 for (;i < end_block; ++i) \ 00130 bman.set_block_all_set(i); \ 00131 } \ 00132 --i 00133 00134 00135 00136 00145 template<class BV> 00146 class serializer 00147 { 00148 public: 00149 typedef typename BV::allocator_type allocator_type; 00150 typedef typename BV::blocks_manager_type blocks_manager_type; 00151 public: 00152 serializer(const allocator_type& alloc = allocator_type()); 00153 ~serializer(); 00154 00160 void set_compression_level(unsigned clevel); 00161 00165 unsigned get_compression_level() const; 00166 00181 unsigned serialize(const BV& bv, 00182 unsigned char* buf, unsigned buf_size); 00183 00184 00190 void gap_length_serialization(bool value); 00196 void byte_order_serialization(bool value); 00197 00198 protected: 00202 void encode_header(const BV& bv, bm::encoder& enc); 00203 00207 void encode_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc); 00208 00212 void gamma_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc); 00213 00217 void gamma_gap_array(const bm::gap_word_t* gap_block, 00218 unsigned arr_len, 00219 bm::encoder& enc, 00220 bool inverted = false); 00221 00225 void encode_bit_interval(const bm::word_t* blk, 00226 bm::encoder& enc, 00227 unsigned size_control); 00228 00229 private: 00230 serializer(const serializer&); 00231 serializer& operator=(const serializer&); 00232 00233 private: 00234 00235 typedef bm::bit_out<bm::encoder> bit_out_type; 00236 typedef bm::gamma_encoder<bm::gap_word_t, bit_out_type> gamma_encoder_func; 00237 00238 private: 00239 allocator_type alloc_; 00240 bool gap_serial_; 00241 bool byte_order_serial_; 00242 bm::word_t* temp_block_; 00243 unsigned compression_level_; 00244 }; 00245 00250 template<class DEC> 00251 class deseriaizer_base 00252 { 00253 public: 00254 typedef DEC decoder_type; 00255 protected: 00256 deseriaizer_base(){} 00257 00262 unsigned read_gap_block(decoder_type& decoder, 00263 unsigned block_type, 00264 bm::gap_word_t* dst_block, 00265 bm::gap_word_t& gap_head); 00266 00270 unsigned read_id_list(decoder_type& decoder, 00271 unsigned block_type, 00272 bm::gap_word_t* dst_arr); 00273 00274 protected: 00275 bm::gap_word_t id_array_[bm::gap_equiv_len * 2]; 00276 }; 00277 00282 template<class BV, class DEC> 00283 class deserializer : protected deseriaizer_base<DEC> 00284 { 00285 public: 00286 typedef BV bvector_type; 00287 typedef typename deseriaizer_base<DEC>::decoder_type decoder_type; 00288 // typedef DEC decoder_type; 00289 public: 00290 deserializer() : temp_block_(0) {} 00291 00292 unsigned deserialize(bvector_type& bv, 00293 const unsigned char* buf, 00294 bm::word_t* temp_block); 00295 protected: 00296 typedef typename BV::blocks_manager_type blocks_manager_type; 00297 typedef typename BV::allocator_type allocator_type; 00298 00299 protected: 00300 void deserialize_gap(unsigned char btype, decoder_type& dec, 00301 bvector_type& bv, blocks_manager_type& bman, 00302 unsigned i, 00303 bm::word_t* blk); 00304 protected: 00305 bm::gap_word_t gap_temp_block_[bm::gap_equiv_len * 4]; 00306 bm::word_t* temp_block_; 00307 }; 00308 00309 00316 template<class BV, class SerialIterator> 00317 class iterator_deserializer 00318 { 00319 public: 00320 typedef BV bvector_type; 00321 typedef SerialIterator serial_iterator_type; 00322 public: 00323 static 00324 unsigned deserialize(bvector_type& bv, 00325 serial_iterator_type& sit, 00326 bm::word_t* temp_block, 00327 set_operation op = bm::set_OR); 00328 00332 static 00333 void deserialize(bvector_type& bv_target, 00334 const bvector_type& bv_mask, 00335 serial_iterator_type& sit, 00336 bm::word_t* temp_block, 00337 set_operation op); 00338 00339 private: 00340 typedef typename BV::blocks_manager_type blocks_manager_type; 00341 00343 static 00344 void load_id_list(bvector_type& bv, 00345 serial_iterator_type& sit, 00346 unsigned id_count, 00347 bool set_clear); 00348 00350 static 00351 unsigned finalize_target_vector(blocks_manager_type& bman, 00352 set_operation op, 00353 unsigned bv_block_idx); 00354 00356 static 00357 unsigned process_id_list(bvector_type& bv, 00358 serial_iterator_type& sit, 00359 set_operation op); 00360 00361 00362 }; 00363 00370 template<class DEC> 00371 class serial_stream_iterator : protected deseriaizer_base<DEC> 00372 { 00373 public: 00374 typedef typename deseriaizer_base<DEC>::decoder_type decoder_type; 00375 public: 00376 serial_stream_iterator(const unsigned char* buf); 00377 00379 unsigned bv_size() const { return bv_size_; } 00380 00382 bool is_eof() const { return end_of_stream_; } 00383 00385 void next(); 00386 00388 void skip_mono_blocks(); 00389 00391 unsigned get_bit_block(bm::word_t* dst_block, 00392 bm::word_t* tmp_block, 00393 set_operation op); 00394 00395 00397 void get_gap_block(bm::gap_word_t* dst_block); 00398 00400 unsigned dec_size() const { return decoder_.size(); } 00401 00403 decoder_type& decoder() { return decoder_; } 00404 00408 enum iterator_state 00409 { 00410 e_unknown = 0, 00411 e_list_ids, 00412 e_blocks, 00413 e_zero_blocks, 00414 e_one_blocks, 00415 e_bit_block, 00416 e_gap_block 00417 00418 }; 00419 00421 iterator_state state() const { return this->state_; } 00422 00423 iterator_state get_state() const { return this->state_; } 00425 unsigned get_id_count() const { return this->id_cnt_; } 00426 00428 bm::id_t get_id() const { return this->last_id_; } 00429 00431 unsigned block_idx() const { return this->block_idx_; } 00432 00433 public: 00436 typedef 00437 unsigned (serial_stream_iterator<DEC>::*get_bit_func_type) 00438 (bm::word_t*,bm::word_t*); 00439 00440 unsigned 00441 get_bit_block_ASSIGN(bm::word_t* dst_block, bm::word_t* tmp_block); 00442 unsigned 00443 get_bit_block_OR (bm::word_t* dst_block, bm::word_t* tmp_block); 00444 unsigned 00445 get_bit_block_AND (bm::word_t* dst_block, bm::word_t* tmp_block); 00446 unsigned 00447 get_bit_block_SUB (bm::word_t* dst_block, bm::word_t* tmp_block); 00448 unsigned 00449 get_bit_block_XOR (bm::word_t* dst_block, bm::word_t* tmp_block); 00450 unsigned 00451 get_bit_block_COUNT (bm::word_t* dst_block, bm::word_t* tmp_block); 00452 unsigned 00453 get_bit_block_COUNT_AND(bm::word_t* dst_block, bm::word_t* tmp_block); 00454 unsigned 00455 get_bit_block_COUNT_OR(bm::word_t* dst_block, bm::word_t* tmp_block); 00456 unsigned 00457 get_bit_block_COUNT_XOR(bm::word_t* dst_block, bm::word_t* tmp_block); 00458 unsigned 00459 get_bit_block_COUNT_SUB_AB(bm::word_t* dst_block, bm::word_t* tmp_block); 00460 unsigned 00461 get_bit_block_COUNT_SUB_BA(bm::word_t* dst_block, bm::word_t* tmp_block); 00462 unsigned 00463 get_bit_block_COUNT_A(bm::word_t* dst_block, bm::word_t* tmp_block); 00464 unsigned 00465 get_bit_block_COUNT_B(bm::word_t* dst_block, bm::word_t* tmp_block) 00466 { 00467 return get_bit_block_COUNT(dst_block, tmp_block); 00468 } 00469 00473 unsigned get_arr_bit(bm::word_t* dst_block, 00474 bool clear_target=true); 00475 00477 unsigned get_block_type() const { return block_type_; } 00478 00479 unsigned get_bit(); 00480 00481 protected: 00482 get_bit_func_type bit_func_table_[bm::set_END]; 00483 00484 decoder_type decoder_; 00485 bool end_of_stream_; 00486 unsigned bv_size_; 00487 iterator_state state_; 00488 unsigned id_cnt_; 00489 bm::id_t last_id_; 00490 gap_word_t glevels_[bm::gap_levels]; 00491 00492 unsigned block_type_; 00493 unsigned block_idx_; 00494 unsigned mono_block_cnt_; 00495 00496 gap_word_t gap_head_; 00497 }; 00498 00505 template<class BV> 00506 class operation_deserializer 00507 { 00508 public: 00509 typedef BV bvector_type; 00510 public: 00511 static 00512 unsigned deserialize(bvector_type& bv, 00513 const unsigned char* buf, 00514 bm::word_t* temp_block, 00515 set_operation op = bm::set_OR); 00516 private: 00518 static 00519 void deserialize(bvector_type& bv_target, 00520 const bvector_type& bv_mask, 00521 const unsigned char* buf, 00522 bm::word_t* temp_block, 00523 set_operation op); 00524 00525 private: 00526 typedef 00527 typename BV::blocks_manager_type blocks_manager_type; 00528 typedef 00529 serial_stream_iterator<bm::decoder> serial_stream_current; 00530 typedef 00531 serial_stream_iterator<bm::decoder_big_endian> serial_stream_be; 00532 typedef 00533 serial_stream_iterator<bm::decoder_little_endian> serial_stream_le; 00534 00535 }; 00536 00537 00538 00539 00540 00541 //--------------------------------------------------------------------- 00542 00543 template<class BV> 00544 serializer<BV>::serializer(const allocator_type& alloc) 00545 : alloc_(alloc), 00546 gap_serial_(false), 00547 byte_order_serial_(true), 00548 temp_block_(0), 00549 compression_level_(3) 00550 { 00551 temp_block_ = alloc_.alloc_bit_block(); 00552 } 00553 00554 template<class BV> 00555 void serializer<BV>::set_compression_level(unsigned clevel) 00556 { 00557 compression_level_ = clevel; 00558 } 00559 00560 template<class BV> 00561 unsigned serializer<BV>::get_compression_level() const 00562 { 00563 return compression_level_; 00564 } 00565 00566 template<class BV> 00567 serializer<BV>::~serializer() 00568 { 00569 alloc_.free_bit_block(temp_block_); 00570 } 00571 00572 00573 template<class BV> 00574 void serializer<BV>::gap_length_serialization(bool value) 00575 { 00576 gap_serial_ = value; 00577 } 00578 00579 template<class BV> 00580 void serializer<BV>::byte_order_serialization(bool value) 00581 { 00582 byte_order_serial_ = value; 00583 } 00584 00585 template<class BV> 00586 void serializer<BV>::encode_header(const BV& bv, bm::encoder& enc) 00587 { 00588 const blocks_manager_type& bman = bv.get_blocks_manager(); 00589 00590 unsigned char header_flag = 0; 00591 if (bv.size() == bm::id_max) // no dynamic resize 00592 header_flag |= BM_HM_DEFAULT; 00593 else 00594 header_flag |= BM_HM_RESIZE; 00595 00596 if (!byte_order_serial_) 00597 header_flag |= BM_HM_NO_BO; 00598 00599 if (!gap_serial_) 00600 header_flag |= BM_HM_NO_GAPL; 00601 00602 enc.put_8(header_flag); 00603 00604 if (byte_order_serial_) 00605 { 00606 ByteOrder bo = globals<true>::byte_order(); 00607 enc.put_8((unsigned char)bo); 00608 } 00609 00610 // keep GAP levels information 00611 if (gap_serial_) 00612 { 00613 enc.put_16(bman.glen(), bm::gap_levels); 00614 } 00615 00616 // save size (only if bvector has been down-sized) 00617 if (header_flag & BM_HM_RESIZE) 00618 { 00619 enc.put_32(bv.size()); 00620 } 00621 00622 } 00623 00624 template<class BV> 00625 void serializer<BV>::gamma_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc) 00626 { 00627 unsigned len = gap_length(gap_block); 00628 00629 // Use Elias Gamma encoding 00630 if (len > 6 && (compression_level_ > 3)) 00631 { 00632 encoder::position_type enc_pos0 = enc.get_pos(); 00633 { 00634 bit_out_type bout(enc); 00635 gamma_encoder_func gamma(bout); 00636 00637 enc.put_8(set_block_gap_egamma); 00638 enc.put_16(gap_block[0]); 00639 00640 for_each_dgap(gap_block, gamma); 00641 } 00642 00643 // evaluate gamma coding efficiency 00644 encoder::position_type enc_pos1 = enc.get_pos(); 00645 unsigned gamma_size = enc_pos1 - enc_pos0; 00646 if (gamma_size > (len-1)*sizeof(gap_word_t)) 00647 { 00648 enc.set_pos(enc_pos0); 00649 } 00650 else 00651 { 00652 return; 00653 } 00654 } 00655 00656 // save as plain GAP block 00657 enc.put_8(set_block_gap); 00658 enc.put_16(gap_block, len-1); 00659 } 00660 00661 template<class BV> 00662 void serializer<BV>::gamma_gap_array(const bm::gap_word_t* gap_array, 00663 unsigned arr_len, 00664 bm::encoder& enc, 00665 bool inverted) 00666 { 00667 if (compression_level_ > 3 && arr_len > 25) 00668 { 00669 encoder::position_type enc_pos0 = enc.get_pos(); 00670 { 00671 bit_out_type bout(enc); 00672 00673 enc.put_8( 00674 inverted ? set_block_arrgap_egamma_inv 00675 : set_block_arrgap_egamma); 00676 00677 bout.gamma(arr_len); 00678 00679 gap_word_t prev = gap_array[0]; 00680 bout.gamma(prev + 1); 00681 00682 for (unsigned i = 1; i < arr_len; ++i) 00683 { 00684 gap_word_t curr = gap_array[i]; 00685 bout.gamma(curr - prev); 00686 prev = curr; 00687 } 00688 } 00689 00690 encoder::position_type enc_pos1 = enc.get_pos(); 00691 unsigned gamma_size = enc_pos1 - enc_pos0; 00692 if (gamma_size > (arr_len)*sizeof(gap_word_t)) 00693 { 00694 enc.set_pos(enc_pos0); 00695 } 00696 else 00697 { 00698 return; 00699 } 00700 } 00701 00702 // save as an plain array 00703 enc.put_prefixed_array_16(inverted ? set_block_arrgap_inv : set_block_arrgap, 00704 gap_array, arr_len, true); 00705 } 00706 00707 00708 template<class BV> 00709 void serializer<BV>::encode_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc) 00710 { 00711 if (compression_level_ > 2) 00712 { 00713 gap_word_t* gap_temp_block = (gap_word_t*) temp_block_; 00714 gap_word_t arr_len; 00715 00716 unsigned bc = gap_bit_count(gap_block); 00717 if (bc == 1) 00718 { 00719 arr_len = gap_convert_to_arr(gap_temp_block, 00720 gap_block, 00721 bm::gap_equiv_len-10); 00722 BM_ASSERT(arr_len == 1); 00723 enc.put_8(set_block_bit_1bit); 00724 enc.put_16(gap_temp_block[0]); 00725 return; 00726 } 00727 00728 unsigned len = gap_length(gap_block); 00729 bool invert, use_array; 00730 invert = use_array = false; 00731 00732 if (bc < len-1) 00733 { 00734 use_array = true; 00735 } 00736 else // inverted array is a better alternative? 00737 { 00738 unsigned inverted_bc = bm::gap_max_bits - bc; 00739 if (inverted_bc < len-1) 00740 { 00741 use_array = invert = true; 00742 } 00743 } 00744 if (use_array) 00745 { 00746 arr_len = gap_convert_to_arr(gap_temp_block, 00747 gap_block, 00748 bm::gap_equiv_len-10, 00749 invert); 00750 if (arr_len) 00751 { 00752 gamma_gap_array(gap_temp_block, arr_len, enc, invert); 00753 return; 00754 } 00755 } 00756 } 00757 00758 gamma_gap_block(gap_block, enc); 00759 } 00760 00761 template<class BV> 00762 void serializer<BV>::encode_bit_interval(const bm::word_t* blk, 00763 bm::encoder& enc, 00764 unsigned //size_control 00765 ) 00766 { 00767 enc.put_8(set_block_bit_0runs); 00768 enc.put_8((blk[0]==0) ? 0 : 1); // encode start 00769 00770 unsigned i,j;//,k; 00771 00772 for (i = 0; i < bm::set_block_size; ++i) 00773 { 00774 if (blk[i] == 0) 00775 { 00776 // scan fwd to find 0 island length 00777 for (j = i+1; j < bm::set_block_size; ++j) 00778 { 00779 if (blk[j] != 0) 00780 break; 00781 } 00782 enc.put_16((gap_word_t)(j-i)); 00783 i = j - 1; 00784 } 00785 else 00786 { 00787 // scan fwd to find non-0 island length 00788 for (j = i+1; j < bm::set_block_size; ++j) 00789 { 00790 if (blk[j] == 0) 00791 { 00792 // look ahead to identify and ignore short 0-run 00793 if (((j+1 < bm::set_block_size) && blk[j+1]) || 00794 ((j+2 < bm::set_block_size) && blk[j+2]) 00795 ) 00796 { 00797 ++j; // skip zero word 00798 continue; 00799 } 00800 break; 00801 } 00802 } 00803 enc.put_16((gap_word_t)(j-i)); 00804 // stream all bit-words now 00805 BM_ASSERT(i < j); 00806 enc.put_32(blk + i, j - i); 00807 i = j - 1; 00808 } 00809 } 00810 } 00811 00812 00813 template<class BV> 00814 unsigned serializer<BV>::serialize(const BV& bv, 00815 unsigned char* buf, unsigned buf_size) 00816 { 00817 BM_ASSERT(temp_block_); 00818 00819 const blocks_manager_type& bman = bv.get_blocks_manager(); 00820 00821 gap_word_t* gap_temp_block = (gap_word_t*) temp_block_; 00822 00823 bm::encoder enc(buf, buf_size); // create the encoder 00824 encode_header(bv, enc); 00825 00826 unsigned i,j; 00827 00828 00829 // save blocks. 00830 for (i = 0; i < bm::set_total_blocks; ++i) 00831 { 00832 bm::word_t* blk = bman.get_block(i); 00833 // ----------------------------------------- 00834 // Empty or ONE block serialization 00835 00836 bool flag; 00837 flag = bman.is_block_zero(i, blk, false); 00838 if (flag) 00839 { 00840 zero_block: 00841 unsigned next_nb = bman.find_next_nz_block(i+1, false); 00842 if (next_nb == bm::set_total_blocks) // no more blocks 00843 { 00844 enc.put_8(set_block_azero); 00845 return enc.size(); 00846 } 00847 unsigned nb = next_nb - i; 00848 00849 if (nb > 1 && nb < 128) 00850 { 00851 // special (but frequent) case -- GAP delta fits in 7bits 00852 unsigned char c = (unsigned char)((1u << 7) | nb); 00853 enc.put_8(c); 00854 } 00855 else 00856 { 00857 SER_NEXT_GRP(enc, nb, set_block_1zero, 00858 set_block_8zero, 00859 set_block_16zero, 00860 set_block_32zero) 00861 } 00862 i = next_nb - 1; 00863 continue; 00864 } 00865 else 00866 { 00867 flag = bman.is_block_one(i, blk, false); 00868 if (flag) 00869 { 00870 // Look ahead for similar blocks 00871 for(j = i+1; j < bm::set_total_blocks; ++j) 00872 { 00873 bm::word_t* blk_next = bman.get_block(j); 00874 if (flag != bman.is_block_one(j, blk_next, false)) 00875 break; 00876 } 00877 if (j == bm::set_total_blocks) 00878 { 00879 enc.put_8(set_block_aone); 00880 break; 00881 } 00882 else 00883 { 00884 unsigned nb = j - i; 00885 SER_NEXT_GRP(enc, nb, set_block_1one, 00886 set_block_8one, 00887 set_block_16one, 00888 set_block_32one) 00889 } 00890 i = j - 1; 00891 continue; 00892 } 00893 } 00894 00895 // ------------------------------ 00896 // GAP serialization 00897 00898 if (BM_IS_GAP(blk)) 00899 { 00900 gap_word_t* gblk = BMGAP_PTR(blk); 00901 encode_gap_block(gblk, enc); 00902 continue; 00903 } 00904 00905 // ---------------------------------------------- 00906 // BIT BLOCK serialization 00907 00908 { 00909 if (compression_level_ <= 1) 00910 { 00911 enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size); 00912 continue; 00913 } 00914 00915 // compute bit-block statistics: bit-count and number of GAPS 00916 unsigned block_bc = 0; 00917 bm::id_t bit_gaps = 00918 bit_block_calc_count_change(blk, blk + bm::set_block_size, 00919 &block_bc); 00920 unsigned block_bc_inv = bm::gap_max_bits - block_bc; 00921 switch (block_bc) 00922 { 00923 case 1: // corner case: only 1 bit on 00924 { 00925 bm::id_t bit_idx = 0; 00926 bit_find_in_block(blk, bit_idx, &bit_idx); 00927 enc.put_8(set_block_bit_1bit); enc.put_16((short)bit_idx); 00928 continue; 00929 } 00930 case 0: goto zero_block; // empty block 00931 } 00932 00933 00934 // compute alternative representation sizes 00935 // 00936 unsigned arr_block_size = sizeof(gap_word_t) + (block_bc * sizeof(gap_word_t)); 00937 unsigned arr_block_size_inv = sizeof(gap_word_t) + (block_bc_inv * sizeof(gap_word_t)); 00938 unsigned gap_block_size = sizeof(gap_word_t) + ((bit_gaps+1) * sizeof(gap_word_t)); 00939 unsigned interval_block_size; 00940 interval_block_size = bit_count_nonzero_size(blk, bm::set_block_size); 00941 bool inverted = false; 00942 00943 if (arr_block_size_inv < arr_block_size && 00944 arr_block_size_inv < gap_block_size && 00945 arr_block_size_inv < interval_block_size) 00946 { 00947 inverted = true; 00948 goto bit_as_array; 00949 } 00950 00951 // if interval representation is not a good alternative 00952 if ((interval_block_size > arr_block_size) || 00953 (interval_block_size > gap_block_size)) 00954 { 00955 if (gap_block_size < (bm::gap_equiv_len-64) && 00956 gap_block_size < arr_block_size) 00957 { 00958 unsigned len = bit_convert_to_gap(gap_temp_block, 00959 blk, 00960 bm::gap_max_bits, 00961 bm::gap_equiv_len-64); 00962 if (len) // save as GAP 00963 { 00964 gamma_gap_block(gap_temp_block, enc); 00965 continue; 00966 } 00967 } 00968 00969 if (arr_block_size < ((bm::gap_equiv_len-64) * sizeof(gap_word_t))) 00970 { 00971 bit_as_array: 00972 gap_word_t arr_len; 00973 unsigned mask = inverted ? ~0 : 0; 00974 arr_len = bit_convert_to_arr(gap_temp_block, 00975 blk, 00976 bm::gap_max_bits, 00977 bm::gap_equiv_len-64, 00978 mask); 00979 if (arr_len) 00980 { 00981 gamma_gap_array(gap_temp_block, arr_len, enc, inverted); 00982 continue; 00983 } 00984 00985 } 00986 // full bit-block 00987 enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size); 00988 continue; 00989 } 00990 00991 // if interval block is a winner 00992 if (interval_block_size < arr_block_size && 00993 interval_block_size < gap_block_size) 00994 { 00995 encode_bit_interval(blk, enc, interval_block_size); 00996 continue; 00997 } 00998 00999 if (gap_block_size < bm::gap_equiv_len && 01000 gap_block_size < arr_block_size) 01001 { 01002 unsigned len = bit_convert_to_gap(gap_temp_block, 01003 blk, 01004 bm::gap_max_bits, 01005 bm::gap_equiv_len-64); 01006 if (len) // save as GAP 01007 { 01008 gamma_gap_block(gap_temp_block, enc); 01009 continue; 01010 } 01011 } 01012 01013 01014 // if array is best 01015 if (arr_block_size < bm::gap_equiv_len-64) 01016 { 01017 goto bit_as_array; 01018 } 01019 01020 // full bit-block 01021 enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size); 01022 continue; 01023 } 01024 } 01025 01026 enc.put_8(set_block_end); 01027 01028 unsigned encoded_size = enc.size(); 01029 return encoded_size; 01030 01031 } 01032 01033 01034 01037 enum serialization_flags { 01038 BM_NO_BYTE_ORDER = 1, 01039 BM_NO_GAP_LENGTH = (1 << 1) 01040 }; 01041 01069 /* 01070 Serialization format: 01071 <pre> 01072 01073 | HEADER | BLOCKS | 01074 01075 Header structure: 01076 BYTE : Serialization header (bit mask of BM_HM_*) 01077 BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian) 01078 INT16: Reserved (0) 01079 INT16: Reserved Flags (0) 01080 01081 </pre> 01082 */ 01083 template<class BV> 01084 unsigned serialize(const BV& bv, 01085 unsigned char* buf, 01086 bm::word_t* temp_block, 01087 unsigned serialization_flags = 0) 01088 { 01089 bm::serializer<BV> bv_serial; 01090 if (serialization_flags & BM_NO_BYTE_ORDER) 01091 bv_serial.byte_order_serialization(false); 01092 01093 if (serialization_flags & BM_NO_GAP_LENGTH) 01094 bv_serial.gap_length_serialization(false); 01095 else 01096 bv_serial.gap_length_serialization(true); 01097 01098 bv_serial.set_compression_level(4); 01099 01100 return bv_serial.serialize(bv, buf, 0); 01101 } 01102 01109 template<class BV> 01110 unsigned serialize(BV& bv, 01111 unsigned char* buf, 01112 unsigned serialization_flags=0) 01113 { 01114 bm::serializer<BV> bv_serial; 01115 if (serialization_flags & BM_NO_BYTE_ORDER) 01116 bv_serial.byte_order_serialization(false); 01117 01118 if (serialization_flags & BM_NO_GAP_LENGTH) 01119 bv_serial.gap_length_serialization(false); 01120 else 01121 bv_serial.gap_length_serialization(true); 01122 01123 bv_serial.set_compression_level(4); 01124 01125 return bv_serial.serialize(bv, buf, 0); 01126 } 01127 01128 01129 01145 template<class BV> 01146 unsigned deserialize(BV& bv, 01147 const unsigned char* buf, 01148 bm::word_t* temp_block=0) 01149 { 01150 ByteOrder bo_current = globals<true>::byte_order(); 01151 01152 bm::decoder dec(buf); 01153 unsigned char header_flag = dec.get_8(); 01154 ByteOrder bo = bo_current; 01155 if (!(header_flag & BM_HM_NO_BO)) 01156 { 01157 bo = (bm::ByteOrder) dec.get_8(); 01158 } 01159 01160 if (bo_current == bo) 01161 { 01162 deserializer<BV, bm::decoder> deserial; 01163 return deserial.deserialize(bv, buf, temp_block); 01164 } 01165 switch (bo_current) 01166 { 01167 case BigEndian: 01168 { 01169 deserializer<BV, bm::decoder_big_endian> deserial; 01170 return deserial.deserialize(bv, buf, temp_block); 01171 } 01172 case LittleEndian: 01173 { 01174 deserializer<BV, bm::decoder_little_endian> deserial; 01175 return deserial.deserialize(bv, buf, temp_block); 01176 } 01177 default: 01178 BM_ASSERT(0); 01179 }; 01180 return 0; 01181 } 01182 01183 template<class DEC> 01184 unsigned deseriaizer_base<DEC>::read_id_list(decoder_type& decoder, 01185 unsigned block_type, 01186 bm::gap_word_t* dst_arr) 01187 { 01188 typedef bit_in<DEC> bit_in_type; 01189 01190 gap_word_t len = 0; 01191 01192 switch (block_type) 01193 { 01194 case set_block_bit_1bit: 01195 dst_arr[0] = decoder.get_16(); 01196 len = 1; 01197 break; 01198 case set_block_arrgap: 01199 case set_block_arrgap_inv: 01200 len = decoder.get_16(); 01201 decoder.get_16(dst_arr, len); 01202 break; 01203 case set_block_arrgap_egamma: 01204 case set_block_arrgap_egamma_inv: 01205 { 01206 bit_in_type bin(decoder); 01207 len = (gap_word_t)bin.gamma(); 01208 gap_word_t prev, bit_idx; 01209 prev = 0; 01210 for (gap_word_t k = 0; k < len; ++k) 01211 { 01212 bit_idx = (gap_word_t)bin.gamma(); 01213 bit_idx -= (k == 0); 01214 bit_idx += prev; 01215 dst_arr[k] = prev = bit_idx; 01216 } // for 01217 } 01218 break; 01219 default: 01220 BM_ASSERT(0); 01221 } 01222 return len; 01223 } 01224 01225 01226 template<class DEC> 01227 unsigned deseriaizer_base<DEC>::read_gap_block(decoder_type& decoder, 01228 unsigned block_type, 01229 bm::gap_word_t* dst_block, 01230 bm::gap_word_t& gap_head) 01231 { 01232 typedef bit_in<DEC> bit_in_type; 01233 unsigned gap_len = 0; 01234 01235 switch (block_type) 01236 { 01237 case set_block_gap: 01238 { 01239 gap_len = gap_length(&gap_head); 01240 --gap_len; 01241 *dst_block = gap_head; 01242 decoder.get_16(dst_block+1, gap_len - 1); 01243 dst_block[gap_len] = gap_max_bits - 1; 01244 ++gap_len; 01245 } 01246 break; 01247 case set_block_bit_1bit: 01248 { 01249 gap_set_all(dst_block, bm::gap_max_bits, 0); 01250 gap_word_t bit_idx = decoder.get_16(); 01251 gap_len = gap_add_value(dst_block, bit_idx); 01252 ++gap_len; 01253 } 01254 break; 01255 case set_block_arrgap: 01256 case set_block_arrgap_inv: 01257 { 01258 gap_set_all(dst_block, bm::gap_max_bits, 0); 01259 gap_word_t len = decoder.get_16(); 01260 01261 for (gap_word_t k = 0; k < len; ++k) 01262 { 01263 gap_word_t bit_idx = decoder.get_16(); 01264 gap_len = gap_add_value(dst_block, bit_idx); 01265 } // for 01266 ++gap_len; 01267 } 01268 break; 01269 case set_block_arrgap_egamma: 01270 case set_block_arrgap_egamma_inv: 01271 { 01272 unsigned arr_len = read_id_list(decoder, block_type, id_array_); 01273 dst_block[0] = 0; 01274 gap_len = gap_set_array(dst_block, id_array_, arr_len); 01275 } 01276 break; 01277 case set_block_gap_egamma: 01278 { 01279 gap_len = (gap_head >> 3); 01280 --gap_len; 01281 // read gamma GAP block into a dest block 01282 { 01283 *dst_block = gap_head; 01284 gap_word_t* gap_data_ptr = dst_block + 1; 01285 01286 bit_in_type bin(decoder); 01287 { 01288 gap_word_t v = (gap_word_t)bin.gamma(); 01289 gap_word_t gap_sum = *gap_data_ptr = v - 1; 01290 for (unsigned i = 1; i < gap_len; ++i) 01291 { 01292 v = (gap_word_t)bin.gamma(); 01293 *(++gap_data_ptr) = gap_sum += v; 01294 } 01295 dst_block[++gap_len] = bm::gap_max_bits - 1; 01296 } 01297 } 01298 01299 } 01300 break; 01301 default: 01302 BM_ASSERT(0); 01303 } 01304 01305 if (block_type == set_block_arrgap_egamma_inv || 01306 block_type == set_block_arrgap_inv) 01307 { 01308 gap_invert(dst_block); 01309 } 01310 return gap_len; 01311 } 01312 01313 01314 template<class BV, class DEC> 01315 void 01316 deserializer<BV, DEC>::deserialize_gap(unsigned char btype, decoder_type& dec, 01317 bvector_type& bv, blocks_manager_type& bman, 01318 unsigned i, 01319 bm::word_t* blk) 01320 { 01321 typedef bit_in<DEC> bit_in_type; 01322 gap_word_t gap_head; 01323 gap_word_t gap_len = 0; 01324 01325 switch (btype) 01326 { 01327 case set_block_gap: 01328 case set_block_gapbit: 01329 { 01330 gap_word_t gap_head = (gap_word_t) 01331 (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32()); 01332 01333 gap_len = gap_length(&gap_head); 01334 int level = gap_calc_level(gap_len, bman.glen()); 01335 --gap_len; 01336 if (level == -1) // Too big to be GAP: convert to BIT block 01337 { 01338 *gap_temp_block_ = gap_head; 01339 dec.get_16(gap_temp_block_+1, gap_len - 1); 01340 gap_temp_block_[gap_len] = gap_max_bits - 1; 01341 01342 if (blk == 0) // block does not exist yet 01343 { 01344 blk = bman.get_allocator().alloc_bit_block(); 01345 bman.set_block(i, blk); 01346 gap_convert_to_bitset(blk, gap_temp_block_); 01347 } 01348 else // We have some data already here. Apply OR operation. 01349 { 01350 blk = bman.deoptimize_block(i); 01351 gap_add_to_bitset(blk, gap_temp_block_); 01352 } 01353 return; 01354 } // level == -1 01355 01356 set_gap_level(&gap_head, level); 01357 01358 if (blk == 0) 01359 { 01360 gap_word_t* gap_blk = 01361 bman.get_allocator().alloc_gap_block(level, bman.glen()); 01362 gap_word_t* gap_blk_ptr = BMGAP_PTR(gap_blk); 01363 *gap_blk_ptr = gap_head; 01364 set_gap_level(gap_blk_ptr, level); 01365 bman.set_block(i, (bm::word_t*)gap_blk); 01366 bman.set_block_gap(i); 01367 dec.get_16(gap_blk + 1, gap_len - 1); 01368 gap_blk[gap_len] = bm::gap_max_bits - 1; 01369 } 01370 else // target block exists 01371 { 01372 // read GAP block into a temp memory and perform OR 01373 *gap_temp_block_ = gap_head; 01374 dec.get_16(gap_temp_block_ + 1, gap_len - 1); 01375 gap_temp_block_[gap_len] = bm::gap_max_bits - 1; 01376 ++gap_len; 01377 break; 01378 } 01379 return; 01380 } 01381 case set_block_arrgap: 01382 case set_block_arrgap_egamma: 01383 { 01384 unsigned arr_len = read_id_list(dec, btype, this->id_array_); 01385 gap_len = gap_set_array(gap_temp_block_, this->id_array_, arr_len); 01386 break; 01387 } 01388 case set_block_gap_egamma: 01389 gap_head = (gap_word_t) 01390 (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32()); 01391 case set_block_arrgap_egamma_inv: 01392 case set_block_arrgap_inv: 01393 gap_len = read_gap_block(dec, btype, gap_temp_block_, gap_head); 01394 break; 01395 default: 01396 BM_ASSERT(0); 01397 } 01398 01399 // check if encoded GAP block length is too high and use bit-block instead 01400 01401 if (gap_len > bm::gap_length_threashold) 01402 { 01403 blk = bman.deoptimize_block(i); 01404 if (!blk) 01405 { 01406 blk = bman.get_allocator().alloc_bit_block(); 01407 bman.set_block(i, blk); 01408 bit_block_set(blk, 0); 01409 } 01410 gap_add_to_bitset_l(blk, gap_temp_block_, gap_len-1); 01411 } 01412 else 01413 { 01414 bv.combine_operation_with_block(i, 01415 (bm::word_t*)gap_temp_block_, 01416 1, 01417 BM_OR); 01418 } 01419 01420 } 01421 01422 01423 template<class BV, class DEC> 01424 unsigned deserializer<BV, DEC>::deserialize(bvector_type& bv, 01425 const unsigned char* buf, 01426 bm::word_t* temp_block) 01427 { 01428 blocks_manager_type& bman = bv.get_blocks_manager(); 01429 01430 bm::wordop_t* tmp_buf = 01431 temp_block ? (bm::wordop_t*) temp_block 01432 : (bm::wordop_t*)bman.check_allocate_tempblock(); 01433 01434 temp_block_ = temp_block = (word_t*)tmp_buf; 01435 bm::strategy strat = bv.get_new_blocks_strat(); 01436 bv.set_new_blocks_strat(BM_GAP); 01437 01438 decoder_type dec(buf); 01439 01440 BM_SET_MMX_GUARD 01441 01442 // Reading header 01443 01444 unsigned char header_flag = dec.get_8(); 01445 if (!(header_flag & BM_HM_NO_BO)) 01446 { 01447 /*ByteOrder bo = (bm::ByteOrder)*/dec.get_8(); 01448 } 01449 01450 if (header_flag & BM_HM_ID_LIST) 01451 { 01452 // special case: the next comes plain list of integers 01453 if (header_flag & BM_HM_RESIZE) 01454 { 01455 unsigned bv_size = dec.get_32(); 01456 if (bv_size > bv.size()) 01457 { 01458 bv.resize(bv_size); 01459 } 01460 } 01461 01462 01463 for (unsigned cnt = dec.get_32(); cnt; --cnt) { 01464 bm::id_t id = dec.get_32(); 01465 bv.set(id); 01466 } // for 01467 // -1 for compatibility with other deserialization branches 01468 return dec.size()-1; 01469 } 01470 01471 unsigned i; 01472 01473 if (!(header_flag & BM_HM_NO_GAPL)) 01474 { 01475 gap_word_t glevels[bm::gap_levels]; 01476 // read GAP levels information 01477 for (i = 0; i < bm::gap_levels; ++i) 01478 { 01479 glevels[i] = dec.get_16(); 01480 } 01481 } 01482 01483 if (header_flag & (1 << 1)) 01484 { 01485 unsigned bv_size = dec.get_32(); 01486 if (bv_size > bv.size()) 01487 { 01488 bv.resize(bv_size); 01489 } 01490 } 01491 01492 unsigned char btype; 01493 unsigned nb; 01494 01495 for (i = 0; i < bm::set_total_blocks; ++i) 01496 { 01497 btype = dec.get_8(); 01498 bm::word_t* blk = bman.get_block(i); 01499 // pre-check if we have short zero-run packaging here 01500 // 01501 if (btype & (1 << 7)) 01502 { 01503 nb = btype & ~(1 << 7); 01504 i += nb-1; 01505 continue; 01506 } 01507 01508 switch (btype) 01509 { 01510 case set_block_azero: 01511 case set_block_end: 01512 i = bm::set_total_blocks; 01513 break; 01514 case set_block_1zero: 01515 continue; 01516 case set_block_8zero: 01517 nb = dec.get_8(); 01518 i += nb-1; 01519 continue; 01520 case set_block_16zero: 01521 nb = dec.get_16(); 01522 i += nb-1; 01523 continue; 01524 case set_block_32zero: 01525 nb = dec.get_32(); 01526 i += nb-1; 01527 continue; 01528 case set_block_aone: 01529 for (;i < bm::set_total_blocks; ++i) 01530 { 01531 bman.set_block_all_set(i); 01532 } 01533 break; 01534 case set_block_1one: 01535 bman.set_block_all_set(i); 01536 continue; 01537 case set_block_8one: 01538 BM_SET_ONE_BLOCKS(dec.get_8()); 01539 continue; 01540 case set_block_16one: 01541 BM_SET_ONE_BLOCKS(dec.get_16()); 01542 continue; 01543 case set_block_32one: 01544 BM_SET_ONE_BLOCKS(dec.get_32()); 01545 continue; 01546 case set_block_bit: 01547 { 01548 if (blk == 0) 01549 { 01550 blk = bman.get_allocator().alloc_bit_block(); 01551 bman.set_block(i, blk); 01552 dec.get_32(blk, bm::set_block_size); 01553 continue; 01554 } 01555 01556 dec.get_32(temp_block, bm::set_block_size); 01557 bv.combine_operation_with_block(i, 01558 temp_block, 01559 0, BM_OR); 01560 01561 continue; 01562 } 01563 case set_block_bit_1bit: 01564 { 01565 unsigned bit_idx = dec.get_16(); 01566 bit_idx += i * bm::bits_in_block; 01567 bv.set_bit(bit_idx); 01568 continue; 01569 } 01570 case set_block_bit_0runs: 01571 { 01572 //TODO: optimization if block exists 01573 bit_block_set(temp_block, 0); 01574 01575 unsigned char run_type = dec.get_8(); 01576 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type) 01577 { 01578 unsigned run_length = dec.get_16(); 01579 if (run_type) 01580 { 01581 unsigned run_end = j + run_length; 01582 for (;j < run_end; ++j) 01583 { 01584 BM_ASSERT(j < bm::set_block_size); 01585 temp_block[j] = dec.get_32(); 01586 } 01587 } 01588 else 01589 { 01590 j += run_length; 01591 } 01592 } // for 01593 01594 bv.combine_operation_with_block(i, 01595 temp_block, 01596 0, BM_OR); 01597 continue; 01598 } 01599 case set_block_bit_interval: 01600 { 01601 unsigned head_idx, tail_idx; 01602 head_idx = dec.get_16(); 01603 tail_idx = dec.get_16(); 01604 01605 if (blk == 0) 01606 { 01607 blk = bman.get_allocator().alloc_bit_block(); 01608 bman.set_block(i, blk); 01609 for (unsigned i = 0; i < head_idx; ++i) 01610 { 01611 blk[i] = 0; 01612 } 01613 dec.get_32(blk + head_idx, tail_idx - head_idx + 1); 01614 for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j) 01615 { 01616 blk[j] = 0; 01617 } 01618 continue; 01619 } 01620 bit_block_set(temp_block, 0); 01621 dec.get_32(temp_block + head_idx, tail_idx - head_idx + 1); 01622 01623 bv.combine_operation_with_block(i, 01624 temp_block, 01625 0, BM_OR); 01626 continue; 01627 } 01628 case set_block_gap: 01629 case set_block_gapbit: 01630 case set_block_arrgap: 01631 case set_block_gap_egamma: 01632 case set_block_arrgap_egamma: 01633 case set_block_arrgap_egamma_inv: 01634 case set_block_arrgap_inv: 01635 deserialize_gap(btype, dec, bv, bman, i, blk); 01636 continue; 01637 case set_block_arrbit: 01638 { 01639 gap_word_t len = (gap_word_t) 01640 (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32()); 01641 01642 if (bman.is_block_gap(i)) 01643 { 01644 // Here we most probably does not want to keep 01645 // the block GAP since generic bitblock offers better 01646 // performance. 01647 blk = bman.convert_gap2bitset(i); 01648 } 01649 else 01650 { 01651 if (blk == 0) // block does not exists yet 01652 { 01653 blk = bman.get_allocator().alloc_bit_block(); 01654 bit_block_set(blk, 0); 01655 bman.set_block(i, blk); 01656 } 01657 } 01658 01659 // Get the array one by one and set the bits. 01660 for (unsigned k = 0; k < len; ++k) 01661 { 01662 gap_word_t bit_idx = dec.get_16(); 01663 set_bit(blk, bit_idx); 01664 } 01665 continue; 01666 } 01667 default: 01668 BM_ASSERT(0); // unknown block type 01669 } // switch 01670 } // for i 01671 01672 bv.forget_count(); 01673 bv.set_new_blocks_strat(strat); 01674 01675 return dec.size(); 01676 } 01677 01678 01679 01680 template<class DEC> 01681 serial_stream_iterator<DEC>::serial_stream_iterator(const unsigned char* buf) 01682 : decoder_(buf), 01683 end_of_stream_(false), 01684 bv_size_(0), 01685 state_(e_unknown), 01686 id_cnt_(0), 01687 block_idx_(0), 01688 mono_block_cnt_(0) 01689 { 01690 ::memset(bit_func_table_, 0, sizeof(bit_func_table_)); 01691 01692 bit_func_table_[bm::set_AND] = 01693 &serial_stream_iterator<DEC>::get_bit_block_AND; 01694 bit_func_table_[bm::set_ASSIGN] = 01695 &serial_stream_iterator<DEC>::get_bit_block_ASSIGN; 01696 bit_func_table_[bm::set_OR] = 01697 &serial_stream_iterator<DEC>::get_bit_block_OR; 01698 bit_func_table_[bm::set_SUB] = 01699 &serial_stream_iterator<DEC>::get_bit_block_SUB; 01700 bit_func_table_[bm::set_XOR] = 01701 &serial_stream_iterator<DEC>::get_bit_block_XOR; 01702 bit_func_table_[bm::set_COUNT] = 01703 &serial_stream_iterator<DEC>::get_bit_block_COUNT; 01704 bit_func_table_[bm::set_COUNT_AND] = 01705 &serial_stream_iterator<DEC>::get_bit_block_COUNT_AND; 01706 bit_func_table_[bm::set_COUNT_XOR] = 01707 &serial_stream_iterator<DEC>::get_bit_block_COUNT_XOR; 01708 bit_func_table_[bm::set_COUNT_OR] = 01709 &serial_stream_iterator<DEC>::get_bit_block_COUNT_OR; 01710 bit_func_table_[bm::set_COUNT_SUB_AB] = 01711 &serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_AB; 01712 bit_func_table_[bm::set_COUNT_SUB_BA] = 01713 &serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_BA; 01714 bit_func_table_[bm::set_COUNT_A] = 01715 &serial_stream_iterator<DEC>::get_bit_block_COUNT_A; 01716 bit_func_table_[bm::set_COUNT_B] = 01717 &serial_stream_iterator<DEC>::get_bit_block_COUNT; 01718 01719 01720 // reading stream header 01721 unsigned char header_flag = decoder_.get_8(); 01722 if (!(header_flag & BM_HM_NO_BO)) 01723 { 01724 /*ByteOrder bo = (bm::ByteOrder)*/decoder_.get_8(); 01725 } 01726 01727 // check if bitvector comes as an inverted, sorted list of ints 01728 // 01729 if (header_flag & BM_HM_ID_LIST) 01730 { 01731 // special case: the next comes plain list of unsigned integers 01732 if (header_flag & BM_HM_RESIZE) 01733 { 01734 bv_size_ = decoder_.get_32(); 01735 } 01736 01737 state_ = e_list_ids; 01738 id_cnt_ = decoder_.get_32(); 01739 next(); // read first id 01740 } 01741 else 01742 { 01743 if (!(header_flag & BM_HM_NO_GAPL)) 01744 { 01745 unsigned i; 01746 // keep GAP levels info 01747 for (i = 0; i < bm::gap_levels; ++i) 01748 { 01749 glevels_[i] = decoder_.get_16(); 01750 } 01751 } 01752 01753 if (header_flag & (1 << 1)) 01754 { 01755 bv_size_ = decoder_.get_32(); 01756 } 01757 state_ = e_blocks; 01758 } 01759 } 01760 01761 template<class DEC> 01762 void serial_stream_iterator<DEC>::next() 01763 { 01764 if (is_eof()) 01765 { 01766 ++block_idx_; 01767 return; 01768 } 01769 01770 switch (state_) 01771 { 01772 case e_list_ids: 01773 // read inverted ids one by one 01774 if (id_cnt_ == 0) 01775 { 01776 end_of_stream_ = true; 01777 state_ = e_unknown; 01778 break; 01779 } 01780 last_id_ = decoder_.get_32(); 01781 --id_cnt_; 01782 break; 01783 01784 case e_blocks: 01785 if (block_idx_ == bm::set_total_blocks) 01786 { 01787 end_of_stream_ = true; 01788 state_ = e_unknown; 01789 break; 01790 } 01791 01792 block_type_ = decoder_.get_8(); 01793 01794 // pre-check for 7-bit zero block 01795 // 01796 if (block_type_ & (1 << 7)) 01797 { 01798 mono_block_cnt_ = (block_type_ & ~(1 << 7)) - 1; 01799 state_ = e_zero_blocks; 01800 break; 01801 } 01802 01803 switch (block_type_) 01804 { 01805 case set_block_azero: 01806 case set_block_end: 01807 end_of_stream_ = true; state_ = e_unknown; 01808 break; 01809 case set_block_1zero: 01810 state_ = e_zero_blocks; 01811 mono_block_cnt_ = 0; 01812 break; 01813 case set_block_8zero: 01814 state_ = e_zero_blocks; 01815 mono_block_cnt_ = decoder_.get_8()-1; 01816 break; 01817 case set_block_16zero: 01818 state_ = e_zero_blocks; 01819 mono_block_cnt_ = decoder_.get_16()-1; 01820 break; 01821 case set_block_32zero: 01822 state_ = e_zero_blocks; 01823 mono_block_cnt_ = decoder_.get_32()-1; 01824 break; 01825 case set_block_aone: 01826 state_ = e_one_blocks; 01827 mono_block_cnt_ = bm::set_total_blocks - block_idx_; 01828 break; 01829 case set_block_1one: 01830 state_ = e_one_blocks; 01831 mono_block_cnt_ = 0; 01832 break; 01833 case set_block_8one: 01834 state_ = e_one_blocks; 01835 mono_block_cnt_ = decoder_.get_8()-1; 01836 break; 01837 case set_block_16one: 01838 state_ = e_one_blocks; 01839 mono_block_cnt_ = decoder_.get_16()-1; 01840 break; 01841 case set_block_32one: 01842 state_ = e_one_blocks; 01843 mono_block_cnt_ = decoder_.get_32()-1; 01844 break; 01845 01846 case set_block_bit: 01847 case set_block_bit_interval: 01848 case set_block_bit_0runs: 01849 case set_block_arrbit: 01850 state_ = e_bit_block; 01851 break; 01852 01853 case set_block_gap: 01854 case set_block_gap_egamma: 01855 gap_head_ = (gap_word_t) 01856 (sizeof(gap_word_t) == 2 ? 01857 decoder_.get_16() : decoder_.get_32()); 01858 case set_block_arrgap: 01859 case set_block_arrgap_egamma: 01860 case set_block_arrgap_egamma_inv: 01861 case set_block_arrgap_inv: 01862 case set_block_bit_1bit: 01863 state_ = e_gap_block; 01864 break; 01865 case set_block_gapbit: 01866 state_ = e_gap_block; //e_bit_block; // TODO: make a better decision here 01867 break; 01868 01869 default: 01870 BM_ASSERT(0); 01871 }// switch 01872 01873 break; 01874 01875 case e_zero_blocks: 01876 case e_one_blocks: 01877 ++block_idx_; 01878 if (!mono_block_cnt_) 01879 { 01880 state_ = e_blocks; // get new block token 01881 break; 01882 } 01883 --mono_block_cnt_; 01884 break; 01885 01886 case e_unknown: 01887 default: 01888 BM_ASSERT(0); 01889 } // switch 01890 } 01891 01892 template<class DEC> 01893 void serial_stream_iterator<DEC>::skip_mono_blocks() 01894 { 01895 BM_ASSERT(state_ == e_zero_blocks || state_ == e_one_blocks); 01896 if (!mono_block_cnt_) 01897 { 01898 ++block_idx_; 01899 } 01900 else 01901 { 01902 block_idx_ += mono_block_cnt_+1; 01903 mono_block_cnt_ = 0; 01904 } 01905 state_ = e_blocks; 01906 } 01907 01908 template<class DEC> 01909 unsigned 01910 serial_stream_iterator<DEC>::get_bit_block_ASSIGN( 01911 bm::word_t* dst_block, 01912 bm::word_t* /*tmp_block*/) 01913 { 01914 BM_ASSERT(this->state_ == e_bit_block); 01915 unsigned count = 0; 01916 01917 switch (this->block_type_) 01918 { 01919 case set_block_bit: 01920 decoder_.get_32(dst_block, bm::set_block_size); 01921 break; 01922 case set_block_bit_0runs: 01923 { 01924 if (dst_block) 01925 bit_block_set(dst_block, 0); 01926 unsigned char run_type = decoder_.get_8(); 01927 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type) 01928 { 01929 unsigned run_length = decoder_.get_16(); 01930 if (run_type) 01931 { 01932 decoder_.get_32(dst_block ? dst_block + j : dst_block, run_length); 01933 } 01934 j += run_length; 01935 } // for 01936 } 01937 break; 01938 case set_block_bit_interval: 01939 { 01940 unsigned head_idx = decoder_.get_16(); 01941 unsigned tail_idx = decoder_.get_16(); 01942 if (dst_block) 01943 { 01944 for (unsigned i = 0; i < head_idx; ++i) 01945 dst_block[i] = 0; 01946 decoder_.get_32(dst_block + head_idx, 01947 tail_idx - head_idx + 1); 01948 for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j) 01949 dst_block[j] = 0; 01950 } 01951 else 01952 { 01953 decoder_.seek((tail_idx - head_idx + 1) * 4); 01954 } 01955 } 01956 break; 01957 case set_block_arrbit: 01958 case set_block_bit_1bit: 01959 get_arr_bit(dst_block, true /*clear target*/); 01960 break; 01961 case set_block_gapbit: 01962 BM_ASSERT(0); 01963 break; 01964 default: 01965 BM_ASSERT(0); 01966 } // switch 01967 return count; 01968 } 01969 01970 template<class DEC> 01971 unsigned 01972 serial_stream_iterator<DEC>::get_bit_block_OR(bm::word_t* dst_block, 01973 bm::word_t* /*tmp_block*/) 01974 { 01975 BM_ASSERT(this->state_ == e_bit_block); 01976 unsigned count = 0; 01977 switch (block_type_) 01978 { 01979 case set_block_bit: 01980 { 01981 bitblock_get_adapter ga(dst_block); 01982 bit_OR<bm::word_t> func; 01983 bitblock_store_adapter sa(dst_block); 01984 01985 bit_recomb(ga, 01986 decoder_, 01987 func, 01988 sa 01989 ); 01990 } 01991 break; 01992 case set_block_bit_interval: 01993 { 01994 unsigned head_idx = decoder_.get_16(); 01995 unsigned tail_idx = decoder_.get_16(); 01996 for (unsigned i = head_idx; i <= tail_idx; ++i) 01997 dst_block[i] |= decoder_.get_32(); 01998 } 01999 break; 02000 case set_block_bit_0runs: 02001 { 02002 unsigned char run_type = decoder_.get_8(); 02003 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type) 02004 { 02005 unsigned run_length = decoder_.get_16(); 02006 if (run_type) 02007 { 02008 unsigned run_end = j + run_length; 02009 for (;j < run_end; ++j) 02010 { 02011 BM_ASSERT(j < bm::set_block_size); 02012 dst_block[j] |= decoder_.get_32(); 02013 } 02014 } 02015 else 02016 { 02017 j += run_length; 02018 } 02019 } // for 02020 } 02021 break; 02022 case set_block_bit_1bit: 02023 case set_block_arrbit: 02024 get_arr_bit(dst_block, false /*don't clear target*/); 02025 break; 02026 default: 02027 BM_ASSERT(0); 02028 } // switch 02029 return count; 02030 } 02031 02032 template<class DEC> 02033 unsigned 02034 serial_stream_iterator<DEC>::get_bit_block_AND(bm::word_t* dst_block, 02035 bm::word_t* tmp_block) 02036 { 02037 BM_ASSERT(this->state_ == e_bit_block); 02038 BM_ASSERT(dst_block != tmp_block); 02039 02040 unsigned count = 0; 02041 switch (block_type_) 02042 { 02043 case set_block_bit: 02044 for (unsigned i = 0; i < bm::set_block_size; i+=4) 02045 { 02046 dst_block[i] &= decoder_.get_32(); 02047 dst_block[i+1] &= decoder_.get_32(); 02048 dst_block[i+2] &= decoder_.get_32(); 02049 dst_block[i+3] &= decoder_.get_32(); 02050 } 02051 break; 02052 case set_block_bit_0runs: 02053 { 02054 unsigned char run_type = decoder_.get_8(); 02055 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type) 02056 { 02057 unsigned run_length = decoder_.get_16(); 02058 unsigned run_end = j + run_length; 02059 if (run_type) 02060 { 02061 for (;j < run_end; ++j) 02062 { 02063 BM_ASSERT(j < bm::set_block_size); 02064 dst_block[j] &= decoder_.get_32(); 02065 } 02066 } 02067 else 02068 { 02069 for (;j < run_end; ++j) 02070 { 02071 BM_ASSERT(j < bm::set_block_size); 02072 dst_block[j] = 0; 02073 } 02074 } 02075 } // for 02076 } 02077 break; 02078 case set_block_bit_interval: 02079 { 02080 unsigned head_idx = decoder_.get_16(); 02081 unsigned tail_idx = decoder_.get_16(); 02082 unsigned i; 02083 for ( i = 0; i < head_idx; ++i) 02084 dst_block[i] = 0; 02085 for ( i = head_idx; i <= tail_idx; ++i) 02086 dst_block[i] &= decoder_.get_32(); 02087 for ( i = tail_idx + 1; i < bm::set_block_size; ++i) 02088 dst_block[i] = 0; 02089 } 02090 break; 02091 case set_block_bit_1bit: 02092 case set_block_arrbit: 02093 get_arr_bit(tmp_block, true /*clear target*/); 02094 if (dst_block) 02095 bit_block_and(dst_block, tmp_block); 02096 break; 02097 default: 02098 BM_ASSERT(0); 02099 } // switch 02100 return count; 02101 } 02102 02103 template<class DEC> 02104 unsigned 02105 serial_stream_iterator<DEC>::get_bit_block_XOR(bm::word_t* dst_block, 02106 bm::word_t* tmp_block) 02107 { 02108 BM_ASSERT(this->state_ == e_bit_block); 02109 BM_ASSERT(dst_block != tmp_block); 02110 02111 unsigned count = 0; 02112 switch (block_type_) 02113 { 02114 case set_block_bit: 02115 for (unsigned i = 0; i < bm::set_block_size; ++i) 02116 dst_block[i] ^= decoder_.get_32(); 02117 break; 02118 case set_block_bit_0runs: 02119 { 02120 unsigned char run_type = decoder_.get_8(); 02121 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type) 02122 { 02123 unsigned run_length = decoder_.get_16(); 02124 if (run_type) 02125 { 02126 unsigned run_end = j + run_length; 02127 for (;j < run_end; ++j) 02128 { 02129 BM_ASSERT(j < bm::set_block_size); 02130 dst_block[j] ^= decoder_.get_32(); 02131 } 02132 } 02133 else 02134 { 02135 j += run_length; 02136 } 02137 } // for 02138 } 02139 break; 02140 case set_block_bit_interval: 02141 { 02142 unsigned head_idx = decoder_.get_16(); 02143 unsigned tail_idx = decoder_.get_16(); 02144 for (unsigned i = head_idx; i <= tail_idx; ++i) 02145 dst_block[i] ^= decoder_.get_32(); 02146 } 02147 break; 02148 case set_block_bit_1bit: 02149 // TODO: optimization 02150 case set_block_arrbit: 02151 get_arr_bit(tmp_block, true /*clear target*/); 02152 if (dst_block) 02153 { 02154 bit_block_xor(dst_block, tmp_block); 02155 } 02156 break; 02157 default: 02158 BM_ASSERT(0); 02159 } // switch 02160 return count; 02161 } 02162 02163 template<class DEC> 02164 unsigned 02165 serial_stream_iterator<DEC>::get_bit_block_SUB(bm::word_t* dst_block, 02166 bm::word_t* tmp_block) 02167 { 02168 BM_ASSERT(this->state_ == e_bit_block); 02169 BM_ASSERT(dst_block != tmp_block); 02170 02171 unsigned count = 0; 02172 switch (block_type_) 02173 { 02174 case set_block_bit: 02175 for (unsigned i = 0; i < bm::set_block_size; ++i) 02176 dst_block[i] &= ~decoder_.get_32(); 02177 break; 02178 case set_block_bit_0runs: 02179 { 02180 unsigned char run_type = decoder_.get_8(); 02181 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type) 02182 { 02183 unsigned run_length = decoder_.get_16(); 02184 if (run_type) 02185 { 02186 unsigned run_end = j + run_length; 02187 for (;j < run_end; ++j) 02188 { 02189 BM_ASSERT(j < bm::set_block_size); 02190 dst_block[j] &= ~decoder_.get_32(); 02191 } 02192 } 02193 else 02194 { 02195 j += run_length; 02196 } 02197 } // for 02198 } 02199 break; 02200 case set_block_bit_interval: 02201 { 02202 unsigned head_idx = decoder_.get_16(); 02203 unsigned tail_idx = decoder_.get_16(); 02204 for (unsigned i = head_idx; i <= tail_idx; ++i) 02205 dst_block[i] &= ~decoder_.get_32(); 02206 } 02207 break; 02208 case set_block_bit_1bit: 02209 // TODO: optimization 02210 case set_block_arrbit: 02211 get_arr_bit(tmp_block, true /*clear target*/); 02212 if (dst_block) 02213 bit_block_sub(dst_block, tmp_block); 02214 break; 02215 default: 02216 BM_ASSERT(0); 02217 } // switch 02218 return count; 02219 } 02220 02221 02222 template<class DEC> 02223 unsigned 02224 serial_stream_iterator<DEC>::get_bit_block_COUNT(bm::word_t* /*dst_block*/, 02225 bm::word_t* /*tmp_block*/) 02226 { 02227 BM_ASSERT(this->state_ == e_bit_block); 02228 02229 unsigned count = 0; 02230 switch (block_type_) 02231 { 02232 case set_block_bit: 02233 for (unsigned i = 0; i < bm::set_block_size; ++i) 02234 count += word_bitcount(decoder_.get_32()); 02235 break; 02236 case set_block_bit_0runs: 02237 { 02238 unsigned count = 0; 02239 unsigned char run_type = decoder_.get_8(); 02240 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type) 02241 { 02242 unsigned run_length = decoder_.get_16(); 02243 if (run_type) 02244 { 02245 unsigned run_end = j + run_length; 02246 for (;j < run_end; ++j) 02247 { 02248 count += word_bitcount(decoder_.get_32()); 02249 } 02250 } 02251 else 02252 { 02253 j += run_length; 02254 } 02255 } // for 02256 return count; 02257 } 02258 case set_block_bit_interval: 02259 { 02260 unsigned head_idx = decoder_.get_16(); 02261 unsigned tail_idx = decoder_.get_16(); 02262 for (unsigned i = head_idx; i <= tail_idx; ++i) 02263 count += word_bitcount(decoder_.get_32()); 02264 } 02265 break; 02266 case set_block_arrbit: 02267 count += get_arr_bit(0); 02268 break; 02269 case set_block_bit_1bit: 02270 ++count; 02271 decoder_.get_16(); 02272 break; 02273 default: 02274 BM_ASSERT(0); 02275 } // switch 02276 return count; 02277 } 02278 02279 template<class DEC> 02280 unsigned 02281 serial_stream_iterator<DEC>::get_bit_block_COUNT_A(bm::word_t* dst_block, 02282 bm::word_t* /*tmp_block*/) 02283 { 02284 BM_ASSERT(this->state_ == e_bit_block); 02285 unsigned count = 0; 02286 if (dst_block) 02287 { 02288 // count the block bitcount 02289 count = 02290 bit_block_calc_count(dst_block, 02291 dst_block + bm::set_block_size); 02292 } 02293 02294 switch (block_type_) 02295 { 02296 case set_block_bit: 02297 decoder_.get_32(0, bm::set_block_size); 02298 break; 02299 case set_block_bit_0runs: 02300 { 02301 unsigned char run_type = decoder_.get_8(); 02302 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type) 02303 { 02304 unsigned run_length = decoder_.get_16(); 02305 if (run_type) 02306 { 02307 unsigned run_end = j + run_length; 02308 for (;j < run_end; ++j) 02309 { 02310 decoder_.get_32(); 02311 } 02312 } 02313 else 02314 { 02315 j += run_length; 02316 } 02317 } // for 02318 } 02319 break; 02320 02321 case set_block_bit_interval: 02322 { 02323 unsigned head_idx = decoder_.get_16(); 02324 unsigned tail_idx = decoder_.get_16(); 02325 for (unsigned i = head_idx; i <= tail_idx; ++i) 02326 decoder_.get_32(); 02327 } 02328 break; 02329 case set_block_arrbit: 02330 get_arr_bit(0); 02331 break; 02332 case set_block_bit_1bit: 02333 decoder_.get_16(); 02334 break; 02335 default: 02336 BM_ASSERT(0); 02337 } // switch 02338 return count; 02339 } 02340 02341 02342 template<class DEC> 02343 unsigned 02344 serial_stream_iterator<DEC>::get_bit_block_COUNT_AND(bm::word_t* dst_block, 02345 bm::word_t* tmp_block) 02346 { 02347 BM_ASSERT(this->state_ == e_bit_block); 02348 BM_ASSERT(dst_block); 02349 02350 unsigned count = 0; 02351 switch (block_type_) 02352 { 02353 case set_block_bit: 02354 for (unsigned i = 0; i < bm::set_block_size; ++i) 02355 count += word_bitcount(dst_block[i] & decoder_.get_32()); 02356 break; 02357 case set_block_bit_0runs: 02358 { 02359 unsigned count = 0; 02360 unsigned char run_type = decoder_.get_8(); 02361 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type) 02362 { 02363 unsigned run_length = decoder_.get_16(); 02364 if (run_type) 02365 { 02366 unsigned run_end = j + run_length; 02367 for (;j < run_end; ++j) 02368 { 02369 count += word_bitcount(dst_block[j] & decoder_.get_32()); 02370 } 02371 } 02372 else 02373 { 02374 j += run_length; 02375 } 02376 } // for 02377 return count; 02378 } 02379 case set_block_bit_interval: 02380 { 02381 unsigned head_idx = decoder_.get_16(); 02382 unsigned tail_idx = decoder_.get_16(); 02383 for (unsigned i = head_idx; i <= tail_idx; ++i) 02384 count += word_bitcount(dst_block[i] & decoder_.get_32()); 02385 } 02386 break; 02387 case set_block_bit_1bit: 02388 // TODO: optimization 02389 case set_block_arrbit: 02390 get_arr_bit(tmp_block, true /*clear target*/); 02391 count += 02392 bit_operation_and_count(dst_block, dst_block + bm::set_block_size, 02393 tmp_block); 02394 break; 02395 default: 02396 BM_ASSERT(0); 02397 } // switch 02398 return count; 02399 } 02400 02401 template<class DEC> 02402 unsigned 02403 serial_stream_iterator<DEC>::get_bit_block_COUNT_OR(bm::word_t* dst_block, 02404 bm::word_t* tmp_block) 02405 { 02406 BM_ASSERT(this->state_ == e_bit_block); 02407 BM_ASSERT(dst_block); 02408 02409 bitblock_sum_adapter count_adapter; 02410 switch (block_type_) 02411 { 02412 case set_block_bit: 02413 { 02414 bitblock_get_adapter ga(dst_block); 02415 bit_COUNT_OR<bm::word_t> func; 02416 02417 bit_recomb(ga, 02418 decoder_, 02419 func, 02420 count_adapter 02421 ); 02422 } 02423 break; 02424 case set_block_bit_0runs: 02425 { 02426 unsigned count = 0; 02427 unsigned char run_type = decoder_.get_8(); 02428 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type) 02429 { 02430 unsigned run_length = decoder_.get_16(); 02431 unsigned run_end = j + run_length; 02432 if (run_type) 02433 { 02434 for (;j < run_end; ++j) 02435 { 02436 BM_ASSERT(j < bm::set_block_size); 02437 count += word_bitcount(dst_block[j] | decoder_.get_32()); 02438 } 02439 } 02440 else 02441 { 02442 for (;j < run_end; ++j) 02443 { 02444 BM_ASSERT(j < bm::set_block_size); 02445 count += word_bitcount(dst_block[j]); 02446 } 02447 } 02448 } // for 02449 return count; 02450 } 02451 case set_block_bit_interval: 02452 { 02453 unsigned head_idx = decoder_.get_16(); 02454 unsigned tail_idx = decoder_.get_16(); 02455 unsigned count = 0; 02456 unsigned i; 02457 for (i = 0; i < head_idx; ++i) 02458 count += word_bitcount(dst_block[i]); 02459 for (i = head_idx; i <= tail_idx; ++i) 02460 count += word_bitcount(dst_block[i] | decoder_.get_32()); 02461 for (i = tail_idx + 1; i < bm::set_block_size; ++i) 02462 count += word_bitcount(dst_block[i]); 02463 return count; 02464 } 02465 case set_block_bit_1bit: 02466 // TODO: optimization 02467 case set_block_arrbit: 02468 get_arr_bit(tmp_block, true /* clear target*/); 02469 return 02470 bit_operation_or_count(dst_block, 02471 dst_block + bm::set_block_size, 02472 tmp_block); 02473 default: 02474 BM_ASSERT(0); 02475 } // switch 02476 return count_adapter.sum(); 02477 } 02478 02479 template<class DEC> 02480 unsigned 02481 serial_stream_iterator<DEC>::get_bit_block_COUNT_XOR(bm::word_t* dst_block, 02482 bm::word_t* tmp_block) 02483 { 02484 BM_ASSERT(this->state_ == e_bit_block); 02485 BM_ASSERT(dst_block); 02486 02487 bitblock_sum_adapter count_adapter; 02488 switch (block_type_) 02489 { 02490 case set_block_bit: 02491 { 02492 bitblock_get_adapter ga(dst_block); 02493 bit_COUNT_XOR<bm::word_t> func; 02494 02495 bit_recomb(ga, 02496 decoder_, 02497 func, 02498 count_adapter 02499 ); 02500 } 02501 break; 02502 case set_block_bit_0runs: 02503 { 02504 unsigned count = 0; 02505 unsigned char run_type = decoder_.get_8(); 02506 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type) 02507 { 02508 unsigned run_length = decoder_.get_16(); 02509 unsigned run_end = j + run_length; 02510 if (run_type) 02511 { 02512 for (;j < run_end; ++j) 02513 { 02514 BM_ASSERT(j < bm::set_block_size); 02515 count += word_bitcount(dst_block[j] ^ decoder_.get_32()); 02516 } 02517 } 02518 else 02519 { 02520 for (;j < run_end; ++j) 02521 { 02522 BM_ASSERT(j < bm::set_block_size); 02523 count += word_bitcount(dst_block[j]); 02524 } 02525 } 02526 } // for 02527 return count; 02528 } 02529 case set_block_bit_interval: 02530 { 02531 unsigned head_idx = decoder_.get_16(); 02532 unsigned tail_idx = decoder_.get_16(); 02533 unsigned count = 0; 02534 unsigned i; 02535 for (i = 0; i < head_idx; ++i) 02536 count += word_bitcount(dst_block[i]); 02537 for (i = head_idx; i <= tail_idx; ++i) 02538 count += word_bitcount(dst_block[i] ^ decoder_.get_32()); 02539 for (i = tail_idx + 1; i < bm::set_block_size; ++i) 02540 count += word_bitcount(dst_block[i]); 02541 return count; 02542 } 02543 case set_block_bit_1bit: 02544 // TODO: optimization 02545 case set_block_arrbit: 02546 get_arr_bit(tmp_block, true /* clear target*/); 02547 return 02548 bit_operation_xor_count(dst_block, 02549 dst_block + bm::set_block_size, 02550 tmp_block); 02551 default: 02552 BM_ASSERT(0); 02553 } // switch 02554 return count_adapter.sum(); 02555 } 02556 02557 template<class DEC> 02558 unsigned 02559 serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_AB(bm::word_t* dst_block, 02560 bm::word_t* tmp_block) 02561 { 02562 BM_ASSERT(this->state_ == e_bit_block); 02563 BM_ASSERT(dst_block); 02564 02565 bitblock_sum_adapter count_adapter; 02566 switch (block_type_) 02567 { 02568 case set_block_bit: 02569 { 02570 bitblock_get_adapter ga(dst_block); 02571 bit_COUNT_SUB_AB<bm::word_t> func; 02572 02573 bit_recomb(ga, 02574 decoder_, 02575 func, 02576 count_adapter 02577 ); 02578 } 02579 break; 02580 case set_block_bit_0runs: 02581 { 02582 unsigned count = 0; 02583 unsigned char run_type = decoder_.get_8(); 02584 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type) 02585 { 02586 unsigned run_length = decoder_.get_16(); 02587 unsigned run_end = j + run_length; 02588 if (run_type) 02589 { 02590 for (;j < run_end; ++j) 02591 { 02592 BM_ASSERT(j < bm::set_block_size); 02593 count += word_bitcount(dst_block[j] & ~decoder_.get_32()); 02594 } 02595 } 02596 else 02597 { 02598 for (;j < run_end; ++j) 02599 { 02600 BM_ASSERT(j < bm::set_block_size); 02601 count += word_bitcount(dst_block[j]); 02602 } 02603 } 02604 } // for 02605 return count; 02606 } 02607 case set_block_bit_interval: 02608 { 02609 unsigned head_idx = decoder_.get_16(); 02610 unsigned tail_idx = decoder_.get_16(); 02611 unsigned count = 0; 02612 unsigned i; 02613 for (i = 0; i < head_idx; ++i) 02614 count += word_bitcount(dst_block[i]); 02615 for (i = head_idx; i <= tail_idx; ++i) 02616 count += word_bitcount(dst_block[i] & (~decoder_.get_32())); 02617 for (i = tail_idx + 1; i < bm::set_block_size; ++i) 02618 count += word_bitcount(dst_block[i]); 02619 return count; 02620 } 02621 break; 02622 case set_block_bit_1bit: 02623 //TODO: optimization 02624 case set_block_arrbit: 02625 get_arr_bit(tmp_block, true /* clear target*/); 02626 return 02627 bit_operation_sub_count(dst_block, 02628 dst_block + bm::set_block_size, 02629 tmp_block); 02630 default: 02631 BM_ASSERT(0); 02632 } // switch 02633 return count_adapter.sum(); 02634 } 02635 02636 template<class DEC> 02637 unsigned 02638 serial_stream_iterator<DEC>::get_bit_block_COUNT_SUB_BA(bm::word_t* dst_block, 02639 bm::word_t* tmp_block) 02640 { 02641 BM_ASSERT(this->state_ == e_bit_block); 02642 BM_ASSERT(dst_block); 02643 02644 bitblock_sum_adapter count_adapter; 02645 switch (block_type_) 02646 { 02647 case set_block_bit: 02648 { 02649 bitblock_get_adapter ga(dst_block); 02650 bit_COUNT_SUB_BA<bm::word_t> func; 02651 02652 bit_recomb(ga, 02653 decoder_, 02654 func, 02655 count_adapter 02656 ); 02657 } 02658 break; 02659 case set_block_bit_0runs: 02660 { 02661 unsigned count = 0; 02662 unsigned char run_type = decoder_.get_8(); 02663 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type) 02664 { 02665 unsigned run_length = decoder_.get_16(); 02666 unsigned run_end = j + run_length; 02667 if (run_type) 02668 { 02669 for (;j < run_end; ++j) 02670 { 02671 BM_ASSERT(j < bm::set_block_size); 02672 count += word_bitcount(decoder_.get_32() & (~dst_block[j])); 02673 } 02674 } 02675 else 02676 { 02677 j += run_length; 02678 } 02679 } // for 02680 return count; 02681 } 02682 02683 case set_block_bit_interval: 02684 { 02685 unsigned head_idx = decoder_.get_16(); 02686 unsigned tail_idx = decoder_.get_16(); 02687 unsigned count = 0; 02688 unsigned i; 02689 for (i = head_idx; i <= tail_idx; ++i) 02690 count += word_bitcount(decoder_.get_32() & (~dst_block[i])); 02691 return count; 02692 } 02693 break; 02694 case set_block_bit_1bit: 02695 // TODO: optimization 02696 case set_block_arrbit: 02697 get_arr_bit(tmp_block, true /* clear target*/); 02698 return 02699 bit_operation_sub_count(tmp_block, 02700 tmp_block + bm::set_block_size, 02701 dst_block); 02702 default: 02703 BM_ASSERT(0); 02704 } // switch 02705 return count_adapter.sum(); 02706 } 02707 02708 02709 02710 template<class DEC> 02711 unsigned serial_stream_iterator<DEC>::get_arr_bit(bm::word_t* dst_block, 02712 bool clear_target) 02713 { 02714 BM_ASSERT(this->block_type_ == set_block_arrbit || 02715 this->block_type_ == set_block_bit_1bit); 02716 02717 gap_word_t len = decoder_.get_16(); // array length / 1bit_idx 02718 if (dst_block) 02719 { 02720 if (clear_target) 02721 bit_block_set(dst_block, 0); 02722 02723 if (this->block_type_ == set_block_bit_1bit) 02724 { 02725 // len contains idx of 1 bit set 02726 set_bit(dst_block, len); 02727 return 1; 02728 } 02729 02730 for (unsigned k = 0; k < len; ++k) 02731 { 02732 gap_word_t bit_idx = decoder_.get_16(); 02733 set_bit(dst_block, bit_idx); 02734 } 02735 } 02736 else 02737 { 02738 if (this->block_type_ == set_block_bit_1bit) 02739 { 02740 return 1; // nothing to do: len var already consumed 16bits 02741 } 02742 // fwd the decocing stream 02743 decoder_.seek(len * 2); 02744 } 02745 return len; 02746 } 02747 02748 template<class DEC> 02749 unsigned serial_stream_iterator<DEC>::get_bit() 02750 { 02751 BM_ASSERT(this->block_type_ == set_block_bit_1bit); 02752 ++(this->block_idx_); 02753 this->state_ = e_blocks; 02754 02755 return decoder_.get_16(); // 1bit_idx 02756 } 02757 02758 template<class DEC> 02759 void 02760 serial_stream_iterator<DEC>::get_gap_block(bm::gap_word_t* dst_block) 02761 { 02762 BM_ASSERT(this->state_ == e_gap_block || 02763 this->block_type_ == set_block_bit_1bit); 02764 BM_ASSERT(dst_block); 02765 02766 read_gap_block(this->decoder_, 02767 this->block_type_, 02768 dst_block, 02769 this->gap_head_); 02770 02771 ++(this->block_idx_); 02772 this->state_ = e_blocks; 02773 } 02774 02775 02776 template<class DEC> 02777 unsigned 02778 serial_stream_iterator<DEC>::get_bit_block(bm::word_t* dst_block, 02779 bm::word_t* tmp_block, 02780 set_operation op) 02781 { 02782 BM_ASSERT(this->state_ == e_bit_block); 02783 get_bit_func_type bit_func = bit_func_table_[op]; 02784 BM_ASSERT(bit_func); 02785 unsigned cnt = ((*this).*(bit_func))(dst_block, tmp_block); 02786 this->state_ = e_blocks; 02787 ++(this->block_idx_); 02788 return cnt; 02789 } 02790 02791 02792 02793 template<class BV> 02794 unsigned operation_deserializer<BV>::deserialize( 02795 bvector_type& bv, 02796 const unsigned char* buf, 02797 bm::word_t* temp_block, 02798 set_operation op) 02799 { 02800 ByteOrder bo_current = globals<true>::byte_order(); 02801 02802 bm::decoder dec(buf); 02803 unsigned char header_flag = dec.get_8(); 02804 ByteOrder bo = bo_current; 02805 if (!(header_flag & BM_HM_NO_BO)) 02806 { 02807 bo = (bm::ByteOrder) dec.get_8(); 02808 } 02809 02810 blocks_manager_type& bman = bv.get_blocks_manager(); 02811 bit_block_guard<blocks_manager_type> bg(bman); 02812 if (temp_block == 0) 02813 { 02814 temp_block = bg.allocate(); 02815 } 02816 02817 if (bo_current == bo) 02818 { 02819 serial_stream_current ss(buf); 02820 return 02821 iterator_deserializer<BV, serial_stream_current>:: 02822 deserialize(bv, ss, temp_block, op); 02823 } 02824 switch (bo_current) 02825 { 02826 case BigEndian: 02827 { 02828 serial_stream_be ss(buf); 02829 return 02830 iterator_deserializer<BV, serial_stream_be>:: 02831 deserialize(bv, ss, temp_block, op); 02832 } 02833 case LittleEndian: 02834 { 02835 serial_stream_le ss(buf); 02836 return 02837 iterator_deserializer<BV, serial_stream_le>:: 02838 deserialize(bv, ss, temp_block, op); 02839 } 02840 default: 02841 BM_ASSERT(0); 02842 }; 02843 return 0; 02844 } 02845 02846 02847 template<class BV> 02848 void operation_deserializer<BV>::deserialize( 02849 bvector_type& bv_target, 02850 const bvector_type& bv_mask, 02851 const unsigned char* buf, 02852 bm::word_t* temp_block, 02853 set_operation op) 02854 { 02855 ByteOrder bo_current = globals<true>::byte_order(); 02856 02857 bm::decoder dec(buf); 02858 unsigned char header_flag = dec.get_8(); 02859 ByteOrder bo = bo_current; 02860 if (!(header_flag & BM_HM_NO_BO)) 02861 { 02862 bo = (bm::ByteOrder) dec.get_8(); 02863 } 02864 02865 blocks_manager_type& bman = bv_target.get_blocks_manager(); 02866 bit_block_guard<blocks_manager_type> bg(bman); 02867 if (temp_block == 0) 02868 { 02869 temp_block = bg.allocate(); 02870 } 02871 02872 if (bo_current == bo) 02873 { 02874 serial_stream_current ss(buf); 02875 iterator_deserializer<BV, serial_stream_current>:: 02876 deserialize(bv_target, bv_mask, ss, temp_block, op); 02877 return; 02878 } 02879 switch (bo_current) 02880 { 02881 case BigEndian: 02882 { 02883 serial_stream_be ss(buf); 02884 iterator_deserializer<BV, serial_stream_be>:: 02885 deserialize(bv_target, bv_mask, ss, temp_block, op); 02886 } 02887 case LittleEndian: 02888 { 02889 serial_stream_le ss(buf); 02890 iterator_deserializer<BV, serial_stream_le>:: 02891 deserialize(bv_target, bv_mask, ss, temp_block, op); 02892 } 02893 default: 02894 BM_ASSERT(0); 02895 }; 02896 } 02897 02898 02899 template<class BV, class SerialIterator> 02900 void iterator_deserializer<BV, SerialIterator>::load_id_list( 02901 bvector_type& bv, 02902 serial_iterator_type& sit, 02903 unsigned id_count, 02904 bool set_clear) 02905 { 02906 const unsigned win_size = 64; 02907 bm::id_t id_buffer[win_size+1]; 02908 02909 if (set_clear) // set bits 02910 { 02911 for (unsigned i = 0; i <= id_count;) 02912 { 02913 unsigned j; 02914 for (j = 0; j < win_size && i <= id_count; ++j, ++i) 02915 { 02916 id_buffer[j] = sit.get_id(); 02917 sit.next(); 02918 } // for j 02919 bm::combine_or(bv, id_buffer, id_buffer + j); 02920 } // for i 02921 } 02922 else // clear bits 02923 { 02924 for (unsigned i = 0; i <= id_count;) 02925 { 02926 unsigned j; 02927 for (j = 0; j < win_size && i <= id_count; ++j, ++i) 02928 { 02929 id_buffer[j] = sit.get_id(); 02930 sit.next(); 02931 } // for j 02932 bm::combine_sub(bv, id_buffer, id_buffer + j); 02933 } // for i 02934 } 02935 } 02936 02937 template<class BV, class SerialIterator> 02938 unsigned 02939 iterator_deserializer<BV, SerialIterator>::finalize_target_vector( 02940 blocks_manager_type& bman, 02941 set_operation op, 02942 unsigned bv_block_idx) 02943 { 02944 unsigned count = 0; 02945 switch (op) 02946 { 02947 case set_OR: case set_SUB: case set_XOR: 02948 case set_COUNT: case set_COUNT_B: case set_COUNT_AND: 02949 case set_COUNT_SUB_BA: 02950 // nothing to do 02951 break; 02952 case set_AND: case set_ASSIGN: 02953 // clear the rest of the target vector 02954 { 02955 unsigned i, j; 02956 bman.get_block_coord(bv_block_idx, &i, &j); 02957 bm::word_t*** blk_root = bman.get_rootblock(); 02958 unsigned effective_top_size = 02959 bman.effective_top_block_size(); 02960 for (;i < effective_top_size; ++i) 02961 { 02962 bm::word_t** blk_blk = blk_root[i]; 02963 if (blk_blk == 0) 02964 { 02965 bv_block_idx+=bm::set_array_size-j; 02966 j = 0; 02967 continue; 02968 } 02969 for (;j < bm::set_array_size; ++j, ++bv_block_idx) 02970 { 02971 if (blk_blk[j]) 02972 bman.zero_block(bv_block_idx); 02973 } // for j 02974 j = 0; 02975 } // for i 02976 02977 } 02978 break; 02979 case set_COUNT_A: case set_COUNT_OR: case set_COUNT_XOR: 02980 case set_COUNT_SUB_AB: 02981 // count bits in the target vector 02982 { 02983 unsigned i, j; 02984 bman.get_block_coord(bv_block_idx, &i, &j); 02985 bm::word_t*** blk_root = bman.get_rootblock(); 02986 unsigned effective_top_size = 02987 bman.effective_top_block_size(); 02988 for (;i < effective_top_size; ++i) 02989 { 02990 bm::word_t** blk_blk = blk_root[i]; 02991 if (blk_blk == 0) 02992 { 02993 bv_block_idx+=bm::set_array_size-j; 02994 j = 0; 02995 continue; 02996 } 02997 for (;j < bm::set_array_size; ++j, ++bv_block_idx) 02998 { 02999 if (blk_blk[j]) 03000 count += bman.block_bitcount(blk_blk[j]);//, bv_block_idx); 03001 } // for j 03002 j = 0; 03003 } // for i 03004 03005 } 03006 break; 03007 default: 03008 BM_ASSERT(0); 03009 } 03010 return count; 03011 } 03012 03013 template<class BV, class SerialIterator> 03014 unsigned 03015 iterator_deserializer<BV, SerialIterator>::process_id_list( 03016 bvector_type& bv, 03017 serial_iterator_type& sit, 03018 set_operation op) 03019 { 03020 unsigned count = 0; 03021 unsigned id_count = sit.get_id_count(); 03022 bool set_clear = true; 03023 switch (op) 03024 { 03025 case set_AND: 03026 { 03027 // TODO: use some more optimal algorithm without temp vector 03028 BV bv_tmp(BM_GAP); 03029 load_id_list(bv_tmp, sit, id_count, true); 03030 bv &= bv_tmp; 03031 } 03032 break; 03033 case set_ASSIGN: 03034 bv.clear(true); 03035 // intentional case fall through here (not a bug) 03036 case set_OR: 03037 set_clear = true; 03038 // fall to SUB 03039 case set_SUB: 03040 load_id_list(bv, sit, id_count, set_clear); 03041 break; 03042 case set_XOR: 03043 for (unsigned i = 0; i < id_count; ++i) 03044 { 03045 bm::id_t id = sit.get_id(); 03046 bv[id] ^= true; 03047 sit.next(); 03048 } // for 03049 break; 03050 case set_COUNT: case set_COUNT_B: 03051 for (unsigned i = 0; i < id_count; ++i) 03052 { 03053 /* bm::id_t id = */ sit.get_id(); 03054 ++count; 03055 sit.next(); 03056 } // for 03057 break; 03058 case set_COUNT_A: 03059 return bv.count(); 03060 case set_COUNT_AND: 03061 for (unsigned i = 0; i < id_count; ++i) 03062 { 03063 bm::id_t id = sit.get_id(); 03064 count += bv.get_bit(id); 03065 sit.next(); 03066 } // for 03067 break; 03068 case set_COUNT_XOR: 03069 { 03070 // TODO: get rid of the temp vector 03071 BV bv_tmp(BM_GAP); 03072 load_id_list(bv_tmp, sit, id_count, true); 03073 count += count_xor(bv, bv_tmp); 03074 } 03075 break; 03076 case set_COUNT_OR: 03077 { 03078 // TODO: get rid of the temp. vector 03079 BV bv_tmp(BM_GAP); 03080 load_id_list(bv_tmp, sit, id_count, true); 03081 count += count_or(bv, bv_tmp); 03082 } 03083 break; 03084 case set_COUNT_SUB_AB: 03085 { 03086 // TODO: get rid of the temp. vector 03087 BV bv_tmp(bv); 03088 load_id_list(bv_tmp, sit, id_count, false); 03089 count += bv_tmp.count(); 03090 } 03091 break; 03092 default: 03093 BM_ASSERT(0); 03094 } // switch 03095 03096 return count; 03097 } 03098 03099 03100 template<class BV, class SerialIterator> 03101 void iterator_deserializer<BV, SerialIterator>::deserialize( 03102 bvector_type& bv_target, 03103 const bvector_type& bv_mask, 03104 serial_iterator_type& sit, 03105 bm::word_t* temp_block, 03106 set_operation op) 03107 { 03108 BM_ASSERT(temp_block); 03109 BM_ASSERT(op == bm::set_AND || 03110 op == bm::set_OR || op == bm::set_XOR || op == bm::set_SUB); 03111 03112 gap_word_t gap_temp_block[bm::gap_equiv_len*3]; 03113 gap_temp_block[0] = 0; 03114 03115 bv_target.clear(true); // clear and free the memory 03116 03117 const blocks_manager_type& bman_mask = bv_mask.get_blocks_manager(); 03118 blocks_manager_type& bman_target = bv_target.get_blocks_manager(); 03119 03120 unsigned bv_size = sit.bv_size(); 03121 if (bv_mask.size() > bv_size) 03122 { 03123 bv_size = bv_mask.size(); 03124 } 03125 if (bv_target.size() < bv_size) 03126 { 03127 bv_target.resize(bv_size); 03128 } 03129 03130 unsigned top_blocks = bman_mask.effective_top_block_size(); 03131 03132 BM_SET_MMX_GUARD 03133 03134 typename serial_iterator_type::iterator_state state; 03135 state = sit.get_state(); 03136 if (state == serial_iterator_type::e_list_ids) 03137 { 03138 bv_target = bv_mask; 03139 process_id_list(bv_target, sit, op); 03140 return; 03141 } 03142 03143 unsigned bv_block_idx = 0; 03144 for (;1;) 03145 { 03146 bv_block_idx = sit.block_idx(); 03147 // early exit check to avoid over-scan 03148 { 03149 unsigned tb_idx = bv_block_idx >> bm::set_array_shift; // current top block 03150 if (tb_idx > top_blocks) 03151 { 03152 if (op == bm::set_AND) 03153 break; 03154 if (sit.is_eof()) 03155 break; 03156 } 03157 } 03158 03159 if (sit.is_eof()) // argument stream ended 03160 { 03161 if (op == bm::set_AND) 03162 break; 03163 // (OR, SUB, XOR) need to scan fwd until mask vector ends 03164 state = serial_iterator_type::e_zero_blocks; 03165 } 03166 else 03167 { 03168 state = sit.state(); 03169 } 03170 03171 switch (state) 03172 { 03173 case serial_iterator_type::e_blocks: 03174 sit.next(); 03175 continue; 03176 case serial_iterator_type::e_bit_block: 03177 { 03178 bm::set_operation sop = op; 03179 const bm::word_t* blk_mask = bman_mask.get_block(bv_block_idx); 03180 bm::word_t* blk_target = 0; 03181 if (!blk_mask) 03182 { 03183 switch (op) 03184 { 03185 case set_AND: case set_SUB: 03186 // first arg is 0, so the operation gives us zero 03187 // all we need to do is to seek the input stream 03188 sop = set_ASSIGN; 03189 break; 03190 case set_OR: case set_XOR: 03191 blk_target = bman_target.make_bit_block(bv_block_idx); 03192 break; 03193 default: 03194 BM_ASSERT(0); 03195 } 03196 } 03197 else // block exists 03198 { 03199 int is_gap = BM_IS_GAP(blk_mask); 03200 blk_target = bman_target.copy_bit_block(bv_block_idx, blk_mask, is_gap); 03201 } 03202 03203 // 2 bit blocks recombination 03204 sit.get_bit_block(blk_target, temp_block, sop); 03205 } 03206 break; 03207 03208 case serial_iterator_type::e_zero_blocks: 03209 { 03210 if (op == set_AND) 03211 { 03212 sit.skip_mono_blocks(); 03213 break; 03214 } 03215 sit.next(); 03216 // set_SUB: set_OR: set_XOR: 03217 bman_target.copy_block(bv_block_idx, bman_mask); 03218 } 03219 break; 03220 03221 case serial_iterator_type::e_one_blocks: 03222 { 03223 BM_ASSERT(bv_block_idx == sit.block_idx()); 03224 const bm::word_t* blk_mask = bman_mask.get_block(bv_block_idx); 03225 sit.next(); 03226 03227 switch (op) 03228 { 03229 case set_OR: 03230 bman_target.set_block_all_set(bv_block_idx); 03231 break; 03232 case set_SUB: 03233 break; 03234 case set_AND: 03235 bman_target.copy_block(bv_block_idx, bman_mask); 03236 break; 03237 case set_XOR: 03238 if (blk_mask) 03239 { 03240 int is_gap = BM_IS_GAP(blk_mask); 03241 bm::word_t* blk_target = 03242 bman_target.copy_bit_block(bv_block_idx, blk_mask, is_gap); 03243 bit_block_xor(blk_target, FULL_BLOCK_ADDR); 03244 } 03245 else 03246 { 03247 // 0 XOR 1 = 1 03248 bman_target.set_block_all_set(bv_block_idx); 03249 } 03250 break; 03251 default: 03252 BM_ASSERT(0); 03253 } // switch 03254 03255 } 03256 break; 03257 03258 case serial_iterator_type::e_gap_block: 03259 { 03260 // Single bit-in-block optimization 03261 if (sit.get_block_type() == set_block_bit_1bit) 03262 { 03263 if (op == set_AND) 03264 { 03265 unsigned bit_idx = sit.get_bit(); 03266 unsigned bn = (bv_block_idx << bm::set_block_shift) | bit_idx; 03267 bool bval_mask = bv_mask.test(bn); 03268 bv_target.set_bit(bn, bval_mask); 03269 break; 03270 } 03271 } 03272 03273 const bm::word_t* blk_mask = bman_mask.get_block(bv_block_idx); 03274 03275 sit.get_gap_block(gap_temp_block); 03276 // gap_word_t gap_head = gap_temp_block[0]; 03277 03278 unsigned len = gap_length(gap_temp_block); 03279 int level = gap_calc_level(len, bman_target.glen()); 03280 --len; 03281 03282 03283 if (!blk_mask) 03284 { 03285 if (op == set_OR || op == set_XOR) 03286 { 03287 bman_target.set_gap_block(bv_block_idx, gap_temp_block, level); 03288 } 03289 } 03290 else // mask block exists 03291 { 03292 03293 bm::operation bop = bm::setop2op(op); 03294 bman_target.copy_block(bv_block_idx, bman_mask); 03295 bv_target.combine_operation_with_block( 03296 bv_block_idx, 03297 (bm::word_t*)gap_temp_block, 03298 1, // GAP 03299 bop); 03300 } 03301 03302 } 03303 break; 03304 03305 default: 03306 BM_ASSERT(0); 03307 } // switch 03308 03309 03310 } // for (deserialization) 03311 03312 bv_target.forget_count(); 03313 } 03314 03315 03316 03317 template<class BV, class SerialIterator> 03318 unsigned 03319 iterator_deserializer<BV, SerialIterator>::deserialize( 03320 bvector_type& bv, 03321 serial_iterator_type& sit, 03322 bm::word_t* temp_block, 03323 set_operation op) 03324 { 03325 BM_ASSERT(temp_block); 03326 03327 unsigned count = 0; 03328 gap_word_t gap_temp_block[bm::gap_equiv_len*3]; 03329 gap_temp_block[0] = 0; 03330 03331 blocks_manager_type& bman = bv.get_blocks_manager(); 03332 03333 bv.forget_count(); 03334 if (sit.bv_size() && (sit.bv_size() > bv.size())) 03335 { 03336 bv.resize(sit.bv_size()); 03337 } 03338 03339 BM_SET_MMX_GUARD 03340 03341 typename serial_iterator_type::iterator_state state; 03342 state = sit.get_state(); 03343 if (state == serial_iterator_type::e_list_ids) 03344 { 03345 count = process_id_list(bv, sit, op); 03346 return count; 03347 } 03348 03349 unsigned bv_block_idx = 0; 03350 03351 for (;1;) 03352 { 03353 bm::set_operation sop = op; 03354 if (sit.is_eof()) // argument stream ended 03355 { 03356 count += finalize_target_vector(bman, op, bv_block_idx); 03357 return count; 03358 } 03359 03360 state = sit.state(); 03361 switch (state) 03362 { 03363 case serial_iterator_type::e_blocks: 03364 sit.next(); 03365 continue; 03366 case serial_iterator_type::e_bit_block: 03367 { 03368 BM_ASSERT(sit.block_idx() == bv_block_idx); 03369 bm::word_t* blk = bman.get_block(bv_block_idx); 03370 03371 if (!blk) 03372 { 03373 switch (op) 03374 { 03375 case set_AND: case set_SUB: case set_COUNT_AND: 03376 case set_COUNT_SUB_AB: case set_COUNT_A: 03377 // one arg is 0, so the operation gives us zero 03378 // all we need to do is to seek the input stream 03379 sop = set_ASSIGN; 03380 break; 03381 03382 case set_OR: case set_XOR: case set_ASSIGN: 03383 blk = bman.make_bit_block(bv_block_idx); 03384 break; 03385 03386 case set_COUNT: case set_COUNT_XOR: case set_COUNT_OR: 03387 case set_COUNT_SUB_BA: case set_COUNT_B: 03388 // first arg is not required (should work as is) 03389 sop = set_COUNT; 03390 break; 03391 03392 case set_END: 03393 default: 03394 BM_ASSERT(0); 03395 } 03396 } 03397 else // block exists 03398 { 03399 int is_gap = BM_IS_GAP(blk); 03400 if (is_gap || IS_FULL_BLOCK(blk)) 03401 { 03402 if (IS_FULL_BLOCK(blk) && is_const_set_operation(op)) 03403 { 03404 } 03405 else 03406 { 03407 // TODO: make sure const operations do not 03408 // deoptimize GAP blocks 03409 blk = bman.deoptimize_block(bv_block_idx); 03410 } 03411 } 03412 } 03413 03414 // 2 bit blocks recombination 03415 unsigned c = sit.get_bit_block(blk, temp_block, sop); 03416 count += c; 03417 } 03418 break; 03419 03420 case serial_iterator_type::e_zero_blocks: 03421 { 03422 BM_ASSERT(bv_block_idx == sit.block_idx()); 03423 bm::word_t* blk = bman.get_block(bv_block_idx); 03424 sit.next(); 03425 03426 if (blk) 03427 { 03428 switch (op) 03429 { 03430 case set_AND: case set_ASSIGN: 03431 // the result is 0 03432 blk = bman.zero_block(bv_block_idx); 03433 break; 03434 03435 case set_SUB: case set_COUNT_AND: case set_OR: 03436 case set_XOR: case set_COUNT_SUB_BA: case set_COUNT_B: 03437 // nothing to do 03438 break; 03439 03440 case set_COUNT_SUB_AB: case set_COUNT_A: case set_COUNT_OR: 03441 case set_COUNT: case set_COUNT_XOR: 03442 // valid bit block recombined with 0 block 03443 // results with same block data 03444 // all we need is to bitcount bv block 03445 { 03446 unsigned c = bman.block_bitcount(blk);//, bv_block_idx); 03447 count += c; 03448 } 03449 break; 03450 case set_END: 03451 default: 03452 BM_ASSERT(0); 03453 } // switch op 03454 } // if blk 03455 } 03456 break; 03457 03458 case serial_iterator_type::e_one_blocks: 03459 { 03460 BM_ASSERT(bv_block_idx == sit.block_idx()); 03461 bm::word_t* blk = bman.get_block(bv_block_idx); 03462 sit.next(); 03463 03464 switch (op) 03465 { 03466 case set_OR: case set_ASSIGN: 03467 bman.set_block_all_set(bv_block_idx); 03468 break; 03469 case set_COUNT_OR: case set_COUNT_B: case set_COUNT: 03470 // block becomes all set 03471 count += bm::bits_in_block; 03472 break; 03473 case set_SUB: 03474 blk = bman.zero_block(bv_block_idx); 03475 break; 03476 case set_COUNT_SUB_AB: case set_AND: 03477 // nothing to do 03478 break; 03479 case set_COUNT_AND: case set_COUNT_A: 03480 count += bman.block_bitcount(blk);//, bv_block_idx); 03481 break; 03482 default: 03483 if (blk) 03484 { 03485 switch (op) 03486 { 03487 case set_XOR: 03488 blk = bman.deoptimize_block(bv_block_idx); 03489 bit_block_xor(blk, FULL_BLOCK_ADDR); 03490 break; 03491 case set_COUNT_XOR: 03492 { 03493 count += 03494 combine_count_operation_with_block( 03495 blk, 03496 FULL_BLOCK_ADDR, 03497 bm::COUNT_XOR); 03498 } 03499 break; 03500 case set_COUNT_SUB_BA: 03501 { 03502 count += 03503 combine_count_operation_with_block( 03504 blk, 03505 FULL_BLOCK_ADDR, 03506 bm::COUNT_SUB_BA); 03507 } 03508 break; 03509 default: 03510 BM_ASSERT(0); 03511 } // switch 03512 } 03513 else // blk == 0 03514 { 03515 switch (op) 03516 { 03517 case set_XOR: 03518 // 0 XOR 1 = 1 03519 bman.set_block_all_set(bv_block_idx); 03520 break; 03521 case set_COUNT_XOR: 03522 count += bm::bits_in_block; 03523 break; 03524 case set_COUNT_SUB_BA: 03525 // 1 - 0 = 1 03526 count += bm::bits_in_block; 03527 break; 03528 default: 03529 break; 03530 } // switch 03531 } // else 03532 } // switch 03533 03534 } 03535 break; 03536 03537 case serial_iterator_type::e_gap_block: 03538 { 03539 BM_ASSERT(bv_block_idx == sit.block_idx()); 03540 bm::word_t* blk = bman.get_block(bv_block_idx); 03541 03542 sit.get_gap_block(gap_temp_block); 03543 03544 unsigned len = gap_length(gap_temp_block); 03545 int level = gap_calc_level(len, bman.glen()); 03546 --len; 03547 03548 bool const_op = is_const_set_operation(op); 03549 if (const_op) 03550 { 03551 distance_metric metric = operation2metric(op); 03552 bm::word_t* gptr = (bm::word_t*)gap_temp_block; 03553 BMSET_PTRGAP(gptr); 03554 unsigned c = 03555 combine_count_operation_with_block( 03556 blk, 03557 gptr, 03558 metric); 03559 count += c; 03560 } 03561 else // non-const set operation 03562 { 03563 if ((sop == set_ASSIGN) && blk) // target block override 03564 { 03565 blk = bman.zero_block(bv_block_idx); 03566 sop = set_OR; 03567 } 03568 if (blk == 0) // target block not found 03569 { 03570 switch (sop) 03571 { 03572 case set_AND: case set_SUB: 03573 break; 03574 case set_OR: case set_XOR: case set_ASSIGN: 03575 bman.set_gap_block( 03576 bv_block_idx, gap_temp_block, level); 03577 break; 03578 default: 03579 BM_ASSERT(0); 03580 } // switch 03581 } 03582 else // target block exists 03583 { 03584 bm::operation bop = bm::setop2op(op); 03585 if (level == -1) // too big to GAP 03586 { 03587 gap_convert_to_bitset(temp_block, gap_temp_block); 03588 bv.combine_operation_with_block(bv_block_idx, 03589 temp_block, 03590 0, // BIT 03591 bop); 03592 } 03593 else // GAP fits 03594 { 03595 set_gap_level(gap_temp_block, level); 03596 bv.combine_operation_with_block( 03597 bv_block_idx, 03598 (bm::word_t*)gap_temp_block, 03599 1, // GAP 03600 bop); 03601 } 03602 } 03603 } 03604 } 03605 break; 03606 03607 default: 03608 BM_ASSERT(0); 03609 } // switch 03610 03611 ++bv_block_idx; 03612 03613 } // for (deserialization) 03614 03615 return count; 03616 } 03617 03618 03619 03620 03621 } // namespace bm 03622 03623 #include "bmundef.h" 03624 03625 #ifdef _MSC_VER 03626 #pragma warning( pop ) 03627 #endif 03628 03629 03630 #endif