dlvhex 2.5.0
00001 #ifndef BMFUNC__H__INCLUDED__ 00002 #define BMFUNC__H__INCLUDED__ 00003 /* 00004 Copyright(c) 2002-2010 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com) 00005 00006 Permission is hereby granted, free of charge, to any person 00007 obtaining a copy of this software and associated documentation 00008 files (the "Software"), to deal in the Software without restriction, 00009 including without limitation the rights to use, copy, modify, merge, 00010 publish, distribute, sublicense, and/or sell copies of the Software, 00011 and to permit persons to whom the Software is furnished to do so, 00012 subject to the following conditions: 00013 00014 The above copyright notice and this permission notice shall be included 00015 in all copies or substantial portions of the Software. 00016 00017 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 00018 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 00019 OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 00020 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 00021 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 00022 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 00023 OTHER DEALINGS IN THE SOFTWARE. 00024 00025 For more information please visit: http://bmagic.sourceforge.net 00026 00027 */ 00028 00029 #include <memory.h> 00030 00031 #include "bmdef.h" 00032 #include "bmutil.h" 00033 00034 00035 #ifdef _MSC_VER 00036 # pragma warning( disable: 4146 ) 00037 #endif 00038 00039 namespace bm 00040 { 00041 00042 // added by PS: forward required by clang 00043 inline 00044 bm::id_t bit_block_any_range(const bm::word_t* block, 00045 bm::word_t left, 00046 bm::word_t right); 00047 // added by PS: forward required by clang 00048 inline 00049 bm::id_t bit_block_calc_count_range(const bm::word_t* block, 00050 bm::word_t left, 00051 bm::word_t right); 00052 00053 00059 struct bv_statistics 00060 { 00062 unsigned bit_blocks; 00064 unsigned gap_blocks; 00066 unsigned max_serialize_mem; 00068 unsigned memory_used; 00070 gap_word_t gap_length[bm::set_total_blocks]; 00072 gap_word_t gap_levels[bm::gap_levels]; 00073 00074 00075 00077 void add_bit_block() 00078 { 00079 ++bit_blocks; 00080 unsigned mem_used = sizeof(bm::word_t) * bm::set_block_size; 00081 memory_used += mem_used; 00082 max_serialize_mem += mem_used; 00083 } 00084 00086 void add_gap_block(unsigned capacity, unsigned length) 00087 { 00088 ++gap_blocks; 00089 unsigned mem_used = capacity * sizeof(gap_word_t); 00090 memory_used += mem_used; 00091 max_serialize_mem += length * sizeof(gap_word_t); 00092 } 00093 }; 00094 00095 00113 template<bool T> struct gap_len_table 00114 { 00115 static const gap_word_t _len[bm::gap_levels]; 00116 }; 00117 00118 template<bool T> 00119 const gap_word_t gap_len_table<T>::_len[bm::gap_levels] = 00120 { 128, 256, 512, bm::gap_max_buff_len }; 00121 00122 00128 template<bool T> struct gap_len_table_min 00129 { 00130 static const gap_word_t _len[bm::gap_levels]; 00131 }; 00132 00133 template<bool T> 00134 const gap_word_t gap_len_table_min<T>::_len[bm::gap_levels] = 00135 { 32, 96, 128, 512 }; 00136 00137 00138 00139 //--------------------------------------------------------------------- 00140 00144 template<bool T> struct block_set_table 00145 { 00146 static const unsigned _left[32]; 00147 static const unsigned _right[32]; 00148 }; 00149 00150 template<bool T> 00151 const unsigned block_set_table<T>::_left[32] = { 00152 0x1, 0x3, 0x7, 0xf, 
0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff, 0x7ff, 00153 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff, 0x1ffff, 0x3ffff, 0x7ffff, 00154 0xfffff, 0x1fffff, 0x3fffff, 0x7fffff, 0xffffff, 0x1ffffff, 0x3ffffff, 00155 0x7ffffff, 0xfffffff, 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff 00156 }; 00157 00158 template<bool T> 00159 const unsigned block_set_table<T>::_right[32] = { 00160 0xffffffff, 0xfffffffe, 0xfffffffc, 0xfffffff8, 0xfffffff0, 00161 0xffffffe0, 0xffffffc0, 0xffffff80, 0xffffff00, 0xfffffe00, 00162 0xfffffc00, 0xfffff800, 0xfffff000, 0xffffe000, 0xffffc000, 00163 0xffff8000, 0xffff0000, 0xfffe0000, 0xfffc0000, 0xfff80000, 00164 0xfff00000, 0xffe00000, 0xffc00000, 0xff800000, 0xff000000, 00165 0xfe000000, 0xfc000000, 0xf8000000, 0xf0000000, 0xe0000000, 00166 0xc0000000, 0x80000000 00167 }; 00168 00169 00170 00175 BMFORCEINLINE 00176 bm::id_t word_bitcount(bm::id_t w) 00177 { 00178 #ifdef BMSSE4OPT 00179 return _mm_popcnt_u32(w); 00180 #else 00181 return 00182 bm::bit_count_table<true>::_count[(unsigned char)(w)] + 00183 bm::bit_count_table<true>::_count[(unsigned char)((w) >> 8)] + 00184 bm::bit_count_table<true>::_count[(unsigned char)((w) >> 16)] + 00185 bm::bit_count_table<true>::_count[(unsigned char)((w) >> 24)]; 00186 #endif 00187 } 00188 00189 inline 00190 int parallel_popcnt_32(unsigned int n) 00191 { 00192 unsigned int tmp; 00193 00194 tmp = n - ((n >> 1) & 033333333333) 00195 - ((n >> 2) & 011111111111); 00196 return ((tmp + (tmp >> 3)) & 030707070707) % 63; 00197 } 00198 00199 #ifdef BM64OPT 00200 00205 inline 00206 int word_bitcount64(bm::id64_t x) 00207 { 00208 x = x - ((x >> 1) & 0x5555555555555555); 00209 x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333); 00210 x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0F; 00211 x = x + (x >> 8); 00212 x = x + (x >> 16); 00213 x = x + (x >> 32); 00214 return x & 0xFF; 00215 } 00216 00217 inline 00218 unsigned bitcount64_4way(bm::id64_t x, bm::id64_t y, 00219 bm::id64_t u, bm::id64_t v) 00220 { 00221 const bm::id64_t m1 = 0x5555555555555555; 00222 const bm::id64_t m2 = 0x3333333333333333; 00223 const bm::id64_t m3 = 0x0F0F0F0F0F0F0F0F; 00224 const bm::id64_t m4 = 0x000000FF000000FF; 00225 00226 x = x - ((x >> 1) & m1); 00227 y = y - ((y >> 1) & m1); 00228 u = u - ((u >> 1) & m1); 00229 v = v - ((v >> 1) & m1); 00230 x = (x & m2) + ((x >> 2) & m2); 00231 y = (y & m2) + ((y >> 2) & m2); 00232 u = (u & m2) + ((u >> 2) & m2); 00233 v = (v & m2) + ((v >> 2) & m2); 00234 x = x + y; 00235 u = u + v; 00236 x = (x & m3) + ((x >> 4) & m3); 00237 u = (u & m3) + ((u >> 4) & m3); 00238 x = x + u; 00239 x = x + (x >> 8); 00240 x = x + (x >> 16); 00241 x = x & m4; 00242 x = x + (x >> 32); 00243 return x & 0x000001FF; 00244 } 00245 00246 00247 #endif 00248 00249 00250 00251 //--------------------------------------------------------------------- 00252 00256 enum set_operation 00257 { 00258 set_AND = 0, 00259 set_OR = 1, 00260 set_SUB = 2, 00261 set_XOR = 3, 00262 set_ASSIGN = 4, 00263 set_COUNT = 5, 00264 set_COUNT_AND = 6, 00265 set_COUNT_XOR = 7, 00266 set_COUNT_OR = 8, 00267 set_COUNT_SUB_AB= 9, 00268 set_COUNT_SUB_BA= 10, 00269 set_COUNT_A = 11, 00270 set_COUNT_B = 12, 00271 00272 set_END 00273 }; 00274 00276 inline 00277 bool is_const_set_operation(set_operation op) 00278 { 00279 return (int(op) >= int(set_COUNT)); 00280 } 00281 00285 enum operation 00286 { 00287 BM_AND = set_AND, 00288 BM_OR = set_OR, 00289 BM_SUB = set_SUB, 00290 BM_XOR = set_XOR 00291 }; 00292 00296 inline 00297 bm::operation setop2op(bm::set_operation op) 00298 { 00299 
BM_ASSERT(op == set_AND || 00300 op == set_OR || 00301 op == set_SUB || 00302 op == set_XOR); 00303 return (bm::operation) op; 00304 } 00305 00306 //--------------------------------------------------------------------- 00307 00312 template<bool T> struct all_set 00313 { 00314 struct BM_ALIGN16 all_set_block 00315 { 00316 bm::word_t _p[bm::set_block_size] BM_ALIGN16ATTR; 00317 00318 all_set_block() 00319 { 00320 ::memset(_p, 0xFF, sizeof(_p)); 00321 } 00322 }; 00323 00324 static all_set_block _block; 00325 }; 00326 00327 00328 template<bool T> typename all_set<T>::all_set_block all_set<T>::_block; 00329 00331 template<typename W> 00332 void xor_swap(W& x, W& y) 00333 { 00334 BM_ASSERT(&x != &y); 00335 x ^= y; 00336 y ^= x; 00337 x ^= y; 00338 } 00339 00340 00341 //--------------------------------------------------------------------- 00342 00352 template<typename T> int wordcmp0(T w1, T w2) 00353 { 00354 while (w1 != w2) 00355 { 00356 int res = (w1 & 1) - (w2 & 1); 00357 if (res != 0) return res; 00358 w1 >>= 1; 00359 w2 >>= 1; 00360 } 00361 return 0; 00362 } 00363 00364 00374 /* 00375 template<typename T> int wordcmp(T w1, T w2) 00376 { 00377 T diff = w1 ^ w2; 00378 return diff ? ((w1 & diff & (diff ^ (diff - 1)))? 1 : -1) : 0; 00379 } 00380 */ 00381 00382 template<typename T> int wordcmp(T a, T b) 00383 { 00384 T diff = a ^ b; 00385 return diff? ( (a & diff & -diff)? 1 : -1 ) : 0; 00386 } 00387 00388 00389 // Low bit extraction 00390 // x & (x ^ (x-1)) 00391 00392 00396 template<bool T> struct _copyright 00397 { 00398 static const char _p[]; 00399 }; 00400 00401 template<bool T> const char _copyright<T>::_p[] = 00402 "BitMagic C++ Library. v.3.7.0 (c) 2002-2010 Anatoliy Kuznetsov."; 00403 00404 00408 enum ByteOrder 00409 { 00410 BigEndian = 0, 00411 LittleEndian = 1 00412 }; 00413 00414 00418 template<bool T> struct globals 00419 { 00420 struct bo 00421 { 00422 ByteOrder _byte_order; 00423 00424 bo() 00425 { 00426 unsigned x; 00427 unsigned char *s = (unsigned char *)&x; 00428 s[0] = 1; 00429 s[1] = 2; 00430 s[2] = 3; 00431 s[3] = 4; 00432 00433 if(x == 0x04030201) 00434 { 00435 _byte_order = LittleEndian; 00436 return; 00437 } 00438 00439 if(x == 0x01020304) 00440 { 00441 _byte_order = BigEndian; 00442 return; 00443 } 00444 00445 BM_ASSERT(0); // "Invalid Byte Order\n" 00446 _byte_order = LittleEndian; 00447 } 00448 }; 00449 00450 static bo _bo; 00451 00452 static ByteOrder byte_order() { return _bo._byte_order; } 00453 00454 }; 00455 00456 template<bool T> typename globals<T>::bo globals<T>::_bo; 00457 00458 00459 00460 00461 00462 00463 /* 00464 \brief Binary search for the block where bit = pos located. 00465 \param buf - GAP buffer pointer. 00466 \param pos - index of the element. 00467 \param is_set - output. GAP value (0 or 1). 00468 \return GAP index. 
00469 @ingroup gapfunc 00470 */ 00471 template<typename T> 00472 unsigned gap_bfind(const T* buf, unsigned pos, unsigned* is_set) 00473 { 00474 BM_ASSERT(pos < bm::gap_max_bits); 00475 *is_set = (*buf) & 1; 00476 00477 register unsigned start = 1; 00478 register unsigned end = 1 + ((*buf) >> 3); 00479 00480 while ( start != end ) 00481 { 00482 unsigned curr = (start + end) >> 1; 00483 if ( buf[curr] < pos ) 00484 start = curr + 1; 00485 else 00486 end = curr; 00487 } 00488 *is_set ^= ((start-1) & 1); 00489 return start; 00490 } 00491 00492 00500 template<typename T> unsigned gap_test(const T* buf, unsigned pos) 00501 { 00502 BM_ASSERT(pos < bm::gap_max_bits); 00503 00504 unsigned start = 1; 00505 unsigned end = 1 + ((*buf) >> 3); 00506 00507 if (end - start < 10) 00508 { 00509 unsigned sv = *buf & 1; 00510 unsigned sv1= sv ^ 1; 00511 if (buf[1] >= pos) return sv; 00512 if (buf[2] >= pos) return sv1; 00513 if (buf[3] >= pos) return sv; 00514 if (buf[4] >= pos) return sv1; 00515 if (buf[5] >= pos) return sv; 00516 if (buf[6] >= pos) return sv1; 00517 if (buf[7] >= pos) return sv; 00518 if (buf[8] >= pos) return sv1; 00519 if (buf[9] >= pos) return sv; 00520 BM_ASSERT(0); 00521 } 00522 else 00523 while ( start != end ) 00524 { 00525 unsigned curr = (start + end) >> 1; 00526 if ( buf[curr] < pos ) 00527 start = curr + 1; 00528 else 00529 end = curr; 00530 } 00531 return ((*buf) & 1) ^ ((--start) & 1); 00532 } 00533 00534 00535 00538 template<class T, class F> 00539 void for_each_nzblock(T*** root, unsigned size1, //unsigned size2, 00540 F& f) 00541 { 00542 for (unsigned i = 0; i < size1; ++i) 00543 { 00544 T** blk_blk = root[i]; 00545 if (!blk_blk) 00546 { 00547 f.on_empty_top(i); 00548 continue; 00549 } 00550 00551 unsigned non_empty_top = 0; 00552 unsigned r = i * bm::set_array_size; 00553 for (unsigned j = 0;j < bm::set_array_size; ++j) 00554 { 00555 if (blk_blk[j]) 00556 { 00557 f(blk_blk[j], r + j); 00558 non_empty_top += (blk_blk[j] != 0);// re-check for mutation 00559 } 00560 else 00561 { 00562 f.on_empty_block(r + j); 00563 } 00564 } // for j 00565 if (non_empty_top == 0) 00566 { 00567 f.on_empty_top(i); 00568 } 00569 } // for i 00570 } 00571 00574 template<class T, class F> 00575 void for_each_nzblock2(T*** root, unsigned size1, F& f) 00576 { 00577 for (unsigned i = 0; i < size1; ++i) 00578 { 00579 T** blk_blk; 00580 if ((blk_blk = root[i])!=0) 00581 { 00582 unsigned j = 0; 00583 do 00584 { 00585 if (blk_blk[j]) 00586 f(blk_blk[j]); 00587 if (blk_blk[j+1]) 00588 f(blk_blk[j+1]); 00589 if (blk_blk[j+2]) 00590 f(blk_blk[j+2]); 00591 if (blk_blk[j+3]) 00592 f(blk_blk[j+3]); 00593 j += 4; 00594 } while (j < bm::set_array_size); 00595 } 00596 } // for i 00597 } 00598 00599 00603 template<class T, class F> 00604 bool for_each_nzblock_if(T*** root, unsigned size1, F& f) 00605 { 00606 unsigned block_idx = 0; 00607 for (unsigned i = 0; i < size1; ++i) 00608 { 00609 T** blk_blk = root[i]; 00610 if (!blk_blk) 00611 { 00612 block_idx += bm::set_array_size; 00613 continue; 00614 } 00615 00616 for (unsigned j = 0;j < bm::set_array_size; ++j, ++block_idx) 00617 { 00618 if (blk_blk[j]) 00619 if (f(blk_blk[j], block_idx)) return true; 00620 } 00621 } 00622 return false; 00623 } 00624 00627 template<class T, class F> 00628 void for_each_block(T*** root, unsigned size1, F& f) 00629 { 00630 unsigned block_idx = 0; 00631 00632 for (unsigned i = 0; i < size1; ++i) 00633 { 00634 T** blk_blk = root[i]; 00635 00636 if (blk_blk) 00637 { 00638 for (unsigned j = 0;j < bm::set_array_size; ++j, ++block_idx) 00639 
{ 00640 f(blk_blk[j], block_idx); 00641 } 00642 } 00643 else 00644 { 00645 for (unsigned j = 0;j < bm::set_array_size; ++j, ++block_idx) 00646 { 00647 f(0, block_idx); 00648 } 00649 } 00650 } 00651 } 00652 00653 00654 00657 template<class T, class F> F bmfor_each(T first, T last, F f) 00658 { 00659 do 00660 { 00661 f(*first); 00662 ++first; 00663 } while (first < last); 00664 return f; 00665 } 00666 00669 template<class T> T sum_arr(T* first, T* last) 00670 { 00671 T sum = 0; 00672 while (first < last) 00673 { 00674 sum += *first; 00675 ++first; 00676 } 00677 return sum; 00678 } 00679 00680 00681 00689 template<typename T> unsigned gap_bit_count(const T* buf, unsigned dsize=0) 00690 { 00691 register const T* pcurr = buf; 00692 if (dsize == 0) 00693 dsize = (*pcurr >> 3); 00694 00695 register const T* pend = pcurr + dsize; 00696 00697 register unsigned bits_counter = 0; 00698 ++pcurr; 00699 00700 if (*buf & 1) 00701 { 00702 bits_counter += *pcurr + 1; 00703 ++pcurr; 00704 } 00705 ++pcurr; // set GAP to 1 00706 00707 while (pcurr <= pend) 00708 { 00709 bits_counter += *pcurr - *(pcurr-1); 00710 pcurr += 2; // jump to the next positive GAP 00711 } 00712 00713 return bits_counter; 00714 } 00715 00716 00724 template<typename T> 00725 unsigned gap_bit_count_range(const T* buf, T left, T right) 00726 { 00727 BM_ASSERT(left <= right); 00728 00729 const T* pcurr = buf; 00730 const T* pend = pcurr + (*pcurr >> 3); 00731 00732 unsigned bits_counter = 0; 00733 unsigned is_set; 00734 unsigned start_pos = gap_bfind(buf, left, &is_set); 00735 00736 pcurr = buf + start_pos; 00737 if (right <= *pcurr) // we are in the target block right now 00738 { 00739 if (is_set) 00740 bits_counter = (right - left + 1); 00741 return bits_counter; 00742 } 00743 if (is_set) 00744 bits_counter += *pcurr - left + 1; 00745 00746 unsigned prev_gap = *pcurr++; 00747 is_set ^= 1; 00748 while (right > *pcurr) 00749 { 00750 if (is_set) 00751 bits_counter += *pcurr - prev_gap; 00752 if (pcurr == pend) 00753 return bits_counter; 00754 prev_gap = *pcurr++; 00755 is_set ^= 1; 00756 } 00757 if (is_set) 00758 bits_counter += right - prev_gap; 00759 00760 return bits_counter; 00761 } 00762 00771 template<class T, class Func> 00772 void for_each_dgap(const T* gap_buf, Func& func) 00773 { 00774 const T* pcurr = gap_buf; 00775 const T* pend = pcurr + (*pcurr >> 3); 00776 ++pcurr; 00777 00778 T prev = *pcurr; 00779 func(prev + 1); // first element incremented to avoid 0 00780 ++pcurr; 00781 do 00782 { 00783 func(*pcurr - prev); // all others are [N] - [N-1] 00784 prev = *pcurr; 00785 } while (++pcurr < pend); 00786 } 00787 00791 template<typename T> struct d_copy_func 00792 { 00793 d_copy_func(T* dg_buf) : dgap_buf_(dg_buf) {} 00794 void operator()(T dgap) { *dgap_buf_++ = dgap; } 00795 00796 T* dgap_buf_; 00797 }; 00798 00812 template<typename T> 00813 T* gap_2_dgap(const T* gap_buf, T* dgap_buf, bool copy_head=true) 00814 { 00815 if (copy_head) // copy GAP header 00816 { 00817 *dgap_buf++ = *gap_buf; 00818 } 00819 00820 d_copy_func<T> copy_func(dgap_buf); 00821 for_each_dgap<T, d_copy_func<T> >(gap_buf, copy_func); 00822 return copy_func.dgap_buf_; 00823 } 00824 00836 template<typename T> 00837 void dgap_2_gap(const T* dgap_buf, T* gap_buf, T gap_header=0) 00838 { 00839 const T* pcurr = dgap_buf; 00840 unsigned len; 00841 if (!gap_header) // GAP header is already part of the stream 00842 { 00843 len = *pcurr >> 3; 00844 *gap_buf++ = *pcurr++; // copy GAP header 00845 } 00846 else // GAP header passed as a parameter 00847 { 00848 len = 
gap_header >> 3; 00849 *gap_buf++ = gap_header; // assign GAP header 00850 } 00851 --len; // last element is actually not encoded 00852 register const T* pend = pcurr + len; 00853 00854 *gap_buf = *pcurr++; // copy first element 00855 if (*gap_buf == 0) 00856 *gap_buf = 65535; // fix +1 overflow 00857 else 00858 *gap_buf = *gap_buf - 1; 00859 00860 for (++gap_buf; pcurr < pend; ++pcurr) 00861 { 00862 T prev = *(gap_buf-1); // don't remove temp(undef expression!) 00863 *gap_buf++ = *pcurr + prev; 00864 } 00865 *gap_buf = 65535; // add missing last element 00866 } 00867 00868 00877 template<typename T> int gapcmp(const T* buf1, const T* buf2) 00878 { 00879 const T* pcurr1 = buf1; 00880 const T* pend1 = pcurr1 + (*pcurr1 >> 3); 00881 unsigned bitval1 = *buf1 & 1; 00882 ++pcurr1; 00883 00884 const T* pcurr2 = buf2; 00885 unsigned bitval2 = *buf2 & 1; 00886 ++pcurr2; 00887 00888 while (pcurr1 <= pend1) 00889 { 00890 if (*pcurr1 == *pcurr2) 00891 { 00892 if (bitval1 != bitval2) 00893 { 00894 return (bitval1) ? 1 : -1; 00895 } 00896 } 00897 else 00898 { 00899 if (bitval1 == bitval2) 00900 { 00901 if (bitval1) 00902 { 00903 return (*pcurr1 < *pcurr2) ? -1 : 1; 00904 } 00905 else 00906 { 00907 return (*pcurr1 < *pcurr2) ? 1 : -1; 00908 } 00909 } 00910 else 00911 { 00912 return (bitval1) ? 1 : -1; 00913 } 00914 } 00915 00916 ++pcurr1; ++pcurr2; 00917 00918 bitval1 ^= 1; 00919 bitval2 ^= 1; 00920 } 00921 00922 return 0; 00923 } 00924 00925 00943 template<typename T, class F> 00944 void gap_buff_op(T* BMRESTRICT dest, 00945 const T* BMRESTRICT vect1, 00946 unsigned vect1_mask, 00947 const T* BMRESTRICT vect2, 00948 unsigned vect2_mask, 00949 F& f, 00950 unsigned& dlen) 00951 { 00952 register const T* cur1 = vect1; 00953 register const T* cur2 = vect2; 00954 00955 T bitval1 = (T)((*cur1++ & 1) ^ vect1_mask); 00956 T bitval2 = (T)((*cur2++ & 1) ^ vect2_mask); 00957 00958 T bitval = (T) f(bitval1, bitval2); 00959 T bitval_prev = bitval; 00960 00961 register T* res = dest; 00962 *res = bitval; 00963 ++res; 00964 00965 while (1) 00966 { 00967 bitval = (T) f(bitval1, bitval2); 00968 00969 // Check if GAP value changes and we need to 00970 // start the next one. 
00971 if (bitval != bitval_prev) 00972 { 00973 ++res; 00974 bitval_prev = bitval; 00975 } 00976 00977 if (*cur1 < *cur2) 00978 { 00979 *res = *cur1; 00980 ++cur1; 00981 bitval1 ^= 1; 00982 } 00983 else // >= 00984 { 00985 *res = *cur2; 00986 if (*cur2 < *cur1) 00987 { 00988 bitval2 ^= 1; 00989 } 00990 else // equal 00991 { 00992 if (*cur2 == (bm::gap_max_bits - 1)) 00993 { 00994 break; 00995 } 00996 00997 ++cur1; 00998 bitval1 ^= 1; 00999 bitval2 ^= 1; 01000 } 01001 ++cur2; 01002 } 01003 01004 } // while 01005 01006 dlen = (unsigned)(res - dest); 01007 *dest = (T)((*dest & 7) + (dlen << 3)); 01008 01009 } 01010 01025 template<typename T, class F> 01026 unsigned gap_buff_any_op(const T* BMRESTRICT vect1, 01027 unsigned vect1_mask, 01028 const T* BMRESTRICT vect2, 01029 unsigned vect2_mask, 01030 F f) 01031 { 01032 register const T* cur1 = vect1; 01033 register const T* cur2 = vect2; 01034 01035 unsigned bitval1 = (*cur1++ & 1) ^ vect1_mask; 01036 unsigned bitval2 = (*cur2++ & 1) ^ vect2_mask; 01037 01038 unsigned bitval = f(bitval1, bitval2); 01039 if (bitval) 01040 return bitval; 01041 unsigned bitval_prev = bitval; 01042 01043 while (1) 01044 { 01045 bitval = f(bitval1, bitval2); 01046 if (bitval) 01047 return bitval; 01048 01049 if (bitval != bitval_prev) 01050 bitval_prev = bitval; 01051 01052 if (*cur1 < *cur2) 01053 { 01054 ++cur1; 01055 bitval1 ^= 1; 01056 } 01057 else // >= 01058 { 01059 if (*cur2 < *cur1) 01060 { 01061 bitval2 ^= 1; 01062 } 01063 else // equal 01064 { 01065 if (*cur2 == (bm::gap_max_bits - 1)) 01066 { 01067 break; 01068 } 01069 01070 ++cur1; 01071 bitval1 ^= 1; 01072 bitval2 ^= 1; 01073 } 01074 ++cur2; 01075 } 01076 01077 } // while 01078 01079 return 0; 01080 } 01081 01082 01083 01094 template<typename T, class F> 01095 unsigned gap_buff_count_op(const T* vect1, const T* vect2, F f) 01096 { 01097 unsigned count;// = 0; 01098 const T* cur1 = vect1; 01099 const T* cur2 = vect2; 01100 01101 unsigned bitval1 = (*cur1++ & 1); 01102 unsigned bitval2 = (*cur2++ & 1); 01103 unsigned bitval = count = f(bitval1, bitval2); 01104 unsigned bitval_prev = bitval; 01105 01106 //if (bitval) ++count; 01107 01108 T res, res_prev; 01109 res = res_prev = 0; 01110 01111 while (1) 01112 { 01113 bitval = f(bitval1, bitval2); 01114 01115 // Check if GAP value changes and we need to 01116 // start the next one. 
01117 if (bitval != bitval_prev) 01118 { 01119 bitval_prev = bitval; 01120 res_prev = res; 01121 } 01122 01123 if (*cur1 < *cur2) 01124 { 01125 res = *cur1; 01126 if (bitval) 01127 { 01128 count += res - res_prev; 01129 res_prev = res; 01130 } 01131 ++cur1; 01132 bitval1 ^= 1; 01133 } 01134 else // >= 01135 { 01136 res = *cur2; 01137 if (bitval) 01138 { 01139 count += res - res_prev; 01140 res_prev = res; 01141 } 01142 if (*cur2 < *cur1) 01143 { 01144 bitval2 ^= 1; 01145 } 01146 else // equal 01147 { 01148 if (*cur2 == (bm::gap_max_bits - 1)) 01149 { 01150 break; 01151 } 01152 01153 ++cur1; 01154 bitval1 ^= 1; 01155 bitval2 ^= 1; 01156 } 01157 ++cur2; 01158 } 01159 01160 } // while 01161 01162 return count; 01163 } 01164 01165 01166 01179 template<typename T> unsigned gap_set_value(unsigned val, 01180 T* BMRESTRICT buf, 01181 unsigned pos, 01182 unsigned* BMRESTRICT is_set) 01183 { 01184 BM_ASSERT(pos < bm::gap_max_bits); 01185 unsigned curr = gap_bfind(buf, pos, is_set); 01186 01187 register T end = (*buf >> 3); 01188 if (*is_set == val) 01189 { 01190 *is_set = 0; 01191 return end; 01192 } 01193 *is_set = 1; 01194 01195 register T* pcurr = buf + curr; 01196 register T* pprev = pcurr - 1; 01197 register T* pend = buf + end; 01198 01199 // Special case, first bit GAP operation. There is no platform beside it. 01200 // initial flag must be inverted. 01201 if (pos == 0) 01202 { 01203 *buf ^= 1; 01204 if ( buf[1] ) // We need to insert a 1 bit platform here. 01205 { 01206 ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t)); 01207 buf[1] = 0; 01208 ++end; 01209 } 01210 else // Only 1 bit in the GAP. We need to delete the first GAP. 01211 { 01212 pprev = buf + 1; 01213 pcurr = pprev + 1; 01214 do 01215 { 01216 *pprev++ = *pcurr++; 01217 } while (pcurr < pend); 01218 --end; 01219 } 01220 } 01221 else if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit 01222 { 01223 ++(*pprev); 01224 if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP. 01225 { 01226 --end; 01227 if (pcurr != pend) // GAP merge: 2 GAPS to be deleted 01228 { 01229 --end; 01230 ++pcurr; 01231 do 01232 { 01233 *pprev++ = *pcurr++; 01234 } while (pcurr < pend); 01235 } 01236 } 01237 } 01238 else if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left. 01239 { 01240 --(*pcurr); 01241 if (pcurr == pend) 01242 { 01243 ++end; 01244 } 01245 } 01246 else // Worst case we need to split current block. 01247 { 01248 ::memmove(pcurr+2, pcurr,(end - curr + 1)*sizeof(T)); 01249 *pcurr++ = (T)(pos - 1); 01250 *pcurr = (T)pos; 01251 end+=2; 01252 } 01253 01254 // Set correct length word. 01255 *buf = (*buf & 7) + (end << 3); 01256 01257 buf[end] = bm::gap_max_bits - 1; 01258 return end; 01259 } 01260 01271 template<typename T> 01272 unsigned gap_add_value(T* buf, T pos) 01273 { 01274 BM_ASSERT(pos < bm::gap_max_bits); 01275 01276 register T end = (*buf >> 3); 01277 T curr = end; 01278 register T* pcurr = buf + end; 01279 register T* pend = pcurr; 01280 register T* pprev = pcurr - 1; 01281 01282 // Special case, first bit GAP operation. There is no platform beside it. 01283 // initial flag must be inverted. 01284 if (pos == 0) 01285 { 01286 *buf ^= 1; 01287 if ( buf[1] ) // We need to insert a 1 bit platform here. 01288 { 01289 ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t)); 01290 buf[1] = 0; 01291 ++end; 01292 } 01293 else // Only 1 bit in the GAP. We need to delete the first GAP. 
01294 { 01295 pprev = buf + 1; 01296 pcurr = pprev + 1; 01297 do 01298 { 01299 *pprev++ = *pcurr++; 01300 } while (pcurr < pend); 01301 --end; 01302 } 01303 } 01304 else if (((unsigned)(*pprev))+1 == pos && (curr > 1) ) // Left border bit 01305 { 01306 ++(*pprev); 01307 if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP. 01308 { 01309 --end; 01310 if (pcurr != pend) // GAP merge: 2 GAPS to be deleted 01311 { 01312 // TODO: should never get here... 01313 --end; 01314 ++pcurr; 01315 do 01316 { 01317 *pprev++ = *pcurr++; 01318 } while (pcurr < pend); 01319 } 01320 } 01321 } 01322 else if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left. 01323 { 01324 --(*pcurr); 01325 if (pcurr == pend) 01326 { 01327 ++end; 01328 } 01329 } 01330 else // Worst case we need to split current block. 01331 { 01332 *pcurr++ = pos - 1; 01333 *pcurr = pos; 01334 end+=2; 01335 } 01336 01337 // Set correct length word. 01338 *buf = (*buf & 7) + (end << 3); 01339 01340 buf[end] = bm::gap_max_bits - 1; 01341 return end; 01342 } 01343 01356 template<typename T> 01357 unsigned gap_set_array(T* buf, const T* arr, unsigned len) 01358 { 01359 *buf = (*buf & 6u) + (1u << 3); // gap header setup 01360 01361 T* pcurr = buf + 1; 01362 01363 unsigned i = 0; 01364 T curr = arr[i]; 01365 if (curr != 0) // need to add the first gap: (0 to arr[0]-1) 01366 { 01367 *pcurr = curr - 1; 01368 ++pcurr; 01369 } 01370 else 01371 { 01372 *buf += 1; // GAP starts with 1 01373 } 01374 T prev = curr; 01375 T acc = prev; 01376 01377 for (i = 1; i < len; ++i) 01378 { 01379 T curr = arr[i]; 01380 if (curr == prev + 1) 01381 { 01382 ++acc; 01383 prev = curr; 01384 } 01385 else 01386 { 01387 *pcurr++ = acc; 01388 acc = curr; 01389 *pcurr++ = curr-1; 01390 } 01391 prev = curr; 01392 } 01393 *pcurr = acc; 01394 if (acc != bm::gap_max_bits - 1) 01395 { 01396 ++pcurr; 01397 *pcurr = bm::gap_max_bits - 1; 01398 } 01399 01400 unsigned end = pcurr - buf; 01401 01402 *buf = (T)((*buf & 7) + (end << 3)); 01403 return end+1; 01404 } 01405 01406 01407 //------------------------------------------------------------------------ 01408 01416 template<typename T> 01417 unsigned bit_array_compute_gaps(const T* arr, 01418 unsigned len) 01419 { 01420 unsigned gap_count = 1; 01421 T prev = arr[0]; 01422 if (prev > 0) 01423 ++gap_count; 01424 for (unsigned i = 1; i < len; ++i) 01425 { 01426 T curr = arr[i]; 01427 if (curr != prev + 1) 01428 { 01429 gap_count += 2; 01430 } 01431 prev = curr; 01432 } 01433 return gap_count; 01434 } 01435 01436 01437 //------------------------------------------------------------------------ 01438 01447 template<typename T> int gap_find_in_block(const T* buf, 01448 unsigned nbit, 01449 bm::id_t* prev) 01450 { 01451 BM_ASSERT(nbit < bm::gap_max_bits); 01452 01453 unsigned bitval; 01454 unsigned gap_idx = bm::gap_bfind(buf, nbit, &bitval); 01455 01456 if (bitval) // positive block. 01457 { 01458 return 1; 01459 } 01460 01461 register unsigned val = buf[gap_idx] + 1; 01462 *prev += val - nbit; 01463 01464 return (val != bm::gap_max_bits); // no bug here. 
01465 } 01466 01472 BMFORCEINLINE void set_bit(unsigned* dest, unsigned bitpos) 01473 { 01474 unsigned nbit = unsigned(bitpos & bm::set_block_mask); 01475 unsigned nword = unsigned(nbit >> bm::set_word_shift); 01476 nbit &= bm::set_word_mask; 01477 dest[nword] |= unsigned(1 << nbit); 01478 } 01479 01485 BMFORCEINLINE unsigned test_bit(const unsigned* block, unsigned bitpos) 01486 { 01487 unsigned nbit = unsigned(bitpos & bm::set_block_mask); 01488 unsigned nword = unsigned(nbit >> bm::set_word_shift); 01489 nbit &= bm::set_word_mask; 01490 return (block[nword] >> nbit) & 1u; 01491 } 01492 01493 01502 inline void or_bit_block(unsigned* dest, 01503 unsigned bitpos, 01504 unsigned bitcount) 01505 { 01506 unsigned nbit = unsigned(bitpos & bm::set_block_mask); 01507 unsigned nword = unsigned(nbit >> bm::set_word_shift); 01508 nbit &= bm::set_word_mask; 01509 01510 bm::word_t* word = dest + nword; 01511 01512 if (bitcount == 1) // special case (only 1 bit to set) 01513 { 01514 *word |= unsigned(1 << nbit); 01515 return; 01516 } 01517 01518 if (nbit) // starting position is not aligned 01519 { 01520 unsigned right_margin = nbit + bitcount; 01521 01522 // here we checking if we setting bits only in the current 01523 // word. Example: 00111000000000000000000000000000 (32 bits word) 01524 01525 if (right_margin < 32) 01526 { 01527 unsigned mask = 01528 block_set_table<true>::_right[nbit] & 01529 block_set_table<true>::_left[right_margin-1]; 01530 *word |= mask; 01531 return; // we are done 01532 } 01533 else 01534 { 01535 *word |= block_set_table<true>::_right[nbit]; 01536 bitcount -= 32 - nbit; 01537 } 01538 ++word; 01539 } 01540 01541 // now we are word aligned, lets find out how many words we 01542 // can now turn ON using loop 01543 01544 for ( ;bitcount >= 32; bitcount -= 32) 01545 { 01546 *word++ = 0xffffffff; 01547 } 01548 01549 if (bitcount) 01550 { 01551 *word |= block_set_table<true>::_left[bitcount-1]; 01552 } 01553 } 01554 01555 01564 inline void sub_bit_block(unsigned* dest, 01565 unsigned bitpos, 01566 unsigned bitcount) 01567 { 01568 unsigned nbit = unsigned(bitpos & bm::set_block_mask); 01569 unsigned nword = unsigned(nbit >> bm::set_word_shift); 01570 nbit &= bm::set_word_mask; 01571 01572 bm::word_t* word = dest + nword; 01573 01574 if (bitcount == 1) // special case (only 1 bit to set) 01575 { 01576 *word &= ~unsigned(1 << nbit); 01577 return; 01578 } 01579 01580 if (nbit) // starting position is not aligned 01581 { 01582 unsigned right_margin = nbit + bitcount; 01583 01584 // here we checking if we setting bits only in the current 01585 // word. 
Example: 00111000000000000000000000000000 (32 bits word) 01586 01587 if (right_margin < 32) 01588 { 01589 unsigned mask = 01590 block_set_table<true>::_right[nbit] & 01591 block_set_table<true>::_left[right_margin-1]; 01592 *word &= ~mask; 01593 return; // we are done 01594 } 01595 else 01596 { 01597 *word &= ~block_set_table<true>::_right[nbit]; 01598 bitcount -= 32 - nbit; 01599 } 01600 ++word; 01601 } 01602 01603 // now we are word aligned, lets find out how many words we 01604 // can now turn ON using loop 01605 01606 for ( ;bitcount >= 32; bitcount -= 32) 01607 { 01608 *word++ = 0; 01609 } 01610 01611 if (bitcount) 01612 { 01613 *word &= ~block_set_table<true>::_left[bitcount-1]; 01614 } 01615 } 01616 01617 01626 inline void xor_bit_block(unsigned* dest, 01627 unsigned bitpos, 01628 unsigned bitcount) 01629 { 01630 unsigned nbit = unsigned(bitpos & bm::set_block_mask); 01631 unsigned nword = unsigned(nbit >> bm::set_word_shift); 01632 nbit &= bm::set_word_mask; 01633 01634 bm::word_t* word = dest + nword; 01635 01636 if (bitcount == 1) // special case (only 1 bit to set) 01637 { 01638 *word ^= unsigned(1 << nbit); 01639 return; 01640 } 01641 01642 if (nbit) // starting position is not aligned 01643 { 01644 unsigned right_margin = nbit + bitcount; 01645 01646 // here we checking if we setting bits only in the current 01647 // word. Example: 00111000000000000000000000000000 (32 bits word) 01648 01649 if (right_margin < 32) 01650 { 01651 unsigned mask = 01652 block_set_table<true>::_right[nbit] & 01653 block_set_table<true>::_left[right_margin-1]; 01654 *word ^= mask; 01655 return; // we are done 01656 } 01657 else 01658 { 01659 *word ^= block_set_table<true>::_right[nbit]; 01660 bitcount -= 32 - nbit; 01661 } 01662 ++word; 01663 } 01664 01665 // now we are word aligned, lets find out how many words we 01666 // can now turn ON using loop 01667 01668 for ( ;bitcount >= 32; bitcount -= 32) 01669 { 01670 *word++ ^= 0xffffffff; 01671 } 01672 01673 if (bitcount) 01674 { 01675 *word ^= block_set_table<true>::_left[bitcount-1]; 01676 } 01677 } 01678 01679 01687 template<typename T> 01688 void gap_sub_to_bitset(unsigned* BMRESTRICT dest, const T* BMRESTRICT buf) 01689 { 01690 const T* pend = buf + (*buf >> 3); 01691 T b = *buf & 1; 01692 ++buf; 01693 01694 if (b) // Starts with 1 01695 { 01696 sub_bit_block(dest, 0, *buf + 1); 01697 ++buf; 01698 } 01699 for (++buf; buf <= pend; buf += 2) 01700 { 01701 T prev = *(buf-1); 01702 BM_ASSERT(*buf > prev); 01703 sub_bit_block(dest, prev + 1, *buf - prev); 01704 } 01705 } 01706 01707 01715 template<typename T> 01716 void gap_xor_to_bitset(unsigned* BMRESTRICT dest, const T* BMRESTRICT buf) 01717 { 01718 const T* pend = buf + (*buf >> 3); 01719 T b = *buf & 1; 01720 ++buf; 01721 01722 if (b) // Starts with 1 01723 { 01724 xor_bit_block(dest, 0, *buf + 1); 01725 ++buf; 01726 } 01727 for (++buf; buf <= pend; buf += 2) 01728 { 01729 T prev = *(buf-1); 01730 BM_ASSERT(*buf > prev); 01731 xor_bit_block(dest, prev + 1, *buf - prev); 01732 } 01733 } 01734 01743 template<typename T> 01744 void gap_add_to_bitset_l(unsigned* dest, const T* buf, unsigned buf_len) 01745 { 01746 BM_ASSERT(buf_len); 01747 register const T* pend = buf + buf_len; 01748 T b = *buf & 1; 01749 ++buf; 01750 01751 if (b) // Starts with 1 01752 { 01753 or_bit_block(dest, 0, *buf + 1); 01754 ++buf; 01755 } 01756 for (++buf; buf <= pend; buf += 2) 01757 { 01758 T prev = *(buf-1); 01759 BM_ASSERT(*buf > prev); 01760 or_bit_block(dest, prev + 1, *buf - prev); 01761 } 01762 } 01763 01764 01765 
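Editor's note: or_bit_block, sub_bit_block and xor_bit_block above all follow the same three-step pattern -- mask the unaligned head word with block_set_table _right[nbit] & _left[right_margin-1], run over whole 32-bit words in a loop, then mask the tail with _left[bitcount-1]. Below is a minimal standalone sketch of the same range-OR, computing the masks inline instead of via block_set_table; the function name and the single-block assumption are the editor's, for illustration only.

    inline void or_bit_range_sketch(unsigned* dest,
                                    unsigned bitpos, unsigned bitcount)
    {
        unsigned nword = bitpos >> 5;      // index of the first 32-bit word
        unsigned nbit  = bitpos & 31u;     // bit offset inside that word
        while (bitcount)
        {
            unsigned span = 32u - nbit;    // bits available in the current word
            if (span > bitcount)
                span = bitcount;
            unsigned mask = (span == 32u) ? 0xFFFFFFFFu
                                          : (((1u << span) - 1u) << nbit);
            dest[nword++] |= mask;         // head, body and tail all reduce to this
            bitcount -= span;
            nbit = 0;                      // every following word starts aligned
        }
    }

The table-driven versions above replace these shift computations with two 32-entry lookups from block_set_table.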
01773 template<typename T> 01774 void gap_add_to_bitset(unsigned* dest, const T* buf) 01775 { 01776 gap_add_to_bitset_l(dest, buf, *buf >> 3); 01777 } 01778 01779 01787 template<typename T> 01788 void gap_and_to_bitset(unsigned* dest, const T* buf) 01789 { 01790 register const T* pend = buf + (*buf >> 3); 01791 T b = *buf & 1; 01792 ++buf; 01793 01794 if (!b ) // Starts with 0 01795 { 01796 // Instead of AND we can SUB 0 gaps here 01797 sub_bit_block(dest, 0, *buf + 1); 01798 ++buf; 01799 } 01800 for (++buf; buf <= pend; buf += 2) 01801 { 01802 T prev = *(buf-1); 01803 BM_ASSERT(*buf > prev); 01804 sub_bit_block(dest, prev + 1, *buf - prev); 01805 } 01806 } 01807 01808 01816 template<typename T> 01817 bm::id_t gap_bitset_and_count(const unsigned* block, const T* buf) 01818 { 01819 BM_ASSERT(block); 01820 01821 const T* pcurr = buf; 01822 const T* pend = pcurr + (*pcurr >> 3); 01823 ++pcurr; 01824 01825 bm::id_t count = 0; 01826 if (*buf & 1) // Starts with 1 01827 { 01828 count += bit_block_calc_count_range(block, 0, *pcurr); 01829 ++pcurr; 01830 } 01831 ++pcurr; // now we are in GAP "1" again 01832 for (;pcurr <= pend; pcurr += 2) 01833 { 01834 count += bit_block_calc_count_range(block, *(pcurr-1)+1, *pcurr); 01835 } 01836 return count; 01837 } 01838 01839 01847 template<typename T> 01848 bm::id_t gap_bitset_and_any(const unsigned* block, const T* buf) 01849 { 01850 BM_ASSERT(block); 01851 01852 register const T* pcurr = buf; 01853 register const T* pend = pcurr + (*pcurr >> 3); 01854 ++pcurr; 01855 01856 bm::id_t count = 0; 01857 if (*buf & 1) // Starts with 1 01858 { 01859 count += bit_block_any_range(block, 0, *pcurr); 01860 if (count) 01861 return count; 01862 ++pcurr; 01863 } 01864 ++pcurr; // now we are in GAP "1" again 01865 01866 while (pcurr <= pend) 01867 { 01868 bm::id_t c = bit_block_any_range(block, *(pcurr-1)+1, *pcurr); 01869 count += c; 01870 if (count) 01871 break; 01872 pcurr += 2; 01873 } 01874 return count; 01875 } 01876 01877 01878 01886 template<typename T> 01887 bm::id_t gap_bitset_sub_count(const unsigned* block, const T* buf) 01888 { 01889 BM_ASSERT(block); 01890 01891 register const T* pcurr = buf; 01892 register const T* pend = pcurr + (*pcurr >> 3); 01893 ++pcurr; 01894 01895 bm::id_t count = 0; 01896 01897 if (!(*buf & 1)) // Starts with 0 01898 { 01899 count += bit_block_calc_count_range(block, 0, *pcurr); 01900 ++pcurr; 01901 } 01902 ++pcurr; // now we are in GAP "0" again 01903 01904 for (;pcurr <= pend; pcurr+=2) 01905 { 01906 count += bit_block_calc_count_range(block, *(pcurr-1)+1, *pcurr); 01907 } 01908 return count; 01909 } 01910 01911 01919 template<typename T> 01920 bm::id_t gap_bitset_sub_any(const unsigned* block, const T* buf) 01921 { 01922 BM_ASSERT(block); 01923 01924 register const T* pcurr = buf; 01925 register const T* pend = pcurr + (*pcurr >> 3); 01926 ++pcurr; 01927 01928 bm::id_t count = 0; 01929 01930 if (!(*buf & 1)) // Starts with 0 01931 { 01932 count += bit_block_any_range(block, 0, *pcurr); 01933 if (count) 01934 return count; 01935 ++pcurr; 01936 } 01937 ++pcurr; // now we are in GAP "0" again 01938 01939 for (;pcurr <= pend; pcurr+=2) 01940 { 01941 count += bit_block_any_range(block, *(pcurr-1)+1, *pcurr); 01942 if (count) 01943 return count; 01944 } 01945 return count; 01946 } 01947 01948 01949 01957 template<typename T> 01958 bm::id_t gap_bitset_xor_count(const unsigned* block, const T* buf) 01959 { 01960 BM_ASSERT(block); 01961 01962 register const T* pcurr = buf; 01963 register const T* pend = pcurr + (*pcurr >> 3); 01964 
++pcurr; 01965 01966 unsigned bitval = *buf & 1; 01967 01968 register bm::id_t count = bit_block_calc_count_range(block, 0, *pcurr); 01969 if (bitval) 01970 { 01971 count = *pcurr + 1 - count; 01972 } 01973 01974 for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr) 01975 { 01976 T prev = *(pcurr-1)+1; 01977 bm::id_t c = bit_block_calc_count_range(block, prev, *pcurr); 01978 01979 if (bitval) // 1 gap; means Result = Total_Bits - BitCount; 01980 { 01981 c = (*pcurr - prev + 1) - c; 01982 } 01983 count += c; 01984 } 01985 return count; 01986 } 01987 01995 template<typename T> 01996 bm::id_t gap_bitset_xor_any(const unsigned* block, const T* buf) 01997 { 01998 BM_ASSERT(block); 01999 02000 register const T* pcurr = buf; 02001 register const T* pend = pcurr + (*pcurr >> 3); 02002 ++pcurr; 02003 02004 unsigned bitval = *buf & 1; 02005 02006 register bm::id_t count = bit_block_any_range(block, 0, *pcurr); 02007 if (bitval) 02008 { 02009 count = *pcurr + 1 - count; 02010 } 02011 02012 for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr) 02013 { 02014 T prev = *(pcurr-1)+1; 02015 bm::id_t c = bit_block_any_range(block, prev, *pcurr); 02016 02017 if (bitval) // 1 gap; means Result = Total_Bits - BitCount; 02018 { 02019 c = (*pcurr - prev + 1) - c; 02020 } 02021 02022 count += c; 02023 if (count) 02024 return count; 02025 } 02026 return count; 02027 } 02028 02029 02030 02038 template<typename T> 02039 bm::id_t gap_bitset_or_count(const unsigned* block, const T* buf) 02040 { 02041 BM_ASSERT(block); 02042 02043 register const T* pcurr = buf; 02044 register const T* pend = pcurr + (*pcurr >> 3); 02045 ++pcurr; 02046 02047 unsigned bitval = *buf & 1; 02048 02049 register bm::id_t count; 02050 if (bitval) 02051 { 02052 count = *pcurr + 1; 02053 } 02054 else 02055 { 02056 count = bit_block_calc_count_range(block, 0, *pcurr); 02057 } 02058 02059 for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr) 02060 { 02061 T prev = *(pcurr-1)+1; 02062 bm::id_t c; 02063 02064 if (bitval) 02065 { 02066 c = (*pcurr - prev + 1); 02067 } 02068 else 02069 { 02070 c = bit_block_calc_count_range(block, prev, *pcurr); 02071 } 02072 02073 count += c; 02074 } 02075 return count; 02076 } 02077 02085 template<typename T> 02086 bm::id_t gap_bitset_or_any(const unsigned* block, const T* buf) 02087 { 02088 BM_ASSERT(block); 02089 02090 register const T* pcurr = buf; 02091 register const T* pend = pcurr + (*pcurr >> 3); 02092 ++pcurr; 02093 02094 unsigned bitval = *buf & 1; 02095 02096 register bm::id_t count; 02097 if (bitval) 02098 { 02099 count = *pcurr + 1; 02100 } 02101 else 02102 { 02103 count = bit_block_any_range(block, 0, *pcurr); 02104 } 02105 02106 for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr) 02107 { 02108 T prev = *(pcurr-1)+1; 02109 bm::id_t c; 02110 02111 if (bitval) 02112 { 02113 c = (*pcurr - prev + 1); 02114 } 02115 else 02116 { 02117 c = bit_block_any_range(block, prev, *pcurr); 02118 } 02119 count += c; 02120 if (count) 02121 return count; 02122 } 02123 return count; 02124 } 02125 02126 02127 02136 inline 02137 void bit_block_set(bm::word_t* BMRESTRICT dst, bm::word_t value) 02138 { 02139 //#ifdef BMVECTOPT 02140 // VECT_SET_BLOCK(dst, dst + bm::set_block_size, value); 02141 //#else 02142 ::memset(dst, value, bm::set_block_size * sizeof(bm::word_t)); 02143 //#endif 02144 } 02145 02146 02154 template<typename T> 02155 void gap_convert_to_bitset(unsigned* dest, const T* buf) 02156 { 02157 bit_block_set(dest, 0); 02158 gap_add_to_bitset(dest, buf); 02159 } 02160 02168 
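Editor's note: as a concrete illustration of the GAP layout consumed by gap_convert_to_bitset and the gap_bitset_* counters above -- every element after the header word is the inclusive end position of a run, runs alternate in value, and the low header bit gives the value of the run starting at bit 0. A hedged usage sketch follows; the buffer contents and the wrapper function are chosen by the editor purely for illustration.

    inline void gap_demo_usage_sketch()
    {
        // GAP block meaning: bits 0..2 = 0, bits 3..7 = 1, bits 8..65535 = 0.
        // Header: last-element index 3 (stored << 3), level 0, first-run value 0.
        static const bm::gap_word_t gap_demo[] =
            { (3 << 3), 2, 7, bm::gap_max_bits - 1 };

        bm::word_t bit_blk[bm::set_block_size];
        bm::gap_convert_to_bitset(bit_blk, gap_demo); // zero-fill, then OR in each 1-run
        unsigned cnt = bm::gap_bit_count(gap_demo);   // cnt == 5  (bits 3..7)
        (void)cnt;
    }

gap_convert_to_bitset expands the block via bit_block_set followed by gap_add_to_bitset, i.e. one or_bit_block call per 1-run.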
template<typename T> 02169 void gap_convert_to_bitset_l(unsigned* dest, const T* buf, unsigned buf_len) 02170 { 02171 bit_block_set(dest, 0); 02172 gap_add_to_bitset_l(dest, buf, buf_len ? buf_len : *buf >> 3); 02173 } 02174 02175 02176 02185 template<typename T> 02186 void gap_convert_to_bitset(unsigned* dest, const T* buf, unsigned dest_len) 02187 { 02188 ::memset(dest, 0, dest_len * sizeof(unsigned)); 02189 gap_add_to_bitset(dest, buf); 02190 } 02191 02192 02193 02206 template<typename T> 02207 unsigned* gap_convert_to_bitset_smart(unsigned* dest, 02208 const T* buf, 02209 id_t set_max) 02210 { 02211 if (buf[1] == set_max - 1) 02212 { 02213 return (buf[0] & 1) ? FULL_BLOCK_ADDR : 0; 02214 } 02215 02216 gap_convert_to_bitset(dest, buf); 02217 return dest; 02218 } 02219 02220 02229 template<typename T> unsigned gap_control_sum(const T* buf) 02230 { 02231 unsigned end = *buf >> 3; 02232 02233 register const T* pcurr = buf; 02234 register const T* pend = pcurr + (*pcurr >> 3); 02235 ++pcurr; 02236 02237 if (*buf & 1) // Starts with 1 02238 { 02239 ++pcurr; 02240 } 02241 ++pcurr; // now we are in GAP "1" again 02242 02243 while (pcurr <= pend) 02244 { 02245 BM_ASSERT(*pcurr > *(pcurr-1)); 02246 pcurr += 2; 02247 } 02248 return buf[end]; 02249 02250 } 02251 02252 02260 template<class T> void gap_set_all(T* buf, 02261 unsigned set_max, 02262 unsigned value) 02263 { 02264 BM_ASSERT(value == 0 || value == 1); 02265 *buf = (T)((*buf & 6u) + (1u << 3) + value); 02266 *(++buf) = (T)(set_max - 1); 02267 } 02268 02269 02280 template<class T> 02281 void gap_init_range_block(T* buf, 02282 T from, 02283 T to, 02284 T value, 02285 unsigned set_max) 02286 { 02287 BM_ASSERT(value == 0 || value == 1); 02288 02289 unsigned gap_len; 02290 if (from == 0) 02291 { 02292 if (to == set_max - 1) 02293 { 02294 gap_set_all(buf, set_max, value); 02295 } 02296 else 02297 { 02298 gap_len = 2; 02299 buf[1] = to; 02300 buf[2] = (T)(set_max - 1); 02301 buf[0] = (T)((*buf & 6u) + (gap_len << 3) + value); 02302 } 02303 return; 02304 } 02305 // from != 0 02306 02307 value = !value; 02308 if (to == set_max - 1) 02309 { 02310 gap_len = 2; 02311 buf[1] = (T)(from - 1); 02312 buf[2] = (T)(set_max - 1); 02313 } 02314 else 02315 { 02316 gap_len = 3; 02317 buf[1] = from - 1; 02318 buf[2] = (T) to; 02319 buf[3] = (T)(set_max - 1); 02320 } 02321 buf[0] = (T)((*buf & 6u) + (gap_len << 3) + value); 02322 } 02323 02324 02331 template<typename T> void gap_invert(T* buf) 02332 { 02333 *buf ^= 1; 02334 } 02335 02346 /* 02347 template<typename T> void gap_temp_invert(const T* buf) 02348 { 02349 T* buftmp = const_cast<T*>(buf); 02350 *buftmp ^= 1; 02351 } 02352 */ 02353 02362 template<typename T> 02363 bool gap_is_all_zero(const T* buf, unsigned set_max) 02364 { 02365 return (((*buf & 1)==0) && (*(++buf) == set_max - 1)); 02366 } 02367 02376 template<typename T> 02377 bool gap_is_all_one(const T* buf, unsigned set_max) 02378 { 02379 return ((*buf & 1) && (*(++buf) == set_max - 1)); 02380 } 02381 02389 template<typename T> T gap_length(const T* buf) 02390 { 02391 return (*buf >> 3) + 1; 02392 } 02393 02394 02402 template<typename T> 02403 unsigned gap_capacity(const T* buf, const T* glevel_len) 02404 { 02405 return glevel_len[(*buf >> 1) & 3]; 02406 } 02407 02408 02417 template<typename T> 02418 unsigned gap_limit(const T* buf, const T* glevel_len) 02419 { 02420 return glevel_len[(*buf >> 1) & 3]-4; 02421 } 02422 02423 02431 template<typename T> unsigned gap_level(const T* buf) 02432 { 02433 return (*buf >> 1) & 3; 02434 } 02435 02436 02444 
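Editor's note: gap_length, gap_level, gap_capacity and set_gap_level (next) all manipulate the same packed header word: bit 0 holds the value of the first run, bits 1..2 the allocation level, and the remaining bits the index of the last element (with a 16-bit gap_word_t that leaves 13 bits). A small hedged sketch of that layout; the helper names are the editor's, not part of the library.

    // [ last-element index ][ level : 2 bits ][ first-run value : 1 bit ]
    inline bm::gap_word_t make_gap_header_sketch(unsigned last_idx,
                                                 unsigned level,
                                                 unsigned first_val)
    {
        return (bm::gap_word_t)
            ((last_idx << 3) | ((level & 3u) << 1) | (first_val & 1u));
    }

    inline unsigned gap_header_last_idx_sketch (bm::gap_word_t h) { return h >> 3; }
    inline unsigned gap_header_level_sketch    (bm::gap_word_t h) { return (h >> 1) & 3u; }
    inline unsigned gap_header_first_val_sketch(bm::gap_word_t h) { return h & 1u; }

For instance, the header written by gap_set_all(buf, bm::gap_max_bits, 1) decodes to last-element index 1, an unchanged level, and first-run value 1.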
template<typename T> 02445 void set_gap_level(T* buf, unsigned level) 02446 { 02447 BM_ASSERT(level < bm::gap_levels); 02448 *buf = ((level & 3) << 1) | (*buf & 1) | (*buf & ~7); 02449 } 02450 02451 02460 template<typename T> 02461 inline int gap_calc_level(int len, const T* glevel_len) 02462 { 02463 if (len <= (glevel_len[0]-4)) return 0; 02464 if (len <= (glevel_len[1]-4)) return 1; 02465 if (len <= (glevel_len[2]-4)) return 2; 02466 if (len <= (glevel_len[3]-4)) return 3; 02467 02468 BM_ASSERT(bm::gap_levels == 4); 02469 return -1; 02470 } 02471 02481 template<typename T> 02482 inline unsigned gap_free_elements(const T* buf, const T* glevel_len) 02483 { 02484 unsigned len = gap_length(buf); 02485 unsigned capacity = gap_capacity(buf, glevel_len); 02486 return capacity - len; 02487 } 02488 02489 02499 template<typename T> 02500 int bitcmp(const T* buf1, const T* buf2, unsigned len) 02501 { 02502 BM_ASSERT(len); 02503 const T* pend1 = buf1 + len; 02504 do 02505 { 02506 T w1 = *buf1++; 02507 T w2 = *buf2++; 02508 T diff = w1 ^ w2; 02509 02510 if (diff) 02511 { 02512 return (w1 & diff & -diff) ? 1 : -1; 02513 } 02514 02515 } while (buf1 < pend1); 02516 02517 return 0; 02518 } 02519 02520 02532 template<typename T> 02533 unsigned bit_convert_to_gap(T* BMRESTRICT dest, 02534 const unsigned* BMRESTRICT src, 02535 bm::id_t bits, 02536 unsigned dest_len) 02537 { 02538 register T* BMRESTRICT pcurr = dest; 02539 T* BMRESTRICT end = dest + dest_len; 02540 register int bitval = (*src) & 1; 02541 // *pcurr |= bitval; 02542 *pcurr = (T)bitval; 02543 02544 ++pcurr; 02545 *pcurr = 0; 02546 register unsigned bit_idx = 0; 02547 register int bitval_next; 02548 02549 unsigned val = *src; 02550 02551 do 02552 { 02553 // We can fast pace if *src == 0 or *src = 0xffffffff 02554 02555 while (val == 0 || val == 0xffffffff) 02556 { 02557 bitval_next = val ? 1 : 0; 02558 if (bitval != bitval_next) 02559 { 02560 *pcurr++ = (T)(bit_idx-1); 02561 BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2)); 02562 if (pcurr >= end) 02563 { 02564 return 0; // OUT of memory 02565 } 02566 bitval = bitval_next; 02567 } 02568 bit_idx += sizeof(*src) * 8; 02569 if (bit_idx >= bits) 02570 { 02571 goto complete; 02572 } 02573 ++src; 02574 val = *src; 02575 } 02576 02577 02578 register unsigned mask = 1; 02579 while (mask) 02580 { 02581 // Now plain bitshifting. TODO: Optimization wanted. 02582 02583 bitval_next = val & mask ? 
1 : 0; 02584 if (bitval != bitval_next) 02585 { 02586 *pcurr++ = (T)(bit_idx-1); 02587 BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2)); 02588 bitval = bitval_next; 02589 if (pcurr >= end) 02590 { 02591 return 0; // OUT of memory 02592 } 02593 } 02594 02595 mask <<= 1; 02596 ++bit_idx; 02597 02598 } // while mask 02599 02600 if (bit_idx >= bits) 02601 { 02602 goto complete; 02603 } 02604 02605 ++src; 02606 val = *src; 02607 02608 } while(1); 02609 02610 complete: 02611 *pcurr = (T)(bit_idx-1); 02612 unsigned len = (unsigned)(pcurr - dest); 02613 *dest = (T)((*dest & 7) + (len << 3)); 02614 return len; 02615 } 02616 02617 02622 template<class T, class F> 02623 void for_each_gap_dbit(const T* buf, F& func) 02624 { 02625 const T* pcurr = buf; 02626 const T* pend = pcurr + (*pcurr >> 3); 02627 02628 ++pcurr; 02629 02630 unsigned prev = 0; 02631 unsigned first_inc; 02632 02633 if (*buf & 1) 02634 { 02635 first_inc = 0; 02636 unsigned to = *pcurr; 02637 for (unsigned i = 0; i <= to; ++i) 02638 { 02639 func(1); 02640 } 02641 prev = to; 02642 ++pcurr; 02643 } 02644 else 02645 { 02646 first_inc = 1; 02647 } 02648 ++pcurr; // set GAP to 1 02649 02650 while (pcurr <= pend) 02651 { 02652 unsigned from = *(pcurr-1)+1; 02653 unsigned to = *pcurr; 02654 if (first_inc) 02655 { 02656 func(from - prev + first_inc); 02657 first_inc = 0; 02658 } 02659 else 02660 { 02661 func(from - prev); 02662 } 02663 02664 for (unsigned i = from+1; i <= to; ++i) 02665 { 02666 func(1); 02667 } 02668 prev = to; 02669 pcurr += 2; // jump to the next positive GAP 02670 } 02671 } 02672 02677 template<typename D, typename T> 02678 D gap_convert_to_arr(D* BMRESTRICT dest, 02679 const T* BMRESTRICT buf, 02680 unsigned dest_len, 02681 bool invert = false) 02682 { 02683 register const T* BMRESTRICT pcurr = buf; 02684 register const T* pend = pcurr + (*pcurr >> 3); 02685 02686 D* BMRESTRICT dest_curr = dest; 02687 ++pcurr; 02688 02689 int bitval = (*buf) & 1; 02690 if (invert) 02691 bitval = !bitval; // invert the GAP buffer 02692 02693 if (bitval) 02694 { 02695 if (unsigned(*pcurr + 1) >= dest_len) 02696 return 0; // insufficient space 02697 dest_len -= *pcurr; 02698 T to = *pcurr; 02699 for (T i = 0; ;++i) 02700 { 02701 *dest_curr++ = i; 02702 if (i == to) break; 02703 } 02704 ++pcurr; 02705 } 02706 ++pcurr; // set GAP to 1 02707 02708 while (pcurr <= pend) 02709 { 02710 unsigned pending = *pcurr - *(pcurr-1); 02711 if (pending >= dest_len) 02712 return 0; 02713 dest_len -= pending; 02714 T from = *(pcurr-1)+1; 02715 T to = *pcurr; 02716 for (T i = from; ;++i) 02717 { 02718 *dest_curr++ = i; 02719 if (i == to) break; 02720 } 02721 pcurr += 2; // jump to the next positive GAP 02722 } 02723 return (D) (dest_curr - dest); 02724 } 02725 02726 02727 02736 inline 02737 bm::id_t bit_block_calc_count(const bm::word_t* block, 02738 const bm::word_t* block_end) 02739 { 02740 BM_ASSERT(block < block_end); 02741 bm::id_t count = 0; 02742 02743 #ifdef BMVECTOPT 02744 count = VECT_BITCOUNT(block, block_end); 02745 #else 02746 #ifdef BM64OPT 02747 // 64-bit optimized algorithm. No sparse vect opt. 02748 // instead it uses 4-way parallel pipelined version 02749 02750 const bm::id64_t* b1 = (bm::id64_t*) block; 02751 const bm::id64_t* b2 = (bm::id64_t*) block_end; 02752 do 02753 { 02754 count += bitcount64_4way(b1[0], b1[1], b1[2], b1[3]); 02755 b1 += 4; 02756 } while (b1 < b2); 02757 #else 02758 // For 32 bit code the fastest method is 02759 // to use bitcount table for each byte in the block. 
02760 // As optimization for sparse bitsets used bits accumulator 02761 // to collect ON bits using bitwise OR. 02762 bm::word_t acc = *block++; 02763 do 02764 { 02765 bm::word_t in = *block++; 02766 bm::word_t acc_prev = acc; 02767 acc |= in; 02768 if (acc_prev &= in) // accumulator miss: counting bits 02769 { 02770 BM_INCWORD_BITCOUNT(count, acc); 02771 acc = acc_prev; 02772 } 02773 } while (block < block_end); 02774 02775 BM_INCWORD_BITCOUNT(count, acc); // count-in remaining accumulator 02776 02777 #endif 02778 #endif 02779 return count; 02780 } 02781 02782 02783 02796 inline 02797 bm::id_t bit_count_change(bm::word_t w) 02798 { 02799 unsigned count = 1; 02800 w ^= (w >> 1); 02801 02802 BM_INCWORD_BITCOUNT(count, w); 02803 count -= (w >> ((sizeof(w) * 8) - 1)); 02804 return count; 02805 } 02806 02807 02812 inline 02813 void bit_count_change32(const bm::word_t* block, 02814 const bm::word_t* block_end, 02815 unsigned* bit_count, 02816 unsigned* gap_count) 02817 { 02818 BM_ASSERT(block < block_end); 02819 BM_ASSERT(bit_count); 02820 BM_ASSERT(gap_count); 02821 02822 *gap_count = 1; 02823 *bit_count = 0; 02824 02825 bm::word_t w, w0, w_prev, w_l; 02826 w = w0 = *block; 02827 02828 BM_INCWORD_BITCOUNT(*bit_count, w); 02829 02830 const int w_shift = sizeof(w) * 8 - 1; 02831 w ^= (w >> 1); 02832 BM_INCWORD_BITCOUNT(*gap_count, w); 02833 *gap_count -= (w_prev = (w0 >> w_shift)); // negative value correction 02834 02835 for (++block ;block < block_end; ++block) 02836 { 02837 w = w0 = *block; 02838 ++(*gap_count); 02839 02840 if (!w) 02841 { 02842 *gap_count -= !w_prev; 02843 w_prev = 0; 02844 } 02845 else 02846 { 02847 BM_INCWORD_BITCOUNT(*bit_count, w); 02848 02849 w ^= (w >> 1); 02850 BM_INCWORD_BITCOUNT(*gap_count, w); 02851 02852 w_l = w0 & 1; 02853 *gap_count -= (w0 >> w_shift); // negative value correction 02854 *gap_count -= !(w_prev ^ w_l); // word border correction 02855 02856 w_prev = (w0 >> w_shift); 02857 } 02858 } // for 02859 02860 } 02861 02862 02874 inline 02875 bm::id_t bit_block_calc_count_change(const bm::word_t* block, 02876 const bm::word_t* block_end, 02877 unsigned* bit_count) 02878 { 02879 #if defined(BMSSE2OPT) || defined(BMSSE42OPT) 02880 02881 #ifdef BMSSE42OPT 02882 return sse4_bit_block_calc_count_change( 02883 (const __m128i*)block, (const __m128i*)block_end, bit_count); 02884 #else 02885 # ifdef BMSSE2OPT 02886 return sse2_bit_block_calc_count_change( 02887 (const __m128i*)block, (const __m128i*)block_end, bit_count); 02888 # endif 02889 #endif 02890 02891 #else // non-SSE code 02892 02893 BM_ASSERT(block < block_end); 02894 BM_ASSERT(bit_count); 02895 02896 02897 #ifdef BM64OPT 02898 bm::id_t count = 1; 02899 *bit_count = 0; 02900 02901 // 64-bit optimized algorithm. 
02902 02903 const bm::id64_t* b1 = (bm::id64_t*) block; 02904 const bm::id64_t* b2 = (bm::id64_t*) block_end; 02905 02906 bm::id64_t w, w0, w_prev, w_l; 02907 w = w0 = *b1; 02908 02909 *bit_count = word_bitcount64(w); 02910 02911 const int w_shift = sizeof(w) * 8 - 1; 02912 w ^= (w >> 1); 02913 count += word_bitcount64(w); 02914 count -= (w_prev = (w0 >> w_shift)); // negative value correction 02915 02916 02917 for (++b1 ;b1 < b2; ++b1) 02918 { 02919 w = w0 = *b1; 02920 02921 ++count; 02922 02923 if (!w) 02924 { 02925 count -= !w_prev; 02926 w_prev = 0; 02927 } 02928 else 02929 { 02930 *bit_count += word_bitcount64(w); 02931 w ^= (w >> 1); 02932 count += word_bitcount64(w); 02933 02934 w_l = w0 & 1; 02935 count -= (w0 >> w_shift); // negative value correction 02936 count -= !(w_prev ^ w_l); // word border correction 02937 02938 w_prev = (w0 >> w_shift); 02939 } 02940 } // for 02941 return count; 02942 02943 #else 02944 unsigned gap_count; 02945 bit_count_change32(block, block_end, bit_count, &gap_count); 02946 return gap_count; 02947 #endif 02948 02949 #endif 02950 } 02951 02952 02960 inline 02961 bm::id_t bit_block_calc_count_range(const bm::word_t* block, 02962 bm::word_t left, 02963 bm::word_t right) 02964 { 02965 BM_ASSERT(left <= right); 02966 unsigned nword, nbit; 02967 nbit = left & bm::set_word_mask; 02968 const bm::word_t* word = 02969 block + (nword = unsigned(left >> bm::set_word_shift)); 02970 if (left == right) // special case (only 1 bit to check) 02971 { 02972 return (*word >> nbit) & 1; 02973 } 02974 bm::id_t count = 0; 02975 02976 unsigned acc; 02977 unsigned bitcount = right - left + 1; 02978 02979 if (nbit) // starting position is not aligned 02980 { 02981 unsigned right_margin = nbit + (right - left); 02982 02983 if (right_margin < 32) 02984 { 02985 unsigned mask = 02986 block_set_table<true>::_right[nbit] & 02987 block_set_table<true>::_left[right_margin]; 02988 acc = *word & mask; 02989 02990 BM_INCWORD_BITCOUNT(count, acc); 02991 return count; 02992 } 02993 else 02994 { 02995 acc = *word & block_set_table<true>::_right[nbit]; 02996 BM_INCWORD_BITCOUNT(count, acc); 02997 bitcount -= 32 - nbit; 02998 } 02999 ++word; 03000 } 03001 03002 // now when we are word aligned, we can count bits the usual way 03003 for ( ;bitcount >= 32; bitcount -= 32) 03004 { 03005 acc = *word++; 03006 BM_INCWORD_BITCOUNT(count, acc); 03007 } 03008 03009 if (bitcount) // we have a tail to count 03010 { 03011 acc = (*word) & block_set_table<true>::_left[bitcount-1]; 03012 BM_INCWORD_BITCOUNT(count, acc); 03013 } 03014 03015 return count; 03016 } 03017 03018 03026 inline 03027 bm::id_t bit_block_any_range(const bm::word_t* block, 03028 bm::word_t left, 03029 bm::word_t right) 03030 { 03031 BM_ASSERT(left <= right); 03032 03033 unsigned nbit = left; // unsigned(left & bm::set_block_mask); 03034 unsigned nword = unsigned(nbit >> bm::set_word_shift); 03035 nbit &= bm::set_word_mask; 03036 03037 const bm::word_t* word = block + nword; 03038 03039 if (left == right) // special case (only 1 bit to check) 03040 { 03041 return (*word >> nbit) & 1; 03042 } 03043 unsigned acc; 03044 unsigned bitcount = right - left + 1; 03045 03046 if (nbit) // starting position is not aligned 03047 { 03048 unsigned right_margin = nbit + (right - left); 03049 if (right_margin < 32) 03050 { 03051 unsigned mask = 03052 block_set_table<true>::_right[nbit] & 03053 block_set_table<true>::_left[right_margin]; 03054 acc = *word & mask; 03055 return acc; 03056 } 03057 else 03058 { 03059 acc = *word & 
block_set_table<true>::_right[nbit]; 03060 if (acc) 03061 return acc; 03062 bitcount -= 32 - nbit; 03063 } 03064 ++word; 03065 } 03066 03067 // now when we are word aligned, we can check bits the usual way 03068 for ( ;bitcount >= 32; bitcount -= 32) 03069 { 03070 acc = *word++; 03071 if (acc) 03072 return acc; 03073 } 03074 03075 if (bitcount) // we have a tail to count 03076 { 03077 acc = (*word) & block_set_table<true>::_left[bitcount-1]; 03078 if (acc) 03079 return acc; 03080 } 03081 03082 return 0; 03083 } 03084 03085 03086 03087 // ---------------------------------------------------------------------- 03088 03092 template<typename T> void bit_invert(T* start, T* end) 03093 { 03094 #ifdef BMVECTOPT 03095 VECT_INVERT_ARR(start, end); 03096 #else 03097 do 03098 { 03099 start[0] = ~start[0]; 03100 start[1] = ~start[1]; 03101 start[2] = ~start[2]; 03102 start[3] = ~start[3]; 03103 start+=4; 03104 } while (start < end); 03105 #endif 03106 } 03107 03108 // ---------------------------------------------------------------------- 03109 03113 inline bool is_bits_one(const bm::wordop_t* start, 03114 const bm::wordop_t* end) 03115 { 03116 do 03117 { 03118 bm::wordop_t tmp = 03119 start[0] & start[1] & start[2] & start[3]; 03120 if (tmp != bm::all_bits_mask) 03121 return false; 03122 start += 4; 03123 } while (start < end); 03124 03125 return true; 03126 } 03127 03128 // ---------------------------------------------------------------------- 03129 03130 03134 inline bool bit_is_all_zero(const bm::wordop_t* start, 03135 const bm::wordop_t* end) 03136 { 03137 do 03138 { 03139 bm::wordop_t tmp = 03140 start[0] | start[1] | start[2] | start[3]; 03141 if (tmp) 03142 return false; 03143 start += 4; 03144 } while (start < end); 03145 03146 return true; 03147 } 03148 03149 // ---------------------------------------------------------------------- 03150 03151 // GAP blocks manipulation functions: 03152 03154 BMFORCEINLINE unsigned and_op(unsigned v1, unsigned v2) 03155 { 03156 return v1 & v2; 03157 } 03158 03159 03161 BMFORCEINLINE unsigned xor_op(unsigned v1, unsigned v2) 03162 { 03163 return v1 ^ v2; 03164 } 03165 03166 03168 BMFORCEINLINE unsigned or_op(unsigned v1, unsigned v2) 03169 { 03170 return v1 | v2; 03171 } 03172 03174 BMFORCEINLINE unsigned sub_op(unsigned v1, unsigned v2) 03175 { 03176 return v1 & ~v2; 03177 } 03178 03179 03196 BMFORCEINLINE 03197 gap_word_t* gap_operation_and(const gap_word_t* BMRESTRICT vect1, 03198 const gap_word_t* BMRESTRICT vect2, 03199 gap_word_t* BMRESTRICT tmp_buf, 03200 unsigned& dsize) 03201 { 03202 gap_buff_op(tmp_buf, vect1, 0, vect2, 0, and_op, dsize); 03203 return tmp_buf; 03204 } 03205 03220 BMFORCEINLINE 03221 unsigned gap_operation_any_and(const gap_word_t* BMRESTRICT vect1, 03222 const gap_word_t* BMRESTRICT vect2) 03223 { 03224 return gap_buff_any_op(vect1, 0, vect2, 0, and_op); 03225 } 03226 03227 03237 BMFORCEINLINE 03238 unsigned gap_count_and(const gap_word_t* BMRESTRICT vect1, 03239 const gap_word_t* BMRESTRICT vect2) 03240 { 03241 return gap_buff_count_op(vect1, vect2, and_op); 03242 } 03243 03244 03245 03262 BMFORCEINLINE 03263 gap_word_t* gap_operation_xor(const gap_word_t* BMRESTRICT vect1, 03264 const gap_word_t* BMRESTRICT vect2, 03265 gap_word_t* BMRESTRICT tmp_buf, 03266 unsigned& dsize) 03267 { 03268 gap_buff_op(tmp_buf, vect1, 0, vect2, 0, bm::xor_op, dsize); 03269 return tmp_buf; 03270 } 03271 03272 03287 BMFORCEINLINE 03288 unsigned gap_operation_any_xor(const gap_word_t* BMRESTRICT vect1, 03289 const gap_word_t* BMRESTRICT vect2) 03290 { 
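    // Illustrative note (not part of the original header): each GAP set
    // operation above comes in three flavours -- gap_operation_*() materializes
    // the result, gap_operation_any_*() only tests whether the result is
    // non-empty, and gap_count_*() returns its population count.  A usage
    // sketch, assuming gap1 and gap2 are valid GAP-compressed blocks obtained
    // elsewhere and that 2 * bm::gap_max_buff_len words of scratch space are
    // sufficient (the buffer sizing here is an assumption):
    /*
        bm::gap_word_t tmp_buf[bm::gap_max_buff_len * 2];
        unsigned dsize = 0;
        bm::gap_word_t* res = bm::gap_operation_and(gap1, gap2, tmp_buf, dsize);
        unsigned any = bm::gap_operation_any_and(gap1, gap2); // != 0 -> AND non-empty
        unsigned cnt = bm::gap_count_and(gap1, gap2);         // popcount of the AND
    */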
03291 return gap_buff_any_op(vect1, 0, vect2, 0, bm::xor_op); 03292 } 03293 03303 BMFORCEINLINE 03304 unsigned gap_count_xor(const gap_word_t* BMRESTRICT vect1, 03305 const gap_word_t* BMRESTRICT vect2) 03306 { 03307 return gap_buff_count_op(vect1, vect2, bm::xor_op); 03308 } 03309 03310 03328 inline 03329 gap_word_t* gap_operation_or(const gap_word_t* BMRESTRICT vect1, 03330 const gap_word_t* BMRESTRICT vect2, 03331 gap_word_t* BMRESTRICT tmp_buf, 03332 unsigned& dsize) 03333 { 03334 gap_buff_op(tmp_buf, vect1, 1, vect2, 1, bm::and_op, dsize); 03335 gap_invert(tmp_buf); 03336 return tmp_buf; 03337 } 03338 03348 BMFORCEINLINE 03349 unsigned gap_count_or(const gap_word_t* BMRESTRICT vect1, 03350 const gap_word_t* BMRESTRICT vect2) 03351 { 03352 return gap_buff_count_op(vect1, vect2, bm::or_op); 03353 } 03354 03355 03356 03374 inline gap_word_t* gap_operation_sub(const gap_word_t* BMRESTRICT vect1, 03375 const gap_word_t* BMRESTRICT vect2, 03376 gap_word_t* BMRESTRICT tmp_buf, 03377 unsigned& dsize) 03378 { 03379 gap_buff_op(tmp_buf, vect1, 0, vect2, 1, and_op, dsize); 03380 return tmp_buf; 03381 } 03382 03383 03398 BMFORCEINLINE 03399 unsigned gap_operation_any_sub(const gap_word_t* BMRESTRICT vect1, 03400 const gap_word_t* BMRESTRICT vect2) 03401 { 03402 return gap_buff_any_op(vect1, 0, vect2, 1, bm::and_op); 03403 } 03404 03405 03415 BMFORCEINLINE 03416 unsigned gap_count_sub(const gap_word_t* BMRESTRICT vect1, 03417 const gap_word_t* BMRESTRICT vect2) 03418 { 03419 return gap_buff_count_op(vect1, vect2, bm::sub_op); 03420 } 03421 03422 03423 // ---------------------------------------------------------------------- 03424 03425 // BIT blocks manipulation functions: 03426 03427 03436 inline 03437 void bit_block_copy(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src) 03438 { 03439 #ifdef BMVECTOPT 03440 VECT_COPY_BLOCK(dst, src, src + bm::set_block_size); 03441 #else 03442 ::memcpy(dst, src, bm::set_block_size * sizeof(bm::word_t)); 03443 #endif 03444 } 03445 03446 03456 inline 03457 void bit_block_and(bm::word_t* BMRESTRICT dst, const bm::word_t* BMRESTRICT src) 03458 { 03459 #ifdef BMVECTOPT 03460 VECT_AND_ARR(dst, src, src + bm::set_block_size); 03461 #else 03462 const bm::wordop_t* BMRESTRICT wrd_ptr = (wordop_t*)src; 03463 const bm::wordop_t* BMRESTRICT wrd_end = (wordop_t*)(src + bm::set_block_size); 03464 bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst; 03465 03466 do 03467 { 03468 dst_ptr[0] &= wrd_ptr[0]; 03469 dst_ptr[1] &= wrd_ptr[1]; 03470 dst_ptr[2] &= wrd_ptr[2]; 03471 dst_ptr[3] &= wrd_ptr[3]; 03472 03473 dst_ptr+=4; 03474 wrd_ptr+=4; 03475 } while (wrd_ptr < wrd_end); 03476 #endif 03477 } 03478 03479 03490 inline 03491 unsigned bit_block_and_count(const bm::word_t* src1, 03492 const bm::word_t* src1_end, 03493 const bm::word_t* src2) 03494 { 03495 unsigned count; 03496 #ifdef BMVECTOPT 03497 count = VECT_BITCOUNT_AND(src1, src1_end, src2); 03498 #else 03499 count = 0; 03500 # ifdef BM64OPT 03501 const bm::id64_t* b1 = (bm::id64_t*) src1; 03502 const bm::id64_t* b1_end = (bm::id64_t*) src1_end; 03503 const bm::id64_t* b2 = (bm::id64_t*) src2; 03504 do 03505 { 03506 count += bitcount64_4way(b1[0] & b2[0], 03507 b1[1] & b2[1], 03508 b1[2] & b2[2], 03509 b1[3] & b2[3]); 03510 b1 += 4; 03511 b2 += 4; 03512 } while (b1 < b1_end); 03513 # else 03514 do 03515 { 03516 BM_INCWORD_BITCOUNT(count, src1[0] & src2[0]); 03517 BM_INCWORD_BITCOUNT(count, src1[1] & src2[1]); 03518 BM_INCWORD_BITCOUNT(count, src1[2] & src2[2]); 03519 BM_INCWORD_BITCOUNT(count, src1[3] & src2[3]); 
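    // Illustrative note (not part of the original header): gap_operation_or()
    // and gap_operation_sub() above are both built from the AND kernel by
    // toggling the inversion flags passed to gap_buff_op -- De Morgan's law
    // gives a | b == ~(~a & ~b), and set difference is a & ~b.  The same
    // identities checked on plain machine words:
    /*
        unsigned a = 0x0F0Fu, b = 0x00FFu;
        bool or_ok  = ((a | b) == ~(~a & ~b));        // true for any a, b
        bool sub_ok = (bm::sub_op(a, b) == (a & ~b)); // sub_op is AND-NOT
    */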
03520 03521 src1+=4; 03522 src2+=4; 03523 } while (src1 < src1_end); 03524 # endif 03525 #endif 03526 return count; 03527 } 03528 03529 03540 inline 03541 unsigned bit_block_and_any(const bm::word_t* src1, 03542 const bm::word_t* src1_end, 03543 const bm::word_t* src2) 03544 { 03545 unsigned count = 0; 03546 do 03547 { 03548 count = (src1[0] & src2[0]) | 03549 (src1[1] & src2[1]) | 03550 (src1[2] & src2[2]) | 03551 (src1[3] & src2[3]); 03552 03553 src1+=4; src2+=4; 03554 } while ((src1 < src1_end) && (count == 0)); 03555 return count; 03556 } 03557 03558 03559 03560 03571 inline 03572 unsigned bit_block_xor_count(const bm::word_t* BMRESTRICT src1, 03573 const bm::word_t* BMRESTRICT src1_end, 03574 const bm::word_t* BMRESTRICT src2) 03575 { 03576 unsigned count; 03577 #ifdef BMVECTOPT 03578 count = VECT_BITCOUNT_XOR(src1, src1_end, src2); 03579 #else 03580 count = 0; 03581 # ifdef BM64OPT 03582 const bm::id64_t* b1 = (bm::id64_t*) src1; 03583 const bm::id64_t* b1_end = (bm::id64_t*) src1_end; 03584 const bm::id64_t* b2 = (bm::id64_t*) src2; 03585 do 03586 { 03587 count += bitcount64_4way(b1[0] ^ b2[0], 03588 b1[1] ^ b2[1], 03589 b1[2] ^ b2[2], 03590 b1[3] ^ b2[3]); 03591 b1 += 4; 03592 b2 += 4; 03593 } while (b1 < b1_end); 03594 # else 03595 do 03596 { 03597 BM_INCWORD_BITCOUNT(count, src1[0] ^ src2[0]); 03598 BM_INCWORD_BITCOUNT(count, src1[1] ^ src2[1]); 03599 BM_INCWORD_BITCOUNT(count, src1[2] ^ src2[2]); 03600 BM_INCWORD_BITCOUNT(count, src1[3] ^ src2[3]); 03601 03602 src1+=4; 03603 src2+=4; 03604 } while (src1 < src1_end); 03605 # endif 03606 #endif 03607 return count; 03608 } 03609 03610 03621 inline 03622 unsigned bit_block_xor_any(const bm::word_t* BMRESTRICT src1, 03623 const bm::word_t* BMRESTRICT src1_end, 03624 const bm::word_t* BMRESTRICT src2) 03625 { 03626 unsigned count = 0; 03627 do 03628 { 03629 count = (src1[0] ^ src2[0]) | 03630 (src1[1] ^ src2[1]) | 03631 (src1[2] ^ src2[2]) | 03632 (src1[3] ^ src2[3]); 03633 03634 src1+=4; src2+=4; 03635 } while ((src1 < src1_end) && (count == 0)); 03636 return count; 03637 } 03638 03639 03640 03641 03652 inline 03653 unsigned bit_block_sub_count(const bm::word_t* BMRESTRICT src1, 03654 const bm::word_t* BMRESTRICT src1_end, 03655 const bm::word_t* BMRESTRICT src2) 03656 { 03657 unsigned count; 03658 #ifdef BMVECTOPT 03659 count = VECT_BITCOUNT_SUB(src1, src1_end, src2); 03660 #else 03661 count = 0; 03662 # ifdef BM64OPT 03663 const bm::id64_t* b1 = (bm::id64_t*) src1; 03664 const bm::id64_t* b1_end = (bm::id64_t*) src1_end; 03665 const bm::id64_t* b2 = (bm::id64_t*) src2; 03666 do 03667 { 03668 count += bitcount64_4way(b1[0] & ~b2[0], 03669 b1[1] & ~b2[1], 03670 b1[2] & ~b2[2], 03671 b1[3] & ~b2[3]); 03672 b1 += 4; 03673 b2 += 4; 03674 } while (b1 < b1_end); 03675 # else 03676 do 03677 { 03678 BM_INCWORD_BITCOUNT(count, src1[0] & ~src2[0]); 03679 BM_INCWORD_BITCOUNT(count, src1[1] & ~src2[1]); 03680 BM_INCWORD_BITCOUNT(count, src1[2] & ~src2[2]); 03681 BM_INCWORD_BITCOUNT(count, src1[3] & ~src2[3]); 03682 03683 src1+=4; 03684 src2+=4; 03685 } while (src1 < src1_end); 03686 # endif 03687 #endif 03688 return count; 03689 } 03690 03701 inline 03702 unsigned bit_block_sub_any(const bm::word_t* BMRESTRICT src1, 03703 const bm::word_t* BMRESTRICT src1_end, 03704 const bm::word_t* BMRESTRICT src2) 03705 { 03706 unsigned count = 0; 03707 do 03708 { 03709 count = (src1[0] & ~src2[0]) | 03710 (src1[1] & ~src2[1]) | 03711 (src1[2] & ~src2[2]) | 03712 (src1[3] & ~src2[3]); 03713 03714 src1+=4; src2+=4; 03715 } while ((src1 < src1_end) && (count 
== 0)); 03716 return count; 03717 } 03718 03719 03720 03731 inline 03732 unsigned bit_block_or_count(const bm::word_t* src1, 03733 const bm::word_t* src1_end, 03734 const bm::word_t* src2) 03735 { 03736 unsigned count; 03737 #ifdef BMVECTOPT 03738 count = VECT_BITCOUNT_OR(src1, src1_end, src2); 03739 #else 03740 count = 0; 03741 # ifdef BM64OPT 03742 const bm::id64_t* b1 = (bm::id64_t*) src1; 03743 const bm::id64_t* b1_end = (bm::id64_t*) src1_end; 03744 const bm::id64_t* b2 = (bm::id64_t*) src2; 03745 do 03746 { 03747 count += bitcount64_4way(b1[0] | b2[0], 03748 b1[1] | b2[1], 03749 b1[2] | b2[2], 03750 b1[3] | b2[3]); 03751 b1 += 4; 03752 b2 += 4; 03753 } while (b1 < b1_end); 03754 # else 03755 do 03756 { 03757 BM_INCWORD_BITCOUNT(count, src1[0] | src2[0]); 03758 BM_INCWORD_BITCOUNT(count, src1[1] | src2[1]); 03759 BM_INCWORD_BITCOUNT(count, src1[2] | src2[2]); 03760 BM_INCWORD_BITCOUNT(count, src1[3] | src2[3]); 03761 03762 src1+=4; 03763 src2+=4; 03764 } while (src1 < src1_end); 03765 # endif 03766 #endif 03767 return count; 03768 } 03769 03780 inline 03781 unsigned bit_block_or_any(const bm::word_t* BMRESTRICT src1, 03782 const bm::word_t* BMRESTRICT src1_end, 03783 const bm::word_t* BMRESTRICT src2) 03784 { 03785 unsigned count = 0; 03786 do 03787 { 03788 count = (src1[0] | src2[0]) | 03789 (src1[1] | src2[1]) | 03790 (src1[2] | src2[2]) | 03791 (src1[3] | src2[3]); 03792 03793 src1+=4; src2+=4; 03794 } while ((src1 < src1_end) && (count == 0)); 03795 return count; 03796 } 03797 03798 03799 03800 03813 inline bm::word_t* bit_operation_and(bm::word_t* BMRESTRICT dst, 03814 const bm::word_t* BMRESTRICT src) 03815 { 03816 BM_ASSERT(dst || src); 03817 03818 bm::word_t* ret = dst; 03819 03820 if (IS_VALID_ADDR(dst)) // The destination block already exists 03821 { 03822 03823 if (!IS_VALID_ADDR(src)) 03824 { 03825 if (IS_EMPTY_BLOCK(src)) 03826 { 03827 //If the source block is zero 03828 //just clean the destination block 03829 return 0; 03830 } 03831 } 03832 else 03833 { 03834 // Regular operation AND on the whole block. 03835 bit_block_and(dst, src); 03836 } 03837 } 03838 else // The destination block does not exist yet 03839 { 03840 if(!IS_VALID_ADDR(src)) 03841 { 03842 if(IS_EMPTY_BLOCK(src)) 03843 { 03844 // The source block is empty. 03845 // One argument empty - all result is empty. 03846 return 0; 03847 } 03848 // Here we have nothing to do. 03849 // Src block is all ON, dst block remains as it is 03850 } 03851 else // destination block does not exists, src - valid block 03852 { 03853 if (IS_FULL_BLOCK(dst)) 03854 { 03855 return const_cast<bm::word_t*>(src); 03856 } 03857 // Nothng to do. 03858 // Dst block is all ZERO no combination required. 
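            // Illustrative note (not part of the original header):
            // bit_operation_and() (like the OR/SUB/XOR variants below) works on
            // block pointers that may be real bit blocks of bm::set_block_size
            // words or the special empty/full sentinels tested by
            // IS_VALID_ADDR / IS_EMPTY_BLOCK / IS_FULL_BLOCK from bmdef.h.  The
            // return value tells the caller what to do with the destination; a
            // caller-side sketch (block names are hypothetical):
            /*
                bm::word_t* res = bm::bit_operation_and(dst_block, src_block);
                if (res == 0)
                    ;  // result is all-zero: the destination block can be dropped
                else if (res != dst_block)
                    ;  // result aliases src (dst was a FULL block): adopt or copy src
                else
                    ;  // dst_block was AND-ed in place (or left unchanged)
            */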
03859 } 03860 } 03861 03862 return ret; 03863 } 03864 03865 03877 inline 03878 bm::id_t bit_operation_and_count(const bm::word_t* BMRESTRICT src1, 03879 const bm::word_t* BMRESTRICT src1_end, 03880 const bm::word_t* BMRESTRICT src2) 03881 { 03882 if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2)) 03883 { 03884 return 0; 03885 } 03886 return bit_block_and_count(src1, src1_end, src2); 03887 } 03888 03900 inline 03901 bm::id_t bit_operation_and_any(const bm::word_t* BMRESTRICT src1, 03902 const bm::word_t* BMRESTRICT src1_end, 03903 const bm::word_t* BMRESTRICT src2) 03904 { 03905 if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2)) 03906 { 03907 return 0; 03908 } 03909 return bit_block_and_any(src1, src1_end, src2); 03910 } 03911 03912 03913 03925 inline 03926 bm::id_t bit_operation_sub_count(const bm::word_t* BMRESTRICT src1, 03927 const bm::word_t* BMRESTRICT src1_end, 03928 const bm::word_t* BMRESTRICT src2) 03929 { 03930 if (IS_EMPTY_BLOCK(src1)) 03931 { 03932 return 0; 03933 } 03934 03935 if (IS_EMPTY_BLOCK(src2)) // nothing to diff 03936 { 03937 return bit_block_calc_count(src1, src1_end); 03938 } 03939 return bit_block_sub_count(src1, src1_end, src2); 03940 } 03941 03942 03955 inline 03956 bm::id_t bit_operation_sub_count_inv(const bm::word_t* BMRESTRICT src1, 03957 const bm::word_t* BMRESTRICT src1_end, 03958 const bm::word_t* BMRESTRICT src2) 03959 { 03960 unsigned arr_size = unsigned(src1_end - src1); 03961 return bit_operation_sub_count(src2, src2+arr_size, src1); 03962 } 03963 03964 03976 inline 03977 bm::id_t bit_operation_sub_any(const bm::word_t* BMRESTRICT src1, 03978 const bm::word_t* BMRESTRICT src1_end, 03979 const bm::word_t* BMRESTRICT src2) 03980 { 03981 if (IS_EMPTY_BLOCK(src1)) 03982 { 03983 return 0; 03984 } 03985 03986 if (IS_EMPTY_BLOCK(src2)) // nothing to diff 03987 { 03988 return !bit_is_all_zero((bm::wordop_t*)src1, (bm::wordop_t*)src1_end); 03989 } 03990 return bit_block_sub_any(src1, src1_end, src2); 03991 } 03992 03993 03994 04006 inline 04007 bm::id_t bit_operation_or_count(const bm::word_t* BMRESTRICT src1, 04008 const bm::word_t* BMRESTRICT src1_end, 04009 const bm::word_t* BMRESTRICT src2) 04010 { 04011 if (IS_EMPTY_BLOCK(src1)) 04012 { 04013 if (!IS_EMPTY_BLOCK(src2)) 04014 return bit_block_calc_count(src2, src2 + (src1_end - src1)); 04015 else 04016 return 0; // both blocks are empty 04017 } 04018 else 04019 { 04020 if (IS_EMPTY_BLOCK(src2)) 04021 return bit_block_calc_count(src1, src1_end); 04022 } 04023 04024 return bit_block_or_count(src1, src1_end, src2); 04025 } 04026 04038 inline 04039 bm::id_t bit_operation_or_any(const bm::word_t* BMRESTRICT src1, 04040 const bm::word_t* BMRESTRICT src1_end, 04041 const bm::word_t* BMRESTRICT src2) 04042 { 04043 if (IS_EMPTY_BLOCK(src1)) 04044 { 04045 if (!IS_EMPTY_BLOCK(src2)) 04046 return !bit_is_all_zero((bm::wordop_t*)src2, 04047 (bm::wordop_t*)(src2 + (src1_end - src1))); 04048 else 04049 return 0; // both blocks are empty 04050 } 04051 else 04052 { 04053 if (IS_EMPTY_BLOCK(src2)) 04054 return !bit_is_all_zero((bm::wordop_t*)src1, (bm::wordop_t*)src1_end); 04055 } 04056 04057 return bit_block_or_any(src1, src1_end, src2); 04058 } 04059 04060 04061 04071 inline void bit_block_or(bm::word_t* BMRESTRICT dst, 04072 const bm::word_t* BMRESTRICT src) 04073 { 04074 #ifdef BMVECTOPT 04075 VECT_OR_ARR(dst, src, src + bm::set_block_size); 04076 #else 04077 const bm::wordop_t* BMRESTRICT wrd_ptr = (wordop_t*)src; 04078 const bm::wordop_t* BMRESTRICT wrd_end = (wordop_t*)(src + set_block_size); 04079 bm::wordop_t* 
BMRESTRICT dst_ptr = (wordop_t*)dst; 04080 04081 do 04082 { 04083 dst_ptr[0] |= wrd_ptr[0]; 04084 dst_ptr[1] |= wrd_ptr[1]; 04085 dst_ptr[2] |= wrd_ptr[2]; 04086 dst_ptr[3] |= wrd_ptr[3]; 04087 04088 dst_ptr+=4; 04089 wrd_ptr+=4; 04090 04091 } while (wrd_ptr < wrd_end); 04092 #endif 04093 } 04094 04095 04108 inline 04109 bm::word_t* bit_operation_or(bm::word_t* BMRESTRICT dst, 04110 const bm::word_t* BMRESTRICT src) 04111 { 04112 BM_ASSERT(dst || src); 04113 04114 bm::word_t* ret = dst; 04115 04116 if (IS_VALID_ADDR(dst)) // The destination block already exists 04117 { 04118 if (!IS_VALID_ADDR(src)) 04119 { 04120 if (IS_FULL_BLOCK(src)) 04121 { 04122 // if the source block is all set 04123 // just set the destination block 04124 ::memset(dst, 0xFF, bm::set_block_size * sizeof(bm::word_t)); 04125 } 04126 } 04127 else 04128 { 04129 // Regular operation OR on the whole block 04130 bit_block_or(dst, src); 04131 } 04132 } 04133 else // The destination block does not exist yet 04134 { 04135 if (!IS_VALID_ADDR(src)) 04136 { 04137 if (IS_FULL_BLOCK(src)) 04138 { 04139 // The source block is all set, because dst does not exist 04140 // we can simply replace it. 04141 return const_cast<bm::word_t*>(FULL_BLOCK_ADDR); 04142 } 04143 } 04144 else 04145 { 04146 if (dst == 0) 04147 { 04148 // The only case when we have to allocate the new block: 04149 // Src is all zero and Dst does not exist 04150 return const_cast<bm::word_t*>(src); 04151 } 04152 } 04153 } 04154 return ret; 04155 } 04156 04166 inline 04167 void bit_block_sub(bm::word_t* BMRESTRICT dst, 04168 const bm::word_t* BMRESTRICT src) 04169 { 04170 #ifdef BMVECTOPT 04171 VECT_SUB_ARR(dst, src, src + bm::set_block_size); 04172 #else 04173 const bm::wordop_t* BMRESTRICT wrd_ptr = (wordop_t*) src; 04174 const bm::wordop_t* BMRESTRICT wrd_end = 04175 (wordop_t*) (src + bm::set_block_size); 04176 bm::wordop_t* dst_ptr = (wordop_t*)dst; 04177 04178 // Regular operation AND-NOT on the whole block. 
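    // Illustrative note (not part of the original header): logical subtraction
    // is computed word-by-word as dst &= ~src, keeping only the bits set in dst
    // and clear in src.  On a single word:
    /*
        bm::word_t d = 0xC, s = 0xA;   // 1100 and 1010
        d &= ~s;                       // d == 0x4 (0100): in d but not in s
    */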
04179 do 04180 { 04181 dst_ptr[0] &= ~wrd_ptr[0]; 04182 dst_ptr[1] &= ~wrd_ptr[1]; 04183 dst_ptr[2] &= ~wrd_ptr[2]; 04184 dst_ptr[3] &= ~wrd_ptr[3]; 04185 04186 dst_ptr+=4; 04187 wrd_ptr+=4; 04188 } while (wrd_ptr < wrd_end); 04189 #endif 04190 04191 } 04192 04193 04206 inline 04207 bm::word_t* bit_operation_sub(bm::word_t* BMRESTRICT dst, 04208 const bm::word_t* BMRESTRICT src) 04209 { 04210 BM_ASSERT(dst || src); 04211 04212 bm::word_t* ret = dst; 04213 if (IS_VALID_ADDR(dst)) // The destination block already exists 04214 { 04215 if (!IS_VALID_ADDR(src)) 04216 { 04217 if (IS_FULL_BLOCK(src)) 04218 { 04219 // If the source block is all set 04220 // just clean the destination block 04221 return 0; 04222 } 04223 } 04224 else 04225 { 04226 bit_block_sub(dst, src); 04227 } 04228 } 04229 else // The destination block does not exist yet 04230 { 04231 if (!IS_VALID_ADDR(src)) 04232 { 04233 if (IS_FULL_BLOCK(src)) 04234 { 04235 // The source block is full 04236 return 0; 04237 } 04238 } 04239 else 04240 { 04241 if (IS_FULL_BLOCK(dst)) 04242 { 04243 // The only case when we have to allocate the new block: 04244 // dst is all set and src exists 04245 return const_cast<bm::word_t*>(src); 04246 } 04247 } 04248 } 04249 return ret; 04250 } 04251 04252 04262 inline 04263 void bit_block_xor(bm::word_t* BMRESTRICT dst, 04264 const bm::word_t* BMRESTRICT src) 04265 { 04266 #ifdef BMVECTOPT 04267 VECT_XOR_ARR(dst, src, src + bm::set_block_size); 04268 #else 04269 const bm::wordop_t* BMRESTRICT wrd_ptr = (wordop_t*) src; 04270 const bm::wordop_t* BMRESTRICT wrd_end = 04271 (wordop_t*) (src + bm::set_block_size); 04272 bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst; 04273 04274 // Regular XOR operation on the whole block. 04275 do 04276 { 04277 dst_ptr[0] ^= wrd_ptr[0]; 04278 dst_ptr[1] ^= wrd_ptr[1]; 04279 dst_ptr[2] ^= wrd_ptr[2]; 04280 dst_ptr[3] ^= wrd_ptr[3]; 04281 04282 dst_ptr+=4; 04283 wrd_ptr+=4; 04284 } while (wrd_ptr < wrd_end); 04285 #endif 04286 04287 } 04288 04289 04302 inline 04303 bm::word_t* bit_operation_xor(bm::word_t* BMRESTRICT dst, 04304 const bm::word_t* BMRESTRICT src) 04305 { 04306 BM_ASSERT(dst || src); 04307 if (src == dst) return 0; // XOR rule 04308 04309 bm::word_t* ret = dst; 04310 04311 if (IS_VALID_ADDR(dst)) // The destination block already exists 04312 { 04313 if (!src) return dst; 04314 04315 bit_block_xor(dst, src); 04316 } 04317 else // The destination block does not exist yet 04318 { 04319 if (!src) return dst; // 1 xor 0 = 1 04320 04321 // Here we have two cases: 04322 // if dest block is full or zero if zero we need to copy the source 04323 // otherwise XOR loop against 0xFF... 04324 //BM_ASSERT(dst == 0); 04325 return const_cast<bm::word_t*>(src); // src is the final result 04326 } 04327 return ret; 04328 } 04329 04340 inline 04341 bm::id_t bit_operation_xor_count(const bm::word_t* BMRESTRICT src1, 04342 const bm::word_t* BMRESTRICT src1_end, 04343 const bm::word_t* BMRESTRICT src2) 04344 { 04345 if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2)) 04346 { 04347 if (IS_EMPTY_BLOCK(src1) && IS_EMPTY_BLOCK(src2)) 04348 return 0; 04349 const bm::word_t* block = IS_EMPTY_BLOCK(src1) ? 
src2 : src1; 04350 return bit_block_calc_count(block, block + (src1_end - src1)); 04351 } 04352 return bit_block_xor_count(src1, src1_end, src2); 04353 } 04354 04365 inline 04366 bm::id_t bit_operation_xor_any(const bm::word_t* BMRESTRICT src1, 04367 const bm::word_t* BMRESTRICT src1_end, 04368 const bm::word_t* BMRESTRICT src2) 04369 { 04370 if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2)) 04371 { 04372 if (IS_EMPTY_BLOCK(src1) && IS_EMPTY_BLOCK(src2)) 04373 return 0; 04374 const bm::word_t* block = IS_EMPTY_BLOCK(src1) ? src2 : src1; 04375 return !bit_is_all_zero((bm::wordop_t*)block, 04376 (bm::wordop_t*)(block + (src1_end - src1))); 04377 } 04378 return bit_block_xor_any(src1, src1_end, src2); 04379 } 04380 04381 04382 04394 template<class T> 04395 unsigned bit_count_nonzero_size(const T* blk, 04396 unsigned data_size) 04397 { 04398 BM_ASSERT(blk && data_size); 04399 unsigned count = 0; 04400 const T* blk_end = blk + data_size - 2; 04401 04402 do 04403 { 04404 if (*blk == 0) 04405 { 04406 // scan fwd to find 0 island length 04407 const T* blk_j = blk + 1; 04408 for (; blk_j < blk_end; ++blk_j) 04409 { 04410 if (*blk_j != 0) 04411 break; 04412 } 04413 blk = blk_j-1; 04414 count += sizeof(gap_word_t); 04415 } 04416 else 04417 { 04418 // scan fwd to find non-0 island length 04419 const T* blk_j = blk + 1; 04420 for ( ; blk_j < blk_end; ++blk_j) 04421 { 04422 if (*blk_j == 0) 04423 { 04424 // look ahead to identify and ignore short 0-run 04425 if (blk_j[1] | blk_j[2]) 04426 { 04427 // skip zero word 04428 ++blk_j; 04429 continue; 04430 } 04431 break; 04432 } 04433 } 04434 count += sizeof(gap_word_t); 04435 // count all bit-words now 04436 count += (blk_j - blk) * sizeof(T); 04437 blk = blk_j; 04438 } 04439 ++blk; 04440 } 04441 while(blk < blk_end); 04442 04443 return count + (2 * sizeof(T)); 04444 } 04445 04446 04455 inline 04456 int bit_find_in_block(const bm::word_t* data, 04457 unsigned nbit, 04458 bm::id_t* prev) 04459 { 04460 register bm::id_t p = *prev; 04461 int found = 0; 04462 04463 for(;;) 04464 { 04465 unsigned nword = nbit >> bm::set_word_shift; 04466 if (nword >= bm::set_block_size) break; 04467 04468 register bm::word_t val = data[nword] >> (p & bm::set_word_mask); 04469 04470 // TODO: consider BSF and de bruijn sequences here: 04471 // http://www.0xe3.com/text/ntz/ComputingTrailingZerosHOWTO.html#debruijn 04472 04473 if (val) 04474 { 04475 while((val & 1) == 0) 04476 { 04477 val >>= 1; 04478 ++nbit; 04479 ++p; 04480 } 04481 ++found; 04482 04483 break; 04484 } 04485 else 04486 { 04487 p += (bm::set_word_mask + 1) - (nbit & bm::set_word_mask); 04488 nbit += (bm::set_word_mask + 1) - (nbit & bm::set_word_mask); 04489 } 04490 } 04491 *prev = p; 04492 return found; 04493 } 04494 04502 template<typename T, typename F> 04503 void bit_for_each_4(T w, F& func) 04504 { 04505 for (unsigned sub_octet = 0; w != 0; w >>= 4, sub_octet += 4) 04506 { 04507 switch (w & 15) 04508 { 04509 case 0: // 0000 04510 break; 04511 case 1: // 0001 04512 func(sub_octet); 04513 break; 04514 case 2: // 0010 04515 func(sub_octet + 1); 04516 break; 04517 case 3: // 0011 04518 func(sub_octet, sub_octet + 1); 04519 break; 04520 case 4: // 0100 04521 func(sub_octet + 2); 04522 break; 04523 case 5: // 0101 04524 func(sub_octet, sub_octet + 2); 04525 break; 04526 case 6: // 0110 04527 func(sub_octet + 1, sub_octet + 2); 04528 break; 04529 case 7: // 0111 04530 func(sub_octet, sub_octet + 1, sub_octet + 2); 04531 break; 04532 case 8: // 1000 04533 func(sub_octet + 3); 04534 break; 04535 case 9: // 1001 04536 
func(sub_octet, sub_octet + 3); 04537 break; 04538 case 10: // 1010 04539 func(sub_octet + 1, sub_octet + 3); 04540 break; 04541 case 11: // 1011 04542 func(sub_octet, sub_octet + 1, sub_octet + 3); 04543 break; 04544 case 12: // 1100 04545 func(sub_octet + 2, sub_octet + 3); 04546 break; 04547 case 13: // 1101 04548 func(sub_octet, sub_octet + 2, sub_octet + 3); 04549 break; 04550 case 14: // 1110 04551 func(sub_octet + 1, sub_octet + 2, sub_octet + 3); 04552 break; 04553 case 15: // 1111 04554 func(sub_octet, sub_octet + 1, sub_octet + 2, sub_octet + 3); 04555 break; 04556 default: 04557 BM_ASSERT(0); 04558 break; 04559 } 04560 04561 } // for 04562 } 04563 04564 04572 template<typename T, typename F> 04573 void bit_for_each(T w, F& func) 04574 { 04575 // Note: 4-bit table method works slower than plain check approach 04576 for (unsigned octet = 0; w != 0; w >>= 8, octet += 8) 04577 { 04578 if (w & 1) func(octet + 0); 04579 if (w & 2) func(octet + 1); 04580 if (w & 4) func(octet + 2); 04581 if (w & 8) func(octet + 3); 04582 if (w & 16) func(octet + 4); 04583 if (w & 32) func(octet + 5); 04584 if (w & 64) func(octet + 6); 04585 if (w & 128) func(octet + 7); 04586 04587 } // for 04588 } 04589 04593 template<typename B> class copy_to_array_functor 04594 { 04595 public: 04596 copy_to_array_functor(B* bits): bp_(bits) 04597 {} 04598 04599 B* ptr() { return bp_; } 04600 04601 void operator()(unsigned bit_idx) { *bp_++ = (B)bit_idx; } 04602 04603 void operator()(unsigned bit_idx0, 04604 unsigned bit_idx1) 04605 { 04606 bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1; 04607 bp_+=2; 04608 } 04609 04610 void operator()(unsigned bit_idx0, 04611 unsigned bit_idx1, 04612 unsigned bit_idx2) 04613 { 04614 bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1; bp_[2] = (B)bit_idx2; 04615 bp_+=3; 04616 } 04617 04618 void operator()(unsigned bit_idx0, 04619 unsigned bit_idx1, 04620 unsigned bit_idx2, 04621 unsigned bit_idx3) 04622 { 04623 bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1; 04624 bp_[2] = (B)bit_idx2; bp_[3] = (B)bit_idx3; 04625 bp_+=4; 04626 } 04627 04628 private: 04629 copy_to_array_functor(const copy_to_array_functor&); 04630 copy_to_array_functor& operator=(const copy_to_array_functor&); 04631 private: 04632 B* bp_; 04633 }; 04634 04638 template<typename B> class copy_to_array_functor_inc 04639 { 04640 public: 04641 copy_to_array_functor_inc(B* bits, unsigned base_idx) 04642 : bp_(bits), base_idx_(base_idx) 04643 {} 04644 04645 B* ptr() { return bp_; } 04646 04647 void operator()(unsigned bit_idx) 04648 { 04649 *bp_++ = (B)(bit_idx + base_idx_); 04650 } 04651 04652 04653 void operator()(unsigned bit_idx0, 04654 unsigned bit_idx1) 04655 { 04656 bp_[0]=(B)(bit_idx0+base_idx_);bp_[1]=(B)(bit_idx1+base_idx_); 04657 bp_+=2; 04658 } 04659 04660 void operator()(unsigned bit_idx0, 04661 unsigned bit_idx1, 04662 unsigned bit_idx2) 04663 { 04664 bp_[0]=(B)(bit_idx0+base_idx_);bp_[1]=(B)(bit_idx1+base_idx_); 04665 bp_[2]=(B)(bit_idx2+base_idx_); 04666 bp_+=3; 04667 } 04668 04669 void operator()(unsigned bit_idx0, 04670 unsigned bit_idx1, 04671 unsigned bit_idx2, 04672 unsigned bit_idx3) 04673 { 04674 bp_[0]=(B)(bit_idx0+base_idx_);bp_[1]=(B)(bit_idx1+base_idx_); 04675 bp_[2]=(B)(bit_idx2+base_idx_);bp_[3]=(B)(bit_idx3+base_idx_); 04676 bp_+=4; 04677 } 04678 04679 private: 04680 copy_to_array_functor_inc(const copy_to_array_functor_inc&); 04681 copy_to_array_functor_inc& operator=(const copy_to_array_functor_inc&); 04682 private: 04683 B* bp_; 04684 unsigned base_idx_; 04685 }; 04686 04687 04696 template<typename 
T,typename B> unsigned bit_list_4(T w, B* bits) 04697 { 04698 copy_to_array_functor<B> func(bits); 04699 bit_for_each_4(w, func); 04700 return (unsigned)(func.ptr() - bits); 04701 } 04702 04711 template<typename T,typename B> unsigned bit_list(T w, B* bits) 04712 { 04713 copy_to_array_functor<B> func(bits); 04714 bit_for_each(w, func); 04715 return (unsigned)(func.ptr() - bits); 04716 } 04717 04718 04723 inline 04724 bm::set_representation best_representation(unsigned bit_count, 04725 unsigned total_possible_bitcount, 04726 unsigned gap_count, 04727 unsigned block_size) 04728 { 04729 unsigned arr_size = sizeof(bm::gap_word_t) * bit_count + sizeof(bm::gap_word_t); 04730 unsigned gap_size = sizeof(bm::gap_word_t) * gap_count + sizeof(bm::gap_word_t); 04731 unsigned inv_arr_size = sizeof(bm::gap_word_t) * (total_possible_bitcount - bit_count) + sizeof(bm::gap_word_t); 04732 04733 if ((gap_size < block_size) && (gap_size < arr_size) && (gap_size < inv_arr_size)) 04734 { 04735 return bm::set_gap; 04736 } 04737 04738 if (arr_size < inv_arr_size) 04739 { 04740 if ((arr_size < block_size) && (arr_size < gap_size)) 04741 { 04742 return bm::set_array1; 04743 } 04744 } 04745 else 04746 { 04747 if ((inv_arr_size < block_size) && (inv_arr_size < gap_size)) 04748 { 04749 return bm::set_array0; 04750 } 04751 } 04752 return bm::set_bitset; 04753 } 04754 04759 template<typename T> T bit_convert_to_arr(T* BMRESTRICT dest, 04760 const unsigned* BMRESTRICT src, 04761 bm::id_t bits, 04762 unsigned dest_len, 04763 unsigned mask = 0) 04764 { 04765 T* BMRESTRICT pcurr = dest; 04766 for(unsigned bit_idx=0; bit_idx < bits; ++src,bit_idx += sizeof(*src) * 8) 04767 { 04768 unsigned val = *src ^ mask; // possible to invert value by XOR 0xFF.. 04769 if (val == 0) 04770 { 04771 continue; 04772 } 04773 if (pcurr + sizeof(val)*8 >= dest + dest_len) // insufficient space 04774 { 04775 return 0; 04776 } 04777 04778 copy_to_array_functor_inc<T> func(pcurr, bit_idx); 04779 bit_for_each_4(val, func); 04780 unsigned word_bit_cnt = func.ptr() - pcurr; 04781 pcurr += word_bit_cnt; 04782 04783 } // for 04784 return (T)(pcurr - dest); 04785 } 04786 04787 04788 04789 04796 /* 04797 template<typename T> T bit_convert_to_arr2(T* BMRESTRICT dest, 04798 const unsigned* BMRESTRICT src, 04799 bm::id_t bits, 04800 unsigned dest_len) 04801 { 04802 register T* BMRESTRICT pcurr = dest; 04803 T* BMRESTRICT end = dest + dest_len; 04804 unsigned bit_idx = 0; 04805 04806 do 04807 { 04808 register unsigned val = *src; 04809 // We can skip if *src == 0 04810 04811 while (val == 0) 04812 { 04813 bit_idx += sizeof(*src) * 8; 04814 if (bit_idx >= bits) 04815 { 04816 return (T)(pcurr - dest); 04817 } 04818 val = *(++src); 04819 } 04820 04821 if (pcurr + sizeof(val)*8 > end) // insufficient space 04822 { 04823 return 0; 04824 } 04825 04826 for (int i = 0; i < 32; i+=4) 04827 { 04828 if (val & 1) 04829 *pcurr++ = bit_idx; 04830 val >>= 1; ++bit_idx; 04831 if (val & 1) 04832 *pcurr++ = bit_idx; 04833 val >>= 1; ++bit_idx; 04834 if (val & 1) 04835 *pcurr++ = bit_idx; 04836 val >>= 1; ++bit_idx; 04837 if (val & 1) 04838 *pcurr++ = bit_idx; 04839 val >>= 1; ++bit_idx; 04840 } 04841 04842 if (bits <= bit_idx) 04843 break; 04844 04845 val = *(++src); 04846 } while (1); 04847 04848 return (T)(pcurr - dest); 04849 } 04850 */ 04851 04852 04857 template<typename T> 04858 unsigned gap_overhead(const T* length, 04859 const T* length_end, 04860 const T* glevel_len) 04861 { 04862 BM_ASSERT(length && length_end && glevel_len); 04863 04864 unsigned overhead = 0; 04865 
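    // Illustrative note (not part of the original header): bit_list_4() above
    // expands a word into the indices of its set bits by dispatching on 4-bit
    // nibbles (bit_for_each_4 feeding a copy_to_array_functor).  A minimal
    // sketch:
    /*
        unsigned short idx[32];                   // worst case: all 32 bits set
        unsigned n = bm::bit_list_4(0x81u, idx);  // bits 0 and 7 are set
        // n == 2, idx[0] == 0, idx[1] == 7
    */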
for (;length < length_end; ++length) 04866 { 04867 unsigned len = *length; 04868 int level = gap_calc_level(len, glevel_len); 04869 BM_ASSERT(level >= 0 && level < (int)bm::gap_levels); 04870 unsigned capacity = glevel_len[level]; 04871 BM_ASSERT(capacity >= len); 04872 overhead += capacity - len; 04873 } 04874 return overhead; 04875 } 04876 04877 04884 template<typename T> 04885 bool improve_gap_levels(const T* length, 04886 const T* length_end, 04887 T* glevel_len) 04888 { 04889 BM_ASSERT(length && length_end && glevel_len); 04890 04891 size_t lsize = length_end - length; 04892 04893 BM_ASSERT(lsize); 04894 04895 gap_word_t max_len = 0; 04896 unsigned i; 04897 for (i = 0; i < lsize; ++i) 04898 { 04899 if (length[i] > max_len) 04900 max_len = length[i]; 04901 } 04902 if (max_len < 5 || lsize <= bm::gap_levels) 04903 { 04904 glevel_len[0] = max_len + 4; 04905 for (i = 1; i < bm::gap_levels; ++i) 04906 { 04907 glevel_len[i] = bm::gap_max_buff_len; 04908 } 04909 return true; 04910 } 04911 04912 glevel_len[bm::gap_levels-1] = max_len + 5; 04913 04914 unsigned min_overhead = gap_overhead(length, length_end, glevel_len); 04915 bool is_improved = false; 04916 gap_word_t prev_value = glevel_len[bm::gap_levels-1]; 04917 04918 // main problem solving loop 04919 // 04920 for (i = bm::gap_levels-2; ; --i) 04921 { 04922 unsigned opt_len = 0; 04923 unsigned j; 04924 bool imp_flag = false; 04925 gap_word_t gap_saved_value = glevel_len[i]; 04926 for (j = 0; j < lsize; ++j) 04927 { 04928 glevel_len[i] = length[j]+4; 04929 unsigned ov = gap_overhead(length, length_end, glevel_len); 04930 if (ov <= min_overhead) 04931 { 04932 min_overhead = ov; 04933 opt_len = length[j]+4; 04934 imp_flag = true; 04935 } 04936 } 04937 if (imp_flag) 04938 { 04939 glevel_len[i] = (T)opt_len; // length[opt_idx]+4; 04940 is_improved = true; 04941 } 04942 else 04943 { 04944 glevel_len[i] = gap_saved_value; 04945 } 04946 if (i == 0) 04947 break; 04948 prev_value = glevel_len[i]; 04949 } 04950 // 04951 // Remove duplicates 04952 // 04953 04954 T val = *glevel_len; 04955 T* gp = glevel_len; 04956 T* res = glevel_len; 04957 for (i = 0; i < bm::gap_levels; ++i) 04958 { 04959 if (val != *gp) 04960 { 04961 val = *gp; 04962 *++res = val; 04963 } 04964 ++gp; 04965 } 04966 04967 // Filling the "unused" part with max. 
possible value 04968 while (++res < (glevel_len + bm::gap_levels)) 04969 { 04970 *res = bm::gap_max_buff_len; 04971 } 04972 04973 return is_improved; 04974 04975 } 04976 04977 04978 04984 class bitblock_get_adapter 04985 { 04986 public: 04987 bitblock_get_adapter(const bm::word_t* bit_block) : b_(bit_block) {} 04988 04989 BMFORCEINLINE 04990 bm::word_t get_32() { return *b_++; } 04991 private: 04992 const bm::word_t* b_; 04993 }; 04994 04995 05000 class bitblock_store_adapter 05001 { 05002 public: 05003 bitblock_store_adapter(bm::word_t* bit_block) : b_(bit_block) {} 05004 BMFORCEINLINE 05005 void push_back(bm::word_t w) { *b_++ = w; } 05006 private: 05007 bm::word_t* b_; 05008 }; 05009 05014 class bitblock_sum_adapter 05015 { 05016 public: 05017 bitblock_sum_adapter() : sum_(0) {} 05018 BMFORCEINLINE 05019 void push_back(bm::word_t w) { this->sum_+= w; } 05021 bm::word_t sum() const { return this->sum_; } 05022 private: 05023 bm::word_t sum_; 05024 }; 05025 05031 template<class DEC> class decoder_range_adapter 05032 { 05033 public: 05034 decoder_range_adapter(DEC& dec, unsigned from_idx, unsigned to_idx) 05035 : decoder_(dec), 05036 from_(from_idx), 05037 to_(to_idx), 05038 cnt_(0) 05039 {} 05040 05041 bm::word_t get_32() 05042 { 05043 if (cnt_ < from_ || cnt_ > to_) 05044 { 05045 ++cnt_; return 0; 05046 } 05047 ++cnt_; 05048 return decoder_.get_32(); 05049 } 05050 05051 private: 05052 DEC& decoder_; 05053 unsigned from_; 05054 unsigned to_; 05055 unsigned cnt_; 05056 }; 05057 05058 05063 template<class It1, class It2, class BinaryOp, class Encoder> 05064 void bit_recomb(It1& it1, It2& it2, 05065 BinaryOp& op, 05066 Encoder& enc, 05067 unsigned block_size = bm::set_block_size) 05068 { 05069 for (unsigned i = 0; i < block_size; ++i) 05070 { 05071 bm::word_t w1 = it1.get_32(); 05072 bm::word_t w2 = it2.get_32(); 05073 bm::word_t w = op(w1, w2); 05074 enc.push_back( w ); 05075 } // for 05076 } 05077 05079 template<typename W> struct bit_AND 05080 { 05081 W operator()(W w1, W w2) { return w1 & w2; } 05082 }; 05083 05085 template<typename W> struct bit_OR 05086 { 05087 W operator()(W w1, W w2) { return w1 | w2; } 05088 }; 05089 05091 template<typename W> struct bit_SUB 05092 { 05093 W operator()(W w1, W w2) { return w1 & ~w2; } 05094 }; 05095 05097 template<typename W> struct bit_XOR 05098 { 05099 W operator()(W w1, W w2) { return w1 ^ w2; } 05100 }; 05101 05103 template<typename W> struct bit_ASSIGN 05104 { 05105 W operator()(W w1, W w2) { return w2; } 05106 }; 05107 05109 template<typename W> struct bit_COUNT 05110 { 05111 W operator()(W w1, W w2) 05112 { 05113 w1 = 0; 05114 BM_INCWORD_BITCOUNT(w1, w2); 05115 return w1; 05116 } 05117 }; 05118 05120 template<typename W> struct bit_COUNT_AND 05121 { 05122 W operator()(W w1, W w2) 05123 { 05124 W r = 0; 05125 BM_INCWORD_BITCOUNT(r, w1 & w2); 05126 return r; 05127 } 05128 }; 05129 05131 template<typename W> struct bit_COUNT_XOR 05132 { 05133 W operator()(W w1, W w2) 05134 { 05135 W r = 0; 05136 BM_INCWORD_BITCOUNT(r, w1 ^ w2); 05137 return r; 05138 } 05139 }; 05140 05142 template<typename W> struct bit_COUNT_OR 05143 { 05144 W operator()(W w1, W w2) 05145 { 05146 W r = 0; 05147 BM_INCWORD_BITCOUNT(r, w1 | w2); 05148 return r; 05149 } 05150 }; 05151 05152 05154 template<typename W> struct bit_COUNT_SUB_AB 05155 { 05156 W operator()(W w1, W w2) 05157 { 05158 W r = 0; 05159 BM_INCWORD_BITCOUNT(r, w1 & (~w2)); 05160 return r; 05161 } 05162 }; 05163 05165 template<typename W> struct bit_COUNT_SUB_BA 05166 { 05167 W operator()(W w1, W w2) 05168 { 
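        // Illustrative note (not part of the original header): the bit_*
        // functors defined here plug into bit_recomb() together with the
        // get/store/sum adapters above to stream two blocks word-by-word.  A
        // sketch, assuming block_a, block_b and dst_block each hold
        // bm::set_block_size words:
        /*
            bm::bitblock_get_adapter      ga(block_a), gb(block_b);
            bm::bitblock_store_adapter    store(dst_block);
            bm::bit_OR<bm::word_t>        f_or;
            bm::bit_recomb(ga, gb, f_or, store);        // dst = a | b

            bm::bitblock_get_adapter      ca(block_a), cb(block_b);
            bm::bit_COUNT_AND<bm::word_t> f_cnt;
            bm::bitblock_sum_adapter      sum;
            bm::bit_recomb(ca, cb, f_cnt, sum);         // popcount of a & b
            bm::word_t population = sum.sum();
        */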
05169 W r = 0; 05170 BM_INCWORD_BITCOUNT(r, w2 & (~w1)); 05171 return r; 05172 } 05173 }; 05174 05176 template<typename W> struct bit_COUNT_A 05177 { 05178 W operator()(W w1, W w2) 05179 { 05180 W r = 0; 05181 BM_INCWORD_BITCOUNT(r, w1); 05182 return r; 05183 } 05184 }; 05185 05187 template<typename W> struct bit_COUNT_B 05188 { 05189 W operator()(W w1, W w2) 05190 { 05191 W r = 0; 05192 BM_INCWORD_BITCOUNT(r, w2); 05193 return r; 05194 } 05195 }; 05196 05197 typedef 05198 void (*gap_operation_to_bitset_func_type)(unsigned*, 05199 const gap_word_t*); 05200 05201 typedef 05202 gap_word_t* (*gap_operation_func_type)(const gap_word_t* BMRESTRICT, 05203 const gap_word_t* BMRESTRICT, 05204 gap_word_t* BMRESTRICT, 05205 unsigned& ); 05206 05207 typedef 05208 bm::id_t (*bit_operation_count_func_type)(const bm::word_t* BMRESTRICT, 05209 const bm::word_t* BMRESTRICT, 05210 const bm::word_t* BMRESTRICT); 05211 05212 05213 template<bool T> 05214 struct operation_functions 05215 { 05216 static 05217 gap_operation_to_bitset_func_type gap2bit_table_[bm::set_END]; 05218 static 05219 gap_operation_func_type gapop_table_[bm::set_END]; 05220 static 05221 bit_operation_count_func_type bit_op_count_table_[bm::set_END]; 05222 05223 static 05224 gap_operation_to_bitset_func_type gap_op_to_bit(unsigned i) 05225 { 05226 return gap2bit_table_[i]; 05227 } 05228 05229 static 05230 gap_operation_func_type gap_operation(unsigned i) 05231 { 05232 return gapop_table_[i]; 05233 } 05234 05235 static 05236 bit_operation_count_func_type bit_operation_count(unsigned i) 05237 { 05238 return bit_op_count_table_[i]; 05239 } 05240 }; 05241 05242 template<bool T> 05243 gap_operation_to_bitset_func_type 05244 operation_functions<T>::gap2bit_table_[bm::set_END] = { 05245 &gap_and_to_bitset<bm::gap_word_t>, // set_AND 05246 &gap_add_to_bitset<bm::gap_word_t>, // set_OR 05247 &gap_sub_to_bitset<bm::gap_word_t>, // set_SUB 05248 &gap_xor_to_bitset<bm::gap_word_t>, // set_XOR 05249 0 05250 }; 05251 05252 template<bool T> 05253 gap_operation_func_type 05254 operation_functions<T>::gapop_table_[bm::set_END] = { 05255 &gap_operation_and, // set_AND 05256 &gap_operation_or, // set_OR 05257 &gap_operation_sub, // set_SUB 05258 &gap_operation_xor, // set_XOR 05259 0 05260 }; 05261 05262 05263 template<bool T> 05264 bit_operation_count_func_type 05265 operation_functions<T>::bit_op_count_table_[bm::set_END] = { 05266 0, // set_AND 05267 0, // set_OR 05268 0, // set_SUB 05269 0, // set_XOR 05270 0, // set_ASSIGN 05271 0, // set_COUNT 05272 &bit_operation_and_count, // set_COUNT_AND 05273 &bit_operation_xor_count, // set_COUNT_XOR 05274 &bit_operation_or_count, // set_COUNT_OR 05275 &bit_operation_sub_count, // set_COUNT_SUB_AB 05276 &bit_operation_sub_count_inv, // set_COUNT_SUB_BA 05277 0, // set_COUNT_A 05278 0, // set_COUNT_B 05279 }; 05280 05281 } // namespace bm 05282 05283 #endif
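// Illustrative note (not part of the original header): operation_functions<T>
// above maps the bm::set_* operation codes onto GAP and bit-block kernels;
// slots left 0 have no dedicated kernel for that code.  A lookup sketch,
// assuming block_a and block_b are real bit blocks of bm::set_block_size words
// and that the code names follow the comments in the tables above:
/*
    bm::bit_operation_count_func_type f =
        bm::operation_functions<true>::bit_operation_count(bm::set_COUNT_AND);
    bm::id_t cnt = f(block_a, block_a + bm::set_block_size, block_b);
    // cnt == popcount(block_a & block_b), via bit_operation_and_count
*/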