dlvhex
2.5.0
|
00001 #ifndef BMALGO_IMPL__H__INCLUDED__ 00002 #define BMALGO_IMPL__H__INCLUDED__ 00003 /* 00004 Copyright(c) 2002-2010 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com) 00005 00006 Permission is hereby granted, free of charge, to any person 00007 obtaining a copy of this software and associated documentation 00008 files (the "Software"), to deal in the Software without restriction, 00009 including without limitation the rights to use, copy, modify, merge, 00010 publish, distribute, sublicense, and/or sell copies of the Software, 00011 and to permit persons to whom the Software is furnished to do so, 00012 subject to the following conditions: 00013 00014 The above copyright notice and this permission notice shall be included 00015 in all copies or substantial portions of the Software. 00016 00017 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 00018 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 00019 OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 00020 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 00021 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 00022 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 00023 OTHER DEALINGS IN THE SOFTWARE. 00024 00025 For more information please visit: http://bmagic.sourceforge.net 00026 00027 */ 00028 00029 #ifdef _MSC_VER 00030 #pragma warning( push ) 00031 #pragma warning( disable : 4311 4312 4127) 00032 #endif 00033 00034 #include "bmdef.h" 00035 #include "bmutil.h" 00036 00037 namespace bm 00038 { 00039 00055 enum distance_metric 00056 { 00057 COUNT_AND = set_COUNT_AND, 00058 COUNT_XOR = set_COUNT_XOR, 00059 COUNT_OR = set_COUNT_OR, 00060 COUNT_SUB_AB = set_COUNT_SUB_AB, 00061 COUNT_SUB_BA = set_COUNT_SUB_BA, 00062 COUNT_A = set_COUNT_A, 00063 COUNT_B = set_COUNT_B 00064 }; 00065 00070 inline 00071 distance_metric operation2metric(set_operation op) 00072 { 00073 BM_ASSERT(is_const_set_operation(op)); 00074 if (op == set_COUNT) op = set_COUNT_B; 00075 // distance metric is created as a set operation sub-class 00076 // simple cast will work to convert 00077 return (distance_metric) op; 00078 } 00079 00085 struct distance_metric_descriptor 00086 { 00087 distance_metric metric; 00088 bm::id_t result; 00089 00090 distance_metric_descriptor(distance_metric m) 00091 : metric(m), 00092 result(0) 00093 {} 00094 distance_metric_descriptor() 00095 : metric(bm::COUNT_XOR), 00096 result(0) 00097 {} 00098 00102 void reset() 00103 { 00104 result = 0; 00105 } 00106 }; 00107 00108 00115 inline 00116 void combine_count_operation_with_block(const bm::word_t* blk, 00117 const bm::word_t* arg_blk, 00118 distance_metric_descriptor* dmit, 00119 distance_metric_descriptor* dmit_end) 00120 00121 { 00122 gap_word_t* g1 = BMGAP_PTR(blk); 00123 gap_word_t* g2 = BMGAP_PTR(arg_blk); 00124 00125 unsigned gap = BM_IS_GAP(blk); 00126 unsigned arg_gap = BM_IS_GAP(arg_blk); 00127 00128 if (gap) // first block GAP-type 00129 { 00130 if (arg_gap) // both blocks GAP-type 00131 { 00132 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it) 00133 { 00134 distance_metric_descriptor& dmd = *it; 00135 00136 switch (dmd.metric) 00137 { 00138 case bm::COUNT_AND: 00139 dmd.result += gap_count_and(g1, g2); 00140 break; 00141 case bm::COUNT_OR: 00142 dmd.result += gap_count_or(g1, g2); 00143 break; 00144 case bm::COUNT_SUB_AB: 00145 dmd.result += gap_count_sub(g1, g2); 00146 break; 00147 case bm::COUNT_SUB_BA: 00148 dmd.result += gap_count_sub(g2, g1); 00149 break; 00150 case bm::COUNT_XOR: 00151 dmd.result += gap_count_xor(g1, g2); 00152 break; 00153 case bm::COUNT_A: 00154 dmd.result += gap_bit_count(g1); 00155 break; 00156 case bm::COUNT_B: 00157 dmd.result += gap_bit_count(g2); 00158 break; 00159 } // switch 00160 00161 } // for it 00162 00163 return; 00164 00165 } 00166 else // first block - GAP, argument - BITSET 00167 { 00168 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it) 00169 { 00170 distance_metric_descriptor& dmd = *it; 00171 00172 switch (dmd.metric) 00173 { 00174 case bm::COUNT_AND: 00175 if (arg_blk) 00176 dmd.result += gap_bitset_and_count(arg_blk, g1); 00177 break; 00178 case bm::COUNT_OR: 00179 if (!arg_blk) 00180 dmd.result += gap_bit_count(g1); 00181 else 00182 dmd.result += gap_bitset_or_count(arg_blk, g1); 00183 break; 00184 case bm::COUNT_SUB_AB: 00185 { 00186 bm::word_t BM_ALIGN16 temp_bit_blk[bm::set_block_size] BM_ALIGN16ATTR; 00187 00188 gap_convert_to_bitset((bm::word_t*) temp_bit_blk, g1); 00189 dmd.result += 00190 bit_operation_sub_count((bm::word_t*)temp_bit_blk, 00191 ((bm::word_t*)temp_bit_blk) + bm::set_block_size, 00192 arg_blk); 00193 } 00194 break; 00195 case bm::COUNT_SUB_BA: 00196 dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB 00197 combine_count_operation_with_block(arg_blk, 00198 blk, 00199 it, it+1); 00200 dmd.metric = bm::COUNT_SUB_BA; // restore status quo 00201 break; 00202 case bm::COUNT_XOR: 00203 if (!arg_blk) 00204 dmd.result += gap_bit_count(g1); 00205 else 00206 dmd.result += gap_bitset_xor_count(arg_blk, g1); 00207 break; 00208 case bm::COUNT_A: 00209 if (g1) 00210 dmd.result += gap_bit_count(g1); 00211 break; 00212 case bm::COUNT_B: 00213 if (arg_blk) 00214 { 00215 dmd.result += 00216 bit_block_calc_count(arg_blk, 00217 arg_blk + bm::set_block_size); 00218 } 00219 break; 00220 } // switch 00221 00222 } // for it 00223 00224 return; 00225 00226 } 00227 } 00228 else // first block is BITSET-type 00229 { 00230 if (arg_gap) // second argument block is GAP-type 00231 { 00232 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it) 00233 { 00234 distance_metric_descriptor& dmd = *it; 00235 00236 switch (dmd.metric) 00237 { 00238 case bm::COUNT_AND: 00239 if (blk) 00240 dmd.result += gap_bitset_and_count(blk, g2); 00241 break; 00242 case bm::COUNT_OR: 00243 if (!blk) 00244 dmd.result += gap_bit_count(g2); 00245 else 00246 dmd.result += gap_bitset_or_count(blk, g2); 00247 break; 00248 case bm::COUNT_SUB_AB: 00249 if (blk) 00250 dmd.result += gap_bitset_sub_count(blk, g2); 00251 break; 00252 case bm::COUNT_SUB_BA: 00253 dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB 00254 combine_count_operation_with_block(arg_blk, 00255 //arg_gap, 00256 blk, 00257 //gap, 00258 it, it+1); 00259 dmd.metric = bm::COUNT_SUB_BA; // restore status quo 00260 break; 00261 case bm::COUNT_XOR: 00262 if (!blk) 00263 dmd.result += gap_bit_count(g2); 00264 else 00265 dmd.result += gap_bitset_xor_count(blk, g2); 00266 break; 00267 case bm::COUNT_A: 00268 if (blk) 00269 { 00270 dmd.result += 00271 bit_block_calc_count(blk, 00272 blk + bm::set_block_size); 00273 } 00274 break; 00275 case bm::COUNT_B: 00276 if (g2) 00277 dmd.result += gap_bit_count(g2); 00278 break; 00279 } // switch 00280 00281 } // for it 00282 00283 return; 00284 } 00285 } 00286 00287 // -------------------------------------------- 00288 // 00289 // Here we combine two plain bitblocks 00290 00291 00292 for (distance_metric_descriptor* it = dmit; it < dmit_end; ++it) 00293 { 00294 distance_metric_descriptor& dmd = *it; 00295 bit_operation_count_func_type gfunc = 00296 operation_functions<true>::bit_operation_count(dmd.metric); 00297 if (gfunc) 00298 { 00299 dmd.result += (*gfunc)(blk, blk + bm::set_block_size, arg_blk); 00300 } 00301 else 00302 { 00303 switch (dmd.metric) 00304 { 00305 case bm::COUNT_A: 00306 if (blk) 00307 dmd.result += bit_block_calc_count(blk, blk + bm::set_block_size); 00308 break; 00309 case bm::COUNT_B: 00310 if (arg_blk) 00311 dmd.result += 00312 bit_block_calc_count(arg_blk, 00313 arg_blk + bm::set_block_size); 00314 break; 00315 default: 00316 BM_ASSERT(0); 00317 } // switch 00318 } 00319 00320 } // for it 00321 } 00322 00328 inline 00329 unsigned combine_count_and_operation_with_block(const bm::word_t* blk, 00330 const bm::word_t* arg_blk) 00331 { 00332 unsigned gap = BM_IS_GAP(blk); 00333 unsigned arg_gap = BM_IS_GAP(arg_blk); 00334 if (gap) // first block GAP-type 00335 { 00336 if (arg_gap) // both blocks GAP-type 00337 { 00338 return gap_count_and(BMGAP_PTR(blk), BMGAP_PTR(arg_blk)); 00339 } 00340 else // first block - GAP, argument - BITSET 00341 { 00342 return gap_bitset_and_count(arg_blk, BMGAP_PTR(blk)); 00343 } 00344 } 00345 else // first block is BITSET-type 00346 { 00347 if (arg_gap) // second argument block is GAP-type 00348 { 00349 return gap_bitset_and_count(blk, BMGAP_PTR(arg_blk)); 00350 } 00351 } 00352 00353 // -------------------------------------------- 00354 // Here we combine two plain bitblocks 00355 00356 return bit_operation_and_count(blk, blk + (bm::set_block_size), arg_blk); 00357 } 00358 00359 00365 inline 00366 void combine_any_operation_with_block(const bm::word_t* blk, 00367 unsigned gap, 00368 const bm::word_t* arg_blk, 00369 int arg_gap, 00370 distance_metric_descriptor* dmit, 00371 distance_metric_descriptor* dmit_end) 00372 00373 { 00374 gap_word_t* res=0; 00375 00376 gap_word_t* g1 = BMGAP_PTR(blk); 00377 gap_word_t* g2 = BMGAP_PTR(arg_blk); 00378 00379 if (gap) // first block GAP-type 00380 { 00381 if (arg_gap) // both blocks GAP-type 00382 { 00383 gap_word_t tmp_buf[bm::gap_max_buff_len * 3]; // temporary result 00384 00385 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it) 00386 { 00387 distance_metric_descriptor& dmd = *it; 00388 if (dmd.result) 00389 { 00390 continue; 00391 } 00392 res = 0; 00393 unsigned dsize = 0; 00394 switch (dmd.metric) 00395 { 00396 case bm::COUNT_AND: 00397 dmd.result += gap_operation_any_and(g1, g2); 00398 break; 00399 case bm::COUNT_OR: 00400 res = gap_operation_or(g1, g2, tmp_buf, dsize); 00401 break; 00402 case bm::COUNT_SUB_AB: 00403 dmd.result += gap_operation_any_sub(g1, g2); 00404 break; 00405 case bm::COUNT_SUB_BA: 00406 dmd.result += gap_operation_any_sub(g2, g1); 00407 break; 00408 case bm::COUNT_XOR: 00409 dmd.result += gap_operation_any_xor(g1, g2); 00410 break; 00411 case bm::COUNT_A: 00412 res = g1; 00413 break; 00414 case bm::COUNT_B: 00415 res = g2; 00416 break; 00417 } // switch 00418 if (res) 00419 dmd.result += !gap_is_all_zero(res, bm::gap_max_bits); 00420 00421 } // for it 00422 00423 return; 00424 00425 } 00426 else // first block - GAP, argument - BITSET 00427 { 00428 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it) 00429 { 00430 distance_metric_descriptor& dmd = *it; 00431 if (dmd.result) 00432 { 00433 continue; 00434 } 00435 00436 switch (dmd.metric) 00437 { 00438 case bm::COUNT_AND: 00439 if (arg_blk) 00440 dmd.result += gap_bitset_and_any(arg_blk, g1); 00441 break; 00442 case bm::COUNT_OR: 00443 if (!arg_blk) 00444 dmd.result += !gap_is_all_zero(g1, bm::gap_max_bits); 00445 else 00446 dmd.result += gap_bitset_or_any(arg_blk, g1); 00447 break; 00448 case bm::COUNT_SUB_AB: 00449 { 00450 bm::word_t BM_ALIGN16 temp_blk[bm::set_block_size] BM_ALIGN16ATTR; 00451 gap_convert_to_bitset((bm::word_t*) temp_blk, g1); 00452 dmd.result += 00453 bit_operation_sub_any((bm::word_t*)temp_blk, 00454 ((bm::word_t*)temp_blk) + bm::set_block_size, 00455 arg_blk); 00456 } 00457 break; 00458 case bm::COUNT_SUB_BA: 00459 dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB 00460 combine_any_operation_with_block(arg_blk, 00461 arg_gap, 00462 blk, 00463 gap, 00464 it, it+1); 00465 dmd.metric = bm::COUNT_SUB_BA; // restore status quo 00466 break; 00467 case bm::COUNT_XOR: 00468 if (!arg_blk) 00469 dmd.result += !gap_is_all_zero(g1, bm::gap_max_bits); 00470 else 00471 dmd.result += gap_bitset_xor_any(arg_blk, g1); 00472 break; 00473 case bm::COUNT_A: 00474 if (g1) 00475 dmd.result += !gap_is_all_zero(g1, bm::gap_max_bits); 00476 break; 00477 case bm::COUNT_B: 00478 if (arg_blk) 00479 { 00480 dmd.result += 00481 !bit_is_all_zero((bm::wordop_t*)arg_blk, 00482 (bm::wordop_t*)(arg_blk + bm::set_block_size)); 00483 } 00484 break; 00485 } // switch 00486 00487 } // for it 00488 00489 return; 00490 00491 } 00492 } 00493 else // first block is BITSET-type 00494 { 00495 if (arg_gap) // second argument block is GAP-type 00496 { 00497 for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it) 00498 { 00499 distance_metric_descriptor& dmd = *it; 00500 if (dmd.result) 00501 { 00502 continue; 00503 } 00504 00505 switch (dmd.metric) 00506 { 00507 case bm::COUNT_AND: 00508 if (blk) 00509 dmd.result += gap_bitset_and_any(blk, g2); 00510 break; 00511 case bm::COUNT_OR: 00512 if (!blk) 00513 dmd.result += !gap_is_all_zero(g2, bm::gap_max_bits); 00514 else 00515 dmd.result += gap_bitset_or_any(blk, g2); 00516 break; 00517 case bm::COUNT_SUB_AB: 00518 if (blk) 00519 dmd.result += gap_bitset_sub_any(blk, g2); 00520 break; 00521 case bm::COUNT_SUB_BA: 00522 dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB 00523 combine_any_operation_with_block(arg_blk, 00524 arg_gap, 00525 blk, 00526 gap, 00527 it, it+1); 00528 dmd.metric = bm::COUNT_SUB_BA; // restore status quo 00529 break; 00530 case bm::COUNT_XOR: 00531 if (!blk) 00532 dmd.result += !gap_is_all_zero(g2, bm::gap_max_bits); 00533 else 00534 dmd.result += gap_bitset_xor_any(blk, g2); 00535 break; 00536 case bm::COUNT_A: 00537 if (blk) 00538 { 00539 dmd.result+= 00540 !bit_is_all_zero((bm::wordop_t*)blk, 00541 (bm::wordop_t*)blk + bm::set_block_size); 00542 } 00543 break; 00544 case bm::COUNT_B: 00545 if (g2) 00546 dmd.result += !gap_is_all_zero(g2, bm::gap_max_bits); 00547 break; 00548 } // switch 00549 00550 } // for it 00551 00552 return; 00553 } 00554 } 00555 00556 // -------------------------------------------- 00557 // 00558 // Here we combine two plain bitblocks 00559 00560 const bm::word_t* blk_end; 00561 const bm::word_t* arg_end; 00562 00563 blk_end = blk + (bm::set_block_size); 00564 arg_end = arg_blk + (bm::set_block_size); 00565 00566 for (distance_metric_descriptor* it = dmit; it < dmit_end; ++it) 00567 { 00568 distance_metric_descriptor& dmd = *it; 00569 if (dmd.result) 00570 { 00571 continue; 00572 } 00573 00574 switch (dmd.metric) 00575 { 00576 case bm::COUNT_AND: 00577 dmd.result += 00578 bit_operation_and_any(blk, blk_end, arg_blk); 00579 break; 00580 case bm::COUNT_OR: 00581 dmd.result += 00582 bit_operation_or_any(blk, blk_end, arg_blk); 00583 break; 00584 case bm::COUNT_SUB_AB: 00585 dmd.result += 00586 bit_operation_sub_any(blk, blk_end, arg_blk); 00587 break; 00588 case bm::COUNT_SUB_BA: 00589 dmd.result += 00590 bit_operation_sub_any(arg_blk, arg_end, blk); 00591 break; 00592 case bm::COUNT_XOR: 00593 dmd.result += 00594 bit_operation_xor_any(blk, blk_end, arg_blk); 00595 break; 00596 case bm::COUNT_A: 00597 if (blk) 00598 dmd.result += !bit_is_all_zero((bm::wordop_t*)blk, 00599 (bm::wordop_t*)blk_end); 00600 break; 00601 case bm::COUNT_B: 00602 if (arg_blk) 00603 dmd.result += !bit_is_all_zero((bm::wordop_t*)arg_blk, 00604 (bm::wordop_t*)arg_end); 00605 break; 00606 } // switch 00607 00608 } // for it 00609 } 00610 00611 00612 00618 inline 00619 unsigned combine_count_operation_with_block(const bm::word_t* blk, 00620 const bm::word_t* arg_blk, 00621 distance_metric metric) 00622 { 00623 distance_metric_descriptor dmd(metric); 00624 combine_count_operation_with_block(blk, //gap, 00625 arg_blk, //arg_gap, 00626 //temp_blk, 00627 &dmd, &dmd+1); 00628 return dmd.result; 00629 } 00630 00631 00637 inline 00638 unsigned combine_any_operation_with_block(const bm::word_t* blk, 00639 unsigned gap, 00640 const bm::word_t* arg_blk, 00641 int arg_gap, 00642 distance_metric metric) 00643 { 00644 distance_metric_descriptor dmd(metric); 00645 combine_any_operation_with_block(blk, gap, 00646 arg_blk, arg_gap, 00647 &dmd, &dmd+1); 00648 return dmd.result; 00649 } 00650 00658 inline 00659 void distance_stage(const distance_metric_descriptor* dmit, 00660 const distance_metric_descriptor* dmit_end, 00661 bool* is_all_and) 00662 { 00663 for (const distance_metric_descriptor* it = dmit; it < dmit_end; ++it) 00664 { 00665 if (it->metric != bm::COUNT_AND) 00666 { 00667 *is_all_and = false; 00668 } 00669 } // for 00670 } 00671 00692 template<class BV> 00693 void distance_operation(const BV& bv1, 00694 const BV& bv2, 00695 distance_metric_descriptor* dmit, 00696 distance_metric_descriptor* dmit_end) 00697 { 00698 const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager(); 00699 const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager(); 00700 00701 bool is_all_and = true; // flag is distance operation is just COUNT_AND 00702 distance_stage(dmit, dmit_end, &is_all_and); 00703 00704 bm::word_t*** blk_root = bman1.get_rootblock(); 00705 unsigned block_idx = 0; 00706 unsigned i, j; 00707 00708 const bm::word_t* blk; 00709 const bm::word_t* arg_blk; 00710 00711 BM_SET_MMX_GUARD 00712 00713 unsigned effective_top_block_size = bman1.effective_top_block_size(); 00714 unsigned ebs2 = bman2.effective_top_block_size(); 00715 if (ebs2 > effective_top_block_size) 00716 effective_top_block_size = ebs2; 00717 00718 for (i = 0; i < effective_top_block_size; ++i) 00719 { 00720 bm::word_t** blk_blk = blk_root[i]; 00721 00722 if (blk_blk == 0) // not allocated 00723 { 00724 // AND operation requested - we can skip this portion here 00725 if (is_all_and) 00726 { 00727 block_idx += bm::set_array_size; 00728 continue; 00729 } 00730 const bm::word_t* const* bvbb = bman2.get_topblock(i); 00731 if (bvbb == 0) 00732 { 00733 block_idx += bm::set_array_size; 00734 continue; 00735 } 00736 00737 blk = 0; 00738 for (j = 0; j < bm::set_array_size; ++j,++block_idx) 00739 { 00740 arg_blk = bman2.get_block(i, j); 00741 if (!arg_blk) 00742 continue; 00743 combine_count_operation_with_block(blk, 00744 arg_blk, 00745 dmit, dmit_end); 00746 } // for j 00747 continue; 00748 } 00749 00750 for (j = 0; j < bm::set_array_size; ++j, ++block_idx) 00751 { 00752 blk = blk_blk[j]; 00753 if (blk == 0 && is_all_and) 00754 continue; 00755 00756 arg_blk = bman2.get_block(i, j); 00757 00758 if (blk == 0 && arg_blk == 0) 00759 continue; 00760 00761 combine_count_operation_with_block(blk, 00762 arg_blk, 00763 dmit, dmit_end); 00764 00765 00766 } // for j 00767 00768 } // for i 00769 } 00770 00771 00782 template<class BV> 00783 unsigned distance_and_operation(const BV& bv1, 00784 const BV& bv2) 00785 { 00786 const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager(); 00787 const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager(); 00788 00789 bm::word_t*** blk_root = bman1.get_rootblock(); 00790 bm::word_t*** blk_root_arg = bman2.get_rootblock(); 00791 00792 unsigned count = 0; 00793 00794 BM_SET_MMX_GUARD 00795 00796 unsigned effective_top_block_size = 00797 bm::min_value(bman1.effective_top_block_size(), 00798 bman2.effective_top_block_size()); 00799 00800 for (unsigned i = 0; i < effective_top_block_size; ++i) 00801 { 00802 bm::word_t** blk_blk; 00803 bm::word_t** blk_blk_arg; 00804 if ((blk_blk = blk_root[i]) == 0 || (blk_blk_arg= blk_root_arg[i]) == 0) 00805 { 00806 continue; 00807 } 00808 for (unsigned j = 0; j < bm::set_array_size; j+=4) 00809 { 00810 (blk_blk[j] && blk_blk_arg[j]) ? 00811 count += combine_count_and_operation_with_block(blk_blk[j],blk_blk_arg[j]) 00812 :0; 00813 (blk_blk[j+1] && blk_blk_arg[j+1]) ? 00814 count += combine_count_and_operation_with_block(blk_blk[j+1],blk_blk_arg[j+1]) 00815 :0; 00816 (blk_blk[j+2] && blk_blk_arg[j+2]) ? 00817 count += combine_count_and_operation_with_block(blk_blk[j+2],blk_blk_arg[j+2]) 00818 :0; 00819 (blk_blk[j+3] && blk_blk_arg[j+3]) ? 00820 count += combine_count_and_operation_with_block(blk_blk[j+3],blk_blk_arg[j+3]) 00821 :0; 00822 } // for j 00823 00824 } // for i 00825 return count; 00826 } 00827 00828 00829 00830 00852 template<class BV> 00853 void distance_operation_any(const BV& bv1, 00854 const BV& bv2, 00855 distance_metric_descriptor* dmit, 00856 distance_metric_descriptor* dmit_end) 00857 { 00858 const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager(); 00859 const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager(); 00860 00861 bool is_all_and = true; // flag is distance operation is just COUNT_AND 00862 //bm::word_t* temp_blk = 00863 distance_stage(dmit, dmit_end, &is_all_and); 00864 00865 bm::word_t*** blk_root = bman1.get_rootblock(); 00866 unsigned block_idx = 0; 00867 unsigned i, j; 00868 00869 const bm::word_t* blk; 00870 const bm::word_t* arg_blk; 00871 bool blk_gap; 00872 bool arg_gap; 00873 00874 BM_SET_MMX_GUARD 00875 00876 unsigned effective_top_block_size = bman1.effective_top_block_size(); 00877 unsigned ebs2 = bman2.effective_top_block_size(); 00878 if (ebs2 > effective_top_block_size) 00879 effective_top_block_size = ebs2; 00880 00881 for (i = 0; i < effective_top_block_size; ++i) 00882 { 00883 bm::word_t** blk_blk = blk_root[i]; 00884 00885 if (blk_blk == 0) // not allocated 00886 { 00887 // AND operation requested - we can skip this portion here 00888 if (is_all_and) 00889 { 00890 block_idx += bm::set_array_size; 00891 continue; 00892 } 00893 00894 const bm::word_t* const* bvbb = bman2.get_topblock(i); 00895 if (bvbb == 0) 00896 { 00897 block_idx += bm::set_array_size; 00898 continue; 00899 } 00900 00901 blk = 0; 00902 blk_gap = false; 00903 00904 for (j = 0; j < bm::set_array_size; ++j,++block_idx) 00905 { 00906 arg_blk = bman2.get_block(i, j); 00907 arg_gap = BM_IS_GAP(arg_blk); 00908 00909 if (!arg_blk) 00910 continue; 00911 combine_any_operation_with_block(blk, blk_gap, 00912 arg_blk, arg_gap, 00913 dmit, dmit_end); 00914 00915 // check if all distance requests alredy resolved 00916 bool all_resolved = false; 00917 distance_metric_descriptor* it=dmit; 00918 do 00919 { 00920 if (!it->result) 00921 { 00922 all_resolved = false; 00923 break; 00924 } 00925 ++it; 00926 } while (it < dmit_end); 00927 if (all_resolved) 00928 return; 00929 } // for j 00930 00931 continue; 00932 } 00933 00934 for (j = 0; j < bm::set_array_size; ++j, ++block_idx) 00935 { 00936 blk = blk_blk[j]; 00937 if (blk == 0 && is_all_and) 00938 continue; 00939 00940 arg_blk = bman2.get_block(i, j); 00941 00942 if (blk == 0 && arg_blk == 0) 00943 continue; 00944 00945 arg_gap = BM_IS_GAP(arg_blk); 00946 blk_gap = BM_IS_GAP(blk); 00947 00948 combine_any_operation_with_block(blk, blk_gap, 00949 arg_blk, arg_gap, 00950 dmit, dmit_end); 00951 00952 // check if all distance requests alredy resolved 00953 bool all_resolved = false; 00954 distance_metric_descriptor* it=dmit; 00955 do 00956 { 00957 if (!it->result) 00958 { 00959 all_resolved = false; 00960 break; 00961 } 00962 ++it; 00963 } while (it < dmit_end); 00964 if (all_resolved) 00965 return; 00966 00967 } // for j 00968 00969 } // for i 00970 } 00971 00972 00973 00981 template<class BV> 00982 bm::id_t count_and(const BV& bv1, const BV& bv2) 00983 { 00984 return distance_and_operation(bv1, bv2); 00985 } 00986 00994 template<class BV> 00995 bm::id_t any_and(const BV& bv1, const BV& bv2) 00996 { 00997 distance_metric_descriptor dmd(bm::COUNT_AND); 00998 00999 distance_operation_any(bv1, bv2, &dmd, &dmd+1); 01000 return dmd.result; 01001 } 01002 01003 01004 01012 template<class BV> 01013 bm::id_t count_xor(const BV& bv1, const BV& bv2) 01014 { 01015 distance_metric_descriptor dmd(bm::COUNT_XOR); 01016 01017 distance_operation(bv1, bv2, &dmd, &dmd+1); 01018 return dmd.result; 01019 } 01020 01028 template<class BV> 01029 bm::id_t any_xor(const BV& bv1, const BV& bv2) 01030 { 01031 distance_metric_descriptor dmd(bm::COUNT_XOR); 01032 01033 distance_operation_any(bv1, bv2, &dmd, &dmd+1); 01034 return dmd.result; 01035 } 01036 01037 01038 01046 template<class BV> 01047 bm::id_t count_sub(const BV& bv1, const BV& bv2) 01048 { 01049 distance_metric_descriptor dmd(bm::COUNT_SUB_AB); 01050 01051 distance_operation(bv1, bv2, &dmd, &dmd+1); 01052 return dmd.result; 01053 } 01054 01055 01063 template<class BV> 01064 bm::id_t any_sub(const BV& bv1, const BV& bv2) 01065 { 01066 distance_metric_descriptor dmd(bm::COUNT_SUB_AB); 01067 01068 distance_operation_any(bv1, bv2, &dmd, &dmd+1); 01069 return dmd.result; 01070 } 01071 01072 01080 template<class BV> 01081 bm::id_t count_or(const BV& bv1, const BV& bv2) 01082 { 01083 distance_metric_descriptor dmd(bm::COUNT_OR); 01084 01085 distance_operation(bv1, bv2, &dmd, &dmd+1); 01086 return dmd.result; 01087 } 01088 01096 template<class BV> 01097 bm::id_t any_or(const BV& bv1, const BV& bv2) 01098 { 01099 distance_metric_descriptor dmd(bm::COUNT_OR); 01100 01101 distance_operation_any(bv1, bv2, &dmd, &dmd+1); 01102 return dmd.result; 01103 } 01104 01105 01110 template<class It> 01111 It block_range_scan(It first, It last, unsigned nblock, unsigned* max_id) 01112 { 01113 It right; 01114 for (right = first; right != last; ++right) 01115 { 01116 unsigned v = unsigned(*right); 01117 BM_ASSERT(v < bm::id_max); 01118 if (v >= *max_id) 01119 *max_id = v; 01120 unsigned nb = v >> bm::set_block_shift; 01121 if (nb != nblock) 01122 break; 01123 } 01124 return right; 01125 } 01126 01140 template<class BV, class It> 01141 void combine_or(BV& bv, It first, It last) 01142 { 01143 typename BV::blocks_manager_type& bman = bv.get_blocks_manager(); 01144 unsigned max_id = 0; 01145 01146 while (first < last) 01147 { 01148 unsigned nblock = unsigned((*first) >> bm::set_block_shift); 01149 It right = block_range_scan(first, last, nblock, &max_id); 01150 01151 if (max_id >= bv.size()) 01152 { 01153 BM_ASSERT(max_id < bm::id_max); 01154 bv.resize(max_id + 1); 01155 } 01156 01157 // now we have one in-block array of bits to set 01158 01159 label1: 01160 01161 int block_type; 01162 bm::word_t* blk = 01163 bman.check_allocate_block(nblock, 01164 true, 01165 bv.get_new_blocks_strat(), 01166 &block_type); 01167 if (!blk) 01168 continue; 01169 01170 if (block_type == 1) // gap 01171 { 01172 bm::gap_word_t* gap_blk = BMGAP_PTR(blk); 01173 unsigned threshold = bm::gap_limit(gap_blk, bman.glen()); 01174 01175 for (; first < right; ++first) 01176 { 01177 unsigned is_set; 01178 unsigned nbit = (*first) & bm::set_block_mask; 01179 01180 unsigned new_block_len = 01181 gap_set_value(true, gap_blk, nbit, &is_set); 01182 if (new_block_len > threshold) 01183 { 01184 bman.extend_gap_block(nblock, gap_blk); 01185 ++first; 01186 goto label1; // block pointer changed - goto reset 01187 } 01188 } 01189 } 01190 else // bit block 01191 { 01192 for (; first < right; ++first) 01193 { 01194 unsigned nbit = unsigned(*first & bm::set_block_mask); 01195 unsigned nword = unsigned(nbit >> bm::set_word_shift); 01196 nbit &= bm::set_word_mask; 01197 blk[nword] |= (bm::word_t)1 << nbit; 01198 } // for 01199 } 01200 } // while 01201 01202 bv.forget_count(); 01203 } 01204 01205 01219 template<class BV, class It> 01220 void combine_xor(BV& bv, It first, It last) 01221 { 01222 typename BV::blocks_manager_type& bman = bv.get_blocks_manager(); 01223 unsigned max_id = 0; 01224 01225 while (first < last) 01226 { 01227 unsigned nblock = unsigned((*first) >> bm::set_block_shift); 01228 It right = block_range_scan(first, last, nblock, &max_id); 01229 01230 if (max_id >= bv.size()) 01231 { 01232 BM_ASSERT(max_id < bm::id_max); 01233 bv.resize(max_id + 1); 01234 } 01235 01236 // now we have one in-block array of bits to set 01237 01238 label1: 01239 01240 int block_type; 01241 bm::word_t* blk = 01242 bman.check_allocate_block(nblock, 01243 true, 01244 bv.get_new_blocks_strat(), 01245 &block_type, 01246 false /* no null return */); 01247 BM_ASSERT(blk); 01248 01249 if (block_type == 1) // gap 01250 { 01251 bm::gap_word_t* gap_blk = BMGAP_PTR(blk); 01252 unsigned threshold = bm::gap_limit(gap_blk, bman.glen()); 01253 01254 for (; first < right; ++first) 01255 { 01256 unsigned is_set; 01257 unsigned nbit = (*first) & bm::set_block_mask; 01258 01259 is_set = gap_test(gap_blk, nbit); 01260 BM_ASSERT(is_set <= 1); 01261 is_set ^= 1; 01262 01263 unsigned new_block_len = 01264 gap_set_value(is_set, gap_blk, nbit, &is_set); 01265 if (new_block_len > threshold) 01266 { 01267 bman.extend_gap_block(nblock, gap_blk); 01268 ++first; 01269 goto label1; // block pointer changed - goto reset 01270 } 01271 } 01272 } 01273 else // bit block 01274 { 01275 for (; first < right; ++first) 01276 { 01277 unsigned nbit = unsigned(*first & bm::set_block_mask); 01278 unsigned nword = unsigned(nbit >> bm::set_word_shift); 01279 nbit &= bm::set_word_mask; 01280 blk[nword] ^= (bm::word_t)1 << nbit; 01281 } // for 01282 } 01283 } // while 01284 01285 bv.forget_count(); 01286 } 01287 01288 01289 01303 template<class BV, class It> 01304 void combine_sub(BV& bv, It first, It last) 01305 { 01306 typename BV::blocks_manager_type& bman = bv.get_blocks_manager(); 01307 unsigned max_id = 0; 01308 01309 while (first < last) 01310 { 01311 unsigned nblock = unsigned((*first) >> bm::set_block_shift); 01312 It right = block_range_scan(first, last, nblock, &max_id); 01313 01314 if (max_id >= bv.size()) 01315 { 01316 BM_ASSERT(max_id < bm::id_max); 01317 bv.resize(max_id + 1); 01318 } 01319 01320 // now we have one in-block array of bits to set 01321 01322 label1: 01323 01324 int block_type; 01325 bm::word_t* blk = 01326 bman.check_allocate_block(nblock, 01327 false, 01328 bv.get_new_blocks_strat(), 01329 &block_type); 01330 01331 if (!blk) 01332 continue; 01333 01334 if (block_type == 1) // gap 01335 { 01336 bm::gap_word_t* gap_blk = BMGAP_PTR(blk); 01337 unsigned threshold = bm::gap_limit(gap_blk, bman.glen()); 01338 01339 for (; first < right; ++first) 01340 { 01341 unsigned is_set; 01342 unsigned nbit = (*first) & bm::set_block_mask; 01343 01344 is_set = gap_test(gap_blk, nbit); 01345 if (!is_set) 01346 continue; 01347 01348 unsigned new_block_len = 01349 gap_set_value(false, gap_blk, nbit, &is_set); 01350 if (new_block_len > threshold) 01351 { 01352 bman.extend_gap_block(nblock, gap_blk); 01353 ++first; 01354 goto label1; // block pointer changed - goto reset 01355 } 01356 } 01357 } 01358 else // bit block 01359 { 01360 for (; first < right; ++first) 01361 { 01362 unsigned nbit = unsigned(*first & bm::set_block_mask); 01363 unsigned nword = unsigned(nbit >> bm::set_word_shift); 01364 nbit &= bm::set_word_mask; 01365 blk[nword] &= ~((bm::word_t)1 << nbit); 01366 } // for 01367 } 01368 } // while 01369 01370 bv.forget_count(); 01371 } 01372 01385 template<class BV, class It> 01386 void combine_and_sorted(BV& bv, It first, It last) 01387 { 01388 bm::id_t prev = 0; 01389 for ( ;first < last; ++first) 01390 { 01391 bm::id_t id = *first; 01392 BM_ASSERT(id >= prev); // make sure it's sorted 01393 bv.set_bit_and(id, true); 01394 if (++prev < id) 01395 { 01396 bv.set_range(prev, id-1, false); 01397 } 01398 prev = id; 01399 } 01400 } 01401 01402 01417 template<class BV, class It> 01418 void combine_and(BV& bv, It first, It last) 01419 { 01420 BV bv_tmp; 01421 combine_or(bv_tmp, first, last); 01422 bv &= bv_tmp; 01423 } 01424 01440 template<class BV> 01441 bm::id_t count_intervals(const BV& bv) 01442 { 01443 const typename BV::blocks_manager_type& bman = bv.get_blocks_manager(); 01444 01445 bm::word_t*** blk_root = bman.get_rootblock(); 01446 typename BV::blocks_manager_type::block_count_change_func func(bman); 01447 for_each_block(blk_root, bman.top_block_size(), func); 01448 01449 return func.count(); 01450 } 01451 01466 template<class BV, class It> 01467 void export_array(BV& bv, It first, It last) 01468 { 01469 typename BV::blocks_manager_type& bman = bv.get_blocks_manager(); 01470 unsigned inp_word_size = sizeof(*first); 01471 size_t array_size = last - first; 01472 size_t bit_cnt = array_size * inp_word_size * 8; 01473 int block_type; 01474 bm::word_t tmp; 01475 unsigned b1, b2, b3, b4; 01476 01477 if (bit_cnt >= bv.size()) 01478 bv.resize((bm::id_t)bit_cnt + 1); 01479 else 01480 bv.set_range((bm::id_t)bit_cnt, bv.size() - 1, false); 01481 01482 switch (inp_word_size) 01483 { 01484 case 1: 01485 { 01486 size_t word_cnt = array_size / 4; 01487 for (unsigned i = 0; i < bm::set_total_blocks; ++i) 01488 { 01489 bm::word_t* blk = 01490 bman.check_allocate_block(i, 01491 false, 01492 BM_BIT, 01493 &block_type, 01494 false /*no NULL ret*/); 01495 if (block_type == 1) // gap 01496 { 01497 blk = bman.convert_gap2bitset(i, BMGAP_PTR(blk)); 01498 } 01499 01500 bm::word_t* wrd_ptr = blk; 01501 if (word_cnt >= bm::set_block_size) { 01502 bm::word_t* wrd_end = blk + bm::set_block_size; 01503 do { 01504 b1 = *first++; b2 = *first++; 01505 b3 = *first++; b4 = *first++; 01506 tmp = b1 | (b2 << 8) | (b3 << 16) | (b4 << 24); 01507 *wrd_ptr++ = tmp; 01508 } while (wrd_ptr < wrd_end); 01509 word_cnt -= bm::set_block_size; 01510 } 01511 else 01512 { 01513 size_t to_convert = last - first; 01514 for (size_t j = 0; j < to_convert / 4; ++j) 01515 { 01516 b1 = *first++; b2 = *first++; 01517 b3 = *first++; b4 = *first++; 01518 tmp = b1 | (b2 << 8) | (b3 << 16) | (b4 << 24); 01519 *wrd_ptr++ = tmp; 01520 } 01521 while (1) 01522 { 01523 if (first == last) break; 01524 *wrd_ptr = *first++; 01525 if (first == last) break; 01526 *wrd_ptr |= (*first++) << 8; 01527 if (first == last) break; 01528 *wrd_ptr |= (*first++) << 16; 01529 if (first == last) break; 01530 *wrd_ptr |= (*first++) << 24; 01531 ++wrd_ptr; 01532 } 01533 } 01534 if (first == last) break; 01535 } // for 01536 } 01537 break; 01538 case 2: 01539 { 01540 size_t word_cnt = array_size / 2; 01541 for (unsigned i = 0; i < bm::set_total_blocks; ++i) 01542 { 01543 bm::word_t* blk = 01544 bman.check_allocate_block(i, 01545 false, 01546 BM_BIT, 01547 &block_type, 01548 false /*no NULL ret*/); 01549 if (block_type == 1) // gap 01550 { 01551 blk = bman.convert_gap2bitset(i, BMGAP_PTR(blk)); 01552 } 01553 01554 bm::word_t* wrd_ptr = blk; 01555 if (word_cnt >= bm::set_block_size) { 01556 bm::word_t* wrd_end = blk + bm::set_block_size; 01557 do { 01558 b1 = *first++; b2 = *first++; 01559 tmp = b1 | (b2 << 16); 01560 *wrd_ptr++ = tmp; 01561 } while (wrd_ptr < wrd_end); 01562 word_cnt -= bm::set_block_size; 01563 } 01564 else 01565 { 01566 size_t to_convert = last - first; 01567 for (unsigned j = 0; j < to_convert / 2; ++j) 01568 { 01569 b1 = *first++; b2 = *first++; 01570 tmp = b1 | (b2 << 16); 01571 *wrd_ptr++ = tmp; 01572 } 01573 while (1) 01574 { 01575 if (first == last) break; 01576 *wrd_ptr = *first++; 01577 if (first == last) break; 01578 *wrd_ptr |= (*first++) << 16; 01579 ++wrd_ptr; 01580 } 01581 } 01582 if (first == last) break; 01583 } // for 01584 } 01585 break; 01586 case 4: 01587 { 01588 size_t word_cnt = array_size; 01589 for (unsigned i = 0; i < bm::set_total_blocks; ++i) 01590 { 01591 bm::word_t* blk = 01592 bman.check_allocate_block(i, 01593 false, 01594 BM_BIT, 01595 &block_type, 01596 false /*no NULL ret*/); 01597 if (block_type == 1) // gap 01598 { 01599 blk = bman.convert_gap2bitset(i, BMGAP_PTR(blk)); 01600 } 01601 01602 bm::word_t* wrd_ptr = blk; 01603 if (word_cnt >= bm::set_block_size) { 01604 bm::word_t* wrd_end = blk + bm::set_block_size; 01605 do { 01606 *wrd_ptr++ = *first++; 01607 } while (wrd_ptr < wrd_end); 01608 word_cnt -= bm::set_block_size; 01609 } 01610 else 01611 { 01612 while (1) 01613 { 01614 if (first == last) break; 01615 *wrd_ptr = *first++; 01616 ++wrd_ptr; 01617 } 01618 } 01619 if (first == last) break; 01620 } // for 01621 } 01622 break; 01623 default: 01624 BM_ASSERT(0); 01625 } // switch 01626 01627 } 01628 01629 } // namespace bm 01630 01631 #ifdef _MSC_VER 01632 #pragma warning( pop ) 01633 #endif 01634 01635 #endif