倒排列表求交集算法 包括baeza yates的交集算法

Posted 将者,智、信、仁、勇、严也。

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了倒排列表求交集算法 包括baeza yates的交集算法相关的知识,希望对你有一定的参考价值。

#ifndef __INTERSECT_HPP__
#define __INTERSECT_HPP__

#include "probe.hpp"

namespace themas {

/*
 * like stl‘s set_intersect
 */
template<class InputIterator, class OutputIterator>
void linear_intersect(InputIterator begin1, InputIterator end1,
                      InputIterator begin2, InputIterator end2,
                      OutputIterator out)
{
  if ( (end2 - begin2) > (end1 - begin1) )
  {
    // why in the world would i do this?
    // hmmmmmmm.......... !
    std::swap(begin1, begin2);
    std::swap(end1, end2);
  }
  while (begin1 != end1 && begin2 != end2)
  {
    if (*begin1 < *begin2)
      ++begin1;
    else if (*begin2 < *begin1)
      ++begin2;
    else
    {
      *out++ = *begin1;
      ++begin1;
      ++begin2;
    }
  }
}

/*
 * this time with a comparator!
 */
template<class InputIterator, class OutputIterator, class Comparator >
void linear_intersect(InputIterator begin1, InputIterator end1,
                      InputIterator begin2, InputIterator end2,
                      OutputIterator out, Comparator cmp)
{
  if ( (end2 - begin2) > (end1 - begin1) )
  {
    // why in the world would i do this?
    // hmmmmmmm.......... !
    std::swap(begin1, begin2);
    std::swap(end1, end2);
  }
  while (begin1 != end1 && begin2 != end2)
  {
    if (cmp( *begin1, *begin2 ) )
      ++begin1;
    else if ( cmp(*begin2, *begin1) )
      ++begin2;
    else
    {
      *out++ = *begin1;
      ++begin1;
      ++begin2;
    }
  }
}

/*
 * baeza_intersect
 */
template< template <class, class> class Probe,
  class RandomAccessIterator, class OutputIterator>
void baeza_intersect(RandomAccessIterator begin1, RandomAccessIterator end1,
                     RandomAccessIterator begin2, RandomAccessIterator end2,
                     OutputIterator out)
{
  RandomAccessIterator probe1, probe2;

  if ( (end1 - begin1) < ( end2 - begin2 ) )
  {
    if ( begin1 == end1 )
      return;
    probe1 = begin1 + ( ( end1 - begin1 ) >> 1 );
    probe2 = lower_bound< Probe >( begin2, end2, *probe1 );
    baeza_intersect< Probe >(begin1, probe1, begin2, probe2, out); // intersect left
    if (! (probe2 == end2 || *probe1 < *probe2 ))
      *out++ = *probe2++;
    baeza_intersect< Probe >(++probe1, end1, probe2, end2, out); // intersect right
  }
  else
  {
    if ( begin2 == end2 )
      return;
    probe2 = begin2 + ( ( end2 - begin2 ) >> 1 );
    probe1 = lower_bound< Probe >( begin1, end1, *probe2 );
    baeza_intersect< Probe >(begin1, probe1, begin2, probe2, out); // intersect left
    if (! (probe1 == end1 || *probe2 < *probe1 ))
      *out++ = *probe1++;
    baeza_intersect< Probe >(probe1, end1, ++probe2, end2, out); // intersect right
  }
}

/*
 * with a comparator
 */
template< template <class, class> class Probe,
  class RandomAccessIterator, class OutputIterator, class Comparator >
void baeza_intersect(RandomAccessIterator begin1, RandomAccessIterator end1,
                     RandomAccessIterator begin2, RandomAccessIterator end2,
                     OutputIterator out, Comparator cmp)
{
  RandomAccessIterator probe1, probe2;

  if ( (end1 - begin1) < ( end2 - begin2 ) )
  {
    if ( begin1 == end1 )
      return;
    probe1 = begin1 + ( ( end1 - begin1 ) >> 1 );
    probe2 = lower_bound< Probe >( begin2, end2, *probe1, cmp );
    baeza_intersect< Probe >(begin1, probe1, begin2, probe2, out, cmp); // intersect left
    if (! (probe2 == end2 || cmp( *probe1, *probe2 ) ))
      *out++ = *probe2++;
    baeza_intersect< Probe >(++probe1, end1, probe2, end2, out, cmp); // intersect right
  }
  else
  {
    if ( begin2 == end2 )
      return;
    probe2 = begin2 + ( ( end2 - begin2 ) >> 1 );
    probe1 = lower_bound< Probe >( begin1, end1, *probe2, cmp );
    baeza_intersect< Probe >(begin1, probe1, begin2, probe2, out, cmp); // intersect left
    if (! (probe1 == end1 || cmp( *probe2, *probe1 ) ))
      *out++ = *probe1++;
    baeza_intersect< Probe >(probe1, end1, ++probe2, end2, out, cmp); // intersect right
  }
}

} // themas

#endif // __INTERSECT_HPP__

转自:https://github.com/erikfrey/themas/blob/master/src/set_intersection/intersect.hpp

 

#include <iostream>
#include <vector>
#include <set>
#include <ctime>

#include <boost/random/mersenne_twister.hpp>

#include "intersect.hpp"

using namespace themas;

int main(int argc, char * argv[])
{
  std::set<int> nums1, nums2;
  std::vector<int> result1, result2, result3;

  boost::mt19937 rng(time(NULL));

  for ( unsigned int i = rng() % 16384; i != 0; --i )
    nums1.insert(rng());
  for ( unsigned int i = rng() % 16384; i != 0; --i )
    nums2.insert(rng());
  for ( unsigned int i = rng() % 16384; i != 0; --i )
  {
    unsigned int j = rng();
    nums1.insert(j);
    nums2.insert(j);
  }
  std::vector<int> v1(nums1.begin(), nums1.end()), v2(nums2.begin(), nums2.end());

  linear_intersect(v1.begin(), v1.end(), v2.begin(), v2.end(), std::back_inserter(result1));
  baeza_intersect < binary_probe > (v1.begin(), v1.end(), v2.begin(), v2.end(), std::back_inserter(result2));
  baeza_intersect < interpolation_probe > (v1.begin(), v1.end(), v2.begin(), v2.end(), std::back_inserter(result3));

  if (result1 != result2 || result1 != result3)
    std::cout << "FAIL!" << std::endl;
  else
    std::cout << "PASS!" << std::endl;
}

 

以上是关于倒排列表求交集算法 包括baeza yates的交集算法的主要内容,如果未能解决你的问题,请参考以下文章

倒排列表求交集算法汇总

C++求两个集合的交集

Elasticsearch调优篇 02 - 高效求交算法大比拼

Fisher–Yates-shuffle 洗牌算法

两个已排序List求交集

用C语言求线性表交集