Files
noggit-red/src/external/rapidfuzz-cpp/rapidfuzz/fuzz.hpp
2025-01-02 17:42:59 +00:00

790 lines
27 KiB
C++

/* SPDX-License-Identifier: MIT */
/* Copyright © 2021 Max Bachmann */
/* Copyright © 2011 Adam Cohen */
#pragma once
#include <rapidfuzz/details/CharSet.hpp>
#include <rapidfuzz/details/PatternMatchVector.hpp>
#include <rapidfuzz/details/common.hpp>
#include <rapidfuzz/distance/Indel.hpp>
namespace rapidfuzz::fuzz {
/**
* @defgroup Fuzz Fuzz
* A collection of string matching algorithms from FuzzyWuzzy
* @{
*/
/**
 * @brief calculates a simple ratio between two strings
 *
 * @details
 * @code{.cpp}
 * // score is 96.55
 * double score = ratio("this is a test", "this is a test!");
 * @endcode
 *
 * @tparam Sentence1 This is a string that can be converted to
 * basic_string_view<char_type>
 * @tparam Sentence2 This is a string that can be converted to
 * basic_string_view<char_type>
 *
 * @param s1 string to compare with s2 (for type info check Template parameters
 * above)
 * @param s2 string to compare with s1 (for type info check Template parameters
 * above)
 * @param score_cutoff Optional argument for a score threshold between 0% and
 * 100%. Matches with a lower score than this number will not be returned.
 * Defaults to 0.
 *
 * @return returns the ratio between s1 and s2 or 0 when ratio < score_cutoff
 */
template <typename Sentence1, typename Sentence2>
double ratio(const Sentence1& s1, const Sentence2& s2, double score_cutoff = 0);
/**
 * @brief Iterator-pair overload of ratio().
 *
 * @param first1 iterator to the beginning of the first sequence
 * @param last1 iterator to the end of the first sequence
 * @param first2 iterator to the beginning of the second sequence
 * @param last2 iterator to the end of the second sequence
 * @param score_cutoff Optional score threshold, defaults to 0 (see the
 * sentence overload for its documented meaning)
 *
 * @return presumably the same result as the sentence overload applied to the
 * two sequences (only the declaration is visible here)
 */
template <typename InputIt1, typename InputIt2>
double ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff = 0);
#ifdef RAPIDFUZZ_SIMD
namespace experimental {

/**
 * @brief Batched scorer that compares several stored sequences against one
 * query sequence per call, reporting results on a 0-100 scale.
 *
 * The heavy lifting is delegated to a
 * rapidfuzz::experimental::MultiIndel<MaxLen>; this wrapper only converts
 * between the 0-100 scale used here and the normalized scale the delegate
 * works with.
 *
 * @tparam MaxLen forwarded unchanged to MultiIndel
 */
template <int MaxLen>
struct MultiRatio {
    /**
     * @param count number of sequences; forwarded to the MultiIndel scorer and
     * remembered as the number of scores to rescale in similarity()
     */
    MultiRatio(size_t count) : input_count(count), scorer(count)
    {}

    /** @brief Forwards to MultiIndel::result_count(). */
    size_t result_count() const
    {
        return scorer.result_count();
    }

    /** @brief Stores the sequence delimited by first1/last1 in the scorer. */
    template <typename InputIt1>
    void insert(InputIt1 first1, InputIt1 last1)
    {
        scorer.insert(first1, last1);
    }

    /** @brief Sentence convenience overload; forwards begin/end of s1_. */
    template <typename Sentence1>
    void insert(const Sentence1& s1_)
    {
        insert(detail::to_begin(s1_), detail::to_end(s1_));
    }

    /**
     * @brief Scores every stored sequence against s2.
     *
     * @param scores caller-provided output array
     * @param score_count number of elements in scores (passed on to the
     * delegate)
     * @param s2 sequence to compare against
     * @param score_cutoff threshold on the 0-100 scale; divided by 100 before
     * it reaches the delegate
     */
    template <typename Sentence2>
    void similarity(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 0) const
    {
        const double normalized_cutoff = score_cutoff / 100.0;
        scorer.normalized_similarity(scores, score_count, s2, normalized_cutoff);

        // bring the first input_count results back onto the 0-100 scale
        double* const rescale_end = scores + input_count;
        for (double* score = scores; score != rescale_end; ++score) {
            *score *= 100.0;
        }
    }

    /** @brief Iterator-pair overload; wraps the iterators in a detail::Range. */
    template <typename InputIt2>
    void similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2,
                    double score_cutoff = 0.0) const
    {
        similarity(scores, score_count, detail::Range(first2, last2), score_cutoff);
    }

private:
    size_t input_count;
    rapidfuzz::experimental::MultiIndel<MaxLen> scorer;
};

} /* namespace experimental */
#endif
/**
 * @brief Scorer object that is constructed from the first sequence and then
 * compared against second sequences through similarity().
 *
 * The first sequence is handed to a CachedIndel<CharT1> at construction, so
 * similarity() only takes the second sequence. Presumably the reusable
 * counterpart of fuzz::ratio (the similarity() definitions are not part of this
 * struct body, confirm there).
 *
 * @tparam CharT1 character type of the stored sequence
 */
template <typename CharT1>
struct CachedRatio {
// Builds the cached state from the sequence delimited by first1/last1.
template <typename InputIt1>
CachedRatio(InputIt1 first1, InputIt1 last1) : cached_indel(first1, last1)
{}
// Sentence overload; note that it is not marked explicit.
template <typename Sentence1>
CachedRatio(const Sentence1& s1) : cached_indel(s1)
{}
// Declared only. score_cutoff defaults to 0.0; score_hint is not interpreted
// in this struct body (presumably an expected score used as a hint, confirm
// in the definition).
template <typename InputIt2>
double similarity(InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0,
double score_hint = 0.0) const;
// Sentence overload of similarity(); declared only.
template <typename Sentence2>
double similarity(const Sentence2& s2, double score_cutoff = 0.0, double score_hint = 0.0) const;
// The access specifier below is commented out, so cached_indel is a public
// member of this struct.
// private:
CachedIndel<CharT1> cached_indel;
};
// Deduction guides: CharT1 is taken from the sentence's char_type or from the
// iterator's value type.
template <typename Sentence1>
CachedRatio(const Sentence1& s1) -> CachedRatio<char_type<Sentence1>>;
template <typename InputIt1>
CachedRatio(InputIt1 first1, InputIt1 last1) -> CachedRatio<iter_value_t<InputIt1>>;
/**
 * @brief Iterator-pair form of partial_ratio_alignment().
 *
 * Returns a ScoreAlignment<double> instead of a plain score; presumably the
 * partial_ratio score together with the position of the best alignment
 * (ScoreAlignment is declared elsewhere, confirm its fields there).
 *
 * @param first1 iterator to the beginning of the first sequence
 * @param last1 iterator to the end of the first sequence
 * @param first2 iterator to the beginning of the second sequence
 * @param last2 iterator to the end of the second sequence
 * @param score_cutoff Optional score threshold, defaults to 0
 */
template <typename InputIt1, typename InputIt2>
ScoreAlignment<double> partial_ratio_alignment(InputIt1 first1, InputIt1 last1, InputIt2 first2,
InputIt2 last2, double score_cutoff = 0);
/**
 * @brief Sentence form of partial_ratio_alignment().
 *
 * @param s1 first sentence
 * @param s2 second sentence
 * @param score_cutoff Optional score threshold, defaults to 0
 */
template <typename Sentence1, typename Sentence2>
ScoreAlignment<double> partial_ratio_alignment(const Sentence1& s1, const Sentence2& s2,
double score_cutoff = 0);
/**
 * @brief calculates the fuzz::ratio of the optimal string alignment
 *
 * @details
 * @cite hyrro_2004 @cite wagner_fischer_1974
 * @code{.cpp}
 * // score is 100
 * double score = partial_ratio("this is a test", "this is a test!");
 * @endcode
 *
 * @tparam Sentence1 This is a string that can be converted to
 * basic_string_view<char_type>
 * @tparam Sentence2 This is a string that can be converted to
 * basic_string_view<char_type>
 *
 * @param s1 string to compare with s2 (for type info check Template parameters
 * above)
 * @param s2 string to compare with s1 (for type info check Template parameters
 * above)
 * @param score_cutoff Optional argument for a score threshold between 0% and
 * 100%. Matches with a lower score than this number will not be returned.
 * Defaults to 0.
 *
 * @return returns the ratio between s1 and s2 or 0 when ratio < score_cutoff
 */
template <typename Sentence1, typename Sentence2>
double partial_ratio(const Sentence1& s1, const Sentence2& s2, double score_cutoff = 0);
/**
 * @brief Iterator-pair overload of partial_ratio().
 *
 * @param first1 iterator to the beginning of the first sequence
 * @param last1 iterator to the end of the first sequence
 * @param first2 iterator to the beginning of the second sequence
 * @param last2 iterator to the end of the second sequence
 * @param score_cutoff Optional score threshold, defaults to 0 (see the
 * sentence overload for its documented meaning)
 *
 * @return presumably the same result as the sentence overload applied to the
 * two sequences (only the declaration is visible here)
 */
template <typename InputIt1, typename InputIt2>
double partial_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
double score_cutoff = 0);
// todo add real implementation
/**
 * @brief Scorer object that is constructed from the first sequence and then
 * compared against second sequences through similarity().
 *
 * Holds a copy of the first sequence (s1), a CharSet over CharT1
 * (s1_char_set) and a CachedRatio. Presumably the reusable counterpart of
 * fuzz::partial_ratio; the iterator constructor and both similarity()
 * overloads are only declared here, so how the members are filled and used
 * must be checked in their definitions.
 *
 * @tparam CharT1 character type of the stored sequence
 */
template <typename CharT1>
struct CachedPartialRatio {
// Every CachedWRatio specialisation may access the private members below.
template <typename>
friend struct CachedWRatio;
// Declared only; defined outside this struct body.
template <typename InputIt1>
CachedPartialRatio(InputIt1 first1, InputIt1 last1);
// Sentence overload: delegates to the iterator constructor with begin/end of
// s1_.
template <typename Sentence1>
explicit CachedPartialRatio(const Sentence1& s1_)
: CachedPartialRatio(detail::to_begin(s1_), detail::to_end(s1_))
{}
// Declared only. score_cutoff defaults to 0.0; score_hint is not interpreted
// in this struct body (presumably an expected score used as a hint, confirm
// in the definition).
template <typename InputIt2>
double similarity(InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0,
double score_hint = 0.0) const;
// Sentence overload of similarity(); declared only.
template <typename Sentence2>
double similarity(const Sentence2& s2, double score_cutoff = 0.0, double score_hint = 0.0) const;
private:
std::vector<CharT1> s1;
rapidfuzz::detail::CharSet<CharT1> s1_char_set;
CachedRatio<CharT1> cached_ratio;
};
// Deduction guides: CharT1 is taken from the sentence's char_type or from the
// iterator's value type.
template <typename Sentence1>
explicit CachedPartialRatio(const Sentence1& s1) -> CachedPartialRatio<char_type<Sentence1>>;
template <typename InputIt1>
CachedPartialRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialRatio<iter_value_t<InputIt1>>;
/**
 * @brief Sorts the words in the strings and calculates the fuzz::ratio between
 * them
 *
 * @details
 * @code{.cpp}
 * // score is 100
 * double score = token_sort_ratio("fuzzy wuzzy was a bear",
 *                                 "wuzzy fuzzy was a bear");
 * @endcode
 *
 * @tparam Sentence1 This is a string that can be converted to
 * basic_string_view<char_type>
 * @tparam Sentence2 This is a string that can be converted to
 * basic_string_view<char_type>
 *
 * @param s1 string to compare with s2 (for type info check Template parameters
 * above)
 * @param s2 string to compare with s1 (for type info check Template parameters
 * above)
 * @param score_cutoff Optional argument for a score threshold between 0% and
 * 100%. Matches with a lower score than this number will not be returned.
 * Defaults to 0.
 *
 * @return returns the ratio between s1 and s2 or 0 when ratio < score_cutoff
 */
template <typename Sentence1, typename Sentence2>
double token_sort_ratio(const Sentence1& s1, const Sentence2& s2, double score_cutoff = 0);
/**
 * @brief Iterator-pair overload of token_sort_ratio().
 *
 * @param first1 iterator to the beginning of the first sequence
 * @param last1 iterator to the end of the first sequence
 * @param first2 iterator to the beginning of the second sequence
 * @param last2 iterator to the end of the second sequence
 * @param score_cutoff Optional score threshold, defaults to 0 (see the
 * sentence overload for its documented meaning)
 *
 * @return presumably the same result as the sentence overload applied to the
 * two sequences (only the declaration is visible here)
 */
template <typename InputIt1, typename InputIt2>
double token_sort_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
double score_cutoff = 0);
#ifdef RAPIDFUZZ_SIMD
namespace experimental {

/**
 * @brief Batched scorer that token-sorts every sequence before handing it to
 * a MultiRatio<MaxLen>.
 *
 * Both the stored sequences and the query sequence go through
 * detail::sorted_split(...).join() first; the joined result is what the
 * underlying MultiRatio sees.
 *
 * @tparam MaxLen forwarded unchanged to MultiRatio
 */
template <int MaxLen>
struct MultiTokenSortRatio {
    /** @param count forwarded to the MultiRatio scorer */
    MultiTokenSortRatio(size_t count) : scorer(count)
    {}

    /** @brief Forwards to MultiRatio::result_count(). */
    size_t result_count() const
    {
        return scorer.result_count();
    }

    /** @brief Token-sorts the sequence first1/last1 and stores the result. */
    template <typename InputIt1>
    void insert(InputIt1 first1, InputIt1 last1)
    {
        const auto sorted_s1 = detail::sorted_split(first1, last1).join();
        scorer.insert(sorted_s1);
    }

    /** @brief Sentence convenience overload; forwards begin/end of s1_. */
    template <typename Sentence1>
    void insert(const Sentence1& s1_)
    {
        insert(detail::to_begin(s1_), detail::to_end(s1_));
    }

    /**
     * @brief Token-sorts the query first2/last2 and scores all stored
     * sequences against it.
     *
     * @param scores caller-provided output array
     * @param score_count number of elements in scores (passed on to the
     * delegate)
     * @param score_cutoff passed on unchanged to MultiRatio::similarity
     */
    template <typename InputIt2>
    void similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2,
                    double score_cutoff = 0.0) const
    {
        const auto sorted_s2 = detail::sorted_split(first2, last2).join();
        scorer.similarity(scores, score_count, sorted_s2, score_cutoff);
    }

    /** @brief Sentence convenience overload; forwards begin/end of s2. */
    template <typename Sentence2>
    void similarity(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 0) const
    {
        similarity(scores, score_count, detail::to_begin(s2), detail::to_end(s2), score_cutoff);
    }

private:
    MultiRatio<MaxLen> scorer;
};

} /* namespace experimental */
#endif
// todo CachedRatio speed for equal strings vs original implementation
/**
 * @brief Scorer object that token-sorts the first sequence once at
 * construction and is then compared against second sequences through
 * similarity().
 *
 * The constructor stores detail::sorted_split(first1, last1).join() in
 * s1_sorted and builds a CachedRatio from it. Presumably the reusable
 * counterpart of fuzz::token_sort_ratio (the similarity() definitions are not
 * part of this struct body, confirm there).
 *
 * @tparam CharT1 character type of the stored sequence
 */
template <typename CharT1>
struct CachedTokenSortRatio {
// s1_sorted is declared before cached_ratio, so it is already initialised
// when cached_ratio is constructed from it.
template <typename InputIt1>
CachedTokenSortRatio(InputIt1 first1, InputIt1 last1)
: s1_sorted(detail::sorted_split(first1, last1).join()), cached_ratio(s1_sorted)
{}
// Sentence overload: delegates to the iterator constructor with begin/end of
// s1.
template <typename Sentence1>
explicit CachedTokenSortRatio(const Sentence1& s1)
: CachedTokenSortRatio(detail::to_begin(s1), detail::to_end(s1))
{}
// Declared only. score_cutoff defaults to 0.0; score_hint is not interpreted
// in this struct body (presumably an expected score used as a hint, confirm
// in the definition).
template <typename InputIt2>
double similarity(InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0,
double score_hint = 0.0) const;
// Sentence overload of similarity(); declared only.
template <typename Sentence2>
double similarity(const Sentence2& s2, double score_cutoff = 0.0, double score_hint = 0.0) const;
private:
std::vector<CharT1> s1_sorted;
CachedRatio<CharT1> cached_ratio;
};
// Deduction guides: CharT1 is taken from the sentence's char_type or from the
// iterator's value type.
template <typename Sentence1>
explicit CachedTokenSortRatio(const Sentence1& s1) -> CachedTokenSortRatio<char_type<Sentence1>>;
template <typename InputIt1>
CachedTokenSortRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSortRatio<iter_value_t<InputIt1>>;
/**
 * @brief Sorts the words in the strings and calculates the fuzz::partial_ratio
 * between them
 *
 * @tparam Sentence1 This is a string that can be converted to
 * basic_string_view<char_type>
 * @tparam Sentence2 This is a string that can be converted to
 * basic_string_view<char_type>
 *
 * @param s1 string to compare with s2 (for type info check Template parameters
 * above)
 * @param s2 string to compare with s1 (for type info check Template parameters
 * above)
 * @param score_cutoff Optional argument for a score threshold between 0% and
 * 100%. Matches with a lower score than this number will not be returned.
 * Defaults to 0.
 *
 * @return returns the ratio between s1 and s2 or 0 when ratio < score_cutoff
 */
template <typename Sentence1, typename Sentence2>
double partial_token_sort_ratio(const Sentence1& s1, const Sentence2& s2, double score_cutoff = 0);
/**
 * @brief Iterator-pair overload of partial_token_sort_ratio().
 *
 * @param first1 iterator to the beginning of the first sequence
 * @param last1 iterator to the end of the first sequence
 * @param first2 iterator to the beginning of the second sequence
 * @param last2 iterator to the end of the second sequence
 * @param score_cutoff Optional score threshold, defaults to 0 (see the
 * sentence overload for its documented meaning)
 *
 * @return presumably the same result as the sentence overload applied to the
 * two sequences (only the declaration is visible here)
 */
template <typename InputIt1, typename InputIt2>
double partial_token_sort_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
double score_cutoff = 0);
/**
 * @brief Scorer object that token-sorts the first sequence once at
 * construction and is then compared against second sequences through
 * similarity().
 *
 * The constructor stores detail::sorted_split(first1, last1).join() in
 * s1_sorted and builds a CachedPartialRatio from it. Presumably the reusable
 * counterpart of fuzz::partial_token_sort_ratio (the similarity() definitions
 * are not part of this struct body, confirm there).
 *
 * @tparam CharT1 character type of the stored sequence
 */
template <typename CharT1>
struct CachedPartialTokenSortRatio {
// s1_sorted is declared before cached_partial_ratio, so it is already
// initialised when cached_partial_ratio is constructed from it.
template <typename InputIt1>
CachedPartialTokenSortRatio(InputIt1 first1, InputIt1 last1)
: s1_sorted(detail::sorted_split(first1, last1).join()), cached_partial_ratio(s1_sorted)
{}
// Sentence overload: delegates to the iterator constructor with begin/end of
// s1.
template <typename Sentence1>
explicit CachedPartialTokenSortRatio(const Sentence1& s1)
: CachedPartialTokenSortRatio(detail::to_begin(s1), detail::to_end(s1))
{}
// Declared only. score_cutoff defaults to 0.0; score_hint is not interpreted
// in this struct body (presumably an expected score used as a hint, confirm
// in the definition).
template <typename InputIt2>
double similarity(InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0,
double score_hint = 0.0) const;
// Sentence overload of similarity(); declared only.
template <typename Sentence2>
double similarity(const Sentence2& s2, double score_cutoff = 0.0, double score_hint = 0.0) const;
private:
std::vector<CharT1> s1_sorted;
CachedPartialRatio<CharT1> cached_partial_ratio;
};
// Deduction guides: CharT1 is taken from the sentence's char_type or from the
// iterator's value type.
template <typename Sentence1>
explicit CachedPartialTokenSortRatio(const Sentence1& s1)
-> CachedPartialTokenSortRatio<char_type<Sentence1>>;
template <typename InputIt1>
CachedPartialTokenSortRatio(InputIt1 first1,
InputIt1 last1) -> CachedPartialTokenSortRatio<iter_value_t<InputIt1>>;
/**
 * @brief Compares the words in the strings based on unique and common words
 * between them using fuzz::ratio
 *
 * @details
 * @code{.cpp}
 * // score1 is 83.87
 * double score1 = token_sort_ratio("fuzzy was a bear",
 *                                  "fuzzy fuzzy was a bear");
 * // score2 is 100
 * double score2 = token_set_ratio("fuzzy was a bear",
 *                                 "fuzzy fuzzy was a bear");
 * @endcode
 *
 * @tparam Sentence1 This is a string that can be converted to
 * basic_string_view<char_type>
 * @tparam Sentence2 This is a string that can be converted to
 * basic_string_view<char_type>
 *
 * @param s1 string to compare with s2 (for type info check Template parameters
 * above)
 * @param s2 string to compare with s1 (for type info check Template parameters
 * above)
 * @param score_cutoff Optional argument for a score threshold between 0% and
 * 100%. Matches with a lower score than this number will not be returned.
 * Defaults to 0.
 *
 * @return returns the ratio between s1 and s2 or 0 when ratio < score_cutoff
 */
template <typename Sentence1, typename Sentence2>
double token_set_ratio(const Sentence1& s1, const Sentence2& s2, double score_cutoff = 0);
/**
 * @brief Iterator-pair overload of token_set_ratio().
 *
 * @param first1 iterator to the beginning of the first sequence
 * @param last1 iterator to the end of the first sequence
 * @param first2 iterator to the beginning of the second sequence
 * @param last2 iterator to the end of the second sequence
 * @param score_cutoff Optional score threshold, defaults to 0 (see the
 * sentence overload for its documented meaning)
 *
 * @return presumably the same result as the sentence overload applied to the
 * two sequences (only the declaration is visible here)
 */
template <typename InputIt1, typename InputIt2>
double token_set_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
double score_cutoff = 0);
// TODO documentation
template <typename CharT1>
struct CachedTokenSetRatio {
template <typename InputIt1>
CachedTokenSetRatio(InputIt1 first1, InputIt1 last1)
: s1(first1, last1), tokens_s1(detail::sorted_split(std::begin(s1), std::end(s1)))
{}
template <typename Sentence1>
explicit CachedTokenSetRatio(const Sentence1& s1_)
: CachedTokenSetRatio(detail::to_begin(s1_), detail::to_end(s1_))
{}
template <typename InputIt2>
double similarity(InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0,
double score_hint = 0.0) const;
template <typename Sentence2>
double similarity(const Sentence2& s2, double score_cutoff = 0.0, double score_hint = 0.0) const;
private:
std::vector<CharT1> s1;
detail::SplittedSentenceView<typename std::vector<CharT1>::iterator> tokens_s1;
};
template <typename Sentence1>
explicit CachedTokenSetRatio(const Sentence1& s1) -> CachedTokenSetRatio<char_type<Sentence1>>;
template <typename InputIt1>
CachedTokenSetRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSetRatio<iter_value_t<InputIt1>>;
/**
 * @brief Compares the words in the strings based on unique and common words
 * between them using fuzz::partial_ratio
 *
 * @tparam Sentence1 This is a string that can be converted to
 * basic_string_view<char_type>
 * @tparam Sentence2 This is a string that can be converted to
 * basic_string_view<char_type>
 *
 * @param s1 string to compare with s2 (for type info check Template parameters
 * above)
 * @param s2 string to compare with s1 (for type info check Template parameters
 * above)
 * @param score_cutoff Optional argument for a score threshold between 0% and
 * 100%. Matches with a lower score than this number will not be returned.
 * Defaults to 0.
 *
 * @return returns the ratio between s1 and s2 or 0 when ratio < score_cutoff
 */
template <typename Sentence1, typename Sentence2>
double partial_token_set_ratio(const Sentence1& s1, const Sentence2& s2, double score_cutoff = 0);
/**
 * @brief Iterator-pair overload of partial_token_set_ratio().
 *
 * @param first1 iterator to the beginning of the first sequence
 * @param last1 iterator to the end of the first sequence
 * @param first2 iterator to the beginning of the second sequence
 * @param last2 iterator to the end of the second sequence
 * @param score_cutoff Optional score threshold, defaults to 0 (see the
 * sentence overload for its documented meaning)
 *
 * @return presumably the same result as the sentence overload applied to the
 * two sequences (only the declaration is visible here)
 */
template <typename InputIt1, typename InputIt2>
double partial_token_set_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
double score_cutoff = 0);
// TODO documentation
template <typename CharT1>
struct CachedPartialTokenSetRatio {
template <typename InputIt1>
CachedPartialTokenSetRatio(InputIt1 first1, InputIt1 last1)
: s1(first1, last1), tokens_s1(detail::sorted_split(std::begin(s1), std::end(s1)))
{}
template <typename Sentence1>
explicit CachedPartialTokenSetRatio(const Sentence1& s1_)
: CachedPartialTokenSetRatio(detail::to_begin(s1_), detail::to_end(s1_))
{}
template <typename InputIt2>
double similarity(InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0,
double score_hint = 0.0) const;
template <typename Sentence2>
double similarity(const Sentence2& s2, double score_cutoff = 0.0, double score_hint = 0.0) const;
private:
std::vector<CharT1> s1;
detail::SplittedSentenceView<typename std::vector<CharT1>::iterator> tokens_s1;
};
template <typename Sentence1>
explicit CachedPartialTokenSetRatio(const Sentence1& s1) -> CachedPartialTokenSetRatio<char_type<Sentence1>>;
template <typename InputIt1>
CachedPartialTokenSetRatio(InputIt1 first1,
InputIt1 last1) -> CachedPartialTokenSetRatio<iter_value_t<InputIt1>>;
/**
 * @brief Helper method that returns the maximum of fuzz::token_set_ratio and
 * fuzz::token_sort_ratio (faster than manually executing the two functions)
 *
 * @tparam Sentence1 This is a string that can be converted to
 * basic_string_view<char_type>
 * @tparam Sentence2 This is a string that can be converted to
 * basic_string_view<char_type>
 *
 * @param s1 string to compare with s2 (for type info check Template parameters
 * above)
 * @param s2 string to compare with s1 (for type info check Template parameters
 * above)
 * @param score_cutoff Optional argument for a score threshold between 0% and
 * 100%. Matches with a lower score than this number will not be returned.
 * Defaults to 0.
 *
 * @return returns the ratio between s1 and s2 or 0 when ratio < score_cutoff
 */
template <typename Sentence1, typename Sentence2>
double token_ratio(const Sentence1& s1, const Sentence2& s2, double score_cutoff = 0);
/**
 * @brief Iterator-pair overload of token_ratio().
 *
 * @param first1 iterator to the beginning of the first sequence
 * @param last1 iterator to the end of the first sequence
 * @param first2 iterator to the beginning of the second sequence
 * @param last2 iterator to the end of the second sequence
 * @param score_cutoff Optional score threshold, defaults to 0 (see the
 * sentence overload for its documented meaning)
 *
 * @return presumably the same result as the sentence overload applied to the
 * two sequences (only the declaration is visible here)
 */
template <typename InputIt1, typename InputIt2>
double token_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff = 0);
// todo add real implementation
template <typename CharT1>
struct CachedTokenRatio {
template <typename InputIt1>
CachedTokenRatio(InputIt1 first1, InputIt1 last1)
: s1(first1, last1),
s1_tokens(detail::sorted_split(std::begin(s1), std::end(s1))),
s1_sorted(s1_tokens.join()),
cached_ratio_s1_sorted(s1_sorted)
{}
template <typename Sentence1>
explicit CachedTokenRatio(const Sentence1& s1_)
: CachedTokenRatio(detail::to_begin(s1_), detail::to_end(s1_))
{}
template <typename InputIt2>
double similarity(InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0,
double score_hint = 0.0) const;
template <typename Sentence2>
double similarity(const Sentence2& s2, double score_cutoff = 0.0, double score_hint = 0.0) const;
private:
std::vector<CharT1> s1;
detail::SplittedSentenceView<typename std::vector<CharT1>::iterator> s1_tokens;
std::vector<CharT1> s1_sorted;
CachedRatio<CharT1> cached_ratio_s1_sorted;
};
template <typename Sentence1>
explicit CachedTokenRatio(const Sentence1& s1) -> CachedTokenRatio<char_type<Sentence1>>;
template <typename InputIt1>
CachedTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenRatio<iter_value_t<InputIt1>>;
/**
 * @brief Helper method that returns the maximum of
 * fuzz::partial_token_set_ratio and fuzz::partial_token_sort_ratio (faster than
 * manually executing the two functions)
 *
 * @tparam Sentence1 This is a string that can be converted to
 * basic_string_view<char_type>
 * @tparam Sentence2 This is a string that can be converted to
 * basic_string_view<char_type>
 *
 * @param s1 string to compare with s2 (for type info check Template parameters
 * above)
 * @param s2 string to compare with s1 (for type info check Template parameters
 * above)
 * @param score_cutoff Optional argument for a score threshold between 0% and
 * 100%. Matches with a lower score than this number will not be returned.
 * Defaults to 0.
 *
 * @return returns the ratio between s1 and s2 or 0 when ratio < score_cutoff
 */
template <typename Sentence1, typename Sentence2>
double partial_token_ratio(const Sentence1& s1, const Sentence2& s2, double score_cutoff = 0);
/**
 * @brief Iterator-pair overload of partial_token_ratio().
 *
 * @param first1 iterator to the beginning of the first sequence
 * @param last1 iterator to the end of the first sequence
 * @param first2 iterator to the beginning of the second sequence
 * @param last2 iterator to the end of the second sequence
 * @param score_cutoff Optional score threshold, defaults to 0 (see the
 * sentence overload for its documented meaning)
 *
 * @return presumably the same result as the sentence overload applied to the
 * two sequences (only the declaration is visible here)
 */
template <typename InputIt1, typename InputIt2>
double partial_token_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
double score_cutoff = 0);
// todo add real implementation
/**
 * @brief Scorer object that copies, tokenises and token-sorts the first
 * sequence once at construction and is then compared against second sequences
 * through similarity().
 *
 * The constructor copies the first sequence into s1, stores
 * detail::sorted_split over that copy in tokens_s1 and joins the tokens into
 * s1_sorted. Presumably the reusable counterpart of fuzz::partial_token_ratio
 * (the similarity() definitions are not part of this struct body, confirm
 * there).
 *
 * tokens_s1 is built from iterators into this object's own s1. Copying must
 * therefore re-split the new object's s1 instead of copying the view
 * member-wise, otherwise the copy would keep referring to the source object's
 * buffer. The copy operations below do exactly that.
 *
 * @tparam CharT1 character type of the stored sequence
 */
template <typename CharT1>
struct CachedPartialTokenRatio {
    /** @brief Copies the sequence first1/last1 and derives all cached state. */
    template <typename InputIt1>
    CachedPartialTokenRatio(InputIt1 first1, InputIt1 last1)
        : s1(first1, last1),
          tokens_s1(detail::sorted_split(std::begin(s1), std::end(s1))),
          s1_sorted(tokens_s1.join())
    {}

    /** @brief Sentence overload; delegates with begin/end of s1_. */
    template <typename Sentence1>
    explicit CachedPartialTokenRatio(const Sentence1& s1_)
        : CachedPartialTokenRatio(detail::to_begin(s1_), detail::to_end(s1_))
    {}

    /**
     * @brief Deep copy: the token view is rebuilt over the new s1, the joined
     * sequence is copied as it is.
     */
    CachedPartialTokenRatio(const CachedPartialTokenRatio& other)
        : s1(other.s1),
          tokens_s1(detail::sorted_split(std::begin(s1), std::end(s1))),
          s1_sorted(other.s1_sorted)
    {}

    /** @brief Deep copy assignment: the token view is rebuilt over the new s1. */
    CachedPartialTokenRatio& operator=(const CachedPartialTokenRatio& other)
    {
        if (this != &other) {
            s1 = other.s1;
            tokens_s1 = detail::sorted_split(std::begin(s1), std::end(s1));
            s1_sorted = other.s1_sorted;
        }
        return *this;
    }

    // Moves keep the member-wise behaviour the implicit operations had; they
    // must be spelled out because declaring copy operations suppresses them.
    CachedPartialTokenRatio(CachedPartialTokenRatio&&) = default;
    CachedPartialTokenRatio& operator=(CachedPartialTokenRatio&&) = default;

    /**
     * @brief Declared only. score_cutoff defaults to 0.0; score_hint is not
     * interpreted in this struct body (presumably an expected score used as
     * a hint, confirm in the definition).
     */
    template <typename InputIt2>
    double similarity(InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0,
                      double score_hint = 0.0) const;

    /** @brief Sentence overload of similarity(); declared only. */
    template <typename Sentence2>
    double similarity(const Sentence2& s2, double score_cutoff = 0.0, double score_hint = 0.0) const;

private:
    // Declaration order is the initialisation order and each member is built
    // from the one before it; do not reorder.
    std::vector<CharT1> s1;
    detail::SplittedSentenceView<typename std::vector<CharT1>::iterator> tokens_s1;
    std::vector<CharT1> s1_sorted;
};

// Deduction guides: CharT1 is taken from the sentence's char_type or from the
// iterator's value type.
template <typename Sentence1>
explicit CachedPartialTokenRatio(const Sentence1& s1) -> CachedPartialTokenRatio<char_type<Sentence1>>;
template <typename InputIt1>
CachedPartialTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenRatio<iter_value_t<InputIt1>>;
/**
 * @brief Calculates a weighted ratio based on the other ratio algorithms
 *
 * @details
 * @todo add a detailed description
 *
 * @tparam Sentence1 This is a string that can be converted to
 * basic_string_view<char_type>
 * @tparam Sentence2 This is a string that can be converted to
 * basic_string_view<char_type>
 *
 * @param s1 string to compare with s2 (for type info check Template parameters
 * above)
 * @param s2 string to compare with s1 (for type info check Template parameters
 * above)
 * @param score_cutoff Optional argument for a score threshold between 0% and
 * 100%. Matches with a lower score than this number will not be returned.
 * Defaults to 0.
 *
 * @return returns the ratio between s1 and s2 or 0 when ratio < score_cutoff
 */
template <typename Sentence1, typename Sentence2>
double WRatio(const Sentence1& s1, const Sentence2& s2, double score_cutoff = 0);
/**
 * @brief Iterator-pair overload of WRatio().
 *
 * @param first1 iterator to the beginning of the first sequence
 * @param last1 iterator to the end of the first sequence
 * @param first2 iterator to the beginning of the second sequence
 * @param last2 iterator to the end of the second sequence
 * @param score_cutoff Optional score threshold, defaults to 0 (see the
 * sentence overload for its documented meaning)
 *
 * @return presumably the same result as the sentence overload applied to the
 * two sequences (only the declaration is visible here)
 */
template <typename InputIt1, typename InputIt2>
double WRatio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff = 0);
// todo add real implementation
/**
 * @brief Scorer object that is constructed from the first sequence and then
 * compared against second sequences through similarity().
 *
 * Holds a copy of the first sequence (s1), a CachedPartialRatio, a token view
 * (tokens_s1), a second sequence buffer (s1_sorted) and a
 * BlockPatternMatchVector. Presumably the reusable counterpart of
 * fuzz::WRatio; the iterator constructor and both similarity() overloads are
 * only declared here, so how the members are filled and used must be checked
 * in their definitions.
 *
 * NOTE(review): tokens_s1 is a view over std::vector<CharT1>::iterator. If the
 * out-of-line constructor builds it from this object's own s1, the implicitly
 * generated copy operations would leave a copy referring to the source
 * object's buffer. Confirm against the constructor definition.
 *
 * @tparam CharT1 character type of the stored sequence
 */
template <typename CharT1>
struct CachedWRatio {
// Declared only; defined outside this struct body.
// NOTE(review): `explicit` sits on this two-argument constructor while the
// single-argument constructor below is implicit, although the single-argument
// deduction guide after the struct is explicit. This looks swapped; confirm
// the intent before changing it, since callers may rely on the implicit
// conversion.
template <typename InputIt1>
explicit CachedWRatio(InputIt1 first1, InputIt1 last1);
// Sentence overload: delegates to the iterator constructor with begin/end of
// s1_.
template <typename Sentence1>
CachedWRatio(const Sentence1& s1_) : CachedWRatio(detail::to_begin(s1_), detail::to_end(s1_))
{}
// Declared only. score_cutoff defaults to 0.0; score_hint is not interpreted
// in this struct body (presumably an expected score used as a hint, confirm
// in the definition).
template <typename InputIt2>
double similarity(InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0,
double score_hint = 0.0) const;
// Sentence overload of similarity(); declared only.
template <typename Sentence2>
double similarity(const Sentence2& s2, double score_cutoff = 0.0, double score_hint = 0.0) const;
private:
// todo somehow implement this using other ratios with creating PatternMatchVector
// multiple times
std::vector<CharT1> s1;
CachedPartialRatio<CharT1> cached_partial_ratio;
detail::SplittedSentenceView<typename std::vector<CharT1>::iterator> tokens_s1;
std::vector<CharT1> s1_sorted;
rapidfuzz::detail::BlockPatternMatchVector blockmap_s1_sorted;
};
// Deduction guides: CharT1 is taken from the sentence's char_type or from the
// iterator's value type.
template <typename Sentence1>
explicit CachedWRatio(const Sentence1& s1) -> CachedWRatio<char_type<Sentence1>>;
template <typename InputIt1>
CachedWRatio(InputIt1 first1, InputIt1 last1) -> CachedWRatio<iter_value_t<InputIt1>>;
/**
 * @brief Calculates a quick ratio between two strings using fuzz::ratio
 *
 * @details
 * @todo add a detailed description
 *
 * @tparam Sentence1 This is a string that can be converted to
 * basic_string_view<char_type>
 * @tparam Sentence2 This is a string that can be converted to
 * basic_string_view<char_type>
 *
 * @param s1 string to compare with s2 (for type info check Template parameters
 * above)
 * @param s2 string to compare with s1 (for type info check Template parameters
 * above)
 * @param score_cutoff Optional argument for a score threshold between 0% and
 * 100%. Matches with a lower score than this number will not be returned.
 * Defaults to 0.
 *
 * @return returns the ratio between s1 and s2 or 0 when ratio < score_cutoff
 */
template <typename Sentence1, typename Sentence2>
double QRatio(const Sentence1& s1, const Sentence2& s2, double score_cutoff = 0);
/**
 * @brief Iterator-pair overload of QRatio().
 *
 * @param first1 iterator to the beginning of the first sequence
 * @param last1 iterator to the end of the first sequence
 * @param first2 iterator to the beginning of the second sequence
 * @param last2 iterator to the end of the second sequence
 * @param score_cutoff Optional score threshold, defaults to 0 (see the
 * sentence overload for its documented meaning)
 *
 * @return presumably the same result as the sentence overload applied to the
 * two sequences (only the declaration is visible here)
 */
template <typename InputIt1, typename InputIt2>
double QRatio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2, double score_cutoff = 0);
#ifdef RAPIDFUZZ_SIMD
namespace experimental {

/**
 * @brief Batched scorer built on MultiRatio<MaxLen> that additionally reports
 * 0 whenever one of the two compared sequences is empty.
 *
 * The length of every inserted sequence is remembered in str_lens so that
 * scores belonging to empty stored sequences can be zeroed after the
 * underlying MultiRatio has run.
 *
 * @tparam MaxLen forwarded unchanged to MultiRatio
 */
template <int MaxLen>
struct MultiQRatio {
public:
    /** @param count forwarded to the MultiRatio scorer */
    MultiQRatio(size_t count) : scorer(count)
    {}

    /** @brief Forwards to MultiRatio::result_count(). */
    size_t result_count() const
    {
        return scorer.result_count();
    }

    /** @brief Sentence convenience overload; forwards begin/end of s1_. */
    template <typename Sentence1>
    void insert(const Sentence1& s1_)
    {
        insert(detail::to_begin(s1_), detail::to_end(s1_));
    }

    /**
     * @brief Stores the sequence first1/last1 in the scorer and records its
     * length.
     */
    template <typename InputIt1>
    void insert(InputIt1 first1, InputIt1 last1)
    {
        scorer.insert(first1, last1);
        str_lens.push_back(static_cast<size_t>(std::distance(first1, last1)));
    }

    /** @brief Iterator-pair overload; wraps the iterators in a detail::Range. */
    template <typename InputIt2>
    void similarity(double* scores, size_t score_count, InputIt2 first2, InputIt2 last2,
                    double score_cutoff = 0.0) const
    {
        similarity(scores, score_count, detail::Range(first2, last2), score_cutoff);
    }

    /**
     * @brief Scores every stored sequence against s2.
     *
     * @param scores caller-provided output array
     * @param score_count number of elements in scores
     * @param s2 sequence to compare against; when it is empty every score is
     * set to 0 and the underlying scorer is not called
     * @param score_cutoff passed on unchanged to MultiRatio::similarity
     */
    template <typename Sentence2>
    void similarity(double* scores, size_t score_count, const Sentence2& s2, double score_cutoff = 0) const
    {
        // Writes made directly by this wrapper never go past the buffer size
        // the caller announced. The empty-query early return below does not
        // go through the underlying scorer, so nothing else looks at
        // score_count on that path.
        const size_t writable = (str_lens.size() < score_count) ? str_lens.size() : score_count;

        rapidfuzz::detail::Range s2_(s2);
        if (s2_.empty()) {
            for (size_t i = 0; i < writable; ++i)
                scores[i] = 0;
            return;
        }

        scorer.similarity(scores, score_count, s2, score_cutoff);

        // an empty stored sequence always scores 0
        for (size_t i = 0; i < writable; ++i)
            if (str_lens[i] == 0) scores[i] = 0;
    }

private:
    std::vector<size_t> str_lens;
    MultiRatio<MaxLen> scorer;
};

} /* namespace experimental */
#endif
/**
 * @brief Scorer object that is constructed from the first sequence and then
 * compared against second sequences through similarity().
 *
 * The constructor keeps a copy of the first sequence in s1 and builds a
 * CachedRatio from the same iterator pair. Presumably the reusable counterpart
 * of fuzz::QRatio (the similarity() definitions are not part of this struct
 * body, confirm there).
 *
 * @tparam CharT1 character type of the stored sequence
 */
template <typename CharT1>
struct CachedQRatio {
// Both members are initialised directly from the caller's iterators; the
// iterator pair is therefore traversed twice.
template <typename InputIt1>
CachedQRatio(InputIt1 first1, InputIt1 last1) : s1(first1, last1), cached_ratio(first1, last1)
{}
// Sentence overload: delegates to the iterator constructor with begin/end of
// s1_.
template <typename Sentence1>
explicit CachedQRatio(const Sentence1& s1_) : CachedQRatio(detail::to_begin(s1_), detail::to_end(s1_))
{}
// Declared only. score_cutoff defaults to 0.0; score_hint is not interpreted
// in this struct body (presumably an expected score used as a hint, confirm
// in the definition).
template <typename InputIt2>
double similarity(InputIt2 first2, InputIt2 last2, double score_cutoff = 0.0,
double score_hint = 0.0) const;
// Sentence overload of similarity(); declared only.
template <typename Sentence2>
double similarity(const Sentence2& s2, double score_cutoff = 0.0, double score_hint = 0.0) const;
private:
std::vector<CharT1> s1;
CachedRatio<CharT1> cached_ratio;
};
// Deduction guides: CharT1 is taken from the sentence's char_type or from the
// iterator's value type.
template <typename Sentence1>
explicit CachedQRatio(const Sentence1& s1) -> CachedQRatio<char_type<Sentence1>>;
template <typename InputIt1>
CachedQRatio(InputIt1 first1, InputIt1 last1) -> CachedQRatio<iter_value_t<InputIt1>>;
/**@}*/
} // namespace rapidfuzz::fuzz
#include <rapidfuzz/fuzz_impl.hpp>