6#include <rapidfuzz/details/CharSet.hpp>
7#include <rapidfuzz/details/PatternMatchVector.hpp>
8#include <rapidfuzz/details/common.hpp>
9#include <rapidfuzz/distance/Indel.hpp>
11namespace rapidfuzz::fuzz {
/**
 * Calculates a simple ratio between two strings (sentence overload).
 * Declaration only; no definition is visible in this listing.
 *
 * @param s1 first string to compare
 * @param s2 second string to compare
 * @param score_cutoff optional threshold, defaults to 0
 *        (presumably on the same scale as the returned score - TODO confirm)
 * @return the computed ratio as a double
 */
43template <
typename Sentence1,
typename Sentence2>
44double ratio(
const Sentence1& s1,
const Sentence2& s2,
double score_cutoff = 0);
/**
 * Iterator-pair overload of ratio: the two inputs are passed as the
 * ranges (first1, last1) and (first2, last2).
 *
 * @param score_cutoff optional threshold, defaults to 0
 * @return the computed ratio as a double
 */
46template <
typename InputIt1,
typename InputIt2>
47double ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
double score_cutoff = 0);
50namespace experimental {
// Members of MultiRatio. The struct header (and the declaration of the MaxLen
// template parameter used by the `scorer` member below) is not visible in this
// listing.

// Stores `count` in input_count and forwards it to the MultiIndel scorer.
54 MultiRatio(
size_t count) : input_count(count), scorer(count)
// Returns whatever result count the underlying scorer reports.
57 size_t result_count()
const
59 return scorer.result_count();
// Inserts a sentence by forwarding its begin/end to the iterator overload.
62 template <
typename Sentence1>
63 void insert(
const Sentence1& s1_)
65 insert(detail::to_begin(s1_), detail::to_end(s1_));
// Inserts the range (first1, last1) into the underlying scorer.
68 template <
typename InputIt1>
69 void insert(InputIt1 first1, InputIt1 last1)
71 scorer.insert(first1, last1);
// Iterator-pair overload: wraps (first2, last2) in a detail::Range and
// delegates to the sentence overload below.
// scores/score_count describe the caller-provided output buffer.
74 template <
typename InputIt2>
75 void similarity(
double* scores,
size_t score_count, InputIt2 first2, InputIt2 last2,
76 double score_cutoff = 0.0)
const
78 similarity(scores, score_count, detail::Range(first2, last2), score_cutoff);
// Sentence overload: asks the scorer for normalized similarities, passing
// score_cutoff / 100.0 as the cutoff, then loops over the first input_count
// entries.
// NOTE(review): the loop body is not visible in this listing; presumably it
// rescales the normalized scores to the 0-100 range - confirm.
81 template <
typename Sentence2>
82 void similarity(
double* scores,
size_t score_count,
const Sentence2& s2,
double score_cutoff = 0)
const
84 scorer.normalized_similarity(scores, score_count, s2, score_cutoff / 100.0);
86 for (
size_t i = 0; i < input_count; ++i)
// Underlying multi-string Indel scorer that does the actual work.
92 rapidfuzz::experimental::MultiIndel<MaxLen> scorer;
// CachedRatio<CharT1>: keeps a CachedIndel built from the first string so it
// can be reused across several similarity() calls. The `struct` header line is
// not visible in this listing.
98template <
typename CharT1>
// Builds the cached Indel state from the range (first1, last1).
100 template <
typename InputIt1>
101 CachedRatio(InputIt1 first1, InputIt1 last1) : cached_indel(first1, last1)
// Builds the cached Indel state directly from a sentence.
// NOTE(review): unlike most sibling Cached* structs in this file, this
// single-argument constructor is not marked explicit - confirm intentional.
104 template <
typename Sentence1>
105 CachedRatio(
const Sentence1& s1) : cached_indel(s1)
// Scores the cached string against (first2, last2). Declared here only.
// score_cutoff and score_hint both default to 0.0; the effect of score_hint
// is not visible in this listing.
108 template <
typename InputIt2>
109 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
110 double score_hint = 0.0)
const;
// Sentence overload of similarity(). Declared here only.
112 template <
typename Sentence2>
113 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Indel scorer pre-initialised with the first string.
116 CachedIndel<CharT1> cached_indel;
// Deduction guide: CachedRatio(sentence) deduces CharT1 as char_type<Sentence1>.
119template <
typename Sentence1>
120CachedRatio(
const Sentence1& s1) -> CachedRatio<char_type<Sentence1>>;
// Deduction guide: CachedRatio(first, last) deduces CharT1 as iter_value_t<InputIt1>.
122template <
typename InputIt1>
123CachedRatio(InputIt1 first1, InputIt1 last1) -> CachedRatio<iter_value_t<InputIt1>>;
/**
 * Iterator-pair overload of partial_ratio_alignment. Declaration only.
 *
 * @param score_cutoff optional threshold, defaults to 0
 * @return a ScoreAlignment<double>
 *         (presumably the partial_ratio score together with the position of
 *         the matched alignment - TODO confirm against ScoreAlignment)
 */
125template <
typename InputIt1,
typename InputIt2>
126ScoreAlignment<double> partial_ratio_alignment(InputIt1 first1, InputIt1 last1, InputIt2 first2,
127 InputIt2 last2,
double score_cutoff = 0);
/**
 * Sentence overload of partial_ratio_alignment. Declaration only.
 *
 * @param s1 first string to compare
 * @param s2 second string to compare
 * @param score_cutoff optional threshold, defaults to 0
 * @return a ScoreAlignment<double>
 */
129template <
typename Sentence1,
typename Sentence2>
130ScoreAlignment<double> partial_ratio_alignment(
const Sentence1& s1,
const Sentence2& s2,
131 double score_cutoff = 0);
/**
 * Calculates the fuzz::ratio of the optimal string alignment (sentence overload).
 * Declaration only; no definition is visible in this listing.
 *
 * @param s1 first string to compare
 * @param s2 second string to compare
 * @param score_cutoff optional threshold, defaults to 0
 * @return the computed score as a double
 */
158template <
typename Sentence1,
typename Sentence2>
159double partial_ratio(
const Sentence1& s1,
const Sentence2& s2,
double score_cutoff = 0);
/**
 * Iterator-pair overload of partial_ratio: the inputs are passed as the
 * ranges (first1, last1) and (first2, last2).
 */
161template <
typename InputIt1,
typename InputIt2>
162double partial_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
163 double score_cutoff = 0);
// CachedPartialRatio<CharT1>: stores a copy of the first string, a CharSet
// for it and a CachedRatio, for use by the similarity() overloads below.
166template <
typename CharT1>
167struct CachedPartialRatio {
// CachedWRatio is declared a friend of this struct.
169 friend struct CachedWRatio;
// Constructs from the range (first1, last1). Declared here only.
171 template <
typename InputIt1>
172 CachedPartialRatio(InputIt1 first1, InputIt1 last1);
// Constructs from a sentence by delegating to the iterator-pair constructor.
174 template <
typename Sentence1>
175 explicit CachedPartialRatio(
const Sentence1& s1_)
176 : CachedPartialRatio(detail::to_begin(s1_), detail::to_end(s1_))
// Scores the cached string against (first2, last2). Declared here only.
// score_cutoff and score_hint both default to 0.0; the effect of score_hint
// is not visible in this listing.
179 template <
typename InputIt2>
180 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
181 double score_hint = 0.0)
const;
// Sentence overload of similarity(). Declared here only.
183 template <
typename Sentence2>
184 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Stored characters of the first string.
187 std::vector<CharT1> s1;
// Character set for the first string (how it is filled and used is defined
// outside this listing).
188 rapidfuzz::detail::CharSet<CharT1> s1_char_set;
// Cached plain-ratio scorer.
189 CachedRatio<CharT1> cached_ratio;
// Deduction guide: CachedPartialRatio(sentence) deduces CharT1 as char_type<Sentence1>.
192template <
typename Sentence1>
193explicit CachedPartialRatio(
const Sentence1& s1) -> CachedPartialRatio<char_type<Sentence1>>;
// Deduction guide: CachedPartialRatio(first, last) deduces CharT1 as iter_value_t<InputIt1>.
195template <
typename InputIt1>
196CachedPartialRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialRatio<iter_value_t<InputIt1>>;
/**
 * Sorts the words in the strings and calculates the fuzz::ratio between them
 * (sentence overload). Declaration only.
 *
 * @param s1 first string to compare
 * @param s2 second string to compare
 * @param score_cutoff optional threshold, defaults to 0
 * @return the computed score as a double
 */
224template <
typename Sentence1,
typename Sentence2>
225double token_sort_ratio(
const Sentence1& s1,
const Sentence2& s2,
double score_cutoff = 0);
/**
 * Iterator-pair overload of token_sort_ratio: the inputs are passed as the
 * ranges (first1, last1) and (first2, last2).
 */
227template <
typename InputIt1,
typename InputIt2>
228double token_sort_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
229 double score_cutoff = 0);
232namespace experimental {
// MultiTokenSortRatio: token-sorts every inserted string and every query, and
// delegates the scoring to a MultiRatio. The declaration of the MaxLen template
// parameter used by the `scorer` member is not visible in this listing.
234struct MultiTokenSortRatio {
// Forwards `count` to the underlying MultiRatio scorer.
236 MultiTokenSortRatio(
size_t count) : scorer(count)
// Returns whatever result count the underlying scorer reports.
239 size_t result_count()
const
241 return scorer.result_count();
// Inserts a sentence by forwarding its begin/end to the iterator overload.
244 template <
typename Sentence1>
245 void insert(
const Sentence1& s1_)
247 insert(detail::to_begin(s1_), detail::to_end(s1_));
// Splits (first1, last1) with sorted_split, joins the result and inserts the
// joined string into the scorer.
250 template <
typename InputIt1>
251 void insert(InputIt1 first1, InputIt1 last1)
253 scorer.insert(detail::sorted_split(first1, last1).join());
// Applies the same sorted_split + join to the query before handing it to
// the scorer. scores/score_count describe the caller-provided output buffer.
256 template <
typename InputIt2>
257 void similarity(
double* scores,
size_t score_count, InputIt2 first2, InputIt2 last2,
258 double score_cutoff = 0.0)
const
260 scorer.similarity(scores, score_count, detail::sorted_split(first2, last2).join(), score_cutoff);
// Sentence overload: forwards begin/end of s2 to the iterator overload above.
263 template <
typename Sentence2>
264 void similarity(
double* scores,
size_t score_count,
const Sentence2& s2,
double score_cutoff = 0)
const
266 similarity(scores, score_count, detail::to_begin(s2), detail::to_end(s2), score_cutoff);
// Underlying multi-string ratio scorer.
270 MultiRatio<MaxLen> scorer;
// CachedTokenSortRatio<CharT1>: stores the token-sorted form of the first
// string plus a CachedRatio built from it.
277template <
typename CharT1>
278struct CachedTokenSortRatio {
// Splits (first1, last1) with sorted_split, joins the result into s1_sorted
// and builds cached_ratio from s1_sorted. s1_sorted is declared before
// cached_ratio below, so it is initialised first.
279 template <
typename InputIt1>
280 CachedTokenSortRatio(InputIt1 first1, InputIt1 last1)
281 : s1_sorted(detail::sorted_split(first1, last1).join()), cached_ratio(s1_sorted)
// Constructs from a sentence by delegating to the iterator-pair constructor.
284 template <
typename Sentence1>
285 explicit CachedTokenSortRatio(
const Sentence1& s1)
286 : CachedTokenSortRatio(detail::to_begin(s1), detail::to_end(s1))
// Scores the cached string against (first2, last2). Declared here only.
// score_cutoff and score_hint both default to 0.0; the effect of score_hint
// is not visible in this listing.
289 template <
typename InputIt2>
290 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
291 double score_hint = 0.0)
const;
// Sentence overload of similarity(). Declared here only.
293 template <
typename Sentence2>
294 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Joined, token-sorted form of the first string.
297 std::vector<CharT1> s1_sorted;
// Ratio scorer built from s1_sorted.
298 CachedRatio<CharT1> cached_ratio;
// Deduction guide: CachedTokenSortRatio(sentence) deduces CharT1 as char_type<Sentence1>.
301template <
typename Sentence1>
302explicit CachedTokenSortRatio(
const Sentence1& s1) -> CachedTokenSortRatio<char_type<Sentence1>>;
// Deduction guide: CachedTokenSortRatio(first, last) deduces CharT1 as iter_value_t<InputIt1>.
304template <
typename InputIt1>
305CachedTokenSortRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSortRatio<iter_value_t<InputIt1>>;
// NOTE(review): the lines carrying the function name and most parameters of
// the two declarations below are missing from this listing. Judging by their
// position and by the CachedPartialTokenSortRatio struct that follows, they
// are presumably the sentence and iterator-pair overloads of
// partial_token_sort_ratio - confirm against the original header.
327template <
typename Sentence1,
typename Sentence2>
330template <
typename InputIt1,
typename InputIt2>
// Trailing parameter of the iterator-pair overload; the cutoff defaults to 0.
332 double score_cutoff = 0);
// CachedPartialTokenSortRatio<CharT1>: stores the token-sorted form of the
// first string plus a CachedPartialRatio built from it.
335template <
typename CharT1>
336struct CachedPartialTokenSortRatio {
// Splits (first1, last1) with sorted_split, joins the result into s1_sorted
// and builds cached_partial_ratio from s1_sorted. s1_sorted is declared
// before cached_partial_ratio below, so it is initialised first.
337 template <
typename InputIt1>
338 CachedPartialTokenSortRatio(InputIt1 first1, InputIt1 last1)
339 : s1_sorted(detail::sorted_split(first1, last1).join()), cached_partial_ratio(s1_sorted)
// Constructs from a sentence by delegating to the iterator-pair constructor.
342 template <
typename Sentence1>
343 explicit CachedPartialTokenSortRatio(
const Sentence1& s1)
344 : CachedPartialTokenSortRatio(detail::to_begin(s1), detail::to_end(s1))
// Scores the cached string against (first2, last2). Declared here only.
// score_cutoff and score_hint both default to 0.0; the effect of score_hint
// is not visible in this listing.
347 template <
typename InputIt2>
348 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
349 double score_hint = 0.0)
const;
// Sentence overload of similarity(). Declared here only.
351 template <
typename Sentence2>
352 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Joined, token-sorted form of the first string.
355 std::vector<CharT1> s1_sorted;
// Partial-ratio scorer built from s1_sorted.
356 CachedPartialRatio<CharT1> cached_partial_ratio;
// Deduction guide: CachedPartialTokenSortRatio(sentence) deduces CharT1 as char_type<Sentence1>.
359template <
typename Sentence1>
360explicit CachedPartialTokenSortRatio(
const Sentence1& s1)
361 -> CachedPartialTokenSortRatio<char_type<Sentence1>>;
// Deduction guide: CachedPartialTokenSortRatio(first, last) deduces CharT1 as iter_value_t<InputIt1>.
363template <
typename InputIt1>
364CachedPartialTokenSortRatio(InputIt1 first1, InputIt1 last1)
365 -> CachedPartialTokenSortRatio<iter_value_t<InputIt1>>;
/**
 * Compares the words in the strings based on unique and common words between
 * them using fuzz::ratio (sentence overload). Declaration only.
 *
 * @param s1 first string to compare
 * @param s2 second string to compare
 * @param score_cutoff optional threshold, defaults to 0
 * @return the computed score as a double
 */
395template <
typename Sentence1,
typename Sentence2>
396double token_set_ratio(
const Sentence1& s1,
const Sentence2& s2,
double score_cutoff = 0);
/**
 * Iterator-pair overload of token_set_ratio: the inputs are passed as the
 * ranges (first1, last1) and (first2, last2).
 */
398template <
typename InputIt1,
typename InputIt2>
399double token_set_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
400 double score_cutoff = 0);
// CachedTokenSetRatio<CharT1>: stores a copy of the first string and the
// result of sorted_split over that copy.
403template <
typename CharT1>
404struct CachedTokenSetRatio {
// Copies (first1, last1) into s1, then runs sorted_split over the member s1
// (not over the caller's range). s1 is declared before tokens_s1 below, so
// it is initialised first.
405 template <
typename InputIt1>
406 CachedTokenSetRatio(InputIt1 first1, InputIt1 last1)
407 : s1(first1, last1), tokens_s1(detail::sorted_split(std::begin(s1), std::end(s1)))
// Constructs from a sentence by delegating to the iterator-pair constructor.
410 template <
typename Sentence1>
411 explicit CachedTokenSetRatio(
const Sentence1& s1_)
412 : CachedTokenSetRatio(detail::to_begin(s1_), detail::to_end(s1_))
// Scores the cached string against (first2, last2). Declared here only.
// score_cutoff and score_hint both default to 0.0; the effect of score_hint
// is not visible in this listing.
415 template <
typename InputIt2>
416 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
417 double score_hint = 0.0)
const;
// Sentence overload of similarity(). Declared here only.
419 template <
typename Sentence2>
420 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Stored characters of the first string.
423 std::vector<CharT1> s1;
// Token view produced by sorted_split over s1.
// NOTE(review): the view is parameterised on std::vector<CharT1>::iterator
// and is built from iterators of the member s1, so it presumably refers into
// s1's storage. If so, a defaulted copy of this object would leave the copy's
// view referring to the source object's buffer - confirm.
424 detail::SplittedSentenceView<typename std::vector<CharT1>::iterator> tokens_s1;
// Deduction guide: CachedTokenSetRatio(sentence) deduces CharT1 as char_type<Sentence1>.
427template <
typename Sentence1>
428explicit CachedTokenSetRatio(
const Sentence1& s1) -> CachedTokenSetRatio<char_type<Sentence1>>;
// Deduction guide: CachedTokenSetRatio(first, last) deduces CharT1 as iter_value_t<InputIt1>.
430template <
typename InputIt1>
431CachedTokenSetRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenSetRatio<iter_value_t<InputIt1>>;
// NOTE(review): the lines carrying the function name and most parameters of
// the two declarations below are missing from this listing. Judging by their
// position and by the CachedPartialTokenSetRatio struct that follows, they
// are presumably the sentence and iterator-pair overloads of
// partial_token_set_ratio - confirm against the original header.
452template <
typename Sentence1,
typename Sentence2>
455template <
typename InputIt1,
typename InputIt2>
// Trailing parameter of the iterator-pair overload; the cutoff defaults to 0.
457 double score_cutoff = 0);
// CachedPartialTokenSetRatio<CharT1>: stores a copy of the first string and
// the result of sorted_split over that copy.
460template <
typename CharT1>
461struct CachedPartialTokenSetRatio {
// Copies (first1, last1) into s1, then runs sorted_split over the member s1
// (not over the caller's range). s1 is declared before tokens_s1 below, so
// it is initialised first.
462 template <
typename InputIt1>
463 CachedPartialTokenSetRatio(InputIt1 first1, InputIt1 last1)
464 : s1(first1, last1), tokens_s1(detail::sorted_split(std::begin(s1), std::end(s1)))
// Constructs from a sentence by delegating to the iterator-pair constructor.
467 template <
typename Sentence1>
468 explicit CachedPartialTokenSetRatio(
const Sentence1& s1_)
469 : CachedPartialTokenSetRatio(detail::to_begin(s1_), detail::to_end(s1_))
// Scores the cached string against (first2, last2). Declared here only.
// score_cutoff and score_hint both default to 0.0; the effect of score_hint
// is not visible in this listing.
472 template <
typename InputIt2>
473 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
474 double score_hint = 0.0)
const;
// Sentence overload of similarity(). Declared here only.
476 template <
typename Sentence2>
477 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Stored characters of the first string.
480 std::vector<CharT1> s1;
// Token view produced by sorted_split over s1.
// NOTE(review): built from iterators of the member s1, so it presumably
// refers into s1's storage; a defaulted copy of this object would then leave
// the copy's view referring to the source object's buffer - confirm.
481 detail::SplittedSentenceView<typename std::vector<CharT1>::iterator> tokens_s1;
// Deduction guide: CachedPartialTokenSetRatio(sentence) deduces CharT1 as char_type<Sentence1>.
484template <
typename Sentence1>
485explicit CachedPartialTokenSetRatio(
const Sentence1& s1) -> CachedPartialTokenSetRatio<char_type<Sentence1>>;
// Deduction guide: CachedPartialTokenSetRatio(first, last) deduces CharT1 as iter_value_t<InputIt1>.
487template <
typename InputIt1>
488CachedPartialTokenSetRatio(InputIt1 first1, InputIt1 last1)
489 -> CachedPartialTokenSetRatio<iter_value_t<InputIt1>>;
/**
 * Helper that returns the maximum of fuzz::token_set_ratio and
 * fuzz::token_sort_ratio (sentence overload). Declaration only.
 *
 * @param s1 first string to compare
 * @param s2 second string to compare
 * @param score_cutoff optional threshold, defaults to 0
 * @return the computed score as a double
 */
510template <
typename Sentence1,
typename Sentence2>
511double token_ratio(
const Sentence1& s1,
const Sentence2& s2,
double score_cutoff = 0);
/**
 * Iterator-pair overload of token_ratio: the inputs are passed as the
 * ranges (first1, last1) and (first2, last2).
 */
513template <
typename InputIt1,
typename InputIt2>
514double token_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
double score_cutoff = 0);
// CachedTokenRatio<CharT1>: stores a copy of the first string, its sorted
// tokens, the joined token-sorted string and a CachedRatio built from the
// latter.
517template <
typename CharT1>
518struct CachedTokenRatio {
// NOTE(review): the first line of the member-initializer list (listing line
// 521) is not visible here; presumably it initialises s1 from (first1, last1)
// - confirm. The visible initializers split the member s1 into sorted tokens,
// join them into s1_sorted and build the cached ratio from s1_sorted, in the
// same order as the members are declared below.
519 template <
typename InputIt1>
520 CachedTokenRatio(InputIt1 first1, InputIt1 last1)
522 s1_tokens(detail::sorted_split(std::begin(s1), std::end(s1))),
523 s1_sorted(s1_tokens.join()),
524 cached_ratio_s1_sorted(s1_sorted)
// Constructs from a sentence by delegating to the iterator-pair constructor.
527 template <
typename Sentence1>
528 explicit CachedTokenRatio(
const Sentence1& s1_)
529 : CachedTokenRatio(detail::to_begin(s1_), detail::to_end(s1_))
// Scores the cached string against (first2, last2). Declared here only.
// score_cutoff and score_hint both default to 0.0; the effect of score_hint
// is not visible in this listing.
532 template <
typename InputIt2>
533 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
534 double score_hint = 0.0)
const;
// Sentence overload of similarity(). Declared here only.
536 template <
typename Sentence2>
537 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Stored characters of the first string.
540 std::vector<CharT1> s1;
// Token view produced by sorted_split over s1 (presumably refers into s1's
// storage - see the iterator type; confirm before copying such objects).
541 detail::SplittedSentenceView<typename std::vector<CharT1>::iterator> s1_tokens;
// Joined form of s1_tokens.
542 std::vector<CharT1> s1_sorted;
// Ratio scorer built from s1_sorted.
543 CachedRatio<CharT1> cached_ratio_s1_sorted;
// Deduction guide: CachedTokenRatio(sentence) deduces CharT1 as char_type<Sentence1>.
546template <
typename Sentence1>
547explicit CachedTokenRatio(
const Sentence1& s1) -> CachedTokenRatio<char_type<Sentence1>>;
// Deduction guide: CachedTokenRatio(first, last) deduces CharT1 as iter_value_t<InputIt1>.
549template <
typename InputIt1>
550CachedTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedTokenRatio<iter_value_t<InputIt1>>;
/**
 * Helper that returns the maximum of fuzz::partial_token_set_ratio and a
 * second partial token-based ratio (presumably partial_token_sort_ratio -
 * TODO confirm). Sentence overload; declaration only.
 *
 * @param s1 first string to compare
 * @param s2 second string to compare
 * @param score_cutoff optional threshold, defaults to 0
 * @return the computed score as a double
 */
572template <
typename Sentence1,
typename Sentence2>
573double partial_token_ratio(
const Sentence1& s1,
const Sentence2& s2,
double score_cutoff = 0);
/**
 * Iterator-pair overload of partial_token_ratio: the inputs are passed as the
 * ranges (first1, last1) and (first2, last2).
 */
575template <
typename InputIt1,
typename InputIt2>
576double partial_token_ratio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
577 double score_cutoff = 0);
// CachedPartialTokenRatio<CharT1>: stores a copy of the first string, its
// sorted tokens and the joined token-sorted string.
580template <
typename CharT1>
581struct CachedPartialTokenRatio {
// NOTE(review): the first line of the member-initializer list (listing line
// 584) is not visible here; presumably it initialises s1 from (first1, last1)
// - confirm. The visible initializers split the member s1 into sorted tokens
// and join them into s1_sorted, in the same order as the members are declared
// below.
582 template <
typename InputIt1>
583 CachedPartialTokenRatio(InputIt1 first1, InputIt1 last1)
585 tokens_s1(detail::sorted_split(std::begin(s1), std::end(s1))),
586 s1_sorted(tokens_s1.join())
// Constructs from a sentence by delegating to the iterator-pair constructor.
589 template <
typename Sentence1>
590 explicit CachedPartialTokenRatio(
const Sentence1& s1_)
591 : CachedPartialTokenRatio(detail::to_begin(s1_), detail::to_end(s1_))
// Scores the cached string against (first2, last2). Declared here only.
// score_cutoff and score_hint both default to 0.0; the effect of score_hint
// is not visible in this listing.
594 template <
typename InputIt2>
595 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
596 double score_hint = 0.0)
const;
// Sentence overload of similarity(). Declared here only.
598 template <
typename Sentence2>
599 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Stored characters of the first string.
602 std::vector<CharT1> s1;
// Token view produced by sorted_split over s1 (presumably refers into s1's
// storage - see the iterator type; confirm before copying such objects).
603 detail::SplittedSentenceView<typename std::vector<CharT1>::iterator> tokens_s1;
// Joined form of tokens_s1.
604 std::vector<CharT1> s1_sorted;
// Deduction guide: CachedPartialTokenRatio(sentence) deduces CharT1 as char_type<Sentence1>.
607template <
typename Sentence1>
608explicit CachedPartialTokenRatio(
const Sentence1& s1) -> CachedPartialTokenRatio<char_type<Sentence1>>;
// Deduction guide: CachedPartialTokenRatio(first, last) deduces CharT1 as iter_value_t<InputIt1>.
610template <
typename InputIt1>
611CachedPartialTokenRatio(InputIt1 first1, InputIt1 last1) -> CachedPartialTokenRatio<iter_value_t<InputIt1>>;
/**
 * Calculates a weighted ratio based on the other ratio algorithms
 * (sentence overload). Declaration only.
 *
 * @param s1 first string to compare
 * @param s2 second string to compare
 * @param score_cutoff optional threshold, defaults to 0
 * @return the computed score as a double
 */
634template <
typename Sentence1,
typename Sentence2>
635double WRatio(
const Sentence1& s1,
const Sentence2& s2,
double score_cutoff = 0);
/**
 * Iterator-pair overload of WRatio: the inputs are passed as the ranges
 * (first1, last1) and (first2, last2).
 */
637template <
typename InputIt1,
typename InputIt2>
638double WRatio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
double score_cutoff = 0);
// CachedWRatio<CharT1>: stores the first string together with the pieces
// listed as members below. The `struct` header line is not visible in this
// listing.
641template <
typename CharT1>
// Constructs from the range (first1, last1). Declared here only.
// NOTE(review): `explicit` is on this two-argument constructor while the
// single-argument sentence constructor below is not explicit; sibling structs
// such as CachedPartialRatio and CachedTokenSortRatio mark the sentence
// constructor explicit instead - confirm whether this is intentional.
643 template <
typename InputIt1>
644 explicit CachedWRatio(InputIt1 first1, InputIt1 last1);
// Constructs from a sentence by delegating to the iterator-pair constructor.
646 template <
typename Sentence1>
647 CachedWRatio(
const Sentence1& s1_) : CachedWRatio(detail::to_begin(s1_), detail::to_end(s1_))
// Scores the cached string against (first2, last2). Declared here only.
// score_cutoff and score_hint both default to 0.0; the effect of score_hint
// is not visible in this listing.
650 template <
typename InputIt2>
651 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
652 double score_hint = 0.0)
const;
// Sentence overload of similarity(). Declared here only.
654 template <
typename Sentence2>
655 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Stored characters of the first string.
660 std::vector<CharT1> s1;
// Cached partial-ratio scorer.
661 CachedPartialRatio<CharT1> cached_partial_ratio;
// Token view over a std::vector<CharT1> (presumably over s1 - the
// constructor body is not visible here; confirm).
662 detail::SplittedSentenceView<typename std::vector<CharT1>::iterator> tokens_s1;
// Presumably the joined, token-sorted form of s1 - TODO confirm in the
// constructor definition.
663 std::vector<CharT1> s1_sorted;
// Block pattern-match vector; by its name presumably built from s1_sorted -
// TODO confirm.
664 rapidfuzz::detail::BlockPatternMatchVector blockmap_s1_sorted;
// Deduction guide: CachedWRatio(sentence) deduces CharT1 as char_type<Sentence1>.
667template <
typename Sentence1>
668explicit CachedWRatio(
const Sentence1& s1) -> CachedWRatio<char_type<Sentence1>>;
// Deduction guide: CachedWRatio(first, last) deduces CharT1 as iter_value_t<InputIt1>.
670template <
typename InputIt1>
671CachedWRatio(InputIt1 first1, InputIt1 last1) -> CachedWRatio<iter_value_t<InputIt1>>;
/**
 * Calculates a quick ratio between two strings using fuzz::ratio
 * (sentence overload). Declaration only.
 *
 * @param s1 first string to compare
 * @param s2 second string to compare
 * @param score_cutoff optional threshold, defaults to 0
 * @return the computed score as a double
 */
694template <
typename Sentence1,
typename Sentence2>
695double QRatio(
const Sentence1& s1,
const Sentence2& s2,
double score_cutoff = 0);
/**
 * Iterator-pair overload of QRatio: the inputs are passed as the ranges
 * (first1, last1) and (first2, last2).
 */
697template <
typename InputIt1,
typename InputIt2>
698double QRatio(InputIt1 first1, InputIt1 last1, InputIt2 first2, InputIt2 last2,
double score_cutoff = 0);
701namespace experimental {
// Members of MultiQRatio. The struct header (and the declaration of the MaxLen
// template parameter used by the `scorer` member below) is not visible in this
// listing.

// Forwards `count` to the underlying MultiRatio scorer.
705 MultiQRatio(
size_t count) : scorer(count)
// Returns whatever result count the underlying scorer reports.
708 size_t result_count()
const
710 return scorer.result_count();
// Inserts a sentence by forwarding its begin/end to the iterator overload.
713 template <
typename Sentence1>
714 void insert(
const Sentence1& s1_)
716 insert(detail::to_begin(s1_), detail::to_end(s1_));
// Inserts (first1, last1) into the scorer and records its length in
// str_lens, so empty inputs can be recognised when scoring.
719 template <
typename InputIt1>
720 void insert(InputIt1 first1, InputIt1 last1)
722 scorer.insert(first1, last1);
723 str_lens.push_back(
static_cast<size_t>(std::distance(first1, last1)));
// Iterator-pair overload: wraps (first2, last2) in a detail::Range and
// delegates to the sentence overload below.
726 template <
typename InputIt2>
727 void similarity(
double* scores,
size_t score_count, InputIt2 first2, InputIt2 last2,
728 double score_cutoff = 0.0)
const
730 similarity(scores, score_count, detail::Range(first2, last2), score_cutoff);
// Sentence overload. scores/score_count describe the caller-provided output
// buffer.
// NOTE(review): the lines between the Range construction and the first loop,
// and that loop's body, are not visible in this listing; presumably they
// handle an empty s2 - confirm.
733 template <
typename Sentence2>
734 void similarity(
double* scores,
size_t score_count,
const Sentence2& s2,
double score_cutoff = 0)
const
736 rapidfuzz::detail::Range s2_(s2);
738 for (
size_t i = 0; i < str_lens.size(); ++i)
// Delegate the scoring to the underlying MultiRatio.
744 scorer.similarity(scores, score_count, s2, score_cutoff);
// Force the score to 0 for every inserted string that was empty.
746 for (
size_t i = 0; i < str_lens.size(); ++i)
747 if (str_lens[i] == 0) scores[i] = 0;
// Length of each inserted string, in insertion order.
751 std::vector<size_t> str_lens;
// Underlying multi-string ratio scorer.
752 MultiRatio<MaxLen> scorer;
// CachedQRatio<CharT1>: stores a copy of the first string plus a CachedRatio
// built from the same range. The `struct` header line is not visible in this
// listing.
757template <
typename CharT1>
// Copies (first1, last1) into s1 and builds cached_ratio from the same range.
759 template <
typename InputIt1>
760 CachedQRatio(InputIt1 first1, InputIt1 last1) : s1(first1, last1), cached_ratio(first1, last1)
// Constructs from a sentence by delegating to the iterator-pair constructor.
763 template <
typename Sentence1>
764 explicit CachedQRatio(
const Sentence1& s1_) : CachedQRatio(detail::to_begin(s1_), detail::to_end(s1_))
// Scores the cached string against (first2, last2). Declared here only.
// score_cutoff and score_hint both default to 0.0; the effect of score_hint
// is not visible in this listing.
767 template <
typename InputIt2>
768 double similarity(InputIt2 first2, InputIt2 last2,
double score_cutoff = 0.0,
769 double score_hint = 0.0)
const;
// Sentence overload of similarity(). Declared here only.
771 template <
typename Sentence2>
772 double similarity(
const Sentence2& s2,
double score_cutoff = 0.0,
double score_hint = 0.0)
const;
// Stored characters of the first string (presumably kept so similarity()
// can special-case an empty first string - TODO confirm in the definition).
775 std::vector<CharT1> s1;
// Ratio scorer built from the first string.
776 CachedRatio<CharT1> cached_ratio;
// Deduction guide: CachedQRatio(sentence) deduces CharT1 as char_type<Sentence1>.
779template <
typename Sentence1>
780explicit CachedQRatio(
const Sentence1& s1) -> CachedQRatio<char_type<Sentence1>>;
// Deduction guide: CachedQRatio(first, last) deduces CharT1 as iter_value_t<InputIt1>.
782template <
typename InputIt1>
783CachedQRatio(InputIt1 first1, InputIt1 last1) -> CachedQRatio<iter_value_t<InputIt1>>;
789#include <rapidfuzz/fuzz_impl.hpp>
double WRatio(const Sentence1 &s1, const Sentence2 &s2, double score_cutoff=0)
Calculates a weighted ratio based on the other ratio algorithms.
Definition fuzz_impl.hpp:826
double ratio(const Sentence1 &s1, const Sentence2 &s2, double score_cutoff=0)
Calculates a simple ratio between two strings.
Definition fuzz_impl.hpp:27
double QRatio(const Sentence1 &s1, const Sentence2 &s2, double score_cutoff=0)
Calculates a quick ratio between two strings using fuzz.ratio.
Definition fuzz_impl.hpp:908
double partial_token_set_ratio(const Sentence1 &s1, const Sentence2 &s2, double score_cutoff=0)
Compares the words in the strings based on unique and common words between them using fuzz::partial_ratio.
Definition fuzz_impl.hpp:490
double token_ratio(const Sentence1 &s1, const Sentence2 &s2, double score_cutoff=0)
Helper method that returns the maximum of fuzz::token_set_ratio and fuzz::token_sort_ratio (faster than manually executing the two functions).
Definition fuzz_impl.hpp:567
double partial_token_ratio(const Sentence1 &s1, const Sentence2 &s2, double score_cutoff=0)
Helper method that returns the maximum of fuzz::partial_token_set_ratio and fuzz::partial_token_sort_ratio (faster than manually executing the two functions).
Definition fuzz_impl.hpp:733
double token_set_ratio(const Sentence1 &s1, const Sentence2 &s2, double score_cutoff=0)
Compares the words in the strings based on unique and common words between them using fuzz::ratio.
Definition fuzz_impl.hpp:431
double partial_token_sort_ratio(const Sentence1 &s1, const Sentence2 &s2, double score_cutoff=0)
Sorts the words in the strings and calculates the fuzz::partial_ratio between them.
Definition fuzz_impl.hpp:342
double token_sort_ratio(const Sentence1 &s1, const Sentence2 &s2, double score_cutoff=0)
Sorts the words in the strings and calculates the fuzz::ratio between them.
Definition fuzz_impl.hpp:303
double partial_ratio(const Sentence1 &s1, const Sentence2 &s2, double score_cutoff=0)
Calculates the fuzz::ratio of the optimal string alignment.
Definition fuzz_impl.hpp:240