From 50ff94eb8f4d92f7c409996a70caaf26125bddd0 Mon Sep 17 00:00:00 2001 From: Berthold Stoeger Date: Sat, 18 Jun 2022 15:09:45 -0400 Subject: [PATCH] filter: normalize text of fulltext search to base letters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The liter symbol is written as 'ℓ'. To allow searching for that, normalize unicode strings to their base letter. This corresponds to the 'compatibility' mode. We might also think about stripping diacritics. Signed-off-by: Berthold Stoeger --- CHANGELOG.md | 1 + core/fulltext.cpp | 14 ++++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dce5c95ed..25a41e620 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,4 @@ +- filter: normalize unicode code points to base letter (for searching ℓ, etc.) - core: when modifying cylinders across multiple dives, match cylinder number before comparing type - core: merge all properties in a dive, including current, waveheight, etc - core: prevent crash when merging dives without cylinders (as we might get when importing from divelogs.de) diff --git a/core/fulltext.cpp b/core/fulltext.cpp index d8dfd9448..9f7a78a54 100644 --- a/core/fulltext.cpp +++ b/core/fulltext.cpp @@ -85,8 +85,9 @@ bool fulltext_dive_matches(const struct dive *d, const FullTextQuery &q, StringF // Class implementation -// Take a text and tokenize it into words. Normalize the words to upper case -// and add to a given list, if not already in list. +// Take a text and tokenize it into words. Normalize the words to their +// upper case base character (e.g. 'ℓ' to 'L') and add to a given list, +// if not already in list. // We might think about limiting the lower size of words we store. // Note: we convert to QString before tokenization because we rely in // Qt's isPunct() function. 
@@ -107,7 +108,9 @@ static void tokenize(QString s, std::vector &res) int end = pos; while (end < size && !s[end].isSpace() && !s[end].isPunct()) ++end; - QString word = loc.toUpper(s.mid(pos, end - pos)); // Sad: Locale::toUpper can't use QStringRef - we have to copy the substring! + QString word = s.mid(pos, end - pos); + word = word.normalized(QString::NormalizationForm_KD); + word = loc.toUpper(word); pos = end; if (find(res.begin(), res.end(), word) == res.end()) @@ -157,11 +160,10 @@ void FullText::populate() void FullText::registerDive(struct dive *d) { - if (d->full_text) { + if (d->full_text) unregisterWords(d, d->full_text->words); - } else { + else d->full_text = new full_text_cache; - } d->full_text->words = getWords(d); registerWords(d, d->full_text->words); }