From f8ae6710e870fd7a2f4210b85ec0d509fe350e28 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Thu, 7 Sep 2023 22:14:32 +0200 Subject: [PATCH] use unicode-aware tokenizer separator --- scripts/make-search-index.js | 4 +++- templates/searchresults.html | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/make-search-index.js b/scripts/make-search-index.js index 1acc7cd..f923b54 100644 --- a/scripts/make-search-index.js +++ b/scripts/make-search-index.js @@ -26,7 +26,7 @@ * yarnpkg add lunr * * Usage: - * site ....some args.... --search-data-output tmp/search-raw.json + * reploy ....some args.... --search-data-output tmp/search-raw.json * node scripts/make-search-index.js tmp/search-raw.json _site/search-index.json _site/search-metadata.json */ @@ -40,6 +40,8 @@ if(process.argv.length !== 5) { documents = JSON.parse(fs.readFileSync(process.argv[2], {encoding: 'utf8'})) +lunr.tokenizer.separator = /(\p{P}|\p{S}|\p{Z}|\p{C})+/u + var idx = lunr(function () { this.ref('link') this.field('title', {boost: 9}) diff --git a/templates/searchresults.html b/templates/searchresults.html index 5044d0b..5ced869 100644 --- a/templates/searchresults.html +++ b/templates/searchresults.html @@ -31,6 +31,7 @@