From f8ae6710e870fd7a2f4210b85ec0d509fe350e28 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Thu, 7 Sep 2023 22:14:32 +0200 Subject: use unicode-aware tokenizer separator --- scripts/make-search-index.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/make-search-index.js b/scripts/make-search-index.js index 1acc7cd..f923b54 100644 --- a/scripts/make-search-index.js +++ b/scripts/make-search-index.js @@ -26,7 +26,7 @@ * yarnpkg add lunr * * Usage: - * site ....some args.... --search-data-output tmp/search-raw.json + * reploy ....some args.... --search-data-output tmp/search-raw.json * node scripts/make-search-index.js tmp/search-raw.json _site/search-index.json _site/search-metadata.json */ @@ -40,6 +40,8 @@ if(process.argv.length !== 5) { documents = JSON.parse(fs.readFileSync(process.argv[2], {encoding: 'utf8'})) +lunr.tokenizer.separator = /(\p{P}|\p{S}|\p{Z}|\p{C})+/u + var idx = lunr(function () { this.ref('link') this.field('title', {boost: 9}) -- cgit v1.2.3