use unicode-aware tokenizer separator

This commit is contained in:
Mirek Kratochvil 2023-09-07 22:14:32 +02:00
parent 6666730fa0
commit f8ae6710e8
2 changed files with 4 additions and 1 deletion

View file

@@ -26,7 +26,7 @@
* yarnpkg add lunr
*
* Usage:
* site ....some args.... --search-data-output tmp/search-raw.json
* reploy ....some args.... --search-data-output tmp/search-raw.json
* node scripts/make-search-index.js tmp/search-raw.json _site/search-index.json _site/search-metadata.json
*/
@@ -40,6 +40,8 @@ if(process.argv.length !== 5) {
// Read the raw search data (UTF-8 JSON) from the path given as the first
// command-line argument and parse it.
// NOTE(review): no `var`/`let`/`const` is visible for `documents` in this hunk;
// confirm it is declared earlier in the file, otherwise this is an implicit global.
documents = JSON.parse(fs.readFileSync(process.argv[2], {encoding: 'utf8'}))
// Override lunr's tokenizer separator so that tokens are split on any run of
// Unicode punctuation (P), symbol (S), separator (Z) or "other" (C) characters.
// The `u` flag is required for the \p{...} property escapes.
// NOTE(review): \p{C} includes the surrogate category (Cs). If lunr tests this
// regex against single UTF-16 code units, characters outside the BMP would be
// treated as separators -- confirm against lunr's tokenizer implementation.
lunr.tokenizer.separator = /(\p{P}|\p{S}|\p{Z}|\p{C})+/u
var idx = lunr(function () {
this.ref('link')
this.field('title', {boost: 9})

View file

@@ -31,6 +31,7 @@
<script src="{{root}}static/lunr.min.js"></script>
<script>
// Split query terms on any run of Unicode punctuation (P), symbol (S),
// separator (Z) or "other" (C) characters. This is the same pattern that
// scripts/make-search-index.js assigns to lunr.tokenizer.separator.
// NOTE(review): presumably the two must stay in sync so that queries are split
// the same way as the indexed text -- confirm when changing either one.
lunr.QueryLexer.termSeparator = /(\p{P}|\p{S}|\p{Z}|\p{C})+/u;
// Look up the search page elements by their ids.
var el_query = document.getElementById('search_query');
var el_placeholder = document.getElementById('search_placeholder');
var el_noquery = document.getElementById('search_noquery');