export search data, add indexing script

This commit is contained in:
Mirek Kratochvil 2023-06-17 20:40:11 +02:00
parent 0f97b7a64f
commit 32a49d3179
4 changed files with 76 additions and 0 deletions

View file

@ -21,3 +21,7 @@ htmlWriteOpts =
, writerHighlightStyle = Just pygments , writerHighlightStyle = Just pygments
, writerWrapText = WrapPreserve , writerWrapText = WrapPreserve
} }
-- | Pandoc writer options used for plain-text output: the library defaults
-- with text wrapping switched off ('WrapNone').
plainWriteOpts :: WriterOptions
plainWriteOpts = def { writerWrapText = WrapNone }

View file

@ -44,6 +44,7 @@ data SiteState =
-- the template directory) -- the template directory)
, _templates :: M.Map FilePath Mu.Template , _templates :: M.Map FilePath Mu.Template
, _outputDir :: FilePath -- ^ Directory for output , _outputDir :: FilePath -- ^ Directory for output
, _searchDataOut :: Maybe FilePath -- ^ File to write the searchable versions of pages to (as JSON)
, _assetDir :: FilePath -- ^ Directory for output , _assetDir :: FilePath -- ^ Directory for output
, _sourceDirs :: [FilePath] -- ^ Path to page source data , _sourceDirs :: [FilePath] -- ^ Path to page source data
, _templateDir :: FilePath -- ^ Path to template directory , _templateDir :: FilePath -- ^ Path to template directory
@ -68,6 +69,12 @@ siteOptions' = do
long "output" <> long "output" <>
short 'd' <> short 'd' <>
help "Directory to render the site to" <> value "_site" <> showDefault help "Directory to render the site to" <> value "_site" <> showDefault
_searchDataOut <-
Just <$>
(strOption $
long "search-data-output" <>
help "Output JSON with searchable page data to this file") <|>
pure Nothing
_assetDir <- _assetDir <-
strOption $ strOption $
long "assets" <> long "assets" <>

39
make-search-index.js Normal file
View file

@ -0,0 +1,39 @@
/*
 * make-search-index.js
 *
 * Converts a "search data" file produced by the Haskell site builder into a
 * lunr.js index and saves it as JSON. Metadata for search (currently only the
 * page titles) is stored separately in an extra file.
 *
 * Installing dependencies:
 *   yarnpkg add lunr
 *
 * Usage:
 *   site ....some args.... --search-data-output search-raw.json
 *   node make-search-index.js search-raw.json search-index.json search-meta.json
 */

const lunr = require("lunr")
const fs = require("fs")

// argv[0] is the node binary and argv[1] is this script, so exactly three
// user-supplied arguments correspond to a total length of 5.
if(process.argv.length !== 5) {
  console.error('Needs exactly 3 arguments (input json, output index, output metadata).');
  process.exit(1);
}

const [inputPath, indexPath, metaPath] = process.argv.slice(2)

// The input is a JSON array of objects with `link`, `title` and `text` keys.
const documents = JSON.parse(fs.readFileSync(inputPath, {encoding: 'utf8'}))

// Build the full-text index; documents are identified by their link, and
// matches in the title weigh more than matches in the body text.
const idx = lunr(function () {
  this.ref('link')
  this.field('title', {boost: 10})
  this.field('text')

  // lunr binds the builder to `this`; pass it through so the callback can add.
  documents.forEach(function (doc) {
    this.add(doc)
  }, this)
})

// Serialized index (lunr provides the JSON representation of the index).
fs.writeFileSync(indexPath, JSON.stringify(idx), {encoding: 'utf8'})

// Metadata side file: a single object mapping each link to its title.
fs.writeFileSync(metaPath, JSON.stringify(
  Object.fromEntries(documents.map(x => [x.link, x.title]))
), {encoding: 'utf8'})

26
site.hs
View file

@ -6,6 +6,7 @@ module Main where
import Control.Monad ((>=>), unless, when) import Control.Monad ((>=>), unless, when)
import Control.Monad.Extra (whenM) import Control.Monad.Extra (whenM)
import Control.Monad.Trans.State.Lazy import Control.Monad.Trans.State.Lazy
import qualified Data.Aeson as AE
import qualified Data.Aeson.KeyMap as KM import qualified Data.Aeson.KeyMap as KM
import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Lazy as B
import Data.Digest.Pure.SHA (sha256, showDigest) import Data.Digest.Pure.SHA (sha256, showDigest)
@ -35,6 +36,7 @@ import System.FilePath
import qualified Text.Mustache as Mu import qualified Text.Mustache as Mu
import Text.Pandoc.Class (runIOorExplode) import Text.Pandoc.Class (runIOorExplode)
import Text.Pandoc.Readers.Markdown (readMarkdown) import Text.Pandoc.Readers.Markdown (readMarkdown)
import Text.Pandoc.Writers (writePlain)
import Text.Pandoc.Writers.HTML (writeHtml5String) import Text.Pandoc.Writers.HTML (writeHtml5String)
import qualified Text.Parsec.Error import qualified Text.Parsec.Error
@ -343,6 +345,29 @@ renderTag tag = do
renderTags :: Site () renderTags :: Site ()
renderTags = use (htags . to M.keys) >>= traverse_ renderTag renderTags = use (htags . to M.keys) >>= traverse_ renderTag
-- | Build the search-data entry for a single page mounted at the given path.
--
-- The resulting object has three keys: @link@ (whatever 'rootUrl' yields for
-- the mount path), @title@ (the string-valued @title@ from the page metadata,
-- or the mount path itself when there is none) and @text@ (the page document
-- rendered by 'writePlain' with 'plainWriteOpts').
mkSearchData :: FilePath -> PageInfo -> Site Y.Value
mkSearchData mount page = do
  body <- io . runIOorExplode . writePlain plainWriteOpts $ page ^. pageDoc
  url <- rootUrl mount
  let heading =
        maybe (fromString mount) Y.String $
        page ^? pageMeta . key "title" . _String
  pure $
    Y.object
      [ ("link", fromString url)
      , ("title", heading)
      , ("text", Y.String body)
      ]
-- | Export the search data of all pages as a JSON array into the file named
-- by 'searchDataOut'. Nothing happens when no such file is configured.
renderSearchData :: Site ()
renderSearchData = use searchDataOut >>= maybe (pure ()) export
  where
    -- Gather one search entry per mounted page, then write them all at once.
    export out = do
      entries <- traverse (uncurry mkSearchData) . M.assocs =<< use pages
      io $ putStrLn ("S -> " ++ out) >> AE.encodeFile out (Y.array entries)
-- | Build the whole site. -- | Build the whole site.
main = do main = do
init <- Options.Applicative.execParser siteOptions init <- Options.Applicative.execParser siteOptions
@ -353,5 +378,6 @@ main = do
use templateDir >>= sourceTemplates use templateDir >>= sourceTemplates
use pages >>= traverse (uncurry installPage) . M.assocs use pages >>= traverse (uncurry installPage) . M.assocs
renderTags renderTags
renderSearchData
io $ putStrLn "OK" io $ putStrLn "OK"
whenM (use dumpFinalState) $ get >>= io . print whenM (use dumpFinalState) $ get >>= io . print