From 32a49d3179a969604410ff7507af939c77045b4f Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Sat, 17 Jun 2023 20:40:11 +0200 Subject: [PATCH] export search data, add indexing script --- FormatOpts.hs | 4 ++++ Types.hs | 7 +++++++ make-search-index.js | 39 +++++++++++++++++++++++++++++++++++++++ site.hs | 26 ++++++++++++++++++++++++++ 4 files changed, 76 insertions(+) create mode 100644 make-search-index.js diff --git a/FormatOpts.hs b/FormatOpts.hs index 363f6cd..40d02e8 100644 --- a/FormatOpts.hs +++ b/FormatOpts.hs @@ -21,3 +21,7 @@ htmlWriteOpts = , writerHighlightStyle = Just pygments , writerWrapText = WrapPreserve } + +-- | Default plaintext writing options for Pandoc. +plainWriteOpts :: WriterOptions +plainWriteOpts = def {writerWrapText = WrapNone} diff --git a/Types.hs b/Types.hs index 12977d8..e6455d5 100644 --- a/Types.hs +++ b/Types.hs @@ -44,6 +44,7 @@ data SiteState = -- the template directory) , _templates :: M.Map FilePath Mu.Template , _outputDir :: FilePath -- ^ Directory for output + , _searchDataOut :: Maybe FilePath -- ^ File to write the searchable versions of pages to (as JSON) , _assetDir :: FilePath -- ^ Directory for output , _sourceDirs :: [FilePath] -- ^ Path to page source data , _templateDir :: FilePath -- ^ Path to template directory @@ -68,6 +69,12 @@ siteOptions' = do long "output" <> short 'd' <> help "Directory to render the site to" <> value "_site" <> showDefault + _searchDataOut <- + Just <$> + (strOption $ + long "search-data-output" <> + help "Output JSON with searchable page data to this file") <|> + pure Nothing _assetDir <- strOption $ long "assets" <> diff --git a/make-search-index.js b/make-search-index.js new file mode 100644 index 0000000..bb92b68 --- /dev/null +++ b/make-search-index.js @@ -0,0 +1,39 @@ + +/* + * make-search-index.js + * + * This converts a "search data" file produced by the haskell site builder into + * a lunr.js index and saves it in JSON. Metadata for search (currently titles) + * are stored separately in an extra file. + * + * Installing dependencies: + * yarnpkg add lunr + * + * Usage: + * site ....some args.... --search-data-output search-raw.json + * node make-search-index.js search-raw.json search-index.json search-meta.json + */ + +lunr = require("lunr") +fs = require("fs") + +if(process.argv.length !== 5) { + console.error('Needs exactly 3 arguments (input json, output index).'); + process.exit(1); +} + +documents = JSON.parse(fs.readFileSync(process.argv[2], {encoding: 'utf8'})) + +var idx = lunr(function () { + this.ref('link') + this.field('title', {boost: 10}) + this.field('text') + documents.forEach(function (doc) { + this.add(doc) + }, this) +}) + +fs.writeFileSync(process.argv[3], JSON.stringify(idx), {encoding: 'utf8'}) +fs.writeFileSync(process.argv[4], JSON.stringify( + Object.fromEntries(documents.map(x => [x.link, x.title])) + ), {encoding: 'utf8'}) diff --git a/site.hs b/site.hs index e634f05..918fdff 100644 --- a/site.hs +++ b/site.hs @@ -6,6 +6,7 @@ module Main where import Control.Monad ((>=>), unless, when) import Control.Monad.Extra (whenM) import Control.Monad.Trans.State.Lazy +import qualified Data.Aeson as AE import qualified Data.Aeson.KeyMap as KM import qualified Data.ByteString.Lazy as B import Data.Digest.Pure.SHA (sha256, showDigest) @@ -35,6 +36,7 @@ import System.FilePath import qualified Text.Mustache as Mu import Text.Pandoc.Class (runIOorExplode) import Text.Pandoc.Readers.Markdown (readMarkdown) +import Text.Pandoc.Writers (writePlain) import Text.Pandoc.Writers.HTML (writeHtml5String) import qualified Text.Parsec.Error @@ -343,6 +345,29 @@ renderTag tag = do renderTags :: Site () renderTags = use (htags . to M.keys) >>= traverse_ renderTag +-- | Transform one mounted PageInfo to the base search data +mkSearchData :: FilePath -> PageInfo -> Site Y.Value +mkSearchData mount pi = do + text <- io . runIOorExplode $ writePlain plainWriteOpts (pi ^. pageDoc) + let title = pi ^? pageMeta . key "title" . _String + link <- rootUrl mount + pure $ + Y.object + [ ("link", fromString link) + , ("title", maybe (fromString mount) Y.String title) + , ("text", Y.String text) + ] + +-- | Collect all pages' search data to the file +renderSearchData :: Site () +renderSearchData = use searchDataOut >>= traverse_ go + where + go out = do + ps <- use (pages . to M.assocs) >>= traverse (uncurry mkSearchData) + io $ do + putStrLn $ "S -> " ++ out + AE.encodeFile out $ Y.array ps + -- | Build the whole site. main = do init <- Options.Applicative.execParser siteOptions @@ -353,5 +378,6 @@ main = do use templateDir >>= sourceTemplates use pages >>= traverse (uncurry installPage) . M.assocs renderTags + renderSearchData io $ putStrLn "OK" whenM (use dumpFinalState) $ get >>= io . print