diff options
| author | Mirek Kratochvil <exa.exa@gmail.com> | 2025-07-14 10:33:22 +0200 |
|---|---|---|
| committer | Mirek Kratochvil <exa.exa@gmail.com> | 2025-07-14 10:33:22 +0200 |
| commit | 79977cdf4b9a2fcac4c47b458cccca101686da63 (patch) | |
| tree | 14f7631693b1c65e7607b85d6f9324afa9dbabd8 /Opts.hs | |
| parent | 396e5cff54d23a035aa3b7c199ee609b7f7bda65 (diff) | |
| download | werge-79977cdf4b9a2fcac4c47b458cccca101686da63.tar.gz werge-79977cdf4b9a2fcac4c47b458cccca101686da63.tar.bz2 | |
clean up, support external tokenizers
Diffstat (limited to 'Opts.hs')
| -rw-r--r-- | Opts.hs | 14 |
1 file changed, 7 insertions, 7 deletions
@@ -9,32 +9,32 @@ import Options.Applicative import Paths_werge (version) data Tokenizer - = TokenizerFilter String - | TokenizeCharClass - | TokenizeCharClassSimple + = TokenizeFilter String + | TokenizeCharCategory + | TokenizeCharCategorySimple deriving (Show) tokenizer = asum - [ TokenizerFilter + [ TokenizeFilter <$> strOption (long "tok-filter" <> short 'F' <> metavar "FILTER" <> help "external program to separate the text to tokens") , flag' - TokenizeCharClassSimple + TokenizeCharCategorySimple (long "simple-tokens" <> short 'i' <> help "use wider character class to separate the tokens (results in larger tokens and ignores case)") , flag' - TokenizeCharClass + TokenizeCharCategory (long "full-tokens" <> short 'I' <> help "separate characters by all known character classes (default)") - , pure TokenizeCharClass + , pure TokenizeCharCategory ] data Spaces |
