From 79977cdf4b9a2fcac4c47b458cccca101686da63 Mon Sep 17 00:00:00 2001
From: Mirek Kratochvil
Date: Mon, 14 Jul 2025 10:33:22 +0200
Subject: clean up, support external tokenizers

---
 Toks.hs | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

(limited to 'Toks.hs')

diff --git a/Toks.hs b/Toks.hs
index 4b110c2..29fa381 100644
--- a/Toks.hs
+++ b/Toks.hs
@@ -26,11 +26,28 @@ unmarkSpace x = error "unwat"
 space ('.':_) = True
 space _ = False
 
-split =
-  unlines
-    . map (concatMap escape . markSpace)
-    . groupBy ((==) `on` generalCategory)
+joinSpaces [] = []
+joinSpaces (a@('.':as):xs) =
+  case joinSpaces xs of
+    (('.':bs):xs') -> ('.' : (as ++ bs)) : xs'
+    xs' -> a : xs'
+joinSpaces (x:xs) = x : joinSpaces xs
 
-glueToks = concatMap (unmarkSpace . unescape)
+splitCategory = make . groupBy ((==) `on` generalCategory)
 
-glue = glueToks . lines
+simpleCategory c
+  | isSpace c = 0
+  | isAlpha c = 1
+  | isNumber c = 2
+  | otherwise = 3
+
+splitSimple = make . groupBy ((==) `on` simpleCategory)
+
+make = map (concatMap escape . markSpace)
+
+glue :: [String] -> String
+glue = concatMap (unmarkSpace . unescape)
+
+fromFile = lines
+
+toFile = unlines
-- 
cgit v1.2.3