aboutsummaryrefslogtreecommitdiff
path: root/Toks.hs
diff options
context:
space:
mode:
author: Mirek Kratochvil <exa.exa@gmail.com>, 2025-07-14 10:33:22 +0200
committer: Mirek Kratochvil <exa.exa@gmail.com>, 2025-07-14 10:33:22 +0200
commit: 79977cdf4b9a2fcac4c47b458cccca101686da63 (patch)
tree: 14f7631693b1c65e7607b85d6f9324afa9dbabd8 /Toks.hs
parent: 396e5cff54d23a035aa3b7c199ee609b7f7bda65 (diff)
download: werge-79977cdf4b9a2fcac4c47b458cccca101686da63.tar.gz
werge-79977cdf4b9a2fcac4c47b458cccca101686da63.tar.bz2
clean up, support external tokenizers
Diffstat (limited to 'Toks.hs')
-rw-r--r-- Toks.hs 29
1 files changed, 23 insertions, 6 deletions
diff --git a/Toks.hs b/Toks.hs
index 4b110c2..29fa381 100644
--- a/Toks.hs
+++ b/Toks.hs
@@ -26,11 +26,28 @@ unmarkSpace x = error "unwat"
space ('.':_) = True
space _ = False
-split =
- unlines
- . map (concatMap escape . markSpace)
- . groupBy ((==) `on` generalCategory)
+-- | Merge runs of adjacent tokens that start with @'.'@ into one token.
+--
+-- Tokens starting with @'.'@ are the ones the @space@ predicate in this
+-- file accepts. The tail of the list is processed first, so a whole run
+-- collapses into a single @'.'@-prefixed token whose payload is the
+-- concatenation of the individual payloads, in order. All other tokens are
+-- passed through unchanged.
+joinSpaces [] = []
+joinSpaces (a@('.':as):xs) =
+ case joinSpaces xs of
+ -- the already-joined tail starts with a '.' token: fuse it with this one
+ (('.':bs):xs') -> ('.' : (as ++ bs)) : xs'
+ -- otherwise keep the current token as it is
+ xs' -> a : xs'
+joinSpaces (x:xs) = x : joinSpaces xs
-glueToks = concatMap (unmarkSpace . unescape)
+-- | Split the input into maximal runs of consecutive characters that share
+-- the same Unicode 'generalCategory', then turn each run into a token via
+-- 'make'.
+splitCategory = make . groupBy ((==) `on` generalCategory)
-glue = glueToks . lines
+-- | Coarse character class used by 'splitSimple'.
+--
+-- The guards are tried top to bottom, so the first matching class wins:
+-- 0 for 'isSpace', 1 for 'isAlpha', 2 for 'isNumber', 3 for anything else.
+simpleCategory c
+ | isSpace c = 0
+ | isAlpha c = 1
+ | isNumber c = 2
+ | otherwise = 3
+
+-- | Like 'splitCategory', but groups consecutive characters by the coarser
+-- 'simpleCategory' class before handing the groups to 'make'.
+splitSimple = make . groupBy ((==) `on` simpleCategory)
+
+-- | Turn each character group into a token: apply @markSpace@ to the group,
+-- then @escape@ every element of the result and concatenate the pieces.
+--
+-- NOTE(review): @markSpace@ and @escape@ are defined outside this hunk;
+-- presumably they tag whitespace groups and escape special characters --
+-- confirm against their definitions.
+make = map (concatMap escape . markSpace)
+
+-- | Reassemble text from a list of tokens: each token goes through
+-- @unescape@ and then @unmarkSpace@, and the results are concatenated.
+--
+-- NOTE(review): presumably the inverse of 'make' on well-formed tokens;
+-- @unescape@ and @unmarkSpace@ are defined outside this hunk -- confirm.
+glue :: [String] -> String
+glue = concatMap (unmarkSpace . unescape)
+
+-- | Read tokens from file contents: one token per line (plain 'lines').
+fromFile = lines
+
+-- | Render tokens as file contents: one token per line, each terminated by
+-- a newline (plain 'unlines').
+toFile = unlines