From 79977cdf4b9a2fcac4c47b458cccca101686da63 Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Mon, 14 Jul 2025 10:33:22 +0200 Subject: [PATCH] clean up, support external tokenizers --- Main.hs | 89 +++++++++++++++++++++++++++++++++++++------------------ Opts.hs | 14 ++++----- README.md | 5 ++++ Toks.hs | 29 ++++++++++++++---- 4 files changed, 95 insertions(+), 42 deletions(-) diff --git a/Main.hs b/Main.hs index 9fbf0cc..0c4632c 100644 --- a/Main.hs +++ b/Main.hs @@ -5,27 +5,36 @@ module Main where import Control.Exception import Control.Monad import Data.Bool -import Data.Char import Data.Foldable import Data.List +import Data.Maybe import Data.Traversable import Opts +import System.Environment import System.Exit import System.FilePath import System.IO import System.IO.Temp import System.Process + import qualified Toks -import Debug.Trace +{- + - interface to other programs + -} +diffProg = fromMaybe "diff" <$> lookupEnv "WERGE_DIFF" + +gitProg = fromMaybe "git" <$> lookupEnv "WERGE_GIT" + +bracketFile path mode = bracket (openFile path mode) hClose --- TODO: the diff w rundiff f1 f2 out = do + diff <- diffProg st <- - withFile out WriteMode $ \oh -> + bracketFile out WriteMode $ \oh -> withCreateProcess (proc - "diff" -- TODO: from WERGE_DIFF env + diff [ "--text" , "--new-line-format=+%L" , "--old-line-format=-%L" @@ -33,27 +42,28 @@ rundiff f1 f2 out = do , f1 , f2 ]) - {std_in = NoStream, std_out = UseHandle oh, std_err = Inherit} $ \_ _ _ -> - waitForProcess + {std_in = NoStream, std_out = UseHandle oh} $ \_ _ _ -> waitForProcess when (st == ExitFailure 2) $ error "diff failed" unless (st `elem` [ExitSuccess, ExitFailure 1]) $ error "diff failed for unknown reason (is GNU diffutils installed?)" gitRepoRelRoot = do + git <- gitProg (path, st) <- withCreateProcess - (proc "git" ["rev-parse", "--show-cdup"]) - {std_in = NoStream, std_out = CreatePipe, std_err = Inherit} $ \_ (Just oh) _ p -> + (proc git ["rev-parse", "--show-cdup"]) + {std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p -> (,) <$> hGetContents' oh <*> waitForProcess p unless (st == ExitSuccess) $ error "git failed" let [p] = lines path pure p gitUnmerged = do + git <- gitProg (paths, st) <- withCreateProcess - (proc "git" ["status", "--porcelain=v1"]) - {std_in = NoStream, std_out = CreatePipe, std_err = Inherit} $ \_ (Just oh) _ p -> + (proc git ["status", "--porcelain=v1"]) + {std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p -> (,) <$> (map (drop 3) . filter ("UU " `isPrefixOf`) . lines <$> hGetContents' oh) @@ -61,11 +71,12 @@ gitUnmerged = do unless (st == ExitSuccess) $ error "git failed" pure paths -gitCheckoutMOY u my old your = do +gitCheckoutMOY cfg u my old your = do + git <- gitProg (paths, st) <- withCreateProcess - (proc "git" ["ls-files", "--unmerged", "--", u]) - {std_in = NoStream, std_out = CreatePipe, std_err = Inherit} $ \_ (Just oh) _ p -> + (proc git ["ls-files", "--unmerged", "--", u]) + {std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p -> (,) <$> (sortOn snd . map ((\[a, b] -> (a, b)) . take 2 . drop 1 . words) @@ -77,8 +88,8 @@ gitCheckoutMOY u my old your = do st <- withCreateProcess (proc "git" ["cat-file", "blob", hash]) - {std_in = NoStream, std_out = CreatePipe, std_err = Inherit} $ \_ (Just ho) _ p -> do - hGetContents ho >>= writeFile path . Toks.split -- TODO cfg + {std_in = NoStream, std_out = CreatePipe} $ \_ (Just ho) _ p -> do + hSplitToFile cfg ho path waitForProcess p unless (st == ExitSuccess) . error $ "failed checking out " ++ u ++ " from blob " ++ hash @@ -88,10 +99,30 @@ gitCheckoutMOY u my old your = do _ -> error $ "bad data from ls-files for unmerged " ++ u gitAdd path = do - traceM $ "adding " ++ path - st <- rawSystem "git" ["add", "--", path] + git <- gitProg + st <- rawSystem git ["add", "--", path] unless (st == ExitSuccess) $ error "git-add failed" +{- + - configurable splitting + -} +hSplitToFile cfg h path = + case cfgTokenizer cfg of + TokenizeCharCategory -> internal Toks.splitCategory + TokenizeCharCategorySimple -> internal Toks.splitSimple + TokenizeFilter cmd -> do + st <- + bracketFile path WriteMode $ \ho -> + withCreateProcess + (shell cmd) {std_in = UseHandle h, std_out = UseHandle ho} $ \_ _ _ -> + waitForProcess + unless (st == ExitSuccess) $ error "tokenize filter failed" + where + internal s = hGetContents h >>= writeFile path . Toks.toFile . s + +{- + - merge algorithms + -} data Op = Del | Keep @@ -100,10 +131,10 @@ data Op pdiff path = map go . lines <$> readFile path where - go [] = error "empty line from diff" go ('-':s) = (Del, s) go (' ':s) = (Keep, s) go ('+':s) = (Add, s) + go [] = error "unexpected output from diff" data Merged = Ok [String] @@ -204,29 +235,31 @@ resolveSpaces _ x = x merge cfg@Config {..} ms ys = regroup . map (resolve cfg) - . traceShowId . regroup . bool id (concatMap zeal) cfgZealous . expand cfgContext . regroup $ align ms ys +{- + - front-end + -} format :: Config -> Handle -> [Merged] -> IO Bool format Config {..} h = go False where go c [] = pure c go c (Ok x:xs) = do - hPutStr h (Toks.glueToks x) + hPutStr h (Toks.glue x) go c xs go c (Conflict m o y:xs) = do hPutStr h $ mconcat [ cfgLabelStart - , Toks.glueToks m + , Toks.glue m , cfgLabelMyOld - , Toks.glueToks o + , Toks.glue o , cfgLabelOldYour - , Toks.glueToks y + , Toks.glue y , cfgLabelEnd ] go True xs @@ -236,7 +269,7 @@ runCmd CmdDiff3 {..} cfg = let [fMy, fOld, fYour, fdMy, fdYour] = map (workdir ) ["my", "old", "your", "mydiff", "yourdiff"] for_ [(d3my, fMy), (d3old, fOld), (d3your, fYour)] $ \(path, tmp) -> - readFile path >>= writeFile tmp . Toks.split -- TODO cfg + bracketFile path ReadMode $ \h -> hSplitToFile cfg h tmp rundiff fOld fMy fdMy rundiff fOld fYour fdYour conflicted <- @@ -255,15 +288,13 @@ runCmd CmdGitMerge {..} cfg = do withSystemTempDirectory "werge-git" $ \workdir -> do let [fMy, fOld, fYour, fdMy, fdYour] = map (workdir ) ["my", "old", "your", "mydiff", "yourdiff"] - gitCheckoutMOY u fMy fOld fYour + gitCheckoutMOY cfg u fMy fOld fYour rundiff fOld fMy fdMy rundiff fOld fYour fdYour readFile u >>= writeFile (u ++ ".werge-backup") conflict <- - withFile u WriteMode $ \h -> + bracketFile u WriteMode $ \h -> merge cfg <$> pdiff fdMy <*> pdiff fdYour >>= format cfg h - traceShowM conflict - traceShowM gmDoAdd unless conflict $ when gmDoAdd $ gitAdd u pure conflict if or conflicts diff --git a/Opts.hs b/Opts.hs index 761bda0..bc81aad 100644 --- a/Opts.hs +++ b/Opts.hs @@ -9,32 +9,32 @@ import Options.Applicative import Paths_werge (version) data Tokenizer - = TokenizerFilter String - | TokenizeCharClass - | TokenizeCharClassSimple + = TokenizeFilter String + | TokenizeCharCategory + | TokenizeCharCategorySimple deriving (Show) tokenizer = asum - [ TokenizerFilter + [ TokenizeFilter <$> strOption (long "tok-filter" <> short 'F' <> metavar "FILTER" <> help "external program to separate the text to tokens") , flag' - TokenizeCharClassSimple + TokenizeCharCategorySimple (long "simple-tokens" <> short 'i' <> help "use wider character class to separate the tokens (results in larger tokens and ignores case)") , flag' - TokenizeCharClass + TokenizeCharCategory (long "full-tokens" <> short 'I' <> help "separate characters by all known character classes (default)") - , pure TokenizeCharClass + , pure TokenizeCharCategory ] data Spaces diff --git a/README.md b/README.md index caec0e4..9de6a10 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,11 @@ Better docs is WIP cabal install ``` +Running of `werge` requires a working installation of `diff` compatible +with the one from [GNU diffutils](https://www.gnu.org/software/diffutils/). You +may set up a path to such `diff` (or a wrapper script) via environment variable +`WERGE_DIFF`. + ## Help & features ``` diff --git a/Toks.hs b/Toks.hs index 4b110c2..29fa381 100644 --- a/Toks.hs +++ b/Toks.hs @@ -26,11 +26,28 @@ unmarkSpace x = error "unwat" space ('.':_) = True space _ = False -split = - unlines - . map (concatMap escape . markSpace) - . groupBy ((==) `on` generalCategory) +joinSpaces [] = [] +joinSpaces (a@('.':as):xs) = + case joinSpaces xs of + (('.':bs):xs') -> ('.' : (as ++ bs)) : xs' + xs' -> a : xs' +joinSpaces (x:xs) = x : joinSpaces xs -glueToks = concatMap (unmarkSpace . unescape) +splitCategory = make . groupBy ((==) `on` generalCategory) -glue = glueToks . lines +simpleCategory c + | isSpace c = 0 + | isAlpha c = 1 + | isNumber c = 2 + | otherwise = 3 + +splitSimple = make . groupBy ((==) `on` simpleCategory) + +make = map (concatMap escape . markSpace) + +glue :: [String] -> String +glue = concatMap (unmarkSpace . unescape) + +fromFile = lines + +toFile = unlines