aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMirek Kratochvil <exa.exa@gmail.com>2025-07-14 10:33:22 +0200
committerMirek Kratochvil <exa.exa@gmail.com>2025-07-14 10:33:22 +0200
commit79977cdf4b9a2fcac4c47b458cccca101686da63 (patch)
tree14f7631693b1c65e7607b85d6f9324afa9dbabd8
parent396e5cff54d23a035aa3b7c199ee609b7f7bda65 (diff)
downloadwerge-79977cdf4b9a2fcac4c47b458cccca101686da63.tar.gz
werge-79977cdf4b9a2fcac4c47b458cccca101686da63.tar.bz2
clean up, support external tokenizers
-rw-r--r--Main.hs89
-rw-r--r--Opts.hs14
-rw-r--r--README.md5
-rw-r--r--Toks.hs29
4 files changed, 95 insertions, 42 deletions
diff --git a/Main.hs b/Main.hs
index 9fbf0cc..0c4632c 100644
--- a/Main.hs
+++ b/Main.hs
@@ -5,27 +5,36 @@ module Main where
import Control.Exception
import Control.Monad
import Data.Bool
-import Data.Char
import Data.Foldable
import Data.List
+import Data.Maybe
import Data.Traversable
import Opts
+import System.Environment
import System.Exit
import System.FilePath
import System.IO
import System.IO.Temp
import System.Process
+
import qualified Toks
-import Debug.Trace
+{-
+ - interface to other programs
+ -}
+diffProg = fromMaybe "diff" <$> lookupEnv "WERGE_DIFF"
+
+gitProg = fromMaybe "git" <$> lookupEnv "WERGE_GIT"
+
+bracketFile path mode = bracket (openFile path mode) hClose
--- TODO: the diff w
rundiff f1 f2 out = do
+ diff <- diffProg
st <-
- withFile out WriteMode $ \oh ->
+ bracketFile out WriteMode $ \oh ->
withCreateProcess
(proc
- "diff" -- TODO: from WERGE_DIFF env
+ diff
[ "--text"
, "--new-line-format=+%L"
, "--old-line-format=-%L"
@@ -33,27 +42,28 @@ rundiff f1 f2 out = do
, f1
, f2
])
- {std_in = NoStream, std_out = UseHandle oh, std_err = Inherit} $ \_ _ _ ->
- waitForProcess
+ {std_in = NoStream, std_out = UseHandle oh} $ \_ _ _ -> waitForProcess
when (st == ExitFailure 2) $ error "diff failed"
unless (st `elem` [ExitSuccess, ExitFailure 1])
$ error "diff failed for unknown reason (is GNU diffutils installed?)"
gitRepoRelRoot = do
+ git <- gitProg
(path, st) <-
withCreateProcess
- (proc "git" ["rev-parse", "--show-cdup"])
- {std_in = NoStream, std_out = CreatePipe, std_err = Inherit} $ \_ (Just oh) _ p ->
+ (proc git ["rev-parse", "--show-cdup"])
+ {std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
(,) <$> hGetContents' oh <*> waitForProcess p
unless (st == ExitSuccess) $ error "git failed"
let [p] = lines path
pure p
gitUnmerged = do
+ git <- gitProg
(paths, st) <-
withCreateProcess
- (proc "git" ["status", "--porcelain=v1"])
- {std_in = NoStream, std_out = CreatePipe, std_err = Inherit} $ \_ (Just oh) _ p ->
+ (proc git ["status", "--porcelain=v1"])
+ {std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
(,)
<$> (map (drop 3) . filter ("UU " `isPrefixOf`) . lines
<$> hGetContents' oh)
@@ -61,11 +71,12 @@ gitUnmerged = do
unless (st == ExitSuccess) $ error "git failed"
pure paths
-gitCheckoutMOY u my old your = do
+gitCheckoutMOY cfg u my old your = do
+ git <- gitProg
(paths, st) <-
withCreateProcess
- (proc "git" ["ls-files", "--unmerged", "--", u])
- {std_in = NoStream, std_out = CreatePipe, std_err = Inherit} $ \_ (Just oh) _ p ->
+ (proc git ["ls-files", "--unmerged", "--", u])
+ {std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
(,)
<$> (sortOn snd
. map ((\[a, b] -> (a, b)) . take 2 . drop 1 . words)
@@ -77,8 +88,8 @@ gitCheckoutMOY u my old your = do
st <-
withCreateProcess
(proc "git" ["cat-file", "blob", hash])
- {std_in = NoStream, std_out = CreatePipe, std_err = Inherit} $ \_ (Just ho) _ p -> do
- hGetContents ho >>= writeFile path . Toks.split -- TODO cfg
+ {std_in = NoStream, std_out = CreatePipe} $ \_ (Just ho) _ p -> do
+ hSplitToFile cfg ho path
waitForProcess p
unless (st == ExitSuccess) . error
$ "failed checking out " ++ u ++ " from blob " ++ hash
@@ -88,10 +99,30 @@ gitCheckoutMOY u my old your = do
_ -> error $ "bad data from ls-files for unmerged " ++ u
gitAdd path = do
- traceM $ "adding " ++ path
- st <- rawSystem "git" ["add", "--", path]
+ git <- gitProg
+ st <- rawSystem git ["add", "--", path]
unless (st == ExitSuccess) $ error "git-add failed"
+{-
+ - configurable splitting
+ -}
+hSplitToFile cfg h path =
+ case cfgTokenizer cfg of
+ TokenizeCharCategory -> internal Toks.splitCategory
+ TokenizeCharCategorySimple -> internal Toks.splitSimple
+ TokenizeFilter cmd -> do
+ st <-
+ bracketFile path WriteMode $ \ho ->
+ withCreateProcess
+ (shell cmd) {std_in = UseHandle h, std_out = UseHandle ho} $ \_ _ _ ->
+ waitForProcess
+ unless (st == ExitSuccess) $ error "tokenize filter failed"
+ where
+ internal s = hGetContents h >>= writeFile path . Toks.toFile . s
+
+{-
+ - merge algorithms
+ -}
data Op
= Del
| Keep
@@ -100,10 +131,10 @@ data Op
pdiff path = map go . lines <$> readFile path
where
- go [] = error "empty line from diff"
go ('-':s) = (Del, s)
go (' ':s) = (Keep, s)
go ('+':s) = (Add, s)
+ go [] = error "unexpected output from diff"
data Merged
= Ok [String]
@@ -204,29 +235,31 @@ resolveSpaces _ x = x
merge cfg@Config {..} ms ys =
regroup
. map (resolve cfg)
- . traceShowId
. regroup
. bool id (concatMap zeal) cfgZealous
. expand cfgContext
. regroup
$ align ms ys
+{-
+ - front-end
+ -}
format :: Config -> Handle -> [Merged] -> IO Bool
format Config {..} h = go False
where
go c [] = pure c
go c (Ok x:xs) = do
- hPutStr h (Toks.glueToks x)
+ hPutStr h (Toks.glue x)
go c xs
go c (Conflict m o y:xs) = do
hPutStr h
$ mconcat
[ cfgLabelStart
- , Toks.glueToks m
+ , Toks.glue m
, cfgLabelMyOld
- , Toks.glueToks o
+ , Toks.glue o
, cfgLabelOldYour
- , Toks.glueToks y
+ , Toks.glue y
, cfgLabelEnd
]
go True xs
@@ -236,7 +269,7 @@ runCmd CmdDiff3 {..} cfg =
let [fMy, fOld, fYour, fdMy, fdYour] =
map (workdir </>) ["my", "old", "your", "mydiff", "yourdiff"]
for_ [(d3my, fMy), (d3old, fOld), (d3your, fYour)] $ \(path, tmp) ->
- readFile path >>= writeFile tmp . Toks.split -- TODO cfg
+ bracketFile path ReadMode $ \h -> hSplitToFile cfg h tmp
rundiff fOld fMy fdMy
rundiff fOld fYour fdYour
conflicted <-
@@ -255,15 +288,13 @@ runCmd CmdGitMerge {..} cfg = do
withSystemTempDirectory "werge-git" $ \workdir -> do
let [fMy, fOld, fYour, fdMy, fdYour] =
map (workdir </>) ["my", "old", "your", "mydiff", "yourdiff"]
- gitCheckoutMOY u fMy fOld fYour
+ gitCheckoutMOY cfg u fMy fOld fYour
rundiff fOld fMy fdMy
rundiff fOld fYour fdYour
readFile u >>= writeFile (u ++ ".werge-backup")
conflict <-
- withFile u WriteMode $ \h ->
+ bracketFile u WriteMode $ \h ->
merge cfg <$> pdiff fdMy <*> pdiff fdYour >>= format cfg h
- traceShowM conflict
- traceShowM gmDoAdd
unless conflict $ when gmDoAdd $ gitAdd u
pure conflict
if or conflicts
diff --git a/Opts.hs b/Opts.hs
index 761bda0..bc81aad 100644
--- a/Opts.hs
+++ b/Opts.hs
@@ -9,32 +9,32 @@ import Options.Applicative
import Paths_werge (version)
data Tokenizer
- = TokenizerFilter String
- | TokenizeCharClass
- | TokenizeCharClassSimple
+ = TokenizeFilter String
+ | TokenizeCharCategory
+ | TokenizeCharCategorySimple
deriving (Show)
tokenizer =
asum
- [ TokenizerFilter
+ [ TokenizeFilter
<$> strOption
(long "tok-filter"
<> short 'F'
<> metavar "FILTER"
<> help "external program to separate the text to tokens")
, flag'
- TokenizeCharClassSimple
+ TokenizeCharCategorySimple
(long "simple-tokens"
<> short 'i'
<> help
"use wider character class to separate the tokens (results in larger tokens and ignores case)")
, flag'
- TokenizeCharClass
+ TokenizeCharCategory
(long "full-tokens"
<> short 'I'
<> help
"separate characters by all known character classes (default)")
- , pure TokenizeCharClass
+ , pure TokenizeCharCategory
]
data Spaces
diff --git a/README.md b/README.md
index caec0e4..9de6a10 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,11 @@ Better docs is WIP
cabal install
```
+Running of `werge` requires a working installation of `diff` compatible
+with the one from [GNU diffutils](https://www.gnu.org/software/diffutils/). You
+may set up a path to such `diff` (or a wrapper script) via environment variable
+`WERGE_DIFF`.
+
## Help & features
```
diff --git a/Toks.hs b/Toks.hs
index 4b110c2..29fa381 100644
--- a/Toks.hs
+++ b/Toks.hs
@@ -26,11 +26,28 @@ unmarkSpace x = error "unwat"
space ('.':_) = True
space _ = False
-split =
- unlines
- . map (concatMap escape . markSpace)
- . groupBy ((==) `on` generalCategory)
+joinSpaces [] = []
+joinSpaces (a@('.':as):xs) =
+ case joinSpaces xs of
+ (('.':bs):xs') -> ('.' : (as ++ bs)) : xs'
+ xs' -> a : xs'
+joinSpaces (x:xs) = x : joinSpaces xs
-glueToks = concatMap (unmarkSpace . unescape)
+splitCategory = make . groupBy ((==) `on` generalCategory)
-glue = glueToks . lines
+simpleCategory c
+ | isSpace c = 0
+ | isAlpha c = 1
+ | isNumber c = 2
+ | otherwise = 3
+
+splitSimple = make . groupBy ((==) `on` simpleCategory)
+
+make = map (concatMap escape . markSpace)
+
+glue :: [String] -> String
+glue = concatMap (unmarkSpace . unescape)
+
+fromFile = lines
+
+toFile = unlines