Compare commits

...

10 commits

Author SHA1 Message Date
Mirek Kratochvil 259ad6101b typo 2025-07-18 20:26:53 +02:00
Mirek Kratochvil f5f206765c add a note about history 2025-07-18 15:58:24 +02:00
Mirek Kratochvil 5a88a00a0d autobuild on github 2025-07-18 15:38:15 +02:00
Mirek Kratochvil 44518ce946 document, change non-space token mark 2025-07-18 15:31:55 +02:00
Mirek Kratochvil 6a2b2e3148 explain 2025-07-18 15:22:35 +02:00
Mirek Kratochvil cb5257b285 make diff+patch work together, document 2025-07-18 15:21:08 +02:00
Mirek Kratochvil 56cf7c69a9 have diffs (for good manners) 2025-07-17 22:21:23 +02:00
Mirek Kratochvil 49fcd0ca44 clean up 2025-07-17 20:44:40 +02:00
Mirek Kratochvil ecdaa9511d python 2025-07-17 16:10:41 +02:00
Mirek Kratochvil 69ad61ab22 document difference 2025-07-17 16:06:08 +02:00
7 changed files with 472 additions and 179 deletions

24
.github/workflows/build.yml vendored Normal file
View file

@ -0,0 +1,24 @@
name: build
on:
push:
tags:
- 'v*'
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- uses: haskell-actions/setup@v2
with:
ghc-version: '9.4'
- run: |
cabal build
xz -9 < `cabal exec which werge` > werge-${{ github.ref_name }}-`uname -m`.xz
- uses: softprops/action-gh-release@v2
with:
files: werge-*.xz

235
Main.hs
View file

@ -8,125 +8,19 @@ import Data.Bool
import Data.Foldable
import Data.Function
import Data.List
import Data.Maybe
import Data.Traversable
import Opts
import System.Environment
import System.Exit
import System.FilePath
import System.IO
import System.IO.Temp
import System.Process
import Opts
import Progs
import qualified Toks
import Toks (Tok)
import Debug.Trace
{-
- interface to other programs
-}
diffProg = fromMaybe "diff" <$> lookupEnv "WERGE_DIFF"
gitProg = fromMaybe "git" <$> lookupEnv "WERGE_GIT"
bracketFile path mode = bracket (openFile path mode) hClose
rundiff f1 f2 out = do
diff <- diffProg
st <-
bracketFile out WriteMode $ \oh ->
withCreateProcess
(proc
diff
[ "--text"
, "--new-line-format=+%L"
, "--old-line-format=-%L"
, "--unchanged-line-format= %L"
, f1
, f2
])
{std_in = NoStream, std_out = UseHandle oh} $ \_ _ _ -> waitForProcess
when (st == ExitFailure 2) $ error "diff failed"
unless (st `elem` [ExitSuccess, ExitFailure 1])
$ error "diff failed for unknown reason (is GNU diffutils installed?)"
gitRepoRelRoot = do
git <- gitProg
(path, st) <-
withCreateProcess
(proc git ["rev-parse", "--show-cdup"])
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
(,) <$> hGetContents' oh <*> waitForProcess p
unless (st == ExitSuccess) $ error "git failed"
let [p] = lines path
pure p
gitUnmerged = do
git <- gitProg
(paths, st) <-
withCreateProcess
(proc git ["status", "--porcelain=v1"])
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
(,)
<$> (map (drop 3) . filter ("UU " `isPrefixOf`) . lines
<$> hGetContents' oh)
<*> waitForProcess p
unless (st == ExitSuccess) $ error "git failed"
pure paths
gitCheckoutMOY cfg u my old your = do
git <- gitProg
(paths, st) <-
withCreateProcess
(proc git ["ls-files", "--unmerged", "--", u])
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
(,)
<$> (sortOn snd
. map ((\[a, b] -> (a, b)) . take 2 . drop 1 . words)
. lines
<$> hGetContents' oh)
<*> waitForProcess p
unless (st == ExitSuccess) $ error "git failed"
let co (hash, _) path = do
st <-
withCreateProcess
(proc "git" ["cat-file", "blob", hash])
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just ho) _ p -> do
hSplitToFile cfg ho path
waitForProcess p
unless (st == ExitSuccess) . error
$ "failed checking out " ++ u ++ " from blob " ++ hash
case paths of
[(_, "1"), (_, "2"), (_, "3")] ->
zipWithM co paths [old, my, your] >> pure ()
_ -> error $ "bad data from ls-files for unmerged " ++ u
gitAdd path = do
git <- gitProg
st <- rawSystem git ["add", "--", path]
unless (st == ExitSuccess) $ error "git-add failed"
{-
- configurable splitting
-
- TODO this should probably enforce joinSpaces?
- or have joinSpaces as configurable? (probably best, default true)
-}
hSplitToFile cfg h path =
case cfgTokenizer cfg of
TokenizeCharCategory -> internal Toks.splitCategory
TokenizeCharCategorySimple -> internal Toks.splitSimple
TokenizeFilter cmd -> do
st <-
bracketFile path WriteMode $ \ho ->
withCreateProcess
(shell cmd) {std_in = UseHandle h, std_out = UseHandle ho} $ \_ _ _ ->
waitForProcess
unless (st == ExitSuccess) $ error "tokenize filter failed"
where
internal s = hGetContents h >>= writeFile path . Toks.toFile . s
{-
- merge algorithms
-}
@ -142,7 +36,7 @@ pdiff path = map go . lines <$> readFile path
go ('-':s) = (Del, s)
go (' ':s) = (Keep, s)
go ('+':s) = (Add, s)
go [] = error "unexpected output from diff"
go _ = error "unexpected output from diff"
data Merged
= Ok [String]
@ -150,9 +44,41 @@ data Merged
| Conflict [String] [String] [String]
deriving (Show)
pmerge :: FilePath -> IO [Merged]
pmerge path = go . lines <$> readFile path
where
go [] = []
go xs@(x:_)
| Toks.tok x = goOk xs
| otherwise = goC0 xs
eat = span Toks.tok
goOk xs =
let (a, xs') = eat xs
in Ok a : go xs'
goC0 ("<<<<<<<":xs) =
let (m, xs') = eat xs
in goC1 m xs'
goC0 (x:_) = error $ "unexpected token: " ++ x
goC0 [] = error "unexpected end"
goC1 m ("|||||||":xs) =
let (o, xs') = eat xs
in goC2 m o xs'
goC1 _ (x:_) = error $ "unexpected token: " ++ x
goC1 _ [] = error "unexpected end"
goC2 m o ("=======":xs) =
let (y, xs') = eat xs
in goC3 m o y xs'
goC2 _ _ (x:_) = error $ "unexpected token: " ++ x
goC2 _ _ [] = error "unexpected end"
goC3 m o y (">>>>>>>":xs) = Conflict m o y : go xs
goC3 _ _ _ (x:_) = error $ "unexpected token: " ++ x
goC3 _ _ _ [] = error "unexpected end"
isKeepTok :: (Op, String) -> Bool
isKeepTok (Keep, _) = True
isKeepTok _ = False
isDelTok :: (Op, String) -> Bool
isDelTok (Del, _) = True
isDelTok _ = False
@ -170,6 +96,7 @@ chunks xs =
let (reps, ys) = break isKeepTok xs
in uncurry (Replace `on` map snd) (partition isDelTok reps) : chunks ys
align1 :: Eq a => [a] -> [a] -> ([a], [a], [a])
align1 as [] = ([], as, [])
align1 [] bs = ([], [], bs)
align1 (a:as) (b:bs)
@ -178,7 +105,7 @@ align1 (a:as) (b:bs)
align1 _ _ = error "chunks do not align"
align :: [Merged] -> [Merged] -> [Merged]
align m y = connect $ slice m y
align m0 y0 = connect $ slice m0 y0
where
erase x = Replace x []
nemap _ [] = []
@ -204,13 +131,14 @@ align m y = connect $ slice m y
slice _ _ = error "unacceptable chunks"
coFlag (Ok _) = False
coFlag (Replace _ _) = True
coFlag _ = error "flagging unacceptable chunks"
coSig (a, b) = (coFlag a, coFlag b)
coConn' (a, b) (a', b') = (a && a') || (b && b')
coConn = coConn' `on` coSig
coGroup [] = []
coGroup (x:xs) =
case coGroup xs of
xs'@(ys@(y:_):yss)
(ys@(y:_):yss)
| coConn x y -> (x : ys) : yss
xs' -> [x] : xs'
connect = map confl . coGroup
@ -218,12 +146,14 @@ align m y = connect $ slice m y
toCon (Ok o, Replace _ y) = Conflict o o y
toCon (Replace o m, Ok _) = Conflict m o o
toCon (Replace o m, Replace _ y) = Conflict m o y
confl = foldr cappend (Ok []) . map toCon
cappend (Ok x) (Ok o) = Ok (x ++ o)
cappend (Ok x) (Conflict m o y) = error "align consistency check fail" -- Conflict (x++m) (x++o) (x++y)
cappend (Conflict m o y) (Ok x) = Conflict (m ++ x) (o ++ x) (y ++ x)
cappend (Conflict m o y) (Conflict m' o' y') =
toCon _ = error "converting unacceptable chunks"
confl = foldr coAppend (Ok []) . map toCon
coAppend (Ok x) (Ok o) = Ok (x ++ o)
coAppend (Ok _) (Conflict _ _ _) = error "align consistency check fail" -- Conflict (x++m) (x++o) (x++y)
coAppend (Conflict m o y) (Ok x) = Conflict (m ++ x) (o ++ x) (y ++ x)
coAppend (Conflict m o y) (Conflict m' o' y') =
Conflict (m ++ m') (o ++ o') (y ++ y')
coAppend _ _ = error "appending unacceptable chunks"
regroup :: [Merged] -> [Merged]
regroup [] = []
@ -234,10 +164,11 @@ regroup (x@(Ok a):xs) =
xs' -> x : xs'
regroup (x:xs) = x : regroup xs
zeal Config {..} (Conflict m o y) =
before' ++ (Conflict (reverse m'') o (reverse y'') : after')
zeal :: Config -> Merged -> [Merged]
zeal Config {..} (Conflict m0 o0 y0) =
before' ++ (Conflict (reverse m'') o0 (reverse y'') : after')
where
((m', y'), before) = pops m y
((m', y'), before) = pops m0 y0
((m'', y''), rafter) = pops (reverse m') (reverse y')
before' =
case before of
@ -258,6 +189,7 @@ zeal Config {..} (Conflict m o y) =
pops ms ys = ((ms, ys), [])
zeal _ x = [x]
resolveSpace :: Config -> Merged -> Merged
resolveSpace Config {..} c@(Conflict m o y)
| not (all Toks.space $ concat [m, o, y]) = c
| m == o && o == y = Ok o
@ -282,14 +214,22 @@ expand n = go
go [] = []
go (x@(Conflict m1 o1 y1):xs) =
case go xs of
(Conflict m2 o2 y2:xs') ->
Conflict (m1 ++ m2) (o1 ++ o2) (y1 ++ y2) : xs'
(Conflict m2 o2 y2:xs')
| n > 0 -> Conflict (m1 ++ m2) (o1 ++ o2) (y1 ++ y2) : xs'
(Ok a:Conflict m2 o2 y2:xs')
| length a <= n ->
| length a < n ->
Conflict (m1 ++ a ++ m2) (o1 ++ a ++ o2) (y1 ++ a ++ y2) : xs'
xs' -> x : xs'
go (x@(Replace o1 n1):xs) =
case go xs of
(Replace o2 n2:xs')
| n > 0 -> Replace (o1 ++ o2) (n1 ++ n2) : xs'
(Ok a:Replace o2 n2:xs')
| length a < n -> Replace (o1 ++ a ++ o2) (n1 ++ a ++ n2) : xs'
xs' -> x : xs'
go (x:xs) = x : go xs
resolve :: Config -> Merged -> Merged
resolve cfg@Config {..} c@(Conflict m o y)
| cfgSpaceResolution /= SpaceNormal
, all Toks.space (concat [m, o, y]) = resolveSpace cfg c
@ -305,6 +245,7 @@ resolve cfg@Config {..} c@(Conflict m o y)
ResolveKeep -> c
resolve _ x = x
merge :: Config -> [(Op, String)] -> [(Op, String)] -> [Merged]
merge cfg@Config {..} ms ys =
regroup
. map (resolve cfg)
@ -315,6 +256,8 @@ merge cfg@Config {..} ms ys =
. regroup
$ align (chunks ms) (chunks ys)
diff Config {..} = expand cfgContext . chunks
{-
- front-end
-}
@ -325,7 +268,7 @@ format Config {..} h = go False
go c (Ok x:xs) = do
hPutStr h (Toks.glue x)
go c xs
go c (Conflict m o y:xs) = do
go _ (Conflict m o y:xs) = do
hPutStr h
$ mconcat
[ cfgLabelStart
@ -337,15 +280,21 @@ format Config {..} h = go False
, cfgLabelEnd
]
go True xs
go _ (Replace o n:xs) = do
hPutStr h
$ mconcat
[cfgLabelStart, Toks.glue o, cfgLabelDiff, Toks.glue n, cfgLabelEnd]
go True xs
runCmd :: Command -> Config -> IO ()
runCmd CmdDiff3 {..} cfg =
withSystemTempDirectory "werge-diff3" $ \workdir -> do
let [fMy, fOld, fYour, fdMy, fdYour] =
map (workdir </>) ["my", "old", "your", "mydiff", "yourdiff"]
for_ [(d3my, fMy), (d3old, fOld), (d3your, fYour)] $ \(path, tmp) ->
bracketFile path ReadMode $ \h -> hSplitToFile cfg h tmp
rundiff fOld fMy fdMy
rundiff fOld fYour fdYour
runDiff fOld fMy fdMy
runDiff fOld fYour fdYour
conflicted <-
merge cfg <$> pdiff fdMy <*> pdiff fdYour >>= format cfg stdout
if conflicted
@ -363,8 +312,8 @@ runCmd CmdGitMerge {..} cfg = do
let [fMy, fOld, fYour, fdMy, fdYour] =
map (workdir </>) ["my", "old", "your", "mydiff", "yourdiff"]
gitCheckoutMOY cfg u fMy fOld fYour
rundiff fOld fMy fdMy
rundiff fOld fYour fdYour
runDiff fOld fMy fdMy
runDiff fOld fYour fdYour
readFile u >>= writeFile (u ++ ".werge-backup")
conflict <-
bracketFile u WriteMode $ \h ->
@ -374,13 +323,39 @@ runCmd CmdGitMerge {..} cfg = do
if or conflicts
then exitWith (ExitFailure 1)
else exitSuccess
runCmd CmdDiff {..} cfg = do
withSystemTempDirectory "werge-diff" $ \workdir -> do
let [fOld, fYour, fDiff] = map (workdir </>) ["old", "your", "diff"]
for_ [(diffOld, fOld), (diffYour, fYour)] $ \(path, tmp) ->
bracketFile path ReadMode $ \h -> hSplitToFile cfg h tmp
conflicted <-
case diffUnified of
Just u -> do
c <- runDiffRaw u fOld fYour fDiff
readFile fDiff >>= putStr . unlines . drop 2 . lines
pure c
Nothing -> do
runDiff fOld fYour fDiff
pdiff fDiff >>= format cfg stdout . diff cfg
if conflicted
then exitWith (ExitFailure 1)
else exitSuccess
runCmd CmdPatch {..} cfg = do
withSystemTempDirectory "werge-patch" $ \workdir -> do
let f = workdir </> "file"
bracketFile patchMy ReadMode $ \h -> hSplitToFile cfg h f
_ <- runPatch f stdin
conflicted <- pmerge f >>= format cfg stdout -- TODO try to resolve more?
if conflicted
then exitWith (ExitFailure 1)
else exitSuccess
runCmd CmdBreak cfg = hSplit cfg stdin stdout
runCmd CmdGlue _ = getContents >>= putStr . Toks.glue . Toks.fromFile
main :: IO ()
main = catch go bad
where
go = do
(cfg, cmd) <- parseOpts
runCmd cmd cfg
go = parseOpts >>= uncurry (flip runCmd)
bad e = do
hPutStrLn stderr $ "fatal: " ++ displayException (e :: IOException)
exitWith (ExitFailure 2)

88
Opts.hs
View file

@ -16,6 +16,7 @@ data Tokenizer
| TokenizeCharCategorySimple
deriving (Show)
tokenizer :: Parser Tokenizer
tokenizer =
asum
[ TokenizeFilter
@ -44,6 +45,7 @@ data ConflictMask = ConflictMask
, cmResolveSeparate :: Bool
} deriving (Show)
conflictMask :: String -> String -> Parser ConflictMask
conflictMask label objs = do
cmResolveOverlaps' <-
fmap not . switch
@ -70,6 +72,7 @@ data Resolution
| ResolveYour
deriving (Show, Eq)
resolutionMode :: String -> Either String Resolution
resolutionMode x
| x `isPrefixOf` "keep" = Right ResolveKeep
| x `isPrefixOf` "my" = Right ResolveMy
@ -86,6 +89,7 @@ data SpaceResolution
| SpaceSpecial Resolution
deriving (Show, Eq)
spaceMode :: String -> Either String SpaceResolution
spaceMode x
| x `isPrefixOf` "normal" = Right SpaceNormal
| Right y <- resolutionMode x = Right (SpaceSpecial y)
@ -106,10 +110,12 @@ data Config = Config
, cfgConflicts :: ConflictMask
, cfgLabelStart :: String
, cfgLabelMyOld :: String
, cfgLabelDiff :: String
, cfgLabelOldYour :: String
, cfgLabelEnd :: String
} deriving (Show)
config :: Parser Config
config = do
cfgTokenizer <- tokenizer
cfgZealous <-
@ -141,7 +147,7 @@ config = do
<> metavar ("(normal|keep|my|old|your)")
<> value SpaceNormal
<> help
"Resolve conflicts in space-only tokens separately, and either keep unresolved conflicts, or resolve in favor of a given version; `normal' resolves the spaces together with other tokens, ignoring choices in --resolve-space-* (default: normal)"
"Resolve conflicts in space-only tokens separately, and either keep unresolved conflicts, or resolve in favor of a given version; `normal' resolves the spaces together with other tokens, ignoring choices in --conflict-space-* (default: normal)"
]
cfgSpaceConflicts <- conflictMask "conflict-space" "space-only tokens"
cfgContext <-
@ -149,10 +155,10 @@ config = do
$ long "expand-context"
<> short 'C'
<> metavar "N"
<> value 1
<> value 2
<> showDefault
<> help
"Consider changes that are at most N tokens apart to be a single change. Zero may cause bad resolutions of near conflicting edits"
"Consider changes that are at less than N tokens apart to be a single change; 0 turns off conflict expansion, 1 may cause bad resolutions of near conflicting edits"
cfgResolution <-
option (eitherReader resolutionMode)
$ long "resolve"
@ -177,6 +183,11 @@ config = do
$ long "label-mo"
<> metavar "\"|||||\""
<> help "Separator of local edits and original"
labelDiff <-
optional . strOption
$ long "label-diff"
<> metavar "\"|||||\""
<> help "Separator for old and new version"
labelOldYour <-
optional . strOption
$ long "label-oy"
@ -193,6 +204,8 @@ config = do
bool "<<<<<" "\ESC[1;37m<\ESC[0;31m" color `fromMaybe` labelStart
, cfgLabelMyOld =
bool "|||||" "\ESC[1;37m|\ESC[1;30m" color `fromMaybe` labelMyOld
, cfgLabelDiff =
bool "|||||" "\ESC[1;37m|\ESC[0;32m" color `fromMaybe` labelDiff
, cfgLabelOldYour =
bool "=====" "\ESC[1;37m=\ESC[0;32m" color `fromMaybe` labelOldYour
, cfgLabelEnd =
@ -210,8 +223,19 @@ data Command
{ gmFiles :: Maybe [FilePath]
, gmDoAdd :: Bool
}
| CmdDiff
{ diffOld :: FilePath
, diffYour :: FilePath
, diffUnified :: Maybe Int
}
| CmdPatch
{ patchMy :: FilePath
}
| CmdBreak
| CmdGlue
deriving (Show)
cmdDiff3 :: Parser Command
cmdDiff3 = do
d3my <- strArgument $ metavar "MYFILE" <> help "Version with local edits"
d3old <- strArgument $ metavar "OLDFILE" <> help "Original file version"
@ -219,13 +243,15 @@ cmdDiff3 = do
strArgument $ metavar "YOURFILE" <> help "Version with other people's edits"
pure CmdDiff3 {..}
cmdGitMerge :: Parser Command
cmdGitMerge = do
gmFiles <-
asum
[ fmap Just . some
$ strArgument
$ metavar "UNMERGED"
<> help "Unmerged file tracked by git (can be specified repeatedly)"
<> help
"Unmerged file tracked by git (can be specified repeatedly)"
, flag'
Nothing
(long "unmerged"
@ -234,18 +260,46 @@ cmdGitMerge = do
]
gmDoAdd <-
asum
[ flag'
True
(long "add"
<> short 'a'
<> help "Run `git add' for fully merged files")
, flag' False (long "no-add" <> help "Prevent running `git add'")
[ flag' True
$ long "add"
<> short 'a'
<> help "Run `git add' for fully merged files"
, flag' False $ long "no-add" <> help "Prevent running `git add'"
, pure False
]
pure CmdGitMerge {..}
cmdDiff :: Parser Command
cmdDiff = do
diffOld <- strArgument $ metavar "OLDFILE" <> help "Original file version"
diffYour <-
strArgument $ metavar "YOURFILE" <> help "File version with changes"
diffUnified <-
asum
[ flag' (Just 20)
$ long "unified"
<> short 'u'
<> help
"Produce unified-diff-like output for `patch' with default context size (20)"
, fmap Just . option auto
$ long "unified-size"
<> short 'U'
<> help "Produce unified diff with this context size"
, flag Nothing Nothing
$ long "merge"
<> short 'm'
<> help "Highlight the differences as with `merge' (default)"
]
pure CmdDiff {..}
cmdPatch :: Parser Command
cmdPatch = do
patchMy <- strArgument $ metavar "MYFILE" <> help "File to be modified"
pure CmdPatch {..}
-- TODO have some option to output the (partially merged) my/old/your files so
-- that folks can continue with external program or so (such as meld)
cmd :: Parser Command
cmd =
hsubparser
$ mconcat
@ -255,11 +309,23 @@ cmd =
, command "git"
$ info cmdGitMerge
$ progDesc "Automerge unmerged files in git conflict"
, command "diff"
$ info cmdDiff
$ progDesc "Find differences between two files"
, command "patch"
$ info cmdPatch
$ progDesc "Apply a patch from `diff' to file"
, command "break"
$ info (pure CmdBreak)
$ progDesc "Break text to tokens"
, command "glue"
$ info (pure CmdGlue)
$ progDesc "Glue tokens back to text"
]
parseOpts :: IO (Config, Command)
parseOpts =
customExecParser (prefs helpShowGlobals)
customExecParser (prefs $ helpShowGlobals <> subparserInline)
$ info
(liftA2 (,) config cmd
<**> helper

157
Progs.hs Normal file
View file

@ -0,0 +1,157 @@
module Progs where
import Control.Exception
import Control.Monad
import Data.List
import Data.Maybe
import System.Environment
import System.Exit
import System.IO
import System.Process
import Opts
import qualified Toks
bracketFile :: FilePath -> IOMode -> (Handle -> IO c) -> IO c
bracketFile path mode = bracket (openFile path mode) hClose
{-
- interface to gnu diff
-}
diffProg :: IO String
diffProg = fromMaybe "diff" <$> lookupEnv "WERGE_DIFF"
patchProg :: IO String
patchProg = fromMaybe "patch" <$> lookupEnv "WERGE_PATCH"
runDiff :: FilePath -> FilePath -> FilePath -> IO ()
runDiff f1 f2 out = do
diff <- diffProg
st <-
bracketFile out WriteMode $ \oh ->
withCreateProcess
(proc
diff
[ "--text"
, "--new-line-format=+%L"
, "--old-line-format=-%L"
, "--unchanged-line-format= %L"
, f1
, f2
])
{std_in = NoStream, std_out = UseHandle oh} $ \_ _ _ -> waitForProcess
when (st == ExitFailure 2) $ error "diff failed"
unless (st `elem` [ExitSuccess, ExitFailure 1])
$ error "diff failed for unknown reason (is GNU diffutils installed?)"
runDiffRaw :: Int -> FilePath -> FilePath -> FilePath -> IO Bool
runDiffRaw u f1 f2 out = do
diff <- diffProg
st <-
bracketFile out WriteMode $ \oh ->
withCreateProcess
(proc diff ["--text", "--unified=" ++ show u, f1, f2])
{std_in = NoStream, std_out = UseHandle oh} $ \_ _ _ -> waitForProcess
unless (st `elem` [ExitSuccess, ExitFailure 1]) $ error "diff failed"
pure (st /= ExitSuccess) -- report if diff thinks that the files differed
runPatch :: FilePath -> Handle -> IO Bool
runPatch f hi = do
patch <- patchProg
st <-
withCreateProcess
(proc patch ["--silent", "--batch", "--merge=diff3", f])
{std_in = UseHandle hi} $ \_ _ _ -> waitForProcess
unless (st `elem` [ExitSuccess, ExitFailure 1]) $ error "patch failed"
pure (st /= ExitSuccess) -- report if patch thinks that stuff has failed
{-
- interface to git
-}
gitProg :: IO String
gitProg = fromMaybe "git" <$> lookupEnv "WERGE_GIT"
gitRepoRelRoot :: IO FilePath
gitRepoRelRoot = do
git <- gitProg
(path, st) <-
withCreateProcess
(proc git ["rev-parse", "--show-cdup"])
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
(,) <$> hGetContents' oh <*> waitForProcess p
unless (st == ExitSuccess) $ error "git failed"
case lines path of
[p] -> pure p
_ -> fail "bad git-rev-parse output"
gitUnmerged :: IO [FilePath]
gitUnmerged = do
git <- gitProg
(paths, st) <-
withCreateProcess
(proc git ["status", "--porcelain=v1"])
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
(,)
<$> (map (drop 3) . filter ("UU " `isPrefixOf`) . lines
<$> hGetContents' oh)
<*> waitForProcess p
unless (st == ExitSuccess) $ error "git failed"
pure paths
gitCheckoutMOY ::
Config -> FilePath -> FilePath -> FilePath -> FilePath -> IO ()
gitCheckoutMOY cfg u my old your = do
git <- gitProg
(paths, st) <-
withCreateProcess
(proc git ["ls-files", "--unmerged", "--", u])
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
(,)
<$> (sortOn snd
. map ((\[a, b] -> (a, b)) . take 2 . drop 1 . words)
. lines
<$> hGetContents' oh)
<*> waitForProcess p
unless (st == ExitSuccess) $ error "git failed"
let co (hash, _) path = do
st' <-
withCreateProcess
(proc "git" ["cat-file", "blob", hash])
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just ho) _ p -> do
hSplitToFile cfg ho path
waitForProcess p
unless (st' == ExitSuccess) . error
$ "failed checking out " ++ u ++ " from blob " ++ hash
case paths of
[(_, "1"), (_, "2"), (_, "3")] ->
zipWithM co paths [old, my, your] >> pure ()
_ -> error $ "bad data from ls-files for unmerged " ++ u
gitAdd :: FilePath -> IO ()
gitAdd path = do
git <- gitProg
st <- rawSystem git ["add", "--", path]
unless (st == ExitSuccess) $ error "git-add failed"
{-
- interface to external tokenizers
-
- TODO this might probably enforce joinSpaces?
- or have joinSpaces as configurable? (probably best, default true)
-}
hSplit :: Config -> Handle -> Handle -> IO ()
hSplit cfg hi ho =
case cfgTokenizer cfg of
TokenizeCharCategory -> internal Toks.splitCategory
TokenizeCharCategorySimple -> internal Toks.splitSimple
TokenizeFilter fltr -> do
st <-
withCreateProcess
(shell fltr) {std_in = UseHandle ho, std_out = UseHandle ho} $ \_ _ _ ->
waitForProcess
unless (st == ExitSuccess) $ error "tokenize filter failed"
where
internal s = hGetContents hi >>= hPutStr ho . Toks.toFile . s
hSplitToFile :: Config -> Handle -> FilePath -> IO ()
hSplitToFile cfg hi path = bracketFile path WriteMode $ hSplit cfg hi

130
README.md
View file

@ -1,20 +1,26 @@
# werge (merge weird stuff)
This is a partial work-alike of `diff3` and `git merge` and other merge-y tools
that is capable of
This is a partial work-alike of `diff3`, `patch`, `git merge` and other merge-y
tools that is capable of:
- merging token-size changes instead of line-size ones
- largely ignoring changes in blank characters
- merging token-size changes (words, identifiers, sentences) instead of
line-size ones
- merging changes in blank characters separately or ignoring them altogether
These properties are great for several use-cases:
- merging free-flowing text changes (such as in TeX) irrespective of line breaks
etc,
- merging of change sets that use different code formatters
- combining changes in free-flowing text (such as in TeX or Markdown),
irrespectively of changed line breaks, paragraph breaking and justification,
etc.
- merging of code formatted with different code formatters
- minimizing the conflict size of tiny changes to a few characters, making them
easier to resolve
Separate `diff`&`patch` functionality is provided too for sending
token-granularity patches. (The patches are similar to what `git diff
--word-diff` produces, but can be applied to files.)
## Demo
Original (`old` file):
@ -79,39 +85,49 @@ I still cannot do verses.
- Some tokens are marked as spaces by the tokenizer, which allows the merge
algorithm to be (selectively) more zealous when resolving conflicts on these.
This approach differs from various other structured-merge tools by being
completely oblivious about the file structure. Werge trades off some merge
quality for (a lot of) complexity.
Compared to e.g. `difftastic`, `mergiraf` and similar tools, **`werge` is
completely oblivious about the actual file structure** and works on any file
type. This choice trades off some merge quality for (a lot of) complexity.
Tokenizers are simple, implementable as linear scanners that print separate
tokens on individual lines that are prefixed with a space mark (`.` for space
and `|` for non-space), and also escape newlines and backslashes. A default
and `/` for non-space), and also escape newlines and backslashes. A default
tokenization of string "hello \ world" with a new line at the end is listed
below (note the invisible space on the lines with dots):
```
|hello
/hello
.
|\\
/\\
.
|world
/world
.\n
```
Users may supply any tokenizer via option `-F`, e.g. this script makes
line-size tokens (reproducing the usual line merges):
### Custom tokenizers
```
Users may supply any tokenizer via option `-F`. The script below produces
line-size tokens for demonstration (in turn, `werge` will do the usual line
merges), and can be used e.g. via `-F ./tokenize.py`:
```py
#!/usr/bin/env python3
import sys
for l in sys.stdin.readlines():
if len(l)==0: continue
if l[-1]=='\n':
print('|'+l[:-1].replace('\\','\\\\')+'\\n')
print('/'+l[:-1].replace('\\','\\\\')+'\\n')
else:
print('|'+l.replace('\\','\\\\'))
print('/'+l.replace('\\','\\\\'))
```
### History
I previously made an attempt to solve this in `adiff` software, which failed
because the approach was too complex. Before that, the issue was tackled by
Arek Antoniewicz on MFF CUNI, who used regex-edged DFAs (REDFAs) to construct
user-specifiable tokenizers in a pretty cool way.
## Installation
```sh
@ -144,16 +160,16 @@ automatically to `filename.werge-backup`.
```
werge -- blanks-friendly mergetool for tiny interdwindled changes
Usage: werge [(-F|--tok-filter FILTER) | (-i|--simple-tokens) |
(-I|--full-tokens)] [--no-zeal | (-z|--zeal)]
[-S|--space (keep|my|old|your)]
[-s | --resolve-space (normal|keep|my|old|your)]
[--conflict-space-overlaps] [--conflict-space-separate]
[--conflict-space-all] [-C|--expand-context N]
[--resolve (keep|my|old|your)] [--conflict-overlaps]
[--conflict-separate] [--conflict-all] [-G|--color]
[--label-start "<<<<<"] [--label-mo "|||||"] [--label-oy "====="]
[--label-end ">>>>>"] COMMAND
Usage: werge [(-F|--tok-filter FILTER) | (-i|--simple-tokens) |
(-I|--full-tokens)] [--no-zeal | (-z|--zeal)]
[-S|--space (keep|my|old|your)]
[-s | --resolve-space (normal|keep|my|old|your)]
[--conflict-space-overlaps] [--conflict-space-separate]
[--conflict-space-all] [-C|--expand-context N]
[--resolve (keep|my|old|your)] [--conflict-overlaps]
[--conflict-separate] [--conflict-all] [-G|--color]
[--label-start "<<<<<"] [--label-mo "|||||"] [--label-diff "|||||"]
[--label-oy "====="] [--label-end ">>>>>"] COMMAND
Available options:
-F,--tok-filter FILTER External program to separate the text to tokens
@ -183,9 +199,10 @@ Available options:
Never resolve separate (non-overlapping) changes in
space-only tokens
--conflict-space-all Never resolve any changes in space-only tokens
-C,--expand-context N Consider changes that are at most N tokens apart to
be a single change. Zero may cause bad resolutions of
near conflicting edits (default: 1)
-C,--expand-context N Consider changes that are at less than N tokens apart
to be a single change; 0 turns off conflict
expansion, 1 may cause bad resolutions of near
conflicting edits (default: 2)
--resolve (keep|my|old|your)
Resolve general conflicts in favor of a given
version, or keep the conflicts (default: keep)
@ -198,6 +215,7 @@ Available options:
`less -R')
--label-start "<<<<<" Label for beginning of the conflict
--label-mo "|||||" Separator of local edits and original
--label-diff "|||||" Separator for old and new version
--label-oy "=====" Separator of original and other people's edits
--label-end ">>>>>" Label for end of the conflict
-h,--help Show this help text
@ -206,6 +224,10 @@ Available options:
Available commands:
merge diff3-style merge of two changesets
git Automerge unmerged files in git conflict
diff Find differences between two files
patch Apply a patch from `diff' to file
break Break text to tokens
glue Glue tokens back to text
werge is a free software, use it accordingly.
```
@ -237,3 +259,47 @@ Available options:
--no-add Prevent running `git add'
-h,--help Show this help text
```
#### Finding differences
```
Usage: werge diff OLDFILE YOURFILE
[(-u|--unified) | (-U|--unified-size ARG) | (-m|--merge)]
Find differences between two files
Available options:
OLDFILE Original file version
YOURFILE File version with changes
-u,--unified Produce unified-diff-like output for `patch' with
default context size (20)
-U,--unified-size ARG Produce unified diff with this context size
-m,--merge Highlight the differences as with `merge' (default)
-h,--help Show this help text
```
#### Patching files in place
```
Usage: werge patch MYFILE
Apply a patch from `diff' to file
Available options:
MYFILE File to be modified
-h,--help Show this help text
```
#### Converting between files and tokens
Both commands work as plain stdin-to-stdout filters:
```
Usage: werge break
Break text to tokens
```
```
Usage: werge glue
Glue tokens back to text
```

14
Toks.hs
View file

@ -15,19 +15,23 @@ unescape :: String -> String
unescape [] = []
unescape ('\\':'\\':xs) = '\\' : unescape xs
unescape ('\\':'n':xs) = '\n' : unescape xs
unescape ('\\':_) = error "bad escape?"
unescape ('\\':_) = error "bad escape on input"
unescape (x:xs) = x : unescape xs
tok ('.':_) = True
tok ('/':_) = True
tok _ = False
markSpace :: String -> Tok
markSpace [] = error "wat"
markSpace [] = error "empty token"
markSpace s@(c:_)
| isSpace c = '.' : s
| otherwise = '|' : s
| otherwise = '/' : s
unmarkSpace :: Tok -> String
unmarkSpace ('.':s) = s
unmarkSpace ('|':s) = s
unmarkSpace x = error "unwat"
unmarkSpace ('/':s) = s
unmarkSpace _ = error "bad space marking on input"
space :: Tok -> Bool
space ('.':_) = True

View file

@ -22,8 +22,9 @@ executable werge
main-is: Main.hs
other-modules:
Opts
Toks
Paths_werge
Progs
Toks
autogen-modules: Paths_werge
build-depends: