Compare commits

...

10 commits

Author SHA1 Message Date
Mirek Kratochvil 259ad6101b typo 2025-07-18 20:26:53 +02:00
Mirek Kratochvil f5f206765c add a note about history 2025-07-18 15:58:24 +02:00
Mirek Kratochvil 5a88a00a0d autobuild on github 2025-07-18 15:38:15 +02:00
Mirek Kratochvil 44518ce946 document, change non-space token mark 2025-07-18 15:31:55 +02:00
Mirek Kratochvil 6a2b2e3148 explain 2025-07-18 15:22:35 +02:00
Mirek Kratochvil cb5257b285 make diff+patch work together, document 2025-07-18 15:21:08 +02:00
Mirek Kratochvil 56cf7c69a9 have diffs (for good manners) 2025-07-17 22:21:23 +02:00
Mirek Kratochvil 49fcd0ca44 clean up 2025-07-17 20:44:40 +02:00
Mirek Kratochvil ecdaa9511d python 2025-07-17 16:10:41 +02:00
Mirek Kratochvil 69ad61ab22 document difference 2025-07-17 16:06:08 +02:00
7 changed files with 472 additions and 179 deletions

24
.github/workflows/build.yml vendored Normal file
View file

@ -0,0 +1,24 @@
name: build
on:
push:
tags:
- 'v*'
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- uses: haskell-actions/setup@v2
with:
ghc-version: '9.4'
- run: |
cabal build
xz -9 < `cabal exec which werge` > werge-${{ github.ref_name }}-`uname -m`.xz
- uses: softprops/action-gh-release@v2
with:
files: werge-*.xz

235
Main.hs
View file

@ -8,125 +8,19 @@ import Data.Bool
import Data.Foldable import Data.Foldable
import Data.Function import Data.Function
import Data.List import Data.List
import Data.Maybe
import Data.Traversable import Data.Traversable
import Opts
import System.Environment
import System.Exit import System.Exit
import System.FilePath import System.FilePath
import System.IO import System.IO
import System.IO.Temp import System.IO.Temp
import System.Process
import Opts
import Progs
import qualified Toks import qualified Toks
import Toks (Tok) import Toks (Tok)
import Debug.Trace import Debug.Trace
{-
- interface to other programs
-}
diffProg = fromMaybe "diff" <$> lookupEnv "WERGE_DIFF"
gitProg = fromMaybe "git" <$> lookupEnv "WERGE_GIT"
bracketFile path mode = bracket (openFile path mode) hClose
rundiff f1 f2 out = do
diff <- diffProg
st <-
bracketFile out WriteMode $ \oh ->
withCreateProcess
(proc
diff
[ "--text"
, "--new-line-format=+%L"
, "--old-line-format=-%L"
, "--unchanged-line-format= %L"
, f1
, f2
])
{std_in = NoStream, std_out = UseHandle oh} $ \_ _ _ -> waitForProcess
when (st == ExitFailure 2) $ error "diff failed"
unless (st `elem` [ExitSuccess, ExitFailure 1])
$ error "diff failed for unknown reason (is GNU diffutils installed?)"
gitRepoRelRoot = do
git <- gitProg
(path, st) <-
withCreateProcess
(proc git ["rev-parse", "--show-cdup"])
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
(,) <$> hGetContents' oh <*> waitForProcess p
unless (st == ExitSuccess) $ error "git failed"
let [p] = lines path
pure p
gitUnmerged = do
git <- gitProg
(paths, st) <-
withCreateProcess
(proc git ["status", "--porcelain=v1"])
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
(,)
<$> (map (drop 3) . filter ("UU " `isPrefixOf`) . lines
<$> hGetContents' oh)
<*> waitForProcess p
unless (st == ExitSuccess) $ error "git failed"
pure paths
gitCheckoutMOY cfg u my old your = do
git <- gitProg
(paths, st) <-
withCreateProcess
(proc git ["ls-files", "--unmerged", "--", u])
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
(,)
<$> (sortOn snd
. map ((\[a, b] -> (a, b)) . take 2 . drop 1 . words)
. lines
<$> hGetContents' oh)
<*> waitForProcess p
unless (st == ExitSuccess) $ error "git failed"
let co (hash, _) path = do
st <-
withCreateProcess
(proc "git" ["cat-file", "blob", hash])
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just ho) _ p -> do
hSplitToFile cfg ho path
waitForProcess p
unless (st == ExitSuccess) . error
$ "failed checking out " ++ u ++ " from blob " ++ hash
case paths of
[(_, "1"), (_, "2"), (_, "3")] ->
zipWithM co paths [old, my, your] >> pure ()
_ -> error $ "bad data from ls-files for unmerged " ++ u
gitAdd path = do
git <- gitProg
st <- rawSystem git ["add", "--", path]
unless (st == ExitSuccess) $ error "git-add failed"
{-
- configurable splitting
-
- TODO this should probably enforce joinSpaces?
- or have joinSpaces as configurable? (probably best, default true)
-}
hSplitToFile cfg h path =
case cfgTokenizer cfg of
TokenizeCharCategory -> internal Toks.splitCategory
TokenizeCharCategorySimple -> internal Toks.splitSimple
TokenizeFilter cmd -> do
st <-
bracketFile path WriteMode $ \ho ->
withCreateProcess
(shell cmd) {std_in = UseHandle h, std_out = UseHandle ho} $ \_ _ _ ->
waitForProcess
unless (st == ExitSuccess) $ error "tokenize filter failed"
where
internal s = hGetContents h >>= writeFile path . Toks.toFile . s
{- {-
- merge algorithms - merge algorithms
-} -}
@ -142,7 +36,7 @@ pdiff path = map go . lines <$> readFile path
go ('-':s) = (Del, s) go ('-':s) = (Del, s)
go (' ':s) = (Keep, s) go (' ':s) = (Keep, s)
go ('+':s) = (Add, s) go ('+':s) = (Add, s)
go [] = error "unexpected output from diff" go _ = error "unexpected output from diff"
data Merged data Merged
= Ok [String] = Ok [String]
@ -150,9 +44,41 @@ data Merged
| Conflict [String] [String] [String] | Conflict [String] [String] [String]
deriving (Show) deriving (Show)
pmerge :: FilePath -> IO [Merged]
pmerge path = go . lines <$> readFile path
where
go [] = []
go xs@(x:_)
| Toks.tok x = goOk xs
| otherwise = goC0 xs
eat = span Toks.tok
goOk xs =
let (a, xs') = eat xs
in Ok a : go xs'
goC0 ("<<<<<<<":xs) =
let (m, xs') = eat xs
in goC1 m xs'
goC0 (x:_) = error $ "unexpected token: " ++ x
goC0 [] = error "unexpected end"
goC1 m ("|||||||":xs) =
let (o, xs') = eat xs
in goC2 m o xs'
goC1 _ (x:_) = error $ "unexpected token: " ++ x
goC1 _ [] = error "unexpected end"
goC2 m o ("=======":xs) =
let (y, xs') = eat xs
in goC3 m o y xs'
goC2 _ _ (x:_) = error $ "unexpected token: " ++ x
goC2 _ _ [] = error "unexpected end"
goC3 m o y (">>>>>>>":xs) = Conflict m o y : go xs
goC3 _ _ _ (x:_) = error $ "unexpected token: " ++ x
goC3 _ _ _ [] = error "unexpected end"
isKeepTok :: (Op, String) -> Bool
isKeepTok (Keep, _) = True isKeepTok (Keep, _) = True
isKeepTok _ = False isKeepTok _ = False
isDelTok :: (Op, String) -> Bool
isDelTok (Del, _) = True isDelTok (Del, _) = True
isDelTok _ = False isDelTok _ = False
@ -170,6 +96,7 @@ chunks xs =
let (reps, ys) = break isKeepTok xs let (reps, ys) = break isKeepTok xs
in uncurry (Replace `on` map snd) (partition isDelTok reps) : chunks ys in uncurry (Replace `on` map snd) (partition isDelTok reps) : chunks ys
align1 :: Eq a => [a] -> [a] -> ([a], [a], [a])
align1 as [] = ([], as, []) align1 as [] = ([], as, [])
align1 [] bs = ([], [], bs) align1 [] bs = ([], [], bs)
align1 (a:as) (b:bs) align1 (a:as) (b:bs)
@ -178,7 +105,7 @@ align1 (a:as) (b:bs)
align1 _ _ = error "chunks do not align" align1 _ _ = error "chunks do not align"
align :: [Merged] -> [Merged] -> [Merged] align :: [Merged] -> [Merged] -> [Merged]
align m y = connect $ slice m y align m0 y0 = connect $ slice m0 y0
where where
erase x = Replace x [] erase x = Replace x []
nemap _ [] = [] nemap _ [] = []
@ -204,13 +131,14 @@ align m y = connect $ slice m y
slice _ _ = error "unacceptable chunks" slice _ _ = error "unacceptable chunks"
coFlag (Ok _) = False coFlag (Ok _) = False
coFlag (Replace _ _) = True coFlag (Replace _ _) = True
coFlag _ = error "flagging unacceptable chunks"
coSig (a, b) = (coFlag a, coFlag b) coSig (a, b) = (coFlag a, coFlag b)
coConn' (a, b) (a', b') = (a && a') || (b && b') coConn' (a, b) (a', b') = (a && a') || (b && b')
coConn = coConn' `on` coSig coConn = coConn' `on` coSig
coGroup [] = [] coGroup [] = []
coGroup (x:xs) = coGroup (x:xs) =
case coGroup xs of case coGroup xs of
xs'@(ys@(y:_):yss) (ys@(y:_):yss)
| coConn x y -> (x : ys) : yss | coConn x y -> (x : ys) : yss
xs' -> [x] : xs' xs' -> [x] : xs'
connect = map confl . coGroup connect = map confl . coGroup
@ -218,12 +146,14 @@ align m y = connect $ slice m y
toCon (Ok o, Replace _ y) = Conflict o o y toCon (Ok o, Replace _ y) = Conflict o o y
toCon (Replace o m, Ok _) = Conflict m o o toCon (Replace o m, Ok _) = Conflict m o o
toCon (Replace o m, Replace _ y) = Conflict m o y toCon (Replace o m, Replace _ y) = Conflict m o y
confl = foldr cappend (Ok []) . map toCon toCon _ = error "converting unacceptable chunks"
cappend (Ok x) (Ok o) = Ok (x ++ o) confl = foldr coAppend (Ok []) . map toCon
cappend (Ok x) (Conflict m o y) = error "align consistency check fail" -- Conflict (x++m) (x++o) (x++y) coAppend (Ok x) (Ok o) = Ok (x ++ o)
cappend (Conflict m o y) (Ok x) = Conflict (m ++ x) (o ++ x) (y ++ x) coAppend (Ok _) (Conflict _ _ _) = error "align consistency check fail" -- Conflict (x++m) (x++o) (x++y)
cappend (Conflict m o y) (Conflict m' o' y') = coAppend (Conflict m o y) (Ok x) = Conflict (m ++ x) (o ++ x) (y ++ x)
coAppend (Conflict m o y) (Conflict m' o' y') =
Conflict (m ++ m') (o ++ o') (y ++ y') Conflict (m ++ m') (o ++ o') (y ++ y')
coAppend _ _ = error "appending unacceptable chunks"
regroup :: [Merged] -> [Merged] regroup :: [Merged] -> [Merged]
regroup [] = [] regroup [] = []
@ -234,10 +164,11 @@ regroup (x@(Ok a):xs) =
xs' -> x : xs' xs' -> x : xs'
regroup (x:xs) = x : regroup xs regroup (x:xs) = x : regroup xs
zeal Config {..} (Conflict m o y) = zeal :: Config -> Merged -> [Merged]
before' ++ (Conflict (reverse m'') o (reverse y'') : after') zeal Config {..} (Conflict m0 o0 y0) =
before' ++ (Conflict (reverse m'') o0 (reverse y'') : after')
where where
((m', y'), before) = pops m y ((m', y'), before) = pops m0 y0
((m'', y''), rafter) = pops (reverse m') (reverse y') ((m'', y''), rafter) = pops (reverse m') (reverse y')
before' = before' =
case before of case before of
@ -258,6 +189,7 @@ zeal Config {..} (Conflict m o y) =
pops ms ys = ((ms, ys), []) pops ms ys = ((ms, ys), [])
zeal _ x = [x] zeal _ x = [x]
resolveSpace :: Config -> Merged -> Merged
resolveSpace Config {..} c@(Conflict m o y) resolveSpace Config {..} c@(Conflict m o y)
| not (all Toks.space $ concat [m, o, y]) = c | not (all Toks.space $ concat [m, o, y]) = c
| m == o && o == y = Ok o | m == o && o == y = Ok o
@ -282,14 +214,22 @@ expand n = go
go [] = [] go [] = []
go (x@(Conflict m1 o1 y1):xs) = go (x@(Conflict m1 o1 y1):xs) =
case go xs of case go xs of
(Conflict m2 o2 y2:xs') -> (Conflict m2 o2 y2:xs')
Conflict (m1 ++ m2) (o1 ++ o2) (y1 ++ y2) : xs' | n > 0 -> Conflict (m1 ++ m2) (o1 ++ o2) (y1 ++ y2) : xs'
(Ok a:Conflict m2 o2 y2:xs') (Ok a:Conflict m2 o2 y2:xs')
| length a <= n -> | length a < n ->
Conflict (m1 ++ a ++ m2) (o1 ++ a ++ o2) (y1 ++ a ++ y2) : xs' Conflict (m1 ++ a ++ m2) (o1 ++ a ++ o2) (y1 ++ a ++ y2) : xs'
xs' -> x : xs' xs' -> x : xs'
go (x@(Replace o1 n1):xs) =
case go xs of
(Replace o2 n2:xs')
| n > 0 -> Replace (o1 ++ o2) (n1 ++ n2) : xs'
(Ok a:Replace o2 n2:xs')
| length a < n -> Replace (o1 ++ a ++ o2) (n1 ++ a ++ n2) : xs'
xs' -> x : xs'
go (x:xs) = x : go xs go (x:xs) = x : go xs
resolve :: Config -> Merged -> Merged
resolve cfg@Config {..} c@(Conflict m o y) resolve cfg@Config {..} c@(Conflict m o y)
| cfgSpaceResolution /= SpaceNormal | cfgSpaceResolution /= SpaceNormal
, all Toks.space (concat [m, o, y]) = resolveSpace cfg c , all Toks.space (concat [m, o, y]) = resolveSpace cfg c
@ -305,6 +245,7 @@ resolve cfg@Config {..} c@(Conflict m o y)
ResolveKeep -> c ResolveKeep -> c
resolve _ x = x resolve _ x = x
merge :: Config -> [(Op, String)] -> [(Op, String)] -> [Merged]
merge cfg@Config {..} ms ys = merge cfg@Config {..} ms ys =
regroup regroup
. map (resolve cfg) . map (resolve cfg)
@ -315,6 +256,8 @@ merge cfg@Config {..} ms ys =
. regroup . regroup
$ align (chunks ms) (chunks ys) $ align (chunks ms) (chunks ys)
diff Config {..} = expand cfgContext . chunks
{- {-
- front-end - front-end
-} -}
@ -325,7 +268,7 @@ format Config {..} h = go False
go c (Ok x:xs) = do go c (Ok x:xs) = do
hPutStr h (Toks.glue x) hPutStr h (Toks.glue x)
go c xs go c xs
go c (Conflict m o y:xs) = do go _ (Conflict m o y:xs) = do
hPutStr h hPutStr h
$ mconcat $ mconcat
[ cfgLabelStart [ cfgLabelStart
@ -337,15 +280,21 @@ format Config {..} h = go False
, cfgLabelEnd , cfgLabelEnd
] ]
go True xs go True xs
go _ (Replace o n:xs) = do
hPutStr h
$ mconcat
[cfgLabelStart, Toks.glue o, cfgLabelDiff, Toks.glue n, cfgLabelEnd]
go True xs
runCmd :: Command -> Config -> IO ()
runCmd CmdDiff3 {..} cfg = runCmd CmdDiff3 {..} cfg =
withSystemTempDirectory "werge-diff3" $ \workdir -> do withSystemTempDirectory "werge-diff3" $ \workdir -> do
let [fMy, fOld, fYour, fdMy, fdYour] = let [fMy, fOld, fYour, fdMy, fdYour] =
map (workdir </>) ["my", "old", "your", "mydiff", "yourdiff"] map (workdir </>) ["my", "old", "your", "mydiff", "yourdiff"]
for_ [(d3my, fMy), (d3old, fOld), (d3your, fYour)] $ \(path, tmp) -> for_ [(d3my, fMy), (d3old, fOld), (d3your, fYour)] $ \(path, tmp) ->
bracketFile path ReadMode $ \h -> hSplitToFile cfg h tmp bracketFile path ReadMode $ \h -> hSplitToFile cfg h tmp
rundiff fOld fMy fdMy runDiff fOld fMy fdMy
rundiff fOld fYour fdYour runDiff fOld fYour fdYour
conflicted <- conflicted <-
merge cfg <$> pdiff fdMy <*> pdiff fdYour >>= format cfg stdout merge cfg <$> pdiff fdMy <*> pdiff fdYour >>= format cfg stdout
if conflicted if conflicted
@ -363,8 +312,8 @@ runCmd CmdGitMerge {..} cfg = do
let [fMy, fOld, fYour, fdMy, fdYour] = let [fMy, fOld, fYour, fdMy, fdYour] =
map (workdir </>) ["my", "old", "your", "mydiff", "yourdiff"] map (workdir </>) ["my", "old", "your", "mydiff", "yourdiff"]
gitCheckoutMOY cfg u fMy fOld fYour gitCheckoutMOY cfg u fMy fOld fYour
rundiff fOld fMy fdMy runDiff fOld fMy fdMy
rundiff fOld fYour fdYour runDiff fOld fYour fdYour
readFile u >>= writeFile (u ++ ".werge-backup") readFile u >>= writeFile (u ++ ".werge-backup")
conflict <- conflict <-
bracketFile u WriteMode $ \h -> bracketFile u WriteMode $ \h ->
@ -374,13 +323,39 @@ runCmd CmdGitMerge {..} cfg = do
if or conflicts if or conflicts
then exitWith (ExitFailure 1) then exitWith (ExitFailure 1)
else exitSuccess else exitSuccess
runCmd CmdDiff {..} cfg = do
withSystemTempDirectory "werge-diff" $ \workdir -> do
let [fOld, fYour, fDiff] = map (workdir </>) ["old", "your", "diff"]
for_ [(diffOld, fOld), (diffYour, fYour)] $ \(path, tmp) ->
bracketFile path ReadMode $ \h -> hSplitToFile cfg h tmp
conflicted <-
case diffUnified of
Just u -> do
c <- runDiffRaw u fOld fYour fDiff
readFile fDiff >>= putStr . unlines . drop 2 . lines
pure c
Nothing -> do
runDiff fOld fYour fDiff
pdiff fDiff >>= format cfg stdout . diff cfg
if conflicted
then exitWith (ExitFailure 1)
else exitSuccess
runCmd CmdPatch {..} cfg = do
withSystemTempDirectory "werge-patch" $ \workdir -> do
let f = workdir </> "file"
bracketFile patchMy ReadMode $ \h -> hSplitToFile cfg h f
_ <- runPatch f stdin
conflicted <- pmerge f >>= format cfg stdout -- TODO try to resolve more?
if conflicted
then exitWith (ExitFailure 1)
else exitSuccess
runCmd CmdBreak cfg = hSplit cfg stdin stdout
runCmd CmdGlue _ = getContents >>= putStr . Toks.glue . Toks.fromFile
main :: IO () main :: IO ()
main = catch go bad main = catch go bad
where where
go = do go = parseOpts >>= uncurry (flip runCmd)
(cfg, cmd) <- parseOpts
runCmd cmd cfg
bad e = do bad e = do
hPutStrLn stderr $ "fatal: " ++ displayException (e :: IOException) hPutStrLn stderr $ "fatal: " ++ displayException (e :: IOException)
exitWith (ExitFailure 2) exitWith (ExitFailure 2)

88
Opts.hs
View file

@ -16,6 +16,7 @@ data Tokenizer
| TokenizeCharCategorySimple | TokenizeCharCategorySimple
deriving (Show) deriving (Show)
tokenizer :: Parser Tokenizer
tokenizer = tokenizer =
asum asum
[ TokenizeFilter [ TokenizeFilter
@ -44,6 +45,7 @@ data ConflictMask = ConflictMask
, cmResolveSeparate :: Bool , cmResolveSeparate :: Bool
} deriving (Show) } deriving (Show)
conflictMask :: String -> String -> Parser ConflictMask
conflictMask label objs = do conflictMask label objs = do
cmResolveOverlaps' <- cmResolveOverlaps' <-
fmap not . switch fmap not . switch
@ -70,6 +72,7 @@ data Resolution
| ResolveYour | ResolveYour
deriving (Show, Eq) deriving (Show, Eq)
resolutionMode :: String -> Either String Resolution
resolutionMode x resolutionMode x
| x `isPrefixOf` "keep" = Right ResolveKeep | x `isPrefixOf` "keep" = Right ResolveKeep
| x `isPrefixOf` "my" = Right ResolveMy | x `isPrefixOf` "my" = Right ResolveMy
@ -86,6 +89,7 @@ data SpaceResolution
| SpaceSpecial Resolution | SpaceSpecial Resolution
deriving (Show, Eq) deriving (Show, Eq)
spaceMode :: String -> Either String SpaceResolution
spaceMode x spaceMode x
| x `isPrefixOf` "normal" = Right SpaceNormal | x `isPrefixOf` "normal" = Right SpaceNormal
| Right y <- resolutionMode x = Right (SpaceSpecial y) | Right y <- resolutionMode x = Right (SpaceSpecial y)
@ -106,10 +110,12 @@ data Config = Config
, cfgConflicts :: ConflictMask , cfgConflicts :: ConflictMask
, cfgLabelStart :: String , cfgLabelStart :: String
, cfgLabelMyOld :: String , cfgLabelMyOld :: String
, cfgLabelDiff :: String
, cfgLabelOldYour :: String , cfgLabelOldYour :: String
, cfgLabelEnd :: String , cfgLabelEnd :: String
} deriving (Show) } deriving (Show)
config :: Parser Config
config = do config = do
cfgTokenizer <- tokenizer cfgTokenizer <- tokenizer
cfgZealous <- cfgZealous <-
@ -141,7 +147,7 @@ config = do
<> metavar ("(normal|keep|my|old|your)") <> metavar ("(normal|keep|my|old|your)")
<> value SpaceNormal <> value SpaceNormal
<> help <> help
"Resolve conflicts in space-only tokens separately, and either keep unresolved conflicts, or resolve in favor of a given version; `normal' resolves the spaces together with other tokens, ignoring choices in --resolve-space-* (default: normal)" "Resolve conflicts in space-only tokens separately, and either keep unresolved conflicts, or resolve in favor of a given version; `normal' resolves the spaces together with other tokens, ignoring choices in --conflict-space-* (default: normal)"
] ]
cfgSpaceConflicts <- conflictMask "conflict-space" "space-only tokens" cfgSpaceConflicts <- conflictMask "conflict-space" "space-only tokens"
cfgContext <- cfgContext <-
@ -149,10 +155,10 @@ config = do
$ long "expand-context" $ long "expand-context"
<> short 'C' <> short 'C'
<> metavar "N" <> metavar "N"
<> value 1 <> value 2
<> showDefault <> showDefault
<> help <> help
"Consider changes that are at most N tokens apart to be a single change. Zero may cause bad resolutions of near conflicting edits" "Consider changes that are at less than N tokens apart to be a single change; 0 turns off conflict expansion, 1 may cause bad resolutions of near conflicting edits"
cfgResolution <- cfgResolution <-
option (eitherReader resolutionMode) option (eitherReader resolutionMode)
$ long "resolve" $ long "resolve"
@ -177,6 +183,11 @@ config = do
$ long "label-mo" $ long "label-mo"
<> metavar "\"|||||\"" <> metavar "\"|||||\""
<> help "Separator of local edits and original" <> help "Separator of local edits and original"
labelDiff <-
optional . strOption
$ long "label-diff"
<> metavar "\"|||||\""
<> help "Separator for old and new version"
labelOldYour <- labelOldYour <-
optional . strOption optional . strOption
$ long "label-oy" $ long "label-oy"
@ -193,6 +204,8 @@ config = do
bool "<<<<<" "\ESC[1;37m<\ESC[0;31m" color `fromMaybe` labelStart bool "<<<<<" "\ESC[1;37m<\ESC[0;31m" color `fromMaybe` labelStart
, cfgLabelMyOld = , cfgLabelMyOld =
bool "|||||" "\ESC[1;37m|\ESC[1;30m" color `fromMaybe` labelMyOld bool "|||||" "\ESC[1;37m|\ESC[1;30m" color `fromMaybe` labelMyOld
, cfgLabelDiff =
bool "|||||" "\ESC[1;37m|\ESC[0;32m" color `fromMaybe` labelDiff
, cfgLabelOldYour = , cfgLabelOldYour =
bool "=====" "\ESC[1;37m=\ESC[0;32m" color `fromMaybe` labelOldYour bool "=====" "\ESC[1;37m=\ESC[0;32m" color `fromMaybe` labelOldYour
, cfgLabelEnd = , cfgLabelEnd =
@ -210,8 +223,19 @@ data Command
{ gmFiles :: Maybe [FilePath] { gmFiles :: Maybe [FilePath]
, gmDoAdd :: Bool , gmDoAdd :: Bool
} }
| CmdDiff
{ diffOld :: FilePath
, diffYour :: FilePath
, diffUnified :: Maybe Int
}
| CmdPatch
{ patchMy :: FilePath
}
| CmdBreak
| CmdGlue
deriving (Show) deriving (Show)
cmdDiff3 :: Parser Command
cmdDiff3 = do cmdDiff3 = do
d3my <- strArgument $ metavar "MYFILE" <> help "Version with local edits" d3my <- strArgument $ metavar "MYFILE" <> help "Version with local edits"
d3old <- strArgument $ metavar "OLDFILE" <> help "Original file version" d3old <- strArgument $ metavar "OLDFILE" <> help "Original file version"
@ -219,13 +243,15 @@ cmdDiff3 = do
strArgument $ metavar "YOURFILE" <> help "Version with other people's edits" strArgument $ metavar "YOURFILE" <> help "Version with other people's edits"
pure CmdDiff3 {..} pure CmdDiff3 {..}
cmdGitMerge :: Parser Command
cmdGitMerge = do cmdGitMerge = do
gmFiles <- gmFiles <-
asum asum
[ fmap Just . some [ fmap Just . some
$ strArgument $ strArgument
$ metavar "UNMERGED" $ metavar "UNMERGED"
<> help "Unmerged file tracked by git (can be specified repeatedly)" <> help
"Unmerged file tracked by git (can be specified repeatedly)"
, flag' , flag'
Nothing Nothing
(long "unmerged" (long "unmerged"
@ -234,18 +260,46 @@ cmdGitMerge = do
] ]
gmDoAdd <- gmDoAdd <-
asum asum
[ flag' [ flag' True
True $ long "add"
(long "add" <> short 'a'
<> short 'a' <> help "Run `git add' for fully merged files"
<> help "Run `git add' for fully merged files") , flag' False $ long "no-add" <> help "Prevent running `git add'"
, flag' False (long "no-add" <> help "Prevent running `git add'")
, pure False , pure False
] ]
pure CmdGitMerge {..} pure CmdGitMerge {..}
cmdDiff :: Parser Command
cmdDiff = do
diffOld <- strArgument $ metavar "OLDFILE" <> help "Original file version"
diffYour <-
strArgument $ metavar "YOURFILE" <> help "File version with changes"
diffUnified <-
asum
[ flag' (Just 20)
$ long "unified"
<> short 'u'
<> help
"Produce unified-diff-like output for `patch' with default context size (20)"
, fmap Just . option auto
$ long "unified-size"
<> short 'U'
<> help "Produce unified diff with this context size"
, flag Nothing Nothing
$ long "merge"
<> short 'm'
<> help "Highlight the differences as with `merge' (default)"
]
pure CmdDiff {..}
cmdPatch :: Parser Command
cmdPatch = do
patchMy <- strArgument $ metavar "MYFILE" <> help "File to be modified"
pure CmdPatch {..}
-- TODO have some option to output the (partially merged) my/old/your files so -- TODO have some option to output the (partially merged) my/old/your files so
-- that folks can continue with external program or so (such as meld) -- that folks can continue with external program or so (such as meld)
cmd :: Parser Command
cmd = cmd =
hsubparser hsubparser
$ mconcat $ mconcat
@ -255,11 +309,23 @@ cmd =
, command "git" , command "git"
$ info cmdGitMerge $ info cmdGitMerge
$ progDesc "Automerge unmerged files in git conflict" $ progDesc "Automerge unmerged files in git conflict"
, command "diff"
$ info cmdDiff
$ progDesc "Find differences between two files"
, command "patch"
$ info cmdPatch
$ progDesc "Apply a patch from `diff' to file"
, command "break"
$ info (pure CmdBreak)
$ progDesc "Break text to tokens"
, command "glue"
$ info (pure CmdGlue)
$ progDesc "Glue tokens back to text"
] ]
parseOpts :: IO (Config, Command) parseOpts :: IO (Config, Command)
parseOpts = parseOpts =
customExecParser (prefs helpShowGlobals) customExecParser (prefs $ helpShowGlobals <> subparserInline)
$ info $ info
(liftA2 (,) config cmd (liftA2 (,) config cmd
<**> helper <**> helper

157
Progs.hs Normal file
View file

@ -0,0 +1,157 @@
module Progs where
import Control.Exception
import Control.Monad
import Data.List
import Data.Maybe
import System.Environment
import System.Exit
import System.IO
import System.Process
import Opts
import qualified Toks
bracketFile :: FilePath -> IOMode -> (Handle -> IO c) -> IO c
bracketFile path mode = bracket (openFile path mode) hClose
{-
- interface to gnu diff
-}
diffProg :: IO String
diffProg = fromMaybe "diff" <$> lookupEnv "WERGE_DIFF"
patchProg :: IO String
patchProg = fromMaybe "patch" <$> lookupEnv "WERGE_PATCH"
runDiff :: FilePath -> FilePath -> FilePath -> IO ()
runDiff f1 f2 out = do
diff <- diffProg
st <-
bracketFile out WriteMode $ \oh ->
withCreateProcess
(proc
diff
[ "--text"
, "--new-line-format=+%L"
, "--old-line-format=-%L"
, "--unchanged-line-format= %L"
, f1
, f2
])
{std_in = NoStream, std_out = UseHandle oh} $ \_ _ _ -> waitForProcess
when (st == ExitFailure 2) $ error "diff failed"
unless (st `elem` [ExitSuccess, ExitFailure 1])
$ error "diff failed for unknown reason (is GNU diffutils installed?)"
runDiffRaw :: Int -> FilePath -> FilePath -> FilePath -> IO Bool
runDiffRaw u f1 f2 out = do
diff <- diffProg
st <-
bracketFile out WriteMode $ \oh ->
withCreateProcess
(proc diff ["--text", "--unified=" ++ show u, f1, f2])
{std_in = NoStream, std_out = UseHandle oh} $ \_ _ _ -> waitForProcess
unless (st `elem` [ExitSuccess, ExitFailure 1]) $ error "diff failed"
pure (st /= ExitSuccess) -- report if diff thinks that the files differed
runPatch :: FilePath -> Handle -> IO Bool
runPatch f hi = do
patch <- patchProg
st <-
withCreateProcess
(proc patch ["--silent", "--batch", "--merge=diff3", f])
{std_in = UseHandle hi} $ \_ _ _ -> waitForProcess
unless (st `elem` [ExitSuccess, ExitFailure 1]) $ error "patch failed"
pure (st /= ExitSuccess) -- report if patch thinks that stuff has failed
{-
- interface to git
-}
gitProg :: IO String
gitProg = fromMaybe "git" <$> lookupEnv "WERGE_GIT"
gitRepoRelRoot :: IO FilePath
gitRepoRelRoot = do
git <- gitProg
(path, st) <-
withCreateProcess
(proc git ["rev-parse", "--show-cdup"])
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
(,) <$> hGetContents' oh <*> waitForProcess p
unless (st == ExitSuccess) $ error "git failed"
case lines path of
[p] -> pure p
_ -> fail "bad git-rev-parse output"
gitUnmerged :: IO [FilePath]
gitUnmerged = do
git <- gitProg
(paths, st) <-
withCreateProcess
(proc git ["status", "--porcelain=v1"])
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
(,)
<$> (map (drop 3) . filter ("UU " `isPrefixOf`) . lines
<$> hGetContents' oh)
<*> waitForProcess p
unless (st == ExitSuccess) $ error "git failed"
pure paths
gitCheckoutMOY ::
Config -> FilePath -> FilePath -> FilePath -> FilePath -> IO ()
gitCheckoutMOY cfg u my old your = do
git <- gitProg
(paths, st) <-
withCreateProcess
(proc git ["ls-files", "--unmerged", "--", u])
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
(,)
<$> (sortOn snd
. map ((\[a, b] -> (a, b)) . take 2 . drop 1 . words)
. lines
<$> hGetContents' oh)
<*> waitForProcess p
unless (st == ExitSuccess) $ error "git failed"
let co (hash, _) path = do
st' <-
withCreateProcess
(proc "git" ["cat-file", "blob", hash])
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just ho) _ p -> do
hSplitToFile cfg ho path
waitForProcess p
unless (st' == ExitSuccess) . error
$ "failed checking out " ++ u ++ " from blob " ++ hash
case paths of
[(_, "1"), (_, "2"), (_, "3")] ->
zipWithM co paths [old, my, your] >> pure ()
_ -> error $ "bad data from ls-files for unmerged " ++ u
gitAdd :: FilePath -> IO ()
gitAdd path = do
git <- gitProg
st <- rawSystem git ["add", "--", path]
unless (st == ExitSuccess) $ error "git-add failed"
{-
- interface to external tokenizers
-
- TODO this might probably enforce joinSpaces?
- or have joinSpaces as configurable? (probably best, default true)
-}
hSplit :: Config -> Handle -> Handle -> IO ()
hSplit cfg hi ho =
case cfgTokenizer cfg of
TokenizeCharCategory -> internal Toks.splitCategory
TokenizeCharCategorySimple -> internal Toks.splitSimple
TokenizeFilter fltr -> do
st <-
withCreateProcess
(shell fltr) {std_in = UseHandle ho, std_out = UseHandle ho} $ \_ _ _ ->
waitForProcess
unless (st == ExitSuccess) $ error "tokenize filter failed"
where
internal s = hGetContents hi >>= hPutStr ho . Toks.toFile . s
hSplitToFile :: Config -> Handle -> FilePath -> IO ()
hSplitToFile cfg hi path = bracketFile path WriteMode $ hSplit cfg hi

130
README.md
View file

@ -1,20 +1,26 @@
# werge (merge weird stuff) # werge (merge weird stuff)
This is a partial work-alike of `diff3` and `git merge` and other merge-y tools This is a partial work-alike of `diff3`, `patch`, `git merge` and other merge-y
that is capable of tools that is capable of:
- merging token-size changes instead of line-size ones - merging token-size changes (words, identifiers, sentences) instead of
- largely ignoring changes in blank characters line-size ones
- merging changes in blank characters separately or ignoring them altogether
These properties are great for several use-cases: These properties are great for several use-cases:
- merging free-flowing text changes (such as in TeX) irrespective of line breaks - combining changes in free-flowing text (such as in TeX or Markdown),
etc, irrespectively of changed line breaks, paragraph breaking and justification,
- merging of change sets that use different code formatters etc.
- merging of code formatted with different code formatters
- minimizing the conflict size of tiny changes to a few characters, making them - minimizing the conflict size of tiny changes to a few characters, making them
easier to resolve easier to resolve
Separate `diff`&`patch` functionality is provided too for sending
token-granularity patches. (The patches are similar to what `git diff
--word-diff` produces, but can be applied to files.)
## Demo ## Demo
Original (`old` file): Original (`old` file):
@ -79,39 +85,49 @@ I still cannot do verses.
- Some tokens are marked as spaces by the tokenizer, which allows the merge - Some tokens are marked as spaces by the tokenizer, which allows the merge
algorithm to be (selectively) more zealous when resolving conflicts on these. algorithm to be (selectively) more zealous when resolving conflicts on these.
This approach differs from various other structured-merge tools by being Compared to e.g. `difftastic`, `mergiraf` and similar tools, **`werge` is
completely oblivious about the file structure. Werge trades off some merge completely oblivious about the actual file structure** and works on any file
quality for (a lot of) complexity. type. This choice trades off some merge quality for (a lot of) complexity.
Tokenizers are simple, implementable as linear scanners that print separate Tokenizers are simple, implementable as linear scanners that print separate
tokens on individual lines that are prefixed with a space mark (`.` for space tokens on individual lines that are prefixed with a space mark (`.` for space
and `|` for non-space), and also escape newlines and backslashes. A default and `/` for non-space), and also escape newlines and backslashes. A default
tokenization of string "hello \ world" with a new line at the end is listed tokenization of string "hello \ world" with a new line at the end is listed
below (note the invisible space on the lines with dots): below (note the invisible space on the lines with dots):
``` ```
|hello /hello
. .
|\\ /\\
. .
|world /world
.\n .\n
``` ```
Users may supply any tokenizer via option `-F`, e.g. this script makes ### Custom tokenizers
line-size tokens (reproducing the usual line merges):
``` Users may supply any tokenizer via option `-F`. The script below produces
line-size tokens for demonstration (in turn, `werge` will do the usual line
merges), and can be used e.g. via `-F ./tokenize.py`:
```py
#!/usr/bin/env python3 #!/usr/bin/env python3
import sys import sys
for l in sys.stdin.readlines(): for l in sys.stdin.readlines():
if len(l)==0: continue if len(l)==0: continue
if l[-1]=='\n': if l[-1]=='\n':
print('|'+l[:-1].replace('\\','\\\\')+'\\n') print('/'+l[:-1].replace('\\','\\\\')+'\\n')
else: else:
print('|'+l.replace('\\','\\\\')) print('/'+l.replace('\\','\\\\'))
``` ```
### History
I previously made an attempt to solve this in `adiff` software, which failed
because the approach was too complex. Before that, the issue was tackled by
Arek Antoniewicz on MFF CUNI, who used regex-edged DFAs (REDFAs) to construct
user-specifiable tokenizers in a pretty cool way.
## Installation ## Installation
```sh ```sh
@ -144,16 +160,16 @@ automatically to `filename.werge-backup`.
``` ```
werge -- blanks-friendly mergetool for tiny interdwindled changes werge -- blanks-friendly mergetool for tiny interdwindled changes
Usage: werge [(-F|--tok-filter FILTER) | (-i|--simple-tokens) | Usage: werge [(-F|--tok-filter FILTER) | (-i|--simple-tokens) |
(-I|--full-tokens)] [--no-zeal | (-z|--zeal)] (-I|--full-tokens)] [--no-zeal | (-z|--zeal)]
[-S|--space (keep|my|old|your)] [-S|--space (keep|my|old|your)]
[-s | --resolve-space (normal|keep|my|old|your)] [-s | --resolve-space (normal|keep|my|old|your)]
[--conflict-space-overlaps] [--conflict-space-separate] [--conflict-space-overlaps] [--conflict-space-separate]
[--conflict-space-all] [-C|--expand-context N] [--conflict-space-all] [-C|--expand-context N]
[--resolve (keep|my|old|your)] [--conflict-overlaps] [--resolve (keep|my|old|your)] [--conflict-overlaps]
[--conflict-separate] [--conflict-all] [-G|--color] [--conflict-separate] [--conflict-all] [-G|--color]
[--label-start "<<<<<"] [--label-mo "|||||"] [--label-oy "====="] [--label-start "<<<<<"] [--label-mo "|||||"] [--label-diff "|||||"]
[--label-end ">>>>>"] COMMAND [--label-oy "====="] [--label-end ">>>>>"] COMMAND
Available options: Available options:
-F,--tok-filter FILTER External program to separate the text to tokens -F,--tok-filter FILTER External program to separate the text to tokens
@ -183,9 +199,10 @@ Available options:
Never resolve separate (non-overlapping) changes in Never resolve separate (non-overlapping) changes in
space-only tokens space-only tokens
--conflict-space-all Never resolve any changes in space-only tokens --conflict-space-all Never resolve any changes in space-only tokens
-C,--expand-context N Consider changes that are at most N tokens apart to -C,--expand-context N Consider changes that are at less than N tokens apart
be a single change. Zero may cause bad resolutions of to be a single change; 0 turns off conflict
near conflicting edits (default: 1) expansion, 1 may cause bad resolutions of near
conflicting edits (default: 2)
--resolve (keep|my|old|your) --resolve (keep|my|old|your)
Resolve general conflicts in favor of a given Resolve general conflicts in favor of a given
version, or keep the conflicts (default: keep) version, or keep the conflicts (default: keep)
@ -198,6 +215,7 @@ Available options:
`less -R') `less -R')
--label-start "<<<<<" Label for beginning of the conflict --label-start "<<<<<" Label for beginning of the conflict
--label-mo "|||||" Separator of local edits and original --label-mo "|||||" Separator of local edits and original
--label-diff "|||||" Separator for old and new version
--label-oy "=====" Separator of original and other people's edits --label-oy "=====" Separator of original and other people's edits
--label-end ">>>>>" Label for end of the conflict --label-end ">>>>>" Label for end of the conflict
-h,--help Show this help text -h,--help Show this help text
@ -206,6 +224,10 @@ Available options:
Available commands: Available commands:
merge diff3-style merge of two changesets merge diff3-style merge of two changesets
git Automerge unmerged files in git conflict git Automerge unmerged files in git conflict
diff Find differences between two files
patch Apply a patch from `diff' to file
break Break text to tokens
glue Glue tokens back to text
werge is a free software, use it accordingly. werge is a free software, use it accordingly.
``` ```
@ -237,3 +259,47 @@ Available options:
--no-add Prevent running `git add' --no-add Prevent running `git add'
-h,--help Show this help text -h,--help Show this help text
``` ```
#### Finding differences
```
Usage: werge diff OLDFILE YOURFILE
[(-u|--unified) | (-U|--unified-size ARG) | (-m|--merge)]
Find differences between two files
Available options:
OLDFILE Original file version
YOURFILE File version with changes
-u,--unified Produce unified-diff-like output for `patch' with
default context size (20)
-U,--unified-size ARG Produce unified diff with this context size
-m,--merge Highlight the differences as with `merge' (default)
-h,--help Show this help text
```
#### Patching files in place
```
Usage: werge patch MYFILE
Apply a patch from `diff' to file
Available options:
MYFILE File to be modified
-h,--help Show this help text
```
#### Converting between files and tokens
Both commands work as plain stdin-to-stdout filters:
```
Usage: werge break
Break text to tokens
```
```
Usage: werge glue
Glue tokens back to text
```

14
Toks.hs
View file

@ -15,19 +15,23 @@ unescape :: String -> String
unescape [] = [] unescape [] = []
unescape ('\\':'\\':xs) = '\\' : unescape xs unescape ('\\':'\\':xs) = '\\' : unescape xs
unescape ('\\':'n':xs) = '\n' : unescape xs unescape ('\\':'n':xs) = '\n' : unescape xs
unescape ('\\':_) = error "bad escape?" unescape ('\\':_) = error "bad escape on input"
unescape (x:xs) = x : unescape xs unescape (x:xs) = x : unescape xs
tok ('.':_) = True
tok ('/':_) = True
tok _ = False
markSpace :: String -> Tok markSpace :: String -> Tok
markSpace [] = error "wat" markSpace [] = error "empty token"
markSpace s@(c:_) markSpace s@(c:_)
| isSpace c = '.' : s | isSpace c = '.' : s
| otherwise = '|' : s | otherwise = '/' : s
unmarkSpace :: Tok -> String unmarkSpace :: Tok -> String
unmarkSpace ('.':s) = s unmarkSpace ('.':s) = s
unmarkSpace ('|':s) = s unmarkSpace ('/':s) = s
unmarkSpace x = error "unwat" unmarkSpace _ = error "bad space marking on input"
space :: Tok -> Bool space :: Tok -> Bool
space ('.':_) = True space ('.':_) = True

View file

@ -22,8 +22,9 @@ executable werge
main-is: Main.hs main-is: Main.hs
other-modules: other-modules:
Opts Opts
Toks
Paths_werge Paths_werge
Progs
Toks
autogen-modules: Paths_werge autogen-modules: Paths_werge
build-depends: build-depends: