From 23b62f6344ae74c160a9040f163642d3b3c7cede Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Sun, 27 Sep 2020 19:26:29 +0200 Subject: [PATCH] software F engineering --- adiff.cabal | 2 ++ src/Diff.hs | 17 ++++--------- src/Diff3.hs | 27 ++++----------------- src/Format.hs | 40 ++++++++++++++++++------------ src/Hunks.hs | 4 +-- src/Main.hs | 15 +++++++++--- src/Merge.hs | 66 +++++++++++++++++++++++++++++++++++++++----------- src/Redfa.hs | 16 +++--------- src/Substr.hs | 4 +-- src/Types.hs | 7 ++++++ src/Version.hs | 1 + 11 files changed, 116 insertions(+), 83 deletions(-) diff --git a/adiff.cabal b/adiff.cabal index 30a668f..af860c9 100644 --- a/adiff.cabal +++ b/adiff.cabal @@ -42,6 +42,8 @@ executable adiff -- .hs or .lhs file containing the Main module. main-is: Main.hs + ghc-options: -O2 -Wall + -- Modules included in this executable, other than Main. other-modules: Diff, Diff3, diff --git a/src/Diff.hs b/src/Diff.hs index 8409618..244aaf2 100644 --- a/src/Diff.hs +++ b/src/Diff.hs @@ -4,18 +4,8 @@ module Diff ( diffToks ) where -import Control.Monad import qualified Data.ByteString as B -import qualified Data.ByteString.Builder as BB -import qualified Data.ByteString.Internal as BI -import qualified Data.ByteString.Lazy as BL -import Data.ByteString.UTF8 (fromString) -import Data.Function (on) -import Data.List (groupBy, mapAccumL) -import Data.List.Extra (split, takeEnd) import qualified Data.Vector as V -import qualified Data.Vector.Unboxed.Mutable as M -import Substr import Types data DiffEnv = @@ -34,6 +24,7 @@ data DiffEnv = } deriving (Show) +toksMatch :: Int -> Int -> DiffEnv -> Bool toksMatch x y DiffEnv {deT1 = t1, deT2 = t2} = t1 V.! x == t2 V.! y stripEqToks :: TV -> TV -> (Diff, Diff, TV, TV) @@ -94,6 +85,7 @@ diffToks t1' t2' = pre ++ res ++ post , deTrans = True } +minIndexFwd :: V.Vector (Int, Int) -> Int minIndexFwd = V.minIndexBy (\x y -> @@ -102,6 +94,7 @@ minIndexFwd = else GT --basically normal V.minIndex ) +minIndexRev :: V.Vector (Int, Int) -> Int minIndexRev = V.minIndexBy (\x y -> @@ -119,10 +112,10 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = vecEmid = vecE mid extraScore i = if isToken - then -(B.length s) + then -(B.length str) else 0 where - (isToken, s) = deT1 de V.! i + (isToken, str) = deT1 de V.! i vecS = vec -- "forward" operation where vec i diff --git a/src/Diff3.hs b/src/Diff3.hs index 3bc3e25..8e4ca20 100644 --- a/src/Diff3.hs +++ b/src/Diff3.hs @@ -1,15 +1,10 @@ {-# LANGUAGE TupleSections #-} + module Diff3 where import Diff -import Types import Merge - -data Origin - = Stable - | Mine - | Your - deriving (Show, Eq) +import Types diff3Toks :: MergeOpts -> TV -> TV -> TV -> Diff diff3Toks mo tMine tOrig tYour = @@ -30,27 +25,15 @@ diff3Toks mo tMine tOrig tYour = align ((Add, a):as) bs@((Remove, _):_) = (Mine, (Add, a)) : align as bs align as@((Remove, _):_) ((Add, b):bs) = (Your, (Add, b)) : align as bs align [] [] = [] - align as@((Add, _):_) [] = map (Mine,) as - align [] bs@((Add, _):_) = map (Your,) bs + align as@((Add, _):_) [] = map (Mine, ) as + align [] bs@((Add, _):_) = map (Your, ) bs align _ _ = error "Internal failure: diffstreams seem broken, cannot align" conflict :: [(Origin, (Op, Tok))] -> Diff conflict [] = [] conflict as@(a:_) | stable a = applySplit stable (map snd) conflict as - | unstable a = applySplit unstable merge conflict as + | otherwise = applySplit (not . stable) (merge mo) conflict as applySplit :: (a -> Bool) -> ([a] -> [b]) -> ([a] -> [b]) -> [a] -> [b] applySplit cond f g xs = f (takeWhile cond xs) ++ g (dropWhile cond xs) - merge :: [(Origin, (Op,Tok))] -> Diff - merge cs = - let mys = map (\a -> map snd $ filter ((== a) . fst) cs) [Mine, Your] - [tokOrigsMine, tokOrigsYour] = - map (map snd.filter ((/= Add) . fst)) mys - [tokMine, tokYour] = map (map snd.filter ((/= Remove) . fst)) mys - in if tokOrigsMine /= tokOrigsYour - then error "Internal failure: merge origins differ" - else map (MineChanged,) tokMine ++ - map (Original,) tokOrigsMine ++ - map (YourChanged,) tokYour stable (Stable, _) = True stable _ = False - unstable = not . stable diff --git a/src/Format.hs b/src/Format.hs index 541befd..e225464 100644 --- a/src/Format.hs +++ b/src/Format.hs @@ -1,39 +1,47 @@ {-# LANGUAGE OverloadedStrings #-} -module Format where +module Format + ( pprHunks + , pprHunk + , pprDiff1 + ) where import Types import qualified Data.ByteString as B import qualified Data.ByteString.Builder as BB import qualified Data.ByteString.Internal as BI -import qualified Data.ByteString.Lazy as BL import Data.String -import Substr -pprHunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@" +pprHunkHdr :: Int -> Int -> BB.Builder +pprHunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@" +lineSep :: BB.Builder lineSep = fromString "\n" pprHunks :: [Hunk] -> BB.Builder pprHunks = mconcat . map pprHunk pprHunk :: Hunk -> BB.Builder -pprHunk ((i, j), toks) = mconcat ((pprHunkHdr i j <> lineSep) : map pprDiff1 toks) +pprHunk ((i, j), toks) = + mconcat ((pprHunkHdr i j <> lineSep) : map pprDiff1 toks) pprDiff1 :: (Op, Tok) -> BB.Builder -pprDiff1 (op, (tok, s)) = - fromString pfx <> escNewlines s <> lineSep +pprDiff1 (op, (tok, s)) = fromString pfx <> escNewlines s <> lineSep where - pfx = [opc,tc] - opc = case op of - Add -> '+' - Keep -> ' ' - Remove -> '-' - MineChanged -> '<' - Original -> '=' - YourChanged -> '>' - tc = if tok then '|' else '.' + pfx = [opc, tc] + opc = + case op of + Add -> '+' + Keep -> ' ' + Remove -> '-' + MineChanged -> '<' + Original -> '=' + YourChanged -> '>' + tc = + if tok + then '|' + else '.' escNewlines :: BS -> BB.Builder escNewlines s diff --git a/src/Hunks.hs b/src/Hunks.hs index aabae99..9c29b4b 100644 --- a/src/Hunks.hs +++ b/src/Hunks.hs @@ -21,7 +21,7 @@ hunks ctxt d = zipWith (<=) (padEnd ctxt edits) (replicate (1 + 2 * ctxt) 0 ++ edits) addNums = snd . mapAccumL countTok (0, 0) stripNums = (,) <$> fst . head <*> map snd - countTok x@(i, j) d@(op, _) = + countTok x@(i, j) d'@(op, _) = (,) (case op of Remove -> (i + 1, j) @@ -30,4 +30,4 @@ hunks ctxt d = MineChanged -> (i, j) Original -> (i + 1, j + 1) YourChanged -> (i, j)) - (x, d) + (x, d') diff --git a/src/Main.hs b/src/Main.hs index 3171c9e..f0e8e04 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,9 +1,6 @@ module Main where -import qualified Data.ByteString as B import qualified Data.ByteString.Builder as BB -import qualified Data.ByteString.Char8 as B8 -import Data.ByteString.UTF8 (fromString) import qualified Data.Vector as V import Diff import Diff3 @@ -14,6 +11,7 @@ import Options.Applicative import Redfa import System.IO (stdout) import System.IO.MMap +import Types import Version data ADiffOptions = @@ -46,18 +44,26 @@ data ADiffCommandOpts } deriving (Show) +contextOpt :: Parser Int contextOpt = + check <$> option auto (metavar "CONTEXT" <> short 'C' <> long "context" <> value 5 <> help "How many tokens around the change to include in the patch") + where + check c + | c < 0 = error "Negative context" + | otherwise = c +diffCmdOptions :: Parser ADiffCommandOpts diffCmdOptions = CmdDiff <$> contextOpt <*> strArgument (metavar "FROMFILE") <*> strArgument (metavar "TOFILE") +patchCmdOptions :: Parser ADiffCommandOpts patchCmdOptions = CmdPatch <$> switch @@ -82,6 +88,7 @@ patchCmdOptions = help "Strip NUM leading components from the paths" <> value 0) <*> mergeOption True +diff3CmdOptions :: Parser ADiffCommandOpts diff3CmdOptions = CmdDiff3 <$> contextOpt <*> strArgument (metavar "MYFILE") <*> strArgument (metavar "OLDFILE") <*> @@ -98,8 +105,10 @@ actionOption = info diff3CmdOptions $ progDesc "Compare and merge three files" ] +adiffOptions :: Parser ADiffOptions adiffOptions = ADiffOptions <$> redfaOption <*> actionOption +loadToks :: RedfaSpec -> FilePath -> IO TV loadToks redfa f = V.fromList <$> (mmapFileByteString f Nothing >>= redfaTokenize redfa) diff --git a/src/Merge.hs b/src/Merge.hs index ddd527c..0cfe750 100644 --- a/src/Merge.hs +++ b/src/Merge.hs @@ -1,12 +1,13 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} module Merge ( MergeOpts(..) , mergeOption , fmtMerged + , merge ) where -import qualified Data.ByteString as B import qualified Data.ByteString.Builder as BB import Data.String import Options.Applicative @@ -15,6 +16,7 @@ import Types data MergeOpts = MergeOpts { mergeDoMerge :: Bool + , mergeIgnoreWhitespace :: Bool , mergeForceWhitespace :: Bool , mergeKeepWhitespace :: Bool , mergeCStartStr :: BS @@ -24,8 +26,7 @@ data MergeOpts = } deriving (Show) -marker = fromString . replicate 7 - +mergeOption :: Bool -> Parser MergeOpts mergeOption forPatch = addLBR <$> ((,) <$> @@ -35,6 +36,7 @@ mergeOption forPatch = help "Automatically add a line break after conflict markers") <*> mo) where + marker = fromString . replicate 7 mo = MergeOpts <$> switch @@ -45,18 +47,27 @@ mergeOption forPatch = then "Merge using conflict markers instead of printing the rejected hunks" else "Output the merged file instead of the patch")) <*> switch - (short 'w' <> - long "whitespace" <> + (short 'i' <> + long "ignore-whitespace" <> + help + ("Ignore " ++ + (if forPatch + then "hunks" + else "chunks") ++ + " that change only whitespace")) <*> + switch + (short 'f' <> + long "force-whitespace" <> help ((if forPatch - then "Force rejecting a thunk" + then "Force rejecting a hunk" else "Force a merge conflict") ++ - " on whitespace mismatch")) <*> + " on whitespace mismatch (overrides `ignore-whitespace')")) <*> switch (short 'k' <> long "keep-whitespace" <> help - ("On whitespace mismatch, default to the version from " ++ + ("On whitespace mismatch, output the version from " ++ (if forPatch then "original file" else "MYFILE") ++ @@ -89,24 +100,27 @@ mergeOption forPatch = , mergeCEndStr = mergeCEndStr x <> "\n" } -{- This kinda relies on reasonable ordering within the conflicts in the Diff -} +{- This kinda relies on reasonable ordering + - within the conflicts in the Diff -} fmtMerged :: MergeOpts -> Diff -> BB.Builder fmtMerged mo = go Keep where go op [] | conflictOp op = bb $ mergeCEndStr mo | otherwise = mempty - go last l@((op, (_, tok)):xs) - | conflictOp last && not (conflictOp op) = + go prev l@((op, (_, tok)):xs) + | conflictOp prev && not (conflictOp op) = bb (mergeCEndStr mo) <> go Keep l - | not (conflictOp last) && conflictOp op = + | not (conflictOp prev) && conflictOp op = bb (mergeCStartStr mo) <> go MineChanged l - | last /= op && conflictOp op = + | prev /= op && conflictOp op = (case op of MineChanged -> bb $ mergeCStartStr mo Original -> bb $ mergeMineSepStr mo - YourChanged -> bb $ mergeYourSepStr mo) <> + YourChanged -> bb $ mergeYourSepStr mo + _ -> error "Internal conflict handling failure") <> go op l + | op == Remove = go op xs | otherwise = bb tok <> go op xs conflictOp o = case o of @@ -115,3 +129,27 @@ fmtMerged mo = go Keep Remove -> False _ -> True bb = BB.byteString + +merge :: MergeOpts -> [(Origin, (Op, Tok))] -> Diff +merge mo cs = go + where + mys@[diffMine, diffYour] = + map (\a -> map snd $ filter ((a ==) . fst) cs) [Mine, Your] + [tokOrigsMine, tokOrigsYour] = map (map snd . filter ((Add /=) . fst)) mys + [tokMine, tokYour] = map (map snd . filter ((Remove /=) . fst)) mys + conflict = + map (MineChanged, ) tokMine ++ + map (Original, ) tokOrigsMine ++ map (YourChanged, ) tokYour + noTokens = all (not . fst . snd) (diffMine ++ diffYour) + go + | tokOrigsMine /= tokOrigsYour = + error "Internal failure: merge origins differ" + | mergeIgnoreWhitespace mo && noTokens = map (Keep, ) tokOrigsMine + | all ((Keep ==) . fst) diffYour = diffMine -- only mine changed + | all ((Keep ==) . fst) diffMine = diffYour -- only your changed + | diffMine == diffYour = diffMine -- false conflict + | not (mergeForceWhitespace mo) && noTokens = + if mergeKeepWhitespace mo + then diffMine + else diffYour -- conflict happened, but not on significant tokens + | otherwise = conflict -- true conflict diff --git a/src/Redfa.hs b/src/Redfa.hs index 0e9720e..9967d59 100644 --- a/src/Redfa.hs +++ b/src/Redfa.hs @@ -11,12 +11,11 @@ import qualified Data.ByteString.Lazy as BL import Data.ByteString.UTF8 (fromString, toString) import Data.List import Data.Maybe -import Data.Monoid import qualified Data.Vector as V import Options.Applicative +import Substr import Text.Regex.TDFA import Types -import Substr data RedfaOption = RedfaOptionRules [BS] @@ -79,7 +78,7 @@ redfaRuleStringToRuleStr s = in go unescapeRegex :: MonadFail m => BS -> m BS -unescapeRegex s = BL.toStrict . BB.toLazyByteString <$> unescape' s +unescapeRegex s' = BL.toStrict . BB.toLazyByteString <$> unescape' s' where unescape' :: MonadFail m => BS -> m BB.Builder unescape' s @@ -112,8 +111,7 @@ unescapeRegex s = BL.toStrict . BB.toLazyByteString <$> unescape' s redfaPrepareRules :: RedfaOption -> IO RedfaSpec redfaPrepareRules opt = do (states, jumps, regexes, isToken) <- - unzip4 . mapMaybe redfaRuleStringToRuleStr <$> - redfaOptionToRuleStrings opt + unzip4 . mapMaybe redfaRuleStringToRuleStr <$> redfaOptionToRuleStrings opt uRegexes <- traverse unescapeRegex regexes startREs <- traverse (makeRegexM . (fromString "\\`" <>)) uRegexes midREs <- traverse (makeRegexM . (fromString "\\`(.|\n)" <>)) uRegexes @@ -136,13 +134,7 @@ redfaTokenize :: MonadFail m => RedfaSpec -> BS -> m [Tok] redfaTokenize spec s = redfaTokenize' spec s (redfaStart spec) 0 [] redfaTokenize' :: - MonadFail m - => RedfaSpec - -> BS - -> Int - -> Int - -> [Int] - -> m [Tok] + MonadFail m => RedfaSpec -> BS -> Int -> Int -> [Int] -> m [Tok] redfaTokenize' spec s state off visited | off >= B.length s = pure [] | otherwise = diff --git a/src/Substr.hs b/src/Substr.hs index 87b8f1e..e57210c 100644 --- a/src/Substr.hs +++ b/src/Substr.hs @@ -1,7 +1,7 @@ -module Substr where +module Substr where -import Types import qualified Data.ByteString as B +import Types substr :: Int -> Int -> BS -> BS substr b l = B.take l . B.drop b diff --git a/src/Types.hs b/src/Types.hs index 93698db..c2db41d 100644 --- a/src/Types.hs +++ b/src/Types.hs @@ -5,6 +5,7 @@ import Data.Vector type BS = ByteString +{- TODO: all this needs to get unboxed -} type Tok = (Bool, BS) type TV = Vector Tok @@ -21,3 +22,9 @@ data Op | Original | YourChanged deriving (Show, Eq) + +data Origin + = Stable + | Mine + | Your + deriving (Show, Eq) diff --git a/src/Version.hs b/src/Version.hs index 91985ee..0702390 100644 --- a/src/Version.hs +++ b/src/Version.hs @@ -7,6 +7,7 @@ import Options.Applicative adiffVersion :: String adiffVersion = VERSION_adiff +versionOption :: String -> Parser (a -> a) versionOption prog = infoOption (prog <>