diff --git a/adiff.cabal b/adiff.cabal index 4d9120f..38c1de1 100644 --- a/adiff.cabal +++ b/adiff.cabal @@ -40,22 +40,31 @@ build-type: Simple executable adiff -- .hs or .lhs file containing the Main module. - main-is: MainDiff.hs + main-is: Main.hs -- Modules included in this executable, other than Main. - other-modules: Redfa, Version, Diff, Merge + other-modules: Diff, + Diff3, + Format, + Merge, + Patch, + Redfa, + Substr, + Types, + Version -- LANGUAGE extensions used by modules in this package. other-extensions: CPP -- Other library packages from which modules are imported. build-depends: base ^>=4.13.0.0, + extra ^>= 1.7, mmap ^>=0.5, regex-tdfa ^>= 1.3, optparse-applicative ^>=0.16, bytestring ^>= 0.10.12, vector ^>=0.12, - utf8-string ^>=1.0 + utf8-string ^>=1.0 -- Directories containing source files. hs-source-dirs: src diff --git a/src/Diff.hs b/src/Diff.hs index e4344c1..dbc2379 100644 --- a/src/Diff.hs +++ b/src/Diff.hs @@ -3,10 +3,6 @@ module Diff , Op , diffToks , hunks - , pprDiff - , pprDiff1 - , pprHunk - , pprHunks ) where import Control.Monad @@ -16,31 +12,17 @@ import qualified Data.ByteString.Internal as BI import qualified Data.ByteString.Lazy as BL import Data.ByteString.UTF8 (fromString) import Data.Function (on) -import Data.List (groupBy) +import Data.List (groupBy, mapAccumL) +import Data.List.Extra (split, takeEnd) import qualified Data.Vector as V import qualified Data.Vector.Unboxed.Mutable as M - -type Tok = (Bool, (Int, Int)) - -type Diff = [(Op, Tok)] - -type Hunk = ((Int, Int), [(Op, Tok)]) - -type BS = B.ByteString - -type TV = V.Vector Tok - -data Op - = Remove - | Keep - | Add - deriving (Show, Eq) +import Debug.Trace +import Substr +import Types data DiffEnv = DiffEnv - { deD1 :: BS - , deD2 :: BS - , deT1 :: TV + { deT1 :: TV , deT2 :: TV , deS :: Int , deE :: Int @@ -54,28 +36,21 @@ data DiffEnv = } deriving (Show) -substr b e = B.take (e - b) . B.drop b +toksMatch x y DiffEnv {deT1 = t1, deT2 = t2} = + t1 V.! x == t2 V.! y -toksMatch x y DiffEnv {deT1 = t1, deT2 = t2, deD1 = d1, deD2 = d2} = - toksMatch' x y d1 d2 t1 t2 - -toksMatch' x y d1 d2 t1 t2 = - let (isTok1, (b1, e1)) = t1 V.! x - (isTok2, (b2, e2)) = t2 V.! y - in isTok1 == isTok2 && substr b1 e1 d1 == substr b2 e2 d2 - -stripEqToks :: BS -> BS -> TV -> TV -> (Diff, Diff, TV, TV) -stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2') +stripEqToks :: TV -> TV -> (Diff, Diff, TV, TV) +stripEqToks t1 t2 = (pre, post, t1', t2') where l1 = V.length t1 l2 = V.length t2 firstDiff i - | i < l1 && i < l2 && toksMatch' i i d1 d2 t1 t2 = firstDiff (i + 1) + | i < l1 && i < l2 && (t1 V.! i == t2 V.! i) = firstDiff (i + 1) | otherwise = i b = firstDiff 0 lastDiff i | l1 - i - 1 >= b && - l2 - i - 1 >= b && toksMatch' (l1 - i - 1) (l2 - i - 1) d1 d2 t1 t2 = + l2 - i - 1 >= b && t1 V.! (l1 - i - 1) == t2 V.! (l2 - i - 1)= lastDiff (i + 1) | otherwise = i e = lastDiff 0 @@ -84,19 +59,17 @@ stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2') t1' = V.slice b (l1 - e - b) t1 t2' = V.slice b (l2 - e - b) t2 -diffToks :: BS -> BS -> TV -> TV -> Diff -diffToks d1 d2 t1' t2' = pre ++ res ++ post +diffToks :: TV -> TV -> Diff +diffToks t1' t2' = pre ++ res ++ post where - (pre, post, t1, t2) = stripEqToks d1 d2 t1' t2' + (pre, post, t1, t2) = stripEqToks t1' t2' res | V.null t1 = map (\t -> (Add, t)) (V.toList t2) | V.null t2 = map (\t -> (Remove, t)) (V.toList t1) | V.length t1 >= V.length t2 = diffToks' $ DiffEnv - { deD1 = d1 - , deD2 = d2 - , deT1 = t1 + { deT1 = t1 , deT2 = t2 , deS = 0 , deE = V.length t1 @@ -111,9 +84,7 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post | otherwise = diffToks' $ DiffEnv - { deD1 = d2 - , deD2 = d1 - , deT1 = t2 + { deT1 = t2 , deT2 = t1 , deS = 0 , deE = V.length t2 @@ -151,10 +122,10 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = vecEmid = vecE mid extraScore i = if isToken - then -(tokEnd - tokBegin) + then -(B.length s) else 0 where - (isToken, (tokBegin, tokEnd)) = deT1 de V.! i + (isToken, s) = deT1 de V.! i vecS = vec -- "forward" operation where vec i @@ -291,70 +262,31 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = diffToks' de {deE = mid, deVE = vecEmid, deB = opt} ++ diffToks' de {deS = mid, deVS = vecSmid, deA = opt} -pprDiff :: BS -> BS -> Diff -> [BS] -pprDiff d1 d2 = map (pprDiff1 d1 d2) - -pprDiff1 d1 d2 (op, (tok, (i, j))) = - fromString pfx <> escNewlines (substr i j s) - where - pfx = - case (op, tok) of - (Add, True) -> "+|" - (Remove, True) -> "-|" - (Keep, True) -> " |" - (Add, False) -> "+." - (Remove, False) -> "-." - (Keep, False) -> " ." - s = - case op of - Add -> d2 - _ -> d1 - -escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines' - -escNewlines' s - | B.null s = mempty - | B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s) - | B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s) - | otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s) - hunks :: Int -> Diff -> [Hunk] -hunks ctxt = go 0 0 0 0 [] . groupBy ((==) `on` fst) +hunks ctxt = + map (stripNums . concat) . + split null . + concat . check . map breakKeeps . groupBy ((==) `on` fst . snd) . addNums where - go _ _ bi bj backlog [] = - if null backlog - then [] - else [((bi, bj), backlog)] - go i j bi bj backlog (g:gs) = - case fst (head g) of - Remove -> go (i + length g) j bi bj (backlog ++ g) gs - Add -> go i (j + length g) bi bj (backlog ++ g) gs - Keep -> - let lg = length g - in if lg <= - (if null backlog || null g - then ctxt - else 2 * ctxt) - then go (i + lg) (j + lg) bi bj (backlog ++ g) gs - else (if null backlog - then id - else (:) ((bi, bj), backlog ++ take ctxt g)) $ - go - (i + lg) - (j + lg) - (i + lg - ctxt) - (j + lg - ctxt) - (if null gs - then [] - else drop (lg - ctxt) g) - gs - -hunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@" - -lineSep = fromString "\n" - -pprHunk :: BS -> BS -> Hunk -> BS -pprHunk d1 d2 ((i, j), toks) = - B.intercalate lineSep (hunkHdr i j : pprDiff d1 d2 toks) - -pprHunks d1 d2 = B.intercalate lineSep . map (pprHunk d1 d2) + addNums = snd . mapAccumL countTok (0, 0) + countTok x@(i, j) d@(op, _) = + (,) + (case op of + Remove -> (i + 1, j) + Keep -> (i + 1, j + 1) + Add -> (i, j + 1)) + (x, d) + stripNums = (,) <$> fst . head <*> map snd + breakKeeps ks@((_, (Keep, _)):_) = + let (a, b') = splitAt ctxt ks + (b, c) = splitAt ctxt b' + in if null c + then [ks] + else [a, [], takeEnd ctxt b'] + breakKeeps a = [a] + check ([_, [], _]:[]) = [] + check ([_, [], a]:xs) = [a] : checkLast xs + check a = checkLast a + checkLast [] = [] + checkLast ([a, [], _]:[]) = [[a]] + checkLast (a:xs) = a:checkLast xs diff --git a/src/Diff3.hs b/src/Diff3.hs new file mode 100644 index 0000000..b6c2092 --- /dev/null +++ b/src/Diff3.hs @@ -0,0 +1,7 @@ + +module Diff3 where + +import Types + +diff3Toks :: BS -> BS -> BS -> TV -> TV -> TV -> a +diff3Toks = undefined diff --git a/src/Format.hs b/src/Format.hs new file mode 100644 index 0000000..d4f4308 --- /dev/null +++ b/src/Format.hs @@ -0,0 +1,45 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Format where + +import Types + +import qualified Data.ByteString as B +import qualified Data.ByteString.Builder as BB +import qualified Data.ByteString.Internal as BI +import qualified Data.ByteString.Lazy as BL +import Data.String +import Substr + +pprHunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@" + +lineSep = fromString "\n" + +pprHunk :: Hunk -> BS +pprHunk ((i, j), toks) = + B.intercalate lineSep (pprHunkHdr i j : pprDiff toks) + +pprHunks = B.intercalate lineSep . map pprHunk + +pprDiff :: Diff -> [BS] +pprDiff = map pprDiff1 + +pprDiff1 (op, (tok, s)) = + fromString pfx <> escNewlines s + where + pfx = + case (op, tok) of + (Add, True) -> "+|" + (Remove, True) -> "-|" + (Keep, True) -> " |" + (Add, False) -> "+." + (Remove, False) -> "-." + (Keep, False) -> " ." + +escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines' + +escNewlines' s + | B.null s = mempty + | B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s) + | B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s) + | otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s) diff --git a/src/MainDiff.hs b/src/Main.hs similarity index 96% rename from src/MainDiff.hs rename to src/Main.hs index ea60c54..c561a35 100644 --- a/src/MainDiff.hs +++ b/src/Main.hs @@ -4,6 +4,7 @@ import qualified Data.ByteString as B import qualified Data.ByteString.Char8 as B8 import Data.ByteString.UTF8 (fromString) import qualified Data.Vector as V +import Format import Diff import Merge import Options.Applicative @@ -112,6 +113,6 @@ main = data2 <- mmapFileByteString f2 Nothing toks1 <- V.fromList <$> redfaTokenize redfa data1 toks2 <- V.fromList <$> redfaTokenize redfa data2 - let hs = hunks (max 0 ctxt) $ diffToks data1 data2 toks1 toks2 - B8.putStrLn $ pprHunks data1 data2 hs + let hs = hunks (max 0 ctxt) $ diffToks toks1 toks2 + B8.putStrLn $ pprHunks hs CmdPatch {} -> putStrLn "not supported yet" diff --git a/src/Merge.hs b/src/Merge.hs new file mode 100644 index 0000000..674d508 --- /dev/null +++ b/src/Merge.hs @@ -0,0 +1,67 @@ +module Merge + ( MergeOpts + , mergeOption + ) where + +import qualified Data.ByteString as B +import Data.String +import Options.Applicative +import Types + +data MergeOpts = + MergeOpts + { mergeDoMerge :: Bool + , mergeForceWhitespace :: Bool + , mergeKeepWhitespace :: Bool + , mergeCStartStr :: BS + , mergeMineSepStr :: BS + , mergeYourSepStr :: BS + , mergeEndStr :: BS + } + deriving (Show) + +marker = fromString . replicate 7 + +mergeOption forPatch = + MergeOpts <$> + switch + (short 'm' <> + long "merge" <> + help + (if forPatch + then "Merge using conflict markers instead of printing the rejected hunks" + else "Output the merged file instead of the patch")) <*> + switch + (short 'w' <> + long "whitespace" <> + help + ((if forPatch + then "Force rejecting a thunk" + else "Force a merge conflict") ++ + " on whitespace mismatch")) <*> + switch + (short 'k' <> + long "keep-whitespace" <> + help + ("On whitespace mismatch, default to the version from " ++ + (if forPatch + then "original file" + else "MYFILE") ++ + " instead of the one from " ++ + (if forPatch + then "patch" + else "YOURFILE"))) <*> + strOption + (long "merge-start" <> + value (marker '<') <> help "Marker for the beginning of a conflict") <*> + strOption + (long "merge-mine" <> + value (marker '|') <> + help "Marker that separates `mine' from `original' part of the conflict") <*> + strOption + (long "merge-your" <> + value (marker '=') <> + help "Marker that separates `original' from `your' part of the conflict") <*> + strOption + (long "merge-end" <> + value (marker '>') <> help "Marker for the end of a conflict") diff --git a/src/Patch.hs b/src/Patch.hs new file mode 100644 index 0000000..08b324f --- /dev/null +++ b/src/Patch.hs @@ -0,0 +1,4 @@ +module Patch where + +patchToks :: a +patchToks = undefined diff --git a/src/Redfa.hs b/src/Redfa.hs index 42b1db2..5cccdf0 100644 --- a/src/Redfa.hs +++ b/src/Redfa.hs @@ -16,6 +16,7 @@ import qualified Data.Vector as V import Options.Applicative import Text.Regex.TDFA import Diff +import Substr type BS = B.ByteString @@ -143,7 +144,7 @@ redfaTokenize' :: -> Int -> Int -> [Int] - -> m [(Bool, (Int, Int))] + -> m [Tok] redfaTokenize' spec s state off visited | off >= B.length s = pure [] | otherwise = @@ -168,7 +169,7 @@ redfaTokenize' spec s state off visited ((rule, (_, len)):_) -> let matchLen = len - ooff in (if matchLen > 0 - then (:) (rrIsToken rule, (off, off + matchLen)) + then (:) (rrIsToken rule, substr off matchLen s) else id) <$> redfaTokenize' spec diff --git a/src/Substr.hs b/src/Substr.hs new file mode 100644 index 0000000..87b8f1e --- /dev/null +++ b/src/Substr.hs @@ -0,0 +1,7 @@ +module Substr where + +import Types +import qualified Data.ByteString as B + +substr :: Int -> Int -> BS -> BS +substr b l = B.take l . B.drop b diff --git a/src/Types.hs b/src/Types.hs new file mode 100644 index 0000000..d8add21 --- /dev/null +++ b/src/Types.hs @@ -0,0 +1,20 @@ +module Types where + +import Data.ByteString +import Data.Vector + +type BS = ByteString + +type Tok = (Bool, BS) + +type TV = Vector Tok + +type Diff = [(Op, Tok)] + +type Hunk = ((Int, Int), Diff) + +data Op + = Remove + | Keep + | Add + deriving (Show, Eq)