diff --git a/adiff.cabal b/adiff.cabal index c88d03c..19f0dee 100644 --- a/adiff.cabal +++ b/adiff.cabal @@ -43,7 +43,7 @@ executable adiff main-is: MainDiff.hs -- Modules included in this executable, other than Main. - other-modules: Redfa, Version + other-modules: Redfa, Version, Diff -- LANGUAGE extensions used by modules in this package. other-extensions: CPP diff --git a/src/Diff.hs b/src/Diff.hs new file mode 100644 index 0000000..01f7df6 --- /dev/null +++ b/src/Diff.hs @@ -0,0 +1,166 @@ +module Diff + ( Tok + , Op + , diffToks + , hunks + , pprDiff + , pprDiff1 + , pprHunk + , pprHunks + ) where + +import Control.Monad +import qualified Data.ByteString as B +import qualified Data.ByteString.Builder as BB +import qualified Data.ByteString.Internal as BI +import qualified Data.ByteString.Lazy as BL +import Data.ByteString.UTF8 (fromString) +import Data.Function (on) +import Data.List (groupBy) +import qualified Data.Vector as V +import qualified Data.Vector.Unboxed.Mutable as M + +type Tok = (Bool, (Int, Int)) + +type Hunk = ((Int, Int), [(Op, Tok)]) + +type BS = B.ByteString + +data Op + = Remove + | Keep + | Add + deriving (Show, Eq) + +substr b e = B.take (e - b) . B.drop b + +toksMatch d1 d2 t1 t2 x y = + let (tok1, (b1, e1)) = t1 V.! x + (tok2, (b2, e2)) = t2 V.! y + in tok1 == tok2 && substr b1 e1 d1 == substr b2 e2 d2 + +stripEqToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> (Int, Int, Int) +stripEqToks d1 d2 t1 t2 = (b, l1 - e, l2 - e) + where + l1 = V.length t1 + l2 = V.length t2 + firstDiff i + | i < l1 && i < l2 && toksMatch d1 d2 t1 t2 i i = firstDiff (i + 1) + | otherwise = i + b = firstDiff 0 + lastDiff i + | l1 - i - 1 >= b && + l2 - i - 1 >= b && toksMatch d1 d2 t1 t2 (l1 - i - 1) (l2 - i - 1) = + lastDiff (i + 1) + | otherwise = i + e = lastDiff 0 + +diffToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> IO [(Op, Tok)] +diffToks d1 d2 t1 t2 = do + let (b, e1, e2) = stripEqToks d1 d2 t1 t2 + ms1 = e1 - b + ms2 = e2 - b + mtx <- M.new ((ms1 + 1) * (ms2 + 1)) + let idx x y = (ms2 + 1) * x + y + forM_ [0 .. ms1] $ \i -> M.write mtx (idx i 0) i + forM_ [0 .. ms2] $ \i -> M.write mtx (idx 0 i) i + let toksMatch' x y = toksMatch d1 d2 t1 t2 (b + x) (b + y) + forM_ [1 .. ms1] $ \i -> + forM [1 .. ms2] $ \j -> do + up <- M.read mtx (idx i (j - 1)) + left <- M.read mtx (idx (i - 1) j) + upleft <- M.read mtx (idx (i - 1) (j - 1)) + M.write mtx (idx i j) $ + minimum $ + [up + 1, left + 1] ++ + (if toksMatch' (i - 1) (j - 1) + then [upleft] + else []) + let doAdd i j = (:) (Add, t2 V.! (b + j - 1)) <$> backtrack i (j - 1) + doRem i j = (:) (Remove, t1 V.! (b + i - 1)) <$> backtrack (i - 1) j + doKeep i j = (:) (Keep, t1 V.! (b + i - 1)) <$> backtrack (i - 1) (j - 1) + backtrack :: Int -> Int -> IO [(Op, Tok)] + backtrack 0 0 = pure [] + backtrack i 0 = doRem i 0 + backtrack 0 j = doAdd 0 j + backtrack i j = do + add <- M.read mtx (idx i (j - 1)) + rem <- M.read mtx (idx (i - 1) j) + keep <- M.read mtx (idx (i - 1) (j - 1)) + if toksMatch' (i - 1) (j - 1) && keep <= min add rem + then doKeep i j + else if add <= rem + then doAdd i j + else doRem i j + diff <- reverse <$> backtrack ms1 ms2 + return $ + map ((,) Keep) (take b $ V.toList t1) ++ + diff ++ map ((,) Keep) (drop e1 $ V.toList t1) + +pprDiff :: BS -> BS -> [(Op, Tok)] -> [BS] +pprDiff d1 d2 = map (pprDiff1 d1 d2) + +pprDiff1 d1 d2 (op, (tok, (i, j))) = + fromString pfx <> escNewlines (substr i j s) + where + pfx = + case (op, tok) of + (Add, True) -> "+|" + (Remove, True) -> "-|" + (Keep, True) -> " |" + (Add, False) -> "+." + (Remove, False) -> "-." + (Keep, False) -> " ." + s = + case op of + Add -> d2 + _ -> d1 + +escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines' + +escNewlines' s + | B.null s = mempty + | B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s) + | B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s) + | otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s) + +hunks :: Int -> [(Op, Tok)] -> [Hunk] +hunks ctxt = go 0 0 0 0 [] . groupBy ((==) `on` fst) + where + go _ _ bi bj backlog [] = + if null backlog + then [] + else [((bi, bj), backlog)] + go i j bi bj backlog (g:gs) = + case fst (head g) of + Remove -> go (i + length g) j bi bj (backlog ++ g) gs + Add -> go i (j + length g) bi bj (backlog ++ g) gs + Keep -> + let lg = length g + in if lg <= + (if null backlog || null g + then ctxt + else 2 * ctxt) + then go (i + lg) (j + lg) bi bj (backlog ++ g) gs + else (if null backlog + then id + else (:) ((bi, bj), backlog ++ take ctxt g)) $ + go + (i + lg) + (j + lg) + (i + lg - ctxt) + (j + lg - ctxt) + (if null gs + then [] + else drop (lg - ctxt) g) + gs + +hunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@" + +lineSep = fromString "\n" + +pprHunk :: BS -> BS -> Hunk -> BS +pprHunk d1 d2 ((i, j), toks) = + B.intercalate lineSep (hunkHdr i j : pprDiff d1 d2 toks) + +pprHunks d1 d2 = B.intercalate lineSep . map (pprHunk d1 d2) diff --git a/src/MainDiff.hs b/src/MainDiff.hs index fc06227..080fb0f 100644 --- a/src/MainDiff.hs +++ b/src/MainDiff.hs @@ -1,23 +1,28 @@ module Main where import Data.ByteString.UTF8 (fromString) +import qualified Data.ByteString.Char8 as B8 import Options.Applicative import Redfa import Version +import Diff import qualified Data.Vector as V import System.IO.MMap data DiffOptions = DiffOptions { diffRedfaOpt :: RedfaOption + , context :: Int , diffFile1 :: String , diffFile2 :: String } deriving (Show) diffOpts = - DiffOptions <$> redfaOption <*> strArgument (metavar "FROMFILE") <*> - strArgument (metavar "TOFILE") + DiffOptions <$> redfaOption + <*> option auto (metavar "CONTEXT" <> short 'C' <> long "context" <> value 5) + <*> strArgument (metavar "FROMFILE") + <*> strArgument (metavar "TOFILE") main :: IO () main = @@ -34,4 +39,5 @@ main = data2 <- mmapFileByteString (diffFile2 o) Nothing toks1 <- V.fromList <$> redfaTokenize redfa data1 toks2 <- V.fromList <$> redfaTokenize redfa data2 - print $ toks1 + hs <- hunks (max 0 $ context o) <$> diffToks data1 data2 toks1 toks2 + B8.putStrLn $ pprHunks data1 data2 hs diff --git a/src/Redfa.hs b/src/Redfa.hs index 4679cf3..c2ee725 100644 --- a/src/Redfa.hs +++ b/src/Redfa.hs @@ -15,6 +15,7 @@ import Data.Monoid import qualified Data.Vector as V import Options.Applicative import Text.Regex.TDFA +import Diff type BS = B.ByteString @@ -132,7 +133,7 @@ redfaPrepareRules opt = do return $ RedfaRule rs rm b t return $ RedfaSpec start (V.fromList ids) (V.fromList rules) -redfaTokenize :: MonadFail m => RedfaSpec -> BS -> m [(Bool, (Int, Int))] +redfaTokenize :: MonadFail m => RedfaSpec -> BS -> m [Tok] redfaTokenize spec s = redfaTokenize' spec s (redfaStart spec) 0 [] redfaTokenize' ::