implement diff

This commit is contained in:
Mirek Kratochvil 2020-08-13 16:05:47 +02:00
parent 96ef2e732b
commit 7fc24d236c
4 changed files with 178 additions and 5 deletions

View file

@ -43,7 +43,7 @@ executable adiff
main-is: MainDiff.hs main-is: MainDiff.hs
-- Modules included in this executable, other than Main. -- Modules included in this executable, other than Main.
other-modules: Redfa, Version other-modules: Redfa, Version, Diff
-- LANGUAGE extensions used by modules in this package. -- LANGUAGE extensions used by modules in this package.
other-extensions: CPP other-extensions: CPP

166
src/Diff.hs Normal file
View file

@ -0,0 +1,166 @@
module Diff
( Tok
, Op
, diffToks
, hunks
, pprDiff
, pprDiff1
, pprHunk
, pprHunks
) where
import Control.Monad
import qualified Data.ByteString as B
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Lazy as BL
import Data.ByteString.UTF8 (fromString)
import Data.Function (on)
import Data.List (groupBy)
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed.Mutable as M
type Tok = (Bool, (Int, Int))
type Hunk = ((Int, Int), [(Op, Tok)])
type BS = B.ByteString
data Op
= Remove
| Keep
| Add
deriving (Show, Eq)
substr b e = B.take (e - b) . B.drop b
toksMatch d1 d2 t1 t2 x y =
let (tok1, (b1, e1)) = t1 V.! x
(tok2, (b2, e2)) = t2 V.! y
in tok1 == tok2 && substr b1 e1 d1 == substr b2 e2 d2
stripEqToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> (Int, Int, Int)
stripEqToks d1 d2 t1 t2 = (b, l1 - e, l2 - e)
where
l1 = V.length t1
l2 = V.length t2
firstDiff i
| i < l1 && i < l2 && toksMatch d1 d2 t1 t2 i i = firstDiff (i + 1)
| otherwise = i
b = firstDiff 0
lastDiff i
| l1 - i - 1 >= b &&
l2 - i - 1 >= b && toksMatch d1 d2 t1 t2 (l1 - i - 1) (l2 - i - 1) =
lastDiff (i + 1)
| otherwise = i
e = lastDiff 0
diffToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> IO [(Op, Tok)]
diffToks d1 d2 t1 t2 = do
let (b, e1, e2) = stripEqToks d1 d2 t1 t2
ms1 = e1 - b
ms2 = e2 - b
mtx <- M.new ((ms1 + 1) * (ms2 + 1))
let idx x y = (ms2 + 1) * x + y
forM_ [0 .. ms1] $ \i -> M.write mtx (idx i 0) i
forM_ [0 .. ms2] $ \i -> M.write mtx (idx 0 i) i
let toksMatch' x y = toksMatch d1 d2 t1 t2 (b + x) (b + y)
forM_ [1 .. ms1] $ \i ->
forM [1 .. ms2] $ \j -> do
up <- M.read mtx (idx i (j - 1))
left <- M.read mtx (idx (i - 1) j)
upleft <- M.read mtx (idx (i - 1) (j - 1))
M.write mtx (idx i j) $
minimum $
[up + 1, left + 1] ++
(if toksMatch' (i - 1) (j - 1)
then [upleft]
else [])
let doAdd i j = (:) (Add, t2 V.! (b + j - 1)) <$> backtrack i (j - 1)
doRem i j = (:) (Remove, t1 V.! (b + i - 1)) <$> backtrack (i - 1) j
doKeep i j = (:) (Keep, t1 V.! (b + i - 1)) <$> backtrack (i - 1) (j - 1)
backtrack :: Int -> Int -> IO [(Op, Tok)]
backtrack 0 0 = pure []
backtrack i 0 = doRem i 0
backtrack 0 j = doAdd 0 j
backtrack i j = do
add <- M.read mtx (idx i (j - 1))
rem <- M.read mtx (idx (i - 1) j)
keep <- M.read mtx (idx (i - 1) (j - 1))
if toksMatch' (i - 1) (j - 1) && keep <= min add rem
then doKeep i j
else if add <= rem
then doAdd i j
else doRem i j
diff <- reverse <$> backtrack ms1 ms2
return $
map ((,) Keep) (take b $ V.toList t1) ++
diff ++ map ((,) Keep) (drop e1 $ V.toList t1)
pprDiff :: BS -> BS -> [(Op, Tok)] -> [BS]
pprDiff d1 d2 = map (pprDiff1 d1 d2)
pprDiff1 d1 d2 (op, (tok, (i, j))) =
fromString pfx <> escNewlines (substr i j s)
where
pfx =
case (op, tok) of
(Add, True) -> "+|"
(Remove, True) -> "-|"
(Keep, True) -> " |"
(Add, False) -> "+."
(Remove, False) -> "-."
(Keep, False) -> " ."
s =
case op of
Add -> d2
_ -> d1
escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines'
escNewlines' s
| B.null s = mempty
| B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s)
| B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s)
| otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s)
hunks :: Int -> [(Op, Tok)] -> [Hunk]
hunks ctxt = go 0 0 0 0 [] . groupBy ((==) `on` fst)
where
go _ _ bi bj backlog [] =
if null backlog
then []
else [((bi, bj), backlog)]
go i j bi bj backlog (g:gs) =
case fst (head g) of
Remove -> go (i + length g) j bi bj (backlog ++ g) gs
Add -> go i (j + length g) bi bj (backlog ++ g) gs
Keep ->
let lg = length g
in if lg <=
(if null backlog || null g
then ctxt
else 2 * ctxt)
then go (i + lg) (j + lg) bi bj (backlog ++ g) gs
else (if null backlog
then id
else (:) ((bi, bj), backlog ++ take ctxt g)) $
go
(i + lg)
(j + lg)
(i + lg - ctxt)
(j + lg - ctxt)
(if null gs
then []
else drop (lg - ctxt) g)
gs
hunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@"
lineSep = fromString "\n"
pprHunk :: BS -> BS -> Hunk -> BS
pprHunk d1 d2 ((i, j), toks) =
B.intercalate lineSep (hunkHdr i j : pprDiff d1 d2 toks)
pprHunks d1 d2 = B.intercalate lineSep . map (pprHunk d1 d2)

View file

@ -1,23 +1,28 @@
module Main where module Main where
import Data.ByteString.UTF8 (fromString) import Data.ByteString.UTF8 (fromString)
import qualified Data.ByteString.Char8 as B8
import Options.Applicative import Options.Applicative
import Redfa import Redfa
import Version import Version
import Diff
import qualified Data.Vector as V import qualified Data.Vector as V
import System.IO.MMap import System.IO.MMap
data DiffOptions = data DiffOptions =
DiffOptions DiffOptions
{ diffRedfaOpt :: RedfaOption { diffRedfaOpt :: RedfaOption
, context :: Int
, diffFile1 :: String , diffFile1 :: String
, diffFile2 :: String , diffFile2 :: String
} }
deriving (Show) deriving (Show)
diffOpts = diffOpts =
DiffOptions <$> redfaOption <*> strArgument (metavar "FROMFILE") <*> DiffOptions <$> redfaOption
strArgument (metavar "TOFILE") <*> option auto (metavar "CONTEXT" <> short 'C' <> long "context" <> value 5)
<*> strArgument (metavar "FROMFILE")
<*> strArgument (metavar "TOFILE")
main :: IO () main :: IO ()
main = main =
@ -34,4 +39,5 @@ main =
data2 <- mmapFileByteString (diffFile2 o) Nothing data2 <- mmapFileByteString (diffFile2 o) Nothing
toks1 <- V.fromList <$> redfaTokenize redfa data1 toks1 <- V.fromList <$> redfaTokenize redfa data1
toks2 <- V.fromList <$> redfaTokenize redfa data2 toks2 <- V.fromList <$> redfaTokenize redfa data2
print $ toks1 hs <- hunks (max 0 $ context o) <$> diffToks data1 data2 toks1 toks2
B8.putStrLn $ pprHunks data1 data2 hs

View file

@ -15,6 +15,7 @@ import Data.Monoid
import qualified Data.Vector as V import qualified Data.Vector as V
import Options.Applicative import Options.Applicative
import Text.Regex.TDFA import Text.Regex.TDFA
import Diff
type BS = B.ByteString type BS = B.ByteString
@ -132,7 +133,7 @@ redfaPrepareRules opt = do
return $ RedfaRule rs rm b t return $ RedfaRule rs rm b t
return $ RedfaSpec start (V.fromList ids) (V.fromList rules) return $ RedfaSpec start (V.fromList ids) (V.fromList rules)
redfaTokenize :: MonadFail m => RedfaSpec -> BS -> m [(Bool, (Int, Int))] redfaTokenize :: MonadFail m => RedfaSpec -> BS -> m [Tok]
redfaTokenize spec s = redfaTokenize' spec s (redfaStart spec) 0 [] redfaTokenize spec s = redfaTokenize' spec s (redfaStart spec) 0 []
redfaTokenize' :: redfaTokenize' ::