implement diff

This commit is contained in:
Mirek Kratochvil 2020-08-13 16:05:47 +02:00
parent 96ef2e732b
commit 7fc24d236c
4 changed files with 178 additions and 5 deletions

View file

@ -43,7 +43,7 @@ executable adiff
main-is: MainDiff.hs
-- Modules included in this executable, other than Main.
other-modules: Redfa, Version
other-modules: Redfa, Version, Diff
-- LANGUAGE extensions used by modules in this package.
other-extensions: CPP

166
src/Diff.hs Normal file
View file

@ -0,0 +1,166 @@
module Diff
( Tok
, Op
, diffToks
, hunks
, pprDiff
, pprDiff1
, pprHunk
, pprHunks
) where
import Control.Monad
import qualified Data.ByteString as B
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Lazy as BL
import Data.ByteString.UTF8 (fromString)
import Data.Function (on)
import Data.List (groupBy)
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed.Mutable as M
type Tok = (Bool, (Int, Int))
type Hunk = ((Int, Int), [(Op, Tok)])
type BS = B.ByteString
data Op
= Remove
| Keep
| Add
deriving (Show, Eq)
substr b e = B.take (e - b) . B.drop b
toksMatch d1 d2 t1 t2 x y =
let (tok1, (b1, e1)) = t1 V.! x
(tok2, (b2, e2)) = t2 V.! y
in tok1 == tok2 && substr b1 e1 d1 == substr b2 e2 d2
stripEqToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> (Int, Int, Int)
stripEqToks d1 d2 t1 t2 = (b, l1 - e, l2 - e)
where
l1 = V.length t1
l2 = V.length t2
firstDiff i
| i < l1 && i < l2 && toksMatch d1 d2 t1 t2 i i = firstDiff (i + 1)
| otherwise = i
b = firstDiff 0
lastDiff i
| l1 - i - 1 >= b &&
l2 - i - 1 >= b && toksMatch d1 d2 t1 t2 (l1 - i - 1) (l2 - i - 1) =
lastDiff (i + 1)
| otherwise = i
e = lastDiff 0
diffToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> IO [(Op, Tok)]
diffToks d1 d2 t1 t2 = do
let (b, e1, e2) = stripEqToks d1 d2 t1 t2
ms1 = e1 - b
ms2 = e2 - b
mtx <- M.new ((ms1 + 1) * (ms2 + 1))
let idx x y = (ms2 + 1) * x + y
forM_ [0 .. ms1] $ \i -> M.write mtx (idx i 0) i
forM_ [0 .. ms2] $ \i -> M.write mtx (idx 0 i) i
let toksMatch' x y = toksMatch d1 d2 t1 t2 (b + x) (b + y)
forM_ [1 .. ms1] $ \i ->
forM [1 .. ms2] $ \j -> do
up <- M.read mtx (idx i (j - 1))
left <- M.read mtx (idx (i - 1) j)
upleft <- M.read mtx (idx (i - 1) (j - 1))
M.write mtx (idx i j) $
minimum $
[up + 1, left + 1] ++
(if toksMatch' (i - 1) (j - 1)
then [upleft]
else [])
let doAdd i j = (:) (Add, t2 V.! (b + j - 1)) <$> backtrack i (j - 1)
doRem i j = (:) (Remove, t1 V.! (b + i - 1)) <$> backtrack (i - 1) j
doKeep i j = (:) (Keep, t1 V.! (b + i - 1)) <$> backtrack (i - 1) (j - 1)
backtrack :: Int -> Int -> IO [(Op, Tok)]
backtrack 0 0 = pure []
backtrack i 0 = doRem i 0
backtrack 0 j = doAdd 0 j
backtrack i j = do
add <- M.read mtx (idx i (j - 1))
rem <- M.read mtx (idx (i - 1) j)
keep <- M.read mtx (idx (i - 1) (j - 1))
if toksMatch' (i - 1) (j - 1) && keep <= min add rem
then doKeep i j
else if add <= rem
then doAdd i j
else doRem i j
diff <- reverse <$> backtrack ms1 ms2
return $
map ((,) Keep) (take b $ V.toList t1) ++
diff ++ map ((,) Keep) (drop e1 $ V.toList t1)
pprDiff :: BS -> BS -> [(Op, Tok)] -> [BS]
pprDiff d1 d2 = map (pprDiff1 d1 d2)
pprDiff1 d1 d2 (op, (tok, (i, j))) =
fromString pfx <> escNewlines (substr i j s)
where
pfx =
case (op, tok) of
(Add, True) -> "+|"
(Remove, True) -> "-|"
(Keep, True) -> " |"
(Add, False) -> "+."
(Remove, False) -> "-."
(Keep, False) -> " ."
s =
case op of
Add -> d2
_ -> d1
escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines'
escNewlines' s
| B.null s = mempty
| B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s)
| B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s)
| otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s)
hunks :: Int -> [(Op, Tok)] -> [Hunk]
hunks ctxt = go 0 0 0 0 [] . groupBy ((==) `on` fst)
where
go _ _ bi bj backlog [] =
if null backlog
then []
else [((bi, bj), backlog)]
go i j bi bj backlog (g:gs) =
case fst (head g) of
Remove -> go (i + length g) j bi bj (backlog ++ g) gs
Add -> go i (j + length g) bi bj (backlog ++ g) gs
Keep ->
let lg = length g
in if lg <=
(if null backlog || null g
then ctxt
else 2 * ctxt)
then go (i + lg) (j + lg) bi bj (backlog ++ g) gs
else (if null backlog
then id
else (:) ((bi, bj), backlog ++ take ctxt g)) $
go
(i + lg)
(j + lg)
(i + lg - ctxt)
(j + lg - ctxt)
(if null gs
then []
else drop (lg - ctxt) g)
gs
hunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@"
lineSep = fromString "\n"
pprHunk :: BS -> BS -> Hunk -> BS
pprHunk d1 d2 ((i, j), toks) =
B.intercalate lineSep (hunkHdr i j : pprDiff d1 d2 toks)
pprHunks d1 d2 = B.intercalate lineSep . map (pprHunk d1 d2)

View file

@ -1,23 +1,28 @@
module Main where
import Data.ByteString.UTF8 (fromString)
import qualified Data.ByteString.Char8 as B8
import Options.Applicative
import Redfa
import Version
import Diff
import qualified Data.Vector as V
import System.IO.MMap
data DiffOptions =
DiffOptions
{ diffRedfaOpt :: RedfaOption
, context :: Int
, diffFile1 :: String
, diffFile2 :: String
}
deriving (Show)
diffOpts =
DiffOptions <$> redfaOption <*> strArgument (metavar "FROMFILE") <*>
strArgument (metavar "TOFILE")
DiffOptions <$> redfaOption
<*> option auto (metavar "CONTEXT" <> short 'C' <> long "context" <> value 5)
<*> strArgument (metavar "FROMFILE")
<*> strArgument (metavar "TOFILE")
main :: IO ()
main =
@ -34,4 +39,5 @@ main =
data2 <- mmapFileByteString (diffFile2 o) Nothing
toks1 <- V.fromList <$> redfaTokenize redfa data1
toks2 <- V.fromList <$> redfaTokenize redfa data2
print $ toks1
hs <- hunks (max 0 $ context o) <$> diffToks data1 data2 toks1 toks2
B8.putStrLn $ pprHunks data1 data2 hs

View file

@ -15,6 +15,7 @@ import Data.Monoid
import qualified Data.Vector as V
import Options.Applicative
import Text.Regex.TDFA
import Diff
type BS = B.ByteString
@ -132,7 +133,7 @@ redfaPrepareRules opt = do
return $ RedfaRule rs rm b t
return $ RedfaSpec start (V.fromList ids) (V.fromList rules)
redfaTokenize :: MonadFail m => RedfaSpec -> BS -> m [(Bool, (Int, Int))]
redfaTokenize :: MonadFail m => RedfaSpec -> BS -> m [Tok]
redfaTokenize spec s = redfaTokenize' spec s (redfaStart spec) 0 []
redfaTokenize' ::