implement diff
This commit is contained in:
parent
96ef2e732b
commit
7fc24d236c
|
@ -43,7 +43,7 @@ executable adiff
|
||||||
main-is: MainDiff.hs
|
main-is: MainDiff.hs
|
||||||
|
|
||||||
-- Modules included in this executable, other than Main.
|
-- Modules included in this executable, other than Main.
|
||||||
other-modules: Redfa, Version
|
other-modules: Redfa, Version, Diff
|
||||||
|
|
||||||
-- LANGUAGE extensions used by modules in this package.
|
-- LANGUAGE extensions used by modules in this package.
|
||||||
other-extensions: CPP
|
other-extensions: CPP
|
||||||
|
|
166
src/Diff.hs
Normal file
166
src/Diff.hs
Normal file
|
@ -0,0 +1,166 @@
|
||||||
|
module Diff
|
||||||
|
( Tok
|
||||||
|
, Op
|
||||||
|
, diffToks
|
||||||
|
, hunks
|
||||||
|
, pprDiff
|
||||||
|
, pprDiff1
|
||||||
|
, pprHunk
|
||||||
|
, pprHunks
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Control.Monad
|
||||||
|
import qualified Data.ByteString as B
|
||||||
|
import qualified Data.ByteString.Builder as BB
|
||||||
|
import qualified Data.ByteString.Internal as BI
|
||||||
|
import qualified Data.ByteString.Lazy as BL
|
||||||
|
import Data.ByteString.UTF8 (fromString)
|
||||||
|
import Data.Function (on)
|
||||||
|
import Data.List (groupBy)
|
||||||
|
import qualified Data.Vector as V
|
||||||
|
import qualified Data.Vector.Unboxed.Mutable as M
|
||||||
|
|
||||||
|
type Tok = (Bool, (Int, Int))
|
||||||
|
|
||||||
|
type Hunk = ((Int, Int), [(Op, Tok)])
|
||||||
|
|
||||||
|
type BS = B.ByteString
|
||||||
|
|
||||||
|
data Op
|
||||||
|
= Remove
|
||||||
|
| Keep
|
||||||
|
| Add
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
substr b e = B.take (e - b) . B.drop b
|
||||||
|
|
||||||
|
toksMatch d1 d2 t1 t2 x y =
|
||||||
|
let (tok1, (b1, e1)) = t1 V.! x
|
||||||
|
(tok2, (b2, e2)) = t2 V.! y
|
||||||
|
in tok1 == tok2 && substr b1 e1 d1 == substr b2 e2 d2
|
||||||
|
|
||||||
|
stripEqToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> (Int, Int, Int)
|
||||||
|
stripEqToks d1 d2 t1 t2 = (b, l1 - e, l2 - e)
|
||||||
|
where
|
||||||
|
l1 = V.length t1
|
||||||
|
l2 = V.length t2
|
||||||
|
firstDiff i
|
||||||
|
| i < l1 && i < l2 && toksMatch d1 d2 t1 t2 i i = firstDiff (i + 1)
|
||||||
|
| otherwise = i
|
||||||
|
b = firstDiff 0
|
||||||
|
lastDiff i
|
||||||
|
| l1 - i - 1 >= b &&
|
||||||
|
l2 - i - 1 >= b && toksMatch d1 d2 t1 t2 (l1 - i - 1) (l2 - i - 1) =
|
||||||
|
lastDiff (i + 1)
|
||||||
|
| otherwise = i
|
||||||
|
e = lastDiff 0
|
||||||
|
|
||||||
|
diffToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> IO [(Op, Tok)]
|
||||||
|
diffToks d1 d2 t1 t2 = do
|
||||||
|
let (b, e1, e2) = stripEqToks d1 d2 t1 t2
|
||||||
|
ms1 = e1 - b
|
||||||
|
ms2 = e2 - b
|
||||||
|
mtx <- M.new ((ms1 + 1) * (ms2 + 1))
|
||||||
|
let idx x y = (ms2 + 1) * x + y
|
||||||
|
forM_ [0 .. ms1] $ \i -> M.write mtx (idx i 0) i
|
||||||
|
forM_ [0 .. ms2] $ \i -> M.write mtx (idx 0 i) i
|
||||||
|
let toksMatch' x y = toksMatch d1 d2 t1 t2 (b + x) (b + y)
|
||||||
|
forM_ [1 .. ms1] $ \i ->
|
||||||
|
forM [1 .. ms2] $ \j -> do
|
||||||
|
up <- M.read mtx (idx i (j - 1))
|
||||||
|
left <- M.read mtx (idx (i - 1) j)
|
||||||
|
upleft <- M.read mtx (idx (i - 1) (j - 1))
|
||||||
|
M.write mtx (idx i j) $
|
||||||
|
minimum $
|
||||||
|
[up + 1, left + 1] ++
|
||||||
|
(if toksMatch' (i - 1) (j - 1)
|
||||||
|
then [upleft]
|
||||||
|
else [])
|
||||||
|
let doAdd i j = (:) (Add, t2 V.! (b + j - 1)) <$> backtrack i (j - 1)
|
||||||
|
doRem i j = (:) (Remove, t1 V.! (b + i - 1)) <$> backtrack (i - 1) j
|
||||||
|
doKeep i j = (:) (Keep, t1 V.! (b + i - 1)) <$> backtrack (i - 1) (j - 1)
|
||||||
|
backtrack :: Int -> Int -> IO [(Op, Tok)]
|
||||||
|
backtrack 0 0 = pure []
|
||||||
|
backtrack i 0 = doRem i 0
|
||||||
|
backtrack 0 j = doAdd 0 j
|
||||||
|
backtrack i j = do
|
||||||
|
add <- M.read mtx (idx i (j - 1))
|
||||||
|
rem <- M.read mtx (idx (i - 1) j)
|
||||||
|
keep <- M.read mtx (idx (i - 1) (j - 1))
|
||||||
|
if toksMatch' (i - 1) (j - 1) && keep <= min add rem
|
||||||
|
then doKeep i j
|
||||||
|
else if add <= rem
|
||||||
|
then doAdd i j
|
||||||
|
else doRem i j
|
||||||
|
diff <- reverse <$> backtrack ms1 ms2
|
||||||
|
return $
|
||||||
|
map ((,) Keep) (take b $ V.toList t1) ++
|
||||||
|
diff ++ map ((,) Keep) (drop e1 $ V.toList t1)
|
||||||
|
|
||||||
|
pprDiff :: BS -> BS -> [(Op, Tok)] -> [BS]
|
||||||
|
pprDiff d1 d2 = map (pprDiff1 d1 d2)
|
||||||
|
|
||||||
|
pprDiff1 d1 d2 (op, (tok, (i, j))) =
|
||||||
|
fromString pfx <> escNewlines (substr i j s)
|
||||||
|
where
|
||||||
|
pfx =
|
||||||
|
case (op, tok) of
|
||||||
|
(Add, True) -> "+|"
|
||||||
|
(Remove, True) -> "-|"
|
||||||
|
(Keep, True) -> " |"
|
||||||
|
(Add, False) -> "+."
|
||||||
|
(Remove, False) -> "-."
|
||||||
|
(Keep, False) -> " ."
|
||||||
|
s =
|
||||||
|
case op of
|
||||||
|
Add -> d2
|
||||||
|
_ -> d1
|
||||||
|
|
||||||
|
escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines'
|
||||||
|
|
||||||
|
escNewlines' s
|
||||||
|
| B.null s = mempty
|
||||||
|
| B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s)
|
||||||
|
| B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s)
|
||||||
|
| otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s)
|
||||||
|
|
||||||
|
hunks :: Int -> [(Op, Tok)] -> [Hunk]
|
||||||
|
hunks ctxt = go 0 0 0 0 [] . groupBy ((==) `on` fst)
|
||||||
|
where
|
||||||
|
go _ _ bi bj backlog [] =
|
||||||
|
if null backlog
|
||||||
|
then []
|
||||||
|
else [((bi, bj), backlog)]
|
||||||
|
go i j bi bj backlog (g:gs) =
|
||||||
|
case fst (head g) of
|
||||||
|
Remove -> go (i + length g) j bi bj (backlog ++ g) gs
|
||||||
|
Add -> go i (j + length g) bi bj (backlog ++ g) gs
|
||||||
|
Keep ->
|
||||||
|
let lg = length g
|
||||||
|
in if lg <=
|
||||||
|
(if null backlog || null g
|
||||||
|
then ctxt
|
||||||
|
else 2 * ctxt)
|
||||||
|
then go (i + lg) (j + lg) bi bj (backlog ++ g) gs
|
||||||
|
else (if null backlog
|
||||||
|
then id
|
||||||
|
else (:) ((bi, bj), backlog ++ take ctxt g)) $
|
||||||
|
go
|
||||||
|
(i + lg)
|
||||||
|
(j + lg)
|
||||||
|
(i + lg - ctxt)
|
||||||
|
(j + lg - ctxt)
|
||||||
|
(if null gs
|
||||||
|
then []
|
||||||
|
else drop (lg - ctxt) g)
|
||||||
|
gs
|
||||||
|
|
||||||
|
hunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@"
|
||||||
|
|
||||||
|
lineSep = fromString "\n"
|
||||||
|
|
||||||
|
pprHunk :: BS -> BS -> Hunk -> BS
|
||||||
|
pprHunk d1 d2 ((i, j), toks) =
|
||||||
|
B.intercalate lineSep (hunkHdr i j : pprDiff d1 d2 toks)
|
||||||
|
|
||||||
|
pprHunks d1 d2 = B.intercalate lineSep . map (pprHunk d1 d2)
|
|
@ -1,23 +1,28 @@
|
||||||
module Main where
|
module Main where
|
||||||
|
|
||||||
import Data.ByteString.UTF8 (fromString)
|
import Data.ByteString.UTF8 (fromString)
|
||||||
|
import qualified Data.ByteString.Char8 as B8
|
||||||
import Options.Applicative
|
import Options.Applicative
|
||||||
import Redfa
|
import Redfa
|
||||||
import Version
|
import Version
|
||||||
|
import Diff
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
import System.IO.MMap
|
import System.IO.MMap
|
||||||
|
|
||||||
data DiffOptions =
|
data DiffOptions =
|
||||||
DiffOptions
|
DiffOptions
|
||||||
{ diffRedfaOpt :: RedfaOption
|
{ diffRedfaOpt :: RedfaOption
|
||||||
|
, context :: Int
|
||||||
, diffFile1 :: String
|
, diffFile1 :: String
|
||||||
, diffFile2 :: String
|
, diffFile2 :: String
|
||||||
}
|
}
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
|
||||||
diffOpts =
|
diffOpts =
|
||||||
DiffOptions <$> redfaOption <*> strArgument (metavar "FROMFILE") <*>
|
DiffOptions <$> redfaOption
|
||||||
strArgument (metavar "TOFILE")
|
<*> option auto (metavar "CONTEXT" <> short 'C' <> long "context" <> value 5)
|
||||||
|
<*> strArgument (metavar "FROMFILE")
|
||||||
|
<*> strArgument (metavar "TOFILE")
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main =
|
main =
|
||||||
|
@ -34,4 +39,5 @@ main =
|
||||||
data2 <- mmapFileByteString (diffFile2 o) Nothing
|
data2 <- mmapFileByteString (diffFile2 o) Nothing
|
||||||
toks1 <- V.fromList <$> redfaTokenize redfa data1
|
toks1 <- V.fromList <$> redfaTokenize redfa data1
|
||||||
toks2 <- V.fromList <$> redfaTokenize redfa data2
|
toks2 <- V.fromList <$> redfaTokenize redfa data2
|
||||||
print $ toks1
|
hs <- hunks (max 0 $ context o) <$> diffToks data1 data2 toks1 toks2
|
||||||
|
B8.putStrLn $ pprHunks data1 data2 hs
|
||||||
|
|
|
@ -15,6 +15,7 @@ import Data.Monoid
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
import Options.Applicative
|
import Options.Applicative
|
||||||
import Text.Regex.TDFA
|
import Text.Regex.TDFA
|
||||||
|
import Diff
|
||||||
|
|
||||||
type BS = B.ByteString
|
type BS = B.ByteString
|
||||||
|
|
||||||
|
@ -132,7 +133,7 @@ redfaPrepareRules opt = do
|
||||||
return $ RedfaRule rs rm b t
|
return $ RedfaRule rs rm b t
|
||||||
return $ RedfaSpec start (V.fromList ids) (V.fromList rules)
|
return $ RedfaSpec start (V.fromList ids) (V.fromList rules)
|
||||||
|
|
||||||
redfaTokenize :: MonadFail m => RedfaSpec -> BS -> m [(Bool, (Int, Int))]
|
redfaTokenize :: MonadFail m => RedfaSpec -> BS -> m [Tok]
|
||||||
redfaTokenize spec s = redfaTokenize' spec s (redfaStart spec) 0 []
|
redfaTokenize spec s = redfaTokenize' spec s (redfaStart spec) 0 []
|
||||||
|
|
||||||
redfaTokenize' ::
|
redfaTokenize' ::
|
||||||
|
|
Loading…
Reference in a new issue