implement diff
This commit is contained in:
parent
96ef2e732b
commit
7fc24d236c
|
@ -43,7 +43,7 @@ executable adiff
|
|||
main-is: MainDiff.hs
|
||||
|
||||
-- Modules included in this executable, other than Main.
|
||||
other-modules: Redfa, Version
|
||||
other-modules: Redfa, Version, Diff
|
||||
|
||||
-- LANGUAGE extensions used by modules in this package.
|
||||
other-extensions: CPP
|
||||
|
|
166
src/Diff.hs
Normal file
166
src/Diff.hs
Normal file
|
@ -0,0 +1,166 @@
|
|||
module Diff
|
||||
( Tok
|
||||
, Op
|
||||
, diffToks
|
||||
, hunks
|
||||
, pprDiff
|
||||
, pprDiff1
|
||||
, pprHunk
|
||||
, pprHunks
|
||||
) where
|
||||
|
||||
import Control.Monad
|
||||
import qualified Data.ByteString as B
|
||||
import qualified Data.ByteString.Builder as BB
|
||||
import qualified Data.ByteString.Internal as BI
|
||||
import qualified Data.ByteString.Lazy as BL
|
||||
import Data.ByteString.UTF8 (fromString)
|
||||
import Data.Function (on)
|
||||
import Data.List (groupBy)
|
||||
import qualified Data.Vector as V
|
||||
import qualified Data.Vector.Unboxed.Mutable as M
|
||||
|
||||
type Tok = (Bool, (Int, Int))
|
||||
|
||||
type Hunk = ((Int, Int), [(Op, Tok)])
|
||||
|
||||
type BS = B.ByteString
|
||||
|
||||
data Op
|
||||
= Remove
|
||||
| Keep
|
||||
| Add
|
||||
deriving (Show, Eq)
|
||||
|
||||
substr b e = B.take (e - b) . B.drop b
|
||||
|
||||
toksMatch d1 d2 t1 t2 x y =
|
||||
let (tok1, (b1, e1)) = t1 V.! x
|
||||
(tok2, (b2, e2)) = t2 V.! y
|
||||
in tok1 == tok2 && substr b1 e1 d1 == substr b2 e2 d2
|
||||
|
||||
stripEqToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> (Int, Int, Int)
|
||||
stripEqToks d1 d2 t1 t2 = (b, l1 - e, l2 - e)
|
||||
where
|
||||
l1 = V.length t1
|
||||
l2 = V.length t2
|
||||
firstDiff i
|
||||
| i < l1 && i < l2 && toksMatch d1 d2 t1 t2 i i = firstDiff (i + 1)
|
||||
| otherwise = i
|
||||
b = firstDiff 0
|
||||
lastDiff i
|
||||
| l1 - i - 1 >= b &&
|
||||
l2 - i - 1 >= b && toksMatch d1 d2 t1 t2 (l1 - i - 1) (l2 - i - 1) =
|
||||
lastDiff (i + 1)
|
||||
| otherwise = i
|
||||
e = lastDiff 0
|
||||
|
||||
diffToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> IO [(Op, Tok)]
|
||||
diffToks d1 d2 t1 t2 = do
|
||||
let (b, e1, e2) = stripEqToks d1 d2 t1 t2
|
||||
ms1 = e1 - b
|
||||
ms2 = e2 - b
|
||||
mtx <- M.new ((ms1 + 1) * (ms2 + 1))
|
||||
let idx x y = (ms2 + 1) * x + y
|
||||
forM_ [0 .. ms1] $ \i -> M.write mtx (idx i 0) i
|
||||
forM_ [0 .. ms2] $ \i -> M.write mtx (idx 0 i) i
|
||||
let toksMatch' x y = toksMatch d1 d2 t1 t2 (b + x) (b + y)
|
||||
forM_ [1 .. ms1] $ \i ->
|
||||
forM [1 .. ms2] $ \j -> do
|
||||
up <- M.read mtx (idx i (j - 1))
|
||||
left <- M.read mtx (idx (i - 1) j)
|
||||
upleft <- M.read mtx (idx (i - 1) (j - 1))
|
||||
M.write mtx (idx i j) $
|
||||
minimum $
|
||||
[up + 1, left + 1] ++
|
||||
(if toksMatch' (i - 1) (j - 1)
|
||||
then [upleft]
|
||||
else [])
|
||||
let doAdd i j = (:) (Add, t2 V.! (b + j - 1)) <$> backtrack i (j - 1)
|
||||
doRem i j = (:) (Remove, t1 V.! (b + i - 1)) <$> backtrack (i - 1) j
|
||||
doKeep i j = (:) (Keep, t1 V.! (b + i - 1)) <$> backtrack (i - 1) (j - 1)
|
||||
backtrack :: Int -> Int -> IO [(Op, Tok)]
|
||||
backtrack 0 0 = pure []
|
||||
backtrack i 0 = doRem i 0
|
||||
backtrack 0 j = doAdd 0 j
|
||||
backtrack i j = do
|
||||
add <- M.read mtx (idx i (j - 1))
|
||||
rem <- M.read mtx (idx (i - 1) j)
|
||||
keep <- M.read mtx (idx (i - 1) (j - 1))
|
||||
if toksMatch' (i - 1) (j - 1) && keep <= min add rem
|
||||
then doKeep i j
|
||||
else if add <= rem
|
||||
then doAdd i j
|
||||
else doRem i j
|
||||
diff <- reverse <$> backtrack ms1 ms2
|
||||
return $
|
||||
map ((,) Keep) (take b $ V.toList t1) ++
|
||||
diff ++ map ((,) Keep) (drop e1 $ V.toList t1)
|
||||
|
||||
pprDiff :: BS -> BS -> [(Op, Tok)] -> [BS]
|
||||
pprDiff d1 d2 = map (pprDiff1 d1 d2)
|
||||
|
||||
pprDiff1 d1 d2 (op, (tok, (i, j))) =
|
||||
fromString pfx <> escNewlines (substr i j s)
|
||||
where
|
||||
pfx =
|
||||
case (op, tok) of
|
||||
(Add, True) -> "+|"
|
||||
(Remove, True) -> "-|"
|
||||
(Keep, True) -> " |"
|
||||
(Add, False) -> "+."
|
||||
(Remove, False) -> "-."
|
||||
(Keep, False) -> " ."
|
||||
s =
|
||||
case op of
|
||||
Add -> d2
|
||||
_ -> d1
|
||||
|
||||
escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines'
|
||||
|
||||
escNewlines' s
|
||||
| B.null s = mempty
|
||||
| B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s)
|
||||
| B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s)
|
||||
| otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s)
|
||||
|
||||
hunks :: Int -> [(Op, Tok)] -> [Hunk]
|
||||
hunks ctxt = go 0 0 0 0 [] . groupBy ((==) `on` fst)
|
||||
where
|
||||
go _ _ bi bj backlog [] =
|
||||
if null backlog
|
||||
then []
|
||||
else [((bi, bj), backlog)]
|
||||
go i j bi bj backlog (g:gs) =
|
||||
case fst (head g) of
|
||||
Remove -> go (i + length g) j bi bj (backlog ++ g) gs
|
||||
Add -> go i (j + length g) bi bj (backlog ++ g) gs
|
||||
Keep ->
|
||||
let lg = length g
|
||||
in if lg <=
|
||||
(if null backlog || null g
|
||||
then ctxt
|
||||
else 2 * ctxt)
|
||||
then go (i + lg) (j + lg) bi bj (backlog ++ g) gs
|
||||
else (if null backlog
|
||||
then id
|
||||
else (:) ((bi, bj), backlog ++ take ctxt g)) $
|
||||
go
|
||||
(i + lg)
|
||||
(j + lg)
|
||||
(i + lg - ctxt)
|
||||
(j + lg - ctxt)
|
||||
(if null gs
|
||||
then []
|
||||
else drop (lg - ctxt) g)
|
||||
gs
|
||||
|
||||
hunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@"
|
||||
|
||||
lineSep = fromString "\n"
|
||||
|
||||
pprHunk :: BS -> BS -> Hunk -> BS
|
||||
pprHunk d1 d2 ((i, j), toks) =
|
||||
B.intercalate lineSep (hunkHdr i j : pprDiff d1 d2 toks)
|
||||
|
||||
pprHunks d1 d2 = B.intercalate lineSep . map (pprHunk d1 d2)
|
|
@ -1,23 +1,28 @@
|
|||
module Main where
|
||||
|
||||
import Data.ByteString.UTF8 (fromString)
|
||||
import qualified Data.ByteString.Char8 as B8
|
||||
import Options.Applicative
|
||||
import Redfa
|
||||
import Version
|
||||
import Diff
|
||||
import qualified Data.Vector as V
|
||||
import System.IO.MMap
|
||||
|
||||
data DiffOptions =
|
||||
DiffOptions
|
||||
{ diffRedfaOpt :: RedfaOption
|
||||
, context :: Int
|
||||
, diffFile1 :: String
|
||||
, diffFile2 :: String
|
||||
}
|
||||
deriving (Show)
|
||||
|
||||
diffOpts =
|
||||
DiffOptions <$> redfaOption <*> strArgument (metavar "FROMFILE") <*>
|
||||
strArgument (metavar "TOFILE")
|
||||
DiffOptions <$> redfaOption
|
||||
<*> option auto (metavar "CONTEXT" <> short 'C' <> long "context" <> value 5)
|
||||
<*> strArgument (metavar "FROMFILE")
|
||||
<*> strArgument (metavar "TOFILE")
|
||||
|
||||
main :: IO ()
|
||||
main =
|
||||
|
@ -34,4 +39,5 @@ main =
|
|||
data2 <- mmapFileByteString (diffFile2 o) Nothing
|
||||
toks1 <- V.fromList <$> redfaTokenize redfa data1
|
||||
toks2 <- V.fromList <$> redfaTokenize redfa data2
|
||||
print $ toks1
|
||||
hs <- hunks (max 0 $ context o) <$> diffToks data1 data2 toks1 toks2
|
||||
B8.putStrLn $ pprHunks data1 data2 hs
|
||||
|
|
|
@ -15,6 +15,7 @@ import Data.Monoid
|
|||
import qualified Data.Vector as V
|
||||
import Options.Applicative
|
||||
import Text.Regex.TDFA
|
||||
import Diff
|
||||
|
||||
type BS = B.ByteString
|
||||
|
||||
|
@ -132,7 +133,7 @@ redfaPrepareRules opt = do
|
|||
return $ RedfaRule rs rm b t
|
||||
return $ RedfaSpec start (V.fromList ids) (V.fromList rules)
|
||||
|
||||
redfaTokenize :: MonadFail m => RedfaSpec -> BS -> m [(Bool, (Int, Int))]
|
||||
redfaTokenize :: MonadFail m => RedfaSpec -> BS -> m [Tok]
|
||||
redfaTokenize spec s = redfaTokenize' spec s (redfaStart spec) 0 []
|
||||
|
||||
redfaTokenize' ::
|
||||
|
|
Loading…
Reference in a new issue