This commit is contained in:
Mirek Kratochvil 2020-09-19 19:18:48 +02:00
parent 64b5ca7ea7
commit b58b58ec61

View file

@ -21,10 +21,11 @@ import qualified Data.Vector as V
import qualified Data.Vector.Unboxed.Mutable as M
type Tok = (Bool, (Int, Int))
type Diff= [(Op,Tok)]
type Hunk = ((Int, Int), [(Op, Tok)])
type BS = B.ByteString
type TV = V.Vector Tok
data Op
= Remove
@ -36,8 +37,8 @@ data DiffEnv =
DiffEnv
{ deD1 :: BS
, deD2 :: BS
, deT1 :: V.Vector Tok
, deT2 :: V.Vector Tok
, deT1 :: TV
, deT2 :: TV
, deS :: Int
, deE :: Int
, deL :: Int
@ -53,60 +54,74 @@ data DiffEnv =
substr b e = B.take (e - b) . B.drop b
toksMatch x y DiffEnv {deT1 = t1, deT2 = t2, deD1 = d1, deD2 = d2} =
toksMatch' x y d1 d2 t1 t2
toksMatch' x y d1 d2 t1 t2 =
let (isTok1, (b1, e1)) = t1 V.! x
(isTok2, (b2, e2)) = t2 V.! y
in isTok1 == isTok2 && substr b1 e1 d1 == substr b2 e2 d2
stripEqToks :: DiffEnv -> (Int, Int, Int)
stripEqToks de@DiffEnv {deT1 = t1, deT2 = t2} = (b, l1 - e, l2 - e)
stripEqToks :: BS -> BS -> TV -> TV -> (Diff, Diff, TV, TV)
stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2')
where
l1 = V.length t1
l2 = V.length t2
firstDiff i
| i < l1 && i < l2 && toksMatch i i de = firstDiff (i + 1)
| i < l1 && i < l2 && toksMatch' i i d1 d2 t1 t2 = firstDiff (i + 1)
| otherwise = i
b = firstDiff 0
lastDiff i
| l1 - i - 1 >= b &&
l2 - i - 1 >= b && toksMatch (l1 - i - 1) (l2 - i - 1) de =
l2 - i - 1 >= b && toksMatch' (l1 - i - 1) (l2 - i - 1) d1 d2 t1 t2 =
lastDiff (i + 1)
| otherwise = i
e = lastDiff 0
pre = map (\t -> (Keep,t)) . V.toList . V.take b $ t1
post = map (\t -> (Keep,t)) . V.toList . V.drop (l1-e) $ t1
t1' = V.slice b (l1-e-b) t1
t2' = V.slice b (l2-e-b) t2
diffToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> [(Op, Tok)]
diffToks d1 d2 t1 t2 =
diffToks' $
if V.length t1 >= V.length t2
then DiffEnv
{ deD1 = d1
, deD2 = d2
, deT1 = t1
, deT2 = t2
, deS = 0
, deE = V.length t1
, deL = V.length t1
, deW = V.length t2
, deA = 0
, deB = V.length t2
, deVS = V.fromList [0 .. V.length t2]
, deVE = V.fromList $ reverse [0 .. V.length t2]
, deTrans = False
}
else DiffEnv
{ deD1 = d2
, deD2 = d1
, deT1 = t2
, deT2 = t1
, deS = 0
, deE = V.length t2
, deL = V.length t2
, deW = V.length t1
, deA = 0
, deB = V.length t1
, deVS = V.fromList [0 .. V.length t1]
, deVE = V.fromList $ reverse [0 .. V.length t1]
, deTrans = True
}
diffToks :: BS -> BS -> TV -> TV -> Diff
diffToks d1 d2 t1' t2' = pre ++ res ++ post
where
(pre, post, t1, t2) = stripEqToks d1 d2 t1' t2'
res
| V.null t1 = map (\t -> (Add,t)) (V.toList t2)
| V.null t2 = map (\t -> (Remove,t)) (V.toList t1)
| V.length t1 >= V.length t2 =
diffToks' $
DiffEnv
{ deD1 = d1
, deD2 = d2
, deT1 = t1
, deT2 = t2
, deS = 0
, deE = V.length t1
, deL = V.length t1
, deW = V.length t2
, deA = 0
, deB = V.length t2
, deVS = V.fromList [0 .. V.length t2]
, deVE = V.fromList $ reverse [0 .. V.length t2]
, deTrans = False
}
| otherwise =
diffToks' $
DiffEnv
{ deD1 = d2
, deD2 = d1
, deT1 = t2
, deT2 = t1
, deS = 0
, deE = V.length t2
, deL = V.length t2
, deW = V.length t1
, deA = 0
, deB = V.length t1
, deVS = V.fromList [0 .. V.length t1]
, deVE = V.fromList $ reverse [0 .. V.length t1]
, deTrans = True
}
minIndexFwd =
V.minIndexBy
@ -124,7 +139,7 @@ minIndexRev =
else GT --picks the last minimum
)
diffToks' :: DiffEnv -> [(Op, Tok)]
diffToks' :: DiffEnv -> Diff
diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
diff
where
@ -263,50 +278,7 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
diffToks' de {deE = mid, deVE = vecEmid, deB = opt} ++
diffToks' de {deS = mid, deVS = vecSmid, deA = opt}
{-
diffToksXX :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> IO [(Op, Tok)]
diffToksXX d1 d2 t1 t2 = do
let (b, e1, e2) = stripEqToks d1 d2 t1 t2
ms1 = e1 - b
ms2 = e2 - b
mtx <- M.new ((ms1 + 1) * (ms2 + 1))
let idx x y = (ms2 + 1) * x + y
forM_ [0 .. ms1] $ \i -> M.write mtx (idx i 0) i
forM_ [0 .. ms2] $ \i -> M.write mtx (idx 0 i) i
let toksMatch' x y = toksMatch d1 d2 t1 t2 (b + x) (b + y)
forM_ [1 .. ms1] $ \i ->
forM [1 .. ms2] $ \j -> do
up <- M.read mtx (idx i (j - 1))
left <- M.read mtx (idx (i - 1) j)
upleft <- M.read mtx (idx (i - 1) (j - 1))
M.write mtx (idx i j) $
minimum $
[up + 1, left + 1] ++
(if toksMatch' (i - 1) (j - 1)
then [upleft]
else [])
let doAdd i j = (:) (Add, t2 V.! (b + j - 1)) <$> backtrack i (j - 1)
doRem i j = (:) (Remove, t1 V.! (b + i - 1)) <$> backtrack (i - 1) j
doKeep i j = (:) (Keep, t1 V.! (b + i - 1)) <$> backtrack (i - 1) (j - 1)
backtrack :: Int -> Int -> IO [(Op, Tok)]
backtrack 0 0 = pure []
backtrack i 0 = doRem i 0
backtrack 0 j = doAdd 0 j
backtrack i j = do
add <- M.read mtx (idx i (j - 1))
rem <- M.read mtx (idx (i - 1) j)
keep <- M.read mtx (idx (i - 1) (j - 1))
if toksMatch' (i - 1) (j - 1) && keep <= min add rem
then doKeep i j
else if add <= rem
then doAdd i j
else doRem i j
diff <- reverse <$> backtrack ms1 ms2
return $
map ((,) Keep) (take b $ V.toList t1) ++
diff ++ map ((,) Keep) (drop e1 $ V.toList t1)
-}
pprDiff :: BS -> BS -> [(Op, Tok)] -> [BS]
pprDiff :: BS -> BS -> Diff -> [BS]
pprDiff d1 d2 = map (pprDiff1 d1 d2)
pprDiff1 d1 d2 (op, (tok, (i, j))) =
@ -333,7 +305,7 @@ escNewlines' s
| B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s)
| otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s)
hunks :: Int -> [(Op, Tok)] -> [Hunk]
hunks :: Int -> Diff -> [Hunk]
hunks ctxt = go 0 0 0 0 [] . groupBy ((==) `on` fst)
where
go _ _ bi bj backlog [] =