diff --git a/src/Diff.hs b/src/Diff.hs index 78db394..a332783 100644 --- a/src/Diff.hs +++ b/src/Diff.hs @@ -21,10 +21,11 @@ import qualified Data.Vector as V import qualified Data.Vector.Unboxed.Mutable as M type Tok = (Bool, (Int, Int)) - +type Diff= [(Op,Tok)] type Hunk = ((Int, Int), [(Op, Tok)]) type BS = B.ByteString +type TV = V.Vector Tok data Op = Remove @@ -36,8 +37,8 @@ data DiffEnv = DiffEnv { deD1 :: BS , deD2 :: BS - , deT1 :: V.Vector Tok - , deT2 :: V.Vector Tok + , deT1 :: TV + , deT2 :: TV , deS :: Int , deE :: Int , deL :: Int @@ -53,60 +54,74 @@ data DiffEnv = substr b e = B.take (e - b) . B.drop b toksMatch x y DiffEnv {deT1 = t1, deT2 = t2, deD1 = d1, deD2 = d2} = + toksMatch' x y d1 d2 t1 t2 + +toksMatch' x y d1 d2 t1 t2 = let (isTok1, (b1, e1)) = t1 V.! x (isTok2, (b2, e2)) = t2 V.! y in isTok1 == isTok2 && substr b1 e1 d1 == substr b2 e2 d2 -stripEqToks :: DiffEnv -> (Int, Int, Int) -stripEqToks de@DiffEnv {deT1 = t1, deT2 = t2} = (b, l1 - e, l2 - e) +stripEqToks :: BS -> BS -> TV -> TV -> (Diff, Diff, TV, TV) +stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2') where l1 = V.length t1 l2 = V.length t2 firstDiff i - | i < l1 && i < l2 && toksMatch i i de = firstDiff (i + 1) + | i < l1 && i < l2 && toksMatch' i i d1 d2 t1 t2 = firstDiff (i + 1) | otherwise = i b = firstDiff 0 lastDiff i | l1 - i - 1 >= b && - l2 - i - 1 >= b && toksMatch (l1 - i - 1) (l2 - i - 1) de = + l2 - i - 1 >= b && toksMatch' (l1 - i - 1) (l2 - i - 1) d1 d2 t1 t2 = lastDiff (i + 1) | otherwise = i e = lastDiff 0 + pre = map (\t -> (Keep,t)) . V.toList . V.take b $ t1 + post = map (\t -> (Keep,t)) . V.toList . V.drop (l1-e) $ t1 + t1' = V.slice b (l1-e-b) t1 + t2' = V.slice b (l2-e-b) t2 -diffToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> [(Op, Tok)] -diffToks d1 d2 t1 t2 = - diffToks' $ - if V.length t1 >= V.length t2 - then DiffEnv - { deD1 = d1 - , deD2 = d2 - , deT1 = t1 - , deT2 = t2 - , deS = 0 - , deE = V.length t1 - , deL = V.length t1 - , deW = V.length t2 - , deA = 0 - , deB = V.length t2 - , deVS = V.fromList [0 .. V.length t2] - , deVE = V.fromList $ reverse [0 .. V.length t2] - , deTrans = False - } - else DiffEnv - { deD1 = d2 - , deD2 = d1 - , deT1 = t2 - , deT2 = t1 - , deS = 0 - , deE = V.length t2 - , deL = V.length t2 - , deW = V.length t1 - , deA = 0 - , deB = V.length t1 - , deVS = V.fromList [0 .. V.length t1] - , deVE = V.fromList $ reverse [0 .. V.length t1] - , deTrans = True - } +diffToks :: BS -> BS -> TV -> TV -> Diff +diffToks d1 d2 t1' t2' = pre ++ res ++ post + where + (pre, post, t1, t2) = stripEqToks d1 d2 t1' t2' + res + | V.null t1 = map (\t -> (Add,t)) (V.toList t2) + | V.null t2 = map (\t -> (Remove,t)) (V.toList t1) + | V.length t1 >= V.length t2 = + diffToks' $ + DiffEnv + { deD1 = d1 + , deD2 = d2 + , deT1 = t1 + , deT2 = t2 + , deS = 0 + , deE = V.length t1 + , deL = V.length t1 + , deW = V.length t2 + , deA = 0 + , deB = V.length t2 + , deVS = V.fromList [0 .. V.length t2] + , deVE = V.fromList $ reverse [0 .. V.length t2] + , deTrans = False + } + | otherwise = + diffToks' $ + DiffEnv + { deD1 = d2 + , deD2 = d1 + , deT1 = t2 + , deT2 = t1 + , deS = 0 + , deE = V.length t2 + , deL = V.length t2 + , deW = V.length t1 + , deA = 0 + , deB = V.length t1 + , deVS = V.fromList [0 .. V.length t1] + , deVE = V.fromList $ reverse [0 .. V.length t1] + , deTrans = True + } minIndexFwd = V.minIndexBy @@ -124,7 +139,7 @@ minIndexRev = else GT --picks the last minimum ) -diffToks' :: DiffEnv -> [(Op, Tok)] +diffToks' :: DiffEnv -> Diff diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = diff where @@ -263,50 +278,7 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = diffToks' de {deE = mid, deVE = vecEmid, deB = opt} ++ diffToks' de {deS = mid, deVS = vecSmid, deA = opt} -{- -diffToksXX :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> IO [(Op, Tok)] -diffToksXX d1 d2 t1 t2 = do - let (b, e1, e2) = stripEqToks d1 d2 t1 t2 - ms1 = e1 - b - ms2 = e2 - b - mtx <- M.new ((ms1 + 1) * (ms2 + 1)) - let idx x y = (ms2 + 1) * x + y - forM_ [0 .. ms1] $ \i -> M.write mtx (idx i 0) i - forM_ [0 .. ms2] $ \i -> M.write mtx (idx 0 i) i - let toksMatch' x y = toksMatch d1 d2 t1 t2 (b + x) (b + y) - forM_ [1 .. ms1] $ \i -> - forM [1 .. ms2] $ \j -> do - up <- M.read mtx (idx i (j - 1)) - left <- M.read mtx (idx (i - 1) j) - upleft <- M.read mtx (idx (i - 1) (j - 1)) - M.write mtx (idx i j) $ - minimum $ - [up + 1, left + 1] ++ - (if toksMatch' (i - 1) (j - 1) - then [upleft] - else []) - let doAdd i j = (:) (Add, t2 V.! (b + j - 1)) <$> backtrack i (j - 1) - doRem i j = (:) (Remove, t1 V.! (b + i - 1)) <$> backtrack (i - 1) j - doKeep i j = (:) (Keep, t1 V.! (b + i - 1)) <$> backtrack (i - 1) (j - 1) - backtrack :: Int -> Int -> IO [(Op, Tok)] - backtrack 0 0 = pure [] - backtrack i 0 = doRem i 0 - backtrack 0 j = doAdd 0 j - backtrack i j = do - add <- M.read mtx (idx i (j - 1)) - rem <- M.read mtx (idx (i - 1) j) - keep <- M.read mtx (idx (i - 1) (j - 1)) - if toksMatch' (i - 1) (j - 1) && keep <= min add rem - then doKeep i j - else if add <= rem - then doAdd i j - else doRem i j - diff <- reverse <$> backtrack ms1 ms2 - return $ - map ((,) Keep) (take b $ V.toList t1) ++ - diff ++ map ((,) Keep) (drop e1 $ V.toList t1) --} -pprDiff :: BS -> BS -> [(Op, Tok)] -> [BS] +pprDiff :: BS -> BS -> Diff -> [BS] pprDiff d1 d2 = map (pprDiff1 d1 d2) pprDiff1 d1 d2 (op, (tok, (i, j))) = @@ -333,7 +305,7 @@ escNewlines' s | B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s) | otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s) -hunks :: Int -> [(Op, Tok)] -> [Hunk] +hunks :: Int -> Diff -> [Hunk] hunks ctxt = go 0 0 0 0 [] . groupBy ((==) `on` fst) where go _ _ bi bj backlog [] =