From 4b5bac3541f532632ef56e28896f41c152acffdb Mon Sep 17 00:00:00 2001 From: Mirek Kratochvil Date: Sat, 19 Sep 2020 20:35:32 +0200 Subject: [PATCH] basic scores go! --- src/Diff.hs | 81 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 34 deletions(-) diff --git a/src/Diff.hs b/src/Diff.hs index a332783..e4344c1 100644 --- a/src/Diff.hs +++ b/src/Diff.hs @@ -21,10 +21,13 @@ import qualified Data.Vector as V import qualified Data.Vector.Unboxed.Mutable as M type Tok = (Bool, (Int, Int)) -type Diff= [(Op,Tok)] + +type Diff = [(Op, Tok)] + type Hunk = ((Int, Int), [(Op, Tok)]) type BS = B.ByteString + type TV = V.Vector Tok data Op @@ -45,8 +48,8 @@ data DiffEnv = , deW :: Int , deA :: Int , deB :: Int - , deVS :: V.Vector Int - , deVE :: V.Vector Int + , deVS :: V.Vector (Int, Int) + , deVE :: V.Vector (Int, Int) , deTrans :: Bool } deriving (Show) @@ -76,18 +79,18 @@ stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2') lastDiff (i + 1) | otherwise = i e = lastDiff 0 - pre = map (\t -> (Keep,t)) . V.toList . V.take b $ t1 - post = map (\t -> (Keep,t)) . V.toList . V.drop (l1-e) $ t1 - t1' = V.slice b (l1-e-b) t1 - t2' = V.slice b (l2-e-b) t2 + pre = map (\t -> (Keep, t)) . V.toList . V.take b $ t1 + post = map (\t -> (Keep, t)) . V.toList . V.drop (l1 - e) $ t1 + t1' = V.slice b (l1 - e - b) t1 + t2' = V.slice b (l2 - e - b) t2 diffToks :: BS -> BS -> TV -> TV -> Diff diffToks d1 d2 t1' t2' = pre ++ res ++ post where (pre, post, t1, t2) = stripEqToks d1 d2 t1' t2' res - | V.null t1 = map (\t -> (Add,t)) (V.toList t2) - | V.null t2 = map (\t -> (Remove,t)) (V.toList t1) + | V.null t1 = map (\t -> (Add, t)) (V.toList t2) + | V.null t2 = map (\t -> (Remove, t)) (V.toList t1) | V.length t1 >= V.length t2 = diffToks' $ DiffEnv @@ -101,8 +104,8 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post , deW = V.length t2 , deA = 0 , deB = V.length t2 - , deVS = V.fromList [0 .. V.length t2] - , deVE = V.fromList $ reverse [0 .. V.length t2] + , deVS = V.fromList $ zip [0 .. V.length t2] (repeat 0) + , deVE = V.fromList $ reverse $ zip [0 .. V.length t2] (repeat 0) , deTrans = False } | otherwise = @@ -118,8 +121,8 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post , deW = V.length t1 , deA = 0 , deB = V.length t1 - , deVS = V.fromList [0 .. V.length t1] - , deVE = V.fromList $ reverse [0 .. V.length t1] + , deVS = V.fromList $ zip [0 .. V.length t1] (repeat 0) + , deVE = V.fromList $ reverse $ zip [0 .. V.length t1] (repeat 0) , deTrans = True } @@ -146,23 +149,30 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = mid = (s + e) `div` 2 vecSmid = vecS mid vecEmid = vecE mid + extraScore i = + if isToken + then -(tokEnd - tokBegin) + else 0 + where + (isToken, (tokBegin, tokEnd)) = deT1 de V.! i vecS = vec -- "forward" operation where vec i | i == s = deVS de | i > s = V.fromList . upd i . vec $ pred i | otherwise = error "Internal bounds check failure" - upd i v = i : go 1 i + upd i v = (i, 0) : go 1 (i, 0) where - go j up + go j (iup, sup) | j > deW de = [] | otherwise = - let left = v V.! j - upleft = v V.! pred j + let (ileft, sleft) = v V.! j + (iupleft, supleft) = v V.! pred j keep - | toksMatch (pred i) (pred j) de = min upleft + | toksMatch (pred i) (pred j) de = + min (iupleft, supleft + extraScore (pred i)) | otherwise = id - res = keep $ min (succ up) (succ left) + res = keep $ min (succ iup, sup) (succ ileft, sleft) in res : go (succ j) res vecE = vec -- "backward" operation where @@ -170,18 +180,20 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = | i == e = deVE de | i < e = V.fromList . reverse . upd i . vec $ succ i | otherwise = error "Internal bounds check failure" - upd i v = (deL de - i) : go (pred $ deW de) (deL de - i) + upd i v = (deL de - i, 0) : go (pred $ deW de) (deL de - i, 0) where - go j down + go j (idown, sdown) | j < 0 = [] | otherwise = - let right = v V.! j - downright = v V.! succ j + let (iright, sright) = v V.! j + (idownright, sdownright) = v V.! succ j keep - | toksMatch i j de = min downright + | toksMatch i j de = + min (idownright, sdownright + extraScore i) | otherwise = id - res = keep $ min (succ down) (succ right) + res = keep $ min (succ idown, sdown) (succ iright, sright) in res : go (pred j) res + scoreAdd (l1, x1) (l2, x2) = (l1 + l2, x1 + x2) {- Now, find the optimal point for splitting. - - Heuristics A: if 2 paths are completely same, prefer deletion first; @@ -193,7 +205,7 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = (if trans then minIndexRev else minIndexFwd) $ - V.zipWith (+) (slice vecSmid) (slice vecEmid) + V.zipWith scoreAdd (slice vecSmid) (slice vecEmid) where slice = V.slice a (succ $ b - a) diff @@ -207,8 +219,8 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = vecRE = deVE de vecLE = vecE s vecRS = vecS e - sumL = V.zipWith (+) vecLS vecLE - sumR = V.zipWith (+) vecRS vecRE + sumL = V.zipWith scoreAdd vecLS vecLE + sumR = V.zipWith scoreAdd vecRS vecRE {- This is getting a bit complicted. In the non-transposed case, we - want to select one Remove/Keep surrounded by 0-n Add ops, possibly - from both sides. The chosen path must belong to the best paths @@ -222,12 +234,12 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = sCost = vecLS V.! a eCost = vecRS V.! b doKeep - | eCost - sCost == succ (b - a) = False - | eCost - sCost == pred (b - a) = True + | fst eCost - fst sCost == succ (b - a) = False + | fst eCost - fst sCost == pred (b - a) = True | otherwise = error $ "Internal check failure -- costs seem broken: " <> - show [sCost, eCost, a, b] + show [sCost, eCost] <> show [a, b] jumpPos = (if trans {- Heuristic A applies here -} then last @@ -237,11 +249,12 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = [a .. if doKeep then pred b else b] - , vecLS V.! i == sCost - a + i + , fst (vecLS V.! i) == fst sCost - a + i , sumL V.! i == totalCost , if doKeep - then vecLS V.! i == vecRS V.! succ i - else succ (vecLS V.! i) == vecRS V.! i + then scoreAdd (vecLS V.! i) (0, extraScore s) == + vecRS V.! succ i + else scoreAdd (vecLS V.! i) (1, 0) == vecRS V.! i , if doKeep then sumR V.! succ i == totalCost else sumR V.! i == totalCost