basic scores go!

This commit is contained in:
Mirek Kratochvil 2020-09-19 20:35:32 +02:00
parent b58b58ec61
commit 4b5bac3541

View file

@ -21,10 +21,13 @@ import qualified Data.Vector as V
import qualified Data.Vector.Unboxed.Mutable as M
type Tok = (Bool, (Int, Int))
type Diff = [(Op, Tok)]
type Hunk = ((Int, Int), [(Op, Tok)])
type BS = B.ByteString
type TV = V.Vector Tok
data Op
@ -45,8 +48,8 @@ data DiffEnv =
, deW :: Int
, deA :: Int
, deB :: Int
, deVS :: V.Vector Int
, deVE :: V.Vector Int
, deVS :: V.Vector (Int, Int)
, deVE :: V.Vector (Int, Int)
, deTrans :: Bool
}
deriving (Show)
@ -101,8 +104,8 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post
, deW = V.length t2
, deA = 0
, deB = V.length t2
, deVS = V.fromList [0 .. V.length t2]
, deVE = V.fromList $ reverse [0 .. V.length t2]
, deVS = V.fromList $ zip [0 .. V.length t2] (repeat 0)
, deVE = V.fromList $ reverse $ zip [0 .. V.length t2] (repeat 0)
, deTrans = False
}
| otherwise =
@ -118,8 +121,8 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post
, deW = V.length t1
, deA = 0
, deB = V.length t1
, deVS = V.fromList [0 .. V.length t1]
, deVE = V.fromList $ reverse [0 .. V.length t1]
, deVS = V.fromList $ zip [0 .. V.length t1] (repeat 0)
, deVE = V.fromList $ reverse $ zip [0 .. V.length t1] (repeat 0)
, deTrans = True
}
@ -146,23 +149,30 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
mid = (s + e) `div` 2
vecSmid = vecS mid
vecEmid = vecE mid
extraScore i =
if isToken
then -(tokEnd - tokBegin)
else 0
where
(isToken, (tokBegin, tokEnd)) = deT1 de V.! i
vecS = vec -- "forward" operation
where
vec i
| i == s = deVS de
| i > s = V.fromList . upd i . vec $ pred i
| otherwise = error "Internal bounds check failure"
upd i v = i : go 1 i
upd i v = (i, 0) : go 1 (i, 0)
where
go j up
go j (iup, sup)
| j > deW de = []
| otherwise =
let left = v V.! j
upleft = v V.! pred j
let (ileft, sleft) = v V.! j
(iupleft, supleft) = v V.! pred j
keep
| toksMatch (pred i) (pred j) de = min upleft
| toksMatch (pred i) (pred j) de =
min (iupleft, supleft + extraScore (pred i))
| otherwise = id
res = keep $ min (succ up) (succ left)
res = keep $ min (succ iup, sup) (succ ileft, sleft)
in res : go (succ j) res
vecE = vec -- "backward" operation
where
@ -170,18 +180,20 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
| i == e = deVE de
| i < e = V.fromList . reverse . upd i . vec $ succ i
| otherwise = error "Internal bounds check failure"
upd i v = (deL de - i) : go (pred $ deW de) (deL de - i)
upd i v = (deL de - i, 0) : go (pred $ deW de) (deL de - i, 0)
where
go j down
go j (idown, sdown)
| j < 0 = []
| otherwise =
let right = v V.! j
downright = v V.! succ j
let (iright, sright) = v V.! j
(idownright, sdownright) = v V.! succ j
keep
| toksMatch i j de = min downright
| toksMatch i j de =
min (idownright, sdownright + extraScore i)
| otherwise = id
res = keep $ min (succ down) (succ right)
res = keep $ min (succ idown, sdown) (succ iright, sright)
in res : go (pred j) res
scoreAdd (l1, x1) (l2, x2) = (l1 + l2, x1 + x2)
{- Now, find the optimal point for splitting.
-
- Heuristics A: if 2 paths are completely same, prefer deletion first;
@ -193,7 +205,7 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
(if trans
then minIndexRev
else minIndexFwd) $
V.zipWith (+) (slice vecSmid) (slice vecEmid)
V.zipWith scoreAdd (slice vecSmid) (slice vecEmid)
where
slice = V.slice a (succ $ b - a)
diff
@ -207,8 +219,8 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
vecRE = deVE de
vecLE = vecE s
vecRS = vecS e
sumL = V.zipWith (+) vecLS vecLE
sumR = V.zipWith (+) vecRS vecRE
sumL = V.zipWith scoreAdd vecLS vecLE
sumR = V.zipWith scoreAdd vecRS vecRE
{- This is getting a bit complicted. In the non-transposed case, we
- want to select one Remove/Keep surrounded by 0-n Add ops, possibly
- from both sides. The chosen path must belong to the best paths
@ -222,12 +234,12 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
sCost = vecLS V.! a
eCost = vecRS V.! b
doKeep
| eCost - sCost == succ (b - a) = False
| eCost - sCost == pred (b - a) = True
| fst eCost - fst sCost == succ (b - a) = False
| fst eCost - fst sCost == pred (b - a) = True
| otherwise =
error $
"Internal check failure -- costs seem broken: " <>
show [sCost, eCost, a, b]
show [sCost, eCost] <> show [a, b]
jumpPos =
(if trans {- Heuristic A applies here -}
then last
@ -237,11 +249,12 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
[a .. if doKeep
then pred b
else b]
, vecLS V.! i == sCost - a + i
, fst (vecLS V.! i) == fst sCost - a + i
, sumL V.! i == totalCost
, if doKeep
then vecLS V.! i == vecRS V.! succ i
else succ (vecLS V.! i) == vecRS V.! i
then scoreAdd (vecLS V.! i) (0, extraScore s) ==
vecRS V.! succ i
else scoreAdd (vecLS V.! i) (1, 0) == vecRS V.! i
, if doKeep
then sumR V.! succ i == totalCost
else sumR V.! i == totalCost