basic scores go!

This commit is contained in:
Mirek Kratochvil 2020-09-19 20:35:32 +02:00
parent b58b58ec61
commit 4b5bac3541

View file

@ -21,10 +21,13 @@ import qualified Data.Vector as V
import qualified Data.Vector.Unboxed.Mutable as M import qualified Data.Vector.Unboxed.Mutable as M
type Tok = (Bool, (Int, Int)) type Tok = (Bool, (Int, Int))
type Diff= [(Op,Tok)]
type Diff = [(Op, Tok)]
type Hunk = ((Int, Int), [(Op, Tok)]) type Hunk = ((Int, Int), [(Op, Tok)])
type BS = B.ByteString type BS = B.ByteString
type TV = V.Vector Tok type TV = V.Vector Tok
data Op data Op
@ -45,8 +48,8 @@ data DiffEnv =
, deW :: Int , deW :: Int
, deA :: Int , deA :: Int
, deB :: Int , deB :: Int
, deVS :: V.Vector Int , deVS :: V.Vector (Int, Int)
, deVE :: V.Vector Int , deVE :: V.Vector (Int, Int)
, deTrans :: Bool , deTrans :: Bool
} }
deriving (Show) deriving (Show)
@ -76,18 +79,18 @@ stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2')
lastDiff (i + 1) lastDiff (i + 1)
| otherwise = i | otherwise = i
e = lastDiff 0 e = lastDiff 0
pre = map (\t -> (Keep,t)) . V.toList . V.take b $ t1 pre = map (\t -> (Keep, t)) . V.toList . V.take b $ t1
post = map (\t -> (Keep,t)) . V.toList . V.drop (l1-e) $ t1 post = map (\t -> (Keep, t)) . V.toList . V.drop (l1 - e) $ t1
t1' = V.slice b (l1-e-b) t1 t1' = V.slice b (l1 - e - b) t1
t2' = V.slice b (l2-e-b) t2 t2' = V.slice b (l2 - e - b) t2
diffToks :: BS -> BS -> TV -> TV -> Diff diffToks :: BS -> BS -> TV -> TV -> Diff
diffToks d1 d2 t1' t2' = pre ++ res ++ post diffToks d1 d2 t1' t2' = pre ++ res ++ post
where where
(pre, post, t1, t2) = stripEqToks d1 d2 t1' t2' (pre, post, t1, t2) = stripEqToks d1 d2 t1' t2'
res res
| V.null t1 = map (\t -> (Add,t)) (V.toList t2) | V.null t1 = map (\t -> (Add, t)) (V.toList t2)
| V.null t2 = map (\t -> (Remove,t)) (V.toList t1) | V.null t2 = map (\t -> (Remove, t)) (V.toList t1)
| V.length t1 >= V.length t2 = | V.length t1 >= V.length t2 =
diffToks' $ diffToks' $
DiffEnv DiffEnv
@ -101,8 +104,8 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post
, deW = V.length t2 , deW = V.length t2
, deA = 0 , deA = 0
, deB = V.length t2 , deB = V.length t2
, deVS = V.fromList [0 .. V.length t2] , deVS = V.fromList $ zip [0 .. V.length t2] (repeat 0)
, deVE = V.fromList $ reverse [0 .. V.length t2] , deVE = V.fromList $ reverse $ zip [0 .. V.length t2] (repeat 0)
, deTrans = False , deTrans = False
} }
| otherwise = | otherwise =
@ -118,8 +121,8 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post
, deW = V.length t1 , deW = V.length t1
, deA = 0 , deA = 0
, deB = V.length t1 , deB = V.length t1
, deVS = V.fromList [0 .. V.length t1] , deVS = V.fromList $ zip [0 .. V.length t1] (repeat 0)
, deVE = V.fromList $ reverse [0 .. V.length t1] , deVE = V.fromList $ reverse $ zip [0 .. V.length t1] (repeat 0)
, deTrans = True , deTrans = True
} }
@ -146,23 +149,30 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
mid = (s + e) `div` 2 mid = (s + e) `div` 2
vecSmid = vecS mid vecSmid = vecS mid
vecEmid = vecE mid vecEmid = vecE mid
extraScore i =
if isToken
then -(tokEnd - tokBegin)
else 0
where
(isToken, (tokBegin, tokEnd)) = deT1 de V.! i
vecS = vec -- "forward" operation vecS = vec -- "forward" operation
where where
vec i vec i
| i == s = deVS de | i == s = deVS de
| i > s = V.fromList . upd i . vec $ pred i | i > s = V.fromList . upd i . vec $ pred i
| otherwise = error "Internal bounds check failure" | otherwise = error "Internal bounds check failure"
upd i v = i : go 1 i upd i v = (i, 0) : go 1 (i, 0)
where where
go j up go j (iup, sup)
| j > deW de = [] | j > deW de = []
| otherwise = | otherwise =
let left = v V.! j let (ileft, sleft) = v V.! j
upleft = v V.! pred j (iupleft, supleft) = v V.! pred j
keep keep
| toksMatch (pred i) (pred j) de = min upleft | toksMatch (pred i) (pred j) de =
min (iupleft, supleft + extraScore (pred i))
| otherwise = id | otherwise = id
res = keep $ min (succ up) (succ left) res = keep $ min (succ iup, sup) (succ ileft, sleft)
in res : go (succ j) res in res : go (succ j) res
vecE = vec -- "backward" operation vecE = vec -- "backward" operation
where where
@ -170,18 +180,20 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
| i == e = deVE de | i == e = deVE de
| i < e = V.fromList . reverse . upd i . vec $ succ i | i < e = V.fromList . reverse . upd i . vec $ succ i
| otherwise = error "Internal bounds check failure" | otherwise = error "Internal bounds check failure"
upd i v = (deL de - i) : go (pred $ deW de) (deL de - i) upd i v = (deL de - i, 0) : go (pred $ deW de) (deL de - i, 0)
where where
go j down go j (idown, sdown)
| j < 0 = [] | j < 0 = []
| otherwise = | otherwise =
let right = v V.! j let (iright, sright) = v V.! j
downright = v V.! succ j (idownright, sdownright) = v V.! succ j
keep keep
| toksMatch i j de = min downright | toksMatch i j de =
min (idownright, sdownright + extraScore i)
| otherwise = id | otherwise = id
res = keep $ min (succ down) (succ right) res = keep $ min (succ idown, sdown) (succ iright, sright)
in res : go (pred j) res in res : go (pred j) res
scoreAdd (l1, x1) (l2, x2) = (l1 + l2, x1 + x2)
{- Now, find the optimal point for splitting. {- Now, find the optimal point for splitting.
- -
- Heuristics A: if 2 paths are completely same, prefer deletion first; - Heuristics A: if 2 paths are completely same, prefer deletion first;
@ -193,7 +205,7 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
(if trans (if trans
then minIndexRev then minIndexRev
else minIndexFwd) $ else minIndexFwd) $
V.zipWith (+) (slice vecSmid) (slice vecEmid) V.zipWith scoreAdd (slice vecSmid) (slice vecEmid)
where where
slice = V.slice a (succ $ b - a) slice = V.slice a (succ $ b - a)
diff diff
@ -207,8 +219,8 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
vecRE = deVE de vecRE = deVE de
vecLE = vecE s vecLE = vecE s
vecRS = vecS e vecRS = vecS e
sumL = V.zipWith (+) vecLS vecLE sumL = V.zipWith scoreAdd vecLS vecLE
sumR = V.zipWith (+) vecRS vecRE sumR = V.zipWith scoreAdd vecRS vecRE
{- This is getting a bit complicted. In the non-transposed case, we {- This is getting a bit complicted. In the non-transposed case, we
- want to select one Remove/Keep surrounded by 0-n Add ops, possibly - want to select one Remove/Keep surrounded by 0-n Add ops, possibly
- from both sides. The chosen path must belong to the best paths - from both sides. The chosen path must belong to the best paths
@ -222,12 +234,12 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
sCost = vecLS V.! a sCost = vecLS V.! a
eCost = vecRS V.! b eCost = vecRS V.! b
doKeep doKeep
| eCost - sCost == succ (b - a) = False | fst eCost - fst sCost == succ (b - a) = False
| eCost - sCost == pred (b - a) = True | fst eCost - fst sCost == pred (b - a) = True
| otherwise = | otherwise =
error $ error $
"Internal check failure -- costs seem broken: " <> "Internal check failure -- costs seem broken: " <>
show [sCost, eCost, a, b] show [sCost, eCost] <> show [a, b]
jumpPos = jumpPos =
(if trans {- Heuristic A applies here -} (if trans {- Heuristic A applies here -}
then last then last
@ -237,11 +249,12 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
[a .. if doKeep [a .. if doKeep
then pred b then pred b
else b] else b]
, vecLS V.! i == sCost - a + i , fst (vecLS V.! i) == fst sCost - a + i
, sumL V.! i == totalCost , sumL V.! i == totalCost
, if doKeep , if doKeep
then vecLS V.! i == vecRS V.! succ i then scoreAdd (vecLS V.! i) (0, extraScore s) ==
else succ (vecLS V.! i) == vecRS V.! i vecRS V.! succ i
else scoreAdd (vecLS V.! i) (1, 0) == vecRS V.! i
, if doKeep , if doKeep
then sumR V.! succ i == totalCost then sumR V.! succ i == totalCost
else sumR V.! i == totalCost else sumR V.! i == totalCost