basic scores go!
This commit is contained in:
parent
b58b58ec61
commit
4b5bac3541
81
src/Diff.hs
81
src/Diff.hs
|
@ -21,10 +21,13 @@ import qualified Data.Vector as V
|
||||||
import qualified Data.Vector.Unboxed.Mutable as M
|
import qualified Data.Vector.Unboxed.Mutable as M
|
||||||
|
|
||||||
type Tok = (Bool, (Int, Int))
|
type Tok = (Bool, (Int, Int))
|
||||||
type Diff= [(Op,Tok)]
|
|
||||||
|
type Diff = [(Op, Tok)]
|
||||||
|
|
||||||
type Hunk = ((Int, Int), [(Op, Tok)])
|
type Hunk = ((Int, Int), [(Op, Tok)])
|
||||||
|
|
||||||
type BS = B.ByteString
|
type BS = B.ByteString
|
||||||
|
|
||||||
type TV = V.Vector Tok
|
type TV = V.Vector Tok
|
||||||
|
|
||||||
data Op
|
data Op
|
||||||
|
@ -45,8 +48,8 @@ data DiffEnv =
|
||||||
, deW :: Int
|
, deW :: Int
|
||||||
, deA :: Int
|
, deA :: Int
|
||||||
, deB :: Int
|
, deB :: Int
|
||||||
, deVS :: V.Vector Int
|
, deVS :: V.Vector (Int, Int)
|
||||||
, deVE :: V.Vector Int
|
, deVE :: V.Vector (Int, Int)
|
||||||
, deTrans :: Bool
|
, deTrans :: Bool
|
||||||
}
|
}
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
@ -76,18 +79,18 @@ stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2')
|
||||||
lastDiff (i + 1)
|
lastDiff (i + 1)
|
||||||
| otherwise = i
|
| otherwise = i
|
||||||
e = lastDiff 0
|
e = lastDiff 0
|
||||||
pre = map (\t -> (Keep,t)) . V.toList . V.take b $ t1
|
pre = map (\t -> (Keep, t)) . V.toList . V.take b $ t1
|
||||||
post = map (\t -> (Keep,t)) . V.toList . V.drop (l1-e) $ t1
|
post = map (\t -> (Keep, t)) . V.toList . V.drop (l1 - e) $ t1
|
||||||
t1' = V.slice b (l1-e-b) t1
|
t1' = V.slice b (l1 - e - b) t1
|
||||||
t2' = V.slice b (l2-e-b) t2
|
t2' = V.slice b (l2 - e - b) t2
|
||||||
|
|
||||||
diffToks :: BS -> BS -> TV -> TV -> Diff
|
diffToks :: BS -> BS -> TV -> TV -> Diff
|
||||||
diffToks d1 d2 t1' t2' = pre ++ res ++ post
|
diffToks d1 d2 t1' t2' = pre ++ res ++ post
|
||||||
where
|
where
|
||||||
(pre, post, t1, t2) = stripEqToks d1 d2 t1' t2'
|
(pre, post, t1, t2) = stripEqToks d1 d2 t1' t2'
|
||||||
res
|
res
|
||||||
| V.null t1 = map (\t -> (Add,t)) (V.toList t2)
|
| V.null t1 = map (\t -> (Add, t)) (V.toList t2)
|
||||||
| V.null t2 = map (\t -> (Remove,t)) (V.toList t1)
|
| V.null t2 = map (\t -> (Remove, t)) (V.toList t1)
|
||||||
| V.length t1 >= V.length t2 =
|
| V.length t1 >= V.length t2 =
|
||||||
diffToks' $
|
diffToks' $
|
||||||
DiffEnv
|
DiffEnv
|
||||||
|
@ -101,8 +104,8 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post
|
||||||
, deW = V.length t2
|
, deW = V.length t2
|
||||||
, deA = 0
|
, deA = 0
|
||||||
, deB = V.length t2
|
, deB = V.length t2
|
||||||
, deVS = V.fromList [0 .. V.length t2]
|
, deVS = V.fromList $ zip [0 .. V.length t2] (repeat 0)
|
||||||
, deVE = V.fromList $ reverse [0 .. V.length t2]
|
, deVE = V.fromList $ reverse $ zip [0 .. V.length t2] (repeat 0)
|
||||||
, deTrans = False
|
, deTrans = False
|
||||||
}
|
}
|
||||||
| otherwise =
|
| otherwise =
|
||||||
|
@ -118,8 +121,8 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post
|
||||||
, deW = V.length t1
|
, deW = V.length t1
|
||||||
, deA = 0
|
, deA = 0
|
||||||
, deB = V.length t1
|
, deB = V.length t1
|
||||||
, deVS = V.fromList [0 .. V.length t1]
|
, deVS = V.fromList $ zip [0 .. V.length t1] (repeat 0)
|
||||||
, deVE = V.fromList $ reverse [0 .. V.length t1]
|
, deVE = V.fromList $ reverse $ zip [0 .. V.length t1] (repeat 0)
|
||||||
, deTrans = True
|
, deTrans = True
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -146,23 +149,30 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
|
||||||
mid = (s + e) `div` 2
|
mid = (s + e) `div` 2
|
||||||
vecSmid = vecS mid
|
vecSmid = vecS mid
|
||||||
vecEmid = vecE mid
|
vecEmid = vecE mid
|
||||||
|
extraScore i =
|
||||||
|
if isToken
|
||||||
|
then -(tokEnd - tokBegin)
|
||||||
|
else 0
|
||||||
|
where
|
||||||
|
(isToken, (tokBegin, tokEnd)) = deT1 de V.! i
|
||||||
vecS = vec -- "forward" operation
|
vecS = vec -- "forward" operation
|
||||||
where
|
where
|
||||||
vec i
|
vec i
|
||||||
| i == s = deVS de
|
| i == s = deVS de
|
||||||
| i > s = V.fromList . upd i . vec $ pred i
|
| i > s = V.fromList . upd i . vec $ pred i
|
||||||
| otherwise = error "Internal bounds check failure"
|
| otherwise = error "Internal bounds check failure"
|
||||||
upd i v = i : go 1 i
|
upd i v = (i, 0) : go 1 (i, 0)
|
||||||
where
|
where
|
||||||
go j up
|
go j (iup, sup)
|
||||||
| j > deW de = []
|
| j > deW de = []
|
||||||
| otherwise =
|
| otherwise =
|
||||||
let left = v V.! j
|
let (ileft, sleft) = v V.! j
|
||||||
upleft = v V.! pred j
|
(iupleft, supleft) = v V.! pred j
|
||||||
keep
|
keep
|
||||||
| toksMatch (pred i) (pred j) de = min upleft
|
| toksMatch (pred i) (pred j) de =
|
||||||
|
min (iupleft, supleft + extraScore (pred i))
|
||||||
| otherwise = id
|
| otherwise = id
|
||||||
res = keep $ min (succ up) (succ left)
|
res = keep $ min (succ iup, sup) (succ ileft, sleft)
|
||||||
in res : go (succ j) res
|
in res : go (succ j) res
|
||||||
vecE = vec -- "backward" operation
|
vecE = vec -- "backward" operation
|
||||||
where
|
where
|
||||||
|
@ -170,18 +180,20 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
|
||||||
| i == e = deVE de
|
| i == e = deVE de
|
||||||
| i < e = V.fromList . reverse . upd i . vec $ succ i
|
| i < e = V.fromList . reverse . upd i . vec $ succ i
|
||||||
| otherwise = error "Internal bounds check failure"
|
| otherwise = error "Internal bounds check failure"
|
||||||
upd i v = (deL de - i) : go (pred $ deW de) (deL de - i)
|
upd i v = (deL de - i, 0) : go (pred $ deW de) (deL de - i, 0)
|
||||||
where
|
where
|
||||||
go j down
|
go j (idown, sdown)
|
||||||
| j < 0 = []
|
| j < 0 = []
|
||||||
| otherwise =
|
| otherwise =
|
||||||
let right = v V.! j
|
let (iright, sright) = v V.! j
|
||||||
downright = v V.! succ j
|
(idownright, sdownright) = v V.! succ j
|
||||||
keep
|
keep
|
||||||
| toksMatch i j de = min downright
|
| toksMatch i j de =
|
||||||
|
min (idownright, sdownright + extraScore i)
|
||||||
| otherwise = id
|
| otherwise = id
|
||||||
res = keep $ min (succ down) (succ right)
|
res = keep $ min (succ idown, sdown) (succ iright, sright)
|
||||||
in res : go (pred j) res
|
in res : go (pred j) res
|
||||||
|
scoreAdd (l1, x1) (l2, x2) = (l1 + l2, x1 + x2)
|
||||||
{- Now, find the optimal point for splitting.
|
{- Now, find the optimal point for splitting.
|
||||||
-
|
-
|
||||||
- Heuristics A: if 2 paths are completely same, prefer deletion first;
|
- Heuristics A: if 2 paths are completely same, prefer deletion first;
|
||||||
|
@ -193,7 +205,7 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
|
||||||
(if trans
|
(if trans
|
||||||
then minIndexRev
|
then minIndexRev
|
||||||
else minIndexFwd) $
|
else minIndexFwd) $
|
||||||
V.zipWith (+) (slice vecSmid) (slice vecEmid)
|
V.zipWith scoreAdd (slice vecSmid) (slice vecEmid)
|
||||||
where
|
where
|
||||||
slice = V.slice a (succ $ b - a)
|
slice = V.slice a (succ $ b - a)
|
||||||
diff
|
diff
|
||||||
|
@ -207,8 +219,8 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
|
||||||
vecRE = deVE de
|
vecRE = deVE de
|
||||||
vecLE = vecE s
|
vecLE = vecE s
|
||||||
vecRS = vecS e
|
vecRS = vecS e
|
||||||
sumL = V.zipWith (+) vecLS vecLE
|
sumL = V.zipWith scoreAdd vecLS vecLE
|
||||||
sumR = V.zipWith (+) vecRS vecRE
|
sumR = V.zipWith scoreAdd vecRS vecRE
|
||||||
{- This is getting a bit complicted. In the non-transposed case, we
|
{- This is getting a bit complicted. In the non-transposed case, we
|
||||||
- want to select one Remove/Keep surrounded by 0-n Add ops, possibly
|
- want to select one Remove/Keep surrounded by 0-n Add ops, possibly
|
||||||
- from both sides. The chosen path must belong to the best paths
|
- from both sides. The chosen path must belong to the best paths
|
||||||
|
@ -222,12 +234,12 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
|
||||||
sCost = vecLS V.! a
|
sCost = vecLS V.! a
|
||||||
eCost = vecRS V.! b
|
eCost = vecRS V.! b
|
||||||
doKeep
|
doKeep
|
||||||
| eCost - sCost == succ (b - a) = False
|
| fst eCost - fst sCost == succ (b - a) = False
|
||||||
| eCost - sCost == pred (b - a) = True
|
| fst eCost - fst sCost == pred (b - a) = True
|
||||||
| otherwise =
|
| otherwise =
|
||||||
error $
|
error $
|
||||||
"Internal check failure -- costs seem broken: " <>
|
"Internal check failure -- costs seem broken: " <>
|
||||||
show [sCost, eCost, a, b]
|
show [sCost, eCost] <> show [a, b]
|
||||||
jumpPos =
|
jumpPos =
|
||||||
(if trans {- Heuristic A applies here -}
|
(if trans {- Heuristic A applies here -}
|
||||||
then last
|
then last
|
||||||
|
@ -237,11 +249,12 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
|
||||||
[a .. if doKeep
|
[a .. if doKeep
|
||||||
then pred b
|
then pred b
|
||||||
else b]
|
else b]
|
||||||
, vecLS V.! i == sCost - a + i
|
, fst (vecLS V.! i) == fst sCost - a + i
|
||||||
, sumL V.! i == totalCost
|
, sumL V.! i == totalCost
|
||||||
, if doKeep
|
, if doKeep
|
||||||
then vecLS V.! i == vecRS V.! succ i
|
then scoreAdd (vecLS V.! i) (0, extraScore s) ==
|
||||||
else succ (vecLS V.! i) == vecRS V.! i
|
vecRS V.! succ i
|
||||||
|
else scoreAdd (vecLS V.! i) (1, 0) == vecRS V.! i
|
||||||
, if doKeep
|
, if doKeep
|
||||||
then sumR V.! succ i == totalCost
|
then sumR V.! succ i == totalCost
|
||||||
else sumR V.! i == totalCost
|
else sumR V.! i == totalCost
|
||||||
|
|
Loading…
Reference in a new issue