basic scores go!
This commit is contained in:
		
							parent
							
								
									b58b58ec61
								
							
						
					
					
						commit
						4b5bac3541
					
				
							
								
								
									
										81
									
								
								src/Diff.hs
									
									
									
									
									
								
							
							
						
						
									
										81
									
								
								src/Diff.hs
									
									
									
									
									
								
							|  | @ -21,10 +21,13 @@ import qualified Data.Vector as V | ||||||
| import qualified Data.Vector.Unboxed.Mutable as M | import qualified Data.Vector.Unboxed.Mutable as M | ||||||
| 
 | 
 | ||||||
| type Tok = (Bool, (Int, Int)) | type Tok = (Bool, (Int, Int)) | ||||||
| type Diff= [(Op,Tok)] | 
 | ||||||
|  | type Diff = [(Op, Tok)] | ||||||
|  | 
 | ||||||
| type Hunk = ((Int, Int), [(Op, Tok)]) | type Hunk = ((Int, Int), [(Op, Tok)]) | ||||||
| 
 | 
 | ||||||
| type BS = B.ByteString | type BS = B.ByteString | ||||||
|  | 
 | ||||||
| type TV = V.Vector Tok | type TV = V.Vector Tok | ||||||
| 
 | 
 | ||||||
| data Op | data Op | ||||||
|  | @ -45,8 +48,8 @@ data DiffEnv = | ||||||
|     , deW :: Int |     , deW :: Int | ||||||
|     , deA :: Int |     , deA :: Int | ||||||
|     , deB :: Int |     , deB :: Int | ||||||
|     , deVS :: V.Vector Int |     , deVS :: V.Vector (Int, Int) | ||||||
|     , deVE :: V.Vector Int |     , deVE :: V.Vector (Int, Int) | ||||||
|     , deTrans :: Bool |     , deTrans :: Bool | ||||||
|     } |     } | ||||||
|   deriving (Show) |   deriving (Show) | ||||||
|  | @ -76,18 +79,18 @@ stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2') | ||||||
|         lastDiff (i + 1) |         lastDiff (i + 1) | ||||||
|       | otherwise = i |       | otherwise = i | ||||||
|     e = lastDiff 0 |     e = lastDiff 0 | ||||||
|     pre = map (\t -> (Keep,t)) . V.toList . V.take b $ t1 |     pre = map (\t -> (Keep, t)) . V.toList . V.take b $ t1 | ||||||
|     post = map (\t -> (Keep,t)) . V.toList . V.drop (l1-e) $ t1 |     post = map (\t -> (Keep, t)) . V.toList . V.drop (l1 - e) $ t1 | ||||||
|     t1' = V.slice b (l1-e-b) t1 |     t1' = V.slice b (l1 - e - b) t1 | ||||||
|     t2' = V.slice b (l2-e-b) t2 |     t2' = V.slice b (l2 - e - b) t2 | ||||||
| 
 | 
 | ||||||
| diffToks :: BS -> BS -> TV -> TV -> Diff | diffToks :: BS -> BS -> TV -> TV -> Diff | ||||||
| diffToks d1 d2 t1' t2' = pre ++ res ++ post | diffToks d1 d2 t1' t2' = pre ++ res ++ post | ||||||
|   where |   where | ||||||
|     (pre, post, t1, t2) = stripEqToks d1 d2 t1' t2' |     (pre, post, t1, t2) = stripEqToks d1 d2 t1' t2' | ||||||
|     res |     res | ||||||
|       | V.null t1 = map (\t -> (Add,t)) (V.toList t2) |       | V.null t1 = map (\t -> (Add, t)) (V.toList t2) | ||||||
|       | V.null t2 = map (\t -> (Remove,t)) (V.toList t1) |       | V.null t2 = map (\t -> (Remove, t)) (V.toList t1) | ||||||
|       | V.length t1 >= V.length t2 = |       | V.length t1 >= V.length t2 = | ||||||
|         diffToks' $ |         diffToks' $ | ||||||
|         DiffEnv |         DiffEnv | ||||||
|  | @ -101,8 +104,8 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post | ||||||
|           , deW = V.length t2 |           , deW = V.length t2 | ||||||
|           , deA = 0 |           , deA = 0 | ||||||
|           , deB = V.length t2 |           , deB = V.length t2 | ||||||
|           , deVS = V.fromList [0 .. V.length t2] |           , deVS = V.fromList $ zip [0 .. V.length t2] (repeat 0) | ||||||
|           , deVE = V.fromList $ reverse [0 .. V.length t2] |           , deVE = V.fromList $ reverse $ zip [0 .. V.length t2] (repeat 0) | ||||||
|           , deTrans = False |           , deTrans = False | ||||||
|           } |           } | ||||||
|       | otherwise = |       | otherwise = | ||||||
|  | @ -118,8 +121,8 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post | ||||||
|           , deW = V.length t1 |           , deW = V.length t1 | ||||||
|           , deA = 0 |           , deA = 0 | ||||||
|           , deB = V.length t1 |           , deB = V.length t1 | ||||||
|           , deVS = V.fromList [0 .. V.length t1] |           , deVS = V.fromList $ zip [0 .. V.length t1] (repeat 0) | ||||||
|           , deVE = V.fromList $ reverse [0 .. V.length t1] |           , deVE = V.fromList $ reverse $ zip [0 .. V.length t1] (repeat 0) | ||||||
|           , deTrans = True |           , deTrans = True | ||||||
|           } |           } | ||||||
| 
 | 
 | ||||||
|  | @ -146,23 +149,30 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = | ||||||
|     mid = (s + e) `div` 2 |     mid = (s + e) `div` 2 | ||||||
|     vecSmid = vecS mid |     vecSmid = vecS mid | ||||||
|     vecEmid = vecE mid |     vecEmid = vecE mid | ||||||
|  |     extraScore i = | ||||||
|  |       if isToken | ||||||
|  |         then -(tokEnd - tokBegin) | ||||||
|  |         else 0 | ||||||
|  |       where | ||||||
|  |         (isToken, (tokBegin, tokEnd)) = deT1 de V.! i | ||||||
|     vecS = vec -- "forward" operation |     vecS = vec -- "forward" operation | ||||||
|       where |       where | ||||||
|         vec i |         vec i | ||||||
|           | i == s = deVS de |           | i == s = deVS de | ||||||
|           | i > s = V.fromList . upd i . vec $ pred i |           | i > s = V.fromList . upd i . vec $ pred i | ||||||
|           | otherwise = error "Internal bounds check failure" |           | otherwise = error "Internal bounds check failure" | ||||||
|         upd i v = i : go 1 i |         upd i v = (i, 0) : go 1 (i, 0) | ||||||
|           where |           where | ||||||
|             go j up |             go j (iup, sup) | ||||||
|               | j > deW de = [] |               | j > deW de = [] | ||||||
|               | otherwise = |               | otherwise = | ||||||
|                 let left = v V.! j |                 let (ileft, sleft) = v V.! j | ||||||
|                     upleft = v V.! pred j |                     (iupleft, supleft) = v V.! pred j | ||||||
|                     keep |                     keep | ||||||
|                       | toksMatch (pred i) (pred j) de = min upleft |                       | toksMatch (pred i) (pred j) de = | ||||||
|  |                         min (iupleft, supleft + extraScore (pred i)) | ||||||
|                       | otherwise = id |                       | otherwise = id | ||||||
|                     res = keep $ min (succ up) (succ left) |                     res = keep $ min (succ iup, sup) (succ ileft, sleft) | ||||||
|                  in res : go (succ j) res |                  in res : go (succ j) res | ||||||
|     vecE = vec -- "backward" operation |     vecE = vec -- "backward" operation | ||||||
|       where |       where | ||||||
|  | @ -170,18 +180,20 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = | ||||||
|           | i == e = deVE de |           | i == e = deVE de | ||||||
|           | i < e = V.fromList . reverse . upd i . vec $ succ i |           | i < e = V.fromList . reverse . upd i . vec $ succ i | ||||||
|           | otherwise = error "Internal bounds check failure" |           | otherwise = error "Internal bounds check failure" | ||||||
|         upd i v = (deL de - i) : go (pred $ deW de) (deL de - i) |         upd i v = (deL de - i, 0) : go (pred $ deW de) (deL de - i, 0) | ||||||
|           where |           where | ||||||
|             go j down |             go j (idown, sdown) | ||||||
|               | j < 0 = [] |               | j < 0 = [] | ||||||
|               | otherwise = |               | otherwise = | ||||||
|                 let right = v V.! j |                 let (iright, sright) = v V.! j | ||||||
|                     downright = v V.! succ j |                     (idownright, sdownright) = v V.! succ j | ||||||
|                     keep |                     keep | ||||||
|                       | toksMatch i j de = min downright |                       | toksMatch i j de = | ||||||
|  |                         min (idownright, sdownright + extraScore i) | ||||||
|                       | otherwise = id |                       | otherwise = id | ||||||
|                     res = keep $ min (succ down) (succ right) |                     res = keep $ min (succ idown, sdown) (succ iright, sright) | ||||||
|                  in res : go (pred j) res |                  in res : go (pred j) res | ||||||
|  |     scoreAdd (l1, x1) (l2, x2) = (l1 + l2, x1 + x2) | ||||||
|     {- Now, find the optimal point for splitting. |     {- Now, find the optimal point for splitting. | ||||||
|      - |      - | ||||||
|      - Heuristics A: if 2 paths are completely same, prefer deletion first; |      - Heuristics A: if 2 paths are completely same, prefer deletion first; | ||||||
|  | @ -193,7 +205,7 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = | ||||||
|       (if trans |       (if trans | ||||||
|          then minIndexRev |          then minIndexRev | ||||||
|          else minIndexFwd) $ |          else minIndexFwd) $ | ||||||
|       V.zipWith (+) (slice vecSmid) (slice vecEmid) |       V.zipWith scoreAdd (slice vecSmid) (slice vecEmid) | ||||||
|       where |       where | ||||||
|         slice = V.slice a (succ $ b - a) |         slice = V.slice a (succ $ b - a) | ||||||
|     diff |     diff | ||||||
|  | @ -207,8 +219,8 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = | ||||||
|             vecRE = deVE de |             vecRE = deVE de | ||||||
|             vecLE = vecE s |             vecLE = vecE s | ||||||
|             vecRS = vecS e |             vecRS = vecS e | ||||||
|             sumL = V.zipWith (+) vecLS vecLE |             sumL = V.zipWith scoreAdd vecLS vecLE | ||||||
|             sumR = V.zipWith (+) vecRS vecRE |             sumR = V.zipWith scoreAdd vecRS vecRE | ||||||
|             {- This is getting a bit complicted. In the non-transposed case, we |             {- This is getting a bit complicted. In the non-transposed case, we | ||||||
|              - want to select one Remove/Keep surrounded by 0-n Add ops, possibly |              - want to select one Remove/Keep surrounded by 0-n Add ops, possibly | ||||||
|              - from both sides. The chosen path must belong to the best paths |              - from both sides. The chosen path must belong to the best paths | ||||||
|  | @ -222,12 +234,12 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = | ||||||
|             sCost = vecLS V.! a |             sCost = vecLS V.! a | ||||||
|             eCost = vecRS V.! b |             eCost = vecRS V.! b | ||||||
|             doKeep |             doKeep | ||||||
|               | eCost - sCost == succ (b - a) = False |               | fst eCost - fst sCost == succ (b - a) = False | ||||||
|               | eCost - sCost == pred (b - a) = True |               | fst eCost - fst sCost == pred (b - a) = True | ||||||
|               | otherwise = |               | otherwise = | ||||||
|                 error $ |                 error $ | ||||||
|                 "Internal check failure -- costs seem broken: " <> |                 "Internal check failure -- costs seem broken: " <> | ||||||
|                 show [sCost, eCost, a, b] |                 show [sCost, eCost] <> show [a, b] | ||||||
|             jumpPos = |             jumpPos = | ||||||
|               (if trans {- Heuristic A applies here -} |               (if trans {- Heuristic A applies here -} | ||||||
|                  then last |                  then last | ||||||
|  | @ -237,11 +249,12 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = | ||||||
|                     [a .. if doKeep |                     [a .. if doKeep | ||||||
|                             then pred b |                             then pred b | ||||||
|                             else b] |                             else b] | ||||||
|                 , vecLS V.! i == sCost - a + i |                 , fst (vecLS V.! i) == fst sCost - a + i | ||||||
|                 , sumL V.! i == totalCost |                 , sumL V.! i == totalCost | ||||||
|                 , if doKeep |                 , if doKeep | ||||||
|                     then vecLS V.! i == vecRS V.! succ i |                     then scoreAdd (vecLS V.! i) (0, extraScore s) == | ||||||
|                     else succ (vecLS V.! i) == vecRS V.! i |                          vecRS V.! succ i | ||||||
|  |                     else scoreAdd (vecLS V.! i) (1, 0) == vecRS V.! i | ||||||
|                 , if doKeep |                 , if doKeep | ||||||
|                     then sumR V.! succ i == totalCost |                     then sumR V.! succ i == totalCost | ||||||
|                     else sumR V.! i == totalCost |                     else sumR V.! i == totalCost | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue