basic scores go!
This commit is contained in:
		
							parent
							
								
									b58b58ec61
								
							
						
					
					
						commit
						4b5bac3541
					
				
							
								
								
									
										81
									
								
								src/Diff.hs
									
									
									
									
									
								
							
							
						
						
									
										81
									
								
								src/Diff.hs
									
									
									
									
									
								
							|  | @ -21,10 +21,13 @@ import qualified Data.Vector as V | |||
| import qualified Data.Vector.Unboxed.Mutable as M | ||||
| 
 | ||||
| type Tok = (Bool, (Int, Int)) | ||||
| type Diff= [(Op,Tok)] | ||||
| 
 | ||||
| type Diff = [(Op, Tok)] | ||||
| 
 | ||||
| type Hunk = ((Int, Int), [(Op, Tok)]) | ||||
| 
 | ||||
| type BS = B.ByteString | ||||
| 
 | ||||
| type TV = V.Vector Tok | ||||
| 
 | ||||
| data Op | ||||
|  | @ -45,8 +48,8 @@ data DiffEnv = | |||
|     , deW :: Int | ||||
|     , deA :: Int | ||||
|     , deB :: Int | ||||
|     , deVS :: V.Vector Int | ||||
|     , deVE :: V.Vector Int | ||||
|     , deVS :: V.Vector (Int, Int) | ||||
|     , deVE :: V.Vector (Int, Int) | ||||
|     , deTrans :: Bool | ||||
|     } | ||||
|   deriving (Show) | ||||
|  | @ -76,18 +79,18 @@ stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2') | |||
|         lastDiff (i + 1) | ||||
|       | otherwise = i | ||||
|     e = lastDiff 0 | ||||
|     pre = map (\t -> (Keep,t)) . V.toList . V.take b $ t1 | ||||
|     post = map (\t -> (Keep,t)) . V.toList . V.drop (l1-e) $ t1 | ||||
|     t1' = V.slice b (l1-e-b) t1 | ||||
|     t2' = V.slice b (l2-e-b) t2 | ||||
|     pre = map (\t -> (Keep, t)) . V.toList . V.take b $ t1 | ||||
|     post = map (\t -> (Keep, t)) . V.toList . V.drop (l1 - e) $ t1 | ||||
|     t1' = V.slice b (l1 - e - b) t1 | ||||
|     t2' = V.slice b (l2 - e - b) t2 | ||||
| 
 | ||||
| diffToks :: BS -> BS -> TV -> TV -> Diff | ||||
| diffToks d1 d2 t1' t2' = pre ++ res ++ post | ||||
|   where | ||||
|     (pre, post, t1, t2) = stripEqToks d1 d2 t1' t2' | ||||
|     res | ||||
|       | V.null t1 = map (\t -> (Add,t)) (V.toList t2) | ||||
|       | V.null t2 = map (\t -> (Remove,t)) (V.toList t1) | ||||
|       | V.null t1 = map (\t -> (Add, t)) (V.toList t2) | ||||
|       | V.null t2 = map (\t -> (Remove, t)) (V.toList t1) | ||||
|       | V.length t1 >= V.length t2 = | ||||
|         diffToks' $ | ||||
|         DiffEnv | ||||
|  | @ -101,8 +104,8 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post | |||
|           , deW = V.length t2 | ||||
|           , deA = 0 | ||||
|           , deB = V.length t2 | ||||
|           , deVS = V.fromList [0 .. V.length t2] | ||||
|           , deVE = V.fromList $ reverse [0 .. V.length t2] | ||||
|           , deVS = V.fromList $ zip [0 .. V.length t2] (repeat 0) | ||||
|           , deVE = V.fromList $ reverse $ zip [0 .. V.length t2] (repeat 0) | ||||
|           , deTrans = False | ||||
|           } | ||||
|       | otherwise = | ||||
|  | @ -118,8 +121,8 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post | |||
|           , deW = V.length t1 | ||||
|           , deA = 0 | ||||
|           , deB = V.length t1 | ||||
|           , deVS = V.fromList [0 .. V.length t1] | ||||
|           , deVE = V.fromList $ reverse [0 .. V.length t1] | ||||
|           , deVS = V.fromList $ zip [0 .. V.length t1] (repeat 0) | ||||
|           , deVE = V.fromList $ reverse $ zip [0 .. V.length t1] (repeat 0) | ||||
|           , deTrans = True | ||||
|           } | ||||
| 
 | ||||
|  | @ -146,23 +149,30 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = | |||
|     mid = (s + e) `div` 2 | ||||
|     vecSmid = vecS mid | ||||
|     vecEmid = vecE mid | ||||
|     extraScore i = | ||||
|       if isToken | ||||
|         then -(tokEnd - tokBegin) | ||||
|         else 0 | ||||
|       where | ||||
|         (isToken, (tokBegin, tokEnd)) = deT1 de V.! i | ||||
|     vecS = vec -- "forward" operation | ||||
|       where | ||||
|         vec i | ||||
|           | i == s = deVS de | ||||
|           | i > s = V.fromList . upd i . vec $ pred i | ||||
|           | otherwise = error "Internal bounds check failure" | ||||
|         upd i v = i : go 1 i | ||||
|         upd i v = (i, 0) : go 1 (i, 0) | ||||
|           where | ||||
|             go j up | ||||
|             go j (iup, sup) | ||||
|               | j > deW de = [] | ||||
|               | otherwise = | ||||
|                 let left = v V.! j | ||||
|                     upleft = v V.! pred j | ||||
|                 let (ileft, sleft) = v V.! j | ||||
|                     (iupleft, supleft) = v V.! pred j | ||||
|                     keep | ||||
|                       | toksMatch (pred i) (pred j) de = min upleft | ||||
|                       | toksMatch (pred i) (pred j) de = | ||||
|                         min (iupleft, supleft + extraScore (pred i)) | ||||
|                       | otherwise = id | ||||
|                     res = keep $ min (succ up) (succ left) | ||||
|                     res = keep $ min (succ iup, sup) (succ ileft, sleft) | ||||
|                  in res : go (succ j) res | ||||
|     vecE = vec -- "backward" operation | ||||
|       where | ||||
|  | @ -170,18 +180,20 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = | |||
|           | i == e = deVE de | ||||
|           | i < e = V.fromList . reverse . upd i . vec $ succ i | ||||
|           | otherwise = error "Internal bounds check failure" | ||||
|         upd i v = (deL de - i) : go (pred $ deW de) (deL de - i) | ||||
|         upd i v = (deL de - i, 0) : go (pred $ deW de) (deL de - i, 0) | ||||
|           where | ||||
|             go j down | ||||
|             go j (idown, sdown) | ||||
|               | j < 0 = [] | ||||
|               | otherwise = | ||||
|                 let right = v V.! j | ||||
|                     downright = v V.! succ j | ||||
|                 let (iright, sright) = v V.! j | ||||
|                     (idownright, sdownright) = v V.! succ j | ||||
|                     keep | ||||
|                       | toksMatch i j de = min downright | ||||
|                       | toksMatch i j de = | ||||
|                         min (idownright, sdownright + extraScore i) | ||||
|                       | otherwise = id | ||||
|                     res = keep $ min (succ down) (succ right) | ||||
|                     res = keep $ min (succ idown, sdown) (succ iright, sright) | ||||
|                  in res : go (pred j) res | ||||
|     scoreAdd (l1, x1) (l2, x2) = (l1 + l2, x1 + x2) | ||||
|     {- Now, find the optimal point for splitting. | ||||
|      - | ||||
|      - Heuristics A: if 2 paths are completely same, prefer deletion first; | ||||
|  | @ -193,7 +205,7 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = | |||
|       (if trans | ||||
|          then minIndexRev | ||||
|          else minIndexFwd) $ | ||||
|       V.zipWith (+) (slice vecSmid) (slice vecEmid) | ||||
|       V.zipWith scoreAdd (slice vecSmid) (slice vecEmid) | ||||
|       where | ||||
|         slice = V.slice a (succ $ b - a) | ||||
|     diff | ||||
|  | @ -207,8 +219,8 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = | |||
|             vecRE = deVE de | ||||
|             vecLE = vecE s | ||||
|             vecRS = vecS e | ||||
|             sumL = V.zipWith (+) vecLS vecLE | ||||
|             sumR = V.zipWith (+) vecRS vecRE | ||||
|             sumL = V.zipWith scoreAdd vecLS vecLE | ||||
|             sumR = V.zipWith scoreAdd vecRS vecRE | ||||
|             {- This is getting a bit complicted. In the non-transposed case, we | ||||
|              - want to select one Remove/Keep surrounded by 0-n Add ops, possibly | ||||
|              - from both sides. The chosen path must belong to the best paths | ||||
|  | @ -222,12 +234,12 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = | |||
|             sCost = vecLS V.! a | ||||
|             eCost = vecRS V.! b | ||||
|             doKeep | ||||
|               | eCost - sCost == succ (b - a) = False | ||||
|               | eCost - sCost == pred (b - a) = True | ||||
|               | fst eCost - fst sCost == succ (b - a) = False | ||||
|               | fst eCost - fst sCost == pred (b - a) = True | ||||
|               | otherwise = | ||||
|                 error $ | ||||
|                 "Internal check failure -- costs seem broken: " <> | ||||
|                 show [sCost, eCost, a, b] | ||||
|                 show [sCost, eCost] <> show [a, b] | ||||
|             jumpPos = | ||||
|               (if trans {- Heuristic A applies here -} | ||||
|                  then last | ||||
|  | @ -237,11 +249,12 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = | |||
|                     [a .. if doKeep | ||||
|                             then pred b | ||||
|                             else b] | ||||
|                 , vecLS V.! i == sCost - a + i | ||||
|                 , fst (vecLS V.! i) == fst sCost - a + i | ||||
|                 , sumL V.! i == totalCost | ||||
|                 , if doKeep | ||||
|                     then vecLS V.! i == vecRS V.! succ i | ||||
|                     else succ (vecLS V.! i) == vecRS V.! i | ||||
|                     then scoreAdd (vecLS V.! i) (0, extraScore s) == | ||||
|                          vecRS V.! succ i | ||||
|                     else scoreAdd (vecLS V.! i) (1, 0) == vecRS V.! i | ||||
|                 , if doKeep | ||||
|                     then sumR V.! succ i == totalCost | ||||
|                     else sumR V.! i == totalCost | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue