implement diff
This commit is contained in:
		
							parent
							
								
									96ef2e732b
								
							
						
					
					
						commit
						7fc24d236c
					
				|  | @ -43,7 +43,7 @@ executable adiff | |||
|   main-is: MainDiff.hs | ||||
| 
 | ||||
|   -- Modules included in this executable, other than Main. | ||||
|   other-modules: Redfa, Version | ||||
|   other-modules: Redfa, Version, Diff | ||||
| 
 | ||||
|   -- LANGUAGE extensions used by modules in this package. | ||||
|   other-extensions: CPP | ||||
|  |  | |||
							
								
								
									
										166
									
								
								src/Diff.hs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										166
									
								
								src/Diff.hs
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,166 @@ | |||
| module Diff | ||||
|   ( Tok | ||||
|   , Op | ||||
|   , diffToks | ||||
|   , hunks | ||||
|   , pprDiff | ||||
|   , pprDiff1 | ||||
|   , pprHunk | ||||
|   , pprHunks | ||||
|   ) where | ||||
| 
 | ||||
| import Control.Monad | ||||
| import qualified Data.ByteString as B | ||||
| import qualified Data.ByteString.Builder as BB | ||||
| import qualified Data.ByteString.Internal as BI | ||||
| import qualified Data.ByteString.Lazy as BL | ||||
| import Data.ByteString.UTF8 (fromString) | ||||
| import Data.Function (on) | ||||
| import Data.List (groupBy) | ||||
| import qualified Data.Vector as V | ||||
| import qualified Data.Vector.Unboxed.Mutable as M | ||||
| 
 | ||||
| type Tok = (Bool, (Int, Int)) | ||||
| 
 | ||||
| type Hunk = ((Int, Int), [(Op, Tok)]) | ||||
| 
 | ||||
| type BS = B.ByteString | ||||
| 
 | ||||
| data Op | ||||
|   = Remove | ||||
|   | Keep | ||||
|   | Add | ||||
|   deriving (Show, Eq) | ||||
| 
 | ||||
| substr b e = B.take (e - b) . B.drop b | ||||
| 
 | ||||
| toksMatch d1 d2 t1 t2 x y = | ||||
|   let (tok1, (b1, e1)) = t1 V.! x | ||||
|       (tok2, (b2, e2)) = t2 V.! y | ||||
|    in tok1 == tok2 && substr b1 e1 d1 == substr b2 e2 d2 | ||||
| 
 | ||||
| stripEqToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> (Int, Int, Int) | ||||
| stripEqToks d1 d2 t1 t2 = (b, l1 - e, l2 - e) | ||||
|   where | ||||
|     l1 = V.length t1 | ||||
|     l2 = V.length t2 | ||||
|     firstDiff i | ||||
|       | i < l1 && i < l2 && toksMatch d1 d2 t1 t2 i i = firstDiff (i + 1) | ||||
|       | otherwise = i | ||||
|     b = firstDiff 0 | ||||
|     lastDiff i | ||||
|       | l1 - i - 1 >= b && | ||||
|           l2 - i - 1 >= b && toksMatch d1 d2 t1 t2 (l1 - i - 1) (l2 - i - 1) = | ||||
|         lastDiff (i + 1) | ||||
|       | otherwise = i | ||||
|     e = lastDiff 0 | ||||
| 
 | ||||
| diffToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> IO [(Op, Tok)] | ||||
| diffToks d1 d2 t1 t2 = do | ||||
|   let (b, e1, e2) = stripEqToks d1 d2 t1 t2 | ||||
|       ms1 = e1 - b | ||||
|       ms2 = e2 - b | ||||
|   mtx <- M.new ((ms1 + 1) * (ms2 + 1)) | ||||
|   let idx x y = (ms2 + 1) * x + y | ||||
|   forM_ [0 .. ms1] $ \i -> M.write mtx (idx i 0) i | ||||
|   forM_ [0 .. ms2] $ \i -> M.write mtx (idx 0 i) i | ||||
|   let toksMatch' x y = toksMatch d1 d2 t1 t2 (b + x) (b + y) | ||||
|   forM_ [1 .. ms1] $ \i -> | ||||
|     forM [1 .. ms2] $ \j -> do | ||||
|       up <- M.read mtx (idx i (j - 1)) | ||||
|       left <- M.read mtx (idx (i - 1) j) | ||||
|       upleft <- M.read mtx (idx (i - 1) (j - 1)) | ||||
|       M.write mtx (idx i j) $ | ||||
|         minimum $ | ||||
|         [up + 1, left + 1] ++ | ||||
|         (if toksMatch' (i - 1) (j - 1) | ||||
|            then [upleft] | ||||
|            else []) | ||||
|   let doAdd i j = (:) (Add, t2 V.! (b + j - 1)) <$> backtrack i (j - 1) | ||||
|       doRem i j = (:) (Remove, t1 V.! (b + i - 1)) <$> backtrack (i - 1) j | ||||
|       doKeep i j = (:) (Keep, t1 V.! (b + i - 1)) <$> backtrack (i - 1) (j - 1) | ||||
|       backtrack :: Int -> Int -> IO [(Op, Tok)] | ||||
|       backtrack 0 0 = pure [] | ||||
|       backtrack i 0 = doRem i 0 | ||||
|       backtrack 0 j = doAdd 0 j | ||||
|       backtrack i j = do | ||||
|         add <- M.read mtx (idx i (j - 1)) | ||||
|         rem <- M.read mtx (idx (i - 1) j) | ||||
|         keep <- M.read mtx (idx (i - 1) (j - 1)) | ||||
|         if toksMatch' (i - 1) (j - 1) && keep <= min add rem | ||||
|           then doKeep i j | ||||
|           else if add <= rem | ||||
|                  then doAdd i j | ||||
|                  else doRem i j | ||||
|   diff <- reverse <$> backtrack ms1 ms2 | ||||
|   return $ | ||||
|     map ((,) Keep) (take b $ V.toList t1) ++ | ||||
|     diff ++ map ((,) Keep) (drop e1 $ V.toList t1) | ||||
| 
 | ||||
| pprDiff :: BS -> BS -> [(Op, Tok)] -> [BS] | ||||
| pprDiff d1 d2 = map (pprDiff1 d1 d2) | ||||
| 
 | ||||
| pprDiff1 d1 d2 (op, (tok, (i, j))) = | ||||
|   fromString pfx <> escNewlines (substr i j s) | ||||
|   where | ||||
|     pfx = | ||||
|       case (op, tok) of | ||||
|         (Add, True) -> "+|" | ||||
|         (Remove, True) -> "-|" | ||||
|         (Keep, True) -> " |" | ||||
|         (Add, False) -> "+." | ||||
|         (Remove, False) -> "-." | ||||
|         (Keep, False) -> " ." | ||||
|     s = | ||||
|       case op of | ||||
|         Add -> d2 | ||||
|         _ -> d1 | ||||
| 
 | ||||
| escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines' | ||||
| 
 | ||||
| escNewlines' s | ||||
|   | B.null s = mempty | ||||
|   | B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s) | ||||
|   | B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s) | ||||
|   | otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s) | ||||
| 
 | ||||
| hunks :: Int -> [(Op, Tok)] -> [Hunk] | ||||
| hunks ctxt = go 0 0 0 0 [] . groupBy ((==) `on` fst) | ||||
|   where | ||||
|     go _ _ bi bj backlog [] = | ||||
|       if null backlog | ||||
|         then [] | ||||
|         else [((bi, bj), backlog)] | ||||
|     go i j bi bj backlog (g:gs) = | ||||
|       case fst (head g) of | ||||
|         Remove -> go (i + length g) j bi bj (backlog ++ g) gs | ||||
|         Add -> go i (j + length g) bi bj (backlog ++ g) gs | ||||
|         Keep -> | ||||
|           let lg = length g | ||||
|            in if lg <= | ||||
|                  (if null backlog || null g | ||||
|                     then ctxt | ||||
|                     else 2 * ctxt) | ||||
|                 then go (i + lg) (j + lg) bi bj (backlog ++ g) gs | ||||
|                 else (if null backlog | ||||
|                         then id | ||||
|                         else (:) ((bi, bj), backlog ++ take ctxt g)) $ | ||||
|                      go | ||||
|                        (i + lg) | ||||
|                        (j + lg) | ||||
|                        (i + lg - ctxt) | ||||
|                        (j + lg - ctxt) | ||||
|                        (if null gs | ||||
|                           then [] | ||||
|                           else drop (lg - ctxt) g) | ||||
|                        gs | ||||
| 
 | ||||
| hunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@" | ||||
| 
 | ||||
| lineSep = fromString "\n" | ||||
| 
 | ||||
| pprHunk :: BS -> BS -> Hunk -> BS | ||||
| pprHunk d1 d2 ((i, j), toks) = | ||||
|   B.intercalate lineSep (hunkHdr i j : pprDiff d1 d2 toks) | ||||
| 
 | ||||
| pprHunks d1 d2 = B.intercalate lineSep . map (pprHunk d1 d2) | ||||
|  | @ -1,23 +1,28 @@ | |||
| module Main where | ||||
| 
 | ||||
| import Data.ByteString.UTF8 (fromString) | ||||
| import qualified Data.ByteString.Char8 as B8 | ||||
| import Options.Applicative | ||||
| import Redfa | ||||
| import Version | ||||
| import Diff | ||||
| import qualified Data.Vector as V | ||||
| import System.IO.MMap | ||||
| 
 | ||||
| data DiffOptions = | ||||
|   DiffOptions | ||||
|     { diffRedfaOpt :: RedfaOption | ||||
|     , context :: Int | ||||
|     , diffFile1 :: String | ||||
|     , diffFile2 :: String | ||||
|     } | ||||
|   deriving (Show) | ||||
| 
 | ||||
| diffOpts = | ||||
|   DiffOptions <$> redfaOption <*> strArgument (metavar "FROMFILE") <*> | ||||
|   strArgument (metavar "TOFILE") | ||||
|   DiffOptions <$> redfaOption | ||||
|   <*> option auto (metavar "CONTEXT" <> short 'C' <> long "context" <> value 5) | ||||
|   <*> strArgument (metavar "FROMFILE") | ||||
|   <*> strArgument (metavar "TOFILE") | ||||
| 
 | ||||
| main :: IO () | ||||
| main = | ||||
|  | @ -34,4 +39,5 @@ main = | |||
|          data2 <- mmapFileByteString (diffFile2 o) Nothing | ||||
|          toks1 <- V.fromList <$> redfaTokenize redfa data1 | ||||
|          toks2 <- V.fromList <$> redfaTokenize redfa data2 | ||||
|          print $ toks1 | ||||
|          hs <- hunks (max 0 $ context o) <$> diffToks data1 data2 toks1 toks2 | ||||
|          B8.putStrLn $ pprHunks data1 data2 hs | ||||
|  |  | |||
|  | @ -15,6 +15,7 @@ import Data.Monoid | |||
| import qualified Data.Vector as V | ||||
| import Options.Applicative | ||||
| import Text.Regex.TDFA | ||||
| import Diff | ||||
| 
 | ||||
| type BS = B.ByteString | ||||
| 
 | ||||
|  | @ -132,7 +133,7 @@ redfaPrepareRules opt = do | |||
|           return $ RedfaRule rs rm b t | ||||
|   return $ RedfaSpec start (V.fromList ids) (V.fromList rules) | ||||
| 
 | ||||
| redfaTokenize :: MonadFail m => RedfaSpec -> BS -> m [(Bool, (Int, Int))] | ||||
| redfaTokenize :: MonadFail m => RedfaSpec -> BS -> m [Tok] | ||||
| redfaTokenize spec s = redfaTokenize' spec s (redfaStart spec) 0 [] | ||||
| 
 | ||||
| redfaTokenize' :: | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue