implement diff
This commit is contained in:
		
							parent
							
								
									96ef2e732b
								
							
						
					
					
						commit
						7fc24d236c
					
				|  | @ -43,7 +43,7 @@ executable adiff | ||||||
|   main-is: MainDiff.hs |   main-is: MainDiff.hs | ||||||
| 
 | 
 | ||||||
|   -- Modules included in this executable, other than Main. |   -- Modules included in this executable, other than Main. | ||||||
|   other-modules: Redfa, Version |   other-modules: Redfa, Version, Diff | ||||||
| 
 | 
 | ||||||
|   -- LANGUAGE extensions used by modules in this package. |   -- LANGUAGE extensions used by modules in this package. | ||||||
|   other-extensions: CPP |   other-extensions: CPP | ||||||
|  |  | ||||||
							
								
								
									
										166
									
								
								src/Diff.hs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										166
									
								
								src/Diff.hs
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,166 @@ | ||||||
|  | module Diff | ||||||
|  |   ( Tok | ||||||
|  |   , Op | ||||||
|  |   , diffToks | ||||||
|  |   , hunks | ||||||
|  |   , pprDiff | ||||||
|  |   , pprDiff1 | ||||||
|  |   , pprHunk | ||||||
|  |   , pprHunks | ||||||
|  |   ) where | ||||||
|  | 
 | ||||||
|  | import Control.Monad | ||||||
|  | import qualified Data.ByteString as B | ||||||
|  | import qualified Data.ByteString.Builder as BB | ||||||
|  | import qualified Data.ByteString.Internal as BI | ||||||
|  | import qualified Data.ByteString.Lazy as BL | ||||||
|  | import Data.ByteString.UTF8 (fromString) | ||||||
|  | import Data.Function (on) | ||||||
|  | import Data.List (groupBy) | ||||||
|  | import qualified Data.Vector as V | ||||||
|  | import qualified Data.Vector.Unboxed.Mutable as M | ||||||
|  | 
 | ||||||
|  | type Tok = (Bool, (Int, Int)) | ||||||
|  | 
 | ||||||
|  | type Hunk = ((Int, Int), [(Op, Tok)]) | ||||||
|  | 
 | ||||||
|  | type BS = B.ByteString | ||||||
|  | 
 | ||||||
|  | data Op | ||||||
|  |   = Remove | ||||||
|  |   | Keep | ||||||
|  |   | Add | ||||||
|  |   deriving (Show, Eq) | ||||||
|  | 
 | ||||||
|  | substr b e = B.take (e - b) . B.drop b | ||||||
|  | 
 | ||||||
|  | toksMatch d1 d2 t1 t2 x y = | ||||||
|  |   let (tok1, (b1, e1)) = t1 V.! x | ||||||
|  |       (tok2, (b2, e2)) = t2 V.! y | ||||||
|  |    in tok1 == tok2 && substr b1 e1 d1 == substr b2 e2 d2 | ||||||
|  | 
 | ||||||
|  | stripEqToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> (Int, Int, Int) | ||||||
|  | stripEqToks d1 d2 t1 t2 = (b, l1 - e, l2 - e) | ||||||
|  |   where | ||||||
|  |     l1 = V.length t1 | ||||||
|  |     l2 = V.length t2 | ||||||
|  |     firstDiff i | ||||||
|  |       | i < l1 && i < l2 && toksMatch d1 d2 t1 t2 i i = firstDiff (i + 1) | ||||||
|  |       | otherwise = i | ||||||
|  |     b = firstDiff 0 | ||||||
|  |     lastDiff i | ||||||
|  |       | l1 - i - 1 >= b && | ||||||
|  |           l2 - i - 1 >= b && toksMatch d1 d2 t1 t2 (l1 - i - 1) (l2 - i - 1) = | ||||||
|  |         lastDiff (i + 1) | ||||||
|  |       | otherwise = i | ||||||
|  |     e = lastDiff 0 | ||||||
|  | 
 | ||||||
|  | diffToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> IO [(Op, Tok)] | ||||||
|  | diffToks d1 d2 t1 t2 = do | ||||||
|  |   let (b, e1, e2) = stripEqToks d1 d2 t1 t2 | ||||||
|  |       ms1 = e1 - b | ||||||
|  |       ms2 = e2 - b | ||||||
|  |   mtx <- M.new ((ms1 + 1) * (ms2 + 1)) | ||||||
|  |   let idx x y = (ms2 + 1) * x + y | ||||||
|  |   forM_ [0 .. ms1] $ \i -> M.write mtx (idx i 0) i | ||||||
|  |   forM_ [0 .. ms2] $ \i -> M.write mtx (idx 0 i) i | ||||||
|  |   let toksMatch' x y = toksMatch d1 d2 t1 t2 (b + x) (b + y) | ||||||
|  |   forM_ [1 .. ms1] $ \i -> | ||||||
|  |     forM [1 .. ms2] $ \j -> do | ||||||
|  |       up <- M.read mtx (idx i (j - 1)) | ||||||
|  |       left <- M.read mtx (idx (i - 1) j) | ||||||
|  |       upleft <- M.read mtx (idx (i - 1) (j - 1)) | ||||||
|  |       M.write mtx (idx i j) $ | ||||||
|  |         minimum $ | ||||||
|  |         [up + 1, left + 1] ++ | ||||||
|  |         (if toksMatch' (i - 1) (j - 1) | ||||||
|  |            then [upleft] | ||||||
|  |            else []) | ||||||
|  |   let doAdd i j = (:) (Add, t2 V.! (b + j - 1)) <$> backtrack i (j - 1) | ||||||
|  |       doRem i j = (:) (Remove, t1 V.! (b + i - 1)) <$> backtrack (i - 1) j | ||||||
|  |       doKeep i j = (:) (Keep, t1 V.! (b + i - 1)) <$> backtrack (i - 1) (j - 1) | ||||||
|  |       backtrack :: Int -> Int -> IO [(Op, Tok)] | ||||||
|  |       backtrack 0 0 = pure [] | ||||||
|  |       backtrack i 0 = doRem i 0 | ||||||
|  |       backtrack 0 j = doAdd 0 j | ||||||
|  |       backtrack i j = do | ||||||
|  |         add <- M.read mtx (idx i (j - 1)) | ||||||
|  |         rem <- M.read mtx (idx (i - 1) j) | ||||||
|  |         keep <- M.read mtx (idx (i - 1) (j - 1)) | ||||||
|  |         if toksMatch' (i - 1) (j - 1) && keep <= min add rem | ||||||
|  |           then doKeep i j | ||||||
|  |           else if add <= rem | ||||||
|  |                  then doAdd i j | ||||||
|  |                  else doRem i j | ||||||
|  |   diff <- reverse <$> backtrack ms1 ms2 | ||||||
|  |   return $ | ||||||
|  |     map ((,) Keep) (take b $ V.toList t1) ++ | ||||||
|  |     diff ++ map ((,) Keep) (drop e1 $ V.toList t1) | ||||||
|  | 
 | ||||||
|  | pprDiff :: BS -> BS -> [(Op, Tok)] -> [BS] | ||||||
|  | pprDiff d1 d2 = map (pprDiff1 d1 d2) | ||||||
|  | 
 | ||||||
|  | pprDiff1 d1 d2 (op, (tok, (i, j))) = | ||||||
|  |   fromString pfx <> escNewlines (substr i j s) | ||||||
|  |   where | ||||||
|  |     pfx = | ||||||
|  |       case (op, tok) of | ||||||
|  |         (Add, True) -> "+|" | ||||||
|  |         (Remove, True) -> "-|" | ||||||
|  |         (Keep, True) -> " |" | ||||||
|  |         (Add, False) -> "+." | ||||||
|  |         (Remove, False) -> "-." | ||||||
|  |         (Keep, False) -> " ." | ||||||
|  |     s = | ||||||
|  |       case op of | ||||||
|  |         Add -> d2 | ||||||
|  |         _ -> d1 | ||||||
|  | 
 | ||||||
|  | escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines' | ||||||
|  | 
 | ||||||
|  | escNewlines' s | ||||||
|  |   | B.null s = mempty | ||||||
|  |   | B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s) | ||||||
|  |   | B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s) | ||||||
|  |   | otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s) | ||||||
|  | 
 | ||||||
|  | hunks :: Int -> [(Op, Tok)] -> [Hunk] | ||||||
|  | hunks ctxt = go 0 0 0 0 [] . groupBy ((==) `on` fst) | ||||||
|  |   where | ||||||
|  |     go _ _ bi bj backlog [] = | ||||||
|  |       if null backlog | ||||||
|  |         then [] | ||||||
|  |         else [((bi, bj), backlog)] | ||||||
|  |     go i j bi bj backlog (g:gs) = | ||||||
|  |       case fst (head g) of | ||||||
|  |         Remove -> go (i + length g) j bi bj (backlog ++ g) gs | ||||||
|  |         Add -> go i (j + length g) bi bj (backlog ++ g) gs | ||||||
|  |         Keep -> | ||||||
|  |           let lg = length g | ||||||
|  |            in if lg <= | ||||||
|  |                  (if null backlog || null g | ||||||
|  |                     then ctxt | ||||||
|  |                     else 2 * ctxt) | ||||||
|  |                 then go (i + lg) (j + lg) bi bj (backlog ++ g) gs | ||||||
|  |                 else (if null backlog | ||||||
|  |                         then id | ||||||
|  |                         else (:) ((bi, bj), backlog ++ take ctxt g)) $ | ||||||
|  |                      go | ||||||
|  |                        (i + lg) | ||||||
|  |                        (j + lg) | ||||||
|  |                        (i + lg - ctxt) | ||||||
|  |                        (j + lg - ctxt) | ||||||
|  |                        (if null gs | ||||||
|  |                           then [] | ||||||
|  |                           else drop (lg - ctxt) g) | ||||||
|  |                        gs | ||||||
|  | 
 | ||||||
|  | hunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@" | ||||||
|  | 
 | ||||||
|  | lineSep = fromString "\n" | ||||||
|  | 
 | ||||||
|  | pprHunk :: BS -> BS -> Hunk -> BS | ||||||
|  | pprHunk d1 d2 ((i, j), toks) = | ||||||
|  |   B.intercalate lineSep (hunkHdr i j : pprDiff d1 d2 toks) | ||||||
|  | 
 | ||||||
|  | pprHunks d1 d2 = B.intercalate lineSep . map (pprHunk d1 d2) | ||||||
|  | @ -1,23 +1,28 @@ | ||||||
| module Main where | module Main where | ||||||
| 
 | 
 | ||||||
| import Data.ByteString.UTF8 (fromString) | import Data.ByteString.UTF8 (fromString) | ||||||
|  | import qualified Data.ByteString.Char8 as B8 | ||||||
| import Options.Applicative | import Options.Applicative | ||||||
| import Redfa | import Redfa | ||||||
| import Version | import Version | ||||||
|  | import Diff | ||||||
| import qualified Data.Vector as V | import qualified Data.Vector as V | ||||||
| import System.IO.MMap | import System.IO.MMap | ||||||
| 
 | 
 | ||||||
| data DiffOptions = | data DiffOptions = | ||||||
|   DiffOptions |   DiffOptions | ||||||
|     { diffRedfaOpt :: RedfaOption |     { diffRedfaOpt :: RedfaOption | ||||||
|  |     , context :: Int | ||||||
|     , diffFile1 :: String |     , diffFile1 :: String | ||||||
|     , diffFile2 :: String |     , diffFile2 :: String | ||||||
|     } |     } | ||||||
|   deriving (Show) |   deriving (Show) | ||||||
| 
 | 
 | ||||||
| diffOpts = | diffOpts = | ||||||
|   DiffOptions <$> redfaOption <*> strArgument (metavar "FROMFILE") <*> |   DiffOptions <$> redfaOption | ||||||
|   strArgument (metavar "TOFILE") |   <*> option auto (metavar "CONTEXT" <> short 'C' <> long "context" <> value 5) | ||||||
|  |   <*> strArgument (metavar "FROMFILE") | ||||||
|  |   <*> strArgument (metavar "TOFILE") | ||||||
| 
 | 
 | ||||||
| main :: IO () | main :: IO () | ||||||
| main = | main = | ||||||
|  | @ -34,4 +39,5 @@ main = | ||||||
|          data2 <- mmapFileByteString (diffFile2 o) Nothing |          data2 <- mmapFileByteString (diffFile2 o) Nothing | ||||||
|          toks1 <- V.fromList <$> redfaTokenize redfa data1 |          toks1 <- V.fromList <$> redfaTokenize redfa data1 | ||||||
|          toks2 <- V.fromList <$> redfaTokenize redfa data2 |          toks2 <- V.fromList <$> redfaTokenize redfa data2 | ||||||
|          print $ toks1 |          hs <- hunks (max 0 $ context o) <$> diffToks data1 data2 toks1 toks2 | ||||||
|  |          B8.putStrLn $ pprHunks data1 data2 hs | ||||||
|  |  | ||||||
|  | @ -15,6 +15,7 @@ import Data.Monoid | ||||||
| import qualified Data.Vector as V | import qualified Data.Vector as V | ||||||
| import Options.Applicative | import Options.Applicative | ||||||
| import Text.Regex.TDFA | import Text.Regex.TDFA | ||||||
|  | import Diff | ||||||
| 
 | 
 | ||||||
| type BS = B.ByteString | type BS = B.ByteString | ||||||
| 
 | 
 | ||||||
|  | @ -132,7 +133,7 @@ redfaPrepareRules opt = do | ||||||
|           return $ RedfaRule rs rm b t |           return $ RedfaRule rs rm b t | ||||||
|   return $ RedfaSpec start (V.fromList ids) (V.fromList rules) |   return $ RedfaSpec start (V.fromList ids) (V.fromList rules) | ||||||
| 
 | 
 | ||||||
| redfaTokenize :: MonadFail m => RedfaSpec -> BS -> m [(Bool, (Int, Int))] | redfaTokenize :: MonadFail m => RedfaSpec -> BS -> m [Tok] | ||||||
| redfaTokenize spec s = redfaTokenize' spec s (redfaStart spec) 0 [] | redfaTokenize spec s = redfaTokenize' spec s (redfaStart spec) 0 [] | ||||||
| 
 | 
 | ||||||
| redfaTokenize' :: | redfaTokenize' :: | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue