cleanup, change representation
This commit is contained in:
		
							parent
							
								
									4b5bac3541
								
							
						
					
					
						commit
						36cf2ba36f
					
				
							
								
								
									
										15
									
								
								adiff.cabal
									
									
									
									
									
								
							
							
						
						
									
										15
									
								
								adiff.cabal
									
									
									
									
									
								
							|  | @ -40,22 +40,31 @@ build-type: Simple | |||
| 
 | ||||
| executable adiff | ||||
|   -- .hs or .lhs file containing the Main module. | ||||
|   main-is: MainDiff.hs | ||||
|   main-is: Main.hs | ||||
| 
 | ||||
|   -- Modules included in this executable, other than Main. | ||||
|   other-modules: Redfa, Version, Diff, Merge | ||||
|   other-modules: Diff, | ||||
|                  Diff3, | ||||
|                  Format, | ||||
|                  Merge, | ||||
|                  Patch, | ||||
|                  Redfa, | ||||
|                  Substr, | ||||
|                  Types, | ||||
|                  Version | ||||
| 
 | ||||
|   -- LANGUAGE extensions used by modules in this package. | ||||
|   other-extensions: CPP | ||||
| 
 | ||||
|   -- Other library packages from which modules are imported. | ||||
|   build-depends: base ^>=4.13.0.0, | ||||
|                  extra ^>= 1.7, | ||||
|                  mmap ^>=0.5, | ||||
|                  regex-tdfa ^>= 1.3, | ||||
|                  optparse-applicative ^>=0.16, | ||||
|                  bytestring ^>= 0.10.12, | ||||
|                  vector ^>=0.12, | ||||
| 		 utf8-string ^>=1.0 | ||||
|                  utf8-string ^>=1.0 | ||||
| 
 | ||||
|   -- Directories containing source files. | ||||
|   hs-source-dirs: src | ||||
|  |  | |||
							
								
								
									
										158
									
								
								src/Diff.hs
									
									
									
									
									
								
							
							
						
						
									
										158
									
								
								src/Diff.hs
									
									
									
									
									
								
							|  | @ -3,10 +3,6 @@ module Diff | |||
|   , Op | ||||
|   , diffToks | ||||
|   , hunks | ||||
|   , pprDiff | ||||
|   , pprDiff1 | ||||
|   , pprHunk | ||||
|   , pprHunks | ||||
|   ) where | ||||
| 
 | ||||
| import Control.Monad | ||||
|  | @ -16,31 +12,17 @@ import qualified Data.ByteString.Internal as BI | |||
| import qualified Data.ByteString.Lazy as BL | ||||
| import Data.ByteString.UTF8 (fromString) | ||||
| import Data.Function (on) | ||||
| import Data.List (groupBy) | ||||
| import Data.List (groupBy, mapAccumL) | ||||
| import Data.List.Extra (split, takeEnd) | ||||
| import qualified Data.Vector as V | ||||
| import qualified Data.Vector.Unboxed.Mutable as M | ||||
| 
 | ||||
| type Tok = (Bool, (Int, Int)) | ||||
| 
 | ||||
| type Diff = [(Op, Tok)] | ||||
| 
 | ||||
| type Hunk = ((Int, Int), [(Op, Tok)]) | ||||
| 
 | ||||
| type BS = B.ByteString | ||||
| 
 | ||||
| type TV = V.Vector Tok | ||||
| 
 | ||||
| data Op | ||||
|   = Remove | ||||
|   | Keep | ||||
|   | Add | ||||
|   deriving (Show, Eq) | ||||
| import Debug.Trace | ||||
| import Substr | ||||
| import Types | ||||
| 
 | ||||
| data DiffEnv = | ||||
|   DiffEnv | ||||
|     { deD1 :: BS | ||||
|     , deD2 :: BS | ||||
|     , deT1 :: TV | ||||
|     { deT1 :: TV | ||||
|     , deT2 :: TV | ||||
|     , deS :: Int | ||||
|     , deE :: Int | ||||
|  | @ -54,28 +36,21 @@ data DiffEnv = | |||
|     } | ||||
|   deriving (Show) | ||||
| 
 | ||||
| substr b e = B.take (e - b) . B.drop b | ||||
| toksMatch x y DiffEnv {deT1 = t1, deT2 = t2} = | ||||
|   t1 V.! x == t2 V.! y | ||||
| 
 | ||||
| toksMatch x y DiffEnv {deT1 = t1, deT2 = t2, deD1 = d1, deD2 = d2} = | ||||
|   toksMatch' x y d1 d2 t1 t2 | ||||
| 
 | ||||
| toksMatch' x y d1 d2 t1 t2 = | ||||
|   let (isTok1, (b1, e1)) = t1 V.! x | ||||
|       (isTok2, (b2, e2)) = t2 V.! y | ||||
|    in isTok1 == isTok2 && substr b1 e1 d1 == substr b2 e2 d2 | ||||
| 
 | ||||
| stripEqToks :: BS -> BS -> TV -> TV -> (Diff, Diff, TV, TV) | ||||
| stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2') | ||||
| stripEqToks :: TV -> TV -> (Diff, Diff, TV, TV) | ||||
| stripEqToks t1 t2 = (pre, post, t1', t2') | ||||
|   where | ||||
|     l1 = V.length t1 | ||||
|     l2 = V.length t2 | ||||
|     firstDiff i | ||||
|       | i < l1 && i < l2 && toksMatch' i i d1 d2 t1 t2 = firstDiff (i + 1) | ||||
|       | i < l1 && i < l2 && (t1 V.! i == t2 V.! i) = firstDiff (i + 1) | ||||
|       | otherwise = i | ||||
|     b = firstDiff 0 | ||||
|     lastDiff i | ||||
|       | l1 - i - 1 >= b && | ||||
|           l2 - i - 1 >= b && toksMatch' (l1 - i - 1) (l2 - i - 1) d1 d2 t1 t2 = | ||||
|           l2 - i - 1 >= b && t1 V.! (l1 - i - 1) == t2 V.! (l2 - i - 1)= | ||||
|         lastDiff (i + 1) | ||||
|       | otherwise = i | ||||
|     e = lastDiff 0 | ||||
|  | @ -84,19 +59,17 @@ stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2') | |||
|     t1' = V.slice b (l1 - e - b) t1 | ||||
|     t2' = V.slice b (l2 - e - b) t2 | ||||
| 
 | ||||
| diffToks :: BS -> BS -> TV -> TV -> Diff | ||||
| diffToks d1 d2 t1' t2' = pre ++ res ++ post | ||||
| diffToks :: TV -> TV -> Diff | ||||
| diffToks t1' t2' = pre ++ res ++ post | ||||
|   where | ||||
|     (pre, post, t1, t2) = stripEqToks d1 d2 t1' t2' | ||||
|     (pre, post, t1, t2) = stripEqToks t1' t2' | ||||
|     res | ||||
|       | V.null t1 = map (\t -> (Add, t)) (V.toList t2) | ||||
|       | V.null t2 = map (\t -> (Remove, t)) (V.toList t1) | ||||
|       | V.length t1 >= V.length t2 = | ||||
|         diffToks' $ | ||||
|         DiffEnv | ||||
|           { deD1 = d1 | ||||
|           , deD2 = d2 | ||||
|           , deT1 = t1 | ||||
|           { deT1 = t1 | ||||
|           , deT2 = t2 | ||||
|           , deS = 0 | ||||
|           , deE = V.length t1 | ||||
|  | @ -111,9 +84,7 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post | |||
|       | otherwise = | ||||
|         diffToks' $ | ||||
|         DiffEnv | ||||
|           { deD1 = d2 | ||||
|           , deD2 = d1 | ||||
|           , deT1 = t2 | ||||
|           { deT1 = t2 | ||||
|           , deT2 = t1 | ||||
|           , deS = 0 | ||||
|           , deE = V.length t2 | ||||
|  | @ -151,10 +122,10 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = | |||
|     vecEmid = vecE mid | ||||
|     extraScore i = | ||||
|       if isToken | ||||
|         then -(tokEnd - tokBegin) | ||||
|         then -(B.length s) | ||||
|         else 0 | ||||
|       where | ||||
|         (isToken, (tokBegin, tokEnd)) = deT1 de V.! i | ||||
|         (isToken, s) = deT1 de V.! i | ||||
|     vecS = vec -- "forward" operation | ||||
|       where | ||||
|         vec i | ||||
|  | @ -291,70 +262,31 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} = | |||
|         diffToks' de {deE = mid, deVE = vecEmid, deB = opt} ++ | ||||
|         diffToks' de {deS = mid, deVS = vecSmid, deA = opt} | ||||
| 
 | ||||
| pprDiff :: BS -> BS -> Diff -> [BS] | ||||
| pprDiff d1 d2 = map (pprDiff1 d1 d2) | ||||
| 
 | ||||
| pprDiff1 d1 d2 (op, (tok, (i, j))) = | ||||
|   fromString pfx <> escNewlines (substr i j s) | ||||
|   where | ||||
|     pfx = | ||||
|       case (op, tok) of | ||||
|         (Add, True) -> "+|" | ||||
|         (Remove, True) -> "-|" | ||||
|         (Keep, True) -> " |" | ||||
|         (Add, False) -> "+." | ||||
|         (Remove, False) -> "-." | ||||
|         (Keep, False) -> " ." | ||||
|     s = | ||||
|       case op of | ||||
|         Add -> d2 | ||||
|         _ -> d1 | ||||
| 
 | ||||
| escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines' | ||||
| 
 | ||||
| escNewlines' s | ||||
|   | B.null s = mempty | ||||
|   | B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s) | ||||
|   | B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s) | ||||
|   | otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s) | ||||
| 
 | ||||
| hunks :: Int -> Diff -> [Hunk] | ||||
| hunks ctxt = go 0 0 0 0 [] . groupBy ((==) `on` fst) | ||||
| hunks ctxt = | ||||
|   map (stripNums . concat) . | ||||
|   split null . | ||||
|   concat . check . map breakKeeps . groupBy ((==) `on` fst . snd) . addNums | ||||
|   where | ||||
|     go _ _ bi bj backlog [] = | ||||
|       if null backlog | ||||
|         then [] | ||||
|         else [((bi, bj), backlog)] | ||||
|     go i j bi bj backlog (g:gs) = | ||||
|       case fst (head g) of | ||||
|         Remove -> go (i + length g) j bi bj (backlog ++ g) gs | ||||
|         Add -> go i (j + length g) bi bj (backlog ++ g) gs | ||||
|         Keep -> | ||||
|           let lg = length g | ||||
|            in if lg <= | ||||
|                  (if null backlog || null g | ||||
|                     then ctxt | ||||
|                     else 2 * ctxt) | ||||
|                 then go (i + lg) (j + lg) bi bj (backlog ++ g) gs | ||||
|                 else (if null backlog | ||||
|                         then id | ||||
|                         else (:) ((bi, bj), backlog ++ take ctxt g)) $ | ||||
|                      go | ||||
|                        (i + lg) | ||||
|                        (j + lg) | ||||
|                        (i + lg - ctxt) | ||||
|                        (j + lg - ctxt) | ||||
|                        (if null gs | ||||
|                           then [] | ||||
|                           else drop (lg - ctxt) g) | ||||
|                        gs | ||||
| 
 | ||||
| hunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@" | ||||
| 
 | ||||
| lineSep = fromString "\n" | ||||
| 
 | ||||
| pprHunk :: BS -> BS -> Hunk -> BS | ||||
| pprHunk d1 d2 ((i, j), toks) = | ||||
|   B.intercalate lineSep (hunkHdr i j : pprDiff d1 d2 toks) | ||||
| 
 | ||||
| pprHunks d1 d2 = B.intercalate lineSep . map (pprHunk d1 d2) | ||||
|     addNums = snd . mapAccumL countTok (0, 0) | ||||
|     countTok x@(i, j) d@(op, _) = | ||||
|       (,) | ||||
|         (case op of | ||||
|            Remove -> (i + 1, j) | ||||
|            Keep -> (i + 1, j + 1) | ||||
|            Add -> (i, j + 1)) | ||||
|         (x, d) | ||||
|     stripNums = (,) <$> fst . head <*> map snd | ||||
|     breakKeeps ks@((_, (Keep, _)):_) = | ||||
|       let (a, b') = splitAt ctxt ks | ||||
|           (b, c) = splitAt ctxt b' | ||||
|        in if null c | ||||
|             then [ks] | ||||
|             else [a, [], takeEnd ctxt b'] | ||||
|     breakKeeps a = [a] | ||||
|     check ([_, [], _]:[]) = [] | ||||
|     check ([_, [], a]:xs) = [a] : checkLast xs | ||||
|     check a = checkLast a | ||||
|     checkLast [] = [] | ||||
|     checkLast ([a, [], _]:[]) = [[a]] | ||||
|     checkLast (a:xs) = a:checkLast xs | ||||
|  |  | |||
							
								
								
									
										7
									
								
								src/Diff3.hs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								src/Diff3.hs
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,7 @@ | |||
| 
 | ||||
| module Diff3 where | ||||
| 
 | ||||
| import Types | ||||
| 
 | ||||
| diff3Toks :: BS -> BS -> BS -> TV -> TV -> TV -> a | ||||
| diff3Toks = undefined | ||||
							
								
								
									
										45
									
								
								src/Format.hs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								src/Format.hs
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,45 @@ | |||
| {-# LANGUAGE OverloadedStrings #-} | ||||
| 
 | ||||
| module Format where | ||||
| 
 | ||||
| import Types | ||||
| 
 | ||||
| import qualified Data.ByteString as B | ||||
| import qualified Data.ByteString.Builder as BB | ||||
| import qualified Data.ByteString.Internal as BI | ||||
| import qualified Data.ByteString.Lazy as BL | ||||
| import Data.String | ||||
| import Substr | ||||
| 
 | ||||
| pprHunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@" | ||||
| 
 | ||||
| lineSep = fromString "\n" | ||||
| 
 | ||||
| pprHunk :: Hunk -> BS | ||||
| pprHunk ((i, j), toks) = | ||||
|   B.intercalate lineSep (pprHunkHdr i j : pprDiff toks) | ||||
| 
 | ||||
| pprHunks = B.intercalate lineSep . map pprHunk | ||||
| 
 | ||||
| pprDiff :: Diff -> [BS] | ||||
| pprDiff = map pprDiff1 | ||||
| 
 | ||||
| pprDiff1 (op, (tok, s)) = | ||||
|   fromString pfx <> escNewlines s | ||||
|   where | ||||
|     pfx = | ||||
|       case (op, tok) of | ||||
|         (Add, True) -> "+|" | ||||
|         (Remove, True) -> "-|" | ||||
|         (Keep, True) -> " |" | ||||
|         (Add, False) -> "+." | ||||
|         (Remove, False) -> "-." | ||||
|         (Keep, False) -> " ." | ||||
| 
 | ||||
| escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines' | ||||
| 
 | ||||
| escNewlines' s | ||||
|   | B.null s = mempty | ||||
|   | B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s) | ||||
|   | B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s) | ||||
|   | otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s) | ||||
|  | @ -4,6 +4,7 @@ import qualified Data.ByteString as B | |||
| import qualified Data.ByteString.Char8 as B8 | ||||
| import Data.ByteString.UTF8 (fromString) | ||||
| import qualified Data.Vector as V | ||||
| import Format | ||||
| import Diff | ||||
| import Merge | ||||
| import Options.Applicative | ||||
|  | @ -112,6 +113,6 @@ main = | |||
|              data2 <- mmapFileByteString f2 Nothing | ||||
|              toks1 <- V.fromList <$> redfaTokenize redfa data1 | ||||
|              toks2 <- V.fromList <$> redfaTokenize redfa data2 | ||||
|              let hs = hunks (max 0 ctxt) $ diffToks data1 data2 toks1 toks2 | ||||
|              B8.putStrLn $ pprHunks data1 data2 hs | ||||
|              let hs = hunks (max 0 ctxt) $ diffToks toks1 toks2 | ||||
|              B8.putStrLn $ pprHunks hs | ||||
|            CmdPatch {} -> putStrLn "not supported yet" | ||||
							
								
								
									
										67
									
								
								src/Merge.hs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								src/Merge.hs
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,67 @@ | |||
| module Merge | ||||
|   ( MergeOpts | ||||
|   , mergeOption | ||||
|   ) where | ||||
| 
 | ||||
| import qualified Data.ByteString as B | ||||
| import Data.String | ||||
| import Options.Applicative | ||||
| import Types | ||||
| 
 | ||||
| data MergeOpts = | ||||
|   MergeOpts | ||||
|     { mergeDoMerge :: Bool | ||||
|     , mergeForceWhitespace :: Bool | ||||
|     , mergeKeepWhitespace :: Bool | ||||
|     , mergeCStartStr :: BS | ||||
|     , mergeMineSepStr :: BS | ||||
|     , mergeYourSepStr :: BS | ||||
|     , mergeEndStr :: BS | ||||
|     } | ||||
|   deriving (Show) | ||||
| 
 | ||||
| marker = fromString . replicate 7 | ||||
| 
 | ||||
| mergeOption forPatch = | ||||
|   MergeOpts <$> | ||||
|   switch | ||||
|     (short 'm' <> | ||||
|      long "merge" <> | ||||
|      help | ||||
|        (if forPatch | ||||
|           then "Merge using conflict markers instead of printing the rejected hunks" | ||||
|           else "Output the merged file instead of the patch")) <*> | ||||
|   switch | ||||
|     (short 'w' <> | ||||
|      long "whitespace" <> | ||||
|      help | ||||
|        ((if forPatch | ||||
|            then "Force rejecting a thunk" | ||||
|            else "Force a merge conflict") ++ | ||||
|         " on whitespace mismatch")) <*> | ||||
|   switch | ||||
|     (short 'k' <> | ||||
|      long "keep-whitespace" <> | ||||
|      help | ||||
|        ("On whitespace mismatch, default to the version from " ++ | ||||
|         (if forPatch | ||||
|            then "original file" | ||||
|            else "MYFILE") ++ | ||||
|         " instead of the one from " ++ | ||||
|         (if forPatch | ||||
|            then "patch" | ||||
|            else "YOURFILE"))) <*> | ||||
|   strOption | ||||
|     (long "merge-start" <> | ||||
|      value (marker '<') <> help "Marker for the beginning of a conflict") <*> | ||||
|   strOption | ||||
|     (long "merge-mine" <> | ||||
|      value (marker '|') <> | ||||
|      help "Marker that separates `mine' from `original' part of the conflict") <*> | ||||
|   strOption | ||||
|     (long "merge-your" <> | ||||
|      value (marker '=') <> | ||||
|      help "Marker that separates `original' from `your' part of the conflict") <*> | ||||
|   strOption | ||||
|     (long "merge-end" <> | ||||
|      value (marker '>') <> help "Marker for the end of a conflict") | ||||
							
								
								
									
										4
									
								
								src/Patch.hs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								src/Patch.hs
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,4 @@ | |||
| module Patch where | ||||
| 
 | ||||
| patchToks :: a | ||||
| patchToks = undefined | ||||
|  | @ -16,6 +16,7 @@ import qualified Data.Vector as V | |||
| import Options.Applicative | ||||
| import Text.Regex.TDFA | ||||
| import Diff | ||||
| import Substr | ||||
| 
 | ||||
| type BS = B.ByteString | ||||
| 
 | ||||
|  | @ -143,7 +144,7 @@ redfaTokenize' :: | |||
|   -> Int | ||||
|   -> Int | ||||
|   -> [Int] | ||||
|   -> m [(Bool, (Int, Int))] | ||||
|   -> m [Tok] | ||||
| redfaTokenize' spec s state off visited | ||||
|   | off >= B.length s = pure [] | ||||
|   | otherwise = | ||||
|  | @ -168,7 +169,7 @@ redfaTokenize' spec s state off visited | |||
|           ((rule, (_, len)):_) -> | ||||
|             let matchLen = len - ooff | ||||
|              in (if matchLen > 0 | ||||
|                    then (:) (rrIsToken rule, (off, off + matchLen)) | ||||
|                    then (:) (rrIsToken rule, substr off matchLen s) | ||||
|                    else id) <$> | ||||
|                 redfaTokenize' | ||||
|                   spec | ||||
|  |  | |||
							
								
								
									
										7
									
								
								src/Substr.hs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								src/Substr.hs
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,7 @@ | |||
| module Substr where  | ||||
| 
 | ||||
| import Types | ||||
| import qualified Data.ByteString as B | ||||
| 
 | ||||
| substr :: Int -> Int -> BS -> BS | ||||
| substr b l = B.take l . B.drop b | ||||
							
								
								
									
										20
									
								
								src/Types.hs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								src/Types.hs
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,20 @@ | |||
| module Types where | ||||
| 
 | ||||
| import Data.ByteString | ||||
| import Data.Vector | ||||
| 
 | ||||
| type BS = ByteString | ||||
| 
 | ||||
| type Tok = (Bool, BS) | ||||
| 
 | ||||
| type TV = Vector Tok | ||||
| 
 | ||||
| type Diff = [(Op, Tok)] | ||||
| 
 | ||||
| type Hunk = ((Int, Int), Diff) | ||||
| 
 | ||||
| data Op | ||||
|   = Remove | ||||
|   | Keep | ||||
|   | Add | ||||
|   deriving (Show, Eq) | ||||
		Loading…
	
		Reference in a new issue