cleanup, change representation

This commit is contained in:
Mirek Kratochvil 2020-09-26 23:09:26 +02:00
parent 4b5bac3541
commit 36cf2ba36f
10 changed files with 213 additions and 120 deletions

View file

@ -40,22 +40,31 @@ build-type: Simple
executable adiff executable adiff
-- .hs or .lhs file containing the Main module. -- .hs or .lhs file containing the Main module.
main-is: MainDiff.hs main-is: Main.hs
-- Modules included in this executable, other than Main. -- Modules included in this executable, other than Main.
other-modules: Redfa, Version, Diff, Merge other-modules: Diff,
Diff3,
Format,
Merge,
Patch,
Redfa,
Substr,
Types,
Version
-- LANGUAGE extensions used by modules in this package. -- LANGUAGE extensions used by modules in this package.
other-extensions: CPP other-extensions: CPP
-- Other library packages from which modules are imported. -- Other library packages from which modules are imported.
build-depends: base ^>=4.13.0.0, build-depends: base ^>=4.13.0.0,
extra ^>= 1.7,
mmap ^>=0.5, mmap ^>=0.5,
regex-tdfa ^>= 1.3, regex-tdfa ^>= 1.3,
optparse-applicative ^>=0.16, optparse-applicative ^>=0.16,
bytestring ^>= 0.10.12, bytestring ^>= 0.10.12,
vector ^>=0.12, vector ^>=0.12,
utf8-string ^>=1.0 utf8-string ^>=1.0
-- Directories containing source files. -- Directories containing source files.
hs-source-dirs: src hs-source-dirs: src

View file

@ -3,10 +3,6 @@ module Diff
, Op , Op
, diffToks , diffToks
, hunks , hunks
, pprDiff
, pprDiff1
, pprHunk
, pprHunks
) where ) where
import Control.Monad import Control.Monad
@ -16,31 +12,17 @@ import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Lazy as BL
import Data.ByteString.UTF8 (fromString) import Data.ByteString.UTF8 (fromString)
import Data.Function (on) import Data.Function (on)
import Data.List (groupBy) import Data.List (groupBy, mapAccumL)
import Data.List.Extra (split, takeEnd)
import qualified Data.Vector as V import qualified Data.Vector as V
import qualified Data.Vector.Unboxed.Mutable as M import qualified Data.Vector.Unboxed.Mutable as M
import Debug.Trace
type Tok = (Bool, (Int, Int)) import Substr
import Types
type Diff = [(Op, Tok)]
type Hunk = ((Int, Int), [(Op, Tok)])
type BS = B.ByteString
type TV = V.Vector Tok
data Op
= Remove
| Keep
| Add
deriving (Show, Eq)
data DiffEnv = data DiffEnv =
DiffEnv DiffEnv
{ deD1 :: BS { deT1 :: TV
, deD2 :: BS
, deT1 :: TV
, deT2 :: TV , deT2 :: TV
, deS :: Int , deS :: Int
, deE :: Int , deE :: Int
@ -54,28 +36,21 @@ data DiffEnv =
} }
deriving (Show) deriving (Show)
substr b e = B.take (e - b) . B.drop b toksMatch x y DiffEnv {deT1 = t1, deT2 = t2} =
t1 V.! x == t2 V.! y
toksMatch x y DiffEnv {deT1 = t1, deT2 = t2, deD1 = d1, deD2 = d2} = stripEqToks :: TV -> TV -> (Diff, Diff, TV, TV)
toksMatch' x y d1 d2 t1 t2 stripEqToks t1 t2 = (pre, post, t1', t2')
toksMatch' x y d1 d2 t1 t2 =
let (isTok1, (b1, e1)) = t1 V.! x
(isTok2, (b2, e2)) = t2 V.! y
in isTok1 == isTok2 && substr b1 e1 d1 == substr b2 e2 d2
stripEqToks :: BS -> BS -> TV -> TV -> (Diff, Diff, TV, TV)
stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2')
where where
l1 = V.length t1 l1 = V.length t1
l2 = V.length t2 l2 = V.length t2
firstDiff i firstDiff i
| i < l1 && i < l2 && toksMatch' i i d1 d2 t1 t2 = firstDiff (i + 1) | i < l1 && i < l2 && (t1 V.! i == t2 V.! i) = firstDiff (i + 1)
| otherwise = i | otherwise = i
b = firstDiff 0 b = firstDiff 0
lastDiff i lastDiff i
| l1 - i - 1 >= b && | l1 - i - 1 >= b &&
l2 - i - 1 >= b && toksMatch' (l1 - i - 1) (l2 - i - 1) d1 d2 t1 t2 = l2 - i - 1 >= b && t1 V.! (l1 - i - 1) == t2 V.! (l2 - i - 1)=
lastDiff (i + 1) lastDiff (i + 1)
| otherwise = i | otherwise = i
e = lastDiff 0 e = lastDiff 0
@ -84,19 +59,17 @@ stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2')
t1' = V.slice b (l1 - e - b) t1 t1' = V.slice b (l1 - e - b) t1
t2' = V.slice b (l2 - e - b) t2 t2' = V.slice b (l2 - e - b) t2
diffToks :: BS -> BS -> TV -> TV -> Diff diffToks :: TV -> TV -> Diff
diffToks d1 d2 t1' t2' = pre ++ res ++ post diffToks t1' t2' = pre ++ res ++ post
where where
(pre, post, t1, t2) = stripEqToks d1 d2 t1' t2' (pre, post, t1, t2) = stripEqToks t1' t2'
res res
| V.null t1 = map (\t -> (Add, t)) (V.toList t2) | V.null t1 = map (\t -> (Add, t)) (V.toList t2)
| V.null t2 = map (\t -> (Remove, t)) (V.toList t1) | V.null t2 = map (\t -> (Remove, t)) (V.toList t1)
| V.length t1 >= V.length t2 = | V.length t1 >= V.length t2 =
diffToks' $ diffToks' $
DiffEnv DiffEnv
{ deD1 = d1 { deT1 = t1
, deD2 = d2
, deT1 = t1
, deT2 = t2 , deT2 = t2
, deS = 0 , deS = 0
, deE = V.length t1 , deE = V.length t1
@ -111,9 +84,7 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post
| otherwise = | otherwise =
diffToks' $ diffToks' $
DiffEnv DiffEnv
{ deD1 = d2 { deT1 = t2
, deD2 = d1
, deT1 = t2
, deT2 = t1 , deT2 = t1
, deS = 0 , deS = 0
, deE = V.length t2 , deE = V.length t2
@ -151,10 +122,10 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
vecEmid = vecE mid vecEmid = vecE mid
extraScore i = extraScore i =
if isToken if isToken
then -(tokEnd - tokBegin) then -(B.length s)
else 0 else 0
where where
(isToken, (tokBegin, tokEnd)) = deT1 de V.! i (isToken, s) = deT1 de V.! i
vecS = vec -- "forward" operation vecS = vec -- "forward" operation
where where
vec i vec i
@ -291,70 +262,31 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
diffToks' de {deE = mid, deVE = vecEmid, deB = opt} ++ diffToks' de {deE = mid, deVE = vecEmid, deB = opt} ++
diffToks' de {deS = mid, deVS = vecSmid, deA = opt} diffToks' de {deS = mid, deVS = vecSmid, deA = opt}
pprDiff :: BS -> BS -> Diff -> [BS]
pprDiff d1 d2 = map (pprDiff1 d1 d2)
pprDiff1 d1 d2 (op, (tok, (i, j))) =
fromString pfx <> escNewlines (substr i j s)
where
pfx =
case (op, tok) of
(Add, True) -> "+|"
(Remove, True) -> "-|"
(Keep, True) -> " |"
(Add, False) -> "+."
(Remove, False) -> "-."
(Keep, False) -> " ."
s =
case op of
Add -> d2
_ -> d1
escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines'
escNewlines' s
| B.null s = mempty
| B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s)
| B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s)
| otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s)
hunks :: Int -> Diff -> [Hunk] hunks :: Int -> Diff -> [Hunk]
hunks ctxt = go 0 0 0 0 [] . groupBy ((==) `on` fst) hunks ctxt =
map (stripNums . concat) .
split null .
concat . check . map breakKeeps . groupBy ((==) `on` fst . snd) . addNums
where where
go _ _ bi bj backlog [] = addNums = snd . mapAccumL countTok (0, 0)
if null backlog countTok x@(i, j) d@(op, _) =
then [] (,)
else [((bi, bj), backlog)] (case op of
go i j bi bj backlog (g:gs) = Remove -> (i + 1, j)
case fst (head g) of Keep -> (i + 1, j + 1)
Remove -> go (i + length g) j bi bj (backlog ++ g) gs Add -> (i, j + 1))
Add -> go i (j + length g) bi bj (backlog ++ g) gs (x, d)
Keep -> stripNums = (,) <$> fst . head <*> map snd
let lg = length g breakKeeps ks@((_, (Keep, _)):_) =
in if lg <= let (a, b') = splitAt ctxt ks
(if null backlog || null g (b, c) = splitAt ctxt b'
then ctxt in if null c
else 2 * ctxt) then [ks]
then go (i + lg) (j + lg) bi bj (backlog ++ g) gs else [a, [], takeEnd ctxt b']
else (if null backlog breakKeeps a = [a]
then id check ([_, [], _]:[]) = []
else (:) ((bi, bj), backlog ++ take ctxt g)) $ check ([_, [], a]:xs) = [a] : checkLast xs
go check a = checkLast a
(i + lg) checkLast [] = []
(j + lg) checkLast ([a, [], _]:[]) = [[a]]
(i + lg - ctxt) checkLast (a:xs) = a:checkLast xs
(j + lg - ctxt)
(if null gs
then []
else drop (lg - ctxt) g)
gs
hunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@"
lineSep = fromString "\n"
pprHunk :: BS -> BS -> Hunk -> BS
pprHunk d1 d2 ((i, j), toks) =
B.intercalate lineSep (hunkHdr i j : pprDiff d1 d2 toks)
pprHunks d1 d2 = B.intercalate lineSep . map (pprHunk d1 d2)

7
src/Diff3.hs Normal file
View file

@ -0,0 +1,7 @@
module Diff3 where
import Types
diff3Toks :: BS -> BS -> BS -> TV -> TV -> TV -> a
diff3Toks = undefined

45
src/Format.hs Normal file
View file

@ -0,0 +1,45 @@
{-# LANGUAGE OverloadedStrings #-}
module Format where
import Types
import qualified Data.ByteString as B
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Lazy as BL
import Data.String
import Substr
pprHunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@"
lineSep = fromString "\n"
pprHunk :: Hunk -> BS
pprHunk ((i, j), toks) =
B.intercalate lineSep (pprHunkHdr i j : pprDiff toks)
pprHunks = B.intercalate lineSep . map pprHunk
pprDiff :: Diff -> [BS]
pprDiff = map pprDiff1
pprDiff1 (op, (tok, s)) =
fromString pfx <> escNewlines s
where
pfx =
case (op, tok) of
(Add, True) -> "+|"
(Remove, True) -> "-|"
(Keep, True) -> " |"
(Add, False) -> "+."
(Remove, False) -> "-."
(Keep, False) -> " ."
escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines'
escNewlines' s
| B.null s = mempty
| B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s)
| B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s)
| otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s)

View file

@ -4,6 +4,7 @@ import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8 import qualified Data.ByteString.Char8 as B8
import Data.ByteString.UTF8 (fromString) import Data.ByteString.UTF8 (fromString)
import qualified Data.Vector as V import qualified Data.Vector as V
import Format
import Diff import Diff
import Merge import Merge
import Options.Applicative import Options.Applicative
@ -112,6 +113,6 @@ main =
data2 <- mmapFileByteString f2 Nothing data2 <- mmapFileByteString f2 Nothing
toks1 <- V.fromList <$> redfaTokenize redfa data1 toks1 <- V.fromList <$> redfaTokenize redfa data1
toks2 <- V.fromList <$> redfaTokenize redfa data2 toks2 <- V.fromList <$> redfaTokenize redfa data2
let hs = hunks (max 0 ctxt) $ diffToks data1 data2 toks1 toks2 let hs = hunks (max 0 ctxt) $ diffToks toks1 toks2
B8.putStrLn $ pprHunks data1 data2 hs B8.putStrLn $ pprHunks hs
CmdPatch {} -> putStrLn "not supported yet" CmdPatch {} -> putStrLn "not supported yet"

67
src/Merge.hs Normal file
View file

@ -0,0 +1,67 @@
module Merge
( MergeOpts
, mergeOption
) where
import qualified Data.ByteString as B
import Data.String
import Options.Applicative
import Types
data MergeOpts =
MergeOpts
{ mergeDoMerge :: Bool
, mergeForceWhitespace :: Bool
, mergeKeepWhitespace :: Bool
, mergeCStartStr :: BS
, mergeMineSepStr :: BS
, mergeYourSepStr :: BS
, mergeEndStr :: BS
}
deriving (Show)
marker = fromString . replicate 7
mergeOption forPatch =
MergeOpts <$>
switch
(short 'm' <>
long "merge" <>
help
(if forPatch
then "Merge using conflict markers instead of printing the rejected hunks"
else "Output the merged file instead of the patch")) <*>
switch
(short 'w' <>
long "whitespace" <>
help
((if forPatch
then "Force rejecting a thunk"
else "Force a merge conflict") ++
" on whitespace mismatch")) <*>
switch
(short 'k' <>
long "keep-whitespace" <>
help
("On whitespace mismatch, default to the version from " ++
(if forPatch
then "original file"
else "MYFILE") ++
" instead of the one from " ++
(if forPatch
then "patch"
else "YOURFILE"))) <*>
strOption
(long "merge-start" <>
value (marker '<') <> help "Marker for the beginning of a conflict") <*>
strOption
(long "merge-mine" <>
value (marker '|') <>
help "Marker that separates `mine' from `original' part of the conflict") <*>
strOption
(long "merge-your" <>
value (marker '=') <>
help "Marker that separates `original' from `your' part of the conflict") <*>
strOption
(long "merge-end" <>
value (marker '>') <> help "Marker for the end of a conflict")

4
src/Patch.hs Normal file
View file

@ -0,0 +1,4 @@
module Patch where
patchToks :: a
patchToks = undefined

View file

@ -16,6 +16,7 @@ import qualified Data.Vector as V
import Options.Applicative import Options.Applicative
import Text.Regex.TDFA import Text.Regex.TDFA
import Diff import Diff
import Substr
type BS = B.ByteString type BS = B.ByteString
@ -143,7 +144,7 @@ redfaTokenize' ::
-> Int -> Int
-> Int -> Int
-> [Int] -> [Int]
-> m [(Bool, (Int, Int))] -> m [Tok]
redfaTokenize' spec s state off visited redfaTokenize' spec s state off visited
| off >= B.length s = pure [] | off >= B.length s = pure []
| otherwise = | otherwise =
@ -168,7 +169,7 @@ redfaTokenize' spec s state off visited
((rule, (_, len)):_) -> ((rule, (_, len)):_) ->
let matchLen = len - ooff let matchLen = len - ooff
in (if matchLen > 0 in (if matchLen > 0
then (:) (rrIsToken rule, (off, off + matchLen)) then (:) (rrIsToken rule, substr off matchLen s)
else id) <$> else id) <$>
redfaTokenize' redfaTokenize'
spec spec

7
src/Substr.hs Normal file
View file

@ -0,0 +1,7 @@
module Substr where
import Types
import qualified Data.ByteString as B
substr :: Int -> Int -> BS -> BS
substr b l = B.take l . B.drop b

20
src/Types.hs Normal file
View file

@ -0,0 +1,20 @@
module Types where
import Data.ByteString
import Data.Vector
type BS = ByteString
type Tok = (Bool, BS)
type TV = Vector Tok
type Diff = [(Op, Tok)]
type Hunk = ((Int, Int), Diff)
data Op
= Remove
| Keep
| Add
deriving (Show, Eq)