cleanup, change representation

This commit is contained in:
Mirek Kratochvil 2020-09-26 23:09:26 +02:00
parent 4b5bac3541
commit 36cf2ba36f
10 changed files with 213 additions and 120 deletions

View file

@ -40,22 +40,31 @@ build-type: Simple
executable adiff
-- .hs or .lhs file containing the Main module.
main-is: MainDiff.hs
main-is: Main.hs
-- Modules included in this executable, other than Main.
other-modules: Redfa, Version, Diff, Merge
other-modules: Diff,
Diff3,
Format,
Merge,
Patch,
Redfa,
Substr,
Types,
Version
-- LANGUAGE extensions used by modules in this package.
other-extensions: CPP
-- Other library packages from which modules are imported.
build-depends: base ^>=4.13.0.0,
extra ^>= 1.7,
mmap ^>=0.5,
regex-tdfa ^>= 1.3,
optparse-applicative ^>=0.16,
bytestring ^>= 0.10.12,
vector ^>=0.12,
utf8-string ^>=1.0
utf8-string ^>=1.0
-- Directories containing source files.
hs-source-dirs: src

View file

@ -3,10 +3,6 @@ module Diff
, Op
, diffToks
, hunks
, pprDiff
, pprDiff1
, pprHunk
, pprHunks
) where
import Control.Monad
@ -16,31 +12,17 @@ import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Lazy as BL
import Data.ByteString.UTF8 (fromString)
import Data.Function (on)
import Data.List (groupBy)
import Data.List (groupBy, mapAccumL)
import Data.List.Extra (split, takeEnd)
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed.Mutable as M
type Tok = (Bool, (Int, Int))
type Diff = [(Op, Tok)]
type Hunk = ((Int, Int), [(Op, Tok)])
type BS = B.ByteString
type TV = V.Vector Tok
data Op
= Remove
| Keep
| Add
deriving (Show, Eq)
import Debug.Trace
import Substr
import Types
data DiffEnv =
DiffEnv
{ deD1 :: BS
, deD2 :: BS
, deT1 :: TV
{ deT1 :: TV
, deT2 :: TV
, deS :: Int
, deE :: Int
@ -54,28 +36,21 @@ data DiffEnv =
}
deriving (Show)
substr b e = B.take (e - b) . B.drop b
toksMatch x y DiffEnv {deT1 = t1, deT2 = t2} =
t1 V.! x == t2 V.! y
toksMatch x y DiffEnv {deT1 = t1, deT2 = t2, deD1 = d1, deD2 = d2} =
toksMatch' x y d1 d2 t1 t2
toksMatch' x y d1 d2 t1 t2 =
let (isTok1, (b1, e1)) = t1 V.! x
(isTok2, (b2, e2)) = t2 V.! y
in isTok1 == isTok2 && substr b1 e1 d1 == substr b2 e2 d2
stripEqToks :: BS -> BS -> TV -> TV -> (Diff, Diff, TV, TV)
stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2')
stripEqToks :: TV -> TV -> (Diff, Diff, TV, TV)
stripEqToks t1 t2 = (pre, post, t1', t2')
where
l1 = V.length t1
l2 = V.length t2
firstDiff i
| i < l1 && i < l2 && toksMatch' i i d1 d2 t1 t2 = firstDiff (i + 1)
| i < l1 && i < l2 && (t1 V.! i == t2 V.! i) = firstDiff (i + 1)
| otherwise = i
b = firstDiff 0
lastDiff i
| l1 - i - 1 >= b &&
l2 - i - 1 >= b && toksMatch' (l1 - i - 1) (l2 - i - 1) d1 d2 t1 t2 =
l2 - i - 1 >= b && t1 V.! (l1 - i - 1) == t2 V.! (l2 - i - 1)=
lastDiff (i + 1)
| otherwise = i
e = lastDiff 0
@ -84,19 +59,17 @@ stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2')
t1' = V.slice b (l1 - e - b) t1
t2' = V.slice b (l2 - e - b) t2
diffToks :: BS -> BS -> TV -> TV -> Diff
diffToks d1 d2 t1' t2' = pre ++ res ++ post
diffToks :: TV -> TV -> Diff
diffToks t1' t2' = pre ++ res ++ post
where
(pre, post, t1, t2) = stripEqToks d1 d2 t1' t2'
(pre, post, t1, t2) = stripEqToks t1' t2'
res
| V.null t1 = map (\t -> (Add, t)) (V.toList t2)
| V.null t2 = map (\t -> (Remove, t)) (V.toList t1)
| V.length t1 >= V.length t2 =
diffToks' $
DiffEnv
{ deD1 = d1
, deD2 = d2
, deT1 = t1
{ deT1 = t1
, deT2 = t2
, deS = 0
, deE = V.length t1
@ -111,9 +84,7 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post
| otherwise =
diffToks' $
DiffEnv
{ deD1 = d2
, deD2 = d1
, deT1 = t2
{ deT1 = t2
, deT2 = t1
, deS = 0
, deE = V.length t2
@ -151,10 +122,10 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
vecEmid = vecE mid
extraScore i =
if isToken
then -(tokEnd - tokBegin)
then -(B.length s)
else 0
where
(isToken, (tokBegin, tokEnd)) = deT1 de V.! i
(isToken, s) = deT1 de V.! i
vecS = vec -- "forward" operation
where
vec i
@ -291,70 +262,31 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
diffToks' de {deE = mid, deVE = vecEmid, deB = opt} ++
diffToks' de {deS = mid, deVS = vecSmid, deA = opt}
pprDiff :: BS -> BS -> Diff -> [BS]
pprDiff d1 d2 = map (pprDiff1 d1 d2)
pprDiff1 d1 d2 (op, (tok, (i, j))) =
fromString pfx <> escNewlines (substr i j s)
where
pfx =
case (op, tok) of
(Add, True) -> "+|"
(Remove, True) -> "-|"
(Keep, True) -> " |"
(Add, False) -> "+."
(Remove, False) -> "-."
(Keep, False) -> " ."
s =
case op of
Add -> d2
_ -> d1
escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines'
escNewlines' s
| B.null s = mempty
| B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s)
| B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s)
| otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s)
hunks :: Int -> Diff -> [Hunk]
hunks ctxt = go 0 0 0 0 [] . groupBy ((==) `on` fst)
hunks ctxt =
map (stripNums . concat) .
split null .
concat . check . map breakKeeps . groupBy ((==) `on` fst . snd) . addNums
where
go _ _ bi bj backlog [] =
if null backlog
then []
else [((bi, bj), backlog)]
go i j bi bj backlog (g:gs) =
case fst (head g) of
Remove -> go (i + length g) j bi bj (backlog ++ g) gs
Add -> go i (j + length g) bi bj (backlog ++ g) gs
Keep ->
let lg = length g
in if lg <=
(if null backlog || null g
then ctxt
else 2 * ctxt)
then go (i + lg) (j + lg) bi bj (backlog ++ g) gs
else (if null backlog
then id
else (:) ((bi, bj), backlog ++ take ctxt g)) $
go
(i + lg)
(j + lg)
(i + lg - ctxt)
(j + lg - ctxt)
(if null gs
then []
else drop (lg - ctxt) g)
gs
hunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@"
lineSep = fromString "\n"
pprHunk :: BS -> BS -> Hunk -> BS
pprHunk d1 d2 ((i, j), toks) =
B.intercalate lineSep (hunkHdr i j : pprDiff d1 d2 toks)
pprHunks d1 d2 = B.intercalate lineSep . map (pprHunk d1 d2)
addNums = snd . mapAccumL countTok (0, 0)
countTok x@(i, j) d@(op, _) =
(,)
(case op of
Remove -> (i + 1, j)
Keep -> (i + 1, j + 1)
Add -> (i, j + 1))
(x, d)
stripNums = (,) <$> fst . head <*> map snd
breakKeeps ks@((_, (Keep, _)):_) =
let (a, b') = splitAt ctxt ks
(b, c) = splitAt ctxt b'
in if null c
then [ks]
else [a, [], takeEnd ctxt b']
breakKeeps a = [a]
check ([_, [], _]:[]) = []
check ([_, [], a]:xs) = [a] : checkLast xs
check a = checkLast a
checkLast [] = []
checkLast ([a, [], _]:[]) = [[a]]
checkLast (a:xs) = a:checkLast xs

7
src/Diff3.hs Normal file
View file

@ -0,0 +1,7 @@
module Diff3 where
import Types
diff3Toks :: BS -> BS -> BS -> TV -> TV -> TV -> a
diff3Toks = undefined

45
src/Format.hs Normal file
View file

@ -0,0 +1,45 @@
{-# LANGUAGE OverloadedStrings #-}
module Format where
import Types
import qualified Data.ByteString as B
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Lazy as BL
import Data.String
import Substr
pprHunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@"
lineSep = fromString "\n"
pprHunk :: Hunk -> BS
pprHunk ((i, j), toks) =
B.intercalate lineSep (pprHunkHdr i j : pprDiff toks)
pprHunks = B.intercalate lineSep . map pprHunk
pprDiff :: Diff -> [BS]
pprDiff = map pprDiff1
pprDiff1 (op, (tok, s)) =
fromString pfx <> escNewlines s
where
pfx =
case (op, tok) of
(Add, True) -> "+|"
(Remove, True) -> "-|"
(Keep, True) -> " |"
(Add, False) -> "+."
(Remove, False) -> "-."
(Keep, False) -> " ."
escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines'
escNewlines' s
| B.null s = mempty
| B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s)
| B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s)
| otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s)

View file

@ -4,6 +4,7 @@ import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import Data.ByteString.UTF8 (fromString)
import qualified Data.Vector as V
import Format
import Diff
import Merge
import Options.Applicative
@ -112,6 +113,6 @@ main =
data2 <- mmapFileByteString f2 Nothing
toks1 <- V.fromList <$> redfaTokenize redfa data1
toks2 <- V.fromList <$> redfaTokenize redfa data2
let hs = hunks (max 0 ctxt) $ diffToks data1 data2 toks1 toks2
B8.putStrLn $ pprHunks data1 data2 hs
let hs = hunks (max 0 ctxt) $ diffToks toks1 toks2
B8.putStrLn $ pprHunks hs
CmdPatch {} -> putStrLn "not supported yet"

67
src/Merge.hs Normal file
View file

@ -0,0 +1,67 @@
module Merge
( MergeOpts
, mergeOption
) where
import qualified Data.ByteString as B
import Data.String
import Options.Applicative
import Types
data MergeOpts =
MergeOpts
{ mergeDoMerge :: Bool
, mergeForceWhitespace :: Bool
, mergeKeepWhitespace :: Bool
, mergeCStartStr :: BS
, mergeMineSepStr :: BS
, mergeYourSepStr :: BS
, mergeEndStr :: BS
}
deriving (Show)
marker = fromString . replicate 7
mergeOption forPatch =
MergeOpts <$>
switch
(short 'm' <>
long "merge" <>
help
(if forPatch
then "Merge using conflict markers instead of printing the rejected hunks"
else "Output the merged file instead of the patch")) <*>
switch
(short 'w' <>
long "whitespace" <>
help
((if forPatch
then "Force rejecting a thunk"
else "Force a merge conflict") ++
" on whitespace mismatch")) <*>
switch
(short 'k' <>
long "keep-whitespace" <>
help
("On whitespace mismatch, default to the version from " ++
(if forPatch
then "original file"
else "MYFILE") ++
" instead of the one from " ++
(if forPatch
then "patch"
else "YOURFILE"))) <*>
strOption
(long "merge-start" <>
value (marker '<') <> help "Marker for the beginning of a conflict") <*>
strOption
(long "merge-mine" <>
value (marker '|') <>
help "Marker that separates `mine' from `original' part of the conflict") <*>
strOption
(long "merge-your" <>
value (marker '=') <>
help "Marker that separates `original' from `your' part of the conflict") <*>
strOption
(long "merge-end" <>
value (marker '>') <> help "Marker for the end of a conflict")

4
src/Patch.hs Normal file
View file

@ -0,0 +1,4 @@
module Patch where
patchToks :: a
patchToks = undefined

View file

@ -16,6 +16,7 @@ import qualified Data.Vector as V
import Options.Applicative
import Text.Regex.TDFA
import Diff
import Substr
type BS = B.ByteString
@ -143,7 +144,7 @@ redfaTokenize' ::
-> Int
-> Int
-> [Int]
-> m [(Bool, (Int, Int))]
-> m [Tok]
redfaTokenize' spec s state off visited
| off >= B.length s = pure []
| otherwise =
@ -168,7 +169,7 @@ redfaTokenize' spec s state off visited
((rule, (_, len)):_) ->
let matchLen = len - ooff
in (if matchLen > 0
then (:) (rrIsToken rule, (off, off + matchLen))
then (:) (rrIsToken rule, substr off matchLen s)
else id) <$>
redfaTokenize'
spec

7
src/Substr.hs Normal file
View file

@ -0,0 +1,7 @@
module Substr where
import Types
import qualified Data.ByteString as B
substr :: Int -> Int -> BS -> BS
substr b l = B.take l . B.drop b

20
src/Types.hs Normal file
View file

@ -0,0 +1,20 @@
module Types where
import Data.ByteString
import Data.Vector
type BS = ByteString
type Tok = (Bool, BS)
type TV = Vector Tok
type Diff = [(Op, Tok)]
type Hunk = ((Int, Int), Diff)
data Op
= Remove
| Keep
| Add
deriving (Show, Eq)