cleanup, change representation

parent 4b5bac3541
commit 36cf2ba36f
adiff.cabal | 15

@@ -40,22 +40,31 @@ build-type: Simple
 executable adiff
   -- .hs or .lhs file containing the Main module.
-  main-is: MainDiff.hs
+  main-is: Main.hs

   -- Modules included in this executable, other than Main.
-  other-modules: Redfa, Version, Diff, Merge
+  other-modules: Diff,
+                 Diff3,
+                 Format,
+                 Merge,
+                 Patch,
+                 Redfa,
+                 Substr,
+                 Types,
+                 Version

   -- LANGUAGE extensions used by modules in this package.
   other-extensions: CPP

   -- Other library packages from which modules are imported.
   build-depends: base ^>=4.13.0.0,
+                 extra ^>= 1.7,
                  mmap ^>=0.5,
                  regex-tdfa ^>= 1.3,
                  optparse-applicative ^>=0.16,
                  bytestring ^>= 0.10.12,
                  vector ^>=0.12,
                  utf8-string ^>=1.0

   -- Directories containing source files.
   hs-source-dirs: src
src/Diff.hs | 158

@@ -3,10 +3,6 @@ module Diff
   , Op
   , diffToks
   , hunks
-  , pprDiff
-  , pprDiff1
-  , pprHunk
-  , pprHunks
   ) where

 import Control.Monad

@@ -16,31 +12,17 @@ import qualified Data.ByteString.Internal as BI
 import qualified Data.ByteString.Lazy as BL
-import Data.ByteString.UTF8 (fromString)
 import Data.Function (on)
-import Data.List (groupBy)
+import Data.List (groupBy, mapAccumL)
+import Data.List.Extra (split, takeEnd)
 import qualified Data.Vector as V
 import qualified Data.Vector.Unboxed.Mutable as M

-type Tok = (Bool, (Int, Int))
-
-type Diff = [(Op, Tok)]
-
-type Hunk = ((Int, Int), [(Op, Tok)])
-
-type BS = B.ByteString
-
-type TV = V.Vector Tok
-
-data Op
-  = Remove
-  | Keep
-  | Add
-  deriving (Show, Eq)
+import Debug.Trace
+import Substr
+import Types

 data DiffEnv =
   DiffEnv
-    { deD1 :: BS
-    , deD2 :: BS
-    , deT1 :: TV
+    { deT1 :: TV
     , deT2 :: TV
     , deS :: Int
     , deE :: Int

@@ -54,28 +36,21 @@ data DiffEnv =
     }
   deriving (Show)

-substr b e = B.take (e - b) . B.drop b
+toksMatch x y DiffEnv {deT1 = t1, deT2 = t2} =
+  t1 V.! x == t2 V.! y

-toksMatch x y DiffEnv {deT1 = t1, deT2 = t2, deD1 = d1, deD2 = d2} =
-  toksMatch' x y d1 d2 t1 t2
-
-toksMatch' x y d1 d2 t1 t2 =
-  let (isTok1, (b1, e1)) = t1 V.! x
-      (isTok2, (b2, e2)) = t2 V.! y
-   in isTok1 == isTok2 && substr b1 e1 d1 == substr b2 e2 d2
-
-stripEqToks :: BS -> BS -> TV -> TV -> (Diff, Diff, TV, TV)
-stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2')
+stripEqToks :: TV -> TV -> (Diff, Diff, TV, TV)
+stripEqToks t1 t2 = (pre, post, t1', t2')
   where
     l1 = V.length t1
     l2 = V.length t2
     firstDiff i
-      | i < l1 && i < l2 && toksMatch' i i d1 d2 t1 t2 = firstDiff (i + 1)
+      | i < l1 && i < l2 && (t1 V.! i == t2 V.! i) = firstDiff (i + 1)
       | otherwise = i
     b = firstDiff 0
     lastDiff i
       | l1 - i - 1 >= b &&
-          l2 - i - 1 >= b && toksMatch' (l1 - i - 1) (l2 - i - 1) d1 d2 t1 t2 =
+          l2 - i - 1 >= b && t1 V.! (l1 - i - 1) == t2 V.! (l2 - i - 1) =
         lastDiff (i + 1)
       | otherwise = i
     e = lastDiff 0

@@ -84,19 +59,17 @@ stripEqToks d1 d2 t1 t2 = (pre, post, t1', t2')
     t1' = V.slice b (l1 - e - b) t1
     t2' = V.slice b (l2 - e - b) t2

-diffToks :: BS -> BS -> TV -> TV -> Diff
-diffToks d1 d2 t1' t2' = pre ++ res ++ post
+diffToks :: TV -> TV -> Diff
+diffToks t1' t2' = pre ++ res ++ post
   where
-    (pre, post, t1, t2) = stripEqToks d1 d2 t1' t2'
+    (pre, post, t1, t2) = stripEqToks t1' t2'
     res
       | V.null t1 = map (\t -> (Add, t)) (V.toList t2)
       | V.null t2 = map (\t -> (Remove, t)) (V.toList t1)
       | V.length t1 >= V.length t2 =
         diffToks' $
         DiffEnv
-          { deD1 = d1
-          , deD2 = d2
-          , deT1 = t1
+          { deT1 = t1
           , deT2 = t2
           , deS = 0
           , deE = V.length t1

@@ -111,9 +84,7 @@ diffToks d1 d2 t1' t2' = pre ++ res ++ post
       | otherwise =
         diffToks' $
         DiffEnv
-          { deD1 = d2
-          , deD2 = d1
-          , deT1 = t2
+          { deT1 = t2
           , deT2 = t1
           , deS = 0
           , deE = V.length t2

@@ -151,10 +122,10 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
     vecEmid = vecE mid
     extraScore i =
       if isToken
-        then -(tokEnd - tokBegin)
+        then -(B.length s)
         else 0
       where
-        (isToken, (tokBegin, tokEnd)) = deT1 de V.! i
+        (isToken, s) = deT1 de V.! i
     vecS = vec -- "forward" operation
       where
         vec i

@@ -291,70 +262,31 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
       diffToks' de {deE = mid, deVE = vecEmid, deB = opt} ++
       diffToks' de {deS = mid, deVS = vecSmid, deA = opt}

-pprDiff :: BS -> BS -> Diff -> [BS]
-pprDiff d1 d2 = map (pprDiff1 d1 d2)
-
-pprDiff1 d1 d2 (op, (tok, (i, j))) =
-  fromString pfx <> escNewlines (substr i j s)
-  where
-    pfx =
-      case (op, tok) of
-        (Add, True) -> "+|"
-        (Remove, True) -> "-|"
-        (Keep, True) -> " |"
-        (Add, False) -> "+."
-        (Remove, False) -> "-."
-        (Keep, False) -> " ."
-    s =
-      case op of
-        Add -> d2
-        _ -> d1
-
-escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines'
-
-escNewlines' s
-  | B.null s = mempty
-  | B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s)
-  | B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s)
-  | otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s)
-
 hunks :: Int -> Diff -> [Hunk]
-hunks ctxt = go 0 0 0 0 [] . groupBy ((==) `on` fst)
+hunks ctxt =
+  map (stripNums . concat) .
+  split null .
+  concat . check . map breakKeeps . groupBy ((==) `on` fst . snd) . addNums
   where
-    go _ _ bi bj backlog [] =
-      if null backlog
-        then []
-        else [((bi, bj), backlog)]
-    go i j bi bj backlog (g:gs) =
-      case fst (head g) of
-        Remove -> go (i + length g) j bi bj (backlog ++ g) gs
-        Add -> go i (j + length g) bi bj (backlog ++ g) gs
-        Keep ->
-          let lg = length g
-           in if lg <=
-                 (if null backlog || null g
-                    then ctxt
-                    else 2 * ctxt)
-                then go (i + lg) (j + lg) bi bj (backlog ++ g) gs
-                else (if null backlog
-                        then id
-                        else (:) ((bi, bj), backlog ++ take ctxt g)) $
-                     go
-                       (i + lg)
-                       (j + lg)
-                       (i + lg - ctxt)
-                       (j + lg - ctxt)
-                       (if null gs
-                          then []
-                          else drop (lg - ctxt) g)
-                       gs
-
-hunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@"
-
-lineSep = fromString "\n"
-
-pprHunk :: BS -> BS -> Hunk -> BS
-pprHunk d1 d2 ((i, j), toks) =
-  B.intercalate lineSep (hunkHdr i j : pprDiff d1 d2 toks)
-
-pprHunks d1 d2 = B.intercalate lineSep . map (pprHunk d1 d2)
+    addNums = snd . mapAccumL countTok (0, 0)
+    countTok x@(i, j) d@(op, _) =
+      (,)
+        (case op of
+           Remove -> (i + 1, j)
+           Keep -> (i + 1, j + 1)
+           Add -> (i, j + 1))
+        (x, d)
+    stripNums = (,) <$> fst . head <*> map snd
+    breakKeeps ks@((_, (Keep, _)):_) =
+      let (a, b') = splitAt ctxt ks
+          (b, c) = splitAt ctxt b'
+       in if null c
+            then [ks]
+            else [a, [], takeEnd ctxt b']
+    breakKeeps a = [a]
+    check ([_, [], _]:[]) = []
+    check ([_, [], a]:xs) = [a] : checkLast xs
+    check a = checkLast a
+    checkLast [] = []
+    checkLast ([a, [], _]:[]) = [[a]]
+    checkLast (a:xs) = a:checkLast xs
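The rewritten hunks no longer walks the diff with an explicit accumulator; it first numbers every entry, then groups and splits on runs of Keep. As a reference, here is a minimal standalone sketch (not part of the commit) of that first numbering step, mirroring addNums/countTok from the hunk above; the token type is left polymorphic and the example values are illustrative only:

    import Data.List (mapAccumL)

    data Op = Remove | Keep | Add deriving (Show, Eq)

    -- Pair each diff entry with the (old index, new index) at which it starts.
    addNums :: [(Op, tok)] -> [((Int, Int), (Op, tok))]
    addNums = snd . mapAccumL countTok (0, 0)
      where
        countTok x@(i, j) d@(op, _) =
          ( case op of
              Remove -> (i + 1, j)      -- consumes a token of the old input only
              Keep   -> (i + 1, j + 1)  -- consumes a token of both inputs
              Add    -> (i, j + 1)      -- consumes a token of the new input only
          , (x, d))

    -- addNums [(Keep, "a"), (Remove, "b"), (Add, "c")]
    --   == [((0,0), (Keep,"a")), ((1,1), (Remove,"b")), ((2,1), (Add,"c"))]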
src/Diff3.hs | 7 (new file)

@@ -0,0 +1,7 @@
+
+module Diff3 where
+
+import Types
+
+diff3Toks :: BS -> BS -> BS -> TV -> TV -> TV -> a
+diff3Toks = undefined
src/Format.hs | 45 (new file)

@@ -0,0 +1,45 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+module Format where
+
+import Types
+
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Builder as BB
+import qualified Data.ByteString.Internal as BI
+import qualified Data.ByteString.Lazy as BL
+import Data.String
+import Substr
+
+pprHunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@"
+
+lineSep = fromString "\n"
+
+pprHunk :: Hunk -> BS
+pprHunk ((i, j), toks) =
+  B.intercalate lineSep (pprHunkHdr i j : pprDiff toks)
+
+pprHunks = B.intercalate lineSep . map pprHunk
+
+pprDiff :: Diff -> [BS]
+pprDiff = map pprDiff1
+
+pprDiff1 (op, (tok, s)) =
+  fromString pfx <> escNewlines s
+  where
+    pfx =
+      case (op, tok) of
+        (Add, True) -> "+|"
+        (Remove, True) -> "-|"
+        (Keep, True) -> " |"
+        (Add, False) -> "+."
+        (Remove, False) -> "-."
+        (Keep, False) -> " ."
+
+escNewlines = BL.toStrict . BB.toLazyByteString . escNewlines'
+
+escNewlines' s
+  | B.null s = mempty
+  | B.head s == BI.c2w '\n' = BB.stringUtf8 "\\n" <> escNewlines' (B.tail s)
+  | B.head s == BI.c2w '\\' = BB.stringUtf8 "\\\\" <> escNewlines' (B.tail s)
+  | otherwise = BB.word8 (B.head s) <> escNewlines' (B.tail s)
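For reference, pprDiff1 emits a two-character prefix (the operation, then '|' for a token or '.' for a separator) followed by the entry's bytes with newlines and backslashes escaped. A sketch of the expected output, assuming OverloadedStrings and the modules from this commit in a GHCi session:

    ghci> pprDiff1 (Add, (True, "foo"))
    "+|foo"
    ghci> pprDiff1 (Remove, (False, " \n"))
    "-. \\n"
    ghci> pprDiff1 (Keep, (True, "bar"))
    " |bar"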
src/{MainDiff.hs => Main.hs}

@@ -4,6 +4,7 @@ import qualified Data.ByteString as B
 import qualified Data.ByteString.Char8 as B8
 import Data.ByteString.UTF8 (fromString)
 import qualified Data.Vector as V
+import Format
 import Diff
 import Merge
 import Options.Applicative

@@ -112,6 +113,6 @@ main =
         data2 <- mmapFileByteString f2 Nothing
         toks1 <- V.fromList <$> redfaTokenize redfa data1
         toks2 <- V.fromList <$> redfaTokenize redfa data2
-        let hs = hunks (max 0 ctxt) $ diffToks data1 data2 toks1 toks2
-        B8.putStrLn $ pprHunks data1 data2 hs
+        let hs = hunks (max 0 ctxt) $ diffToks toks1 toks2
+        B8.putStrLn $ pprHunks hs
       CmdPatch {} -> putStrLn "not supported yet"
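Since tokens now carry their own bytes, the driver no longer threads the raw file contents through diffing and printing. A small self-contained sketch of the new call chain (module and function names as introduced by this commit; the literal token vectors stand in for redfaTokenize output):

    {-# LANGUAGE OverloadedStrings #-}

    import qualified Data.ByteString.Char8 as B8
    import qualified Data.Vector as V

    import Diff (diffToks, hunks)
    import Format (pprHunks)
    import Types (Tok)

    main :: IO ()
    main = do
      let toks1, toks2 :: V.Vector Tok
          toks1 = V.fromList [(True, "foo"), (False, " "), (True, "bar")]
          toks2 = V.fromList [(True, "foo"), (False, " "), (True, "baz")]
      -- diff the token vectors, group into hunks with one token of context, print
      B8.putStrLn . pprHunks . hunks 1 $ diffToks toks1 toks2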
src/Merge.hs | 67 (new file)

@@ -0,0 +1,67 @@
+module Merge
+  ( MergeOpts
+  , mergeOption
+  ) where
+
+import qualified Data.ByteString as B
+import Data.String
+import Options.Applicative
+import Types
+
+data MergeOpts =
+  MergeOpts
+    { mergeDoMerge :: Bool
+    , mergeForceWhitespace :: Bool
+    , mergeKeepWhitespace :: Bool
+    , mergeCStartStr :: BS
+    , mergeMineSepStr :: BS
+    , mergeYourSepStr :: BS
+    , mergeEndStr :: BS
+    }
+  deriving (Show)
+
+marker = fromString . replicate 7
+
+mergeOption forPatch =
+  MergeOpts <$>
+  switch
+    (short 'm' <>
+     long "merge" <>
+     help
+       (if forPatch
+          then "Merge using conflict markers instead of printing the rejected hunks"
+          else "Output the merged file instead of the patch")) <*>
+  switch
+    (short 'w' <>
+     long "whitespace" <>
+     help
+       ((if forPatch
+           then "Force rejecting a thunk"
+           else "Force a merge conflict") ++
+        " on whitespace mismatch")) <*>
+  switch
+    (short 'k' <>
+     long "keep-whitespace" <>
+     help
+       ("On whitespace mismatch, default to the version from " ++
+        (if forPatch
+           then "original file"
+           else "MYFILE") ++
+        " instead of the one from " ++
+        (if forPatch
+           then "patch"
+           else "YOURFILE"))) <*>
+  strOption
+    (long "merge-start" <>
+     value (marker '<') <> help "Marker for the beginning of a conflict") <*>
+  strOption
+    (long "merge-mine" <>
+     value (marker '|') <>
+     help "Marker that separates `mine' from `original' part of the conflict") <*>
+  strOption
+    (long "merge-your" <>
+     value (marker '=') <>
+     help "Marker that separates `original' from `your' part of the conflict") <*>
+  strOption
+    (long "merge-end" <>
+     value (marker '>') <> help "Marker for the end of a conflict")
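mergeOption only builds the option group; it still has to be composed into a full optparse-applicative parser. A hypothetical wiring sketch (the Opts wrapper and the --context option are invented for illustration; execParser, info, helper and fullDesc are standard optparse-applicative):

    import Options.Applicative

    import Merge (MergeOpts, mergeOption)

    data Opts = Opts
      { optContext :: Int
      , optMerge :: MergeOpts
      }

    optsParser :: Parser Opts
    optsParser =
      Opts <$> option auto (short 'C' <> long "context" <> value 3 <> help "Context length")
           <*> mergeOption False  -- False selects the diff-flavoured help texts

    main :: IO ()
    main = do
      opts <- execParser (info (optsParser <**> helper) fullDesc)
      print (optMerge opts)  -- MergeOpts derives Show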
src/Patch.hs | 4 (new file)

@@ -0,0 +1,4 @@
+module Patch where
+
+patchToks :: a
+patchToks = undefined
src/Redfa.hs

@@ -16,6 +16,7 @@ import qualified Data.Vector as V
 import Options.Applicative
 import Text.Regex.TDFA
 import Diff
+import Substr

 type BS = B.ByteString

@@ -143,7 +144,7 @@ redfaTokenize' ::
     -> Int
     -> Int
     -> [Int]
-    -> m [(Bool, (Int, Int))]
+    -> m [Tok]
 redfaTokenize' spec s state off visited
   | off >= B.length s = pure []
   | otherwise =

@@ -168,7 +169,7 @@ redfaTokenize' spec s state off visited
         ((rule, (_, len)):_) ->
           let matchLen = len - ooff
            in (if matchLen > 0
-                 then (:) (rrIsToken rule, (off, off + matchLen))
+                 then (:) (rrIsToken rule, substr off matchLen s)
                 else id) <$>
               redfaTokenize'
                 spec
src/Substr.hs | 7 (new file)

@@ -0,0 +1,7 @@
+module Substr where
+
+import Types
+import qualified Data.ByteString as B
+
+substr :: Int -> Int -> BS -> BS
+substr b l = B.take l . B.drop b
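Worth noting: the old local helper in Diff.hs was substr b e (begin and end offsets), while the shared Substr.substr takes a begin offset and a length, matching how the tokenizer now calls it (substr off matchLen s). A quick check of the new convention, with OverloadedStrings:

    -- substr begin length bytes
    substr 2 3 "abcdefg" == "cde"   -- B.take 3 (B.drop 2 "abcdefg")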
src/Types.hs | 20 (new file)

@@ -0,0 +1,20 @@
+module Types where
+
+import Data.ByteString
+import Data.Vector
+
+type BS = ByteString
+
+type Tok = (Bool, BS)
+
+type TV = Vector Tok
+
+type Diff = [(Op, Tok)]
+
+type Hunk = ((Int, Int), Diff)
+
+data Op
+  = Remove
+  | Keep
+  | Add
+  deriving (Show, Eq)
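The representation change in a nutshell: a token used to be a flag plus (begin, end) offsets into one of the input files, so comparing two tokens needed both file contents; now a token carries its own bytes, and the derived Eq on (Bool, BS) is enough, which is what the simplified toksMatch and stripEqToks in Diff.hs rely on. A small illustration, OverloadedStrings assumed:

    t1, t2, t3 :: Tok
    t1 = (True, "foo")   -- a real token
    t2 = (True, "foo")
    t3 = (False, " ")    -- whitespace/separator

    -- plain (==) replaces the old substr-based toksMatch'
    same = t1 == t2 && t1 /= t3   -- True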