software F engineering

This commit is contained in:
Mirek Kratochvil 2020-09-27 19:26:29 +02:00
parent efae03223e
commit 23b62f6344
11 changed files with 116 additions and 83 deletions

View file

@ -42,6 +42,8 @@ executable adiff
-- .hs or .lhs file containing the Main module. -- .hs or .lhs file containing the Main module.
main-is: Main.hs main-is: Main.hs
ghc-options: -O2 -Wall
-- Modules included in this executable, other than Main. -- Modules included in this executable, other than Main.
other-modules: Diff, other-modules: Diff,
Diff3, Diff3,

View file

@ -4,18 +4,8 @@ module Diff
( diffToks ( diffToks
) where ) where
import Control.Monad
import qualified Data.ByteString as B import qualified Data.ByteString as B
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Lazy as BL
import Data.ByteString.UTF8 (fromString)
import Data.Function (on)
import Data.List (groupBy, mapAccumL)
import Data.List.Extra (split, takeEnd)
import qualified Data.Vector as V import qualified Data.Vector as V
import qualified Data.Vector.Unboxed.Mutable as M
import Substr
import Types import Types
data DiffEnv = data DiffEnv =
@ -34,6 +24,7 @@ data DiffEnv =
} }
deriving (Show) deriving (Show)
toksMatch :: Int -> Int -> DiffEnv -> Bool
toksMatch x y DiffEnv {deT1 = t1, deT2 = t2} = t1 V.! x == t2 V.! y toksMatch x y DiffEnv {deT1 = t1, deT2 = t2} = t1 V.! x == t2 V.! y
stripEqToks :: TV -> TV -> (Diff, Diff, TV, TV) stripEqToks :: TV -> TV -> (Diff, Diff, TV, TV)
@ -94,6 +85,7 @@ diffToks t1' t2' = pre ++ res ++ post
, deTrans = True , deTrans = True
} }
minIndexFwd :: V.Vector (Int, Int) -> Int
minIndexFwd = minIndexFwd =
V.minIndexBy V.minIndexBy
(\x y -> (\x y ->
@ -102,6 +94,7 @@ minIndexFwd =
else GT --basically normal V.minIndex else GT --basically normal V.minIndex
) )
minIndexRev :: V.Vector (Int, Int) -> Int
minIndexRev = minIndexRev =
V.minIndexBy V.minIndexBy
(\x y -> (\x y ->
@ -119,10 +112,10 @@ diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
vecEmid = vecE mid vecEmid = vecE mid
extraScore i = extraScore i =
if isToken if isToken
then -(B.length s) then -(B.length str)
else 0 else 0
where where
(isToken, s) = deT1 de V.! i (isToken, str) = deT1 de V.! i
vecS = vec -- "forward" operation vecS = vec -- "forward" operation
where where
vec i vec i

View file

@ -1,15 +1,10 @@
{-# LANGUAGE TupleSections #-} {-# LANGUAGE TupleSections #-}
module Diff3 where module Diff3 where
import Diff import Diff
import Types
import Merge import Merge
import Types
data Origin
= Stable
| Mine
| Your
deriving (Show, Eq)
diff3Toks :: MergeOpts -> TV -> TV -> TV -> Diff diff3Toks :: MergeOpts -> TV -> TV -> TV -> Diff
diff3Toks mo tMine tOrig tYour = diff3Toks mo tMine tOrig tYour =
@ -30,27 +25,15 @@ diff3Toks mo tMine tOrig tYour =
align ((Add, a):as) bs@((Remove, _):_) = (Mine, (Add, a)) : align as bs align ((Add, a):as) bs@((Remove, _):_) = (Mine, (Add, a)) : align as bs
align as@((Remove, _):_) ((Add, b):bs) = (Your, (Add, b)) : align as bs align as@((Remove, _):_) ((Add, b):bs) = (Your, (Add, b)) : align as bs
align [] [] = [] align [] [] = []
align as@((Add, _):_) [] = map (Mine,) as align as@((Add, _):_) [] = map (Mine, ) as
align [] bs@((Add, _):_) = map (Your,) bs align [] bs@((Add, _):_) = map (Your, ) bs
align _ _ = error "Internal failure: diffstreams seem broken, cannot align" align _ _ = error "Internal failure: diffstreams seem broken, cannot align"
conflict :: [(Origin, (Op, Tok))] -> Diff conflict :: [(Origin, (Op, Tok))] -> Diff
conflict [] = [] conflict [] = []
conflict as@(a:_) conflict as@(a:_)
| stable a = applySplit stable (map snd) conflict as | stable a = applySplit stable (map snd) conflict as
| unstable a = applySplit unstable merge conflict as | otherwise = applySplit (not . stable) (merge mo) conflict as
applySplit :: (a -> Bool) -> ([a] -> [b]) -> ([a] -> [b]) -> [a] -> [b] applySplit :: (a -> Bool) -> ([a] -> [b]) -> ([a] -> [b]) -> [a] -> [b]
applySplit cond f g xs = f (takeWhile cond xs) ++ g (dropWhile cond xs) applySplit cond f g xs = f (takeWhile cond xs) ++ g (dropWhile cond xs)
merge :: [(Origin, (Op,Tok))] -> Diff
merge cs =
let mys = map (\a -> map snd $ filter ((== a) . fst) cs) [Mine, Your]
[tokOrigsMine, tokOrigsYour] =
map (map snd.filter ((/= Add) . fst)) mys
[tokMine, tokYour] = map (map snd.filter ((/= Remove) . fst)) mys
in if tokOrigsMine /= tokOrigsYour
then error "Internal failure: merge origins differ"
else map (MineChanged,) tokMine ++
map (Original,) tokOrigsMine ++
map (YourChanged,) tokYour
stable (Stable, _) = True stable (Stable, _) = True
stable _ = False stable _ = False
unstable = not . stable

View file

@ -1,39 +1,47 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Format where module Format
( pprHunks
, pprHunk
, pprDiff1
) where
import Types import Types
import qualified Data.ByteString as B import qualified Data.ByteString as B
import qualified Data.ByteString.Builder as BB import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Internal as BI import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Lazy as BL
import Data.String import Data.String
import Substr
pprHunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@" pprHunkHdr :: Int -> Int -> BB.Builder
pprHunkHdr i j = fromString $ "@@ -" ++ show i ++ " +" ++ show j ++ " @@"
lineSep :: BB.Builder
lineSep = fromString "\n" lineSep = fromString "\n"
pprHunks :: [Hunk] -> BB.Builder pprHunks :: [Hunk] -> BB.Builder
pprHunks = mconcat . map pprHunk pprHunks = mconcat . map pprHunk
pprHunk :: Hunk -> BB.Builder pprHunk :: Hunk -> BB.Builder
pprHunk ((i, j), toks) = mconcat ((pprHunkHdr i j <> lineSep) : map pprDiff1 toks) pprHunk ((i, j), toks) =
mconcat ((pprHunkHdr i j <> lineSep) : map pprDiff1 toks)
pprDiff1 :: (Op, Tok) -> BB.Builder pprDiff1 :: (Op, Tok) -> BB.Builder
pprDiff1 (op, (tok, s)) = pprDiff1 (op, (tok, s)) = fromString pfx <> escNewlines s <> lineSep
fromString pfx <> escNewlines s <> lineSep
where where
pfx = [opc,tc] pfx = [opc, tc]
opc = case op of opc =
Add -> '+' case op of
Keep -> ' ' Add -> '+'
Remove -> '-' Keep -> ' '
MineChanged -> '<' Remove -> '-'
Original -> '=' MineChanged -> '<'
YourChanged -> '>' Original -> '='
tc = if tok then '|' else '.' YourChanged -> '>'
tc =
if tok
then '|'
else '.'
escNewlines :: BS -> BB.Builder escNewlines :: BS -> BB.Builder
escNewlines s escNewlines s

View file

@ -21,7 +21,7 @@ hunks ctxt d =
zipWith (<=) (padEnd ctxt edits) (replicate (1 + 2 * ctxt) 0 ++ edits) zipWith (<=) (padEnd ctxt edits) (replicate (1 + 2 * ctxt) 0 ++ edits)
addNums = snd . mapAccumL countTok (0, 0) addNums = snd . mapAccumL countTok (0, 0)
stripNums = (,) <$> fst . head <*> map snd stripNums = (,) <$> fst . head <*> map snd
countTok x@(i, j) d@(op, _) = countTok x@(i, j) d'@(op, _) =
(,) (,)
(case op of (case op of
Remove -> (i + 1, j) Remove -> (i + 1, j)
@ -30,4 +30,4 @@ hunks ctxt d =
MineChanged -> (i, j) MineChanged -> (i, j)
Original -> (i + 1, j + 1) Original -> (i + 1, j + 1)
YourChanged -> (i, j)) YourChanged -> (i, j))
(x, d) (x, d')

View file

@ -1,9 +1,6 @@
module Main where module Main where
import qualified Data.ByteString as B
import qualified Data.ByteString.Builder as BB import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Char8 as B8
import Data.ByteString.UTF8 (fromString)
import qualified Data.Vector as V import qualified Data.Vector as V
import Diff import Diff
import Diff3 import Diff3
@ -14,6 +11,7 @@ import Options.Applicative
import Redfa import Redfa
import System.IO (stdout) import System.IO (stdout)
import System.IO.MMap import System.IO.MMap
import Types
import Version import Version
data ADiffOptions = data ADiffOptions =
@ -46,18 +44,26 @@ data ADiffCommandOpts
} }
deriving (Show) deriving (Show)
contextOpt :: Parser Int
contextOpt = contextOpt =
check <$>
option option
auto auto
(metavar "CONTEXT" <> (metavar "CONTEXT" <>
short 'C' <> short 'C' <>
long "context" <> long "context" <>
value 5 <> help "How many tokens around the change to include in the patch") value 5 <> help "How many tokens around the change to include in the patch")
where
check c
| c < 0 = error "Negative context"
| otherwise = c
diffCmdOptions :: Parser ADiffCommandOpts
diffCmdOptions = diffCmdOptions =
CmdDiff <$> contextOpt <*> strArgument (metavar "FROMFILE") <*> CmdDiff <$> contextOpt <*> strArgument (metavar "FROMFILE") <*>
strArgument (metavar "TOFILE") strArgument (metavar "TOFILE")
patchCmdOptions :: Parser ADiffCommandOpts
patchCmdOptions = patchCmdOptions =
CmdPatch <$> CmdPatch <$>
switch switch
@ -82,6 +88,7 @@ patchCmdOptions =
help "Strip NUM leading components from the paths" <> value 0) <*> help "Strip NUM leading components from the paths" <> value 0) <*>
mergeOption True mergeOption True
diff3CmdOptions :: Parser ADiffCommandOpts
diff3CmdOptions = diff3CmdOptions =
CmdDiff3 <$> contextOpt <*> strArgument (metavar "MYFILE") <*> CmdDiff3 <$> contextOpt <*> strArgument (metavar "MYFILE") <*>
strArgument (metavar "OLDFILE") <*> strArgument (metavar "OLDFILE") <*>
@ -98,8 +105,10 @@ actionOption =
info diff3CmdOptions $ progDesc "Compare and merge three files" info diff3CmdOptions $ progDesc "Compare and merge three files"
] ]
adiffOptions :: Parser ADiffOptions
adiffOptions = ADiffOptions <$> redfaOption <*> actionOption adiffOptions = ADiffOptions <$> redfaOption <*> actionOption
loadToks :: RedfaSpec -> FilePath -> IO TV
loadToks redfa f = loadToks redfa f =
V.fromList <$> (mmapFileByteString f Nothing >>= redfaTokenize redfa) V.fromList <$> (mmapFileByteString f Nothing >>= redfaTokenize redfa)

View file

@ -1,12 +1,13 @@
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
module Merge module Merge
( MergeOpts(..) ( MergeOpts(..)
, mergeOption , mergeOption
, fmtMerged , fmtMerged
, merge
) where ) where
import qualified Data.ByteString as B
import qualified Data.ByteString.Builder as BB import qualified Data.ByteString.Builder as BB
import Data.String import Data.String
import Options.Applicative import Options.Applicative
@ -15,6 +16,7 @@ import Types
data MergeOpts = data MergeOpts =
MergeOpts MergeOpts
{ mergeDoMerge :: Bool { mergeDoMerge :: Bool
, mergeIgnoreWhitespace :: Bool
, mergeForceWhitespace :: Bool , mergeForceWhitespace :: Bool
, mergeKeepWhitespace :: Bool , mergeKeepWhitespace :: Bool
, mergeCStartStr :: BS , mergeCStartStr :: BS
@ -24,8 +26,7 @@ data MergeOpts =
} }
deriving (Show) deriving (Show)
marker = fromString . replicate 7 mergeOption :: Bool -> Parser MergeOpts
mergeOption forPatch = mergeOption forPatch =
addLBR <$> addLBR <$>
((,) <$> ((,) <$>
@ -35,6 +36,7 @@ mergeOption forPatch =
help "Automatically add a line break after conflict markers") <*> help "Automatically add a line break after conflict markers") <*>
mo) mo)
where where
marker = fromString . replicate 7
mo = mo =
MergeOpts <$> MergeOpts <$>
switch switch
@ -45,18 +47,27 @@ mergeOption forPatch =
then "Merge using conflict markers instead of printing the rejected hunks" then "Merge using conflict markers instead of printing the rejected hunks"
else "Output the merged file instead of the patch")) <*> else "Output the merged file instead of the patch")) <*>
switch switch
(short 'w' <> (short 'i' <>
long "whitespace" <> long "ignore-whitespace" <>
help
("Ignore " ++
(if forPatch
then "hunks"
else "chunks") ++
" that change only whitespace")) <*>
switch
(short 'f' <>
long "force-whitespace" <>
help help
((if forPatch ((if forPatch
then "Force rejecting a thunk" then "Force rejecting a hunk"
else "Force a merge conflict") ++ else "Force a merge conflict") ++
" on whitespace mismatch")) <*> " on whitespace mismatch (overrides `ignore-whitespace')")) <*>
switch switch
(short 'k' <> (short 'k' <>
long "keep-whitespace" <> long "keep-whitespace" <>
help help
("On whitespace mismatch, default to the version from " ++ ("On whitespace mismatch, output the version from " ++
(if forPatch (if forPatch
then "original file" then "original file"
else "MYFILE") ++ else "MYFILE") ++
@ -89,24 +100,27 @@ mergeOption forPatch =
, mergeCEndStr = mergeCEndStr x <> "\n" , mergeCEndStr = mergeCEndStr x <> "\n"
} }
{- This kinda relies on reasonable ordering within the conflicts in the Diff -} {- This kinda relies on reasonable ordering
- within the conflicts in the Diff -}
fmtMerged :: MergeOpts -> Diff -> BB.Builder fmtMerged :: MergeOpts -> Diff -> BB.Builder
fmtMerged mo = go Keep fmtMerged mo = go Keep
where where
go op [] go op []
| conflictOp op = bb $ mergeCEndStr mo | conflictOp op = bb $ mergeCEndStr mo
| otherwise = mempty | otherwise = mempty
go last l@((op, (_, tok)):xs) go prev l@((op, (_, tok)):xs)
| conflictOp last && not (conflictOp op) = | conflictOp prev && not (conflictOp op) =
bb (mergeCEndStr mo) <> go Keep l bb (mergeCEndStr mo) <> go Keep l
| not (conflictOp last) && conflictOp op = | not (conflictOp prev) && conflictOp op =
bb (mergeCStartStr mo) <> go MineChanged l bb (mergeCStartStr mo) <> go MineChanged l
| last /= op && conflictOp op = | prev /= op && conflictOp op =
(case op of (case op of
MineChanged -> bb $ mergeCStartStr mo MineChanged -> bb $ mergeCStartStr mo
Original -> bb $ mergeMineSepStr mo Original -> bb $ mergeMineSepStr mo
YourChanged -> bb $ mergeYourSepStr mo) <> YourChanged -> bb $ mergeYourSepStr mo
_ -> error "Internal conflict handling failure") <>
go op l go op l
| op == Remove = go op xs
| otherwise = bb tok <> go op xs | otherwise = bb tok <> go op xs
conflictOp o = conflictOp o =
case o of case o of
@ -115,3 +129,27 @@ fmtMerged mo = go Keep
Remove -> False Remove -> False
_ -> True _ -> True
bb = BB.byteString bb = BB.byteString
merge :: MergeOpts -> [(Origin, (Op, Tok))] -> Diff
merge mo cs = go
where
mys@[diffMine, diffYour] =
map (\a -> map snd $ filter ((a ==) . fst) cs) [Mine, Your]
[tokOrigsMine, tokOrigsYour] = map (map snd . filter ((Add /=) . fst)) mys
[tokMine, tokYour] = map (map snd . filter ((Remove /=) . fst)) mys
conflict =
map (MineChanged, ) tokMine ++
map (Original, ) tokOrigsMine ++ map (YourChanged, ) tokYour
noTokens = all (not . fst . snd) (diffMine ++ diffYour)
go
| tokOrigsMine /= tokOrigsYour =
error "Internal failure: merge origins differ"
| mergeIgnoreWhitespace mo && noTokens = map (Keep, ) tokOrigsMine
| all ((Keep ==) . fst) diffYour = diffMine -- only mine changed
| all ((Keep ==) . fst) diffMine = diffYour -- only your changed
| diffMine == diffYour = diffMine -- false conflict
| not (mergeForceWhitespace mo) && noTokens =
if mergeKeepWhitespace mo
then diffMine
else diffYour -- conflict happened, but not on significant tokens
| otherwise = conflict -- true conflict

View file

@ -11,12 +11,11 @@ import qualified Data.ByteString.Lazy as BL
import Data.ByteString.UTF8 (fromString, toString) import Data.ByteString.UTF8 (fromString, toString)
import Data.List import Data.List
import Data.Maybe import Data.Maybe
import Data.Monoid
import qualified Data.Vector as V import qualified Data.Vector as V
import Options.Applicative import Options.Applicative
import Substr
import Text.Regex.TDFA import Text.Regex.TDFA
import Types import Types
import Substr
data RedfaOption data RedfaOption
= RedfaOptionRules [BS] = RedfaOptionRules [BS]
@ -79,7 +78,7 @@ redfaRuleStringToRuleStr s =
in go in go
unescapeRegex :: MonadFail m => BS -> m BS unescapeRegex :: MonadFail m => BS -> m BS
unescapeRegex s = BL.toStrict . BB.toLazyByteString <$> unescape' s unescapeRegex s' = BL.toStrict . BB.toLazyByteString <$> unescape' s'
where where
unescape' :: MonadFail m => BS -> m BB.Builder unescape' :: MonadFail m => BS -> m BB.Builder
unescape' s unescape' s
@ -112,8 +111,7 @@ unescapeRegex s = BL.toStrict . BB.toLazyByteString <$> unescape' s
redfaPrepareRules :: RedfaOption -> IO RedfaSpec redfaPrepareRules :: RedfaOption -> IO RedfaSpec
redfaPrepareRules opt = do redfaPrepareRules opt = do
(states, jumps, regexes, isToken) <- (states, jumps, regexes, isToken) <-
unzip4 . mapMaybe redfaRuleStringToRuleStr <$> unzip4 . mapMaybe redfaRuleStringToRuleStr <$> redfaOptionToRuleStrings opt
redfaOptionToRuleStrings opt
uRegexes <- traverse unescapeRegex regexes uRegexes <- traverse unescapeRegex regexes
startREs <- traverse (makeRegexM . (fromString "\\`" <>)) uRegexes startREs <- traverse (makeRegexM . (fromString "\\`" <>)) uRegexes
midREs <- traverse (makeRegexM . (fromString "\\`(.|\n)" <>)) uRegexes midREs <- traverse (makeRegexM . (fromString "\\`(.|\n)" <>)) uRegexes
@ -136,13 +134,7 @@ redfaTokenize :: MonadFail m => RedfaSpec -> BS -> m [Tok]
redfaTokenize spec s = redfaTokenize' spec s (redfaStart spec) 0 [] redfaTokenize spec s = redfaTokenize' spec s (redfaStart spec) 0 []
redfaTokenize' :: redfaTokenize' ::
MonadFail m MonadFail m => RedfaSpec -> BS -> Int -> Int -> [Int] -> m [Tok]
=> RedfaSpec
-> BS
-> Int
-> Int
-> [Int]
-> m [Tok]
redfaTokenize' spec s state off visited redfaTokenize' spec s state off visited
| off >= B.length s = pure [] | off >= B.length s = pure []
| otherwise = | otherwise =

View file

@ -1,7 +1,7 @@
module Substr where module Substr where
import Types
import qualified Data.ByteString as B import qualified Data.ByteString as B
import Types
substr :: Int -> Int -> BS -> BS substr :: Int -> Int -> BS -> BS
substr b l = B.take l . B.drop b substr b l = B.take l . B.drop b

View file

@ -5,6 +5,7 @@ import Data.Vector
type BS = ByteString type BS = ByteString
{- TODO: all this needs to get unboxed -}
type Tok = (Bool, BS) type Tok = (Bool, BS)
type TV = Vector Tok type TV = Vector Tok
@ -21,3 +22,9 @@ data Op
| Original | Original
| YourChanged | YourChanged
deriving (Show, Eq) deriving (Show, Eq)
data Origin
= Stable
| Mine
| Your
deriving (Show, Eq)

View file

@ -7,6 +7,7 @@ import Options.Applicative
adiffVersion :: String adiffVersion :: String
adiffVersion = VERSION_adiff adiffVersion = VERSION_adiff
versionOption :: String -> Parser (a -> a)
versionOption prog = versionOption prog =
infoOption infoOption
(prog <> (prog <>