clean up, support external tokenizers
This commit is contained in:
parent
396e5cff54
commit
79977cdf4b
89
Main.hs
89
Main.hs
|
|
@ -5,27 +5,36 @@ module Main where
|
|||
import Control.Exception
|
||||
import Control.Monad
|
||||
import Data.Bool
|
||||
import Data.Char
|
||||
import Data.Foldable
|
||||
import Data.List
|
||||
import Data.Maybe
|
||||
import Data.Traversable
|
||||
import Opts
|
||||
import System.Environment
|
||||
import System.Exit
|
||||
import System.FilePath
|
||||
import System.IO
|
||||
import System.IO.Temp
|
||||
import System.Process
|
||||
|
||||
import qualified Toks
|
||||
|
||||
import Debug.Trace
|
||||
{-
|
||||
- interface to other programs
|
||||
-}
|
||||
diffProg = fromMaybe "diff" <$> lookupEnv "WERGE_DIFF"
|
||||
|
||||
gitProg = fromMaybe "git" <$> lookupEnv "WERGE_GIT"
|
||||
|
||||
bracketFile path mode = bracket (openFile path mode) hClose
|
||||
|
||||
-- TODO: the diff w
|
||||
rundiff f1 f2 out = do
|
||||
diff <- diffProg
|
||||
st <-
|
||||
withFile out WriteMode $ \oh ->
|
||||
bracketFile out WriteMode $ \oh ->
|
||||
withCreateProcess
|
||||
(proc
|
||||
"diff" -- TODO: from WERGE_DIFF env
|
||||
diff
|
||||
[ "--text"
|
||||
, "--new-line-format=+%L"
|
||||
, "--old-line-format=-%L"
|
||||
|
|
@ -33,27 +42,28 @@ rundiff f1 f2 out = do
|
|||
, f1
|
||||
, f2
|
||||
])
|
||||
{std_in = NoStream, std_out = UseHandle oh, std_err = Inherit} $ \_ _ _ ->
|
||||
waitForProcess
|
||||
{std_in = NoStream, std_out = UseHandle oh} $ \_ _ _ -> waitForProcess
|
||||
when (st == ExitFailure 2) $ error "diff failed"
|
||||
unless (st `elem` [ExitSuccess, ExitFailure 1])
|
||||
$ error "diff failed for unknown reason (is GNU diffutils installed?)"
|
||||
|
||||
gitRepoRelRoot = do
|
||||
git <- gitProg
|
||||
(path, st) <-
|
||||
withCreateProcess
|
||||
(proc "git" ["rev-parse", "--show-cdup"])
|
||||
{std_in = NoStream, std_out = CreatePipe, std_err = Inherit} $ \_ (Just oh) _ p ->
|
||||
(proc git ["rev-parse", "--show-cdup"])
|
||||
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
|
||||
(,) <$> hGetContents' oh <*> waitForProcess p
|
||||
unless (st == ExitSuccess) $ error "git failed"
|
||||
let [p] = lines path
|
||||
pure p
|
||||
|
||||
gitUnmerged = do
|
||||
git <- gitProg
|
||||
(paths, st) <-
|
||||
withCreateProcess
|
||||
(proc "git" ["status", "--porcelain=v1"])
|
||||
{std_in = NoStream, std_out = CreatePipe, std_err = Inherit} $ \_ (Just oh) _ p ->
|
||||
(proc git ["status", "--porcelain=v1"])
|
||||
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
|
||||
(,)
|
||||
<$> (map (drop 3) . filter ("UU " `isPrefixOf`) . lines
|
||||
<$> hGetContents' oh)
|
||||
|
|
@ -61,11 +71,12 @@ gitUnmerged = do
|
|||
unless (st == ExitSuccess) $ error "git failed"
|
||||
pure paths
|
||||
|
||||
gitCheckoutMOY u my old your = do
|
||||
gitCheckoutMOY cfg u my old your = do
|
||||
git <- gitProg
|
||||
(paths, st) <-
|
||||
withCreateProcess
|
||||
(proc "git" ["ls-files", "--unmerged", "--", u])
|
||||
{std_in = NoStream, std_out = CreatePipe, std_err = Inherit} $ \_ (Just oh) _ p ->
|
||||
(proc git ["ls-files", "--unmerged", "--", u])
|
||||
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p ->
|
||||
(,)
|
||||
<$> (sortOn snd
|
||||
. map ((\[a, b] -> (a, b)) . take 2 . drop 1 . words)
|
||||
|
|
@ -77,8 +88,8 @@ gitCheckoutMOY u my old your = do
|
|||
st <-
|
||||
withCreateProcess
|
||||
(proc "git" ["cat-file", "blob", hash])
|
||||
{std_in = NoStream, std_out = CreatePipe, std_err = Inherit} $ \_ (Just ho) _ p -> do
|
||||
hGetContents ho >>= writeFile path . Toks.split -- TODO cfg
|
||||
{std_in = NoStream, std_out = CreatePipe} $ \_ (Just ho) _ p -> do
|
||||
hSplitToFile cfg ho path
|
||||
waitForProcess p
|
||||
unless (st == ExitSuccess) . error
|
||||
$ "failed checking out " ++ u ++ " from blob " ++ hash
|
||||
|
|
@ -88,10 +99,30 @@ gitCheckoutMOY u my old your = do
|
|||
_ -> error $ "bad data from ls-files for unmerged " ++ u
|
||||
|
||||
gitAdd path = do
|
||||
traceM $ "adding " ++ path
|
||||
st <- rawSystem "git" ["add", "--", path]
|
||||
git <- gitProg
|
||||
st <- rawSystem git ["add", "--", path]
|
||||
unless (st == ExitSuccess) $ error "git-add failed"
|
||||
|
||||
{-
|
||||
- configurable splitting
|
||||
-}
|
||||
hSplitToFile cfg h path =
|
||||
case cfgTokenizer cfg of
|
||||
TokenizeCharCategory -> internal Toks.splitCategory
|
||||
TokenizeCharCategorySimple -> internal Toks.splitSimple
|
||||
TokenizeFilter cmd -> do
|
||||
st <-
|
||||
bracketFile path WriteMode $ \ho ->
|
||||
withCreateProcess
|
||||
(shell cmd) {std_in = UseHandle h, std_out = UseHandle ho} $ \_ _ _ ->
|
||||
waitForProcess
|
||||
unless (st == ExitSuccess) $ error "tokenize filter failed"
|
||||
where
|
||||
internal s = hGetContents h >>= writeFile path . Toks.toFile . s
|
||||
|
||||
{-
|
||||
- merge algorithms
|
||||
-}
|
||||
data Op
|
||||
= Del
|
||||
| Keep
|
||||
|
|
@ -100,10 +131,10 @@ data Op
|
|||
|
||||
pdiff path = map go . lines <$> readFile path
|
||||
where
|
||||
go [] = error "empty line from diff"
|
||||
go ('-':s) = (Del, s)
|
||||
go (' ':s) = (Keep, s)
|
||||
go ('+':s) = (Add, s)
|
||||
go [] = error "unexpected output from diff"
|
||||
|
||||
data Merged
|
||||
= Ok [String]
|
||||
|
|
@ -204,29 +235,31 @@ resolveSpaces _ x = x
|
|||
merge cfg@Config {..} ms ys =
|
||||
regroup
|
||||
. map (resolve cfg)
|
||||
. traceShowId
|
||||
. regroup
|
||||
. bool id (concatMap zeal) cfgZealous
|
||||
. expand cfgContext
|
||||
. regroup
|
||||
$ align ms ys
|
||||
|
||||
{-
|
||||
- front-end
|
||||
-}
|
||||
format :: Config -> Handle -> [Merged] -> IO Bool
|
||||
format Config {..} h = go False
|
||||
where
|
||||
go c [] = pure c
|
||||
go c (Ok x:xs) = do
|
||||
hPutStr h (Toks.glueToks x)
|
||||
hPutStr h (Toks.glue x)
|
||||
go c xs
|
||||
go c (Conflict m o y:xs) = do
|
||||
hPutStr h
|
||||
$ mconcat
|
||||
[ cfgLabelStart
|
||||
, Toks.glueToks m
|
||||
, Toks.glue m
|
||||
, cfgLabelMyOld
|
||||
, Toks.glueToks o
|
||||
, Toks.glue o
|
||||
, cfgLabelOldYour
|
||||
, Toks.glueToks y
|
||||
, Toks.glue y
|
||||
, cfgLabelEnd
|
||||
]
|
||||
go True xs
|
||||
|
|
@ -236,7 +269,7 @@ runCmd CmdDiff3 {..} cfg =
|
|||
let [fMy, fOld, fYour, fdMy, fdYour] =
|
||||
map (workdir </>) ["my", "old", "your", "mydiff", "yourdiff"]
|
||||
for_ [(d3my, fMy), (d3old, fOld), (d3your, fYour)] $ \(path, tmp) ->
|
||||
readFile path >>= writeFile tmp . Toks.split -- TODO cfg
|
||||
bracketFile path ReadMode $ \h -> hSplitToFile cfg h tmp
|
||||
rundiff fOld fMy fdMy
|
||||
rundiff fOld fYour fdYour
|
||||
conflicted <-
|
||||
|
|
@ -255,15 +288,13 @@ runCmd CmdGitMerge {..} cfg = do
|
|||
withSystemTempDirectory "werge-git" $ \workdir -> do
|
||||
let [fMy, fOld, fYour, fdMy, fdYour] =
|
||||
map (workdir </>) ["my", "old", "your", "mydiff", "yourdiff"]
|
||||
gitCheckoutMOY u fMy fOld fYour
|
||||
gitCheckoutMOY cfg u fMy fOld fYour
|
||||
rundiff fOld fMy fdMy
|
||||
rundiff fOld fYour fdYour
|
||||
readFile u >>= writeFile (u ++ ".werge-backup")
|
||||
conflict <-
|
||||
withFile u WriteMode $ \h ->
|
||||
bracketFile u WriteMode $ \h ->
|
||||
merge cfg <$> pdiff fdMy <*> pdiff fdYour >>= format cfg h
|
||||
traceShowM conflict
|
||||
traceShowM gmDoAdd
|
||||
unless conflict $ when gmDoAdd $ gitAdd u
|
||||
pure conflict
|
||||
if or conflicts
|
||||
|
|
|
|||
14
Opts.hs
14
Opts.hs
|
|
@ -9,32 +9,32 @@ import Options.Applicative
|
|||
import Paths_werge (version)
|
||||
|
||||
data Tokenizer
|
||||
= TokenizerFilter String
|
||||
| TokenizeCharClass
|
||||
| TokenizeCharClassSimple
|
||||
= TokenizeFilter String
|
||||
| TokenizeCharCategory
|
||||
| TokenizeCharCategorySimple
|
||||
deriving (Show)
|
||||
|
||||
tokenizer =
|
||||
asum
|
||||
[ TokenizerFilter
|
||||
[ TokenizeFilter
|
||||
<$> strOption
|
||||
(long "tok-filter"
|
||||
<> short 'F'
|
||||
<> metavar "FILTER"
|
||||
<> help "external program to separate the text to tokens")
|
||||
, flag'
|
||||
TokenizeCharClassSimple
|
||||
TokenizeCharCategorySimple
|
||||
(long "simple-tokens"
|
||||
<> short 'i'
|
||||
<> help
|
||||
"use wider character class to separate the tokens (results in larger tokens and ignores case)")
|
||||
, flag'
|
||||
TokenizeCharClass
|
||||
TokenizeCharCategory
|
||||
(long "full-tokens"
|
||||
<> short 'I'
|
||||
<> help
|
||||
"separate characters by all known character classes (default)")
|
||||
, pure TokenizeCharClass
|
||||
, pure TokenizeCharCategory
|
||||
]
|
||||
|
||||
data Spaces
|
||||
|
|
|
|||
|
|
@ -22,6 +22,11 @@ Better docs is WIP
|
|||
cabal install
|
||||
```
|
||||
|
||||
Running of `werge` requires a working installation of `diff` compatible
|
||||
with the one from [GNU diffutils](https://www.gnu.org/software/diffutils/). You
|
||||
may set up a path to such `diff` (or a wrapper script) via environment variable
|
||||
`WERGE_DIFF`.
|
||||
|
||||
## Help & features
|
||||
|
||||
```
|
||||
|
|
|
|||
29
Toks.hs
29
Toks.hs
|
|
@ -26,11 +26,28 @@ unmarkSpace x = error "unwat"
|
|||
space ('.':_) = True
|
||||
space _ = False
|
||||
|
||||
split =
|
||||
unlines
|
||||
. map (concatMap escape . markSpace)
|
||||
. groupBy ((==) `on` generalCategory)
|
||||
joinSpaces [] = []
|
||||
joinSpaces (a@('.':as):xs) =
|
||||
case joinSpaces xs of
|
||||
(('.':bs):xs') -> ('.' : (as ++ bs)) : xs'
|
||||
xs' -> a : xs'
|
||||
joinSpaces (x:xs) = x : joinSpaces xs
|
||||
|
||||
glueToks = concatMap (unmarkSpace . unescape)
|
||||
splitCategory = make . groupBy ((==) `on` generalCategory)
|
||||
|
||||
glue = glueToks . lines
|
||||
simpleCategory c
|
||||
| isSpace c = 0
|
||||
| isAlpha c = 1
|
||||
| isNumber c = 2
|
||||
| otherwise = 3
|
||||
|
||||
splitSimple = make . groupBy ((==) `on` simpleCategory)
|
||||
|
||||
make = map (concatMap escape . markSpace)
|
||||
|
||||
glue :: [String] -> String
|
||||
glue = concatMap (unmarkSpace . unescape)
|
||||
|
||||
fromFile = lines
|
||||
|
||||
toFile = unlines
|
||||
|
|
|
|||
Loading…
Reference in a new issue