clean up, support external tokenizers
This commit is contained in:
		
							parent
							
								
									396e5cff54
								
							
						
					
					
						commit
						79977cdf4b
					
				
							
								
								
									
										89
									
								
								Main.hs
									
									
									
									
									
								
							
							
						
						
									
										89
									
								
								Main.hs
									
									
									
									
									
								
							|  | @ -5,27 +5,36 @@ module Main where | ||||||
| import Control.Exception | import Control.Exception | ||||||
| import Control.Monad | import Control.Monad | ||||||
| import Data.Bool | import Data.Bool | ||||||
| import Data.Char |  | ||||||
| import Data.Foldable | import Data.Foldable | ||||||
| import Data.List | import Data.List | ||||||
|  | import Data.Maybe | ||||||
| import Data.Traversable | import Data.Traversable | ||||||
| import Opts | import Opts | ||||||
|  | import System.Environment | ||||||
| import System.Exit | import System.Exit | ||||||
| import System.FilePath | import System.FilePath | ||||||
| import System.IO | import System.IO | ||||||
| import System.IO.Temp | import System.IO.Temp | ||||||
| import System.Process | import System.Process | ||||||
|  | 
 | ||||||
| import qualified Toks | import qualified Toks | ||||||
| 
 | 
 | ||||||
| import Debug.Trace | {- | ||||||
|  |  - interface to other programs | ||||||
|  |  -} | ||||||
|  | diffProg = fromMaybe "diff" <$> lookupEnv "WERGE_DIFF" | ||||||
|  | 
 | ||||||
|  | gitProg = fromMaybe "git" <$> lookupEnv "WERGE_GIT" | ||||||
|  | 
 | ||||||
|  | bracketFile path mode = bracket (openFile path mode) hClose | ||||||
| 
 | 
 | ||||||
| -- TODO: the diff w |  | ||||||
| rundiff f1 f2 out = do | rundiff f1 f2 out = do | ||||||
|  |   diff <- diffProg | ||||||
|   st <- |   st <- | ||||||
|     withFile out WriteMode $ \oh -> |     bracketFile out WriteMode $ \oh -> | ||||||
|       withCreateProcess |       withCreateProcess | ||||||
|         (proc |         (proc | ||||||
|            "diff" -- TODO: from WERGE_DIFF env |            diff | ||||||
|            [ "--text" |            [ "--text" | ||||||
|            , "--new-line-format=+%L" |            , "--new-line-format=+%L" | ||||||
|            , "--old-line-format=-%L" |            , "--old-line-format=-%L" | ||||||
|  | @ -33,27 +42,28 @@ rundiff f1 f2 out = do | ||||||
|            , f1 |            , f1 | ||||||
|            , f2 |            , f2 | ||||||
|            ]) |            ]) | ||||||
|           {std_in = NoStream, std_out = UseHandle oh, std_err = Inherit} $ \_ _ _ -> |           {std_in = NoStream, std_out = UseHandle oh} $ \_ _ _ -> waitForProcess | ||||||
|         waitForProcess |  | ||||||
|   when (st == ExitFailure 2) $ error "diff failed" |   when (st == ExitFailure 2) $ error "diff failed" | ||||||
|   unless (st `elem` [ExitSuccess, ExitFailure 1]) |   unless (st `elem` [ExitSuccess, ExitFailure 1]) | ||||||
|     $ error "diff failed for unknown reason (is GNU diffutils installed?)" |     $ error "diff failed for unknown reason (is GNU diffutils installed?)" | ||||||
| 
 | 
 | ||||||
| gitRepoRelRoot = do | gitRepoRelRoot = do | ||||||
|  |   git <- gitProg | ||||||
|   (path, st) <- |   (path, st) <- | ||||||
|     withCreateProcess |     withCreateProcess | ||||||
|       (proc "git" ["rev-parse", "--show-cdup"]) |       (proc git ["rev-parse", "--show-cdup"]) | ||||||
|         {std_in = NoStream, std_out = CreatePipe, std_err = Inherit} $ \_ (Just oh) _ p -> |         {std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p -> | ||||||
|       (,) <$> hGetContents' oh <*> waitForProcess p |       (,) <$> hGetContents' oh <*> waitForProcess p | ||||||
|   unless (st == ExitSuccess) $ error "git failed" |   unless (st == ExitSuccess) $ error "git failed" | ||||||
|   let [p] = lines path |   let [p] = lines path | ||||||
|   pure p |   pure p | ||||||
| 
 | 
 | ||||||
| gitUnmerged = do | gitUnmerged = do | ||||||
|  |   git <- gitProg | ||||||
|   (paths, st) <- |   (paths, st) <- | ||||||
|     withCreateProcess |     withCreateProcess | ||||||
|       (proc "git" ["status", "--porcelain=v1"]) |       (proc git ["status", "--porcelain=v1"]) | ||||||
|         {std_in = NoStream, std_out = CreatePipe, std_err = Inherit} $ \_ (Just oh) _ p -> |         {std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p -> | ||||||
|       (,) |       (,) | ||||||
|         <$> (map (drop 3) . filter ("UU " `isPrefixOf`) . lines |         <$> (map (drop 3) . filter ("UU " `isPrefixOf`) . lines | ||||||
|                <$> hGetContents' oh) |                <$> hGetContents' oh) | ||||||
|  | @ -61,11 +71,12 @@ gitUnmerged = do | ||||||
|   unless (st == ExitSuccess) $ error "git failed" |   unless (st == ExitSuccess) $ error "git failed" | ||||||
|   pure paths |   pure paths | ||||||
| 
 | 
 | ||||||
| gitCheckoutMOY u my old your = do | gitCheckoutMOY cfg u my old your = do | ||||||
|  |   git <- gitProg | ||||||
|   (paths, st) <- |   (paths, st) <- | ||||||
|     withCreateProcess |     withCreateProcess | ||||||
|       (proc "git" ["ls-files", "--unmerged", "--", u]) |       (proc git ["ls-files", "--unmerged", "--", u]) | ||||||
|         {std_in = NoStream, std_out = CreatePipe, std_err = Inherit} $ \_ (Just oh) _ p -> |         {std_in = NoStream, std_out = CreatePipe} $ \_ (Just oh) _ p -> | ||||||
|       (,) |       (,) | ||||||
|         <$> (sortOn snd |         <$> (sortOn snd | ||||||
|                . map ((\[a, b] -> (a, b)) . take 2 . drop 1 . words) |                . map ((\[a, b] -> (a, b)) . take 2 . drop 1 . words) | ||||||
|  | @ -77,8 +88,8 @@ gitCheckoutMOY u my old your = do | ||||||
|         st <- |         st <- | ||||||
|           withCreateProcess |           withCreateProcess | ||||||
|             (proc "git" ["cat-file", "blob", hash]) |             (proc "git" ["cat-file", "blob", hash]) | ||||||
|               {std_in = NoStream, std_out = CreatePipe, std_err = Inherit} $ \_ (Just ho) _ p -> do |               {std_in = NoStream, std_out = CreatePipe} $ \_ (Just ho) _ p -> do | ||||||
|             hGetContents ho >>= writeFile path . Toks.split -- TODO cfg |             hSplitToFile cfg ho path | ||||||
|             waitForProcess p |             waitForProcess p | ||||||
|         unless (st == ExitSuccess) . error |         unless (st == ExitSuccess) . error | ||||||
|           $ "failed checking out " ++ u ++ " from blob " ++ hash |           $ "failed checking out " ++ u ++ " from blob " ++ hash | ||||||
|  | @ -88,10 +99,30 @@ gitCheckoutMOY u my old your = do | ||||||
|     _ -> error $ "bad data from ls-files for unmerged " ++ u |     _ -> error $ "bad data from ls-files for unmerged " ++ u | ||||||
| 
 | 
 | ||||||
| gitAdd path = do | gitAdd path = do | ||||||
|   traceM $ "adding " ++ path |   git <- gitProg | ||||||
|   st <- rawSystem "git" ["add", "--", path] |   st <- rawSystem git ["add", "--", path] | ||||||
|   unless (st == ExitSuccess) $ error "git-add failed" |   unless (st == ExitSuccess) $ error "git-add failed" | ||||||
| 
 | 
 | ||||||
|  | {- | ||||||
|  |  - configurable splitting | ||||||
|  |  -} | ||||||
|  | hSplitToFile cfg h path = | ||||||
|  |   case cfgTokenizer cfg of | ||||||
|  |     TokenizeCharCategory -> internal Toks.splitCategory | ||||||
|  |     TokenizeCharCategorySimple -> internal Toks.splitSimple | ||||||
|  |     TokenizeFilter cmd -> do | ||||||
|  |       st <- | ||||||
|  |         bracketFile path WriteMode $ \ho -> | ||||||
|  |           withCreateProcess | ||||||
|  |             (shell cmd) {std_in = UseHandle h, std_out = UseHandle ho} $ \_ _ _ -> | ||||||
|  |             waitForProcess | ||||||
|  |       unless (st == ExitSuccess) $ error "tokenize filter failed" | ||||||
|  |   where | ||||||
|  |     internal s = hGetContents h >>= writeFile path . Toks.toFile . s | ||||||
|  | 
 | ||||||
|  | {- | ||||||
|  |  - merge algorithms | ||||||
|  |  -} | ||||||
| data Op | data Op | ||||||
|   = Del |   = Del | ||||||
|   | Keep |   | Keep | ||||||
|  | @ -100,10 +131,10 @@ data Op | ||||||
| 
 | 
 | ||||||
| pdiff path = map go . lines <$> readFile path | pdiff path = map go . lines <$> readFile path | ||||||
|   where |   where | ||||||
|     go [] = error "empty line from diff" |  | ||||||
|     go ('-':s) = (Del, s) |     go ('-':s) = (Del, s) | ||||||
|     go (' ':s) = (Keep, s) |     go (' ':s) = (Keep, s) | ||||||
|     go ('+':s) = (Add, s) |     go ('+':s) = (Add, s) | ||||||
|  |     go [] = error "unexpected output from diff" | ||||||
| 
 | 
 | ||||||
| data Merged | data Merged | ||||||
|   = Ok [String] |   = Ok [String] | ||||||
|  | @ -204,29 +235,31 @@ resolveSpaces _ x = x | ||||||
| merge cfg@Config {..} ms ys = | merge cfg@Config {..} ms ys = | ||||||
|   regroup |   regroup | ||||||
|     . map (resolve cfg) |     . map (resolve cfg) | ||||||
|     . traceShowId |  | ||||||
|     . regroup |     . regroup | ||||||
|     . bool id (concatMap zeal) cfgZealous |     . bool id (concatMap zeal) cfgZealous | ||||||
|     . expand cfgContext |     . expand cfgContext | ||||||
|     . regroup |     . regroup | ||||||
|     $ align ms ys |     $ align ms ys | ||||||
| 
 | 
 | ||||||
|  | {- | ||||||
|  |  - front-end | ||||||
|  |  -} | ||||||
| format :: Config -> Handle -> [Merged] -> IO Bool | format :: Config -> Handle -> [Merged] -> IO Bool | ||||||
| format Config {..} h = go False | format Config {..} h = go False | ||||||
|   where |   where | ||||||
|     go c [] = pure c |     go c [] = pure c | ||||||
|     go c (Ok x:xs) = do |     go c (Ok x:xs) = do | ||||||
|       hPutStr h (Toks.glueToks x) |       hPutStr h (Toks.glue x) | ||||||
|       go c xs |       go c xs | ||||||
|     go c (Conflict m o y:xs) = do |     go c (Conflict m o y:xs) = do | ||||||
|       hPutStr h |       hPutStr h | ||||||
|         $ mconcat |         $ mconcat | ||||||
|             [ cfgLabelStart |             [ cfgLabelStart | ||||||
|             , Toks.glueToks m |             , Toks.glue m | ||||||
|             , cfgLabelMyOld |             , cfgLabelMyOld | ||||||
|             , Toks.glueToks o |             , Toks.glue o | ||||||
|             , cfgLabelOldYour |             , cfgLabelOldYour | ||||||
|             , Toks.glueToks y |             , Toks.glue y | ||||||
|             , cfgLabelEnd |             , cfgLabelEnd | ||||||
|             ] |             ] | ||||||
|       go True xs |       go True xs | ||||||
|  | @ -236,7 +269,7 @@ runCmd CmdDiff3 {..} cfg = | ||||||
|     let [fMy, fOld, fYour, fdMy, fdYour] = |     let [fMy, fOld, fYour, fdMy, fdYour] = | ||||||
|           map (workdir </>) ["my", "old", "your", "mydiff", "yourdiff"] |           map (workdir </>) ["my", "old", "your", "mydiff", "yourdiff"] | ||||||
|     for_ [(d3my, fMy), (d3old, fOld), (d3your, fYour)] $ \(path, tmp) -> |     for_ [(d3my, fMy), (d3old, fOld), (d3your, fYour)] $ \(path, tmp) -> | ||||||
|       readFile path >>= writeFile tmp . Toks.split -- TODO cfg |       bracketFile path ReadMode $ \h -> hSplitToFile cfg h tmp | ||||||
|     rundiff fOld fMy fdMy |     rundiff fOld fMy fdMy | ||||||
|     rundiff fOld fYour fdYour |     rundiff fOld fYour fdYour | ||||||
|     conflicted <- |     conflicted <- | ||||||
|  | @ -255,15 +288,13 @@ runCmd CmdGitMerge {..} cfg = do | ||||||
|       withSystemTempDirectory "werge-git" $ \workdir -> do |       withSystemTempDirectory "werge-git" $ \workdir -> do | ||||||
|         let [fMy, fOld, fYour, fdMy, fdYour] = |         let [fMy, fOld, fYour, fdMy, fdYour] = | ||||||
|               map (workdir </>) ["my", "old", "your", "mydiff", "yourdiff"] |               map (workdir </>) ["my", "old", "your", "mydiff", "yourdiff"] | ||||||
|         gitCheckoutMOY u fMy fOld fYour |         gitCheckoutMOY cfg u fMy fOld fYour | ||||||
|         rundiff fOld fMy fdMy |         rundiff fOld fMy fdMy | ||||||
|         rundiff fOld fYour fdYour |         rundiff fOld fYour fdYour | ||||||
|         readFile u >>= writeFile (u ++ ".werge-backup") |         readFile u >>= writeFile (u ++ ".werge-backup") | ||||||
|         conflict <- |         conflict <- | ||||||
|           withFile u WriteMode $ \h -> |           bracketFile u WriteMode $ \h -> | ||||||
|             merge cfg <$> pdiff fdMy <*> pdiff fdYour >>= format cfg h |             merge cfg <$> pdiff fdMy <*> pdiff fdYour >>= format cfg h | ||||||
|         traceShowM conflict |  | ||||||
|         traceShowM gmDoAdd |  | ||||||
|         unless conflict $ when gmDoAdd $ gitAdd u |         unless conflict $ when gmDoAdd $ gitAdd u | ||||||
|         pure conflict |         pure conflict | ||||||
|   if or conflicts |   if or conflicts | ||||||
|  |  | ||||||
							
								
								
									
										14
									
								
								Opts.hs
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								Opts.hs
									
									
									
									
									
								
							|  | @ -9,32 +9,32 @@ import Options.Applicative | ||||||
| import Paths_werge (version) | import Paths_werge (version) | ||||||
| 
 | 
 | ||||||
| data Tokenizer | data Tokenizer | ||||||
|   = TokenizerFilter String |   = TokenizeFilter String | ||||||
|   | TokenizeCharClass |   | TokenizeCharCategory | ||||||
|   | TokenizeCharClassSimple |   | TokenizeCharCategorySimple | ||||||
|   deriving (Show) |   deriving (Show) | ||||||
| 
 | 
 | ||||||
| tokenizer = | tokenizer = | ||||||
|   asum |   asum | ||||||
|     [ TokenizerFilter |     [ TokenizeFilter | ||||||
|         <$> strOption |         <$> strOption | ||||||
|               (long "tok-filter" |               (long "tok-filter" | ||||||
|                  <> short 'F' |                  <> short 'F' | ||||||
|                  <> metavar "FILTER" |                  <> metavar "FILTER" | ||||||
|                  <> help "external program to separate the text to tokens") |                  <> help "external program to separate the text to tokens") | ||||||
|     , flag' |     , flag' | ||||||
|         TokenizeCharClassSimple |         TokenizeCharCategorySimple | ||||||
|         (long "simple-tokens" |         (long "simple-tokens" | ||||||
|            <> short 'i' |            <> short 'i' | ||||||
|            <> help |            <> help | ||||||
|                 "use wider character class to separate the tokens (results in larger tokens and ignores case)") |                 "use wider character class to separate the tokens (results in larger tokens and ignores case)") | ||||||
|     , flag' |     , flag' | ||||||
|         TokenizeCharClass |         TokenizeCharCategory | ||||||
|         (long "full-tokens" |         (long "full-tokens" | ||||||
|            <> short 'I' |            <> short 'I' | ||||||
|            <> help |            <> help | ||||||
|                 "separate characters by all known character classes (default)") |                 "separate characters by all known character classes (default)") | ||||||
|     , pure TokenizeCharClass |     , pure TokenizeCharCategory | ||||||
|     ] |     ] | ||||||
| 
 | 
 | ||||||
| data Spaces | data Spaces | ||||||
|  |  | ||||||
|  | @ -22,6 +22,11 @@ Better docs is WIP | ||||||
| cabal install | cabal install | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
|  | Running of `werge` requires a working installation of `diff` compatible | ||||||
|  | with the one from [GNU diffutils](https://www.gnu.org/software/diffutils/). You | ||||||
|  | may set up a path to such `diff` (or a wrapper script) via environment variable | ||||||
|  | `WERGE_DIFF`. | ||||||
|  | 
 | ||||||
| ## Help & features | ## Help & features | ||||||
| 
 | 
 | ||||||
| ``` | ``` | ||||||
|  |  | ||||||
							
								
								
									
										29
									
								
								Toks.hs
									
									
									
									
									
								
							
							
						
						
									
										29
									
								
								Toks.hs
									
									
									
									
									
								
							|  | @ -26,11 +26,28 @@ unmarkSpace x = error "unwat" | ||||||
| space ('.':_) = True | space ('.':_) = True | ||||||
| space _ = False | space _ = False | ||||||
| 
 | 
 | ||||||
| split = | joinSpaces [] = [] | ||||||
|   unlines | joinSpaces (a@('.':as):xs) = | ||||||
|     . map (concatMap escape . markSpace) |   case joinSpaces xs of | ||||||
|     . groupBy ((==) `on` generalCategory) |     (('.':bs):xs') -> ('.' : (as ++ bs)) : xs' | ||||||
|  |     xs' -> a : xs' | ||||||
|  | joinSpaces (x:xs) = x : joinSpaces xs | ||||||
| 
 | 
 | ||||||
| glueToks = concatMap (unmarkSpace . unescape) | splitCategory = make . groupBy ((==) `on` generalCategory) | ||||||
| 
 | 
 | ||||||
| glue = glueToks . lines | simpleCategory c | ||||||
|  |   | isSpace c = 0 | ||||||
|  |   | isAlpha c = 1 | ||||||
|  |   | isNumber c = 2 | ||||||
|  |   | otherwise = 3 | ||||||
|  | 
 | ||||||
|  | splitSimple = make . groupBy ((==) `on` simpleCategory) | ||||||
|  | 
 | ||||||
|  | make = map (concatMap escape . markSpace) | ||||||
|  | 
 | ||||||
|  | glue :: [String] -> String | ||||||
|  | glue = concatMap (unmarkSpace . unescape) | ||||||
|  | 
 | ||||||
|  | fromFile = lines | ||||||
|  | 
 | ||||||
|  | toFile = unlines | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue