some working version
This commit is contained in:
		
							parent
							
								
									bd4e79e064
								
							
						
					
					
						commit
						64b5ca7ea7
					
				
							
								
								
									
										231
									
								
								src/Diff.hs
									
									
									
									
									
								
							
							
						
						
									
										231
									
								
								src/Diff.hs
									
									
									
									
									
								
							| 
						 | 
					@ -32,31 +32,240 @@ data Op
 | 
				
			||||||
  | Add
 | 
					  | Add
 | 
				
			||||||
  deriving (Show, Eq)
 | 
					  deriving (Show, Eq)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					data DiffEnv =
 | 
				
			||||||
 | 
					  DiffEnv
 | 
				
			||||||
 | 
					    { deD1 :: BS
 | 
				
			||||||
 | 
					    , deD2 :: BS
 | 
				
			||||||
 | 
					    , deT1 :: V.Vector Tok
 | 
				
			||||||
 | 
					    , deT2 :: V.Vector Tok
 | 
				
			||||||
 | 
					    , deS :: Int
 | 
				
			||||||
 | 
					    , deE :: Int
 | 
				
			||||||
 | 
					    , deL :: Int
 | 
				
			||||||
 | 
					    , deW :: Int
 | 
				
			||||||
 | 
					    , deA :: Int
 | 
				
			||||||
 | 
					    , deB :: Int
 | 
				
			||||||
 | 
					    , deVS :: V.Vector Int
 | 
				
			||||||
 | 
					    , deVE :: V.Vector Int
 | 
				
			||||||
 | 
					    , deTrans :: Bool
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  deriving (Show)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
substr b e = B.take (e - b) . B.drop b
 | 
					substr b e = B.take (e - b) . B.drop b
 | 
				
			||||||
 | 
					
 | 
				
			||||||
toksMatch d1 d2 t1 t2 x y =
 | 
					toksMatch x y DiffEnv {deT1 = t1, deT2 = t2, deD1 = d1, deD2 = d2} =
 | 
				
			||||||
  let (tok1, (b1, e1)) = t1 V.! x
 | 
					  let (isTok1, (b1, e1)) = t1 V.! x
 | 
				
			||||||
      (tok2, (b2, e2)) = t2 V.! y
 | 
					      (isTok2, (b2, e2)) = t2 V.! y
 | 
				
			||||||
   in tok1 == tok2 && substr b1 e1 d1 == substr b2 e2 d2
 | 
					   in isTok1 == isTok2 && substr b1 e1 d1 == substr b2 e2 d2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
stripEqToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> (Int, Int, Int)
 | 
					stripEqToks :: DiffEnv -> (Int, Int, Int)
 | 
				
			||||||
stripEqToks d1 d2 t1 t2 = (b, l1 - e, l2 - e)
 | 
					stripEqToks de@DiffEnv {deT1 = t1, deT2 = t2} = (b, l1 - e, l2 - e)
 | 
				
			||||||
  where
 | 
					  where
 | 
				
			||||||
    l1 = V.length t1
 | 
					    l1 = V.length t1
 | 
				
			||||||
    l2 = V.length t2
 | 
					    l2 = V.length t2
 | 
				
			||||||
    firstDiff i
 | 
					    firstDiff i
 | 
				
			||||||
      | i < l1 && i < l2 && toksMatch d1 d2 t1 t2 i i = firstDiff (i + 1)
 | 
					      | i < l1 && i < l2 && toksMatch i i de = firstDiff (i + 1)
 | 
				
			||||||
      | otherwise = i
 | 
					      | otherwise = i
 | 
				
			||||||
    b = firstDiff 0
 | 
					    b = firstDiff 0
 | 
				
			||||||
    lastDiff i
 | 
					    lastDiff i
 | 
				
			||||||
      | l1 - i - 1 >= b &&
 | 
					      | l1 - i - 1 >= b &&
 | 
				
			||||||
          l2 - i - 1 >= b && toksMatch d1 d2 t1 t2 (l1 - i - 1) (l2 - i - 1) =
 | 
					          l2 - i - 1 >= b && toksMatch (l1 - i - 1) (l2 - i - 1) de =
 | 
				
			||||||
        lastDiff (i + 1)
 | 
					        lastDiff (i + 1)
 | 
				
			||||||
      | otherwise = i
 | 
					      | otherwise = i
 | 
				
			||||||
    e = lastDiff 0
 | 
					    e = lastDiff 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
diffToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> IO [(Op, Tok)]
 | 
					diffToks :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> [(Op, Tok)]
 | 
				
			||||||
diffToks d1 d2 t1 t2 = do
 | 
					diffToks d1 d2 t1 t2 =
 | 
				
			||||||
 | 
					  diffToks' $
 | 
				
			||||||
 | 
					  if V.length t1 >= V.length t2
 | 
				
			||||||
 | 
					    then DiffEnv
 | 
				
			||||||
 | 
					           { deD1 = d1
 | 
				
			||||||
 | 
					           , deD2 = d2
 | 
				
			||||||
 | 
					           , deT1 = t1
 | 
				
			||||||
 | 
					           , deT2 = t2
 | 
				
			||||||
 | 
					           , deS = 0
 | 
				
			||||||
 | 
					           , deE = V.length t1
 | 
				
			||||||
 | 
					           , deL = V.length t1
 | 
				
			||||||
 | 
					           , deW = V.length t2
 | 
				
			||||||
 | 
					           , deA = 0
 | 
				
			||||||
 | 
					           , deB = V.length t2
 | 
				
			||||||
 | 
					           , deVS = V.fromList [0 .. V.length t2]
 | 
				
			||||||
 | 
					           , deVE = V.fromList $ reverse [0 .. V.length t2]
 | 
				
			||||||
 | 
					           , deTrans = False
 | 
				
			||||||
 | 
					           }
 | 
				
			||||||
 | 
					    else DiffEnv
 | 
				
			||||||
 | 
					           { deD1 = d2
 | 
				
			||||||
 | 
					           , deD2 = d1
 | 
				
			||||||
 | 
					           , deT1 = t2
 | 
				
			||||||
 | 
					           , deT2 = t1
 | 
				
			||||||
 | 
					           , deS = 0
 | 
				
			||||||
 | 
					           , deE = V.length t2
 | 
				
			||||||
 | 
					           , deL = V.length t2
 | 
				
			||||||
 | 
					           , deW = V.length t1
 | 
				
			||||||
 | 
					           , deA = 0
 | 
				
			||||||
 | 
					           , deB = V.length t1
 | 
				
			||||||
 | 
					           , deVS = V.fromList [0 .. V.length t1]
 | 
				
			||||||
 | 
					           , deVE = V.fromList $ reverse [0 .. V.length t1]
 | 
				
			||||||
 | 
					           , deTrans = True
 | 
				
			||||||
 | 
					           }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					minIndexFwd =
 | 
				
			||||||
 | 
					  V.minIndexBy
 | 
				
			||||||
 | 
					    (\x y ->
 | 
				
			||||||
 | 
					       if x <= y
 | 
				
			||||||
 | 
					         then LT
 | 
				
			||||||
 | 
					         else GT --basically normal V.minIndex
 | 
				
			||||||
 | 
					     )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					minIndexRev =
 | 
				
			||||||
 | 
					  V.minIndexBy
 | 
				
			||||||
 | 
					    (\x y ->
 | 
				
			||||||
 | 
					       if x < y
 | 
				
			||||||
 | 
					         then LT
 | 
				
			||||||
 | 
					         else GT --picks the last minimum
 | 
				
			||||||
 | 
					     )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					diffToks' :: DiffEnv -> [(Op, Tok)]
 | 
				
			||||||
 | 
					diffToks' de@DiffEnv {deS = s, deE = e, deA = a, deB = b, deTrans = trans} =
 | 
				
			||||||
 | 
					  diff
 | 
				
			||||||
 | 
					  where
 | 
				
			||||||
 | 
					    mid = (s + e) `div` 2
 | 
				
			||||||
 | 
					    vecSmid = vecS mid
 | 
				
			||||||
 | 
					    vecEmid = vecE mid
 | 
				
			||||||
 | 
					    vecS = vec -- "forward" operation
 | 
				
			||||||
 | 
					      where
 | 
				
			||||||
 | 
					        vec i
 | 
				
			||||||
 | 
					          | i == s = deVS de
 | 
				
			||||||
 | 
					          | i > s = V.fromList . upd i . vec $ pred i
 | 
				
			||||||
 | 
					          | otherwise = error "Internal bounds check failure"
 | 
				
			||||||
 | 
					        upd i v = i : go 1 i
 | 
				
			||||||
 | 
					          where
 | 
				
			||||||
 | 
					            go j up
 | 
				
			||||||
 | 
					              | j > deW de = []
 | 
				
			||||||
 | 
					              | otherwise =
 | 
				
			||||||
 | 
					                let left = v V.! j
 | 
				
			||||||
 | 
					                    upleft = v V.! pred j
 | 
				
			||||||
 | 
					                    keep
 | 
				
			||||||
 | 
					                      | toksMatch (pred i) (pred j) de = min upleft
 | 
				
			||||||
 | 
					                      | otherwise = id
 | 
				
			||||||
 | 
					                    res = keep $ min (succ up) (succ left)
 | 
				
			||||||
 | 
					                 in res : go (succ j) res
 | 
				
			||||||
 | 
					    vecE = vec -- "backward" operation
 | 
				
			||||||
 | 
					      where
 | 
				
			||||||
 | 
					        vec i
 | 
				
			||||||
 | 
					          | i == e = deVE de
 | 
				
			||||||
 | 
					          | i < e = V.fromList . reverse . upd i . vec $ succ i
 | 
				
			||||||
 | 
					          | otherwise = error "Internal bounds check failure"
 | 
				
			||||||
 | 
					        upd i v = (deL de - i) : go (pred $ deW de) (deL de - i)
 | 
				
			||||||
 | 
					          where
 | 
				
			||||||
 | 
					            go j down
 | 
				
			||||||
 | 
					              | j < 0 = []
 | 
				
			||||||
 | 
					              | otherwise =
 | 
				
			||||||
 | 
					                let right = v V.! j
 | 
				
			||||||
 | 
					                    downright = v V.! succ j
 | 
				
			||||||
 | 
					                    keep
 | 
				
			||||||
 | 
					                      | toksMatch i j de = min downright
 | 
				
			||||||
 | 
					                      | otherwise = id
 | 
				
			||||||
 | 
					                    res = keep $ min (succ down) (succ right)
 | 
				
			||||||
 | 
					                 in res : go (pred j) res
 | 
				
			||||||
 | 
					    {- Now, find the optimal point for splitting.
 | 
				
			||||||
 | 
					     -
 | 
				
			||||||
 | 
					     - Heuristics A: if 2 paths are completely same, prefer deletion first;
 | 
				
			||||||
 | 
					     - which is done by choosing the 'upper' of two possibilities
 | 
				
			||||||
 | 
					     - preferentially (or 'lower' ie 'more to the right' in case of transposed
 | 
				
			||||||
 | 
					     - matrix) -}
 | 
				
			||||||
 | 
					    opt =
 | 
				
			||||||
 | 
					      (a +) .
 | 
				
			||||||
 | 
					      (if trans
 | 
				
			||||||
 | 
					         then minIndexRev
 | 
				
			||||||
 | 
					         else minIndexFwd) $
 | 
				
			||||||
 | 
					      V.zipWith (+) (slice vecSmid) (slice vecEmid)
 | 
				
			||||||
 | 
					      where
 | 
				
			||||||
 | 
					        slice = V.slice a (succ $ b - a)
 | 
				
			||||||
 | 
					    diff
 | 
				
			||||||
 | 
					      | s > e =
 | 
				
			||||||
 | 
					        error $
 | 
				
			||||||
 | 
					        "Internal failure -- recursion off limits: " <>
 | 
				
			||||||
 | 
					        show s <> " vs " <> show e
 | 
				
			||||||
 | 
					      | s == e = map (\i -> (Add, deT2 de V.! i)) [a .. pred b]
 | 
				
			||||||
 | 
					      | succ s == e =
 | 
				
			||||||
 | 
					        let vecLS = deVS de
 | 
				
			||||||
 | 
					            vecRE = deVE de
 | 
				
			||||||
 | 
					            vecLE = vecE s
 | 
				
			||||||
 | 
					            vecRS = vecS e
 | 
				
			||||||
 | 
					            sumL = V.zipWith (+) vecLS vecLE
 | 
				
			||||||
 | 
					            sumR = V.zipWith (+) vecRS vecRE
 | 
				
			||||||
 | 
					            {- This is getting a bit complicted. In the non-transposed case, we
 | 
				
			||||||
 | 
					             - want to select one Remove/Keep surrounded by 0-n Add ops, possibly
 | 
				
			||||||
 | 
					             - from both sides. The chosen path must belong to the best paths
 | 
				
			||||||
 | 
					             - (bidirectional matrix sums must match the minimum at (s,a) and
 | 
				
			||||||
 | 
					             - (b,e) on all steps), AND at the same time the path must be
 | 
				
			||||||
 | 
					             - admissible for the edit operations (ie, it has to `backtrack
 | 
				
			||||||
 | 
					             - well`). Also, it should follow Heuristic A that says that Remove
 | 
				
			||||||
 | 
					             - and Keep ops should go earlier than Add ops (or vice versa if
 | 
				
			||||||
 | 
					             - transposed). -}
 | 
				
			||||||
 | 
					            totalCost = sumL V.! a
 | 
				
			||||||
 | 
					            sCost = vecLS V.! a
 | 
				
			||||||
 | 
					            eCost = vecRS V.! b
 | 
				
			||||||
 | 
					            doKeep
 | 
				
			||||||
 | 
					              | eCost - sCost == succ (b - a) = False
 | 
				
			||||||
 | 
					              | eCost - sCost == pred (b - a) = True
 | 
				
			||||||
 | 
					              | otherwise =
 | 
				
			||||||
 | 
					                error $
 | 
				
			||||||
 | 
					                "Internal check failure -- costs seem broken: " <>
 | 
				
			||||||
 | 
					                show [sCost, eCost, a, b]
 | 
				
			||||||
 | 
					            jumpPos =
 | 
				
			||||||
 | 
					              (if trans {- Heuristic A applies here -}
 | 
				
			||||||
 | 
					                 then last
 | 
				
			||||||
 | 
					                 else head)
 | 
				
			||||||
 | 
					                [ i
 | 
				
			||||||
 | 
					                | i <-
 | 
				
			||||||
 | 
					                    [a .. if doKeep
 | 
				
			||||||
 | 
					                            then pred b
 | 
				
			||||||
 | 
					                            else b]
 | 
				
			||||||
 | 
					                , vecLS V.! i == sCost - a + i
 | 
				
			||||||
 | 
					                , sumL V.! i == totalCost
 | 
				
			||||||
 | 
					                , if doKeep
 | 
				
			||||||
 | 
					                    then vecLS V.! i == vecRS V.! succ i
 | 
				
			||||||
 | 
					                    else succ (vecLS V.! i) == vecRS V.! i
 | 
				
			||||||
 | 
					                , if doKeep
 | 
				
			||||||
 | 
					                    then sumR V.! succ i == totalCost
 | 
				
			||||||
 | 
					                    else sumR V.! i == totalCost
 | 
				
			||||||
 | 
					                , not doKeep || toksMatch s i de
 | 
				
			||||||
 | 
					                ]
 | 
				
			||||||
 | 
					            jumpEnd =
 | 
				
			||||||
 | 
					              if doKeep
 | 
				
			||||||
 | 
					                then jumpPos + 1
 | 
				
			||||||
 | 
					                else jumpPos
 | 
				
			||||||
 | 
					         in map
 | 
				
			||||||
 | 
					              (\i ->
 | 
				
			||||||
 | 
					                 ( if trans
 | 
				
			||||||
 | 
					                     then Remove
 | 
				
			||||||
 | 
					                     else Add
 | 
				
			||||||
 | 
					                 , deT2 de V.! i))
 | 
				
			||||||
 | 
					              [a .. pred jumpPos] ++
 | 
				
			||||||
 | 
					            [ if trans
 | 
				
			||||||
 | 
					                then if doKeep
 | 
				
			||||||
 | 
					                       then (Keep, deT2 de V.! jumpPos)
 | 
				
			||||||
 | 
					                       else (Add, deT1 de V.! s)
 | 
				
			||||||
 | 
					                else ( if doKeep
 | 
				
			||||||
 | 
					                         then Keep
 | 
				
			||||||
 | 
					                         else Remove
 | 
				
			||||||
 | 
					                     , deT1 de V.! s)
 | 
				
			||||||
 | 
					            ] ++
 | 
				
			||||||
 | 
					            map
 | 
				
			||||||
 | 
					              (\i ->
 | 
				
			||||||
 | 
					                 ( if trans
 | 
				
			||||||
 | 
					                     then Remove
 | 
				
			||||||
 | 
					                     else Add
 | 
				
			||||||
 | 
					                 , deT2 de V.! i))
 | 
				
			||||||
 | 
					              [jumpEnd .. pred b]
 | 
				
			||||||
 | 
					      | otherwise =
 | 
				
			||||||
 | 
					        diffToks' de {deE = mid, deVE = vecEmid, deB = opt} ++
 | 
				
			||||||
 | 
					        diffToks' de {deS = mid, deVS = vecSmid, deA = opt}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					{-
 | 
				
			||||||
 | 
					diffToksXX :: BS -> BS -> V.Vector Tok -> V.Vector Tok -> IO [(Op, Tok)]
 | 
				
			||||||
 | 
					diffToksXX d1 d2 t1 t2 = do
 | 
				
			||||||
  let (b, e1, e2) = stripEqToks d1 d2 t1 t2
 | 
					  let (b, e1, e2) = stripEqToks d1 d2 t1 t2
 | 
				
			||||||
      ms1 = e1 - b
 | 
					      ms1 = e1 - b
 | 
				
			||||||
      ms2 = e2 - b
 | 
					      ms2 = e2 - b
 | 
				
			||||||
| 
						 | 
					@ -96,7 +305,7 @@ diffToks d1 d2 t1 t2 = do
 | 
				
			||||||
  return $
 | 
					  return $
 | 
				
			||||||
    map ((,) Keep) (take b $ V.toList t1) ++
 | 
					    map ((,) Keep) (take b $ V.toList t1) ++
 | 
				
			||||||
    diff ++ map ((,) Keep) (drop e1 $ V.toList t1)
 | 
					    diff ++ map ((,) Keep) (drop e1 $ V.toList t1)
 | 
				
			||||||
 | 
					-}
 | 
				
			||||||
pprDiff :: BS -> BS -> [(Op, Tok)] -> [BS]
 | 
					pprDiff :: BS -> BS -> [(Op, Tok)] -> [BS]
 | 
				
			||||||
pprDiff d1 d2 = map (pprDiff1 d1 d2)
 | 
					pprDiff d1 d2 = map (pprDiff1 d1 d2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -71,8 +71,8 @@ patchCmdOptions =
 | 
				
			||||||
    auto
 | 
					    auto
 | 
				
			||||||
    (short 'p' <>
 | 
					    (short 'p' <>
 | 
				
			||||||
     long "strip" <>
 | 
					     long "strip" <>
 | 
				
			||||||
     metavar "NUM" <> help "Strip NUM leading components from the paths" <>
 | 
					     metavar "NUM" <>
 | 
				
			||||||
     value 0) <*>
 | 
					     help "Strip NUM leading components from the paths" <> value 0) <*>
 | 
				
			||||||
  mergeOption True
 | 
					  mergeOption True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
diff3CmdOptions =
 | 
					diff3CmdOptions =
 | 
				
			||||||
| 
						 | 
					@ -103,11 +103,15 @@ main =
 | 
				
			||||||
           progDesc
 | 
					           progDesc
 | 
				
			||||||
             "Compare, patch and merge files on arbitrarily-tokenized sequences." <>
 | 
					             "Compare, patch and merge files on arbitrarily-tokenized sequences." <>
 | 
				
			||||||
           header "adiff: arbitrary-token diff utilities")
 | 
					           header "adiff: arbitrary-token diff utilities")
 | 
				
			||||||
   in do o <- execParser opts
 | 
					   in do ADiffOptions {adiffRedfaOpt = ropt, adiffCmdOpts = copt} <-
 | 
				
			||||||
         redfa <- redfaPrepareRules (diffRedfaOpt o)
 | 
					           execParser opts
 | 
				
			||||||
         data1 <- mmapFileByteString (diffFile1 o) Nothing
 | 
					         redfa <- redfaPrepareRules ropt
 | 
				
			||||||
         data2 <- mmapFileByteString (diffFile2 o) Nothing
 | 
					         case copt of
 | 
				
			||||||
         toks1 <- V.fromList <$> redfaTokenize redfa data1
 | 
					           CmdDiff {diffFile1 = f1, diffFile2 = f2, context = ctxt} -> do
 | 
				
			||||||
         toks2 <- V.fromList <$> redfaTokenize redfa data2
 | 
					             data1 <- mmapFileByteString f1 Nothing
 | 
				
			||||||
         hs <- hunks (max 0 $ context o) <$> diffToks data1 data2 toks1 toks2
 | 
					             data2 <- mmapFileByteString f2 Nothing
 | 
				
			||||||
         B8.putStrLn $ pprHunks data1 data2 hs
 | 
					             toks1 <- V.fromList <$> redfaTokenize redfa data1
 | 
				
			||||||
 | 
					             toks2 <- V.fromList <$> redfaTokenize redfa data2
 | 
				
			||||||
 | 
					             let hs = hunks (max 0 ctxt) $ diffToks data1 data2 toks1 toks2
 | 
				
			||||||
 | 
					             B8.putStrLn $ pprHunks data1 data2 hs
 | 
				
			||||||
 | 
					           CmdPatch {} -> putStrLn "not supported yet"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										14
									
								
								src/Redfa.hs
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								src/Redfa.hs
									
									
									
									
									
								
							| 
						 | 
					@ -59,7 +59,7 @@ redfaOptionToRuleStrings (RedfaOptionFile fn) =
 | 
				
			||||||
  B8.lines <$> B.readFile fn -- TODO improve
 | 
					  B8.lines <$> B.readFile fn -- TODO improve
 | 
				
			||||||
 | 
					
 | 
				
			||||||
splitFirst :: Char -> BS -> (BS, BS)
 | 
					splitFirst :: Char -> BS -> (BS, BS)
 | 
				
			||||||
splitFirst c s = B.splitAt (maybe (B.length s) id $ B8.elemIndex c s) s
 | 
					splitFirst c s = B.splitAt (fromMaybe (B.length s) $ B8.elemIndex c s) s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
redfaRuleStringToRuleStr :: BS -> Maybe (BS, BS, BS, Bool)
 | 
					redfaRuleStringToRuleStr :: BS -> Maybe (BS, BS, BS, Bool)
 | 
				
			||||||
redfaRuleStringToRuleStr s =
 | 
					redfaRuleStringToRuleStr s =
 | 
				
			||||||
| 
						 | 
					@ -68,7 +68,7 @@ redfaRuleStringToRuleStr s =
 | 
				
			||||||
      sf = B8.strip from
 | 
					      sf = B8.strip from
 | 
				
			||||||
      (cleanFrom, isToken)
 | 
					      (cleanFrom, isToken)
 | 
				
			||||||
        | B.null sf = (sf, True)
 | 
					        | B.null sf = (sf, True)
 | 
				
			||||||
        | B.head sf == (fromIntegral $ fromEnum '_') = (B.tail sf, False)
 | 
					        | B.head sf == fromIntegral (fromEnum '_') = (B.tail sf, False)
 | 
				
			||||||
        | otherwise = (sf, True)
 | 
					        | otherwise = (sf, True)
 | 
				
			||||||
      go
 | 
					      go
 | 
				
			||||||
        | B.null s = Nothing
 | 
					        | B.null s = Nothing
 | 
				
			||||||
| 
						 | 
					@ -89,7 +89,7 @@ unescapeRegex s = BL.toStrict . BB.toLazyByteString <$> unescape' s
 | 
				
			||||||
        fail "incomplete escape sequence"
 | 
					        fail "incomplete escape sequence"
 | 
				
			||||||
      | B.head s == BI.c2w '\\' =
 | 
					      | B.head s == BI.c2w '\\' =
 | 
				
			||||||
        let rest = B.tail s
 | 
					        let rest = B.tail s
 | 
				
			||||||
            cc = B.head (rest)
 | 
					            cc = B.head rest
 | 
				
			||||||
            thechar =
 | 
					            thechar =
 | 
				
			||||||
              BB.stringUtf8 $
 | 
					              BB.stringUtf8 $
 | 
				
			||||||
              case BI.w2c cc of
 | 
					              case BI.w2c cc of
 | 
				
			||||||
| 
						 | 
					@ -108,12 +108,12 @@ unescapeRegex s = BL.toStrict . BB.toLazyByteString <$> unescape' s
 | 
				
			||||||
                '\\' -> "\\\\"
 | 
					                '\\' -> "\\\\"
 | 
				
			||||||
                a -> [a] --TODO add support for \x and \u
 | 
					                a -> [a] --TODO add support for \x and \u
 | 
				
			||||||
         in (thechar <>) <$> unescape' (B.tail rest)
 | 
					         in (thechar <>) <$> unescape' (B.tail rest)
 | 
				
			||||||
      | otherwise = (mappend $ BB.word8 $ B.head s) <$> unescape' (B.tail s)
 | 
					      | otherwise = mappend (BB.word8 $ B.head s) <$> unescape' (B.tail s)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
redfaPrepareRules :: RedfaOption -> IO RedfaSpec
 | 
					redfaPrepareRules :: RedfaOption -> IO RedfaSpec
 | 
				
			||||||
redfaPrepareRules opt = do
 | 
					redfaPrepareRules opt = do
 | 
				
			||||||
  (states, jumps, regexes, isToken) <-
 | 
					  (states, jumps, regexes, isToken) <-
 | 
				
			||||||
    unzip4 . catMaybes . fmap redfaRuleStringToRuleStr <$>
 | 
					    unzip4 . mapMaybe redfaRuleStringToRuleStr <$>
 | 
				
			||||||
    redfaOptionToRuleStrings opt
 | 
					    redfaOptionToRuleStrings opt
 | 
				
			||||||
  uRegexes <- traverse unescapeRegex regexes
 | 
					  uRegexes <- traverse unescapeRegex regexes
 | 
				
			||||||
  startREs <- traverse (makeRegexM . (fromString "\\`" <>)) uRegexes
 | 
					  startREs <- traverse (makeRegexM . (fromString "\\`" <>)) uRegexes
 | 
				
			||||||
| 
						 | 
					@ -148,7 +148,7 @@ redfaTokenize' spec s state off visited
 | 
				
			||||||
  | off >= B.length s = pure []
 | 
					  | off >= B.length s = pure []
 | 
				
			||||||
  | otherwise =
 | 
					  | otherwise =
 | 
				
			||||||
    let (ooff, reg) =
 | 
					    let (ooff, reg) =
 | 
				
			||||||
          if (off == 0)
 | 
					          if off == 0
 | 
				
			||||||
            then (0, rrRegexStart)
 | 
					            then (0, rrRegexStart)
 | 
				
			||||||
            else (1, rrRegexMid)
 | 
					            else (1, rrRegexMid)
 | 
				
			||||||
        matchString = B.drop (off - ooff) s
 | 
					        matchString = B.drop (off - ooff) s
 | 
				
			||||||
| 
						 | 
					@ -159,7 +159,7 @@ redfaTokenize' spec s state off visited
 | 
				
			||||||
        contOK (RedfaRule {rrJump = j}, (off', len))
 | 
					        contOK (RedfaRule {rrJump = j}, (off', len))
 | 
				
			||||||
          | off' /= 0 = False
 | 
					          | off' /= 0 = False
 | 
				
			||||||
          | len > ooff = True
 | 
					          | len > ooff = True
 | 
				
			||||||
          | otherwise = not $ j `elem` visited
 | 
					          | otherwise = j `notElem` visited
 | 
				
			||||||
     in case matches of
 | 
					     in case matches of
 | 
				
			||||||
          [] ->
 | 
					          [] ->
 | 
				
			||||||
            fail $
 | 
					            fail $
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue