Commit 693dafe7 authored by Mike Ledger's avatar Mike Ledger

support labels in expressions

parent b13df0ac
......@@ -9,8 +9,9 @@
module Main where
import Control.Lens (failing, re, to, (^?), _Left,
_Right)
import Control.Monad (when)
import Control.Monad.Trans.Resource (MonadResource, runResourceT)
import Control.Monad.Trans.State.Strict
import Control.Monad.Trans.State.Strict (State, evalState, get, modify')
import Data.ByteString (ByteString)
import qualified Data.ByteString.Streaming as B
import qualified Data.ByteString.Streaming.Char8 as Q (lines)
......@@ -42,16 +43,20 @@ data MapOp
type Doc = Int
data MapExpr
= Union MapExpr MapExpr
| Difference MapExpr MapExpr
| Intersection MapExpr MapExpr
| LongUnion MapExpr MapExpr
| LongIntersection MapExpr MapExpr
data MapExpr k a
= Union (MapExpr k a) (MapExpr k a)
| Difference (MapExpr k a) (MapExpr k a)
| Intersection (MapExpr k a) (MapExpr k a)
| LongUnion (MapExpr k a) (MapExpr k a)
| LongIntersection (MapExpr k a) (MapExpr k a)
| DocNum {-# UNPACK #-} !Int
| Labelled {-# UNPACK #-} !Text (MapExpr k a)
| Label !Text
| Result (Map.Map k a)
| Error
deriving (Eq, Show, Read, Ord, Generic)
parseMapExpr :: Parser MapExpr
parseMapExpr :: Parser (MapExpr k a)
parseMapExpr =
buildExpressionParser
[ [ binary "*." LongIntersection AssocLeft
......@@ -65,11 +70,16 @@ parseMapExpr =
term
where
term =
P.token (DocNum . fromInteger <$> (P.char '$' *> P.decimal))
(do lit <- P.token P.stringLiteral
mterm <- P.optional (P.symbolic ':' *> term)
return (maybe (Label lit) (Labelled lit) mterm))
<|> P.token (DocNum . fromInteger <$> (P.char '$' *> P.decimal))
<|> P.parens parseMapExpr
binary name fun = Infix (fun <$ P.textSymbol name)
instance ParseField MapExpr where
binary name fun =
Infix (fun <$ P.textSymbol name)
instance (Typeable k, Typeable a) => ParseField (MapExpr k a) where
parseField h m c = case m of
Nothing ->
Options.argument optTrifectaParser
......@@ -82,7 +92,7 @@ instance ParseField MapExpr where
<> foldMap (Options.help . T.unpack) h
<> foldMap Options.short c
where
metavar = map toUpper (show (typeOf (undefined :: MapExpr)))
metavar = map toUpper (show (typeOf (undefined :: MapExpr k a)))
optTrifectaParser = Options.eitherReader $ \t -> fromMaybe
(Left "unknown error")
(P.parseString parseMapExpr mempty t ^?
......@@ -90,7 +100,7 @@ instance ParseField MapExpr where
(P._Success.re _Right)
(P._Failure.to show.re _Left))
data Options = Options
data Options k a = Options
{ infiles :: [FilePath] <?> (
"Files to combine. The first column of each csv will be used as a key. \
\For non-csv files (\files that don't end in .csv), each line will be \
......@@ -100,7 +110,7 @@ data Options = Options
, hasHeader :: Maybe Bool <?> "whether to ignore header in csv files"
, op :: Maybe MapOp <?> (
"The operation to use to combine csv files (default: union)")
, expr :: Maybe MapExpr <?> (
, expr :: Maybe (MapExpr k a) <?> (
"The expression to use to combine csv files. Reference the inputs with \
\$N for the Nth document in the \"infiles\".\n\
\The operations available are:\n\
......@@ -108,10 +118,12 @@ data Options = Options
\2. Difference with the - operator;\n\
\3. Intersection with the * operator;\n\
\4. Union combining all columns with the +. operator;\n\
\5 .Intersection combining all columns with the *. operator\n")
\5 .Intersection combining all columns with the *. operator\n\
\6. Labels for expressions with the syntax '\"LABEL\": EXPR'.")
, verbose :: Maybe Bool
} deriving (Show, Generic)
instance ParseRecord Options where
instance (Typeable k, Typeable a) => ParseRecord (Options k a) where
parseRecord = parseRecordWithModifiers lispCaseModifiers
{ shortNameModifier = \x -> case x of
"infiles" -> Just 'i'
......@@ -129,7 +141,7 @@ toRow _ = Left ()
-- | select how to decode csv
decode :: Monad m
=> Options
=> Options k a
-> B.ByteString m r
-> Stream (Of (Either String [ByteString])) m ()
decode opts = Csv.decode (if hasHeader' then HasHeader else NoHeader)
......@@ -139,7 +151,9 @@ decode opts = Csv.decode (if hasHeader' then HasHeader else NoHeader)
Nothing -> False
decodeFile :: MonadResource m
=> Options -> FilePath -> Stream (Of (Either String [ByteString])) m ()
=> Options k a
-> FilePath
-> Stream (Of (Either String [ByteString])) m ()
decodeFile opts fp =
if ".csv" `isSuffixOf` fp
then decode opts (B.readFile fp)
......@@ -148,33 +162,44 @@ decodeFile opts fp =
(Q.lines (B.readFile fp))
-- | select the sink to use for output
output :: MonadResource m => Options -> B.ByteString m r -> m r
output :: MonadResource m => Options k a -> B.ByteString m r -> m r
output opts = case unHelpful (outfile opts) of
Just out -> B.writeFile out
Nothing -> B.stdout
interpret :: forall a k. (Monoid a, Ord k) => MapExpr -> V.Vector (Map.Map k a) -> Map.Map k a
interpret me0 v = evalState (go me0) Map.empty
interpret :: forall a k. (Monoid a, Ord k, Ord a) => MapExpr k a -> V.Vector (Map.Map k a) -> Map.Map k a
interpret me0 v = case evalState (go me0) Map.empty of
Result r -> r
_ -> Map.empty
where
go :: MapExpr
-> State (Map.Map MapExpr (Map.Map k a)) (Map.Map k a)
withResult2 f (Result a) (Result b) = Result (f a b)
withResult2 _ _ _ = Error
ins k = k <$ modify' (Map.insert k k)
go :: MapExpr k a
-> State (Map.Map (MapExpr k a) (MapExpr k a)) (MapExpr k a)
go me = do
cache <- get
case Map.lookup me cache of
Just x -> return x
Nothing -> do
r <- case me of
DocNum i -> return (v V.! (i - 1))
Union a b -> Map.union <$> go a <*> go b
Difference a b -> Map.difference <$> go a <*> go b
Intersection a b -> Map.intersection <$> go a <*> go b
LongUnion a b -> Map.unionWith mappend <$> go a <*> go b
LongIntersection a b -> Map.intersectionWith mappend <$> go a <*> go b
modify' (Map.insert me r)
return r
Nothing -> case me of
Labelled k a -> do
a' <- go a
modify' (Map.insert (Label k) a')
return a'
Label k -> return (Label k)
DocNum i -> ins (Result (v V.! (i - 1)))
Union a b -> ins =<< withResult2 Map.union <$> go a <*> go b
Difference a b -> ins =<< withResult2 Map.difference <$> go a <*> go b
Intersection a b -> ins =<< withResult2 Map.intersection <$> go a <*> go b
LongUnion a b -> ins =<< withResult2 (Map.unionWith mappend) <$> go a <*> go b
LongIntersection a b -> ins =<< withResult2 (Map.intersectionWith mappend) <$> go a <*> go b
_ -> return me
-- | select the combining algorithm
combine :: (Monoid a, Ord k) => Options -> [Map.Map k a] -> Map.Map k a
combine :: (Monoid a, Ord k, Ord a) => Options k a -> [Map.Map k a] -> Map.Map k a
combine opts =
case unHelpful (expr opts) of
Just expr -> interpret expr . V.fromList
......@@ -186,6 +211,7 @@ combine opts =
main :: IO ()
main = do
opts@Options{..} <- getRecord "Map-like operations on csv files"
when (verbose == Just True) (print opts)
runResourceT $ mapM
-- write infiles as maps
(fmap (Map.fromList . S.fst')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment