Commit 1b0f1ad2 authored by Mike Ledger's avatar Mike Ledger

support using basic set expressions to process arguments

parent a0aaa865
......@@ -33,4 +33,8 @@ executable csvmaps
, resourcet
, vector >= 0.10
, text >= 1.0
, trifecta
, parsers
, optparse-applicative
, lens
default-language: Haskell2010
......@@ -23,6 +23,10 @@ dependencies:
- resourcet
- vector >= 0.10
- text >= 1.0
- trifecta
- parsers
- optparse-applicative
- lens
executables:
csvmaps:
......
......@@ -7,6 +7,8 @@
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TypeOperators #-}
module Main where
import Control.Lens (failing, re, to, (^?), _Left,
_Right)
import Control.Monad.Trans.Resource (MonadResource, runResourceT)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Streaming as B
......@@ -14,36 +16,106 @@ import qualified Data.ByteString.Streaming.Char8 as Q (lines)
import Data.Csv (HasHeader (..))
import Data.List (isSuffixOf)
import qualified Data.Map.Strict as Map
import Data.Typeable
import qualified Data.Vector as V
import GHC.Generics (Generic)
import Options.Generic
import Streaming
import qualified Streaming.Csv as Csv
import qualified Streaming.Prelude as S
import Data.Char (toUpper)
import Data.Maybe (fromMaybe)
import qualified Data.Text as T
import qualified Options.Applicative as Options
import Text.Parser.Expression (Assoc (..), Operator (..),
buildExpressionParser)
import Text.Trifecta (Parser)
import qualified Text.Trifecta as P
data MapOp
= Intersection
| Union
| Difference
= OpIntersection
| OpUnion
| OpDifference
deriving (Eq, Show, Read, Ord, Generic, ParseField)
type Doc = Int
data MapExpr
= Union MapExpr MapExpr
| Difference MapExpr MapExpr
| Intersection MapExpr MapExpr
| LongUnion MapExpr MapExpr
| LongIntersection MapExpr MapExpr
| DocNum {-# UNPACK #-} !Int
deriving (Eq, Show, Read, Ord, Generic)
parseMapExpr :: Parser MapExpr
parseMapExpr =
buildExpressionParser
[ [ binary "*" Intersection AssocLeft
, binary "*." LongIntersection AssocLeft
]
, [ binary "+" Union AssocRight
, binary "+." LongUnion AssocRight
, binary "-" Difference AssocLeft
]
]
term
where
term =
P.token (DocNum . fromInteger <$> (P.char '$' *> P.decimal))
<|> P.parens parseMapExpr
binary name fun = Infix (fun <$ P.textSymbol name)
instance ParseField MapExpr where
parseField h m c = case m of
Nothing ->
Options.argument optTrifectaParser
$ Options.metavar metavar
<> foldMap (Options.help . T.unpack) h
Just name ->
Options.option optTrifectaParser
$ Options.metavar metavar
<> Options.long (T.unpack name)
<> foldMap (Options.help . T.unpack) h
<> foldMap Options.short c
where
metavar = map toUpper (show (typeOf (undefined :: MapExpr)))
optTrifectaParser = Options.eitherReader $ \t -> fromMaybe
(Left "unknown error")
(P.parseString parseMapExpr mempty t ^?
failing
(P._Success.re _Right)
(P._Failure.to show.re _Left))
data Options = Options
{ infiles :: [FilePath] <?> "files to combine. The first column of each csv\
\ will be used as a key. For non-csv files (\
\files that don't end in .csv), each line will \
\be treated as a key."
, outfile :: Maybe FilePath <?> ".csv file to write output to. If omitted, \
\use stdout."
{ infiles :: [FilePath] <?> (
"Files to combine. The first column of each csv will be used as a key. \
\For non-csv files (\files that don't end in .csv), each line will be \
\treated as a key.")
, outfile :: Maybe FilePath <?> (
".csv file to write output to. If omitted, use stdout.")
, hasHeader :: Maybe Bool <?> "whether to ignore header in csv files"
, op :: Maybe MapOp <?> "the operation to use to combine csv files \
\(default: union)"
, op :: Maybe MapOp <?> (
"The operation to use to combine csv files (default: union)")
, expr :: Maybe MapExpr <?> (
"The expression to use to combine csv files. Reference the inputs with \
\$N for the Nth document in the \"infiles\".\n\
\The operations available are:\n\
\1. Union with the + operator;\n\
\2. Difference with the - operator;\n\
\3. Intersection with the * operator;\n\
\4. Union combining all columns with the +. operator;\n\
\5 .Intersection combining all columns with the *. operator\n")
} deriving (Show, Generic)
instance ParseRecord Options where
parseRecord = parseRecordWithModifiers lispCaseModifiers
{ shortNameModifier = \x -> case x of
"infiles" -> Just 'i'
"outfile" -> Just 'o'
_ -> Nothing
"infiles" -> Just 'i'
"outfile" -> Just 'o'
_ -> Nothing
}
data Row
......@@ -80,12 +152,26 @@ output opts = case unHelpful (outfile opts) of
Just out -> B.writeFile out
Nothing -> B.stdout
interpret :: (Monoid a, Ord k) => MapExpr -> V.Vector (Map.Map k a) -> Map.Map k a
interpret me0 v = go me0
where
go me = case me of
DocNum i -> v V.! i
Union a b -> Map.union (go a) (go b)
Difference a b -> Map.difference (go a) (go b)
Intersection a b -> Map.difference (go a) (go b)
LongUnion a b -> Map.unionWith mappend (go a) (go b)
LongIntersection a b -> Map.intersectionWith mappend (go a) (go b)
-- | select the combining algorithm
combine :: Ord k => Options -> [Map.Map k a] -> Map.Map k a
combine opts = case unHelpful (op opts) of
Just Intersection -> foldr1 Map.intersection
Just Difference -> foldr1 Map.difference
_union -> Map.unions
combine :: (Monoid a, Ord k) => Options -> [Map.Map k a] -> Map.Map k a
combine opts =
case unHelpful (expr opts) of
Just expr -> interpret expr . V.fromList
Nothing -> case unHelpful (op opts) of
Just OpIntersection -> foldr1 Map.intersection
Just OpDifference -> foldr1 Map.difference
_union -> Map.unions
main :: IO ()
main = do
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment