Commit 230ea8c2 authored by Mike Ledger

support files that aren't .csv

parent 52e3facb
hello
train
these lines are just taken verbatim, commas be damned
......@@ -6,18 +6,20 @@
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TypeOperators #-}
module Main where
import Control.Monad.Trans.Resource (MonadResource, runResourceT)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Streaming as B
import Data.Csv (HasHeader (..))
import qualified Data.Map.Strict as Map
import GHC.Generics (Generic)
import Options.Generic (type (<?>), ParseField,
ParseRecord, getRecord,
unHelpful)
import Control.Monad.Trans.Resource (MonadResource, runResourceT)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Streaming as B
import qualified Data.ByteString.Streaming.Char8 as Q (lines)
import Data.Csv (HasHeader (..))
import Data.List (isSuffixOf)
import qualified Data.Map.Strict as Map
import GHC.Generics (Generic)
import Options.Generic (type (<?>), ParseField,
ParseRecord, getRecord,
unHelpful)
import Streaming
import qualified Streaming.Csv as Csv
import qualified Streaming.Prelude as S
import qualified Streaming.Csv as Csv
import qualified Streaming.Prelude as S
data MapOp
= Intersection
......@@ -26,14 +28,15 @@ data MapOp
deriving (Eq, Show, Read, Ord, Generic, ParseField)
data Options = Options
{ infiles :: [FilePath] <?>
".csv files to combine. The first column of each csv will be used as a key."
, outfile :: Maybe FilePath <?>
".csv file to write output to. If omitted, use stdout."
, hasHeader :: Maybe Bool <?>
"whether to ignore header in csv files"
, op :: Maybe MapOp <?>
"the operation to use to combine csv files (default: union)"
{ infiles :: [FilePath] <?> "files to combine. The first column of each csv\
\ will be used as a key. For non-csv files (\
\files that don't end in .csv), each line will \
\be treated as a key."
, outfile :: Maybe FilePath <?> ".csv file to write output to. If omitted, \
\use stdout."
, hasHeader :: Maybe Bool <?> "whether to ignore header in csv files"
, op :: Maybe MapOp <?> "the operation to use to combine csv files \
\(default: union)"
} deriving (Show, Generic, ParseRecord)
data Row
......@@ -55,6 +58,15 @@ decode opts = Csv.decode (if hasHeader' then HasHeader else NoHeader)
Just _ -> True
Nothing -> False
-- | Stream the rows of an input file, picking the reader from the file name.
--
-- Paths ending in @.csv@ are passed to 'decode'. Any other path is split into
-- lines, and every line is emitted as a one-element row wrapped in 'Right';
-- the line is not split any further, so commas inside it stay part of the key.
decodeFile :: MonadResource m
  => Options -> FilePath -> Stream (Of (Either String [ByteString])) m ()
decodeFile opts fp
  | ".csv" `isSuffixOf` fp = decode opts contents
  | otherwise =
      S.mapped
        -- Collapse each streamed line into a strict ByteString, then wrap it
        -- as a successfully decoded single-field row.
        (\line -> S.mapOf (\key -> Right [key]) <$> B.toStrict line)
        (Q.lines contents)
  where
    -- Raw bytes of the input file, shared by both branches.
    contents = B.readFile fp
-- | select the sink to use for output
output :: MonadResource m => Options -> B.ByteString m r -> m r
output opts = case unHelpful (outfile opts) of
......@@ -71,16 +83,14 @@ combine opts = case unHelpful (op opts) of
main :: IO ()
main = do
opts@Options{..} <- getRecord "Map-like operations on csv files"
runResourceT $
runResourceT $ mapM
-- write infiles as maps
mapM
(fmap (Map.fromList . S.fst')
. S.toList
. S.effects
. S.partitionEithers
. S.map (either (const (Left ())) toRow)
. decode opts
. B.readFile)
. decodeFile opts)
(unHelpful infiles) >>=
-- write output
output opts
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment