Commit 59bb9832 authored by Mike Ledger's avatar Mike Ledger

implement everything

parent 06b819e3
......@@ -23,4 +23,14 @@ executable csvmaps
src
build-depends:
base >= 4.7 && < 5
, bytestring
, streaming-bytestring
, streaming
, cassava-streaming >= 0.2
, cassava
, containers
, optparse-generic
, resourcet
, vector >= 0.10
, text >= 1.0
default-language: Haskell2010
......@@ -13,6 +13,16 @@ extra-source-files:
dependencies:
- base >= 4.7 && < 5
- bytestring
- streaming-bytestring
- streaming
- cassava-streaming >= 0.2
- cassava
- containers
- optparse-generic
- resourcet
- vector >= 0.10
- text >= 1.0
executables:
csvmaps:
......
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE ExplicitNamespaces #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TypeOperators #-}
module Main where
import Control.Monad.Trans.Resource (MonadResource, runResourceT)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Streaming as B
import Data.Csv (HasHeader (..))
import qualified Data.Map.Strict as Map
import GHC.Generics (Generic)
import Options.Generic (type (<?>), ParseField,
ParseRecord, getRecord,
unHelpful)
import Streaming
import qualified Streaming.Csv as Csv
import qualified Streaming.Prelude as S
data MapOp
= Intersection
| Union
| Difference
deriving (Eq, Show, Read, Ord, Generic, ParseField)
data Options = Options
{ infiles :: [FilePath] <?>
".csv files to combine. The first column of each csv will be used as a key."
, outfile :: Maybe FilePath <?>
".csv file to write output to. If omitted, use stdout."
, hasHeader :: Maybe Bool <?>
"whether to ignore header in csv files"
, op :: Maybe MapOp <?>
"the operation to use to combine csv files (default: union)"
} deriving (Show, Generic, ParseRecord)
data Row
= EmptyRow
| Row !ByteString ![ByteString]
toRow :: [ByteString] -> Either () (ByteString, [ByteString])
toRow (h:hs) = Right (h, hs)
toRow _ = Left ()
-- | select how to decode csv
decode :: Monad m
=> Options
-> B.ByteString m r
-> Stream (Of (Either String [ByteString])) m ()
decode opts = Csv.decode (if hasHeader' then HasHeader else NoHeader)
where
hasHeader' = case unHelpful (hasHeader opts) of
Just _ -> True
Nothing -> False
-- | select the sink to use for output
output :: MonadResource m => Options -> B.ByteString m r -> m r
output opts = case unHelpful (outfile opts) of
Just out -> B.writeFile out
Nothing -> B.stdout
-- | select the combining algorithm
combine :: Ord k => Options -> [Map.Map k a] -> Map.Map k a
combine opts = case unHelpful (op opts) of
Just Intersection -> foldr Map.intersection Map.empty
Just Difference -> foldr Map.difference Map.empty
_union -> Map.unions
main :: IO ()
main = do
putStrLn "hello world"
opts@Options{..} <- getRecord "Map-like operations on csv files"
-- write infiles as maps
inmaps <- runResourceT $ mapM
(fmap (Map.fromList . S.fst')
. S.toList
. S.effects
. S.partitionEithers
. S.map (either (const (Left ())) toRow)
. decode opts
. B.readFile)
(unHelpful infiles)
-- write output
runResourceT
. output opts
. Csv.encode
-- TODO: add header
. mapM_ (\(k, vs) -> S.yield (k:vs))
. Map.toList
$ combine opts inmaps
......@@ -37,9 +37,15 @@ resolver: lts-9.14
# will not be run. This is useful for tweaking upstream packages.
packages:
- .
- location:
git: git@gitlab.com:transportengineering/streaming-csv.git
commit: 68a600e3822755892630804ec0a82ef34ee97d5c
extra-dep: true
# Dependency packages to be pulled from upstream that are not in the resolver
# (e.g., acme-missiles-0.3)
extra-deps: []
extra-deps:
- pipes-csv-1.4.3
# Override default flag values for local packages and extra-deps
flags: {}
......@@ -63,4 +69,4 @@ extra-package-dbs: []
# extra-lib-dirs: [/path/to/dir]
#
# Allow a newer minor version of GHC than the snapshot specifies
# compiler-check: newer-minor
\ No newline at end of file
# compiler-check: newer-minor
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment