Commit c8834d4c authored by Joris's avatar Joris

Make LBC to work

Use request headers to simulate a normal browser
parent 8d14cb80
eval "$(lorri direnv)"
-- This is a GHC environment file written by cabal. This means you can
-- run ghc or ghci and get the environment of the project as a whole.
-- But you still need to use cabal repl $target to get the environment
-- of specific components (libs, exes, tests etc) because each one can
-- have its own source dirs, cpp flags etc.
--
clear-package-db
global-package-db
package-db /home/joris/.cabal/store/ghc-8.6.4/package.db
package-db dist-newstyle/packagedb/ghc-8.6.4
package-id ad-listener-0.1-inplace
package-id base-4.12.0.0
package-id ghc-prim-0.5.3
package-id rts
package-id integer-gmp-1.0.2.0
package-id bytestring-0.10.8.2
package-id deepseq-1.4.4.0
package-id array-0.5.3.0
package-id http-conduit-2.3.7.1-698cc59842e91fb2385290d7f953ea81d6a410ab3cb5d8eddba1dadbcb91777f
package-id aeson-1.4.4.0-19a607af04a35e3d1ee1aa758868a3247152dd6e4f1057e9261c49231ec4ec3c
package-id attoparsec-0.13.2.3-acb21b793de8649e907e1d4337c2fe7d5ae23b610fd0ef7c4b80e118c54e2cc8
package-id containers-0.6.0.1
package-id scientific-0.3.6.2-17e1e07dfeace3f0783362d4d83e24fa68f8f29ca612c6559407e1cd1555c502
package-id binary-0.8.6.0
package-id hashable-1.3.0.0-42f92b7b698f5acd857b99255c4b1fa62a45a98ca86ad5d1116a8298603b41cb
package-id text-1.2.3.1
package-id integer-logarithms-1.0.3-92a2587ad40e99867cdf48c87f146e20859d728808cd820206d06371f1469fc6
package-id primitive-0.7.0.0-d14f8d5d06d5a095af6888521d6674d5b238017b461885fc5ca29325cf41f48a
package-id transformers-0.5.6.2
package-id base-compat-0.10.5-b16cc06fec58ceb5fced2b5eb7f125b200fcc2f943459d2561c7d21d0dbed899
package-id unix-2.7.2.2
package-id time-1.8.0.2
package-id dlist-0.8.0.7-4d3570c268f99aa4a9bcb4b4527bf1b11235d2ff0ed08f6cc352707fed0791fd
package-id tagged-0.8.6-c682998bf9ae7c10cc90a973bc252110b24942c70f29057b91b57a55ce04e543
package-id template-haskell-2.14.0.0
package-id ghc-boot-th-8.6.4
package-id pretty-1.1.3.6
package-id th-abstraction-0.3.1.0-83fd5fc72b916dd497b48c8b45946e2926bfea95f7d31e740b3ac6d1746b98dd
package-id time-compat-1.9.2.2-225cf213101445dd6e8324b3eb2d0f2ff223ac0f316771135121df097d3d856f
package-id base-orphans-0.8.1-d1d5c2f906d8b31189d967f92e3dab8d190a4551016acb01a8f15fe2cc62b631
package-id unordered-containers-0.2.10.0-5b332f1338eacb3d630f9aea03317c2d461a2ccb49f757e6745ac73f7d6a233b
package-id uuid-types-1.0.3-04b088c1d6c06dafea2dde1453d694792749674fc22fca8e2fc2b6605f692022
package-id random-1.1-c4ce1004e7955597786ec781768e43219ab90094a384329291623691c3d437f7
package-id vector-0.12.0.3-998e7a9b071fe68c82651d4210eb3c16a7fe00d32804777484afa72dd0f0b84a
package-id conduit-1.3.1.1-ac26675610927782be99d669337f715ac2e65189abbb8a6aecd97da1c7619c41
package-id directory-1.3.3.0
package-id filepath-1.4.2.1
package-id exceptions-0.10.3-5f18f1eb34346651e717ed86ef2b050241d9bc86927eff9deb32972578864265
package-id mtl-2.2.2
package-id stm-2.5.0.0
package-id transformers-compat-0.6.5-d6949cf186a56947e1f67c8659b23c4b0bd0520d5e2bb84e0812638fa7df698d
package-id mono-traversable-1.0.12.0-33e51187ae735c9933fdb7dc3da77666c572022f99be741da130509fe69ebfcd
package-id split-0.2.3.3-543cd322cb8d74c04caf3f4aa2431da48f00ce7b3818ad3a04e7ccb8d7b65d9b
package-id vector-algorithms-0.8.0.1-f1ab303a4db5d2e71c471b883028ff04924210a288404866bb254acd7ebc390f
package-id resourcet-1.2.2-4111d8829da9570169a650f0b4621e541fa314acd6ecd6a7706c7276ca8001bd
package-id unliftio-core-0.1.2.0-a2bd0029a962b5d323fe59989e51375945216990525221be529584016738d965
package-id conduit-extra-1.3.4-605601e4c2aa62f7fe2547728a1a6d64f81f6eca6ae738a53043a2a93360edc4
package-id async-2.2.2-31ba4beb34a7bb776872540c70359a3d7edcec896c21d08a5ecea6e9e6815058
package-id network-3.1.0.1-c5d093cbdef56dcaabb066d39df063a466ac209c9bcc9e65a794e46ec1eb3308
package-id process-1.6.5.0
package-id streaming-commons-0.2.1.1-aabf659c16433d85fdecd4303d1d224b9f3167244491103b7264b161647cf61a
package-id zlib-0.6.2.1-1dd162374fc5320b398c6f46dee82c534acced9be9f0391fad1f4ce19b84997f
package-id typed-process-0.2.6.0-3ce1b8e6fd9c7c7d93e8e1989ea3ceda0f6f86a8adba9407d75fd77259030b26
package-id http-client-0.6.4-1f3c30cae5f28642d87b7b53aa6394038890168c7f5840a0ecd529a6a9be65ce
package-id blaze-builder-0.4.1.0-c3678a80baedeb5b64caa4c56157e60216eb666694148dcc133213a712350871
package-id case-insensitive-1.2.1.0-3b6e1720d2ab778d809d2c3172293b098d86b8ec00d92f6b011c1ef2daa25dc0
package-id cookie-0.4.4-ef22f175c52bdc5579351a87a5d3c5dd266b79cf5ccd0161e4c03d806b65deca
package-id data-default-class-0.1.2.0-e2fae1c8a626f35fb46dd7274333d0deb7c563f41c08b45f363122b77dc260dc
package-id http-types-0.12.3-2f5117dd73be7e6e5ac721653ef225f4175625e4671991a9234a22a90c88ae10
package-id memory-0.14.18-9ef41e792da357558f4a82bac25db446e95e1f434dfaecda588b70187c1f7b06
package-id basement-0.0.11-b2c9aa4392b0a3951d2318098bd94dbe78e5ed580538fa37c3a69b754945483f
package-id mime-types-0.1.0.9-98f906fc5ace7c2d7e97dc971094089f2f41801d56c1165a9a45537f3a573692
package-id network-uri-2.6.1.0-1f1b00fb6d6442127eaef14a265e718c250b2a994c60de4c4b376b6a7184de95
package-id parsec-3.1.13.0
package-id http-client-tls-0.3.5.3-27030194579f9e0f6e1722b1939665beaaa95c45575e5ab51a7ab6336196b9eb
package-id connection-0.3.1-fb216e05d56b20368f3d342685e0e953b8596160d82ab4db96c9119ad648e0c0
package-id socks-0.6.0-c50da2f6d696d86d19a9287e7b0307998083558c76e45a60fb37239b84d6ba22
package-id cereal-0.5.8.1-bc19aba8084abe1b14e8a9cd796fcbc33ad18b6bbcac8b6047cd75c6d2161537
package-id tls-1.5.1-2136f2ee993d8db2b27e7e1e078c60ba5d7696b9588d8e6b1db0b427b82879aa
package-id asn1-encoding-0.9.5-fc90e4389aef22991454a55623fe123e2335180e02204203052b23b1f484a138
package-id asn1-types-0.3.3-cc2f9a9f556d3be5db6c2e63c078b775770bba03dd7ac95aae152110c6f74458
package-id hourglass-0.2.12-3e32758353ce74eab6b66dbb274afa6d8dbfe7423f008642dc27bc74a17809b7
package-id cryptonite-0.26-94cfd75d566dbd03216eca620edc9119cdd90104966b52da797e2f30498cc229
package-id x509-1.7.5-43f60aa803dd81214b1f1b2e47be6fcfd8941f17179eae97fde1561210a11757
package-id asn1-parse-0.9.4-337ca4a5c2d57d3352134a30e5de78678c9b4f7361613abf6df556584789b906
package-id pem-0.2.4-586e7188d4074b1190839dd3cc01a0f80e24329c3f9dfa1eb9920f6eb84696c2
package-id x509-store-1.6.7-8634583b926a7be0b54e199dba6c824518d98c5110d97e7d503cb4c6ba81c56b
package-id x509-validation-1.6.11-3f8e57be54fdf9bbbf189bf8370e04eee616266f19c844eea185e29dbeb70001
package-id x509-system-1.6.6-f8ff71c611c7e4ce4ce43f60740f752815c00bd9403344bba813a209b8920415
package-id tagsoup-0.14.8-3b33ce2fe87968f75cb05f428e8bdabe1f5ca33bc74985e015a314e628b8adf5
package-id blaze-html-0.9.1.1-b53d45b015525da1f7da4deca624bdf7282b56778d385b6f97e90a829acd58c2
package-id blaze-markup-0.8.2.2-05571f9ec582261c30b12405b0d51ae89d6029eeedb9eb2f59f1ffa22826fa8d
package-id clay-0.13.1-f8c85c3b00f679635fc0f3c728aa346f00fe85d505b125a072bc2ab2816ae510
package-id config-manager-0.3.0.1-eed9e82f7bee2f9981b3513879d6ee77a6179061dc04aad797bfc82407367538
package-id mime-mail-0.5.0-4fb1fd076fa6fdae93fa2fc8a302afcaf1afea652d1d2f5c0512bdcf96612555
package-id base64-bytestring-1.0.0.2-2603227112d298ee837421128736ff44b0373172562e3c2a0cf899f7fc3a69f6
package-id hspec-2.7.1-0556f6629a0618a7e4dce2647189a32370175c65a1b270dabf4a46888558409a
package-id QuickCheck-2.13.2-e6a396b9568e1072e8dc8561e60be6472a3a9f8563634af1b0a060767f49d498
package-id splitmix-0.0.3-f3c95ef168d47a82bf1dd428f59adab3233cd12c4a13cbddc3c38f3cb0f18481
package-id hspec-core-2.7.1-a1a223cfbc48a2ad178e3f67852bfcc81aada8c98d87cb0fe8638e05d97403bf
package-id HUnit-1.6.0.0-59fb1fecf899b35795c11fc6b20a689fd55838f9f2e487040a538b3b405af0ee
package-id call-stack-0.2.0-ff556664ddc7553e213448711d6533a056144b4bb6ddabb7bbca606838915a42
package-id ansi-terminal-0.9.1-96f32532343b8ddb78833b2c0267eb7cdf2aa1be5ee68cf35403f0ab02166ab3
package-id colour-2.3.5-f5ec3a7785abd04cc7c696d496bcbb7687bcff68e8dc9ff7ba84df0b09761e5d
package-id clock-0.8-4438c79e062e978d314f3247ce7102891f992633921e06fc3568820df0f2810a
package-id hspec-expectations-0.8.2-08db7874368c513d1cb483ca89e80bcc55551ee15f1ac46cb10d6ec63ac28cf5
package-id quickcheck-io-0.2.0-356b1a2ed0f1adedb2bca6da6bc3c413068ea546159388abdd183b563c4198f5
package-id setenv-0.1.1.3-f6f10b5fe3849eae4c6dd29a2027196e3f7b320a6bdc940eeb4bf28188003229
package-id tf-random-0.5-c7219da0b994c46c2c3c20fa160eeb06453202068ba1114ed72479db04a8b725
package-id hspec-discover-2.7.1-7dccd83a570fb3400638855c098e3f84332f4e5a24ec7721012c888fe6b28a7b
ad-listener.nix
dist/
dist-newstyle/
local.conf
name: ad-listener
startup_window: app
windows:
- main:
layout: fff4,119x58,0,0{94x58,0,0,0,24x58,95,0,1}
panes:
- # Empty
- make clean build watch
- console:
- # Empty
- app:
- make clean watch
......@@ -2,31 +2,28 @@ all: build
# Dev commands
dev-start:
start:
@nix-shell --command "tmuxinator local"
dev-stop:
stop:
@nix-shell --command "tmuxinator stop ad-listener"
# Other commands
clean:
@cabal clean > /dev/null
@cabal new-clean > /dev/null
install:
@cabal2nix --shell . > ad-listener.nix
watch:
@make install && nix-shell ad-listener.nix --run "nodemon --watch src --delay 0.2 -e hs,conf --exec 'clear && make build-and-launch'"
watch: build
@nodemon --watch src --delay 0.2 -e hs,conf --exec 'clear && make build-and-launch'
build-and-launch:
@(pkill ad-listener || true) && (cabal run || true)
@(pkill ad-listener || true) && (nix-shell -p zlib --command "cabal new-run ad-listener" || true)
build:
@make install && nix-shell ad-listener.nix --run "cabal build || true"
@nix-shell -p zlib --command "cabal new-build"
repl:
@make install && nix-shell ad-listener.nix --run "cabal repl"
@cabal new-repl
test:
@make install && nix-shell ad-listener.nix --run "cabal test"
@nix-shell -p zlib --command "cabal new-test"
......@@ -11,8 +11,8 @@ Then, it send a mail whenever a new ad come up.
## Getting started
1. Install [nix](https://nixos.org/nix/),
2. launch `make dev-start`,
3. later, stop the project with `make dev-stop`.
2. launch `make start`,
3. later, stop the project with `make stop`.
## Build executable
......
......@@ -22,6 +22,7 @@ Library
, http-conduit
, tagsoup
, text
, http-types
Exposed-modules:
FetchAd
......@@ -57,6 +58,7 @@ Executable ad-listener
, tagsoup
, text
, time
, http-conduit
Other-modules:
Conf
......@@ -81,6 +83,7 @@ Test-suite test
, hspec
, ad-listener
, text
, http-conduit
Other-modules:
Ads
......@@ -6,8 +6,8 @@ mailFrom = "ad-listener@mail.com"
mailTo = []
listenInterval = 1 minute
mailMock = False
devMode = False
listenInterval = 20 minute
importMaybe "local.conf"
......@@ -3,7 +3,6 @@ with import <nixpkgs> {}; {
name = "env";
buildInputs = with nodePackages; with haskellPackages; [
cabal-install
cabal2nix
nodemon
stylish-haskell
tmux
......
......@@ -16,8 +16,8 @@ data Conf = Conf
, seLogerUrls :: [URL]
, mailFrom :: Text
, mailTo :: [Text]
, mailMock :: Bool
, listenInterval :: NominalDiffTime
, devMode :: Bool
} deriving Show
parse :: FilePath -> IO Conf
......@@ -31,8 +31,8 @@ parse path = do
Conf.lookup "seLogerUrls" conf <*>
Conf.lookup "mailFrom" conf <*>
Conf.lookup "mailTo" conf <*>
Conf.lookup "listenInterval" conf <*>
Conf.lookup "devMode" conf
Conf.lookup "mailMock" conf <*>
Conf.lookup "listenInterval" conf
)
case conf of
Left msg -> error (T.unpack msg)
......
......@@ -2,10 +2,13 @@ module Main
( main
) where
import qualified Network.HTTP.Conduit as H
import qualified Conf
import qualified Service.AdListener as AdListener
import qualified Service.AdListener as AdListener
main :: IO ()
main = do
conf <- Conf.parse "application.conf"
AdListener.start conf
manager <- H.newManager H.tlsManagerSettings
AdListener.start conf manager
......@@ -2,62 +2,64 @@ module Service.AdListener
( start
) where
import Control.Concurrent (threadDelay)
import qualified Data.Text.IO as T
import Prelude hiding (error)
import Control.Concurrent (threadDelay)
import qualified Data.Text as T
import qualified Data.Text.IO as T
import Network.HTTP.Conduit (Manager)
import Prelude hiding (error)
import Conf (Conf)
import Conf (Conf)
import qualified Conf
import qualified FetchAd
import Model.Ad (Ad)
import qualified Model.Ad as Ad
import Model.Mail (Mail (Mail))
import Model.URL (URL)
import qualified Service.MailService as MailService
import qualified Utils.Time as TimeUtils
import qualified View.Ad as Ad
import Model.Ad (Ad)
import qualified Model.Ad as Ad
import Model.Mail (Mail (Mail))
import Model.URL (URL)
import qualified Service.MailService as MailService
import qualified Utils.Time as TimeUtils
import qualified View.Ad as Ad
start :: Conf -> IO ()
start conf = do
ads <- fetchAds conf
start :: Conf -> Manager -> IO ()
start conf manager = do
ads <- fetchAds conf manager
let newURLs = map Ad.url ads
T.putStrLn "Listening to new ads…"
waitListenInterval conf
listenToNewAdsWithViewedURLs conf newURLs
listenToNewAdsWithViewedURLs conf manager newURLs
listenToNewAdsWithViewedURLs :: Conf -> [URL] -> IO ()
listenToNewAdsWithViewedURLs conf viewedURLs = do
ads <- fetchAds conf
listenToNewAdsWithViewedURLs :: Conf -> Manager -> [URL] -> IO ()
listenToNewAdsWithViewedURLs conf manager viewedURLs = do
ads <- fetchAds conf manager
let (newURLs, newAds) = Ad.getNewAds viewedURLs ads
time <- TimeUtils.getCurrentFormattedTime
if not (null newAds)
then
do
_ <- T.putStrLn (Ad.renderConsoleAds time newAds)
if Conf.devMode conf
then return ()
else sendMail conf newAds
sendMail conf newAds
else
return ()
waitListenInterval conf
listenToNewAdsWithViewedURLs conf (viewedURLs ++ newURLs)
listenToNewAdsWithViewedURLs conf manager (viewedURLs ++ newURLs)
fetchAds :: Conf -> IO [Ad]
fetchAds conf = do
leboncoinAds <- FetchAd.leboncoin (Conf.leboncoinUrls conf)
ouestFranceAds <- FetchAd.ouestFrance (Conf.ouestFranceUrls conf)
seLogerAds <- FetchAd.seLoger (Conf.seLogerUrls conf)
fetchAds :: Conf -> Manager -> IO [Ad]
fetchAds conf manager = do
leboncoinAds <- FetchAd.leboncoin manager (Conf.leboncoinUrls conf)
ouestFranceAds <- FetchAd.ouestFrance manager (Conf.ouestFranceUrls conf)
seLogerAds <- FetchAd.seLoger manager (Conf.seLogerUrls conf)
let results = leboncoinAds ++ ouestFranceAds ++ seLogerAds
if null results
then T.putStrLn "Parsed 0 results!"
else return ()
T.putStrLn . T.concat $
[ "Parsed "
, T.pack . show $ length results
, " results"
]
return results
sendMail :: Conf -> [Ad] -> IO ()
sendMail conf ads =
let (title, plainBody) = Ad.renderAds ads
mail = Mail (Conf.mailFrom conf) (Conf.mailTo conf) title plainBody
in MailService.send mail >> return ()
in MailService.send (Conf.mailMock conf) mail >> return ()
waitListenInterval :: Conf -> IO ()
waitListenInterval = threadDelay . (*) 1000000 . round . Conf.listenInterval
......@@ -4,9 +4,9 @@ module Service.MailService
import Control.Arrow (left)
import Control.Exception (SomeException, try)
import Data.Either (isLeft)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.Lazy as LT
import Data.Text.Lazy.Builder (fromText, toLazyText)
import qualified Network.Mail.Mime as Mime
......@@ -14,13 +14,26 @@ import qualified Network.Mail.Mime as Mime
import Model.Mail (Mail)
import qualified Model.Mail as Mail
send :: Mail -> IO (Either Text ())
send mail = do
result <- left (T.pack . show) <$> (try (Mime.renderSendMail . getMimeMail $ mail) :: IO (Either SomeException ()))
if isLeft result
then putStrLn ("Error sending the following email:" ++ (show mail))
else return ()
return result
send :: Bool -> Mail -> IO (Either Text ())
send isMock mail =
if isMock then do
putStrLn $ "MOCK sending mail " ++ (show mail)
return . Right $ ()
else do
result <-
left (T.pack . show) <$>
(try (Mime.renderSendMail . getMimeMail $ mail) :: IO (Either SomeException ()))
case result of
Left err ->
T.putStrLn . T.concat $
[ "Error sending the following email ("
, T.pack . show $ mail
, ":\n"
, err
]
Right _ ->
return ()
return result
getMimeMail :: Mail -> Mime.Mail
getMimeMail mail =
......
......@@ -5,7 +5,7 @@ module FetchAd
) where
import Data.Either (rights)
import Data.Text.Encoding as T
import Network.HTTP.Conduit (Manager)
import Model.Ad (Ad)
import Model.URL (URL)
......@@ -14,23 +14,23 @@ import qualified Parser.OuestFranceParser as OuestFranceParser
import qualified Parser.SeLogerParser as SeLogerParser
import qualified Utils.HTTP as HTTP
leboncoin :: [URL] -> IO [Ad]
leboncoin urls =
leboncoin :: Manager -> [URL] -> IO [Ad]
leboncoin manager urls =
fmap (concat . map LeboncoinParser.parse . rights)
. sequence
. map (HTTP.get T.decodeLatin1)
. map (HTTP.get manager)
$ urls
ouestFrance :: [URL] -> IO [Ad]
ouestFrance urls =
ouestFrance :: Manager -> [URL] -> IO [Ad]
ouestFrance manager urls =
fmap (concat . map OuestFranceParser.parse . rights)
. sequence
. map (HTTP.get T.decodeUtf8)
. map (HTTP.get manager)
$ urls
seLoger :: [URL] -> IO [Ad]
seLoger urls =
seLoger :: Manager -> [URL] -> IO [Ad]
seLoger manager urls =
fmap (concat . map SeLogerParser.parse . rights)
. sequence
. map (HTTP.get T.decodeUtf8)
. map (HTTP.get manager)
$ urls
......@@ -11,14 +11,19 @@ import Model.Ad (Ad (Ad))
import Parser.Utils
parse :: Text -> [Ad]
parse page =
catMaybes . fmap parseAd $ partitions (~== (T.unpack "<a>")) tags
where tags = getTagsBetween "<li itemtype=http://schema.org/Offer>" "<div class=information-immo_content>" (parseTags page)
parse =
catMaybes
. fmap parseAd
. partitions (~== (T.unpack "<li>"))
. parseTags
parseAd :: [Tag Text] -> Maybe Ad
parseAd tags = do
name <- getTagTextAfter "<h2 class=item_title>" tags
location <- getTagAttribute "<meta itemprop=address>" "content" tags
let price = getTagTextAfter "<h3 class=item_price>" tags
name <- getTagTextAfter "<span data-qa-id=aditem_title>" tags
location <- getTagTextAfter "<p data-qa-id=aditem_location>" tags
let price =
case getTagsBetween "<span itemprop=priceCurrency>" "</span>" tags of
[] -> Nothing
xs -> Just $ innerText xs
url <- getTagAttribute "<a>" "href" tags
return (Ad name location price (T.concat ["https:", url]))
......@@ -2,21 +2,47 @@ module Utils.HTTP
( get
) where
import Control.Exception (SomeException, try)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as BS
import Data.Text (Text)
import qualified Data.Text as T
import Network.HTTP.Conduit
import qualified Data.ByteString.Lazy as BS
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Encoding as T
import Data.Text.IO as T
import Network.HTTP.Conduit (Manager)
import qualified Network.HTTP.Conduit as H
import qualified Network.HTTP.Simple as HS
import qualified Network.HTTP.Types.Status as Status
import Model.URL
get :: (ByteString -> Text) -> URL -> IO (Either Text Text)
get decode url = mapLeft (T.pack . show) <$> (try (unsafeGetPage decode url) :: IO (Either SomeException Text))
get :: Manager -> URL -> IO (Either Text Text)
get manager url = do
request <- H.parseRequest (T.unpack url)
unsafeGetPage :: (ByteString -> Text) -> URL -> IO Text
unsafeGetPage decode url = (decode . BS.toStrict) <$> simpleHttp (T.unpack url)
response <- H.httpLbs (HS.setRequestHeaders requestHeaders request) manager
let body = T.decodeUtf8 . BS.toStrict . H.responseBody $ response
let statusCode = Status.statusCode . H.responseStatus $ response
mapLeft :: (a -> c) -> Either a b -> Either c b
mapLeft f (Left l) = Left (f l)
mapLeft _ (Right r) = (Right r)
if statusCode >= 200 && statusCode < 300 then
return . Right $ body
else do
T.putStrLn . T.concat $
[ "Got status "
, T.pack . show $ statusCode
, " while fetching "
, url
, ":\n"
, body
]
return . Left $ body
where
requestHeaders =
[ ("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0")
, ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
, ("Accept-Language", "en-US,en;fr;q=0.5")
, ("Accept-Encoding", "gzip, deflate, br")
, ("Referer", "https://duckduckgo.com/")
, ("DNT", "1")
, ("Connection", "keep-alive")
, ("Upgrade-Insecure-Requests", "1")
]
This diff is collapsed.
import Data.Maybe (catMaybes)
import qualified Data.Text.IO as T
import Data.Maybe (catMaybes)
import qualified Data.Text.IO as T
import qualified Network.HTTP.Conduit as H
import Test.Hspec
import qualified Ads
import qualified FetchAd
import Model.Ad (Ad (..))
import qualified Parser.LeboncoinParser as LeboncoinParser
import qualified Parser.OuestFranceParser as OuestFranceParser
import qualified Parser.SeLogerParser as SeLogerParser
import Model.Ad (Ad (..))
import qualified Parser.LeboncoinParser as LeboncoinParser
-- import qualified Parser.OuestFranceParser as OuestFranceParser
-- import qualified Parser.SeLogerParser as SeLogerParser
main :: IO ()
main = do
manager <- H.newManager H.tlsManagerSettings
hspec $ do
describe "LeboncoinParser" $ do
......@@ -22,34 +25,36 @@ main = do
LeboncoinParser.parse ads `shouldBe` Ads.leboncoin
it "should parse ads from remote page" $ do
ads <- FetchAd.leboncoin ["https://www.leboncoin.fr/locations/offres/ile_de_france/?th=1"]
checkAds ads
describe "OuestFranceParser" $ do
it "should parse no results from empty string" $ do
OuestFranceParser.parse "" `shouldBe` []
it "should parse ads from page" $ do
rawOuestFranceAds <- T.readFile "src/test/resources/ouestFrance.html"
OuestFranceParser.parse rawOuestFranceAds `shouldBe` Ads.ouestFrance
it "should parse ads from remote page" $ do
ads <- FetchAd.ouestFrance ["https://www.ouestfrance-immo.com/louer/appartement/rennes-35-35000/"]
ads <- FetchAd.leboncoin
manager
["https://www.leboncoin.fr/annonces/offres/ile_de_france/"]
checkAds ads
describe "SeLogerParser" $ do
it "should parse no results from empty string" $ do
SeLogerParser.parse "" `shouldBe` []
it "should parse ads from page" $ do
ads <- T.readFile "src/test/resources/seLoger.html"
SeLogerParser.parse ads `shouldBe` Ads.seLoger
it "should parse ads from remote page" $ do
ads <- FetchAd.seLoger ["https://www.seloger.com/list.htm?tri=initial&idtypebien=2,1&idtt=2,5&naturebien=1,2,4&ci=690123"]
checkAds ads
-- describe "OuestFranceParser" $ do
--
-- it "should parse no results from empty string" $ do
-- OuestFranceParser.parse "" `shouldBe` []
--
-- it "should parse ads from page" $ do
-- rawOuestFranceAds <- T.readFile "src/test/resources/ouestFrance.html"
-- OuestFranceParser.parse rawOuestFranceAds `shouldBe` Ads.ouestFrance
--
-- it "should parse ads from remote page" $ do
-- ads <- FetchAd.ouestFrance ["https://www.ouestfrance-immo.com/louer/appartement/rennes-35-35000/"]
-- checkAds ads
--
-- describe "SeLogerParser" $ do
--
-- it "should parse no results from empty string" $ do
-- SeLogerParser.parse "" `shouldBe` []
--
-- it "should parse ads from page" $ do
-- ads <- T.readFile "src/test/resources/seLoger.html"
-- SeLogerParser.parse ads `shouldBe` Ads.seLoger
--
-- it "should parse ads from remote page" $ do
-- ads <- FetchAd.seLoger ["https://www.seloger.com/list.htm?tri=initial&idtypebien=2,1&idtt=2,5&naturebien=1,2,4&ci=690123"]
-- checkAds ads
checkAds :: [Ad] -> IO ()
checkAds ads = do
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment