Commit 31829253 authored by Joris

Add seLoger parser

parent 96bbdbbe
......@@ -30,3 +30,7 @@ See [application.conf](application.conf).
## Email
`sendmail` command is used for notifications.
## Todo
Add tests on fetched data
......@@ -26,6 +26,7 @@ Library
, Model.URL
, Parser.LeboncoinParser
, Parser.OuestFranceParser
, Parser.SeLogerParser
Other-modules:
Parser.Utils
......
leboncoinUrls = []
ouestFranceUrls = []
seLogerUrls = ["https://www.seloger.com/list.htm?types=1&projects=1&furnished=0&rooms=2&places=%5B%7Bcp%3A75%7D%5D&qsVersion=1.0&bd=CartoToList_SL"]
mailFrom = "ad-listener@mail.com"
......
......@@ -13,6 +13,7 @@ import Model.URL
data Conf = Conf
{ leboncoinUrls :: [URL]
, ouestFranceUrls :: [URL]
, seLogerUrls :: [URL]
, mailFrom :: Text
, mailTo :: [Text]
, listenInterval :: NominalDiffTime
......@@ -27,6 +28,7 @@ parse path = do
Conf <$>
Conf.lookup "leboncoinUrls" conf <*>
Conf.lookup "ouestFranceUrls" conf <*>
Conf.lookup "seLogerUrls" conf <*>
Conf.lookup "mailFrom" conf <*>
Conf.lookup "mailTo" conf <*>
Conf.lookup "listenInterval" conf <*>
......
......@@ -16,6 +16,7 @@ import Model.Mail (Mail (Mail))
import Model.URL (URL)
import qualified Parser.LeboncoinParser as LeboncoinParser
import qualified Parser.OuestFranceParser as OuestFranceParser
import qualified Parser.SeLogerParser as SeLogerParser
import qualified Service.MailService as MailService
import qualified Utils.HTTP as HTTP
import qualified Utils.Time as TimeUtils
......@@ -50,7 +51,8 @@ fetchAds :: Conf -> IO [Ad]
fetchAds conf = do
leboncoinAds <- getLeboncoinAds conf
ouestFranceAds <- getOuestFranceAds conf
let results = leboncoinAds ++ ouestFranceAds
seLogerAds <- getSeLogerAds conf
let results = leboncoinAds ++ ouestFranceAds ++ seLogerAds
if null results
then T.putStrLn "Parsed 0 results!"
else return ()
......@@ -72,6 +74,14 @@ getOuestFranceAds conf =
. Conf.ouestFranceUrls
$ conf
-- | Download every SeLoger search page listed in the configuration and
-- collect the ads parsed from each page.
--
-- Each URL in 'Conf.seLogerUrls' is fetched with 'HTTP.get' (decoding the
-- body with 'T.decodeUtf8'). Only the 'Right' results are kept; 'Left'
-- results are dropped without being reported.
getSeLogerAds :: Conf -> IO [Ad]
getSeLogerAds conf = do
  responses <- mapM (HTTP.get T.decodeUtf8) (Conf.seLogerUrls conf)
  -- Keep the pages that were fetched successfully, in configuration order,
  -- and flatten the per-page ad lists into one list.
  return (concatMap SeLogerParser.parse (rights responses))
sendMail :: Conf -> [Ad] -> IO ()
sendMail conf ads =
let (title, plainBody) = Ad.renderAds ads
......
module Parser.SeLogerParser
( parse
) where
import Data.Maybe (catMaybes)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup
import Model.Ad (Ad (Ad))
import Parser.Utils
-- | Extract the ads from a SeLoger result page.
--
-- The page is tokenised with TagSoup and narrowed with 'getTagsBetween'
-- using the @liste_resultat@ section and the @bottomAnchorWrapper@ div as
-- markers (presumably the tags lying between them; see "Parser.Utils").
-- The remaining tags are split into one chunk per @c-pa-info@ div, and each
-- chunk is handed to 'parseAd'. Chunks that do not yield an ad are dropped.
parse :: Text -> [Ad]
parse page = catMaybes (map parseAd adChunks)
  where
    -- Tags of the result listing only.
    listing =
      getTagsBetween
        "<section class=liste_resultat>"
        "<div class=bottomAnchorWrapper>"
        (parseTags page)

    -- True on the opening tag of a single ad card.
    isAdStart tag = tag ~== (T.unpack "<div class=c-pa-info>")

    -- One tag list per ad card, each starting at its opening div.
    adChunks = partitions isAdStart listing
-- | Build an 'Ad' from the tags of a single result card.
--
-- The name (text looked up after the @\<a\>@ pattern), the location
-- (@c-pa-city@ div) and the link (@href@ of the @\<a\>@ pattern) are
-- mandatory: if any lookup fails the whole result is 'Nothing'. The price
-- (@c-pa-cprice@ span) is optional and stored as returned by the lookup.
--
-- The link is only given an @https:@ scheme when it is protocol-relative
-- (@\/\/host\/path@). Links that already carry a scheme are kept untouched;
-- prefixing them unconditionally produced malformed URLs of the form
-- @https:https:\/\/www.seloger.com\/...@.
parseAd :: [Tag Text] -> Maybe Ad
parseAd tags = do
  name <- getTagTextAfter "<a>" tags
  location <- getTagTextAfter "<div class=c-pa-city>" tags
  let price = getTagTextAfter "<span class=c-pa-cprice>" tags
  href <- getTagAttribute "<a>" "href" tags
  return (Ad name location price (withScheme href))
  where
    -- Add the scheme to protocol-relative links only.
    withScheme link
      | "//" `T.isPrefixOf` link = T.concat ["https:", link]
      | otherwise = link
......@@ -4,11 +4,13 @@ import Test.Hspec
import Model.Ad (Ad (..))
import qualified Parser.LeboncoinParser as LeboncoinParser
import qualified Parser.OuestFranceParser as OuestFranceParser
import qualified Parser.SeLogerParser as SeLogerParser
main :: IO ()
main = do
rawLeboncoinAds <- T.readFile "src/test/resources/leboncoin.html"
rawOuestFranceAds <- T.readFile "src/test/resources/ouestFrance.html"
rawSeLogerParser <- T.readFile "src/test/resources/seLoger.html"
hspec $ do
describe "LeboncoinParser" $ do
......@@ -26,8 +28,8 @@ main = do
, Ad {name = "locations garage / garde meuble", location = "Landerneau", price = Just "50 €", url = "https://www.leboncoin.fr/locations/1450174538.htm?ca=6_s"}
, Ad {name = "Studio meublé", location = "Lanester", price = Just "290 €", url = "https://www.leboncoin.fr/locations/1450268170.htm?ca=6_s"}
, Ad {name = "Ergué-Gabéric - Maison - 3 chambres -", location = "Ergué-Gabéric", price = Just "655 €", url = "https://www.leboncoin.fr/locations/1450267530.htm?ca=6_s"}
, Ad {name = "Studio meublé \224 2 mn de la gare", location = "Rennes", price = Just "475 €", url = "https://www.leboncoin.fr/locations/1450267333.htm?ca=6_s"}
, Ad {name = "Chambre \224 louer chez l,habitant", location = "Rennes", price = Just "450 €", url = "https://www.leboncoin.fr/locations/1450266273.htm?ca=6_s"}
, Ad {name = "Studio meublé à 2 mn de la gare", location = "Rennes", price = Just "475 €", url = "https://www.leboncoin.fr/locations/1450267333.htm?ca=6_s"}
, Ad {name = "Chambre à louer chez l,habitant", location = "Rennes", price = Just "450 €", url = "https://www.leboncoin.fr/locations/1450266273.htm?ca=6_s"}
, Ad {name = "Appartement 3 pièces 63 m\178", location = "Brest", price = Just "600 €", url = "https://www.leboncoin.fr/locations/1426927284.htm?ca=6_s"}
, Ad {name = "recherche location", location = "Plaintel", price = Just "550 €", url = "https://www.leboncoin.fr/locations/1450265866.htm?ca=6_s"}
, Ad {name = "Studio rénové - Centre Ville Lorient", location = "Lorient", price = Just "360 €", url = "https://www.leboncoin.fr/locations/1450265154.htm?ca=6_s"}
......@@ -90,3 +92,34 @@ main = do
]
OuestFranceParser.parse rawOuestFranceAds `shouldBe` ads
describe "SeLogerParser" $ do
it "should parse no results from empty string" $ do
SeLogerParser.parse "" `shouldBe` []
it "should parse ads from page" $ do
let ads =
[ Ad {name = "Appartement", location = "Paris 20ème", price = Just "1 219 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-20eme-75/plaine/134448375.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 16ème", price = Just "1 500 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-16eme-75/muette-sud/134049627.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 3ème", price = Just "1 595 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-3eme-75/arts-et-metiers/76038037.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 15ème", price = Just "1 612 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-15eme-75/citroen-boucicaut/135038561.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 16ème", price = Just "1 650 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-16eme-75/auteuil-sud/133027227.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 16ème", price = Just "1 875 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-16eme-75/chaillot/127190787.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 8ème", price = Just "1 985 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-8eme-75/saint-philippe-du-roule/127109353.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 1er", price = Just "2 090 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-1er-75/ile-de-la-cite/133982099.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 16ème", price = Just "2 240 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-16eme-75/porte-dauphine/118523093.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 17ème", price = Just "2 400 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-17eme-75/champerret-berthier/134621503.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 6ème", price = Just "2 055 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-6eme-75/saint-germain-des-pres/130192167.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 14ème", price = Just "1 300 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-14eme-75/jean-moulin-porte-d-orleans/133269543.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 16ème", price = Just "3 500 €", url = "https:https://www.bellesdemeures.com/annonces/locations/appartement/paris-16eme-75/133260359.htm?bd=CartoToList_SL&furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&cmp=INTSL_ListToDetail"}
, Ad {name = "Appartement", location = "Paris 17ème", price = Just "1 890 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-17eme-75/134972009.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 18ème", price = Just "1 390 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-18eme-75/clignancourt-jules-joffrin/134735501.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 6ème", price = Just "1 600 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-6eme-75/saint-germain-des-pres/134437989.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 11ème", price = Just "1 980 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-11eme-75/bastille-popincourt/134314607.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 13ème", price = Just "901 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-13eme-75/olympiades-choisy/134428153.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 12ème", price = Just "1 250 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-12eme-75/aligre-gare-de-lyon/133908165.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
, Ad {name = "Appartement", location = "Paris 17ème", price = Just "1 100 €", url = "https:https://www.seloger.com/annonces/locations/appartement/paris-17eme-75/legendre-levis/134131479.htm?furnished=0&places=%5b%7bcp%3a75%7d%5d&projects=1&qsversion=1.0&rooms=2&types=1&bd=ListToDetail"}
]
SeLogerParser.parse rawSeLogerParser `shouldBe` ads
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment