Commit 0a4d3c8f authored by Joris's avatar Joris

Add ouest france parser

parent e2a5c7c5
dist
ad-listener.nix
dist/
local.conf
leboncoin-listener.nix
name: leboncoin-listener
name: ad-listener
windows:
- main:
......
all: build
# Dev commands
dev-start:
@nix-shell --command "tmuxinator local"
dev-stop:
@nix-shell --command "tmuxinator stop ad-listener"
# Other commands
clean:
@cabal clean > /dev/null
install:
@cabal2nix --shell . > ad-listener.nix
watch:
@nodemon -e hs,conf --exec 'make build-and-launch'
@make install && nix-shell ad-listener.nix --run "nodemon -e hs,conf --exec 'clear && make build-and-launch'"
build-and-launch:
@(pkill leboncoin-listener || true) && (cabal run || true)
@(pkill ad-listener || true) && (cabal run || true)
.PHONY: build
build:
@cabal build || true
@make install && nix-shell ad-listener.nix --run "cabal build || true"
repl:
@make install && nix-shell ad-listener.nix --run "cabal repl"
test:
@make install && nix-shell ad-listener.nix --run "cabal test"
leboncoin-listener
==================
# Ad-listener
leboncoin-listener listen for changes at a given URL on the website leboncoin
and send mails with a detail whenever new ads come up.
Ad-listener listen for changes at given URLs on
[leboncoin](https://www.leboncoin.fr/) and
[ouestFrance](https://www.ouestfrance-immo.com/). Then, it send mails whenever
new ads come up.
Getting started
---------------
## Getting started
Install nix:
1. Install [nix](https://nixos.org/nix/),
2. launch `make dev-start`,
3. later, stop the project with `make dev-stop`.
```
curl https://nixos.org/nix/install | sh
## Build executable
```bash
make build
```
Inside the project directory, open a nix shell:
## Run tests
```
./dev
```bash
make test
```
Configuration
-------------
## Configuration
See [application.conf](application.conf).
Email
-----
## Email
`sendmail` command is used for notifications.
Name: leboncoin-listener
Name: ad-listener
Version: 0.1
License: GPL-3
License-file: LICENSE
Author: Joris
Author: Joris Guyonvarch
Maintainer: joris@guyonvarch.me
Build-type: Simple
Cabal-version: >= 1.10
executable leboncoin-listener
Library
Hs-source-dirs: src/parser/haskell
Main-is: Main.hs
Hs-source-dirs: src
Default-language: Haskell2010
Ghc-options: -Wall -Werror
Default-language: Haskell2010
Build-depends:
base
, tagsoup
, text
Exposed-modules:
Model.Ad
, Model.URL
, Parser.LeboncoinParser
, Parser.OuestFranceParser
Other-modules:
Parser.Utils
Executable ad-listener
Hs-source-dirs: src/executable/haskell
Main-is: Main.hs
Ghc-options: -Wall -Werror
Default-language: Haskell2010
Build-depends:
base
, ad-listener
, blaze-html
, blaze-markup
, bytestring
, clay
, config-manager
, containers
, directory
, time
, http-conduit
, tagsoup
, mime-mail
, blaze-html
, blaze-markup
, clay
, config-manager
, tagsoup
, text
, time
other-modules:
AdListener
, Conf
, Fetch
, Mail
, Model.Ad
, Model.Detail
Other-modules:
Conf
, Model.Mail
, Model.Resume
, Model.URL
, Page
, Parser.Detail
, Parser.Resume
, Parser.Utils
, Time
, Service.AdListener
, Service.MailService
, Utils.Either
, View.Html.Ad
, View.Html.Design
, View.Plain.Ad
, Utils.HTTP
, Utils.Time
, View.Ad
Test-suite test
Hs-source-dirs: src/test/haskell
Main-is: Main.hs
Ghc-options: -Wall -Werror
Default-language: Haskell2010
Type: exitcode-stdio-1.0
Build-depends:
base
, hspec
, ad-listener
, text
urls = ["https://www.leboncoin.fr/locations/offres/ile_de_france/?f=a&th=1"]
mailFrom = "leboncoin-listener@mail.com"
mailTo = ["jean.dupont@mail.fr", "anne.smith@mail.com"]
properties = ["cp", "city", "surface", "ges"]
leboncoinUrls = []
ouestFranceUrls = []
mailFrom = "ad-listener@mail.com"
mailTo = []
listenInterval = 1 minute
devMode = False
importMaybe "local.conf"
#!/bin/sh
nix-shell --command "make clean build"
with import <nixpkgs> {}; {
env = stdenv.mkDerivation {
name = "env";
buildInputs = [
buildInputs = with nodePackages; with haskellPackages; [
cabal-install
cabal2nix
nodemon
stylish-haskell
tmux
tmuxinator
nodePackages.nodemon
(haskellPackages.ghcWithPackages (p: with p; [
text
bytestring
containers
directory
time
http-conduit
tagsoup
mime-mail
blaze-html
blaze-markup
clay
config-manager
]))
];
};
}
#!/bin/sh
nix-shell --command "tmuxinator local"
......@@ -2,6 +2,6 @@
"dependencies": {
"nodemon": "1.9.2"
},
"repository": "guyonvarch/leboncoin-listener",
"repository": "guyonvarch/ad-listener",
"license": "GPL-3.0"
}
{-# LANGUAGE OverloadedStrings #-}
module AdListener
( start
) where
import Prelude hiding (error)
import qualified Data.Text.IO as T
import Control.Concurrent (threadDelay)
import qualified Fetch
import Model.Ad
import Model.URL
import Model.Resume
import qualified View.Plain.Ad as P
import qualified View.Html.Ad as H
import Mail
import Model.Mail (Mail(Mail))
import Conf (Conf)
import qualified Conf
import Time (getCurrentFormattedTime)
start :: Conf -> IO ()
start conf = do
resumes <- Fetch.resumes . Conf.urls $ conf
let newURLs = map url resumes
T.putStrLn "Listening to new ads…"
waitListenInterval conf
listenToNewAdsWithViewedURLs conf newURLs
listenToNewAdsWithViewedURLs :: Conf -> [URL] -> IO ()
listenToNewAdsWithViewedURLs conf viewedURLs = do
resumes <- Fetch.resumes . Conf.urls $ conf
let (newURLs, newResumes) = getNewResumes viewedURLs resumes
eitherNewAds <- Fetch.ads newResumes
case eitherNewAds of
Left error -> do
T.putStrLn error
waitListenInterval conf
listenToNewAdsWithViewedURLs conf viewedURLs
Right newAds -> do
time <- getCurrentFormattedTime
if not (null newAds)
then
let message = P.renderConsoleAds conf time newAds
in T.putStrLn message >> sendMail conf newAds
else
return ()
waitListenInterval conf
listenToNewAdsWithViewedURLs conf (viewedURLs ++ newURLs)
sendMail :: Conf -> [Ad] -> IO ()
sendMail conf ads =
let (title, plainBody) = P.renderAds conf ads
htmlBody = H.renderAds conf ads
mail = Mail (Conf.mailFrom conf) (Conf.mailTo conf) title plainBody htmlBody
in Mail.send mail >> return ()
waitListenInterval :: Conf -> IO ()
waitListenInterval = threadDelay . (*) 1000000 . round . Conf.listenInterval
{-# LANGUAGE OverloadedStrings #-}
module Fetch
( resumes
, ads
) where
import Data.Text (Text)
import qualified Data.Text.IO as T
import Data.Either (rights)
import Page
import Model.Ad (Ad(Ad))
import Model.Resume (Resume)
import qualified Model.Resume as Resume
import Model.URL (URL)
import qualified Parser.Resume as Resume
import qualified Parser.Detail as Detail
resumes :: [URL] -> IO [Resume]
resumes urls = do
results <- fmap (concat . map Resume.parse . rights) . sequence . map Page.get $ urls
if null results
then T.putStrLn "Parsed 0 results!"
else return ()
return results
ads :: [Resume] -> IO (Either Text [Ad])
ads = fmap sequence . sequence . map ad
ad :: Resume -> IO (Either Text Ad)
ad resume = fmap (\x -> Ad resume (Detail.parse x)) <$> Page.get (Resume.url resume)
module Model.Ad
( Ad(..)
) where
import Model.Resume
import Model.Detail
data Ad = Ad
{ resume :: Resume
, detail :: Detail
} deriving (Eq, Read, Show)
module Model.Detail
( Detail(..)
) where
import Data.Text
import Data.Map (Map)
import Model.URL
data Detail = Detail
{ description :: Maybe Text
, images :: [URL]
, properties :: Map Text Text
} deriving (Eq, Read, Show)
module Model.Resume
( Resume(..)
, getNewResumes
, getURLs
) where
import Data.List ((\\))
import Data.Text (Text)
import Model.URL (URL)
data Resume = Resume
{ name :: Text
, price :: Maybe Text
, url :: URL
, isPro :: Bool
} deriving (Eq, Read, Show)
getNewResumes :: [URL] -> [Resume] -> ([URL], [Resume])
getNewResumes viewdURLs resumes =
let newURLs = (getURLs resumes) \\ viewdURLs
newResumes = filter (\resume -> elem (url resume) newURLs) resumes
in (newURLs, newResumes)
getURLs :: [Resume] -> [URL]
getURLs = map url
module Parser.Detail
( parse
) where
import Data.Text (Text)
import qualified Data.Text as T
import Data.Map (Map)
import qualified Data.Map as M
import Data.Maybe (catMaybes, fromMaybe)
import Text.HTML.TagSoup
import Model.Detail
import Parser.Utils
parse :: Text -> Detail
parse page =
let tags = parseTags page
in Detail
{ description = parseDescription tags
, images = map (\url -> T.concat [T.pack "https:", url]) $ getTagAttributes "<meta itemprop=image>" (T.pack "content") tags
, properties = parseProperties tags
}
parseDescription :: [Tag Text] -> Maybe Text
parseDescription tags =
let descriptionTags = getTagsBetween "<p itemprop=description>" "</p>" tags
in if null descriptionTags
then
Nothing
else
let replaceBr = map (\tag -> if tag ~== "<br>" then TagText (T.pack "\n") else tag)
in Just . T.strip . renderTags . replaceBr $ descriptionTags
parseProperties :: [Tag Text] -> Map Text Text
parseProperties tags =
let mbUtagData = getTagTextAfter "<script>" . getTagsAfter "</script>" . getTagsAfter "<body>" $ tags
in fromMaybe M.empty (fmap parseUtagData mbUtagData)
parseUtagData :: Text -> Map Text Text
parseUtagData =
M.fromList
. catMaybes
. fmap parseUtag
. T.splitOn (T.pack ",")
. T.takeWhile (/= '}')
. T.drop 1
. T.dropWhile (/= '{')
parseUtag :: Text -> Maybe (Text, Text)
parseUtag utag =
case T.splitOn (T.pack ":") utag of
[x, y] -> Just (T.strip x, removeQuotes y)
_ -> Nothing
removeQuotes :: Text -> Text
removeQuotes =
T.takeWhile (/= '\"')
. T.dropWhile (== '\"')
. T.strip
module Parser.Resume
( parse
) where
import Data.Maybe (catMaybes, isJust)
import Data.List (find)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup
import Model.Resume (Resume(Resume))
import Parser.Utils
parse :: Text -> [Resume]
parse page =
case dropWhile (not . hasClass (T.pack "section") (T.pack "tabsContent")) (parseTags page) of
[] ->
[]
sectionTags ->
let lbcTags = takeWhile (not . hasClass (T.pack "div") (T.pack "information-immo")) sectionTags
in catMaybes . fmap parseResume $ partitions (~== "<a>") lbcTags
parseResume :: [Tag Text] -> Maybe Resume
parseResume item = do
name <- getTagTextAfter "<h2 class=item_title>" item
let price = getTagTextAfter "<h3 class=item_price>" item
url <- getTagAttribute "<a>" (T.pack "href") item
let isPro = isJust . find (~== "<span class=ispro>") $ item
return (Resume name price (T.concat [T.pack "https:", url]) isPro)
{-# LANGUAGE OverloadedStrings #-}
module View.Html.Ad
( renderAds
) where
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Lazy (toStrict)
import Data.Maybe (catMaybes)
import Data.List (intersperse)
import Data.Map (Map)
import qualified Data.Map as M
import Text.Blaze.Html
import Text.Blaze.Html5 (Html)
import qualified Text.Blaze.Html5 as H
import qualified Text.Blaze.Html5.Attributes as A
import Text.Blaze.Html.Renderer.Text (renderHtml)
import Text.Blaze.Internal (textValue)
import Model.Ad (Ad)
import qualified Model.Ad as Ad
import Model.Resume (Resume)
import qualified Model.Resume as Resume
import Model.Detail (Detail)
import qualified Model.Detail as Detail
import Model.URL
import Conf (Conf)
import qualified Conf
import qualified View.Html.Design as Design
renderAds :: Conf -> [Ad] -> Text
renderAds conf = toStrict . renderHtml . (adsHtml conf)
adsHtml :: Conf -> [Ad] -> Html
adsHtml conf ads = do mapM_ (adHtml conf) ads
adHtml :: Conf -> Ad -> Html
adHtml conf ad =
let resume = Ad.resume ad
detail = Ad.detail ad
in do
resumeHtml resume
detailHtml conf detail
resumeHtml :: Resume -> Html
resumeHtml resume = do
H.h1 $ do
(toHtml . Resume.name $ resume)
case Resume.price resume of
Just price ->
H.span
! A.class_ "price"
! A.style (textValue . toStrict $ Design.price)
$ toHtml price
Nothing ->
H.span ""
if Resume.isPro resume
then
H.span
! A.class_ "pro"
! A.style (textValue . toStrict $ Design.pro)
$ "PRO"
else
""
linkHtml (Resume.url resume)
detailHtml :: Conf -> Detail -> Html
detailHtml conf detail = do
propertiesHtml (Conf.properties conf) (Detail.properties detail)
case Detail.description detail of
Just description ->
descriptionHtml description
Nothing ->
H.div ""
mapM_ imageLinkHtml (Detail.images detail)
propertiesHtml :: [Text] -> Map Text Text -> Html
propertiesHtml keys properties =
H.dl
! A.style (textValue . toStrict $ Design.definitionList)
$ sequence_ (catMaybes $ map (propertyHtml properties) keys)
propertyHtml :: Map Text Text -> Text -> Maybe Html
propertyHtml properties key =
fmap
(\value -> do
H.dt $ (toHtml key)
H.dd ! A.style (textValue . toStrict $ Design.definitionDescription) $ (toHtml value)
)
(M.lookup key properties)
descriptionHtml :: Text -> Html
descriptionHtml = H.p . sequence_ . intersperse H.br . fmap toHtml . T.lines
linkHtml :: URL -> Html
linkHtml url =
H.a ! A.href (textValue url) $ (toHtml url)
imageLinkHtml :: URL -> Html
imageLinkHtml url =
H.a ! A.href (textValue url) $
H.img
! A.src (textValue url)
! A.alt (textValue url)
{-# LANGUAGE OverloadedStrings #-}
module View.Html.Design
( definitionList
, definitionDescription
, price
, pro
) where
import Data.Text.Lazy (Text)
import qualified Data.Text.Lazy as T
import Clay
definitionList :: Text
definitionList = inlineRender $ do
fontWeight bold
fontSize (px 16)
definitionDescription :: Text
definitionDescription = inlineRender $ do
marginLeft (px 0)
marginBottom (px 10)
color orangered
pro :: Text
pro = inlineRender $ do
marginLeft (px 10)
color (rgb 122 179 88)
price :: Text
price = inlineRender $ do
marginLeft (px 10)
color orangered
inlineRender :: Css -> Text
inlineRender =
T.dropEnd 1
. T.drop 1
. renderWith compact []
{-# LANGUAGE OverloadedStrings #-}
module View.Plain.Ad
( renderConsoleAds
, renderAds
) where
import Data.Maybe (fromMaybe, catMaybes)
import Data.Map (Map)
import qualified Data.Map as M
import Data.Text (Text)
import qualified Data.Text as T
import Model.Ad (Ad)
import qualified Model.Ad as Ad
import Model.Resume (Resume)
import qualified Model.Resume as Resume
import Model.Detail (Detail)
import qualified Model.Detail as Detail
import Model.URL (URL)
import Conf (Conf)
import qualified Conf
renderConsoleAds :: Conf -> Text -> [Ad] -> Text
renderConsoleAds conf time ads =
let (title, message) = renderAds conf ads
titleWithTime =
T.concat
[ "\n["
, time
, "] "
, title
]
line = T.map (\_ -> '-') titleWithTime
in T.intercalate
"\n"
[ titleWithTime
, line
, ""
, message
]
renderAds :: Conf -> [Ad] -> (Text, Text)
renderAds conf ads =
let titleMessage = renderTitle $ length ads
adsMessage = T.intercalate "\n\n" . map (renderAd conf) $ ads
in (titleMessage, adsMessage)
renderTitle :: Int -> Text
renderTitle count =
T.concat
[ T.pack . show $ count
, agreement " nouvelle"
, agreement " annonce"
]
where agreement word =
T.concat
[ word
, if count > 1 then "s" else ""
]
renderAd :: Conf -> Ad -> Text
renderAd conf ad =
T.concat