Commit 0eb7d5e0 authored by Joris's avatar Joris

WIP

parent e2a5c7c5
dist
cabal.nix
dist/
local.conf
leboncoin-listener.nix
all: build
# Dev commands
dev-start:
@nix-shell --command "tmuxinator local"
dev-stop:
@nix-shell --command "tmuxinator stop leboncoin-listener"
# Other commands
clean:
@cabal clean > /dev/null
install:
@cabal2nix --shell . > cabal.nix
watch:
@nodemon -e hs,conf --exec 'make build-and-launch'
@make install && nix-shell cabal.nix --run "nodemon -e hs,conf --exec 'clear && make build-and-launch'"
build-and-launch:
@(pkill leboncoin-listener || true) && (cabal run || true)
.PHONY: build
build:
@cabal build || true
@make install && nix-shell cabal.nix --run "cabal build || true"
repl:
@nix-shell cabal.nix --run "cabal repl"
leboncoin-listener
==================
# leboncoin-listener
leboncoin-listener listen for changes at a given URL on the website leboncoin
and send mails with a detail whenever new ads come up.
Getting started
---------------
## Getting started
Install nix:
1. Install [nix](https://nixos.org/nix/),
2. launch `make dev-start`,
3. later, stop the project with `make dev-stop`.
```
curl https://nixos.org/nix/install | sh
```
Inside the project directory, open a nix shell:
```
./dev
```
Configuration
-------------
## Configuration
See [application.conf](application.conf).
Email
-----
## Email
`sendmail` command is used for notifications.
urls = ["https://www.leboncoin.fr/locations/offres/ile_de_france/?f=a&th=1"]
leboncoinUrls = []
ouestFranceUrls = []
mailFrom = "leboncoin-listener@mail.com"
mailTo = ["jean.dupont@mail.fr", "anne.smith@mail.com"]
properties = ["cp", "city", "surface", "ges"]
mailTo = []
listenInterval = 1 minute
devMode = False
importMaybe "local.conf"
#!/bin/sh
nix-shell --command "make clean build"
with import <nixpkgs> {}; {
env = stdenv.mkDerivation {
name = "env";
buildInputs = [
buildInputs = with nodePackages; with haskellPackages; [
cabal-install
cabal2nix
nodemon
stylish-haskell
tmux
tmuxinator
nodePackages.nodemon
(haskellPackages.ghcWithPackages (p: with p; [
text
bytestring
containers
directory
time
http-conduit
tagsoup
mime-mail
blaze-html
blaze-markup
clay
config-manager
]))
];
};
}
#!/bin/sh
nix-shell --command "tmuxinator local"
......@@ -2,7 +2,7 @@ Name: leboncoin-listener
Version: 0.1
License: GPL-3
License-file: LICENSE
Author: Joris
Author: Joris Guyonvarch
Maintainer: joris@guyonvarch.me
Build-type: Simple
Cabal-version: >= 1.10
......@@ -10,8 +10,8 @@ Cabal-version: >= 1.10
executable leboncoin-listener
Main-is: Main.hs
Hs-source-dirs: src
Default-language: Haskell2010
Ghc-options: -Wall -Werror
Default-language: Haskell2010
Build-depends:
base
......@@ -28,22 +28,17 @@ executable leboncoin-listener
, clay
, config-manager
other-modules:
AdListener
, Conf
, Fetch
, Mail
Other-modules:
Conf
, Model.Ad
, Model.Detail
, Model.Mail
, Model.Resume
, Model.URL
, Page
, Parser.Detail
, Parser.Resume
, Parser.LeboncoinParser
, Parser.OuestFranceParser
, Parser.Utils
, Time
, Service.AdListener
, Service.MailService
, Utils.Either
, View.Html.Ad
, View.Html.Design
, View.Plain.Ad
, Utils.HTTP
, Utils.Time
, View.Ad
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
{-# LANGUAGE OverloadedStrings #-}
module AdListener
( start
) where
import Prelude hiding (error)
import qualified Data.Text.IO as T
import Control.Concurrent (threadDelay)
import qualified Fetch
import Model.Ad
import Model.URL
import Model.Resume
import qualified View.Plain.Ad as P
import qualified View.Html.Ad as H
import Mail
import Model.Mail (Mail(Mail))
import Conf (Conf)
import qualified Conf
import Time (getCurrentFormattedTime)
start :: Conf -> IO ()
start conf = do
resumes <- Fetch.resumes . Conf.urls $ conf
let newURLs = map url resumes
T.putStrLn "Listening to new ads…"
waitListenInterval conf
listenToNewAdsWithViewedURLs conf newURLs
listenToNewAdsWithViewedURLs :: Conf -> [URL] -> IO ()
listenToNewAdsWithViewedURLs conf viewedURLs = do
resumes <- Fetch.resumes . Conf.urls $ conf
let (newURLs, newResumes) = getNewResumes viewedURLs resumes
eitherNewAds <- Fetch.ads newResumes
case eitherNewAds of
Left error -> do
T.putStrLn error
waitListenInterval conf
listenToNewAdsWithViewedURLs conf viewedURLs
Right newAds -> do
time <- getCurrentFormattedTime
if not (null newAds)
then
let message = P.renderConsoleAds conf time newAds
in T.putStrLn message >> sendMail conf newAds
else
return ()
waitListenInterval conf
listenToNewAdsWithViewedURLs conf (viewedURLs ++ newURLs)
sendMail :: Conf -> [Ad] -> IO ()
sendMail conf ads =
let (title, plainBody) = P.renderAds conf ads
htmlBody = H.renderAds conf ads
mail = Mail (Conf.mailFrom conf) (Conf.mailTo conf) title plainBody htmlBody
in Mail.send mail >> return ()
waitListenInterval :: Conf -> IO ()
waitListenInterval = threadDelay . (*) 1000000 . round . Conf.listenInterval
......@@ -5,19 +5,20 @@ module Conf
, Conf(..)
) where
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.ConfigManager as Conf
import Data.Time.Clock (NominalDiffTime)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Time.Clock (NominalDiffTime)
import Model.URL
import Model.URL
data Conf = Conf
{ urls :: [URL]
, mailFrom :: Text
, mailTo :: [Text]
, properties :: [Text]
, listenInterval :: NominalDiffTime
{ leboncoinUrls :: [URL]
, ouestFranceUrls :: [URL]
, mailFrom :: Text
, mailTo :: [Text]
, listenInterval :: NominalDiffTime
, devMode :: Bool
} deriving Show
parse :: FilePath -> IO Conf
......@@ -26,12 +27,13 @@ parse path = do
(flip fmap) (Conf.readConfig path) (\configOrError -> do
conf <- configOrError
Conf <$>
Conf.lookup "urls" conf <*>
Conf.lookup "leboncoinUrls" conf <*>
Conf.lookup "ouestFranceUrls" conf <*>
Conf.lookup "mailFrom" conf <*>
Conf.lookup "mailTo" conf <*>
Conf.lookup "properties" conf <*>
Conf.lookup "listenInterval" conf
Conf.lookup "listenInterval" conf <*>
Conf.lookup "devMode" conf
)
case conf of
Left msg -> error (T.unpack msg)
Right c -> return c
Right c -> return c
{-# LANGUAGE OverloadedStrings #-}
module Fetch
( resumes
, ads
) where
import Data.Text (Text)
import qualified Data.Text.IO as T
import Data.Either (rights)
import Page
import Model.Ad (Ad(Ad))
import Model.Resume (Resume)
import qualified Model.Resume as Resume
import Model.URL (URL)
import qualified Parser.Resume as Resume
import qualified Parser.Detail as Detail
resumes :: [URL] -> IO [Resume]
resumes urls = do
results <- fmap (concat . map Resume.parse . rights) . sequence . map Page.get $ urls
if null results
then T.putStrLn "Parsed 0 results!"
else return ()
return results
ads :: [Resume] -> IO (Either Text [Ad])
ads = fmap sequence . sequence . map ad
ad :: Resume -> IO (Either Text Ad)
ad resume = fmap (\x -> Ad resume (Detail.parse x)) <$> Page.get (Resume.url resume)
......@@ -4,9 +4,8 @@ module Main
( main
) where
import qualified AdListener
import qualified Conf
import qualified Service.AdListener as AdListener
main :: IO ()
main = do
......
module Model.Ad
( Ad(..)
, getNewAds
) where
import Model.Resume
import Model.Detail
import Data.List ((\\))
import Data.Text (Text)
import Model.URL (URL)
data Ad = Ad
{ resume :: Resume
, detail :: Detail
{ name :: Text
, price :: Maybe Text
, url :: URL
} deriving (Eq, Read, Show)
getNewAds :: [URL] -> [Ad] -> ([URL], [Ad])
getNewAds viewdURLs ads =
let newURLs = (map url ads) \\ viewdURLs
newAds = filter (\ad -> elem (url ad) newURLs) ads
in (newURLs, newAds)
module Model.Detail
( Detail(..)
) where
import Data.Text
import Data.Map (Map)
import Model.URL
data Detail = Detail
{ description :: Maybe Text
, images :: [URL]
, properties :: Map Text Text
} deriving (Eq, Read, Show)
......@@ -2,12 +2,11 @@ module Model.Mail
( Mail(..)
) where
import Data.Text (Text)
import Data.Text (Text)
data Mail = Mail
{ from :: Text
, to :: [Text]
, subject :: Text
{ from :: Text
, to :: [Text]
, subject :: Text
, plainBody :: Text
, htmlBody :: Text
} deriving (Eq, Show)
module Model.Resume
( Resume(..)
, getNewResumes
, getURLs
) where
import Data.List ((\\))
import Data.Text (Text)
import Model.URL (URL)
data Resume = Resume
{ name :: Text
, price :: Maybe Text
, url :: URL
, isPro :: Bool
} deriving (Eq, Read, Show)
getNewResumes :: [URL] -> [Resume] -> ([URL], [Resume])
getNewResumes viewdURLs resumes =
let newURLs = (getURLs resumes) \\ viewdURLs
newResumes = filter (\resume -> elem (url resume) newURLs) resumes
in (newURLs, newResumes)
getURLs :: [Resume] -> [URL]
getURLs = map url
module Parser.Detail
( parse
) where
import Data.Text (Text)
import qualified Data.Text as T
import Data.Map (Map)
import qualified Data.Map as M
import Data.Maybe (catMaybes, fromMaybe)
import Text.HTML.TagSoup
import Model.Detail
import Parser.Utils
parse :: Text -> Detail
parse page =
let tags = parseTags page
in Detail
{ description = parseDescription tags
, images = map (\url -> T.concat [T.pack "https:", url]) $ getTagAttributes "<meta itemprop=image>" (T.pack "content") tags
, properties = parseProperties tags
}
parseDescription :: [Tag Text] -> Maybe Text
parseDescription tags =
let descriptionTags = getTagsBetween "<p itemprop=description>" "</p>" tags
in if null descriptionTags
then
Nothing
else
let replaceBr = map (\tag -> if tag ~== "<br>" then TagText (T.pack "\n") else tag)
in Just . T.strip . renderTags . replaceBr $ descriptionTags
parseProperties :: [Tag Text] -> Map Text Text
parseProperties tags =
let mbUtagData = getTagTextAfter "<script>" . getTagsAfter "</script>" . getTagsAfter "<body>" $ tags
in fromMaybe M.empty (fmap parseUtagData mbUtagData)
parseUtagData :: Text -> Map Text Text
parseUtagData =
M.fromList
. catMaybes
. fmap parseUtag
. T.splitOn (T.pack ",")
. T.takeWhile (/= '}')
. T.drop 1
. T.dropWhile (/= '{')
parseUtag :: Text -> Maybe (Text, Text)
parseUtag utag =
case T.splitOn (T.pack ":") utag of
[x, y] -> Just (T.strip x, removeQuotes y)
_ -> Nothing
removeQuotes :: Text -> Text
removeQuotes =
T.takeWhile (/= '\"')
. T.dropWhile (== '\"')
. T.strip
module Parser.LeboncoinParser
( parse
) where
import Data.Maybe (catMaybes)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup
import Model.Ad (Ad (Ad))
import Parser.Utils
parse :: Text -> [Ad]
parse page =
catMaybes . fmap parseAd $ partitions (~== "<a>") tags
where tags = getTagsBetween "<section class=tabsContent>" "<div class=information-immo>" (parseTags page)
parseAd :: [Tag Text] -> Maybe Ad
parseAd tags = do
name <- getTagTextAfter "<h2 class=item_title>" tags
let price = getTagTextAfter "<h3 class=item_price>" tags
url <- getTagAttribute "<a>" (T.pack "href") tags
return (Ad name price (T.concat [T.pack "https:", url]))
module Parser.OuestFranceParser
( parse
) where
import Data.Maybe (catMaybes)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup
import Model.Ad (Ad (Ad))
import Parser.Utils
parse :: Text -> [Ad]
parse page =
catMaybes . fmap parseAd $ partitions (~== "<a class=annLink>") tags
where tags = getTagsBetween "<div id=listAnnonces>" "<div id=interactions>" (parseTags page)
parseAd :: [Tag Text] -> Maybe Ad
parseAd tags = do
name <- getTagTextAfter "<span class=annTitre>" tags
let price = getTagTextAfter "<span class=annPrix>" tags
let startUrl = T.pack "https://www.ouestfrance-immo.com/"
url <- getTagAttribute "<a>" (T.pack "href") tags
return (Ad name price (T.concat [startUrl, url]))
module Parser.Resume
( parse
) where
import Data.Maybe (catMaybes, isJust)
import Data.List (find)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup
import Model.Resume (Resume(Resume))
import Parser.Utils
parse :: Text -> [Resume]
parse page =
case dropWhile (not . hasClass (T.pack "section") (T.pack "tabsContent")) (parseTags page) of
[] ->
[]
sectionTags ->
let lbcTags = takeWhile (not . hasClass (T.pack "div") (T.pack "information-immo")) sectionTags
in catMaybes . fmap parseResume $ partitions (~== "<a>") lbcTags
parseResume :: [Tag Text] -> Maybe Resume
parseResume item = do
name <- getTagTextAfter "<h2 class=item_title>" item
let price = getTagTextAfter "<h3 class=item_price>" item
url <- getTagAttribute "<a>" (T.pack "href") item
let isPro = isJust . find (~== "<span class=ispro>") $ item
return (Resume name price (T.concat [T.pack "https:", url]) isPro)
......@@ -5,16 +5,14 @@ module Parser.Utils
, getTagAttributes
, getTagAttribute
, getTagTextAfter
, hasClass
) where
import Data.List (find, findIndex)
import Data.Maybe (listToMaybe, catMaybes, isJust)
import Data.Text (Text)
import qualified Data.Text as T
import Data.List (find, findIndex)
import Data.Maybe (catMaybes, listToMaybe)
import Data.Text (Text)
import qualified Data.Text as T
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Match (tagOpen)
import Text.HTML.TagSoup
getTagsBefore :: String -> [Tag Text] -> [Tag Text]
getTagsBefore selector = takeWhile (~/= selector)
......@@ -40,7 +38,7 @@ getTagTextAfter :: String -> [Tag Text] -> Maybe Text
getTagTextAfter selector tags =
case findIndex (~== selector) tags of
Just index -> fmap T.strip $ safeGetAt (index + 1) tags >>= maybeTagText
Nothing -> Nothing
Nothing -> Nothing
maybeTagAttribute :: Text -> Tag Text -> Maybe Text
maybeTagAttribute name (TagOpen _ xs) =
......@@ -49,11 +47,3 @@ maybeTagAttribute _ _ = Nothing
safeGetAt :: Int -> [a] -> Maybe a
safeGetAt index = listToMaybe . drop index
hasClass :: Text -> Text -> Tag Text -> Bool
hasClass selector className =
tagOpen ((==) selector) (isJust . find matchClass)
where matchClass (name, values) =
( name == (T.pack "class")
&& (isJust . find ((==) className) . T.words $ values)
)
{-# LANGUAGE OverloadedStrings #-}
module Service.AdListener
( start
) where
import Control.Concurrent (threadDelay)
import Data.Either (rights)
import qualified Data.Text.IO as T
import Prelude hiding (error)
import Conf (Conf)
import qualified Conf
import Model.Ad (Ad)
import qualified Model.Ad as Ad
import Model.Mail (Mail (Mail))
import Model.URL (URL)
import qualified Parser.LeboncoinParser as LeboncoinParser
import qualified Parser.OuestFranceParser as OuestFranceParser
import qualified Service.MailService as MailService
import qualified Utils.HTTP as HTTP
import qualified Utils.Time as TimeUtils
import qualified View.Ad as Ad
start :: Conf -> IO ()
start conf = do
ads <- fetchAds conf
let newURLs = map Ad.url ads
T.putStrLn "Listening to new ads…"
waitListenInterval conf
listenToNewAdsWithViewedURLs conf newURLs
listenToNewAdsWithViewedURLs :: Conf -> [URL] -> IO ()
listenToNewAdsWithViewedURLs conf viewedURLs = do
ads <- fetchAds conf
let (newURLs, newAds) = Ad.getNewAds viewedURLs ads
time <- TimeUtils.getCurrentFormattedTime
if not (null newAds)
then
do
_ <- T.putStrLn (Ad.renderConsoleAds time newAds)
if Conf.devMode conf
then return ()
else sendMail conf newAds
else
return ()
waitListenInterval conf
listenToNewAdsWithViewedURLs conf (viewedURLs ++ newURLs)
fetchAds :: Conf -> IO [Ad]
fetchAds conf = do
putStrLn . show $ (Conf.leboncoinUrls conf)
putStrLn . show $ (Conf.ouestFranceUrls conf)
leboncoinAds <- fmap (concat . map LeboncoinParser.parse . rights) . sequence . map HTTP.get . Conf.leboncoinUrls $ conf
ouestFranceAds <- fmap (concat . map OuestFranceParser.parse . rights) . sequence . map HTTP.get . Conf.ouestFranceUrls $ conf
let results = leboncoinAds ++ ouestFranceAds
if null results
then T.putStrLn "Parsed 0 results!"
else return ()
return results
sendMail :: Conf -> [Ad] -> IO ()
sendMail conf ads =
let (title, plainBody) = Ad.renderAds ads
mail = Mail (Conf.mailFrom conf) (Conf.mailTo conf) title plainBody
in MailService.send mail >> return ()
waitListenInterval :: Conf -> IO ()
waitListenInterval = threadDelay . (*) 1000000 . round . Conf.listenInterval
{-# LANGUAGE OverloadedStrings #-}
module Mail
module Service.MailService
( send
) where
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as LT
import Data.Text.Lazy.Builder (toLazyText, fromText)
import Data.Either (isLeft)
import Control.Arrow (left)
import Control.Exception (SomeException, try)
import Data.Either (isLeft)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Lazy as LT
import Data.Text.Lazy.Builder (fromText, toLazyText)
import qualified Network.Mail.Mime as Mime
import Control.Exception (SomeException, try)
import Control.Arrow (left)
import qualified Network.Mail.Mime as Mime
import Model.Mail (Mail)
import qualified Model.Mail as Mail
import Model.Mail (Mail)
import qualified Model.Mail as Mail
send :: Mail -> IO (Either Text ())
send mail = do
......@@ -32,9 +30,7 @@ getMimeMail mail =
in fromMail
{ Mime.mailTo = map address . Mail.to $ mail
, Mime.mailParts =
[ [ Mime.plainPart . strictToLazy . Mail.plainBody $ mail
, Mime.htmlPart . strictToLazy . Mail.htmlBody $ mail