...
 
Commits (2)
......@@ -35,13 +35,16 @@ print(c2.data)
## Changelog
2020.2.23.1.9000
2020.2.23.1.9001
* Scraper now gets ads' post IDs from ad URLs. Before, a deep scrape was
required to get post IDs.
* Data columns are rearranged so `post_id` and `datetime_scr` appear first.
* `datetime_scr` now contains seconds, so it will differ across pages if
`deep=False` or across ads if `deep=True`.
2020.2.23.1
* First release.
......
......@@ -189,6 +189,7 @@ class CLSearch:
"""Go to next results page."""
self.url = self.next_page_url
self.__navigate()
self.__find_next_page()
def __navigate(self):
"""Navigate the scraper.
......@@ -204,7 +205,6 @@ class CLSearch:
print(f"Parsing {self.url}\n")
self.reqc = r.content
self.soup = BeautifulSoup(self.reqc, "html.parser")
self.__find_next_page()
sleep(1 + uniform(1, 5)) # be polite
def __build_url(self, suffix):
......@@ -234,8 +234,9 @@ class CLSearch:
@staticmethod
def __get_datetime():
return datetime.now().strftime("%Y-%m-%d %H:%M")
return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
if __name__ == "__main__":
pass
s = CLSearch(geo='newjersey', query="'no section 8'")
df = s.data
......@@ -18,7 +18,7 @@ URL = 'https://gitlab.com/everetr/craigapts'
EMAIL = ''
AUTHOR = 'Everet Rummel'
REQUIRES_PYTHON = '>=3.7.0'
VERSION = '2020.2.23.1'
VERSION = '2020.2.23.1.9001'
# What packages are required for this module to be executed?
REQUIRED = [
......