# Container for the URL sets accumulated during a crawl.
# Fields are deliberately untyped (Set{Any}) so any URL representation the
# Sitemap module produces can be stored — TODO confirm element type with Sitemap.
struct CrawlData
    urls          # all known URLs
    urlsvisited   # URLs already crawled
    urlsvalid     # URLs that responded successfully
    urls404       # URLs that returned 404
    urls3xx       # URLs that returned a redirect
    urlsexternal  # URLs pointing outside the crawled site
    urlsfail      # URLs that failed for other reasons
    urlsignored   # URLs excluded from crawling
    CrawlData() = new(Set(), Set(), Set(), Set(), Set(), Set(), Set(), Set())
end

"""
    readurls(filename)

Read a sitemap file and return its URLs, or `nothing` when `filename`
does not exist.
"""
function readurls(filename)
    if isfile(filename)
        return Sitemap.read_sitemap(filename)
    else
        # Missing file is expected (e.g. first run) — signal with `nothing`,
        # not an exception.
        return nothing
    end
end

"""
    read(set::Set, filename)

Read URLs from the sitemap file `filename` (if it exists) and add them
to `set`. Does nothing when the file is absent.
"""
function read(set::Set, filename)
    urls = readurls(filename)
    # Identity comparison (`===`), never `== nothing`.
    if urls === nothing
        return
    end
    # Equivalent to push!-ing every URL individually.
    union!(set, urls)
    return
end

"""
    read_crawldata(data::CrawlData=CrawlData(), filenames::Filenames=Filenames())

Populate each set of `data` from the corresponding sitemap file named in
`filenames` and return `data`. Missing files are silently skipped.
"""
function read_crawldata(data::CrawlData=CrawlData(), filenames::Filenames=Filenames())
    @info "Reading sitemap files …"
    read(data.urls, filenames.urls)
    read(data.urlsvisited, filenames.urlsvisited)
    read(data.urlsvalid, filenames.urlsvalid)
    read(data.urls404, filenames.urls404)
    read(data.urls3xx, filenames.urls3xx)
    read(data.urlsexternal, filenames.urlsexternal)
    read(data.urlsfail, filenames.urlsfail)
    read(data.urlsignored, filenames.urlsignored)
    return data
end

"""
    write_crawldata(data::CrawlData, filenames::Filenames=Filenames())

Write each set of `data` to the corresponding sitemap file named in
`filenames` via `Sitemap.write_sitemap`.
"""
function write_crawldata(data::CrawlData, filenames::Filenames=Filenames())
    @info "Writing sitemap files …"
    Sitemap.write_sitemap(filenames.urls, data.urls)
    Sitemap.write_sitemap(filenames.urlsvisited, data.urlsvisited)
    Sitemap.write_sitemap(filenames.urlsvalid, data.urlsvalid)
    Sitemap.write_sitemap(filenames.urls404, data.urls404)
    Sitemap.write_sitemap(filenames.urls3xx, data.urls3xx)
    Sitemap.write_sitemap(filenames.urlsexternal, data.urlsexternal)
    Sitemap.write_sitemap(filenames.urlsfail, data.urlsfail)
    Sitemap.write_sitemap(filenames.urlsignored, data.urlsignored)
end

"""
    stringlengths(data::CrawlData)

Return a one-line summary string with the cardinality of every URL set
in `data` (for logging/progress output).
"""
function stringlengths(data::CrawlData)
    return """#urls: $(length(data.urls)), #visited: $(length(data.urlsvisited)), #valid $(length(data.urlsvalid)), #404 $(length(data.urls404)), #3xx $(length(data.urls3xx)), #external $(length(data.urlsexternal)), #fail $(length(data.urlsfail)), #ignored $(length(data.urlsignored)), """
end