# crawl-sitemap.jl
using HTTP

include("Sitemap.jl")
include("filenames.jl")
include("crawldata.jl")
include("crawl.jl")
include("check.jl")

"""
    print_usage(io::IO=stdout)

Print the command-line usage summary to `io`.

The summary names the two supported actions, `crawl` and `check`, each of which
takes a base URL, followed by one example invocation.

# Arguments
- `io::IO`: stream the text is written to. Defaults to `stdout`, so existing
  zero-argument calls behave as before.

# Returns
`nothing`.
"""
function print_usage(io::IO=stdout)
    println(io, "Usage: <action> <param>")
    println(io, "<action>:")
    println(io, "  crawl <base URL>")
    println(io, "  check <base URL>")
    println(io, "For example: crawl https://example.org/")
    return nothing
end

# Bind the global `filenames` to a value built by the zero-argument `Filenames`
# constructor.
# NOTE(review): `Filenames` is not defined in the visible part of this file and
# `filenames` is not read again here — presumably both belong to the included
# files; confirm.
filenames = Filenames()

# The script expects exactly two command-line arguments: <action> and <param>.
# With any other argument count it logs an error, prints the usage text and
# terminates the process with exit status 1.
if length(ARGS) == 2
    action, param = ARGS
else
    @error "Parameters <action> and <param> are required"
    print_usage()
    exit(1)
end
# Run the requested action.
#   "crawl" -> crawl_and_generate(param; forcehttps=true)
#   "check" -> check(param)
# Any other action is reported, the usage text is printed and the process exits
# with status 1.
# Exceptions raised while running are never swallowed: an `InterruptException`
# is logged first, and every exception is then propagated.
try
    if action == "crawl"
        # TODO: Make sure to use trailing slash
        crawl_and_generate(param; forcehttps=true)
    elseif action == "check"
        check(param)
    else
        @error "Unknown action $action"
        print_usage()
        exit(1)
    end
catch ex
    if ex isa InterruptException
        @info "Cancelled via interrupt"
    end
    # `rethrow()` continues propagation as if the exception had not been caught,
    # keeping the original backtrace; `throw(ex)` would raise it anew from here.
    rethrow()
end