# check.jl
"""
    checkurl(url)

Issue a single HTTP GET request for `url` and log the outcome.

Redirects are not followed, SSL certificate verification is disabled, and
non-2xx status codes do not raise. The result is logged as one of:

- valid, when the status code is exactly 200,
- invalid because of a redirect,
- invalid because of any other status code,
- invalid because the request itself failed (timeout, DNS, connection error).

Returns `nothing`; the outcome is only reported through logging.
"""
function checkurl(url)
    res = try
        HTTP.get(
            url;
            readtimeout=2,
            redirect=false,
            status_exception=false,
            require_ssl_verification=false,
        )
    catch err
        # `status_exception=false` only covers HTTP status codes. Transport-level
        # failures still throw and would otherwise abort the check of all
        # remaining URLs. User interrupts must still propagate.
        err isa InterruptException && rethrow()
        @info "Invalid: Request failed at $url ($(sprint(showerror, err)))"
        return nothing
    end

    if res.status == 200
        @info "Valid URL $url"
    elseif HTTP.isredirect(res)
        @info "Invalid: Redirect at $url"
    else
        @info "Invalid: HTTP status code $(res.status) at $url"
    end
    return nothing
end

"""
    rebase(url, base)

Replace the protocol and host part of `url` with `base`.

Everything after the first `/` that follows the host is appended to `base`
unchanged, so `base` is expected to end with a `/`. A URL without any path
(for example `https://example.com`) yields `base` itself.

# Throws
- `ArgumentError` if `url` is not of the form `protocol://host[/path]`.
"""
function rebase(url, base)
    # The path group is optional so that host-only URLs are accepted as well.
    m = match(r"^(?<protocol>[a-zA-Z0-9]+)://(?<host>[^/]+)(?:/(?<path>.*))?$", url)
    if m === nothing
        throw(ArgumentError("cannot rebase $(repr(url)): not an absolute URL"))
    end
    # An unmatched optional group is `nothing`; treat it as an empty path.
    return base * something(m[:path], "")
end

"""
    check(base)

Read all URLs from the sitemap and check each one with `checkurl`.

When `base` is non-empty, every URL is first rewritten with `rebase(url, base)`
so that the check runs against `base` instead of the host named in the sitemap.
When `base` is empty, the URLs are checked exactly as listed.
"""
function check(base)
    # NOTE(review): `out_sitemap` is not defined in this function; presumably a
    # global defined elsewhere in the file or module. Confirm.
    urls = Sitemap.read_sitemap(out_sitemap)

    @info "Found $(length(urls)) URLs"
    @info "Checking URLs …"
    for url in urls
        # Log the URL as listed in the sitemap, before any rebasing.
        @debug "Checking URL $url …"
        target = isempty(base) ? url : rebase(url, base)
        checkurl(target)
    end
end