sitemap.jl 2.12 KB
Newer Older
1 2 3
module Sitemap
using LightXML

Jan Klass's avatar
Jan Klass committed
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
export write_sitemap, read_sitemap

"""
    read_sitemap(filename)

Parse the XML sitemap stored at `filename` and return the text content of every
`<loc>` element that is a direct child of a `<url>` element of the root `<urlset>`.

The result is an untyped `Vector`, in document order.

- If the root element is not `<urlset>`, an error is logged and the process is
  terminated with `exit(1)`.
- Children of `<urlset>` that are not `<url>` elements are skipped.
- `<url>` elements without a `<loc>` child are reported with a warning and skipped;
  the remaining entries are still read.
- Other children of `<url>` (anything not named `loc`) are ignored.
"""
function read_sitemap(filename)
    @info "Reading sitemap file $filename …"

    # Kept untyped so the returned container type stays what callers already receive.
    urls = []

    xdoc = LightXML.parse_file(filename)
    try
        xurlset = LightXML.root(xdoc)
        if LightXML.name(xurlset) != "urlset"
            @error "Invalid sitemap. Expected <urlset>, found <$(LightXML.name(xurlset))>."
            exit(1)
        end
        for xurl in LightXML.child_nodes(xurlset)
            # child_nodes yields more than the visible elements (presumably the
            # whitespace text between tags), so skip anything that is not an element.
            is_elementnode(xurl) || continue
            # Elements other than <url> are tolerated and skipped.
            LightXML.name(xurl) == "url" || continue
            if !has_children(xurl)
                @warn "Invalid sitemap. <url> must have a <loc> child."
                # Only this entry is unusable; keep reading the rest of the file.
                continue
            end
            found_loc = false
            for xloc in LightXML.child_nodes(xurl)
                is_elementnode(xloc) || continue
                # Only <loc> carries the URL; sibling elements are not URLs.
                LightXML.name(xloc) == "loc" || continue
                push!(urls, LightXML.content(xloc))
                found_loc = true
            end
            if !found_loc
                @warn "Sitemap contains invalid url element with missing required loc element."
            end
        end
    finally
        # LightXML documents must be released explicitly; do so even when parsing throws.
        LightXML.free(xdoc)
    end

    return urls
end
51

52 53 54 55 56 57 58 59 60 61 62 63
"""
    write_sitemap(filename, urls)

Write `urls` to `filename` as an XML sitemap.

The document has a `<urlset>` root carrying the sitemaps.org 0.9 `xmlns` attribute
and one `<url><loc>…</loc></url>` entry per element of `urls`, in iteration order.
Returns whatever `save_file` returns.
"""
function write_sitemap(filename, urls)
    xdoc = XMLDocument()
    try
        xurlset = create_root(xdoc, "urlset")
        set_attribute(xurlset, "xmlns", "http://www.sitemaps.org/schemas/sitemap/0.9")
        for url in urls
            xurl = new_child(xurlset, "url")
            xloc = new_child(xurl, "loc")
            add_text(xloc, url)
        end
        return save_file(xdoc, filename)
    finally
        # LightXML documents must be released explicitly; do so even if saving fails.
        LightXML.free(xdoc)
    end
end

64 65 66 67 68 69 70 71 72 73 74 75 76
"""
    sort(set::Set)

Return the elements of `set` as a new `Vector{Any}` sorted in ascending order.
`set` itself is not modified.
"""
function sort(set::Set)
    # Copy into an untyped vector first, then sort that copy in place.
    return sort!(collect(Any, set))
end

"""
    write_sitemap(filename, urls::Set)

Write the URLs in `urls` to `filename`, sorted first so the output order does not
depend on the set's iteration order. Forwards to `write_sitemap(filename, urls)`
with the sorted vector.
"""
write_sitemap(filename, urls::Set) = write_sitemap(filename, sort(urls))

77
end