Include all images from WordPress that are still used in posts

These images live in the 'wp-content' folder. The following script was used to scrape them:

#!/usr/bin/env python3

import re, os, glob, subprocess

# Scan the text sources of the website checkout for links to the old
# WordPress uploads (https://f-droid.org/wp-...) and mirror every referenced
# file into the current working directory, keeping the path it has on the
# server (e.g. wp-content/uploads/...).

# Only the '_*' directories of the checkout are scanned.
SITE_GLOB = '/home/hans/code/fdroid/fdroid-website/_*'
SITE_PREFIX = 'https://f-droid.org/'
# File name endings that are skipped instead of being read as text.
SKIPPED_SUFFIXES = ('.png', '.jpeg', '.jpg', '.ttf', '.svg', '.gif')
# A URL ends at whitespace (including newlines), a quote, an angle bracket
# or ')' so that HTML attributes and Markdown "![alt](url)" links do not
# drag the following text into the match.  The dots are escaped so only the
# real host name matches.
WP_URL_RE = re.compile(r'https://f-droid\.org/wp-[^\s"\'<>)]+')


def find_wp_urls(text):
    """Return the distinct WordPress URLs found in *text*, in first-seen order."""
    return list(dict.fromkeys(WP_URL_RE.findall(text)))


seen = set()  # URLs already handled, so each one is downloaded only once
for d in glob.glob(SITE_GLOB):
    for root, dirs, files in os.walk(d):
        # Prune in place: this is what actually stops os.walk from
        # descending into git metadata.
        dirs[:] = [name for name in dirs if name != '.git']
        for f in files:
            if f.lower().endswith(SKIPPED_SUFFIXES):
                continue
            path = os.path.join(root, f)
            try:
                with open(path, encoding='utf-8') as fp:
                    data = fp.read()
            except (UnicodeDecodeError, OSError) as e:
                # A stray binary or unreadable file must not abort the run.
                print('SKIP', path, e)
                continue
            for url in find_wp_urls(data):
                if url in seen:
                    continue
                seen.add(url)
                target = url[len(SITE_PREFIX):]
                dldir = os.path.dirname(target)
                print('URL', url, dldir)
                # os.makedirs('') raises, so only create real directories.
                if dldir:
                    os.makedirs(dldir, exist_ok=True)
                status = subprocess.call(['wget', '--continue', url,
                                          '--output-document', target])
                if status != 0:
                    print('FAILED', url, status)
parent 9aa1f451
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment