.issuebot.py 20.7 KB
Newer Older
1 2
#!/usr/bin/env python3

3
import configparser
4 5
import git
import gitlab
6
import github
7
import glob
8
import hashlib
9 10 11
import os
import re
import requests
12
import shutil
13 14
import subprocess
import sys
Hans-Christoph Steiner's avatar
Hans-Christoph Steiner committed
15
import time
16
import traceback
17

18 19
from androguard.core.bytecodes.apk import APK
from fdroidserver import net
20
from urllib.parse import parse_qs, urlparse
21

22
# All patterns are raw strings so that regex escapes such as \d and \. are
# not (mis)interpreted as Python string escapes.

# a dotted Java/Android package name with at least two segments
JAVA_PACKAGENAME = r'''(?:[a-zA-Z]+(?:\d*[a-zA-Z_]*)*)(?:\.[a-zA-Z]+(?:\d*[a-zA-Z_]*)*)+'''
# links to an app's Google Play page
GPLAY_PATTERN = re.compile(r'http[s]?://play\.google\.com/store/apps/details\?id=' + JAVA_PACKAGENAME)
# links to a repository on one of the known source code hosts; the dots in
# the host names are escaped so they only match a literal "."
GIT_PATTERN = re.compile(r'http[s]?://(codeberg\.org|github\.com|gitlab\.com|bitbucket\.org|git\.code\.sf\.net)/[\w.-]+/[\w.-]+')
# free-text mentions like "Package ID: org.example.app"
PACKAGE_ID_PATTERN = re.compile(r'(APPLICATION|PACKAGE) ?(ID|NAME):?\s*' + JAVA_PACKAGENAME, re.IGNORECASE | re.DOTALL)
# find all repositories that use plain HTTP urls (e.g. not HTTPS)
HTTP_GRADLE_PATTERN = re.compile(r'repositories\s*{[^}]*http://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+[^}]*}', re.DOTALL)
# a path that ends in metadata/<package name>.yml
METADATA_PATTERN = re.compile(r'metadata/' + JAVA_PACKAGENAME + r'\.yml$')
# the href of the "Download" link in a fetched HTML page (bytes pattern)
IZZYSOFT_PATTERN = re.compile(rb'''.*<a [^>]* href=['"](.+)['"][^>]*>Download.*''')

# sent with every HTTP request made through the _requests_* helpers
HEADERS = {'User-Agent': 'F-Droid Issuebot'}

# credentials come from the environment; both are None when unset
PERSONAL_ACCESS_TOKEN = os.getenv('PERSONAL_ACCESS_TOKEN')
VIRUSTOTAL_API_KEY = os.getenv('VIRUSTOTAL_API_KEY')
35 36 37 38 39 40 41

def _requests_get(url, timeout=60):
    """GET url with the bot's HEADERS and return the requests response.

    timeout -- seconds to wait for the server before requests raises a
    Timeout; without it a stalled connection would block the bot forever.
    """
    return requests.get(url, headers=HEADERS, timeout=timeout)

def _requests_head(url, timeout=60):
    """HEAD url with the bot's HEADERS and return the requests response.

    timeout -- seconds to wait for the server before requests raises a
    Timeout; without it a stalled connection would block the bot forever.
    """
    return requests.head(url, headers=HEADERS, timeout=timeout)

42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
def download_file(url):
    """Download url into the current directory, best effort.

    Any requests-level failure (HTTP error status, connection error,
    timeout, ...) is printed instead of raised, so one broken download
    does not abort the whole run.
    """
    try:
        net.download_file(url, dldir='.')
    except requests.exceptions.RequestException as e:
        # RequestException is the base class of HTTPError
        print(e)

def get_apk_from_github(url, labels):
    """Download the APK assets of the latest GitHub release of url.

    url -- repository URL; anything not starting with https://github.com/
    is ignored
    labels -- set of issue labels, 'in-github-releases' is added for each
    APK asset that gets downloaded

    GitHub API errors (unknown repository, no releases, ...) are printed
    and otherwise ignored.
    """
    if not url.startswith('https://github.com/'):
        return
    # GITHUB_TOKEN may be unset, in which case None is passed to the client
    token = os.getenv('GITHUB_TOKEN')
    g = github.Github(token)
    try:
        # 'https://github.com/<owner>/<name>' -> '<owner>/<name>'; the lookup
        # is inside the try since URLs scraped from issue text can be wrong
        repo = g.get_repo('/'.join(url.split('/')[3:5]))
        release = repo.get_latest_release()
        for asset in release.get_assets():
            if asset.browser_download_url.endswith('.apk'):
                print('Downloading from %s/releases/latest' % url)
                download_file(asset.browser_download_url)
                labels.add('in-github-releases')
    except github.GithubException as e:
        print(e)

63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
def get_virustotal_status(apkfilename, sha256, headers=None, apikey=None):
    """Get the status of the APK on VirusTotal, uploading as needed

    apkfilename -- path of the APK, only read when an upload is needed
    sha256 -- hex SHA-256 of the APK, sent as the 'resource' to look up
    headers -- optional HTTP headers, defaults to a "F-Droid" User-Agent
    apikey -- VirusTotal API key, falls back to VIRUSTOTAL_API_KEY

    returns the JSON info from VirusTotal, or an empty dict when no
    usable answer was received (e.g. HTTP 403).  An HTTP error status
    other than 403 ends the process via sys.exit(1).
    """
    if not headers:
        headers = {
            "User-Agent": "F-Droid"
        }
    if apikey is None:
        apikey = VIRUSTOTAL_API_KEY
    params = {
        'apikey': apikey,
        'resource': sha256,
    }
    needs_file_upload = False
    response = dict()
    while True:
        r = requests.post('https://www.virustotal.com/vtapi/v2/file/report',
                          params=params, headers=headers)
        if r.status_code == 200:
            response = r.json()
            # response_code 0 is treated as "file unknown, upload it"
            if response['response_code'] == 0:
                needs_file_upload = True
            break
        elif r.status_code == 204:
            print('Waiting for VirusTotal rate limiting...')
            time.sleep(10)  # wait for public API rate limiting
        elif r.status_code == 403:
            print('VirusTotal throws a 403 (API key invalid?)')
            break
        else:
            try:
                r.raise_for_status()
            except Exception:
                # keep the API key out of the printed traceback
                print(re.sub(r'apikey=[0-9a-f]+', '', traceback.format_exc()))
                sys.exit(1)
            # a non-error status that is neither 200 nor 204: give up
            # instead of re-posting in a tight loop
            print('Unexpected VirusTotal status code: %d' % r.status_code)
            break

    if needs_file_upload:
        print('Uploading ' + apkfilename + ' to virustotal')
        # the with block closes the APK once the upload request is done
        with open(apkfilename, 'rb') as fp:
            files = {
                'file': (os.path.basename(apkfilename), fp)
            }
            r = requests.post('https://www.virustotal.com/vtapi/v2/file/scan',
                              params=params, headers=headers, files=files)
        response = r.json()
    return response

109

110 111 112 113 114 115
# Connect to gitlab.com with the token from the environment and fetch the
# fdroid/rfp project whose issues are processed below.
gl = gitlab.Gitlab('https://gitlab.com', api_version=4,
                   private_token=PERSONAL_ACCESS_TOKEN)
rfp = gl.projects.get('fdroid/rfp')

# Working directories used by the rest of the script.
os.makedirs('build', exist_ok=True)
os.makedirs('metadata', exist_ok=True)
os.makedirs('repo', exist_ok=True)
# empty placeholder icon file
with open('fdroid-icon.png', 'w') as fp:
    fp.write('')
# presumably read by the fdroid commands this script runs -- TODO confirm
with open('config.py', 'w') as fp:
    # the file may receive the VirusTotal API key, so keep it owner-only
    os.chmod('config.py', 0o600)
    fp.write('repo_pubkey = "deadbeef"\n')
    fp.write('make_current_version_link = False\n')
    fp.write('androidobservatory = True\n')
    if VIRUSTOTAL_API_KEY:
        fp.write('virustotal_apikey = "%s"\n' % VIRUSTOTAL_API_KEY)
125

126
# Main loop: look at open issues that do not carry the 'fdroid-bot' label
# yet, gather information about the requested app (source repo, app ID,
# availability elsewhere, gradle wrapper sanity, scanner results) and post
# the findings as a note plus labels on the issue.
processed = 0
# per_page: by default, GitLab returns only 20 entries. At max unfortunately 100
for issue in rfp.issues.list(state='opened', order_by='updated_at', per_page=100):
    note = ''
    security = ''
    labels = set(issue.labels)
    if 'fdroid-bot' in labels:
        # print('Skipping %s (already has fdroid-bot label).' % issue.title)
        continue
    # limit the amount of work per run: at most 22 issues are handled
    if processed > 21:
        print('Processed %d app, quitting.' % processed)
        break
    processed += 1
    labels.add('fdroid-bot')

    print('Checking', issue.title)

    # guard against a missing (None) description so the regex searches
    # below always get a string
    description = issue.description or ''

    # --- source code repositories mentioned in the issue ---
    git_urls = set()
    for m in GIT_PATTERN.finditer(description):
        url = m.group(0)
        if url.startswith('http://'):
            url = 'https://' + url[7:]
        if not url.startswith('https://gitlab.com/fdroid/'):
            git_urls.add(url)
            labels.add('git-url')
    for git_url in git_urls:
        get_apk_from_github(git_url, labels)

    # --- find the app ID: Google Play link first ---
    appid = None
    gplay_urls = set()
    for m in GPLAY_PATTERN.finditer(description):
        gplay_urls.add(m.group(0))
    for url in sorted(gplay_urls):
        appid = parse_qs(urlparse(url).query)['id'][0]
        cmd = ['gplaycli', '-d', appid]
        print('$ %s' % ' '.join(cmd))
        # stdout must be piped, otherwise sp.stdout is always None
        sp = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            universal_newlines=True, errors='replace')
        print(sp.stdout)

    # --- ... then an explicit "Package ID: ..." line ---
    if not appid:
        m = PACKAGE_ID_PATTERN.search(description)
        if m:
            # strip the "Package ID:" style prefix; the colon is optional in
            # PACKAGE_ID_PATTERN, so splitting on ':' is not reliable
            appid = re.sub(r'^(?:APPLICATION|PACKAGE) ?(?:ID|NAME):?\s*', '',
                           m.group(0), count=1, flags=re.IGNORECASE)

    # --- ... then let `fdroid import` work it out from the source repo ---
    if not appid:
        for url in git_urls:
            cmd = ['fdroid', 'import', '-Wwarn', '--url', url]
            print('$ %s' % ' '.join(cmd))
            # stdout must be piped so a failure can be quoted in the note
            p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                               universal_newlines=True, errors='replace')
            if p.stdout:
                print(p.stdout)
            if p.returncode == 0:
                for f in glob.glob(os.path.join('metadata', '*.yml')):
                    with open(f, errors='surrogateescape') as fp:
                        if url in fp.read():
                            m = METADATA_PATTERN.search(f)
                            if m:
                                # cut 'metadata/' and '.yml' off the match
                                appid = m.group(0)[9:-4]
            elif p.stdout:
                note += '\n\n## fdroid import\n\n'
                note += '%s:\n\n```console\n$ %s\n' % (url, ' '.join(cmd))
                note += p.stdout + '\n```\n\n'

    # --- ... and finally from any APK in the current directory ---
    if not appid:
        for f in glob.glob('*.apk'):
            try:
                appid = APK(f).get_package()
                break
            except Exception as e:
                print(e)

    # --- where is the app already available? ---
    apkfilename = None
    if appid:
        apkfilename = appid + '.apk'
        fdroiddata_url = 'https://gitlab.com/fdroid/fdroiddata/blob/master/metadata/' + appid + '.yml'
        r = _requests_head(fdroiddata_url)
        if r.status_code == 200:
            note += 'Closing issue, since I found %s in F-Droid.  ' % appid
            note += 'If you think I made a mistake, please reopen this issue! For more info:\n\n'
            note += '* [fdroiddata](%s)\n' % fdroiddata_url
            note += '* [F-Droid wiki](https://f-droid.org/wiki/page/%s)\n' % appid
            note += '* [F-Droid GitLab](%s)\n' \
                    % ('https://gitlab.com/search?group_id=28397&scope=issues&search=' + appid)
            labels.add('in-fdroiddata')
            issue.state_event = 'close'
        url = 'https://f-droid.org/packages/' + appid
        r = _requests_head(url)
        if r.status_code == 200:
            note += '* [f-droid.org](%s)\n' % url
            labels.add('in-f-droid.org')

        gplay_urls.add('https://play.google.com/store/apps/details?id=' + appid)
        urls = set(gplay_urls)
        izzysoft_url = 'https://apt.izzysoft.de/fdroid/index/apk/' + appid
        urls.add(izzysoft_url)
        for url in urls:
            r = _requests_head(url)
            if r.status_code == 200:
                note += '* found %s\n' % url
                if url.startswith('https://play.google.com'):
                    labels.add('in-google-play')
                elif url.startswith('https://apt.izzysoft.de'):
                    labels.add('in-izzysoft')
                    print('looking for', apkfilename, 'in izzysoft')
                    if not os.path.exists(apkfilename):
                        r = _requests_get(url)
                        m = IZZYSOFT_PATTERN.search(r.content)
                        if m:
                            dl_url = 'https://apt.izzysoft.de' + m.group(1).decode()
                            print('Downloading', dl_url)
                            # the stream=True parameter keeps memory usage low
                            r = requests.get(dl_url, stream=True, allow_redirects=True,
                                             headers=HEADERS)
                            if r.status_code == 200:
                                with open(apkfilename, 'wb') as fp:
                                    for chunk in r.iter_content(chunk_size=64*1024):
                                        if chunk:  # filter out keep-alive new chunks
                                            fp.write(chunk)
                                    fp.flush()

    # --- inspect the first reachable source repository ---
    repo = None
    for url in sorted(git_urls):
        r = _requests_head(url)
        if r.status_code == 200:
            if appid:
                metadata_file = os.path.join('metadata', appid + '.yml')
                if not os.path.exists(metadata_file):
                    with open(metadata_file, 'w') as fp:
                        fp.write('RepoType: git\nRepo: %s\n' % url)
                repo_path = os.path.join('build', appid)
                if os.path.exists(repo_path):
                    repo = git.Repo(repo_path)
                else:
                    print('$ git clone', url, repo_path)
                    repo = git.Repo.clone_from(url, repo_path)

                print('Checking for gradle')
                insecure_repositories = set()
                for root, dirs, files in os.walk(repo_path):
                    for f in files:
                        if f.endswith('.gradle') or f.endswith('.gradle.kts'):
                            labels.add('gradle')
                            path = os.path.join(root, f)
                            if os.path.exists(path):
                                with open(path, errors='surrogateescape') as fp:
                                    data = fp.read()
                                # must not be named `url`: the git URL of the
                                # outer loop is still needed further down
                                for http_repo in HTTP_GRADLE_PATTERN.findall(data):
                                    print('Found plain HTTP URL for gradle repository:\n%s\n%s'
                                          % (path, http_repo))
                                    insecure_repositories.add(http_repo)
                        elif f.endswith('.java'):
                            labels.add('java')
                        elif f.endswith('.kt'):
                            labels.add('kotlin')
                        elif f == 'App.js' or f == 'App.jsx':
                            labels.add('react-native')
                        elif f == 'Android.mk' or f == 'Application.mk':
                            labels.add('ndk')
                        elif f.endswith('.cc') or f.endswith('.cpp'):
                            labels.add('c++')
                    for d in dirs:
                        if d == 'fastlane':
                            labels.add('fastlane')
                if insecure_repositories:
                    security += ('* gradle build uses %d plain HTTP URLs for repositories!  This is insecure! See: %s\n'
                                 % (len(insecure_repositories),
                                    'https://max.computer/blog/how-to-take-over-the-computer-of-any-java-or-clojure-or-scala-developer/'))
                    for http_repo in insecure_repositories:
                        security += '  * %s\n' % http_repo
                    labels.add('insecure-repositories')

                prop = os.path.join(repo_path, 'gradle', 'wrapper', 'gradle-wrapper.properties')
                if os.path.exists(prop):
                    labels.add('gradle')
                    with open(prop, errors='surrogateescape') as fp:
                        propdata = fp.read()
                    config = configparser.ConfigParser()
                    config.read_string('[DEFAULT]\n' + propdata)  # fake a INI file
                    # .properties files escape the colon as "\:"
                    gradle_url = config['DEFAULT']['distributionUrl'].replace('\\', '')

                    gradle_jar = os.path.join(repo_path, 'gradle', 'wrapper', 'gradle-wrapper.jar')
                    # the jar can only be verified if the repo actually ships it
                    if os.path.exists(gradle_jar):
                        hasher = hashlib.sha256()
                        with open(gradle_jar, 'rb') as fp:
                            hasher.update(fp.read())
                        # bytes, since it is compared against r.content below
                        sha256 = hasher.hexdigest().encode()

                        wrapper_url = re.sub(r'(bin|all)\.zip', r'wrapper.jar', gradle_url)
                        r = requests.get(wrapper_url + '.sha256', allow_redirects=True, headers=HEADERS)
                        if r.status_code == 200:
                            if not r.content.startswith(sha256):
                                note += ('* _gradle/wrapper/gradle-wrapper.jar_ does not match the wrapper version ('
                                         + gradle_url + ').  See update and verify instructions: '
                                         + 'https://docs.gradle.org/current/userguide/gradle_wrapper.html#sec:upgrading_wrapper')
                                s = _requests_get('https://gradle.org/release-checksums/')
                                if s.status_code == 200:
                                    if sha256 not in s.content:
                                        labels.add('insecure-gradlew')
                                        security += ('* Custom _gradle/wrapper/gradle-wrapper.jar_, not found '
                                                     + 'in https://gradle.org/release-checksums: '
                                                     + sha256.decode() + '\n')
                                else:
                                    security += ('* failed to fetch https://gradle.org/release-checksums\n')
                        else:
                            labels.add('insecure-gradlew')
                            security += ('* _gradle/wrapper/gradle-wrapper.properties_ contains wrapper not on '
                                         + 'official website: ' + wrapper_url + '\n')

                    if gradle_url.startswith('http:'):
                        labels.add('insecure-gradlew')
                        security += '* Insecure HTTP gradle download, use _%s_!\n' \
                                    % gradle_url.replace('http:', 'https:')
                    if not gradle_url.startswith('https://services.gradle.org/distributions/gradle-'):
                        labels.add('insecure-gradlew')
                        security += ('* _gradle/wrapper/gradle-wrapper.properties_ uses non-standard source '
                                     + 'for downloading gradle: ' + gradle_url + '\n')
                    if 'distributionSha256Sum' not in config['DEFAULT']:
                        labels.add('insecure-gradlew')
                        security += ('* _gradle/wrapper/gradle-wrapper.properties_ is missing [distributionSha256Sum]('
                                     + 'https://docs.gradle.org/current/userguide/gradle_wrapper.html#sec:verification), '
                                     + 'unverified gradle download!')
                        r = requests.get(gradle_url + '.sha256', allow_redirects=True, headers=HEADERS)
                        if r.status_code == 200:
                            security += (' Here is an example of how to fix this:\n\n'
                                         + '```properties\n{propdata}\ndistributionSha256Sum={sha256}\n```\n\n'
                                         .format(propdata=propdata.strip(), sha256=r.content.decode()))
                        else:
                            security += '\n\n'

                cmd = ['fdroid', 'scanner', '--verbose', appid]
                print('$ %s' % ' '.join(cmd))
                scanner_output = subprocess.check_output(cmd, stderr=subprocess.STDOUT).decode()
                if 'WARNING:' in scanner_output:
                    labels.add('scanner-warning')
                if 'ERROR:' in scanner_output:
                    labels.add('scanner-error')
                note += '\n\n## fdroid scanner\n\n'
                note += '%s:\n\n```console\n$ %s\n' % (url, ' '.join(cmd))
                note += scanner_output + '\n```\n\n'

                # only the first repository that could be checked is used
                break

    # --- tag names with unusual characters ---
    report = ''
    if repo:
        print('Scanning tag/branch names')
        for tag in repo.tags:
            suspicious = set(re.findall(r'[^a-zA-Z0-9.,()/+_#@-]', tag.name))
            if suspicious:
                report += "* %s (%s)\n" % (tag, ''.join(sorted(suspicious)))
        if report:
            report = '\n\n### Suspicious tag/branch names\n\n' + report
            labels.add('suspicious-names')

    if security or report:
        note += '\n\n## Security Issues\n\n' + security + '\n\n' + report

    # --- external scanners ---
    scanners = ''
    if appid:
        scanners += '* [APK Scan %s](https://apkscan.org/?searchby=pkg&q=%s)\n' \
                    % (appid, appid)
        if os.path.exists(apkfilename):
            hasher = hashlib.sha256()
            with open(apkfilename, 'rb') as fp:
                while True:
                    chunk = fp.read(65536)
                    if not chunk:
                        break
                    hasher.update(chunk)
            sha256 = hasher.hexdigest()
            scanners += '* [APK Scan](https://apkscan.org/?searchby=hash&q=%s)\n' % sha256

            print('Checking VirusTotal')
            response = get_virustotal_status(apkfilename, sha256, apikey=VIRUSTOTAL_API_KEY)
            if response.get('response_code') is None:
                print('No response from VirusTotal')
            elif response['response_code'] < 0:
                labels.add('in-virustotal')
                scanners += ('* ' + apkfilename + ': [' + response.get('verbose_msg')
                             + '](https://www.virustotal.com/file/' + response['resource'] + ')\n')
            elif response['response_code'] == 0:
                scanners += ('* ' + apkfilename + ' has been uploaded to VirusTotal: ['
                             + response['verbose_msg'] + '](' + response['permalink'] + ')\n')
            else:
                if response.get('positives', 0) == 0:
                    labels.add('in-virustotal')
                    scanners += ('* ' + apkfilename + ' has [not been flagged by VirusTotal]('
                                 + response['permalink'] + ')\n')
                else:
                    labels.add('in-virustotal')
                    labels.add('flagged-by-virustotal')
                    scanners += ('* ' + apkfilename + ' has been [flagged by VirusTotal '
                                 + str(response['positives']) + ' times]('
                                 + response['permalink'] + ')\n')

        t = _requests_get('https://reports.exodus-privacy.eu.org/api/trackers')
        r = _requests_get(('https://reports.exodus-privacy.eu.org/api/search/%s' % appid))
        if t.status_code == 200 and r.status_code == 200:
            try:
                trackers = t.json()['trackers']
                reports = r.json()
                if appid in reports:
                    exodus = ''
                    reports = reports[appid]['reports']
                    if len(reports) > 0:
                        # only the first listed report is used
                        for trackernum in reports[0]['trackers']:
                            tracker = trackers[str(trackernum)]
                            exodus += '* [%s](%s)\n' % (tracker['name'], tracker['website'])
                    if exodus:
                        labels.add('trackers')
                        scanners += '### [Exodus Privacy](https://reports.exodus-privacy.eu.org/search)\n\n'
                        scanners += exodus
            except Exception as e:
                print(appid, '-', e)

    if scanners:
        note += '\n\n## External Scanners\n\n' + scanners + '\n\n'

    # --- publish the results on the issue ---
    # add the robot emoji unless the issue already has one
    if not any(emoji.name == 'robot' for emoji in issue.awardemojis.list()):
        issue.awardemojis.create({'name': 'robot'})

    if appid:
        labels.add(appid)
    if labels:
        issue.labels = sorted(labels)

    if note:
        issue.notes.create({'body': note})
    issue.save()
459 460 461 462 463 464 465 466 467 468 469

# attempt to upload all APKs to APK Scan
# Every APK collected in the current directory is moved into repo/, then
# `fdroid update` and `fdroid server update` are run; their exit codes are
# not checked.
cmd = 'fdroid update --create-metadata --nosign'
update_argv = cmd.split()
print('$ ' + cmd)
for apk in glob.glob('*.apk'):
    shutil.move(apk, 'repo')
subprocess.call(update_argv)

cmd = 'fdroid server update --verbose'
deploy_argv = cmd.split()
print('$ ' + cmd)
subprocess.call(deploy_argv)

print('DONE')