getzip.py 5.24 KB
Newer Older
Douglas Goodwin's avatar
Douglas Goodwin committed
1
from ftplib import FTP
2
import sys, shutil, time
Douglas Goodwin's avatar
Douglas Goodwin committed
3
from os import stat
4
from string import Template
Douglas Goodwin's avatar
Douglas Goodwin committed
5 6
from datetime import datetime
import git
7
import pytz
Douglas Goodwin's avatar
Douglas Goodwin committed
8
from zipfile import ZipFile
9
from texttable import Texttable
10 11
import pandas as pd

12
from myconfig import ftp_server, ftp_user, ftp_pass, remotepath, DEBUG, repodir
Douglas Goodwin's avatar
Douglas Goodwin committed
13 14 15

# this script grabs Metro's latest GTFS archive for Nextrail,
# unpacks the archive and builds a README.md file.
Douglas Goodwin's avatar
Douglas Goodwin committed
16 17
# It then commits the unpacked files into the repository
# and pushes the changes to https://gitlab.com/LACMTA/gtfs_rail
Douglas Goodwin's avatar
Douglas Goodwin committed
18 19 20

# Local filename the downloaded GTFS archive is saved under.
myoutf="gtfs_rail.zip"
# GitPython handle on the checkout at `repodir` (from myconfig); used below
# for staging, committing, pulling and pushing.
r = git.Repo(repodir)

# Open and authenticate the FTP session the archive is fetched over.
ftp = FTP(ftp_server)
ftp.login(ftp_user, ftp_pass)

25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
def file_len(fname):
    """Return the number of lines in the text file *fname*.

    A final line without a trailing newline still counts as a line.
    An empty file yields 0 (previously the loop variable was never bound
    for an empty file, so the function raised UnboundLocalError).
    """
    with open(fname) as f:
        return sum(1 for _ in f)

def updateTerminalPickups(stoptimesfile='stop_times.txt',DEBUG=False):
    """Mark the last stop of every trip in *stoptimesfile* as "no pickup".

    The file is read as CSV (only the columns listed below are kept), the
    row with the highest ``stop_sequence`` of each ``trip_id`` gets
    ``pickup_type`` set to 1, and the result is written to the scratch
    file ``stop_times.csv`` in the current directory.  The scratch file
    replaces *stoptimesfile* only when both files have the same number of
    lines; otherwise a message is printed and the original is left alone.

    With *DEBUG* true, ``done!`` is printed before the replacement.
    """
    columns = ['trip_id','arrival_time','departure_time','stop_id','stop_sequence','stop_headsign','pickup_type','drop_off_type']
    scratch = 'stop_times.csv'

    frame = pd.read_csv(stoptimesfile, usecols=columns)

    # For every row, the index label of the row holding the largest
    # stop_sequence within the same trip.
    last_stop_rows = frame.groupby(['trip_id'], sort=False)['stop_sequence'].transform('idxmax')
    frame.loc[last_stop_rows, 'pickup_type'] = 1

    frame.to_csv(scratch, columns=columns, index=False, header=True)

    # Sanity check: the rewrite must not have gained or lost any lines.
    if int(file_len(scratch)) != int(file_len(stoptimesfile)):
        print('file length different')
        return

    if DEBUG:
        print('done!')
    shutil.move(scratch, stoptimesfile)

Douglas Goodwin's avatar
Douglas Goodwin committed
48
def writeREADME(tpl='README.tpl', outfile='README.md', thevars=None):
    """Render the template file *tpl* and write the result to *outfile*.

    *tpl* is read as a ``string.Template``; its ``$name`` placeholders are
    filled from the mapping *thevars*.  When *thevars* is omitted, the
    placeholders ``thefiletable``, ``thetimestamp``, ``thegitlog`` and
    ``ts`` are all replaced by empty strings.

    Raises KeyError if the template uses a placeholder that is missing
    from *thevars* (behaviour of ``Template.substitute``).
    """
    if thevars is None:
        thevars = {'thefiletable': '', 'thetimestamp': '', 'thegitlog': '', 'ts': ''}

    # Read the template; the context manager closes the handle, which the
    # earlier bare open() never did.
    with open(tpl) as template_file:
        src = Template(template_file.read())

    # do the substitution
    README = src.substitute(thevars)

    with open(outfile, "w") as f:
        f.write(README)

59 60 61 62 63 64 65 66 67 68 69
def writeFEEDINFO(tpl='feed_info.tpl', outfile='feed_info.txt', thevars=None):
    """Render the template file *tpl* and write the result to *outfile*.

    *tpl* is read as a ``string.Template``; its ``$name`` placeholders are
    filled from the mapping *thevars* (an empty mapping when omitted).

    Raises KeyError if the template uses a placeholder that is missing
    from *thevars* (behaviour of ``Template.substitute``).
    """
    if thevars is None:
        thevars = {}

    # Read the template; the context manager closes the handle, which the
    # earlier bare open() never did.
    with open(tpl) as template_file:
        src = Template(template_file.read())

    # do the substitution
    FEEDINFO = src.substitute(thevars)

    with open(outfile, "w") as f:
        f.write(FEEDINFO)

Douglas Goodwin's avatar
Douglas Goodwin committed
70 71 72 73 74 75 76 77 78 79 80 81 82 83
def dos2unix(afile,DEBUG=False):
    """Rewrite *afile* in place so that every line ends with a single LF.

    The file is read as bytes, split with ``bytes.splitlines()`` and
    written back with ``b'\\n'`` after each line, so the last line always
    ends with a newline.  With *DEBUG* true, the difference between the
    old and new byte counts is printed.
    """
    with open(afile, 'rb') as src:
        original = src.read()

    converted = [chunk + b'\n' for chunk in original.splitlines()]

    with open(afile, 'wb') as dst:
        dst.writelines(converted)

    if DEBUG:
        written = sum(len(chunk) for chunk in converted)
        print("Done. Stripped %s bytes." % (len(original)-written))

Douglas Goodwin's avatar
Douglas Goodwin committed
84 85 86 87 88 89 90 91 92
def getbinary(ftp, remotedir="/", remotef="hi.zip", outfile=None):
    """Download the remote file *remotef* from *remotedir* in binary mode.

    *ftp* is a logged-in ``ftplib.FTP``-like object.  The data is written
    to the local path *outfile*; when *outfile* is None it is written to
    the binary stream underlying ``sys.stdout`` instead (previously
    nothing at all was downloaded in that case).

    Returns whatever ``ftp.retrbinary`` returns, so the caller can log
    the outcome (previously the function always returned None).
    """
    ftp.cwd(remotedir)
    command = "RETR " + remotef

    if outfile is None:
        # Raw bytes must go to the binary buffer, not the text wrapper.
        return ftp.retrbinary(command, sys.stdout.buffer.write)

    with open(outfile, 'wb') as f:
        return ftp.retrbinary(command, f.write)

Douglas Goodwin's avatar
Douglas Goodwin committed
93
# Fetch the archive into `myoutf` (same name the rest of the script opens),
# then close the FTP session.  `resp` holds whatever getbinary() returns.
resp = getbinary(ftp, remotedir=remotepath, remotef="GTFS_Rail_Nextrain.zip", outfile=myoutf)
if DEBUG: print( "ftp server reponse: %s" %(resp))
ftp.quit()

97 98 99 100 101 102 103
# Append feed_info.txt from the current directory to the downloaded archive.
# NOTE(review): writeFEEDINFO() is only called further down the script, so
# the feed_info.txt stored here is whatever file already exists on disk at
# this point -- confirm that this ordering is intended.
with ZipFile(myoutf, mode='a') as zf:
    zf.write('feed_info.txt')

Douglas Goodwin's avatar
Douglas Goodwin committed
104
# let's work with the zip file
zobj = ZipFile(myoutf)
# File metadata of the archive (not read again in this part of the script).
stats = stat(myoutf)

# Build the version string used as README timestamp and commit message:
# the current local time in Los Angeles followed by the zone name.
tzstr = "America/Los_Angeles"
fmt = '%Y-%m-%d %H:%M:%S %Z'
dt = datetime.now(pytz.timezone(tzstr))
version = dt.strftime(fmt) + ' ' + tzstr
# Raw epoch seconds, passed to the README template as `ts`.
ts = time.time()

# unzip the contents into the current directory
zobj.extractall()

# fix the pickup codes
updateTerminalPickups(stoptimesfile='stop_times.txt',DEBUG=DEBUG)

Douglas Goodwin's avatar
Douglas Goodwin committed
121
# let's update the README.md file
flist = zobj.namelist()

table = Texttable()
# horizontal lines, vertical lines, intersection points of these lines, and the header line
table.set_chars(['','','','-'])
table.set_deco(table.HEADER | table.VLINES)
table.set_cols_align(["r", "c", "l"])
# First row is the table header; one row per archive member follows.
tablelist = []
tablelist.append(["Length", "Creation datetime", "Name"])

writeFEEDINFO(tpl='feed_info.tpl', outfile='feed_info.txt', thevars={})

print(flist)

# Normalise line endings of every extracted file, stage it for the commit
# and record its size and timestamp (as stored in the archive) for the README.
for fi in flist:
    dos2unix(fi,DEBUG)
    r.index.add([fi])
    info = zobj.getinfo(fi)
    # date_time is (year, month, day, hour, minute, second); seconds are dropped.
    dtstr = "%d-%02d-%02d %02d:%02d" % info.date_time[:5]
    tablelist.append([info.file_size, dtstr, fi])

# The archive is not read again below, so release the file handle.
zobj.close()

table.add_rows(tablelist)
thefiletable = table.draw()
if DEBUG: print(thefiletable)
Douglas Goodwin's avatar
Douglas Goodwin committed
148

149 150 151 152 153 154 155
# Output of `git log -1 --stat` (latest commit plus its file statistics),
# embedded in the README.
thegitlog = r.git.log('-1', '--stat')

# Values for the placeholders in README.tpl.
tplvars = {
    'thefiletable':thefiletable,
    'thetimestamp':version,
    'thegitlog':thegitlog,
    'ts':ts,
    }

writeREADME(tpl='README.tpl', outfile='README.md', thevars=tplvars)
Douglas Goodwin's avatar
Douglas Goodwin committed
160

161
# Only commit and push when a ".zip" path shows up among the files that
# `git diff --name-only` reports as changed (substring test on its output).
files_changed=r.git.diff('--name-only')
if (".zip" in files_changed):
    # stage the generated README.md and feed_info.txt together with the
    # original zip archive
    r.index.add(["README.md", "feed_info.txt", myoutf])

    # commit all the changes, using the version string as the message
    r.index.commit(version)

    # pull and push
    origin = r.remotes.origin
    pullinfo = origin.pull()
    pushinfo = origin.push()
    if DEBUG: print (pushinfo)