...
 
Commits (2)
class Base64Converter:
    """Encode bytes to Base64 text and decode Base64 text back to bytes.

    Uses the standard alphabet (``A-Z``, ``a-z``, ``0-9``, ``+``, ``/``)
    with ``=`` padding.  Input is consumed from file-like objects in
    fixed-size groups: 3 bytes when encoding, 4 characters when decoding.
    """

    # Standard Base64 alphabet; a character's index is its 6-bit value.
    _ALPHABET = (
        'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
    )
    # Reverse lookup (character -> 6-bit value), built once for all calls.
    _DECODE_TABLE = {digit: pos for pos, digit in enumerate(_ALPHABET)}

    def __init__(self):
        # Kept as an instance attribute because encode3bytes (and possibly
        # external callers) read the alphabet through ``self.d``.
        self.d = self._ALPHABET

    def encode(self, f):
        """Return the Base64 encoding of everything readable from *f*.

        Args:
            f: Object with a ``read(n)`` method returning ``bytes``; it is
                read 3 bytes at a time until ``read`` returns an empty value.

        Returns:
            The encoded text as a ``str`` (empty if *f* yields no data).

        Raises:
            ValueError: If ``f.read`` returns something other than ``bytes``.
        """
        # NOTE(review): a read that returns fewer than 3 bytes before the end
        # of the stream would emit '=' padding mid-output; buffered binary
        # files are assumed here.
        pieces = []
        data = f.read(3)
        while data:
            pieces.append(self.encode3bytes(data))
            data = f.read(3)
        # Join once instead of growing a string with += in the loop.
        return ''.join(pieces)

    def decode(self, f):
        """Return the bytes encoded by the Base64 text readable from *f*.

        Args:
            f: Object with a ``read(n)`` method returning ``str``; it is read
                4 characters at a time until ``read`` returns an empty value.

        Returns:
            The decoded data as ``bytes`` (empty if *f* yields no text).

        Raises:
            ValueError: If any 4-character group is not valid Base64 (this
                includes whitespace, newlines and a truncated final group).
        """
        table = self._DECODE_TABLE
        pieces = []
        data = f.read(4)
        while data:
            pieces.append(self.decode4chars(data, table))
            data = f.read(4)
        return b''.join(pieces)

    def encode3bytes(self, bytes3):
        """Encode 1 to 3 bytes as exactly 4 Base64 characters.

        Args:
            bytes3: A ``bytes`` object of length 1, 2 or 3.

        Returns:
            A 4-character ``str``; inputs shorter than 3 bytes are padded
            with ``=`` (two for 1 byte, one for 2 bytes).

        Raises:
            ValueError: If *bytes3* is not ``bytes`` or has the wrong length.
        """
        if not isinstance(bytes3, bytes) or not 1 <= len(bytes3) <= 3:
            raise ValueError('Input should be 1 to 3 bytes')
        d = self.d
        b1 = bytes3[0]
        # First digit: top 6 bits of byte 1.
        index1 = b1 >> 2
        if len(bytes3) == 1:
            # Second digit: low 2 bits of byte 1, remaining bits zero.
            index2 = (b1 & 3) << 4
            return f'{d[index1]}{d[index2]}=='
        b2 = bytes3[1]
        # Second digit: low 2 bits of byte 1 + top 4 bits of byte 2.
        index2 = (b1 & 3) << 4 | b2 >> 4
        if len(bytes3) == 2:
            # Third digit: low 4 bits of byte 2, remaining bits zero.
            index3 = (b2 & 15) << 2
            return f'{d[index1]}{d[index2]}{d[index3]}='
        b3 = bytes3[2]
        # Third digit: low 4 bits of byte 2 + top 2 bits of byte 3.
        index3 = (b2 & 15) << 2 | (b3 & 192) >> 6
        # Fourth digit: low 6 bits of byte 3.
        index4 = b3 & 63
        return f'{d[index1]}{d[index2]}{d[index3]}{d[index4]}'

    def decode4chars(self, s, d=None):
        """Decode one group of 4 Base64 characters into 1 to 3 bytes.

        Args:
            s: A 4-character ``str``.  The first two characters must be
                alphabet characters; the last two may be ``=`` padding, but
                padding may only be followed by more padding.
            d: Optional mapping from Base64 character to its 6-bit value.
                When omitted or empty, the standard table is used.

        Returns:
            ``bytes`` of length 3, 2 (one ``=``) or 1 (two ``=``).

        Raises:
            ValueError: If *s* is not a well-formed Base64 group.
        """
        alphabet = Base64Converter._ALPHABET
        well_formed = (
            isinstance(s, str)
            and len(s) == 4
            and all(ch in alphabet for ch in s[:2])
            and all(ch in alphabet + '=' for ch in s[2:])
            # 'xx=y' is malformed: reject it here rather than failing later
            # with a KeyError while looking up '='.
            and not (s[2] == '=' and s[3] != '=')
        )
        if not well_formed:
            raise ValueError(f'{s} is not a base64 encoded string')
        if not d:
            d = Base64Converter._DECODE_TABLE
        int1 = d[s[0]]
        int2 = d[s[1]]
        # Byte 1: all 6 bits of digit 1 + top 2 bits of digit 2.
        b1 = (int1 << 2) | ((int2 & 48) >> 4)
        if s[2:] == '==':
            return bytes([b1])
        int3 = d[s[2]]
        # Byte 2: low 4 bits of digit 2 + top 4 bits of digit 3.
        b2 = (int2 & 15) << 4 | int3 >> 2
        if s[3:] == '=':
            return bytes([b1, b2])
        int4 = d[s[3]]
        # Byte 3: low 2 bits of digit 3 + all 6 bits of digit 4.
        b3 = (int3 & 3) << 6 | int4
        return bytes([b1, b2, b3])
#!/usr/bin/env python3
import os
import sys
from base64 import Base64Converter
def _decoded_path(encoded_path):
    """Return the output path for *encoded_path*, or None if it has no stem
    followed by the '.b64txt' suffix.

    Only the file name is rewritten ('.b64txt' removed, '__' turned back into
    '.'); directory components are left untouched.
    """
    suffix = '.b64txt'
    folder, name = os.path.split(encoded_path)
    if not name.endswith(suffix) or name == suffix:
        return None
    # NOTE: a name that originally contained '__' does not round-trip, since
    # the encoder maps '.' to '__' and this maps every '__' back to '.'.
    return os.path.join(folder, name[:-len(suffix)].replace('__', '.'))


def main():
    """Decode the Base64 text file named by ``sys.argv[1]``.

    The decoded bytes are written next to the input, under the name produced
    by stripping '.b64txt' and replacing '__' with '.' in the file name.
    Exits with a message on stderr and a non-zero status when the argument is
    missing, the file does not exist, or the name lacks the '.b64txt' suffix.

    Raises:
        ValueError: Propagated from the decoder if the file is not valid
            Base64; no output file is created in that case.
    """
    try:
        path = sys.argv[1]
    except IndexError:
        sys.exit('Requires file to decode as argument.')
    try:
        infile = open(path, 'r')
    except FileNotFoundError:
        sys.exit(f'{path} file not found.')
    with infile:
        out_path = _decoded_path(path)
        if out_path is None:
            sys.exit(f'{path} does not end with .b64txt.')
        # Decode before creating the output so malformed input does not
        # leave an empty or truncated file behind.
        b64 = Base64Converter()
        decoded = b64.decode(infile)
    with open(out_path, 'wb') as outfile:
        outfile.write(decoded)


if __name__ == '__main__':
    main()
#!/usr/bin/env python3
import os
import sys
from base64 import Base64Converter
def _encoded_path(source_path):
    """Return the output path for the Base64 encoding of *source_path*.

    Only the file name is rewritten ('.' becomes '__', then '.b64txt' is
    appended); directory components such as './' or '../' are left untouched
    so the output lands next to the input.
    """
    folder, name = os.path.split(source_path)
    return os.path.join(folder, name.replace('.', '__') + '.b64txt')


def main():
    """Base64-encode the file named by ``sys.argv[1]``.

    The encoded text is written next to the input, under the input's file
    name with '.' replaced by '__' and '.b64txt' appended.  Exits with a
    message on stderr and a non-zero status when the argument is missing or
    the file does not exist.
    """
    try:
        path = sys.argv[1]
    except IndexError:
        sys.exit('Requires file to encode as argument.')
    try:
        infile = open(path, 'rb')
    except FileNotFoundError:
        sys.exit(f'{path} file not found.')
    # Both handles are closed even if encoding or writing fails.
    with infile, open(_encoded_path(path), 'w') as outfile:
        b64 = Base64Converter()
        outfile.write(b64.encode(infile))


if __name__ == '__main__':
    main()
Let's start by choosing three arbitrary bytes to convert into four base64
digits. To reason using bits, here is a bit pattern:
0101 1010 0010 1011 1110 0110
In hex this is:
5A2BE6
We will want to read in 3 bytes at a time and convert them into 4 base64
characters.
Let's read them in from a BytesIO object.
>>> import io
>>> f = io.BytesIO(b"\x5A\x2B\xE6")
>>> three_bytes = f.read()
>>> type(three_bytes)
<class 'bytes'>
>>> len(three_bytes)
3
Using https://en.wikipedia.org/wiki/Base64 as a guide, we can see that these
3 bytes should be converted into the following 4 Base64 characters:
010110 100010 101111 100110
which maps to the following Base64 characters:
Wivm
Stored one per byte, each 6-bit value has its two high-order bits set to 0,
giving the following bytes:
0001 0110 0010 0010 0010 1111 0010 0110
Let's write an end-to-end test for this whole process using a new
Base64Converter class.
>>> from base64 import Base64Converter
>>> b64 = Base64Converter()
>>> b64.encode3bytes(three_bytes)
'Wivm'
Working backwards from 'STOP' as the target output, add another test.
Using the Base64 table on the Wikipedia page, we find that 'STOP' is
010010 010011 001110 001111
which rearranged into bytes is
0100 1001 0011 0011 1000 1111
which in Hex is 49338F, and we have our test.
>>> b64.encode3bytes(b'\x49\x33\x8F')
'STOP'
The inverse adds another twist, which is that when a byte can be represented
by a printable ASCII character, that is how Python represents it by default:
>>> b64.decode4chars('STOP', '')
b'I3\x8f'
A very helpful resource for working with binary data in Python is:
https://www.devdungeon.com/content/working-binary-data-python
Now that we can convert back and forth between 3 bytes and
4 Base64 digits, our next task is to open a file containing bytes, read in
3 bytes at a time, convert them into 4 Base64 digits (characters), and write
them to another open text file.
We'll use byte encoded text, to make evaluation and debugging easier.
>>> f1 = io.BytesIO(b'This is a test. It is only a test!')
>>> from base64 import Base64Converter
>>> b64 = Base64Converter()
>>> s = b64.encode(f1)
>>> s
'VGhpcyBpcyBhIHRlc3QuIEl0IGlzIG9ubHkgYSB0ZXN0IQ=='
>>> f2 = io.StringIO(s)
>>> bs = b64.decode(f2)
>>> bs
b'This is a test. It is only a test!'