Commit 149170bb authored by adam j hartz

add support for underscores in numeric literals

resolves #22
parent 03efe4f2
......@@ -42,7 +42,7 @@ from builtins import open as _builtin_open
from codecs import lookup, BOM_UTF8
import collections
from io import TextIOWrapper
from itertools import chain
import itertools as _itertools
import re
import sys
from token import *
......@@ -189,19 +189,39 @@ Comment = r'#[^\r\n]*'
# Regular-expression fragments for tokens that carry no meaning, names,
# and numeric literals.
# NOTE(review): `any`, `maybe` and `group` are helpers defined outside this
# view; presumably they build "zero or more", "optional" and "alternation"
# regex groups respectively -- confirm against their definitions.
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
# A name is an optional leading '$' followed by one or more word characters.
Name = r'\$?\w+'
# NOTE(review): the next four assignments are rebound immediately below by
# the underscore-aware versions; presumably they are the pre-change side of
# this diff hunk and have no effect on the final patterns.
Hexnumber = r'0[xX][0-9a-fA-F]+'
Binnumber = r'0[bB][01]+'
Octnumber = r'0[oO][0-7]+'
Decnumber = r'(?:0+|[1-9][0-9]*)'
# Integer literals with digit separators: every digit may be preceded by at
# most one underscore, so an underscore can never be doubled or trailing.
# For the prefixed forms an underscore directly after the 0x/0b/0o prefix
# is accepted.
Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+'
Binnumber = r'0[bB](?:_?[01])+'
Octnumber = r'0[oO](?:_?[0-7])+'
# Decimal: either a run of zeros or a non-zero digit followed by digits,
# each optionally underscore-separated.
Decnumber = r'(?:0(?:_?0)*|[1-9](?:_?[0-9])*)'
Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
# NOTE(review): Exponent, Pointfloat and Expfloat are each assigned twice;
# the first three assignments (digits only, no underscores) are overwritten
# by the three that follow before Floatnumber is built -- presumably they
# are the pre-change diff lines.
Exponent = r'[eE][-+]?[0-9]+'
Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
Expfloat = r'[0-9]+' + Exponent
# Exponent: 'e' or 'E', an optional sign, then underscore-separated digits.
Exponent = r'[eE][-+]?[0-9](?:_?[0-9])*'
# Point float: digits, a dot and optional fraction digits, or a dot followed
# by digits; followed by an Exponent via maybe().
Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
# Exponent float: digits followed directly by an Exponent (no dot).
Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
# NOTE(review): Imagnumber is assigned twice; only the second,
# underscore-aware assignment is in effect when Number is built.
Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
# Imaginary literal: underscore-separated digits or a float, then 'j' or 'J'.
Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
# Imagnumber and Floatnumber are passed to group() before Intnumber.
# NOTE(review): presumably so the longer literal forms are tried first --
# confirm against group()'s definition.
Number = group(Imagnumber, Floatnumber, Intnumber)
# NOTE(review): StringPrefix is reassigned later in this hunk from
# _all_string_prefixes(); this hand-written pattern is presumably the
# pre-change line.
StringPrefix = r'(?:[bB][rR]?|[rR][bB]?|[uU])?'
# Return the empty string, plus all of the valid string prefixes.
def _all_string_prefixes():
# The valid string prefixes. Only contain the lower case versions,
# and don't contain any permuations (include 'fr', but not
# 'rf'). The various permutations will be generated.
_valid_string_prefixes = ['b', 'r', 'u', 'f', 'br', 'fr']
# if we add binary f-strings, add: ['fb', 'fbr']
result = set([''])
for prefix in _valid_string_prefixes:
for t in _itertools.permutations(prefix):
# create a list with upper and lower versions of each
# character
for u in _itertools.product(*[(c, c.upper()) for c in t]):
return result
# Note that since _all_string_prefixes includes the empty string,
# StringPrefix can be the empty string (making it optional).
# Every generated prefix is passed as a separate argument to group().
# NOTE(review): group() is defined outside this view; presumably it joins
# its arguments into a single alternation -- confirm.
StringPrefix = group(*_all_string_prefixes())
# Tail end of ' string.
# Matches what follows the opening quote: runs of characters other than
# "'" and backslash, interleaved with backslash-escaped characters, up to
# and including the closing "'".
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
......@@ -366,7 +386,7 @@ class Untokenizer:
startline = token[0] in (NEWLINE, NL)
prevstring = False
for tok in chain([token], iterable):
for tok in _itertools.chain([token], iterable):
toknum, tokval = tok[:2]
if toknum == ENCODING:
self.encoding = tokval
