Commit 9b540728 authored by Michael Büsch

Partially unify encoding handling

Signed-off-by: Michael Buesch <m@bues.ch>
parent 5add1eb4
......@@ -98,8 +98,7 @@ def main():
else:
inDataBytes = sys.stdin.buffer.read()
else:
inDataBytes = awlFileRead(opt_infile,
encoding="binary")
inDataBytes = safeFileRead(opt_infile)
if opt_inputParser:
tab = opt_inputParser.parseData(inDataBytes,
......
......@@ -719,6 +719,7 @@ class Project(object):
factory = ProjectFactory
ENCODING = XmlFactory.XML_ENCODING
DATETIME_FMT = "%Y-%m-%d %H:%M:%S.%f"
EnumGen.start
......@@ -888,7 +889,7 @@ class Project(object):
@classmethod
def detectType(cls, dataBytes):
try:
dataText = dataBytes.decode("utf-8")
dataText = dataBytes.decode(cls.ENCODING)
dataLines = dataText.splitlines()
magic_v0 = "[AWLSIM_PROJECT]"
magic_v1 = "<awlsim_project"
......@@ -903,7 +904,7 @@ class Project(object):
@classmethod
def detectFileType(cls, filename):
return cls.detectType(awlFileRead(filename, encoding="binary"))
return cls.detectType(safeFileRead(filename))
@classmethod
def dataIsProject(cls, dataBytes):
......@@ -915,7 +916,7 @@ class Project(object):
@classmethod
def fromText(cls, text, projectFile):
textBytes = text.encode("utf-8")
textBytes = text.encode(cls.ENCODING)
projectType = cls.detectType(textBytes)
if projectType == cls.TYPE_V0:
return LegacyProjectParser.parse(cls, text, projectFile)
......@@ -938,7 +939,12 @@ class Project(object):
@classmethod
def fromFile(cls, filename):
return cls.fromText(awlFileRead(filename, encoding="utf8"), filename)
try:
return cls.fromText(safeFileRead(filename).decode(cls.ENCODING), filename)
except UnicodeError as e:
raise AwlSimError("Project file: Failed to %s decode "
"project file '%s': %s" % (
cls.ENCODING, filename, str(e)))
@classmethod
def fromProjectOrRawAwlFile(cls, filename):
......@@ -964,7 +970,7 @@ class Project(object):
try:
factory = self.factory(project=self)
xmlBytes = factory.compose(lineBreakStr="\r\n", attrLineBreak=True)
xmlBytes = factory.compose(attrLineBreak=True)
xmlText = xmlBytes.decode(factory.XML_ENCODING)
except self.factory.Error as e:
raise AwlSimError("Project file: Failed to compose XML: "
......@@ -980,7 +986,13 @@ class Project(object):
raise AwlSimError("Project file: Cannot generate project file. "
"No file name specified.")
text = self.toText(projectFile)
awlFileWrite(projectFile, text, encoding="utf8")
try:
data = text.encode(self.ENCODING)
except UnicodeError as e:
raise AwlSimError("Project file: Failed to %s encode "
"project file '%s': %s" % (
self.ENCODING, projectFile, str(e)))
safeFileWrite(projectFile, data)
for awlSrc in self.awlSources:
awlSrc.writeFileBacking()
for symSrc in self.symTabSources:
......
......@@ -55,15 +55,20 @@ class SourceFactory(XmlFactory):
source = self.source
if tag.name == "source":
sourceData = "".join(self.__data)
# Strip leading and trailing line break.
idx = sourceData.find("\n")
if idx >= 0 and not sourceData[:idx].strip():
sourceData = sourceData[idx+1:]
idx = sourceData.rfind("\n")
if idx >= 0 and not sourceData[idx+1:].strip():
sourceData = sourceData[:idx]
if sourceData.endswith("\r"):
sourceData = sourceData[:-1]
if source.STRIP_DATA:
# Strip all leading and trailing white space.
sourceData = sourceData.strip()
else:
# Only strip leading and trailing line break
# that we added during compose.
idx = sourceData.find("\n")
if idx >= 0 and not sourceData[:idx].strip():
sourceData = sourceData[idx+1:]
idx = sourceData.rfind("\n")
if idx >= 0 and not sourceData[idx+1:].strip():
sourceData = sourceData[:idx]
if sourceData.endswith("\r"):
sourceData = sourceData[:-1]
# Add the data to the source.
try:
source.sourceBytes = sourceData.encode(source.ENCODING)
......@@ -88,8 +93,10 @@ class SourceFactory(XmlFactory):
else:
try:
data = source.sourceBytes.decode(source.ENCODING)
# Enforce UNIX line endings.
data = toUnixEol(data)
# Add leading and trailing line break.
data = "\r\n%s\r\n" % data
data = "\n%s\n" % data
except UnicodeError as e:
raise self.Error("Failed to decode source code data")
tags = [
......@@ -112,6 +119,7 @@ class GenericSource(object):
IDENT_HASH = hashlib.sha256
ENCODING = "<unknown>"
USE_CDATA = False
STRIP_DATA = False
factory = SourceFactory
......@@ -153,11 +161,11 @@ class GenericSource(object):
if not self.__identHash:
# Calculate the ident hash
h = self.IDENT_HASH(self.SRCTYPE.encode(
"utf-8", "strict"))
self.ENCODING, "strict"))
if self.name is not None:
h.update(self.name.encode("utf-8", "ignore"))
h.update(self.name.encode(self.ENCODING, "ignore"))
if self.filepath is not None:
h.update(self.filepath.encode("utf-8", "ignore"))
h.update(self.filepath.encode(self.ENCODING, "ignore"))
h.update(self.sourceBytes)
self.__identHash = h.digest()
return self.__identHash
......@@ -181,7 +189,7 @@ class GenericSource(object):
"Write the backing file, if any."
if not self.isFileBacked():
return
awlFileWrite(self.filepath, self.sourceBytes, encoding="binary")
safeFileWrite(self.filepath, self.sourceBytes)
def forceNonFileBacked(self, newName):
"Convert this source to a non-file-backed source."
......@@ -195,7 +203,7 @@ class GenericSource(object):
@classmethod
def fromFile(cls, name, filepath):
try:
data = awlFileRead(filepath, encoding="binary")
data = safeFileRead(filepath)
except AwlSimError as e:
raise AwlSimError("Project: Could not read %s "
"source file '%s':\n%s" %\
......@@ -227,6 +235,7 @@ class AwlSource(GenericSource):
SRCTYPE_ID = 0 # .awlpro file format ID
ENCODING = "latin_1"
USE_CDATA = False
STRIP_DATA = False
def dup(self):
return AwlSource(self.name, self.filepath,
......@@ -235,8 +244,9 @@ class AwlSource(GenericSource):
class FupSource(GenericSource):
SRCTYPE = "FUP/FBD"
SRCTYPE_ID = 1 # .awlpro file format ID
ENCODING = "UTF-8"
ENCODING = XmlFactory.XML_ENCODING
USE_CDATA = True
STRIP_DATA = True
def dup(self):
return FupSource(self.name, self.filepath,
......@@ -245,8 +255,9 @@ class FupSource(GenericSource):
class KopSource(GenericSource):
SRCTYPE = "KOP/LAD"
SRCTYPE_ID = 2 # .awlpro file format ID
ENCODING = "UTF-8"
ENCODING = XmlFactory.XML_ENCODING
USE_CDATA = True
STRIP_DATA = True
def dup(self):
return KopSource(self.name, self.filepath,
......@@ -257,6 +268,7 @@ class SymTabSource(GenericSource):
SRCTYPE_ID = 3 # .awlpro file format ID
ENCODING = "latin_1"
USE_CDATA = False
STRIP_DATA = False
def dup(self):
return SymTabSource(self.name, self.filepath,
......
......@@ -126,43 +126,37 @@ def fileExists(filename):
return None
return True
def awlFileRead(filename, encoding="latin_1"):
def safeFileRead(filename):
try:
fd = open(filename, "rb")
data = fd.read()
if encoding != "binary":
data = data.decode(encoding)
fd.close()
except (IOError, UnicodeError) as e:
raise AwlParserError("Failed to read '%s': %s" %\
with open(filename, "rb") as fd:
data = fd.read()
fd.close()
except IOError as e:
raise AwlSimError("Failed to read '%s': %s" %\
(filename, str(e)))
return data
def awlFileWrite(filename, data, encoding="latin_1"):
if encoding != "binary":
data = "\r\n".join(data.splitlines()) + "\r\n"
def safeFileWrite(filename, data):
for count in range(1000):
tmpFile = "%s-%d-%d.tmp" %\
(filename, random.randint(0, 0xFFFF), count)
if not os.path.exists(tmpFile):
break
else:
raise AwlParserError("Could not create temporary file")
raise AwlSimError("Could not create temporary file")
try:
fd = open(tmpFile, "wb")
if encoding != "binary":
data = data.encode(encoding)
fd.write(data)
fd.flush()
fd.close()
with open(tmpFile, "wb") as fd:
fd.write(data)
fd.flush()
fd.close()
if not osIsPosix:
# Can't use safe rename on non-POSIX.
# Must unlink first.
with contextlib.suppress(OSError):
os.unlink(filename)
os.rename(tmpFile, filename)
except (IOError, OSError, UnicodeError) as e:
raise AwlParserError("Failed to write file:\n" + str(e))
except (IOError, OSError) as e:
raise AwlSimError("Failed to write file:\n" + str(e))
finally:
with contextlib.suppress(IOError, OSError):
os.unlink(tmpFile)
......@@ -213,6 +207,19 @@ def bytesToHexStr(_bytes):
return None
return binascii.b2a_hex(_bytes).decode("ascii")
def toUnixEol(string):
    """Return 'string' with every line ending normalized to UNIX style (LF).

    DOS (CR LF), lone CR and any mix of these in the input are all
    converted to a single LF each.
    """
    # CR LF pairs have to be collapsed first. Otherwise the lone-CR pass
    # below would turn each pair into two line breaks.
    withoutCrLf = string.replace("\r\n", "\n")
    return withoutCrLf.replace("\r", "\n")
def toDosEol(string):
    """Return 'string' with every line ending normalized to DOS style (CR LF).

    The input may use UNIX, DOS or lone-CR line endings, or any mix of them.
    """
    # Normalize to LF first, so that CR LF pairs already present in the
    # input are not expanded into CR CR LF.
    unixText = toUnixEol(string)
    return unixText.replace("\n", "\r\n")
def envClearLang(env, lang = "C"):
"""Reset the language settings of an environment dict
to some expected value and return the result.
......
......@@ -448,7 +448,7 @@ class EditWidget(SourceCodeEdit):
def __updateSource(self):
sourceText = self.toPlainText()
# Convert to DOS-style line endings
sourceText = "\r\n".join(sourceText.splitlines())
sourceText = toDosEol(sourceText)
# Convert to binary
try:
sourceBytes = sourceText.encode(AwlParser.TEXT_ENCODING,
......
......@@ -430,7 +430,7 @@ class AwlSourceTabWidget(SourceTabWidget):
if not fn.endswith(".awl"):
fn += ".awl"
try:
awlFileWrite(fn, source.sourceBytes, encoding="binary")
safeFileWrite(fn, source.sourceBytes)
except AwlSimError as e:
MessageBox.handleAwlSimError(self,
"Failed to export source", e)
......@@ -617,7 +617,7 @@ class SymSourceTabWidget(SourceTabWidget):
if not fn.endswith(".asc"):
fn += ".asc"
try:
awlFileWrite(fn, source.sourceBytes, encoding="binary")
safeFileWrite(fn, source.sourceBytes)
except AwlSimError as e:
MessageBox.handleAwlSimError(self,
"Failed to export symbol table", e)
......
......@@ -412,10 +412,11 @@ __run_test()
cd "$rootdir" || die "cd to $rootdir failed"
# Check the file type and run the tester
if [ "$(echo -n "$testfile" | tail -c4)" = ".awl" -o\
"$(echo -n "$testfile" | tail -c7)" = ".awlpro" ]; then
if [ "$(echo -n "$testfile" | tail -c4)" = ".awl" ]; then
check_dos_text_encoding "$testfile"
run_awl_test "$interpreter" "$testfile" "$@"
elif [ "$(echo -n "$testfile" | tail -c7)" = ".awlpro" ]; then
run_awl_test "$interpreter" "$testfile" "$@"
elif [ "$(echo -n "$testfile" | tail -c3)" = ".sh" ]; then
run_sh_test "$interpreter" "$testfile" "$@"
elif [ "$(echo -n "$testfile" | tail -c3)" = ".py" ]; then
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment