Commit 0dd1e07c authored by Alex

coala-utils: Move StringConverter

parent d0491781
Pipeline #3653961 passed with stage
in 2 minutes 33 seconds
# -*- coding: utf-8 -*-
import appdirs
import os
import re
# Start ignoring PyImportSortBear, PyLintBear as BUS_NAME is imported as a
# constant from other files.
from coala_utils import VERSION
# Stop ignoring
# Apology text telling the user that they ran into a bug.
THIS_IS_A_BUG = ("This is a bug. We are sorry for the inconvenience. "
                 "Please contact the developers for assistance.")

# Longer variant of the bug notice; it ends with a colon so that diagnostic
# information can be appended after it.
CRASH_MESSAGE = ("An unknown error occurred. This is a bug. We are "
                 "sorry for the inconvenience. Please contact the "
                 "developers for assistance. During execution of "
                 "coala an exception was raised. This should never "
                 "happen. When asked for, the following information "
                 "may help investigating:")

# Contains exactly one ``%s`` placeholder: the argument for ``pip install``.
VERSION_CONFLICT_MESSAGE = ("There is a conflict in the version of a "
                            "dependency you have installed and the "
                            "requirements of coala. This may be resolved by "
                            "creating a separate virtual environment for "
                            "coala or running `pip install %s`. Be aware "
                            "that the latter solution might break other "
                            "python packages that depend on the currently "
                            "installed version.")

# ``str.format`` template; the single ``{}`` names the inaccessible object.
OBJ_NOT_ACCESSIBLE = "{} is not accessible and will be ignored!"

# This string contains many unicode characters to challenge tests.
COMPLEX_TEST_STRING = ("4 r34l ch4ll3n63: 123 ÄÖü ABc @€¥ §&% {[( ←↓→↑ "
                       "ĦŊħ ß°^ \\\n\u2192")
# Path to the coalib directory
# NOTE(review): this call is cut off after its first argument in this view;
# the remaining argument(s) and the closing parenthesis have to be restored
# from the upstream file before this module can be imported.
coalib_root = os.path.join(os.path.dirname(__file__),
# Path to the language definition files
# NOTE(review): truncated as well - the path components below coalib_root and
# the closing parenthesis are missing from this view.
language_definitions = os.path.join(coalib_root,
# File named "default_coafile" located directly inside coalib_root.
system_coafile = os.path.join(coalib_root, "default_coafile")
# File named ".coarc" in the current user's home directory.
user_coafile = os.path.join(os.path.expanduser("~"), ".coarc")
# Bare file name (no directory part) of a coafile.
default_coafile = ".coafile"
# Per-user data directory as computed by appdirs for the application name
# 'coala' and the imported VERSION.
USER_DATA_DIR = appdirs.user_data_dir('coala', version=VERSION)
# Pattern for URL validation, assembled from the commented fragments below.
# NOTE(review): the re.compile() call is not closed in this view; whatever
# followed the trailing comma (presumably flags - the character classes only
# list upper-case letters) and the closing parenthesis are missing. Confirm
# against the upstream file.
URL_REGEX = re.compile(
r'^(?:(?:http|ftp)[s]?://)?' # scheme
r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+' # domain name
r'localhost|' # OR localhost
r'(?:\d{1,3}\.){3}\d{1,3})' # OR an ip
r'(?::\d+)?' # optional port number
r'(?:/?|[/?]\S+)$', # path
import re
from collections import Iterable, OrderedDict
from coala_utils.misc import Constants
from coala_utils.parsing.StringProcessing import (
unescape, unescaped_split, unescaped_strip)
class StringConverter:
    """
    Converts strings to other things as needed. If you need some kind of string
    conversion that is not implemented here, consider adding it so everyone
    gets something out of it.
    """

    # NOTE(review): apart from ``list_delimiters`` the parameter order and
    # defaults below are derived from the constructor body and from the
    # accompanying unit tests - confirm against the upstream signature.
    def __init__(self,
                 value,
                 strip_whitespaces=True,
                 list_delimiters=(',', ';'),
                 dict_delimiter=":",
                 remove_empty_iter_elements=True):
        """
        Creates a new StringConverter.

        :param value:                      The object to wrap; it is stored
                                           as ``str(value)``.
        :param strip_whitespaces:          Whether the value and every list
                                           or dict element get passed through
                                           ``unescaped_strip``.
        :param list_delimiters:            An iterable of strings, each of
                                           which separates list elements.
        :param dict_delimiter:             The string separating a key from
                                           its value inside one element.
        :param remove_empty_iter_elements: Whether empty strings are dropped
                                           from the list representation.
        :raises TypeError:                 If ``list_delimiters`` is not
                                           iterable or ``strip_whitespaces``
                                           is not a bool.
        """
        if not isinstance(list_delimiters, Iterable):
            raise TypeError("list_delimiters has to be an Iterable.")
        if not isinstance(strip_whitespaces, bool):
            raise TypeError("strip_whitespaces has to be a bool parameter")

        self.__strip_whitespaces = strip_whitespaces
        self.__list_delimiters = list_delimiters
        self.__dict_delimiter = dict_delimiter
        self.__remove_empty_iter_elements = remove_empty_iter_elements

        self.__escaped_list = None
        self.__unescaped_list = None
        self.__dict = None
        # Must come last: the setter reads the options stored above and fills
        # the list and dict caches.
        self.value = value

    def __str__(self):
        return unescape(self.value)

    def __bool__(self):
        """
        Interprets the value as a boolean.

        :return:            True or False, depending on which of the
                            ``Constants`` string collections contains the
                            lower-cased value.
        :raises ValueError: If the value is in neither collection.
        """
        text = str(self).lower()
        if text in Constants.TRUE_STRINGS:
            return True
        if text in Constants.FALSE_STRINGS:
            return False
        raise ValueError(repr(text) + " cannot be interpreted as a boolean.")

    def __len__(self):
        return len(str(self))

    def __int__(self):
        return int(str(self))

    def __float__(self):
        return float(str(self))

    def __url__(self):
        """
        Determines the url validity of this setting.

        :return:            url string
        :raises ValueError: If the url is not valid.
        """
        strrep = str(self).strip()
        if Constants.URL_REGEX.match(strrep):
            return strrep

        raise ValueError(repr(strrep) + " is not a valid url.")

    def __iter__(self, remove_backslashes=True):
        """
        Converts the value to a list using the delimiters given at construction
        time.

        Note that escaped values will be unescaped and escaped list delimiters
        will be allowed in values. If you need the escapes you should not
        use this routine.

        :param remove_backslashes: Whether or not to remove the backslashes
                                   after conversion.
        :return:                   An iterator over all values.
        """
        if remove_backslashes:
            return iter(self.__unescaped_list)
        return iter(self.__escaped_list)

    def __getitem__(self, item):
        return self.__dict.__getitem__(item)

    def keys(self):
        return self.__dict.keys()

    def __get_raw_list(self):
        """
        Splits the value at every unescaped list delimiter.

        :return: A list with the raw (still escaped, unstripped) elements.
        """
        # Delimiters are literal strings, so escape them before joining them
        # into one alternation.
        pattern = ("(?:" +
                   "|".join(re.escape(v) for v in self.__list_delimiters) +
                   ")")

        return list(unescaped_split(pattern,
                                    self.value,
                                    use_regex=True))

    def __prepare_list(self):
        """
        Rebuilds the escaped and the unescaped list representation.
        """
        self.__escaped_list = self.__get_raw_list()

        if self.__strip_whitespaces:
            self.__escaped_list = [unescaped_strip(elem)
                                   for elem in self.__escaped_list]

        self.__unescaped_list = [unescape(elem)
                                 for elem in self.__escaped_list]

        if self.__remove_empty_iter_elements:
            # Need to do after stripping, cant use builtin functionality of
            # split. Both lists are filtered independently of each other.
            self.__unescaped_list = [elem
                                     for elem in self.__unescaped_list
                                     if elem != ""]
            self.__escaped_list = [elem
                                   for elem in self.__escaped_list
                                   if elem != ""]

    def __prepare_dict(self):
        """
        Rebuilds the dict representation from the raw list elements.
        """
        # We must keep order here, user can drop it later.
        self.__dict = OrderedDict()
        for elem in self.__get_raw_list():
            key_val = unescaped_split(self.__dict_delimiter, elem, max_split=1)

            if self.__strip_whitespaces:
                key_val = [unescaped_strip(item) for item in key_val]

            key_val = [unescape(item) for item in key_val]

            # Entirely empty elements carry neither a key nor a value.
            if not any(item != "" for item in key_val):
                continue

            # An element without the dict delimiter is a key with an empty
            # value.
            if len(key_val) < 2:
                self.__dict[key_val[0]] = ""
            else:
                self.__dict[key_val[0]] = key_val[1]

    @property
    def value(self):
        return self.__value

    @value.setter
    def value(self, newval):
        self.__value = str(newval)
        if self.__strip_whitespaces:
            self.__value = unescaped_strip(self.__value)

        # __iter__, __getitem__ and keys() only read the caches, so they have
        # to be refreshed whenever the value changes.
        self.__prepare_list()
        self.__prepare_dict()

    def __eq__(self, other):
        return isinstance(other, StringConverter) and self.value == other.value

    def __ne__(self, other):
        return not self.__eq__(other)
This diff is collapsed.
def limit(iterator, count):
    """
    A filter that removes all elements behind the set limit.

    :param iterator: The iterator to be filtered.
    :param count:    The iterator limit. All elements at positions bigger than
                     this limit are trimmed off. Exclusion: 0 or numbers below
                     does not limit at all, means the passed iterator is
                     completely yielded.
    :return:         A generator over at most ``count`` elements of
                     ``iterator`` (over all of them if ``count <= 0``).
    """
    if count <= 0:  # Performance branch
        yield from iterator
        return

    for elem in iterator:
        yield elem
        count -= 1
        # Stop right after the last wanted element so that nothing beyond the
        # limit is pulled from the underlying iterator.
        if count == 0:
            break
def trim_empty_matches(iterator, groups=(0,)):
    """
    A filter that removes empty match strings. It can only operate on iterators
    whose elements are of type MatchObject.

    :param iterator: The iterator to be filtered.
    :param groups:   An iteratable defining the groups to check for blankness.
                     Only results are not yielded if all groups of the match
                     are blank.
                     You can not only pass numbers but also strings, if your
                     MatchObject contains named groups.
    :return:         A generator over the matches that have at least one
                     non-blank group among ``groups``.
    """
    for elem in iterator:
        # A group that did not take part in the match is reported as None by
        # ``group()``; the truth test treats it as blank, just like "".
        if any(elem.group(group) for group in groups):
            yield elem
from coala_utils.decorators import generate_ordering, generate_repr
from coala_utils.parsing.StringProcessing import Match
@generate_repr("begin", "inside", "end")
@generate_ordering("begin", "inside", "end")
class InBetweenMatch:
    """
    Holds information about a match enclosed by two matches.
    """

    def __init__(self, begin, inside, end):
        """
        Instantiates a new InBetweenMatch.

        :param begin:       The ``Match`` of the start pattern.
        :param inside:      The ``Match`` between start and end.
        :param end:         The ``Match`` of the end pattern.
        :raises ValueError: If ``begin`` compares greater than ``inside`` or
                            ``inside`` compares greater than ``end``.
        """
        if begin > inside or inside > end:
            raise ValueError("The inside match must be enclosed by the begin "
                             "and end match.")

        self._begin = begin
        self._inside = inside
        self._end = end

    @classmethod
    def from_values(cls, begin, begin_pos, inside, inside_pos, end, end_pos):
        """
        Instantiates a new InBetweenMatch from Match values.

        This function allows to bypass the usage of Match object instantation:

        >>> a = InBetweenMatch(Match("A", 0), Match("B", 1), Match("C", 2))
        >>> b = InBetweenMatch.from_values("A", 0, "B", 1, "C", 2)
        >>> assert a == b

        :param begin:      The matched string from start pattern.
        :param begin_pos:  The position of the matched begin string.
        :param inside:     The matched string from inside/in-between pattern.
        :param inside_pos: The position of the matched inside/in-between
                           string.
        :param end:        The matched string from end pattern.
        :param end_pos:    The position of the matched end string.
        :returns:          An InBetweenMatch from the given values.
        """
        return cls(Match(begin, begin_pos),
                   Match(inside, inside_pos),
                   Match(end, end_pos))

    @property
    def begin(self):
        """
        Returns the ``begin`` object handed to the constructor.
        """
        return self._begin

    @property
    def inside(self):
        """
        Returns the ``inside`` object handed to the constructor.
        """
        return self._inside

    @property
    def end(self):
        """
        Returns the ``end`` object handed to the constructor.
        """
        return self._end
from coala_utils.decorators import generate_ordering, generate_repr
@generate_repr("match", "range")
@generate_ordering("range", "match")
class Match:
    """
    Stores information about a single textual match.
    """

    def __init__(self, match, position):
        """
        Instantiates a new Match.

        :param match:    The actual matched string.
        :param position: The position where the match was found. Starts from
                         zero.
        """
        self._match = match
        self._position = position

    def __len__(self):
        return len(self.match)

    def __str__(self):
        return self.match

    @property
    def match(self):
        """
        Returns the text matched.

        :returns: The text matched.
        """
        return self._match

    @property
    def position(self):
        """
        Returns the position where the text was matched (zero-based).

        :returns: The position.
        """
        return self._position

    @property
    def end_position(self):
        """
        Marks the end position of the matched text (zero-based).

        :returns: The end-position.
        """
        return len(self) + self.position

    @property
    def range(self):
        """
        Returns the position range where the text was matched.

        :returns: A pair indicating the position range. The first element is
                  the start position, the second one the end position.
        """
        return (self.position, self.end_position)
# Start ignoring PyImportSortBear because of dependency chains!
from coala_utils.parsing.StringProcessing.Match import Match
from coala_utils.parsing.StringProcessing.InBetweenMatch import InBetweenMatch
from coala_utils.parsing.StringProcessing.Core import (
search_for, unescaped_search_for, split, unescaped_split,
search_in_between, unescaped_search_in_between, nested_search_in_between,
escape, convert_to_raw, unescape, unescaped_rstrip, unescaped_strip,
# Stop ignoring
\ No newline at end of file
import unittest
from coala_utils.misc.StringConverter import StringConverter
class StringConverterTest(unittest.TestCase):
def setUp(self):
self.uut = StringConverter("\n \\1 \n ")
def test_construction(self):
def test_whitespace_stripping(self):
self.assertEqual(str(self.uut), "1")
self.uut = StringConverter("\n 1 \n", strip_whitespaces=False)
self.assertEqual(str(self.uut), "\n 1 \n")
def test_int_conversion(self):
self.assertEqual(int(self.uut), 1)
self.uut = StringConverter(" not an int ")
self.assertRaises(ValueError, int, self.uut)
def test_float_conversion(self):
self.assertEqual(float(self.uut), 1)
self.uut.value = "0.5 "
self.assertEqual(float(self.uut), 0.5)
self.uut = StringConverter(" not a float ")
self.assertRaises(ValueError, float, self.uut)
def test_len(self):
self.assertEqual(len(self.uut), 1)
def test_iterator(self):
self.uut = StringConverter("a, test with!!some challenge",
list_delimiters=[",", " ", "!!"])
["a", "test", "with", "some", "challenge"])
self.uut = StringConverter("a\\ \\,\\\\ test with!!some challenge",
list_delimiters=[",", " ", "!!"])
["a ,\\", "test", "with", "some", "challenge"])
self.uut = StringConverter("a, test with!some \\\\\\ challenge\\ ",
list_delimiters=", !",
["a", "test", "with", "some", "\\ challenge "])
self.uut = StringConverter("a, test with!some \\\\\\ challenge\\ ",
list_delimiters=", !",
["a", "test", "with", "some", "\\ challenge "])
self.uut = StringConverter("testval", list_delimiters=[",", "¸"])
self.uut.value = "a\\n,bug¸g"
self.assertEqual(list(self.uut), ["an", "bug", "g"])
self.assertEqual(list(self.uut.__iter__(False)), ["a\\n", "bug", "g"])
self.assertTrue("bug" in self.uut)
self.assertFalse("but" in self.uut)
self.uut = StringConverter("a, test, \n",
self.assertEqual(list(self.uut), ["a", "test"])
self.uut = StringConverter("a, test, \n",
self.assertEqual(list(self.uut), ["a", " test", " \n"])
uut = StringConverter("A,B,C , D\\x \\a,42,\\n8 ",
self.assertEqual(list(uut), ["A", "B", "C ", " Dx a", "42", "n8 "])
def test_iterator_escape_whitespaces(self):
uut = StringConverter("ta, chi, tsu, te, \\ to", list_delimiters=",")
self.assertEqual(list(uut), ["ta", "chi", "tsu", "te", " to"])
uut = StringConverter(r"/**, \ *\ , \ */", list_delimiters=",")
self.assertEqual(list(uut), ["/**", " * ", " */"])
uut = StringConverter(
"abc\\\\ , def\\ \\ \\ , \\\\ unstrip \\\\\\ ",
self.assertEqual(list(uut), ["abc\\", "def ", "\\ unstrip \\ "])
def test_iterator_remove_empty_iter_elements(self):
uut = StringConverter("a, b, c, , e, , g", list_delimiters=",")
self.assertEqual(list(uut), ["a", "b", "c", "e", "g"])
uut = StringConverter("a, , ,, e, , g",
self.assertEqual(list(uut), ["a", "e", "g"])
uut = StringConverter(",,, ,",
self.assertEqual(list(uut), [])
uut = StringConverter("a, b, c, , e, , g",
self.assertEqual(list(uut), ["a", "b", "c", "", "e", "", "g"])
uut = StringConverter(",,, ,",
self.assertEqual(list(uut), ["", "", "", "", ""])
def test_dict_escape_whitespaces(self):
uut = StringConverter(
"\\ : \\ , hello: \\ world, \\\\ A \\\\ : B\\ ")
self.assertEqual(dict(uut), {" ": " ",
"hello": " world",
"\\ A \\": "B "})
uut = StringConverter(r"/**, \ *\ , \ */")
self.assertEqual(dict(uut), {"/**": "", " * ": "", " */": ""})
uut = StringConverter("abc\\\\ : qew, def\\ \\ \\ ,"
" \\\\ unstrip \\\\\\ ")
self.assertEqual(dict(uut), {"abc\\": "qew",
"def ": "",
"\\ unstrip \\ ": ""})
uut = StringConverter("A:B,C : D\\x \\a,42:\\n8 ",
self.assertEqual(dict(uut), {"A": "B", "C ": " Dx a", "42": "n8 "})
def test_dict_conversion(self):
self.uut = StringConverter("test")
self.assertEqual(dict(self.uut), {"test": ""})
self.uut = StringConverter("test, t")
self.assertEqual(dict(self.uut), {"test": "", "t": ""})
self.uut = StringConverter("test, t: v")
self.assertEqual(dict(self.uut), {"test": "", "t": "v"})
# Check escaping
self.uut = StringConverter("test, t\\: v")
self.assertEqual(dict(self.uut), {"test": "", "t: v": ""})
self.uut = StringConverter("test, t\\: v: t")
self.assertEqual(dict(self.uut), {"test": "", "t: v": "t"})
self.uut = StringConverter("test\\, t\\: v: t")
self.assertEqual(dict(self.uut), {"test, t: v": "t"})
self.uut = StringConverter("test\\, t\\: v: t\\,")
self.assertEqual(dict(self.uut), {"test, t: v": "t,"})
# Check that lists ignore colons
self.assertEqual(list(self.uut), ["test, t: v: t,"])
def test_bool_conversion(self):
self.assertEqual(bool(self.uut), True)
self.uut.value = "yeah"
self.assertEqual(bool(self.uut), True)
self.uut = StringConverter("y")
self.assertEqual(bool(self.uut), True)
self.uut = StringConverter("nope")
self.assertEqual(bool(self.uut), False)
self.uut = StringConverter(" i dont know ")
self.assertRaises(ValueError, bool, self.uut)
def test_equality_comparision(self):
self.assertEqual(StringConverter(" i dont know "),
StringConverter("i dont know"))
self.assertNotEqual(StringConverter(" dont know "),
StringConverter("i dont know "))
StringConverter("i dont know "))
self.assertNotEqual(5, StringConverter("i dont know "))
def test_url(self):
valid_urls = (
# Scheme tests
"", "", "", "",
# Domain tests
"", "",
"", "http://localhost", "",
"", "",
"", "", "", "localhost",
"", "", "", "",
# Port number
"localhost:8888", "", "",
# Paths
"", "", "",
"", "")
invalid_urls = (
# Invalid types
123, True, None,
# Invalid links
"unknown://", "123", "abcd", "url.unknown",
"", "http://unknownlocalhost",
for url in valid_urls:
except ValueError as exception:
print(exception)"URL {} raised ValueError unexpectedly.".format(url))
for url in invalid_urls:
self.assertRaises(ValueError, self.uut.__url__)
from coala_utils.parsing.StringProcessing import convert_to_raw
from tests.parsing.StringProcessing.StringProcessingTestBase import (
class ConvertToRawTest(StringProcessingTestBase):
    """
    Checks ``convert_to_raw()`` against a table of sample strings.
    """

    def test_convert_to_raw(self):
        # Every row is an (input, expected output) pair.
        samples = [
            (r"test", r"test"),
            (r"test_path", r"test_path"),
            (r"test, path", r"test, path"),
            (r"test\ path", r"test\ path"),
            (r"test\path", r"test\\path"),
            (r"test\\path", r"test\\path"),
            (r"test\=path", r"test\=path"),
            (r"test=path", r"test=path"),
            (r"value\=as\something", r"value\=as\\something"),
        ]

        for given, expected in samples:
            converted = convert_to_raw(given, ",.=# ")
            self.assertEqual(converted, expected)
from coala_utils.parsing.StringProcessing import escape
from tests.parsing.StringProcessing.StringProcessingTestBase import (
class EscapeTest(StringProcessingTestBase):
# Test escape() using a single character to escape and default parameters.
def test_normal_behaviour(self):
expected_results = [
r"out1 \'escaped-escape: \\ \' out2",
r"out1 \'escaped-quote: \\' \' out2",
r"out1 \'escaped-anything: \X \' out2",
r"out1 \'two escaped escapes: \\\\ \' out2",
r"out1 \'escaped-quote at end: \\'\' out2",
r"out1 \'escaped-escape at end: \\\' out2",
r"out1 \'str1\' out2 \'str2\' out2",
r"out1 \\' \'str1\' out2 \'str2\' out2",
r"out1 \\\\' \'str1\' out2 \'str2\' out2",
r"out1 \\ \'str1\' out2 \'str2\' out2",
r"out1 \\\\ \'str1\' out2 \'str2\' out2",
r"out1 \\\'str1\' out2 \'str2\' out2",
r"out1 \\\\\'str1\' out2 \'str2\' out2",
r"out1 \'str1\'\'str2\'\'str3\' out2",
r"out1 out2 out3",,
2 *]