Commit 3f72ff1f by Alex

coala-utils: Move StringProcessing

parent 7a777eea
Pipeline #3658775 passed with stage
in 3 minutes 19 seconds
def limit(iterator, count):
    """
    Yields the leading elements of an iterable and stops at a maximum count.

    :param iterator: The iterable whose elements are passed through.
    :param count:    The maximum number of elements to yield. Everything
                     located behind that many elements is cut off. A value
                     of 0 or below disables the limit, so the passed
                     iterable is yielded completely.
    """
    if count <= 0:
        # Unlimited case: pass everything through without any bookkeeping.
        for item in iterator:
            yield item
        return

    for yielded, item in enumerate(iterator, start=1):
        yield item
        if yielded == count:
            # Stop immediately so no further element is pulled from the
            # underlying iterable.
            return
def trim_empty_matches(iterator, groups=(0,)):
    """
    A filter that removes empty match strings. It can only operate on
    iterators whose elements are match objects (i.e. provide ``group()``).

    :param iterator: The iterator to be filtered.
    :param groups:   An iterable defining the groups to check for blankness.
                     A match is dropped only if all of these groups are
                     blank; a group that did not participate in the match
                     (``group()`` returns ``None``) counts as blank.
                     You can pass numbers as well as strings, the latter if
                     your match objects contain named groups.
    """
    # Materialize once: a one-shot iterable (e.g. a generator) would be
    # exhausted after the first match and silently drop all later matches.
    groups = tuple(groups)

    for elem in iterator:
        # Truthiness treats "" and None alike, so a non-participating group
        # no longer raises TypeError the way ``len(None)`` did.
        if any(elem.group(group) for group in groups):
            yield elem
from coala_utils.decorators import generate_ordering, generate_repr
from coala_utils.string_processing import Match
@generate_repr("begin", "inside", "end")
@generate_ordering("begin", "inside", "end")
class InBetweenMatch:
    """
    Represents a match that is surrounded by a begin and an end match.
    """

    def __init__(self, begin, inside, end):
        """
        Creates a new InBetweenMatch.

        :param begin:       The ``Match`` of the start pattern.
        :param inside:      The ``Match`` located between start and end.
        :param end:         The ``Match`` of the end pattern.
        :raises ValueError: If ``begin`` compares greater than ``inside`` or
                            ``inside`` compares greater than ``end``.
        """
        begins_after_inside = begin > inside
        if begins_after_inside or inside > end:
            raise ValueError(
                "The inside match must be enclosed by the begin and end match.")

        self._begin, self._inside, self._end = begin, inside, end

    @classmethod
    def from_values(cls, begin, begin_pos, inside, inside_pos, end, end_pos):
        """
        Creates a new InBetweenMatch directly from Match values.

        Using this function spares the explicit creation of Match objects:

        >>> a = InBetweenMatch(Match("A", 0), Match("B", 1), Match("C", 2))
        >>> b = InBetweenMatch.from_values("A", 0, "B", 1, "C", 2)
        >>> assert a == b

        :param begin:      The string matched by the start pattern.
        :param begin_pos:  The position of the matched begin string.
        :param inside:     The string matched in between.
        :param inside_pos: The position of the matched in-between string.
        :param end:        The string matched by the end pattern.
        :param end_pos:    The position of the matched end string.
        :returns:          An InBetweenMatch built from the given values.
        """
        begin_match = Match(begin, begin_pos)
        inside_match = Match(inside, inside_pos)
        end_match = Match(end, end_pos)
        return cls(begin_match, inside_match, end_match)

    @property
    def begin(self):
        """
        :returns: The ``Match`` of the start pattern.
        """
        return self._begin

    @property
    def inside(self):
        """
        :returns: The ``Match`` between start and end.
        """
        return self._inside

    @property
    def end(self):
        """
        :returns: The ``Match`` of the end pattern.
        """
        return self._end
from coala_utils.decorators import generate_ordering, generate_repr
@generate_repr("match", "range")
@generate_ordering("range", "match")
class Match:
    """
    Describes a single textual match: the matched text and where it was found.
    """

    def __init__(self, match, position):
        """
        Creates a new Match.

        :param match:    The matched string itself.
        :param position: The zero-based position where the match was found.
        """
        self._match, self._position = match, position

    def __len__(self):
        matched_text = self.match
        return len(matched_text)

    def __str__(self):
        return self.match

    @property
    def match(self):
        """
        Gives access to the matched text.

        :returns: The text matched.
        """
        return self._match

    @property
    def position(self):
        """
        Gives access to the zero-based position the text was matched at.

        :returns: The position.
        """
        return self._position

    @property
    def end_position(self):
        """
        Computes the zero-based end position of the matched text, which is
        the position plus the length of the match.

        :returns: The end-position.
        """
        length = len(self)
        return length + self.position

    @property
    def range(self):
        """
        Gives the position range covered by the matched text.

        :returns: A pair whose first element is the start position and
                  whose second element is the end position.
        """
        start = self.position
        return start, self.end_position
import re
from collections import Iterable, OrderedDict
from coala_utils import Constants
from coala_utils.string_processing import (
unescape, unescaped_split, unescaped_strip)
class StringConverter:
    """
    Converts strings to other things as needed. If you need some kind of string
    conversion that is not implemented here, consider adding it so everyone
    gets something out of it.
    """

    def __init__(self,
                 value,
                 strip_whitespaces=True,
                 list_delimiters=(',', ';'),
                 dict_delimiter=":",
                 remove_empty_iter_elements=True):
        """
        Creates a new StringConverter.

        :param value:                      The value to convert. It is stored
                                           as ``str(value)``.
        :param strip_whitespaces:          Whether the value, the list
                                           elements and the dict keys/values
                                           get stripped via
                                           ``unescaped_strip``.
        :param list_delimiters:            An iterable of delimiter strings
                                           used to split the value into
                                           elements.
        :param dict_delimiter:             The delimiter separating key and
                                           value inside an element.
        :param remove_empty_iter_elements: Whether empty strings are removed
                                           from the lists used for iteration.
        :raises TypeError:                 If ``list_delimiters`` is not an
                                           Iterable or ``strip_whitespaces``
                                           is not a bool.
        """
        if not isinstance(list_delimiters, Iterable):
            raise TypeError("list_delimiters has to be an Iterable.")
        if not isinstance(strip_whitespaces, bool):
            raise TypeError("strip_whitespaces has to be a bool parameter")

        self.__strip_whitespaces = strip_whitespaces
        self.__list_delimiters = list_delimiters
        self.__dict_delimiter = dict_delimiter
        self.__remove_empty_iter_elements = remove_empty_iter_elements

        self.__escaped_list = None
        self.__unescaped_list = None
        self.__dict = None

        # Must come last: the ``value`` setter reads the configuration set
        # above and fills the list/dict caches.
        self.value = value

    def __str__(self):
        return unescape(self.value)

    def __bool__(self):
        """
        Interprets the value as a boolean.

        :return:            True if the lowercased string is contained in
                            ``Constants.TRUE_STRINGS``, False if it is
                            contained in ``Constants.FALSE_STRINGS``.
        :raises ValueError: If the string is in neither of both.
        """
        text = str(self)
        lowered = text.lower()
        if lowered in Constants.TRUE_STRINGS:
            return True
        if lowered in Constants.FALSE_STRINGS:
            return False

        raise ValueError(repr(text) + " cannot be converted to a bool.")

    def __len__(self):
        return len(str(self))

    def __int__(self):
        return int(str(self))

    def __float__(self):
        return float(str(self))

    def __url__(self):
        """
        Determines the url validity of this setting.

        :return:            url string
        :raises ValueError: If the url is not valid.
        """
        strrep = str(self).strip()
        if Constants.URL_REGEX.match(strrep):
            return strrep

        raise ValueError(repr(strrep) + " is not a valid url.")

    def __iter__(self, remove_backslashes=True):
        """
        Converts the value to a list using the delimiters given at construction
        time.

        Note that escaped values will be unescaped and escaped list delimiters
        will be allowed in values. If you need the escapes you should not
        use this routine.

        :param remove_backslashes: Whether or not to remove the backslashes
                                   after conversion.
        :return:                   An iterator over all values.
        """
        if remove_backslashes:
            return iter(self.__unescaped_list)
        else:
            return iter(self.__escaped_list)

    def __getitem__(self, item):
        return self.__dict.__getitem__(item)

    def keys(self):
        """
        :return: The keys of the dict parsed from the value.
        """
        return self.__dict.keys()

    def __get_raw_list(self):
        """
        Splits the value at every list delimiter via ``unescaped_split``.

        :return: A list with the raw (neither stripped nor unescaped)
                 elements.
        """
        # The delimiters are literal strings, so escape them before joining
        # them into one regex alternation.
        pattern = ("(?:" +
                   "|".join(re.escape(v) for v in self.__list_delimiters) +
                   ")")

        return list(unescaped_split(pattern,
                                    self.value,
                                    use_regex=True))

    def __prepare_list(self):
        """
        Rebuilds the escaped and the unescaped element lists from the value.
        """
        self.__escaped_list = self.__get_raw_list()

        if self.__strip_whitespaces:
            self.__escaped_list = [unescaped_strip(elem)
                                   for elem in self.__escaped_list]

        self.__unescaped_list = [unescape(elem)
                                 for elem in self.__escaped_list]

        if self.__remove_empty_iter_elements:
            # Needs to happen after stripping, so the builtin functionality
            # of split can't be used. Each list is filtered in a single pass
            # instead of repeatedly searching and removing "".
            self.__unescaped_list = [elem
                                     for elem in self.__unescaped_list
                                     if elem != ""]
            self.__escaped_list = [elem
                                   for elem in self.__escaped_list
                                   if elem != ""]

    def __prepare_dict(self):
        """
        Rebuilds the ordered key/value mapping from the value.
        """
        # We must keep order here, user can drop it later.
        self.__dict = OrderedDict()

        for elem in self.__get_raw_list():
            key_val = unescaped_split(self.__dict_delimiter, elem, max_split=1)

            if self.__strip_whitespaces:
                key_val = [unescaped_strip(item) for item in key_val]

            key_val = [unescape(item) for item in key_val]

            # Skip elements that consist of empty parts only.
            if all(item == "" for item in key_val):
                continue

            if len(key_val) < 2:
                # No dict delimiter present: the key maps to an empty string.
                self.__dict[key_val[0]] = ""
            else:
                self.__dict[key_val[0]] = key_val[1]

    @property
    def value(self):
        return self.__value

    @value.setter
    def value(self, newval):
        self.__value = str(newval)
        if self.__strip_whitespaces:
            self.__value = unescaped_strip(self.__value)

        # Keep the derived list and dict in sync with the new value.
        self.__prepare_list()
        self.__prepare_dict()

    def __eq__(self, other):
        return isinstance(other, StringConverter) and self.value == other.value

    def __ne__(self, other):
        return not self.__eq__(other)
# Start ignoring PyImportSortBear because of dependency chains!
from coala_utils.string_processing.Match import Match
from coala_utils.string_processing.InBetweenMatch import InBetweenMatch
from coala_utils.string_processing.Core import (
search_for, unescaped_search_for, split, unescaped_split,
search_in_between, unescaped_search_in_between, nested_search_in_between,
escape, convert_to_raw, unescape, unescaped_rstrip, unescaped_strip,
position_is_escaped)
# Stop ignoring
from coala_utils.string_processing import convert_to_raw
from tests.string_processing.StringProcessingTestBase import (
StringProcessingTestBase)
class ConvertToRawTest(StringProcessingTestBase):
    """
    Checks ``convert_to_raw()`` against a table of inputs and the outputs
    expected for them.
    """

    def test_convert_to_raw(self):
        # The character set handed to convert_to_raw() as second argument.
        passed_chars = ",.=# "

        # Each entry pairs an input with its expected output.
        cases = (
            (r"test", r"test"),
            (r"test_path", r"test_path"),
            (r"test, path", r"test, path"),
            (r"test\ path", r"test\ path"),
            (r"test\path", r"test\\path"),
            (r"test\\path", r"test\\path"),
            (r"test\=path", r"test\=path"),
            (r"test=path", r"test=path"),
            (r"value\=as\something", r"value\=as\\something"),
        )

        for given, expected in cases:
            self.assertEqual(convert_to_raw(given, passed_chars), expected)
from coala_utils.string_processing import escape
from tests.string_processing.StringProcessingTestBase import (
StringProcessingTestBase)
class EscapeTest(StringProcessingTestBase):
    """
    Tests for ``escape()``.

    Every test zips ``self.test_strings`` with a list of expected results, so
    the entries of each list pair up with the test strings by position. The
    last two expectations are built from ``self.bs``.
    """

    # NOTE(review): the table in test_windows_shell_space_escape contains runs
    # of "^ " where the other tables show a single space at the same place;
    # presumably runs of spaces got collapsed in those other tables - verify
    # all tables against self.test_strings before relying on them.

    # Test escape() using a single character to escape and default parameters.
    def test_normal_behaviour(self):
        expected_results = [
            r"out1 \'escaped-escape: \\ \' out2",
            r"out1 \'escaped-quote: \\' \' out2",
            r"out1 \'escaped-anything: \X \' out2",
            r"out1 \'two escaped escapes: \\\\ \' out2",
            r"out1 \'escaped-quote at end: \\'\' out2",
            r"out1 \'escaped-escape at end: \\\' out2",
            r"out1 \'str1\' out2 \'str2\' out2",
            r"out1 \\' \'str1\' out2 \'str2\' out2",
            r"out1 \\\\' \'str1\' out2 \'str2\' out2",
            r"out1 \\ \'str1\' out2 \'str2\' out2",
            r"out1 \\\\ \'str1\' out2 \'str2\' out2",
            r"out1 \\\'str1\' out2 \'str2\' out2",
            r"out1 \\\\\'str1\' out2 \'str2\' out2",
            r"out1 \'str1\'\'str2\'\'str3\' out2",
            r"",
            r"out1 out2 out3",
            self.bs,
            2 * self.bs]

        # Keys are the argument tuples passed to escape(), values the
        # results expected for them.
        self.assertResultsEqual(
            escape,
            {(test_string, "'"): result
             for test_string, result in zip(self.test_strings,
                                            expected_results)})

    # Tests escape() with more than one char to escape and an escape sequence
    # that consists of more than one char.
    def test_advanced(self):
        expected_results = [
            r"out()1 'e()scaped-e()scape: \\ ' out2",
            r"out()1 'e()scaped-quote: \' ' out2",
            r"out()1 'e()scaped-anything: \X ' out2",
            r"out()1 'two e()scaped e()scape()s: \\\\ ' out2",
            r"out()1 'e()scaped-quote at end: \'' out2",
            r"out()1 'e()scaped-e()scape at end: \\' out2",
            r"out()1 '()str()1' out2 '()str2' out2",
            r"out()1 \' '()str()1' out2 '()str2' out2",
            r"out()1 \\\' '()str()1' out2 '()str2' out2",
            r"out()1 \\ '()str()1' out2 '()str2' out2",
            r"out()1 \\\\ '()str()1' out2 '()str2' out2",
            r"out()1 \\'()str()1' out2 '()str2' out2",
            r"out()1 \\\\'()str()1' out2 '()str2' out2",
            r"out()1 '()str()1''()str2''()str()()3' out2",
            r"",
            r"out()1 out2 out()()3",
            self.bs,
            2 * self.bs]

        # "1s33" holds the chars to escape, "()" is the escape sequence.
        self.assertResultsEqual(
            escape,
            {(test_string, "1s33", "()"): result
             for test_string, result in zip(self.test_strings,
                                            expected_results)})

    # Tests the realistic case when needing to escape spaces inside a shell
    # with carets.
    def test_windows_shell_space_escape(self):
        expected_results = [
            r"out1^ 'escaped-escape:^ ^ ^ ^ ^ ^ ^ ^ \\^ '^ out2",
            r"out1^ 'escaped-quote:^ ^ ^ ^ ^ ^ ^ ^ ^ \'^ '^ out2",
            r"out1^ 'escaped-anything:^ ^ ^ ^ ^ ^ \X^ '^ out2",
            r"out1^ 'two^ escaped^ escapes:^ \\\\^ '^ out2",
            r"out1^ 'escaped-quote^ at^ end:^ ^ ^ \''^ out2",
            r"out1^ 'escaped-escape^ at^ end:^ ^ \\'^ out2",
            r"out1^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ 'str1'^ out2^ 'str2'^ out2",
            r"out1^ \'^ ^ ^ ^ ^ ^ ^ ^ 'str1'^ out2^ 'str2'^ out2",
            r"out1^ \\\'^ ^ ^ ^ ^ ^ 'str1'^ out2^ 'str2'^ out2",
            r"out1^ \\^ ^ ^ ^ ^ ^ ^ ^ 'str1'^ out2^ 'str2'^ out2",
            r"out1^ \\\\^ ^ ^ ^ ^ ^ 'str1'^ out2^ 'str2'^ out2",
            r"out1^ ^ ^ ^ ^ ^ ^ ^ ^ \\'str1'^ out2^ 'str2'^ out2",
            r"out1^ ^ ^ ^ ^ ^ ^ \\\\'str1'^ out2^ 'str2'^ out2",
            r"out1^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ 'str1''str2''str3'^ out2",
            r"",
            r"out1^ out2^ out3",
            self.bs,
            2 * self.bs]

        # A space is the char to escape, the caret is the escape sequence.
        self.assertResultsEqual(
            escape,
            {(test_string, " ", "^"): result
             for test_string, result in zip(self.test_strings,
                                            expected_results)})

    # Tests using iterators instead of strings for the chars to escape. This
    # allows to escape complete strings and not only chars.
    def test_iterators_not_strings(self):
        expected_results = [
            r"\out1 'escaped-escape: \\ ' out2",
            r"\out1 'escaped-quote: \' ' out2",
            r"\out1 'escaped-anything: \X ' out2",
            r"\out1 'two escaped escapes: \\\\ ' out2",
            r"\out1 'escaped-quote at end: \'' out2",
            r"\out1 'escaped-escape at end: \\' out2",
            r"\out1 'str1' out2 '\str2' out2",
            r"\out1 \' 'str1' out2 '\str2' out2",
            r"\out1 \\\' 'str1' out2 '\str2' out2",
            r"\out1 \\ 'str1' out2 '\str2' out2",
            r"\out1 \\\\ 'str1' out2 '\str2' out2",
            r"\out1 \\'str1' out2 '\str2' out2",
            r"\out1 \\\\'str1' out2 '\str2' out2",
            r"\out1 'str1''\str2''str3' out2",
            r"",
            r"\out1 out2 out3",
            self.bs,
            2 * self.bs]

        # The tuple ("out1", "str2") lists whole strings to escape.
        self.assertResultsEqual(
            escape,
            {(test_string, ("out1", "str2")): result
             for test_string, result in zip(self.test_strings,
                                            expected_results)})
import unittest
from coala_utils.string_processing import InBetweenMatch, Match
class InBetweenMatchTest(unittest.TestCase):
    """
    Unit tests covering construction and properties of ``InBetweenMatch``.
    """

    def test_invalid(self):
        # Inside match (position -1) lies before the begin match (10).
        with self.assertRaises(ValueError):
            InBetweenMatch(Match("a", 10), Match("b", -1), Match("c", 12))

        # End match (position 1) lies before the inside match (2).
        with self.assertRaises(ValueError):
            InBetweenMatch.from_values("X", 1, "QAD", 2, "LK", 1)

        # Begin match (position 50) lies behind the inside match (22).
        with self.assertRaises(ValueError):
            InBetweenMatch.from_values("1", 50, "2", 22, "3", 28)

    def test_properties(self):
        uut = InBetweenMatch(Match("ABC", 0), Match("DEF", 3), Match("GHI", 6))

        expectations = ((uut.begin, "ABC", 0),
                        (uut.inside, "DEF", 3),
                        (uut.end, "GHI", 6))
        for part, text, position in expectations:
            self.assertEqual(str(part), text)
            self.assertEqual(part.position, position)

    def test_from_values(self):
        uut = InBetweenMatch.from_values("hello", 47, "world", 77, "rises", 90)

        expectations = ((uut.begin, "hello", 47),
                        (uut.inside, "world", 77),
                        (uut.end, "rises", 90))
        for part, text, position in expectations:
            self.assertEqual(str(part), text)
            self.assertEqual(part.position, position)
import unittest
from coala_utils.string_processing.Filters import limit
class LimitTest(unittest.TestCase):
    """
    Unit tests for the ``limit()`` filter.
    """
    sequence = (1, 5, 19, 22, -3, 18, 99, 500, 2015)

    def test_finite(self):
        # Positive limits cut the sequence like a slice does, including
        # limits that exceed the sequence length.
        for maximum in (1, 2, 3, 7, 8, 10, 22, 500000):
            limited = tuple(limit(self.sequence, maximum))
            self.assertEqual(limited, self.sequence[:maximum])

    def test_infinite(self):
        # Zero and negative limits leave the sequence untouched.
        for maximum in (0, -1, -2, -6555123):
            limited = tuple(limit(self.sequence, maximum))
            self.assertEqual(limited, self.sequence)
import unittest
from coala_utils.string_processing import Match
class MatchTest(unittest.TestCase):
    """
    Unit tests for the properties exposed by ``Match``.
    """

    def test_properties(self):
        text = "ABC"
        uut = Match(text, 0)

        # The matched text is reachable via property and str().
        self.assertEqual(uut.match, text)
        self.assertEqual(str(uut), text)
        # Positional information.
        self.assertEqual(uut.position, 0)
        self.assertEqual(uut.end_position, 3)
        self.assertEqual(uut.range, (0, 3))
        self.assertEqual(len(uut), 3)

    def test_properties2(self):
        text = "alea iacta est"
        uut = Match(text, 48)

        # The matched text is reachable via property and str().
        self.assertEqual(uut.match, text)
        self.assertEqual(str(uut), text)
        # Positional information with a non-zero start position.
        self.assertEqual(uut.position, 48)
        self.assertEqual(uut.end_position, 62)
        self.assertEqual(uut.range, (48, 62))
        self.assertEqual(len(uut), 14)
from coala_utils.string_processing import (
InBetweenMatch, nested_search_in_between)
from tests.string_processing.StringProcessingTestBase import (
StringProcessingTestBase)
class NestedSearchInBetweenTest(StringProcessingTestBase):
bs = StringProcessingTestBase.bs
test_basic_expected_results = [
[("(", 0, "", 1, ")", 1),
("(", 6, "This is a word", 7, ")", 21),
("(", 25, "(in a word) another ", 26, ")", 46)],
[("(", 4, "((((((((((((((((((1)2)3))))))))))))))))", 5, ")", 44)],
[("(", 6, "do (it ) more ", 7, ")", 21),
("(", 41, "", 42, ")", 42),
("(", 44, "hello.", 45, ")", 51)],
[("(", 0, "", 1, ")", 1),
("(", 8, r"This\ is a word" + bs, 9, ")", 25),
("(", 29, r"(in a\\\ word\\\\\) another " + bs, 30, ")", 59)],
[("(", 5,
r"\(\((((((\\\(((((((((((1)2)3))\\\\\)))))))))))))\)" + bs, 6,
")", 57)],
[("(", 7, "do (it ) more ", 8, ")", 22),
("(", 45, "", 46, ")", 46),
("(", 48, "hello.", 49, ")", 55)]]
# Test the basic functionality of nested_search_in_between().
def test_basic(self):
self.assertResultsEqual(
nested_search_in_between,
{(self.search_in_between_begin_pattern,
self.search_in_between_end_pattern,
test_string,
0,
False,
False): [InBetweenMatch.from_values(*args)
for args in result]
for test_string, result in zip(
self.search_in_between_test_strings,
self.test_basic_expected_results)},
list)
# Test nested_search_in_between() when feeding it with the same begin- and
# end-sequences.
def test_same_pattern(self):
self.assertResultsEqual(
nested_search_in_between,
{(pattern, pattern, test_string, 0, False, False): []
for test_string in self.search_in_between_test_strings
for pattern in [self.search_in_between_begin_pattern,
self.search_in_between_end_pattern]},
list)
# Test nested_search_in_between() for its max_match parameter.
def test_max_match(self):
self.assertResultsEqual(
nested_search_in_between,
{(self.search_in_between_begin_pattern,
self.search_in_between_end_pattern,
test_string,
max_match,
False,
False): [InBetweenMatch.from_values(*args)
for args in result]
for max_match in [1, 2, 5, 22]
for test_string, result in zip(
self.search_in_between_test_strings,
[elem[0:max_match]
for elem in self.test_basic_expected_results])},
list)
# Test nested_search_in_between() with a regex pattern.
def test_regex_pattern(self):
self.assertResultsEqual(
nested_search_in_between,
{(r"(?:)\(", r"\)(?:)", test_string, 0, False, True):
[InBetweenMatch.from_values(*args) for args in result]
for test_string, result in zip(
self.search_in_between_test_strings,
self.test_basic_expected_results)},
list)
# Test nested_search_in_between() for its auto_trim feature.
def test_auto_trim(self):
expected_results = [
[("(", 6, "This is a word", 7, ")", 21),
("(", 25, "(in a word) another ", 26, ")", 46)],
[("(", 4, "((((((((((((((((((1)2)3))))))))))))))))", 5, ")", 44)],
[("(", 6, "do (it ) more ", 7, ")", 21),
("(", 44, "hello.", 45, ")", 51)],
[("(", 8, r"This\ is a word" + self.bs, 9, ")", 25),
("(", 29,
r"(in a\\\ word\\\\\) another " + self.bs, 30,
")", 59)],
[("(",
5,
r"\(\((((((\\\(((((((((((1)2)3))\\\\\)))))))))))))\)" + self.bs,
6,
")",
57)],
[("(", 7, "do (it ) more ", 8, ")", 22),
("(", 48, "hello.", 49, ")", 55)]]
self.assertResultsEqual(
nested_search_in_between,
{(begin_pattern,
end_pattern,
test_string,
0,
True,
use_regex): [InBetweenMatch.from_values(*args)
for args in result]
for test_string, result in zip(
self.search_in_between_test_strings,
expected_results)
for use_regex, begin_pattern, end_pattern in [
(True, r"\(", r"\)"),
(False,
self.search_in_between_begin_pattern,
self.search_in_between_end_pattern)]},
list)
# Test for special cases that exposed bugs earlier.