Commit 52a87c87 by Alex

coala-utils: Move StringProcessing

parent c43aea47
def limit(iterator, count):
    """
    Yields at most ``count`` leading elements of the given iterator.

    :param iterator: The iterable whose elements are passed through.
    :param count:    The maximum number of elements to yield. Everything
                     behind that position is cut off. A value of 0 or below
                     switches the limit off, so the whole iterable is
                     yielded.
    """
    if count <= 0:
        # No limit requested: hand every element through unchanged.
        for item in iterator:
            yield item
        return

    # Stop right after the count-th element was yielded, so nothing beyond
    # the limit is requested from the underlying iterator.
    for position, item in enumerate(iterator, start=1):
        yield item
        if position == count:
            return
def trim_empty_matches(iterator, groups=(0,)):
    """
    A filter that removes matches whose inspected groups are all blank. It
    can only operate on iterators whose elements offer a ``group()`` method
    like the match objects of the ``re`` module.

    :param iterator: The iterator to be filtered.
    :param groups:   An iterable defining the groups to check for blankness.
                     A match is dropped only if all of these groups are
                     blank. Besides group numbers you can also pass group
                     names (strings) if your match objects contain named
                     groups. A group that did not take part in the match
                     (``group()`` returns ``None``) counts as blank.
    """
    # Materialize once so that a one-shot iterable passed as ``groups`` is
    # still available for every element, not only for the first one.
    groups = tuple(groups)

    for elem in iterator:
        # Truthiness treats both the empty string and ``None`` (group did not
        # participate) as blank, where ``len()`` would raise on ``None``.
        if any(elem.group(group) for group in groups):
            yield elem
from coala_utils.decorators import generate_ordering, generate_repr
from coala_utils.parsing.StringProcessing import Match
@generate_repr("begin", "inside", "end")
@generate_ordering("begin", "inside", "end")
class InBetweenMatch:
    """
    Bundles three matches: the match of a start pattern, the match of an end
    pattern and the match found in between them.
    """

    def __init__(self, begin, inside, end):
        """
        Creates an InBetweenMatch out of three ``Match`` objects.

        :param begin:       The ``Match`` of the start pattern.
        :param inside:      The ``Match`` between start and end.
        :param end:         The ``Match`` of the end pattern.
        :raises ValueError: If ``begin`` compares greater than ``inside`` or
                            ``inside`` compares greater than ``end``.
        """
        wrongly_ordered = begin > inside or inside > end
        if wrongly_ordered:
            raise ValueError("The inside match must be enclosed by the begin "
                             "and end match.")

        self._begin, self._inside, self._end = begin, inside, end

    @classmethod
    def from_values(cls, begin, begin_pos, inside, inside_pos, end, end_pos):
        """
        Builds an InBetweenMatch directly from matched strings and positions.

        This spares the caller from instantiating the ``Match`` objects:

        >>> a = InBetweenMatch(Match("A", 0), Match("B", 1), Match("C", 2))
        >>> b = InBetweenMatch.from_values("A", 0, "B", 1, "C", 2)
        >>> assert a == b

        :param begin:      The string matched by the start pattern.
        :param begin_pos:  The position of the matched begin string.
        :param inside:     The string matched in between start and end.
        :param inside_pos: The position of the matched in-between string.
        :param end:        The string matched by the end pattern.
        :param end_pos:    The position of the matched end string.
        :returns:          An InBetweenMatch made of the given values.
        """
        begin_match = Match(begin, begin_pos)
        inside_match = Match(inside, inside_pos)
        end_match = Match(end, end_pos)
        return cls(begin_match, inside_match, end_match)

    @property
    def begin(self):
        """The ``Match`` of the start pattern."""
        return self._begin

    @property
    def inside(self):
        """The ``Match`` between start and end."""
        return self._inside

    @property
    def end(self):
        """The ``Match`` of the end pattern."""
        return self._end
from coala_utils.decorators import generate_ordering, generate_repr
@generate_repr("match", "range")
@generate_ordering("range", "match")
class Match:
    """
    Describes one textual match: the matched string and where it was found.
    """

    def __init__(self, match, position):
        """
        Creates a new Match.

        :param match:    The string that was matched.
        :param position: The zero-based position where the match was found.
        """
        self._match, self._position = match, position

    def __len__(self):
        matched_text = self.match
        return len(matched_text)

    def __str__(self):
        return self.match

    @property
    def match(self):
        """
        The matched text.

        :returns: The text matched.
        """
        return self._match

    @property
    def position(self):
        """
        The zero-based position where the text was matched.

        :returns: The position.
        """
        return self._position

    @property
    def end_position(self):
        """
        The zero-based position where the matched text ends, i.e. the length
        of the match added to its start position.

        :returns: The end-position.
        """
        length = len(self)
        return length + self.position

    @property
    def range(self):
        """
        The position range covered by the matched text.

        :returns: A pair whose first element is the start position and whose
                  second element is the end position.
        """
        return self.position, self.end_position
# Start ignoring PyImportSortBear because of dependency chains!
from coala_utils.parsing.StringProcessing.Match import Match
from coala_utils.parsing.StringProcessing.InBetweenMatch import InBetweenMatch
from coala_utils.parsing.StringProcessing.Core import (
search_for, unescaped_search_for, split, unescaped_split,
search_in_between, unescaped_search_in_between, nested_search_in_between,
escape, convert_to_raw, unescape, unescaped_rstrip, unescaped_strip,
position_is_escaped)
# Stop ignoring
from coala_utils.parsing.StringProcessing import convert_to_raw
from tests.parsing.StringProcessing.StringProcessingTestBase import (
StringProcessingTestBase)
class ConvertToRawTest(StringProcessingTestBase):
    """
    Runs ``convert_to_raw()`` over a table of input/output pairs.
    """

    def test_convert_to_raw(self):
        # Every entry is an (input, expected output) pair.
        cases = (
            (r"test", r"test"),
            (r"test_path", r"test_path"),
            (r"test, path", r"test, path"),
            (r"test\ path", r"test\ path"),
            (r"test\path", r"test\\path"),
            (r"test\\path", r"test\\path"),
            (r"test\=path", r"test\=path"),
            (r"test=path", r"test=path"),
            (r"value\=as\something", r"value\=as\\something"),
        )

        for given, expected in cases:
            self.assertEqual(convert_to_raw(given, ",.=# "), expected)
from coala_utils.parsing.StringProcessing import escape
from tests.parsing.StringProcessing.StringProcessingTestBase import (
StringProcessingTestBase)
class EscapeTest(StringProcessingTestBase):
    """
    Runs ``escape()`` over the shared ``test_strings`` of the test base with
    several different argument sets.
    """

    def _assert_escaped(self, extra_args, expected_results):
        # Pairs each shared test string with its expected result; the
        # argument tuple handed to assertResultsEqual is the test string
        # followed by the given extra arguments.
        cases = {}
        for test_string, result in zip(self.test_strings, expected_results):
            cases[(test_string,) + extra_args] = result

        self.assertResultsEqual(escape, cases)

    # Test escape() using a single character to escape and default parameters.
    def test_normal_behaviour(self):
        expected_results = [
            r"out1 \'escaped-escape: \\ \' out2",
            r"out1 \'escaped-quote: \\' \' out2",
            r"out1 \'escaped-anything: \X \' out2",
            r"out1 \'two escaped escapes: \\\\ \' out2",
            r"out1 \'escaped-quote at end: \\'\' out2",
            r"out1 \'escaped-escape at end: \\\' out2",
            r"out1 \'str1\' out2 \'str2\' out2",
            r"out1 \\' \'str1\' out2 \'str2\' out2",
            r"out1 \\\\' \'str1\' out2 \'str2\' out2",
            r"out1 \\ \'str1\' out2 \'str2\' out2",
            r"out1 \\\\ \'str1\' out2 \'str2\' out2",
            r"out1 \\\'str1\' out2 \'str2\' out2",
            r"out1 \\\\\'str1\' out2 \'str2\' out2",
            r"out1 \'str1\'\'str2\'\'str3\' out2",
            r"",
            r"out1 out2 out3",
            self.bs,
            2 * self.bs]

        self._assert_escaped(("'",), expected_results)

    # Tests escape() with more than one char to escape and an escape sequence
    # that consists of more than one char.
    def test_advanced(self):
        expected_results = [
            r"out()1 'e()scaped-e()scape: \\ ' out2",
            r"out()1 'e()scaped-quote: \' ' out2",
            r"out()1 'e()scaped-anything: \X ' out2",
            r"out()1 'two e()scaped e()scape()s: \\\\ ' out2",
            r"out()1 'e()scaped-quote at end: \'' out2",
            r"out()1 'e()scaped-e()scape at end: \\' out2",
            r"out()1 '()str()1' out2 '()str2' out2",
            r"out()1 \' '()str()1' out2 '()str2' out2",
            r"out()1 \\\' '()str()1' out2 '()str2' out2",
            r"out()1 \\ '()str()1' out2 '()str2' out2",
            r"out()1 \\\\ '()str()1' out2 '()str2' out2",
            r"out()1 \\'()str()1' out2 '()str2' out2",
            r"out()1 \\\\'()str()1' out2 '()str2' out2",
            r"out()1 '()str()1''()str2''()str()()3' out2",
            r"",
            r"out()1 out2 out()()3",
            self.bs,
            2 * self.bs]

        self._assert_escaped(("1s33", "()"), expected_results)

    # Tests the realistic case when needing to escape spaces inside a shell
    # with carets.
    def test_windows_shell_space_escape(self):
        expected_results = [
            r"out1^ 'escaped-escape:^ ^ ^ ^ ^ ^ ^ ^ \\^ '^ out2",
            r"out1^ 'escaped-quote:^ ^ ^ ^ ^ ^ ^ ^ ^ \'^ '^ out2",
            r"out1^ 'escaped-anything:^ ^ ^ ^ ^ ^ \X^ '^ out2",
            r"out1^ 'two^ escaped^ escapes:^ \\\\^ '^ out2",
            r"out1^ 'escaped-quote^ at^ end:^ ^ ^ \''^ out2",
            r"out1^ 'escaped-escape^ at^ end:^ ^ \\'^ out2",
            r"out1^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ 'str1'^ out2^ 'str2'^ out2",
            r"out1^ \'^ ^ ^ ^ ^ ^ ^ ^ 'str1'^ out2^ 'str2'^ out2",
            r"out1^ \\\'^ ^ ^ ^ ^ ^ 'str1'^ out2^ 'str2'^ out2",
            r"out1^ \\^ ^ ^ ^ ^ ^ ^ ^ 'str1'^ out2^ 'str2'^ out2",
            r"out1^ \\\\^ ^ ^ ^ ^ ^ 'str1'^ out2^ 'str2'^ out2",
            r"out1^ ^ ^ ^ ^ ^ ^ ^ ^ \\'str1'^ out2^ 'str2'^ out2",
            r"out1^ ^ ^ ^ ^ ^ ^ \\\\'str1'^ out2^ 'str2'^ out2",
            r"out1^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ 'str1''str2''str3'^ out2",
            r"",
            r"out1^ out2^ out3",
            self.bs,
            2 * self.bs]

        self._assert_escaped((" ", "^"), expected_results)

    # Tests using iterators instead of strings for the chars to escape. This
    # allows to escape complete strings and not only chars.
    def test_iterators_not_strings(self):
        expected_results = [
            r"\out1 'escaped-escape: \\ ' out2",
            r"\out1 'escaped-quote: \' ' out2",
            r"\out1 'escaped-anything: \X ' out2",
            r"\out1 'two escaped escapes: \\\\ ' out2",
            r"\out1 'escaped-quote at end: \'' out2",
            r"\out1 'escaped-escape at end: \\' out2",
            r"\out1 'str1' out2 '\str2' out2",
            r"\out1 \' 'str1' out2 '\str2' out2",
            r"\out1 \\\' 'str1' out2 '\str2' out2",
            r"\out1 \\ 'str1' out2 '\str2' out2",
            r"\out1 \\\\ 'str1' out2 '\str2' out2",
            r"\out1 \\'str1' out2 '\str2' out2",
            r"\out1 \\\\'str1' out2 '\str2' out2",
            r"\out1 'str1''\str2''str3' out2",
            r"",
            r"\out1 out2 out3",
            self.bs,
            2 * self.bs]

        self._assert_escaped((("out1", "str2"),), expected_results)
import unittest
from coala_utils.parsing.StringProcessing import InBetweenMatch, Match
class InBetweenMatchTest(unittest.TestCase):
    """
    Covers construction, validation and the properties of InBetweenMatch.
    """

    def test_invalid(self):
        # The Match objects are built outside the assertRaises context so
        # that only the InBetweenMatch construction itself is checked.
        begin, inside, end = Match("a", 10), Match("b", -1), Match("c", 12)
        with self.assertRaises(ValueError):
            InBetweenMatch(begin, inside, end)

        with self.assertRaises(ValueError):
            InBetweenMatch.from_values("X", 1, "QAD", 2, "LK", 1)

        with self.assertRaises(ValueError):
            InBetweenMatch.from_values("1", 50, "2", 22, "3", 28)

    def test_properties(self):
        uut = InBetweenMatch(Match("ABC", 0), Match("DEF", 3), Match("GHI", 6))

        begin = uut.begin
        self.assertEqual(str(begin), "ABC")
        self.assertEqual(uut.begin.position, 0)

        inside = uut.inside
        self.assertEqual(str(inside), "DEF")
        self.assertEqual(uut.inside.position, 3)

        end = uut.end
        self.assertEqual(str(end), "GHI")
        self.assertEqual(uut.end.position, 6)

    def test_from_values(self):
        uut = InBetweenMatch.from_values("hello", 47, "world", 77, "rises", 90)

        begin = uut.begin
        self.assertEqual(str(begin), "hello")
        self.assertEqual(uut.begin.position, 47)

        inside = uut.inside
        self.assertEqual(str(inside), "world")
        self.assertEqual(uut.inside.position, 77)

        end = uut.end
        self.assertEqual(str(end), "rises")
        self.assertEqual(uut.end.position, 90)
import unittest
from coala_utils.parsing.StringProcessing.Filters import limit
class LimitTest(unittest.TestCase):
    """
    Checks the ``limit()`` filter with positive and non-positive limits.
    """

    sequence = (1, 5, 19, 22, -3, 18, 99, 500, 2015)

    def test_finite(self):
        # A positive limit has to behave like slicing off the leading part.
        for test_limit in (1, 2, 3, 7, 8, 10, 22, 500000):
            limited = tuple(limit(self.sequence, test_limit))
            self.assertEqual(limited, self.sequence[:test_limit])

    def test_infinite(self):
        # Zero and negative limits have to pass the whole sequence through.
        for test_limit in (0, -1, -2, -6555123):
            unlimited = tuple(limit(self.sequence, test_limit))
            self.assertEqual(unlimited, self.sequence)
import unittest
from coala_utils.parsing.StringProcessing import Match
class MatchTest(unittest.TestCase):
    """
    Checks the properties a ``Match`` exposes for two sample matches.
    """

    def _assert_match(self, uut, text, position, end_position, length):
        # All expected values are passed in explicitly; nothing is derived
        # from the object under test.
        self.assertEqual(uut.match, text)
        self.assertEqual(str(uut), text)
        self.assertEqual(uut.position, position)
        self.assertEqual(uut.end_position, end_position)
        self.assertEqual(uut.range, (position, end_position))
        self.assertEqual(len(uut), length)

    def test_properties(self):
        self._assert_match(Match("ABC", 0), "ABC", 0, 3, 3)

    def test_properties2(self):
        self._assert_match(Match("alea iacta est", 48),
                           "alea iacta est", 48, 62, 14)
from coala_utils.parsing.StringProcessing import (
InBetweenMatch, nested_search_in_between)
from tests.parsing.StringProcessing.StringProcessingTestBase import (
StringProcessingTestBase)
class NestedSearchInBetweenTest(StringProcessingTestBase):
    """
    Runs ``nested_search_in_between()`` over the shared
    ``search_in_between_test_strings`` of the test base.
    """

    bs = StringProcessingTestBase.bs

    # One list per test string; every tuple holds the arguments for
    # InBetweenMatch.from_values(), i.e.
    # (begin, begin_pos, inside, inside_pos, end, end_pos).
    test_basic_expected_results = [
        [("(", 0, "", 1, ")", 1),
         ("(", 6, "This is a word", 7, ")", 21),
         ("(", 25, "(in a word) another ", 26, ")", 46)],
        [("(", 4, "((((((((((((((((((1)2)3))))))))))))))))", 5, ")", 44)],
        [("(", 6, "do (it ) more ", 7, ")", 21),
         ("(", 41, "", 42, ")", 42),
         ("(", 44, "hello.", 45, ")", 51)],
        [("(", 0, "", 1, ")", 1),
         ("(", 8, r"This\ is a word" + bs, 9, ")", 25),
         ("(", 29, r"(in a\\\ word\\\\\) another " + bs, 30, ")", 59)],
        [("(", 5,
          r"\(\((((((\\\(((((((((((1)2)3))\\\\\)))))))))))))\)" + bs, 6,
          ")", 57)],
        [("(", 7, "do (it ) more ", 8, ")", 22),
         ("(", 45, "", 46, ")", 46),
         ("(", 48, "hello.", 49, ")", 55)]]

    @staticmethod
    def _as_matches(value_tuples):
        # Turns argument tuples into the InBetweenMatch objects to expect.
        return [InBetweenMatch.from_values(*values)
                for values in value_tuples]

    # Test the basic functionality of nested_search_in_between().
    def test_basic(self):
        cases = {}
        for test_string, result in zip(self.search_in_between_test_strings,
                                       self.test_basic_expected_results):
            arguments = (self.search_in_between_begin_pattern,
                         self.search_in_between_end_pattern,
                         test_string,
                         0,
                         False,
                         False)
            cases[arguments] = self._as_matches(result)

        self.assertResultsEqual(nested_search_in_between, cases, list)

    # Test nested_search_in_between() when feeding it with the same begin- and
    # end-sequences.
    def test_same_pattern(self):
        cases = {}
        for test_string in self.search_in_between_test_strings:
            for pattern in [self.search_in_between_begin_pattern,
                            self.search_in_between_end_pattern]:
                cases[(pattern, pattern, test_string, 0, False, False)] = []

        self.assertResultsEqual(nested_search_in_between, cases, list)

    # Test nested_search_in_between() for its max_match parameter.
    def test_max_match(self):
        cases = {}
        for max_match in [1, 2, 5, 22]:
            # Expect only the first max_match results for each test string.
            trimmed_results = [elem[0:max_match]
                               for elem in self.test_basic_expected_results]
            for test_string, result in zip(
                    self.search_in_between_test_strings,
                    trimmed_results):
                arguments = (self.search_in_between_begin_pattern,
                             self.search_in_between_end_pattern,
                             test_string,
                             max_match,
                             False,
                             False)
                cases[arguments] = self._as_matches(result)

        self.assertResultsEqual(nested_search_in_between, cases, list)

    # Test nested_search_in_between() with a regex pattern.
    def test_regex_pattern(self):
        cases = {}
        for test_string, result in zip(self.search_in_between_test_strings,
                                       self.test_basic_expected_results):
            arguments = (r"(?:)\(", r"\)(?:)", test_string, 0, False, True)
            cases[arguments] = self._as_matches(result)

        self.assertResultsEqual(nested_search_in_between, cases, list)

    # Test nested_search_in_between() for its auto_trim feature.
    def test_auto_trim(self):
        expected_results = [
            [("(", 6, "This is a word", 7, ")", 21),
             ("(", 25, "(in a word) another ", 26, ")", 46)],
            [("(", 4, "((((((((((((((((((1)2)3))))))))))))))))", 5, ")", 44)],
            [("(", 6, "do (it ) more ", 7, ")", 21),
             ("(", 44, "hello.", 45, ")", 51)],
            [("(", 8, r"This\ is a word" + self.bs, 9, ")", 25),
             ("(", 29,
              r"(in a\\\ word\\\\\) another " + self.bs, 30,
              ")", 59)],
            [("(",
              5,
              r"\(\((((((\\\(((((((((((1)2)3))\\\\\)))))))))))))\)" + self.bs,
              6,
              ")",
              57)],
            [("(", 7, "do (it ) more ", 8, ")", 22),
             ("(", 48, "hello.", 49, ")", 55)]]

        cases = {}
        for test_string, result in zip(self.search_in_between_test_strings,
                                       expected_results):
            # Run every test string once with regex patterns and once with
            # the plain patterns of the test base.
            for use_regex, begin_pattern, end_pattern in [
                    (True, r"\(", r"\)"),
                    (False,
                     self.search_in_between_begin_pattern,
                     self.search_in_between_end_pattern)]:
                arguments = (begin_pattern,
                             end_pattern,
                             test_string,
                             0,
                             True,
                             use_regex)
                cases[arguments] = self._as_matches(result)

        self.assertResultsEqual(nested_search_in_between, cases, list)

    # Test for special cases that exposed bugs earlier.
    def test_special(self):
        cases = {("(", ")", "a)b(c", 0, True, False): []}
        self.assertResultsEqual(nested_search_in_between, cases, list)
from coala_utils.parsing.StringProcessing import position_is_escaped
from tests.parsing.StringProcessing.StringProcessingTestBase import (
StringProcessingTestBase)
class PositionIsEscapedTest(StringProcessingTestBase):
    """
    Runs ``position_is_escaped()`` over the shared ``test_strings`` of the
    test base and over one additional hand-written string.
    """

    # Test the position_is_escaped() function.
    def test_basic(self):
        # One list per test string holding the expected result for each
        # position, starting at position 0.
        expected_results = [
            30 * [False] + [True] + 7 * [False],
            30 * [False] + [True] + 7 * [False],
            30 * [False] + [True] + 7 * [False],
            28 * [False] + [True, False, True] + 7 * [False],
            31 * [False] + [True] + 6 * [False],
            31 * [False] + [True] + 6 * [False],
            38 * [False],
            6 * [False] + [True] + 31 * [False],
            6 * [False] + [True, False, True] + 29 * [False],
            6 * [False] + [True] + 31 * [False],
            6 * [False] + [True, False, True] + 29 * [False],
            14 * [False] + [True] + 23 * [False],
            12 * [False] + [True, False, True] + 23 * [False],
            38 * [False],
            [],
            14 * [False],
            [False],
            [False, True]]

        cases = {}
        for test_string, string_result in zip(self.test_strings,
                                              expected_results):
            positions = range(len(test_string))
            for position, result in zip(positions, string_result):
                cases[(test_string, position)] = result

        self.assertResultsEqual(position_is_escaped, cases)

    # Test position_is_escaped() with a more special test string.
    def test_extended(self):
        test_string = r"\\\\\abcabccba###\\13q4ujsabbc\+'**'ac###.#.####-ba"

        # Maps a position inside test_string to the expected result.
        result_dict = {
            0: False,
            1: True,
            2: False,
            3: True,
            4: False,
            5: True,
            6: False,
            7: False,
            17: False,
            18: True,
            19: False,
            30: False,
            31: True,
            50: False,
            51: False,
            6666666: False,
            -1: False,
            -20: True,
            -21: False}

        cases = {}
        for position, result in result_dict.items():
            cases[(test_string, position)] = result

        self.assertResultsEqual(position_is_escaped, cases)
from coala_utils.parsing.StringProcessing import search_for
from tests.parsing.StringProcessing.StringProcessingTestBase import (
StringProcessingTestBase)
class SearchForTest(StringProcessingTestBase):
# Match either "out1" or "out2".
test_basic_pattern = "out1|out2"
# These are the expected results for the zero-group of the
# returned MatchObject's.
test_basic_expected_results = [
[r"out1", r"out2"],
[r"out1", r"out2"],
[r"out1", r"out2"],
[r"out1", r"out2"],
[r"out1", r"out2"],
[r"out1", r"out2"],
[r"out1", r"out2", r"out2"],
[r"out1", r"out2", r"out2"],
[r"out1", r"out2", r"out2"],
[r"out1", r"out2", r"out2"],
[r"out1", r"out2", r"out2"],
[r"out1", r"out2", r"out2"],
[r"out1", r"out2", r"out2"],
[r"out1", r"out2"],
[],
[r"out1", r"out2"],
[],
[]]
@staticmethod
def list_zero_group(it):
"""
Collects all MatchObject elements from the given iterator and extracts
their first matching group (group 0).
:param it: The input iterator where to collect from.
"""
return [elem.group(0) for elem in it]
# Test the search_for() function.
def test_basic(self):
    # Pair every shared test string with the group(0) values expected for
    # test_basic_pattern; the last argument is the use_regex flag.
    cases = {}
    for test_string, result in zip(self.test_strings,
                                   self.test_basic_expected_results):
        cases[(self.test_basic_pattern, test_string, 0, 0, True)] = result

    self.assertResultsEqual(search_for, cases, self.list_zero_group)
# Test search_for() with a simple pattern.
def test_simple_pattern(self):
    # Number of single quotes expected to be found per shared test string.
    quote_counts = [2, 3, 2, 2, 3, 2, 4, 5, 5, 4, 4, 4, 4, 6, 0, 0, 0, 0]
    expected_results = [count * [r"'"] for count in quote_counts]

    cases = {}
    for test_string, result in zip(self.test_strings, expected_results):
        # The same result is expected with and without regex handling.
        for use_regex in [True, False]:
            cases[(r"'", test_string, 0, 0, use_regex)] = result

    self.assertResultsEqual(search_for, cases, self.list_zero_group)
# Test search_for() with an empty pattern.
def test_empty_pattern(self):
expected_results = [
(len(elem) + 1) * [r""] for elem in self.test_strings]
self.assertResultsEqual(
search_for,
{(r"", test_string, 0, 0, use_regex): result
for test_string, result in zip(self.test_strings,
expected_results)
for use_regex in [True, False]},