Core Modules

Explainer Module

rexplain.core.explainer

RegexExplainer

Provides human-readable explanations for regex patterns.

Source code in src/rexplain/core/explainer.py
class RegexExplainer:
    """
    Provides human-readable explanations for regex patterns.
    """
    def explain(self, pattern: str, flags: int = 0) -> str:
        r"""
        Explain a regex pattern as a formatted, line-by-line string.

        Args:
            pattern (str): The regex pattern to explain.
            flags (int, optional): Regex flags (e.g., re.IGNORECASE). Defaults to 0.

        Returns:
            str: A line-by-line explanation of the regex pattern.
        """
        from .parser import RegexParser
        ast = RegexParser().parse(pattern, flags=flags)
        return explain(ast)

explain(pattern, flags=0)

Explain a regex pattern as a formatted, line-by-line string.

Parameters:

    pattern (str): The regex pattern to explain. Required.
    flags (int): Regex flags (e.g., re.IGNORECASE). Defaults to 0.

Returns:

    str: A line-by-line explanation of the regex pattern.

Source code in src/rexplain/core/explainer.py
def explain(self, pattern: str, flags: int = 0) -> str:
    r"""
    Explain a regex pattern as a formatted, line-by-line string.

    Args:
        pattern (str): The regex pattern to explain.
        flags (int, optional): Regex flags (e.g., re.IGNORECASE). Defaults to 0.

    Returns:
        str: A line-by-line explanation of the regex pattern.
    """
    from .parser import RegexParser
    ast = RegexParser().parse(pattern, flags=flags)
    return explain(ast)
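
A minimal usage sketch, assuming the package is importable as rexplain (the example pattern and the exact wording of the output are illustrative):

from rexplain.core.explainer import RegexExplainer

explainer = RegexExplainer()
# Prints a line-by-line explanation of the pattern
print(explainer.explain(r"ab+c"))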

explain(ast)

Return a line-by-line, context-aware explanation of the regex AST.

Parameters:

    ast (RegexAST): The root node of the regex AST. Required.

Returns:

    str: A formatted, line-by-line explanation of the regex pattern.

Source code in src/rexplain/core/explainer.py
def explain(ast: RegexAST) -> str:
    r"""
    Return a line-by-line, context-aware explanation of the regex AST.

    Args:
        ast (RegexAST): The root node of the regex AST.

    Returns:
        str: A formatted, line-by-line explanation of the regex pattern.
    """
    lines = _token_and_explanation(ast)
    return '\n'.join(lines)
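
If an AST is already available, the module-level explain function can be called directly; a small sketch (the pattern is chosen for illustration):

from rexplain.core.parser import RegexParser
from rexplain.core.explainer import explain

ast = RegexParser().parse(r"[a-z]+\d")
print(explain(ast))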

Generator Module

rexplain.core.generator

ExampleGenerator

Generates example strings that match a given regex pattern using the AST.

Source code in src/rexplain/core/generator.py
class ExampleGenerator:
    """
    Generates example strings that match a given regex pattern using the AST.
    """
    def __init__(self):
        """
        Initialize the ExampleGenerator.
        """
        self.parser = RegexParser()
        # For negated char classes, pick from this set
        self.default_charset = [chr(i) for i in range(32, 127)]

    def generate(self, pattern: str, count: int = 3, flags: int = 0) -> List[str]:
        """
        Generate a list of example strings that match the given regex pattern.

        Args:
            pattern (str): The regex pattern.
            count (int, optional): Number of examples to generate. Defaults to 3.
            flags (int, optional): Regex flags (e.g., re.IGNORECASE). Defaults to 0.

        Returns:
            List[str]: Example strings matching the pattern.
        """
        ast = self.parser.parse(pattern, flags=flags)
        # For alternations, try to cover all branches if possible
        if isinstance(ast, Alternation) and count <= len(ast.options):
            return [self._generate_from_ast(opt) for opt in ast.options[:count]]
        # Special handling for anchored patterns: only generate the exact match
        if self._is_fully_anchored(ast):
            return [self._generate_from_ast(ast)] * count
        return [self._generate_from_ast(ast) for _ in range(count)]

    def _is_fully_anchored(self, ast: RegexAST) -> bool:
        # Returns True if the pattern is ^...$ (fully anchored)
        if isinstance(ast, Sequence):
            elements = ast.elements
            if len(elements) >= 2 and isinstance(elements[0], Anchor) and elements[0].value == '^' and \
               isinstance(elements[-1], Anchor) and elements[-1].value == '$':
                return True
            if len(elements) == 1 and isinstance(elements[0], Anchor):
                return True
        if isinstance(ast, Anchor):
            return True
        return False

    def _generate_from_ast(self, ast: RegexAST) -> str:
        if isinstance(ast, Literal):
            return ast.value
        elif isinstance(ast, CharClass):
            chars, negated = self._parse_char_class(ast.value)
            if negated:
                candidates = [c for c in self.default_charset if c not in chars]
                return random.choice(candidates) if candidates else '?'
            else:
                return random.choice(chars) if chars else ''
        elif isinstance(ast, Escape):
            # Map escapes to representative characters
            escape_map = {
                r'\d': lambda: str(random.randint(0, 9)),
                r'\w': lambda: random.choice('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'),
                r'\s': lambda: random.choice([' ', '\t', '\n']),
                r'\D': lambda: random.choice([c for c in self.default_charset if not c.isdigit()]),
                r'\W': lambda: random.choice([c for c in self.default_charset if not (c.isalnum() or c == '_')]),
                r'\S': lambda: random.choice([c for c in self.default_charset if not c.isspace()]),
                r'\\': lambda: '\\',
                r'\n': lambda: '\n',
                r'\t': lambda: '\t',
                r'\r': lambda: '\r',
                r'\b': lambda: '',  # word boundary, ignore in generation
                r'\B': lambda: '',  # non-word boundary, ignore
            }
            # Unicode/hex escapes
            if ast.value.startswith(r'\u') and len(ast.value) == 6:
                try:
                    codepoint = int(ast.value[2:], 16)
                    return chr(codepoint)
                except Exception:
                    return '?'
            if ast.value.startswith(r'\x') and len(ast.value) == 4:
                try:
                    codepoint = int(ast.value[2:], 16)
                    return chr(codepoint)
                except Exception:
                    return '?'
            return escape_map.get(ast.value, lambda: '?')()
        elif isinstance(ast, Quantifier):
            min_n, max_n = self._parse_quant(ast.quant)
            # For larger ranges, cap max_n for practicality (but never below min_n)
            max_n = max(min_n, min(max_n, 8))
            n = random.randint(min_n, max_n)
            return ''.join(self._generate_from_ast(ast.child) for _ in range(n))
        elif isinstance(ast, Anchor):
            # Anchors do not produce characters
            return ''
        elif isinstance(ast, Sequence):
            # If fully anchored, only generate the inner content
            if self._is_fully_anchored(ast):
                elements = ast.elements
                # Remove ^ and $ anchors
                inner = elements[1:-1]
                return ''.join(self._generate_from_ast(e) for e in inner)
            # Recursively generate for each element
            return ''.join(self._generate_from_ast(e) for e in ast.elements)
        elif isinstance(ast, Alternation):
            # Randomly pick one option, support nested alternations
            option = random.choice(ast.options)
            return self._generate_from_ast(option)
        elif isinstance(ast, Group):
            # For lookahead/lookbehind, do not generate any characters
            if ast.group_type in {'GROUP_LOOKAHEAD', 'GROUP_NEG_LOOKAHEAD', 'GROUP_LOOKBEHIND', 'GROUP_NEG_LOOKBEHIND'}:
                return ''
            # Recursively generate for each child (supports nested groups)
            return ''.join(self._generate_from_ast(child) for child in ast.children)
        else:
            return ''

    def _parse_char_class(self, class_str: str) -> Tuple[List[str], bool]:
        # Enhanced char class parser: supports negation and ranges
        chars = []
        negated = False
        if class_str.startswith('[') and class_str.endswith(']'):
            inner = class_str[1:-1]
            if inner.startswith('^'):
                negated = True
                inner = inner[1:]
            i = 0
            while i < len(inner):
                if i+2 < len(inner) and inner[i+1] == '-':
                    # Range
                    start, end = inner[i], inner[i+2]
                    chars.extend([chr(c) for c in range(ord(start), ord(end)+1)])
                    i += 3
                else:
                    chars.append(inner[i])
                    i += 1
        return chars, negated

    def _parse_quant(self, quant: str) -> Tuple[int, int]:
        # Returns (min, max) for quantifier
        if quant == '*':
            return (0, 4)
        elif quant == '+':
            return (1, 4)
        elif quant == '?':
            return (0, 1)
        elif quant.endswith('?'):
            # Non-greedy, treat as normal
            return self._parse_quant(quant[:-1])
        elif quant.startswith('{'):
            import re
            m = re.match(r'\{(\d+)(,(\d*)?)?\}', quant)
            if m:
                n1 = int(m.group(1))
                n2 = m.group(3)
                if n2 == '' or n2 is None:
                    return (n1, n1)
                elif n2:
                    return (n1, int(n2) if n2.isdigit() else n1+4)
            return (1, 1)
        else:
            return (1, 1)

__init__()

Initialize the ExampleGenerator.

Source code in src/rexplain/core/generator.py
def __init__(self):
    """
    Initialize the ExampleGenerator.
    """
    self.parser = RegexParser()
    # For negated char classes, pick from this set
    self.default_charset = [chr(i) for i in range(32, 127)]

generate(pattern, count=3, flags=0)

Generate a list of example strings that match the given regex pattern.

Parameters:

    pattern (str): The regex pattern. Required.
    count (int): Number of examples to generate. Defaults to 3.
    flags (int): Regex flags (e.g., re.IGNORECASE). Defaults to 0.

Returns:

    List[str]: Example strings matching the pattern.

Source code in src/rexplain/core/generator.py
def generate(self, pattern: str, count: int = 3, flags: int = 0) -> List[str]:
    """
    Generate a list of example strings that match the given regex pattern.

    Args:
        pattern (str): The regex pattern.
        count (int, optional): Number of examples to generate. Defaults to 3.
        flags (int, optional): Regex flags (e.g., re.IGNORECASE). Defaults to 0.

    Returns:
        List[str]: Example strings matching the pattern.
    """
    ast = self.parser.parse(pattern, flags=flags)
    # For alternations, try to cover all branches if possible
    if isinstance(ast, Alternation) and count <= len(ast.options):
        return [self._generate_from_ast(opt) for opt in ast.options[:count]]
    # Special handling for anchored patterns: only generate the exact match
    if self._is_fully_anchored(ast):
        return [self._generate_from_ast(ast)] * count
    return [self._generate_from_ast(ast) for _ in range(count)]
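
A quick usage sketch; generation is random, so the exact strings vary between runs (the pattern is illustrative):

from rexplain.core.generator import ExampleGenerator

gen = ExampleGenerator()
# Returns three strings such as 'a7' or 'bba12', each matching the pattern
print(gen.generate(r"[ab]+\d{1,2}", count=3))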

Parser Module

rexplain.core.parser

Alternation dataclass

Bases: RegexAST

Represents alternation, e.g., a|b|c.

Source code in src/rexplain/core/parser.py
@dataclass
class Alternation(RegexAST):
    """
    Represents alternation, e.g., a|b|c.
    """
    options: List[RegexAST]
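
For instance, the pattern a|b corresponds to the following nodes (a hand-built sketch, not captured parser output):

from rexplain.core.parser import Alternation, Literal

ast = Alternation(options=[Literal(value='a'), Literal(value='b')])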

Anchor dataclass

Bases: RegexAST

Represents anchors like ^, $, \b, etc.

Source code in src/rexplain/core/parser.py
@dataclass
class Anchor(RegexAST):
    r"""
    Represents anchors like ^, $, \b, etc.
    """
    value: str

CharClass dataclass

Bases: RegexAST

Represents a character class, e.g., [a-z] or [^abc].

Source code in src/rexplain/core/parser.py
@dataclass
class CharClass(RegexAST):
    """
    Represents a character class, e.g., [a-z] or [^abc].
    """
    value: str  # The raw class string, e.g., '[a-z]'

Escape dataclass

Bases: RegexAST

Represents escape sequences like \d, \w, etc.

Source code in src/rexplain/core/parser.py
@dataclass
class Escape(RegexAST):
    r"""
    Represents escape sequences like \d, \w, etc.
    """
    value: str

Group dataclass

Bases: RegexAST

Represents a group (capturing, non-capturing, named, lookahead, etc.).

Source code in src/rexplain/core/parser.py
@dataclass
class Group(RegexAST):
    """
    Represents a group (capturing, non-capturing, named, lookahead, etc.).
    """
    group_type: str  # token type, e.g. 'GROUP_OPEN' (capturing), 'GROUP_NONCAP', 'GROUP_NAMED', 'GROUP_LOOKAHEAD', etc.
    children: List[RegexAST]
    name: Optional[str] = None  # For named groups
    flags: Optional[str] = None  # For inline/scoped flags
    condition: Optional[str] = None  # For conditional expressions

Literal dataclass

Bases: RegexAST

Represents a literal character in the regex.

Source code in src/rexplain/core/parser.py
@dataclass
class Literal(RegexAST):
    """
    Represents a literal character in the regex.
    """
    value: str

Quantifier dataclass

Bases: RegexAST

Represents a quantifier applied to a subpattern, e.g., a*, b{2,3}.

Source code in src/rexplain/core/parser.py
@dataclass
class Quantifier(RegexAST):
    """
    Represents a quantifier applied to a subpattern, e.g., a*, b{2,3}.
    """
    child: RegexAST
    quant: str  # '*', '+', '?', '{n}', '{n,m}', etc.

RegexAST dataclass

Base class for all AST nodes representing regex components.

Source code in src/rexplain/core/parser.py
@dataclass
class RegexAST:
    """
    Base class for all AST nodes representing regex components.
    """
    pass

RegexParser

Parses a regex string into an abstract syntax tree (AST).

Source code in src/rexplain/core/parser.py
class RegexParser:
    """
    Parses a regex string into an abstract syntax tree (AST).
    """
    def parse(self, pattern: str, flags: int = 0) -> RegexAST:
        r"""
        Parse a regex pattern string into an AST.

        Args:
            pattern (str): The regex pattern to parse.
            flags (int, optional): Regex flags (e.g., re.IGNORECASE). Defaults to 0.

        Returns:
            RegexAST: The root node of the parsed regex AST.
        """
        tokens = self.tokenize(pattern, flags)
        self._tokens = tokens
        self._pos = 0
        ast = self._parse_alternation()
        return ast

    def _peek(self):
        if self._pos < len(self._tokens):
            return self._tokens[self._pos]
        return None

    def _advance(self):
        tok = self._peek()
        if tok:
            self._pos += 1
        return tok

    def _parse_alternation(self):
        options = [self._parse_sequence()]
        while self._peek() and self._peek().type == 'SPECIAL' and self._peek().value == '|':
            self._advance()  # skip '|'
            options.append(self._parse_sequence())
        if len(options) == 1:
            return options[0]
        return Alternation(options)

    def _parse_sequence(self):
        elements = []
        while True:
            tok = self._peek()
            if tok is None or (tok.type == 'SPECIAL' and tok.value == '|') or (tok.type == 'GROUP_CLOSE'):
                break
            elements.append(self._parse_quantifier())
        if len(elements) == 1:
            return elements[0]
        return Sequence(elements)

    def _parse_quantifier(self):
        # Always allow quantifiers to apply to any atom, including Anchor
        atom = self._parse_atom()
        tok = self._peek()
        if tok and tok.type == 'QUANTIFIER':
            quant_tok = self._advance()
            # Check for non-greedy quantifier (e.g., *?, +?, ??, {n,m}?)
            next_tok = self._peek()
            if next_tok and next_tok.type == 'SPECIAL' and next_tok.value == '?':
                self._advance()
                quant_str = quant_tok.value + '?'
            else:
                quant_str = quant_tok.value
            return Quantifier(atom, quant_str)
        return atom

    def _parse_atom(self):
        tok = self._peek()
        if tok is None:
            return None
        # Escaped metacharacters as literals
        if tok.type == 'ESCAPE':
            # If it's an escaped metacharacter, treat as Literal
            metachars = {'.', '*', '+', '?', '|', '(', ')', '[', ']', '{', '}', '^', '$', '\\'}
            if len(tok.value) == 2 and tok.value[1] in metachars:
                self._advance()
                return Literal(tok.value[1])
            else:
                self._advance()
                return Escape(tok.value)
        elif tok.type == 'LITERAL':
            self._advance()
            return Literal(tok.value)
        elif tok.type == 'CHAR_CLASS':
            self._advance()
            return CharClass(tok.value)
        elif tok.type == 'SPECIAL' and tok.value in {'^', '$'}:
            self._advance()
            return Anchor(tok.value)
        elif tok.type.startswith('GROUP_'):
            return self._parse_group()
        else:
            self._advance()
            return Literal(tok.value)

    def _parse_group(self):
        tok = self._advance()
        group_type = tok.type
        name = None
        flags = None
        condition = None
        # Inline flags: (?i), (?m), (?s), or scoped flags (?i:...)
        if group_type == 'GROUP_FLAGS':
            # Distinguish between inline and scoped flags
            import re
            m = re.match(r'\(\?[a-zA-Z]+([):])', tok.value)
            if m and m.group(1) == ')':
                # Inline flags group, e.g., (?i)
                flags = tok.value[2:-1]  # extract flags between (? and )
                return Group('GROUP_FLAGS', [], None, flags=flags)
            elif m and m.group(1) == ':':
                # Scoped flags group, e.g., (?m:...)
                flags = tok.value[2:-1]  # extract flags between (? and :
                group_type = 'GROUP_FLAGS'
                # Parse group contents until closing paren
                children = []
                if self._peek() and self._peek().type == 'GROUP_CLOSE':
                    self._advance()  # empty group
                    return Group(group_type, children, name, flags, condition)
                children.append(self._parse_alternation())
                if self._peek() and self._peek().type == 'GROUP_CLOSE':
                    self._advance()
                else:
                    raise ValueError('Unclosed group: missing )')
                return Group(group_type, children, name, flags, condition)
        if group_type == 'GROUP_NAMED':
            # Extract group name from value, e.g., (?P<name>
            import re
            m = re.match(r'\(\?P<([^>]+)>', tok.value)
            if m:
                name = m.group(1)  # extract the captured group name
        # For lookahead/lookbehind/noncap/flags/conditional and other group types, parse contents then expect GROUP_CLOSE
        children = []
        if group_type in {'GROUP_LOOKAHEAD', 'GROUP_NEG_LOOKAHEAD', 'GROUP_LOOKBEHIND', 'GROUP_NEG_LOOKBEHIND', 'GROUP_NONCAP', 'GROUP_FLAGS', 'GROUP_CONDITIONAL', 'GROUP_NAMED'}:
            # Parse group contents until closing paren
            if self._peek() and self._peek().type == 'GROUP_CLOSE':
                self._advance()  # empty group
                return Group(group_type, children, name, flags, condition)
            children.append(self._parse_alternation())
            if self._peek() and self._peek().type == 'GROUP_CLOSE':
                self._advance()
            else:
                raise ValueError('Unclosed group: missing )')
            return Group(group_type, children, name, flags, condition)
        # For capturing groups, parse alternation (may be nested)
        if self._peek() and self._peek().type == 'GROUP_CLOSE':
            self._advance()  # consume ')'
            return Group(group_type, children, name, flags, condition)
        while self._peek() and not (self._peek().type == 'GROUP_CLOSE'):
            children.append(self._parse_alternation())
        if self._peek() and self._peek().type == 'GROUP_CLOSE':
            self._advance()  # consume ')'
        else:
            raise ValueError('Unclosed group: missing )')
        return Group(group_type, children, name, flags, condition)

    def tokenize(self, pattern: str, flags: int = 0) -> List['RegexToken']:
        r"""
        Tokenize a regex pattern string into RegexToken objects, including character classes and groups.

        Args:
            pattern (str): The regex pattern to tokenize.
            flags (int, optional): Regex flags (e.g., re.IGNORECASE). Defaults to 0.

        Returns:
            List[RegexToken]: List of tokens representing the regex pattern.
        """
        tokens: List[RegexToken] = []
        i = 0
        special_chars = {'.', '*', '+', '?', '|', '(', ')', '[', ']', '{', '}', '^', '$'}
        escape_sequences = {'d', 'w', 's', 'D', 'W', 'S', 'b', 'B', 'A', 'Z', 'G', 'n', 'r', 't', 'v', 'f', '\\', 'u', 'x', 'N'}
        length = len(pattern)
        while i < length:
            c = pattern[i]
            # Character class
            if c == '[':
                start = i
                i += 1
                in_escape = False
                closed = False
                while i < length:
                    if not in_escape and pattern[i] == ']':
                        i += 1
                        closed = True
                        break
                    if pattern[i] == '\\' and not in_escape:
                        in_escape = True
                        i += 1
                    else:
                        in_escape = False
                        i += 1
                if not closed:
                    raise ValueError('Unclosed character class: missing ]')
                tokens.append(RegexToken(type='CHAR_CLASS', value=pattern[start:i]))
            # Group constructs
            elif c == '(':
                if pattern[i:i+3] == '(?:':
                    tokens.append(RegexToken(type='GROUP_NONCAP', value='(?:'))
                    i += 3
                elif pattern[i:i+4] == '(?P<':
                    # Named group: (?P<name>
                    start = i
                    j = i+4
                    while j < length and pattern[j] != '>':
                        j += 1
                    if j < length and pattern[j] == '>':
                        group_str = pattern[start:j+1]
                        tokens.append(RegexToken(type='GROUP_NAMED', value=group_str))
                        i = j+1  # Advance index to after the closing '>'
                    else:
                        tokens.append(RegexToken(type='GROUP_OPEN', value='('))
                        i += 1
                elif pattern[i:i+3] == '(?=':
                    tokens.append(RegexToken(type='GROUP_LOOKAHEAD', value='(?='))
                    i += 3
                elif pattern[i:i+3] == '(?!':
                    tokens.append(RegexToken(type='GROUP_NEG_LOOKAHEAD', value='(?!'))
                    i += 3
                elif pattern[i:i+4] == '(?<=':
                    tokens.append(RegexToken(type='GROUP_LOOKBEHIND', value='(?<='))
                    i += 4
                elif pattern[i:i+4] == '(?<!':
                    tokens.append(RegexToken(type='GROUP_NEG_LOOKBEHIND', value='(?<!'))
                    i += 4
                # Inline flags or conditional expressions
                elif pattern[i:i+2] == '(?':
                    # Could be inline flags, scoped flags, or conditional
                    j = i+2
                    flag_str = ''
                    while j < length and pattern[j] in 'imsxauL':
                        flag_str += pattern[j]
                        j += 1
                    if j < length and pattern[j] == ':':
                        tokens.append(RegexToken(type='GROUP_FLAGS', value=pattern[i:j+1]))
                        i = j+1
                    elif j < length and pattern[j] == ')':
                        tokens.append(RegexToken(type='GROUP_FLAGS', value=pattern[i:j+1]))
                        i = j+1
                    else:
                        tokens.append(RegexToken(type='GROUP_OPEN', value='('))
                        i += 1
                else:
                    tokens.append(RegexToken(type='GROUP_OPEN', value='('))
                    i += 1
            elif c == ')':
                tokens.append(RegexToken(type='GROUP_CLOSE', value=')'))
                i += 1
            # Quantifier braces
            elif c == '{':
                start = i
                i += 1
                while i < length and pattern[i] != '}':
                    i += 1
                if i < length and pattern[i] == '}':
                    i += 1
                else:
                    raise ValueError('Unclosed quantifier braces: missing }')
                tokens.append(RegexToken(type='QUANTIFIER', value=pattern[start:i]))
            # Quantifiers *, +, ?
            elif c in {'*', '+', '?'}:
                tokens.append(RegexToken(type='QUANTIFIER', value=c))
                i += 1
            # Escape sequences (including Unicode/ASCII/Named)
            elif c == '\\':
                if i + 1 < length:
                    next_c = pattern[i+1]
                    if next_c in escape_sequences:
                        # Unicode: \uXXXX, ASCII: \xXX, Named: \N{...}
                        if next_c == 'u' and i+5 < length:
                            tokens.append(RegexToken(type='ESCAPE', value=pattern[i:i+6]))
                            i += 6
                        elif next_c == 'x' and i+3 < length:
                            tokens.append(RegexToken(type='ESCAPE', value=pattern[i:i+4]))
                            i += 4
                        elif next_c == 'N' and i+2 < length and pattern[i+2] == '{':
                            j = i+3
                            while j < length and pattern[j] != '}':
                                j += 1
                            if j < length and pattern[j] == '}':
                                tokens.append(RegexToken(type='ESCAPE', value=pattern[i:j+1]))
                                i = j+1
                            else:
                                tokens.append(RegexToken(type='ESCAPE', value=pattern[i:i+2]))
                                i += 2
                        else:
                            tokens.append(RegexToken(type='ESCAPE', value=pattern[i:i+2]))
                            i += 2
                    else:
                        tokens.append(RegexToken(type='ESCAPE', value=pattern[i:i+2]))
                        i += 2
                else:
                    tokens.append(RegexToken(type='ESCAPE', value=c))
                    i += 1
            # Specials (other than quantifiers)
            elif c in special_chars:
                tokens.append(RegexToken(type='SPECIAL', value=c))
                i += 1
            # Literals
            else:
                tokens.append(RegexToken(type='LITERAL', value=c))
                i += 1
        return tokens

parse(pattern, flags=0)

Parse a regex pattern string into an AST.

Parameters:

    pattern (str): The regex pattern to parse. Required.
    flags (int): Regex flags (e.g., re.IGNORECASE). Defaults to 0.

Returns:

    RegexAST: The root node of the parsed regex AST.

Source code in src/rexplain/core/parser.py
def parse(self, pattern: str, flags: int = 0) -> RegexAST:
    r"""
    Parse a regex pattern string into an AST.

    Args:
        pattern (str): The regex pattern to parse.
        flags (int, optional): Regex flags (e.g., re.IGNORECASE). Defaults to 0.

    Returns:
        RegexAST: The root node of the parsed regex AST.
    """
    tokens = self.tokenize(pattern, flags)
    self._tokens = tokens
    self._pos = 0
    ast = self._parse_alternation()
    return ast
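
A small sketch of calling parse directly (the comment describes the node types produced by the grammar above):

from rexplain.core.parser import RegexParser

ast = RegexParser().parse(r"(foo|bar)\d+")
# ast is a Sequence containing a Group (which wraps an Alternation) followed by a Quantifier around \d
print(ast)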

tokenize(pattern, flags=0)

Tokenize a regex pattern string into RegexToken objects, including character classes and groups.

Parameters:

    pattern (str): The regex pattern to tokenize. Required.
    flags (int): Regex flags (e.g., re.IGNORECASE). Defaults to 0.

Returns:

    List[RegexToken]: List of tokens representing the regex pattern.

Source code in src/rexplain/core/parser.py
def tokenize(self, pattern: str, flags: int = 0) -> List['RegexToken']:
    r"""
    Tokenize a regex pattern string into RegexToken objects, including character classes and groups.

    Args:
        pattern (str): The regex pattern to tokenize.
        flags (int, optional): Regex flags (e.g., re.IGNORECASE). Defaults to 0.

    Returns:
        List[RegexToken]: List of tokens representing the regex pattern.
    """
    tokens: List[RegexToken] = []
    i = 0
    special_chars = {'.', '*', '+', '?', '|', '(', ')', '[', ']', '{', '}', '^', '$'}
    escape_sequences = {'d', 'w', 's', 'D', 'W', 'S', 'b', 'B', 'A', 'Z', 'G', 'n', 'r', 't', 'v', 'f', '\\', 'u', 'x', 'N'}
    length = len(pattern)
    while i < length:
        c = pattern[i]
        # Character class
        if c == '[':
            start = i
            i += 1
            in_escape = False
            closed = False
            while i < length:
                if not in_escape and pattern[i] == ']':
                    i += 1
                    closed = True
                    break
                if pattern[i] == '\\' and not in_escape:
                    in_escape = True
                    i += 1
                else:
                    in_escape = False
                    i += 1
            if not closed:
                raise ValueError('Unclosed character class: missing ]')
            tokens.append(RegexToken(type='CHAR_CLASS', value=pattern[start:i]))
        # Group constructs
        elif c == '(':
            if pattern[i:i+3] == '(?:':
                tokens.append(RegexToken(type='GROUP_NONCAP', value='(?:'))
                i += 3
            elif pattern[i:i+4] == '(?P<':
                # Named group: (?P<name>
                start = i
                j = i+4
                while j < length and pattern[j] != '>':
                    j += 1
                if j < length and pattern[j] == '>':
                    group_str = pattern[start:j+1]
                    tokens.append(RegexToken(type='GROUP_NAMED', value=group_str))
                    i = j+1  # Advance index to after the closing '>'
                else:
                    tokens.append(RegexToken(type='GROUP_OPEN', value='('))
                    i += 1
            elif pattern[i:i+3] == '(?=':
                tokens.append(RegexToken(type='GROUP_LOOKAHEAD', value='(?='))
                i += 3
            elif pattern[i:i+3] == '(?!':
                tokens.append(RegexToken(type='GROUP_NEG_LOOKAHEAD', value='(?!'))
                i += 3
            elif pattern[i:i+4] == '(?<=':
                tokens.append(RegexToken(type='GROUP_LOOKBEHIND', value='(?<='))
                i += 4
            elif pattern[i:i+4] == '(?<!':
                tokens.append(RegexToken(type='GROUP_NEG_LOOKBEHIND', value='(?<!'))
                i += 4
            # Inline flags or conditional expressions
            elif pattern[i:i+2] == '(?':
                # Could be inline flags, scoped flags, or conditional
                j = i+2
                flag_str = ''
                while j < length and pattern[j] in 'imsxauL':
                    flag_str += pattern[j]
                    j += 1
                if j < length and pattern[j] == ':':
                    tokens.append(RegexToken(type='GROUP_FLAGS', value=pattern[i:j+1]))
                    i = j+1
                elif j < length and pattern[j] == ')':
                    tokens.append(RegexToken(type='GROUP_FLAGS', value=pattern[i:j+1]))
                    i = j+1
                else:
                    tokens.append(RegexToken(type='GROUP_OPEN', value='('))
                    i += 1
            else:
                tokens.append(RegexToken(type='GROUP_OPEN', value='('))
                i += 1
        elif c == ')':
            tokens.append(RegexToken(type='GROUP_CLOSE', value=')'))
            i += 1
        # Quantifier braces
        elif c == '{':
            start = i
            i += 1
            while i < length and pattern[i] != '}':
                i += 1
            if i < length and pattern[i] == '}':
                i += 1
            else:
                raise ValueError('Unclosed quantifier braces: missing }')
            tokens.append(RegexToken(type='QUANTIFIER', value=pattern[start:i]))
        # Quantifiers *, +, ?
        elif c in {'*', '+', '?'}:
            tokens.append(RegexToken(type='QUANTIFIER', value=c))
            i += 1
        # Escape sequences (including Unicode/ASCII/Named)
        elif c == '\\':
            if i + 1 < length:
                next_c = pattern[i+1]
                if next_c in escape_sequences:
                    # Unicode: \uXXXX, ASCII: \xXX, Named: \N{...}
                    if next_c == 'u' and i+5 < length:
                        tokens.append(RegexToken(type='ESCAPE', value=pattern[i:i+6]))
                        i += 6
                    elif next_c == 'x' and i+3 < length:
                        tokens.append(RegexToken(type='ESCAPE', value=pattern[i:i+4]))
                        i += 4
                    elif next_c == 'N' and i+2 < length and pattern[i+2] == '{':
                        j = i+3
                        while j < length and pattern[j] != '}':
                            j += 1
                        if j < length and pattern[j] == '}':
                            tokens.append(RegexToken(type='ESCAPE', value=pattern[i:j+1]))
                            i = j+1
                        else:
                            tokens.append(RegexToken(type='ESCAPE', value=pattern[i:i+2]))
                            i += 2
                    else:
                        tokens.append(RegexToken(type='ESCAPE', value=pattern[i:i+2]))
                        i += 2
                else:
                    tokens.append(RegexToken(type='ESCAPE', value=pattern[i:i+2]))
                    i += 2
            else:
                tokens.append(RegexToken(type='ESCAPE', value=c))
                i += 1
        # Specials (other than quantifiers)
        elif c in special_chars:
            tokens.append(RegexToken(type='SPECIAL', value=c))
            i += 1
        # Literals
        else:
            tokens.append(RegexToken(type='LITERAL', value=c))
            i += 1
    return tokens
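
Tokenization can also be used on its own; a short sketch (the token stream in the comment follows the branches above):

from rexplain.core.parser import RegexParser

tokens = RegexParser().tokenize(r"[0-9]+-\w")
# Roughly: CHAR_CLASS '[0-9]', QUANTIFIER '+', LITERAL '-', ESCAPE '\w'
for tok in tokens:
    print(tok.type, tok.value)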

RegexToken dataclass

Represents a single regex component (token) in the pattern.

Source code in src/rexplain/core/parser.py
@dataclass
class RegexToken:
    """
    Represents a single regex component (token) in the pattern.
    """
    type: str
    value: str

Sequence dataclass

Bases: RegexAST

Represents a sequence of regex elements (e.g., abcd).

Source code in src/rexplain/core/parser.py
@dataclass
class Sequence(RegexAST):
    """
    Represents a sequence of regex elements (e.g., abcd).
    """
    elements: List[RegexAST]

Tester Module

rexplain.core.tester

MatchResult dataclass

Represents the result of testing a string against a regex pattern.

Attributes:

    matches (bool): Whether the string fully matches the pattern.
    reason (str): Explanation of the match or failure.
    failed_at (Optional[int]): Index where the match failed, if applicable.
    partial_matches (Optional[List[str]]): List of partial matches, if any.

Source code in src/rexplain/core/tester.py
@dataclass
class MatchResult:
    """
    Represents the result of testing a string against a regex pattern.

    Attributes:
        matches (bool): Whether the string fully matches the pattern.
        reason (str): Explanation of the match or failure.
        failed_at (Optional[int]): Index where the match failed, if applicable.
        partial_matches (Optional[List[str]]): List of partial matches, if any.
    """
    matches: bool
    reason: str
    failed_at: Optional[int] = None
    partial_matches: Optional[List[str]] = None

    def __str__(self):
        return (
            f"MatchResult(matches={self.matches}, reason=\"{self.reason}\", "
            f"failed_at={self.failed_at}, partial_matches={self.partial_matches})"
        )
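
Instances can be built and printed directly; a small sketch of the __str__ format defined above (field values are illustrative):

from rexplain.core.tester import MatchResult

result = MatchResult(matches=False, reason="Failed at position 2", failed_at=2, partial_matches=["ab"])
# Prints: MatchResult(matches=False, reason="Failed at position 2", failed_at=2, partial_matches=['ab'])
print(result)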

RegexTester

Tests if a string matches a regex pattern and provides detailed feedback.

Source code in src/rexplain/core/tester.py
class RegexTester:
    """
    Tests if a string matches a regex pattern and provides detailed feedback.
    """
    def test(self, pattern: str, test_string: str, flags: int = 0) -> MatchResult:
        r"""
        Test if a string matches a regex pattern and explain why/why not.

        Args:
            pattern (str): The regex pattern.
            test_string (str): The string to test.
            flags (int, optional): Regex flags (e.g., re.IGNORECASE). Defaults to 0.

        Returns:
            MatchResult: Result object with match status and explanation.
        """
        prog = re.compile(pattern, flags)
        m = prog.fullmatch(test_string)
        if m:
            return MatchResult(matches=True, reason="Full match.")

        # Try to use the parser for step-by-step analysis
        try:
            from .parser import RegexParser, Literal, CharClass, Escape, Sequence
            ast = RegexParser().parse(pattern, flags=flags)
            # Only handle simple sequences of literals/char classes for now
            if isinstance(ast, Sequence):
                elements = ast.elements
            else:
                elements = [ast]
            i = 0
            j = 0
            details = []
            fell_back = False  # set when the pattern contains nodes this walker cannot analyse
            while i < len(elements) and j < len(test_string):
                node = elements[i]
                c = test_string[j]
                if isinstance(node, Literal):
                    if c == node.value:
                        details.append(f"{c!r} matches literal '{node.value}' at position {j}")
                        i += 1
                        j += 1
                    else:
                        reason = (f"Failed at position {j}: expected literal '{node.value}', got '{c}'")
                        return MatchResult(
                            matches=False,
                            reason=reason,
                            failed_at=j,
                            partial_matches=[test_string[:j]] if j > 0 else []
                        )
                elif isinstance(node, CharClass):
                    import re as _re
                    charclass = node.value
                    # Strip the surrounding brackets so the class body can be recompiled on its own
                    inner = charclass
                    if inner.startswith('[') and inner.endswith(']'):
                        inner = inner[1:-1]
                    # Build a regex for the char class (avoid shadowing the `pattern` argument)
                    charclass_re = _re.compile(f"[{inner}]")
                    if charclass_re.fullmatch(c):
                        details.append(f"{c!r} matches character class {node.value} at position {j}")
                        i += 1
                        j += 1
                    else:
                        reason = (f"Failed at position {j}: expected character in {node.value}, got '{c}'")
                        return MatchResult(
                            matches=False,
                            reason=reason,
                            failed_at=j,
                            partial_matches=[test_string[:j]] if j > 0 else []
                        )
                elif isinstance(node, Escape):
                    import re as _re
                    esc = node.value
                    esc_re = _re.compile(esc)
                    display_esc = esc  # Always show as written (e.g., '\d')
                    if esc_re.fullmatch(c):
                        details.append(f"{c!r} matches escape {display_esc} at position {j}")
                        i += 1
                        j += 1
                    else:
                        reason = (f"Failed at position {j}: expected {display_esc}, got '{c}'")
                        return MatchResult(
                            matches=False,
                            reason=reason,
                            failed_at=j,
                            partial_matches=[test_string[:j]] if j > 0 else []
                        )
                else:
                    # Node is too complex for step-by-step analysis; defer to the regex-engine fallback below
                    fell_back = True
                    break
            # If we finished all pattern elements but the string is too short
            if not fell_back and i < len(elements):
                reason = f"String too short: expected more input for pattern element {elements[i]} at position {j}"
                return MatchResult(
                    matches=False,
                    reason=reason,
                    failed_at=j,
                    partial_matches=[test_string[:j]] if j > 0 else []
                )
            # If we finished all pattern elements but the string is too long
            if not fell_back and j < len(test_string):
                reason = f"String too long: extra input '{test_string[j:]}' at position {j}"
                return MatchResult(
                    matches=False,
                    reason=reason,
                    failed_at=j,
                    partial_matches=[test_string[:j]] if j > 0 else []
                )
        except Exception:
            # Fall back to the regex engine for complex patterns or parser errors
            pass

        # Fallback: original logic
        # Check if pattern is a literal (no regex metacharacters)
        if not re.search(r'[\\.^$*+?{}\[\]|()]', pattern):
            # Literal pattern: compare character by character
            match_len = 0
            for c1, c2 in zip(pattern, test_string):
                if c1 == c2:
                    match_len += 1
                else:
                    break
            failed_at = match_len
            reason = (
                f"Match failed at position {failed_at}: unexpected character '{test_string[failed_at]}'"
                if failed_at < len(test_string)
                else "String too short."
            )
            partial_matches = [test_string[:match_len]] if match_len > 0 else []
            return MatchResult(
                matches=False,
                reason=reason,
                failed_at=failed_at,
                partial_matches=partial_matches
            )
        # Regex pattern: use current logic
        longest = 0
        for i in range(1, len(test_string) + 1):
            m = prog.fullmatch(test_string[:i])
            if m:
                longest = i
        if longest > 0:
            failed_at = None
            for i, (c1, c2) in enumerate(zip(pattern, test_string)):
                if c1 != c2:
                    failed_at = i
                    break
            if failed_at is None:
                failed_at = min(len(pattern), len(test_string))
            reason = (
                f"Match failed at position {failed_at}: unexpected character '{test_string[failed_at]}'"
                if failed_at < len(test_string)
                else "String too short."
            )
            return MatchResult(
                matches=False,
                reason=reason,
                failed_at=failed_at,
                partial_matches=[test_string[:longest]]
            )
        failed_at = 0
        for i, (c1, c2) in enumerate(zip(pattern, test_string)):
            if c1 != c2:
                failed_at = i
                break
        else:
            failed_at = min(len(pattern), len(test_string))
        return MatchResult(matches=False, reason="No match at all.", failed_at=failed_at, partial_matches=[])

test(pattern, test_string, flags=0)

Test if a string matches a regex pattern and explain why/why not.

Parameters:

    pattern (str): The regex pattern. Required.
    test_string (str): The string to test. Required.
    flags (int): Regex flags (e.g., re.IGNORECASE). Defaults to 0.

Returns:

    MatchResult: Result object with match status and explanation.

Source code in src/rexplain/core/tester.py
def test(self, pattern: str, test_string: str, flags: int = 0) -> MatchResult:
    r"""
    Test if a string matches a regex pattern and explain why/why not.

    Args:
        pattern (str): The regex pattern.
        test_string (str): The string to test.
        flags (int, optional): Regex flags (e.g., re.IGNORECASE). Defaults to 0.

    Returns:
        MatchResult: Result object with match status and explanation.
    """
    prog = re.compile(pattern, flags)
    m = prog.fullmatch(test_string)
    if m:
        return MatchResult(matches=True, reason="Full match.")

    # Try to use the parser for step-by-step analysis
    try:
        from .parser import RegexParser, Literal, CharClass, Escape, Sequence
        ast = RegexParser().parse(pattern, flags=flags)
        # Only handle simple sequences of literals/char classes for now
        if isinstance(ast, Sequence):
            elements = ast.elements
        else:
            elements = [ast]
        i = 0
        j = 0
        details = []
        fell_back = False  # set when the pattern contains nodes this walker cannot analyse
        while i < len(elements) and j < len(test_string):
            node = elements[i]
            c = test_string[j]
            if isinstance(node, Literal):
                if c == node.value:
                    details.append(f"{c!r} matches literal '{node.value}' at position {j}")
                    i += 1
                    j += 1
                else:
                    reason = (f"Failed at position {j}: expected literal '{node.value}', got '{c}'")
                    return MatchResult(
                        matches=False,
                        reason=reason,
                        failed_at=j,
                        partial_matches=[test_string[:j]] if j > 0 else []
                    )
            elif isinstance(node, CharClass):
                import re as _re
                charclass = node.value
                # Strip the surrounding brackets so the class body can be recompiled on its own
                inner = charclass
                if inner.startswith('[') and inner.endswith(']'):
                    inner = inner[1:-1]
                # Build a regex for the char class (avoid shadowing the `pattern` argument)
                charclass_re = _re.compile(f"[{inner}]")
                if charclass_re.fullmatch(c):
                    details.append(f"{c!r} matches character class {node.value} at position {j}")
                    i += 1
                    j += 1
                else:
                    reason = (f"Failed at position {j}: expected character in {node.value}, got '{c}'")
                    return MatchResult(
                        matches=False,
                        reason=reason,
                        failed_at=j,
                        partial_matches=[test_string[:j]] if j > 0 else []
                    )
            elif isinstance(node, Escape):
                import re as _re
                esc = node.value
                esc_re = _re.compile(esc)
                display_esc = esc  # Always show as written (e.g., '\d')
                if esc_re.fullmatch(c):
                    details.append(f"{c!r} matches escape {display_esc} at position {j}")
                    i += 1
                    j += 1
                else:
                    reason = (f"Failed at position {j}: expected {display_esc}, got '{c}'")
                    return MatchResult(
                        matches=False,
                        reason=reason,
                        failed_at=j,
                        partial_matches=[test_string[:j]] if j > 0 else []
                    )
            else:
                # Node is too complex for step-by-step analysis; defer to the regex-engine fallback below
                fell_back = True
                break
        # If we finished all pattern elements but the string is too short
        if not fell_back and i < len(elements):
            reason = f"String too short: expected more input for pattern element {elements[i]} at position {j}"
            return MatchResult(
                matches=False,
                reason=reason,
                failed_at=j,
                partial_matches=[test_string[:j]] if j > 0 else []
            )
        # If we finished all pattern elements but the string is too long
        if not fell_back and j < len(test_string):
            reason = f"String too long: extra input '{test_string[j:]}' at position {j}"
            return MatchResult(
                matches=False,
                reason=reason,
                failed_at=j,
                partial_matches=[test_string[:j]] if j > 0 else []
            )
    except Exception:
        # Fall back to the regex engine for complex patterns or parser errors
        pass

    # Fallback: original logic
    # Check if pattern is a literal (no regex metacharacters)
    if not re.search(r'[\\.^$*+?{}\[\]|()]', pattern):
        # Literal pattern: compare character by character
        match_len = 0
        for c1, c2 in zip(pattern, test_string):
            if c1 == c2:
                match_len += 1
            else:
                break
        failed_at = match_len
        reason = (
            f"Match failed at position {failed_at}: unexpected character '{test_string[failed_at]}'"
            if failed_at < len(test_string)
            else "String too short."
        )
        partial_matches = [test_string[:match_len]] if match_len > 0 else []
        return MatchResult(
            matches=False,
            reason=reason,
            failed_at=failed_at,
            partial_matches=partial_matches
        )
    # Regex pattern: use current logic
    longest = 0
    for i in range(1, len(test_string) + 1):
        m = prog.fullmatch(test_string[:i])
        if m:
            longest = i
    if longest > 0:
        failed_at = None
        for i, (c1, c2) in enumerate(zip(pattern, test_string)):
            if c1 != c2:
                failed_at = i
                break
        if failed_at is None:
            failed_at = min(len(pattern), len(test_string))
        reason = (
            f"Match failed at position {failed_at}: unexpected character '{test_string[failed_at]}'"
            if failed_at < len(test_string)
            else "String too short."
        )
        return MatchResult(
            matches=False,
            reason=reason,
            failed_at=failed_at,
            partial_matches=[test_string[:longest]]
        )
    failed_at = 0
    for i, (c1, c2) in enumerate(zip(pattern, test_string)):
        if c1 != c2:
            failed_at = i
            break
    else:
        failed_at = min(len(pattern), len(test_string))
    return MatchResult(matches=False, reason="No match at all.", failed_at=failed_at, partial_matches=[])
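
A closing usage sketch for the tester (the exact reason strings depend on which code path above handles the pattern):

from rexplain.core.tester import RegexTester

tester = RegexTester()
print(tester.test(r"\d\d-\d\d", "12-34"))  # full match
print(tester.test(r"\d\d-\d\d", "12x34"))  # explains where the match fails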