Source code for reagex.reagex

from string import Formatter


def reagex(pattern, **group_patterns):
    """
    Utility function for writing regular expressions with many capturing
    groups in a readable, clean and hierarchical way.

    The template is parsed with the ``str.format`` syntax: every replacement
    field ``{group_name}`` is substituted by a regex group wrapping the
    pattern passed as the keyword argument with the same name.

    A minimal example::

        pattern = reagex(
            '{name} "{nickname}" {surname}',
            name='[A-Z][a-z]+',
            nickname='[a-z]+',
            surname='[A-Z][a-z]+'
        )

    Args:
        pattern (str):
            a template where you can use ``str.format`` syntax for groups
            ``{group_name}``. A group becomes a named capturing group
            ``(?P<group_name>...)`` unless its name starts with ``'_'``, in
            which case it becomes a non-capturing group ``(?:...)``.
            Literal braces must be doubled (``{{`` and ``}}``), as in
            ``str.format``. Any format spec or conversion attached to a
            field is ignored, and fields with an empty name (``{}``) are
            skipped.
        **group_patterns (str):
            patterns associated to groups; for each ``{group_name}`` field
            in ``pattern`` this function expects a keyword argument with the
            same name.

    Returns:
        a pattern you can pass to ``re`` functions

    Raises:
        KeyError: if a group used in ``pattern`` has no corresponding
            keyword argument.
    """
    pieces = []
    # Formatter.parse yields (literal_text, field_name, format_spec, conversion);
    # only the first two matter here.
    for literal_text, group_name, _spec, _conversion in Formatter().parse(pattern):
        if literal_text:
            pieces.append(literal_text)
        if not group_name:
            continue
        # Use a dedicated name instead of rebinding the ``pattern`` parameter.
        group_pattern = group_patterns[group_name]
        if group_name.startswith('_'):
            pieces.append('(?:%s)' % group_pattern)
        else:
            pieces.append('(?P<%s>%s)' % (group_name, group_pattern))
    return ''.join(pieces)
def _has_top_level_alternation(regex):
    """Tell whether ``regex`` has an unescaped ``|`` outside groups and character classes."""
    depth = 0
    in_class = False
    i = 0
    while i < len(regex):
        ch = regex[i]
        if ch == '\\':
            i += 2  # skip the escaped character as well
            continue
        if in_class:
            if ch == ']':
                in_class = False
        elif ch == '[':
            in_class = True
            # A ']' right after '[' or '[^' is a literal member of the class.
            if regex[i + 1:i + 2] == '^':
                i += 1
            if regex[i + 1:i + 2] == ']':
                i += 1
        elif ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
        elif ch == '|' and depth == 0:
            return True
        i += 1
    return False


def _as_unit(regex):
    """Wrap ``regex`` in a non-capturing group only if concatenation would split it."""
    if _has_top_level_alternation(regex):
        return '(?:%s)' % regex
    return regex


def repeated(pattern, sep, least=1, most=None):
    """
    Returns a pattern that matches a sequence of strings that match
    ``pattern`` separated by strings that match ``sep``.

    For example, for matching a sequence of ``'{key}={value}'`` pairs
    separated by ``'&'``, where key and value contain only lowercase
    letters::

        repeated('[a-z]+=[a-z]+', '&') == '[a-z]+=[a-z]+(?:&[a-z]+=[a-z]+)*'

    ``pattern`` and ``sep`` are inserted as they are, except that either one
    is wrapped in a non-capturing group when it contains a top-level
    alternation (``|``), so that the alternation cannot absorb the
    neighbouring parts of the resulting expression.

    Args:
        pattern (str):
            a pattern
        sep (str):
            a pattern for the separator (usually just a character/string)
        least (int, positive):
            minimum number of strings matching ``pattern``; must be positive
        most (Optional[int]):
            maximum number of strings matching ``pattern``; must be at least
            2 and greater or equal to ``least``. ``None`` means unbounded.

    Returns:
        a pattern

    Raises:
        ValueError: if ``least`` is not positive, if ``most`` is less than 2
            or if ``most`` is less than ``least``.
    """
    if least <= 0:
        raise ValueError('least should be positive; it is: %d' % least)
    if most is not None:
        if most < 2:
            raise ValueError('it does not make any sense to call this function with most<2:\n'
                             'for most=1, you could just write the <pattern> argument')
        if most < least:
            raise ValueError('most must be greater or equal to least')

    # Without this, 'a|b' + '(?:,a|b)*' would parse as 'a' OR 'b(?:,a|b)*'.
    pattern = _as_unit(pattern)
    sep = _as_unit(sep)

    # The first occurrence of ``pattern`` is written explicitly, so the
    # quantifier on the "(sep pattern)" group counts one occurrence less.
    least_s = str(least - 1) if least > 1 else ''
    most_s = str(most - 1) if most else ''

    if most and least == most:
        if least == 2:
            return pattern + sep + pattern
        reps = '{%s}' % least_s
    else:
        reps = '{%s,%s}' % (least_s, most_s)

    # Prefer the conventional shorthand quantifiers where they exist.
    if reps == '{,}':
        reps = '*'
    elif reps == '{1,}':
        reps = '+'
    elif reps == '{,1}':
        reps = '?'
    return ('{pattern}(?:{sep}{pattern}){reps}'
            .format(pattern=pattern, sep=sep, reps=reps))