"""
Parses single or double-quoted strings while preserving escaped quote chars
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1. captures the first quote character (either a single-quote or double-quote)
and stores it in a group named "quote"
2. takes zero or more characters up until it matches the character that is
stored in the group named "quote" (i.e. named referenced group) providing
that the matching "quote" character is not escaped (preceeded by a backslash
character, i.e. negative look behind assertion). All matching characters
between the quotes are stored in a group named "string".
"""
import re
QUOTED_STRING_RE = re.compile(
r"(?P<quote>['\"])(?P<string>.*?)(?<!\\)(?P=quote)")
def parse_quoted_string(search_string):
    r"""
    Return the contents of the first quoted string in ``search_string``.

    The text is scanned with ``QUOTED_STRING_RE``. For the first match the
    value of its "string" group (the characters between the quotes, with
    any backslash escapes left as they are) is returned; ``None`` is
    returned when there is no match.

    >>> s = '...And the gold of \'the knight\\\'s good banner\' Still waved...'
    >>> parse_quoted_string(s)
    "the knight\\'s good banner"
    >>> s = '"To save my lady!" Fast rode \'the knight\'... by "Stephen Crane"'
    >>> parse_quoted_string(s)
    'To save my lady!'
    >>> print(QUOTED_STRING_RE.findall(s))
    [('"', 'To save my lady!'), ("'", 'the knight'), ('"', 'Stephen Crane')]
    """
    first_hit = QUOTED_STRING_RE.search(search_string)
    return first_hit.group('string') if first_hit else None
if __name__ == '__main__':
    # Script entry point: run the doctests embedded in this module's
    # docstrings.
    from doctest import testmod

    testmod()