1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """
23 Having written modules to handle turning a string representation of a list back
24 into a list (including nested lists) and also a very simple CSV parser, I
25 realised I needed a more solid set of functions for handling lists (comma
26 delimited lines) and quoting/unquoting elements of lists.
27
28 The test stuff provides useful examples of how the functions work.
29 """
30
31
# ``basestring`` is the common base class of ``str`` and ``unicode`` on
# Python 2.3+.  Interpreters without it get an equivalent tuple that works
# with ``isinstance`` checks.
try:
    basestring
except NameError:
    try:
        basestring = (str, unicode)
    except NameError:
        # ``unicode`` is missing as well (Python 3): ``str`` is the only
        # text type, so fall back to it instead of failing at import time.
        basestring = (str,)
36
37 import re
# Optional leading whitespace, then one single- or double-quoted chunk
# (non-greedy), then whatever follows it on the line.
# Groups: (quoted value including its quotes, rest of the line).
inquotes = re.compile(r'''\s*(".*?"|'.*?')(.*)''')

# Despite the name, this matches values that contain NO "bad" characters:
# a non-empty string free of quotes, commas, spaces, brackets, parentheses
# and '#'.  A match means the value can be written without quoting.
badchars = re.compile(r'''^[^'," \[\]\(\)#]+$''')

# Finds successive items: a single-quoted chunk, a double-quoted chunk, or a
# bare item that starts with a non-quote, non-comma, non-whitespace character
# and runs up to the next comma.
paramfinder = re.compile(r'''(?:'.*?')|(?:".*?")|(?:[^'",\s][^,]*)''')

# An unquoted value followed by an optional remainder that starts with a
# comment marker, comma or closing bracket/parenthesis.
unquoted = re.compile(r'''
    ([^\#,"'\(\)\[\]][^\#,\]\)]*)  # value
    \s*                             # whitespace - XXX not caught
    ([\#,\)\]].*)?                  # rest of the line
    $''', re.VERBOSE)
47
# Names exported by ``from listquote import *``.
__all__ = [
    # quoting / unquoting single elements
    'elem_quote',
    'unquote',
    # exception types
    'ListQuoteError',
    'QuoteError',
    'UnQuoteError',
    'BadLineError',
    'CommentError',
    # escaping helpers
    'quote_escape',
    'quote_unescape',
    # line and file level helpers
    'simplelist',
    'LineParser',
    'lineparse',
    'csvread',
    'csvwrite',
    'list_stringify',
    'makelist'
    ]
66
68 """Base class for errors raised by the listquote module."""
69
71 """This value can't be quoted."""
72
74 """The value is badly quoted."""
75
77 """A line is badly built."""
78
80 """A line contains a disallowed comment."""
81
83 """The CSV File contained errors."""
84
85
86
87
def elem_quote(member, nonquote=True, stringify=False, encoding=None):
    """
    Simple method to add the most appropriate quote to an element - either
    single quotes or double quotes.

    If member contains ``\\n`` a ``QuoteError`` is raised - multiline values
    can't be quoted by elem_quote.

    If ``nonquote`` is set to ``True`` (the default), then if member is not
    empty and contains none of ``'," []()#`` then it isn't quoted at all.

    Otherwise double quotes are used unless member itself contains a double
    quote, in which case single quotes are used.

    If member contains both single quotes *and* double quotes it cannot be
    quoted and a ``QuoteError`` is raised. No escaping is attempted here
    (this module exports ``quote_escape``, presumably for pre-processing
    such values - check its documentation before relying on that).

    If ``stringify`` is set to ``True`` (the default is ``False``) then non
    string (unicode or byte-string) values will be first converted to strings
    using the ``str`` function. Otherwise elem_quote raises a ``TypeError``.

    If ``encoding`` is not ``None`` and member is a byte string, then it will
    be decoded into unicode using this encoding.

    >>> elem_quote('hello')
    'hello'
    >>> elem_quote('hello', nonquote=False)
    '"hello"'
    >>> elem_quote('"hello"')
    '\\'"hello"\\''
    >>> elem_quote(3)
    Traceback (most recent call last):
    TypeError: Can only quote strings. "3"
    >>> elem_quote(3, stringify=True)
    '3'
    >>> elem_quote('hello', encoding='ascii')
    u'hello'
    >>> elem_quote('\\n')
    Traceback (most recent call last):
    QuoteError: Multiline values can't be quoted.
    "
    "
    """
    if not isinstance(member, basestring):
        if not stringify:
            raise TypeError('Can only quote strings. "%s"' % str(member))
        member = str(member)
    if encoding and isinstance(member, str):
        # from byte string to unicode
        member = unicode(member, encoding)
    if '\n' in member:
        # Interpolate ``member`` directly: wrapping it in ``str()`` would
        # raise UnicodeEncodeError for non-ASCII unicode values and mask the
        # QuoteError the caller is meant to see.
        raise QuoteError('Multiline values can\'t be quoted.\n"%s"' % member)

    if nonquote and badchars.match(member) is not None:
        # Nothing in the value requires quoting.
        return member

    # The order of these tests decides which quote character is preferred:
    # double quotes first, single quotes only as a fallback.
    if '"' not in member:
        return '"%s"' % member
    if "'" not in member:
        return "'%s'" % member
    raise QuoteError('Value can\'t be quoted : "%s"' % member)
152
def unquote(inline, fullquote=True, retain=False):
    """
    Unquote a value.

    If the value isn't quoted it returns the value unchanged. This includes
    empty and whitespace-only values.

    If the value is badly quoted it raises ``UnQuoteError``.

    If retain is ``True`` (default is ``False``) then the quotes are left
    around the value (but leading or trailing whitespace will have been
    removed).

    If fullquote is ``False`` (default is ``True``) then unquote will only
    unquote the first part of the ``inline``. If there is anything after the
    quoted element, this will be returned as well (instead of raising an
    error).

    In this case the return value is ``(value, rest)``. Note that a value
    which isn't quoted at all is still returned as a single string, not a
    tuple, whatever ``fullquote`` is set to.

    >>> unquote('hello')
    'hello'
    >>> unquote('')
    ''
    >>> unquote('"hello"')
    'hello'
    >>> unquote('"hello')
    Traceback (most recent call last):
    UnQuoteError: Value is badly quoted: ""hello"
    >>> unquote('"hello" fish')
    Traceback (most recent call last):
    UnQuoteError: Value is badly quoted: ""hello" fish"
    >>> unquote("'hello'", retain=True)
    "'hello'"
    >>> unquote('"hello" fish', fullquote=False)
    ('hello', ' fish')
    """
    mat = inquotes.match(inline)
    if mat is None:
        stripped = inline.strip()
        # Guard against empty / whitespace-only input before indexing:
        # such a value simply isn't quoted.
        if not stripped or stripped[0] not in '\'\"':
            return inline
        # Starts with a quote character but has no matching closing quote.
        raise UnQuoteError('Value is badly quoted: "%s"' % inline)

    quoted, rest = mat.groups()
    if fullquote and rest.strip():
        # Trailing text after the closing quote is only allowed when the
        # caller asked for a partial unquote.
        raise UnQuoteError('Value is badly quoted: "%s"' % inline)
    if not retain:
        quoted = quoted[1:-1]
    if not fullquote:
        return quoted, rest
    return quoted