1 """HTTP library functions.
2
3 This module contains functions for building an HTTP application
4 framework: any one, not just one whose name starts with "Ch". ;) If you
5 reference any modules from some popular framework inside *this* module,
6 FuManChu will personally hang you up by your thumbs and submit you
7 to a public caning.
8 """
9
10 from binascii import b2a_base64
11 from cherrypy._cpcompat import BaseHTTPRequestHandler, HTTPDate, ntob, ntou
12 from cherrypy._cpcompat import basestring, bytestr, iteritems, nativestr
13 from cherrypy._cpcompat import reversed, sorted, unicodestr, unquote_qs
# Work on a copy of the handler's default status table so the overrides
# below never touch BaseHTTPRequestHandler.responses itself.
response_codes = BaseHTTPRequestHandler.responses.copy()

# Replace the (reason-phrase, message) pairs for 500 and 503 with this
# module's own wording.
response_codes.update({
    500: ('Internal Server Error',
          'The server encountered an unexpected condition '
          'which prevented it from fulfilling the request.'),
    503: ('Service Unavailable',
          'The server is currently unable to handle the '
          'request due to a temporary overloading or '
          'maintenance of the server.'),
})
24
25 import re
26 import urllib
27
28
30 """Return the given path \*atoms, joined into a single URL.
31
32 This will correctly join a SCRIPT_NAME and PATH_INFO into the
33 original URL, even if either atom is blank.
34 """
35 url = "/".join([x for x in atoms if x])
36 while "//" in url:
37 url = url.replace("//", "/")
38
39 return url or "/"
40
41
43 """Return the given path *atoms, joined into a single URL.
44
45 This will correctly join a SCRIPT_NAME and PATH_INFO into the
46 original URL, even if either atom is blank.
47 """
48 url = ntob("/").join([x for x in atoms if x])
49 while ntob("//") in url:
50 url = url.replace(ntob("//"), ntob("/"))
51
52 return url or ntob("/")
53
54
56 """Return a protocol tuple from the given 'HTTP/x.y' string."""
57 return int(protocol_str[5]), int(protocol_str[7])
58
59
61 """Return a list of (start, stop) indices from a Range header, or None.
62
63 Each (start, stop) tuple will be composed of two ints, which are suitable
64 for use in a slicing operation. That is, the header "Range: bytes=3-6",
65 if applied against a Python string, is requesting resource[3:7]. This
66 function will return the list [(3, 7)].
67
68 If this function returns an empty list, you should return HTTP 416.
69 """
70
71 if not headervalue:
72 return None
73
74 result = []
75 bytesunit, byteranges = headervalue.split("=", 1)
76 for brange in byteranges.split(","):
77 start, stop = [x.strip() for x in brange.split("-", 1)]
78 if start:
79 if not stop:
80 stop = content_length - 1
81 start, stop = int(start), int(stop)
82 if start >= content_length:
83
84
85
86
87
88
89
90
91 continue
92 if stop < start:
93
94
95
96
97
98
99 return None
100 result.append((start, stop + 1))
101 else:
102 if not stop:
103
104 return None
105
106 result.append((content_length - int(stop), content_length))
107
108 return result
109
110
112
113 """An element (with parameters) from an HTTP header's element list."""
114
120
122 return cmp(self.value, other.value)
123
125 return self.value < other.value
126
130
133
136
138 """Transform 'token;key=val' to ('token', {'key': 'val'})."""
139
140
141 atoms = [x.strip() for x in elementstr.split(";") if x.strip()]
142 if not atoms:
143 initial_value = ''
144 else:
145 initial_value = atoms.pop(0).strip()
146 params = {}
147 for atom in atoms:
148 atom = [x.strip() for x in atom.split("=", 1) if x.strip()]
149 key = atom.pop(0)
150 if atom:
151 val = atom[0]
152 else:
153 val = ""
154 params[key] = val
155 return initial_value, params
156 parse = staticmethod(parse)
157
159 """Construct an instance from a string of the form 'token;key=val'."""
160 ival, params = cls.parse(elementstr)
161 return cls(ival, params)
162 from_str = classmethod(from_str)
163
164
165 q_separator = re.compile(r'; *q *=')
166
167
169
170 """An element (with parameters) from an Accept* header's element list.
171
172 AcceptElement objects are comparable; the more-preferred object will be
173 "less than" the less-preferred object. They are also therefore sortable;
174 if you sort a list of AcceptElement objects, they will be listed in
175 priority order; the most preferred value will be first. Yes, it should
176 have been the other way around, but it's too late to fix now.
177 """
178
194 from_str = classmethod(from_str)
195
197 val = self.params.get("q", "1")
198 if isinstance(val, HeaderElement):
199 val = val.value
200 return float(val)
201 qvalue = property(qvalue, doc="The qvalue, or priority, of this value.")
202
208
214
215 RE_HEADER_SPLIT = re.compile(',(?=(?:[^"]*"[^"]*")*[^"]*$)')
217 """Return a sorted HeaderElement list from a comma-separated header string.
218 """
219 if not fieldvalue:
220 return []
221
222 result = []
223 for element in RE_HEADER_SPLIT.split(fieldvalue):
224 if fieldname.startswith("Accept") or fieldname == 'TE':
225 hv = AcceptElement.from_str(element)
226 else:
227 hv = HeaderElement.from_str(element)
228 result.append(hv)
229
230 return list(reversed(sorted(result)))
231
232
def decode_TEXT(value):
    r"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> "f\xfcr").

    ``value`` is split into atoms with ``email.header.decode_header``.
    Each atom that carries a charset is decoded with that charset.  An
    atom without a charset is used as-is when it is already a native
    string; when it is a byte string that is not a native string (which
    Python 3 returns for the unencoded parts of a header that also
    contains encoded words) it is decoded as ISO-8859-1 so that it can
    be joined with the other, already decoded, atoms.

    Returns the concatenation of all decoded atoms.  Raises LookupError
    or UnicodeDecodeError if an encoded word names an unknown charset or
    holds bytes that are invalid in its charset.
    """
    try:
        from email.header import decode_header
    except ImportError:
        # Fallback spelling of the module name.
        from email.Header import decode_header

    decoded = []
    for atom, charset in decode_header(value):
        if charset is not None:
            atom = atom.decode(charset)
        elif isinstance(atom, bytes) and not isinstance(atom, str):
            # Only true where bytes and str are distinct types; where
            # they are the same type the atom is left untouched, exactly
            # as before.  Without this, joining bytes with text fails.
            atom = atom.decode("ISO-8859-1")
        decoded.append(atom)
    return "".join(decoded)
247
248
250 """Return legal HTTP status Code, Reason-phrase and Message.
251
252 The status arg must be an int, or a str that begins with an int.
253
254 If status is an int, or a str and no reason-phrase is supplied,
255 a default reason-phrase will be provided.
256 """
257
258 if not status:
259 status = 200
260
261 status = str(status)
262 parts = status.split(" ", 1)
263 if len(parts) == 1:
264
265 code, = parts
266 reason = None
267 else:
268 code, reason = parts
269 reason = reason.strip()
270
271 try:
272 code = int(code)
273 except ValueError:
274 raise ValueError("Illegal response status from server "
275 "(%s is non-numeric)." % repr(code))
276
277 if code < 100 or code > 599:
278 raise ValueError("Illegal response status from server "
279 "(%s is out of range)." % repr(code))
280
281 if code not in response_codes:
282
283 default_reason, message = "", ""
284 else:
285 default_reason, message = response_codes[code]
286
287 if reason is None:
288 reason = default_reason
289
290 return code, reason, message
291
292
293
294
295
296
def _parse_qs(qs, keep_blank_values=0, strict_parsing=0, encoding='utf-8'):
    """Parse a URL-encoded query string into a dict.

    Arguments:

    qs: URL-encoded query string to be parsed.  Fields are separated
        by either '&' or ';'.

    keep_blank_values: if true, fields with an empty value (or, when
        not parsing strictly, with no '=' at all) are kept with ''
        as their value.  If false (the default) they are dropped.

    strict_parsing: if false (the default), malformed fields are
        silently skipped.  If true, an empty field or a field
        without '=' raises ValueError.

    encoding: passed to unquote_qs when decoding each name and value.

    Returns a dict mapping each name to its value; a name that occurs
    more than once maps to a list of its values, in order of appearance.
    """
    parsed = {}
    for chunk in qs.split('&'):
        for field in chunk.split(';'):
            # Empty fields are only tolerated when parsing leniently.
            if not (field or strict_parsing):
                continue

            raw_name, sep, raw_value = field.partition('=')
            if not sep:
                # No '=' in the field at all.
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (field,))
                if not keep_blank_values:
                    continue
                # Otherwise fall through with raw_value == ''.

            if not (raw_value or keep_blank_values):
                continue

            name = unquote_qs(raw_name, encoding)
            value = unquote_qs(raw_value, encoding)

            if name not in parsed:
                parsed[name] = value
                continue

            # Repeated name: promote the stored value to a list once,
            # then keep appending.
            if not isinstance(parsed[name], list):
                parsed[name] = [parsed[name]]
            parsed[name].append(value)
    return parsed
340
341
342 image_map_pattern = re.compile(r"[0-9]+,[0-9]+")
343
344
346 """Build a params dictionary from a query_string.
347
348 Duplicate key/value pairs in the provided query_string will be
349 returned as {'key': [val1, val2, ...]}. Single key/values will
350 be returned as strings: {'key': 'value'}.
351 """
352 if image_map_pattern.match(query_string):
353
354
355 pm = query_string.split(",")
356 pm = {'x': int(pm[0]), 'y': int(pm[1])}
357 else:
358 pm = _parse_qs(query_string, keep_blank_values, encoding=encoding)
359 return pm
360
361
363
364 """A case-insensitive dict subclass.
365
366 Each key is changed on entry to str(key).title().
367 """
368
371
374
377
380
381 - def get(self, key, default=None):
383
384 if hasattr({}, 'has_key'):
387
391
393 newdict = cls()
394 for k in seq:
395 newdict[str(k).title()] = value
396 return newdict
397 fromkeys = classmethod(fromkeys)
398
400 key = str(key).title()
401 try:
402 return self[key]
403 except KeyError:
404 self[key] = x
405 return x
406
407 - def pop(self, key, default):
409
410
411
412
413
414
415
# Tables describing the characters to remove from header text: the
# control characters 0-31 plus DEL (127).  Their shape depends on whether
# the native string type is the byte string type.
# NOTE(review): presumably consumed by a str/bytes .translate() call
# elsewhere in this module -- confirm against the caller.
if nativestr != bytestr:
    # Native strings are not byte strings: no translation table is
    # defined, and the characters to delete are given as a bytes object.
    header_translate_table = None
    header_translate_deletechars = bytes(range(32)) + bytes([127])
else:
    # Native strings are byte strings: a 256-character identity table
    # plus a string holding the characters to delete.
    header_translate_table = ''.join(map(chr, xrange(256)))
    header_translate_deletechars = ''.join(map(chr, xrange(32))) + chr(127)
423
424
426
427 """A dict subclass for HTTP request and response headers.
428
429 Each key is changed on entry to str(key).title(). This allows headers
430 to be case-insensitive and avoid duplicates.
431
432 Values are header values (decoded according to :rfc:`2047` if necessary).
433 """
434
435 protocol = (1, 1)
436 encodings = ["ISO-8859-1"]
437
438
439
440
441
442
443 use_rfc_2047 = True
444
446 """Return a sorted list of HeaderElements for the given header."""
447 key = str(key).title()
448 value = self.get(key)
449 return header_elements(key, value)
450
452 """Return a sorted list of HeaderElement.value for the given header."""
453 return [e.value for e in self.elements(key)]
454
458
482 encode_header_items = classmethod(encode_header_items)
483
485 """Return the given header name or value, encoded for HTTP output."""
486 for enc in cls.encodings:
487 try:
488 return v.encode(enc)
489 except UnicodeEncodeError:
490 continue
491
492 if cls.protocol == (1, 1) and cls.use_rfc_2047:
493
494
495
496
497
498 v = b2a_base64(v.encode('utf-8'))
499 return (ntob('=?utf-8?b?') + v.strip(ntob('\n')) + ntob('?='))
500
501 raise ValueError("Could not encode header part %r using "
502 "any of the encodings %r." %
503 (v, cls.encodings))
504 encode = classmethod(encode)
505
506
508
509 """An internet address.
510
511 name
512 Should be the client's host name. If not available (because no DNS
513 lookup is performed), the IP address should be used instead.
514
515 """
516
517 ip = "0.0.0.0"
518 port = 80
519 name = "unknown.tld"
520
521 - def __init__(self, ip, port, name=None):
527
529 return "httputil.Host(%r, %r, %r)" % (self.ip, self.port, self.name)
530