diff options
Diffstat (limited to 'lib-python/2.7/urlparse.py')
-rw-r--r-- | lib-python/2.7/urlparse.py | 42 |
1 files changed, 39 insertions, 3 deletions
diff --git a/lib-python/2.7/urlparse.py b/lib-python/2.7/urlparse.py
index f6d44c8dd3..ae6310feb6 100644
--- a/lib-python/2.7/urlparse.py
+++ b/lib-python/2.7/urlparse.py
@@ -165,6 +165,25 @@ def _splitnetloc(url, start=0):
             delim = min(delim, wdelim) # use earliest delim position
     return url[start:delim], url[delim:] # return (domain, rest)
 
+def _checknetloc(netloc):
+    if not netloc or not isinstance(netloc, unicode):
+        return
+    # looking for characters like \u2100 that expand to 'a/c'
+    # IDNA uses NFKC equivalence, so normalize for this check
+    import unicodedata
+    n = netloc.replace(u'@', u'') # ignore characters already included
+    n = n.replace(u':', u'') # but not the surrounding text
+    n = n.replace(u'#', u'')
+    n = n.replace(u'?', u'')
+    netloc2 = unicodedata.normalize('NFKC', n)
+    if n == netloc2:
+        return
+    for c in '/?#@:':
+        if c in netloc2:
+            raise ValueError("netloc %r contains invalid characters "
+                             "under NFKC normalization"
+                             % netloc)
+
 def urlsplit(url, scheme='', allow_fragments=True):
     """Parse a URL into 5 components:
     <scheme>://<netloc>/<path>?<query>#<fragment>
@@ -193,6 +212,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
                 url, fragment = url.split('#', 1)
             if '?' in url:
                 url, query = url.split('?', 1)
+            _checknetloc(netloc)
             v = SplitResult(scheme, netloc, url, query, fragment)
             _parse_cache[key] = v
             return v
@@ -216,6 +236,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
         url, fragment = url.split('#', 1)
     if '?' in url:
         url, query = url.split('?', 1)
+    _checknetloc(netloc)
     v = SplitResult(scheme, netloc, url, query, fragment)
     _parse_cache[key] = v
     return v
@@ -362,7 +383,7 @@ def unquote(s):
             append(item)
     return ''.join(res)
 
-def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
+def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
     """Parse a query given as a string argument.
 
         Arguments:
@@ -379,16 +400,20 @@ def parse_qs(qs, keep_blank_values=0, strict_parsing=0):
         strict_parsing: flag indicating what to do with parsing errors.
             If false (the default), errors are silently ignored.
             If true, errors raise a ValueError exception.
+
+        max_num_fields: int. If set, then throws a ValueError if there
+            are more than n fields read by parse_qsl().
     """
     dict = {}
-    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing):
+    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
+                                 max_num_fields):
         if name in dict:
             dict[name].append(value)
         else:
             dict[name] = [value]
     return dict
 
-def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
+def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
     """Parse a query given as a string argument.
 
     Arguments:
@@ -405,8 +430,19 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0):
         false (the default), errors are silently ignored. If true,
         errors raise a ValueError exception.
 
+    max_num_fields: int. If set, then throws a ValueError if there
+        are more than n fields read by parse_qsl().
+
     Returns a list, as G-d intended.
     """
+    # If max_num_fields is defined then check that the number of fields
+    # is less than max_num_fields. This prevents a memory exhaustion DOS
+    # attack via post bodies with many fields.
+    if max_num_fields is not None:
+        num_fields = 1 + qs.count('&') + qs.count(';')
+        if max_num_fields < num_fields:
+            raise ValueError('Max number of fields exceeded')
+
     pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
     r = []
     for name_value in pairs: