Harden mistune's escape_link() against scheme-blacklist bypasses.

escape_link() used to split the URL at the first ':' and compare the
cleaned-up prefix with _scheme_blacklist.  With this patch it lower-cases
the URL, strips leading/trailing NUL, 0x1a, space, newline, carriage-return
and tab characters, removes every character outside [A-Za-z0-9/:] and
rejects the URL (returns '') when the result starts with a blacklisted
scheme.  The renderer's autolink() now goes through escape_link() instead
of escape(), and test_safe_links gains a vector with a newline embedded in
the scheme name.

--- mistune-0.7.2/mistune.py.O 2016-02-26 03:41:36.000000000 +0200
+++ mistune-0.7.2/mistune.py 2017-12-28 17:50:03.127083566 +0200
@@ -57,46 +57,45 @@
     The original cgi.escape will always escape "&", but you can control
     this one for a smart escape amp.
 
     :param quote: if set to True, " and ' will be escaped.
     :param smart_amp: if set to False, & will always be escaped.
     """
     if smart_amp:
         text = _escape_pattern.sub('&amp;', text)
     else:
         text = text.replace('&', '&amp;')
     text = text.replace('<', '&lt;')
     text = text.replace('>', '&gt;')
     if quote:
         text = text.replace('"', '&quot;')
         text = text.replace("'", '&#39;')
     return text
 
 
 def escape_link(url, **kwargs):
     """Remove dangerous URL schemes like javascript: and escape afterwards."""
-    if ':' in url:
-        scheme, _ = url.split(':', 1)
-        scheme = _nonalpha_pattern.sub('', scheme)
-        # whitelist would be better but mistune's use case is too general
-        if scheme.lower() in _scheme_blacklist:
-            return ''
+    lower_url = url.lower().strip('\x00\x1a \n\r\t')
+
+    for scheme in _scheme_blacklist:
+        if re.sub(r'[^A-Za-z0-9\/:]+', '', lower_url).startswith(scheme):
+            return ''
     # escape &entities; to &amp;entities;
     kwargs['smart_amp'] = False
     return escape(url, **kwargs)
 
 
 def preprocessing(text, tab=4):
     text = _newline_pattern.sub('\n', text)
     text = text.replace('\t', ' ' * tab)
     text = text.replace('\u00a0', ' ')
     text = text.replace('\u2424', '\n')
     pattern = re.compile(r'^ +$', re.M)
     return pattern.sub('', text)
 
 
 class BlockGrammar(object):
     """Grammars for block level tokens."""
 
     def_links = re.compile(
         r'^ *\[([^^\]]+)\]: *'  # [key]:
         r'<?([^\s>]+)>?'  # <link> or link
@@ -824,41 +823,41 @@
     def strikethrough(self, text):
         """Rendering ~~strikethrough~~ text.
 
         :param text: text content for strikethrough.
         """
         return '<del>%s</del>' % text
 
     def text(self, text):
         """Rendering unformatted text.
 
         :param text: text content.
         """
         return escape(text)
 
     def autolink(self, link, is_email=False):
         """Rendering a given link or email address.
 
         :param link: link content or email address.
         :param is_email: whether this is an email or not.
         """
-        text = link = escape(link)
+        text = link = escape_link(link)
         if is_email:
             link = 'mailto:%s' % link
         return '<a href="%s">%s</a>' % (link, text)
 
     def link(self, link, title, text):
         """Rendering a given link with content and title.
 
         :param link: href link for ``<a>`` tag.
         :param title: title content for `title` attribute.
         :param text: text content for description.
         """
         link = escape_link(link, quote=True)
         if not title:
             return '<a href="%s">%s</a>' % (link, text)
         title = escape(title, quote=True)
         return '<a href="%s" title="%s">%s</a>' % (link, title, text)
 
     def image(self, src, title, text):
         """Rendering a image with title and text.
 
--- mistune-0.7.2/tests/test_extra.py.O 2016-02-26 03:37:28.000000000 +0200
+++ mistune-0.7.2/tests/test_extra.py 2017-12-28 17:45:04.864272169 +0200
@@ -6,40 +6,42 @@
     assert '&gt;' in ret
 
     ret = mistune.markdown('this **foo** is <b>bold</b>', escape=True)
     assert '&gt;' in ret
 
 
 def test_linebreak():
     ret = mistune.markdown('this **foo** \nis me')
     assert '<br>' not in ret
 
     ret = mistune.markdown('this **foo** \nis me', hard_wrap=True)
     assert '<br>' in ret
 
 
 def test_safe_links():
     attack_vectors = (
         # "standard" javascript pseudo protocol
         ('javascript:alert`1`', ''),
         # bypass attempt
         ('jAvAsCrIpT:alert`1`', ''),
+        # bypass with newline
+        ('javasc\nript:alert`1`', ''),
         # javascript pseudo protocol with entities
         ('javascript&colon;alert`1`', 'javascript&amp;colon;alert`1`'),
         # javascript pseudo protocol with prefix (dangerous in Chrome)
         ('\x1Ajavascript:alert`1`', ''),
         # data-URI (dangerous in Firefox)
         ('data:text/html,<script>alert`1`</script>', ''),
         # vbscript-URI (dangerous in Internet Explorer)
         ('vbscript:msgbox', ''),
         # breaking out of the attribute
         ('"<>', '&quot;&lt;&gt;'),
     )
     for vector, expected in attack_vectors:
         # image
         assert 'src="%s"' % expected in mistune.markdown('![atk](%s)' % vector)
         # link
         assert 'href="%s"' % expected in mistune.markdown('[atk](%s)' % vector)
 
 
 def test_skip_style():
     ret = mistune.markdown(