diff --git a/chat_archiver/chat_archiver/main.py b/chat_archiver/chat_archiver/main.py index 7d4d051..1aae410 100644 --- a/chat_archiver/chat_archiver/main.py +++ b/chat_archiver/chat_archiver/main.py @@ -354,7 +354,7 @@ URL_REGEX = re.compile(r""" # Previous char is not a letter. This prevents eg. "foohttp://example.com" # Also disallows / as the previous character, otherwise "file:///foo.bar/baz" # can match on the "foo.bar/baz" part. - (? https?:// )? # Hostname, which must contain a dot. Single-part hostnames like "localhost" are valid @@ -372,8 +372,8 @@ URL_REGEX = re.compile(r""" # like that even though it's encoded when actually sent as a URL. # Restricting this to letters prevents things like non-breaking spaces causing problems. # For the same reason we also allow {} and [] which seem to show up often in paths. - (?P / (\w | [!#$%&'()*+,./:;=?@_~{}-] | \[ | \] )* )? -""", re.ASCII | re.VERBOSE | re.IGNORECASE) + (?P / [\w!#$%&'()*+,./:;=?@_~{}\[\]-]* )? +""", re.VERBOSE | re.IGNORECASE) _IMAGE_LINKS_RUNNING = KeyedGroup()