|
|
@ -3290,14 +3290,13 @@ def is_html(first_bytes):
|
|
|
|
(b'\xff\xfe', 'utf-16-le'),
|
|
|
|
(b'\xff\xfe', 'utf-16-le'),
|
|
|
|
(b'\xfe\xff', 'utf-16-be'),
|
|
|
|
(b'\xfe\xff', 'utf-16-be'),
|
|
|
|
]
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
encoding = 'utf-8'
|
|
|
|
for bom, enc in BOMS:
|
|
|
|
for bom, enc in BOMS:
|
|
|
|
if first_bytes.startswith(bom):
|
|
|
|
while first_bytes.startswith(bom):
|
|
|
|
s = first_bytes[len(bom):].decode(enc, 'replace')
|
|
|
|
encoding, first_bytes = enc, first_bytes[len(bom):]
|
|
|
|
break
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
s = first_bytes.decode('utf-8', 'replace')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return re.match(r'^\s*<', s)
|
|
|
|
return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def determine_protocol(info_dict):
|
|
|
|
def determine_protocol(info_dict):
|
|
|
|