[extractor/common] Improve _hidden_inputs

pull/6530/merge
Sergey M․ 10 years ago
parent 9303ce3e69
commit 201ea3ee8e

@@ -724,16 +724,18 @@ class InfoExtractor(object):
@staticmethod @staticmethod
def _hidden_inputs(html): def _hidden_inputs(html):
return dict([ hidden_inputs = {}
(input.group('name'), input.group('value')) for input in re.finditer( for input in re.findall(r'<input([^>]+)>', html):
r'''(?x) if not re.search(r'type=(["\'])hidden\1', input):
<input\s+ continue
type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+ name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+ if not name:
(?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)? continue
value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value) value = re.search(r'value=(["\'])(?P<value>.*?)\1', input)
''', html) if not value:
]) continue
hidden_inputs[name.group('value')] = value.group('value')
return hidden_inputs
def _form_hidden_inputs(self, form_id, html): def _form_hidden_inputs(self, form_id, html):
form = self._search_regex( form = self._search_regex(

Loading…
Cancel
Save