downloader: Give up on 404 in addition to 403

Also fix some logging.

When we're out of touch with twitch for long enough, our segment URL will get
so old that twitch stops returning 403 because our token is expired,
and start returning 404s, presumebly becasue the underlying resource has gone away.

We want to treat these the same.
mike/downloader/conn-pooling
Mike Lang 6 years ago committed by Christopher Usher
parent 7f9a1dbe45
commit 3042d00516

@ -242,12 +242,12 @@ class StreamWorker(object):
If the url had been working (ie. that wasn't the first fetch), it will also stay alive and
attempt to use the old url until a new worker tells it to stand down,
or a 403 Forbidden is received (as this indicates the url is expired).
or a 403 or 404 is received (as this indicates the url is expired).
Since segment urls returned for a particular media playlist url are stable, we have an easier
time of managing downloading those:
* Every time a new URL is seen, a new SegmentGetter is created
* SegmentGetters will retry the same URL until they succeed, or get a 403 Forbidden indicating
* SegmentGetters will retry the same URL until they succeed, or get a 403 or 404 indicating
the url has expired.
"""
@ -313,8 +313,8 @@ class StreamWorker(object):
if first:
self.logger.warning("Failed on first fetch, stopping")
self.stop()
elif isinstance(e, requests.HTTPError) and e.response is not None and e.response.status_code == 403:
self.logger.warning("Failed with 403 Forbidden, stopping")
elif isinstance(e, requests.HTTPError) and e.response is not None and e.response.status_code in (403, 404):
self.logger.warning("Failed with {}, stopping".format(e.response.status_code))
self.stop()
self.wait(self.FETCH_RETRY_INTERVAL)
continue
@ -359,7 +359,7 @@ class StreamWorker(object):
class SegmentGetter(object):
"""Fetches a segment and writes it to disk.
Retries until it succeeds, or gets a 403 Forbidden indicating
Retries until it succeeds, or gets a 403 or 404 indicating
the url has expired.
Due to retries and multiple workers further up the stack, SegmentGetter needs to
@ -448,12 +448,11 @@ class SegmentGetter(object):
full_prefix = "{}-full".format(self.prefix)
return any(candidate.startswith(full_prefix) for candidate in candidates)
def get_segment(self):
try:
self._get_segment()
except Exception:
logging.exception("Failed to get segment {}".format(self.segment))
self.logger.exception("Failed to get segment {}".format(self.segment))
return False
else:
return True
@ -466,12 +465,14 @@ class SegmentGetter(object):
hash = hashlib.sha256()
file_created = False
try:
logging.debug("Downloading segment {} to {}".format(self.segment, temp_path))
self.logger.debug("Downloading segment {} to {}".format(self.segment, temp_path))
with soft_hard_timeout(self.logger, "getting and writing segment", self.FETCH_FULL_TIMEOUTS, retry.set):
with soft_hard_timeout(self.logger, "getting segment headers", self.FETCH_HEADERS_TIMEOUTS, retry.set):
resp = requests.get(self.segment.uri, stream=True)
if resp.status_code == 403:
logging.warning("Got 403 Forbidden for segment, giving up: {}".format(self.segment))
# twitch returns 403 for expired segment urls, and 404 for very old urls where the original segment is gone.
# the latter can happen if we have a network issue that cuts us off from twitch for some time.
if resp.status_code in (403, 404):
self.logger.warning("Got {} for segment, giving up: {}".format(resp.status_code, self.segment))
return
resp.raise_for_status()
common.ensure_directory(temp_path)

Loading…
Cancel
Save