From f0d9aa82c21e26b211883bba56e80e65211565fe Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Thu, 21 Mar 2019 05:50:19 -0700 Subject: [PATCH] Ignore segments that are marked as ads * Checks for the SCTE35-OUT/SCTE35-IN marks in the HLS stream that indicate an ad start/end * Ignores those segments completely * Doesn't mark the StreamWorker as up until it sees the first non-ad segment Some other operational notes: * The main risk this adds is that re-connecting / refreshing the master playlist takes longer. If all downloaders are doing this at the same time (i.e. because the stream only just came up, or during a deployment rollout), all downloaders might be waiting for ads to finish and you'll miss segments. * We should run more downloaders to compensate. This also increases the chance at least one of them won't get any ads, so we get everything right from stream-up. * The other mitigation we can do is have geographically diverse downloaders. This decreases the risk that they all get served an ad, and at least at the time of writing it seems that no in-stream ads are served outside of these regions: > US, Canada, Germany, France, Sweden, Belgium, Poland, Norway, Finland, Denmark, Netherlands, Italy, Spain, Switzerland, Austria, Portugal, UK, Australia, New Zealand --- downloader/downloader/hls_playlist.py | 12 ++++++++++-- downloader/downloader/main.py | 15 ++++++++++++--- downloader/downloader/twitch.py | 3 +++ 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/downloader/downloader/hls_playlist.py b/downloader/downloader/hls_playlist.py index d737b01..7a764e9 100644 --- a/downloader/downloader/hls_playlist.py +++ b/downloader/downloader/hls_playlist.py @@ -62,7 +62,7 @@ IFrameStreamInfo = namedtuple("IFrameStreamInfo", "bandwidth program_id " Playlist = namedtuple("Playlist", "uri stream_info media is_iframe") Resolution = namedtuple("Resolution", "width height") Segment = namedtuple("Segment", "uri duration title key discontinuity " - "byterange date map") + 
"byterange date map scte35") class M3U8(object): @@ -184,11 +184,12 @@ class M3U8Parser(object): date = self.state.pop("date", None) map_ = self.state.get("map") key = self.state.get("key") + scte35 = self.state.get("scte35") segment = Segment(self.uri(line), extinf[0], extinf[1], key, self.state.pop("discontinuity", False), - byterange, date, map_) + byterange, date, map_, scte35) self.m3u8.segments.append(segment) elif self.state.pop("expect_playlist", None): streaminf = self.state.pop("streaminf", {}) @@ -259,6 +260,13 @@ class M3U8Parser(object): start = Start(attr.get("TIME-OFFSET"), self.parse_bool(attr.get("PRECISE", "NO"))) self.m3u8.start = start + elif line.startswith("#EXT-X-SCTE35-OUT"): + # marks start of ad, with optional URL + attr = self.parse_tag(line, self.parse_attributes) + self.state["scte35"] = attr.get('URL') or "unknown" + elif line.startswith("#EXT-X-SCTE35-IN"): + # marks end of ad + self.state["scte35"] = None def parse(self, data): self.state = {} diff --git a/downloader/downloader/main.py b/downloader/downloader/main.py index 7fe473c..d0810af 100644 --- a/downloader/downloader/main.py +++ b/downloader/downloader/main.py @@ -79,8 +79,11 @@ class StreamsManager(object): one, but during switchover there may be 2 - one old one continuing to (try to) operate while the second one confirms it's working. While trying to get a url working, it won't retry, it'll just ask the manager immediately to create yet another new worker then quit. - When one successfully fetches a playlist for the first time, it marks all older - workers as able to shut down by calling manager.mark_working(). + When one successfully fetches a playlist for the first time, and confirms it has a non-ad + segment, it marks all older workers as able to shut down by calling manager.mark_working(). + We wait for a non-ad segment because on first connect, a preroll ad may play. 
+ We don't want to give up on the old connection (which may contain segments covering + the time the preroll ad is playing) until the ad is over. Creation of a new stream worker may be triggered by: * An existing worker failing to refresh its playlist @@ -328,11 +331,17 @@ class StreamWorker(object): # We successfully got the playlist at least once first = False - self.manager.mark_working(self) # Start any new segment getters date = None # tracks date in case some segment doesn't include it for segment in playlist.segments: + if segment.scte35: + self.logger.debug("Ignoring ad segment for {}".format(segment.scte35)) + continue + + # We've got our first non-ad segment, so we're good to take it from here. + self.manager.mark_working(self) + if segment.date: date = dateutil.parser.parse(segment.date) if segment.uri not in self.getters: diff --git a/downloader/downloader/twitch.py b/downloader/downloader/twitch.py index 5e7fcfe..94b60da 100644 --- a/downloader/downloader/twitch.py +++ b/downloader/downloader/twitch.py @@ -38,6 +38,9 @@ def get_master_playlist(channel, session=requests): # Also observed in the wild but not used in streamlink: # "playlist_include_framerate": "true" # "reassignments_supported": "true" + # It's reported that setting this may affect whether you get ads, but this is + # in flux. Better to just blend in with the crowd for now. + # "platform": "_" }, ) resp.raise_for_status() # getting master playlist