diff --git a/backfiller/backfiller.py b/backfiller/backfiller.py
index e585848..40773fc 100644
--- a/backfiller/backfiller.py
+++ b/backfiller/backfiller.py
@@ -3,6 +3,10 @@
 # lots about web services and the like I don't know
 # needs logging, exception handling and the like
 
+# when starting the backfiller and every few hours, backfill everything
+# more frequently, backfill the last couple hours
+# (last three hour directories so always at least two hours).
+
 import glob
 import requests
 import os
@@ -10,74 +14,76 @@ import os
 
 def get_nodes():
 
-    # either read a config file or query the database to get the addresses of
-    # the other nodes
+	# either read a config file or query the database to get the addresses
+	# of the other nodes
 
-    # as a prototype can just hardcode some addresses.
+	# as a prototype can just hardcode some addresses.
 
-    nodes = []
+	nodes = []
 
-    return nodes
+	return nodes
 
 def list_remote_segments(node):
 
-    # return a list of paths
-    # obviously update path with real one
+	# return a list of paths
+	# obviously update path with real one
 
-    resp = requests.get(node + '/wubloader/segment_list')
-    remote_segments = resp.json() #replace with appropriate parser
+	resp = requests.get(node + '/wubloader/segment_list')
+	remote_segments = resp.json() #replace with appropriate parser
 
-    return remote_segments
+	return remote_segments
 
 
 #based on _get_segment in downloader/main
 #should have a more general shared version of this
 def get_remote_segment(node, segment):
 
-    # obviously update path with real one
-    resp = requests.get(node + '/wubloader/segments/' + segment, stream=True)
+	# obviously update path with real one
+	resp = requests.get(node + '/wubloader/segments/' + segment, stream=True)
 
-    with open('temp_backfill', 'w') as f:
-        for chunk in resp.iter_content(8192):
-            f.write(chunk)
+	with open('temp_backfill', 'w') as f:
+		for chunk in resp.iter_content(8192):
+			f.write(chunk)
 
-    os.rename(temp, segment)
+	os.rename(temp, segment)
 
 
 def back_fill(base_dir, nodes=None):
 
-    # loop over nodes asking for a list of segments then downloads any segments
-    # it doesn't have
+	# loop over nodes asking for a list of segments then downloads any
+	# segments it doesn't have
 
-    if nodes is None:
-        nodes = get_nodes()
+	if nodes is None:
+		nodes = get_nodes()
 
-    for node in nodes:
+	for node in nodes:
 
-        # need to figure out how to properly check whether this node is the
-        # same
-        if node == 'localhost':
-            continue
+		# need to figure out how to properly check whether this node is
+		# the same
+		if node == 'localhost':
+			continue
 
-        # not sure how much we want to hard code the search
-        local_segments = set(glob.glob(base_dir + '/*/*/*.ts'))
+		# not sure how much we want to hard code the search
+		# replace with something from the restreamer
+		local_segments = set(glob.glob(base_dir + '/*/*/*.ts'))
 
-        remote_segments = set(list_remote_segments(node))
+		remote_segments = set(list_remote_segments(node))
 
-        missing_segments = remote_segments - local_segments
-
-        for missing_segment in missing_segments:
-            get_remote_segment(node, missing_segment)
+		missing_segments = remote_segments - local_segments
+
+		for missing_segment in missing_segments:
+			get_remote_segment(node, missing_segment)
 
 
 def main(base_dir, wait_time=60):
-    None
-    # every wait_time call back_fill
-    # time from start of back_fill
-    # to keep things simple don't try two back_fills at the same time
-    # wait for previous one to start before launching second.
-    # if it's taken more than wait_time for back_fill to run, start immediately
+	None
+	# every wait_time call back_fill
+	# time from start of back_fill
+	# to keep things simple don't try two back_fills at the same time
+	# wait for previous one to start before launching second.
+	# if it's taken more than wait_time for back_fill to run, start
+	# immediately
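
Two notes on the new version rather than on the whitespace change itself: get_remote_segment() streams into 'temp_backfill' but then renames the undefined name temp, which raises NameError on the first download, and the scratch file should be opened in binary mode because iter_content() yields bytes, not str. main() is also still a stub whose comments describe the intended schedule. Below is a minimal reviewer sketch of both, not part of the patch: it assumes the back_fill() defined above and keeps the patch's fixed scratch filename; raise_for_status() and the exact loop shape are my additions.

import os
import time

import requests


def get_remote_segment(node, segment):
	temp = 'temp_backfill'
	resp = requests.get(node + '/wubloader/segments/' + segment, stream=True)
	resp.raise_for_status()  # added here: fail loudly on HTTP errors
	# binary mode: iter_content() yields bytes, not str
	with open(temp, 'wb') as f:
		for chunk in resp.iter_content(8192):
			f.write(chunk)
	# rename the file actually written (the patch renames the undefined
	# name 'temp'); assumes the segment's directory already exists
	os.rename(temp, segment)


def main(base_dir, wait_time=60):
	# run back_fill every wait_time seconds, timed from the start of the
	# previous run; a single loop means two back_fills never overlap, and
	# if a run took longer than wait_time the next one starts immediately
	while True:
		start = time.time()
		back_fill(base_dir)  # back_fill as defined in the patch above
		elapsed = time.time() - start
		if elapsed < wait_time:
			time.sleep(wait_time - elapsed)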