bus_analyzer: Parallelize and fix error handling

pull/370/head
Mike Lang 1 year ago
parent c5acfe3ae0
commit b65203ce0e

@@ -8,6 +8,7 @@ import traceback

 import argh
 import gevent.event
+from gevent.pool import Pool

 from common import database
 from common.segments import parse_segment_path, list_segment_files
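Note: for the parallelism introduced below to actually overlap work, blocking calls must yield to the gevent hub. A minimal sketch of making psycopg2 gevent-friendly, assuming psycogreen is available (common.database may already arrange this):

	from psycogreen.gevent import patch_psycopg

	patch_psycopg()  # installs a wait callback so psycopg2 queries yield to the gevent hub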
@@ -102,13 +103,12 @@ def do_analyze_segment(dbconnect, *segment_paths, base_dir='.', prototypes_path=
 	"""Analyze individual segments and write them to the database"""
 	prototypes = load_prototypes(prototypes_path)
 	dbmanager = database.DBManager(dsn=dbconnect)
-	conn = dbmanager.get_conn()
 	for segment_path in segment_paths:
-		analyze_segment(conn, prototypes, segment_path)
+		analyze_segment(dbmanager, prototypes, segment_path)


-def analyze_segment(conn, prototypes, segment_path, check_segment_name=None):
+def analyze_segment(db_manager, prototypes, segment_path, check_segment_name=None):
 	segment_info = parse_segment_path(segment_path)
 	if segment_info.type == "temp":
 		logging.info("Ignoring temp segment {}".format(segment_path))
@@ -123,11 +123,14 @@ def analyze_segment(conn, prototypes, segment_path, check_segment_name=None):
 	except Exception:
 		logging.warning(f"Failed to extract segment {segment_path!r}", exc_info=True)
 		odometer = None
+		clock = None
+		tod = None
 		error = traceback.format_exc()
 	else:
 		logging.info(f"Got odometer = {odometer}, clock = {clock}, time of day = {tod} for segment {segment_path!r}")
 		error = None
+	conn = db_manager.get_conn()
 	database.query(
 		conn,
 		"""
@@ -147,9 +150,10 @@ def analyze_segment(conn, prototypes, segment_path, check_segment_name=None):
 		clock=clock,
 		timeofday=tod,
 	)
+	db_manager.put_conn(conn)


-def analyze_hour(conn, prototypes, existing_segments, base_dir, channel, quality, hour):
+def analyze_hour(db_manager, prototypes, existing_segments, base_dir, channel, quality, hour, concurrency=10):
 	hour_path = os.path.join(base_dir, channel, quality, hour)
 	segments = sorted(list_segment_files(hour_path))
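One caveat the commit does not address: if database.query raises, put_conn is skipped and the connection is never returned to the manager. A more defensive variant (a sketch, not what the commit does) would wrap the checkout in try/finally:

	conn = db_manager.get_conn()
	try:
		database.query(conn, "...")  # same call as above
	finally:
		db_manager.put_conn(conn)  # returned even if the query raises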
@@ -167,8 +171,12 @@ def analyze_hour(conn, prototypes, existing_segments, base_dir, channel, quality
 			segments_to_do.append((segment_path, segment_name))
 	logging.info("Found {} segments not already existing".format(len(segments_to_do)))

+	pool = Pool(concurrency)
+	workers = []
 	for segment_path, segment_name in segments_to_do:
-		analyze_segment(conn, prototypes, segment_path, segment_name)
+		workers.append(pool.spawn(analyze_segment, db_manager, prototypes, segment_path, segment_name))
+	for worker in workers:
+		worker.get() # re-raise errors
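A self-contained sketch of the same fan-out pattern: Pool(n) caps the number of concurrent greenlets (spawn blocks once the pool is full), and get() re-raises any exception from the worker in the caller, which is what the "re-raise errors" comment above relies on:

	from gevent.pool import Pool

	def work(n):
		if n == 3:
			raise RuntimeError("boom")
		return n * n

	pool = Pool(2)  # at most two greenlets in flight; spawn() blocks when full
	workers = [pool.spawn(work, n) for n in range(5)]
	for worker in workers:
		print(worker.get())  # re-raises RuntimeError("boom") when it reaches work(3)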
def parse_hours(s): def parse_hours(s):
@ -190,6 +198,7 @@ def main(
run_once=False, run_once=False,
overwrite=False, overwrite=False,
prototypes_path="./prototypes", prototypes_path="./prototypes",
concurrency=10,
): ):
CHECK_INTERVAL = 2 CHECK_INTERVAL = 2
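Since this file imports argh, the new keyword argument presumably surfaces as a --concurrency CLI option defaulting to 10, along these lines (a sketch assuming main() is dispatched via argh; the project's actual entry point may differ):

	import argh

	def main(dbconnect, concurrency=10):
		print(dbconnect, concurrency)

	argh.dispatch_command(main)  # keyword args become flags, e.g. --concurrency 20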
@@ -241,7 +250,7 @@ def main(
 		for channel in channels:
 			for hour in do_hours:
-				analyze_hour(conn, prototypes, existing_segments, base_dir, channel, quality, hour)
+				analyze_hour(db_manager, prototypes, existing_segments, base_dir, channel, quality, hour, concurrency=concurrency)
 		if run_once:
 			logging.info("Requested to only run once, stopping")
