From ba988a14d5f866cc50e3d99bdea0b95b52a30108 Mon Sep 17 00:00:00 2001 From: HeNine <> Date: Wed, 10 Aug 2022 16:09:04 +0200 Subject: [PATCH] result finalization --- api/escher_api/escher.py | 89 +++++++++++++++++++++++++++++++++++----- 1 file changed, 79 insertions(+), 10 deletions(-) diff --git a/api/escher_api/escher.py b/api/escher_api/escher.py index 6e29449..b427d55 100644 --- a/api/escher_api/escher.py +++ b/api/escher_api/escher.py @@ -1,8 +1,13 @@ +from collections import namedtuple import flask as flask from datetime import datetime from common import database +from gevent.pool import Pool + +from psycopg2.extras import execute_values + app = flask.Flask('escher') @@ -219,11 +224,13 @@ def get_transcript(db_conn, ts_query, start_time="-infinity", end_time="infinity p = 0.02 l = b_p + bus_duration a = a_p + n_m + # Lomax distribution is posterior predictive for exponential message_duration_diff = l * ((1 - p)**-(1/a) - 1) + current_result = Result() results = [] - print(message_duration_diff) + # print(message_duration_diff) for transcript_line in db_results: # Current result set is new @@ -231,8 +238,8 @@ def get_transcript(db_conn, ts_query, start_time="-infinity", end_time="infinity current_result.transcript.append(transcript_line) # New message is within window elif (transcript_line.start_time - current_result.transcript[-1].end_time).total_seconds() <= message_duration_diff: - print((transcript_line.start_time - - current_result.transcript[-1].end_time).total_seconds()) + # print((transcript_line.start_time - + # current_result.transcript[-1].end_time).total_seconds()) current_result.transcript.append(transcript_line) # New message is outside window else: @@ -356,11 +363,76 @@ def get_chat(db_conn, ts_query, start_time="-infinity", end_time="infinity"): return results -def load_result_data(result): - pass +def load_results_data(db_conn, results): + """ + Replace chat and transcript with all entries in result's timeframe. + """ + + # ggroup = Pool(size=30) + + # results = ggroup.map(lambda result: load_result_data(db_manager, result), results) + + result_timespans = [(i, result.start_time, result.end_time) for (i, result) in enumerate(results)] + + # Clear lists so we can later insert new lines + for result in results: + result.chat = [] + result.transcript = [] + + + cur = db_conn.cursor() + + execute_values(cur, + """ + --sql + WITH timespans (id, start_time, end_time) AS (VALUES %s) + SELECT + timespans.id, + pub_time, + content->'tags'->>'display-name' AS name, + content->'params'->>1 AS content FROM timespans JOIN chat ON (pub_time BETWEEN start_time AND end_time); + """, + result_timespans + ) + + for chat_line in cur: + results[chat_line.id].chat.append(chat_line) + + execute_values(cur, + """ + --sql + WITH timespans (id, start_time, end_time) AS (VALUES %s) + SELECT + timespans.id, + buscribe_transcriptions.start_time, + buscribe_transcriptions.end_time, + names, + buscribe_transcriptions.transcription_line + FROM timespans JOIN + buscribe_transcriptions ON ( + buscribe_transcriptions.start_time >= timespans.start_time AND buscribe_transcriptions.start_time <= timespans.end_time AND + buscribe_transcriptions.end_time >= timespans.start_time AND buscribe_transcriptions.end_time <= timespans.end_time + ) + LEFT OUTER JOIN ( + SELECT line, + ARRAY( + SELECT speaker_name + FROM buscribe_line_inferred_speakers AS inner_speakers + WHERE inner_speakers.line = buscribe_line_inferred_speakers.line + ) AS names + FROM buscribe_line_inferred_speakers + ) AS inferred_speakers ON buscribe_transcriptions.id = inferred_speakers.line; + """, + result_timespans + ) + + for transcript_line in cur: + results[transcript_line.id].transcript.append(transcript_line) + + return results -def merge_results(transcript: list[Result], vst: list[Result], chat: list[Result]): +def merge_results(transcript: list[Result], vst: list[Result], chat: list[Result], limit: int, offset = 0): """ Merge different types of results in order of importance. @@ -383,12 +455,9 @@ def merge_results(transcript: list[Result], vst: list[Result], chat: list[Result else: transcript_i += 1 - # print(vst_result) while chat_i < len(chat) and chat[chat_i].start_time < vst_result.end_time: - # print(vst_result) if overlap(vst_result, chat[chat_i]): vst_result.chat.extend(chat.pop(chat_i).chat) - # print(vst_result) else: chat_i += 1 @@ -405,7 +474,7 @@ def merge_results(transcript: list[Result], vst: list[Result], chat: list[Result merged = transcript + vst + chat merged.sort(key=lambda result: result.start_time) merged.sort(key=lambda result: result.weight, reverse=True) - return merged + return merged[offset:min((offset + limit), len(merged))] def overlap(result_a, result_b):