result finalization

trunk
HeNine 2 years ago
parent cccb10bc6b
commit ba988a14d5

@ -1,8 +1,13 @@
from collections import namedtuple
import flask as flask import flask as flask
from datetime import datetime from datetime import datetime
from common import database from common import database
from gevent.pool import Pool
from psycopg2.extras import execute_values
app = flask.Flask('escher') app = flask.Flask('escher')
@ -219,11 +224,13 @@ def get_transcript(db_conn, ts_query, start_time="-infinity", end_time="infinity
p = 0.02 p = 0.02
l = b_p + bus_duration l = b_p + bus_duration
a = a_p + n_m a = a_p + n_m
# Lomax distribution is posterior predictive for exponential # Lomax distribution is posterior predictive for exponential
message_duration_diff = l * ((1 - p)**-(1/a) - 1) message_duration_diff = l * ((1 - p)**-(1/a) - 1)
current_result = Result() current_result = Result()
results = [] results = []
print(message_duration_diff) # print(message_duration_diff)
for transcript_line in db_results: for transcript_line in db_results:
# Current result set is new # Current result set is new
@ -231,8 +238,8 @@ def get_transcript(db_conn, ts_query, start_time="-infinity", end_time="infinity
current_result.transcript.append(transcript_line) current_result.transcript.append(transcript_line)
# New message is within window # New message is within window
elif (transcript_line.start_time - current_result.transcript[-1].end_time).total_seconds() <= message_duration_diff: elif (transcript_line.start_time - current_result.transcript[-1].end_time).total_seconds() <= message_duration_diff:
print((transcript_line.start_time - # print((transcript_line.start_time -
current_result.transcript[-1].end_time).total_seconds()) # current_result.transcript[-1].end_time).total_seconds())
current_result.transcript.append(transcript_line) current_result.transcript.append(transcript_line)
# New message is outside window # New message is outside window
else: else:
@ -356,11 +363,76 @@ def get_chat(db_conn, ts_query, start_time="-infinity", end_time="infinity"):
return results return results
def load_results_data(db_conn, results: "list[Result]") -> "list[Result]":
    """
    Replace chat and transcript with all entries in each result's timeframe.

    Every result's ``chat`` and ``transcript`` lists are reset and then
    refilled with the rows whose timestamps fall inside that result's
    ``start_time``..``end_time`` span. The results are modified in place.

    :param db_conn: psycopg2 connection used to run the two lookup queries.
        Rows are read by attribute (``row.id``), so this assumes the
        connection's cursors yield named-tuple style rows -- TODO confirm.
    :param results: results whose ``chat``/``transcript`` should be reloaded.
    :return: the same ``results`` list that was passed in.
    """
    # execute_values() issues no statement at all for an empty argument list,
    # so there would be no result set to read from afterwards.
    if not results:
        return results

    # The list index doubles as the id that maps returned rows back to their result.
    result_timespans = [
        (i, result.start_time, result.end_time)
        for i, result in enumerate(results)
    ]

    # Clear lists so we can later insert new lines
    for result in results:
        result.chat = []
        result.transcript = []

    # fetch=True collects the rows of *every* page. execute_values() splits the
    # argument list into pages (100 items by default) and runs one statement
    # per page, so reading the cursor afterwards would only see the last page.
    with db_conn.cursor() as cur:
        chat_lines = execute_values(
            cur,
            """
            --sql
            WITH timespans (id, start_time, end_time) AS (VALUES %s)
            SELECT
                timespans.id,
                pub_time,
                content->'tags'->>'display-name' AS name,
                content->'params'->>1 AS content
            FROM timespans
                JOIN chat ON (pub_time BETWEEN start_time AND end_time);
            """,
            result_timespans,
            fetch=True,
        )

        # NOTE(review): the inferred_speakers subquery has no DISTINCT/GROUP BY,
        # so a line with several speaker rows presumably comes back once per
        # speaker -- verify against the buscribe_line_inferred_speakers schema.
        transcript_lines = execute_values(
            cur,
            """
            --sql
            WITH timespans (id, start_time, end_time) AS (VALUES %s)
            SELECT
                timespans.id,
                buscribe_transcriptions.start_time,
                buscribe_transcriptions.end_time,
                names,
                buscribe_transcriptions.transcription_line
            FROM timespans JOIN
                buscribe_transcriptions ON (
                    buscribe_transcriptions.start_time >= timespans.start_time AND buscribe_transcriptions.start_time <= timespans.end_time AND
                    buscribe_transcriptions.end_time >= timespans.start_time AND buscribe_transcriptions.end_time <= timespans.end_time
                )
                LEFT OUTER JOIN (
                    SELECT line,
                        ARRAY(
                            SELECT speaker_name
                            FROM buscribe_line_inferred_speakers AS inner_speakers
                            WHERE inner_speakers.line = buscribe_line_inferred_speakers.line
                        ) AS names
                    FROM buscribe_line_inferred_speakers
                ) AS inferred_speakers ON buscribe_transcriptions.id = inferred_speakers.line;
            """,
            result_timespans,
            fetch=True,
        )

    for chat_line in chat_lines:
        results[chat_line.id].chat.append(chat_line)

    for transcript_line in transcript_lines:
        results[transcript_line.id].transcript.append(transcript_line)

    return results
def merge_results(transcript: list[Result], vst: list[Result], chat: list[Result]): def merge_results(transcript: list[Result], vst: list[Result], chat: list[Result], limit: int, offset = 0):
""" """
Merge different types of results in order of importance. Merge different types of results in order of importance.
@ -383,12 +455,9 @@ def merge_results(transcript: list[Result], vst: list[Result], chat: list[Result
else: else:
transcript_i += 1 transcript_i += 1
# print(vst_result)
while chat_i < len(chat) and chat[chat_i].start_time < vst_result.end_time: while chat_i < len(chat) and chat[chat_i].start_time < vst_result.end_time:
# print(vst_result)
if overlap(vst_result, chat[chat_i]): if overlap(vst_result, chat[chat_i]):
vst_result.chat.extend(chat.pop(chat_i).chat) vst_result.chat.extend(chat.pop(chat_i).chat)
# print(vst_result)
else: else:
chat_i += 1 chat_i += 1
@ -405,7 +474,7 @@ def merge_results(transcript: list[Result], vst: list[Result], chat: list[Result
merged = transcript + vst + chat merged = transcript + vst + chat
merged.sort(key=lambda result: result.start_time) merged.sort(key=lambda result: result.start_time)
merged.sort(key=lambda result: result.weight, reverse=True) merged.sort(key=lambda result: result.weight, reverse=True)
return merged return merged[offset:min((offset + limit), len(merged))]
def overlap(result_a, result_b): def overlap(result_a, result_b):

Loading…
Cancel
Save