result finalization

trunk
HeNine 2 years ago
parent cccb10bc6b
commit ba988a14d5

@ -1,8 +1,13 @@
from collections import namedtuple
import flask as flask
from datetime import datetime
from common import database
from gevent.pool import Pool
from psycopg2.extras import execute_values
app = flask.Flask('escher')
@ -219,11 +224,13 @@ def get_transcript(db_conn, ts_query, start_time="-infinity", end_time="infinity
p = 0.02
l = b_p + bus_duration
a = a_p + n_m
# Lomax distribution is posterior predictive for exponential
message_duration_diff = l * ((1 - p)**-(1/a) - 1)
current_result = Result()
results = []
print(message_duration_diff)
# print(message_duration_diff)
for transcript_line in db_results:
# Current result set is new
@ -231,8 +238,8 @@ def get_transcript(db_conn, ts_query, start_time="-infinity", end_time="infinity
current_result.transcript.append(transcript_line)
# New message is within window
elif (transcript_line.start_time - current_result.transcript[-1].end_time).total_seconds() <= message_duration_diff:
print((transcript_line.start_time -
current_result.transcript[-1].end_time).total_seconds())
# print((transcript_line.start_time -
# current_result.transcript[-1].end_time).total_seconds())
current_result.transcript.append(transcript_line)
# New message is outside window
else:
@ -356,11 +363,76 @@ def get_chat(db_conn, ts_query, start_time="-infinity", end_time="infinity"):
return results
def load_result_data(result):
    """
    Placeholder for loading the data of a single result.

    Currently a no-op: the argument is ignored and ``None`` is returned.
    """
    return None
def load_results_data(db_conn, results):
    """
    Replace chat and transcript with all entries in result's timeframe.

    Every result's ``chat`` and ``transcript`` lists are emptied and then
    refilled with the rows returned by two queries that join the results'
    ``(start_time, end_time)`` spans against the ``chat`` and
    ``buscribe_transcriptions`` tables.

    :param db_conn: database connection providing ``cursor()``. Rows must
        expose their columns as attributes (``row.id``) -- presumably a
        psycopg2 connection with a named-tuple cursor factory; confirm
        against the caller.
    :param results: list of result objects with ``start_time``, ``end_time``,
        ``chat`` and ``transcript`` attributes. Modified in place.
    :return: the same ``results`` list.
    """
    # With no timespans execute_values() would not run any statement at all,
    # so there would be no result set to read from the cursor.
    if not results:
        return results

    # The position of each result in the list doubles as the id that maps
    # returned rows back to their result.
    result_timespans = [
        (i, result.start_time, result.end_time)
        for i, result in enumerate(results)
    ]

    # Clear lists so we can later insert new lines
    for result in results:
        result.chat = []
        result.transcript = []

    with db_conn.cursor() as cur:
        # fetch=True gathers the rows of *every* page. execute_values() splits
        # the argument list into pages (100 entries by default) and runs one
        # statement per page, so reading the cursor afterwards would only
        # yield the rows belonging to the last page.
        chat_lines = execute_values(
            cur,
            """
            --sql
            WITH timespans (id, start_time, end_time) AS (VALUES %s)
            SELECT
            timespans.id,
            pub_time,
            content->'tags'->>'display-name' AS name,
            content->'params'->>1 AS content FROM timespans JOIN chat ON (pub_time BETWEEN start_time AND end_time);
            """,
            result_timespans,
            fetch=True,
        )

        for chat_line in chat_lines:
            results[chat_line.id].chat.append(chat_line)

        # NOTE(review): the inferred_speakers subquery has no DISTINCT/GROUP BY,
        # so a line with several rows in buscribe_line_inferred_speakers may be
        # returned once per speaker row -- confirm against the schema.
        transcript_lines = execute_values(
            cur,
            """
            --sql
            WITH timespans (id, start_time, end_time) AS (VALUES %s)
            SELECT
            timespans.id,
            buscribe_transcriptions.start_time,
            buscribe_transcriptions.end_time,
            names,
            buscribe_transcriptions.transcription_line
            FROM timespans JOIN
            buscribe_transcriptions ON (
            buscribe_transcriptions.start_time >= timespans.start_time AND buscribe_transcriptions.start_time <= timespans.end_time AND
            buscribe_transcriptions.end_time >= timespans.start_time AND buscribe_transcriptions.end_time <= timespans.end_time
            )
            LEFT OUTER JOIN (
            SELECT line,
            ARRAY(
            SELECT speaker_name
            FROM buscribe_line_inferred_speakers AS inner_speakers
            WHERE inner_speakers.line = buscribe_line_inferred_speakers.line
            ) AS names
            FROM buscribe_line_inferred_speakers
            ) AS inferred_speakers ON buscribe_transcriptions.id = inferred_speakers.line;
            """,
            result_timespans,
            fetch=True,
        )

        for transcript_line in transcript_lines:
            results[transcript_line.id].transcript.append(transcript_line)

    return results
def merge_results(transcript: list[Result], vst: list[Result], chat: list[Result]):
def merge_results(transcript: list[Result], vst: list[Result], chat: list[Result], limit: int, offset = 0):
"""
Merge different types of results in order of importance.
@ -383,12 +455,9 @@ def merge_results(transcript: list[Result], vst: list[Result], chat: list[Result
else:
transcript_i += 1
# print(vst_result)
while chat_i < len(chat) and chat[chat_i].start_time < vst_result.end_time:
# print(vst_result)
if overlap(vst_result, chat[chat_i]):
vst_result.chat.extend(chat.pop(chat_i).chat)
# print(vst_result)
else:
chat_i += 1
@ -405,7 +474,7 @@ def merge_results(transcript: list[Result], vst: list[Result], chat: list[Result
merged = transcript + vst + chat
merged.sort(key=lambda result: result.start_time)
merged.sort(key=lambda result: result.weight, reverse=True)
return merged
return merged[offset:min((offset + limit), len(merged))]
def overlap(result_a, result_b):

Loading…
Cancel
Save