|
|
|
@ -1,8 +1,13 @@
|
|
|
|
|
|
|
|
|
|
from collections import namedtuple
|
|
|
|
|
import flask as flask
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
from common import database
|
|
|
|
|
|
|
|
|
|
from gevent.pool import Pool
|
|
|
|
|
|
|
|
|
|
from psycopg2.extras import execute_values
|
|
|
|
|
|
|
|
|
|
# Module-level Flask application object, created with the name 'escher'.
app = flask.Flask('escher')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -219,11 +224,13 @@ def get_transcript(db_conn, ts_query, start_time="-infinity", end_time="infinity
|
|
|
|
|
p = 0.02
|
|
|
|
|
l = b_p + bus_duration
|
|
|
|
|
a = a_p + n_m
|
|
|
|
|
|
|
|
|
|
# Lomax distribution is posterior predictive for exponential
|
|
|
|
|
message_duration_diff = l * ((1 - p)**-(1/a) - 1)
|
|
|
|
|
|
|
|
|
|
current_result = Result()
|
|
|
|
|
results = []
|
|
|
|
|
print(message_duration_diff)
|
|
|
|
|
# print(message_duration_diff)
|
|
|
|
|
for transcript_line in db_results:
|
|
|
|
|
|
|
|
|
|
# Current result set is new
|
|
|
|
@ -231,8 +238,8 @@ def get_transcript(db_conn, ts_query, start_time="-infinity", end_time="infinity
|
|
|
|
|
current_result.transcript.append(transcript_line)
|
|
|
|
|
# New message is within window
|
|
|
|
|
elif (transcript_line.start_time - current_result.transcript[-1].end_time).total_seconds() <= message_duration_diff:
|
|
|
|
|
print((transcript_line.start_time -
|
|
|
|
|
current_result.transcript[-1].end_time).total_seconds())
|
|
|
|
|
# print((transcript_line.start_time -
|
|
|
|
|
# current_result.transcript[-1].end_time).total_seconds())
|
|
|
|
|
current_result.transcript.append(transcript_line)
|
|
|
|
|
# New message is outside window
|
|
|
|
|
else:
|
|
|
|
@ -356,11 +363,76 @@ def get_chat(db_conn, ts_query, start_time="-infinity", end_time="infinity"):
|
|
|
|
|
return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_result_data(result):
    """
    Placeholder for loading the data of a single result.

    Currently does nothing: the argument is ignored and None is returned.
    """
    return None
|
|
|
|
|
def load_results_data(db_conn, results):
    """
    Replace chat and transcript with all entries in result's timeframe.

    Every result's ``chat`` and ``transcript`` lists are reset and then
    refilled with the rows of the ``chat`` and ``buscribe_transcriptions``
    tables that fall inside that result's ``start_time``..``end_time`` span.

    :param db_conn: database connection providing ``cursor()``. Rows returned
        by its cursors must expose columns as attributes (``row.id``).
    :param results: list of results; modified in place.
    :return: the same ``results`` list that was passed in.
    """
    # Nothing to look up. Without this guard execute_values() would run no
    # statement at all and reading from the cursor would then raise.
    if not results:
        return results

    # The position of each result in the list doubles as its id in the
    # queries, so returned rows can be routed back to the right result.
    result_timespans = [
        (index, result.start_time, result.end_time)
        for index, result in enumerate(results)
    ]

    # Clear lists so we can later insert new lines
    for result in results:
        result.chat = []
        result.transcript = []

    chat_query = """
        --sql
        WITH timespans (id, start_time, end_time) AS (VALUES %s)
        SELECT
            timespans.id,
            pub_time,
            content->'tags'->>'display-name' AS name,
            content->'params'->>1 AS content FROM timespans JOIN chat ON (pub_time BETWEEN start_time AND end_time);
        """

    # NOTE(review): the inferred_speakers subquery appears to yield one row per
    # speaker row, which may duplicate transcript lines that have several
    # inferred speakers -- confirm against the schema before changing.
    transcript_query = """
        --sql
        WITH timespans (id, start_time, end_time) AS (VALUES %s)
        SELECT
            timespans.id,
            buscribe_transcriptions.start_time,
            buscribe_transcriptions.end_time,
            names,
            buscribe_transcriptions.transcription_line
        FROM timespans JOIN
            buscribe_transcriptions ON (
                buscribe_transcriptions.start_time >= timespans.start_time AND buscribe_transcriptions.start_time <= timespans.end_time AND
                buscribe_transcriptions.end_time >= timespans.start_time AND buscribe_transcriptions.end_time <= timespans.end_time
            )
            LEFT OUTER JOIN (
                SELECT line,
                    ARRAY(
                        SELECT speaker_name
                        FROM buscribe_line_inferred_speakers AS inner_speakers
                        WHERE inner_speakers.line = buscribe_line_inferred_speakers.line
                    ) AS names
                FROM buscribe_line_inferred_speakers
            ) AS inferred_speakers ON buscribe_transcriptions.id = inferred_speakers.line;
        """

    # execute_values() splits the argument list into pages (100 rows by
    # default) and runs one statement per page, so the cursor only ever holds
    # the rows of the *last* page. fetch=True (psycopg2 >= 2.8) collects the
    # rows of every page, so no timespan is silently dropped.
    with db_conn.cursor() as cur:
        chat_lines = execute_values(cur, chat_query, result_timespans, fetch=True)
        transcript_lines = execute_values(cur, transcript_query, result_timespans, fetch=True)

    for chat_line in chat_lines:
        results[chat_line.id].chat.append(chat_line)

    for transcript_line in transcript_lines:
        results[transcript_line.id].transcript.append(transcript_line)

    return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def merge_results(transcript: list[Result], vst: list[Result], chat: list[Result]):
|
|
|
|
|
def merge_results(transcript: list[Result], vst: list[Result], chat: list[Result], limit: int, offset = 0):
|
|
|
|
|
"""
|
|
|
|
|
Merge different types of results in order of importance.
|
|
|
|
|
|
|
|
|
@ -383,12 +455,9 @@ def merge_results(transcript: list[Result], vst: list[Result], chat: list[Result
|
|
|
|
|
else:
|
|
|
|
|
transcript_i += 1
|
|
|
|
|
|
|
|
|
|
# print(vst_result)
|
|
|
|
|
while chat_i < len(chat) and chat[chat_i].start_time < vst_result.end_time:
|
|
|
|
|
# print(vst_result)
|
|
|
|
|
if overlap(vst_result, chat[chat_i]):
|
|
|
|
|
vst_result.chat.extend(chat.pop(chat_i).chat)
|
|
|
|
|
# print(vst_result)
|
|
|
|
|
else:
|
|
|
|
|
chat_i += 1
|
|
|
|
|
|
|
|
|
@ -405,7 +474,7 @@ def merge_results(transcript: list[Result], vst: list[Result], chat: list[Result
|
|
|
|
|
merged = transcript + vst + chat
|
|
|
|
|
merged.sort(key=lambda result: result.start_time)
|
|
|
|
|
merged.sort(key=lambda result: result.weight, reverse=True)
|
|
|
|
|
return merged
|
|
|
|
|
return merged[offset:min((offset + limit), len(merged))]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def overlap(result_a, result_b):
|
|
|
|
|