From 54fd356b39a328c3a89eca81174ed83974359b55 Mon Sep 17 00:00:00 2001 From: Mike Lang Date: Fri, 23 May 2025 18:25:15 +1000 Subject: [PATCH] Add youtubebot This adds a zulip bot that polls the youtube API for new comment threads, and posts them to Zulip. Some limitations: - It doesn't keep any state, so it won't post anything it "missed" while not running. - It can only find top-level comments, not replies - For quota reasons, we shouldn't poll more often than every 1 minute (at this rate we consume approx 1 upload worth of quota per day) - If somehow there are more than 100 comments within 1 minute, it will miss all but the last 100. --- docker-compose.jsonnet | 16 +++++ zulip_bots/zulip_bots/youtubebot.py | 107 ++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) create mode 100644 zulip_bots/zulip_bots/youtubebot.py diff --git a/docker-compose.jsonnet b/docker-compose.jsonnet index 3031ee9..7a1ee87 100644 --- a/docker-compose.jsonnet +++ b/docker-compose.jsonnet @@ -37,6 +37,7 @@ pubbot: false, blogbot: false, prizebot: false, + youtubebot: false, bus_analyzer: false, graphs: false, }, @@ -358,6 +359,13 @@ state_path:: "./prizebot_state.json", }, + youtubebot:: { + zulip_email: "youtube-bot@chat.videostrike.team", + zulip_api_key: "", + google_credentials_file: $.cutter_creds_file, + channel_id: "UCz5-PNQxaT4WtB_OMAwD85g", // DesertBusForHope + }, + // template for donation data urls donation_url_template:: "https://example.com/DB{}/DB{}.json", @@ -805,6 +813,14 @@ volumes: ["%s:%s" % [$.prizebot.state_path, $.prizebot.state]], }, + [if $.enabled.youtubebot then "youtubebot"]: + bot_service("youtubebot", $.youtubebot + { + zulip_url: $.zulip_url, + google_credentials_file: "/creds.json", + }) + { + volumes: ["%s:/creds.json" % $.youtubebot.google_credentials_file], + }, + }, } diff --git a/zulip_bots/zulip_bots/youtubebot.py b/zulip_bots/zulip_bots/youtubebot.py new file mode 100644 index 0000000..25e42e3 --- /dev/null +++ 
"""Zulip bot that polls the YouTube Data API for new comment threads on a
channel and posts each new top-level comment to a Zulip stream.

Limitations:
- No state is kept between runs, so comments made while the bot is not
  running are never posted.
- Only top-level comments are found, not replies.
- For quota reasons, it should not poll more often than once a minute.
- Only one page of threads is fetched per poll, so if more comments than
  that arrive within one interval, the oldest of them are missed.
"""

import gevent.monkey
gevent.monkey.patch_all()

import json
import logging
import re
import time

from common.googleapis import GoogleAPIClient

from .config import get_config
from .zulip import Client

# Number of comment threads requested per poll.
_MAX_RESULTS = 100


def get_comments(google, channel_id):
	"""Fetch the most recent comment threads for a channel.

	google: API client; its request() method is used to call the
		commentThreads endpoint.
	channel_id: value passed as allThreadsRelatedToChannelId.

	Returns a list of top-level comment resources, earliest first. Each one
	has an extra "videoId" key copied from its thread's snippet.
	Raises whatever resp.raise_for_status() raises on an error response.
	"""
	resp = google.request("GET",
		"https://www.googleapis.com/youtube/v3/commentThreads",
		params={
			"part": "snippet",
			"allThreadsRelatedToChannelId": channel_id,
			"maxResults": str(_MAX_RESULTS),
			"textFormat": "plainText",
		}
	)
	resp.raise_for_status()
	items = resp.json()["items"][::-1] # flip direction so we get earliest first
	if items:
		earliest = items[0]["snippet"]["topLevelComment"]
		logging.info(f"Got {len(items)} comment threads, oldest is {earliest['id']} at {earliest['snippet']['publishedAt']}")
	else:
		logging.info("Got no comment threads")
	# We could look at replies, but since we can only check for new replies in the first 100 threads,
	# we'd rather just never show them than confuse people when they don't show up sometimes.
	comments = []
	for thread in items:
		logging.debug(f"Got thread: {json.dumps(thread)}")
		comment = thread["snippet"]["topLevelComment"]
		# The video id lives on the thread, not the comment; carry it along for show_comment().
		comment["videoId"] = thread["snippet"]["videoId"]
		comments.append(comment)
	return comments


def _quote_fence(text):
	"""Return a backtick fence (at least 3 long) that is strictly longer than
	any run of backticks in text, so nothing inside text can close it."""
	longest = max((len(run) for run in re.findall(r"`+", text)), default=0)
	return "`" * max(3, longest + 1)


def show_comment(zulip, stream, topic, comment):
	"""Format a comment (as returned by get_comments) and post it to Zulip.

	The message is always logged. If stream is empty, nothing is sent,
	which acts as a dry-run mode.
	"""
	c = comment["snippet"]
	author = f"[{c['authorDisplayName']}]({c['authorChannelUrl']})"
	video = f"https://youtu.be/{comment['videoId']}"
	text = c["textDisplay"]
	# The comment text is untrusted. With a fixed ``` fence, a comment containing
	# its own ``` line would end the quote early and the rest of the comment would
	# render as if written by the bot, so use a fence the text cannot contain.
	fence = _quote_fence(text)
	message = f"{author} commented on {video}:\n{fence}quote\n{text}\n{fence}"
	logging.info(f"Sending message to {stream}/{topic}: {message!r}")
	# Empty stream acts as a dry-run mode
	if stream:
		zulip.send_to_stream(stream, topic, message)


def main(conf_file, interval=60, one_off=0, stream="bot-spam", topic="Youtube Comments", keep=1000, log="INFO"):
	"""Config:
		zulip_url
		zulip_email
		zulip_api_key
		channel_id
		google_credentials_file:
			Path to json file containing at least:
				client_id
				client_secret
				refresh_token
			These creds should be authed as the target account with Youtube Data API read perms

	In one-off=N mode, get the last N comments and show them, then exit.

	Otherwise, poll every INTERVAL seconds and post comments not seen before
	to STREAM under TOPIC. An empty STREAM only logs the messages.
	KEEP is how many seen comment ids are remembered; it is raised to the
	number of threads fetched per poll if set lower than that.
	"""
	logging.basicConfig(level=log)

	config = get_config(conf_file)
	zulip = Client(config["zulip_url"], config["zulip_email"], config["zulip_api_key"])
	with open(config["google_credentials_file"]) as f:
		credentials = json.load(f)
	google = GoogleAPIClient(credentials["client_id"], credentials["client_secret"], credentials["refresh_token"])
	channel_id = config["channel_id"]

	if one_off:
		comments = get_comments(google, channel_id)
		for comment in comments[-one_off:]:
			show_comment(zulip, stream, topic, comment)
		return

	# If we remember fewer ids than one poll returns, ids that are still on the
	# fetched page get forgotten and their comments are re-posted on every poll.
	# keep=0 is worse: seen[-0:] is the whole list, so it would never be trimmed.
	if keep < _MAX_RESULTS:
		logging.warning(f"keep={keep} is less than the {_MAX_RESULTS} threads fetched per poll, using {_MAX_RESULTS} instead")
		keep = _MAX_RESULTS

	seen = None
	while True:
		start = time.monotonic()

		if seen is None:
			# Get latest messages as of startup, so we know what's new next time
			seen = [comment["id"] for comment in get_comments(google, channel_id)]
		else:
			for comment in get_comments(google, channel_id):
				if comment["id"] in seen:
					logging.debug(f"Comment {comment['id']} already seen, skipping")
					continue
				show_comment(zulip, stream, topic, comment)
				seen.append(comment["id"])
			# Bound memory use by only remembering the most recent ids
			seen = seen[-keep:]

		# Sleep for whatever is left of the interval after the time spent polling
		remaining = start + interval - time.monotonic()
		logging.debug(f"Keeping {len(seen)} seen, waiting {remaining:.2f}s")
		if remaining > 0:
			time.sleep(remaining)


if __name__ == '__main__':
	import argh
	argh.dispatch_command(main)