mirror of https://github.com/ekimekim/wubloader
Import blogbot from previous years
parent
a0ca96aff6
commit
de52f7dbff
@ -0,0 +1,139 @@
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
|
||||
import argh
|
||||
import requests
|
||||
import bs4
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
logging.basicConfig(level='INFO')
|
||||
|
||||
class Client(object):
|
||||
def __init__(self, base_url, email, api_key):
|
||||
self.base_url = base_url
|
||||
self.email = email
|
||||
self.api_key = api_key
|
||||
|
||||
def request(self, method, *path, **params):
|
||||
if method == 'GET':
|
||||
args = {"params": params}
|
||||
else:
|
||||
args = {"data": {
|
||||
k: v if isinstance(v, str) else json.dumps(v)
|
||||
for k, v in params.items()
|
||||
}}
|
||||
url = "/".join([self.base_url, "api/v1"] + list(map(str, path)))
|
||||
resp = requests.request(method, url, auth=(self.email, self.api_key), **args)
|
||||
if not resp.ok:
|
||||
logging.info(repr(params))
|
||||
logging.info(f"Got {resp.status_code} for {url}: {resp.text}")
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
||||
def html_to_md(html):
|
||||
"""Lossy attempt to convert html to markdown"""
|
||||
if isinstance(html, bs4.Comment):
|
||||
return ""
|
||||
|
||||
if html.name is None:
|
||||
# Raw string, return as-is
|
||||
return html
|
||||
|
||||
if html.name == "br":
|
||||
return "\n"
|
||||
|
||||
if html.name == "hr":
|
||||
return "---"
|
||||
|
||||
if html.name == "img":
|
||||
return html.get("src") + "\n"
|
||||
|
||||
inner = "".join(html_to_md(child) for child in html.children)
|
||||
|
||||
if html.name == "a":
|
||||
return "[{}]({})".format(inner, html.get("href"))
|
||||
|
||||
if html.name == "p":
|
||||
return inner + "\n"
|
||||
|
||||
CHAR_FORMAT = {
|
||||
"b": "**",
|
||||
"strong": "**",
|
||||
"h1": "**",
|
||||
"h2": "**",
|
||||
"h3": "**",
|
||||
"h4": "**",
|
||||
"h5": "**",
|
||||
"h6": "**",
|
||||
"i": "*",
|
||||
"em": "*",
|
||||
"del": "~~",
|
||||
"pre": "`",
|
||||
"code": "`",
|
||||
}
|
||||
if html.name in CHAR_FORMAT:
|
||||
char = CHAR_FORMAT[html.name]
|
||||
return f"{char}{inner}{char}"
|
||||
|
||||
return inner
|
||||
|
||||
def blog_to_md(blog):
|
||||
md_content = html_to_md(BeautifulSoup(blog["content"], "html.parser"))
|
||||
return "\n".join([
|
||||
"Blog Post: [{title}](https://desertbus.org/?id={id})".format(**blog),
|
||||
"Posted by {author} at <time:{date}>".format(**blog),
|
||||
"```quote",
|
||||
md_content,
|
||||
"```",
|
||||
])
|
||||
|
||||
def get_posts():
|
||||
"""Get all blog posts on the front page"""
|
||||
resp = requests.get("https://desertbus.org/wapi/blog/1")
|
||||
resp.raise_for_status()
|
||||
posts = resp.json()["posts"]
|
||||
logging.info("Fetched posts: {}".format(", ".join(post['id'] for post in posts)))
|
||||
return posts
|
||||
|
||||
def send_post(client, stream, topic, post):
|
||||
content = blog_to_md(post)
|
||||
client.request("POST", "messages",
|
||||
type="stream",
|
||||
to=stream,
|
||||
topic=topic,
|
||||
content=content,
|
||||
)
|
||||
|
||||
def main(zulip_url, zulip_email, zulip_key, interval=60, test=False, stream='bot-spam', topic='Blog Posts'):
|
||||
"""Post to zulip each new blog post, checking every INTERVAL seconds.
|
||||
Will not post any posts that already exist, unless --test is given
|
||||
in which case it will print the most recent on startup."""
|
||||
client = Client(zulip_url, zulip_email, zulip_key)
|
||||
seen = set()
|
||||
first = True
|
||||
while True:
|
||||
start = time.time()
|
||||
try:
|
||||
posts = get_posts()
|
||||
except Exception:
|
||||
logging.exception("Failed to get posts")
|
||||
else:
|
||||
if first:
|
||||
seen = set(post['id'] for post in posts)
|
||||
if test:
|
||||
send_post(client, stream, topic, posts[0])
|
||||
first = False
|
||||
else:
|
||||
for post in posts[::-1]:
|
||||
if post['id'] not in seen:
|
||||
send_post(client, stream, topic, post)
|
||||
seen.add(post['id'])
|
||||
remaining = start + interval - time.time()
|
||||
if remaining > 0:
|
||||
time.sleep(remaining)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
argh.dispatch_command(main)
|
Loading…
Reference in New Issue