From: www Date: Sun, 29 Sep 2024 21:29:43 +0000 (+0000) Subject: Mirrored from chen.git X-Git-Url: https://git.chaotic.ninja/gitweb/yakumo_izuru/?a=commitdiff_plain;h=HEAD;p=chen.git Mirrored from chen.git git-svn-id: https://svn.chaotic.ninja/svn/chen-yakumo.izuru@1 45bf2fb3-30f2-7a47-aa86-e6f8f3ae9456 --- 8a75ddbfedddd74f4b750d09fd6a67596f956001 diff --git a/branches/master/.gitignore b/branches/master/.gitignore new file mode 100644 index 0000000..2fa7ce7 --- /dev/null +++ b/branches/master/.gitignore @@ -0,0 +1 @@ +config.ini diff --git a/branches/master/COPYING b/branches/master/COPYING new file mode 100644 index 0000000..5aabf52 --- /dev/null +++ b/branches/master/COPYING @@ -0,0 +1,4 @@ +Freedom License v1 (2023年05月11日) + +全く無限的自由です。 +It's infinite freedom. diff --git a/branches/master/Makefile b/branches/master/Makefile new file mode 100644 index 0000000..870c59d --- /dev/null +++ b/branches/master/Makefile @@ -0,0 +1,10 @@ +# $TheSupernovaDuo$ +all: + @echo "Commands available" + @echo "==================" + @echo "deps -- fetch and install dependencies" + @echo "format -- format code using python-black" +deps: + pip install --user -r requirements.txt +format: + black main.py diff --git a/branches/master/README b/branches/master/README new file mode 100644 index 0000000..35474bd --- /dev/null +++ b/branches/master/README @@ -0,0 +1,20 @@ +chen +==== + +XMPP bot to preview links and file contents. Shikigami of the Shikigami of the Gap Youkai +Based on Angel[1], without the sed(1) and YT redirect features. 
class Lifo(list):
    """
    Bounded, newest-first list used to remember recent messages and urls.

    New entries are placed at index 0.  When an insertion leaves the list
    holding more than ``size`` entries, the entry at the tail is dropped.
    """

    def __init__(self, size):
        list.__init__(self)
        # Maximum number of entries kept after an ``add``.
        self.size = size

    def add(self, item):
        """Put *item* at the front and drop the tail entry if over capacity."""
        self[0:0] = [item]
        if self.size < len(self):
            del self[-1]
def get_urls(self, msg):
    """Return the URLs found in the whitespace-separated words of a message.

    A word counts as a URL when it contains one of the scheme prefixes in
    ``req_list``.  The URL starts at the first such prefix, so leading
    decoration such as ``(`` or ``<`` or ``key=`` is dropped instead of
    being handed on as part of the URL (where it made the scheme
    unparseable and the fetch fail).

    Args:
        msg: Message stanza; only ``msg["body"]`` is read.

    Returns:
        List of URL strings in the order they appear in the body.
    """
    urls = []
    for word in msg["body"].split():
        # Offsets at which any accepted scheme prefix occurs in this word.
        offsets = [
            pos for pos in (word.find(prefix) for prefix in req_list) if pos != -1
        ]
        if offsets:
            urls.append(word[min(offsets):])
    return urls
def __init__(self, jid, password, nick, autojoin=None):
    """Create the bot, load the plugins it uses and hook up its handlers.

    Args:
        jid: Account address used to log in.
        password: Account password.
        nick: Nickname used when joining rooms.  When empty, the part of
            ``jid`` before ``@`` is used.
        autojoin: Iterable of room addresses joined at session start;
            ``None`` means no rooms.
    """
    ClientXMPP.__init__(self, jid, password)
    self.jid = jid
    # The nick is passed to join_muc and compared with msg["mucnick"], so the
    # fallback has to be a string (it used to be an empty list).
    self.nick = nick or str(jid).split("@")[0]
    self.autojoin = autojoin or []

    # Plugins are registered in the same order as before.  Those used
    # directly in this file: xep_0045 (join_muc), xep_0054 (make_vcard /
    # publish_vcard), xep_0084 and xep_0153 (avatar), xep_0363 (upload_file).
    for plugin_name in (
        "xep_0030",
        "xep_0060",
        "xep_0054",
        "xep_0045",
        "xep_0066",
        "xep_0084",
        "xep_0153",
        "xep_0363",
    ):
        self.register_plugin(plugin_name)

    self.add_event_handler("session_start", self.session_start)
    self.add_event_handler("message", self.message)
    self.add_event_handler("groupchat_message", self.muc_message)
    # Reconnect as soon as the connection drops.
    self.add_event_handler("disconnected", lambda _: self.connect())
async def message(self, msg):
    """Preview the links of a direct ("chat" or "normal") message.

    Messages of any other type are ignored, as are messages that already
    carry an out-of-band URL (``msg["oob"]["url"]``).  Errors raised while
    previewing are printed, not propagated.

    Args:
        msg: Incoming message stanza.
    """
    if msg["type"] not in ("chat", "normal"):
        return

    mtype = "chat"
    sender = msg["from"].bare

    try:
        if not msg["oob"]["url"]:
            if urls := self.get_urls(msg):
                await self.parse_urls(msg, urls, sender, mtype)
    except Exception as e:
        print(e)
new file mode 100644 index 0000000..9b9d03a --- /dev/null +++ b/branches/master/requirements.txt @@ -0,0 +1,6 @@ +requests +slixmpp +beautifulsoup4 +pantomime +aiohttp +git+https://git.chaotic.ninja/yakumo.izuru/ecgi#egg=ecgi diff --git a/branches/master/run.sh b/branches/master/run.sh new file mode 100755 index 0000000..146b4fa --- /dev/null +++ b/branches/master/run.sh @@ -0,0 +1,4 @@ +#!/bin/sh +# $TheSupernovaDuo$ +echo "Starting bot..." +python-3.9 main.py 2>chen.log diff --git a/branches/master/services/chen.service b/branches/master/services/chen.service new file mode 100644 index 0000000..0c2f885 --- /dev/null +++ b/branches/master/services/chen.service @@ -0,0 +1,19 @@ +# $YakumoLabs$ +# vim: ft=systemd +[Unit] +Description=XMPP bot to preview links and file contents +After=network.target +After=prosody.service +After=ejabberd.service + +[Service] +Type=simple +Restart=always +RestartSec=10 +StartLimitBurst=5 +StartLimitInterval=100 +WorkingDirectory=%h/chen/ +ExecStart=/usr/bin/python main.py + +[Install] +WantedBy=multi-user.target diff --git a/branches/master/services/chen.yml b/branches/master/services/chen.yml new file mode 100644 index 0000000..c75d47a --- /dev/null +++ b/branches/master/services/chen.yml @@ -0,0 +1,6 @@ +# $YakumoLabs$ +# vim: ft=yaml +# +# Note: %h must be replaced by the user's home directory path +cmd: /usr/bin/env python3 main.py +cwd: %h/chen diff --git a/branches/origin-master/.gitignore b/branches/origin-master/.gitignore new file mode 100644 index 0000000..2fa7ce7 --- /dev/null +++ b/branches/origin-master/.gitignore @@ -0,0 +1 @@ +config.ini diff --git a/branches/origin-master/COPYING b/branches/origin-master/COPYING new file mode 100644 index 0000000..5aabf52 --- /dev/null +++ b/branches/origin-master/COPYING @@ -0,0 +1,4 @@ +Freedom License v1 (2023年05月11日) + +全く無限的自由です。 +It's infinite freedom. 
diff --git a/branches/origin-master/Makefile b/branches/origin-master/Makefile new file mode 100644 index 0000000..870c59d --- /dev/null +++ b/branches/origin-master/Makefile @@ -0,0 +1,10 @@ +# $TheSupernovaDuo$ +all: + @echo "Commands available" + @echo "==================" + @echo "deps -- fetch and install dependencies" + @echo "format -- format code using python-black" +deps: + pip install --user -r requirements.txt +format: + black main.py diff --git a/branches/origin-master/README b/branches/origin-master/README new file mode 100644 index 0000000..35474bd --- /dev/null +++ b/branches/origin-master/README @@ -0,0 +1,20 @@ +chen +==== + +XMPP bot to preview links and file contents. Shikigami of the Shikigami of the Gap Youkai +Based on Angel[1], without the sed(1) and YT redirect features. + +Requirements +------------ + +* Python >= 3.8 + +Run +--- + +* pip3 install --user -r requirements.txt +* $EDITOR config.ini.default (save as config.ini) +* python3 main.py + + +[1]: https://wiki.kalli.st/Angel diff --git a/branches/origin-master/avatar.png b/branches/origin-master/avatar.png new file mode 100644 index 0000000..f3e7231 Binary files /dev/null and b/branches/origin-master/avatar.png differ diff --git a/branches/origin-master/config.ini.default b/branches/origin-master/config.ini.default new file mode 100644 index 0000000..c8348a8 --- /dev/null +++ b/branches/origin-master/config.ini.default @@ -0,0 +1,5 @@ +[chen] +jid = chen@example.com +password = b0TPA55W0rD +nick = Chen +autojoin = room1@muc.example.com room2@muc.example.com room3@muc.example.com diff --git a/branches/origin-master/main.py b/branches/origin-master/main.py new file mode 100644 index 0000000..51acbaa --- /dev/null +++ b/branches/origin-master/main.py @@ -0,0 +1,274 @@ +import requests +import bs4 +import random +import configparser +import re +import io +import os +import mimetypes +import asyncio +from collections import defaultdict +from slixmpp import ClientXMPP +from urllib.parse 
async def process_link(self, uri, sender, mtype):
    """Fetch a link and send a preview of it to ``sender``.

    For HTML responses the page title is sent as a message, preceded by the
    final URL when the request was redirected.  A title that is already in
    this sender's recent previews is not sent again.  Any other content
    type is downloaded into memory and passed to ``embed_file``.

    Changes from the previous version:
      * the HTML read loop stops at ``</title>`` again (the sentinel had
        become the empty string, which is contained in every string, so
        only the first chunk was ever parsed);
      * the page is decoded once after reading, so a multi-byte character
        split across two chunks is no longer dropped;
      * the download size is counted from the real chunk lengths;
      * the response is always closed, including on early returns;
      * a filename taken from Content-Disposition is reduced to its base
        name.

    Args:
        uri: Parsed URL, as produced by ``urlparse``.
        sender: Address the preview is sent to; also the key into
            ``self.messages``.
        mtype: Message type used for the replies.

    Raises:
        Whatever ``requests`` or the HTML handling raises; only the file
        branch prints and swallows its errors (unchanged).
    """
    url = urlunparse(uri)
    # NOTE(review): requests.get and iter_content are called synchronously
    # inside this coroutine, so nothing else runs until the download ends.
    # Moving the fetch to an executor or an async client is a separate change.
    r = requests.get(url, stream=True, headers=headers, timeout=5)
    try:
        if not r.ok:
            return
        ftype = normalize_mimetype(r.headers.get("content-type"))
        if ftype in html_files:
            raw = bytearray()
            for chunk in r.iter_content(chunk_size=1024, decode_unicode=False):
                raw.extend(chunk)
                # Stop once the title element is complete or the cap is hit.
                if len(raw) > data_limit or b"</title>" in raw.lower():
                    break
            data = raw.decode("utf-8", errors="ignore")
            soup = bs4.BeautifulSoup(data, parser)
            if title := soup.find("title"):
                output = title.text.strip()
                if output:
                    # Single-line titles are wrapped in asterisks.
                    output = f"*{output}*" if ("\n" not in output) else output
                    if output in self.messages[sender]["previews"]:
                        return

                    self.messages[sender]["previews"].add(output)
                    if r.history:
                        self.send_message(mto=sender, mbody=r.url, mtype=mtype)
                    self.send_message(mto=sender, mbody=output, mtype=mtype)
        else:
            try:
                length = 0
                outfile = io.BytesIO()
                for chunk in r.iter_content(
                    chunk_size=512,
                    decode_unicode=False,
                ):
                    # The last chunk is usually shorter than chunk_size.
                    length += len(chunk)
                    if length >= data_limit:
                        return
                    outfile.write(chunk)

                content_disposition = r.headers.get("content-disposition")
                filename = None
                if content_disposition:
                    _, params = ecgi.parse_header(content_disposition)
                    filename = params.get("filename")
                    if filename:
                        # Header value is remote-controlled: keep the name only.
                        filename = os.path.basename(filename)
                else:
                    filename = os.path.basename(uri.path)

                ext = os.path.splitext(filename)[1] if filename else ".txt"
                fname = filename if filename else f"file{ext}"
                await self.embed_file(url, sender, mtype, ftype, fname, outfile)
            except Exception as e:
                print(e)
    finally:
        r.close()
async def update_info(self):
    """Publish the bot's avatar and vCard.

    The avatar is read from ``avatar.png`` in the current directory.  If it
    cannot be read, the error is printed and only the vCard is published,
    so the caller can still go on to join rooms.  All publish requests are
    started in the same order as before and then awaited together; any that
    fail are printed instead of being left unobserved.
    """
    avatar_type = "image/png"
    try:
        with open("avatar.png", "rb") as avatar_file:
            avatar = avatar_file.read()
    except OSError as e:
        print(e)
        avatar = None

    pending = []
    if avatar is not None:
        info = {
            "id": self.plugin["xep_0084"].generate_id(avatar),
            "type": avatar_type,
            "bytes": len(avatar),
        }
        pending.append(self.plugin["xep_0084"].publish_avatar(avatar))
        pending.append(
            self.plugin["xep_0153"].set_avatar(
                avatar=avatar,
                mtype=avatar_type,
            )
        )
        pending.append(self.plugin["xep_0084"].publish_avatar_metadata([info]))

    vcard = self.plugin["xep_0054"].make_vcard()
    vcard["URL"] = "https://git.chaotic.ninja/yakumo.izuru/chen"
    vcard["DESC"] = "Shikigami of the Shikigami of the Gap Youkai"
    vcard["NICKNAME"] = "Chen"
    vcard["FN"] = "Chen"
    pending.append(self.plugin["xep_0054"].publish_vcard(vcard))

    # return_exceptions=True: one failed publish must not cancel the others.
    results = await asyncio.gather(*pending, return_exceptions=True)
    for result in results:
        if isinstance(result, BaseException):
            print(result)
async def muc_message(self, msg):
    """Preview the links of a groupchat message.

    Messages sent under the bot's own nick are ignored, as are messages
    that already carry an out-of-band URL (``msg["oob"]["url"]``).  Errors
    raised while previewing are printed, matching ``message``; previously
    they were discarded silently.

    Args:
        msg: Incoming groupchat message stanza.
    """
    if msg["type"] not in ("groupchat", "normal"):
        return

    mtype = "groupchat"
    sender = msg["from"].bare
    if msg["mucnick"] == self.nick:
        return

    try:
        if not msg["oob"]["url"]:
            if urls := self.get_urls(msg):
                await self.parse_urls(msg, urls, sender, mtype)
    except Exception as e:
        print(e)
+python-3.9 main.py 2>chen.log diff --git a/branches/origin-master/services/chen.service b/branches/origin-master/services/chen.service new file mode 100644 index 0000000..0c2f885 --- /dev/null +++ b/branches/origin-master/services/chen.service @@ -0,0 +1,19 @@ +# $YakumoLabs$ +# vim: ft=systemd +[Unit] +Description=XMPP bot to preview links and file contents +After=network.target +After=prosody.service +After=ejabberd.service + +[Service] +Type=simple +Restart=always +RestartSec=10 +StartLimitBurst=5 +StartLimitInterval=100 +WorkingDirectory=%h/chen/ +ExecStart=/usr/bin/python main.py + +[Install] +WantedBy=multi-user.target diff --git a/branches/origin-master/services/chen.yml b/branches/origin-master/services/chen.yml new file mode 100644 index 0000000..c75d47a --- /dev/null +++ b/branches/origin-master/services/chen.yml @@ -0,0 +1,6 @@ +# $YakumoLabs$ +# vim: ft=yaml +# +# Note: %h must be replaced by the user's home directory path +cmd: /usr/bin/env python3 main.py +cwd: %h/chen diff --git a/branches/origin/.gitignore b/branches/origin/.gitignore new file mode 100644 index 0000000..2fa7ce7 --- /dev/null +++ b/branches/origin/.gitignore @@ -0,0 +1 @@ +config.ini diff --git a/branches/origin/COPYING b/branches/origin/COPYING new file mode 100644 index 0000000..5aabf52 --- /dev/null +++ b/branches/origin/COPYING @@ -0,0 +1,4 @@ +Freedom License v1 (2023年05月11日) + +全く無限的自由です。 +It's infinite freedom. 
diff --git a/branches/origin/Makefile b/branches/origin/Makefile new file mode 100644 index 0000000..870c59d --- /dev/null +++ b/branches/origin/Makefile @@ -0,0 +1,10 @@ +# $TheSupernovaDuo$ +all: + @echo "Commands available" + @echo "==================" + @echo "deps -- fetch and install dependencies" + @echo "format -- format code using python-black" +deps: + pip install --user -r requirements.txt +format: + black main.py diff --git a/branches/origin/README b/branches/origin/README new file mode 100644 index 0000000..35474bd --- /dev/null +++ b/branches/origin/README @@ -0,0 +1,20 @@ +chen +==== + +XMPP bot to preview links and file contents. Shikigami of the Shikigami of the Gap Youkai +Based on Angel[1], without the sed(1) and YT redirect features. + +Requirements +------------ + +* Python >= 3.8 + +Run +--- + +* pip3 install --user -r requirements.txt +* $EDITOR config.ini.default (save as config.ini) +* python3 main.py + + +[1]: https://wiki.kalli.st/Angel diff --git a/branches/origin/avatar.png b/branches/origin/avatar.png new file mode 100644 index 0000000..f3e7231 Binary files /dev/null and b/branches/origin/avatar.png differ diff --git a/branches/origin/config.ini.default b/branches/origin/config.ini.default new file mode 100644 index 0000000..c8348a8 --- /dev/null +++ b/branches/origin/config.ini.default @@ -0,0 +1,5 @@ +[chen] +jid = chen@example.com +password = b0TPA55W0rD +nick = Chen +autojoin = room1@muc.example.com room2@muc.example.com room3@muc.example.com diff --git a/branches/origin/main.py b/branches/origin/main.py new file mode 100644 index 0000000..51acbaa --- /dev/null +++ b/branches/origin/main.py @@ -0,0 +1,274 @@ +import requests +import bs4 +import random +import configparser +import re +import io +import os +import mimetypes +import asyncio +from collections import defaultdict +from slixmpp import ClientXMPP +from urllib.parse import urlparse, parse_qs, urlunparse +from pantomime import normalize_mimetype +import ecgi + +parser = 
"html.parser" +user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0" +accept_lang = "en-US" +data_limit = 786400000 + +headers = { + "user-agent": user_agent, + "Accept-Language": accept_lang, + "Cache-Control": "no-cache", +} +block_list = ( + "localhost", + "127.0.0.1", + "0.0.0.0", + "youtu.be", + "www.youtube.com", + "youtube.com", + "m.youtube.com", + "music.youtube.com", +) +req_list = ( + "http://", + "https://", +) +html_files = ( + "text/html", + "application/xhtml+xml", +) + + +class Lifo(list): + """ + Limited size LIFO array to store messages and urls + """ + + def __init__(self, size): + super().__init__() + self.size = size + + def add(self, item): + self.insert(0, item) + if len(self) > self.size: + self.pop() + + +# Cheeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeen +class ChenBot(ClientXMPP): + commands = {} + muc_commands = {} + + messages = defaultdict( + lambda: { + "messages": Lifo(100), + "links": Lifo(10), + "previews": Lifo(10), + } + ) + + def get_urls(self, msg): + str_list = msg["body"].strip().split() + urls = [u for u in str_list if any(r in u for r in req_list)] + return urls + + async def parse_uri(self, uri, sender, mtype): + """Parse a URI and send the result to the sender.""" + netloc = uri.netloc + if netloc.split(":")[0] in block_list: + return + else: + await self.process_link(uri, sender, mtype) + + async def process_link(self, uri, sender, mtype): + """Process a link and send the result to the sender.""" + url = urlunparse(uri) + r = requests.get(url, stream=True, headers=headers, timeout=5) + if not r.ok: + return + ftype = normalize_mimetype(r.headers.get("content-type")) + if ftype in html_files: + data = "" + for i in r.iter_content(chunk_size=1024, decode_unicode=False): + data += i.decode("utf-8", errors="ignore") + if len(data) > data_limit or "" in data.lower(): + break + soup = bs4.BeautifulSoup(data, parser) + if title := soup.find("title"): + output = title.text.strip() + if output: + output = 
async def parse_urls(self, msg, urls, sender, mtype):
    """Preview each URL not seen recently, unless the message opts out.

    Nothing is previewed when the lower-cased body contains "nsfl", "nsfw"
    or "#nospoil".  Otherwise every URL missing from this sender's recent
    links is recorded there, parsed and handed to ``parse_uri``.

    Args:
        msg: Message stanza; only ``msg["body"]`` is read.
        urls: URL strings extracted from the message.
        sender: Key into ``self.messages`` and address for replies.
        mtype: Message type used for replies.
    """
    lowered = msg["body"].lower()
    if any(marker in lowered for marker in ("nsfl", "nsfw", "#nospoil")):
        return

    for link in urls:
        recent = self.messages[sender]["links"]
        if link not in recent:
            recent.add(link)
            await self.parse_uri(urlparse(link), sender, mtype)
async def session_start(self, event):
    """Announce presence, load the roster, publish profile info, join rooms.

    Every address in ``self.autojoin`` is joined under ``self.nick``.  A
    failure to join one room is printed and the remaining rooms are still
    attempted.

    Args:
        event: Session-start event payload (not used).
    """
    self.send_presence()
    await self.get_roster()
    await self.update_info()

    for room in self.autojoin:
        try:
            muc = self.plugin["xep_0045"]
            muc.join_muc(room, self.nick)
        except Exception as err:
            print(err)
if __name__ == "__main__":
    # Script entry point: read config.ini from the current directory, build
    # the bot from its [chen] section and run it until interrupted.
    config = configparser.ConfigParser()
    # ConfigParser.read() skips files it cannot open and returns the list of
    # files it did parse, so an empty result means config.ini is missing.
    if not config.read("config.ini"):
        raise SystemExit(
            "config.ini not found: copy config.ini.default to config.ini and edit it"
        )
    try:
        jid = config["chen"]["jid"]
        password = config["chen"]["password"]
    except KeyError as e:
        raise SystemExit(f"config.ini: missing section or option {e}") from None
    # nick and autojoin are optional; the defaults match config.ini.default
    # (nick) and "join no rooms" (autojoin).
    nick = config["chen"].get("nick", "Chen")
    autojoin = config["chen"].get("autojoin", "").split()
    bot = ChenBot(jid, password, nick, autojoin=autojoin)

    bot.connect()
    bot.process(forever=True)
+python-3.9 main.py 2>chen.log diff --git a/branches/origin/services/chen.service b/branches/origin/services/chen.service new file mode 100644 index 0000000..0c2f885 --- /dev/null +++ b/branches/origin/services/chen.service @@ -0,0 +1,19 @@ +# $YakumoLabs$ +# vim: ft=systemd +[Unit] +Description=XMPP bot to preview links and file contents +After=network.target +After=prosody.service +After=ejabberd.service + +[Service] +Type=simple +Restart=always +RestartSec=10 +StartLimitBurst=5 +StartLimitInterval=100 +WorkingDirectory=%h/chen/ +ExecStart=/usr/bin/python main.py + +[Install] +WantedBy=multi-user.target diff --git a/branches/origin/services/chen.yml b/branches/origin/services/chen.yml new file mode 100644 index 0000000..c75d47a --- /dev/null +++ b/branches/origin/services/chen.yml @@ -0,0 +1,6 @@ +# $YakumoLabs$ +# vim: ft=yaml +# +# Note: %h must be replaced by the user's home directory path +cmd: /usr/bin/env python3 main.py +cwd: %h/chen diff --git a/trunk/.gitignore b/trunk/.gitignore new file mode 100644 index 0000000..2fa7ce7 --- /dev/null +++ b/trunk/.gitignore @@ -0,0 +1 @@ +config.ini diff --git a/trunk/COPYING b/trunk/COPYING new file mode 100644 index 0000000..5aabf52 --- /dev/null +++ b/trunk/COPYING @@ -0,0 +1,4 @@ +Freedom License v1 (2023年05月11日) + +全く無限的自由です。 +It's infinite freedom. diff --git a/trunk/Makefile b/trunk/Makefile new file mode 100644 index 0000000..870c59d --- /dev/null +++ b/trunk/Makefile @@ -0,0 +1,10 @@ +# $TheSupernovaDuo$ +all: + @echo "Commands available" + @echo "==================" + @echo "deps -- fetch and install dependencies" + @echo "format -- format code using python-black" +deps: + pip install --user -r requirements.txt +format: + black main.py diff --git a/trunk/README b/trunk/README new file mode 100644 index 0000000..35474bd --- /dev/null +++ b/trunk/README @@ -0,0 +1,20 @@ +chen +==== + +XMPP bot to preview links and file contents. 
"""XMPP bot that previews links posted in chats and rooms.

For every http(s) URL found in a chat or groupchat message the bot either
posts the page's ``<title>`` (HTML documents) or downloads the resource and
re-shares it through the ``xep_0363`` upload plugin (everything else).
"""
import asyncio
import codecs
import configparser
import io
import ipaddress
import mimetypes  # unused here; kept from the original import set
import os
import random  # unused here; kept from the original import set
import re  # unused here; kept from the original import set
from collections import defaultdict
from typing import NamedTuple, Optional
from urllib.parse import parse_qs, urlparse, urlunparse  # parse_qs: unused, kept

import bs4
import ecgi
import requests
from pantomime import normalize_mimetype
from slixmpp import ClientXMPP
from slixmpp.exceptions import XMPPError

parser = "html.parser"
user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0"
accept_lang = "en-US"
# Upper bound, in bytes/characters, on how much of one resource is read.
# NOTE(review): ~786 MB also bounds the HTML title sniffing; a much smaller
# cap for that path looks advisable -- confirm the intended value.
data_limit = 786400000

headers = {
    "user-agent": user_agent,
    "Accept-Language": accept_lang,
    "Cache-Control": "no-cache",
}
# Hosts that are never fetched.
block_list = (
    "localhost",
    "127.0.0.1",
    "0.0.0.0",
    "youtu.be",
    "www.youtube.com",
    "youtube.com",
    "m.youtube.com",
    "music.youtube.com",
)
# URL prefixes that mark a message token as a link.
req_list = (
    "http://",
    "https://",
)
# Normalized MIME types handled as "HTML page, post its title".
html_files = (
    "text/html",
    "application/xhtml+xml",
)


class Lifo(list):
    """
    Limited size LIFO array to store messages and urls

    New items go to the front; once ``size`` is exceeded the oldest item
    (at the end) is dropped.
    """

    def __init__(self, size):
        super().__init__()
        self.size = size

    def add(self, item):
        """Insert *item* at the front and evict the oldest entry if full."""
        self.insert(0, item)
        if len(self) > self.size:
            self.pop()


class _Fetched(NamedTuple):
    """Result of one blocking download performed by ``ChenBot._download``.

    ``title`` is not ``None`` exactly when the resource was an HTML page
    (it is ``""`` when the page has no usable title); the file fields are
    filled in for every other content type.
    """

    title: Optional[str] = None
    redirected_to: Optional[str] = None
    ftype: Optional[str] = None
    fname: Optional[str] = None
    outfile: Optional[io.BytesIO] = None


# Cheeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeen
class ChenBot(ClientXMPP):
    """XMPP client that answers messages containing links with a preview."""

    commands = {}
    muc_commands = {}

    # Per-conversation history, keyed by the bare JID of the peer or room.
    messages = defaultdict(
        lambda: {
            "messages": Lifo(100),
            "links": Lifo(10),
            "previews": Lifo(10),
        }
    )

    def get_urls(self, msg):
        """Return the whitespace-separated tokens of the body that are URLs.

        Only tokens that *start* with an accepted scheme are returned; a
        token that merely contains ``http://`` somewhere cannot be fetched.
        """
        return [u for u in msg["body"].strip().split() if u.startswith(req_list)]

    @staticmethod
    def _is_blocked(host):
        """Tell whether *host* (lower-case, no port/userinfo) must not be fetched."""
        host = host.rstrip(".")  # "localhost." names the same host
        if host in block_list:
            return True
        try:
            address = ipaddress.ip_address(host)
        except ValueError:
            return False  # a DNS name that is not on the list
        # Covers every spelling of the loopback/unspecified literals listed
        # in block_list (127.0.0.2, ::1, ::, ...).
        return address.is_loopback or address.is_unspecified

    async def parse_uri(self, uri, sender, mtype):
        """Parse a URI and send the result to the sender.

        ``uri`` is a ``urlparse`` result. Nothing is fetched when it has no
        host or when the host is blocked.
        """
        # ``hostname`` strips userinfo and port and lower-cases the name, so
        # "user@LOCALHOST:80" cannot slip past the block list.
        host = uri.hostname
        if not host or self._is_blocked(host):
            return
        await self.process_link(uri, sender, mtype)

    @staticmethod
    def _read_title(response):
        """Stream an HTML response until ``</title>`` and return the title text."""
        closing = "</title>"
        # Incremental decoding keeps multibyte characters that straddle a
        # chunk boundary instead of silently dropping them.
        decoder = codecs.getincrementaldecoder("utf-8")(errors="ignore")
        data = ""
        for chunk in response.iter_content(chunk_size=1024, decode_unicode=False):
            piece = decoder.decode(chunk)
            # Scan only the new text plus enough overlap to catch a closing
            # tag split across two chunks.
            window = (data[-len(closing):] + piece).lower()
            data += piece
            if len(data) > data_limit or closing in window:
                break
        title = bs4.BeautifulSoup(data, parser).find("title")
        return title.text.strip() if title else ""

    @staticmethod
    def _read_body(response):
        """Download the whole body; return ``None`` if it reaches ``data_limit``."""
        received = 0
        outfile = io.BytesIO()
        for chunk in response.iter_content(chunk_size=512, decode_unicode=False):
            received += len(chunk)  # actual bytes, not the requested chunk size
            if received >= data_limit:
                return None
            outfile.write(chunk)
        return outfile

    @staticmethod
    def _pick_filename(content_disposition, path):
        """Choose an upload name: header filename, URL basename, or ``file.txt``."""
        filename = None
        if content_disposition:
            _, params = ecgi.parse_header(content_disposition)
            filename = params.get("filename")
        if not filename:
            filename = os.path.basename(path)
        return filename or "file.txt"

    @staticmethod
    def _download(url, path):
        """Fetch *url* synchronously and describe what should be sent.

        Runs in a worker thread (see ``process_link``) because ``requests``
        blocks. Returns ``None`` when the server answers with an error
        status or the body is too large; raises
        ``requests.RequestException`` on network failures.
        """
        # The context manager releases the streamed connection on every path.
        with requests.get(url, stream=True, headers=headers, timeout=5) as r:
            if not r.ok:
                return None
            ftype = normalize_mimetype(r.headers.get("content-type"))
            if ftype in html_files:
                return _Fetched(
                    title=ChenBot._read_title(r),
                    redirected_to=r.url if r.history else None,
                )
            outfile = ChenBot._read_body(r)
            if outfile is None:
                return None
            fname = ChenBot._pick_filename(
                r.headers.get("content-disposition"), path
            )
            return _Fetched(ftype=ftype, fname=fname, outfile=outfile)

    def _send_title(self, fetched, sender, mtype):
        """Post the page title (and the final URL after redirects) once."""
        text = fetched.title
        if not text:
            return
        # Single-line titles are emphasised; multi-line ones are sent as is.
        output = text if "\n" in text else f"*{text}*"
        previews = self.messages[sender]["previews"]
        if output in previews:
            return
        previews.add(output)
        if fetched.redirected_to:
            self.send_message(mto=sender, mbody=fetched.redirected_to, mtype=mtype)
        self.send_message(mto=sender, mbody=output, mtype=mtype)

    async def process_link(self, uri, sender, mtype):
        """Process a link and send the result to the sender.

        HTML pages produce a title message; any other content type is
        downloaded and re-uploaded with ``embed_file``. Failures are printed
        and otherwise ignored so one bad link never stops the bot.
        """
        url = urlunparse(uri)
        loop = asyncio.get_running_loop()
        try:
            # requests is blocking: keep it off the event loop.
            fetched = await loop.run_in_executor(
                None, self._download, url, uri.path
            )
        except requests.RequestException as e:
            print(e)
            return
        if fetched is None:
            return
        if fetched.title is not None:
            self._send_title(fetched, sender, mtype)
            return
        try:
            await self.embed_file(
                url, sender, mtype, fetched.ftype, fetched.fname, fetched.outfile
            )
        except Exception as e:  # best-effort boundary, as in the original
            print(e)

    async def embed_file(self, url, sender, mtype, ftype, fname, outfile):
        """Embed a file and send the result to the sender.

        Uploads ``outfile`` through the ``xep_0363`` plugin and sends the
        resulting URL both as the message body and as its out-of-band URL.
        ``url`` is accepted for interface compatibility and is not used.
        """
        furl = await self.plugin["xep_0363"].upload_file(
            fname, content_type=ftype, input_file=outfile
        )
        message = self.make_message(sender)
        message["body"] = furl
        message["type"] = mtype
        message["oob"]["url"] = furl
        message.send()

    async def parse_urls(self, msg, urls, sender, mtype):
        """Preview every URL in *urls* not recently seen in this conversation.

        Nothing is previewed when the message body contains one of the
        opt-out markers ``nsfl``, ``nsfw`` or ``#nospoil``.
        """
        body = msg["body"].lower()
        if any(marker in body for marker in ("nsfl", "nsfw", "#nospoil")):
            return
        links = self.messages[sender]["links"]
        for u in urls:
            if u in links:
                continue
            links.add(u)
            await self.parse_uri(urlparse(u), sender, mtype)

    def __init__(self, jid, password, nick, autojoin=None):
        """Set up the client, its plugins and its event handlers.

        ``nick`` is the nickname used in rooms; ``autojoin`` is an optional
        list of room JIDs joined at session start.
        """
        ClientXMPP.__init__(self, jid, password)
        self.jid = jid
        # Fall back to the local part of the JID rather than a non-string.
        self.nick = nick or self.boundjid.user
        self.autojoin = autojoin or []
        for plugin in (
            "xep_0030",
            "xep_0060",
            "xep_0054",
            "xep_0045",
            "xep_0066",
            "xep_0084",
            "xep_0153",
            "xep_0363",
        ):
            self.register_plugin(plugin)

        self.add_event_handler("session_start", self.session_start)
        self.add_event_handler("message", self.message)
        self.add_event_handler("groupchat_message", self.muc_message)
        self.add_event_handler("disconnected", lambda _: self.connect())

    async def session_start(self, event):
        """Start the bot: announce presence, join rooms, publish the profile."""
        self.send_presence()
        await self.get_roster()
        for channel in self.autojoin:
            try:
                self.plugin["xep_0045"].join_muc(channel, self.nick)
            except Exception as e:
                print(e)
        # Profile publishing is best-effort and comes last so that a slow or
        # failing publish never delays joining the rooms.
        await self.update_info()

    @staticmethod
    async def _best_effort(awaitable):
        """Await *awaitable*, printing (not raising) XMPP errors."""
        try:
            await awaitable
        except XMPPError as e:
            print(e)

    async def update_info(self):
        """Update the bot info (vCard and avatar).

        Every step is awaited so that errors are reported and the steps run
        in a defined order; a failing step does not prevent the others.
        """
        vcard = self.plugin["xep_0054"].make_vcard()
        vcard["URL"] = "https://git.chaotic.ninja/yakumo.izuru/chen"
        vcard["DESC"] = "Shikigami of the Shikigami of the Gap Youkai"
        vcard["NICKNAME"] = "Chen"
        vcard["FN"] = "Chen"
        # NOTE(review): published before the avatar on the assumption that
        # xep_0153.set_avatar stores the photo in the existing vCard --
        # verify against the slixmpp plugin.
        await self._best_effort(self.plugin["xep_0054"].publish_vcard(vcard))

        try:
            with open("avatar.png", "rb") as avatar_file:
                avatar = avatar_file.read()
        except OSError as e:
            print(e)
            return

        avatar_type = "image/png"
        info = {
            "id": self.plugin["xep_0084"].generate_id(avatar),
            "type": avatar_type,
            "bytes": len(avatar),
        }
        # Avatar data first, metadata (which refers to it by id) last.
        await self._best_effort(self.plugin["xep_0084"].publish_avatar(avatar))
        await self._best_effort(
            self.plugin["xep_0153"].set_avatar(
                avatar=avatar,
                mtype=avatar_type,
            )
        )
        await self._best_effort(
            self.plugin["xep_0084"].publish_avatar_metadata([info])
        )

    async def _preview_links(self, msg, mtype):
        """Shared body of the message handlers: preview the links in *msg*."""
        sender = msg["from"].bare
        try:
            # Messages that already carry an out-of-band URL (the form
            # embed_file itself sends) are not previewed again.
            if msg["oob"]["url"]:
                return
            urls = self.get_urls(msg)
            if urls:
                await self.parse_urls(msg, urls, sender, mtype)
        except Exception as e:  # handler boundary: report, never crash
            print(e)

    async def message(self, msg):
        """Process a message."""
        if msg["type"] in ("chat", "normal"):
            await self._preview_links(msg, "chat")

    async def muc_message(self, msg):
        """Process a groupchat message, ignoring the bot's own messages."""
        if msg["type"] in ("groupchat", "normal"):
            if msg["mucnick"] == self.nick:
                return
            await self._preview_links(msg, "groupchat")


def main():
    """Read ``config.ini`` and run the bot until interrupted."""
    config = configparser.ConfigParser()
    # ConfigParser.read() silently skips missing files; fail with a clear
    # message instead of a later KeyError.
    if not config.read("config.ini"):
        raise SystemExit("config.ini not found (copy config.ini.default)")
    section = config["chen"]
    bot = ChenBot(
        section["jid"],
        section["password"],
        section["nick"],
        autojoin=section["autojoin"].split(),
    )

    bot.connect()
    bot.process(forever=True)


if __name__ == "__main__":
    main()
b/trunk/requirements.txt new file mode 100644 index 0000000..9b9d03a --- /dev/null +++ b/trunk/requirements.txt @@ -0,0 +1,6 @@ +requests +slixmpp +beautifulsoup4 +pantomime +aiohttp +git+https://git.chaotic.ninja/yakumo.izuru/ecgi#egg=ecgi diff --git a/trunk/run.sh b/trunk/run.sh new file mode 100755 index 0000000..146b4fa --- /dev/null +++ b/trunk/run.sh @@ -0,0 +1,4 @@ +#!/bin/sh +# $TheSupernovaDuo$ +echo "Starting bot..." +python-3.9 main.py 2>chen.log diff --git a/trunk/services/chen.service b/trunk/services/chen.service new file mode 100644 index 0000000..0c2f885 --- /dev/null +++ b/trunk/services/chen.service @@ -0,0 +1,19 @@ +# $YakumoLabs$ +# vim: ft=systemd +[Unit] +Description=XMPP bot to preview links and file contents +After=network.target +After=prosody.service +After=ejabberd.service + +[Service] +Type=simple +Restart=always +RestartSec=10 +StartLimitBurst=5 +StartLimitInterval=100 +WorkingDirectory=%h/chen/ +ExecStart=/usr/bin/python main.py + +[Install] +WantedBy=multi-user.target diff --git a/trunk/services/chen.yml b/trunk/services/chen.yml new file mode 100644 index 0000000..c75d47a --- /dev/null +++ b/trunk/services/chen.yml @@ -0,0 +1,6 @@ +# $YakumoLabs$ +# vim: ft=yaml +# +# Note: %h must be replaced by the user's home directory path +cmd: /usr/bin/env python3 main.py +cwd: %h/chen