diff --git a/.gitignore b/.gitignore index b4794e7..3f140f0 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ __pycache__ config.yml conf.d .venv +bot_session.session diff --git a/README.md b/README.md index 20111a7..810a32d 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,19 @@ # pgbot Use `config.yml` to set it up. It needs a list of regex with tokens and a database to match them. This doc is probably going to be abandoned right away, but at least I have added this line I dunno. + +Dependencies: +* aiocron - for the scheduler +* bs4 - for parsing html +* fake_headers - for tricking search engines +* psycopg - for pgsql +* pyyaml - for config parsing +* requests - for http requests +* telethon - for interacting with bot api + +Initial setup: +``` +python -m venv .venv +source .venv/bin/activate +pip install aiocron bs4 fake_headers psycopg pyyaml requests telethon +``` diff --git a/pgbot b/pgbot index 40f9b82..c61e283 100755 --- a/pgbot +++ b/pgbot @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import asyncio import sys import threading @@ -8,9 +9,9 @@ import yaml import pgbotlib.dbstuff import pgbotlib.commands +import pgbotlib.cron import pgbotlib.misc import pgbotlib.response -import pgbotlib.sched def init(args: list) -> tuple: @@ -25,8 +26,6 @@ def init(args: list) -> tuple: 'bot_session', config['api_id'], config['api_hash']).start(bot_token=config['bot_token']) - # db_conn = pgbotlib.dbstuff.DBConn( - # f'dbname={config['db_name']} user={config['db_user']}') db_conn = pgbotlib.dbstuff.DBConn(config['db_spec']) return config, db_conn, client @@ -35,27 +34,22 @@ def init(args: list) -> tuple: def main(): config, db_conn, client = init(sys.argv[1:]) - responder = pgbotlib.response.Responder(config, client, db_conn) + namegen = pgbotlib.misc.NameGenerator(config, db_conn) + responder = pgbotlib.response.Responder(config, client, db_conn, namegen) commander = pgbotlib.commands.Commander(config, client, config['admins'], - db_conn, responder) - - sched_thread = threading.Thread( - target=pgbotlib.sched.spawn_scheduler, - args=(config, client, responder), - daemon=True) - sched_thread.start() + db_conn, namegen, responder) @client.on(telethon.events.NewMessage()) async def handle_new_message(event): - chat = await event.get_chat() - result = await client.get_messages(chat.id, ids=[event.message.reply_to.reply_to_msg_id]) - print(result) - if event.message.text.startswith('/'): + if event.message.text.startswith('.'): await commander.action(event) else: await responder.respond(event) - client.run_until_disconnected() + cron = pgbotlib.cron.Cron(config, client, responder) + cron.plan() + loop = asyncio.get_event_loop() + loop.run_forever() if __name__ == '__main__': diff --git a/pgbotlib/api.py b/pgbotlib/api.py index 848f3c0..7172c6d 100644 --- a/pgbotlib/api.py +++ b/pgbotlib/api.py @@ -3,10 +3,11 @@ import json import random import re +import typing -import requests import bs4 import fake_headers +import requests import pgbotlib.dbstuff @@ -27,16 +28,17 @@ class ApiWrapper: # this is the entry point for the api calls # if you add another api, make sure there is a match here - def call(self, api: str, data: str | None, message: str) -> str: - match api: - case 'img_url': return self.format_img(data) - case 'gif': return self.get_gif() - case 'kmp': return self.get_kmp() - case 'fga': return self.get_fga() - case 'fakenews': return self.get_fakenews() - case 'anek': return self.get_anek() - case 'y_search': return self.y_search(message) - case _: return self.FAILED + # this could have used match - case statement, but python 3.9 + def call(self, api: str, data: typing.Union[str, None], + message: str) -> str: + if api == 'img_url': return self.format_img(data) + elif api == 'gif': return self.get_gif() + elif api == 'kmp': return self.get_kmp() + elif api == 'fga': return self.get_fga() + elif api == 'fakenews': return self.get_fakenews() + elif api == 'anek': return self.get_anek() + elif api == 'y_search': return self.y_search(message) + return self.FAILED def __sanitize_search(self, message: str) -> str: """Removes one of each of the search tokens from the query @@ -67,14 +69,14 @@ class ApiWrapper: 'isize': 'medium'}, headers=self.headers.generate()) parser = bs4.BeautifulSoup(request.text, 'html.parser') - items_place = parser.find('div', {'class': 'serp-list'}) - items = items_place.find_all('div', {'class': 'serp-item'}) + items_tag = parser.find('div', {'role': 'main'}) + items_full = json.loads(items_tag.find('div')['data-state']) + items = items_full['initialState']['serpList']['items']['entities'] images = [] - for item in items: - data = json.loads(item.get('data-bem')) - images.append(data['serp-item']['img_href']) + for item in items.values(): + images.append(item.get('origUrl')) if not images: - return None + return self.FAILED result = random.choice(images) return f'[url]({result})' diff --git a/pgbotlib/commands.py b/pgbotlib/commands.py index 360b938..1dfe22b 100644 --- a/pgbotlib/commands.py +++ b/pgbotlib/commands.py @@ -1,33 +1,64 @@ """ Respond to commands """ import telethon +import telethon.utils -import pgbotlib.api import pgbotlib.dbstuff +import pgbotlib.misc import pgbotlib.response +# TODO: quote via response? +# chat = await event.get_chat() +# result = await client.get_messages(chat.id, ids=[event.message.reply_to.reply_to_msg_id]) +# print(result) class Commander: - T_START = frozenset(['start_cmd']) - T_STOP = frozenset(['stop_cmd']) + T_START = frozenset(['cmd_start']) + T_START_E = frozenset(['cmd_start_enabled']) + T_STOP = frozenset(['cmd_stop']) + T_STOP_D = frozenset(['cmd_stop_disabled']) + NOPE = "а ты что ещё за хуй с горы?" + YEP = "да, господин!" + DOC = """ + Команды: + __.start__ + запустить бота + __.stop__ + остановить бота + __.list__ + перечислить доступные токены + __.regex token__ + перечислить регулярные выражения, относящиеся к токену + __.chat__ + получить id текущего чата + __.users__ + перечислить id пользователей + __.add token1[,token2,...] your phrase here__ + добавить фразу your phrase here для реакции на токены + __.adduser id имя__ + добавить пользователю имя + __.help__ + вывести этот текст + """ def __init__(self, config: dict, client: telethon.TelegramClient, admins: list, db_conn: pgbotlib.dbstuff.DBConn, + namegen: pgbotlib.misc.NameGenerator, responder: pgbotlib.response.Responder) -> None: - self.config = config + self.chats = config['chats'] self.client = client self.admins = admins self.db_conn = db_conn + self.namegen = namegen self.responder = responder self.available_tokens = [ str(token) for token, _ in self.responder.tokens] - def __add_entry(self, caller: int, command: str) -> bool: + def __add_response(self, caller: int, command: str) -> bool: if caller not in self.admins: - print('fuck off!') - return None + return self.NOPE input_tokens, phrase = command.strip().split(' ', 1) input_tokenset = frozenset(input_tokens.split(',')) for token in input_tokenset: @@ -35,27 +66,78 @@ class Commander: return False query = 'INSERT INTO responses (tokens, response) values (%s,%s)' values = (','.join(sorted(input_tokenset)), phrase.strip()) - return self.db_conn.update(query, values) + self.db_conn.update(query, values) + return self.YEP + + def __add_user(self, caller: int, userspec: str) -> bool: + if caller not in self.admins: + return self.NOPE + user_id, names = userspec.strip().split(' ', 1) + for name in names.strip().split(','): + query = 'INSERT INTO names (tg_id, name) values(%s,%s)' + values = (user_id, name) + self.db_conn.update(query, values) + return self.YEP + + + def __start_response(self) -> str: + if self.responder.is_enabled(): + return self.responder.get_response(self.T_START_E) + return self.responder.get_response(self.T_START) + + def __stop_response(self) -> str: + if self.responder.is_enabled(): + return self.responder.get_response(self.T_STOP) + return self.responder.get_response(self.T_STOP_D) + + def __list_users(self, users: list) -> str: + userlist = [f'{user.id}: {self.namegen.get_tg_name(user)}' + for user in users] + return '\n'.join(userlist) + + def __list_regex(self, token: str) -> str: + for t, r in self.responder.tokens: + if token == t: + regexlist = [i.pattern for i in r] + return '\n'.join(regexlist) + return 'not found!' async def action(self, event: telethon.events.common.EventBuilder) -> None: + chat_id = telethon.utils.get_peer_id(event.message.peer_id) + if chat_id not in self.chats: + return None command = event.message.text sender = await event.get_sender() response = None - match command: - case command if command.startswith('/add '): - if self.__add_entry(sender.id, command[5:]): - response = 'success' - else: - response = 'failure' - case '/list': - response = ', '.join(self.available_tokens) - case '/start': - self.responder.enable() - response = self.responder.get_response(self.T_START) - case '/stop': - self.responder.disable() - response = self.responder.get_response(self.T_STOP) + if command.startswith('.add '): + try: + response = self.__add_response(sender.id, command[5:]) + except Exception as e: + response = str(e) + elif command.startswith('.adduser '): + try: + response = self.__add_user(sender.id, command[9:]) + except Exception as e: + response = str(e) + elif command == '.chat': + response = str(chat_id) + elif command == '.list': + response = ', '.join(self.available_tokens) + elif command.startswith('.regex '): + response = self.__list_regex(command[7:].strip()) + elif command == '.users': + users = await self.client.get_participants( + entity=event.message.peer_id) + response = self.__list_users(users) + elif command == '.start': + response = self.__start_response() + self.responder.enable() + elif command == '.stop': + response = self.__stop_response() + self.responder.disable() + elif command == '.help': + response = self.DOC if response: await self.client.send_message(event.message.peer_id, response) return None diff --git a/pgbotlib/cron.py b/pgbotlib/cron.py new file mode 100644 index 0000000..1959293 --- /dev/null +++ b/pgbotlib/cron.py @@ -0,0 +1,33 @@ +import asyncio +import random + +import yaml +import aiocron +import telethon +import pgbotlib.response + + +class Cron: + def __init__(self, + config: dict, + client: telethon.TelegramClient, + responder: pgbotlib.response.Responder) -> None: + with open(config['schedule'], 'r', encoding='utf-8') as data: + self.sched = yaml.safe_load(data.read()) + self.responder = responder + self.client = client + + def __mkjob(self, job: dict) -> callable: + tokens = frozenset(job['tokens'].split(',')) + async def send_message() -> None: + if 'rand' in job: + wait_seconds = random.randint(0, job['rand']) * 60 + await asyncio.sleep(wait_seconds) + message = self.responder.get_response(tokens) + message = self.responder.api_match(message, '') + await self.client.send_message(job['chat'], message) + return send_message + + def plan(self) -> None: + for job in self.sched: + aiocron.crontab(job['cron'], func=self.__mkjob(job)) diff --git a/pgbotlib/dbstuff.py b/pgbotlib/dbstuff.py index e207fae..510ec7c 100644 --- a/pgbotlib/dbstuff.py +++ b/pgbotlib/dbstuff.py @@ -7,9 +7,19 @@ class DBConn: self.connection = psycopg.connect(*args, **kwargs) self.cursor = self.connection.cursor() - def update(self, query: str, values: tuple) -> list: - self.cursor.execute(query, values) - return self.connection.commit() + def update(self, query: str, values: tuple) -> None: + failure = None + try: + self.cursor.execute('SAVEPOINT sp1') + self.cursor.execute(query, values) + except Exception as e: + failure = e + self.cursor.execute('ROLLBACK TO SAVEPOINT sp1') + else: + self.cursor.execute('RELEASE SAVEPOINT sp1') + self.connection.commit() + if failure: + raise failure def query_raw(self, query: str, values: tuple) -> list: self.cursor.execute(query, values) diff --git a/pgbotlib/misc.py b/pgbotlib/misc.py index cc51a94..47eda4a 100644 --- a/pgbotlib/misc.py +++ b/pgbotlib/misc.py @@ -1,6 +1,5 @@ import telethon import pgbotlib.dbstuff -import pgbotlib.response class NameGenerator: diff --git a/pgbotlib/response.py b/pgbotlib/response.py index a46559f..9c02643 100644 --- a/pgbotlib/response.py +++ b/pgbotlib/response.py @@ -4,6 +4,7 @@ import telethon import yaml import pgbotlib.api import pgbotlib.dbstuff +import pgbotlib.misc def get_token(token_name: str, token_regex: list) -> tuple: @@ -22,18 +23,20 @@ def get_tokens(path: str) -> list: class Responder: def __init__(self, config: dict, client: telethon.TelegramClient, - db_connection: pgbotlib.dbstuff.DBConn) -> None: + db_connection: pgbotlib.dbstuff.DBConn, + namegen: pgbotlib.misc.NameGenerator) -> None: # apiregex matches "{apiname}optional data" # message itself is also passed to the api call method - self.started = True + self.enabled = True self.apiregex = re.compile(r'^\{(\w+)\}(.+)?$') self.namegen = pgbotlib.misc.NameGenerator(config, db_connection) self.tokens = get_tokens(config['response_tokens']) + self.chats = config['chats'] self.api = pgbotlib.api.ApiWrapper(self.tokens, db_connection) self.db_connection = db_connection self.client = client - def __tokenize(self, message: str) -> frozenset: + def tokenize(self, message: str) -> frozenset: tokens = set() for token, regexi in self.tokens: for regex in regexi: @@ -54,10 +57,13 @@ class Responder: "SELECT response FROM responses WHERE tokens = %s", (key,)) def enable(self) -> None: - self.started = True + self.enabled = True def disable(self) -> None: - self.started = False + self.enabled = False + + def is_enabled(self) -> bool: + return self.enabled def get_response(self, tokens: frozenset) -> str: counter = 0 @@ -93,10 +99,13 @@ class Responder: async def respond(self, event: telethon.events.common.EventBuilder) -> None: - if not self.started: + if not self.enabled: + return None + chat_id = telethon.utils.get_peer_id(event.message.peer_id) + if chat_id not in self.chats: return None message = event.message.text.lower() - tokens = self.__tokenize(message) + tokens = self.tokenize(message) response = self.get_response(tokens) if not response: return None diff --git a/pgbotlib/sched.py b/pgbotlib/sched.py deleted file mode 100644 index 9b3ada2..0000000 --- a/pgbotlib/sched.py +++ /dev/null @@ -1,70 +0,0 @@ -import asyncio -import time -import random - -import yaml -import schedule -import telethon -import pgbotlib.response - - -class Scheduler: - def __init__(self, - config: dict, - client: telethon.TelegramClient, - responder: pgbotlib.response.Responder) -> None: - self.responder = responder - self.client = client - with open(config['schedule'], 'r', encoding='utf-8') as data: - self.sched = yaml.safe_load(data.read()) - self.days = ( - schedule.every().day, - schedule.every().monday, - schedule.every().tuesday, - schedule.every().wednesday, - schedule.every().thursday, - schedule.every().friday, - schedule.every().saturday, - schedule.every().sunday - ) - - def __get_job(self, tokens: frozenset, - chat_id: int, rand: int) -> callable: - async def send_message(): - if rand: - time.sleep(random.randint(0, rand) * 60) - message = self.responder.get_response(tokens) - message = self.responder.api_match(message, '') - await self.client.send_message(chat_id, message) - - def job(): - loop = asyncio.get_event_loop() - coroutine = send_message() - loop.run_until_complete(coroutine) - return job - - def __schedule_job(self, tokens: str, chat: int, - day: int, t: str, rand: int) -> None: - job_tokens = frozenset(tokens.split(',')) - job = self.__get_job(job_tokens, chat, rand) - self.days[day].at(t).do(job) - - def build(self) -> None: - for i in self.sched: - for day in i.get('days', [0]): - for timespec in i['time']: - self.__schedule_job(i['tokens'], i['chat'], - day, timespec, i.get('rand', 0)) - - def run(self) -> None: - while True: - schedule.run_pending() - time.sleep(1) - - -def spawn_scheduler(config: dict, client: telethon.TelegramClient, - responder: pgbotlib.response.Responder) -> Scheduler: - asyncio.set_event_loop(asyncio.new_event_loop()) - scheduler = Scheduler(config, client, responder) - scheduler.build() - scheduler.run() diff --git a/populate b/populate new file mode 100755 index 0000000..6710f9d --- /dev/null +++ b/populate @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 +import psycopg +import yaml +import sys + +with open('config.yml', 'r', encoding='UTF-8') as data: + config = yaml.safe_load(data.read()) +with open(config['response_tokens'], 'r', encoding='UTF-8') as data: + valid_tokens = {item for item in yaml.safe_load(data.read())} +with open(sys.argv[1], 'r', encoding='UTF-8') as data: + phrases = yaml.safe_load(data.read()) + +with psycopg.connect(config['db_spec']) as conn: + query_phrases = 'INSERT INTO responses (tokens, response) VALUES (%s, %s)' + with conn.cursor() as cur: + for regexref, responses in phrases.items(): + tokens = set(regexref.split(',')) + if tokens != tokens & valid_tokens: + print(f'{str(tokens)} failed to add!') + continue + token_string = ','.join(sorted(tokens)) + for response in responses: + cur.execute('SAVEPOINT sp1') + try: + cur.execute( query_phrases, (token_string, response)) + except psycopg.errors.UniqueViolation as err: + cur.execute('ROLLBACK TO SAVEPOINT sp1') + # print(err) + continue + cur.execute('RELEASE SAVEPOINT sp1') + conn.commit() + #for item in names: + # usernames = names[item] + # for username in usernames: + # cur.execute('INSERT INTO names (tg_id, name) VALUES (%s, %s)', + # (item, username)) + #conn.commit() diff --git a/sched.dist.yml b/sched.dist.yml index 997d0d5..f39ba36 100644 --- a/sched.dist.yml +++ b/sched.dist.yml @@ -1,15 +1,10 @@ # schedule things here, see examples - tokens: botname,praise - chat: 00000000 - days: [1, 5] - time: - - "19:59" + cron: 59 19 * * 1-5 rand: 5 + chat: 00000000 - tokens: greeting - chat: 00000000 - days: [1, 2, 3] - time: - - "13:05" - - "13:10" + cron: 5,10 13 * * 1-3 rand: 3 + chat: 00000000