# pgbot/pgbotlib/api.py

import json
import random
import re
import typing

import bs4
import fake_headers
import requests

import pgbotlib.dbstuff


class ApiWrapper:
    FAILED = 'я обосрался :<'  # roughly: "I crapped myself :<"
    GIF_REGEX = {
        'part': re.compile(r'(?<=\<center\>).*(?=\<\/center\>)'),
        'gif': re.compile(r'(?<=src=").*(?="\s)')
    }
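    # The two patterns above are chained: 'part' grabs whatever sits
    # between <center>...</center> on the xdgif page, and 'gif' then
    # pulls the src attribute out of that fragment. The assumed page
    # shape is roughly (an illustration, not the site's verbatim markup):
    #
    #   <center><img src="https://xdgif.ru/some.gif" alt="..."></center>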
    SEARCH_TOKENS = ['botname', '!find']

    def __init__(self, tokens: dict,
                 db_conn: pgbotlib.dbstuff.DBConn) -> None:
        self.tokens = tokens
        self.db_conn = db_conn
        self.nonw = re.compile(r'\W')
        self.headers = fake_headers.Headers(headers=True)

    # This is the entry point for the API calls.
    # If you add another API, make sure there is a match arm for it here.
    # The match-case statement below requires Python 3.10 or newer.
    # A hedged usage sketch sits at the bottom of this file.
    def call(self, api: str, data: typing.Union[str, None],
             message: str) -> str:
        match api:
            case 'img_url': return self.format_img(data)
            case 'gif': return self.get_gif()
            case 'kmp': return self.get_kmp()
            case 'fga': return self.get_fga()
            case 'fakenews': return self.get_fakenews()
            case 'anek': return self.get_anek()
            case 'y_search': return self.y_search(message)
            case _: return self.FAILED

    def __sanitize_search(self, message: str) -> str:
        """Remove one occurrence of each search token from the query,
        so that the "bot find" trigger phrase does not poison the
        search itself. There is no guarantee that the *first* match is
        the one removed, but I see no point in implementing that."""
        keywords = self.nonw.sub(' ', message)
        # self.tokens is assumed to map a token name to an iterable of
        # compiled regexes; iterating over .items() yields (name, regexes)
        # pairs (the original iterated over the bare dict, which yields
        # only the keys and would break the indexing below).
        for token_spec in self.tokens.items():
            if token_spec[0] not in self.SEARCH_TOKENS:
                continue
            for regex in token_spec[1]:
                sub_spec = regex.subn('', keywords, count=1)
                if sub_spec[1]:
                    keywords = sub_spec[0]
                    break
        return keywords
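
    # A minimal sketch of the token table __sanitize_search() expects.
    # This shape is an assumption inferred from the loop above, not a
    # verbatim copy of the bot's real config:
    #
    #   tokens = {
    #       'botname': [re.compile(r'\bbot\b', re.IGNORECASE)],
    #       '!find': [re.compile(r'\bfind\b', re.IGNORECASE)],
    #   }
    #
    # With that table, __sanitize_search('bot find funny cats') would
    # strip one "bot" and one "find", leaving roughly '  funny cats'.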

    def y_search(self, message: str) -> str:
        """Pretty much copy & paste from the original bot.
        I have no fucking clue how this black magic works."""
        query = self.__sanitize_search(message)
        request = requests.get('https://yandex.ru/images/search',
                               timeout=30,
                               params={'text': query,
                                       'nomisspell': 1,
                                       'noreask': 1,
                                       'isize': 'medium'},
                               headers=self.headers.generate())
        parser = bs4.BeautifulSoup(request.text, 'html.parser')
        items_tag = parser.find('div', {'role': 'main'})
        items_full = json.loads(items_tag.find('div')['data-state'])
        items = items_full['initialState']['serpList']['items']['entities']
        # skip entries without an origUrl so random.choice() cannot
        # return None (the original appended item.get('origUrl') as-is)
        images = [item['origUrl'] for item in items.values()
                  if item.get('origUrl')]
        if not images:
            return self.FAILED
        result = random.choice(images)
        return f'[url]({result})'
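
    # The 'data-state' blob above is an undocumented internal of the
    # Yandex Images page, so this shape is an assumption inferred from
    # the lookups in y_search(), trimmed to the fields actually used:
    #
    #   {"initialState": {"serpList": {"items": {"entities": {
    #       "<id>": {"origUrl": "https://example.com/img.jpg", ...},
    #       ...
    #   }}}}}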

    def get_gif(self) -> str:
        resp = requests.get("http://xdgif.ru/random/", timeout=30)
        # both regex searches can miss if the page layout changes,
        # so bail out instead of crashing on a None match
        part = self.GIF_REGEX['part'].search(resp.text)
        if not part:
            return self.FAILED
        gif = self.GIF_REGEX['gif'].search(part.group(0))
        return gif.group(0) if gif else self.FAILED

    @staticmethod
    def get_kmp() -> str:
        request = requests.get("https://killpls.me/random/", timeout=30)
        parser = bs4.BeautifulSoup(request.text, features="html.parser")
        result = parser.find("div", attrs={
            "style": "margin:0.5em 0;line-height:1.785em"})
        return result.text.strip()

    @staticmethod
    def get_fga() -> str:
        request = requests.get("http://fucking-great-advice.ru/api/random",
                               timeout=30)
        return json.loads(request.text)["text"]

    @staticmethod
    def get_fakenews() -> str:
        request = requests.get("http://news.olegmakarenko.ru/news",
                               timeout=30)
        parser = bs4.BeautifulSoup(request.text, features="html.parser")
        news = [item.text.strip() for item in parser.find_all(
            "span", attrs={"class": "headlinetext"})]
        return random.choice(news)

    @staticmethod
    def get_anek() -> str:
        request = requests.get("http://rzhunemogu.ru/Rand.aspx?CType=11",
                               timeout=30)
        result = request.text.split('<content>')[1].split('</content>')[0]
        return result.strip()

    @staticmethod
    def format_img(data: str) -> str:
        return f'[url]({data})'
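

# A minimal smoke-test sketch, not part of the original module: it uses
# a hypothetical token table (shaped like the example above
# __sanitize_search) and passes db_conn=None, which works only because
# none of the calls below touch the database.
if __name__ == '__main__':
    demo_tokens = {
        'botname': [re.compile(r'\bbot\b', re.IGNORECASE)],
        '!find': [re.compile(r'\bfind\b', re.IGNORECASE)],
    }
    api = ApiWrapper(demo_tokens, db_conn=None)
    print(api.call('anek', None, ''))   # random joke from rzhunemogu
    print(api.call('fga', None, ''))    # random piece of "great advice"
    print(api.call('y_search', None, 'bot find funny cats'))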