# pgbot/pgbotlib/api.py

import json
import random
import re
import typing

import bs4
import fake_headers
import requests

import pgbotlib.dbstuff


class ApiWrapper:
    # canned failure reply; the Russian roughly means "I crapped myself :<"
    FAILED = 'я обосрался :<'
    GIF_REGEX = {
        'part': re.compile(r'(?<=\<center\>).*(?=\<\/center\>)'),
        'gif': re.compile(r'(?<=src=").*(?="\s)')
    }
    SEARCH_TOKENS = ['botname', '!find']

    def __init__(self, tokens: dict, db_conn: pgbotlib.dbstuff.DBConn) -> None:
        self.tokens = tokens
        self.db_conn = db_conn
        self.nonw = re.compile(r'\W')
        self.headers = fake_headers.Headers(headers=True)

    # this is the entry point for the api calls
    # if you add another api, make sure there is a match arm for it here
    def call(self, api: str, data: typing.Union[str, None],
             message: str) -> str:
        match api:
            case 'img_url': return self.format_img(data)
            case 'gif': return self.get_gif()
            case 'kmp': return self.get_kmp()
            case 'fga': return self.get_fga()
            case 'fakenews': return self.get_fakenews()
            case 'anek': return self.get_anek()
            case 'y_search': return self.y_search(message)
            case _: return self.FAILED
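    # usage sketch (hypothetical call site): every handler returns a
    # plain string, so callers can post the result as-is, e.g.
    #   wrapper.call('y_search', None, 'botname find cats')
    # unknown api names fall back on FAILED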
    def __sanitize_search(self, message: str) -> str:
        """Removes one occurrence of each search token from the query
        so that the "bot find" phrase does not poison the search query.
        It is not guaranteed to delete the first match, but I see no
        point in implementing that."""
        # assumes self.tokens maps a token name to a list of compiled
        # regexes; only the search tokens are stripped here
        keywords = self.nonw.sub(' ', message)
        for name, regexes in self.tokens.items():
            if name not in self.SEARCH_TOKENS:
                continue
            for regex in regexes:
                sub_spec = regex.subn('', keywords, count=1)
                if sub_spec[1]:
                    keywords = sub_spec[0]
                    break
        return keywords
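    # y_search scrapes the Yandex Images SERP: the page embeds its
    # state as JSON in a data-state attribute, and each image entity
    # carries the original image URL under 'origUrl'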
    def y_search(self, message: str) -> str:
        """Pretty much copy & paste from the original bot.
        I have no fucking clue how this black magic works"""
        query = self.__sanitize_search(message)
        request = requests.get('https://yandex.ru/images/search',
                               timeout=30,
                               params={'text': query,
                                       'nomisspell': 1,
                                       'noreask': 1,
                                       'isize': 'medium'},
                               headers=self.headers.generate())
        parser = bs4.BeautifulSoup(request.text, 'html.parser')
        items_tag = parser.find('div', {'role': 'main'})
        items_full = json.loads(items_tag.find('div')['data-state'])
        items = items_full['initialState']['serpList']['items']['entities']
        # skip entities that carry no original image URL
        images = [item['origUrl'] for item in items.values()
                  if item.get('origUrl')]
        if not images:
            return self.FAILED
        result = random.choice(images)
        return f'[url]({result})'
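    # get_gif scrapes xdgif.ru's random page: the gif sits inside a
    # <center> block, and its src attribute is fished out with the
    # precompiled GIF_REGEX patterns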
    def get_gif(self) -> str:
        resp = requests.get("http://xdgif.ru/random/", timeout=30)
        part = self.GIF_REGEX['part'].search(resp.text)
        if not part:  # page came back without the expected <center> block
            return self.FAILED
        gif = self.GIF_REGEX['gif'].search(part.group(0))
        return gif.group(0) if gif else self.FAILED
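    # get_kmp pulls a random confession from killpls.me, selecting the
    # story block by its inline style attribute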
    @staticmethod
    def get_kmp() -> str:
        request = requests.get("https://killpls.me/random/", timeout=30)
        parser = bs4.BeautifulSoup(request.text, features="html.parser")
        result = parser.find("div", attrs={
            "style": "margin:0.5em 0;line-height:1.785em"})
        return result.text.strip()
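    # get_fga hits fucking-great-advice.ru's JSON API and returns the
    # advice text field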
    @staticmethod
    def get_fga() -> str:
        request = requests.get("http://fucking-great-advice.ru/api/random",
                               timeout=30)
        return json.loads(request.text)["text"]
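    # get_fakenews scrapes the headline spans from olegmakarenko.ru's
    # news page and returns one at random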
    @staticmethod
    def get_fakenews() -> str:
        request = requests.get("http://news.olegmakarenko.ru/news", timeout=30)
        parser = bs4.BeautifulSoup(request.text, features="html.parser")
        news = [item.text.strip() for item in parser.find_all(
            "span", attrs={"class": "headlinetext"})]
        return random.choice(news)
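    # get_anek fetches a random joke; rzhunemogu.ru answers with XML,
    # so the <content> payload is cut out with plain string splits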
    @staticmethod
    def get_anek() -> str:
        request = requests.get("http://rzhunemogu.ru/Rand.aspx?CType=11",
                               timeout=30)
        result = request.text.split('<content>')[1].split('</content>')[0]
        return result.strip()
    @staticmethod
    def format_img(data: str) -> str:
        return f'[url]({data})'
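# a minimal smoke-test sketch (hypothetical, not part of the bot): it
# assumes the handlers exercised here never touch db_conn, so None is
# passed in place of a real pgbotlib.dbstuff.DBConn
if __name__ == '__main__':
    wrapper = ApiWrapper(tokens={}, db_conn=None)
    print(wrapper.call('fga', None, ''))    # random advice string
    print(wrapper.call('anek', None, ''))   # random joke
    print(wrapper.call('nope', None, ''))   # unknown api -> FAILED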