画像 OCR、base64 デコード、正規表現による文字列抽出
Discord の内容を取得
def get_discord_contents(
    url="https://discord.com/api/v9/channels/1265925366820765818/messages?limit=20",
    timeout=10,
):
    """Fetch recent messages from a Discord channel via the HTTP API.

    Args:
        url: Full messages endpoint to query. Defaults to the endpoint that
            was previously hard-coded, so calls without arguments behave as
            before; the polling loop passes its own per-channel URLs.
        timeout: Seconds to wait for the server before ``requests`` raises,
            so a stalled connection cannot block the caller forever.

    Returns:
        The ``requests.Response`` object. The status code is not checked
        here; the caller decodes the body with ``.json()``.
    """
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'ja-JP,ja;q=0.9',
        'Authorization': '***',
        'Cookie': '****',
        'Priority': 'u=3, i',
        'Referer': 'https://discord.com/channels/1243823539426033696/1265925366820765818',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0.1 Safari/605.1.15',
        'X-Debug-Options': 'bugReporterEnabled',
        'X-Discord-Locale': 'ja-JP',
        'X-Discord-Timezone': 'Asia/Tokyo',
        'X-Super-Properties': '***'
    }
    r = requests.get(url=url, headers=headers, timeout=timeout)
    return r
テキスト、画像内容抽出
純粋な招待コードと base64 デコード
def decode_base64(encoded_text="SFM1RXA3VkdzYQ=="):
    """Decode a Base64 string and return the result as UTF-8 text.

    Args:
        encoded_text: Base64-encoded input; the default is a sample value.

    Returns:
        The decoded bytes interpreted as a UTF-8 string.
    """
    raw_bytes = base64.b64decode(encoded_text)
    return str(raw_bytes, "utf-8")
def regex_extract(text):
    """Extract invitation-code candidates from free text.

    Two kinds of candidates are collected, in this order:

    1. Base64 tokens that end in ``=`` padding. Each token is decoded to
       UTF-8 and the decoded text is returned; tokens that do not decode
       cleanly are skipped.
    2. Every run of exactly 10 characters from ``[A-Za-z0-9_]``. The pattern
       is unanchored, so longer runs also yield 10-character chunks; the
       caller is expected to filter or validate the candidates.

    Args:
        text: Message text, optionally with OCR output appended.

    Returns:
        A list of candidate strings (possibly empty).
    """
    extracts = []
    # Match the WHOLE padded token. The previous pattern wrapped only the
    # tail in a capture group, so re.findall returned fragments such as
    # "Q==" instead of the encoded code.
    base64_pattern = r"[A-Za-z0-9+/]{10,}={1,2}"
    plain_pattern = r"[A-Za-z0-9_]{10}"
    for token in re.findall(base64_pattern, text):
        try:
            decoded = base64.b64decode(token).decode("utf-8")
        except ValueError:
            # binascii.Error and UnicodeDecodeError are both ValueError
            # subclasses: the token was not really Base64-encoded text.
            continue
        extracts.append(decoded)
    extracts.extend(re.findall(plain_pattern, text))
    return extracts
画像 OCR 抽出
# Shared OCR engine, created lazily so the models are loaded only once
# instead of on every image.
_OCR_ENGINE = None


def _get_ocr_engine():
    """Return the shared RapidOCR engine, creating it on first use."""
    global _OCR_ENGINE
    if _OCR_ENGINE is None:
        _OCR_ENGINE = RapidOCR(text_score=0.6, det_use_cuda=False)
    return _OCR_ENGINE


def image_to_text(image_url):
    """Download an image and return the text lines recognised by OCR.

    Args:
        image_url: URL of the image to download.

    Returns:
        A list with the recognised text of each OCR result entry, or an
        empty list when OCR produced no result.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(image_url, timeout=10)
    # Fail early on 4xx/5xx rather than handing an error page to the decoder.
    response.raise_for_status()
    image = Image.open(BytesIO(response.content))
    result, elapse = _get_ocr_engine()(image)
    logger.info(f'image_to_text : {elapse}')
    # NOTE(review): RapidOCR appears to return None as the result when no
    # text is detected; guard so the comprehension does not raise TypeError.
    if not result:
        return []
    return [i[1] for i in result]
Follow 招待コードの検証
def validation_follow(codes):
    """Submit candidate invitation codes to the Follow API and log each reply.

    Only 10-character strings are submitted; any other value is skipped.
    A failure on one code (network error, timeout, non-JSON reply) is logged
    and the remaining codes are still tried, because the caller has already
    marked the whole batch as used and would not retry them.

    Args:
        codes: Iterable of candidate codes.

    Returns:
        None. Results are written to the module logger.
    """
    CSRFTOKEN = '***'
    COOKIE = "***".format(CSRFTOKEN)
    headers = {
        # NOTE(review): the Access-Control-* and Alt-Svc entries are normally
        # response headers; they are kept so the request stays unchanged.
        "Access-Control-Allow-Credentials": "true",
        "Access-Control-Allow-Origin": "https://app.follow.is",
        "Alt-Svc": 'h3=":443"; ma=86400',
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "ja-JP,ja;q=0.9",
        "Cookie": COOKIE,
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
    }
    for code in codes:
        if not (isinstance(code, str) and len(code) == 10):
            continue
        data = {"code": code, "csrfToken": CSRFTOKEN}
        try:
            r = requests.post(
                url='https://api.follow.is/invitations/use',
                json=data,
                headers=headers,
                timeout=10,
            )
            payload = r.json()
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers JSON decoding failures on older requests
            # versions; newer ones raise a RequestException subclass.
            logger.warning('validation_follow code:%s failed: %s', code, exc)
            continue
        logger.info(f'validation_follow code:{code}, 戻り値:{payload}')
コードを実行
import re
import time
import requests
import base64
import logging
from pathlib import Path
from PIL import Image
from io import BytesIO
from rapidocr_onnxruntime import RapidOCR
class UniqueList:
    """A list of items in which each distinct item is handed out only once.

    ``items`` holds every item in insertion order; ``used_items`` records
    the items that ``get_next`` has already returned.
    """

    def __init__(self, items):
        self.items = items  # backing list (the caller's list object is kept)
        self.used_items = set()  # items already returned by get_next

    def get_next(self):
        """Return the items not handed out before and mark them as used."""
        fresh = []
        seen = self.used_items
        for candidate in self.items:
            if candidate in seen:
                continue
            seen.add(candidate)  # mark as used
            fresh.append(candidate)
        return fresh

    def add_item(self, item):
        """Append ``item`` unless it is already stored or already used."""
        already_known = item in self.items or item in self.used_items
        if not already_known:
            self.items.append(item)
# Registries of invitation codes and image URLs that were already seen, so
# each one is processed only once across polling rounds.
codes = UniqueList([])
images = UniqueList([])

# Log to follow.log; filemode "w" truncates the file on every start.
logging.basicConfig(
    filename='follow.log',
    filemode="w",
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Poll both channels forever: collect message text plus OCR text of new
# attachments, extract code candidates and submit the unseen ones.
# NOTE(review): the source lost its indentation; validation is assumed to run
# once per polling round, after all messages were scanned. Confirm.
while True:
    try:
        r1 = get_discord_contents(url='https://discord.com/api/v9/channels/1265932718084984963/messages?limit=10')
        r2 = get_discord_contents(url='https://discord.com/api/v9/channels/1265925366820765818/messages?limit=10')
        result = r1.json() + r2.json()
        # logger.info(f'result: {len(result)}')
        if result:
            # Built outside the f-string: reusing the quote character and a
            # backslash inside a replacement field needs Python 3.12+.
            latest = result[0]['content'].replace('\n', ' ')
            print(f'\r最新のメッセージ:{latest}', end='')
        for i in result:
            contents = i['content'] + '\n'
            if i['attachments']:
                for p in i['attachments']:
                    images.add_item(p['url'])
                # Only images not OCR'd before are returned here.
                image_urls = images.get_next()
                for url in image_urls:
                    ima_text = image_to_text(image_url=url)
                    contents += "\n".join(ima_text)
            code = regex_extract(contents)
            for c in code:
                codes.add_item(c)
        not_item = codes.get_next()
        validation_follow(codes=not_item)
    except Exception:
        # Catch Exception (not a bare except) so Ctrl-C still stops the
        # script, and record the traceback instead of discarding it.
        logger.exception('polling round failed')
        time.sleep(1)
    time.sleep(0.1)
出力:
2024-10-18 13:48:50,080 - INFO - validation_follow code:hZruosjpoc, 戻り値:{'code': 5001, 'message': '招待はすでに存在します。'}
2024-10-18 13:48:50,683 - INFO - validation_follow code:VJowdvPF09, 戻り値:{'code': 5001, 'message': '招待はすでに存在します。'}
2024-10-18 13:48:51,258 - INFO - validation_follow code:NCIVDUo7nk, 戻り値:{'code': 5001, 'message': '招待はすでに存在します。'}
2024-10-18 13:48:51,823 - INFO - validation_follow code:jTTtCsNRBR, 戻り値:{'code': 5001, 'message': '招待はすでに存在します。'}
2024-10-18 13:48:52,432 - INFO - validation_follow code:LRZzfdZXm2, 戻り値:{'code': 5001, 'message': '招待はすでに存在します。'}
2024-10-18 13:48:53,016 - INFO - validation_follow code:3qHSJ5L7yg, 戻り値:{'code': 5001, 'message': '招待はすでに存在します。'}
2024-10-18 13:48:54,031 - INFO - validation_follow code:6eeWiixmUo, 戻り値:{'code': 5001, 'message': '招待はすでに存在します。'}
2024-10-18 13:48:54,629 - INFO - validation_follow code:1q6LwImbwu, 戻り値:{'code': 5001, 'message': '招待はすでに存在します。'}
2024-10-18 13:48:55,224 - INFO - validation_follow code:eCS6MzKM9e, 戻り値:{'code': 5001, 'message': '招待はすでに存在します。'}
2024-10-18 13:48:55,825 - INFO - validation_follow code:KIgGczbfCY, 戻り値:{'code': 5001, 'message': '招待はすでに存在します。'}
2024-10-18 13:48:56,421 - INFO - validation_follow code:t90_3MTQyx, 戻り値:{'code': 5001, 'message': '招待はすでに存在します。'}
2024-10-18 13:48:57,451 - INFO - validation_follow code:3bU6yvri52, 戻り値:{'code': 5001, 'message': '招待はすでに存在します。'}
2024-10-18 13:48:58,039 - INFO - validation_follow code:QicWR5NOHU, 戻り値:{'code': 5001, 'message': '招待はすでに存在します。'}
2024-10-18 13:52:50,019 - INFO - validation_follow code:6893703270, 戻り値:{'code': 5001, 'message': '招待はすでに存在します。'}
2024-10-18 13:57:49,362 - INFO - validation_follow code:1069791849, 戻り値:{'code': 5001, 'message': '招待はすでに存在します。'}
2024-10-18 13:58:45,737 - INFO - image_to_text : [0.6887292919855099, 0.04823112487792969, 1.1249077320098877]
2024-10-18 14:00:48,248 - INFO - validation_follow code:Yzd6ZUpfZV, 戻り値:{'code': 5001, 'message': '招待はすでに存在します。'}