implemented most of the pixiv scraper

This commit is contained in:
2025-08-06 19:00:19 +03:00
parent 3a25f4fdd4
commit f9277ad570
19 changed files with 277 additions and 104 deletions

View File

@ -1,4 +1,4 @@
### Сомнения по API:
1) `feeds` API должно давать возможность получать инфу по ленте и ресетать ленту, вроде всё
2) Нужно ли API картинок? Вроде вообще всё будет делать scraper. (хотя наверное получение инфы о картинке по id нужная штука)
1) `feeds` API delete feed
2) Нужно ли API картинок? Вроде вообще всё будет делать scraper (хотя, наверное, получение инфы о картинке по id — нужная штука).
3) Возможно, стоит добавить возможность динамической смены длины invite-кода.

Binary file not shown.

View File

@ -39,15 +39,13 @@ async def read_accounts_by_group_platform(
status_code=status.HTTP_403_FORBIDDEN,
detail="Not allowed",
)
account_data = db.get_accounts_by_group_platform(conn, groupname, platform)
account_data = db.get_accounts_by_group_platform(conn, groupname, platform.lower())
if account_data is None:
return HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No such account",
)
account = Account()
account.fill(account_data)
return account
return Account().fill(account_data)
@accounts_router.post("/add")
@ -65,13 +63,13 @@ async def add_account(
status_code=status.HTTP_403_FORBIDDEN,
detail="Not allowed",
)
if db.check_account_existence(conn, groupname, platform):
if db.check_account_existence(conn, groupname, platform.lower()):
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail="Account already exists",
)
hashed_password = encode_str(password)
return db.create_account(conn, platform, login, hashed_password, metadata)
return db.create_account(conn, groupname, current_user.username, platform.lower(), login, hashed_password, metadata)
@accounts_router.post("/update")
@ -85,18 +83,17 @@ async def update_account(
conn: Annotated[connection, Depends(get_db_connection)],
current_user: Annotated[User, Depends(get_current_user)]
):
account_data = db.get_accounts_by_group_platform(conn, groupname, platform)
account_data = db.get_accounts_by_group_platform(conn, groupname, platform.lower())
if account_data is None:
return HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No such account",
)
account = Account()
account.fill(account_data)
account = Account().fill(account_data)
if current_user.username != account.author and current_user.role not in settings.settings.admin_roles:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Not allowed",
)
return db.update_account(conn, groupname, author, platform, login, password, metadata)
return db.update_account(conn, groupname, author, platform.lower(), login, password, metadata)

View File

@ -23,14 +23,13 @@ async def read_feed(
conn: Annotated[connection, Depends(get_db_connection)],
current_user: Annotated[User, Depends(get_current_user)]
):
feed = Feed()
feed_data = db.get_feed(conn, feed_id)
if feed_data is None:
return HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No such feed",
)
feed.fill(feed_data)
feed = Feed().fill(feed_data)
groupname = get_groupname_by_feed_id(conn, feed_id)
if groupname is None:
@ -48,7 +47,7 @@ async def read_feed(
return feed
# TODO: most logic + exception
# TODO: review exception
@feeds_router.post("/new")
async def new_feed(
groupname: str,
@ -63,11 +62,11 @@ async def new_feed(
accounts = get_accounts_by_group(conn, groupname)
feed = generate_feed(conn, accounts)
if not isinstance(feed, Exception):
if feed:
return db.create_feed(conn, groupname, feed)
else:
raise HTTPException(
status_code=status.HTTP_418_IM_A_TEAPOT,
status_code=status.HTTP_424_FAILED_DEPENDENCY,
detail="Failed to generate feed",
)

View File

@ -27,15 +27,13 @@ async def read_any_group(
status_code=status.HTTP_403_FORBIDDEN,
detail="Not allowed",
)
group = Group()
group_data = db.get_group(conn, groupname)
if group_data is None:
return HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No such group",
)
group.fill(group_data)
return group
return Group().fill(group_data)
@groups_router.post("/invite_code")
async def read_group_invite_code(

View File

@ -22,6 +22,7 @@ class User(BaseModel):
self.disabled = params["disabled"]
self.last_seen_at = params["last_seen_at"]
self.created_at = params["created_at"]
return self
username: str = ""
password: str = ""
role: str = "user"
@ -39,6 +40,7 @@ class Group(BaseModel):
self.feed_interval_minutes = params["feed_interval_minutes"]
self.last_feed_id = params["last_feed_id"]
self.created_at = params["created_at"]
return self
groupname: str = ""
author: str = ""
invite_code: str = ""
@ -53,6 +55,7 @@ class Membership(BaseModel):
self.groupname = params["groupname"]
self.username = params["username"]
self.joined_at = params["joined_at"]
return self
groupname: str = ""
username: str = ""
joined_at: datetime | None = None
@ -66,11 +69,12 @@ class Picture(BaseModel):
self.url = params["url"]
self.metadata = params["metadata"]
self.created_at = params["created_at"]
return self
id: int = -1
source: str = ""
external_id: str = ""
url: str = ""
metadata: dict | None = None
metadata: dict = {}
created_at: datetime | None = None
@ -81,6 +85,7 @@ class Swipe(BaseModel):
self.picture_id = params["picture_id"]
self.value = params["value"]
self.created_at = params["created_at"]
return self
username: str = ""
feed_id: int = -1
picture_id: int = -1
@ -94,6 +99,7 @@ class Feed(BaseModel):
self.groupname = params["groupname"]
self.image_ids = params["image_ids"]
self.created_at = params["created_at"]
return self
id: int = -1
groupname: str = ""
image_ids: list[int] = []
@ -103,13 +109,16 @@ class Feed(BaseModel):
class Account(BaseModel):
def fill(self, params):
self.id = params["id"]
self.groupname = params["groupname"]
self.author = params["author"]
self.platform = params["platform"]
self.login = params["login"]
self.password = decode_str(params["password"])
self.metadata = params["metadata"]
self.created_at = params["created_at"]
return self
id: int = -1
groupname: str = ""
author: str = ""
platform: str = ""
login: str = ""

View File

@ -18,15 +18,13 @@ async def read_picture(
conn: Annotated[connection, Depends(get_db_connection)],
current_user: Annotated[User, Depends(get_current_user)]
):
picture = Picture()
picture_data = db.get_picture(conn, id)
if picture_data is None:
return HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No such picture",
)
picture.fill(picture_data)
return picture
return Picture().fill(picture_data)
@pictures_router.post("/add")

View File

@ -38,15 +38,13 @@ async def read_swipe(
detail="Not allowed",
)
swipe = Swipe()
swipe_data = db.get_swipe(conn, username, feed_id, picture_id)
if swipe_data is None:
return HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No such swipe",
)
swipe.fill(swipe_data)
return swipe
return Swipe().fill(swipe_data)
@swipes_router.post("/swipe/picture_id")
@ -106,14 +104,13 @@ async def add_swipe(
detail="No such feed or feed is not linked to group",
)
group = Group()
group_data = get_group(conn, groupname)
if group_data is None:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No such feed or feed is not linked to group",
)
group.fill(group_data)
group = Group().fill(group_data)
# Check for trying to skip in
# a group with skips disabled

View File

@ -27,14 +27,13 @@ async def read_users_any(
status_code=status.HTTP_403_FORBIDDEN,
detail="Not allowed",
)
user = User()
user_data = db.get_user(conn, username)
if user_data is None:
return HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No such user",
)
user.fill(user_data)
user = User().fill(user_data)
return user

View File

@ -98,12 +98,11 @@ async def get_current_user(
except InvalidTokenError:
raise credentials_exception
user = User()
user_data = db.users.get_user(conn, token_data.username)
if user_data is None:
raise credentials_exception
user.fill(user_data)
user = User().fill(user_data)
if user.disabled:
raise HTTPException(
@ -122,10 +121,8 @@ def get_group_by_name(
status_code=status.HTTP_404_NOT_FOUND,
detail="No such group"
)
group = Group()
group_data = db.groups.get_group(conn, groupname)
if group_data is None:
raise group_exception
group.fill(group_data)
return group
return Group().fill(group_data)

View File

@ -9,6 +9,8 @@ from api.models import Account
def create_account(
conn: connection,
groupname: str,
author: str,
platform: str,
login: str,
password: str,
@ -18,14 +20,18 @@ def create_account(
cur.execute(
"""
insert into picrinth.accounts
(platform, login, password,
metadata, created_at)
(groupname, author, platform, login,
password, metadata, created_at)
values (%s, %s, %s, %s, now())
returning id
""",
(platform, login, password, json.dumps(metadata)),
(groupname, author, platform, login, password, json.dumps(metadata)),
)
return conn.commit()
result = cur.fetchone()
conn.commit()
if result is None:
return None
return result[0]
def delete_account(
@ -80,14 +86,32 @@ def update_account(
cur.execute(
"""
update picrinth.accounts
SET platform = %s,
author = %s,
SET author = %s,
login = %s,
password = %s,
metadata = %s
where groupname = %s
where groupname = %s and platform = %s
""",
(platform, author, login, password, json.dumps(metadata), groupname),
(author, login, password, json.dumps(metadata), groupname, platform),
)
conn.commit()
return cur.rowcount > 0
def update_account_metadata(
    conn: connection,
    groupname: str,
    platform: str,
    metadata: dict
):
    """Overwrite the stored metadata of one account.

    The account is addressed by its (groupname, platform) pair and the
    metadata dict is serialized with ``json.dumps`` before being bound.

    Returns:
        True when the UPDATE statement affected at least one row,
        False otherwise.
    """
    serialized = json.dumps(metadata)
    with conn.cursor() as cur:
        cur.execute(
            """
            update picrinth.accounts
            SET metadata = %s
            where groupname = %s and platform = %s
            """,
            (serialized, groupname, platform),
        )
        # Capture the count while the cursor is still open.
        affected = cur.rowcount
    conn.commit()
    return affected > 0
@ -95,7 +119,6 @@ def update_account(
# account receiving
# TODO: fix list comprehension
def get_accounts_by_group(
conn: connection,
groupname: str

View File

@ -13,13 +13,16 @@ def create_feed(
"""
insert into picrinth.feeds
(groupname, image_ids, created_at)
values (%s, %s, %s, %s, now())
values (%s, %s, now())
returning id
""",
(groupname, image_ids),
)
result = cur.fetchone()
conn.commit()
return cur.rowcount > 0
if result is None:
return None
return result[0]
def delete_feed(

View File

@ -20,8 +20,11 @@ def create_swipe(
""",
(username, feed_id, picture_id, value),
)
result = cur.fetchone()
conn.commit()
return cur.rowcount > 0
if result is None:
return None
return result[0]
def delete_swipe(

10
src/scraper/gelbooru.py Normal file
View File

@ -0,0 +1,10 @@
from psycopg2._psycopg import connection
import db.pictures as db
from api.models import Account, Picture
def gelbooru(conn: connection, account: Account):
    """Store a placeholder Gelbooru picture and return the DB layer's result.

    Placeholder implementation: no request is made to Gelbooru and
    `account` is currently unused. A fixed picture (external id "3",
    empty url, empty metadata) is written through `db.create_picture`.
    """
    placeholder = Picture(external_id="3", url="", metadata={})
    return db.create_picture(
        conn,
        "gelbooru",
        placeholder.external_id,
        placeholder.url,
        placeholder.metadata,
    )

10
src/scraper/pinterest.py Normal file
View File

@ -0,0 +1,10 @@
from psycopg2._psycopg import connection
import db.pictures as db
from api.models import Account, Picture
def pinterest(conn: connection, account: Account):
    """Store a placeholder Pinterest picture and return the DB layer's result.

    Placeholder implementation: no request is made to Pinterest and
    `account` is currently unused. A fixed picture (external id "1",
    empty url, empty metadata) is written through `db.create_picture`.
    """
    placeholder = Picture(external_id="1", url="", metadata={})
    return db.create_picture(
        conn,
        "pinterest",
        placeholder.external_id,
        placeholder.url,
        placeholder.metadata,
    )

134
src/scraper/pixiv.py Normal file
View File

@ -0,0 +1,134 @@
from fastapi import HTTPException, status
from gppt import GetPixivToken
from loguru import logger
from pixivpy3 import AppPixivAPI
from psycopg2._psycopg import connection
import db.pictures as db
from api.models import Account, Picture
from db.accounts import update_account_metadata
# Wrapper functions
def _get_authorized_api(conn: connection, account: Account) -> AppPixivAPI:
    """Return a logged-in Pixiv client for `account`.

    The refresh token is read from ``account.metadata['refresh_token']``.
    When it is missing or empty, a new one is requested with the account's
    login and password, written back into ``account.metadata`` and persisted
    with `update_account_metadata`.

    Raises:
        HTTPException: status 407 when a missing token could not be created
            (or saved), or when logging in with the token failed.
    """
    # Getting refresh token
    refresh_token = account.metadata.get('refresh_token', '')
    if not refresh_token:
        try:
            refresh_token = get_refresh_token(account.login, account.password)
            account.metadata['refresh_token'] = refresh_token
            update_account_metadata(conn, account.groupname, account.platform, account.metadata)
        except Exception as e:
            # TODO: review ruff "do not use bare `except`"
            logger.debug(f"Pixiv refresh token missing and creation failed: {e}")
            # NOTE(review): 407 means "Proxy Authentication Required"; kept
            # unchanged so existing clients see the same status code.
            raise HTTPException(
                status_code=status.HTTP_407_PROXY_AUTHENTICATION_REQUIRED,
                detail="Pixiv refresh token missing and creation failed"
            ) from e

    # Logging into pixiv
    try:
        return auth(refresh_token)
    except Exception as e:
        # NOTE(review): the message mentions "recreation", but no new token
        # is requested on this path - only the stored one is tried.
        logger.debug(f"Pixiv refresh token invalid and recreation failed: {e}")
        raise HTTPException(
            status_code=status.HTTP_407_PROXY_AUTHENTICATION_REQUIRED,
            detail="Pixiv refresh token invalid and recreation failed"
        ) from e


def pixiv(conn: connection, account: Account) -> list[int]:
    """Fetch Pixiv recommendations for `account` and store them as pictures.

    Returns:
        One `db.create_picture` result per stored recommendation (presumably
        the new picture ids - confirm against `db.pictures`). An empty list
        is returned when fetching recommendations failed or yielded nothing.

    Raises:
        HTTPException: status 407 when authentication with Pixiv fails.
    """
    api = _get_authorized_api(conn, account)

    # Getting pixiv account recommendations
    pictures = []
    try:
        pictures = get_recommended(api, 20)
    except Exception as e:
        # Best effort: a failed fetch is logged and reported as an empty feed.
        logger.error(f"Failed to get recommendations from pixiv: {e}")
    if not pictures:
        logger.error("Failed to generate feed from pixiv")
        return []

    # Saving recommendations as Pictures to DB
    return [
        db.create_picture(conn, "pixiv", picture.external_id, picture.url, picture.metadata)
        for picture in pictures
    ]


# TODO: change exceptions. Implement like_picture
def like_picture(conn: connection, account: Account, picture_id: int):
    """Bookmark a picture on Pixiv on behalf of `account`.

    `picture_id` is forwarded unchanged to `like`.
    NOTE(review): this looks like it must be the Pixiv illust id rather
    than the internal picture id - confirm with the caller.

    Returns:
        Whatever `like` returns.

    Raises:
        HTTPException: status 407 when authentication with Pixiv fails.
    """
    api = _get_authorized_api(conn, account)

    # Liking
    return like(api, picture_id)
# Auth
def get_refresh_token(login: str, password: str) -> str:
    """Log into Pixiv with credentials and return the issued refresh token.

    Uses `GetPixivToken` in headless mode.

    Raises:
        RuntimeError: when the login response holds no refresh token. The
            message matters because callers log the exception text.
    """
    g = GetPixivToken(headless=True)
    login_response = g.login(username=login, password=password)
    refresh_token = login_response.get("refresh_token", '')
    if not refresh_token:
        raise RuntimeError("Pixiv login response did not contain a refresh token")
    return refresh_token
def auth(refresh_token: str) -> AppPixivAPI:
    """Create an `AppPixivAPI` client and log in with `refresh_token`.

    Any exception raised by the login call propagates to the caller.
    """
    api = AppPixivAPI()
    # Must be a keyword argument: the first positional parameter of
    # AppPixivAPI.auth is the username, not the refresh token.
    api.auth(refresh_token=refresh_token)
    logger.debug("Pixiv login successful")
    return api
def refresh_refresh_token(refresh_token: str) -> str:
    """Exchange `refresh_token` for a new one via `GetPixivToken.refresh`.

    Returns:
        The ``refresh_token`` value from the response, or an empty string
        when the response does not contain one.
    """
    g = GetPixivToken(headless=True)
    # TODO: save new refresh_token to DB + account + add auto updating as coroutine
    response = g.refresh(refresh_token)
    # The response carries credentials, so only the outcome is logged.
    new_refresh_token = response.get("refresh_token", '')
    logger.debug(f"Pixiv refresh token renewed: {bool(new_refresh_token)}")
    return new_refresh_token
# Main functions
def get_recommended(api: AppPixivAPI, recommendations_number: int = 20) -> list[Picture]:
    """Return Pixiv recommendations as unsaved `Picture` objects.

    Only entries whose ``type`` equals "illust" are kept. At most
    `recommendations_number` pictures are returned; the limit is applied
    after filtering so skipped entries do not count against it.

    Args:
        api: Authenticated Pixiv client.
        recommendations_number: Maximum number of pictures to return;
            negative values are treated as 0.
    """
    result = api.illust_recommended()
    # TODO: add recommendations number as var to settings/group settings
    limit = max(recommendations_number, 0)
    illusts = [illust for illust in result.illusts if illust.type == "illust"]
    return [
        Picture(
            # external_id is a string field, so the illust id is converted
            # explicitly instead of relying on model coercion.
            external_id=str(illust.id),
            source="pixiv",
            url=illust.image_urls.large,
            metadata={},
        )
        for illust in illusts[:limit]
    ]
def like(api: AppPixivAPI, illust_id: int):
    """Add the illustration `illust_id` to the account's Pixiv bookmarks.

    Returns None; exceptions from the API call propagate to the caller.
    """
    api.illust_bookmark_add(illust_id)
    logger.debug(f"Picture {illust_id} liked!")

View File

@ -1,67 +1,53 @@
from fastapi import HTTPException, status
from loguru import logger
from psycopg2._psycopg import connection
import db.pictures as db
from api.models import Account, Picture
from scraper.gelbooru import gelbooru
from scraper.pinterest import pinterest
from scraper.pixiv import pixiv
from settings import startup_settings
from settings.consts import SUPPORTED_PLATFORMS
# TODO: rewrite mock functions to real ones
def generate_feed(
conn: connection,
accounts: list[Account]
) -> list | Exception:
) -> list:
feed = []
for account in accounts:
if account.platform not in SUPPORTED_PLATFORMS:
raise Exception
# TODO: review typing
if account.platform not in startup_settings.platforms_enabled: # type: ignore
if account.platform not in startup_settings.platforms_enabled:
raise Exception
# TODO: Should get pictures from platforms APIs
match account.platform:
case "pinterest":
pinterest()
temp_feed = pinterest(conn, account)
feed.append(temp_feed)
case "pixiv":
pixiv()
temp_feed = pixiv(conn, account)
feed.append(temp_feed)
case "gelbooru":
gelbooru()
temp_feed = gelbooru(conn, account)
feed.append(temp_feed)
case _:
return HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Platform is not supported",
)
logger.warning(f"Platform for feed generation is not supported: {account.platform}")
# TODO: remove mock results
pictures_ids = db.get_all_pictures_ids(conn)
return pictures_ids
feed = db.get_all_pictures_ids(conn)
return feed
def pinterest():
# TODO: Implement process_swipe
def process_swipe():
return
def pixiv():
return
def gelbooru():
return
def get_picture(
conn: connection,
picture_id: int
) -> int:
picture = Picture()
picture_data = db.get_picture(conn, picture_id)
if picture_id is None:
return -1
picture.fill(picture_data)
return picture.id
def get_credentials(
conn: connection,
platform: str,
groupname: str
) -> tuple[str, str]:
return 'a', 'b'
return Picture().fill(picture_data).id

View File

@ -1,7 +1,10 @@
JOIN_CODE_SYMBOLS = "ABCDEFGHJKLMNPQRSTUVWXYZ23456789" # No O + 0, I + 1
# No O + 0, I + 1. All in upper case
JOIN_CODE_SYMBOLS = "ABCDEFGHJKLMNPQRSTUVWXYZ23456789"
# All platforms should be written in lower case
SUPPORTED_PLATFORMS = ["pinterest", "pixiv", "gelbooru"]
# Info for settings update endpoint
API_EDITABLE_SETTINGS_LIST = """
admin_roles ["admin"]

View File

@ -1,28 +1,35 @@
from decouple import Csv, config
from .consts import SUPPORTED_PLATFORMS
from decouple import config
def str_to_bool(string: str) -> bool:
    """Return True only for the text "true" (case-insensitive), else False."""
    return string.lower() == "true"
# database
db_host = str(config("db_host", default="127.0.0.1"))
db_port = int(config("db_port", default=5432))
db_name = str(config("db_name", default="postgres"))
db_user = str(config("db_user", default="postgres"))
db_password = str(config("db_password", default="postgres"))
db_host: str = config("db_host", default="127.0.0.1", cast=str) # type: ignore
db_port: int = config("db_port", default=5432, cast=int) # type: ignore
db_name: str = config("db_name", default="postgres", cast=str) # type: ignore
db_user: str = config("db_user", default="postgres", cast=str) # type: ignore
db_password: str = config("db_password", default="postgres", cast=str) # type: ignore
# auth
secret_key = str(config("secret_key"))
algorithm = str(config("algorithm", "HS256"))
access_token_expiration_time = int(config("access_token_expiration_time", default=10080))
secret_key: str = config("secret_key", cast=str) # type: ignore
algorithm: str = config("algorithm", "HS256", cast=str) # type: ignore
access_token_expiration_time: int = config("access_token_expiration_time", default=10080, cast=int) # type: ignore
# other settings
join_code_length = int(config("join_code_length", default=8))
platforms_enabled = config("platforms_enabled", default=SUPPORTED_PLATFORMS, cast=list[str])
join_code_length: int = config("join_code_length", default=8, cast=int) # type: ignore
platforms_enabled: list[str] = config("platforms_enabled", default=SUPPORTED_PLATFORMS, cast=Csv(str)) # type: ignore
# dev
swagger_enabled = str_to_bool(str(config("swagger_enabled", "false")))
log_level = str(config("log_level", default="INFO"))
swagger_enabled: bool = config("swagger_enabled", "false", cast=bool) # type: ignore
log_level: str = config("log_level", default="INFO", cast=str) # type: ignore
def setup_settings():
    """Normalize the module-level `platforms_enabled` list in place.

    Every configured name is lower-cased, and names that do not appear in
    SUPPORTED_PLATFORMS are dropped. Order and duplicates of the remaining
    names are preserved.
    """
    global platforms_enabled
    lowered_names = (name.lower() for name in platforms_enabled)
    platforms_enabled = [name for name in lowered_names if name in SUPPORTED_PLATFORMS]
setup_settings()