Readability and functionality improvements

- Refactored all DBwork functions so they no longer open and close a
connection inside their bodies; the connection is now passed in as a
parameter instead
- Added a single file that configures the FastAPI app
- Implemented FastAPI's lifespan handler, which runs setup and teardown
functions on app startup and shutdown
- Added error logging to the scraping functions
- Fixed the /articles/get/html and /articles/get/md endpoints
- All POST methods now return base64-encoded HTML/Markdown strings to
avoid awkward JSON escaping (see the client-side decoding sketch after
this list)
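
For example, a client consuming the /api/article/get/html endpoint would
decode the payload like this (a minimal sketch; the host/port, the example
article URL, and an installed requests library are assumptions):

import base64
import requests

# Hypothetical local deployment; adjust host, port and article URL as needed.
resp = requests.post(
    'http://127.0.0.1:8000/api/article/get/html',
    json={'url': 'https://habr.com/ru/articles/0/'},
)

# The response body is a base64-encoded UTF-8 string, not JSON.
html = base64.b64decode(resp.text).decode('utf-8')
print(html[:200])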
2025-09-04 23:05:12 +03:00
parent 2b191dddd2
commit 6da6ace82f
5 changed files with 85 additions and 69 deletions

View File

@@ -4,7 +4,10 @@ from loguru import logger

 #connection stuff
+connection = None
+
 def set_connection():
+    global connection
     try:
         connection = psycopg2.connect(
             dbname = config.db_name,
@@ -13,7 +16,7 @@ def set_connection():
             host = config.host_name,
             port = config.port
         )
-        return connection
+        logger.info('Connection to PostgreSQL DB set successfully')
     except psycopg2.Error as e:
         logger.error(f'Failed to set connection to the PostgreSQL DB: {e.pgerror}')
@@ -23,6 +26,7 @@ def close_connection(connection):
         cursor = connection.cursor()
         cursor.close()
         connection.close()
+        logger.info('Connection to PostgreSQL DB closed successfully')
     except psycopg2.Error as e:
         logger.error(f'Failed to close PostgreSQL connection: {e.pgerror}')
@@ -70,22 +74,19 @@ def get_all_entries(connection):

 #'create if no any' type functions for schema and table
-def schema_creator(schema_name):
-    conn = set_connection()
-    cur = conn.cursor()
+def schema_creator(schema_name, connection):
+    cur = connection.cursor()
     try:
         cur.execute(f'CREATE SCHEMA IF NOT EXISTS {schema_name};')
-        conn.commit()
+        connection.commit()
         logger.info(f'Successfully created schema {schema_name} if it didn\'t exist yet')
     except psycopg2.Error as e:
         logger.error(f'Error during schema creation: {e}')
-    finally:
-        close_connection(conn)

-def table_creator(schema_name, table_name):
-    conn = set_connection()
-    cur = conn.cursor()
+def table_creator(schema_name, table_name, connection):
+    cur = connection.cursor()
     try:
         cur.execute(f'''
             CREATE TABLE IF NOT EXISTS {schema_name}.{table_name}
@@ -100,9 +101,8 @@ def table_creator(schema_name, table_name):
         ALTER TABLE IF EXISTS {schema_name}.{table_name}
             OWNER to {config.postgres_user};
         ''')
-        conn.commit()
+        connection.commit()
         logger.info(f'Successfully created table {table_name} in schema {schema_name} if it didn\'t exist yet')
     except psycopg2.Error as e:
         logger.error(f'Error during table creation: {e}')
-    finally:
-        close_connection(conn)
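
After this refactor, the caller owns the connection lifecycle. A minimal
sketch of the intended call pattern, assuming the helpers shown above and
a configured config module:

import DBwork

# Open the shared module-level connection once at startup...
DBwork.set_connection()
conn = DBwork.connection

# ...pass it to every helper that touches the database...
DBwork.schema_creator('harticle', conn)
DBwork.table_creator('harticle', 'articles', conn)
print(DBwork.get_all_entries(conn))

# ...and close it exactly once at shutdown.
DBwork.close_connection(conn)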

src/app_creator.py Normal file (+41 lines)

View File

@@ -0,0 +1,41 @@
+import config
+import router
+import DBwork
+from fastapi import FastAPI
+from loguru import logger
+from contextlib import asynccontextmanager
+
+if config.enable_api_docs:
+    docs_url = '/api/docs'
+else:
+    docs_url = None
+
+schema_name = 'harticle'
+table_name = 'articles'
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    DBwork.set_connection()
+    DBwork.schema_creator(schema_name, DBwork.connection)
+    DBwork.table_creator(schema_name, table_name, DBwork.connection)
+    yield
+    DBwork.close_connection(DBwork.connection)
+
+app = FastAPI(docs_url=docs_url, lifespan=lifespan)
+
+def create_app():
+    logging_level = config.logging_level
+    logger.add(
+        "sys.stdout",
+        format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level} | {file}:{line} - {message}",
+        colorize=True,
+        level=logging_level
+    )
+    app.include_router(router.router)
+    return app
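
One caveat in the file above: logger.add("sys.stdout", ...) passes the
string "sys.stdout", which loguru treats as a file path, so logs would go
to a file literally named sys.stdout rather than to the console. The
stream object is presumably what was intended; a minimal corrected sketch
(the "INFO" level stands in for config.logging_level):

import sys
from loguru import logger

# Pass the stream object, not a string, so loguru writes to the console.
logger.add(
    sys.stdout,
    format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level} | {file}:{line} - {message}",
    colorize=True,
    level="INFO",  # stand-in for config.logging_level
)
logger.info("now logging to stdout")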

View File

@@ -1,26 +1,7 @@
-import router
 import uvicorn
-from loguru import logger
-import config
-from fastapi import FastAPI
+from app_creator import create_app

-logging_level = config.logging_level
-logger.add(
-    "sys.stdout",
-    format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level} | {file}:{line} - {message}",
-    colorize=True,
-    level=logging_level
-)
-
-if config.enable_api_docs:
-    docs_url = '/api/docs'
-else:
-    docs_url = None
-
-app = FastAPI(docs_url=docs_url)
-app.include_router(router.router)
-
-router.main()
-uvicorn.run(app=app, host="127.0.0.1", port=8000, log_level="info")
+if __name__ == '__main__':
+    app = create_app()
+    uvicorn.run(app=app, host="127.0.0.1", port=8000, log_level="info")

View File

@ -1,16 +1,16 @@
import DBwork import DBwork
import scraper import scraper
from DBwork import connection as conn
from fastapi import Response, status, APIRouter from fastapi import Response, status, APIRouter
from pydantic import BaseModel from pydantic import BaseModel
import psycopg2 import psycopg2
from json import dumps from json import dumps
import base64
schema_name = 'harticle'
table_name = 'articles'
router = APIRouter(prefix='/api') router = APIRouter(prefix='/api')
class Entry(BaseModel): class Entry(BaseModel):
url: str url: str
rating: int | None = None rating: int | None = None
@ -31,15 +31,12 @@ async def ping():
@router.get('/rates') @router.get('/rates')
async def get_rates(): async def get_rates():
conn = DBwork.set_connection()
result = dumps(DBwork.get_all_entries(conn)) result = dumps(DBwork.get_all_entries(conn))
DBwork.close_connection(conn)
return result return result
@router.post('/article/rate') @router.post('/article/rate')
async def save_rating(entry: Entry, response: Response): async def save_rating(entry: Entry, response: Response):
conn = DBwork.set_connection()
try: try:
DBwork.add_entry(article_url=entry.url, DBwork.add_entry(article_url=entry.url,
rating=entry.rating, rating=entry.rating,
@ -50,7 +47,6 @@ async def save_rating(entry: Entry, response: Response):
response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
message = 'internal server error' message = 'internal server error'
finally: finally:
DBwork.close_connection(conn)
return {'message': message, return {'message': message,
'url': entry.url, 'url': entry.url,
'rating': entry.rating 'rating': entry.rating
@ -59,7 +55,6 @@ async def save_rating(entry: Entry, response: Response):
@router.post('/article/remove_rate') @router.post('/article/remove_rate')
async def remove_rating(entry: Entry, response: Response): async def remove_rating(entry: Entry, response: Response):
conn = DBwork.set_connection()
try: try:
DBwork.delete_entry(entry.url, conn) DBwork.delete_entry(entry.url, conn)
message = 'success' message = 'success'
@ -67,39 +62,39 @@ async def remove_rating(entry: Entry, response: Response):
response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
message = 'internal server error' message = 'internal server error'
finally: finally:
DBwork.close_connection(conn)
return {'message': message} return {'message': message}
@router.post('/article/get/html') @router.post('/article/get/html')
async def get_article_html(article: Article, response: Response = None): async def get_article_html(article: Article, response: Response = None):
html_string = await scraper.get_article_html(article.url, md=False) html_string = await scraper.get_article_html(article.url)
return html_string b64_string = base64.b64encode(html_string.encode('utf-8')).decode('utf-8')
return Response(content=b64_string, media_type='text/plain')
@router.post('/article/get/md') @router.post('/article/get/md')
async def get_article_md(article: Article, response: Response = None): async def get_article_md(article: Article, response: Response = None):
md_string = await scraper.get_article_html(article.url, md=True) md_string = await scraper.get_article_html(article.url, md=True)
return md_string b64_string = base64.b64encode(md_string.encode('utf-8')).decode('utf-8')
return Response(content=b64_string, media_type='text/plain')
@router.post('/articles/get/html') @router.post('/articles/get/html')
async def get_n_articles_html(amount: Amount, response: Response = None): async def get_n_articles_html(amount: Amount, response: Response = None):
articles = [] articles = {}
for url in await scraper.get_articles_from_feed(amount.amount): urls = await scraper.get_articles_from_feed(amount.amount)
articles.append(await scraper.get_article_html(f'https://habr.com{url}')) for url in urls:
html = await scraper.get_article_html(f'https://habr.com{url}')
b64_string = base64.b64encode(html.encode('utf-8')).decode('utf-8')
articles[f'https://habr.com{url}'] = b64_string
return articles return articles
@router.post('/articles/get/md') @router.post('/articles/get/md')
async def get_n_articles_md(amount: Amount, response: Response = None): async def get_n_articles_md(amount: Amount, response: Response = None):
articles = [] articles = {}
for url in await scraper.get_articles_from_feed(amount.amount): for url in await scraper.get_articles_from_feed(amount.amount):
articles.append(await scraper.get_article_md(f'https://habr.com{url}')) md = await scraper.get_article_html(f'https://habr.com{url}', md=True)
b64_string = base64.b64encode(md.encode('utf-8')).decode('utf-8')
articles[f'https://habr.com{url}'] = b64_string
return articles return articles
''' MAIN '''
def main():
DBwork.schema_creator(schema_name)
DBwork.table_creator(schema_name, table_name)
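
One caveat in the diff above: from DBwork import connection as conn binds
the value of DBwork.connection at import time, which is still None because
lifespan only calls set_connection() once the app starts, so the handlers
would pass None to the DB helpers. A minimal sketch of the safer pattern,
assuming the DBwork module from this commit:

import DBwork

def get_live_connection():
    # Attribute access at call time sees the connection that
    # set_connection() assigned; 'from DBwork import connection'
    # would have frozen the import-time value (None) into a new name.
    return DBwork.connection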

View File

@@ -6,7 +6,6 @@ from loguru import logger

 async def get_article_html(url: str, md: bool = False) -> str:
-    print(url, type(url))
     response = requests.get(url)
     if response.status_code == 200:
         soup = BeautifulSoup(response.content, 'html.parser')
@@ -22,7 +21,7 @@ async def get_article_html(url: str, md: bool = False) -> str:
         else:
             return content.prettify()
     else:
-        logger.error(f'Error during fetching habr article html. Status code: {response.status_code}')
+        logger.error(f'Error fetching habr response. Status code: {response.status_code}')

 async def get_articles_from_feed(amount: int) -> list[str]:
@@ -34,4 +33,4 @@ async def get_articles_from_feed(amount: int) -> list[str]:
             urls.append(str(url['href']))
         return urls
     else:
-        logger.error(f'Error during fetching habr article html. Status code: {response.status_code}')
+        logger.error(f'Error fetching habr response. Status code: {response.status_code}')
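
Note that when the status code is not 200, get_article_html logs the error
and implicitly returns None, which the router endpoints above would then
try to .encode(). A hypothetical guard, sketched here rather than part of
this commit, keeps the return type a plain string:

import requests
from bs4 import BeautifulSoup
from loguru import logger

async def get_article_html(url: str, md: bool = False) -> str:
    response = requests.get(url)
    if response.status_code != 200:
        logger.error(f'Error fetching habr response. Status code: {response.status_code}')
        return ''  # empty string instead of an implicit None
    soup = BeautifulSoup(response.content, 'html.parser')
    # ...article extraction as in the diff above...
    return soup.prettify()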