Readability and functionality improvements
- Refactored all DBwork functions so they no longer open and close a connection inside their bodies; they take the connection as a parameter instead
- Added a single file to configure the FastAPI app
- Implemented FastAPI's lifespan function, which calls certain functions on app startup and shutdown
- Added error logging for scraping functions
- Fixed the /articles/get/html and /articles/get/md endpoints
- All POST methods now return base64-encoded html/md strings to avoid weird JSON formatting
This commit is contained in:
@ -4,7 +4,10 @@ from loguru import logger
|
||||
|
||||
|
||||
#connection stuff
|
||||
connection = None
|
||||
|
||||
def set_connection():
|
||||
global connection
|
||||
try:
|
||||
connection = psycopg2.connect(
|
||||
dbname = config.db_name,
|
||||
@ -13,7 +16,7 @@ def set_connection():
|
||||
host = config.host_name,
|
||||
port = config.port
|
||||
)
|
||||
return connection
|
||||
logger.info('Connection to PostreSQL DB set successfully')
|
||||
except psycopg2.Error as e:
|
||||
logger.error(f'Failed to set connection to the PostgreSQL DB: {e.pgerror}')
|
||||
|
||||
@ -23,6 +26,7 @@ def close_connection(connection):
|
||||
cursor = connection.cursor()
|
||||
cursor.close()
|
||||
connection.close()
|
||||
logger.info('Connection to PostreSQL DB closed successfully')
|
||||
except psycopg2.Error as e:
|
||||
logger.error(f'Failed to close PostgreSQL connection: {e.pgerror}')
|
||||
|
||||
@ -70,22 +74,19 @@ def get_all_entries(connection):
|
||||
|
||||
|
||||
#'create if no any' type functions for schema and table
|
||||
def schema_creator(schema_name):
|
||||
conn = set_connection()
|
||||
cur = conn.cursor()
|
||||
def schema_creator(schema_name, connection):
|
||||
cur = connection.cursor()
|
||||
try:
|
||||
cur.execute(f'CREATE SCHEMA IF NOT EXISTS {schema_name};')
|
||||
conn.commit()
|
||||
connection.commit()
|
||||
logger.info(f'Successfully created schema {schema_name} if it didn\'t exist yet')
|
||||
except psycopg2.Error as e:
|
||||
logger.error(f'Error during schema creation: {e}')
|
||||
finally:
|
||||
close_connection(conn)
|
||||
|
||||
|
||||
def table_creator(schema_name, table_name):
|
||||
conn = set_connection()
|
||||
cur = conn.cursor()
|
||||
|
||||
def table_creator(schema_name, table_name, connection):
|
||||
cur = connection.cursor()
|
||||
try:
|
||||
cur.execute(f'''
|
||||
CREATE TABLE IF NOT EXISTS {schema_name}.{table_name}
|
||||
@ -100,9 +101,8 @@ def table_creator(schema_name, table_name):
|
||||
ALTER TABLE IF EXISTS {schema_name}.{table_name}
|
||||
OWNER to {config.postgres_user};
|
||||
''')
|
||||
conn.commit()
|
||||
connection.commit()
|
||||
logger.info(f'Successfully created table {table_name} in schema {schema_name} if it didn\'t exist yet')
|
||||
except psycopg2.Error as e:
|
||||
logger.error(f'Error during table creation: {e}')
|
||||
finally:
|
||||
close_connection(conn)
|
||||
|
||||
|
||||
41
src/app_creator.py
Normal file
41
src/app_creator.py
Normal file
@ -0,0 +1,41 @@
|
||||
import config
|
||||
import router
|
||||
import DBwork
|
||||
from fastapi import FastAPI
|
||||
from loguru import logger
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
|
||||
# Serve the interactive API docs under /api/docs only when enabled in config;
# a docs_url of None is passed to FastAPI when they are disabled.
docs_url = '/api/docs' if config.enable_api_docs else None
|
||||
|
||||
schema_name = 'harticle'
|
||||
table_name = 'articles'
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage the shared PostgreSQL connection for the lifetime of the app.

    On startup: opens the module-level ``DBwork.connection`` and makes sure
    the schema and table exist. On shutdown: closes that connection.

    Raises:
        RuntimeError: if the connection could not be established.
    """
    DBwork.set_connection()
    # set_connection() logs psycopg2 errors instead of raising them, so a
    # failed connect leaves DBwork.connection as None. Fail fast here with a
    # clear message rather than with an AttributeError on connection.cursor().
    if DBwork.connection is None:
        raise RuntimeError('Failed to set connection to the PostgreSQL DB')
    try:
        DBwork.schema_creator(schema_name, DBwork.connection)
        DBwork.table_creator(schema_name, table_name, DBwork.connection)
        yield
    finally:
        # Runs on normal shutdown and also when the lifespan context is
        # exited by an exception or cancellation, so the connection never leaks.
        DBwork.close_connection(DBwork.connection)
|
||||
|
||||
|
||||
app = FastAPI(docs_url=docs_url, lifespan=lifespan)
|
||||
|
||||
|
||||
def create_app():
    """Configure logging, attach the API router and return the FastAPI app.

    Returns:
        The module-level ``app`` instance with ``router.router`` included.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import sys

    # Pass the stream object, not the string "sys.stdout": loguru treats a
    # string sink as a file path and would write the (colorized) log lines to
    # a file literally named "sys.stdout" in the working directory.
    logger.add(
        sys.stdout,
        format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level} | {file}:{line} - {message}",
        colorize=True,
        level=config.logging_level,
    )

    app.include_router(router.router)

    return app
|
||||
25
src/main.py
25
src/main.py
@ -1,26 +1,7 @@
|
||||
import router
|
||||
import uvicorn
|
||||
from loguru import logger
|
||||
import config
|
||||
from fastapi import FastAPI
|
||||
from app_creator import create_app
|
||||
|
||||
|
||||
logging_level = config.logging_level
|
||||
logger.add(
|
||||
"sys.stdout",
|
||||
format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level} | {file}:{line} - {message}",
|
||||
colorize=True,
|
||||
level=logging_level
|
||||
)
|
||||
|
||||
if config.enable_api_docs:
|
||||
docs_url = '/api/docs'
|
||||
else:
|
||||
docs_url = None
|
||||
|
||||
app = FastAPI(docs_url=docs_url)
|
||||
app.include_router(router.router)
|
||||
|
||||
|
||||
router.main()
|
||||
if __name__ == '__main__':
|
||||
app = create_app()
|
||||
uvicorn.run(app=app, host="127.0.0.1", port=8000, log_level="info")
|
||||
|
||||
@ -1,16 +1,16 @@
|
||||
import DBwork
|
||||
import scraper
|
||||
from DBwork import connection as conn
|
||||
from fastapi import Response, status, APIRouter
|
||||
from pydantic import BaseModel
|
||||
import psycopg2
|
||||
from json import dumps
|
||||
import base64
|
||||
|
||||
|
||||
schema_name = 'harticle'
|
||||
table_name = 'articles'
|
||||
|
||||
router = APIRouter(prefix='/api')
|
||||
|
||||
|
||||
class Entry(BaseModel):
|
||||
url: str
|
||||
rating: int | None = None
|
||||
@ -31,15 +31,12 @@ async def ping():
|
||||
|
||||
@router.get('/rates')
|
||||
async def get_rates():
|
||||
conn = DBwork.set_connection()
|
||||
result = dumps(DBwork.get_all_entries(conn))
|
||||
DBwork.close_connection(conn)
|
||||
return result
|
||||
|
||||
|
||||
@router.post('/article/rate')
|
||||
async def save_rating(entry: Entry, response: Response):
|
||||
conn = DBwork.set_connection()
|
||||
try:
|
||||
DBwork.add_entry(article_url=entry.url,
|
||||
rating=entry.rating,
|
||||
@ -50,7 +47,6 @@ async def save_rating(entry: Entry, response: Response):
|
||||
response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
|
||||
message = 'internal server error'
|
||||
finally:
|
||||
DBwork.close_connection(conn)
|
||||
return {'message': message,
|
||||
'url': entry.url,
|
||||
'rating': entry.rating
|
||||
@ -59,7 +55,6 @@ async def save_rating(entry: Entry, response: Response):
|
||||
|
||||
@router.post('/article/remove_rate')
|
||||
async def remove_rating(entry: Entry, response: Response):
|
||||
conn = DBwork.set_connection()
|
||||
try:
|
||||
DBwork.delete_entry(entry.url, conn)
|
||||
message = 'success'
|
||||
@ -67,39 +62,39 @@ async def remove_rating(entry: Entry, response: Response):
|
||||
response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
|
||||
message = 'internal server error'
|
||||
finally:
|
||||
DBwork.close_connection(conn)
|
||||
return {'message': message}
|
||||
|
||||
|
||||
@router.post('/article/get/html')
|
||||
async def get_article_html(article: Article, response: Response = None):
|
||||
html_string = await scraper.get_article_html(article.url, md=False)
|
||||
return html_string
|
||||
html_string = await scraper.get_article_html(article.url)
|
||||
b64_string = base64.b64encode(html_string.encode('utf-8')).decode('utf-8')
|
||||
return Response(content=b64_string, media_type='text/plain')
|
||||
|
||||
|
||||
@router.post('/article/get/md')
|
||||
async def get_article_md(article: Article, response: Response = None):
|
||||
md_string = await scraper.get_article_html(article.url, md=True)
|
||||
return md_string
|
||||
b64_string = base64.b64encode(md_string.encode('utf-8')).decode('utf-8')
|
||||
return Response(content=b64_string, media_type='text/plain')
|
||||
|
||||
|
||||
@router.post('/articles/get/html')
|
||||
async def get_n_articles_html(amount: Amount, response: Response = None):
|
||||
articles = []
|
||||
for url in await scraper.get_articles_from_feed(amount.amount):
|
||||
articles.append(await scraper.get_article_html(f'https://habr.com{url}'))
|
||||
articles = {}
|
||||
urls = await scraper.get_articles_from_feed(amount.amount)
|
||||
for url in urls:
|
||||
html = await scraper.get_article_html(f'https://habr.com{url}')
|
||||
b64_string = base64.b64encode(html.encode('utf-8')).decode('utf-8')
|
||||
articles[f'https://habr.com{url}'] = b64_string
|
||||
return articles
|
||||
|
||||
|
||||
@router.post('/articles/get/md')
|
||||
async def get_n_articles_md(amount: Amount, response: Response = None):
|
||||
articles = []
|
||||
articles = {}
|
||||
for url in await scraper.get_articles_from_feed(amount.amount):
|
||||
articles.append(await scraper.get_article_md(f'https://habr.com{url}'))
|
||||
md = await scraper.get_article_html(f'https://habr.com{url}', md=True)
|
||||
b64_string = base64.b64encode(md.encode('utf-8')).decode('utf-8')
|
||||
articles[f'https://habr.com{url}'] = b64_string
|
||||
return articles
|
||||
|
||||
|
||||
''' MAIN '''
|
||||
def main():
|
||||
DBwork.schema_creator(schema_name)
|
||||
DBwork.table_creator(schema_name, table_name)
|
||||
|
||||
@ -6,7 +6,6 @@ from loguru import logger
|
||||
|
||||
|
||||
async def get_article_html(url: str, md: bool = False) -> str:
|
||||
print(url, type(url))
|
||||
response = requests.get(url)
|
||||
if response.status_code == 200:
|
||||
soup = BeautifulSoup(response.content, 'html.parser')
|
||||
@ -22,7 +21,7 @@ async def get_article_html(url: str, md: bool = False) -> str:
|
||||
else:
|
||||
return content.prettify()
|
||||
else:
|
||||
logger.error(f'Error during fetching habr article html. Status code: {response.status_code}')
|
||||
logger.error(f'Error during fetching habr response. Status code: {response.status_code}')
|
||||
|
||||
|
||||
async def get_articles_from_feed(amount: int) -> list[str]:
|
||||
@ -34,4 +33,4 @@ async def get_articles_from_feed(amount: int) -> list[str]:
|
||||
urls.append(str(url['href']))
|
||||
return urls
|
||||
else:
|
||||
logger.error(f'Error during fetching habr article html. Status code: {response.status_code}')
|
||||
logger.error(f'Error during fetching habr response. Status code: {response.status_code}')
|
||||
|
||||
Reference in New Issue
Block a user