From 2b191dddd2a799040aa96487a63a6d756fd699d6 Mon Sep 17 00:00:00 2001
From: n0body
Date: Wed, 3 Sep 2025 20:49:19 +0300
Subject: [PATCH] Implemented all endpoints

---
 requirements.txt |  Bin 836 -> 982 bytes
 src/APIapp.py    |  72 ---------------------------
 src/DBwork.py    |  74 +++++++++++++++--------------
 src/config.py    |   3 +-
 src/main.py      |  32 ++++++++++---
 src/router.py    | 106 +++++++++++++++++++++++++++++++++++++++++++
 src/scraper.py   |  42 +++++++++++++++++
 7 files changed, 210 insertions(+), 119 deletions(-)
 delete mode 100644 src/APIapp.py
 create mode 100644 src/router.py
 create mode 100644 src/scraper.py

diff --git a/requirements.txt b/requirements.txt
index 4ea05502486402145bd2034a8bc45a75e38b7789..f46947bdde08c9c5ed2b290b22ad17d3f110e9ca 100644
GIT binary patch
delta 139
zcmX@Yc8z^PfOZl?DnlYeDMJZECPNyK&S5BK$OqB|3?>Y=Kxo3C$6&}{3?xlAdJ8j(
z<^ok0F=R8O09BPUv|lPfVV{q%ipg(?0-estJ++

diff --git a/src/APIapp.py b/src/APIapp.py
deleted file mode 100644
index fa3f3e0..0000000
--- a/src/APIapp.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import DBwork
-from fastapi import FastAPI, Response, status
-from pydantic import BaseModel
-import psycopg2
-from json import dumps
-
-
-schema_name = 'harticle'
-table_name = 'articles'
-
-app = FastAPI()
-
-class Entry(BaseModel):
-    url: str
-    rating: int | None = None
-
-
-@app.get('/api/ping')
-async def ping():
-    return {'message': 'pong'}
-
-
-@app.get('/api/rates')
-async def get_rates():
-    return dumps(DBwork.get_all_entries())
-
-
-@app.post('/api/article/rate')
-async def save_rating(entry: Entry, response: Response):
-    conn, cur = DBwork.set_connection()
-    try:
-        DBwork.add_entry(article_url=entry.url,
-                         rating=entry.rating,
-                         connection=conn,
-                         cursor=cur
-                         )
-        message = 'success'
-    except psycopg2.Error:
-        response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
-        message = 'internal server error'
-    finally:
-        DBwork.close_connection(conn, cur)
-    return {'message': message,
-            'url': entry.url,
-            'rating': entry.rating
-            }
-
-
-@app.post('/api/article/remove_rate')
-async def remove_rating(entry: Entry, response: Response):
-    conn, cur = DBwork.set_connection()
-    try:
-        DBwork.delete_entry(entry.url, conn, cur)
-        message = 'success'
-    except psycopg2.Error:
-        response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
-        message = 'internal server error'
-    finally:
-        DBwork.close_connection(conn, cur)
-    return {'message': message}
-
-
-@app.post('/api/articles/get')
-async def megafunc(entry: Entry, response: Response):
-    ...
-
-
-''' MAIN '''
-async def main():
-    DBwork.schema_creator(schema_name)
-    DBwork.table_creator(schema_name, table_name)
-
diff --git a/src/DBwork.py b/src/DBwork.py
index 825eb19..e5365a8 100644
--- a/src/DBwork.py
+++ b/src/DBwork.py
@@ -3,15 +3,6 @@
 import config
 from loguru import logger
 
-logging_level = config.logging_level
-logger.add(
-    "sys.stdout",
-    format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level} | {file}:{line} - {message}",
-    colorize=True,
-    level=logging_level
-)
-
-
 #connection stuff
 def set_connection():
     try:
@@ -22,14 +13,15 @@ def set_connection():
             host = config.host_name,
             port = config.port
         )
-        cursor = connection.cursor()
-        return connection, cursor
+        return connection
     except psycopg2.Error as e:
         logger.error(f'Failed to set connection to the PostgreSQL DB: {e.pgerror}')
 
 
-def close_connection(connection, cursor):
+def close_connection(connection):
+    # set_connection() returns None on failure, so guard before closing
+    if connection is None:
+        return
     try:
-        cursor.close()
         connection.close()
     except psycopg2.Error as e:
@@ -37,57 +28,54 @@
 
 
 #actual DB alters
-def add_entry(article_url, rating):
-    connection, cursor = set_connection()
+def add_entry(article_url, rating, connection):
     try:
+        cursor = connection.cursor()
         cursor.execute("INSERT INTO harticle.articles (article_url, rating) VALUES (%s, %s);", (article_url, rating,))
         connection.commit()
         logger.info('An entry has been written to the PGSQL DB successfully')
     except psycopg2.Error as e:
         logger.error(f'Failed to write an entry for article \'{article_url}\': {e.pgerror}')
-    finally:
-        close_connection(connection, cursor)
+        # re-raise so the router can map the failure to an HTTP 500
+        raise
 
 
-def delete_entry(article_url, connection, cursor):
-    connection, cursor = set_connection()
+def delete_entry(article_url, connection):
     try:
+        cursor = connection.cursor()
         cursor.execute("DELETE FROM harticle.articles WHERE article_url = %s;", (article_url,))
         connection.commit()
         logger.info(f'Rating for article \'{article_url}\' was cleared successfully')
     except psycopg2.Error as e:
         logger.error(f'Failed to clear a rating entry for article \'{article_url}\': {e.pgerror}')
-    finally:
-        close_connection(connection, cursor)
+        raise
 
 
-# def delete_rating(article_url, connection, cursor):
-#     close_connection(connection, cursor)
+# def delete_rating(article_url, connection):
 #     try:
+#         cursor = connection.cursor()
 #         cursor.execute("UPDATE harticle.articles SET rating = NULL WHERE article_url = %s;", (article_url,))
 #         connection.commit()
 #         logger.info(f'Rating for article \'{article_url}\' was cleared successfully')
-#         close_connection(connection, cursor)
 #     except psycopg2.Error as e:
 #         logger.error(f'Failed to clear a rating entry for article \'{article_url}\': {e.pgerror}')
 
 
-def get_all_entries():
-    connection, cursor = set_connection()
+def get_all_entries(connection):
     try:
+        cursor = connection.cursor()
         cursor.execute('SELECT article_url, rating FROM harticle.articles;')
         entries = cursor.fetchall()
         logger.info('All entry pairs have been retrieved successfully')
         return entries
     except psycopg2.Error as e:
         logger.error(f'Failed to fetch DB entries: {e.pgerror}')
 
 
 #'create if no any' type functions for schema and table
 def schema_creator(schema_name):
-    conn, cur = set_connection()
+    conn = set_connection()
+    cur = conn.cursor()
     try:
         cur.execute(f'CREATE SCHEMA IF NOT EXISTS {schema_name};')
         conn.commit()
@@ -95,28 +80,29 @@
     except psycopg2.Error as e:
         logger.error(f'Error during schema creation: {e}')
     finally:
-        close_connection(conn, cur)
+        close_connection(conn)
 
 
 def table_creator(schema_name, table_name):
-    conn, cur = set_connection()
+    conn = set_connection()
+    cur = conn.cursor()
     try:
         cur.execute(f'''
-CREATE TABLE IF NOT EXISTS {schema_name}.{table_name}
-(
-    id SERIAL PRIMARY KEY,
-    article_url VARCHAR(3000) UNIQUE NOT NULL,
-    rating INT CHECK (rating < 2)
-)
+    CREATE TABLE IF NOT EXISTS {schema_name}.{table_name}
+    (
+        id SERIAL PRIMARY KEY,
+        article_url VARCHAR(3000) UNIQUE NOT NULL,
+        rating INT CHECK (rating < 2)
+    )
 
-TABLESPACE pg_default;
+    TABLESPACE pg_default;
 
-ALTER TABLE IF EXISTS {schema_name}.{table_name}
-    OWNER to {config.postgres_user};
-''')
+    ALTER TABLE IF EXISTS {schema_name}.{table_name}
+        OWNER to {config.postgres_user};
+    ''')
         conn.commit()
         logger.info(f'Successfully created table {table_name} in schema {schema_name} if it didn\'t exist yet')
     except psycopg2.Error as e:
         logger.error(f'Error during table creation: {e}')
     finally:
-        close_connection(conn, cur)
\ No newline at end of file
+        close_connection(conn)
diff --git a/src/config.py b/src/config.py
index 4ff096e..1d60cdc 100644
--- a/src/config.py
+++ b/src/config.py
@@ -6,4 +6,5 @@ postgres_user = config('POSTGRES_USER')
 postgres_password = config('POSTGRES_PASSWORD')
 host_name = config('HOST_NAME')
 port = config('PORT')
-logging_level = config('LOGGING_LEVEL')
\ No newline at end of file
+logging_level = config('LOGGING_LEVEL')
+enable_api_docs = config('ENABLE_API_DOCS', cast=bool)
diff --git a/src/main.py b/src/main.py
index 8e5f9a7..9b78470 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,7 +1,31 @@
-import asyncio
-import APIapp
+import sys
+
+import router
 import uvicorn
+from loguru import logger
+import config
+from fastapi import FastAPI
 
 
-asyncio.run(APIapp.main())
-uvicorn.run("APIapp:app", host="127.0.0.1", port=8000, log_level="info")
\ No newline at end of file
+logging_level = config.logging_level
+# drop loguru's default stderr sink so log lines are not duplicated, and log
+# to the sys.stdout object (the string "sys.stdout" would create a file)
+logger.remove()
+logger.add(
+    sys.stdout,
+    format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level} | {file}:{line} - {message}",
+    colorize=True,
+    level=logging_level
+)
+
+if config.enable_api_docs:
+    docs_url = '/api/docs'
+else:
+    docs_url = None
+
+app = FastAPI(docs_url=docs_url)
+app.include_router(router.router)
+
+
+router.main()
+uvicorn.run(app=app, host="127.0.0.1", port=8000, log_level="info")
diff --git a/src/router.py b/src/router.py
new file mode 100644
index 0000000..7301866
--- /dev/null
+++ b/src/router.py
@@ -0,0 +1,106 @@
+import DBwork
+import scraper
+from fastapi import Response, status, APIRouter
+from pydantic import BaseModel
+import psycopg2
+
+
+schema_name = 'harticle'
+table_name = 'articles'
+
+router = APIRouter(prefix='/api')
+
+class Entry(BaseModel):
+    url: str
+    rating: int | None = None
+
+
+class Article(BaseModel):
+    url: str
+
+
+class Amount(BaseModel):
+    amount: int
+
+
+@router.get('/ping')
+async def ping():
+    return {'message': 'pong'}
+
+
+@router.get('/rates')
+async def get_rates():
+    conn = DBwork.set_connection()
+    # return the rows directly; FastAPI serializes them to JSON
+    # (json.dumps here would double-encode the payload into a string)
+    result = DBwork.get_all_entries(conn)
+    DBwork.close_connection(conn)
+    return result
+
+
+@router.post('/article/rate')
+async def save_rating(entry: Entry, response: Response):
+    conn = DBwork.set_connection()
+    try:
+        DBwork.add_entry(article_url=entry.url,
+                         rating=entry.rating,
+                         connection=conn
+                         )
+        message = 'success'
+    except psycopg2.Error:
+        response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
+        message = 'internal server error'
+    finally:
+        DBwork.close_connection(conn)
+    return {'message': message,
+            'url': entry.url,
+            'rating': entry.rating
+            }
+
+
+@router.post('/article/remove_rate')
+async def remove_rating(entry: Entry, response: Response):
+    conn = DBwork.set_connection()
+    try:
+        DBwork.delete_entry(entry.url, conn)
+        message = 'success'
+    except psycopg2.Error:
+        response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
+        message = 'internal server error'
+    finally:
+        DBwork.close_connection(conn)
+    return {'message': message}
+
+
+@router.post('/article/get/html')
+async def get_article_html(article: Article):
+    html_string = await scraper.get_article_html(article.url, md=False)
+    return html_string
+
+
+@router.post('/article/get/md')
+async def get_article_md(article: Article):
+    md_string = await scraper.get_article_html(article.url, md=True)
+    return md_string
+
+
+@router.post('/articles/get/html')
+async def get_n_articles_html(amount: Amount):
+    articles = []
+    for url in await scraper.get_articles_from_feed(amount.amount):
+        articles.append(await scraper.get_article_html(f'https://habr.com{url}'))
+    return articles
+
+
+@router.post('/articles/get/md')
+async def get_n_articles_md(amount: Amount):
+    articles = []
+    for url in await scraper.get_articles_from_feed(amount.amount):
+        articles.append(await scraper.get_article_html(f'https://habr.com{url}', md=True))
+    return articles
+
+
+''' MAIN '''
+def main():
+    DBwork.schema_creator(schema_name)
+    DBwork.table_creator(schema_name, table_name)
diff --git a/src/scraper.py b/src/scraper.py
new file mode 100644
index 0000000..d571745
--- /dev/null
+++ b/src/scraper.py
@@ -0,0 +1,42 @@
+from bs4 import BeautifulSoup
+import requests
+import re
+from markdownify import MarkdownConverter
+from loguru import logger
+
+
+# NOTE: requests is synchronous, so these coroutines block the event loop while fetching
+async def get_article_html(url: str, md: bool = False) -> str:
+    response = requests.get(url)
+    if response.status_code == 200:
+        soup = BeautifulSoup(response.content, 'html.parser')
+        content = soup.find('div', class_='tm-article-presenter')
+        if content is None:
+            logger.error(f'Could not find the article container on {url}')
+            return ''
+        # strip page chrome (footer, vote/share widgets, hub links) from the article body
+        filter_tags = ['footer', 'meta', 'widget', 'vote', 'hubs', 'sticky']
+        for tag in filter_tags:
+            trash = content.find_all(class_=re.compile(tag))
+            for element in trash:
+                element.decompose()
+        if md:
+            return MarkdownConverter().convert_soup(content)
+        else:
+            return content.prettify()
+    else:
+        logger.error(f'Error during fetching habr article html. Status code: {response.status_code}')
+        return ''
+
+
+async def get_articles_from_feed(amount: int) -> list[str]:
+    response = requests.get('https://habr.com/ru/feed/')
+    if response.status_code == 200:
+        soup = BeautifulSoup(response.content, 'html.parser')
+        urls = []
+        for url in soup.find_all(class_='tm-title__link', limit=amount, href=True):
+            urls.append(str(url['href']))
+        return urls
+    else:
+        logger.error(f'Error during fetching the habr feed. Status code: {response.status_code}')
+        return []
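
---

A quick smoke test for the new endpoints (a sketch, not part of the patched
files: it assumes the defaults wired in src/main.py -- uvicorn on
127.0.0.1:8000, every route under the /api prefix -- and a reachable
PostgreSQL; the Habr article URL is a placeholder):

import requests

BASE = 'http://127.0.0.1:8000/api'

# liveness probe -> {'message': 'pong'}
print(requests.get(f'{BASE}/ping').json())

# store a rating (the CHECK constraint only admits values below 2),
# then list all (article_url, rating) pairs
requests.post(f'{BASE}/article/rate',
              json={'url': 'https://habr.com/ru/articles/000000/', 'rating': 1})
print(requests.get(f'{BASE}/rates').json())

# scrape the three newest feed articles as Markdown
print(requests.post(f'{BASE}/articles/get/md', json={'amount': 3}).json())

# clear the rating again
requests.post(f'{BASE}/article/remove_rate',
              json={'url': 'https://habr.com/ru/articles/000000/'})

Note that ENABLE_API_DOCS must now be present in the environment next to the
existing POSTGRES_* / HOST_NAME / PORT / LOGGING_LEVEL settings: decouple's
config() is called without a default, so a missing key raises
UndefinedValueError before the app starts.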