Compare commits
19 Commits
8a6490a481
...
1.1.1
| Author | SHA1 | Date | |
|---|---|---|---|
| 9424276474 | |||
| 7949312f9a | |||
| f814a1ba00 | |||
| a910bffcc9 | |||
| 06e24e35e1 | |||
| 523ac2228d | |||
| 95a232fb78 | |||
| 355aff8cf3 | |||
| 2ecf7ae56d | |||
| a2ab535256 | |||
| c3788356c7 | |||
| fa012c3161 | |||
| 1c7e95b119 | |||
| ba1f7eb450 | |||
| 873c061152 | |||
| 16eccddb59 | |||
| c2dd26c5d3 | |||
| 6da6ace82f | |||
| 2b191dddd2 |
14
.dockerignore
Normal file
14
.dockerignore
Normal file
@ -0,0 +1,14 @@
|
||||
#.dockerignore
|
||||
# Gitea
|
||||
.gitea
|
||||
|
||||
# Docker
|
||||
.dockerignore
|
||||
dockerfile
|
||||
compose.yaml
|
||||
compose.yml
|
||||
|
||||
# Git
|
||||
.gitignore
|
||||
*.md
|
||||
example.env
|
||||
48
.gitea/workflows/docker-build-push.yaml
Normal file
48
.gitea/workflows/docker-build-push.yaml
Normal file
@ -0,0 +1,48 @@
|
||||
# Builds the Docker image and pushes it to the private Gitea registry
# whenever a release is published.
name: Build and Push Docker Image

on:
  release:
    types: [published]

env:
  REGISTRY: git.frik.su
  # gitea.repository expands to "owner/repo" and doubles as the image path.
  IMAGE_NAME: ${{ gitea.repository }}

jobs:
  build-and-push:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      # The runner image may lack Docker; install it via the official script.
      - name: Install Docker
        run: curl -fsSL https://get.docker.com | sh

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Login to registry
        uses: docker/login-action@v2
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      # Derive image tags (tag ref, X.Y.Z, X.Y, X) from the release tag.
      - name: Extract Docker tags from release
        id: meta
        uses: docker/metadata-action@v4
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=tag
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}

      - name: Build and push Docker image
        uses: docker/build-push-action@v4
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@ -179,3 +179,4 @@ cython_debug/
|
||||
/src/test.py
|
||||
responseTester.py
|
||||
/sys.stdout
|
||||
/src/otherTest.py
|
||||
238
README.MD
Normal file
238
README.MD
Normal file
@ -0,0 +1,238 @@
|
||||
# Habr article API
|
||||
|
||||
This is a simple API that can be deployed on your server to access habr.com's articles content, as well as keeping a record of articles and their ratings which you can manage by connecting to corresponding endpoints.
|
||||
|
||||
From here on out we will call a pair "article_url" - "rating" an **entry**.
|
||||
|
||||
## API Reference
|
||||
|
||||
|
||||
|
||||
|
||||
### Ping
|
||||
|
||||
```http
|
||||
GET /api/ping
|
||||
```
|
||||
|
||||
A basic ping endpoint.
|
||||
|
||||
#### Response on success
|
||||
|
||||
`application/json`
|
||||
```json
|
||||
{
|
||||
"message": "pong"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
### See current entries
|
||||
|
||||
|
||||
```http
|
||||
GET /api/rates
|
||||
```
|
||||
|
||||
Returns all entries in the PostgreSQL DB.
|
||||
|
||||
#### Response on success
|
||||
|
||||
`application/json`
|
||||
```json
|
||||
{
|
||||
"article_url_1": rating(0 or 1),
|
||||
"article_url_2": rating(0 or 1),
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
### Make a new entry
|
||||
|
||||
|
||||
```http
|
||||
POST /api/article/rate
|
||||
```
|
||||
|
||||
Save a new entry to the DB.
|
||||
|
||||
#### Request body
|
||||
|
||||
`application/json`
|
||||
```json
|
||||
{
|
||||
"url": {article_url},
|
||||
"rating": {integer, 0 or 1}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
#### Response on success
|
||||
|
||||
`application/json`
|
||||
```json
|
||||
{
|
||||
"message": "success",
|
||||
"url": "{article_url}",
|
||||
"rating": {integer, 0 or 1}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
### Delete an entry
|
||||
|
||||
|
||||
```http
|
||||
POST /api/article/remove_rate
|
||||
```
|
||||
|
||||
Delete an existing entry from the DB.
|
||||
|
||||
#### Request body
|
||||
|
||||
`application/json`
|
||||
```json
|
||||
{
|
||||
"url": "{article_url}"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
#### Response on success
|
||||
|
||||
`application/json`
|
||||
```json
|
||||
{
|
||||
"message": "success"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### Get article html
|
||||
|
||||
```http
|
||||
POST /api/article/get/html
|
||||
```
|
||||
|
||||
Get html of a desired habr article body encoded in base64.
|
||||
|
||||
#### Request body
|
||||
|
||||
`application/json`
|
||||
```json
|
||||
{
|
||||
"url": "{article_url}"
|
||||
}
|
||||
```
|
||||
|
||||
#### Response on success
|
||||
|
||||
`text/plain`
|
||||
```
|
||||
{article_url}
|
||||
|
||||
{b64 encoded html}
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### Get article MD
|
||||
|
||||
```http
|
||||
POST /api/article/get/md
|
||||
```
|
||||
|
||||
Get md of a desired habr article body encoded in base64.
|
||||
|
||||
#### Request body
|
||||
|
||||
`application/json`
|
||||
```json
|
||||
{
|
||||
"url": "{article_url}"
|
||||
}
|
||||
```
|
||||
|
||||
#### Response on success
|
||||
|
||||
`text/plain`
|
||||
```
|
||||
{article_url}
|
||||
|
||||
{b64 encoded md}
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### Get html of N articles from habr.com/feed
|
||||
|
||||
```http
|
||||
POST /api/articles/get/html
|
||||
```
|
||||
|
||||
Get html bodies of N last articles from [habr.com/feed](https://habr.com/feed)
|
||||
|
||||
#### Request body
|
||||
|
||||
`application/json`
|
||||
```json
|
||||
{
|
||||
"amount": {articles_amount}
|
||||
}
|
||||
```
|
||||
|
||||
#### Response on success
|
||||
|
||||
`application/json`
|
||||
```json
|
||||
{
|
||||
"{article_url_1}": "{b64_encoded_html}",
|
||||
"{article_url_2}": "{b64_encoded_html}",
|
||||
...
|
||||
"{article_url_n}": "{b64_encoded_html}"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
### Get MD of N articles from habr.com/feed
|
||||
|
||||
```http
|
||||
POST /api/articles/get/md
|
||||
```
|
||||
|
||||
Get MD of N last articles from [habr.com/feed](https://habr.com/feed)
|
||||
|
||||
#### Request body
|
||||
|
||||
`application/json`
|
||||
```json
|
||||
{
|
||||
"amount": {articles_amount}
|
||||
}
|
||||
```
|
||||
|
||||
#### Response on success
|
||||
|
||||
`application/json`
|
||||
```json
|
||||
{
|
||||
"{article_url_1}": "{b64_encoded_md}",
|
||||
"{article_url_2}": "{b64_encoded_md}",
|
||||
...
|
||||
"{article_url_n}": "{b64_encoded_md}"
|
||||
}
|
||||
```
|
||||
36
compose.yml
Normal file
36
compose.yml
Normal file
@ -0,0 +1,36 @@
|
||||
services:
  habr-article-api:
    image: git.frik.su/n0one/habr-article-api:latest
    container_name: habr-article-api
    ports:
      - 4002:8000  # host 4002 -> uvicorn's in-container port 8000 (see src/main.py)
    environment:
      DB_NAME: postgres
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
      HOST_NAME: postgres  # the service name below, resolved on the compose network
      PG_PORT: 5432
      LOGGING_LEVEL: "INFO"
      ENABLE_API_DOCS: "True"
      UVI_LOGGING_LEVEL: "info"

    depends_on:
      postgres:
        condition: service_healthy  # wait for pg_isready before starting the API
    restart: unless-stopped

  postgres:
    image: postgres:latest
    container_name: habr-article-api-postgres
    environment:
      POSTGRES_DB: postgres
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
    ports:
      - :5432  # NOTE(review): publishes 5432 on a *random* host port — confirm intended
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "postgres"]
      interval: 5s
      timeout: 5s
      retries: 10
    restart: unless-stopped
||||
13
dockerfile
Normal file
13
dockerfile
Normal file
@ -0,0 +1,13 @@
|
||||
# Slim Python base keeps the image small; the apt packages below provide the
# PostgreSQL client headers and compiler that psycopg2 needs to build.
FROM python:3.13-slim

WORKDIR /app

RUN apt-get update && apt-get install -y --no-install-recommends libpq-dev build-essential

# Copy requirements first so the pip layer is cached between source-only builds.
COPY requirements.txt ./

RUN pip install --no-cache-dir -r requirements.txt

COPY . .

CMD ["python", "src/main.py"]
|
||||
BIN
requirements.txt
BIN
requirements.txt
Binary file not shown.
@ -1,72 +0,0 @@
|
||||
import DBwork
|
||||
from fastapi import FastAPI, Response, status
|
||||
from pydantic import BaseModel
|
||||
import psycopg2
|
||||
from json import dumps
|
||||
|
||||
|
||||
schema_name = 'harticle'
|
||||
table_name = 'articles'
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
class Entry(BaseModel):
|
||||
url: str
|
||||
rating: int | None = None
|
||||
|
||||
|
||||
@app.get('/api/ping')
|
||||
async def ping():
|
||||
return {'message': 'pong'}
|
||||
|
||||
|
||||
@app.get('/api/rates')
|
||||
async def get_rates():
|
||||
return dumps(DBwork.get_all_entries())
|
||||
|
||||
|
||||
@app.post('/api/article/rate')
|
||||
async def save_rating(entry: Entry, response: Response):
|
||||
conn, cur = DBwork.set_connection()
|
||||
try:
|
||||
DBwork.add_entry(article_url=entry.url,
|
||||
rating=entry.rating,
|
||||
connection=conn,
|
||||
cursor=cur
|
||||
)
|
||||
message = 'success'
|
||||
except psycopg2.Error:
|
||||
response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
|
||||
message = 'internal server error'
|
||||
finally:
|
||||
DBwork.close_connection(conn, cur)
|
||||
return {'message': message,
|
||||
'url': entry.url,
|
||||
'rating': entry.rating
|
||||
}
|
||||
|
||||
|
||||
@app.post('/api/article/remove_rate')
|
||||
async def remove_rating(entry: Entry, response: Response):
|
||||
conn, cur = DBwork.set_connection()
|
||||
try:
|
||||
DBwork.delete_entry(entry.url, conn, cur)
|
||||
message = 'success'
|
||||
except psycopg2.Error:
|
||||
response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
|
||||
message = 'internal server error'
|
||||
finally:
|
||||
DBwork.close_connection(conn, cur)
|
||||
return {'message': message}
|
||||
|
||||
|
||||
@app.post('/api/articles/get')
|
||||
async def megafunc(entry: Entry, response: Response):
|
||||
...
|
||||
|
||||
|
||||
''' MAIN '''
|
||||
async def main():
|
||||
DBwork.schema_creator(schema_name)
|
||||
DBwork.table_creator(schema_name, table_name)
|
||||
|
||||
7
src/DBmodel.py
Normal file
7
src/DBmodel.py
Normal file
@ -0,0 +1,7 @@
|
||||
import psycopg2
|
||||
|
||||
|
||||
class DataBase:
    """Process-wide holder for the single PostgreSQL connection shared by the app."""
    # Set by DBwork.set_connection() during app startup (see app_creator.lifespan);
    # stays None until then.
    # NOTE(review): psycopg2._psycopg is a private module — the annotation works
    # but relies on a non-public name; confirm acceptable.
    connection: psycopg2._psycopg.connection | None = None

# Module-level singleton imported elsewhere as `from DBmodel import db`.
db = DataBase()
|
||||
@ -1,122 +1,101 @@
|
||||
import psycopg2
|
||||
import config
|
||||
from loguru import logger
|
||||
|
||||
|
||||
logging_level = config.logging_level
|
||||
logger.add(
|
||||
"sys.stdout",
|
||||
format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level} | {file}:{line} - {message}",
|
||||
colorize=True,
|
||||
level=logging_level
|
||||
)
|
||||
from DBmodel import db
|
||||
|
||||
|
||||
#connection stuff
|
||||
def set_connection():
|
||||
try:
|
||||
connection = psycopg2.connect(
|
||||
db.connection = psycopg2.connect(
|
||||
dbname = config.db_name,
|
||||
user = config.postgres_user,
|
||||
password = config.postgres_password,
|
||||
host = config.host_name,
|
||||
port = config.port
|
||||
)
|
||||
cursor = connection.cursor()
|
||||
return connection, cursor
|
||||
logger.info('Connection to PostreSQL DB set successfully')
|
||||
except psycopg2.Error as e:
|
||||
logger.error(f'Failed to set connection to the PostgreSQL DB: {e.pgerror}')
|
||||
exit()
|
||||
|
||||
|
||||
def close_connection(connection, cursor):
|
||||
def close_connection(connection):
|
||||
try:
|
||||
cursor = connection.cursor()
|
||||
cursor.close()
|
||||
connection.close()
|
||||
logger.info('Connection to PostreSQL DB closed successfully')
|
||||
except psycopg2.Error as e:
|
||||
logger.error(f'Failed to close PostgreSQL connection: {e.pgerror}')
|
||||
|
||||
|
||||
#actual DB alters
|
||||
def add_entry(article_url, rating):
|
||||
connection, cursor = set_connection()
|
||||
def add_entry(article_url, rating, connection):
|
||||
try:
|
||||
cursor = connection.cursor()
|
||||
cursor.execute("INSERT INTO harticle.articles (article_url, rating) VALUES (%s, %s);", (article_url, rating,))
|
||||
connection.commit()
|
||||
logger.info('An entry has been written to the PGSQL DB successfully')
|
||||
except psycopg2.Error as e:
|
||||
logger.error(f'Failed to write an entry for article \'{article_url}\': {e.pgerror}')
|
||||
finally:
|
||||
close_connection(connection, cursor)
|
||||
|
||||
|
||||
def delete_entry(article_url, connection, cursor):
|
||||
connection, cursor = set_connection()
|
||||
def delete_entry(article_url, connection):
|
||||
try:
|
||||
cursor = connection.cursor()
|
||||
cursor.execute("DELETE FROM harticle.articles WHERE article_url = %s;", (article_url,))
|
||||
connection.commit()
|
||||
logger.info(f'Rating for article \'{article_url}\' was cleared successfully')
|
||||
except psycopg2.Error as e:
|
||||
logger.error(f'Failed to clear a rating entry for article \'{article_url}\': {e.pgerror}')
|
||||
finally:
|
||||
close_connection(connection, cursor)
|
||||
|
||||
|
||||
# def delete_rating(article_url, connection, cursor):
|
||||
# close_connection(connection, cursor)
|
||||
# try:
|
||||
# cursor.execute("UPDATE harticle.articles SET rating = NULL WHERE article_url = %s;", (article_url,))
|
||||
# connection.commit()
|
||||
# logger.info(f'Rating for article \'{article_url}\' was cleared successfully')
|
||||
# close_connection(connection, cursor)
|
||||
# except psycopg2.Error as e:
|
||||
# logger.error(f'Failed to clear a rating entry for article \'{article_url}\': {e.pgerror}')
|
||||
|
||||
|
||||
def get_all_entries():
|
||||
connection, cursor = set_connection()
|
||||
def get_all_entries(connection):
|
||||
try:
|
||||
cursor.execute('SELECT article_url, rating FROM harticle.articles;')
|
||||
entries = cursor.fetchall()
|
||||
cursor = connection.cursor()
|
||||
cursor.execute('SELECT article_url FROM harticle.articles;')
|
||||
urls = cursor.fetchall()
|
||||
cursor.execute('SELECT rating FROM harticle.articles;')
|
||||
ratings = cursor.fetchall()
|
||||
logger.info('All entry pairs have been retrieved successfully')
|
||||
entries = {}
|
||||
for i in range(len(urls)):
|
||||
entries[urls[i][0]] = ratings[i][0]
|
||||
return entries
|
||||
except psycopg2.Error as e:
|
||||
logger.error(f'Failed to fetch DB entries: {e.pgerror}')
|
||||
finally:
|
||||
close_connection(connection, cursor)
|
||||
|
||||
|
||||
#'create if no any' type functions for schema and table
|
||||
def schema_creator(schema_name):
|
||||
conn, cur = set_connection()
|
||||
def schema_creator(schema_name, connection):
|
||||
cur = connection.cursor()
|
||||
try:
|
||||
cur.execute(f'CREATE SCHEMA IF NOT EXISTS {schema_name};')
|
||||
conn.commit()
|
||||
connection.commit()
|
||||
logger.info(f'Successfully created schema {schema_name} if it didn\'t exist yet')
|
||||
except psycopg2.Error as e:
|
||||
logger.error(f'Error during schema creation: {e}')
|
||||
finally:
|
||||
close_connection(conn, cur)
|
||||
|
||||
|
||||
def table_creator(schema_name, table_name):
|
||||
conn, cur = set_connection()
|
||||
|
||||
def table_creator(schema_name, table_name, connection):
|
||||
cur = connection.cursor()
|
||||
try:
|
||||
cur.execute(f'''
|
||||
CREATE TABLE IF NOT EXISTS {schema_name}.{table_name}
|
||||
(
|
||||
id SERIAL PRIMARY KEY,
|
||||
article_url VARCHAR(3000) UNIQUE NOT NULL,
|
||||
rating INT CHECK (rating < 2)
|
||||
)
|
||||
CREATE TABLE IF NOT EXISTS {schema_name}.{table_name}
|
||||
(
|
||||
id SERIAL PRIMARY KEY,
|
||||
article_url VARCHAR(3000) UNIQUE NOT NULL,
|
||||
rating INT CHECK (rating < 2)
|
||||
)
|
||||
|
||||
TABLESPACE pg_default;
|
||||
TABLESPACE pg_default;
|
||||
|
||||
ALTER TABLE IF EXISTS {schema_name}.{table_name}
|
||||
OWNER to {config.postgres_user};
|
||||
''')
|
||||
conn.commit()
|
||||
ALTER TABLE IF EXISTS {schema_name}.{table_name}
|
||||
OWNER to {config.postgres_user};
|
||||
''')
|
||||
connection.commit()
|
||||
logger.info(f'Successfully created table {table_name} in schema {schema_name} if it didn\'t exist yet')
|
||||
except psycopg2.Error as e:
|
||||
logger.error(f'Error during table creation: {e}')
|
||||
finally:
|
||||
close_connection(conn, cur)
|
||||
42
src/app_creator.py
Normal file
42
src/app_creator.py
Normal file
@ -0,0 +1,42 @@
|
||||
import config
|
||||
import router
|
||||
import DBwork
|
||||
from DBmodel import db
|
||||
from fastapi import FastAPI
|
||||
from loguru import logger
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
|
||||
if config.enable_api_docs:
|
||||
docs_url = '/api/docs'
|
||||
else:
|
||||
docs_url = None
|
||||
|
||||
schema_name = 'harticle'
|
||||
table_name = 'articles'
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook.

    On startup: open the shared DB connection (stored on DBmodel.db as a side
    effect of set_connection) and create the schema/table if missing.
    On shutdown: close the shared connection.
    """
    DBwork.set_connection()
    DBwork.schema_creator(schema_name, db.connection)
    DBwork.table_creator(schema_name, table_name, db.connection)
    yield
    # Runs when the application is shutting down.
    DBwork.close_connection(db.connection)
|
||||
|
||||
|
||||
app = FastAPI(docs_url=docs_url, lifespan=lifespan)
|
||||
|
||||
|
||||
def create_app():
    """Configure logging, attach the API router, and return the FastAPI app."""
    import sys  # local import: only needed to hand loguru the real stream

    # BUG FIX: the sink was the *string* "sys.stdout", which makes loguru log
    # to a file literally named "sys.stdout" (the repo even gitignores
    # /sys.stdout). Pass the stream object instead.
    logger.add(
        sys.stdout,
        format="{time:YYYY-MM-DD HH:mm:ss.SSS} | {level} | {file}:{line} - {message}",
        colorize=True,
        level=config.logging_level,
    )

    # Mount every /api endpoint defined in router.py.
    app.include_router(router.router)

    return app
|
||||
@ -5,5 +5,10 @@ db_name = config('DB_NAME')
|
||||
postgres_user = config('POSTGRES_USER')
|
||||
postgres_password = config('POSTGRES_PASSWORD')
|
||||
host_name = config('HOST_NAME')
|
||||
port = config('PORT')
|
||||
port = config('PG_PORT')
|
||||
|
||||
enable_api_docs = config('ENABLE_API_DOCS', cast=bool)
|
||||
|
||||
uvicorn_logging_level = config('UVI_LOGGING_LEVEL')
|
||||
|
||||
logging_level = config('LOGGING_LEVEL')
|
||||
@ -1,7 +1,8 @@
|
||||
import asyncio
|
||||
import APIapp
|
||||
import uvicorn
|
||||
from config import uvicorn_logging_level
|
||||
from app_creator import create_app
|
||||
|
||||
|
||||
asyncio.run(APIapp.main())
|
||||
uvicorn.run("APIapp:app", host="127.0.0.1", port=8000, log_level="info")
|
||||
if __name__ == '__main__':
|
||||
app = create_app()
|
||||
uvicorn.run(app=app, host="0.0.0.0", port=8000, log_level=uvicorn_logging_level.lower())
|
||||
|
||||
101
src/router.py
Normal file
101
src/router.py
Normal file
@ -0,0 +1,101 @@
|
||||
import DBwork
|
||||
from DBmodel import db
|
||||
import scraper
|
||||
from fastapi import Response, status, APIRouter
|
||||
from pydantic import BaseModel
|
||||
import psycopg2
|
||||
import base64
|
||||
|
||||
|
||||
router = APIRouter(prefix='/api')
|
||||
|
||||
|
||||
class Entry(BaseModel):
    # Request body for /article/rate and /article/remove_rate.
    # NOTE(review): `username` is required here but not mentioned in the README
    # request bodies — confirm the docs or the model.
    username: str
    url: str                   # habr article URL the rating applies to
    rating: int | None = None  # 0 or 1 per the README; unused by remove_rate


class Article(BaseModel):
    # Request body for the single-article /article/get/* endpoints.
    url: str


class Amount(BaseModel):
    # Request body for the feed-based /articles/get/* endpoints.
    amount: int
|
||||
|
||||
|
||||
@router.get('/ping')
async def ping():
    """Liveness probe; always answers with a pong message."""
    return dict(message='pong')
|
||||
|
||||
|
||||
@router.get('/rates')
async def get_rates():
    """Return every stored {article_url: rating} pair from the database."""
    # Uses the shared connection opened during the app lifespan (DBmodel.db).
    result = DBwork.get_all_entries(db.connection)
    return result
|
||||
|
||||
|
||||
@router.post('/article/rate')
async def save_rating(entry: Entry, response: Response):
    """Persist a (url, rating) entry; responds 500 on a database error."""
    try:
        DBwork.add_entry(article_url=entry.url,
                         rating=entry.rating,
                         connection=db.connection
                         )
        message = 'success'
    except psycopg2.Error:
        # DB-level failure: report it without leaking driver details.
        response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
        message = 'internal server error'
    # BUG FIX: the return previously lived inside `finally:`, which silently
    # swallows any non-psycopg2 exception and can itself raise NameError when
    # `message` is unbound; a plain return after try/except is equivalent on
    # the handled paths and lets unexpected errors propagate.
    return {'message': message,
            'username': entry.username,
            'url': entry.url,
            'rating': entry.rating
            }
|
||||
|
||||
|
||||
@router.post('/article/remove_rate')
async def remove_rating(entry: Entry, response: Response):
    """Delete the entry for entry.url; responds 500 on a database error."""
    try:
        DBwork.delete_entry(entry.url, db.connection)
        message = 'success'
    except psycopg2.Error:
        response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
        message = 'internal server error'
    # BUG FIX: `return` inside `finally:` swallowed every in-flight exception
    # (and risked NameError on unbound `message`); return after the handler
    # instead — identical behavior on the handled paths.
    return {'message': message}
|
||||
|
||||
|
||||
@router.post('/article/get/html')
async def get_article_html(article: Article, response: Response = None):
    """Return the article URL plus its base64-encoded HTML body as text/plain.

    Body format: "<url>\r\n<base64 html>".
    """
    # NOTE(review): in this revision scraper.get_article_html has no return on
    # fetch failure, so html_string may be None and .encode() would raise.
    html_string = await scraper.get_article_html(article.url)
    b64_string = base64.b64encode(html_string.encode('utf-8')).decode('utf-8')
    return Response(content=article.url + '\r\n' + b64_string, media_type='text/plain')
|
||||
|
||||
|
||||
@router.post('/article/get/md')
async def get_article_md(article: Article, response: Response = None):
    """Return the article URL plus its base64-encoded Markdown body as text/plain.

    Body format: "<url>\r\n<base64 markdown>".
    """
    # md=True makes the scraper convert the article HTML to Markdown.
    md_string = await scraper.get_article_html(article.url, md=True)
    b64_string = base64.b64encode(md_string.encode('utf-8')).decode('utf-8')
    return Response(content=article.url + '\r\n' + b64_string, media_type='text/plain')
|
||||
|
||||
|
||||
@router.post('/articles/get/html')
async def get_n_articles_html(amount: Amount, response: Response = None):
    """Fetch the newest N feed articles and map full URL -> base64 HTML."""
    result = {}
    for path in await scraper.get_articles_from_feed(amount.amount):
        full_url = f'https://habr.com{path}'
        page_html = await scraper.get_article_html(full_url)
        result[full_url] = base64.b64encode(page_html.encode('utf-8')).decode('utf-8')
    return result
|
||||
|
||||
|
||||
@router.post('/articles/get/md')
async def get_n_articles_md(amount: Amount, response: Response = None):
    """Fetch the newest N feed articles and map full URL -> base64 Markdown."""
    paths = await scraper.get_articles_from_feed(amount.amount)
    result = {}
    for path in paths:
        full_url = f'https://habr.com{path}'
        md_text = await scraper.get_article_html(full_url, md=True)
        result[full_url] = base64.b64encode(md_text.encode('utf-8')).decode('utf-8')
    return result
|
||||
36
src/scraper.py
Normal file
36
src/scraper.py
Normal file
@ -0,0 +1,36 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
import re
|
||||
from markdownify import MarkdownConverter
|
||||
from loguru import logger
|
||||
|
||||
|
||||
async def get_article_html(url: str, md: bool = False) -> str:
    """Download a habr article and return its body as prettified HTML,
    or as Markdown when *md* is True.

    Raises:
        ValueError: if the page cannot be fetched or the article body
            ('div.tm-article-presenter') is missing.
    """
    # NOTE(review): requests.get is blocking inside an async def — it stalls
    # the event loop for the whole request; consider run_in_executor/httpx.
    response = requests.get(url)
    if response.status_code != 200:
        logger.error(f'Error during fetching habr response. Status code: {response.status_code}')
        # BUG FIX: previously fell through and returned None, making callers
        # crash later with AttributeError on .encode(); fail loudly here.
        raise ValueError(f'Failed to fetch article: {url}')

    soup = BeautifulSoup(response.content, 'html.parser')
    content = soup.find('div', class_='tm-article-presenter')
    if content is None:
        # BUG FIX: soup.find returns None when the div is absent; the old code
        # would raise AttributeError on content.find_all below.
        logger.error(f'Article body not found on page: {url}')
        raise ValueError(f'Article body not found: {url}')

    # Strip navigation/rating/metadata noise before converting.
    filter_tags = ['footer', 'meta', 'widget', 'vote', 'hubs', 'sticky']
    for tag in filter_tags:
        for element in content.find_all(class_=re.compile(tag)):
            element.decompose()

    if md:
        return MarkdownConverter().convert_soup(content)
    return content.prettify()
|
||||
|
||||
|
||||
async def get_articles_from_feed(amount: int) -> list[str]:
    """Return the href paths of up to *amount* latest articles on habr.com/ru/feed.

    Returns an empty list when the feed cannot be fetched (error is logged).
    """
    response = requests.get('https://habr.com/ru/feed/')
    if response.status_code != 200:
        logger.error(f'Error during fetching habr response. Status code: {response.status_code}')
        # BUG FIX: previously returned None implicitly, which breaks callers
        # that iterate over the result; an empty list degrades gracefully.
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    # hrefs are site-relative paths like '/ru/articles/<id>/'.
    return [str(link['href'])
            for link in soup.find_all(class_='tm-title__link', limit=amount, href=True)]
|
||||
Reference in New Issue
Block a user