initial commit
This commit is contained in: commit 00eca9aa4e
28 changed files with 2926 additions and 0 deletions
27  .chglog/config.yml  Executable file
@@ -0,0 +1,27 @@
style: none
template: CHANGELOG.tpl.md
info:
  title: CHANGELOG
  repository_url: git.fqserv.eu:takaoni/tenkan.git
options:
  commits:
    # filters:
    #   Type:
    #     - feat
    #     - fix
    #     - perf
    #     - refactor
  commit_groups:
    # title_maps:
    #   feat: Features
    #   fix: Bug Fixes
    #   perf: Performance Improvements
    #   refactor: Code Refactoring
  header:
    pattern: "^(\\w*)\\:\\s(.*)$"
    pattern_maps:
      - Type
      - Subject
  notes:
    keywords:
      - BREAKING CHANGE
140  .gitignore  vendored  Normal file
@@ -0,0 +1,140 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

.chglog/
55  .pre-commit-config.yaml  Normal file
@@ -0,0 +1,55 @@
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.1.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-json
      - id: check-added-large-files
      - id: double-quote-string-fixer
      - id: fix-encoding-pragma
      - id: no-commit-to-branch
      - id: name-tests-test
  - repo: https://gitlab.com/pycqa/flake8
    rev: 3.9.2
    hooks:
      - id: flake8
  - repo: https://github.com/psf/black
    rev: 21.12b0
    hooks:
      - id: black
        name: black (python)
        args: ['-S']
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: v0.931
    hooks:
      - id: mypy
        additional_dependencies: [pydantic]  # add if use pydantic
  - repo: https://github.com/PyCQA/isort
    rev: 5.10.1
    hooks:
      - id: isort
        name: isort (python)
        args: ['--profile', 'black']
  - repo: https://github.com/PyCQA/bandit
    rev: 1.7.1
    hooks:
      - id: bandit
        exclude: ^tests/
  - repo: https://github.com/asottile/pyupgrade
    rev: v2.31.0
    hooks:
      - id: pyupgrade
  - repo: local
    hooks:
      - id: pylint
        name: pylint
        entry: pylint --disable=E1101,E0401,C0301 --ignore=__init__.py --ignore-patterns=(.)*_test\.py,test_(.)*\.py
        language: system
        types: [python]
      - id: pytest
        name: Check pytest unit tests pass
        entry: pytest
        pass_filenames: false
        language: system
        types: [python]
0  CHANGELOG.md  Normal file
13  LICENCE  Normal file
@@ -0,0 +1,13 @@
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
Version 2, December 2004

Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>

Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.

DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION

0. You just DO WHAT THE FUCK YOU WANT TO.
80  README.md  Normal file
@@ -0,0 +1,80 @@
# tenkan

Command line tool to convert HTTP RSS/Atom feeds to gemini format.

## Installation

```shell script
pip install tenkan
```

## Usage

Add a feed:

```shell script
# Any valid RSS/Atom feed
tenkan add feedname url
```

Update the content of the feed list:

```shell script
tenkan update
```

Delete a feed:

```shell script
tenkan delete feedname
```

List subscribed feeds:

```shell script
tenkan list
```

## Options

A debug mode is available via the --debug option.

If you want to use a configuration or feeds file located somewhere other than the default, use the --config and --feedsfile options.
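For example, to update feeds with debug logging while pointing tenkan at files in a non-default location (the paths below are only illustrative):

```shell script
tenkan --config /path/to/tenkan.conf --feedsfile /path/to/feeds.json --debug update
```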

## Configuration

tenkan searches for a configuration file at the following location:

`$XDG_CONFIG_HOME/tenkan/tenkan.conf`

### Example config

This example can also be found in tenkan.conf.example.

```ini
[tenkan]
gemini_path = /usr/local/gemini/
gemini_url = gemini://foo.bar/feeds/
# will purge feed folders having more than the defined element count
# purge_feed_folder_after = 100

[filters]
# authors we don't want to read
# authors_blacklist = foo, bar
# blacklist of article titles; if a title matches, the article won't be processed
# titles_blacklist = foo, bar
# blacklist of article links; if a link matches, the article won't be processed
# links_blacklist = foo/bar.com, bar/foo, bla

[formatting]
# maximum article title size, 120 chars if not provided
# title_size = 120

# feeds with truncated content
# will be fetched and converted using readability
# truncated_feeds = foo, bar
```

## Todolist

- [ ] Add an edit command
- [ ] Add a --feedname option to the update command, to update a single feed
- [ ] Rewrite configuration checks
- [ ] Improve tests
- [ ] Refactor parts that need it, such as write_article
- [ ] (not sure if relevant) migrate images too, for gemini clients that can handle it

## Development

I recommend using pre-commit. The pre-commit configuration I use is located in the .pre-commit-config.yaml file.

Run the pre-commit command before every pull request and fix the warnings or errors it produces.
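A minimal setup sketch (assuming pre-commit itself is installed from PyPI; it is not part of this commit):

```shell script
pip install pre-commit
pre-commit install           # install the git hook scripts
pre-commit run --all-files   # run every configured hook against the whole repo
```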
1287  poetry.lock  generated  Normal file
File diff suppressed because it is too large
66  pyproject.toml  Normal file
@@ -0,0 +1,66 @@
[tool.poetry]
name = "tenkan"
version = "0.1.0"
description = "RSS/atom feed converter from html to gemini"
authors = ["Quentin Ferrand <quentin.ferrand@protonmail.com>"]

[tool.poetry.dependencies]
python = "^3.8"
DateTime = "^4.3"
feedparser = "^6.0.8"
feedgen = "^0.9.0"
requests = "^2.26.0"
markdownify = "^0.10.0"
md2gemini = "^1.8.1"
readability-lxml = "^0.8.1"
rich = "^10.16.2"
prettytable = "^3.0.0"

[tool.poetry.dev-dependencies]
pytest = "^5.2"
black = {version = "^21.11b1", allow-prereleases = true}
flake8 = "^4.0.1"
mypy = "^0.910"
isort = "^5.10.1"
pytest-cov = "^3.0.0"
pylint = "^2.12.2"
pyupgrade = "^2.31.0"
bandit = "^1.7.1"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.black]
line-length = 79
target-version = ['py38']
include = '\.pyi?$'
exclude = '''

(
  /(
      \.eggs       # exclude a few common directories in the
    | \.git        # root of the project
    | \.hg
    | \.mypy_cache
    | \.tox
    | \.venv
    | _build
    | buck-out
    | build
    | dist
  )/
  | foo.py         # also separately exclude a file named foo.py in
                   # the root of the project
)
'''

[tool.isort]
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
line_length = 79

[tool.poetry.scripts]
tenkan = "tenkan.cli:main"
21  tenkan.conf.example  Normal file
@@ -0,0 +1,21 @@
[tenkan]
gemini_path = /usr/local/gemini/
gemini_url = gemini://foo.bar/feeds/
# will purge feed folders having more than the defined element count
# purge_feed_folder_after = 100

[filters]
# authors we don't want to read
# authors_blacklist = foo, bar
# blacklist of article titles; if a title matches, the article won't be processed
# titles_blacklist = foo, bar
# blacklist of article links; if a link matches, the article won't be processed
# links_blacklist = foo/bar.com, bar/foo, bla

[formatting]
# maximum article title size, 120 chars if not provided
# title_size = 120

# feeds with truncated content
# will be fetched and converted using readability
# truncated_feeds = foo, bar
2  tenkan/__init__.py  Normal file
@@ -0,0 +1,2 @@
# -*- coding: utf-8 -*-
__version__ = '0.1.0'
214  tenkan/cli.py  Normal file
@@ -0,0 +1,214 @@
# -*- coding: utf-8 -*-

"""
cli module

Parses the command line arguments and runs the other modules as needed,
depending on which command is given.
"""

import configparser
import logging
import sys
from argparse import ArgumentParser, RawTextHelpFormatter
from datetime import datetime
from pathlib import Path
from typing import NoReturn

from rich.traceback import install

from tenkan.config import load_config
from tenkan.feedsfile import (
    add_feed,
    create,
    del_feed,
    list_feeds,
    update_last_run,
)
from tenkan.files import delete_folder
from tenkan.processing import (
    fetch_feeds,
    prepare_fetched_content,
    process_fetched_feeds,
    write_processed_feeds,
)

# rich tracebacks
install(show_locals=True)


class MyParser(ArgumentParser):  # pylint: disable=too-few-public-methods
    """Child class to print help msg if no or bad args given"""

    def error(self, message: str) -> NoReturn:
        """exit"""
        sys.stderr.write(f'error: {message}')
        self.print_help()
        sys.exit(2)


def load_args(args: list):
    """args parsing function"""

    desc = 'tenkan : RSS/atom feed converter from html to gemini\n\nTo show the detailed help of a COMMAND run `tenkan COMMAND --help`.'
    parser = MyParser(
        description=desc, prog='tenkan', formatter_class=RawTextHelpFormatter
    )

    parser.add_argument(
        '-v',
        '--version',
        action='version',
        version='%(prog)s 0.1.0',
        help='show %(prog)s version number and exit',
    )

    parser.add_argument(
        '--config',
        default=f'{str(Path.home())}/.config/tenkan/tenkan.conf',
        help='config file, $HOME/.config/tenkan/tenkan.conf by default',
        dest='config',
    )
    parser.add_argument(
        '--feedsfile',
        default=f'{str(Path.home())}/.config/tenkan/feeds.json',
        help='feeds file containing feed list, $HOME/.config/tenkan/feeds.json by default',
        dest='feedsfile',
    )

    parser.add_argument(
        '--debug', action='store_true', help='debug mode', dest='debug'
    )

    subparsers = parser.add_subparsers(
        title='command', required=True, dest='command'
    )

    parser_add = subparsers.add_parser(
        'add', help='add a feed to the feeds list'
    )
    parser_add.add_argument(
        'name', help='the name of the feed you want to add'
    )
    parser_add.add_argument('url', help='the HTTP url of the feed')

    parser_update = subparsers.add_parser(
        'update', help='update feeds folder from feed list'
    )
    parser_update.add_argument(
        '--force',
        action='store_true',
        default=False,
        help='update feed list even if there is no new content',
    )

    parser_list = subparsers.add_parser(
        'list', help='list all feeds in feeds list'
    )
    parser_list.add_argument(
        'list', help='list all feeds in feeds list', action='store_true'
    )

    parser_delete = subparsers.add_parser(
        'delete', help='remove a feed from the feeds list'
    )
    parser_delete.add_argument(
        'name', help='the name of the feed you want to delete'
    )
    parser_delete.add_argument(
        '--delete-gmi-folder',
        help='delete gmi folder, True by default',
        action='store_true',
        default=False,
        dest='delete_folder',
    )
    return parser.parse_args(args)


def set_logging(args, config: configparser.ConfigParser) -> None:
    """define logging settings"""
    log = logging.getLogger()
    log.setLevel(logging.INFO)
    if args.debug:
        log.setLevel(logging.DEBUG)

    console_formatter = logging.Formatter(fmt='%(message)s')
    file_formatter = logging.Formatter(
        fmt='%(asctime)s %(levelname)s: %(message)s'
    )

    stdout_handler = logging.StreamHandler(stream=sys.stdout)
    stdout_handler.setFormatter(console_formatter)
    log.addHandler(stdout_handler)

    if config['tenkan'].get('log_file'):
        file_handler = logging.FileHandler(
            filename=config['tenkan'].get('log_file'),
            encoding='utf-8',
        )
        file_handler.setFormatter(file_formatter)
        log.addHandler(file_handler)


def run(args, config: configparser.ConfigParser) -> None:
    """run stuff depending on the command used"""
    # exit with error if json file not found with actions other than add
    if not Path(args.feedsfile).exists() and 'add' not in args.command:
        logging.error('No json file %s, can\'t continue', args.feedsfile)
        sys.exit(1)

    # list feeds in a pretty format
    if args.command == 'list':
        list_feeds(file=args.feedsfile)

    # add a feed to feeds file
    if args.command == 'add':
        # if home directory, creates json with empty structure if no file yet
        if not Path(args.feedsfile).parents[0].exists():
            if str(Path(args.feedsfile).parents[0]) == str(Path.home()):
                Path(args.feedsfile).parents[0].mkdir(
                    parents=True, exist_ok=True
                )
            else:
                logging.error(
                    'Directory of feeds file %s not found, exiting',
                    args.feedsfile,
                )
                sys.exit(1)
        if not Path(args.feedsfile).is_file():
            create(args.feedsfile)
        add_feed(file=args.feedsfile, feed_name=args.name, feed_url=args.url)

    # delete a feed from feeds file
    if args.command == 'delete':
        del_feed(file=args.feedsfile, feed_name=args.name)
        if args.delete_folder:
            delete_folder(
                path=config['tenkan']['gemini_path'], feed_name=args.name
            )

    # update content
    if args.command == 'update':
        fetched_feeds = fetch_feeds(
            feeds_file=args.feedsfile,
            gmi_url=config['tenkan']['gemini_url'],
        )
        print('')
        fetched_feeds = prepare_fetched_content(fetched_feeds, args.force)
        feed_list = process_fetched_feeds(
            config=config,
            fetched_feeds=fetched_feeds,
            force=args.force,
        )
        if feed_list:
            write_processed_feeds(args, config, feed_list)
        else:
            logging.info('No new content to process, stopping')
        update_last_run(args.feedsfile, str(datetime.now()))


def main() -> None:
    """load conf, args, set logging and run main program"""

    args = load_args(args=sys.argv[1:])
    config = load_config(args.config)
    set_logging(args, config)
    run(args, config)
57  tenkan/config.py  Normal file
@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-

"""config module : configuration file parsing"""

import configparser
import logging
import sys
from pathlib import Path


def load_config(config_file) -> configparser.ConfigParser:
    """config load"""

    # exit with error if config file not found
    if not Path(config_file).exists():
        logging.error('No config file found %s, exiting', config_file)
        sys.exit(1)

    parser = configparser.ConfigParser()
    parser.read(config_file)
    if 'tenkan' not in parser.sections():
        logging.critical(
            "Missing [tenkan] section in config file %s, can't go further",
            config_file,
        )
        sys.exit(1)

    # shitty checks of config content
    # to improve later...
    for opt in ['gemini_path', 'gemini_url']:
        if not parser.has_option('tenkan', opt):
            logging.error('Missing option %s', opt)
            sys.exit(1)

    if parser.has_option('tenkan', 'purge_feed_folder_after'):
        if not int(parser['tenkan']['purge_feed_folder_after']):
            logging.error(
                'Wrong type for purge_feed_folder_after option, should be a number'
            )
            sys.exit(1)

    if parser.has_section('filters'):
        for item in parser['filters']:
            parser['filters'][item] = parser['filters'][item].replace(' ', '')

    if parser.has_option('formatting', 'truncated_feeds'):
        parser['formatting']['truncated_feeds'] = parser['formatting'][
            'truncated_feeds'
        ].replace(' ', '')

    if parser.has_option('formatting', 'title_size') and not int(
        parser['formatting']['title_size']
    ):
        logging.error('Wrong type for title_size option, should be a number')
        sys.exit(1)

    return parser
196  tenkan/feed.py  Normal file
@@ -0,0 +1,196 @@
# -*- coding: utf-8 -*-

""" feed module : feed object """

import logging
import re
import sys
from datetime import datetime, timezone
from typing import List

import requests  # type: ignore
from markdownify import markdownify  # type: ignore
from md2gemini import md2gemini  # type: ignore
from readability import Document  # type: ignore
from requests.adapters import HTTPAdapter  # type: ignore
from urllib3.util.retry import Retry

from tenkan.utils import measure


class Feed:
    """
    receives various feed data and applies necessary changes to make it usable into files
    """

    def __init__(
        self,
        input_content: dict,
        filters=None,
        formatting=None,
    ) -> None:
        self.content = input_content
        self.filters = filters
        self.formatting = formatting
        self.new_entries: list = []

    def needs_update(self) -> bool:
        """Checks if updates are available"""
        if not self.content['json_hash_last_update']:
            return True
        if (
            self.content['json_hash_last_update']
            != self.content['fetched_hash_last_update']
        ):
            return True
        return False

    @measure
    def get_new_entries(self) -> None:
        """Selects new entries depending on filters defined on config file"""
        for entry in self.content['fetched_content']['entries']:
            if (
                any(
                    x in entry['title']
                    for x in self.filters.get('titles_blacklist', '').split(
                        ','
                    )
                )
                or any(
                    x in entry['link']
                    for x in self.filters.get('links_blacklist', '').split(',')
                )
                or any(
                    # feedparser object can be problematic sometimes
                    # we need to check if we have an authors item
                    # AND we check if we can get its name because it can be empty
                    # AND if we don't have any of these, we return a stupid string
                    # to match the str type which is expected
                    x
                    in (
                        entry.get('authors')
                        and entry.authors[0].get('name')
                        or 'random string'
                    )
                    for x in self.filters.get('authors_blacklist', '').split(
                        ','
                    )
                )
            ):
                self.content['fetched_content']['entries'].remove(entry)
                continue
            self.new_entries.append(entry)

    @measure
    def export_content(self) -> dict:
        """Exports properly formatted content"""
        # create feed item structure
        data_export: dict[str, List] = {
            'title': self.content['title'],
            'last_update': self.content['last_update'],
            'gmi_url': self.content['gmi_url'],
            'articles': [],
            'hash_last_update': self.content['fetched_hash_last_update'],
        }
        for article in self.new_entries:
            article_formatted_title = self._format_article_title(article)
            article_date = self._get_article_date(article)

            # 2 possibilities to get content : content['value'] or summary
            content = (
                article['content'][0]['value']
                if article.get('content')
                else article['summary']
            )

            article_content = self._format_article_content(
                content, link=article['link']
            )

            data_export['articles'].append(
                {
                    'article_title': article['title'],
                    'article_formatted_title': article_formatted_title,
                    'article_content': article_content,
                    'article_date': article_date,
                    'http_url': article['link'],
                    'updated': article_date,
                }
            )

        return data_export

    @classmethod
    def _get_article_date(cls, article: dict) -> datetime:
        """get date string and return datetime object"""
        try:
            return (
                datetime(
                    *article.get(
                        'published_parsed', article['updated_parsed']
                    )[:6]
                )
                .replace(tzinfo=timezone.utc)
                .astimezone(tz=None)
            )
        except KeyError:
            logging.error(
                "Can't find a proper date field in article data, this should not happen !"
            )
            sys.exit(1)

    @measure
    def _format_article_title(self, article: dict) -> str:
        """title formatting to make it usable as a file title"""
        # truncate title size depending on title size
        maxlen = int(self.formatting.get('title_size', 120))
        if len(self.content['title']) + len(article['title']) > maxlen:
            maxlen = maxlen - len(self.content['title'])

        # We don't want multiline titles (yes, it happens)
        article['title'] = article['title'].replace('\n', '')[:maxlen]

        # remove special characters
        # probably not the best way to do it, as it seems there are performance issues here
        # to improve later if possible
        formatted_str = (
            article['title']
            .encode('utf8', 'ignore')
            .decode('utf8', 'ignore')
            .replace(' ', '-')
        )
        return re.sub('[«»!@#$%^&*(){};:,./<>?/|`~=_+]', '', formatted_str)[
            :maxlen
        ]

    @measure
    def _format_article_content(self, content: str, link: str) -> str:
        """
        Formats article content from html to gmi
        Will use readability if the feed is truncated, so it should retrieve the full content
        """

        # conversion to readability format if asked
        if self.content['title'] in self.formatting.get(
            'truncated_feeds', 'アケオメ'
        ).split(','):
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'
            }

            req = requests.Session()
            retries = Retry(
                total=5,
                backoff_factor=0.1,
                status_forcelist=[500, 502, 503, 504],
            )
            req.mount('http://', HTTPAdapter(max_retries=retries))
            req.mount('https://', HTTPAdapter(max_retries=retries))
            res = req.get(url=link, headers=headers)

            content = Document(res.text).summary()

        # convert html -> md -> gemini
        article = md2gemini(markdownify(content))

        return article
85  tenkan/feedsfile.py  Normal file
@@ -0,0 +1,85 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-

""" feedsfile module : json feeds file manipulation """

import json
import logging
from typing import Dict  # , Optional

from prettytable import PrettyTable


def create(file: str) -> None:
    """file creation"""
    with open(file, 'x') as _file:
        data: dict = {'feeds': {}}
        json.dump(data, _file)


def read(file: str) -> Dict[str, Dict[str, Dict[str, str]]]:
    """read file and return json data"""
    with open(file, 'r') as _file:
        file_data = json.load(_file)
    return file_data


def _write(file: str, file_data: Dict[str, Dict[str, Dict[str, str]]]) -> None:
    """write new data into file"""
    with open(file, 'w') as file_updated:
        json.dump(file_data, file_updated, indent=4)


def add_feed(file: str, feed_name: str, feed_url: str) -> None:
    """add a new feed into existing file"""
    file_data: Dict[str, Dict[str, Dict[str, str]]] = read(file)
    file_data['feeds'][feed_name] = {
        'url': feed_url,
        'last_update': '',
        'hash_last_update': '',
    }
    _write(file, file_data)
    logging.info('feed %s added', feed_name)


def del_feed(file: str, feed_name: str) -> None:
    """remove feed from file"""
    file_data = read(file)
    # don't do anything if no feed found
    if file_data['feeds'].get(feed_name):
        del file_data['feeds'][feed_name]
        _write(file, file_data)
        logging.info('feed %s deleted', feed_name)
    else:
        logging.info('no feed %s found in feeds file', feed_name)


def get_feed_item(file: str, feed_name: str, item: str) -> str:
    """Return element of a defined feed"""
    file_data = read(file)
    item = file_data['feeds'][feed_name][item]
    return item


def update_last_run(file: str, date: str) -> None:
    """Update last_run key in json file"""
    file_data: dict = read(file)
    file_data['last_run'] = date
    _write(file, file_data)


def update_feed(file: str, feed_name: str, hash_last_update: str) -> None:
    """update last update date of a defined feed"""
    file_data = read(file)
    file_data['feeds'][feed_name]['hash_last_update'] = hash_last_update
    _write(file, file_data)


def list_feeds(file: str) -> None:
    """list feed file content"""
    file_data = read(file)
    table = PrettyTable()
    table.field_names = ['Title', 'URL']
    for item, value in file_data['feeds'].items():
        table.add_row([item, value['url']])
    logging.info(table)
133  tenkan/files.py  Normal file
@@ -0,0 +1,133 @@
# -*- coding: utf-8 -*-

""" files module : generated gemini feeds files management """

import logging
import pathlib
import shutil
from typing import Dict, Union

from feedgen.feed import FeedGenerator  # type: ignore


def path_exists(path: str) -> bool:
    """Check if feed path exists"""
    if pathlib.Path(path).is_dir():
        return True
    return False


def write_files(path: str, data: dict, max_num_entries: int) -> None:
    """
    Converts feed objects into files and writes them in the feed folder
    """
    tpath = path
    path = path + data['title']
    pathlib.Path(path).mkdir(exist_ok=True)
    num_entries = 0
    # count entries in index file
    if pathlib.Path(f'{path}/index.gmi').is_file():
        num_entries = sum(1 for line in open(f'{path}/index.gmi'))

    # if there are more articles than defined in max_num_entries, delete and rewrite
    if num_entries > max_num_entries:
        delete_folder(tpath, data['title'])

    index_file_write_header(path, data['title'])
    urls = []
    art_output = {}
    for article in data['articles']:
        art_output = write_article(article, data, path)
        urls.append(art_output['url'])
    index_file_write_footer(path)
    # no need to update atom file if no new articles (write_article func returns url list)
    if art_output.get('new_file'):
        _rebuild_atom_file(path=path, data=data, urls=urls)


# def purge_folder(path: str) -> None:
#     """Purge folder with too many entries"""
#     logging.info('Purging %s folder', path)
#     files = [x for x in pathlib.Path(f'{path}').iterdir() if x.is_file()]
#     for file in files:
#         pathlib.Path.unlink(file)


def delete_folder(path: str, feed_name: str) -> None:
    """delete a feed folder"""
    if pathlib.Path(f'{path}{feed_name}/').exists():
        shutil.rmtree(f'{path}{feed_name}')
        logging.info('%s/%s folder deleted', path, feed_name)
    else:
        logging.info(
            'folder %s%s not present, nothing to delete', path, feed_name
        )


def index_file_write_header(path: str, title: str) -> None:
    """Write index header"""
    with open(f'{path}/index.gmi', 'w') as index:
        index.write(f'# {title}\n\n')
        index.write('=> ../ ..\n')


def index_file_write_footer(path: str) -> None:
    """Write index footer"""
    with open(f'{path}/index.gmi', 'a') as index:
        index.write('\n=> atom.xml Atom feed\n')


def write_article(
    article: dict, data: dict, path: str
) -> Dict[str, Union[bool, str]]:
    """Write individual article"""
    # prepare data for file format
    date = article['article_date']
    file_date = date.strftime('%Y-%m-%d_%H-%M-%S')
    date = date.strftime('%Y-%m-%d %H:%M:%S')
    file_title = article['article_formatted_title']
    content = article['article_content']

    # we add the entry into index file
    with open(f'{path}/index.gmi', 'a') as index:
        index.write(
            f"=> {file_date}_{file_title}.gmi {date} - {article['article_title']}\n"
        )

    new_file = False
    # write the file if it doesn't exist, obviously
    if not pathlib.Path(f'{path}/{file_date}_{file_title}.gmi').is_file():
        new_file = True
        logging.info('%s : adding entry %s', data['title'], file_title)
        # we write the entry file
        author = article['author'] if 'author' in article else None

        pathlib.Path(f'{path}/{file_date}_{file_title}.gmi').write_text(
            f"# {article['article_title']}\n\n=> {article['http_url']}\n\n{date}, {author}\n\n{content}"
        )
    url = f"{data['gmi_url']}{data['title']}/{file_date}_{file_title}.gmi"

    return {'new_file': new_file, 'url': url}


def _rebuild_atom_file(path: str, data: dict, urls: list) -> None:
    """rebuilds the atom file into gmi folder"""

    atomfeed = FeedGenerator()
    atomfeed.id(data['gmi_url'])
    atomfeed.title(data['title'])
    atomfeed.updated = data['last_update']
    atomfeed.link(href=f"{data['gmi_url']}.atom.xml", rel='self')
    atomfeed.link(href=data['gmi_url'], rel='alternate')

    # rebuild all articles
    for art, article in enumerate(data['articles']):
        atomentry = atomfeed.add_entry()
        url = urls[art]
        atomentry.guid(url)
        atomentry.link(href=url, rel='alternate')
        atomentry.updated(article['updated'])
        atomentry.title(article['article_title'])

    atomfeed.atom_file(f'{path}/atom.xml', pretty=True)
    logging.info('Wrote Atom feed for %s', data['title'])
114  tenkan/processing.py  Normal file
@@ -0,0 +1,114 @@
# -*- coding: utf-8 -*-

"""processing module : feeds file processing"""

import configparser
import hashlib
import json
import logging
import os
from concurrent.futures import ThreadPoolExecutor

import feedparser  # type: ignore

from tenkan.feed import Feed
from tenkan.feedsfile import read, update_feed
from tenkan.files import path_exists, write_files
from tenkan.utils import display_feeds_fetch_progress, measure


@measure
def fetch_feeds(feeds_file: str, gmi_url: str) -> list:
    """Fetch all http feeds with threads"""
    workers = os.cpu_count() or 1
    try:
        fetched_feeds = []
        with ThreadPoolExecutor(max_workers=workers) as executor:
            for item, values in read(feeds_file)['feeds'].items():
                fetched_feeds.append(
                    {
                        'title': item,
                        'fetched_content': executor.submit(
                            feedparser.parse, values['url']
                        ),
                        'gmi_url': gmi_url,
                        'last_update': values['last_update'],
                        'fetched_hash_last_update': None,
                        'json_hash_last_update': values['hash_last_update'],
                    }
                )
            display_feeds_fetch_progress(fetched_feeds)
        return fetched_feeds
    except json.decoder.JSONDecodeError as bad_json:
        raise bad_json


@measure
def prepare_fetched_content(fetched_feeds: list, force: bool = False) -> list:
    """Prepare some necessary data to be sent to feed object"""
    list_to_export = []
    for ftfd in fetched_feeds:
        try:
            # store workers result into fetched_content
            ftfd['fetched_content'] = ftfd['fetched_content'].result()  # type: ignore
            # we store a sha256 footprint of fetched content,
            # to compare to last known footprint
            tmp_hash = hashlib.sha256(
                str(ftfd['fetched_content'].get('entries')[0]).encode()
            )
            if tmp_hash.hexdigest() != ftfd['json_hash_last_update'] or force:
                ftfd['fetched_hash_last_update'] = tmp_hash.hexdigest()
                list_to_export.append(ftfd)
        # sometimes we don't get anything in fetched_content, so just ignore it
        except IndexError:
            pass
    return list_to_export


@measure
def process_fetched_feeds(
    config: configparser.ConfigParser, fetched_feeds: list, force: bool = False
) -> list:
    """Process previously fetched feeds"""
    feed_list = []
    for ftfd in fetched_feeds:
        # initialize feed object
        feed = Feed(
            input_content=ftfd,
            filters=config['filters'],
            formatting=config['formatting'],
        )
        # process feeds if there are updates since last run
        # or if the feed had never been processed
        # or if --force option is used
        if (
            feed.needs_update()
            or not path_exists(
                path=config['tenkan']['gemini_path'] + ftfd['title']
            )
            or force
        ):
            logging.info('Processing %s', ftfd['title'])
            feed.get_new_entries()
            feed_list.append(feed.export_content())
    return feed_list


@measure
def write_processed_feeds(
    args, config: configparser.ConfigParser, feed_list: list
) -> None:
    """Write files from processed feeds into gemini folder"""
    for files_data in feed_list:
        write_files(
            path=config['tenkan']['gemini_path'],
            data=files_data,
            max_num_entries=int(
                config['tenkan'].get('purge_feed_folder_after', '9999')
            ),
        )
        update_feed(
            file=args.feedsfile,
            feed_name=files_data['title'],
            hash_last_update=files_data['hash_last_update'],
        )
35  tenkan/utils.py  Normal file
@@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-

"""utils module : various utils"""

import logging
from time import sleep, time


def display_feeds_fetch_progress(fetched_feeds: list) -> None:
    """Display feeds being fetched"""
    qsize = len(fetched_feeds)
    while True:
        done = len([x for x in fetched_feeds if x['fetched_content'].done()])
        print(f'Fetching feeds [{done}/{qsize}]', end='\r', flush=True)
        sleep(0.3)
        if done == qsize:
            break


def measure(func):
    """
    Decorator to measure the time taken by a func
    Used only in debug mode
    """

    def wrap_func(*args, **kwargs):
        time1 = time()
        result = func(*args, **kwargs)
        time2 = time()
        logging.debug(
            'Function %s executed in %ss', func.__name__, time2 - time1
        )
        return result

    return wrap_func
0  tests/__init__.py  Normal file
81  tests/cli_test.py  Normal file
@@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
import configparser
from pathlib import Path

import pytest

from tenkan.cli import load_args, load_config, run
from tenkan.feedsfile import add_feed, read


def test_config_loaded():
    config_file = Path('./tests/data/tenkan.conf')
    res = load_config(config_file)
    assert isinstance(res, configparser.ConfigParser)


def test_config_tenkan_section_missing():

    config_file = Path('./tests/data/tenkan.conf_fail')

    with pytest.raises(SystemExit) as pytest_wrapped_e:
        load_config(config_file)
    assert pytest_wrapped_e.type == SystemExit
    assert pytest_wrapped_e.value.code == 1


def test_arg_feedsfile_missing():
    args = load_args(['--feedsfile', '/tmp/toto.json', 'list'])
    config = Path('./tests/data/tenkan.conf')
    with pytest.raises(SystemExit) as pytest_wrapped_e:
        run(args, config)
    assert pytest_wrapped_e.type == SystemExit
    assert pytest_wrapped_e.value.code == 1


# def test_stupid_command():
#     args = load_args(['bla'])
#     config = Path('./tests/data/tenkan.conf')
#     with pytest.raises(SystemExit) as pytest_wrapped_e:
#         load_args(args)
#     assert pytest_wrapped_e.type == SystemExit
#     assert pytest_wrapped_e.value.code == 2


def test_add_cmd_feedsfile_missing(tmp_path):
    feeds = tmp_path / 'toto.json'
    args = load_args(['--feedsfile', str(feeds), 'add', 'blabla', 'blibli'])
    config = Path('./tests/data/tenkan.conf')
    run(args, config)
    assert Path(f'{feeds}').is_file()


def test_add_bad_feedsfile_folder():
    args = load_args(
        ['--feedsfile', '/tmp/tmp/tmp/titi.json', 'add', 'blabla', 'blibli']
    )
    config = Path('./tests/data/tenkan.conf')
    with pytest.raises(SystemExit) as pytest_wrapped_e:
        run(args, config)
    assert pytest_wrapped_e.type == SystemExit
    assert pytest_wrapped_e.value.code == 1


def test_del_cmd():
    feeds = Path('./tests/data/feeds.json')
    args = load_args(['--feedsfile', str(feeds), 'delete', 'tutu'])
    config = Path('./tests/data/tenkan.conf')
    add_feed(file=feeds, feed_name='tutu', feed_url='tata')
    run(args, config)
    data = read(file=feeds)
    assert not data['feeds'].get('tutu')


def test_update_cmd():
    feeds = Path('./tests/data/feeds.json')
    args = load_args(['--feedsfile', str(feeds), 'update'])
    config = load_config(str(Path('./tests/data/tenkan.conf')))
    data1 = read(file=feeds)['last_run']
    run(args, config)
    data2 = read(file=feeds)['last_run']
    assert data1 != data2
14  tests/config_test.py  Normal file
@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-
from pathlib import Path

import pytest

from tenkan.config import load_config


def test_configfile_missing():
    config = Path('/tmp/toto.conf')
    with pytest.raises(SystemExit) as pytest_wrapped_e:
        load_config(config)
    assert pytest_wrapped_e.type == SystemExit
    assert pytest_wrapped_e.value.code == 1
10  tests/data/feeds.json  Normal file
@@ -0,0 +1,10 @@
{
    "last_run": "2022-01-12 21:31:10.703787",
    "feeds": {
        "srad-science": {
            "url": "https://srad.jp/science.rss",
            "last_update": null,
            "hash_last_update": ""
        }
    }
}
7  tests/data/feeds.json_fail  Normal file
@@ -0,0 +1,7 @@
{
    "feeds": {
        "srad-science": {
            "url": "https://srad.jp/science.rss",
            "last_update": null
        }
    }
15  tests/data/tenkan.conf  Normal file
@@ -0,0 +1,15 @@
[tenkan]
gemini_path = /tmp/
gemini_url = gemini://space.fqserv.eu/feeds/

[filters]
# authors we don't want to read
authors_blacklist = Rabaudy, Élise Costa, Sagalovitch, Pessin, Gallerey
titles_blacklist = Pinned
links_blacklist = slate.fr/audio, slate.fr/grand-format, slate.fr/boire-manger/top-chef

[formatting]
title_size = 120
# feeds with a truncated content
# will be fetched and converted using readability-lxml
truncated_feeds = gurumed, slate, cnrs
15  tests/data/tenkan.conf_fail  Normal file
@@ -0,0 +1,15 @@
#[tenkan]
#gemini_path = /tmp/hu/
#gemini_url = gemini://space.fqserv.eu/feeds/

[filters]
# authors we don't want to read
authors_blacklist = Rabaudy, Élise Costa, Sagalovitch, Pessin, Gallerey
titles_blacklist = Pinned
links_blacklist = slate.fr/audio, slate.fr/grand-format, slate.fr/boire-manger/top-chef

[formatting]
title_size = 120
# feeds with a truncated content
# will be fetched and converted using readability-lxml
truncated_feeds = gurumed, slate, cnrs
101  tests/feed_test.py  Normal file
@@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
from datetime import datetime, timezone

import pytest

from tenkan.feed import Feed

data = {
    'title': 'bla',
    'url': 'bla',
    'fetched_content': 'bla',
    'last_update': None,
    'gmi_url': 'bla',
    'json_hash_last_update': 'bl',
    'fetched_hash_last_update': 'bla',
}

article_data1 = {
    'title': 'article_title',
    'article_formatted_title': 'article_formatted_title',
    'article_content': {'summary': 'article_content'},
    'article_date': datetime(2022, 1, 7, 15, 25, 0, tzinfo=timezone.utc),
    'http_url': 'article_link',
    'updated': 'Fri, 07 Jan 2022 15:25:00 +0000',
    'updated_parsed': datetime(
        2022, 1, 7, 15, 25, 0, tzinfo=timezone.utc
    ).timetuple(),
}

article_data2 = {
    'title': 'article_title',
    'article_formatted_title': 'article_formatted_title',
    'article_content': {'summary': 'article_content'},
    'article_date': 'bad_date',
    'http_url': 'article_link',
    'updated_': 'bad_date',
}


def test_needs_update_no_last_update():
    data['json_hash_last_update'] = None
    feed = Feed(input_content=data)
    assert feed.needs_update() is True


def test_needs_update_last_update_ne_updated_field():
    feed = Feed(input_content=data)
    assert feed.needs_update() is True


def test_no_need_update():
    data['json_hash_last_update'] = 'bla'
    feed = Feed(input_content=data)
    assert feed.needs_update() is False


def test_content_exported():
    # TODO : use article_data
    feed = Feed(input_content=data)

    expected_data = {
        'title': 'bla',
        'last_update': None,
        'gmi_url': 'bla',
        'articles': [],
        'hash_last_update': 'bla',
    }

    assert feed.export_content() == expected_data


def test_date_format_published():
    data['articles'] = article_data1
    feed = Feed(input_content=data)
    assert (
        feed._get_article_date(article_data1)
        == data['articles']['article_date']
    )


def test_bad_date_format():
    data['articles'] = article_data2
    feed = Feed(input_content=data)
    with pytest.raises(SystemExit) as pytest_wrapped_e:
        feed._get_article_date(article_data2)
    assert pytest_wrapped_e.type == SystemExit
    assert pytest_wrapped_e.value.code == 1


def test_article_content_formatted():
    feed = Feed(input_content=data, formatting={'truncated_feeds': 'rien'})
    res = feed._format_article_content(content='coucou', link='blbl')
    assert res == 'coucou'


def test_title_formatted():
    feed = Feed(input_content=data, formatting={'title_size': 10})
    art = article_data1
    art['title'] = 'blabla / bla ?'
    res = feed._format_article_title(article=article_data1)
    assert res == 'blabla-'
43  tests/feedsfile_test.py  Normal file
@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-

from pathlib import Path

from tenkan.feedsfile import (
    add_feed,
    del_feed,
    get_feed_item,
    read,
    update_feed,
)


def test_get_feed_item():
    feeds = Path('./tests/data/feeds.json')
    item = get_feed_item(file=feeds, feed_name='srad-science', item='url')
    assert item == 'https://srad.jp/science.rss'


def test_update_hash():
    feeds = Path('./tests/data/feeds.json')
    update_feed(file=feeds, feed_name='srad-science', hash_last_update='blbl')
    item = get_feed_item(
        file=feeds, feed_name='srad-science', item='hash_last_update'
    )
    assert item == 'blbl'
    update_feed(file=feeds, feed_name='srad-science', hash_last_update='')


def test_add_feed():
    feeds = Path('./tests/data/feeds.json')
    add_feed(file=feeds, feed_name='toto', feed_url='tata')
    data = read(file=feeds)
    assert data['feeds'].get('toto')
    del_feed(file=feeds, feed_name='toto')


def test_del_feed():
    feeds = Path('./tests/data/feeds.json')
    add_feed(file=feeds, feed_name='tutu', feed_url='tata')
    del_feed(file=feeds, feed_name='tutu')
    data = read(file=feeds)
    assert not data['feeds'].get('tutu')
70  tests/files_test.py  Normal file
@@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
from datetime import datetime, timezone
from pathlib import Path

from tenkan.files import (
    _rebuild_atom_file,
    delete_folder,
    path_exists,
    write_article,
)

data: dict = {
    'title': 'bla',
    'url': 'bla',
    'fetched_content': 'bla',
    'last_update': None,
    'gmi_url': 'bla',
    'articles': [],
}

article_data = {
    'article_title': 'article_title',
    'article_formatted_title': 'article_formatted_title',
    'article_content': {'summary': 'article_content'},
    'article_date': datetime(2022, 1, 7, 15, 25, 0, tzinfo=timezone.utc),
    'http_url': 'article_link',
    'updated': 'Fri, 07 Jan 2022 15:25:00 +0000',
    'updated_parsed': datetime(
        2022, 1, 7, 15, 25, 0, tzinfo=timezone.utc
    ).timetuple(),
}


def test_path_exists(tmp_path):
    d = tmp_path / 'sub'
    d.mkdir()

    assert path_exists(d) is True


def test_path_doesnt_exist(tmp_path):
    d = tmp_path / 'sub'

    assert path_exists(d) is False


def test_article_written(tmp_path):
    path = tmp_path / 'sub'
    path.mkdir()
    date = article_data['article_date']
    file_date = date.strftime('%Y-%m-%d_%H-%M-%S')
    file_title = article_data['article_formatted_title']
    res = write_article(article=article_data, data=data, path=path)
    assert res['new_file'] is True
    assert (
        res['url']
        == f"{data['gmi_url']}{data['title']}/{file_date}_{file_title}.gmi"
    )


def test_folder_deleted(tmp_path):
    subpath = tmp_path / 'sub2'
    delete_folder(path=tmp_path, feed_name='sub2')
    assert not subpath.exists()


def test_atomfile_built(tmp_path):
    data['articles'].append(article_data)
    _rebuild_atom_file(path=tmp_path, data=data, urls=['bla'])
    assert Path(f'{tmp_path}/atom.xml').is_file()
45  tests/processing_test.py  Normal file
@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
from json import JSONDecodeError
from pathlib import Path

import feedparser
import pytest

from tenkan.config import load_config
from tenkan.processing import fetch_feeds, process_fetched_feeds

data = [
    {
        'title': 'bla',
        'url': 'bla',
        'fetched_content': None,
        'last_update': None,
        'gmi_url': 'bla',
        'json_hash_last_update': 'bli',
        'fetched_hash_last_update': 'bli',
    }
]


def test_feed_fetched():
    feeds = Path('./tests/data/feeds.json')

    res = fetch_feeds(feeds_file=feeds, gmi_url='blbl')
    assert type(res) is list
    assert len(res) == 1


def test_feed_raise_when_shitty_feedfile():
    feeds = Path('./tests/data/feeds.json_fail')

    with pytest.raises(JSONDecodeError):
        fetch_feeds(feeds_file=feeds, gmi_url='blbl')


def test_feed_processed():
    config_file = Path('./tests/data/tenkan.conf')
    conf = load_config(config_file)
    data[0]['fetched_content'] = feedparser.parse(
        'https://srad.jp/science.rss'
    )
    process_fetched_feeds(config=conf, fetched_feeds=data)