initial commit

Quentin Ferrand 2022-01-12 21:31:56 +01:00
commit 00eca9aa4e
28 changed files with 2926 additions and 0 deletions

.chglog/config.yml Executable file

@@ -0,0 +1,27 @@
style: none
template: CHANGELOG.tpl.md
info:
title: CHANGELOG
repository_url: git.fqserv.eu:takaoni/tenkan.git
options:
commits:
# filters:
# Type:
# - feat
# - fix
# - perf
# - refactor
commit_groups:
# title_maps:
# feat: Features
# fix: Bug Fixes
# perf: Performance Improvements
# refactor: Code Refactoring
header:
pattern: "^(\\w*)\\:\\s(.*)$"
pattern_maps:
- Type
- Subject
notes:
keywords:
- BREAKING CHANGE

.gitignore vendored Normal file

@@ -0,0 +1,140 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
.chglog/

.pre-commit-config.yaml Normal file

@@ -0,0 +1,55 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.1.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-json
- id: check-added-large-files
- id: double-quote-string-fixer
- id: fix-encoding-pragma
- id: no-commit-to-branch
- id: name-tests-test
- repo: https://gitlab.com/pycqa/flake8
rev: 3.9.2
hooks:
- id: flake8
- repo: https://github.com/psf/black
rev: 21.12b0
hooks:
- id: black
name: black (python)
args: ['-S']
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v0.931
hooks:
- id: mypy
        additional_dependencies: [pydantic]  # add if pydantic is used
- repo: https://github.com/PyCQA/isort
rev: 5.10.1
hooks:
- id: isort
name: isort (python)
args: ['--profile', 'black']
- repo: https://github.com/PyCQA/bandit
rev: 1.7.1
hooks:
- id: bandit
exclude: ^tests/
- repo: https://github.com/asottile/pyupgrade
rev: v2.31.0
hooks:
- id: pyupgrade
- repo: local
hooks:
- id: pylint
name: pylint
entry: pylint --disable=E1101,E0401,C0301 --ignore=__init__.py --ignore-patterns=(.)*_test\.py,test_(.)*\.py
language: system
types: [python]
- id: pytest
name: Check pytest unit tests pass
entry: pytest
pass_filenames: false
language: system
types: [python]

CHANGELOG.md Normal file (empty)

LICENCE Normal file

@@ -0,0 +1,13 @@
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
Version 2, December 2004
Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. You just DO WHAT THE FUCK YOU WANT TO.

README.md Normal file

@@ -0,0 +1,80 @@
# tenkan
Command-line tool to convert HTTP RSS/Atom feeds to the Gemini format.
## Installation
```shell script
pip install tenkan
```
## Usage
Add a feed
```shell script
# Any valid RSS/Atom feed
tenkan add feedname url
```
Update content of feed list
```shell script
tenkan update
```
Delete feed
```shell script
tenkan delete feedname
```
List subscribed feeds
```shell script
tenkan list
```
## Options
A debug mode is available via the --debug option.
If you want to use a configuration file or feeds file stored somewhere other than the default locations, use the --config and --feedsfile options.
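For example, running an update in debug mode with both files kept in a custom directory (the paths here are purely illustrative):
```shell script
tenkan --config ~/myconf/tenkan.conf --feedsfile ~/myconf/feeds.json --debug update
```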
## Configuration
tenkan searches for a configuration file at the following location:
`$XDG_CONFIG_HOME/tenkan/tenkan.conf`
### Example config
This can be found in tenkan.conf.example.
```ini
[tenkan]
gemini_path = /usr/local/gemini/
gemini_url = gemini://foo.bar/feeds/
# will purge feed folders having more than the defined element count
# purge_feed_folder_after = 100
[filters]
# authors we don't want to read
# authors_blacklist = foo, bar
# blacklist of article titles; matching articles won't be processed
# titles_blacklist = foo, bar
# blacklist of article links; matching articles won't be processed
# links_blacklist = foo/bar.com, bar/foo, bla
[formatting]
# maximum article title size, 120 chars if not provided
# title_size = 120
# feeds with truncated content
# will be fetched and converted using readability
# truncated_feeds = foo, bar
```
## Todolist
- [ ] Add an edit command
- [ ] Add a --feedname option to the update command, to update a single feed
- [ ] Rewrite configuration checks
- [ ] Improve tests
- [ ] Refactor needed parts like write_article
- [ ] (not sure if relevant) migrate images too, for gemini clients that can handle it
## Development
I recommend using pre-commit. The pre-commit configuration I use is located in the .pre-commit-config.yaml file.
Run the pre-commit command before every pull request and fix the warnings or errors it produces.
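A typical workflow with the standard pre-commit commands looks like this:
```shell script
pip install pre-commit
# install the git hook once per clone
pre-commit install
# run every hook against the whole repository
pre-commit run --all-files
```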

poetry.lock generated Normal file

File diff suppressed because it is too large

pyproject.toml Normal file

@@ -0,0 +1,66 @@
[tool.poetry]
name = "tenkan"
version = "0.1.0"
description = "RSS/atom feed converter from html to gemini"
authors = ["Quentin Ferrand <quentin.ferrand@protonmail.com>"]
[tool.poetry.dependencies]
python = "^3.8"
DateTime = "^4.3"
feedparser = "^6.0.8"
feedgen = "^0.9.0"
requests = "^2.26.0"
markdownify = "^0.10.0"
md2gemini = "^1.8.1"
readability-lxml = "^0.8.1"
rich = "^10.16.2"
prettytable = "^3.0.0"
[tool.poetry.dev-dependencies]
pytest = "^5.2"
black = {version = "^21.11b1", allow-prereleases = true}
flake8 = "^4.0.1"
mypy = "^0.910"
isort = "^5.10.1"
pytest-cov = "^3.0.0"
pylint = "^2.12.2"
pyupgrade = "^2.31.0"
bandit = "^1.7.1"
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.black]
line-length = 79
target-version = ['py38']
include = '\.pyi?$'
exclude = '''
(
/(
\.eggs # exclude a few common directories in the
| \.git # root of the project
| \.hg
| \.mypy_cache
| \.tox
| \.venv
| _build
| buck-out
| build
| dist
)/
| foo.py # also separately exclude a file named foo.py in
# the root of the project
)
'''
[tool.isort]
multi_line_output = 3
include_trailing_comma = true
force_grid_wrap = 0
use_parentheses = true
line_length = 79
[tool.poetry.scripts]
tenkan = "tenkan.cli:main"

tenkan.conf.example Normal file

@@ -0,0 +1,21 @@
[tenkan]
gemini_path = /usr/local/gemini/
gemini_url = gemini://foo.bar/feeds/
# will purge feed folders having more than the defined element count
# purge_feed_folder_after = 100
[filters]
# authors we don't want to read
# authors_blacklist = foo, bar
# blacklist of article titles; matching articles won't be processed
# titles_blacklist = foo, bar
# blacklist of article links; matching articles won't be processed
# links_blacklist = foo/bar.com, bar/foo, bla
[formatting]
# maximum article title size, 120 chars if not provided
# title_size = 120
# feeds with truncated content
# will be fetched and converted using readability
# truncated_feeds = foo, bar

tenkan/__init__.py Normal file

@@ -0,0 +1,2 @@
# -*- coding: utf-8 -*-
__version__ = '0.1.0'

tenkan/cli.py Normal file

@@ -0,0 +1,214 @@
# -*- coding: utf-8 -*-
"""
cli module
Parses the arguments and runs the relevant modules
depending on which command is given
"""
import configparser
import logging
import sys
from argparse import ArgumentParser, RawTextHelpFormatter
from datetime import datetime
from pathlib import Path
from typing import NoReturn
from rich.traceback import install
from tenkan.config import load_config
from tenkan.feedsfile import (
add_feed,
create,
del_feed,
list_feeds,
update_last_run,
)
from tenkan.files import delete_folder
from tenkan.processing import (
fetch_feeds,
prepare_fetched_content,
process_fetched_feeds,
write_processed_feeds,
)
# rich tracebacks
install(show_locals=True)
class MyParser(ArgumentParser): # pylint: disable=too-few-public-methods
"""Child class to print help msg if no or bad args given"""
def error(self, message: str) -> NoReturn:
"""exit"""
sys.stderr.write(f'error: {message}')
self.print_help()
sys.exit(2)
def load_args(args: list):
"""args parsing function"""
    desc = 'tenkan : RSS/atom feed converter from html to gemini\n\nTo show the detailed help of a COMMAND run `tenkan COMMAND --help`.'
parser = MyParser(
description=desc, prog='tenkan', formatter_class=RawTextHelpFormatter
)
parser.add_argument(
'-v',
'--version',
action='version',
version='%(prog)s 0.1.0',
help='show %(prog)s version number and exit',
)
parser.add_argument(
'--config',
default=f'{str(Path.home())}/.config/tenkan/tenkan.conf',
help='config file, $HOME/.config/tenkan/tenkan.conf by default',
dest='config',
)
parser.add_argument(
'--feedsfile',
default=f'{str(Path.home())}/.config/tenkan/feeds.json',
help='feeds file containing feed list, $HOME/.config/tenkan/feeds.json by default',
dest='feedsfile',
)
parser.add_argument(
'--debug', action='store_true', help='debug mode', dest='debug'
)
subparsers = parser.add_subparsers(
title='command', required=True, dest='command'
)
parser_add = subparsers.add_parser(
'add', help='add a feed to the feeds list'
)
parser_add.add_argument(
'name', help='the name of the feed you want to add'
)
parser_add.add_argument('url', help='the HTTP url of the feed')
parser_update = subparsers.add_parser(
'update', help='update feeds folder from feed list'
)
parser_update.add_argument(
'--force',
action='store_true',
default=False,
help='update feed list even if there is no new content',
)
    parser_list = subparsers.add_parser(
        'list', help='list all feeds in feeds list'
    )
parser_delete = subparsers.add_parser(
        'delete', help='remove a feed from the feeds list'
)
parser_delete.add_argument(
'name', help='the name of the feed you want to delete'
)
parser_delete.add_argument(
'--delete-gmi-folder',
        help='also delete the generated gmi folder',
action='store_true',
default=False,
dest='delete_folder',
)
return parser.parse_args(args)
def set_logging(args, config: configparser.ConfigParser) -> None:
"""define logging settings"""
log = logging.getLogger()
log.setLevel(logging.INFO)
if args.debug:
log.setLevel(logging.DEBUG)
console_formatter = logging.Formatter(fmt='%(message)s')
file_formatter = logging.Formatter(
fmt='%(asctime)s %(levelname)s: %(message)s'
)
stdout_handler = logging.StreamHandler(stream=sys.stdout)
stdout_handler.setFormatter(console_formatter)
log.addHandler(stdout_handler)
if config['tenkan'].get('log_file'):
file_handler = logging.FileHandler(
filename=config['tenkan'].get('log_file'),
encoding='utf-8',
)
file_handler.setFormatter(file_formatter)
log.addHandler(file_handler)
def run(args, config: configparser.ConfigParser) -> None:
"""run stuff depending of command used"""
# exit with error if json file not found with actions other than add
if not Path(args.feedsfile).exists() and 'add' not in args.command:
logging.error('No json file %s, can\'t continue', args.feedsfile)
sys.exit(1)
# list feeds in a pretty format
if args.command == 'list':
list_feeds(file=args.feedsfile)
# add a feed to feeds file
if args.command == 'add':
        # create the feeds file directory if it sits directly under home,
        # then create the json with an empty structure if there is no file yet
if not Path(args.feedsfile).parents[0].exists():
if str(Path(args.feedsfile).parents[0]) == str(Path.home()):
Path(args.feedsfile).parents[0].mkdir(
parents=True, exist_ok=True
)
else:
                logging.error(
                    'Directory of feeds file %s not found, exiting',
                    args.feedsfile,
                )
sys.exit(1)
if not Path(args.feedsfile).is_file():
create(args.feedsfile)
add_feed(file=args.feedsfile, feed_name=args.name, feed_url=args.url)
# delete a feed from feeds file
if args.command == 'delete':
del_feed(file=args.feedsfile, feed_name=args.name)
if args.delete_folder:
delete_folder(
path=config['tenkan']['gemini_path'], feed_name=args.name
)
# update content
if args.command == 'update':
fetched_feeds = fetch_feeds(
feeds_file=args.feedsfile,
gmi_url=config['tenkan']['gemini_url'],
)
print('')
fetched_feeds = prepare_fetched_content(fetched_feeds, args.force)
feed_list = process_fetched_feeds(
config=config,
fetched_feeds=fetched_feeds,
force=args.force,
)
if feed_list:
write_processed_feeds(args, config, feed_list)
else:
logging.info('No new content to process, stopping')
update_last_run(args.feedsfile, str(datetime.now()))
def main() -> None:
"""load conf, args, set logging and run main program"""
args = load_args(args=sys.argv[1:])
config = load_config(args.config)
set_logging(args, config)
run(args, config)

tenkan/config.py Normal file

@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
"""config module : configuration file parsing"""
import configparser
import logging
import sys
from pathlib import Path
def load_config(config_file) -> configparser.ConfigParser:
"""config load"""
# exit with error if config file not found
if not Path(config_file).exists():
logging.error('No config file found %s, exiting', config_file)
sys.exit(1)
parser = configparser.ConfigParser()
parser.read(config_file)
if 'tenkan' not in parser.sections():
logging.critical(
"Missing [tenkan] section in config file %s, can't go further",
config_file,
)
sys.exit(1)
    # rough checks of config content
    # to improve later...
for opt in ['gemini_path', 'gemini_url']:
if not parser.has_option('tenkan', opt):
logging.error('Missing option %s', opt)
sys.exit(1)
    if parser.has_option('tenkan', 'purge_feed_folder_after'):
        if not parser['tenkan']['purge_feed_folder_after'].isdigit():
            logging.error(
                'Wrong type for purge_feed_folder_after option, should be a number'
            )
            sys.exit(1)
if parser.has_section('filters'):
for item in parser['filters']:
parser['filters'][item] = parser['filters'][item].replace(' ', '')
if parser.has_option('formatting', 'truncated_feeds'):
parser['formatting']['truncated_feeds'] = parser['formatting'][
'truncated_feeds'
].replace(' ', '')
    if parser.has_option(
        'formatting', 'title_size'
    ) and not parser['formatting']['title_size'].isdigit():
        logging.error('Wrong type for title_size option, should be a number')
        sys.exit(1)
return parser

tenkan/feed.py Normal file

@@ -0,0 +1,196 @@
# -*- coding: utf-8 -*-
""" feed module : feed object """
import logging
import re
import sys
from datetime import datetime, timezone
from typing import List
import requests # type: ignore
from markdownify import markdownify # type: ignore
from md2gemini import md2gemini # type: ignore
from readability import Document # type: ignore
from requests.adapters import HTTPAdapter # type: ignore
from urllib3.util.retry import Retry
from tenkan.utils import measure
class Feed:
"""
    receives various feed data and applies the changes needed to make it usable in files
"""
def __init__(
self,
input_content: dict,
filters=None,
formatting=None,
) -> None:
self.content = input_content
self.filters = filters
self.formatting = formatting
self.new_entries: list = []
def needs_update(self) -> bool:
"""Checks if updates are available"""
if not self.content['json_hash_last_update']:
return True
if (
self.content['json_hash_last_update']
!= self.content['fetched_hash_last_update']
):
return True
return False
@measure
def get_new_entries(self) -> None:
"""Selects new entries depending on filters defined on config file"""
        # iterate over a copy, since blacklisted entries are removed from the original list
        for entry in list(self.content['fetched_content']['entries']):
if (
any(
x in entry['title']
for x in self.filters.get('titles_blacklist', '').split(
','
)
)
or any(
x in entry['link']
for x in self.filters.get('links_blacklist', '').split(',')
)
or any(
                # the feedparser object can be problematic sometimes:
                # we need to check that an authors item exists,
                # AND that we can get its name, because it can be empty;
                # if we have neither, we return a placeholder string
                # to match the expected str type
x
in (
entry.get('authors')
and entry.authors[0].get('name')
or 'random string'
)
for x in self.filters.get('authors_blacklist', '').split(
','
)
)
):
self.content['fetched_content']['entries'].remove(entry)
continue
self.new_entries.append(entry)
@measure
def export_content(self) -> dict:
"""Exports properly formatted content"""
# create feed item structure
data_export: dict[str, List] = {
'title': self.content['title'],
'last_update': self.content['last_update'],
'gmi_url': self.content['gmi_url'],
'articles': [],
'hash_last_update': self.content['fetched_hash_last_update'],
}
for article in self.new_entries:
article_formatted_title = self._format_article_title(article)
article_date = self._get_article_date(article)
# 2 possibilities to get content : content['value'] or summary
content = (
article['content'][0]['value']
if article.get('content')
else article['summary']
)
article_content = self._format_article_content(
content, link=article['link']
)
data_export['articles'].append(
{
'article_title': article['title'],
'article_formatted_title': article_formatted_title,
'article_content': article_content,
'article_date': article_date,
'http_url': article['link'],
'updated': article_date,
}
)
return data_export
@classmethod
def _get_article_date(cls, article: dict) -> datetime:
"""get date string and return datetime object"""
try:
return (
datetime(
*article.get(
'published_parsed', article['updated_parsed']
)[:6]
)
.replace(tzinfo=timezone.utc)
.astimezone(tz=None)
)
except KeyError:
logging.error(
"Can't find a proper date field in article data, this should not happen !"
)
sys.exit(1)
@measure
def _format_article_title(self, article: dict) -> str:
"""title formatting to make it usable as a file title"""
        # shrink the allowed title length when feed name plus article title exceed the configured max size
maxlen = int(self.formatting.get('title_size', 120))
if len(self.content['title']) + len(article['title']) > maxlen:
maxlen = maxlen - len(self.content['title'])
# We don't want multiline titles (yes, it happens)
article['title'] = article['title'].replace('\n', '')[:maxlen]
# remove special characters
        # probably not the best way to do it, as there seem to be
        # performance issues here; to improve later if possible
formatted_str = (
article['title']
.encode('utf8', 'ignore')
.decode('utf8', 'ignore')
.replace(' ', '-')
)
return re.sub('[«»!@#$%^&*(){};:,./<>?/|`~=_+]', '', formatted_str)[
:maxlen
]
@measure
def _format_article_content(self, content: str, link: str) -> str:
"""
Formats article content from html to gmi
Will use readability if the feed is truncated, so it should retrieve the full content
"""
# conversion to readability format if asked
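        # (the default 'アケオメ' is only a sentinel that should never match a real feed title)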
if self.content['title'] in self.formatting.get(
'truncated_feeds', 'アケオメ'
).split(','):
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'
}
req = requests.Session()
retries = Retry(
total=5,
backoff_factor=0.1,
status_forcelist=[500, 502, 503, 504],
)
req.mount('http://', HTTPAdapter(max_retries=retries))
req.mount('https://', HTTPAdapter(max_retries=retries))
res = req.get(url=link, headers=headers)
content = Document(res.text).summary()
# convert html -> md -> gemini
article = md2gemini(markdownify(content))
return article

tenkan/feedsfile.py Normal file

@@ -0,0 +1,85 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
""" feedsfile mddule : json feeds file manipulation """
import json
import logging
from typing import Dict # , Optional
from prettytable import PrettyTable
def create(file: str) -> None:
"""file creation"""
with open(file, 'x') as _file:
data: dict = {'feeds': {}}
json.dump(data, _file)
def read(file: str) -> Dict[str, Dict[str, Dict[str, str]]]:
"""read file and return json data"""
with open(file, 'r') as _file:
file_data = json.load(_file)
return file_data
def _write(file: str, file_data: Dict[str, Dict[str, Dict[str, str]]]) -> None:
"""write new data into file"""
with open(file, 'w') as file_updated:
json.dump(file_data, file_updated, indent=4)
def add_feed(file: str, feed_name: str, feed_url: str) -> None:
"""add a new feed into existing file"""
file_data: Dict[str, Dict[str, Dict[str, str]]] = read(file)
file_data['feeds'][feed_name] = {
'url': feed_url,
'last_update': '',
'hash_last_update': '',
}
_write(file, file_data)
logging.info('feed %s added', feed_name)
def del_feed(file: str, feed_name: str) -> None:
"""remove feed from file"""
file_data = read(file)
# don't do anything if no feed found
if file_data['feeds'].get(feed_name):
del file_data['feeds'][feed_name]
_write(file, file_data)
logging.info('feed %s deleted', feed_name)
else:
logging.info('no feed %s found into feeds file', feed_name)
def get_feed_item(file: str, feed_name: str, item: str) -> str:
"""Return element of a defined feed"""
file_data = read(file)
    return file_data['feeds'][feed_name][item]
def update_last_run(file: str, date: str) -> None:
"""Update last_run key in json file"""
file_data: dict = read(file)
file_data['last_run'] = date
_write(file, file_data)
def update_feed(file: str, feed_name: str, hash_last_update: str) -> None:
"""update last update date of a defined feed"""
file_data = read(file)
file_data['feeds'][feed_name]['hash_last_update'] = hash_last_update
_write(file, file_data)
def list_feeds(file: str) -> None:
"""list feed file content"""
file_data = read(file)
table = PrettyTable()
table.field_names = ['Title', 'URL']
for item, value in file_data['feeds'].items():
table.add_row([item, value['url']])
logging.info(table)

tenkan/files.py Normal file

@@ -0,0 +1,133 @@
# -*- coding: utf-8 -*-
""" files module : generated gemini feeds files management """
import logging
import pathlib
import shutil
from typing import Dict, Union
from feedgen.feed import FeedGenerator # type: ignore
def path_exists(path: str) -> bool:
"""Check if feed path exists"""
if pathlib.Path(path).is_dir():
return True
return False
def write_files(path: str, data: dict, max_num_entries: int) -> None:
"""
    Converts feed objects into files and writes them to the feed folder
"""
tpath = path
path = path + data['title']
pathlib.Path(path).mkdir(exist_ok=True)
num_entries = 0
    # count entries in index file
    if pathlib.Path(f'{path}/index.gmi').is_file():
        with open(f'{path}/index.gmi') as index:
            num_entries = sum(1 for _ in index)
    # if there are more articles than max_num_entries allows, delete and rewrite
if num_entries > max_num_entries:
delete_folder(tpath, data['title'])
index_file_write_header(path, data['title'])
    urls = []
    new_content = False
    for article in data['articles']:
        art_output = write_article(article, data, path)
        urls.append(art_output['url'])
        new_content = new_content or bool(art_output['new_file'])
    index_file_write_footer(path)
    # no need to rebuild the atom file if no new article file was written
    # (write_article reports whether it created a new file)
    if new_content:
        _rebuild_atom_file(path=path, data=data, urls=urls)
# def purge_folder(path: str) -> None:
# """Purge folder with too many entries"""
# logging.info('Purging %s folder', path)
# files = [x for x in pathlib.Path(f'{path}').iterdir() if x.is_file()]
# for file in files:
# pathlib.Path.unlink(file)
def delete_folder(path: str, feed_name: str) -> None:
"""delete a feed folder"""
if pathlib.Path(f'{path}{feed_name}/').exists():
shutil.rmtree(f'{path}{feed_name}')
logging.info('%s/%s folder deleted', path, feed_name)
else:
logging.info(
'folder %s%s not present, nothing to delete', path, feed_name
)
def index_file_write_header(path: str, title: str) -> None:
"""Write index header"""
with open(f'{path}/index.gmi', 'w') as index:
index.write(f'# {title}\n\n')
index.write('=> ../ ..\n')
def index_file_write_footer(path: str) -> None:
"""Write index footer"""
with open(f'{path}/index.gmi', 'a') as index:
index.write('\n=> atom.xml Atom feed\n')
def write_article(
article: dict, data: dict, path: str
) -> Dict[str, Union[bool, str]]:
"""Write individual article"""
# prepare data for file format
date = article['article_date']
file_date = date.strftime('%Y-%m-%d_%H-%M-%S')
date = date.strftime('%Y-%m-%d %H:%M:%S')
file_title = article['article_formatted_title']
content = article['article_content']
# we add the entry into index file
with open(f'{path}/index.gmi', 'a') as index:
index.write(
f"=> {file_date}_{file_title}.gmi {date} - {article['article_title']}\n"
)
new_file = False
    # write the file if it doesn't exist, obviously
if not pathlib.Path(f'{path}/{file_date}_{file_title}.gmi').is_file():
new_file = True
logging.info('%s : adding entry %s', data['title'], file_title)
# we write the entry file
author = article['author'] if 'author' in article else None
pathlib.Path(f'{path}/{file_date}_{file_title}.gmi').write_text(
f"# {article['article_title']}\n\n=> {article['http_url']}\n\n{date}, {author}\n\n{content}"
)
url = f"{data['gmi_url']}{data['title']}/{file_date}_{file_title}.gmi"
return {'new_file': new_file, 'url': url}
def _rebuild_atom_file(path: str, data: dict, urls: list) -> None:
"""rebuilds the atom file into gmi folder"""
atomfeed = FeedGenerator()
atomfeed.id(data['gmi_url'])
atomfeed.title(data['title'])
    atomfeed.updated(data['last_update'])
atomfeed.link(href=f"{data['gmi_url']}.atom.xml", rel='self')
atomfeed.link(href=data['gmi_url'], rel='alternate')
# rebuild all articles
for art, article in enumerate(data['articles']):
atomentry = atomfeed.add_entry()
url = urls[art]
atomentry.guid(url)
atomentry.link(href=url, rel='alternate')
atomentry.updated(article['updated'])
atomentry.title(article['article_title'])
atomfeed.atom_file(f'{path}/atom.xml', pretty=True)
logging.info('Wrote Atom feed for %s', data['title'])

tenkan/processing.py Normal file

@@ -0,0 +1,114 @@
# -*- coding: utf-8 -*-
"""processing module : feeds file processing """
import configparser
import hashlib
import json
import logging
import os
from concurrent.futures import ThreadPoolExecutor
import feedparser # type: ignore
from tenkan.feed import Feed
from tenkan.feedsfile import read, update_feed
from tenkan.files import path_exists, write_files
from tenkan.utils import display_feeds_fetch_progress, measure
@measure
def fetch_feeds(feeds_file: str, gmi_url: str) -> list:
"""Fetch all http feeds with threads"""
workers = os.cpu_count() or 1
try:
fetched_feeds = []
with ThreadPoolExecutor(max_workers=workers) as executor:
for item, values in read(feeds_file)['feeds'].items():
fetched_feeds.append(
{
'title': item,
'fetched_content': executor.submit(
feedparser.parse, values['url']
),
'gmi_url': gmi_url,
'last_update': values['last_update'],
'fetched_hash_last_update': None,
'json_hash_last_update': values['hash_last_update'],
}
)
display_feeds_fetch_progress(fetched_feeds)
return fetched_feeds
    except json.decoder.JSONDecodeError:
        # propagate a malformed feeds file error to the caller
        raise
@measure
def prepare_fetched_content(fetched_feeds: list, force: bool = False) -> list:
"""Prepare some necessary data to be sent to feed object"""
list_to_export = []
for ftfd in fetched_feeds:
try:
# store workers result into fetched_content
ftfd['fetched_content'] = ftfd['fetched_content'].result() # type: ignore
# we store a sha256 footprint of fetched content,
# to compare to last known footprint
tmp_hash = hashlib.sha256(
str(ftfd['fetched_content'].get('entries')[0]).encode()
)
if tmp_hash.hexdigest() != ftfd['json_hash_last_update'] or force:
ftfd['fetched_hash_last_update'] = tmp_hash.hexdigest()
list_to_export.append(ftfd)
# sometimes we don't get anything in fetched_content, so just ignore it
except IndexError:
pass
return list_to_export
@measure
def process_fetched_feeds(
config: configparser.ConfigParser, fetched_feeds: list, force: bool = False
) -> list:
"""Process previously fetched feeds"""
feed_list = []
for ftfd in fetched_feeds:
# initialize feed object
feed = Feed(
input_content=ftfd,
filters=config['filters'],
formatting=config['formatting'],
)
# process feeds if there are updates since last run
# or if the feed had never been processed
# or if --force option is used
if (
feed.needs_update()
or not path_exists(
path=config['tenkan']['gemini_path'] + ftfd['title']
)
or force
):
logging.info('Processing %s', ftfd['title'])
feed.get_new_entries()
feed_list.append(feed.export_content())
return feed_list
@measure
def write_processed_feeds(
args, config: configparser.ConfigParser, feed_list: list
) -> None:
"""Write files from processed feeds into gemini folder"""
for files_data in feed_list:
write_files(
path=config['tenkan']['gemini_path'],
data=files_data,
max_num_entries=int(
config['tenkan'].get('purge_feed_folder_after', '9999')
),
)
update_feed(
file=args.feedsfile,
feed_name=files_data['title'],
hash_last_update=files_data['hash_last_update'],
)

tenkan/utils.py Normal file

@@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
"""utils module : various utils"""
import logging
from time import sleep, time
def display_feeds_fetch_progress(fetched_feeds: list) -> None:
"""Display feeds being fetched"""
qsize = len(fetched_feeds)
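    # poll the worker futures until every fetch has completed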
while True:
done = len([x for x in fetched_feeds if x['fetched_content'].done()])
print(f'Fetching feeds [{done}/{qsize}]', end='\r', flush=True)
sleep(0.3)
if done == qsize:
break
def measure(func):
"""
    Decorator to measure the time taken by a function
Used only in debug mode
"""
def wrap_func(*args, **kwargs):
time1 = time()
result = func(*args, **kwargs)
time2 = time()
logging.debug(
'Function %s executed in %ss', func.__name__, time2 - time1
)
return result
return wrap_func

tests/__init__.py Normal file (empty)

tests/cli_test.py Normal file

@@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
import configparser
from pathlib import Path
import pytest
from tenkan.cli import load_args, load_config, run
from tenkan.feedsfile import add_feed, read
def test_config_loaded():
config_file = Path('./tests/data/tenkan.conf')
res = load_config(config_file)
assert isinstance(res, configparser.ConfigParser)
def test_config_tenkan_section_missing():
config_file = Path('./tests/data/tenkan.conf_fail')
with pytest.raises(SystemExit) as pytest_wrapped_e:
load_config(config_file)
assert pytest_wrapped_e.type == SystemExit
assert pytest_wrapped_e.value.code == 1
def test_arg_feedsfile_missing():
args = load_args(['--feedsfile', '/tmp/toto.json', 'list'])
config = Path('./tests/data/tenkan.conf')
with pytest.raises(SystemExit) as pytest_wrapped_e:
run(args, config)
assert pytest_wrapped_e.type == SystemExit
assert pytest_wrapped_e.value.code == 1
# def test_stupid_command():
# args = load_args(['bla'])
# config = Path('./tests/data/tenkan.conf')
# with pytest.raises(SystemExit) as pytest_wrapped_e:
# load_args(args)
# assert pytest_wrapped_e.type == SystemExit
# assert pytest_wrapped_e.value.code == 2
def test_add_cmd_feedsfile_missing(tmp_path):
feeds = tmp_path / 'toto.json'
args = load_args(['--feedsfile', str(feeds), 'add', 'blabla', 'blibli'])
config = Path('./tests/data/tenkan.conf')
run(args, config)
assert Path(f'{feeds}').is_file()
def test_add_bad_feedsfile_folder():
args = load_args(
['--feedsfile', '/tmp/tmp/tmp/titi.json', 'add', 'blabla', 'blibli']
)
config = Path('./tests/data/tenkan.conf')
with pytest.raises(SystemExit) as pytest_wrapped_e:
run(args, config)
assert pytest_wrapped_e.type == SystemExit
assert pytest_wrapped_e.value.code == 1
def test_del_cmd():
feeds = Path('./tests/data/feeds.json')
args = load_args(['--feedsfile', str(feeds), 'delete', 'tutu'])
config = Path('./tests/data/tenkan.conf')
add_feed(file=feeds, feed_name='tutu', feed_url='tata')
run(args, config)
data = read(file=feeds)
assert not data['feeds'].get('tutu')
def test_update_cmd():
feeds = Path('./tests/data/feeds.json')
args = load_args(['--feedsfile', str(feeds), 'update'])
config = load_config(str(Path('./tests/data/tenkan.conf')))
data1 = read(file=feeds)['last_run']
run(args, config)
data2 = read(file=feeds)['last_run']
assert data1 != data2

tests/config_test.py Normal file

@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-
from pathlib import Path
import pytest
from tenkan.config import load_config
def test_configfile_missing():
config = Path('/tmp/toto.conf')
with pytest.raises(SystemExit) as pytest_wrapped_e:
load_config(config)
assert pytest_wrapped_e.type == SystemExit
assert pytest_wrapped_e.value.code == 1

tests/data/feeds.json Normal file

@@ -0,0 +1,10 @@
{
"last_run": "2022-01-12 21:31:10.703787",
"feeds": {
"srad-science": {
"url": "https://srad.jp/science.rss",
"last_update": null,
"hash_last_update": ""
}
}
}

tests/data/feeds.json_fail Normal file

@@ -0,0 +1,7 @@
{
"feeds": {
"srad-science": {
"url": "https://srad.jp/science.rss",
"last_update": null
}
}

tests/data/tenkan.conf Normal file

@@ -0,0 +1,15 @@
[tenkan]
gemini_path = /tmp/
gemini_url = gemini://space.fqserv.eu/feeds/
[filters]
# authors we don't want to read
authors_blacklist = Rabaudy, Élise Costa, Sagalovitch, Pessin, Gallerey
titles_blacklist = Pinned
links_blacklist = slate.fr/audio, slate.fr/grand-format, slate.fr/boire-manger/top-chef
[formatting]
title_size = 120
# feeds with truncated content
# will be fetched and converted using readability-lxml
truncated_feeds = gurumed, slate, cnrs

tests/data/tenkan.conf_fail Normal file

@@ -0,0 +1,15 @@
#[tenkan]
#gemini_path = /tmp/hu/
#gemini_url = gemini://space.fqserv.eu/feeds/
[filters]
# authors we don't want to read
authors_blacklist = Rabaudy, Élise Costa, Sagalovitch, Pessin, Gallerey
titles_blacklist = Pinned
links_blacklist = slate.fr/audio, slate.fr/grand-format, slate.fr/boire-manger/top-chef
[formatting]
title_size = 120
# feeds with truncated content
# will be fetched and converted using readability-lxml
truncated_feeds = gurumed, slate, cnrs

tests/feed_test.py Normal file

@@ -0,0 +1,101 @@
# -*- coding: utf-8 -*-
from datetime import datetime, timezone
import pytest
from tenkan.feed import Feed
data = {
'title': 'bla',
'url': 'bla',
'fetched_content': 'bla',
'last_update': None,
'gmi_url': 'bla',
'json_hash_last_update': 'bl',
'fetched_hash_last_update': 'bla',
}
article_data1 = {
'title': 'article_title',
'article_formatted_title': 'article_formatted_title',
'article_content': {'summary': 'article_content'},
'article_date': datetime(2022, 1, 7, 15, 25, 0, tzinfo=timezone.utc),
'http_url': 'article_link',
'updated': 'Fri, 07 Jan 2022 15:25:00 +0000',
'updated_parsed': datetime(
2022, 1, 7, 15, 25, 0, tzinfo=timezone.utc
).timetuple(),
}
article_data2 = {
'title': 'article_title',
'article_formatted_title': 'article_formatted_title',
'article_content': {'summary': 'article_content'},
'article_date': 'bad_date',
'http_url': 'article_link',
'updated_': 'bad_date',
}
def test_needs_update_no_last_update():
data['json_hash_last_update'] = None
feed = Feed(input_content=data)
assert feed.needs_update() is True
def test_needs_update_last_update_ne_updated_field():
feed = Feed(input_content=data)
assert feed.needs_update() is True
def test_no_need_update():
data['json_hash_last_update'] = 'bla'
feed = Feed(input_content=data)
assert feed.needs_update() is False
def test_content_exported():
# TODO : use article_data
feed = Feed(input_content=data)
expected_data = {
'title': 'bla',
'last_update': None,
'gmi_url': 'bla',
'articles': [],
'hash_last_update': 'bla',
}
assert feed.export_content() == expected_data
def test_date_format_published():
data['articles'] = article_data1
feed = Feed(input_content=data)
assert (
feed._get_article_date(article_data1)
== data['articles']['article_date']
)
def test_bad_date_format():
data['articles'] = article_data2
feed = Feed(input_content=data)
with pytest.raises(SystemExit) as pytest_wrapped_e:
feed._get_article_date(article_data2)
assert pytest_wrapped_e.type == SystemExit
assert pytest_wrapped_e.value.code == 1
def test_article_content_formatted():
feed = Feed(input_content=data, formatting={'truncated_feeds': 'rien'})
res = feed._format_article_content(content='coucou', link='blbl')
assert res == 'coucou'
def test_title_formatted():
feed = Feed(input_content=data, formatting={'title_size': 10})
art = article_data1
art['title'] = 'blabla / bla ?'
res = feed._format_article_title(article=article_data1)
assert res == 'blabla-'

tests/feedsfile_test.py Normal file

@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
from pathlib import Path
from tenkan.feedsfile import (
add_feed,
del_feed,
get_feed_item,
read,
update_feed,
)
def test_get_feed_item():
feeds = Path('./tests/data/feeds.json')
item = get_feed_item(file=feeds, feed_name='srad-science', item='url')
assert item == 'https://srad.jp/science.rss'
def test_update_hash():
feeds = Path('./tests/data/feeds.json')
update_feed(file=feeds, feed_name='srad-science', hash_last_update='blbl')
item = get_feed_item(
file=feeds, feed_name='srad-science', item='hash_last_update'
)
assert item == 'blbl'
update_feed(file=feeds, feed_name='srad-science', hash_last_update='')
def test_add_feed():
feeds = Path('./tests/data/feeds.json')
add_feed(file=feeds, feed_name='toto', feed_url='tata')
data = read(file=feeds)
assert data['feeds'].get('toto')
del_feed(file=feeds, feed_name='toto')
def test_del_feed():
feeds = Path('./tests/data/feeds.json')
add_feed(file=feeds, feed_name='tutu', feed_url='tata')
del_feed(file=feeds, feed_name='tutu')
data = read(file=feeds)
assert not data['feeds'].get('tutu')

tests/files_test.py Normal file

@@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
from datetime import datetime, timezone
from pathlib import Path
from tenkan.files import (
_rebuild_atom_file,
delete_folder,
path_exists,
write_article,
)
data: dict = {
'title': 'bla',
'url': 'bla',
'fetched_content': 'bla',
'last_update': None,
'gmi_url': 'bla',
'articles': [],
}
article_data = {
'article_title': 'article_title',
'article_formatted_title': 'article_formatted_title',
'article_content': {'summary': 'article_content'},
'article_date': datetime(2022, 1, 7, 15, 25, 0, tzinfo=timezone.utc),
'http_url': 'article_link',
'updated': 'Fri, 07 Jan 2022 15:25:00 +0000',
'updated_parsed': datetime(
2022, 1, 7, 15, 25, 0, tzinfo=timezone.utc
).timetuple(),
}
def test_path_exists(tmp_path):
d = tmp_path / 'sub'
d.mkdir()
assert path_exists(d) is True
def test_path_doesnt_exist(tmp_path):
d = tmp_path / 'sub'
assert path_exists(d) is False
def test_article_written(tmp_path):
path = tmp_path / 'sub'
path.mkdir()
date = article_data['article_date']
file_date = date.strftime('%Y-%m-%d_%H-%M-%S')
file_title = article_data['article_formatted_title']
res = write_article(article=article_data, data=data, path=path)
assert res['new_file'] is True
assert (
res['url']
== f"{data['gmi_url']}{data['title']}/{file_date}_{file_title}.gmi"
)
def test_folder_deleted(tmp_path):
subpath = tmp_path / 'sub2'
delete_folder(path=tmp_path, feed_name='sub2')
assert not subpath.exists()
def test_atomfile_built(tmp_path):
data['articles'].append(article_data)
_rebuild_atom_file(path=tmp_path, data=data, urls=['bla'])
assert Path(f'{tmp_path}/atom.xml').is_file()

tests/processing_test.py Normal file

@@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
from json import JSONDecodeError
from pathlib import Path
import feedparser
import pytest
from tenkan.config import load_config
from tenkan.processing import fetch_feeds, process_fetched_feeds
data = [
{
'title': 'bla',
'url': 'bla',
'fetched_content': None,
'last_update': None,
'gmi_url': 'bla',
'json_hash_last_update': 'bli',
'fetched_hash_last_update': 'bli',
}
]
def test_feed_fetched():
feeds = Path('./tests/data/feeds.json')
res = fetch_feeds(feeds_file=feeds, gmi_url='blbl')
assert type(res) is list
assert len(res) == 1
def test_feed_raise_when_shitty_feedfile():
feeds = Path('./tests/data/feeds.json_fail')
with pytest.raises(JSONDecodeError):
fetch_feeds(feeds_file=feeds, gmi_url='blbl')
def test_feed_processed():
config_file = Path('./tests/data/tenkan.conf')
conf = load_config(config_file)
data[0]['fetched_content'] = feedparser.parse(
'https://srad.jp/science.rss'
)
process_fetched_feeds(config=conf, fetched_feeds=data)