Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
versun committed Apr 24, 2024
0 parents commit c22484c
Show file tree
Hide file tree
Showing 2 changed files with 233 additions and 0 deletions.
163 changes: 163 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Project
/static
data/
*.ipynb

# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python
env*
*.csv
.idea/
.DS_Store
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
pytestdebug.log

# Translations
*.mo
*.pot

# Django stuff:
#*.log
local_settings.py
*.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/
doc/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
env/
venv/
ENV/
env.bak/
venv.bak/
pythonenv*

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# profiling data
.prof

# Virtualenv
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
.Python
[Bb]in
[Ii]nclude
[Ll]ib
[Ll]ib64
[Ll]ocal
[Ss]cripts
pyvenv.cfg
.venv
pip-selfcheck.json


# End of https://www.toptal.com/developers/gitignore/api/python
70 changes: 70 additions & 0 deletions feed2json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import json
import feedparser


def _entry_to_item(entry) -> dict:
    """Convert one parsed feed entry into a JSON Feed item dict.

    ``entry`` is one element of the ``entries`` list returned by
    ``feedparser.parse``. Fields the entry does not carry are either left
    out of the item or set to ``None`` instead of raising.
    """
    link = entry.get("link")
    item = {
        # Fall back to the link so entries without an explicit id still
        # get an identifier.
        "id": entry.get("id") or link,
        "url": link,
        "title": entry.get("title"),
    }

    has_summary = hasattr(entry, "summary")
    if has_summary:
        # Used as the HTML body unless explicit HTML content is found below.
        item["content_html"] = entry.summary
    if hasattr(entry, "published"):
        # NOTE(review): the date strings are passed through as parsed;
        # confirm whether consumers need them normalised to RFC 3339.
        item["date_published"] = entry.published
    if hasattr(entry, "updated"):
        item["date_modified"] = entry.updated

    if hasattr(entry, "author"):
        authors = entry.author
        if not isinstance(authors, list):
            authors = [authors]
        item["authors"] = [{"name": author} for author in authors]

    if hasattr(entry, "content"):
        html_parts = []
        for content in entry.content:
            content_type = content.get("type")
            if content_type == "text/html":
                html_parts.append(content.get("value", ""))
            elif content_type == "text/plain":
                item["content_text"] = content.get("value", "")
        # Only replace the summary-based body when real HTML content exists,
        # so an entry with plain-text-only content keeps its summary.
        if html_parts:
            item["content_html"] = "".join(html_parts)

    if hasattr(entry, "categories"):
        # NOTE(review): the JSON Feed spec describes ``tags`` as a list of
        # plain strings; the existing {"name": ...} shape is kept here for
        # backward compatibility - confirm what consumers expect.
        item["tags"] = [{"name": category} for category in entry.categories]

    if hasattr(entry, "enclosures"):
        # ``length`` and ``type`` are optional on enclosures, so read them
        # with .get() rather than failing the whole conversion.
        item["attachments"] = [
            {
                "url": enclosure["href"],
                "length": enclosure.get("length"),
                "type": enclosure.get("type"),
            }
            for enclosure in entry.enclosures
        ]

    if has_summary:
        item["summary"] = entry.summary

    return item


def feed2json(feed_file_path: str = None, feed_url: str = None, feed_string: str = None) -> dict:
    """Parse a feed with feedparser and return it as a JSON Feed 1.1 dict.

    The first truthy argument, in the order ``feed_file_path``,
    ``feed_url``, ``feed_string``, is handed to ``feedparser.parse``.

    Args:
        feed_file_path: Path of a feed file to parse.
        feed_url: URL of a feed to parse. Also used as the output
            ``feed_url`` when the parsed feed carries no id.
        feed_string: Feed document text to parse.

    Returns:
        A dict with the keys ``version``, ``title``, ``feed_url``,
        ``home_page_url`` and ``items``, plus ``description`` and
        ``updated`` when the feed provides them.

    Raises:
        ValueError: If all three arguments are ``None``, if none of them is
            truthy, or if the parser returns an empty result.
    """
    # Reject the call only when *no* source was given. The previous
    # `a or b or c is None` test fired whenever a path or URL was passed.
    if feed_file_path is None and feed_url is None and feed_string is None:
        raise ValueError("Must provide one of feed_file_path, feed_url, or feed_string")

    source = feed_file_path or feed_url or feed_string
    if not source:
        raise ValueError("No feed provided")

    feed = feedparser.parse(source)
    if not feed:
        raise ValueError("No feed provided")

    meta = feed.feed
    json_feed = {
        "version": "https://jsonfeed.org/version/1.1",
        "title": meta.get("title", ""),
        # Not every feed carries an id; fall back to the URL it was
        # fetched from when the caller supplied one.
        "feed_url": meta.get("id") or feed_url,
        "home_page_url": meta.get("link", None),
    }

    if hasattr(meta, "subtitle"):
        json_feed["description"] = meta.subtitle
    if hasattr(meta, "updated"):
        json_feed["updated"] = meta.updated

    json_feed["items"] = [_entry_to_item(entry) for entry in feed.entries]

    return json_feed

0 comments on commit c22484c

Please sign in to comment.