Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
versun committed Apr 29, 2024
1 parent c22484c commit 4afa299
Show file tree
Hide file tree
Showing 8 changed files with 217 additions and 49 deletions.
1 change: 1 addition & 0 deletions .github/FUNDING.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
custom: [ "https://afdian.net/a/versun" ]
39 changes: 39 additions & 0 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package

on:
release:
types: [published]

permissions:
contents: read

jobs:
deploy:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.10'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install build
- name: Build package
run: python -m build
- name: Publish package
uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
19 changes: 19 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Copyright (c) 2018 The Python Packaging Authority

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
43 changes: 43 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
## Feed2Json

Convert an RSS or Atom feed (URL, file path, or raw XML string) to JSON Feed 1.1

Installation
-----------
`pip install feed2json`

Usage
-----------
```python
from feed2json import feed2json
# -----------
feed_url = "https://versun.me/feed"
json_feed:dict = feed2json(feed_url)
# -----------
feed_html = '''
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Example Feed</title>
<link href="http://example.org/"/>
<updated>2003-12-13T18:30:02Z</updated>
<author>
<name>John Doe</name>
</author>
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
<entry>
<title>Atom-Powered Robots Run Amok</title>
<link href="http://example.org/2003/12/13/atom03"/>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<updated>2003-12-13T18:30:02Z</updated>
<summary>Some text.</summary>
</entry>
</feed>
'''
json_feed:dict = feed2json(feed_html)
# -----------
feed_xml_file = 'example_feed.xml'
json_feed:dict = feed2json(feed_xml_file)
```
Empty file added __init__.py
Empty file.
125 changes: 76 additions & 49 deletions feed2json.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,97 @@
import json
import feedparser
import time
from datetime import datetime
# https://feedparser.readthedocs.io/en/latest/reference-feed-author_detail.html
# https://www.jsonfeed.org/version/1.1/

def struct_time_to_rfc3339(struct_time):
    """Format a parsed feed date as an RFC 3339 timestamp with a ``Z`` suffix.

    The input is treated as already being in UTC (the result is always
    suffixed with ``Z``), so the calendar fields are copied verbatim instead
    of being round-tripped through the machine's local timezone. A round trip
    via ``time.mktime``/``datetime.fromtimestamp`` shifts the value by an hour
    whenever the local zone is observing DST, because the incoming tuple
    carries ``tm_isdst=0``.

    Args:
        struct_time: A ``time.struct_time`` (or any sequence whose first six
            items are year, month, day, hour, minute, second), or ``None``.

    Returns:
        A string such as ``"2010-02-07T14:04:00Z"``, or ``None`` when the
        input is missing or cannot be converted.
    """
    try:
        year, month, day, hour, minute, second = struct_time[:6]
        # struct_time allows leap seconds (60/61) but datetime does not;
        # clamp so such a date is still reported rather than dropped.
        dt = datetime(year, month, day, hour, minute, min(second, 59))
    except (TypeError, ValueError, OverflowError):
        # None, a too-short tuple, non-integer fields, or out-of-range values.
        return None
    return dt.isoformat() + "Z"

def gfnn(feed, *keys):
    """Return the first truthy value stored in *feed* under any of *keys*.

    Keys are tried in the order given. Values that are missing or falsy
    (``None``, ``""``, ``0``, empty containers) are skipped, so this is
    "first non-empty" rather than strictly "first non-None".

    Args:
        feed: A mapping exposing ``.get(key)``.
        *keys: Candidate keys, in priority order.

    Returns:
        The first truthy value found, or ``None`` if there is none.
    """
    for key in keys:
        # Look each key up once; the value is both the test and the result.
        value = feed.get(key)
        if value:
            return value
    return None

def _entry_authors(entry):
    """Build the JSON Feed ``authors`` array (a list of objects) for one entry."""
    detail = entry.get('author_detail') or {}
    name = detail.get('name') or entry.get('author')
    url = gfnn(detail, 'href', 'email')
    if not (name or url):
        # No author information at all: emit an empty array, not [None].
        return []
    return [{"name": name, "url": url, "avatar": None}]


def _entry_tags(entry):
    """Collect tag names for one entry, preferring ``label`` over ``term``."""
    names = []
    for tag in entry.get('tags') or []:
        if not tag:
            continue
        # ``label`` may be absent or None; fall back to ``term`` in that case.
        name = tag.get('label') or tag.get('term')
        if name:
            names.append(name)
    return names


def _entry_attachments(entry):
    """Convert an entry's enclosures into JSON Feed attachment objects."""
    attachments = []
    for enclosure in entry.get('enclosures') or []:
        url = enclosure.get('href')
        if not url:
            # An attachment without a URL is useless to a consumer; skip it.
            continue
        try:
            size = int(enclosure.get('length'))
        except (TypeError, ValueError):
            # Length missing, empty, or not numeric.
            size = None
        attachments.append({
            "url": url,
            "mime_type": enclosure.get('type'),
            "size_in_bytes": size,
        })
    return attachments


def feed2json(feed_file_path: str = None, feed_url: str = None, feed_string: str = None) -> dict:
    """Parse a feed with feedparser and return it as a JSON Feed 1.1 dict.

    Exactly one source is used: the first truthy argument among
    ``feed_file_path``, ``feed_url`` and ``feed_string`` (in that order) is
    handed to ``feedparser.parse``.

    Args:
        feed_file_path: Path to a local feed file.
        feed_url: URL of a remote feed.
        feed_string: Raw feed document.

    Returns:
        A dict with the JSON Feed top-level keys (``version``, ``title``,
        ``feed_url``, ``home_page_url``, ``description``, ``icon``,
        ``favicon``, ``authors``, ``language``, ``expired``, ``hub``,
        ``items``). Fields with no source data are ``None``.

    Raises:
        ValueError: If no source argument is given, or the parsed result
            contains no feed-level data.
    """
    feed_args = [feed_file_path, feed_url, feed_string]
    if not any(feed_args):
        raise ValueError("Must provide one of feed_file_path, feed_url, or feed_string")

    # any() above guarantees at least one truthy argument exists.
    feed_arg = next(arg for arg in feed_args if arg)
    feed = feedparser.parse(feed_arg)

    if not feed.get('feed'):
        raise ValueError("No feed found")

    feed_info = feed.feed
    feed_author = feed_info.get('author_detail', {})

    json_feed = {
        "version": "https://jsonfeed.org/version/1.1",
        "title": gfnn(feed_info, 'title', 'subtitle'),
        "feed_url": gfnn(feed_info, 'link', 'id'),
        "home_page_url": gfnn(feed_info, 'id', 'link'),
        "description": gfnn(feed_info, 'subtitle', 'info'),
        "icon": gfnn(feed_info, 'icon', 'logo'),
        "favicon": gfnn(feed_info, 'logo', 'icon'),
        "authors": [
            {
                "name": feed_author.get('name'),
                "url": gfnn(feed_author, 'href', 'email'),
                "avatar": None,
            },
        ],
        "language": gfnn(feed_info, 'language'),
        "expired": None,
        "hub": None,
        "items": [],
    }

    for entry in feed.entries:
        item = {
            "id": entry.get('id'),
            "url": entry.get('link'),
            "external_url": None,
            "title": entry.get('title'),
            "content_text": None,
            "content_html": None,
            "summary": entry.get('summary'),
            "image": None,
            "banner_image": None,
            # RFC 3339 timestamps, e.g. 2010-02-07T14:04:00Z
            "date_published": struct_time_to_rfc3339(entry.get('published_parsed')),
            "date_modified": struct_time_to_rfc3339(entry.get('updated_parsed')),
            "authors": _entry_authors(entry),
            "tags": _entry_tags(entry),
            "language": None,
            "attachments": _entry_attachments(entry),
        }

        contents = entry.get('content') or []
        if contents:
            # text/plain parts become content_text; every other type is
            # treated as HTML. Parts of the same kind are concatenated.
            item["content_text"] = "".join(
                part["value"] for part in contents if part["type"] == "text/plain"
            )
            item["content_html"] = "".join(
                part["value"] for part in contents if part["type"] != "text/plain"
            )

        json_feed["items"].append(item)

    return json_feed
38 changes: 38 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["./"]

[project]
name = "feed2json"
version = "2024.4.29"
authors = [
{ name="Versun", email="[email protected]" },
]
description = "Convert RSS feed to JSON Feed"
readme = "README.md"
requires-python = ">=3.10"
license = {text = "MIT License"}
classifiers = [
"Environment :: Web Environment",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Software Development :: Libraries :: Python Modules",
]
dependencies = [
"feedparser>=6.0.11",
]

[project.urls]
Homepage = "https://github.com/versun/feed2json"
Repository = "https://github.com/versun/feed2json.git"
Issues = "https://github.com/versun/feed2json/issues"
1 change: 1 addition & 0 deletions requirements.text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
feedparser

0 comments on commit 4afa299

Please sign in to comment.