init

versun · Apr 29, 2024 · 4afa299 · 4afa299
1 parent c22484c
commit 4afa299
Show file tree

Hide file tree

Showing 8 changed files with 217 additions and 49 deletions.
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
@@ -0,0 +1 @@
+custom: [ "https://afdian.net/a/versun" ]
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
@@ -0,0 +1,39 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+name: Upload Python Package
+
+on:
+  release:
+    types: [published]
+
+permissions:
+  contents: read
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python
+      uses: actions/setup-python@v3
+      with:
+        python-version: '3.10'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install build
+    - name: Build package
+      run: python -m build
+    - name: Publish package
+      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+      with:
+        user: __token__
+        password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2018 The Python Packaging Authority
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,43 @@
+## Feed2Json
+
+Convert an RSS or Atom feed into a [JSON Feed](https://www.jsonfeed.org/version/1.1/) (version 1.1) dictionary.
+
+Installation
+-----------
+`pip install feed2json`
+
+Usage
+-----------
+```python
+from feed2json import feed2json
+# ----------- 
+feed_url = "https://versun.me/feed"
+json_feed:dict = feed2json(feed_url)
+# ----------- 
+feed_html = '''
+   <?xml version="1.0" encoding="utf-8"?>
+   <feed xmlns="http://www.w3.org/2005/Atom">
+
+     <title>Example Feed</title>
+     <link href="http://example.org/"/>
+     <updated>2003-12-13T18:30:02Z</updated>
+     <author>
+       <name>John Doe</name>
+     </author>
+     <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
+
+     <entry>
+       <title>Atom-Powered Robots Run Amok</title>
+       <link href="http://example.org/2003/12/13/atom03"/>
+       <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+       <updated>2003-12-13T18:30:02Z</updated>
+       <summary>Some text.</summary>
+     </entry>
+
+   </feed>
+'''
+json_feed:dict = feed2json(feed_html)
+# ----------- 
+feed_xml_file = 'example_feed.xml'
+json_feed:dict = feed2json(feed_xml_file)
+```
diff --git a/__init__.py b/__init__.py
diff --git a/feed2json.py b/feed2json.py
@@ -1,70 +1,97 @@
-import json
 import feedparser
+import time
+from datetime import datetime
+# https://feedparser.readthedocs.io/en/latest/reference-feed-author_detail.html
+# https://www.jsonfeed.org/version/1.1/
 
+def struct_time_to_rfc3339(struct_time):
+    """Format a UTC ``time.struct_time`` (or 9-tuple) as an RFC 3339 string.
+
+    feedparser documents its ``*_parsed`` dates as 9-tuples in UTC, so the
+    calendar fields are used exactly as given and the result is suffixed
+    with ``Z``. Going through ``time.mktime`` would reinterpret the value in
+    the host's local timezone and could shift the output around DST.
+
+    Returns ``None`` when the value is missing or cannot be converted.
+    """
+    if not struct_time:
+        return None
+    try:
+        year, month, day, hour, minute, second = struct_time[:6]
+        # struct_time permits leap seconds (60/61); datetime does not.
+        dt = datetime(year, month, day, hour, minute, min(second, 59))
+    except (TypeError, ValueError):
+        return None
+    return dt.isoformat() + "Z"
+
+def gfnn(feed, *keys):
+    """Return the first truthy value stored in ``feed`` under any of ``keys``.
+
+    ``gfnn`` is short for "get first non-None". Keys are tried in the order
+    given and falsy values (``None``, ``""``, ``0``, empty containers) are
+    skipped. Returns ``None`` when no key yields a truthy value.
+    """
+    for key in keys:
+        value = feed.get(key)  # single lookup per key
+        if value:
+            return value
+    return None
 
 def feed2json(feed_file_path: str = None, feed_url: str = None, feed_string: str = None) -> dict:
+    """Parse a feed with feedparser and return it as a JSON Feed 1.1 dict.
+
+    Only one source is used: the first truthy value among
+    ``feed_file_path``, ``feed_url`` and ``feed_string`` (in that order) is
+    handed to ``feedparser.parse``.
+
+    Returns a dict with the JSON Feed top-level keys and one entry in
+    ``"items"`` per parsed feed entry; unavailable fields are set to ``None``.
+
+    Raises:
+        ValueError: if no source is given, or parsing yields no feed data.
+    """
-    if feed_file_path or feed_url or feed_string is None:
+    feed_args = [feed_file_path, feed_url, feed_string]
+    if not any(feed_args):
         raise ValueError("Must provide one of feed_file_path, feed_url, or feed_string")
 
-    feed = None
-    if feed_file_path:
-        feed = feedparser.parse(feed_file_path)
-    elif feed_url:
-        feed = feedparser.parse(feed_url)
-    elif feed_string:
-        feed = feedparser.parse(feed_string)
+    # any(feed_args) passed above, so a truthy argument is guaranteed here.
+    feed_arg = next(arg for arg in feed_args if arg)
+    feed = feedparser.parse(feed_arg)
+
+    if not feed.get('feed'):
+        raise ValueError("No feed found")
 
-    if not feed:
-        raise ValueError("No feed provided")
+    feed_info = feed.feed
 
     json_feed = {
-        "version": "https://jsonfeed.org/version/1.1",
-        "title": feed.feed.title,
-        "feed_url": feed.feed.id,
-        "home_page_url": feed.feed.get("link", None)
+        "version": "https://jsonfeed.org/version/1.1", #string
+        "title": gfnn(feed_info, 'title', 'subtitle'), #string
+        # NOTE(review): feedparser's feed 'link' is presumably the site's home
+        # page rather than the feed's own URL - confirm these two mappings.
+        "feed_url": gfnn(feed_info, 'link', 'id'), #string
+        "home_page_url": gfnn(feed_info, 'id', 'link'), #string
+        "description": gfnn(feed_info, 'subtitle','info'), #string
+        "icon": gfnn(feed_info, 'icon','logo'), #string
+        "favicon": gfnn(feed_info, 'logo','icon'), #string
+        "authors": [ #array of objects
+                    {"name": feed_info.get('author_detail', {}).get('name'), #string
+                     "url": gfnn(feed_info.get('author_detail', {}),'href','email'), #string
+                     "avatar": None, #string
+                    },
+                   ],
+        "language": gfnn(feed_info, 'language'), #string
+        "expired": None, #boolean
+        "hub": None, #array of objects
+        "items": [], #array
     }
 
-    if hasattr(feed.feed, "subtitle"):
-        json_feed["description"] = feed.feed.subtitle
-    if hasattr(feed.feed, "updated"):
-        json_feed["updated"] = feed.feed.updated
-
-    json_feed["items"] = []
     for entry in feed.entries:
+        # JSON Feed authors are objects; emit none rather than [None] when
+        # the entry has no author.
+        author_name = entry.get('author')
+        item_authors = [{"name": author_name}] if author_name else []
+
+        # JSON Feed tags are plain strings. feedparser tags carry 'term' and
+        # an optional 'label', so fall back to 'term' when 'label' is empty.
+        tag_names = []
+        for tag in entry.get('tags', []):
+            tag_name = (tag.get('label') or tag.get('term')) if tag else None
+            if tag_name:
+                tag_names.append(tag_name)
+
         item = {
-            "id": entry.id,
-            "url": entry.link,
-            "title": entry.title,
-        }
-        if hasattr(entry, "summary"):
-            item["content_html"] = entry.summary
-        if hasattr(entry, "published"):
-            item["date_published"] = entry.published
-        if hasattr(entry, "updated"):
-            item["date_modified"] = entry.updated
-        if hasattr(entry, "author"):
-            authors = entry.author
-            if not isinstance(authors, list):
-                authors = [authors]
-            item["authors"] = [{"name": author} for author in authors]
-        if hasattr(entry, "content"):
+            "id": entry.get('id'), #string
+            "url": entry.get('link'), #string
+            "external_url": None, #string
+            "title": entry.get('title'), #string
+            "content_text": None, #string
+            "content_html": None, #string
+            "summary": entry.get('summary'), #string
+            "image": None, #url string
+            "banner_image": None, #url string
+            "date_published": struct_time_to_rfc3339(entry.get('published_parsed')),#string RFC 3339 format: 2010-02-07T14:04:00-05:00.
+            "date_modified": struct_time_to_rfc3339(entry.get('updated_parsed')),#string RFC 3339 format: 2010-02-07T14:04:00-05:00.
+            "authors": item_authors, #array of objects
+            "tags": tag_names, #array of strings
+            "language": None, #string
+            # Array of objects shaped like {"url": str, "mime_type": str,
+            # "title": str, "size_in_bytes": int, "duration_in_seconds": int};
+            # filled from the entry's enclosures below.
+            "attachments": [],
+            }
+
+        if len(entry.get('content',[])) > 0:
             item["content_html"] = ""
-            for content in entry.content:
-                if content["type"] == "text/html":
+            item["content_text"] = ""
+            for content in entry['content']:
+                if content["type"] == "text/plain":
+                    item["content_text"] += content["value"]
+                else:
                     item["content_html"] += content["value"]
-                elif content["type"] == "text/plain":
-                    item["content_text"] = content["value"]
-
-        if hasattr(entry, "categories"):
-            item["tags"] = [{"name": category} for category in entry.categories]
 
         if hasattr(entry, "enclosures"):
-            item["attachments"] = [{"url": enclosure["href"], 'length': enclosure["length"], 'type': enclosure["type"]}
+            # 'length' may be absent or non-numeric in real feeds; report None
+            # instead of raising and losing the whole conversion.
+            item["attachments"] = [{"url": enclosure.get("href"),
+                                    'size_in_bytes': int(enclosure["length"]) if str(enclosure.get("length", "")).isdigit() else None,
+                                    'mime_type': enclosure.get("type")}
                                    for enclosure in entry.enclosures]
 
-        if hasattr(entry, "summary"):
-            item["summary"] = entry.summary
-
         json_feed["items"].append(item)
 
-    return json_feed
+    return json_feed
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,38 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.wheel]
+packages = ["./"]
+
+[project]
+name = "feed2json"
+version = "2024.4.29"
+authors = [
+  { name="Versun", email="[email protected]" },
+]
+description = "Convert RSS feed to JSON Feed"
+readme = "README.md"
+requires-python = ">=3.10"
+license = {text = "MIT License"}
+classifiers = [
+    "Environment :: Web Environment",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3 :: Only",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+]
+dependencies = [
+    "feedparser>=6.0.11",
+]
+
+[project.urls]
+Homepage = "https://github.com/versun/feed2json"
+Repository = "https://github.com/versun/feed2json.git"
+Issues = "https://github.com/versun/feed2json/issues"
diff --git a/requirements.text b/requirements.text
@@ -0,0 +1 @@
+feedparser