From 9afaab9a395bf8e78405bc9a9859dab6d1f51ec5 Mon Sep 17 00:00:00 2001
From: jere344 <86294972+jere344@users.noreply.github.com>
Date: Fri, 4 Aug 2023 01:58:40 +0200
Subject: [PATCH 1/2] Create 69shu.py

I had to override get_soup to make the request directly and force the GBK encoding, because I didn't know how else to deal with the encoding.
The other Chinese sources work fine out of the box, though; I don't know why.
---
 sources/zh/69shu.py | 133 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100644 sources/zh/69shu.py

diff --git a/sources/zh/69shu.py b/sources/zh/69shu.py
new file mode 100644
index 000000000..2afd0a372
--- /dev/null
+++ b/sources/zh/69shu.py
@@ -0,0 +1,133 @@
+# -*- coding: utf-8 -*-
+import logging
+from bs4 import Tag
+from lncrawl.core.crawler import Crawler
+import urllib.parse
+
+import requests
+from bs4 import BeautifulSoup
+
+headers = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 Firefox/101.0",
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+    "Accept-Language": "en-US,en;q=0.5",
+    "Content-Type": "application/x-www-form-urlencoded",
+    "Origin": "https://www.69shu.com",
+    "DNT": "1",
+    "Alt-Used": "www.69shu.com",
+    "Connection": "keep-alive",
+    "Upgrade-Insecure-Requests": "1",
+    "Sec-Fetch-Dest": "document",
+    "Sec-Fetch-Mode": "navigate",
+    "Sec-Fetch-Site": "same-origin",
+    "Sec-Fetch-User": "?1",
+}
+
+logger = logging.getLogger(__name__)
+search_url = "https://www.69shu.com/modules/article/search.php"
+
+
+class sixnineshu(Crawler):
+    base_url = "https://www.69shu.com/"
+
+    def get_soup(self, url):
+        """overwrite the get_soup function to set the encoding"""
+        data = requests.get(url, headers=headers)
+        data.encoding = "gbk"
+        soup = BeautifulSoup(data.text, "html.parser")
+        return soup
+
+    def search_novel(self, query):
+        query = urllib.parse.quote(query.encode("gbk"))
+        data = requests.post(
+            "https://www.69shu.com/modules/article/search.php",
+            headers=headers,
+            data=f"searchkey={query}&searchtype=all",
+        )
+        data.encoding = "gbk"
+
+        soup = BeautifulSoup(data.text, "html.parser")
+
+        # If only one result is found, we will be redirected to the novel page
+        # We can check the URL to see if we are redirected or not
+
+        redirected = data.url != search_url
+
+        if not redirected:
+            results = []
+            for novel in soup.select("div.newbox ul li"):
+                results.append(
+                    {
+                        "title": novel.select_one("h3 a").text.title(),
+                        "url": novel.select_one("a")["href"],
+                        "info": "Latest: %s" % novel.select_one("div.zxzj p").text,
+                    }
+                )
+
+        else:
+            results = [
+                {
+                    "title": soup.select_one("div.booknav2 h1").text.strip(),
+                    "url": data.url,
+                    "info": "Latest: %s" % soup.select_one("div.qustime ul li").text,
+                }
+            ]
+
+        return results
+
+    def read_novel_info(self):
+        logger.debug("Visiting %s", self.novel_url)
+        soup = self.get_soup(self.novel_url)
+
+        possible_title = soup.select_one("div.booknav2 h1")
+        assert possible_title, "No novel title"
+        self.novel_title = possible_title.text.strip()
+        logger.info("Novel title: %s", self.novel_title)
+
+        possible_image = soup.select_one("div.bookimg2 img")
+        if isinstance(possible_image, Tag):
+            self.novel_cover = self.absolute_url(possible_image["src"])
+        logger.info("Novel cover: %s", self.novel_cover)
+
+        possible_author = soup.select_one(f'.booknav2 p a[href*="authorarticle"]')
+        if isinstance(possible_author, Tag):
+            self.novel_author = possible_author.text.strip()
+        logger.info("Novel Author: %s", self.novel_author)
+
+        possible_synopsis = soup.select_one("div.navtxt p")
+        if isinstance(possible_synopsis, Tag):
+            self.novel_synopsis = possible_synopsis.text.strip()
+        logger.info("Novel Synopsis: %s", self.novel_synopsis)
+
+        # Only one category per novel on this website
+        possible_tag = soup.select_one(f'.booknav2 p a[href*="top"]')
+        if isinstance(possible_tag, Tag):
+            self.novel_tags = [possible_tag.text.strip()]
+        logger.info("Novel Tag: %s", self.novel_tags)
+
+        # https://www.69shu.com/txt/A43616.htm -> https://www.69shu.com/A43616/
+        soup = self.get_soup(self.novel_url.replace("/txt/", "/").replace(".htm", "/"))
+
+        for li in soup.select("div.catalog ul li"):
+            chap_id = int(li["data-num"])
+            vol_id = len(self.chapters) // 100 + 1
+            if len(self.chapters) % 100 == 0:
+                self.volumes.append({"id": vol_id})
+            self.chapters.append(
+                {
+                    "id": chap_id,
+                    "volume": vol_id,
+                    "title": li.text.strip(),
+                    "url": self.absolute_url(li.select_one("a")["href"]),
+                }
+            )
+
+    def download_chapter_body(self, chapter):
+        soup = self.get_soup(chapter["url"])
+
+        contents = soup.select_one("div.txtnav")
+        contents.select_one("h1").decompose()
+        contents.select_one("div.txtinfo").decompose()
+        contents.select_one("div#txtright").decompose()
+
+        return self.cleaner.extract_contents(contents)

From 5ea6287bd5e52ec23832160d22c33f25e5e784ae Mon Sep 17 00:00:00 2001
From: jere344 <86294972+jere344@users.noreply.github.com>
Date: Fri, 4 Aug 2023 02:03:10 +0200
Subject: [PATCH 2/2] Update 69shu.py

Remove the unnecessary f-string prefixes from the two selector strings, which contain no placeholders.
---
 sources/zh/69shu.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sources/zh/69shu.py b/sources/zh/69shu.py
index 2afd0a372..e2eaead0d 100644
--- a/sources/zh/69shu.py
+++ b/sources/zh/69shu.py
@@ -89,7 +89,7 @@ def read_novel_info(self):
             self.novel_cover = self.absolute_url(possible_image["src"])
         logger.info("Novel cover: %s", self.novel_cover)
 
-        possible_author = soup.select_one(f'.booknav2 p a[href*="authorarticle"]')
+        possible_author = soup.select_one('.booknav2 p a[href*="authorarticle"]')
         if isinstance(possible_author, Tag):
             self.novel_author = possible_author.text.strip()
         logger.info("Novel Author: %s", self.novel_author)
@@ -100,7 +100,7 @@ def read_novel_info(self):
         logger.info("Novel Synopsis: %s", self.novel_synopsis)
 
         # Only one category per novel on this website
-        possible_tag = soup.select_one(f'.booknav2 p a[href*="top"]')
+        possible_tag = soup.select_one('.booknav2 p a[href*="top"]')
         if isinstance(possible_tag, Tag):
             self.novel_tags = [possible_tag.text.strip()]
         logger.info("Novel Tag: %s", self.novel_tags)