Skip to content

Commit

Permalink
Merge branch 'stashapp:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
gimmeliina authored Dec 29, 2023
2 parents fea9615 + 391490c commit 312a5b8
Show file tree
Hide file tree
Showing 367 changed files with 16,313 additions and 5,355 deletions.
51 changes: 51 additions & 0 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Builds the scraper index from both the stable and develop branches and
# publishes the result (_site/) to GitHub Pages.
name: Deploy repository to Github Pages

on:
  push:
    branches: [master, stable]

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
permissions:
  contents: read
  pages: write
  id-token: write

jobs:
  build:
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout main
        # NOTE(review): actions/checkout@v2 runs on a deprecated Node runtime;
        # consider upgrading to @v4 when convenient.
        uses: actions/checkout@v2
        with:
          path: master
          ref: master
          # full history is required so build_site.sh can derive per-scraper
          # versions/dates from git log
          fetch-depth: '0'
      - run: |
          cd master
          ./build_site.sh ../_site/stable
      - name: Checkout Stable
        uses: actions/checkout@v2
        with:
          path: dev
          # replace with develop tag/branch when necessary
          ref: master
          fetch-depth: '0'
      - run: |
          cd dev
          ../master/build_site.sh ../_site/develop
      # uploads the default _site/ directory as the Pages artifact
      - uses: actions/upload-pages-artifact@v2

  deploy:
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-22.04
    needs: build
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v2

2 changes: 1 addition & 1 deletion .github/workflows/validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ jobs:
- uses: actions/checkout@v2
- uses: actions/setup-node@v1
with:
node-version: '12.x'
node-version: '14.x'
- run: cd ./validator && yarn install --frozen-lockfile
- run: node ./validate.js --ci
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ yarn-error.log
# Scraper-generated files
/scrapers/*.ini
**/__pycache__/

/_site
6 changes: 6 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"yaml.schemas": {
"validator/scraper.schema.json": "*.yml"
},
"python.analysis.typeCheckingMode": "basic"
}
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# CommunityScrapers
This is a public repository containing scrapers created by the Stash Community.

**:exclamation: Make sure to read ALL of the instructions here before requesting any help in the discord channel. For a more user friendly / step by step guide you can checkout [stashdb's guide to scraping](https://guidelines.stashdb.org/docs/guide-to-scraping/):exclamation:**
**:exclamation: Make sure to read ALL of the instructions here before requesting any help in the discord channel. For a more user friendly / step by step guide you can checkout [stashdb's guide to scraping](https://docs.stashapp.cc/beginner-guides/guide-to-scraping/):exclamation:**

To download the scrapers you can clone the git repo or download directly any of the scrapers.

Expand Down
558 changes: 413 additions & 145 deletions SCRAPERS-LIST.md

Large diffs are not rendered by default.

89 changes: 89 additions & 0 deletions build_site.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/bin/bash

# builds a repository of scrapers
# outputs to _site with the following structure:
# index.yml
# <scraper_id>.zip
# Each zip file contains the scraper.yml file and any other files in the same directory

outdir="$1"
if [ -z "$outdir" ]; then
  outdir="_site"
fi

rm -rf "$outdir"
mkdir -p "$outdir"

# Packages a single scraper ($1 = path to its .yml or package file) into
# "$outdir/<scraper_id>.zip" and appends its metadata entry to index.yml.
buildScraper()
{
  f=$1
  dir=$(dirname "$f")

  # get the scraper id from the filename
  scraper_id=$(basename "$f" .yml)
  versionFile=$f
  # dependency packages are named "package"; use the directory name as the id
  if [ "$scraper_id" == "package" ]; then
    scraper_id=$(basename "$dir")
  fi

  # for multi-file scrapers, version against the whole directory
  if [ "$dir" != "./scrapers" ]; then
    versionFile="$dir"
  fi

  echo "Processing $scraper_id"

  # version/date come from the most recent git commit touching the scraper
  version=$(git log -n 1 --pretty=format:%h -- "$versionFile")
  updated=$(TZ=UTC0 git log -n 1 --date="format-local:%F %T" --pretty=format:%ad -- "$versionFile")

  # create the zip file
  # copy other files
  zipfile=$(realpath "$outdir/$scraper_id.zip")

  # pull metadata out of the yml: display name, files to exclude, dependencies
  name=$(grep "^name:" "$f" | cut -d' ' -f2- | sed -e 's/\r//' -e 's/^"\(.*\)"$/\1/')
  ignore=$(grep "^# ignore:" "$f" | cut -c 10- | sed -e 's/\r//')
  dep=$(grep "^# requires:" "$f" | cut -c 12- | sed -e 's/\r//')

  # always ignore package file
  ignore="-x $ignore package"

  pushd "$dir" > /dev/null
  if [ "$dir" != "./scrapers" ]; then
    # $ignore is intentionally unquoted: it expands to multiple zip -x arguments
    zip -r "$zipfile" . ${ignore} > /dev/null
  else
    zip "$zipfile" "$scraper_id.yml" > /dev/null
  fi
  popd > /dev/null

  # write to spec index
  echo "- id: $scraper_id
  name: $name
  version: $version
  date: $updated
  path: $scraper_id.zip
  sha256: $(sha256sum "$zipfile" | cut -d' ' -f1)" >> "$outdir"/index.yml

  # handle dependencies
  if [ -n "$dep" ]; then
    echo "  requires:" >> "$outdir"/index.yml
    for d in ${dep//,/ }; do
      echo "    - $d" >> "$outdir"/index.yml
    done
  fi

  echo "" >> "$outdir"/index.yml
}

# find all yml files in ./scrapers - these are packaged individually
for f in ./scrapers/*.yml; do
  buildScraper "$f"
done

# quote the pattern so find receives the glob rather than shell-expanded names;
# -r prevents read from mangling backslashes in paths
find ./scrapers/ -mindepth 2 -name "*.yml" -print0 | while read -r -d $'\0' f; do
  buildScraper "$f"
done

# handle dependency packages
find ./scrapers/ -mindepth 2 -name "package" -print0 | while read -r -d $'\0' f; do
  buildScraper "$f"
done
35 changes: 35 additions & 0 deletions scrapers/1passforallsites.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Scraper for 1passforallsites.com scene pages (XPath-based).
name: "1 Pass For All Sites"
sceneByURL:
  - action: scrapeXPath
    url:
      - 1passforallsites.com/episode/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Studio:
        Name:
          selector: //a[contains(@href,'?site=')]
      Title:
        selector: //title
        postProcess:
          - replace:
              # strip the " - 1 Pass For All Sites" suffix from the page title
              - regex: (^.+) - 1 .+$
                with: $1
      Details: //div[@class="sp-info-txt"]/p/text()
      Performers:
        Name:
          selector: //p[@class="sp-info-name"]/a/text()
      Tags:
        Name:
          selector: //p[@class="niches-list"]/a/text()
      Date:
        selector: //li[contains(text(),"Added:")]
        postProcess:
          - replace:
              - regex: "Added\\: (.+)"
                with: $1
          - parseDate: 2 Jan 2006
      Image: //video/@poster

# Last Updated July 12, 2023
Original file line number Diff line number Diff line change
@@ -1,30 +1,31 @@
# Algolia-backed scraper; the shared script lives in ../Algolia/Algolia.py.
# requires: Algolia
name: "21Naturals"
sceneByURL:
  - action: script
    url:
      - 21naturals.com/en/video
    script:
      - python
      - ../Algolia/Algolia.py
      - 21naturals
sceneByFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 21naturals
sceneByName:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 21naturals
    - searchName
sceneByQueryFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 21naturals
    - validName
# Last Updated March 23, 2022
Original file line number Diff line number Diff line change
@@ -1,30 +1,31 @@
# Algolia-backed scraper; the shared script lives in ../Algolia/Algolia.py.
# requires: Algolia
name: "21Sextreme"
sceneByURL:
  - action: script
    url:
      - 21sextreme.com/en/video
    script:
      - python
      - ../Algolia/Algolia.py
      - 21sextreme
sceneByFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 21sextreme
sceneByName:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 21sextreme
    - searchName
sceneByQueryFragment:
  action: script
  script:
    - python
    - ../Algolia/Algolia.py
    - 21sextreme
    - validName
# Last Updated March 23, 2022
Original file line number Diff line number Diff line change
@@ -1,30 +1,31 @@
# requires: Algolia
name: "21Sextury"
sceneByURL:
- action: script
url:
- 21sextury.com/en/video
script:
- python
- Algolia.py
- ../Algolia/Algolia.py
- 21sextury
sceneByFragment:
action: script
script:
- python
- Algolia.py
- ../Algolia/Algolia.py
- 21sextury
sceneByName:
action: script
script:
- python
- Algolia.py
- ../Algolia/Algolia.py
- 21sextury
- searchName
sceneByQueryFragment:
action: script
script:
- python
- Algolia.py
- ../Algolia/Algolia.py
- 21sextury
- validName
galleryByURL:
Expand All @@ -33,7 +34,7 @@ galleryByURL:
- 21sextury.com/en/photo/
script:
- python
- Algolia.py
- ../Algolia/Algolia.py
- 21sextury
- gallery
# Last Updated December 22, 2022
19 changes: 16 additions & 3 deletions scrapers/AMAMultimedia.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ sceneByURL:
- holed.com/video/
- lubed.com/video/
- mom4k.com/video/
- momcum.com/video/
- myveryfirsttime.com/video/
- nannyspy.com/video/
- passion-hd.com/video/
Expand Down Expand Up @@ -73,10 +74,22 @@ xPathScrapers:
- regex: ([?&]img[wh]=\d+)+$
with:
Studio:
Name: //div[@id="navigation"]/h5/a/@alt
Name:
selector: //div[@id="navigation"]/h5/a/@alt
postProcess:
- map:
Baeb: BAEB
Casting Couch X: Casting Couch-X
Cum4K: Cum 4K
Exotic4k: Exotic 4K
GirlCum: Girl Cum
MomCum: Mom Cum
NannySpy: Nanny Spy
SpyFam: Spy Fam
Tiny4K: Tiny 4K
WetVR: Wet VR
Date:
selector: //div[contains(text(), 'RELEASED')]/span/text()
postProcess:
- parseDate: January 02, 2006

# Last Updated March 26, 2022
# Last Updated February 06, 2023
37 changes: 37 additions & 0 deletions scrapers/APOVStory.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Scraper for apovstory.com trailer pages (XPath-based).
name: A POV Story

sceneByURL:
  - action: scrapeXPath
    url:
      - apovstory.com/trailers/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Studio:
        Name:
          fixed: "A POV Story"
      Title:
        selector: //div[@class = 'trailerArea centerwrap']/h3
      Details:
        selector: //div[@class = 'trailerContent']//*//text()
        concat: "\n\n"
        postProcess:
          - replace:
              # drop the leading "Description:" label
              - regex: ^Description:\s*
                with:
      Tags:
        Name: //li/span[contains(text(),'CATEGORIES')]/parent::li//a//text()
      Performers:
        Name: //li/span[contains(text(),'FEATURING')]/parent::li//a//text()
      Image:
        selector: //div[@class="player-thumb"]/img/@src0_3x
        postProcess:
          - replace:
              # image src is site-relative; prepend the host
              - regex: ^
                with: "https://apovstory.com"
      Date:
        selector: //span[contains(text(),'RELEASE DATE')]/parent::li/text()
        postProcess:
          - parseDate: "January 2, 2006"
# Last Updated August 24, 2023
Loading

0 comments on commit 312a5b8

Please sign in to comment.