From fbfdae62672a560e2dd0ba2969893362815908ac Mon Sep 17 00:00:00 2001
From: Lam Le <79891960+lamle-ea@users.noreply.github.com>
Date: Mon, 16 Jan 2023 09:58:45 +0700
Subject: [PATCH] Create cron2023.yml

---
 .github/workflows/cron2023.yml | 63 ++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 .github/workflows/cron2023.yml

diff --git a/.github/workflows/cron2023.yml b/.github/workflows/cron2023.yml
new file mode 100644
index 0000000..4ffa2a5
--- /dev/null
+++ b/.github/workflows/cron2023.yml
@@ -0,0 +1,63 @@
+name: Cron2023
+
+on:
+  schedule:
+    # Set any time that you'd like scrapers to run (in UTC)
+    - cron: "37 6 * * *"
+  workflow_dispatch:
+
+env:
+  CI: "true"
+  PIPENV_VENV_IN_PROJECT: "true"
+  SCRAPY_SETTINGS_MODULE: city_scrapers.settings.prod
+  WAYBACK_ENABLED: "true"
+  AUTOTHROTTLE_MAX_DELAY: "30.0"
+  AUTOTHROTTLE_START_DELAY: "1.5"
+  AUTOTHROTTLE_TARGET_CONCURRENCY: "3.0"
+  # Add secrets for the platform you're using and uncomment here
+  # AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+  # AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+  # S3_BUCKET: ${{ secrets.S3_BUCKET }}
+  AZURE_ACCOUNT_KEY: ${{ secrets.AZURE_ACCOUNT_KEY }}
+  AZURE_ACCOUNT_NAME: ${{ secrets.AZURE_ACCOUNT_NAME }}
+  AZURE_CONTAINER: ${{ secrets.AZURE_CONTAINER }}
+  # GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
+  # GCS_BUCKET: ${{ secrets.GCS_BUCKET }}
+  # Setup Sentry, add the DSN to secrets and uncomment here
+  # SENTRY_DSN: ${{ secrets.SENTRY_DSN }}
+
+jobs:
+  crawl:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.9"
+
+      - name: Install Pipenv
+        run: pip install pipenv
+
+      - name: Cache Python dependencies
+        uses: actions/cache@v4
+        with:
+          path: .venv
+          key: pip-3.9-${{ hashFiles('**/Pipfile.lock') }}
+          restore-keys: |
+            pip-3.9-
+            pip-
+      - name: Install dependencies
+        run: pipenv sync
+        env:
+          PIPENV_DEFAULT_PYTHON_VERSION: "3.9"
+
+      - name: Run scrapers
+        run: |
+          export PYTHONPATH=$(pwd):$PYTHONPATH
+          ./.deploy.sh
+      - name: Combine output feeds
+        run: |
+          export PYTHONPATH=$(pwd):$PYTHONPATH
+          pipenv run scrapy combinefeeds -s LOG_ENABLED=False