Skip to content
This repository has been archived by the owner on Feb 22, 2023. It is now read-only.

Commit

Permalink
Download translations in bulk to prevent GlotPress throttling (#2188)
Browse files Browse the repository at this point in the history
Co-authored-by: Zack Krida <[email protected]>
  • Loading branch information
dhruvkb and zackkrida authored Feb 17, 2023
1 parent 64df99a commit bbb6f28
Show file tree
Hide file tree
Showing 10 changed files with 266 additions and 2,525 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/ghcr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ jobs:
with:
ref: ${{ github.event.release.tag_name }}

- uses: ./.github/actions/setup-node-env

- name: Download translation strings
run: pnpm i18n
env:
GLOTPRESS_USERNAME: ${{ secrets.MAKE_USERNAME }}
GLOTPRESS_PASSWORD: ${{ secrets.MAKE_LOGIN_PASSWORD }}

- uses: docker/setup-buildx-action@v2
with:
install: true
Expand Down
11 changes: 7 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,9 @@ selenium-debug.log
vercel.json
.eslintcache
.nuxt
src/locales/*.json
.nuxt-storybook
storybook-static
.vercel
/src/locales/scripts/valid-locales.json
/src/locales/scripts/untranslated-locales.json
/src/locales/scripts/wp-locales.json
.zshrc
.tcv-export

Expand All @@ -44,3 +40,10 @@ test/Default

# Ignore generated translation files
*.pot

# Ignore downloaded translation files
/src/locales/openverse.zip
/src/locales/*.json
/src/locales/scripts/valid-locales.json
/src/locales/scripts/untranslated-locales.json
/src/locales/scripts/wp-locales.json
1 change: 0 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ ARG RELEASE

RUN echo "{\"release\":\"${RELEASE}\"}" > /home/node/app/src/static/version.json

RUN pnpm i18n
RUN pnpm build:only

###################
Expand Down
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@
"@typescript-eslint/parser": "^5.44.0",
"@vue/runtime-dom": "^3.2.37",
"@vue/test-utils": "^1.1.3",
"adm-zip": "^0.5.10",
"autoprefixer": "^10.4.0",
"axios-rate-limit": "^1.3.0",
"babel-jest": "^26.6.3",
Expand All @@ -170,6 +171,7 @@
"postcss": "^8.4.12",
"prettier": "^2.8.3",
"prettier-plugin-tailwindcss": "^0.2.2",
"qs": "^6.11.0",
"rimraf": "^3.0.2",
"tailwind-config-viewer": "^1.6.3",
"tailwindcss": "^3.2.4",
Expand Down
35 changes: 22 additions & 13 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

116 changes: 116 additions & 0 deletions src/locales/scripts/bulk-download.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
const { pipeline } = require("stream/promises")

const { createWriteStream } = require("fs")

const qs = require("qs")
const AdmZip = require("adm-zip")

const { writeLocaleFile } = require("./utils")
const axios = require("./axios")
const jed1xJsonToJson = require("./jed1x-json-to-json")

// WordPress.org login form endpoint; the credentials are POSTed here to
// obtain the authentication cookies.
const LOGIN_URL = "https://login.wordpress.org/wp-login.php"
// GlotPress exporter endpoint for the Openverse project; requested with the
// `export-format=jed1x` query parameter and saved to disk as a ZIP archive.
const BULK_DOWNLOAD_URL =
"https://translate.wordpress.org/exporter/meta/openverse/-do/"

/**
 * Log in to WordPress with the given username and password and collect the
 * authentication cookies from the `Set-Cookie` header of the response.
 *
 * The login is considered successful only when the server answers with a 302
 * redirect that sets a cookie whose text contains `wporg_logged_in`.
 *
 * @param log {string} - the username to log in with
 * @param pwd {string} - the password for the given username
 * @return {Promise<string[]>} - the `name=value` pair of every cookie in the
 *   `Set-Cookie` header, with the cookie attributes stripped
 * @throws {Error} if the response is not a 302 that sets the login cookie
 */
const getAuthCookies = async (log, pwd) => {
  const res = await axios.post(
    LOGIN_URL,
    qs.stringify({
      log,
      pwd,
      rememberme: "forever",
      "wp-submit": "Log In",
      redirect_to: "https://make.wordpress.org/",
    }),
    {
      headers: { "content-type": "application/x-www-form-urlencoded" },
      // Do not follow the redirect: the status and `Set-Cookie` header of the
      // login response itself are inspected below.
      maxRedirects: 0,
      // Accept every status so that a failed login reaches the check below
      // instead of being rejected by axios.
      validateStatus: () => true,
    }
  )

  // The header is missing when the server sets no cookies at all; treat that
  // as an empty list so a failed login raises the error below rather than a
  // `TypeError`. `concat` also tolerates a single string value.
  const setCookies = [].concat(res.headers["set-cookie"] ?? [])
  const isLoggedIn =
    res.status === 302 &&
    setCookies.some((cookie) => cookie.includes("wporg_logged_in"))

  if (!isLoggedIn) {
    throw new Error(`Authentication failed: server returned ${res.status}`)
  }

  // Keep only the leading `name=value` pair. Splitting (rather than slicing up
  // to `indexOf(";")`) keeps cookies that carry no attributes intact.
  return setCookies.map((cookie) => cookie.split(";", 1)[0])
}

/**
 * Fetch the ZIP of translation strings from GlotPress, using the
 * authentication cookies to access the page, and save it as
 * `src/locales/openverse.zip` under the current working directory.
 *
 * @param cookies {string[]} - the `name=value` cookie pairs that authenticate
 *   the ZIP download
 * @return {Promise<string>} - the path to the downloaded ZIP file
 */
const fetchBulkJed1x = async (cookies) => {
  const res = await axios.get(BULK_DOWNLOAD_URL, {
    // RFC 6265 separates the pairs of a `Cookie` header with "; ".
    headers: { cookie: cookies.join("; ") },
    params: { "export-format": "jed1x" },
    // Receive the body as a stream so it can be piped straight to disk.
    responseType: "stream",
  })
  const destPath = `${process.cwd()}/src/locales/openverse.zip`
  // `pipeline` resolves once the archive is fully written and rejects if
  // either the download stream or the file stream fails.
  await pipeline(res.data, createWriteStream(destPath))
  return destPath
}

/**
 * Extract all JSON files from the given ZIP file, convert each one from the
 * JED 1.x layout with `jed1xJsonToJson` and hand the result to
 * `writeLocaleFile`. File names are sanitised down to the bare locale code by
 * dropping the `meta-openverse-` prefix and the `.jed.json` suffix.
 *
 * @param zipPath {string} - the path to the ZIP file to extract
 * @return {Promise<unknown[]>} - the outcome of writing all locale files
 */
const extractZip = async (zipPath) => {
  const archive = new AdmZip(zipPath, undefined)

  // Convert every JSON entry first, so that an unparsable entry aborts the
  // run before any locale file has been written.
  const converted = []
  for (const entry of archive.getEntries()) {
    if (!entry.entryName.endsWith(".json")) {
      continue
    }
    const jed1x = JSON.parse(archive.readAsText(entry))
    const messages = jed1xJsonToJson(jed1x)
    const withoutPrefix = entry.name.replace("meta-openverse-", "")
    const locale = withoutPrefix.replace(".jed.json", "")
    converted.push({ locale, messages })
  }

  const pendingWrites = converted.map(({ locale, messages }) =>
    writeLocaleFile(locale, messages)
  )
  return await Promise.all(pendingWrites)
}

/**
 * Perform a bulk download of translation strings from GlotPress and extract
 * the JSON files from the ZIP archive.
 *
 * The credentials are read from the `GLOTPRESS_USERNAME` and
 * `GLOTPRESS_PASSWORD` environment variables.
 *
 * @return {Promise<void>} - resolves once the translations have been saved
 * @throws {Error} if either credential is missing; errors from the login,
 *   download and extraction steps are not caught here
 */
const bulkDownload = async () => {
  console.log("Performing bulk download.")

  const { GLOTPRESS_USERNAME: username, GLOTPRESS_PASSWORD: password } =
    process.env
  if (!username || !password) {
    console.log("Auth credentials not found, bulk download cancelled.")
    throw new Error("Bulk download cancelled")
  }

  // Log in, download the archive, then unpack it; each step feeds the next.
  const authCookies = await getAuthCookies(username, password)
  const archivePath = await fetchBulkJed1x(authCookies)
  const { length: savedCount } = await extractZip(archivePath)
  console.log(`Successfully saved ${savedCount} translations.`)
}

// The bulk download routine is the only thing this module exports.
module.exports = bulkDownload
Loading

0 comments on commit bbb6f28

Please sign in to comment.