Skip to content

Commit

Permalink
Merge pull request #2394 from concord-consortium/188192669-update-mig…
Browse files Browse the repository at this point in the history
…ration-scripts

188192669-update-migration-scripts
  • Loading branch information
scytacki committed Sep 4, 2024
2 parents e8b4565 + 725dcbc commit 51f7793
Show file tree
Hide file tree
Showing 5 changed files with 380 additions and 85 deletions.
258 changes: 258 additions & 0 deletions scripts/ai/clean-docs-without-metadata.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,258 @@
#!/usr/bin/node

// This script finds documents without metadata in the realtime database.
// If the deleteTypes array is uncommented, it will delete these documents.

// to run this script type the following in the terminal
// cf. https://stackoverflow.com/a/66626333/16328462
// $ cd scripts/ai
// $ npx tsx clean-docs-without-metadata.ts

import admin from "firebase-admin";

import { getFirebaseBasePath, getScriptRootFilePath, prettyDuration,
remapFirebaseClassPublications, remapFirebaseProblemDocPublications } from "../lib/script-utils.js";

// Load the service account key JSON file.
import { getClassKeys } from "../lib/firebase-classes.js";

// The portal to get documents from. For example, "learn.concord.org".
const portal = "learn.concord.org";
// The demo name to use. Make falsy to not use a demo.
// const demo = "TAGCLUE";
const demo = false;

// Make falsy to include all documents
const documentLimit = false;
// const documentLimit = 10000;

// List of types to delete if they appear safe to do so.
const deleteTypes = [
// "problem",
// "planning",
// "learningLog",
// "publication"
];

// If a problem or planning doc doesn't have metadata and there is an offering for
// the user that also doesn't have type specific metadata for any problem or planning
// docs, perhaps the doc is supposed to go in this "empty slot".
// Or if there is a learningLog that doesn't have metadata and there is no learningLog
// metadata for this user.
// For the problem and planning docs this hasn't been practical to figure out.
// There are usually multiple empty slots.
// For learningLogs there haven't been any cases like this.
// Additionally these docs would not be accessible to the user
// after they were created. So there isn't much point in keeping them around.
const deleteDefaultDocsEvenIfThereIsEmptySlot = true;
// const deleteDefaultDocsEvenIfThereIsEmptySlot = false;

console.log(`*** Starting to Download Documents ***`);

const startTime = Date.now();
let documentsProcessed = 0;
let undefinedDocuments = 0;
let failedDocuments = 0;
let emptyDocuments = 0;

const databaseURL = "https://collaborative-learning-ec215.firebaseio.com";

const firebaseBasePath = getFirebaseBasePath(portal, demo);

const {classKeys, accessTime, fetchTime} = await getClassKeys(firebaseBasePath);

// Fetch the service account key JSON file contents; must be in same folder as script
const serviceAccountFile = getScriptRootFilePath("serviceAccountKey.json");
const credential = admin.credential.cert(serviceAccountFile);
// Initialize the app with a service account, granting admin privileges
admin.initializeApp({
credential,
databaseURL
});

const credentialTime = Date.now();

// CHECKME: what about cross class supports?
// They might be saved as a supportPublication with an offering but then there
// is a metadata file that has extra information about it.

for (const key of Object.keys(classKeys)) {
const getClassValue = async (prop: string) => {
const snapshot = await admin.database().ref(`${firebaseBasePath}/${key}/${prop}`).once("value");
return snapshot.val();
};

if (documentLimit && documentsProcessed >= documentLimit) break;
const users = await getClassValue("users");
const offerings = await getClassValue("offerings");
const fbPersonalPublications = await getClassValue("personalPublications");
const personalPublications = remapFirebaseClassPublications(fbPersonalPublications);
const fbLearningLogPublications = await getClassValue("publications");
const learningLogPublications = remapFirebaseClassPublications(fbLearningLogPublications);

const problemDocPublications = {};
for (const [offeringId, offering] of Object.entries(offerings)) {
const fbProblemDocPublications = (offering as any).publications;
if (!fbProblemDocPublications) continue;
problemDocPublications[offeringId] = remapFirebaseProblemDocPublications(fbProblemDocPublications);
}

for (const [userId, user] of Object.entries<any>(users)) {
if (documentLimit && documentsProcessed >= documentLimit) break;
if (!user.documents) continue;
for (const [docId, doc] of Object.entries<any>(user.documents)) {
if (documentLimit && documentsProcessed >= documentLimit) break;
documentsProcessed++;

const docPath = `${firebaseBasePath}/${key}/users/${userId}/documents/${docId}`;
const content = doc.content as string | undefined;
let parsedContent;
let tiles;
if (!content) {
undefinedDocuments++;
} else {
try {
parsedContent = JSON.parse(content);
tiles = Object.values<any>(parsedContent.tileMap);
if (tiles.length === 0) {
emptyDocuments++;
}
} catch (e) {
failedDocuments++;
}
}

const documentMetadata = user.documentMetadata[docId];

// We only care about docs without metadata
if (documentMetadata) continue;

const deleteDoc = async () => {
if (deleteTypes.includes(doc.type)) {
try {
await admin.database().ref(docPath).remove();
console.log("deleted", docPath);
} catch (e) {
console.log("failed to delete", docPath, e);
}
} else {
console.log("would delete", docPath);
}
};

const personalDocMetadata = user.personalDocs?.[docId];
const learningLogMetadata = user.learningLogs?.[docId];

const hasContent = !!tiles && (tiles.length > 0);
const tools = new Set();
if (hasContent) {
for (const tile of tiles) {
const { type } = tile.content;
if (type === "Placeholder") continue;
tools.add(type);
}
}

console.log(documentsProcessed, "No metadata", `${key}/users/${userId}/documentMetadata/${docId}`,
{type: doc.type, hasContent });

if (tools.size) {
console.log("tools", [...tools]);
}

const typeSpecificMetadata = { offerings: {}} as any;
if (learningLogMetadata) typeSpecificMetadata.learningLogMetadata = learningLogMetadata;
if (personalDocMetadata) typeSpecificMetadata.personalDocMetadata = personalDocMetadata;

// Look for type specific metadata
// We have to search through several places since we don't have an offering id
for (const [offeringId, offering] of Object.entries(offerings)) {
const offeringUser = (offering as any).users?.[userId];
const problemMetadata = offeringUser?.documents?.[docId];
const planningMetadata = offeringUser?.planning?.[docId];
if (problemMetadata || planningMetadata) {
typeSpecificMetadata.offerings[offeringId] = {};
}
if (problemMetadata) {
typeSpecificMetadata.offerings[offeringId].problemMetadata = problemMetadata;
}
if (planningMetadata) {
typeSpecificMetadata.offerings[offeringId].planningMetadata = planningMetadata;
}
}

for (const [offeringId, problemDocPublicationsOffering] of Object.entries(problemDocPublications)) {
const problemDocPublication = problemDocPublicationsOffering?.[docId];
if (problemDocPublication) {
if (typeSpecificMetadata.offerings[offeringId]) {
typeSpecificMetadata.offerings[offeringId] = {};
}
typeSpecificMetadata.offerings[offeringId].problemDocPublication = problemDocPublication;
}
}

const personalPublication = personalPublications?.[docId];
if (personalPublication) {
typeSpecificMetadata.personalPublication = personalPublication;
}
const learningLogPublication = learningLogPublications?.[docId];
if (learningLogPublication) {
typeSpecificMetadata.learningLogPublication = learningLogPublication;
}

if (Object.keys(typeSpecificMetadata).length > 1 || Object.keys(typeSpecificMetadata.offerings).length > 0) {
// So far none of the docs without generic metadata have any typeSpecific Metadata
console.log("typeSpecific", typeSpecificMetadata);

// If there is type specific metadata don't try to delete this document
continue;
}

// If the doc type is a problem or planning check to see if there is one configured for all
// of the offerings of this user. If there isn't one, perhaps this doc should be saved.
//
if (["problem", "planning"].includes(doc.type)) {
const offeringStats = {};
for (const [offeringId, offering] of Object.entries(offerings)) {
const offeringUser = (offering as any).users?.[userId];
if (doc.type === "problem") {
offeringStats[offeringId] = Object.keys(offeringUser?.documents || {}).length;
}
if (doc.type === "planning") {
offeringStats[offeringId] = Object.keys(offeringUser?.planning || {}).length;
}
}
console.log("offeringStats", offeringStats);
if (deleteDefaultDocsEvenIfThereIsEmptySlot || !Object.values(offeringStats).includes(0)) {
await deleteDoc();
}

}
if (doc.type === "learningLog") {
// undefined means there isn't even a learningLogs map in the database
const numLearningLogs = user.learningLogs && Object.keys(user.learningLogs || {}).length;
console.log("num learningLogs", numLearningLogs);
if (deleteDefaultDocsEvenIfThereIsEmptySlot || numLearningLogs) {
await deleteDoc();
}
}
if (doc.type === "publication") {
await deleteDoc();
}
}

}
}

const endTime = Date.now();
console.log(`***** End script *****`);
console.log(`- Time to access token: ${prettyDuration(accessTime - startTime)}`);
console.log(`- Time to fetch documents: ${prettyDuration(fetchTime - startTime)}`);
console.log(`- Time to get credential: ${prettyDuration(credentialTime - startTime)}`);
console.log(`- Total Time: ${prettyDuration(endTime - startTime)}`);
console.log(`Documents downloaded: ${documentsProcessed}`);
console.log(`Undefined documents: ${undefinedDocuments}`);
console.log(`Empty documents: ${emptyDocuments}`);
console.log(`Failed to process: ${failedDocuments}`);

process.exit(0);
43 changes: 3 additions & 40 deletions scripts/ai/download-documents-with-info.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@ import admin from "firebase-admin";
import stringify from "json-stringify-pretty-compact";

import { datasetPath, networkFileName } from "./script-constants.js";
import { getFirebaseBasePath, getScriptRootFilePath, prettyDuration } from "../lib/script-utils.js";
import { getFirebaseBasePath, getScriptRootFilePath, prettyDuration,
remapFirebaseClassPublications, remapFirebaseProblemDocPublications } from "../lib/script-utils.js";

// Load the service account key JSON file.
import { getClassKeys } from "../lib/firebase-classes.js";

// The portal to get documents from. For example, "learn.concord.org".
const portal = "learn.concord.org";
// const portal = "learn.portal.staging.concord.org";
// The demo name to use. Make falsy to not use a demo.
// const demo = "TAGCLUE";
const demo = false;
Expand Down Expand Up @@ -52,45 +54,6 @@ admin.initializeApp({
databaseURL
});

/**
* Firebase publications are stored with different keys than their document
* id for some reason. In some cases the real document id is in self.documentKey
* so we make a map with that documentKey as the key of the map.
*
* @param fbPublications
*/
function remapFirebaseClassPublications(fbPublications: Record<string, any>) {
if (!fbPublications) return undefined;
const publications = {};
for (const [fbId, publication] of Object.entries(fbPublications)) {
if (!publication?.self?.documentKey) {
console.log("Invalid publication found: ", fbId);
continue;
}
publications[publication.self.documentKey] = publication;
}
return publications;
}

/**
* Firebase publications are stored with different keys than their document
* id for some reason. In some cases the real document id is in documentKey
* so we make a map with that documentKey as the key of the map.
* @param fbPublications
*/
function remapFirebaseProblemDocPublications(fbPublications: Record<string, any>) {
if (!fbPublications) return undefined;
const publications = {};
for (const [fbId, publication] of Object.entries(fbPublications)) {
if (!publication?.documentKey) {
console.log("Invalid publication found: ", fbId);
continue;
}
publications[publication.documentKey] = publication;
}
return publications;
}

const credentialTime = Date.now();

// CHECKME: what about cross class supports?
Expand Down
52 changes: 31 additions & 21 deletions scripts/ai/update-class-metadata.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ async function processFile() {

const documentSnapshots = await documentCollection.where("id", "==", id).get();

if (documentSnapshots.empty) {
const createClassDoc = async () => {
const metaData = {
context_id,
id,
Expand All @@ -88,28 +88,38 @@ async function processFile() {
await newMetaDataDoc.create(metaData);
console.log("Created new class metadata", metaDataDocId);
metadataCreated++;
} else {
// There can be multiple class metadata documents for each actual class. Note that the name/path for these
// Firestore documents may be "[network name]_[class hash]" and/or simply "[class hash]".
// For now we just update all of these documents.
documentSnapshots.forEach(doc => {
const requiredMatches = [
{ field: "context_id", expected: context_id, actual: doc.data().context_id },
{ field: "id", expected: id, actual: doc.data().id },
{ field: "uri", expected: uri, actual: doc.data().uri }
];

for (const { field, expected, actual } of requiredMatches) {
if (expected !== actual) {
console.error(`Skipping update due to ${field} mismatch. Expected ${expected}, got ${actual}.`);
return;
}
};

// There can be multiple class metadata documents for each actual class. Note that the name/path for these
// Firestore documents may be "[network name]_[class hash]" and/or simply "[class hash]".
// For now we just update all of these documents.

let hasClassDocWithSimpleId = false;
for (const doc of documentSnapshots.docs) {
if (doc.id === context_id) hasClassDocWithSimpleId = true;

const requiredMatches = [
{ field: "context_id", expected: context_id, actual: doc.data().context_id },
{ field: "id", expected: id, actual: doc.data().id },
{ field: "uri", expected: uri, actual: doc.data().uri }
];

let hasMismatch = false;
for (const { field, expected, actual } of requiredMatches) {
if (expected !== actual) {
console.error(`Skipping update of ${doc.id} due to ${field} mismatch. Expected ${expected}, got ${actual}.`);
hasMismatch = true;
}
}
if (hasMismatch) continue;

doc.ref.update({ name, networks, teachers } as any);
console.log(context_id, doc.id, "Updated existing class metadata with", { name, networks, teachers });
metadataUpdated++;
});
await doc.ref.update({ name, networks, teachers } as any);
console.log(context_id, doc.id, "Updated existing class metadata with", { name, networks, teachers });
metadataUpdated++;
}

if (!hasClassDocWithSimpleId) {
await createClassDoc();
}
}
}
Expand Down
Loading

0 comments on commit 51f7793

Please sign in to comment.