Skip to content

Commit

Permalink
Merge pull request #588 from the-draupnir-project/gnuxie/config-edito…
Browse files Browse the repository at this point in the history
…r safe mode recovery

Safe Mode account data config recovery.
  • Loading branch information
Gnuxie authored Oct 2, 2024
2 parents 56cdc48 + d817810 commit 890f004
Show file tree
Hide file tree
Showing 23 changed files with 674 additions and 101 deletions.
17 changes: 17 additions & 0 deletions config/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,23 @@ protections:
roomStateBackingStore:
enabled: false

# Safe mode provides recovery options for some failure modes when Draupnir
# fails to start. For example, if the bot fails to resolve a room alias in
# a watched list, or if the server has parted from a protected room and can't
# find a way back in. Safe mode will provide different options to recover from
# these, such as unprotecting the room or unwatching the policy list.
# By default Draupnir will boot into safe mode only when the failure mode
# is recoverable.
# It may be desirable to prevent the bot from starting into safe mode if you have
# a pager system that alerts you when Draupnir is down, as booting into safe mode
# could prevent your monitoring system from identifying a failure to start.
#safeMode:
# # The option for entering safe mode when Draupnir fails to start up.
# # - "RecoveryOnly" will only start the bot in safe mode when there are recovery options available. This is the default.
# # - "Never" will never start the bot in safe mode when Draupnir fails to start normally.
# # - "Always" will always start the bot in safe mode when Draupnir fails to start normally.
# bootOption: RecoveryOnly

# Options for advanced monitoring of the health of the bot.
health:
# healthz options. These options are best for use in container environments
Expand Down
6 changes: 3 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
"@sentry/node": "^7.17.2",
"@sentry/tracing": "^7.17.2",
"@sinclair/typebox": "0.32.34",
"@the-draupnir-project/interface-manager": "2.4.0",
"@the-draupnir-project/interface-manager": "2.4.1",
"@the-draupnir-project/matrix-basic-types": "^0.2.0",
"await-lock": "^2.2.2",
"better-sqlite3": "^9.4.3",
Expand All @@ -69,8 +69,8 @@
"js-yaml": "^4.1.0",
"jsdom": "^24.0.0",
"matrix-appservice-bridge": "^9.0.1",
"matrix-protection-suite": "npm:@gnuxie/matrix-protection-suite@1.4.0",
"matrix-protection-suite-for-matrix-bot-sdk": "npm:@gnuxie/matrix-protection-suite-for-matrix-bot-sdk@1.4.0",
"matrix-protection-suite": "npm:@gnuxie/matrix-protection-suite@1.5.1",
"matrix-protection-suite-for-matrix-bot-sdk": "npm:@gnuxie/matrix-protection-suite-for-matrix-bot-sdk@1.5.1",
"parse-duration": "^1.0.2",
"pg": "^8.8.0",
"shell-quote": "^1.7.3",
Expand Down
12 changes: 10 additions & 2 deletions src/Draupnir.ts
Original file line number Diff line number Diff line change
Expand Up @@ -164,11 +164,19 @@ export class Draupnir implements Client, MatrixAdaptorContext {
}
this.reactionHandler.on(
ARGUMENT_PROMPT_LISTENER,
makeListenerForArgumentPrompt(this.commandDispatcher)
makeListenerForArgumentPrompt(
this.commandRoomID,
this.commandDispatcher,
this.reactionHandler
)
);
this.reactionHandler.on(
DEFAUILT_ARGUMENT_PROMPT_LISTENER,
makeListenerForPromptDefault(this.commandDispatcher)
makeListenerForPromptDefault(
this.commandRoomID,
this.commandDispatcher,
this.reactionHandler
)
);
this.capabilityMessageRenderer = new DraupnirRendererMessageCollector(
this.clientPlatform.toRoomMessageSender(),
Expand Down
71 changes: 53 additions & 18 deletions src/DraupnirBotMode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import {
Logger,
ActionException,
ActionExceptionKind,
ConfigRecoverableError,
} from "matrix-protection-suite";
import {
BotSDKLogServiceLogger,
Expand All @@ -39,14 +40,16 @@ import {
StringUserID,
MatrixRoomID,
} from "@the-draupnir-project/matrix-basic-types";
import { Result, isError } from "@gnuxie/typescript-result";
import { Err, Ok, Result, isError } from "@gnuxie/typescript-result";
import {
DraupnirRestartError,
SafeModeToggle,
SafeModeToggleOptions,
} from "./safemode/SafeModeToggle";
import { SafeModeDraupnir } from "./safemode/DraupnirSafeMode";
import { ResultError } from "@gnuxie/typescript-result";
import { SafeModeCause, SafeModeReason } from "./safemode/SafeModeCause";
import { SafeModeBootOption } from "./safemode/BootOption";

setGlobalLoggerProvider(new BotSDKLogServiceLogger());

Expand All @@ -70,6 +73,10 @@ interface BotModeTogle extends SafeModeToggle {
startFromScratch(
options?: SafeModeToggleOptions
): Promise<Result<Draupnir | SafeModeDraupnir>>;
maybeRecoverWithSafeMode(
error: ResultError,
options?: SafeModeToggleOptions
): Promise<Result<SafeModeDraupnir>>;
}

export class DraupnirBotModeToggle implements BotModeTogle {
Expand Down Expand Up @@ -168,20 +175,32 @@ export class DraupnirBotModeToggle implements BotModeTogle {
}
public async switchToDraupnir(
options?: SafeModeToggleOptions
): Promise<Result<Draupnir>> {
): Promise<Result<Draupnir, DraupnirRestartError | ResultError>> {
if (this.draupnir !== null) {
return ResultError.Result(
`There is a draupnir for ${this.clientUserID} already running`
);
}
this.stopSafeModeDraupnir();
const draupnirResult = await this.draupnirFactory.makeDraupnir(
this.clientUserID,
this.managementRoom,
this.config,
this
);
if (isError(draupnirResult)) {
return draupnirResult;
const safeModeResult = await this.maybeRecoverWithSafeMode(
draupnirResult.error,
options
);
if (isError(safeModeResult)) {
return safeModeResult;
} else {
return DraupnirRestartError.Result(
"Draupnir failed to start, switching to safe mode.",
{ safeModeDraupnir: safeModeResult.ok }
);
}
}
this.draupnir = draupnirResult.ok;
this.draupnir.start();
Expand All @@ -192,7 +211,7 @@ export class DraupnirBotModeToggle implements BotModeTogle {
await this.webAPIs.start();
} catch (e) {
if (e instanceof Error) {
this.stopDraupnir();
await this.stopDraupnir();
log.error("Failed to start webAPIs", e);
return ActionException.Result("Failed to start webAPIs", {
exceptionKind: ActionExceptionKind.Unknown,
Expand All @@ -203,7 +222,6 @@ export class DraupnirBotModeToggle implements BotModeTogle {
}
}
}
this.stopSafeModeDraupnir();
return draupnirResult;
}
public async switchToSafeMode(
Expand All @@ -225,7 +243,7 @@ export class DraupnirBotModeToggle implements BotModeTogle {
if (isError(safeModeResult)) {
return safeModeResult;
}
this.stopDraupnir();
await this.stopDraupnir();
this.safeModeDraupnir = safeModeResult.ok;
this.safeModeDraupnir.start();
if (options?.sendStatusOnStart) {
Expand All @@ -239,23 +257,40 @@ export class DraupnirBotModeToggle implements BotModeTogle {
): Promise<Result<Draupnir | SafeModeDraupnir>> {
const draupnirResult = await this.switchToDraupnir(options ?? {});
if (isError(draupnirResult)) {
if (this.config.safeMode?.bootIntoOnStartupFailure) {
if (draupnirResult.error instanceof DraupnirRestartError) {
return Ok(draupnirResult.error.safeModeDraupnir);
} else {
return draupnirResult;
}
}
return draupnirResult;
}

public async maybeRecoverWithSafeMode(
error: ResultError,
options?: SafeModeToggleOptions | undefined
): Promise<Result<SafeModeDraupnir>> {
switch (this.config.safeMode?.bootOption) {
case SafeModeBootOption.Never:
return Err(error);
case SafeModeBootOption.RecoveryOnly:
if (!(error instanceof ConfigRecoverableError)) {
return Err(error);
}
// fallthrough
default:
log.error(
"Failed to start draupnir, switching to safe mode as configured",
draupnirResult.error
error
);
return await this.switchToSafeMode(
{
reason: SafeModeReason.InitializationError,
error: draupnirResult.error,
error: error,
},
options ?? {}
);
} else {
return draupnirResult;
}
}
return draupnirResult;
}

public async encryptionInitialized(): Promise<void> {
Expand All @@ -265,18 +300,18 @@ export class DraupnirBotModeToggle implements BotModeTogle {
await this.webAPIs.start();
await this.draupnir.startupComplete();
} catch (e) {
this.stopEverything();
await this.stopEverything();
throw e;
}
} else if (this.safeModeDraupnir !== null) {
this.safeModeDraupnir.startupComplete();
}
}

private stopDraupnir(): void {
private async stopDraupnir(): Promise<void> {
this.draupnir?.stop();
this.draupnir = null;
this.webAPIs?.stop();
await this.webAPIs?.stop();
this.webAPIs = null;
}

Expand All @@ -285,8 +320,8 @@ export class DraupnirBotModeToggle implements BotModeTogle {
this.safeModeDraupnir = null;
}

public stopEverything(): void {
this.stopDraupnir();
public async stopEverything(): Promise<void> {
await this.stopDraupnir();
this.stopSafeModeDraupnir();
}
}
12 changes: 10 additions & 2 deletions src/appservice/bot/AppserviceCommandHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,19 @@ export class AppserviceCommandHandler {
);
this.reactionHandler.on(
ARGUMENT_PROMPT_LISTENER,
makeListenerForArgumentPrompt(this.commandDispatcher)
makeListenerForArgumentPrompt(
this.adminRoomID,
this.commandDispatcher,
this.reactionHandler
)
);
this.reactionHandler.on(
DEFAUILT_ARGUMENT_PROMPT_LISTENER,
makeListenerForPromptDefault(this.commandDispatcher)
makeListenerForPromptDefault(
this.adminRoomID,
this.commandDispatcher,
this.reactionHandler
)
);
this.JSInterfaceDispatcher = makeAppserviceJSCommandDispatcher(
this.appserviceContext
Expand Down
32 changes: 26 additions & 6 deletions src/commands/interface-manager/MatrixPromptForAccept.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: Apache-2.0

import { Logger, Value, isError } from "matrix-protection-suite";
import { Logger, Task, Value, isError } from "matrix-protection-suite";
import {
MatrixReactionHandler,
ReactionListener,
Expand All @@ -24,6 +24,7 @@ import {
readCommand,
} from "@the-draupnir-project/interface-manager";
import { Ok, Result } from "@gnuxie/typescript-result";
import { StringRoomID } from "@the-draupnir-project/matrix-basic-types";

const log = new Logger("MatrixPromptForAccept");

Expand All @@ -49,7 +50,8 @@ function continueCommandAcceptingPrompt(
eventContext: MatrixEventContext,
promptContext: PromptContext,
serializedPrompt: string,
commandDispatcher: MatrixInterfaceCommandDispatcher<MatrixEventContext>
commandDispatcher: MatrixInterfaceCommandDispatcher<MatrixEventContext>,
reactionHandler: MatrixReactionHandler
): void {
const stream = new StandardPresentationArgumentStream(
readCommand(
Expand All @@ -61,14 +63,25 @@ function continueCommandAcceptingPrompt(
)
);
commandDispatcher.handleCommandFromPresentationStream(eventContext, stream);
void Task(
reactionHandler.completePrompt(
eventContext.roomID,
eventContext.event.event_id
)
);
}

export const DEFAUILT_ARGUMENT_PROMPT_LISTENER =
"ge.applied-langua.ge.draupnir.default_argument_prompt";
export function makeListenerForPromptDefault(
commandDispatcher: MatrixInterfaceCommandDispatcher<MatrixEventContext>
commandRoomID: StringRoomID,
commandDispatcher: MatrixInterfaceCommandDispatcher<MatrixEventContext>,
reactionHandler: MatrixReactionHandler
): ReactionListener {
return function (reactionKey, item, context, reactionMap, annotatedEvent) {
if (annotatedEvent.room_id !== commandRoomID) {
return;
}
if (item !== "ok") {
return;
}
Expand All @@ -84,17 +97,23 @@ export function makeListenerForPromptDefault(
{ event: annotatedEvent, roomID: annotatedEvent.room_id },
promptContext.ok,
item,
commandDispatcher
commandDispatcher,
reactionHandler
);
};
}

export const ARGUMENT_PROMPT_LISTENER =
"ge.applied-langua.ge.draupnir.argument_prompt";
export function makeListenerForArgumentPrompt(
commandDispatcher: MatrixInterfaceCommandDispatcher<MatrixEventContext>
commandRoomID: StringRoomID,
commandDispatcher: MatrixInterfaceCommandDispatcher<MatrixEventContext>,
reactionHandler: MatrixReactionHandler
): ReactionListener {
return function (reactionKey, item, context, reactionMap, annotatedEvent) {
if (annotatedEvent.room_id !== commandRoomID) {
return;
}
const promptContext = Value.Decode(PromptContext, context);
if (isError(promptContext)) {
log.error(
Expand All @@ -107,7 +126,8 @@ export function makeListenerForArgumentPrompt(
{ event: annotatedEvent, roomID: annotatedEvent.room_id },
promptContext.ok,
item,
commandDispatcher
commandDispatcher,
reactionHandler
);
};
}
Expand Down
6 changes: 4 additions & 2 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { load } from "js-yaml";
import { MatrixClient, LogService } from "matrix-bot-sdk";
import Config from "config";
import path from "path";
import { SafeModeBootOption } from "./safemode/BootOption";

/**
* The configuration, as read from production.yaml
Expand Down Expand Up @@ -90,7 +91,7 @@ export interface IConfig {
};
};
safeMode?: {
bootIntoOnStartupFailure: boolean;
bootOption: SafeModeBootOption;
};
health: {
healthz: {
Expand Down Expand Up @@ -192,7 +193,7 @@ const defaultConfig: IConfig = {
},
},
safeMode: {
bootIntoOnStartupFailure: false,
bootOption: SafeModeBootOption.RecoveryOnly,
},
health: {
healthz: {
Expand Down Expand Up @@ -287,6 +288,7 @@ export function getProvisionedMjolnirConfig(managementRoomId: string): IConfig {
"automaticallyRedactForReasons",
"protectAllJoinedRooms",
"backgroundDelayMS",
"safeMode",
];
const configTemplate = read(); // we use the standard bot config as a template for every provisioned mjolnir.
const unusedKeys = Object.keys(configTemplate).filter(
Expand Down
Loading

0 comments on commit 890f004

Please sign in to comment.