Skip to content

Commit

Permalink
Merge pull request #27 from nextcloud/add/stt-smart-picker
Browse files Browse the repository at this point in the history
Add stt smart picker
  • Loading branch information
MB-Finski authored Jan 12, 2024
2 parents 65186cf + 8ecf879 commit 84a6122
Show file tree
Hide file tree
Showing 34 changed files with 1,802 additions and 87 deletions.
1 change: 1 addition & 0 deletions .github/workflows/psalm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ jobs:
php-version: 8.2
coverage: none
ini-file: development
extensions: mbstring, iconv, fileinfo, intl, sqlite, pdo_sqlite, gd, zip
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

Expand Down
1 change: 1 addition & 0 deletions appinfo/info.xml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ include text processing providers to:
<screenshot>https://github.com/nextcloud/assistant/raw/main/img/screenshot3.jpg</screenshot>
<background-jobs>
<job>OCA\TpAssistant\Cron\CleanupImageGenerations</job>
<job>OCA\TpAssistant\Cron\CleanupTranscriptions</job>
</background-jobs>
<commands>
<command>OCA\TpAssistant\Command\CleanupImageGenerations</command>
Expand Down
5 changes: 5 additions & 0 deletions appinfo/routes.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,10 @@
['name' => 'FreePrompt#getPromptHistory', 'url' => '/f/prompt_history', 'verb' => 'GET'],
['name' => 'FreePrompt#getOutputs', 'url' => '/f/get_outputs', 'verb' => 'GET'],
['name' => 'FreePrompt#cancelGeneration', 'url' => '/f/cancel_generation', 'verb' => 'POST'],

['name' => 'SpeechToText#getResultPage', 'url' => '/stt/resultPage', 'verb' => 'GET'],
['name' => 'SpeechToText#getTranscript', 'url' => '/stt/transcript', 'verb' => 'GET'],
['name' => 'SpeechToText#transcribeAudio', 'url' => '/stt/transcribeAudio', 'verb' => 'POST'],
['name' => 'SpeechToText#transcribeFile', 'url' => '/stt/transcribeFile', 'verb' => 'POST'],
],
];
12 changes: 12 additions & 0 deletions lib/AppInfo/Application.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@

use OCA\TpAssistant\Listener\BeforeTemplateRenderedListener;
use OCA\TpAssistant\Listener\FreePrompt\FreePromptReferenceListener;
use OCA\TpAssistant\Listener\SpeechToText\SpeechToTextReferenceListener;
use OCA\TpAssistant\Listener\SpeechToText\SpeechToTextResultListener;
use OCA\TpAssistant\Listener\TaskFailedListener;
use OCA\TpAssistant\Listener\TaskSuccessfulListener;
use OCA\TpAssistant\Listener\Text2Image\Text2ImageReferenceListener;
use OCA\TpAssistant\Listener\Text2Image\Text2ImageResultListener;
use OCA\TpAssistant\Notification\Notifier;
use OCA\TpAssistant\Reference\FreePromptReferenceProvider;
use OCA\TpAssistant\Reference\SpeechToTextReferenceProvider;
use OCA\TpAssistant\Reference\Text2ImageReferenceProvider;
use OCP\AppFramework\App;
use OCP\AppFramework\Bootstrap\IBootContext;
Expand All @@ -18,6 +21,8 @@
use OCP\AppFramework\Bootstrap\IRegistrationContext;
use OCP\AppFramework\Http\Events\BeforeTemplateRenderedEvent;
use OCP\Collaboration\Reference\RenderReferenceEvent;
use OCP\SpeechToText\Events\TranscriptionFailedEvent;
use OCP\SpeechToText\Events\TranscriptionSuccessfulEvent;
use OCP\TextProcessing\Events\TaskFailedEvent as TextTaskFailedEvent;
use OCP\TextProcessing\Events\TaskSuccessfulEvent as TextTaskSuccessfulEvent;
use OCP\TextToImage\Events\TaskFailedEvent as TextToImageTaskFailedEvent;
Expand All @@ -32,9 +37,11 @@ class Application extends App implements IBootstrap {
public const DEFAULT_MAX_IMAGE_GENERATION_IDLE_TIME = 60 * 60 * 24 * 90; // 90 days
public const DEFAULT_TEXT_GENERATION_STORAGE_TIME = 60 * 60 * 24 * 90; // 90 days
public const IMAGE_FOLDER = 'generated_images';
public const SPEECH_TO_TEXT_REC_FOLDER = 'stt_recordings';

public const TASK_TYPE_TEXT_GEN = 0;
public const TASK_TYPE_TEXT_TO_IMAGE = 1;
public const TASK_TYPE_SPEECH_TO_TEXT = 2;

public function __construct(array $urlParams = []) {
parent::__construct(self::APP_ID, $urlParams);
Expand All @@ -43,11 +50,16 @@ public function __construct(array $urlParams = []) {
public function register(IRegistrationContext $context): void {
$context->registerReferenceProvider(Text2ImageReferenceProvider::class);
$context->registerReferenceProvider(FreePromptReferenceProvider::class);
$context->registerReferenceProvider(SpeechToTextReferenceProvider::class);

$context->registerEventListener(RenderReferenceEvent::class, Text2ImageReferenceListener::class);
$context->registerEventListener(RenderReferenceEvent::class, FreePromptReferenceListener::class);
$context->registerEventListener(RenderReferenceEvent::class, SpeechToTextReferenceListener::class);

$context->registerEventListener(TextToImageTaskSuccessfulEvent::class, Text2ImageResultListener::class);
$context->registerEventListener(TextToImageTaskFailedEvent::class, Text2ImageResultListener::class);
$context->registerEventListener(TranscriptionSuccessfulEvent::class, SpeechToTextResultListener::class);
$context->registerEventListener(TranscriptionFailedEvent::class, SpeechToTextResultListener::class);

$context->registerEventListener(BeforeTemplateRenderedEvent::class, BeforeTemplateRenderedListener::class);

Expand Down
240 changes: 240 additions & 0 deletions lib/Controller/SpeechToTextController.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
<?php
/**
* @copyright Copyright (c) 2023 Anupam Kumar <[email protected]>
*
* @author Anupam Kumar <[email protected]>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

namespace OCA\TpAssistant\Controller;

use DateTime;
use Exception;
use InvalidArgumentException;
use OCA\TpAssistant\AppInfo\Application;
use OCA\TpAssistant\Db\SpeechToText\TranscriptMapper;
use OCA\TpAssistant\Service\SpeechToText\SpeechToTextService;
use OCP\AppFramework\Controller;
use OCP\AppFramework\Db\DoesNotExistException;
use OCP\AppFramework\Db\MultipleObjectsReturnedException;
use OCP\AppFramework\Http;
use OCP\AppFramework\Http\Attribute\AnonRateLimit;
use OCP\AppFramework\Http\Attribute\NoAdminRequired;
use OCP\AppFramework\Http\Attribute\NoCSRFRequired;
use OCP\AppFramework\Http\Attribute\UserRateLimit;
use OCP\AppFramework\Http\DataResponse;
use OCP\AppFramework\Http\TemplateResponse;
use OCP\AppFramework\Services\IInitialState;
use OCP\Files\NotFoundException;
use OCP\Files\NotPermittedException;
use OCP\IL10N;
use OCP\IRequest;
use OCP\PreConditionNotMetException;
use Psr\Log\LoggerInterface;
use RuntimeException;

/**
 * Controller behind the SpeechToText#getResultPage, #getTranscript,
 * #transcribeAudio and #transcribeFile routes.
 *
 * Transcript lookups are always scoped to the user ID injected into the
 * controller; transcription requests are delegated to SpeechToTextService.
 */
class SpeechToTextController extends Controller {

	public function __construct(
		string $appName,
		IRequest $request,
		private SpeechToTextService $service,
		private LoggerInterface $logger,
		private IL10N $l10n,
		private TranscriptMapper $transcriptMapper,
		private IInitialState $initialState,
		private ?string $userId,
	) {
		parent::__construct($appName, $request);
	}

	/**
	 * Render the 'speechToTextResultPage' template for one transcript.
	 *
	 * The outcome is handed to the frontend through the 'plain-text-result'
	 * initial state: on success it carries the transcript and the task type,
	 * on failure it carries the error message and the response status is set
	 * to the code of the exception thrown by internalGetTranscript().
	 *
	 * @param int $id Transcript ID
	 * @return TemplateResponse
	 */
	#[NoAdminRequired]
	#[NoCSRFRequired]
	#[UserRateLimit(limit: 10, period: 60)]
	#[AnonRateLimit(limit: 2, period: 60)]
	public function getResultPage(int $id): TemplateResponse {
		$response = new TemplateResponse(Application::APP_ID, 'speechToTextResultPage');
		try {
			$initData = [
				'status' => 'success',
				'result' => $this->internalGetTranscript($id),
				'taskType' => Application::TASK_TYPE_SPEECH_TO_TEXT,
			];
		} catch (Exception $e) {
			$initData = [
				'status' => 'failure',
				'message' => $e->getMessage(),
			];
			// internalGetTranscript() stores the HTTP status in the exception code
			$response->setStatus(intval($e->getCode()));
		}
		$this->initialState->provideInitialState('plain-text-result', $initData);
		return $response;
	}

	/**
	 * Return the transcript text, or the error message together with the
	 * HTTP status carried in the exception code.
	 *
	 * @param int $id Transcript ID
	 * @return DataResponse
	 */
	#[NoAdminRequired]
	public function getTranscript(int $id): DataResponse {
		try {
			return new DataResponse($this->internalGetTranscript($id));
		} catch (Exception $e) {
			return new DataResponse($e->getMessage(), intval($e->getCode()));
		}
	}

	/**
	 * Internal function to get transcript and throw a common exception
	 *
	 * Looks the transcript up for the current user, records the access time
	 * on the entity and returns the trimmed transcript text. Every failure is
	 * converted into a plain Exception whose message is translated and whose
	 * code is the HTTP status the callers should answer with; the original
	 * exception is kept as the previous one.
	 *
	 * @param int $id Transcript ID
	 * @return string
	 * @throws Exception
	 */
	private function internalGetTranscript(int $id): string {
		try {
			$transcriptEntity = $this->transcriptMapper->find($id, $this->userId);
			$transcript = $transcriptEntity->getTranscript();

			$transcriptEntity->setLastAccessed(new DateTime());
			$this->transcriptMapper->update($transcriptEntity);

			return trim($transcript);
		} catch (InvalidArgumentException $e) {
			$this->logger->error(
				'Invalid argument in transcript access time update call: ' . $e->getMessage(),
				['app' => Application::APP_ID],
			);
			throw new Exception(
				$this->l10n->t('Error in transcript access time update call'),
				Http::STATUS_INTERNAL_SERVER_ERROR,
				$e,
			);
		} catch (MultipleObjectsReturnedException $e) {
			$this->logger->error('Multiple transcripts found: ' . $e->getMessage(), ['app' => Application::APP_ID]);
			throw new Exception($this->l10n->t('Multiple transcripts found'), Http::STATUS_BAD_REQUEST, $e);
		} catch (DoesNotExistException $e) {
			// Not logged: an unknown ID is treated as a plain "not found"
			throw new Exception($this->l10n->t('Transcript not found'), Http::STATUS_NOT_FOUND, $e);
		} catch (Exception $e) {
			$this->logger->error('Error: ' . $e->getMessage(), ['app' => Application::APP_ID]);
			throw new Exception(
				$this->l10n->t('Some internal error occurred. Contact your sysadmin for more info.'),
				Http::STATUS_INTERNAL_SERVER_ERROR,
				$e,
			);
		}
	}

	/**
	 * Accept an MP3 upload in the 'audioData' form field and pass its
	 * temporary file path to the Speech-to-Text service.
	 *
	 * Responds with 'ok' when the service call returns without throwing, with
	 * a 400 status for a missing, failed or non-MP3 upload and when no
	 * provider is available, and with a 500 status for internal errors.
	 *
	 * @return DataResponse
	 */
	#[NoAdminRequired]
	public function transcribeAudio(): DataResponse {
		$audioData = $this->request->getUploadedFile('audioData');

		// Has to run before any key is read: without an 'audioData' upload
		// there is no array to inspect.
		if (empty($audioData)) {
			return new DataResponse('Invalid audio data received', Http::STATUS_BAD_REQUEST);
		}

		if ($audioData['error'] !== UPLOAD_ERR_OK) {
			return new DataResponse('Error in audio file upload: ' . $audioData['error'], Http::STATUS_BAD_REQUEST);
		}

		if ($audioData['type'] !== 'audio/mp3' && $audioData['type'] !== 'audio/mpeg') {
			return new DataResponse('Audio file must be in MP3 format', Http::STATUS_BAD_REQUEST);
		}

		try {
			$this->service->transcribeAudio($audioData['tmp_name'], $this->userId);
			return new DataResponse('ok');
		} catch (RuntimeException $e) {
			$this->logger->error(
				'Runtime exception: ' . $e->getMessage(),
				['app' => Application::APP_ID]
			);
			return new DataResponse(
				$this->l10n->t('Some internal error occurred. Contact your sysadmin for more info.'),
				Http::STATUS_INTERNAL_SERVER_ERROR
			);
		} catch (PreConditionNotMetException $e) {
			$this->logger->error('No Speech-to-Text provider found: ' . $e->getMessage(), ['app' => Application::APP_ID]);
			return new DataResponse(
				$this->l10n->t('No Speech-to-Text provider found, install one from the app store to use this feature.'),
				Http::STATUS_BAD_REQUEST
			);
		} catch (InvalidArgumentException $e) {
			$this->logger->error('InvalidArgumentException: ' . $e->getMessage(), ['app' => Application::APP_ID]);
			return new DataResponse(
				$this->l10n->t('Some internal error occurred. Contact your sysadmin for more info.'),
				Http::STATUS_INTERNAL_SERVER_ERROR
			);
		}
	}

	/**
	 * Pass a file path to the Speech-to-Text service on behalf of the
	 * current user.
	 *
	 * Responds with 'ok' when the service call returns without throwing, with
	 * a 400 status for an empty path or a missing provider, with a 404 status
	 * when the file is not found, and with a 500 status for permission or
	 * internal errors.
	 *
	 * @param string $path Nextcloud file path
	 * @return DataResponse
	 */
	#[NoAdminRequired]
	public function transcribeFile(string $path): DataResponse {
		if ($path === '') {
			return new DataResponse('Empty file path received', Http::STATUS_BAD_REQUEST);
		}

		try {
			$this->service->transcribeFile($path, $this->userId);
			return new DataResponse('ok');
		} catch (NotFoundException $e) {
			$this->logger->error('Audio file not found: ' . $e->getMessage(), ['app' => Application::APP_ID]);
			return new DataResponse(
				$this->l10n->t('Audio file not found.'),
				Http::STATUS_NOT_FOUND
			);
		} catch (RuntimeException $e) {
			$this->logger->error(
				'Runtime exception: ' . $e->getMessage(),
				['app' => Application::APP_ID]
			);
			return new DataResponse(
				$this->l10n->t('Some internal error occurred. Contact your sysadmin for more info.'),
				Http::STATUS_INTERNAL_SERVER_ERROR
			);
		} catch (NotPermittedException $e) {
			$this->logger->error(
				'No permission to create recording file/directory: ' . $e->getMessage(),
				['app' => Application::APP_ID]
			);
			return new DataResponse(
				$this->l10n->t('No permission to create recording file/directory, contact your sysadmin to resolve this issue.'),
				Http::STATUS_INTERNAL_SERVER_ERROR
			);
		} catch (PreConditionNotMetException $e) {
			$this->logger->error('No Speech-to-Text provider found: ' . $e->getMessage(), ['app' => Application::APP_ID]);
			return new DataResponse(
				$this->l10n->t('No Speech-to-Text provider found, install one from the app store to use this feature.'),
				Http::STATUS_BAD_REQUEST
			);
		} catch (InvalidArgumentException $e) {
			$this->logger->error('InvalidArgumentException: ' . $e->getMessage(), ['app' => Application::APP_ID]);
			return new DataResponse(
				$this->l10n->t('Some internal error occurred. Contact your sysadmin for more info.'),
				Http::STATUS_INTERNAL_SERVER_ERROR
			);
		}
	}
}
45 changes: 45 additions & 0 deletions lib/Cron/CleanupTranscriptions.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
<?php

declare(strict_types=1);

/**
* @copyright Copyright (c) 2023 Anupam Kumar <[email protected]>
*
* @author Anupam Kumar <[email protected]>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

namespace OCA\TpAssistant\Cron;

use OCA\TpAssistant\Db\SpeechToText\TranscriptMapper;
use OCP\AppFramework\Utility\ITimeFactory;
use OCP\BackgroundJob\TimedJob;

/**
 * Timed background job that asks the TranscriptMapper to clean up
 * transcriptions.
 */
class CleanupTranscriptions extends TimedJob {
	public function __construct(
		ITimeFactory $time,
		private TranscriptMapper $transcriptMapper,
	) {
		parent::__construct($time);

		// Run once every 24 hours
		$secondsPerDay = 24 * 60 * 60;
		$this->setInterval($secondsPerDay);
	}

	/**
	 * Delegate the cleanup to TranscriptMapper::cleanupTranscriptions().
	 *
	 * @param mixed $argument Job argument, not used by this job
	 */
	protected function run($argument) {
		$this->transcriptMapper->cleanupTranscriptions();
	}
}
Loading

0 comments on commit 84a6122

Please sign in to comment.