From ed8b2a88447632adf27ea55fd47c87ebbfc6988e Mon Sep 17 00:00:00 2001 From: MB-Finski Date: Wed, 10 Jan 2024 11:24:57 +0000 Subject: [PATCH 01/10] Import stt smart picker functionality from stt_helper Signed-off-by: MB-Finski --- appinfo/info.xml | 3 +- appinfo/routes.php | 5 + lib/AppInfo/Application.php | 31 +- lib/Controller/SpeechToTextController.php | 240 ++++++++++++++ lib/Cron/CleanupTranscriptions.php | 45 +++ lib/Db/SpeechToText/Transcript.php | 69 ++++ lib/Db/SpeechToText/TranscriptMapper.php | 91 ++++++ .../SpeechToTextReferenceListener.php | 58 ++++ .../SpeechToTextResultListener.php | 73 +++++ .../Version010003Date20231208083112.php | 25 ++ lib/Notification/Notifier.php | 24 +- .../SpeechToTextReferenceProvider.php | 109 +++++++ lib/Service/AssistantService.php | 4 + .../SpeechToText/SpeechToTextService.php | 199 ++++++++++++ lib/Settings/Admin.php | 9 +- lib/Settings/Personal.php | 15 +- src/assistant.js | 112 +++++-- src/components/AdminSettings.vue | 14 + src/components/AssistantPlainTextModal.vue | 189 +++++++++++ ...rm.vue => AssistantTextProcessingForm.vue} | 2 +- ...l.vue => AssistantTextProcessingModal.vue} | 12 +- src/components/PersonalSettings.vue | 8 + src/constants.js | 5 + src/main.js | 8 +- src/speechToTextReference.js | 46 +++ src/speechToTextResultPage.js | 14 + src/views/PlainTextResultPage.vue | 60 ++++ .../SpeechToTextCustomPickerElement.vue | 297 ++++++++++++++++++ src/views/TaskResultPage.vue | 6 +- templates/speechToTextResultPage.php | 11 + webpack.js | 2 + 31 files changed, 1729 insertions(+), 57 deletions(-) create mode 100644 lib/Controller/SpeechToTextController.php create mode 100644 lib/Cron/CleanupTranscriptions.php create mode 100644 lib/Db/SpeechToText/Transcript.php create mode 100644 lib/Db/SpeechToText/TranscriptMapper.php create mode 100644 lib/Listener/SpeechToText/SpeechToTextReferenceListener.php create mode 100644 lib/Listener/SpeechToText/SpeechToTextResultListener.php create mode 100644 
lib/Reference/SpeechToTextReferenceProvider.php create mode 100644 lib/Service/SpeechToText/SpeechToTextService.php create mode 100644 src/components/AssistantPlainTextModal.vue rename src/components/{AssistantForm.vue => AssistantTextProcessingForm.vue} (99%) rename src/components/{AssistantModal.vue => AssistantTextProcessingModal.vue} (93%) create mode 100644 src/speechToTextReference.js create mode 100644 src/speechToTextResultPage.js create mode 100644 src/views/PlainTextResultPage.vue create mode 100644 src/views/SpeechToText/SpeechToTextCustomPickerElement.vue create mode 100644 templates/speechToTextResultPage.php diff --git a/appinfo/info.xml b/appinfo/info.xml index f56b39d0..e6d1770c 100644 --- a/appinfo/info.xml +++ b/appinfo/info.xml @@ -48,7 +48,8 @@ include text processing providers to: https://github.com/nextcloud/assistant/raw/main/img/screenshot2.jpg https://github.com/nextcloud/assistant/raw/main/img/screenshot3.jpg - OCA\TpAssistant\Cron\CleanupImageGenerations + OCA\TPAssistant\Cron\CleanupImageGenerations + OCA\TPAssistant\Cron\CleanupTranscriptions OCA\TpAssistant\Command\CleanupImageGenerations diff --git a/appinfo/routes.php b/appinfo/routes.php index 5271fed7..b84df620 100644 --- a/appinfo/routes.php +++ b/appinfo/routes.php @@ -23,5 +23,10 @@ ['name' => 'FreePrompt#getPromptHistory', 'url' => '/f/prompt_history', 'verb' => 'GET'], ['name' => 'FreePrompt#getOutputs', 'url' => '/f/get_outputs', 'verb' => 'GET'], ['name' => 'FreePrompt#cancelGeneration', 'url' => '/f/cancel_generation', 'verb' => 'POST'], + + ['name' => 'SpeechToText#getResultPage', 'url' => '/stt/resultPage', 'verb' => 'GET'], + ['name' => 'SpeechToText#getTranscript', 'url' => '/stt/transcript', 'verb' => 'GET'], + ['name' => 'SpeechToText#transcribeAudio', 'url' => '/stt/transcribeAudio', 'verb' => 'POST'], + ['name' => 'SpeechToText#transcribeFile', 'url' => '/stt/transcribeFile', 'verb' => 'POST'], ], ]; diff --git a/lib/AppInfo/Application.php 
b/lib/AppInfo/Application.php index c641d857..3015646d 100644 --- a/lib/AppInfo/Application.php +++ b/lib/AppInfo/Application.php @@ -2,15 +2,18 @@ namespace OCA\TpAssistant\AppInfo; -use OCA\TpAssistant\Listener\BeforeTemplateRenderedListener; -use OCA\TpAssistant\Listener\FreePrompt\FreePromptReferenceListener; -use OCA\TpAssistant\Listener\TaskFailedListener; -use OCA\TpAssistant\Listener\TaskSuccessfulListener; -use OCA\TpAssistant\Listener\Text2Image\Text2ImageReferenceListener; -use OCA\TpAssistant\Listener\Text2Image\Text2ImageResultListener; -use OCA\TpAssistant\Notification\Notifier; -use OCA\TpAssistant\Reference\FreePromptReferenceProvider; -use OCA\TpAssistant\Reference\Text2ImageReferenceProvider; +use OCA\TPAssistant\Listener\BeforeTemplateRenderedListener; +use OCA\TPAssistant\Listener\FreePrompt\FreePromptReferenceListener; +use OCA\TPAssistant\Listener\TaskFailedListener; +use OCA\TPAssistant\Listener\TaskSuccessfulListener; +use OCA\TPAssistant\Listener\Text2Image\Text2ImageReferenceListener; +use OCA\TPAssistant\Listener\SpeechToText\SpeechToTextResultListener; +use OCA\TPAssistant\Listener\SpeechToText\SpeechToTextReferenceListener; +use OCA\TPAssistant\Reference\SpeechToTextReferenceProvider; +use OCA\TPAssistant\Listener\Text2Image\Text2ImageResultListener; +use OCA\TPAssistant\Notification\Notifier; +use OCA\TPAssistant\Reference\FreePromptReferenceProvider; +use OCA\TPAssistant\Reference\Text2ImageReferenceProvider; use OCP\AppFramework\App; use OCP\AppFramework\Bootstrap\IBootContext; @@ -22,6 +25,9 @@ use OCP\TextProcessing\Events\TaskSuccessfulEvent as TextTaskSuccessfulEvent; use OCP\TextToImage\Events\TaskFailedEvent as TextToImageTaskFailedEvent; use OCP\TextToImage\Events\TaskSuccessfulEvent as TextToImageTaskSuccessfulEvent; +use OCP\SpeechToText\Events\TranscriptionFailedEvent; +use OCP\SpeechToText\Events\TranscriptionSuccessfulEvent; + class Application extends App implements IBootstrap { @@ -32,9 +38,11 @@ class Application 
extends App implements IBootstrap { public const DEFAULT_MAX_IMAGE_GENERATION_IDLE_TIME = 60 * 60 * 24 * 90; // 90 days public const DEFAULT_TEXT_GENERATION_STORAGE_TIME = 60 * 60 * 24 * 90; // 90 days public const IMAGE_FOLDER = 'generated_images'; + public const SPEECH_TO_TEXT_REC_FOLDER = 'stt_recordings'; public const TASK_TYPE_TEXT_GEN = 0; public const TASK_TYPE_TEXT_TO_IMAGE = 1; + public const TASK_TYPE_SPEECH_TO_TEXT = 2; public function __construct(array $urlParams = []) { parent::__construct(self::APP_ID, $urlParams); @@ -43,11 +51,16 @@ public function __construct(array $urlParams = []) { public function register(IRegistrationContext $context): void { $context->registerReferenceProvider(Text2ImageReferenceProvider::class); $context->registerReferenceProvider(FreePromptReferenceProvider::class); + $context->registerReferenceProvider(SpeechToTextReferenceProvider::class); $context->registerEventListener(RenderReferenceEvent::class, Text2ImageReferenceListener::class); $context->registerEventListener(RenderReferenceEvent::class, FreePromptReferenceListener::class); + $context->registerEventListener(RenderReferenceEvent::class, SpeechToTextReferenceListener::class); + $context->registerEventListener(TextToImageTaskSuccessfulEvent::class, Text2ImageResultListener::class); $context->registerEventListener(TextToImageTaskFailedEvent::class, Text2ImageResultListener::class); + $context->registerEventListener(TranscriptionSuccessfulEvent::class, SpeechToTextResultListener::class); + $context->registerEventListener(TranscriptionFailedEvent::class, SpeechToTextResultListener::class); $context->registerEventListener(BeforeTemplateRenderedEvent::class, BeforeTemplateRenderedListener::class); diff --git a/lib/Controller/SpeechToTextController.php b/lib/Controller/SpeechToTextController.php new file mode 100644 index 00000000..48b1500d --- /dev/null +++ b/lib/Controller/SpeechToTextController.php @@ -0,0 +1,240 @@ + + * + * @author Anupam Kumar + * + * @license GNU 
AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + + namespace OCA\TPAssistant\Controller; + +use DateTime; +use Exception; +use InvalidArgumentException; +use OCA\TPAssistant\AppInfo\Application; +use OCA\TPAssistant\Db\SpeechToText\TranscriptMapper; +use OCA\TPAssistant\Service\SpeechToText\SpeechToTextService; +use OCP\AppFramework\Controller; +use OCP\AppFramework\Db\DoesNotExistException; +use OCP\AppFramework\Db\MultipleObjectsReturnedException; +use OCP\AppFramework\Http; +use OCP\AppFramework\Http\Attribute\AnonRateLimit; +use OCP\AppFramework\Http\Attribute\NoAdminRequired; +use OCP\AppFramework\Http\Attribute\NoCSRFRequired; +use OCP\AppFramework\Http\Attribute\UserRateLimit; +use OCP\AppFramework\Http\DataResponse; +use OCP\AppFramework\Http\TemplateResponse; +use OCP\AppFramework\Services\IInitialState; +use OCP\Files\NotFoundException; +use OCP\Files\NotPermittedException; +use OCP\IL10N; +use OCP\IRequest; +use OCP\PreConditionNotMetException; +use Psr\Log\LoggerInterface; +use RuntimeException; + +class SpeechToTextController extends Controller { + + public function __construct( + string $appName, + IRequest $request, + private SpeechToTextService $service, + private LoggerInterface $logger, + private IL10N $l10n, + private TranscriptMapper $transcriptMapper, + private IInitialState $initialState, + private ?string 
$userId, + ) { + parent::__construct($appName, $request); + } + + /** + * @param int $id + * @return TemplateResponse + */ + #[NoAdminRequired] + #[NoCSRFRequired] + #[UserRateLimit(limit: 10, period: 60)] + #[AnonRateLimit(limit: 2, period: 60)] + public function getResultPage(int $id): TemplateResponse { + $response = new TemplateResponse(Application::APP_ID, 'speechToTextResultPage'); + try { + $initData = [ + 'status' => 'success', + 'result' => $this->internalGetTranscript($id), + 'taskType' => Application::TASK_TYPE_SPEECH_TO_TEXT, + ]; + } catch (Exception $e) { + $initData = [ + 'status' => 'failure', + 'message' => $e->getMessage(), + ]; + $response->setStatus(intval($e->getCode())); + } + $this->initialState->provideInitialState('plain-text-result', $initData); + return $response; + } + + /** + * @param int $id Transcript ID + * @return DataResponse + */ + #[NoAdminRequired] + public function getTranscript(int $id): DataResponse { + try { + return new DataResponse($this->internalGetTranscript($id)); + } catch (Exception $e) { + return new DataResponse($e->getMessage(), intval($e->getCode())); + } + } + + /** + * Internal function to get transcript and throw a common exception + * + * @param integer $id + * @return string + */ + private function internalGetTranscript(int $id): string { + try { + $transcriptEntity = $this->transcriptMapper->find($id, $this->userId); + $transcript = $transcriptEntity->getTranscript(); + + $transcriptEntity->setLastAccessed(new DateTime()); + $this->transcriptMapper->update($transcriptEntity); + + return trim($transcript); + } catch (InvalidArgumentException $e) { + $this->logger->error( + 'Invalid argument in transcript access time update call: ' . 
$e->getMessage(),
+				['app' => Application::APP_ID],
+			);
+			throw new Exception(
+				$this->l10n->t('Error in transcript access time update call'),
+				Http::STATUS_INTERNAL_SERVER_ERROR,
+			);
+		} catch (MultipleObjectsReturnedException $e) {
+			$this->logger->error('Multiple transcripts found: ' . $e->getMessage(), ['app' => Application::APP_ID]);
+			throw new Exception($this->l10n->t('Multiple transcripts found'), Http::STATUS_BAD_REQUEST);
+		} catch (DoesNotExistException $e) {
+			throw new Exception($this->l10n->t('Transcript not found'), Http::STATUS_NOT_FOUND);
+		} catch (Exception $e) {
+			$this->logger->error('Error: ' . $e->getMessage(), ['app' => Application::APP_ID]);
+			throw new Exception(
+				$this->l10n->t('Some internal error occurred. Contact your sysadmin for more info.'),
+				Http::STATUS_INTERNAL_SERVER_ERROR,
+			);
+		}
+	}
+
+	/**
+	 * Validate the uploaded 'audioData' file and hand its temporary path to
+	 * SpeechToTextService::transcribeAudio() for the current user.
+	 *
+	 * @return DataResponse 'ok' on success, otherwise an error message with
+	 *                      a 400 or 500 status code
+	 */
+	#[NoAdminRequired]
+	public function transcribeAudio(): DataResponse {
+		$audioData = $this->request->getUploadedFile('audioData');
+
+		// Reject a missing upload before indexing into it; otherwise the
+		// 'error' lookup below runs on a non-array and this branch is never reached.
+		if (empty($audioData)) {
+			return new DataResponse('Invalid audio data received', Http::STATUS_BAD_REQUEST);
+		}
+
+		if ($audioData['error'] !== UPLOAD_ERR_OK) {
+			return new DataResponse('Error in audio file upload: ' . $audioData['error'], Http::STATUS_BAD_REQUEST);
+		}
+
+		if ($audioData['type'] !== 'audio/mp3' && $audioData['type'] !== 'audio/mpeg') {
+			return new DataResponse('Audio file must be in MP3 format', Http::STATUS_BAD_REQUEST);
+		}
+
+		try {
+			$this->service->transcribeAudio($audioData['tmp_name'], $this->userId);
+			return new DataResponse('ok');
+		} catch (RuntimeException $e) {
+			$this->logger->error(
+				'Runtime exception: ' . $e->getMessage(),
+				['app' => Application::APP_ID]
+			);
+			return new DataResponse(
+				$this->l10n->t('Some internal error occurred. Contact your sysadmin for more info.'),
+				Http::STATUS_INTERNAL_SERVER_ERROR
+			);
+		} catch (PreConditionNotMetException $e) {
+			$this->logger->error('No Speech-to-Text provider found: ' . 
$e->getMessage(), ['app' => Application::APP_ID]); + return new DataResponse( + $this->l10n->t('No Speech-to-Text provider found, install one from the app store to use this feature.'), + Http::STATUS_BAD_REQUEST + ); + } catch (InvalidArgumentException $e) { + $this->logger->error('InvalidArgumentException: ' . $e->getMessage(), ['app' => Application::APP_ID]); + return new DataResponse( + $this->l10n->t('Some internal error occurred. Contact your sysadmin for more info.'), + Http::STATUS_INTERNAL_SERVER_ERROR + ); + } + } + + /** + * @param string $path Nextcloud file path + * @return DataResponse + */ + #[NoAdminRequired] + public function transcribeFile(string $path): DataResponse { + if ($path === '') { + return new DataResponse('Empty file path received', Http::STATUS_BAD_REQUEST); + } + + try { + $this->service->transcribeFile($path, $this->userId); + return new DataResponse('ok'); + } catch (NotFoundException $e) { + $this->logger->error('Audio file not found: ' . $e->getMessage(), ['app' => Application::APP_ID]); + return new DataResponse( + $this->l10n->t('Audio file not found.'), + Http::STATUS_NOT_FOUND + ); + } catch (RuntimeException $e) { + $this->logger->error( + 'Runtime exception: ' . $e->getMessage(), + ['app' => Application::APP_ID] + ); + return new DataResponse( + $this->l10n->t('Some internal error occurred. Contact your sysadmin for more info.'), + Http::STATUS_INTERNAL_SERVER_ERROR + ); + } catch (NotPermittedException $e) { + $this->logger->error( + 'No permission to create recording file/directory: ' . $e->getMessage(), + ['app' => Application::APP_ID] + ); + return new DataResponse( + $this->l10n->t('No permission to create recording file/directory, contact your sysadmin to resolve this issue.'), + Http::STATUS_INTERNAL_SERVER_ERROR + ); + } catch (PreConditionNotMetException $e) { + $this->logger->error('No Speech-to-Text provider found: ' . 
$e->getMessage(), ['app' => Application::APP_ID]); + return new DataResponse( + $this->l10n->t('No Speech-to-Text provider found, install one from the app store to use this feature.'), + Http::STATUS_BAD_REQUEST + ); + } catch (InvalidArgumentException $e) { + $this->logger->error('InvalidArgumentException: ' . $e->getMessage(), ['app' => Application::APP_ID]); + return new DataResponse( + $this->l10n->t('Some internal error occurred. Contact your sysadmin for more info.'), + Http::STATUS_INTERNAL_SERVER_ERROR + ); + } + } +} diff --git a/lib/Cron/CleanupTranscriptions.php b/lib/Cron/CleanupTranscriptions.php new file mode 100644 index 00000000..442acef7 --- /dev/null +++ b/lib/Cron/CleanupTranscriptions.php @@ -0,0 +1,45 @@ + + * + * @author Anupam Kumar + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ *
+ */
+
+namespace OCA\TPAssistant\Cron;
+
+use OCA\TPAssistant\Db\SpeechToText\TranscriptMapper;
+use OCP\AppFramework\Utility\ITimeFactory;
+use OCP\BackgroundJob\TimedJob;
+
+/**
+ * Timed background job (24 hour interval) that delegates to
+ * TranscriptMapper::cleanupTranscriptions().
+ */
+class CleanupTranscriptions extends TimedJob {
+	public function __construct(
+		ITimeFactory $time,
+		private TranscriptMapper $transcriptMapper,
+	) {
+		parent::__construct($time);
+		$this->setInterval(60 * 60 * 24); // 24 hours
+	}
+
+	/**
+	 * @param mixed $argument not used by this job
+	 */
+	protected function run($argument) {
+		$this->transcriptMapper->cleanupTranscriptions();
+	}
+}
diff --git a/lib/Db/SpeechToText/Transcript.php b/lib/Db/SpeechToText/Transcript.php
new file mode 100644
index 00000000..c225c6cb
--- /dev/null
+++ b/lib/Db/SpeechToText/Transcript.php
@@ -0,0 +1,69 @@
+
+ *
+ * @author Anupam Kumar
+ *
+ * @license GNU AGPL version 3 or any later version
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+namespace OCA\TPAssistant\Db\SpeechToText;
+
+use OCP\AppFramework\Db\Entity;
+use OCP\DB\Types;
+
+/**
+ * Entity for one stored speech-to-text transcript.
+ *
+ * @package OCA\TPAssistant\Db\SpeechToText
+ * @method ?string getUserId()
+ * @method void setUserId(?string $userId)
+ * @method string getTranscript()
+ * @method void setTranscript(string $transcript)
+ * @method \DateTime getLastAccessed()
+ * @method void setLastAccessed(\DateTime $lastAccessed)
+ */
+class Transcript extends Entity {
+
+	// Backing properties for the magic accessors declared in the class docblock
+	protected $userId;
+	protected $transcript;
+	protected $lastAccessed;
+
+	// Database column names, in snake_case
+	public static $columns = [
+		'id',
+		'user_id',
+		'transcript',
+		'last_accessed',
+	];
+	// Entity property names, in the same order as $columns
+	public static $fields = [
+		'id',
+		'userId',
+		'transcript',
+		'lastAccessed',
+	];
+
+	public function __construct() {
+		$this->addType('id', Types::INTEGER);
+		$this->addType('userId', Types::STRING);
+		$this->addType('transcript', Types::STRING);
+		$this->addType('lastAccessed', Types::DATETIME);
+	}
+}
diff --git a/lib/Db/SpeechToText/TranscriptMapper.php b/lib/Db/SpeechToText/TranscriptMapper.php
new file mode 100644
index 00000000..edb875c1
--- /dev/null
+++ b/lib/Db/SpeechToText/TranscriptMapper.php
@@ -0,0 +1,91 @@
+
+ *
+ * @author Anupam Kumar
+ *
+ * @license GNU AGPL version 3 or any later version
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+namespace OCA\TPAssistant\Db\SpeechToText;
+
+use DateTime;
+use Exception;
+use OCP\AppFramework\Db\DoesNotExistException;
+use OCP\AppFramework\Db\MultipleObjectsReturnedException;
+use OCP\AppFramework\Db\QBMapper;
+use OCP\DB\QueryBuilder\IQueryBuilder;
+use OCP\IDBConnection;
+use Psr\Log\LoggerInterface;
+
+/**
+ * Mapper for the 'assistant_stt_transcripts' table.
+ *
+ * @template-extends QBMapper<Transcript>
+ */
+class TranscriptMapper extends QBMapper {
+
+	public function __construct(IDBConnection $db, private LoggerInterface $logger) {
+		parent::__construct($db, 'assistant_stt_transcripts', Transcript::class);
+		$this->db = $db;
+	}
+
+	/**
+	 * Load a transcript by ID, restricted to its owner when a user ID is given.
+	 *
+	 * @param integer $id
+	 * @param string|null $userId owner filter; null, '' or 'admin' disables the filter
+	 * @throws Exception
+	 * @throws MultipleObjectsReturnedException if more than one item exist
+	 * @throws DoesNotExistException if the item does not exist
+	 * @return Transcript
+	 */
+	public function find(int $id, ?string $userId): Transcript {
+		$qb = $this->db->getQueryBuilder();
+		$qb
+			->select(Transcript::$columns)
+			->from($this->getTableName())
+			->where($qb->expr()->eq('id', $qb->createNamedParameter($id, IQueryBuilder::PARAM_INT)))
+		;
+
+		// Compare explicitly instead of strlen($userId): passing null to strlen()
+		// is deprecated since PHP 8.1.
+		// NOTE(review): the literal user ID 'admin' skips the ownership filter and can
+		// therefore read any transcript - confirm this is intended; a group-membership
+		// check in the caller looks like the safer option.
+		if ($userId !== null && $userId !== '' && $userId !== 'admin') {
+			$qb->andWhere($qb->expr()->eq('user_id', $qb->createNamedParameter($userId)));
+		}
+
+		return $this->findEntity($qb);
+	}
+
+	/**
+	 * Delete every transcript whose last_accessed is two weeks old or older.
+	 */
+	public function cleanupTranscriptions(): void {
+		$qb = $this->db->getQueryBuilder();
+		$qb
+			->delete($this->getTableName())
+			->where($qb->expr()->lte(
+				'last_accessed',
+				$qb->createNamedParameter(new DateTime('-2 weeks'), IQueryBuilder::PARAM_DATE)
+			))
+		;
+
+		$deletedRows = $qb->executeStatement();
+		$this->logger->debug('Cleared {count} old transcriptions', ['count' => $deletedRows]);
+	}
+}
diff --git a/lib/Listener/SpeechToText/SpeechToTextReferenceListener.php b/lib/Listener/SpeechToText/SpeechToTextReferenceListener.php
new file mode 100644
index 00000000..e9a7b6f3
--- 
/dev/null
+++ b/lib/Listener/SpeechToText/SpeechToTextReferenceListener.php
@@ -0,0 +1,58 @@
+
+ *
+ * @author Anupam Kumar
+ *
+ * @license GNU AGPL version 3 or any later version
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+namespace OCA\TPAssistant\Listener\SpeechToText;
+
+use OCA\TPAssistant\AppInfo\Application;
+
+use OCP\Collaboration\Reference\RenderReferenceEvent;
+use OCP\EventDispatcher\Event;
+use OCP\EventDispatcher\IEventListener;
+use OCP\Util;
+use OCP\IConfig;
+use OCP\SpeechToText\ISpeechToTextManager;
+
+/**
+ * Loads the speech-to-text picker script when references are rendered, provided
+ * the picker is enabled app-wide and for the current user and a provider exists.
+ *
+ * @template-implements IEventListener<Event>
+ */
+class SpeechToTextReferenceListener implements IEventListener {
+	public function __construct(
+		private IConfig $config,
+		// Nullable like the other $userId injections in this app, so that
+		// constructing the listener without a logged-in user does not fail.
+		private ?string $userId,
+		private ISpeechToTextManager $sttProcessingManager,
+	) {
+	}
+
+	/**
+	 * @param Event $event only RenderReferenceEvent is handled
+	 */
+	public function handle(Event $event): void {
+		if (!$event instanceof RenderReferenceEvent) {
+			return;
+		}
+		// Without a user there is no per-user preference to read, so skip the picker.
+		if ($this->userId === null) {
+			return;
+		}
+		if ($this->config->getAppValue(Application::APP_ID, 'speech_to_text_picker_enabled', '1') === '1' &&
+			$this->config->getUserValue($this->userId, Application::APP_ID, 'speech_to_text_picker_enabled', '1') === '1') {
+
+			// Double check that at least one provider is registered
+			if ($this->sttProcessingManager->hasProviders()) {
+				Util::addScript(Application::APP_ID, Application::APP_ID . 
'-speechToTextReference'); + } + } + } +} diff --git a/lib/Listener/SpeechToText/SpeechToTextResultListener.php b/lib/Listener/SpeechToText/SpeechToTextResultListener.php new file mode 100644 index 00000000..158d475a --- /dev/null +++ b/lib/Listener/SpeechToText/SpeechToTextResultListener.php @@ -0,0 +1,73 @@ + + * + * @author Anupam Kumar + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +namespace OCA\TPAssistant\Listener\SpeechToText; + +use DateTime; +use OCA\TPAssistant\AppInfo\Application; +use OCA\TPAssistant\Service\SpeechToText\SpeechToTextService; +use OCP\EventDispatcher\Event; +use OCP\EventDispatcher\IEventListener; +use OCP\SpeechToText\Events\AbstractTranscriptionEvent; +use OCP\SpeechToText\Events\TranscriptionFailedEvent; +use OCP\SpeechToText\Events\TranscriptionSuccessfulEvent; +use Psr\Log\LoggerInterface; + +/** + * @template-implements IEventListener + */ +class SpeechToTextResultListener implements IEventListener { + public function __construct( + private SpeechToTextService $sttService, + private LoggerInterface $logger, + ) { + } + + public function handle(Event $event): void { + if (!$event instanceof AbstractTranscriptionEvent || $event->getAppId() !== Application::APP_ID) { + return; + } + + if ($event instanceof TranscriptionSuccessfulEvent) { + $transcript = $event->getTranscript(); + $userId = $event->getUserId(); + + try { + $this->sttService->sendSpeechToTextNotification($userId, $transcript,true); + } catch (\InvalidArgumentException $e) { + $this->logger->error('Failed to dispatch notification for successful transcription: ' . $e->getMessage()); + } + } + + if ($event instanceof TranscriptionFailedEvent) { + $userId = $event->getUserId(); + $this->logger->error('Transcript generation failed: ' . $event->getErrorMessage()); + + try { + $this->sttService->sendSpeechToTextNotification($userId, '',false); + } catch (\InvalidArgumentException $e) { + $this->logger->error('Failed to dispatch notification for failed transcription: ' . 
$e->getMessage());
+			}
+
+		}
+	}
+}
diff --git a/lib/Migration/Version010003Date20231208083112.php b/lib/Migration/Version010003Date20231208083112.php
index d5607525..4cd5e659 100644
--- a/lib/Migration/Version010003Date20231208083112.php
+++ b/lib/Migration/Version010003Date20231208083112.php
@@ -152,6 +152,31 @@ public function changeSchema(IOutput $output, Closure $schemaClosure, array $opt
 			$table->addIndex(['user_id', 'timestamp'], 'assistant_t_prompts_uid_ts');
 		}
 
+		if (!$schema->hasTable('assistant_stt_transcripts')) {
+			$schemaChanged = true;
+			$table = $schema->createTable('assistant_stt_transcripts');
+
+			$table->addColumn('id', Types::BIGINT, [
+				'autoincrement' => true,
+				'notnull' => true,
+				'length' => 64,
+			]);
+			$table->addColumn('user_id', Types::STRING, [
+				'notnull' => false,
+				'length' => 64,
+			]);
+			$table->addColumn('transcript', Types::TEXT, [ // TEXT: a transcript can be longer than a length-limited STRING column allows
+				'notnull' => true,
+			]);
+			$table->addColumn('last_accessed', Types::DATETIME, [
+				'notnull' => false,
+			]);
+
+			$table->setPrimaryKey(['id'], 'assistant_stt_transcript_id');
+			$table->addIndex(['user_id'], 'assistant_stt_transcript_user');
+			$table->addIndex(['last_accessed'], 'assistant_stt_transcript_la');
+		}
+
 		return $schemaChanged ? $schema : null;
 	}
 
diff --git a/lib/Notification/Notifier.php b/lib/Notification/Notifier.php
index 66f5928f..04006341 100644
--- a/lib/Notification/Notifier.php
+++ b/lib/Notification/Notifier.php
@@ -79,8 +79,10 @@ public function prepare(INotification $notification, string $languageCode): INot
 			} catch (\Exception | \Throwable $e) {
 				$this->logger->debug('Impossible to get task type ' . 
$params['taskTypeClass'], ['exception' => $e]);
 			}
-		} elseif ($params['taskType'] === Application::TASK_TYPE_TEXT_TO_IMAGE) {
+		} elseif ($params['taskType'] === Application::TASK_TYPE_TEXT_TO_IMAGE) {
 			$taskTypeName = $l->t('Text to image');
+		} elseif ($params['taskType'] === Application::TASK_TYPE_SPEECH_TO_TEXT) {
+			$taskTypeName = $l->t('Speech to text');
 		}
 
 		switch ($notification->getSubject()) {
@@ -88,7 +90,17 @@ public function prepare(INotification $notification, string $languageCode): INot
 				$subject = $taskTypeName === null
 					? $l->t('Task for "%1$s" has finished', [$schedulingAppName])
 					: $l->t('"%1$s" task for "%2$s" has finished', [$taskTypeName, $schedulingAppName]);
-				$content = $l->t('Input: %1$s', [$params['input']]);
+
+				$content = '';
+				if (isset($params['input'])) {
+					$content .= $l->t('Input: %1$s', [$params['input']]);
+				}
+
+				if (isset($params['result'])) {
+					$content .= $content === '' ? '' : "\n"; // double quotes: '\n' in single quotes is a literal backslash-n, not a line break
+					$content .= $l->t('Result: %1$s', [$params['result']]);
+				}
+
 				$link = $params['target'] ?? $this->url->linkToRouteAbsolute(Application::APP_ID . '.assistant.getTextProcessingTaskResultPage', ['taskId' => $params['id']]);
 				$iconUrl = $this->url->getAbsoluteURL($this->url->imagePath(Application::APP_ID, 'app-dark.svg'));
 
@@ -113,7 +125,13 @@ public function prepare(INotification $notification, string $languageCode): INot
 				$subject = $taskTypeName === null
 					? $l->t('Task for "%1$s" has failed', [$schedulingAppName])
 					: $l->t('"%1$s" task for "%2$s" has failed', [$taskTypeName, $schedulingAppName]);
-				$content = $l->t('Input: %1$s', [$params['input']]);
+
+				$content = '';
+				if (isset($params['input'])) {
+					$content .= $l->t('Input: %1$s', [$params['input']]);
+				}
+
+
 				$link = $params['target'] ?? $this->url->linkToRouteAbsolute(Application::APP_ID . 
'.assistant.getTextProcessingTaskResultPage', ['taskId' => $params['id']]); $iconUrl = $this->url->getAbsoluteURL($this->url->imagePath('core', 'actions/error.svg')); diff --git a/lib/Reference/SpeechToTextReferenceProvider.php b/lib/Reference/SpeechToTextReferenceProvider.php new file mode 100644 index 00000000..81b56bd4 --- /dev/null +++ b/lib/Reference/SpeechToTextReferenceProvider.php @@ -0,0 +1,109 @@ + + * + * @author Anupam Kumar + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */
+
+namespace OCA\TPAssistant\Reference;
+
+use OCA\TPAssistant\AppInfo\Application;
+use OCP\Collaboration\Reference\ADiscoverableReferenceProvider;
+use OCP\Collaboration\Reference\IReference;
+use OCP\Collaboration\Reference\IReferenceManager;
+use OCP\IL10N;
+use OCP\IURLGenerator;
+
+/**
+ * Discoverable reference provider entry for speech-to-text.
+ * It never matches or resolves a reference itself.
+ */
+class SpeechToTextReferenceProvider extends ADiscoverableReferenceProvider {
+
+	public function __construct(
+		private IL10N $l10n,
+		private IURLGenerator $urlGenerator,
+		private IReferenceManager $referenceManager,
+		private ?string $userId
+	) {
+	}
+
+	/**
+	 * @inheritDoc
+	 */
+	public function getId(): string {
+		return 'assistant_speech_to_text';
+	}
+
+	/**
+	 * @inheritDoc
+	 */
+	public function getTitle(): string {
+		return $this->l10n->t('Speech to Text');
+	}
+
+	/**
+	 * @inheritDoc
+	 */
+	public function getOrder(): int {
+		return 10;
+	}
+
+	/**
+	 * @inheritDoc
+	 */
+	public function getIconUrl(): string {
+		return $this->urlGenerator->getAbsoluteURL(
+			$this->urlGenerator->imagePath(Application::APP_ID, 'app-dark.svg')
+		);
+	}
+
+	/**
+	 * Always false: this provider does not claim any reference text.
+	 *
+	 * @inheritDoc
+	 */
+	public function matchReference(string $referenceText): bool {
+		return false;
+	}
+
+	/**
+	 * Always null: there is nothing to resolve because nothing is matched.
+	 *
+	 * @inheritDoc
+	 */
+	public function resolveReference(string $referenceText): ?IReference {
+		return null;
+	}
+
+	/**
+	 * The cache prefix is the current user ID (empty string when there is none),
+	 * so that the cached references of a single user can be invalidated by prefix.
+	 *
+	 * @inheritDoc
+	 */
+	public function getCachePrefix(string $referenceId): string {
+		return $this->userId ?? 
''; + } + + /** + * @inheritDoc + */ + public function getCacheKey(string $referenceId): ?string { + return $referenceId; + } + + /** + * @param string $userId + * @return void + */ + public function invalidateUserCache(string $userId): void { + $this->referenceManager->invalidateCache($userId); + } +} diff --git a/lib/Service/AssistantService.php b/lib/Service/AssistantService.php index dc84d523..a6d1383f 100644 --- a/lib/Service/AssistantService.php +++ b/lib/Service/AssistantService.php @@ -10,6 +10,8 @@ use OCP\TextProcessing\IManager as ITextProcessingManager; use OCP\TextProcessing\Task as TextProcessingTask; use OCP\TextToImage\Task as TextToImageTask; +use OCP\Notification\IManager as INotificationManager; +use OCP\IURLGenerator; class AssistantService { @@ -17,6 +19,7 @@ public function __construct( string $appName, private INotificationManager $notificationManager, private ITextProcessingManager $textProcessingManager, + private IURLGenerator $url, ) { } @@ -27,6 +30,7 @@ public function __construct( * @param string|null $target optional notification link target * @param string|null $actionLabel optional label for the notification action button * @return void + * @throws \InvalidArgumentException */ public function sendNotification(TextProcessingTask|TextToImageTask $task, ?string $target = null, ?string $actionLabel = null): void { $manager = $this->notificationManager; diff --git a/lib/Service/SpeechToText/SpeechToTextService.php b/lib/Service/SpeechToText/SpeechToTextService.php new file mode 100644 index 00000000..2de06369 --- /dev/null +++ b/lib/Service/SpeechToText/SpeechToTextService.php @@ -0,0 +1,199 @@ + + * + * @author Anupam Kumar + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later 
version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +namespace OCA\TPAssistant\Service\SpeechToText; + +use DateTime; +use InvalidArgumentException; +use OCA\TPAssistant\AppInfo\Application; +use OCA\TPAssistant\Db\SpeechToText\Transcript; +use OCA\TPAssistant\Db\SpeechToText\TranscriptMapper; +use OCP\Files\File; +use OCP\Files\Folder; +use OCP\Files\IRootFolder; +use OCP\Files\NotFoundException; +use OCP\Files\NotPermittedException; +use OCP\IConfig; +use OCP\IURLGenerator; +use OCP\Notification\IManager as INotifyManager; +use OCP\PreConditionNotMetException; +use OCP\SpeechToText\ISpeechToTextManager; +use Psr\Log\LoggerInterface; +use RuntimeException; + +class SpeechToTextService { + + public function __construct( + private ISpeechToTextManager $manager, + private IRootFolder $rootFolder, + private INotifyManager $notificationManager, + private IURLGenerator $urlGenerator, + private LoggerInterface $logger, + private IConfig $config, + private TranscriptMapper $transcriptMapper, + ) { + } + + /** + * @param string $path + * @param string|null $userId + * @throws NotFoundException + * @throws NotPermittedException + * @throws PreConditionNotMetException + * @throws InvalidArgumentException + * @throws RuntimeException + */ + public function transcribeFile(string $path, ?string $userId): void { + // this also prevents NoUserException + if (is_null($userId)) { + throw new InvalidArgumentException('userId must not be null'); + } + + $userFolder = $this->rootFolder->getUserFolder($userId); + $audioFile = $userFolder->get($path); + + $this->manager->scheduleFileTranscription($audioFile, $userId, Application::APP_ID); + } + 
+ /** + * @param string $tempFileLocation + * @param string|null $userId + * @throws NotPermittedException + * @throws PreConditionNotMetException + * @throws InvalidArgumentException + * @throws RuntimeException + */ + public function transcribeAudio(string $tempFileLocation, ?string $userId): void { + if ($userId === null) { + throw new InvalidArgumentException('userId must not be null'); + } + + $audioFile = $this->getFileObject($userId, $tempFileLocation); + $this->manager->scheduleFileTranscription($audioFile, $userId, Application::APP_ID); + } + + /** + * @param string $userId + * @param string $tempFileLocation + * @return File + * @throws NotPermittedException + * @throws RuntimeException + */ + private function getFileObject(string $userId, string $tempFileLocation): File { + $userFolder = $this->rootFolder->getUserFolder($userId); + + $sttFolderName = $this->config->getAppValue(Application::APP_ID, 'stt_folder', '(not set)'); + if ($sttFolderName === '(not set)') { + $sttFolderName = Application::SPEECH_TO_TEXT_REC_FOLDER; + + if ($userFolder->nodeExists($sttFolderName)) { + $sttFolder = $this->getUniqueNamedFolder($userId); + $sttFolderName = $sttFolder->getName(); + } else { + $sttFolder = $userFolder->newFolder($sttFolderName); + } + $this->config->setAppValue(Application::APP_ID, 'stt_folder', $sttFolderName); + } else { + $sttFolder = $userFolder->get($sttFolderName); + if (!$sttFolder instanceof Folder) { + // the folder created by this app was tampered with + // create a new one + $sttFolder = $this->getUniqueNamedFolder($userId); + $sttFolderName = $sttFolder->getName(); + $this->config->setAppValue(Application::APP_ID, 'stt_folder', $sttFolderName); + } + } + + $filename = (new DateTime())->format('d-M-Y-Hisu') . 
'.mp3'; + $audioFile = $sttFolder->newFile($filename, fopen($tempFileLocation, 'rb')); + + return $audioFile; + } + + /** + * @param string $userId + * @param integer $try + * @return Folder + * @throws RuntimeException + * @throws NotPermittedException + */ + private function getUniqueNamedFolder(string $userId, int $try = 3): Folder { + $userFolder = $this->rootFolder->getUserFolder($userId); + $sttFolderPath = Application::SPEECH_TO_TEXT_REC_FOLDER . ' ' . strval(4 - $try); + + if ($userFolder->nodeExists($sttFolderPath)) { + if ($try === 0) { + // give up + throw new RuntimeException('Could not create a folder with a unique name'); + } + return $this->getUniqueNamedFolder($userId, $try - 1); + } + + return $userFolder->newFolder($sttFolderPath); + } + + /** + * Send transcription result notification + * @param string $userId + * @param string $result + * @param boolean $success + * @param int $taskType + * @return void + * @throws \InvalidArgumentException + */ + public function sendSpeechToTextNotification(string $userId, string $result, bool $success): void { + $manager = $this->notificationManager; + $notification = $manager->createNotification(); + + try { + $transcriptEntity = new Transcript(); + $transcriptEntity->setUserId($userId); + $transcriptEntity->setTranscript($result); + // never seen transcripts should also be deleted in the cleanup job + $transcriptEntity->setLastAccessed(new DateTime()); + $transcriptEntity = $this->transcriptMapper->insert($transcriptEntity); + + $id = $transcriptEntity->getId(); + } catch (\OCP\Db\Exception $e) { + $this->logger->error('Failed to save transcript in DB: ' . $e->getMessage()); + $success = false; + } + + $params = [ + 'appId' => Application::APP_ID, + 'taskType' => Application::TASK_TYPE_SPEECH_TO_TEXT, + 'result' => $result, + 'target' => $this->urlGenerator->linkToRouteAbsolute(Application::APP_ID . '.SpeechToText.getResultPage', ['id' => $id]) + ]; + $subject = $success + ? 
'success' + : 'failure'; + + $notification->setApp(Application::APP_ID) + ->setUser($userId) + ->setDateTime(new DateTime()) + ->setObject('speech-to-text-result', $id) + ->setSubject($subject, $params); + + $manager->notify($notification); + } +} diff --git a/lib/Settings/Admin.php b/lib/Settings/Admin.php index 0279d50d..d06989e3 100644 --- a/lib/Settings/Admin.php +++ b/lib/Settings/Admin.php @@ -9,6 +9,8 @@ use OCP\Settings\ISettings; use OCP\TextProcessing\FreePromptTaskType; use OCP\TextProcessing\IManager as ITextProcessingManager; +use OCP\SpeechToText\ISpeechToTextManager; + use OCP\TextToImage\IManager as ITextToImageManager; class Admin implements ISettings { @@ -17,7 +19,8 @@ public function __construct( private IConfig $config, private IInitialState $initialStateService, private ITextToImageManager $textToImageManager, - private ITextProcessingManager $textProcessingManager + private ITextProcessingManager $textProcessingManager, + private ISpeechToTextManager $speechToTextManager, ) { } @@ -32,6 +35,8 @@ public function getForm(): TemplateResponse { $textToImagePickerEnabled = $this->config->getAppValue(Application::APP_ID, 'text_to_image_picker_enabled', '1') === '1'; $maxImageGenerationIdleTime = (int) $this->config->getAppValue(Application::APP_ID, 'max_image_generation_idle_time', (string) Application::DEFAULT_MAX_IMAGE_GENERATION_IDLE_TIME); $freePromptPickerEnabled = $this->config->getAppValue(Application::APP_ID, 'free_prompt_picker_enabled', '1') === '1'; + $speechToTextAvailable = $this->speechToTextManager->hasProviders(); + $speechToTextEnabled = $this->config->getAppValue(Application::APP_ID, 'speech_to_text_picker_enabled', '1') === '1'; $adminConfig = [ 'text_processing_available' => $textProcessingAvailable, @@ -41,6 +46,8 @@ public function getForm(): TemplateResponse { 'max_image_generation_idle_time' => $maxImageGenerationIdleTime, 'free_prompt_task_type_available' => $freePromptTaskTypeAvailable, 'free_prompt_picker_enabled' => 
$freePromptPickerEnabled, + 'speech_to_text_picker_available' => $speechToTextAvailable, + 'speech_to_text_picker_enabled' => $speechToTextEnabled, ]; $this->initialStateService->provideInitialState('admin-config', $adminConfig); diff --git a/lib/Settings/Personal.php b/lib/Settings/Personal.php index 1e8cf27f..ce4ed75d 100644 --- a/lib/Settings/Personal.php +++ b/lib/Settings/Personal.php @@ -9,6 +9,8 @@ use OCP\Settings\ISettings; use OCP\TextProcessing\FreePromptTaskType; use OCP\TextProcessing\IManager as ITextProcessingManager; +use OCP\SpeechToText\ISpeechToTextManager; + use OCP\TextToImage\IManager; class Personal implements ISettings { @@ -18,8 +20,9 @@ public function __construct( private IInitialState $initialStateService, private ?string $userId, private IManager $textToImageManager, - private ITextProcessingManager $textProcessingManager - ) { + private ITextProcessingManager $textProcessingManager, + private ISpeechToTextManager $speechToTextManager, + ) { } /** @@ -28,16 +31,20 @@ public function __construct( public function getForm(): TemplateResponse { $textProcessingAvailable = $this->textProcessingManager->hasProviders(); $freePromptTaskTypeAvailable = in_array(FreePromptTaskType::class, $this->textProcessingManager->getAvailableTaskTypes()); + $speechToTextAvailable = $this->speechToTextManager->hasProviders(); $assistantAvailable = $textProcessingAvailable && $this->config->getAppValue(Application::APP_ID, 'assistant_enabled', '1') === '1'; $assistantEnabled = $this->config->getUserValue($this->userId, Application::APP_ID, 'assistant_enabled', '1') === '1'; $textToImagePickerAvailable = $this->textToImageManager->hasProviders() && $this->config->getAppValue(Application::APP_ID, 'text_to_image_picker_enabled', '1') === '1'; - false; + $textToImagePickerEnabled = $this->config->getUserValue($this->userId, Application::APP_ID, 'text_to_image_picker_enabled', '1') === '1'; $freePromptPickerAvailable = $freePromptTaskTypeAvailable && 
$this->config->getAppValue(Application::APP_ID, 'free_prompt_picker_enabled', '1') === '1'; $freePromptPickerEnabled = $this->config->getUserValue($this->userId, Application::APP_ID, 'free_prompt_picker_enabled', '1') === '1'; + + $speechToTextPickerAvailable = $speechToTextAvailable && $this->config->getAppValue(Application::APP_ID, 'speech_to_text_picker_enabled', '1') === '1'; + $speechToTextPickerEnabled = $this->config->getUserValue($this->userId, Application::APP_ID, 'speech_to_text_picker_enabled', '1') === '1'; $userConfig = [ 'assistant_available' => $assistantAvailable, @@ -46,6 +53,8 @@ public function getForm(): TemplateResponse { 'text_to_image_picker_enabled' => $textToImagePickerEnabled, 'free_prompt_picker_available' => $freePromptPickerAvailable, 'free_prompt_picker_enabled' => $freePromptPickerEnabled, + 'speech_to_text_picker_available' => $speechToTextPickerAvailable, + 'speech_to_text_picker_enabled' => $speechToTextPickerEnabled, ]; $this->initialStateService->provideInitialState('config', $userConfig); return new TemplateResponse(Application::APP_ID, 'personalSettings'); diff --git a/src/assistant.js b/src/assistant.js index 7b4b0b21..0257d759 100644 --- a/src/assistant.js +++ b/src/assistant.js @@ -1,4 +1,4 @@ -import { STATUS } from './constants.js' +import { STATUS, TASK_TYPES } from './constants.js' import { linkTo } from '@nextcloud/router' import { getRequestToken } from '@nextcloud/auth' __webpack_nonce__ = btoa(getRequestToken()) // eslint-disable-line @@ -7,7 +7,7 @@ __webpack_public_path__ = linkTo('assistant', 'js/') // eslint-disable-line /** * Creates an assistant modal and return a promise which provides the result * - * OCA.TpAssistant.openAssistantForm({ + * OCA.TPAssistant.openAssistantTextProcessingForm({ * appId: 'my_app_id', * identifier: 'my task identifier', * taskType: 'OCP\\TextProcessing\\FreePromptTaskType', @@ -31,36 +31,34 @@ __webpack_public_path__ = linkTo('assistant', 'js/') // eslint-disable-line * @param 
{object} params parameters for the assistant * @param {string} params.appId the scheduling app id * @param {string} params.identifier the task identifier - * @param {string} params.taskType the task type class + * @param {string} params.taskType the text processing task type class * @param {string} params.input optional initial input text * @param {boolean} params.isInsideViewer Should be true if this function is called while the Viewer is displayed * @param {boolean} params.closeOnResult If true, the modal will be closed when getting a sync result * @param {Array} params.actionButtons List of extra buttons to show in the assistant result form (only if closeOnResult is false) * @return {Promise} */ -export async function openAssistantForm({ - appId, identifier = '', taskType = null, input = '', - isInsideViewer = undefined, closeOnResult = false, actionButtons = undefined, -}) { +export async function openAssistantTextProcessingForm({ appId, identifier = '', taskType = null, input = '', isInsideViewer = undefined }) { const { default: Vue } = await import(/* webpackChunkName: "vue-lazy" */'vue') - const { default: AssistantModal } = await import(/* webpackChunkName: "assistant-modal-lazy" */'./components/AssistantModal.vue') + const { default: AssistantTextProcessingModal } = await import(/* webpackChunkName: "assistant-modal-lazy" */'./components/AssistantTextProcessingModal.vue') Vue.mixin({ methods: { t, n } }) // fallback to the last used one - const selectedTaskTypeId = taskType ?? (await getLastSelectedTaskType())?.data + const textProcessingTaskTypeId = taskType ?? 
(await getLastSelectedTaskType())?.data return new Promise((resolve, reject) => { - const modalId = 'assistantModal' + const modalId = 'assistantTextProcessingModal' const modalElement = document.createElement('div') modalElement.id = modalId document.body.append(modalElement) - const View = Vue.extend(AssistantModal) + const View = Vue.extend(AssistantTextProcessingModal) const view = new View({ propsData: { isInsideViewer, input, - selectedTaskTypeId, + taskType: TASK_TYPES.text_generation, + textProcessingTaskTypeId, showScheduleConfirmation: false, showSyncTaskRunning: false, actionButtons, @@ -91,7 +89,7 @@ export async function openAssistantForm({ view.loading = true view.showSyncTaskRunning = true view.input = data.input - view.selectedTaskTypeId = data.taskTypeId + view.textProcessingTaskTypeId = data.taskTypeId runOrScheduleTask(appId, identifier, data.taskTypeId, data.input) .then((response) => { const task = response.data?.task @@ -124,7 +122,7 @@ export async function openAssistantForm({ }) view.$on('cancel-sync-n-schedule', () => { cancelCurrentSyncTask() - scheduleTask(appId, identifier, view.selectedTaskTypeId, view.input) + scheduleTask(appId, identifier, view.textProcessingTaskTypeId, view.input) .then((response) => { view.showSyncTaskRunning = false view.showScheduleConfirmation = true @@ -245,7 +243,10 @@ export function handleNotification(event) { // We use the object type to know if (event.notification.objectType === 'task') { event.cancelAction = true - showResults(event.notification.objectId) + showTextProcessingTaskResult(event.notification.objectId) + } else if (event.notification.objectType === 'speech-to-text-result') { + event.cancelAction = true + showSpeechToTextResult(event.notification) } } @@ -255,14 +256,14 @@ export function handleNotification(event) { * @param {number} taskId the task id to show the result of * @return {Promise} */ -async function showResults(taskId) { +async function showTextProcessingTaskResult(taskId) { const 
{ default: axios } = await import(/* webpackChunkName: "axios-lazy" */'@nextcloud/axios') const { generateOcsUrl } = await import(/* webpackChunkName: "router-lazy" */'@nextcloud/router') const { showError } = await import(/* webpackChunkName: "dialogs-lazy" */'@nextcloud/dialogs') const url = generateOcsUrl('textprocessing/task/{taskId}', { taskId }) axios.get(url).then(response => { console.debug('showing results for task', response.data.ocs.data.task) - openAssistantResult(response.data.ocs.data.task) + openAssistantTaskResult(response.data.ocs.data.task) }).catch(error => { console.error(error) showError(t('assistant', 'This task does not exist or has been cleaned up')) @@ -270,29 +271,88 @@ async function showResults(taskId) { } /** - * Open an assistant modal to show the result of a task + * Show the result of a speech to text transcription + * @param {object} notification the notification object + * @return {Promise} + */ +async function showSpeechToTextResult(notification) { + const { default: Vue } = await import(/* webpackChunkName: "vue-lazy" */'vue') + const { showError } = await import(/* webpackChunkName: "dialogs-lazy" */'@nextcloud/dialogs') + Vue.mixin({ methods: { t, n } }) + + const { generateUrl } = await import(/* webpackChunkName: "router-lazy" */'@nextcloud/router') + const { default: axios } = await import(/* webpackChunkName: "axios-lazy" */'@nextcloud/axios') + + const params = { + params: { + id: notification.objectId, + }, + } + + const url = generateUrl('apps/assistant/stt/transcript') + + axios.get(url, params).then(response => { + console.debug('showing results for stt', response.data) + openAssistantPlainTextResult(response.data, TASK_TYPES.speech_to_text) + }).catch(error => { + console.error(error) + showError(t('assistant', 'This transcript does not exist or has been cleaned up')) + }) +} + +/** + * Open an assistant modal to show a plain text result + * @param {string} result the plain text result to show + * @param {number} 
taskType the task type + * @return {Promise} + */ +export async function openAssistantPlainTextResult(result, taskType) { + const { default: Vue } = await import(/* webpackChunkName: "vue-lazy" */'vue') + const { default: AssistantPlainTextModal } = await import(/* webpackChunkName: "assistant-modal-lazy" */'./components/AssistantPlainTextModal.vue') + Vue.mixin({ methods: { t, n } }) + + const modalId = 'assistantPlainTextModal' + const modalElement = document.createElement('div') + modalElement.id = modalId + document.body.append(modalElement) + + const View = Vue.extend(AssistantPlainTextModal) + const view = new View({ + propsData: { + output: result, + taskType, + }, + }).$mount(modalElement) + + view.$on('cancel', () => { + view.$destroy() + }) +} + +/** + * Open an assistant modal to show the result of a task * * @param {object} task the task we want to see the result of * @return {Promise} */ -export async function openAssistantResult(task) { +export async function openAssistantTaskResult(task) { const { showError } = await import(/* webpackChunkName: "dialogs-lazy" */'@nextcloud/dialogs') const { default: Vue } = await import(/* webpackChunkName: "vue-lazy" */'vue') - const { default: AssistantModal } = await import(/* webpackChunkName: "assistant-modal-lazy" */'./components/AssistantModal.vue') + const { default: AssistantTextProcessingModal } = await import(/* webpackChunkName: "assistant-modal-lazy" */'./components/AssistantTextProcessingModal.vue') Vue.mixin({ methods: { t, n } }) - const modalId = 'assistantModal' + const modalId = 'assistantTextProcessingModal' const modalElement = document.createElement('div') modalElement.id = modalId document.body.append(modalElement) - const View = Vue.extend(AssistantModal) + const View = Vue.extend(AssistantTextProcessingModal) const view = new View({ propsData: { // isInsideViewer, input: task.input, output: task.output ?? 
'', - selectedTaskTypeId: task.type, + textProcessingTaskTypeId: task.type, showScheduleConfirmation: false, }, }).$mount(modalElement) @@ -316,7 +376,7 @@ export async function openAssistantResult(task) { view.loading = true view.showSyncTaskRunning = true view.input = data.input - view.selectedTaskTypeId = data.taskTypeId + view.textProcessingTaskTypeId = data.taskTypeId runTask(task.appId, task.identifier, data.taskTypeId, data.input) .then((response) => { // resolve(response.data?.task) @@ -344,7 +404,7 @@ export async function openAssistantResult(task) { }) view.$on('cancel-sync-n-schedule', () => { cancelCurrentSyncTask() - scheduleTask(task.appId, task.identifier, view.selectedTaskTypeId, view.input) + scheduleTask(task.appId, task.identifier, view.textProcessingTaskTypeId, view.input) .then((response) => { view.showSyncTaskRunning = false view.showScheduleConfirmation = true @@ -374,7 +434,7 @@ export async function addAssistantMenuEntry() { }).$mount(menuEntry) view.$on('click', () => { - openAssistantForm({ appId: 'assistant' }) + openAssistantTextProcessingForm({ appId: 'assistant' }) .then(r => { console.debug('scheduled task', r) }) diff --git a/src/components/AdminSettings.vue b/src/components/AdminSettings.vue index bde88ab3..d6efd633 100644 --- a/src/components/AdminSettings.vue +++ b/src/components/AdminSettings.vue @@ -51,6 +51,20 @@ + +
+ {{ t('assistant', 'Speech-to-text smart picker') }} +
+ + + {{ t('assistant', 'To enable this feature, please install a speech-to-text provider.') }} + +
+
+

diff --git a/src/components/AssistantPlainTextModal.vue b/src/components/AssistantPlainTextModal.vue new file mode 100644 index 00000000..9c9e887d --- /dev/null +++ b/src/components/AssistantPlainTextModal.vue @@ -0,0 +1,189 @@ + + + + + + + diff --git a/src/components/AssistantForm.vue b/src/components/AssistantTextProcessingForm.vue similarity index 99% rename from src/components/AssistantForm.vue rename to src/components/AssistantTextProcessingForm.vue index 10e313e1..b072b395 100644 --- a/src/components/AssistantForm.vue +++ b/src/components/AssistantTextProcessingForm.vue @@ -132,7 +132,7 @@ Vue.use(VueClipboard) const FREE_PROMPT_TASK_TYPE_ID = 'OCP\\TextProcessing\\FreePromptTaskType' export default { - name: 'AssistantForm', + name: 'AssistantTextProcessingForm', components: { TaskTypeSelect, NcButton, diff --git a/src/components/AssistantModal.vue b/src/components/AssistantTextProcessingModal.vue similarity index 93% rename from src/components/AssistantModal.vue rename to src/components/AssistantTextProcessingModal.vue index 02170be8..3f28637e 100644 --- a/src/components/AssistantModal.vue +++ b/src/components/AssistantTextProcessingModal.vue @@ -25,12 +25,12 @@ :description="shortInput" :show-close-button="true" @close="onCancel" /> - + +
+ {{ t('assistant', 'Speech-to-text smart picker') }} +
+
@@ -68,6 +75,7 @@ export default { noProvidersAvailable() { return this.state.text_to_image_picker_available === false && this.state.text_processing_available === false + && this.state.speech_to_text_picker_available === false }, }, diff --git a/src/constants.js b/src/constants.js index a0928539..c07c011c 100644 --- a/src/constants.js +++ b/src/constants.js @@ -5,3 +5,8 @@ export const STATUS = { scheduled: 1, unknown: 0, } +export const TASK_TYPES = { + text_generation: 0, + image_generation: 1, + speech_to_text: 2, +} diff --git a/src/main.js b/src/main.js index 44f8b551..8a429731 100644 --- a/src/main.js +++ b/src/main.js @@ -1,4 +1,4 @@ -import { handleNotification, addAssistantMenuEntry, openAssistantForm, openAssistantResult } from './assistant.js' +import { handleNotification, addAssistantMenuEntry, openAssistantTextProcessingForm, openAssistantTaskResult } from './assistant.js' import { subscribe } from '@nextcloud/event-bus' import { loadState } from '@nextcloud/initial-state' @@ -12,9 +12,9 @@ function init() { /** * @namespace */ - OCA.TpAssistant = { - openAssistantForm, - openAssistantResult, + OCA.TPAssistant = { + openAssistantTextProcessingForm, + openAssistantTaskResult, } subscribe('notifications:action:execute', handleNotification) if (loadState('assistant', 'assistant-enabled')) { diff --git a/src/speechToTextReference.js b/src/speechToTextReference.js new file mode 100644 index 00000000..7b95e42c --- /dev/null +++ b/src/speechToTextReference.js @@ -0,0 +1,46 @@ +/** + * @copyright Copyright (c) 2022 Julien Veyssier + * + * @author Julien Veyssier + * @author Anupam Kumar + * + * @license AGPL-3.0-or-later + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + * + */ + +import { registerCustomPickerElement, NcCustomPickerRenderResult } from '@nextcloud/vue/dist/Components/NcRichText.js' +import { linkTo } from '@nextcloud/router' +import { getRequestToken } from '@nextcloud/auth' + +__webpack_nonce__ = btoa(getRequestToken()) // eslint-disable-line +__webpack_public_path__ = linkTo('assistant', 'js/') // eslint-disable-line + +registerCustomPickerElement('assistant_speech_to_text', async (el, { providerId, accessible }) => { + const { default: Vue } = await import(/* webpackChunkName: "vue-lazy" */'vue') + Vue.mixin({ methods: { t, n } }) + const { default: SpeechToTextCustomPickerElement } = await import(/* webpackChunkName: "speech-to-text-picker-lazy" */'./views/SpeechToText/SpeechToTextCustomPickerElement.vue') + const Element = Vue.extend(SpeechToTextCustomPickerElement) + const vueElement = new Element({ + propsData: { + providerId, + accessible, + }, + }).$mount(el) + return new NcCustomPickerRenderResult(vueElement.$el, vueElement) +}, (el, renderResult) => { + console.debug('Stt custom destroy callback. 
el:', el, 'renderResult:', renderResult) + renderResult.object.$destroy() +}, 'normal') diff --git a/src/speechToTextResultPage.js b/src/speechToTextResultPage.js new file mode 100644 index 00000000..b053f18c --- /dev/null +++ b/src/speechToTextResultPage.js @@ -0,0 +1,14 @@ +import Vue from 'vue' + +import { linkTo } from '@nextcloud/router' +import { getRequestToken } from '@nextcloud/auth' + +import PlainTextResultPage from './views/PlainTextResultPage.vue' + +__webpack_nonce__ = btoa(getRequestToken()) // eslint-disable-line +__webpack_public_path__ = linkTo('stt_helper', 'js/') // eslint-disable-line + +Vue.mixin({ methods: { t, n } }) + +const View = Vue.extend(PlainTextResultPage) +new View().$mount('#assistant-stt-content') diff --git a/src/views/PlainTextResultPage.vue b/src/views/PlainTextResultPage.vue new file mode 100644 index 00000000..ac5909d0 --- /dev/null +++ b/src/views/PlainTextResultPage.vue @@ -0,0 +1,60 @@ + + + + + diff --git a/src/views/SpeechToText/SpeechToTextCustomPickerElement.vue b/src/views/SpeechToText/SpeechToTextCustomPickerElement.vue new file mode 100644 index 00000000..aad48000 --- /dev/null +++ b/src/views/SpeechToText/SpeechToTextCustomPickerElement.vue @@ -0,0 +1,297 @@ + + + + + diff --git a/src/views/TaskResultPage.vue b/src/views/TaskResultPage.vue index 8ba3af0e..43e7f16f 100644 --- a/src/views/TaskResultPage.vue +++ b/src/views/TaskResultPage.vue @@ -11,7 +11,7 @@ v-else-if="showScheduleConfirmation" :description="shortInput" :show-close-button="false" /> - + +
\ No newline at end of file diff --git a/webpack.js b/webpack.js index 51eef96e..7fe050fc 100644 --- a/webpack.js +++ b/webpack.js @@ -18,6 +18,8 @@ webpackConfig.entry = { imageGenerationPage: { import: path.join(__dirname, 'src', 'imageGenerationPage.js'), filename: appId + '-imageGenerationPage.js' }, imageGenerationReference: { import: path.join(__dirname, 'src', 'imageGenerationReference.js'), filename: appId + '-imageGenerationReference.js' }, textGenerationReference: { import: path.join(__dirname, 'src', 'textGenerationReference.js'), filename: appId + '-textGenerationReference.js' }, + speechToTextResultPage: { import: path.join(__dirname, 'src', 'speechToTextResultPage.js'), filename: appId + '-speechToTextResultPage.js' }, + speechToTextReference: { import: path.join(__dirname, 'src', 'speechToTextReference.js'), filename: appId + '-speechToTextReference.js' }, personalSettings: { import: path.join(__dirname, 'src', 'personalSettings.js'), filename: appId + '-personalSettings.js' }, adminSettings: { import: path.join(__dirname, 'src', 'adminSettings.js'), filename: appId + '-adminSettings.js' }, main: { import: path.join(__dirname, 'src', 'main.js'), filename: appId + '-main.js' }, From 4ef39506d27747daba3a981a1541f91261eae7e4 Mon Sep 17 00:00:00 2001 From: MB-Finski Date: Wed, 10 Jan 2024 13:36:12 +0000 Subject: [PATCH 02/10] Fix function params Signed-off-by: MB-Finski --- src/assistant.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/assistant.js b/src/assistant.js index 0257d759..9ba17f4e 100644 --- a/src/assistant.js +++ b/src/assistant.js @@ -38,7 +38,10 @@ __webpack_public_path__ = linkTo('assistant', 'js/') // eslint-disable-line * @param {Array} params.actionButtons List of extra buttons to show in the assistant result form (only if closeOnResult is false) * @return {Promise} */ -export async function openAssistantTextProcessingForm({ appId, identifier = '', taskType = null, input = '', isInsideViewer = undefined }) { 
+export async function openAssistantTextProcessingForm({ + appId, identifier = '', taskType = null, input = '', + isInsideViewer = undefined, closeOnResult = false, actionButtons = undefined, +}) { const { default: Vue } = await import(/* webpackChunkName: "vue-lazy" */'vue') const { default: AssistantTextProcessingModal } = await import(/* webpackChunkName: "assistant-modal-lazy" */'./components/AssistantTextProcessingModal.vue') Vue.mixin({ methods: { t, n } }) From db5638399a8669536d2752a351cc3183d1cfc909 Mon Sep 17 00:00:00 2001 From: MB-Finski Date: Thu, 11 Jan 2024 09:20:41 +0000 Subject: [PATCH 03/10] Small UI adjustments Signed-off-by: MB-Finski --- src/components/AssistantPlainTextModal.vue | 79 ++++++++++++++++++++-- src/views/PlainTextResultPage.vue | 5 ++ 2 files changed, 79 insertions(+), 5 deletions(-) diff --git a/src/components/AssistantPlainTextModal.vue b/src/components/AssistantPlainTextModal.vue index 9c9e887d..c844443a 100644 --- a/src/components/AssistantPlainTextModal.vue +++ b/src/components/AssistantPlainTextModal.vue @@ -28,7 +28,7 @@ {{ t('assistant', 'This output was generated by AI. Make sure to double-check and adjust.') }} +
+ + {{ t('assistant', 'Copy output') }} + + + + {{ t('assistant', 'Reset') }} + +

@@ -52,14 +69,23 @@