Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add stt smart picker #27

Merged
merged 10 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/psalm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ jobs:
php-version: 8.2
coverage: none
ini-file: development
extensions: mbstring, iconv, fileinfo, intl, sqlite, pdo_sqlite, gd, zip
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

Expand Down
1 change: 1 addition & 0 deletions appinfo/info.xml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ include text processing providers to:
<screenshot>https://github.com/nextcloud/assistant/raw/main/img/screenshot3.jpg</screenshot>
<background-jobs>
<job>OCA\TpAssistant\Cron\CleanupImageGenerations</job>
<job>OCA\TpAssistant\Cron\CleanupTranscriptions</job>
</background-jobs>
<commands>
<command>OCA\TpAssistant\Command\CleanupImageGenerations</command>
Expand Down
5 changes: 5 additions & 0 deletions appinfo/routes.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,10 @@
['name' => 'FreePrompt#getPromptHistory', 'url' => '/f/prompt_history', 'verb' => 'GET'],
['name' => 'FreePrompt#getOutputs', 'url' => '/f/get_outputs', 'verb' => 'GET'],
['name' => 'FreePrompt#cancelGeneration', 'url' => '/f/cancel_generation', 'verb' => 'POST'],

['name' => 'SpeechToText#getResultPage', 'url' => '/stt/resultPage', 'verb' => 'GET'],
['name' => 'SpeechToText#getTranscript', 'url' => '/stt/transcript', 'verb' => 'GET'],
['name' => 'SpeechToText#transcribeAudio', 'url' => '/stt/transcribeAudio', 'verb' => 'POST'],
['name' => 'SpeechToText#transcribeFile', 'url' => '/stt/transcribeFile', 'verb' => 'POST'],
],
];
12 changes: 12 additions & 0 deletions lib/AppInfo/Application.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@

use OCA\TpAssistant\Listener\BeforeTemplateRenderedListener;
use OCA\TpAssistant\Listener\FreePrompt\FreePromptReferenceListener;
use OCA\TpAssistant\Listener\SpeechToText\SpeechToTextReferenceListener;
use OCA\TpAssistant\Listener\SpeechToText\SpeechToTextResultListener;
use OCA\TpAssistant\Listener\TaskFailedListener;
use OCA\TpAssistant\Listener\TaskSuccessfulListener;
use OCA\TpAssistant\Listener\Text2Image\Text2ImageReferenceListener;
use OCA\TpAssistant\Listener\Text2Image\Text2ImageResultListener;
use OCA\TpAssistant\Notification\Notifier;
use OCA\TpAssistant\Reference\FreePromptReferenceProvider;
use OCA\TpAssistant\Reference\SpeechToTextReferenceProvider;
use OCA\TpAssistant\Reference\Text2ImageReferenceProvider;
use OCP\AppFramework\App;
use OCP\AppFramework\Bootstrap\IBootContext;
Expand All @@ -18,6 +21,8 @@
use OCP\AppFramework\Bootstrap\IRegistrationContext;
use OCP\AppFramework\Http\Events\BeforeTemplateRenderedEvent;
use OCP\Collaboration\Reference\RenderReferenceEvent;
use OCP\SpeechToText\Events\TranscriptionFailedEvent;
use OCP\SpeechToText\Events\TranscriptionSuccessfulEvent;
use OCP\TextProcessing\Events\TaskFailedEvent as TextTaskFailedEvent;
use OCP\TextProcessing\Events\TaskSuccessfulEvent as TextTaskSuccessfulEvent;
use OCP\TextToImage\Events\TaskFailedEvent as TextToImageTaskFailedEvent;
Expand All @@ -32,9 +37,11 @@ class Application extends App implements IBootstrap {
// Default retention periods: 60 * 60 * 24 * 90 is the number of seconds in 90 days.
public const DEFAULT_MAX_IMAGE_GENERATION_IDLE_TIME = 60 * 60 * 24 * 90; // 90 days
public const DEFAULT_TEXT_GENERATION_STORAGE_TIME = 60 * 60 * 24 * 90; // 90 days
// NOTE(review): presumably the folder names used for generated images and for
// speech-to-text recordings; where they are created is not visible here — confirm.
public const IMAGE_FOLDER = 'generated_images';
public const SPEECH_TO_TEXT_REC_FOLDER = 'stt_recordings';

// Integer identifiers for the kinds of task handled by the app.
// TASK_TYPE_SPEECH_TO_TEXT is the value SpeechToTextController::getResultPage()
// passes to the frontend as 'taskType'.
public const TASK_TYPE_TEXT_GEN = 0;
public const TASK_TYPE_TEXT_TO_IMAGE = 1;
public const TASK_TYPE_SPEECH_TO_TEXT = 2;

public function __construct(array $urlParams = []) {
parent::__construct(self::APP_ID, $urlParams);
Expand All @@ -43,11 +50,16 @@ public function __construct(array $urlParams = []) {
public function register(IRegistrationContext $context): void {
$context->registerReferenceProvider(Text2ImageReferenceProvider::class);
$context->registerReferenceProvider(FreePromptReferenceProvider::class);
$context->registerReferenceProvider(SpeechToTextReferenceProvider::class);

$context->registerEventListener(RenderReferenceEvent::class, Text2ImageReferenceListener::class);
$context->registerEventListener(RenderReferenceEvent::class, FreePromptReferenceListener::class);
$context->registerEventListener(RenderReferenceEvent::class, SpeechToTextReferenceListener::class);

$context->registerEventListener(TextToImageTaskSuccessfulEvent::class, Text2ImageResultListener::class);
$context->registerEventListener(TextToImageTaskFailedEvent::class, Text2ImageResultListener::class);
$context->registerEventListener(TranscriptionSuccessfulEvent::class, SpeechToTextResultListener::class);
$context->registerEventListener(TranscriptionFailedEvent::class, SpeechToTextResultListener::class);

$context->registerEventListener(BeforeTemplateRenderedEvent::class, BeforeTemplateRenderedListener::class);

Expand Down
240 changes: 240 additions & 0 deletions lib/Controller/SpeechToTextController.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
<?php
/**
* @copyright Copyright (c) 2023 Anupam Kumar <[email protected]>
*
* @author Anupam Kumar <[email protected]>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

namespace OCA\TpAssistant\Controller;

use DateTime;
use Exception;
use InvalidArgumentException;
use OCA\TpAssistant\AppInfo\Application;
use OCA\TpAssistant\Db\SpeechToText\TranscriptMapper;
use OCA\TpAssistant\Service\SpeechToText\SpeechToTextService;
use OCP\AppFramework\Controller;
use OCP\AppFramework\Db\DoesNotExistException;
use OCP\AppFramework\Db\MultipleObjectsReturnedException;
use OCP\AppFramework\Http;
use OCP\AppFramework\Http\Attribute\AnonRateLimit;
use OCP\AppFramework\Http\Attribute\NoAdminRequired;
use OCP\AppFramework\Http\Attribute\NoCSRFRequired;
use OCP\AppFramework\Http\Attribute\UserRateLimit;
use OCP\AppFramework\Http\DataResponse;
use OCP\AppFramework\Http\TemplateResponse;
use OCP\AppFramework\Services\IInitialState;
use OCP\Files\NotFoundException;
use OCP\Files\NotPermittedException;
use OCP\IL10N;
use OCP\IRequest;
use OCP\PreConditionNotMetException;
use Psr\Log\LoggerInterface;
use RuntimeException;

/**
 * HTTP endpoints of the Speech-to-Text feature: reading a stored transcript
 * (as a rendered page or as plain data) and scheduling a transcription from an
 * uploaded recording or from an existing file path.
 */
class SpeechToTextController extends Controller {

	public function __construct(
		string $appName,
		IRequest $request,
		private SpeechToTextService $service,
		private LoggerInterface $logger,
		private IL10N $l10n,
		private TranscriptMapper $transcriptMapper,
		private IInitialState $initialState,
		private ?string $userId,
	) {
		parent::__construct($appName, $request);
	}

	/**
	 * Render the transcript result page.
	 *
	 * The outcome is handed to the frontend through the 'plain-text-result'
	 * initial state. On failure the response status is set to the code carried
	 * by the exception thrown by internalGetTranscript().
	 *
	 * @param int $id Transcript ID
	 * @return TemplateResponse
	 */
	#[NoAdminRequired]
	#[NoCSRFRequired]
	#[UserRateLimit(limit: 10, period: 60)]
	#[AnonRateLimit(limit: 2, period: 60)]
	public function getResultPage(int $id): TemplateResponse {
		$response = new TemplateResponse(Application::APP_ID, 'speechToTextResultPage');

		try {
			$initData = [
				'status' => 'success',
				'result' => $this->internalGetTranscript($id),
				'taskType' => Application::TASK_TYPE_SPEECH_TO_TEXT,
			];
		} catch (Exception $e) {
			$initData = [
				'status' => 'failure',
				'message' => $e->getMessage(),
			];
			$response->setStatus((int)$e->getCode());
		}

		$this->initialState->provideInitialState('plain-text-result', $initData);
		return $response;
	}

	/**
	 * Return the transcript text, or an error message with the matching HTTP status.
	 *
	 * @param int $id Transcript ID
	 * @return DataResponse
	 */
	#[NoAdminRequired]
	public function getTranscript(int $id): DataResponse {
		try {
			return new DataResponse($this->internalGetTranscript($id));
		} catch (Exception $e) {
			return new DataResponse($e->getMessage(), (int)$e->getCode());
		}
	}

	/**
	 * Load a transcript for the current user, refresh its last-accessed time
	 * and return the trimmed text.
	 *
	 * Every failure is converted into a plain Exception whose message is
	 * translated and whose code is the HTTP status the caller should answer
	 * with. The original exception is chained as "previous" so the root cause
	 * is not lost.
	 *
	 * @param int $id Transcript ID
	 * @return string
	 * @throws Exception
	 */
	private function internalGetTranscript(int $id): string {
		try {
			$transcriptEntity = $this->transcriptMapper->find($id, $this->userId);
			$transcript = $transcriptEntity->getTranscript();

			$transcriptEntity->setLastAccessed(new DateTime());
			$this->transcriptMapper->update($transcriptEntity);

			return trim($transcript);
		} catch (InvalidArgumentException $e) {
			$this->logger->error(
				'Invalid argument in transcript access time update call: ' . $e->getMessage(),
				['app' => Application::APP_ID],
			);
			throw new Exception(
				$this->l10n->t('Error in transcript access time update call'),
				Http::STATUS_INTERNAL_SERVER_ERROR,
				$e,
			);
		} catch (MultipleObjectsReturnedException $e) {
			$this->logger->error('Multiple transcripts found: ' . $e->getMessage(), ['app' => Application::APP_ID]);
			throw new Exception($this->l10n->t('Multiple transcripts found'), Http::STATUS_BAD_REQUEST, $e);
		} catch (DoesNotExistException $e) {
			// Not logged: an unknown ID is answered with a plain 404.
			throw new Exception($this->l10n->t('Transcript not found'), Http::STATUS_NOT_FOUND, $e);
		} catch (Exception $e) {
			$this->logger->error('Error: ' . $e->getMessage(), ['app' => Application::APP_ID]);
			throw new Exception(
				$this->l10n->t('Some internal error occurred. Contact your sysadmin for more info.'),
				Http::STATUS_INTERNAL_SERVER_ERROR,
				$e,
			);
		}
	}

	/**
	 * Schedule the transcription of an MP3 recording uploaded as 'audioData'.
	 *
	 * @return DataResponse 'ok' on success, otherwise an error message with a 4xx/5xx status
	 */
	#[NoAdminRequired]
	public function transcribeAudio(): DataResponse {
		$audioData = $this->request->getUploadedFile('audioData');

		// This guard has to run before any offset access: with no upload under
		// this key, reading $audioData['error'] would emit a warning and the
		// request would be misreported as an upload error.
		if (empty($audioData)) {
			return new DataResponse('Invalid audio data received', Http::STATUS_BAD_REQUEST);
		}

		$uploadError = $audioData['error'] ?? null;
		if ($uploadError !== UPLOAD_ERR_OK) {
			return new DataResponse('Error in audio file upload: ' . $uploadError, Http::STATUS_BAD_REQUEST);
		}

		// NOTE(review): 'type' is upload metadata and presumably client-supplied,
		// so this is a sanity check only, not proof of the file format — confirm.
		$mimeType = $audioData['type'] ?? '';
		if (!in_array($mimeType, ['audio/mp3', 'audio/mpeg'], true)) {
			return new DataResponse('Audio file must be in MP3 format', Http::STATUS_BAD_REQUEST);
		}

		try {
			$this->service->transcribeAudio($audioData['tmp_name'], $this->userId);
			return new DataResponse('ok');
		} catch (RuntimeException $e) {
			$this->logger->error(
				'Runtime exception: ' . $e->getMessage(),
				['app' => Application::APP_ID]
			);
			return $this->internalErrorResponse();
		} catch (PreConditionNotMetException $e) {
			$this->logger->error('No Speech-to-Text provider found: ' . $e->getMessage(), ['app' => Application::APP_ID]);
			return $this->noProviderResponse();
		} catch (InvalidArgumentException $e) {
			$this->logger->error('InvalidArgumentException: ' . $e->getMessage(), ['app' => Application::APP_ID]);
			return $this->internalErrorResponse();
		}
	}

	/**
	 * Schedule the transcription of a file addressed by its path.
	 *
	 * @param string $path Nextcloud file path
	 * @return DataResponse 'ok' on success, otherwise an error message with a 4xx/5xx status
	 */
	#[NoAdminRequired]
	public function transcribeFile(string $path): DataResponse {
		if ($path === '') {
			return new DataResponse('Empty file path received', Http::STATUS_BAD_REQUEST);
		}

		try {
			$this->service->transcribeFile($path, $this->userId);
			return new DataResponse('ok');
		} catch (NotFoundException $e) {
			$this->logger->error('Audio file not found: ' . $e->getMessage(), ['app' => Application::APP_ID]);
			return new DataResponse(
				$this->l10n->t('Audio file not found.'),
				Http::STATUS_NOT_FOUND
			);
		} catch (RuntimeException $e) {
			$this->logger->error(
				'Runtime exception: ' . $e->getMessage(),
				['app' => Application::APP_ID]
			);
			return $this->internalErrorResponse();
		} catch (NotPermittedException $e) {
			$this->logger->error(
				'No permission to create recording file/directory: ' . $e->getMessage(),
				['app' => Application::APP_ID]
			);
			return new DataResponse(
				$this->l10n->t('No permission to create recording file/directory, contact your sysadmin to resolve this issue.'),
				Http::STATUS_INTERNAL_SERVER_ERROR
			);
		} catch (PreConditionNotMetException $e) {
			$this->logger->error('No Speech-to-Text provider found: ' . $e->getMessage(), ['app' => Application::APP_ID]);
			return $this->noProviderResponse();
		} catch (InvalidArgumentException $e) {
			$this->logger->error('InvalidArgumentException: ' . $e->getMessage(), ['app' => Application::APP_ID]);
			return $this->internalErrorResponse();
		}
	}

	/**
	 * Build the generic, translated HTTP 500 response shared by the transcribe endpoints.
	 */
	private function internalErrorResponse(): DataResponse {
		return new DataResponse(
			$this->l10n->t('Some internal error occurred. Contact your sysadmin for more info.'),
			Http::STATUS_INTERNAL_SERVER_ERROR
		);
	}

	/**
	 * Build the translated HTTP 400 response returned when no Speech-to-Text provider is available.
	 */
	private function noProviderResponse(): DataResponse {
		return new DataResponse(
			$this->l10n->t('No Speech-to-Text provider found, install one from the app store to use this feature.'),
			Http::STATUS_BAD_REQUEST
		);
	}
}
45 changes: 45 additions & 0 deletions lib/Cron/CleanupTranscriptions.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
<?php

declare(strict_types=1);

/**
* @copyright Copyright (c) 2023 Anupam Kumar <[email protected]>
*
* @author Anupam Kumar <[email protected]>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

namespace OCA\TpAssistant\Cron;

use OCA\TpAssistant\Db\SpeechToText\TranscriptMapper;
use OCP\AppFramework\Utility\ITimeFactory;
use OCP\BackgroundJob\TimedJob;

/**
 * Timed background job that asks the transcript mapper to clean up stored
 * transcriptions. The interval is set to 24 hours.
 */
class CleanupTranscriptions extends TimedJob {
	public function __construct(
		ITimeFactory $time,
		private TranscriptMapper $transcriptMapper,
	) {
		parent::__construct($time);
		$this->setInterval(60 * 60 * 24); // 24 hours
	}

	/**
	 * Delegate the cleanup to TranscriptMapper::cleanupTranscriptions().
	 * NOTE(review): which transcripts count as stale is decided by the mapper,
	 * which is not visible here.
	 *
	 * @param mixed $argument Job argument; not used by this job
	 * @return void
	 */
	protected function run($argument): void {
		$this->transcriptMapper->cleanupTranscriptions();
	}
}
Loading
Loading