From 7ebc78955c65af90e7ee0afbd07adc15271113ba Mon Sep 17 00:00:00 2001 From: splitbrain Date: Wed, 13 Sep 2023 13:13:03 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20Automatic=20code=20style=20fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AIChat.php | 6 +++--- Chunk.php | 5 +---- Embeddings.php | 25 +++++++++++++------------ Model/AbstractModel.php | 12 ++++++------ Model/OpenAI/GPT35Turbo.php | 11 ++++++----- Model/OpenAI/GPT35Turbo16k.php | 2 +- Model/OpenAI/GPT4.php | 2 +- Storage/AbstractStorage.php | 8 +++++--- Storage/PineconeStorage.php | 4 ++-- Storage/SQLiteStorage.php | 20 ++++++++++++-------- action.php | 15 ++++++++------- cli.php | 7 +++---- conf/default.php | 1 + conf/metadata.php | 2 +- helper.php | 12 ++++++------ lang/languages.php | 1 + syntax/chat.php | 11 ++++++----- syntax/similar.php | 11 +++++------ 18 files changed, 81 insertions(+), 74 deletions(-) diff --git a/AIChat.php b/AIChat.php index fc6f621..2834ef7 100644 --- a/AIChat.php +++ b/AIChat.php @@ -8,9 +8,9 @@ class AIChat { /** @var int preferUIlanguage config: guess language use, all sources */ - const LANG_AUTO_ALL = 0; + public const LANG_AUTO_ALL = 0; /** @var int preferUIlanguage config: use UI language, all sources */ - const LANG_UI_ALL = 1; + public const LANG_UI_ALL = 1; /** @var int preferUIlanguage config: use UI language, limit sources */ - const LANG_UI_LIMITED = 2; + public const LANG_UI_LIMITED = 2; } diff --git a/Chunk.php b/Chunk.php index b5b8aac..1ea082c 100644 --- a/Chunk.php +++ b/Chunk.php @@ -138,9 +138,6 @@ public function setScore($score): void $this->score = $score; } - /** - * @return string - */ public function getLanguage(): string { return $this->language; @@ -182,7 +179,7 @@ protected function determineLanguage() * @param string $json * @return Chunk */ - static public function fromJSON($json) + public static function fromJSON($json) { $data = json_decode($json, true); return new self( diff --git a/Embeddings.php b/Embeddings.php index 9129c23..0245bef 100644 --- a/Embeddings.php +++ b/Embeddings.php @@ -2,6 +2,7 @@ namespace dokuwiki\plugin\aichat; +use dokuwiki\Extension\PluginInterface; use dokuwiki\plugin\aichat\Model\AbstractModel; use dokuwiki\plugin\aichat\Storage\AbstractStorage; use dokuwiki\Search\Indexer; @@ -18,7 +19,7 @@ class Embeddings { /** @var int maximum overlap between chunks in tokens */ - const MAX_OVERLAP_LEN = 200; + public const MAX_OVERLAP_LEN = 200; /** @var AbstractModel */ protected $model; @@ -70,7 +71,7 @@ public function setLogger(CLI $logger) */ public function getTokenEncoder() { - if ($this->tokenEncoder === null) { + if (!$this->tokenEncoder instanceof Encoder) { $this->tokenEncoder = new Encoder(); } return $this->tokenEncoder; @@ -108,7 +109,7 @@ public function createNewIndex($skipRE = '', $clear = false) if ($firstChunk && @filemtime(wikiFN($page)) < $firstChunk->getCreated()) { // page is older than the chunks we have, reuse the existing chunks $this->storage->reusePageChunks($page, $chunkID); - if ($this->logger) $this->logger->info("Reusing chunks for $page"); + if ($this->logger instanceof CLI) $this->logger->info("Reusing chunks for $page"); } else { // page is newer than the chunks we have, create new chunks $this->storage->deletePageChunks($page, $chunkID); @@ -134,7 +135,7 @@ protected function createPageChunks($page, $firstChunkID) $chunkList = []; $textRenderer = plugin_load('renderer', 'text'); - if ($textRenderer) { + if ($textRenderer instanceof PluginInterface) { global $ID; $ID = $page; $text = p_cached_output(wikiFN($page), 'text', $page); @@ -149,7 +150,7 @@ protected function createPageChunks($page, $firstChunkID) try { $embedding = $this->model->getEmbedding($part); } catch (\Exception $e) { - if ($this->logger) { + if ($this->logger instanceof CLI) { $this->logger->error( 'Failed to get embedding for chunk of page {page}: {msg}', ['page' => $page, 'msg' => $e->getMessage()] @@ -160,8 +161,8 @@ protected function createPageChunks($page, $firstChunkID) $chunkList[] = new Chunk($page, $firstChunkID, $part, $embedding); $firstChunkID++; } - if ($this->logger) { - if (count($chunkList)) { + if ($this->logger instanceof CLI) { + if ($chunkList !== []) { $this->logger->success('{id} split into {count} chunks', ['id' => $page, 'count' => count($chunkList)]); } else { $this->logger->warning('{id} could not be split into chunks', ['id' => $page]); @@ -181,7 +182,7 @@ protected function createPageChunks($page, $firstChunkID) * @return Chunk[] * @throws \Exception */ - public function getSimilarChunks($query, $lang='') + public function getSimilarChunks($query, $lang = '') { global $auth; $vector = $this->model->getEmbedding($query); @@ -193,7 +194,7 @@ public function getSimilarChunks($query, $lang='') $time = microtime(true); $chunks = $this->storage->getSimilarChunks($vector, $lang, $fetch); - if ($this->logger) { + if ($this->logger instanceof CLI) { $this->logger->info( 'Fetched {count} similar chunks from store in {time} seconds', ['count' => count($chunks), 'time' => round(microtime(true) - $time, 2)] @@ -236,7 +237,7 @@ public function splitIntoChunks($text) $slen = count($tiktok->encode($sentence)); if ($slen > $this->model->getMaxEmbeddingTokenLength()) { // sentence is too long, we need to split it further - if ($this->logger) $this->logger->warning('Sentence too long, splitting not implemented yet'); + if ($this->logger instanceof CLI) $this->logger->warning('Sentence too long, splitting not implemented yet'); continue; } @@ -251,7 +252,7 @@ public function splitIntoChunks($text) $chunks[] = $chunk; // start new chunk with remembered sentences - $chunk = join(' ', $this->sentenceQueue); + $chunk = implode(' ', $this->sentenceQueue); $chunk .= $sentence; $chunklen = count($tiktok->encode($chunk)); } @@ -274,7 +275,7 @@ protected function rememberSentence($sentence) // remove oldest sentences from queue until we are below the max overlap $encoder = $this->getTokenEncoder(); - while (count($encoder->encode(join(' ', $this->sentenceQueue))) > self::MAX_OVERLAP_LEN) { + while (count($encoder->encode(implode(' ', $this->sentenceQueue))) > self::MAX_OVERLAP_LEN) { array_shift($this->sentenceQueue); } } diff --git a/Model/AbstractModel.php b/Model/AbstractModel.php index 8e4fe12..07dfbcd 100644 --- a/Model/AbstractModel.php +++ b/Model/AbstractModel.php @@ -2,10 +2,8 @@ namespace dokuwiki\plugin\aichat\Model; - -abstract class AbstractModel { - - +abstract class AbstractModel +{ /** @var int total tokens used by this instance */ protected $tokensUsed = 0; /** @var int total cost used by this instance (multiplied by 1000*10000) */ @@ -43,7 +41,8 @@ abstract public function getMaxContextTokenLength(); * * @return int */ - public function getMaxRephrasingTokenLength() { + public function getMaxRephrasingTokenLength() + { return $this->getMaxContextTokenLength(); } @@ -77,7 +76,8 @@ abstract public function getAnswer($messages); * @return string The new question * @throws \Exception */ - public function getRephrasedQuestion($messages) { + public function getRephrasedQuestion($messages) + { return $this->getAnswer($messages); } diff --git a/Model/OpenAI/GPT35Turbo.php b/Model/OpenAI/GPT35Turbo.php index b30cadc..960af03 100644 --- a/Model/OpenAI/GPT35Turbo.php +++ b/Model/OpenAI/GPT35Turbo.php @@ -13,7 +13,7 @@ class GPT35Turbo extends AbstractModel { /** @var int[] real 1K cost multiplied by 10000 to avoid floating point issues, as of 2023-06-14 */ - static protected $prices = [ + protected static $prices = [ 'text-embedding-ada-002' => 1, // $0.0001 per 1k token 'gpt-3.5-turbo' => 15, // $0.0015 per 1k token 'gpt-3.5-turbo-16k' => 30, // $0.003 per 1k token @@ -22,14 +22,14 @@ class GPT35Turbo extends AbstractModel ]; /** @var array[] The models and limits for the different use cases */ - static protected $setup = [ + protected static $setup = [ 'embedding' => ['text-embedding-ada-002', 1000], // chunk size 'rephrase' => ['gpt-3.5-turbo', 3500], // rephrasing context size 'chat' => ['gpt-3.5-turbo', 3500], // question context size ]; /** @var int How often to retry a request if it fails */ - const MAX_RETRIES = 3; + public const MAX_RETRIES = 3; /** @var DokuHTTPClient */ protected $http; @@ -97,7 +97,8 @@ public function getRephrasedQuestion($messages) /** * @internal for checking available models */ - public function listUpstreamModels() { + public function listUpstreamModels() + { $url = 'https://api.openai.com/v1/models'; $result = $this->http->get($url); return $result; @@ -167,7 +168,7 @@ protected function request($endpoint, $data, $retry = 0) } // update usage statistics - if(isset($result['usage'])) { + if (isset($result['usage'])) { $price = self::$prices[$data['model']] ?? 0; $this->tokensUsed += $result['usage']['total_tokens']; $this->costEstimate += $result['usage']['total_tokens'] * $price; diff --git a/Model/OpenAI/GPT35Turbo16k.php b/Model/OpenAI/GPT35Turbo16k.php index 331e119..72b69fb 100644 --- a/Model/OpenAI/GPT35Turbo16k.php +++ b/Model/OpenAI/GPT35Turbo16k.php @@ -9,7 +9,7 @@ */ class GPT35Turbo16K extends GPT35Turbo { - static protected $setup = [ + protected static $setup = [ 'embedding' => ['text-embedding-ada-002', 3000], 'rephrase' => ['gpt-3.5-turbo', 3500], 'chat' => ['gpt-3.5-turbo-16k', 6000], diff --git a/Model/OpenAI/GPT4.php b/Model/OpenAI/GPT4.php index c116542..c2029d4 100644 --- a/Model/OpenAI/GPT4.php +++ b/Model/OpenAI/GPT4.php @@ -9,7 +9,7 @@ */ class GPT4 extends GPT35Turbo { - static protected $setup = [ + protected static $setup = [ 'embedding' => ['text-embedding-ada-002', 2000], 'rephrase' => ['gpt-4', 3500], 'chat' => ['gpt-4', 3000], diff --git a/Storage/AbstractStorage.php b/Storage/AbstractStorage.php index 48b5300..19117b0 100644 --- a/Storage/AbstractStorage.php +++ b/Storage/AbstractStorage.php @@ -21,7 +21,8 @@ abstract class AbstractStorage * @param CLIPlugin $logger * @return void */ - public function setLogger($logger) { + public function setLogger($logger) + { $this->logger = $logger; } @@ -112,7 +113,7 @@ abstract public function getPageChunks($page, $firstChunkID); * @param int $limit The number of results to return, see note above * @return Chunk[] */ - abstract public function getSimilarChunks($vector, $lang='', $limit = 4); + abstract public function getSimilarChunks($vector, $lang = '', $limit = 4); /** * Get information about the storage @@ -130,7 +131,8 @@ abstract public function statistics(); * @param string $metafile path to the file with the metadata * @return void */ - public function dumpTSV($vectorfile, $metafile) { + public function dumpTSV($vectorfile, $metafile) + { throw new \RuntimeException('Not implemented for current storage'); } } diff --git a/Storage/PineconeStorage.php b/Storage/PineconeStorage.php index 23ebe17..c04c437 100644 --- a/Storage/PineconeStorage.php +++ b/Storage/PineconeStorage.php @@ -45,7 +45,7 @@ protected function runQuery($endpoint, $data, $method = 'POST') { $url = $this->baseurl . $endpoint; - if (is_array($data) && !count($data)) { + if (is_array($data) && $data === []) { $json = '{}'; } else { $json = json_encode($data); @@ -197,7 +197,7 @@ public function getPageChunks($page, $firstChunkID) /** @inheritdoc */ public function getSimilarChunks($vector, $lang = '', $limit = 4) { - $limit = $limit * 2; // we can't check ACLs, so we return more than requested + $limit *= 2; // we can't check ACLs, so we return more than requested if ($lang) { $filter = ['language' => ['$eq', $lang]]; diff --git a/Storage/SQLiteStorage.php b/Storage/SQLiteStorage.php index 9c61ec3..22acbf3 100644 --- a/Storage/SQLiteStorage.php +++ b/Storage/SQLiteStorage.php @@ -1,4 +1,6 @@ -getCluster($vector, $lang); if ($this->logger) $this->logger->info( - 'Using cluster {cluster} for similarity search', ['cluster' => $cluster] + 'Using cluster {cluster} for similarity search', + ['cluster' => $cluster] ); $result = $this->db->queryAll( @@ -282,7 +285,7 @@ protected function createLanguageClusters($lang) static $iterations = 0; ++$iterations; if ($this->logger) { - $clustercounts = join(',', array_map('count', $clusters)); + $clustercounts = implode(',', array_map('count', $clusters)); $this->logger->info('Iteration {iteration}: [{clusters}]', [ 'iteration' => $iterations, 'clusters' => $clustercounts ]); @@ -290,7 +293,7 @@ protected function createLanguageClusters($lang) }, Cluster::INIT_KMEANS_PLUS_PLUS); // store the clusters - foreach ($clusters as $clusterID => $cluster) { + foreach ($clusters as $cluster) { /** @var Cluster $cluster */ $centroid = $cluster->getCoordinates(); $query = 'INSERT INTO clusters (lang, centroid) VALUES (?, ?)'; @@ -322,7 +325,8 @@ protected function setChunkClusters() $query = 'UPDATE embeddings SET cluster = ? WHERE id = ?'; $this->db->exec($query, [$cluster, $record['id']]); if ($this->logger) $this->logger->success( - 'Chunk {id} assigned to cluster {cluster}', ['id' => $record['id'], 'cluster' => $cluster] + 'Chunk {id} assigned to cluster {cluster}', + ['id' => $record['id'], 'cluster' => $cluster] ); } $handle->closeCursor(); diff --git a/action.php b/action.php index 25e5dfa..9354dd2 100644 --- a/action.php +++ b/action.php @@ -1,5 +1,9 @@ */ -class action_plugin_aichat extends \dokuwiki\Extension\ActionPlugin +class action_plugin_aichat extends ActionPlugin { - /** @inheritDoc */ - public function register(Doku_Event_Handler $controller) + public function register(EventHandler $controller) { $controller->register_hook('AJAX_CALL_UNKNOWN', 'BEFORE', $this, 'handleQuestion'); } @@ -27,7 +30,7 @@ public function register(Doku_Event_Handler $controller) * @param mixed $param optional parameter passed when event was registered * @return void */ - public function handleQuestion(Doku_Event $event, $param) + public function handleQuestion(Event $event, $param) { if ($event->data !== 'aichat') return; $event->preventDefault(); @@ -67,7 +70,7 @@ public function handleQuestion(Doku_Event $event, $param) ]); if ($this->getConf('logging')) { - \dokuwiki\Logger::getInstance('aichat')->log( + Logger::getInstance('aichat')->log( $question, [ 'interpretation' => $result['question'], @@ -88,6 +91,4 @@ public function handleQuestion(Doku_Event $event, $param) ]); } } - } - diff --git a/cli.php b/cli.php index 18519fb..cd5f6ce 100644 --- a/cli.php +++ b/cli.php @@ -7,7 +7,6 @@ use splitbrain\phpcli\Options; use splitbrain\phpcli\TableFormatter; - /** * DokuWiki Plugin aichat (CLI Component) * @@ -43,7 +42,9 @@ protected function setup(Options $options) $options->registerOption( 'clear', 'Clear all existing embeddings before creating new ones', - 'c', false, 'embed' + 'c', + false, + 'embed' ); $options->registerCommand('maintenance', 'Run storage maintenance. Refert to the documentation for details.'); @@ -68,7 +69,6 @@ protected function setup(Options $options) ' Not supported on all storages.'); $options->registerArgument('vector.tsv', 'The vector file', false, 'tsv'); $options->registerArgument('meta.tsv', 'The meta file', false, 'tsv'); - } /** @inheritDoc */ @@ -76,7 +76,6 @@ protected function main(Options $options) { ini_set('memory_limit', -1); switch ($options->getCmd()) { - case 'embed': $this->createEmbeddings($options->getOpt('clear')); break; diff --git a/conf/default.php b/conf/default.php index 6bbd58d..612129d 100644 --- a/conf/default.php +++ b/conf/default.php @@ -1,4 +1,5 @@ */ - $meta['openaikey'] = array('string'); $meta['openaiorg'] = array('string'); diff --git a/helper.php b/helper.php index 8bb9bfd..996ebdb 100644 --- a/helper.php +++ b/helper.php @@ -1,5 +1,6 @@ */ -class helper_plugin_aichat extends \dokuwiki\Extension\Plugin +class helper_plugin_aichat extends Plugin { /** @var CLIPlugin $logger */ protected $logger; @@ -65,7 +66,7 @@ public function userMayAccess() */ public function getModel() { - if ($this->model === null) { + if (!$this->model instanceof AbstractModel) { $class = '\\dokuwiki\\plugin\\aichat\\Model\\' . $this->getConf('model'); if (!class_exists($class)) { @@ -88,7 +89,7 @@ public function getModel() */ public function getEmbeddings() { - if ($this->embeddings === null) { + if (!$this->embeddings instanceof Embeddings) { $this->embeddings = new Embeddings($this->getModel(), $this->getStorage()); if ($this->logger) { $this->embeddings->setLogger($this->logger); @@ -105,7 +106,7 @@ public function getEmbeddings() */ public function getStorage() { - if ($this->storage === null) { + if (!$this->storage instanceof AbstractStorage) { if ($this->getConf('pinecone_apikey')) { $this->storage = new PineconeStorage(); } else { @@ -238,7 +239,7 @@ protected function getPrompt($type, $vars = []) { $template = file_get_contents($this->localFN('prompt_' . $type)); - $replace = array(); + $replace = []; foreach ($vars as $key => $val) { $replace['{{' . strtoupper($key) . '}}'] = $val; } @@ -282,4 +283,3 @@ public function getLanguageLimit() } } } - diff --git a/lang/languages.php b/lang/languages.php index 38ae12e..69e7d68 100644 --- a/lang/languages.php +++ b/lang/languages.php @@ -1,4 +1,5 @@ 'Afar', 'ab' => 'Abkhaz', diff --git a/syntax/chat.php b/syntax/chat.php index 3a4355e..74b3306 100644 --- a/syntax/chat.php +++ b/syntax/chat.php @@ -1,12 +1,14 @@ */ -class syntax_plugin_aichat_chat extends \dokuwiki\Extension\SyntaxPlugin +class syntax_plugin_aichat_chat extends SyntaxPlugin { /** @inheritDoc */ public function getType() @@ -50,9 +52,9 @@ public function render($format, Doku_Renderer $renderer, $data) return false; } - if($this->getConf('restricted')) $renderer->nocache(); + if ($this->getConf('restricted')) $renderer->nocache(); $helper = plugin_load('helper', 'aichat'); - if(!$helper->userMayAccess()) { + if (!$helper->userMayAccess()) { return true; } @@ -67,7 +69,7 @@ public function render($format, Doku_Renderer $renderer, $data) $opts = [ 'label' => $this->getLang('title'), ]; - if(in_array('float', $data['params'])) $opts['class'] = 'float'; + if (in_array('float', $data['params'])) $opts['class'] = 'float'; $html = '' . $html . ''; } @@ -76,4 +78,3 @@ public function render($format, Doku_Renderer $renderer, $data) return true; } } - diff --git a/syntax/similar.php b/syntax/similar.php index 1f83782..db2d6c9 100644 --- a/syntax/similar.php +++ b/syntax/similar.php @@ -1,5 +1,6 @@ */ -class syntax_plugin_aichat_similar extends \dokuwiki\Extension\SyntaxPlugin +class syntax_plugin_aichat_similar extends SyntaxPlugin { /** @inheritDoc */ public function getType() @@ -54,10 +55,10 @@ public function render($mode, Doku_Renderer $renderer, $data) $pages = (new Indexer())->getPages(); $pos = array_search($id, $pages); - if($pos === false) return true; + if ($pos === false) return true; $storage = $helper->getStorage(); - $chunks = $storage->getPageChunks($id, $pos*100); + $chunks = $storage->getPageChunks($id, $pos * 100); $similar = []; foreach ($chunks as $chunk) { $similar += $storage->getSimilarChunks($chunk->getEmbedding(), 10); @@ -72,13 +73,12 @@ public function render($mode, Doku_Renderer $renderer, $data) return $b->getScore() <=> $a->getScore(); }); - if(!$similar) return true; + if (!$similar) return true; $similar = array_slice($similar, 0, 5); $renderer->listu_open(); foreach ($similar as $chunk) { - /** @var Chunk $chunk */ $renderer->listitem_open(1); $renderer->listcontent_open(); $renderer->internallink($chunk->getPage(), null, null, false, 'navigation'); @@ -90,4 +90,3 @@ public function render($mode, Doku_Renderer $renderer, $data) return true; } } -