Skip to content

Commit

Permalink
Merge pull request #2 from cosmocode/bot/autofix
Browse files (browse the repository at this point in the history)
🤖 Automatic code style fixes
  • Loading branch information
splitbrain authored Sep 13, 2023
2 parents b377ce3 + 7ebc789 commit d145bc5
Show file tree
Hide file tree
Showing 18 changed files with 81 additions and 74 deletions.
6 changes: 3 additions & 3 deletions AIChat.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
class AIChat
{
/** @var int preferUIlanguage config: guess the language in use, all sources */
const LANG_AUTO_ALL = 0;
public const LANG_AUTO_ALL = 0;
/** @var int preferUIlanguage config: use UI language, all sources */
const LANG_UI_ALL = 1;
public const LANG_UI_ALL = 1;
/** @var int preferUIlanguage config: use UI language, limit sources */
const LANG_UI_LIMITED = 2;
public const LANG_UI_LIMITED = 2;
}
5 changes: 1 addition & 4 deletions Chunk.php
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,6 @@ public function setScore($score): void
$this->score = $score;
}

/**
* @return string
*/
public function getLanguage(): string
{
return $this->language;
Expand Down Expand Up @@ -182,7 +179,7 @@ protected function determineLanguage()
* @param string $json
* @return Chunk
*/
static public function fromJSON($json)
public static function fromJSON($json)
{
$data = json_decode($json, true);
return new self(
Expand Down
25 changes: 13 additions & 12 deletions Embeddings.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

namespace dokuwiki\plugin\aichat;

use dokuwiki\Extension\PluginInterface;
use dokuwiki\plugin\aichat\Model\AbstractModel;
use dokuwiki\plugin\aichat\Storage\AbstractStorage;
use dokuwiki\Search\Indexer;
Expand All @@ -18,7 +19,7 @@
class Embeddings
{
/** @var int maximum overlap between chunks in tokens */
const MAX_OVERLAP_LEN = 200;
public const MAX_OVERLAP_LEN = 200;

/** @var AbstractModel */
protected $model;
Expand Down Expand Up @@ -70,7 +71,7 @@ public function setLogger(CLI $logger)
*/
public function getTokenEncoder()
{
if ($this->tokenEncoder === null) {
if (!$this->tokenEncoder instanceof Encoder) {
$this->tokenEncoder = new Encoder();
}
return $this->tokenEncoder;
Expand Down Expand Up @@ -108,7 +109,7 @@ public function createNewIndex($skipRE = '', $clear = false)
if ($firstChunk && @filemtime(wikiFN($page)) < $firstChunk->getCreated()) {
// page is older than the chunks we have, reuse the existing chunks
$this->storage->reusePageChunks($page, $chunkID);
if ($this->logger) $this->logger->info("Reusing chunks for $page");
if ($this->logger instanceof CLI) $this->logger->info("Reusing chunks for $page");
} else {
// page is newer than the chunks we have, create new chunks
$this->storage->deletePageChunks($page, $chunkID);
Expand All @@ -134,7 +135,7 @@ protected function createPageChunks($page, $firstChunkID)
$chunkList = [];

$textRenderer = plugin_load('renderer', 'text');
if ($textRenderer) {
if ($textRenderer instanceof PluginInterface) {
global $ID;
$ID = $page;
$text = p_cached_output(wikiFN($page), 'text', $page);
Expand All @@ -149,7 +150,7 @@ protected function createPageChunks($page, $firstChunkID)
try {
$embedding = $this->model->getEmbedding($part);
} catch (\Exception $e) {
if ($this->logger) {
if ($this->logger instanceof CLI) {
$this->logger->error(
'Failed to get embedding for chunk of page {page}: {msg}',
['page' => $page, 'msg' => $e->getMessage()]
Expand All @@ -160,8 +161,8 @@ protected function createPageChunks($page, $firstChunkID)
$chunkList[] = new Chunk($page, $firstChunkID, $part, $embedding);
$firstChunkID++;
}
if ($this->logger) {
if (count($chunkList)) {
if ($this->logger instanceof CLI) {
if ($chunkList !== []) {
$this->logger->success('{id} split into {count} chunks', ['id' => $page, 'count' => count($chunkList)]);
} else {
$this->logger->warning('{id} could not be split into chunks', ['id' => $page]);
Expand All @@ -181,7 +182,7 @@ protected function createPageChunks($page, $firstChunkID)
* @return Chunk[]
* @throws \Exception
*/
public function getSimilarChunks($query, $lang='')
public function getSimilarChunks($query, $lang = '')
{
global $auth;
$vector = $this->model->getEmbedding($query);
Expand All @@ -193,7 +194,7 @@ public function getSimilarChunks($query, $lang='')

$time = microtime(true);
$chunks = $this->storage->getSimilarChunks($vector, $lang, $fetch);
if ($this->logger) {
if ($this->logger instanceof CLI) {
$this->logger->info(
'Fetched {count} similar chunks from store in {time} seconds',
['count' => count($chunks), 'time' => round(microtime(true) - $time, 2)]
Expand Down Expand Up @@ -236,7 +237,7 @@ public function splitIntoChunks($text)
$slen = count($tiktok->encode($sentence));
if ($slen > $this->model->getMaxEmbeddingTokenLength()) {
// sentence is too long, we need to split it further
if ($this->logger) $this->logger->warning('Sentence too long, splitting not implemented yet');
if ($this->logger instanceof CLI) $this->logger->warning('Sentence too long, splitting not implemented yet');
continue;
}

Expand All @@ -251,7 +252,7 @@ public function splitIntoChunks($text)
$chunks[] = $chunk;

// start new chunk with remembered sentences
$chunk = join(' ', $this->sentenceQueue);
$chunk = implode(' ', $this->sentenceQueue);
$chunk .= $sentence;
$chunklen = count($tiktok->encode($chunk));
}
Expand All @@ -274,7 +275,7 @@ protected function rememberSentence($sentence)

// remove oldest sentences from queue until we are below the max overlap
$encoder = $this->getTokenEncoder();
while (count($encoder->encode(join(' ', $this->sentenceQueue))) > self::MAX_OVERLAP_LEN) {
while (count($encoder->encode(implode(' ', $this->sentenceQueue))) > self::MAX_OVERLAP_LEN) {
array_shift($this->sentenceQueue);
}
}
Expand Down
12 changes: 6 additions & 6 deletions Model/AbstractModel.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@

namespace dokuwiki\plugin\aichat\Model;


abstract class AbstractModel {


abstract class AbstractModel
{
/** @var int total tokens used by this instance */
protected $tokensUsed = 0;
/** @var int total cost incurred by this instance (multiplied by 1000*10000) */
Expand Down Expand Up @@ -43,7 +41,8 @@ abstract public function getMaxContextTokenLength();
*
* @return int
*/
public function getMaxRephrasingTokenLength() {
public function getMaxRephrasingTokenLength()
{
return $this->getMaxContextTokenLength();
}

Expand Down Expand Up @@ -77,7 +76,8 @@ abstract public function getAnswer($messages);
* @return string The new question
* @throws \Exception
*/
public function getRephrasedQuestion($messages) {
public function getRephrasedQuestion($messages)
{
return $this->getAnswer($messages);
}

Expand Down
11 changes: 6 additions & 5 deletions Model/OpenAI/GPT35Turbo.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
class GPT35Turbo extends AbstractModel
{
/** @var int[] real 1K cost multiplied by 10000 to avoid floating point issues, as of 2023-06-14 */
static protected $prices = [
protected static $prices = [
'text-embedding-ada-002' => 1, // $0.0001 per 1k token
'gpt-3.5-turbo' => 15, // $0.0015 per 1k token
'gpt-3.5-turbo-16k' => 30, // $0.003 per 1k token
Expand All @@ -22,14 +22,14 @@ class GPT35Turbo extends AbstractModel
];

/** @var array[] The models and limits for the different use cases */
static protected $setup = [
protected static $setup = [
'embedding' => ['text-embedding-ada-002', 1000], // chunk size
'rephrase' => ['gpt-3.5-turbo', 3500], // rephrasing context size
'chat' => ['gpt-3.5-turbo', 3500], // question context size
];

/** @var int How often to retry a request if it fails */
const MAX_RETRIES = 3;
public const MAX_RETRIES = 3;

/** @var DokuHTTPClient */
protected $http;
Expand Down Expand Up @@ -97,7 +97,8 @@ public function getRephrasedQuestion($messages)
/**
* @internal for checking available models
*/
public function listUpstreamModels() {
public function listUpstreamModels()
{
$url = 'https://api.openai.com/v1/models';
$result = $this->http->get($url);
return $result;
Expand Down Expand Up @@ -167,7 +168,7 @@ protected function request($endpoint, $data, $retry = 0)
}

// update usage statistics
if(isset($result['usage'])) {
if (isset($result['usage'])) {
$price = self::$prices[$data['model']] ?? 0;
$this->tokensUsed += $result['usage']['total_tokens'];
$this->costEstimate += $result['usage']['total_tokens'] * $price;
Expand Down
2 changes: 1 addition & 1 deletion Model/OpenAI/GPT35Turbo16k.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
*/
class GPT35Turbo16K extends GPT35Turbo
{
static protected $setup = [
protected static $setup = [
'embedding' => ['text-embedding-ada-002', 3000],
'rephrase' => ['gpt-3.5-turbo', 3500],
'chat' => ['gpt-3.5-turbo-16k', 6000],
Expand Down
2 changes: 1 addition & 1 deletion Model/OpenAI/GPT4.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
*/
class GPT4 extends GPT35Turbo
{
static protected $setup = [
protected static $setup = [
'embedding' => ['text-embedding-ada-002', 2000],
'rephrase' => ['gpt-4', 3500],
'chat' => ['gpt-4', 3000],
Expand Down
8 changes: 5 additions & 3 deletions Storage/AbstractStorage.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ abstract class AbstractStorage
* @param CLIPlugin $logger
* @return void
*/
public function setLogger($logger) {
public function setLogger($logger)
{
$this->logger = $logger;
}

Expand Down Expand Up @@ -112,7 +113,7 @@ abstract public function getPageChunks($page, $firstChunkID);
* @param int $limit The number of results to return, see note above
* @return Chunk[]
*/
abstract public function getSimilarChunks($vector, $lang='', $limit = 4);
abstract public function getSimilarChunks($vector, $lang = '', $limit = 4);

/**
* Get information about the storage
Expand All @@ -130,7 +131,8 @@ abstract public function statistics();
* @param string $metafile path to the file with the metadata
* @return void
*/
public function dumpTSV($vectorfile, $metafile) {
public function dumpTSV($vectorfile, $metafile)
{
throw new \RuntimeException('Not implemented for current storage');
}
}
4 changes: 2 additions & 2 deletions Storage/PineconeStorage.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ protected function runQuery($endpoint, $data, $method = 'POST')
{
$url = $this->baseurl . $endpoint;

if (is_array($data) && !count($data)) {
if (is_array($data) && $data === []) {
$json = '{}';
} else {
$json = json_encode($data);
Expand Down Expand Up @@ -197,7 +197,7 @@ public function getPageChunks($page, $firstChunkID)
/** @inheritdoc */
public function getSimilarChunks($vector, $lang = '', $limit = 4)
{
$limit = $limit * 2; // we can't check ACLs, so we return more than requested
$limit *= 2; // we can't check ACLs, so we return more than requested

if ($lang) {
$filter = ['language' => ['$eq', $lang]];
Expand Down
20 changes: 12 additions & 8 deletions Storage/SQLiteStorage.php
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
<?php /** @noinspection SqlResolve */
<?php

/** @noinspection SqlResolve */

namespace dokuwiki\plugin\aichat\Storage;

Expand All @@ -16,12 +18,12 @@
class SQLiteStorage extends AbstractStorage
{
/** @var float minimum similarity to consider a chunk a match */
const SIMILARITY_THRESHOLD = 0.75;
public const SIMILARITY_THRESHOLD = 0.75;

/** @var int Number of documents to randomly sample to create the clusters */
const SAMPLE_SIZE = 2000;
public const SAMPLE_SIZE = 2000;
/** @var int The average size of each cluster */
const CLUSTER_SIZE = 400;
public const CLUSTER_SIZE = 400;

/** @var SQLiteDB */
protected $db;
Expand Down Expand Up @@ -138,7 +140,8 @@ public function getSimilarChunks($vector, $lang = '', $limit = 4)
{
$cluster = $this->getCluster($vector, $lang);
if ($this->logger) $this->logger->info(
'Using cluster {cluster} for similarity search', ['cluster' => $cluster]
'Using cluster {cluster} for similarity search',
['cluster' => $cluster]
);

$result = $this->db->queryAll(
Expand Down Expand Up @@ -282,15 +285,15 @@ protected function createLanguageClusters($lang)
static $iterations = 0;
++$iterations;
if ($this->logger) {
$clustercounts = join(',', array_map('count', $clusters));
$clustercounts = implode(',', array_map('count', $clusters));
$this->logger->info('Iteration {iteration}: [{clusters}]', [
'iteration' => $iterations, 'clusters' => $clustercounts
]);
}
}, Cluster::INIT_KMEANS_PLUS_PLUS);

// store the clusters
foreach ($clusters as $clusterID => $cluster) {
foreach ($clusters as $cluster) {
/** @var Cluster $cluster */
$centroid = $cluster->getCoordinates();
$query = 'INSERT INTO clusters (lang, centroid) VALUES (?, ?)';
Expand Down Expand Up @@ -322,7 +325,8 @@ protected function setChunkClusters()
$query = 'UPDATE embeddings SET cluster = ? WHERE id = ?';
$this->db->exec($query, [$cluster, $record['id']]);
if ($this->logger) $this->logger->success(
'Chunk {id} assigned to cluster {cluster}', ['id' => $record['id'], 'cluster' => $cluster]
'Chunk {id} assigned to cluster {cluster}',
['id' => $record['id'], 'cluster' => $cluster]
);
}
$handle->closeCursor();
Expand Down
15 changes: 8 additions & 7 deletions action.php
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
<?php

use dokuwiki\Extension\ActionPlugin;
use dokuwiki\Extension\EventHandler;
use dokuwiki\Extension\Event;
use dokuwiki\Logger;
use dokuwiki\ErrorHandler;
use dokuwiki\plugin\aichat\Chunk;

Expand All @@ -9,11 +13,10 @@
* @license GPL 2 http://www.gnu.org/licenses/gpl-2.0.html
* @author Andreas Gohr <[email protected]>
*/
class action_plugin_aichat extends \dokuwiki\Extension\ActionPlugin
class action_plugin_aichat extends ActionPlugin
{

/** @inheritDoc */
public function register(Doku_Event_Handler $controller)
public function register(EventHandler $controller)
{
$controller->register_hook('AJAX_CALL_UNKNOWN', 'BEFORE', $this, 'handleQuestion');
}
Expand All @@ -27,7 +30,7 @@ public function register(Doku_Event_Handler $controller)
* @param mixed $param optional parameter passed when event was registered
* @return void
*/
public function handleQuestion(Doku_Event $event, $param)
public function handleQuestion(Event $event, $param)
{
if ($event->data !== 'aichat') return;
$event->preventDefault();
Expand Down Expand Up @@ -67,7 +70,7 @@ public function handleQuestion(Doku_Event $event, $param)
]);

if ($this->getConf('logging')) {
\dokuwiki\Logger::getInstance('aichat')->log(
Logger::getInstance('aichat')->log(
$question,
[
'interpretation' => $result['question'],
Expand All @@ -88,6 +91,4 @@ public function handleQuestion(Doku_Event $event, $param)
]);
}
}

}

Loading

0 comments on commit d145bc5

Please sign in to comment.