From 58e3e0ea9dda0b4fa96c9913fc34254d2a8697f4 Mon Sep 17 00:00:00 2001 From: Matias De lellis Date: Tue, 4 Jun 2024 12:00:49 -0300 Subject: [PATCH] Adds batch option for face clustering. Issue #712 (among many others). This improves the performance of clustering with many faces. In my tests with 43 thousand faces, it takes 11.91 minutes to run the full 'face:background_job --cluster-mode' command. Using a batch size of 20 thousand, the time is reduced to 4.47 minutes. With 5000 it takes 1.71 minutes, and with 2000 (which is the minimum allowed batch size) it takes only 54 seconds. Against all odds, memory consumption does not increase in any way, but batching still has a disadvantage: although the number of clusters does not increase much (only 5%), the clusters will generally be smaller (presumably of higher quality), so there will be more clusters to give names to. This is another advanced option that will not be available in the administrator interface. Set it from the command line (values below 2000 are raised to 2000): occ config:app:set facerecognition clustering_batch_size --value='1000' --type=integer --- .../Tasks/CreateClustersTask.php | 26 ++++++++++++++++--- lib/Service/SettingsService.php | 7 +++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/lib/BackgroundJob/Tasks/CreateClustersTask.php b/lib/BackgroundJob/Tasks/CreateClustersTask.php index 212193b2..e173701c 100644 --- a/lib/BackgroundJob/Tasks/CreateClustersTask.php +++ b/lib/BackgroundJob/Tasks/CreateClustersTask.php @@ -153,16 +153,36 @@ private function createClusterIfNeeded(string $userId) { $this->faceMapper->getNonGroupableFaces($userId, $modelId, $min_face_size, $min_confidence) ); - $this->logInfo(count($faces) . ' faces found for clustering'); + $facesCount = count($faces); + $this->logInfo('There are ' . $facesCount . 
' faces for clustering'); + + $noSlices = 1; + $sliceSize = $facesCount; + + $defaultSlice = $this->settingsService->getClusterigBatchSize(); + if ($defaultSlice > 0) { + // The minimum batch size is 2000 faces + $defaultSlice = max($defaultSlice, 2000); + // The maximum batch size is the faces count. NOTE(review): with zero faces this clamps $defaultSlice to 0 and the division below divides by zero - confirm this path is unreachable when there are no faces. + $defaultSlice = min($defaultSlice, $facesCount); + $noSlices = intval($facesCount / $defaultSlice) + 1; + $sliceSize = ceil($facesCount / $noSlices); + } + + $this->logDebug('We will cluster with ' . $noSlices . ' batch(es) of ' . $sliceSize . ' faces'); + + $newClusters = []; + for ($i = 0; $i < $noSlices ; $i++) { + $facesSliced = array_slice($faces, $i * $sliceSize, $sliceSize); + $newClusters = array_merge($newClusters, $this->getNewClusters($facesSliced)); + } // Cluster is associative array where key is person ID. // Value is array of face IDs. For old clusters, person IDs are some existing person IDs, // and for new clusters is whatever chinese whispers decides to identify them. // - $currentClusters = $this->getCurrentClusters($faces); - $newClusters = $this->getNewClusters($faces); $this->logInfo(count($newClusters) . 
' clusters found after clustering'); // New merge diff --git a/lib/Service/SettingsService.php b/lib/Service/SettingsService.php index 96e59b98..91ac93ad 100644 --- a/lib/Service/SettingsService.php +++ b/lib/Service/SettingsService.php @@ -89,6 +89,9 @@ class SettingsService { const USER_ENABLED_KEY = 'enabled'; // The default is defined by system 'default_enabled' key + const CLUSTERING_BATCH_SIZE_KEY = 'clustering_batch_size'; + const DEFAULT_CLUSTERING_BATCH_SIZE = '-1'; + /** User setting that remember last images checked */ const STALE_IMAGES_LAST_CHECKED_KEY = 'stale_images_last_checked'; const DEFAULT_STALE_IMAGES_LAST_CHECKED = '0'; @@ -312,6 +315,10 @@ public function getDefaultUserEnabled (): bool { return ($enabled === 'true'); } + public function getClusterigBatchSize(): int { + return intval($this->config->getAppValue(Application::APP_NAME, self::CLUSTERING_BATCH_SIZE_KEY, self::DEFAULT_CLUSTERING_BATCH_SIZE)); + } + public function getHandleSharedFiles(): bool { $handle = $this->config->getAppValue(Application::APP_NAME, self::HANDLE_SHARED_FILES_KEY, self::DEFAULT_HANDLE_SHARED_FILES); return ($handle === 'true');