Skip to content

Commit

Permalink
Adds batch option for face clustering. Issue #712 (Among many others)
Browse files Browse the repository at this point in the history
This improves the performance of clustering with many faces. In my
tests with 43 thousand faces, it takes 11.91 minutes to run the full
'face:background_job --cluster-mode' command. Using a batch size of 20
thousand, the time is reduced to 4.47 minutes. With 5000 it takes 1.71
minutes, and with 2000 (which is the minimum batch size) it takes only 54 seconds.

Against all odds, memory consumption does not increase in any way,
but batching still has a disadvantage: although the number of clusters
does not increase much (only 5%), the clusters will generally be
smaller (presumably of higher quality), so there will be more
clusters to name.

Well, this is another advanced option that will not be available in
the administrator interface.

occ config:app:set  facerecognition clustering_batch_size --value='1000' --type=integer
  • Loading branch information
matiasdelellis committed Jun 4, 2024
1 parent b89a898 commit 58e3e0e
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 3 deletions.
26 changes: 23 additions & 3 deletions lib/BackgroundJob/Tasks/CreateClustersTask.php
Original file line number Diff line number Diff line change
Expand Up @@ -153,16 +153,36 @@ private function createClusterIfNeeded(string $userId) {
$this->faceMapper->getNonGroupableFaces($userId, $modelId, $min_face_size, $min_confidence)
);

$this->logInfo(count($faces) . ' faces found for clustering');
$facesCount = count($faces);
$this->logInfo('There are ' . $facesCount . ' faces for clustering');

$noSlices = 1;
$sliceSize = $facesCount;

$defaultSlice = $this->settingsService->getClusterigBatchSize();
if ($defaultSlice > 0) {
// The minimum batch size is 2000 faces
$defaultSlice = max($defaultSlice, 2000);
// The maximum batch size is the faces count.
$defaultSlice = min($defaultSlice, $facesCount);
$noSlices = intval($facesCount / $defaultSlice) + 1;
$sliceSize = ceil($facesCount / $noSlices);
}

$this->logDebug('We will cluster with ' . $noSlices . ' batch(es) of ' . $sliceSize . ' faces');

$newClusters = [];
for ($i = 0; $i < $noSlices ; $i++) {
$facesSliced = array_slice($faces, $i * $sliceSize, $sliceSize);
$newClusters = array_merge($newClusters, $this->getNewClusters($facesSliced));
}

// Cluster is associative array where key is person ID.
// Value is array of face IDs. For old clusters, person IDs are some existing person IDs,
// and for new clusters is whatever chinese whispers decides to identify them.
//

$currentClusters = $this->getCurrentClusters($faces);

$newClusters = $this->getNewClusters($faces);
$this->logInfo(count($newClusters) . ' clusters found after clustering');

// New merge
Expand Down
7 changes: 7 additions & 0 deletions lib/Service/SettingsService.php
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ class SettingsService {
const USER_ENABLED_KEY = 'enabled';
// The default is defined by system 'default_enabled' key

const CLUSTERING_BATCH_SIZE_KEY = 'clustering_batch_size';
const DEFAULT_CLUSTERING_BATCH_SIZE = '-1';

/** User setting that remember last images checked */
const STALE_IMAGES_LAST_CHECKED_KEY = 'stale_images_last_checked';
const DEFAULT_STALE_IMAGES_LAST_CHECKED = '0';
Expand Down Expand Up @@ -312,6 +315,10 @@ public function getDefaultUserEnabled (): bool {
return ($enabled === 'true');
}

/**
 * Reads the clustering batch size from the app configuration.
 *
 * The value is looked up under CLUSTERING_BATCH_SIZE_KEY, with
 * DEFAULT_CLUSTERING_BATCH_SIZE passed as the default, and is then
 * converted to an integer.
 *
 * @return int The configured batch size as an integer.
 */
public function getClusterigBatchSize(): int {
	$rawBatchSize = $this->config->getAppValue(
		Application::APP_NAME,
		self::CLUSTERING_BATCH_SIZE_KEY,
		self::DEFAULT_CLUSTERING_BATCH_SIZE
	);

	return (int) $rawBatchSize;
}

public function getHandleSharedFiles(): bool {
$handle = $this->config->getAppValue(Application::APP_NAME, self::HANDLE_SHARED_FILES_KEY, self::DEFAULT_HANDLE_SHARED_FILES);
return ($handle === 'true');
Expand Down

0 comments on commit 58e3e0e

Please sign in to comment.