Skip to content

Commit

Permalink
Merge pull request #13 from Wikia/UGC-4625
Browse files Browse the repository at this point in the history
UGC-4625 | Use utf8mb4 client character set for Cargo's DB connection
  • Loading branch information
MrVanosh authored Dec 4, 2023
2 parents 111de7f + 0ec4433 commit df2f597
Showing 1 changed file with 36 additions and 6 deletions.
42 changes: 36 additions & 6 deletions includes/CargoUtils.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
use MediaWiki\Linker\LinkRenderer;
use MediaWiki\Linker\LinkTarget;
use MediaWiki\MediaWikiServices;
use Wikimedia\Rdbms\DatabaseMysqli;
use Wikimedia\Rdbms\IDatabase;

class CargoUtils {

Expand Down Expand Up @@ -92,9 +94,34 @@ public static function getDB() {
} else {
self::$CargoDB = Database::factory( $wgCargoDBtype, $params );
}

// Fandom change: Ensure Cargo DB connections use 4-byte UTF-8 client character set (UGC-4625).
self::setClientCharacterSet( self::$CargoDB );

return self::$CargoDB;
}

/**
 * Set the client character set of a database connection handle to 4-byte UTF-8.
 * This is necessary because Cargo utilizes functions such as REGEXP_LIKE(),
 * which fail if the client character set is "binary".
 *
 * Only DatabaseMysqli handles are touched; any other IDatabase implementation
 * is left unchanged. A failure to switch the character set is reported as an
 * E_USER_WARNING rather than being silently ignored, so that a connection left
 * on its previous client character set can be diagnosed.
 *
 * @param IDatabase $dbw Database connection handle.
 */
private static function setClientCharacterSet( IDatabase $dbw ): void {
	if ( !( $dbw instanceof DatabaseMysqli ) ) {
		// Nothing to do for non-mysqli connection handles.
		return;
	}

	// Force open the database connection so that we can obtain the underlying native connection handle.
	$dbw->ping();

	// getBindingHandle() is invoked through reflection (with setAccessible) because
	// it is not callable directly from outside the database class.
	$ref = new ReflectionMethod( $dbw, 'getBindingHandle' );
	$ref->setAccessible( true );

	/** @var mysqli $mysqli */
	$mysqli = $ref->invoke( $dbw );

	// mysqli::set_charset() returns false on failure when mysqli is not in
	// exception mode; surface that instead of discarding the result.
	if ( !$mysqli->set_charset( 'utf8mb4' ) ) {
		trigger_error(
			'Cargo: failed to set the client character set to utf8mb4: ' . $mysqli->error,
			E_USER_WARNING
		);
	}
}

/**
* Gets a page property for the specified page ID and property name.
*/
Expand Down Expand Up @@ -616,7 +643,7 @@ public static function recreateDBTablesForTemplate(
$tableSchemaString = $tableSchema->toDBString();
}

$dbw = wfGetDB( DB_MASTER );
$dbw = wfGetDB( DB_PRIMARY );
$cdb = self::getDB();

// Cannot run any recreate if a replacement table exists.
Expand Down Expand Up @@ -957,10 +984,13 @@ public static function createTable( $cdb, $tableName, $fieldsInTable, $multipleC
if ( $wgCargoDBRowFormat != null ) {
$createSQL .= " ROW_FORMAT=$wgCargoDBRowFormat";
}
// Fandom edit: set utf-8 character set for Cargo tables.
// Note: this is to bring tables on any dbs created post-UCP in line with those imported
// from Gamepedia, since the database default charset is different between those.
$createSQL .= " CHARACTER SET utf8 COLLATE utf8_unicode_ci";
// Fandom edit: set utf8mb4 character set for Cargo tables (UGC-4625).
// These tables cannot use the binary charset that other MediaWiki tables use
// due to the need to support natural ordering of varchar fields as well as
// SQL functions such as REGEXP_LIKE() that do not support binary fields.
// Historically, these tables were created with the 3-byte utf8 character set,
// which is not sufficient for some characters, such as emoji.
$createSQL .= " CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci";

$cdb->query( $createSQL );

Expand Down Expand Up @@ -1125,7 +1155,7 @@ public static function parseCoordinatesString( $coordinatesString ) {
if ( count( $latAndLonStrings ) != 2 ) {
throw new MWException( "Error parsing coordinates string: \"$coordinatesString\"." );
}
list( $latString, $lonString ) = $latAndLonStrings;
[ $latString, $lonString ] = $latAndLonStrings;

// Handle strings one at a time.
$latIsNegative = false;
Expand Down

0 comments on commit df2f597

Please sign in to comment.