diff options
author | jenkins-bot <jenkins-bot@gerrit.wikimedia.org> | 2022-01-10 18:34:25 +0000 |
---|---|---|
committer | Gerrit Code Review <gerrit@wikimedia.org> | 2022-01-10 18:34:25 +0000 |
commit | fb80b943b67a17a85111692fa4ac675f148f05e6 (patch) | |
tree | 19918ca5d3c85b8b139003ac2e15e5885937d326 | |
parent | 51fbd273ab893a7e66aa3dc3afbe1ac4b2a552a4 (diff) | |
parent | 1aecb692f64b3166cbaf1a7de9d85790ebc8759f (diff) |
Merge "Write to multiple categorylinks tables on update"
-rw-r--r-- | includes/DefaultSettings.php | 11 | ||||
-rw-r--r-- | includes/deferred/LinksUpdate.php | 15 | ||||
-rw-r--r-- | includes/deferred/LinksUpdate/CategoryLinksTable.php | 84 | ||||
-rw-r--r-- | includes/deferred/LinksUpdate/LinksTable.php | 9 | ||||
-rw-r--r-- | includes/deferred/LinksUpdate/LinksTableGroup.php | 70 |
5 files changed, 154 insertions, 35 deletions
diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 783831b72e8b..8e76c3605cf3 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -8858,6 +8858,17 @@ $wgCategoryPagingLimit = 200; $wgCategoryCollation = 'uppercase'; /** + * Additional category collations to store during LinksUpdate. This can be used + * to perform online migration of categories from one collation to another. An + * array of associative arrays each having the following keys: + * - table: (string) The table name + * - collation: (string) The collation to use for cl_sortkey + * - fakeCollation: (string) The collation name to insert into cl_collation + * @since 1.38 + */ +$wgTempCategoryCollations = []; + +/** * Array holding default tracking category names. * * Array contains the system messages for each tracking category. diff --git a/includes/deferred/LinksUpdate.php b/includes/deferred/LinksUpdate.php index d00abb6f7af5..0abd53b681ee 100644 --- a/includes/deferred/LinksUpdate.php +++ b/includes/deferred/LinksUpdate.php @@ -143,14 +143,17 @@ class LinksUpdate extends DataUpdate { $this->mRecursive = $recursive; $services = MediaWikiServices::getInstance(); + $config = $services->getMainConfig(); $this->tableFactory = new LinksTableGroup( $services->getObjectFactory(), $services->getDBLoadBalancerFactory(), + $services->getCollationFactory(), $page, - $services->getMainConfig()->get( 'UpdateRowsPerQuery' ), + $config->get( 'UpdateRowsPerQuery' ), function ( $table, $rows ) { $this->getHookRunner()->onLinksUpdateAfterInsert( $this, $table, $rows ); - } + }, + $config->get( 'TempCategoryCollations' ) ); // TODO: this does not have to be called in LinksDeletionUpdate $this->tableFactory->setParserOutput( $parserOutput ); @@ -190,6 +193,14 @@ class LinksUpdate extends DataUpdate { return; } + // Do any setup that needs to be done prior to acquiring the lock + // Calling getAll() here has the side-effect of calling + // LinksUpdateBatch::setParserOutput() on all subclasses, allowing + // those methods to also do pre-lock operations. + foreach ( $this->tableFactory->getAll() as $batch ) { + $batch->beforeLock(); + } + if ( $this->ticket ) { // Make sure all links update threads see the changes of each other. // This handles the case when updates have to batched into several COMMITs. diff --git a/includes/deferred/LinksUpdate/CategoryLinksTable.php b/includes/deferred/LinksUpdate/CategoryLinksTable.php index 2643d6259fec..505623a132b5 100644 --- a/includes/deferred/LinksUpdate/CategoryLinksTable.php +++ b/includes/deferred/LinksUpdate/CategoryLinksTable.php @@ -2,7 +2,7 @@ namespace MediaWiki\Deferred\LinksUpdate; -use MediaWiki\Collation\CollationFactory; +use Collation; use MediaWiki\DAO\WikiAwareEntity; use MediaWiki\Languages\LanguageConverterFactory; use MediaWiki\Page\PageReferenceValue; @@ -24,7 +24,8 @@ use Title; class CategoryLinksTable extends TitleLinksTable { /** * @var array Associative array of new links, with the category name in the - * key and the sort key prefix in the value + * key. The value is a list consisting of the sort key prefix and the sort + * key. */ private $newLinks = []; @@ -43,6 +44,12 @@ class CategoryLinksTable extends TitleLinksTable { /** @var string The collation name for cl_collation */ private $collationName; + /** @var string The table name */ + private $tableName = 'categorylinks'; + + /** @var bool */ + private $isTempTable; + /** @var string The category type, which depends on the source page */ private $categoryType; @@ -52,17 +59,31 @@ class CategoryLinksTable extends TitleLinksTable { /** @var WikiPageFactory */ private $wikiPageFactory; + /** + * @param LanguageConverterFactory $converterFactory + * @param NamespaceInfo $namespaceInfo + * @param WikiPageFactory $wikiPageFactory + * @param Collation $collation + * @param string $collationName + * @param string $tableName + * @param bool $isTempTable + */ public function __construct( LanguageConverterFactory $converterFactory, - CollationFactory $collationFactory, NamespaceInfo $namespaceInfo, - WikiPageFactory $wikiPageFactory + WikiPageFactory $wikiPageFactory, + Collation $collation, + $collationName, + $tableName, + $isTempTable ) { $this->languageConverter = $converterFactory->getLanguageConverter(); - $this->collation = $collationFactory->getCategoryCollation(); - $this->collationName = $collationFactory->getDefaultCollationName(); $this->namespaceInfo = $namespaceInfo; $this->wikiPageFactory = $wikiPageFactory; + $this->collation = $collation; + $this->collationName = $collationName; + $this->tableName = $tableName; + $this->isTempTable = $isTempTable; } /** @@ -75,16 +96,32 @@ class CategoryLinksTable extends TitleLinksTable { public function setParserOutput( ParserOutput $parserOutput ) { $this->newLinks = []; - foreach ( $parserOutput->getCategories() as $name => $sortKey ) { - // If the sortkey is longer then 255 bytes, it is truncated by DB, and then doesn't match - // when comparing existing vs current categories, causing T27254. - $sortKey = mb_strcut( $sortKey, 0, 255 ); - $this->newLinks[(string)$name] = $sortKey; + $sourceTitle = Title::castFromPageIdentity( $this->getSourcePage() ); + $sortKeyInputs = []; + foreach ( $parserOutput->getCategories() as $name => $sortKeyPrefix ) { + // If the sort key is longer then 255 bytes, it is truncated by DB, + // and then doesn't match when comparing existing vs current + // categories, causing T27254. + $sortKeyPrefix = mb_strcut( $sortKeyPrefix, 0, 255 ); + + $targetTitle = Title::makeTitleSafe( NS_CATEGORY, $name ); + $this->languageConverter->findVariantLink( $name, $targetTitle, true ); + + // Treat custom sort keys as a prefix, so that if multiple + // things are forced to sort as '*' or something, they'll + // sort properly in the category rather than in page_id + // order or such. + $sortKeyInputs[$name] = $sourceTitle->getCategorySortkey( $sortKeyPrefix ); + $this->newLinks[$name] = [ $sortKeyPrefix ]; + } + $sortKeys = $this->collation->getSortKeys( $sortKeyInputs ); + foreach ( $sortKeys as $name => $sortKey ) { + $this->newLinks[$name][1] = $sortKey; } } protected function getTableName() { - return 'categorylinks'; + return $this->tableName; } protected function getFromField() { @@ -102,8 +139,8 @@ class CategoryLinksTable extends TitleLinksTable { * @return iterable<array> */ protected function getNewLinkIDs() { - foreach ( $this->newLinks as $name => $sortkey ) { - yield [ $name, $sortkey ]; + foreach ( $this->newLinks as $name => [ $prefix, $sortKey ] ) { + yield [ $name, $prefix ]; } } @@ -141,25 +178,16 @@ class CategoryLinksTable extends TitleLinksTable { protected function isInNewSet( $linkId ) { [ $name, $prefix ] = $linkId; return \array_key_exists( $name, $this->newLinks ) - && $this->newLinks[$name] === $prefix; + && $this->newLinks[$name][0] === $prefix; } protected function insertLink( $linkId ) { [ $name, $prefix ] = $linkId; - $nt = Title::makeTitleSafe( NS_CATEGORY, $name ); - $this->languageConverter->findVariantLink( $name, $nt, true ); - - // Treat custom sortkeys as a prefix, so that if multiple - // things are forced to sort as '*' or something, they'll - // sort properly in the category rather than in page_id - // order or such. - $sortkey = $this->collation->getSortKey( - Title::castFromPageIdentity( $this->getSourcePage() ) - ->getCategorySortkey( $prefix ) ); + $sortKey = $this->newLinks[$name][1]; $this->insertRow( [ 'cl_to' => $name, - 'cl_sortkey' => $sortkey, + 'cl_sortkey' => $sortKey, 'cl_timestamp' => $this->getDB()->timestamp(), 'cl_sortkey_prefix' => $prefix, 'cl_collation' => $this->collationName, @@ -190,6 +218,10 @@ class CategoryLinksTable extends TitleLinksTable { } protected function finishUpdate() { + if ( $this->isTempTable ) { + // Don't do invalidations for temporary collations + return; + } $this->invalidateCategories(); $this->updateCategoryCounts(); } diff --git a/includes/deferred/LinksUpdate/LinksTable.php b/includes/deferred/LinksUpdate/LinksTable.php index c32893220450..bc00d07d5b6b 100644 --- a/includes/deferred/LinksUpdate/LinksTable.php +++ b/includes/deferred/LinksUpdate/LinksTable.php @@ -364,6 +364,15 @@ abstract class LinksTable { } /** + * Subclasses can override this to do any necessary setup before the lock + * is acquired. + * + * @stable to override + */ + public function beforeLock() { + } + + /** * Subclasses can override this to do any necessary setup before individual * write operations begin. * diff --git a/includes/deferred/LinksUpdate/LinksTableGroup.php b/includes/deferred/LinksUpdate/LinksTableGroup.php index 850a12e3fc33..fb8f91866c50 100644 --- a/includes/deferred/LinksUpdate/LinksTableGroup.php +++ b/includes/deferred/LinksUpdate/LinksTableGroup.php @@ -2,6 +2,7 @@ namespace MediaWiki\Deferred\LinksUpdate; +use MediaWiki\Collation\CollationFactory; use MediaWiki\Config\ServiceOptions; use MediaWiki\MediaWikiServices; use MediaWiki\Page\PageIdentity; @@ -21,16 +22,18 @@ class LinksTableGroup { * - serviceOptions: An array of configuration variable names. If this is * set, the specified configuration will be sent to the subclass * constructor as a ServiceOptions object. + * - needCollation: If true, the following additional args will be added: + * Collation, collation name and table name. */ private const CORE_LIST = [ 'categorylinks' => [ 'class' => CategoryLinksTable::class, 'services' => [ 'LanguageConverterFactory', - 'CollationFactory', 'NamespaceInfo', 'WikiPageFactory' - ] + ], + 'needCollation' => true, ], 'externallinks' => [ 'class' => ExternalLinksTable::class @@ -65,6 +68,9 @@ class LinksTableGroup { /** @var LBFactory */ private $lbFactory; + /** @var CollationFactory */ + private $collationFactory; + /** @var PageIdentity */ private $page; @@ -86,25 +92,37 @@ class LinksTableGroup { /** @var LinksTable[] */ private $tables = []; + /** @var array */ + private $tempCollations; + /** * @param ObjectFactory $objectFactory * @param LBFactory $lbFactory + * @param CollationFactory $collationFactory * @param PageIdentity $page * @param int $batchSize * @param callable|null $afterUpdateHook + * @param array $tempCollations */ public function __construct( ObjectFactory $objectFactory, LBFactory $lbFactory, + CollationFactory $collationFactory, PageIdentity $page, $batchSize, - $afterUpdateHook + $afterUpdateHook, + array $tempCollations ) { $this->objectFactory = $objectFactory; $this->lbFactory = $lbFactory; + $this->collationFactory = $collationFactory; $this->page = $page; $this->batchSize = $batchSize; $this->afterUpdateHook = $afterUpdateHook; + $this->tempCollations = []; + foreach ( $tempCollations as $info ) { + $this->tempCollations[$info['table']] = $info; + } } /** @@ -161,11 +179,46 @@ class LinksTableGroup { * @return array */ private function getSpec( $tableName ) { - if ( !isset( self::CORE_LIST[$tableName] ) ) { - throw new \InvalidArgumentException( - __CLASS__ . ": unknown table name \"$tableName\"" ); + if ( isset( self::CORE_LIST[$tableName] ) ) { + $spec = self::CORE_LIST[$tableName]; + return $this->addCollationArgs( $spec, $tableName, false ); + } + if ( isset( $this->tempCollations[$tableName] ) ) { + $info = $this->tempCollations[$tableName]; + $spec = self::CORE_LIST['categorylinks']; + return $this->addCollationArgs( $spec, $tableName, true, $info ); + } + throw new \InvalidArgumentException( + __CLASS__ . ": unknown table name \"$tableName\"" ); + } + + /** + * Add extra args to the spec of a table that needs collation information + * + * @param array $spec + * @param string $tableName + * @param bool $isTempTable + * @param array $info Temporary collation info + * @return array ObjectFactory spec + */ + private function addCollationArgs( $spec, $tableName, $isTempTable, $info = [] ) { + if ( isset( $spec['needCollation'] ) ) { + if ( isset( $info['collation'] ) ) { + $collation = $this->collationFactory->makeCollation( $info['collation'] ); + $collationName = $info['fakeCollation'] ?? $info['collation']; + } else { + $collation = $this->collationFactory->getCategoryCollation(); + $collationName = $this->collationFactory->getDefaultCollationName(); + } + $spec['args'] = [ + $collation, + $info['fakeCollation'] ?? $collationName, + $tableName, + $isTempTable + ]; + unset( $spec['needCollation'] ); } - return self::CORE_LIST[$tableName]; + return $spec; } /** @@ -214,5 +267,8 @@ class LinksTableGroup { foreach ( self::CORE_LIST as $tableName => $spec ) { yield $this->get( $tableName ); } + foreach ( $this->tempCollations as $tableName => $collation ) { + yield $this->get( $tableName ); + } } } |