diff options
author | jenkins-bot <jenkins-bot@gerrit.wikimedia.org> | 2022-01-10 18:34:18 +0000 |
---|---|---|
committer | Gerrit Code Review <gerrit@wikimedia.org> | 2022-01-10 18:34:18 +0000 |
commit | 51fbd273ab893a7e66aa3dc3afbe1ac4b2a552a4 (patch) | |
tree | 14102bf47738fe5f684eac14892f413d8bda74df | |
parent | 40f1c72fda50908859312059e15aec24c7cb6026 (diff) | |
parent | d3b2b800678e91fd1a6177d80fde790c9006d423 (diff) |
Merge "LinksUpdate refactor"
18 files changed, 1984 insertions, 822 deletions
diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 634341fad873..ba745fd94dd5 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -148,6 +148,7 @@ class AutoLoader { 'MediaWiki\\Config\\' => __DIR__ . '/config/', 'MediaWiki\\Content\\' => __DIR__ . '/content/', 'MediaWiki\\DB\\' => __DIR__ . '/db/', + 'MediaWiki\\Deferred\\LinksUpdate\\' => __DIR__ . '/deferred/LinksUpdate/', 'MediaWiki\\Diff\\' => __DIR__ . '/diff/', 'MediaWiki\\Edit\\' => __DIR__ . '/edit/', 'MediaWiki\\EditPage\\' => __DIR__ . '/editpage/', diff --git a/includes/collation/CollationFactory.php b/includes/collation/CollationFactory.php index 060f810d674f..6710c5f7e437 100644 --- a/includes/collation/CollationFactory.php +++ b/includes/collation/CollationFactory.php @@ -128,7 +128,11 @@ class CollationFactory { * @return Collation */ public function getCategoryCollation(): Collation { - return $this->makeCollation( $this->options->get( 'CategoryCollation' ) ); + return $this->makeCollation( $this->getDefaultCollationName() ); + } + + public function getDefaultCollationName(): string { + return $this->options->get( 'CategoryCollation' ); } /** diff --git a/includes/deferred/LinksDeletionUpdate.php b/includes/deferred/LinksDeletionUpdate.php index 6b5c5d4ba5e4..d7f52ad0734d 100644 --- a/includes/deferred/LinksDeletionUpdate.php +++ b/includes/deferred/LinksDeletionUpdate.php @@ -19,6 +19,7 @@ * * @file */ + use MediaWiki\MediaWikiServices; /** diff --git a/includes/deferred/LinksUpdate.php b/includes/deferred/LinksUpdate.php index b2a486149a46..d00abb6f7af5 100644 --- a/includes/deferred/LinksUpdate.php +++ b/includes/deferred/LinksUpdate.php @@ -20,10 +20,17 @@ * @file */ +use MediaWiki\Deferred\LinksUpdate\ExternalLinksTable; +use MediaWiki\Deferred\LinksUpdate\LinksTable; +use MediaWiki\Deferred\LinksUpdate\LinksTableGroup; +use MediaWiki\Deferred\LinksUpdate\PageLinksTable; +use MediaWiki\Deferred\LinksUpdate\PagePropsTable; +use 
MediaWiki\Deferred\LinksUpdate\TitleLinksTable; use MediaWiki\HookContainer\ProtectedHookAccessorTrait; use MediaWiki\Logger\LoggerFactory; use MediaWiki\MediaWikiServices; use MediaWiki\Page\PageIdentity; +use MediaWiki\Page\PageReferenceValue; use MediaWiki\Revision\RevisionRecord; use MediaWiki\User\UserIdentity; use Wikimedia\Rdbms\IDatabase; @@ -84,37 +91,6 @@ class LinksUpdate extends DataUpdate { private $mRevisionRecord; /** - * @var array[]|null Added links if calculated. - * @phan-var array<int,array{pl_from:int,pl_from_namespace:int,pl_namespace:int,pl_title:string}>|null - */ - private $linkInsertions = null; - - /** - * @var null|array Deleted links if calculated. - */ - private $linkDeletions = null; - - /** - * @var null|array[] Added external links if calculated. - */ - private $externalLinkInsertions = null; - - /** - * @var null|array Deleted external links if calculated. - */ - private $externalLinkDeletions = null; - - /** - * @var null|array Added properties if calculated. - */ - private $propertyInsertions = null; - - /** - * @var null|array Deleted properties if calculated. 
- */ - private $propertyDeletions = null; - - /** * @var UserIdentity|null */ private $user; @@ -122,7 +98,8 @@ class LinksUpdate extends DataUpdate { /** @var IDatabase */ private $db; - private $isStrictTestMode = false; + /** @var LinksTableGroup */ + private $tableFactory; /** * @param PageIdentity $page The page we're updating @@ -165,9 +142,27 @@ class LinksUpdate extends DataUpdate { $this->mRecursive = $recursive; + $services = MediaWikiServices::getInstance(); + $this->tableFactory = new LinksTableGroup( + $services->getObjectFactory(), + $services->getDBLoadBalancerFactory(), + $page, + $services->getMainConfig()->get( 'UpdateRowsPerQuery' ), + function ( $table, $rows ) { + $this->getHookRunner()->onLinksUpdateAfterInsert( $this, $table, $rows ); + } + ); + // TODO: this does not have to be called in LinksDeletionUpdate + $this->tableFactory->setParserOutput( $parserOutput ); + $this->getHookRunner()->onLinksUpdateConstructed( $this ); } + public function setTransactionTicket( $ticket ) { + parent::setTransactionTicket( $ticket ); + $this->tableFactory->setTransactionTicket( $ticket ); + } + /** * Update link tables with outgoing links from an updated article * @@ -244,101 +239,10 @@ class LinksUpdate extends DataUpdate { } protected function doIncrementalUpdate() { - # Page links - $existingPL = $this->getExistingLinks(); - $this->linkDeletions = $this->getLinkDeletions( $existingPL ); - $this->linkInsertions = $this->getLinkInsertions( $existingPL ); - $this->incrTableUpdate( 'pagelinks', 'pl', $this->linkDeletions, $this->linkInsertions ); - - # Image links - $existingIL = $this->getExistingImages(); - $imageDeletes = $this->getImageDeletions( $existingIL ); - $imageAdditions = $this->getImageAdditions( $existingIL ); - $this->incrTableUpdate( - 'imagelinks', - 'il', - $imageDeletes, - $this->getImageInsertions( $existingIL ) ); - - # Image change tags - $enabledTags = ChangeTags::getSoftwareTags(); - $mediaChangeTags = array_filter( [ - count( 
$imageAdditions ) && in_array( 'mw-add-media', $enabledTags ) ? 'mw-add-media' : '', - count( $imageDeletes ) && in_array( 'mw-remove-media', $enabledTags ) ? 'mw-remove-media' : '', - ] ); - $revisionRecord = $this->getRevisionRecord(); - if ( $revisionRecord && count( $mediaChangeTags ) ) { - ChangeTags::addTags( $mediaChangeTags, null, $revisionRecord->getId() ); + foreach ( $this->tableFactory->getAll() as $table ) { + $table->update(); } - # Invalidate all image description pages which had links added or removed - $imageUpdates = $imageDeletes + $imageAdditions; - $this->invalidateImageDescriptions( $imageUpdates ); - - # External links - $existingEL = $this->getExistingExternals(); - $this->externalLinkDeletions = $this->getExternalDeletions( $existingEL ); - $this->externalLinkInsertions = $this->getExternalInsertions( - $existingEL ); - $this->incrTableUpdate( - 'externallinks', - 'el', - $this->externalLinkDeletions, - $this->externalLinkInsertions ); - - # Language links - $existingLL = $this->getExistingInterlangs(); - $this->incrTableUpdate( - 'langlinks', - 'll', - $this->getInterlangDeletions( $existingLL ), - $this->getInterlangInsertions( $existingLL ) ); - - # Inline interwiki links - $existingIW = $this->getExistingInterwikis(); - $this->incrTableUpdate( - 'iwlinks', - 'iwl', - $this->getInterwikiDeletions( $existingIW ), - $this->getInterwikiInsertions( $existingIW ) ); - - # Template links - $existingTL = $this->getExistingTemplates(); - $this->incrTableUpdate( - 'templatelinks', - 'tl', - $this->getTemplateDeletions( $existingTL ), - $this->getTemplateInsertions( $existingTL ) ); - - # Category links - $existingCL = $this->getExistingCategories(); - $categoryDeletes = $this->getCategoryDeletions( $existingCL ); - $this->incrTableUpdate( - 'categorylinks', - 'cl', - $categoryDeletes, - $this->getCategoryInsertions( $existingCL ) ); - $categoryInserts = array_diff_assoc( $this->mCategories, $existingCL ); - $categoryUpdates = $categoryInserts + 
$categoryDeletes; - - # Page properties - $existingPP = $this->getExistingProperties(); - $this->propertyDeletions = $this->getPropertyDeletions( $existingPP ); - $this->incrTableUpdate( - 'page_props', - 'pp', - $this->propertyDeletions, - $this->getPropertyInsertions( $existingPP ) ); - - # Invalidate the necessary pages - $this->propertyInsertions = array_diff_assoc( $this->mProperties, $existingPP ); - $changed = $this->propertyDeletions + $this->propertyInsertions; - $this->invalidateProperties( $changed ); - - # Invalidate all categories which were added, deleted or changed (set symmetric difference) - $this->invalidateCategories( $categoryUpdates ); - $this->updateCategoryCounts( $categoryInserts, $categoryDeletes ); - # Refresh links of all pages including this page # This will be in a separate transaction if ( $this->mRecursive ) { @@ -423,655 +327,13 @@ class LinksUpdate extends DataUpdate { } /** - * @param array $cats - */ - private function invalidateCategories( $cats ) { - PurgeJobUtils::invalidatePages( - $this->getDB(), NS_CATEGORY, array_map( 'strval', array_keys( $cats ) ) - ); - } - - /** - * Update all the appropriate counts in the category table. 
- * @param array $added Associative array of category name => sort key - * @param array $deleted Associative array of category name => sort key - */ - private function updateCategoryCounts( array $added, array $deleted ) { - global $wgUpdateRowsPerQuery; - - if ( !$added && !$deleted ) { - return; - } - - $domainId = $this->getDB()->getDomainID(); - $services = MediaWikiServices::getInstance(); - $wp = $services->getWikiPageFactory()->newFromTitle( $this->mTitle ); - $lbf = $services->getDBLoadBalancerFactory(); - // T163801: try to release any row locks to reduce contention - $lbf->commitAndWaitForReplication( __METHOD__, $this->ticket, [ 'domain' => $domainId ] ); - - foreach ( array_chunk( array_keys( $added ), $wgUpdateRowsPerQuery ) as $addBatch ) { - $wp->updateCategoryCounts( array_map( 'strval', $addBatch ), [], $this->mId ); - $lbf->commitAndWaitForReplication( - __METHOD__, $this->ticket, [ 'domain' => $domainId ] ); - } - - foreach ( array_chunk( array_keys( $deleted ), $wgUpdateRowsPerQuery ) as $deleteBatch ) { - $wp->updateCategoryCounts( [], array_map( 'strval', $deleteBatch ), $this->mId ); - $lbf->commitAndWaitForReplication( - __METHOD__, $this->ticket, [ 'domain' => $domainId ] ); - } - } - - /** - * @param array $images - */ - private function invalidateImageDescriptions( array $images ) { - PurgeJobUtils::invalidatePages( - $this->getDB(), NS_FILE, array_map( 'strval', array_keys( $images ) ) - ); - } - - /** - * Update a table by doing a delete query then an insert query - * @param string $table Table name - * @param string $prefix Field name prefix - * @param array $deletions - * @param array $insertions Rows to insert - */ - private function incrTableUpdate( $table, $prefix, $deletions, $insertions ) { - $services = MediaWikiServices::getInstance(); - $bSize = $services->getMainConfig()->get( 'UpdateRowsPerQuery' ); - $lbf = $services->getDBLoadBalancerFactory(); - - if ( $table === 'page_props' ) { - $fromField = 'pp_page'; - } else { - 
$fromField = "{$prefix}_from"; - } - - $deleteWheres = []; // list of WHERE clause arrays for each DB delete() call - if ( $table === 'pagelinks' || $table === 'templatelinks' || $table === 'iwlinks' ) { - $baseKey = ( $table === 'iwlinks' ) ? 'iwl_prefix' : "{$prefix}_namespace"; - - $curBatchSize = 0; - $curDeletionBatch = []; - $deletionBatches = []; - foreach ( $deletions as $ns => $dbKeys ) { - foreach ( $dbKeys as $dbKey => $unused ) { - $curDeletionBatch[$ns][$dbKey] = 1; - if ( ++$curBatchSize >= $bSize ) { - $deletionBatches[] = $curDeletionBatch; - $curDeletionBatch = []; - $curBatchSize = 0; - } - } - } - if ( $curDeletionBatch ) { - $deletionBatches[] = $curDeletionBatch; - } - - foreach ( $deletionBatches as $deletionBatch ) { - $deleteWheres[] = [ - $fromField => $this->mId, - $this->getDB()->makeWhereFrom2d( $deletionBatch, $baseKey, "{$prefix}_title" ) - ]; - } - } else { - if ( $table === 'langlinks' ) { - $toField = 'll_lang'; - } elseif ( $table === 'page_props' ) { - $toField = 'pp_propname'; - } else { - $toField = $prefix . 
'_to'; - } - - $deletionBatches = array_chunk( array_keys( $deletions ), $bSize ); - foreach ( $deletionBatches as $deletionBatch ) { - $deleteWheres[] = [ - $fromField => $this->mId, - $toField => array_map( 'strval', $deletionBatch ) - ]; - } - } - - $domainId = $this->getDB()->getDomainID(); - - foreach ( $deleteWheres as $deleteWhere ) { - $this->getDB()->delete( $table, $deleteWhere, __METHOD__ ); - $lbf->commitAndWaitForReplication( - __METHOD__, $this->ticket, [ 'domain' => $domainId ] - ); - } - - $insertBatches = array_chunk( $insertions, $bSize ); - foreach ( $insertBatches as $insertBatch ) { - $this->getDB()->insert( $table, $insertBatch, __METHOD__, - $this->getConflictOption() ); - $lbf->commitAndWaitForReplication( - __METHOD__, $this->ticket, [ 'domain' => $domainId ] - ); - } - - if ( count( $insertions ) ) { - $this->getHookRunner()->onLinksUpdateAfterInsert( $this, $table, $insertions ); - } - } - - /** * Omit conflict resolution options from the insert query so that testing * can confirm that the incremental update logic was correct. * * @param bool $mode */ public function setStrictTestMode( $mode = true ) { - $this->isStrictTestMode = $mode; - } - - /** - * @return array - */ - private function getConflictOption() { - if ( $this->isStrictTestMode ) { - return []; - } else { - return [ 'IGNORE' ]; - } - } - - /** - * Get an array of pagelinks insertions for passing to the DB - * Skips the titles specified by the 2-D array $existing - * @param array $existing - * @return array[] - * @phan-return array<int,array{pl_from:int,pl_from_namespace:int,pl_namespace:int,pl_title:string}> - */ - private function getLinkInsertions( $existing = [] ) { - $arr = []; - foreach ( $this->mLinks as $ns => $dbkeys ) { - $diffs = isset( $existing[$ns] ) - ? 
array_diff_key( $dbkeys, $existing[$ns] ) - : $dbkeys; - foreach ( $diffs as $dbk => $id ) { - $arr[] = [ - 'pl_from' => $this->mId, - 'pl_from_namespace' => $this->mTitle->getNamespace(), - 'pl_namespace' => $ns, - 'pl_title' => $dbk - ]; - } - } - - return $arr; - } - - /** - * Get an array of template insertions. Like getLinkInsertions() - * @param array $existing - * @return array - */ - private function getTemplateInsertions( $existing = [] ) { - $arr = []; - foreach ( $this->mTemplates as $ns => $dbkeys ) { - $diffs = isset( $existing[$ns] ) ? array_diff_key( $dbkeys, $existing[$ns] ) : $dbkeys; - foreach ( $diffs as $dbk => $id ) { - $arr[] = [ - 'tl_from' => $this->mId, - 'tl_from_namespace' => $this->mTitle->getNamespace(), - 'tl_namespace' => $ns, - 'tl_title' => $dbk - ]; - } - } - - return $arr; - } - - /** - * Get an array of image insertions - * Skips the names specified in $existing - * @param array $existing - * @return array - */ - private function getImageInsertions( $existing = [] ) { - $arr = []; - $diffs = $this->getImageAdditions( $existing ); - foreach ( $diffs as $iname => $dummy ) { - $arr[] = [ - 'il_from' => $this->mId, - 'il_from_namespace' => $this->mTitle->getNamespace(), - 'il_to' => $iname - ]; - } - - return $arr; - } - - /** - * Get an array of externallinks insertions. Skips the names specified in $existing - * @param array $existing - * @return array[] - */ - private function getExternalInsertions( $existing = [] ) { - $arr = []; - $diffs = array_diff_key( $this->mExternals, $existing ); - foreach ( $diffs as $url => $dummy ) { - foreach ( LinkFilter::makeIndexes( $url ) as $index ) { - $arr[] = [ - 'el_from' => $this->mId, - 'el_to' => $url, - 'el_index' => $index, - 'el_index_60' => substr( $index, 0, 60 ), - ]; - } - } - - return $arr; - } - - /** - * Get an array of category insertions - * - * @param array $existing Mapping existing category names to sort keys. 
If both - * match a link in $this, the link will be omitted from the output - * - * @return array - */ - private function getCategoryInsertions( $existing = [] ) { - global $wgCategoryCollation; - $diffs = array_diff_assoc( $this->mCategories, $existing ); - $arr = []; - - $languageConverter = MediaWikiServices::getInstance()->getLanguageConverterFactory() - ->getLanguageConverter(); - - $collation = MediaWikiServices::getInstance()->getCollationFactory()->getCategoryCollation(); - foreach ( $diffs as $name => $prefix ) { - $nt = Title::makeTitleSafe( NS_CATEGORY, $name ); - $languageConverter->findVariantLink( $name, $nt, true ); - - $type = MediaWikiServices::getInstance()->getNamespaceInfo()-> - getCategoryLinkType( $this->mTitle->getNamespace() ); - - # Treat custom sortkeys as a prefix, so that if multiple - # things are forced to sort as '*' or something, they'll - # sort properly in the category rather than in page_id - # order or such. - $sortkey = $collation->getSortKey( $this->mTitle->getCategorySortkey( $prefix ) ); - - $arr[] = [ - 'cl_from' => $this->mId, - 'cl_to' => $name, - 'cl_sortkey' => $sortkey, - 'cl_timestamp' => $this->getDB()->timestamp(), - 'cl_sortkey_prefix' => $prefix, - 'cl_collation' => $wgCategoryCollation, - 'cl_type' => $type, - ]; - } - - return $arr; - } - - /** - * Get an array of interlanguage link insertions - * - * @param array $existing Mapping existing language codes to titles - * - * @return array - */ - private function getInterlangInsertions( $existing = [] ) { - $diffs = array_diff_assoc( $this->mInterlangs, $existing ); - $arr = []; - foreach ( $diffs as $lang => $title ) { - $arr[] = [ - 'll_from' => $this->mId, - 'll_lang' => $lang, - 'll_title' => $title - ]; - } - - return $arr; - } - - /** - * Get an array of page property insertions - * @param array $existing - * @return array - */ - private function getPropertyInsertions( $existing = [] ) { - $diffs = array_diff_assoc( $this->mProperties, $existing ); - - $arr = 
[]; - foreach ( array_keys( $diffs ) as $name ) { - $arr[] = $this->getPagePropRowData( (string)$name ); - } - - return $arr; - } - - /** - * Returns an associative array to be used for inserting a row into - * the page_props table. Besides the given property name, this will - * include the page id from $this->mId and any property value from - * $this->mProperties. - * - * The array returned will include the pp_sortkey field. - * The sortkey value is currently determined by getPropertySortKeyValue(). - * - * @note this assumes that $this->mProperties[$prop] is defined. - * - * @param string $prop The name of the property. - * - * @return array - */ - private function getPagePropRowData( $prop ) { - $value = $this->mProperties[$prop]; - - return [ - 'pp_page' => $this->mId, - 'pp_propname' => $prop, - 'pp_value' => $value, - 'pp_sortkey' => $this->getPropertySortKeyValue( $value ) - ]; - } - - /** - * Determines the sort key for the given property value. - * This will return $value if it is a float or int, - * 1 or resp. 0 if it is a bool, and null otherwise. - * - * @note In the future, we may allow the sortkey to be specified explicitly - * in ParserOutput::setProperty. - * - * @param mixed $value - * - * @return float|null - */ - private function getPropertySortKeyValue( $value ) { - if ( is_int( $value ) || is_float( $value ) || is_bool( $value ) ) { - return floatval( $value ); - } - - return null; - } - - /** - * Get an array of interwiki insertions for passing to the DB - * Skips the titles specified by the 2-D array $existing - * @param array $existing - * @return array - */ - private function getInterwikiInsertions( $existing = [] ) { - $arr = []; - foreach ( $this->mInterwikis as $prefix => $dbkeys ) { - $diffs = isset( $existing[$prefix] ) - ? 
array_diff_key( $dbkeys, $existing[$prefix] ) - : $dbkeys; - - foreach ( $diffs as $dbk => $id ) { - $arr[] = [ - 'iwl_from' => $this->mId, - 'iwl_prefix' => $prefix, - 'iwl_title' => $dbk - ]; - } - } - - return $arr; - } - - /** - * Given an array of existing images, returns $this images that are not in there - * and thus should be added. - * @param array $existing - * @return array - */ - private function getImageAdditions( $existing ) { - return array_diff_key( $this->mImages, $existing ); - } - - /** - * Given an array of existing links, returns those links which are not in $this - * and thus should be deleted. - * @param array $existing - * @return array - */ - private function getLinkDeletions( $existing ) { - $del = []; - foreach ( $existing as $ns => $dbkeys ) { - if ( isset( $this->mLinks[$ns] ) ) { - $del[$ns] = array_diff_key( $dbkeys, $this->mLinks[$ns] ); - } else { - $del[$ns] = $dbkeys; - } - } - - return $del; - } - - /** - * Given an array of existing templates, returns those templates which are not in $this - * and thus should be deleted. - * @param array $existing - * @return array - */ - private function getTemplateDeletions( $existing ) { - $del = []; - foreach ( $existing as $ns => $dbkeys ) { - if ( isset( $this->mTemplates[$ns] ) ) { - $del[$ns] = array_diff_key( $dbkeys, $this->mTemplates[$ns] ); - } else { - $del[$ns] = $dbkeys; - } - } - - return $del; - } - - /** - * Given an array of existing images, returns those images which are not in $this - * and thus should be deleted. - * @param array $existing - * @return array - */ - private function getImageDeletions( $existing ) { - return array_diff_key( $existing, $this->mImages ); - } - - /** - * Given an array of existing external links, returns those links which are not - * in $this and thus should be deleted. 
- * @param array $existing - * @return array - */ - private function getExternalDeletions( $existing ) { - return array_diff_key( $existing, $this->mExternals ); - } - - /** - * Given an array of existing categories, returns those categories which are not in $this - * and thus should be deleted. - * @param array $existing - * @return array - */ - private function getCategoryDeletions( $existing ) { - return array_diff_assoc( $existing, $this->mCategories ); - } - - /** - * Given an array of existing interlanguage links, returns those links which are not - * in $this and thus should be deleted. - * @param array $existing - * @return array - */ - private function getInterlangDeletions( $existing ) { - return array_diff_assoc( $existing, $this->mInterlangs ); - } - - /** - * Get array of properties which should be deleted. - * @param array $existing - * @return array - */ - private function getPropertyDeletions( $existing ) { - return array_diff_assoc( $existing, $this->mProperties ); - } - - /** - * Given an array of existing interwiki links, returns those links which are not in $this - * and thus should be deleted. 
- * @param array $existing - * @return array - */ - private function getInterwikiDeletions( $existing ) { - $del = []; - foreach ( $existing as $prefix => $dbkeys ) { - if ( isset( $this->mInterwikis[$prefix] ) ) { - $del[$prefix] = array_diff_key( $dbkeys, $this->mInterwikis[$prefix] ); - } else { - $del[$prefix] = $dbkeys; - } - } - - return $del; - } - - /** - * Get an array of existing links, as a 2-D array - * - * @return array - */ - private function getExistingLinks() { - $res = $this->getDB()->select( 'pagelinks', [ 'pl_namespace', 'pl_title' ], - [ 'pl_from' => $this->mId ], __METHOD__ ); - $arr = []; - foreach ( $res as $row ) { - if ( !isset( $arr[$row->pl_namespace] ) ) { - $arr[$row->pl_namespace] = []; - } - $arr[$row->pl_namespace][$row->pl_title] = 1; - } - - return $arr; - } - - /** - * Get an array of existing templates, as a 2-D array - * - * @return array - */ - private function getExistingTemplates() { - $res = $this->getDB()->select( 'templatelinks', [ 'tl_namespace', 'tl_title' ], - [ 'tl_from' => $this->mId ], __METHOD__ ); - $arr = []; - foreach ( $res as $row ) { - if ( !isset( $arr[$row->tl_namespace] ) ) { - $arr[$row->tl_namespace] = []; - } - $arr[$row->tl_namespace][$row->tl_title] = 1; - } - - return $arr; - } - - /** - * Get an array of existing images, image names in the keys - * - * @return array - */ - private function getExistingImages() { - $res = $this->getDB()->select( 'imagelinks', [ 'il_to' ], - [ 'il_from' => $this->mId ], __METHOD__ ); - $arr = []; - foreach ( $res as $row ) { - $arr[$row->il_to] = 1; - } - - return $arr; - } - - /** - * Get an array of existing external links, URLs in the keys - * - * @return array - */ - private function getExistingExternals() { - $res = $this->getDB()->select( 'externallinks', [ 'el_to' ], - [ 'el_from' => $this->mId ], __METHOD__ ); - $arr = []; - foreach ( $res as $row ) { - $arr[$row->el_to] = 1; - } - - return $arr; - } - - /** - * Get an array of existing categories, with the name 
in the key and sort key in the value. - * - * @return array - */ - private function getExistingCategories() { - $res = $this->getDB()->select( 'categorylinks', [ 'cl_to', 'cl_sortkey_prefix' ], - [ 'cl_from' => $this->mId ], __METHOD__ ); - $arr = []; - foreach ( $res as $row ) { - $arr[$row->cl_to] = $row->cl_sortkey_prefix; - } - - return $arr; - } - - /** - * Get an array of existing interlanguage links, with the language code in the key and the - * title in the value. - * - * @return array - */ - private function getExistingInterlangs() { - $res = $this->getDB()->select( 'langlinks', [ 'll_lang', 'll_title' ], - [ 'll_from' => $this->mId ], __METHOD__ ); - $arr = []; - foreach ( $res as $row ) { - $arr[$row->ll_lang] = $row->ll_title; - } - - return $arr; - } - - /** - * Get an array of existing inline interwiki links, as a 2-D array - * @return array [ prefix => [ dbkey => 1 ] ] - */ - private function getExistingInterwikis() { - $res = $this->getDB()->select( 'iwlinks', [ 'iwl_prefix', 'iwl_title' ], - [ 'iwl_from' => $this->mId ], __METHOD__ ); - $arr = []; - foreach ( $res as $row ) { - if ( !isset( $arr[$row->iwl_prefix] ) ) { - $arr[$row->iwl_prefix] = []; - } - $arr[$row->iwl_prefix][$row->iwl_title] = 1; - } - - return $arr; - } - - /** - * Get an array of existing categories, with the name in the key and sort key in the value. 
- * - * @return array Array of property names and values - */ - private function getExistingProperties() { - $res = $this->getDB()->select( 'page_props', [ 'pp_propname', 'pp_value' ], - [ 'pp_page' => $this->mId ], __METHOD__ ); - $arr = []; - foreach ( $res as $row ) { - $arr[$row->pp_propname] = $row->pp_value; - } - - return $arr; + $this->tableFactory->setStrictTestMode( $mode ); } /** @@ -1121,6 +383,7 @@ class LinksUpdate extends DataUpdate { */ public function setRevisionRecord( RevisionRecord $revisionRecord ) { $this->mRevisionRecord = $revisionRecord; + $this->tableFactory->setRevision( $revisionRecord ); } /** @@ -1152,66 +415,49 @@ class LinksUpdate extends DataUpdate { } /** - * Invalidate any necessary link lists related to page property changes - * @param array $changed + * @return PageLinksTable */ - private function invalidateProperties( $changed ) { - global $wgPagePropLinkInvalidations; + protected function getPageLinksTable(): PageLinksTable { + // @phan-suppress-next-line PhanTypeMismatchReturnSuperType + return $this->tableFactory->get( 'pagelinks' ); + } - $jobs = []; - foreach ( $changed as $name => $value ) { - if ( isset( $wgPagePropLinkInvalidations[$name] ) ) { - $inv = $wgPagePropLinkInvalidations[$name]; - if ( !is_array( $inv ) ) { - $inv = [ $inv ]; - } - foreach ( $inv as $table ) { - $jobs[] = HTMLCacheUpdateJob::newForBacklinks( - $this->mTitle, - $table, - [ 'causeAction' => 'page-props' ] - ); - } - } - } + /** + * @return ExternalLinksTable + */ + protected function getExternalLinksTable(): ExternalLinksTable { + // @phan-suppress-next-line PhanTypeMismatchReturnSuperType + return $this->tableFactory->get( 'externallinks' ); + } - JobQueueGroup::singleton()->lazyPush( $jobs ); + /** + * @return PagePropsTable + */ + protected function getPagePropsTable(): PagePropsTable { + // @phan-suppress-next-line PhanTypeMismatchReturnSuperType + return $this->tableFactory->get( 'page_props' ); } /** * Fetch page links added by this 
LinksUpdate. Only available after the update is complete. + * * @since 1.22 - * @return null|array Array of Titles + * @deprecated since 1.38 use getPageReferenceIterator() or getPageReferenceArray() + * @return Title[] Array of Titles */ public function getAddedLinks() { - if ( $this->linkInsertions === null ) { - return null; - } - $result = []; - foreach ( $this->linkInsertions as $insertion ) { - $result[] = Title::makeTitle( $insertion['pl_namespace'], $insertion['pl_title'] ); - } - - return $result; + return $this->getPageLinksTable()->getTitleArray( LinksTable::INSERTED ); } /** * Fetch page links removed by this LinksUpdate. Only available after the update is complete. + * * @since 1.22 - * @return null|array Array of Titles + * @deprecated since 1.38 use getPageReferenceIterator() or getPageReferenceArray() + * @return Title[] Array of Titles */ public function getRemovedLinks() { - if ( $this->linkDeletions === null ) { - return null; - } - $result = []; - foreach ( $this->linkDeletions as $ns => $titles ) { - foreach ( $titles as $title => $unused ) { - $result[] = Title::makeTitle( $ns, $title ); - } - } - - return $result; + return $this->getPageLinksTable()->getTitleArray( LinksTable::DELETED ); } /** @@ -1221,10 +467,7 @@ class LinksUpdate extends DataUpdate { * @return null|array Array of Strings */ public function getAddedExternalLinks() { - if ( $this->externalLinkInsertions === null ) { - return null; - } - return array_column( $this->externalLinkInsertions, 'el_to' ); + return $this->getExternalLinksTable()->getStringArray( LinksTable::INSERTED ); } /** @@ -1234,10 +477,7 @@ class LinksUpdate extends DataUpdate { * @return null|string[] */ public function getRemovedExternalLinks() { - if ( $this->externalLinkDeletions === null ) { - return null; - } - return array_keys( $this->externalLinkDeletions ); + return $this->getExternalLinksTable()->getStringArray( LinksTable::DELETED ); } /** @@ -1247,7 +487,7 @@ class LinksUpdate extends DataUpdate { 
* @return null|array */ public function getAddedProperties() { - return $this->propertyInsertions; + return $this->getPagePropsTable()->getAssocArray( LinksTable::INSERTED ); } /** @@ -1257,7 +497,45 @@ class LinksUpdate extends DataUpdate { * @return null|array */ public function getRemovedProperties() { - return $this->propertyDeletions; + return $this->getPagePropsTable()->getAssocArray( LinksTable::DELETED ); + } + + /** + * Get an iterator over PageReferenceValue objects corresponding to a given set + * type in a given table. + * + * @since 1.38 + * @param string $tableName The name of any table that links to local titles + * @param int $setType One of: + * - LinksTable::INSERTED: The inserted links + * - LinksTable::DELETED: The deleted links + * - LinksTable::CHANGED: Both the inserted and deleted links + * - LinksTable::OLD: The old set of links, loaded before the update + * - LinksTable::NEW: The new set of links from the ParserOutput + * @return iterable<PageReferenceValue> + * @phan-return \Traversable + */ + public function getPageReferenceIterator( $tableName, $setType ) { + $table = $this->tableFactory->get( $tableName ); + if ( $table instanceof TitleLinksTable ) { + return $table->getPageReferenceIterator( $setType ); + } else { + throw new \InvalidArgumentException( + __METHOD__ . 
": $tableName does not have a list of titles" ); + } + } + + /** + * Same as getPageReferenceIterator() but converted to an array for convenience + * (at the expense of additional time and memory usage) + * + * @since 1.38 + * @param string $tableName + * @param int $setType + * @return PageReferenceValue[] + */ + public function getPageReferenceArray( $tableName, $setType ) { + return iterator_to_array( $this->getPageReferenceIterator( $tableName, $setType ) ); } /** diff --git a/includes/deferred/LinksUpdate/CategoryLinksTable.php b/includes/deferred/LinksUpdate/CategoryLinksTable.php new file mode 100644 index 000000000000..2643d6259fec --- /dev/null +++ b/includes/deferred/LinksUpdate/CategoryLinksTable.php @@ -0,0 +1,247 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use MediaWiki\Collation\CollationFactory; +use MediaWiki\DAO\WikiAwareEntity; +use MediaWiki\Languages\LanguageConverterFactory; +use MediaWiki\Page\PageReferenceValue; +use MediaWiki\Page\WikiPageFactory; +use NamespaceInfo; +use ParserOutput; +use PurgeJobUtils; +use Title; + +/** + * categorylinks + * + * Link ID format: string[] + * - 0: Category name + * - 1: User-specified sort key (cl_sortkey_prefix) + * + * @since 1.38 + */ +class CategoryLinksTable extends TitleLinksTable { + /** + * @var array Associative array of new links, with the category name in the + * key and the sort key prefix in the value + */ + private $newLinks = []; + + /** + * @var array|null Associative array of existing links, or null if it has + * not been loaded yet + */ + private $existingLinks; + + /** @var \ILanguageConverter */ + private $languageConverter; + + /** @var \Collation */ + private $collation; + + /** @var string The collation name for cl_collation */ + private $collationName; + + /** @var string The category type, which depends on the source page */ + private $categoryType; + + /** @var NamespaceInfo */ + private $namespaceInfo; + + /** @var WikiPageFactory */ + private $wikiPageFactory; + + 
public function __construct( + LanguageConverterFactory $converterFactory, + CollationFactory $collationFactory, + NamespaceInfo $namespaceInfo, + WikiPageFactory $wikiPageFactory + ) { + $this->languageConverter = $converterFactory->getLanguageConverter(); + $this->collation = $collationFactory->getCategoryCollation(); + $this->collationName = $collationFactory->getDefaultCollationName(); + $this->namespaceInfo = $namespaceInfo; + $this->wikiPageFactory = $wikiPageFactory; + } + + /** + * Cache the category type after the source page has been set + */ + public function startUpdate() { + $this->categoryType = $this->namespaceInfo + ->getCategoryLinkType( $this->getSourcePage()->getNamespace() ); + } + + public function setParserOutput( ParserOutput $parserOutput ) { + $this->newLinks = []; + foreach ( $parserOutput->getCategories() as $name => $sortKey ) { + // If the sortkey is longer than 255 bytes, it is truncated by DB, and then doesn't match + // when comparing existing vs current categories, causing T27254. + $sortKey = mb_strcut( $sortKey, 0, 255 ); + $this->newLinks[(string)$name] = $sortKey; + } + } + + protected function getTableName() { + return 'categorylinks'; + } + + protected function getFromField() { + return 'cl_from'; + } + + protected function getExistingFields() { + return [ 'cl_to', 'cl_sortkey_prefix' ]; + } + + /** + * Get the new link IDs. The link ID is a list with the name in the first + * element and the sort key prefix in the second element. + * + * @return iterable<array> + */ + protected function getNewLinkIDs() { + foreach ( $this->newLinks as $name => $sortkey ) { + yield [ $name, $sortkey ]; + } + } + + /** + * Get the existing links as an associative array, with the category name + * in the key and the sort key prefix in the value. 
+ * + * @return array + */ + private function getExistingLinks() { + if ( $this->existingLinks === null ) { + $this->existingLinks = []; + foreach ( $this->fetchExistingRows() as $row ) { + $this->existingLinks[$row->cl_to] = $row->cl_sortkey_prefix; + } + } + return $this->existingLinks; + } + + /** + * @return \Generator + */ + protected function getExistingLinkIDs() { + foreach ( $this->getExistingLinks() as $name => $sortkey ) { + yield [ $name, $sortkey ]; + } + } + + protected function isExisting( $linkId ) { + $links = $this->getExistingLinks(); + [ $name, $prefix ] = $linkId; + return \array_key_exists( $name, $links ) && $links[$name] === $prefix; + } + + protected function isInNewSet( $linkId ) { + [ $name, $prefix ] = $linkId; + return \array_key_exists( $name, $this->newLinks ) + && $this->newLinks[$name] === $prefix; + } + + protected function insertLink( $linkId ) { + [ $name, $prefix ] = $linkId; + $nt = Title::makeTitleSafe( NS_CATEGORY, $name ); + $this->languageConverter->findVariantLink( $name, $nt, true ); + + // Treat custom sortkeys as a prefix, so that if multiple + // things are forced to sort as '*' or something, they'll + // sort properly in the category rather than in page_id + // order or such. 
+ $sortkey = $this->collation->getSortKey( + Title::castFromPageIdentity( $this->getSourcePage() ) + ->getCategorySortkey( $prefix ) ); + + $this->insertRow( [ + 'cl_to' => $name, + 'cl_sortkey' => $sortkey, + 'cl_timestamp' => $this->getDB()->timestamp(), + 'cl_sortkey_prefix' => $prefix, + 'cl_collation' => $this->collationName, + 'cl_type' => $this->categoryType, + ] ); + } + + protected function deleteLink( $linkId ) { + $this->deleteRow( [ 'cl_to' => $linkId[0] ] ); + } + + protected function makePageReferenceValue( $linkId ): PageReferenceValue { + return new PageReferenceValue( NS_CATEGORY, $linkId[0], WikiAwareEntity::LOCAL ); + } + + protected function makeTitle( $linkId ): Title { + return Title::makeTitle( NS_CATEGORY, $linkId[0] ); + } + + protected function deduplicateLinkIds( $linkIds ) { + $seen = []; + foreach ( $linkIds as $linkId ) { + if ( !\array_key_exists( $linkId[0], $seen ) ) { + $seen[$linkId[0]] = true; + yield $linkId; + } + } + } + + protected function finishUpdate() { + $this->invalidateCategories(); + $this->updateCategoryCounts(); + } + + private function invalidateCategories() { + $changedCategoryNames = array_unique( array_merge( + array_column( $this->insertedLinks, 0 ), + array_column( $this->deletedLinks, 0 ) + ) ); + PurgeJobUtils::invalidatePages( + $this->getDB(), NS_CATEGORY, $changedCategoryNames ); + } + + /** + * Update all the appropriate counts in the category table. 
+ */ + private function updateCategoryCounts() { + if ( !$this->insertedLinks && !$this->deletedLinks ) { + return; + } + + $domainId = $this->getDB()->getDomainID(); + $wp = $this->wikiPageFactory->newFromTitle( $this->getSourcePage() ); + $lbf = $this->getLBFactory(); + $size = $this->getBatchSize(); + // T163801: try to release any row locks to reduce contention + $lbf->commitAndWaitForReplication( + __METHOD__, $this->getTransactionTicket(), [ 'domain' => $domainId ] ); + + if ( count( $this->insertedLinks ) + count( $this->deletedLinks ) < $size ) { + $wp->updateCategoryCounts( + array_column( $this->insertedLinks, 0 ), + array_column( $this->deletedLinks, 0 ), + $this->getSourcePageId() + ); + $lbf->commitAndWaitForReplication( + __METHOD__, $this->getTransactionTicket(), [ 'domain' => $domainId ] ); + } else { + $addedChunks = array_chunk( array_column( $this->insertedLinks, 0 ), $size ); + foreach ( $addedChunks as $chunk ) { + $wp->updateCategoryCounts( $chunk, [], $this->getSourcePageId() ); + $lbf->commitAndWaitForReplication( + __METHOD__, $this->getTransactionTicket(), [ 'domain' => $domainId ] ); + } + + $deletedChunks = array_chunk( array_column( $this->deletedLinks, 0 ), $size ); + foreach ( $deletedChunks as $chunk ) { + $wp->updateCategoryCounts( [], $chunk, $this->getSourcePageId() ); + $lbf->commitAndWaitForReplication( + __METHOD__, $this->getTransactionTicket(), [ 'domain' => $domainId ] ); + } + + } + } +} diff --git a/includes/deferred/LinksUpdate/ExternalLinksTable.php b/includes/deferred/LinksUpdate/ExternalLinksTable.php new file mode 100644 index 000000000000..12a8db877e7b --- /dev/null +++ b/includes/deferred/LinksUpdate/ExternalLinksTable.php @@ -0,0 +1,99 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use LinkFilter; +use ParserOutput; + +/** + * externallinks + * + * Link ID format: string URL + * + * @since 1.38 + */ +class ExternalLinksTable extends LinksTable { + private $newLinks = []; + private $existingLinks; + + 
public function setParserOutput( ParserOutput $parserOutput ) { + $this->newLinks = $parserOutput->getExternalLinks(); + } + + protected function getTableName() { + return 'externallinks'; + } + + protected function getFromField() { + return 'el_from'; + } + + protected function getExistingFields() { + return [ 'el_to' ]; + } + + /** + * Get the existing links as an array, where the key is the URL and the + * value is unused. + * + * @return array + */ + private function getExistingLinks() { + if ( $this->existingLinks === null ) { + $this->existingLinks = []; + foreach ( $this->fetchExistingRows() as $row ) { + $this->existingLinks[$row->el_to] = true; + } + } + return $this->existingLinks; + } + + protected function getNewLinkIDs() { + foreach ( $this->newLinks as $link => $unused ) { + yield $link; + } + } + + protected function getExistingLinkIDs() { + foreach ( $this->getExistingLinks() as $link => $unused ) { + yield $link; + } + } + + protected function isExisting( $linkId ) { + return \array_key_exists( $linkId, $this->getExistingLinks() ); + } + + protected function isInNewSet( $linkId ) { + return \array_key_exists( $linkId, $this->newLinks ); + } + + protected function insertLink( $linkId ) { + foreach ( LinkFilter::makeIndexes( $linkId ) as $index ) { + $this->insertRow( [ + 'el_to' => $linkId, + 'el_index' => $index, + 'el_index_60' => substr( $index, 0, 60 ), + ] ); + } + } + + protected function deleteLink( $linkId ) { + $this->deleteRow( [ 'el_to' => $linkId ] ); + } + + /** + * Get an array of URLs of the given type + * + * @param int $setType One of the link set constants as in LinksTable::getLinkIDs() + * @return string[] + */ + public function getStringArray( $setType ) { + $ids = $this->getLinkIDs( $setType ); + if ( is_array( $ids ) ) { + return $ids; + } else { + return iterator_to_array( $ids ); + } + } +} diff --git a/includes/deferred/LinksUpdate/GenericPageLinksTable.php b/includes/deferred/LinksUpdate/GenericPageLinksTable.php new file 
mode 100644 index 000000000000..acf06e630ae0 --- /dev/null +++ b/includes/deferred/LinksUpdate/GenericPageLinksTable.php @@ -0,0 +1,136 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use MediaWiki\DAO\WikiAwareEntity; +use MediaWiki\Page\PageReferenceValue; +use Title; + +/** + * Shared code for pagelinks and templatelinks. They are very similar tables + * since they both link to an arbitrary page identified by namespace and title. + * + * Link ID format: string[]: + * - 0: namespace ID + * - 1: title DB key + * + * @since 1.38 + */ +abstract class GenericPageLinksTable extends TitleLinksTable { + /** + * A 2d array representing the new links, with the namespace ID in the + * first key, the DB key in the second key, and the value arbitrary. + * + * @var array + */ + protected $newLinks = []; + + /** + * The existing links in the same format as self::$newLinks, or null if it + * has not been loaded yet. + * + * @var array|null + */ + private $existingLinks; + + /** + * Get the namespace field name + * + * @return string + */ + abstract protected function getNamespaceField(); + + /** + * Get the title (DB key) field name + * + * @return string + */ + abstract protected function getTitleField(); + + /** + * @return mixed + */ + abstract protected function getFromNamespaceField(); + + protected function getExistingFields() { + return [ + 'ns' => $this->getNamespaceField(), + 'title' => $this->getTitleField() + ]; + } + + /** + * Get existing links as an associative array + * + * @return array + */ + private function getExistingLinks() { + if ( $this->existingLinks === null ) { + $this->existingLinks = []; + foreach ( $this->fetchExistingRows() as $row ) { + $this->existingLinks[$row->ns][$row->title] = 1; + } + } + + return $this->existingLinks; + } + + protected function getNewLinkIDs() { + foreach ( $this->newLinks as $ns => $links ) { + foreach ( $links as $dbk => $unused ) { + yield [ $ns, $dbk ]; + } + } + } + + protected function getExistingLinkIDs() { 
+ foreach ( $this->getExistingLinks() as $ns => $links ) { + foreach ( $links as $dbk => $unused ) { + yield [ $ns, $dbk ]; + } + } + } + + protected function isExisting( $linkId ) { + [ $ns, $dbk ] = $linkId; + return isset( $this->getExistingLinks()[$ns][$dbk] ); + } + + protected function isInNewSet( $linkId ) { + [ $ns, $dbk ] = $linkId; + return isset( $this->newLinks[$ns][$dbk] ); + } + + protected function insertLink( $linkId ) { + $this->insertRow( [ + $this->getFromNamespaceField() => $this->getSourcePage()->getNamespace(), + $this->getNamespaceField() => $linkId[0], + $this->getTitleField() => $linkId[1] + ] ); + } + + protected function deleteLink( $linkId ) { + $this->deleteRow( [ + $this->getNamespaceField() => $linkId[0], + $this->getTitleField() => $linkId[1] + ] ); + } + + protected function makePageReferenceValue( $linkId ): PageReferenceValue { + return new PageReferenceValue( $linkId[0], $linkId[1], WikiAwareEntity::LOCAL ); + } + + protected function makeTitle( $linkId ): Title { + return Title::makeTitle( $linkId[0], $linkId[1] ); + } + + protected function deduplicateLinkIds( $linkIds ) { + $seen = []; + foreach ( $linkIds as $linkId ) { + if ( !isset( $seen[$linkId[0]][$linkId[1]] ) ) { + $seen[$linkId[0]][$linkId[1]] = true; + yield $linkId; + } + } + } +} diff --git a/includes/deferred/LinksUpdate/ImageLinksTable.php b/includes/deferred/LinksUpdate/ImageLinksTable.php new file mode 100644 index 000000000000..bc0adcdb6a13 --- /dev/null +++ b/includes/deferred/LinksUpdate/ImageLinksTable.php @@ -0,0 +1,138 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use ChangeTags; +use MediaWiki\DAO\WikiAwareEntity; +use MediaWiki\Page\PageReferenceValue; +use ParserOutput; +use PurgeJobUtils; +use Title; + +/** + * imagelinks + * + * Link ID format: string image name + * + * @since 1.38 + */ +class ImageLinksTable extends TitleLinksTable { + /** + * @var array New links with the name in the key, value arbitrary + */ + private $newLinks; + + 
/** + * @var array Existing links with the name in the key, value arbitrary + */ + private $existingLinks; + + public function setParserOutput( ParserOutput $parserOutput ) { + $this->newLinks = $parserOutput->getImages(); + } + + protected function getTableName() { + return 'imagelinks'; + } + + protected function getFromField() { + return 'il_from'; + } + + protected function getExistingFields() { + return [ 'il_to' ]; + } + + protected function getNewLinkIDs() { + foreach ( $this->newLinks as $link => $unused ) { + yield (string)$link; + } + } + + /** + * Get existing links with the name in the key, value arbitrary. + * + * @return array + */ + private function getExistingLinks() { + if ( $this->existingLinks === null ) { + $this->existingLinks = []; + foreach ( $this->fetchExistingRows() as $row ) { + $this->existingLinks[$row->il_to] = true; + } + } + return $this->existingLinks; + } + + protected function getExistingLinkIDs() { + foreach ( $this->getExistingLinks() as $link => $unused ) { + yield $link; + } + } + + protected function isExisting( $linkId ) { + return \array_key_exists( $linkId, $this->getExistingLinks() ); + } + + protected function isInNewSet( $linkId ) { + return \array_key_exists( $linkId, $this->newLinks ); + } + + protected function insertLink( $linkId ) { + $this->insertRow( [ + 'il_from_namespace' => $this->getSourcePage()->getNamespace(), + 'il_to' => $linkId + ] ); + } + + protected function deleteLink( $linkId ) { + $this->deleteRow( [ 'il_to' => $linkId ] ); + } + + protected function makePageReferenceValue( $linkId ): PageReferenceValue { + return new PageReferenceValue( NS_FILE, $linkId, WikiAwareEntity::LOCAL ); + } + + protected function makeTitle( $linkId ): Title { + return Title::makeTitle( NS_FILE, $linkId ); + } + + protected function deduplicateLinkIds( $linkIds ) { + if ( !is_array( $linkIds ) ) { + $linkIds = iterator_to_array( $linkIds ); + } + return array_unique( $linkIds ); + } + + protected function finishUpdate() { 
+ $this->updateChangeTags(); + $this->invalidateImageDescriptions(); + } + + /** + * Add the mw-add-media or mw-remove-media change tags to the edit if appropriate + */ + private function updateChangeTags() { + $enabledTags = ChangeTags::getSoftwareTags(); + $mediaChangeTags = []; + if ( count( $this->insertedLinks ) && in_array( 'mw-add-media', $enabledTags ) ) { + $mediaChangeTags[] = 'mw-add-media'; + } + if ( count( $this->deletedLinks ) && in_array( 'mw-remove-media', $enabledTags ) ) { + $mediaChangeTags[] = 'mw-remove-media'; + } + $revisionRecord = $this->getRevision(); + if ( $revisionRecord && count( $mediaChangeTags ) ) { + ChangeTags::addTags( $mediaChangeTags, null, $revisionRecord->getId() ); + } + } + + /** + * Invalidate all image description pages which had links added or removed + */ + private function invalidateImageDescriptions() { + PurgeJobUtils::invalidatePages( + $this->getDB(), NS_FILE, + array_merge( $this->insertedLinks, $this->deletedLinks ) ); + } +} diff --git a/includes/deferred/LinksUpdate/InterwikiLinksTable.php b/includes/deferred/LinksUpdate/InterwikiLinksTable.php new file mode 100644 index 000000000000..a5b4f06099b5 --- /dev/null +++ b/includes/deferred/LinksUpdate/InterwikiLinksTable.php @@ -0,0 +1,97 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use ParserOutput; + +/** + * iwlinks + * + * Link ID format: string[] + * - 0: Interwiki prefix + * - 1: Foreign title + * + * @since 1.38 + */ +class InterwikiLinksTable extends LinksTable { + /** @var array */ + private $newLinks = []; + + /** @var array|null */ + private $existingLinks; + + public function setParserOutput( ParserOutput $parserOutput ) { + $this->newLinks = $parserOutput->getInterwikiLinks(); + } + + protected function getTableName() { + return 'iwlinks'; + } + + protected function getFromField() { + return 'iwl_from'; + } + + protected function getExistingFields() { + return [ 'iwl_prefix', 'iwl_title' ]; + } + + protected function getNewLinkIDs() { + 
foreach ( $this->newLinks as $prefix => $links ) { + foreach ( $links as $title => $unused ) { + yield [ $prefix, $title ]; + } + } + } + + /** + * Get the existing links as a 2-d array, with the prefix in the first key, + * the title in the second key, and the value arbitrary. + * + * @return array + */ + private function getExistingLinks() { + if ( $this->existingLinks === null ) { + $this->existingLinks = []; + foreach ( $this->fetchExistingRows() as $row ) { + $this->existingLinks[$row->iwl_prefix][$row->iwl_title] = true; + } + } + return $this->existingLinks; + } + + protected function getExistingLinkIDs() { + foreach ( $this->getExistingLinks() as $prefix => $links ) { + foreach ( $links as $title => $unused ) { + yield [ $prefix, $title ]; + } + } + } + + protected function isExisting( $linkId ) { + $links = $this->getExistingLinks(); + [ $prefix, $title ] = $linkId; + return isset( $links[$prefix][$title] ); + } + + protected function isInNewSet( $linkId ) { + [ $prefix, $title ] = $linkId; + return isset( $this->newLinks[$prefix][$title] ); + } + + protected function insertLink( $linkId ) { + [ $prefix, $title ] = $linkId; + $this->insertRow( [ + 'iwl_prefix' => $prefix, + 'iwl_title' => $title + ] ); + } + + protected function deleteLink( $linkId ) { + [ $prefix, $title ] = $linkId; + $this->deleteRow( [ + 'iwl_prefix' => $prefix, + 'iwl_title' => $title + ] ); + } +} diff --git a/includes/deferred/LinksUpdate/LangLinksTable.php b/includes/deferred/LinksUpdate/LangLinksTable.php new file mode 100644 index 000000000000..09722338d58f --- /dev/null +++ b/includes/deferred/LinksUpdate/LangLinksTable.php @@ -0,0 +1,99 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use ParserOutput; + +/** + * langlinks + * + * Link ID format: string[] + * - 0: Language code + * - 1: Foreign title + * + * @since 1.38 + */ +class LangLinksTable extends LinksTable { + private $newLinks = []; + private $existingLinks; + + public function setParserOutput( 
ParserOutput $parserOutput ) { + // Convert the format of the interlanguage links + // I didn't want to change it in the ParserOutput, because that array is passed all + // the way back to the skin, so either a skin API break would be required, or an + // inefficient back-conversion. + $ill = $parserOutput->getLanguageLinks(); + $this->newLinks = []; + foreach ( $ill as $link ) { + [ $key, $title ] = explode( ':', $link, 2 ); + $this->newLinks[$key] = $title; + } + } + + protected function getTableName() { + return 'langlinks'; + } + + protected function getFromField() { + return 'll_from'; + } + + protected function getExistingFields() { + return [ 'll_lang', 'll_title' ]; + } + + protected function getNewLinkIDs() { + foreach ( $this->newLinks as $key => $title ) { + yield [ $key, $title ]; + } + } + + /** + * Get the existing links as an array where the key is the language code + * and the value is the title of the target in that language. + * + * @return array + */ + private function getExistingLinks() { + if ( $this->existingLinks === null ) { + $this->existingLinks = []; + foreach ( $this->fetchExistingRows() as $row ) { + $this->existingLinks[$row->ll_lang] = $row->ll_title; + } + } + return $this->existingLinks; + } + + protected function getExistingLinkIDs() { + foreach ( $this->getExistingLinks() as $lang => $title ) { + yield [ $lang, $title ]; + } + } + + protected function isExisting( $linkId ) { + $links = $this->getExistingLinks(); + [ $lang, $title ] = $linkId; + return \array_key_exists( $lang, $links ) + && $links[$lang] === $title; + } + + protected function isInNewSet( $linkId ) { + [ $lang, $title ] = $linkId; + return \array_key_exists( $lang, $this->newLinks ) + && $this->newLinks[$lang] === $title; + } + + protected function insertLink( $linkId ) { + [ $lang, $title ] = $linkId; + $this->insertRow( [ + 'll_lang' => $lang, + 'll_title' => $title + ] ); + } + + protected function deleteLink( $linkId ) { + $this->deleteRow( [ + 'll_lang' => 
$linkId[0] + ] ); + } +} diff --git a/includes/deferred/LinksUpdate/LinksTable.php b/includes/deferred/LinksUpdate/LinksTable.php new file mode 100644 index 000000000000..c32893220450 --- /dev/null +++ b/includes/deferred/LinksUpdate/LinksTable.php @@ -0,0 +1,472 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use MediaWiki\Page\PageIdentity; +use MediaWiki\Revision\RevisionRecord; +use ParserOutput; +use Wikimedia\Rdbms\IDatabase; +use Wikimedia\Rdbms\IResultWrapper; +use Wikimedia\Rdbms\LBFactory; + +/** + * The base class for classes which update a single link table. + * + * A LinksTable object is a container for new and existing link sets outbound + * from a single page, and an abstraction of the associated DB schema. The + * object stores state related to an update of the outbound links of a page. + * + * Explanation of link ID concept + * ------------------------------ + * + * Link IDs identify a link in the new or old state, or in the change arrays. + * They are opaque to the base class and are type-hinted here as mixed. + * + * Conventionally, the link ID is string|string[] and contains the link target + * fields. + * + * The link ID should contain enough information so that the base class can + * tell whether an existing link is in the new set, or vice versa, for the + * purposes of incremental updates. If a change to a field would cause a DB + * update, the field should be in the link ID. + * + * For example, a change to cl_timestamp does not trigger an update, so + * cl_timestamp is not in the link ID. 
+ * + * @stable to extend + * @since 1.38 + */ +abstract class LinksTable { + /** Link type: Inserted (added) links */ + public const INSERTED = 1; + + /** Link type: Deleted (removed) links */ + public const DELETED = 2; + + /** Link type: Changed (inserted or removed) links */ + public const CHANGED = 3; + + /** Link type: existing/old links */ + public const OLD = 4; + + /** Link type: new links (from the ParserOutput) */ + public const NEW = 5; + + /** + * Rows to delete. An array of associative arrays, each associative array + * being the conditions for a delete query. Common conditions should be + * leftmost in the associative array so that they can be factored out. + * + * @var array + */ + protected $rowsToDelete = []; + + /** + * Rows to insert. An array of associative arrays, each associative array + * mapping field names to values. + * + * @var array + */ + protected $rowsToInsert = []; + + /** @var array Link IDs for inserted links */ + protected $insertedLinks = []; + + /** @var array Link IDs for deleted links */ + protected $deletedLinks = []; + + /** @var LBFactory */ + private $lbFactory; + + /** @var IDatabase */ + private $db; + + /** @var PageIdentity */ + private $sourcePage; + + /** @var int */ + private $batchSize; + + /** @var mixed */ + private $ticket; + + /** @var RevisionRecord */ + private $revision; + + /** @var callable|null Callback for deprecated hook */ + private $afterUpdateHook; + + /** @var bool */ + protected $strictTestMode; + + /** + * This is called by the factory to inject dependencies for the base class. + * This is used instead of the constructor so that changes can be made to + * the injected parameters without breaking the subclass constructors. 
+ * + * @param LBFactory $lbFactory + * @param PageIdentity $sourcePage + * @param int $batchSize + * @param callable|null $afterUpdateHook + */ + final public function injectBaseDependencies( + LBFactory $lbFactory, + PageIdentity $sourcePage, + $batchSize, + $afterUpdateHook + ) { + $this->lbFactory = $lbFactory; + $this->db = $this->lbFactory->getMainLB()->getConnection( DB_PRIMARY ); + $this->sourcePage = $sourcePage; + $this->batchSize = $batchSize; + $this->afterUpdateHook = $afterUpdateHook; + } + + /** + * Set the empty transaction ticket + * + * @param mixed $ticket + */ + public function setTransactionTicket( $ticket ) { + $this->ticket = $ticket; + } + + /** + * Set the revision associated with the edit. + * + * @param RevisionRecord $revision + */ + public function setRevision( RevisionRecord $revision ) { + $this->revision = $revision; + } + + /** + * Subclasses should implement this to extract the data they need from the + * ParserOutput. + * + * To support a future refactor of LinksDeletionUpdate, if this method is + * not called, the subclass should assume that the new state is empty. + * + * @param ParserOutput $parserOutput + */ + abstract public function setParserOutput( ParserOutput $parserOutput ); + + /** + * Get the table name. + * + * @return string + */ + abstract protected function getTableName(); + + /** + * Get the name of the field which links to page_id. + * + * @return string + */ + abstract protected function getFromField(); + + /** + * Get the fields to be used in fetchExistingRows(). Note that + * fetchExistingRows() is just a helper for subclasses. The value returned + * here is effectively private to the subclass. + * + * @return array + */ + abstract protected function getExistingFields(); + + /** + * Get an array (or iterator) of link IDs for the new state. + * + * See the LinksTable doc comment for an explanation of link IDs. 
+ * + * @return iterable<mixed> + */ + abstract protected function getNewLinkIDs(); + + /** + * Get an array (or iterator) of link IDs for the existing state. The + * subclass should load the data from the database. There is + * fetchExistingRows() to make this easier but the subclass is responsible + * for caching. + * + * See the LinksTable doc comment for an explanation of link IDs. + * + * @return iterable<mixed> + */ + abstract protected function getExistingLinkIDs(); + + /** + * Determine whether a link (from the new set) is in the existing set. + * + * @param mixed $linkId + * @return bool + */ + abstract protected function isExisting( $linkId ); + + /** + * Determine whether a link (from the existing set) is in the new set. + * + * @param mixed $linkId + * @return bool + */ + abstract protected function isInNewSet( $linkId ); + + /** + * Insert a link identified by ID. The subclass is expected to queue the + * insertion by calling insertRow(). + * + * @param mixed $linkId + */ + abstract protected function insertLink( $linkId ); + + /** + * Delete a link identified by ID. The subclass is expected to queue the + * deletion by calling deleteRow(). + * + * @param mixed $linkId + */ + abstract protected function deleteLink( $linkId ); + + /** + * @stable to override + * @return IDatabase + */ + protected function getDB(): IDatabase { + return $this->db; + } + + /** + * @return LBFactory + */ + protected function getLBFactory(): LBFactory { + return $this->lbFactory; + } + + /** + * Get the page_id of the source page + * + * @return int + */ + protected function getSourcePageId(): int { + return $this->sourcePage->getId(); + } + + /** + * Get the source page, i.e. the page which is being updated and is the + * source of links. + * + * @return PageIdentity + */ + protected function getSourcePage(): PageIdentity { + return $this->sourcePage; + } + + /** + * Get the maximum number of rows to update in a batch. 
+ * + * @return int + */ + protected function getBatchSize(): int { + return $this->batchSize; + } + + /** + * Get the empty transaction ticket, or null if there is none. + * + * @return mixed + */ + protected function getTransactionTicket() { + return $this->ticket; + } + + /** + * Get the RevisionRecord of the new revision, if the LinksUpdate caller + * injected one. + * + * @return RevisionRecord|null + */ + protected function getRevision(): ?RevisionRecord { + return $this->revision; + } + + /** + * Get field=>value associative array for the from field(s) + * + * @stable to override + * @return array + */ + protected function getFromConds() { + return [ $this->getFromField() => $this->getSourcePageId() ]; + } + + /** + * Do a select query to fetch the existing rows. This is a helper for + * subclasses. + * + * @return IResultWrapper + */ + protected function fetchExistingRows(): IResultWrapper { + return $this->getDB()->newSelectQueryBuilder() + ->select( $this->getExistingFields() ) + ->from( $this->getTableName() ) + ->where( $this->getFromConds() ) + ->caller( __METHOD__ ) + ->fetchResultSet(); + } + + /** + * Execute the update + */ + final public function update() { + $this->startUpdate(); + foreach ( $this->getNewLinkIDs() as $link ) { + if ( !$this->isExisting( $link ) ) { + $this->insertLink( $link ); + $this->insertedLinks[] = $link; + } + } + + foreach ( $this->getExistingLinkIDs() as $link ) { + if ( !$this->isInNewSet( $link ) ) { + $this->deleteLink( $link ); + $this->deletedLinks[] = $link; + } + } + $this->doWrites(); + $this->finishUpdate(); + } + + /** + * Queue a row for insertion. Subclasses are expected to call this from + * insertLink(). The "from" field should not be included in the row. + * + * @param array $row Associative array mapping fields to values. + */ + protected function insertRow( $row ) { + $row += $this->getFromConds(); + $this->rowsToInsert[] = $row; + } + + /** + * Queue a deletion operation. 
Subclasses are expected to call this from + * deleteLink(). The "from" field does not need to be included in the + * conditions. + * + * Most often, the conditions match a single row, but this is not required. + * + * @param array $conds Associative array mapping fields to values, + * specifying the conditions for a delete query. + */ + protected function deleteRow( $conds ) { + // Put the "from" field leftmost, so it can be factored out + $conds = $this->getFromConds() + $conds; + $this->rowsToDelete[] = $conds; + } + + /** + * Subclasses can override this to do any necessary setup before individual + * write operations begin. + * + * @stable to override + */ + protected function startUpdate() { + } + + /** + * Subclasses can override this to do any updates associated with their + * link data, for example dispatching HTML update jobs. + * + * @stable to override + */ + protected function finishUpdate() { + } + + /** + * Do the common DB operations + */ + protected function doWrites() { + $db = $this->getDB(); + $table = $this->getTableName(); + $domainId = $db->getDomainID(); + $batchSize = $this->getBatchSize(); + $ticket = $this->getTransactionTicket(); + + foreach ( array_chunk( $this->rowsToDelete, $batchSize ) as $chunk ) { + $factoredConds = $db->factorConds( $chunk ); + $db->delete( + $table, + $factoredConds, + __METHOD__ + ); + $this->lbFactory->commitAndWaitForReplication( + __METHOD__, $ticket, [ 'domain' => $domainId ] + ); + } + + $insertBatches = array_chunk( $this->rowsToInsert, $batchSize ); + foreach ( $insertBatches as $insertBatch ) { + $db->insert( $table, $insertBatch, __METHOD__, $this->getInsertOptions() ); + $this->lbFactory->commitAndWaitForReplication( + __METHOD__, $ticket, [ 'domain' => $domainId ] + ); + } + + if ( count( $this->rowsToInsert ) && $this->afterUpdateHook ) { + ( $this->afterUpdateHook )( $table, $this->rowsToInsert ); + } + } + + /** + * Omit conflict resolution options from the insert query so that testing + * can 
confirm that the incremental update logic was correct. + * + * @param bool $mode + */ + public function setStrictTestMode( $mode = true ) { + $this->strictTestMode = $mode; + } + + /** + * Get the options for the insert queries + * + * @return array + */ + protected function getInsertOptions() { + if ( $this->strictTestMode ) { + return []; + } else { + return [ 'IGNORE' ]; + } + } + + /** + * Get an array or iterator of link IDs of a given type. Some subclasses + * use this to provide typed data to callers. This is not public because + * link IDs are a private concept. + * + * @param int $setType One of the class constants: self::INSERTED, self::DELETED, + * self::CHANGED, self::OLD or self::NEW. + * @return iterable<mixed> + */ + protected function getLinkIDs( $setType ) { + switch ( $setType ) { + case self::INSERTED: + return $this->insertedLinks; + + case self::DELETED: + return $this->deletedLinks; + + case self::CHANGED: + return array_merge( $this->insertedLinks, $this->deletedLinks ); + + case self::OLD: + return $this->getExistingLinkIDs(); + + case self::NEW: + return $this->getNewLinkIDs(); + + default: + throw new \InvalidArgumentException( __METHOD__ . ": Unknown link type" ); + } + } +} diff --git a/includes/deferred/LinksUpdate/LinksTableGroup.php b/includes/deferred/LinksUpdate/LinksTableGroup.php new file mode 100644 index 000000000000..850a12e3fc33 --- /dev/null +++ b/includes/deferred/LinksUpdate/LinksTableGroup.php @@ -0,0 +1,218 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use MediaWiki\Config\ServiceOptions; +use MediaWiki\MediaWikiServices; +use MediaWiki\Page\PageIdentity; +use MediaWiki\Revision\RevisionRecord; +use ParserOutput; +use Wikimedia\ObjectFactory\ObjectFactory; +use Wikimedia\Rdbms\LBFactory; + +/** + * @since 1.38 + */ +class LinksTableGroup { + /** + * ObjectFactory specifications for the subclasses. The following + * additional keys are defined: + * + * - serviceOptions: An array of configuration variable names. 
If this is + * set, the specified configuration will be sent to the subclass + * constructor as a ServiceOptions object. + */ + private const CORE_LIST = [ + 'categorylinks' => [ + 'class' => CategoryLinksTable::class, + 'services' => [ + 'LanguageConverterFactory', + 'CollationFactory', + 'NamespaceInfo', + 'WikiPageFactory' + ] + ], + 'externallinks' => [ + 'class' => ExternalLinksTable::class + ], + 'imagelinks' => [ + 'class' => ImageLinksTable::class + ], + 'iwlinks' => [ + 'class' => InterwikiLinksTable::class + ], + 'langlinks' => [ + 'class' => LangLinksTable::class + ], + 'pagelinks' => [ + 'class' => PageLinksTable::class + ], + 'page_props' => [ + 'class' => PagePropsTable::class, + 'services' => [ + 'JobQueueGroup' + ], + 'serviceOptions' => PagePropsTable::CONSTRUCTOR_OPTIONS + ], + 'templatelinks' => [ + 'class' => TemplateLinksTable::class + ] + ]; + + /** @var ObjectFactory */ + private $objectFactory; + + /** @var LBFactory */ + private $lbFactory; + + /** @var PageIdentity */ + private $page; + + /** @var ParserOutput|null */ + private $parserOutput; + + /** @var int */ + private $batchSize; + + /** @var callable|null */ + private $afterUpdateHook; + + /** @var mixed */ + private $ticket; + + /** @var RevisionRecord|null */ + private $revision; + + /** @var LinksTable[] */ + private $tables = []; + + /** + * @param ObjectFactory $objectFactory + * @param LBFactory $lbFactory + * @param PageIdentity $page + * @param int $batchSize + * @param callable|null $afterUpdateHook + */ + public function __construct( + ObjectFactory $objectFactory, + LBFactory $lbFactory, + PageIdentity $page, + $batchSize, + $afterUpdateHook + ) { + $this->objectFactory = $objectFactory; + $this->lbFactory = $lbFactory; + $this->page = $page; + $this->batchSize = $batchSize; + $this->afterUpdateHook = $afterUpdateHook; + } + + /** + * Set the ParserOutput object to be used in new and existing objects. 
+ * + * @param ParserOutput $parserOutput + */ + public function setParserOutput( ParserOutput $parserOutput ) { + $this->parserOutput = $parserOutput; + foreach ( $this->tables as $table ) { + $table->setParserOutput( $parserOutput ); + } + } + + /** + * Set the transaction ticket to be used in new and existing objects. + * + * @param mixed $ticket + */ + public function setTransactionTicket( $ticket ) { + $this->ticket = $ticket; + foreach ( $this->tables as $table ) { + $table->setTransactionTicket( $ticket ); + } + } + + /** + * Set the revision to be used in new and existing objects. + * + * @param RevisionRecord $revision + */ + public function setRevision( RevisionRecord $revision ) { + $this->revision = $revision; + foreach ( $this->tables as $table ) { + $table->setRevision( $revision ); + } + } + + /** + * Set the strict test mode + * + * @param bool $mode + */ + public function setStrictTestMode( $mode = true ) { + foreach ( $this->getAll() as $table ) { + $table->setStrictTestMode( $mode ); + } + } + + /** + * Get the spec array for a given table. + * + * @param string $tableName + * @return array + */ + private function getSpec( $tableName ) { + if ( !isset( self::CORE_LIST[$tableName] ) ) { + throw new \InvalidArgumentException( + __CLASS__ . ": unknown table name \"$tableName\"" ); + } + return self::CORE_LIST[$tableName]; + } + + /** + * Get a LinksTable for a given table. 
+ * + * @param string $tableName + * @return LinksTable + */ + public function get( $tableName ) { + if ( !isset( $this->tables[$tableName] ) ) { + $spec = $this->getSpec( $tableName ); + if ( isset( $spec['serviceOptions'] ) ) { + $config = MediaWikiServices::getInstance()->getMainConfig(); + $extraArgs = [ new ServiceOptions( $spec['serviceOptions'], $config ) ]; + unset( $spec['serviceOptions'] ); + } else { + $extraArgs = []; + } + /** @var LinksTable $table */ + $table = $this->objectFactory->createObject( $spec, [ 'extraArgs' => $extraArgs ] ); + $table->injectBaseDependencies( + $this->lbFactory, + $this->page, + $this->batchSize, + $this->afterUpdateHook + ); + if ( $this->parserOutput ) { + $table->setParserOutput( $this->parserOutput ); + } + if ( $this->ticket ) { + $table->setTransactionTicket( $this->ticket ); + } + if ( $this->revision ) { + $table->setRevision( $this->revision ); + } + $this->tables[$tableName] = $table; + } + return $this->tables[$tableName]; + } + + /** + * Get LinksTable objects for all known links tables. 
+ * @return iterable<LinksTable> + */ + public function getAll() { + foreach ( self::CORE_LIST as $tableName => $spec ) { + yield $this->get( $tableName ); + } + } +} diff --git a/includes/deferred/LinksUpdate/PageLinksTable.php b/includes/deferred/LinksUpdate/PageLinksTable.php new file mode 100644 index 000000000000..15a21a55f0a1 --- /dev/null +++ b/includes/deferred/LinksUpdate/PageLinksTable.php @@ -0,0 +1,34 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use ParserOutput; + +/** + * pagelinks + */ +class PageLinksTable extends GenericPageLinksTable { + public function setParserOutput( ParserOutput $parserOutput ) { + $this->newLinks = $parserOutput->getLinks(); + } + + protected function getTableName() { + return 'pagelinks'; + } + + protected function getFromField() { + return 'pl_from'; + } + + protected function getNamespaceField() { + return 'pl_namespace'; + } + + protected function getTitleField() { + return 'pl_title'; + } + + protected function getFromNamespaceField() { + return 'pl_from_namespace'; + } +} diff --git a/includes/deferred/LinksUpdate/PagePropsTable.php b/includes/deferred/LinksUpdate/PagePropsTable.php new file mode 100644 index 000000000000..ca7097d16c2b --- /dev/null +++ b/includes/deferred/LinksUpdate/PagePropsTable.php @@ -0,0 +1,190 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use HTMLCacheUpdateJob; +use JobQueueGroup; +use MediaWiki\Config\ServiceOptions; +use ParserOutput; + +/** + * page_props + * + * Link ID format: string[] + * 0: Property name (pp_propname) + * 1: Property value (pp_value) + * + * @since 1.38 + */ +class PagePropsTable extends LinksTable { + /** @var JobQueueGroup */ + private $jobQueueGroup; + + /** @var array */ + private $newProps = []; + + /** @var array|null */ + private $existingProps; + + /** + * The configured PagePropLinkInvalidations. 
An associative array where the + * key is the property name and the value is a string or array of strings + * giving the link table names which will be used for backlink cache + * invalidation. + * + * @var array + */ + private $linkInvalidations; + + public const CONSTRUCTOR_OPTIONS = [ 'PagePropLinkInvalidations' ]; + + public function __construct( + ServiceOptions $options, + JobQueueGroup $jobQueueGroup + ) { + $this->jobQueueGroup = $jobQueueGroup; + $options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS ); + $this->linkInvalidations = $options->get( 'PagePropLinkInvalidations' ); + } + + public function setParserOutput( ParserOutput $parserOutput ) { + $this->newProps = $parserOutput->getPageProperties(); + } + + protected function getTableName() { + return 'page_props'; + } + + protected function getFromField() { + return 'pp_page'; + } + + protected function getExistingFields() { + return [ 'pp_propname', 'pp_value' ]; + } + + protected function getNewLinkIDs() { + foreach ( $this->newProps as $name => $value ) { + yield [ $name, $value ]; + } + } + + /** + * Get the existing page_props as an associative array + * + * @return array + */ + private function getExistingProps() { + if ( $this->existingProps === null ) { + $this->existingProps = []; + foreach ( $this->fetchExistingRows() as $row ) { + $this->existingProps[$row->pp_propname] = $row->pp_value; + } + } + return $this->existingProps; + } + + protected function getExistingLinkIDs() { + foreach ( $this->getExistingProps() as $name => $value ) { + yield [ $name, $value ]; + } + } + + protected function isExisting( $linkId ) { + $existing = $this->getExistingProps(); + [ $name, $value ] = $linkId; + return \array_key_exists( $name, $existing ) + && $existing[$name] === $value; + } + + protected function isInNewSet( $linkId ) { + [ $name, $value ] = $linkId; + return \array_key_exists( $name, $this->newProps ) + && $this->newProps[$name] === $value; + } + + protected function insertLink( $linkId ) { 
+ [ $name, $value ] = $linkId; + $this->insertRow( [ + 'pp_propname' => $name, + 'pp_value' => $value, + 'pp_sortkey' => $this->getPropertySortKeyValue( $value ) + ] ); + } + + /** + * Determines the sort key for the given property value. + * This will return $value if it is a float or int, + * 1 or resp. 0 if it is a bool, and null otherwise. + * + * @note In the future, we may allow the sortkey to be specified explicitly + * in ParserOutput::setProperty. + * + * @param mixed $value + * + * @return float|null + */ + private function getPropertySortKeyValue( $value ) { + if ( is_int( $value ) || is_float( $value ) || is_bool( $value ) ) { + return floatval( $value ); + } + + return null; + } + + protected function deleteLink( $linkId ) { + $this->deleteRow( [ + 'pp_propname' => $linkId[0] + ] ); + } + + protected function finishUpdate() { + $changed = array_unique( array_merge( + array_column( $this->insertedLinks, 0 ), + array_column( $this->deletedLinks, 0 ) ) ); + $this->invalidateProperties( $changed ); + } + + /** + * Invalidate the properties given the list of changed property names + * + * @param string[] $changed + */ + private function invalidateProperties( array $changed ) { + $jobs = []; + foreach ( $changed as $name ) { + if ( isset( $this->linkInvalidations[$name] ) ) { + $inv = $this->linkInvalidations[$name]; + if ( !is_array( $inv ) ) { + $inv = [ $inv ]; + } + foreach ( $inv as $table ) { + $jobs[] = HTMLCacheUpdateJob::newForBacklinks( + $this->getSourcePage(), + $table, + [ 'causeAction' => 'page-props' ] + ); + } + } + } + + if ( $jobs ) { + $this->jobQueueGroup->lazyPush( $jobs ); + } + } + + /** + * Get the properties for a given link set as an associative array + * + * @param int $setType The set type as in LinksTable::getLinkIDs() + * @return array + */ + public function getAssocArray( $setType ) { + $props = []; + foreach ( $this->getLinkIDs( $setType ) as $linkId ) { + [ $name, $value ] = $linkId; + $props[$name] = $value; + } + return 
$props; + } +} diff --git a/includes/deferred/LinksUpdate/TemplateLinksTable.php b/includes/deferred/LinksUpdate/TemplateLinksTable.php new file mode 100644 index 000000000000..0cc655fc517c --- /dev/null +++ b/includes/deferred/LinksUpdate/TemplateLinksTable.php @@ -0,0 +1,36 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use ParserOutput; + +/** + * templatelinks + * + * @since 1.38 + */ +class TemplateLinksTable extends GenericPageLinksTable { + public function setParserOutput( ParserOutput $parserOutput ) { + $this->newLinks = $parserOutput->getTemplates(); + } + + protected function getTableName() { + return 'templatelinks'; + } + + protected function getFromField() { + return 'tl_from'; + } + + protected function getNamespaceField() { + return 'tl_namespace'; + } + + protected function getTitleField() { + return 'tl_title'; + } + + protected function getFromNamespaceField() { + return 'tl_from_namespace'; + } +} diff --git a/includes/deferred/LinksUpdate/TitleLinksTable.php b/includes/deferred/LinksUpdate/TitleLinksTable.php new file mode 100644 index 000000000000..8a28638d8ced --- /dev/null +++ b/includes/deferred/LinksUpdate/TitleLinksTable.php @@ -0,0 +1,88 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use MediaWiki\Page\PageReferenceValue; +use Title; + +/** + * An abstract base class for tables that link to local titles. 
+ * + * @stable to extend + * @since 1.38 + */ +abstract class TitleLinksTable extends LinksTable { + /** + * Convert a link ID to a PageReferenceValue + * + * @param mixed $linkId + * @return PageReferenceValue + */ + abstract protected function makePageReferenceValue( $linkId ): PageReferenceValue; + + /** + * Convert a link ID to a Title + * + * @stable to override + * @param mixed $linkId + * @return Title + */ + protected function makeTitle( $linkId ): Title { + return Title::castFromPageReference( $this->makePageReferenceValue( $linkId ) ); + } + + /** + * Given an iterator over link IDs, remove links which go to the same + * title, leaving only one link per title. + * + * @param iterable<mixed> $linkIds + * @return iterable<mixed> + */ + abstract protected function deduplicateLinkIds( $linkIds ); + + /** + * Get link IDs for a given set type, filtering out duplicate links to the + * same title. + * + * @param int $setType + * @return iterable<mixed> + */ + protected function getDeduplicatedLinkIds( $setType ) { + $linkIds = $this->getLinkIDs( $setType ); + // Only the CHANGED set type should have duplicates + if ( $setType === self::CHANGED ) { + $linkIds = $this->deduplicateLinkIds( $linkIds ); + } + return $linkIds; + } + + /** + * Get a link set as an array of Title objects. This is memory-inefficient. + * + * @deprecated since 1.38 + * @param int $setType + * @return Title[] + */ + public function getTitleArray( $setType ) { + $linkIds = $this->getDeduplicatedLinkIds( $setType ); + $titles = []; + foreach ( $linkIds as $linkId ) { + $titles[] = $this->makeTitle( $linkId ); + } + return $titles; + } + + /** + * Get a link set as an iterator over PageReferenceValue objects. 
+ * + * @param int $setType + * @return iterable<PageReferenceValue> + * @phan-return \Traversable + */ + public function getPageReferenceIterator( $setType ) { + $linkIds = $this->getDeduplicatedLinkIds( $setType ); + foreach ( $linkIds as $linkId ) { + yield $this->makePageReferenceValue( $linkId ); + } + } +} diff --git a/tests/phpunit/includes/deferred/LinksDeletionUpdateTest.php b/tests/phpunit/includes/deferred/LinksDeletionUpdateTest.php index 59c6cdeff91f..82db3d71ba6b 100644 --- a/tests/phpunit/includes/deferred/LinksDeletionUpdateTest.php +++ b/tests/phpunit/includes/deferred/LinksDeletionUpdateTest.php @@ -3,6 +3,18 @@ /** * @covers LinksDeletionUpdate * @covers LinksUpdate + * @covers \MediaWiki\Deferred\LinksUpdate\CategoryLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\ExternalLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\GenericPageLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\ImageLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\InterwikiLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\LangLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\LinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\LinksTableGroup + * @covers \MediaWiki\Deferred\LinksUpdate\PageLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\PagePropsTable + * @covers \MediaWiki\Deferred\LinksUpdate\TemplateLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\TitleLinksTable * * @group LinksUpdate * @group Database diff --git a/tests/phpunit/includes/deferred/LinksUpdateTest.php b/tests/phpunit/includes/deferred/LinksUpdateTest.php index 9d090824b4cc..db3efd269ce4 100644 --- a/tests/phpunit/includes/deferred/LinksUpdateTest.php +++ b/tests/phpunit/includes/deferred/LinksUpdateTest.php @@ -5,6 +5,18 @@ use Wikimedia\TestingAccessWrapper; /** * @covers LinksUpdate + * @covers \MediaWiki\Deferred\LinksUpdate\CategoryLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\ExternalLinksTable + * @covers 
\MediaWiki\Deferred\LinksUpdate\GenericPageLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\ImageLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\InterwikiLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\LangLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\LinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\LinksTableGroup + * @covers \MediaWiki\Deferred\LinksUpdate\PageLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\PagePropsTable + * @covers \MediaWiki\Deferred\LinksUpdate\TemplateLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\TitleLinksTable * * @group LinksUpdate * @group Database |