summaryrefslogtreecommitdiff
path: root/includes/site/MediaWikiSite.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/site/MediaWikiSite.php')
-rw-r--r--includes/site/MediaWikiSite.php352
1 files changed, 352 insertions, 0 deletions
diff --git a/includes/site/MediaWikiSite.php b/includes/site/MediaWikiSite.php
new file mode 100644
index 00000000..05092723
--- /dev/null
+++ b/includes/site/MediaWikiSite.php
@@ -0,0 +1,352 @@
+<?php
+/**
+ * Class representing a MediaWiki site.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Site
+ * @license GNU GPL v2+
+ * @author John Erling Blad < jeblad@gmail.com >
+ * @author Daniel Kinzler
+ * @author Jeroen De Dauw < jeroendedauw@gmail.com >
+ */
+
+/**
+ * Class representing a MediaWiki site.
+ *
+ * @since 1.21
+ *
+ * @ingroup Site
+ */
+class MediaWikiSite extends Site {
+
+ const PATH_FILE = 'file_path';
+ const PATH_PAGE = 'page_path';
+
+ /**
+ * @since 1.21
+ * @deprecated Just use the constructor or the factory Site::newForType
+ *
+ * @param integer $globalId
+ *
+ * @return MediaWikiSite
+ */
+ public static function newFromGlobalId( $globalId ) {
+ $site = new static();
+ $site->setGlobalId( $globalId );
+ return $site;
+ }
+
+ /**
+ * Constructor.
+ *
+ * @since 1.21
+ *
+ * @param string $type
+ */
+ public function __construct( $type = self::TYPE_MEDIAWIKI ) {
+ parent::__construct( $type );
+ }
+
+ /**
+ * Returns the database form of the given title.
+ *
+ * @since 1.21
+ *
+ * @param string $title the target page's title, in normalized form.
+ *
+ * @return String
+ */
+ public function toDBKey( $title ) {
+ return str_replace( ' ', '_', $title );
+ }
+
+ /**
+ * Returns the normalized form of the given page title, using the normalization rules of the given site.
+ * If the given title is a redirect, the redirect weill be resolved and the redirect target is returned.
+ *
+ * @note : This actually makes an API request to the remote site, so beware that this function is slow and depends
+ * on an external service.
+ *
+ * @note : If MW_PHPUNIT_TEST is defined, the call to the external site is skipped, and the title
+ * is normalized using the local normalization rules as implemented by the Title class.
+ *
+ * @see Site::normalizePageName
+ *
+ * @since 1.21
+ *
+ * @param string $pageName
+ *
+ * @return string
+ * @throws MWException
+ */
+ public function normalizePageName( $pageName ) {
+
+ // Check if we have strings as arguments.
+ if ( !is_string( $pageName ) ) {
+ throw new MWException( '$pageName must be a string' );
+ }
+
+ // Go on call the external site
+ if ( defined( 'MW_PHPUNIT_TEST' ) ) {
+ // If the code is under test, don't call out to other sites, just normalize locally.
+ // Note: this may cause results to be inconsistent with the actual normalization used by the respective remote site!
+
+ $t = Title::newFromText( $pageName );
+ return $t->getPrefixedText();
+ } else {
+
+ // Make sure the string is normalized into NFC (due to the bug 40017)
+ // but do nothing to the whitespaces, that should work appropriately.
+ // @see https://bugzilla.wikimedia.org/show_bug.cgi?id=40017
+ $pageName = UtfNormal::cleanUp( $pageName );
+
+ // Build the args for the specific call
+ $args = array(
+ 'action' => 'query',
+ 'prop' => 'info',
+ 'redirects' => true,
+ 'converttitles' => true,
+ 'format' => 'json',
+ 'titles' => $pageName,
+ //@todo: options for maxlag and maxage
+ // Note that maxlag will lead to a long delay before a reply is made,
+ // but that maxage can avoid the extreme delay. On the other hand
+ // maxage could be nice to use anyhow as it stops unnecessary requests.
+ // Also consider smaxage if maxage is used.
+ );
+
+ $url = $this->getFileUrl( 'api.php' ) . '?' . wfArrayToCgi( $args );
+
+ // Go on call the external site
+ //@todo: we need a good way to specify a timeout here.
+ $ret = Http::get( $url );
+ }
+
+ if ( $ret === false ) {
+ wfDebugLog( "MediaWikiSite", "call to external site failed: $url" );
+ return false;
+ }
+
+ $data = FormatJson::decode( $ret, true );
+
+ if ( !is_array( $data ) ) {
+ wfDebugLog( "MediaWikiSite", "call to <$url> returned bad json: " . $ret );
+ return false;
+ }
+
+ $page = static::extractPageRecord( $data, $pageName );
+
+ if ( isset( $page['missing'] ) ) {
+ wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for a missing page title! " . $ret );
+ return false;
+ }
+
+ if ( isset( $page['invalid'] ) ) {
+ wfDebugLog( "MediaWikiSite", "call to <$url> returned a marker for an invalid page title! " . $ret );
+ return false;
+ }
+
+ if ( !isset( $page['title'] ) ) {
+ wfDebugLog( "MediaWikiSite", "call to <$url> did not return a page title! " . $ret );
+ return false;
+ }
+
+ return $page['title'];
+ }
+
+ /**
+ * Get normalization record for a given page title from an API response.
+ *
+ * @since 1.21
+ *
+ * @param array $externalData A reply from the API on a external server.
+ * @param string $pageTitle Identifies the page at the external site, needing normalization.
+ *
+ * @return array|boolean a 'page' structure representing the page identified by $pageTitle.
+ */
+ private static function extractPageRecord( $externalData, $pageTitle ) {
+ // If there is a special case with only one returned page
+ // we can cheat, and only return
+ // the single page in the "pages" substructure.
+ if ( isset( $externalData['query']['pages'] ) ) {
+ $pages = array_values( $externalData['query']['pages'] );
+ if ( count( $pages) === 1 ) {
+ return $pages[0];
+ }
+ }
+ // This is only used during internal testing, as it is assumed
+ // a more optimal (and lossfree) storage.
+ // Make initial checks and return if prerequisites are not meet.
+ if ( !is_array( $externalData ) || !isset( $externalData['query'] ) ) {
+ return false;
+ }
+ // Loop over the tree different named structures, that otherwise are similar
+ $structs = array(
+ 'normalized' => 'from',
+ 'converted' => 'from',
+ 'redirects' => 'from',
+ 'pages' => 'title'
+ );
+ foreach ( $structs as $listId => $fieldId ) {
+ // Check if the substructure exist at all.
+ if ( !isset( $externalData['query'][$listId] ) ) {
+ continue;
+ }
+ // Filter the substructure down to what we actually are using.
+ $collectedHits = array_filter(
+ array_values( $externalData['query'][$listId] ),
+ function( $a ) use ( $fieldId, $pageTitle ) {
+ return $a[$fieldId] === $pageTitle;
+ }
+ );
+ // If still looping over normalization, conversion or redirects,
+ // then we need to keep the new page title for later rounds.
+ if ( $fieldId === 'from' && is_array( $collectedHits ) ) {
+ switch ( count( $collectedHits ) ) {
+ case 0:
+ break;
+ case 1:
+ $pageTitle = $collectedHits[0]['to'];
+ break;
+ default:
+ return false;
+ }
+ }
+ // If on the pages structure we should prepare for returning.
+ elseif ( $fieldId === 'title' && is_array( $collectedHits ) ) {
+ switch ( count( $collectedHits ) ) {
+ case 0:
+ return false;
+ case 1:
+ return array_shift( $collectedHits );
+ default:
+ return false;
+ }
+ }
+ }
+ // should never be here
+ return false;
+ }
+
+ /**
+ * @see Site::getLinkPathType
+ * Returns Site::PATH_PAGE
+ *
+ * @since 1.21
+ *
+ * @return string
+ */
+ public function getLinkPathType() {
+ return self::PATH_PAGE;
+ }
+
+ /**
+ * Returns the relative page path.
+ *
+ * @since 1.21
+ *
+ * @return string
+ */
+ public function getRelativePagePath() {
+ return parse_url( $this->getPath( self::PATH_PAGE ), PHP_URL_PATH );
+ }
+
+ /**
+ * Returns the relative file path.
+ *
+ * @since 1.21
+ *
+ * @return string
+ */
+ public function getRelativeFilePath() {
+ return parse_url( $this->getPath( self::PATH_FILE ), PHP_URL_PATH );
+ }
+
+ /**
+ * Sets the relative page path.
+ *
+ * @since 1.21
+ *
+ * @param string $path
+ */
+ public function setPagePath( $path ) {
+ $this->setPath( self::PATH_PAGE, $path );
+ }
+
+ /**
+ * Sets the relative file path.
+ *
+ * @since 1.21
+ *
+ * @param string $path
+ */
+ public function setFilePath( $path ) {
+ $this->setPath( self::PATH_FILE, $path );
+ }
+
+ /**
+ * @see Site::getPageUrl
+ *
+ * This implementation returns a URL constructed using the path returned by getLinkPath().
+ * In addition to the default behavior implemented by Site::getPageUrl(), this
+ * method converts the $pageName to DBKey-format by replacing spaces with underscores
+ * before using it in the URL.
+ *
+ * @since 1.21
+ *
+ * @param string|boolean $pageName Page name or false (default: false)
+ *
+ * @return string
+ */
+ public function getPageUrl( $pageName = false ) {
+ $url = $this->getLinkPath();
+
+ if ( $url === false ) {
+ return false;
+ }
+
+ if ( $pageName !== false ) {
+ $pageName = $this->toDBKey( trim( $pageName ) );
+ $url = str_replace( '$1', wfUrlencode( $pageName ), $url );
+ }
+
+ return $url;
+ }
+
+ /**
+ * Returns the full file path (ie site url + relative file path).
+ * The path should go at the $1 marker. If the $path
+ * argument is provided, the marker will be replaced by it's value.
+ *
+ * @since 1.21
+ *
+ * @param string|boolean $path
+ *
+ * @return string
+ */
+ public function getFileUrl( $path = false ) {
+ $filePath = $this->getPath( self::PATH_FILE );
+
+ if ( $filePath !== false ) {
+ $filePath = str_replace( '$1', $path, $filePath );
+ }
+
+ return $filePath;
+ }
+
+}