summaryrefslogtreecommitdiff
path: root/includes/title/NamespaceAwareForeignTitleFactory.php
blob: bf97e2cdf24dc96c470d9349ad2bf7374ee3432b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
<?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @license GPL 2+
 */

/**
 * A parser that translates page titles on a foreign wiki into ForeignTitle
 * objects, using information about the namespace setup on the foreign site.
 */
class NamespaceAwareForeignTitleFactory implements ForeignTitleFactory {
	/**
	 * Namespace setup of the foreign wiki as passed to the constructor,
	 * mapping namespace ID => namespace name. Null if none was supplied.
	 *
	 * @var array|null
	 */
	protected $foreignNamespaces;

	/**
	 * Reverse lookup built from $foreignNamespaces, mapping the normalized
	 * namespace name => namespace ID. Always an array, so lookups are safe even
	 * when no namespace data was supplied.
	 *
	 * @var array
	 */
	private $foreignNamespacesFlipped = array();

	/**
	 * Normalizes a namespace name so it can be used as a key of
	 * $foreignNamespacesFlipped: spaces become underscores and the result is
	 * lowercased with strtolower().
	 *
	 * @param string $name
	 * @return string
	 */
	private function normalizeNamespaceName( $name ) {
		return strtolower( str_replace( ' ', '_', $name ) );
	}

	/**
	 * @param array|null $foreignNamespaces An array 'id' => 'name' which contains
	 * the complete namespace setup of the foreign wiki. Such data could be
	 * obtained from siteinfo/namespaces in an XML dump file, or by an action API
	 * query such as api.php?action=query&meta=siteinfo&siprop=namespaces. If
	 * this data is unavailable, use NaiveForeignTitleFactory instead.
	 */
	public function __construct( $foreignNamespaces ) {
		$this->foreignNamespaces = $foreignNamespaces;

		if ( $foreignNamespaces === null ) {
			// Nothing to index; the flipped map stays empty.
			return;
		}

		// If two names normalize to the same key, the later entry wins.
		foreach ( $foreignNamespaces as $id => $name ) {
			$newKey = $this->normalizeNamespaceName( $name );
			$this->foreignNamespacesFlipped[$newKey] = $id;
		}
	}

	/**
	 * Creates a ForeignTitle object based on the page title, and optionally the
	 * namespace ID, of a page on a foreign wiki. These values could be, for
	 * example, the <title> and <ns> attributes found in an XML dump.
	 *
	 * @param string $title The page title
	 * @param int|null $ns The namespace ID, or null if this data is not available
	 * @return ForeignTitle
	 */
	public function createForeignTitle( $title, $ns = null ) {
		// Export schema version 0.5 and earlier (MW 1.18 and earlier) does not
		// contain a <ns> tag, so we need to be able to handle that case.
		if ( $ns === null ) {
			return $this->parseTitleNoNs( $title );
		}

		return $this->parseTitleWithNs( $title, $ns );
	}

	/**
	 * Helper function to parse the title when the namespace ID is not specified.
	 *
	 * The text before the first colon is treated as a namespace name only if it
	 * matches (after normalization) one of the known foreign namespaces.
	 * Otherwise the whole title is treated as a page in namespace 0.
	 *
	 * @param string $title
	 * @return ForeignTitle
	 */
	protected function parseTitleNoNs( $title ) {
		$pieces = explode( ':', $title, 2 );
		$key = $this->normalizeNamespaceName( $pieces[0] );

		// Does the part before the colon match a known namespace? Check the
		// foreign namespaces
		$isNamespacePartValid = isset( $this->foreignNamespacesFlipped[$key] );

		if ( count( $pieces ) === 2 && $isNamespacePartValid ) {
			list( $namespaceName, $pageName ) = $pieces;
			$ns = $this->foreignNamespacesFlipped[$key];
		} else {
			$namespaceName = '';
			$pageName = $title;
			$ns = 0;
		}

		return new ForeignTitle( $ns, $namespaceName, $pageName );
	}

	/**
	 * Helper function to parse the title when the namespace value is known.
	 *
	 * If $ns is non-zero but the title contains no colon, the whole title is
	 * used as the page name; if the namespace ID is additionally not present in
	 * the known foreign namespaces, the placeholder name "Ns<id>" is used as the
	 * namespace name.
	 *
	 * @param string $title
	 * @param int $ns
	 * @return ForeignTitle
	 */
	protected function parseTitleWithNs( $title, $ns ) {
		$pieces = explode( ':', $title, 2 );

		// Is $title of the form Namespace:Title (true), or just Title (false)?
		// The loose comparison is deliberate: $ns may be the int 0 or the
		// string '0'.
		$titleIncludesNamespace = ( $ns != '0' && count( $pieces ) === 2 );

		if ( isset( $this->foreignNamespaces[$ns] ) ) {
			$namespaceName = $this->foreignNamespaces[$ns];
		} elseif ( $ns == '0' ) {
			$namespaceName = '';
		} elseif ( $titleIncludesNamespace ) {
			$namespaceName = $pieces[0];
		} else {
			// Non-zero namespace ID, unknown to us, and no colon in the title:
			// there is no prefix to take the name from, so make one up from the
			// ID rather than misusing the page title as a namespace name.
			$namespaceName = 'Ns' . $ns;
		}

		// We assume that the portion of the page title before the colon is the
		// namespace name, except in the case of namespace 0 or when the title
		// has no colon at all.
		if ( $titleIncludesNamespace ) {
			$pageName = $pieces[1];
		} else {
			$pageName = $title;
		}

		return new ForeignTitle( $ns, $namespaceName, $pageName );
	}
}