summaryrefslogtreecommitdiff
path: root/tests/phpunit/includes/GlobalFunctions/wfBCP47Test.php
blob: 166d641f723f01a22034844472583c0092ed8971 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
<?php
/**
 * Checks the language tag formatting performed by wfBCP47().
 *
 * @group GlobalFunctions
 * @covers ::wfBCP47
 */
class WfBCP47Test extends MediaWikiTestCase {
	/**
	 * Verify the output of wfBCP47() for one language code.
	 *
	 * BCP 47 treats language tags as case insensitive, while still
	 * recommending a particular casing for some subtags. Each code from
	 * the provider is therefore submitted twice, once entirely in lower
	 * case and once entirely in upper case, and both submissions must
	 * yield the same expected formatting.
	 *
	 * @see wfBCP47()
	 * @see http://tools.ietf.org/html/bcp47
	 * @dataProvider provideLanguageCodes()
	 *
	 * @param string $code Language code; its casing is normalised before use
	 * @param string $expected Formatting wfBCP47() is expected to return
	 */
	public function testBCP47( $code, $expected ) {
		// The upper case variant is derived from the lower cased one.
		$lowerCased = strtolower( $code );
		$upperCased = strtoupper( $lowerCased );

		$this->assertEquals(
			$expected,
			wfBCP47( $lowerCased ),
			"Applying BCP47 standard to lower case '$lowerCased'"
		);

		$this->assertEquals(
			$expected,
			wfBCP47( $upperCased ),
			"Applying BCP47 standard to upper case '$upperCased'"
		);
	}

	/**
	 * Data sets for testBCP47().
	 *
	 * Every entry is a pair: array( $code, $expected ).
	 *
	 * @return array
	 */
	public static function provideLanguageCodes() {
		$cases = array(
			// ---- Taken from the body of BCP47 (not an exhaustive list) ----

			// Section 2.1.1
			array( 'en-ca-x-ca', 'en-CA-x-ca' ),
			array( 'sgn-be-fr', 'sgn-BE-FR' ),
			array( 'az-latn-x-latn', 'az-Latn-x-latn' ),

			// Section 2.2
			array( 'sr-Latn-RS', 'sr-Latn-RS' ),
			array( 'az-arab-ir', 'az-Arab-IR' ),

			// Section 2.2.5
			array( 'sl-nedis', 'sl-nedis' ),
			array( 'de-ch-1996', 'de-CH-1996' ),

			// Section 2.2.6
			array(
				'en-latn-gb-boont-r-extended-sequence-x-private',
				'en-Latn-GB-boont-r-extended-sequence-x-private'
			),

			// ---- Examples from BCP47 Appendix A ----

			// Simple language subtag
			array( 'DE', 'de' ),
			array( 'fR', 'fr' ),
			array( 'ja', 'ja' ),

			// Language subtag plus script subtag
			array( 'zh-hans', 'zh-Hans' ),
			array( 'sr-cyrl', 'sr-Cyrl' ),
			array( 'sr-latn', 'sr-Latn' ),

			// Extended language subtags and their primary language
			// subtag counterparts
			array( 'zh-cmn-hans-cn', 'zh-cmn-Hans-CN' ),
			array( 'cmn-hans-cn', 'cmn-Hans-CN' ),
			array( 'zh-yue-hk', 'zh-yue-HK' ),
			array( 'yue-hk', 'yue-HK' ),

			// Language-Script-Region
			array( 'zh-hans-cn', 'zh-Hans-CN' ),
			array( 'sr-latn-RS', 'sr-Latn-RS' ),

			// Language-Variant
			array( 'sl-rozaj', 'sl-rozaj' ),
			array( 'sl-rozaj-biske', 'sl-rozaj-biske' ),
			array( 'sl-nedis', 'sl-nedis' ),

			// Language-Region-Variant
			array( 'de-ch-1901', 'de-CH-1901' ),
			array( 'sl-it-nedis', 'sl-IT-nedis' ),

			// Language-Script-Region-Variant
			array( 'hy-latn-it-arevela', 'hy-Latn-IT-arevela' ),

			// Language-Region
			array( 'de-de', 'de-DE' ),
			array( 'en-us', 'en-US' ),
			array( 'es-419', 'es-419' ),

			// Private use subtags
			array( 'de-ch-x-phonebk', 'de-CH-x-phonebk' ),
			array( 'az-arab-x-aze-derbend', 'az-Arab-x-aze-derbend' ),
			// NOTE: the expectation just above does not match the
			// spelling given in the BCP, which writes this example as
			//   az-Arab-x-AZE-derbend
			// The private use subtag is expected in lower case here.
			// If the BCP spelling were to be followed literally, the
			// entry would instead read:
			//   array( 'az-arab-x-aze-derbend', 'az-Arab-x-AZE-derbend' ),

			// Private use registry values
			array( 'x-whatever', 'x-whatever' ),
			array( 'qaa-qaaa-qm-x-southern', 'qaa-Qaaa-QM-x-southern' ),
			array( 'de-qaaa', 'de-Qaaa' ),
			array( 'sr-latn-qm', 'sr-Latn-QM' ),
			array( 'sr-qaaa-rs', 'sr-Qaaa-RS' ),

			// Tags that use extensions
			array( 'en-us-u-islamcal', 'en-US-u-islamcal' ),
			array( 'zh-cn-a-myext-x-private', 'zh-CN-a-myext-x-private' ),
			array( 'en-a-myext-b-another', 'en-a-myext-b-another' ),

			// Invalid tags, listed for reference only and not part of
			// the data sets:
			//   de-419-DE
			//   a-DE
			//   ar-a-aaa-b-bbb-a-ccc
		);

		return $cases;
	}
}