From 4ac9fa081a7c045f6a9f1cfc529d82423f485b2e Mon Sep 17 00:00:00 2001
From: Pierre Schmitz <pierre@archlinux.de>
Date: Sun, 8 Dec 2013 09:55:49 +0100
Subject: Update to MediaWiki 1.22.0

---
 resources/jquery/jquery.byteLength.js | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'resources/jquery/jquery.byteLength.js')

diff --git a/resources/jquery/jquery.byteLength.js b/resources/jquery/jquery.byteLength.js
index 3d5b7206..398937e6 100644
--- a/resources/jquery/jquery.byteLength.js
+++ b/resources/jquery/jquery.byteLength.js
@@ -4,6 +4,8 @@
  * Calculate the byte length of a string (accounting for UTF-8).
  *
  * @author Jan Paul Posma, 2011
+ * @author Timo Tijhof, 2012
+ * @author David Chan, 2013
  */
 jQuery.byteLength = function ( str ) {
 
@@ -12,8 +14,18 @@ jQuery.byteLength = function ( str ) {
 	// Note, surrogate (\uD800-\uDFFF) characters are counted as 2 bytes, since there's two of them
 	// and the actual character takes 4 bytes in UTF-8 (2*2=4). Might not work perfectly in
 	// edge cases such as illegal sequences, but that should never happen.
+
+	// https://en.wikipedia.org/wiki/UTF-8#Description
+	// The mapping from UTF-16 code units to UTF-8 bytes is as follows:
+	// > Range 0000-007F: codepoints that become 1 byte of UTF-8
+	// > Range 0080-07FF: codepoints that become 2 bytes of UTF-8
+	// > Range 0800-D7FF: codepoints that become 3 bytes of UTF-8
+	// > Range D800-DFFF: Surrogates (each pair becomes 4 bytes of UTF-8)
+	// > Range E000-FFFF: codepoints that become 3 bytes of UTF-8 (continued)
+
 	return str
 		.replace( /[\u0080-\u07FF\uD800-\uDFFF]/g, '**' )
 		.replace( /[\u0800-\uD7FF\uE000-\uFFFF]/g, '***' )
 		.length;
+
 };
-- 
cgit v1.2.2