summaryrefslogtreecommitdiff
path: root/includes/ParserXML.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/ParserXML.php')
-rw-r--r--includes/ParserXML.php643
1 files changed, 0 insertions, 643 deletions
diff --git a/includes/ParserXML.php b/includes/ParserXML.php
deleted file mode 100644
index e7b64f6e..00000000
--- a/includes/ParserXML.php
+++ /dev/null
@@ -1,643 +0,0 @@
-<?php
-/**
- *
- * @package MediaWiki
- * @subpackage Experimental
- */
-
-/** */
-require_once ('Parser.php');
-
-/**
- * This should one day become the XML->(X)HTML parser
- * Based on work by Jan Hidders and Magnus Manske
- * To use, set
- * $wgUseXMLparser = true ;
- * $wgEnableParserCache = false ;
- * $wgWiki2xml to the path and executable of the command line version (cli)
- * in LocalSettings.php
- * @package MediaWiki
- * @subpackage Experimental
- */
-
-/**
- * One node of the wikiXML tree, able to render itself and its subtree as XHTML.
- *
- * Text children are stored as plain strings and nested tags as further
- * element objects, both in $children in document order.
- * @package MediaWiki
- * @subpackage Experimental
- */
-class element {
-    /** Element name; compared against upper-case names such as 'LINK' */
-    var $name = '';
-    /** Attribute map of the element (name => value) */
-    var $attrs = array ();
-    /** Child nodes: strings for text, element objects for nested tags */
-    var $children = array ();
-
-    /**
-     * Escape text for use in HTML text or a quoted attribute value.
-     *
-     * Done with plain byte replacement so that it works for any
-     * ASCII-compatible encoding and does not depend on the PHP version's
-     * default charset for htmlspecialchars().
-     *
-     * @param string $text raw text
-     * @return string text with & < > " ' replaced by entities
-     */
-    function escapeHtml($text) {
-        # '&' must be replaced first so the entities added afterwards survive
-        return str_replace(
-            array ('&', '<', '>', '"', "'"),
-            array ('&amp;', '&lt;', '&gt;', '&quot;', '&#039;'),
-            $text);
-    }
-
-    /**
-     * Find the ATTRS child and return its ATTR sub-children as one
-     * attribute string. If several ATTRS children exist the last one wins.
-     *
-     * @return string e.g. "class='x' id='y'", or '' when there is no ATTRS child
-     */
-    function getSourceAttrs() {
-        $ret = '';
-        foreach ($this->children as $child) {
-            if (!is_string($child) AND $child->name == 'ATTRS') {
-                # makeXHTML() on an ATTRS node only ever returned
-                # getTheseAttrs(); calling it directly avoids handing an
-                # undefined $parser to makeXHTML().
-                $ret = $child->getTheseAttrs();
-            }
-        }
-        return $ret;
-    }
-
-    /**
-     * Collect the ATTR children of this (ATTRS) node for getSourceAttrs().
-     *
-     * Values are escaped because they end up inside single quotes in the
-     * generated markup. ATTR nodes without a NAME, or with a NAME that is
-     * not a plain attribute name, are skipped.
-     * NOTE(review): attribute names are not checked against a whitelist, so
-     * event handlers such as onclick still pass through - confirm whether
-     * the external wiki2xml tool filters them.
-     *
-     * @return string space-separated name='value' pairs
-     */
-    function getTheseAttrs() {
-        $ret = array ();
-        foreach ($this->children as $child) {
-            if (is_string($child) OR $child->name != 'ATTR') {
-                continue;
-            }
-            if (!isset ($child->attrs['NAME'])) {
-                continue;
-            }
-            $attrName = $child->attrs['NAME'];
-            if (!preg_match('/^[A-Za-z_:][A-Za-z0-9_:.-]*$/D', $attrName)) {
-                continue;
-            }
-            # The character data handler may deliver one value in several
-            # chunks, so join every text child instead of reading only [0].
-            $value = '';
-            foreach ($child->children as $part) {
-                if (is_string($part)) {
-                    $value .= $part;
-                }
-            }
-            $ret[] = $attrName."='".$this->escapeHtml($value)."'";
-        }
-        return implode(' ', $ret);
-    }
-
-    /**
-     * Move a "link tail" (lower-case letters directly following a LINK
-     * element, as in [[foo]]bar) from the following text node into the link.
-     *
-     * Does nothing unless children[$key] is a LINK element that is followed
-     * by a text child starting with at least one tail character.
-     *
-     * @param object $parser parser passed on to sub_makeXHTML()
-     * @param int $key index into $this->children of the candidate LINK
-     */
-    function fixLinkTails(& $parser, $key) {
-        $k2 = $key +1;
-        if (!isset ($this->children[$key]) OR !isset ($this->children[$k2]))
-            return;
-        if (!is_string($this->children[$k2]))
-            return;
-        if (is_string($this->children[$key]))
-            return;
-        if ($this->children[$key]->name != "LINK")
-            return;
-
-        # Byte-wise alternation: the umlauts match as whole byte sequences
-        # whatever the encoding of this file is. Comparing them with the
-        # single byte $n[0] could never match a multi-byte character.
-        $following = $this->children[$k2];
-        if (!preg_match('/^(?:[a-z]|ä|ö|ü|ß)+/', $following, $m))
-            return;
-        $tail = $m[0];
-        $this->children[$k2] = (string) substr($following, strlen($tail));
-
-        $link = & $this->children[$key];
-        if (count($link->children) > 1) {
-            # Append the tail to the last child (the display option)
-            $kl = array_keys($link->children);
-            $kl = array_pop($kl);
-            if (is_string($link->children[$kl])) {
-                $link->children[$kl] .= $tail;
-            } else {
-                $link->children[$kl]->children[] = $tail;
-            }
-        } else {
-            # Only a target so far: create the display text as target + tail
-            $e = new element;
-            $e->name = "LINKOPTION";
-            $t = $link->sub_makeXHTML($parser);
-            $e->children[] = trim($t).$tail;
-            $link->children[] = $e;
-        }
-    }
-
-    /**
-     * Generate the XHTML for the entire subtree, optionally wrapped in a tag.
-     *
-     * NOTE(review): text children are emitted as-is. Callers such as the
-     * TEMPLATE and LINKTARGET handling use the result as raw text, so the
-     * text is not escaped here - confirm that the XML producer never lets
-     * markup through in character data.
-     *
-     * @param object $parser parser state handed down to the children
-     * @param string $tag tag name to wrap the content in, '' for none
-     * @param string $attr extra attributes for $tag; attributes found in an
-     *        ATTRS child are appended
-     * @return string
-     */
-    function sub_makeXHTML(& $parser, $tag = '', $attr = '') {
-        $ret = '';
-
-        $attr2 = $this->getSourceAttrs();
-        if ($attr != '' AND $attr2 != '')
-            $attr .= ' ';
-        $attr .= $attr2;
-
-        if ($tag != '') {
-            $ret .= '<'.$tag;
-            if ($attr != '')
-                $ret .= ' '.$attr;
-            $ret .= '>';
-        }
-
-        # Link tails are expected to be handled by the wiki2xml parser,
-        # therefore fixLinkTails() is not called here.
-
-        foreach ($this->children as $child) {
-            if (is_string($child)) {
-                $ret .= $child;
-            } elseif ($child->name != 'ATTRS') {
-                # ATTRS was already consumed by getSourceAttrs() above
-                $ret .= $child->makeXHTML($parser);
-            }
-        }
-        if ($tag != '')
-            $ret .= '</'.$tag.">\n";
-        return $ret;
-    }
-
-    /**
-     * Render an internal link.
-     *
-     * Targets containing a ':' that resolve to the image namespace are
-     * rendered through $parser->makeImage() unless the element carries
-     * FORCEDLINK="yes"; everything else becomes a normal skin link.
-     * Interlanguage and other namespace-specific links are not implemented.
-     *
-     * @param object $parser parser providing makeImage()
-     * @param string $target link target text
-     * @param string $display_title text to show for the link
-     * @param array $options remaining link options
-     * @return string
-     */
-    function createInternalLink(& $parser, $target, $display_title, $options) {
-        global $wgUser;
-        $skin = $wgUser->getSkin();
-
-        $forced = isset ($this->attrs['FORCEDLINK'])
-            && strtoupper($this->attrs['FORCEDLINK']) == 'YES';
-
-        if (!$forced && strpos($target, ':') !== false) {
-            $nt = Title :: newFromText($target);
-            # $nt is checked before use so that a failed title lookup falls
-            # back to a plain link instead of a fatal error
-            if ($nt && $nt->getNamespace() == NS_IMAGE) {
-                $options[] = $display_title;
-                return $parser->makeImage($nt, implode('|', $options));
-            }
-        }
-
-        return $skin->makeLink($target, $display_title);
-    }
-
-    /**
-     * Render a LINK element: the LINKTARGET child is the target, every other
-     * element child is an option and the last option is the display title.
-     *
-     * @param object $parser
-     * @return string
-     */
-    function makeInternalLink(& $parser) {
-        $target = '';
-        $option = array ();
-        foreach ($this->children as $child) {
-            if (is_string($child)) {
-                # Bare text inside LINK is not expected and is ignored
-                continue;
-            }
-            if ($child->name == 'LINKTARGET') {
-                $target = trim($child->makeXHTML($parser));
-            } else {
-                $option[] = trim($child->makeXHTML($parser));
-            }
-        }
-
-        if (count($option) == 0)
-            $option[] = $target; # Create dummy display title
-        $display_title = array_pop($option);
-        return $this->createInternalLink($parser, $target, $display_title, $option);
-    }
-
-    /**
-     * Load a template page and render it with the given arguments.
-     *
-     * @param string $title template title; the template namespace is
-     *        prepended when it contains no ':'
-     * @param array $parts arguments, either "value" (numbered from 1) or
-     *        "key=value"
-     * @param object $parser parser whose plain_parse() renders the text
-     * @return string rendered template, or a broken link when the page does
-     *         not exist
-     */
-    function getTemplateXHTML($title, $parts, & $parser) {
-        global $wgLang, $wgUser;
-        $skin = $wgUser->getSkin();
-        $ot = $title; # Original title
-        if (strpos($title, ':') === false)
-            $title = $wgLang->getNsText(NS_TEMPLATE).":".$title;
-        $nt = Title :: newFromText($title);
-        if (!$nt) {
-            # No usable title object: show the name as text rather than
-            # calling a method on a non-object
-            return $this->escapeHtml($ot);
-        }
-        $id = $nt->getArticleID();
-        if ($id == 0) {
-            # No/non-existing page
-            return $skin->makeBrokenLink($title, $ot);
-        }
-
-        $a = 0;
-        $tv = array (); # Template variables
-        foreach ($parts AS $part) {
-            $a ++;
-            $x = explode('=', $part, 2);
-            if (count($x) == 1)
-                $key = "{$a}";
-            else
-                $key = $x[0];
-            $value = array_pop($x);
-            $tv[$key] = $value;
-        }
-        $art = new Article($nt);
-        $text = $art->getContent(false);
-        # plain_parse() replaces $text (passed by reference) with the XHTML
-        $parser->plain_parse($text, true, $tv);
-
-        return $text;
-    }
-
-    /**
-     * Convert this wikiXML node, including its subtree, into XHTML.
-     *
-     * Reads and updates $parser->nowiki (nesting depth of NOWIKI sections),
-     * $parser->mListType and reads $parser->mCurrentTemplateOptions.
-     *
-     * @param object $parser parser state
-     * @return string XHTML framed by single newlines; for an ATTRS node the
-     *         attribute string from getTheseAttrs()
-     */
-    function makeXHTML(& $parser) {
-        $ret = '';
-        $n = $this->name; # Shortcut
-
-        # The counter may not exist yet on the parser object
-        if (is_object($parser) && !isset ($parser->nowiki))
-            $parser->nowiki = 0;
-
-        if ($n == 'EXTENSION') {
-            # Map allowed HTML tags onto their wikiXML equivalents
-            $htmlTags = array (
-                'B' => 'BOLD', 'STRONG' => 'BOLD',
-                'I' => 'ITALICS', 'EM' => 'ITALICS',
-                'U' => 'UNDERLINED', # Virtual wiki tag
-                'S' => 'STRIKE',
-                'P' => 'PARAGRAPH',
-                'TABLE' => 'TABLE',
-                'TR' => 'TABLEROW',
-                'TD' => 'TABLECELL',
-                'TH' => 'TABLEHEAD',
-                'CAPTION' => 'CAPTION',
-                'NOWIKI' => 'NOWIKI');
-            $ext = isset ($this->attrs['NAME']) ? strtoupper($this->attrs['NAME']) : '';
-
-            if (isset ($htmlTags[$ext])) {
-                $n = $htmlTags[$ext];
-                unset ($this->attrs['NAME']); # Cleanup
-            } elseif ($parser->nowiki > 0) {
-                # No 'real' wiki tags allowed in nowiki section
-                $n = '';
-            }
-        } // $n == 'EXTENSION'
-
-        # Elements that simply wrap their content in one XHTML tag
-        $simpleTags = array (
-            'PARAGRAPH' => 'p',
-            'BOLD' => 'strong',
-            'ITALICS' => 'em',
-            'UNDERLINED' => 'u', # These two don't exist as wiki markup
-            'STRIKE' => 'strike',
-            'TABLE' => 'table',
-            'TABLEROW' => 'tr',
-            'TABLECELL' => 'td',
-            'TABLEHEAD' => 'th',
-            'CAPTION' => 'caption');
-
-        if (isset ($simpleTags[$n])) {
-            $ret .= $this->sub_makeXHTML($parser, $simpleTags[$n]);
-        } else {
-            switch ($n) {
-                case 'ARTICLE':
-                case 'LINKTARGET':
-                case 'LINKOPTION':
-                case 'IGNORE': # Internal use, not generated by wiki2xml parser
-                    $ret .= $this->sub_makeXHTML($parser);
-                    break;
-
-                case 'HEADING':
-                    # Restrict the level to h1..h6 so the attribute cannot
-                    # inject anything else into the tag
-                    $level = isset ($this->attrs['LEVEL']) ? (int) $this->attrs['LEVEL'] : 1;
-                    $level = max(1, min(6, $level));
-                    $ret .= $this->sub_makeXHTML($parser, 'h'.$level);
-                    break;
-
-                case 'COMMENT':
-                    # HTML comments are parsed out
-                    break;
-
-                case 'LINK':
-                    $ret .= $this->makeInternalLink($parser);
-                    break;
-
-                case 'TEMPLATE':
-                    $parts = explode('|', $this->sub_makeXHTML($parser));
-                    $title = array_shift($parts);
-                    # The parameter is declared by reference; call-time
-                    # '& $parser' is a fatal error since PHP 5.4
-                    $ret .= $this->getTemplateXHTML($title, $parts, $parser);
-                    break;
-
-                case 'TEMPLATEVAR':
-                    $x = $this->sub_makeXHTML($parser);
-                    if (isset ($parser->mCurrentTemplateOptions["{$x}"]))
-                        $ret .= $parser->mCurrentTemplateOptions["{$x}"];
-                    break;
-
-                case 'NOWIKI':
-                    $parser->nowiki++;
-                    $ret .= $this->sub_makeXHTML($parser, '');
-                    $parser->nowiki--;
-                    break; # Was missing: content was rendered a second time below
-
-                # Unknown HTML extension
-                case 'EXTENSION': # This is currently a dummy!!!
-                    $ext = isset ($this->attrs['NAME']) ? $this->escapeHtml($this->attrs['NAME']) : '';
-                    $ret .= '&lt;'.$ext.'&gt;';
-                    $ret .= $this->sub_makeXHTML($parser);
-                    $ret .= '&lt;/'.$ext.'&gt; ';
-                    break;
-
-                case 'ATTRS': # SPECIAL CASE : returning attributes
-                    return $this->getTheseAttrs();
-
-                # Lists stuff
-                case 'LISTITEM':
-                    if ($parser->mListType == 'dl')
-                        $ret .= $this->sub_makeXHTML($parser, 'dd');
-                    else
-                        $ret .= $this->sub_makeXHTML($parser, 'li');
-                    break;
-                case 'LIST':
-                    $listKind = isset ($this->attrs['TYPE']) ? $this->attrs['TYPE'] : '';
-                    $type = 'ol'; # Default
-                    if ($listKind == 'bullet')
-                        $type = 'ul';
-                    elseif ($listKind == 'indent')
-                        $type = 'dl';
-                    # Items look at mListType; restore it for the outer list
-                    $oldtype = $parser->mListType;
-                    $parser->mListType = $type;
-                    $ret .= $this->sub_makeXHTML($parser, $type);
-                    $parser->mListType = $oldtype;
-                    break;
-
-                # Something else entirely: show the tag name as text
-                default:
-                    $shown = $this->escapeHtml($n);
-                    $ret .= '&lt;'.$shown.'&gt;';
-                    $ret .= $this->sub_makeXHTML($parser);
-                    $ret .= '&lt;/'.$shown.'&gt; ';
-            } // switch($n)
-        }
-
-        # Frame with newlines, collapsing doubled ones
-        $ret = "\n{$ret}\n";
-        $ret = str_replace("\n\n", "\n", $ret);
-        return $ret;
-    }
-
-    /**
-     * Debugging aid: dump this node and its subtree as nested HTML lists.
-     * Names, attributes and text are escaped so that they are displayed
-     * instead of being interpreted as markup.
-     *
-     * @return string
-     */
-    function myPrint() {
-        $ret = "<ul>\n";
-        $ret .= "<li> <b> Name: </b> ".$this->escapeHtml($this->name)." </li>\n";
-        // print attributes
-        $ret .= '<li> <b> Attributes: </b>';
-        foreach ($this->attrs as $name => $value) {
-            $ret .= $this->escapeHtml("$name => $value; ");
-        }
-        $ret .= " </li>\n";
-        // print children
-        foreach ($this->children as $child) {
-            if (is_string($child)) {
-                $ret .= "<li> ".$this->escapeHtml($child)." </li>\n";
-            } else {
-                $ret .= $child->myPrint();
-            }
-        }
-        $ret .= "</ul>\n";
-        return $ret;
-    }
-}
-
-// Stack of the elements that are currently open, innermost last. It is
-// shared through `global` by the XML callback functions defined below.
-$ancStack = array (); // the stack with ancestral elements
-
-// START Three global functions needed for parsing, sorry guys
-/**
- * XML parser callback for an opening tag: creates a fresh element node
- * carrying the tag name and attributes and puts it on top of the global
- * stack of open elements.
- *
- * @param mixed $parser the XML parser invoking the callback (unused)
- * @param string $name tag name
- * @param array $attrs attributes of the tag
- */
-function wgXMLstartElement($parser, $name, $attrs) {
-    global $ancStack;
-
-    $node = new element;
-    $node->attrs = $attrs;
-    $node->name = $name;
-
-    // the node stays on the stack until its closing tag arrives
-    array_push($ancStack, $node);
-}
-
-/**
- * XML parser callback for a closing tag: takes the finished element off the
- * global stack and either appends it to its parent's children or, when no
- * parent is left, stores it in the global $rootElem.
- *
- * @param mixed $parser the XML parser invoking the callback (unused)
- * @param string $name tag name (unused)
- */
-function wgXMLendElement($parser, $name) {
-    global $ancStack, $rootElem;
-
-    $finished = array_pop($ancStack);
-    $depth = count($ancStack);
-
-    if ($depth != 0) {
-        // the new top of the stack is the parent of the finished element
-        array_push($ancStack[$depth - 1]->children, $finished);
-        return;
-    }
-
-    // nothing left on the stack: this was the outermost element
-    $rootElem = $finished;
-}
-
-/**
- * XML parser callback for character data: appends the trimmed text to the
- * children of the innermost open element. Text that is empty after trimming,
- * and text arriving while no element is open, is dropped.
- *
- * @param mixed $parser the XML parser invoking the callback (unused)
- * @param string $data the text chunk
- */
-function wgXMLcharacterData($parser, $data) {
-    global $ancStack;
-
-    // surrounding whitespace and blank lines are of no use in the tree
-    $text = trim($data);
-
-    if (!$ancStack || $text == "") {
-        return;
-    }
-
-    $top = count($ancStack) - 1;
-    array_push($ancStack[$top]->children, $text);
-}
-// END Three global functions needed for parsing, sorry guys
-
-/**
- * Builds a tree of element objects from XML input.
- *
- * Parsing works through the global callback functions wgXMLstartElement,
- * wgXMLendElement and wgXMLcharacterData, which communicate through the
- * globals $ancStack and $rootElem; the methods are therefore not reentrant.
- * @package MediaWiki
- * @subpackage Experimental
- */
-class xml2php {
-
-    /**
-     * Create an XML parser wired to the global callback functions.
-     *
-     * @return mixed parser as returned by xml_parser_create()
-     */
-    function createParser() {
-        $xml_parser = xml_parser_create();
-        xml_set_element_handler($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
-        xml_set_character_data_handler($xml_parser, 'wgXMLcharacterData');
-        return $xml_parser;
-    }
-
-    /**
-     * Build the message describing the parser's current error.
-     *
-     * @param mixed $xml_parser parser that reported a failure
-     * @return string
-     */
-    function errorMessage($xml_parser) {
-        return sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser));
-    }
-
-    /**
-     * Parse an XML file into an element tree.
-     *
-     * Terminates the script with die() when the file cannot be opened or
-     * is not well-formed XML.
-     *
-     * @param string $filename path of the XML file
-     * @return object root element (a reference to the global $rootElem)
-     */
-    function & scanFile($filename) {
-        global $ancStack, $rootElem;
-        $ancStack = array ();
-        # Forget the tree of any earlier run so it can never be returned
-        $rootElem = null;
-
-        $xml_parser = $this->createParser();
-        if (!($fp = fopen($filename, 'r'))) {
-            xml_parser_free($xml_parser);
-            die('could not open XML input');
-        }
-
-        # Loop on the end-of-input condition rather than on the truthiness
-        # of the chunk: a chunk such as "0" is falsy, and the parser must
-        # always get one call with the final flag set.
-        do {
-            $data = fread($fp, 4096);
-            if ($data === false)
-                $data = '';
-            $final = ($data === '') || feof($fp);
-            if (!xml_parse($xml_parser, $data, $final)) {
-                $message = $this->errorMessage($xml_parser);
-                fclose($fp);
-                xml_parser_free($xml_parser);
-                die($message);
-            }
-        } while (!$final);
-
-        fclose($fp);
-        xml_parser_free($xml_parser);
-
-        // the callbacks stored the outermost element in $rootElem
-        return $rootElem;
-    }
-
-    /**
-     * Parse an XML string into an element tree.
-     *
-     * Terminates the script with die() when the input is not well-formed.
-     *
-     * @param string $input XML document
-     * @return object root element
-     */
-    function scanString($input) {
-        global $ancStack, $rootElem;
-        $ancStack = array ();
-        # Forget the tree of any earlier run so it can never be returned
-        $rootElem = null;
-
-        $xml_parser = $this->createParser();
-
-        if (!xml_parse($xml_parser, $input, true)) {
-            $message = $this->errorMessage($xml_parser);
-            xml_parser_free($xml_parser);
-            die($message);
-        }
-        xml_parser_free($xml_parser);
-
-        // the callbacks stored the outermost element in $rootElem
-        return $rootElem;
-    }
-
-}
-
-/**
- * Experimental parser that renders wikitext by piping it through the
- * external html2xml and wiki2xml command line tools and converting the
- * resulting XML tree into XHTML.
- * @package MediaWiki
- * @subpackage Experimental
- */
-class ParserXML extends Parser {
-    /**#@+
-     * @private
-     */
-    # Persistent:
-    var $mTagHooks, $mListType;
-
-    # Cleared with clearState():
-    var $mOutput, $mAutonumber, $mDTopen, $mStripState = array ();
-    var $mVariables, $mIncludeCount, $mArgStack, $mLastSection, $mInPre;
-
-    # Temporary:
-    var $mOptions, $mTitle, $mOutputType, $mTemplates, // cache of already loaded templates, avoids
-    // multiple SQL queries for the same string
-    $mTemplatePath; // stores an unsorted hash of all the templates already loaded
-    // in this path. Used for loop detection.
-
-    var $nowikicount, $mCurrentTemplateOptions;
-
-    # Nesting depth of NOWIKI sections; element::makeXHTML() counts in this
-    # property, so it is declared here instead of being created on the fly.
-    var $nowiki = 0;
-
-    /**#@-*/
-
-    /**
-     * Constructor
-     *
-     * @public
-     */
-    function __construct() {
-        $this->mTemplates = array ();
-        $this->mTemplatePath = array ();
-        $this->mTagHooks = array ();
-        $this->nowiki = 0;
-        $this->clearState();
-    }
-
-    /**
-     * Old-style constructor, kept so that PHP 4 and explicit calls still
-     * work; PHP 8 no longer treats a method named like the class as the
-     * constructor.
-     *
-     * @public
-     */
-    function ParserXML() {
-        $this->__construct();
-    }
-
-    /**
-     * Clear Parser state
-     *
-     * @private
-     */
-    function clearState() {
-        $this->mOutput = new ParserOutput;
-        $this->mAutonumber = 0;
-        $this->mLastSection = "";
-        $this->mDTopen = false;
-        $this->mVariables = false;
-        $this->mIncludeCount = array ();
-        $this->mStripState = array ();
-        $this->mArgStack = array ();
-        $this->mInPre = false;
-    }
-
-    /**
-     * Run an external command with $input as its standard input, using a
-     * temporary file.
-     *
-     * @param string $command command line taken from the configuration
-     * @param string $input data to feed to the command
-     * @return mixed array of output lines, or false when the temporary file
-     *         could not be created
-     * @private
-     */
-    function pipeThroughCommand($command, $input) {
-        $tmpfname = tempnam( wfTempDir(), 'FOO' );
-        if ($tmpfname === false) {
-            trigger_error('ParserXML: could not create temporary file', E_USER_WARNING);
-            return false;
-        }
-        $handle = fopen($tmpfname, 'w');
-        if (!$handle) {
-            unlink($tmpfname);
-            trigger_error('ParserXML: could not write temporary file', E_USER_WARNING);
-            return false;
-        }
-        fwrite($handle, $input);
-        fclose($handle);
-
-        $lines = array ();
-        $status = 0;
-        # The file name is quoted so that a temp directory containing spaces
-        # or shell metacharacters cannot break or alter the command line.
-        exec($command.' < '.escapeshellarg($tmpfname), $lines, $status);
-        unlink($tmpfname);
-
-        if ($status != 0) {
-            trigger_error('ParserXML: external command exited with status '.$status, E_USER_WARNING);
-        }
-        return $lines;
-    }
-
-    /**
-     * Pipe the text through the external html2xml tool, which is looked up
-     * in the same directory as $wgWiki2xml.
-     *
-     * NOTE(review): the text is utf8_encode()d before and utf8_decode()d
-     * after the call, which assumes the wiki text is ISO-8859-1 - confirm
-     * for UTF-8 wikis.
-     *
-     * @param string $text replaced by the tool's output; left unchanged when
-     *        the tool could not be run
-     */
-    function html2xml(& $text) {
-        global $wgWiki2xml;
-
-        # generating html2xml command path
-        $a = explode('/', $wgWiki2xml);
-        array_pop($a);
-        $a[] = 'html2xml';
-        $html2xml = implode('/', $a);
-
-        $lines = $this->pipeThroughCommand($html2xml, utf8_encode($text));
-        if ($lines !== false) {
-            $text = utf8_decode(implode("\n", $lines));
-        }
-    }
-
-    /**
-     * Turn the wikitext into XML: first html2xml(), then the external
-     * parser configured in $wgWiki2xml.
-     *
-     * NOTE(review): unlike html2xml() the input is not utf8_encode()d here
-     * while the output is still utf8_decode()d; kept as found - confirm the
-     * encoding the wiki2xml tool expects.
-     *
-     * @param string $text replaced by the XML; left as html2xml() produced
-     *        it when the tool could not be run
-     */
-    function runXMLparser(& $text) {
-        global $wgWiki2xml;
-
-        $this->html2xml($text);
-
-        $lines = $this->pipeThroughCommand($wgWiki2xml, $text);
-        if ($lines !== false) {
-            $text = utf8_decode(implode("\n", $lines));
-        }
-    }
-
-    /**
-     * Convert wikitext into XHTML in place.
-     *
-     * @param string $text wikitext on input, XHTML on output
-     * @param bool $inline when true and the document has a single top-level
-     *        child, that child is rendered without its own tag (used for
-     *        templates)
-     * @param array $templateOptions template variables (name => value) made
-     *        available to TEMPLATEVAR elements while rendering
-     */
-    function plain_parse(& $text, $inline = false, $templateOptions = array ()) {
-        $this->runXMLparser($text);
-        $w = new xml2php;
-        $result = $w->scanString($text);
-        if (!is_object($result)) {
-            # Nothing was parsed; there is nothing to render
-            $text = '';
-            return;
-        }
-
-        # Template calls nest, so remember the options of the outer call
-        $oldTemplateOptions = $this->mCurrentTemplateOptions;
-        $this->mCurrentTemplateOptions = $templateOptions;
-
-        if ($inline) { # Inline rendering off for templates
-            if (count($result->children) == 1 && is_object($result->children[0]))
-                $result->children[0]->name = 'IGNORE';
-        }
-
-        # For debugging, $result->myPrint() returns a dump of the tree
-        $text = $result->makeXHTML($this);
-        $this->mCurrentTemplateOptions = $oldTemplateOptions;
-    }
-
-    /**
-     * Parse wikitext and return the parser output object.
-     *
-     * @param string $text wikitext
-     * @param object $title title of the page (not used by this parser)
-     * @param object $options parser options (not used by this parser)
-     * @param bool $linestart not used by this parser
-     * @param bool $clearState when true the parser state, including the
-     *        output object, is reset first so that results of earlier calls
-     *        are not shared
-     * @return object the ParserOutput holding the XHTML
-     */
-    function parse($text, & $title, $options, $linestart = true, $clearState = true) {
-        if ($clearState) {
-            $this->clearState();
-        }
-        $this->plain_parse($text);
-        $this->mOutput->setText($text);
-        return $this->mOutput;
-    }
-
-}
-?>