Your IP : 3.17.75.47


Current Path : /var/www/www-root/data/www/monolith-realty.ru/bitrix/modules/main/lib/web/dom/
Upload File :
Current File : /var/www/www-root/data/www/monolith-realty.ru/bitrix/modules/main/lib/web/dom/htmlparser.php

<?php
namespace Bitrix\Main\Web\DOM;

use \Bitrix\Main\Text\HtmlFilter;

class HtmlParser extends Parser
{
	public $debugTime = 0;

	protected $tagsMustBeClosed = array('SCRIPT', 'STYLE');


	public $storePhpCode = true;

	protected static $objectCounter = 0;
	protected $currentObjectNumber;
	protected $storedItemCounter;
	protected $storedPHP = array();

	public function __construct()
	{
		static::$objectCounter++;
		$this->currentObjectNumber = static::$objectCounter;
		$this->storedItemCounter = 0;

		$this->setConfig(new HtmlParserConfig);
	}

	/*
	 * @param Node $node
	 * @return string
	 */
	public function getSource(Node $node)
	{
		$source = '';
		switch($node->getNodeType())
		{
			case Node::ELEMENT_NODE:

				/*@var $node Element*/
				$source = $this->getSourceElement($node);
				break;

			case Node::ATTRIBUTE_NODE:

				/*@var $node Attr*/
				$source = $this->getSourceAttr($node);
				break;

			case Node::TEXT_NODE:

				/*@var Text $node*/
				if($node->getParentNode() && in_array($node->getParentNode()->getNodeName(), $this->tagsMustBeClosed))
				{
					$source = $node->getNodeValue();
				}
				else
				{
					$source = HtmlFilter::encode($node->getNodeValue(), ENT_QUOTES);
				}

				break;

			case Node::COMMENT_NODE:

				/*@var Comment $node*/
				$source = '<!--' . $node->getNodeValue() . '-->';
				if($this->storePhpCode)
				{
					$source = $this->restorePHP($source);
				}
				break;

			case Node::DOCUMENT_TYPE_NODE:

				/*@var DocumentType $node*/
				$source = $this->getSourceDocType($node);
				break;
		}

		return $source;
	}

	protected function getSourceAttr(Attr $node)
	{
		return $node->getName() . '="' . HtmlFilter::encode($node->getValue()) . '"';
	}

	protected function getSourceElement(Element $node)
	{
		$nodeName = mb_strtolower($node->getNodeName());
		$source = '<' . $nodeName;
		if($node->hasAttributes())
		{
			$attrList = $node->getAttributesArray();
			foreach($attrList as $attr)
			{
				$source .= ' ' . $this->getSource($attr);
			}
		}

		if($node->hasChildNodes())
		{
			$source .= '>';

			if(Node::$isNodeListAsArray)
			{
				$childNodes = $node->getChildNodesArray();
				foreach ($childNodes as $child)
				{
					$source .= $this->getSource($child);
				}
			}
			else
			{
				for($i = 0; $i < $node->getChildNodes()->getLength(); $i++)
				{
					$source .= $this->getSource($node->getChildNodes()->item($i));
				}
			}


			$source .= '</' . $nodeName . '>';
		}
		else
		{
			$source .= ' />';
		}

		return $source;
	}

	protected function getSourceDocType(DocumentType $node)
	{
		$source = '<!DOCTYPE html>';
		return $source;
	}

	/*
	 * @param string $text
	 * @param Node $node
	 * @return void
	 */
	public function parse($text = "", Node $node)
	{
		if($this->storePhpCode)
		{
			$text = $this->storePHP($text);
		}
		else
		{
			$text = $this->commentPHP($text);
		}

		$isCharOpen = true;
		$buffer = '';

		$textLength = strlen($text);
		for($i = 0; $i < $textLength; $i++)
		{
			$char = substr($text, $i, 1);
			if($char === '<')
			{
				$node = $this->getNextNode($buffer, $node);
				$buffer = $char;
				$isCharOpen = true;
			}
			elseif($char === '>')
			{
				$buffer .= $char;
				if($isCharOpen)
				{
					$node = $this->getNextNode($buffer, $node);
					$buffer = '';
				}
				$isCharOpen = false;
			}
			else
			{
				$buffer .= $char;
			}

			if(!$node)
			{
				return null;
			}
		}

		if($buffer != '')
		{
			$node = $this->getNextNode($buffer, $node);
		}

		return $node;
	}

	protected function parseElement($text)
	{
		$result = array('NAME' => '', 'ATTRIBUTES' => array());

		if(preg_match('/[ \t\r\n]/S', $text, $matches, PREG_OFFSET_CAPTURE))
		{
			$delimiterPosition = $matches[0][1];
			$result['NAME'] = mb_strtoupper(mb_substr($text, 0, $delimiterPosition));
			$textAttr = mb_substr($text, $delimiterPosition + 1);
			$result['ATTRIBUTES'] = $this->parseAttributes($textAttr);
		}
		else
		{
			$result['NAME'] = mb_strtoupper($text);
		}

		return $result;
	}

	protected function parseDocType($text)
	{
		return array();
	}

	protected function parseAttributes($text)
	{
		static $search = array(
			"'&(quot|#34);'i",
			"'&(lt|#60);'i",
			"'&(gt|#62);'i",
			"'&(amp|#38);'i",
		);

		static $replace = array(
			"\"",
			"<",
			">",
			"&",
		);

		$attributes = array();
		if ($text !== "")
		{
			preg_match_all("/(?'name'[\w\-_:?&]+)(?'eq'\s*=\s*)?(?(eq)([\"'])(?'val'.*?)\g{-2})/s", $text, $attrTmp);
			if(strpos($text, "&") === false)
			{
				foreach($attrTmp['name'] as $i => $attrName)
				{
					$attributes[$attrName] = $attrTmp['val'][$i];
				}
			}
			else
			{
				foreach($attrTmp['name'] as $i => $attrName)
				{
					$attributes[$attrName] = preg_replace($search, $replace, $attrTmp['val'][$i]);
				}
			}
		}

		return $attributes;
	}

	protected function parseAttributesOld($text)
	{
		preg_match_all("/\b([\w_-]+\s*=\s*([\"']*)[^\\2]+?\\2)/", $text, $pairs);
		$pairs = $pairs[0];

		$attributeList = Array();
		foreach($pairs as $pair)
		{
			$attr = array_map(
				function ($data){
					$data = preg_replace("/(^['\"]|['\"]$)/","",$data);
					return $data;
				},
				preg_split("/\s*=\s*/", $pair)
			);
			$name = $attr[0];
			$value = $attr[1];
			$attributeList[$name] = $value;
		}

		return $attributeList;
	}

	protected function getNextNode($tag, Node $parentNode)
	{
		$node = null;
		$isSingleTag = true;

		static $tagsWithoutClose = array('INPUT'=>1, 'IMG'=>1, 'BR'=>1, 'HR'=>1, 'META'=>1, 'AREA'=>1, 'BASE'=>1, 'COL'=>1, 'EMBED'=>1, 'KEYGEN'=>1, 'LINK'=>1, 'PARAM'=>1, 'SOURCE'=>1, 'TRACK'=>1, 'WBR'=>1);
		$tagsCantHaveNestedTags = array();

		$document = $parentNode->getOwnerDocument();

		if($parentNode->getNodeType() === Node::COMMENT_NODE)
		{
			$commentClosePosition = mb_strpos($tag, '-->');
			if($commentClosePosition !== false)
			{
				$clean = mb_substr($tag, 0, $commentClosePosition);
				$parentNode->setNodeValue($parentNode->getNodeValue() . $clean);
				$parentNode->bxNodeFoundCloseTag = true;

				$tag = mb_substr($tag, $commentClosePosition + 3);
				if(!$tag)
				{
					return $parentNode->getParentNode();
				}
				else
				{
					$parentNode = $parentNode->getParentNode();
				}
			}
			else
			{
				$parentNode->setNodeValue($parentNode->getNodeValue() . $tag);
				return $parentNode;
			}
		}
		elseif(in_array($parentNode->getNodeName(), $this->tagsMustBeClosed))
		{
			if(mb_strtoupper(mb_substr($tag, -9)) == '</'.$parentNode->getNodeName().'>')
			{
				$parentNode->bxNodeFoundCloseTag = true;
				$parentNode = $parentNode->getParentNode();
			}
			else
			{
				$firstChild = $parentNode->getFirstChild();
				if(!$firstChild)
				{
					$parentNode->appendChild($document->createTextNode($tag));
				}
				else
				{
					$firstChild->setNodeValue($firstChild->getNodeValue() . $tag);
				}

				$parentNode->bxNodeFoundCloseTag = false;
				return $parentNode;
			}
		}

		if(mb_substr($tag, 0, 2) === '</')
		{
			// closed tag
			//TODO: find closest opened parent with same nodeName and return it
			$cleaned = mb_strtoupper(mb_substr($tag, 2, -mb_strlen('>')));
			$searchableNode = $parentNode;
			$isSearchableNodeFound = false;

			$unclosedNodes = array();
			do
			{
				if(!$searchableNode->bxNodeFoundCloseTag)
				{
					$unclosedNodes[] = $searchableNode;
				}

				if($searchableNode->getNodeName() === $cleaned)
				{
					$isSearchableNodeFound = true;
					break;
				}
			}while($searchableNode = $searchableNode->getParentNode());

			if($isSearchableNodeFound)
			{
				foreach($unclosedNodes as $unclosedNode)
				{
					/* @var $unclosedNode Node */
					if(in_array($unclosedNode->getNodeName(), $tagsCantHaveNestedTags))
					{
						if($unclosedNode->hasChildNodes())
						{
							foreach ($unclosedNode->getChildNodesArray() as $childNode)
							{
								$unclosedNode->getParentNode()->appendChild($unclosedNode->removeChild($childNode));
							}
						}
					}

					$unclosedNode->bxNodeFoundCloseTag = true;
				}

				return $searchableNode->getParentNode();
			}
			else
			{
				if(false)
				{
					throw new DomException('Parser error. Find close tag, but can not find open tag ' . $cleaned);
				}
				else
				{
					if ($parentNode->getParentNode())
					{
						$parentNode->getParentNode()->bxNodeFoundCloseTag = true;
					}
					return $parentNode;
				}
			}
		}
		elseif(mb_substr($tag, 0, 4) === '<!--')
		{
			// Comment
			$cleaned = mb_substr($tag, 4);
			if(mb_substr($tag, -3) == '-->')
			{
				$cleaned = mb_substr($cleaned, 0, -3);
				$parentNode->bxNodeFoundCloseTag = true;
			}
			else
			{
				$isSingleTag = false;
			}


			//$parentNode->bxNodeFoundCloseTag = false;
			$node = $document->createComment($cleaned);
		}
		elseif(mb_substr($tag, 0, 1) === '<')
		{

			// Element
			if(mb_substr($tag, -2) === '/>')
			{
				// empty tag
				$cleaned = mb_substr($tag, 1, -2);
				$bxNodeWithCloseTag = false;
			}
			else
			{
				$cleaned = mb_substr($tag, 1, -1);
				$isSingleTag = false;
				$bxNodeWithCloseTag = true;
			}

			$list = $this->parseElement($cleaned);

			$isDocType = mb_substr($list['NAME'], 0, mb_strlen('!DOCTYPE')) === '!DOCTYPE';

			if(isset($tagsWithoutClose[$list['NAME']]) || $isDocType)
			{
				$bxNodeWithCloseTag = false;
				$isSingleTag = true;
			}

			if($isDocType)
			{
				$list = $this->parseDocType($cleaned);
				//TODO: set doctype fields
			}
			else
			{
				$node = $document->createElement($list['NAME']);
				foreach($list['ATTRIBUTES'] as $attrName => $attrValue)
				{
					$nodeAttr = $document->createAttribute($attrName, $attrValue);
					$node->setAttributeNode($nodeAttr);
				}
				$node->bxNodeWithCloseTag = $bxNodeWithCloseTag;
			}
		}
		else
		{
			// Text
			$cleaned = html_entity_decode($tag, ENT_QUOTES, (defined("BX_UTF") ? "UTF-8" : "ISO-8859-1"));
			$node = $document->createTextNode($cleaned);
		}

		if($node && $parentNode)
		{
			$parentNode->appendChild($node);
			if(!$isSingleTag)
			{
				return $node;
			}
		}

		return $parentNode;
	}

	/*
	* @param string $html
	* @return string
	*/
	public function commentPHP($html)
	{
		$html = str_replace(array('<?', '?>'), array('<!--', '-->'),	$html);
		return $html;
	}

	/*
	 * @param string $html
	 * @return string
	*/
	public function storePHP($html)
	{
		if(preg_match_all('/(<\?[\W\w\n]*?\?>)/i', $html, $matches, PREG_SET_ORDER) && is_array($matches))
		{
			$prefix = 'BX_DOM_DOCUMENT_PHP_SLICE_PLACEHOLDER_' . $this->currentObjectNumber . '_';
			foreach($matches as $key => $value)
			{
				$this->storedItemCounter++;
				$this->storedPHP['<!--' . $prefix . $this->storedItemCounter . '-->'] = $value[0];
			}

			$replaceFrom = array_values($this->storedPHP);
			$replaceTo = array_keys($this->storedPHP);

			$html = str_replace($replaceFrom, $replaceTo,	$html);
		}

		return $html;
	}

	/*
	 * @param string $html
	 * @return string
	*/
	public function restorePHP($html)
	{
		$html = str_replace(
			array_keys($this->storedPHP),
			array_values($this->storedPHP),
			$html
		);

		return $html;
	}
}