diff options
Diffstat (limited to 'min/lib/Minify/HTML.php')
-rw-r--r-- | min/lib/Minify/HTML.php | 245 |
1 files changed, 245 insertions, 0 deletions
diff --git a/min/lib/Minify/HTML.php b/min/lib/Minify/HTML.php new file mode 100644 index 0000000..fb5c1e9 --- /dev/null +++ b/min/lib/Minify/HTML.php @@ -0,0 +1,245 @@ +<?php +/** + * Class Minify_HTML + * @package Minify + */ + +/** + * Compress HTML + * + * This is a heavy regex-based removal of whitespace, unnecessary comments and + * tokens. IE conditional comments are preserved. There are also options to have + * STYLE and SCRIPT blocks compressed by callback functions. + * + * A test suite is available. + * + * @package Minify + * @author Stephen Clay <steve@mrclay.org> + */ +class Minify_HTML { + + /** + * "Minify" an HTML page + * + * @param string $html + * + * @param array $options + * + * 'cssMinifier' : (optional) callback function to process content of STYLE + * elements. + * + * 'jsMinifier' : (optional) callback function to process content of SCRIPT + * elements. Note: the type attribute is ignored. + * + * 'xhtml' : (optional boolean) should content be treated as XHTML1.0? If + * unset, minify will sniff for an XHTML doctype. + * + * @return string + */ + public static function minify($html, $options = array()) { + $min = new Minify_HTML($html, $options); + return $min->process(); + } + + + /** + * Create a minifier object + * + * @param string $html + * + * @param array $options + * + * 'cssMinifier' : (optional) callback function to process content of STYLE + * elements. + * + * 'jsMinifier' : (optional) callback function to process content of SCRIPT + * elements. Note: the type attribute is ignored. + * + * 'xhtml' : (optional boolean) should content be treated as XHTML1.0? If + * unset, minify will sniff for an XHTML doctype. + * + * @return null + */ + public function __construct($html, $options = array()) + { + $this->_html = str_replace("\r\n", "\n", trim($html)); + if (isset($options['xhtml'])) { + $this->_isXhtml = (bool)$options['xhtml']; + } + if (isset($options['cssMinifier'])) { + $this->_cssMinifier = $options['cssMinifier']; + } + if (isset($options['jsMinifier'])) { + $this->_jsMinifier = $options['jsMinifier']; + } + } + + + /** + * Minify the markeup given in the constructor + * + * @return string + */ + public function process() + { + if ($this->_isXhtml === null) { + $this->_isXhtml = (false !== strpos($this->_html, '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML')); + } + + $this->_replacementHash = 'MINIFYHTML' . md5($_SERVER['REQUEST_TIME']); + $this->_placeholders = array(); + + // replace SCRIPTs (and minify) with placeholders + $this->_html = preg_replace_callback( + '/(\\s*)(<script\\b[^>]*?>)([\\s\\S]*?)<\\/script>(\\s*)/i' + ,array($this, '_removeScriptCB') + ,$this->_html); + + // replace STYLEs (and minify) with placeholders + $this->_html = preg_replace_callback( + '/\\s*(<style\\b[^>]*?>)([\\s\\S]*?)<\\/style>\\s*/i' + ,array($this, '_removeStyleCB') + ,$this->_html); + + // remove HTML comments (not containing IE conditional comments). + $this->_html = preg_replace_callback( + '/<!--([\\s\\S]*?)-->/' + ,array($this, '_commentCB') + ,$this->_html); + + // replace PREs with placeholders + $this->_html = preg_replace_callback('/\\s*(<pre\\b[^>]*?>[\\s\\S]*?<\\/pre>)\\s*/i' + ,array($this, '_removePreCB') + ,$this->_html); + + // replace TEXTAREAs with placeholders + $this->_html = preg_replace_callback( + '/\\s*(<textarea\\b[^>]*?>[\\s\\S]*?<\\/textarea>)\\s*/i' + ,array($this, '_removeTextareaCB') + ,$this->_html); + + // trim each line. + // @todo take into account attribute values that span multiple lines. + $this->_html = preg_replace('/^\\s+|\\s+$/m', '', $this->_html); + + // remove ws around block/undisplayed elements + $this->_html = preg_replace('/\\s+(<\\/?(?:area|base(?:font)?|blockquote|body' + .'|caption|center|cite|col(?:group)?|dd|dir|div|dl|dt|fieldset|form' + .'|frame(?:set)?|h[1-6]|head|hr|html|legend|li|link|map|menu|meta' + .'|ol|opt(?:group|ion)|p|param|t(?:able|body|head|d|h||r|foot|itle)' + .'|ul)\\b[^>]*>)/i', '$1', $this->_html); + + // remove ws outside of all elements + $this->_html = preg_replace_callback( + '/>([^<]+)</' + ,array($this, '_outsideTagCB') + ,$this->_html); + + // use newlines before 1st attribute in open tags (to limit line lengths) + $this->_html = preg_replace('/(<[a-z\\-]+)\\s+([^>]+>)/i', "$1\n$2", $this->_html); + + // fill placeholders + $this->_html = str_replace( + array_keys($this->_placeholders) + ,array_values($this->_placeholders) + ,$this->_html + ); + return $this->_html; + } + + protected function _commentCB($m) + { + return (0 === strpos($m[1], '[') || false !== strpos($m[1], '<![')) + ? $m[0] + : ''; + } + + protected function _reservePlace($content) + { + $placeholder = '%' . $this->_replacementHash . count($this->_placeholders) . '%'; + $this->_placeholders[$placeholder] = $content; + return $placeholder; + } + + protected $_isXhtml = null; + protected $_replacementHash = null; + protected $_placeholders = array(); + protected $_cssMinifier = null; + protected $_jsMinifier = null; + + protected function _outsideTagCB($m) + { + return '>' . preg_replace('/^\\s+|\\s+$/', ' ', $m[1]) . '<'; + } + + protected function _removePreCB($m) + { + return $this->_reservePlace($m[1]); + } + + protected function _removeTextareaCB($m) + { + return $this->_reservePlace($m[1]); + } + + protected function _removeStyleCB($m) + { + $openStyle = $m[1]; + $css = $m[2]; + // remove HTML comments + $css = preg_replace('/(?:^\\s*<!--|-->\\s*$)/', '', $css); + + // remove CDATA section markers + $css = $this->_removeCdata($css); + + // minify + $minifier = $this->_cssMinifier + ? $this->_cssMinifier + : 'trim'; + $css = call_user_func($minifier, $css); + + return $this->_reservePlace($this->_needsCdata($css) + ? "{$openStyle}/*<![CDATA[*/{$css}/*]]>*/</style>" + : "{$openStyle}{$css}</style>" + ); + } + + protected function _removeScriptCB($m) + { + $openScript = $m[2]; + $js = $m[3]; + + // whitespace surrounding? preserve at least one space + $ws1 = ($m[1] === '') ? '' : ' '; + $ws2 = ($m[4] === '') ? '' : ' '; + + // remove HTML comments (and ending "//" if present) + $js = preg_replace('/(?:^\\s*<!--\\s*|\\s*(?:\\/\\/)?\\s*-->\\s*$)/', '', $js); + + // remove CDATA section markers + $js = $this->_removeCdata($js); + + // minify + $minifier = $this->_jsMinifier + ? $this->_jsMinifier + : 'trim'; + $js = call_user_func($minifier, $js); + + return $this->_reservePlace($this->_needsCdata($js) + ? "{$ws1}{$openScript}/*<![CDATA[*/{$js}/*]]>*/</script>{$ws2}" + : "{$ws1}{$openScript}{$js}</script>{$ws2}" + ); + } + + protected function _removeCdata($str) + { + return (false !== strpos($str, '<![CDATA[')) + ? str_replace(array('<![CDATA[', ']]>'), '', $str) + : $str; + } + + protected function _needsCdata($str) + { + return ($this->_isXhtml && preg_match('/(?:[<&]|\\-\\-|\\]\\]>)/', $str)); + } +} |