ACC SHELL
<?php
# html checker written originally for validating html
# tags
#
# (c) 2000 pavel kolesnikov <k@les.cz>
#
# usage:
# $parser = new HTMLparser($cfg)
# where $cfg is
# *1* empty, default configuration will be used,
# see "$this->cfg = array(" line in the constructor
# *2* an array, for the syntax see above (or below :)
# *3* path to the config file that contains something like
# <?php $cfg = array(...) ?remove_this_string>
# (for the syntax see *2* :)
# instance of HTMLparser is to be used in following way:
# $res = $parser->parse($string, $result_info)
# $res = 1/0 , ok/ko
# where result_info is a hash array containing the following fields:
# $rep = array(
# 'rc' => 0 or 1 (ok or ko),
# 'message' => $string with syntax downlighted ;),
# 'status' => see first declared variables of
# this class);
#
# BUGS & TODOs:
#
# although the config file syntax allows for more functionality,
# this class checks only for
# * correct html format (no crossing tags etc)
# * using only html tags allowed by config file
# * pair tags are closed and single ones are not
# it should check for correct tags nesting (<td> should
# be only inside <tr>, <tr> should be only inside <table>)
# for required/forbidden attributes (e.g. width and height
# inside <img> tags or javascript events inside any tags)
#
# for security reasons there is a patch against using
# javascript in tags' attributes, and due to lack of
# time this patch is very dirty; search for "dirty
# patch" if you're interested :)
#
#$GLOBALS['HPDEBUG']=1;
# Include guard: declare the class only once, even when this file is
# included several times.
if (empty($GLOBALS['__CLASSES_HTMLPARSER__'])) {
    $GLOBALS['__CLASSES_HTMLPARSER__'] = 1;

    # Small HTML checker.
    #
    # It verifies that
    #   * every tag is listed (with a non-empty value) in $cfg['allowed'],
    #   * paired tags are closed in the right order (no crossing tags),
    #   * tags listed in $cfg['single'] with a non-empty value are never closed,
    #   * attributes do not start with "on", and attribute values do not start
    #     with "javascript:" or a drive-letter path (the "dirty patch").
    #
    # The remaining configuration keys (disabled_attr, disabled_val,
    # mandatory_attr) are stored but not evaluated by this class.
    #
    # Usage:
    #   $parser = new HTMLparser($cfg);
    #   $ok = $parser->parse($html, $report);   # 1 = valid, 0 = invalid
    class HTMLparser {
        # Detailed reason codes stored in $this->status.
        var $UNEXP_GT = -11,
            $UNEXP_EQ = -2,
            $UNEXP_QUOT = -10,
            $CROSS_TAGS = -3,
            $UNMAT_TAG = -4,
            $UNMAT_QUOT = -5,
            $UNEXP_CLO = -6,
            $CLO_SNG = -7,
            $CLO_WS = -8,
            $BAD_TAG = -9,
            $EOF = 0;
        # Generic return codes of the scanner methods.
        var $ERR = -100,
            $NONE = -101,
            $FOUND = -102;
        var $OK = 1;
        # When non-empty, a '>' outside of any tag is an error.
        var $STRICT = 0;
        # Active configuration array, see _default_cfg() for the layout.
        var $cfg;
        # Reason code of the last parse() call (one of the codes above).
        var $status = 0;
        # Offsets of '>' characters found outside of tags during parse().
        var $GTBUF = array();

        # Builds the parser.
        #
        # $cfg may be
        #   * an array: used as the configuration,
        #   * a string: path of a PHP script that assigns an array to $cfg,
        #   * anything else: the built-in default configuration is used.
        # The default is also used when the script does not produce an array.
        function __construct($cfg = 0) {
            if (is_string($cfg)) {
                # NOTE(review): the file is executed as PHP code; the path
                # must never be derived from untrusted input.
                include($cfg);
            }
            if (is_array($cfg)) {
                $this->cfg = $cfg;
                return;
            }
            $this->debug("HTMLparser: default config");
            $this->cfg = $this->_default_cfg();
        }

        # PHP 4 style constructor, kept so that old code calling it
        # explicitly (e.g. from a subclass) keeps working. Declared after
        # __construct() on purpose: the other order raises E_STRICT on PHP 5.
        function HTMLparser($cfg = 0) {
            $this->__construct($cfg);
        }

        # Returns the built-in configuration.
        #   'allowed': tag name => 1 when the tag may be used
        #   'single' : tag name => 1 when the tag must not be closed,
        #              0 when closing it is optional
        # Tag names are looked up in lower case.
        function _default_cfg() {
            return array(
                'allowed' => array(
                    'b' => 1,
                    'strong' => 1,
                    'i' => 1,
                    'center' => 1,
                    'div' => 0,
                    'a' => 0,
                    'font' => 0,
                    'img' => 1,
                    'h1' => 0,
                    'h2' => 0,
                    'h3' => 0,
                    'h4' => 0,
                    'h5' => 0,
                    'h6' => 0,
                    'hr' => 0,
                    'br' => 1,
                    'p' => 0,
                    's' => 0,
                    'strike' => 0,
                    'sup' => 0,
                    'sub' => 0,
                    'blockquote' => 0,
                    'code' => 0,
                    'small' => 0,
                    'big' => 0,
                    'tt' => 0,
                    'dl' => 0,
                    'dt' => 0,
                    'dd' => 0,
                    'ol' => 0,
                    'ul' => 0,
                    'li' => 0,
                    'table' => 0,
                    'tr' => 0,
                    'td' => 0,
                    'th' => 0,
                    'u' => 0,
                    'pre' => 0,
                    'em' => 0),
                'disabled_attr' => array(
                    '*' => array(
                        '^on.*', 'style'),
                    'a' => array('name')),
                'disabled_val' => array(
                    '*' => array('javascript:')),
                'single' => array( # 0 = optional
                    'br' => 1,
                    'hr' => 0,
                    'img' => 1,
                    'p' => 0,
                    'dt' => 0,
                    'dd' => 0,
                    'li' => 0,
                    'td' => 0),
                'mandatory_attr' => array(
                    'img' => array(
                        'src', 'width', 'height'),
                    'a' => array('href')));
        }

        # Returns the active configuration array.
        function get_cfg() {
            return $this->cfg;
        }

        # Prints a debug line when $GLOBALS['HPDEBUG'] is set.
        function debug($msg) {
            if (!empty($GLOBALS['HPDEBUG'])) {
                echo "$msg<BR>\n";
                flush();
            }
        }

        # Same as debug(), but the message is printed in bold.
        function bdebug($msg) {
            if (!empty($GLOBALS['HPDEBUG'])) {
                echo "<B>$msg</B><BR>\n";
                flush();
            }
        }

        # Dumps $arr (recursively) as HTML.
        # Nothing is printed unless debugging is on or $show is non-empty.
        # Returns 0 when $arr is not an array, 1 otherwise.
        function show_array($arr, $name = "", $show = 0) {
            if (empty($GLOBALS['HPDEBUG']) && !$show)
                return 1;
            if (!is_array($arr)) {
                echo "$name is not an array<BR>\n";
                return 0;
            }
            echo "$name contains:<BR>";
            # Scalars are printed first, nested arrays afterwards.
            $nested = array();
            foreach ($arr as $key => $value) {
                if (is_array($value))
                    $nested[] = array($value, $name . '[' . $key . ']');
                else
                    echo "<TT> </TT>$key -> '$value'<BR>\n";
                flush();
            }
            foreach ($nested as $entry) {
                # $show is passed on so that a forced dump includes
                # the nested levels as well.
                $this->show_array($entry[0], $entry[1], $show);
            }
            echo "<BR>\n";
            return 1;
        }

        # Validates $str.
        #
        # Returns 1 when the markup is valid and 0 otherwise. On failure
        # $report is filled by generate_report() with
        #   'rc'      => last code returned by next_tag() ($EOF or $ERR),
        #   'message' => the input as HTML with the tags highlighted,
        #   'status'  => reason code, see the properties of this class.
        # On success $report is an empty array.
        function parse($str, &$report) {
            $this->bdebug('htmlparser::parse');
            $report = array();
            $tagbuf = array();   # stack of tags that are still open
            $tags = array();     # every tag read so far, in input order
            $pos = 0;
            $str = (string) $str;
            $len = strlen($str);
            $this->status = 0;
            # Must be reset here, otherwise offsets collected by an earlier
            # parse() call on this instance leak into this one.
            $this->GTBUF = array();

            do {
                $rc = $this->next_tag($str, $tags, $tagbuf, $pos, $len);
                $this->bdebug("next_tag vratil '$rc'");
                $this->show_array($tagbuf, 'tagbuf');
            } while ($rc === $this->OK);

            $this->show_array($tagbuf, 'parse::tagbuf');
            $this->clean_tagbuf($tagbuf);

            # Anything left on the stack is an unclosed paired tag.
            $count = 0;
            foreach ($tagbuf as $entry) {
                if (!empty($entry))
                    $count++;
            }
            $this->debug("count(tagbuf) = $count");

            $this->replace_gts($str);
            if (($rc === $this->EOF) && !$count)
                return 1;
            $this->generate_report(
                $report,
                $tags,
                $tagbuf,
                $str,
                $pos,
                $len,
                $rc);
            return 0;
        }

        # Rebuilds $str by cutting it at every offset stored in
        # $this->GTBUF and gluing the pieces back together with '>'.
        # Always returns 1.
        # NOTE(review): because the glue is the very character that was cut
        # out, the string comes back unchanged. Presumably an escaped form
        # was intended; inserting one here would shift the offsets that
        # generate_report() relies on, so the escaping is done there.
        function replace_gts(&$str) {
            $this->bdebug('htmlparser::replace_gts');
            if (empty($this->GTBUF) || !is_array($this->GTBUF))
                return 1;
            $this->debug(' ... ma smysl');
            $last = 0;
            $pieces = array();
            foreach ($this->GTBUF as $gt) {
                $pieces[] = (string) substr($str, $last, $gt - $last);
                $last = $gt + 1;
            }
            $pieces[] = (string) substr($str, $last, strlen($str) - $last);
            $str = implode('>', $pieces);
            return 1;
        }

        # Blanks the trailing entries of the open-tag stack whose closing
        # tag is optional (any tag present in $cfg['single']). Stops at the
        # first entry that really needs a closing tag.
        function clean_tagbuf(&$tagbuf) {
            $this->bdebug('htmlparser::clean_tagbuf');
            for ($c = count($tagbuf) - 1; $c >= 0; $c--) {
                if (empty($tagbuf[$c]))
                    continue;
                $name = strtolower($tagbuf[$c]['name']);
                $this->debug("ct: rusim $name? (c = $c)");
                if (!isset($this->cfg['single'][$name]))
                    break;
                $tagbuf[$c] = '';
                $this->debug("ano");
            }
        }

        # Fills $rep with the failure report described at parse().
        #
        # Accepted tags are wrapped in blue, the stretch between the last
        # accepted tag and the position where parsing stopped in red, and
        # the rest of the input follows escaped.
        function generate_report(&$rep, &$tags, &$tagbuf, &$str, $pos, $len, $rc) {
            $this->bdebug("htmlparser::generate_report");
            $this->show_array($tagbuf, 'generate_report::tagbuf');
            $this->show_array($tags, 'generate_report::tags');
            # Angle brackets of the quoted input must not be interpreted
            # as markup by the browser showing the report.
            $escape = array('<' => '&lt;', '>' => '&gt;');
            $repstr = '';
            $i = 0;
            foreach ($tags as $t) {
                $start = $t['start'];
                $end = $t['end'];
                $this->debug("start = $start, end = $end");
                $repstr .= strtr((string) substr($str, $i, $start - $i), $escape);
                $repstr .= sprintf('<FONT COLOR="blue"><B>%s</B></fONT>',
                    strtr((string) substr($str, $start, $end - $start), $escape));
                $i = $end;
            }
            if ($i != $len) {
                $repstr .= sprintf('<B><FONT COLOR="red">%s</FONT></B>',
                    strtr((string) substr($str, $i, $pos - $i), $escape));
                $repstr .= htmlspecialchars((string) substr($str, $pos));
            }
            $rep = array(
                'rc' => $rc,
                'message' => $repstr,
                'status' => $this->status);
        }

        # Scans forward from $pos to the next tag and processes it.
        #
        # Returns $OK when a tag was read and accepted ($tags and the
        # open-tag stack $tagbuf are updated), $EOF when the input ended
        # and $ERR on any error ($this->status then holds the reason).
        function next_tag(&$str, &$tags, &$tagbuf, &$pos, $len) {
            $this->bdebug("htmlparser::next_tag(pos = $pos, len = $len)");
            $this->show_array($tagbuf, 'tagbuf');
            while ($pos < $len) {
                $c = $str[$pos++];
                if ($c == '>') {
                    if (!empty($this->STRICT)) {
                        $this->status = $this->UNEXP_GT;
                        # Must be $ERR: returning 0 would be read as $EOF
                        # and the input would be reported as valid.
                        return $this->ERR;
                    }
                    $this->debug("pridavam do GTBUF " . strval($pos - 1));
                    $this->GTBUF[] = $pos - 1;
                    continue;
                }
                if ($c != '<')
                    continue;

                $tag = $this->read_tag($str, $pos, $len);
                if (!is_array($tag))
                    return $tag;
                $name = strtolower($tag['name']);

                if ($tag['closing']) {
                    $this->debug("'$name' is closing'");
                    $closed = $this->check_closing_tag($tagbuf, $tag);
                    if ($closed !== 1) {
                        # check_closing_tag() sets the more specific
                        # CLO_SNG itself when it returns $ERR.
                        if ($closed !== $this->ERR)
                            $this->status = $this->UNEXP_CLO;
                        return $this->ERR;
                    }
                    $this->debug("next_tag: mam closing");
                    $tags[] = $tag;
                    return $this->OK;
                }

                # Tags that must never be closed do not go on the stack.
                if (empty($this->cfg['single'][$name])) {
                    $this->debug("$name neni single");
                    $tagbuf[] = $tag;
                }
                $tags[] = $tag;
                return $this->OK;
            }
            return $this->EOF;
        }

        # Matches the closing tag $tag against the open-tag stack.
        #
        # Returns 1 when a matching open tag was found; that tag and
        # everything opened after it are removed from $tagbuf. Returns 0
        # when there is no acceptable match, and $ERR (status CLO_SNG) when
        # the tag is one that must not be closed. Open tags whose closing
        # tag is optional are skipped while searching.
        function check_closing_tag(&$tagbuf, &$tag) {
            $new = strtolower($tag['name']);
            $this->debug("<B>htmlparser::check_closing_tag:</B> '$new'");
            $this->show_array($tagbuf, 'c_c_t: tagbuf');
            if (!empty($this->cfg['single'][$new])) {
                $this->status = $this->CLO_SNG;
                $this->debug("clo_sng");
                return $this->ERR;
            }
            $this->debug("'$tag[name]' = '$new' is not single");

            # Make keys and offsets identical before splicing.
            $tagbuf = array_values($tagbuf);
            for ($l = count($tagbuf) - 1; $l >= 0; $l--) {
                if (empty($tagbuf[$l]))
                    continue;
                $last = strtolower($tagbuf[$l]['name']);
                $this->debug("predchazi tag '$last'");
                if ($last === $new) {
                    $this->debug("'$new' uzavira tag '$last'");
                    # Drop the matched tag together with the optional-close
                    # tags that were opened inside it.
                    array_splice($tagbuf, $l);
                    $this->show_array($tagbuf, 'tagbuf po vyruseni');
                    return 1;
                }
                if (!isset($this->cfg['single'][$last])) {
                    $this->debug("neakceptovatelny tag '$last'");
                    return 0;
                }
                $this->debug("nepárový tag '$last'");
            }
            $this->debug("vracim 0");
            return 0;
        }

        # Reads one tag; $pos must point just behind its '<'.
        #
        # Returns $ERR on failure, otherwise an array with
        #   'name', 'closing' (0/1), 'start' (offset of '<'),
        #   'end' (offset behind '>') and, for opening tags, 'params'.
        function read_tag(&$str, &$pos, $len) {
            $this->bdebug("htmlparser::read_tag(pos = $pos)");
            $closing = 0;
            $start = $pos - 1;
            if (($pos < $len) && ($str[$pos] == '/')) {
                $closing = 1;
                $pos++;
                $this->debug("closing...");
            }
            $name = $this->get_tag_name($str, $pos, $len);
            $this->debug("name = $name");
            if ($name === $this->ERR)
                return $this->ERR;
            if (empty($this->cfg['allowed'][strtolower((string) $name)])) {
                $this->debug("'$name' is a bad tag");
                $this->status = $this->BAD_TAG;
                return $this->ERR;
            }
            if ($closing) {
                # A closing tag must end right behind its name.
                $last = ($pos < $len) ? $str[$pos] : '';
                $this->debug("last pred koncem closing = '$last'");
                if ($last == '>') {
                    $pos++;
                    return array( 'name' => $name,
                        'closing'=> $closing,
                        'start' => $start,
                        'end' => $pos);
                }
                $this->status = $this->CLO_WS;
                return $this->ERR;
            }
            $params = $this->get_tag_params($str, $pos, $len);
            if (!is_array($params))
                return $this->ERR;
            $this->debug("read_tag: Vracim OK");
            return array( 'name' => $name,
                'params'=> $params,
                'closing'=> $closing,
                'start' => $start,
                'end' => $pos);
        }

        # Reads the tag name: skips whitespace, then takes everything up to
        # the next whitespace or '>'. Returns $EOF when the input ends
        # before a name starts.
        function get_tag_name(&$str, &$pos, $len) {
            $this->debug("<B>get_tag_name:</B> pos = $pos");
            if (!$this->skip_ws($str, $pos, $len, $last))
                return $this->EOF;
            $n = strcspn($str, " \t\r\n>", $pos, $len - $pos);
            $name = (string) substr($str, $pos, $n);
            $pos += $n;
            $this->debug("return '$name'");
            return $name;
        }

        # Reads all attributes of an opening tag, including the final '>'.
        #
        # Returns an array attribute name => value, or $ERR when
        #   * the input ends before the tag is terminated,
        #   * an attribute is malformed (status is set by the helpers),
        #   * an attribute is rejected by the "dirty patch" below
        #     (status is left untouched in that case).
        function get_tag_params(&$str, &$pos, $len) {
            $this->debug("<B>get_tag_params:</b> pos = $pos");
            $params = array();
            while (1) {
                $key = $this->get_tag_params_key($str, $pos, $len);
                if ($key === $this->ERR)
                    return $this->ERR;
                if ($key === $this->EOF) {
                    # Unterminated tag: never accept it, a browser would
                    # treat whatever follows as part of the tag.
                    $this->status = $this->EOF;
                    return $this->ERR;
                }
                if ($key === '') {
                    # Only '>' makes the key reader stop without a name.
                    $pos++;
                    return $params;
                }
                $val = $this->get_tag_params_val($str, $pos, $len);
                if ($val === $this->ERR)
                    return $this->ERR;
                /* dirty patch */
                $this->debug("dirty patch: $key : $val");
                if (preg_match('/^on/i', $key)) {
                    $this->debug("key '$key' is 'on'");
                    return $this->ERR;
                }
                if (preg_match('/^\s*javascript:/i', (string) $val)) {
                    $this->debug("val '$val' is javascript");
                    return $this->ERR;
                }
                # Local drive-letter paths such as c:\...
                if (preg_match('/^\s*[a-z]:\\\\/i', (string) $val)) {
                    return $this->ERR;
                }
                /* e.o.d.p. */
                $params[$key] = $val;
            }
        }

        # Reads one attribute name.
        #
        # Skips whitespace, then takes everything up to the next
        # whitespace, '=' or '>'; the terminator itself is not consumed.
        # Returns the name ('' when the next character is '>'), $EOF when
        # the input ends first, or $ERR (status UNEXP_EQ) when a '=' shows
        # up where a name is expected.
        function get_tag_params_key(&$str, &$pos, $len) {
            $this->debug("<B>get_tag_params_key:</B> pos = $pos");
            if (!$this->skip_ws($str, $pos, $len, $last))
                return $this->EOF;
            if ($last == '=') {
                $pos++;
                $this->status = $this->UNEXP_EQ;
                $this->debug("$this->ERR | $this->UNEXP_EQ | pos = $pos");
                return $this->ERR;
            }
            $n = strcspn($str, " \t\r\n=>", $pos, $len - $pos);
            $key = (string) substr($str, $pos, $n);
            $pos += $n;
            $this->debug("return '$key'");
            return $key;
        }

        # Reads the value that belongs to the attribute name just read.
        #
        # Without a '=' the attribute has no value: '' is returned and
        # nothing is consumed. A value may be enclosed in double or single
        # quotes; an unquoted value ends at whitespace or '>'.
        # Returns $EOF when the input ends before a value starts and $ERR
        # (status UNMAT_QUOT / UNEXP_QUOT) for a quote that is never closed
        # before '>' or the end of input, or a '"' inside an unquoted value.
        function get_tag_params_val(&$str, &$pos, $len) {
            $this->debug("<B>get_tag_params_val:</B> pos = $pos");
            if ($this->skip_ws($str, $pos, $len, $last) == $this->EOF)
                return $this->EOF;
            if ($last != '=')
                return '';
            $pos++;
            if (!$this->skip_ws($str, $pos, $len, $last))
                return $this->EOF;

            if (($last == '"') || ($last == "'")) {
                $quote = $last;
                $pos++;
                $n = ($pos < $len)
                    ? strcspn($str, $quote . '>', $pos, $len - $pos)
                    : 0;
                $end = $pos + $n;
                if (($end >= $len) || ($str[$end] != $quote)) {
                    $pos = ($end < $len) ? $end + 1 : $len;
                    $this->status = $this->UNMAT_QUOT;
                    $this->debug("$this->ERR | $this->UNMAT_QUOT");
                    return $this->ERR;
                }
                $val = (string) substr($str, $pos, $n);
                $pos = $end + 1;
                $this->debug("<LI>return $val");
                return $val;
            }

            $n = strcspn($str, " \t\r\n>\"", $pos, $len - $pos);
            $end = $pos + $n;
            if (($end < $len) && ($str[$end] == '"')) {
                $pos = $end + 1;
                $this->status = $this->UNEXP_QUOT;
                $this->debug("$this->ERR | $this->UNEXP_QUOT");
                return $this->ERR;
            }
            $val = (string) substr($str, $pos, $n);
            $pos = $end;
            $this->debug("<LI>return $val");
            return $val;
        }

        # Advances $pos over blanks, tabs, CR and LF.
        # Returns $OK with $last set to the first other character (which is
        # not consumed), or $EOF when the input ends first.
        function skip_ws(&$str, &$pos, $len, &$last) {
            $this->debug("<B>skip_ws:</B> pos = $pos");
            while ($pos < $len) {
                $last = $str[$pos];
                if (strpos(" \t\n\r", $last) === false)
                    return $this->OK;
                $pos++;
            }
            return $this->EOF;
        }
    }
}
?>
ACC SHELL 2018