ACC SHELL

Path : /srv/www/vhosts/pzk/__functions/
File Upload :
Current File : //srv/www/vhosts/pzk/__functions/parser.php

<?php

#    html checker written originally for validating html
#    tags
#
#    (c) 2000 pavel kolesnikov <k@les.cz>
#
#    usage:
#        $parser    = new HTMLparser($cfg)
#    where $cfg is
#        *1* empty, default configuration will be used,
#            see "$this->cfg    = array(" line in the constructor
#        *2* an array, for the syntax see above (or below :)
#        *3* path to the config file that contains something like
#            <?php $cfg = array(...) ?remove_this_string>
#            (for the syntax see *2* :)
#    instance of HTMLparser is to be used in following way:
#        $res = $parser->parse($string, $result_info)
#        $res = 1/0 , ok/ko
#    where result_info is a hash array containing the following fields:
#        $rep    = array(
#            'rc'      => 0 or 1 (ok or ko),
#            'message' => $string with syntax downlighted ;),
#            'status'  => see first declared variables of
#                     this class);
#
#    BUGS & TODOs:
#
#        although the config file syntax suggests more functionality,
#        this class checks only for
#            * correct html format (no crossing tags etc)
#            * using only html tags allowed by config file
#            * pair tags are closed and single ones are not
#        it should also check for correct tag nesting (<td> should
#        be only inside <tr>, <tr> should be only inside <table>)
#        and for required/forbidden attributes (e.g. width and height
#        inside <img> tags or javascript events inside any tags)
#
#        for security reasons there's a patch against using
#        javascript in tags' attributes; due to lack of
#        time this patch is very dirty, search for "dirty
#        patch" if you're interested :)
#

#$GLOBALS['HPDEBUG']=1;
                

if (empty($GLOBALS['__CLASSES_HTMLPARSER__'])) {
$GLOBALS['__CLASSES_HTMLPARSER__'] = 1;

class HTMLparser {

    # Status codes stored in $this->status when a problem is detected:
    #   UNEXP_GT   - '>' met outside of a tag while STRICT is on (next_tag)
    #   UNEXP_EQ   - '=' met before any attribute-name character
    #                (get_tag_params_key)
    #   UNEXP_QUOT - '"' met inside an unquoted, non-empty attribute value
    #                (get_tag_params_val)
    #   CROSS_TAGS - declared but not referenced anywhere in this class
    #   UNMAT_TAG  - declared but not referenced anywhere in this class
    #   UNMAT_QUOT - '>' met inside a quoted, non-empty attribute value
    #                (get_tag_params_val)
    #   UNEXP_CLO  - closing tag rejected by check_closing_tag (next_tag)
    #   CLO_SNG    - closing tag given for a tag whose 'single' config
    #                entry is non-empty (check_closing_tag)
    #   CLO_WS     - closing tag name not immediately followed by '>'
    #                (read_tag)
    #   BAD_TAG    - tag name not enabled in $cfg['allowed'] (read_tag)
    #   EOF        - end of input; returned by the scanner methods
    var    $UNEXP_GT        = -11,
        $UNEXP_EQ        = -2,
        $UNEXP_QUOT        = -10,
        $CROSS_TAGS        = -3,
        $UNMAT_TAG        = -4,
        $UNMAT_QUOT        = -5,
        $UNEXP_CLO        = -6,
        $CLO_SNG        = -7,
        $CLO_WS            = -8,
        $BAD_TAG        = -9,
        $EOF            = 0;
    # Generic return codes. ERR is the error value returned by the
    # scanner methods; NONE and FOUND are declared but not referenced
    # anywhere in this class.
    var    $ERR            = -100,
        $NONE            = -101,
        $FOUND            = -102;
    # Return value meaning "a tag was read successfully, keep going".
    var    $OK            = 1;

    # When non-empty, a '>' found outside of a tag is an error
    # (UNEXP_GT); otherwise its position is remembered in $this->GTBUF
    # so that replace_gts can turn it into '&gt;'.
    var    $STRICT            = 0;
    # Configuration array with the keys 'allowed', 'single',
    # 'disabled_attr', 'disabled_val' and 'mandatory_attr' (see the
    # default in the constructor). Only 'allowed' and 'single' are read
    # by the methods of this class.
    var    $cfg;

    # Constructor.
    #
    # $cfg may be:
    #   * an array       - used directly as the configuration
    #   * a string       - path to a PHP script that assigns an array
    #                      to $cfg; the script is include()d
    #   * anything else  - the built-in default configuration is used
    #
    # The default is also used when the included script does not leave
    # an array in $cfg.
    #
    # Declared as __construct() so that it is still run as the
    # constructor on PHP versions that no longer honour class-named
    # constructors; HTMLparser() below keeps the old entry point.
    function __construct($cfg = 0) {
        if (is_array($cfg)) {
            # configuration passed in directly
            $this->cfg    = $cfg;
        } else if (is_string($cfg)) {
            # path to a configuration script
            # NOTE(review): include() executes the file, so the path
            # must come from a trusted source, never from user input.
            include($cfg);
            if (is_array($cfg))
                $this->cfg    = $cfg;
        }

        # The original code had no braces around the fallback, so the
        # default below unconditionally replaced any configuration
        # supplied by the caller.
        if (is_array($this->cfg))
            return;

        $this->debug("HTMLparser: default config");
        $this->cfg    = array(
            # tag name => 1 (enabled) / 0 (disabled)
            'allowed'    => array(
                'b'        => 1,
                'strong'    => 1,
                'i'        => 1,
                'center'    => 1,
                'div'        => 0,
                'a'        => 0,
                'font'        => 0,
                'img'        => 1,
                'h1'        => 0,
                'h2'        => 0,
                'h3'        => 0,
                'h4'        => 0,
                'h5'        => 0,
                'h6'        => 0,
                'hr'        => 0,
                'br'        => 1,
                'p'        => 0,
                's'        => 0,
                'strike'    => 0,
                'sup'        => 0,
                'sub'        => 0,
                'blockquote'    => 0,
                'code'        => 0,
                'small'        => 0,
                'big'        => 0,
                'tt'        => 0,
                'dl'        => 0,
                'dt'        => 0,
                'dd'        => 0,
                'ol'        => 0,
                'ul'        => 0,
                'li'        => 0,
                'table'        => 0,
                'tr'        => 0,
                'td'        => 0,
                'th'        => 0,
                'u'        => 0,
                'pre'        => 0,
                'em'        => 0),
            'disabled_attr'    => array(
                '*'        => array(
                    '^on.*', 'style'),
                'a'    => array('name')),
            'disabled_val'    => array(
                '*'    => array('javascript:')),
            'single'    => array( # 0 = optional
                'br'        => 1,
                'hr'        => 0,
                'img'        => 1,
                'p'        => 0,
                'dt'        => 0,
                'dd'        => 0,
                'li'        => 0,
                'td'        => 0),
            'mandatory_attr' => array(
                'img'        => array(
                    'src', 'width', 'height'),
                'a'        => array('href')));
    }

    # Class-named constructor kept for engines and callers that rely
    # on it; simply delegates to __construct().
    function HTMLparser($cfg = 0) {
        $this->__construct($cfg);
    }

    # Returns the configuration array currently used by this parser.
    function get_cfg() {
        $active_cfg = $this->cfg;
        return $active_cfg;
    }

    # Prints $msg followed by an HTML line break and flushes the
    # output, but only while $GLOBALS['HPDEBUG'] is non-empty.
    function debug($msg) {
        if (empty($GLOBALS['HPDEBUG']))
            return;
        echo $msg . "<BR>\n";
        flush();
    }

    # Same as debug(), but the message is wrapped in <B>...</B>.
    # Nothing is printed unless $GLOBALS['HPDEBUG'] is non-empty.
    function bdebug($msg) {
        if (empty($GLOBALS['HPDEBUG']))
            return;
        echo '<B>' . $msg . "</B><BR>\n";
        flush();
    }

        # Debug helper: prints the contents of $arr as HTML.
        #
        # $arr   value to dump
        # $name  label printed in front of the dump
        # $show  when non-zero the dump is printed even if
        #        $GLOBALS['HPDEBUG'] is empty
        #
        # Scalar members are printed first, nested arrays are dumped
        # afterwards under the label "$name[key]".
        #
        # Returns 1 when nothing had to be printed or the array was
        # dumped, 0 when $arr is not an array.
        function show_array($arr, $name="", $show = 0) {
                if (empty($GLOBALS['HPDEBUG']) && !$show)
                        return 1;
                if (!is_array($arr)) {
                        echo "$name is not an array<BR>\n";
                        return 0;
                }
                echo "$name contains:<BR>";
                $nested = array();
                # foreach instead of each(), which newer PHP versions
                # no longer provide
                foreach ($arr as $j => $i) {
                        if (is_array($i))
                                $nested[] = array($i, $name . '[' . $j . ']');
                        else
                                echo "<TT>&nbsp; &nbsp;</TT>$j -&gt; '$i'<BR>\n";
                        flush();
                }
                # $show is passed on; without it nested arrays were
                # silently skipped whenever the dump was forced while
                # HPDEBUG was off
                foreach ($nested as $b)
                        $this->show_array($b[0], $b[1], $show);
                echo "<BR>\n";
                return 1;
        }


    # Validates the HTML fragment $str.
    #
    # $str     text to check
    # $report  (out) reset to an empty array; on failure it is filled by
    #          generate_report() with the keys 'rc', 'message' and
    #          'status'
    #
    # Returns 1 when the whole input was read, no tag is left open and
    # no error status was recorded; 0 otherwise.
    function parse($str, &$report) {
        $this->bdebug('htmlparser::parse');
        $report        = array();
        $tagbuf        = array();
        $tags        = array();
        $pos        = 0;
        $len        = strlen($str);
        $this->status    = 0;
        # This used to reset a misspelled property ("GTBUG"), so the
        # '>' positions collected during one call leaked into the next.
        $this->GTBUF    = array();

        while (($rc =
            $this->next_tag($str, $tags, $tagbuf, $pos, $len))
            == $this->OK)
        {
            $this->bdebug("next_tag vratil '$rc'");
            $this->show_array($tagbuf,
                'parse: tagbuf po navratu z next_tag');
            $this->show_array($tagbuf, 'tagbuf');
        }
        $this->show_array($tagbuf, 'parse::tagbuf');
        # trailing tags that may legally stay open are blanked out
        $this->clean_tagbuf($tagbuf);

        # count the tags that are still open
        $count = 0;
        foreach ($tagbuf as $a) {
            $is_tag = is_array($a);
            $this->debug("countimg tagbuf: a = '"
                . ($is_tag ? 'Array' : $a) . "' "
                . ($is_tag ? $a['name'] : ''));
            if (!empty($a))
                $count++;
        }
        $this->debug("count(tagbuf) = $count");

        # A non-zero status means some stage flagged an error even if
        # the scan itself reached the end of the input.
        if (($rc == $this->EOF) && !$count && !$this->status)
            return 1;

        # replace_gts() turns every recorded '>' (1 byte) into '&gt;'
        # (4 bytes). All offsets handed to generate_report() were taken
        # on the unmodified string, so move each of them by 3 bytes per
        # replaced character that precedes it.
        $gts    = (!empty($this->GTBUF) && is_array($this->GTBUF))
            ? array_values($this->GTBUF) : array();
        $ngts    = count($gts);
        $g    = 0;
        $extra    = 0;
        foreach (array_keys($tags) as $k) {
            while ($g < $ngts && $gts[$g] < $tags[$k]['start']) {
                $extra += 3;
                $g++;
            }
            $tags[$k]['start'] += $extra;
            while ($g < $ngts && $gts[$g] < $tags[$k]['end']) {
                $extra += 3;
                $g++;
            }
            $tags[$k]['end'] += $extra;
        }
        while ($g < $ngts && $gts[$g] < $pos) {
            $extra += 3;
            $g++;
        }
        $pos += $extra;

        $this->replace_gts($str);
        $len = strlen($str);

        $this->generate_report(
            $report,
            $tags,
            $tagbuf,
            $str,
            $pos,
            $len,
            $rc);
        return 0;
    }

    # Replaces, in place, every character of $str whose offset is
    # listed in $this->GTBUF by the entity '&gt;'.
    # Does nothing when GTBUF is missing, not an array or empty.
    # Always returns 1.
    function replace_gts(&$str) {
        $this->bdebug('htmlparser::replace_gts');
        $have_offsets = !empty($this->GTBUF)
            && is_array($this->GTBUF)
            && count($this->GTBUF);
        if (!$have_offsets)
            return 1;

        $this->debug('         ... ma smysl');

        # cut the string at each recorded offset, dropping the
        # character found there
        $pieces    = array();
        $from    = 0;
        foreach ($this->GTBUF as $offset) {
            $pieces[]    = substr($str, $from, $offset - $from);
            $from        = $offset + 1;
        }
        # remainder after the last recorded offset
        $pieces[]    = substr($str, $from, strlen($str) - $from);

        # glue the pieces back together with the entity in between
        $str    = implode('&gt;', $pieces);
        return 1;
    }

    # Walks $tagbuf from its end towards the beginning and blanks out
    # (sets to '') every entry whose lower-cased name has an entry in
    # $cfg['single']. Empty entries are skipped; the walk stops at the
    # first entry that is not listed in $cfg['single'].
    function clean_tagbuf(&$tagbuf) {
        $this->bdebug('htmlparser::clean_tagbuf');
        reset($tagbuf);
        for ($idx = count($tagbuf) - 1; $idx >= 0; $idx--) {
            if (empty($tagbuf[$idx]))
                continue;
            $tagname = strtolower($tagbuf[$idx]['name']);
            $this->debug("ct: rusim $tagname? (c = $idx)");
            if (!isset($this->cfg['single'][$tagname]))
                break;
            $tagbuf[$idx] = '';
            $this->debug("ano");
        }
    }

    # Builds the failure report and stores it in $rep.
    #
    # $rep     (out) array with 'rc', 'message' and 'status'
    # $tags    tags read so far, each with 'start' and 'end' offsets
    # $tagbuf  only dumped for debugging
    # $str     the text being reported on
    # $pos     offset at which scanning stopped
    # $len     length compared against the end of the last tag
    # $rc      value copied into $rep['rc']
    #
    # The message is $str with every tag from $tags wrapped in blue
    # bold markup; when the last tag does not end at $len, the text up
    # to $pos is wrapped in red bold markup and the remainder is
    # appended through htmlspecialchars().
    function generate_report(&$rep, &$tags, &$tagbuf, &$str, $pos, $len, $rc) {
        $this->bdebug("htmlparser::generate_report");
        reset($tags);
        reset($tagbuf);
        $this->show_array($tagbuf, 'generate_report::tagbuf');
        $this->show_array($tags, 'generate_report::tags');

        $angle_escapes    = array('<' => '&lt;', '>' => '&gt;');
        $markup        = '';
        $cursor        = 0;
        foreach ($tags as $t) {
            if (!$t)
                break;
            $start    = $t['start'];
            $end    = $t['end'];
            $this->debug("start = $start, end = $end");
            # text in front of the tag is copied as it is
            $markup    .= substr($str, $cursor, ($start - $cursor));
            # the tag itself, escaped and highlighted
            $tag_text    = substr($str, $start, ($end - $start));
            $markup    .= sprintf('<FONT COLOR="blue"><B>%s</B></fONT>',
                strtr($tag_text, $angle_escapes));
            $cursor    = $end;
        }

        if ($cursor != $len) {
            # part between the last good tag and the stop position
            $bad_text    = substr($str, $cursor, $pos - $cursor);
            $markup    .= sprintf('<B><FONT COLOR="red">%s</FONT></B>',
                strtr($bad_text, $angle_escapes));
            $markup    .= htmlspecialchars(substr($str, $pos));
        }

        $rep    = array(
            'rc'        => $rc,
            'message'    => $markup,
            'status'    => $this->status);
    }

    # Scans $str from $pos for the next tag and records it.
    #
    # $str     text being parsed
    # $tags    (in/out) every tag read so far is appended here
    # $tagbuf  (in/out) stack of tags that are still open
    # $pos     (in/out) current offset, advanced past what was consumed
    # $len     length of $str
    #
    # A '>' found outside of a tag is an error in STRICT mode; otherwise
    # its offset is remembered in $this->GTBUF.
    #
    # Returns $this->OK when a tag was read, $this->EOF when the end of
    # the input was reached and $this->ERR on any error (the reason is
    # left in $this->status).
    function next_tag(&$str, &$tags, &$tagbuf, &$pos, $len) {
        $this->bdebug("htmlparser::next_tag(pos = $pos, len = $len)");
        $this->show_array($tagbuf, 'tagbuf');
        while ($pos < $len) {
            $c    = $str[$pos++];
            $this->debug("c = '$c'");
            if ($c == '>') {
                if (!empty($this->STRICT)) {
                    $this->status    = $this->UNEXP_GT;
                    # This used to return 0, which equals EOF and was
                    # therefore reported as a clean end of input.
                    return $this->ERR;
                }
                $this->debug("pridavam do GTBUF " . strval($pos - 1));
                $this->GTBUF[] = $pos - 1;
                continue;
            }
            if ($c != '<')
                continue;

            $tag    = $this->read_tag($str, $pos, $len);
            # anything but an array is an error code; the name is only
            # read once we know there is a tag
            if (!is_array($tag))
                return $this->ERR;
            $name    = $tag['name'];

            if ($tag['closing']) {
                $this->debug("'$name' is closing'");
                # Only an explicit 1 means success, so a negative
                # error code is not mistaken for a true value.
                $closed = $this->check_closing_tag($tagbuf, $tag);
                if ($closed != 1) {
                    # keep the more specific reason if one was set
                    if ($this->status != $this->CLO_SNG)
                        $this->status = $this->UNEXP_CLO;
                    return $this->ERR;
                }
                $this->debug("next_tag: mam closing");
                $tags[]    = $tag;
                return $this->OK;
            }

            # tags that must stay single are never put on the stack;
            # the lookup is lower-cased like every other 'single' lookup
            if (empty($this->cfg['single'][strtolower($name)])) {
                $this->debug("$name neni single");
                $tagbuf[]    = $tag;
            }
            $tags[]        = $tag;
            return    $this->OK;
        }
        # end of input (previously an implicit NULL)
        return $this->EOF;
    }

    # Matches the closing tag $tag against the stack of open tags.
    #
    # $tagbuf  (in/out) stack of open tags; on success the matching
    #          opening tag and everything above it are removed
    # $tag     the closing tag, as returned by read_tag()
    #
    # Walking down from the top of the stack, empty entries and tags
    # listed in $cfg['single'] are skipped; the first other tag must
    # carry the same (lower-cased) name as $tag.
    #
    # Returns 1 when the tag closes an open tag, 0 otherwise. When the
    # tag is configured as mandatory single, $this->status is set to
    # CLO_SNG.
    function check_closing_tag(&$tagbuf, &$tag) {
        # index of the topmost entry, -1 for an empty stack
        $keys    = array_keys($tagbuf);
        $keysc    = count($keys);
        $l    = $keysc ? $keys[$keysc - 1] : -1;
        $tbc    = $l + 1;
        $new    = strtolower($tag['name']);
        $this->debug("<B>htmlparser::check_closing_tag:</B> '$new'");
        reset($tagbuf);
        $this->show_array($tagbuf, 'c_c_t: tagbuf');
        $this->debug("tbc = $tbc, l = $l, new = $new");
        if (!empty($this->cfg['single'][$new])) {
            $this->status    = $this->CLO_SNG;
            $this->debug("clo_sng");
            # Must be a false value: the negative ERR code returned
            # here before is true in a boolean test, so the closing
            # tag ended up being accepted.
            return 0;
        }
        $this->debug("'$tag[name]' = '$new' is not single");
        while ($l >= 0) {
            if (empty($tagbuf[$l])) {
                $this->debug("tagbuf[$l] = '' empty");
                $l--;
                continue;
            }
            $last    = strtolower($tagbuf[$l]['name']);
            $this->debug("predchazi tag '$last'");
            if ($last == $new) {
                $this->debug("'$new' uzavira tag '$last'");
                $this->debug("rusim tagbuf[$l]");
                # Drop the matching tag and everything above it in one
                # go. The former loop spliced at an increasing index
                # while the array was shrinking, so it removed only
                # every other entry.
                # Working on a copy is kept from the original code;
                # presumably a workaround for splicing a by-reference
                # argument - TODO confirm it is still needed.
                $__tmp = $tagbuf;
                array_splice($__tmp, $l);
                $tagbuf = $__tmp;
                $this->show_array($tagbuf, 'tagbuf po vyruseni');
                return 1;
            }
            if (isset($this->cfg['single'][$last]))
            {
                $this->debug("nepárový tag '$last'");
                $l--;
            } else {
                $this->debug("neakceptovatelny tag '$last'");
                return 0;
            }
        }
        $this->debug("vracim 0");
        return 0;
    }

    # Reads one tag; $pos must point just behind its '<'.
    #
    # $str  text being parsed
    # $pos  (in/out) current offset, advanced past what was consumed
    # $len  length of $str
    #
    # Returns an array with 'name', 'closing', 'start', 'end' (and
    # 'params' for opening tags), or $this->ERR. On error
    # $this->status is BAD_TAG when the tag is not enabled in
    # $cfg['allowed'], CLO_WS when the name of a closing tag is not
    # directly followed by '>', or whatever the attribute scanner set.
    function read_tag(&$str, &$pos, $len) {
        $this->bdebug("htmlparser::read_tag(pos = $pos)");
        $closing    = 0;
        $start        = $pos - 1;
        # bounds check: the input may end right after '<'
        if ($pos < $len && $str[$pos] == '/') {
            $closing = 1;
            $pos++;
            $this->debug("closing...");
        }
        $name    = $this->get_tag_name($str, $pos, $len);
        $this->debug("name = $name");
        # strict comparison, so that a tag literally named like the
        # numeric error code is handled as an ordinary (bad) tag
        if ($name === $this->ERR)
            return $this->ERR;
        if (empty($this->cfg['allowed'][strtolower($name)])) {
            $this->debug("'$name' is a bad tag");
            $this->status    = $this->BAD_TAG;
            return $this->ERR;
        }
        if ($closing) {
            # the input may end right after the tag name
            $last    = ($pos < $len) ? $str[$pos] : '';
            $this->debug("last pred koncem closing = '$last'");
            if ($last == '>') {
                $pos++;
                return array(    'name'    => $name,
                        'closing'=> $closing,
                        'start'    => $start,
                        'end'    => $pos);
            }
            $this->status = $this->CLO_WS;
            return $this->ERR;
        }
        $params    = $this->get_tag_params($str, $pos, $len);
        # avoid interpolating an array into the debug message
        $this->debug("params = " . (is_array($params) ? 'Array' : $params));
        if (!is_array($params))
            return $this->ERR;
        $this->debug("read_tag: Vracim OK");
        return array(    'name'    => $name,
                'params'=> $params,
                'closing'=> $closing,
                'start'    => $start,
                'end'    => $pos);
    }

    # Reads a tag name starting at $pos.
    #
    # Leading whitespace is skipped, then characters are collected up
    # to (not including) the first space, tab, CR, LF or '>'.
    #
    # $pos  (in/out) left on the character that ended the name
    #
    # Returns the name, or $this->EOF when only whitespace was left.
    function get_tag_name(&$str, &$pos, $len) {
        $this->debug("<B>get_tag_name:</B> pos = $pos");
        $name    = '';
        if (!$this->skip_ws($str, $pos, $len, $last))
            return $this->EOF;
        while ($pos < $len) {
            $c    = $str[$pos];
            # plain character lookup instead of ereg(), which newer
            # PHP versions no longer provide
            if (strpos(" \t\r\n>", $c) !== false)
                break;
            $name    .= $c;
            $pos++;
        }
        $this->debug("return '$name'");
        return $name;
    }

    # Reads the attributes of an opening tag up to and including its
    # closing '>'.
    #
    # $pos  (in/out) advanced past what was consumed
    #
    # Returns an array attribute name => value ('' for attributes
    # without a value), or $this->ERR when an attribute is malformed or
    # rejected by the security checks below.
    #
    # NOTE(review): the security checks are a blacklist ("dirty patch"
    # in the original). They do not catch e.g. entity-encoded schemes
    # such as "&#106;avascript:" and must not be relied upon as a
    # complete HTML sanitizer.
    function get_tag_params(&$str, &$pos, $len) {
        $this->debug("<B>get_tag_params:</b> pos = $pos");
        $params    = array();
        while (1) {
            $key    = $this->get_tag_params_key($str, $pos, $len);
            # strict comparisons: a loose == against the numeric error
            # code also matched strings such as "-100"
            if ($key === $this->ERR)
                return $this->ERR;
            # '' = end of the tag, EOF = end of the input
            if (!is_string($key) || $key === '')
                return $params;

            # The key scanner consumes the '>' that ends a name, so a
            # '>' right before $pos means the tag ended with an
            # attribute that has no value (<hr noshade>).
            $tag_ended    = ($pos > 0 && $str[$pos - 1] == '>');
            $val        = '';
            if (!$tag_ended
                && $this->skip_ws($str, $pos, $len, $last) != $this->EOF
                && $last == '=')
            {
                $val    = $this->get_tag_params_val($str, $pos, $len);
                if ($val === $this->ERR)
                    return $this->ERR;
            }
            # otherwise the attribute has no value and the next
            # character already belongs to the following attribute

/* dirty patch */
            $this->debug("dirty patch: $key : $val");
            # event handlers; leading non-alphanumerics are ignored
            # because browsers accept e.g. <img src=x /onerror=...>
            if (preg_match('#^[^a-z0-9]*on#i', $key)) {
                $this->debug("key '$key' is 'on'");
                return $this->ERR;
            }
            # javascript: URLs; control characters and whitespace are
            # removed first and leading quote characters are ignored,
            # since browsers do the same before resolving the scheme
            $probe = preg_replace('/[\x00-\x20]+/', '', (string) $val);
            if (preg_match('/^[\'"]*javascript:/i', $probe)) {
                $this->debug("val '$val' is javascript");
                return $this->ERR;
            }
            # local paths such as c:\...; the former POSIX pattern
            # treated "\s" as a literal "s" instead of whitespace
            if (preg_match('/^\s*[a-z]:\\\\/i', (string) $val)) {
                return $this->ERR;
            }
/* e.o.d.p. */

            $params[$key]    = $val;
            if ($tag_ended)
                return $params;
        }
    }

    # Reads one attribute name starting at $pos.
    #
    # Leading whitespace is skipped. The name ends at
    #   * a space, tab, CR, LF or '>' - that character is consumed
    #   * '='                         - $pos is left on the '='
    #   * the end of the input
    #
    # Returns the name ('' when the first character read was one of the
    # terminators, i.e. normally the '>' closing the tag), $this->EOF
    # when only whitespace was left, or $this->ERR with status UNEXP_EQ
    # when '=' appears before any name character.
    function get_tag_params_key(&$str, &$pos, $len) {
        $this->debug("<B>get_tag_params_key:</B> pos = $pos");
        $key    = '';
        if (!$this->skip_ws($str, $pos, $len, $last))
            return $this->EOF;
        while ($pos < $len) {
            $c    = $str[$pos++];
            $this->debug("c = $c");
            # plain character lookup instead of ereg(), which newer
            # PHP versions no longer provide; "\r" is a terminator
            # too, like in the other scanners of this class
            if (strpos(" \t\r\n>", $c) !== false) {
                $this->debug('ereg("^[ \t\n>]$');
                break;
            }
            if ($c == '=') {
                # compare with '' - empty() is also true for "0"
                if ($key === '') {
                    $this->status    = $this->UNEXP_EQ;
                    $this->debug("$this->ERR | $this->UNEXP_EQ | pos = $pos");
                    return $this->ERR;
                }
                $pos--;
                break;
            }
            $key    .= $c;
        }
        $this->debug("return '$key'");
        return $key;
    }

    # Reads the value of an attribute; $pos is expected at (or at
    # whitespace in front of) the '=' following the attribute name.
    #
    # When the next non-blank character is not '=', the attribute has
    # no value: '' is returned and nothing but whitespace is consumed.
    #
    # A value starting with '"' runs up to the next '"' (consumed).
    # An unquoted value runs up to the first whitespace character
    # (consumed) or '>' (left in place).
    #
    # Returns the value, $this->EOF when the input ends before a value
    # starts, or $this->ERR with status
    #   UNEXP_QUOT - '"' inside an unquoted value
    #   UNMAT_QUOT - '>' or the end of the input inside a quoted value
    function get_tag_params_val(&$str, &$pos, $len) {
        $this->debug("<B>get_tag_params_val:</B> pos = $pos");
        $val    = '';
        $quoted    = 0;
        $closed    = 0;
        if ($this->skip_ws($str, $pos, $len, $last) == $this->EOF)
            return $this->EOF;
        if ($last == '=') {
            # Step over the '='. This used to happen even when the
            # character was not '=', which ate the first character of
            # whatever followed an attribute without a value.
            $pos++;
            if (!$this->skip_ws($str, $pos, $len, $last))
                return $this->EOF;
            do {
                $c    = $str[$pos++];
                $this->debug("c = '$c'");
                if ($c == '"') {
                    # The tests compare with '' instead of using
                    # empty(): empty() is true for "0" as well, so the
                    # closing quote of ="0" or ="" was taken for
                    # another opening quote.
                    if (!$quoted && $val === '')
                        $quoted    = 1;
                    else if ($quoted) {
                        $closed    = 1;
                        break;
                    } else {
                        $this->status = $this->UNEXP_QUOT;
                        $this->debug("$this->ERR | $this->UNEXP_QUOT");
                        return $this->ERR;
                    }
                } else if ($c == '>') {
                    if ($quoted) {
                        $this->status = $this->UNMAT_QUOT;
                        $this->debug("$this->ERR | $this->UNMAT_QUOT");
                        return $this->ERR;
                    }
                    $pos--;
                    break;
                } else if (!$quoted && strpos(" \t\r\n", $c) !== false) {
                    # Whitespace ends an unquoted value. Without this
                    # the value swallowed all following attributes, so
                    # <b x=1 onclick=...> slipped past the attribute
                    # checks of the caller.
                    break;
                } else
                    $val    .= $c;
            } while ($pos < $len);
            if ($quoted && !$closed) {
                # the input ended inside a quoted value
                $this->status = $this->UNMAT_QUOT;
                $this->debug("$this->ERR | $this->UNMAT_QUOT");
                return $this->ERR;
            }
        }
        $this->debug("<LI>return $val");
        return $val;
    }

    # Advances $pos over tabs, newlines, spaces and carriage returns.
    #
    # $pos   (in/out) left on the first character that is none of those
    # $last  (out) the character most recently looked at
    #
    # Returns $this->OK when such a character was found and $this->EOF
    # when the end of the input was reached first.
    function skip_ws(&$str, &$pos, $len, &$last) {
        $this->debug("<B>skip_ws:</B> pos = $pos");
        for (; $pos < $len; $pos++) {
            $last    = $str[$pos];
            $is_blank = ($last == "\t")
                || ($last == "\n")
                || ($last == " ")
                || ($last == "\r");
            if (!$is_blank)
                return $this->OK;
        }
        return $this->EOF;
    }

};

}

?>

ACC SHELL 2018