ACC SHELL
# $Id: PerlSAX.pm,v 1.6 2000/02/28 10:40:21 matt Exp $
package XML::XPath::PerlSAX;
use XML::XPath::XMLParser;
use strict;
sub new {
my $class = shift;
my %args = @_;
bless \%args, $class;
}
sub parse {
my $self = shift;
die "XML::XPath::PerlSAX: parser instance ($self) already parsing\n"
if (defined $self->{ParseOptions});
# If there's one arg and it's an array ref, assume it's a node we're parsing
my $args;
if (@_ == 1 && ref($_[0]) =~ /^(text|comment|element|namespace|attribute|pi)$/) {
# warn "Parsing node\n";
my $node = shift;
# warn "PARSING: $node ", XML::XPath::XMLParser::as_string($node), "\n\n";
$args = { Source => { Node => $node } };
}
else {
$args = (@_ == 1) ? shift : { @_ };
}
my $parse_options = { %$self, %$args };
$self->{ParseOptions} = $parse_options;
# ensure that we have at least one source
if (!defined $parse_options->{Source} ||
!defined $parse_options->{Source}{Node}) {
die "XML::XPath::PerlSAX: no source defined for parse\n";
}
# assign default Handler to any undefined handlers
if (defined $parse_options->{Handler}) {
$parse_options->{DocumentHandler} = $parse_options->{Handler}
if (!defined $parse_options->{DocumentHandler});
}
# ensure that we have a DocumentHandler
if (!defined $parse_options->{DocumentHandler}) {
die "XML::XPath::PerlSAX: no Handler or DocumentHandler defined for parse\n";
}
# cache DocumentHandler in self for callbacks
$self->{DocumentHandler} = $parse_options->{DocumentHandler};
if ((ref($parse_options->{Source}{Node}) eq 'element') &&
!($parse_options->{Source}{Node}->[node_parent])) {
# Got root node
$self->{DocumentHandler}->start_document( { } );
$self->parse_node($parse_options->{Source}{Node});
return $self->{DocumentHandler}->end_document( { } );
}
else {
$self->parse_node($parse_options->{Source}{Node});
}
# clean up parser instance
delete $self->{ParseOptions};
delete $self->{DocumentHandler};
}
sub parse_node {
my $self = shift;
my $node = shift;
# warn "parse_node $node\n";
if (ref($node) eq 'element' && $node->[node_parent]) {
# bundle up attributes
my @attribs;
foreach my $attr (@{$node->[node_attribs]}) {
if ($attr->[node_prefix]) {
push @attribs, $attr->[node_prefix] . ":" . $attr->[node_key];
}
else {
push @attribs, $attr->[node_key];
}
push @attribs, $attr->[node_value];
}
$self->{DocumentHandler}->start_element(
{ Name => $node->[node_name],
Attributes => \@attribs,
}
);
foreach my $kid (@{$node->[node_children]}) {
$self->parse_node($kid);
}
$self->{DocumentHandler}->end_element(
{
Name => $node->[node_name],
}
);
}
elsif (ref($node) eq 'text') {
$self->{DocumentHandler}->characters($node->[node_text]);
}
elsif (ref($node) eq 'comment') {
$self->{DocumentHandler}->comment($node->[node_comment]);
}
elsif (ref($node) eq 'pi') {
$self->{DocumentHandler}->processing_instruction(
{
Target => $node->[node_target],
Data => $node->[node_data]
}
);
}
elsif (ref($node) eq 'element') { # root node
# just do kids
foreach my $kid (@{$node->[node_children]}) {
$self->parse_node($kid);
}
}
else {
die "Unknown node type: '", ref($node), "' ", scalar(@$node), "\n";
}
}
1;
__END__
=head1 NAME
XML::XPath::PerlSAX - A PerlSAX event generator for my wierd node structure
=head1 SYNOPSIS
use XML::XPath;
use XML::XPath::PerlSAX;
use XML::DOM::PerlSAX;
my $xp = XML::XPath->new(filename => 'test.xhtml');
my $paras = $xp->find('/html/body/p');
my $handler = XML::DOM::PerlSAX->new();
my $generator = XML::XPath::PerlSAX->new( Handler => $handler );
foreach my $node ($paras->get_nodelist) {
my $domtree = $generator->parse($node);
# do something with $domtree
}
=head1 DESCRIPTION
This module generates PerlSAX events to pass to a PerlSAX handler such
as XML::DOM::PerlSAX. It operates specifically on my wierd tree format.
Unfortunately SAX doesn't seem to cope with namespaces, so these are
lost completely. I believe SAX2 is doing namespaces.
=head1 Other
The XML::DOM::PerlSAX handler I tried was completely broken (didn't even
compile before I patched it a bit), so I don't know how correct this
is or how far it will work.
This software may only be distributed as part of the XML::XPath package.
ACC SHELL 2018