DUC

Extract

Included libraries Package variables Description General documentation Methods

Package variables top
No package variables defined.
Included modulestop
XML::TreeBuilder
strict
Inherit top
Exporter
Synopsistop
No synopsis!
Descriptiontop
This class loads extracts, either from a file or from a string,
so that the user can look up per-sentence extract information
(document ID, sentence number, word count) quickly and easily.

MEAD::Extract should provide much the same functionality as this class.
Methodstop
_really_open_meNo descriptionCode
get_DID_for_sentenceNo descriptionCode
get_DOCID_for_sentenceNo descriptionCode
get_SNO_for_sentenceNo descriptionCode
get_WCNT_for_sentenceNo descriptionCode
get_num_sentencesNo descriptionCode
get_textNo descriptionCode
open_from_fileNo descriptionCode
parse_from_stringNo descriptionCode

Methods description


Methods code

_really_open_medescriptiontopprevnext
## Parse an extract (an XML string) and record per-sentence information
## on the object.
##
## Parameters:
##   $self           - the object being populated.
##   $extract_string - the XML text of the extract; also stored verbatim
##                     in $self->{extract_string}.
##
## For the i-th <s> element found under the <multi-e> element (counting
## from 1), the following slots are filled in:
##   $self->{DID_list}[i]  - the "docref" attribute, or "docid" if
##                           "docref" is missing or false
##   $self->{SNO_list}[i]  - the "num" attribute
##   $self->{WCNT_list}[i] - the "wdcount" attribute (may be undef)
## Index 0 of each list is deliberately left unused.
##
## Dies if the extract has no <multi-e> element, or if a sentence lacks
## a document ID or a sentence number.  The parse tree is deleted in
## every case, including when an error is raised.
sub _really_open_me {
    my ($self, $extract_string) = @_;

    $self->{extract_string} = $extract_string;

    my $extract_tree = XML::TreeBuilder->new;

    ## Run the parse and validation inside eval so that the tree can be
    ## cleaned up before any error is passed on to the caller.
    my $parsed_ok = eval {
        $extract_tree->parse($extract_string);

        my $extract_node = $extract_tree->find_by_tag_name("multi-e");
        unless ($extract_node) {
            die "Couldn't find a 'multi-e' element in the extract";
        }

        my $i = 0;
        foreach my $node ($extract_node->look_down("_tag", "s")) {
            $i++;

            ## document ID's can be called either of these names...
            my $DID = $node->attr("docref");
            unless ($DID) {
                $DID = $node->attr("docid");
            }

            my $SNO  = $node->attr("num");
            my $WCNT = $node->attr("wdcount");

            ## TODO: AJW 8/28
            ## get the wordcount somehow, possibly splitting the words...
            ##die unless ($DID and $SNO and $WCNT);
            unless ($DID && $SNO) {
                die "Couldn't find (at least one of) 'docid' or 'num' " .
                    "in the $i-th sentence node";
            }

            $self->{DID_list}[$i]  = $DID;
            $self->{SNO_list}[$i]  = $SNO;
            $self->{WCNT_list}[$i] = $WCNT;
        }

        1;
    };
    ## Save the error now; the cleanup call below could overwrite $@.
    my $error = $@;

    ##cleanup.
    $extract_tree->delete;

    unless ($parsed_ok) {
        die($error || "Unknown error while reading the extract");
    }

    return;
}
get_DID_for_sentencedescriptiontopprevnext
## Alias for get_DOCID_for_sentence: hands the sentence index straight
## to that method and returns whatever it returns.
sub get_DID_for_sentence {
    my $self     = shift;
    my $sentence = shift;

    return $self->get_DOCID_for_sentence($sentence);
}
get_DOCID_for_sentencedescriptiontopprevnext
## Return the entry of $self->{DID_list} stored at index $sentence
## (undef when nothing is stored there).
sub get_DOCID_for_sentence {
    my $self     = shift;
    my $sentence = shift;

    return $self->{DID_list}->[$sentence];
}
get_SNO_for_sentencedescriptiontopprevnext
## Return the entry of $self->{SNO_list} stored at index $sentence
## (undef when nothing is stored there).
sub get_SNO_for_sentence {
    my $self     = shift;
    my $sentence = shift;

    return $self->{SNO_list}->[$sentence];
}
get_WCNT_for_sentencedescriptiontopprevnext
## Return the entry of $self->{WCNT_list} stored at index $sentence
## (undef when nothing is stored there).
sub get_WCNT_for_sentence {
    my $self     = shift;
    my $sentence = shift;

    return $self->{WCNT_list}->[$sentence];
}
get_num_sentencesdescriptiontopprevnext
## Return the number of sentences recorded in $self->{DID_list}.
##
## The list is filled starting at index 1, so index 0 is an unused
## placeholder and the count is the list length minus one.  When no
## sentence has been recorded (the list is missing or empty) the result
## is 0, rather than an error or a negative number.
sub get_num_sentences {
    my $self = shift;

    my $DID_list = $self->{DID_list};
    return 0 unless $DID_list && @{$DID_list};

    return scalar(@{$DID_list}) - 1;
}
get_textdescriptiontopprevnext
## Return the extract with its markup removed.
##
## The text is derived from $self->{extract_string} by deleting
## everything between "<" and the next ">" and then replacing each run
## of newlines with two spaces.  The result is cached in $self->{text},
## so later calls return the cached value.
sub get_text {
    my $self = shift;

    ## Test for definedness so that an empty result is cached as well.
    unless (defined $self->{text}) {
        my $text = $self->{extract_string};

        ## /s lets "." match a newline, so a tag that is split across
        ## lines is removed too instead of being left in the text.
        $text =~ s/\<.+?\>//gs;
        $text =~ s/\n+/  /g;

        $self->{text} = $text;
    }

    return $self->{text};
}
open_from_filedescriptiontopprevnext
## Constructor: build a new object from the extract stored in a file.
##
## Parameters:
##   $class    - the class to bless the new object into.
##   $filename - path of the extract file to read.
##
## Returns the new object after handing the file's contents to
## _really_open_me.
##
## Dies with a message naming the file if it cannot be opened.
sub open_from_file {
    my ($class, $filename) = @_;

    my $self = bless {}, $class;

    ## Three-argument open treats $filename purely as a path, so names
    ## containing characters such as ">" or "|" are never interpreted as
    ## an open mode or a command.
    open(my $extract_fh, '<', $filename)
        or die "Couldn't open extract file '$filename': $!";

    ## Read the whole file in one go.
    my $extract_string = do { local $/; <$extract_fh> };
    close $extract_fh;

    $self->_really_open_me($extract_string);

    return $self;
}
parse_from_stringdescriptiontopprevnext
## Constructor: build a new object from an extract held in a string.
##
## Blesses an empty hash into $class, hands $extract_string to
## _really_open_me and returns the new object.
sub parse_from_string {
    my $class          = shift;
    my $extract_string = shift;

    my $self = bless {}, $class;
    $self->_really_open_me($extract_string);

    return $self;
}

General documentation

No general documentation available.