diff --git a/Makefile.PL b/Makefile.PL
index 9867d89..f7587ea 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -170,6 +170,17 @@ if ( $plucene_inst ) {
     $Wiki::Toolkit::TestConfig::config{plucene} = undef;
 }
 
+# If we have KinoSearch installed, we can test that without asking questions.
+eval { require KinoSearch; };
+my $kinosearch_inst = $@ ? 0 : 1;
+if ( $kinosearch_inst ) {
+    print "You have KinoSearch installed, so will test with that...\n\n";
+    $Wiki::Toolkit::TestConfig::config{kinosearch} = 1;
+} else {
+    print "KinoSearch not installed; skipping test...\n\n";
+    $Wiki::Toolkit::TestConfig::config{kinosearch} = undef;
+}
+
 # Write out the config for next run.
 open OUT, ">lib/Wiki/Toolkit/TestConfig.pm"
   or die "Couldn't open lib/Wiki/Toolkit/TestConfig.pm for writing: $!";
diff --git a/lib/Wiki/Toolkit/Search/KinoSearch.pm b/lib/Wiki/Toolkit/Search/KinoSearch.pm
new file mode 100644
index 0000000..11b2a2b
--- /dev/null
+++ b/lib/Wiki/Toolkit/Search/KinoSearch.pm
@@ -0,0 +1,181 @@
+package Wiki::Toolkit::Search::KinoSearch;
+use strict;
+our $VERSION = '0.01';
+
+use base 'Wiki::Toolkit::Search::Base';
+
+use File::Spec;
+use KinoSearch::InvIndexer;
+use KinoSearch::Searcher;
+use KinoSearch::Index::Term;
+use KinoSearch::Analysis::PolyAnalyzer;
+
+=head1 NAME
+
+Wiki::Toolkit::Search::KinoSearch - Use KinoSearch to search your Wiki::Toolkit wiki.
+
+=head1 SYNOPSIS
+
+  my $search = Wiki::Toolkit::Search::KinoSearch->new( path => "/var/KinoSearch/wiki" );
+  my %wombat_nodes = $search->search_nodes("wombat");
+
+Provides search-related methods for L<Wiki::Toolkit>.
+
+=cut
+
+=head1 METHODS
+
+=over 4
+
+=item B<new>
+
+  my $search = Wiki::Toolkit::Search::KinoSearch->new( path => "/var/KinoSearch/wiki" );
+
+Takes only one parameter, which is mandatory. C<path> must be a directory
+for storing the indexed data. It should exist and be writeable.
+
+=cut
+
+# Stores the mandatory "path" argument as the invindex directory.
+sub _init {
+    my ( $self, %args ) = @_;
+    $self->{_dir} = $args{path};
+    return $self;
+}
+
+# Returns the invindex directory recorded by _init.
+sub _dir { shift->{_dir} }
+
+# Returns a new English-language PolyAnalyzer; _indexer and _searcher both
+# use it.
+sub _analyzer {
+    KinoSearch::Analysis::PolyAnalyzer->new( language => 'en', );
+}
+
+# Returns true if an invindex already seems to exist in our directory.
+# NOTE(review): assumes KinoSearch keeps a file called "segments" at the top
+# of the invindex directory - confirm against the installed version.
+sub _index_exists {
+    my ($self) = @_;
+    my $dir = $self->_dir;
+    return 0 unless defined $dir;
+    my $segments = File::Spec->catfile( $dir, 'segments' );
+    return ( -e $segments ) ? 1 : 0;
+}
+
+# Returns an InvIndexer on our directory with the "title" and "body_text"
+# fields declared. The invindex is only created when none exists yet:
+# create => 1 on every call would clobber the existing index, losing all
+# previously indexed nodes each time a node is added or deleted.
+sub _indexer {
+    my ($self) = @_;
+    my $indexer = KinoSearch::InvIndexer->new(
+        analyzer => $self->_analyzer,
+        invindex => $self->_dir,
+        create   => ( $self->_index_exists ? 0 : 1 ),
+    );
+    $indexer->spec_field( name => 'title' );
+    $indexer->spec_field(
+        name       => 'body_text',
+        vectorized => 1,
+    );
+    return $indexer;
+}
+
+# Adds a document with title $node and body text $content, then calls
+# finish() on the indexer.
+# NOTE(review): no earlier document for the same node is removed first, so
+# re-indexing a node presumably leaves the old copy in the index - confirm.
+sub index_node {
+    my ( $self, $node, $content ) = @_;
+    my $indexer = $self->_indexer;
+    my $doc     = $indexer->new_doc;
+    $doc->set_value( title     => $node );
+    $doc->set_value( body_text => $content );
+    $indexer->add_doc($doc);
+    $indexer->finish( optimize => $self->optimize );
+}
+
+# Returns a Searcher on our directory, built with the analyzer from _analyzer.
+sub _searcher {
+    my ($self) = @_;
+    KinoSearch::Searcher->new(
+        invindex => $self->_dir,
+        analyzer => $self->_analyzer,
+    );
+}
+
+# Passes $query to the searcher and returns whatever search() returns.
+sub _search_nodes {
+    my ( $self, $query ) = @_;
+    $self->_searcher->search($query);
+}
+
+# Returns a list of title => score pairs built from the hits for the query.
+sub search_nodes {
+    my ( $self, @args ) = @_;
+    my $hits = $self->_search_nodes(@args);
+    my %results;
+
+    # $hit has to be declared here; under "use strict" an undeclared $hit
+    # stops the module from compiling.
+    while ( my $hit = $hits->fetch_hit_hashref ) {
+        $results{ $hit->{title} } = $hit->{score};
+    }
+    return %results;
+}
+
+# sub _fuzzy_match {
+#     my ( $self, $string, $canonical ) = @_;
+#     return
+#         map { $_ => ( $_ eq $string ? 2 : 1 ) }
+#         $self->_search_nodes("fuzzy:$canonical");
+# }
+
+# sub indexed {
+#     my ( $self, $id ) = @_;
+#     my $term = Plucene::Index::Term->new( { field => 'id', text => $id } );
+#     return $self->_reader->doc_freq($term);
+# }
+
+# Value passed as the "optimize" option to every finish() call.
+sub optimize { 1 }
+
+# Asks the indexer to delete documents by the term title => $id, then calls
+# finish().
+# NOTE(review): "title" is declared without any special options, so it is
+# presumably run through the analyzer; a raw node name may then not match
+# the stored terms - confirm.
+sub delete_node {
+    my ( $self, $id ) = @_;
+    my $term    = KinoSearch::Index::Term->new( title => $id );
+    my $indexer = $self->_indexer;
+    $indexer->delete_docs_by_term($term);
+    $indexer->finish( optimize => $self->optimize );
+}
+
+# Phrase and fuzzy searches are not implemented (see TODO below).
+sub supports_phrase_searches { return 0; }
+sub supports_fuzzy_searches  { return 0; }
+
+1;
+__END__
+
+=back
+
+=head1 TODO
+
+=over 4
+
+=item Phrase Searching
+
+=item Fuzzy Matching
+
+=back
+
+=head1 SEE ALSO
+
+L<Wiki::Toolkit>, L<Wiki::Toolkit::Search::Base>, L<KinoSearch>.
+
+=cut
+
diff --git a/lib/Wiki/Toolkit/TestLib.pm b/lib/Wiki/Toolkit/TestLib.pm
index aa3b71d..e1b437f 100644
--- a/lib/Wiki/Toolkit/TestLib.pm
+++ b/lib/Wiki/Toolkit/TestLib.pm
@@ -145,6 +145,12 @@ if ( $configured{plucene} ) {
     $plucene_path = "t/plucene";
 }
 
+# Test with KinoSearch if possible.
+my $kino_path;
+if ( $configured{kinosearch} ) {
+    $kino_path = "t/kinosearch";
+}
+
 # @wiki_info describes which searches work with which stores.
 
 # Database-specific searchers.
@@ -169,6 +175,13 @@ foreach my $dbtype ( qw( MySQL Pg SQLite ) ) {
         if ( $datastore_info{$dbtype} and $plucene_path );
     push @wiki_info, { datastore_info => $datastore_info{$dbtype} }
         if $datastore_info{$dbtype};
+
+    # NOTE(review): the code that consumes @wiki_info (not part of this
+    # patch) must build a KinoSearch searcher from kinosearch_path - confirm.
+    # Under the plucene_path key this directory would presumably go to Plucene.
+    push @wiki_info, { datastore_info  => $datastore_info{$dbtype},
+                       kinosearch_path => $kino_path }
+        if ( $datastore_info{$dbtype} and $kino_path );
 }
 
 =head1 METHODS