Skip to content

Commit

Permalink
renamed tRepp.* to repp.* by request
Browse files Browse the repository at this point in the history
settings overlay working
proper logging and error handling


git-svn-id: https://pet.opendfki.de/repos/pet/branches/repp@784 4200e16c-5112-0410-ac55-d7fb557a720a
  • Loading branch information
rebecca.dridan committed Aug 3, 2011
1 parent cb7c3de commit e612d4c
Show file tree
Hide file tree
Showing 8 changed files with 212 additions and 248 deletions.
2 changes: 1 addition & 1 deletion cheap/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ cheaplibsources = \
vpm.h vpm.cpp \
pcfg.h pcfg.cpp \
yy-tokenizer.cpp yy-tokenizer.h \
tRepp.cpp tRepp.h \
repp.cpp repp.h \
$(top_srcdir)/common/bitcode.cpp \
$(top_srcdir)/common/chunk-alloc.cpp \
$(top_srcdir)/common/configs.cpp \
Expand Down
40 changes: 13 additions & 27 deletions cheap/cheap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#include "morph.h"
#include "yy-tokenizer.h"
#ifdef HAVE_BOOST_REGEX_ICU_HPP
#include "tRepp.h"
#include "repp.h"
#endif
#include "lingo-tokenizer.h"
#ifdef HAVE_XML
Expand Down Expand Up @@ -555,26 +555,8 @@ bool load_grammar(string initial_name) {

case TOKENIZER_REPP:
#ifdef HAVE_BOOST_REGEX_ICU_HPP
{
string path;
//have conf file, use to overlay options (not implemented yet) TODO
string repp_opt = get_opt_string("opt_repp");
if (!repp_opt.empty()) {
string reppfilename(find_file(repp_opt, SET_EXT, grammar_file_name));
if (reppfilename.empty()) {
cerr << "Couldn't find REPP conf file \""
<< dir_name(grammar_file_name) << repp_opt << "{" << SET_EXT
<< "}\"." << endl;
exit(1);
}
path = dir_name(reppfilename);
}
else
path = dir_name(grammar_file_name)+"rpp/";
//path should be a setting, but for now, set here TODO
tok = new tReppTokenizer(path);
}
break;
tok = new tReppTokenizer(get_opt_string("opt_repp"), grammar_file_name);
break;
#else
LOG(logAppl, FATAL,
"No Unicode-aware regexp support compiled into this cheap.");
Expand Down Expand Up @@ -919,11 +901,15 @@ void process(const char *grammar_file_name) {
// will not return if a server was started
eventually_start_server(grammar_file_name);

load_grammar(grammar_file_name);
if(get_opt_bool("opt_interactive_morph"))
interactive_morphology();
else
interactive();
//why do we never check if grammar loaded? adding check here- rd, 3/8/11
if (!load_grammar(grammar_file_name)) {
if(get_opt_bool("opt_interactive_morph"))
interactive_morphology();
else
interactive();
} else {
throw tError("Couldn't successfully load grammar, exiting.");
}
}
}

Expand All @@ -943,7 +929,7 @@ int main(int argc, char* argv[]) {
process(grammar_file_name);
}
catch (tError err) {
cerr << err.getMessage();
cerr << err.getMessage() << endl;
exit(1);
}

Expand Down
2 changes: 2 additions & 0 deletions cheap/input-modules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ static bool init(std::string tokenizer_names[]) {
if (TOKENIZER_FSR == i)
continue;
#endif
if (TOKENIZER_REPP == i)
continue; //don't want it as a -tok command line option
doc += "|"; doc += tokenizer_names[i];
}
doc += "' --- select input method (default `" + tokenizer_names[def] + "')";
Expand Down
135 changes: 56 additions & 79 deletions cheap/tRepp.cpp → cheap/repp.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* vim: set expandtab:ts=2:sw=2 */
/* ex: set expandtab ts=2 sw=2: */
/* PET
* Platform for Experimentation with efficient HPSG processing Techniques
*
Expand All @@ -16,7 +16,7 @@
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "tRepp.h"
#include "repp.h"
#include <stack>
#include <fstream>
#include <sstream>
Expand All @@ -26,63 +26,64 @@
#include "settings.h"
#include "utility.h"
#include "cheap.h"
#include "logging.h"

using namespace std;

tReppTokenizer::tReppTokenizer(const string conf, bool verbose)
:_verbose(verbose)
tReppTokenizer::tReppTokenizer(const string conf, const string
&grammar_file_name) :_repp_settings(NULL)
{
// _path = dir_name(conf);
// path should be a setting, but for now, set like this TODO
_path = conf;
settings *repp_settings = cheap_settings;
//assume settings read in to cheap_settings, possibly with overlay
// ifstream reppfile(conf.c_str());
// string reppset, line;
// if (reppfile.is_open()) { // read conf file
// getline(reppfile, line);
// while (!reppfile.eof()) {
// reppset += line;
// getline(reppfile, line);
// }
// repp_settings = new settings(reppset); // parse conf file
// } else {
// cerr << "repp conf file \"" << conf << "\" couldn't be opened." << endl;
// exit(1);
// }
if (!conf.empty()) { //update cheap settings using conf file
string conffname(find_set_file(conf, SET_EXT, grammar_file_name));
if (conffname.empty()) {
throw tError("REPP settings file `" + conf + "' not found.");
}
string reppset, line;
ifstream conffile(conffname.c_str());
if (conffile.is_open()) {
while (getline(conffile, line))
reppset += line;
} else {
throw tError("Couldn't open REPP settings file `" + conf + "'.");
}
_repp_settings = new settings(reppset);
cheap_settings->install(_repp_settings);
}

struct setting *rset;
_path = dir_name(grammar_file_name);
if ((rset = cheap_settings->lookup("repp-dir")) != NULL) {
_path += rset->values[0];
if (_path.at(_path.length()-1) != PATH_SEP[0])
_path += PATH_SEP;
}
// find modules from setting repp-modules and read the files
struct setting *foo;
if ((foo = repp_settings->lookup("repp-modules")) == NULL) return;
for (int i = 0; i < foo->n; ++i) {
if (_verbose) cerr << "initiating repp " << foo->values[i] << endl;
_repps[foo->values[i]] = new tRepp(foo->values[i], this, _verbose);
}
// assign other settings to variables so they can be overwritten by tsdb++
if((foo = repp_settings->lookup("repp-tokenizer")) == NULL) {
cerr << "No repp main module set. Check repp-tokenizer setting." << endl;
exit(1);
} else {
_maintokenizer = foo->values[0];
}
// if repp-calls is not set, just don't use any conditional includes
// or should we warn? or set a default?
if ((foo = repp_settings->lookup("repp-calls")) != NULL) {
for (int i = 0; i < foo->n; ++i) {
_calls.insert(set<string>::value_type(foo->values[i]));
}
if ((rset = cheap_settings->lookup("repp-modules")) == NULL) {
throw tError("No repp modules defined. Check repp-modules setting.");
}
for (int i = 0; i < rset->n; ++i) {
LOG(logRepp, INFO, "initiating repp " << rset->values[i]);
_repps[rset->values[i]] = new tRepp(rset->values[i], this);
}
// check mandatory settings exist
if((rset = cheap_settings->lookup("repp-tokenizer")) == NULL)
throw tError("No repp main module set. Check repp-tokenizer setting.");
}

/* Destructor: frees every tRepp module stored in _repps. */
tReppTokenizer::~tReppTokenizer()
{
  typedef map<string, tRepp *>::iterator module_iter;

  module_iter pos = _repps.begin();
  const module_iter last = _repps.end();
  while (pos != last) {
    delete pos->second;
    ++pos;
  }

  // The settings overlay (_repp_settings) is deliberately neither
  // uninstalled nor deleted here: the original author found that the
  // uninstall call hangs, so the code below stays disabled.
  // if (cheap_settings != NULL && _repp_settings != NULL) {
  //   cheap_settings->uninstall(_repp_settings);
  //   delete _repp_settings;
  // }
}

tRepp::tRepp(string name, tReppTokenizer *parent, bool verbose)
:_id(name), _parent(parent)
tRepp::tRepp(string name, tReppTokenizer *parent) :_id(name), _parent(parent)
{
tRegex emptyre = boost::make_u32regex("^$");
tRegex commentre = boost::make_u32regex("^;.*$");
Expand Down Expand Up @@ -139,8 +140,8 @@ tRepp::tRepp(string name, tReppTokenizer *parent, bool verbose)
}
else if (boost::u32regex_match(line, res, groupendre)) {
if (in_group.empty()) {
cerr << _id << ":" << line_no << " spurious group close"
<< endl;
LOG(logRepp, WARN, "REPP:" << _id << ":" << line_no
<< " spurious group close.");
}
else
in_group.pop();
Expand All @@ -155,19 +156,17 @@ tRepp::tRepp(string name, tReppTokenizer *parent, bool verbose)
rule_count++;
}
else {
cerr << _id << ":" << line_no << " invalid line: "
<< line << endl;
LOG(logRepp, WARN, "REPP:" << _id << ":" << line_no
<< " invalid line: " << line);
}
}
getline(mainf, line);
}
if (verbose)
cerr << "Read " << _id << " [" << rule_count << " rules]" << endl;
LOG(logRepp, INFO, "Read " << _id << " [" << rule_count << " rules]");
}
else {
cerr << "Couldn't find REPP module "<< name
<< ". Check repp-modules setting." << endl;
exit(1);
throw tError("Couldn't find REPP module `" + name +
"'. Check repp-modules setting.");
}
}

Expand Down Expand Up @@ -200,14 +199,13 @@ void tReppTokenizer::tokenize(myString item, inp_list &result)
_startmap.push_back(smap);
_endmap.push_back(emap);

tRepp *repp = getRepp(_maintokenizer);
tRepp *repp = getRepp(cheap_settings->lookup("repp-tokenizer")->values[0]);

//apply all rules
for (vector<tReppRule *>::iterator iter = repp->rules().begin();
iter != repp->rules().end(); ++iter) {
if ((*iter)->get_type() == ">I") { //conditional include
string name = (*iter)->name();
if (_calls.count(name) == 0)
if (! cheap_settings->member("repp-calls", (*iter)->name().c_str()))
continue; // don't run this optional module
}
rest = (*iter)->apply(repp, rest);
Expand Down Expand Up @@ -241,10 +239,8 @@ void tReppTokenizer::tokenize(myString item, inp_list &result)
stringstream tokid;
tokid << result.size()+42; //to match YY initialization

if (_verbose) {
cerr << "creating item from " << surface << " <"
<< tokstart-1 << ":" << tokend << ">" << endl;
}
LOG(logRepp, DEBUG, "creating item from " << surface << " <"
<< tokstart-1 << ":" << tokend << ">");
tok = new tInputItem(tokid.str().c_str(), start, end,
tokstart-1, tokend, surface, surface);
result.push_back(tok);
Expand Down Expand Up @@ -288,25 +284,6 @@ void tReppTokenizer::tokenize(myString item, inp_list &result)
_endmap.clear();
}

// Overrides the tokenizer's main module name and its list of conditional
// calls from a `settings` object built out of `input` (possibly meant for
// updates coming from tsdb++ -- the original author was unsure).
// An empty `input` changes nothing; a setting that is absent from `input`
// keeps its current value.
void tReppTokenizer::updateReppTokenizer(string &input)
{
  if (input.empty())
    return;

  settings overrides(input);

  // main tokenizer module: replaced only when the update names one
  setting *main_module = overrides.lookup("repp-tokenizer");
  if (main_module != NULL)
    _maintokenizer = main_module->values[0];

  // conditional calls: when present, the new list replaces the old one
  // entirely rather than being merged into it
  setting *call_list = overrides.lookup("repp-calls");
  if (call_list == NULL)
    return;

  _calls.clear();
  for (int idx = 0; idx < call_list->n; ++idx)
    _calls.insert(string(call_list->values[idx]));
}

// standard string replacement rule
tReppFSRule::tReppFSRule(string type, const char *target, const char *format)
: tReppRule(type), _targetstr(target)
Expand Down Expand Up @@ -354,7 +331,7 @@ tReppFSRule::tReppFSRule(string type, const char *target, const char *format)
break; //out of order group
} else {
if (x+1 == flen)
cerr << "unescaped backslash in " << _format << endl;
LOG(logRepp, WARN, "REPP:" << "unescaped backslash in " << _format);
}
}
}
Expand Down
Loading

0 comments on commit e612d4c

Please sign in to comment.