Skip to content

Commit

Permalink
main branch superseded with cm
Browse files Browse the repository at this point in the history
git-svn-id: https://pet.opendfki.de/repos/pet/main@743 4200e16c-5112-0410-ac55-d7fb557a720a
  • Loading branch information
beki01 committed Aug 3, 2010
1 parent 4ff9f9b commit 305c074
Show file tree
Hide file tree
Showing 95 changed files with 10,737 additions and 965 deletions.
20 changes: 20 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,23 @@
specific to chart-mapping branch:
- activate all rules after lex-exhaustive parsing (not just syntactic rules)
for compatibility with LKB
- regular expression literals introduced with slashes in TDL
- assert that fss in unify(), unify_restrict() and unify_np() are valid
- added preliminary XML-RPC server mode
- added chart mapping engine:
* chart mapping rules are resource-sensitive rewrite rules
* chart mapping rules operate on a generalized chart
* two phases: token mapping and lexical filtering
- new generic instantiation mode `default-les=all': try to instantiate
all available generics for all available input items. selection should
then be accomplished by constraints on the input fs
- new tokenizer for the XML-based feature structure chart format (FSC)
- input fs can optionally be unified into a lexical item's fs
- input items can now be described by input feature structures
- case-sensitive string and YY tokenizer
- memory limit is respected during unpacking
- added unit tests for types

v0.99.??
- added the ParseNodes class to be able to produce nice small parse trees as
in the online demo, and an item printer to test it
Expand Down
6 changes: 3 additions & 3 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ cheapdoc:
$(DOXYGEN) doxyconfig.cheap

logon:
strip flop/flop cheap/cheap
/bin/cp flop/flop $$LOGONROOT/uio/bin/linux.x86.32/flop
/bin/cp cheap/cheap $$LOGONROOT/uio/bin/linux.x86.32/cheap
strip flop/flop cheap/cheap
/bin/cp flop/flop $$LOGONROOT/uio/bin/linux.x86.32/tflop
/bin/cp cheap/cheap $$LOGONROOT/uio/bin/linux.x86.32/tcheap

endif

9 changes: 8 additions & 1 deletion README
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,13 @@ privileges. Note that the header files are often distributed in separate
packages, usually ending in `-dev' or `-devel' (e.g. `libboost-dev'). Make
sure that these packages are installed, too.

If you install your own libraries in non-standard directories, make sure
that all these shared libraries and their dependencies can be found when
PET's configure is run. There are several ways to achieve this (e.g., using
ldconfig or the LD_LIBRARY_PATH environment variable; cf. the Program Library
HOWTO at http://tldp.org/HOWTO/Program-Library-HOWTO/). The dependencies of a
program or library can be inspected with ldd.

Required:

Boost provides free portable C++ libraries for various applications.
Expand All @@ -120,7 +127,7 @@ http://www.boost.org/

Optional:

If you want UniCode support in PET, you need the icu package freely
If you want Unicode support in PET, you need the ICU package, freely
available from IBM.
Current GNU/Linux distributions usually provide this library as a pre-built
package. Make sure that the header files are installed, too (see above).
Expand Down
7 changes: 6 additions & 1 deletion TODO
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
+- make << operator for tItemPrinters to allow modifier stream like usage
- use logging framework for chart-mapping

- make << operator for tItemPrinters to allow modifier stream like usage
- check if making the stream an argument of the print function decreases
performance, if not, remove the global pointer to the stream used and
replace it with a method argument
Expand Down Expand Up @@ -79,6 +81,9 @@
used for unification and mark it with a special value. After some
thinking, I would say that this might not be feasible.

- extension of TDL syntax to type extension and comments (ticket #1 by Emily
Bender)

- remove all uses of negative values as `marker'-values, especially where casts
from integers to pointers and back are involved.

Expand Down
47 changes: 28 additions & 19 deletions cheap/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ bin_PROGRAMS = cheap
#bin_PROGRAMS += mtest

cheap_SOURCES = cheap.cpp cheap.h
cheap_LDADD = libcheap.la
cheap_LDADD = libcheap.a

#dumpgram_SOURCES = dumpgram.cpp \
# $(top_srcdir)/common/errors.h \
Expand All @@ -47,18 +47,20 @@ cheap_LDADD = libcheap.la

# Convenience library (makes building several executables easier and is
# a tiny step towards a general pet library):
noinst_LTLIBRARIES = libcheap.la
noinst_LIBRARIES = libcheap.a

# The include-file dependencies are automatically computed by Automake
# (`Automatic dependency tracking'). Thus, the headers do not need to be listed
# for compilation, but they need to be found for making a distribution.
# Note that the headers in $(top_srcdir)/common are always included
# (cf. $(top_srcdir)/common/Makefile.am) and are therefore not listed here.
libcheap_la_SOURCES = \
libcheap_a_SOURCES = \
agenda.h \
chart.cpp chart.h \
chart-mapping.cpp chart-mapping.h \
cheaptimer.h \
fs.cpp fs.h \
fs-chart.cpp fs-chart.h \
grammar.cpp grammar.h \
input-modules.cpp input-modules.h \
item-printer.cpp item-printer.h \
Expand All @@ -79,7 +81,9 @@ libcheap_la_SOURCES = \
task.cpp task.h \
tsdb++.h tsdb++.cpp \
mrs.h mrs.cpp \
mrs-printer.h mrs-printer.cpp \
vpm.h vpm.cpp \
pcfg.h pcfg.cpp \
yy-tokenizer.cpp yy-tokenizer.h \
$(top_srcdir)/common/bitcode.cpp \
$(top_srcdir)/common/chunk-alloc.cpp \
Expand All @@ -102,21 +106,21 @@ libcheap_la_SOURCES = \

if TOMABECHI_UNIFIER
DEFS += -DDAG_TOMABECHI
libcheap_la_SOURCES += dag-tomabechi.cpp dag-tomabechi.h failure.cpp failure.h \
libcheap_a_SOURCES += dag-tomabechi.cpp dag-tomabechi.h failure.cpp failure.h \
qc.h qc.cpp
endif
if SIMPLE_UNIFIER
DEFS += -DDAG_SIMPLE -DWROBLEWSKI2
libcheap_la_SOURCES += $(top_srcdir)/common/dag-simple.cpp
libcheap_a_SOURCES += $(top_srcdir)/common/dag-simple.cpp
endif

if ECL
libcheap_la_SOURCES += petecl.c petecl.h
libcheap_a_SOURCES += petecl.c petecl.h
endif
if ECLPREPROC
if ICU
libcheap_la_SOURCES += eclpreprocessor.h eclpreprocessor.cpp #fspp.h fspp.cpp
cheap_LDADD += ../fspp/libfspp.la -lpreprocessor #fix_me
libcheap_a_SOURCES += eclpreprocessor.h eclpreprocessor.cpp #fspp.h fspp.cpp
cheap_LDADD += ../fspp/libfspp.a -lpreprocessor ../fspp/libfspp.a #fix_me
LDFLAGS += -L../fspp
CPPFLAGS += -I../fspp
else
Expand All @@ -125,16 +129,16 @@ else
endif
endif
if ECLMRS
libcheap_la_SOURCES += cppbridge.cpp cppbridge.h petmrs.h petmrs.c
libcheap_a_SOURCES += cppbridge.cpp cppbridge.h petmrs.h petmrs.c
cheap_LDADD += libmrs.a
endif

# Add ecl libraries to be deleted on "make clean" ...
# Eric Nichols <[email protected]>, Jun. 18, 2005
MOSTLYCLEANFILES = libmrs.a

../fspp/libfspp.la:
make -C ../fspp libfspp.la
../fspp/libfspp.a:
make -C ../fspp libfspp.a

libmrs.a:
( \
Expand All @@ -147,34 +151,39 @@ libmrs.a:
) | $(ECL)

if XML
libcheap_la_SOURCES += pic-handler.cpp pic-handler.h pic-states.h \
libcheap_a_SOURCES += pic-handler.cpp pic-handler.h pic-states.h \
pic-tokenizer.h pic-tokenizer.cpp \
xmlparser.cpp xmlparser.h \
smaf-tokenizer.h smaf-tokenizer.cpp
smaf-tokenizer.h smaf-tokenizer.cpp \
fsc-tokenizer.cpp fsc-tokenizer.h
endif

if ICU
libcheap_la_SOURCES += unicode.h unicode.cpp
libcheap_a_SOURCES += unicode.h unicode.cpp
endif

if XMLRPC_C
libcheap_a_SOURCES += server-xmlrpc.cpp server-xmlrpc.h
endif

# if LEXDB
# libcheap_la_SOURCES += psqllex.h psqllex.cpp
# libcheap_a_SOURCES += psqllex.h psqllex.cpp
# endif

if YY
libcheap_la_SOURCES += yy.h yy.cpp
libcheap_a_SOURCES += yy.h yy.cpp
endif

#if EXTDICT
#libcheap_la_SOURCES += extdict.cpp extdict.h
#libcheap_a_SOURCES += extdict.cpp extdict.h
#endif

#if RCU
# QC_PATH_COMP could be done robustly, ie with weights. The aim would be
# to find the paths that most quickly find out the biggest losses.
#DEFS += -DROBUST
#libcheap_la_SOURCES += rcu-types.h rcu-types.cpp rcu-dag-node.h
#nodist_libcheap_la_SOURCES += dag-chunk-alloc.h dag-chunk-alloc.cpp \
#libcheap_a_SOURCES += rcu-types.h rcu-types.cpp rcu-dag-node.h
#nodist_libcheap_a_SOURCES += dag-chunk-alloc.h dag-chunk-alloc.cpp \
# type-chunk-alloc.cpp type-chunk-alloc.h \
# type-alloc.h type-alloc.cpp
# Not done for fair comparison
Expand Down
133 changes: 120 additions & 13 deletions cheap/agenda.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,32 +26,139 @@
#include <queue>
#include <vector>

#include "item.h"
#include "options.h"
#include "task.h"


/** agenda: a priority queue adapter */
template <typename T, typename LESS_THAN > class agenda {
/**
 * Abstract interface of an agenda holding pointers to tasks of type \a T.
 *
 * All operations are pure virtual. The \a LESS_THAN template parameter is not
 * referenced inside this interface.
 */
template <typename T, typename LESS_THAN > class abstract_agenda {
public:

// NOTE(review): the next line names `agenda' and initialises a member `_A'
// that this class does not declare -- it looks like a leftover of the class
// before it was renamed to abstract_agenda; confirm and remove.
agenda() : _A() {}
/** Virtual destructor, so agendas can be destroyed through this interface. */
virtual ~abstract_agenda() {}

/** Add task \a t to the agenda. */
virtual void push(T *t) = 0;
/** Return the current best task without removing it. */
virtual T * top() = 0;
/** Remove the current best task from the agenda and return it. */
virtual T * pop() = 0; // Responsibility to delete the task is for the caller.
/** Return true if no task can be retrieved from the agenda. */
virtual bool empty() = 0;
/**
 * Report back on task \a t together with \a result.
 * NOTE(review): presumably called after \a t was executed, with \a result
 * being the item it produced or 0 if it produced none -- confirm with callers.
 */
virtual void feedback (T *t, tItem *result) = 0;

};


/**
 * Agenda without any pruning: a thin adapter around a std::priority_queue of
 * task pointers ordered by \a LESS_THAN.
 *
 * Tasks still in the agenda when it is destroyed are deleted by the agenda.
 */
template <typename T, typename LESS_THAN > class exhaustive_agenda : public abstract_agenda<T, LESS_THAN > {
public :

// NOTE(review): the next line declares `~agenda', which does not match this
// class's name; it looks like the pre-rename version of the destructor two
// lines below -- confirm and remove.
~agenda() { while(!this->empty()) delete this->pop(); }
/** Create an empty agenda. */
exhaustive_agenda() : _A() {}
/** Delete every task that is still in the agenda. */
~exhaustive_agenda() { while(!this->empty()) delete this->pop(); }

/** Push \a t onto agenda */
void push(T *t) { _A.push(t); }

/** Return the topmost (best) element from the agenda */
// NOTE(review): top() is defined twice in a row below; one of the two lines
// appears to be a leftover -- confirm and remove.
// The agenda must not be empty: std::priority_queue::top() on an empty queue
// is undefined behaviour.
T * top() { return _A.top(); }
T * top() { return _A.top(); }
/** Remove the topmost element and return it; the agenda must not be empty. */
T * pop() { T *t = top(); _A.pop(); return t; }
/** Test if the agenda is empty */
bool empty() { return _A.empty(); }
/** Feedback is ignored by this agenda. */
void feedback (T *t, tItem *result) {}

private:

/** The underlying priority queue of task pointers. */
std::priority_queue<T *, std::vector<T *>, LESS_THAN> _A;
};



/** Remove the topmost element from the agenda and return it */
T * pop() { T *t = top(); _A.pop(); return t; }

/** Test if agenda is empty */
bool empty() { return _A.empty(); }

/** Return the number of tasks in the agenda */
int size() { return _A.size();}
/*
* LOCAL CAP AGENDA
*/

/**
 * Agenda that enforces a per-cell cap on the number of tasks to be executed.
 *
 * A cell is a (start, end) span; the counter for a span lives at index
 * start * (max_pos + 1) + end of \c _popped. Only phrasal tasks are subject
 * to the cap. Counters are advanced through feedback(); top() discards
 * phrasal tasks whose cell counter has reached the cap.
 *
 * The agenda stores raw task pointers and its destructor deletes the tasks it
 * still holds, so instances should not be copied.
 */
template <typename T, typename LESS_THAN >
class local_cap_agenda : public abstract_agenda<T, LESS_THAN > {
public:

  /**
   * \param cell_size  cap that a cell counter is compared against
   * \param max_pos    largest chart position; every per-cell table gets
   *                   (max_pos + 1) * (max_pos + 1) zero-initialised entries
   */
  local_cap_agenda(int cell_size, int max_pos)
    : _A(),
      _popped((max_pos + 1) * (max_pos + 1)),
      _max_pos(max_pos),
      _cell_size(cell_size),
      _exec((max_pos + 1) * (max_pos + 1)),
      _succ((max_pos + 1) * (max_pos + 1)),
      _pass((max_pos + 1) * (max_pos + 1))
  {}

  /** Deletes all tasks that are still in the agenda. */
  ~local_cap_agenda();

  /** Push \a t onto the agenda; the cap is not checked at this point. */
  void push(T *t) { _A.push(t); }

  /** Best task that is not blocked by the cap, or NULL if there is none. */
  T * top();

  /** Like top(), but also removes the returned task from the agenda. */
  T * pop();

  /**
   * True if no executable task is left. This goes through top() and may
   * therefore discard capped tasks as a side effect.
   */
  bool empty() { return NULL == top(); }

  /** Advance the cell counter of \a t according to the pruning strategy. */
  void feedback (T *t, tItem *result);

private:

  /** The underlying priority queue of task pointers. */
  std::priority_queue<T *, std::vector<T *>, LESS_THAN> _A;
  /** Per-cell counters that are compared against \c _cell_size. */
  std::vector<int> _popped;
  /** Largest chart position, used to compute cell indices. */
  int _max_pos;
  /** The per-cell cap. */
  int _cell_size;

  // NOTE(review): the three tables below are only sized in the constructor
  // and are not read or written by any member function of this class --
  // presumably statistics (executed / successful / passive); confirm.
  std::vector<int> _exec;
  std::vector<int> _succ;
  std::vector<int> _pass;
};


/*
* (implementation here, due to use of templates)
*/

/**
 * Delete all tasks that are still stored in the agenda.
 *
 * Each task is removed from the queue before it is deleted, so the queue
 * never holds a pointer to an already destroyed task while it reorders its
 * remaining elements with LESS_THAN.
 */
template <typename T, class LESS_THAN>
local_cap_agenda<T, LESS_THAN>::~local_cap_agenda() {
  while (!_A.empty()) {
    T *t = _A.top();
    _A.pop();
    delete t;
  }
}

/**
 * Return the best task that may still be executed, without removing it.
 *
 * Phrasal tasks whose span has already reached the per-cell limit are removed
 * from the agenda and deleted on the way. Non-phrasal tasks (inflectional and
 * lexical rules) are never discarded.
 *
 * \return the best admissible task, or NULL if the agenda holds none.
 */
template <typename T, class LESS_THAN>
T * local_cap_agenda<T, LESS_THAN>::top() {
  while (!_A.empty()) {
    T *t = _A.top();
    const bool capped =
      t->phrasal() && _popped[t->start()*(_max_pos+1) + t->end()] >= _cell_size;
    if (!capped) {
      return t;
    }
    // This span reached the limit, so drop the task and keep searching.
    // Remove it from the queue first, so that the queue never contains a
    // dangling pointer while it restores its heap order.
    _A.pop();
    delete t;
  }
  return NULL;
}

/**
 * Remove the best admissible task (as determined by top()) from the agenda
 * and return it. The caller becomes responsible for deleting the task.
 *
 * \return the removed task, or NULL if top() found none.
 */
template <typename T, class LESS_THAN>
T * local_cap_agenda<T, LESS_THAN>::pop() {
  T *best = top();
  if (best == NULL) {
    return best;
  }
  _A.pop();
  return best;
}

/**
 * Advance the per-cell counter for the span of task \a t, depending on the
 * option "opt_chart_pruning_strategy":
 *  - 0: every phrasal task counts,
 *  - 1: a phrasal task counts only if \a result is non-null,
 *  - 2: a phrasal task counts only if \a result is non-null and
 *       t->yields_passive() holds.
 * Any other value leaves the counters untouched, as do non-phrasal tasks.
 *
 * \param t       the task being reported on
 * \param result  the item associated with \a t, or 0
 */
template <typename T, class LESS_THAN>
void local_cap_agenda<T, LESS_THAN>::feedback (T *t, tItem *result) {
  if (!t->phrasal()) {
    return;
  }

  // Look the option up once instead of once per tested strategy.
  const int strategy = get_opt_int("opt_chart_pruning_strategy");

  bool counts = false;
  switch (strategy) {
    case 0:
      counts = true;
      break;
    case 1:
      counts = (result != 0);
      break;
    case 2:
      counts = (result != 0) && t->yields_passive();
      break;
    default:
      break;
  }

  if (counts) {
    _popped[t->start()*(_max_pos+1) + t->end()]++;
  }
}


#endif
Loading

0 comments on commit 305c074

Please sign in to comment.