-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- dump chart in jxchg format even if an error occurred, e.g., exhaustion of passive edges (only if enabled) - deleted inactive code in grammar.cpp (punctuationp, translate_iso_chars) - fixed wrong is_stem_token predicate in item.h - fixed wrong string decoding in XML mode - XML tokenizing works from stream as well as from file - flop returns different exit codes depending on the error v0.99.4 - bugfixes and additions for XML input mode: missing infl tag, translate_iso, state names are static members of the state classes, state factory now creates new states, exceptions are correctly thrown and caught and are decorated with the error location; read XML from stdin directly rather than from file. - the isomorphix translation has been put completely into the tokenizers - temporarily removed the error "Duplicate failure path"; it is triggered when the German grammar with new restrictors is used to compute unification quickcheck paths (during computation of the rule filter) - chart::shortest_path is now a template function that gets an additional weight function object as argument v0.99.3 - restrictor can specify paths mixed with features (which are paths of length one anyway) git-svn-id: https://pet.opendfki.de/repos/pet/main@227 4200e16c-5112-0410-ac55-d7fb557a720a
- Loading branch information
kiefer
committed
Dec 20, 2004
1 parent
aa49c2e
commit 1729aa2
Showing
32 changed files
with
1,109 additions
and
608 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -164,76 +164,7 @@ void chart::get_statistics() | |
stats.fssize = (stats.pedges > 0) ? totalsize / stats.pedges : 0; | ||
} | ||
|
||
// | ||
// shortest path algorithm for chart | ||
// Bernd Kiefer ([email protected]) | ||
// | ||
|
||
/**
 * Heuristic cost of a chart item for the shortest-path search.
 *
 * Phrasal items cost 1, lexical items cost 1000 and every other kind of item
 * costs 1000000, so that a minimum-weight path drastically prefers phrasal
 * items over everything else.
 */
// _fix_me_ this should be a method of the item, not a function
int weight(tItem * i) {
  const bool is_phrasal = dynamic_cast<tPhrasalItem *>(i) != NULL;
  if (is_phrasal) return 1;

  const bool is_lexical = dynamic_cast<tLexItem *>(i) != NULL;
  return is_lexical ? 1000 : 1000000;
}
|
||
/**
 * Compute a minimum-weight path through the chart, using weight() as the cost
 * of every passive edge, and collect the items lying on it.
 *
 * \param result the items on the optimal path(s) are pushed to the front of
 *               this list; existing content is left in place
 * \param all    if \c true, the items of all optimal paths are collected,
 *               otherwise only the items of one optimal path
 *
 * The search starts at node 0 and ends at the last node (size - 1). Nodes are
 * processed in increasing index order, which is relied upon as a topological
 * order of the chart. Nothing is added to \a result for an empty chart.
 */
void chart::shortest_path (list <tItem *> &result, bool all) {
  typedef vector<tItem *>::size_type node_t ;

  const node_t size = _Cp_start.size() ;
  // an empty chart has no end node; "size - 1" below would wrap around
  if (size == 0) return ;

  // per-node tables; as containers they are released on every exit path
  vector < list < node_t > > pred(size + 1) ;
  vector < unsigned int > distance(size + 1, 0) ;
  vector < bool > reached(size + 1, false) ;

  // compute the minimal distance and the minimal distance predecessor nodes
  // for each node
  reached[0] = true ;
  for (node_t u = 0 ; u < size ; u++) {
    // Edges leaving a node that no path reaches must not be relaxed. With a
    // "maximum value" sentinel the sum below would wrap around and make the
    // unreachable node look like the start of a very short path.
    if (! reached[u]) continue ;

    /* this is topologically sorted order */
    for (list <tItem *>::iterator curr = _Cp_start[u].begin()
           ; curr != _Cp_start[u].end() ; ++curr) {
      tItem *passive = *curr ;
      const node_t v = passive->end() ;
      const unsigned int new_dist = distance[u] + weight(passive) ;

      if (! reached[v] || new_dist < distance[v]) {
        // strictly better: forget the predecessors recorded so far
        distance[v] = new_dist ;
        reached[v] = true ;
        pred[v].clear() ;
      } else if (distance[v] < new_dist) {
        continue ;  // strictly worse, not an optimal predecessor
      }
      pred[v].push_front(u) ;
    }
  }

  /** Extract all best paths, walking backwards from the last node */
  vector < bool > unseen(size + 1, true) ;
  queue < node_t > current ;

  current.push(size - 1) ;
  while (! current.empty()) {
    const node_t u = current.front() ; current.pop() ;
    for (list <tItem *>::iterator curr = _Cp_end[u].begin()
           ; curr != _Cp_end[u].end() ; ++curr) {
      tItem *passive = *curr ;
      const node_t v = passive->start() ;

      // the edge is on an optimal path iff its start node is an optimal
      // predecessor and the edge itself accounts for the distance difference
      const bool optimal_pred =
        find (pred[u].begin(), pred[u].end(), v) != pred[u].end() ;
      if (optimal_pred
          && static_cast<int>(distance[u] - distance[v]) == weight(passive)) {
        result.push_front(passive) ;
        if (unseen[v]) { current.push(v) ; unseen[v] = false ; }
        if (! all) break; // only extract one path
      }
    }
  }
}
|
||
/** Return \c true if the chart is connected using only edges considered \a | ||
* valid, i.e., there is a path from the first to the last node. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -71,11 +71,23 @@ class chart | |
/** Return the readings found during parsing, as a non-const reference to the \c _readings member. */ | ||
vector<tItem *> &readings() { return _readings; } | ||
|
||
/** If the parse was not successful, this function computes a shortest path | ||
* through the chart based on some heuristic built into the tItem score() | ||
* function to get the best partial results. | ||
/** Compute a shortest path through the chart based on the \a weight_fn | ||
* function that returns a weight for every chart item. | ||
* | ||
* This function can for example be used to get the best partial results or | ||
* to extract an input item sequence. \c weight_t is the numeric type | ||
* returned by \a weight_fn and has to be given explicitly, as it does not | ||
* occur in the parameter list and thus cannot be deduced from a call. | ||
* \param result a list of items constituting the minimum overall weight path | ||
* \param weight_fn a \code unary_function< tItem *, weight_t > \endcode | ||
* determining the weight for a passive chart edge (smaller is better) | ||
* \param all if there is more than one optimal path, passing \c true will | ||
* return all items on the optimal paths, otherwise, only the items on | ||
* one of the optimal paths (the default) | ||
*/ | ||
void shortest_path(list <tItem *> &items, bool all = false); | ||
template < typename weight_t, typename weight_fn_t > | ||
void shortest_path(list <tItem *> &result, weight_fn_t weight_fn | ||
, bool all = false); | ||
|
||
/** Return \c true if the chart is connected using only edges considered \a | ||
* valid, i.e., there is a path from the first to the last node. | ||
|
@@ -372,4 +384,70 @@ class chart_iter_adj_active | |
list<tItem *>::iterator _curr; | ||
}; | ||
|
||
|
||
// | ||
// shortest path algorithm for chart | ||
// Bernd Kiefer ([email protected]) | ||
// | ||
|
||
/** Implemenation of shortest path function template */ | ||
template< typename weight_t, typename weight_fn_t > | ||
void chart::shortest_path(list <tItem *> &result, weight_fn_t weight_fn | ||
, bool all) { | ||
// unary_function< tItem *, weight_t > | ||
vector<tItem *>::size_type size = _Cp_start.size() ; | ||
vector<tItem *>::size_type u, v ; | ||
|
||
vector < list < weight_t > > pred(size) ; | ||
|
||
weight_t *distance = new weight_t[size + 1] ; | ||
weight_t new_dist ; | ||
|
||
list <tItem *>::iterator curr ; | ||
tItem *passive ; | ||
|
||
// compute the minimal distance and minimal distance predecessor nodes for | ||
// each node | ||
for (u = 1 ; u <= size ; u++) { distance[u] = UINT_MAX ; } | ||
distance[0] = 0 ; | ||
|
||
for (u = 0 ; u < size ; u++) { | ||
/* this is topologically sorted order */ | ||
for (curr = _Cp_start[u].begin() ; curr != _Cp_start[u].end() ; curr++) { | ||
passive = *curr ; | ||
v = passive->end() ; new_dist = distance[u] + weight_fn(passive) ; | ||
if (distance[v] >= new_dist) { | ||
if (distance[v] > new_dist) { | ||
distance[v] = new_dist ; | ||
pred[v].clear() ; | ||
} | ||
pred[v].push_front(u) ; | ||
} | ||
} | ||
} | ||
|
||
/** Extract all best paths */ | ||
queue < weight_t > current ; | ||
bool *unseen = new bool[size + 1] ; | ||
for (u = 0 ; u <= size ; u++) unseen[u] = true ; | ||
|
||
current.push(size - 1) ; | ||
while (! current.empty()) { | ||
u = current.front() ; current.pop() ; | ||
for (curr = _Cp_end[u].begin() ; curr != _Cp_end[u].end() ; curr++) { | ||
passive = *curr ; | ||
v = passive->start() ; | ||
if ((find (pred[u].begin(), pred[u].end(), v) != pred[u].end()) | ||
&& (distance[u] == weight_fn(passive) + distance[v])) { | ||
result.push_front(passive) ; | ||
if (unseen[v]) { current.push(v) ; unseen[v] = false ; } | ||
if (! all) break; // only extract one path | ||
} | ||
} | ||
} | ||
|
||
delete[] distance; | ||
delete[] unseen; | ||
} | ||
|
||
#endif |
Oops, something went wrong.