Commit e1f0606
Refactor the model to make it usable without the server
Currently, the `SemanticMatch` and `EquivalenceTable` classes can only be used effectively together with the server. This commit creates a new and improved `SemanticMatchDictStore`, which implements the algorithm to search for matches. We then refactor the service to use this new class. Furthermore, we clean up the service so that it can be used in a more Pythonic way, eliminating the need for the `service_model` module.
1 parent ce50050 commit e1f0606
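
The new `SemanticMatchDictStore` added below can be used on its own, without running the FastAPI service. The following is a minimal usage sketch based on the API introduced in `semantic_matcher/model.py` in this commit; the semantic IDs, scores, meta information, and file name are made-up illustrative values, not data from this repository:

from semantic_matcher.model import SemanticMatch, SemanticMatchDictStore

# Build a store from a few illustrative matches
store = SemanticMatchDictStore([
    SemanticMatch(base_semantic_id="urn:example:A", match_semantic_id="urn:example:B",
                  score=0.8, meta_information={"source": "manual"}),
    SemanticMatch(base_semantic_id="urn:example:B", match_semantic_id="urn:example:C",
                  score=0.9, meta_information={"source": "manual"}),
])

# Query the matches of a semantic ID that reach a minimum score
matches = store.get_matches(semantic_id="urn:example:A", min_score=0.7)

# Persist the store to JSON and load it again
store.to_file("matches.json")
store = SemanticMatchDictStore.from_file("matches.json")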

4 files changed: +307 additions, −55 deletions
semantic_matcher/model.py

Lines changed: 152 additions & 4 deletions
@@ -1,4 +1,6 @@
-from typing import Dict, List
+import json
+import copy
+from typing import Dict, List, Set, Optional, Iterable
 
 from pydantic import BaseModel
 
@@ -8,13 +10,159 @@ class SemanticMatch(BaseModel):
     A semantic match, mapping two semanticIDs with a matching score. Can be imagined as a weighted graph with
     `base_semantic_id` ---`score`---> `match_semantic_id`
 
-    Todo: Think about static and TTL, but that is optimization
-    Todo: Maybe we want to have the matching method as debug information
+    :cvar base_semantic_id:
+    :cvar match_semantic_id:
+    :cvar score: The semantic similarity score, a float between 0 and 1
+    :cvar path: Optionally, if the `SemanticMatch` did not come from a source but is inferred by another `SemanticMatch`
+        the `path` stores the SemanticMatches it came from
+    :cvar meta_information: Optional meta_information, such as the source of the `SemanticMatch`
     """
     base_semantic_id: str
     match_semantic_id: str
     score: float
-    meta_information: Dict
+    path: Optional[List["SemanticMatch"]] = None
+    meta_information: Optional[Dict] = None
+
+    def __hash__(self):
+        return hash((
+            self.base_semantic_id,
+            self.match_semantic_id,
+            self.score,
+            self.path,
+            frozenset(self.meta_information.items())
+        ))
+
+    @classmethod
+    def combine_semantic_matches(cls, first: "SemanticMatch", second: "SemanticMatch") -> "SemanticMatch":
+        """
+        Construct a new `SemanticMatch` by combining two `SemanticMatch`es.
+
+        Given the following situation:
+            A --0.4--> B
+            B --0.5--> C
+        this constructs a new `SemanticMatch`:
+            A --(0.4*0.5)--> C
+        while updating the `path` information of the new `SemanticMatch`
+
+        :param first: First `SemanticMatch`
+        :param second: Second `SemanticMatch`. Note that `second.base_semantic_id` needs to be the same
+            as `first.match_semantic_id`
+        :return: The combined `SemanticMatch`
+        """
+        if not first.match_semantic_id == second.base_semantic_id:
+            raise KeyError(f"Cannot combine. `first.match_semantic_id` ({first.match_semantic_id}) does not "
+                           f"fit `second.base_semantic_id` ({second.base_semantic_id}).")
+        if second.path:
+            new_path = copy.copy(second.path)
+            new_path.insert(0, second)
+        else:
+            new_path = [second]
+        return SemanticMatch(
+            base_semantic_id=first.base_semantic_id,
+            match_semantic_id=second.match_semantic_id,
+            score=first.score*second.score,
+            path=new_path,
+        )
+
+class SemanticMatchDictStore:
+    """
+    A collection of `SemanticMatch`es, stored in a Dict, where the Key is the `base_semantic_id` and the Value is
+    the `SemanticMatch` object. This allows for efficient resolution of the `SemanticMatches` of the `base_semantic_id`.
+    """
+    def __init__(self, matches: Iterable[SemanticMatch]):
+        self._store: Dict[str, Set[SemanticMatch]] = {}
+        for x in matches:
+            self.add(x)
+
+    def add(self, match: SemanticMatch) -> None:
+        """
+        Add a `SemanticMatch` to the store
+        """
+        if match.base_semantic_id in self._store:
+            self._store[match.base_semantic_id].add(match)
+        else:
+            self._store[match.base_semantic_id] = {match}
+
+    def discard(self, match: SemanticMatch) -> None:
+        """
+        Discard a `SemanticMatch` from the store
+        """
+        # First we remove the `SemanticMatch` from the set of matches for that `base_semantic_id`
+        self._store[match.base_semantic_id].discard(match)
+        # Then, if there is no more `SemanticMatch`es for that `base_semantic_id`, we remove the Dict entry completely
+        if not len(self._store[match.base_semantic_id]):
+            self._store.pop(match.base_semantic_id)
+
+    def get_all_matches(self) -> Set[SemanticMatch]:
+        """
+        Return a set of all `SemanticMatch`es currently inside the store
+        """
+        all_matches: Set[SemanticMatch] = set()
+        for i in self._store.values():
+            all_matches.update(i)
+        return all_matches
+
+    def get_matches(self, semantic_id: str, min_score: Optional[float] = None) -> Set[SemanticMatch]:
+        """
+        Return all 'SemanticMatches' of a given semantic_id currently inside a store that have a higher or equal
+        score than the `min_score`.
+        This is a recursive function, that also queries the matches of the matches, as long as the multiplicative
+        scores of the matches is still higher or equal to the `min_score`.
+        """
+        matches: Set[SemanticMatch] = set()  # This is our return Set
+
+        # First, we check on the current level
+        current_matches_with_any_score = self._store.get(semantic_id, set())
+        current_matches = {
+            match for match in current_matches_with_any_score if min_score is None or match.score >= min_score
+        }
+        # We can already update our return Set, since we know that the `current_matches` will definitely be inside
+        matches.update(current_matches)
+
+        # Now we do the same query each of the current_matches that have a score larger or equal to min_score
+        # Todo: We currently have a loop in here that we need to break
+        for match in current_matches:
+            # We calculate the new minimal score
+            # Unified score is multiplied: score(A->B) * score(B->C)
+            # This score should be larger or equal than the requested min_score:
+            #   score(A->B) * score(B->C) >= min_score
+            # score(A->B) is well known, as it is the `match.score`
+            #   => score(B->C) >= (min_score/score(A->B))
+            if min_score:
+                new_min_score = min_score/match.score
+            else:
+                new_min_score = min_score
+            # Here's the recursive function call, we do the same thing again with the new matches and the
+            # updated `min_score`:
+            new_matches = self.get_matches(semantic_id=match.base_semantic_id, min_score=new_min_score)
+            # These new matches are now not relative to the original `base_semantic_id`, so we need to create new
+            # `SemanticMatch`es and somehow store the path.
+            for new_match in new_matches:
+                matches.add(SemanticMatch.combine_semantic_matches(
+                    first=match,
+                    second=new_match
+                ))
+
+        # In the end, we return our return Set
+        return matches
+
+    def to_file(self, filename: str) -> None:
+        matches: List[Dict] = [match.model_dump() for match in self.get_all_matches()]
+        with open(filename, "w") as file:
+            json.dump(matches, file, indent=4)
+
+    @classmethod
+    def from_file(cls, filename: str) -> "SemanticMatchDictStore":
+        with open(filename, "r") as file:
+            matches_data = json.load(file)
+        matches = [SemanticMatch(**match_dict) for match_dict in matches_data]
+        return cls(matches)
+
+    def __len__(self) -> int:
+        length = 0
+        for i in self._store.values():
+            length += len(i)
+        return length
 
 
 class EquivalenceTable(BaseModel):
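
The comments inside `get_matches` above describe how the requested minimum score is propagated into the recursive call via the multiplicative combined score. A short worked example of that arithmetic follows; the scores are illustrative values, not data from the repository:

# Combined scores multiply: score(A->B) * score(B->C) >= min_score
# is equivalent to:         score(B->C) >= min_score / score(A->B)
min_score = 0.7                          # requested minimum combined score
score_a_b = 0.8                          # direct match A --0.8--> B
new_min_score = min_score / score_a_b    # 0.875, the threshold for the recursive query
# A match B --0.9--> C passes (0.9 >= 0.875); combining it via
# SemanticMatch.combine_semantic_matches gives a score of 0.8 * 0.9 = 0.72 >= 0.7.
# A match B --0.85--> C is filtered out, since 0.8 * 0.85 = 0.68 < 0.7.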

semantic_matcher/service.py

Lines changed: 37 additions & 26 deletions
@@ -1,9 +1,26 @@
-from typing import List
+from typing import Optional, List
 
+from pydantic import BaseModel
 import requests
 from fastapi import APIRouter
 
-from semantic_matcher import model, service_model
+from semantic_matcher import model
+
+
+class MatchRequest(BaseModel):
+    """
+    Request body for the :func:`service.SemanticMatchingService.get_match`
+
+    :ivar semantic_id: The semantic ID that we want to find matches for
+    :ivar local_only: If `True`, only check at the local service and do not request other services
+    :ivar name: Optional name of the resolved semantic ID for NLP matching
+    :ivar definition: Optional definition of the resolved semantic ID for NLP matching
+    """
+    semantic_id: str
+    score_limit: float
+    local_only: bool = True
+    name: Optional[str] = None
+    definition: Optional[str] = None
 
 
 class SemanticMatchingService:
@@ -27,7 +44,7 @@ class SemanticMatchingService:
     def __init__(
             self,
             endpoint: str,
-            equivalences: model.EquivalenceTable
+            matches: model.SemanticMatchDictStore
     ):
         """
         Initializer of :class:`~.SemanticMatchingService`
@@ -46,48 +63,42 @@ def __init__(
         self.router.add_api_route(
             "/get_matches",
             self.get_matches,
+            response_model=List[model.SemanticMatch],
             methods=["GET"]
         )
         self.router.add_api_route(
             "/post_matches",
             self.post_matches,
             methods=["POST"]
         )
-        self.router.add_api_route(
-            "/clear",
-            self.remove_all_matches,
-            methods=["POST"]
-        )
         self.endpoint: str = endpoint
-        self.equivalence_table: model.EquivalenceTable = equivalences
+        self.matches: model.SemanticMatchDictStore = matches
 
     def get_all_matches(self):
         """
         Returns all matches stored in the equivalence table-
         """
-        matches = self.equivalence_table.get_all_matches()
+        matches = self.matches.get_all_matches()
         return matches
 
-    def remove_all_matches(self):
-        self.equivalence_table.remove_all_semantic_matches()
 
     def get_matches(
             self,
-            request_body: service_model.MatchRequest
-    ) -> service_model.MatchesList:
+            request_body: MatchRequest
+    ) -> List[model.SemanticMatch]:
         """
         A query to match two SubmodelElements semantically.
 
         Returns a matching score
         """
         # Try first local matching
-        matches: List[model.SemanticMatch] = self.equivalence_table.get_local_matches(
+        matches: List[model.SemanticMatch] = list(self.matches.get_matches(
            semantic_id=request_body.semantic_id,
-            score_limit=request_body.score_limit
-        )
+            min_score=request_body.score_limit
+        ))
         # If the request asks us to only locally look, we're done already
         if request_body.local_only:
-            return service_model.MatchesList(matches=matches)
+            return matches
         # Now look for remote matches:
         additional_remote_matches: List[model.SemanticMatch] = []
         for match in matches:
@@ -97,7 +108,7 @@ def get_matches(
             remote_matching_service = self._get_matcher_from_semantic_id(match.match_semantic_id)
             if remote_matching_service is None:
                 continue
-            remote_matching_request = service_model.MatchRequest(
+            remote_matching_request = MatchRequest(
                 semantic_id=match.match_semantic_id,
                 # This is a simple inequality ("Ungleichung")
                 # Unified score is multiplied: score(A->B) * score(B->C)
@@ -112,20 +123,20 @@ def get_matches(
                 definition=request_body.definition
             )
             url = f"{remote_matching_service}/get_matches"
+            # Todo: Break recursion loop here
             new_matches_response = requests.get(url, json=remote_matching_request.model_dump_json())
-            match_response = service_model.MatchesList.model_validate_json(new_matches_response.text)
-            additional_remote_matches.extend(match_response.matches)
+            response_matches = [model.SemanticMatch(**match) for match in new_matches_response.json()]
+            additional_remote_matches.extend(response_matches)
         # Finally, put all matches together and return
         matches.extend(additional_remote_matches)
-        res = service_model.MatchesList(matches=matches)
-        return res
+        return matches
 
     def post_matches(
             self,
-            request_body: service_model.MatchesList
+            request_body: List[model.SemanticMatch]
     ) -> None:
-        for match in request_body.matches:
-            self.equivalence_table.add_semantic_match(match)
+        for match in request_body:
+            self.matches.add(match)
         # Todo: Figure out how to properly return 200
 
     def _get_matcher_from_semantic_id(self, semantic_id: str) -> str:
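
With the `service_model` module removed, the `/get_matches` route now accepts the `MatchRequest` defined directly in `service.py` and returns a plain list of `SemanticMatch` objects (see `response_model=List[model.SemanticMatch]` above). A hedged client sketch follows, mirroring the remote call the service itself makes; the base URL and all request values are assumptions for illustration, not taken from this commit:

import requests

from semantic_matcher.model import SemanticMatch

# Request body mirroring the MatchRequest model added in service.py
request_body = {
    "semantic_id": "urn:example:A",  # illustrative semantic ID
    "score_limit": 0.7,
    "local_only": True,
}
response = requests.get("http://localhost:8000/get_matches", json=request_body)
matches = [SemanticMatch(**m) for m in response.json()]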

semantic_matcher/service_model.py

Lines changed: 0 additions & 25 deletions
This file was deleted.
