|
7 | 7 | # !pip install -q boto3
|
8 | 8 | # !pip install pandas
|
9 | 9 |
|
| 10 | +###################################################### |
| 11 | + |
10 | 12 | # Read the data file to use and store it as a pandas DataFrame
|
11 | 13 | import pandas as pd
|
12 | 14 | import requests
|
@@ -136,3 +138,72 @@ def _generate_data():
|
136 | 138 | count = aos_client.count(index=index_name)
|
137 | 139 | print(count)
|
138 | 140 | # {'count': 11713, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}}
|
| 141 | + |
| 142 | +###################################################### |
| 143 | + |
# Check keyword search results (lexical search)
def keyword_search(query_text, size=10):
    """Run a ``multi_match`` query on the ``plot`` field and show the hits.

    The query is sent through the module-level ``aos_client`` against
    ``index_name``. Each hit is turned into one table row holding its
    ``_score`` followed by the ``title``, ``plot``, ``genre``, ``rating`` and
    ``main_act`` values read from the hit's ``_source``.

    Args:
        query_text: Text to match against the ``plot`` field.
        size: Value sent as the query's ``size`` (maximum number of hits
            requested). Defaults to 10.

    Returns:
        None. The result table is displayed, not returned.

    Raises:
        KeyError: If the response or a hit's ``_source`` lacks an expected key.
    """
    # Single source of truth for the ``_source`` fields, so the row contents
    # and the column header cannot drift apart.
    source_fields = ("title", "plot", "genre", "rating", "main_act")

    query = {
        "size": size,
        "query": {
            "multi_match": {
                "query": query_text,
                "fields": ["plot"],
            }
        },
    }

    res = aos_client.search(index=index_name, body=query)

    rows = [
        [hit["_score"], *(hit["_source"][field] for field in source_fields)]
        for hit in res["hits"]["hits"]
    ]
    query_result_df = pd.DataFrame(
        data=rows,
        columns=["_score", *source_fields],
    )

    # ``display`` is injected by IPython/Jupyter only; fall back to ``print``
    # so the function also works when the file is run as a plain script.
    try:
        show = display
    except NameError:
        show = print
    show(query_result_df)
| 207 | + |
| 208 | +# Check the top 10 search results |
| 209 | +keyword_search("지구의 영웅들이 힘을 합쳐 우주의 악당을 물리친다") |
0 commit comments