-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexecute.py
82 lines (69 loc) · 2.07 KB
/
execute.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# -*- coding: utf-8 -*-
import search
import index
import json
import csv
from evalution import *
from tools import *
# Paths into the CACM collection. Raw strings keep the backslash literal:
# '\q' and '\c' are not valid escape sequences, and non-raw literals that
# contain them trigger a warning on recent Python versions.
sourceEval = r'CACM\query.text'      # query file, read by the evaluation section
source = r'CACM\cacm.all'            # file handed to index.Index as the source
commonwords = r'CACM\common_words'   # second argument of index.Index
####
####
# Build the index over CACM
####
####
cacm = index.Index(source, commonwords)
# Parse the source into a dictionary
cacm.generateDico()
# Create the inverted index (this includes creating the index)
cacm.generateIndex()
cacm.generateIIndex()
# NOTE(review): disabled alternative -- presumably reloads a previously saved
# inverted index instead of rebuilding it; confirm before re-enabling.
#cacm.loadIIndexFromFile()
###
###
# Set up a search
###
###
recherche = search.Search()
recherche.setType('tf-idf')
####
####
# Evaluate the ranking algorithms
####
####
# Number of results requested per query; also the number of rank values (k)
# at which recall and precision are computed.
RESULT_LIMIT = 100
# Raw string: '\q' is not a valid escape sequence, so keep the backslash literal.
qrels = r'CACM\qrels.text'
parsedQrels = parseQrels(qrels)
### Turn the query.text file into a dictionary
resultatsEvaluation = rec_dd()
evaluation = index.Index(sourceEval, commonwords)
evaluation.generateDico()
# Both CSV files are managed by a `with` block so they are flushed and closed
# even if a query fails part-way through.
# NOTE(review): the binary "wb+" mode is what the Python 2 csv module expects;
# under Python 3 this must become mode "w" with newline="".
with open("evaluation.csv", "wb+") as evaluation_file, \
        open("results.csv", "wb+") as results_file:
    f = csv.writer(evaluation_file)   # per-query, per-rank recall/precision
    g = csv.writer(results_file)      # ranked results with their scores
    for requete in evaluation.dico:
        recherche.setQuery(evaluation.dico[requete]['.W'])
        recherche.setLimit(RESULT_LIMIT)
        resultsScored = recherche.executeSearch(cacm.iIndex)
        results = [int(result[0]) for result in resultsScored]
        scores = [float(result[1]) for result in resultsScored]
        # Entry of the parsed qrels for this query; looked up once because it
        # does not change while the query is being scored.
        pertinents = parsedQrels[requete]
        # Queries whose qrels entry is empty get no evaluation rows at all.
        if pertinents:
            for k in range(RESULT_LIMIT):
                resultatsEvaluation[requete][k]['rappel'] = calculRappel(results, pertinents, k)
                resultatsEvaluation[requete][k]['precision'] = calculPrecision(results, pertinents, k)
        # Prefix with 'p' every result found in the qrels entry. This happens
        # after the metrics are computed, which need the plain integer ids.
        marked = [
            ('p' + str(doc)) if doc in pertinents else doc
            for doc in results
        ]
        g.writerow(
            [requete]
            + [str(doc) + ' ' + str(score) for doc, score in zip(marked, scores)]
        )
    f.writerow(['requete', 'ordre', 'rappel', 'precision'])
    for requete in resultatsEvaluation:
        # Iterate this query's own ranks. The previous code iterated
        # resultatsEvaluation[1] for every query, which tied all rows to the
        # keys recorded for query 1 (and, if rec_dd() auto-creates missing
        # keys -- TODO confirm -- could insert into the mapping being iterated).
        for ordre in resultatsEvaluation[requete]:
            mesures = resultatsEvaluation[requete][ordre]
            f.writerow([requete, ordre, mesures['rappel'], mesures['precision']])