Changes from all commits (30 commits)
daaca80  Add files via upload (erba994, Oct 30, 2018)
b92831d  Practical 1, Task 2 uploaded (erba994, Nov 2, 2018)
0b27173  Delete testtokens_extractor.py (erba994, Nov 3, 2018)
1c407c6  Delete MaxMatch_Documentation.pdf (erba994, Nov 3, 2018)
0510a82  Delete MaxMatch_Documentation.tex (erba994, Nov 3, 2018)
1b57ac2  Delete dictionary.txt (erba994, Nov 3, 2018)
8ff5e49  Delete testtokens.txt (erba994, Nov 3, 2018)
a0ccf3b  Delete maxmatchtokens_pythondict.txt (erba994, Nov 3, 2018)
5b070e6  Delete dictionarybash.txt (erba994, Nov 3, 2018)
34f820d  Delete dictionary_extractor.py (erba994, Nov 3, 2018)
0896966  Delete ja_gsd-ud-test.conllu (erba994, Nov 3, 2018)
4d3ddec  Delete maxmatch.pstat (erba994, Nov 3, 2018)
a6ddb57  Delete maxmatch.py (erba994, Nov 3, 2018)
a2d4885  Delete maxmatchtokens_bashdict.txt (erba994, Nov 3, 2018)
28ccb18  Delete sentencelist.txt (erba994, Nov 3, 2018)
3931011  Delete sentencelist_extractor.py (erba994, Nov 3, 2018)
371da01  Delete ja_gsd-ud-train.conllu (erba994, Nov 3, 2018)
281a4ad  Practical 1 added as per guidelines (erba994, Nov 3, 2018)
7270f9b  Rename test-1-answers.tex to quiz-01-response.tex (erba994, Nov 3, 2018)
17c5abb  quiz 2 uploaded (erba994, Nov 19, 2018)
b95286e  Practical 2 report and files added (erba994, Nov 21, 2018)
4632e6b  Practical 5 added (erba994, Mar 30, 2019)
e993920  Practical 5 added (erba994, Mar 30, 2019)
5c3b5f9  Practical 3 added (erba994, Mar 30, 2019)
2cda4a7  Practical 3 added (erba994, Mar 30, 2019)
016af52  Practical 4 added (erba994, Mar 30, 2019)
64e9123  Practical 4 added (erba994, Mar 30, 2019)
24c0e1b  quiz 3 uploaded (erba994, Apr 1, 2019)
6997bc4  Rename MST.txt to MST-response.txt (erba994, Apr 5, 2019)
0849caa  Rename tagger_comparison.txt to tagger_comparison-response.txt (erba994, Apr 5, 2019)
1 change: 1 addition & 0 deletions 2018-komp-ling/practicals/MST-response.txt
@@ -0,0 +1 @@
In the folder, the file kernelarray.py implements the Chu-Liu/Edmonds algorithm. The script needs a UDPipe-segmented CoNLL-U file as input and will output, for every sentence, a list of heads with the dependents of each head. To set the input, open the .py file and change the file path in the main section (still not good enough at coding to implement args, sorry).
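A minimal usage sketch, assuming kernelarray.py (below) is importable and using a hypothetical input file mysentences.conllu:

    from kernelarray import open_conllu, boundary_creator, cycle_reductioner, alg_return

    # "mysentences.conllu" is a placeholder; the script itself hard-codes "paragraph.conllu".
    sents, wordlists = open_conllu("mysentences.conllu")
    for edges, wordings in zip(sents, wordlists):
        boundary = boundary_creator(range(len(edges)), edges)
        print(alg_return(cycle_reductioner(boundary), wordings))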
105 changes: 105 additions & 0 deletions 2018-komp-ling/practicals/MST/kernelarray.py
@@ -0,0 +1,105 @@
import scipy.linalg as sp
from math import sqrt
import numpy
import re


def open_conllu(pathtofile):
    """Read a CoNLL-U file and return, per sentence, the list of
    (dependent, head) edges and the list of word forms."""
    with open(pathtofile, encoding="utf-8") as r:
        listsents = []
        listrow = []
        listwords = []
        listword = []
        file = r.readlines()
        for line in file:
            # A token ID of 1 marks the start of a new sentence.
            if re.sub(r"^([0-9]+)\t.+\n$", r"\1", line) == "1":
                if listrow != []:
                    listsents.append(listrow)
                    listwords.append(listword)
                listrow = []
                listword = []
            # Keep only lines that start with a bare token index.
            if re.sub(r"^([0-9]+)\t.+\n$", "", line) == "":
                row_num = re.sub(r"^([0-9]+)\t.+?\t.+?\t.+?\t.+?\t.+?\t([0-9]+)\t.+\n$", r"\1", line)
                row_vec = re.sub(r"^([0-9]+)\t.+?\t.+?\t.+?\t.+?\t.+?\t([0-9]+)\t.+\n$", r"\2", line)
                word = re.sub(r"^[0-9]+\t(.+?)\t.+?\t.+?\t.+?\t.+?\t[0-9]+\t.+\n$", r"\1", line)
                # Shift to 0-based indices: (dependent, head).
                listrow.append((int(row_num) - 1, int(row_vec) - 1))
                listword.append(word)
        if listrow != []:
            listsents.append(listrow)
            listwords.append(listword)
        return listsents, listwords
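
# A hypothetical CoNLL-U token row (tab-separated columns ID FORM LEMMA
# UPOS XPOS FEATS HEAD DEPREL DEPS MISC), e.g.
#     2  dog  dog  NOUN  NN  _  3  nsubj  _  _
# is turned into the 0-based edge (1, 2) plus the form "dog". Comment lines
# and multiword-token ranges such as "3-4" do not match the regex and are
# skipped.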


def boundary_creator(vertex, edges):
    """Build the vertex-by-edge incidence (boundary) matrix:
    +1 where an edge enters the vertex, -1 where it leaves it."""
    boundary = []
    for v in vertex:
        newboundary = []
        for e in edges:
            newboundary.append(1 if e[1] == v else -1 if e[0] == v else 0)
        boundary.append(newboundary)
    return boundary
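
# A hypothetical example: for vertex = [0, 1, 2] and
# edges = [(0, 1), (1, 2), (2, 1)], boundary_creator returns
#     [[-1,  0,  0],
#      [ 1, -1,  1],
#      [ 0,  1, -1]]
# (rows are vertices, columns are edges).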


"""CYCLE CHECKING AND REDUCTION: this part checks through nullspace if there is any cycle first. If nullspace sum is \
not 0, we do have cycles.Then takes any row and checks which row receives more than one edge. Takes the index of the \
starting vertex of the edges, and check which of the vertexes has more weight in the nullspace calculus. The one \
starting from the most weighted vertex then gets reduced and the nullspace is updated dinamically. The cycle repeats \
until the nullspace sum returns 0"""


def cycle_reductioner(boundary):
    boundarynull = sp.null_space(boundary)
    # Scale the orthonormal basis; the factor does not change which sums are zero.
    boundarynull = boundarynull * -sqrt(2)
    while numpy.sum(boundarynull) != 0 and len(boundarynull) != 0:
        listall = []
        # Collect, for every vertex with more than one incoming edge,
        # the column indices of those edges.
        for row in boundary:
            newlist = []
            if row.count(1) > 1:
                for index, number in enumerate(row):
                    if number == 1:
                        newlist.append(index)
                listall.append(newlist)
        for indices in listall:
            # Pick the incoming edge with the largest null-space weight...
            maxvalue = 0
            maxindex = 0
            for index in indices:
                value = abs(boundarynull[index][0])
                if value > maxvalue:
                    maxvalue = value
                    maxindex = index
            # ...and zero its entries in the conflicted rows.
            for row in boundary:
                if row.count(1) > 1:
                    if row[maxindex] == 1:
                        row[maxindex] = 0
                    if row[maxindex] == -1:
                        row[maxindex] = 0
        # Recompute the null space on the reduced matrix.
        boundarynull = sp.null_space(boundary)
        boundarynull = boundarynull * -sqrt(2)
    return boundary
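
# Continuing the hypothetical example above: the null space of that matrix
# is spanned by (0, 1, 1)/sqrt(2), i.e. edges 1 and 2 form a cycle between
# vertices 1 and 2. Vertex row 1 has two incoming edges (columns 0 and 2);
# column 2 carries the larger null-space weight, so the +1 entry for edge
# (2, 1) in row 1 is zeroed out. The null space of the reduced matrix is
# then trivial and the loop stops.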


def alg_return(reduced_object, words):
    """Map each vertex row of the reduced boundary matrix back to words:
    returns (head word, [dependent words]) pairs for one sentence."""
    sentence = []
    for word in reduced_object:
        verts = []
        wordsverts = []
        # The -1 entry marks this vertex's own outgoing edge.
        origin = word.index(-1)
        for i, x in enumerate(word):
            if x == 1:
                verts.append(i)
        for x in verts:
            wordsverts.append(words[x])
        sentence.append((words[origin], wordsverts))
    return sentence
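
# On the reduced example above, with words = ["w0", "w1", "w2"], alg_return
# yields [("w0", []), ("w1", ["w0"]), ("w2", ["w1"])]: each pair is a head
# word followed by the list of its dependents.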

if __name__ == "__main__":
    doc, words = open_conllu("paragraph.conllu")
    finallist = []
    for i, sent in enumerate(doc):
        wordings = words[i]
        vertex = range(len(sent))
        edges = sent
        # vertex = [0, 1, 2, 3, 4, 5, 6]
        # edges = [[0, 1], [1, 3], [2, 3], [3, 4], [4, 6], [5, 6], [6, 1]]
        finallist.append(alg_return(cycle_reductioner(boundary_creator(vertex, edges)), wordings))
    print(finallist)