-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathutils.py
39 lines (36 loc) · 1.2 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#coding: utf8
import pandas as pd
import numpy as np
def sublist_uniques(data, sublist):
    """Collect the distinct items found in a column of list-like cells.

    Args:
        data: DataFrame that is scanned row by row.
        sublist: Label of the column whose cells hold iterables of items.

    Returns:
        A list of the unique items in order of first appearance. Rows
        contribute nothing when the column is missing, when the cell is a
        string/bytes value, or when the cell is not iterable (e.g. NaN).
    """
    seen = set()
    # A parallel list keeps first-seen order so the result (and any column
    # layout derived from it) is reproducible from run to run.
    ordered = []
    for _, row in data.iterrows():
        try:
            cell = row[sublist]
        except KeyError:
            # Column not present: stay best-effort and skip the row.
            continue
        if isinstance(cell, (str, bytes)):
            # A bare string is not a sublist; iterating it would register
            # every single character as its own category.
            continue
        try:
            for item in cell:
                if item not in seen:
                    seen.add(item)
                    ordered.append(item)
        except TypeError:
            # Non-iterable cell or unhashable item. Items gathered from
            # this row before the failure are kept.
            continue
    return ordered
def sublists_to_binaries(data, sublist, index_key=None):
    """Expand a column of list-like cells into indicator columns.

    The result has one column per distinct item reported by
    ``sublist_uniques(data, sublist)`` and one row per processed input row.

    Args:
        data: DataFrame that is scanned row by row.
        sublist: Label of the column whose cells hold iterables of items.
        index_key: Optional label of a column whose value becomes the row
            label in the result. When omitted, the row label of ``data``
            is used, and a repeated label overwrites the earlier row.

    Returns:
        A DataFrame in which a row holds 1 for every item present in the
        cell and 0 elsewhere. With ``index_key`` set, the first row seen
        for a key stores that 0/1 vector; every later row with the same
        key adds 1 per item occurrence to the existing row. Rows are
        skipped when a needed column is missing, when the cell is a
        string/bytes value or not iterable, or when an item is not among
        the known categories.
    """
    categories = sublist_uniques(data, sublist)
    frame = pd.DataFrame(columns=categories)
    if not categories:
        # pandas refuses to append a row to a frame without columns, so
        # there is nothing that could be stored.
        return frame

    # Resolve each category to its column position once instead of doing
    # a linear list.index() scan for every item.
    column_of = {category: pos for pos, category in enumerate(categories)}

    for label, row in data.iterrows():
        try:
            cell = row[sublist]
            key = label if index_key is None else row[index_key]
            merge = index_key is not None and key in frame.index
        except (KeyError, TypeError):
            # Missing column, or a key value that cannot serve as a row
            # label (unhashable): skip the row.
            continue
        if isinstance(cell, (str, bytes)):
            # A bare string is not a sublist of items.
            continue
        try:
            items = list(cell)
            indicator = np.zeros(len(categories))
            for item in items:
                indicator[column_of[item]] = 1
        except (TypeError, KeyError):
            # Non-iterable cell, unhashable item, or unknown category.
            # Validation happens before any write, so a bad row leaves
            # the frame untouched.
            continue

        if merge:
            for item in items:
                # A single .loc[row, col] call writes into the frame
                # itself; chained .loc[row][col] may update a temporary.
                frame.loc[key, item] += 1
        else:
            frame.loc[key] = indicator
    return frame