-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpisos.py
84 lines (75 loc) · 4.33 KB
/
pisos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import pandas as pd
import numpy as np
import re
"""
piso uno dos tres
planta baja
"""
# Load properati.csv into the module-level DataFrame `data`. The path is
# relative, so it is resolved against the current working directory.
data = pd.read_csv('../desafio_ds/properati.csv')
# Ordinal words, as written before "PISO" (e.g. "TERCERO PISO"), mapped to
# their floor number.
_ORDINAL_FLOORS = {
    'PLANTA BAJA': 0, 'PRIMER': 1, 'SEGUNDO': 2, 'TERCERO': 3, 'CUARTO': 4,
    'QUINTO': 5, 'SEXTO': 6, 'SEPTIMO': 7, 'OCTAVO': 8, 'NOVENO': 9,
    'DECIMO': 10,
}

# Number words accepted after "PISO" (e.g. "PISO DOS"). 'CUATRO' is the
# cardinal for 4; 'CUARTO' is kept as well so texts such as "PISO CUARTO"
# continue to resolve to 4.
_CARDINAL_FLOORS = {
    'UNO': 1, 'DOS': 2, 'TRES': 3, 'CUATRO': 4, 'CUARTO': 4, 'CINCO': 5,
    'SEIS': 6, 'SIETE': 7, 'OCHO': 8, 'NUEVE': 9, 'DIEZ': 10,
}


def _floor_from_digits(match):
    """Return the floor captured as digits in the ``nro`` group."""
    return int(match.group('nro'))


def _floor_from_words(table):
    """Build a converter that looks up the upper-cased ``nro`` group in *table*."""
    def convert(match):
        return table[match.group('nro').upper()]
    return convert


# (report label, compiled pattern, match -> floor number), applied in this
# order; an earlier pattern wins because later ones only touch rows whose
# floor is still null. Patterns are raw strings so that \s and \d reach the
# regex engine without relying on Python's handling of unknown escapes.
_FLOOR_PATTERNS = (
    ('PRIMER.SEGUNDO.....PISO',
     re.compile(r'(?P<nro>PRIMER|SEGUNDO|TERCERO|CUARTO|QUINTO|SEXTO|SEPTIMO'
                r'|OCTAVO|NOVENO|DECIMO)(?P<piso>\sPISO\s)', re.IGNORECASE),
     _floor_from_words(_ORDINAL_FLOORS)),
    ('1ER....2DO.....PISO',
     re.compile(r'(?P<nro>\d{1,2})(?P<piso>(er|do|to|mo|vo|no)\spiso\s)',
                re.IGNORECASE),
     _floor_from_digits),
    ('1..2.....PISO',
     re.compile(r'(?P<nro>\d{1,2})(?P<piso>\spiso\s)', re.IGNORECASE),
     _floor_from_digits),
    ('PISO...1..2....',
     re.compile(r'(?P<piso>piso\s)(?P<nro>\d{1,2})', re.IGNORECASE),
     _floor_from_digits),
    ('PISO...UNO...DOS.....',
     re.compile(r'(?P<piso>PISO\s)(?P<nro>UNO|DOS|TRES|CUATRO|CUARTO|CINCO'
                r'|SEIS|SIETE|OCHO|NUEVE|DIEZ)', re.IGNORECASE),
     _floor_from_words(_CARDINAL_FLOORS)),
    ('PLANTA BAJA',
     re.compile(r'PLANTA\sBAJA', re.IGNORECASE),
     lambda match: 0),
)


def _fill_from_description(df, compiled, to_floor):
    """Fill null ``floor`` cells from a regex match in ``description``.

    Returns the number of cells that were filled.
    """
    # Only real strings are searched; any missing-value marker (None, any
    # NaN object) yields None instead of being handed to the regex.
    matches = df['description'].map(
        lambda text: compiled.search(text) if isinstance(text, str) else None)
    mask = matches.notna() & df['floor'].isna()
    filled = int(mask.sum())
    if filled:
        # A plain list is assigned positionally, so no index alignment is
        # involved in the write.
        df.loc[mask, 'floor'] = [to_floor(m) for m in matches[mask]]
    return filled


def pisos(df):
    """Fill missing values of ``df['floor']`` in place and print a summary.

    Rows whose ``property_type`` is ``'house'`` get floor 0. The remaining
    null floors are then filled from the first matching pattern found in
    ``description`` (ordinal words, digits with or without an ordinal
    suffix, number words, and "planta baja" as floor 0).

    Args:
        df: DataFrame with ``floor``, ``property_type`` and ``description``
            columns. It is modified in place.

    Returns:
        None. One line per pattern plus the totals are printed to stdout.
    """
    # All counts are taken from the frame that was passed in, so the function
    # works for any DataFrame and not just one specific module-level object.
    original_nulls = int(df['floor'].isna().sum())
    report = 'Se completaron con el patron {}: {} registros'

    house_mask = (df['property_type'] == 'house') & df['floor'].isna()
    df.loc[house_mask, 'floor'] = 0
    print(report.format('TYPE HOUSE', int(house_mask.sum())))

    for label, compiled, to_floor in _FLOOR_PATTERNS:
        print(report.format(label, _fill_from_description(df, compiled, to_floor)))

    remaining = int(df['floor'].isna().sum())
    print('Total original de NULLs para floor: {}'.format(original_nulls))
    print('Total actual de NULLs para floor: {}'.format(remaining))
    # With no nulls to begin with there is nothing to correct; report 0
    # instead of dividing by zero.
    if original_nulls:
        percent = round(100 - remaining * 100 / original_nulls)
    else:
        percent = 0
    print('Porcentaje de NULLs corregidos para floor: {}%'.format(percent))
# Run the floor imputation on the loaded dataset: `pisos` fills the 'floor'
# column in place and prints a per-pattern summary.
pisos(data)