diff --git a/Data Analysis/Basic Time Series.ipynb b/Data Analysis/Basic Time Series.ipynb index 1a4802d..80c07bf 100644 --- a/Data Analysis/Basic Time Series.ipynb +++ b/Data Analysis/Basic Time Series.ipynb @@ -885,7 +885,7 @@ "name": "ruby" }, "language_info": { - "file_extension": "rb", + "file_extension": ".rb", "mimetype": "application/x-ruby", "name": "ruby", "version": "2.2.1" diff --git a/Data Analysis/Usage of DataFrame.ipynb b/Data Analysis/Usage of DataFrame.ipynb index 7bdc1e4..b5962f5 100644 --- a/Data Analysis/Usage of DataFrame.ipynb +++ b/Data Analysis/Usage of DataFrame.ipynb @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": { "collapsed": false }, @@ -25,7 +25,7 @@ "application/javascript": [ "if(window['d3'] === undefined ||\n", " window['Nyaplot'] === undefined){\n", - " var path = {\"d3\":\"http://d3js.org/d3.v3.min\",\"downloadable\":\"http://cdn.rawgit.com/domitry/d3-downloadable/master/d3-downloadable\"};\n", + " var path = {\"d3\":\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\",\"downloadable\":\"http://cdn.rawgit.com/domitry/d3-downloadable/master/d3-downloadable\"};\n", "\n", "\n", "\n", @@ -56,7 +56,7 @@ "}\n" ], "text/plain": [ - "\"if(window['d3'] === undefined ||\\n window['Nyaplot'] === undefined){\\n var path = {\\\"d3\\\":\\\"http://d3js.org/d3.v3.min\\\",\\\"downloadable\\\":\\\"http://cdn.rawgit.com/domitry/d3-downloadable/master/d3-downloadable\\\"};\\n\\n\\n\\n var shim = {\\\"d3\\\":{\\\"exports\\\":\\\"d3\\\"},\\\"downloadable\\\":{\\\"exports\\\":\\\"downloadable\\\"}};\\n\\n require.config({paths: path, shim:shim});\\n\\n\\nrequire(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');require(['downloadable'], function(downloadable){window['downloadable']=downloadable;console.log('finished loading downloadable');\\n\\n\\tvar script = d3.select(\\\"head\\\")\\n\\t .append(\\\"script\\\")\\n\\t .attr(\\\"src\\\", \\\"http://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\\\")\\n\\t .attr(\\\"async\\\", true);\\n\\n\\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\\n\\n\\n\\t var event = document.createEvent(\\\"HTMLEvents\\\");\\n\\t event.initEvent(\\\"load_nyaplot\\\",false,false);\\n\\t window.dispatchEvent(event);\\n\\t console.log('Finished loading Nyaplotjs');\\n\\n\\t};\\n\\n\\n});});\\n}\\n\"" + "\"if(window['d3'] === undefined ||\\n window['Nyaplot'] === undefined){\\n var path = {\\\"d3\\\":\\\"https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min\\\",\\\"downloadable\\\":\\\"http://cdn.rawgit.com/domitry/d3-downloadable/master/d3-downloadable\\\"};\\n\\n\\n\\n var shim = {\\\"d3\\\":{\\\"exports\\\":\\\"d3\\\"},\\\"downloadable\\\":{\\\"exports\\\":\\\"downloadable\\\"}};\\n\\n require.config({paths: path, shim:shim});\\n\\n\\nrequire(['d3'], function(d3){window['d3']=d3;console.log('finished loading d3');require(['downloadable'], function(downloadable){window['downloadable']=downloadable;console.log('finished loading downloadable');\\n\\n\\tvar script = d3.select(\\\"head\\\")\\n\\t .append(\\\"script\\\")\\n\\t .attr(\\\"src\\\", \\\"http://cdn.rawgit.com/domitry/Nyaplotjs/master/release/nyaplot.js\\\")\\n\\t .attr(\\\"async\\\", true);\\n\\n\\tscript[0][0].onload = script[0][0].onreadystatechange = function(){\\n\\n\\n\\t var event = document.createEvent(\\\"HTMLEvents\\\");\\n\\t event.initEvent(\\\"load_nyaplot\\\",false,false);\\n\\t window.dispatchEvent(event);\\n\\t console.log('Finished loading Nyaplotjs');\\n\\n\\t};\\n\\n\\n});});\\n}\\n\"" ] }, "metadata": {}, @@ -68,7 +68,7 @@ "true" ] }, - "execution_count": 2, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -99,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "collapsed": false }, @@ -107,11 +107,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:72711700 rows: 4 cols: 2
ab
one11
two22
three33
four44
" + "
Daru::DataFrame:25900980 rows: 4 cols: 2
ab
one11
two22
three33
four44
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a b \n", " one 1 1 \n", " two 2 2 \n", @@ -119,7 +119,7 @@ " four 4 4 \n" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -139,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": { "collapsed": false }, @@ -147,11 +147,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:73018170 rows: 4 cols: 2
ba
011
122
233
344
" + "
Daru::DataFrame:25639640 rows: 4 cols: 2
ba
011
122
233
344
" ], "text/plain": [ "\n", - "#\n", + "#\n", " b a \n", " 0 1 1 \n", " 1 2 2 \n", @@ -159,7 +159,7 @@ " 3 4 4 \n" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -183,7 +183,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": { "collapsed": false }, @@ -191,11 +191,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:73231850 rows: 6 cols: 2
v1v2
a133
absent44
b211
c3
d4
e522
" + "
Daru::DataFrame:25111260 rows: 6 cols: 2
v1v2
a133
absent44
b211
c3
d4
e522
" ], "text/plain": [ "\n", - "#\n", + "#\n", " v1 v2 \n", " a 1 33 \n", " absent nil 44 \n", @@ -205,7 +205,7 @@ " e 5 22 \n" ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -230,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": { "collapsed": false }, @@ -265,7 +265,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": { "collapsed": false }, @@ -273,11 +273,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:72736240 rows: 4 cols: 4
abcd
011110a
1222204
233330g
3444403
" + "
Daru::DataFrame:24268680 rows: 4 cols: 4
abcd
011110a
1222204
233330g
3444403
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a b c d \n", " 0 1 11 10 a \n", " 1 2 22 20 4 \n", @@ -285,7 +285,7 @@ " 3 4 44 40 3 \n" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -312,7 +312,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": { "collapsed": false }, @@ -320,17 +320,17 @@ { "data": { "text/html": [ - "
Daru::DataFrame:73292080 rows: 2 cols: 5
abcdodd
01234
111442255
" + "
Daru::DataFrame:23883560 rows: 2 cols: 5
abcdodd
01234
111442255
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a b c d odd \n", " 0 1 2 3 4 nil \n", " 1 11 44 22 nil 55 \n" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -362,7 +362,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": { "collapsed": false }, @@ -370,11 +370,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:72552450 rows: 17 cols: 8
AccountManagerNamePriceProductQuantityRepStatus
0714466Debra HenleyTrantow-Barrows30000CPU1Craig Bookerpresented
1714466Debra HenleyTrantow-Barrows10000Software1Craig Bookerpresented
2714466Debra HenleyTrantow-Barrows5000Maintenance2Craig Bookerpending
3737550Debra HenleyFritsch, Russel and Anderson35000CPU1Craig Bookerdeclined
4146832Debra HenleyKiehn-Spinka65000CPU2Daniel Hiltonwon
5218895Debra HenleyKulas Inc40000CPU2Daniel Hiltonpending
6218895Debra HenleyKulas Inc10000Software1Daniel Hiltonpresented
7412290Debra HenleyJerde-Hilpert5000Maintenance2John Smithpending
8740150Debra HenleyBarton LLC35000CPU1John Smithdeclined
9141962Fred AndersonHerman LLC65000CPU2Cedric Mosswon
10163416Fred AndersonPurdy-Kunde30000CPU1Cedric Mosspresented
11239344Fred AndersonStokes LLC5000Maintenance1Cedric Mosspending
12239344Fred AndersonStokes LLC10000Software1Cedric Mosspresented
13307599Fred AndersonKassulke, Ondricka and Metz7000Maintenance3Wendy Yulewon
14688981Fred AndersonKeeling LLC100000CPU5Wendy Yulewon
15729833Fred AndersonKoepp Ltd65000CPU2Wendy Yuledeclined
16729833Fred AndersonKoepp Ltd5000Monitor2Wendy Yulepresented
" + "
Daru::DataFrame:23327140 rows: 17 cols: 8
AccountManagerNamePriceProductQuantityRepStatus
0714466Debra HenleyTrantow-Barrows30000CPU1Craig Bookerpresented
1714466Debra HenleyTrantow-Barrows10000Software1Craig Bookerpresented
2714466Debra HenleyTrantow-Barrows5000Maintenance2Craig Bookerpending
3737550Debra HenleyFritsch, Russel and Anderson35000CPU1Craig Bookerdeclined
4146832Debra HenleyKiehn-Spinka65000CPU2Daniel Hiltonwon
5218895Debra HenleyKulas Inc40000CPU2Daniel Hiltonpending
6218895Debra HenleyKulas Inc10000Software1Daniel Hiltonpresented
7412290Debra HenleyJerde-Hilpert5000Maintenance2John Smithpending
8740150Debra HenleyBarton LLC35000CPU1John Smithdeclined
9141962Fred AndersonHerman LLC65000CPU2Cedric Mosswon
10163416Fred AndersonPurdy-Kunde30000CPU1Cedric Mosspresented
11239344Fred AndersonStokes LLC5000Maintenance1Cedric Mosspending
12239344Fred AndersonStokes LLC10000Software1Cedric Mosspresented
13307599Fred AndersonKassulke, Ondricka and Metz7000Maintenance3Wendy Yulewon
14688981Fred AndersonKeeling LLC100000CPU5Wendy Yulewon
15729833Fred AndersonKoepp Ltd65000CPU2Wendy Yuledeclined
16729833Fred AndersonKoepp Ltd5000Monitor2Wendy Yulepresented
" ], "text/plain": [ "\n", - "#\n", + "#\n", " Account Manager Name Price Product Quantity Rep Status \n", " 0 714466 Debra Henl Trantow-Ba 30000 CPU 1 Craig Book presented \n", " 1 714466 Debra Henl Trantow-Ba 10000 Software 1 Craig Book presented \n", @@ -394,13 +394,13 @@ " ... ... ... ... ... ... ... ... ... \n" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "Daru::DataFrame.from_csv '/home/sameer/gitrepos/daru/spec/fixtures/sales-funnel.csv'" + "Daru::DataFrame.from_csv 'data/sales-funnel.csv'" ] }, { @@ -441,7 +441,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": { "collapsed": false }, @@ -449,11 +449,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:73139660 rows: 6 cols: 5
idnameagecitya1
01Alex20New Yorka,b
12Claude23Londonb,c
23Peter25Londona
34FranzParis
45George5.5Tomea,b,c
56Fernand
" + "
Daru::DataFrame:22709180 rows: 6 cols: 5
idnameagecitya1
01Alex20New Yorka,b
12Claude23Londonb,c
23Peter25Londona
34FranzParis
45George5.5Tomea,b,c
56Fernand
" ], "text/plain": [ "\n", - "#\n", + "#\n", " id name age city a1 \n", " 0 1 Alex 20 New York a,b \n", " 1 2 Claude 23 London b,c \n", @@ -463,13 +463,13 @@ " 5 6 Fernand nil nil nil \n" ] }, - "execution_count": 10, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df = Daru::DataFrame.from_excel '/home/sameer/gitrepos/daru/spec/fixtures/test_xls.xls'" + "df = Daru::DataFrame.from_excel 'data/test_xls.xls'" ] }, { @@ -510,7 +510,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": { "collapsed": false }, @@ -518,11 +518,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:72781990 rows: 7 cols: 3
abc
a1a11
b2b22
c3c33
d4d44
e5e55
f6f66
g7g77
" + "
Daru::DataFrame:22000940 rows: 7 cols: 3
abc
a1a11
b2b22
c3c33
d4d44
e5e55
f6f66
g7g77
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a b c \n", " a 1 a 11 \n", " b 2 b 22 \n", @@ -533,7 +533,7 @@ " g 7 g 77 \n" ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -555,7 +555,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": { "collapsed": false }, @@ -563,11 +563,11 @@ { "data": { "text/html": [ - "
Daru::Vector:72780230 size: 7
b
aa
bb
cc
dd
ee
ff
gg
" + "
Daru::Vector:21997920 size: 7
b
aa
bb
cc
dd
ee
ff
gg
" ], "text/plain": [ "\n", - "#\n", + "#\n", " b\n", " a a\n", " b b\n", @@ -578,7 +578,7 @@ " g g\n" ] }, - "execution_count": 12, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -596,7 +596,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": { "collapsed": false }, @@ -604,11 +604,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:72618370 rows: 7 cols: 2
bc
aa11
bb22
cc33
dd44
ee55
ff66
gg77
" + "
Daru::DataFrame:19097240 rows: 7 cols: 2
bc
aa11
bb22
cc33
dd44
ee55
ff66
gg77
" ], "text/plain": [ "\n", - "#\n", + "#\n", " b c \n", " a a 11 \n", " b b 22 \n", @@ -619,7 +619,7 @@ " g g 77 \n" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -639,7 +639,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": { "collapsed": false }, @@ -647,18 +647,18 @@ { "data": { "text/html": [ - "
Daru::Vector:72521270 size: 3
c
a3
bc
c33
" + "
Daru::Vector:17445320 size: 3
c
a3
bc
c33
" ], "text/plain": [ "\n", - "#\n", + "#\n", " c\n", " a 3\n", " b c\n", " c 33\n" ] }, - "execution_count": 14, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -676,7 +676,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": { "collapsed": false }, @@ -684,18 +684,18 @@ { "data": { "text/html": [ - "
Daru::DataFrame:72470330 rows: 3 cols: 3
abc
d4d44
e5e55
f6f66
" + "
Daru::DataFrame:16860640 rows: 3 cols: 3
abc
d4d44
e5e55
f6f66
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a b c \n", " d 4 d 44 \n", " e 5 e 55 \n", " f 6 f 66 \n" ] }, - "execution_count": 15, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -713,7 +713,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": { "collapsed": false }, @@ -721,18 +721,18 @@ { "data": { "text/html": [ - "
Daru::Vector:70267960 size: 3
d
a4
bd
c44
" + "
Daru::Vector:16631880 size: 3
3
a4
bd
c44
" ], "text/plain": [ "\n", - "#\n", - " d\n", + "#\n", + " 3\n", " a 4\n", " b d\n", " c 44\n" ] }, - "execution_count": 16, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -750,7 +750,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": { "collapsed": false }, @@ -758,18 +758,18 @@ { "data": { "text/html": [ - "
Daru::DataFrame:69983350 rows: 3 cols: 3
abc
a1a11
b2b22
c3c33
" + "
Daru::DataFrame:15990800 rows: 3 cols: 3
abc
a1a11
b2b22
c3c33
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a b c \n", " a 1 a 11 \n", " b 2 b 22 \n", " c 3 c 33 \n" ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -789,7 +789,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": { "collapsed": false }, @@ -797,11 +797,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:72781990 rows: 7 cols: 4
abcd
a1a1111
b2b2244
c3c3399
d4d44176
e5e55275
f6f66396
g7g77539
" + "
Daru::DataFrame:22000940 rows: 7 cols: 4
abcd
a1a1111
b2b2244
c3c3399
d4d44176
e5e55275
f6f66396
g7g77539
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a b c d \n", " a 1 a 11 11 \n", " b 2 b 22 44 \n", @@ -812,7 +812,7 @@ " g 7 g 77 539 \n" ] }, - "execution_count": 18, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -831,7 +831,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "metadata": { "collapsed": false }, @@ -839,11 +839,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:72781990 rows: 7 cols: 3
acd
a11111
b22244
c33399
d444176
e555275
f666396
g777539
" + "
Daru::DataFrame:22000940 rows: 7 cols: 3
acd
a11111
b22244
c33399
d444176
e555275
f666396
g777539
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a c d \n", " a 1 11 11 \n", " b 2 22 44 \n", @@ -854,7 +854,7 @@ " g 7 77 539 \n" ] }, - "execution_count": 19, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -876,7 +876,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": { "collapsed": false }, @@ -884,11 +884,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:72781990 rows: 7 cols: 4
acdb
a11111a
b22244c
c3339933
d444176b
e555275d
f66639688
g777539
" + "
Daru::DataFrame:22000940 rows: 7 cols: 4
acdb
a11111a
b22244c
c3339933
d444176b
e555275d
f66639688
g777539
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a c d b \n", " a 1 11 11 a \n", " b 2 22 44 c \n", @@ -899,7 +899,7 @@ " g 7 77 539 nil \n" ] }, - "execution_count": 20, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -918,7 +918,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": { "collapsed": false }, @@ -926,11 +926,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:72781990 rows: 8 cols: 4
acdb
a11111a
b22244c
c3339933
d444176b
e555275d
f66639688
g777539
latest30104020
" + "
Daru::DataFrame:22000940 rows: 8 cols: 4
acdb
a11111a
b22244c
c3339933
d444176b
e555275d
f66639688
g777539
latest30104020
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a c d b \n", " a 1 11 11 a \n", " b 2 22 44 c \n", @@ -942,7 +942,7 @@ " latest 30 10 40 20 \n" ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -968,7 +968,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "metadata": { "collapsed": false }, @@ -976,11 +976,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:73278350 rows: 8 cols: 2
ac
a111
b222
c333
d444
e555
f666
g777
latest3010
" + "
Daru::DataFrame:25037600 rows: 8 cols: 2
ac
a111
b222
c333
d444
e555
f666
g777
latest3010
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a c \n", " a 1 11 \n", " b 2 22 \n", @@ -992,7 +992,7 @@ " latest 30 10 \n" ] }, - "execution_count": 22, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -1012,7 +1012,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "metadata": { "collapsed": false }, @@ -1020,18 +1020,18 @@ { "data": { "text/html": [ - "
Daru::DataFrame:73410330 rows: 3 cols: 4
acdb
a11111a
b22244c
latest30104020
" + "
Daru::DataFrame:24513280 rows: 3 cols: 4
acdb
a11111a
b22244c
latest30104020
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a c d b \n", " a 1 11 11 a \n", " b 2 22 44 c \n", " latest 30 10 40 20 \n" ] }, - "execution_count": 23, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -1053,7 +1053,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "metadata": { "collapsed": false }, @@ -1061,11 +1061,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:73243870 rows: 4 cols: 8
abcdefglatest
a123456730
c1122334455667710
d11449917627539653940
bac33bd8820
" + "
Daru::DataFrame:23956660 rows: 4 cols: 8
abcdefglatest
a123456730
c1122334455667710
d11449917627539653940
bac33bd8820
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a b c d e f g latest \n", " a 1 2 3 4 5 6 7 30 \n", " c 11 22 33 44 55 66 77 10 \n", @@ -1073,7 +1073,7 @@ " b a c 33 b d 88 nil 20 \n" ] }, - "execution_count": 24, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1104,7 +1104,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "metadata": { "collapsed": false }, @@ -1112,11 +1112,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:73164840 rows: 8 cols: 4
acdb
a112121a
b123254c
c134310933
d1454186b
e1565285d
f167640688
g1787549
latest40205020
" + "
Daru::DataFrame:23500340 rows: 8 cols: 4
acdb
a112121a
b123254c
c134310933
d1454186b
e1565285d
f167640688
g1787549
latest40205020
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a c d b \n", " a 11 21 21 a \n", " b 12 32 54 c \n", @@ -1128,7 +1128,7 @@ " latest 40 20 50 20 \n" ] }, - "execution_count": 25, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1152,7 +1152,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 25, "metadata": { "collapsed": false }, @@ -1160,24 +1160,24 @@ { "data": { "text/html": [ - "
Daru::DataFrame:72714560 rows: 9 cols: 5
abcdf
a5866
b9295
c350
d10196
e
f64147
g
latest3711
older
" + "
Daru::DataFrame:22775800 rows: 9 cols: 5
abcdf
a5240
b6734
c2889
d44125
e
f72158
g
latest7240
older
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a b c d f \n", - " a 58 nil 66 nil nil \n", - " b 92 nil 95 nil nil \n", - " c 3 nil 50 nil nil \n", - " d 101 nil 96 nil nil \n", + " a 52 nil 40 nil nil \n", + " b 67 nil 34 nil nil \n", + " c 28 nil 89 nil nil \n", + " d 44 nil 125 nil nil \n", " e nil nil nil nil nil \n", - " f 64 nil 147 nil nil \n", + " f 72 nil 158 nil nil \n", " g nil nil nil nil nil \n", - " latest 37 nil 11 nil nil \n", + " latest 72 nil 40 nil nil \n", " older nil nil nil nil nil \n" ] }, - "execution_count": 26, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1207,7 +1207,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 26, "metadata": { "collapsed": false }, @@ -1215,18 +1215,18 @@ { "data": { "text/html": [ - "
Daru::Vector:72534840 size: 3
mean
a7.25
c39.75
d197.5
" + "
Daru::Vector:21668800 size: 3
mean
a7.25
c39.75
d197.5
" ], "text/plain": [ "\n", - "#\n", + "#\n", " mean\n", " a 7.25\n", " c 39.75\n", " d 197.5\n" ] }, - "execution_count": 27, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1244,7 +1244,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 27, "metadata": { "collapsed": false }, @@ -1252,11 +1252,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:72483730 rows: 5 cols: 3
acd
count888
mean7.2539.75197.5
std9.4074438611133925.06990227344335190.99214643539665
min11011
max3077539
" + "
Daru::DataFrame:19564740 rows: 5 cols: 3
acd
count888
mean7.2539.75197.5
std9.4074438611133925.06990227344335190.99214643539665
min11011
max3077539
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a c d \n", " count 8 8 8 \n", " mean 7.25 39.75 197.5 \n", @@ -1265,7 +1265,7 @@ " max 30 77 539 \n" ] }, - "execution_count": 28, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -1283,7 +1283,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 28, "metadata": { "collapsed": false }, @@ -1291,18 +1291,18 @@ { "data": { "text/html": [ - "
Daru::DataFrame:70263780 rows: 3 cols: 3
acd
a88.5-66.5-233.0
c-66.5628.54637.0
d-233.04637.036478.0
" + "
Daru::DataFrame:19100920 rows: 3 cols: 3
acd
a88.5-66.5-233.0
c-66.5628.54637.0
d-233.04637.036478.0
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a c d \n", " a 88.5 -66.5 -233.0 \n", " c -66.5 628.5 4637.0 \n", " d -233.0 4637.0 36478.0 \n" ] }, - "execution_count": 29, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1320,7 +1320,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 29, "metadata": { "collapsed": false }, @@ -1328,18 +1328,18 @@ { "data": { "text/html": [ - "
Daru::DataFrame:69832090 rows: 3 cols: 3
acd
a1.0-0.28196640612394586-0.12967873822641748
c-0.281966406123945860.99999999999999980.9684315851062977
d-0.129678738226417480.96843158510629771.0
" + "
Daru::DataFrame:17235860 rows: 3 cols: 3
acd
a1.0-0.28196640612394586-0.12967873822641748
c-0.281966406123945860.99999999999999980.9684315851062977
d-0.129678738226417480.96843158510629771.0
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a c d \n", " a 1.0 -0.2819664 -0.1296787 \n", " c -0.2819664 0.99999999 0.96843158 \n", " d -0.1296787 0.96843158 1.0 \n" ] }, - "execution_count": 30, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1357,7 +1357,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 30, "metadata": { "collapsed": false }, @@ -1366,7 +1366,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "= 87788e7a-6255-4867-a882-5861c3869260\n", + "= 251989c1-bf81-41b8-a316-e1941b4a7d0d\n", " Number of rows: 8\n", " Element:[a]\n", " == a\n", @@ -1389,7 +1389,7 @@ " skew: 0.1381\n", " kurtosis: -1.7271\n", " Element:[d]\n", - " == d\n", + " == a\n", " n :8\n", " n valid:8\n", " median: 137.5\n", @@ -1438,7 +1438,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "metadata": { "collapsed": false }, @@ -1464,7 +1464,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 32, "metadata": { "collapsed": false }, @@ -1501,7 +1501,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 33, "metadata": { "collapsed": false }, @@ -1512,7 +1512,7 @@ "[7.25, 39.75, 197.5]" ] }, - "execution_count": 34, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1531,7 +1531,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 34, "metadata": { "collapsed": false }, @@ -1542,7 +1542,7 @@ "[7.666666666666667, 22.666666666666668, 42.0, 74.66666666666667, 111.66666666666667, 139.0, 207.66666666666666, 25.0]" ] }, - "execution_count": 35, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1571,7 +1571,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 35, "metadata": { "collapsed": false }, @@ -1579,11 +1579,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:67297900 rows: 8 cols: 3
acd
a777275
b15.0125.0505.5
c33399
d444176
e555275
f666396
g777539
latest301040
" + "
Daru::DataFrame:14037900 rows: 8 cols: 3
acd
a777275
b15.0125.0505.5
c33399
d444176
e555275
f666396
g777539
latest301040
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a c d \n", " a 7 77 275 \n", " b 15.0 125.0 505.5 \n", @@ -1595,7 +1595,7 @@ " latest 30 10 40 \n" ] }, - "execution_count": 36, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1612,7 +1612,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 36, "metadata": { "collapsed": false }, @@ -1620,11 +1620,11 @@ { "data": { "text/html": [ - "
Daru::DataFrame:72546640 rows: 8 cols: 4
acdb
a011110
b-2222440
c-66339933
d-132441760
e-220552750
f-3306639688
g-46277539
latest-30104020
" + "
Daru::DataFrame:25748140 rows: 8 cols: 4
acdb
a011110
b-2222440
c-66339933
d-132441760
e-220552750
f-3306639688
g-46277539
latest-30104020
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a c d b \n", " a 0 11 11 0 \n", " b -22 22 44 0 \n", @@ -1636,7 +1636,7 @@ " latest -30 10 40 20 \n" ] }, - "execution_count": 37, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -1664,7 +1664,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 37, "metadata": { "collapsed": false }, @@ -1672,11 +1672,11 @@ { "data": { "text/html": [ - "
Daru::Vector:73197120 size: 4
nil
a9
c99
d495
b121
" + "
Daru::Vector:24585460 size: 4
nil
a9
c99
d495
b121
" ], "text/plain": [ "\n", - "#\n", + "#\n", " nil\n", " a 9\n", " c 99\n", @@ -1684,7 +1684,7 @@ " b 121\n" ] }, - "execution_count": 38, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } @@ -1699,7 +1699,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 38, "metadata": { "collapsed": false }, @@ -1707,11 +1707,11 @@ { "data": { "text/html": [ - "
Daru::Vector:73133890 size: 8
nil
a1
b24
c69
d136
e225
f336
g469
latest60
" + "
Daru::Vector:24120620 size: 8
nil
a1
b24
c69
d136
e225
f336
g469
latest60
" ], "text/plain": [ "\n", - "#\n", + "#\n", " nil\n", " a 1\n", " b 24\n", @@ -1723,7 +1723,7 @@ "latest 60\n" ] }, - "execution_count": 39, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1749,7 +1749,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 39, "metadata": { "collapsed": false }, @@ -1757,11 +1757,11 @@ { "data": { "text/html": [ - "
Daru::Vector:72958340 size: 8
nil
a23
b68
c135
d224
e335
f468
g623
latest80
" + "
Daru::Vector:23802280 size: 8
nil
a23
b68
c135
d224
e335
f468
g623
latest80
" ], "text/plain": [ "\n", - "#\n", + "#\n", " nil\n", " a 23\n", " b 68\n", @@ -1773,7 +1773,7 @@ "latest 80\n" ] }, - "execution_count": 40, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -1790,12 +1790,12 @@ "\n", "Daru::DataFrame offers a robust `#sort` function which can be used for hierarchically sorting the Vectors in the DataFrame.\n", "\n", - "Heres an example to demonstrate a lot of the options:" + "Here are couple of examples to demonstrate a lot of the options:" ] }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 40, "metadata": { "collapsed": false }, @@ -1803,20 +1803,20 @@ { "data": { "text/html": [ - "
Daru::DataFrame:72645290 rows: 5 cols: 3
abc
0g11This
1g4dataframe
2g335is
3sort32for
4this11sorting
" + "
Daru::DataFrame:23409420 rows: 5 cols: 3
abc
0g4This
1g4dataframe
2g335is
3sort32for
4this11sorting
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a b c \n", - " 0 g 11 This \n", + " 0 g 4 This \n", " 1 g 4 dataframe \n", " 2 g 335 is \n", " 3 sort 32 for \n", " 4 this 11 sorting \n" ] }, - "execution_count": 42, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -1829,9 +1829,24 @@ " })" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Array passed as an argument to 'sort' tells the method the order\n", + "in which preference of sorting should be given to each Vector.\n", + "\n", + "The **:ascending** option will tell DataFrame the order in which you want\n", + "the Vectors to be sorted. *true* for ascending sort and *false* for \n", + "descending sort.\n", + "\n", + "The **:by** option lets you define a custom attribute for each vector to sort by.\n", + "This works similarly to passing a block to Array#sort_by." + ] + }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 41, "metadata": { "collapsed": false }, @@ -1839,36 +1854,204 @@ { "data": { "text/html": [ - "
Daru::DataFrame:70223790 rows: 5 cols: 3
abc
2g335is
0g11This
1g4dataframe
3sort32for
4this11sorting
" + "
Daru::DataFrame:22869300 rows: 5 cols: 3
abc
2g335is
0g4This
1g4dataframe
3sort32for
4this11sorting
" ], "text/plain": [ "\n", - "#\n", + "#\n", " a b c \n", " 2 g 335 is \n", - " 0 g 11 This \n", + " 0 g 4 This \n", " 1 g 4 dataframe \n", " 3 sort 32 for \n", " 4 this 11 sorting \n" ] }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sort([:a,:b,:c], ascending: [true, false, true], by: {c: lambda { |a| a.size }})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Additional examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sort a dataframe with a vector sequence. " + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
Daru::DataFrame:22471140 rows: 5 cols: 2
ab
213
015
322
124
431
" + ], + "text/plain": [ + "\n", + "#\n", + " a b \n", + " 2 1 3 \n", + " 0 1 5 \n", + " 3 2 2 \n", + " 1 2 4 \n", + " 4 3 1 \n" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = Daru::DataFrame.new({a: [1,2,1,2,3], b: [5,4,3,2,1]})\n", + "\n", + "df.sort [:a, :b]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sort a dataframe without a block. Here nils will be handled automatically and appear at top. " + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
Daru::DataFrame:22025680 rows: 5 cols: 2
ab
13
31
0-34
2-12
454
" + ], + "text/plain": [ + "\n", + "#\n", + " a b \n", + " 1 nil 3 \n", + " 3 nil 1 \n", + " 0 -3 4 \n", + " 2 -1 2 \n", + " 4 5 4 \n" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = Daru::DataFrame.new({a: [-3,nil,-1,nil,5], b: [4,3,2,1,4]})\n", + "\n", + "df.sort([:a])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sort a dataframe with a block with nils handled automatically. " + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
Daru::DataFrame:19612940 rows: 6 cols: 2
ab
21
51
4-1x
1-1aa
0aaa
3baaa
" + ], + "text/plain": [ + "\n", + "#\n", + " a b \n", + " 2 1 nil \n", + " 5 1 nil \n", + " 4 -1 x \n", + " 1 -1 aa \n", + " 0 nil aaa \n", + " 3 nil baaa \n" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = Daru::DataFrame.new({a: [nil,-1,1,nil,-1,1], b: ['aaa','aa',nil,'baaa','x',nil] })\n", + "\n", + "# df.sort [:b], by: {b: lambda { |a| a.length } }\n", + "# This would give \"NoMethodError: undefined method `length' for nil:NilClass\"\n", + "\n", + "# Instead you could do the following if you want the nils to be handled automatically\n", + "df.sort [:b], by: {b: lambda { |a| a.length } }, handle_nils: true" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sort a dataframe with a block with nils handled manually. " + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
Daru::DataFrame:19202680 rows: 6 cols: 2
ab
4-1x
1-1aa
0aaa
3baaa
21
51
" + ], + "text/plain": [ + "\n", + "#\n", + " a b \n", + " 4 -1 x \n", + " 1 -1 aa \n", + " 0 nil aaa \n", + " 3 nil baaa \n", + " 2 1 nil \n", + " 5 1 nil \n" + ] + }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# The Array passed as an argument to 'sort' tells the method the order\n", - "# in which preference of sorting should be given to each Vector.\n", - "# \n", - "# The `:ascending` option will tell DataFrame the order in which you want\n", - "# the Vectors to be sorted. *true* for ascending sort and *false* for \n", - "# descending sort.\n", - "# \n", - "# The `:by` option lets you define a custom <=> operator for each vector.\n", - "# This works similarly to passing a block to Array#sort.\n", + "df = Daru::DataFrame.new({a: [nil,-1,1,nil,-1,1], b: ['aaa','aa',nil,'baaa','x',nil] })\n", "\n", - "df.sort([:a,:b,:c], ascending: [true, false, true], by: {c: lambda { |a,b| a.size <=> b.size }})" + "# To print nils at the bottom one can use lambda { |a| (a.nil?)[1]:[0,a.length] }\n", + "df.sort [:b], by: {b: lambda { |a| (a.nil?)?[1]:[0,a.length] } }, handle_nils: true" ] } ], @@ -1879,7 +2062,7 @@ "name": "ruby" }, "language_info": { - "file_extension": "rb", + "file_extension": ".rb", "mimetype": "application/x-ruby", "name": "ruby", "version": "2.2.1" diff --git a/Data Analysis/data/sales-funnel.csv b/Data Analysis/data/sales-funnel.csv new file mode 100644 index 0000000..2c58dff --- /dev/null +++ b/Data Analysis/data/sales-funnel.csv @@ -0,0 +1,18 @@ +Account,Name,Rep,Manager,Product,Quantity,Price,Status +714466,Trantow-Barrows,Craig Booker,Debra Henley,CPU,1,30000,presented +714466,Trantow-Barrows,Craig Booker,Debra Henley,Software,1,10000,presented +714466,Trantow-Barrows,Craig Booker,Debra Henley,Maintenance,2,5000,pending +737550,"Fritsch, Russel and Anderson",Craig Booker,Debra Henley,CPU,1,35000,declined +146832,Kiehn-Spinka,Daniel Hilton,Debra Henley,CPU,2,65000,won +218895,Kulas Inc,Daniel Hilton,Debra Henley,CPU,2,40000,pending +218895,Kulas Inc,Daniel Hilton,Debra Henley,Software,1,10000,presented +412290,Jerde-Hilpert,John Smith,Debra Henley,Maintenance,2,5000,pending +740150,Barton LLC,John Smith,Debra Henley,CPU,1,35000,declined +141962,Herman LLC,Cedric Moss,Fred Anderson,CPU,2,65000,won +163416,Purdy-Kunde,Cedric Moss,Fred Anderson,CPU,1,30000,presented +239344,Stokes LLC,Cedric Moss,Fred Anderson,Maintenance,1,5000,pending +239344,Stokes LLC,Cedric Moss,Fred Anderson,Software,1,10000,presented +307599,"Kassulke, Ondricka and Metz",Wendy Yule,Fred Anderson,Maintenance,3,7000,won +688981,Keeling LLC,Wendy Yule,Fred Anderson,CPU,5,100000,won +729833,Koepp Ltd,Wendy Yule,Fred Anderson,CPU,2,65000,declined +729833,Koepp Ltd,Wendy Yule,Fred Anderson,Monitor,2,5000,presented diff --git a/Data Analysis/data/test_xls.xls b/Data Analysis/data/test_xls.xls new file mode 100644 index 0000000..043890d Binary files /dev/null and b/Data Analysis/data/test_xls.xls differ