diff --git a/n122-chi-square-test/n122a-chi-square-test.ipynb b/n122-chi-square-test/n122a-chi-square-test.ipynb new file mode 100644 index 0000000..3d1aefb --- /dev/null +++ b/n122-chi-square-test/n122a-chi-square-test.ipynb @@ -0,0 +1,621 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "n122a-chi-square-test.ipynb", + "provenance": [], + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.2" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rQf3dSuc5PqO" + }, + "source": [ + "\n", + "\n", + "## *DATA SCIENCE / SECTION 1 / SPRINT 2 / NOTE 2*\n", + "\n", + "# ๐Ÿ“ Assignment" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zIldCo3R71wd" + }, + "source": [ + "# ์นด์ด์ œ๊ณฑ๊ฒ€์ •" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "11OzdxWTM7UR" + }, + "source": [ + "\n", + "## 1. ๊ณต๊ณต๋ฐ์ดํ„ฐ ์กฐ์ž‘ ์—ฐ์Šต. \n", + "\n", + "๊ตญ๊ฐ€ ํ†ต๊ณ„ ํฌํ„ธ์—์„œ ์•„๋ž˜ ์ด๋ฏธ์ง€๋ฅผ ์ฐธ์กฐํ•˜์—ฌ\n", + "\n", + "\n", + "\n", + "**2020๋…„ 8์›”**์— ํ•ด๋‹นํ•˜๋Š” ๊ทœ๋ชจ๋ณ„ ๋ฏธ๋ถ„์–‘ํ˜„ํ™ฉ ์— ๋Œ€ํ•œ ๋ฐ์ดํ„ฐ์…‹์„ ์ƒ์„ฑํ•˜์„ธ์š”. \n", + "\n", + "\n", + "\n", + "- `60m์ดํ•˜`, `60~85m`, `85m์ดˆ๊ณผ`์˜ 3๊ฐœ ๊ทœ๋ชจ์™€ (column)\n", + "- `์„œ์šธ`, `๋Œ€์ „`, `๋Œ€๊ตฌ`, `๋ถ€์‚ฐ` 4๊ฐœ์˜ ์ง€์—ญ์„ ํฌํ•จํ•ด์•ผํ•ฉ๋‹ˆ๋‹ค. (row)\n", + "- `๋ฏผ๊ฐ„๋ถ€๋ฌธ`๋งŒ ํฌํ•จํ•ฉ๋‹ˆ๋‹ค\n", + "- ๋ฐ์ดํ„ฐ๊ฐ€ ์—†๋Š” ๊ฒฝ์šฐ๋Š” 0์œผ๋กœ ์ฒ˜๋ฆฌํ•˜์„ธ์š”.\n", + "\n", + "์ดํ›„ ๋ฐ์ดํ„ฐ์…‹์„ colab์œผ๋กœ ๋ถˆ๋Ÿฌ์˜ค์„ธ์š”. 
์ด๋•Œ ๋ณ€์ˆ˜์˜ ์ด๋ฆ„์€ `df`๋ฅผ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "iZGiUmi2EuGZ" + }, + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from scipy import stats" + ], + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 203 + }, + "id": "M4iwxM5FEI__", + "outputId": "220a9be8-505d-4a8f-aacc-998eba7eefbb" + }, + "source": [ + "df = pd.read_csv('/content/๊ทœ๋ชจ๋ณ„_๋ฏธ๋ถ„์–‘ํ˜„ํ™ฉ_20210716162818.csv', encoding='euc-kr')\n", + "df.columns = ['์‹œ๋„', '๋ถ€๋ฌธ', '๊ทœ๋ชจ', '2020_08']\n", + "\n", + "df= df.pivot_table ( index = '์‹œ๋„' , columns = '๊ทœ๋ชจ' , values = '2020_08', aggfunc='sum')\n", + "df = df.rename(columns = {'60ใŽก์ดํ•˜' : '~60ใŽก', '85ใŽก์ดˆ๊ณผ' :'85ใŽก~'})\n", + "df[['60โˆผ85ใŽก',\t'~60ใŽก',\t'85ใŽก~']] = df[['60โˆผ85ใŽก',\t'~60ใŽก',\t'85ใŽก~']].apply(pd.to_numeric)\n", + "col0, col1, col2 =[df.columns[0]] , [df.columns[1]] , [df.columns[2]]\n", + "new_col=col1+col0+col2\n", + "df=df[new_col]\n", + "df" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
๊ทœ๋ชจ~60ใŽก60โˆผ85ใŽก85ใŽก~
์‹œ๋„
๋Œ€๊ตฌ143143744
๋Œ€์ „78210
๋ถ€์‚ฐ577735142
์„œ์šธ5420
\n", + "
" + ], + "text/plain": [ + "๊ทœ๋ชจ ~60ใŽก 60โˆผ85ใŽก 85ใŽก~\n", + "์‹œ๋„ \n", + "๋Œ€๊ตฌ 143 1437 44\n", + "๋Œ€์ „ 782 1 0\n", + "๋ถ€์‚ฐ 577 735 142\n", + "์„œ์šธ 54 2 0" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 6 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "v2u3PjaFNE5T", + "outputId": "81068ff5-06f3-4b2d-f121-77aa9ee3bcf5" + }, + "source": [ + "df.shape" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(4, 3)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 7 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jp0lVlW4OJ6u", + "outputId": "6f77b463-bbef-43a1-ecf0-b59e315587c5" + }, + "source": [ + "df.sum().sum()" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "3917" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 8 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RXPZZuAi3ccI" + }, + "source": [ + "## 2. 
์ง€์—ญ์— ๋Œ€ํ•ด์„œ one-sample chi-square test๋ฅผ ์‹คํ–‰, ํ•ด๋‹น ๊ฒฐ๊ณผ๋ฅผ `chi1`์— ์ €์žฅ ํ›„ ์„ค๋ช…ํ•ด๋ณด์„ธ์š”.\n", + "\n", + "์˜ˆ์‹œ) ๋งŒ์•ฝ **9์›”๋‹ฌ ๋ฐ์ดํ„ฐ**๋ฅผ ๊ธฐ์ค€์œผ๋กœ ํ•œ๋‹ค๋ฉด\n", + "```python\n", + " [52+2+0, 590+665+142, 113+1061+42, 772+1+0]\n", + "```\n", + "์„ ๋น„๊ต ํ•˜๊ฒŒ ๋  ๊ฒƒ์ž…๋‹ˆ๋‹ค.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "oIMzAkXks-Sv", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "5724b5d3-a961-4feb-974d-01aa6ddb68a9" + }, + "source": [ + "# scipy ์‚ฌ์šฉ\n", + "from scipy.stats import chisquare\n", + "obs = df.sum(axis=1)\n", + "chi1 = chisquare(obs)[0]\n", + "print(f'chi1 = {chi1:}')" + ], + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "text": [ + "chi1 = 1564.4572376818994\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Kuk3GOQEPn-i", + "outputId": "f639bac7-3393-439b-ee3a-5e35882f744e" + }, + "source": [ + "obs" + ], + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "์‹œ๋„\n", + "๋Œ€๊ตฌ 1624\n", + "๋Œ€์ „ 783\n", + "๋ถ€์‚ฐ 1454\n", + "์„œ์šธ 56\n", + "dtype: int64" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 14 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tUfYf21fNFqm" + }, + "source": [ + "## H0 : 지역별 미분양 수는 균등하게 분포한다.\n", + " \n", + "\n", + "* 지역에 상관없이 공평하게 나올 것이다.\n", + "* p-value ≈ 0 < 0.05, 기각\n", + "\n", + "\n", + "## H1 : 지역별 미분양 수는 균등하게 분포하지 않는다.\n", + "\n", + "* 지역에 상관없이 공평하게 나오지 않을 것이다.\n", + "* p-value ≈ 0 < 0.05, 채택\n", + "\n", + "\n", + "## 결론 : 지역별 미분양 수는 균등하지 않으며, 지역에 따라 차이가 있다.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": 
"eMDtugVNRu0q" + }, + "source": [ + "## 3. ์ง€์—ญ๊ณผ ๊ทœ๋ชจ์— ๋Œ€ํ•ด์„œ two-sample chi-square test๋ฅผ ์‹คํ–‰, ํ•ด๋‹น ๊ฒฐ๊ณผ๋ฅผ `chi2`์— ์ €์žฅ ํ›„ ์„ค๋ช…ํ•ด๋ณด์„ธ์š”.\n", + "\n", + "์˜ˆ์‹œ) **9์›”๋‹ฌ ๋ฐ์ดํ„ฐ**๋ฅผ ๊ธฐ์ค€์œผ๋กœ ํ•œ๋‹ค๋ฉด\n", + "\n", + "| |-60 | 60-85 | 85- |\n", + "|:-:|:-:|:-:|:-:|\n", + "|์„œ์šธ|52|2|0|\n", + "|๋Œ€์ „|772|1|0|\n", + "|๋Œ€๊ตฌ|113|1061|42|\n", + "|๋ถ€์‚ฐ|590|665|142|\n", + "\n", + "์— ๋Œ€ํ•ด์„œ ๊ฒ€์ •ํ•ด์•ผ ํ•  ๊ฒ๋‹ˆ๋‹ค.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Ckcr4A4FM7cs", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "1239ea12-463d-4a8b-b663-cf423b9f9346" + }, + "source": [ + "from scipy.stats import chi2_contingency\n", + "chi2 = chi2_contingency(df) #df -> ์ด๋ฏธ crosstab์ด ๋˜์–ด์žˆ๋„ค.\n", + "chi2\n", + "print(f' chi2 = {chi2[0]}\\n p_value = {chi2[1]}\\n ์ž์œ ๋„ = {chi2[2]}\\n array = \\n{chi2[3]}')" + ], + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "text": [ + " chi2 = 2064.5767314171994\n", + " p_value = 0.0\n", + " ์ž์œ ๋„ = 6\n", + " array = \n", + "[[645.12228746 901.76155221 77.11616033]\n", + " [311.04110288 434.77789124 37.18100587]\n", + " [577.59101353 807.36533061 69.04365586]\n", + " [ 22.24559612 31.09522594 2.65917794]]\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aI02BaXljhJy" + }, + "source": [ + "## H0 : ๋ฏธ๋ถ„์–‘ ํ˜„ํ™ฉ์— ๋Œ€ํ•ด ์ง€์—ญ๊ณผ ๊ทœ๋ชจ๋Š” ์—ฐ๊ด€์„ฑ์ด ์—†๋‹ค\n", + "## H1 : ๋ฏธ๋ถ„์–‘ ํ˜„ํ™ฉ์— ๋Œ€ํ•ด ์ง€์—ญ๊ณผ ๊ทœ๋ชจ๋Š” ์—ฐ๊ด€์„ฑ์ด ์žˆ๋‹ค\n", + "\n", + " * ๊ฒ€์ •๊ฒฐ๊ณผ, p-value๋Š” 0.0์œผ๋กœ ์œ ์˜์ˆ˜์ค€ 95%ํ•˜์—์„œ ์œ ์˜ํ™•๋ฅ  0.05๋ณด๋‹ค ์ž‘์€\n", + " ๊ฐ’์ด๋‹ค. ๋”ฐ๋ผ์„œ ๊ท€๋ฌด๊ฐ€์„ค์„ ๊ธฐ๊ฐํ•œ๋‹ค. \n", + " ์ฆ‰, ๋ฏธ๋ถ„์–‘ ํ˜„ํ™ฉ์— ์žˆ์–ด ์ง€์—ญ๊ณผ ๊ทœ๋ชจ๋Š” ์—ฐ๊ด€์„ฑ์ด ์—†๋‹ค๊ณ  ๋ณผ ์ˆ˜ ์—†๋‹ค.`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Tf7XPpBP6YqX" + }, + "source": [ + "## 4. 
2๋ฒˆ์— ๋Œ€ํ•ด์„œ NumPy ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ (Scipy๋ฅผ ์‚ฌ์šฉํ•˜์ง€ ์•Š๊ณ ) $\\chi^2$ test ์‹œํ–‰ ํ›„ 2๋ฒˆ์˜ ๊ฒฐ๊ณผ์™€ ๋น„๊ตํ•ด๋ณด์„ธ์š”. \n", + "\n", + "- `obs`, `exp`, `chi`๋ผ๋Š” ๋ณ€์ˆ˜๋ฅผ ์‚ฌ์šฉํ•ด์•ผํ•ฉ๋‹ˆ๋‹ค." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "OjFt0b1-wrFL", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "bf5faf2f-79ad-4eca-dec0-cca2cfd7c03c" + }, + "source": [ + "# One-sample chi-square statistic computed with NumPy/pandas arithmetic; scipy is used only for the CDF.\n", + "obs = df.sum(axis = 1) # observed totals: row sums of the pivoted table, one per region\n", + "exp = np.sum(obs) / len(obs) # expected total under H0: the grand total split equally across regions\n", + "chi_squared = ((obs - exp)**2) / exp # per-region contribution (O - E)^2 / E\n", + "chi = chi_squared.sum()\n", + "p_value = 1 - stats.chi2.cdf(chi, df = len(obs) - 1) # chi-square (chi2), not the chi distribution; k - 1 degrees of freedom\n", + "print(f'chi = {chi:}' ,f'p_value = {p_value:}')" + ], + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "text": [ + "chi = 1564.4572376818994 p_value = 0.0\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "d3rO4-ROP5sS", + "outputId": "b104b951-b6cf-48f6-a19a-fb2206269b82" + }, + "source": [ + "exp" + ], + "execution_count": 18, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "979.25" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 18 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a2BrKHq8m8AS" + }, + "source": [ + "## ๐Ÿ”ฅ ๋„์ „ ๊ณผ์ œ\n", + "\n", + "์•„๋ž˜ ์„ธ๊ฐ€์ง€๋ฅผ ๋‹คํ•˜๋ฉด ๋ฉ๋‹ˆ๋‹ค.\n", + "\n", + "### 1. Function\n", + "\n", + "4๋ฒˆ์—์„œ ์‚ฌ์šฉํ•œ one sample chisquare test๋ฅผ ํ•จ์ˆ˜์˜ ํ˜•ํƒœ๋กœ ๋ณ€๊ฒฝํ•˜์„ธ์š”." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "AntnuAkFm-JQ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "6525278f-5b07-4f47-c553-b77776eccee4" + }, + "source": [ + "v1 = [18,22,20,15,23,22]\n", + "v2 = [5,23,26,19,24,23]\n", + "\n", + "def myChisq(value):\n", + " import numpy as np\n", + " from scipy import stats\n", + " obs = value # v1, v2\n", + " exp = np.sum(obs) / len(obs) # ๊ธฐ๋Œ€๊ฐ’\n", + " chi_squared = ((obs-exp)**2) / exp # x์ œ๊ณฑ\n", + " chi = chi_squared.sum() # chisqure\n", + " p_value = 1 - stats.chi2.cdf(chi, df = len(value)-1) # p_value\n", + " return print(f'value = {value}, chisquare = {chi} , p_value = {p_value}') \n", + "\n", + "myChisq(v1) \n", + "myChisq(v2)\n", + "\n" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "value = [18, 22, 20, 15, 23, 22], chisquare = 2.3000000000000003 , p_value = 0.8062668698851285\n", + "value = [5, 23, 26, 19, 24, 23], chisquare = 14.8 , p_value = 0.011251979028327308\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4ohsJhQUmEuS" + }, + "source": [ + "### 2. ANOVA\n", + "\n", + "์•„๋ž˜ ๋งํฌ๋ฅผ ์ฐธ์กฐํ•˜์—ฌ ANOVA ์— ๋Œ€ํ•œ ๊ธ€์„ ์ฝ๊ณ \n", + "\n", + "\n", + "\n", + "๋‹ค์Œ `4๊ฐœ ๊ทธ๋ฃน์— ๋Œ€ํ•ด์„œ ํ‰๊ท ์˜ ์ฐจ์ด๊ฐ€ ์žˆ๋Š”์ง€`์— ๋Œ€ํ•œ ๊ฐ€์„ค ๊ฒ€์ •์„ ์‹œํ–‰ํ•˜์„ธ์š”.\n", + "\n", + "A : `38 33 35 92 76 97 88 41 11 9`\n", + "\n", + "B : `18 52 62 48 30 40 87 12 97 82`\n", + "\n", + "C : `28 90 5 49 66 73 96 80 4 17`\n", + "\n", + "D : ` 8 99 4 12 7 64 18 10 9 20`\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DMebi5_4UaSn" + }, + "source": [ + "## H0 : 4개의 그룹에 대해서 평균의 차이가 나지 않는다\n", + "## H1 : 4개의 그룹에 대해서 평균의 차이가 난다." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "AgkfC51fqLEZ", + "outputId": "8408a80d-2c9e-48ba-c62c-91fa9032247d" + }, + "source": [ + "A = [38,33,35,92,76,97,88,41,11,9]\n", + "B = [18,52,62,48,30,40,87,12,97,82]\n", + "C = [28,90,5,49,66,73,96,80,4,17]\n", + "D = [8,99,4,12,7,64,18,10,9,20]\n", + "\n", + "F_statistic, pVal = stats.f_oneway(A,B,C,D)\n", + "print(f'๋ฐ์ดํ„ฐ์˜ ์ผ์›๋ถ„์‚ฐ๋ถ„์„ ๊ฒฐ๊ณผ : F={F_statistic}, p={pVal}')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "๋ฐ์ดํ„ฐ์˜ ์ผ์›๋ถ„์‚ฐ๋ถ„์„ ๊ฒฐ๊ณผ : F=1.7249594239128412, p=0.17920877113948797\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k2vuHNLu1niX" + }, + "source": [ + "## 결과\n", + "* p-value(≈0.179) > 0.05 이므로 귀무가설을 기각할 수 없다. 즉, 4개 그룹의 평균에 차이가 있다고 볼 수 없다." + ] + } + ] +} \ No newline at end of file diff --git a/n211-simple-regression/n211a-simple-regression.ipynb b/n211-simple-regression/n211a-simple-regression.ipynb new file mode 100644 index 0000000..3c0b046 --- /dev/null +++ b/n211-simple-regression/n211a-simple-regression.ipynb @@ -0,0 +1,201 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "colab": { + "name": "n211a-simple-regression.ipynb", + "provenance": [] + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Q6G77lsPMjS3" + }, + "source": [ + "\n", + "\n", + "## *DATA SCIENCE / SECTION 2 / SPRINT 1 / NOTE 1*\n", + "\n", + "# ๐Ÿ“ Assignment\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7IXUfiQ2UKj6" + }, + 
"source": [ + "# Linear Regression\n", + "\n", + "์ด๋ฒˆ ๋ชจ๋“ˆ์—์„œ ์‚ฌ์šฉํ•œ ๋ฐ์ดํ„ฐ์™€ ์œ ์‚ฌํ•œ ๋ฐ์ดํ„ฐ๋ฅผ ์‚ฌ์šฉํ•ด ๋ณต์Šตํ•ด ๋ณด๊ฒ ์Šต๋‹ˆ๋‹ค. ์ด ๋ฐ์ดํ„ฐ์…‹์€ ๋ฏธ๊ตญ ์‹œ์• ํ‹€ King County ์ง€์—ญ์—์„œ 2014๋…„ 5์›”๋ถ€ํ„ฐ ~ 2015๋…„ 5์›” ๊นŒ์ง€ ์ฃผํƒ ํŒ๋งค ๊ฐ€๊ฒฉ ๋ฐ์ดํ„ฐ์ž…๋‹ˆ๋‹ค.\n", + "\n", + " - [House Sales in King County, USA](https://www.kaggle.com/harlfoxem/housesalesprediction?select=kc_house_data.csv)\n", + "\n", + "\n", + "์ด ๋ฐ์ดํ„ฐ๋ฅผ ์‚ฌ์šฉํ•ด ๋‹ค์Œ ๊ณผ์ œ๋ฅผ ๋งˆํฌ๋‹ค์šด๊ณผ ์ฝ”๋“œ๋ฅผ ์‚ฌ์šฉํ•ด ์ง„ํ–‰ํ•ด ์ฃผ์„ธ์š”." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "frBW3-iQMjS6" + }, + "source": [ + "# ๋ฐ์ดํ„ฐ๋Š” ์บ๊ธ€์—์„œ ๋ฐ›์œผ์…”๋„ ๋˜๊ณ  ์ด ๋งํฌ์—์„œ ๋ถˆ๋Ÿฌ์™€๋„ ๋ฉ๋‹ˆ๋‹ค.\n", + "import pandas as pd\n", + "df = pd.read_csv('https://ds-lecture-data.s3.ap-northeast-2.amazonaws.com/kc_house_data/kc_house_data.csv')\n" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "k4uUYX0WMjS7" + }, + "source": [ + "### 1) ํŠน์„ฑ๋“ค๊ณผ ํƒ€๊ฒŸ(Price)๊ณผ์˜ ์ƒ๊ด€๊ณ„์ˆ˜๋ฅผ ๊ณ„์‚ฐํ•˜๊ณ  ๊ฐ€์žฅ ์ƒ๊ด€๊ด€๊ณ„๊ฐ€ ๋†’์€ ํŠน์„ฑ์„ ์ฐพ์•„ ํƒ€๊ฒŸ๊ณผ์˜ ๊ด€๊ณ„๋ฅผ ์‹œ๊ฐํ™” ํ•˜์„ธ์š”.\n", + "- **ํƒ€๊ฒŸ๊ณผ ์ƒ๊ด€๊ด€๊ณ„๊ฐ€ ๊ฐ€์žฅ ๋†’์€ ํŠน์„ฑ์˜ ์ด๋ฆ„์„ ๊ณผ์ œ ์ œ์ถœํผ์— ์ œ์ถœํ•˜์„ธ์š”.**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "lvvTEiWgMjS8" + }, + "source": [ + "### ์ด๊ณณ์—์„œ ๊ณผ์ œ๋ฅผ ์ง„ํ–‰ํ•ด ์ฃผ์„ธ์š” ###" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QSlnVioSMjS8" + }, + "source": [ + "### 2) grade ์™€ price์˜ scatter plot์„ ๊ทธ๋ฆฌ๊ณ  ๊ธฐ์ค€๋ชจ๋ธ์„ ์‹œ๊ฐํ™” ํ•ด ๋ณด์„ธ์š”.\n", + "- **๊ธฐ์ค€๋ชจ๋ธ์˜ ๊ฐ’์„ ๊ณผ์ œ ์ œ์ถœํผ์— ์ œ์ถœํ•˜์„ธ์š”.**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kSO49zGXMjS9" + }, + "source": [ + "### ์ด๊ณณ์—์„œ ๊ณผ์ œ๋ฅผ ์ง„ํ–‰ํ•ด ์ฃผ์„ธ์š” ###" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "eGF8rIzZMjS9" + }, + "source": [ + "### 3) Scikit-Learn ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์‚ฌ์šฉํ•ด ํŠน์„ฑ grade์— ๋Œ€ํ•œ ์„ ํ˜•ํšŒ๊ท€๋ชจ๋ธ์„ ๋งŒ๋“ค์–ด ๋ณด์„ธ์š”.\n", + "- **grade ๊ฐ’์ด 6์ธ ๊ฒฝ์šฐ์— ๋ชจ๋ธ์ด ์˜ˆ์ธกํ•˜๋Š” ์ฃผํƒ ๊ฐ€๊ฒฉ์„ ๊ณผ์ œ ์ œ์ถœํผ์— ์ œ์ถœํ•˜์„ธ์š”.**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "n0fFNcV6MjS-" + }, + "source": [ + "### ์ด๊ณณ์—์„œ ๊ณผ์ œ๋ฅผ ์ง„ํ–‰ํ•ด ์ฃผ์„ธ์š” ###" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xrabCjNgMjS_" + }, + "source": [ + "### 4) ๋งŒ๋“  ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•ด ์ƒˆ๋กœ์šด ๋ฐ์ดํ„ฐ์— ๋Œ€ํ•ด ์˜ˆ์ธก์„ ํ•ด ๋ณด๊ณ  coefficient๋ฅผ ์‚ฌ์šฉํ•ด ์„ค๋ช…ํ•ด ๋ณด์„ธ์š”.\n", + "- **grade ํŠน์„ฑ์— ๋Œ€ํ•œ ํšŒ๊ท€๊ณ„์ˆ˜๋ฅผ ๊ณผ์ œ ์ œ์ถœํผ์— ์ œ์ถœํ•˜์„ธ์š”.**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "1ENl5mBeMjTA" + }, + "source": [ + "### ์ด๊ณณ์—์„œ ๊ณผ์ œ๋ฅผ ์ง„ํ–‰ํ•ด ์ฃผ์„ธ์š” ###" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jmY0X2xMMjTA" + }, + "source": [ + "### 5) sqft_living ํŠน์„ฑ์„ ์‚ฌ์šฉํ•˜์—ฌ ๋ชจ๋ธ์„ ๋งŒ๋“ค์–ด ๋ณด๊ณ  ๋‘ ๋ชจ๋ธ์„ ๋น„๊ตํ•ด ๋ณด์„ธ์š”.\n", + "- **sqft_living์— ๋Œ€ํ•œ ํšŒ๊ท€๊ณ„์ˆ˜๋ฅผ ๊ณผ์ œ ์ œ์ถœํผ์— ์ œ์ถœํ•˜์„ธ์š”.**" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Y1t7D490MjTA" + }, + "source": [ + "### ์ด๊ณณ์—์„œ ๊ณผ์ œ๋ฅผ ์ง„ํ–‰ํ•ด ์ฃผ์„ธ์š” ###" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WTf8r5SNMjTB" + }, + "source": [ + "## ๐Ÿ”ฅ๋„์ „๊ณผ์ œ\n", + "\n", + "### 6) ๋‹จ์ˆœ์„ ํ˜•ํšŒ๊ท€๋ชจ๋ธ์„ ๋งŒ๋“ค๊ธฐ ์œ„ํ•œ ์ข‹์€ ํŠน์„ฑ์„ ์„ ํƒํ•˜๊ธฐ ์œ„ํ•ด ๊ณ ๋ คํ•ด์•ผ ํ•˜๋Š” ์‚ฌํ•ญ๋“ค์— ๋Œ€ํ•ด ์กฐ์‚ฌํ•˜๊ณ  ์„œ์ˆ ํ•˜์„ธ์š”.\n", + "\n", + "### 7) OLS์— ๋Œ€ํ•ด์„œ ๊ฒ€์ƒ‰ํ•ด ๋ณด๊ณ  ๋ณธ์ธ์˜ ์ดํ•ด๋ฅผ ๋…น์—ฌ๋‚ด์–ด 10๋ฌธ์žฅ ๋‚ด๋กœ ์š”์•ฝํ•ด ๋ณด์„ธ์š”." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "3Wx69QtfMjTB" + }, + "source": [ + "### ์ด๊ณณ์—์„œ ๊ณผ์ œ๋ฅผ ์ง„ํ–‰ํ•ด ์ฃผ์„ธ์š” ###" + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file diff --git a/n231-choose-your-ml-problems/n231a-choose-your-ml-problems.ipynb b/n231-choose-your-ml-problems/n231a-choose-your-ml-problems.ipynb new file mode 100644 index 0000000..7c5b96a --- /dev/null +++ b/n231-choose-your-ml-problems/n231a-choose-your-ml-problems.ipynb @@ -0,0 +1,160 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + }, + "colab": { + "name": "n231a-choose-your-ml-problems.ipynb", + "provenance": [] + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "nCc3XZEyG3XV" + }, + "source": [ + "\n", + "\n", + "## *DATA SCIENCE / SECTION 2 / SPRINT 3 / NOTE 1*\n", + "\n", + "# ๐Ÿ“ Assignment\n", + "---\n", + "# Choose your ML problems\n", + "\n", + "์ด๋ฒˆ ์Šคํ”„๋ฆฐํŠธ์—์„œ๋Š” ํฌํŠธํด๋ฆฌ์˜ค ํ”„๋กœ์ ํŠธ๋ฅผ ์œ„ํ•ด ์ˆ˜๊ฐ•์ƒ ๊ฐ์ž๊ฐ€ ์ •ํ•œ ๋ฐ์ดํ„ฐ์…‹์„ ์‚ฌ์šฉํ•˜์—ฌ ๊ณผ์ œ๋ฅผ ์ˆ˜ํ–‰ํ•ฉ๋‹ˆ๋‹ค. ํ”„๋กœ์ ํŠธ๋ฅผ ์œ„ํ•œ ๋ฐ์ดํ„ฐ๋ฅผ ์ตœ์ข… ์„ ํƒํ•˜๊ธฐ ์ „์— ์—ฌ๋Ÿฌ ๊ฐ€๋Šฅํ•œ ๋ฐ์ดํ„ฐ์„ธํŠธ๋ฅผ ์‚ดํŽด๋ณด๊ณ  ๊ฐ„๋‹จํ•œ ๋ชจ๋ธ๊นŒ์ง€ ํ•™์Šตํ•ด ๋ณด๋Š” ๊ฒƒ์„ ์ถ”์ฒœ๋“œ๋ฆฝ๋‹ˆ๋‹ค." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "25Q5khpG3-cc" + }, + "source": [ + "### 1) ํƒ€๊ฒŸ์œผ๋กœ ์‚ฌ์šฉํ•  ํŠน์„ฑ์„ ์„ ํƒํ•ฉ๋‹ˆ๋‹ค.\n", + "- **(๊ฐ๊ด€์‹) ๋‹ค์Œ ์ค‘ ์„ค๋ช…์ด ์˜ฌ๋ฐ”๋ฅธ ํ•ญ๋ชฉ์„ ์„ ํƒํ•˜๊ณ  ๊ณผ์ œ ์ œ์ถœํผ์— ์ œ์ถœํ•˜์„ธ์š”.**\n", + " - ํŠน์„ฑ 3๊ฐœ๋ฅผ ์‚ฌ์šฉํ•ด ๊ฐ€์žฅ ์„ฑ๋Šฅ์ด ์ข‹์€ ๋ชจ๋ธ A๋ฅผ ๋งŒ๋“ค๊ณ  ๋˜ ํŠน์„ฑ 5๊ฐœ๋ฅผ ์‚ฌ์šฉํ•ด ์ตœ์ ์˜ ๋ชจ๋ธ B๋ฅผ ํ•™์Šตํ•˜์˜€์Šต๋‹ˆ๋‹ค. ์ด๋•Œ ๋ชจ๋ธ A์—์„œ ์„ ํƒ๋œ ํŠน์„ฑ์€ A1, A2, A3 ์ž…๋‹ˆ๋‹ค.\n", + " 1. ์ตœ์ ์˜ ๋ชจ๋ธ B์—๋Š” ํŠน์„ฑ A1, A2, A3๊ฐ€ ํ•ญ์ƒ ์„ ํƒ๋ฉ๋‹ˆ๋‹ค.\n", + " 2. ์ตœ์ ์˜ ๋ชจ๋ธ B ํ•™์Šต์‹œ ํŠน์„ฑ A1, A2, A3๋Š” ์ „ํ˜€ ์‚ฌ์šฉ๋˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.\n", + " 3. ์ตœ์ ์˜ ๋ชจ๋ธ B์— ํŠน์„ฑ A1, A2, A3๊ฐ€ ์„ ํƒ๋ ์ง€ ์•Œ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "RobhoziJ3-cd" + }, + "source": [ + "### 3" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "16tT-UAI3-ce" + }, + "source": [ + "### 2) ํ’€๊ณ ์ž ํ•˜๋Š” ๋ฌธ์ œ๊ฐ€ ๋ถ„๋ฅ˜/ํšŒ๊ท€ ๋ฌธ์ œ์ธ์ง€ ์ •ํ•ฉ๋‹ˆ๋‹ค.\n", + "- **(๊ฐ๊ด€์‹) ๋‹ค์Œ ์„ค๋ช… ์ค‘ ๊ฐ€์žฅ ํ‹€๋ฆฐ ํ•ญ๋ชฉ์„ ์„ ํƒํ•˜๊ณ  ๊ณผ์ œ ์ œ์ถœํผ์— ์ œ์ถœํ•˜์„ธ์š”.**\n", + " 1. ํŠน์ •ํ•œ ์ข…๋ชฉ์˜ ๋‹ค์Œ๋‚  ์ฃผ์‹ ์ข…๊ฐ€๋ฅผ ์˜ˆ์ธกํ•˜๋Š” ๋ฌธ์ œ๋Š” Supervised Learning ๋ฌธ์ œ์ด๋‹ค.\n", + " 2. ๋‹ค์Œ๋‚  KOSPI ์ง€์ˆ˜๊ฐ€ ์ฆ๊ฐ€ํ• ์ง€ ๊ฐ์†Œํ• ์ง€ ์˜ˆ์ธกํ•˜๋Š” ๋ฌธ์ œ๋Š” Classification ๋ฌธ์ œ์ด๋‹ค.\n", + " 3. ์ถ•๊ตฌ ๊ฒฝ๊ธฐ ๋„์ค‘์— ์ŠนํŒจ๋ฅผ ์˜ˆ์ธกํ•˜๋Š” ๋ชจ๋ธ์„ ๋งŒ๋“ค๊ธฐ ์œ„ํ•ด์„œ๋Š” ์‹ค์‹œ๊ฐ„ ํ†ต๊ณ„ ์ •๋ณด์™€ ๊ฒฝ๊ธฐ๋งˆ๋‹ค ์ŠนํŒจ ๋ฐ์ดํ„ฐ๊ฐ€ ํ•„์š”ํ•˜๋‹ค.\n", + " 4. 
์ด๋ฒˆ ์‹œ์ฆŒ ํ† ํŠธ๋„˜ ์„ ์ˆ˜๋“ค์˜ ๊ฐœ์ธ ํ†ต๊ณ„ ๋ฐ์ดํ„ฐ(์Š›, ๋„์›€, ...)๋ฅผ ์‚ฌ์šฉํ•ด์„œ ๋‹ค์Œ ํ† ํŠธ๋„˜ ๊ฒฝ๊ธฐ์˜ ์ŠนํŒจ๋ฅผ ์˜ˆ์ธกํ•  ์ˆ˜ ์žˆ๋‹ค.\n", + " " + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "syw5kiS43-ce" + }, + "source": [ + "### 4" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uhMD-HUR3-ce" + }, + "source": [ + "### 3) ํƒ€๊ฒŸ์˜ ๋ถ„ํฌ๋ฅผ ๋ถ„์„ํ•˜๊ณ  ์‚ฌ์šฉํ•  ํ‰๊ฐ€์ง€ํ‘œ๋ฅผ ์„ ํƒํ•ฉ๋‹ˆ๋‹ค. ํ‰๊ฐ€์ง€ํ‘œ๋ฅผ ์„ ํƒํ•œ ์ด์œ ๋ฅผ ์„ค๋ช…ํ•˜์„ธ์š”.\n", + "- ๋ถ„๋ฅ˜: ํด๋ž˜์Šค๊ฐ€ ๋ช‡ ๊ฐœ์ธ์ง€? ํด๋ž˜์Šค ๋น„์œจ์ด ์–ด๋–ค์ง€(balance or imbalance)\n", + "- ํšŒ๊ท€: ํƒ€๊ฒŸ ๋ถ„ํฌ๊ฐ€ right-skewed ์ธ์ง€? log-transform์„ ์‚ฌ์šฉํ•  ๊ฒƒ์ธ์ง€?\n", + "- ๋ถ„๋ฅ˜: ๋‹ค์ˆ˜ ํด๋ž˜์Šค(majority class) ๋น„์œจ์ด 50%~70% ์ธ ๊ฒฝ์šฐ ์ •ํ™•๋„(accuracy)๋งŒ ์‚ฌ์šฉํ•ด๋„ ๋ฌด๋ฐฉํ•ฉ๋‹ˆ๋‹ค. ํ•˜์ง€๋งŒ ๋ฒ”์œ„๋ฅผ ๋„˜์–ด์„ค ๊ฒฝ์šฐ ์ •ํ™•๋„๋งŒ์„ ์‚ฌ์šฉํ•ด์„œ๋Š” ๋ชจ๋ธ์„ ์ž˜๋ชป ์ดํ•ดํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ์–ด๋–ค ํ‰๊ฐ€์ง€ํ‘œ๋ฅผ ์‚ฌ์šฉํ•ด์•ผ ํ• ๊นŒ์š”?\n", + "- ํšŒ๊ท€: MAE, RMSE, R^2 ๋“ฑ์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.\n", + "- **(๊ฐ๊ด€์‹) ๊ฐ™์€ ๋ฐ์ดํ„ฐ๋กœ ์ŠคํŒธ์„ ์˜ˆ์ธกํ•˜๋Š” ๋ชจ๋ธ A์™€ B๋ฅผ ๋งŒ๋“ค์—ˆ์Šต๋‹ˆ๋‹ค. ๋‹ค์Œ ์„ค๋ช… ์ค‘ ๊ฐ€์žฅ ์˜ฌ๋ฐ”๋ฅธ ํ•ญ๋ชฉ์„ ์„ ํƒํ•˜๊ณ  ๊ณผ์ œ ์ œ์ถœํผ์— ์ œ์ถœํ•˜์„ธ์š”.**\n", + " 1. A์˜ ์žฌํ˜„์œจ์€ 80%์ด๊ณ  B์˜ ์žฌํ˜„์œจ์€ 60%์ผ ๋•Œ A๊ฐ€ ๋” ์ข‹์€ ๋ชจ๋ธ์ด๋‹ค.\n", + " 2. A์˜ ์ •๋ฐ€๋„๋Š” 80%์ด๊ณ  B์˜ ์ •๋ฐ€๋„๋Š” 60%์ผ ๋•Œ A๊ฐ€ ๋” ์ข‹์€ ๋ชจ๋ธ์ด๋‹ค.\n", + " 3. A๋Š” B๋ณด๋‹ค ์ •๋ฐ€๋„๋Š” ๋†’์ง€๋งŒ ์žฌํ˜„์œจ์€ ๋‚ฎ๋‹ค. A๊ฐ€ ๋” ์ข‹์€ ๋ชจ๋ธ์ด๋‹ค.\n", + " 4. A๋Š” B๋ณด๋‹ค ์žฌํ˜„์œจ์€ ๋†’์ง€๋งŒ, ์ •๋ฐ€๋„๋Š” ๋‚ฎ๋‹ค. A๊ฐ€ ๋” ์ข‹์€ ๋ชจ๋ธ์ด๋‹ค.\n", + " 5. A๋Š” B๋ณด๋‹ค ์ •๋ฐ€๋„, ์žฌํ˜„์œจ ๋ชจ๋‘ ๋†’๋‹ค. A๊ฐ€ ๋” ์ข‹์€ ๋ชจ๋ธ์ด๋‹ค." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "86r9VSpr3-cf" + }, + "source": [ + "### 5" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nQyvvRaI3-cf" + }, + "source": [ + "## ๐Ÿ”ฅ ๋„์ „๊ณผ์ œ" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m1VDTtbB3-cg" + }, + "source": [ + "### 4) ๋ฐ์ดํ„ฐ๋ฅผ ํ›ˆ๋ จ/๊ฒ€์ฆ/ํ…Œ์ŠคํŠธ ๋ฐ์ดํ„ฐ๋กœ ๋‚˜๋ˆ„๊ณ  ์ „์ฒ˜๋ฆฌ๋ฅผ ์ˆ˜ํ–‰ํ•ฉ๋‹ˆ๋‹ค. ๊ทธ๋ฆฌ๊ณ  ๊ฐ„๋‹จํ•œ ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•ด์„œ ํ•™์Šต์„ ์ˆ˜ํ–‰ํ•ฉ๋‹ˆ๋‹ค.\n", + "- ์ด์ƒ์น˜(outliers)๊ฐ€ ์žˆ๋‹ค๋ฉด ์–ด๋–ป๊ฒŒ ์ฒ˜๋ฆฌํ•  ๊ฒƒ์ธ์ง€ ์„ค๋ช…ํ•ฉ๋‹ˆ๋‹ค.\n", + "- ๋ฌด์ž‘์œ„๋กœ or ์‹œ๊ฐ„์— ๋”ฐ๋ผ ๋ฐ์ดํ„ฐ๋ฅผ ๋‚˜๋ˆŒ ๊ฒƒ์ธ์ง€ ์„ค๋ช…ํ•ฉ๋‹ˆ๋‹ค.\n", + "- ๋ชจ๋ธ ํ•™์Šต ๊ฒฐ๊ณผ๋ฅผ ๋ฆฌํฌํŒ… ํ•ฉ๋‹ˆ๋‹ค.\n", + "- ์ •๋ณด ๋ˆ„์ˆ˜(leakage)๊ฐ€ ์žˆ๋Š”์ง€ ํ™•์ธํ•ด ๋ด…๋‹ˆ๋‹ค." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "G-xH-VL83-ch" + }, + "source": [ + "### ์ด๊ณณ์—์„œ ๊ณผ์ œ๋ฅผ ์ง„ํ–‰ํ•ด ์ฃผ์„ธ์š” ### " + ], + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file