diff --git a/n122-chi-square-test/n122a-chi-square-test.ipynb b/n122-chi-square-test/n122a-chi-square-test.ipynb
new file mode 100644
index 0000000..3d1aefb
--- /dev/null
+++ b/n122-chi-square-test/n122a-chi-square-test.ipynb
@@ -0,0 +1,621 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "n122a-chi-square-test.ipynb",
+ "provenance": [],
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.2"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "rQf3dSuc5PqO"
+ },
+ "source": [
+ "
\n",
+ "\n",
+ "## *DATA SCIENCE / SECTION 1 / SPRINT 2 / NOTE 2*\n",
+ "\n",
+ "# ๐ Assignment"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "zIldCo3R71wd"
+ },
+ "source": [
+ "# ์นด์ด์ ๊ณฑ๊ฒ์ "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "11OzdxWTM7UR"
+ },
+ "source": [
+ "\n",
+ "## 1. ๊ณต๊ณต๋ฐ์ดํฐ ์กฐ์ ์ฐ์ต. \n",
+ "\n",
+ "๊ตญ๊ฐ ํต๊ณ ํฌํธ์์ ์๋ ์ด๋ฏธ์ง๋ฅผ ์ฐธ์กฐํ์ฌ\n",
+ "\n",
+ "\n",
+ "\n",
+ "**2020๋
8์**์ ํด๋นํ๋ ๊ท๋ชจ๋ณ ๋ฏธ๋ถ์ํํฉ ์ ๋ํ ๋ฐ์ดํฐ์
์ ์์ฑํ์ธ์. \n",
+ "\n",
+ "
\n",
+ "\n",
+ "- `60m์ดํ`, `60~85m`, `85m์ด๊ณผ`์ 3๊ฐ ๊ท๋ชจ์ (column)\n",
+ "- `์์ธ`, `๋์ `, `๋๊ตฌ`, `๋ถ์ฐ` 4๊ฐ์ ์ง์ญ์ ํฌํจํด์ผํฉ๋๋ค. (row)\n",
+ "- `๋ฏผ๊ฐ๋ถ๋ฌธ`๋ง ํฌํจํฉ๋๋ค\n",
+ "- ๋ฐ์ดํฐ๊ฐ ์๋ ๊ฒฝ์ฐ๋ 0์ผ๋ก ์ฒ๋ฆฌํ์ธ์.\n",
+ "\n",
+ "์ดํ ๋ฐ์ดํฐ์
์ colab์ผ๋ก ๋ถ๋ฌ์ค์ธ์. ์ด๋ ๋ณ์์ ์ด๋ฆ์ `df`๋ฅผ ์ฌ์ฉํฉ๋๋ค.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "iZGiUmi2EuGZ"
+ },
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from scipy import stats"
+ ],
+ "execution_count": 4,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 203
+ },
+ "id": "M4iwxM5FEI__",
+ "outputId": "220a9be8-505d-4a8f-aacc-998eba7eefbb"
+ },
+ "source": [
+ "df = pd.read_csv('/content/๊ท๋ชจ๋ณ_๋ฏธ๋ถ์ํํฉ_20210716162818.csv', encoding='euc-kr')\n",
+ "df.columns = ['์๋', '๋ถ๋ฌธ', '๊ท๋ชจ', '2020_08']\n",
+ "\n",
+ "df= df.pivot_table ( index = '์๋' , columns = '๊ท๋ชจ' , values = '2020_08', aggfunc='sum')\n",
+ "df = df.rename(columns = {'60ใก์ดํ' : '~60ใก', '85ใก์ด๊ณผ' :'85ใก~'})\n",
+ "df[['60โผ85ใก',\t'~60ใก',\t'85ใก~']] = df[['60โผ85ใก',\t'~60ใก',\t'85ใก~']].apply(pd.to_numeric)\n",
+ "# Reorder the three size columns: swap the first two, keep the third in place\n",
+ "new_col = [df.columns[1], df.columns[0], df.columns[2]]\n",
+ "df = df[new_col]\n",
+ "df"
+ ],
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " ๊ท๋ชจ | \n",
+ " ~60ใก | \n",
+ " 60โผ85ใก | \n",
+ " 85ใก~ | \n",
+ "
\n",
+ " \n",
+ " ์๋ | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " ๋๊ตฌ | \n",
+ " 143 | \n",
+ " 1437 | \n",
+ " 44 | \n",
+ "
\n",
+ " \n",
+ " ๋์ | \n",
+ " 782 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " ๋ถ์ฐ | \n",
+ " 577 | \n",
+ " 735 | \n",
+ " 142 | \n",
+ "
\n",
+ " \n",
+ " ์์ธ | \n",
+ " 54 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "๊ท๋ชจ ~60ใก 60โผ85ใก 85ใก~\n",
+ "์๋ \n",
+ "๋๊ตฌ 143 1437 44\n",
+ "๋์ 782 1 0\n",
+ "๋ถ์ฐ 577 735 142\n",
+ "์์ธ 54 2 0"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 6
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "v2u3PjaFNE5T",
+ "outputId": "81068ff5-06f3-4b2d-f121-77aa9ee3bcf5"
+ },
+ "source": [
+ "df.shape"
+ ],
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(4, 3)"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 7
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "jp0lVlW4OJ6u",
+ "outputId": "6f77b463-bbef-43a1-ecf0-b59e315587c5"
+ },
+ "source": [
+ "df.sum().sum()"
+ ],
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "3917"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 8
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "RXPZZuAi3ccI"
+ },
+ "source": [
+ "## 2. ์ง์ญ์ ๋ํด์ one-sample chi-square test๋ฅผ ์คํ, ํด๋น ๊ฒฐ๊ณผ๋ฅผ `chi1`์ ์ ์ฅ ํ ์ค๋ช
ํด๋ณด์ธ์.\n",
+ "\n",
+ "์์) ๋ง์ฝ **9์๋ฌ ๋ฐ์ดํฐ**๋ฅผ ๊ธฐ์ค์ผ๋ก ํ๋ค๋ฉด\n",
+ "```python\n",
+ " [52+2+0, 590+665+142, 113+1061+42, 772+1+0]\n",
+ "```\n",
+ "์ ๋น๊ต ํ๊ฒ ๋ ๊ฒ์
๋๋ค.\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "oIMzAkXks-Sv",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "5724b5d3-a961-4feb-974d-01aa6ddb68a9"
+ },
+ "source": [
+ "# One-sample chi-square test with scipy: the row totals of df are compared\n",
+ "# against chisquare's default expectation of equal frequencies.\n",
+ "from scipy.stats import chisquare\n",
+ "obs = df.sum(axis=1)\n",
+ "chi1 = chisquare(obs).statistic  # keep only the test statistic\n",
+ "print(f'chi1 = {chi1:}')"
+ ],
+ "execution_count": 10,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "chi1 = 1564.4572376818994\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Kuk3GOQEPn-i",
+ "outputId": "f639bac7-3393-439b-ee3a-5e35882f744e"
+ },
+ "source": [
+ "obs"
+ ],
+ "execution_count": 14,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "์๋\n",
+ "๋๊ตฌ 1624\n",
+ "๋์ 783\n",
+ "๋ถ์ฐ 1454\n",
+ "์์ธ 56\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 14
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "tUfYf21fNFqm"
+ },
+ "source": [
+ "## Ho : ์ง์ญ์ ๋ฐ๋ผ ๋ฏธ๋ถ์ ์๋ ๋
๋ฆฝ์ ์ด๋ค.\n",
+ " \n",
+ "\n",
+ "* ์ง์ญ์ ์๊ด์์ด ๊ณตํํ๊ฒ ๋์ฌ ๊ฒ์ด๋ค.\n",
+ "* p-value = 0 < 0.05, ๊ธฐ๊ฐ\n",
+ "\n",
+ "\n",
+ "## H1 : ์ง์ญ์ ๋ฐ๋ผ ๋ฏธ๋ถ์ ์๋ ๋
๋ฆฝ์ ์ด์ง ์๋ค.\n",
+ "\n",
+ "* ์ง์ญ์ ์๊ด์์ด ๊ณตํํ๊ฒ ๋์ค์ง ์์ ๊ฒ์ด๋ค.\n",
+ "* p-value = 0 < 0.05, ์ฑํ\n",
+ "\n",
+ "\n",
+ "## ๊ฒฐ๋ก : ์ง์ญ๋ณ ๋ฏธ๋ถ์ ์๋ ์ง์ญ๊ณผ ์ฐ๊ด์ฑ์ด ์์๊ฒ์ด๋ค.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "eMDtugVNRu0q"
+ },
+ "source": [
+ "## 3. ์ง์ญ๊ณผ ๊ท๋ชจ์ ๋ํด์ two-sample chi-square test๋ฅผ ์คํ, ํด๋น ๊ฒฐ๊ณผ๋ฅผ `chi2`์ ์ ์ฅ ํ ์ค๋ช
ํด๋ณด์ธ์.\n",
+ "\n",
+ "์์) **9์๋ฌ ๋ฐ์ดํฐ**๋ฅผ ๊ธฐ์ค์ผ๋ก ํ๋ค๋ฉด\n",
+ "\n",
+ "| |-60 | 60-85 | 85- |\n",
+ "|:-:|:-:|:-:|:-:|\n",
+ "|์์ธ|52|2|0|\n",
+ "|๋์ |772|1|0|\n",
+ "|๋๊ตฌ|113|1061|42|\n",
+ "|๋ถ์ฐ|590|665|142|\n",
+ "\n",
+ "์ ๋ํด์ ๊ฒ์ ํด์ผ ํ ๊ฒ๋๋ค.\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "Ckcr4A4FM7cs",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "1239ea12-463d-4a8b-b663-cf423b9f9346"
+ },
+ "source": [
+ "from scipy.stats import chi2_contingency\n",
+ "# df is already a crosstab, so it is passed to the test directly.\n",
+ "chi2 = chi2_contingency(df)\n",
+ "print(f' chi2 = {chi2[0]}\\n p_value = {chi2[1]}\\n ์์ ๋ = {chi2[2]}\\n array = \\n{chi2[3]}')"
+ ],
+ "execution_count": 15,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ " chi2 = 2064.5767314171994\n",
+ " p_value = 0.0\n",
+ " ์์ ๋ = 6\n",
+ " array = \n",
+ "[[645.12228746 901.76155221 77.11616033]\n",
+ " [311.04110288 434.77789124 37.18100587]\n",
+ " [577.59101353 807.36533061 69.04365586]\n",
+ " [ 22.24559612 31.09522594 2.65917794]]\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "aI02BaXljhJy"
+ },
+ "source": [
+ "## H0 : ๋ฏธ๋ถ์ ํํฉ์ ๋ํด ์ง์ญ๊ณผ ๊ท๋ชจ๋ ์ฐ๊ด์ฑ์ด ์๋ค\n",
+ "## H1 : ๋ฏธ๋ถ์ ํํฉ์ ๋ํด ์ง์ญ๊ณผ ๊ท๋ชจ๋ ์ฐ๊ด์ฑ์ด ์๋ค\n",
+ "\n",
+ " * ๊ฒ์ ๊ฒฐ๊ณผ, p-value๋ 0.0์ผ๋ก ์ ์์์ค 95%ํ์์ ์ ์ํ๋ฅ 0.05๋ณด๋ค ์์\n",
+ " ๊ฐ์ด๋ค. ๋ฐ๋ผ์ ๊ท๋ฌด๊ฐ์ค์ ๊ธฐ๊ฐํ๋ค. \n",
+ " ์ฆ, ๋ฏธ๋ถ์ ํํฉ์ ์์ด ์ง์ญ๊ณผ ๊ท๋ชจ๋ ์ฐ๊ด์ฑ์ด ์๋ค๊ณ ๋ณผ ์ ์๋ค.`"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Tf7XPpBP6YqX"
+ },
+ "source": [
+ "## 4. 2๋ฒ์ ๋ํด์ NumPy ๋ฅผ ์ฌ์ฉํ์ฌ (Scipy๋ฅผ ์ฌ์ฉํ์ง ์๊ณ ) $\\chi^2$ test ์ํ ํ 2๋ฒ์ ๊ฒฐ๊ณผ์ ๋น๊ตํด๋ณด์ธ์. \n",
+ "\n",
+ "- `obs`, `exp`, `chi`๋ผ๋ ๋ณ์๋ฅผ ์ฌ์ฉํด์ผํฉ๋๋ค."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "OjFt0b1-wrFL",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "bf5faf2f-79ad-4eca-dec0-cca2cfd7c03c"
+ },
+ "source": [
+ "# Manual one-sample chi-square test on the row totals of df, for comparison with the scipy result\n",
+ "obs = df.sum(axis = 1)\n",
+ "exp = np.sum(obs) / len(obs)  # equal expected count: grand total split evenly across the rows\n",
+ "chi_squared = ((obs - exp)**2) / exp  # per-row contribution to the statistic\n",
+ "chi = chi_squared.sum()\n",
+ "# The statistic follows the chi-squared distribution (stats.chi2); stats.chi is a different distribution.\n",
+ "# Degrees of freedom = number of categories - 1, derived from obs instead of a hard-coded 4.\n",
+ "p_value = 1 - stats.chi2.cdf(chi, df = len(obs) - 1)\n",
+ "print(f'chi = {chi:}' ,f'p_value = {p_value:}')"
+ ],
+ "execution_count": 17,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "chi = 1564.4572376818994 p_value = 0.0\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "d3rO4-ROP5sS",
+ "outputId": "b104b951-b6cf-48f6-a19a-fb2206269b82"
+ },
+ "source": [
+ "exp"
+ ],
+ "execution_count": 18,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "979.25"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 18
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "a2BrKHq8m8AS"
+ },
+ "source": [
+ "## ๐ฅ ๋์ ๊ณผ์ \n",
+ "\n",
+ "์๋ ์ธ๊ฐ์ง๋ฅผ ๋คํ๋ฉด ๋ฉ๋๋ค.\n",
+ "\n",
+ "### 1. Function\n",
+ "\n",
+ "4๋ฒ์์ ์ฌ์ฉํ one sample chisquare test๋ฅผ ํจ์์ ํํ๋ก ๋ณ๊ฒฝํ์ธ์."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "AntnuAkFm-JQ",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "6525278f-5b07-4f47-c553-b77776eccee4"
+ },
+ "source": [
+ "v1 = [18,22,20,15,23,22]\n",
+ "v2 = [5,23,26,19,24,23]\n",
+ "\n",
+ "def myChisq(value):\n",
+ "    '''Print the one-sample chi-square statistic and p-value for observed counts.\n",
+ "\n",
+ "    The expected count of every category is the mean of `value` (equal frequencies).\n",
+ "\n",
+ "    Args:\n",
+ "        value: non-empty sequence of observed counts.\n",
+ "\n",
+ "    Returns:\n",
+ "        None; the result is printed.\n",
+ "\n",
+ "    Raises:\n",
+ "        ValueError: if `value` is empty.\n",
+ "    '''\n",
+ "    obs = np.asarray(value, dtype=float)  # explicit array so list input subtracts element-wise\n",
+ "    if len(obs) == 0:\n",
+ "        raise ValueError('value must contain at least one observation')\n",
+ "    exp = obs.sum() / len(obs)  # expected count per category\n",
+ "    chi_squared = ((obs - exp)**2) / exp  # per-category contribution\n",
+ "    chi = chi_squared.sum()  # chi-square statistic\n",
+ "    p_value = 1 - stats.chi2.cdf(chi, df = len(obs) - 1)  # upper-tail probability\n",
+ "    print(f'value = {value}, chisquare = {chi} , p_value = {p_value}') \n",
+ "\n",
+ "myChisq(v1) \n",
+ "myChisq(v2)\n",
+ "\n"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "value = [18, 22, 20, 15, 23, 22], chisquare = 2.3000000000000003 , p_value = 0.8062668698851285\n",
+ "value = [5, 23, 26, 19, 24, 23], chisquare = 14.8 , p_value = 0.011251979028327308\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "4ohsJhQUmEuS"
+ },
+ "source": [
+ "### 2. ANOVA\n",
+ "\n",
+ "์๋ ๋งํฌ๋ฅผ ์ฐธ์กฐํ์ฌ ANOVA ์ ๋ํ ๊ธ์ ์ฝ๊ณ \n",
+ "\n",
+ "\n",
+ "\n",
+ "๋ค์ `4๊ฐ ๊ทธ๋ฃน์ ๋ํด์ ํ๊ท ์ ์ฐจ์ด๊ฐ ์๋์ง`์ ๋ํ ๊ฐ์ค ๊ฒ์ ์ ์ํํ์ธ์.\n",
+ "\n",
+ "A : `38 33 35 92 76 97 88 41 11 9`\n",
+ "\n",
+ "B : `18 52 62 48 30 40 87 12 97 82`\n",
+ "\n",
+ "C : `28 90 5 49 66 73 96 80 4 17`\n",
+ "\n",
+ "D : ` 8 99 4 12 7 64 18 10 9 20`\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "DMebi5_4UaSn"
+ },
+ "source": [
+ "## H0 : 4๊ฐ์ ๊ทธ๋ฃน์ ๋ํด์ ํ๊ท ์ ์ฐจ์๊ฐ ๋์ง ์๋๋ค\n",
+ "## H1 : 4๊ฐ์ ๊ทธ๋ฃน์ ๋ํด์ ํ๊ท ์ ์ฐจ์ด๊ฐ ๋๋ค."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "AgkfC51fqLEZ",
+ "outputId": "8408a80d-2c9e-48ba-c62c-91fa9032247d"
+ },
+ "source": [
+ "# Observations for the four groups compared by the one-way ANOVA\n",
+ "A = [38, 33, 35, 92, 76, 97, 88, 41, 11, 9]\n",
+ "B = [18, 52, 62, 48, 30, 40, 87, 12, 97, 82]\n",
+ "C = [28, 90, 5, 49, 66, 73, 96, 80, 4, 17]\n",
+ "D = [8, 99, 4, 12, 7, 64, 18, 10, 9, 20]\n",
+ "\n",
+ "# f_oneway yields the F statistic and its p-value, unpacked in that order\n",
+ "F_statistic, pVal = stats.f_oneway(\n",
+ "    A, B, C, D\n",
+ ")\n",
+ "print(f'๋ฐ์ดํฐ์ ์ผ์๋ถ์ฐ๋ถ์ ๊ฒฐ๊ณผ : F={F_statistic}, p={pVal}')"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "๋ฐ์ดํฐ์ ์ผ์๋ถ์ฐ๋ถ์ ๊ฒฐ๊ณผ : F=1.7249594239128412, p=0.17920877113948797\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "k2vuHNLu1niX"
+ },
+ "source": [
+ "## 결과\n",
+ "* P-value(≈0.179) > 0.05 이므로 귀무가설을 기각할 수 없다. 즉, 4개 그룹의 평균에 차이가 있다고 볼 근거가 부족하다."
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/n211-simple-regression/n211a-simple-regression.ipynb b/n211-simple-regression/n211a-simple-regression.ipynb
new file mode 100644
index 0000000..3c0b046
--- /dev/null
+++ b/n211-simple-regression/n211a-simple-regression.ipynb
@@ -0,0 +1,201 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.7"
+ },
+ "colab": {
+ "name": "n211a-simple-regression.ipynb",
+ "provenance": []
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Q6G77lsPMjS3"
+ },
+ "source": [
+ "
\n",
+ "\n",
+ "## *DATA SCIENCE / SECTION 2 / SPRINT 1 / NOTE 1*\n",
+ "\n",
+ "# ๐ Assignment\n",
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7IXUfiQ2UKj6"
+ },
+ "source": [
+ "# Linear Regression\n",
+ "\n",
+ "์ด๋ฒ ๋ชจ๋์์ ์ฌ์ฉํ ๋ฐ์ดํฐ์ ์ ์ฌํ ๋ฐ์ดํฐ๋ฅผ ์ฌ์ฉํด ๋ณต์ตํด ๋ณด๊ฒ ์ต๋๋ค. ์ด ๋ฐ์ดํฐ์
์ ๋ฏธ๊ตญ ์์ ํ King County ์ง์ญ์์ 2014๋
5์๋ถํฐ ~ 2015๋
5์ ๊น์ง ์ฃผํ ํ๋งค ๊ฐ๊ฒฉ ๋ฐ์ดํฐ์
๋๋ค.\n",
+ "\n",
+ " - [House Sales in King County, USA](https://www.kaggle.com/harlfoxem/housesalesprediction?select=kc_house_data.csv)\n",
+ "\n",
+ "\n",
+ "์ด ๋ฐ์ดํฐ๋ฅผ ์ฌ์ฉํด ๋ค์ ๊ณผ์ ๋ฅผ ๋งํฌ๋ค์ด๊ณผ ์ฝ๋๋ฅผ ์ฌ์ฉํด ์งํํด ์ฃผ์ธ์."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "frBW3-iQMjS6"
+ },
+ "source": [
+ "# The data can be downloaded from Kaggle or loaded directly from this link.\n",
+ "import pandas as pd\n",
+ "\n",
+ "df = pd.read_csv(\n",
+ "    'https://ds-lecture-data.s3.ap-northeast-2.amazonaws.com/kc_house_data/kc_house_data.csv'\n",
+ ")\n"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "k4uUYX0WMjS7"
+ },
+ "source": [
+ "### 1) ํน์ฑ๋ค๊ณผ ํ๊ฒ(Price)๊ณผ์ ์๊ด๊ณ์๋ฅผ ๊ณ์ฐํ๊ณ ๊ฐ์ฅ ์๊ด๊ด๊ณ๊ฐ ๋์ ํน์ฑ์ ์ฐพ์ ํ๊ฒ๊ณผ์ ๊ด๊ณ๋ฅผ ์๊ฐํ ํ์ธ์.\n",
+ "- **ํ๊ฒ๊ณผ ์๊ด๊ด๊ณ๊ฐ ๊ฐ์ฅ ๋์ ํน์ฑ์ ์ด๋ฆ์ ๊ณผ์ ์ ์ถํผ์ ์ ์ถํ์ธ์.**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "lvvTEiWgMjS8"
+ },
+ "source": [
+ "### ์ด๊ณณ์์ ๊ณผ์ ๋ฅผ ์งํํด ์ฃผ์ธ์ ###"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "QSlnVioSMjS8"
+ },
+ "source": [
+ "### 2) grade ์ price์ scatter plot์ ๊ทธ๋ฆฌ๊ณ ๊ธฐ์ค๋ชจ๋ธ์ ์๊ฐํ ํด ๋ณด์ธ์.\n",
+ "- **๊ธฐ์ค๋ชจ๋ธ์ ๊ฐ์ ๊ณผ์ ์ ์ถํผ์ ์ ์ถํ์ธ์.**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "kSO49zGXMjS9"
+ },
+ "source": [
+ "### ์ด๊ณณ์์ ๊ณผ์ ๋ฅผ ์งํํด ์ฃผ์ธ์ ###"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "eGF8rIzZMjS9"
+ },
+ "source": [
+ "### 3) Scikit-Learn ๋ผ์ด๋ธ๋ฌ๋ฆฌ๋ฅผ ์ฌ์ฉํด ํน์ฑ grade์ ๋ํ ์ ํํ๊ท๋ชจ๋ธ์ ๋ง๋ค์ด ๋ณด์ธ์.\n",
+ "- **grade ๊ฐ์ด 6์ธ ๊ฒฝ์ฐ์ ๋ชจ๋ธ์ด ์์ธกํ๋ ์ฃผํ ๊ฐ๊ฒฉ์ ๊ณผ์ ์ ์ถํผ์ ์ ์ถํ์ธ์.**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "n0fFNcV6MjS-"
+ },
+ "source": [
+ "### ์ด๊ณณ์์ ๊ณผ์ ๋ฅผ ์งํํด ์ฃผ์ธ์ ###"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "xrabCjNgMjS_"
+ },
+ "source": [
+ "### 4) ๋ง๋ ๋ชจ๋ธ์ ์ฌ์ฉํด ์๋ก์ด ๋ฐ์ดํฐ์ ๋ํด ์์ธก์ ํด ๋ณด๊ณ coefficient๋ฅผ ์ฌ์ฉํด ์ค๋ช
ํด ๋ณด์ธ์.\n",
+ "- **grade ํน์ฑ์ ๋ํ ํ๊ท๊ณ์๋ฅผ ๊ณผ์ ์ ์ถํผ์ ์ ์ถํ์ธ์.**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "1ENl5mBeMjTA"
+ },
+ "source": [
+ "### ์ด๊ณณ์์ ๊ณผ์ ๋ฅผ ์งํํด ์ฃผ์ธ์ ###"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "jmY0X2xMMjTA"
+ },
+ "source": [
+ "### 5) sqft_living ํน์ฑ์ ์ฌ์ฉํ์ฌ ๋ชจ๋ธ์ ๋ง๋ค์ด ๋ณด๊ณ ๋ ๋ชจ๋ธ์ ๋น๊ตํด ๋ณด์ธ์.\n",
+ "- **sqft_living์ ๋ํ ํ๊ท๊ณ์๋ฅผ ๊ณผ์ ์ ์ถํผ์ ์ ์ถํ์ธ์.**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "Y1t7D490MjTA"
+ },
+ "source": [
+ "### ์ด๊ณณ์์ ๊ณผ์ ๋ฅผ ์งํํด ์ฃผ์ธ์ ###"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "WTf8r5SNMjTB"
+ },
+ "source": [
+ "## ๐ฅ๋์ ๊ณผ์ \n",
+ "\n",
+ "### 6) ๋จ์์ ํํ๊ท๋ชจ๋ธ์ ๋ง๋ค๊ธฐ ์ํ ์ข์ ํน์ฑ์ ์ ํํ๊ธฐ ์ํด ๊ณ ๋ คํด์ผ ํ๋ ์ฌํญ๋ค์ ๋ํด ์กฐ์ฌํ๊ณ ์์ ํ์ธ์.\n",
+ "\n",
+ "### 7) OLS์ ๋ํด์ ๊ฒ์ํด ๋ณด๊ณ ๋ณธ์ธ์ ์ดํด๋ฅผ ๋
น์ฌ๋ด์ด 10๋ฌธ์ฅ ๋ด๋ก ์์ฝํด ๋ณด์ธ์."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "3Wx69QtfMjTB"
+ },
+ "source": [
+ "### ์ด๊ณณ์์ ๊ณผ์ ๋ฅผ ์งํํด ์ฃผ์ธ์ ###"
+ ],
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file
diff --git a/n231-choose-your-ml-problems/n231a-choose-your-ml-problems.ipynb b/n231-choose-your-ml-problems/n231a-choose-your-ml-problems.ipynb
new file mode 100644
index 0000000..7c5b96a
--- /dev/null
+++ b/n231-choose-your-ml-problems/n231a-choose-your-ml-problems.ipynb
@@ -0,0 +1,160 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.7"
+ },
+ "colab": {
+ "name": "n231a-choose-your-ml-problems.ipynb",
+ "provenance": []
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "nCc3XZEyG3XV"
+ },
+ "source": [
+ "
\n",
+ "\n",
+ "## *DATA SCIENCE / SECTION 2 / SPRINT 3 / NOTE 1*\n",
+ "\n",
+ "# ๐ Assignment\n",
+ "---\n",
+ "# Choose your ML problems\n",
+ "\n",
+ "์ด๋ฒ ์คํ๋ฆฐํธ์์๋ ํฌํธํด๋ฆฌ์ค ํ๋ก์ ํธ๋ฅผ ์ํด ์๊ฐ์ ๊ฐ์๊ฐ ์ ํ ๋ฐ์ดํฐ์
์ ์ฌ์ฉํ์ฌ ๊ณผ์ ๋ฅผ ์ํํฉ๋๋ค. ํ๋ก์ ํธ๋ฅผ ์ํ ๋ฐ์ดํฐ๋ฅผ ์ต์ข
์ ํํ๊ธฐ ์ ์ ์ฌ๋ฌ ๊ฐ๋ฅํ ๋ฐ์ดํฐ์ธํธ๋ฅผ ์ดํด๋ณด๊ณ ๊ฐ๋จํ ๋ชจ๋ธ๊น์ง ํ์ตํด ๋ณด๋ ๊ฒ์ ์ถ์ฒ๋๋ฆฝ๋๋ค."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "25Q5khpG3-cc"
+ },
+ "source": [
+ "### 1) ํ๊ฒ์ผ๋ก ์ฌ์ฉํ ํน์ฑ์ ์ ํํฉ๋๋ค.\n",
+ "- **(๊ฐ๊ด์) ๋ค์ ์ค ์ค๋ช
์ด ์ฌ๋ฐ๋ฅธ ํญ๋ชฉ์ ์ ํํ๊ณ ๊ณผ์ ์ ์ถํผ์ ์ ์ถํ์ธ์.**\n",
+ " - ํน์ฑ 3๊ฐ๋ฅผ ์ฌ์ฉํด ๊ฐ์ฅ ์ฑ๋ฅ์ด ์ข์ ๋ชจ๋ธ A๋ฅผ ๋ง๋ค๊ณ ๋ ํน์ฑ 5๊ฐ๋ฅผ ์ฌ์ฉํด ์ต์ ์ ๋ชจ๋ธ B๋ฅผ ํ์ตํ์์ต๋๋ค. ์ด๋ ๋ชจ๋ธ A์์ ์ ํ๋ ํน์ฑ์ A1, A2, A3 ์
๋๋ค.\n",
+ " 1. ์ต์ ์ ๋ชจ๋ธ B์๋ ํน์ฑ A1, A2, A3๊ฐ ํญ์ ์ ํ๋ฉ๋๋ค.\n",
+ " 2. ์ต์ ์ ๋ชจ๋ธ B ํ์ต์ ํน์ฑ A1, A2, A3๋ ์ ํ ์ฌ์ฉ๋์ง ์์ต๋๋ค.\n",
+ " 3. ์ต์ ์ ๋ชจ๋ธ B์ ํน์ฑ A1, A2, A3๊ฐ ์ ํ๋ ์ง ์ ์ ์์ต๋๋ค."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "RobhoziJ3-cd"
+ },
+ "source": [
+ "### 3"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "16tT-UAI3-ce"
+ },
+ "source": [
+ "### 2) ํ๊ณ ์ ํ๋ ๋ฌธ์ ๊ฐ ๋ถ๋ฅ/ํ๊ท ๋ฌธ์ ์ธ์ง ์ ํฉ๋๋ค.\n",
+ "- **(๊ฐ๊ด์) ๋ค์ ์ค๋ช
์ค ๊ฐ์ฅ ํ๋ฆฐ ํญ๋ชฉ์ ์ ํํ๊ณ ๊ณผ์ ์ ์ถํผ์ ์ ์ถํ์ธ์.**\n",
+ " 1. ํน์ ํ ์ข
๋ชฉ์ ๋ค์๋ ์ฃผ์ ์ข
๊ฐ๋ฅผ ์์ธกํ๋ ๋ฌธ์ ๋ Supervised Learning ๋ฌธ์ ์ด๋ค.\n",
+ " 2. ๋ค์๋ KOSPI ์ง์๊ฐ ์ฆ๊ฐํ ์ง ๊ฐ์ํ ์ง ์์ธกํ๋ ๋ฌธ์ ๋ Classification ๋ฌธ์ ์ด๋ค.\n",
+ " 3. ์ถ๊ตฌ ๊ฒฝ๊ธฐ ๋์ค์ ์นํจ๋ฅผ ์์ธกํ๋ ๋ชจ๋ธ์ ๋ง๋ค๊ธฐ ์ํด์๋ ์ค์๊ฐ ํต๊ณ ์ ๋ณด์ ๊ฒฝ๊ธฐ๋ง๋ค ์นํจ ๋ฐ์ดํฐ๊ฐ ํ์ํ๋ค.\n",
+ " 4. ์ด๋ฒ ์์ฆ ํ ํธ๋ ์ ์๋ค์ ๊ฐ์ธ ํต๊ณ ๋ฐ์ดํฐ(์, ๋์, ...)๋ฅผ ์ฌ์ฉํด์ ๋ค์ ํ ํธ๋ ๊ฒฝ๊ธฐ์ ์นํจ๋ฅผ ์์ธกํ ์ ์๋ค.\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "syw5kiS43-ce"
+ },
+ "source": [
+ "### 4"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "uhMD-HUR3-ce"
+ },
+ "source": [
+ "### 3) ํ๊ฒ์ ๋ถํฌ๋ฅผ ๋ถ์ํ๊ณ ์ฌ์ฉํ ํ๊ฐ์งํ๋ฅผ ์ ํํฉ๋๋ค. ํ๊ฐ์งํ๋ฅผ ์ ํํ ์ด์ ๋ฅผ ์ค๋ช
ํ์ธ์.\n",
+ "- ๋ถ๋ฅ: ํด๋์ค๊ฐ ๋ช ๊ฐ์ธ์ง? ํด๋์ค ๋น์จ์ด ์ด๋ค์ง(balance or imbalance)\n",
+ "- ํ๊ท: ํ๊ฒ ๋ถํฌ๊ฐ right-skewed ์ธ์ง? log-transform์ ์ฌ์ฉํ ๊ฒ์ธ์ง?\n",
+ "- ๋ถ๋ฅ: ๋ค์ ํด๋์ค(majority class) ๋น์จ์ด 50%~70% ์ธ ๊ฒฝ์ฐ ์ ํ๋(accuracy)๋ง ์ฌ์ฉํด๋ ๋ฌด๋ฐฉํฉ๋๋ค. ํ์ง๋ง ๋ฒ์๋ฅผ ๋์ด์ค ๊ฒฝ์ฐ ์ ํ๋๋ง์ ์ฌ์ฉํด์๋ ๋ชจ๋ธ์ ์๋ชป ์ดํดํ ์ ์์ต๋๋ค. ์ด๋ค ํ๊ฐ์งํ๋ฅผ ์ฌ์ฉํด์ผ ํ ๊น์?\n",
+ "- ํ๊ท: MAE, RMSE, R^2 ๋ฑ์ ์ฌ์ฉํ ์ ์์ต๋๋ค.\n",
+ "- **(๊ฐ๊ด์) ๊ฐ์ ๋ฐ์ดํฐ๋ก ์คํธ์ ์์ธกํ๋ ๋ชจ๋ธ A์ B๋ฅผ ๋ง๋ค์์ต๋๋ค. ๋ค์ ์ค๋ช
์ค ๊ฐ์ฅ ์ฌ๋ฐ๋ฅธ ํญ๋ชฉ์ ์ ํํ๊ณ ๊ณผ์ ์ ์ถํผ์ ์ ์ถํ์ธ์.**\n",
+ " 1. A์ ์ฌํ์จ์ 80%์ด๊ณ B์ ์ฌํ์จ์ 60%์ผ ๋ A๊ฐ ๋ ์ข์ ๋ชจ๋ธ์ด๋ค.\n",
+ " 2. A์ ์ ๋ฐ๋๋ 80%์ด๊ณ B์ ์ ๋ฐ๋๋ 60%์ผ ๋ A๊ฐ ๋ ์ข์ ๋ชจ๋ธ์ด๋ค.\n",
+ " 3. A๋ B๋ณด๋ค ์ ๋ฐ๋๋ ๋์ง๋ง ์ฌํ์จ์ ๋ฎ๋ค. A๊ฐ ๋ ์ข์ ๋ชจ๋ธ์ด๋ค.\n",
+ " 4. A๋ B๋ณด๋ค ์ฌํ์จ์ ๋์ง๋ง, ์ ๋ฐ๋๋ ๋ฎ๋ค. A๊ฐ ๋ ์ข์ ๋ชจ๋ธ์ด๋ค.\n",
+ " 5. A๋ B๋ณด๋ค ์ ๋ฐ๋, ์ฌํ์จ ๋ชจ๋ ๋๋ค. A๊ฐ ๋ ์ข์ ๋ชจ๋ธ์ด๋ค."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "86r9VSpr3-cf"
+ },
+ "source": [
+ "### 5"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "nQyvvRaI3-cf"
+ },
+ "source": [
+ "## ๐ฅ ๋์ ๊ณผ์ "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "m1VDTtbB3-cg"
+ },
+ "source": [
+ "### 4) ๋ฐ์ดํฐ๋ฅผ ํ๋ จ/๊ฒ์ฆ/ํ
์คํธ ๋ฐ์ดํฐ๋ก ๋๋๊ณ ์ ์ฒ๋ฆฌ๋ฅผ ์ํํฉ๋๋ค. ๊ทธ๋ฆฌ๊ณ ๊ฐ๋จํ ๋ชจ๋ธ์ ์ฌ์ฉํด์ ํ์ต์ ์ํํฉ๋๋ค.\n",
+ "- ์ด์์น(outliers)๊ฐ ์๋ค๋ฉด ์ด๋ป๊ฒ ์ฒ๋ฆฌํ ๊ฒ์ธ์ง ์ค๋ช
ํฉ๋๋ค.\n",
+ "- ๋ฌด์์๋ก or ์๊ฐ์ ๋ฐ๋ผ ๋ฐ์ดํฐ๋ฅผ ๋๋ ๊ฒ์ธ์ง ์ค๋ช
ํฉ๋๋ค.\n",
+ "- ๋ชจ๋ธ ํ์ต ๊ฒฐ๊ณผ๋ฅผ ๋ฆฌํฌํ
ํฉ๋๋ค.\n",
+ "- ์ ๋ณด ๋์(leakage)๊ฐ ์๋์ง ํ์ธํด ๋ด
๋๋ค."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "G-xH-VL83-ch"
+ },
+ "source": [
+ "### ์ด๊ณณ์์ ๊ณผ์ ๋ฅผ ์งํํด ์ฃผ์ธ์ ### "
+ ],
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file