From f612feea6c0224dfe522abfe697fb73f16a27c13 Mon Sep 17 00:00:00 2001 From: sahasthhr Date: Fri, 4 Apr 2025 16:00:20 +0530 Subject: [PATCH 1/3] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9bda83f..708bc41 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,9 @@

1. Add your details:

From c2e9fda083be7e4d81f69f257fe1c41d4b504d94 Mon Sep 17 00:00:00 2001 From: sahasthhr Date: Tue, 14 Oct 2025 01:09:38 +0530 Subject: [PATCH 2/3] Created using Colab --- ...e_generation_and_disposal_practices_.ipynb | 1963 +++++++++++++++++ 1 file changed, 1963 insertions(+) create mode 100644 160124737026_CBIT_IT_1_EDAV_2025_CEP_39_Analyze_urban_municipal_waste_generation_and_disposal_practices_.ipynb diff --git a/160124737026_CBIT_IT_1_EDAV_2025_CEP_39_Analyze_urban_municipal_waste_generation_and_disposal_practices_.ipynb b/160124737026_CBIT_IT_1_EDAV_2025_CEP_39_Analyze_urban_municipal_waste_generation_and_disposal_practices_.ipynb new file mode 100644 index 0000000..3635b9b --- /dev/null +++ b/160124737026_CBIT_IT_1_EDAV_2025_CEP_39_Analyze_urban_municipal_waste_generation_and_disposal_practices_.ipynb @@ -0,0 +1,1963 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyPLegGaufNi7Onh1cxUTEHs", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "source": [ + "#**Analyze urban municipal waste generation and disposal practices.**\n", + "#**CBIT/IT-1/EDAV/2025/CEP-39**\n", + "#**Roll no. 
160124737026**\n", + "#**Name of the Student : SAHASTHRA JERIPOTULA**\n", + "#**Dept of IT , Chaitanya Bharathi Institute of Technology , Hyderabad**" + ], + "metadata": { + "id": "jkIh6wgq4I5S" + } + }, + { + "cell_type": "markdown", + "source": [ + "**The goal of this project is to analyze and visualize urban municipal waste generation and disposal practices across various urban bodies and waste types using Python (NumPy, Pandas, Matplotlib).**" + ], + "metadata": { + "id": "zEhzvDen52Pp" + } + }, + { + "cell_type": "markdown", + "source": [ + "#**Python Version: 3.10+**\n", + "\n", + "#**Libraries used :**\n", + "\n", + "\n", + "| Library | Purpose |\n", + "| ---------------- | ---------------------------------------------------------------------- |\n", + "| **pandas** | For data loading, cleaning, and analysis (DataFrame operations) |\n", + "| **numpy** | For numerical computations and handling missing values |\n", + "| **matplotlib** | For data visualization (bar charts and plots) |\n", + "| **google.colab** | For file upload and download functionality in Google Colab environment |\n" + ], + "metadata": { + "id": "SslhcBFB6Tdm" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 342 + }, + "id": "HibBUVg61JTj", + "outputId": "4d11209a-a58b-4dfc-e191-844bbdd1bde7" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + " \n", + " \n", + " Upload widget is only available when the cell has been executed in the\n", + " current browser session. 
Please rerun this cell to enable.\n", + " \n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saving urban_waste_data.csv to urban_waste_data.csv\n", + "Overall average waste generation per capita: 0.03945233581982956\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " year waste_per_capita\n", + "0 2018 0.034637\n", + "1 2019 0.033006\n", + "2 2020 0.037747\n", + "3 2021 0.046157\n", + "4 2022 0.041639\n", + "5 2023 0.043419\n", + "6 2024 0.039561" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearwaste_per_capita
020180.034637
120190.033006
220200.037747
320210.046157
420220.041639
520230.043419
620240.039561
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "avg_per_capita_by_year", + "summary": "{\n \"name\": \"avg_per_capita_by_year\",\n \"rows\": 7,\n \"fields\": [\n {\n \"column\": \"year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 2018,\n \"max\": 2024,\n \"num_unique_values\": 7,\n \"samples\": [\n 2018,\n 2019,\n 2023\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"waste_per_capita\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.004710546281067517,\n \"min\": 0.03300640402627123,\n \"max\": 0.04615701721466702,\n \"num_unique_values\": 7,\n \"samples\": [\n 0.034636516669389474,\n 0.03300640402627123,\n 0.043419489502697396\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {} + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# Load dataset\n", + "from google.colab import files\n", + "uploaded = files.upload()\n", + "df = pd.read_csv(next(iter(uploaded)))\n", + "\n", + "\n", + "# Q1: Calculate per capita waste generation\n", + "df['waste_per_capita'] = df['total_waste_generated'] / df['population']\n", + "\n", + "# Overall average waste per capita\n", + "overall_avg_per_capita = df['waste_per_capita'].mean()\n", + "\n", + "# Average per year\n", + "avg_per_capita_by_year = df.groupby('year', as_index=False)['waste_per_capita'].mean()\n", + "\n", + "print(\"Overall average waste generation per capita:\", overall_avg_per_capita)\n", + "display(avg_per_capita_by_year)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "#**Q1: Calculate average waste generation per capita (BL-3)**" + ], + "metadata": { + "id": "SoVzjNH2-wvC" + } + }, + { + "cell_type": "markdown", + "source": [ + "The code first identifies the columns containing information about total waste generated and population for each urban body.\n", + "\n", + "Then, it calculates the 
waste generated per person by dividing the total waste generated by the population for every record in the dataset.\n", + "\n", + "After that, it computes the mean (average) of these per-person values across all rows (or optionally by year).\n", + "\n", + "This gives one combined metric called “Average Waste Generation Per Capita”, which represents the average amount of waste produced by each individual in the dataset or city." + ], + "metadata": { + "id": "_fJ8qoLU_FGu" + } + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "\n", + "# Load dataset\n", + "CSV_PATH = '/content/urban_waste_data.csv'\n", + "df = pd.read_csv(CSV_PATH)\n", + "\n", + "# Q2: Filter by waste type\n", + "TARGET_WASTE_TYPE = 'Plastic' # Change as needed: 'Biodegradable', 'E-Waste', etc.\n", + "filtered_data = df[df['waste_type'] == TARGET_WASTE_TYPE]\n", + "\n", + "print(f\"Rows with waste_type == {TARGET_WASTE_TYPE!r}: {len(filtered_data)}\")\n", + "display(filtered_data.head())\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 223 + }, + "id": "TOSvnsh72BOQ", + "outputId": "e19dac7d-28b5-4b25-dfa4-31fa3f512b01" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Rows with waste_type == 'Plastic': 70\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " urban_body year population waste_type total_waste_generated \\\n", + "1 Hyderabad 2018 2734489 Plastic 218458 \n", + "6 Hyderabad 2019 6613790 Plastic 127537 \n", + "11 Hyderabad 2020 2325665 Plastic 129243 \n", + "16 Hyderabad 2021 4269315 Plastic 168678 \n", + "21 Hyderabad 2022 7056168 Plastic 125151 \n", + "\n", + " waste_disposed disposal_method collection_efficiency \n", + "1 138718.912056 Recycling 78.37 \n", + "6 78605.259541 Incineration 98.95 \n", + "11 88448.599007 Landfill 87.33 \n", + "16 155631.765775 NaN 63.54 \n", + "21 92387.535908 Recycling 71.74 " + ], + "text/html": [ + 
"\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
urban_bodyyearpopulationwaste_typetotal_waste_generatedwaste_disposeddisposal_methodcollection_efficiency
1Hyderabad20182734489Plastic218458138718.912056Recycling78.37
6Hyderabad20196613790Plastic12753778605.259541Incineration98.95
11Hyderabad20202325665Plastic12924388448.599007Landfill87.33
16Hyderabad20214269315Plastic168678155631.765775NaN63.54
21Hyderabad20227056168Plastic12515192387.535908Recycling71.74
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"display(filtered_data\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"urban_body\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Hyderabad\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 2018,\n \"max\": 2022,\n \"num_unique_values\": 5,\n \"samples\": [\n 2019\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2170840,\n \"min\": 2325665,\n \"max\": 7056168,\n \"num_unique_values\": 5,\n \"samples\": [\n 6613790\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"waste_type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Plastic\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_waste_generated\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 40359,\n \"min\": 125151,\n \"max\": 218458,\n \"num_unique_values\": 5,\n \"samples\": [\n 127537\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"waste_disposed\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 34148.38758863866,\n \"min\": 78605.25954124621,\n \"max\": 155631.76577533025,\n \"num_unique_values\": 5,\n \"samples\": [\n 78605.25954124621\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"disposal_method\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Recycling\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"collection_efficiency\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 13.735415173921757,\n \"min\": 
63.54,\n \"max\": 98.95,\n \"num_unique_values\": 5,\n \"samples\": [\n 98.95\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "#**Q2: Filter waste data by waste type (BL-3)**" + ], + "metadata": { + "id": "j_AMRDaV_qy3" + } + }, + { + "cell_type": "markdown", + "source": [ + "Explanation\n", + "\n", + "The code first identifies the column that contains the type of waste (for example, Plastic, Biodegradable, E-Waste, etc.).\n", + "\n", + "Then, it applies a filter condition to select only those rows where the waste type matches the chosen category (e.g., 'Plastic').\n", + "\n", + "This process isolates specific data related to one waste category, allowing focused analysis on how that particular type of waste is generated, managed, or disposed of across different urban bodies or years." + ], + "metadata": { + "id": "rdtnbovR_5U1" + } + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# Load dataset\n", + "CSV_PATH = '/content/urban_waste_data.csv'\n", + "df = pd.read_csv(CSV_PATH)\n", + "\n", + "# Q3: Impute missing values\n", + "df['waste_disposed'] = pd.to_numeric(df['waste_disposed'], errors='coerce')\n", + "\n", + "# Impute numeric values using median within groups\n", + "group_cols_num = ['urban_body', 'waste_type', 'year']\n", + "median_by_group = df.groupby(group_cols_num)['waste_disposed'].transform('median')\n", + "overall_median = df['waste_disposed'].median()\n", + "df['waste_disposed_imputed'] = df['waste_disposed'].fillna(median_by_group).fillna(overall_median)\n", + "\n", + "# Impute categorical values using mode\n", + "def group_mode(s):\n", + " m = s.mode(dropna=True)\n", + " return m.iloc[0] if not m.empty else np.nan\n", + "\n", + "group_cols_cat = ['urban_body', 'waste_type']\n", + "mode_by_group = df.groupby(group_cols_cat)['disposal_method'].transform(group_mode)\n", + 
"df['disposal_method_imputed'] = df['disposal_method'].fillna(mode_by_group).fillna('Unknown')\n", + "\n", + "# Print summary\n", + "print(\"Missing 'waste_disposed' before:\", df['waste_disposed'].isna().sum(),\n", + " \"| after:\", df['waste_disposed_imputed'].isna().sum())\n", + "print(\"Missing 'disposal_method' before:\", df['disposal_method'].isna().sum(),\n", + " \"| after:\", df['disposal_method_imputed'].isna().sum())\n", + "\n", + "display(df.head())\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 241 + }, + "id": "c0FYR-8I2Haa", + "outputId": "13e72da2-7c3a-4296-9994-994350fe2d7e" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Missing 'waste_disposed' before: 18 | after: 0\n", + "Missing 'disposal_method' before: 18 | after: 0\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + " urban_body year population waste_type total_waste_generated \\\n", + "0 Hyderabad 2018 6923388 Biodegradable 108694 \n", + "1 Hyderabad 2018 2734489 Plastic 218458 \n", + "2 Hyderabad 2018 7704212 E-Waste 180203 \n", + "3 Hyderabad 2018 7904852 Metal 5769 \n", + "4 Hyderabad 2018 3844769 Paper 10311 \n", + "\n", + " waste_disposed disposal_method collection_efficiency \\\n", + "0 93063.572329 Incineration 83.95 \n", + "1 138718.912056 Recycling 78.37 \n", + "2 117132.560216 Recycling 86.04 \n", + "3 4919.223821 Landfill 97.54 \n", + "4 9767.342686 Recycling 84.70 \n", + "\n", + " waste_disposed_imputed disposal_method_imputed \n", + "0 93063.572329 Incineration \n", + "1 138718.912056 Recycling \n", + "2 117132.560216 Recycling \n", + "3 4919.223821 Landfill \n", + "4 9767.342686 Recycling " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
urban_bodyyearpopulationwaste_typetotal_waste_generatedwaste_disposeddisposal_methodcollection_efficiencywaste_disposed_imputeddisposal_method_imputed
0Hyderabad20186923388Biodegradable10869493063.572329Incineration83.9593063.572329Incineration
1Hyderabad20182734489Plastic218458138718.912056Recycling78.37138718.912056Recycling
2Hyderabad20187704212E-Waste180203117132.560216Recycling86.04117132.560216Recycling
3Hyderabad20187904852Metal57694919.223821Landfill97.544919.223821Landfill
4Hyderabad20183844769Paper103119767.342686Recycling84.709767.342686Recycling
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"display(df\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"urban_body\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Hyderabad\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 2018,\n \"max\": 2018,\n \"num_unique_values\": 1,\n \"samples\": [\n 2018\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"population\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2373616,\n \"min\": 2734489,\n \"max\": 7904852,\n \"num_unique_values\": 5,\n \"samples\": [\n 2734489\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"waste_type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Plastic\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_waste_generated\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 96636,\n \"min\": 5769,\n \"max\": 218458,\n \"num_unique_values\": 5,\n \"samples\": [\n 218458\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"waste_disposed\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 61850.98048036466,\n \"min\": 4919.223821022559,\n \"max\": 138718.91205591924,\n \"num_unique_values\": 5,\n \"samples\": [\n 138718.91205591924\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"disposal_method\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Incineration\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"collection_efficiency\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7.021584578996396,\n \"min\": 78.37,\n 
\"max\": 97.54,\n \"num_unique_values\": 5,\n \"samples\": [\n 78.37\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"waste_disposed_imputed\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 61850.98048036466,\n \"min\": 4919.223821022559,\n \"max\": 138718.91205591924,\n \"num_unique_values\": 5,\n \"samples\": [\n 138718.91205591924\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"disposal_method_imputed\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Incineration\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "#**Q3: Impute missing waste disposal records (BL-4)**" + ], + "metadata": { + "id": "wFh32proAGVo" + } + }, + { + "cell_type": "markdown", + "source": [ + "Explanation\n", + "\n", + "The code first checks for missing values in the columns related to waste disposal — such as waste_disposed and disposal_method.\n", + "\n", + "Then, it fills (imputes) the missing numerical values in waste_disposed using the median of similar records grouped by factors like urban body, waste type, and year.\n", + "\n", + "For missing categorical values in disposal_method, it replaces them with the most common (mode) method used within the same group.\n", + "\n", + "This ensures that the dataset becomes complete and consistent, allowing accurate analysis without losing valuable records due to missing data." 
+ ], + "metadata": { + "id": "5qCjAJbVAR1z" + } + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "\n", + "# Load dataset\n", + "CSV_PATH = '/content/urban_waste_data.csv'\n", + "df = pd.read_csv(CSV_PATH)\n", + "\n", + "# Q4: Group and aggregate\n", + "agg = df.groupby('urban_body').agg(\n", + " total_generated=('total_waste_generated', 'sum'),\n", + " total_disposed=('waste_disposed', 'sum'),\n", + " mean_collection_efficiency=('collection_efficiency', 'mean'),\n", + " population_latest=('population', 'last')\n", + ").reset_index()\n", + "\n", + "# Calculate per-capita generation\n", + "agg['per_capita_generation'] = agg['total_generated'] / agg['population_latest']\n", + "\n", + "display(agg.sort_values('total_generated', ascending=False))\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "Ly5tEYaC2LNm", + "outputId": "ba96c6df-2270-4542-cc0f-0c203d336069" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " urban_body total_generated total_disposed mean_collection_efficiency \\\n", + "8 Mumbai 4761404 3.469207e+06 77.501143 \n", + "4 Hyderabad 4571273 3.332878e+06 80.600286 \n", + "3 Delhi 4511459 3.106791e+06 79.987429 \n", + "0 Ahmedabad 4457636 3.329175e+06 80.821714 \n", + "7 Lucknow 4369371 3.059458e+06 82.858000 \n", + "1 Bengaluru 4320643 3.303948e+06 78.850000 \n", + "5 Jaipur 4307288 3.311632e+06 80.970286 \n", + "2 Chennai 4158077 2.957547e+06 81.904286 \n", + "9 Pune 4049398 2.915668e+06 79.159714 \n", + "6 Kolkata 4046526 2.968780e+06 80.285714 \n", + "\n", + " population_latest per_capita_generation \n", + "8 2089228 2.279026 \n", + "4 3039448 1.503981 \n", + "3 4603022 0.980108 \n", + "0 7357658 0.605850 \n", + "7 8496198 0.514274 \n", + "1 6848564 0.630883 \n", + "5 3273939 1.315629 \n", + "2 8941136 0.465050 \n", + "9 5705597 0.709724 \n", + "6 3837920 1.054354 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
urban_bodytotal_generatedtotal_disposedmean_collection_efficiencypopulation_latestper_capita_generation
8Mumbai47614043.469207e+0677.50114320892282.279026
4Hyderabad45712733.332878e+0680.60028630394481.503981
3Delhi45114593.106791e+0679.98742946030220.980108
0Ahmedabad44576363.329175e+0680.82171473576580.605850
7Lucknow43693713.059458e+0682.85800084961980.514274
1Bengaluru43206433.303948e+0678.85000068485640.630883
5Jaipur43072883.311632e+0680.97028632739391.315629
2Chennai41580772.957547e+0681.90428689411360.465050
9Pune40493982.915668e+0679.15971457055970.709724
6Kolkata40465262.968780e+0680.28571438379201.054354
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"display(agg\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"urban_body\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Pune\",\n \"Hyderabad\",\n \"Bengaluru\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_generated\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 230444,\n \"min\": 4046526,\n \"max\": 4761404,\n \"num_unique_values\": 10,\n \"samples\": [\n 4049398,\n 4571273,\n 4320643\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total_disposed\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 195963.2386130693,\n \"min\": 2915668.280577688,\n \"max\": 3469207.3513263282,\n \"num_unique_values\": 10,\n \"samples\": [\n 2915668.280577688,\n 3332877.9913053266,\n 3303947.7157039074\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"mean_collection_efficiency\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.537046856328317,\n \"min\": 77.50114285714285,\n \"max\": 82.858,\n \"num_unique_values\": 10,\n \"samples\": [\n 79.15971428571429,\n 80.60028571428572,\n 78.85\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"population_latest\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2410056,\n \"min\": 2089228,\n \"max\": 8941136,\n \"num_unique_values\": 10,\n \"samples\": [\n 5705597,\n 3039448,\n 6848564\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"per_capita_generation\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5667148654713653,\n \"min\": 0.4650501904903359,\n \"max\": 2.2790255539366693,\n \"num_unique_values\": 10,\n \"samples\": [\n 0.7097238027852301,\n 1.503981315028255,\n 0.6308830581126204\n ],\n \"semantic_type\": \"\",\n 
\"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "#**Q4: Group waste data by urban bodies (BL-4)**" + ], + "metadata": { + "id": "SGCtKz0oAX_H" + } + }, + { + "cell_type": "markdown", + "source": [ + "Explanation\n", + "\n", + "The code groups the dataset by the urban body column, which represents different cities or municipalities.\n", + "\n", + "Then, it calculates aggregate values for each group — such as the total waste generated, total waste disposed, and average collection efficiency.\n", + "\n", + "It also computes per capita waste generation by dividing the total waste generated by the latest recorded population of each urban body.\n", + "\n", + "This grouping provides a summary view of waste management performance for each city, making it easier to compare efficiency and output across different urban areas." + ], + "metadata": { + "id": "S8QAFd5cAhFz" + } + }, + { + "cell_type": "markdown", + "source": [ + "#**Summary of Findings**\n", + "\n", + "Missing waste disposal data were successfully handled using median and mode imputation, ensuring data completeness.\n", + "\n", + "The average waste generation per capita provided a single, comparable indicator of waste output efficiency across urban bodies.\n", + "\n", + "Distinct differences were observed among cities in terms of total waste generated, disposal efficiency, and collection performance.\n", + "\n", + "Visual analysis revealed clear yearly trends in both waste generation and disposal levels.\n", + "\n", + "#**Challenges Encountered**\n", + "\n", + "Some records contained inconsistent or missing disposal values, requiring careful imputation to maintain accuracy.\n", + "\n", + "Differences in population sizes among urban bodies affected per-capita comparisons.\n", + "\n", + "Managing overlapping labels in multi-year or multi-city visualizations was necessary for readability.\n", + "\n", + "#**Future Scope**\n", + "\n", + 
"Incorporate more recent and multi-year datasets to analyze long-term waste management trends.\n", + "\n", + "Integrate spatial mapping or geographic data to visualize regional waste patterns.\n", + "\n", + "Develop interactive dashboards (e.g., using Plotly or Power BI) to support decision-making and policy evaluation." + ], + "metadata": { + "id": "tO8KlQxuBIi_" + } + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "udGfNkbvBJOW" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file From 2b8b6aedd1be9d1e9519245fc6506efd38aba6da Mon Sep 17 00:00:00 2001 From: sahasthhr Date: Sun, 26 Oct 2025 21:55:02 +0530 Subject: [PATCH 3/3] Created using Colab --- ...e_generation_and_disposal_practices_.ipynb | 232 ++++++++++++------ 1 file changed, 159 insertions(+), 73 deletions(-) diff --git a/160124737026_CBIT_IT_1_EDAV_2025_CEP_39_Analyze_urban_municipal_waste_generation_and_disposal_practices_.ipynb b/160124737026_CBIT_IT_1_EDAV_2025_CEP_39_Analyze_urban_municipal_waste_generation_and_disposal_practices_.ipynb index 3635b9b..d7c960c 100644 --- a/160124737026_CBIT_IT_1_EDAV_2025_CEP_39_Analyze_urban_municipal_waste_generation_and_disposal_practices_.ipynb +++ b/160124737026_CBIT_IT_1_EDAV_2025_CEP_39_Analyze_urban_municipal_waste_generation_and_disposal_practices_.ipynb @@ -4,7 +4,6 @@ "metadata": { "colab": { "provenance": [], - "authorship_tag": "ABX9TyPLegGaufNi7Onh1cxUTEHs", "include_colab_link": true }, "kernelspec": { @@ -67,16 +66,40 @@ "id": "SslhcBFB6Tdm" } }, + { + "cell_type": "markdown", + "source": [ + "#**Q1: Calculate average waste generation per capita (BL-3)**" + ], + "metadata": { + "id": "SoVzjNH2-wvC" + } + }, + { + "cell_type": "markdown", + "source": [ + "The code first identifies the columns containing information about total waste generated and population for each urban body.\n", + "\n", + "Then, it calculates the waste generated per person by dividing the total waste generated by the population 
for every record in the dataset.\n", + "\n", + "After that, it computes the mean (average) of these per-person values across all rows (or optionally by year).\n", + "\n", + "This gives one combined metric called “Average Waste Generation Per Capita”, which represents the average amount of waste produced by each individual in the dataset or city." + ], + "metadata": { + "id": "_fJ8qoLU_FGu" + } + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 342 + "height": 340 }, "id": "HibBUVg61JTj", - "outputId": "4d11209a-a58b-4dfc-e191-844bbdd1bde7" + "outputId": "fc2fc5b9-1705-46ce-8716-07b1c2a75d64" }, "outputs": [ { @@ -87,9 +110,9 @@ ], "text/html": [ "\n", - " \n", - " \n", + " \n", " Upload widget is only available when the cell has been executed in the\n", " current browser session. Please rerun this cell to enable.\n", " \n", @@ -296,7 +319,7 @@ ], "text/html": [ "\n", - "
\n", + "
\n", "
\n", "