Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix a couple of notebook bugs #9

Merged
merged 2 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 32 additions & 21 deletions jupyterlite/files/examples/Fleiss Kappa.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
},
{
"cell_type": "code",
"source": "# Generic GET call to a JSON endpoint \nasync def get_json(url):\n resp = await fetch(url)\n resp_text = await resp.text()\n return json.loads(resp_text)\n\n",
"source": "async def get_text(url):\n    \"\"\"Fetch `url` and return the response body as text.\n\n    Raises RuntimeError when the server answers with a non-success status, so an\n    error page is never handed on as if it were the requested content.\n    \"\"\"\n    resp = await fetch(url)\n    if not resp.ok:\n        raise RuntimeError(f'GET {url} failed with HTTP status {resp.status}')\n    return await resp.text()",
"metadata": {
"trusted": true
},
Expand All @@ -77,11 +77,11 @@
},
{
"cell_type": "code",
"source": "data = await get_json(f'/api/export/books/{QUEPID_BOOK_NUM}')",
"source": "# Download the book's CSV export as text\nbook_csv_url = f'/api/books/{QUEPID_BOOK_NUM}.csv'\ndata = await get_text(book_csv_url)",
"metadata": {
"trusted": true
},
"execution_count": null,
"execution_count": 4,
"outputs": [],
"id": "8fef6231-daa8-467f-ac57-13a144e8a356"
},
Expand All @@ -93,13 +93,23 @@
},
{
"cell_type": "code",
"source": "# Initialize a list to hold the tuples of (doc_id, rating, count)\nratings_data = []\n\n# Iterate through each query-doc pair\nfor pair in data['query_doc_pairs']:\n # Initialize a dictionary to count the ratings for this pair\n ratings_count = defaultdict(int)\n \n # Extract judgements and count the ratings\n for judgement in pair['judgements']:\n rating = judgement['rating']\n ratings_count[rating] += 1\n\n # Append the counts to the ratings_data list\n for rating, count in ratings_count.items():\n ratings_data.append((pair['doc_id'], rating, count))\n",
"source": "from io import StringIO\n\n# Parse the downloaded CSV text into a DataFrame and display it\ncsv_buffer = StringIO(data)\ndf = pd.read_csv(csv_buffer)\ndf",
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": [],
"id": "9a8561fd-2dbf-477e-9ac1-4df6d5ebdc91"
"execution_count": 5,
"outputs": [
{
"execution_count": 5,
"output_type": "execute_result",
"data": {
"text/plain": " query docid David Tippett Eric Pugh Atita Arora \\\n0 projector screen 325961 3.0 3.0 NaN \n1 projector screen 47471 3.0 3.0 NaN \n2 projector screen 126679 3.0 3.0 NaN \n3 projector screen 254441 NaN 3.0 NaN \n4 projector screen 325958 NaN 3.0 NaN \n... ... ... ... ... ... \n2415 power supply 1667352 NaN 0.0 NaN \n2416 power supply 1667804 NaN 0.0 NaN \n2417 power supply 1667752 NaN 0.0 NaN \n2418 power supply 1667821 NaN 0.0 NaN \n2419 power supply 1667357 NaN 0.0 NaN \n\n Cody Collier Benjamin Trent Jeff Alexander Chris Marino \\\n0 NaN NaN NaN NaN \n1 NaN NaN NaN NaN \n2 NaN NaN NaN NaN \n3 NaN NaN NaN NaN \n4 NaN NaN NaN NaN \n... ... ... ... ... \n2415 NaN NaN NaN NaN \n2416 NaN NaN NaN NaN \n2417 NaN NaN NaN NaN \n2418 NaN NaN NaN NaN \n2419 NaN NaN NaN NaN \n\n [email protected] Michael Froh [email protected] \\\n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN NaN NaN \n4 NaN NaN NaN \n... ... ... ... \n2415 NaN NaN NaN \n2416 NaN NaN NaN \n2417 NaN NaN NaN \n2418 NaN NaN NaN \n2419 NaN NaN NaN \n\n Maximilian Werk David Fisher Ryan Finley Erica Schramma Peter Fries \n0 NaN NaN NaN NaN NaN \n1 NaN NaN NaN NaN NaN \n2 NaN NaN NaN NaN NaN \n3 NaN NaN NaN NaN NaN \n4 NaN NaN NaN NaN NaN \n... ... ... ... ... ... \n2415 NaN NaN NaN NaN NaN \n2416 NaN NaN NaN NaN NaN \n2417 NaN NaN NaN NaN NaN \n2418 NaN NaN NaN NaN NaN \n2419 NaN NaN NaN NaN NaN \n\n[2420 rows x 17 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>query</th>\n <th>docid</th>\n <th>David Tippett</th>\n <th>Eric Pugh</th>\n <th>Atita Arora</th>\n <th>Cody Collier</th>\n <th>Benjamin Trent</th>\n <th>Jeff Alexander</th>\n <th>Chris Marino</th>\n <th>[email protected]</th>\n <th>Michael Froh</th>\n <th>[email protected]</th>\n <th>Maximilian Werk</th>\n <th>David Fisher</th>\n <th>Ryan Finley</th>\n <th>Erica Schramma</th>\n <th>Peter Fries</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>projector screen</td>\n <td>325961</td>\n <td>3.0</td>\n <td>3.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>1</th>\n <td>projector screen</td>\n <td>47471</td>\n <td>3.0</td>\n <td>3.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2</th>\n <td>projector screen</td>\n <td>126679</td>\n <td>3.0</td>\n <td>3.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>3</th>\n <td>projector screen</td>\n <td>254441</td>\n <td>NaN</td>\n <td>3.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>4</th>\n <td>projector 
screen</td>\n <td>325958</td>\n <td>NaN</td>\n <td>3.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>2415</th>\n <td>power supply</td>\n <td>1667352</td>\n <td>NaN</td>\n <td>0.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2416</th>\n <td>power supply</td>\n <td>1667804</td>\n <td>NaN</td>\n <td>0.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2417</th>\n <td>power supply</td>\n <td>1667752</td>\n <td>NaN</td>\n <td>0.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2418</th>\n <td>power supply</td>\n <td>1667821</td>\n <td>NaN</td>\n <td>0.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>2419</th>\n <td>power supply</td>\n <td>1667357</td>\n <td>NaN</td>\n <td>0.0</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n 
<td>NaN</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n<p>2420 rows × 17 columns</p>\n</div>"
},
"metadata": {}
}
],
"id": "3ab80ff9-a9cb-4007-80e6-3d79a8bf762b"
},
{
"cell_type": "markdown",
Expand All @@ -109,13 +119,13 @@
},
{
"cell_type": "code",
"source": "# Convert ratings_data to a DataFrame\ndf = pd.DataFrame(ratings_data, columns=['doc_id', 'rating', 'count'])\n\n# Use crosstab to create a contingency table\ndata_crosstab = pd.crosstab(index=df['doc_id'], columns=df['rating'], values=df['count'], aggfunc='sum')\n\n# Drop any rows missing judgements\ndata_crosstab = data_crosstab.dropna(how='any')\n\n# Convert the DataFrame to the format expected by aggregate_raters\ndata_for_aggregation = data_crosstab.values\n\n# Aggregate the raters' data\ntable, _ = aggregate_raters(data_for_aggregation)",
"source": "# Rater columns follow 'query' and 'docid'. Skip a leftover 'judgments' helper\n# column so the cell still works when re-run against an already-mutated df.\nraters = [col for col in df.columns[2:] if col != 'judgments']\n\n# Collect each pair's non-missing ratings on a copy, leaving df untouched\njudged = df[['query', 'docid']].copy()\njudged['judgments'] = df[raters].values.tolist()\njudged['judgments'] = judged['judgments'].apply(lambda x: pd.Series(x).dropna().tolist())\n\n# One row per individual judgment, tagged with the pair's total judgment count\nrated = judged.explode('judgments')\nrated['count'] = rated.groupby(['query', 'docid'])['judgments'].transform('count')\n\n# NOTE(review): 'count' is the pair's total number of judgments, so each crosstab\n# cell sums that total rather than tallying one rating value, and aggregate_raters\n# is documented to take a subjects x raters array of labels - confirm this is the\n# intended input before relying on the kappa value.\n\n# Use crosstab to create a contingency table\ndata_crosstab = pd.crosstab(index=rated['docid'], columns=rated['judgments'], values=rated['count'], aggfunc='sum')\n\n# Drop any rows missing judgements\ndata_crosstab = data_crosstab.dropna(how='any')\n\n# Convert the DataFrame to the format expected by aggregate_raters\ndata_for_aggregation = data_crosstab.values\n\n# Aggregate the raters' data\ntable, _ = aggregate_raters(data_for_aggregation)",
"metadata": {
"trusted": true
},
"execution_count": null,
"execution_count": 6,
"outputs": [],
"id": "a7598308-129b-4628-ad3a-fc3d703f8205"
"id": "b170c198-ef0f-4974-bb4e-4b4311965c1e"
},
{
"cell_type": "markdown",
Expand All @@ -129,23 +139,24 @@
"metadata": {
"trusted": true
},
"execution_count": null,
"outputs": [],
"execution_count": 7,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": "<IPython.core.display.Markdown object>",
"text/markdown": "## Fleiss' Kappa: -0.2632"
},
"metadata": {}
}
],
"id": "25a613f9"
},
{
"cell_type": "markdown",
"source": "_This notebook was last updated 19-FEB-2024_",
"source": "_This notebook was last updated 17-JAN-2025_",
"metadata": {},
"id": "5704579e-2321-4629-8de0-6608b428e2b6"
},
{
"cell_type": "code",
"source": "",
"metadata": {},
"execution_count": null,
"outputs": [],
"id": "7203f6cc-c068-4f75-a59a-1f49c5555319"
}
]
}
Loading
Loading