diff --git a/.gitignore b/.gitignore index 1a6e47c..5d3d8e7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ .DS_Store ~* -.ipynb_checkpoints \ No newline at end of file +.ipynb_checkpoints +/.idea +/.venv +__pycache__ +/gesture_train.csv \ No newline at end of file diff --git a/Live Coding Completo.ipynb b/Live Coding Completo.ipynb index 14f4d5a..9b4dd1e 100644 --- a/Live Coding Completo.ipynb +++ b/Live Coding Completo.ipynb @@ -1,10 +1,10 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", - "id": "b35fe87b", - "metadata": {}, + "metadata": { + "id": "b35fe87b" + }, "source": [ "# Rock - Paper - Scissors - Lizard - Spock\n", "\n", @@ -12,10 +12,7 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "78e5f376", - "metadata": {}, "source": [ "## Task 1\n", "\n", @@ -26,110 +23,169 @@ "* come è possible prendere un input da parte dell'utente\n", "* come è possibile creare una lista con le possibili scelte di gioco\n", "* come è possibile generare la mossa del computer in maniera casuale" - ] + ], + "metadata": { + "collapsed": false + } }, { "cell_type": "code", "execution_count": null, - "id": "6cfa68db", "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:23.287013Z", - "start_time": "2023-05-19T12:20:20.860873Z" - } + "id": "6cfa68db" }, "outputs": [], "source": [ - "intero=10\n", - "booleano=True\n", - "numero_float=0.13\n", - "stringa='pycon'\n" + "integer_number = 10\n", + "float_number = 0.13\n", + "boolean = True\n", + "string = \"pycon\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:23.303529Z", - "start_time": "2023-05-19T12:20:20.876923Z" + "colab": { + "base_uri": "https://localhost:8080/" }, - "collapsed": false + "executionInfo": { + "elapsed": 7, + "status": "ok", + "timestamp": 1699709841312, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "DLo0HbU8jwtm", + "outputId": "8fb84091-34ba-4dbd-94ea-3e2ccad4f316" }, "outputs": [], "source": [ - "print('Ciao Mondo :)')" + "print(\"Ciao Mondo :)\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:23.333073Z", - "start_time": "2023-05-19T12:20:20.891457Z" + "colab": { + "base_uri": "https://localhost:8080/" }, - "collapsed": false + "executionInfo": { + "elapsed": 279, + "status": "ok", + "timestamp": 1699709841586, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "0OX9TkPDjwtm", + "outputId": "236a58da-c29b-46cb-8eab-b2e50a15556d" }, "outputs": [], "source": [ - "print(intero)" + "print(integer_number)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:23.334653Z", - "start_time": "2023-05-19T12:20:20.905149Z" + "colab": { + "base_uri": "https://localhost:8080/" }, - "collapsed": false + "executionInfo": { + "elapsed": 5, + "status": "ok", + "timestamp": 1699709841586, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "W70GvccNjwtn", + "outputId": "a87c9464-85b8-4086-a9d4-d5cbceea3b2c" }, "outputs": [], "source": [ - "print(stringa,intero)" + "print(string, integer_number)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:26.414596Z", - "start_time": "2023-05-19T12:20:20.924676Z" + "colab": { + 
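Taken together, the concepts listed in Task 1 amount to the short sketch below. The prompt string here is an illustrative assumption; `possible_actions`, `random.choice` and the final `print` match the cells that follow.

import random

# Read the player's move, then draw the computer's move at random.
possible_actions = ["rock", "paper", "scissors"]
user_action = input(f"Enter a choice ({', '.join(possible_actions)}): ")  # assumed prompt text
computer_action = random.choice(possible_actions)
print(f"\nYou chose {user_action}, computer chose {computer_action}.\n")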
"base_uri": "https://localhost:8080/" }, - "collapsed": false + "executionInfo": { + "elapsed": 2795, + "status": "ok", + "timestamp": 1699709844379, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "sCQu4LkLjwtn", + "outputId": "c5b694ec-2b17-4f51-f402-a6eff4790264" }, "outputs": [], "source": [ - "risultato = input('digita un numero?')" + "risultato = input(\"digita un numero?\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:26.415102Z", - "start_time": "2023-05-19T12:20:26.202688Z" + "colab": { + "base_uri": "https://localhost:8080/" }, - "collapsed": false + "executionInfo": { + "elapsed": 8, + "status": "ok", + "timestamp": 1699709844379, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "Pslrt8Nwjwto", + "outputId": "57c4624b-b1e5-463a-c625-6e40ba2d13ce" }, "outputs": [], "source": [ - "print(risultato)\n" + "print(risultato)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:30.451911Z", - "start_time": "2023-05-19T12:20:26.220803Z" + "colab": { + "base_uri": "https://localhost:8080/" }, - "collapsed": false + "executionInfo": { + "elapsed": 2801, + "status": "ok", + "timestamp": 1699709847174, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "hy51y-owjwto", + "outputId": "a2168225-1f45-49b1-cc33-f2c021a60afa" }, "outputs": [], "source": [ @@ -140,54 +196,55 @@ "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:30.466948Z", - "start_time": "2023-05-19T12:20:30.278604Z" - }, - "collapsed": false + "id": "sO8y1DHqjwto" }, "outputs": [], "source": [ - "import random\n" + "import random" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:30.467949Z", - "start_time": "2023-05-19T12:20:30.295991Z" - }, - "collapsed": false + "id": "NxgXYbZ-jwtp" }, "outputs": [], "source": [ - "\n", "possible_actions = [\"rock\", \"paper\", \"scissors\"]\n", - "computer_action = random.choice(possible_actions)\n" + "computer_action = random.choice(possible_actions)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:30.469948Z", - "start_time": "2023-05-19T12:20:30.312305Z" + "colab": { + "base_uri": "https://localhost:8080/" }, - "collapsed": false + "executionInfo": { + "elapsed": 9, + "status": "ok", + "timestamp": 1699709847174, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "EKBwgQm1jwtp", + "outputId": "14137bef-862d-4d35-9cc8-5e7b0fd9b847" }, "outputs": [], "source": [ - "print(f\"\\nYou chose {user_action}, computer chose {computer_action}.\\n\")\n" + "print(f\"\\nYou chose {user_action}, computer chose {computer_action}.\\n\")" ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "f9faebb5", - "metadata": {}, + "metadata": { + "id": "f9faebb5" + }, "source": [ "## Task 2\n", "\n", @@ -197,12 +254,22 @@ { "cell_type": "code", "execution_count": null, - "id": "90af3eea", "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:30.469948Z", - "start_time": "2023-05-19T12:20:30.326982Z" - } + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { 
+ "elapsed": 5, + "status": "ok", + "timestamp": 1699709847174, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "90af3eea", + "outputId": "dd03b6a2-38f4-43e5-9807-ce912ff62e13" }, "outputs": [], "source": [ @@ -226,14 +293,14 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "f9182a9f", - "metadata": {}, + "metadata": { + "id": "f9182a9f" + }, "source": [ "### Task 2a - Ripetiamo le manche di gioco per fare una partita vera e propria\n", "\n", - "In questa cella useremo un **loop** Python (in particolare un ciclo `while`) per **giocare un numero indefinito di manche**. In particolare andremo a ripetere all'interno del ciclo `while` tutto quello che abbiamo fatto finora per la singola manche: \n", + "In questa cella useremo un **loop** Python (in particolare un ciclo `while`) per **giocare un numero indefinito di manche**. In particolare andremo a ripetere all'interno del ciclo `while` tutto quello che abbiamo fatto finora per la singola manche:\n", "- prendere in input dall'utente una scelta\n", "- generare la mossa del computer\n", "- confrontare le mosse\n", @@ -245,12 +312,22 @@ { "cell_type": "code", "execution_count": null, - "id": "47580fd9", "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:44.753623Z", - "start_time": "2023-05-19T12:20:30.346046Z" - } + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 4668, + "status": "ok", + "timestamp": 1699709851839, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "47580fd9", + "outputId": "921ac4d0-4b70-40ad-b75a-dd8041c5a911" }, "outputs": [], "source": [ @@ -284,23 +361,23 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "dd16cb05", - "metadata": {}, + "metadata": { + "id": "dd16cb05" + }, "source": [ "## Task 3: Ottimizzazioni nel codice\n", "\n", - "Ora che abbiamo una versione di base del gioco in cui possiamo giocare contro il computer e anche aumentare la durata di una partita, cerchiamo di essere un po'più **pro**. \n", + "Ora che abbiamo una versione di base del gioco in cui possiamo giocare contro il computer e anche aumentare la durata di una partita, cerchiamo di essere un po'più **pro**.\n", "\n", - "Andremo nelle prossime celle ad implementare una serie di ottimizzazioni che serviranno a rendere il nostro codice più manutenibile e leggibile. " + "Andremo nelle prossime celle ad implementare una serie di ottimizzazioni che serviranno a rendere il nostro codice più manutenibile e leggibile." 
] }, { - "attachments": {}, "cell_type": "markdown", - "id": "2dfe73b6", - "metadata": {}, + "metadata": { + "id": "2dfe73b6" + }, "source": [ "### Task 3a: Creiamo un enum\n", "\n", @@ -310,12 +387,8 @@ { "cell_type": "code", "execution_count": null, - "id": "0ec721b6", "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:44.753623Z", - "start_time": "2023-05-19T12:20:44.647673Z" - } + "id": "0ec721b6" }, "outputs": [], "source": [ @@ -331,24 +404,34 @@ "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:44.756156Z", - "start_time": "2023-05-19T12:20:44.665466Z" + "colab": { + "base_uri": "https://localhost:8080/" }, - "collapsed": false + "executionInfo": { + "elapsed": 10, + "status": "ok", + "timestamp": 1699709851839, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "4BTg9ygOjwtq", + "outputId": "fbfc8ae2-0cfe-42cd-ce09-0426001729cc" }, "outputs": [], "source": [ - "print('Action.Rock == Action.Rock',Action.Rock == Action.Rock)\n", - "print('Action.Rock == Action(0)',Action.Rock == Action(0))\n", - "print('Action(0)',Action(0))" + "print('Action.Rock == Action.Rock', Action.Rock == Action.Rock)\n", + "print('Action.Rock == Action(0)', Action.Rock == Action(0))\n", + "print('Action(0)', Action(0))" ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "92eea47a", - "metadata": {}, + "metadata": { + "id": "92eea47a" + }, "source": [ "### Task 3b: Usiamo delle funzioni per ottimizzare il codice\n", "\n", @@ -362,12 +445,8 @@ { "cell_type": "code", "execution_count": null, - "id": "08c5263b", "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:44.756156Z", - "start_time": "2023-05-19T12:20:44.679996Z" - } + "id": "08c5263b" }, "outputs": [], "source": [ @@ -377,7 +456,6 @@ " action = Action(selection)\n", " return action\n", "\n", - "\n", "def get_user_selection():\n", " choices = [f\"{action.name}[{action.value}]\" for action in Action]\n", " choices_str = \", \".join(choices)\n", @@ -390,11 +468,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:44.793367Z", - "start_time": "2023-05-19T12:20:44.696045Z" - }, - "collapsed": false + "id": "hldiDTxqjwtr" }, "outputs": [], "source": [ @@ -408,11 +482,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:44.794876Z", - "start_time": "2023-05-19T12:20:44.715076Z" - }, - "collapsed": false + "id": "eMYFUSiojwtr" }, "outputs": [], "source": [ @@ -437,12 +507,12 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "4ca7428d", - "metadata": {}, + "metadata": { + "id": "4ca7428d" + }, "source": [ - "Una volta create queste funzioni possiamo crearne un'unica che racchiuda tutta la logica di gioco che possiamo invocare (o chiamare) ogni volta che vogliamo iniziare una nuova partita: \n", + "Una volta create queste funzioni possiamo crearne un'unica che racchiuda tutta la logica di gioco che possiamo invocare (o chiamare) ogni volta che vogliamo iniziare una nuova partita:\n", "\n", "- `start_game()`\n" ] @@ -450,12 +520,8 @@ { "cell_type": "code", "execution_count": null, - "id": "8ebfa484", "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:44.794876Z", - "start_time": "2023-05-19T12:20:44.724834Z" - } + "id": "8ebfa484" }, "outputs": [], "source": [ @@ -463,7 +529,7 @@ " while True:\n", " try:\n", " user_action = get_user_selection()\n", 
- " except ValueError as e:\n", + " except ValueError:\n", " range_str = f\"[0, {len(Action) - 1}]\"\n", " print(f\"Invalid selection. Enter a value in range {range_str}\")\n", " continue\n", @@ -480,11 +546,21 @@ "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:50.862916Z", - "start_time": "2023-05-19T12:20:44.741502Z" + "colab": { + "base_uri": "https://localhost:8080/" }, - "collapsed": false + "executionInfo": { + "elapsed": 11137, + "status": "ok", + "timestamp": 1699709862968, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "I3BM0Z4Kjwtr", + "outputId": "a102b536-8581-40b3-9aed-3e9eea5c921d" }, "outputs": [], "source": [ @@ -492,10 +568,10 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "6955e8bc", - "metadata": {}, + "metadata": { + "id": "6955e8bc" + }, "source": [ "### Task 3c: Creiamo un dizionario con le mosse vincenti\n", "\n", @@ -507,12 +583,8 @@ { "cell_type": "code", "execution_count": null, - "id": "96893f8d", "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:50.864487Z", - "start_time": "2023-05-19T12:20:50.756391Z" - } + "id": "96893f8d" }, "outputs": [], "source": [ @@ -524,10 +596,10 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "c3205d3c", - "metadata": {}, + "metadata": { + "id": "c3205d3c" + }, "source": [ "### Task 3d: Usiamo il dizionario e l'operatore `in` per semplificare i controlli" ] @@ -535,12 +607,8 @@ { "cell_type": "code", "execution_count": null, - "id": "dce77a8a", "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:50.864487Z", - "start_time": "2023-05-19T12:20:50.774458Z" - } + "id": "dce77a8a" }, "outputs": [], "source": [ @@ -559,11 +627,21 @@ "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:56.122073Z", - "start_time": "2023-05-19T12:20:50.794541Z" + "colab": { + "base_uri": "https://localhost:8080/" }, - "collapsed": false + "executionInfo": { + "elapsed": 11858, + "status": "ok", + "timestamp": 1699709874816, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "cv6L7ahtjwtr", + "outputId": "609f516f-00c3-42ae-ba9e-1496fc56cae8" }, "outputs": [], "source": [ @@ -571,10 +649,10 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "ee1b1835", - "metadata": {}, + "metadata": { + "id": "ee1b1835" + }, "source": [ "### Task 3e: Aggiungiamo le altre mosse: `lizard` e `spock`\n", "\n", @@ -584,12 +662,8 @@ { "cell_type": "code", "execution_count": null, - "id": "60f6e2d5", "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:56.122073Z", - "start_time": "2023-05-19T12:20:56.018470Z" - } + "id": "60f6e2d5" }, "outputs": [], "source": [ @@ -610,14 +684,14 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "67881ce8", - "metadata": {}, + "metadata": { + "id": "67881ce8" + }, "source": [ - "### Task 3f: Rendiamo più *catchy* il gioco tramite ASCII art \n", + "### Task 3f: Rendiamo più *catchy* il gioco tramite ASCII art\n", "\n", - "Creeremo due nuovi dizionari: \n", + "Creeremo due nuovi dizionari:\n", "- in `ascii_action` metteremo le ascii art delle mosse\n", "- in `ascii_results` metteremo le ascii art dei possibili risultati" ] @@ -625,12 +699,8 @@ { "cell_type": "code", "execution_count": null, - "id": "4422813d", "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:56.182207Z", - 
"start_time": "2023-05-19T12:20:56.037218Z" - } + "id": "4422813d" }, "outputs": [], "source": [ @@ -682,11 +752,13 @@ " \"\"\"\n", "}\n", "\n", - "COMPUTER_WIN=-1\n", - "HUMAN_WIN=1\n", - "DROW=0\n", + "class Result(IntEnum):\n", + " COMPUTER_WIN = -1\n", + " HUMAN_WIN = 1\n", + " DRAW = 0\n", + "\n", "ascii_result = {\n", - " COMPUTER_WIN: r\"\"\"\n", + " Result.COMPUTER_WIN: r\"\"\"\n", " _____ ________ _________ _ _ _____ ___________\n", "/ __ \\ _ | \\/ || ___ \\ | | |_ _| ___| ___ \\\\\n", "| / \\/ | | | . . || |_/ / | | | | | | |__ | |_/ /\n", @@ -703,7 +775,7 @@ " \\/ \\/ \\___/\\_| \\_/\\____/ (_|_|_)\n", "\n", " \"\"\",\n", - " HUMAN_WIN: r\"\"\"\n", + " Result.HUMAN_WIN: r\"\"\"\n", " _ _ _ ____ ___ ___ _ _\n", "| | | | | | | \\/ | / _ \\ | \\ | |\n", "| |_| | | | | . . |/ /_\\ \\| \\| |\n", @@ -728,7 +800,7 @@ "(_|_|_)_| \\___/|_| |_| |_|\\___/ \\_/\\_/ (_|_|_)\n", "\n", " \"\"\",\n", - " DROW: r\"\"\"\n", + " Result.DRAW: r\"\"\"\n", " _ _ _\n", " | | (_) | |\n", " __ _ | |_ _ ___ __| | __ _ __ _ _ __ ___ ___\n", @@ -749,12 +821,12 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "75f2c6d5", - "metadata": {}, + "metadata": { + "id": "75f2c6d5" + }, "source": [ - "Dopodichè creeremo due funzioni per visualizzare agevolmente azioni e risultati in ASCII art: \n", + "Dopodichè creeremo due funzioni per visualizzare agevolmente azioni e risultati in ASCII art:\n", "- `display_action`\n", "- `display_results`" ] @@ -762,12 +834,8 @@ { "cell_type": "code", "execution_count": null, - "id": "c96921e2", "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:56.182207Z", - "start_time": "2023-05-19T12:20:56.052294Z" - } + "id": "c96921e2" }, "outputs": [], "source": [ @@ -782,11 +850,21 @@ "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:56.183207Z", - "start_time": "2023-05-19T12:20:56.064342Z" + "colab": { + "base_uri": "https://localhost:8080/" }, - "collapsed": false + "executionInfo": { + "elapsed": 15, + "status": "ok", + "timestamp": 1699709874816, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "18tMjb3sjwts", + "outputId": "24de5258-e6d3-4a97-8e09-17f6659485f3" }, "outputs": [], "source": [ @@ -794,10 +872,10 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "c1b643e3", - "metadata": {}, + "metadata": { + "id": "c1b643e3" + }, "source": [ "Per usare queste funzioni dovremo modificare anche la funzione `determine_winner`" ] @@ -805,12 +883,8 @@ { "cell_type": "code", "execution_count": null, - "id": "252c6e45", "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:20:56.184209Z", - "start_time": "2023-05-19T12:20:56.081582Z" - } + "id": "252c6e45" }, "outputs": [], "source": [ @@ -821,25 +895,35 @@ " display_action(computer_action)\n", " defeats = victories[user_action]\n", " if user_action == computer_action:\n", - " display_result(DROW)\n", - " return DROW\n", + " display_result(Result.DRAW)\n", + " return Result.DRAW\n", " elif computer_action in defeats:\n", - " display_result(HUMAN_WIN)\n", - " return HUMAN_WIN\n", + " display_result(Result.HUMAN_WIN)\n", + " return Result.HUMAN_WIN\n", " else:\n", - " display_result(COMPUTER_WIN)\n", - " return COMPUTER_WIN" + " display_result(Result.COMPUTER_WIN)\n", + " return Result.HUMAN_WIN" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:21:01.057176Z", - "start_time": 
"2023-05-19T12:20:56.101255Z" + "colab": { + "base_uri": "https://localhost:8080/" }, - "collapsed": false + "executionInfo": { + "elapsed": 4400, + "status": "ok", + "timestamp": 1699709879202, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "q_y93jZFjwtv", + "outputId": "9a6ba887-29e9-4baa-840d-d98ea434c6d0" }, "outputs": [], "source": [ @@ -847,83 +931,96 @@ ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "5e478a36", - "metadata": {}, + "metadata": { + "id": "5e478a36" + }, "source": [ "### Conserviamo i punteggi ottenuti manche per manche dagli utenti\n", "\n", - "Non ci accontenteremo più solo dei messaggi di vittoria della singola manche. Vogliamo proprio fare una partita per capire chi vince fra utente e computer dopo N manche. Ora possiamo fare una vera e propria partita contro il computer e decidere quando finirla! " + "Non ci accontenteremo più solo dei messaggi di vittoria della singola manche. Vogliamo proprio fare una partita per capire chi vince fra utente e computer dopo N manche. Ora possiamo fare una vera e propria partita contro il computer e decidere quando finirla!" ] }, { "cell_type": "code", "execution_count": null, - "id": "ec5766f6", + "outputs": [], + "source": [], "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:21:01.096343Z", - "start_time": "2023-05-19T12:21:01.055176Z" - } + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ec5766f6" }, "outputs": [], "source": [ "def print_game_results(game_results):\n", - " num_tied = game_results.count(DROW)/len(game_results)*100\n", - " num_player_wins = game_results.count(HUMAN_WIN)/len(game_results)*100\n", - " num_computer_wins =game_results.count(COMPUTER_WIN)/len(game_results)*100\n", - "\n", - " print( 'There were ', num_tied, '% tied games', \"\\nthe player won \", num_player_wins, '% of games\\nthe computer won ', num_computer_wins, '% of games\\nin a total of ', len(game_results), ' games')\n", + " num_tied = game_results.count(Result.DRAW) / len(game_results) * 100\n", + " num_player_wins = game_results.count(Result.HUMAN_WIN) / len(game_results) * 100\n", + " num_computer_wins = game_results.count(Result.COMPUTER_WIN) / len(game_results) * 100\n", + " print(f\"There were {num_tied}% tied games\\nthe player won {num_player_wins}% of games\\nthe computer won {num_computer_wins}% of games\\nin a total of {len(game_results)} games\")\n", "\n", "def start_game(num_games=1):\n", - " game_results=[]\n", + " game_results = []\n", " counter=0\n", " while True:\n", " try:\n", " user_action = get_user_selection()\n", - " except ValueError as e:\n", + " except ValueError:\n", " range_str = f\"[0, {len(Action) - 1}]\"\n", " print(f\"Invalid selection. 
Enter a value in range {range_str}\")\n", " continue\n", "\n", " computer_action = get_computer_selection()\n", " game_results.append(determine_winner(user_action, computer_action))\n", - " counter+=1\n", + " counter += 1\n", "\n", - " if counter>=num_games:\n", + " if counter >= num_games:\n", " break\n", " print_game_results(game_results)\n", - " return game_results\n", - "\n" + " return game_results" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:21:09.040634Z", - "start_time": "2023-05-19T12:21:01.070699Z" + "colab": { + "base_uri": "https://localhost:8080/" }, - "collapsed": false + "executionInfo": { + "elapsed": 4689, + "status": "ok", + "timestamp": 1699709883885, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "lcLQZxy3jwtw", + "outputId": "15a53e1a-4624-4a0a-e604-2b687f656fef" }, "outputs": [], "source": [ - "game_results=start_game(5)" + "game_results = start_game(5)" ] }, { - "attachments": {}, "cell_type": "markdown", - "id": "aae3e00d", - "metadata": {}, + "metadata": { + "id": "aae3e00d" + }, "source": [ "### Utilizziamo un'interfaccia grafica!\n", "\n", "Nella cella successiva andremo ad utilizzare una feature di Jupyter che ci consente di creare al volo un menu a tendina (dopotutto questa è una pagina HTML, no?) e di associare un comportamento alla scelta della voce dal menu!\n", "\n", - "Concetti connessi: \n", + "Concetti connessi:\n", "- list comprehension\n", "- `widgets.Dropdown`" ] @@ -931,498 +1028,116 @@ { "cell_type": "code", "execution_count": null, - "id": "a6b41097", "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:21:09.162321Z", - "start_time": "2023-05-19T12:21:08.955141Z" - } - }, - "outputs": [], - "source": [ - "import ipywidgets as widgets\n", - "options=[(action.name,action.value) for action in Action]\n", - "menu = widgets.Dropdown(\n", - " options=options ,\n", - " description='Chose:')\n", - "output = widgets.Output(layout={'border': '1px solid black'})\n", - "\n", - "def on_button_clicked(b):\n", - " output.clear_output()\n", - " with output:\n", - " computer_action = get_computer_selection()\n", - " determine_winner(Action(menu.value), computer_action)\n", - "\n", - "button = widgets.Button(description=\"Play!\", button_style='success', icon='check')\n", - "button.on_click(on_button_clicked)\n", - "box = widgets.VBox([menu, button, output])\n", - "\n", - "display(box)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "d95e966e", - "metadata": {}, - "source": [ - "## Time to use ML!\n", - "\n", - "Nelle celle successive andremo ad utilizzare il Machine Learning per addestrare un modello predittivo in grado di dedurre la mossa dell'utente a partire dall'inquadratura della mano ottenuta con la webcam.\n", - "\n", - "Installiamo le librerie necessarie e importiamole:" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "2065bc78", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "735944ea", - "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:26:06.922965Z", - "start_time": "2023-05-19T12:25:54.947034Z" - } - }, - "outputs": [], - "source": [ - "!pip install numpy\n", - "!pip install opencv-python\n", - "!pip install mediapipe\n", - "!pip install requests" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:26:10.333815Z", - 
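A quick usage sketch of `print_game_results`, assuming the `Result` enum and the function defined in the cells above have already been run:

sample_results = [Result.HUMAN_WIN, Result.HUMAN_WIN, Result.DRAW, Result.COMPUTER_WIN]
print_game_results(sample_results)
# There were 25.0% tied games
# the player won 50.0% of games
# the computer won 25.0% of games
# in a total of 4 games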
"start_time": "2023-05-19T12:26:10.296111Z" + "colab": { + "base_uri": "https://localhost:8080/" }, - "collapsed": false + "executionInfo": { + "elapsed": 15258, + "status": "ok", + "timestamp": 1699709899132, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "yE1oRjMn5G0f", + "outputId": "fe2da3a7-4310-44a5-c77d-560c2e9739e5" }, "outputs": [], "source": [ - "import numpy as np\n", - "import mediapipe as mp\n", - "import cv2" + "!pip install gdown numpy opencv-python mediapipe requests" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:26:11.205558Z", - "start_time": "2023-05-19T12:26:10.759149Z" + "colab": { + "base_uri": "https://localhost:8080/" }, - "collapsed": false + "executionInfo": { + "elapsed": 951, + "status": "ok", + "timestamp": 1699709900079, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "uddtBN5Hj0mh", + "outputId": "5aaf0849-e0d3-48be-fa9e-35bf7473506f" }, "outputs": [], "source": [ - "import requests\n", - "url = \"https://raw.githubusercontent.com/ntu-rris/google-mediapipe/main/data/gesture_train.csv\"\n", - "\n", - "# If repo is private - we need to add a token in header:\n", - "\n", - "\n", - "resp = requests.get(url)\n", - "\n", - "with open('./gesture_train.csv', 'wb') as f:\n", - " f.write(resp.content)\n" + "!gdown 1G9WKV8BbGFx5JySQoRQrZ3Y8c_Lg9q3N" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:27:07.433893Z", - "start_time": "2023-05-19T12:27:07.331888Z" + "colab": { + "base_uri": "https://localhost:8080/", + "height": 847 }, - "collapsed": false + "executionInfo": { + "elapsed": 7, + "status": "ok", + "timestamp": 1699709900079, + "user": { + "displayName": "Moreno Mazzocchetti", + "userId": "18050587376363424786" + }, + "user_tz": -60 + }, + "id": "a6b41097", + "outputId": "aadb9b9b-abec-4391-fe5b-93f908d710c6" }, "outputs": [], "source": [ - "# Define default camera intrinsic\n", - "img_width = 640\n", - "img_height = 480\n", - "intrin_default = {\n", - " 'fx': img_width*0.9, # Approx 0.7w < f < w https://www.learnopencv.com/approximate-focal-length-for-webcams-and-cell-phone-cameras/\n", - " 'fy': img_width*0.9,\n", - " 'cx': img_width*0.5, # Approx center of image\n", - " 'cy': img_height*0.5,\n", - " 'width': img_width,\n", - "}\n", - "class GestureRecognition:\n", - " def __init__(self):\n", - "\n", - " # 11 types of gesture 'name':class label\n", - " self.gesture = {\n", - " 'fist':0,'one':1,'two':2,'three':3,'four':4,'five':5,'six':6,\n", - " 'rock':7,'spiderman':8,'yeah':9,'ok':10,\n", - " }\n", - "\n", - " # Load training data\n", - " file = np.genfromtxt('./gesture_train.csv', delimiter=',')\n", - " # Extract input joint angles\n", - " angle = file[:,:-1].astype(np.float32)\n", - " # Extract output class label\n", - " label = file[:, -1].astype(np.float32)\n", - " # Use OpenCV KNN\n", - " self.knn = cv2.ml.KNearest_create()\n", - " self.knn.train(angle, cv2.ml.ROW_SAMPLE, label)\n", - "\n", - "\n", - "\n", - " def eval(self, angle):\n", - " # Use KNN for gesture recognition\n", - " data = np.asarray([angle], dtype=np.float32)\n", - " ret, results, neighbours ,dist = self.knn.findNearest(data, 3)\n", - " idx = int(results[0][0]) # Index of class label\n", - "\n", - " return list(self.gesture)[idx] # Return name of class label\n", - "\n", - "\n", - "class MediaPipeHand:\n", - " 
def __init__(self, static_image_mode=True, max_num_hands=1,\n", - " model_complexity=1, intrin=None):\n", - " self.max_num_hands = max_num_hands\n", - " if intrin is None:\n", - " self.intrin = intrin_default\n", - " else:\n", - " self.intrin = intrin\n", - "\n", - " # Access MediaPipe Solutions Python API\n", - " mp_hands = mp.solutions.hands\n", - " # help(mp_hands.Hands)\n", - "\n", - " # Initialize MediaPipe Hands\n", - " # static_image_mode:\n", - " # For video processing set to False:\n", - " # Will use previous frame to localize hand to reduce latency\n", - " # For unrelated images set to True:\n", - " # To allow hand detection to run on every input images\n", - "\n", - " # max_num_hands:\n", - " # Maximum number of hands to detect\n", - "\n", - " # model_complexity:\n", - " # Complexity of the hand landmark model: 0 or 1.\n", - " # Landmark accuracy as well as inference latency generally\n", - " # go up with the model complexity. Default to 1.\n", - "\n", - " # min_detection_confidence:\n", - " # Confidence value [0,1] from hand detection model\n", - " # for detection to be considered successful\n", - "\n", - " # min_tracking_confidence:\n", - " # Minimum confidence value [0,1] from landmark-tracking model\n", - " # for hand landmarks to be considered tracked successfully,\n", - " # or otherwise hand detection will be invoked automatically on the next input image.\n", - " # Setting it to a higher value can increase robustness of the solution,\n", - " # at the expense of a higher latency.\n", - " # Ignored if static_image_mode is true, where hand detection simply runs on every image.\n", - "\n", - " self.pipe = mp_hands.Hands(\n", - " static_image_mode=static_image_mode,\n", - " max_num_hands=max_num_hands,\n", - " model_complexity=model_complexity,\n", - " min_detection_confidence=0.5,\n", - " min_tracking_confidence=0.5)\n", - "\n", - " # Define hand parameter\n", - " self.param = []\n", - " for i in range(max_num_hands):\n", - " p = {\n", - " 'keypt' : np.zeros((21,2)), # 2D keypt in image coordinate (pixel)\n", - " 'joint' : np.zeros((21,3)), # 3D joint in camera coordinate (m)\n", - " 'class' : None, # Left / right / none hand\n", - " 'score' : 0, # Probability of predicted handedness (always>0.5, and opposite handedness=1-score)\n", - " 'angle' : np.zeros(15), # Flexion joint angles in degree\n", - " 'gesture' : None, # Type of hand gesture\n", - " 'rvec' : np.zeros(3), # Global rotation vector Note: this term is only used for solvepnp initialization\n", - " 'tvec' : np.asarray([0,0,0.6]), # Global translation vector (m) Note: Init z direc to some +ve dist (i.e. 
in front of camera), to prevent solvepnp from wrongly estimating z as -ve\n", - " 'fps' : -1, # Frame per sec\n", - " # https://github.com/google/mediapipe/issues/1351\n", - " # 'visible' : np.zeros(21), # Visibility: Likelihood [0,1] of being visible (present and not occluded) in the image\n", - " # 'presence': np.zeros(21), # Presence: Likelihood [0,1] of being present in the image or if its located outside the image\n", - " }\n", - " self.param.append(p)\n", - "\n", - "\n", - " def result_to_param(self, result, img):\n", - " # Convert mediapipe result to my own param\n", - " img_height, img_width, _ = img.shape\n", - "\n", - " # Reset param\n", - " for p in self.param:\n", - " p['class'] = None\n", - "\n", - " if result.multi_hand_landmarks is not None:\n", - " # Loop through different hands\n", - " for i, res in enumerate(result.multi_handedness):\n", - " if i>self.max_num_hands-1: break # Note: Need to check if exceed max number of hand\n", - " self.param[i]['class'] = res.classification[0].label\n", - " self.param[i]['score'] = res.classification[0].score\n", - "\n", - " # Loop through different hands\n", - " for i, res in enumerate(result.multi_hand_landmarks):\n", - " if i>self.max_num_hands-1: break # Note: Need to check if exceed max number of hand\n", - " # Loop through 21 landmark for each hand\n", - " for j, lm in enumerate(res.landmark):\n", - " self.param[i]['keypt'][j,0] = lm.x * img_width # Convert normalized coor to pixel [0,1] -> [0,width]\n", - " self.param[i]['keypt'][j,1] = lm.y * img_height # Convert normalized coor to pixel [0,1] -> [0,height]\n", - "\n", - " # Ignore it https://github.com/google/mediapipe/issues/1320\n", - " # self.param[i]['visible'][j] = lm.visibility\n", - " # self.param[i]['presence'][j] = lm.presence\n", - "\n", - " if result.multi_hand_world_landmarks is not None:\n", - " for i, res in enumerate(result.multi_hand_world_landmarks):\n", - " if i>self.max_num_hands-1: break # Note: Need to check if exceed max number of hand\n", - " # Loop through 21 landmark for each hand\n", - " for j, lm in enumerate(res.landmark):\n", - " self.param[i]['joint'][j,0] = lm.x\n", - " self.param[i]['joint'][j,1] = lm.y\n", - " self.param[i]['joint'][j,2] = lm.z\n", - "\n", - " # Convert relative 3D joint to angle\n", - " self.param[i]['angle'] = self.convert_joint_to_angle(self.param[i]['joint'])\n", - " # Convert relative 3D joint to camera coordinate\n", - " self.convert_joint_to_camera_coor(self.param[i], self.intrin)\n", - "\n", - " return self.param\n", - "\n", - "\n", - " def convert_joint_to_angle(self, joint):\n", - " # Get direction vector of bone from parent to child\n", - " v1 = joint[[0,1,2,3,0,5,6,7,0,9,10,11,0,13,14,15,0,17,18,19],:] # Parent joint\n", - " v2 = joint[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],:] # Child joint\n", - " v = v2 - v1 # [20,3]\n", - " # Normalize v\n", - " v = v/np.linalg.norm(v, axis=1)[:, np.newaxis]\n", - "\n", - " # Get angle using arcos of dot product\n", - " angle = np.arccos(np.einsum('nt,nt->n',\n", - " v[[0,1,2,4,5,6,8,9,10,12,13,14,16,17,18],:],\n", - " v[[1,2,3,5,6,7,9,10,11,13,14,15,17,18,19],:])) # [15,]\n", - "\n", - " return np.degrees(angle) # Convert radian to degree\n", - "\n", - "\n", - " def convert_joint_to_camera_coor(self, param, intrin, use_solvepnp=True):\n", - " # MediaPipe version 0.8.9.1 onwards:\n", - " # Given real-world 3D joint centered at middle MCP joint -> J_origin\n", - " # To estimate the 3D joint in camera coordinate -> J_camera = J_origin + tvec,\n", - " # We need to find 
the unknown translation vector -> tvec = [tx,ty,tz]\n", - " # Such that when J_camera is projected to the 2D image plane\n", - " # It matches the 2D keypoint locations\n", - "\n", - " # Considering all 21 keypoints,\n", - " # Each keypoints will form 2 eq, in total we have 42 eq 3 unknowns\n", - " # Since the equations are linear wrt [tx,ty,tz]\n", - " # We can solve the unknowns using linear algebra A.x = b, where x = [tx,ty,tz]\n", - "\n", - " # Consider a single keypoint (pixel x) and joint (X,Y,Z)\n", - " # Using the perspective projection eq:\n", - " # (x - cx)/fx = (X + tx) / (Z + tz)\n", - " # Similarly for pixel y:\n", - " # (y - cy)/fy = (Y + ty) / (Z + tz)\n", - " # Rearranging the above linear equations by keeping constants to the right hand side:\n", - " # fx.tx - (x - cx).tz = -fx.X + (x - cx).Z\n", - " # fy.ty - (y - cy).tz = -fy.Y + (y - cy).Z\n", - " # Therefore, we can factor out the unknowns and form a matrix eq:\n", - " # [fx 0 (x - cx)][tx] [-fx.X + (x - cx).Z]\n", - " # [ 0 fy (y - cy)][ty] = [-fy.Y + (y - cy).Z]\n", - " # [tz]\n", - "\n", - " idx = [i for i in range(21)] # Use all landmarks\n", - "\n", - " if use_solvepnp:\n", - " # Method 1: OpenCV solvePnP\n", - " fx, fy = intrin['fx'], intrin['fy']\n", - " cx, cy = intrin['cx'], intrin['cy']\n", - " intrin_mat = np.asarray([[fx,0,cx],[0,fy,cy],[0,0,1]])\n", - " dist_coeff = np.zeros(4)\n", - "\n", - " ret, param['rvec'], param['tvec'] = cv2.solvePnP(\n", - " param['joint'][idx], param['keypt'][idx],\n", - " intrin_mat, dist_coeff, param['rvec'], param['tvec'],\n", - " useExtrinsicGuess=True)\n", - " # Add tvec to all joints\n", - " param['joint'] += param['tvec']\n", - "\n", - " else:\n", - " # Method 2:\n", - " A = np.zeros((len(idx),2,3))\n", - " b = np.zeros((len(idx),2))\n", - "\n", - " A[:,0,0] = intrin['fx']\n", - " A[:,1,1] = intrin['fy']\n", - " A[:,0,2] = -(param['keypt'][idx,0] - intrin['cx'])\n", - " A[:,1,2] = -(param['keypt'][idx,1] - intrin['cy'])\n", - "\n", - " b[:,0] = -intrin['fx'] * param['joint'][idx,0] \\\n", - " + (param['keypt'][idx,0] - intrin['cx']) * param['joint'][idx,2]\n", - " b[:,1] = -intrin['fy'] * param['joint'][idx,1] \\\n", - " + (param['keypt'][idx,1] - intrin['cy']) * param['joint'][idx,2]\n", + "from support import *\n", "\n", - " A = A.reshape(-1,3) # [8,3]\n", - " b = b.flatten() # [8]\n", + "options=[(action.name, action.value) for action in Action]\n", + "output, button, box, menu = create_dropdown(options)\n", "\n", - " # Use the normal equation AT.A.x = AT.b to minimize the sum of the sq diff btw left and right sides\n", - " x = np.linalg.solve(A.T @ A, A.T @ b)\n", - " # Add tvec to all joints\n", - " param['joint'] += x\n", - "\n", - "\n", - "\n", - " def forward(self, img):\n", - "\n", - " # Extract result\n", - " result = self.pipe.process(img)\n", - "\n", - " # Convert result to my own param\n", - " param = self.result_to_param(result, img)\n", - "\n", - " return param\n", + "def on_button_clicked(b):\n", + " output.clear_output()\n", + " with output:\n", + " computer_action = get_computer_selection()\n", + " determine_winner(Action(menu.value), computer_action)\n", "\n", + "button.on_click(on_button_clicked)\n", "\n", - "\n" + "display(box)" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:27:09.195732Z", - "start_time": "2023-05-19T12:27:09.078715Z" - }, - "collapsed": false + "id": "d95e966e" }, - "outputs": [], "source": [ - "import io\n", - "\n", - "try:\n", - " from 
google.colab.output import eval_js\n", - " colab = True\n", - "except:\n", - " colab = False\n", - "\n", - "# colab=False\n", - "\n", - "if colab:\n", - " from IPython.display import display, Javascript\n", - " from google.colab.output import eval_js\n", - " from base64 import b64decode\n", - " from PIL import Image as PIL_Image\n", - "\n", - "\n", - " def take_photo(quality=0.8):\n", - " js = Javascript('''\n", - " async function takePhoto(quality) {\n", - " const div = document.createElement('div');\n", - " const capture = document.createElement('button');\n", - " capture.textContent = 'Capture';\n", - " div.appendChild(capture);\n", - "\n", - " const video = document.createElement('video');\n", - " video.style.display = 'block';\n", - " const stream = await navigator.mediaDevices.getUserMedia({video: true});\n", - "\n", - " document.body.appendChild(div);\n", - " div.appendChild(video);\n", - " video.srcObject = stream;\n", - " await video.play();\n", - "\n", - " // Resize the output to fit the video element.\n", - " google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);\n", - "\n", - " // Wait for Capture to be clicked.\n", - " await new Promise((resolve) => capture.onclick = resolve);\n", - "\n", - " const canvas = document.createElement('canvas');\n", - " canvas.width = video.videoWidth;\n", - " canvas.height = video.videoHeight;\n", - " canvas.getContext('2d').drawImage(video, 0, 0);\n", - " stream.getVideoTracks()[0].stop();\n", - " div.remove();\n", - " return canvas.toDataURL('image/jpeg', quality);\n", - " }\n", - " ''')\n", - " display(js)\n", - " data = eval_js('takePhoto({})'.format(quality))\n", - " binary = b64decode(data.split(',')[1])\n", - "\n", - "\n", - " image = PIL_Image.open(io.BytesIO(binary))\n", - " image_np = np.array(image)\n", - "\n", - " # with open(filename, 'wb') as f:\n", - " # f.write(binary)\n", - " return image_np\n", - "else:\n", - " import cv2\n", - " def take_photo(filename='photo.jpg', quality=0.8):\n", - " cam = cv2.VideoCapture(0)\n", - "\n", - " cv2.namedWindow(\"test\")\n", - "\n", - " img_counter = 0\n", - "\n", - " while True:\n", - " ret, frame = cam.read()\n", - " # Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses)\n", - " if not ret:\n", - " print(\"failed to grab frame\")\n", - " break\n", - " cv2.imshow(\"test\", frame)\n", - "\n", - " k = cv2.waitKey(1)\n", - " if k%256 == 27 or k%256 == 32 :\n", - " # ESC pressed\n", - " break\n", - "\n", - " cam.release()\n", - "\n", - " cv2.destroyAllWindows()\n", - "\n", - " # Preprocess image\n", - " img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n", - " # Flip image for 3rd person view\n", - " img = cv2.flip(img, 1)\n", + "## Time to use ML!\n", "\n", - " # To improve performance, optionally mark image as not writeable to pass by reference\n", - " img.flags.writeable = False\n", + "Nelle celle successive andremo ad utilizzare il Machine Learning per addestrare un modello predittivo in grado di dedurre la mossa dell'utente a partire dall'inquadratura della mano ottenuta con la webcam.\n", "\n", - " return img" + "Installiamo le librerie necessarie e importiamole:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:27:10.726845Z", - "start_time": "2023-05-19T12:27:10.611796Z" - }, - "collapsed": false + "id": "3CCoidG5jwtx" }, "outputs": [], "source": [ "def start_game(num_games=1):\n", - " game_results=[]\n", - " counter=0\n", + " game_results = []\n", + " counter = 
0\n", " # Load mediapipe hand class\n", " pipe = MediaPipeHand(static_image_mode=True, max_num_hands=1)\n", " # Load gesture recognition class\n", @@ -1430,36 +1145,29 @@ " while True:\n", " try:\n", " img = take_photo()\n", - "\n", - " # # Show the image which was just taken.\n", - " # plt.imshow(img)\n", - " # Feedforward to extract keypoint\n", " param = pipe.forward(img)\n", " # Evaluate gesture for all hands\n", "\n", " for p in param:\n", " if p['class'] is not None:\n", " p['gesture'] = gest.eval(p['angle'])\n", - " # print(p['class'])\n", - " # print(p['gesture'])\n", - "\n", - " if p['gesture']=='fist':\n", + " action = None\n", + " if p['gesture'] == 'fist':\n", " action = Action.Rock\n", - " elif p['gesture']=='five':\n", + " elif p['gesture'] == 'five':\n", " action = Action.Paper\n", - " elif (p['gesture']=='three') or (p['gesture']=='yeah'):\n", + " elif p['gesture'] == 'three' or p['gesture'] == 'yeah':\n", " action = Action.Scissors\n", - " elif (p['gesture']=='rock') :\n", + " elif p['gesture'] == 'rock':\n", " action = Action.Lizard\n", - " elif (p['gesture']=='four'):\n", + " elif p['gesture'] == 'four':\n", " action = Action.Spock\n", " if action is not None:\n", " computer_action = get_computer_selection()\n", " game_results.append(determine_winner(action, computer_action))\n", - " counter+=1\n", + " counter += 1\n", " print_game_results(game_results)\n", - " old_action=action\n", - "\n", + " break\n", " if counter>=num_games:\n", " break\n", " except Exception as err:\n", @@ -1475,19 +1183,24 @@ "cell_type": "code", "execution_count": null, "metadata": { - "ExecuteTime": { - "end_time": "2023-05-19T12:27:39.871557Z", - "start_time": "2023-05-19T12:27:12.133115Z" + "colab": { + "background_save": true, + "base_uri": "https://localhost:8080/", + "height": 17 }, - "collapsed": false + "id": "biwLyBjhjwtx" }, "outputs": [], "source": [ - "start_game(num_games=5)" + "start_game(num_games=3)" ] } ], "metadata": { + "colab": { + "name": "", + "version": "" + }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", @@ -1504,6 +1217,390 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.1" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "044c8c12d53c4f80b02a195ee1b10ded": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DropdownModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DropdownModel", + "_options_labels": [ + "Rock", + "Paper", + "Scissors", + "Lizard", + "Spock" + ], + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "DropdownView", + "description": "Chose:", + "description_tooltip": null, + "disabled": false, + "index": 0, + "layout": "IPY_MODEL_249fd3223b024434870c8581f5221f70", + "style": "IPY_MODEL_38d57a4f87494825b29f284516a9c312" + } + }, + "249fd3223b024434870c8581f5221f70": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, 
+ "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "38d57a4f87494825b29f284516a9c312": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "67200cd7f8924627aa18cc81af0a478d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": "1px solid black", + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9181989fa1c943b895c8d8920b57c0d4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + 
"overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bce58963af05485d9e28cc4836e298c7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "VBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_044c8c12d53c4f80b02a195ee1b10ded", + "IPY_MODEL_f698ce0c10b946beb5563253752f3f6b", + "IPY_MODEL_debad6f6bc9c4066bbb98c12cf703d26" + ], + "layout": "IPY_MODEL_9181989fa1c943b895c8d8920b57c0d4" + } + }, + "debad6f6bc9c4066bbb98c12cf703d26": { + "model_module": "@jupyter-widgets/output", + "model_module_version": "1.0.0", + "model_name": "OutputModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_67200cd7f8924627aa18cc81af0a478d", + "msg_id": "", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "You chose\n", + "\n", + " ______ _\n", + " | ___ \\ | |\n", + " | |_/ /___ ___| | __\n", + " | // _ \\ / __| |/ /\n", + " | |\\ \\ (_) | (__| <\n", + " \\_| \\_\\___/ \\___|_|\\_\\\n", + "\n", + " \n", + "The computer chose\n", + "\n", + " _____ _\n", + " / ___| (_)\n", + " \\ `--. ___ _ ___ ___ ___ _ __ ___\n", + " `--. \\/ __| / __/ __|/ _ \\| '__/ __|\n", + " /\\__/ / (__| \\__ \\__ \\ (_) | | \\__ \\\\\n", + " \\____/ \\___|_|___/___/\\___/|_| |___/\n", + " \n", + "\n", + " _ _ _ ____ ___ ___ _ _\n", + "| | | | | | | \\/ | / _ \\ | \\ | |\n", + "| |_| | | | | . . |/ /_\\ \\| \\| |\n", + "| _ | | | | |\\/| || _ || . ` |\n", + "| | | | |_| | | | || | | || |\\ |\n", + "\\_| |_/\\___/\\_| |_/\\_| |_/\\_| \\_/\n", + "\n", + "\n", + " _ _ _____ _ _ _____ _ _ _\n", + "| | | |_ _| \\ | |/ ___| | | | |\n", + "| | | | | | | \\| |\\ `--. | | | |\n", + "| |/\\| | | | | . ` | `--. 
\\ | | | |\n", + "\\ /\\ /_| |_| |\\ |/\\__/ / |_|_|_|\n", + " \\/ \\/ \\___/\\_| \\_/\\____/ (_|_|_)\n", + "\n", + "\n", + " __\n", + " / _|\n", + " | |_ ___ _ __ _ __ _____ __\n", + " | _/ _ \\| '__| | '_ \\ / _ \\ \\ /\\ / /\n", + " _ _ _| || (_) | | | | | | (_) \\ V V / _ _ _\n", + "(_|_|_)_| \\___/|_| |_| |_|\\___/ \\_/\\_/ (_|_|_)\n", + "\n", + " \n" + ] + } + ] + } + }, + "f12b87967424418d8f6fed872834a091": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ButtonStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ButtonStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "button_color": null, + "font_weight": "" + } + }, + "f698ce0c10b946beb5563253752f3f6b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ButtonModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ButtonModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ButtonView", + "button_style": "success", + "description": "Play!", + "disabled": false, + "icon": "check", + "layout": "IPY_MODEL_fcf2583dce504a9c960c73d0492264d5", + "style": "IPY_MODEL_f12b87967424418d8f6fed872834a091", + "tooltip": "" + } + }, + "fcf2583dce504a9c960c73d0492264d5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + } + } } }, "nbformat": 4, diff --git a/support.py b/support.py new file mode 100644 index 0000000..f4385e3 --- /dev/null +++ b/support.py @@ -0,0 +1,411 @@ +import numpy as np +import mediapipe as mp +import cv2 +import requests +import ipywidgets as widgets +import io + +###################################### + + +def create_dropdown(actions): + menu = widgets.Dropdown(options=actions, description="Chose:") + output = widgets.Output(layout={"border": "1px solid black"}) + + button = widgets.Button(description="Play!", button_style="success", icon="check") + box = widgets.VBox([menu, button, output]) + return output, button, box, menu + + +######################################## + + +def download_dataset(): + url = 
"https://raw.githubusercontent.com/ntu-rris/google-mediapipe/main/data/gesture_train.csv" + resp = requests.get(url) + with open("./gesture_train.csv", "wb") as f: + f.write(resp.content) + + +download_dataset() +######################################## + +# Define default camera intrinsic +img_width = 640 +img_height = 480 +intrin_default = { + "fx": img_width + * 0.9, # Approx 0.7w < f < w https://www.learnopencv.com/approximate-focal-length-for-webcams-and-cell-phone-cameras/ + "fy": img_width * 0.9, + "cx": img_width * 0.5, # Approx center of image + "cy": img_height * 0.5, + "width": img_width, +} +######################################### + + +class GestureRecognition: + import numpy as np + import mediapipe as mp + import cv2 + + def __init__(self): + # 11 types of gesture 'name':class label + self.gesture = { + "fist": 0, + "one": 1, + "two": 2, + "three": 3, + "four": 4, + "five": 5, + "six": 6, + "rock": 7, + "spiderman": 8, + "yeah": 9, + "ok": 10, + } + + # Load training data + file = np.genfromtxt("./gesture_train.csv", delimiter=",") + # Extract input joint angles + angle = file[:, :-1].astype(np.float32) + # Extract output class label + label = file[:, -1].astype(np.float32) + # Use OpenCV KNN + self.knn = cv2.ml.KNearest_create() + self.knn.train(angle, cv2.ml.ROW_SAMPLE, label) + + def eval(self, angle): + # Use KNN for gesture recognition + data = np.asarray([angle], dtype=np.float32) + ret, results, neighbours, dist = self.knn.findNearest(data, 3) + idx = int(results[0][0]) # Index of class label + + return list(self.gesture)[idx] # Return name of class label + + +##################################### + + +class MediaPipeHand: + def __init__( + self, static_image_mode=True, max_num_hands=1, model_complexity=1, intrin=None + ): + self.max_num_hands = max_num_hands + if intrin is None: + self.intrin = intrin_default + else: + self.intrin = intrin + + # Access MediaPipe Solutions Python API + mp_hands = mp.solutions.hands + # help(mp_hands.Hands) + + # Initialize MediaPipe Hands + # static_image_mode: + # For video processing set to False: + # Will use previous frame to localize hand to reduce latency + # For unrelated images set to True: + # To allow hand detection to run on every input images + + # max_num_hands: + # Maximum number of hands to detect + + # model_complexity: + # Complexity of the hand landmark model: 0 or 1. + # Landmark accuracy as well as inference latency generally + # go up with the model complexity. Default to 1. + + # min_detection_confidence: + # Confidence value [0,1] from hand detection model + # for detection to be considered successful + + # min_tracking_confidence: + # Minimum confidence value [0,1] from landmark-tracking model + # for hand landmarks to be considered tracked successfully, + # or otherwise hand detection will be invoked automatically on the next input image. + # Setting it to a higher value can increase robustness of the solution, + # at the expense of a higher latency. + # Ignored if static_image_mode is true, where hand detection simply runs on every image. 
+
+
+class MediaPipeHand:
+    def __init__(
+        self, static_image_mode=True, max_num_hands=1, model_complexity=1, intrin=None
+    ):
+        self.max_num_hands = max_num_hands
+        if intrin is None:
+            self.intrin = intrin_default
+        else:
+            self.intrin = intrin
+
+        # Access MediaPipe Solutions Python API
+        mp_hands = mp.solutions.hands
+        # help(mp_hands.Hands)
+
+        # Initialize MediaPipe Hands
+        # static_image_mode:
+        #   For video processing set to False:
+        #     will use the previous frame to localize the hand, to reduce latency
+        #   For unrelated images set to True:
+        #     to allow hand detection to run on every input image
+
+        # max_num_hands:
+        #   Maximum number of hands to detect
+
+        # model_complexity:
+        #   Complexity of the hand landmark model: 0 or 1.
+        #   Landmark accuracy as well as inference latency generally
+        #   go up with the model complexity. Defaults to 1.
+
+        # min_detection_confidence:
+        #   Confidence value [0,1] from the hand detection model
+        #   for detection to be considered successful
+
+        # min_tracking_confidence:
+        #   Minimum confidence value [0,1] from the landmark-tracking model
+        #   for hand landmarks to be considered tracked successfully,
+        #   otherwise hand detection is invoked automatically on the next input image.
+        #   Setting it to a higher value can increase robustness of the solution,
+        #   at the expense of a higher latency.
+        #   Ignored if static_image_mode is true, where hand detection simply runs on every image.
+
+        self.pipe = mp_hands.Hands(
+            static_image_mode=static_image_mode,
+            max_num_hands=max_num_hands,
+            model_complexity=model_complexity,
+            min_detection_confidence=0.5,
+            min_tracking_confidence=0.5,
+        )
+
+        # Define per-hand parameters
+        self.param = []
+        for i in range(max_num_hands):
+            p = {
+                "keypt": np.zeros((21, 2)),  # 2D keypoints in image coordinates (pixels)
+                "joint": np.zeros((21, 3)),  # 3D joints in camera coordinates (m)
+                "class": None,  # Left / right / no hand
+                "score": 0,  # Probability of predicted handedness (always > 0.5; opposite handedness = 1 - score)
+                "angle": np.zeros(15),  # Flexion joint angles in degrees
+                "gesture": None,  # Type of hand gesture
+                "rvec": np.zeros(3),  # Global rotation vector. Note: only used to initialize solvePnP
+                # Global translation vector (m). Note: initialize z to some positive
+                # distance (i.e. in front of the camera), to prevent solvePnP from
+                # wrongly estimating z as negative
+                "tvec": np.asarray([0, 0, 0.6]),
+                "fps": -1,  # Frames per sec
+                # https://github.com/google/mediapipe/issues/1351
+                # 'visible' : np.zeros(21),  # Visibility: likelihood [0,1] of being visible (present and not occluded) in the image
+                # 'presence': np.zeros(21),  # Presence: likelihood [0,1] of being present in the image, or whether it is located outside the image
+            }
+            self.param.append(p)
+
+    def result_to_param(self, result, img):
+        # Convert the MediaPipe result to my own param
+        img_height, img_width, _ = img.shape
+
+        # Reset param
+        for p in self.param:
+            p["class"] = None
+
+        if result.multi_hand_landmarks is not None:
+            # Loop through the detected hands
+            for i, res in enumerate(result.multi_handedness):
+                if i >= self.max_num_hands:
+                    break  # Skip hands beyond max_num_hands
+                self.param[i]["class"] = res.classification[0].label
+                self.param[i]["score"] = res.classification[0].score
+
+            # Loop through the detected hands
+            for i, res in enumerate(result.multi_hand_landmarks):
+                if i >= self.max_num_hands:
+                    break  # Skip hands beyond max_num_hands
+                # Loop through the 21 landmarks of each hand
+                for j, lm in enumerate(res.landmark):
+                    self.param[i]["keypt"][j, 0] = (
+                        lm.x * img_width
+                    )  # Convert normalized coords to pixels [0,1] -> [0,width]
+                    self.param[i]["keypt"][j, 1] = (
+                        lm.y * img_height
+                    )  # Convert normalized coords to pixels [0,1] -> [0,height]
+
+                    # Ignore it https://github.com/google/mediapipe/issues/1320
+                    # self.param[i]['visible'][j] = lm.visibility
+                    # self.param[i]['presence'][j] = lm.presence
+
+        if result.multi_hand_world_landmarks is not None:
+            for i, res in enumerate(result.multi_hand_world_landmarks):
+                if i >= self.max_num_hands:
+                    break  # Skip hands beyond max_num_hands
+                # Loop through the 21 landmarks of each hand
+                for j, lm in enumerate(res.landmark):
+                    self.param[i]["joint"][j, 0] = lm.x
+                    self.param[i]["joint"][j, 1] = lm.y
+                    self.param[i]["joint"][j, 2] = lm.z
+
+                # Convert relative 3D joints to angles
+                self.param[i]["angle"] = self.convert_joint_to_angle(
+                    self.param[i]["joint"]
+                )
+                # Convert relative 3D joints to camera coordinates
+                self.convert_joint_to_camera_coor(self.param[i], self.intrin)
+
+        return self.param
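+
+    # Worked example of the flexion-angle computation in convert_joint_to_angle
+    # below (illustrative numbers only): two unit bone vectors u = [1, 0, 0] and
+    # w = [0, 1, 0] give degrees(arccos(u . w)) = degrees(arccos(0)) = 90 degrees,
+    # while a straight finger yields nearly parallel bone vectors and an angle
+    # close to 0. The 15 resulting angles are the features fed to the KNN above.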
+    def convert_joint_to_angle(self, joint):
+        # Get the direction vector of each bone, from parent to child joint
+        v1 = joint[
+            [0, 1, 2, 3, 0, 5, 6, 7, 0, 9, 10, 11, 0, 13, 14, 15, 0, 17, 18, 19], :
+        ]  # Parent joints
+        v2 = joint[
+            [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], :
+        ]  # Child joints
+        v = v2 - v1  # [20,3]
+        # Normalize v
+        v = v / np.linalg.norm(v, axis=1)[:, np.newaxis]
+
+        # Get the angle from the arccos of the dot product
+        angle = np.arccos(
+            np.einsum(
+                "nt,nt->n",
+                v[[0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18], :],
+                v[[1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19], :],
+            )
+        )  # [15,]
+
+        return np.degrees(angle)  # Convert radians to degrees
+
+    def convert_joint_to_camera_coor(self, param, intrin, use_solvepnp=True):
+        # MediaPipe version 0.8.9.1 onwards:
+        # Given the real-world 3D joints centered at the middle MCP joint -> J_origin
+        # To estimate the 3D joints in camera coordinates -> J_camera = J_origin + tvec,
+        # we need to find the unknown translation vector -> tvec = [tx,ty,tz]
+        # such that, when J_camera is projected onto the 2D image plane,
+        # it matches the 2D keypoint locations
+
+        # Considering all 21 keypoints,
+        # each keypoint forms 2 equations, so in total we have 42 equations and 3 unknowns
+        # Since the equations are linear wrt [tx,ty,tz],
+        # we can solve for the unknowns using linear algebra A.x = b, where x = [tx,ty,tz]
+
+        # Consider a single keypoint (pixel x) and joint (X,Y,Z)
+        # Using the perspective projection equation:
+        #   (x - cx)/fx = (X + tx) / (Z + tz)
+        # Similarly for pixel y:
+        #   (y - cy)/fy = (Y + ty) / (Z + tz)
+        # Rearranging the above linear equations, keeping the constants on the right-hand side:
+        #   fx.tx - (x - cx).tz = -fx.X + (x - cx).Z
+        #   fy.ty - (y - cy).tz = -fy.Y + (y - cy).Z
+        # Therefore, we can factor out the unknowns and form a matrix equation:
+        #   [fx  0  -(x - cx)][tx]   [-fx.X + (x - cx).Z]
+        #   [ 0  fy -(y - cy)][ty] = [-fy.Y + (y - cy).Z]
+        #                    [tz]
+
+        idx = list(range(21))  # Use all 21 landmarks
+
+        if use_solvepnp:
+            # Method 1: OpenCV solvePnP
+            fx, fy = intrin["fx"], intrin["fy"]
+            cx, cy = intrin["cx"], intrin["cy"]
+            intrin_mat = np.asarray([[fx, 0, cx], [0, fy, cy], [0, 0, 1]])
+            dist_coeff = np.zeros(4)
+
+            ret, param["rvec"], param["tvec"] = cv2.solvePnP(
+                param["joint"][idx],
+                param["keypt"][idx],
+                intrin_mat,
+                dist_coeff,
+                param["rvec"],
+                param["tvec"],
+                useExtrinsicGuess=True,
+            )
+            # Add tvec to all joints
+            param["joint"] += param["tvec"]
+
+        else:
+            # Method 2: Solve the linear system directly
+            A = np.zeros((len(idx), 2, 3))
+            b = np.zeros((len(idx), 2))
+
+            A[:, 0, 0] = intrin["fx"]
+            A[:, 1, 1] = intrin["fy"]
+            A[:, 0, 2] = -(param["keypt"][idx, 0] - intrin["cx"])
+            A[:, 1, 2] = -(param["keypt"][idx, 1] - intrin["cy"])
+
+            b[:, 0] = (
+                -intrin["fx"] * param["joint"][idx, 0]
+                + (param["keypt"][idx, 0] - intrin["cx"]) * param["joint"][idx, 2]
+            )
+            b[:, 1] = (
+                -intrin["fy"] * param["joint"][idx, 1]
+                + (param["keypt"][idx, 1] - intrin["cy"]) * param["joint"][idx, 2]
+            )
+
+            A = A.reshape(-1, 3)  # [42,3]
+            b = b.flatten()  # [42]
+
+            # Use the normal equation AT.A.x = AT.b to minimize the sum of the
+            # squared differences between the left- and right-hand sides
+            x = np.linalg.solve(A.T @ A, A.T @ b)
+            # Add tvec to all joints
+            param["joint"] += x
+
+    def forward(self, img):
+        # Extract the MediaPipe result
+        result = self.pipe.process(img)
+
+        # Convert the result to my own param
+        param = self.result_to_param(result, img)
+
+        return param
+
+
+##########################################
+
+try:
+    from google.colab.output import eval_js
+
+    colab = True
+except ImportError:
+    colab = False
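+
+# Note: take_photo() below is defined per runtime. In Colab it captures a frame
+# through a browser widget driven by Javascript; locally it reads from the first
+# OpenCV camera device. Both variants return an RGB numpy array.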
+if colab:
+    from IPython.display import display, Javascript
+    from google.colab.output import eval_js
+    from base64 import b64decode
+    from PIL import Image as PIL_Image
+
+    def take_photo(quality=0.8):
+        js = Javascript(
+            """
+        async function takePhoto(quality) {
+          const div = document.createElement('div');
+          const capture = document.createElement('button');
+          capture.textContent = 'Capture';
+          div.appendChild(capture);
+
+          const video = document.createElement('video');
+          video.style.display = 'block';
+          const stream = await navigator.mediaDevices.getUserMedia({video: true});
+
+          document.body.appendChild(div);
+          div.appendChild(video);
+          video.srcObject = stream;
+          await video.play();
+
+          // Resize the output to fit the video element.
+          google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);
+
+          // Wait for Capture to be clicked.
+          await new Promise((resolve) => capture.onclick = resolve);
+
+          const canvas = document.createElement('canvas');
+          canvas.width = video.videoWidth;
+          canvas.height = video.videoHeight;
+          canvas.getContext('2d').drawImage(video, 0, 0);
+          stream.getVideoTracks()[0].stop();
+          div.remove();
+          return canvas.toDataURL('image/jpeg', quality);
+        }
+        """
+        )
+        display(js)
+        data = eval_js("takePhoto({})".format(quality))
+        binary = b64decode(data.split(",")[1])
+
+        image = PIL_Image.open(io.BytesIO(binary))
+        image_np = np.array(image)
+        return image_np
+else:
+
+    def take_photo(filename="photo.jpg", quality=0.8):
+        # Note: filename and quality are unused here; the signature is kept for
+        # parity with the Colab version above
+        cam = cv2.VideoCapture(0)
+
+        cv2.namedWindow("test")
+
+        frame = None
+        while True:
+            ret, frame = cam.read()
+            if not ret:
+                print("failed to grab frame")
+                break
+            cv2.imshow("test", frame)
+
+            k = cv2.waitKey(1)
+            if k % 256 == 27 or k % 256 == 32:
+                # ESC or SPACE pressed
+                break
+
+        cam.release()
+
+        cv2.destroyAllWindows()
+
+        if frame is None:
+            raise RuntimeError("could not read a frame from the camera")
+
+        # Preprocess image
+        # Convert from BGR (which OpenCV uses) to RGB (which MediaPipe expects)
+        img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        # Flip the image for a 3rd-person view
+        img = cv2.flip(img, 1)
+
+        # To improve performance, optionally mark the image as not writeable,
+        # to pass it by reference
+        img.flags.writeable = False
+
+        return img
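+
+
+# End-to-end sketch of how the pieces above compose (an illustrative addition,
+# not part of the original notebook flow; it assumes a working camera and a
+# visible hand in the captured frame):
+if __name__ == "__main__":
+    img = take_photo()
+    hand = MediaPipeHand(static_image_mode=True, max_num_hands=1)
+    param = hand.forward(img)
+    if param[0]["class"] is not None:
+        print("Gesture:", GestureRecognition().eval(param[0]["angle"]))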