|
2 | 2 | "cells": [ |
3 | 3 | { |
4 | 4 | "cell_type": "markdown", |
5 | | - "id": "2a5baace", |
| 5 | + "id": "e06275b8", |
6 | 6 | "metadata": {}, |
7 | 7 | "source": [ |
8 | 8 | "# How to Build a Video Deduplication System\n", |
|
27 | 27 | }, |
28 | 28 | { |
29 | 29 | "cell_type": "markdown", |
30 | | - "id": "69dfd6cf", |
| 30 | + "id": "9d953a08", |
31 | 31 | "metadata": {}, |
32 | 32 | "source": [ |
33 | 33 | "## Preparation\n", |
|
48 | 48 | { |
49 | 49 | "cell_type": "code", |
50 | 50 | "execution_count": 1, |
51 | | - "id": "0042fbfa", |
| 51 | + "id": "469282e9", |
52 | 52 | "metadata": {}, |
53 | 53 | "outputs": [], |
54 | 54 | "source": [ |
|
57 | 57 | }, |
58 | 58 | { |
59 | 59 | "cell_type": "markdown", |
60 | | - "id": "9738ca63", |
| 60 | + "id": "e4f5a71c", |
61 | 61 | "metadata": {}, |
62 | 62 | "source": [ |
63 | 63 | "### Prepare the data\n", |
|
75 | 75 | { |
76 | 76 | "cell_type": "code", |
77 | 77 | "execution_count": 2, |
78 | | - "id": "5023c1df", |
| 78 | + "id": "22543226", |
79 | 79 | "metadata": {}, |
80 | 80 | "outputs": [], |
81 | 81 | "source": [ |
|
86 | 86 | { |
87 | 87 | "cell_type": "code", |
88 | 88 | "execution_count": 3, |
89 | | - "id": "72f01cdf", |
| 89 | + "id": "091912bc", |
90 | 90 | "metadata": { |
91 | 91 | "scrolled": true |
92 | 92 | }, |
|
285 | 285 | }, |
286 | 286 | { |
287 | 287 | "cell_type": "markdown", |
288 | | - "id": "4d042c4c", |
| 288 | + "id": "ef4670d6", |
289 | 289 | "metadata": {}, |
290 | 290 | "source": [ |
291 | 291 | "Define a helper function to convert videos to GIFs so that we can have a look at these videos. " |
|
294 | 294 | { |
295 | 295 | "cell_type": "code", |
296 | 296 | "execution_count": 4, |
297 | | - "id": "99abfd40", |
| 297 | + "id": "3ea961d6", |
298 | 298 | "metadata": {}, |
299 | 299 | "outputs": [], |
300 | 300 | "source": [ |
|
334 | 334 | }, |
335 | 335 | { |
336 | 336 | "cell_type": "markdown", |
337 | | - "id": "619cae93", |
| 337 | + "id": "97440442", |
338 | 338 | "metadata": {}, |
339 | 339 | "source": [ |
340 | 340 | "Positive denotes a video that contains the same content event as the anchor video, while negative denotes a video that does not." |
|
343 | 343 | { |
344 | 344 | "cell_type": "code", |
345 | 345 | "execution_count": 5, |
346 | | - "id": "4770d11c", |
| 346 | + "id": "88519061", |
347 | 347 | "metadata": { |
348 | 348 | "scrolled": true |
349 | 349 | }, |
|
389 | 389 | }, |
390 | 390 | { |
391 | 391 | "cell_type": "markdown", |
392 | | - "id": "4999c2ed", |
| 392 | + "id": "5c707302", |
393 | 393 | "metadata": {}, |
394 | 394 | "source": [ |
395 | 395 | "### Create a Milvus Collection\n", |
|
400 | 400 | { |
401 | 401 | "cell_type": "code", |
402 | 402 | "execution_count": 6, |
403 | | - "id": "208ab462", |
| 403 | + "id": "df83801e", |
404 | 404 | "metadata": {}, |
405 | 405 | "outputs": [], |
406 | 406 | "source": [ |
|
432 | 432 | { |
433 | 433 | "cell_type": "code", |
434 | 434 | "execution_count": 7, |
435 | | - "id": "e9c9e6d8", |
| 435 | + "id": "ecd908ad", |
436 | 436 | "metadata": {}, |
437 | 437 | "outputs": [], |
438 | 438 | "source": [ |
|
441 | 441 | }, |
442 | 442 | { |
443 | 443 | "cell_type": "markdown", |
444 | | - "id": "6d4281ad", |
| 444 | + "id": "7acaf86b", |
445 | 445 | "metadata": {}, |
446 | 446 | "source": [ |
447 | 447 | "## Video Copy Detection\n", |
|
453 | 453 | }, |
454 | 454 | { |
455 | 455 | "cell_type": "markdown", |
456 | | - "id": "5eac8eac", |
| 456 | + "id": "5fdcd9b8", |
457 | 457 | "metadata": {}, |
458 | 458 | "source": [ |
459 | 459 | "### Load Video Embeddings into Milvus\n", |
|
464 | 464 | { |
465 | 465 | "cell_type": "code", |
466 | 466 | "execution_count": 8, |
467 | | - "id": "9f36c5df", |
| 467 | + "id": "1dc07d2d", |
468 | 468 | "metadata": {}, |
469 | 469 | "outputs": [ |
470 | 470 | { |
471 | 471 | "name": "stdout", |
472 | 472 | "output_type": "stream", |
473 | 473 | "text": [ |
474 | | - "CPU times: user 26min 27s, sys: 15.9 s, total: 26min 42s\n", |
475 | | - "Wall time: 1min 56s\n" |
| 474 | + "CPU times: user 26min 10s, sys: 14.1 s, total: 26min 24s\n", |
| 475 | + "Wall time: 1min 55s\n" |
476 | 476 | ] |
477 | 477 | } |
478 | 478 | ], |
|
496 | 496 | }, |
497 | 497 | { |
498 | 498 | "cell_type": "markdown", |
499 | | - "id": "e6c9fe9a", |
| 499 | + "id": "f96e5c4b", |
500 | 500 | "metadata": {}, |
501 | 501 | "source": [ |
502 | 502 | "Here is detailed explanation for each line of the code:\n", |
|
515 | 515 | { |
516 | 516 | "cell_type": "code", |
517 | 517 | "execution_count": 9, |
518 | | - "id": "5102998a", |
| 518 | + "id": "2738a2b3", |
519 | 519 | "metadata": { |
520 | 520 | "scrolled": true |
521 | 521 | }, |
|
540 | 540 | { |
541 | 541 | "cell_type": "code", |
542 | 542 | "execution_count": 10, |
543 | | - "id": "441801e0", |
| 543 | + "id": "97e3f3f7", |
544 | 544 | "metadata": {}, |
545 | 545 | "outputs": [ |
546 | 546 | { |
|
557 | 557 | }, |
558 | 558 | { |
559 | 559 | "cell_type": "markdown", |
560 | | - "id": "d1697e68", |
| 560 | + "id": "4af2a4e3", |
561 | 561 | "metadata": {}, |
562 | 562 | "source": [ |
563 | 563 | "## Evaluation\n", |
|
571 | 571 | { |
572 | 572 | "cell_type": "code", |
573 | 573 | "execution_count": 11, |
574 | | - "id": "b36f69ca", |
| 574 | + "id": "81b8b5ed", |
575 | 575 | "metadata": {}, |
576 | 576 | "outputs": [ |
577 | 577 | { |
578 | 578 | "name": "stdout", |
579 | 579 | "output_type": "stream", |
580 | 580 | "text": [ |
581 | | - "CPU times: user 5min 20s, sys: 2.28 s, total: 5min 23s\n", |
582 | | - "Wall time: 21.9 s\n" |
| 581 | + "CPU times: user 4min 16s, sys: 3.15 s, total: 4min 19s\n", |
| 582 | + "Wall time: 20.9 s\n" |
583 | 583 | ] |
584 | 584 | } |
585 | 585 | ], |
|
600 | 600 | { |
601 | 601 | "cell_type": "code", |
602 | 602 | "execution_count": 12, |
603 | | - "id": "eb79042a", |
| 603 | + "id": "249ab799", |
604 | 604 | "metadata": {}, |
605 | 605 | "outputs": [ |
606 | 606 | { |
|
623 | 623 | { |
624 | 624 | "cell_type": "code", |
625 | 625 | "execution_count": 13, |
626 | | - "id": "b68adbdf", |
| 626 | + "id": "a026d731", |
627 | 627 | "metadata": {}, |
628 | 628 | "outputs": [ |
629 | 629 | { |
|
652 | 652 | " </thead>\n", |
653 | 653 | " <tbody>\n", |
654 | 654 | " <tr>\n", |
655 | | - " <th>recall_at_k</th>\n", |
| 655 | + " <th>map_at_k</th>\n", |
656 | 656 | " <td>0.973977</td>\n", |
657 | 657 | " </tr>\n", |
658 | 658 | " </tbody>\n", |
659 | 659 | "</table>\n", |
660 | 660 | "</div>" |
661 | 661 | ], |
662 | 662 | "text/plain": [ |
663 | | - " mean_average_precision\n", |
664 | | - "recall_at_k 0.973977" |
| 663 | + " mean_average_precision\n", |
| 664 | + "map_at_k 0.973977" |
665 | 665 | ] |
666 | 666 | }, |
667 | 667 | "metadata": {}, |
|
671 | 671 | "source": [ |
672 | 672 | "benchmark = (\n", |
673 | 673 | " dc.with_metrics(['mean_average_precision',]) \\\n", |
674 | | - " .evaluate['ground_truth_event', 'topk_events'](name='recall_at_k') \\\n", |
| 674 | + " .evaluate['ground_truth_event', 'topk_events'](name='map_at_k') \\\n", |
675 | 675 | " .report()\n", |
676 | 676 | ")" |
677 | 677 | ] |
678 | 678 | }, |
679 | 679 | { |
680 | 680 | "cell_type": "markdown", |
681 | | - "id": "51de65b0", |
| 681 | + "id": "a2003b8d", |
682 | 682 | "metadata": {}, |
683 | 683 | "source": [ |
684 | 684 | "We achieved an excellent top-k metric on this small, easy dataset, which means that if we limit each event to k duplicate videos, almost all of them are recalled and almost all of the retrieved videos are true positives." |
685 | 685 | ] |
686 | 686 | }, |
687 | 687 | { |
688 | 688 | "cell_type": "markdown", |
689 | | - "id": "6373bddd", |
| 689 | + "id": "c1b44a40", |
690 | 690 | "metadata": {}, |
691 | 691 | "source": [ |
692 | 692 | "## Show query results\n", |
|
697 | 697 | { |
698 | 698 | "cell_type": "code", |
699 | 699 | "execution_count": 14, |
700 | | - "id": "8c96f4b1", |
| 700 | + "id": "6d461033", |
701 | 701 | "metadata": {}, |
702 | 702 | "outputs": [ |
703 | 703 | { |
|
735 | 735 | { |
736 | 736 | "cell_type": "code", |
737 | 737 | "execution_count": 15, |
738 | | - "id": "7cb4c0d7", |
| 738 | + "id": "486b42fd", |
739 | 739 | "metadata": {}, |
740 | 740 | "outputs": [ |
741 | 741 | { |
|
759 | 759 | { |
760 | 760 | "cell_type": "code", |
761 | 761 | "execution_count": 16, |
762 | | - "id": "9ca15405", |
| 762 | + "id": "b971fb56", |
763 | 763 | "metadata": { |
764 | 764 | "scrolled": true |
765 | 765 | }, |
|
785 | 785 | { |
786 | 786 | "cell_type": "code", |
787 | 787 | "execution_count": 17, |
788 | | - "id": "439afc5d", |
| 788 | + "id": "56e0526b", |
789 | 789 | "metadata": {}, |
790 | 790 | "outputs": [ |
791 | 791 | { |
|
809 | 809 | { |
810 | 810 | "cell_type": "code", |
811 | 811 | "execution_count": 18, |
812 | | - "id": "9aa5f6a1", |
| 812 | + "id": "549fb90f", |
813 | 813 | "metadata": {}, |
814 | 814 | "outputs": [], |
815 | 815 | "source": [ |
|
820 | 820 | { |
821 | 821 | "cell_type": "code", |
822 | 822 | "execution_count": null, |
823 | | - "id": "433957bf", |
| 823 | + "id": "d406323c", |
824 | 824 | "metadata": {}, |
825 | 825 | "outputs": [], |
826 | 826 | "source": [] |
|
0 commit comments