From 32a34338a92d6a5b4bba0b8e8955a06459c988f2 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Fri, 29 Nov 2019 01:29:25 +0100 Subject: [PATCH] Update slides according to comments and suggestions --- slides/Manifest.toml | 385 ---------------------------------- slides/Project.toml | 6 - slides/README.md | 20 -- slides/figures.jl | 100 --------- slides/figures/car.svg | 17 -- slides/figures/car0.svg | 89 ++++++++ slides/figures/car1.svg | 89 ++++++++ slides/figures/car2.svg | 89 ++++++++ slides/figures/car3.svg | 89 ++++++++ slides/figures/car4.svg | 89 ++++++++ slides/figures/errors_ece.tex | 85 -------- slides/references.bib | 71 +------ slides/spotlight.pdf | Bin 131 -> 131 bytes slides/spotlight.tex | 110 ++++------ 14 files changed, 494 insertions(+), 745 deletions(-) delete mode 100644 slides/Manifest.toml delete mode 100644 slides/Project.toml delete mode 100644 slides/figures.jl delete mode 100644 slides/figures/car.svg create mode 100644 slides/figures/car0.svg create mode 100644 slides/figures/car1.svg create mode 100644 slides/figures/car2.svg create mode 100644 slides/figures/car3.svg create mode 100644 slides/figures/car4.svg delete mode 100644 slides/figures/errors_ece.tex diff --git a/slides/Manifest.toml b/slides/Manifest.toml deleted file mode 100644 index 36eabb5..0000000 --- a/slides/Manifest.toml +++ /dev/null @@ -1,385 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -[[ArgCheck]] -deps = ["Random"] -git-tree-sha1 = "dab25d711a1dedb707a55dbc1eb9fd578f76ff32" -uuid = "dce04be8-c92d-5529-be00-80e4d2c0e197" -version = "1.0.1" - -[[Base64]] -uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" - -[[BinDeps]] -deps = ["Compat", "Libdl", "SHA", "URIParser"] -git-tree-sha1 = "12093ca6cdd0ee547c39b1870e0c9c3f154d9ca9" -uuid = "9e28174c-4ba2-5203-b857-d8d62c4213ee" -version = "0.8.10" - -[[BinaryProvider]] -deps = ["Libdl", "SHA"] -git-tree-sha1 = "29995a7b317bbd06be147e1974a3541ce2502dca" -uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232" -version = "0.5.7" - -[[CSTParser]] -deps = ["Tokenize"] -git-tree-sha1 = "c69698c3d4a7255bc1b4bc2afc09f59db910243b" -uuid = "00ebfdb7-1f24-5e51-bd34-a7502290713f" -version = "0.6.2" - -[[CSV]] -deps = ["CategoricalArrays", "DataFrames", "Dates", "FilePathsBase", "LazyArrays", "Mmap", "Parsers", "PooledArrays", "Profile", "Tables", "Unicode", "WeakRefStrings"] -git-tree-sha1 = "0c3201d16c65a1453be158189d48df7c357eb7fb" -uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" -version = "0.5.14" - -[[CalibrationErrors]] -deps = ["DataStructures", "Distances", "LinearAlgebra", "Parameters", "Statistics", "StatsBase"] -git-tree-sha1 = "282878076e89e97de73c591c5b9e24e291fc865b" -uuid = "33913031-fe46-5864-950f-100836f47845" -version = "0.1.0" - -[[CalibrationPaper]] -deps = ["CalibrationErrors", "Distances", "Parameters", "Query", "SpecialFunctions", "Statistics", "StatsFuns"] -path = ".." -uuid = "5e6d2ce1-a021-4f7d-b692-0c7af47336b0" -version = "0.1.0" - -[[CategoricalArrays]] -deps = ["Compat", "DataAPI", "Future", "JSON", "Missings", "Printf", "Reexport", "Unicode"] -git-tree-sha1 = "45101c4d0df3946acb6e9bfcfd3a8c32abbd421b" -uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597" -version = "0.7.1" - -[[Compat]] -deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] -git-tree-sha1 = "ed2c4abadf84c53d9e58510b5fc48912c2336fbb" -uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "2.2.0" - -[[Crayons]] -deps = ["Test"] -git-tree-sha1 = "f621b8ef51fd2004c7cf157ea47f027fdeac5523" -uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" -version = "4.0.0" - -[[DataAPI]] -git-tree-sha1 = "674b67f344687a88310213ddfa8a2b3c76cc4252" -uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" -version = "1.1.0" - -[[DataFrames]] -deps = ["CategoricalArrays", "Compat", "DataAPI", "InvertedIndices", "IteratorInterfaceExtensions", "Missings", "PooledArrays", "Printf", "REPL", "Reexport", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] -git-tree-sha1 = "271528230c65a4517522e2968c3deed76b92b998" -uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -version = "0.19.4" - -[[DataStructures]] -deps = ["InteractiveUtils", "OrderedCollections"] -git-tree-sha1 = "2103e504f427e54ffa19af9ada225733a21f951f" -uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -version = "0.17.3" - -[[DataValueInterfaces]] -git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" -uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" -version = "1.0.0" - -[[DataValues]] -deps = ["DataValueInterfaces", "Dates"] -git-tree-sha1 = "b65fd071ca17aa265eb8c5ab0e522faa03a50d34" -uuid = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5" -version = "0.4.12" - -[[Dates]] -deps = ["Printf"] -uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" - -[[DefaultApplication]] -deps = ["Test"] -git-tree-sha1 = "a51d16b075dc52e22cde13b4a6e0ba4ba86649ee" -uuid = "3f0dd361-4fe0-5fc6-8523-80b14ec94d85" -version = "0.1.3" - -[[DelimitedFiles]] -deps = ["Mmap"] -uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" - -[[Distances]] -deps = ["LinearAlgebra", "Statistics"] -git-tree-sha1 = "23717536c81b63e250f682b0e0933769eecd1411" -uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" -version = "0.8.2" - -[[Distributed]] -deps = ["Random", "Serialization", "Sockets"] -uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" - -[[DocStringExtensions]] -deps = ["LibGit2", "Markdown", "Pkg", "Test"] -git-tree-sha1 = "88bb0edb352b16608036faadcc071adda068582a" -uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -version = "0.8.1" - -[[FilePathsBase]] -deps = ["Dates", "LinearAlgebra", "Printf", "Test", "UUIDs"] -git-tree-sha1 = "2ee1d999c462425e2f848524f732ed51bc1ab63a" -uuid = "48062228-2e41-5def-b9a4-89aafe57970f" -version = "0.6.2" - -[[FillArrays]] -deps = ["LinearAlgebra", "Random", "SparseArrays"] -git-tree-sha1 = "de38b0253ade98340fabaf220f368f6144541938" -uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" -version = "0.7.4" - -[[Future]] -deps = ["Random"] -uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" - -[[InteractiveUtils]] -deps = ["Markdown"] -uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" - -[[InvertedIndices]] -deps = ["Test"] -git-tree-sha1 = "15732c475062348b0165684ffe28e85ea8396afc" -uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" -version = "1.0.0" - -[[IterableTables]] -deps = ["DataValues", "IteratorInterfaceExtensions", "Requires", "TableTraits", "TableTraitsUtils", "Test"] -git-tree-sha1 = "18d6084924b2ac78deb65229cbcaa04d56ecb075" -uuid = "1c8ee90f-4401-5389-894e-7a04a3dc0f4d" -version = "0.11.0" - -[[IteratorInterfaceExtensions]] -git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" -uuid = "82899510-4779-5014-852e-03e436cf321d" -version = "1.0.0" - -[[JSON]] -deps = ["Dates", "Mmap", "Parsers", "Unicode"] -git-tree-sha1 = "b34d7cef7b337321e97d22242c3c2b91f476748e" -uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "0.21.0" - -[[LazyArrays]] -deps = ["FillArrays", "LinearAlgebra", "MacroTools", "StaticArrays"] -git-tree-sha1 = "01ec151cd0418fb05294b0230471765e0a65adb1" -uuid = "5078a376-72f3-5289-bfd5-ec5146d43c02" -version = "0.12.3" - -[[LibGit2]] -uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" - -[[Libdl]] -uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" - -[[LinearAlgebra]] -deps = ["Libdl"] -uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" - -[[Logging]] -uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" - -[[MacroTools]] -deps = ["CSTParser", "Compat", "DataStructures", "Test", "Tokenize"] -git-tree-sha1 = "d6e9dedb8c92c3465575442da456aec15a89ff76" -uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -version = "0.5.1" - -[[Markdown]] -deps = ["Base64"] -uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" - -[[Missings]] -deps = ["DataAPI"] -git-tree-sha1 = "de0a5ce9e5289f27df672ffabef4d1e5861247d5" -uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" -version = "0.4.3" - -[[Mmap]] -uuid = "a63ad114-7e13-5084-954f-fe012c677804" - -[[OrderedCollections]] -deps = ["Random", "Serialization", "Test"] -git-tree-sha1 = "c4c13474d23c60d20a67b217f1d7f22a40edf8f1" -uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.1.0" - -[[PGFPlotsX]] -deps = ["ArgCheck", "Crayons", "DataStructures", "Dates", "DefaultApplication", "DocStringExtensions", "MacroTools", "Missings", "Parameters", "Requires", "StatsBase", "Unicode"] -git-tree-sha1 = "169528d1574e4695db9b5e8304ddfeb4bb717a3f" -uuid = "8314cec4-20b6-5062-9cdb-752b83310925" -version = "1.1.0" - -[[Parameters]] -deps = ["OrderedCollections"] -git-tree-sha1 = "b62b2558efb1eef1fa44e4be5ff58a515c287e38" -uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a" -version = "0.12.0" - -[[Parsers]] -deps = ["Dates", "Test"] -git-tree-sha1 = "ef0af6c8601db18c282d092ccbd2f01f3f0cd70b" -uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "0.3.7" - -[[Pkg]] -deps = ["Dates", "LibGit2", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] -uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" - -[[PooledArrays]] -git-tree-sha1 = "6e8c38927cb6e9ae144f7277c753714861b27d14" -uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" -version = "0.5.2" - -[[Printf]] -deps = ["Unicode"] -uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" - -[[Profile]] -deps = ["Printf"] -uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" - -[[Query]] -deps = ["DataValues", "IterableTables", "MacroTools", "QueryOperators", "Statistics"] -git-tree-sha1 = "fd506b7101702daa4fb13cdffca771180d16c19c" -uuid = "1a8c2f83-1ff3-5112-b086-8aa67b057ba1" -version = "0.12.2" - -[[QueryOperators]] -deps = ["DataStructures", "DataValues", "IteratorInterfaceExtensions", "TableShowUtils"] -git-tree-sha1 = "7ffa41275edd6e0374a9ae8e3b048840867a06d9" -uuid = "2aef5ad7-51ca-5a8f-8e88-e75cf067b44b" -version = "0.9.1" - -[[REPL]] -deps = ["InteractiveUtils", "Markdown", "Sockets"] -uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" - -[[Random]] -deps = ["Serialization"] -uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" - -[[Reexport]] -deps = ["Pkg"] -git-tree-sha1 = "7b1d07f411bc8ddb7977ec7f377b97b158514fe0" -uuid = "189a3867-3050-52da-a836-e630ba90ab69" -version = "0.2.0" - -[[Requires]] -deps = ["Test"] -git-tree-sha1 = "f6fbf4ba64d295e146e49e021207993b6b48c7d1" -uuid = "ae029012-a4dd-5104-9daa-d747884805df" -version = "0.5.2" - -[[Rmath]] -deps = ["BinaryProvider", "Libdl", "Random", "Statistics", "Test"] -git-tree-sha1 = "9a6c758cdf73036c3239b0afbea790def1dabff9" -uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa" -version = "0.5.0" - -[[SHA]] -uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" - -[[Serialization]] -uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" - -[[SharedArrays]] -deps = ["Distributed", "Mmap", "Random", "Serialization"] -uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" - -[[Sockets]] -uuid = "6462fe0b-24de-5631-8697-dd941f90decc" - -[[SortingAlgorithms]] -deps = ["DataStructures", "Random", "Test"] -git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd" -uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" -version = "0.3.1" - -[[SparseArrays]] -deps = ["LinearAlgebra", "Random"] -uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" - -[[SpecialFunctions]] -deps = ["BinDeps", "BinaryProvider", "Libdl", "Test"] -git-tree-sha1 = "0b45dc2e45ed77f445617b99ff2adf0f5b0f23ea" -uuid = "276daf66-3868-5448-9aa4-cd146d93841b" -version = "0.7.2" - -[[StaticArrays]] -deps = ["LinearAlgebra", "Random", "Statistics"] -git-tree-sha1 = "1085ffbf5fd48fdba64ef8e902ca429c4e1212d3" -uuid = "90137ffa-7385-5640-81b9-e52037218182" -version = "0.11.1" - -[[Statistics]] -deps = ["LinearAlgebra", "SparseArrays"] -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" - -[[StatsBase]] -deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"] -git-tree-sha1 = "c53e809e63fe5cf5de13632090bc3520649c9950" -uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -version = "0.32.0" - -[[StatsFuns]] -deps = ["Rmath", "SpecialFunctions", "Test"] -git-tree-sha1 = "b3a4e86aa13c732b8a8c0ba0c3d3264f55e6bb3e" -uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -version = "0.8.0" - -[[TableShowUtils]] -deps = ["DataValues", "Dates", "JSON", "Markdown", "Test"] -git-tree-sha1 = "14c54e1e96431fb87f0d2f5983f090f1b9d06457" -uuid = "5e66a065-1f0a-5976-b372-e0b8c017ca10" -version = "0.2.5" - -[[TableTraits]] -deps = ["IteratorInterfaceExtensions"] -git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e" -uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" -version = "1.0.0" - -[[TableTraitsUtils]] -deps = ["DataValues", "IteratorInterfaceExtensions", "Missings", "TableTraits"] -git-tree-sha1 = "fbc53f586630fa2a72190f7a792ba4106fd0776a" -uuid = "382cd787-c1b6-5bf2-a167-d5b971a19bda" -version = "1.0.0" - -[[Tables]] -deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"] -git-tree-sha1 = "aaed7b3b00248ff6a794375ad6adf30f30ca5591" -uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" -version = "0.2.11" - -[[Test]] -deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] -uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[[Tokenize]] -git-tree-sha1 = "dfcdbbfb2d0370716c815cbd6f8a364efb6f42cf" -uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624" -version = "0.5.6" - -[[URIParser]] -deps = ["Test", "Unicode"] -git-tree-sha1 = "6ddf8244220dfda2f17539fa8c9de20d6c575b69" -uuid = "30578b45-9adc-5946-b283-645ec420af67" -version = "0.4.0" - -[[UUIDs]] -deps = ["Random", "SHA"] -uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" - -[[Unicode]] -uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" - -[[WeakRefStrings]] -deps = ["Random", "Test"] -git-tree-sha1 = "9a0bb82eede528debe631b642eeb48a631a69bc2" -uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5" -version = "0.6.1" diff --git a/slides/Project.toml b/slides/Project.toml deleted file mode 100644 index fc1050b..0000000 --- a/slides/Project.toml +++ /dev/null @@ -1,6 +0,0 @@ -[deps] -CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" -CalibrationPaper = "5e6d2ce1-a021-4f7d-b692-0c7af47336b0" -DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -PGFPlotsX = "8314cec4-20b6-5062-9cdb-752b83310925" -StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" diff --git a/slides/README.md b/slides/README.md index eef0253..2ab1c5e 100644 --- a/slides/README.md +++ b/slides/README.md @@ -4,26 +4,6 @@ This folder contains the source code of the slides summarizing the paper "Calibration tests in multi-class classification: A unifying framework" by Widmann, Lindsten, and Zachariah. -## Generate the figure - -Open a terminal in the current directory and install all required Julia packages -by running -```shell -julia --project=. -e "using Pkg; Pkg.instantiate()" -``` -Afterwards start a Julia REPL -```shell -julia --project=. -``` -and include the file `figures.jl` with -``` julia -julia> include("figures.jl") -``` -You can regenerate the figure by running -``` julia -julia> errors_ece() -``` - ## Compile the slides The slides can be compiled as a PDF file by running diff --git a/slides/figures.jl b/slides/figures.jl deleted file mode 100644 index fcb3e3a..0000000 --- a/slides/figures.jl +++ /dev/null @@ -1,100 +0,0 @@ -using CalibrationPaper -using CSV -using DataFrames -using PGFPlotsX -using StatsBase - -function errors_ece() - # initialize group plot - @pgf plt = GroupPlot( - { - group_style = - { - group_name = "group", - group_size = "3 by 1", - horizontal_sep = raw"0.02\textwidth", - vertical_sep = "0pt", - ylabels_at = "edge left", - yticklabels_at = "edge left" - }, - ylabel = raw"\# runs", - no_markers, - label_style = { font = raw"\small" }, - tick_label_style = { font = raw"\tiny" }, - grid = "major", - width = raw"0.19\textwidth", - height = raw"0.08\textwidth", - "every x tick scale label/.style" = { at = "{(1,0)}", anchor = "west" }, - "scale only axis", - ymin = 0, ymax = 3500, - legend_cell_align = "left", - legend_style = - { - fill = "none", - draw = "none", - font = raw"\small", - inner_sep = "0pt", - at = "({1.1, 1})", - anchor = "north west" } - }) - - # define displayed models - models = [CalibrationPaperModel(10, 0.1, 0.0, true), - CalibrationPaperModel(10, 0.1, 0.5, true), - CalibrationPaperModel(10, 0.1, 1.0, false)] - - # load experimental results - datadir = joinpath(@__DIR__, "..", "experiments", "data", "errors") - df = CSV.read(joinpath(datadir, "ECE_uniform.csv")) - - # for all studied experiments and models - for (i, model) in enumerate(models) - # load estimates - estimates = collect_estimates(df, model) - - # compute histogram - hist = fit(Histogram, estimates, closed = :left) - - # create axis object with histogram - @pgf ax = Axis(PlotInc( - { - ybar_interval, - fill = "Dark2-A!30!white", - forget_plot - }, - Table(hist))) - - # add mean of estimates - @pgf push!(ax, VLine({ solid, thick, "Dark2-B" }, mean(estimates))) - if i == 3 - push!(ax, - raw"\addlegendimage{solid, thick, Dark2-B, no markers};", - LegendEntry("mean estimate")) - end - - # compute true value - analytic = CalibrationPaper.analytic_ece(model) - - # plot true value - @pgf push!(ax, VLine({ dashed, thick, "Dark2-C" }, analytic)) - if i == 3 - push!(ax, raw"\addlegendimage{dashed, thick, Dark2-C, no markers};", - LegendEntry(raw"$\mathrm{ECE}$")) - end - - # hack so that limits are updated as well - @pgf push!(ax, PlotInc({ draw = "none" }, Coordinates([analytic], [0]))) - - # add axis to group plot - push!(plt, ax) - end - - # save histogram - figuresdir = joinpath(@__DIR__, "figures") - isdir(figuresdir) || mkdir(figuresdir) - picture = TikzPicture(plt, - raw"\node[anchor=north, font=\small] at ($(group c1r1.west |- group c1r1.outer south)!0.5!(group c3r1.east |- group c3r1.outer south)$){$\mathrm{ECE}$ estimate};") - pgfsave(joinpath(figuresdir, "errors_ece.tex"), picture; include_preamble = false) - - nothing -end diff --git a/slides/figures/car.svg b/slides/figures/car.svg deleted file mode 100644 index b8fe910..0000000 --- a/slides/figures/car.svg +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - - - image/svg+xml - - - - - - - - \ No newline at end of file diff --git a/slides/figures/car0.svg b/slides/figures/car0.svg new file mode 100644 index 0000000..54dc3b5 --- /dev/null +++ b/slides/figures/car0.svg @@ -0,0 +1,89 @@ + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + diff --git a/slides/figures/car1.svg b/slides/figures/car1.svg new file mode 100644 index 0000000..3be8735 --- /dev/null +++ b/slides/figures/car1.svg @@ -0,0 +1,89 @@ + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + diff --git a/slides/figures/car2.svg b/slides/figures/car2.svg new file mode 100644 index 0000000..e8ad59b --- /dev/null +++ b/slides/figures/car2.svg @@ -0,0 +1,89 @@ + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + diff --git a/slides/figures/car3.svg b/slides/figures/car3.svg new file mode 100644 index 0000000..e7e6ae5 --- /dev/null +++ b/slides/figures/car3.svg @@ -0,0 +1,89 @@ + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + diff --git a/slides/figures/car4.svg b/slides/figures/car4.svg new file mode 100644 index 0000000..f0c4308 --- /dev/null +++ b/slides/figures/car4.svg @@ -0,0 +1,89 @@ + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + diff --git a/slides/figures/errors_ece.tex b/slides/figures/errors_ece.tex deleted file mode 100644 index 9fdb407..0000000 --- a/slides/figures/errors_ece.tex +++ /dev/null @@ -1,85 +0,0 @@ -\begin{tikzpicture} -\begin{groupplot}[group style={group name={group}, group size={3 by 1}, horizontal sep={0.02\textwidth}, vertical sep={0pt}, ylabels at={edge left}, yticklabels at={edge left}}, ylabel={\# runs}, no markers, label style={font={\small}}, tick label style={font={\tiny}}, grid={major}, width={0.19\textwidth}, height={0.08\textwidth}, every x tick scale label/.style={at={{(1,0)}}, anchor={west}}, scale only axis, ymin={0}, ymax={3500}, legend cell align={left}, legend style={fill={none}, draw={none}, font={\small}, inner sep={0pt}, at={({1.1, 1})}, anchor={north west}}] - \nextgroupplot - \addplot+[ybar interval, fill={Dark2-A!30!white}, forget plot] - table[row sep={\\}] - { - \\ - 0.17 7.0 \\ - 0.18 55.0 \\ - 0.19 229.0 \\ - 0.2 796.0 \\ - 0.21 1782.0 \\ - 0.22 2550.0 \\ - 0.23 2307.0 \\ - 0.24 1389.0 \\ - 0.25 667.0 \\ - 0.26 171.0 \\ - 0.27 42.0 \\ - 0.28 5.0 \\ - 0.29 0.0 \\ - } - ; - \draw[solid, thick, Dark2-B] (0.22876019647666532,\pgfkeysvalueof{/pgfplots/ymin})--(0.22876019647666532,\pgfkeysvalueof{/pgfplots/ymax}); - \draw[dashed, thick, Dark2-C] (0.0,\pgfkeysvalueof{/pgfplots/ymin})--(0.0,\pgfkeysvalueof{/pgfplots/ymax}); - \addplot+[draw={none}] - coordinates { - (0.0,0) - } - ; - \nextgroupplot - \addplot+[ybar interval, fill={Dark2-A!30!white}, forget plot] - table[row sep={\\}] - { - \\ - 0.42 23.0 \\ - 0.44 207.0 \\ - 0.46 838.0 \\ - 0.48 1942.0 \\ - 0.5 2982.0 \\ - 0.52 2440.0 \\ - 0.54 1165.0 \\ - 0.56 345.0 \\ - 0.58 52.0 \\ - 0.6 6.0 \\ - 0.62 0.0 \\ - } - ; - \draw[solid, thick, Dark2-B] (0.5133844252714233,\pgfkeysvalueof{/pgfplots/ymin})--(0.5133844252714233,\pgfkeysvalueof{/pgfplots/ymax}); - \draw[dashed, thick, Dark2-C] (0.45,\pgfkeysvalueof{/pgfplots/ymin})--(0.45,\pgfkeysvalueof{/pgfplots/ymax}); - \addplot+[draw={none}] - coordinates { - (0.45,0) - } - ; - \nextgroupplot - \addplot+[ybar interval, fill={Dark2-A!30!white}, forget plot] - table[row sep={\\}] - { - \\ - 0.3 15.0 \\ - 0.32 118.0 \\ - 0.34 804.0 \\ - 0.36 2310.0 \\ - 0.38 3279.0 \\ - 0.4 2413.0 \\ - 0.42 863.0 \\ - 0.44 185.0 \\ - 0.46 13.0 \\ - 0.48 0.0 \\ - } - ; - \draw[solid, thick, Dark2-B] (0.3908207321571612,\pgfkeysvalueof{/pgfplots/ymin})--(0.3908207321571612,\pgfkeysvalueof{/pgfplots/ymax}); - \addlegendimage{solid, thick, Dark2-B, no markers}; - \addlegendentry {mean estimate} - \draw[dashed, thick, Dark2-C] (0.7106418012290426,\pgfkeysvalueof{/pgfplots/ymin})--(0.7106418012290426,\pgfkeysvalueof{/pgfplots/ymax}); - \addlegendimage{dashed, thick, Dark2-C, no markers}; - \addlegendentry {$\mathrm{ECE}$} - \addplot+[draw={none}] - coordinates { - (0.7106418012290426,0) - } - ; -\end{groupplot} -\node[anchor=north, font=\small] at ($(group c1r1.west |- group c1r1.outer south)!0.5!(group c3r1.east |- group c3r1.outer south)$){$\mathrm{ECE}$ estimate}; -\end{tikzpicture} diff --git a/slides/references.bib b/slides/references.bib index ed89727..e341080 100644 --- a/slides/references.bib +++ b/slides/references.bib @@ -1,80 +1,15 @@ -@InProceedings{guo17_calib_moder_neural_networ, - author = {Chuan Guo and Geoff Pleiss and Yu Sun and Kilian - Q. Weinberger}, - title = {On Calibration of Modern Neural Networks}, - booktitle = {Proceedings of the 34th International Conference on - Machine Learning}, - year = 2017, - volume = 70, - pages = {1321--1330}, - url = {http://proceedings.mlr.press/v70/guo17a.html}, - abstract = {Confidence calibration - the problem of predicting - probability estimates representative of the true - correctness likelihood - is important for - classification models in many applications. We - discover that modern neural networks, unlike those - from a decade ago, are poorly calibrated. Through - extensive experiments, we observe that depth, width, - weight decay, and Batch Normalization are important - factors influencing calibration. We evaluate the - performance of various post-processing calibration - methods on state-of-the-art architectures with image - and document classification datasets. Our analysis - and experiments not only offer insights into neural - network learning, but also provide a simple and - straightforward recipe for practical settings: on - most datasets, temperature scaling - a - single-parameter variant of Platt Scaling - is - surprisingly effective at calibrating predictions.}, - month = 08, - pdf = {http://proceedings.mlr.press/v70/guo17a/guo17a.pdf}, - series = {Proceedings of Machine Learning Research}, -} - -@InProceedings{vaicenavicius19_evaluat, +@article{vaicenavicius19_evaluat, author = {Vaicenavicius, Juozas and Widmann, David and Andersson, Carl and Lindsten, Fredrik and Roll, Jacob and Sch\"{o}n, Thomas B.}, title = {Evaluating model calibration in classification}, - booktitle = {Proceedings of Machine Learning Research}, + journal = {Proceedings of Machine Learning Research}, year = 2019, - volume = 89, - pages = {3459--3467}, - abstract = {Probabilistic classifiers output a probability - distribution on target classes rather than just a - class prediction. Besides providing a clear - separation of prediction and decision making, the - main advantage of probabilistic models is their - ability to represent uncertainty about - predictions. In safety-critical applications, it is - pivotal for a model to possess an adequate sense of - uncertainty, which for probabilistic classifiers - translates into outputting probability distributions - that are consistent with the empirical frequencies - observed from realized outcomes. A classifier with - such a property is called calibrated. In this work, - we develop a general theoretical calibration - evaluation framework grounded in probability theory, - and point out subtleties present in model - calibration evaluation that lead to refined - interpretations of existing evaluation - techniques. Lastly, we propose new ways to quantify - and visualize miscalibration in probabilistic - classification, including novel multidimensional - reliability diagrams.}, - month = 4, - series = {Proceedings of Machine Learning Research}, } @article{broecker07_increas_reliab_reliab_diagr, author = {Jochen Br{\"o}cker and Leonard A. Smith}, - title = {Increasing the Reliability of Reliability Diagrams}, + title = {Increasing the reliability of reliability diagrams}, journal = {Weather and Forecasting}, - volume = {22}, - number = {3}, - pages = {651-661}, year = {2007}, - doi = {10.1175/waf993.1}, - url = {https://doi.org/10.1175/waf993.1}, - DATE_ADDED = {Wed Oct 23 10:41:11 2019}, } diff --git a/slides/spotlight.pdf b/slides/spotlight.pdf index 0aba4f6d0c0ec6b8f377ef68d396170f9cd54f94..3c5079df0e0993acb3820af56ff6b9bd8ef41919 100644 GIT binary patch delta 84 zcmV~$yAgmO3;@uxWeP_i$4#AN%;^!r6oofnuzjHZ)b!Oe#_ delta 84 zcmV~$yAgmO3;@uxWeP_iUV4hGaz dx4Mu$sMl3Th^!1=szaemIF;vq^JG&oDnGJY7F7TM diff --git a/slides/spotlight.tex b/slides/spotlight.tex index fb864bb..80b1a47 100644 --- a/slides/spotlight.tex +++ b/slides/spotlight.tex @@ -4,7 +4,7 @@ % arara: lualatex: { shell: true } \PassOptionsToPackage{force}{filehook} % see https://tex.stackexchange.com/questions/513051/filehook-error-with-memoir-after-update-texlive-2019-in-oct-15 \documentclass[aspectratio=169]{beamer} -% \documentclass[handout]{beamer} % for handouts +% \documentclass[aspectratio=169,handout]{beamer} % for handouts % Plots \usepackage{pgfplots} @@ -90,7 +90,7 @@ {\end{tcolorbox}} % References -\usepackage[style=authoryear-icomp,doi=false,url=false,isbn=false]{biblatex} +\usepackage[style=authortitle-icomp,doi=false,url=false,isbn=false]{biblatex} \addbibresource{references.bib} \newenvironment{refitemize}% @@ -122,7 +122,7 @@ \node[right,inner sep=0pt,outer sep=0pt, right=2mm of UU] (LiU) {\includegraphics[height=0.75cm]{figures/logos/LiU.pdf}}; \end{tikzpicture}% } -\institute{$^\star$Department of Information Technology, Uppsala University\\$^\ddagger$Division of Statistics and Machine Learning, Linköping University} +\institute{$^\star$Department of Information Technology, Uppsala University, Sweden\\$^\ddagger$Division of Statistics and Machine Learning, Linköping University, Sweden} \begin{document} @@ -219,14 +219,12 @@ \begin{frame}{Calibrated model} \begin{tcbraster}[raster columns=2,raster equal height=rows] - \onslide<2->{% - \begin{uugreenbox}[raster multicolumn=2] - \begin{center} - A \hl{calibrated model} reports\\ - \hl{predictions consistent with empirically observed frequencies} of outcomes. - \end{center} - \end{uugreenbox} - }% + \begin{uugreenbox}[raster multicolumn=2] + \begin{center} + A \hl{calibrated model} reports\\ + \hl{predictions consistent with empirically observed frequencies} of outcomes. + \end{center} + \end{uugreenbox} \begin{uuyellowbox}[enhanced, title={Prediction}, valign=center, remember as=A] \begin{center} \begin{tabular}{@{}cc@{}} \toprule @@ -246,20 +244,21 @@ \end{uubluebox} \end{tcbraster} - \onslide<2->{% - \begin{tikzpicture}[remember picture, overlay] - \path (A) -- node [font=\boldmath\Huge, color=uured, align=center, midway] {$\stackrel{?}{=}$} (B); - \end{tikzpicture} - }% + \begin{tikzpicture}[remember picture, overlay] + \path (A) -- node [font=\boldmath\Huge, color=uured, align=center, midway] {$\stackrel{?}{=}$} (B); + \end{tikzpicture} \end{frame} -\begin{frame}{Multi-class classification} +\begin{frame}{Multi-class classification: all scores matter!} \begin{tcbraster}[raster columns=1] \begin{tcolorbox}[blankest] \begin{center} \begin{tikzpicture} - \node[draw, inner sep=2mm] (image) at (0, 0) - {\includesvg[height=\dimexpr0.11\textwidth-4mm\relax]{car}}; + \node[minimum height=0.11\textwidth, inner sep=2mm] (image) at (0, 0) + {\begin{tabular}{@{}ccc@{}} + \includesvg[height=3mm]{car0} & \includesvg[height=3mm]{car1} & \includesvg[height=3mm]{car2} \\ + \includesvg[height=3mm]{car3} & \includesvg[height=3mm]{car4} & $\cdots$ \\ + \end{tabular}}; \onslide<2->{% \node[draw, fill=gronskasvag, right=1cm of image, inner sep=2mm] (model) @@ -279,72 +278,54 @@ \end{center} \end{tcolorbox} \onslide<4->{% - \begin{uuyellowbox}[enhanced, title={Reduction to binary classification}, fontupper=\footnotesize, fontlower=\footnotesize, sidebyside, lower separated=false, remember as=A] + \begin{uubluebox} + \begin{center} + Common calibration evaluation techniques consider only the + most-confident score + \end{center} + \end{uubluebox} + }% + \onslide<5->{% + \begin{uuredbox}[enhanced, fontlower=\footnotesize, sidebyside, lower separated=false, righthand width=0.3\textwidth] + Common approaches do not distinguish between the two predictions + even though the control actions based on these might be very + different! + + \tcblower + \begin{center} \begin{tabular}{@{}ccc@{}} \toprule \texttt{object} & \texttt{human} & \texttt{animal} \\ \midrule \hl{80\%} & 0\% & 20\% \\ - 10\% & \hl{80\%} & 10\% \\ - $\vdots$ & $\vdots$ & $\vdots$ \\ \bottomrule + \hl{80\%} & 20\% & 0\% \\ \bottomrule \end{tabular} \end{center} - \onslide<5->{% - \tcblower - \begin{center} - \begin{tabular}{@{}ccc@{}} \toprule - \texttt{object} & \texttt{human} & \texttt{animal} \\ \midrule - \hl{80\%} & 0\% & 20\% \\ - \hl{80\%} & 10\% & 10\% \\ - $\vdots$ & $\vdots$ & $\vdots$ \\ \bottomrule - \end{tabular} - \end{center} - }% - \end{uuyellowbox} + \end{uuredbox} }% \begin{tcolorbox}[blankest] \begin{refitemize} \refitem{vaicenavicius19_evaluat} - \onslide<4->{\refitem{guo17_calib_moder_neural_networ}} \end{refitemize} \end{tcolorbox} \end{tcbraster} - - \onslide<6->{% - \begin{tikzpicture}[remember picture, overlay] - \node at (A) {% - \begin{uuredbox}[width=0.5\pagewidth, nobeforeafter] - \begin{center} - Often \hl{partial calibration} is \hl{not enough}! - \end{center} - \end{uuredbox}% - }; - \end{tikzpicture} - }% \end{frame} \begin{frame}{Our contribution: Calibration errors in multi-class classification} \begin{tcbraster}[raster columns=1, raster rows=3] - \begin{uuyellowbox}[title={Unified framework}, left=0pt] + \begin{uuyellowbox}[title={Unifying framework of calibration errors}, left=0pt] \begin{itemize} - \item Encompasses existing measures such as the + \item Based on the full predictions with all scores + \item<2-> Encompasses existing measures such as the expected calibration error ($\mathrm{ECE}$) - \item<2-> Enables derivation of a \hl{kernel calibration error ($\mathrm{KCE}$)} + \item<3-> Enables derivation of a \hl{kernel calibration error ($\mathrm{KCE}$)} \end{itemize} \end{uuyellowbox} - - \onslide<3->{% - \begin{tcolorbox}[blank] - The standard $\mathrm{ECE}$ estimator is usually biased and inconsistent: - \begin{center} - \input{figures/errors_ece.tex} - \end{center} - \end{tcolorbox} - }% \onslide<4->{% - \begin{uugreenbox} - \begin{center} - The $\mathrm{KCE}$ yields \hl{unbiased} and \hl{consistent} estimators - \end{center} + \begin{uugreenbox}[left=0pt, title={Estimating calibration errors}] + \begin{itemize} + \item The standard $\mathrm{ECE}$ estimator is usually biased and inconsistent + \item<5-> The $\mathrm{KCE}$ yields \hl{unbiased} and \hl{consistent} estimators + \end{itemize} \end{uugreenbox} }% \end{tcbraster} @@ -421,7 +402,8 @@ \onslide<7->{% \begin{uugreenbox}[left=0pt] \begin{itemize} - \item Existing approach for estimating the p-value for the $\mathrm{ECE}$ seems unreliable + \item Existing $\mathrm{ECE}$-based approach seems prone to + underestimating the p-value \item<8-> \hl{Well-founded bounds and approximations} of the p-value for the $\mathrm{KCE}$ \end{itemize} \end{uugreenbox}