diff --git a/slides/Manifest.toml b/slides/Manifest.toml
deleted file mode 100644
index 36eabb5..0000000
--- a/slides/Manifest.toml
+++ /dev/null
@@ -1,385 +0,0 @@
-# This file is machine-generated - editing it directly is not advised
-
-[[ArgCheck]]
-deps = ["Random"]
-git-tree-sha1 = "dab25d711a1dedb707a55dbc1eb9fd578f76ff32"
-uuid = "dce04be8-c92d-5529-be00-80e4d2c0e197"
-version = "1.0.1"
-
-[[Base64]]
-uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
-
-[[BinDeps]]
-deps = ["Compat", "Libdl", "SHA", "URIParser"]
-git-tree-sha1 = "12093ca6cdd0ee547c39b1870e0c9c3f154d9ca9"
-uuid = "9e28174c-4ba2-5203-b857-d8d62c4213ee"
-version = "0.8.10"
-
-[[BinaryProvider]]
-deps = ["Libdl", "SHA"]
-git-tree-sha1 = "29995a7b317bbd06be147e1974a3541ce2502dca"
-uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
-version = "0.5.7"
-
-[[CSTParser]]
-deps = ["Tokenize"]
-git-tree-sha1 = "c69698c3d4a7255bc1b4bc2afc09f59db910243b"
-uuid = "00ebfdb7-1f24-5e51-bd34-a7502290713f"
-version = "0.6.2"
-
-[[CSV]]
-deps = ["CategoricalArrays", "DataFrames", "Dates", "FilePathsBase", "LazyArrays", "Mmap", "Parsers", "PooledArrays", "Profile", "Tables", "Unicode", "WeakRefStrings"]
-git-tree-sha1 = "0c3201d16c65a1453be158189d48df7c357eb7fb"
-uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
-version = "0.5.14"
-
-[[CalibrationErrors]]
-deps = ["DataStructures", "Distances", "LinearAlgebra", "Parameters", "Statistics", "StatsBase"]
-git-tree-sha1 = "282878076e89e97de73c591c5b9e24e291fc865b"
-uuid = "33913031-fe46-5864-950f-100836f47845"
-version = "0.1.0"
-
-[[CalibrationPaper]]
-deps = ["CalibrationErrors", "Distances", "Parameters", "Query", "SpecialFunctions", "Statistics", "StatsFuns"]
-path = ".."
-uuid = "5e6d2ce1-a021-4f7d-b692-0c7af47336b0"
-version = "0.1.0"
-
-[[CategoricalArrays]]
-deps = ["Compat", "DataAPI", "Future", "JSON", "Missings", "Printf", "Reexport", "Unicode"]
-git-tree-sha1 = "45101c4d0df3946acb6e9bfcfd3a8c32abbd421b"
-uuid = "324d7699-5711-5eae-9e2f-1d82baa6b597"
-version = "0.7.1"
-
-[[Compat]]
-deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
-git-tree-sha1 = "ed2c4abadf84c53d9e58510b5fc48912c2336fbb"
-uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
-version = "2.2.0"
-
-[[Crayons]]
-deps = ["Test"]
-git-tree-sha1 = "f621b8ef51fd2004c7cf157ea47f027fdeac5523"
-uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f"
-version = "4.0.0"
-
-[[DataAPI]]
-git-tree-sha1 = "674b67f344687a88310213ddfa8a2b3c76cc4252"
-uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
-version = "1.1.0"
-
-[[DataFrames]]
-deps = ["CategoricalArrays", "Compat", "DataAPI", "InvertedIndices", "IteratorInterfaceExtensions", "Missings", "PooledArrays", "Printf", "REPL", "Reexport", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"]
-git-tree-sha1 = "271528230c65a4517522e2968c3deed76b92b998"
-uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
-version = "0.19.4"
-
-[[DataStructures]]
-deps = ["InteractiveUtils", "OrderedCollections"]
-git-tree-sha1 = "2103e504f427e54ffa19af9ada225733a21f951f"
-uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
-version = "0.17.3"
-
-[[DataValueInterfaces]]
-git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6"
-uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464"
-version = "1.0.0"
-
-[[DataValues]]
-deps = ["DataValueInterfaces", "Dates"]
-git-tree-sha1 = "b65fd071ca17aa265eb8c5ab0e522faa03a50d34"
-uuid = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5"
-version = "0.4.12"
-
-[[Dates]]
-deps = ["Printf"]
-uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
-
-[[DefaultApplication]]
-deps = ["Test"]
-git-tree-sha1 = "a51d16b075dc52e22cde13b4a6e0ba4ba86649ee"
-uuid = "3f0dd361-4fe0-5fc6-8523-80b14ec94d85"
-version = "0.1.3"
-
-[[DelimitedFiles]]
-deps = ["Mmap"]
-uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
-
-[[Distances]]
-deps = ["LinearAlgebra", "Statistics"]
-git-tree-sha1 = "23717536c81b63e250f682b0e0933769eecd1411"
-uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
-version = "0.8.2"
-
-[[Distributed]]
-deps = ["Random", "Serialization", "Sockets"]
-uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
-
-[[DocStringExtensions]]
-deps = ["LibGit2", "Markdown", "Pkg", "Test"]
-git-tree-sha1 = "88bb0edb352b16608036faadcc071adda068582a"
-uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
-version = "0.8.1"
-
-[[FilePathsBase]]
-deps = ["Dates", "LinearAlgebra", "Printf", "Test", "UUIDs"]
-git-tree-sha1 = "2ee1d999c462425e2f848524f732ed51bc1ab63a"
-uuid = "48062228-2e41-5def-b9a4-89aafe57970f"
-version = "0.6.2"
-
-[[FillArrays]]
-deps = ["LinearAlgebra", "Random", "SparseArrays"]
-git-tree-sha1 = "de38b0253ade98340fabaf220f368f6144541938"
-uuid = "1a297f60-69ca-5386-bcde-b61e274b549b"
-version = "0.7.4"
-
-[[Future]]
-deps = ["Random"]
-uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
-
-[[InteractiveUtils]]
-deps = ["Markdown"]
-uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
-
-[[InvertedIndices]]
-deps = ["Test"]
-git-tree-sha1 = "15732c475062348b0165684ffe28e85ea8396afc"
-uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f"
-version = "1.0.0"
-
-[[IterableTables]]
-deps = ["DataValues", "IteratorInterfaceExtensions", "Requires", "TableTraits", "TableTraitsUtils", "Test"]
-git-tree-sha1 = "18d6084924b2ac78deb65229cbcaa04d56ecb075"
-uuid = "1c8ee90f-4401-5389-894e-7a04a3dc0f4d"
-version = "0.11.0"
-
-[[IteratorInterfaceExtensions]]
-git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856"
-uuid = "82899510-4779-5014-852e-03e436cf321d"
-version = "1.0.0"
-
-[[JSON]]
-deps = ["Dates", "Mmap", "Parsers", "Unicode"]
-git-tree-sha1 = "b34d7cef7b337321e97d22242c3c2b91f476748e"
-uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
-version = "0.21.0"
-
-[[LazyArrays]]
-deps = ["FillArrays", "LinearAlgebra", "MacroTools", "StaticArrays"]
-git-tree-sha1 = "01ec151cd0418fb05294b0230471765e0a65adb1"
-uuid = "5078a376-72f3-5289-bfd5-ec5146d43c02"
-version = "0.12.3"
-
-[[LibGit2]]
-uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
-
-[[Libdl]]
-uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
-
-[[LinearAlgebra]]
-deps = ["Libdl"]
-uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-
-[[Logging]]
-uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
-
-[[MacroTools]]
-deps = ["CSTParser", "Compat", "DataStructures", "Test", "Tokenize"]
-git-tree-sha1 = "d6e9dedb8c92c3465575442da456aec15a89ff76"
-uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
-version = "0.5.1"
-
-[[Markdown]]
-deps = ["Base64"]
-uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
-
-[[Missings]]
-deps = ["DataAPI"]
-git-tree-sha1 = "de0a5ce9e5289f27df672ffabef4d1e5861247d5"
-uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
-version = "0.4.3"
-
-[[Mmap]]
-uuid = "a63ad114-7e13-5084-954f-fe012c677804"
-
-[[OrderedCollections]]
-deps = ["Random", "Serialization", "Test"]
-git-tree-sha1 = "c4c13474d23c60d20a67b217f1d7f22a40edf8f1"
-uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
-version = "1.1.0"
-
-[[PGFPlotsX]]
-deps = ["ArgCheck", "Crayons", "DataStructures", "Dates", "DefaultApplication", "DocStringExtensions", "MacroTools", "Missings", "Parameters", "Requires", "StatsBase", "Unicode"]
-git-tree-sha1 = "169528d1574e4695db9b5e8304ddfeb4bb717a3f"
-uuid = "8314cec4-20b6-5062-9cdb-752b83310925"
-version = "1.1.0"
-
-[[Parameters]]
-deps = ["OrderedCollections"]
-git-tree-sha1 = "b62b2558efb1eef1fa44e4be5ff58a515c287e38"
-uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
-version = "0.12.0"
-
-[[Parsers]]
-deps = ["Dates", "Test"]
-git-tree-sha1 = "ef0af6c8601db18c282d092ccbd2f01f3f0cd70b"
-uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
-version = "0.3.7"
-
-[[Pkg]]
-deps = ["Dates", "LibGit2", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"]
-uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-
-[[PooledArrays]]
-git-tree-sha1 = "6e8c38927cb6e9ae144f7277c753714861b27d14"
-uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720"
-version = "0.5.2"
-
-[[Printf]]
-deps = ["Unicode"]
-uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
-
-[[Profile]]
-deps = ["Printf"]
-uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
-
-[[Query]]
-deps = ["DataValues", "IterableTables", "MacroTools", "QueryOperators", "Statistics"]
-git-tree-sha1 = "fd506b7101702daa4fb13cdffca771180d16c19c"
-uuid = "1a8c2f83-1ff3-5112-b086-8aa67b057ba1"
-version = "0.12.2"
-
-[[QueryOperators]]
-deps = ["DataStructures", "DataValues", "IteratorInterfaceExtensions", "TableShowUtils"]
-git-tree-sha1 = "7ffa41275edd6e0374a9ae8e3b048840867a06d9"
-uuid = "2aef5ad7-51ca-5a8f-8e88-e75cf067b44b"
-version = "0.9.1"
-
-[[REPL]]
-deps = ["InteractiveUtils", "Markdown", "Sockets"]
-uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
-
-[[Random]]
-deps = ["Serialization"]
-uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-
-[[Reexport]]
-deps = ["Pkg"]
-git-tree-sha1 = "7b1d07f411bc8ddb7977ec7f377b97b158514fe0"
-uuid = "189a3867-3050-52da-a836-e630ba90ab69"
-version = "0.2.0"
-
-[[Requires]]
-deps = ["Test"]
-git-tree-sha1 = "f6fbf4ba64d295e146e49e021207993b6b48c7d1"
-uuid = "ae029012-a4dd-5104-9daa-d747884805df"
-version = "0.5.2"
-
-[[Rmath]]
-deps = ["BinaryProvider", "Libdl", "Random", "Statistics", "Test"]
-git-tree-sha1 = "9a6c758cdf73036c3239b0afbea790def1dabff9"
-uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa"
-version = "0.5.0"
-
-[[SHA]]
-uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
-
-[[Serialization]]
-uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
-
-[[SharedArrays]]
-deps = ["Distributed", "Mmap", "Random", "Serialization"]
-uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383"
-
-[[Sockets]]
-uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
-
-[[SortingAlgorithms]]
-deps = ["DataStructures", "Random", "Test"]
-git-tree-sha1 = "03f5898c9959f8115e30bc7226ada7d0df554ddd"
-uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c"
-version = "0.3.1"
-
-[[SparseArrays]]
-deps = ["LinearAlgebra", "Random"]
-uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
-
-[[SpecialFunctions]]
-deps = ["BinDeps", "BinaryProvider", "Libdl", "Test"]
-git-tree-sha1 = "0b45dc2e45ed77f445617b99ff2adf0f5b0f23ea"
-uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
-version = "0.7.2"
-
-[[StaticArrays]]
-deps = ["LinearAlgebra", "Random", "Statistics"]
-git-tree-sha1 = "1085ffbf5fd48fdba64ef8e902ca429c4e1212d3"
-uuid = "90137ffa-7385-5640-81b9-e52037218182"
-version = "0.11.1"
-
-[[Statistics]]
-deps = ["LinearAlgebra", "SparseArrays"]
-uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
-
-[[StatsBase]]
-deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"]
-git-tree-sha1 = "c53e809e63fe5cf5de13632090bc3520649c9950"
-uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
-version = "0.32.0"
-
-[[StatsFuns]]
-deps = ["Rmath", "SpecialFunctions", "Test"]
-git-tree-sha1 = "b3a4e86aa13c732b8a8c0ba0c3d3264f55e6bb3e"
-uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
-version = "0.8.0"
-
-[[TableShowUtils]]
-deps = ["DataValues", "Dates", "JSON", "Markdown", "Test"]
-git-tree-sha1 = "14c54e1e96431fb87f0d2f5983f090f1b9d06457"
-uuid = "5e66a065-1f0a-5976-b372-e0b8c017ca10"
-version = "0.2.5"
-
-[[TableTraits]]
-deps = ["IteratorInterfaceExtensions"]
-git-tree-sha1 = "b1ad568ba658d8cbb3b892ed5380a6f3e781a81e"
-uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
-version = "1.0.0"
-
-[[TableTraitsUtils]]
-deps = ["DataValues", "IteratorInterfaceExtensions", "Missings", "TableTraits"]
-git-tree-sha1 = "fbc53f586630fa2a72190f7a792ba4106fd0776a"
-uuid = "382cd787-c1b6-5bf2-a167-d5b971a19bda"
-version = "1.0.0"
-
-[[Tables]]
-deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"]
-git-tree-sha1 = "aaed7b3b00248ff6a794375ad6adf30f30ca5591"
-uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
-version = "0.2.11"
-
-[[Test]]
-deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
-uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[[Tokenize]]
-git-tree-sha1 = "dfcdbbfb2d0370716c815cbd6f8a364efb6f42cf"
-uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624"
-version = "0.5.6"
-
-[[URIParser]]
-deps = ["Test", "Unicode"]
-git-tree-sha1 = "6ddf8244220dfda2f17539fa8c9de20d6c575b69"
-uuid = "30578b45-9adc-5946-b283-645ec420af67"
-version = "0.4.0"
-
-[[UUIDs]]
-deps = ["Random", "SHA"]
-uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
-
-[[Unicode]]
-uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
-
-[[WeakRefStrings]]
-deps = ["Random", "Test"]
-git-tree-sha1 = "9a0bb82eede528debe631b642eeb48a631a69bc2"
-uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5"
-version = "0.6.1"
diff --git a/slides/Project.toml b/slides/Project.toml
deleted file mode 100644
index fc1050b..0000000
--- a/slides/Project.toml
+++ /dev/null
@@ -1,6 +0,0 @@
-[deps]
-CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
-CalibrationPaper = "5e6d2ce1-a021-4f7d-b692-0c7af47336b0"
-DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
-PGFPlotsX = "8314cec4-20b6-5062-9cdb-752b83310925"
-StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
diff --git a/slides/README.md b/slides/README.md
index eef0253..2ab1c5e 100644
--- a/slides/README.md
+++ b/slides/README.md
@@ -4,26 +4,6 @@ This folder contains the source code of the slides summarizing the paper
"Calibration tests in multi-class classification: A unifying framework"
by Widmann, Lindsten, and Zachariah.
-## Generate the figure
-
-Open a terminal in the current directory and install all required Julia packages
-by running
-```shell
-julia --project=. -e "using Pkg; Pkg.instantiate()"
-```
-Afterwards start a Julia REPL
-```shell
-julia --project=.
-```
-and include the file `figures.jl` with
-``` julia
-julia> include("figures.jl")
-```
-You can regenerate the figure by running
-``` julia
-julia> errors_ece()
-```
-
## Compile the slides
The slides can be compiled as a PDF file by running
diff --git a/slides/figures.jl b/slides/figures.jl
deleted file mode 100644
index fcb3e3a..0000000
--- a/slides/figures.jl
+++ /dev/null
@@ -1,100 +0,0 @@
-using CalibrationPaper
-using CSV
-using DataFrames
-using PGFPlotsX
-using StatsBase
-
-function errors_ece()
- # initialize group plot
- @pgf plt = GroupPlot(
- {
- group_style =
- {
- group_name = "group",
- group_size = "3 by 1",
- horizontal_sep = raw"0.02\textwidth",
- vertical_sep = "0pt",
- ylabels_at = "edge left",
- yticklabels_at = "edge left"
- },
- ylabel = raw"\# runs",
- no_markers,
- label_style = { font = raw"\small" },
- tick_label_style = { font = raw"\tiny" },
- grid = "major",
- width = raw"0.19\textwidth",
- height = raw"0.08\textwidth",
- "every x tick scale label/.style" = { at = "{(1,0)}", anchor = "west" },
- "scale only axis",
- ymin = 0, ymax = 3500,
- legend_cell_align = "left",
- legend_style =
- {
- fill = "none",
- draw = "none",
- font = raw"\small",
- inner_sep = "0pt",
- at = "({1.1, 1})",
- anchor = "north west" }
- })
-
- # define displayed models
- models = [CalibrationPaperModel(10, 0.1, 0.0, true),
- CalibrationPaperModel(10, 0.1, 0.5, true),
- CalibrationPaperModel(10, 0.1, 1.0, false)]
-
- # load experimental results
- datadir = joinpath(@__DIR__, "..", "experiments", "data", "errors")
- df = CSV.read(joinpath(datadir, "ECE_uniform.csv"))
-
- # for all studied experiments and models
- for (i, model) in enumerate(models)
- # load estimates
- estimates = collect_estimates(df, model)
-
- # compute histogram
- hist = fit(Histogram, estimates, closed = :left)
-
- # create axis object with histogram
- @pgf ax = Axis(PlotInc(
- {
- ybar_interval,
- fill = "Dark2-A!30!white",
- forget_plot
- },
- Table(hist)))
-
- # add mean of estimates
- @pgf push!(ax, VLine({ solid, thick, "Dark2-B" }, mean(estimates)))
- if i == 3
- push!(ax,
- raw"\addlegendimage{solid, thick, Dark2-B, no markers};",
- LegendEntry("mean estimate"))
- end
-
- # compute true value
- analytic = CalibrationPaper.analytic_ece(model)
-
- # plot true value
- @pgf push!(ax, VLine({ dashed, thick, "Dark2-C" }, analytic))
- if i == 3
- push!(ax, raw"\addlegendimage{dashed, thick, Dark2-C, no markers};",
- LegendEntry(raw"$\mathrm{ECE}$"))
- end
-
- # hack so that limits are updated as well
- @pgf push!(ax, PlotInc({ draw = "none" }, Coordinates([analytic], [0])))
-
- # add axis to group plot
- push!(plt, ax)
- end
-
- # save histogram
- figuresdir = joinpath(@__DIR__, "figures")
- isdir(figuresdir) || mkdir(figuresdir)
- picture = TikzPicture(plt,
- raw"\node[anchor=north, font=\small] at ($(group c1r1.west |- group c1r1.outer south)!0.5!(group c3r1.east |- group c3r1.outer south)$){$\mathrm{ECE}$ estimate};")
- pgfsave(joinpath(figuresdir, "errors_ece.tex"), picture; include_preamble = false)
-
- nothing
-end
diff --git a/slides/figures/car.svg b/slides/figures/car.svg
deleted file mode 100644
index b8fe910..0000000
--- a/slides/figures/car.svg
+++ /dev/null
@@ -1,17 +0,0 @@
-
-
-
\ No newline at end of file
diff --git a/slides/figures/car0.svg b/slides/figures/car0.svg
new file mode 100644
index 0000000..54dc3b5
--- /dev/null
+++ b/slides/figures/car0.svg
@@ -0,0 +1,89 @@
+
+
+
+
diff --git a/slides/figures/car1.svg b/slides/figures/car1.svg
new file mode 100644
index 0000000..3be8735
--- /dev/null
+++ b/slides/figures/car1.svg
@@ -0,0 +1,89 @@
+
+
+
+
diff --git a/slides/figures/car2.svg b/slides/figures/car2.svg
new file mode 100644
index 0000000..e8ad59b
--- /dev/null
+++ b/slides/figures/car2.svg
@@ -0,0 +1,89 @@
+
+
+
+
diff --git a/slides/figures/car3.svg b/slides/figures/car3.svg
new file mode 100644
index 0000000..e7e6ae5
--- /dev/null
+++ b/slides/figures/car3.svg
@@ -0,0 +1,89 @@
+
+
+
+
diff --git a/slides/figures/car4.svg b/slides/figures/car4.svg
new file mode 100644
index 0000000..f0c4308
--- /dev/null
+++ b/slides/figures/car4.svg
@@ -0,0 +1,89 @@
+
+
+
+
diff --git a/slides/figures/errors_ece.tex b/slides/figures/errors_ece.tex
deleted file mode 100644
index 9fdb407..0000000
--- a/slides/figures/errors_ece.tex
+++ /dev/null
@@ -1,85 +0,0 @@
-\begin{tikzpicture}
-\begin{groupplot}[group style={group name={group}, group size={3 by 1}, horizontal sep={0.02\textwidth}, vertical sep={0pt}, ylabels at={edge left}, yticklabels at={edge left}}, ylabel={\# runs}, no markers, label style={font={\small}}, tick label style={font={\tiny}}, grid={major}, width={0.19\textwidth}, height={0.08\textwidth}, every x tick scale label/.style={at={{(1,0)}}, anchor={west}}, scale only axis, ymin={0}, ymax={3500}, legend cell align={left}, legend style={fill={none}, draw={none}, font={\small}, inner sep={0pt}, at={({1.1, 1})}, anchor={north west}}]
- \nextgroupplot
- \addplot+[ybar interval, fill={Dark2-A!30!white}, forget plot]
- table[row sep={\\}]
- {
- \\
- 0.17 7.0 \\
- 0.18 55.0 \\
- 0.19 229.0 \\
- 0.2 796.0 \\
- 0.21 1782.0 \\
- 0.22 2550.0 \\
- 0.23 2307.0 \\
- 0.24 1389.0 \\
- 0.25 667.0 \\
- 0.26 171.0 \\
- 0.27 42.0 \\
- 0.28 5.0 \\
- 0.29 0.0 \\
- }
- ;
- \draw[solid, thick, Dark2-B] (0.22876019647666532,\pgfkeysvalueof{/pgfplots/ymin})--(0.22876019647666532,\pgfkeysvalueof{/pgfplots/ymax});
- \draw[dashed, thick, Dark2-C] (0.0,\pgfkeysvalueof{/pgfplots/ymin})--(0.0,\pgfkeysvalueof{/pgfplots/ymax});
- \addplot+[draw={none}]
- coordinates {
- (0.0,0)
- }
- ;
- \nextgroupplot
- \addplot+[ybar interval, fill={Dark2-A!30!white}, forget plot]
- table[row sep={\\}]
- {
- \\
- 0.42 23.0 \\
- 0.44 207.0 \\
- 0.46 838.0 \\
- 0.48 1942.0 \\
- 0.5 2982.0 \\
- 0.52 2440.0 \\
- 0.54 1165.0 \\
- 0.56 345.0 \\
- 0.58 52.0 \\
- 0.6 6.0 \\
- 0.62 0.0 \\
- }
- ;
- \draw[solid, thick, Dark2-B] (0.5133844252714233,\pgfkeysvalueof{/pgfplots/ymin})--(0.5133844252714233,\pgfkeysvalueof{/pgfplots/ymax});
- \draw[dashed, thick, Dark2-C] (0.45,\pgfkeysvalueof{/pgfplots/ymin})--(0.45,\pgfkeysvalueof{/pgfplots/ymax});
- \addplot+[draw={none}]
- coordinates {
- (0.45,0)
- }
- ;
- \nextgroupplot
- \addplot+[ybar interval, fill={Dark2-A!30!white}, forget plot]
- table[row sep={\\}]
- {
- \\
- 0.3 15.0 \\
- 0.32 118.0 \\
- 0.34 804.0 \\
- 0.36 2310.0 \\
- 0.38 3279.0 \\
- 0.4 2413.0 \\
- 0.42 863.0 \\
- 0.44 185.0 \\
- 0.46 13.0 \\
- 0.48 0.0 \\
- }
- ;
- \draw[solid, thick, Dark2-B] (0.3908207321571612,\pgfkeysvalueof{/pgfplots/ymin})--(0.3908207321571612,\pgfkeysvalueof{/pgfplots/ymax});
- \addlegendimage{solid, thick, Dark2-B, no markers};
- \addlegendentry {mean estimate}
- \draw[dashed, thick, Dark2-C] (0.7106418012290426,\pgfkeysvalueof{/pgfplots/ymin})--(0.7106418012290426,\pgfkeysvalueof{/pgfplots/ymax});
- \addlegendimage{dashed, thick, Dark2-C, no markers};
- \addlegendentry {$\mathrm{ECE}$}
- \addplot+[draw={none}]
- coordinates {
- (0.7106418012290426,0)
- }
- ;
-\end{groupplot}
-\node[anchor=north, font=\small] at ($(group c1r1.west |- group c1r1.outer south)!0.5!(group c3r1.east |- group c3r1.outer south)$){$\mathrm{ECE}$ estimate};
-\end{tikzpicture}
diff --git a/slides/references.bib b/slides/references.bib
index ed89727..e341080 100644
--- a/slides/references.bib
+++ b/slides/references.bib
@@ -1,80 +1,15 @@
-@InProceedings{guo17_calib_moder_neural_networ,
- author = {Chuan Guo and Geoff Pleiss and Yu Sun and Kilian
- Q. Weinberger},
- title = {On Calibration of Modern Neural Networks},
- booktitle = {Proceedings of the 34th International Conference on
- Machine Learning},
- year = 2017,
- volume = 70,
- pages = {1321--1330},
- url = {http://proceedings.mlr.press/v70/guo17a.html},
- abstract = {Confidence calibration - the problem of predicting
- probability estimates representative of the true
- correctness likelihood - is important for
- classification models in many applications. We
- discover that modern neural networks, unlike those
- from a decade ago, are poorly calibrated. Through
- extensive experiments, we observe that depth, width,
- weight decay, and Batch Normalization are important
- factors influencing calibration. We evaluate the
- performance of various post-processing calibration
- methods on state-of-the-art architectures with image
- and document classification datasets. Our analysis
- and experiments not only offer insights into neural
- network learning, but also provide a simple and
- straightforward recipe for practical settings: on
- most datasets, temperature scaling - a
- single-parameter variant of Platt Scaling - is
- surprisingly effective at calibrating predictions.},
- month = 08,
- pdf = {http://proceedings.mlr.press/v70/guo17a/guo17a.pdf},
- series = {Proceedings of Machine Learning Research},
-}
-
-@InProceedings{vaicenavicius19_evaluat,
+@article{vaicenavicius19_evaluat,
author = {Vaicenavicius, Juozas and Widmann, David and
Andersson, Carl and Lindsten, Fredrik and Roll,
Jacob and Sch\"{o}n, Thomas B.},
title = {Evaluating model calibration in classification},
- booktitle = {Proceedings of Machine Learning Research},
+ journal = {Proceedings of Machine Learning Research},
year = 2019,
- volume = 89,
- pages = {3459--3467},
- abstract = {Probabilistic classifiers output a probability
- distribution on target classes rather than just a
- class prediction. Besides providing a clear
- separation of prediction and decision making, the
- main advantage of probabilistic models is their
- ability to represent uncertainty about
- predictions. In safety-critical applications, it is
- pivotal for a model to possess an adequate sense of
- uncertainty, which for probabilistic classifiers
- translates into outputting probability distributions
- that are consistent with the empirical frequencies
- observed from realized outcomes. A classifier with
- such a property is called calibrated. In this work,
- we develop a general theoretical calibration
- evaluation framework grounded in probability theory,
- and point out subtleties present in model
- calibration evaluation that lead to refined
- interpretations of existing evaluation
- techniques. Lastly, we propose new ways to quantify
- and visualize miscalibration in probabilistic
- classification, including novel multidimensional
- reliability diagrams.},
- month = 4,
- series = {Proceedings of Machine Learning Research},
}
@article{broecker07_increas_reliab_reliab_diagr,
author = {Jochen Br{\"o}cker and Leonard A. Smith},
- title = {Increasing the Reliability of Reliability Diagrams},
+ title = {Increasing the reliability of reliability diagrams},
journal = {Weather and Forecasting},
- volume = {22},
- number = {3},
- pages = {651-661},
year = {2007},
- doi = {10.1175/waf993.1},
- url = {https://doi.org/10.1175/waf993.1},
- DATE_ADDED = {Wed Oct 23 10:41:11 2019},
}
diff --git a/slides/spotlight.pdf b/slides/spotlight.pdf
index 0aba4f6..3c5079d 100644
Binary files a/slides/spotlight.pdf and b/slides/spotlight.pdf differ
diff --git a/slides/spotlight.tex b/slides/spotlight.tex
index fb864bb..80b1a47 100644
--- a/slides/spotlight.tex
+++ b/slides/spotlight.tex
@@ -4,7 +4,7 @@
% arara: lualatex: { shell: true }
\PassOptionsToPackage{force}{filehook} % see https://tex.stackexchange.com/questions/513051/filehook-error-with-memoir-after-update-texlive-2019-in-oct-15
\documentclass[aspectratio=169]{beamer}
-% \documentclass[handout]{beamer} % for handouts
+% \documentclass[aspectratio=169,handout]{beamer} % for handouts
% Plots
\usepackage{pgfplots}
@@ -90,7 +90,7 @@
{\end{tcolorbox}}
% References
-\usepackage[style=authoryear-icomp,doi=false,url=false,isbn=false]{biblatex}
+\usepackage[style=authortitle-icomp,doi=false,url=false,isbn=false]{biblatex}
\addbibresource{references.bib}
\newenvironment{refitemize}%
@@ -122,7 +122,7 @@
\node[right,inner sep=0pt,outer sep=0pt, right=2mm of UU] (LiU) {\includegraphics[height=0.75cm]{figures/logos/LiU.pdf}};
\end{tikzpicture}%
}
-\institute{$^\star$Department of Information Technology, Uppsala University\\$^\ddagger$Division of Statistics and Machine Learning, Linköping University}
+\institute{$^\star$Department of Information Technology, Uppsala University, Sweden\\$^\ddagger$Division of Statistics and Machine Learning, Linköping University, Sweden}
\begin{document}
@@ -219,14 +219,12 @@
\begin{frame}{Calibrated model}
\begin{tcbraster}[raster columns=2,raster equal height=rows]
- \onslide<2->{%
- \begin{uugreenbox}[raster multicolumn=2]
- \begin{center}
- A \hl{calibrated model} reports\\
- \hl{predictions consistent with empirically observed frequencies} of outcomes.
- \end{center}
- \end{uugreenbox}
- }%
+ \begin{uugreenbox}[raster multicolumn=2]
+ \begin{center}
+ A \hl{calibrated model} reports\\
+ \hl{predictions consistent with empirically observed frequencies} of outcomes.
+ \end{center}
+ \end{uugreenbox}
\begin{uuyellowbox}[enhanced, title={Prediction}, valign=center, remember as=A]
\begin{center}
\begin{tabular}{@{}cc@{}} \toprule
@@ -246,20 +244,21 @@
\end{uubluebox}
\end{tcbraster}
- \onslide<2->{%
- \begin{tikzpicture}[remember picture, overlay]
- \path (A) -- node [font=\boldmath\Huge, color=uured, align=center, midway] {$\stackrel{?}{=}$} (B);
- \end{tikzpicture}
- }%
+ \begin{tikzpicture}[remember picture, overlay]
+ \path (A) -- node [font=\boldmath\Huge, color=uured, align=center, midway] {$\stackrel{?}{=}$} (B);
+ \end{tikzpicture}
\end{frame}
-\begin{frame}{Multi-class classification}
+\begin{frame}{Multi-class classification: all scores matter!}
\begin{tcbraster}[raster columns=1]
\begin{tcolorbox}[blankest]
\begin{center}
\begin{tikzpicture}
- \node[draw, inner sep=2mm] (image) at (0, 0)
- {\includesvg[height=\dimexpr0.11\textwidth-4mm\relax]{car}};
+ \node[minimum height=0.11\textwidth, inner sep=2mm] (image) at (0, 0)
+ {\begin{tabular}{@{}ccc@{}}
+ \includesvg[height=3mm]{car0} & \includesvg[height=3mm]{car1} & \includesvg[height=3mm]{car2} \\
+ \includesvg[height=3mm]{car3} & \includesvg[height=3mm]{car4} & $\cdots$ \\
+ \end{tabular}};
\onslide<2->{%
\node[draw, fill=gronskasvag, right=1cm of image, inner sep=2mm] (model)
@@ -279,72 +278,54 @@
\end{center}
\end{tcolorbox}
\onslide<4->{%
- \begin{uuyellowbox}[enhanced, title={Reduction to binary classification}, fontupper=\footnotesize, fontlower=\footnotesize, sidebyside, lower separated=false, remember as=A]
+ \begin{uubluebox}
+ \begin{center}
+ Common calibration evaluation techniques consider only the
+ most-confident score
+ \end{center}
+ \end{uubluebox}
+ }%
+ \onslide<5->{%
+ \begin{uuredbox}[enhanced, fontlower=\footnotesize, sidebyside, lower separated=false, righthand width=0.3\textwidth]
+ Common approaches do not distinguish between the two predictions
+ even though the control actions based on these might be very
+ different!
+
+ \tcblower
+
\begin{center}
\begin{tabular}{@{}ccc@{}} \toprule
\texttt{object} & \texttt{human} & \texttt{animal} \\ \midrule
\hl{80\%} & 0\% & 20\% \\
- 10\% & \hl{80\%} & 10\% \\
- $\vdots$ & $\vdots$ & $\vdots$ \\ \bottomrule
+ \hl{80\%} & 20\% & 0\% \\ \bottomrule
\end{tabular}
\end{center}
- \onslide<5->{%
- \tcblower
- \begin{center}
- \begin{tabular}{@{}ccc@{}} \toprule
- \texttt{object} & \texttt{human} & \texttt{animal} \\ \midrule
- \hl{80\%} & 0\% & 20\% \\
- \hl{80\%} & 10\% & 10\% \\
- $\vdots$ & $\vdots$ & $\vdots$ \\ \bottomrule
- \end{tabular}
- \end{center}
- }%
- \end{uuyellowbox}
+ \end{uuredbox}
}%
\begin{tcolorbox}[blankest]
\begin{refitemize}
\refitem{vaicenavicius19_evaluat}
- \onslide<4->{\refitem{guo17_calib_moder_neural_networ}}
\end{refitemize}
\end{tcolorbox}
\end{tcbraster}
-
- \onslide<6->{%
- \begin{tikzpicture}[remember picture, overlay]
- \node at (A) {%
- \begin{uuredbox}[width=0.5\pagewidth, nobeforeafter]
- \begin{center}
- Often \hl{partial calibration} is \hl{not enough}!
- \end{center}
- \end{uuredbox}%
- };
- \end{tikzpicture}
- }%
\end{frame}
\begin{frame}{Our contribution: Calibration errors in multi-class classification}
\begin{tcbraster}[raster columns=1, raster rows=3]
- \begin{uuyellowbox}[title={Unified framework}, left=0pt]
+ \begin{uuyellowbox}[title={Unifying framework of calibration errors}, left=0pt]
\begin{itemize}
- \item Encompasses existing measures such as the
+ \item Based on the full predictions with all scores
+ \item<2-> Encompasses existing measures such as the
expected calibration error ($\mathrm{ECE}$)
- \item<2-> Enables derivation of a \hl{kernel calibration error ($\mathrm{KCE}$)}
+ \item<3-> Enables derivation of a \hl{kernel calibration error ($\mathrm{KCE}$)}
\end{itemize}
\end{uuyellowbox}
-
- \onslide<3->{%
- \begin{tcolorbox}[blank]
- The standard $\mathrm{ECE}$ estimator is usually biased and inconsistent:
- \begin{center}
- \input{figures/errors_ece.tex}
- \end{center}
- \end{tcolorbox}
- }%
\onslide<4->{%
- \begin{uugreenbox}
- \begin{center}
- The $\mathrm{KCE}$ yields \hl{unbiased} and \hl{consistent} estimators
- \end{center}
+ \begin{uugreenbox}[left=0pt, title={Estimating calibration errors}]
+ \begin{itemize}
+ \item The standard $\mathrm{ECE}$ estimator is usually biased and inconsistent
+ \item<5-> The $\mathrm{KCE}$ yields \hl{unbiased} and \hl{consistent} estimators
+ \end{itemize}
\end{uugreenbox}
}%
\end{tcbraster}
@@ -421,7 +402,8 @@
\onslide<7->{%
\begin{uugreenbox}[left=0pt]
\begin{itemize}
- \item Existing approach for estimating the p-value for the $\mathrm{ECE}$ seems unreliable
+ \item Existing $\mathrm{ECE}$-based approach seems prone to
+ underestimating the p-value
\item<8-> \hl{Well-founded bounds and approximations} of the p-value for the $\mathrm{KCE}$
\end{itemize}
\end{uugreenbox}