# # Performance Tips
# ## Optimize contraction orders

# Let us use a problem instance from the "Promedus" dataset of the UAI 2014 competition as an example.
using TensorInference
problem = problem_from_artifact("uai2014", "MAR", "Promedus", 11)
model, evidence = read_model(problem), read_evidence(problem);

# Next, we select the tensor network contraction order optimizer.
optimizer = TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.3:100)

# Here, we choose the local search based [`TreeSA`](@ref) algorithm, which often finds the smallest time/space complexity and supports slicing.
# One can type `?TreeSA` in a Julia REPL for more information about how to configure the hyper-parameters of the [`TreeSA`](@ref) method,
# while the detailed algorithm explanation is in [arXiv: 2108.05665](https://arxiv.org/abs/2108.05665).
# Alternative tensor network contraction order optimizers include
# * [`GreedyMethod`](@ref) (default, fastest in searching speed but worst in contraction complexity)
# * [`KaHyParBipartite`](@ref)
# * [`SABipartite`](@ref)
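# As a quick sketch (assuming these optimizers are exported alongside `TensorInference`),
# an alternative optimizer is constructed and used in exactly the same way. For example, the
# default greedy optimizer searches much faster but usually yields a worse contraction order:
greedy_tn = TensorNetworkModel(model; optimizer = GreedyMethod(), evidence);
contraction_complexity(greedy_tn)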

tn = TensorNetworkModel(model; optimizer, evidence);

# The returned object `tn` contains a field `code` that specifies the tensor network with optimized contraction order. To check the contraction complexity, please type
contraction_complexity(tn)

# The returned object contains the log2 values of the number of multiplications, the number of elements in the largest tensor during contraction, and the number of read-write operations to tensor elements.
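# For programmatic access, the individual quantities can be read out as fields of the
# returned object (a sketch assuming the field names `tc`, `sc` and `rwc` from `OMEinsum`):
cc = contraction_complexity(tn)
cc.tc, cc.sc, cc.rwc  # time, space and read-write complexity, all in log2 scale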

# The total probability can then be evaluated by contracting the tensor network.
probability(tn)

# ## Using the slicing technique to reduce the memory cost

# For large scale applications, it is also possible to slice over certain degrees of freedom to reduce the space complexity, i.e.
# loop and accumulate over certain degrees of freedom so that one can have a smaller tensor network inside the loop due to the removal of these degrees of freedom.
# In the [`TreeSA`](@ref) optimizer, one can set `nslices` to a value larger than zero to turn on this feature.
# As a comparison, we slice over 5 degrees of freedom, which can reduce the space complexity by at most 5.
# In this application, the slicing achieves the largest possible space complexity reduction of 5, while the time and read-write complexities are increased by less than 1,
# i.e. the peak memory usage is reduced by a factor of ``32``, while the (theoretical) computing time is increased by a factor of less than ``2``.
optimizer = TreeSA(ntrials = 1, niters = 5, βs = 0.1:0.3:100, nslices = 5)
tn = TensorNetworkModel(model; optimizer, evidence);
contraction_complexity(tn)

# ## Faster Tropical tensor contraction to speed up MAP and MMAP
# No extra effort is required to enjoy the BLAS-level speed provided by [`TropicalGEMM`](https://github.com/TensorBFS/TropicalGEMM.jl).
# The benchmark in the `TropicalGEMM` repo shows that its performance is close to the theoretical optimum.
# A GPU implementation is under development in the GitHub repo [`CuTropicalGEMM.jl`](https://github.com/ArrogantGao/CuTropicalGEMM.jl) as a part of the [Open Source Promotion Plan summer program](https://summer-ospp.ac.cn/).
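# As a sketch (assuming `TropicalGEMM` is installed and that the `tn` model above is reused
# for the MAP query), loading the package before querying the most probable configuration is
# all that is needed to accelerate the underlying Tropical tensor contractions:
# ```julia
# julia> using TropicalGEMM
#
# julia> logp, config = most_probable_config(tn);
# ```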

# ## Working with GPUs
# To offload the computation to a GPU, simply load `CUDA` before calling the inference functions, and set the keyword argument `usecuda` to `true`.
# ```julia
# julia> using CUDA
# [ Info: OMEinsum loaded the CUDA module successfully
#
# julia> marginals(tn; usecuda = true);
# ```

# Functions that support the `usecuda` keyword argument include
# * [`probability`](@ref)
# * [`log_probability`](@ref)
# * [`marginals`](@ref)
# * [`most_probable_config`](@ref)

# ## Benchmarks
# Please check our [paper (link to be added)]().