diff --git a/scripts/builtin/ampute.dml b/scripts/builtin/ampute.dml
index 0da6af20fdb..ffc645b5b35 100644
--- a/scripts/builtin/ampute.dml
+++ b/scripts/builtin/ampute.dml
@@ -30,7 +30,6 @@
 # mech         a string [either "MAR", "MNAR", or "MCAR"] specifying the missingness mechanism. Chosen "MAR" and "MNAR" settings will be overridden if a non-default weight matrix is specified
 # weights      a weight matrix [shape: k-by-m], containing weights that will be used to calculate the weighted sum scores. Will be overridden if mech == "MCAR"
 # seed         a manually defined seed for reproducible RNG
-
 # -------------------------------------------------------------------------------------
 #
 # OUTPUT:
diff --git a/scripts/builtin/confusionMatrix.dml b/scripts/builtin/confusionMatrix.dml
index 3ac70fb3f87..b21088f2cfa 100644
--- a/scripts/builtin/confusionMatrix.dml
+++ b/scripts/builtin/confusionMatrix.dml
@@ -23,7 +23,7 @@
 # and actual labels. We return both the counts and relative frequency
 # (normalized by sum of true labels)
 #
-# .. code-block::
+# .. code-block:: text
 #
 #                   True Labels
 #                     1    2
diff --git a/scripts/builtin/cooccurrenceMatrix.dml b/scripts/builtin/cooccurrenceMatrix.dml
index 86b8b9ca169..590f4ba1e00 100644
--- a/scripts/builtin/cooccurrenceMatrix.dml
+++ b/scripts/builtin/cooccurrenceMatrix.dml
@@ -18,22 +18,21 @@
 # under the License.
 #
 #-------------------------------------------------------------
-#
-# The implementation is based on
+
+# Cleans and processes text data by removing punctuation, converting it to lowercase, and reformatting.
+# Adds an index column to the result. The implementation is based on
 # https://github.com/stanfordnlp/GloVe/blob/master/src/cooccur.c
 #
-#-------------------------------------------------------------
-
-## Cleans and processes text data by removing punctuation, converting it to lowercase, and reformatting.
-## Adds an index column to the result.
 # INPUT:
 # ------------------------------------------------------------------------------
 # S     (Frame[Unknown]): 1D input data frame containing text data.
 # ------------------------------------------------------------------------------
+#
 # OUTPUT:
 # ------------------------------------------------------------------------------
 # result    (Frame[Unknown]): Processed text data with an index column.
 # ------------------------------------------------------------------------------
+
 processText = function(Frame[Unknown] S) return (Frame[Unknown] result){
     print("processText");
     tmpStr = map(S[,1], "x -> x.replaceAll(\"[.]\", \"\")");
@@ -172,4 +171,4 @@ f_cooccurrenceMatrix = function(
     [wordPosition, docID] = getWordPosition(processedResult, maxTokens);
     [recodedWordPosition, tableSize, column] = getRecodedMatrix(wordPosition);
     coocMatrix = createCoocMatrix(cbind(docID, recodedWordPosition), tableSize, distanceWeighting, symmetric, windowSize);
-}
+}
\ No newline at end of file
diff --git a/scripts/builtin/decisionTree.dml b/scripts/builtin/decisionTree.dml
index 69bf12af90c..94c292d8554 100644
--- a/scripts/builtin/decisionTree.dml
+++ b/scripts/builtin/decisionTree.dml
@@ -30,9 +30,9 @@
 #   and the following trees, M would look as follows:
 #
 #   (L1)               |d<5|
-#                     /     \
+#                     /     \\
 #   (L2)           P1:2    |a<7|
-#                          /   \
+#                          /   \\
 #   (L3)                 P2:2 P3:1
 #
 #   --> M :=
diff --git a/scripts/builtin/dedup.dml b/scripts/builtin/dedup.dml
index 1ec2e29c395..af2ecafcdcd 100644
--- a/scripts/builtin/dedup.dml
+++ b/scripts/builtin/dedup.dml
@@ -28,11 +28,11 @@
 #
 # INPUT:
 # --------------------------------------------------------------------------------------
-# X                 Input Frame[String] with n rows and d columns (raw tuples)
-# gloveMatrix       Matrix[Double] of size |V| × e (pretrained GloVe embeddings) -> |V| number of words and e = embedding dimesnion
-# vocab             Frame[String] of size |V| × 1 (vocabulary aligned with gloveMatrix)
-# similarityMeasure (optional) String specifying similarity metric: "cosine", "euclidean"
-# threshold         (optional) Double: threshold value above which tuples are considered duplicates
+# X                  Input Frame[String] with n rows and d columns (raw tuples)
+# gloveMatrix        Matrix[Double] of size |V| × e (pretrained GloVe embeddings) -> |V| number of words and e = embedding dimension
+# vocab              Frame[String] of size |V| × 1 (vocabulary aligned with gloveMatrix)
+# similarityMeasure  (optional) String specifying similarity metric: "cosine", "euclidean"
+# threshold          (optional) Double: threshold value above which tuples are considered duplicates
 # --------------------------------------------------------------------------------------
 #
 # OUTPUT:
diff --git a/scripts/builtin/differenceStatistics.dml b/scripts/builtin/differenceStatistics.dml
index 0e9019f0963..30f207091e4 100644
--- a/scripts/builtin/differenceStatistics.dml
+++ b/scripts/builtin/differenceStatistics.dml
@@ -28,6 +28,11 @@
 # X        First Matrix to compare
 # Y        Second Matrix to compare
 # --------------------------------------------------------------------------------
+#
+# OUTPUT:
+# -------------------------------------------------------------------------------------
+# stats.   Difference statistics
+# -------------------------------------------------------------------------------------
 
 m_differenceStatistics = function(Matrix[Double] X, Matrix[Double] Y)  {
 
diff --git a/scripts/builtin/glove.dml b/scripts/builtin/glove.dml
index fc5ee9bafb3..9acf52975c6 100644
--- a/scripts/builtin/glove.dml
+++ b/scripts/builtin/glove.dml
@@ -18,6 +18,51 @@
 # under the License.
 #-------------------------------------------------------------
 
+
+# Computes the vector embeddings for words in a large text corpus. 
+#
+# INPUT:
+# -------------------------------------------------------------------------------- 
+# input                 1D input corpus in CSV format.
+# seed                  Random seed for reproducibility.
+# vector_size           Dimensionality of word vectors, V.
+# eta                   Learning rate for optimization, recommended value: 0.05.
+# alpha                 Weighting function parameter, recommended value: 0.75.
+# x_max                 Maximum co-occurrence value as per the GloVe paper: 100.
+# tol                   Tolerance value to avoid overfitting, recommended value: 1e-4.
+# iterations            Total number of training iterations.
+# print_loss_it         Interval (in iterations) for printing the loss.
+# maxTokens             Maximum number of tokens per text entry.
+# windowSize            Context window size.
+# distanceWeighting     Whether to apply distance-based weighting.
+# symmetric             Determines if the matrix is symmetric (TRUE) or asymmetric (FALSE).
+# ------------------------------------------------------------------------------
+#
+# OUTPUT:
+# ------------------------------------------------------------------------------
+# G                     The word indices and their word vectors, of shape (N, V). Each word is represented as a vector of shape (1, V).
+# ------------------------------------------------------------------------------
+
+
+f_glove = function(
+    Frame[Unknown] input,
+    int seed, int vector_size,
+    double alpha, double eta,
+    double x_max,
+    double tol,
+    int iterations,
+    int print_loss_it,
+    Int maxTokens,
+    Int windowSize,
+    Boolean distanceWeighting,
+    Boolean symmetric)
+    return (frame[Unknown] G){
+        # Two steps: derive the co-occurrence matrix and its index frame from the corpus, then hand both to gloveWithCoocMatrix to compute G.
+        [cooc_matrix, cooc_index] = cooccurrenceMatrix(input, maxTokens, windowSize, distanceWeighting, symmetric);
+        G = gloveWithCoocMatrix(cooc_matrix, cooc_index, seed, vector_size, alpha, eta, x_max, tol, iterations, print_loss_it);
+}
+
+
 init = function(matrix[double] cooc_matrix, double x_max, double alpha)
   return(matrix[double] weights, matrix[double] log_cooc_matrix){
   E = 2.718281828;
@@ -118,45 +163,3 @@ gloveWithCoocMatrix = function(matrix[double] cooc_matrix, frame[Unknown] cooc_i
     print("Given " + iterations + " iterations, " + "stopped (or converged) at the " + final_iter + " iteration / error: " + error);
     G = cbind(cooc_index[,2], as.frame(G));
 }
-
-glove = function(
-    Frame[Unknown] input,
-    int seed, int vector_size,
-    double alpha, double eta,
-    double x_max,
-    double tol,
-    int iterations,
-    int print_loss_it,
-    Int maxTokens,
-    Int windowSize,
-    Boolean distanceWeighting,
-    Boolean symmetric)
-    return (frame[Unknown] G){
-
-        /*
-        * Main function to Computes the vector embeddings for words in a large text corpus.
-        * INPUT:
-        * ------------------------------------------------------------------------------
-        * - input (Frame[Unknown]): 1DInput corpus in CSV format.
-        * - seed: Random seed for reproducibility.
-        * - vector_size: Dimensionality of word vectors, V.
-        * - eta: Learning rate for optimization, recommended value: 0.05.
-        * - alpha: Weighting function parameter, recommended value: 0.75.
-        * - x_max: Maximum co-occurrence value as per the GloVe paper: 100.
-        * - tol: Tolerance value to avoid overfitting, recommended value: 1e-4.
-        * - iterations: Total number of training iterations.
-        * - print_loss_it: Interval (in iterations) for printing the loss.
-        * - maxTokens (Int): Maximum number of tokens per text entry.
-        * - windowSize (Int): Context window size.
-        * - distanceWeighting (Boolean): Whether to apply distance-based weighting.
-        * - symmetric (Boolean): Determines if the matrix is symmetric (TRUE) or asymmetric (FALSE).
-        * ------------------------------------------------------------------------------
-        * OUTPUT:
-        * ------------------------------------------------------------------------------
-        * G (Frame[Unknown]): The word indices and their word vectors, of shape (N, V). Each represented as a vector, of shape (1,V)
-        * ------------------------------------------------------------------------------
-        */
-
-        [cooc_matrix, cooc_index] = cooccurrenceMatrix(input, maxTokens, windowSize, distanceWeighting, symmetric);
-        G = gloveWithCoocMatrix(cooc_matrix, cooc_index, seed, vector_size, alpha, eta, x_max, tol, iterations, print_loss_it);
-}
diff --git a/scripts/builtin/imputeByKNN.dml b/scripts/builtin/imputeByKNN.dml
index 13136ff2c9a..edd8e7727d2 100644
--- a/scripts/builtin/imputeByKNN.dml
+++ b/scripts/builtin/imputeByKNN.dml
@@ -25,23 +25,16 @@
 # the missing values by column means. Currently, only the column with the most
 # missing values is actually imputed.
 #
-# ------------------------------------------------------------------------------
 # INPUT:
 # ------------------------------------------------------------------------------
-# X           Matrix with missing values, which are represented as NaNs
-# method      Method used for imputing missing values with different performance
-#             and accuracy tradeoffs:
-#             'dist' (default): Compute all-pairs distances and impute the
-#                               missing values by closest. O(N^2 * #features)
-#             'dist_missing':   Compute distances between data and records with
-#                               missing values. O(N*M * #features), assuming
-#                               that the number of records with MV is M<<N.
-#             'dist_sample':    Compute distances between sample of data and
-#                               records with missing values. O(S*M * #features)
-#                               with M<<N and S<<N, but suboptimal imputation.
-# seed        Root seed value for random/sample calls for deterministic behavior
-#             -1 for true randomization
-# sample_frac Sample fraction for 'dist_sample' (value between 0 and 1)
+# X             Matrix with missing values, which are represented as NaNs
+# method        Method used for imputing missing values with different performance and accuracy tradeoffs:\n
+#               - 'dist' (default): Compute all-pairs distances and impute the missing values by closest. O(N^2 * #features)
+#               - 'dist_missing': Compute distances between data and records with missing values. O(N*M * #features), assuming that the number of records with MV is M<<N.
+#               - 'dist_sample': Compute distances between sample of data and records with missing values. O(S*M * #features) with M<<N and S<<N, but suboptimal imputation.
+#
+# seed          Root seed value for random/sample calls for deterministic behavior. -1 for true randomization
+# sample_frac   Sample fraction for 'dist_sample' (value between 0 and 1)
 # ------------------------------------------------------------------------------
 #
 # OUTPUT:
@@ -136,4 +129,4 @@ compute_missing_values = function (Matrix[Double] X, Matrix[Double] filled_matri
     #Get the subset records that need to be imputed
     imputedValue = t(reshaped) %*% aligned
     imputedValue = t(imputedValue)
-}
+}
\ No newline at end of file
diff --git a/scripts/builtin/quantizeByCluster.dml b/scripts/builtin/quantizeByCluster.dml
index 824ac350534..20b0b0c89d4 100644
--- a/scripts/builtin/quantizeByCluster.dml
+++ b/scripts/builtin/quantizeByCluster.dml
@@ -58,7 +58,7 @@
 #           the product quantization. Only relevant when space_decomp = TRUE.
 # ------------------------------------------------------------------------------------------
 
-m_quantizeByCluster = function(Matrix[Double]X, Integer M = 4, Integer k = 10, Integer runs = 10,
+m_quantizeByCluster = function(Matrix[Double] X, Integer M = 4, Integer k = 10, Integer runs = 10,
     Integer max_iter = 1000, Double eps = 1e-6, Integer avg_sample_size_per_centroid = 50, Boolean separate=TRUE, Boolean space_decomp=FALSE, Integer seed = -1)
   return(Matrix[Double] codebook, Matrix[Double] codes, Matrix[Double] R)
 {
@@ -118,5 +118,4 @@ m_quantizeByCluster = function(Matrix[Double]X, Integer M = 4, Integer k = 10, I
       codes[,i] = tmp_c + offset
     }
   }
-}
-
+}
\ No newline at end of file
diff --git a/scripts/builtin/randomForest.dml b/scripts/builtin/randomForest.dml
index 8daeb5bc7f0..53529afb9a9 100644
--- a/scripts/builtin/randomForest.dml
+++ b/scripts/builtin/randomForest.dml
@@ -26,16 +26,17 @@
 # and optionally subset of features (columns). During tree construction, split
 # candidates are additionally chosen on a sample of remaining features.
 #
-# .. code-block::
+# .. code-block:: text
 #
 #   For example, given a feature matrix with features [a,b,c,d]
 #   and the following two trees, M (the output) would look as follows:
 #
 #   (L1)          |a<7|                   |d<5|
-#                /     \                 /     \
+#                /     \\                 /     \\
 #   (L2)     |c<3|     |b<4|         |a<7|     P3:2
-#            /   \     /   \         /   \
+#            /   \\     /   \\         /   \\
 #   (L3)   P1:2 P2:1 P3:1 P4:2     P1:2 P2:1
+#
 #   --> M :=
 #   [[1, 7, 3, 3, 2, 4, 0, 2, 0, 1, 0, 1, 0, 2],  (1st tree)
 #    [4, 5, 1, 7, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0]]  (2nd tree)
diff --git a/scripts/builtin/shapExplainer.dml b/scripts/builtin/shapExplainer.dml
index b78a5dbcefb..39d365bf013 100644
--- a/scripts/builtin/shapExplainer.dml
+++ b/scripts/builtin/shapExplainer.dml
@@ -51,6 +51,7 @@
 # S              Matrix holding the shapley values along the cols, one row per instance.
 # expected       Double holding the average prediction of all instances.
 # -----------------------------------------------------------------------------
+
 s_shapExplainer = function(String model_function, list[unknown] model_args, Matrix[Double] x_instances,
     Matrix[Double] X_bg, Integer n_permutations = 10, Integer n_samples = 100, Integer remove_non_var=0,
     Matrix[Double] partitions=as.matrix(-1), Integer seed = -1, Integer verbose = 0)
diff --git a/scripts/builtin/topk_cleaning.dml b/scripts/builtin/topk_cleaning.dml
index 6f946c7729c..c9987320928 100644
--- a/scripts/builtin/topk_cleaning.dml
+++ b/scripts/builtin/topk_cleaning.dml
@@ -19,8 +19,44 @@
 #
 #-------------------------------------------------------------
 
-# This function cleans top-K item (where K is given as input)for a given list of users.
+# This function cleans top-K item (where K is given as input) for a given list of users.
 # metaData[3, ncol(X)] : metaData[1] stores mask, metaData[2] stores schema, metaData[3] stores FD mask
+#
+# INPUT:
+# ------------------------------------------------------------------------------
+# dataTrain           Training set
+# dataTest            Test set, ignored when cv is set to TRUE
+# metaData            3×n frame with schema, categorical mask, and FD mask for dataTrain
+# primitives          Library of primitive cleaning operators
+# parameters          Hyperparameter search space that matches the primitives
+# refSol              Reference solution
+# evaluationFunc      Name of a SystemDS DML function that scores a pipeline
+# evalFunHp           Hyperparameter matrix for the above evaluation function
+# topK                Number of best pipelines to return
+# resource_val        Maximum resource R for the Bandit search
+# max_iter            Maximum iterations while enumerating logical pipelines
+# lq                  Lower quantile used by utils::doErrorSample when triggered
+# uq                  Upper quantile used by utils::doErrorSample when triggered
+# sample              Fraction of rows to subsample from dataTrain
+# expectedIncrease    Minimum improvement over dirtyScore that a candidate must deliver
+# seed                Seed number
+# cv                  TRUE means k-fold CV, FALSE means hold-out split
+# cvk                 Number of folds if cv = TRUE
+# isLastLabel         TRUE if the last column is the label
+# rowCount            Row-count threshold above which doErrorSample may replace uniform sampling
+# correctTypos        Run spelling correction in the string preprocessing step
+# enablePruning       Enable pruning inside the Bandit phase
+# ------------------------------------------------------------------------------
+#
+# OUTPUT:
+#-------------------------------------------------------------------------------
+# topKPipelines       K cleaned-data pipelines
+# topKHyperParams     Hyperparameter matrix with rows aligning with topKPipelines
+# topKScores          Evaluation scores with rows aligning with topKPipelines
+# dirtyScore          Baseline score on the unclean data
+# evalFunHp           Updated evaluation function hyperparameters
+# applyFunc           Frame of "apply" functions for deploying each of the top-K pipelines
+#-------------------------------------------------------------------------------
 
 source("scripts/pipelines/scripts/utils.dml") as utils;
 source("scripts/pipelines/scripts/enumerateLogical.dml") as lg;
diff --git a/src/main/python/docs/README.md b/src/main/python/docs/README.md
index 61bdd24a3e9..e5bc5c59583 100644
--- a/src/main/python/docs/README.md
+++ b/src/main/python/docs/README.md
@@ -39,4 +39,4 @@ and then run `make html`:
 make html
 ```
 
-The docs will then be created at: `/src/main/python/build`in HTML will be placed in the `./_build` directory.
+The docs will then be created at: `/src/main/python/docs/build/html/`.
\ No newline at end of file
diff --git a/src/main/python/docs/requires-docs.txt b/src/main/python/docs/requires-docs.txt
index 9305d9320fa..1022b652401 100644
--- a/src/main/python/docs/requires-docs.txt
+++ b/src/main/python/docs/requires-docs.txt
@@ -24,4 +24,5 @@ sphinx_rtd_theme
 numpy
 py4j
 scipy
-requests
\ No newline at end of file
+requests
+pandas
\ No newline at end of file
diff --git a/src/main/python/generator/dml_parser.py b/src/main/python/generator/dml_parser.py
index 2abffb021f6..8e835e96a12 100644
--- a/src/main/python/generator/dml_parser.py
+++ b/src/main/python/generator/dml_parser.py
@@ -28,7 +28,7 @@
 class FunctionParser(object):
     header_input_pattern = r"^[ \t\n]*[#]+[ \t\n]*input[ \t\n\w:;.,#]*[\s#\-]*[#]+[\w\s\d:,.()\" \t\n\-]*[\s#\-]*$"
     header_output_pattern = r"[\s#\-]*[#]+[ \t]*(return|output)[ \t\w:;.,#]*[\s#\-]*[#]+[\w\s\d:,.()\" \t\-]*[\s#\-]*$"
-    function_pattern = r"^[ms]_[\w]+[ \t\n]*=[ \t\n]+function[^#{]*"
+    function_pattern = r"^[fms]_[\w]+[ \t\n]*=[ \t\n]+function[^#{]*"
     # parameter_pattern = r"^m_[\w]+[\s]+=[\s]+function[\s]*\([\s]*(?=return)[\s]*\)[\s]*return[\s]*\([\s]*([\w\[\]\s,\d=.\-_]*)[\s]*\)[\s]*"
     header_parameter_pattern = r"[\s#\-]*[#]+[ \t]*([\w|-]+)[\s]+([\w]+)[\s]+([\w,\d.\"\-]+)[\s]+([\w|\W]+)"
     divider_pattern = r"[\s#\-]*"
@@ -57,15 +57,13 @@ def parse_function(self, path: str):
         """
         file_name = os.path.basename(path)
         function_name, extension = os.path.splitext(file_name)
-        # try:
-        function_definition = self.find_function_definition(path)
-        # pattern = re.compile(
-        #     self.__class__.parameter_pattern, flags=re.I | re.M)
-        # match = pattern.match(function_definition)
-
-        # if match:
+        try:
+            function_definition = self.find_function_definition(path)
+        except AttributeError:
+            print(f"[INFO] Skipping '{function_name}': does not match function name pattern. It is likely an internal function.")
+            return
 
-        func_split = function_definition.split("function")[1].split("return")
+        func_split = function_definition.split("function", 1)[1].split("return")
        
         param_str = self.extract_param_str(func_split[0])
         retval_str = None
diff --git a/src/main/python/systemds/operator/algorithm/__init__.py b/src/main/python/systemds/operator/algorithm/__init__.py
index bd611ee6cc6..e8cb4c04e95 100644
--- a/src/main/python/systemds/operator/algorithm/__init__.py
+++ b/src/main/python/systemds/operator/algorithm/__init__.py
@@ -31,6 +31,7 @@
 from .builtin.alsPredict import alsPredict 
 from .builtin.alsTopkPredict import alsTopkPredict 
 from .builtin.ampute import ampute 
+from .builtin.apply_pipeline import apply_pipeline 
 from .builtin.arima import arima 
 from .builtin.auc import auc 
 from .builtin.autoencoder_2layer import autoencoder_2layer 
@@ -38,7 +39,10 @@
 from .builtin.bivar import bivar 
 from .builtin.components import components 
 from .builtin.confusionMatrix import confusionMatrix 
+from .builtin.cooccurrenceMatrix import cooccurrenceMatrix 
 from .builtin.cor import cor 
+from .builtin.correctTypos import correctTypos 
+from .builtin.correctTyposApply import correctTyposApply 
 from .builtin.cov import cov 
 from .builtin.cox import cox 
 from .builtin.cspline import cspline 
@@ -49,16 +53,24 @@
 from .builtin.dbscanApply import dbscanApply 
 from .builtin.decisionTree import decisionTree 
 from .builtin.decisionTreePredict import decisionTreePredict 
+from .builtin.dedup import dedup 
 from .builtin.deepWalk import deepWalk 
+from .builtin.denialConstraints import denialConstraints 
 from .builtin.differenceStatistics import differenceStatistics 
 from .builtin.discoverFD import discoverFD 
 from .builtin.dist import dist 
+from .builtin.dmv import dmv 
+from .builtin.ema import ema 
 from .builtin.executePipeline import executePipeline 
 from .builtin.f1Score import f1Score 
 from .builtin.fdr import fdr 
 from .builtin.ffPredict import ffPredict 
 from .builtin.ffTrain import ffTrain 
+from .builtin.fit_pipeline import fit_pipeline 
+from .builtin.fixInvalidLengths import fixInvalidLengths 
+from .builtin.fixInvalidLengthsApply import fixInvalidLengthsApply 
 from .builtin.flattenQuantile import flattenQuantile 
+from .builtin.frameSort import frameSort 
 from .builtin.frequencyEncode import frequencyEncode 
 from .builtin.frequencyEncodeApply import frequencyEncodeApply 
 from .builtin.garch import garch 
@@ -66,6 +78,7 @@
 from .builtin.getAccuracy import getAccuracy 
 from .builtin.glm import glm 
 from .builtin.glmPredict import glmPredict 
+from .builtin.glove import glove 
 from .builtin.gmm import gmm 
 from .builtin.gmmPredict import gmmPredict 
 from .builtin.gnmf import gnmf 
@@ -97,6 +110,7 @@
 from .builtin.impurityMeasures import impurityMeasures 
 from .builtin.imputeByFD import imputeByFD 
 from .builtin.imputeByFDApply import imputeByFDApply 
+from .builtin.imputeByKNN import imputeByKNN 
 from .builtin.imputeByMean import imputeByMean 
 from .builtin.imputeByMeanApply import imputeByMeanApply 
 from .builtin.imputeByMedian import imputeByMedian 
@@ -126,6 +140,7 @@
 from .builtin.mape import mape 
 from .builtin.matrixProfile import matrixProfile 
 from .builtin.mcc import mcc 
+from .builtin.mdedup import mdedup 
 from .builtin.mice import mice 
 from .builtin.miceApply import miceApply 
 from .builtin.mse import mse 
@@ -153,6 +168,7 @@
 from .builtin.pnmf import pnmf 
 from .builtin.ppca import ppca 
 from .builtin.psnr import psnr 
+from .builtin.quantizeByCluster import quantizeByCluster 
 from .builtin.raGroupby import raGroupby 
 from .builtin.raJoin import raJoin 
 from .builtin.raSelection import raSelection 
@@ -165,6 +181,7 @@
 from .builtin.selectByVarThresh import selectByVarThresh 
 from .builtin.ses import ses 
 from .builtin.setdiff import setdiff 
+from .builtin.shapExplainer import shapExplainer 
 from .builtin.sherlock import sherlock 
 from .builtin.sherlockPredict import sherlockPredict 
 from .builtin.shortestPath import shortestPath 
@@ -189,10 +206,12 @@
 from .builtin.tSNE import tSNE 
 from .builtin.toOneHot import toOneHot 
 from .builtin.tomeklink import tomeklink 
+from .builtin.topk_cleaning import topk_cleaning 
 from .builtin.underSampling import underSampling 
 from .builtin.union import union 
 from .builtin.univar import univar 
 from .builtin.vectorToCsv import vectorToCsv 
+from .builtin.wer import wer 
 from .builtin.winsorize import winsorize 
 from .builtin.winsorizeApply import winsorizeApply 
 from .builtin.xdummy1 import xdummy1 
@@ -211,6 +230,7 @@
  'alsPredict',
  'alsTopkPredict',
  'ampute',
+ 'apply_pipeline',
  'arima',
  'auc',
  'autoencoder_2layer',
@@ -218,7 +238,10 @@
  'bivar',
  'components',
  'confusionMatrix',
+ 'cooccurrenceMatrix',
  'cor',
+ 'correctTypos',
+ 'correctTyposApply',
  'cov',
  'cox',
  'cspline',
@@ -229,16 +252,24 @@
  'dbscanApply',
  'decisionTree',
  'decisionTreePredict',
+ 'dedup',
  'deepWalk',
+ 'denialConstraints',
  'differenceStatistics',
  'discoverFD',
  'dist',
+ 'dmv',
+ 'ema',
  'executePipeline',
  'f1Score',
  'fdr',
  'ffPredict',
  'ffTrain',
+ 'fit_pipeline',
+ 'fixInvalidLengths',
+ 'fixInvalidLengthsApply',
  'flattenQuantile',
+ 'frameSort',
  'frequencyEncode',
  'frequencyEncodeApply',
  'garch',
@@ -246,6 +277,7 @@
  'getAccuracy',
  'glm',
  'glmPredict',
+ 'glove',
  'gmm',
  'gmmPredict',
  'gnmf',
@@ -277,6 +309,7 @@
  'impurityMeasures',
  'imputeByFD',
  'imputeByFDApply',
+ 'imputeByKNN',
  'imputeByMean',
  'imputeByMeanApply',
  'imputeByMedian',
@@ -306,6 +339,7 @@
  'mape',
  'matrixProfile',
  'mcc',
+ 'mdedup',
  'mice',
  'miceApply',
  'mse',
@@ -333,6 +367,7 @@
  'pnmf',
  'ppca',
  'psnr',
+ 'quantizeByCluster',
  'raGroupby',
  'raJoin',
  'raSelection',
@@ -345,6 +380,7 @@
  'selectByVarThresh',
  'ses',
  'setdiff',
+ 'shapExplainer',
  'sherlock',
  'sherlockPredict',
  'shortestPath',
@@ -369,10 +405,12 @@
  'tSNE',
  'toOneHot',
  'tomeklink',
+ 'topk_cleaning',
  'underSampling',
  'union',
  'univar',
  'vectorToCsv',
+ 'wer',
  'winsorize',
  'winsorizeApply',
  'xdummy1',
diff --git a/src/main/python/systemds/operator/algorithm/builtin/ampute.py b/src/main/python/systemds/operator/algorithm/builtin/ampute.py
index d323000710e..fb3a82a380f 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/ampute.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/ampute.py
@@ -33,6 +33,16 @@ def ampute(X: Matrix,
     """
      This function injects missing values into a multivariate a given dataset, similarly to the ampute() method in R's MICE package.
     
+    
+    
+    :param X: a multivariate numeric dataset [shape: n-by-m]
+    :param prop: a number in the (0, 1] range specifying the proportion of amputed rows across the entire dataset
+    :param patterns: a pattern matrix of 0's and 1's [shape: k-by-m] where each row corresponds to a pattern. 0 indicates that a variable should have missing values and 1 indicating that a variable should remain complete
+    :param freq: a vector [length: k] containing the relative frequency with which each pattern in the patterns matrix should occur
+    :param mech: a string [either "MAR", "MNAR", or "MCAR"] specifying the missingness mechanism. Chosen "MAR" and "MNAR" settings will be overridden if a non-default weight matrix is specified
+    :param weights: a weight matrix [shape: k-by-m], containing weights that will be used to calculate the weighted sum scores. Will be overridden if mech == "MCAR"
+    :param seed: a manually defined seed for reproducible RNG
+    :return: amputed output dataset
     """
 
     params_dict = {'X': X}
diff --git a/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py b/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py
index be1100b4127..63ffc3f66b3 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/apply_pipeline.py
@@ -25,7 +25,6 @@
 from typing import Dict, Iterable
 
 from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
-from systemds.script_building.dag import OutputType
 from systemds.utils.consts import VALID_INPUT_TYPES
 
 
diff --git a/src/main/python/systemds/operator/algorithm/builtin/confusionMatrix.py b/src/main/python/systemds/operator/algorithm/builtin/confusionMatrix.py
index 81c549b5982..66a01780b0e 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/confusionMatrix.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/confusionMatrix.py
@@ -35,7 +35,7 @@ def confusionMatrix(P: Matrix,
      and actual labels. We return both the counts and relative frequency
      (normalized by sum of true labels)
     
-     .. code-block::
+     .. code-block:: text
     
                        True Labels
                          1    2
diff --git a/src/main/python/systemds/operator/algorithm/builtin/cooccurrenceMatrix.py b/src/main/python/systemds/operator/algorithm/builtin/cooccurrenceMatrix.py
new file mode 100644
index 00000000000..6df77d3e7dd
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/cooccurrenceMatrix.py
@@ -0,0 +1,58 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+# Autogenerated By   : src/main/python/generator/generator.py
+# Autogenerated From : scripts/builtin/cooccurrenceMatrix.dml
+
+from typing import Dict, Iterable
+
+from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
+from systemds.utils.consts import VALID_INPUT_TYPES
+
+
+def cooccurrenceMatrix(input: Frame,
+                       maxTokens: int,
+                       windowSize: int,
+                       distanceWeighting: bool,
+                       symmetric: bool):
+    """
+     Cleans and processes text data by removing punctuation, converting it to lowercase, and reformatting.
+     Adds an index column to the result. The implementation is based on
+     https://github.com/stanfordnlp/GloVe/blob/master/src/cooccur.c
+    
+     NOTE(review): this call produces two outputs (a Matrix and a Frame), but only one Frame is documented below - confirm against the DML header.
+    
+    :param input: (Frame[Unknown]): 1D input data frame containing text data.
+    :return: (Frame[Unknown]): Processed text data with an index column.
+    """
+
+    params_dict = {'input': input, 'maxTokens': maxTokens, 'windowSize': windowSize, 'distanceWeighting': distanceWeighting, 'symmetric': symmetric}
+    
+    vX_0 = Matrix(input.sds_context, '')
+    vX_1 = Frame(input.sds_context, '')
+    output_nodes = [vX_0, vX_1, ]
+
+    op = MultiReturn(input.sds_context, 'cooccurrenceMatrix', output_nodes, named_input_nodes=params_dict)
+
+    vX_0._unnamed_input_nodes = [op]
+    vX_1._unnamed_input_nodes = [op]
+
+    return op
diff --git a/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py b/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py
index 321a1949f58..64354a9bc3e 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/correctTypos.py
@@ -25,7 +25,6 @@
 from typing import Dict, Iterable
 
 from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
-from systemds.script_building.dag import OutputType
 from systemds.utils.consts import VALID_INPUT_TYPES
 
 
diff --git a/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py b/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py
index 0a2c61a6f40..5da8769509c 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/correctTyposApply.py
@@ -25,7 +25,6 @@
 from typing import Dict, Iterable
 
 from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
-from systemds.script_building.dag import OutputType
 from systemds.utils.consts import VALID_INPUT_TYPES
 
 
diff --git a/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py b/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py
index a1a751d0aad..3fe565b8c7e 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/decisionTree.py
@@ -44,9 +44,9 @@ def decisionTree(X: Matrix,
        and the following trees, M would look as follows:
     
        (L1)               |d<5|
-                         /     \
+                         /     \\
        (L2)           P1:2    |a<7|
-                              /   \
+                              /   \\
        (L3)                 P2:2 P3:1
     
        --> M :=
diff --git a/src/main/python/systemds/operator/algorithm/builtin/dedup.py b/src/main/python/systemds/operator/algorithm/builtin/dedup.py
new file mode 100644
index 00000000000..13d5c35a41e
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/dedup.py
@@ -0,0 +1,68 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+# Autogenerated By   : src/main/python/generator/generator.py
+# Autogenerated From : scripts/builtin/dedup.dml
+
+from typing import Dict, Iterable
+
+from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
+from systemds.utils.consts import VALID_INPUT_TYPES
+
+
+def dedup(X: Frame,
+          gloveMatrix: Matrix,
+          vocab: Frame,
+          **kwargs: Dict[str, VALID_INPUT_TYPES]):
+    """
+     Builtin for deduplication using distributed representations (DRs) and
+     locality-sensitive hashing (LSH) based blocking.
+    
+     The function encodes each input tuple as a dense vector using pre-trained GloVe embeddings (simple averaging), 
+     groups semantically similar tuples via LSH into buckets, and compares only those pairs for deduplication.
+     
+    
+    
+    
+    :param X: Input Frame[String] with n rows and d columns (raw tuples)
+    :param gloveMatrix: Matrix[Double] of size |V| × e (pretrained GloVe embeddings), where |V| is the number of words and e is the embedding dimension
+    :param vocab: Frame[String] of size |V| × 1 (vocabulary aligned with gloveMatrix)
+    :param similarityMeasure: (optional) String specifying similarity metric: "cosine", "euclidean"
+    :param threshold: (optional) Double: threshold value above which tuples are considered duplicates
+    :return: Frame[String] with deduplicated tuples
+        (first occurrence of each duplicate group is retained)
+    :return: Frame[String] with all detected duplicates
+        (i.e., tuples removed from the input)
+    """
+
+    params_dict = {'X': X, 'gloveMatrix': gloveMatrix, 'vocab': vocab}
+    params_dict.update(kwargs)
+    
+    vX_0 = Frame(X.sds_context, '')
+    vX_1 = Frame(X.sds_context, '')
+    output_nodes = [vX_0, vX_1, ]
+
+    op = MultiReturn(X.sds_context, 'dedup', output_nodes, named_input_nodes=params_dict)
+
+    vX_0._unnamed_input_nodes = [op]
+    vX_1._unnamed_input_nodes = [op]
+
+    return op
diff --git a/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py b/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py
index 347502b848e..5cdec212965 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/denialConstraints.py
@@ -25,7 +25,6 @@
 from typing import Dict, Iterable
 
 from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
-from systemds.script_building.dag import OutputType
 from systemds.utils.consts import VALID_INPUT_TYPES
 
 
diff --git a/src/main/python/systemds/operator/algorithm/builtin/differenceStatistics.py b/src/main/python/systemds/operator/algorithm/builtin/differenceStatistics.py
index dfe2218a424..b6597bb6e4b 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/differenceStatistics.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/differenceStatistics.py
@@ -35,6 +35,11 @@ def differenceStatistics(X: Matrix,
      they are different. This can be used for instance in comparison of lossy
      compression techniques, that reduce the fidelity of the data. 
     
+    
+    
+    :param X: First Matrix to compare
+    :param Y: Second Matrix to compare
+    :return: Difference statistics
     """
 
     params_dict = {'X': X, 'Y': Y}
diff --git a/src/main/python/systemds/operator/algorithm/builtin/dmv.py b/src/main/python/systemds/operator/algorithm/builtin/dmv.py
index deaf3ea8a6b..2955e505e13 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/dmv.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/dmv.py
@@ -25,7 +25,6 @@
 from typing import Dict, Iterable
 
 from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
-from systemds.script_building.dag import OutputType
 from systemds.utils.consts import VALID_INPUT_TYPES
 
 
diff --git a/src/main/python/systemds/operator/algorithm/builtin/ema.py b/src/main/python/systemds/operator/algorithm/builtin/ema.py
index 4e0ccca6bbb..90f9a852d76 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/ema.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/ema.py
@@ -25,7 +25,6 @@
 from typing import Dict, Iterable
 
 from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
-from systemds.script_building.dag import OutputType
 from systemds.utils.consts import VALID_INPUT_TYPES
 
 
diff --git a/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py b/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py
index 1fffb46f100..66750fc0711 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/executePipeline.py
@@ -28,7 +28,18 @@
 from systemds.utils.consts import VALID_INPUT_TYPES
 
 
-def executePipeline(X: Matrix):
+def executePipeline(pipeline: Frame,
+                    Xtrain: Matrix,
+                    Ytrain: Matrix,
+                    Xtest: Matrix,
+                    Ytest: Matrix,
+                    metaList: List,
+                    hyperParameters: Matrix,
+                    flagsCount: int,
+                    verbose: bool,
+                    startInd: int,
+                    endInd: int,
+                    **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
      This function execute pipeline.
     
@@ -56,17 +67,30 @@ def executePipeline(X: Matrix):
     :return: ---
     """
 
-    params_dict = {'X': X}
+    params_dict = {'pipeline': pipeline, 'Xtrain': Xtrain, 'Ytrain': Ytrain, 'Xtest': Xtest, 'Ytest': Ytest, 'metaList': metaList, 'hyperParameters': hyperParameters, 'flagsCount': flagsCount, 'verbose': verbose, 'startInd': startInd, 'endInd': endInd}
+    params_dict.update(kwargs)
     
-    vX_0 = Matrix(X.sds_context, '')
-    vX_1 = Matrix(X.sds_context, '')
-    vX_2 = Matrix(X.sds_context, '')
-    output_nodes = [vX_0, vX_1, vX_2, ]
+    vX_0 = Matrix(pipeline.sds_context, '')
+    vX_1 = Matrix(pipeline.sds_context, '')
+    vX_2 = Matrix(pipeline.sds_context, '')
+    vX_3 = Matrix(pipeline.sds_context, '')
+    vX_4 = Scalar(pipeline.sds_context, '')
+    vX_5 = Matrix(pipeline.sds_context, '')
+    vX_6 = Matrix(pipeline.sds_context, '')
+    vX_7 = Scalar(pipeline.sds_context, '')
+    vX_8 = List(pipeline.sds_context, '')
+    output_nodes = [vX_0, vX_1, vX_2, vX_3, vX_4, vX_5, vX_6, vX_7, vX_8, ]
 
-    op = MultiReturn(X.sds_context, 'executePipeline', output_nodes, named_input_nodes=params_dict)
+    op = MultiReturn(pipeline.sds_context, 'executePipeline', output_nodes, named_input_nodes=params_dict)
 
     vX_0._unnamed_input_nodes = [op]
     vX_1._unnamed_input_nodes = [op]
     vX_2._unnamed_input_nodes = [op]
+    vX_3._unnamed_input_nodes = [op]
+    vX_4._unnamed_input_nodes = [op]
+    vX_5._unnamed_input_nodes = [op]
+    vX_6._unnamed_input_nodes = [op]
+    vX_7._unnamed_input_nodes = [op]
+    vX_8._unnamed_input_nodes = [op]
 
     return op
diff --git a/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py b/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py
index 5de40c745f8..48363035d8b 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/fit_pipeline.py
@@ -25,7 +25,6 @@
 from typing import Dict, Iterable
 
 from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
-from systemds.script_building.dag import OutputType
 from systemds.utils.consts import VALID_INPUT_TYPES
 
 
diff --git a/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py
index b635f31b298..cc0e83a51e4 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengths.py
@@ -25,7 +25,6 @@
 from typing import Dict, Iterable
 
 from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
-from systemds.script_building.dag import OutputType
 from systemds.utils.consts import VALID_INPUT_TYPES
 
 
diff --git a/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py
index cc8fe68aacc..ed2572368d3 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/fixInvalidLengthsApply.py
@@ -25,7 +25,6 @@
 from typing import Dict, Iterable
 
 from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
-from systemds.script_building.dag import OutputType
 from systemds.utils.consts import VALID_INPUT_TYPES
 
 
diff --git a/src/main/python/systemds/operator/algorithm/builtin/frameSort.py b/src/main/python/systemds/operator/algorithm/builtin/frameSort.py
index 0bfc7f3afec..2575baefe4b 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/frameSort.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/frameSort.py
@@ -25,7 +25,6 @@
 from typing import Dict, Iterable
 
 from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
-from systemds.script_building.dag import OutputType
 from systemds.utils.consts import VALID_INPUT_TYPES
 
 
diff --git a/src/main/python/systemds/operator/algorithm/builtin/glove.py b/src/main/python/systemds/operator/algorithm/builtin/glove.py
new file mode 100644
index 00000000000..3df38dfbfbd
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/glove.py
@@ -0,0 +1,68 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+# Autogenerated By   : src/main/python/generator/generator.py
+# Autogenerated From : scripts/builtin/glove.dml
+
+from typing import Dict, Iterable
+
+from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
+from systemds.utils.consts import VALID_INPUT_TYPES
+
+
+def glove(input: Frame,
+          seed: int,
+          vector_size: int,
+          alpha: float,
+          eta: float,
+          x_max: float,
+          tol: float,
+          iterations: int,
+          print_loss_it: int,
+          maxTokens: int,
+          windowSize: int,
+          distanceWeighting: bool,
+          symmetric: bool):
+    """
+     Computes the vector embeddings for words in a large text corpus. 
+    
+    
+    
+    :param input: 1D input corpus in CSV format.
+    :param seed: Random seed for reproducibility.
+    :param vector_size: Dimensionality of word vectors, V.
+    :param eta: Learning rate for optimization, recommended value: 0.05.
+    :param alpha: Weighting function parameter, recommended value: 0.75.
+    :param x_max: Maximum co-occurrence value as per the GloVe paper: 100.
+    :param tol: Tolerance value to avoid overfitting, recommended value: 1e-4.
+    :param iterations: Total number of training iterations.
+    :param print_loss_it: Interval (in iterations) for printing the loss.
+    :param maxTokens: Maximum number of tokens per text entry.
+    :param windowSize: Context window size.
+    :param distanceWeighting: Whether to apply distance-based weighting.
+    :param symmetric: Determines if the matrix is symmetric (TRUE) or asymmetric (FALSE).
+    :return: The word indices and their word vectors, of shape (N, V). Each represented as a vector, of shape (1,V)
+    """
+
+    params_dict = {'input': input, 'seed': seed, 'vector_size': vector_size, 'alpha': alpha, 'eta': eta, 'x_max': x_max, 'tol': tol, 'iterations': iterations, 'print_loss_it': print_loss_it, 'maxTokens': maxTokens, 'windowSize': windowSize, 'distanceWeighting': distanceWeighting, 'symmetric': symmetric}
+    return Matrix(input.sds_context,
+        'glove',
+        named_input_nodes=params_dict)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/imputeByKNN.py b/src/main/python/systemds/operator/algorithm/builtin/imputeByKNN.py
index fcc096180b9..f04aa098514 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/imputeByKNN.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/imputeByKNN.py
@@ -25,13 +25,30 @@
 from typing import Dict, Iterable
 
 from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
-from systemds.script_building.dag import OutputType
 from systemds.utils.consts import VALID_INPUT_TYPES
 
 
 def imputeByKNN(X: Matrix,
                 **kwargs: Dict[str, VALID_INPUT_TYPES]):
+    """
+     Imputes missing values, indicated by NaNs, using KNN-based methods
+     (k-nearest neighbors by euclidean distance). In order to avoid NaNs in
+     distance computation and meaningful nearest neighbor search, we initialize
+     the missing values by column means. Currently, only the column with the most
+     missing values is actually imputed.
     
+    
+    
+    :param X: Matrix with missing values, which are represented as NaNs
+    :param method: Method used for imputing missing values with different performance and accuracy tradeoffs:\n
+        - 'dist' (default): Compute all-pairs distances and impute the missing values by closest. O(N^2 * #features)
+        - 'dist_missing': Compute distances between data and records with missing values. O(N*M * #features), assuming that the number of records with MV is M<<N.
+        - 'dist_sample': Compute distances between sample of data and records with missing values. O(S*M * #features) with M<<N and S<<N, but suboptimal imputation.
+    :param seed: Root seed value for random/sample calls for deterministic behavior. -1 for true randomization
+    :param sample_frac: Sample fraction for 'dist_sample' (value between 0 and 1)
+    :return: Imputed dataset
+    """
+
     params_dict = {'X': X}
     params_dict.update(kwargs)
     return Matrix(X.sds_context,
diff --git a/src/main/python/systemds/operator/algorithm/builtin/mdedup.py b/src/main/python/systemds/operator/algorithm/builtin/mdedup.py
index 85d93d5c2cc..cbcc15d43b2 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/mdedup.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/mdedup.py
@@ -25,7 +25,6 @@
 from typing import Dict, Iterable
 
 from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
-from systemds.script_building.dag import OutputType
 from systemds.utils.consts import VALID_INPUT_TYPES
 
 
diff --git a/src/main/python/systemds/operator/algorithm/builtin/quantizeByCluster.py b/src/main/python/systemds/operator/algorithm/builtin/quantizeByCluster.py
new file mode 100644
index 00000000000..5afb96412b9
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/quantizeByCluster.py
@@ -0,0 +1,83 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+# Autogenerated By   : src/main/python/generator/generator.py
+# Autogenerated From : scripts/builtin/quantizeByCluster.dml
+
+from typing import Dict, Iterable
+
+from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
+from systemds.utils.consts import VALID_INPUT_TYPES
+
+
+def quantizeByCluster(X: Matrix,
+                      **kwargs: Dict[str, VALID_INPUT_TYPES]):
+    """
+     The quantizeByCluster function implements product quantization. Initially, it
+     divides the original vector space into M subspaces. The resulting lower dimensional
+     subvectors are then quantized. If the column count is not divisible by the number of
+     subspaces M, the data is padded with zeros. Optimal space decomposition can be
+     computed when the data follows a Gaussian distribution. The function uses kmeans for
+     quantizing and svd to compute the space decomposition.
+    
+    
+    
+    :param X: The input matrix to perform product quantization on
+    :param M: Number of subspaces
+    :param k: Number of vectors in the subcodebooks
+    :param runs: Number of runs (with different initial centroids)
+    :param max_iter: Maximum number of iterations per run
+    :param eps: Tolerance (epsilon) for WCSS change ratio
+    :param avg_sample_size_per_centroid: Average number of records per centroid in data samples
+    :param separate: Cluster subspaces separately. If value is set to true,
+        kmeans is run M times, once for each subspace. Otherwise
+        kmeans is run only once.
+    :param space_decomp: Decompose the vector space by multiplying the input
+        matrix X with an orthogonal matrix R. Assumes the data
+        follows a parametric Gaussian distribution.
+        Time complexity is O(nrow(X)^2 * min(nrow(X), ncol(X))).
+    :param seed: The seed used for initial sampling. If set to -1 random
+        seeds are selected.
+    :return: The matrix containing the centroids. If clustered separately, the ith
+        subcodebook is the ith chunk of size k. The codebook matrix has the dimensions
+        [k*M x ncol(X)/M].
+    :return: The mapping of vectors to centroids. Each vector of the input matrix X is mapped
+        onto a vector of codes. The entries in the codes matrix are the indices of
+        the vectors in the codebook. The codes matrix has the dimensions [nrow(X) x M].
+    :return: The orthogonal matrix R which is applied to the input matrix X before performing
+        the product quantization. Only relevant when space_decomp = TRUE.
+    """
+
+    params_dict = {'X': X}
+    params_dict.update(kwargs)
+    
+    vX_0 = Matrix(X.sds_context, '')
+    vX_1 = Matrix(X.sds_context, '')
+    vX_2 = Matrix(X.sds_context, '')
+    output_nodes = [vX_0, vX_1, vX_2, ]
+
+    op = MultiReturn(X.sds_context, 'quantizeByCluster', output_nodes, named_input_nodes=params_dict)
+
+    vX_0._unnamed_input_nodes = [op]
+    vX_1._unnamed_input_nodes = [op]
+    vX_2._unnamed_input_nodes = [op]
+
+    return op
diff --git a/src/main/python/systemds/operator/algorithm/builtin/randomForest.py b/src/main/python/systemds/operator/algorithm/builtin/randomForest.py
index 88b1c9145b8..177ebd3fd38 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/randomForest.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/randomForest.py
@@ -40,16 +40,17 @@ def randomForest(X: Matrix,
      and optionally subset of features (columns). During tree construction, split
      candidates are additionally chosen on a sample of remaining features.
     
-     .. code-block::
+     .. code-block:: text
     
        For example, given a feature matrix with features [a,b,c,d]
        and the following two trees, M (the output) would look as follows:
     
        (L1)          |a<7|                   |d<5|
-                    /     \                 /     \
+                    /     \\                 /     \\
        (L2)     |c<3|     |b<4|         |a<7|     P3:2
-                /   \     /   \         /   \
+                /   \\     /   \\         /   \\
        (L3)   P1:2 P2:1 P3:1 P4:2     P1:2 P2:1
+    
        --> M :=
        [[1, 7, 3, 3, 2, 4, 0, 2, 0, 1, 0, 1, 0, 2],  (1st tree)
         [4, 5, 1, 7, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0]]  (2nd tree)
diff --git a/src/main/python/systemds/operator/algorithm/builtin/shapExplainer.py b/src/main/python/systemds/operator/algorithm/builtin/shapExplainer.py
new file mode 100644
index 00000000000..42a0afb6e69
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/shapExplainer.py
@@ -0,0 +1,78 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+# Autogenerated By   : src/main/python/generator/generator.py
+# Autogenerated From : scripts/builtin/shapExplainer.dml
+
+from typing import Dict, Iterable
+
+from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
+from systemds.utils.consts import VALID_INPUT_TYPES
+
+
+def shapExplainer(model_function: str,
+                  model_args: List,
+                  x_instances: Matrix,
+                  X_bg: Matrix,
+                  **kwargs: Dict[str, VALID_INPUT_TYPES]):
+    """
+     Computes shapley values for multiple instances in parallel using antithetic permutation sampling.
+     The resulting matrix phis holds the shapley values for each feature in the column given by the index of the feature in the sample.
+    
+     This method first creates two large matrices for masks and masked background data for all permutations and
+     then runs in parallel on all instances in x.
+     While the prepared matrices can become very large (2 * #features * #permutations * #n_samples * #features),
+     the preparation of a row for the model call breaks down to a single element-wise multiplication of this mask with the row and
+     an addition to the masked background data, since masks can be reused for each instance.
+    
+    
+    
+    :param model_function: The function of the model to be evaluated as a String. This function has to take a matrix of samples
+        and return a vector of predictions.
+        It might be useful to wrap the model into a function that takes and returns the desired shapes and
+        use this wrapper here.
+    :param model_args: Arguments in order for the model, if desired. This will be prepended by the created instances-matrix.
+    :param x_instances: Multiple instances as rows for which to compute the shapley values.
+    :param X_bg: The background dataset from which to pull the random samples to perform Monte Carlo integration.
+    :param n_permutations: The number of permutations. Defaults to 10. Theoretically, 1 should already be enough for models with up
+        to second order interaction effects.
+    :param n_samples: Number of samples from X_bg used for marginalization.
+    :param remove_non_var: EXPERIMENTAL: If set, for every instance the variance of each feature is checked against this feature in the
+        background data. If it does not change, we do not run any model calls for it.
+    :param seed: A seed, in case the sampling has to be deterministic.
+    :param verbose: A boolean to enable logging of each step of the function.
+    :return: Matrix holding the shapley values along the cols, one row per instance.
+    :return: Double holding the average prediction of all instances.
+    """
+
+    params_dict = {'model_function': model_function, 'model_args': model_args, 'x_instances': x_instances, 'X_bg': X_bg}
+    params_dict.update(kwargs)
+    # model_function is a plain str and has no sds_context; take the context from the Matrix input x_instances.
+    vX_0 = Matrix(x_instances.sds_context, '')
+    vX_1 = Scalar(x_instances.sds_context, '')
+    output_nodes = [vX_0, vX_1, ]
+
+    op = MultiReturn(x_instances.sds_context, 'shapExplainer', output_nodes, named_input_nodes=params_dict)
+
+    vX_0._unnamed_input_nodes = [op]
+    vX_1._unnamed_input_nodes = [op]
+
+    return op
diff --git a/src/main/python/systemds/operator/algorithm/builtin/topk_cleaning.py b/src/main/python/systemds/operator/algorithm/builtin/topk_cleaning.py
index 16a20d20e08..270a6d7b166 100644
--- a/src/main/python/systemds/operator/algorithm/builtin/topk_cleaning.py
+++ b/src/main/python/systemds/operator/algorithm/builtin/topk_cleaning.py
@@ -25,7 +25,6 @@
 from typing import Dict, Iterable
 
 from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
-from systemds.script_building.dag import OutputType
 from systemds.utils.consts import VALID_INPUT_TYPES
 
 
@@ -36,8 +35,39 @@ def topk_cleaning(dataTrain: Frame,
                   evalFunHp: Matrix,
                   **kwargs: Dict[str, VALID_INPUT_TYPES]):
     """
-     This function cleans top-K item (where K is given as input)for a given list of users.
+     This function cleans top-K item (where K is given as input) for a given list of users.
      metaData[3, ncol(X)] : metaData[1] stores mask, metaData[2] stores schema, metaData[3] stores FD mask
+    
+    
+    
+    :param dataTrain: Training set
+    :param dataTest: Test set ignored when cv is set to True
+    :param metaData: 3×n frame with schema, categorical mask, and FD mask for dataTrain
+    :param primitives: Library of primitive cleaning operators
+    :param parameters: Hyperparameter search space that matches the primitives
+    :param refSol: Reference solution
+    :param evaluationFunc: Name of a SystemDS DML function that scores a pipeline
+    :param evalFunHp: Hyperparameter matrix for the above evaluation function
+    :param topK: Number of best pipelines to return
+    :param resource_val: Maximum resource R for the Bandit search
+    :param max_iter: Maximum iterations while enumerating logical pipelines
+    :param lq: Lower quantile used by utils::doErrorSample when triggered
+    :param uq: Upper quantile used by utils::doErrorSample when triggered
+    :param sample: Fraction of rows to subsample from dataTrain
+    :param expectedIncrease: Minimum improvement over dirtyScore that a candidate must deliver
+    :param seed: Seed number
+    :param cv: TRUE means k-fold CV, FALSE means hold-out split
+    :param cvk: Number of folds if cv = TRUE
+    :param isLastLabel: TRUE if the last column is the label
+    :param rowCount: Row-count threshold above which doErrorSample may replace uniform sampling
+    :param correctTypos: Run spelling correction in the string preprocessing step
+    :param enablePruning: Enable pruning inside the Bandit phase
+    :return: K cleaned-data pipelines
+    :return: Hyperparameter matrix with rows aligning with topKPipelines
+    :return: Evaluation scores with rows aligning with topKPipelines
+    :return: Baseline score on the unclean data
+    :return: Updated evaluation function hyperparameters
+    :return: Frame of “apply” functions for deploying each of the top-K pipelines
     """
 
     params_dict = {'dataTrain': dataTrain, 'primitives': primitives, 'parameters': parameters, 'evaluationFunc': evaluationFunc, 'evalFunHp': evalFunHp}
diff --git a/src/main/python/systemds/operator/algorithm/builtin/wer.py b/src/main/python/systemds/operator/algorithm/builtin/wer.py
new file mode 100644
index 00000000000..99d278461cf
--- /dev/null
+++ b/src/main/python/systemds/operator/algorithm/builtin/wer.py
@@ -0,0 +1,48 @@
+# -------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# -------------------------------------------------------------
+
+# Autogenerated By   : src/main/python/generator/generator.py
+# Autogenerated From : scripts/builtin/wer.dml
+
+from typing import Dict, Iterable
+
+from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
+from systemds.utils.consts import VALID_INPUT_TYPES
+
+
+def wer(R: Frame,
+        H: Frame):
+    """
+     This built-in function computes the word error rate (WER)
+     defined as wer = (numSubst + numDel + numIns) / length(r)
+    
+    
+    
+    :param R: Input frame of reference strings, shape: [N x 1]
+    :param H: Input frame of hypothesis strings, shape: [N x 1]
+    :return: Output matrix of word error rate per pair of strings,
+        shape: [N x 1], where W[i,1] = wer(R[i,1], H[i,1])
+    """
+
+    params_dict = {'R': R, 'H': H}
+    return Matrix(R.sds_context,
+        'wer',
+        named_input_nodes=params_dict)