diff --git a/packages/nimble/R/nimbleFunction_keywordProcessing.R b/packages/nimble/R/nimbleFunction_keywordProcessing.R
index d67d42f63..8fbcc8b3f 100644
--- a/packages/nimble/R/nimbleFunction_keywordProcessing.R
+++ b/packages/nimble/R/nimbleFunction_keywordProcessing.R
@@ -2015,16 +2015,15 @@ nimDerivsInfoClass_init_impl <- function(.self
.self$model <- model
## wrt nodes
- wrtNodes <- model$expandNodeNames(wrtNodes, returnScalarComponents = TRUE)
+ wrtNodes <- model$expandNodeNames(wrtNodes) # , returnScalarComponents = TRUE)
wrtNodesAccessor <- modelVariableAccessorVector(model,
- wrtNodes,
+ model$expandNodeNames(wrtNodes, returnScalarComponents = TRUE),
logProb = FALSE)
.self$wrtMapInfo <- makeMapInfoFromAccessorVectorFaster(wrtNodesAccessor)
# See comment in makeModelDerivsInfo for explanation of why both of the next
# two lines are necessary.
calcNodes <- model$expandNodeNames(calcNodes)
- calcNodes <- model$expandNodeNames(calcNodes, returnScalarComponents = TRUE)
derivsInfo <- makeModelDerivsInfo_impl(model,
wrtNodes,
calcNodes,
@@ -2063,7 +2062,6 @@ makeOutputNodes <- function(model,
## Presuming (and based on a simple test) that by now,
## input `calcNodes` would include all encompassing nodes
## and that this next step would not introduce any additional components.
- calcNodes <- model$expandNodeNames(calcNodes)
logProbCalcNodeNames <- model$modelDef$nodeName2LogProbName(calcNodes)
isDetermCalcNodes <- model$isDeterm(calcNodes, nodesAlreadyExpanded = TRUE)
modelOutputNodes <- c(model$expandNodeNames(calcNodes[isDetermCalcNodes], returnScalarComponents = TRUE),
@@ -2171,13 +2169,13 @@ makeModelDerivsInfo <- function(model,
wrtNodes,
calcNodes,
dataAsConstantNodes = TRUE) {
- wrtNodes <- model$expandNodeNames(wrtNodes, returnScalarComponents = TRUE)
+ wrtNodes <- model$expandNodeNames(wrtNodes) #### , returnScalarComponents = TRUE)
# This ensures that elements of a non-scalar node become the entire non-scalar node.
calcNodes <- model$expandNodeNames(calcNodes)
# And then this splits into scalar components.
# E.g. if calcNodes is 'mu[1]' but 'mu[1:3]' is a vector node,
# the above call gets `mu[1:3]` and then the below call splits it.
- calcNodes <- model$expandNodeNames(calcNodes, returnScalarComponents = TRUE)
+ #### calcNodes <- model$expandNodeNames(calcNodes, returnScalarComponents = TRUE)
makeModelDerivsInfo_impl(model,
wrtNodes,
calcNodes,
@@ -2205,31 +2203,28 @@ makeModelDerivsInfo_impl <- function(model,
## Gymnastics to convert to actual nodes for computational efficiency are needed because `setdiff` needs to
## operate on node components because `wrt` is in terms of components not nodes.
nonWrtCalcNodes <- setdiff(calcNodes, wrtNodes) # Node components here, since `calcNodes`, `wrtNodes` are as components upon input.
- nonWrtCalcActualNodes <- model$expandNodeNames(nonWrtCalcNodes) # Nodes here. Can be costly for large multivar nodes (say 3 sec. for a 1m-element node).
- nonWrtStochCalcNodes <- nonWrtCalcActualNodes[ model$isStoch(nonWrtCalcActualNodes,
+ nonWrtStochCalcNodes <- nonWrtCalcNodes[ model$isStoch(nonWrtCalcNodes,
nodesAlreadyExpanded = TRUE) ] # Run `isStoch` on nodes not components (issue #1431).
- parentNodes <- getImmediateParentNodes(calcNodes, model)
- parentNodes <- model$expandNodeNames(parentNodes, returnScalarComponents = TRUE)
+ ## Do next steps with nodes as otherwise can be inefficient when `parentNodes` has many components.
+ ## `getImmediateParentNodes` can return components.
+ parentNodes <- model$expandNodeNames(getImmediateParentNodes(calcNodes, model))
neededParentNodes <- setdiff(parentNodes, c(wrtNodes, nonWrtCalcNodes))
-
- extraInputNodes <- model$expandNodeNames(c(neededParentNodes,
- nonWrtStochCalcNodes)) # Need as nodes so `isData` below run on nodes for efficiency.
+ extraInputNodes <- c(neededParentNodes, nonWrtStochCalcNodes)
+
constantNodes <- character()
if(dataAsConstantNodes) {
boolData <- model$isData(extraInputNodes)
- constantNodes <- model$expandNodeNames(extraInputNodes[boolData], returnScalarComponents = TRUE, sort = TRUE)
- extraInputNodes <- model$expandNodeNames(extraInputNodes[!boolData], returnScalarComponents = TRUE, sort = TRUE)
+ constantNodes <- extraInputNodes[boolData]
+ extraInputNodes <- extraInputNodes[!boolData]
## `wrtNodes` components could have crept in when initializing `extraInputNodes` based on nodes.
extraInputNodes <- setdiff(extraInputNodes, wrtNodes)
constantNodes <- setdiff(constantNodes, wrtNodes)
} else {
- extraInputNodes <- model$expandNodeNames(extraInputNodes, returnScalarComponents = TRUE, sort = TRUE)
- ## `wrtNodes` components could have crept in when initializing `extraInputNodes` based on nodes.
extraInputNodes <- setdiff(extraInputNodes, wrtNodes)
}
- list(updateNodes = extraInputNodes,
- constantNodes = constantNodes)
+ list(updateNodes = model$expandNodeNames(extraInputNodes, returnScalarComponents = TRUE, sort = TRUE),
+ constantNodes = model$expandNodeNames(constantNodes, returnScalarComponents = TRUE, sort = TRUE))
}
nimDerivsInfoClass_update_init_impl <- function(.self,
diff --git a/packages/nimble/inst/include/nimble/nimbleCppADbaseClass.cpp b/packages/nimble/inst/include/nimble/nimbleCppADbaseClass.cpp
index 403944fad..c38e0330d 100644
--- a/packages/nimble/inst/include/nimble/nimbleCppADbaseClass.cpp
+++ b/packages/nimble/inst/include/nimble/nimbleCppADbaseClass.cpp
@@ -72,7 +72,7 @@ void getDerivs_internal(vector &independentVars,
// for(size_t ijk = 0; ijk < wrtVector.size(); ++ijk)
// std::cout< &independentVars,
#endif
std::size_t n = independentVars.size(); // dim of independent vars
std::size_t m = ADtape->Range();
-
+
int maxOrder;
bool ordersFound[3];
setOrdersFound(derivOrders, ordersFound, maxOrder);
-
+
// std::cout<<"orders: "< &independentVars,
This results in the returned "y" dimension being 1.
If wrt is provided (conceptually, this could be called "inputInds") *without* inDir,
it is used to filter result "x" dimensions for any order. This means
- for Forward mode only basis vectors in wrt directions will be used, and in
- Reverse mode, results will be filtered to only wrt directions.
+ for Forward mode only basis vectors in wrt directions will be used, and in
+ Reverse mode, results will be filtered to only wrt directions.
The returned "x" dimensions will be length(wrt).
If wrt is provided *with* inDir,
it is used to filter "x" dimension results from Reverse mode, whereas
- Forward mode will go in the inDir direction for "x". This will result in
+ Forward mode will go in the inDir direction for "x". This will result in
Reverse 1 returning "x" dimension 1 and Reverse 2 returning "x" dimensions (1, length(wrt)).
If inDir *is* provided *without* wrt,
if order includes 1, the Jacobian "x" dimension will be 1.
@@ -125,7 +125,7 @@ void getDerivs_internal(vector &independentVars,
If outInds is provided *without* outDir,
it is used to filter result "y" dimensions for any order (including 0). This means
for Reverse mode only basis vectors in outInds directions will be used, and in
- Forward mode, results will be filtered to only outInds directions.
+ Forward mode, results will be filtered to only outInds directions.
The returned "y" dimensions will be length(outInds).
If outInds is provided *with* outDir,
it is used to filter "y" dimension results from Forward mode, whereas
@@ -153,10 +153,12 @@ void getDerivs_internal(vector &independentVars,
std::cout<<"Warning: Both wrt and inDir are provided, which is only meaningful for order 2 (not requested), so wrt will be ignored."< &independentVars,
if(inDir_provided) {
wrtAllx_o1 = false; // not checked anyway
wrtAllx1_o2 = false; // not checked anyway
- res_dimx_o1 = 1;
- res_dimx1_o2 = 1;
+ res_dimx_o1 = num_inDirs; //1
+ res_dimx1_o2 = num_inDirs; //1;
}
} else { // wrt_provided
wrtAllx_o1 = false;
@@ -192,15 +194,15 @@ void getDerivs_internal(vector &independentVars,
wrtAllx2_o2 = false;
res_dimx2_o2 = length_wrt;
if(inDir_provided) { // inDir_provided
- res_dimx_o1 = 1;
- res_dimx1_o2 = 1;
+ res_dimx_o1 = num_inDirs; //1;
+ res_dimx1_o2 = num_inDirs; //1;
} else {
res_dimx_o1 = length_wrt;
res_dimx1_o2 = length_wrt;
}
}
if(outInds_provided) {
- outAlly_o0 = false;
+ outAlly_o0 = false;
outAlly_o1 = false;
outAlly_o2 = false;
res_dimy_o0 = length_outInds;
@@ -217,18 +219,18 @@ void getDerivs_internal(vector &independentVars,
res_dimy_o1 = 1;
}
}
-
+
vector value_ans;
#ifdef _TIME_AD_GENERAL
derivs_run_tape_timer_start();
#endif
-
+
int strategy_zero = 1; // 1 means do it, 0 means skip.
- int strategy_one = 0; // 0=skip. 1=forward regular. 2=reverse subgraph_rev_jac. 3=reverse normal
+ int strategy_one = 0; // 0=skip. 1=forward regular. 2=reverse subgraph_rev_jac. 3=reverse normal
int strategy_two= 0; // 0=skip. 1=reverse regular.
if(maxOrder > 0) {
if (ordersFound[1]) {
- // At least Jacobian is needed.
+ // At least Jacobian is needed.
if(!ordersFound[2]) {
// Hessian is not needed.
if((m <= n || outDir_provided) && !inDir_provided) { // possibly we should compare to wrt_n, but we have use cases where comparing to n works better
@@ -239,7 +241,7 @@ void getDerivs_internal(vector &independentVars,
strategy_one = 2; // use subgraph_rev_jac if jacobian is the final order
if(!ordersFound[0]) {
// If value is not needed, we can skip Forward 0 because subgraph_rev_jac does it itself.
- strategy_zero = 0;
+ strategy_zero = 0;
}
} else {
// m == 1, or outDir_provided, so we can use regular reverse mode.
@@ -260,7 +262,7 @@ void getDerivs_internal(vector &independentVars,
}
// std::cout<<"n: "< &independentVars,
value_ans.resize(m);
std::fill(value_ans.begin(), value_ans.end(), 0.0);
}
- if(maxOrder > 0){
+ if(maxOrder > 0){
// std::cout<<"entering maxOrder> 0\n";
vector infIndicators(m, false); // default values will be false
for(size_t inf_ind = 0; inf_ind < m; inf_ind++){
@@ -304,7 +306,7 @@ void getDerivs_internal(vector &independentVars,
//std::cout<<"done setting hessian size\n";
vector cppad_derivOut;
std::vector w(m, 0);
-
+
// begin replacement
if (maxOrder == 1) {
if(strategy_one == 2) { // subgraph_rev_jac
@@ -331,7 +333,7 @@ void getDerivs_internal(vector &independentVars,
int y_ind = outAlly_o1 ? i_out : outInds[i_out] - 1;
select_range[y_ind] = true;
}
-
+
std::vector col_2_wrtVecm1;
if(!wrtAllx_o1) {
col_2_wrtVecm1.resize(n, -1);
@@ -371,7 +373,7 @@ void getDerivs_internal(vector &independentVars,
if(y_ind < 0) {
std::cout<<"Error: y_ind is negative. This should not happen."< max_row) {
// max_row = this_row;
@@ -391,7 +393,7 @@ void getDerivs_internal(vector &independentVars,
} // end k
//std::cout<<"done filling jacobian\n";
} // end strategy_one == 2
- else if(strategy_one == 3)
+ else if(strategy_one == 3)
{ // regular reverse mode, used for m == 1 or outDir_provided
// std::cout<<"regular reverse version of jacobian\n";
for(size_t i_out = 0; i_out < res_dimy_o1; i_out++) {
@@ -452,7 +454,7 @@ void getDerivs_internal(vector &independentVars,
#endif
} else { // end use_wrt
x1_ind = 0; // inDir is used, so we only need one input.
- for(size_t ix = 0; ix < n; ix++) x1[ix] = inDir[ix];
+ for(size_t ix = 0; ix < n; ix++) x1[ix] = inDir[n*i_wrt + ix];
cppad_derivOut = ADtape->Forward(1, x1);
} // end use inDir.
if (ordersFound[1]) { // will always be true if maxOrder == 1
@@ -476,7 +478,7 @@ void getDerivs_internal(vector &independentVars,
// std::cout<<"Entering maxOrder > 1\n";
// maxOrder > 1: outer loop over vec_ind, inner loop over dy_ind
// strategy_one and strategy_two are actually ignored here for now, b/c we wouldn't be here if not needed.
- std::vector cppad_derivOut_F1;
+ std::vector cppad_derivOut_F1;
std::vector x1(n, 0);
for (size_t i1_wrt = 0; i1_wrt < res_dimx1_o2; i1_wrt++) {
int x1_ind(0);
@@ -492,7 +494,7 @@ void getDerivs_internal(vector &independentVars,
#endif
} else { // end use_wrt
x1_ind = 0; // inDir is used, so we only need one input.
- for(size_t ix = 0; ix < n; ix++) x1[ix] = inDir[ix];
+ for(size_t ix = 0; ix < n; ix++) x1[ix] = inDir[n*i1_wrt + ix];
cppad_derivOut_F1 = ADtape->Forward(1, x1);
} // end use inDir.
// std::cout<<"contents of cppad_derivOut_F1: ";
@@ -532,20 +534,20 @@ void getDerivs_internal(vector &independentVars,
for (size_t i2_wrt = 0; i2_wrt < res_dimx2_o2; i2_wrt++) {
int x2_ind(0);
x2_ind = wrtAllx2_o2 ? i2_wrt : wrtVector[i2_wrt] - 1;
-
- ansList->hessian[res_dimx1_o2 * res_dimx2_o2 * i_out + res_dimx2_o2 * i1_wrt + i2_wrt] =
+
+ ansList->hessian[res_dimx1_o2 * res_dimx2_o2 * i_out + res_dimx1_o2 * i2_wrt + i1_wrt] =
cppad_derivOut[x2_ind * 2 + 1];
}
} else {
for (size_t i2_wrt = 0; i2_wrt < res_dimx2_o2; i2_wrt++) {
- ansList->hessian[res_dimx1_o2 * res_dimx2_o2 * i_out + res_dimx2_o2 * i1_wrt + i2_wrt] =
+ ansList->hessian[res_dimx1_o2 * res_dimx2_o2 * i_out + res_dimx1_o2 * i2_wrt + i1_wrt] =
CppAD::numeric_limits::quiet_NaN();
}
}
// end use_out2
if(!outDir_provided) w[y_ind] = 0; // outDir is used, so we only need one output.
} // end out_ind loop
-
+
if (ordersFound[1]) {
BASE *LHS = ansList->jacobian.getPtr() + res_dimy_o1 * i1_wrt;
@@ -560,7 +562,7 @@ void getDerivs_internal(vector &independentVars,
}
}
} // end else
-
+
// end replacement
#ifdef _TIME_AD_GENERAL
derivs_getDerivs_timer_stop();
diff --git a/packages/nimble/tests/testthat/test-ADdirsAndInds.R b/packages/nimble/tests/testthat/test-ADdirsAndInds.R
index 21503f69f..de6bbbae0 100644
--- a/packages/nimble/tests/testthat/test-ADdirsAndInds.R
+++ b/packages/nimble/tests/testthat/test-ADdirsAndInds.R
@@ -718,3 +718,73 @@ test_that('Derivatives with double-taping, indices, and directions work',
## #c(-.3, -.7) %*% apply(correct$hessian, 1, \(H) H %*% c(-2, -1, -3))
}
)
+
+## test_that('Derivatives with double-taping, indices, and directions work',
+## {
+## ADfun1 <- nimbleFunction(
+## setup = function(){},
+## run = function(x = double(1)) {
+## ans <- c(x[1]^3, x[2]^3, (x[1]^2*x[2]^2))
+## return(ans)
+## returnType(double(1))
+## },
+## methods = list(
+## derivsRun = function(x = double(1),
+## wrt = double(1),
+## outInds = double(1),
+## inDir = double(1),
+## outDir = double(1),
+## order = double(1)) {
+## ans <- derivs(run(x), wrt = wrt, outInds = outInds,
+## inDir = inDir, outDir = outDir, order = order)
+## return(ans)
+## returnType(ADNimbleList())
+## },
+## jacobianRun = function(x = double(1)) {
+## out <- derivs(run(x), wrt = 1:2, order = 1)
+## ans <- nimNumeric(length = 6, value = out$jacobian)
+## returnType(double(1))
+## return(ans)
+## },
+
+## metaDerivsRun = function(x = double(1),
+## order = double(1)) {
+## out <- derivs(jacobianRun(x), wrt = 1:2,
+## order = order)
+## returnType(ADNimbleList())
+## return(out)
+## }
+## ), buildDerivs = c('jacobianRun', 'run')
+## )
+
+## ADfunInst <- ADfun1()
+## xRec <- c(2.2, 3.3)
+## x <- c(1.6, 2.8)
+## Rderivs <- ADfunInst$jacobianRun(x)
+## Rderivs <- ADfunInst$metaDerivsRun(x, order = 1)
+## temporarilyAssignInGlobalEnv(ADfunInst)
+## cADfunInst <- compileNimble(ADfunInst)
+
+## ## record
+## cADfunInst$derivsRun(x = xRec,
+## wrt = 1:2,
+## outInds = numeric(),
+## inDir = numeric(),
+## outDir = numeric(),
+## order = 1:2)
+
+## ## Future args:
+## wrt <- list(1:2, 1, 2, c(2, 1), c(2, 1, 2))
+## outInds <- list(1:3, 1, 2, 3, c(3, 1), c(3, 1, 3))
+## inDirs <- list(c(-0.3, -0.7))
+## outDirs <- list(c(-2, -1, -3))
+## orders <- list(0, 1, 2, c(0, 1), c(0, 2), c(1, 2), 0:2)
+
+## cADfunInst$derivsRun(x = xRec,
+## wrt = 1:2,
+## outInds = numeric(),
+## inDir = rep(inDirs[[1]], 4),
+## outDir = numeric(),
+## order = 1:2)
+
+## })