Fix subfunction merge block adjoint (#4177)

JHopeCollins · pbrubeck · commit bc82efd406c9 · 2025-04-10T11:32:30.000+01:00
* comment to explain how we evaluate the adjoint of the FunctionMergeBlock
diff --git a/firedrake/adjoint_utils/blocks/function.py b/firedrake/adjoint_utils/blocks/function.py
@@ -197,50 +197,71 @@ class SubfunctionBlock(Block):
     def __init__(self, func, idx, ad_block_tag=None):
         super().__init__(ad_block_tag=ad_block_tag)
         self.add_dependency(func)
-        self.idx = idx
+        self.sub_idx = idx
 
     def evaluate_adj_component(self, inputs, adj_inputs, block_variable, idx,
                                prepared=None):
         eval_adj = firedrake.Cofunction(block_variable.output.function_space().dual())
         if type(adj_inputs[0]) is firedrake.Cofunction:
-            eval_adj.sub(self.idx).assign(adj_inputs[0])
+            eval_adj.sub(self.sub_idx).assign(adj_inputs[0])
         else:
-            eval_adj.sub(self.idx).assign(adj_inputs[0].function)
+            eval_adj.sub(self.sub_idx).assign(adj_inputs[0].function)
         return eval_adj
 
     def evaluate_tlm_component(self, inputs, tlm_inputs, block_variable, idx,
                                prepared=None):
-        return firedrake.Function.sub(tlm_inputs[0], self.idx)
+        return firedrake.Function.sub(tlm_inputs[0], self.sub_idx)
 
     def evaluate_hessian_component(self, inputs, hessian_inputs, adj_inputs,
                                    block_variable, idx,
                                    relevant_dependencies, prepared=None):
         eval_hessian = firedrake.Cofunction(block_variable.output.function_space().dual())
-        eval_hessian.sub(self.idx).assign(hessian_inputs[0])
+        eval_hessian.sub(self.sub_idx).assign(hessian_inputs[0])
         return eval_hessian
 
     def recompute_component(self, inputs, block_variable, idx, prepared):
         return maybe_disk_checkpoint(
-            firedrake.Function.sub(inputs[0], self.idx)
+            firedrake.Function.sub(inputs[0], self.sub_idx)
         )
 
     def __str__(self):
-        return f"{self.get_dependencies()[0]}[{self.idx}]"
+        return f"{self.get_dependencies()[0]}[{self.sub_idx}]"
 
 
 class FunctionMergeBlock(Block):
     def __init__(self, func, idx, ad_block_tag=None):
         super().__init__(ad_block_tag=ad_block_tag)
         self.add_dependency(func)
-        self.idx = idx
+        self.sub_idx = idx
         for output in func._ad_outputs:
             self.add_dependency(output)
 
     def evaluate_adj_component(self, inputs, adj_inputs, block_variable, idx,
                                prepared=None):
+        # The merge block appears whenever a subfunction is the output of a block.
+        # This means that the subfunction has been modified, so we need to make
+        # sure that this modification is accounted for when evaluating the adjoint.
+        #
+        # When recomputing the merge block, the indexed subfunction in the full
+        # Function is completely overwritten, meaning that the pre-existing value
+        # of the subfunction in the full function is ignored.
+        # The equivalent adjoint operation is to:
+        #   1. send the subfunction component of the adjoint value back up
+        #      the branch of the tape corresponding to the subfunction
+        #      dependency (idx=0).
+        #   2. zero out the subfunction component of the adjoint value sent
+        #      back up the full Function branch of the tape (idx=1).
+        # This means that when the adjoint values of each branch are combined
+        # after the SubfunctionBlock only the adjoint value from the subfunction
+        # branch is used.
+        #
+        # See https://github.com/firedrakeproject/firedrake/pull/4177 for more
+        # detail and for diagrams of the tape produced when accessing subfunctions.
+
         if idx == 0:
-            return adj_inputs[0].subfunctions[self.idx]
+            return adj_inputs[0].subfunctions[self.sub_idx].copy(deepcopy=True)
         else:
+            adj_inputs[0].subfunctions[self.sub_idx].zero()
             return adj_inputs[0]
 
     def evaluate_tlm(self, markings=False):
@@ -253,7 +274,7 @@ def evaluate_tlm(self, markings=False):
         fs = output.output.function_space()
         f = type(output.output)(fs)
         output.add_tlm_output(
-            type(output.output).assign(f.sub(self.idx), tlm_input)
+            type(output.output).assign(f.sub(self.sub_idx), tlm_input)
         )
 
     def evaluate_hessian_component(self, inputs, hessian_inputs, adj_inputs,
@@ -265,12 +286,12 @@ def recompute_component(self, inputs, block_variable, idx, prepared):
         sub_func = inputs[0]
         parent_in = inputs[1]
         parent_out = type(parent_in)(parent_in)
-        parent_out.sub(self.idx).assign(sub_func)
+        parent_out.sub(self.sub_idx).assign(sub_func)
         return maybe_disk_checkpoint(parent_out)
 
     def __str__(self):
         deps = self.get_dependencies()
-        return f"{deps[1]}[{self.idx}].assign({deps[0]})"
+        return f"{deps[1]}[{self.sub_idx}].assign({deps[0]})"
 
 
 class CofunctionAssignBlock(Block):
diff --git a/tests/firedrake/adjoint/test_split_and_subfunctions.py b/tests/firedrake/adjoint/test_split_and_subfunctions.py
@@ -183,6 +183,7 @@ def test_subfunctions_always_create_blocks():
         # force the subfunctions to be created
         _ = kappa.subfunctions
 
+    continue_annotation()
     with set_working_tape() as tape:
         u.assign(kappa.subfunctions[0])
         J = assemble(inner(u, u)*dx)
@@ -191,3 +192,25 @@ def test_subfunctions_always_create_blocks():
 
     rf.derivative()
     assert control.block_variable.adj_value is not None, "Functional should depend on Control"
+
+
+@pytest.mark.skipcomplex
+def test_writing_to_subfunctions():
+    with stop_annotating():
+        mesh = UnitIntervalMesh(1)
+        R = FunctionSpace(mesh, "R", 0)
+
+        kappa = Function(R).assign(2.0)
+        u = Function(R)
+        usub = u.subfunctions[0]
+
+    continue_annotation()
+    with set_working_tape() as tape:
+        u.assign(kappa)
+        usub *= 2
+        J = assemble(inner(u, u) * dx)
+        print(f"{type(J) = }")
+        rf = ReducedFunctional(J, Control(kappa), tape=tape)
+    pause_annotation()
+
+    assert taylor_test(rf, kappa, Constant(0.1)) > 1.9