diff --git a/docs/index.rst b/docs/index.rst index 481e633..745bd02 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -51,6 +51,7 @@ directions, and invariant along the vertical direction. installation usage convention + optimization_tasks contributing authors history diff --git a/docs/optimization_tasks.rst b/docs/optimization_tasks.rst new file mode 100644 index 0000000..d6a5f3b --- /dev/null +++ b/docs/optimization_tasks.rst @@ -0,0 +1,256 @@ +Optimization task definitions (1D mode + S-matrix caching) +=========================================================== + +This note turns the recent performance discussion into implementation +tasks with explicit scope, physics constraints, and compatibility rules. + + +Task A: Native 1D mode inferred from ``Nx``/``Ny`` +-------------------------------------------------- + +Goal +~~~~ + +Accelerate simulations of structures that are periodic in only one lateral +direction (e.g., binary gratings), while keeping the user-facing API +compatible with existing code. + +Current issue +~~~~~~~~~~~~~ + +The current solver stack is built around 2D unit-cell discretization and a 2D +harmonic index set ``G[:, 0], G[:, 1]``. This causes avoidable overhead when a +layer is effectively 1D (for example ``Ny == 1``), because the code still +executes a full 2D path for Fourier transforms, convolution indexing, and +matrix assembly. + +Why this is necessary for performance +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In 1D gratings, the physically relevant Fourier basis is one index (``m``) +instead of two indices (``m, n``). Keeping a 2D representation for a 1D +problem inflates matrix dimensions and therefore CPU time and memory for +eigensolves and S-matrix operations. + +Proposed optimization +~~~~~~~~~~~~~~~~~~~~~ + +Infer 1D mode directly from grid shape, without introducing a new required +flag: + +* If ``Nx > 1`` and ``Ny == 1``: use x-periodic 1D path. +* If ``Ny > 1`` and ``Nx == 1``: use y-periodic 1D path. +* If ``Nx > 1`` and ``Ny > 1``: keep existing 2D path. +* If ``Nx == 1`` and ``Ny == 1``: treat as degenerate/uniform and use current + behavior or raise a clear error. + +Important architectural rule: + +* Do **not** overload ``Gmethod`` to represent dimensionality. ``Gmethod`` + should keep its current meaning (2D truncation strategy). 1D dimensional + inference should be orthogonal and handled before harmonic selection. + +Implementation outline +~~~~~~~~~~~~~~~~~~~~~~ + +1. Add an internal per-layer dimensional descriptor (e.g., ``periodic_dim``) + derived from ``Nx, Ny`` in ``Add_LayerGrid``. +2. Introduce 1D FFT/convolution helpers for patterned layers with one singleton + axis. +3. Add a 1D harmonic generator path that produces a single integer harmonic + list and map it to the existing matrix assembly interface with minimal + branching. +4. Keep existing 2D APIs and default behavior unchanged for ``Nx > 1`` and + ``Ny > 1``. + +Backward compatibility and risks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* Existing 2D scripts remain unchanged. +* Existing 1D-as-2D scripts (``Ny == 1`` or ``Nx == 1``) should produce + numerically consistent results, with lower runtime and memory use. +* Risk: hidden assumptions that both harmonic axes exist in downstream code. + Mitigation: isolate 1D branching at Fourier/convolution and harmonic-index + construction boundaries; keep the rest of the solver working on consistent + shaped arrays. + +Physical validity constraints +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* Valid only when one in-plane direction is truly invariant. +* Must preserve Maxwell boundary conditions and the same sign/branch + conventions used in the current eigensystem solver. +* Polarization coupling behavior in the 1D path must remain consistent with + the existing field conventions. + +Acceptance criteria +~~~~~~~~~~~~~~~~~~~ + +* Unit tests showing that 1D inferred path (``Ny == 1`` or ``Nx == 1``) + matches the legacy 2D emulation within tolerance. +* Benchmarks demonstrating lower runtime/memory for representative 1D grating + problems. + + +Task B: Cache matrix inverses/factorizations in repeated S-matrix use +---------------------------------------------------------------------- + +Goal +~~~~ + +Reduce repeated linear-algebra cost in S-matrix assembly during consecutive +solves where layer eigensystems are unchanged. + +Current issue +~~~~~~~~~~~~~ + +``GetSMatrix`` repeatedly computes expensive inverses inside layer loops, +including ``inv(phi_l)`` and ``inv(kp_l @ phi_l)``. For repeated solves on the +same solved state, these costs are paid again and again. + +Why this is necessary for performance +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Matrix inversion/factorization dominates runtime for moderate/large harmonic +bases. Reusing already computed inverse/factorization objects can substantially +reduce per-solve overhead in repeated post-processing workflows. + +Proposed optimization +~~~~~~~~~~~~~~~~~~~~~ + +Add cacheable per-layer linear algebra artifacts: + +* ``phi_inv`` (or factorization equivalent) for each solved layer. +* factorization/inverse of ``kp @ phi`` used in ``GetSMatrix`` transitions. + +Use cached artifacts when valid; recompute on invalidation. + +Strict invalidation rules (must-have) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Cache is valid only if all dependencies are unchanged: + +* frequency ``omega`` +* incident-angle-dependent wavevectors ``kx, ky`` +* reciprocal lattice / harmonic set ``G`` +* patterned-layer Fourier material matrices (from epsilon grids) +* layer thickness-dependent phase factors where applicable + +Any change to these must invalidate affected layer caches before S-matrix +assembly. + +Backward compatibility and risks +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* Public API should remain unchanged; caching is internal. +* Results must be numerically identical to non-cached computation (within + floating-point tolerance). +* Main risk is stale-cache reuse. Mitigation: conservative invalidation keyed + to the dependency set above. + +Physical validity constraints +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The eigenvectors ``phi`` are **not** universally invariant; they vary when the +operator changes (e.g., through ``omega``, ``kx``, ``ky``, or epsilon Fourier +matrices). Therefore caching is physically valid only with strict dependency +tracking and invalidation. + +Acceptance criteria +~~~~~~~~~~~~~~~~~~~ + +* Regression tests proving cached and non-cached S-matrix paths agree. +* Tests that modify one dependency at a time and verify cache invalidation. +* Performance checks showing reduced repeated-solve wall time for unchanged + layer states. + + +Task C: Incremental implementation plan for 1D support +------------------------------------------------------ + +Goal +~~~~ + +Define a practical, low-risk implementation sequence to deliver inferred 1D +support quickly while preserving numerical correctness and backward +compatibility. + +Why this task is separate from Task A +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Task A defines the target architecture. This task defines *how* to implement it +in shippable stages, so performance gains are achieved early and regressions are +contained. + +Proposed phased plan +~~~~~~~~~~~~~~~~~~~~ + +Phase 1 (detection + plumbing) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Add internal detection logic from ``Nx, Ny``: + + - ``Nx > 1, Ny == 1`` -> ``periodic_dim='x'`` + - ``Ny > 1, Nx == 1`` -> ``periodic_dim='y'`` + - otherwise -> existing 2D path + +* Store per-layer dimension metadata and surface it only internally (no public + API change). +* Add explicit validation/error messages for ambiguous/degenerate grids. + +Phase 2 (1D Fourier path) +^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Implement 1D FFT/convolution helpers and dispatch to them for inferred 1D + layers. +* Keep existing 2D helpers untouched for non-1D layers. +* Ensure returned tensor/matrix shapes remain compatible with downstream + eigensystem code to minimize refactoring risk. + +Phase 3 (1D harmonic basis path) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Add a dedicated 1D harmonic generator for a single index basis. +* Keep ``Gmethod`` semantics unchanged for 2D truncation only. +* Provide internal adapters so field reconstruction and post-processing APIs + keep working with both 1D and 2D solved states. + +Phase 4 (verification + docs + benchmark) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* Add regression tests comparing inferred 1D path against current 1D-as-2D + emulation on representative gratings. +* Add benchmark cases demonstrating runtime and memory improvements. +* Document the inference rules and edge-case behavior in user docs. + +Performance expectations +~~~~~~~~~~~~~~~~~~~~~~~~ + +* Early wins in Phase 2 from avoiding unnecessary 2D FFT/convolution work for + singleton-axis grids. +* Largest gains in Phase 3 once harmonic basis dimensionality is reduced from + 2D to 1D for truly 1D periodic structures. + +Backward compatibility +~~~~~~~~~~~~~~~~~~~~~~ + +* No required new flags; existing scripts should continue to run unchanged. +* Users currently setting ``Ny == 1`` or ``Nx == 1`` receive the optimized path + automatically. +* 2D behavior remains the default whenever both dimensions exceed 1. + +Physical validity constraints +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* Use 1D path only when one in-plane direction is invariant. +* Ensure matching boundary-condition treatment and branch/sign conventions + relative to existing 2D solver. +* Include tests for both normal and oblique incidence to verify consistency of + field/polarization behavior. + +Acceptance criteria +~~~~~~~~~~~~~~~~~~~ + +* Stage-gated PRs where each phase is independently testable and reversible. +* Numerical agreement with baseline 1D-as-2D results within tolerance. +* Demonstrated performance gains on at least one binary-grating benchmark. diff --git a/grcwa/rcwa.py b/grcwa/rcwa.py index 5ebc034..cd2a6de 100644 --- a/grcwa/rcwa.py +++ b/grcwa/rcwa.py @@ -24,40 +24,46 @@ def __init__(self,nG,L1,L2,freq,theta,phi,verbose=1): self.nG = nG self.verbose = verbose self.Layer_N = 0 # total number of layers - + # the length of the following variables = number of total layers self.thickness_list = [] self.id_list = [] #[type, No., No. in patterned/uniform, No. in its family] starting from 0 - # type:0 for uniform, 1 for Grids, 2 for Fourier + ## type:0 for uniform, 1 for Grids, 2 for Fourier - self.kp_list = [] + self.kp_list = [] self.q_list = [] # eigenvalues self.phi_list = [] #eigenvectors # Uniform layer self.Uniform_ep_list = [] self.Uniform_N = 0 - + # Patterned layer - self.Patterned_N = 0 # total number of patterned layers + self.Patterned_N = 0 # total number of patterned layers self.Patterned_epinv_list = [] self.Patterned_ep2_list = [] - + # patterned layer from Grids self.GridLayer_N = 0 self.GridLayer_Nxy_list = [] # layers of analytic Fourier series (e.g. circles) self.FourierLayer_N = 0 - self.FourierLayer_params = [] - + self.FourierLayer_params = [] + + # cache for repeated S-matrix assembly + self._smatrix_cache = None + + # inferred periodic dimension based on grid discretization + self.grid_periodic_dim = '2d' + def Add_LayerUniform(self,thickness,epsilon): #assert type(thickness) == float, 'thickness should be a float' self.id_list.append([0,self.Layer_N,self.Uniform_N]) self.Uniform_ep_list.append(epsilon) self.thickness_list.append(thickness) - + self.Layer_N += 1 self.Uniform_N += 1 @@ -79,6 +85,24 @@ def Add_LayerFourier(self,thickness,params): self.Patterned_N += 1 self.FourierLayer_N += 1 + def _infer_grid_periodic_dim(self): + # Infer effective periodicity from grid layer discretization. + # Returns: '2d', '1dx', or '1dy'. + if self.GridLayer_N == 0: + return '2d' + + all_ny1 = True + all_nx1 = True + for Nx, Ny in self.GridLayer_Nxy_list: + all_ny1 = all_ny1 and (Ny == 1) + all_nx1 = all_nx1 and (Nx == 1) + + if all_ny1 and not all_nx1: + return '1dx' + if all_nx1 and not all_ny1: + return '1dy' + return '2d' + def Init_Setup(self,Pscale=1.,Gmethod=0): ''' Set up reciprocal lattice (Gmethod:truncation scheme, 0 for circular, 1 for rectangular) @@ -92,28 +116,39 @@ def Init_Setup(self,Pscale=1.,Gmethod=0): # set up reciprocal lattice self.Lk1, self.Lk2 = Lattice_Reciprocate(self.L1,self.L2) self.G,self.nG = Lattice_getG(self.nG,self.Lk1,self.Lk2,method=Gmethod) - + + # infer effective 1D periodicity from grid discretization when possible + self.grid_periodic_dim = self._infer_grid_periodic_dim() + if self.grid_periodic_dim == '1dx': + ind = self.G[:,1] == 0 + self.G = self.G[ind] + self.nG = len(self.G) + elif self.grid_periodic_dim == '1dy': + ind = self.G[:,0] == 0 + self.G = self.G[ind] + self.nG = len(self.G) + self.Lk1 = self.Lk1/Pscale self.Lk2 = self.Lk2/Pscale # self.kx = kx0 + 2*bd.pi*(self.Lk1[0]*self.G[:,0]+self.Lk2[0]*self.G[:,1]) # self.ky = ky0 + 2*bd.pi*(self.Lk1[1]*self.G[:,0]+self.Lk2[1]*self.G[:,1]) self.kx,self.ky = Lattice_SetKs(self.G, kx0, ky0, self.Lk1, self.Lk2) - + #normalization factor for energies off normal incidence self.normalization = bd.sqrt(self.Uniform_ep_list[0])/bd.cos(self.theta) - + #if comm.rank == 0 and verbose>0: if self.verbose>0: print('Total nG = ',self.nG) self.Patterned_ep2_list = [None]*self.Patterned_N - self.Patterned_epinv_list = [None]*self.Patterned_N + self.Patterned_epinv_list = [None]*self.Patterned_N for i in range(self.Layer_N): if self.id_list[i][0] == 0: ep = self.Uniform_ep_list[self.id_list[i][2]] kp = MakeKPMatrix(self.omega,0,1./ep,self.kx,self.ky) self.kp_list.append(kp) - + q,phi = SolveLayerEigensystem_uniform(self.omega,self.kx,self.ky,ep) self.q_list.append(q) self.phi_list.append(phi) @@ -121,7 +156,36 @@ def Init_Setup(self,Pscale=1.,Gmethod=0): self.kp_list.append(None) self.q_list.append(None) self.phi_list.append(None) - + + self._invalidate_smatrix_cache() + + def _invalidate_smatrix_cache(self): + self._smatrix_cache = None + + def _get_smatrix_cache(self): + if self._smatrix_cache is not None: + return self._smatrix_cache + + if len(self.phi_list) != self.Layer_N or len(self.kp_list) != self.Layer_N: + return None + + phi_inv_list = [None] * self.Layer_N + kpphi_inv_list = [None] * self.Layer_N + + for i in range(self.Layer_N): + phi = self.phi_list[i] + kp = self.kp_list[i] + if bd.isinstance(phi, type(None)) or bd.isinstance(kp, type(None)): + return None + phi_inv_list[i] = bd.inv(phi) + kpphi_inv_list[i] = bd.inv(bd.dot(kp, phi)) + + self._smatrix_cache = { + 'phi_inv_list': phi_inv_list, + 'kpphi_inv_list': kpphi_inv_list, + } + return self._smatrix_cache + def MakeExcitationPlanewave(self,p_amp,p_phase,s_amp,s_phase,order = 0, direction = 'forward'): ''' Front incidence @@ -138,7 +202,7 @@ def MakeExcitationPlanewave(self,p_amp,p_phase,s_amp,s_phase,order = 0, directio -p_amp*bd.sin(phi)*bd.exp(1j*p_phase)) tmp2 = bd.zeros(2*self.nG,dtype=complex) - tmp2[order+self.nG] = 1.0 + tmp2[order+self.nG] = 1.0 a0 = a0 + tmp2*(-s_amp*bd.cos(theta)*bd.sin(phi)*bd.exp(1j*s_phase) \ +p_amp*bd.cos(phi)*bd.exp(1j*p_phase)) elif direction == 'backward': @@ -151,20 +215,22 @@ def MakeExcitationPlanewave(self,p_amp,p_phase,s_amp,s_phase,order = 0, directio tmp2[order+self.nG] = 1.0 bN = bN + tmp2*(-s_amp*bd.cos(theta)*bd.sin(phi)*bd.exp(1j*s_phase) \ +p_amp*bd.cos(phi)*bd.exp(1j*p_phase)) - + self.a0 = a0 self.bN = bN - + def GridLayer_geteps(self,ep_all): ''' Fourier transform + eigenvalue for grid layer ''' + self._invalidate_smatrix_cache() + ptri = 0 ptr = 0 for i in range(self.Layer_N): if self.id_list[i][0] != 1: continue - + Nx = self.GridLayer_Nxy_list[ptri][0] Ny = self.GridLayer_Nxy_list[ptri][1] dN = 1./Nx/Ny @@ -173,7 +239,7 @@ def GridLayer_geteps(self,ep_all): ep_grid = [bd.reshape(ep_all[0][ptr:ptr+Nx*Ny],[Nx,Ny]),bd.reshape(ep_all[1][ptr:ptr+Nx*Ny],[Nx,Ny]),bd.reshape(ep_all[2][ptr:ptr+Nx*Ny],[Nx,Ny])] else: ep_grid = bd.reshape(ep_all[ptr:ptr+Nx*Ny],[Nx,Ny]) - + epinv, ep2 = Epsilon_fft(dN,ep_grid,self.G) self.Patterned_epinv_list[self.id_list[i][2]] = epinv @@ -187,7 +253,7 @@ def GridLayer_geteps(self,ep_all): self.phi_list[self.id_list[i][1]] = phi ptr += Nx*Ny - ptri += 1 + ptri += 1 def Return_eps(self,which_layer,Nx,Ny,component='xx'): ''' @@ -212,10 +278,10 @@ def Return_eps(self,which_layer,Nx,Ny,component='xx'): epk = self.Patterned_ep2_list[self.id_list[i][2]][self.nG:,:self.nG] elif component == 'yy': epk = self.Patterned_ep2_list[self.id_list[i][2]][self.nG:,self.nG:] - + return get_ifft(Nx,Ny,epk[0,:],self.G) - + def RT_Solve(self,normalize = 0, byorder = 0): ''' Reflection and transmission power computation @@ -224,7 +290,7 @@ def RT_Solve(self,normalize = 0, byorder = 0): if normalize = 1, it will be divided by n[0]*cos(theta) ''' - aN, b0 = SolveExterior(self.a0,self.bN,self.q_list,self.phi_list,self.kp_list,self.thickness_list) + aN, b0 = SolveExterior(self.a0,self.bN,self.q_list,self.phi_list,self.kp_list,self.thickness_list,self._get_smatrix_cache()) fi,bi = GetZPoyntingFlux(self.a0,b0,self.omega,self.kp_list[0],self.phi_list[0],self.q_list[0],byorder=byorder) fe,be = GetZPoyntingFlux(aN,self.bN,self.omega,self.kp_list[-1],self.phi_list[-1],self.q_list[-1],byorder=byorder) @@ -245,40 +311,40 @@ def GetAmplitudes_noTranslate(self,which_layer): returns fourier amplitude ''' if which_layer == 0 : - aN, b0 = SolveExterior(self.a0,self.bN,self.q_list,self.phi_list,self.kp_list,self.thickness_list) + aN, b0 = SolveExterior(self.a0,self.bN,self.q_list,self.phi_list,self.kp_list,self.thickness_list,self._get_smatrix_cache()) ai = self.a0 bi = b0 elif which_layer == self.Layer_N-1: - aN, b0 = SolveExterior(self.a0,self.bN,self.q_list,self.phi_list,self.kp_list,self.thickness_list) + aN, b0 = SolveExterior(self.a0,self.bN,self.q_list,self.phi_list,self.kp_list,self.thickness_list,self._get_smatrix_cache()) ai = aN bi = self.bN else: - ai, bi = SolveInterior(which_layer,self.a0,self.bN,self.q_list,self.phi_list,self.kp_list,self.thickness_list) + ai, bi = SolveInterior(which_layer,self.a0,self.bN,self.q_list,self.phi_list,self.kp_list,self.thickness_list,self._get_smatrix_cache()) return ai,bi - + def GetAmplitudes(self,which_layer,z_offset): ''' returns fourier amplitude ''' if which_layer == 0 : - aN, b0 = SolveExterior(self.a0,self.bN,self.q_list,self.phi_list,self.kp_list,self.thickness_list) + aN, b0 = SolveExterior(self.a0,self.bN,self.q_list,self.phi_list,self.kp_list,self.thickness_list,self._get_smatrix_cache()) ai = self.a0 bi = b0 elif which_layer == self.Layer_N-1: - aN, b0 = SolveExterior(self.a0,self.bN,self.q_list,self.phi_list,self.kp_list,self.thickness_list) + aN, b0 = SolveExterior(self.a0,self.bN,self.q_list,self.phi_list,self.kp_list,self.thickness_list,self._get_smatrix_cache()) ai = aN bi = self.bN else: - ai, bi = SolveInterior(which_layer,self.a0,self.bN,self.q_list,self.phi_list,self.kp_list,self.thickness_list) + ai, bi = SolveInterior(which_layer,self.a0,self.bN,self.q_list,self.phi_list,self.kp_list,self.thickness_list,self._get_smatrix_cache()) ai, bi = TranslateAmplitudes(self.q_list[which_layer],self.thickness_list[which_layer],z_offset,ai,bi) return ai,bi - + def Solve_FieldFourier(self,which_layer,z_offset): ''' returns field amplitude in fourier space: [ex,ey,ez], [hx,hy,hz] @@ -305,7 +371,7 @@ def Solve_FieldFourier(self,which_layer,z_offset): fexy = bd.dot(self.kp_list[which_layer],tmp2) fey = - fexy[:self.nG] fex = fexy[self.nG:] - + #hz in Fourier space fhz = (self.kx*fey - self.ky*fex)/self.omega @@ -349,7 +415,7 @@ def Solve_FieldOnGrid(self,which_layer,z_offset,Nxy=None): def Volume_integral(self,which_layer,Mx,My,Mz,normalize=0): '''Mxyz is convolution matrix. - This function computes 1/A\int_V Mx|Ex|^2+My|Ey|^2+Mz|Ez|^2 + This function computes 1/A\\int_V Mx|Ex|^2+My|Ey|^2+Mz|Ez|^2 To be consistent with Poynting vector defintion here, the absorbed power will be just omega*output ''' kp = self.kp_list[which_layer] @@ -362,10 +428,10 @@ def Volume_integral(self,which_layer,Mx,My,Mz,normalize=0): epinv = self.Patterned_epinv_list[self.id_list[which_layer][2]] # un-translated amplitdue - ai, bi = SolveInterior(which_layer,self.a0,self.bN,self.q_list,self.phi_list,self.kp_list,self.thickness_list) + ai, bi = SolveInterior(which_layer,self.a0,self.bN,self.q_list,self.phi_list,self.kp_list,self.thickness_list,self._get_smatrix_cache()) ab = bd.hstack((ai,bi)) abMatrix = bd.outer(ab,bd.conj(ab)) - + Mt = Matrix_zintegral(q,self.thickness_list[which_layer]) # overall abM = abMatrix * Mt @@ -386,14 +452,14 @@ def Volume_integral(self,which_layer,Mx,My,Mz,normalize=0): bd.hstack((Mzeros,My,Mzeros)),\ bd.hstack((Mzeros,Mzeros,Mz)))) - # integral = Tr[ abMatrix * F^\dagger * Matconv *F ] + # integral = Tr[ abMatrix * F^\dagger * Matconv *F ] tmp = bd.dot(bd.dot(bd.conj(bd.transpose(F)),Mtotal),F) val = bd.trace(bd.dot(abM,tmp)) if normalize == 1: val = val*self.normalization return val - + def Solve_ZStressTensorIntegral(self,which_layer): ''' returns 2F_x,2F_y,2F_z, integrated over z-plane @@ -436,7 +502,7 @@ def Solve_ZStressTensorIntegral(self,which_layer): def MakeKPMatrix(omega,layer_type,epinv,kx,ky): nG = len(kx) - + # uniform layer, epinv has length 1 if layer_type == 0: # JkkJT = np.block([[np.diag(ky*ky), np.diag(-ky*kx)], @@ -444,14 +510,14 @@ def MakeKPMatrix(omega,layer_type,epinv,kx,ky): Jk = bd.vstack((bd.diag(-ky),bd.diag(kx))) JkkJT = bd.dot(Jk,bd.transpose(Jk)) - + kp = omega**2*bd.eye(2*nG) - epinv*JkkJT # patterned layer else: Jk = bd.vstack((bd.diag(-ky),bd.diag(kx))) tmp = bd.dot(Jk,epinv) kp = omega**2*bd.eye(2*nG) - bd.dot(tmp,bd.transpose(Jk)) - + return kp def SolveLayerEigensystem_uniform(omega,kx,ky,epsilon): @@ -466,11 +532,11 @@ def SolveLayerEigensystem_uniform(omega,kx,ky,epsilon): def SolveLayerEigensystem(omega,kx,ky,kp,ep2): nG = len(kx) - + k = bd.vstack((bd.diag(kx),bd.diag(ky))) kkT = bd.dot(k,bd.transpose(k)) M = bd.dot(ep2,kp) - kkT - + q,phi = bd.eig(M) q = bd.sqrt(q) @@ -478,12 +544,12 @@ def SolveLayerEigensystem(omega,kx,ky,kp,ep2): q = bd.where(bd.imag(q)<0.,-q,q) return q,phi -def GetSMatrix(indi,indj,q_list,phi_list,kp_list,thickness_list): +def GetSMatrix(indi,indj,q_list,phi_list,kp_list,thickness_list,smatrix_cache=None): ''' S_ij: size 4n*4n ''' #assert type(indi) == int, 'layer index i must be integar' #assert type(indj) == int, 'layer index j must be integar' - + nG2 = len(q_list[0]) S11 = bd.eye(nG2,dtype=complex) S12 = bd.zeros_like(S11) @@ -493,15 +559,22 @@ def GetSMatrix(indi,indj,q_list,phi_list,kp_list,thickness_list): return S11,S12,S21,S22 elif indi>indj: raise Exception('indi must be < indj') - + for l in range(indi,indj): ## next layer lp1 = l+1 ## Q = inv(phi_l) * phi_lp1 - Q = bd.dot(bd.inv(phi_list[l]), phi_list[lp1]) + if smatrix_cache is None: + phi_inv_l = bd.inv(phi_list[l]) + kpphi_inv_l = bd.inv(bd.dot(kp_list[l],phi_list[l])) + else: + phi_inv_l = smatrix_cache['phi_inv_list'][l] + kpphi_inv_l = smatrix_cache['kpphi_inv_list'][l] + + Q = bd.dot(phi_inv_l, phi_list[lp1]) ## P = ql*inv(kp_l*phi_l) * kp_lp1*phi_lp1*q_lp1^-1 - P1 = bd.dot(bd.diag(q_list[l]), bd.inv(bd.dot(kp_list[l],phi_list[l]))) + P1 = bd.dot(bd.diag(q_list[l]), kpphi_inv_l) P2 = bd.dot(bd.dot(kp_list[lp1],phi_list[lp1]), bd.diag(1./q_list[lp1])) P = bd.dot(P1,P2) # P1 = bd.dot(kp_list[l],phi_list[l]) @@ -533,32 +606,32 @@ def GetSMatrix(indi,indj,q_list,phi_list,kp_list,thickness_list): P2 = bd.dot(S22,bd.dot(T12,S12)) P1 = bd.dot(S22,bd.dot(T11,d2)) S22 = P1 + P2 - + return S11,S12,S21,S22 -def SolveExterior(a0,bN,q_list,phi_list,kp_list,thickness_list): +def SolveExterior(a0,bN,q_list,phi_list,kp_list,thickness_list,smatrix_cache=None): ''' Given a0, bN, solve for b0, aN ''' Nlayer = len(thickness_list) # total number of layers - S11, S12, S21, S22 = GetSMatrix(0,Nlayer-1,q_list,phi_list,kp_list,thickness_list) + S11, S12, S21, S22 = GetSMatrix(0,Nlayer-1,q_list,phi_list,kp_list,thickness_list,smatrix_cache=smatrix_cache) aN = bd.dot(S11,a0) + bd.dot(S12,bN) b0 = bd.dot(S21,a0) + bd.dot(S22,bN) return aN,b0 -def SolveInterior(which_layer,a0,bN,q_list,phi_list,kp_list,thickness_list): +def SolveInterior(which_layer,a0,bN,q_list,phi_list,kp_list,thickness_list,smatrix_cache=None): ''' Given a0, bN, solve for ai, bi Layer numbering starts from 0 ''' Nlayer = len(thickness_list) # total number of layers nG2 = len(q_list[0]) - - S11, S12, S21, S22 = GetSMatrix(0,which_layer,q_list,phi_list,kp_list,thickness_list) - pS11, pS12, pS21, pS22 = GetSMatrix(which_layer,Nlayer-1,q_list,phi_list,kp_list,thickness_list) + + S11, S12, S21, S22 = GetSMatrix(0,which_layer,q_list,phi_list,kp_list,thickness_list,smatrix_cache=smatrix_cache) + pS11, pS12, pS21, pS22 = GetSMatrix(which_layer,Nlayer-1,q_list,phi_list,kp_list,thickness_list,smatrix_cache=smatrix_cache) # tmp = inv(1-S12*pS21) tmp = bd.inv(bd.eye(nG2)-bd.dot(S12,pS21)) @@ -566,14 +639,14 @@ def SolveInterior(which_layer,a0,bN,q_list,phi_list,kp_list,thickness_list): ai = bd.dot(tmp, bd.dot(S11,a0)+bd.dot(S12,bd.dot(pS22,bN))) # bi = pS21 ai + pS22 bN bi = bd.dot(pS21,ai) + bd.dot(pS22,bN) - + return ai,bi def TranslateAmplitudes(q,thickness,dz,ai,bi): aim = ai*bd.exp(1j*q*dz) bim = bi*bd.exp(1j*q*(thickness-dz)) return aim,bim - + def GetZPoyntingFlux(ai,bi,omega,kp,phi,q,byorder=0): ''' diff --git a/tests/test_rcwa.py b/tests/test_rcwa.py index a0acc73..1758bd1 100644 --- a/tests/test_rcwa.py +++ b/tests/test_rcwa.py @@ -1,5 +1,6 @@ import numpy as np import grcwa +import grcwa.rcwa as rcwa_core from .utils import t_grad try: @@ -88,6 +89,146 @@ def test_rcwa(): Tx,Ty,Tz = obj.Solve_ZStressTensorIntegral(0) assert Tz<0 + +def test_smatrix_cache_unavailable_before_patterned_setup(): + planewave={'p_amp':1,'s_amp':0,'p_phase':0,'s_phase':0} + obj = grcwa.obj(nG, L1, L2, freq, theta, phi, verbose=0) + obj.Add_LayerUniform(thick0, epsuniform0) + obj.Add_LayerGrid(pthick[0], Nx, Ny) + obj.Add_LayerUniform(thickN, epsuniformN) + obj.Init_Setup(Pscale=1., Gmethod=0) + obj.MakeExcitationPlanewave(planewave['p_amp'],planewave['p_phase'], + planewave['s_amp'],planewave['s_phase'],order=0) + + # Patterned layer eigensystem not yet available + assert obj._get_smatrix_cache() is None + + +def test_getsmatrix_cached_vs_uncached_identical(): + planewave={'p_amp':1,'s_amp':0,'p_phase':0,'s_phase':0} + obj = rcwa_assembly(epgrid, freq, theta, phi, planewave, pthick, Pscale=1.) + + cache = obj._get_smatrix_cache() + assert cache is not None + assert len(cache['phi_inv_list']) == obj.Layer_N + assert len(cache['kpphi_inv_list']) == obj.Layer_N + + s_cached = rcwa_core.GetSMatrix(0, obj.Layer_N-1, obj.q_list, obj.phi_list, + obj.kp_list, obj.thickness_list, + smatrix_cache=cache) + s_plain = rcwa_core.GetSMatrix(0, obj.Layer_N-1, obj.q_list, obj.phi_list, + obj.kp_list, obj.thickness_list, + smatrix_cache=None) + + for blk_cached, blk_plain in zip(s_cached, s_plain): + assert np.allclose(blk_cached, blk_plain) + + +def test_exterior_interior_cached_vs_uncached_identical(): + planewave={'p_amp':1,'s_amp':0,'p_phase':0,'s_phase':0} + obj = rcwa_assembly(epgrid, freq, theta, phi, planewave, pthick, Pscale=1.) + + cache = obj._get_smatrix_cache() + aN_c, b0_c = rcwa_core.SolveExterior(obj.a0, obj.bN, obj.q_list, obj.phi_list, + obj.kp_list, obj.thickness_list, + smatrix_cache=cache) + aN_p, b0_p = rcwa_core.SolveExterior(obj.a0, obj.bN, obj.q_list, obj.phi_list, + obj.kp_list, obj.thickness_list, + smatrix_cache=None) + assert np.allclose(aN_c, aN_p) + assert np.allclose(b0_c, b0_p) + + ai_c, bi_c = rcwa_core.SolveInterior(1, obj.a0, obj.bN, obj.q_list, obj.phi_list, + obj.kp_list, obj.thickness_list, + smatrix_cache=cache) + ai_p, bi_p = rcwa_core.SolveInterior(1, obj.a0, obj.bN, obj.q_list, obj.phi_list, + obj.kp_list, obj.thickness_list, + smatrix_cache=None) + assert np.allclose(ai_c, ai_p) + assert np.allclose(bi_c, bi_p) + +def test_smatrix_cache_reuse_and_invalidate(): + planewave={'p_amp':1,'s_amp':0,'p_phase':0,'s_phase':0} + obj = rcwa_assembly(epgrid,freq,theta,phi,planewave,pthick,Pscale=1.) + + # First solve builds cache + R1, T1 = obj.RT_Solve(normalize=0) + assert obj._smatrix_cache is not None + old_kpphi = np.array(obj._smatrix_cache['kpphi_inv_list'][1], copy=True) + + # Repeated solve should reuse the same cache object and result + R2, T2 = obj.RT_Solve(normalize=0) + assert np.allclose([R1, T1], [R2, T2]) + + # Updating patterned epsilon should invalidate and rebuild cache + epgrid2 = np.array(epgrid, copy=True) + epgrid2[0, 0] = epgrid2[0, 0] + 0.5 + obj.GridLayer_geteps(epgrid2.flatten()) + assert obj._smatrix_cache is None + + obj.RT_Solve(normalize=0) + assert obj._smatrix_cache is not None + assert not np.allclose(obj._smatrix_cache['kpphi_inv_list'][1], old_kpphi) + + + +def _assemble_1d_compare_case(nG_case=41, Nx_case=41, Ny_case=9, freq_case=0.85): + L1c = [0.2, 0] + L2c = [0, 0.2] + theta_c = np.pi / 20 + phi_c = 0. + + # 1D profile varying along x only + x = np.linspace(0, 1., Nx_case) + eps_x = np.where(np.abs(x - 0.5) < 0.2, 10.0, 2.0) + + ep_2d = np.repeat(eps_x[:, None], Ny_case, axis=1) + ep_1d = eps_x[:, None] + + planewave_c = {'p_amp': 1, 's_amp': 0, 'p_phase': 0, 's_phase': 0} + + # baseline 2D emulation + obj2 = grcwa.obj(nG_case, L1c, L2c, freq_case, theta_c, phi_c, verbose=0) + obj2.Add_LayerUniform(0.4, 1.0) + obj2.Add_LayerGrid(0.2, Nx_case, Ny_case) + obj2.Add_LayerUniform(0.4, 1.0) + obj2.Init_Setup(Gmethod=1) + obj2.MakeExcitationPlanewave(planewave_c['p_amp'], planewave_c['p_phase'], + planewave_c['s_amp'], planewave_c['s_phase'], order=0) + obj2.GridLayer_geteps(ep_2d.flatten()) + + # inferred 1D case (Ny=1) + obj1 = grcwa.obj(nG_case, L1c, L2c, freq_case, theta_c, phi_c, verbose=0) + obj1.Add_LayerUniform(0.4, 1.0) + obj1.Add_LayerGrid(0.2, Nx_case, 1) + obj1.Add_LayerUniform(0.4, 1.0) + obj1.Init_Setup(Gmethod=1) + obj1.MakeExcitationPlanewave(planewave_c['p_amp'], planewave_c['p_phase'], + planewave_c['s_amp'], planewave_c['s_phase'], order=0) + obj1.GridLayer_geteps(ep_1d.flatten()) + + return obj2, obj1 + + +def test_1d_inference_reduces_to_single_harmonic_axis(): + obj2, obj1 = _assemble_1d_compare_case() + + assert obj2.grid_periodic_dim == '2d' + assert obj1.grid_periodic_dim == '1dx' + assert np.all(obj1.G[:, 1] == 0) + assert obj1.nG <= obj2.nG + + +def test_1d_inference_matches_2d_emulation_for_rt_points(): + freqs = [0.7, 0.85, 1.0] + for f in freqs: + obj2, obj1 = _assemble_1d_compare_case(freq_case=f) + R2, T2 = obj2.RT_Solve(normalize=1) + R1, T1 = obj1.RT_Solve(normalize=1) + + assert np.allclose(R1, R2, rtol=2e-3, atol=1e-5) + assert np.allclose(T1, T2, rtol=2e-3, atol=1e-5) + if AG_AVAILABLE: grcwa.set_backend('autograd') def test_epsgrad():