Skip to content

Commit 601222e

Browse files
authored
New PyTorch and Keras interfaces (#30)
Native Torch and Keras Interfaces for CrossSim and small bugfixes.
1 parent 8dd008f commit 601222e

File tree

216 files changed

+14986
-583
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

216 files changed

+14986
-583
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ applications/dnn/training/cross_sim_models/
1414
applications/dnn/training/sweep_results/
1515
applications/dnn/training/weight_update_stats/
1616
applications/dnn/inference/errorloop_outputs/
17+
applications/dnn/torch/cifar10_resnet/cifar-10-batches-py/
1718
tutorial/.ipynb_checkpoints/
1819
tutorial/params_64.json
1920
CrossSim.egg-info/

applications/dnn/calibration.py

Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
#
2+
# Copyright 2024 National Technology & Engineering Solutions of Sandia, LLC
3+
# (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S.
4+
# Government retains certain rights in this software.
5+
#
6+
# See LICENSE for full license details
7+
#
8+
9+
"""
10+
Utility functions for calibrating input ranges for crossbar inputs and ADCs
11+
based on profiled data. Compatible with both PyTorch and Keras interfaces.
12+
These calibration methods are not guaranteed to be optimal.
13+
"""
14+
15+
import os

import numpy as np
import numpy.typing as npt
from scipy.optimize import minimize

from simulator.backend import ComputeBackend
from simulator.parameters.core_parameters import CoreStyle
from simulator.parameters.core_parameters import BitSlicedCoreStyle
21+
# Array-library handle used for all bulk numerics in this module (asarray,
# percentile, max/min, linalg.norm). Presumably resolves to NumPy or a GPU
# equivalent depending on configuration -- confirm against simulator.backend.
xp = ComputeBackend()
22+
23+
24+
def calibrate_input_limits(
    all_xbar_inputs: list,
    Nbits: int = 0,
    norm_ord: float = 1.0,
) -> npt.ArrayLike:
    """Optimizes the input range for all layers in a network given profiled
    input values. This function is intended for use with ResNet CNNs where
    all but the first layer is preceded by a ReLU, so the lower limit of every
    range is fixed at zero.

    Args:
        all_xbar_inputs: list of arrays, each array contains profiled input
            values for a layer
        Nbits: quantization resolution used in optimizer
            Set to 0 to set range based on max profiled value
        norm_ord: power of the error norm used for the loss function in optimizer
    Returns:
        NumPy array of shape (n_layers, 2) containing the (min, max) range for
        the inputs of every layer
    """

    # Column 0 (the lower limit) stays at zero; only column 1 is filled in.
    input_limits = np.zeros((len(all_xbar_inputs), 2))

    for k, layer_inputs in enumerate(all_xbar_inputs):

        # Flatten so that the loss function always evaluates a vector norm,
        # whatever shape the profiled data was stored in.
        xbar_inputs_k = xp.asarray(layer_inputs).ravel()

        if Nbits > 0:
            eta0 = -4
            # Optimize the input percentile
            eta = minimize(
                quantizationError_ReLU,
                eta0,
                args=(xbar_inputs_k, Nbits, norm_ord),
                method="nelder-mead",
                tol=0.1,
            )
            percentile_max = 100 * (1 - pow(10, float(eta.x[0])))
            # The loss function clips the percentile to [0, 100]; apply the same
            # clipping here so the optimizer's result is always a valid percentile.
            percentile_max = min(max(percentile_max, 0.0), 100.0)
            xmax = xp.percentile(xbar_inputs_k, percentile_max)
        else:
            xmax = xp.max(xbar_inputs_k)

        input_limits[k, 1] = float(xmax)

    return input_limits
68+
69+
70+
def calibrate_adc_limits(
    analog_layers: list,
    all_adc_inputs: list,
    Nbits: int = 0,
    norm_ord: float = 1.0,
    bitslice_pct: float = 99.99,
    Nrows=1,
) -> npt.ArrayLike:
    """Optimizes the ADC input range for all layers in a network given profiled
    input values.

    Args:
        analog_layers: list of Torch analog modules or Keras analog layers containing
            params that will be used to decide how to calibrate
        all_adc_inputs: list of arrays, each array contains profiled input
            values for a layer
        Nbits: quantization resolution used in optimizer
            Set to 0 to set range based on max profiled value
            (Used for unsliced core only)
        norm_ord: power of the error norm used for the loss function in optimizer
            (Used for unsliced core only)
        bitslice_pct: desired percentile coverage of input distribution that is used to
            find ADC ranges. (Used for bitsliced core only)
        Nrows: row count forwarded to optimize_adc_limits_bitsliced, either one
            value for all layers or a sequence with one entry per layer.
            (Used for bitsliced core only)
            NOTE(review): the previous code forwarded an undefined name here, so
            the bit-sliced path always raised NameError. Confirm whether this
            value should instead be derived from each layer.
    Returns:
        If no layer uses a bit-sliced core: NumPy array of shape (n_layers, 2)
        with the (min, max) range of every layer. Otherwise: a list with one
        entry per layer, where bit-sliced layers hold a (num_slices, 2) array
        and unsliced layers hold a length-2 array.
    """

    n_layers = len(all_adc_inputs)

    # Inspect every layer (not only the first) so that mixed networks get a
    # container that can hold both kinds of result; an empty network is valid.
    any_bitsliced = any(
        layer.params.core.style == CoreStyle.BITSLICED for layer in analog_layers
    )
    if any_bitsliced:
        # Allows non-uniform bit slice width across layers
        adc_limits = [None] * n_layers
    else:
        adc_limits = np.zeros((n_layers, 2))

    rows_is_scalar = np.ndim(Nrows) == 0

    for k, layer in enumerate(analog_layers):
        adc_inputs_k = xp.asarray(all_adc_inputs[k])

        if layer.params.core.style != CoreStyle.BITSLICED:
            # Plain item assignment works for both the ndarray and the list case.
            adc_limits[k] = optimize_adc_limits_unsliced(
                adc_inputs_k, Nbits=Nbits, norm_ord=norm_ord
            )
        else:
            bit_sliced = layer.params.core.bit_sliced
            adc_limits[k] = optimize_adc_limits_bitsliced(
                adc_inputs_k,
                bit_sliced.num_slices,
                style=bit_sliced.style,
                Nrows=Nrows if rows_is_scalar else Nrows[k],
                pct=bitslice_pct,
            )

    return adc_limits
123+
124+
125+
def optimize_adc_limits_unsliced(
    adc_inputs_k: npt.ArrayLike,
    Nbits: int = 0,
    norm_ord: float = 1.0,
) -> npt.ArrayLike:
    """Optimizes the ADC input range for one layer which does not use weight bit slicing.

    Args:
        adc_inputs_k: profiled ADC input values for the layer (any shape)
        Nbits: quantization resolution used in optimizer
            Set to 0 to set range based on min/max profiled value
        norm_ord: power of the error norm used for the loss function in optimizer
    Returns:
        NumPy array of length 2 containing the (min, max) ADC range
    """

    # Although input bit slices are profiled separately, the current calibration
    # method does not resolve data by input bit
    adc_inputs_k = xp.asarray(adc_inputs_k).ravel()

    if Nbits > 0:
        etas0 = (-4, -4)
        # Optimize the lower and upper input percentiles
        etas = minimize(
            quantizationError_minMax,
            etas0,
            args=(adc_inputs_k, Nbits, norm_ord),
            method="nelder-mead",
            tol=0.1,
        )
        percentile_min = 100 * pow(10, float(etas.x[0]))
        percentile_max = 100 * (1 - pow(10, float(etas.x[1])))
        # The loss function clips both percentiles to [0, 100]; mirror that here
        # so the optimizer's result is always a valid percentile.
        percentile_min = min(max(percentile_min, 0.0), 100.0)
        percentile_max = min(max(percentile_max, 0.0), 100.0)
        xmin = xp.percentile(adc_inputs_k, percentile_min)
        xmax = xp.percentile(adc_inputs_k, percentile_max)
    else:
        xmin = xp.min(adc_inputs_k)
        xmax = xp.max(adc_inputs_k)

    # float() pulls the backend scalars onto the host before building the result
    return np.array([float(xmin), float(xmax)])
157+
158+
159+
def optimize_adc_limits_bitsliced(
    adc_inputs_k: npt.ArrayLike,
    num_slices: int = 2,
    style: int = BitSlicedCoreStyle.BALANCED,
    Nrows: int = 1,
    pct: float = 99.99,
) -> npt.ArrayLike:
    """
    Optimizes the ADC input range for one layer which uses weight bit slicing.
    To reduce the overhead of bit slice digital post-processing, this method ensures
    that the ratio of the ADC limits of any two bit slices must be a power of 2.

    Args:
        adc_inputs_k: profiled ADC input values, indexed by weight bit slice
            along the first axis
        num_slices: number of weight bit slices
        style: bit slicing style; OFFSET is not supported
        Nrows: value the profiled inputs are divided by before the percentiles
            are taken; the limits are +/- Nrows divided by a power of 2
        pct: percentile of the input distribution that the limits must cover
    Returns:
        NumPy array of shape (num_slices, 2) containing the symmetric
        (min, max) ADC range of every bit slice
    Raises:
        NotImplementedError: if style is BitSlicedCoreStyle.OFFSET
    """

    # NOTE: Although input bit slices are profiled separately, the current calibration
    # method does not resolve data by input bit

    if style == BitSlicedCoreStyle.OFFSET:
        raise NotImplementedError(
            "ADC limits auto-calibration with weight bit slicing OFFSET "
            + "style not been implemented yet."
        )

    adc_limits_k = np.zeros((num_slices, 2))

    for i_slice in range(num_slices):
        # Out-of-place division: leaves the caller's data untouched and also
        # works when the profiled values have an integer dtype.
        adc_inputs_ik = adc_inputs_k[i_slice].ravel() / Nrows

        # Find the percentile extreme values of the ADC input distribution.
        # float() moves the backend scalars to the host.
        p_neg = float(xp.percentile(adc_inputs_ik, 100 - pct))
        p_pos = float(xp.percentile(adc_inputs_ik, pct))
        p_out = max(abs(p_neg), abs(p_pos))

        # Compute how much the ADC limits can be divided from the maximum possible,
        # and still cover the percentile extreme values. If the distribution is
        # all zeros (or not finite) there is nothing to fit, so the full range
        # (clip power 0) is kept.
        clip_power_i = 0
        if p_out > 0:
            log_ratio = np.log2(1.0 / p_out)
            if np.isfinite(log_ratio):
                clip_power_i = int(np.floor(log_ratio))

        # A float base keeps this valid for negative powers, i.e. when the
        # normalized extreme value exceeds 1 and the range has to grow.
        limit_i = Nrows / 2.0**clip_power_i
        adc_limits_k[i_slice, 0] = -limit_i
        adc_limits_k[i_slice, 1] = limit_i

    return adc_limits_k
199+
200+
201+
202+
def quantizationError_ReLU(eta, x, Nbits, norm_ord):
    """Quantizes values over a range from zero to a high percentile value
    of the data and returns the resulting error norm. The percentile is only
    applied on large positive values, assuming ReLU activation is used.

    Args:
        eta: parameter that controls the percentile used for clipping
            (to be optimized); a scalar or a length-1 array
        x: data values to be quantized (any shape; treated as a flat vector)
        Nbits: quantization resolution in bits
        norm_ord: power of the error norm used for the loss function
    Returns:
        Norm of the difference between x and its clipped, quantized version
    """

    # The optimizer hands over a length-1 array; reduce it to a plain float so
    # every derived quantity below is a scalar.
    eta = float(np.ravel(eta)[0])
    # Flatten so that linalg.norm computes a vector norm rather than a matrix norm
    x = xp.asarray(x).ravel()

    # Clip
    P = float(np.clip(100 * (1 - np.power(10.0, eta)), 0, 100))
    x_min = 0  # assume ReLU
    x_maxP = float(xp.percentile(x, P))
    span = x_maxP - x_min

    if not span > 0:
        # Degenerate range: every value collapses onto x_min. Returning that
        # error directly avoids a division by zero and a NaN loss.
        return float(xp.linalg.norm(x - x_min, ord=norm_ord))

    x_Q = xp.clip(x, x_min, x_maxP)

    # Quantize
    qmult = (2**Nbits - 1) / span
    x_Q = xp.rint((x_Q - x_min) * qmult) / qmult + x_min
    err = xp.linalg.norm(x - x_Q, ord=norm_ord)
    return float(err)
231+
232+
233+
def quantizationError_minMax(etas, x, Nbits, norm_ord):
    """Quantizes values over a range bounded by a lower and an upper
    percentile of the data and returns the resulting error norm.

    Args:
        etas: tuple of two parameters that control the lower and upper percentile
            used for clipping (to be optimized)
        x: data values to be quantized (any shape; treated as a flat vector)
        Nbits: quantization resolution in bits
        norm_ord: power of the error norm used for the loss function
    Returns:
        Norm of the difference between x and its clipped, quantized version
    """

    # Reduce the optimizer's parameter array to plain floats
    etaMin, etaMax = (float(e) for e in np.ravel(etas))
    # Flatten so that linalg.norm computes a vector norm rather than a matrix norm
    x = xp.asarray(x).ravel()

    # Clip
    P_min = float(np.clip(100 * np.power(10.0, etaMin), 0, 100))
    P_max = float(np.clip(100 * (1 - np.power(10.0, etaMax)), 0, 100))
    x_min = float(xp.percentile(x, P_min))
    x_max = float(xp.percentile(x, P_max))
    span = x_max - x_min

    if not span > 0:
        # Degenerate or inverted range: every value collapses onto x_min.
        # Returning that error directly avoids a division by zero (NaN loss)
        # or a negative quantization step.
        return float(xp.linalg.norm(x - x_min, ord=norm_ord))

    x_Q = xp.clip(x, x_min, x_max)

    # Quantize
    qmult = (2**Nbits - 1) / span
    x_Q = xp.rint((x_Q - x_min) * qmult) / qmult + x_min
    err = xp.linalg.norm(x - x_Q, ord=norm_ord)
    return float(err)

0 commit comments

Comments
 (0)