03 - Training Qiskit VQC with a Simple SPSA-style Optimizer
This notebook trains a small Qiskit variational classifier using a
gradient-free SPSA-style optimizer with the Sampler primitive.
In [1]:
Copied!
import numpy as np
from qiskit.circuit import ParameterVector, QuantumCircuit
from qiskit.primitives import Sampler
from quantumuq import ShotBootstrap, wrap_qiskit_sampler
from quantumuq.datasets.toy import make_moons
# Seeded generator: drives the train/test shuffle below and is reused by
# spsa_step for its random perturbations, so the order of use affects results.
rng = np.random.default_rng(0)
# Toy two-moons dataset from quantumuq; the returned object exposes .X and .y.
dataset = make_moons(n_samples=200, noise=0.1, random_state=0)
X, y = dataset.X, dataset.y
# Shuffle the sample indices once, then take the first 150 for training and
# the remainder for testing.
perm = rng.permutation(len(X))
train_idx, test_idx = perm[:150], perm[150:]
X_train, y_train = X[train_idx], y[train_idx]
X_test, y_test = X[test_idx], y[test_idx]
import numpy as np
from qiskit.circuit import ParameterVector, QuantumCircuit
from qiskit.primitives import Sampler
from quantumuq import ShotBootstrap, wrap_qiskit_sampler
from quantumuq.datasets.toy import make_moons
# Seeded generator: drives the train/test shuffle below and is reused by
# spsa_step for its random perturbations, so the order of use affects results.
rng = np.random.default_rng(0)
# Toy two-moons dataset from quantumuq; the returned object exposes .X and .y.
dataset = make_moons(n_samples=200, noise=0.1, random_state=0)
X, y = dataset.X, dataset.y
# Shuffle the sample indices once, then take the first 150 for training and
# the remainder for testing.
perm = rng.permutation(len(X))
train_idx, test_idx = perm[:150], perm[150:]
X_train, y_train = X[train_idx], y[train_idx]
X_test, y_test = X[test_idx], y[test_idx]
In [2]:
Copied!
# Single-qubit ansatz with two symbolic rotation angles theta[0], theta[1].
n_qubits = 1
n_params = 2
theta = ParameterVector("theta", n_params)
qc = QuantumCircuit(n_qubits)
qc.ry(theta[0], 0)
# NOTE(review): an RZ directly before a computational-basis measurement only
# adds a phase, so theta[1] presumably has no effect on the sampled
# probabilities — confirm this is intended for the demo.
qc.rz(theta[1], 0)
# Add measurements on all qubits so the sampler has outcomes to report.
qc.measure_all()
def feature_map(X: np.ndarray):
    """Turn input samples into per-sample parameter rows ``[x0, -x0]``.

    Args:
        X: A batch of samples (2-D, one row per sample) or a single sample
            given as a 1-D feature vector.

    Returns:
        A list with one ``[x0, -x0]`` pair of Python floats per sample, where
        ``x0`` is that sample's first feature. Empty input yields ``[]``.
    """
    X_arr = np.asarray(X)
    if X_arr.ndim < 2:
        if X_arr.size == 0:
            return []
        # Promote a lone sample to a one-row batch. Slicing with [0:1] left the
        # array 1-D, so the comprehension below indexed into a scalar and raised.
        X_arr = X_arr.reshape(1, -1)
    # Map first feature to a simple linear function.
    return [[float(x[0]), -float(x[0])] for x in X_arr]
# NOTE(review): qiskit >= 1.2 emits a DeprecationWarning for this V1 Sampler and
# names StatevectorSampler as the V2 alternative; confirm wrap_qiskit_sampler
# accepts a V2 sampler before switching.
sampler = Sampler()
# Baseline predictor; presumably feature_map supplies the parameter values for
# each sample — confirm against the quantumuq documentation.
predictor = wrap_qiskit_sampler(
sampler=sampler,
circuit=qc,
task="classification",
n_classes=2,
feature_map=feature_map,
)
# Single-qubit ansatz with two symbolic rotation angles theta[0], theta[1].
n_qubits = 1
n_params = 2
theta = ParameterVector("theta", n_params)
qc = QuantumCircuit(n_qubits)
qc.ry(theta[0], 0)
# NOTE(review): an RZ directly before a computational-basis measurement only
# adds a phase, so theta[1] presumably has no effect on the sampled
# probabilities — confirm this is intended for the demo.
qc.rz(theta[1], 0)
# Add measurements on all qubits so the sampler has outcomes to report.
qc.measure_all()
def feature_map(X: np.ndarray):
    """Turn input samples into per-sample parameter rows ``[x0, -x0]``.

    Args:
        X: A batch of samples (2-D, one row per sample) or a single sample
            given as a 1-D feature vector.

    Returns:
        A list with one ``[x0, -x0]`` pair of Python floats per sample, where
        ``x0`` is that sample's first feature. Empty input yields ``[]``.
    """
    X_arr = np.asarray(X)
    if X_arr.ndim < 2:
        if X_arr.size == 0:
            return []
        # Promote a lone sample to a one-row batch. Slicing with [0:1] left the
        # array 1-D, so the comprehension below indexed into a scalar and raised.
        X_arr = X_arr.reshape(1, -1)
    # Map first feature to a simple linear function.
    return [[float(x[0]), -float(x[0])] for x in X_arr]
# NOTE(review): qiskit >= 1.2 emits a DeprecationWarning for this V1 Sampler and
# names StatevectorSampler as the V2 alternative; confirm wrap_qiskit_sampler
# accepts a V2 sampler before switching.
sampler = Sampler()
# Baseline predictor; presumably feature_map supplies the parameter values for
# each sample — confirm against the quantumuq documentation.
predictor = wrap_qiskit_sampler(
sampler=sampler,
circuit=qc,
task="classification",
n_classes=2,
feature_map=feature_map,
)
/var/folders/rh/1c0lrj_x0x956417g86lc4ph0000gn/T/ipykernel_16882/3919322708.py:17: DeprecationWarning: The class ``qiskit.primitives.sampler.Sampler`` is deprecated as of qiskit 1.2. It will be removed no earlier than 3 months after the release date. All implementations of the `BaseSamplerV1` interface have been deprecated in favor of their V2 counterparts. The V2 alternative for the `Sampler` class is `StatevectorSampler`. sampler = Sampler()
In [3]:
Copied!
def spsa_step(params, a, c):
    """Perform one SPSA-style update and return the new parameter vector.

    Args:
        params: Current parameters as a NumPy array (its ``.shape`` sizes the
            random perturbation).
        a: Step size applied to the gradient estimate.
        c: Perturbation magnitude used for the two loss evaluations.

    Returns:
        ``params - a * g_hat``, where ``g_hat`` is the two-point finite
        difference estimate built from the losses at ``params +/- c * delta``.

    Relies on the module-level ``rng``, ``sampler``, ``qc``, ``X_train`` and
    ``y_train``.
    """
    # Random +/-1 direction, one entry per parameter.
    perturbation = rng.choice([-1.0, 1.0], size=params.shape)
    shifted_up = params + c * perturbation
    shifted_down = params - c * perturbation

    def loss_for(candidate):
        """Mean cross-entropy on the training set with parameters ``candidate``."""

        # Stand-in feature map: every sample receives the same candidate
        # parameters, which is how the shifted values reach the circuit.
        def constant_params(X_batch):
            rows = np.atleast_2d(X_batch)
            return [list(candidate) for _ in rows]

        candidate_predictor = wrap_qiskit_sampler(
            sampler=sampler,
            circuit=qc,
            task="classification",
            n_classes=2,
            feature_map=constant_params,
        )
        class_probs = candidate_predictor.predict_proba(X_train, shots=1000)
        # Clip away exact zeros so the logarithm stays finite.
        class_probs = np.clip(class_probs, 1e-12, 1.0)
        targets = np.eye(2)[y_train]
        per_sample = np.sum(targets * np.log(class_probs), axis=1)
        return -np.mean(per_sample)

    # Evaluate the "plus" point first, then the "minus" point.
    loss_up = loss_for(shifted_up)
    loss_down = loss_for(shifted_down)
    gradient_estimate = (loss_up - loss_down) / (2.0 * c * perturbation)
    return params - a * gradient_estimate
# Short SPSA run: only ten iterations so the notebook stays quick to execute.
params_vec = np.zeros(n_params)
# The perturbation size is the same on every iteration.
c = 0.1
for k in range(10):
    # Step size decays as 0.1 / (iteration number).
    a = 0.1 / (k + 1)
    params_vec = spsa_step(params_vec, a=a, c=c)
    print(f"Iter {k+1}, params={params_vec}")
def trained_feature_map(X_batch):
    """Return one copy of the current ``params_vec`` (as a list) per input row.

    The sample values themselves are ignored; only the number of rows in
    ``np.atleast_2d(X_batch)`` matters. ``params_vec`` is read from module
    scope each time the function is called.
    """
    n_rows = len(np.atleast_2d(X_batch))
    return [list(params_vec) for _ in range(n_rows)]
# Predictor wired to trained_feature_map, which reads params_vec when called.
trained_predictor = wrap_qiskit_sampler(
sampler=sampler,
circuit=qc,
task="classification",
n_classes=2,
feature_map=trained_feature_map,
)
# Attach shot-bootstrap uncertainty quantification (argument meanings are
# defined by quantumuq — presumably 8 resamples of 1000 shots; confirm).
uq = ShotBootstrap(n_samples=8, shots=1000, seed=0)
uq_model = trained_predictor.with_uq(uq)
# Predictive distribution for the held-out test samples.
dist = uq_model.predict_dist(X_test)
print("Predictive mean shape:", dist.mean.shape)
def spsa_step(params, a, c):
    """Perform one SPSA-style update and return the new parameter vector.

    Args:
        params: Current parameters as a NumPy array (its ``.shape`` sizes the
            random perturbation).
        a: Step size applied to the gradient estimate.
        c: Perturbation magnitude used for the two loss evaluations.

    Returns:
        ``params - a * g_hat``, where ``g_hat`` is the two-point finite
        difference estimate built from the losses at ``params +/- c * delta``.

    Relies on the module-level ``rng``, ``sampler``, ``qc``, ``X_train`` and
    ``y_train``.
    """
    # Random +/-1 direction, one entry per parameter.
    perturbation = rng.choice([-1.0, 1.0], size=params.shape)
    shifted_up = params + c * perturbation
    shifted_down = params - c * perturbation

    def loss_for(candidate):
        """Mean cross-entropy on the training set with parameters ``candidate``."""

        # Stand-in feature map: every sample receives the same candidate
        # parameters, which is how the shifted values reach the circuit.
        def constant_params(X_batch):
            rows = np.atleast_2d(X_batch)
            return [list(candidate) for _ in rows]

        candidate_predictor = wrap_qiskit_sampler(
            sampler=sampler,
            circuit=qc,
            task="classification",
            n_classes=2,
            feature_map=constant_params,
        )
        class_probs = candidate_predictor.predict_proba(X_train, shots=1000)
        # Clip away exact zeros so the logarithm stays finite.
        class_probs = np.clip(class_probs, 1e-12, 1.0)
        targets = np.eye(2)[y_train]
        per_sample = np.sum(targets * np.log(class_probs), axis=1)
        return -np.mean(per_sample)

    # Evaluate the "plus" point first, then the "minus" point.
    loss_up = loss_for(shifted_up)
    loss_down = loss_for(shifted_down)
    gradient_estimate = (loss_up - loss_down) / (2.0 * c * perturbation)
    return params - a * gradient_estimate
# Short SPSA run: only ten iterations so the notebook stays quick to execute.
params_vec = np.zeros(n_params)
# The perturbation size is the same on every iteration.
c = 0.1
for k in range(10):
    # Step size decays as 0.1 / (iteration number).
    a = 0.1 / (k + 1)
    params_vec = spsa_step(params_vec, a=a, c=c)
    print(f"Iter {k+1}, params={params_vec}")
def trained_feature_map(X_batch):
    """Return one copy of the current ``params_vec`` (as a list) per input row.

    The sample values themselves are ignored; only the number of rows in
    ``np.atleast_2d(X_batch)`` matters. ``params_vec`` is read from module
    scope each time the function is called.
    """
    n_rows = len(np.atleast_2d(X_batch))
    return [list(params_vec) for _ in range(n_rows)]
# Predictor wired to trained_feature_map, which reads params_vec when called.
trained_predictor = wrap_qiskit_sampler(
sampler=sampler,
circuit=qc,
task="classification",
n_classes=2,
feature_map=trained_feature_map,
)
# Attach shot-bootstrap uncertainty quantification (argument meanings are
# defined by quantumuq — presumably 8 resamples of 1000 shots; confirm).
uq = ShotBootstrap(n_samples=8, shots=1000, seed=0)
uq_model = trained_predictor.with_uq(uq)
# Predictive distribution for the held-out test samples.
dist = uq_model.predict_dist(X_test)
print("Predictive mean shape:", dist.mean.shape)
Iter 1, params=[0.08382779 0.08382779] Iter 2, params=[2.76816958 2.76816958] Iter 3, params=[2.68309031 2.85324885] Iter 4, params=[2.63318114 2.80333968] Iter 5, params=[2.59919249 2.76935103] Iter 6, params=[2.57171491 2.74187344] Iter 7, params=[2.5497678 2.76382055] Iter 8, params=[2.5311114 2.74516415] Iter 9, params=[2.51648865 2.73054141] Iter 10, params=[2.50320384 2.7172566 ] Predictive mean shape: (50, 2)
What the SPSA training loop does
- `params_vec` collects all trainable circuit parameters.
- At each iteration, we sample a random perturbation `delta` and evaluate the loss at `params + c * delta` and `params - c * delta`.
- These two evaluations give a noisy estimate of the gradient direction, which we use to update `params_vec`.
- This is a simple, gradient-free way to optimize Qiskit circuits using only `Sampler` evaluations (no analytic gradients required).