viewof r = Inputs.range([0, 2], {step: 0.01, value: 1.2, label: tex`r = \frac{\pi_\theta}{\pi_{\text{old}}}`, width: 250})
viewof A = Inputs.range([-2, 2], {step: 0.1, value: 1.0, label: tex`\hat{A}`, width: 250})
viewof eps = Inputs.range([0, 0.5], {step: 0.01, value: 0.2, label: tex`\epsilon`, width: 250})
// PPO clipped surrogate objective evaluated at a single probability ratio:
//   L(r) = min(r * A, clip(r, 1 - epsilon, 1 + epsilon) * A)
// Parameters:
//   ratio     - probability ratio r
//   advantage - advantage estimate A (any sign)
//   epsilon   - clip range; the ratio is clamped to [1 - epsilon, 1 + epsilon]
// Returns the objective value as a number.
function ppoClipObj(ratio, advantage, epsilon) {
  const clippedRatio = Math.min(1 + epsilon, Math.max(1 - epsilon, ratio));
  // The outer min applies for either sign of the advantage. Taking max of the
  // two products when A < 0 would flatten the curve on the wrong side
  // (r > 1 + epsilon instead of r < 1 - epsilon).
  return Math.min(ratio * advantage, clippedRatio * advantage);
}
// Objective value at the current slider settings (r, A, eps)
objective = ppoClipObj(r, A, eps)
// Ratios at which the plotted curves are sampled
ratio_values = d3.range(0, 2.01, 0.01)
// One row per sampled ratio, holding the plain product r * A alongside the
// value returned by ppoClipObj for the same ratio
curve_data = ratio_values.map((ratio) => {
  const unclipped_obj = ratio * A;
  const clipped_obj = ppoClipObj(ratio, A, eps);
  return {r: ratio, unclipped_obj, clipped_obj};
})
// Plot both objective curves against the probability ratio, with dashed
// vertical rules at the clip boundaries 1 - eps and 1 + eps.
Plot.plot({
  style: "overflow: visible; display: block; margin: 0 auto;",
  width: 600,
  height: 400,
  y: {grid: true, label: "Objective Value"},
  x: {label: "Probability Ratio r", domain: [0, 2]},
  // `label` is not a mark option, so the curve names were never displayed.
  // Name the curves through an ordinal color scale and request its legend.
  color: {
    legend: true,
    domain: ["Unclipped", "Clipped"],
    range: ["steelblue", "orange"]
  },
  marks: [
    // A function-valued stroke is treated as a channel and mapped through the
    // color scale; a bare non-color string would be read as a field name.
    Plot.line(curve_data, {x: "r", y: "unclipped_obj", stroke: () => "Unclipped", strokeWidth: 2}),
    Plot.line(curve_data, {x: "r", y: "clipped_obj", stroke: () => "Clipped", strokeWidth: 2}),
    Plot.ruleX([1 - eps, 1 + eps], {stroke: "red", strokeDasharray: "4,4"})
  ]
})