Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/common/abi/SP1Helios.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,12 @@
"outputs": [],
"stateMutability": "nonpayable",
"type": "function"
},
{
"inputs": [],
"name": "heliosProgramVkey",
"outputs": [{ "internalType": "bytes32", "name": "", "type": "bytes32" }],
"stateMutability": "view",
"type": "function"
}
]
88 changes: 55 additions & 33 deletions src/finalizer/utils/helios.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,17 @@ import {
} from "../../utils";
import { spreadEventWithBlockNumber } from "../../utils/EventUtils";
import { FinalizerPromise, CrossChainMessage } from "../types";
import axios from "axios";
import UNIVERSAL_SPOKE_ABI from "../../common/abi/Universal_SpokePool.json";
import { RelayedCallDataEvent, StoredCallDataEvent } from "../../interfaces/Universal";
import { ApiProofRequest, ProofOutputs, ProofStateResponse, SP1HeliosProofData } from "../../interfaces/ZkApi";
import { StorageSlotVerifiedEvent, HeadUpdateEvent } from "../../interfaces/Helios";
import { calculateProofId, decodeProofOutputs } from "../../utils/ZkApiUtils";
import {
calculateProofId,
decodeProofOutputs,
getProofStateWithRetries,
getVkeyWithRetries,
requestProofWithRetries,
} from "../../utils/ZkApiUtils";
import { calculateHubPoolStoreStorageSlot, getHubPoolStoreContract } from "../../utils/UniversalUtils";
import { stringifyThrownValue } from "../../utils/LogUtils";
import { getSp1HeliosContractEVM } from "../../utils/HeliosUtils";
Expand Down Expand Up @@ -69,6 +74,15 @@ export async function heliosL1toL2Finalizer(
const l1ChainId = hubPoolClient.chainId;
const l2ChainId = l2SpokePoolClient.chainId;
const sp1HeliosL2 = await getSp1HeliosContractEVM(l2SpokePoolClient.spokePool, l2SpokePoolClient.spokePool.signer);

const apiBaseUrl = process.env.HELIOS_PROOF_API_URL;
if (!apiBaseUrl) {
throw new Error("[heliosL1toL2Finalizer] HELIOS_PROOF_API_URL environment variable not set.");
}

// Early vkey mismatch check to avoid requesting wrong proofs
await ensureVkeysMatch(apiBaseUrl, sp1HeliosL2);

const { sp1HeliosHead, sp1HeliosHeader } = await getSp1HeliosHeadData(sp1HeliosL2);

// --- Step 1: Identify all actions needed (pending L1 -> L2 messages to finalize & keep-alive) ---
Expand All @@ -95,6 +109,7 @@ export async function heliosL1toL2Finalizer(
// --- Step 2: Enrich actions with ZK proofs. Return messages that are ready to submit on-chain ---
const readyActions = await enrichHeliosActions(
logger,
apiBaseUrl,
actions,
l2SpokePoolClient,
l1SpokePoolClient,
Expand Down Expand Up @@ -267,17 +282,14 @@ async function shouldGenerateKeepAliveAction(
// returns helios messages ready for on-chain execution enriched with proof data
async function enrichHeliosActions(
logger: winston.Logger,
apiBaseUrl: string,
actions: HeliosAction[],
l2SpokePoolClient: EVMSpokePoolClient,
l1SpokePoolClient: EVMSpokePoolClient,
currentL2HeliosHeadNumber: number,
currentL2HeliosHeader: string
): Promise<HeliosAction[]> {
const l2ChainId = l2SpokePoolClient.chainId;
const apiBaseUrl = process.env.HELIOS_PROOF_API_URL;
if (!apiBaseUrl) {
throw new Error("[enrichHeliosActions] HELIOS_PROOF_API_URL environment variable not set.");
}
const hubPoolStoreAddress = getHubPoolStoreContract(
l1SpokePoolClient.chainId,
l1SpokePoolClient.spokePool.provider
Expand Down Expand Up @@ -327,38 +339,24 @@ async function enrichHeliosActions(
}

const proofId = calculateProofId(apiRequest);
const getProofUrl = `${apiBaseUrl}/v1/api/proofs/${proofId}`;
logger.debug({ ...logContext, message: "Attempting to get proof", proofId });

logger.debug({ ...logContext, message: "Attempting to get proof", proofId, getProofUrl });

let proofState: ProofStateResponse | null = null;

// @dev We need try - catch here because of how API responds to non-existing proofs: with NotFound status
let getError: any = null;
let proofStateOr404: ProofStateResponse | 404;
try {
const response = await axios.get<ProofStateResponse>(getProofUrl);
proofState = response.data;
logger.debug({ ...logContext, message: "Proof state received", proofId, status: proofState.status });
} catch (error: any) {
getError = error;
proofStateOr404 = await getProofStateWithRetries(apiBaseUrl, proofId);
} catch (error) {
// add context to error
throw new Error(`Failed to get proof state for proofId ${proofId}: ${stringifyThrownValue(error)}`);
}

// Axios error. Handle based on whether was a NOTFOUND or another error
if (getError) {
const isNotFoundError = axios.isAxiosError(getError) && getError.response?.status === 404;
if (isNotFoundError) {
// NOTFOUND error -> Request proof
logger.debug({ ...logContext, message: "Proof not found (404), requesting...", proofId });
await axios.post(`${apiBaseUrl}/v1/api/proofs`, apiRequest);
logger.debug({ ...logContext, message: "Proof requested successfully.", proofId });
continue;
} else {
// If other error is returned -- throw and alert PD; this shouldn't happen
throw new Error(`Failed to get proof state for proofId ${proofId}: ${stringifyThrownValue(getError)}`);
}
if (proofStateOr404 === 404) {
logger.debug({ ...logContext, message: "Proof not found (404), requesting...", proofId });
await requestProofWithRetries(apiBaseUrl, apiRequest);
logger.debug({ ...logContext, message: "Proof requested successfully.", proofId });
continue;
}

// No axios error, process `proofState`
const proofState: ProofStateResponse = proofStateOr404;
switch (proofState.status) {
case "pending":
// If proof generation is pending -- there's nothing for us to do yet. Will check this proof next run
Expand All @@ -378,7 +376,7 @@ async function enrichHeliosActions(
errorMessage: proofState.error_message,
});

await axios.post(`${apiBaseUrl}/v1/api/proofs`, apiRequest);
await requestProofWithRetries(apiBaseUrl, apiRequest);
logger.debug({ ...logContext, message: "Errored proof requested again successfully.", proofId });
break;
}
Expand Down Expand Up @@ -762,3 +760,27 @@ function addUpdateOnlyTxn(
destinationChainId: l2ChainId,
});
}

/**
 * Verifies that the vkey reported by the ZK API equals the vkey stored in the SP1Helios contract, and throws when
 * the two differ.
 *
 * @notice A proof generated under one vkey cannot be used to confirm messages on a destination chain whose SP1Helios
 * contract holds a different vkey. When upgrading the ZK Helios setup, the ZK API is always upgraded _after_ the
 * upgrade message has been sent to all the V4 chains, because the API first has to generate the proofs that get the
 * "upgrade message" itself confirmed on each destination chain. Chains do not necessarily upgrade at exactly the same
 * time, so the ELF vkey in the ZK API can be stale relative to a contract that has already upgraded. Failing here
 * prevents the finalizer from requesting a proof with that stale vkey.
 *
 * @dev Both values are fetched fresh on every call and are intentionally not cached: the check only makes sense on
 * non-cached values, and since there is currently no logic to remove proofs from the ZK API, a proof requested under
 * the wrong vkey would have to be cleaned up manually.
 *
 * @param apiBaseUrl Base URL of the ZK API.
 * @param sp1Helios SP1Helios contract instance to read `heliosProgramVkey()` from.
 * @throws If either vkey is missing or the two vkeys differ (compared case-insensitively).
 */
async function ensureVkeysMatch(apiBaseUrl: string, sp1Helios: ethers.Contract): Promise<void> {
  // Kick off both lookups before awaiting so they run concurrently.
  const apiVkeyRequest = getVkeyWithRetries(apiBaseUrl);
  const contractVkeyRequest = sp1Helios.heliosProgramVkey();
  const [vkeyResponse, rawContractVkey] = await Promise.all([apiVkeyRequest, contractVkeyRequest]);

  // Normalize casing so that differently-cased hex strings still compare equal.
  const apiVkey = vkeyResponse.vkey.toLowerCase();
  const contractVkey = rawContractVkey?.toLowerCase();

  const bothPresent = apiVkey !== undefined && contractVkey !== undefined;
  if (bothPresent && apiVkey === contractVkey) {
    return;
  }

  throw new Error(`SP1Helios vkey check failed: api=${apiVkey} contract=${contractVkey} address=${sp1Helios.address}`);
}
31 changes: 20 additions & 11 deletions src/interfaces/ZkApi.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { BigNumber } from "../utils";
import { enums, Infer, object, optional, string } from "superstruct";

// --- API Interaction Types ---
export interface ApiProofRequest {
Expand All @@ -9,19 +10,27 @@ export interface ApiProofRequest {
dst_chain_contract_from_header: string;
}

export type ProofStatus = "pending" | "success" | "errored";
export const ProofStatusSS = enums(["pending", "success", "errored"]);
export type ProofStatus = Infer<typeof ProofStatusSS>;

export interface SP1HeliosProofData {
proof: string;
public_values: string;
}
export const VkeyResponseSS = object({
vkey: string(),
});
export type VkeyResponse = Infer<typeof VkeyResponseSS>;

export interface ProofStateResponse {
proof_id: string;
status: ProofStatus;
update_calldata?: SP1HeliosProofData; // Present only if status is "success"
error_message?: string; // Present only if status is "errored"
}
export const SP1HeliosProofDataSS = object({
proof: string(),
public_values: string(),
});
export type SP1HeliosProofData = Infer<typeof SP1HeliosProofDataSS>;

export const ProofStateResponseSS = object({
proof_id: string(),
status: ProofStatusSS,
update_calldata: optional(SP1HeliosProofDataSS),
error_message: optional(string()),
});
export type ProofStateResponse = Infer<typeof ProofStateResponseSS>;

// ABI for `public_values` returned from ZK API as part of `SP1HeliosProofData`
export const PROOF_OUTPUTS_ABI_TUPLE = `tuple(
Expand Down
2 changes: 1 addition & 1 deletion src/utils/HeliosUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@ export async function getSp1HeliosContractEVM(
evmSpokePool.provider
);
const heliosAddress = await universalSpokePoolContract.helios();
return new ethers.Contract(heliosAddress, SP1_HELIOS_ABI as any, signerOrProvider);
return new ethers.Contract(heliosAddress, SP1_HELIOS_ABI, signerOrProvider);
}
9 changes: 9 additions & 0 deletions src/utils/RetryUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,12 @@ export function retryAsync<T, U extends unknown[]>(
}
return ret;
}

/**
 * Computes a retry delay using exponential backoff with jitter, capped at `maxDelayMs`.
 *
 * The un-jittered delay is `baseDelayMs * backoffExponentBase ** retry`. It is then scaled by a random factor in the
 * range (0.5, 1.5] (i.e. up to +/-50% jitter) and finally clamped to `[0, maxDelayMs]`.
 *
 * @dev Jitter is applied exactly once, _before_ the cap. Adding it again after capping lets the result exceed
 * `maxDelayMs`, or even go negative once the exponential term dwarfs the cap.
 *
 * @param retry Retry index; higher values yield exponentially longer delays.
 * @param baseDelayMs Delay, in milliseconds, for `retry = 0` before jitter.
 * @param backoffExponentBase Base of the exponential growth.
 * @param maxDelayMs Upper bound, in milliseconds, on the returned delay.
 * @returns Delay in milliseconds, always within `[0, maxDelayMs]`.
 */
export function backoffWithJitter(
  retry: number,
  baseDelayMs = 50,
  backoffExponentBase = 2,
  maxDelayMs = 5000
): number {
  const exponentialDelay = baseDelayMs * backoffExponentBase ** retry;
  // Equivalent to `exponentialDelay + (0.5 - random) * exponentialDelay`, but written as a single multiplication so
  // that an overflowing (Infinity) exponential term stays Infinity instead of turning into NaN.
  const jitteredDelay = exponentialDelay * (1.5 - Math.random());
  return Math.max(0, Math.min(jitteredDelay, maxDelayMs));
}
80 changes: 78 additions & 2 deletions src/utils/ZkApiUtils.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
import { BigNumber, ethers } from ".";
import { ApiProofRequest, PROOF_OUTPUTS_ABI_TUPLE, ProofOutputs } from "../interfaces/ZkApi";
import axios from "axios";
import { backoffWithJitter, BigNumber, ethers } from ".";
import {
ApiProofRequest,
PROOF_OUTPUTS_ABI_TUPLE,
ProofOutputs,
ProofStateResponse,
ProofStateResponseSS,
VkeyResponse,
VkeyResponseSS,
} from "../interfaces/ZkApi";
import { create } from "superstruct";

/**
* Calculates the deterministic Proof ID based on the request parameters.
Expand Down Expand Up @@ -52,3 +62,69 @@ export function decodeProofOutputs(publicValuesBytes: string): ProofOutputs {
})),
};
}

/**
 * Fetches the state of a proof from the ZK API, retrying failed attempts with a jittered exponential backoff.
 *
 * @param apiBaseUrl Base URL of the ZK API.
 * @param proofId ID of the proof to look up.
 * @param maxAttempts Number of failed attempts after which the last error is rethrown. At least one attempt is
 * always made.
 * @returns The validated proof state, or the literal `404` when the API responds with HTTP 404.
 * @throws The error from the final attempt once `maxAttempts` attempts have failed.
 */
export async function getProofStateWithRetries(
  apiBaseUrl: string,
  proofId: string,
  maxAttempts = 3
): Promise<ProofStateResponse | 404> {
  const proofUrl = `${apiBaseUrl}/v1/api/proofs/${proofId}`;
  let failedAttempts = 0;
  for (;;) {
    try {
      const { data } = await axios.get(proofUrl);
      // Schema validation deliberately lives inside the `try`: a response mangled in transit fails validation and
      // is simply retried like any other network failure. A genuine format disagreement between the ZK API and the
      // finalizer exhausts the retries and surfaces as a thrown error.
      const validatedState: ProofStateResponse = create(data, ProofStateResponseSS);
      return validatedState;
    } catch (err) {
      // 404 is a valid/expected response (proof not yet created), so it is returned rather than retried.
      const isNotFound = axios.isAxiosError(err) && err.response?.status === 404;
      if (isNotFound) {
        return 404;
      }

      failedAttempts += 1;
      if (failedAttempts >= maxAttempts) {
        throw err;
      }
      const delayMs = backoffWithJitter(failedAttempts);
      await new Promise((resolve) => setTimeout(resolve, delayMs));
      // todo: consider adding a logger log here .onRetry with `datadog = true` to monitor the error rates
    }
  }
}

/**
 * Submits a proof request to the ZK API, retrying failed attempts with a jittered exponential backoff.
 *
 * @param apiBaseUrl Base URL of the ZK API.
 * @param request Proof request payload, POSTed as-is.
 * @param maxAttempts Number of failed attempts after which the last error is rethrown. At least one attempt is
 * always made.
 * @throws The error from the final attempt once `maxAttempts` attempts have failed.
 */
export async function requestProofWithRetries(
  apiBaseUrl: string,
  request: ApiProofRequest,
  maxAttempts = 3
): Promise<void> {
  const requestUrl = `${apiBaseUrl}/v1/api/proofs`;
  let failedAttempts = 0;
  for (;;) {
    try {
      await axios.post(requestUrl, request);
      return;
    } catch (err) {
      failedAttempts += 1;
      if (failedAttempts >= maxAttempts) {
        throw err;
      }
      const delayMs = backoffWithJitter(failedAttempts);
      await new Promise((resolve) => setTimeout(resolve, delayMs));
      // todo: consider adding a logger log here .onRetry with `datadog = true` to monitor the error rates
    }
  }
}

/**
 * Fetches the vkey currently served by the ZK API, retrying failed attempts with a jittered exponential backoff.
 *
 * @param apiBaseUrl Base URL of the ZK API.
 * @param maxAttempts Number of failed attempts after which the last error is rethrown. At least one attempt is
 * always made.
 * @returns The validated vkey response.
 * @throws The error from the final attempt once `maxAttempts` attempts have failed.
 */
export async function getVkeyWithRetries(apiBaseUrl: string, maxAttempts = 3): Promise<VkeyResponse> {
  const vkeyUrl = `${apiBaseUrl}/v1/api/vkey`;
  let failedAttempts = 0;
  for (;;) {
    try {
      const { data } = await axios.get(vkeyUrl);
      // Validation failures are caught below and retried together with transport errors.
      const validatedVkey: VkeyResponse = create(data, VkeyResponseSS);
      return validatedVkey;
    } catch (err) {
      failedAttempts += 1;
      if (failedAttempts >= maxAttempts) {
        throw err;
      }
      const delayMs = backoffWithJitter(failedAttempts);
      await new Promise((resolve) => setTimeout(resolve, delayMs));
      // todo: consider adding a logger log here .onRetry with `datadog = true` to monitor the error rates
    }
  }
}