diff --git a/src/common/abi/SP1Helios.json b/src/common/abi/SP1Helios.json index 70dda7a5bc..6e63866030 100644 --- a/src/common/abi/SP1Helios.json +++ b/src/common/abi/SP1Helios.json @@ -52,5 +52,12 @@ "outputs": [], "stateMutability": "nonpayable", "type": "function" + }, + { + "inputs": [], + "name": "heliosProgramVkey", + "outputs": [{ "internalType": "bytes32", "name": "", "type": "bytes32" }], + "stateMutability": "view", + "type": "function" } ] diff --git a/src/finalizer/utils/helios.ts b/src/finalizer/utils/helios.ts index dfdc22fcb3..3f2b4b402d 100644 --- a/src/finalizer/utils/helios.ts +++ b/src/finalizer/utils/helios.ts @@ -13,12 +13,17 @@ import { } from "../../utils"; import { spreadEventWithBlockNumber } from "../../utils/EventUtils"; import { FinalizerPromise, CrossChainMessage } from "../types"; -import axios from "axios"; import UNIVERSAL_SPOKE_ABI from "../../common/abi/Universal_SpokePool.json"; import { RelayedCallDataEvent, StoredCallDataEvent } from "../../interfaces/Universal"; import { ApiProofRequest, ProofOutputs, ProofStateResponse, SP1HeliosProofData } from "../../interfaces/ZkApi"; import { StorageSlotVerifiedEvent, HeadUpdateEvent } from "../../interfaces/Helios"; -import { calculateProofId, decodeProofOutputs } from "../../utils/ZkApiUtils"; +import { + calculateProofId, + decodeProofOutputs, + getProofStateWithRetries, + getVkeyWithRetries, + requestProofWithRetries, +} from "../../utils/ZkApiUtils"; import { calculateHubPoolStoreStorageSlot, getHubPoolStoreContract } from "../../utils/UniversalUtils"; import { stringifyThrownValue } from "../../utils/LogUtils"; import { getSp1HeliosContractEVM } from "../../utils/HeliosUtils"; @@ -69,6 +74,15 @@ export async function heliosL1toL2Finalizer( const l1ChainId = hubPoolClient.chainId; const l2ChainId = l2SpokePoolClient.chainId; const sp1HeliosL2 = await getSp1HeliosContractEVM(l2SpokePoolClient.spokePool, l2SpokePoolClient.spokePool.signer); + + const apiBaseUrl = 
process.env.HELIOS_PROOF_API_URL; + if (!apiBaseUrl) { + throw new Error("[heliosL1toL2Finalizer] HELIOS_PROOF_API_URL environment variable not set."); + } + + // Early vkey mismatch check to avoid requesting wrong proofs + await ensureVkeysMatch(apiBaseUrl, sp1HeliosL2); + const { sp1HeliosHead, sp1HeliosHeader } = await getSp1HeliosHeadData(sp1HeliosL2); // --- Step 1: Identify all actions needed (pending L1 -> L2 messages to finalize & keep-alive) --- @@ -95,6 +109,7 @@ export async function heliosL1toL2Finalizer( // --- Step 2: Enrich actions with ZK proofs. Return messages that are ready to submit on-chain --- const readyActions = await enrichHeliosActions( logger, + apiBaseUrl, actions, l2SpokePoolClient, l1SpokePoolClient, @@ -267,6 +282,7 @@ async function shouldGenerateKeepAliveAction( // returns helios messages ready for on-chain execution enriched with proof data async function enrichHeliosActions( logger: winston.Logger, + apiBaseUrl: string, actions: HeliosAction[], l2SpokePoolClient: EVMSpokePoolClient, l1SpokePoolClient: EVMSpokePoolClient, @@ -274,10 +290,6 @@ async function enrichHeliosActions( currentL2HeliosHeader: string ): Promise { const l2ChainId = l2SpokePoolClient.chainId; - const apiBaseUrl = process.env.HELIOS_PROOF_API_URL; - if (!apiBaseUrl) { - throw new Error("[enrichHeliosActions] HELIOS_PROOF_API_URL environment variable not set."); - } const hubPoolStoreAddress = getHubPoolStoreContract( l1SpokePoolClient.chainId, l1SpokePoolClient.spokePool.provider @@ -327,38 +339,24 @@ async function enrichHeliosActions( } const proofId = calculateProofId(apiRequest); - const getProofUrl = `${apiBaseUrl}/v1/api/proofs/${proofId}`; + logger.debug({ ...logContext, message: "Attempting to get proof", proofId }); - logger.debug({ ...logContext, message: "Attempting to get proof", proofId, getProofUrl }); - - let proofState: ProofStateResponse | null = null; - - // @dev We need try - catch here because of how API responds to non-existing proofs: with 
NotFound status - let getError: any = null; + let proofStateOr404: ProofStateResponse | 404; try { - const response = await axios.get(getProofUrl); - proofState = response.data; - logger.debug({ ...logContext, message: "Proof state received", proofId, status: proofState.status }); - } catch (error: any) { - getError = error; + proofStateOr404 = await getProofStateWithRetries(apiBaseUrl, proofId); + } catch (error) { + // add context to error + throw new Error(`Failed to get proof state for proofId ${proofId}: ${stringifyThrownValue(error)}`); } - // Axios error. Handle based on whether was a NOTFOUND or another error - if (getError) { - const isNotFoundError = axios.isAxiosError(getError) && getError.response?.status === 404; - if (isNotFoundError) { - // NOTFOUND error -> Request proof - logger.debug({ ...logContext, message: "Proof not found (404), requesting...", proofId }); - await axios.post(`${apiBaseUrl}/v1/api/proofs`, apiRequest); - logger.debug({ ...logContext, message: "Proof requested successfully.", proofId }); - continue; - } else { - // If other error is returned -- throw and alert PD; this shouldn't happen - throw new Error(`Failed to get proof state for proofId ${proofId}: ${stringifyThrownValue(getError)}`); - } + if (proofStateOr404 === 404) { + logger.debug({ ...logContext, message: "Proof not found (404), requesting...", proofId }); + await requestProofWithRetries(apiBaseUrl, apiRequest); + logger.debug({ ...logContext, message: "Proof requested successfully.", proofId }); + continue; } - // No axios error, process `proofState` + const proofState: ProofStateResponse = proofStateOr404; switch (proofState.status) { case "pending": // If proof generation is pending -- there's nothing for us to do yet. 
/**
 * Ensures the verification key served by the ZK API matches the `heliosProgramVkey` stored in the SP1Helios
 * contract. If the two differ, a proof produced by the API cannot be used to confirm messages on the
 * destination chain, so this check is meant to run before any proof is requested.
 *
 * Operational background: when the ZK Helios setup is upgraded, the ZK API is upgraded only after the upgrade
 * message has been sent to the destination chains, because the API must first generate the proofs that confirm
 * that upgrade message. During that window the API's ELF vkey may be stale relative to an already-upgraded
 * contract; failing here prevents requesting proofs with the stale vkey.
 *
 * @param apiBaseUrl Base URL of the ZK proof API.
 * @param sp1Helios SP1Helios contract instance whose `heliosProgramVkey()` view is read.
 * @throws Error when either vkey is missing or not a string, or when the two values differ (compared
 * case-insensitively). Errors from the API call or the contract call propagate unchanged.
 */
async function ensureVkeysMatch(apiBaseUrl: string, sp1Helios: ethers.Contract): Promise<void> {
  const [apiResp, contractVkeyRaw] = await Promise.all([getVkeyWithRetries(apiBaseUrl), sp1Helios.heliosProgramVkey()]);

  // Normalise only real strings; anything else becomes `undefined` so it is reported through the descriptive
  // error below instead of surfacing as a TypeError from `.toLowerCase()`.
  const apiVkey = typeof apiResp?.vkey === "string" ? apiResp.vkey.toLowerCase() : undefined;
  const contractVkey = typeof contractVkeyRaw === "string" ? contractVkeyRaw.toLowerCase() : undefined;

  if (apiVkey === undefined || contractVkey === undefined || apiVkey !== contractVkey) {
    throw new Error(
      `SP1Helios vkey check failed: api=${apiVkey} contract=${contractVkey} address=${sp1Helios.address}`
    );
  }
}
/**
 * Computes a retry delay using exponential backoff with symmetric jitter and a hard cap.
 *
 * The exponential delay `baseDelayMs * backoffExponentBase ** retry` is first capped at `maxDelayMs`, then a
 * random jitter of up to +/-50% of that capped delay is applied exactly once, and the result is clamped to
 * `[0, maxDelayMs]`. Capping before jittering keeps delays randomised even once the cap has been reached.
 *
 * @param retry Retry counter used as the exponent (0 yields roughly `baseDelayMs`).
 * @param baseDelayMs Delay for `retry = 0` before jitter, in milliseconds.
 * @param backoffExponentBase Multiplicative growth factor per retry.
 * @param maxDelayMs Upper bound of the returned delay, in milliseconds.
 * @returns Delay in milliseconds, never negative and never above `maxDelayMs`.
 */
export function backoffWithJitter(
  retry: number,
  baseDelayMs = 50,
  backoffExponentBase = 2,
  maxDelayMs = 5000
): number {
  const exponentialDelay = baseDelayMs * backoffExponentBase ** retry;
  const cappedDelay = Math.min(exponentialDelay, maxDelayMs);
  // Symmetric jitter in (-0.5, 0.5] * cappedDelay, applied once.
  const jitter = (0.5 - Math.random()) * cappedDelay;
  return Math.min(Math.max(cappedDelay + jitter, 0), maxDelayMs);
}
/**
 * Runs `call`, retrying on any thrown error with jittered exponential backoff between attempts.
 *
 * At least one attempt is always made. After a failure the attempt counter is incremented; once it reaches
 * `maxAttempts` the last error is rethrown unchanged, otherwise the function sleeps for
 * `backoffWithJitter(attempt)` milliseconds and tries again.
 *
 * @param call Operation to execute; invoked afresh on every attempt.
 * @param maxAttempts Total number of attempts before giving up.
 * @returns Whatever `call` resolves to on the first successful attempt.
 * @throws The error from the final failed attempt.
 */
async function callZkApiWithRetries<T>(call: () => Promise<T>, maxAttempts: number): Promise<T> {
  let attempt = 0;
  for (;;) {
    try {
      return await call();
    } catch (e: unknown) {
      attempt++;
      if (attempt >= maxAttempts) {
        throw e;
      }
      await new Promise<void>((resolve) => setTimeout(resolve, backoffWithJitter(attempt)));
      // todo: consider adding a logger log here .onRetry with `datadog = true` to monitor the error rates
    }
  }
}

/**
 * Fetches the state of a proof from the ZK API (`GET {apiBaseUrl}/v1/api/proofs/{proofId}`) and validates the
 * response body against `ProofStateResponseSS`.
 *
 * @param apiBaseUrl Base URL of the ZK proof API.
 * @param proofId Identifier of the proof to look up.
 * @param maxAttempts Total number of attempts for failures other than HTTP 404.
 * @returns The validated proof state, or the literal `404` when the API reports that the proof does not exist.
 * A 404 is an expected outcome (proof not yet created) and is returned immediately without retrying.
 * @throws The last error (transport failure, non-404 HTTP error, or schema validation failure) once
 * `maxAttempts` is exhausted.
 */
export async function getProofStateWithRetries(
  apiBaseUrl: string,
  proofId: string,
  maxAttempts = 3
): Promise<ProofStateResponse | 404> {
  return callZkApiWithRetries<ProofStateResponse | 404>(async () => {
    try {
      const response = await axios.get(`${apiBaseUrl}/v1/api/proofs/${proofId}`);
      const proofState: ProofStateResponse = create(response.data, ProofStateResponseSS);
      return proofState;
    } catch (e: unknown) {
      // 404 is a valid/expected response: proof not yet created. Resolve instead of throwing so the retry
      // helper does not treat it as a failure.
      if (axios.isAxiosError(e) && e.response?.status === 404) {
        return 404;
      }
      throw e;
    }
  }, maxAttempts);
}

/**
 * Asks the ZK API to generate a proof (`POST {apiBaseUrl}/v1/api/proofs`).
 *
 * @param apiBaseUrl Base URL of the ZK proof API.
 * @param request Proof request payload sent as the POST body.
 * @param maxAttempts Total number of attempts before giving up.
 * @throws The last error once `maxAttempts` is exhausted.
 */
export async function requestProofWithRetries(
  apiBaseUrl: string,
  request: ApiProofRequest,
  maxAttempts = 3
): Promise<void> {
  await callZkApiWithRetries(async () => {
    await axios.post(`${apiBaseUrl}/v1/api/proofs`, request);
  }, maxAttempts);
}

/**
 * Fetches the verification key from the ZK API (`GET {apiBaseUrl}/v1/api/vkey`) and validates the response
 * body against `VkeyResponseSS`.
 *
 * @param apiBaseUrl Base URL of the ZK proof API.
 * @param maxAttempts Total number of attempts before giving up.
 * @returns The validated vkey response.
 * @throws The last error (transport, HTTP, or schema validation failure) once `maxAttempts` is exhausted.
 */
export async function getVkeyWithRetries(apiBaseUrl: string, maxAttempts = 3): Promise<VkeyResponse> {
  return callZkApiWithRetries<VkeyResponse>(async () => {
    const response = await axios.get(`${apiBaseUrl}/v1/api/vkey`);
    const vkeyResponse: VkeyResponse = create(response.data, VkeyResponseSS);
    return vkeyResponse;
  }, maxAttempts);
}