1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
// Copyright 2021 Parity Technologies (UK) Ltd.
// This file is part of Polkadot.
// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.
use parity_scale_codec::{Decode, Encode};
use std::any::Any;
/// Result of PVF preparation performed by the validation host.
pub type PrepareResult = Result<(), PrepareError>;
/// An error that occurred during the prepare part of the PVF pipeline.
#[derive(Debug, Clone, Encode, Decode)]
pub enum PrepareError {
/// During the prevalidation stage of preparation an issue was found with the PVF.
Prevalidation(String),
/// Compilation failed for the given PVF.
Preparation(String),
/// An unexpected panic has occured in the preparation worker.
Panic(String),
/// Failed to prepare the PVF due to the time limit.
TimedOut,
/// This state indicates that the process assigned to prepare the artifact wasn't responsible
/// or were killed. This state is reported by the validation host (not by the worker).
DidNotMakeIt,
}
/// A error raised during validation of the candidate.
#[derive(Debug, Clone)]
pub enum ValidationError {
/// The error was raised because the candidate is invalid.
InvalidCandidate(InvalidCandidate),
/// This error is raised due to inability to serve the request.
InternalError(String),
}
/// A description of an error raised during executing a PVF and can be attributed to the combination
/// of the candidate [`polkadot_parachain::primitives::ValidationParams`] and the PVF.
#[derive(Debug, Clone)]
pub enum InvalidCandidate {
/// PVF preparation ended up with a deterministic error.
PrepareError(String),
/// The failure is reported by the execution worker. The string contains the error message.
WorkerReportedError(String),
/// The worker has died during validation of a candidate. That may fall in one of the following
/// categories, which we cannot distinguish programmatically:
///
/// (a) Some sort of transient glitch caused the worker process to abort. An example would be that
/// the host machine ran out of free memory and the OOM killer started killing the processes,
/// and in order to save the parent it will "sacrifice child" first.
///
/// (b) The candidate triggered a code path that has lead to the process death. For example,
/// the PVF found a way to consume unbounded amount of resources and then it either exceeded
/// an `rlimit` (if set) or, again, invited OOM killer. Another possibility is a bug in
/// wasmtime allowed the PVF to gain control over the execution worker.
///
/// We attribute such an event to an invalid candidate in either case.
///
/// The rationale for this is that a glitch may lead to unfair rejecting candidate by a single
/// validator. If the glitch is somewhat more persistent the validator will reject all candidate
/// thrown at it and hopefully the operator notices it by decreased reward performance of the
/// validator. On the other hand, if the worker died because of (b) we would have better chances
/// to stop the attack.
AmbiguousWorkerDeath,
/// PVF execution (compilation is not included) took more time than was allotted.
HardTimeout,
}
impl From<PrepareError> for ValidationError {
fn from(error: PrepareError) -> Self {
// Here we need to classify the errors into two errors: deterministic and non-deterministic.
//
// Non-deterministic errors can happen spuriously. Typically, they occur due to resource
// starvation, e.g. under heavy load or memory pressure. Those errors are typically transient
// but may persist e.g. if the node is run by overwhelmingly underpowered machine.
//
// Deterministic errors should trigger reliably. Those errors depend on the PVF itself and
// the sc-executor/wasmtime logic.
//
// For now, at least until the PVF pre-checking lands, the deterministic errors will be
// treated as `InvalidCandidate`. Should those occur they could potentially trigger disputes.
//
// All non-deterministic errors are qualified as `InternalError`s and will not trigger
// disputes.
match error {
PrepareError::Prevalidation(err) => ValidationError::InvalidCandidate(
InvalidCandidate::PrepareError(format!("prevalidation: {}", err)),
),
PrepareError::Preparation(err) => ValidationError::InvalidCandidate(
InvalidCandidate::PrepareError(format!("preparation: {}", err)),
),
PrepareError::Panic(err) => ValidationError::InvalidCandidate(
InvalidCandidate::PrepareError(format!("panic: {}", err)),
),
PrepareError::TimedOut => ValidationError::InternalError("prepare: timeout".to_owned()),
PrepareError::DidNotMakeIt =>
ValidationError::InternalError("prepare: did not make it".to_owned()),
}
}
}
/// Attempt to convert an opaque panic payload to a string.
///
/// This is a best effort, and is not guaranteed to provide the most accurate value.
pub(crate) fn stringify_panic_payload(payload: Box<dyn Any + Send + 'static>) -> String {
match payload.downcast::<&'static str>() {
Ok(msg) => msg.to_string(),
Err(payload) => match payload.downcast::<String>() {
Ok(msg) => *msg,
// At least we tried...
Err(_) => "unknown panic payload".to_string(),
},
}
}