sled agent: implement OmicronZoneImageSource::Artifact #7781

Open · wants to merge 5 commits into main
24 changes: 21 additions & 3 deletions illumos-utils/src/running_zone.rs
@@ -1044,6 +1044,9 @@ pub struct ZoneBuilder<'a> {
/// The directories that will be searched for the image tarball for the
/// provided zone type ([`Self::with_zone_type`]).
zone_image_paths: Option<&'a [Utf8PathBuf]>,
/// The file name of the zone image to search for in [`Self::zone_image_paths`].
/// If unset, defaults to `{zone_type}.tar.gz`.
zone_image_file_name: Option<&'a str>,
/// The name of the type of zone being created (e.g. "propolis-server")
zone_type: Option<&'a str>,
/// Unique ID of the instance of the zone being created. (optional)
@@ -1110,6 +1113,17 @@ impl<'a> ZoneBuilder<'a> {
self
}

/// The file name of the zone image to search for in the zone image
/// paths ([`Self::with_zone_image_paths`]). If unset, defaults to
/// `{zone_type}.tar.gz`.
pub fn with_zone_image_file_name(
mut self,
image_file_name: &'a str,
) -> Self {
self.zone_image_file_name = Some(image_file_name);
self
}

/// The name of the type of zone being created (e.g. "propolis-server")
pub fn with_zone_type(mut self, zone_type: &'a str) -> Self {
self.zone_type = Some(zone_type);
@@ -1227,6 +1241,7 @@ impl<'a> ZoneBuilder<'a> {
underlay_vnic_allocator: Some(underlay_vnic_allocator),
zone_root_path: Some(mut zone_root_path),
zone_image_paths: Some(zone_image_paths),
zone_image_file_name,
zone_type: Some(zone_type),
unique_name,
datasets: Some(datasets),
@@ -1255,15 +1270,18 @@ impl<'a> ZoneBuilder<'a> {
InstalledZone::get_zone_name(zone_type, unique_name);

        // Looks for the image within `zone_image_paths`, in order.
let image = format!("{}.tar.gz", zone_type);
let image_file_name = match zone_image_file_name {
Some(image) => image,
None => &format!("{}.tar.gz", zone_type),
};
let zone_image_path = zone_image_paths
.iter()
.find_map(|image_path| {
let path = image_path.join(&image);
let path = image_path.join(image_file_name);
if path.exists() { Some(path) } else { None }
})
.ok_or_else(|| InstallZoneError::ImageNotFound {
image: image.to_string(),
image: image_file_name.to_string(),
paths: zone_image_paths
.iter()
.map(|p| p.to_path_buf())
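The builder change above amounts to a simple fallback: use the caller-supplied image file name when one is given, otherwise `{zone_type}.tar.gz`. A standalone sketch of that resolution (not the builder's actual signature; names are illustrative):

    // Standalone sketch of the file-name fallback performed inside the builder.
    fn resolve_image_file_name(zone_type: &str, override_name: Option<&str>) -> String {
        match override_name {
            // Supplied when the zone image comes from the artifact store, where
            // files are named by their SHA-256 hash rather than by zone type.
            Some(name) => name.to_string(),
            // Default for images found via the install-dataset / ramdisk search.
            None => format!("{zone_type}.tar.gz"),
        }
    }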
12 changes: 12 additions & 0 deletions nexus-sled-agent-shared/src/inventory.rs
@@ -654,6 +654,18 @@ pub enum OmicronZoneImageSource {
Artifact { hash: ArtifactHash },
}

impl OmicronZoneImageSource {
/// Return the artifact hash used for the zone image, if the zone's image
/// source is from the artifact store.
pub fn artifact_hash(&self) -> Option<ArtifactHash> {
if let OmicronZoneImageSource::Artifact { hash } = self {
Some(*hash)
} else {
None
}
}
}

// See `OmicronZoneConfig`. This is a separate function instead of being `impl
// Default` because we don't want to accidentally use this default in Rust code.
fn deserialize_image_source_default() -> OmicronZoneImageSource {
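A brief usage sketch of the new accessor; the `describe` helper is hypothetical and exists only to show the two cases:

    use nexus_sled_agent_shared::inventory::OmicronZoneImageSource;

    // Hypothetical helper: `artifact_hash()` returns Some(hash) only for
    // artifact-sourced zones and None for the install-dataset fallback.
    fn describe(source: &OmicronZoneImageSource) -> String {
        match source.artifact_hash() {
            Some(hash) => format!("zone image from artifact {hash}"),
            None => "zone image from the install dataset".to_string(),
        }
    }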
64 changes: 56 additions & 8 deletions sled-agent/src/artifact_store.rs
@@ -17,6 +17,7 @@
//! Operations that list or modify artifacts or the configuration are called by
//! Nexus and handled by the Sled Agent API.

use std::collections::BTreeSet;
use std::future::Future;
use std::io::ErrorKind;
use std::net::SocketAddrV6;
@@ -47,6 +48,8 @@ use tokio::io::AsyncWriteExt;
use tokio::sync::{mpsc, oneshot, watch};
use tufaceous_artifact::ArtifactHash;

use crate::services::ServiceManager;

// These paths are defined under the artifact storage dataset. They
// cannot conflict with any artifact paths because all artifact paths are
// hexadecimal-encoded SHA-256 checksums.
@@ -86,7 +89,11 @@ pub(crate) struct ArtifactStore<T: DatasetsManager> {
}

impl<T: DatasetsManager> ArtifactStore<T> {
pub(crate) async fn new(log: &Logger, storage: T) -> ArtifactStore<T> {
pub(crate) async fn new(
log: &Logger,
storage: T,
services: Option<ServiceManager>,
) -> ArtifactStore<T> {
let log = log.new(slog::o!("component" => "ArtifactStore"));

let mut ledger_paths = Vec::new();
@@ -125,6 +132,7 @@ impl<T: DatasetsManager> ArtifactStore<T> {
tokio::task::spawn(ledger_manager(
log.clone(),
ledger_paths,
services,
ledger_rx,
config_tx,
));
@@ -456,9 +464,11 @@ type LedgerManagerRequest =
async fn ledger_manager(
log: Logger,
ledger_paths: Vec<Utf8PathBuf>,
services: Option<ServiceManager>,
mut rx: mpsc::Receiver<LedgerManagerRequest>,
config_channel: watch::Sender<Option<ArtifactConfig>>,
) {
let services = services.as_ref();
let handle_request = async |new_config: ArtifactConfig| {
if ledger_paths.is_empty() {
return Err(Error::NoUpdateDataset);
@@ -467,7 +477,38 @@
Ledger::<ArtifactConfig>::new(&log, ledger_paths.clone()).await
{
if new_config.generation > ledger.data().generation {
// New config generation; update the ledger.
// New config generation. First check that it's not asking
// us to delete any artifact that is part of the current zone
// configuration.
if let Some(services) = services {
let mut difference = ledger
.data()
.artifacts
.difference(&new_config.artifacts)
.copied()
.peekable();
if difference.peek().is_some() {
let in_use = services
.omicron_zones_list()
.await
.zones
.into_iter()
.filter_map(|zone| {
zone.image_source.artifact_hash()
})
.collect::<BTreeSet<_>>();
for sha256 in difference {
if in_use.contains(&sha256) {
return Err(Error::ArtifactInUse {
sha256,
thing: "current zone configuration",
});
}
}
}
}

// Everything looks okay; update the ledger.
*ledger.data_mut() = new_config;
ledger
} else if new_config == *ledger.data() {
@@ -770,10 +811,16 @@ impl RepoDepotApi for RepoDepotImpl {
}

#[derive(Debug, thiserror::Error, SlogInlineError)]
pub(crate) enum Error {
pub enum Error {
#[error("Another task is already writing artifact {sha256}")]
AlreadyInProgress { sha256: ArtifactHash },

#[error(
"Artifact {sha256} is in use by {thing} \
but would be deleted by new artifact config"
)]
ArtifactInUse { sha256: ArtifactHash, thing: &'static str },

#[error("Error while reading request body")]
Body(dropshot::HttpError),

@@ -853,7 +900,8 @@ impl From<Error> for HttpError {
fn from(err: Error) -> HttpError {
match err {
// 4xx errors
Error::HashMismatch { .. }
Error::ArtifactInUse { .. }
| Error::HashMismatch { .. }
| Error::NoConfig
| Error::NotInConfig { .. } => {
HttpError::for_bad_request(None, err.to_string())
@@ -993,7 +1041,7 @@ mod test {

let log = test_setup_log("generations");
let backend = TestBackend::new(2);
let store = ArtifactStore::new(&log.log, backend).await;
let store = ArtifactStore::new(&log.log, backend, None).await;

// get_config returns None
assert!(store.get_config().is_none());
@@ -1046,7 +1094,7 @@
async fn list_get_put() {
let log = test_setup_log("list_get_put");
let backend = TestBackend::new(2);
let mut store = ArtifactStore::new(&log.log, backend).await;
let mut store = ArtifactStore::new(&log.log, backend, None).await;

// get fails, because it doesn't exist yet
assert!(matches!(
@@ -1166,7 +1214,7 @@

let log = test_setup_log("no_dataset");
let backend = TestBackend::new(0);
let store = ArtifactStore::new(&log.log, backend).await;
let store = ArtifactStore::new(&log.log, backend, None).await;

assert!(matches!(
store.get(TEST_HASH).await,
@@ -1194,7 +1242,7 @@

let log = test_setup_log("wrong_hash");
let backend = TestBackend::new(2);
let store = ArtifactStore::new(&log.log, backend).await;
let store = ArtifactStore::new(&log.log, backend, None).await;
let mut config = ArtifactConfig {
generation: 1u32.into(),
artifacts: BTreeSet::new(),
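The new rejection logic in `ledger_manager` reduces to a set comparison: any artifact present in the old config but absent from the new one must not be referenced by the current zone configuration. A standalone sketch of that check, generic over the hash type (function and parameter names are illustrative, not the PR's API):

    use std::collections::BTreeSet;

    /// Returns the first artifact the new config would delete while it is still
    /// referenced by a running zone, if any.
    fn first_in_use_removal<T: Ord + Copy>(
        old: &BTreeSet<T>,
        new: &BTreeSet<T>,
        in_use: &BTreeSet<T>,
    ) -> Option<T> {
        // `difference` yields items in `old` that are absent from `new`, i.e.
        // the artifacts the new generation would delete.
        old.difference(new).copied().find(|hash| in_use.contains(hash))
    }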
80 changes: 64 additions & 16 deletions sled-agent/src/services.rs
@@ -68,7 +68,8 @@ use internal_dns_types::names::DNS_ZONE;
use itertools::Itertools;
use nexus_config::{ConfigDropshotWithTls, DeploymentConfig};
use nexus_sled_agent_shared::inventory::{
OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, ZoneKind,
OmicronZoneConfig, OmicronZoneImageSource, OmicronZoneType,
OmicronZonesConfig, ZoneKind,
};
use omicron_common::address::AZ_PREFIX;
use omicron_common::address::COCKROACH_PORT;
@@ -108,7 +109,9 @@ use sled_hardware::is_gimlet;
use sled_hardware::underlay;
use sled_hardware_types::Baseboard;
use sled_storage::config::MountConfig;
use sled_storage::dataset::{CONFIG_DATASET, INSTALL_DATASET, ZONE_DATASET};
use sled_storage::dataset::{
CONFIG_DATASET, INSTALL_DATASET, M2_ARTIFACT_DATASET, ZONE_DATASET,
};
use sled_storage::manager::StorageHandle;
use slog::Logger;
use slog_error_chain::InlineErrorChain;
@@ -1697,22 +1700,64 @@ impl ServiceManager {
.map(|d| zone::Device { name: d.to_string() })
.collect();

// Look for the image in the ramdisk first
let mut zone_image_paths = vec![Utf8PathBuf::from("/opt/oxide")];
// Inject an image path if requested by a test.
if let Some(path) = self.inner.image_directory_override.get() {
zone_image_paths.push(path.clone());
        // TODO: `InstallDataset` should be renamed to something more accurate
        // once all the major changes here have landed. Some zones are
        // distributed with the host OS image and are never placed in the
        // install dataset; what the variant really means is that we fall back
        // to searching `/opt/oxide` in addition to the install datasets.
let image_source = match &request {
ZoneArgs::Omicron(zone_config) => &zone_config.zone.image_source,
ZoneArgs::Switch(_) => &OmicronZoneImageSource::InstallDataset,
};
let zone_image_file_name = match image_source {
OmicronZoneImageSource::InstallDataset => None,
OmicronZoneImageSource::Artifact { hash } => Some(hash.to_string()),
};

// If the boot disk exists, look for the image in the "install" dataset
// there too.
let all_disks = self.inner.storage.get_latest_disks().await;
if let Some((_, boot_zpool)) = all_disks.boot_disk() {
zone_image_paths.push(boot_zpool.dataset_mountpoint(
&all_disks.mount_config().root,
INSTALL_DATASET,
));
}
let zone_image_paths = match image_source {
OmicronZoneImageSource::InstallDataset => {
// Look for the image in the ramdisk first
let mut zone_image_paths =
vec![Utf8PathBuf::from("/opt/oxide")];
// Inject an image path if requested by a test.
if let Some(path) = self.inner.image_directory_override.get() {
zone_image_paths.push(path.clone());
};

// If the boot disk exists, look for the image in the "install"
// dataset there too.
if let Some((_, boot_zpool)) = all_disks.boot_disk() {
zone_image_paths.push(boot_zpool.dataset_mountpoint(
&all_disks.mount_config().root,
INSTALL_DATASET,
));
}

zone_image_paths
}
OmicronZoneImageSource::Artifact { .. } => {
// Search both artifact datasets, but look on the boot disk first.
let boot_zpool =
all_disks.boot_disk().map(|(_, boot_zpool)| boot_zpool);
// This iterator starts with the zpool for the boot disk (if it
// exists), and then is followed by all other zpools.
let zpool_iter = boot_zpool.clone().into_iter().chain(
all_disks
.all_m2_zpools()
.into_iter()
.filter(|zpool| Some(zpool) != boot_zpool.as_ref()),
);
zpool_iter
.map(|zpool| {
zpool.dataset_mountpoint(
&all_disks.mount_config().root,
M2_ARTIFACT_DATASET,
)
})
.collect()
}
};

let zone_type_str = match &request {
ZoneArgs::Omicron(zone_config) => {
@@ -1736,6 +1781,9 @@
if let Some(vnic) = bootstrap_vnic {
zone_builder = zone_builder.with_bootstrap_vnic(vnic);
}
if let Some(file_name) = &zone_image_file_name {
zone_builder = zone_builder.with_zone_image_file_name(file_name);
}
let installed_zone = zone_builder
.with_log(self.inner.log.clone())
.with_underlay_vnic_allocator(&self.inner.underlay_vnic_allocator)
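For artifact-sourced images, the search order built above is: the artifact dataset on the boot disk's M.2 first, then the artifact datasets on the remaining M.2 zpools. A minimal illustration of that ordering, with plain strings standing in for zpools (the pool names are made up):

    // Sketch of the boot-disk-first ordering used to build the search paths.
    fn ordered_pools(boot: Option<&str>, all: &[&str]) -> Vec<String> {
        boot.into_iter()
            // Append every other pool, skipping the boot pool so it is not
            // searched twice.
            .chain(all.iter().copied().filter(|p| Some(*p) != boot))
            .map(String::from)
            .collect()
    }

    fn main() {
        assert_eq!(
            ordered_pools(Some("oxp_b"), &["oxp_a", "oxp_b", "oxp_c"]),
            vec!["oxp_b", "oxp_a", "oxp_c"]
        );
    }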
7 changes: 4 additions & 3 deletions sled-agent/src/sim/sled_agent.rs
@@ -155,9 +155,10 @@ impl SledAgent {

simulated_upstairs.register_storage(id, &storage);

let repo_depot = ArtifactStore::new(&log, SimArtifactStorage::new())
.await
.start(&log, &config.dropshot);
let repo_depot =
ArtifactStore::new(&log, SimArtifactStorage::new(), None)
.await
.start(&log, &config.dropshot);

Arc::new(SledAgent {
id,