diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json
index f7e89787499..f70873f5dd9 100644
--- a/openapi/sled-agent.json
+++ b/openapi/sled-agent.json
@@ -56,7 +56,7 @@
     },
     "/filesystem": {
       "put": {
-        "operationId": "filesystem_put",
+        "operationId": "filesystems_put",
         "requestBody": {
           "content": {
             "application/json": {
@@ -608,14 +608,29 @@
         ]
       },
       "DatasetEnsureBody": {
+        "description": "Used to request that the Sled initialize multiple datasets.",
+        "type": "object",
+        "properties": {
+          "datasets": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/DatasetEnsureRequest"
+            }
+          }
+        },
+        "required": [
+          "datasets"
+        ]
+      },
+      "DatasetEnsureRequest": {
         "description": "Used to request a new dataset kind exists within a zpool.\n\nMany dataset types are associated with services that will be instantiated when the dataset is detected.",
         "type": "object",
         "properties": {
           "address": {
             "type": "string"
           },
-          "dataset_kind": {
-            "$ref": "#/components/schemas/DatasetKind"
+          "dataset_name": {
+            "$ref": "#/components/schemas/DatasetName"
           },
           "gz_address": {
             "nullable": true,
@@ -626,17 +641,12 @@
           "id": {
             "type": "string",
             "format": "uuid"
-          },
-          "zpool_id": {
-            "type": "string",
-            "format": "uuid"
           }
         },
         "required": [
           "address",
-          "dataset_kind",
-          "id",
-          "zpool_id"
+          "dataset_name",
+          "id"
         ]
       },
       "DatasetKind": {
diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs
index fd2b4b0ac20..accf6072541 100644
--- a/sled-agent/src/http_entrypoints.rs
+++ b/sled-agent/src/http_entrypoints.rs
@@ -31,7 +31,7 @@ type SledApiDescription = ApiDescription<SledAgent>;
 pub fn api() -> SledApiDescription {
     fn register_endpoints(api: &mut SledApiDescription) -> Result<(), String> {
         api.register(disk_put)?;
-        api.register(filesystem_put)?;
+        api.register(filesystems_put)?;
         api.register(instance_issue_disk_snapshot_request)?;
         api.register(instance_put_migration_ids)?;
         api.register(instance_put_state)?;
@@ -113,21 +113,13 @@ async fn sled_role_get(
     method = PUT,
     path = "/filesystem",
 }]
-async fn filesystem_put(
+async fn filesystems_put(
     rqctx: RequestContext<SledAgent>,
     body: TypedBody<DatasetEnsureBody>,
 ) -> Result<HttpResponseUpdatedNoContent, HttpError> {
     let sa = rqctx.context();
     let body_args = body.into_inner();
-    sa.filesystem_ensure(
-        body_args.id,
-        body_args.zpool_id,
-        body_args.dataset_kind,
-        body_args.address,
-        body_args.gz_address,
-    )
-    .await
-    .map_err(|e| Error::from(e))?;
+    sa.filesystems_ensure(body_args).await.map_err(|e| Error::from(e))?;
     Ok(HttpResponseUpdatedNoContent())
 }

diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs
index 868d293caa8..38e2513d477 100644
--- a/sled-agent/src/params.rs
+++ b/sled-agent/src/params.rs
@@ -313,18 +313,22 @@ impl std::fmt::Display for DatasetKind {
     }
 }

+/// Used to request that the Sled initialize multiple datasets.
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)]
+pub struct DatasetEnsureBody {
+    pub datasets: Vec<DatasetEnsureRequest>,
+}
+
 /// Used to request a new dataset kind exists within a zpool.
 ///
 /// Many dataset types are associated with services that will be
 /// instantiated when the dataset is detected.
 #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)]
-pub struct DatasetEnsureBody {
+pub struct DatasetEnsureRequest {
     // The UUID of the dataset, as well as the service using it directly.
     pub id: Uuid,
     // The name (and UUID) of the Zpool which we are inserting into.
-    pub zpool_id: Uuid,
-    // The type of the filesystem.
-    pub dataset_kind: DatasetKind,
+    pub dataset_name: crate::storage::dataset::DatasetName,
     // The address on which the zone will listen for requests.
     pub address: SocketAddrV6,
     // The addresses in the global zone which should be created, if necessary
@@ -333,14 +337,15 @@ pub struct DatasetEnsureBody {
     pub gz_address: Option<Ipv6Addr>,
 }

-impl From<DatasetEnsureBody> for sled_agent_client::types::DatasetEnsureBody {
-    fn from(p: DatasetEnsureBody) -> Self {
+impl From<DatasetEnsureRequest>
+    for sled_agent_client::types::DatasetEnsureRequest
+{
+    fn from(p: DatasetEnsureRequest) -> Self {
         Self {
-            zpool_id: p.zpool_id,
-            dataset_kind: p.dataset_kind.into(),
+            id: p.id,
+            dataset_name: p.dataset_name.into(),
             address: p.address.to_string(),
             gz_address: p.gz_address,
-            id: p.id,
         }
     }
 }
diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs
index f102d25c3a9..ec10ce0bd78 100644
--- a/sled-agent/src/rack_setup/plan/service.rs
+++ b/sled-agent/src/rack_setup/plan/service.rs
@@ -7,13 +7,15 @@
 use crate::bootstrap::params::StartSledAgentRequest;
 use crate::ledger::{Ledger, Ledgerable};
 use crate::params::{
-    DatasetEnsureBody, ServiceType, ServiceZoneRequest, ServiceZoneService,
+    DatasetEnsureRequest, ServiceType, ServiceZoneRequest, ServiceZoneService,
     ZoneType,
 };
 use crate::rack_setup::config::SetupServiceConfig as Config;
+use crate::storage::dataset::DatasetName;
 use crate::storage_manager::StorageResources;
 use camino::Utf8PathBuf;
 use dns_service_client::types::DnsConfigParams;
+use illumos_utils::zpool::ZpoolName;
 use internal_dns::{ServiceName, DNS_ZONE};
 use omicron_common::address::{
     get_sled_address, get_switch_zone_address, Ipv6Subnet, ReservedRackSubnet,
@@ -93,7 +95,7 @@ pub enum PlanError {
 pub struct SledRequest {
     /// Datasets to be created.
     #[serde(default, rename = "dataset")]
-    pub datasets: Vec<DatasetEnsureBody>,
+    pub datasets: Vec<DatasetEnsureRequest>,

     /// Services to be instantiated.
     #[serde(default, rename = "service")]
@@ -165,7 +167,7 @@ impl Plan {
     async fn get_u2_zpools_from_sled(
         log: &Logger,
         address: SocketAddrV6,
-    ) -> Result<Vec<Uuid>, PlanError> {
+    ) -> Result<Vec<ZpoolName>, PlanError> {
         let dur = std::time::Duration::from_secs(60);
         let client = reqwest::ClientBuilder::new()
             .connect_timeout(dur)
@@ -179,7 +181,7 @@ impl Plan {
         );

         let get_u2_zpools = || async {
-            let zpools: Vec<Uuid> = client
+            let zpools: Vec<ZpoolName> = client
                 .zpools_get()
                 .await
                 .map(|response| {
@@ -187,7 +189,9 @@ impl Plan {
                         .into_inner()
                         .into_iter()
                         .filter_map(|zpool| match zpool.disk_type {
-                            SledAgentTypes::DiskType::U2 => Some(zpool.id),
+                            SledAgentTypes::DiskType::U2 => {
+                                Some(ZpoolName::new_external(zpool.id))
+                            }
                             SledAgentTypes::DiskType::M2 => None,
                         })
                         .collect()
@@ -304,11 +308,12 @@ impl Plan {
                     dns_address,
                     nic: nic.clone(),
                 };
+                let dataset_name =
+                    DatasetName::new(u2_zpools[0].clone(), dataset_kind);

-                request.datasets.push(DatasetEnsureBody {
+                request.datasets.push(DatasetEnsureRequest {
                     id,
-                    zpool_id: u2_zpools[0],
-                    dataset_kind: dataset_kind.clone(),
+                    dataset_name: dataset_name.clone(),
                     address: http_address,
                     gz_address: None,
                 });
@@ -316,12 +321,7 @@ impl Plan {
                     id,
                     zone_type: ZoneType::ExternalDns,
                     addresses: vec![internal_ip],
-                    dataset: Some(crate::storage::dataset::DatasetName::new(
-                        illumos_utils::zpool::ZpoolName::new_external(
-                            u2_zpools[0],
-                        ),
-                        dataset_kind,
-                    )),
+                    dataset: Some(dataset_name),
                     gz_addresses: vec![],
                     services: vec![ServiceZoneService {
                         id,
@@ -411,10 +411,12 @@ impl Plan {
             dns_builder
                 .service_backend_zone(ServiceName::Cockroach, &zone, port)
                 .unwrap();
-            request.datasets.push(DatasetEnsureBody {
+            request.datasets.push(DatasetEnsureRequest {
                 id,
-                zpool_id: u2_zpools[0],
-                dataset_kind: crate::params::DatasetKind::CockroachDb,
+                dataset_name: DatasetName::new(
+                    u2_zpools[0].clone(),
+                    crate::params::DatasetKind::CockroachDb,
+                ),
                 address,
                 gz_address: None,
             });
@@ -430,10 +432,12 @@ impl Plan {
             dns_builder
                 .service_backend_zone(ServiceName::Clickhouse, &zone, port)
                 .unwrap();
-            request.datasets.push(DatasetEnsureBody {
+            request.datasets.push(DatasetEnsureRequest {
                 id,
-                zpool_id: u2_zpools[0],
-                dataset_kind: crate::params::DatasetKind::Clickhouse,
+                dataset_name: DatasetName::new(
+                    u2_zpools[0].clone(),
+                    crate::params::DatasetKind::Clickhouse,
+                ),
                 address,
                 gz_address: None,
             });
@@ -442,7 +446,7 @@ impl Plan {
         // Each zpool gets a crucible zone.
         //
         // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove
-        for zpool_id in &u2_zpools {
+        for pool in &u2_zpools {
             let ip = addr_alloc.next().expect("Not enough addrs");
             let port = omicron_common::address::CRUCIBLE_PORT;
             let address = SocketAddrV6::new(ip, port, 0, 0);
@@ -456,10 +460,12 @@ impl Plan {
                 )
                 .unwrap();

-            request.datasets.push(DatasetEnsureBody {
+            request.datasets.push(DatasetEnsureRequest {
                 id,
-                zpool_id: *zpool_id,
-                dataset_kind: crate::params::DatasetKind::Crucible,
+                dataset_name: DatasetName::new(
+                    pool.clone(),
+                    crate::params::DatasetKind::Crucible,
+                ),
                 address,
                 gz_address: None,
             });
@@ -475,6 +481,14 @@ impl Plan {
                 SocketAddrV6::new(dns_ip, DNS_HTTP_PORT, 0, 0);
             let id = Uuid::new_v4();
             let zone = dns_builder.host_zone(id, dns_ip).unwrap();
+            let dataset_name = DatasetName::new(
+                u2_zpools[0].clone(),
+                crate::params::DatasetKind::InternalDns {
+                    http_address,
+                    dns_address,
+                },
+            );
+
             dns_builder
                 .service_backend_zone(
                     ServiceName::InternalDns,
@@ -482,13 +496,9 @@ impl Plan {
                     DNS_HTTP_PORT,
                 )
                 .unwrap();
-            request.datasets.push(DatasetEnsureBody {
+            request.datasets.push(DatasetEnsureRequest {
                 id,
-                zpool_id: u2_zpools[0],
-                dataset_kind: crate::params::DatasetKind::InternalDns {
-                    http_address,
-                    dns_address,
-                },
+                dataset_name,
                 address: http_address,
                 gz_address: Some(dns_subnet.gz_address().ip()),
             });
diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs
index f304dd087f2..2cc5adb43d1 100644
--- a/sled-agent/src/rack_setup/service.rs
+++ b/sled-agent/src/rack_setup/service.rs
@@ -62,7 +62,7 @@ use crate::bootstrap::rss_handle::BootstrapAgentHandle;
 use crate::ledger::{Ledger, Ledgerable};
 use crate::nexus::d2n_params;
 use crate::params::{
-    AutonomousServiceOnlyError, DatasetEnsureBody, DatasetKind, ServiceType,
+    AutonomousServiceOnlyError, DatasetEnsureRequest, DatasetKind, ServiceType,
     ServiceZoneRequest, TimeSync, ZoneType,
 };
 use crate::rack_setup::plan::service::{
@@ -284,7 +284,7 @@ impl ServiceInner {
     async fn initialize_datasets(
         &self,
         sled_address: SocketAddrV6,
-        datasets: &Vec<DatasetEnsureBody>,
+        datasets: &Vec<DatasetEnsureRequest>,
     ) -> Result<(), SetupServiceError> {
         let dur = std::time::Duration::from_secs(60);

@@ -299,26 +299,29 @@ impl ServiceInner {
             self.log.new(o!("SledAgentClient" => sled_address.to_string())),
         );

+        let datasets =
+            datasets.iter().map(|d| d.clone().into()).collect::<Vec<_>>();
+
         info!(self.log, "sending dataset requests...");
-        for dataset in datasets {
-            let filesystem_put = || async {
-                info!(self.log, "creating new filesystem: {:?}", dataset);
-                client
-                    .filesystem_put(&dataset.clone().into())
-                    .await
-                    .map_err(BackoffError::transient)?;
-                Ok::<(), BackoffError<SledAgentError<SledAgentTypes::Error>>>(())
-            };
-            let log_failure = |error, _| {
-                warn!(self.log, "failed to create filesystem"; "error" => ?error);
-            };
-            retry_notify(
-                retry_policy_internal_service_aggressive(),
-                filesystem_put,
-                log_failure,
-            )
-            .await?;
-        }
+        let filesystem_put = || async {
+            info!(self.log, "creating new filesystems: {:?}", datasets);
+            client
+                .filesystems_put(&SledAgentTypes::DatasetEnsureBody {
+                    datasets: datasets.clone(),
+                })
+                .await
+                .map_err(BackoffError::transient)?;
+            Ok::<(), BackoffError<SledAgentError<SledAgentTypes::Error>>>(())
+        };
+        let log_failure = |error, _| {
+            warn!(self.log, "failed to create filesystem"; "error" => ?error);
+        };
+        retry_notify(
+            retry_policy_internal_service_aggressive(),
+            filesystem_put,
+            log_failure,
+        )
+        .await?;

         Ok(())
     }
@@ -386,7 +389,7 @@ impl ServiceInner {
             .iter()
             .filter_map(|dataset| {
                 if matches!(
-                    dataset.dataset_kind,
+                    dataset.dataset_name.dataset(),
                     DatasetKind::InternalDns { .. }
                 ) {
                     Some(dataset.clone())
@@ -416,8 +419,8 @@ impl ServiceInner {
             .datasets
             .iter()
             .filter_map(|dataset| {
-                match dataset.dataset_kind {
-                    DatasetKind::InternalDns { http_address, .. } => Some(http_address),
+                match dataset.dataset_name.dataset() {
+                    DatasetKind::InternalDns { http_address, .. } => Some(*http_address),
                     _ => None,
                 }
             })
@@ -771,11 +774,11 @@ impl ServiceInner {
         for dataset in service_request.datasets.iter() {
             datasets.push(NexusTypes::DatasetCreateRequest {
-                zpool_id: dataset.zpool_id,
+                zpool_id: dataset.dataset_name.pool().id(),
                 dataset_id: dataset.id,
                 request: NexusTypes::DatasetPutRequest {
                     address: dataset.address.to_string(),
-                    kind: dataset.dataset_kind.clone().into(),
+                    kind: dataset.dataset_name.dataset().clone().into(),
                 },
             })
         }
diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs
index f437b9d0b0a..9a0de6f0e67 100644
--- a/sled-agent/src/sled_agent.rs
+++ b/sled-agent/src/sled_agent.rs
@@ -9,7 +9,7 @@ use crate::config::Config;
 use crate::instance_manager::InstanceManager;
 use crate::nexus::{LazyNexusClient, NexusRequestQueue};
 use crate::params::{
-    DatasetKind, DiskStateRequested, InstanceHardware,
+    DatasetEnsureBody, DiskStateRequested, InstanceHardware,
     InstanceMigrationSourceParams, InstancePutStateResponse,
     InstanceStateRequested, InstanceUnregisterResponse, ServiceEnsureBody,
     ServiceZoneService, SledRole, TimeSync, VpcFirewallRule, Zpool,
@@ -547,45 +547,56 @@ impl SledAgent {
         }
     }

-    /// Ensures that a filesystem type exists within the zpool.
-    pub async fn filesystem_ensure(
+    /// Ensures that all filesystem type exists within the zpool.
+    pub async fn filesystems_ensure(
         &self,
-        dataset_id: Uuid,
-        zpool_id: Uuid,
-        dataset_kind: DatasetKind,
-        address: SocketAddrV6,
-        gz_address: Option<Ipv6Addr>,
+        requested_datasets: DatasetEnsureBody,
     ) -> Result<(), Error> {
-        // First, ensure the dataset exists
-        let dataset = self
-            .inner
-            .storage
-            .upsert_filesystem(dataset_id, zpool_id, dataset_kind.clone())
-            .await?;
+        // TODO:
+        // - If these are the set of filesystems, we should also consider
+        // removing the ones which are not listed here.
+        // - It's probably worth sending a bulk request to the storage system,
+        // rather than requesting individual datasets.
+        for dataset in &requested_datasets.datasets {
+            let dataset_id = dataset.id;
+
+            // First, ensure the dataset exists
+            self.inner
+                .storage
+                .upsert_filesystem(dataset_id, dataset.dataset_name.clone())
+                .await?;
+        }

-        // NOTE: We use the "dataset_id" as the "service_id" here.
-        //
-        // Since datasets are tightly coupled with their own services - e.g.,
-        // from the perspective of Nexus, provisioning a dataset implies the
-        // sled should start a service - this is ID re-use is reasonable.
-        //
-        // If Nexus ever wants sleds to provision datasets independently of
-        // launching services, this ID type overlap should be reconsidered.
-        let service_type = dataset_kind.service_type();
-        let services =
-            vec![ServiceZoneService { id: dataset_id, details: service_type }];
-
-        // Next, ensure a zone exists to manage storage for that dataset
-        let request = crate::params::ServiceZoneRequest {
-            id: dataset_id,
-            zone_type: dataset_kind.zone_type(),
-            addresses: vec![*address.ip()],
-            dataset: Some(dataset),
-            gz_addresses: gz_address.into_iter().collect(),
-            services,
-        };
-        self.inner.services.ensure_storage_service(request).await?;
+        for dataset in &requested_datasets.datasets {
+            let dataset_id = dataset.id;
+            let address = dataset.address;
+            let gz_address = dataset.gz_address;
+            // NOTE: We use the "dataset_id" as the "service_id" here.
+            //
+            // Since datasets are tightly coupled with their own services - e.g.,
+            // from the perspective of Nexus, provisioning a dataset implies the
+            // sled should start a service - this is ID re-use is reasonable.
+            //
+            // If Nexus ever wants sleds to provision datasets independently of
+            // launching services, this ID type overlap should be reconsidered.
+            let service_type = dataset.dataset_name.dataset().service_type();
+            let services = vec![ServiceZoneService {
+                id: dataset_id,
+                details: service_type,
+            }];
+
+            // Next, ensure a zone exists to manage storage for that dataset
+            let request = crate::params::ServiceZoneRequest {
+                id: dataset_id,
+                zone_type: dataset.dataset_name.dataset().zone_type(),
+                addresses: vec![*address.ip()],
+                dataset: Some(dataset.dataset_name.clone()),
+                gz_addresses: gz_address.into_iter().collect(),
+                services,
+            };
+            self.inner.services.ensure_storage_service(request).await?;
+        }

         Ok(())
     }

diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs
index cf2d5a43f6f..0c901ce0887 100644
--- a/sled-agent/src/storage_manager.rs
+++ b/sled-agent/src/storage_manager.rs
@@ -5,7 +5,6 @@
 //! Management of sled-local storage.

 use crate::nexus::LazyNexusClient;
-use crate::params::DatasetKind;
 use crate::storage::dataset::DatasetName;
 use camino::Utf8PathBuf;
 use futures::stream::FuturesOrdered;
@@ -156,8 +155,7 @@ type NotifyFut =
 #[derive(Debug)]
 struct NewFilesystemRequest {
     dataset_id: Uuid,
-    zpool_id: Uuid,
-    dataset_kind: DatasetKind,
+    dataset_name: DatasetName,
     responder: oneshot::Sender<Result<DatasetName, Error>>,
 }

@@ -788,14 +786,18 @@ impl StorageWorker {
     ) -> Result<DatasetName, Error> {
         info!(self.log, "add_dataset: {:?}", request);
         let mut pools = resources.pools.lock().await;
-        let pool = pools.get_mut(&request.zpool_id).ok_or_else(|| {
-            Error::ZpoolNotFound(format!(
-                "{}, looked up while trying to add dataset",
-                request.zpool_id
-            ))
-        })?;
-        let dataset_name =
-            DatasetName::new(pool.name.clone(), request.dataset_kind.clone());
+        let pool = pools
+            .get_mut(&request.dataset_name.pool().id())
+            .ok_or_else(|| {
+                Error::ZpoolNotFound(format!(
+                    "{}, looked up while trying to add dataset",
+                    request.dataset_name.pool(),
+                ))
+            })?;
+        let dataset_name = DatasetName::new(
+            pool.name.clone(),
+            request.dataset_name.dataset().clone(),
+        );
         self.ensure_dataset(request.dataset_id, &dataset_name)?;
         Ok(dataset_name)
     }
@@ -1027,16 +1029,11 @@ impl StorageManager {
     pub async fn upsert_filesystem(
         &self,
         dataset_id: Uuid,
-        zpool_id: Uuid,
-        dataset_kind: DatasetKind,
+        dataset_name: DatasetName,
     ) -> Result<DatasetName, Error> {
         let (tx, rx) = oneshot::channel();
-        let request = NewFilesystemRequest {
-            dataset_id,
-            zpool_id,
-            dataset_kind,
-            responder: tx,
-        };
+        let request =
+            NewFilesystemRequest { dataset_id, dataset_name, responder: tx };
         self.inner
             .tx