diff --git a/init/nitro/args_reader.c b/init/nitro/args_reader.c index 44bac1fea..032c0ec70 100644 --- a/init/nitro/args_reader.c +++ b/init/nitro/args_reader.c @@ -16,6 +16,9 @@ #define ENCLAVE_VSOCK_LAUNCH_ARGS_READY 0xb7 +/* + * Enclave argument IDs. + */ enum { ENCLAVE_ARG_ID_ROOTFS, ENCLAVE_ARG_ID_EXEC_PATH, @@ -30,22 +33,22 @@ enum { /* * Before reading data from the vsock, the vsock sends a 4-byte "header", * representing the size (in bytes) of the object that will be written over the - * stream. Read this size and store it within a uint32_t variable. + * stream. Read this size and store it within a uint64_t variable. */ -static int args_reader_len_read(int sock_fd, uint32_t *size) +static int args_reader_len_read(int sock_fd, uint64_t *size) { - uint8_t bytes[sizeof(uint32_t)]; + uint8_t bytes[sizeof(uint64_t)]; ssize_t ret; // Read the bytes (representing the size) from the vsock. - ret = read(sock_fd, bytes, sizeof(uint32_t)); - if (ret < sizeof(uint32_t)) { + ret = read(sock_fd, bytes, sizeof(uint64_t)); + if (ret < sizeof(uint64_t)) { perror("vsock byte buffer length read"); return -errno; } // Store the size within the "size" argument. - memcpy(size, bytes, sizeof(uint32_t)); + memcpy(size, bytes, sizeof(uint64_t)); return 0; } @@ -66,9 +69,9 @@ static void char_list_free(char **buf) /* * Read and store an object from the vsock stream. 
*/ -static int args_reader_rcv(int sock_fd, void **buf_ptr, uint32_t *size) +static int args_reader_rcv(int sock_fd, void **buf_ptr, uint64_t *size) { - uint32_t len, idx; + uint64_t len, idx; ssize_t read_len; uint8_t *buf; int ret; @@ -122,7 +125,7 @@ static int args_reader_rcv(int sock_fd, void **buf_ptr, uint32_t *size) */ static int args_reader_char_list_build(int sock_fd, char ***buf_ptr) { - uint32_t size; + uint64_t size; char **buf; int ret, i; @@ -162,9 +165,9 @@ static int args_reader_char_list_build(int sock_fd, char ***buf_ptr) */ static int args_reader_signal(unsigned int vsock_port) { - uint8_t buf[1]; struct sockaddr_vm saddr; int ret, sock_fd; + uint8_t buf[1]; buf[0] = ENCLAVE_VSOCK_LAUNCH_ARGS_READY; errno = -EINVAL; @@ -218,6 +221,9 @@ static int args_reader_signal(unsigned int vsock_port) return -errno; } +/* + * Read each enclave argument from the host. + */ static int __args_reader_read(int sock_fd, struct enclave_args *args) { uint8_t id; @@ -261,11 +267,16 @@ static int __args_reader_read(int sock_fd, struct enclave_args *args) return 0; } + // Error occurred. Return error code. if (ret < 0) return ret; } } +/* + * Establish communication with the host's argument writer and read the enclave + * configuration (via the arguments) from it. + */ int args_reader_read(struct enclave_args *args, unsigned int vsock_port) { int ret, sock_fd; diff --git a/init/nitro/device/app_stdio_output.c b/init/nitro/device/app_stdio_output.c index f6696cbe3..30a9dffef 100644 --- a/init/nitro/device/app_stdio_output.c +++ b/init/nitro/device/app_stdio_output.c @@ -11,6 +11,10 @@ static int APP_STDIO_OUTPUT_VSOCK_FD = -1; +/* + * Redirect std{err, out} output to a vsock connected to the host. Allows the + * host to read application output. 
+ */ int app_stdio_output(unsigned int vsock_port) { int streams[2] = {STDOUT_FILENO, STDERR_FILENO}; @@ -18,6 +22,7 @@ int app_stdio_output(unsigned int vsock_port) struct timeval timeval; int ret, sock_fd, i; + // Open a vsock and connect to the host. sock_fd = socket(AF_VSOCK, SOCK_STREAM, 0); if (sock_fd < 0) { perror("unable to create guest socket"); @@ -47,6 +52,7 @@ int app_stdio_output(unsigned int vsock_port) return -errno; } + // Refer the std{err, out} file descriptors to the connected vsock. for (i = 0; i < 2; i++) { ret = dup2(sock_fd, streams[i]); if (ret < 0) { @@ -57,11 +63,15 @@ int app_stdio_output(unsigned int vsock_port) } } + // Store the vsock's file descriptor for eventual closing. APP_STDIO_OUTPUT_VSOCK_FD = sock_fd; return 0; } +/* + * Dereference and close the application output vsock. + */ void app_stdio_close(void) { close(STDOUT_FILENO); diff --git a/init/nitro/device/device.c b/init/nitro/device/device.c index 89abda167..53e8551d7 100644 --- a/init/nitro/device/device.c +++ b/init/nitro/device/device.c @@ -8,12 +8,20 @@ #include "include/device.h" +/* + * Upon receiving SIGUSR1 from a device proxy process, set the proxy ready + * variable to indicate the proxy is finished initializing and the main process + * can continue. + */ void device_proxy_sig_handler(int sig) { if (sig == SIGUSR1) DEVICE_PROXY_READY = 1; } +/* + * Initialize a specific device proxy. + */ int device_init(enum krun_nitro_device dev, int vsock_port, int shutdown_fd) { int ret; @@ -22,6 +30,12 @@ int device_init(enum krun_nitro_device dev, int vsock_port, int shutdown_fd) DEVICE_PROXY_READY = 0; switch (dev) { + /* + * Some proxies will fork to produce separate processes. These processes + * will send a signal to the main process to indicate when they have + * finished initialization. When applicable, the main process must wait for + * this signal before continuing execution. 
+ */ case KRUN_NE_DEV_SIGNAL_HANDLER: ret = sig_handler_init(vsock_port, shutdown_fd); while (!DEVICE_PROXY_READY) diff --git a/init/nitro/device/include/device.h b/init/nitro/device/include/device.h index 0d91d561a..4f81d2054 100644 --- a/init/nitro/device/include/device.h +++ b/init/nitro/device/include/device.h @@ -5,8 +5,16 @@ #include +/* + * Variable for device proxies to indicate to the main process that they have + * finished initialization. + */ static volatile sig_atomic_t DEVICE_PROXY_READY = 0; +/* + * Device proxy signal handler. Used by device proxy processes to notify the + * main process that they have finished initialization. + */ void device_proxy_sig_handler(int); enum krun_nitro_device { diff --git a/init/nitro/device/net_tap_afvsock.c b/init/nitro/device/net_tap_afvsock.c index 5444473ef..dd8b65f5e 100644 --- a/init/nitro/device/net_tap_afvsock.c +++ b/init/nitro/device/net_tap_afvsock.c @@ -31,6 +31,10 @@ #define TUN_DEV_MAJOR 10 #define TUN_DEV_MINOR 200 +/* + * Forward ethernet packets to/from the host vsock providing network access and + * the guest TAP device routing application network traffic. + */ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd, char *tap_name) { @@ -38,9 +42,13 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd, unsigned char *buf; bool event_found; struct ifreq ifr; - ssize_t nread; int ret, sock_fd; + ssize_t nread; + /* + * Fetch the TAP device's Maximum Transfer Unit (MTU) and allocate a buffer + * in that size to transfer ethernet frames to/from the host. 
+ */ sock_fd = socket(AF_INET, SOCK_DGRAM, 0); if (sock_fd < 0) { perror("creating INET socket to get TAP MTU"); @@ -54,7 +62,7 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd, if (ret < 0) { close(sock_fd); perror("fetch MTU of TAP device"); - exit(ret); + exit(-errno); } close(sock_fd); @@ -65,6 +73,13 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd, exit(-1); } + // Forward the MTU to the host for it to allocate a corresponding buffer. + ret = write(vsock_fd, (void *)&ifr.ifr_mtu, sizeof(int)); + if (ret < sizeof(int)) { + perror("write TAP device MTU to host"); + exit(-errno); + } + pfds[0].fd = vsock_fd; pfds[0].events = POLLIN; @@ -74,10 +89,12 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd, pfds[2].fd = shutdown_fd; pfds[2].events = POLLIN; + // Signal to the parent process that initialization is complete. kill(getppid(), SIGUSR1); while (poll(pfds, 3, -1) > 0) { event_found = false; + // Event on vsock. Read the frame and write it to the TAP device. if (pfds[0].revents & POLLIN) { unsigned int sz; nread = read(vsock_fd, &sz, 4); @@ -92,6 +109,7 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd, event_found = true; } + // Event on the TAP device. Read the frame and write it to the vsock. if (pfds[1].revents & POLLIN) { nread = read(tun_fd, buf, ifr.ifr_mtu); if (nread > 0) { @@ -120,6 +138,9 @@ static int tap_vsock_forward(int tun_fd, int vsock_fd, int shutdown_fd, exit(0); } +/* + * Initialize the enclave TAP device to route all network traffic to the host. + */ static int tun_init(void) { struct stat statbuf; @@ -165,6 +186,9 @@ static int tun_init(void) return 0; } +/* + * Assign IP data to route enclave network traffic to the TAP device. + */ static int tap_assign_ipaddr(char *name) { struct sockaddr_in *addr; @@ -279,6 +303,9 @@ static int tap_assign_ipaddr(char *name) return 0; } +/* + * Allocate a TAP device for enclave network traffic. 
+ */ static int tap_alloc(char *name) { struct ifreq ifr; @@ -303,6 +330,7 @@ static int tap_alloc(char *name) strcpy(name, ifr.ifr_name); + // Assign the IP data to the TAP device. ret = tap_assign_ipaddr(name); if (ret < 0) return ret; @@ -310,6 +338,9 @@ static int tap_alloc(char *name) return fd; } +/* + * Initialize a TAP device to route network traffic to/from. + */ int tap_afvsock_init(unsigned int vsock_port, int shutdown_fd) { int ret, tun_fd, vsock_fd; @@ -335,6 +366,7 @@ int tap_afvsock_init(unsigned int vsock_port, int shutdown_fd) perror("network proxy process"); exit(EXIT_FAILURE); case 0: + // Initialize the vsock used for network proxying. vsock_fd = socket(AF_VSOCK, SOCK_STREAM, 0); if (vsock_fd < 0) { perror("network vsock creation"); @@ -350,7 +382,6 @@ int tap_afvsock_init(unsigned int vsock_port, int shutdown_fd) return -errno; } - // Initialize the vsock used for network proxying. memset(&saddr, 0, sizeof(struct sockaddr_vm)); saddr.svm_family = AF_VSOCK; saddr.svm_cid = VMADDR_CID_HOST; @@ -363,6 +394,7 @@ int tap_afvsock_init(unsigned int vsock_port, int shutdown_fd) exit(EXIT_FAILURE); } + // Forward network traffic between the host and TAP device. ret = tap_vsock_forward(tun_fd, vsock_fd, shutdown_fd, tap_name); if (ret < 0) exit(EXIT_FAILURE); diff --git a/init/nitro/device/signal.c b/init/nitro/device/signal.c index 751f952b8..734176125 100644 --- a/init/nitro/device/signal.c +++ b/init/nitro/device/signal.c @@ -15,6 +15,9 @@ #include "include/device.h" +/* + * Forward signals from the host to the parent process. + */ static int sig_handler_start(int vsock_fd, int shutdown_fd) { struct pollfd pfds[2]; @@ -27,9 +30,11 @@ static int sig_handler_start(int vsock_fd, int shutdown_fd) pfds[1].fd = shutdown_fd; pfds[1].events = POLLIN; + // Signal to the parent process that initialization is complete. kill(getppid(), SIGUSR1); while (poll(pfds, 2, -1) > 0) { + // Event on vsock. Read the signal and forward it to the parent process. 
if (pfds[0].revents & POLLIN) { len = read(vsock_fd, (void *)&sig, sizeof(int)); if (len != sizeof(int)) { @@ -39,6 +44,7 @@ static int sig_handler_start(int vsock_fd, int shutdown_fd) kill(getppid(), sig); } + // Event on shutdown FD. Close the vsock and exit. if (pfds[1].revents & POLLIN) break; } @@ -48,6 +54,10 @@ static int sig_handler_start(int vsock_fd, int shutdown_fd) exit(0); } +/* + * Initialize a signal handling proxy to forward signals from the host to the + * parent process. + */ int sig_handler_init(unsigned int vsock_port, int shutdown_fd) { struct sockaddr_vm saddr; @@ -61,6 +71,7 @@ int sig_handler_init(unsigned int vsock_port, int shutdown_fd) perror("signal handler proxy process"); return -errno; case 0: + // Initialize the vsock used for signal forwarding. vsock_fd = socket(AF_VSOCK, SOCK_STREAM, 0); if (vsock_fd < 0) { perror("signal handler vsock creation"); @@ -90,6 +101,7 @@ int sig_handler_init(unsigned int vsock_port, int shutdown_fd) return -errno; } + // Forward signals from the host to the parent process. ret = sig_handler_start(vsock_fd, shutdown_fd); if (ret < 0) { close(vsock_fd); diff --git a/init/nitro/fs.c b/init/nitro/fs.c index 65284a327..9b9fba93a 100644 --- a/init/nitro/fs.c +++ b/init/nitro/fs.c @@ -14,6 +14,48 @@ #define SYS_FS_CGROUP_PATH "/sys/fs/cgroup/" #define CGROUP_SUB_PATH_SIZE (sizeof(SYS_FS_CGROUP_PATH) - 1 + 64) +/* + * Initialize /dev/console and redirect std{err, in, out} to it for early debug + * output. + */ +int console_init() +{ + const char *path = "/dev/console"; + FILE *file; + int ret; + + ret = mount("dev", "/dev", "devtmpfs", MS_NOSUID | MS_NOEXEC, NULL); + if (ret < 0 && errno != EBUSY) { + perror("mount /dev"); + return -errno; + } + + // Redirect stdin, stdout, and stderr to /dev/console. 
+ file = freopen(path, "r", stdin); + if (file == NULL) { + perror("freopen stdin"); + return -errno; + } + + file = freopen(path, "w", stdout); + if (file == NULL) { + perror("freopen stdout"); + goto err; + } + + file = freopen(path, "w", stderr); + if (file == NULL) { + perror("freopen stderr"); + goto err; + } + + return 0; + +err: + fclose(file); + return -errno; +} + /* * Initialize the cgroups. */ diff --git a/init/nitro/include/args_reader.h b/init/nitro/include/args_reader.h index e037e7827..d73895d1d 100644 --- a/init/nitro/include/args_reader.h +++ b/init/nitro/include/args_reader.h @@ -6,14 +6,17 @@ #include #include +/* + * Enclave configuration arguments written from the host. + */ struct enclave_args { - void *rootfs_archive; - uint32_t rootfs_archive_size; - char *exec_path; - char **exec_argv; - char **exec_envp; - bool network_proxy; - bool debug; + void *rootfs_archive; // rootfs tar archive. + uint64_t rootfs_archive_size; // Size of rootfs tar archive. + char *exec_path; // Path of execution binary. + char **exec_argv; // Execution argument vector. + char **exec_envp; // Execution environment pointer. + bool network_proxy; // Indicate if networking is configured. + bool debug; // Indicate if running in debug mode. }; int args_reader_read(struct enclave_args *, unsigned int); diff --git a/init/nitro/include/fs.h b/init/nitro/include/fs.h index b07509985..359cea8ba 100644 --- a/init/nitro/include/fs.h +++ b/init/nitro/include/fs.h @@ -3,6 +3,7 @@ #ifndef _FS_INIT_H #define _FS_INIT_H +int console_init(); int filesystem_init(); int cgroups_init(); diff --git a/init/nitro/main.c b/init/nitro/main.c index 531c4929b..2b5c53403 100644 --- a/init/nitro/main.c +++ b/init/nitro/main.c @@ -42,55 +42,14 @@ enum { }; /* - * Initialize /dev/console and redirect std{err, in, out} to it for early debug - * output. + * Load the NSM kernel module. 
*/ -int console_init() -{ - const char *path = "/dev/console"; - FILE *file; - int ret; - - ret = mount("dev", "/dev", "devtmpfs", MS_NOSUID | MS_NOEXEC, NULL); - if (ret < 0 && errno != EBUSY) { - perror("mount /dev"); - return -errno; - } - - // Redirect stdin, stdout, and stderr to /dev/console. - file = freopen(path, "r", stdin); - if (file == NULL) { - perror("freopen stdin"); - return -errno; - } - - file = freopen(path, "w", stdout); - if (file == NULL) { - perror("freopen stdout"); - goto err; - } - - file = freopen(path, "w", stderr); - if (file == NULL) { - perror("freopen stderr"); - goto err; - } - - return 0; - -err: - fclose(file); - return -errno; -} - -/* - * Initialize/load the NSM kernel module. - */ -int nsm_init() +static int nsm_load(void) { const char *file_name = "nsm.ko"; int fd, ret; + // Open and load the kernel module. fd = open(file_name, O_RDONLY | O_CLOEXEC); if (fd < 0 && errno == ENOENT) return 0; @@ -99,7 +58,6 @@ int nsm_init() return -errno; } - // Load the NSM module. ret = finit_module(fd, "", 0); if (ret < 0) { close(fd); @@ -127,7 +85,7 @@ int nsm_init() /* * Mount the extracted rootfs and switch the root directory to it. */ -static int rootfs_mount() +static int rootfs_mount(void) { int ret; @@ -172,7 +130,7 @@ static int rootfs_mount() /* * Launch the application specified with argv and envp. */ -pid_t launch(char **argv, char **envp) +static pid_t launch(char **argv, char **envp) { int ret; @@ -200,11 +158,9 @@ pid_t launch(char **argv, char **envp) } /* - * Measure the enclave rootfs and execution variables (path, argv, envp) with - * the NSM PCRs. + * Measure the enclave execution environment (path, argv, envp) in NSM PCR 17. * - * NSM PCR 16 contains the measurement of the root filesystem. - * NSM PCR 17 contains the measurement of the execution variables (path, argv, + * NSM PCR 17 contains the measurement of the execution environment (path, argv, * envp). 
*/ static int nsm_pcrs_exec_path_extend(int nsm_fd, char *path, char **argv, @@ -217,14 +173,14 @@ static int nsm_pcrs_exec_path_extend(int nsm_fd, char *path, char **argv, pcr_data_size = 256; - // Measure the execution path with NSM PCR 17. + // Measure the execution path. exec_ptr = path; ret = nsm_extend_pcr(nsm_fd, NSM_PCR_EXEC_DATA, (uint8_t *)exec_ptr, strlen(exec_ptr), (void *)pcr_data, &pcr_data_size); if (ret != ERROR_CODE_SUCCESS) goto out; - // Measure each execution argument with NSM PCR 17. + // Measure each execution argument. for (i = 0; (exec_ptr = argv[i]) != NULL; ++i) { ret = nsm_extend_pcr(nsm_fd, NSM_PCR_EXEC_DATA, (uint8_t *)exec_ptr, @@ -233,7 +189,7 @@ static int nsm_pcrs_exec_path_extend(int nsm_fd, char *path, char **argv, goto out; } - // Measure each environment variable with NSM PCR 17. + // Measure each environment variable. for (i = 0; (exec_ptr = envp[i]) != NULL; ++i) { ret = nsm_extend_pcr(nsm_fd, NSM_PCR_EXEC_DATA, (uint8_t *)exec_ptr, @@ -249,7 +205,7 @@ static int nsm_pcrs_exec_path_extend(int nsm_fd, char *path, char **argv, } /* - * Lock PCRs measured by initramfs and close the NSM handle. + * Lock PCRs measured by init process and close the NSM handle. */ static int nsm_exit(int nsm_fd) { @@ -258,7 +214,7 @@ static int nsm_exit(int nsm_fd) /* * Lock PCRs 16 and 17 so they cannot be extended further. This is to ensure * there can no further data measured other than the rootfs and execution - * variables. + * environment. */ ret = nsm_lock_pcrs(nsm_fd, NSM_PCR_EXEC_DATA); if (ret != ERROR_CODE_SUCCESS) @@ -268,12 +224,15 @@ static int nsm_exit(int nsm_fd) nsm_lib_exit(nsm_fd); ret = 0; - out: return -ret; } -unsigned int cid_fetch(void) +/* + * Fetch the enclave VM's CID in order to calculate vsock port offsets for host + * communication. 
+ */ +static unsigned int cid_fetch(void) { unsigned int cid; int ret, fd; @@ -285,23 +244,25 @@ unsigned int cid_fetch(void) } ret = ioctl(fd, IOCTL_VM_SOCKETS_GET_LOCAL_CID, &cid); + close(fd); + if (ret < 0) { - close(fd); perror("unable to fetch VM CID:"); return 0; } - close(fd); - return cid; } +/* + * Forward the application return code to the host. + */ static int app_ret_write(int code, unsigned int cid) { - int ret, sock_fd; unsigned int vsock_port; struct sockaddr_vm addr; struct timeval timeval; + int ret, sock_fd; sock_fd = socket(AF_VSOCK, SOCK_STREAM, 0); if (sock_fd < 0) { @@ -319,6 +280,10 @@ static int app_ret_write(int code, unsigned int cid) memset(&timeval, 0, sizeof(struct timeval)); timeval.tv_sec = 5; + /* + * The host needs to join all device proxy threads before reading the return + * code. Allow some time for the host to connect to the return code vsock. + */ ret = setsockopt(sock_fd, AF_VSOCK, SO_VM_SOCKETS_CONNECT_TIMEOUT, (void *)&timeval, sizeof(struct timeval)); if (ret < 0) { @@ -334,6 +299,7 @@ static int app_ret_write(int code, unsigned int cid) return -errno; } + // Write the return code. ret = write(sock_fd, (void *)&code, sizeof(int)); if (ret < sizeof(int)) { perror("unable to write application return code"); @@ -341,12 +307,27 @@ static int app_ret_write(int code, unsigned int cid) return -errno; } + /* + * Read a return code (value is irrelevant) from the host. This is to ensure + * that the host was able to read the return code from the vsock before the + * enclave exits. + */ + ret = read(sock_fd, (void *)&code, sizeof(int)); + if (ret < sizeof(int)) { + perror("unable to read close signal from application return vsock"); + close(sock_fd); + return -errno; + } + close(sock_fd); return 0; } -static int devices_init(int cid, struct enclave_args *args, int shutdown_fd) +/* + * Initialize each configured device proxy for the enclave. 
+ */ +static int proxies_init(int cid, struct enclave_args *args, int shutdown_fd) { struct sigaction sa; int ret; @@ -357,17 +338,28 @@ static int devices_init(int cid, struct enclave_args *args, int shutdown_fd) sigaddset(&sa.sa_mask, SIGUSR1); sigprocmask(SIG_UNBLOCK, &sa.sa_mask, NULL); + /* + * Each proxy will send a SIGUSR1 message to indicate when it has started. + * Enable this signal so the main process can wait and be notified when each + * proxy has initialized itself. + */ ret = sigaction(SIGUSR1, &sa, NULL); if (ret < 0) { perror("sigaction enable SIGUSR1 for device proxies"); return -errno; } + /* + * If not running in debug mode, initialize the application output proxy. + * In debug mode, the enclave uses the console (which is already connected) + * for output. + */ if (!args->debug) { ret = device_init(KRUN_NE_DEV_APP_OUTPUT_STDIO, cid + VSOCK_PORT_OFFSET_OUTPUT, shutdown_fd); } + // Initialize the network proxy if configured. if (args->network_proxy) { ret = device_init(KRUN_NE_DEV_NET_TAP_AF_VSOCK, cid + VSOCK_PORT_OFFSET_NET, shutdown_fd); @@ -375,19 +367,29 @@ static int devices_init(int cid, struct enclave_args *args, int shutdown_fd) return ret; } + /* + * The signal proxy is always initialized to allow the host to send signals + * to the enclave. + */ ret = device_init(KRUN_NE_DEV_SIGNAL_HANDLER, cid + VSOCK_PORT_OFFSET_SIGNAL_HANDLER, shutdown_fd); - if (ret < 0) - return ret; return ret; } -static int devices_exit(struct enclave_args *args, int shutdown_fd) +/* + * Close and exit each device proxy. + */ +static int proxies_exit(struct enclave_args *args, int shutdown_fd) { uint64_t sfd_val; int ret; + /* + * The shutdown value is irrelevant, it acts as a signal to all device proxy + * threads that the enclave is exiting. Upon receiving this signal, each + * device proxy will close their respective vsock and exit. 
+ */ sfd_val = 1; ret = write(shutdown_fd, &sfd_val, sizeof(uint64_t)); if (ret < 0) { @@ -395,19 +397,28 @@ static int devices_exit(struct enclave_args *args, int shutdown_fd) ret = -errno; } + // If not in debug mode, close the application output vsock. if (!args->debug) app_stdio_close(); return ret; } +// The PID of the application process. static pid_t KRUN_NITRO_APP_PID = -1; +// Indicates if a SIGTERM signal was caught by the enclave signal handler. static bool KRUN_NITRO_SIGTERM_CAUGHT = false; +/* + * Forward a signal from the signal handler to the application process. + * Currently, only SIGTERM is supported. + */ void shutdown_sig_handler(int sig) { if ((sig == SIGTERM) && (KRUN_NITRO_APP_PID > 0)) { + // Send the signal to the application process. kill(KRUN_NITRO_APP_PID, sig); + // Indicate that the SIGTERM signal was caught. KRUN_NITRO_SIGTERM_CAUGHT = true; } } @@ -447,10 +458,11 @@ int main(int argc, char *argv[]) goto out; // Initialize the NSM kernel module. - ret = nsm_init(); + ret = nsm_load(); if (ret < 0) goto out; + // Read the enclave arguments from the host. ret = args_reader_read(&args, cid + VSOCK_PORT_OFFSET_ARGS_READER); if (ret < 0) goto out; @@ -463,7 +475,7 @@ int main(int argc, char *argv[]) goto out; } - // Measure the rootfs and execution variables in the NSM PCRs. + // Measure the rootfs and execution environment in the NSM PCRs. ret = nsm_pcrs_exec_path_extend(nsm_fd, args.exec_path, args.exec_argv, args.exec_envp); if (ret < 0) @@ -502,6 +514,10 @@ int main(int argc, char *argv[]) if (ret < 0) goto out; + /* + * Create a shutdown eventfd that can be written to in order to notify each + * device proxy to close and exit at some point. + */ shutdown_fd = eventfd(0, 0); if (shutdown_fd < 0) { perror("creating shutdown FD"); @@ -509,7 +525,8 @@ int main(int argc, char *argv[]) goto out; } - ret = devices_init(cid, &args, shutdown_fd); + // Initialize each configured device proxy. 
+ ret = proxies_init(cid, &args, shutdown_fd); if (ret < 0) goto out; @@ -531,8 +548,16 @@ int main(int argc, char *argv[]) ret = launch(args.exec_argv, args.exec_envp); break; default: + /* + * Store the application process' PID in the event of a signal needing + * to be forwarded to it. + */ KRUN_NITRO_APP_PID = pid; + /* + * Initialize the shutdown handler for signals to be forwarded to the + * application process. + */ memset(&sa, 0, sizeof(struct sigaction)); sa.sa_handler = shutdown_sig_handler; @@ -542,19 +567,26 @@ int main(int argc, char *argv[]) return -errno; } + // Wait for the application process to exit. waitpid(pid, &ret_code, 0); + /* + * If the process was ended by a signal, the return code may represent a + * value that under normal circumstances would indicate an error. + * Therefore, if the application ended from a signal, zero-out the + * return code (indicating that the application process exited + * gracefully). + */ if (KRUN_NITRO_SIGTERM_CAUGHT) ret_code = 0; - ret = devices_exit(&args, shutdown_fd); + // Close and exit each device proxy. + ret = proxies_exit(&args, shutdown_fd); if (ret < 0) goto out; + // Write the return code to the host. ret = app_ret_write(ret_code, cid); - - // Allow the host to read the return code before exiting the enclave. 
- sleep(1); } out: diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index e41ff2ccc..e497c0157 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -372,7 +372,7 @@ impl TryFrom for NitroEnclave { return Err(-libc::EINVAL); }; - let net = { + let net_unixfd = { let mut list = ctx.vmr.net.list; let len = list.len(); match len { @@ -381,13 +381,12 @@ impl TryFrom for NitroEnclave { let device = list.pop_front().unwrap(); let device = device.lock().unwrap(); - match nitro::NetProxy::try_from(&*device) { - Ok(net_proxy) => Some(net_proxy), - Err(e) => { - error!("unable to configure network device: {:?}", e); - return Err(-libc::EINVAL); - } - } + let fd = match device.cfg_backend { + VirtioNetBackend::UnixstreamFd(fd) => RawFd::from(fd), + _ => return Err(libc::EINVAL), + }; + + Some(fd) } _ => { error!( @@ -412,7 +411,7 @@ impl TryFrom for NitroEnclave { exec_path, exec_args, exec_env, - net, + net_unixfd, output_path, debug: *debug, }) diff --git a/src/nitro/src/enclave/args_writer.rs b/src/nitro/src/enclave/args_writer.rs index 40533d059..25c8d7ea6 100644 --- a/src/nitro/src/enclave/args_writer.rs +++ b/src/nitro/src/enclave/args_writer.rs @@ -1,51 +1,60 @@ // SPDX-License-Identifier: Apache-2.0 -use crate::{ - enclave::{device::DeviceProxyList, VsockPortOffset}, - error::NitroError, -}; +use crate::enclave::{proxy::DeviceProxyList, VsockPortOffset}; use libc::c_int; use nitro_enclaves::launch::PollTimeout; use nix::poll::{poll, PollFd, PollFlags, PollTimeout as NixPollTimeout}; use std::{ - ffi::CString, - io::{Read, Write}, + ffi::{self, CString}, + fmt, + io::{self, Read, Write}, + num::TryFromIntError, os::fd::AsFd, str::FromStr, }; use vsock::{VsockAddr, VsockListener, VsockStream, VMADDR_CID_ANY}; +// A known byte that libkrun-nitro and the enclave initramfs will exchange to confirm that startup +// was successful and the initramfs is ready to begin reading enclave arguments. 
const ENCLAVE_VSOCK_LAUNCH_ARGS_READY: u8 = 0xb7; -type Result = std::result::Result; +type Result = std::result::Result; +/// The service responsible for writing the configuration (rootfs, execution environment, and +// optional device proxies) to the enclave. #[derive(Debug, Default)] pub struct EnclaveArgsWriter<'a> { + // List of enclave arguments. pub args: Vec>, } impl<'a> EnclaveArgsWriter<'a> { + /// Create a new arguments writer. An enclave's rootfs and execution path are required + /// arguments. Some device proxies are required, but others are optional. pub fn new( rootfs_archive: &'a [u8], exec_path: &str, argv_str: &str, envp_str: &str, - devices: &'a DeviceProxyList, + proxies: &'a DeviceProxyList, ) -> Self { let mut args: Vec> = Vec::new(); + // Split the argv string into a vector. let argv: Vec = argv_str .replace("\"", "") .split(' ') .map(|s| s.to_string()) .collect(); + // Split the envp string into a vector. let envp: Vec = envp_str .replace("\"", "") .split(' ') .map(|s| s.to_string()) .collect(); + // Create the initial argument list from the required arguments. args.append(&mut vec![ EnclaveArg::RootFilesystem(rootfs_archive), EnclaveArg::ExecPath(exec_path.to_string()), @@ -53,40 +62,45 @@ impl<'a> EnclaveArgsWriter<'a> { EnclaveArg::ExecEnvp(envp), ]); - for device in &devices.0 { - if let Some(arg) = device.enclave_arg() { + // Add an enclave argument for each device proxy that includes one. Any optional device + // proxy has an enclave argument. + for proxy in &proxies.0 { + if let Some(arg) = proxy.arg() { args.push(arg); } } Self { args } } + + /// Write the arguments to the enclave. pub fn write_args(&self, cid: u32, timeout: PollTimeout) -> Result<()> { + // Establish a vsock connection to the enclave's initramfs. 
let listener = VsockListener::bind(&VsockAddr::new( VMADDR_CID_ANY, cid + (VsockPortOffset::ArgsReader as u32), )) - .unwrap(); + .map_err(Error::VsockBind)?; + self.poll(&listener, timeout)?; - let mut stream = listener.accept().unwrap(); + let mut stream = listener.accept().map_err(Error::VsockAccept)?; if stream.1.cid() != cid { - return Err(NitroError::HeartbeatCidMismatch); + return Err(Error::VsockCidMismatch); } + // Exchange the ready signal to ensure the initramfs is ready to receive arguments. let mut buf = [0u8]; - let bytes = stream.0.read(&mut buf).map_err(NitroError::HeartbeatRead)?; + let bytes = stream.0.read(&mut buf).map_err(Error::VsockRead)?; if bytes != 1 || buf[0] != ENCLAVE_VSOCK_LAUNCH_ARGS_READY { - return Err(NitroError::EnclaveHeartbeatNotDetected); + return Err(Error::ReadySignalNotDetected); } - stream - .0 - .write_all(&buf) - .map_err(NitroError::HeartbeatWrite)?; + stream.0.write_all(&buf).map_err(Error::VsockWrite)?; + // Write each argument. for arg in &self.args { arg.write(&mut stream.0)?; } @@ -98,6 +112,7 @@ impl<'a> EnclaveArgsWriter<'a> { Ok(()) } + /// The enclave's initramfs may take some time to connect over vsock. Poll for the connection. fn poll(&self, listener: &VsockListener, timeout: PollTimeout) -> Result<()> { let mut poll_fds = [PollFd::new(listener.as_fd(), PollFlags::POLLIN)]; let result = poll( @@ -106,24 +121,34 @@ impl<'a> EnclaveArgsWriter<'a> { ); match result { - Ok(0) => Err(NitroError::PollNoSelectedEvents), - Ok(x) if x > 1 => Err(NitroError::PollMoreThanOneSelectedEvent), + Ok(0) => Err(Error::PollNoEvents), + Ok(x) if x > 1 => Err(Error::PollMoreThanOneSelectedEvent), _ => Ok(()), } } } +/// An enclave argument. #[derive(Debug)] pub enum EnclaveArg<'a> { + // Enclave rootfs. RootFilesystem(&'a [u8]), + // Enclave execution environment (path, argv, envp). ExecPath(String), ExecArgv(Vec), ExecEnvp(Vec), + // Network proxy. NetworkProxy, + // Debug logs. 
Debug, + + // Placeholder argument where libkrun notifies the initramfs that all arguments have been + // written and it can now close the vsock connection. Finished, } +/// Each argument has a unique code/ID for the initramfs to understand how to read its parameters. +/// This code is represented as a one-byte value. impl From<&EnclaveArg<'_>> for u8 { fn from(arg: &EnclaveArg) -> u8 { match arg { @@ -140,45 +165,126 @@ impl From<&EnclaveArg<'_>> for u8 { } impl EnclaveArg<'_> { + /// Write an argument to the enclave. fn write(&self, vsock: &mut VsockStream) -> Result<()> { let id: [u8; 1] = [self.into()]; - vsock.write_all(&id).unwrap(); + // Write the argument's ID for the enclave to understand how to read the argument's + // parameters. + vsock.write_all(&id).map_err(Error::VsockWrite)?; match self { + // rootfs argument writes the rootfs tar archive. Self::RootFilesystem(buf) => { - let len: u32 = buf.len().try_into().unwrap(); + let len: u64 = buf.len().try_into().map_err(Error::VsockBufferLenConvert)?; - vsock.write_all(&len.to_ne_bytes()).unwrap(); + vsock + .write_all(&len.to_ne_bytes()) + .map_err(Error::VsockWrite)?; - vsock.write_all(buf).unwrap(); + vsock.write_all(buf).map_err(Error::VsockWrite)?; } + // Execution argv and envp arguments write their respective contents as string arrays. Self::ExecArgv(vec) | Self::ExecEnvp(vec) => { - let len: u32 = vec.len().try_into().unwrap(); + // Write the amount of strings the enclave will read. + let len: u64 = vec.len().try_into().map_err(Error::VsockBufferLenConvert)?; - vsock.write_all(&len.to_ne_bytes()).unwrap(); + vsock + .write_all(&len.to_ne_bytes()) + .map_err(Error::VsockWrite)?; + // For each string, write the length (i.e. the number of bytes the enclave should + // read) and the string itself. 
for string in vec { - let bytes = Vec::from(CString::from_str(string).unwrap().as_bytes_with_nul()); - - let len: u32 = bytes.len().try_into().unwrap(); - - vsock.write_all(&len.to_ne_bytes()).unwrap(); - - vsock.write_all(&bytes).unwrap(); + let bytes = Vec::from( + CString::from_str(string) + .map_err(Error::CStringConvert)? + .as_bytes_with_nul(), + ); + + let len: u64 = bytes + .len() + .try_into() + .map_err(Error::VsockBufferLenConvert)?; + + vsock + .write_all(&len.to_ne_bytes()) + .map_err(Error::VsockWrite)?; + + vsock.write_all(&bytes).map_err(Error::VsockWrite)?; } } + // Execution path argument writes the path as a string. Self::ExecPath(buf) => { - let bytes = Vec::from(CString::from_str(buf).unwrap().as_bytes_with_nul()); - let len: u32 = bytes.len().try_into().unwrap(); - - vsock.write_all(&len.to_ne_bytes()).unwrap(); - - vsock.write_all(&bytes).unwrap(); + let bytes = Vec::from( + CString::from_str(buf) + .map_err(Error::CStringConvert)? + .as_bytes_with_nul(), + ); + let len: u64 = bytes + .len() + .try_into() + .map_err(Error::VsockBufferLenConvert)?; + + vsock + .write_all(&len.to_ne_bytes()) + .map_err(Error::VsockWrite)?; + + vsock.write_all(&bytes).map_err(Error::VsockWrite)?; } + + // Other arguments write solely their ID. The enclave will initialize them. _ => (), } Ok(()) } } + +/// Error in the process of writing the enclave's arguments. +#[derive(Debug)] +pub enum Error { + // Convert a string to a CString. + CStringConvert(ffi::NulError), + // No events detected on vsock. + PollNoEvents, + // More than one event found on vsock. + PollMoreThanOneSelectedEvent, + // Ready signal not detected. + ReadySignalNotDetected, + // Accepting the vsock connection. + VsockAccept(io::Error), + // Binding to the vsock. + VsockBind(io::Error), + // Converting a byte buffer's length to a u64. + VsockBufferLenConvert(TryFromIntError), + // CID mismatch with communicating enclave. + VsockCidMismatch, + // Reading from the vsock. 
+ VsockRead(io::Error), + // Writing to the vsock. + VsockWrite(io::Error), +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let msg = match self { + Self::CStringConvert(e) => format!("unable to convert string to CString: {e}"), + Self::PollNoEvents => "no events on vsock detected".to_string(), + Self::PollMoreThanOneSelectedEvent => { + "more than one event on vsock detected".to_string() + } + Self::ReadySignalNotDetected => "ready signal not detected".to_string(), + Self::VsockAccept(e) => format!("unable to accept vsock connection: {e}"), + Self::VsockBind(e) => format!("unable to bind to vsock: {e}"), + Self::VsockBufferLenConvert(e) => { + format!("unable to convert vsock buffer size to u64: {e}") + } + Self::VsockCidMismatch => "CID mismatch on vsock".to_string(), + Self::VsockRead(e) => format!("unable to read from vsock: {e}"), + Self::VsockWrite(e) => format!("unable to write to vsock: {e}"), + }; + + write!(f, "{}", msg) + } +} diff --git a/src/nitro/src/enclave/device/devices/net.rs b/src/nitro/src/enclave/device/devices/net.rs deleted file mode 100644 index cef3ac512..000000000 --- a/src/nitro/src/enclave/device/devices/net.rs +++ /dev/null @@ -1,133 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -use crate::enclave::{ - args_writer::EnclaveArg, - device::{DeviceProxy, Error, Result}, - VsockPortOffset, -}; -use devices::virtio::{net::device::VirtioNetBackend, Net}; -use std::{ - io::{ErrorKind, Read, Write}, - os::{ - fd::{FromRawFd, OwnedFd, RawFd}, - unix::net::UnixStream, - }, - sync::mpsc::{self, RecvTimeoutError}, - thread::{self, JoinHandle}, - time::Duration, -}; -use vsock::{VsockAddr, VsockListener, VMADDR_CID_ANY}; - -#[derive(Clone)] -pub struct NetProxy { - fd: RawFd, -} - -impl TryFrom<&Net> for NetProxy { - type Error = Error; - - fn try_from(net: &Net) -> Result { - let fd = match net.cfg_backend { - VirtioNetBackend::UnixstreamFd(fd) => RawFd::from(fd), - _ => return 
Err(Error::InvalidNetInterface), - }; - - Ok(Self { fd }) - } -} - -impl DeviceProxy for NetProxy { - fn vsock_port_offset(&self) -> VsockPortOffset { - VsockPortOffset::Net - } - - #[allow(unreachable_code)] - fn _start(&mut self, vsock_port: u32) -> Result<()> { - let vsock_listener = VsockListener::bind(&VsockAddr::new(VMADDR_CID_ANY, vsock_port)) - .map_err(Error::VsockBind)?; - - let mut vsock_stream = vsock_listener.accept().map_err(Error::VsockAccept)?; - - let mut vsock_stream_clone = vsock_stream.0.try_clone().map_err(Error::VsockClone)?; - - let unix_stream = unsafe { UnixStream::from(OwnedFd::from_raw_fd(self.fd)) }; - let mut unix_stream_clone_write = unix_stream.try_clone().map_err(Error::UnixClone)?; - - let (tx, rx) = mpsc::channel::<()>(); - - // vsock - let vsock_thread: JoinHandle> = thread::spawn(move || { - let mut vsock_buf = [0u8; 1500]; - loop { - let size = vsock_stream_clone - .read(&mut vsock_buf) - .map_err(Error::VsockRead)?; - if size > 0 { - unix_stream_clone_write - .write_all(&vsock_buf[..size]) - .map_err(Error::UnixWrite)?; - } else { - tx.send(()).unwrap(); - break; - } - } - - Ok(()) - }); - - let mut unix_stream_clone_read = unix_stream.try_clone().unwrap(); - unix_stream_clone_read - .set_read_timeout(Some(Duration::from_millis(250))) - .unwrap(); - // Unix - let unix_thread: JoinHandle> = thread::spawn(move || { - let mut unix_buf = [0u8; 1500]; - loop { - match unix_stream_clone_read.read(&mut unix_buf) { - Ok(size) => { - if size > 0 { - if vsock_stream.0.write_all(&unix_buf[..size]).is_err() { - continue; - } - } else { - break; - } - } - Err(ref e) - if e.kind() == ErrorKind::TimedOut || e.kind() == ErrorKind::WouldBlock => - { - match rx.recv_timeout(Duration::from_micros(500)) { - Ok(_) => break, - Err(e) => { - if e == RecvTimeoutError::Timeout { - continue; - } else { - panic!(); - } - } - } - } - Err(_) => panic!(), - } - } - - Ok(()) - }); - - if let Ok(Err(err)) = vsock_thread.join() { - log::error!("error with 
network vsock stream listener thread: {:?}", err); - return Err(err); - } - - if let Ok(Err(err)) = unix_thread.join() { - log::error!("error with network UNIX stream listener thread: {:?}", err); - return Err(err); - } - - Ok(()) - } - - fn enclave_arg(&self) -> Option> { - Some(EnclaveArg::NetworkProxy) - } -} diff --git a/src/nitro/src/enclave/device/devices/output.rs b/src/nitro/src/enclave/device/devices/output.rs deleted file mode 100644 index 7b92ae726..000000000 --- a/src/nitro/src/enclave/device/devices/output.rs +++ /dev/null @@ -1,81 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -use crate::enclave::{ - args_writer::EnclaveArg, - device::{DeviceProxy, Error}, - VsockPortOffset, -}; -use std::{ - fs::File, - fs::OpenOptions, - io::{Read, Write}, - path::PathBuf, -}; -use vsock::{VsockAddr, VsockListener, VsockStream, VMADDR_CID_ANY, VMADDR_CID_HYPERVISOR}; - -type Result = std::result::Result; - -pub struct OutputProxy { - file: File, - debug: bool, -} - -impl OutputProxy { - pub fn new(path: &PathBuf, debug: bool) -> Result { - let file = OpenOptions::new() - .read(false) - .write(true) - .open(path) - .map_err(Error::FileOpen)?; - - Ok(Self { file, debug }) - } -} - -impl DeviceProxy for OutputProxy { - fn enclave_arg(&self) -> Option> { - match self.debug { - true => Some(EnclaveArg::Debug), - false => None, - } - } - - fn vsock_port_offset(&self) -> VsockPortOffset { - match self.debug { - true => VsockPortOffset::Console, - false => VsockPortOffset::AppOutput, - } - } - - fn _start(&mut self, vsock_port: u32) -> Result<()> { - let mut vsock_stream = if self.debug { - VsockStream::connect(&VsockAddr::new(VMADDR_CID_HYPERVISOR, vsock_port)) - .map_err(Error::VsockConnect)? 
- } else { - let vsock_listener = VsockListener::bind(&VsockAddr::new(VMADDR_CID_ANY, vsock_port)) - .map_err(Error::VsockBind)?; - - let (vsock_stream, _vsock_addr) = - vsock_listener.accept().map_err(Error::VsockAccept)?; - - vsock_stream - }; - - let mut vsock_buf = [0u8; 1500]; - loop { - let size = vsock_stream - .read(&mut vsock_buf) - .map_err(Error::VsockRead)?; - - if size > 0 { - self.file - .write_all(&vsock_buf[..size]) - .map_err(Error::FileWrite)?; - } else { - break; - } - } - - Ok(()) - } -} diff --git a/src/nitro/src/enclave/device/devices/signal_handler.rs b/src/nitro/src/enclave/device/devices/signal_handler.rs deleted file mode 100644 index cf89d6ee8..000000000 --- a/src/nitro/src/enclave/device/devices/signal_handler.rs +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -use crate::enclave::{ - device::{EnclaveArg, Error, Result}, - DeviceProxy, VsockPortOffset, -}; -use signal_hook::consts::SIGTERM; -use std::{ - io::{Read, Write}, - sync::{ - atomic::{AtomicBool, Ordering}, - mpsc::{self, RecvTimeoutError}, - Arc, - }, - thread::{self, JoinHandle}, - time::Duration, -}; -use vsock::{VsockAddr, VsockListener, VMADDR_CID_ANY}; - -#[derive(Default)] -pub struct SignalHandler; - -impl DeviceProxy for SignalHandler { - fn enclave_arg(&self) -> Option> { - None - } - - fn vsock_port_offset(&self) -> VsockPortOffset { - VsockPortOffset::SignalHandler - } - - fn _start(&mut self, vsock_port: u32) -> Result<()> { - let term = Arc::new(AtomicBool::new(false)); - signal_hook::flag::register(SIGTERM, Arc::clone(&term)).unwrap(); - - let vsock_listener = VsockListener::bind(&VsockAddr::new(VMADDR_CID_ANY, vsock_port)) - .map_err(Error::VsockBind)?; - - let (mut vsock_stream, _vsock_addr) = - vsock_listener.accept().map_err(Error::VsockAccept)?; - - let (tx, rx) = mpsc::channel::<()>(); - let mut vsock_stream_clone = vsock_stream.try_clone().map_err(Error::VsockClone)?; - - let signal_handler: JoinHandle> = thread::spawn(move || { - 
while !term.load(Ordering::Relaxed) { - match rx.recv_timeout(Duration::from_micros(500)) { - Ok(_) => return Ok(()), - Err(e) => { - if e == RecvTimeoutError::Timeout { - continue; - } - } - } - } - - let sig = libc::SIGTERM; - vsock_stream - .write(&sig.to_ne_bytes()) - .map_err(Error::VsockWrite)?; - - Ok(()) - }); - - let shutdown_listener: JoinHandle> = thread::spawn(move || { - let mut vsock_buf = [0u8; 1]; - let _ = vsock_stream_clone - .read(&mut vsock_buf) - .map_err(Error::VsockRead)?; - let _ = tx.send(()); - - Ok(()) - }); - - let _ = signal_handler.join().unwrap(); - let _ = shutdown_listener.join().unwrap(); - - Ok(()) - } -} diff --git a/src/nitro/src/enclave/device/mod.rs b/src/nitro/src/enclave/device/mod.rs deleted file mode 100644 index bdcfff4ae..000000000 --- a/src/nitro/src/enclave/device/mod.rs +++ /dev/null @@ -1,96 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 - -mod devices; - -pub use devices::*; - -use crate::enclave::{args_writer::EnclaveArg, VsockPortOffset}; -use std::{ - fmt, io, - thread::{self, JoinHandle}, -}; - -type Result = std::result::Result; - -pub trait DeviceProxy { - fn enclave_arg(&self) -> Option>; - fn vsock_port_offset(&self) -> VsockPortOffset; - fn start(&mut self, cid: u32) -> Result<()> { - let port = cid + (self.vsock_port_offset() as u32); - - self._start(port) - } - fn _start(&mut self, vsock_port: u32) -> Result<()>; -} - -pub struct DeviceProxyList(pub Vec>); - -impl DeviceProxyList { - pub fn start(self, cid: u32) { - let mut handles: Vec>> = Vec::new(); - - for mut device in self.0 { - let handle: JoinHandle> = thread::spawn(move || { - device.start(cid)?; - - Ok(()) - }); - - handles.push(handle); - } - - for handle in handles.into_iter() { - let res = handle.join().unwrap(); - if let Err(err) = res { - log::error!("error running enclave device proxy: {:?}", err); - } - } - } -} - -#[derive(Debug)] -pub enum Error { - FileOpen(io::Error), - FileWrite(io::Error), - InvalidNetInterface, - 
UnixClone(io::Error), - UnixWrite(io::Error), - VsockAccept(io::Error), - VsockBind(io::Error), - VsockClone(io::Error), - VsockConnect(io::Error), - VsockRead(io::Error), - VsockWrite(io::Error), -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let msg = match self { - Self::FileOpen(cause) => format!("unable to open file: {:?}", cause), - Self::FileWrite(cause) => { - format!("unable to write buffer to output file: {:?}", cause) - } - Self::InvalidNetInterface => { - "invalid network proxy interface, must supply unix stream file descriptor" - .to_string() - } - Self::UnixClone(cause) => format!("unable to clone unix stream: {:?}", cause), - Self::UnixWrite(cause) => format!("unable to write to unix stream: {:?}", cause), - Self::VsockAccept(cause) => format!( - "unable to accept connection from enclave vsock: {:?}", - cause - ), - Self::VsockBind(cause) => { - format!("unable to bind to enclave vsock: {:?}", cause) - } - Self::VsockConnect(cause) => format!("uanble to connect to enclave vsock: {:?}", cause), - Self::VsockClone(cause) => format!("unable to clone enclave vsock: {:?}", cause), - Self::VsockRead(cause) => { - format!("unable to read from enclave vsock: {:?}", cause) - } - Self::VsockWrite(cause) => format!("unable to write to enclave vsock: {:?}", cause), - }; - - write!(f, "{}", msg) - } -} diff --git a/src/nitro/src/enclave/mod.rs b/src/nitro/src/enclave/mod.rs index a43f00db9..9dce60ffa 100644 --- a/src/nitro/src/enclave/mod.rs +++ b/src/nitro/src/enclave/mod.rs @@ -1,37 +1,40 @@ // SPDX-License-Identifier: Apache-2.0 -pub mod args_writer; -pub mod device; +pub(crate) mod args_writer; +pub(crate) mod proxy; +use super::error::{return_code, start, Error}; use args_writer::EnclaveArgsWriter; -use device::{ - net::NetProxy, output::OutputProxy, signal_handler::SignalHandler, DeviceProxy, DeviceProxyList, -}; use nitro_enclaves::{ launch::{ImageType, Launcher, MemoryInfo, PollTimeout, StartFlags}, 
Device, }; +use proxy::{ + net::NetProxy, output::OutputProxy, signal_handler::SignalHandler, DeviceProxy, DeviceProxyList, +}; use std::{ env, ffi::OsStr, fs, - io::{self, Read}, + io::{self, Read, Write}, + os::fd::RawFd, path::PathBuf, }; -use vsock::{VsockAddr, VsockListener, VMADDR_CID_ANY}; - -use super::error::NitroError; use tar::HeaderMode; +use vsock::{VsockAddr, VsockListener, VMADDR_CID_ANY}; -type Result = std::result::Result; +const KRUN_NITRO_EIF_PATH_ENV_VAR: &str = "KRUN_NITRO_EIF_PATH"; +const KRUN_NITRO_EIF_PATH_DEFAULT: &str = "/usr/share/krun-nitro/krun-nitro.eif"; +/// Directories within the configured rootfs that will be ignored when writing to the enclave. The +/// enclave is responsible for initializing these directories within the guest operating system. const ROOTFS_DIR_DENYLIST: [&str; 6] = [ - "proc", // /proc. - "run", // /run. - "tmp", // /tmp. - "dev", // /dev. - "sys", // /sys. - "usr/share/krun-nitro", + "proc", // /proc. + "run", // /run. + "tmp", // /tmp. + "dev", // /dev. + "sys", // /sys. + "usr/share/krun-nitro", // Cached EIF file (and possibly other metadata). ]; /// Nitro Enclave data. @@ -49,7 +52,7 @@ pub struct NitroEnclave { /// Execution environment. pub exec_env: String, /// Network proxy. - pub net: Option, + pub net_unixfd: Option, /// Path to redirect enclave output to. pub output_path: PathBuf, // Output kernel and initramfs debug logs from enclave. @@ -57,94 +60,129 @@ pub struct NitroEnclave { } impl NitroEnclave { - /// Run the enclave. - pub fn run(mut self) -> Result<()> { - let rootfs_archive = self.rootfs_archive()?; - - let devices = self.devices()?; + /// Run an application within a nitro enclave. + pub fn run(mut self) -> Result<(), Error> { + // Collect all launch parameters (rootfs, execution arguments, device proxies) and establish + // an enclave argument writer to write this data to the nitro enclave when started. 
+ let rootfs_archive = self.rootfs_archive().map_err(Error::RootFsArchive)?; + let proxies = self.proxies().map_err(Error::DeviceProxy)?; let writer = EnclaveArgsWriter::new( &rootfs_archive, &self.exec_path, &self.exec_args, &self.exec_env, - &devices, + &proxies, ); - let (cid, timeout) = self.start()?; + // Disable signals to launch enclave VM. + self.signals(false); + + // Launch the enclave and write the configured launch parameters to the initramfs. + let (cid, timeout) = self.start().map_err(Error::Start)?; - writer.write_args(cid, timeout)?; + writer.write_args(cid, timeout).map_err(Error::ArgsWrite)?; + // Establish the vsock listener for the application's return code upon termination. let retcode_listener = VsockListener::bind(&VsockAddr::new( VMADDR_CID_ANY, cid + (VsockPortOffset::ReturnCode as u32), )) - .unwrap(); + .map_err(return_code::Error::VsockBind) + .map_err(Error::ReturnCodeListener)?; - devices.start(cid); + // Enable signals now that enclave VM is started. + self.signals(true); - /* - * In debug mode, the console device doesn't shut down until the enclave - * itself exits. Thus, libkrun will be unable to retrieve the shutdown - * code from the enclave. - */ + // Run the device proxies. Each proxy is run within its own thread that can only be + // terminated by the enclave (by closing the vsock connection). + proxies.run(cid).map_err(Error::DeviceProxy)?; + + // In debug mode, the console device doesn't shut down until the enclave itself exits. Thus, + // libkrun will be unable to retrieve the shutdown code from the enclave. if !self.debug { - let ret = self.shutdown_ret(retcode_listener)?; + // Retrieve the application return code from the enclave. + let ret = self + .shutdown_ret(retcode_listener) + .map_err(Error::ReturnCodeListener)?; + + // A non-zero return code indicates an error. Wrap this code within an Error object. 
if ret != 0 { - return Err(NitroError::AppReturn(ret)); + return Err(Error::AppReturn(ret)); } } Ok(()) } - fn start(&mut self) -> Result<(u32, PollTimeout)> { - let eif = eif()?; + /// Start a nitro enclave. + fn start(&mut self) -> Result<(u32, PollTimeout), start::Error> { + // Read the cached EIF file required to run the enclave. + let eif = { + let path = env::var(KRUN_NITRO_EIF_PATH_ENV_VAR) + .unwrap_or(KRUN_NITRO_EIF_PATH_DEFAULT.to_string()); + + fs::read(path).map_err(start::Error::EifRead) + }?; + // Calculate the poll timeout (based on the size of the EIF file and amount of RAM allocated + // to the enclave) for the enclave to indicate that has successfully started. let timeout = PollTimeout::try_from((eif.as_slice(), self.mem_size_mib << 20)) - .map_err(NitroError::PollTimeoutCalculate)?; + .map_err(start::Error::PollTimeoutCalculate)?; - let device = Device::open().map_err(NitroError::DeviceOpen)?; + // Launch an enclave VM with the configured number of vCPUs and amount of RAM. + let device = Device::open().map_err(start::Error::DeviceOpen)?; - let mut launcher = Launcher::new(&device).map_err(NitroError::VmCreate)?; + let mut launcher = Launcher::new(&device).map_err(start::Error::VmCreate)?; let mem = MemoryInfo::new(ImageType::Eif(&eif), self.mem_size_mib); - launcher.set_memory(mem).map_err(NitroError::VmMemorySet)?; + launcher + .set_memory(mem) + .map_err(start::Error::VmMemorySet)?; for _ in 0..self.vcpus { - launcher.add_vcpu(None).map_err(NitroError::VcpuAdd)?; + launcher.add_vcpu(None).map_err(start::Error::VcpuAdd)?; } + // Indicate to the enclave to start in debug mode if configured. let mut start_flags = StartFlags::empty(); if self.debug { start_flags |= StartFlags::DEBUG; } + // Start the enclave. let cid = launcher .start(start_flags, None) - .map_err(NitroError::VmStart)?; + .map_err(start::Error::VmStart)?; - Ok((cid.try_into().unwrap(), timeout)) // Safe to unwrap. + // Safe to unwrap. 
+ Ok((cid.try_into().unwrap(), timeout)) } - fn devices(&self) -> Result { + /// Initialize and collect all device proxies used for the enclave. + fn proxies(&self) -> Result { let mut proxies: Vec> = vec![]; - let output = - OutputProxy::new(&self.output_path, self.debug).map_err(NitroError::DeviceError)?; + // All enclaves will include a proxy for debug/application output. + let output = OutputProxy::new(&self.output_path, self.debug)?; proxies.push(Box::new(output)); - if let Some(net) = self.net.clone() { + if let Some(fd) = self.net_unixfd { + let net = NetProxy::try_from(fd)?; proxies.push(Box::new(net)); } - proxies.push(Box::new(SignalHandler)); + // All enclaves will include a proxy for signal handling (e.g. forwarding SIGTERM signals to + // application running within the enclave). + proxies.push(Box::new(SignalHandler::new()?)); Ok(DeviceProxyList(proxies)) } - fn rootfs_archive(&self) -> Result> { + /// Produce a tarball of the enclave's rootfs (to be written to and extracted by the enclave + // initramfs). + fn rootfs_archive(&self) -> Result, io::Error> { let mut builder = tar::Builder::new(Vec::new()); builder.mode(HeaderMode::Deterministic); @@ -156,54 +194,76 @@ impl NitroEnclave { .file_name() .unwrap_or(OsStr::new("/")) .to_str() - .ok_or(NitroError::RootFsArchive(io::Error::other( - "unable to convert rootfs directory name to str", + .ok_or(io::Error::other(format!( + "unable to convert rootfs directory name (\"{:?}\") to str", + pathbuf_copy )))?; - for entry in fs::read_dir(pathbuf).map_err(NitroError::RootFsArchive)? { - let entry = entry.map_err(NitroError::RootFsArchive)?; - let filetype = entry.file_type().map_err(NitroError::RootFsArchive)?; - let filename = entry.file_name().into_string().map_err(|_| { - NitroError::RootFsArchive(io::Error::other( - "unable to convert file name to String object", + // Traverse each directory and file within the root directory tree. 
If a directory is not + // found within the denylist, add it to the archive. + for entry in fs::read_dir(pathbuf)? { + let entry = entry?; + let filetype = entry.file_type()?; + let filename = entry.file_name().into_string().map_err(|e| { + io::Error::other(format!( + "unable to convert file name {:?} to String object", + e )) })?; if !ROOTFS_DIR_DENYLIST.contains(&filename.as_str()) && filename != rootfs_dirname { if filetype.is_dir() { - builder - .append_dir_all(format!("rootfs/{}", filename), entry.path()) - .map_err(NitroError::RootFsArchive)?; + builder.append_dir_all(format!("rootfs/{}", filename), entry.path())? } else if filetype.is_file() { - builder - .append_path_with_name(entry.path(), format!("rootfs/{}", filename)) - .map_err(NitroError::RootFsArchive)?; + builder.append_path_with_name(entry.path(), format!("rootfs/{}", filename))? } } } - builder.into_inner().map_err(NitroError::RootFsArchive) + builder.into_inner() } - fn shutdown_ret(&self, vsock_listener: VsockListener) -> Result { - let (mut vsock_stream, _vsock_addr) = vsock_listener.accept().unwrap(); + // Receive a 4-byte (representing an i32) return code from the enclave via vsock. This + // represents the return code of the application that ran within the enclave. 
+ fn shutdown_ret(&self, vsock_listener: VsockListener) -> Result { + let (mut vsock_stream, _vsock_addr) = vsock_listener + .accept() + .map_err(return_code::Error::VsockAccept)?; let mut buf = [0u8; 4]; - let _ = vsock_stream.read(&mut buf).unwrap(); + let _ = vsock_stream + .read(&mut buf) + .map_err(return_code::Error::VsockRead)?; + + let close_signal: u32 = 0; + vsock_stream + .write_all(&close_signal.to_ne_bytes()) + .map_err(return_code::Error::VsockWrite)?; Ok(i32::from_ne_bytes(buf)) } -} -fn eif() -> Result> { - let path = env::var("KRUN_NITRO_EIF_PATH") - .unwrap_or("/usr/share/krun-nitro/krun-nitro.eif".to_string()); - - let bytes = fs::read(path).map_err(NitroError::EifRead)?; - - Ok(bytes) + // Enable or disable all signals. + fn signals(&self, enable: bool) { + let sig = if enable { + libc::SIG_UNBLOCK + } else { + libc::SIG_BLOCK + }; + + let mut set: libc::sigset_t = unsafe { std::mem::zeroed() }; + unsafe { + libc::sigfillset(&mut set); + libc::pthread_sigmask(sig, &set, std::ptr::null_mut()); + } + } } +/// Each service provided to an enclave is done so via vsock. Each service has a designated port +/// offset (relative to the enclave VM's CID) to connect to for service. 
The port number for each of +/// an enclave's services can be calculated as: +/// +/// vsock port = (Enclave VM CID + vsock port offset) #[repr(u32)] pub enum VsockPortOffset { ArgsReader = 1, diff --git a/src/nitro/src/enclave/proxy/mod.rs b/src/nitro/src/enclave/proxy/mod.rs new file mode 100644 index 000000000..eace1d2d7 --- /dev/null +++ b/src/nitro/src/enclave/proxy/mod.rs @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: Apache-2.0 + +mod proxies; + +pub use proxies::*; + +use crate::enclave::args_writer::EnclaveArg; +use std::{ + fmt, io, + num::TryFromIntError, + sync::mpsc::{self, RecvTimeoutError}, + thread::{self, JoinHandle}, + time::Duration, +}; +use vsock::*; + +type Result = std::result::Result; + +/// Device proxy trait to describe shared behavior between all proxies. +pub trait DeviceProxy: Send { + /// Enclave argument of the proxy. + fn arg(&self) -> Option>; + /// Clone a proxy's contents. + fn clone(&self) -> Result>>; + /// Receive data from the proxy's vsock. Perhaps perform some other functions. + fn rcv(&mut self, vsock: &mut VsockStream) -> Result; + /// Write data to the enclave's vsock. Perhaps perform some other functions. + fn send(&mut self, vsock: &mut VsockStream) -> Result; + /// Establish the proxy's respective vsock connection. + fn vsock(&mut self, cid: u32) -> Result; +} + +/// List of all configured device proxies. +pub struct DeviceProxyList(pub Vec>); + +impl DeviceProxyList { + /// Run each proxy's send and receive processes within their own dedicated threads. + pub fn run(self, cid: u32) -> Result<()> { + // This function will not return until all device proxies' dedicated threads have returned. + // Under normal conditions, this will only happen when the enclave completes execution and + // gracefully closes all proxy vsock connections. Store each thread's JoinHandle in a list + // to keep track of completed proxy threads. 
+ let mut handles: Vec>> = Vec::new(); + + for mut proxy in self.0 { + // Get a proxy's vsock connection for its receiver thread. + let mut vsock_rcv = proxy.vsock(cid)?; + + let handle: JoinHandle> = thread::spawn(move || { + // Clone the proxy and vsock connection data for the proxy's sender thread. + let clone = proxy.clone()?; + let mut vsock_send = vsock_rcv.try_clone().map_err(Error::VsockClone)?; + + // Establish a message passing channel for the receiver thread to notify the send + // thread that the enclave has closed the connection. + let (tx, rx) = mpsc::channel::<()>(); + + // Receiver thread. Receive data from the vsock and perform some proxy-dependent + // action with the data. + let rcv: JoinHandle> = thread::spawn(move || loop { + // Proxy rcv method returns the number of bytes read from the vsock. + match proxy.rcv(&mut vsock_rcv) { + // Zero bytes read indicates the enclave has closed the vsock connection. + // Notify the sender thread that the vsock was closed. + Ok(0) => { + let _ = tx.send(()); + return Ok(()); + } + // Bytes were read, continue the receive process. + Ok(_) => continue, + // An error occurred, exit the receiver thread and notify the sender thread to + // also exit. + Err(e) => { + let _ = tx.send(()); + return Err(e); + } + } + }); + + // Sender thread. Perform some proxy-dependent action and (if applicable) write data + // to the vsock. + let send: JoinHandle> = thread::spawn(move || { + // Some proxies (like output/debug) do not send data to the enclave. If there is + // nothing to be done, exit the thread. + if let Some(mut sender) = clone { + loop { + // Proxy send method returns the number of bytes written to the vsock. + let size = sender.send(&mut vsock_send)?; + + // No data was written to the vsock. This may indicate that the timeout + // has occurred without data being retrieved from the device's other + // party. This may indicate that the proxy is complete. 
Check for this + // by checking whether a message was sent by the receiver thread. + if size == 0 { + match rx.recv_timeout(Duration::from_micros(500)) { + // Message was sent indicating the enclave has closed the + // connection, exit from this thread. + Ok(_) => break, + Err(e) => { + // The receiver thread has not sent a shutdown signal. + // Continue execution. + if e == RecvTimeoutError::Timeout { + continue; + } else { + // Error in fetching message from receiver thread. + return Err(Error::ShutdownSignalReceive(e))?; + } + } + } + } + } + } + + Ok(()) + }); + + if let Ok(Err(e)) = rcv.join() { + log::error!("error in device proxy receiver thread: {e}"); + } + + if let Ok(Err(e)) = send.join() { + log::error!("error in device proxy sender thread: {e}"); + } + + Ok(()) + }); + + // Add the proxy's control thread JoinHandle into the list. + handles.push(handle); + } + + // Traverse over each device proxy thread and ensure it closes and exits correctly. Do not + // return until all do. Only `Ok(Err(_))` results are logged; a panicked thread is not. + for handle in handles.into_iter() { + let res = handle.join(); + if let Ok(Err(err)) = res { + log::error!("error running enclave device proxy: {:?}", err); + } + } + + Ok(()) + } +} + +/// Error while running a device proxy. +#[derive(Debug)] +pub enum Error { + // Opening a file (for proxies also communicating with files/sockets). + FileOpen(io::Error), + // Writing to a file. + FileWrite(io::Error), + // Receiving a shutdown signal from a proxy's receiver thread. + ShutdownSignalReceive(mpsc::RecvTimeoutError), + // Registering a signal for the signal handler. + SignalRegister(io::Error), + // Cloning a unix socket. + UnixClone(io::Error), + // Reading from a unix socket. + UnixRead(io::Error), + // Setting the read timeout for a unix socket. + UnixReadTimeoutSet(io::Error), + // Writing to a unix socket. + UnixWrite(io::Error), + // Accepting the vsock connection. + VsockAccept(io::Error), + // Binding to the vsock. 
+ VsockBind(io::Error), + // Converting a vsock buffer length between integer types. + VsockBufferLenConvert(TryFromIntError), + // Cloning the vsock. + VsockClone(io::Error), + // Connecting to the vsock. + VsockConnect(io::Error), + // Reading from the vsock. + VsockRead(io::Error), + // Writing to the vsock. + VsockWrite(io::Error), +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let msg = match self { + Self::FileOpen(e) => format!("unable to open file: {e}"), + Self::FileWrite(e) => format!("unable to write buffer to output file: {e}"), + Self::ShutdownSignalReceive(e) => { + format!("error while receiving read proxy shutdown signal: {e}") + } + Self::SignalRegister(e) => { + format!("unable to register signal in signal handler proxy: {e}") + } + Self::UnixClone(e) => format!("unable to clone unix stream: {e}"), + Self::UnixRead(e) => format!("unable to read from unix stream: {e}"), + Self::UnixReadTimeoutSet(e) => { + format!("unable to set read timeout for unix stream: {e}") + } + Self::UnixWrite(e) => format!("unable to write to unix stream: {e}"), + Self::VsockAccept(e) => format!("unable to accept connection from vsock: {e}"), + Self::VsockBind(e) => format!("unable to bind to vsock: {e}"), + Self::VsockBufferLenConvert(e) => { + format!("unable to convert vsock buffer size to u32: {e}") + } + Self::VsockConnect(e) => format!("unable to connect to vsock: {e}"), + Self::VsockClone(e) => format!("unable to clone vsock: {e}"), + Self::VsockRead(e) => format!("unable to read from vsock: {e}"), + Self::VsockWrite(e) => format!("unable to write to vsock: {e}"), + }; + + write!(f, "{}", msg) + } +} diff --git a/src/nitro/src/enclave/device/devices/mod.rs b/src/nitro/src/enclave/proxy/proxies/mod.rs similarity index 100% rename from src/nitro/src/enclave/device/devices/mod.rs rename to src/nitro/src/enclave/proxy/proxies/mod.rs diff --git a/src/nitro/src/enclave/proxy/proxies/net.rs 
b/src/nitro/src/enclave/proxy/proxies/net.rs new file mode 100644 index 000000000..ef98570ea --- /dev/null +++ b/src/nitro/src/enclave/proxy/proxies/net.rs @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: Apache-2.0 + +use crate::enclave::{ + args_writer::EnclaveArg, + proxy::{DeviceProxy, Error, Result}, + VsockPortOffset, +}; +use std::{ + io::{ErrorKind, Read, Write}, + mem::size_of, + os::{ + fd::{FromRawFd, OwnedFd, RawFd}, + unix::net::UnixStream, + }, + time::Duration, +}; +use vsock::{VsockAddr, VsockListener, VsockStream, VMADDR_CID_ANY}; + +/// Network proxy. Forwards data to/from a UNIX socket and vsock within an enclave to provide +/// network access. +pub struct NetProxy { + // Unix socket connected to service providing network access. + unix: UnixStream, + // Buffer to send/receive data to/from vsock. + buf: Vec, +} + +impl TryFrom for NetProxy { + type Error = Error; + + fn try_from(fd: RawFd) -> Result { + let buf = Vec::new(); + + let unix = unsafe { UnixStream::from(OwnedFd::from_raw_fd(fd)) }; + unix.set_read_timeout(Some(Duration::from_millis(250))) + .map_err(Error::UnixReadTimeoutSet)?; + + Ok(Self { buf, unix }) + } +} + +impl DeviceProxy for NetProxy { + /// Enclave argument of the proxy. + fn arg(&self) -> Option> { + Some(EnclaveArg::NetworkProxy) + } + + /// Clone a proxy's contents (notably, its connected unix socket). + fn clone(&self) -> Result>> { + let unix = self.unix.try_clone().map_err(Error::UnixClone)?; + + Ok(Some(Box::new(Self { + buf: self.buf.clone(), + unix, + }))) + } + + /// Receive data from the proxy's vsock. Forward the data to the connected unix socket. + fn rcv(&mut self, vsock: &mut VsockStream) -> Result { + let size = vsock.read(&mut self.buf).map_err(Error::VsockRead)?; + if size > 0 { + self.unix + .write_all(&self.buf[..size]) + .map_err(Error::UnixWrite)?; + } + + Ok(size) + } + + /// Receive data from the connected unix socket. Forward the data to the proxy's vsock. 
+ fn send(&mut self, vsock: &mut VsockStream) -> Result { + match self.unix.read(&mut self.buf) { + Ok(size) => { + if size > 0 { + let _ = vsock.write_all(&self.buf[..size]); + } + + Ok(size) + } + // No data read from unix socket before timeout. + Err(ref e) if e.kind() == ErrorKind::TimedOut || e.kind() == ErrorKind::WouldBlock => { + Ok(0) + } + Err(e) => Err(Error::UnixRead(e)), + } + } + + /// Establish the proxy's vsock connection. + fn vsock(&mut self, cid: u32) -> Result { + let port = cid + (VsockPortOffset::Net as u32); + + let listener = + VsockListener::bind(&VsockAddr::new(VMADDR_CID_ANY, port)).map_err(Error::VsockBind)?; + + let (mut vsock, _) = listener.accept().map_err(Error::VsockAccept)?; + + /* + * Upon initial connection, read the MTU size from the enclave and allocate the buffer + * accordingly. + */ + let size = { + let mut size_buf = [0u8; size_of::()]; + let _ = vsock.read(&mut size_buf).map_err(Error::VsockRead)?; + + u32::from_ne_bytes(size_buf) + }; + + self.buf + .resize(size.try_into().map_err(Error::VsockBufferLenConvert)?, 0); + + Ok(vsock) + } +} diff --git a/src/nitro/src/enclave/proxy/proxies/output.rs b/src/nitro/src/enclave/proxy/proxies/output.rs new file mode 100644 index 000000000..047016232 --- /dev/null +++ b/src/nitro/src/enclave/proxy/proxies/output.rs @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: Apache-2.0 + +use crate::enclave::{ + args_writer::EnclaveArg, + proxy::{DeviceProxy, Error}, + VsockPortOffset, +}; +use std::{ + fs::File, + fs::OpenOptions, + io::{Read, Write}, + path::PathBuf, +}; +use vsock::{VsockAddr, VsockListener, VsockStream, VMADDR_CID_ANY, VMADDR_CID_HYPERVISOR}; + +type Result = std::result::Result; + +const OUTPUT_BUFFER_SIZE: usize = 1500; + +/// Output proxy. May output application process logs or (in debug mode) kernel+initramfs logs as +// well. +pub struct OutputProxy { + // The file to write enclave output to. + file: File, + // Indicator of debug mode. 
+ debug: bool, + // Buffer to receive data from the vsock. + buf: Vec, +} + +impl OutputProxy { + /// Open the file to which enclave output is forwarded. + pub fn new(path: &PathBuf, debug: bool) -> Result { + let file = OpenOptions::new() + .read(false) + .write(true) + .open(path) + .map_err(Error::FileOpen)?; + + Ok(Self { + file, + debug, + buf: vec![0u8; OUTPUT_BUFFER_SIZE], + }) + } +} + +impl DeviceProxy for OutputProxy { + /// Enclave argument of the proxy. + fn arg(&self) -> Option> { + // The enclave only needs to be made aware that it is to be run in debug mode. + match self.debug { + true => Some(EnclaveArg::Debug), + false => None, + } + } + + /// The output proxy doesn't send any data to the enclave, so there is no need for cloning it + /// for a sender thread. + fn clone(&self) -> Result>> { + Ok(None) + } + + /// Receive data from the proxy's vsock. Forward the data to the output file. + fn rcv(&mut self, vsock: &mut VsockStream) -> Result { + let size = vsock.read(&mut self.buf).map_err(Error::VsockRead)?; + if size > 0 { + self.file + .write_all(&self.buf[..size]) + .map_err(Error::FileWrite)?; + } + + Ok(size) + } + + /// The output proxy does not send data to the enclave. + fn send(&mut self, _vsock: &mut VsockStream) -> Result { + Ok(0) + } + + /// Establish the proxy's vsock connection. + fn vsock(&mut self, cid: u32) -> Result { + // If debug mode is enabled, connect to the enclave's console for kernel+initramfs logs. + let port = { + let offset = match self.debug { + true => VsockPortOffset::Console, + false => VsockPortOffset::AppOutput, + }; + + cid + (offset as u32) + }; + + // If debug mode is enabled, the enclave already binds to the console vsock. + let vsock = if self.debug { + VsockStream::connect(&VsockAddr::new(VMADDR_CID_HYPERVISOR, port)) + .map_err(Error::VsockConnect)? 
+ } else { + let listener = VsockListener::bind(&VsockAddr::new(VMADDR_CID_ANY, port)) + .map_err(Error::VsockBind)?; + + let (vsock, _) = listener.accept().map_err(Error::VsockAccept)?; + + vsock + }; + + Ok(vsock) + } +} diff --git a/src/nitro/src/enclave/proxy/proxies/signal_handler.rs b/src/nitro/src/enclave/proxy/proxies/signal_handler.rs new file mode 100644 index 000000000..7b4ab7b02 --- /dev/null +++ b/src/nitro/src/enclave/proxy/proxies/signal_handler.rs @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: Apache-2.0 + +use crate::enclave::{ + proxy::{EnclaveArg, Error, Result}, + DeviceProxy, VsockPortOffset, +}; +use signal_hook::consts::SIGTERM; +use std::{ + io::{ErrorKind, Read, Write}, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, + }, +}; +use vsock::{VsockAddr, VsockListener, VsockStream, VMADDR_CID_ANY}; + +/// Signal handler proxy. Forwards signals from the host to the enclave. Currently, only SIGTERM is +/// supported. +#[derive(Clone)] +pub struct SignalHandler { + // Signal hook to determine when a SIGTERM is caught. + sig: Arc, + // Scratch buffer for reads from the vsock (see `rcv`); not used when forwarding SIGTERM. + buf: [u8; 1], +} + +impl SignalHandler { + /// Create a new signal handler proxy with the SIGTERM hook set to false (not caught yet). + pub fn new() -> Result { + let sig = Arc::new(AtomicBool::new(false)); + signal_hook::flag::register(SIGTERM, Arc::clone(&sig)).map_err(Error::SignalRegister)?; + + let buf = [0u8; 1]; + + Ok(Self { sig, buf }) + } +} + +impl DeviceProxy for SignalHandler { + /// Enclave argument of the proxy. + fn arg(&self) -> Option> { + None + } + + /// Clone a proxy's contents. The clone shares the SIGTERM flag (an `Arc`), so it observes signals caught after cloning. + fn clone(&self) -> Result>> { + Ok(Some(Box::new(Clone::clone(self)))) + } + + /// Receive data from the proxy's vsock. This should never read any actual data, but be a + /// placeholder to indicate that the enclave has closed the vsock connection. 
+ fn rcv(&mut self, vsock: &mut VsockStream) -> Result { + vsock.read(&mut self.buf).map_err(Error::VsockRead) + } + + /// Check if a SIGTERM was caught. If so, write the signal to the enclave indicating it should + /// gracefully shut down. The flag is cleared on read so each caught SIGTERM is forwarded once. + fn send(&mut self, vsock: &mut VsockStream) -> Result { + if !self.sig.swap(false, Ordering::Relaxed) { + return Ok(0); + } + + let sig = libc::SIGTERM; + match vsock.write(&sig.to_ne_bytes()) { + Ok(size) => Ok(size), + /* + * If connection was already closed by enclave, return zero bytes written in order to + * listen for receiver shutdown signal. + */ + Err(e) if e.kind() == ErrorKind::BrokenPipe => Ok(0), + Err(e) => Err(Error::VsockWrite(e)), + } + } + + /// Establish the proxy's vsock connection. + fn vsock(&mut self, cid: u32) -> Result { + let port = cid + (VsockPortOffset::SignalHandler as u32); + + let listener = + VsockListener::bind(&VsockAddr::new(VMADDR_CID_ANY, port)).map_err(Error::VsockBind)?; + + let (vsock, _) = listener.accept().map_err(Error::VsockAccept)?; + + Ok(vsock) + } +} diff --git a/src/nitro/src/error.rs b/src/nitro/src/error.rs index 2eed35aa6..0218ec6f9 100644 --- a/src/nitro/src/error.rs +++ b/src/nitro/src/error.rs @@ -1,98 +1,115 @@ // SPDX-License-Identifier: Apache-2.0 -use super::enclave::device; -use nitro_enclaves::launch::LaunchError; -use std::{ffi, fmt, io}; +use super::enclave::{args_writer, proxy}; +use std::{fmt, io}; +/// Error in the running of a nitro enclave. #[derive(Debug)] -pub enum NitroError { +pub enum Error { + // Application running within the enclave returned a non-zero return code. 
AppReturn(i32), - DeviceOpen(io::Error), - VmCreate(LaunchError), - VmMemorySet(LaunchError), - VcpuAdd(LaunchError), - HeartbeatAccept(io::Error), - HeartbeatBind(io::Error), - HeartbeatRead(io::Error), - HeartbeatWrite(io::Error), - VmStart(LaunchError), - PollTimeoutCalculate(LaunchError), - PollNoSelectedEvents, - PollMoreThanOneSelectedEvent, - EnclaveHeartbeatNotDetected, + // Error in the enclave argument writing process. + ArgsWrite(args_writer::Error), + // Error in device proxy execution. + DeviceProxy(proxy::Error), + // Error in listener for application return code. + ReturnCodeListener(return_code::Error), + // Error in rootfs tar archiving. RootFsArchive(io::Error), - HeartbeatCidMismatch, - VsockCreate, - VsockSetTimeout, - VsockConnect, - IpcWrite(io::Error), - VsockBytesLenWrite(io::Error), - VsockBytesWrite(io::Error), - VsockBytesTooLarge, - CStringConversion(ffi::NulError), - EifRead(io::Error), - EifTarExtract(io::Error), - DeviceError(device::Error), + // Error in launching the enclave. 
+ Start(start::Error), } -impl fmt::Display for NitroError { +impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let msg = match self { - NitroError::AppReturn(ret) => format!("app returned non-zero return code: {ret}"), - NitroError::DeviceOpen(e) => format!("unable to open nitro enclaves device: {e}"), - NitroError::VmCreate(e) => format!("unable to create enclave VM: {e}"), - NitroError::VmMemorySet(e) => format!("unable to set enclave memory regions: {e}"), - NitroError::VcpuAdd(e) => format!("unable to add vCPU to enclave: {e}"), - NitroError::HeartbeatAccept(e) => { - format!("unable to accept enclave heartbeat vsock: {e}") + Self::AppReturn(ret) => format!("app returned non-zero return code: {ret}"), + Self::ArgsWrite(e) => format!("enclave VM argument writer error: {e}"), + Self::DeviceProxy(e) => format!("device proxy error: {e}"), + Self::ReturnCodeListener(e) => { + format!("error with enclave VM return code listener: {e}") } - NitroError::HeartbeatBind(e) => { - format!("unable to bind to enclave heartbeat vsock: {e}") - } - NitroError::HeartbeatRead(e) => format!("unable to read enclave heartbeat vsock: {e}"), - NitroError::HeartbeatWrite(e) => { - format!("unable to write to enclave heartbeat vsock: {e}") - } - NitroError::VmStart(e) => format!("unable to start enclave: {e}"), - NitroError::PollTimeoutCalculate(e) => { - format!("unable to calculate vsock poll timeout: {e}") - } - NitroError::PollNoSelectedEvents => { - "no selected poll fds for heartbeat vsock found".to_string() - } - NitroError::PollMoreThanOneSelectedEvent => { - "more than one selected pollfd for heartbeat vsock found".to_string() - } - NitroError::EnclaveHeartbeatNotDetected => { - "enclave heartbeat message not detected".to_string() - } - NitroError::HeartbeatCidMismatch => "enclave heartbeat vsock CID mismatch".to_string(), - NitroError::VsockCreate => "unable to create enclave vsock".to_string(), - NitroError::VsockSetTimeout => { - 
"unable to set poll timeout for enclave vsock".to_string() - } - NitroError::VsockConnect => "unable to connect to enclave vsock".to_string(), - NitroError::RootFsArchive(e) => { + Self::RootFsArchive(e) => { format!("unable to archive rootfs: {e}") } - NitroError::IpcWrite(e) => { - format!("unable to write enclave vsock data to UNIX IPC socket: {e}") - } - NitroError::VsockBytesLenWrite(e) => { - format!("unable to write rootfs archive length to enclave: {e}") - } - NitroError::VsockBytesWrite(e) => { - format!("unable to write rootfs archive to enclave: {e}") - } - NitroError::VsockBytesTooLarge => { - "vsock write byte buffer size is larger than 64 bytes".to_string() - } - NitroError::CStringConversion(e) => format!("unable to convert String to CString: {e}"), - NitroError::EifRead(e) => format!("unable to read cached EIF file: {e}"), - NitroError::EifTarExtract(e) => format!("unable to extract EIF from tar archive: {e}"), - NitroError::DeviceError(e) => format!("device proxy error: {:?}", e), + Self::Start(e) => format!("error launching enclave VM: {e}"), }; write!(f, "{}", msg) } } + +pub mod start { + use super::*; + use nitro_enclaves::launch::LaunchError; + + /// Error in launching the enclave. + #[derive(Debug)] + pub enum Error { + // Opening the /dev/nitro_enclaves device. + DeviceOpen(io::Error), + // Reading the cached EIF. + EifRead(io::Error), + // Calculating the poll timeout. + PollTimeoutCalculate(LaunchError), + // Adding a vCPU to an enclave VM. + VcpuAdd(LaunchError), + // Creating the enclave VM. + VmCreate(LaunchError), + // Setting the enclave VM's memory. + VmMemorySet(LaunchError), + // Starting the enclave VM. 
+ VmStart(LaunchError), + } + + impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let msg = match self { + Self::DeviceOpen(e) => format!("unable to open nitro enclaves device: {e}"), + Self::EifRead(e) => format!("unable to read cached EIF file: {e}"), + Self::PollTimeoutCalculate(e) => { + format!("unable to calculate vsock poll timeout for enclave VM: {e}") + } + Self::VcpuAdd(e) => format!("unable to add vCPU to enclave VM: {e}"), + Self::VmCreate(e) => format!("unable to create enclave VM: {e}"), + Self::VmMemorySet(e) => { + format!("unable to set enclave VM memory regions: {e}") + } + Self::VmStart(e) => format!("unable to start enclave VM: {e}"), + }; + + write!(f, "{}", msg) + } + } +} + +pub mod return_code { + use super::*; + + /// Error in listener for application return code. All variants share the `Vsock` prefix, hence the `enum_variant_names` allowance. + #[derive(Debug)] + #[allow(clippy::enum_variant_names)] + pub enum Error { + // Accepting the vsock connection. + VsockAccept(io::Error), + // Binding to the vsock. + VsockBind(io::Error), + // Reading from the vsock. + VsockRead(io::Error), + // Writing to the vsock. + VsockWrite(io::Error), + } + + impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let msg = match self { + Self::VsockAccept(e) => format!("unable to accept vsock connection: {e}"), + Self::VsockBind(e) => format!("unable to bind to vsock: {e}"), + Self::VsockRead(e) => format!("unable to read from vsock: {e}"), + Self::VsockWrite(e) => format!("unable to write to vsock: {e}"), + }; + + write!(f, "{}", msg) + } + } +} diff --git a/src/nitro/src/lib.rs b/src/nitro/src/lib.rs index 16c3ac562..2b9e282be 100644 --- a/src/nitro/src/lib.rs +++ b/src/nitro/src/lib.rs @@ -5,6 +5,3 @@ pub mod enclave; #[cfg(feature = "nitro")] mod error; - -#[cfg(feature = "nitro")] -pub use enclave::device::net::NetProxy;