From 30164a45bad6528d7b300e232e781cbdc0f1f875 Mon Sep 17 00:00:00 2001 From: Lukas Gerlach Date: Tue, 2 Dec 2025 15:32:52 +0100 Subject: [PATCH] Add RISC-V architecture support Port Osiris to RISC-V (64-bit), tested on T-Head C910 processor. Key changes: - Add RISCV target architecture to CMakeLists.txt - Implement RISC-V prolog/epilog with proper calling convention - Add RISC-V timing using rdcycle and fence instructions - Implement Spectre-RSB pattern using JAL/RET for RSB manipulation - Add fence.i for instruction cache synchronization - Handle SIGBUS for RISC-V misaligned memory accesses - Add T-Head C910 cache flush instructions (dcache.*, icache.*) - Update filters to recognize RISC-V cache instructions - Rename x86Instruction to Instruction for architecture independence - Add instruction generator script parsing riscv-opcodes format Build with: cmake -DARCH=RISCV .. Requires: Capstone 5+ with RISC-V support --- CMakeLists.txt | 12 +- README.md | 12 +- riscv-instructions/fetch-riscv-opcodes.sh | 2 + .../generate_riscv_instructions.py | 391 ++++++++++++++++++ src/code_generator.cc | 14 +- src/code_generator.h | 12 +- src/core.cc | 18 +- src/executor.cc | 337 ++++++++++++++- src/executor.h | 1 + src/filter.cc | 59 +-- src/osiris.cc | 21 +- src/utils.cc | 4 + src/utils.h | 1 + 13 files changed, 807 insertions(+), 77 deletions(-) create mode 100755 riscv-instructions/fetch-riscv-opcodes.sh create mode 100644 riscv-instructions/generate_riscv_instructions.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 5963ef9..f797e88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,8 +36,11 @@ if (ARCH STREQUAL INTEL) elseif(ARCH STREQUAL AMD) message(STATUS "Compiling for AMD processor.") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DAMD") +elseif(ARCH STREQUAL RISCV) + message(STATUS "Compiling for RISC-V processor.") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DRISCV") else() - message(WARNING "Illegal or missing value for target processor. Set '-DARCH=[INTEL|AMD]'. 
Defaulting to INTEL.") + message(WARNING "Illegal or missing value for target processor. Set '-DARCH=[INTEL|AMD|RISCV]'. Defaulting to INTEL.") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DINTEL") endif() @@ -62,5 +65,12 @@ add_executable(osiris # dependencies find_package(OpenSSL REQUIRED) target_link_libraries(osiris OpenSSL::Crypto) + +# RISC-V needs Capstone 5+ for RISC-V disassembly support +# Use LD_LIBRARY_PATH=/usr/local/lib at runtime to load the correct version +if (ARCH STREQUAL RISCV) + target_include_directories(osiris PRIVATE /usr/local/include) +endif() target_link_libraries(osiris capstone) + target_link_libraries(osiris stdc++fs) # GCC version < 9 needs this to support c++ filesystem lib diff --git a/README.md b/README.md index 2e81634..4db48a2 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ This repository contains the implementation of the Osiris framework discussed in the research paper "Osiris: Automated Discovery of Microarchitectural Side Channels" (USENIX Security'21). You can find the paper at the [USENIX website](https://www.usenix.org/system/files/sec21-weber.pdf). The framework is developed to find microarchitectural side channels in an automated manner. -Currently the implementation supports only x86 processors. +The implementation supports x86 (Intel/AMD) and RISC-V processors. ## Supported Platforms Osiris is developed and tested on Arch Linux and Ubuntu. 
@@ -24,9 +24,10 @@ Hence, we expect Osiris to work on other Linux distributions as well but we did - Arch-Linux-package: `cmake` - Ubuntu-package: `cmake` -#### Capstone -- Arch-Linux-package: `capstone` +#### Capstone +- Arch-Linux-package: `capstone` - Ubuntu-packages: `libcapstone-dev, libcapstone3` +- RISC-V: Requires Capstone 5+ (build from source: https://github.com/capstone-engine/capstone) #### OpenSSL - Arch-Linux-package: `openssl` @@ -47,8 +48,11 @@ On Arch-Linux: pip install pwntools --user ``` +#### RISC-V Cross-Compiler (RISC-V only, for generating instructions) +- `riscv64-elf-gcc` toolchain (provides `riscv64-elf-as`) + ## Building -Just install all listed dependencies and execute `./build.sh INTEL` or `./build.sh AMD` for Intel and AMD processors, respectively. +Just install all listed dependencies and execute `./build.sh INTEL`, `./build.sh AMD`, or `./build.sh RISCV` for Intel, AMD, and RISC-V processors, respectively. ## Noise Reduction To get precise results Osiris relies on the operating system to reduce the noise of its diff --git a/riscv-instructions/fetch-riscv-opcodes.sh b/riscv-instructions/fetch-riscv-opcodes.sh new file mode 100755 index 0000000..c1a1e82 --- /dev/null +++ b/riscv-instructions/fetch-riscv-opcodes.sh @@ -0,0 +1,2 @@ +#! /bin/sh +git clone https://github.com/riscv/riscv-opcodes.git riscv-opcodes diff --git a/riscv-instructions/generate_riscv_instructions.py b/riscv-instructions/generate_riscv_instructions.py new file mode 100644 index 0000000..e7c9650 --- /dev/null +++ b/riscv-instructions/generate_riscv_instructions.py @@ -0,0 +1,391 @@ +#!/usr/bin/env python3 +""" +Generate RISC-V instruction database for Osiris. +Parses riscv-opcodes format directly to compute encodings. 
+""" + +import base64 +import os +import re +from pathlib import Path + +OPCODES_DIR = Path(__file__).parent / "riscv-opcodes" / "extensions" + +# Standard RISC-V register field positions +FIELD_RD = (11, 7) # bits 11..7 +FIELD_RS1 = (19, 15) # bits 19..15 +FIELD_RS2 = (24, 20) # bits 24..20 +FIELD_RS3 = (31, 27) # bits 31..27 (for R4-type) + +# Fixed register values (following Osiris paper - one variant per opcode) +REG_RD = 10 # a0 +REG_RS1 = 11 # a1 +REG_RS2 = 12 # a2 +REG_RS3 = 13 # a3 + +# FP registers +FREG_RD = 10 # fa0 +FREG_RS1 = 11 # fa1 +FREG_RS2 = 0 # ft0 +FREG_RS3 = 1 # ft1 + +# Instructions to skip (cause control flow issues or are privileged) +SKIP_INSTRUCTIONS = { + 'jal', 'jalr', 'beq', 'bne', 'blt', 'bge', 'bltu', 'bgeu', + 'ret', 'jr', 'j', 'call', 'tail', + 'beqz', 'bnez', 'bltz', 'bgtz', 'blez', 'bgez', + 'bgt', 'ble', 'bgtu', 'bleu', + 'ecall', # system call - will cause issues + 'mret', 'sret', 'uret', 'mnret', # return from trap + 'wfi', # wait for interrupt - will hang + 'sfence.vma', 'hfence.vvma', 'hfence.gvma', # privileged + 'sinval.vma', 'sfence.w.inval', 'sfence.inval.ir', # privileged + 'hinval.vvma', 'hinval.gvma', # hypervisor + 'hlv.b', 'hlv.bu', 'hlv.h', 'hlv.hu', 'hlv.w', 'hlv.wu', 'hlv.d', # hypervisor + 'hsv.b', 'hsv.h', 'hsv.w', 'hsv.d', # hypervisor + 'hlvx.hu', 'hlvx.wu', # hypervisor + 'dret', # debug return + 'c.j', 'c.jr', 'c.jalr', 'c.jal', # compressed jumps + 'c.beqz', 'c.bnez', # compressed branches + 'c.ebreak', # compressed ebreak + 'cm.jt', 'cm.jalt', # table jump +} + +# Operand field mappings - which bits each operand occupies +OPERAND_FIELDS = { + 'rd': (11, 7), + 'rs1': (19, 15), + 'rs2': (24, 20), + 'rs3': (31, 27), + 'fd': (11, 7), # FP destination + 'fs1': (19, 15), # FP source 1 + 'fs2': (24, 20), # FP source 2 + 'fs3': (31, 27), # FP source 3 + 'rm': (14, 12), # rounding mode + 'aq': (26, 26), # acquire + 'rl': (25, 25), # release + 'fm': (31, 28), # fence mode + 'pred': (27, 24), # fence predecessor + 
'succ': (23, 20), # fence successor + 'shamtw': (24, 20), # shift amount (32-bit) + 'shamtd': (25, 20), # shift amount (64-bit) + 'shamt': (25, 20), # shift amount +} + + +def parse_bit_range(spec): + """Parse bit range like '31..25' or single bit like '26'.""" + if '..' in spec: + hi, lo = spec.split('..') + return int(hi), int(lo) + else: + bit = int(spec) + return bit, bit + + +def parse_value(val_str): + """Parse value, handling hex (0x...), binary (0b...), and decimal.""" + val_str = val_str.strip() + if val_str.startswith('0x') or val_str.startswith('0X'): + return int(val_str, 16) + elif val_str.startswith('0b') or val_str.startswith('0B'): + return int(val_str, 2) + return int(val_str) + + +def set_bits(encoding, hi, lo, value): + """Set bits hi..lo in encoding to value.""" + width = hi - lo + 1 + mask = (1 << width) - 1 + value = value & mask # truncate to field width + encoding |= (value << lo) + return encoding + + +def parse_instruction_line(line): + """Parse an instruction definition line from riscv-opcodes. + + Returns (instr_name, operands, fixed_bits) or None if not a valid instruction. 
+ """ + line = line.strip() + + # Skip empty lines, comments, pseudo-ops + if not line or line.startswith('#') or line.startswith('$'): + return None + + # Split into tokens + parts = line.split() + if len(parts) < 2: + return None + + instr_name = parts[0] + + # Parse remaining tokens into operands and fixed bit fields + operands = [] + fixed_bits = [] # list of (hi, lo, value) + + for token in parts[1:]: + if '=' in token: + # Fixed bit field: "31..25=32" or "14..12=0" or "6..2=0x0C" + field, value = token.split('=') + hi, lo = parse_bit_range(field) + val = parse_value(value) + fixed_bits.append((hi, lo, val)) + else: + # Operand name + operands.append(token) + + return instr_name, operands, fixed_bits + + +def compute_encoding(operands, fixed_bits, is_fp=False): + """Compute the instruction encoding with fixed register values.""" + encoding = 0 + + # Apply fixed bit fields + for hi, lo, val in fixed_bits: + encoding = set_bits(encoding, hi, lo, val) + + # Apply register values for operands + for op in operands: + if op in ('rd',): + encoding = set_bits(encoding, 11, 7, REG_RD if not is_fp else REG_RD) + elif op in ('rs1',): + encoding = set_bits(encoding, 19, 15, REG_RS1) + elif op in ('rs2',): + encoding = set_bits(encoding, 24, 20, REG_RS2) + elif op in ('rs3',): + encoding = set_bits(encoding, 31, 27, REG_RS3) + elif op in ('fd',): + encoding = set_bits(encoding, 11, 7, FREG_RD) + elif op in ('fs1',): + encoding = set_bits(encoding, 19, 15, FREG_RS1) + elif op in ('fs2',): + encoding = set_bits(encoding, 24, 20, FREG_RS2) + elif op in ('fs3',): + encoding = set_bits(encoding, 31, 27, FREG_RS3) + elif op in ('rm',): + # Rounding mode - use RNE (round to nearest, ties to even) + encoding = set_bits(encoding, 14, 12, 0) + elif op in ('aq',): + encoding = set_bits(encoding, 26, 26, 0) + elif op in ('rl',): + encoding = set_bits(encoding, 25, 25, 0) + elif op in ('fm',): + encoding = set_bits(encoding, 31, 28, 0) + elif op in ('pred',): + encoding = 
set_bits(encoding, 27, 24, 0xF) # iorw + elif op in ('succ',): + encoding = set_bits(encoding, 23, 20, 0xF) # iorw + elif op in ('shamtw',): + encoding = set_bits(encoding, 24, 20, 1) # shift by 1 + elif op in ('shamtd', 'shamt'): + encoding = set_bits(encoding, 25, 20, 1) # shift by 1 + elif op.startswith('imm') or op.startswith('bimm') or op.startswith('jimm'): + # Immediate fields - set to 0 (or small offset for loads/stores) + pass + elif op in ('csr',): + # CSR address - use cycle (0xC00) + encoding = set_bits(encoding, 31, 20, 0xC00) + elif op in ('zimm',): + # CSR immediate - use 0 + pass + # Ignore other operands (we'll set them to 0 by default) + + return encoding + + +def encoding_to_bytes(encoding, size=4): + """Convert encoding integer to little-endian bytes.""" + return encoding.to_bytes(size, byteorder='little') + + +def generate_asm_string(instr_name, operands, is_fp=False): + """Generate assembly string for display.""" + reg_rd = 'fa0' if is_fp and instr_name.startswith('f') else 'a0' + reg_rs1 = 'fa1' if is_fp and any(op in ('fs1',) for op in operands) else 'a1' + reg_rs2 = 'ft0' if is_fp and any(op in ('fs2',) for op in operands) else 'a2' + + # Determine operands for display + asm_operands = [] + for op in operands: + if op == 'rd': + asm_operands.append('a0') + elif op == 'fd': + asm_operands.append('fa0') + elif op == 'rs1': + asm_operands.append('a1') + elif op == 'fs1': + asm_operands.append('fa1') + elif op == 'rs2': + asm_operands.append('a2') + elif op == 'fs2': + asm_operands.append('ft0') + elif op == 'rs3': + asm_operands.append('a3') + elif op == 'fs3': + asm_operands.append('ft1') + elif op.startswith('imm12') and 'rs1' in operands: + # Load/store format: 0(a1) + asm_operands.append('0(a1)') + elif op.startswith('imm'): + asm_operands.append('0') + elif op.startswith('shamt'): + asm_operands.append('1') + elif op == 'csr': + asm_operands.append('cycle') + elif op == 'zimm': + asm_operands.append('0') + # Skip other internal fields + + 
if asm_operands: + return f"{instr_name} {', '.join(asm_operands)}" + return instr_name + + +def is_fp_instruction(instr_name, operands): + """Check if this is a floating-point instruction.""" + return any(op in ('fd', 'fs1', 'fs2', 'fs3') for op in operands) + + +def get_instruction_size(instr_name, operands, fixed_bits): + """Determine instruction size (2 for compressed, 4 for normal).""" + # Check opcode field for compressed instructions (bits 1..0 != 3) + for hi, lo, val in fixed_bits: + if lo == 0 and hi <= 1: + if val != 3: + return 2 # Compressed instruction + return 4 + + +def process_extension_file(filepath): + """Process one extension file and return list of (bytecode, asm, category, extension, isa_set).""" + results = [] + ext_name = filepath.stem # e.g., 'rv_i', 'rv_zbb' + + # Map extension name to category + extension = ext_name.upper().replace('_', '-') + + with open(filepath, 'r') as f: + for line in f: + parsed = parse_instruction_line(line) + if not parsed: + continue + + instr_name, operands, fixed_bits = parsed + + # Skip problematic instructions + if instr_name in SKIP_INSTRUCTIONS: + continue + + # Skip if no fixed bits (probably malformed) + if not fixed_bits: + continue + + is_fp = is_fp_instruction(instr_name, operands) + + try: + encoding = compute_encoding(operands, fixed_bits, is_fp) + size = get_instruction_size(instr_name, operands, fixed_bits) + bytecode = encoding_to_bytes(encoding, size) + b64 = base64.b64encode(bytecode).decode('ascii') + + asm = generate_asm_string(instr_name, operands, is_fp) + + # Determine category + if 'load' in instr_name or instr_name.startswith('l') and instr_name[1] in 'bhwd': + category = 'LOAD' + elif 'store' in instr_name or instr_name.startswith('s') and instr_name[1] in 'bhwd': + category = 'STORE' + elif instr_name.startswith('amo') or instr_name.startswith('lr') or instr_name.startswith('sc'): + category = 'ATOMIC' + elif instr_name.startswith('f'): + category = 'FP' + elif 'mul' in instr_name or 
'div' in instr_name or 'rem' in instr_name: + category = 'MULDIV' + elif 'cbo' in instr_name or 'cache' in instr_name: + category = 'CACHE' + elif 'fence' in instr_name: + category = 'FENCE' + elif 'csr' in instr_name: + category = 'CSR' + else: + category = 'ARITH' + + isa_set = extension.split('-')[0] # RV, RV64, etc. + + results.append((b64, asm, category, extension, isa_set)) + except Exception as e: + # Skip instructions we can't encode + pass + + return results + + +def main(): + all_results = [] + + # Only skip compressed instructions (16-bit, need special encoding) + skip_extensions = { + 'rv_c', 'rv_c_d', 'rv_c_zicfiss', 'rv_c_zihintntl', + 'rv_zcb', 'rv_zcmop', 'rv_zcmp', 'rv_zcmt', + 'rv32_c', 'rv32_c_f', 'rv64_c', 'rv64_zcb', + 'rv32_zclsd', 'rv32_zilsd', # compressed load/store + } + + # Process ALL extension files (rv_*, rv32_*, rv64_*) + for pattern in ['rv_*', 'rv32_*', 'rv64_*']: + for ext_file in sorted(OPCODES_DIR.glob(pattern)): + if ext_file.is_file() and ext_file.stem not in skip_extensions: + results = process_extension_file(ext_file) + all_results.extend(results) + + # Add C910 custom cache instructions (raw encodings from T-Head manual) + c910_instructions = [ + # Format: (encoding, asm, category) + # dcache.civa rs1 - clean and invalidate by VA (rs1=a1=11) + (0x0278800b, 'dcache.civa a1', 'CACHE'), + # dcache.iva rs1 - invalidate by VA + (0x0268800b, 'dcache.iva a1', 'CACHE'), + # dcache.cva rs1 - clean by VA (writeback) + (0x0258800b, 'dcache.cva a1', 'CACHE'), + # dcache.ciall - clean and invalidate all + (0x0030000b, 'dcache.ciall', 'CACHE'), + # dcache.iall - invalidate all + (0x0020000b, 'dcache.iall', 'CACHE'), + # dcache.call - clean all (writeback) + (0x0010000b, 'dcache.call', 'CACHE'), + # icache.iva rs1 - invalidate I-cache by VA + (0x0308800b, 'icache.iva a1', 'CACHE'), + # icache.iall - invalidate all I-cache + (0x0100000b, 'icache.iall', 'CACHE'), + # sync - full memory barrier + (0x0180000b, 'sync', 'SYNC'), + # sync.i - 
instruction sync + (0x01a0000b, 'sync.i', 'SYNC'), + # sync.s - store sync broadcast + (0x0190000b, 'sync.s', 'SYNC'), + ] + + for encoding, asm, category in c910_instructions: + bytecode = encoding.to_bytes(4, byteorder='little') + b64 = base64.b64encode(bytecode).decode('ascii') + all_results.append((b64, asm, category, 'THEAD-C910', 'CUSTOM')) + + # Output header + print("byte_representation;assembly_code;category;extension;isa_set") + + # Output unique instructions (dedup by encoding) + seen = set() + for b64, asm, category, extension, isa_set in all_results: + if b64 not in seen: + seen.add(b64) + print(f"{b64};{asm};{category};{extension};{isa_set}") + + # Add synthetic entries (for sleep patterns) + print(";busy-sleep;SLEEP;SYNTHETIC;SLEEP") + print(";short-busy-sleep;SLEEP;SYNTHETIC;SLEEP") + + +if __name__ == '__main__': + main() diff --git a/src/code_generator.cc b/src/code_generator.cc index c5599b3..faab89c 100644 --- a/src/code_generator.cc +++ b/src/code_generator.cc @@ -26,7 +26,7 @@ namespace osiris { -std::string x86Instruction::GetCSVRepresentation() const { +std::string Instruction::GetCSVRepresentation() const { std::stringstream line; line << std::hex << instruction_uid; line << ";"; @@ -71,10 +71,12 @@ CodeGenerator::CodeGenerator(const std::string& instructions_filename) { while (std::getline(istream, line)) { std::vector line_splitted = SplitString(line, ';'); if (line_splitted.size() != 5) { - LOG_ERROR("Mismatch of line format in instruction file. Aborting!"); + LOG_ERROR("Mismatch of line format in instruction file at line " + + std::to_string(instruction_idx + 2) + " (got " + + std::to_string(line_splitted.size()) + " fields): [" + line + "]. 
Aborting!"); std::abort(); } - x86Instruction instruction{ + Instruction instruction{ GenerateInstructionUID(instruction_idx), base64_decode(line_splitted[0]), line_splitted[1], @@ -127,7 +129,7 @@ int CodeGenerator::GenerateRandomNumber(int min, int max) { return distribution(rand_generator_); } -x86Instruction CodeGenerator::CreateInstructionFromIndex(uint64_t instruction_idx) { +Instruction CodeGenerator::CreateInstructionFromIndex(uint64_t instruction_idx) { if (instruction_idx > instruction_list_.size()) { LOG_ERROR("Invalid instruction index"); std::abort(); @@ -135,7 +137,7 @@ x86Instruction CodeGenerator::CreateInstructionFromIndex(uint64_t instruction_id return instruction_list_[instruction_idx]; } -x86Instruction CodeGenerator::CreateInstructionFromUID(uint64_t instruction_uid) { +Instruction CodeGenerator::CreateInstructionFromUID(uint64_t instruction_uid) { size_t instruction_idx = InstructionUIDToInstructionIndex(instruction_uid); if (instruction_idx > instruction_list_.size()) { LOG_ERROR("Invalid instruction index"); @@ -144,7 +146,7 @@ x86Instruction CodeGenerator::CreateInstructionFromUID(uint64_t instruction_uid) return instruction_list_[instruction_idx]; } -x86Instruction CodeGenerator::CreateRandomInstruction() { +Instruction CodeGenerator::CreateRandomInstruction() { size_t idx = GenerateRandomNumber(0, instruction_list_.size() - 1); LOG_DEBUG("Got random instruction on index " + std::to_string(idx)); return CreateInstructionFromIndex(idx); diff --git a/src/code_generator.h b/src/code_generator.h index 2ece5e8..004678a 100644 --- a/src/code_generator.h +++ b/src/code_generator.h @@ -36,9 +36,9 @@ constexpr uint64_t kMemoryBegin = 0x13370000; constexpr uint64_t kMemoryEnd = 0x13371fff; //// -/// Represents one x86 instruction +/// Represents one instruction (architecture-independent) /// -struct x86Instruction { +struct Instruction { uint64_t instruction_uid; byte_array byte_representation; std::string assembly_code; @@ -61,16 +61,16 @@ class 
CodeGenerator { /// Create instruction from instruction list /// \param instruction_idx instruction index /// \return corresponding instruction - x86Instruction CreateInstructionFromIndex(size_t instruction_idx); + Instruction CreateInstructionFromIndex(size_t instruction_idx); /// Create instruction from instruction UID /// \param instruction_idx instruction UID /// \return corresponding instruction - x86Instruction CreateInstructionFromUID(uint64_t instruction_uid); + Instruction CreateInstructionFromUID(uint64_t instruction_uid); /// Create random instruction /// \return random instruction - x86Instruction CreateRandomInstruction(); + Instruction CreateRandomInstruction(); /// Get number of Instructions that were loaded to the codegen /// \return no of instructions @@ -86,7 +86,7 @@ class CodeGenerator { uint64_t GenerateInstructionUID(size_t instruction_idx); size_t InstructionUIDToInstructionIndex(uint64_t instruction_uid); - std::vector instruction_list_; + std::vector instruction_list_; std::default_random_engine rand_generator_; std::string instruction_file_sha256hash_; }; diff --git a/src/core.cc b/src/core.cc index 563912d..045aaf5 100644 --- a/src/core.cc +++ b/src/core.cc @@ -54,13 +54,13 @@ void Core::FindAndOutputTriggerpairsWithoutAssumptions(const std::string& output size_t max_instruction_no = code_generator_.GetNumberOfInstructions(); for (size_t measurement_idx = 0; measurement_idx < max_instruction_no; measurement_idx++) { - x86Instruction measurement_sequence = + Instruction measurement_sequence = code_generator_.CreateInstructionFromIndex(measurement_idx); LOG_INFO("processing measurement " + std::to_string(measurement_idx) + "/" + std::to_string(max_instruction_no - 1)); for (size_t trigger_idx = 0; trigger_idx < max_instruction_no; trigger_idx++) { - x86Instruction trigger_sequence = code_generator_.CreateInstructionFromIndex(trigger_idx); + Instruction trigger_sequence = code_generator_.CreateInstructionFromIndex(trigger_idx); if 
(trigger_sequence.assembly_code == "busy-sleep" || trigger_sequence.assembly_code == "short-busy-sleep" || trigger_sequence.assembly_code == "sleep-syscall") { @@ -68,7 +68,7 @@ void Core::FindAndOutputTriggerpairsWithoutAssumptions(const std::string& output continue; } for (size_t reset_idx = 0; reset_idx < max_instruction_no; reset_idx++) { - x86Instruction reset_sequence = code_generator_.CreateInstructionFromIndex(reset_idx); + Instruction reset_sequence = code_generator_.CreateInstructionFromIndex(reset_idx); // execute sleeps only 1 time int reset_executions_amount = reset_sequence.assembly_code == "busy-sleep" || @@ -132,7 +132,7 @@ void Core::FindAndOutputTriggerpairsWithTriggerEqualsMeasurement( output_csvfile << headerline << std::endl; size_t max_instruction_no = code_generator_.GetNumberOfInstructions(); for (size_t trigger_idx = 0; trigger_idx < max_instruction_no; trigger_idx++) { - x86Instruction trigger_sequence = code_generator_.CreateInstructionFromIndex(trigger_idx); + Instruction trigger_sequence = code_generator_.CreateInstructionFromIndex(trigger_idx); std::stringstream output_stream; LOG_INFO("processing trigger " + std::to_string(trigger_idx) + " (" + trigger_sequence.assembly_code + ")"); @@ -143,7 +143,7 @@ void Core::FindAndOutputTriggerpairsWithTriggerEqualsMeasurement( continue; } for (size_t reset_idx = 0; reset_idx < max_instruction_no; reset_idx++) { - x86Instruction reset_sequence = code_generator_.CreateInstructionFromIndex(reset_idx); + Instruction reset_sequence = code_generator_.CreateInstructionFromIndex(reset_idx); // execute sleeps only 1 time int reset_executions_amount = reset_sequence.assembly_code == "busy-sleep" || @@ -203,7 +203,11 @@ void Core::FormatTriggerPairOutput(const std::string& output_folder, // initialize capstone csh capstone_handle; +#if defined(RISCV) + if (cs_open(CS_ARCH_RISCV, CS_MODE_RISCV64, &capstone_handle) != CS_ERR_OK) { +#else if (cs_open(CS_ARCH_X86, CS_MODE_64, &capstone_handle) != CS_ERR_OK) { 
+#endif LOG_ERROR("Couldn't initialize Capstone! Aborting!"); std::exit(1); } @@ -324,7 +328,7 @@ void Core::OutputNonFaultingInstructions(const std::string& output_filename) { // write non-faulting instructions in original format for (size_t instruction_idx : non_faulting_instructions) { - x86Instruction instruction = code_generator_.CreateInstructionFromIndex(instruction_idx); + Instruction instruction = code_generator_.CreateInstructionFromIndex(instruction_idx); std::string line = base64_encode(instruction.byte_representation); line += ";"; @@ -345,7 +349,7 @@ std::vector Core::FindNonFaultingInstructions() { std::vector non_faulting_instruction_indexes; byte_array empty_sequence; for (size_t inst_idx = 0; inst_idx < code_generator_.GetNumberOfInstructions(); inst_idx++) { - x86Instruction measurement_sequence = code_generator_.CreateInstructionFromIndex(inst_idx); + Instruction measurement_sequence = code_generator_.CreateInstructionFromIndex(inst_idx); int64_t result; LOG_INFO("testing instruction " + measurement_sequence.assembly_code); int error = executor_.TestTriggerSequence(measurement_sequence.byte_representation, diff --git a/src/executor.cc b/src/executor.cc index 986667e..1d43338 100644 --- a/src/executor.cc +++ b/src/executor.cc @@ -70,7 +70,12 @@ Executor::Executor() { #if DEBUGMODE == 0 // if we are not in DEBUGMODE this will instead be inlined in Executor::ExecuteCodePage() +#if defined(RISCV) + // RISC-V also needs SIGBUS for memory alignment issues + std::array signals_to_handle = {SIGSEGV, SIGILL, SIGFPE, SIGTRAP, SIGBUS}; +#else std::array signals_to_handle = {SIGSEGV, SIGILL, SIGFPE, SIGTRAP}; +#endif // register fault handler RegisterFaultHandler(signals_to_handle); #endif @@ -79,7 +84,11 @@ Executor::Executor() { Executor::~Executor() { #if DEBUGMODE == 0 // if we are not in DEBUGMODE this will instead be inlined in Executor::ExecuteCodePage() +#if defined(RISCV) + std::array signals_to_handle = {SIGSEGV, SIGILL, SIGFPE, SIGTRAP, SIGBUS}; 
+#else std::array signals_to_handle = {SIGSEGV, SIGILL, SIGFPE, SIGTRAP}; +#endif UnregisterFaultHandler(signals_to_handle); #endif } @@ -303,17 +312,6 @@ void Executor::CreateSpeculativeTriggerTestrunCode(int codepage_no, const byte_array& trigger_sequence, const byte_array& reset_sequence, int reset_executions_amount) { - // call rel32 - constexpr char INST_RELATIVE_CALL[] = "\xe8\xff\xff\xff\xff"; - // jmp rel32 - constexpr char INST_RELATIVE_JMP[] = "\xe9\xff\xff\xff\xff"; - // lea rax, [rip + offset] - constexpr char INST_LEA_RAX_DEREF_RIP_PLUS_OFFSET[] = "\x48\x8d\x05\xff\xff\xff\xff"; - // mov [rsp], rax - constexpr char INST_MOV_DEREF_RSP_RAX[] = "\x48\x89\x04\x24"; - // ret - constexpr char INST_RET[] = "\xc3"; - ClearDataPage(); InitializeCodePage(codepage_no); @@ -329,10 +327,121 @@ void Executor::CreateSpeculativeTriggerTestrunCode(int codepage_no, } AddSerializeInstructionToCodePage(codepage_no); +#if defined(RISCV) + // + // RISC-V Spectre-RSB implementation + // + // Pattern: + // jal ra, _retp ; pushes return address to RSB, jumps to _retp + // <trigger_sequence> ; executed speculatively (RSB predicts return here) + // j _end ; speculative jump to end (use 4-byte JAL x0) + // _retp: + // auipc ra, 0 ; load the address of this AUIPC into ra + // addi ra, ra, <offset> ; add offset to reach _end + // ret ; mispredicted return (RSB says go after jal) + // _end: + // + // + + // Instruction sizes: + // JAL rd, offset: 4 bytes + // AUIPC rd, imm: 4 bytes + // ADDI rd, rs1, imm: 4 bytes + // C.JR ra (ret): 2 bytes + + constexpr size_t JAL_SIZE = 4; + constexpr size_t AUIPC_SIZE = 4; + constexpr size_t ADDI_SIZE = 4; + constexpr size_t RET_SIZE = 2; + + // JAL offset: from JAL instruction to _retp + // _retp is after: JAL + trigger_sequence + J_END (another JAL x0) + size_t trigger_size = trigger_sequence.size(); + int32_t jal_offset = static_cast<int32_t>(JAL_SIZE + trigger_size + JAL_SIZE); + + // J_END offset: from J_END instruction to _end (a JAL offset is relative to the JAL itself) + // _end is after: J_END + AUIPC + ADDI + RET + int32_t j_end_offset = 
static_cast<int32_t>(JAL_SIZE + AUIPC_SIZE + ADDI_SIZE + RET_SIZE); + + // ADDI offset: from AUIPC instruction to _end (auipc ra, 0 yields the AUIPC's own address) + // _end is after: AUIPC + ADDI + RET + int32_t addi_offset = static_cast<int32_t>(AUIPC_SIZE + ADDI_SIZE + RET_SIZE); + + // Encode JAL rd, offset + // JAL format: imm[20|10:1|11|19:12] rd[4:0] opcode[6:0] + // opcode = 0x6f + auto encode_jal = [](int rd, int32_t imm) -> uint32_t { + uint32_t imm20 = (imm >> 20) & 0x1; + uint32_t imm10_1 = (imm >> 1) & 0x3ff; + uint32_t imm11 = (imm >> 11) & 0x1; + uint32_t imm19_12 = (imm >> 12) & 0xff; + return (imm20 << 31) | (imm10_1 << 21) | (imm11 << 20) | + (imm19_12 << 12) | (rd << 7) | 0x6f; + }; + + // Encode ADDI rd, rs1, imm + // ADDI format: imm[11:0] rs1[4:0] funct3[2:0] rd[4:0] opcode[6:0] + // funct3 = 0, opcode = 0x13 + auto encode_addi = [](int rd, int rs1, int32_t imm) -> uint32_t { + return ((imm & 0xfff) << 20) | (rs1 << 15) | (0 << 12) | (rd << 7) | 0x13; + }; + + // Helper to convert uint32_t to byte array (little endian) + auto uint32_to_bytes = [](uint32_t val) -> byte_array { + byte_array bytes; + bytes.push_back(std::byte{static_cast<unsigned char>(val & 0xff)}); + bytes.push_back(std::byte{static_cast<unsigned char>((val >> 8) & 0xff)}); + bytes.push_back(std::byte{static_cast<unsigned char>((val >> 16) & 0xff)}); + bytes.push_back(std::byte{static_cast<unsigned char>((val >> 24) & 0xff)}); + return bytes; + }; + + // Generate the instructions + uint32_t jal_ra_instr = encode_jal(1, jal_offset); // jal ra, _retp (rd=1 is ra) + uint32_t jal_x0_instr = encode_jal(0, j_end_offset); // j _end (jal x0, offset) + uint32_t addi_instr = encode_addi(1, 1, addi_offset); // addi ra, ra, offset + + // AUIPC ra, 0 = 0x00000097 + constexpr char INST_AUIPC_RA_0[] = "\x97\x00\x00\x00"; + + // RET (c.jr ra) = 0x8082 + constexpr char INST_RET_RISCV[] = "\x82\x80"; + + // Emit JAL ra, _retp + AddInstructionToCodePage(codepage_no, uint32_to_bytes(jal_ra_instr)); + + // Speculation starts here - RSB predicts return to after JAL + AddInstructionToCodePage(codepage_no, trigger_sequence); + + // J to _end (jal x0, 
offset) - still speculative + AddInstructionToCodePage(codepage_no, uint32_to_bytes(jal_x0_instr)); + + // _retp: Target of JAL + // AUIPC ra, 0 + AddInstructionToCodePage(codepage_no, INST_AUIPC_RA_0, 4); + + // ADDI ra, ra, offset + AddInstructionToCodePage(codepage_no, uint32_to_bytes(addi_instr)); + + // RET (mispredicted by RSB) + AddInstructionToCodePage(codepage_no, INST_RET_RISCV, 2); + // _end: Both speculation and architectural paths meet here + +#else // - // use spectre-RSB to speculatively execute the trigger + // x86-64 Spectre-RSB implementation // + // call rel32 + constexpr char INST_RELATIVE_CALL[] = "\xe8\xff\xff\xff\xff"; + // jmp rel32 + constexpr char INST_RELATIVE_JMP[] = "\xe9\xff\xff\xff\xff"; + // lea rax, [rip + offset] + constexpr char INST_LEA_RAX_DEREF_RIP_PLUS_OFFSET[] = "\x48\x8d\x05\xff\xff\xff\xff"; + // mov [rsp], rax + constexpr char INST_MOV_DEREF_RSP_RAX[] = "\x48\x89\x04\x24"; + // ret + constexpr char INST_RET[] = "\xc3"; // note that for all following calculations sizeof has the additional '\0', hence the - 1 // we use this to generate a call which can be misprecided; target is behind the speculated code @@ -379,6 +488,8 @@ void Executor::CreateSpeculativeTriggerTestrunCode(int codepage_no, AddInstructionToCodePage(codepage_no, INST_RET, 1); // target of LEA_RIP_DISPLACEMENT (manipulated RET) and JMP_DISPLACEMENT +#endif + // serialize after trigger //page_idx = AddSerializeInstructionToCodePage(page_idx, codepage_no); @@ -406,15 +517,29 @@ void Executor::ClearDataPage() { } void Executor::InitializeCodePage(int codepage_no) { + assert(codepage_no < static_cast(execution_code_pages_.size())); + +#if defined(RISCV) + // RISC-V: c.nop is 2 bytes (0x0001), but we use 4-byte nop for alignment + // Fill with 4-byte NOPs (addi x0, x0, 0 = 0x00000013) + uint32_t* page = reinterpret_cast(execution_code_pages_[codepage_no]); + constexpr uint32_t INST_NOP_4BYTE = 0x00000013; // addi x0, x0, 0 + for (size_t i = 0; i < kPagesize / 4; 
i++) { + page[i] = INST_NOP_4BYTE; + } + // Add RET as last instruction; it must occupy the whole final 4-byte slot, because a c.jr in the + // upper half of a NOP word is fetched as part of that 32-bit ADDI and never executes as a return + constexpr uint32_t INST_RET = 0x00008067; // ret (jalr x0, 0(ra)) + uint32_t* ret_loc = page + (kPagesize / 4 - 1); + *ret_loc = INST_RET; +#else constexpr char INST_RET = '\xc3'; constexpr char INST_NOP = '\x90'; - - assert(codepage_no < static_cast(execution_code_pages_.size())); memset(execution_code_pages_[codepage_no], INST_NOP, kPagesize); - // add RET as last instruction (even though AddEpilog adds a RET it could happen that a // jump skips it) execution_code_pages_[codepage_no][kPagesize - 1] = INST_RET; +#endif // reset index to write code_pages_last_written_index_[codepage_no] = 0; @@ -422,6 +547,59 @@ void Executor::InitializeCodePage(int codepage_no) { void Executor::AddProlog(int codepage_no) { // NOTE: everything in this function must be mirrored by AddEpilog + +#if defined(RISCV) + // RISC-V prolog for C910 + // Save only registers we actually use: ra, s0 (frame pointer) + // Timing uses t3/t4 (caller-saved, no need to save) + // Generated instructions only use caller-saved registers (a0-a3, t0-t2) + // Stack frame: 16 bytes (2 regs * 8 bytes) + + // addi sp, sp, -16 + constexpr char INST_ADDI_SP_SP_MINUS16[] = "\x13\x01\x01\xff"; // addi sp, sp, -16 + // sd ra, 0(sp) + constexpr char INST_SD_RA_0_SP[] = "\x06\xe0"; + // sd s0, 8(sp) + constexpr char INST_SD_S0_8_SP[] = "\x22\xe4"; + // mv s0, sp (save frame pointer) + constexpr char INST_MV_S0_SP[] = "\x0a\x84"; + + // Allocate stack frame + AddInstructionToCodePage(codepage_no, INST_ADDI_SP_SP_MINUS16, 4); + + // Save callee-saved registers we use + AddInstructionToCodePage(codepage_no, INST_SD_RA_0_SP, 2); + AddInstructionToCodePage(codepage_no, INST_SD_S0_8_SP, 2); + + // Save frame pointer + AddInstructionToCodePage(codepage_no, INST_MV_S0_SP, 2); + + // Create additional stack space for instruction testing (2048 bytes max 
with addi) + // addi sp, sp, -2048 + constexpr char INST_ADDI_SP_SP_MINUS2048[] = "\x13\x01\x01\x80"; + AddInstructionToCodePage(codepage_no, INST_ADDI_SP_SP_MINUS2048, 4); + + // Initialize registers a0-a3 and t0-t2 to point to memory locations (0x13370000) + // lui loads upper 20 bits, so lui rd, 0x13370 gives rd = 0x13370000 + // This matches kMemoryBegin and allows load/store/atomic instructions to work + constexpr char INST_LUI_A0_0x13370[] = "\x37\x05\x37\x13"; // lui a0, 0x13370 + constexpr char INST_LUI_A1_0x13370[] = "\xb7\x05\x37\x13"; // lui a1, 0x13370 + constexpr char INST_LUI_A2_0x13370[] = "\x37\x06\x37\x13"; // lui a2, 0x13370 + constexpr char INST_LUI_A3_0x13370[] = "\xb7\x06\x37\x13"; // lui a3, 0x13370 + constexpr char INST_LUI_T0_0x13370[] = "\xb7\x02\x37\x13"; // lui t0, 0x13370 + constexpr char INST_LUI_T1_0x13370[] = "\x37\x03\x37\x13"; // lui t1, 0x13370 + constexpr char INST_LUI_T2_0x13370[] = "\xb7\x03\x37\x13"; // lui t2, 0x13370 + + AddInstructionToCodePage(codepage_no, INST_LUI_A0_0x13370, 4); + AddInstructionToCodePage(codepage_no, INST_LUI_A1_0x13370, 4); + AddInstructionToCodePage(codepage_no, INST_LUI_A2_0x13370, 4); + AddInstructionToCodePage(codepage_no, INST_LUI_A3_0x13370, 4); + AddInstructionToCodePage(codepage_no, INST_LUI_T0_0x13370, 4); + AddInstructionToCodePage(codepage_no, INST_LUI_T1_0x13370, 4); + AddInstructionToCodePage(codepage_no, INST_LUI_T2_0x13370, 4); + +#else + // x86-64 prolog constexpr char INST_PUSH_RBX_RSP_RBP[] = "\x53\x54\x55"; constexpr char INST_PUSH_R12_R13_R14_R15[] = "\x41\x54\x41\x55\x41\x56\x41\x57"; constexpr char INST_SUB_RSP_0x8[] = "\x48\x83\xec\x08"; @@ -484,10 +662,44 @@ void Executor::AddProlog(int codepage_no) { AddInstructionToCodePage(codepage_no, encoded_immediate); AddInstructionToCodePage(codepage_no, INST_MOVQ_XMM0_R8, 5); +#endif } void Executor::AddEpilog(int codepage_no) { // NOTE: everything in this function must be mirrored by AddProlog + +#if defined(RISCV) + // RISC-V epilog 
for C910 + // Restore only registers we saved: ra, s0 + // Timing uses t3/t4 (caller-saved, no need to restore) + // mv sp, s0 + constexpr char INST_MV_SP_S0[] = "\x22\x81"; + + // ld ra, 0(sp) + constexpr char INST_LD_RA_0_SP[] = "\x82\x60"; + // ld s0, 8(sp) + constexpr char INST_LD_S0_8_SP[] = "\x22\x64"; + + // addi sp, sp, 16 (deallocate stack frame) + constexpr char INST_ADDI_SP_SP_16[] = "\x13\x01\x01\x01"; // addi sp, sp, 16 + // ret (c.jr ra) + constexpr char INST_RET[] = "\x82\x80"; + + // Restore stack pointer from frame pointer + AddInstructionToCodePage(codepage_no, INST_MV_SP_S0, 2); + + // Restore ra and s0 (saved by AddProlog) + AddInstructionToCodePage(codepage_no, INST_LD_RA_0_SP, 2); + AddInstructionToCodePage(codepage_no, INST_LD_S0_8_SP, 2); + + // Deallocate stack frame + AddInstructionToCodePage(codepage_no, INST_ADDI_SP_SP_16, 4); + + // Return + AddInstructionToCodePage(codepage_no, INST_RET, 2); + +#else + // x86-64 epilog constexpr char INST_CLD[] = "\xfc"; constexpr char INST_POP_R15_R14_R13_R12[] = "\x41\x5f\x41\x5e\x41\x5d\x41\x5c"; constexpr char INST_POP_RBP_RSP_RBX[] = "\x5d\x5c\x5b"; @@ -516,15 +728,39 @@ void Executor::AddEpilog(int codepage_no) { // insert return AddInstructionToCodePage(codepage_no, INST_RET, 1); +#endif } void Executor::AddSerializeInstructionToCodePage(int codepage_no) { - // insert CPUID to serialize instruction stream +#if defined(RISCV) + // RISC-V: stand-in for the x86 CPUID serialization (orders memory and I/O accesses) + // fence iorw, iorw + constexpr char INST_FENCE_IORW[] = "\x0f\x00\xf0\x0f"; + AddInstructionToCodePage(codepage_no, INST_FENCE_IORW, 4); +#else + // x86: insert CPUID to serialize instruction stream constexpr char INST_XOR_EAX_EAX_CPUID[] = "\x31\xc0\x0f\xa2"; AddInstructionToCodePage(codepage_no, INST_XOR_EAX_EAX_CPUID, 4); +#endif } void Executor::AddTimerStartToCodePage(int codepage_no) { +#if defined(RISCV) + // RISC-V: fence + rdcycle for timing + // Use t3/t4 for timing - these are caller-saved registers (not used by 
tested instructions) + // t3 = x28, t4 = x29 + // fence rw, rw + constexpr char INST_FENCE[] = "\x0f\x00\x30\x03"; + // rdcycle t3 (save start time in t3) + // rdcycle is csrrs rd, cycle, x0; for t3 (x28) it's 0xc0002e73 + constexpr char INST_RDCYCLE_T3[] = "\x73\x2e\x00\xc0"; + + AddInstructionToCodePage(codepage_no, INST_FENCE, 4); + AddInstructionToCodePage(codepage_no, INST_RDCYCLE_T3, 4); + AddInstructionToCodePage(codepage_no, INST_FENCE, 4); + +#else + // x86: mfence + cpuid + rdtsc/rdpru constexpr char INST_MFENCE[] = "\x0f\xae\xf0"; constexpr char INST_XOR_EAX_EAX_CPUID[] = "\x31\xc0\x0f\xa2"; // note that we can use R10 as it is caller-saved @@ -541,10 +777,34 @@ void Executor::AddTimerStartToCodePage(int codepage_no) { AddInstructionToCodePage(codepage_no, INST_MOV_ECX_1_RDPRU, 8); #endif // move result to R10 s.t. we can use it later in AddTimerEndToCodePage - AddInstructionToCodePage(codepage_no, INST_MOV_R10_RAX,3); + AddInstructionToCodePage(codepage_no, INST_MOV_R10_RAX, 3); +#endif } void Executor::AddTimerEndToCodePage(int codepage_no) { +#if defined(RISCV) + // RISC-V: fence + rdcycle + sub for timing + // Use t3/t4 for timing - these are caller-saved registers (not used by tested instructions) + // t3 = x28, t4 = x29 + // fence rw, rw + constexpr char INST_FENCE[] = "\x0f\x00\x30\x03"; + // rdcycle t4 (get end time in t4) + // rdcycle is csrrs rd, cycle, x0; for t4 (x29) it's 0xc0002ef3 + constexpr char INST_RDCYCLE_T4[] = "\xf3\x2e\x00\xc0"; + // sub a0, t4, t3 (a0 = t4 - t3 = elapsed cycles) + // sub rd, rs1, rs2: opcode=0x33, funct3=0, funct7=0x20 + // rd=a0(10), rs1=t4(29), rs2=t3(28) + // encoding: 0x41ce8533 + constexpr char INST_SUB_A0_T4_T3[] = "\x33\x85\xce\x41"; + + AddInstructionToCodePage(codepage_no, INST_FENCE, 4); + AddInstructionToCodePage(codepage_no, INST_RDCYCLE_T4, 4); + AddInstructionToCodePage(codepage_no, INST_FENCE, 4); + // Calculate elapsed time: a0 = t4 - t3 + AddInstructionToCodePage(codepage_no, INST_SUB_A0_T4_T3, 
4); + +#else + // x86: rdtscp/rdpru + sub constexpr char INST_XOR_EAX_EAX_CPUID[] = "\x31\xc0\x0f\xa2"; constexpr char INST_SUB_RAX_R10[] = "\x4c\x29\xd0"; // note that we can use R11 as it is caller-saved @@ -564,6 +824,7 @@ void Executor::AddTimerEndToCodePage(int codepage_no) { AddInstructionToCodePage(codepage_no, INST_SUB_RAX_R10, 3); AddInstructionToCodePage(codepage_no, INST_MOV_R11_RAX, 3); AddInstructionToCodePage(codepage_no, INST_XOR_EAX_EAX_CPUID, 4); +#endif } void Executor::AddInstructionToCodePage(int codepage_no, @@ -607,19 +868,42 @@ void Executor::AddInstructionToCodePage(int codepage_no, } void Executor::MakeTimerResultReturnValue(int codepage_no) { +#if defined(RISCV) + // RISC-V: Result is already in a0 from AddTimerEndToCodePage (sub a0, t4, t3) + // Nothing to do - a0 is the return value register in RISC-V calling convention + (void)codepage_no; // Suppress unused parameter warning +#else + // x86: Move result from R11 to RAX (return value register) constexpr char MOV_RAX_R11[] = "\x4c\x89\xd8"; assert(code_pages_last_written_index_[codepage_no] + 3 < kPagesize); AddInstructionToCodePage(codepage_no, MOV_RAX_R11, 3); +#endif } byte_array Executor::CreateSequenceOfNOPs(size_t length) { - constexpr auto INST_NOP_AS_DECIMAL = static_cast(0x90); byte_array nops; + +#if defined(RISCV) + // RISC-V: emit 4-byte NOPs (addi x0, x0, 0 = 0x00000013) instead of 2-byte c.nop (0x01, 0x00) + // so the sequence length is always a multiple of 4 bytes + // The length is rounded up, so up to 3 bytes more than requested may be returned + constexpr unsigned char INST_NOP_4BYTE[] = {0x13, 0x00, 0x00, 0x00}; + size_t nops_needed = (length + 3) / 4; // Round up to 4-byte boundary + for (size_t i = 0; i < nops_needed; i++) { + for (size_t j = 0; j < 4; j++) { + nops.push_back(std::byte{INST_NOP_4BYTE[j]}); + } + } +#else + // x86: NOP is 1 byte (0x90) + constexpr auto INST_NOP_AS_DECIMAL = static_cast(0x90); std::byte nop_byte{INST_NOP_AS_DECIMAL}; for (size_t i = 0; i < length; i++) { nops.push_back(nop_byte); } 
+#endif + return nops; } @@ -633,6 +917,7 @@ static int sigsegv_no = 0; static int sigfpe_no = 0; static int sigill_no = 0; static int sigtrap_no = 0; +static int sigbus_no = 0; void Executor::PrintFaultCount() { std::cout << "=== Faultcounters of Executor ===" << std::endl @@ -640,6 +925,7 @@ void Executor::PrintFaultCount() { << "\tSIGFPE: " << sigfpe_no << std::endl << "\tSIGILL: " << sigill_no << std::endl << "\tSIGTRAP: " << sigtrap_no << std::endl + << "\tSIGBUS: " << sigbus_no << std::endl << "=================================" << std::endl; } @@ -655,6 +941,8 @@ void Executor::FaultHandler(int sig) { break; case SIGTRAP:sigtrap_no++; break; + case SIGBUS:sigbus_no++; + break; default:std::abort(); } @@ -685,13 +973,22 @@ int Executor::ExecuteCodePage(void* codepage, uint64_t* cycles_elapsed) { #if DEBUGMODE == 1 // list of signals that we catch and throw as errors // (without DEBUGMODE the array is defined in the error case) +#if defined(RISCV) + std::array signals_to_handle = {SIGSEGV, SIGILL, SIGFPE, SIGTRAP, SIGBUS}; +#else std::array signals_to_handle = {SIGSEGV, SIGILL, SIGFPE, SIGTRAP}; +#endif // register fault handler (if not in debugmode we do this in constructor/destructor as // this has a huge impact on the runtime) RegisterFaultHandler(signals_to_handle); #endif if (!setjmp(fault_handler_jump_buf)) { +#if defined(RISCV) + // RISC-V requires fence.i to synchronize instruction cache after modifying code + // Without this, the CPU may execute stale instructions from I-cache + asm volatile("fence.i" ::: "memory"); +#endif // jump to codepage uint64_t cycle_diff = ((uint64_t(*)()) codepage)(); // set return argument @@ -711,7 +1008,11 @@ int Executor::ExecuteCodePage(void* codepage, uint64_t* cycles_elapsed) { #if DEBUGMODE == 0 // only allocate the array in case of an error to safe execution time // list of signals that we catch and throw as errors +#if defined(RISCV) + std::array signals_to_handle = {SIGSEGV, SIGILL, SIGFPE, SIGTRAP, SIGBUS}; 
+#else std::array signals_to_handle = {SIGSEGV, SIGILL, SIGFPE, SIGTRAP}; +#endif #endif sigset_t signal_set; sigemptyset(&signal_set); diff --git a/src/executor.h b/src/executor.h index e79beab..9ff0f04 100644 --- a/src/executor.h +++ b/src/executor.h @@ -16,6 +16,7 @@ #ifndef OSIRIS_SRC_EXECUTOR_H_ #define OSIRIS_SRC_EXECUTOR_H_ +#include #include #include "code_generator.h" diff --git a/src/filter.cc b/src/filter.cc index eaa4bcc..d904f94 100644 --- a/src/filter.cc +++ b/src/filter.cc @@ -149,6 +149,7 @@ bool ResultFilter::FilterFunctionMeasurementTriggerExtensionPairs(int64_t line_n bool ResultFilter::FilterFunctionRemoveCacheResetSequence([[maybe_unused]] int64_t line_no, const ResultLineData& result_line_data) { + // x86 cache flush instructions if (result_line_data.reset_sequence.find("CLFLUSH") != std::string::npos) { return true; } @@ -159,42 +160,48 @@ bool ResultFilter::FilterFunctionRemoveCacheResetSequence([[maybe_unused]] int64 if (result_line_data.reset_sequence.find("MASKMOV") != std::string::npos) { return true; } - return false; -} - -bool ResultFilter::FilterFunctionRemoveAllCacheSequences([[maybe_unused]] int64_t line_no, - const ResultLineData& result_line_data) { - - if (result_line_data.measurement_sequence.find("CLFLUSH") != std::string::npos) { - return true; - } - if (result_line_data.measurement_sequence.find("MOV") != std::string::npos && - result_line_data.measurement_sequence.find("NT") != std::string::npos) { + // RISC-V T-Head C910 cache flush instructions + if (result_line_data.reset_sequence.find("DCACHE") != std::string::npos) { return true; } - if (result_line_data.measurement_sequence.find("MASKMOV") != std::string::npos) { + if (result_line_data.reset_sequence.find("ICACHE") != std::string::npos) { return true; } + return false; +} - if (result_line_data.trigger_sequence.find("CLFLUSH") != std::string::npos) { - return true; - } - if (result_line_data.trigger_sequence.find("MOV") != std::string::npos && - 
result_line_data.trigger_sequence.find("NT") != std::string::npos) { - return true; - } - if (result_line_data.trigger_sequence.find("MASKMOV") != std::string::npos) { - return true; - } +bool ResultFilter::FilterFunctionRemoveAllCacheSequences([[maybe_unused]] int64_t line_no, + const ResultLineData& result_line_data) { + // Helper lambda to check if a sequence contains cache-related instructions + auto containsCacheInstruction = [](const std::string& seq) -> bool { + // x86 cache flush instructions + if (seq.find("CLFLUSH") != std::string::npos) { + return true; + } + if (seq.find("MOV") != std::string::npos && + seq.find("NT") != std::string::npos) { + return true; + } + if (seq.find("MASKMOV") != std::string::npos) { + return true; + } + // RISC-V T-Head C910 cache flush instructions + if (seq.find("DCACHE") != std::string::npos) { + return true; + } + if (seq.find("ICACHE") != std::string::npos) { + return true; + } + return false; + }; - if (result_line_data.reset_sequence.find("CLFLUSH") != std::string::npos) { + if (containsCacheInstruction(result_line_data.measurement_sequence)) { return true; } - if (result_line_data.reset_sequence.find("MOV") != std::string::npos && - result_line_data.reset_sequence.find("NT") != std::string::npos) { + if (containsCacheInstruction(result_line_data.trigger_sequence)) { return true; } - if (result_line_data.reset_sequence.find("MASKMOV") != std::string::npos) { + if (containsCacheInstruction(result_line_data.reset_sequence)) { return true; } return false; diff --git a/src/osiris.cc b/src/osiris.cc index 65ce3c5..1ddb534 100644 --- a/src/osiris.cc +++ b/src/osiris.cc @@ -31,8 +31,13 @@ // // Constants // +#if defined(RISCV) +const std::string kInstructionFile("../riscv-instructions/instructions.b64"); +const std::string kInstructionFileCleaned("../riscv-instructions/instructions_cleaned.b64"); +#else const std::string kInstructionFile("../x86-instructions/instructions.b64"); const std::string 
kInstructionFileCleaned("../x86-instructions/instructions_cleaned.b64"); +#endif const std::string kOutputCSVNoAssumptions("./measure_trigger_pairs.csv"); @@ -43,15 +48,11 @@ const std::string kOutputFolderFormattedTriggerEqualsMeasurement("./triggerpairs // // Validate Target Architecture Macros // -#ifdef INTEL - #ifdef AMD +#if defined(INTEL) + defined(AMD) + defined(RISCV) > 1 static_assert(false, "Multiple target architectures defined! Aborting!"); - #endif #endif -#ifndef INTEL - #ifndef AMD +#if !defined(INTEL) && !defined(AMD) && !defined(RISCV) static_assert(false, "No target architecture defined! Aborting!"); - #endif #endif @@ -112,9 +113,9 @@ void ConfirmResultsOfFuzzer(const std::string& input_file, const std::string& ou uint64_t trigger_uid = std::get<1>(elem); uint64_t reset_uid = std::get<2>(elem); line = std::get<3>(elem); - osiris::x86Instruction measurement = code_generator.CreateInstructionFromUID(measurement_uid); - osiris::x86Instruction trigger = code_generator.CreateInstructionFromUID(trigger_uid); - osiris::x86Instruction reset = code_generator.CreateInstructionFromUID(reset_uid); + osiris::Instruction measurement = code_generator.CreateInstructionFromUID(measurement_uid); + osiris::Instruction trigger = code_generator.CreateInstructionFromUID(trigger_uid); + osiris::Instruction reset = code_generator.CreateInstructionFromUID(reset_uid); if (trigger.assembly_code == "busy-sleep" || measurement.assembly_code == "busy-sleep") { // the sleep is only a valid reset sequence @@ -252,6 +253,8 @@ int main(int argc, char* argv[]) { LOG_DEBUG("Osiris was compiled for Intel"); #elif defined(AMD) LOG_DEBUG("Osiris was compiled for AMD"); +#elif defined(RISCV) + LOG_DEBUG("Osiris was compiled for RISC-V"); #endif } else { osiris::SetLogLevel(osiris::INFO); diff --git a/src/utils.cc b/src/utils.cc index 7db1ad3..75c1796 100644 --- a/src/utils.cc +++ b/src/utils.cc @@ -192,6 +192,9 @@ std::string base64_encode(const byte_array& bytes_to_encode) { return 
ret; } +// Suppress deprecation warnings for legacy OpenSSL SHA256 API +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" std::string CalculateFileHashSHA256(const std::string& filename) { std::ifstream file_stream(filename); if (!file_stream.is_open()) { @@ -225,5 +228,6 @@ std::string CalculateFileHashSHA256(const std::string& filename) { return ss.str(); } +#pragma GCC diagnostic pop } // namespace osiris diff --git a/src/utils.h b/src/utils.h index dd59be2..1d1920d 100644 --- a/src/utils.h +++ b/src/utils.h @@ -17,6 +17,7 @@ #define OSIRIS_SRC_UTILS_H_ #include +#include #include #include