From 286ac86ebac1a867b6a70372b92660b1c8bb8d48 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Mon, 15 Jan 2024 11:09:24 +0100 Subject: [PATCH 1/5] Re-create minimal library using unwinding tables --- CMakeLists.txt | 12 + include/async-profiler/arch.h | 149 ++++ include/async-profiler/codeCache.h | 161 +++++ include/async-profiler/dwarf.h | 153 ++++ include/async-profiler/elf_helpers.h | 36 + include/async-profiler/mutex.h | 55 ++ include/async-profiler/os.h | 31 + include/async-profiler/safeAccess.h | 35 + include/async-profiler/stackFrame.h | 69 ++ include/async-profiler/stackWalker.h | 34 + include/async-profiler/stack_context.h | 48 ++ include/async-profiler/symbols.h | 37 + include/perf_archmap.hpp | 4 +- src/async-profiler/codeCache.cpp | 236 +++++++ src/async-profiler/dwarf.cpp | 396 +++++++++++ src/async-profiler/elf_helpers.cpp | 245 +++++++ src/async-profiler/mutex.cpp | 38 + src/async-profiler/os.cpp | 6 + src/async-profiler/safeAccess.cpp | 79 +++ src/async-profiler/stackFrame_aarch64.cpp | 112 +++ src/async-profiler/stackFrame_arm.cpp | 101 +++ src/async-profiler/stackFrame_i386.cpp | 106 +++ src/async-profiler/stackFrame_ppc64.cpp | 136 ++++ src/async-profiler/stackFrame_x64.cpp | 54 ++ src/async-profiler/stackWalker.cpp | 180 +++++ src/async-profiler/stack_context.cpp | 17 + src/async-profiler/symbols_linux.cpp | 811 ++++++++++++++++++++++ test/CMakeLists.txt | 16 + 28 files changed, 3355 insertions(+), 2 deletions(-) create mode 100644 include/async-profiler/arch.h create mode 100644 include/async-profiler/codeCache.h create mode 100644 include/async-profiler/dwarf.h create mode 100644 include/async-profiler/elf_helpers.h create mode 100644 include/async-profiler/mutex.h create mode 100644 include/async-profiler/os.h create mode 100644 include/async-profiler/safeAccess.h create mode 100644 include/async-profiler/stackFrame.h create mode 100644 include/async-profiler/stackWalker.h create mode 100644 include/async-profiler/stack_context.h create mode 
100644 include/async-profiler/symbols.h create mode 100644 src/async-profiler/codeCache.cpp create mode 100644 src/async-profiler/dwarf.cpp create mode 100644 src/async-profiler/elf_helpers.cpp create mode 100644 src/async-profiler/mutex.cpp create mode 100644 src/async-profiler/os.cpp create mode 100644 src/async-profiler/safeAccess.cpp create mode 100644 src/async-profiler/stackFrame_aarch64.cpp create mode 100644 src/async-profiler/stackFrame_arm.cpp create mode 100644 src/async-profiler/stackFrame_i386.cpp create mode 100644 src/async-profiler/stackFrame_ppc64.cpp create mode 100644 src/async-profiler/stackFrame_x64.cpp create mode 100644 src/async-profiler/stackWalker.cpp create mode 100644 src/async-profiler/stack_context.cpp create mode 100644 src/async-profiler/symbols_linux.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 66399582e..d31c68e31 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,6 +96,18 @@ add_subdirectory(src/event_parser) # elfutils include(Findelfutils) +# -- Async profiler -- +set(ASYNC_PROFILER_LIB_DIR ${CMAKE_SOURCE_DIR}) +set(ASYNC_PROFILER_SRC_DIR ${ASYNC_PROFILER_LIB_DIR}/src/async-profiler) +set(ASYNC_PROFILER_LIB_INCLUDE ${ASYNC_PROFILER_LIB_DIR}/include/async-profiler) +aux_source_directory(${ASYNC_PROFILER_SRC_DIR} ASYNC_PROFILER_SOURCES) +add_library(async_prof_lib STATIC ${ASYNC_PROFILER_SOURCES}) +target_include_directories(async_prof_lib PUBLIC ${ASYNC_PROFILER_LIB_INCLUDE} + ${CMAKE_SOURCE_DIR}/include) +target_link_libraries(async_prof_lib PRIVATE dw elf Threads::Threads) +set_property(TARGET async_prof_lib PROPERTY POSITION_INDEPENDENT_CODE ON) +add_library(DDProf::AsyncProf ALIAS async_prof_lib) + # ---- Static analysis ---- include(ClangTidy) include(Format) diff --git a/include/async-profiler/arch.h b/include/async-profiler/arch.h new file mode 100644 index 000000000..94a2397e7 --- /dev/null +++ b/include/async-profiler/arch.h @@ -0,0 +1,149 @@ +/* + * Copyright 2017 Andrei Pangin + * + * Licensed under the 
// Fixed-width unsigned integer aliases used throughout the vendored
// async-profiler sources.
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;

// Atomically adds `increment` to `var` and returns the value `var` held
// before the addition. Sequentially-consistent ordering matches the full
// barrier of the legacy __sync_fetch_and_add builtin this replaces, and keeps
// the header on a single family of atomic builtins.
static inline u64 atomicInc(volatile u64 &var, u64 increment = 1) {
  return __atomic_fetch_add(&var, increment, __ATOMIC_SEQ_CST);
}

// Same as above for plain `int` counters.
static inline int atomicInc(volatile int &var, int increment = 1) {
  return __atomic_fetch_add(&var, increment, __ATOMIC_SEQ_CST);
}

// Reads `var` with acquire ordering.
static inline u64 loadAcquire(u64 &var) {
  return __atomic_load_n(&var, __ATOMIC_ACQUIRE);
}

// Writes `value` to `var` with release ordering.
static inline void storeRelease(u64 &var, u64 value) {
  __atomic_store_n(&var, value, __ATOMIC_RELEASE);
}
int FRAME_PC_SLOT = 1; +const int ADJUST_RET = 0; +const int PLT_HEADER_SIZE = 20; +const int PLT_ENTRY_SIZE = 12; +const int PERF_REG_PC = 15; // PERF_REG_ARM_PC + +# define spinPause() asm volatile("yield") +# define rmb() asm volatile("dmb ish" : : : "memory") +# define flushCache(addr) \ + __builtin___clear_cache((char *)(addr), \ + (char *)(addr) + sizeof(instruction_t)) + +#elif defined(__aarch64__) + +typedef unsigned int instruction_t; +const instruction_t BREAKPOINT = 0xd4200000; +const int BREAKPOINT_OFFSET = 0; + +const int SYSCALL_SIZE = sizeof(instruction_t); +const int FRAME_PC_SLOT = 1; +const int ADJUST_RET = 0; +const int PLT_HEADER_SIZE = 32; +const int PLT_ENTRY_SIZE = 16; +const int PERF_REG_PC = 32; // PERF_REG_ARM64_PC + +# define spinPause() asm volatile("isb") +# define rmb() asm volatile("dmb ish" : : : "memory") +# define flushCache(addr) \ + __builtin___clear_cache((char *)(addr), \ + (char *)(addr) + sizeof(instruction_t)) + +#elif defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +typedef unsigned int instruction_t; +const instruction_t BREAKPOINT = 0x7fe00008; +// We place the break point in the third instruction slot on PPCLE as the first +// two are skipped if the call comes from within the same compilation unit +// according to the LE ABI. 
+const int BREAKPOINT_OFFSET = 8; + +const int SYSCALL_SIZE = sizeof(instruction_t); +const int FRAME_PC_SLOT = 2; +const int ADJUST_RET = 0; +const int PLT_HEADER_SIZE = 24; +const int PLT_ENTRY_SIZE = 24; +const int PERF_REG_PC = 32; // PERF_REG_POWERPC_NIP + +# define spinPause() \ + asm volatile("yield") // does nothing, but using or 1,1,1 would lead to + // other problems +# define rmb() \ + asm volatile("sync" \ + : \ + : \ + : "memory") // lwsync would do but better safe than sorry +# define flushCache(addr) \ + __builtin___clear_cache((char *)(addr), \ + (char *)(addr) + sizeof(instruction_t)) + +#else + +# error "Compiling on unsupported arch" + +#endif + +// Return address signing support. +// Apple M1 has 47 bit virtual addresses. +#if defined(__aarch64__) && defined(__APPLE__) +# define ADDRESS_BITS 47 +# define WX_MEMORY true +#else +# define WX_MEMORY false +#endif + +#ifdef ADDRESS_BITS +static inline const void *stripPointer(const void *p) { + return (const void *)((unsigned long)p & ((1UL << ADDRESS_BITS) - 1)); +} +#else +# define stripPointer(p) (p) +#endif + +#endif // _ARCH_H diff --git a/include/async-profiler/codeCache.h b/include/async-profiler/codeCache.h new file mode 100644 index 000000000..534996d9e --- /dev/null +++ b/include/async-profiler/codeCache.h @@ -0,0 +1,161 @@ +/* + * Copyright 2017 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
#ifndef _CODECACHE_H
#define _CODECACHE_H

// Sentinel bounds for a CodeCache whose address range has not been set yet:
// the minimum starts at the highest possible address and the maximum at 0, so
// the first updateBounds() call replaces both.
#define NO_MIN_ADDRESS ((const void *)-1)
#define NO_MAX_ADDRESS ((const void *)0)

typedef bool (*NamePredicate)(const char *name);

const int INITIAL_CODE_CACHE_CAPACITY = 1000;
const int MAX_NATIVE_LIBS = 2048;

// Small header stored immediately in front of a symbol name. Callers only
// ever hold the `char *` to the name; the header is recovered by stepping
// back sizeof(NativeFunc) bytes from that pointer.
class NativeFunc {
private:
  short _lib_index;
  char _mark;
  char _reserved;
  char _name[0];

  static NativeFunc *from(const char *name) {
    return (NativeFunc *)(name - sizeof(NativeFunc));
  }

public:
  // Allocates header + copy of `name`; returns the pointer to the copy.
  static char *create(const char *name, short lib_index);
  // Releases a name previously returned by create().
  static void destroy(char *name);

  static short libIndex(const char *name) { return from(name)->_lib_index; }

  static bool isMarked(const char *name) { return from(name)->_mark != 0; }

  static void mark(const char *name) { from(name)->_mark = 1; }
};

// A named half-open address range [_start, _end).
class CodeBlob {
public:
  const void *_start;
  const void *_end;
  char *_name;

  // qsort-compatible ordering: ascending by start address; for equal starts
  // the blob with the larger end sorts first.
  static int comparator(const void *c1, const void *c2) {
    const CodeBlob *lhs = (const CodeBlob *)c1;
    const CodeBlob *rhs = (const CodeBlob *)c2;
    if (lhs->_start != rhs->_start) {
      return lhs->_start < rhs->_start ? -1 : 1;
    }
    if (lhs->_end == rhs->_end) {
      return 0;
    }
    return lhs->_end > rhs->_end ? -1 : 1;
  }
};

class FrameDesc;

// Symbol table, GOT location and unwinding table of one mapped binary.
// NOTE(review): the class owns _name, _blobs and _dwarf_table through raw
// pointers but is still implicitly copyable; a copy would release them twice.
// Consider deleting the copy operations once all users are confirmed not to
// copy instances.
class CodeCache {
protected:
  char *_name;
  short _lib_index;
  const void *_min_address;
  const void *_max_address;
  const char *_text_base;

  void **_got_start;
  void **_got_end;
  bool _got_patchable;

  int _capacity;
  int _count;
  CodeBlob *_blobs;

  void expand();

public:
  // todo fix hacky override for remote
  FrameDesc *_dwarf_table;
  int _dwarf_table_length;

  CodeCache(const char *name, short lib_index = -1,
            const void *min_address = NO_MIN_ADDRESS,
            const void *max_address = NO_MAX_ADDRESS);

  ~CodeCache();

  const char *name() const { return _name; }

  const void *minAddress() const { return _min_address; }

  const void *maxAddress() const { return _max_address; }

  // True when `address` lies in [_min_address, _max_address).
  bool contains(const void *address) const {
    return address >= _min_address && address < _max_address;
  }

  void setTextBase(const char *text_base) { _text_base = text_base; }

  const char *getTextBase() const { return _text_base; }

  void **gotStart() const { return _got_start; }

  void **gotEnd() const { return _got_end; }

  void add(const void *start, int length, const char *name,
           bool update_bounds = false);
  void updateBounds(const void *start, const void *end);
  void sort();
  void mark(NamePredicate predicate);

  CodeBlob *find(const void *address);
  const char *binarySearch(const void *address);
  const void *findSymbol(const char *name);
  const void *findSymbolByPrefix(const char *prefix);
  const void *findSymbolByPrefix(const char *prefix, int prefix_len);

  void setGlobalOffsetTable(void **start, void **end, bool patchable);
  void **findGlobalOffsetEntry(void *address);
  void makeGotPatchable();

  void setDwarfTable(FrameDesc *table, int length);
  FrameDesc *findFrameDesc(const void *pc);
};

// Fixed-capacity, append-only list of CodeCache pointers. The element is
// stored before the new count is published with release ordering, and
// count() reads it with acquire ordering.
class CodeCacheArray {
private:
  CodeCache *_libs[MAX_NATIVE_LIBS];
  int _count;

public:
  CodeCacheArray() : _count(0) {}

  CodeCache *operator[](int index) { return _libs[index]; }

  int count() { return __atomic_load_n(&_count, __ATOMIC_ACQUIRE); }

  // Appends `lib`. Returns false, leaving the array unchanged, when all
  // MAX_NATIVE_LIBS slots are taken; writing the slot unconditionally would
  // run past the end of _libs.
  bool add(CodeCache *lib) {
    const int index = __atomic_load_n(&_count, __ATOMIC_ACQUIRE);
    if (index >= MAX_NATIVE_LIBS) {
      return false;
    }
    _libs[index] = lib;
    __atomic_store_n(&_count, index + 1, __ATOMIC_RELEASE);
    return true;
  }
};

#endif // _CODECACHE_H
+ */ + +#ifndef _DWARF_H +#define _DWARF_H + +#include "arch.h" +#include + +#if defined(__x86_64__) + +# define DWARF_SUPPORTED true + +const int DW_REG_FP = 6; +const int DW_REG_SP = 7; +const int DW_REG_PC = 16; + +#elif defined(__i386__) + +# define DWARF_SUPPORTED true + +const int DW_REG_FP = 5; +const int DW_REG_SP = 4; +const int DW_REG_PC = 8; + +#else + +# define DWARF_SUPPORTED false + +const int DW_REG_FP = 0; +const int DW_REG_SP = 1; +const int DW_REG_PC = 2; + +#endif + +const int DW_REG_PLT = 128; // denotes special rule for PLT entries +const int DW_REG_INVALID = 255; // denotes unsupported configuration + +const int DW_PC_OFFSET = 1; +const int DW_SAME_FP = 0x80000000; +const int DW_STACK_SLOT = sizeof(void *); + +struct FrameDesc { + u32 loc; + int cfa; + int fp_off; + + static FrameDesc default_frame; + + static int comparator(const void *p1, const void *p2) { + FrameDesc *fd1 = (FrameDesc *)p1; + FrameDesc *fd2 = (FrameDesc *)p2; + return (int)(fd1->loc - fd2->loc); + } +}; + +class DwarfParser { +private: + const char *_name; + const char *_image_base; + const char *_ptr; + + int _capacity; + int _count; + FrameDesc *_table; + FrameDesc *_prev; + + u32 _code_align; + int _data_align; + + const char *add(size_t size) { + const char *ptr = _ptr; + _ptr = ptr + size; + return ptr; + } + + u8 get8() { return *_ptr++; } + + u16 get16() { return *(u16 *)add(2); } + + u32 get32() { return *(u32 *)add(4); } + + u32 getLeb() { + u32 result = 0; + for (u32 shift = 0;; shift += 7) { + u8 b = *_ptr++; + result |= (b & 0x7f) << shift; + if ((b & 0x80) == 0) { + return result; + } + } + } + + int getSLeb() { + int result = 0; + for (u32 shift = 0;; shift += 7) { + u8 b = *_ptr++; + result |= (b & 0x7f) << shift; + if ((b & 0x80) == 0) { + if ((b & 0x40) != 0 && (shift += 7) < 32) { + result |= -1 << shift; + } + return result; + } + } + } + + void skipLeb() { + while (*_ptr++ & 0x80) {} + } + + const char *getPtr() { + const char *ptr = _ptr; + return ptr + 
*(int *)add(4); + } + + void parse(const char *eh_frame_hdr, u64 adjust_eh_frame); + void parseCie(); + void parseFde(); + void parseInstructions(u32 loc, const char *end); + int parseExpression(); + + void addRecord(u32 loc, u32 cfa_reg, int cfa_off, int fp_off); + FrameDesc *addRecordRaw(u32 loc, int cfa, int fp_off); + +public: + DwarfParser(const char *name, const char *image_base, + const char *eh_frame_hdr, u64 adjust_eh_frame = 0); + + // manual parse of fde + DwarfParser(const char *name, const char *image_base); + + void addFde(const char *fde, const char *cie); + + FrameDesc *table() const { return _table; } + + int count() const { return _count; } +}; + +#endif // _DWARF_H diff --git a/include/async-profiler/elf_helpers.h b/include/async-profiler/elf_helpers.h new file mode 100644 index 000000000..f428e44e2 --- /dev/null +++ b/include/async-profiler/elf_helpers.h @@ -0,0 +1,36 @@ +#pragma once + +#include "ddprof_defs.hpp" + +struct Elf; +using ddprof::Offset_t; +using ddprof::ElfAddress_t; + +struct SectionInfo { + const char *_data; + Offset_t _offset; + ElfAddress_t _vaddr_sec; +}; + +// To adjust addresses inside the eh_frame_hdr +// If we are in different segments, we should consider +// (vaddr_eh_frame - vaddr_eh_frame_hdr) +// + (offset_eh_frame - offset_eh_frame_hdr) +struct EhFrameInfo { + SectionInfo _eh_frame; + SectionInfo _eh_frame_hdr; +}; + +bool get_elf_offsets(Elf *elf, const char *filepath, ElfAddress_t &vaddr, + Offset_t &elf_offset, Offset_t &bias_offset, + Offset_t &text_base); + +const char *get_section_data(Elf *elf, const char *section_name, + Offset_t &elf_offset); + +bool get_section_info(Elf *elf, const char *section_name, + SectionInfo §ion_info); + +bool get_eh_frame_info(Elf *elf, EhFrameInfo &eh_frame_info); + +bool process_fdes(Elf *elf); diff --git a/include/async-profiler/mutex.h b/include/async-profiler/mutex.h new file mode 100644 index 000000000..b017bcd82 --- /dev/null +++ b/include/async-profiler/mutex.h @@ -0,0 
+1,55 @@ +/* + * Copyright 2018 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _MUTEX_H +#define _MUTEX_H + +#include "arch.h" +#include + +class Mutex { +protected: + pthread_mutex_t _mutex; + +public: + Mutex(); + + void lock(); + void unlock(); +}; + +class WaitableMutex : public Mutex { +protected: + pthread_cond_t _cond; + +public: + WaitableMutex(); + + bool waitUntil(u64 wall_time); + void notify(); +}; + +class MutexLocker { +private: + Mutex *_mutex; + +public: + MutexLocker(Mutex &mutex) : _mutex(&mutex) { _mutex->lock(); } + + ~MutexLocker() { _mutex->unlock(); } +}; + +#endif // _MUTEX_H diff --git a/include/async-profiler/os.h b/include/async-profiler/os.h new file mode 100644 index 000000000..352cb1d0b --- /dev/null +++ b/include/async-profiler/os.h @@ -0,0 +1,31 @@ +/* + * Copyright 2022 Nick Ripley + * Copyright 2018 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Modified by Nick Ripley to extract components needed for call stack unwinding + */ + +#ifndef _OS_H +#define _OS_H + +#include + +class OS { +public: + static const size_t page_size; + static const size_t page_mask; +}; + +#endif // _OS_H diff --git a/include/async-profiler/safeAccess.h b/include/async-profiler/safeAccess.h new file mode 100644 index 000000000..652e03d71 --- /dev/null +++ b/include/async-profiler/safeAccess.h @@ -0,0 +1,35 @@ +/* + * Copyright 2021 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _SAFEACCESS_H +#define _SAFEACCESS_H + +#include "arch.h" +#include + +#ifdef __clang__ +# define NOINLINE __attribute__((noinline)) +#else +# define NOINLINE __attribute__((noinline, noclone)) +#endif + +namespace SafeAccess { + +NOINLINE __attribute__((aligned(16))) void *load(void **ptr); + +} + +#endif // _SAFEACCESS_H diff --git a/include/async-profiler/stackFrame.h b/include/async-profiler/stackFrame.h new file mode 100644 index 000000000..3beade264 --- /dev/null +++ b/include/async-profiler/stackFrame.h @@ -0,0 +1,69 @@ +/* + * Copyright 2017 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _STACKFRAME_H +#define _STACKFRAME_H + +#include "arch.h" +#include +#include + +class StackFrame { +private: + ucontext_t *_ucontext; + + static bool withinCurrentStack(uintptr_t address) { + // Check that the address is not too far from the stack pointer of current + // context + void *real_sp; + return address - (uintptr_t)&real_sp <= 0xffff; + } + +public: + StackFrame(void *ucontext) { _ucontext = (ucontext_t *)ucontext; } + + void restore(uintptr_t saved_pc, uintptr_t saved_sp, uintptr_t saved_fp) { + if (_ucontext != nullptr) { + pc() = saved_pc; + sp() = saved_sp; + fp() = saved_fp; + } + } + + uintptr_t stackAt(int slot) { return ((uintptr_t *)sp())[slot]; } + + uintptr_t &pc(); + uintptr_t &sp(); + uintptr_t &fp(); + + uintptr_t &retval(); + uintptr_t arg0(); + uintptr_t arg1(); + uintptr_t arg2(); + uintptr_t arg3(); + + void ret(); + + bool popStub(instruction_t *entry, const char *name); + bool popMethod(instruction_t *entry); + + bool checkInterruptedSyscall(); + + // Check if PC points to a syscall instruction + static bool isSyscall(instruction_t *pc); +}; + +#endif // _STACKFRAME_H diff --git a/include/async-profiler/stackWalker.h b/include/async-profiler/stackWalker.h new file mode 100644 index 000000000..60998482a --- /dev/null +++ b/include/async-profiler/stackWalker.h @@ -0,0 +1,34 @@ +/* + * Copyright 2022 Nick Ripley + * Copyright 2021 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modified by Nick Ripley to extract components needed for call stack unwinding + */ + +#ifndef _STACKWALKER_H +#define _STACKWALKER_H + +#include + +#include "codeCache.h" +#include "stack_context.h" + +CodeCache *findLibraryByAddress(CodeCacheArray *cache, const void *address); + +int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, + const ap::StackBuffer &buffer, void const **callchain, + int max_depth, int skip); + +#endif // _STACKWALKER_H diff --git a/include/async-profiler/stack_context.h b/include/async-profiler/stack_context.h new file mode 100644 index 000000000..2ce2801b3 --- /dev/null +++ b/include/async-profiler/stack_context.h @@ -0,0 +1,48 @@ + + +#pragma once + +#include +#include + +#include "perf_archmap.hpp" + +namespace ap { +struct StackContext { + const void *pc; + uint64_t sp; + uint64_t fp; + + void set(const void *pc, uintptr_t sp, uintptr_t fp) { + this->pc = pc; + this->sp = sp; + this->fp = fp; + } +}; + +// Async profiler's unwinding only uses a subset of the registers +StackContext from_regs(std::span regs); + +struct StackBuffer { + StackBuffer(std::span bytes, uint64_t start, uint64_t end) + : _bytes(bytes), sp_start(start), sp_end(end) {} + std::span _bytes; + uint64_t sp_start; // initial SP (in context of the process) + uint64_t sp_end; // sp + size (so root functions = start of stack) + /* + sp_end + For this thread, high address matches where the stack begins + as it grows down. + | + Main() + | + FuncA() + | + ... 
+ | + sp_start + This matches the SP register when the stack was captured + */ +}; + +} // namespace ap diff --git a/include/async-profiler/symbols.h b/include/async-profiler/symbols.h new file mode 100644 index 000000000..b06cc2b53 --- /dev/null +++ b/include/async-profiler/symbols.h @@ -0,0 +1,37 @@ +/* + * Copyright 2017 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _SYMBOLS_H +#define _SYMBOLS_H + +#include "codeCache.h" +#include "mutex.h" + +class Symbols { +private: + static Mutex _parse_lock; + static bool _have_kernel_symbols; + +public: + static void parseKernelSymbols(CodeCache *cc); + static void parseLibraries(CodeCacheArray *array, bool kernel_symbols); + static void parsePidLibraries(pid_t pid, CodeCacheArray *array, + bool kernel_symbols); + + static bool haveKernelSymbols() { return _have_kernel_symbols; } +}; + +#endif // _SYMBOLS_H diff --git a/include/perf_archmap.hpp b/include/perf_archmap.hpp index 7c0473c60..8c46a56a0 100644 --- a/include/perf_archmap.hpp +++ b/include/perf_archmap.hpp @@ -17,7 +17,7 @@ namespace ddprof { inline constexpr size_t k_perf_register_count = 20; inline constexpr uint64_t k_perf_register_mask = 0xff0fff; -# define REGNAME(x) PAM_X86_##x +# define REGNAME(x) ddprof::PAM_X86_##x enum PERF_ARCHMAP_X86 { PAM_X86_RAX, PAM_X86_RBX, @@ -55,7 +55,7 @@ inline constexpr size_t k_perf_register_count = 33; inline constexpr uint64_t k_perf_register_mask = ~(~0ULL << k_perf_register_count); 
-# define REGNAME(x) PAM_ARM_##x +# define REGNAME(x) ddprof::PAM_ARM_##x enum PERF_ARCHMAP_ARM { PAM_ARM_X0, PAM_ARM_X1, diff --git a/src/async-profiler/codeCache.cpp b/src/async-profiler/codeCache.cpp new file mode 100644 index 000000000..72b6d63ce --- /dev/null +++ b/src/async-profiler/codeCache.cpp @@ -0,0 +1,236 @@ +/* + * Copyright 2022 Nick Ripley + * Copyright 2016 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modified by Nick Ripley to extract components needed for call stack unwinding + */ + +#include "codeCache.h" +#include "dwarf.h" +#include "os.h" +#include +#include +#include +#include + +char *NativeFunc::create(const char *name, short lib_index) { + NativeFunc *f = (NativeFunc *)malloc(sizeof(NativeFunc) + 1 + strlen(name)); + f->_lib_index = lib_index; + f->_mark = 0; + return strcpy(f->_name, name); +} + +void NativeFunc::destroy(char *name) { free(from(name)); } + +CodeCache::CodeCache(const char *name, short lib_index, const void *min_address, + const void *max_address) { + _name = NativeFunc::create(name, -1); + _lib_index = lib_index; + _min_address = min_address; + _max_address = max_address; + _text_base = NULL; + + _got_start = NULL; + _got_end = NULL; + _got_patchable = false; + + _dwarf_table = NULL; + _dwarf_table_length = 0; + + _capacity = INITIAL_CODE_CACHE_CAPACITY; + _count = 0; + _blobs = new CodeBlob[_capacity]; +} + +CodeCache::~CodeCache() { + for (int i = 0; i < _count; i++) { + 
NativeFunc::destroy(_blobs[i]._name); + } + NativeFunc::destroy(_name); + delete[] _blobs; + free(_dwarf_table); +} + +void CodeCache::expand() { + CodeBlob *old_blobs = _blobs; + CodeBlob *new_blobs = new CodeBlob[_capacity * 2]; + + memcpy(new_blobs, old_blobs, _count * sizeof(CodeBlob)); + + _capacity *= 2; + _blobs = new_blobs; + delete[] old_blobs; +} + +void CodeCache::add(const void *start, int length, const char *name, + bool update_bounds) { + char *name_copy = NativeFunc::create(name, _lib_index); + // Replace non-printable characters + for (char *s = name_copy; *s != 0; s++) { + if (*s < ' ') + *s = '?'; + } + + if (_count >= _capacity) { + expand(); + } + + const void *end = (const char *)start + length; + _blobs[_count]._start = start; + _blobs[_count]._end = end; + _blobs[_count]._name = name_copy; + _count++; + + if (update_bounds) { + updateBounds(start, end); + } +} + +void CodeCache::updateBounds(const void *start, const void *end) { + if (start < _min_address) + _min_address = start; + if (end > _max_address) + _max_address = end; +} + +void CodeCache::sort() { + if (_count == 0) + return; + + qsort(_blobs, _count, sizeof(CodeBlob), CodeBlob::comparator); + + if (_min_address == NO_MIN_ADDRESS) + _min_address = _blobs[0]._start; + if (_max_address == NO_MAX_ADDRESS) + _max_address = _blobs[_count - 1]._end; +} + +void CodeCache::mark(NamePredicate predicate) { + for (int i = 0; i < _count; i++) { + const char *blob_name = _blobs[i]._name; + if (blob_name != NULL && predicate(blob_name)) { + NativeFunc::mark(blob_name); + } + } +} + +CodeBlob *CodeCache::find(const void *address) { + for (int i = 0; i < _count; i++) { + if (address >= _blobs[i]._start && address < _blobs[i]._end) { + return &_blobs[i]; + } + } + return NULL; +} + +const char *CodeCache::binarySearch(const void *address) { + int low = 0; + int high = _count - 1; + + while (low <= high) { + int mid = (unsigned int)(low + high) >> 1; + if (_blobs[mid]._end <= address) { + low = mid + 
1; + } else if (_blobs[mid]._start > address) { + high = mid - 1; + } else { + return _blobs[mid]._name; + } + } + + // Symbols with zero size can be valid functions: e.g. ASM entry points or + // kernel code. Also, in some cases (endless loop) the return address may + // point beyond the function. + if (low > 0 && + (_blobs[low - 1]._start == _blobs[low - 1]._end || + _blobs[low - 1]._end == address)) { + return _blobs[low - 1]._name; + } + return _name; +} + +const void *CodeCache::findSymbol(const char *name) { + for (int i = 0; i < _count; i++) { + const char *blob_name = _blobs[i]._name; + if (blob_name != NULL && strcmp(blob_name, name) == 0) { + return _blobs[i]._start; + } + } + return NULL; +} + +const void *CodeCache::findSymbolByPrefix(const char *prefix) { + return findSymbolByPrefix(prefix, strlen(prefix)); +} + +const void *CodeCache::findSymbolByPrefix(const char *prefix, int prefix_len) { + for (int i = 0; i < _count; i++) { + const char *blob_name = _blobs[i]._name; + if (blob_name != NULL && strncmp(blob_name, prefix, prefix_len) == 0) { + return _blobs[i]._start; + } + } + return NULL; +} + +void CodeCache::setGlobalOffsetTable(void **start, void **end, bool patchable) { + _got_start = start; + _got_end = end; + _got_patchable = patchable; +} + +void **CodeCache::findGlobalOffsetEntry(void *address) { + for (void **entry = _got_start; entry < _got_end; entry++) { + if (*entry == address) { + makeGotPatchable(); + return entry; + } + } + return NULL; +} + +void CodeCache::makeGotPatchable() { + if (!_got_patchable) { + uintptr_t got_start = (uintptr_t)_got_start & ~OS::page_mask; + uintptr_t got_size = + ((uintptr_t)_got_end - got_start + OS::page_mask) & ~OS::page_mask; + mprotect((void *)got_start, got_size, PROT_READ | PROT_WRITE); + _got_patchable = true; + } +} + +void CodeCache::setDwarfTable(FrameDesc *table, int length) { + _dwarf_table = table; + _dwarf_table_length = length; +} + +FrameDesc *CodeCache::findFrameDesc(const void *pc) { + 
u32 target_loc = (const char *)pc - _text_base; + int low = 0; + int high = _dwarf_table_length - 1; + + while (low <= high) { + int mid = (unsigned int)(low + high) >> 1; + if (_dwarf_table[mid].loc < target_loc) { + low = mid + 1; + } else if (_dwarf_table[mid].loc > target_loc) { + high = mid - 1; + } else { + return &_dwarf_table[mid]; + } + } + + return low > 0 ? &_dwarf_table[low - 1] : NULL; +} diff --git a/src/async-profiler/dwarf.cpp b/src/async-profiler/dwarf.cpp new file mode 100644 index 000000000..05307e21c --- /dev/null +++ b/src/async-profiler/dwarf.cpp @@ -0,0 +1,396 @@ +/* + * Copyright 2022 Nick Ripley + * Copyright 2021 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Modified by Nick Ripley to extract components needed for call stack unwinding + */ + +#include "dwarf.h" +#include + +#include + +#define DEBUG + +enum { + DW_CFA_nop = 0x0, + DW_CFA_set_loc = 0x1, + DW_CFA_advance_loc1 = 0x2, + DW_CFA_advance_loc2 = 0x3, + DW_CFA_advance_loc4 = 0x4, + DW_CFA_offset_extended = 0x5, + DW_CFA_restore_extended = 0x6, + DW_CFA_undefined = 0x7, + DW_CFA_same_value = 0x8, + DW_CFA_register = 0x9, + DW_CFA_remember_state = 0xa, + DW_CFA_restore_state = 0xb, + DW_CFA_def_cfa = 0xc, + DW_CFA_def_cfa_register = 0xd, + DW_CFA_def_cfa_offset = 0xe, + DW_CFA_def_cfa_expression = 0xf, + DW_CFA_expression = 0x10, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_def_cfa_sf = 0x12, + DW_CFA_def_cfa_offset_sf = 0x13, + DW_CFA_val_offset = 0x14, + DW_CFA_val_offset_sf = 0x15, + DW_CFA_val_expression = 0x16, + DW_CFA_GNU_args_size = 0x2e, + + DW_CFA_advance_loc = 0x1, + DW_CFA_offset = 0x2, + DW_CFA_restore = 0x3, +}; + +enum { + DW_OP_breg_pc = 0x70 + DW_REG_PC, + DW_OP_const1u = 0x08, + DW_OP_const1s = 0x09, + DW_OP_const2u = 0x0a, + DW_OP_const2s = 0x0b, + DW_OP_const4u = 0x0c, + DW_OP_const4s = 0x0d, + DW_OP_constu = 0x10, + DW_OP_consts = 0x11, + DW_OP_minus = 0x1c, + DW_OP_plus = 0x22, +}; + +FrameDesc FrameDesc::default_frame = {0, DW_REG_FP | (2 * DW_STACK_SLOT) << 8, + -2 * DW_STACK_SLOT}; + +DwarfParser::DwarfParser(const char *name, const char *image_base, + const char *eh_frame_hdr, u64 adjust_eh_frame) { + _name = name; + _image_base = image_base; + + _capacity = 128; + _count = 0; + _table = (FrameDesc *)malloc(_capacity * sizeof(FrameDesc)); + _prev = NULL; + + _code_align = sizeof(instruction_t); + _data_align = -(int)sizeof(void *); + + parse(eh_frame_hdr, adjust_eh_frame); +} + +void DwarfParser::parse(const char *eh_frame_hdr, u64 adjust_eh_frame) { + u8 version = eh_frame_hdr[0]; + u8 eh_frame_ptr_enc = eh_frame_hdr[1]; + u8 fde_count_enc = eh_frame_hdr[2]; + u8 table_enc = eh_frame_hdr[3]; + + if (version != 1 || 
(eh_frame_ptr_enc & 0x7) != 0x3 || + (fde_count_enc & 0x7) != 0x3 || (table_enc & 0xf7) != 0x33) { + return; + } + + int fde_count = *(int *)(eh_frame_hdr + 8); +#ifdef DEBUG + printf("fde count = %d \n", fde_count); +#endif + int *table = (int *)(eh_frame_hdr + 16); + for (int i = 0; i < fde_count; i++) { + _ptr = eh_frame_hdr + table[i * 2] - adjust_eh_frame; + if (i == 0) { + printf("ptr = %p, table offset = %p \n", _ptr, table[i * 2]); + } + parseFde(); + } +} + +void DwarfParser::parseCie() { + u32 cie_len = get32(); + if (cie_len == 0 || cie_len == 0xffffffff) { + return; + } + + const char *cie_start = _ptr; + _ptr += 5; + while (*_ptr++) {} + _code_align = getLeb(); + _data_align = getSLeb(); + _ptr = cie_start + cie_len; +} + +void DwarfParser::parseFde() { + + u32 fde_len = get32(); + // printf("fde len = %u \n", fde_len); + if (fde_len == 0 || fde_len == 0xffffffff) { + return; + } + + const char *fde_start = _ptr; + u32 cie_offset = get32(); + if (_count == 0) { +#ifdef DEBUG + printf("Change pointer to %lx - %lx \n", fde_start, cie_offset); +#endif + _ptr = fde_start - cie_offset; + parseCie(); + _ptr = fde_start + 4; + } + + u32 range_start = getPtr() - _image_base; + if (_count == 0) { + printf("Dwarf range start: %lx (ptr) - %lx (image) = %lx \n", getPtr(), + _image_base, range_start); + } + + u32 range_len = get32(); + _ptr += getLeb(); + parseInstructions(range_start, fde_start + fde_len); + addRecord(range_start + range_len, DW_REG_SP, DW_STACK_SLOT, DW_SAME_FP); +} + +void DwarfParser::parseInstructions(u32 loc, const char *end) { + const u32 code_align = _code_align; + const int data_align = _data_align; + + u32 cfa_reg = DW_REG_SP; + int cfa_off = DW_STACK_SLOT; + int fp_off = DW_SAME_FP; + int pc_off = -DW_STACK_SLOT; + + u32 rem_cfa_reg; + int rem_cfa_off; + int rem_fp_off; + int rem_pc_off; + + while (_ptr < end) { + u8 op = get8(); + switch (op >> 6) { + case 0: + switch (op) { + case DW_CFA_nop: + case DW_CFA_set_loc: + _ptr = end; + 
break; + case DW_CFA_advance_loc1: + addRecord(loc, cfa_reg, cfa_off, fp_off); + loc += get8() * code_align; + break; + case DW_CFA_advance_loc2: + addRecord(loc, cfa_reg, cfa_off, fp_off); +#ifdef ALIGN_ISSUES + // 16 bits alignement check + if ((reinterpret_cast(_ptr) & 0xf) != 0) { + // The address is not 16-bit aligned here + printf("Addr is not aligned 0x%lx (code_align=%i)\n", _ptr, + code_align); + exit(1); + } +#endif + loc += get16() * code_align; + break; + case DW_CFA_advance_loc4: + addRecord(loc, cfa_reg, cfa_off, fp_off); + loc += get32() * code_align; + break; + case DW_CFA_offset_extended: + switch (getLeb()) { + case DW_REG_FP: + fp_off = getLeb() * data_align; + break; + case DW_REG_PC: + pc_off = getLeb() * data_align; + break; + default: + skipLeb(); + } + break; + case DW_CFA_restore_extended: + case DW_CFA_undefined: + case DW_CFA_same_value: + skipLeb(); + break; + case DW_CFA_register: + skipLeb(); + skipLeb(); + break; + case DW_CFA_remember_state: + rem_cfa_reg = cfa_reg; + rem_cfa_off = cfa_off; + rem_fp_off = fp_off; + rem_pc_off = pc_off; + break; + case DW_CFA_restore_state: + cfa_reg = rem_cfa_reg; + cfa_off = rem_cfa_off; + fp_off = rem_fp_off; + pc_off = rem_pc_off; + break; + case DW_CFA_def_cfa: + cfa_reg = getLeb(); + cfa_off = getLeb(); + break; + case DW_CFA_def_cfa_register: + cfa_reg = getLeb(); + break; + case DW_CFA_def_cfa_offset: + cfa_off = getLeb(); + break; + case DW_CFA_def_cfa_expression: { + u32 len = getLeb(); + cfa_reg = len == 11 ? 
DW_REG_PLT : DW_REG_INVALID; + cfa_off = DW_STACK_SLOT; + _ptr += len; + break; + } + case DW_CFA_expression: + skipLeb(); + _ptr += getLeb(); + break; + case DW_CFA_offset_extended_sf: + switch (getLeb()) { + case DW_REG_FP: + fp_off = getSLeb() * data_align; + break; + case DW_REG_PC: + pc_off = getSLeb() * data_align; + break; + default: + skipLeb(); + } + break; + case DW_CFA_def_cfa_sf: + cfa_reg = getLeb(); + cfa_off = getSLeb() * data_align; + break; + case DW_CFA_def_cfa_offset_sf: + cfa_off = getSLeb() * data_align; + break; + case DW_CFA_val_offset: + case DW_CFA_val_offset_sf: + skipLeb(); + skipLeb(); + break; + case DW_CFA_val_expression: + if (getLeb() == DW_REG_PC) { + int pc_off = parseExpression(); + if (pc_off != 0) { + fp_off = DW_PC_OFFSET | (pc_off << 1); + } + } else { + _ptr += getLeb(); + } + break; + case DW_CFA_GNU_args_size: + skipLeb(); + break; + default: + return; + } + break; + case DW_CFA_advance_loc: + addRecord(loc, cfa_reg, cfa_off, fp_off); + loc += (op & 0x3f) * code_align; + break; + case DW_CFA_offset: + switch (op & 0x3f) { + case DW_REG_FP: + fp_off = getLeb() * data_align; + break; + case DW_REG_PC: + pc_off = getLeb() * data_align; + break; + default: + skipLeb(); + } + break; + case DW_CFA_restore: + break; + } + } + + addRecord(loc, cfa_reg, cfa_off, fp_off); +} + +// Parse a limited subset of DWARF expressions, which is used in +// DW_CFA_val_expression to point to the previous PC relative to the current PC. +// Returns the offset of the previous PC from the current PC. 
+int DwarfParser::parseExpression() { + int pc_off = 0; + int tos = 0; + + u32 len = getLeb(); + const char *end = _ptr + len; + + while (_ptr < end) { + u8 op = get8(); + switch (op) { + case DW_OP_breg_pc: + pc_off = getSLeb(); + break; + case DW_OP_const1u: + tos = get8(); + break; + case DW_OP_const1s: + tos = (signed char)get8(); + break; + case DW_OP_const2u: + tos = get16(); + break; + case DW_OP_const2s: + tos = (short)get16(); + break; + case DW_OP_const4u: + case DW_OP_const4s: + tos = get32(); + break; + case DW_OP_constu: + tos = getLeb(); + break; + case DW_OP_consts: + tos = getSLeb(); + break; + case DW_OP_minus: + pc_off -= tos; + break; + case DW_OP_plus: + pc_off += tos; + break; + default: + _ptr = end; + return 0; + } + } + + return pc_off; +} + +void DwarfParser::addRecord(u32 loc, u32 cfa_reg, int cfa_off, int fp_off) { + int cfa = cfa_reg | cfa_off << 8; + if (_prev == NULL || (_prev->loc == loc && --_count >= 0) || + _prev->cfa != cfa || _prev->fp_off != fp_off) { + _prev = addRecordRaw(loc, cfa, fp_off); + } +} + +FrameDesc *DwarfParser::addRecordRaw(u32 loc, int cfa, int fp_off) { + if (_count >= _capacity) { + _capacity *= 2; + _table = (FrameDesc *)realloc(_table, _capacity * sizeof(FrameDesc)); + } + + FrameDesc *f = &_table[_count++]; + f->loc = loc; + f->cfa = cfa; + f->fp_off = fp_off; + return f; +} diff --git a/src/async-profiler/elf_helpers.cpp b/src/async-profiler/elf_helpers.cpp new file mode 100644 index 000000000..ba7b04134 --- /dev/null +++ b/src/async-profiler/elf_helpers.cpp @@ -0,0 +1,245 @@ +#include "elf_helpers.h" + +#include "build_id.hpp" +#include "logger.hpp" + +#include +#include +#include + +#include +#include + +#define LG_WRN(args...) 
printf(args) + +const char *get_section_data(Elf *elf, const char *section_name, + Offset_t &elf_offset) { + // Get the string table index for the section header strings + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx) != 0) { + fprintf(stderr, + "Failed to get string table index for section header strings: %s\n", + elf_errmsg(-1)); + return nullptr; + } + + // Iterate over the sections and find the .eh_frame section + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn(elf, scn)) != NULL) { + // Get the section header for the current section + GElf_Shdr shdr; + if (gelf_getshdr(scn, &shdr) != &shdr) { + fprintf(stderr, "Failed to get section header: %s\n", elf_errmsg(-1)); + return nullptr; + } + + // Get the name of the current section + char *name = elf_strptr(elf, shstrndx, shdr.sh_name); + if (name == NULL) { + fprintf(stderr, "Failed to get section name: %s\n", elf_errmsg(-1)); + return nullptr; + } + + // Check if the section is the .eh_frame section + if (strcmp(name, section_name) == 0) { + printf("%s section found at offset 0x%lx, size %ld\n", section_name, + shdr.sh_offset, shdr.sh_size); + // Get the data for the .eh_frame section + elf_offset = shdr.sh_offset; + Elf_Data *data = elf_getdata(scn, NULL); + if (data == NULL) { + fprintf(stderr, "Unable to find section data: %s\n", section_name); + return nullptr; + } else { + return reinterpret_cast(data->d_buf); + } + } + } + + fprintf(stderr, "Failed to find section: %s\n", section_name); + return nullptr; +} + +bool get_section_info(Elf *elf, const char *section_name, + SectionInfo §ion_info) { + // Get the string table index for the section header strings + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx) != 0) { + fprintf(stderr, + "Failed to get string table index for section header strings: %s\n", + elf_errmsg(-1)); + return false; + } + + // Iterate over the sections and find the .eh_frame section + Elf_Scn *scn = NULL; + bool found = false; + GElf_Shdr shdr; + + while ((scn = 
elf_nextscn(elf, scn)) != NULL) { + // Get the section header for the current section + if (gelf_getshdr(scn, &shdr) != &shdr) { + fprintf(stderr, "Failed to get section header: %s\n", elf_errmsg(-1)); + return false; + } + + // Get the name of the current section + char *name = elf_strptr(elf, shstrndx, shdr.sh_name); + if (name == NULL) { + fprintf(stderr, "Failed to get section name: %s\n", elf_errmsg(-1)); + return false; + } + + // Check if the section is the .eh_frame section + if (strcmp(name, section_name) == 0) { + printf("%s section found at offset 0x%lx, size %ld, vaddr %lx\n", + section_name, shdr.sh_offset, shdr.sh_size, shdr.sh_addr); + // Get the data for the .eh_frame section + Elf_Data *data = elf_getdata(scn, NULL); + if (data == NULL) { + fprintf(stderr, "Unable to find section data: %s\n", section_name); + return false; + } else { + section_info._data = reinterpret_cast(data->d_buf); + section_info._offset = shdr.sh_offset; + section_info._vaddr_sec = shdr.sh_addr; + found = true; + } + } + } + if (!found) { + fprintf(stderr, "Failed to find section: %s\n", section_name); + return false; + } + + return true; +} + +bool get_elf_offsets(Elf *elf, const char *filepath, ElfAddress_t &vaddr, + Offset_t &elf_offset, Offset_t &bias_offset, + Offset_t &text_base) { + vaddr = 0; + bias_offset = 0; + GElf_Ehdr ehdr_mem; + GElf_Ehdr *ehdr = gelf_getehdr(elf, &ehdr_mem); + if (ehdr == nullptr) { + LG_WRN("Invalid elf %s", filepath); + return false; + } + text_base = ehdr->e_entry; + + bool found_exec = false; + switch (ehdr->e_type) { + case ET_EXEC: + case ET_CORE: + case ET_DYN: { + size_t phnum; + if (unlikely(elf_getphdrnum(elf, &phnum) != 0)) { + LG_WRN("Invalid elf %s", filepath); + return false; + } + for (size_t i = 0; i < phnum; ++i) { + GElf_Phdr phdr_mem; + // Retrieve the program header + GElf_Phdr *ph = gelf_getphdr(elf, i, &phdr_mem); + if (unlikely(ph == NULL)) { + LG_WRN("Invalid elf %s", filepath); + return false; + } + constexpr int rx = 
PF_X | PF_R; + if (ph->p_type == PT_LOAD) { + if ((ph->p_flags & rx) == rx) { + if (!found_exec) { + vaddr = ph->p_vaddr; + bias_offset = ph->p_vaddr - ph->p_offset; + printf("%lx - %lx (vaddr - p_offset) \n", ph->p_vaddr, + ph->p_offset); + elf_offset = ph->p_offset; + found_exec = true; + } else { + // There can be multiple executable load segments. + // The first one should be considered (this is valid) + // Leaving the failure for now as it allows me to find test cases + printf("Multiple exec LOAD segments: %s", filepath); + } + } + } + } + break; + } + default: + LG_WRN("Unsupported elf type (%d) %s", ehdr->e_type, filepath); + return false; + } + + if (!found_exec) { + LG_WRN("Not executable LOAD segment found in %s", filepath); + } + return found_exec; +} + +bool get_eh_frame_info(Elf *elf, EhFrameInfo &eh_frame_info) { + if (!get_section_info(elf, ".eh_frame_hdr", eh_frame_info._eh_frame_hdr)) { + return false; + } + if (!get_section_info(elf, ".eh_frame", eh_frame_info._eh_frame)) { + return false; + } + return true; +} + +// correct way of parsing the FDEs +bool process_fdes(Elf *elf) { + Elf_Scn *scn = NULL; + Elf_Data *data = NULL; + GElf_Shdr shdr; + + // Get the string table index for the section header strings + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx) != 0) { + fprintf(stderr, + "Failed to get string table index for section header strings: %s\n", + elf_errmsg(-1)); + return false; + } + + while ((scn = elf_nextscn(elf, scn)) != NULL) { + gelf_getshdr(scn, &shdr); + if (shdr.sh_type == SHT_PROGBITS && + (strcmp(".debug_frame", elf_strptr(elf, shstrndx, shdr.sh_name)) == 0 || + strcmp(".eh_frame", elf_strptr(elf, shstrndx, shdr.sh_name)) == 0)) { + // This is the .debug_frame or .eh_frame section + data = elf_getdata(scn, NULL); + break; + } + } + if (!data) { + fprintf(stderr, "Unable to find dwarf information\n"); + return false; + } + + // Iterate through the CFI records in the .debug_frame or .eh_frame section + Dwarf_Off offset = 
0; + while (true) { + // Get the next CFI record + Dwarf_Off next_offset; + Dwarf_CFI_Entry entry; + + int result = dwarf_next_cfi( + reinterpret_cast(elf_getident(elf, NULL)), data, + strcmp(".eh_frame", elf_strptr(elf, shstrndx, shdr.sh_name)) == 0, + offset, &next_offset, &entry); + if (result != 0) { + // End of CFI records + break; + } + + // printf("cfi id = %lx\n", entry); + // Process the CFI record + // ... + + // Move to the next CFI record + offset = next_offset; + } + return true; +} diff --git a/src/async-profiler/mutex.cpp b/src/async-profiler/mutex.cpp new file mode 100644 index 000000000..8f58cc091 --- /dev/null +++ b/src/async-profiler/mutex.cpp @@ -0,0 +1,38 @@ +/* + * Copyright 2018 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "mutex.h" + +Mutex::Mutex() { + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + pthread_mutex_init(&_mutex, &attr); +} + +void Mutex::lock() { pthread_mutex_lock(&_mutex); } + +void Mutex::unlock() { pthread_mutex_unlock(&_mutex); } + +WaitableMutex::WaitableMutex() : Mutex() { pthread_cond_init(&_cond, NULL); } + +bool WaitableMutex::waitUntil(u64 wall_time) { + struct timespec ts = {(time_t)(wall_time / 1000000), + (long)(wall_time % 1000000) * 1000}; + return pthread_cond_timedwait(&_cond, &_mutex, &ts) != 0; +} + +void WaitableMutex::notify() { pthread_cond_signal(&_cond); } diff --git a/src/async-profiler/os.cpp b/src/async-profiler/os.cpp new file mode 100644 index 000000000..d6b2510cc --- /dev/null +++ b/src/async-profiler/os.cpp @@ -0,0 +1,6 @@ +#include + +#include "os.h" + +const size_t OS::page_size = sysconf(_SC_PAGESIZE); +const size_t OS::page_mask = OS::page_size - 1; \ No newline at end of file diff --git a/src/async-profiler/safeAccess.cpp b/src/async-profiler/safeAccess.cpp new file mode 100644 index 000000000..e5ce221c0 --- /dev/null +++ b/src/async-profiler/safeAccess.cpp @@ -0,0 +1,79 @@ +#include +#include +#include +#include +#include + +#include "safeAccess.h" +#include "stackFrame.h" + +static struct sigaction oldact; + +namespace SafeAccess { + +NOINLINE __attribute__((aligned(16))) void *load(void **ptr) { return *ptr; } + +// skipFaultInstruction returns the address of the instruction immediately +// following the given instruction. pc is assumed to point to the same kind of +// load that SafeAccess::load would use +static uintptr_t skipFaultInstruction(uintptr_t pc) { +#if defined(__x86_64__) + return *(u16 *)pc == 0x8b48 ? 3 : 0; // mov rax, [reg] +#elif defined(__i386__) + return *(u8 *)pc == 0x8b ? 2 : 0; // mov eax, [reg] +#elif defined(__arm__) || defined(__thumb__) + return (*(instruction_t *)pc & 0x0e50f000) == 0x04100000 ? 
4 + : 0; // ldr r0, [reg] +#elif defined(__aarch64__) + return (*(instruction_t *)pc & 0xffc0001f) == 0xf9400000 ? 4 + : 0; // ldr x0, [reg] +#else + return sizeof(instruction_t); +#endif +} + +} // namespace SafeAccess + +static void segv_handler(int sig, siginfo_t *si, void *ucontext) { + ucontext_t *uc = (ucontext_t *)ucontext; + StackFrame frame(uc); + + // If we segfault in the SafeAccess::load, skip past the bad access and + // set the return value to 0. + // + // We have to check if we are *near* the beginning of load, since there will + // be a few instructions (for frame pointer setup) before the actual bad + // access + if ((frame.pc() - (uintptr_t)SafeAccess::load) < 16) { + uintptr_t instructionEncodedLength = + SafeAccess::skipFaultInstruction(frame.pc()); + frame.pc() += instructionEncodedLength; + frame.retval() = 0x0; + return; + } + + // fall back otherwise + if (oldact.sa_sigaction != nullptr) { + oldact.sa_sigaction(sig, si, ucontext); + } else if (oldact.sa_handler != nullptr) { + oldact.sa_handler(sig); + } else { + // If there wasn't a fallback, re-set to the default handler + // (which just aborts the program) and re-raise the signal + struct sigaction sa; + memset(&sa, 0, sizeof(struct sigaction)); + sa.sa_handler = SIG_DFL; + sigaction(sig, &sa, nullptr); + raise(sig); + } +} + +__attribute__((constructor)) static void init(void) { + struct sigaction sa; + memset(&oldact, 0, sizeof(struct sigaction)); + memset(&sa, 0, sizeof(struct sigaction)); + sa.sa_sigaction = segv_handler; + sa.sa_flags = SA_SIGINFO; + + sigaction(SIGSEGV, &sa, &oldact); +} \ No newline at end of file diff --git a/src/async-profiler/stackFrame_aarch64.cpp b/src/async-profiler/stackFrame_aarch64.cpp new file mode 100644 index 000000000..9b7cc6c71 --- /dev/null +++ b/src/async-profiler/stackFrame_aarch64.cpp @@ -0,0 +1,112 @@ +/* + * Copyright 2021 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except 
in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __aarch64__ + +# include "stackFrame.h" +# include +# include +# include + +# ifdef __APPLE__ +# define REG(l, m) _ucontext->uc_mcontext->__ss.__##m +# else +# define REG(l, m) _ucontext->uc_mcontext.l +# endif + +uintptr_t &StackFrame::pc() { return (uintptr_t &)REG(pc, pc); } + +uintptr_t &StackFrame::sp() { return (uintptr_t &)REG(sp, sp); } + +uintptr_t &StackFrame::fp() { return (uintptr_t &)REG(regs[29], fp); } + +uintptr_t &StackFrame::retval() { return (uintptr_t &)REG(regs[0], x[0]); } + +uintptr_t StackFrame::arg0() { return (uintptr_t)REG(regs[0], x[0]); } + +uintptr_t StackFrame::arg1() { return (uintptr_t)REG(regs[1], x[1]); } + +uintptr_t StackFrame::arg2() { return (uintptr_t)REG(regs[2], x[2]); } + +uintptr_t StackFrame::arg3() { return (uintptr_t)REG(regs[3], x[3]); } + +void StackFrame::ret() { pc() = REG(regs[30], lr); } + +bool StackFrame::popStub(instruction_t *entry, const char *name) { + instruction_t *ip = (instruction_t *)pc(); + if (ip == entry || *ip == 0xd65f03c0 || strncmp(name, "itable", 6) == 0 || + strncmp(name, "vtable", 6) == 0 || + strncmp(name, "compare_long_string_", 20) == 0 || + strcmp(name, "zero_blocks") == 0 || + strcmp(name, "forward_copy_longs") == 0 || + strcmp(name, "backward_copy_longs") == 0 || + strcmp(name, "InlineCacheBuffer") == 0) { + ret(); + return true; + } else if (entry != NULL && entry[0] == 0xa9bf7bfd) { + // The stub begins with + // stp x29, x30, [sp, #-16]! 
+ // mov x29, sp + if (ip == entry + 1) { + sp() += 16; + ret(); + return true; + } else if (entry[1] == 0x910003fd && withinCurrentStack(fp())) { + sp() = fp() + 16; + fp() = stackAt(-2); + pc() = stackAt(-1); + return true; + } + } + return false; +} + +bool StackFrame::popMethod(instruction_t *entry) { + instruction_t *ip = (instruction_t *)pc(); + if ((*ip & 0xffe07fff) == 0xa9007bfd) { + // stp x29, x30, [sp, #offset] + // SP has been adjusted, but FP not yet stored in a new frame + unsigned int offset = (*ip >> 12) & 0x1f8; + sp() += offset + 16; + } + ret(); + return true; +} + +bool StackFrame::checkInterruptedSyscall() { +# ifdef __APPLE__ + // We are not interested in syscalls that do not check error code, e.g. + // semaphore_wait_trap + if (*(instruction_t *)pc() == 0xd65f03c0) { + return true; + } + // If carry flag is set, the error code is in low byte of x0 + if (REG(pstate, cpsr) & (1 << 29)) { + return (retval() & 0xff) == EINTR || (retval() & 0xff) == ETIMEDOUT; + } else { + return retval() == (uintptr_t)-EINTR; + } +# else + return retval() == (uintptr_t)-EINTR; +# endif +} + +bool StackFrame::isSyscall(instruction_t *pc) { + // svc #0 or svc #80 + return (*pc & 0xffffefff) == 0xd4000001; +} + +#endif // __aarch64__ diff --git a/src/async-profiler/stackFrame_arm.cpp b/src/async-profiler/stackFrame_arm.cpp new file mode 100644 index 000000000..0f1b46b1f --- /dev/null +++ b/src/async-profiler/stackFrame_arm.cpp @@ -0,0 +1,101 @@ +/* + * Copyright 2017 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined(__arm__) || defined(__thumb__) + +# include "stackFrame.h" +# include +# include + +uintptr_t &StackFrame::pc() { + return (uintptr_t &)_ucontext->uc_mcontext.arm_pc; +} + +uintptr_t &StackFrame::sp() { + return (uintptr_t &)_ucontext->uc_mcontext.arm_sp; +} + +uintptr_t &StackFrame::fp() { + return (uintptr_t &)_ucontext->uc_mcontext.arm_fp; +} + +uintptr_t &StackFrame::retval() { + return (uintptr_t &)_ucontext->uc_mcontext.arm_r0; +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r0; +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r1; +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r2; +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)_ucontext->uc_mcontext.arm_r3; +} + +void StackFrame::ret() { + _ucontext->uc_mcontext.arm_pc = _ucontext->uc_mcontext.arm_lr; +} + +bool StackFrame::popStub(instruction_t *entry, const char *name) { + instruction_t *ip = (instruction_t *)pc(); + if (ip == entry || *ip == 0xe12fff1e || strncmp(name, "itable", 6) == 0 || + strncmp(name, "vtable", 6) == 0 || + strcmp(name, "InlineCacheBuffer") == 0) { + ret(); + return true; + } + return false; +} + +bool StackFrame::popMethod(instruction_t *entry) { + instruction_t *ip = (instruction_t *)pc(); + if (ip > entry && ip <= entry + 4 && (*ip & 0xffffff00) == 0xe24dd000) { + // push {r11, lr} + // mov r11, sp (optional) + // -> sub sp, sp, #offs + fp() = stackAt(0); + pc() = stackAt(1); + sp() += 8; + return true; + } else if (*ip == 0xe8bd4800) { + // add sp, sp, #offs + // -> pop {r11, lr} + fp() = stackAt(0); + pc() = stackAt(1); + sp() += 8; + return true; + } + ret(); + return true; +} + +bool StackFrame::checkInterruptedSyscall() { + return retval() == (uintptr_t)-EINTR; +} + +bool StackFrame::isSyscall(instruction_t *pc) { + // swi #0 + return 
*pc == 0xef000000; +} + +#endif // defined(__arm__) || defined(__thumb__) diff --git a/src/async-profiler/stackFrame_i386.cpp b/src/async-profiler/stackFrame_i386.cpp new file mode 100644 index 000000000..a08e5f11f --- /dev/null +++ b/src/async-profiler/stackFrame_i386.cpp @@ -0,0 +1,106 @@ +/* + * Copyright 2017 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __i386__ + +# include "stackFrame.h" +# include +# include + +uintptr_t &StackFrame::pc() { + return (uintptr_t &)_ucontext->uc_mcontext.gregs[REG_EIP]; +} + +uintptr_t &StackFrame::sp() { + return (uintptr_t &)_ucontext->uc_mcontext.gregs[REG_ESP]; +} + +uintptr_t &StackFrame::fp() { + return (uintptr_t &)_ucontext->uc_mcontext.gregs[REG_EBP]; +} + +uintptr_t &StackFrame::retval() { + return (uintptr_t &)_ucontext->uc_mcontext.gregs[REG_EAX]; +} + +uintptr_t StackFrame::arg0() { return stackAt(1); } + +uintptr_t StackFrame::arg1() { return stackAt(2); } + +uintptr_t StackFrame::arg2() { return stackAt(3); } + +uintptr_t StackFrame::arg3() { return stackAt(4); } + +void StackFrame::ret() { + pc() = stackAt(0); + sp() += 4; +} + +bool StackFrame::popStub(instruction_t *entry, const char *name) { + instruction_t *ip = (instruction_t *)pc(); + if (ip == entry || *ip == 0xc3 || strncmp(name, "itable", 6) == 0 || + strncmp(name, "vtable", 6) == 0 || + strcmp(name, "InlineCacheBuffer") == 0) { + pc() = stackAt(0); + sp() += 4; + return true; + } else if (entry != NULL && 
entry[0] == 0x55 && entry[1] == 0x8b && + entry[2] == 0xec) { + // The stub begins with + // push ebp + // mov ebp, esp + if (ip == entry + 1) { + pc() = stackAt(1); + sp() += 8; + return true; + } else if (withinCurrentStack(fp())) { + sp() = fp() + 8; + fp() = stackAt(-2); + pc() = stackAt(-1); + return true; + } + } + return false; +} + +bool StackFrame::popMethod(instruction_t *entry) { + instruction_t *ip = (instruction_t *)pc(); + if (ip <= entry || *ip == 0xc3 || *ip == 0x55 // ret or push ebp + || (((uintptr_t)ip & 0xfff) && ip[-1] == 0x5d)) // after pop ebp + { + pc() = stackAt(0); + sp() += 4; + return true; + } else if (*ip == 0x5d) { + // pop ebp + fp() = stackAt(0); + pc() = stackAt(1); + sp() += 8; + return true; + } + return false; +} + +bool StackFrame::checkInterruptedSyscall() { + return retval() == (uintptr_t)-EINTR; +} + +bool StackFrame::isSyscall(instruction_t *pc) { + // int 0x80 + return pc[0] == 0xcd && pc[1] == 0x80; +} + +#endif // __i386__ diff --git a/src/async-profiler/stackFrame_ppc64.cpp b/src/async-profiler/stackFrame_ppc64.cpp new file mode 100644 index 000000000..465e0a0a8 --- /dev/null +++ b/src/async-profiler/stackFrame_ppc64.cpp @@ -0,0 +1,136 @@ +/* + * Copyright 2021 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * Authors: Andrei Pangin and Gunter Haug + */ + +#if defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +# include "stackFrame.h" +# include +# include + +uintptr_t &StackFrame::pc() { + return (uintptr_t &)_ucontext->uc_mcontext.regs->nip; +} + +uintptr_t &StackFrame::sp() { + return (uintptr_t &)_ucontext->uc_mcontext.regs->gpr[1]; +} + +uintptr_t &StackFrame::fp() { + return *((uintptr_t *)_ucontext->uc_mcontext.regs->gpr[1]); +} + +uintptr_t &StackFrame::retval() { + return (uintptr_t &)_ucontext->uc_mcontext.regs->gpr[3]; +} + +uintptr_t StackFrame::arg0() { + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[3]; +} + +uintptr_t StackFrame::arg1() { + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[4]; +} + +uintptr_t StackFrame::arg2() { + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[5]; +} + +uintptr_t StackFrame::arg3() { + return (uintptr_t)_ucontext->uc_mcontext.regs->gpr[6]; +} + +void StackFrame::ret() { + _ucontext->uc_mcontext.regs->nip = _ucontext->uc_mcontext.regs->link; +} + +static inline bool inC1EpilogueCrit(uintptr_t pc) { + if (!(pc & 0xfff)) { + // Make sure we are not at the page boundary, so that reading [pc - 1] is + // safe + return false; + } + // C1 epilogue and critical section (posX) + // 3821**** add r1,r1,xx + // pos3 xxxxxxxx + // pos2 1000e1eb ld r31,16(r1) + // pos1 a603e87f mtlr r31 + // xxxxxxxx + // 2000804e blr + instruction_t *inst = (instruction_t *)pc; + if (inst[1] == 0xebe10010 && inst[2] == 0x7fe803a6 || + inst[0] == 0xebe10010 && inst[1] == 0x7fe803a6 || + inst[-1] == 0xebe10010 && inst[0] == 0x7fe803a6) { + return true; + } + + return false; // not in critical section +} + +static inline bool inC2PrologueCrit(uintptr_t pc) { + // C2 prologue and critical section + // f821**** stdu r1, (xx)r1 + // pos1 fa950010 std r20,16(r21) + instruction_t *inst = (instruction_t *)pc; + if (inst[0] == 0xfa950010 && (inst[-1] & 0xffff0000) == 0xf8210000) { + return true; + } + + return false; // not 
in critical section +} + +bool StackFrame::popStub(instruction_t *entry, const char *name) { + pc() = _ucontext->uc_mcontext.regs->link; + return true; +} + +bool StackFrame::popMethod(instruction_t *entry) { + // On PPC there is a valid back link to the previous frame at all times. The + // callee stores the return address in the caller's frame before it constructs + // its own frame. After it has destroyed its frame it restores the link + // register and returns. A problematic sequence is the prologue/epilogue of a + // compiled method before/after frame construction/destruction. Therefore + // popping the frame would not help here, as it is not yet/anymore present, + // rather more adjusting the pc to the callers pc does the trick. There are + // two exceptions to this, One in the prologue of C2 compiled methods and one + // in the epilogue of C1 compiled methods. + if (inC1EpilogueCrit(pc())) { + // lr not yet set: use the value stored in the frame + pc() = stackAt(2); + } else if (inC2PrologueCrit(pc())) { + // frame constructed but lr not yet stored in it: just do it here + *(((unsigned long *)_ucontext->uc_mcontext.regs->gpr[21]) + 2) = + (unsigned long)_ucontext->uc_mcontext.regs->gpr[20]; + } else { + // most probably caller's framer is still on top but pc is already in + // callee: use caller's pc + pc() = _ucontext->uc_mcontext.regs->link; + } + + return true; +} + +bool StackFrame::checkInterruptedSyscall() { + return retval() == (uintptr_t)-EINTR; +} + +bool StackFrame::isSyscall(instruction_t *pc) { + // sc/svc + return (*pc & 0x1f) == 17; +} + +#endif // defined(__PPC64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) diff --git a/src/async-profiler/stackFrame_x64.cpp b/src/async-profiler/stackFrame_x64.cpp new file mode 100644 index 000000000..a9c08d32d --- /dev/null +++ b/src/async-profiler/stackFrame_x64.cpp @@ -0,0 +1,54 @@ +/* + * Copyright 2022 Nick Ripley + * Copyright 2017 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 
(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modified by Nick Ripley to extract components needed for call stack unwinding + */ + +#ifdef __x86_64__ + +# include "stackFrame.h" +# include +# include +# include + +# ifdef __APPLE__ +# define REG(l, m) _ucontext->uc_mcontext->__ss.__##m +# else +# define REG(l, m) _ucontext->uc_mcontext.gregs[REG_##l] +# endif + +uintptr_t &StackFrame::pc() { return (uintptr_t &)REG(RIP, rip); } + +uintptr_t &StackFrame::sp() { return (uintptr_t &)REG(RSP, rsp); } + +uintptr_t &StackFrame::fp() { return (uintptr_t &)REG(RBP, rbp); } + +uintptr_t &StackFrame::retval() { return (uintptr_t &)REG(RAX, rax); } + +uintptr_t StackFrame::arg0() { return (uintptr_t)REG(RDI, rdi); } + +uintptr_t StackFrame::arg1() { return (uintptr_t)REG(RSI, rsi); } + +uintptr_t StackFrame::arg2() { return (uintptr_t)REG(RDX, rdx); } + +uintptr_t StackFrame::arg3() { return (uintptr_t)REG(RCX, rcx); } + +void StackFrame::ret() { + pc() = stackAt(0); + sp() += 8; +} + +#endif // __x86_64__ diff --git a/src/async-profiler/stackWalker.cpp b/src/async-profiler/stackWalker.cpp new file mode 100644 index 000000000..17834537c --- /dev/null +++ b/src/async-profiler/stackWalker.cpp @@ -0,0 +1,180 @@ +/* + * Copyright 2022 Nick Ripley + * Copyright 2021 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modified by Nick Ripley to extract components needed for call stack unwinding + * Modified by r1viollet to allow remote unwinding + */ +#include "stackWalker.h" +#include "codeCache.h" +#include "dwarf.h" +#include "logger.hpp" +#include "safeAccess.h" +#include "stackFrame.h" +#include + +const intptr_t MIN_VALID_PC = 0x1000; +const intptr_t MAX_WALK_SIZE = 0x100000; +const intptr_t MAX_FRAME_SIZE = 0x40000; + +bool read_memory(uint64_t addr, uint64_t *res, const ap::StackBuffer &buffer) { + if (addr < 4095) { + return false; + } + if ((addr & 0x7) != 0) { + // not aligned + return false; + } + if (addr > addr + sizeof(uint64_t)) { + return false; + } + + if (addr < buffer.sp_start && addr > buffer.sp_start - 4096) { + if (*res > buffer.sp_start && *res < buffer.sp_end) { + // todo this is true only on leaf function afaik ? 
+ // printf("red zone optim (current value %lx) -- OK\n", *res); + // high addr + // + // sp(2) + // + // sp(3) (-32 fp_off) + // + // low addr + // red zone + return true; + } + // printf("red zone optim (current value %lx) -- KO\n", *res); + return false; + } else if (addr < buffer.sp_start || + addr + sizeof(uint64_t) > buffer.sp_end) { + return false; + } + uint64_t stack_idx = addr - buffer.sp_start; + if (stack_idx > addr) { + return false; + } + *res = *(uint64_t *)(buffer._bytes.data() + stack_idx); + return true; +} + +// todo const correctness +CodeCache *findLibraryByAddress(CodeCacheArray *cache, const void *address) { + const int native_lib_count = cache->count(); + for (int i = 0; i < native_lib_count; i++) { + if (cache->operator[](i)->contains(address)) { + return cache->operator[](i); + } + } + return NULL; +} + +bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, + FrameDesc *f); + +bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, + CodeCacheArray *cache) { + FrameDesc *f; + CodeCache *cc = findLibraryByAddress(cache, sc.pc); + if (cc == NULL || (f = cc->findFrameDesc(sc.pc)) == NULL) { + f = &FrameDesc::default_frame; + } +// const char *sym = cc?cc->binarySearch(sc.pc):"unknown"; +// printf("-- Unwind from %s, %s \n", sym?sym:"unknown", cc?cc->name():"unknown"); + return stepStackContext(sc, buffer, f); +} + +bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, + FrameDesc *f) { + uintptr_t bottom = sc.sp + MAX_WALK_SIZE; + uintptr_t prev_sp = sc.sp; + + u8 cfa_reg = (u8)f->cfa; + int cfa_off = f->cfa >> 8; + if (cfa_reg == DW_REG_SP) { + sc.sp = sc.sp + cfa_off; + } else if (cfa_reg == DW_REG_FP) { + sc.sp = sc.fp + cfa_off; + } else if (cfa_reg == DW_REG_PLT) { + sc.sp += ((uintptr_t)sc.pc & 15) >= 11 ? 
cfa_off * 2 : cfa_off; + } else { + // it is interesting to categorize these cases + // printf("unhandled reg \n"); + return false; + } + // Check if the next frame is below on the current stack + if (sc.sp < prev_sp || sc.sp >= prev_sp + MAX_FRAME_SIZE || sc.sp >= bottom) { + return false; + } + + // Stack pointer must be word aligned + if ((sc.sp & (sizeof(uintptr_t) - 1)) != 0) { + return false; + } + + if (f->fp_off & DW_PC_OFFSET) { + sc.pc = (const char *)sc.pc + (f->fp_off >> 1); + } else { + if (f->fp_off != DW_SAME_FP && f->fp_off < MAX_FRAME_SIZE && + f->fp_off > -MAX_FRAME_SIZE) { + // Update the frame pointer (based on fp offset) + if (!read_memory(sc.sp + f->fp_off, reinterpret_cast(&sc.fp), + buffer)) { + return false; + } + } + // Update the pc using return address + if (!read_memory(reinterpret_cast((void **)sc.sp - 1), + reinterpret_cast(&sc.pc), buffer)) { + return false; + } + } + + if (sc.pc < (const void *)MIN_VALID_PC || + sc.pc > (const void *)-MIN_VALID_PC) { + return false; + } + return true; +} + +void populateStackContext(ap::StackContext &sc, void *ucontext) { + if (ucontext == NULL) { + sc.pc = __builtin_return_address(0); + sc.fp = + (uintptr_t)__builtin_frame_address(1); // XXX(nick): this isn't safe.... 
+ sc.sp = (uintptr_t)__builtin_frame_address(0); + } else { + StackFrame frame(ucontext); + sc.pc = (const void *)frame.pc(); + sc.fp = frame.fp(); + sc.sp = frame.sp(); + } +} + +int stackWalk(CodeCacheArray *cache, ap::StackContext &sc, + const ap::StackBuffer &buffer, void const **callchain, + int max_depth, int skip) { + int depth = -skip; + // Walk until the bottom of the stack or until the first Java frame + while (depth < max_depth) { + int d = depth++; + if (d >= 0) { + callchain[d] = sc.pc; + } + if (!stepStackContext(sc, buffer, cache)) { + break; + } + } + return depth; +} diff --git a/src/async-profiler/stack_context.cpp b/src/async-profiler/stack_context.cpp new file mode 100644 index 000000000..6d7dbe52a --- /dev/null +++ b/src/async-profiler/stack_context.cpp @@ -0,0 +1,17 @@ +#include "async-profiler/stack_context.h" + +#define CAST_TO_VOID_STAR(ptr) reinterpret_cast(ptr) + +namespace ap { + +// Async profiler's unwinding only uses a subset of the registers +StackContext from_regs(std::span regs) { + // context from saving state + ap::StackContext sc; + sc.pc = CAST_TO_VOID_STAR(regs[REGNAME(PC)]); + sc.sp = regs[REGNAME(SP)]; + sc.fp = regs[REGNAME(RBP)]; + return sc; +} + +} // namespace ap \ No newline at end of file diff --git a/src/async-profiler/symbols_linux.cpp b/src/async-profiler/symbols_linux.cpp new file mode 100644 index 000000000..be3ea3121 --- /dev/null +++ b/src/async-profiler/symbols_linux.cpp @@ -0,0 +1,811 @@ +/* + * Copyright 2022 Nick Ripley + * Copyright 2017 Andrei Pangin + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modified by Nick Ripley to extract components needed for call stack unwinding + */ + +#ifdef __linux__ + +# include "dwarf.h" +# include "symbols.h" +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +# include "elf_helpers.h" +# include +# include + +# define LG_WRN(...) printf(__VA_ARGS__) + +class SymbolDesc { +private: + const char *_addr; + const char *_type; + +public: + SymbolDesc(const char *s) { + _addr = s; + _type = strchr(_addr, ' ') + 1; + } + + const char *addr() { return (const char *)strtoul(_addr, NULL, 16); } + char type() { return _type[0]; } + const char *name() { return _type + 2; } +}; + +class MemoryMapDesc { +private: + const char *_addr; + const char *_end; + const char *_perm; + const char *_offs; + const char *_dev; + const char *_inode; + const char *_file; + +public: + MemoryMapDesc(const char *s) { + _addr = s; + _end = strchr(_addr, '-') + 1; + _perm = strchr(_end, ' ') + 1; + _offs = strchr(_perm, ' ') + 1; + _dev = strchr(_offs, ' ') + 1; + _inode = strchr(_dev, ' ') + 1; + _file = strchr(_inode, ' '); + + if (_file != NULL) { + while (*_file == ' ') + _file++; + } + } + + const char *file() { return _file; } + bool isReadable() { return _perm[0] == 'r'; } + bool isExecutable() { return _perm[2] == 'x'; } + const char *addr() { return (const char *)strtoul(_addr, NULL, 16); } + const char *end() { return (const char *)strtoul(_end, NULL, 16); } + unsigned long offs() { return strtoul(_offs, NULL, 16); } + unsigned long dev() { + return strtoul(_dev, NULL, 16) << 8 | strtoul(_dev + 3, NULL, 16); + } + unsigned long inode() { return strtoul(_inode, NULL, 10); } +}; + +# ifdef __LP64__ +const unsigned char ELFCLASS_SUPPORTED = ELFCLASS64; +typedef Elf64_Ehdr ElfHeader; +typedef Elf64_Shdr ElfSection; +typedef Elf64_Phdr ElfProgramHeader; 
+typedef Elf64_Nhdr ElfNote; +typedef Elf64_Sym ElfSymbol; +typedef Elf64_Rel ElfRelocation; +typedef Elf64_Dyn ElfDyn; +# define ELF_R_TYPE ELF64_R_TYPE +# define ELF_R_SYM ELF64_R_SYM +# else +const unsigned char ELFCLASS_SUPPORTED = ELFCLASS32; +typedef Elf32_Ehdr ElfHeader; +typedef Elf32_Shdr ElfSection; +typedef Elf32_Phdr ElfProgramHeader; +typedef Elf32_Nhdr ElfNote; +typedef Elf32_Sym ElfSymbol; +typedef Elf32_Rel ElfRelocation; +typedef Elf32_Dyn ElfDyn; +# define ELF_R_TYPE ELF32_R_TYPE +# define ELF_R_SYM ELF32_R_SYM +# endif // __LP64__ + +# if defined(__x86_64__) +# define R_GLOB_DAT R_X86_64_GLOB_DAT +# elif defined(__i386__) +# define R_GLOB_DAT R_386_GLOB_DAT +# elif defined(__arm__) || defined(__thumb__) +# define R_GLOB_DAT R_ARM_GLOB_DAT +# elif defined(__aarch64__) +# define R_GLOB_DAT R_AARCH64_GLOB_DAT +# elif defined(__PPC64__) +# define R_GLOB_DAT R_PPC64_GLOB_DAT +# else +# error "Compiling on unsupported arch" +# endif + +// GNU dynamic linker relocates pointers in the dynamic section, while musl +// doesn't. A tricky case is when we attach to a musl container from a glibc +// host. +# ifdef __musl__ +# define DYN_PTR(ptr) (_base + (ptr)) +# else +# define DYN_PTR(ptr) \ + ((char *)(ptr) >= _base ? 
(char *)(ptr) : _base + (ptr)) +# endif // __musl__ + +class ElfParser { +public: + CodeCache *_cc; + const char *_base; + const char *_file_name; + ElfHeader *_header; + const char *_sections; + + ElfParser(CodeCache *cc, const char *base, const void *addr, + const char *file_name = NULL) { + _cc = cc; + _base = base; + _file_name = file_name; + _header = (ElfHeader *)addr; + _sections = (const char *)addr + _header->e_shoff; + } + + bool validHeader() { + unsigned char *ident = _header->e_ident; + return ident[0] == 0x7f && ident[1] == 'E' && ident[2] == 'L' && + ident[3] == 'F' && ident[4] == ELFCLASS_SUPPORTED && + ident[5] == ELFDATA2LSB && ident[6] == EV_CURRENT && + _header->e_shstrndx != SHN_UNDEF; + } + + ElfSection *section(int index) { + return (ElfSection *)(_sections + index * _header->e_shentsize); + } + + const char *at(ElfSection *section) { + return (const char *)_header + section->sh_offset; + } + + const char *at(ElfProgramHeader *pheader) { + return _header->e_type == ET_EXEC + ? 
(const char *)pheader->p_vaddr + : (const char *)_header + pheader->p_vaddr; + } + + ElfSection *findSection(uint32_t type, const char *name); + ElfProgramHeader *findProgramHeader(uint32_t type); + + void parseDynamicSection(); + void parseDwarfInfo(); + void parseDwarfInfoRemote(const char *eh_frame_data, const char *base_remote, + Offset_t adjust_eh_frame); + void loadSymbols(bool use_debug); + bool loadSymbolsUsingBuildId(); + bool loadSymbolsUsingDebugLink(); + void loadSymbolTable(ElfSection *symtab); + void addRelocationSymbols(ElfSection *reltab, const char *plt); + +public: + static const char *get_self_vdso(void); + static void parseProgramHeaders(CodeCache *cc, const char *base); + static bool parseProgramHeadersRemote(Elf *elf, CodeCache *cc, + const char *base, + const char *mmap_addr); + static bool parseFile(CodeCache *cc, const char *base, const char *file_name, + bool use_debug); + static void parseMem(CodeCache *cc, const char *base); + static void parseMemRemote(CodeCache *cc, const char *base, const char *addr); +}; + +ElfSection *ElfParser::findSection(uint32_t type, const char *name) { + const char *strtab = at(section(_header->e_shstrndx)); + + for (int i = 0; i < _header->e_shnum; i++) { + ElfSection *section = this->section(i); + if (section->sh_type == type && section->sh_name != 0) { + if (strcmp(strtab + section->sh_name, name) == 0) { + return section; + } + } + } + + return NULL; +} + +ElfProgramHeader *ElfParser::findProgramHeader(uint32_t type) { + const char *pheaders = (const char *)_header + _header->e_phoff; + + for (int i = 0; i < _header->e_phnum; i++) { + ElfProgramHeader *pheader = + (ElfProgramHeader *)(pheaders + i * _header->e_phentsize); + if (pheader->p_type == type) { + return pheader; + } + } + + return NULL; +} + +bool ElfParser::parseFile(CodeCache *cc, const char *base, + const char *file_name, bool use_debug) { + int fd = open(file_name, O_RDONLY); + if (fd == -1) { + return false; + } + + size_t length = 
(size_t)lseek64(fd, 0, SEEK_END); + void *addr = mmap(NULL, length, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + + if (addr == MAP_FAILED) { + LG_WRN("Could not parse symbols from %s: %s", file_name, strerror(errno)); + } else { + ElfParser elf(cc, base, addr, file_name); + if (elf.validHeader()) { + elf.loadSymbols(use_debug); + } + munmap(addr, length); + } + return true; +} + +void ElfParser::parseMemRemote(CodeCache *cc, const char *base, + const char *addr) { + ElfParser elf(cc, base, addr); + if (elf.validHeader()) { + elf.loadSymbols(false); + } +} + +void ElfParser::parseMem(CodeCache *cc, const char *base) { + ElfParser elf(cc, base, base); + if (elf.validHeader()) { + elf.loadSymbols(false); + } +} + +// remote opens the elf file +bool ElfParser::parseProgramHeadersRemote(Elf *elf, CodeCache *cc, + const char *base, + const char *mmap_addr) { + // todo check if I can use base + ElfParser elf_remote(cc, base, mmap_addr); + if (elf_remote.validHeader()) { + cc->setTextBase(mmap_addr); + elf_remote.parseDynamicSection(); + elf_remote.parseDwarfInfo(); + return true; + } else { + printf("invalid header \n"); + } + return false; +} + +void ElfParser::parseProgramHeaders(CodeCache *cc, const char *base) { + ElfParser elf(cc, base, base); + + if (elf.validHeader()) { + printf("Setting text base = %p \n", base); + cc->setTextBase(base); + elf.parseDynamicSection(); + elf.parseDwarfInfo(); + } +} + +void ElfParser::parseDynamicSection() { + ElfProgramHeader *dynamic = findProgramHeader(PT_DYNAMIC); + if (dynamic != NULL) { + void **got_start = NULL; + size_t pltrelsz = 0; + char *rel = NULL; + size_t relsz = 0; + size_t relent = 0; + size_t relcount = 0; + + const char *dyn_start = at(dynamic); + const char *dyn_end = dyn_start + dynamic->p_memsz; + for (ElfDyn *dyn = (ElfDyn *)dyn_start; dyn < (ElfDyn *)dyn_end; dyn++) { + switch (dyn->d_tag) { + case DT_PLTGOT: + got_start = (void **)DYN_PTR(dyn->d_un.d_ptr) + 3; + break; + case DT_PLTRELSZ: + pltrelsz = 
dyn->d_un.d_val; + break; + case DT_RELA: + case DT_REL: + rel = (char *)DYN_PTR(dyn->d_un.d_ptr); + break; + case DT_RELASZ: + case DT_RELSZ: + relsz = dyn->d_un.d_val; + break; + case DT_RELAENT: + case DT_RELENT: + relent = dyn->d_un.d_val; + break; + case DT_RELACOUNT: + case DT_RELCOUNT: + relcount = dyn->d_un.d_val; + break; + } + } + printf("relent = %d \n", relent); + if (relent != 0) { + if (pltrelsz != 0 && got_start != NULL) { + // The number of entries in .got.plt section matches the number of + // entries in .rela.plt + printf("GOT start == %p \n", got_start); + _cc->setGlobalOffsetTable(got_start, got_start + pltrelsz / relent, + false); + } else if (rel != NULL && relsz != 0) { + // RELRO technique: .got.plt has been merged into .got and made + // read-only. Find .got end from the highest relocation address. + void **min_addr = (void **)-1; + void **max_addr = (void **)0; + for (size_t offs = relcount * relent; offs < relsz; offs += relent) { + ElfRelocation *r = (ElfRelocation *)(rel + offs); + if (ELF_R_TYPE(r->r_info) == R_GLOB_DAT) { + void **addr = (void **)(_base + r->r_offset); + if (addr < min_addr) + min_addr = addr; + if (addr > max_addr) + max_addr = addr; + } + } + + if (got_start == NULL) { + got_start = (void **)min_addr; + } + + if (max_addr >= got_start) { + _cc->setGlobalOffsetTable(got_start, max_addr + 1, false); + } + } + } + } else { + printf("No dynamic section \n"); + } +} + +void ElfParser::parseDwarfInfoRemote(const char *eh_frame_data, + const char *base_remote, + Offset_t adjust_eh_frame) { + printf("Create dwarf with base:%p - eh_frame_hdr:%p\n", _base, eh_frame_data); + DwarfParser dwarf(_cc->name(), base_remote, eh_frame_data, adjust_eh_frame); + _cc->setDwarfTable(dwarf.table(), dwarf.count()); + printf("Created a number of dwarf entries = %d \n", dwarf.count()); +} + +void ElfParser::parseDwarfInfo() { + if (!DWARF_SUPPORTED) + return; + + ElfProgramHeader *eh_frame_hdr = findProgramHeader(PT_GNU_EH_FRAME); + + if 
(eh_frame_hdr != NULL) { + // Both arguments are pointers: print them with %p (was %lx, a type mismatch) + printf("Create dwarf with %p - at:%p \n", (const void *)_base, (const void *)at(eh_frame_hdr)); + DwarfParser dwarf(_cc->name(), _base, at(eh_frame_hdr)); + _cc->setDwarfTable(dwarf.table(), dwarf.count()); + printf("Created a number of dwarf entries = %d \n", dwarf.count()); + } +} + +void ElfParser::loadSymbols(bool use_debug) { + // Look for debug symbols in the original .so + ElfSection *section = findSection(SHT_SYMTAB, ".symtab"); + if (section != NULL) { + loadSymbolTable(section); + goto loaded; + } + + // Try to load symbols from an external debuginfo library + if (use_debug) { + if (loadSymbolsUsingBuildId() || loadSymbolsUsingDebugLink()) { + goto loaded; + } + } + + // If everything else fails, load only exported symbols + section = findSection(SHT_DYNSYM, ".dynsym"); + if (section != NULL) { + loadSymbolTable(section); + } + +loaded: + if (use_debug) { + // Synthesize names for PLT stubs + ElfSection *plt = findSection(SHT_PROGBITS, ".plt"); + ElfSection *reltab = findSection(SHT_RELA, ".rela.plt"); + if (reltab == NULL) { + reltab = findSection(SHT_REL, ".rel.plt"); + } + if (plt != NULL && reltab != NULL) { + addRelocationSymbols(reltab, _base + plt->sh_offset + PLT_HEADER_SIZE); + } + } +} + +// Load symbols from /usr/lib/debug/.build-id/ab/cdef1234.debug, where +// abcdef1234 is Build ID +bool ElfParser::loadSymbolsUsingBuildId() { + ElfSection *section = findSection(SHT_NOTE, ".note.gnu.build-id"); + if (section == NULL || section->sh_size <= 16) { + return false; + } + + ElfNote *note = (ElfNote *)at(section); + if (note->n_namesz != 4 || note->n_descsz < 2 || note->n_descsz > 64) { + return false; + } + + const char *build_id = (const char *)note + sizeof(*note) + 4; + int build_id_len = note->n_descsz; + + char path[PATH_MAX]; + char *p = + path + sprintf(path, "/usr/lib/debug/.build-id/%02hhx/", build_id[0]); + for (int i = 1; i < build_id_len; i++) { + p += sprintf(p, "%02hhx", build_id[i]); + } + strcpy(p, ".debug"); + + return
parseFile(_cc, _base, path, false); +} + +// Look for debuginfo file specified in .gnu_debuglink section +bool ElfParser::loadSymbolsUsingDebugLink() { + ElfSection *section = findSection(SHT_PROGBITS, ".gnu_debuglink"); + if (section == NULL || section->sh_size <= 4) { + return false; + } + + const char *basename = strrchr(_file_name, '/'); + if (basename == NULL) { + return false; + } + + char *dirname = strndup(_file_name, basename - _file_name); + if (dirname == NULL) { + return false; + } + + const char *debuglink = at(section); + char path[PATH_MAX]; + bool result = false; + + // 1. /path/to/libjvm.so.debug + if (strcmp(debuglink, basename + 1) != 0 && + snprintf(path, PATH_MAX, "%s/%s", dirname, debuglink) < PATH_MAX) { + result = parseFile(_cc, _base, path, false); + } + + // 2. /path/to/.debug/libjvm.so.debug + if (!result && + snprintf(path, PATH_MAX, "%s/.debug/%s", dirname, debuglink) < PATH_MAX) { + result = parseFile(_cc, _base, path, false); + } + + // 3. /usr/lib/debug/path/to/libjvm.so.debug + if (!result && + snprintf(path, PATH_MAX, "/usr/lib/debug%s/%s", dirname, debuglink) < + PATH_MAX) { + result = parseFile(_cc, _base, path, false); + } + + free(dirname); + return result; +} + +void ElfParser::loadSymbolTable(ElfSection *symtab) { + ElfSection *strtab = section(symtab->sh_link); + const char *strings = at(strtab); + int cpt = 0; + const char *symbols = at(symtab); + const char *symbols_end = symbols + symtab->sh_size; + for (; symbols < symbols_end; symbols += symtab->sh_entsize) { + ElfSymbol *sym = (ElfSymbol *)symbols; + if (sym->st_name != 0 && sym->st_value != 0) { + // Skip special AArch64 mapping symbols: $x and $d + if (sym->st_size != 0 || sym->st_info != 0 || + strings[sym->st_name] != '$') { + // printf("Loading sym %s at 0x%lx (base=0x%lx)\n", strings + + // sym->st_name, + // _base + sym->st_value, _base); + _cc->add(_base + sym->st_value, (int)sym->st_size, + strings + sym->st_name); + ++cpt; + } + } + } + printf("Considered %d 
symbols \n", cpt); +} + +void ElfParser::addRelocationSymbols(ElfSection *reltab, const char *plt) { + ElfSection *symtab = section(reltab->sh_link); + const char *symbols = at(symtab); + + ElfSection *strtab = section(symtab->sh_link); + const char *strings = at(strtab); + + const char *relocations = at(reltab); + const char *relocations_end = relocations + reltab->sh_size; + for (; relocations < relocations_end; relocations += reltab->sh_entsize) { + ElfRelocation *r = (ElfRelocation *)relocations; + ElfSymbol *sym = + (ElfSymbol *)(symbols + ELF_R_SYM(r->r_info) * symtab->sh_entsize); + + char name[256]; + if (sym->st_name == 0) { + strcpy(name, "@plt"); + } else { + const char *sym_name = strings + sym->st_name; + snprintf(name, sizeof(name), "%s%cplt", sym_name, + sym_name[0] == '_' && sym_name[1] == 'Z' ? '.' : '@'); + name[sizeof(name) - 1] = 0; + } + + _cc->add(plt, PLT_ENTRY_SIZE, name); + plt += PLT_ENTRY_SIZE; + } +} + +Mutex Symbols::_parse_lock; +bool Symbols::_have_kernel_symbols = false; + +void Symbols::parseKernelSymbols(CodeCache *cc) { + // XXX(nick): omitted +} + +// Returns the start address of the readable+executable "[vdso]" mapping found +// in /proc/self/maps, or nullptr if the file cannot be opened or no such +// mapping is listed. +const char *ElfParser::get_self_vdso(void) { + FILE *f = fopen("/proc/self/maps", "r"); + const char *addr_vdso = nullptr; + + if (f == NULL) { + return nullptr; + } + char *str = NULL; + size_t str_size = 0; + ssize_t len; + + while ((len = getline(&str, &str_size, f)) > 0) { + str[len - 1] = 0; + + MemoryMapDesc map(str); + if (!map.isReadable() || map.file() == NULL || map.file()[0] == 0) { + continue; + } + const char *image_base = map.addr(); + if (map.isExecutable()) { + if (strcmp(map.file(), "[vdso]") == 0) { + addr_vdso = image_base; // found it + break; + } + } + } + // getline() allocated str and f is still open: release both on every exit + free(str); + fclose(f); + return addr_vdso; +} + +void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, + bool kernel_symbols) { + std::set parsed_libraries; + std::set parsed_inodes; + MutexLocker ml(_parse_lock); + char proc_map_filename[1024] = {}; + snprintf(proc_map_filename, std::size(proc_map_filename),
"%s/proc/%d/maps", + "", pid); + // todo plug the proc_map open functions (handles user switches) + FILE *f = fopen(proc_map_filename, "r"); + if (f == NULL) { + return; + } + + // last readable is previous mmap + const char *last_readable_base = NULL; + const char *image_end = NULL; + char *str = NULL; + size_t str_size = 0; + ssize_t len; + // tell elf what version we are using + elf_version(EV_CURRENT); + + while ((len = getline(&str, &str_size, f)) > 0) { + str[len - 1] = 0; + + MemoryMapDesc map(str); + if (!map.isReadable() || map.file() == NULL || map.file()[0] == 0) { + continue; + } + + const char *image_base = map.addr(); + if (image_base != image_end) + last_readable_base = image_base; + image_end = map.end(); + + if (map.isExecutable()) { + if (!parsed_libraries.insert(image_base).second) { + continue; // the library was already parsed + } + + int count = array->count(); + if (count >= MAX_NATIVE_LIBS) { + break; + } + + CodeCache *cc = new CodeCache(map.file(), count, image_base, image_end); + unsigned long inode = map.inode(); + printf("+++++ Considering %s ++++ \n", map.file()); + if (inode != 0) { + char proc_root_filename[1024] = {}; + // use /proc//root to access the file (whole host) + int n = snprintf(proc_root_filename, 1024, "%s/proc/%d/root%s", "", pid, map.file()); + // n >= buffer size means the path was truncated: do not open a wrong file + if (n < 0 || (size_t)n >= sizeof(proc_root_filename)) { + printf("error encoding file %s \n", map.file()); + delete cc; // cc was never handed to array, so free it here + continue; + } + int fd = open(proc_root_filename, O_RDONLY); + // remote unwinding + if (-1 == fd) { + printf("error opening file %s \n", map.file()); + delete cc; // cc was never handed to array, so free it here + continue; + } + size_t length = (size_t)lseek64(fd, 0, SEEK_END); + // todo : remove the mmap + Elf *elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (elf == NULL) { + // format previously had three conversions for two arguments + LG_WRN("Invalid elf %s (efl:%p)\n", map.file(), (void *)elf); + goto continue_loop; + } + Offset_t biais_offset; + ElfAddress_t vaddr; + ElfAddress_t text_base; // not used + Offset_t elf_offset; + // Compute how to convert a process address + if (get_elf_offsets(elf, map.file(), vaddr,
elf_offset, biais_offset, + text_base)) { + printf("vaddr from get_elf_offset: %lx \n", vaddr); + printf("biais offset get_elf_offset: %lx \n", biais_offset); + printf("text base from get_elf_offset: %lx \n", text_base); + printf("offset from get_elf_offset: %lx \n", elf_offset); + // last_readable_base is a pointer: use %p (was %lx) + printf("last readable: %p \n", (const void *)last_readable_base); + } + + // Do not parse the same executable twice, e.g. on Alpine Linux + if (parsed_inodes.insert(map.dev() | inode << 16).second) { + // Be careful: executable file is not always ELF, e.g. classes.jsa + // todo: This should be something with the biais instead + if ((image_base -= map.offs()) >= last_readable_base) { + // process elf info + EhFrameInfo eh_frame_info = {}; + if (!get_eh_frame_info(elf, eh_frame_info)) { + printf("Failed to retrieve eh frame info\n"); + } + const char *elf_base = eh_frame_info._eh_frame_hdr._data - + eh_frame_info._eh_frame_hdr._offset; + // this is used during unwinding to offset PC to dwarf instructions + cc->setTextBase(image_base); + // image_base is a pointer: use %p (was %lx) + printf("image base = %p \n", (const void *)image_base); + + if (eh_frame_info._eh_frame_hdr._data) { + // todo: is this always valid ?
+ ElfParser elf_remote(cc, image_base, elf_base); + + // (vaddr_eh_frame - vaddr_eh_frame_hdr) - (offset_sec_1 - + // offset_sec_2) + // If eh frame is not in the same segment + Offset_t adjust_eh_frame = + (eh_frame_info._eh_frame._vaddr_sec - + eh_frame_info._eh_frame_hdr._vaddr_sec) - + (eh_frame_info._eh_frame._offset - + eh_frame_info._eh_frame_hdr._offset); + printf("adjust eh_frame %lx \n", adjust_eh_frame); + elf_remote.parseDwarfInfoRemote( + eh_frame_info._eh_frame_hdr._data, + eh_frame_info._eh_frame_hdr._data - + eh_frame_info._eh_frame_hdr._offset, + adjust_eh_frame); + } else { + printf("No EH Frame data - %s\n", map.file()); + } + } + ElfParser::parseFile(cc, image_base, map.file(), true); + } + + continue_loop: + close(fd); + elf_end(elf); // no-op if null + } else if (strcmp(map.file(), "[vdso]") == 0) { + // find our self address for vdso + const char *addr_vdso = ElfParser::get_self_vdso(); + ElfParser::parseMemRemote(cc, image_base, addr_vdso); + } + cc->sort(); + array->add(cc); + } + } + + free(str); + fclose(f); +} + +void Symbols::parseLibraries(CodeCacheArray *array, bool kernel_symbols) { + // we can't use static global sets due to undefined initialization order stuff + // (see + // https://stackoverflow.com/questions/27145617/segfault-when-adding-an-element-to-a-stdmap) + // I'm not sure why this original code even worked? 
+ std::set parsed_libraries; + std::set parsed_inodes; + MutexLocker ml(_parse_lock); + + FILE *f = fopen("/proc/self/maps", "r"); + if (f == NULL) { + return; + } + + const char *last_readable_base = NULL; + const char *image_end = NULL; + char *str = NULL; + size_t str_size = 0; + ssize_t len; + + while ((len = getline(&str, &str_size, f)) > 0) { + str[len - 1] = 0; + + MemoryMapDesc map(str); + if (!map.isReadable() || map.file() == NULL || map.file()[0] == 0) { + continue; + } + + const char *image_base = map.addr(); + if (image_base != image_end) + last_readable_base = image_base; + image_end = map.end(); + + if (map.isExecutable()) { + if (!parsed_libraries.insert(image_base).second) { + continue; // the library was already parsed + } + printf("Considering %s \n", map.file()); + + int count = array->count(); + if (count >= MAX_NATIVE_LIBS) { + break; + } + + CodeCache *cc = new CodeCache(map.file(), count, image_base, image_end); + + unsigned long inode = map.inode(); + if (inode != 0) { + // Do not parse the same executable twice, e.g. on Alpine Linux + if (parsed_inodes.insert(map.dev() | inode << 16).second) { + // Be careful: executable file is not always ELF, e.g. classes.jsa + // map.offs() is an unsigned long, not a pointer: print it with %lx + printf("image_base = %p, map.offs() = %lx, last_readable_base = %p \n", + image_base, map.offs(), last_readable_base); + // todo - read the biais from the vaddr field (open file?)
+ + if ((image_base -= map.offs()) >= last_readable_base) { + ElfParser::parseProgramHeaders(cc, image_base); + } + ElfParser::parseFile(cc, image_base, map.file(), true); + } + } else if (strcmp(map.file(), "[vdso]") == 0) { + ElfParser::parseMem(cc, image_base); + } + cc->sort(); + array->add(cc); + } + } + free(str); + fclose(f); +} + +#endif // __linux__ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 14e6860b9..6106c1a67 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -391,6 +391,22 @@ add_unit_test( add_unit_test(ddprof_module_lib-ut ddprof_module_lib-ut.cc ../src/ddprof_module_lib.cc ../src/build_id.cc ../src/dso.cc LIBRARIES ${ELFUTILS_LIBRARIES}) +#add_unit_test( +# dwarf_unwind-ut +# dwarf_unwind-ut.cc +# ../src/lib/pthread_fixes.cc +# ../src/lib/savecontext.cc +# ../src/lib/saveregisters.cc +# # ../src/lib/allocation_tracker.cc +# ../src/ringbuffer_utils.cc +# ../src/perf_ringbuffer.cc +# ../src/perf.cc +# ../src/pevent_lib.cc +# ../src/sys_utils.cc +# ../src/user_override.cc +# ../src/perf_watcher.cc +# LIBRARIES ${ELFUTILS_LIBRARIES} DDProf::AsyncProf) +#target_include_directories(dwarf_unwind-ut PRIVATE ${ASYNC_PROFILER_INCLUDE}) add_benchmark(savecontext-bench savecontext-bench.cc ../src/lib/pthread_fixes.cc ../src/lib/savecontext.cc ../src/lib/saveregisters.cc LIBRARIES llvm-demangle) From 7c4795ca141cb1cd393a4a371d690fa3f37cd018 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Sat, 13 Apr 2024 13:00:14 +0200 Subject: [PATCH 2/5] Experimental unwinding - WIP Ensure we use elf addresses instead of absolute addresses --- include/async-profiler/codeCache.h | 8 +- src/async-profiler/codeCache.cpp | 8 +- src/async-profiler/stackWalker.cpp | 2 +- src/async-profiler/symbols_linux.cpp | 3 + test/CMakeLists.txt | 32 ++--- test/dwarf_unwind-ut.cc | 174 +++++++++++++++++++++++++++ 6 files changed, 204 insertions(+), 23 deletions(-) create mode 100644 test/dwarf_unwind-ut.cc diff --git a/include/async-profiler/codeCache.h 
b/include/async-profiler/codeCache.h index 534996d9e..f8c257bad 100644 --- a/include/async-profiler/codeCache.h +++ b/include/async-profiler/codeCache.h @@ -17,7 +17,7 @@ #ifndef _CODECACHE_H #define _CODECACHE_H -// #include +#include #define NO_MIN_ADDRESS ((const void *)-1) #define NO_MAX_ADDRESS ((const void *)0) @@ -78,7 +78,7 @@ class CodeCache { short _lib_index; const void *_min_address; const void *_max_address; - const char *_text_base; + const void *_text_base; void **_got_start; void **_got_end; @@ -113,7 +113,7 @@ class CodeCache { void setTextBase(const char *text_base) { _text_base = text_base; } - const char *getTextBase() { return _text_base; } + const void *getTextBase() { return _text_base; } void **gotStart() const { return _got_start; } @@ -136,7 +136,7 @@ class CodeCache { void makeGotPatchable(); void setDwarfTable(FrameDesc *table, int length); - FrameDesc *findFrameDesc(const void *pc); + FrameDesc *findFrameDesc(uint64_t elf_address); }; class CodeCacheArray { diff --git a/src/async-profiler/codeCache.cpp b/src/async-profiler/codeCache.cpp index 72b6d63ce..da620f249 100644 --- a/src/async-profiler/codeCache.cpp +++ b/src/async-profiler/codeCache.cpp @@ -20,10 +20,13 @@ #include "codeCache.h" #include "dwarf.h" #include "os.h" + #include #include #include #include +#include +#include char *NativeFunc::create(const char *name, short lib_index) { NativeFunc *f = (NativeFunc *)malloc(sizeof(NativeFunc) + 1 + strlen(name)); @@ -216,8 +219,9 @@ void CodeCache::setDwarfTable(FrameDesc *table, int length) { _dwarf_table_length = length; } -FrameDesc *CodeCache::findFrameDesc(const void *pc) { - u32 target_loc = (const char *)pc - _text_base; +FrameDesc *CodeCache::findFrameDesc(uint64_t elf_address) { + assert(elf_address < std::numeric_limits::max()); + const u32 target_loc = (const u32)elf_address; int low = 0; int high = _dwarf_table_length - 1; diff --git a/src/async-profiler/stackWalker.cpp b/src/async-profiler/stackWalker.cpp index 
17834537c..ce17da65e 100644 --- a/src/async-profiler/stackWalker.cpp +++ b/src/async-profiler/stackWalker.cpp @@ -87,7 +87,7 @@ bool stepStackContext(ap::StackContext &sc, const ap::StackBuffer &buffer, CodeCacheArray *cache) { FrameDesc *f; CodeCache *cc = findLibraryByAddress(cache, sc.pc); - if (cc == NULL || (f = cc->findFrameDesc(sc.pc)) == NULL) { + if (cc == NULL || (f = cc->findFrameDesc(static_cast(sc.pc) - static_cast(cc->getTextBase()))) == NULL) { f = &FrameDesc::default_frame; } // const char *sym = cc?cc->binarySearch(sc.pc):"unknown"; diff --git a/src/async-profiler/symbols_linux.cpp b/src/async-profiler/symbols_linux.cpp index be3ea3121..772569aab 100644 --- a/src/async-profiler/symbols_linux.cpp +++ b/src/async-profiler/symbols_linux.cpp @@ -678,6 +678,9 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, printf("offset from get_elf_offset: %lx \n", elf_offset); printf("last readable: %lx \n", last_readable_base); } + else { + printf("Failed to read elf offsets \n"); + } // Do not parse the same executable twice, e.g. 
on Alpine Linux if (parsed_inodes.insert(map.dev() | inode << 16).second) { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6106c1a67..e222ce26a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -391,22 +391,22 @@ add_unit_test( add_unit_test(ddprof_module_lib-ut ddprof_module_lib-ut.cc ../src/ddprof_module_lib.cc ../src/build_id.cc ../src/dso.cc LIBRARIES ${ELFUTILS_LIBRARIES}) -#add_unit_test( -# dwarf_unwind-ut -# dwarf_unwind-ut.cc -# ../src/lib/pthread_fixes.cc -# ../src/lib/savecontext.cc -# ../src/lib/saveregisters.cc -# # ../src/lib/allocation_tracker.cc -# ../src/ringbuffer_utils.cc -# ../src/perf_ringbuffer.cc -# ../src/perf.cc -# ../src/pevent_lib.cc -# ../src/sys_utils.cc -# ../src/user_override.cc -# ../src/perf_watcher.cc -# LIBRARIES ${ELFUTILS_LIBRARIES} DDProf::AsyncProf) -#target_include_directories(dwarf_unwind-ut PRIVATE ${ASYNC_PROFILER_INCLUDE}) +add_unit_test( + dwarf_unwind-ut + dwarf_unwind-ut.cc + ../src/lib/pthread_fixes.cc + ../src/lib/savecontext.cc + ../src/lib/saveregisters.cc + # ../src/lib/allocation_tracker.cc + ../src/ringbuffer_utils.cc + ../src/perf_ringbuffer.cc + ../src/perf.cc + ../src/pevent_lib.cc + ../src/sys_utils.cc + ../src/user_override.cc + ../src/perf_watcher.cc + LIBRARIES ${ELFUTILS_LIBRARIES} DDProf::AsyncProf) +target_include_directories(dwarf_unwind-ut PRIVATE ${ASYNC_PROFILER_INCLUDE}) add_benchmark(savecontext-bench savecontext-bench.cc ../src/lib/pthread_fixes.cc ../src/lib/savecontext.cc ../src/lib/saveregisters.cc LIBRARIES llvm-demangle) diff --git a/test/dwarf_unwind-ut.cc b/test/dwarf_unwind-ut.cc new file mode 100644 index 000000000..4918e3ce5 --- /dev/null +++ b/test/dwarf_unwind-ut.cc @@ -0,0 +1,174 @@ +#include + +#include "savecontext.hpp" +#include "stackWalker.h" +#include "unwind_state.hpp" + +#include + +#include "async-profiler/codeCache.h" +#include "async-profiler/stack_context.h" +#include "async-profiler/symbols.h" + +// Retrieves instruction pointer +#define 
_THIS_IP_ \ + ({ \ + __label__ __here; \ + __here: \ + (unsigned long)&&__here; \ + }) + +// #include "ddprof_defs.hpp" + +// temp copy pasta +#define PERF_SAMPLE_STACK_SIZE (4096UL * 8) + +std::byte stack[PERF_SAMPLE_STACK_SIZE]; + +DDPROF_NOINLINE size_t funcA(std::array &regs); +DDPROF_NOINLINE size_t funcB(std::array &regs); + +size_t funcB(std::array &regs) { + printf("dwarf_unwind-ut:%s %lx \n", __FUNCTION__, _THIS_IP_); + std::span bounds = ddprof::retrieve_stack_bounds(); + size_t size = ddprof::save_context(bounds, regs, stack); + + return size; +} + +size_t funcA(std::array &regs) { + printf("dwarf_unwind-ut:%s %lx \n", __FUNCTION__, _THIS_IP_); + return funcB(regs); +} + +TEST(dwarf_unwind, simple) { + CodeCacheArray cache_arary; + // Load libraries + Symbols::parsePidLibraries(getpid(), &cache_arary, false); + std::array regs; + size_t size_stack = funcA(regs); + EXPECT_TRUE(size_stack); + + ap::StackContext sc = ap::from_regs(std::span(regs)); + ap::StackBuffer buffer(stack, sc.sp, sc.sp + size_stack); + + void *callchain[128]; + int n = stackWalk(&cache_arary, sc, buffer, + const_cast(callchain), 128, 0); + const char *syms[128] = {}; // null-initialised: frames that are never resolved must fail the checks below, not be read as garbage + for (int i = 0; i < n; ++i) { + { // retrieve symbol + CodeCache *code_cache = findLibraryByAddress( + &cache_arary, reinterpret_cast(callchain[i])); + if (code_cache) { + syms[i] = code_cache->binarySearch(callchain[i]); + printf("IP = %p - %s\n", callchain[i], syms[i]); + } + } + } + + // Check that we found the expected functions during unwinding (a missing frame is a failure, not a crash) + ASSERT_TRUE(syms[0] != nullptr && std::string(syms[0]).find("save_context") != std::string::npos); + ASSERT_TRUE(syms[1] != nullptr && std::string(syms[1]).find("funcB") != std::string::npos); + ASSERT_TRUE(syms[2] != nullptr && std::string(syms[2]).find("funcA") != std::string::npos); +} + +#ifdef ALLOC_TRACKER +#include "allocation_tracker.hpp" +#include "perf_ringbuffer.hpp" +#include "ringbuffer_holder.hpp" +#include "ringbuffer_utils.hpp" +#include +#include "defer.hpp" + +namespace ddprof { +static const uint64_t kSamplingRate = 1; + 
+DDPROF_NOINLINE void func_save_sleep(size_t size); +DDPROF_NOINLINE void func_intermediate_0(size_t size); +DDPROF_NOINLINE void func_intermediate_1(size_t size); + +DDPROF_NOINLINE void func_save_sleep(size_t size) { + ddprof::TrackerThreadLocalState *tl_state = AllocationTracker::get_tl_state(); + assert(tl_state); + int i = 0; + while (++i < 100000) { + + ddprof::AllocationTracker::track_allocation_s(0xdeadbeef, size, *tl_state); + // prevent tail call optimization + getpid(); + usleep(100); + // printf("Save context nb -- %d \n", i); + } +} + +void func_intermediate_0(size_t size) { func_intermediate_1(size); } + +void func_intermediate_1(size_t size) { func_save_sleep(size); } + +TEST(dwarf_unwind, remote) { + const uint64_t rate = 1; + const size_t buf_size_order = 5; + ddprof::RingBufferHolder ring_buffer{buf_size_order, + RingBufferType::kMPSCRingBuffer}; + AllocationTracker::allocation_tracking_init( + kSamplingRate, + AllocationTracker::kDeterministicSampling | + AllocationTracker::kTrackDeallocations, + k_default_perf_stack_sample_size, ring_buffer.get_buffer_info(), {}); + defer { AllocationTracker::allocation_tracking_free(); }; + + // Fork a child that repeatedly records tracked allocations (the sample producer) + pid_t temp_pid = fork(); + if (!temp_pid) { + func_intermediate_0(10); + // char *const argList[] = {"sleep", "10", nullptr}; + // execvp("sleep", argList); + return; + } + + // Load libraries from the forked child - a cache array is only relevant to a single pid + CodeCacheArray cache_arary; + sleep(1); + Symbols::parsePidLibraries(temp_pid, &cache_arary, false); + // Establish a ring buffer ? 
+ + ddprof::MPSCRingBufferReader reader{&ring_buffer.get_ring_buffer()}; + ASSERT_GT(reader.available_size(), 0); + + auto buf = reader.read_sample(); + ASSERT_FALSE(buf.empty()); + const perf_event_header *hdr = + reinterpret_cast(buf.data()); + ASSERT_EQ(hdr->type, PERF_RECORD_SAMPLE); + + // convert based on mask for this watcher (default in this case) + perf_event_sample *sample = hdr2samp(hdr, ddprof::perf_event_default_sample_type()); + + std::span regs_span{sample->regs, ddprof::k_perf_register_count}; + ap::StackContext sc = ap::from_regs(regs_span); + std::span stack{ + reinterpret_cast(sample->data_stack), sample->size_stack}; + ap::StackBuffer buffer(stack, sc.sp, sc.sp + sample->size_stack); + + void *callchain[ddprof::kMaxStackDepth]; + int n = + stackWalk(&cache_arary, sc, buffer, const_cast(callchain), + ddprof::kMaxStackDepth, 0); + + std::array syms; + for (int i = 0; i < n; ++i) { + { // retrieve symbol + CodeCache *code_cache = findLibraryByAddress( + &cache_arary, reinterpret_cast(callchain[i])); + if (code_cache) { + syms[i] = code_cache->binarySearch(callchain[i]); + printf("IP = %p - %s\n", callchain[i], syms[i]); + } + } + } + // cleanup the producer fork (once, after the loop, so it also runs when no frame was unwound) + kill(temp_pid, SIGTERM); +} +} +#endif From 6afaebb72e94f930bb1ce4c8185c56e3f83eae60 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Sat, 13 Apr 2024 14:55:54 +0200 Subject: [PATCH 3/5] unwind experimental Import new version of dwarf file --- include/async-profiler/dwarf.h | 222 ++++++----- include/logger.hpp | 12 +- src/async-profiler/dwarf.cpp | 662 ++++++++++++++++----------------- 3 files changed, 436 insertions(+), 460 deletions(-) diff --git a/include/async-profiler/dwarf.h b/include/async-profiler/dwarf.h index a001500b9..d05aab431 100644 --- a/include/async-profiler/dwarf.h +++ b/include/async-profiler/dwarf.h @@ -1,153 +1,169 @@ /* - * Copyright 2021 Andrei Pangin - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in 
compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 */ #ifndef _DWARF_H #define _DWARF_H -#include "arch.h" #include +#include "arch.h" + + +const int DW_REG_PLT = 128; // denotes special rule for PLT entries +const int DW_REG_INVALID = 255; // denotes unsupported configuration + +const int DW_PC_OFFSET = 1; +const int DW_SAME_FP = 0x80000000; +const int DW_STACK_SLOT = sizeof(void*); + #if defined(__x86_64__) -# define DWARF_SUPPORTED true +#define DWARF_SUPPORTED true const int DW_REG_FP = 6; const int DW_REG_SP = 7; const int DW_REG_PC = 16; +const int EMPTY_FRAME_SIZE = DW_STACK_SLOT; +const int LINKED_FRAME_SIZE = 2 * DW_STACK_SLOT; #elif defined(__i386__) -# define DWARF_SUPPORTED true +#define DWARF_SUPPORTED true const int DW_REG_FP = 5; const int DW_REG_SP = 4; const int DW_REG_PC = 8; +const int EMPTY_FRAME_SIZE = DW_STACK_SLOT; +const int LINKED_FRAME_SIZE = 2 * DW_STACK_SLOT; + +#elif defined(__aarch64__) + +#define DWARF_SUPPORTED true + +const int DW_REG_FP = 29; +const int DW_REG_SP = 31; +const int DW_REG_PC = 30; +const int EMPTY_FRAME_SIZE = 0; +const int LINKED_FRAME_SIZE = 0; #else -# define DWARF_SUPPORTED false +#define DWARF_SUPPORTED false const int DW_REG_FP = 0; const int DW_REG_SP = 1; const int DW_REG_PC = 2; +const int EMPTY_FRAME_SIZE = 0; +const int LINKED_FRAME_SIZE = 0; #endif -const int DW_REG_PLT = 128; // denotes special rule for PLT entries -const int DW_REG_INVALID = 255; // denotes unsupported configuration - -const int DW_PC_OFFSET = 1; -const int 
DW_SAME_FP = 0x80000000; -const int DW_STACK_SLOT = sizeof(void *); struct FrameDesc { - u32 loc; - int cfa; - int fp_off; - - static FrameDesc default_frame; - - static int comparator(const void *p1, const void *p2) { - FrameDesc *fd1 = (FrameDesc *)p1; - FrameDesc *fd2 = (FrameDesc *)p2; - return (int)(fd1->loc - fd2->loc); - } + u32 loc; + int cfa; + int fp_off; + int pc_off; + + static FrameDesc empty_frame; + static FrameDesc default_frame; + + static int comparator(const void* p1, const void* p2) { + FrameDesc* fd1 = (FrameDesc*)p1; + FrameDesc* fd2 = (FrameDesc*)p2; + return (int)(fd1->loc - fd2->loc); + } }; + class DwarfParser { -private: - const char *_name; - const char *_image_base; - const char *_ptr; - - int _capacity; - int _count; - FrameDesc *_table; - FrameDesc *_prev; - - u32 _code_align; - int _data_align; - - const char *add(size_t size) { - const char *ptr = _ptr; - _ptr = ptr + size; - return ptr; - } - - u8 get8() { return *_ptr++; } - - u16 get16() { return *(u16 *)add(2); } - - u32 get32() { return *(u32 *)add(4); } - - u32 getLeb() { - u32 result = 0; - for (u32 shift = 0;; shift += 7) { - u8 b = *_ptr++; - result |= (b & 0x7f) << shift; - if ((b & 0x80) == 0) { - return result; - } + private: + const char* _name; + const char* _image_base; + const char* _ptr; + + int _capacity; + int _count; + FrameDesc* _table; + FrameDesc* _prev; + + u32 _code_align; + int _data_align; + + const char* add(size_t size) { + const char* ptr = _ptr; + _ptr = ptr + size; + return ptr; } - } - - int getSLeb() { - int result = 0; - for (u32 shift = 0;; shift += 7) { - u8 b = *_ptr++; - result |= (b & 0x7f) << shift; - if ((b & 0x80) == 0) { - if ((b & 0x40) != 0 && (shift += 7) < 32) { - result |= -1 << shift; - } - return result; - } + + u8 get8() { + return *_ptr++; + } + + u16 get16() { + return *(u16*)add(2); + } + + u32 get32() { + return *(u32*)add(4); } - } - void skipLeb() { - while (*_ptr++ & 0x80) {} - } + u32 getLeb() { + u32 result = 0; + for (u32 
shift = 0; ; shift += 7) { + u8 b = *_ptr++; + result |= (b & 0x7f) << shift; + if ((b & 0x80) == 0) { + return result; + } + } + } - const char *getPtr() { - const char *ptr = _ptr; - return ptr + *(int *)add(4); - } + int getSLeb() { + int result = 0; + for (u32 shift = 0; ; shift += 7) { + u8 b = *_ptr++; + result |= (b & 0x7f) << shift; + if ((b & 0x80) == 0) { + if ((b & 0x40) != 0 && (shift += 7) < 32) { + result |= -1 << shift; + } + return result; + } + } + } - void parse(const char *eh_frame_hdr, u64 adjust_eh_frame); - void parseCie(); - void parseFde(); - void parseInstructions(u32 loc, const char *end); - int parseExpression(); + void skipLeb() { + while (*_ptr++ & 0x80) {} + } - void addRecord(u32 loc, u32 cfa_reg, int cfa_off, int fp_off); - FrameDesc *addRecordRaw(u32 loc, int cfa, int fp_off); + const char* getPtr() { + const char* ptr = _ptr; + return ptr + *(int*)add(4); + } -public: - DwarfParser(const char *name, const char *image_base, - const char *eh_frame_hdr, u64 adjust_eh_frame = 0); + void parse(const char* eh_frame_hdr, u64 adjust_eh_frame = 0); + void parseCie(); + void parseFde(); + void parseInstructions(u32 loc, const char* end); + int parseExpression(); - // manual parse of fde - DwarfParser(const char *name, const char *image_base); + void addRecord(u32 loc, u32 cfa_reg, int cfa_off, int fp_off, int pc_off); + FrameDesc* addRecordRaw(u32 loc, int cfa, int fp_off, int pc_off); - void addFde(const char *fde, const char *cie); + public: + DwarfParser(const char* name, const char* image_base, + const char* eh_frame_hdr, u64 adjust_eh_frame = 0); - FrameDesc *table() const { return _table; } + FrameDesc* table() const { + return _table; + } - int count() const { return _count; } + int count() const { + return _count; + } }; #endif // _DWARF_H diff --git a/include/logger.hpp b/include/logger.hpp index 2485e8e15..4ed8e9539 100644 --- a/include/logger.hpp +++ b/include/logger.hpp @@ -118,15 +118,15 @@ void 
LOG_set_logs_allowed_function(LogsAllowedCallback logs_allowed_function); #define LG_IF_LVL_OK(level, ...) \ do { \ if (unlikely(LOG_is_logging_enabled_for_level(level))) { \ - olprintfln(ABS(level), -1, MYNAME, __VA_ARGS__); \ + ddprof::olprintfln(ABS(level), -1, MYNAME, __VA_ARGS__); \ } \ } while (false) -#define LG_ERR(...) LG_IF_LVL_OK(LL_ERROR, __VA_ARGS__) -#define LG_WRN(...) LG_IF_LVL_OK(LL_WARNING, __VA_ARGS__) -#define LG_NTC(...) LG_IF_LVL_OK(LL_NOTICE, __VA_ARGS__) -#define LG_NFO(...) LG_IF_LVL_OK(LL_INFORMATIONAL, __VA_ARGS__) -#define LG_DBG(...) LG_IF_LVL_OK(LL_DEBUG, __VA_ARGS__) +#define LG_ERR(...) LG_IF_LVL_OK(ddprof::LL_ERROR, __VA_ARGS__) +#define LG_WRN(...) LG_IF_LVL_OK(ddprof::LL_WARNING, __VA_ARGS__) +#define LG_NTC(...) LG_IF_LVL_OK(ddprof::LL_NOTICE, __VA_ARGS__) +#define LG_NFO(...) LG_IF_LVL_OK(ddprof::LL_INFORMATIONAL, __VA_ARGS__) +#define LG_DBG(...) LG_IF_LVL_OK(ddprof::LL_DEBUG, __VA_ARGS__) #define PRINT_NFO(...) LG_IF_LVL_OK(-1 * LL_INFORMATIONAL, __VA_ARGS__) } // namespace ddprof diff --git a/src/async-profiler/dwarf.cpp b/src/async-profiler/dwarf.cpp index 05307e21c..8463f4cca 100644 --- a/src/async-profiler/dwarf.cpp +++ b/src/async-profiler/dwarf.cpp @@ -1,396 +1,356 @@ /* - * Copyright 2022 Nick Ripley - * Copyright 2021 Andrei Pangin - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * Modified by Nick Ripley to extract components needed for call stack unwinding + * Copyright The async-profiler authors + * SPDX-License-Identifier: Apache-2.0 */ -#include "dwarf.h" #include +#include "dwarf.h" +#include "logger.hpp" -#include - -#define DEBUG enum { - DW_CFA_nop = 0x0, - DW_CFA_set_loc = 0x1, - DW_CFA_advance_loc1 = 0x2, - DW_CFA_advance_loc2 = 0x3, - DW_CFA_advance_loc4 = 0x4, - DW_CFA_offset_extended = 0x5, - DW_CFA_restore_extended = 0x6, - DW_CFA_undefined = 0x7, - DW_CFA_same_value = 0x8, - DW_CFA_register = 0x9, - DW_CFA_remember_state = 0xa, - DW_CFA_restore_state = 0xb, - DW_CFA_def_cfa = 0xc, - DW_CFA_def_cfa_register = 0xd, - DW_CFA_def_cfa_offset = 0xe, - DW_CFA_def_cfa_expression = 0xf, - DW_CFA_expression = 0x10, - DW_CFA_offset_extended_sf = 0x11, - DW_CFA_def_cfa_sf = 0x12, - DW_CFA_def_cfa_offset_sf = 0x13, - DW_CFA_val_offset = 0x14, - DW_CFA_val_offset_sf = 0x15, - DW_CFA_val_expression = 0x16, - DW_CFA_GNU_args_size = 0x2e, - - DW_CFA_advance_loc = 0x1, - DW_CFA_offset = 0x2, - DW_CFA_restore = 0x3, + DW_CFA_nop = 0x0, + DW_CFA_set_loc = 0x1, + DW_CFA_advance_loc1 = 0x2, + DW_CFA_advance_loc2 = 0x3, + DW_CFA_advance_loc4 = 0x4, + DW_CFA_offset_extended = 0x5, + DW_CFA_restore_extended = 0x6, + DW_CFA_undefined = 0x7, + DW_CFA_same_value = 0x8, + DW_CFA_register = 0x9, + DW_CFA_remember_state = 0xa, + DW_CFA_restore_state = 0xb, + DW_CFA_def_cfa = 0xc, + DW_CFA_def_cfa_register = 0xd, + DW_CFA_def_cfa_offset = 0xe, + DW_CFA_def_cfa_expression = 0xf, + DW_CFA_expression = 0x10, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_def_cfa_sf = 0x12, + DW_CFA_def_cfa_offset_sf = 0x13, + DW_CFA_val_offset = 0x14, + DW_CFA_val_offset_sf = 0x15, + DW_CFA_val_expression = 0x16, + DW_CFA_AARCH64_negate_ra_state = 0x2d, + DW_CFA_GNU_args_size = 0x2e, + + DW_CFA_advance_loc = 0x1, + DW_CFA_offset = 0x2, + DW_CFA_restore = 0x3, }; enum { - DW_OP_breg_pc = 0x70 + DW_REG_PC, - DW_OP_const1u = 0x08, - DW_OP_const1s = 0x09, - DW_OP_const2u = 
0x0a, - DW_OP_const2s = 0x0b, - DW_OP_const4u = 0x0c, - DW_OP_const4s = 0x0d, - DW_OP_constu = 0x10, - DW_OP_consts = 0x11, - DW_OP_minus = 0x1c, - DW_OP_plus = 0x22, + DW_OP_breg_pc = 0x70 + DW_REG_PC, + DW_OP_const1u = 0x08, + DW_OP_const1s = 0x09, + DW_OP_const2u = 0x0a, + DW_OP_const2s = 0x0b, + DW_OP_const4u = 0x0c, + DW_OP_const4s = 0x0d, + DW_OP_constu = 0x10, + DW_OP_consts = 0x11, + DW_OP_minus = 0x1c, + DW_OP_plus = 0x22, }; -FrameDesc FrameDesc::default_frame = {0, DW_REG_FP | (2 * DW_STACK_SLOT) << 8, - -2 * DW_STACK_SLOT}; -DwarfParser::DwarfParser(const char *name, const char *image_base, - const char *eh_frame_hdr, u64 adjust_eh_frame) { - _name = name; - _image_base = image_base; +FrameDesc FrameDesc::empty_frame = {0, DW_REG_SP | EMPTY_FRAME_SIZE << 8, DW_SAME_FP, -EMPTY_FRAME_SIZE}; +FrameDesc FrameDesc::default_frame = {0, DW_REG_FP | LINKED_FRAME_SIZE << 8, -LINKED_FRAME_SIZE, -LINKED_FRAME_SIZE + DW_STACK_SLOT}; - _capacity = 128; - _count = 0; - _table = (FrameDesc *)malloc(_capacity * sizeof(FrameDesc)); - _prev = NULL; - _code_align = sizeof(instruction_t); - _data_align = -(int)sizeof(void *); +DwarfParser::DwarfParser(const char* name, const char* image_base, + const char* eh_frame_hdr, u64 adjust_eh_frame) { + _name = name; + _image_base = image_base; - parse(eh_frame_hdr, adjust_eh_frame); + _capacity = 128; + _count = 0; + _table = (FrameDesc*)malloc(_capacity * sizeof(FrameDesc)); + _prev = NULL; + + _code_align = sizeof(instruction_t); + _data_align = -(int)sizeof(void*); + + parse(eh_frame_hdr, adjust_eh_frame); } -void DwarfParser::parse(const char *eh_frame_hdr, u64 adjust_eh_frame) { - u8 version = eh_frame_hdr[0]; - u8 eh_frame_ptr_enc = eh_frame_hdr[1]; - u8 fde_count_enc = eh_frame_hdr[2]; - u8 table_enc = eh_frame_hdr[3]; +void DwarfParser::parse(const char* eh_frame_hdr, u64 adjust_eh_frame) { + u8 version = eh_frame_hdr[0]; + u8 eh_frame_ptr_enc = eh_frame_hdr[1]; + u8 fde_count_enc = eh_frame_hdr[2]; + u8 table_enc = 
eh_frame_hdr[3]; - if (version != 1 || (eh_frame_ptr_enc & 0x7) != 0x3 || - (fde_count_enc & 0x7) != 0x3 || (table_enc & 0xf7) != 0x33) { - return; - } + if (version != 1 || (eh_frame_ptr_enc & 0x7) != 0x3 || (fde_count_enc & 0x7) != 0x3 || (table_enc & 0xf7) != 0x33) { + LG_WRN("Unsupported .eh_frame_hdr [%02x%02x%02x%02x] in %s", + version, eh_frame_ptr_enc, fde_count_enc, table_enc, _name); + return; + } - int fde_count = *(int *)(eh_frame_hdr + 8); -#ifdef DEBUG - printf("fde count = %d \n", fde_count); -#endif - int *table = (int *)(eh_frame_hdr + 16); - for (int i = 0; i < fde_count; i++) { - _ptr = eh_frame_hdr + table[i * 2] - adjust_eh_frame; - if (i == 0) { - printf("ptr = %p, table offset = %p \n", _ptr, table[i * 2]); + int fde_count = *(int*)(eh_frame_hdr + 8); + int* table = (int*)(eh_frame_hdr + 16); + for (int i = 0; i < fde_count; i++) { + _ptr = eh_frame_hdr + table[i * 2] - adjust_eh_frame; // adjust_eh_frame is a byte offset applied to the FDE address, never to the table base + parseFde(); } - parseFde(); - } } void DwarfParser::parseCie() { - u32 cie_len = get32(); - if (cie_len == 0 || cie_len == 0xffffffff) { - return; - } - - const char *cie_start = _ptr; - _ptr += 5; - while (*_ptr++) {} - _code_align = getLeb(); - _data_align = getSLeb(); - _ptr = cie_start + cie_len; + u32 cie_len = get32(); + if (cie_len == 0 || cie_len == 0xffffffff) { + return; + } + + const char* cie_start = _ptr; + _ptr += 5; + while (*_ptr++) {} + _code_align = getLeb(); + _data_align = getSLeb(); + _ptr = cie_start + cie_len; } void DwarfParser::parseFde() { + u32 fde_len = get32(); + if (fde_len == 0 || fde_len == 0xffffffff) { + return; + } - u32 fde_len = get32(); - // printf("fde len = %u \n", fde_len); - if (fde_len == 0 || fde_len == 0xffffffff) { - return; - } - - const char *fde_start = _ptr; - u32 cie_offset = get32(); - if (_count == 0) { -#ifdef DEBUG - printf("Change pointer to %lx - %lx \n", fde_start, cie_offset); -#endif - _ptr = fde_start - cie_offset; - parseCie(); - _ptr = fde_start + 4; - } - - u32 range_start = getPtr() - 
_image_base; - if (_count == 0) { - printf("Dwarf range start: %lx (ptr) - %lx (image) = %lx \n", getPtr(), - _image_base, range_start); - } - - u32 range_len = get32(); - _ptr += getLeb(); - parseInstructions(range_start, fde_start + fde_len); - addRecord(range_start + range_len, DW_REG_SP, DW_STACK_SLOT, DW_SAME_FP); + const char* fde_start = _ptr; + u32 cie_offset = get32(); + if (_count == 0) { + _ptr = fde_start - cie_offset; + parseCie(); + _ptr = fde_start + 4; + } + + u32 range_start = getPtr() - _image_base; + u32 range_len = get32(); + _ptr += getLeb(); + parseInstructions(range_start, fde_start + fde_len); + addRecord(range_start + range_len, DW_REG_FP, LINKED_FRAME_SIZE, -LINKED_FRAME_SIZE, -LINKED_FRAME_SIZE + DW_STACK_SLOT); } -void DwarfParser::parseInstructions(u32 loc, const char *end) { - const u32 code_align = _code_align; - const int data_align = _data_align; - - u32 cfa_reg = DW_REG_SP; - int cfa_off = DW_STACK_SLOT; - int fp_off = DW_SAME_FP; - int pc_off = -DW_STACK_SLOT; - - u32 rem_cfa_reg; - int rem_cfa_off; - int rem_fp_off; - int rem_pc_off; - - while (_ptr < end) { - u8 op = get8(); - switch (op >> 6) { - case 0: - switch (op) { - case DW_CFA_nop: - case DW_CFA_set_loc: - _ptr = end; - break; - case DW_CFA_advance_loc1: - addRecord(loc, cfa_reg, cfa_off, fp_off); - loc += get8() * code_align; - break; - case DW_CFA_advance_loc2: - addRecord(loc, cfa_reg, cfa_off, fp_off); -#ifdef ALIGN_ISSUES - // 16 bits alignement check - if ((reinterpret_cast(_ptr) & 0xf) != 0) { - // The address is not 16-bit aligned here - printf("Addr is not aligned 0x%lx (code_align=%i)\n", _ptr, - code_align); - exit(1); - } +void DwarfParser::parseInstructions(u32 loc, const char* end) { + const u32 code_align = _code_align; + const int data_align = _data_align; + + u32 cfa_reg = DW_REG_SP; + int cfa_off = EMPTY_FRAME_SIZE; + int fp_off = DW_SAME_FP; + int pc_off = -EMPTY_FRAME_SIZE; + + u32 rem_cfa_reg; + int rem_cfa_off; + int rem_fp_off; + int rem_pc_off; + 
+ while (_ptr < end) { + u8 op = get8(); + switch (op >> 6) { + case 0: + switch (op) { + case DW_CFA_nop: + case DW_CFA_set_loc: + _ptr = end; + break; + case DW_CFA_advance_loc1: + addRecord(loc, cfa_reg, cfa_off, fp_off, pc_off); + loc += get8() * code_align; + break; + case DW_CFA_advance_loc2: + addRecord(loc, cfa_reg, cfa_off, fp_off, pc_off); + loc += get16() * code_align; + break; + case DW_CFA_advance_loc4: + addRecord(loc, cfa_reg, cfa_off, fp_off, pc_off); + loc += get32() * code_align; + break; + case DW_CFA_offset_extended: + switch (getLeb()) { + case DW_REG_FP: fp_off = getLeb() * data_align; break; + case DW_REG_PC: pc_off = getLeb() * data_align; break; + default: skipLeb(); + } + break; + case DW_CFA_restore_extended: + case DW_CFA_undefined: + case DW_CFA_same_value: + if (getLeb() == DW_REG_FP) { + fp_off = DW_SAME_FP; + } + break; + case DW_CFA_register: + skipLeb(); + skipLeb(); + break; + case DW_CFA_remember_state: + rem_cfa_reg = cfa_reg; + rem_cfa_off = cfa_off; + rem_fp_off = fp_off; + rem_pc_off = pc_off; + break; + case DW_CFA_restore_state: + cfa_reg = rem_cfa_reg; + cfa_off = rem_cfa_off; + fp_off = rem_fp_off; + pc_off = rem_pc_off; + break; + case DW_CFA_def_cfa: + cfa_reg = getLeb(); + cfa_off = getLeb(); + break; + case DW_CFA_def_cfa_register: + cfa_reg = getLeb(); + break; + case DW_CFA_def_cfa_offset: + cfa_off = getLeb(); + break; + case DW_CFA_def_cfa_expression: { + u32 len = getLeb(); + cfa_reg = len == 11 ? 
DW_REG_PLT : DW_REG_INVALID; + cfa_off = DW_STACK_SLOT; + _ptr += len; + break; + } + case DW_CFA_expression: + skipLeb(); + _ptr += getLeb(); + break; + case DW_CFA_offset_extended_sf: + switch (getLeb()) { + case DW_REG_FP: fp_off = getSLeb() * data_align; break; + case DW_REG_PC: pc_off = getSLeb() * data_align; break; + default: skipLeb(); + } + break; + case DW_CFA_def_cfa_sf: + cfa_reg = getLeb(); + cfa_off = getSLeb() * data_align; + break; + case DW_CFA_def_cfa_offset_sf: + cfa_off = getSLeb() * data_align; + break; + case DW_CFA_val_offset: + case DW_CFA_val_offset_sf: + skipLeb(); + skipLeb(); + break; + case DW_CFA_val_expression: + if (getLeb() == DW_REG_PC) { + int pc_off = parseExpression(); + if (pc_off != 0) { + fp_off = DW_PC_OFFSET | (pc_off << 1); + } + } else { + _ptr += getLeb(); + } + break; +#ifdef __aarch64__ + case DW_CFA_AARCH64_negate_ra_state: + break; #endif - loc += get16() * code_align; - break; - case DW_CFA_advance_loc4: - addRecord(loc, cfa_reg, cfa_off, fp_off); - loc += get32() * code_align; - break; - case DW_CFA_offset_extended: - switch (getLeb()) { - case DW_REG_FP: - fp_off = getLeb() * data_align; - break; - case DW_REG_PC: - pc_off = getLeb() * data_align; - break; - default: - skipLeb(); - } - break; - case DW_CFA_restore_extended: - case DW_CFA_undefined: - case DW_CFA_same_value: - skipLeb(); - break; - case DW_CFA_register: - skipLeb(); - skipLeb(); - break; - case DW_CFA_remember_state: - rem_cfa_reg = cfa_reg; - rem_cfa_off = cfa_off; - rem_fp_off = fp_off; - rem_pc_off = pc_off; - break; - case DW_CFA_restore_state: - cfa_reg = rem_cfa_reg; - cfa_off = rem_cfa_off; - fp_off = rem_fp_off; - pc_off = rem_pc_off; - break; - case DW_CFA_def_cfa: - cfa_reg = getLeb(); - cfa_off = getLeb(); - break; - case DW_CFA_def_cfa_register: - cfa_reg = getLeb(); - break; - case DW_CFA_def_cfa_offset: - cfa_off = getLeb(); - break; - case DW_CFA_def_cfa_expression: { - u32 len = getLeb(); - cfa_reg = len == 11 ? 
DW_REG_PLT : DW_REG_INVALID; - cfa_off = DW_STACK_SLOT; - _ptr += len; - break; - } - case DW_CFA_expression: - skipLeb(); - _ptr += getLeb(); - break; - case DW_CFA_offset_extended_sf: - switch (getLeb()) { - case DW_REG_FP: - fp_off = getSLeb() * data_align; - break; - case DW_REG_PC: - pc_off = getSLeb() * data_align; - break; - default: - skipLeb(); + case DW_CFA_GNU_args_size: + skipLeb(); + break; + default: + LG_WRN("Unknown DWARF instruction 0x%x in %s", op, _name); + return; + } + break; + case DW_CFA_advance_loc: + addRecord(loc, cfa_reg, cfa_off, fp_off, pc_off); + loc += (op & 0x3f) * code_align; + break; + case DW_CFA_offset: + switch (op & 0x3f) { + case DW_REG_FP: fp_off = getLeb() * data_align; break; + case DW_REG_PC: pc_off = getLeb() * data_align; break; + default: skipLeb(); + } + break; + case DW_CFA_restore: + if ((op & 0x3f) == DW_REG_FP) { + fp_off = DW_SAME_FP; + } + break; } - break; - case DW_CFA_def_cfa_sf: - cfa_reg = getLeb(); - cfa_off = getSLeb() * data_align; - break; - case DW_CFA_def_cfa_offset_sf: - cfa_off = getSLeb() * data_align; - break; - case DW_CFA_val_offset: - case DW_CFA_val_offset_sf: - skipLeb(); - skipLeb(); - break; - case DW_CFA_val_expression: - if (getLeb() == DW_REG_PC) { - int pc_off = parseExpression(); - if (pc_off != 0) { - fp_off = DW_PC_OFFSET | (pc_off << 1); - } - } else { - _ptr += getLeb(); - } - break; - case DW_CFA_GNU_args_size: - skipLeb(); - break; - default: - return; - } - break; - case DW_CFA_advance_loc: - addRecord(loc, cfa_reg, cfa_off, fp_off); - loc += (op & 0x3f) * code_align; - break; - case DW_CFA_offset: - switch (op & 0x3f) { - case DW_REG_FP: - fp_off = getLeb() * data_align; - break; - case DW_REG_PC: - pc_off = getLeb() * data_align; - break; - default: - skipLeb(); - } - break; - case DW_CFA_restore: - break; } - } - addRecord(loc, cfa_reg, cfa_off, fp_off); + addRecord(loc, cfa_reg, cfa_off, fp_off, pc_off); } -// Parse a limited subset of DWARF expressions, which is used in -// 
DW_CFA_val_expression to point to the previous PC relative to the current PC. +// Parse a limited subset of DWARF expressions, which is used in DW_CFA_val_expression +// to point to the previous PC relative to the current PC. // Returns the offset of the previous PC from the current PC. int DwarfParser::parseExpression() { - int pc_off = 0; - int tos = 0; - - u32 len = getLeb(); - const char *end = _ptr + len; - - while (_ptr < end) { - u8 op = get8(); - switch (op) { - case DW_OP_breg_pc: - pc_off = getSLeb(); - break; - case DW_OP_const1u: - tos = get8(); - break; - case DW_OP_const1s: - tos = (signed char)get8(); - break; - case DW_OP_const2u: - tos = get16(); - break; - case DW_OP_const2s: - tos = (short)get16(); - break; - case DW_OP_const4u: - case DW_OP_const4s: - tos = get32(); - break; - case DW_OP_constu: - tos = getLeb(); - break; - case DW_OP_consts: - tos = getSLeb(); - break; - case DW_OP_minus: - pc_off -= tos; - break; - case DW_OP_plus: - pc_off += tos; - break; - default: - _ptr = end; - return 0; + int pc_off = 0; + int tos = 0; + + u32 len = getLeb(); + const char* end = _ptr + len; + + while (_ptr < end) { + u8 op = get8(); + switch (op) { + case DW_OP_breg_pc: + pc_off = getSLeb(); + break; + case DW_OP_const1u: + tos = get8(); + break; + case DW_OP_const1s: + tos = (signed char)get8(); + break; + case DW_OP_const2u: + tos = get16(); + break; + case DW_OP_const2s: + tos = (short)get16(); + break; + case DW_OP_const4u: + case DW_OP_const4s: + tos = get32(); + break; + case DW_OP_constu: + tos = getLeb(); + break; + case DW_OP_consts: + tos = getSLeb(); + break; + case DW_OP_minus: + pc_off -= tos; + break; + case DW_OP_plus: + pc_off += tos; + break; + default: + LG_WRN("Unknown DWARF opcode 0x%x in %s", op, _name); + _ptr = end; + return 0; + } } - } - return pc_off; + return pc_off; } -void DwarfParser::addRecord(u32 loc, u32 cfa_reg, int cfa_off, int fp_off) { - int cfa = cfa_reg | cfa_off << 8; - if (_prev == NULL || (_prev->loc == loc && 
--_count >= 0) || - _prev->cfa != cfa || _prev->fp_off != fp_off) { - _prev = addRecordRaw(loc, cfa, fp_off); - } +void DwarfParser::addRecord(u32 loc, u32 cfa_reg, int cfa_off, int fp_off, int pc_off) { + int cfa = cfa_reg | cfa_off << 8; + if (_prev == NULL || (_prev->loc == loc && --_count >= 0) || + _prev->cfa != cfa || _prev->fp_off != fp_off || _prev->pc_off != pc_off) { + _prev = addRecordRaw(loc, cfa, fp_off, pc_off); + } } -FrameDesc *DwarfParser::addRecordRaw(u32 loc, int cfa, int fp_off) { - if (_count >= _capacity) { - _capacity *= 2; - _table = (FrameDesc *)realloc(_table, _capacity * sizeof(FrameDesc)); - } - - FrameDesc *f = &_table[_count++]; - f->loc = loc; - f->cfa = cfa; - f->fp_off = fp_off; - return f; +FrameDesc* DwarfParser::addRecordRaw(u32 loc, int cfa, int fp_off, int pc_off) { + if (_count >= _capacity) { + _capacity *= 2; + _table = (FrameDesc*)realloc(_table, _capacity * sizeof(FrameDesc)); + } + + FrameDesc* f = &_table[_count++]; + f->loc = loc; + f->cfa = cfa; + f->fp_off = fp_off; + f->pc_off = pc_off; + return f; } From ac35d6d9fee923a244227dfc013dd9f73c96e2c7 Mon Sep 17 00:00:00 2001 From: r1viollet Date: Sat, 13 Apr 2024 18:11:31 +0200 Subject: [PATCH 4/5] Experimental unwinding Create an API that loads a dwarf table from a DSO structure --- include/logger.hpp | 2 +- test/CMakeLists.txt | 4 ++ test/dwarf_unwind-ut.cc | 118 +++++++++++++++++++++++++++++++++++----- 3 files changed, 110 insertions(+), 14 deletions(-) diff --git a/include/logger.hpp b/include/logger.hpp index 4ed8e9539..8a8b9503d 100644 --- a/include/logger.hpp +++ b/include/logger.hpp @@ -118,7 +118,7 @@ void LOG_set_logs_allowed_function(LogsAllowedCallback logs_allowed_function); #define LG_IF_LVL_OK(level, ...) 
\ do { \ if (unlikely(LOG_is_logging_enabled_for_level(level))) { \ - ddprof::olprintfln(ABS(level), -1, MYNAME, __VA_ARGS__); \ + ddprof::olprintfln(ABS(level), -1, MYNAME, __VA_ARGS__); \ } \ } while (false) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e222ce26a..d4bd038c1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -398,6 +398,10 @@ add_unit_test( ../src/lib/savecontext.cc ../src/lib/saveregisters.cc # ../src/lib/allocation_tracker.cc + ../src/dso_hdr.cc + ../src/dso.cc + ../src/procutils.cc + ../src/signal_helper.cc ../src/ringbuffer_utils.cc ../src/perf_ringbuffer.cc ../src/perf.cc diff --git a/test/dwarf_unwind-ut.cc b/test/dwarf_unwind-ut.cc index 4918e3ce5..b6cea2f18 100644 --- a/test/dwarf_unwind-ut.cc +++ b/test/dwarf_unwind-ut.cc @@ -1,10 +1,15 @@ #include +#include "dso_hdr.hpp" +#include "elf_helpers.h" +#include "loghandle.hpp" #include "savecontext.hpp" #include "stackWalker.h" #include "unwind_state.hpp" #include +#include +#include // open with readonly #include "async-profiler/codeCache.h" #include "async-profiler/stack_context.h" @@ -25,8 +30,10 @@ std::byte stack[PERF_SAMPLE_STACK_SIZE]; -DDPROF_NOINLINE size_t funcA(std::array ®s); -DDPROF_NOINLINE size_t funcB(std::array ®s); +DDPROF_NOINLINE size_t +funcA(std::array ®s); +DDPROF_NOINLINE size_t +funcB(std::array ®s); size_t funcB(std::array ®s) { printf("dwarf_unwind-ut:%s %lx \n", __FUNCTION__, _THIS_IP_); @@ -73,13 +80,95 @@ TEST(dwarf_unwind, simple) { ASSERT_TRUE(std::string(syms[2]).find("funcA") != std::string::npos); } +namespace ddprof { + +DDRes load_dwarf(pid_t pid, DsoHdr::PidMapping &pid_map, DsoHdr &dso_hdr, + ProcessAddress_t ip) { + // todo : check if we already parsed ? 
+ + DsoHdr::DsoFindRes find_res = + dso_hdr.dso_find_or_backpopulate(pid_map, pid, ip); + if (!find_res.second) { + DDRES_RETURN_WARN_LOG(DD_WHAT_UW_ERROR, "Unable to find 0x%lu", ip); + } + const Dso &dso = find_res.first->second; + if (!has_relevant_path(dso._type) || !dso.is_executable()) { + DDRES_RETURN_WARN_LOG(DD_WHAT_UW_ERROR, + "Unable to load dwarf from dso" + "%s", + dso._filename.c_str()); + } + FileInfoId_t file_info_id = dso_hdr.get_or_insert_file_info(dso); + if (file_info_id <= k_file_info_error) { + DDRES_RETURN_WARN_LOG(DD_WHAT_UW_ERROR, + "Unable to find path to" + "%s", + dso._filename.c_str()); + } + const FileInfoValue &file_info_value = + dso_hdr.get_file_info_value(file_info_id); + + int fd = open(file_info_value.get_path().c_str(), O_RDONLY); + // remote unwinding + if (-1 == fd) { + DDRES_RETURN_WARN_LOG(DD_WHAT_UW_ERROR, "error opening file %s \n", + file_info_value.get_path().c_str()); + } + Elf *elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); + if (elf == NULL) { + DDRES_RETURN_WARN_LOG(DD_WHAT_UW_ERROR, "Invalid elf %s \n", + file_info_value.get_path().c_str()); + } + Offset_t biais_offset; + ElfAddress_t vaddr; + ElfAddress_t text_base; + Offset_t elf_offset; + // Compute how to convert a process address + if (!get_elf_offsets(elf, file_info_value.get_path().c_str(), vaddr, + elf_offset, biais_offset, text_base)) { + // Todo: we have a more accurate version of this function + DDRES_RETURN_WARN_LOG(DD_WHAT_UW_ERROR, + "Unable to compute elf offsets %s \n", + file_info_value.get_path().c_str()); + } + + EhFrameInfo eh_frame_info = {}; + if (!get_eh_frame_info(elf, eh_frame_info)) { + printf("Failed to retrieve eh frame info\n"); + DDRES_RETURN_WARN_LOG(DD_WHAT_UW_ERROR, + "Unable to retrieve eh_frame_info %s \n", + file_info_value.get_path().c_str()); + } + const char *elf_base = + eh_frame_info._eh_frame_hdr._data - eh_frame_info._eh_frame_hdr._offset; + + Offset_t adjust_eh_frame = (eh_frame_info._eh_frame._vaddr_sec - + 
eh_frame_info._eh_frame_hdr._vaddr_sec) - + (eh_frame_info._eh_frame._offset - eh_frame_info._eh_frame_hdr._offset); + + DwarfParser dwarf(dso._filename.c_str(), elf_base, + eh_frame_info._eh_frame_hdr._data, adjust_eh_frame); + + LG_DBG("Dwarf table %lu elements", dwarf.count()); + free(dwarf.table()); +} + +TEST(dwarf_unwind, read_dwarf) { + LogHandle handle; + pid_t my_pid = getpid(); + ProcessAddress_t ip = _THIS_IP_; + DsoHdr dso_hdr; + load_dwarf(my_pid, dso_hdr.get_pid_mapping(my_pid), dso_hdr, ip); +} + +} // namespace ddprof #ifdef ALLOC_TRACKER -#include "allocation_tracker.hpp" -#include "perf_ringbuffer.hpp" -#include "ringbuffer_holder.hpp" -#include "ringbuffer_utils.hpp" -#include -#include "defer.hpp" +# include "allocation_tracker.hpp" +# include "defer.hpp" +# include "perf_ringbuffer.hpp" +# include "ringbuffer_holder.hpp" +# include "ringbuffer_utils.hpp" +# include namespace ddprof { static const uint64_t kSamplingRate = 1; @@ -93,7 +182,7 @@ DDPROF_NOINLINE void func_save_sleep(size_t size) { assert(tl_state); int i = 0; while (++i < 100000) { - + ddprof::AllocationTracker::track_allocation_s(0xdeadbeef, size, *tl_state); // prevent tail call optimization getpid(); @@ -143,12 +232,15 @@ TEST(dwarf_unwind, remote) { ASSERT_EQ(hdr->type, PERF_RECORD_SAMPLE); // convert based on mask for this watcher (default in this case) - perf_event_sample *sample = hdr2samp(hdr, ddprof::perf_event_default_sample_type()); + perf_event_sample *sample = + hdr2samp(hdr, ddprof::perf_event_default_sample_type()); - std::span regs_span{sample->regs, ddprof::k_perf_register_count}; + std::span regs_span{ + sample->regs, ddprof::k_perf_register_count}; ap::StackContext sc = ap::from_regs(regs_span); std::span stack{ - reinterpret_cast(sample->data_stack), sample->size_stack}; + reinterpret_cast(sample->data_stack), + sample->size_stack}; ap::StackBuffer buffer(stack, sc.sp, sc.sp + sample->size_stack); void *callchain[ddprof::kMaxStackDepth]; @@ -170,5 +262,5 @@ 
TEST(dwarf_unwind, remote) { kill(temp_pid, SIGTERM); } } -} +} // namespace ddprof #endif From 3f8ca36c295374190f7b74c1b02b8f0e013f5c83 Mon Sep 17 00:00:00 2001 From: Nicolas Savoire Date: Mon, 15 Apr 2024 17:44:55 +0200 Subject: [PATCH 5/5] Fix some warnings, use vector --- include/async-profiler/codeCache.h | 7 ++++--- include/async-profiler/dwarf.h | 18 ++++++++--------- include/perf_archmap.hpp | 1 + src/async-profiler/codeCache.cpp | 11 +++------- src/async-profiler/dwarf.cpp | 30 +++++++++++----------------- src/async-profiler/elf_helpers.cpp | 2 -- src/async-profiler/stack_context.cpp | 2 +- src/async-profiler/symbols_linux.cpp | 18 ++++++++--------- test/dwarf_unwind-ut.cc | 5 +++-- 9 files changed, 41 insertions(+), 53 deletions(-) diff --git a/include/async-profiler/codeCache.h b/include/async-profiler/codeCache.h index f8c257bad..9bc693184 100644 --- a/include/async-profiler/codeCache.h +++ b/include/async-profiler/codeCache.h @@ -18,6 +18,7 @@ #define _CODECACHE_H #include +#include #define NO_MIN_ADDRESS ((const void *)-1) #define NO_MAX_ADDRESS ((const void *)0) @@ -92,8 +93,8 @@ class CodeCache { public: // todo fix hacky override for remote - FrameDesc *_dwarf_table; - int _dwarf_table_length; + using FrameDescTable = std::vector; + FrameDescTable _dwarf_table; CodeCache(const char *name, short lib_index = -1, const void *min_address = NO_MIN_ADDRESS, @@ -135,7 +136,7 @@ class CodeCache { void **findGlobalOffsetEntry(void *address); void makeGotPatchable(); - void setDwarfTable(FrameDesc *table, int length); + void setDwarfTable(FrameDescTable &&table); FrameDesc *findFrameDesc(uint64_t elf_address); }; diff --git a/include/async-profiler/dwarf.h b/include/async-profiler/dwarf.h index d05aab431..38327e91c 100644 --- a/include/async-profiler/dwarf.h +++ b/include/async-profiler/dwarf.h @@ -9,6 +9,7 @@ #include #include "arch.h" +#include const int DW_REG_PLT = 128; // denotes special rule for PLT entries const int DW_REG_INVALID = 255; // denotes 
unsupported configuration @@ -84,10 +85,9 @@ class DwarfParser { const char* _image_base; const char* _ptr; - int _capacity; - int _count; - FrameDesc* _table; - FrameDesc* _prev; + using FrameDescTable = std::vector; + FrameDescTable _table; + FrameDesc *_prev; u32 _code_align; int _data_align; @@ -157,13 +157,11 @@ class DwarfParser { DwarfParser(const char* name, const char* image_base, const char* eh_frame_hdr, u64 adjust_eh_frame = 0); - FrameDesc* table() const { - return _table; - } + const FrameDescTable &table() const & { return _table; } - int count() const { - return _count; - } + FrameDescTable &&table() && { return std::move(_table); } + + int count() const { return _table.size(); } }; #endif // _DWARF_H diff --git a/include/perf_archmap.hpp b/include/perf_archmap.hpp index 8c46a56a0..712442172 100644 --- a/include/perf_archmap.hpp +++ b/include/perf_archmap.hpp @@ -26,6 +26,7 @@ enum PERF_ARCHMAP_X86 { PAM_X86_RSI, PAM_X86_RDI, PAM_X86_RBP, + PAM_X86_FP = PAM_X86_RBP, // For uniformity PAM_X86_RSP, PAM_X86_SP = PAM_X86_RSP, // For uniformity PAM_X86_RIP, diff --git a/src/async-profiler/codeCache.cpp b/src/async-profiler/codeCache.cpp index da620f249..8764a4444 100644 --- a/src/async-profiler/codeCache.cpp +++ b/src/async-profiler/codeCache.cpp @@ -49,9 +49,6 @@ CodeCache::CodeCache(const char *name, short lib_index, const void *min_address, _got_end = NULL; _got_patchable = false; - _dwarf_table = NULL; - _dwarf_table_length = 0; - _capacity = INITIAL_CODE_CACHE_CAPACITY; _count = 0; _blobs = new CodeBlob[_capacity]; @@ -63,7 +60,6 @@ CodeCache::~CodeCache() { } NativeFunc::destroy(_name); delete[] _blobs; - free(_dwarf_table); } void CodeCache::expand() { @@ -214,16 +210,15 @@ void CodeCache::makeGotPatchable() { } } -void CodeCache::setDwarfTable(FrameDesc *table, int length) { - _dwarf_table = table; - _dwarf_table_length = length; +void CodeCache::setDwarfTable(FrameDescTable &&table) { + _dwarf_table = std::move(table); } FrameDesc 
*CodeCache::findFrameDesc(uintptr_t elf_address) { assert(elf_address < std::numeric_limits::max()); const u32 target_loc = (const u32)elf_address; int low = 0; - int high = _dwarf_table_length - 1; + int high = _dwarf_table.size() - 1; while (low <= high) { int mid = (unsigned int)(low + high) >> 1; diff --git a/src/async-profiler/dwarf.cpp b/src/async-profiler/dwarf.cpp index 8463f4cca..2fcff029b 100644 --- a/src/async-profiler/dwarf.cpp +++ b/src/async-profiler/dwarf.cpp @@ -64,9 +64,7 @@ DwarfParser::DwarfParser(const char* name, const char* image_base, _name = name; _image_base = image_base; - _capacity = 128; - _count = 0; - _table = (FrameDesc*)malloc(_capacity * sizeof(FrameDesc)); + _table.reserve(128); _prev = NULL; _code_align = sizeof(instruction_t); @@ -117,7 +115,7 @@ void DwarfParser::parseFde() { const char* fde_start = _ptr; u32 cie_offset = get32(); - if (_count == 0) { + if (_table.empty()) { _ptr = fde_start - cie_offset; parseCie(); _ptr = fde_start + 4; @@ -237,9 +235,9 @@ void DwarfParser::parseInstructions(u32 loc, const char* end) { break; case DW_CFA_val_expression: if (getLeb() == DW_REG_PC) { - int pc_off = parseExpression(); - if (pc_off != 0) { - fp_off = DW_PC_OFFSET | (pc_off << 1); + int pc_off2 = parseExpression(); + if (pc_off2 != 0) { + fp_off = DW_PC_OFFSET | (pc_off2 << 1); } } else { _ptr += getLeb(); @@ -335,22 +333,18 @@ int DwarfParser::parseExpression() { void DwarfParser::addRecord(u32 loc, u32 cfa_reg, int cfa_off, int fp_off, int pc_off) { int cfa = cfa_reg | cfa_off << 8; - if (_prev == NULL || (_prev->loc == loc && --_count >= 0) || - _prev->cfa != cfa || _prev->fp_off != fp_off || _prev->pc_off != pc_off) { + if (_prev == NULL || _prev->cfa != cfa || _prev->fp_off != fp_off || + _prev->pc_off != pc_off) { _prev = addRecordRaw(loc, cfa, fp_off, pc_off); } } FrameDesc* DwarfParser::addRecordRaw(u32 loc, int cfa, int fp_off, int pc_off) { - if (_count >= _capacity) { - _capacity *= 2; - _table = 
(FrameDesc*)realloc(_table, _capacity * sizeof(FrameDesc)); + if (_prev != NULL && _prev->loc == loc) { + _table.back() = {loc, cfa, fp_off, pc_off}; + } else { + _table.push_back({loc, cfa, fp_off, pc_off}); } - FrameDesc* f = &_table[_count++]; - f->loc = loc; - f->cfa = cfa; - f->fp_off = fp_off; - f->pc_off = pc_off; - return f; + return &_table.back(); } diff --git a/src/async-profiler/elf_helpers.cpp b/src/async-profiler/elf_helpers.cpp index ba7b04134..4f2aef0c3 100644 --- a/src/async-profiler/elf_helpers.cpp +++ b/src/async-profiler/elf_helpers.cpp @@ -10,8 +10,6 @@ #include #include -#define LG_WRN(args...) printf(args) - const char *get_section_data(Elf *elf, const char *section_name, Offset_t &elf_offset) { // Get the string table index for the section header strings diff --git a/src/async-profiler/stack_context.cpp b/src/async-profiler/stack_context.cpp index 6d7dbe52a..956ebc8d9 100644 --- a/src/async-profiler/stack_context.cpp +++ b/src/async-profiler/stack_context.cpp @@ -10,7 +10,7 @@ StackContext from_regs(std::span ap::StackContext sc; sc.pc = CAST_TO_VOID_STAR(regs[REGNAME(PC)]); sc.sp = regs[REGNAME(SP)]; - sc.fp = regs[REGNAME(RBP)]; + sc.fp = regs[REGNAME(FP)]; return sc; } diff --git a/src/async-profiler/symbols_linux.cpp b/src/async-profiler/symbols_linux.cpp index 772569aab..f7d19f8f0 100644 --- a/src/async-profiler/symbols_linux.cpp +++ b/src/async-profiler/symbols_linux.cpp @@ -339,7 +339,7 @@ void ElfParser::parseDynamicSection() { break; } } - printf("relent = %d \n", relent); + printf("relent = %zu \n", relent); if (relent != 0) { if (pltrelsz != 0 && got_start != NULL) { // The number of entries in .got.plt section matches the number of @@ -382,7 +382,7 @@ void ElfParser::parseDwarfInfoRemote(const char *eh_frame_data, Offset_t adjust_eh_frame) { printf("Create dwarf with base:%p - eh_frame_hdr:%p\n", _base, eh_frame_data); DwarfParser dwarf(_cc->name(), base_remote, eh_frame_data, adjust_eh_frame); - _cc->setDwarfTable(dwarf.table(), 
dwarf.count()); + _cc->setDwarfTable(std::move(dwarf).table()); printf("Created a number of dwarf entries = %d \n", dwarf.count()); } @@ -393,9 +393,9 @@ void ElfParser::parseDwarfInfo() { ElfProgramHeader *eh_frame_hdr = findProgramHeader(PT_GNU_EH_FRAME); if (eh_frame_hdr != NULL) { - printf("Create dwarf with %lx - at:%lx \n", _base, at(eh_frame_hdr)); + printf("Create dwarf with %p - at:%p \n", _base, at(eh_frame_hdr)); DwarfParser dwarf(_cc->name(), _base, at(eh_frame_hdr)); - _cc->setDwarfTable(dwarf.table(), dwarf.count()); + _cc->setDwarfTable(std::move(dwarf).table()); printf("Created a number of dwarf entries = %d \n", dwarf.count()); } } @@ -658,11 +658,11 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, printf("error opening file %s \n", map.file()); continue; } - size_t length = (size_t)lseek64(fd, 0, SEEK_END); + // size_t length = (size_t)lseek64(fd, 0, SEEK_END); // todo : remove the mmap Elf *elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); if (elf == NULL) { - LG_WRN("Invalid elf %s (efl:%p, addr_mmap:%p)\n", map.file(), elf); + LG_WRN("Invalid elf %s (elf:%p)\n", map.file(), elf); goto continue_loop; } Offset_t biais_offset; @@ -676,7 +676,7 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, printf("biais offset get_elf_offset: %lx \n", biais_offset); printf("text base from get_elf_offset: %lx \n", text_base); printf("offset from get_elf_offset: %lx \n", elf_offset); - printf("last readable: %lx \n", last_readable_base); + printf("last readable: %p \n", last_readable_base); } else { printf("Failed to read elf offsets \n"); @@ -696,7 +696,7 @@ void Symbols::parsePidLibraries(pid_t pid, CodeCacheArray *array, eh_frame_info._eh_frame_hdr._offset; // this is used during unwinding to offset PC to dwarf instructions cc->setTextBase(image_base); - printf("image base = %lx \n", image_base); + printf("image base = %p \n", image_base); if (eh_frame_info._eh_frame_hdr._data) { // todo: is this always valid ? 
@@ -791,7 +791,7 @@ void Symbols::parseLibraries(CodeCacheArray *array, bool kernel_symbols) { // Do not parse the same executable twice, e.g. on Alpine Linux if (parsed_inodes.insert(map.dev() | inode << 16).second) { // Be careful: executable file is not always ELF, e.g. classes.jsa - printf("image_base = %p, map.offs() = %p, last_readable_base = %p \n", + printf("image_base = %p, map.offs() = %lx, last_readable_base = %p \n", image_base, map.offs(), last_readable_base); // todo - read the biais from the vaddr field (open file?) diff --git a/test/dwarf_unwind-ut.cc b/test/dwarf_unwind-ut.cc index b6cea2f18..d6ef1ffb8 100644 --- a/test/dwarf_unwind-ut.cc +++ b/test/dwarf_unwind-ut.cc @@ -149,8 +149,9 @@ DDRes load_dwarf(pid_t pid, DsoHdr::PidMapping &pid_map, DsoHdr &dso_hdr, DwarfParser dwarf(dso._filename.c_str(), elf_base, eh_frame_info._eh_frame_hdr._data, adjust_eh_frame); - LG_DBG("Dwarf table %lu elements", dwarf.count()); - free(dwarf.table()); + LG_DBG("Dwarf table %d elements", dwarf.count()); + + return {}; } TEST(dwarf_unwind, read_dwarf) {