内存访问越界的定位
去年笔者分享了两篇内存访问越界的定位方法,第一种方法用到了mprotect 系统调用设置内存属性,第二种方法则未用到该系统调用。两种方法对于调试用的简单应用都有效,不过笔者编写的调试应用是单线程的。在具体的工作实践中,只用到过第一种方法;因此不确定第二种方法效果如何。笔者认为这两种方法有一些缺点,某些情况下甚至可能会失效,例如系统调用产生的内存访问越界:read(fd, buf, buflen + overflowsize) 。此外,第二种方法需要知晓具体的内存分配的数据结构,笔者希望通过本文,进一步解析基于ptmalloc 的内存分配数据,为以后调试内存异常补充新的调试方法。
多线程的内存分配
在Linux内核中,线程与进程都通过task_struct 对象来表示、管理;而在应用层,一个进程的多个线程共享同一个地址空间。假设一个线程不断地分配内存,得到的内存指针交由另一个线程去释放,这是可行的;那么问题是,多个线程同时进行内存分配、释放操作时,如何实现对内存资源的互斥、同步访问?glibc的内存分配模块ptmalloc 给出的方案是:
- 通常情况下,每创建一个新的线程,便为之分配一个新的堆,并通过malloc_state 结构体来管理;
- 每个线程的局部存储变量都存放了一个malloc_state 结构体指针,在访问时通过对mutex 加锁实现互斥;
- 主线程的堆空间通常通过brk (或sbrk )系统调用来分配(大内存块可能会直接用mmap 来分配);
- 子线程的堆空间通过mmap 系统调用来分配,单个线程的malloc_state 可能包含多个不连续的堆空间。
当某个线程退出后,其分配的内存不会被释放,不过其占有的malloc_state 会被存储至一个链表中(见下面的next_free 指针);当再次创建新的线程时,可能会复用已退出线程的malloc_state 。该结构体定义于malloc/malloc.c :
/* State of one ptmalloc arena; per the surrounding text there is normally
   one such object per thread. */
struct malloc_state
{
/* Lock taken when the arena is accessed, giving mutual exclusion between threads. */
__libc_lock_define (, mutex);
int flags;
int have_fastchunks;
/* Array of NFASTBINS fast-bin heads. */
mfastbinptr fastbinsY[NFASTBINS];
/* Chunk pointer named "top"; the GDB script in this article masks it with
   the heap size to find the arena's most recent heap. */
mchunkptr top;
mchunkptr last_remainder;
/* Bin array with two chunk pointers per bin -- presumably the fd/bk heads of
   each circular free list; TODO confirm against malloc.c. */
mchunkptr bins[NBINS * 2 - 2];
unsigned int binmap[BINMAPSIZE];
/* Next arena in the list of all arenas; main_arena's initializer points
   this at main_arena itself. */
struct malloc_state *next;
/* Link in the list of arenas left behind by exited threads, kept so a new
   thread can reuse them. */
struct malloc_state *next_free;
INTERNAL_SIZE_T attached_threads;
INTERNAL_SIZE_T system_mem;
INTERNAL_SIZE_T max_system_mem;
};
通常情况下,每一个线程对应一个该结构体对象;对于主线程,其对象是静态定义的:
/* Statically defined arena used by the main thread. */
static struct malloc_state main_arena =
{
.mutex = _LIBC_LOCK_INITIALIZER,
/* The arena list starts out containing only main_arena, linked to itself. */
.next = &main_arena,
.attached_threads = 1
};
结构体中某些成员的解析需要冗长的说明,这里不再详述。
线程的堆空间heap_info
ptmalloc对内存管理的实现是比较复杂的,该模块提供了malloc_info、mallinfo等接口,可以提供当前内存分配的状态信息。此外,ptmalloc 源码中还有dump_heap 等C语言的查看函数,通过阅读源码可以了解内存分配的具体实现。通过mmap 分配的单个堆的最大大小是固定的(单个子线程可以有多个堆),在32位平台上通常为1MB;在64位平台上通常为64MB;堆的定义为:
/* Header placed at the start of each mmap-ed heap of a non-main arena. */
typedef struct _heap_info
{
/* Arena (malloc_state) this heap belongs to. */
mstate ar_ptr;
/* Link to another heap of the same arena; the GDB script in this article
   follows it until it reads NULL. */
struct _heap_info *prev;
/* Size of this heap in bytes, as printed by dump_heap. */
size_t size;
/* NOTE(review): presumably the part of the heap already made accessible
   with mprotect -- confirm against arena.c. */
size_t mprotect_size;
/* Padding; NOTE(review): presumably keeps the memory following the header
   malloc-aligned -- confirm. */
char pad[-6 * SIZE_SZ & MALLOC_ALIGN_MASK];
} heap_info;
确定了某个堆的基地址后,可以方便地遍历其中的内存块(mchunkptr 指向的内存):
static void
dump_heap (heap_info *heap)
{
char *ptr;
mchunkptr p;
fprintf (stderr, "Heap %p, size %10lx:\n", heap, (long) heap->size);
ptr = (heap->ar_ptr != (mstate) (heap + 1)) ?
(char *) (heap + 1) : (char *) (heap + 1) + sizeof (struct malloc_state);
p = (mchunkptr) (((unsigned long) ptr + MALLOC_ALIGN_MASK) &
~MALLOC_ALIGN_MASK);
从上面的代码可以确定,子线程的malloc_state 内存管理对象也可能从其堆空间中分配。如此,对于多线程的内存解析就有了一个清晰的着手点。
编写GDB扩展脚本解析多线程的内存分配
通过阅读ptmalloc 源码,可以编写python 扩展脚本,由gdb 加载,解析多线程应用的内存分配数据。具体实现是从查找静态结构体变量main_arena 开始的,结构体中next 指针将多线程的各个malloc_state 对象串联起来。该脚本对main_arena 的解析信息不多,但对于子线程的堆内存解析信息较多。在此基础上,可以实现一些复杂、精确的内存异常的调试功能。脚本全部内容如下(文件名为ptmalloc.py ):
#!/usr/bin/env python3
class ptmalloc(gdb.Command):
    """Dump GNU Libc ptmalloc state

    Registers the GDB command ``ptmalloc``.  When invoked, the command
    looks up glibc's static ``main_arena`` and follows the ``next``
    pointers through every ``struct malloc_state``, printing for each
    arena its counters, its heaps (non-main arenas only), the ``top`` and
    ``last_remainder`` chunks, a summary of the chunks held in the regular
    bins and the non-empty fast bins.

    The script must be sourced inside GDB (which provides the ``gdb``
    module) and needs debug information for glibc's malloc types.
    """

    # Flag bits kept in the low three bits of ``mchunk_size``.
    PREV_INUSE = 0x1
    IS_MMAPPED = 0x2
    NON_MAIN_ARENA = 0x4
    SIZE_BITS = 0x7

    # Bins are numbered 1..NBINS-1 in glibc; index 0 does not exist.
    NBINS = 128

    def __init__(self):
        """Register the command and cache the glibc types that are needed.

        Raises:
            gdb.error: if one of the looked-up types is unknown, e.g. when
                glibc debug information is not loaded.
        """
        super(ptmalloc, self).__init__("ptmalloc", gdb.COMMAND_USER)
        # get the type of ulong:
        self.type_ulong = gdb.lookup_type('unsigned long')
        self.type_ulong_ptr = self.type_ulong.pointer()
        # get the type of malloc_state:
        self.type_mstate = gdb.lookup_type('struct malloc_state')
        self.type_mstate_ptr = self.type_mstate.pointer()
        # get the type of malloc_chunk:
        self.type_mchunk = gdb.lookup_type('struct malloc_chunk')
        self.type_mchunk_ptr = self.type_mchunk.pointer()
        # get the type of heap:
        self.type_heap = gdb.lookup_type('struct _heap_info')
        self.type_heap_ptr = self.type_heap.pointer()
        # address of main_arena, filled in by get_main_arena()
        self.addr_main_arena = 0x0  # NULL pointer!
        # Maximum (and alignment) size of an mmap-ed heap: 64MB on 64-bit
        # targets, 1MB on 32-bit targets.  Picked from the target's word
        # size instead of being hard-coded for 64-bit.
        if self.type_ulong.sizeof >= 8:
            self.heap_max_size = 0x4000000
        else:
            self.heap_max_size = 0x0100000
        # Heaps are aligned to their maximum size, so masking any address
        # inside a heap yields the address of its heap_info header.
        self.heap_mask = self.heap_max_size - 0x1
        # malloc align and mask
        self.malloc_align = 0x2 * self.type_ulong.sizeof
        self.malloc_align_mask = self.malloc_align - 0x1

    def get_main_arena(self):
        """Look up ``main_arena`` and remember its address.

        Returns:
            ``(value, address)`` where ``value`` is the ``gdb.Value`` of the
            structure and ``address`` its integer address, or ``(None, 0)``
            when the symbol cannot be resolved.
        """
        # `main_arena is defined in malloc/malloc.c
        symbol = gdb.lookup_static_symbol('main_arena')
        if symbol is None:
            gdb.write('Error, failed to lookup symbol main_arena!\n', gdb.STDERR)
            gdb.flush(gdb.STDERR)
            return None, 0x0
        marena = symbol.value()
        maddr = int(marena.address.cast(self.type_ulong))
        # Report a NULL address explicitly; an assert would disappear
        # when Python runs with -O.
        if maddr == 0:
            gdb.write('Error, main_arena has a null address!\n', gdb.STDERR)
            gdb.flush(gdb.STDERR)
            return None, 0x0
        # update `addr_main_arena
        self.addr_main_arena = maddr
        return marena, maddr

    def check_mstate(self, mstate):
        """Validate a ``struct malloc_state *`` and dereference it.

        Args:
            mstate: ``gdb.Value`` holding a pointer to an arena (typically
                the ``next`` member of the previous arena), or ``None``.

        Returns:
            ``(arena, address)`` for a usable arena, or ``(None, 0)`` when
            the pointer is ``None``, not a ``gdb.Value``, NULL, or points
            back at ``main_arena`` (the end of the circular arena list).
        """
        if mstate is None:
            return None, 0x0
        if not isinstance(mstate, gdb.Value):
            gdb.write("Error, attempt to check unknown type as mstate: {}\n".format(
                str(type(mstate))), gdb.STDERR)
            gdb.flush(gdb.STDERR)
            return None, 0x0
        maddr = int(mstate.cast(self.type_ulong))
        if maddr == 0:
            gdb.write("Warning, null malloc state.\n", gdb.STDERR)
            gdb.flush(gdb.STDERR)
            return None, 0x0
        if maddr == self.addr_main_arena:
            gdb.write("Warning, main arena show up again.\n", gdb.STDERR)
            gdb.flush(gdb.STDERR)
            return None, 0x0
        arena = mstate.cast(self.type_mstate_ptr).dereference()
        return arena, maddr

    def dump_size(self, memsize):
        """Format a byte count as ``"<n> GB, <n> MB, <x.xx> KB"``.

        The GB and MB parts are omitted when they would be zero; the KB
        part is always present.

        Args:
            memsize: size in bytes, as ``int`` or ``gdb.Value``.

        Returns:
            The formatted string, or ``""`` for an unsupported type.
        """
        if isinstance(memsize, gdb.Value):
            memsize = int(memsize.cast(self.type_ulong))
        elif not isinstance(memsize, int):
            gdb.write("Error, invalid size type: {}\n".format(str(type(memsize))), gdb.STDERR)
            gdb.flush(gdb.STDERR)
            return ""
        parts = []
        if memsize >= 0x40000000:
            parts.append("{0} GB".format(memsize >> 30))
            memsize &= 0x40000000 - 0x1
        if memsize >= 0x100000:
            parts.append("{} MB".format(memsize >> 20))
            memsize &= 0x100000 - 0x1
        parts.append("{:.02f} KB".format(memsize / 0x400))
        return ", ".join(parts)

    def is_main_arena(self, arena):
        """Tell whether ``arena`` is ``main_arena``.

        Args:
            arena: integer address, or a ``gdb.Value`` holding the
                ``struct malloc_state`` itself (not a pointer to it).

        Returns:
            ``True`` only when the address is non-zero and equals the
            address recorded by ``get_main_arena()``.
        """
        if isinstance(arena, int):
            addr = arena
        elif isinstance(arena, gdb.Value):
            # A value that is not an lvalue has no address.
            if arena.address is None:
                return False
            addr = int(arena.address.cast(self.type_ulong))
        else:
            gdb.write("Error, unknown arena type: {}\n".format(
                str(type(arena))), gdb.STDERR)
            gdb.flush(gdb.STDERR)
            return False
        return addr != 0 and self.addr_main_arena == addr

    def dump_fastbins(self, mstate):
        """Print every non-NULL entry of ``fastbinsY``.

        Args:
            mstate: ``gdb.Value`` of a ``struct malloc_state``.

        Returns:
            ``False`` when ``fastbinsY`` is not an array, else ``True``.
        """
        fbins = mstate['fastbinsY']
        ftbins = fbins.type
        if ftbins.code != gdb.TYPE_CODE_ARRAY:
            gdb.write("Error, fastbins should be an array: {}\n".format(
                ftbins.code), gdb.STDERR)
            gdb.flush(gdb.STDERR)
            return False
        # Take the bounds from the debug info instead of assuming ten
        # entries, so the loop matches the NFASTBINS of the target.
        low, high = ftbins.range()
        for count in range(low, high + 1):
            fbin_addr = int(fbins[count].cast(self.type_ulong))
            if fbin_addr == 0:
                continue
            gdb.write('\tfastbinsY[{}]: {:#x}\n'.format(count, fbin_addr))
        gdb.flush(gdb.STDOUT)
        return True

    def dump_named_chunk(self, mstate, name):
        """Print the header of the chunk that ``mstate[name]`` points to.

        Args:
            mstate: ``gdb.Value`` of a ``struct malloc_state``.
            name: member holding the chunk pointer, e.g. ``'top'`` or
                ``'last_remainder'``.

        Returns:
            ``False`` when the pointer is NULL (nothing is printed),
            else ``True``.
        """
        chunk_ptr = mstate[name].cast(self.type_mchunk_ptr)
        chunk_addr = int(chunk_ptr.cast(self.type_ulong))
        # Check for NULL before touching the pointed-to memory.
        if chunk_addr == 0:
            return False
        chunk = chunk_ptr.dereference()
        gdb.write("\t{} chunk at {:#x}:\n".format(name.title(), chunk_addr))
        curr_size = int(chunk['mchunk_size'].cast(self.type_ulong))
        if (curr_size & self.PREV_INUSE) != 0:
            gdb.write('\t\tPrevious chunk is in use.\n')
        else:
            # mchunk_prev_size is only read when the previous chunk is free.
            prev_size = int(chunk['mchunk_prev_size'].cast(self.type_ulong))
            gdb.write('\t\tPrevious chunk size: {0:#x} ({1})\n'.format(
                prev_size, self.dump_size(prev_size)))
        gdb.write('\t\tmapped: {}\n'.format(
            (curr_size & self.IS_MMAPPED) != 0))
        gdb.write('\t\tnot main arena: {}\n'.format(
            (curr_size & self.NON_MAIN_ARENA) != 0))
        curr_size &= ~self.SIZE_BITS
        gdb.write('\t\tchunk size: {0:#x} ({1})\n'.format(
            curr_size, self.dump_size(curr_size)))
        gdb.flush(gdb.STDOUT)
        return True

    def dump_free_chunks(self, mstate):
        """Count the chunks linked into the regular bins and sum their sizes.

        Each bin is walked through the ``fd`` pointers until the walk
        returns to the bin head.  A NULL pointer or a loop that never
        reaches the head (both signs of a damaged list) ends the walk of
        that bin with a warning.  Fast bins are not included.

        Args:
            mstate: ``gdb.Value`` of a ``struct malloc_state``.

        Returns:
            Always ``True``.
        """
        stride = 0x2 * self.type_ulong.sizeof
        # Each bin head is addressed as if it were a malloc_chunk whose
        # `fd` member overlays the bin's first pointer; this assumes `fd`
        # sits two words into struct malloc_chunk, hence the step back.
        binaddr = mstate['bins'][0].address.cast(self.type_ulong)
        binaddr = binaddr - stride
        mcount, mtotal = 0x0, 0x0
        for idx in range(1, self.NBINS):
            bin_ptr = binaddr + (idx - 1) * stride
            bin_addr = int(bin_ptr)
            bin_at = bin_ptr.cast(self.type_mchunk_ptr).dereference()
            bin_fd_ptr = bin_at['fd'].cast(self.type_mchunk_ptr)
            bin_fd_addr = int(bin_fd_ptr.cast(self.type_ulong))
            # NULL (never initialised) or self-linked (empty) bin.
            if bin_fd_addr == 0 or bin_addr == bin_fd_addr:
                continue
            count, total = 0, 0
            visited = set()
            while bin_addr != bin_fd_addr:
                if bin_fd_addr in visited:
                    gdb.write('\t\tWarning, loop in bin {} detected at {:#x}\n'.format(
                        idx, bin_fd_addr), gdb.STDERR)
                    gdb.flush(gdb.STDERR)
                    break
                visited.add(bin_fd_addr)
                count += 1
                bin_fd = bin_fd_ptr.dereference()
                memsize = int(bin_fd['mchunk_size'].cast(self.type_ulong))
                total += memsize & ~self.SIZE_BITS
                bin_fd_ptr = bin_fd['fd'].cast(self.type_mchunk_ptr)
                bin_fd_addr = int(bin_fd_ptr.cast(self.type_ulong))
                if bin_fd_addr == 0:
                    gdb.write('\t\tWarning, null mchunkptr found at {}\n'.format(
                        count), gdb.STDERR)
                    gdb.flush(gdb.STDERR)
                    break
            mcount += count
            mtotal += total
        gdb.write('\tfree chunks: {}\n'.format(mcount))
        gdb.write('\tfree chunk size: {:#x} ({})\n'.format(mtotal, self.dump_size(mtotal)))
        return True

    def dump_heaps(self, mstate, maddr):
        """Print every heap (``heap_info``) that belongs to a non-main arena.

        The newest heap is found by masking the ``top`` chunk address with
        the heap size; older heaps are reached through ``prev``.

        Args:
            mstate: ``gdb.Value`` of a ``struct malloc_state``.
            maddr: integer address of ``mstate``.

        Returns:
            Always ``True``; nothing is printed for ``main_arena``.
        """
        if self.is_main_arena(mstate):
            # main_arena is not inspected here: whether it has an
            # associated heap_info is left open by the original author.
            return True
        count = 0
        heap_addr = mstate['top'].cast(self.type_ulong) & ~self.heap_mask
        heap_ptr = heap_addr.cast(self.type_heap_ptr)
        visited = set()
        while heap_addr != 0:
            base = int(heap_addr)
            if base in visited:
                gdb.write('\t\tWarning, loop in heap list detected at {:#x}\n'.format(
                    base), gdb.STDERR)
                gdb.flush(gdb.STDERR)
                break
            visited.add(base)
            heap = heap_ptr.dereference()
            m_addr = int(heap['ar_ptr'].cast(self.type_ulong))
            heap_size = int(heap['size'].cast(self.type_ulong))
            # Chunks start after the heap header; in the arena's first
            # heap the malloc_state itself sits there and is skipped too.
            chunk_addr = base + heap.type.sizeof
            if chunk_addr == maddr:
                chunk_addr += mstate.type.sizeof
            # round up to the malloc alignment
            chunk_addr = (chunk_addr + self.malloc_align_mask) & ~self.malloc_align_mask
            gdb.write('\theap[{}] at address: {:#x}\n'.format(count, base))
            gdb.write('\t\tchunk for heap: {:#x}\n'.format(chunk_addr))
            gdb.write('\t\tarena for heap: {:#x}\n'.format(m_addr))
            gdb.write('\t\tsize for heap: {:#x} ({})\n'.format(
                heap_size, self.dump_size(heap_size)))
            count += 1
            heap_ptr = heap['prev']
            heap_addr = heap_ptr.cast(self.type_ulong)
        return True

    def dump_mstate(self, mstate, maddr):
        """Print the full report for one arena.

        Args:
            mstate: ``gdb.Value`` of a ``struct malloc_state``.
            maddr: integer address of ``mstate``.

        Returns:
            Always ``True``.
        """
        def member(field):
            # Read one scalar member of the arena as a Python int.
            return int(mstate[field].cast(self.type_ulong))

        mutex = member('mutex')
        flags = member('flags')
        have_fast = member('have_fastchunks')
        naddr = member('next')
        nfree = member('next_free')
        threads = member('attached_threads')
        system_mem = member('system_mem')
        max_system_mem = member('max_system_mem')
        gdb.write('Arena at address {:#x} ->\n'.format(maddr))
        gdb.write('\tmain_arena: {}\n'.format(self.is_main_arena(maddr)))
        gdb.write('\tmutex: {}\n'.format(mutex))
        gdb.write('\tflags: {:#x}\n'.format(flags))
        gdb.write('\thave_fastchunks: {}\n'.format(have_fast))
        gdb.write('\tattached_threads: {}\n'.format(threads))
        gdb.write('\tsystem_mem: {0:#x} ({1})\n'.format(
            system_mem, self.dump_size(system_mem)))
        gdb.write('\tmax_mem: {0:#x} ({1})\n'.format(
            max_system_mem, self.dump_size(max_system_mem)))
        self.dump_heaps(mstate, maddr)
        self.dump_named_chunk(mstate, 'top')
        self.dump_named_chunk(mstate, 'last_remainder')
        self.dump_free_chunks(mstate)
        gdb.write('\tnext free: {:#x}\n'.format(nfree))
        gdb.write('\tnext arena: {:#x}\n'.format(naddr))
        self.dump_fastbins(mstate)
        gdb.write('\n')
        gdb.flush(gdb.STDOUT)
        return True

    def invoke(self, args, fromtty):
        """Entry point of the ``ptmalloc`` command: dump every arena.

        Starts at ``main_arena`` and follows ``next`` until
        ``check_mstate()`` rejects the pointer, which for an intact list
        happens when the walk arrives back at ``main_arena``.

        Args:
            args: command argument string (unused).
            fromtty: whether the command was typed by the user (unused).

        Returns:
            ``False`` when ``main_arena`` cannot be found, else ``True``.
        """
        marena, maddr = self.get_main_arena()
        if marena is None:
            return False
        self.dump_mstate(marena, maddr)
        arena, addr = self.check_mstate(marena["next"])
        while arena is not None:
            self.dump_mstate(arena, addr)
            arena, addr = self.check_mstate(arena["next"])
        return True
ptmalloc()
GDB调试需要使能python 扩展脚本的支持;如有必要,建议重新编译gdb调试器。详细的python 扩展文档可在GDB的官方文档中查看。笔者编写了一个多线程分配内存的简单应用,使用该脚本解析堆内存的操作及结果如下:
(gdb) source ptmalloc.py
(gdb) ptmalloc
Arena at address 0x7ffff7f82b80 ->
main_arena: True
mutex: 0
flags: 0x0
have_fastchunks: 0
attached_threads: 1
system_mem: 0x21000 (132.00 KB)
max_mem: 0x21000 (132.00 KB)
Top chunk at 0x555555559fa0:
Previous chunk is in use.
mapped: False
not main arena: False
chunk size: 0x20060 (128.09 KB)
free chunks: 0
free chunk size: 0x0 (0.00 KB)
next free: 0x0
next arena: 0x7fffcc000020
Arena at address 0x7fffcc000020 ->
main_arena: False
mutex: 0
flags: 0x2
have_fastchunks: 0
attached_threads: 1
system_mem: 0x1144c000 (276 MB, 304.00 KB)
max_mem: 0x11708000 (279 MB, 32.00 KB)
heap[0] at address: 0x7fffa4000000
chunk for heap: 0x7fffa4000020
arena for heap: 0x7fffcc000020
size for heap: 0x15e0000 (21 MB, 896.00 KB)
heap[1] at address: 0x7fffac000000
chunk for heap: 0x7fffac000020
arena for heap: 0x7fffcc000020
size for heap: 0x3f9c000 (63 MB, 624.00 KB)
heap[2] at address: 0x7fffa8000000
chunk for heap: 0x7fffa8000020
arena for heap: 0x7fffcc000020
size for heap: 0x3f2a000 (63 MB, 168.00 KB)
heap[3] at address: 0x7fffb8000000
chunk for heap: 0x7fffb8000020
arena for heap: 0x7fffcc000020
size for heap: 0x3fa6000 (63 MB, 664.00 KB)
heap[4] at address: 0x7fffcc000000
chunk for heap: 0x7fffcc0008c0
arena for heap: 0x7fffcc000020
size for heap: 0x4000000 (64 MB, 0.00 KB)
Top chunk at 0x7fffa55bf1e0:
Previous chunk is in use.
mapped: False
not main arena: False
chunk size: 0x20e20 (131.53 KB)
free chunks: 251
free chunk size: 0x152ce50 (21 MB, 179.58 KB)
next free: 0x0
next arena: 0x7fffd4000020
Arena at address 0x7fffd4000020 ->
main_arena: False
mutex: 0
flags: 0x2
have_fastchunks: 0
attached_threads: 1
system_mem: 0x8e3f000 (142 MB, 252.00 KB)
max_mem: 0x8e3f000 (142 MB, 252.00 KB)
heap[0] at address: 0x7fffa0000000
chunk for heap: 0x7fffa0000020
arena for heap: 0x7fffd4000020
size for heap: 0xe97000 (14 MB, 604.00 KB)
heap[1] at address: 0x7fffb4000000
chunk for heap: 0x7fffb4000020
arena for heap: 0x7fffd4000020
size for heap: 0x3fed000 (63 MB, 948.00 KB)
heap[2] at address: 0x7fffd4000000
chunk for heap: 0x7fffd40008c0
arena for heap: 0x7fffd4000020
size for heap: 0x3fbb000 (63 MB, 748.00 KB)
Top chunk at 0x7fffa0e96f20:
Previous chunk is in use.
mapped: False
not main arena: False
chunk size: 0xe0 (0.22 KB)
free chunks: 247
free chunk size: 0xd389d0 (13 MB, 226.45 KB)
next free: 0x0
next arena: 0x7fffd8000020
Arena at address 0x7fffd8000020 ->
main_arena: False
mutex: 0
flags: 0x2
have_fastchunks: 0
attached_threads: 1
system_mem: 0x4539000 (69 MB, 228.00 KB)
max_mem: 0x4539000 (69 MB, 228.00 KB)
heap[0] at address: 0x7fffb0000000
chunk for heap: 0x7fffb0000020
arena for heap: 0x7fffd8000020
size for heap: 0x54d000 (5 MB, 308.00 KB)
heap[1] at address: 0x7fffd8000000
chunk for heap: 0x7fffd80008c0
arena for heap: 0x7fffd8000020
size for heap: 0x3fec000 (63 MB, 944.00 KB)
Top chunk at 0x7fffb04ee220:
Previous chunk is in use.
mapped: False
not main arena: False
chunk size: 0x5ede0 (379.47 KB)
Last_Remainder chunk at 0x7fffd9655780:
Previous chunk is in use.
mapped: False
not main arena: True
chunk size: 0xd370 (52.86 KB)
free chunks: 492
free chunk size: 0x4accb0 (4 MB, 691.17 KB)
next free: 0x0
next arena: 0x7fffe4000020
Arena at address 0x7fffe4000020 ->
main_arena: False
mutex: 0
flags: 0x2
have_fastchunks: 0
attached_threads: 1
system_mem: 0x2300000 (35 MB, 0.00 KB)
max_mem: 0x2300000 (35 MB, 0.00 KB)
heap[0] at address: 0x7fffe4000000
chunk for heap: 0x7fffe40008c0
arena for heap: 0x7fffe4000020
size for heap: 0x2300000 (35 MB, 0.00 KB)
Top chunk at 0x7fffe62ff5f0:
Previous chunk is in use.
mapped: False
not main arena: False
chunk size: 0xa10 (2.52 KB)
Last_Remainder chunk at 0x7fffe4a888c0:
Previous chunk is in use.
mapped: False
not main arena: False
chunk size: 0x20 (0.03 KB)
free chunks: 490
free chunk size: 0x2f9130 (2 MB, 996.30 KB)
next free: 0x0
next arena: 0x7fffe0000020
Arena at address 0x7fffe0000020 ->
main_arena: False
mutex: 0
flags: 0x2
have_fastchunks: 0
attached_threads: 1
system_mem: 0x10f3000 (16 MB, 972.00 KB)
max_mem: 0x10f3000 (16 MB, 972.00 KB)
heap[0] at address: 0x7fffe0000000
chunk for heap: 0x7fffe00008c0
arena for heap: 0x7fffe0000020
size for heap: 0x10f3000 (16 MB, 972.00 KB)
Top chunk at 0x7fffe10f2d50:
Previous chunk is in use.
mapped: False
not main arena: False
chunk size: 0x2b0 (0.67 KB)
Last_Remainder chunk at 0x7fffe053ef30:
Previous chunk is in use.
mapped: False
not main arena: False
chunk size: 0x20 (0.03 KB)
free chunks: 652
free chunk size: 0xc3370 (780.86 KB)
next free: 0x0
next arena: 0x7fffec000020
Arena at address 0x7fffec000020 ->
main_arena: False
mutex: 0
flags: 0x2
have_fastchunks: 0
attached_threads: 1
system_mem: 0x8a9000 (8 MB, 676.00 KB)
max_mem: 0x8a9000 (8 MB, 676.00 KB)
heap[0] at address: 0x7fffec000000
chunk for heap: 0x7fffec0008c0
arena for heap: 0x7fffec000020
size for heap: 0x8a9000 (8 MB, 676.00 KB)
Top chunk at 0x7fffec8a89b0:
Previous chunk is in use.
mapped: False
not main arena: False
chunk size: 0x650 (1.58 KB)
Last_Remainder chunk at 0x7fffec4cdb60:
Previous chunk is in use.
mapped: False
not main arena: False
chunk size: 0x30 (0.05 KB)
free chunks: 595
free chunk size: 0x7cc20 (499.03 KB)
next free: 0x0
next arena: 0x7fffe8000020
Arena at address 0x7fffe8000020 ->
main_arena: False
mutex: 0
flags: 0x2
have_fastchunks: 0
attached_threads: 1
system_mem: 0x451000 (4 MB, 324.00 KB)
max_mem: 0x451000 (4 MB, 324.00 KB)
heap[0] at address: 0x7fffe8000000
chunk for heap: 0x7fffe80008c0
arena for heap: 0x7fffe8000020
size for heap: 0x451000 (4 MB, 324.00 KB)
Top chunk at 0x7fffe8450950:
Previous chunk is in use.
mapped: False
not main arena: False
chunk size: 0x6b0 (1.67 KB)
Last_Remainder chunk at 0x7fffe8089db0:
Previous chunk is in use.
mapped: False
not main arena: False
chunk size: 0xb0 (0.17 KB)
free chunks: 227
free chunk size: 0x12140 (72.31 KB)
next free: 0x0
next arena: 0x7ffff0000020
Arena at address 0x7ffff0000020 ->
main_arena: False
mutex: 0
flags: 0x2
have_fastchunks: 0
attached_threads: 1
system_mem: 0x249000 (2 MB, 292.00 KB)
max_mem: 0x249000 (2 MB, 292.00 KB)
heap[0] at address: 0x7ffff0000000
chunk for heap: 0x7ffff00008c0
arena for heap: 0x7ffff0000020
size for heap: 0x249000 (2 MB, 292.00 KB)
Top chunk at 0x7ffff0248b90:
Previous chunk is in use.
mapped: False
not main arena: False
chunk size: 0x470 (1.11 KB)
Last_Remainder chunk at 0x7ffff014f1f0:
Previous chunk is in use.
mapped: False
not main arena: False
chunk size: 0x40 (0.06 KB)
free chunks: 30
free chunk size: 0xf80 (3.88 KB)
next free: 0x0
next arena: 0x7ffff7f82b80
Warning, main arena show up again.
(gdb)
|