본문 바로가기
pwnable

[ELF 구조]리눅스 커널 코드 분석 000

by pwnhub 2024. 4. 29.

https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/elf.h

/* Number of bytes in the e_ident[] identification array that starts every ELF header. */
#define EI_NIDENT	16

/*
 * ELF file header, 32-bit layout.
 * Same members as the 64-bit header; only the Elf32_* base types differ.
 */
typedef struct elf32_hdr {
  unsigned char	e_ident[EI_NIDENT];	/* Identification bytes (magic, class, data, version, OS ABI, padding) */
  Elf32_Half	e_type;		/* Object file type (ET_*) */
  Elf32_Half	e_machine;	/* Target architecture */
  Elf32_Word	e_version;	/* Object file version */
  Elf32_Addr	e_entry;  /* Entry point */
  Elf32_Off	e_phoff;	/* Program header table file offset */
  Elf32_Off	e_shoff;	/* Section header table file offset */
  Elf32_Word	e_flags;	/* Flags; meaning not established by this excerpt */
  Elf32_Half	e_ehsize;	/* Size of this ELF header */
  Elf32_Half	e_phentsize;	/* Size of one program header entry */
  Elf32_Half	e_phnum;	/* Number of program header entries */
  Elf32_Half	e_shentsize;	/* Size of one section header entry */
  Elf32_Half	e_shnum;	/* Number of section header entries */
  Elf32_Half	e_shstrndx;	/* Section header string table index */
} Elf32_Ehdr;

/*
 * ELF file header, 64-bit layout.
 * Same members as the 32-bit header; only the Elf64_* base types differ.
 */
typedef struct elf64_hdr {
  unsigned char	e_ident[EI_NIDENT];	/* ELF "magic number" */
  Elf64_Half e_type;		/* Object file type (ET_*) */
  Elf64_Half e_machine;		/* Target architecture */
  Elf64_Word e_version;		/* Object file version */
  Elf64_Addr e_entry;		/* Entry point virtual address */
  Elf64_Off e_phoff;		/* Program header table file offset */
  Elf64_Off e_shoff;		/* Section header table file offset */
  Elf64_Word e_flags;		/* Flags; meaning not established by this excerpt */
  Elf64_Half e_ehsize;		/* Size of this ELF header */
  Elf64_Half e_phentsize;	/* Size of one program header entry */
  Elf64_Half e_phnum;		/* Number of program header entries */
  Elf64_Half e_shentsize;	/* Size of one section header entry */
  Elf64_Half e_shnum;		/* Number of section header entries */
  Elf64_Half e_shstrndx;	/* Section header string table index */
} Elf64_Ehdr;

64bit 기준으로 서술

EI_NIDENT가 16이므로 e_ident는 16바이트

/* Byte positions inside e_ident[]. */
#define	EI_MAG0		0		/* e_ident[] indexes */
#define	EI_MAG1		1		/* Magic bytes 0..3: compared against ELFMAG0..ELFMAG3 */
#define	EI_MAG2		2
#define	EI_MAG3		3
#define	EI_CLASS	4		/* File class, one of ELFCLASS* */
#define	EI_DATA		5		/* Data encoding, one of ELFDATA* */
#define	EI_VERSION	6		/* ELF version byte */
#define	EI_OSABI	7		/* OS ABI identification */
#define	EI_PAD		8		/* First index past the named identification bytes */

/*
 * ELF magic number: the first SELFMAG bytes of e_ident[].
 * ELFMAG is the same four bytes as ELFMAG0..ELFMAG3 written as a string
 * (octal escape \177 == 0x7f), so it can be checked with
 * memcmp(e_ident, ELFMAG, SELFMAG).
 */
#define	ELFMAG0		0x7f		/* EI_MAG */
#define	ELFMAG1		'E'
#define	ELFMAG2		'L'
#define	ELFMAG3		'F'
#define	ELFMAG		"\177ELF"
#define	SELFMAG		4		/* Number of magic bytes to compare */

/* Values of e_ident[EI_CLASS]: word size of the object file. */
#define	ELFCLASSNONE	0		/* EI_CLASS */
#define	ELFCLASS32	1		/* 32-bit object */
#define	ELFCLASS64	2		/* 64-bit object */
#define	ELFCLASSNUM	3		/* presumably the count of class values above - confirm */

/* Values of e_ident[EI_DATA]: byte order of the object file. */
#define ELFDATANONE	0		/* e_ident[EI_DATA] */
#define ELFDATA2LSB	1		/* Little endian */
#define ELFDATA2MSB	2		/* Big endian */

/* Version value 0; only this one version constant is shown in this excerpt. */
#define EV_NONE		0		/* e_version, EI_VERSION */

 

7f45 4c46 0201 0100 0000 0000 0000 0000 .ELF.........…

앞 4바이트는 EI_MAG 로, ELF 파일의 경우 ELFMAG 와 같은 값을 가진다

 

7f45 4c46 0201 0100 0000 0000 0000 0000 .ELF.........…

그 다음 1바이트는 EI_CLASS 로 bit 수를 의미한다

0 → invalid

1 → 32bit

2 → 64bit

 

7f45 4c46 0201 0100 0000 0000 0000 0000 .ELF.........…

그 다음 1바이트는 EI_DATA 로 LSB/MSB(바이트 순서)를 의미한다

0 : invalid

1 : LSB → for little endian

2 : MSB → for big endian

 

7f45 4c46 0201 0100 0000 0000 0000 0000 .ELF.........…

그 다음 1바이트는 EI_VERSION 이고 모든 ELF의 버전은 1로 한정된다

 

7f45 4c46 0201 0100 0000 0000 0000 0000 .ELF.........…

그 다음 1바이트는 EI_OSABI 로 OS Application Binary Interface를 의미한다

0 : None/System-V ( 일반적 )

1 : HP-UX

2 : NetBSD

3 : Linux

이후 거의 쓰이지 않는 바이트(ABI Version, 00)와 패딩(00)으로 16바이트를 마저 채운다


 

sudo apt-get install gcc-multilib 명령으로 gcc-multilib 을 설치해준 후

gcc -o main32 main.c -m32 && gcc -o main64 main.c -m64

실행해주면 각각 32bit, 64bit 으로 컴파일 된다

*main.c는 hello world 코드를 사용했다. 어떤 코드든 상관없음

xxd main32 | head -n 1
00000000: 7f45 4c46 0101 0100 0000 0000 0000 0000 .ELF............

xxd main64 | head -n 1
00000000: 7f45 4c46 0201 0100 0000 0000 0000 0000 .ELF............


흐름분석

리눅스에서 ELF파일을 실행하면 커널 내부의 execve() 처리 코드에서 처리하게 된다.

https://elixir.bootlin.com/linux/latest/source/fs/exec.c

/*
 * Common execve()/execveat() path: build a linux_binprm for @filename,
 * copy the filename, environment and argument strings into it, and hand
 * it to bprm_execve().
 *
 * @fd, @filename, @flags: passed through to alloc_bprm().
 * @argv, @envp: argument and environment vectors; counted with count()
 *               and copied with copy_strings().
 *
 * Returns the result of bprm_execve(), or the negative error of the first
 * step that failed. Once @filename has passed the IS_ERR() check, every
 * exit goes through putname(filename); once the bprm has been allocated,
 * every exit also goes through free_bprm().
 */
static int do_execveat_common(int fd, struct filename *filename,
			      struct user_arg_ptr argv,
			      struct user_arg_ptr envp,
			      int flags)
{
	struct linux_binprm *bprm;
	int retval;

	if (IS_ERR(filename))
		return PTR_ERR(filename);

	/*
	 * We move the actual failure in case of RLIMIT_NPROC excess from
	 * set*uid() to execve() because too many poorly written programs
	 * don't check setuid() return code.  Here we additionally recheck
	 * whether NPROC limit is still exceeded.
	 */
	if ((current->flags & PF_NPROC_EXCEEDED) &&
	    is_rlimit_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
		retval = -EAGAIN;
		goto out_ret;
	}

	/* We're below the limit (still or again), so we don't want to make
	 * further execve() calls fail. */
	current->flags &= ~PF_NPROC_EXCEEDED;

	bprm = alloc_bprm(fd, filename, flags);
	if (IS_ERR(bprm)) {
		retval = PTR_ERR(bprm);
		goto out_ret;
	}

	/* A zero count is only warned about here; it is patched up further down. */
	retval = count(argv, MAX_ARG_STRINGS);
	if (retval == 0)
		pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n",
			     current->comm, bprm->filename);
	if (retval < 0)
		goto out_free;
	bprm->argc = retval;

	retval = count(envp, MAX_ARG_STRINGS);
	if (retval < 0)
		goto out_free;
	bprm->envc = retval;

	retval = bprm_stack_limits(bprm);
	if (retval < 0)
		goto out_free;

	/* Copy order: filename first, then envp, then argv. */
	retval = copy_string_kernel(bprm->filename, bprm);
	if (retval < 0)
		goto out_free;
	bprm->exec = bprm->p;

	retval = copy_strings(bprm->envc, envp, bprm);
	if (retval < 0)
		goto out_free;

	retval = copy_strings(bprm->argc, argv, bprm);
	if (retval < 0)
		goto out_free;

	/*
	 * When argv is empty, add an empty string ("") as argv[0] to
	 * ensure confused userspace programs that start processing
	 * from argv[1] won't end up walking envp. See also
	 * bprm_stack_limits().
	 */
	if (bprm->argc == 0) {
		retval = copy_string_kernel("", bprm);
		if (retval < 0)
			goto out_free;
		bprm->argc = 1;
	}

	retval = bprm_execve(bprm);
out_free:
	free_bprm(bprm);

out_ret:
	putname(filename);
	return retval;
}
/*
 * Execute the program described by an already populated @bprm.
 *
 * Prepares credentials, marks the current task as being in execve, runs
 * the security hook for exec credentials, and then calls exec_binprm().
 *
 * Returns the value from exec_binprm() on success, or the error of the
 * first step that failed. On the failure path, if @bprm has already
 * passed its point of no return and no fatal signal is pending, the task
 * is sent a fatal SIGSEGV before returning.
 */
static int bprm_execve(struct linux_binprm *bprm)
{
	int retval;

	retval = prepare_bprm_creds(bprm);
	if (retval)
		return retval;

	/*
	 * Check for unsafe execution states before exec_binprm(), which
	 * will call back into begin_new_exec(), into bprm_creds_from_file(),
	 * where setuid-ness is evaluated.
	 */
	check_unsafe_exec(bprm);
	current->in_execve = 1;
	sched_mm_cid_before_execve(current);

	sched_exec();

	/* Set the unchanging part of bprm->cred */
	retval = security_bprm_creds_for_exec(bprm);
	if (retval)
		goto out;

	retval = exec_binprm(bprm);
	if (retval < 0)
		goto out;

	sched_mm_cid_after_execve(current);
	/* execve succeeded */
	current->fs->in_exec = 0;
	current->in_execve = 0;
	rseq_execve(current);
	user_events_execve(current);
	acct_update_integrals(current);
	task_numa_free(current, false);
	return retval;

out:
	/*
	 * If past the point of no return ensure the code never
	 * returns to the userspace process.  Use an existing fatal
	 * signal if present otherwise terminate the process with
	 * SIGSEGV.
	 */
	if (bprm->point_of_no_return && !fatal_signal_pending(current))
		force_fatal_sig(SIGSEGV);

	/* Clear the same in-exec markers that the success path clears. */
	sched_mm_cid_after_execve(current);
	current->fs->in_exec = 0;
	current->in_execve = 0;

	return retval;
}
/*
 * Find and run a binary-format handler for @bprm, following interpreter
 * redirections.
 *
 * Calls search_binary_handler() in a loop. Whenever a handler leaves
 * bprm->interpreter set, that file becomes the new bprm->file and the
 * search runs again. The loop ends once a handler succeeds without
 * setting an interpreter.
 *
 * Returns 0 on success, a negative error from search_binary_handler(),
 * -ELOOP when the depth limit is exceeded, or -ENOEXEC when have_execfd
 * is set and bprm->executable was already set by an earlier iteration.
 */
static int exec_binprm(struct linux_binprm *bprm)
{
	pid_t old_pid, old_vpid;
	int ret, depth;

	/* Need to fetch pid before load_binary changes it */
	old_pid = current->pid;
	rcu_read_lock();
	old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
	rcu_read_unlock();

	/* This allows 4 levels of binfmt rewrites before failing hard. */
	/* NOTE(review): the bound below is `depth > 5`, which does not obviously
	 * match the "4 levels" stated above - confirm against upstream. */
	for (depth = 0;; depth++) {
		struct file *exec;
		if (depth > 5)
			return -ELOOP;

		ret = search_binary_handler(bprm);
		if (ret < 0)
			return ret;
		if (!bprm->interpreter)
			break;

		/* Switch to the interpreter file for the next iteration. */
		exec = bprm->file;
		bprm->file = bprm->interpreter;
		bprm->interpreter = NULL;

		allow_write_access(exec);
		if (unlikely(bprm->have_execfd)) {
			/* Keep the first replaced file as bprm->executable; a second one is an error. */
			if (bprm->executable) {
				fput(exec);
				return -ENOEXEC;
			}
			bprm->executable = exec;
		} else
			fput(exec);
	}

	/* Post-exec notifications: audit, tracepoint, ptrace event, proc connector. */
	audit_bprm(bprm);
	trace_sched_process_exec(current, old_pid, bprm);
	ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
	proc_exec_connector(current);
	return 0;
}
/*
 * Try each entry of the `formats` list on @bprm until one accepts it.
 *
 * After prepare_binprm() and the security check, every format's
 * ->load_binary() callback is called in list order. The first result
 * other than -ENOEXEC is returned, as is any result once @bprm has passed
 * its point of no return.
 *
 * If no format accepted the file and CONFIG_MODULES is enabled, a
 * "binfmt-XXXX" module is requested and the list is walked once more.
 * The module request is skipped when the first four bytes of bprm->buf
 * are all printable.
 *
 * Returns the handler's result, or a negative error (-ENOENT if the list
 * produced no result at all).
 */
static int search_binary_handler(struct linux_binprm *bprm)
{
	bool need_retry = IS_ENABLED(CONFIG_MODULES);
	struct linux_binfmt *fmt;
	int retval;

	retval = prepare_binprm(bprm);
	if (retval < 0)
		return retval;

	retval = security_bprm_check(bprm);
	if (retval)
		return retval;

	retval = -ENOENT;
 retry:
	read_lock(&binfmt_lock);
	list_for_each_entry(fmt, &formats, lh) {
		/* Skip formats whose module reference cannot be taken. */
		if (!try_module_get(fmt->module))
			continue;
		/* The lock is dropped while the handler runs and re-taken afterwards. */
		read_unlock(&binfmt_lock);

		retval = fmt->load_binary(bprm);

		read_lock(&binfmt_lock);
		put_binfmt(fmt);
		if (bprm->point_of_no_return || (retval != -ENOEXEC)) {
			read_unlock(&binfmt_lock);
			return retval;
		}
	}
	read_unlock(&binfmt_lock);

	if (need_retry) {
		if (printable(bprm->buf[0]) && printable(bprm->buf[1]) &&
		    printable(bprm->buf[2]) && printable(bprm->buf[3]))
			return retval;
		/* The module name is built from the 16-bit value at buf + 2. */
		if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0)
			return retval;
		/* Retry only once. */
		need_retry = false;
		goto retry;
	}

	return retval;
}

https://elixir.bootlin.com/linux/latest/source/fs/binfmt_elf.c#L819

/*
 * Binary-format descriptor for ELF. Its ->load_binary callback is
 * load_elf_binary, the kind of callback search_binary_handler() calls
 * through fmt->load_binary(). How this descriptor is added to the format
 * list is not shown in this excerpt.
 */
static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
#ifdef CONFIG_COREDUMP
	/* Core-dump support is only built in when CONFIG_COREDUMP is set. */
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
#endif
};

do_execveat_common → bprm_execve → [exec_binprm](<https://elixir.bootlin.com/linux/latest/C/ident/exec_binprm>) → [search_binary_handler](<https://elixir.bootlin.com/linux/latest/C/ident/search_binary_handler>) → [load_binary](<https://elixir.bootlin.com/linux/latest/C/ident/load_binary>) → load_elf_binary

load_elf_binary 코드 중 일부

if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

ELF파일의 첫 16바이트 중 ELFMAG(최초 4개 바이트) → .ELF~ 포맷이 지켜졌는지 검사한다.

e_ident 끝


https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/elf.h

/*
 * Base integer types used by the ELF structures. The __uNN/__sNN names
 * give the width in bits and the signedness, so the field sizes of the
 * ELF header can be read directly from these typedefs.
 */

/* 32-bit ELF base types. */
typedef __u32	Elf32_Addr;
typedef __u16	Elf32_Half;
typedef __u32	Elf32_Off;
typedef __s32	Elf32_Sword;
typedef __u32	Elf32_Word;

/* 64-bit ELF base types. */
typedef __u64	Elf64_Addr;
typedef __u16	Elf64_Half;
typedef __s16	Elf64_SHalf;
typedef __u64	Elf64_Off;
typedef __s32	Elf64_Sword;
typedef __u32	Elf64_Word;
typedef __u64	Elf64_Xword;
typedef __s64	Elf64_Sxword;

사이즈는 위 코드에서 확인할 수 있다

~~unsigned char	e_ident[EI_NIDENT];	/* ELF "magic number" */~~
  Elf64_Half e_type;
  Elf64_Half e_machine;
  Elf64_Word e_version;
  Elf64_Addr e_entry;		/* Entry point virtual address */
  Elf64_Off e_phoff;		/* Program header table file offset */
  Elf64_Off e_shoff;		/* Section header table file offset */
  Elf64_Word e_flags;
  Elf64_Half e_ehsize;
  Elf64_Half e_phentsize;
  Elf64_Half e_phnum;
  Elf64_Half e_shentsize;
  Elf64_Half e_shnum;
  Elf64_Half e_shstrndx;

이어서 멤버를 마저 알아보면

type :

ET_NONE : 0 : No file type

ET_REL : 1 : Relocatable file : 링크되기 전의 오브젝트 파일(.o), gcc -c main.c 의 결과

ET_EXEC : 2 : Executable file : PIE 보호기법 풀려있는 상태, gcc -no-pie main.c -o main

ET_DYN : 3 : Shared object file, gcc -shared main.c -o main. PIE가 적용된 실행 파일(최근 배포판에서 gcc -o main main.c 의 기본 결과)도 ET_DYN 으로 표시된다

ET_CORE : 4 : Core file, 나중에 알아보기

+프로세서에서 예약한 일부 값도 있다고 함

machine :

0x03 : X86

0x08 : MIPS

0x28 : ARM

0x3E : amd64

0xB7 : ARMv8

0xF3 : RISC-V

version :

마찬가지로 항상 1

entry :

시작주소 or 공유 라이브러리의 생성자 주소

그 외, 없으면 0

phoff :

program header 시작주소 (offset)

shoff :

section header 시작주소 (offset)

flags :

os의 실행 모드 같은거라는데 뭔지 모르겠음

공부해야함

ehsize :

elf header 의 크기

phentsize :

program header 1개의 크기

phnum :

program header 의 수

shentsize :

section header 1개의 크기

shnum :

section header 의 수

shstrndx :

section header 의 문자열 테이블 인덱스


 

readelf -h main

ELF Header:
  Magic:   7f 45 4c 46 01 01 01 00 00 00 00 00 00 00 00 00
  Class:                             ELF32
  Data:                              2's complement, little endian
  Version:                           1 (current)
  OS/ABI:                            UNIX - System V
  ABI Version:                       0
  Type:                              DYN (Position-Independent Executable file)
  Machine:                           Intel 80386
  Version:                           0x1
  Entry point address:               0x1070
  Start of program headers:          52 (bytes into file)
  Start of section headers:          13780 (bytes into file)
  Flags:                             0x0
  Size of this header:               52 (bytes)
  Size of program headers:           32 (bytes)
  Number of program headers:         11
  Size of section headers:           40 (bytes)
  Number of section headers:         29
  Section header string table index: 28