University Of Edin’s 2023 VEHMeme.exe writeup
We have an exe
$ file VEHMeme.exe
VEHMeme.exe: PE32+ executable (console) x86-64, for MS Windows, 6 sections
$ sha256sum VEHMeme.exe
c3499f66bf7e97d6a56de40637e3eeebedad94f700c23d30d3f89136cbf8102d VEHMeme.exe
Loading it into IDA, the main
function is pretty simple:
// Entry point (IDA decompilation): loads an embedded resource, checks that the
// user input is exactly 41 bytes long, then copies the resource into executable
// memory and calls it.
int __fastcall main(int argc, const char **argv, const char **envp)
{
// .....
// Look up resource ID 0x69 of type 0xA (RT_RCDATA in the Windows headers).
ResourceW = FindResourceW(0, (LPCWSTR)0x69, (LPCWSTR)0xA);
ResourceW_1 = ResourceW;
// Bail out silently if the resource cannot be found or loaded.
if ( !ResourceW || (Resource = LoadResource(0, ResourceW)) == 0 )
exit(0);
// v6 points at the raw resource bytes; resource_size is their length.
v6 = LockResource(Resource);
resource_size = SizeofResource(0, ResourceW_1);
puts("Please enter the flag: ");
// _acrt_iob_func(0) is the UCRT accessor for stdin.
Stream = _acrt_iob_func(0);
// Reads at most 41 characters plus the terminating NUL into the global `input`.
fgets(input, 42, Stream);
// Hand-rolled strlen(input): n41 ends up as the index of the first NUL byte.
n41 = -1;
do
++n41;
while ( input[n41] );
// Reject anything that is not exactly 41 bytes long.
if ( n41 != 41 )
{
puts("Wrong flag!!!");
exit(0);
}
// 0x1000 = MEM_COMMIT, 0x40 = PAGE_EXECUTE_READWRITE: a writable and executable page.
rwx_page = (void (*)(void))VirtualAlloc(0, resource_size, 0x1000u, 0x40u);
dwSize_1 = resource_size;
shellcode_rwx_Page = rwx_page;
// Copy the resource bytes into the RWX page and jump to them as code.
memcpy(rwx_page, v6, dwSize_1);
shellcode_rwx_Page();
return 0;
}
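It reads a resource (ID 0x69, type 0xA) from the PE, copies it onto a newly allocated RWX
page, and then calls it as code. The 0x40 protection flag is PAGE_EXECUTE_READWRITE
(see Microsoft's memory protection constants documentation).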
We can dump the resource and obtain the shellcode using pefile
# Extract the embedded shellcode from VEHMeme.exe: this is the same resource
# that main() loads with FindResourceW(0, 0x69, 0xA).
import pefile

pe = pefile.PE("VEHMeme.exe")

RESOURCE_TYPE = 0xA   # resource type passed to FindResourceW
RESOURCE_ID = 0x69    # resource name/ID passed to FindResourceW

# The PE resource directory is a three-level tree: type -> id -> language.
for entry in pe.DIRECTORY_ENTRY_RESOURCE.entries:
    if entry.id == RESOURCE_TYPE:
        for res_id in entry.directory.entries:
            if res_id.id == RESOURCE_ID:
                for lang in res_id.directory.entries:
                    # OffsetToData is an RVA, so slice the memory-mapped image
                    # rather than the raw file bytes.
                    data_rva = lang.data.struct.OffsetToData
                    size = lang.data.struct.Size
                    data = pe.get_memory_mapped_image()[data_rva : data_rva + size]
                    with open("shellcode.bin", "wb") as f:
                        f.write(data)
                    # One language entry is enough; stop after the first dump.
                    break
When loaded into IDA, the shellcode starts like this:
seg005:0000000000000000 seg005 segment byte public 'CODE' use64
seg005:0000000000000000 assume cs:seg005
seg005:0000000000000000 assume es:nothing, ss:nothing, ds:nothing, fs:nothing, gs:nothing
seg005:0000000000000000 int 3 ; Trap to Debugger
seg005:0000000000000001 int 3 ; Trap to Debugger
seg005:0000000000000002 int 3 ; Trap to Debugger
seg005:0000000000000003 int 3 ; Trap to Debugger
seg005:0000000000000004 int 3 ; Trap to Debugger
seg005:0000000000000005 invlpg byte ptr ds:5
seg005:000000000000000D invlpg byte ptr ds:5
seg005:0000000000000015 invlpg byte ptr ds:5
seg005:000000000000001D invlpg byte ptr ds:5
seg005:0000000000000025 invlpg byte ptr ds:5
seg005:000000000000002D invlpg byte ptr ds:5
seg005:0000000000000035 invlpg byte ptr ds:5
seg005:000000000000003D invlpg byte ptr ds:5
seg005:0000000000000045 nop
seg005:0000000000000046 mov rax, 1
seg005:000000000000004D mov rax, [rax]
seg005:0000000000000050 mov rax, 5
seg005:0000000000000057 mov rcx, 0
seg005:000000000000005E div rcx
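The instructions look weird: every one of them deliberately raises an exception (int 3 -> breakpoint, invlpg -> privileged instruction, mov rax, [rax] with rax = 1 -> access violation, div rcx with rcx = 0 -> division by zero).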
We can also see that the binary registers a new TlsCallback
// TLS callback (IDA decompilation): installs the vectored exception handler and
// allocates a zeroed 0x1000-byte buffer, guarded so the setup runs only once.
// TLS callbacks are invoked by the Windows loader before the program entry point.
void TlsCallback_0()
{
// dword_14000566C is a one-time initialisation flag.
if ( !dword_14000566C )
{
// First argument 1: call Handler before other vectored handlers.
AddVectoredExceptionHandler(1u, Handler);
// 0x1000 = MEM_COMMIT, 4 = PAGE_READWRITE. Handler indexes this buffer with
// dword_140005670, i.e. it is the VM's data tape.
qword_140005678 = (__int64)VirtualAlloc(0, 0x1000u, 0x1000u, 4u);
memset((void *)qword_140005678, 0, 0x1000u);
}
dword_14000566C = 1;
}
Where Handler
seems to be changing some global variables based on the exception code
ExceptionCode = ExceptionInfo->ExceptionRecord->ExceptionCode;
if ( (unsigned int)ExceptionCode > 0xC000001D )
{
if ( (_DWORD)ExceptionCode == 0xC0000094 )
{
ContextRecord = ExceptionInfo->ContextRecord;
--dword_140005670;
ContextRecord->Rip += 3LL;
}
else if ( (_DWORD)ExceptionCode == -1073741674 )
{
++*(_BYTE *)(dword_140005670 + qword_140005678);
ExceptionInfo->ContextRecord->Rip += 8LL;
return 0xFFFFFFFFLL;
}
return 0xFFFFFFFFLL;
}
By looking at the pattern we can deduce that the program implements a brainfuck
VM with the exceptions being instructions. With some analysis we can get to this decompilation
*(_QWORD *)&ExceptionCode = ExceptionInfo->ExceptionRecord->ExceptionCode;
if ( ExceptionCode > (unsigned int)EXCEPTION_ILLEGAL_INSTRUCTION )
{
if ( ExceptionCode == (unsigned int)EXCEPTION_INT_DIVIDE_BY_ZERO )
{
ContextRecord = ExceptionInfo->ContextRecord;
--data_pointer;
ContextRecord->Rip += 3LL;
}
else if ( ExceptionCode == (unsigned int)EXCEPTION_PRIV_INSTRUCTION )
{
++::data_base[data_pointer];
ExceptionInfo->ContextRecord->Rip += 8LL;
return 0xFFFFFFFFLL;
}
return 0xFFFFFFFFLL;
}
if ( ExceptionCode == (unsigned int)EXCEPTION_ILLEGAL_INSTRUCTION )
{
--::data_base[data_pointer];
++ExceptionInfo->ContextRecord->Rip;
return 0xFFFFFFFFLL;
}
if ( ExceptionCode == (unsigned int)EXCEPTION_BREAKPOINT )
{
ContextRecord_1 = ExceptionInfo->ContextRecord;
++data_pointer;
++ContextRecord_1->Rip;
return 0xFFFFFFFFLL;
}
if ( ExceptionCode == (unsigned int)EXCEPTION_SINGLE_STEP )
{
v18 = input[dword_1400059A0++];
::data_base[data_pointer] = v18;
++ExceptionInfo->ContextRecord->Rip;
return 0xFFFFFFFFLL;
}
if ( ExceptionCode != (unsigned int)EXCEPTION_ACCESS_VIOLATION )
return 0xFFFFFFFFLL;
ContextRecord_2 = ExceptionInfo->ContextRecord;
Rax = ContextRecord_2->Rax;
if ( !Rax )
flag_check(ExceptionInfo, *(_QWORD *)&ExceptionCode, a3, ExceptionInfo);
v7 = Rax == 1;
data_base = ::data_base;
if ( v7 )
{
bracket_depth = bracket_depth;
Rip = ContextRecord_2->Rip;
v14 = Rip - 8;
v15 = (_BYTE *)(Rip + 3);
bracket_stack[bracket_depth] = v14;
bracket_depth = bracket_depth + 1;
v16 = 1;
if ( !data_base[data_pointer] )
{
do
{
v17 = v15;
if ( *v15 == '1' )
{
--v16;
}
else if ( *v15 == 0x90 )
{
++v16;
}
++v15;
}
while ( v16 );
v15 = v17;
}
ContextRecord_2->Rip = (DWORD64)v15;
return 0xFFFFFFFFLL;
}
else
{
v9 = bracket_depth - 1;
v10 = ContextRecord_2->Rip + 3;
--bracket_depth;
if ( ::data_base[data_pointer] )
v10 = bracket_stack[v9];
ContextRecord_2->Rip = v10;
return 0xFFFFFFFFLL;
}
With this we can find that
0x80000003 : > : Breakpoint
0x80000004 : , : Single-step exception
0xC000001D : - : Illegal instruction
0xC0000094 : < : Integer division by zero
0xC0000096 : + : Privileged instruction
0xC0000005 : [ when rax == 1, ] for any other non-zero rax (2 in the trace); rax == 0 calls flag_check : Access violation
So now we can either
- statically walk the dumped shellcode and classify each instruction into the buckets above, or
- run the program, trace it, and dump the exception information
Trying the first approach seemed like a lot of trial and error, so I resorted to the second approach
Use frida
to dump the exception info like this
// Frida script: hooks the function at VEHMeme.exe+0x10B0 (presumably the
// vectored exception handler -- confirm against the IDA database) and prints
// "<rip> <rax> <exception code>" once for every distinct faulting address.
const loggedAddresses = new Set();
const moduleName = "VEHMeme.exe";
const offset = 0x10B0;

function hookFunction() {
    const moduleBase = Module.findBaseAddress(moduleName);
    if (!moduleBase) {
        console.log("[!] Module not found!");
        return;
    }

    const hookTarget = moduleBase.add(offset);
    console.log("[+] Hooking function at:", hookTarget);

    Interceptor.attach(hookTarget, {
        onEnter: function (args) {
            // args[0] is treated as an EXCEPTION_POINTERS*:
            //   +0 -> ExceptionRecord*, +8 -> ContextRecord*.
            const exceptionPointers = args[0];
            if (exceptionPointers.isNull()) {
                return;
            }

            const exceptionRecord = exceptionPointers.readPointer();
            if (exceptionRecord.isNull()) {
                return;
            }

            // ExceptionCode is the first DWORD of the exception record.
            const exc_code = exceptionRecord.readU32();

            const contextRecord = exceptionPointers.add(8).readPointer();
            if (contextRecord.isNull()) {
                return;
            }

            // x64 CONTEXT offsets: Rip at 0xF8, Rax at 0x78.
            const rip = contextRecord.add(0xF8).readPointer();
            const rax = contextRecord.add(0x78).readPointer();

            // Loops re-execute the same instruction; log each address once.
            const seenKey = rip.toString(16);
            if (loggedAddresses.has(seenKey)) {
                return;
            }
            console.log(`${rip} ${rax} ${exc_code.toString(16)}`);
            loggedAddresses.add(seenKey);
        }
    });
}

hookFunction();
Once we have all the information dumped from this we can build the brainfuck
program like this
# Rebuild the brainfuck program from the Frida trace.
#
# Input files:
#   out        one "<rip> <rax> <exception code>" line (hex) per traced instruction
#   diss.json  radare2 JSON disassembly of shellcode.bin, used only for annotation
#
# Exception code -> brainfuck instruction:
#   0x80000003 : >   (Breakpoint)
#   0x80000004 : ,   (Single-step exception)
#   0xC000001D : -   (Illegal instruction)
#   0xC0000094 : <   (Integer division by zero)
#   0xC0000096 : +   (Privileged instruction)
#   0xC0000005 : [ when rax == 1, ] when rax == 2   (Access violation)
import json

with open('out') as trace_file:
    lines = trace_file.readlines()

code = []
for line in lines:
    # Skip blank lines (e.g. a trailing newline) instead of failing to unpack.
    if not line.strip():
        continue
    addr, rax, exc_type = (int(field, 16) for field in line.split())
    code.append((addr, rax, exc_type))

# The trace is in execution order; brainfuck source order is address order.
code.sort(key=lambda x: x[0])

with open('diss.json') as diss_file:
    codej = json.load(diss_file)
offsets = {i.get('offset'): i for i in codej}

# The lowest traced address is taken as the start of the shellcode, so that
# addresses become offsets comparable with the disassembly.
base = code[0][0]
max_addr = -1
for addr, rax, exc_type in code:
    addr -= base
    max_addr = max(max_addr, addr)
    if exc_type == 0x80000003:
        print('>', end="|")
    elif exc_type == 0x80000004:
        print(',', end="|")
    elif exc_type == 0xC0000005 and rax == 1:
        print('[', end="|")
    elif exc_type == 0xC0000005 and rax == 2:
        print(']', end="|")
    elif exc_type == 0xC000001D:
        print('-', end="|")
    elif exc_type == 0xC0000094:
        print('<', end="|")
    elif exc_type == 0xC0000096:
        print('+', end="|")
    else:
        print('UNKNOWN')
    # Annotate every traced instruction with its offset, exception code and the
    # disassembly at that offset, to cross-check the classification.
    print(hex(addr), hex(exc_type), offsets.get(addr, {}).get("disasm"))

# print(max_addr)
# Anything disassembled past the last traced offset was never executed.
print("NOT Executed")
for offset, item in offsets.items():
    if offset > max_addr:
        print(hex(offset), item.get("disasm"))
the diss.json
can be built like this
r2 -AAAA -q -c "pDj 77671" shellcode.bin > diss.json
It was used to annotate the output and to check whether the Frida trace contained any errors or missed instructions
This produces almost 9kb of brainfuck
code
$ wc disass.full
0 1 8965 disass.full
Now brainfuck
is not a readable language, and 9 KB of it is even more painful to read
Let's use LLVM to optimize it and see how far we can reduce the logic
We can use bf2llvm.c to lift the brainfuck
to LLVM IR
and then optimize it with opt
cat disass.full.bc | ./bf2llvm | opt-17 -S -O3 > prof.opt.ll
This optimizes away the first set of loops that were creating constants on the tape
%data_ptr = tail call dereferenceable_or_null(30000) ptr @calloc(i64 30000, i64 1)
%0 = getelementptr inbounds i8, ptr %data_ptr, i64 4
store i8 80, ptr %0, align 1
%1 = getelementptr inbounds i8, ptr %data_ptr, i64 6
store i8 35, ptr %1, align 1
%2 = getelementptr inbounds i8, ptr %data_ptr, i64 8
store i8 71, ptr %2, align 1
%3 = getelementptr inbounds i8, ptr %data_ptr, i64 10
store i8 95, ptr %3, align 1
%4 = getelementptr inbounds i8, ptr %data_ptr, i64 12
store i8 20, ptr %4, align 1
%5 = getelementptr inbounds i8, ptr %data_ptr, i64 14
store i8 88, ptr %5, align 1
%6 = getelementptr inbounds i8, ptr %data_ptr, i64 16
store i8 59, ptr %6, align 1
%7 = getelementptr inbounds i8, ptr %data_ptr, i64 18
store i8 56, ptr %7, align 1
%8 = getelementptr inbounds i8, ptr %data_ptr, i64 20
We still have to read the rest of the code to figure out the logic
The gist is: it populates 2 arrays of length 41 each and then subtracts the corresponding member of both arrays from each input byte; every result must be zero, so flag[i] = a[i] + a[i + 41]
In [1]: a = [80, 35, 71, 95, 20, 88, 59, 56, 53, 76, 25, 19, 113, 85, 19, 23, 46, 24, 1, 60, 16, 38, 49, 99, 95, 42, 34, 63, 34, 53, 61, 46, 3, 42, 10, 52, 9, 36, 42, 88, 10, 38, 16, 28, 21, 28, 26, 43, 61, 46, 31, 70, 32, 7, 14, 32, 89, 70, 25, 47, 50, 79, 66, 3, 11, 5, 66, 17, 51, 19, 42, 43, 6, 101, 10, 85,
...: 4, 46, 65, 9, 12, 38, 1]
In [2]: "".join([(chr(a[i]+a[i+41])) for i in range(41)])
Out[2]: 'v3ct0rfuck_3xc3pt10n_h4ndl3r5_h4h4_87e3d0'
Which gives us the flag