Solving Ben_Lolo's WeeperVM Level 1
Link : https://crackmes.one/crackme/67f9bdc38f555589f3530a85
$ file WeeperVM--Level_1
WeeperVM--Level_1: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, stripped
ELF thats pretty easy to decompile
main
starts by initializing a bunch of threads with anti-debugging measures like
- Look for
TracerPid
in/proc/self/status
- Locate
.text
in/proc/self/maps
and calculate a checksum fromproc/self/mem
for it - Some sort of checksum verification from memory and
/proc/self/exe
The strings are encrypted with xor
char *__fastcall xor_dec(char *encryp, int out_len, const char *key, int key_len)
{
char *result; // rax
__int64 i; // rcx
result = (char *)calloc(out_len + 1, 1u);
for ( i = 0; out_len > (int)i; ++i )
result[i] = encryp[i] ^ key[(int)i % key_len];
result[out_len] = 0;
return result;
}
Just used frida
to dump threadId
and strings like this
const module = "crackme";
const baseAddr = Module.findBaseAddress(module);
const funcAddr = baseAddr.add(0x0e27);
var seen = new Set();
Interceptor.attach(funcAddr, {
onEnter: function (args) {
},
onLeave: function (retval) {
var callee = this.context.rsp.readPointer().sub(baseAddr).add(0x10000).toString(16);
if (seen.has(callee)) {
return;
}
seen.add(callee);
console.log(callee, this.threadId, retval.readCString());
}
});
for a quick
10b1a 1545 r
10b36 1545 /proc/self/maps
10b8b 1545 r-xp
10bb4 1545 %lx-%lx
10c10 1545 rb
10c2c 1545 /proc/self/mem
11155 1554 r
11171 1554 /proc/self/status
111c5 1554 TracerPid:
11007 1545 rb
11023 1545 /proc/self/exe
At the end of main
is the vm interpreter loop
while ( 1 )
{
vip = vip;
ins = bytecode[vip];
if ( !ins )
break;
if ( execute_vm_weep(ins, v2) )
return 1;
vip = vip + 1;
}
The function is too large to paste here. VM features include
- 16 general purpose registers -
r14
andr15
were mostly used to jump flags
register for compare with bits for equality, less/greater than- 256 bytes of scratch space - init with values
- Flag is read to 0x40 idx in the scratch space
- The opcodes are
OPCODES = {
0x0: "MOV",
0x1: "MOVB",
0x2: "ADD",
0x3: "SUB",
0x4: "MUL",
0x5: "MOD",
0x6: "CALL",
0x7: "READ_INPUT",
0x9: "CMP",
0xA: "JMP",
0xB: "AND",
0xC: "OR",
0xD: "XOR",
0xE: "SHL",
0xF: "SHR",
}
I wrote a disassembler with reversing
def disassemble(instruction):
def byte(n): return (instruction >> (8 * n)) & 0xFF
# XOR mutation like in the code
modified = instruction ^ (0x1010101 * byte(2))
opcode = (modified >> 28) & 0xF
mem_sel = (modified >> 27) & 0x1 # 1 = special area (offset 77600), 0 = dword_13080
mode = (modified >> 25) & 0x3 # 0 = direct, 1 = from buffer, 2 = immediate
index = (modified >> 8) & 0xFF # index byte (BYTE1)
xor_byte = byte(2)
src_byte = instruction ^ xor_byte # used for source/indexing buffer
op_str = OPCODES.get(opcode, f"UNKNOWN_{opcode}")
# Create operand descriptions
# dst = f"{'BUF' if mem_sel else 'R'}{index}"
dst = "BUF[{}]".format(index) if mem_sel else f"R{index}"
if mode == 0:
src = f"R{src_byte & 0xFF}"
elif mode == 1:
src = f"[BUF][{src_byte & 0xFF}]:({buf[src_byte & 0xFF]}| '{chr(buf[src_byte & 0xFF]) if buf[src_byte & 0xFF] < 127 else ''}')"
elif mode == 2:
src = f"i{src_byte & 0xFF}"
else:
src = "??"
if opcode == 0x0 and mode == 2 and mem_sel == 0:
const_register_bank[dst] = int(src_byte & 0xFF)
return f"{op_str} {dst}, {src} | {const_register_bank.get(dst, 'UNKNOWN')}"
if opcode == 0x2 and mode == 2 and mem_sel == 0:
const_register_bank[dst] += int(src_byte & 0xFF)
return f"{op_str} {dst}, {src} | {const_register_bank.get(dst, 'UNKNOWN')}"
# Handle special opcodes
if opcode == 0x6:
return f"{op_str} putchar('{chr(buf[index]) if buf[index] != 10 else '\\n'}')"
elif opcode == 0x7:
return f"{op_str} READ_INPUT to BUF[{index}] len={src}"
elif opcode == 0x9:
return f"{op_str} {dst} vs {src}"
elif opcode == 0xA:
return f"{op_str} IF (flags & {src}) -> jmp {dst}|{const_register_bank.get(dst, "UNKNOWN")}"
elif opcode == 0x1:
if mem_sel == 0:
return f"{op_str} {dst}, BUF[{src}]"
else:
return f"{op_str} BUF[{dst}], {src}"
else:
return f"{op_str} {dst}, {src}"
const_register_bank
will see when the values are of this pattern
511: 17131DEC: MOV R14, i255
512: B89D9362: ADD R14, i255
513: 17323C0E: ADD R14, i60
And then annotate the disassembly with consts.
Also read the code and started labelling
label_db = {
779: "SUCCESS",
796: "FAILURE",
363: "CONST_390",
393: "CONST_362",
367: "CONST_796",
391: "CONST_398",
399: "CONST_401",
412: '''[f"{a[160+i]:02X}^{a[176+i]:02X}={a[160+i]^a[176+i]:02X}" for i in range(16)]''',
435: f"CONST_{255+197}",
437: f"CONST_{255+206}",
}
buf
is the 256 byte region that was in the .lotus
in the binary
A total of 821 instructions and the end had a check
682: DA46069A: CMP BUF[64] vs i220|
683: 57F2FCF0: JMP IF (flags & i2) -> jmp R14|796|
684: 17B3BDB7: JMP IF (flags & i4) -> jmp R14|796|
685: 20BCFDD3: CMP BUF[65] vs i111|
686: E5404E42: JMP IF (flags & i2) -> jmp R14|796|
687: FE5A545E: JMP IF (flags & i4) -> jmp R14|796|
688: C05D1F0B: CMP BUF[66] vs i86|
689: 06A3ADA1: JMP IF (flags & i2) -> jmp R14|796|
690: 55F1FFF5: JMP IF (flags & i4) -> jmp R14|796|
BUF[64]
is where our input was written to - there were some misunderstanding and I missed the mechanics of the movb
instruction. So I wrote a pintool to track read/writes to the memory in interesting regions
ADDRINT region_start = 0x12F20;
ADDRINT region_end = 0x13020;
ADDRINT register_start = 0x13080;
ADDRINT register_end = 0x130C0;
ADDRINT flags_start = 0x13060;
ADDRINT flags_end = 0x13062;
VOID RecordMemRead(ADDRINT addr, UINT32 size)
{
UINT64 value = 0;
if ((ADDRINT)addr >= l + region_start && (ADDRINT)addr < l + region_end)
{
PIN_SafeCopy(&value, reinterpret_cast<VOID *>(addr), size);
outFile << "READMEM" << size << "@" << " -> " << std::dec << addr - l - region_start << ": " << std::hex << value << std::endl;
}
if ((ADDRINT)addr >= l + register_start && (ADDRINT)addr < l + register_end)
{
PIN_SafeCopy(&value, reinterpret_cast<VOID *>(addr), size);
outFile << "READREG" << size << "@" << " -> " << std::dec << (addr - l - register_start) / size << ": " << std::hex << value << std::endl;
}
if ((ADDRINT)addr >= l + flags_start && (ADDRINT)addr < l + flags_end)
{
PIN_SafeCopy(&value, reinterpret_cast<VOID *>(addr), size);
outFile << "READFLAGS" << size << "@" << " -> " << std::dec << (addr - l - flags_start) / size << ": " << std::hex << value << std::endl;
}
}
VOID RecordMemWrite(ADDRINT addr, UINT32 size, ADDRINT ip)
{
if ((ADDRINT)addr >= region_start && (ADDRINT)addr < region_end)
{
outFile << ip << "|WRITEMEM" << size << "@" << " -> " << std::dec << addr - l - region_start << std::endl;
}
if ((ADDRINT)addr >= register_start && (ADDRINT)addr < register_end)
{
outFile << ip << "|WRITEREG" << size << "@" << " -> " << std::dec << (addr - l - register_start) / size << std::endl;
}
if ((ADDRINT)addr >= flags_start && (ADDRINT)addr < flags_end)
{
outFile << ip << "|WRITEFLAGS" << size << "@" << " -> " << std::dec << (addr - l - flags_start) / size << std::endl;
}
}
// usage
if (INS_IsMemoryRead(ins))
{
INS_InsertPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR)RecordMemRead,
IARG_MEMORYREAD_EA, IARG_MEMORYREAD_SIZE, IARG_END);
}
if (INS_IsMemoryWrite(ins))
{
INS_InsertPredicatedCall(ins, IPOINT_BEFORE, (AFUNPTR)RecordMemWrite,
IARG_MEMORYWRITE_EA, IARG_MEMORYWRITE_SIZE, IARG_INST_PTR, IARG_END);
}
This helped me a lot to validate my assumptions and fix my disassembler such that I had the correct disassembly very quickly.
The next step was to build a CFG and try to make an educated guess on the structure of the program. So I did some crude basic block analysis and built the CFG with python.
import re
import logging
def load_disasm(path):
disasm = {}
with open(path, 'r') as f:
for line in f:
match = re.match(r'^([0-9]+):\s[0-9a-fA-F]+:\s(.*)', line)
if match:
addr = int(match.group(1))
mnemonic = match.group(2).strip()
disasm[addr] = mnemonic
return disasm
def load_trace(path):
with open(path, 'r') as f:
trace = []
for line in f:
match = re.match(r'^([0-9]+): .*', line)
if match:
trace.append(int(match.group(1)))
return trace
disasm = load_disasm("diss")
trace = load_trace("pin.out")
# print(trace)
# print(disasm)
def build_simple_cfg(trace, disasm):
blocks = set(disasm.keys())
# blacklist direct jumps
bl = set([389, 397, 451, 520, 527, 534, 541, 548, 555, 562, 569, 585])
edges = set() # its a trace - don't create duplicate edges
# no jumps trace
for i in blocks:
if i in bl:
continue
if i+1 in blocks:
edges.add((i, i+1))
# trace with jumps
for i in range(len(trace) - 1):
src = trace[i]
dst = trace[i + 1]
if src in blocks and dst in blocks:
edges.add((src, dst))
else:
logging.warning(f"Invalid edge {src} -> {dst}")
return blocks, edges
from graphviz import Digraph
def write_simple_dot(blocks, edges, out_path='simple.dot'):
dot = Digraph(name="CFG", format='dot')
dot.attr('node', shape='box')
# Add basic blocks with proper labels
for ip in blocks:
dot.node(str(ip))
# Add edges
for src, dst in edges:
dot.edge(str(src), str(dst))
dot.save(filename=out_path)
basic_blocks, edges = build_simple_cfg(trace, disasm)
write_simple_dot(basic_blocks, edges)
import networkx as nx
from networkx.drawing.nx_pydot import read_dot, write_dot
def condense_linear_blocks(disasm, dot_input='simple.dot', dot_output='condensed.dot'):
G = read_dot(dot_input)
G = nx.DiGraph(G) # ensure it's directed
skip = set()
for node in list(G.nodes):
if node in skip or node not in G:
continue
path = [node]
current = node
while True:
# preds = list(G.predecessors(current))
succs = list(G.successors(current))
# break if not exactly one successor or the successor has multiple preds
if len(succs) != 1:
break
next_node = succs[0]
if len(list(G.predecessors(next_node))) != 1:
break
path.append(next_node)
current = next_node
skip.add(current)
if len(path) > 1:
new_node = "\n".join(n + "|" + disasm.get(int(n), "???") for n in path).replace(':', '-')
G.add_node(new_node)
# edges from predecessors of first node
for pred in G.predecessors(path[0]):
G.add_edge(pred, new_node)
# edges to successors of last node
for succ in G.successors(path[-1]):
G.add_edge(new_node, succ)
for n in path:
G.remove_node(n)
else:
new_node = path[0] + "|" + disasm.get(int(path[0]), "???")
G.add_node(new_node)
for pred in G.predecessors(path[0]):
G.add_edge(pred, new_node)
for succ in G.successors(path[0]):
G.add_edge(new_node, succ)
G.remove_node(path[0])
write_dot(G, dot_output)
condense_linear_blocks(disasm)
This script loads the disassembly and the pintool trace to build linkages for jump targets and builds a dot file which I used in the browser to build an idea of what the code is doing.
All this could have been avoided if I would have written a CPU module in ghidra
I then fixed up the disassembler to generate as close to python as possible and then manually fixed the loops, if/else in the code to get this
def vm_main():
global buf
r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15 = [BitVecVal(0, 8)] * 16
r12 = 0
while r12 < 16:
r2 = 160
r2 += r12
r0 = buf[r2] # MOVB R0, BUF[R2]
r2 += 16 # r2 = 176 + r12
r1 = buf[r2] # MOVB R1, BUF[R2]
r2 += 16 # r2 = 192 + r12
buf[240] = r2 # MOV BUF[240], R2
r0 ^= r1 # XOR R0, R1
buf[buf[240]] = r0 # MOVB BUF[BUF[240]], R0
buf[r2] = r0
r12 += 1
for r3 in range(0, 32, 16):
r0 = 0
for r12 in range(16):
r1 = r0
for r13 in range(8):
r11 = 72
r11 += r3
r11 += r13
r2 = buf[r11] # MOVB R2, BUF[R11]
r6 = r2
r6 &= 240
r6 >>= 4
r7 = r2
r7 &= 15
r11 = 192
r11 += r1
r4 = buf[r11] # MOVB R4, BUF[R11]
r11 = 151
r11 -= r13
r5 = buf[r11] # MOVB R5, BUF[R11]
r4 &= r5
if r4 != 0:
r4 = 128
r4 += r6
r2 = buf[r4] # MOVB R2, BUF[R4]
r2 &= 15
r2 <<= 4
r4 = 128
r4 += r7
r5 = buf[r4] # MOVB R5, BUF[R4]
r5 &= 240
r5 >>= 4
r2 |= r5
else:
r4 = 128
r4 += r6
r2 = buf[r4] # MOVB R2, BUF[R4]
r2 &= 240
r4 = 128
r4 += r7
r5 = buf[r4] # MOVB R5, BUF[R4]
r5 &= 15
r2 |= r5
r4 = 192
r4 += r0
r5 = buf[r4] # MOVB R5, BUF[R4]
r2 ^= r5
r4 = r2
r2 = 0
r5 = 144
r6 = buf[r5] # MOVB R6, BUF[R5]
r6 &= r4
r6 >>= 3
r2 |= r6
r5 += 1
r6 = buf[r5] # MOVB R6, BUF[R5]
r6 &= r4
r6 >>= 4
r2 |= r6
r5 += 1
r6 = buf[r5] # MOVB R6, BUF[R5]
r6 &= r4
r6 <<= 2
r2 |= r6
r5 += 1
r6 = buf[r5] # MOVB R6, BUF[R5]
r6 &= r4
r6 >>= 1
r2 |= r6
r5 += 1
r6 = buf[r5] # MOVB R6, BUF[R5]
r6 &= r4
r6 <<= 2
r2 |= r6
r5 += 1
r6 = buf[r5] # MOVB R6, BUF[R5]
r6 &= r4
r6 <<= 4
r2 |= r6
r5 += 1
r6 = buf[r5] # MOVB R6, BUF[R5]
r6 &= r4
r6 >>= 1
r2 |= r6
r5 += 1
r6 = buf[r5] # MOVB R6, BUF[R5]
r6 &= r4
r6 <<= 1
r2 |= r6
r11 = 64
r11 += r3
r10 = 144
r9 = 152
# Based on the value of r13, jump to different parts
r4 = r13
r4 += 7
r4 %= 8
# Execution continues at label 570
for kk in range(8):
r5 = r2
r6 = r11
r6 += r4
r6 = buf[r6] # MOVB R6, BUF[R6]
r7 = buf[r10] # MOVB R7, BUF[R10]
r8 = buf[r9] # MOVB R8, BUF[R9]
r5 ^= r6
r5 &= r7
r6 &= r8
r5 |= r6
buf[240] = r11
buf[240] += r4
buf[buf[240]] = r5 # MOVB BUF[BUF[240]], R5
buf[r11+r4] = r5
r10 += 1
r9 += 1
if kk == 0:
r4 = r13
r4 += 6
r4 %= 8
elif kk == 1:
r4 = r13
r4 += 2
r4 %= 8
elif kk == 2:
r4 = r13
r4 += 1
r4 %= 8
elif kk == 3:
r4 = r13
r4 += 5
r4 %= 8
elif kk == 4:
r4 = r13
r4 += 0
r4 %= 8
elif kk == 5:
r4 = r13
r4 += 3
r4 %= 8
elif kk == 6:
r4 = r13
r4 += 4
r4 %= 8
# Check if r13 == 7
if r13 < 7:
r0 += 1
r0 %= 16
# Swap bytes in a range of memory
r4 = 64
r4 += r3
r5 = 72
r5 += r3
r13 = 0
while r13 < 8:
r6 = buf[r4] # MOVB R6, BUF[R4]
r7 = buf[r5] # MOVB R7, BUF[R5]
buf[240] = r4
buf[buf[240]] = r7 # MOVB BUF[BUF[240]], R7
buf[r4] = r7
buf[240] = r5
buf[buf[240]] = r6 # MOVB BUF[BUF[240]], R6
buf[r5] = r6
r4 += 1
r5 += 1
r13 += 1
# r12 += 1
# if r12 == 16:
# # End inner loop, move to next r3
# continue
# Another swap operation
r4 = 64
r4 += r3
r5 = 72
r5 += r3
r13 = 0
while r13 < 8:
r6 = buf[r4] # MOVB R6, BUF[R4]
r7 = buf[r5] # MOVB R7, BUF[R5]
buf[240] = r4
buf[buf[240]] = r7 # MOVB BUF[BUF[240]], R7
buf[r4] = r7
buf[240] = r5
buf[buf[240]] = r6 # MOVB BUF[BUF[240]], R6
buf[r5] = r6
r4 += 1
r5 += 1
r13 += 1
# Move data around in memory
r13 = 0
while r13 < 16:
r4 = r13
r4 += 9
r4 %= 16
r5 = 192
r5 += r13
r6 = 208
r6 += r4
r4 = buf[r5] # MOVB R4, BUF[R5]
buf[240] = r6
buf[buf[240]] = r4 # MOVB BUF[BUF[240]], R4
buf[r6] = r4
r13 += 1
# More memory operations
r13 = 0
while r13 < 16:
r4 = 192
r4 += r13
r5 = 208
r5 += r13
r6 = buf[r5] # MOVB R6, BUF[R5]
buf[240] = r4
buf[buf[240]] = r6 # MOVB BUF[BUF[240]], R6
buf[r4] = r6
r13 += 1
So the overall logic is to transform the input in chunks of 16 bytes and then compare the output to a known transformed value
682: DA46069A: CMP BUF[64] vs i220|
683: 57F2FCF0: JMP IF (flags & i2) -> jmp R14|796|
684: 17B3BDB7: JMP IF (flags & i4) -> jmp R14|796|
685: 20BCFDD3: CMP BUF[65] vs i111|
686: E5404E42: JMP IF (flags & i2) -> jmp R14|796|
687: FE5A545E: JMP IF (flags & i4) -> jmp R14|796|
688: C05D1F0B: CMP BUF[66] vs i86|
689: 06A3ADA1: JMP IF (flags & i2) -> jmp R14|796|
690: 55F1FFF5: JMP IF (flags & i4) -> jmp R14|796|
Without looking at the structure of the transformations I symbolized everything and let the z3 script run overnight.
So then I manually started to analyze and rename parts of the code, with removing parts of the code and registers - the manual decompiler pipeline. I also fixed up the pintool to yield the transformed values and I built a db of known transformations - input to output from the binary
I just fixed the cmp
instruction in the pintool to not fail any byte check for the transformed output based on the value of the vip
register.
void count_eq(CONTEXT *ctxt)
{
ADDRINT ecx = PIN_GetContextReg(ctxt, REG_RCX);
ADDRINT esi = PIN_GetContextReg(ctxt, REG_RSI);
if (fix_check) {
outFile << "CMP " << "ECX: 0x" << std::hex << ecx << " | ESI: 0x" << esi << std::endl;
ADDRINT regval;
PIN_GetContextRegval(ctxt, REG_EFLAGS, reinterpret_cast<UINT8 *>(®val));
regval = regval | (1 << 6);
PIN_SetContextRegval(ctxt, REG_EFLAGS, reinterpret_cast<UINT8 *>(®val));
PIN_ExecuteAt(ctxt);
}
}
void dump_ins(CONTEXT *ctxt)
{
ADDRINT rax = PIN_GetContextReg(ctxt, REG_RAX);
if (rax >= 682 && rax <= 775) {
fix_check = TRUE;
} else {
fix_check = FALSE;
}
}
// usage
if (INS_Address(ins) == l + 0x11798)
{
INS_InsertCall(ins, IPOINT_AFTER, (AFUNPTR)count_eq, IARG_CONTEXT, IARG_END);
}
if (INS_Address(ins) == l + 0x11B85)
{
INS_InsertCall(ins, IPOINT_BEFORE, (AFUNPTR)dump_ins, IARG_CONST_CONTEXT, IARG_END);
}
Before I got to the point of the disassembly I thought that maybe I could side channel it - by counting the cmp
instruction. But after the decompilation I could see diffusion, permutations in a way that a single bit reflected in all of the output bytes.
I also lifted the code to LLVM IR and fed it to KLEE to see if I could solve it - but that also did not pan out very well.
So the only option left was to analyze the transformation and optimize the code to get to a more readable state - this is where I spent most of my time.
Eventually coming to
def feistel_transform(input_str, second_round=False):
s_box = [199, 242, 126, 169, 227, 219, 176, 4, 44, 109, 49, 26, 150, 79, 88, 133]
round_keys = [70, 252, 99, 237, 166, 65, 135, 113, 220, 141, 64, 156, 218, 97, 218, 26]
if second_round:
round_keys = round_keys[7:] + round_keys[:7]
assert len(input_str) == 16
byte_data = list(bytearray(input_str, "utf-8"))
left_half, right_half = byte_data[:8], byte_data[8:]
round_idx = 0
for _ in range(16):
current_idx = round_idx
for nibble_idx in range(8):
high_nibble = (right_half[nibble_idx] & 0xF0) >> 4
low_nibble = right_half[nibble_idx] & 0x0F
swap_nibbles = round_keys[current_idx] & (1 << nibble_idx)
transformed_high = s_box[high_nibble]
transformed_low = s_box[low_nibble]
if swap_nibbles != 0:
transformed_high &= 0x0F
transformed_high <<= 4
transformed_low &= 0xF0
transformed_low >>= 4
else:
transformed_high &= 0xF0
transformed_low &= 0x0F
round_output = transformed_high | transformed_low
round_output ^= round_keys[round_idx]
permutation_map = [7, 6, 2, 1, 5, 0, 3, 4]
bit_swap_rules = [
(0, 1), (1, 0), (2, 6), (3, 5), (4, 3), (5, 7), (6, 2), (7, 4), ]
swap_dict = {dest: src for src, dest in bit_swap_rules}
for perm_idx in range(8):
target_idx = (nibble_idx + permutation_map[perm_idx]) % 8
left_half[target_idx] ^= (
(round_output & (1 << swap_dict[7 - perm_idx]))
>> swap_dict[7 - perm_idx]
) << (7 - perm_idx)
if nibble_idx < 7:
round_idx = (round_idx + 1) % 16
left_half, right_half = right_half, left_half
left_half, right_half = right_half, left_half
return left_half + right_half
Yes. Its a feistel like construction - I optimized the byte swaps and operations to halfs. I also looked at the bit permutation and how each byte of right half was tranformed, permuted and xored to bits of the left half. I did a lot of reading in the meanwhile trying to guess if this is a known algo.
Once I knew its feistel I could invert it - we just had to invert the order of the round_keys
is what I called.
def i_feistel_transform(input, second_round=False):
s_box = [199, 242, 126, 169, 227, 219, 176, 4, 44, 109, 49, 26, 150, 79, 88, 133]
round_keys = [ 70, 252, 99, 237, 166, 65, 135, 113, 220, 141, 64, 156, 218, 97, 218, 26]
if second_round:
round_keys = round_keys[7:] + round_keys[:7]
assert len(input) == 16
byte_data = list(input)
left_half, right_half = byte_data[:8], byte_data[8:]
round_idx = 0
for xx in range(16):
current_idx = [9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12, 5, 14, 7, 0][xx]
for nibble_idx in range(7, -1, -1):
high_nibble = (right_half[nibble_idx] & 0xF0) >> 4
low_nibble = right_half[nibble_idx] & 0x0F
swap_nibbles = round_keys[current_idx] & (1 << nibble_idx)
transformed_high = s_box[high_nibble]
transformed_low = s_box[low_nibble]
if swap_nibbles != 0:
transformed_high &= 0x0F
transformed_high <<= 4
transformed_low &= 0xF0
transformed_low >>= 4
else:
transformed_high &= 0xF0
transformed_low &= 0x0F
round_output = transformed_high | transformed_low
round_output ^= round_keys[round_idx]
permutation_map = [7, 6, 2, 1, 5, 0, 3, 4]
bit_swap_rules = [
(0, 1), (1, 0), (2, 6), (3, 5), (4, 3), (5, 7), (6, 2), (7, 4), ]
swap_dict = {dest: src for src, dest in bit_swap_rules}
for perm_idx in range(7, -1, -1):
target_idx = (nibble_idx + permutation_map[perm_idx]) % 8
left_half[target_idx] ^= (
(round_output & (1 << swap_dict[7 - perm_idx]))
>> swap_dict[7 - perm_idx]
) << (7 - perm_idx)
if nibble_idx != 0:
round_idx = (round_idx - 1) % 16
left_half, right_half = right_half, left_half
left_half, right_half = right_half, left_half
return left_half + right_half
if __name__ == "__main__":
expected = [0xDC,0x6F,0x56,0xC2,0xD3,0x4E,0x50,0x77,0x69,0xB2,0x4D,0xE9,0x8F,0xF5,0x2D,0x12,0x2A,0x95,0x81,0xE5,0x35,0xDB,0x8A,0x10,0x14,0xFF,0xA7,0x4,0xD0,0xC1,0xE5,0x74]
print(
"".join(
map(
chr, i_feistel_transform(expected[:16])
+ i_feistel_transform(expected[16:32], True), )
)
)
Which gave us the flag
()v3Ng34nce_I5_4n_1D10T5_g4m3_()