lua的llvm的反混淆与还原

Posted 大叔学算法

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了lua的llvm的反混淆与还原相关的知识,希望对你有一定的参考价值。

最近接触到个lua的加密方式,以前的lua的防护一般也就luac,或者luajit编码后就算是做好防护了,最多就是在编码文件的基础上用什么算法给文件加密下,但是这些呢其实都有对应的反编译软件对其进行反编译,效果还行,够你看了。尤其是luac,如果编码的时候没有去掉debug信息的话,基本上反编译的效果非常ok。再高个等级的,把luachunk结构给改了,或者把opcode给改了。这样反编译就需要把opcode的顺序给搞定了,才可以进行反编译。这种加密方式已经很ok了,会让人花大量的时间去找opcode了(当然其实也没那么久了)。其实opcode的顺序也就几十万种吧(具体没算过,最多几百万种),那么开发个工具,循环遍历下就是,几百万种可能对计算机而言也是很快的。或者有些文件是公用库了,网上一堆源码,对其编码,解析下就可以找到opcode的顺序了。这些所谓的加密什么的都是有个固定文件,而这固定文件又有些格式还是比较好搞定的。最近遇到一种变态的,具体是什么说法咱也叫不上来,不是专业的。具体实现形式粗略说下。明文 → luac编码后 → 软件解析编码后的文件得到chunk信息 → 改掉chunk信息,随机生成自己的vm engine,适应这个vm engine → 把修改后的chunk信息保存为二进制数据,对这个二进制数据随机加入混淆信息什么的 → vm engine进行混淆,载入二进制数据 → vm engine 解析二进制数据,得到chunk信息,动态加入执行。说了这么多,还是来看个标准的再说。看懂了再去理解那种混淆,动态随机什么的。

-- Module-level state shared by the decoder and the interpreter.
-- The three declarations below were fused into one invalid statement
-- ("local advanced_debuglocal newEnvlocal ...") when the newlines were lost.
local advanced_debug      -- when truthy, func() does not report coroutine failures (feature is still a TODO)
local newEnv              -- environment table used by GETGLOBAL/SETGLOBAL; assigned by load_bytecode
local customErrorHandler  -- optional callback receiving "line:message" instead of raising an error

-- Operand layout of every Lua 5.1 opcode, indexed by (opcode number + 1).
local lua_opcode_types = {
	"ABC",  "ABx", "ABC",  "ABC",
	"ABC",  "ABx", "ABC",  "ABx",
	"ABC",  "ABC", "ABC",  "ABC",
	"ABC",  "ABC", "ABC",  "ABC",
	"ABC",  "ABC", "ABC",  "ABC",
	"ABC",  "ABC", "AsBx", "ABC",
	"ABC",  "ABC", "ABC",  "ABC",
	"ABC",  "ABC", "ABC",  "AsBx",
	"AsBx", "ABC", "ABC",  "ABC",
	"ABx",  "ABC",
}

-- Mnemonic of every Lua 5.1 opcode, indexed by (opcode number + 1).
local lua_opcode_names = {
	"MOVE",     "LOADK",     "LOADBOOL", "LOADNIL",
	"GETUPVAL", "GETGLOBAL", "GETTABLE", "SETGLOBAL",
	"SETUPVAL", "SETTABLE",  "NEWTABLE", "SELF",
	"ADD",      "SUB",       "MUL",      "DIV",
	"MOD",      "POW",       "UNM",      "NOT",
	"LEN",      "CONCAT",    "JMP",      "EQ",
	"LT",       "LE",        "TEST",     "TESTSET",
	"CALL",     "TAILCALL",  "RETURN",   "FORLOOP",
	"FORPREP",  "TFORLOOP",  "SETLIST",  "CLOSE",
	"CLOSURE",  "VARARG"
};

-- Reverse lookup (name -> index), kept disabled exactly as in the original.
--[[
local lua_opcode_numbers = {};
for number, name in next, lua_opcode_names do
	lua_opcode_numbers[name] = number;
end
--]]
--- Extract bits from an integer.
-- Bit positions are 1-based, with bit 1 being the least significant bit.
-- @author Stravant
-- @param input integer to read from
-- @param n     position of the first (lowest) bit to extract
-- @param n2    optional position of the last bit (inclusive); when omitted
--              only the single bit `n` is returned (as 0 or 1)
-- @return the selected bits as a non-negative number
local function get_bits(input, n, n2)
	-- Drop the bits below position n ...
	local shifted = math.floor(input / 2^(n - 1))
	if n2 then
		-- ... and keep (n2 - n + 1) bits; an empty range yields 0 just like
		-- the original bit-by-bit summation did.
		if n2 < n then
			return 0
		end
		return shifted % 2^(n2 - n + 1)
	end
	return shifted % 2
end
--- Decode a Lua 5.1 binary chunk into a table describing the main function.
-- Replacing the readers below is where a custom (obfuscated) format would hook in.
--
-- The returned chunk table contains:
--   name, first_line, last_line, upvalues, arguments, varg, stack  (header fields)
--   instructions  1-based list of {opcode, type, A, B/C | Bx | sBx}
--   constants     0-based list of {type, data}
--   prototypes    0-based list of nested chunk tables
--   debug         {lines = 1-based list of line numbers}
--
-- @param bytecode string holding the binary chunk
-- @return the decoded main chunk
-- Raises an error when the header is not a supported Lua 5.1 header.
local function decode_bytecode(bytecode)
	local index = 1          -- position of the next unread byte
	local big_endian = false -- set from the header; honoured by the integer readers
	local int_size;
	local size_t;

	-- Actual binary decoding functions. Dependant on the bytecode.
	local get_int, get_size_t;

	-- Binary decoding helper functions
	local get_int8, get_int32, get_int64, get_float64, get_string;
	do
		function get_int8()
			local a = bytecode:byte(index, index);
			index = index + 1
			return a
		end

		-- Read an unsigned 32-bit integer.
		function get_int32()
			local a, b, c, d = bytecode:byte(index, index + 3);
			index = index + 4;
			if big_endian then
				a, b, c, d = d, c, b, a
			end
			return d*16777216 + c*65536 + b*256 + a
		end

		-- Read an unsigned 64-bit integer (exact only up to 2^53).
		function get_int64()
			local a = get_int32();
			local b = get_int32();
			if big_endian then
				a, b = b, a
			end
			return b*4294967296 + a;
		end

		-- Read an IEEE 754 double-precision number.
		function get_float64()
			local low = get_int32()
			local high = get_int32()
			if big_endian then
				low, high = high, low
			end

			local sign = -2*get_bits(high, 32) + 1
			local exponent = get_bits(high, 21, 31)
			local fraction = (get_bits(high, 1, 20)*(2^32) + low)/(2^52)

			if exponent == 0 then
				-- Zero and subnormals have no implicit leading 1. Without this
				-- branch the constant 0 would decode as 2^-1023.
				if fraction == 0 then
					return sign * 0.0
				end
				return sign * 2^(-1022) * fraction
			elseif exponent == 2047 then
				-- Infinity and NaN.
				if fraction == 0 then
					return sign * (1/0)
				end
				return 0/0
			end
			return sign * 2^(exponent - 1023) * (fraction + 1)
		end

		-- Read `len` raw bytes, or a size_t-prefixed string when `len` is
		-- omitted (returns nil for a zero-length prefixed string).
		function get_string(len)
			local str;
			if len then
				str = bytecode:sub(index, index + len - 1);
				index = index + len;
			else
				len = get_size_t();
				if len == 0 then return; end
				str = bytecode:sub(index, index + len - 1);
				index = index + len;
			end
			return str;
		end
	end
	-- Everything above follows the standard format; an obfuscator could
	-- change any of these readers.

	-- Decode one function prototype (recursively decodes nested prototypes).
	local function decode_chunk()
		local chunk;
		local instructions = {};
		local constants    = {};
		local prototypes   = {};
		local debug_info = {
			lines = {};
		};

		chunk = {
			instructions = instructions;
			constants    = constants;
			prototypes   = prototypes;
			debug        = debug_info;
		};

		local num;

		chunk.name       = get_string(); -- Function name (usually empty)
		chunk.first_line = get_int();    -- First line
		chunk.last_line  = get_int();    -- Last line

		-- Drop the trailing NUL stored with the name.
		if chunk.name then chunk.name = chunk.name:sub(1, -2); end

		chunk.upvalues  = get_int8();
		chunk.arguments = get_int8();
		chunk.varg      = get_int8();
		chunk.stack     = get_int8();
		-- The reads above follow the standard order; some fields are not
		-- strictly needed and could be dropped or reordered by an obfuscator.

		-- TODO: realign lists to 1
		-- Decode instructions
		do
			num = get_int();
			for i = 1, num do
				local instruction = {
					-- opcode = opcode number;
					-- type   = [ABC, ABx, AsBx]
					-- A, B, C, Bx, or sBx depending on type
				};

				local data    = get_int32();
				local opcode  = get_bits(data, 1, 6);
				local op_type = lua_opcode_types[opcode + 1];

				instruction.opcode = opcode;
				instruction.type   = op_type;

				instruction.A = get_bits(data, 7, 14);
				if op_type == "ABC" then
					instruction.B = get_bits(data, 24, 32);
					instruction.C = get_bits(data, 15, 23);
				elseif op_type == "ABx" then
					instruction.Bx = get_bits(data, 15, 32);
				elseif op_type == "AsBx" then
					-- sBx is stored with a bias of 131071.
					instruction.sBx = get_bits(data, 15, 32) - 131071;
				end

				instructions[i] = instruction;
			end
		end

		-- Decode constants
		do
			num = get_int();
			for i = 1, num do
				local constant = {
					-- type = constant type;
					-- data = constant data;
				};
				local const_type = get_int8();
				constant.type = const_type;

				-- Any other type tag leaves data as nil.
				if const_type == 1 then
					constant.data = (get_int8() ~= 0);
				elseif const_type == 3 then
					constant.data = get_float64();
				elseif const_type == 4 then
					constant.data = get_string():sub(1, -2);
				end

				constants[i-1] = constant;
			end
		end

		-- Decode prototypes (the nested functions)
		do
			num = get_int();
			for i = 1, num do
				prototypes[i-1] = decode_chunk();
			end
		end

		-- Decode debug info.
		-- Not all of it is required; omitting it makes decompilation harder.
		do
			-- line numbers
			local data = debug_info.lines
			num = get_int();
			for i = 1, num do
				data[i] = get_int32();
			end

			-- locals (read and discarded; ideally empty)
			num = get_int();
			for i = 1, num do
				get_string(); -- local name
				get_int32();  -- local start PC
				get_int32();  -- local end PC
			end

			-- upvalue names (read and discarded; ideally empty)
			num = get_int();
			for i = 1, num do
				get_string(); -- upvalue name
			end
		end

		return chunk;
	end

	-- Verify bytecode header
	do
		assert(get_string(4) == "\27Lua", "Lua bytecode expected.");
		assert(get_int8() == 0x51, "Only Lua 5.1 is supported.");
		get_int8(); -- Official bytecode
		big_endian = (get_int8() == 0);
		int_size = get_int8();
		size_t   = get_int8();

		if int_size == 4 then
			get_int = get_int32;
		elseif int_size == 8 then
			get_int = get_int64;
		else
			-- TODO: refactor errors into table
			error("Unsupported bytecode target platform");
		end

		if size_t == 4 then
			get_size_t = get_int32;
		elseif size_t == 8 then
			get_size_t = get_int64;
		else
			error("Unsupported bytecode target platform");
		end

		-- Instruction size 4, lua_Number size 8, floating-point numbers.
		assert(get_string(3) == "\4\8\0", "Unsupported bytecode target platform");
	end

	return decode_chunk();
end
--- Capture a list of values together with its true length.
-- Needed because `#t` is unreliable when the values contain nil.
-- @param ... values to capture (typically the results of a call)
-- @return the number of values passed
-- @return a table holding the values at indices 1..count
local function handle_return(...)
	local c = select("#", ...)
	local t = {...}
	return c, t
end
--- Build an executable Lua function that interprets a decoded chunk.
-- @param cache    chunk table as produced by decode_bytecode (reads
--                 instructions, constants, prototypes, debug.lines, name
--                 and arguments)
-- @param upvalues optional table giving access to the enclosing function's
--                 upvalues, indexed from 0
-- @return debugging table with get_stack() and get_IP()
-- @return func, the callable wrapper
--
-- Fixes relative to the pasted original:
--   * RK operands no longer turn a `false` constant into nil
--   * each activation saves/restores interpreter state, so recursion works
--   * return values keep their count, so nil results are not dropped
--   * `top` is set explicitly after multi-result CALL and VARARG
--   * varargs exclude the fixed parameters
--   * SETLIST with B == 0 copies (top - A) values
--   * the error handler tolerates non-string errors and reports the line of
--     the failing instruction
local function create_wrapper(cache, upvalues)
	local instructions = cache.instructions;
	local constants    = cache.constants;
	local prototypes   = cache.prototypes;

	local stack, top
	local environment
	local IP = 1; -- instruction pointer
	local vararg, vararg_size
	local depth = 0 -- number of activations of this closure currently running

	-- Resolve an RK operand: values above 255 select constant (value - 256),
	-- anything else selects a register. Written as an explicit `if` because
	-- `cond and K or R` replaces a false/nil constant with a register read.
	local function rk(operand)
		if operand > 255 then
			return constants[operand - 256].data
		end
		return stack[operand]
	end

	-- Handlers are keyed by the standard Lua 5.1 opcode numbers; an
	-- obfuscator could reorder them. A handler returning `true` ends the
	-- function; its second result is then the table of return values with
	-- their count in field `n`.
	local opcode_funcs = {
		[0] = function(instruction) -- MOVE
			stack[instruction.A] = stack[instruction.B];
		end,
		[1] = function(instruction) -- LOADK
			stack[instruction.A] = constants[instruction.Bx].data;
		end,
		[2] = function(instruction) -- LOADBOOL
			stack[instruction.A] = instruction.B ~= 0
			if instruction.C ~= 0 then
				IP = IP + 1
			end
		end,
		[3] = function(instruction) -- LOADNIL
			local stack = stack
			for i = instruction.A, instruction.B do
				stack[i] = nil
			end
		end,
		[4] = function(instruction) -- GETUPVAL
			stack[instruction.A] = upvalues[instruction.B]
		end,
		[5] = function(instruction) -- GETGLOBAL
			local key = constants[instruction.Bx].data;
			stack[instruction.A] = environment[key];
		end,
		[6] = function(instruction) -- GETTABLE
			stack[instruction.A] = stack[instruction.B][rk(instruction.C)];
		end,
		[7] = function(instruction) -- SETGLOBAL
			local key = constants[instruction.Bx].data;
			environment[key] = stack[instruction.A];
		end,
		[8] = function(instruction) -- SETUPVAL
			upvalues[instruction.B] = stack[instruction.A]
		end,
		[9] = function(instruction) -- SETTABLE
			stack[instruction.A][rk(instruction.B)] = rk(instruction.C)
		end,
		[10] = function(instruction) -- NEWTABLE
			stack[instruction.A] = {}
		end,
		[11] = function(instruction) -- SELF
			local A = instruction.A
			local object = stack[instruction.B]
			local key = rk(instruction.C)

			stack[A+1] = object
			stack[A]   = object[key]
		end,
		[12] = function(instruction) -- ADD
			stack[instruction.A] = rk(instruction.B) + rk(instruction.C);
		end,
		[13] = function(instruction) -- SUB
			stack[instruction.A] = rk(instruction.B) - rk(instruction.C);
		end,
		[14] = function(instruction) -- MUL
			stack[instruction.A] = rk(instruction.B) * rk(instruction.C);
		end,
		[15] = function(instruction) -- DIV
			stack[instruction.A] = rk(instruction.B) / rk(instruction.C);
		end,
		[16] = function(instruction) -- MOD
			stack[instruction.A] = rk(instruction.B) % rk(instruction.C);
		end,
		[17] = function(instruction) -- POW
			stack[instruction.A] = rk(instruction.B) ^ rk(instruction.C);
		end,
		[18] = function(instruction) -- UNM
			stack[instruction.A] = -stack[instruction.B]
		end,
		[19] = function(instruction) -- NOT
			stack[instruction.A] = not stack[instruction.B]
		end,
		[20] = function(instruction) -- LEN
			stack[instruction.A] = #stack[instruction.B]
		end,
		[21] = function(instruction) -- CONCAT
			local B = instruction.B
			local result = stack[B]
			for i = B+1, instruction.C do
				result = result .. stack[i]
			end
			stack[instruction.A] = result
		end,
		[22] = function(instruction) -- JMP
			IP = IP + instruction.sBx
		end,
		[23] = function(instruction) -- EQ
			local expected = instruction.A ~= 0
			if (rk(instruction.B) == rk(instruction.C)) ~= expected then
				IP = IP + 1
			end
		end,
		[24] = function(instruction) -- LT
			local expected = instruction.A ~= 0
			if (rk(instruction.B) < rk(instruction.C)) ~= expected then
				IP = IP + 1
			end
		end,
		[25] = function(instruction) -- LE
			local expected = instruction.A ~= 0
			if (rk(instruction.B) <= rk(instruction.C)) ~= expected then
				IP = IP + 1
			end
		end,
		[26] = function(instruction) -- TEST
			if (not not stack[instruction.A]) == (instruction.C == 0) then
				IP = IP + 1
			end
		end,
		[27] = function(instruction) -- TESTSET
			local stack = stack
			local B = stack[instruction.B]
			if (not not B) == (instruction.C == 0) then
				IP = IP + 1
			else
				stack[instruction.A] = B
			end
		end,
		[28] = function(instruction) -- CALL
			local A = instruction.A;
			local B = instruction.B;
			local C = instruction.C;
			local stack = stack;
			local args, results;
			local limit, loop

			args = {};
			if B ~= 1 then
				if B ~= 0 then
					limit = A+B-1;
				else
					limit = top -- arguments run up to the current top
				end

				loop = 0
				for i = A+1, limit do
					loop = loop + 1
					args[loop] = stack[i];
				end

				limit, results = handle_return(stack[A](unpack(args, 1, limit-A)))
			else
				limit, results = handle_return(stack[A]())
			end

			top = A - 1

			if C ~= 1 then
				if C ~= 0 then
					limit = A+C-2;
				else
					-- `limit` holds the result count here; results occupy
					-- registers A .. A+count-1.
					limit = limit+A-1
				end

				loop = 0;
				for i = A, limit do
					loop = loop + 1;
					stack[i] = results[loop];
				end

				if C == 0 then
					-- Set explicitly: the stack's __newindex hook ignores
					-- nil/false values, which would leave top too low.
					top = limit
				end
			end
		end,
		[29] = function(instruction) -- TAILCALL
			local A = instruction.A;
			local B = instruction.B;
			local stack = stack;
			local args = {};
			local count, results;
			local limit, loop

			if B ~= 1 then
				if B ~= 0 then
					limit = A+B-1;
				else
					limit = top
				end

				loop = 0
				for i = A+1, limit do
					loop = loop + 1
					args[loop] = stack[i];
				end

				count, results = handle_return(stack[A](unpack(args, 1, limit-A)));
			else
				count, results = handle_return(stack[A]());
			end

			results.n = count
			return true, results
		end,
		[30] = function(instruction) -- RETURN
			--TODO: CLOSE
			local A = instruction.A;
			local B = instruction.B;
			local stack = stack;
			local limit;

			if B == 1 then
				return true;
			end
			if B == 0 then
				limit = top
			else
				limit = A + B - 2;
			end

			local output = {};
			local count = 0
			for i = A, limit do
				count = count + 1
				output[count] = stack[i];
			end
			output.n = count -- keep the real count; values may be nil
			return true, output;
		end,
		[31] = function(instruction) -- FORLOOP
			local A = instruction.A
			local stack = stack

			local step = stack[A+2]
			local index = stack[A] + step
			stack[A] = index

			if step > 0 then
				if index <= stack[A+1] then
					IP = IP + instruction.sBx
					stack[A+3] = index
				end
			else
				if index >= stack[A+1] then
					IP = IP + instruction.sBx
					stack[A+3] = index
				end
			end
		end,
		[32] = function(instruction) -- FORPREP
			local A = instruction.A
			local stack = stack

			stack[A] = stack[A] - stack[A+2]
			IP = IP + instruction.sBx
		end,
		[33] = function(instruction) -- TFORLOOP
			local A = instruction.A
			local C = instruction.C
			local stack = stack

			local offset = A+2
			local result = {stack[A](stack[A+1], stack[A+2])}
			for i = 1, C do
				stack[offset+i] = result[i]
			end

			if stack[A+3] ~= nil then
				stack[A+2] = stack[A+3]
			else
				IP = IP + 1
			end
		end,
		[34] = function(instruction) -- SETLIST
			local A = instruction.A
			local B = instruction.B
			local C = instruction.C
			local stack = stack

			if C == 0 then
				error("NYI: extended SETLIST")
			else
				local offset = (C - 1) * 50
				local t = stack[A]

				if B == 0 then
					-- Values occupy registers A+1 .. top.
					B = top - A
				end
				for i = 1, B do
					t[offset+i] = stack[A+i]
				end
			end
		end,
		[35] = function(instruction) -- CLOSE
			--io.stderr:write("NYI: CLOSE")
			--io.stderr:flush()
		end,
		[36] = function(instruction) -- CLOSURE
			local proto = prototypes[instruction.Bx]
			local instructions = instructions
			local stack = stack

			-- Each upvalue is described by a pseudo-instruction following
			-- CLOSURE: MOVE captures a register of this activation,
			-- GETUPVAL forwards one of our own upvalues.
			local indices = {}
			local new_upvals = setmetatable({}, {
				__index = function(t, k)
					local upval = indices[k]
					return upval.segment[upval.offset]
				end,
				__newindex = function(t, k, v)
					local upval = indices[k]
					upval.segment[upval.offset] = v
				end
			})
			for i = 1, proto.upvalues do
				local movement = instructions[IP]
				if movement.opcode == 0 then -- MOVE
					indices[i-1] = {segment = stack, offset = movement.B}
				elseif movement.opcode == 4 then -- GETUPVAL
					indices[i-1] = {segment = upvalues, offset = movement.B}
				end
				IP = IP + 1
			end

			local _, func = create_wrapper(proto, new_upvals)
			stack[instruction.A] = func
		end,
		[37] = function(instruction) -- VARARG
			local A = instruction.A
			local B = instruction.B
			local stack, vararg = stack, vararg

			local last
			if B > 0 then
				last = A + B - 2 -- exactly B-1 values
			else
				last = A + vararg_size -- all of them
			end
			for i = A, last do
				stack[i] = vararg[i - A]
			end
			if B == 0 then
				top = last
			end
		end,
	}

	-- Report a runtime error through customErrorHandler when set, otherwise
	-- raise it. `at` is the instruction pointer to report for (defaults to
	-- the current one). IP has already been advanced past the failing
	-- instruction, hence the -1 for the line lookup.
	local function handle_error(b, at)
		local line = cache.debug.lines[(at or IP) - 1];
		local err = b
		if type(b) == "string" then
			-- Strip the "chunkname:line:" style prefix.
			err = b:match("^.+:(.+)") or b
		end

		if customErrorHandler then
			--coroutine.resume(customErrorHandler,tostring(line)..":"..tostring(err))
			customErrorHandler(tostring(line)..":"..tostring(err))
		else
			error(tostring(line)..":"..tostring(err), 3)
		end
	end

	-- Fetch/execute loop; returns the table of return values (or nil).
	local function loop()
		local instruction, handler, ran, a, b

		while true do
			instruction = instructions[IP];
			--//print("IP: "..tostring(IP))
			--//print("INSTUCT: "..tostring(instruction))
			--//print("OPCODE: "..instruction.opcode)
			IP = IP + 1
			-- A missing instruction or unknown opcode yields a nil handler,
			-- which pcall reports as a failure.
			handler = instruction and opcode_funcs[instruction.opcode]
			ran, a, b = pcall(handler, instruction); -- dispatch on the opcode
			if not ran then
				-- An obfuscator could add another indirection table here
				-- to remap the opcode order.
				handle_error(a);
				break;
			elseif a then
				return b;
			end
		end
	end

	local debugging = {
		get_stack = function()
			return stack;
		end;
		get_IP = function()
			return IP;
		end
	};

	local function func(...)
		-- Remember the state of an activation of this same closure that may
		-- already be running (recursion); it is restored before returning.
		local prev_stack, prev_top, prev_IP = stack, top, IP
		local prev_vararg, prev_vararg_size = vararg, vararg_size
		local prev_environment = environment
		depth = depth + 1

		local local_stack = {};
		local ghost_stack = {};

		top = -1
		-- All register writes go through __newindex (local_stack stays
		-- empty), which tracks the highest register holding a truthy value.
		stack = setmetatable(local_stack, {
			__index = ghost_stack;
			__newindex = function(t, k, v)
				if k > top and v then
					top = k
				end
				ghost_stack[k] = v
			end;
		})

		local args = {...};
		local nargs = select("#", ...)
		local nparams = cache.arguments or 0

		-- Only the fixed parameters go into registers; the compiler assumes
		-- all other registers start out nil.
		for i = 0, nparams - 1 do
			local_stack[i] = args[i+1];
		end

		-- Everything past the fixed parameters is the vararg list (0-based;
		-- vararg_size is the index of its last element, -1 when empty).
		vararg = {}
		vararg_size = nargs - nparams - 1
		for i = 0, vararg_size do
			vararg[i] = args[nparams+i+1]
		end

		environment = newEnv or getfenv();
		IP = 1;
		local thread = coroutine.create(loop)
		local a, b = coroutine.resume(thread)
		local failed_at = IP

		depth = depth - 1
		if depth > 0 then
			-- Nested activation: give the outer one its state back. The
			-- outermost call leaves the state in place so the debugging
			-- accessors can still inspect it.
			stack, top, IP = prev_stack, prev_top, prev_IP
			vararg, vararg_size = prev_vararg, prev_vararg_size
			environment = prev_environment
		end

		if a then
			if b then
				return unpack(b, 1, b.n or #b);
			end
			return;
		else
			if advanced_debug then
				--TODO advanced debugging
			else
				handle_error(b, failed_at)
			end
		end
	end

	-- `debugging` exposes the interpreter state of this closure.
	return debugging, func;
end
-- Public module table.
return {
	--- Decode a Lua 5.1 binary chunk and wrap it in a callable function.
	-- @param bytecode      string holding the binary chunk
	-- @param env           optional environment table for global access;
	--                      defaults to the caller's environment
	-- @param customHandler optional function called with "line:message" on
	--                      runtime errors instead of raising
	-- @return function that interprets the chunk when called
	load_bytecode = function(bytecode, env, customHandler)
		newEnv = env or getfenv(2)
		customErrorHandler = customHandler
		local cache = decode_bytecode(bytecode);
		local _, func = create_wrapper(cache);
		return func;
	end;

	-- Utilities (Debug, Introspection, Testing, etc)
	utils = {
		decode_bytecode = decode_bytecode;
		create_wrapper = create_wrapper;
		-- Like load_bytecode, but returns create_wrapper's results directly
		-- (the debugging table first, then the function).
		debug_bytecode = function(bytecode)
			local cache = decode_bytecode(bytecode)
			return create_wrapper(cache);
		end;
	};
}


decode_bytecode(bytecode);

Bytecode这个就是你的二进制信息,可改成其他形式。

看到这里,你是头都大了,蒙圈了,这边虽然有decode,但是他解出来的不是完整的字节数组,只是有了所有的chunk信息,对lua不够熟悉的话,就基本玩完。熟悉的话,倒还好,可以根据chunk信息去生成luac文件。但是这里还有个非常坑的地方在instructions这里。Instructions这里说白了就是存储了4个字节,这4个字节可解析成opcode,A,B,C,也可以解析成opcode,A,Bx,也可解析成opcode,A,sBx。也就是虽然你可以dump出这4个字节,但是vm engine对这4个字节的解析跟标准的解析是不一样的,也就是说你需要用vm engine解析出来的数据,去构成标准的。但是你又不知道到底是用opcode,A,Bx去构建,还是opcode,A,B,C去构建。当然你可以说我可以根据opcode去构建。当然是可以。但是opcode的顺序又乱了,你又得去找opcode的顺序。这种动态的看起来非常复杂,但是有源码,虽然混淆了,但是还是可以看看的,还是有希望还原的。时间和精力的问题。好了,就这样,难得写这么多,可能很多人也看不懂这个,或者看不到这篇文章吧。


以上是关于lua的llvm的反混淆与还原的主要内容,如果未能解决你的问题,请参考以下文章

如何用llvm-obfuscator混淆代码

Android 应用安全风险与防范

如何对 Lua 脚本进行去混淆处理?

LLVM代码及指令选择分析

SpringBoot 代码混淆,防止反编译代码泄露

Redis 分布式锁混淆结果与 Lua 脚本