lua的llvm的反混淆与还原
Posted 大叔学算法
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了lua的llvm的反混淆与还原相关的知识,希望对你有一定的参考价值。
最近接触到一种lua的加密方式。以前lua的防护一般也就是luac,或者用luajit编码后就算做好防护了,最多是在编码文件的基础上再用某种算法给文件加密一下。但是这些其实都有对应的反编译软件可以对其进行反编译,效果还行,够你看了。尤其是luac,如果编码的时候没有去掉debug信息,反编译的效果基本上非常ok。再高一个等级的,是把lua的chunk结构给改了,或者把opcode给改了,这样就需要先把opcode的顺序搞定,才可以进行反编译。这种加密方式已经很ok了,会让人花大量的时间去找opcode(当然其实也没那么久:opcode的顺序也就几十万种吧,具体没算过,最多几百万种,那么开发个工具循环遍历一下就是了,几百万种可能对计算机而言也是很快的;或者有些文件是公用库,网上有一堆源码,对其编码、解析一下,就可以找到opcode的顺序了)。这些所谓的加密都有个固定文件,而这个固定文件的格式还是比较好搞定的。最近遇到一种变态的,具体是什么说法咱也叫不上来,不是专业的,这里粗略说下具体实现形式:明文 → luac编码 → 软件解析编码后的文件得到chunk信息 → 改掉chunk信息,并随机生成自己的vm engine,使chunk信息适应这个vm engine → 把修改后的chunk信息保存为二进制数据,并对这个二进制数据随机加入混淆信息 → 对vm engine进行混淆,载入二进制数据 → vm engine解析二进制数据,得到chunk信息,动态载入执行。说了这么多,还是先来看个标准的实现,弄明白了再去理解那种混淆、动态随机之类的东西。
-- Module-level state shared by every wrapper created in this file.

-- Checked by the wrapper's failure path: when truthy, the default error
-- handling is skipped (the alternative branch is still a TODO). It is not
-- assigned anywhere in the visible source, so it is always nil here.
local advanced_debug
-- Environment table used for global reads/writes performed by the executed
-- bytecode; assigned by load_bytecode.
local newEnv
-- Optional callback that receives a "line:message" string when execution
-- fails, instead of an error being raised; assigned by load_bytecode.
local customErrorHandler
-- Operand layout of each instruction, listed in opcode order
-- (same order as lua_opcode_names).
-- The decoder looks an entry up with `opcode + 1` because opcodes start at 0
-- while this table is 1-based. The layout decides which fields are extracted
-- from the 32-bit instruction word: "ABC" -> A, B, C; "ABx" -> A, Bx;
-- "AsBx" -> A, sBx.
local lua_opcode_types = {
"ABC", "ABx", "ABC", "ABC",
"ABC", "ABx", "ABC", "ABx",
"ABC", "ABC", "ABC", "ABC",
"ABC", "ABC", "ABC", "ABC",
"ABC", "ABC", "ABC", "ABC",
"ABC", "ABC", "AsBx", "ABC",
"ABC", "ABC", "ABC", "ABC",
"ABC", "ABC", "ABC", "AsBx",
"AsBx", "ABC", "ABC", "ABC",
"ABx", "ABC",
}
-- Instruction mnemonics in opcode order: entry i names opcode i - 1.
-- Not referenced by any live code in the visible source; kept as a reference
-- for the numeric keys used in the interpreter's dispatch table.
local lua_opcode_names = {
"MOVE", "LOADK", "LOADBOOL", "LOADNIL",
"GETUPVAL", "GETGLOBAL", "GETTABLE", "SETGLOBAL",
"SETUPVAL", "SETTABLE", "NEWTABLE", "SELF",
"ADD", "SUB", "MUL", "DIV",
"MOD", "POW", "UNM", "NOT",
"LEN", "CONCAT", "JMP", "EQ",
"LT", "LE", "TEST", "TESTSET",
"CALL", "TAILCALL", "RETURN", "FORLOOP",
"FORPREP", "TFORLOOP", "SETLIST", "CLOSE",
"CLOSURE", "VARARG"
};
-- Disabled: would build the reverse map (mnemonic -> 1-based table index).
--[[
local lua_opcode_numbers = {};
for number, name in next, lua_opcode_names do
lua_opcode_numbers[name] = number;
end
--]]
--- Extract bits from an integer.
-- Bit positions are 1-based and count up from the least significant bit.
-- @author Stravant
-- @param input number to read bits from
-- @param n position of the single bit to test, or of the first bit of a range
-- @param n2 optional position of the last bit of the range (inclusive)
-- @return 1 or 0 when only `n` is given; otherwise bits n..n2 packed into one
--         number, with bit `n` as the least significant digit (0 when n > n2)
local function get_bits(input, n, n2)
    if not n2 then
        -- Single bit: it is set when the remainder modulo 2^n reaches 2^(n-1).
        local weight = 2^(n - 1)
        if input % (weight + weight) >= weight then
            return 1
        end
        return 0
    end

    -- Range: accumulate each bit at its place value, lowest bit first.
    local packed = 0
    local place = 0
    for position = n, n2 do
        packed = packed + 2^place * get_bits(input, position)
        place = place + 1
    end
    return packed
end
--- Decode a Lua 5.1 binary chunk into plain Lua tables.
-- The header is validated first (signature, version 0x51, endianness flag,
-- int / size_t widths of 4 or 8 bytes, 4-byte instructions, 8-byte
-- floating-point numbers), then the top-level function prototype is decoded
-- recursively.
-- (Article note: this decoding step is where a custom format can diverge
-- from the standard one.)
-- @param bytecode string holding the binary chunk
-- @return table describing the main function; see decode_chunk for fields
-- Raises an error when the header does not describe a supported chunk.
local function decode_bytecode(bytecode)
    local index = 1 -- 1-based read cursor into `bytecode`
    local big_endian = false
    local int_size
    local size_t

    -- Width-dependent readers; selected once the header has been parsed.
    local get_int, get_size_t

    -- Binary decoding helper functions
    local get_int8, get_int32, get_int64, get_float64, get_string
    do
        -- Read one unsigned byte.
        function get_int8()
            local a = bytecode:byte(index, index)
            index = index + 1
            return a
        end

        -- Read a 4-byte unsigned integer, honouring the chunk's byte order.
        function get_int32()
            local a, b, c, d = bytecode:byte(index, index + 3)
            index = index + 4
            if big_endian then
                return a*16777216 + b*65536 + c*256 + d
            end
            return d*16777216 + c*65536 + b*256 + a
        end

        -- Read an 8-byte unsigned integer as two 32-bit words.
        function get_int64()
            local first = get_int32()
            local second = get_int32()
            if big_endian then
                return first*4294967296 + second
            end
            return second*4294967296 + first
        end

        -- Read an IEEE 754 double-precision number.
        function get_float64()
            local first = get_int32()
            local second = get_int32()
            -- `high` carries sign, exponent and the top 20 mantissa bits.
            local low, high = first, second
            if big_endian then
                low, high = second, first
            end
            local sign = -2*get_bits(high, 32) + 1
            local exponent = get_bits(high, 21, 31)
            local mantissa = get_bits(high, 1, 20)*(2^32) + low
            if exponent == 0 then
                -- Zero and subnormals have no implicit leading 1 bit.
                if mantissa == 0 then
                    return sign*0.0
                end
                return sign*(2^(-1022))*(mantissa/(2^52))
            elseif exponent == 2047 then
                -- Infinity (empty mantissa) or NaN.
                if mantissa == 0 then
                    return sign*(1/0)
                end
                return 0/0
            end
            return sign*(2^(exponent - 1023))*(mantissa/(2^52) + 1)
        end

        -- Read a string. With `len`, exactly `len` bytes are read; without
        -- it, a size_t length prefix is read first and nil is returned when
        -- that length is 0.
        function get_string(len)
            if not len then
                len = get_size_t()
                if len == 0 then return end
            end
            local str = bytecode:sub(index, index + len - 1)
            index = index + len
            return str
        end
    end
    -- The readers above follow the standard layout. (Article note: an
    -- obfuscator is free to change every one of them.)

    --- Decode one function prototype at the current cursor position.
    -- A chunk generally contains:
    --   name, upvalue count, parameter count, vararg flag, max stack size
    --   instructions  - the code
    --   constants     - literal values
    --   prototypes    - nested functions
    --   debug info    - line numbers (local and upvalue names are skipped)
    -- @return table with fields name, first_line, last_line, upvalues,
    --         arguments, varg, stack, instructions (1-based), constants
    --         (0-based), prototypes (0-based) and debug.lines (1-based)
    local function decode_chunk()
        local chunk
        local instructions = {}
        local constants = {}
        local prototypes = {}
        local debug = {
            lines = {};
        }
        chunk = {
            instructions = instructions;
            constants = constants;
            prototypes = prototypes;
            debug = debug;
        }
        local num

        chunk.name = get_string() -- source name; nil when its length is 0
        chunk.first_line = get_int() -- First line
        chunk.last_line = get_int() -- Last line
        -- Drop the last byte of the stored name (the string terminator).
        if chunk.name then chunk.name = chunk.name:sub(1, -2) end
        chunk.upvalues = get_int8()
        chunk.arguments = get_int8()
        chunk.varg = get_int8()
        chunk.stack = get_int8()
        -- The reads above follow the standard order. (Article note: some of
        -- these fields are optional and can be dropped or reordered.)

        -- TODO: realign lists to 1
        -- Decode instructions
        do
            num = get_int()
            for i = 1, num do
                local instruction = {
                    -- opcode = opcode number;
                    -- type = [ABC, ABx, AsBx]
                    -- A, B, C, Bx, or sBx depending on type
                }
                local data = get_int32()
                local opcode = get_bits(data, 1, 6)
                local layout = lua_opcode_types[opcode + 1]
                instruction.opcode = opcode
                instruction.type = layout
                instruction.A = get_bits(data, 7, 14)
                if layout == "ABC" then
                    instruction.B = get_bits(data, 24, 32)
                    instruction.C = get_bits(data, 15, 23)
                elseif layout == "ABx" then
                    instruction.Bx = get_bits(data, 15, 32)
                elseif layout == "AsBx" then
                    -- sBx is stored with a bias of 131071.
                    instruction.sBx = get_bits(data, 15, 32) - 131071
                end
                instructions[i] = instruction
            end
        end

        -- Decode constants (tag 1 boolean, 3 number, 4 string; any other tag
        -- leaves `data` nil)
        do
            num = get_int()
            for i = 1, num do
                local constant = {
                    -- type = constant type;
                    -- data = constant data;
                }
                local tag = get_int8()
                constant.type = tag
                if tag == 1 then
                    constant.data = (get_int8() ~= 0)
                elseif tag == 3 then
                    constant.data = get_float64()
                elseif tag == 4 then
                    constant.data = get_string():sub(1, -2)
                end
                constants[i-1] = constant
            end
        end

        -- Decode prototypes, i.e. the nested functions
        do
            num = get_int()
            for i = 1, num do
                prototypes[i-1] = decode_chunk()
            end
        end

        -- Decode debug info. (Article note: not all of it is required;
        -- leaving it out makes decompilation harder.)
        do
            -- line numbers
            local data = debug.lines
            num = get_int()
            for i = 1, num do
                data[i] = get_int32()
            end
            -- locals: read to advance the cursor, values are discarded
            num = get_int()
            for i = 1, num do
                get_string():sub(1, -2) -- local name
                get_int32() -- local start PC
                get_int32() -- local end PC
            end
            -- upvalue names: read to advance the cursor, values are discarded
            num = get_int()
            for i = 1, num do
                get_string() -- upvalue name
            end
        end

        return chunk
    end

    -- Verify the bytecode header
    do
        assert(get_string(4) == "\27Lua", "Lua bytecode expected.")
        assert(get_int8() == 0x51, "Only Lua 5.1 is supported.")
        get_int8() -- format byte (0 = official bytecode)
        big_endian = (get_int8() == 0)
        int_size = get_int8()
        size_t = get_int8()
        if int_size == 4 then
            get_int = get_int32
        elseif int_size == 8 then
            get_int = get_int64
        else
            -- TODO: refactor errors into table
            error("Unsupported bytecode target platform")
        end
        if size_t == 4 then
            get_size_t = get_int32
        elseif size_t == 8 then
            get_size_t = get_int64
        else
            error("Unsupported bytecode target platform")
        end
        -- Instruction size 4, number size 8, floating-point numbers.
        assert(get_string(3) == "\4\8\0",
            "Unsupported bytecode target platform")
    end

    return decode_chunk()
end
--- Capture a variable number of values together with their exact count.
-- @param ... any values, nil included
-- @return the number of values received, as reported by select("#", ...)
-- @return a table holding those values at indices 1..count
local function handle_return(...)
    return select("#", ...), {...}
end
--- Build an executable Lua function for one decoded function prototype.
-- Each instruction is dispatched through `opcode_funcs`, keyed by the
-- standard opcode numbers. (Article note: an obfuscator can renumber them.)
-- @param cache decoded prototype as produced by decode_bytecode; reads
--        instructions, constants, prototypes, debug.lines and arguments
-- @param upvalues table-like object indexed by upvalue number; may be
--        omitted for a prototype that uses no upvalues
-- @return debugging table with get_stack() and get_IP() accessors
-- @return func the callable that runs the prototype with the given arguments
local function create_wrapper(cache, upvalues)
    local instructions = cache.instructions
    local constants = cache.constants
    local prototypes = cache.prototypes

    -- Interpreter state. These upvalues are shared by every activation of the
    -- returned function, so `func` saves and restores them around nested
    -- (recursive / re-entrant) activations.
    local stack, top
    local environment
    local IP = 1 -- instruction pointer (1-based index into `instructions`)
    local vararg, vararg_size
    local activations = 0 -- number of activations currently running

    -- Resolve an RK operand: values above 255 select constant (operand - 256),
    -- anything else selects a register. Written as an explicit branch because
    -- an `and ... or` chain would drop constants whose value is false.
    local function rk(operand)
        if operand > 255 then
            return constants[operand - 256].data
        end
        return stack[operand]
    end

    -- Copy registers first..last into a fresh argument list (may have holes).
    local function collect(registers, first, last)
        local list = {}
        local n = 0
        for i = first, last do
            n = n + 1
            list[n] = registers[i]
        end
        return list
    end

    -- A handler returning `true` ends the activation; its second result (if
    -- any) is the list of return values with the exact count in field `n`.
    local opcode_funcs = {
        [0] = function(instruction) -- MOVE
            stack[instruction.A] = stack[instruction.B]
        end,
        [1] = function(instruction) -- LOADK
            stack[instruction.A] = constants[instruction.Bx].data
        end,
        [2] = function(instruction) -- LOADBOOL
            stack[instruction.A] = instruction.B ~= 0
            if instruction.C ~= 0 then
                IP = IP + 1
            end
        end,
        [3] = function(instruction) -- LOADNIL
            local stack = stack
            for i = instruction.A, instruction.B do
                stack[i] = nil
            end
        end,
        [4] = function(instruction) -- GETUPVAL
            stack[instruction.A] = upvalues[instruction.B]
        end,
        [5] = function(instruction) -- GETGLOBAL
            local key = constants[instruction.Bx].data
            stack[instruction.A] = environment[key]
        end,
        [6] = function(instruction) -- GETTABLE
            local key = rk(instruction.C)
            stack[instruction.A] = stack[instruction.B][key]
        end,
        [7] = function(instruction) -- SETGLOBAL
            local key = constants[instruction.Bx].data
            environment[key] = stack[instruction.A]
        end,
        [8] = function(instruction) -- SETUPVAL
            upvalues[instruction.B] = stack[instruction.A]
        end,
        [9] = function(instruction) -- SETTABLE
            local key = rk(instruction.B)
            local value = rk(instruction.C)
            stack[instruction.A][key] = value
        end,
        [10] = function(instruction) -- NEWTABLE
            stack[instruction.A] = {}
        end,
        [11] = function(instruction) -- SELF
            local A = instruction.A
            local object = stack[instruction.B]
            local key = rk(instruction.C)
            stack[A + 1] = object
            stack[A] = object[key]
        end,
        [12] = function(instruction) -- ADD
            stack[instruction.A] = rk(instruction.B) + rk(instruction.C)
        end,
        [13] = function(instruction) -- SUB
            stack[instruction.A] = rk(instruction.B) - rk(instruction.C)
        end,
        [14] = function(instruction) -- MUL
            stack[instruction.A] = rk(instruction.B) * rk(instruction.C)
        end,
        [15] = function(instruction) -- DIV
            stack[instruction.A] = rk(instruction.B) / rk(instruction.C)
        end,
        [16] = function(instruction) -- MOD
            stack[instruction.A] = rk(instruction.B) % rk(instruction.C)
        end,
        [17] = function(instruction) -- POW
            stack[instruction.A] = rk(instruction.B) ^ rk(instruction.C)
        end,
        [18] = function(instruction) -- UNM
            stack[instruction.A] = -stack[instruction.B]
        end,
        [19] = function(instruction) -- NOT
            stack[instruction.A] = not stack[instruction.B]
        end,
        [20] = function(instruction) -- LEN
            stack[instruction.A] = #stack[instruction.B]
        end,
        [21] = function(instruction) -- CONCAT
            local B = instruction.B
            local result = stack[B]
            for i = B + 1, instruction.C do
                result = result .. stack[i]
            end
            stack[instruction.A] = result
        end,
        [22] = function(instruction) -- JMP
            IP = IP + instruction.sBx
        end,
        [23] = function(instruction) -- EQ
            if (rk(instruction.B) == rk(instruction.C)) ~= (instruction.A ~= 0) then
                IP = IP + 1
            end
        end,
        [24] = function(instruction) -- LT
            if (rk(instruction.B) < rk(instruction.C)) ~= (instruction.A ~= 0) then
                IP = IP + 1
            end
        end,
        [25] = function(instruction) -- LE
            if (rk(instruction.B) <= rk(instruction.C)) ~= (instruction.A ~= 0) then
                IP = IP + 1
            end
        end,
        [26] = function(instruction) -- TEST
            if (not not stack[instruction.A]) == (instruction.C == 0) then
                IP = IP + 1
            end
        end,
        [27] = function(instruction) -- TESTSET
            local stack = stack
            local B = stack[instruction.B]
            if (not not B) == (instruction.C == 0) then
                IP = IP + 1
            else
                stack[instruction.A] = B
            end
        end,
        [28] = function(instruction) -- CALL
            local A = instruction.A
            local B = instruction.B
            local C = instruction.C
            local stack = stack
            local count, results
            if B ~= 1 then
                -- B == 0: arguments run up to the current top of the stack.
                local last = (B ~= 0) and (A + B - 1) or top
                local args = collect(stack, A + 1, last)
                count, results = handle_return(stack[A](unpack(args, 1, last - A)))
            else
                count, results = handle_return(stack[A]())
            end
            top = A - 1
            if C ~= 1 then
                -- C == 0: keep every result; otherwise exactly C - 1 of them.
                local wanted = (C ~= 0) and (C - 1) or count
                for i = 1, wanted do
                    stack[A + i - 1] = results[i]
                end
                if C == 0 then
                    -- Set explicitly so trailing nil/false results still count
                    -- for the instruction that consumes them.
                    top = A + count - 1
                end
            end
        end,
        [29] = function(instruction) -- TAILCALL
            local A = instruction.A
            local B = instruction.B
            local stack = stack
            local count, results
            if B ~= 1 then
                local last = (B ~= 0) and (A + B - 1) or top
                local args = collect(stack, A + 1, last)
                count, results = handle_return(stack[A](unpack(args, 1, last - A)))
            else
                count, results = handle_return(stack[A]())
            end
            results.n = count
            return true, results
        end,
        [30] = function(instruction) -- RETURN
            --TODO: CLOSE
            local A = instruction.A
            local B = instruction.B
            if B == 1 then
                return true
            end
            -- B == 0: return everything up to the current top of the stack.
            local last = (B == 0) and top or (A + B - 2)
            local output = collect(stack, A, last)
            output.n = (last >= A) and (last - A + 1) or 0
            return true, output
        end,
        [31] = function(instruction) -- FORLOOP
            local A = instruction.A
            local stack = stack
            local step = stack[A + 2]
            local index = stack[A] + step
            stack[A] = index
            if step > 0 then
                if index <= stack[A + 1] then
                    IP = IP + instruction.sBx
                    stack[A + 3] = index
                end
            else
                if index >= stack[A + 1] then
                    IP = IP + instruction.sBx
                    stack[A + 3] = index
                end
            end
        end,
        [32] = function(instruction) -- FORPREP
            local A = instruction.A
            local stack = stack
            stack[A] = stack[A] - stack[A + 2]
            IP = IP + instruction.sBx
        end,
        [33] = function(instruction) -- TFORLOOP
            local A = instruction.A
            local C = instruction.C
            local stack = stack
            local offset = A + 2
            local result = {stack[A](stack[A + 1], stack[A + 2])}
            for i = 1, C do
                stack[offset + i] = result[i]
            end
            if stack[A + 3] ~= nil then
                stack[A + 2] = stack[A + 3]
            else
                IP = IP + 1
            end
        end,
        [34] = function(instruction) -- SETLIST
            local A = instruction.A
            local B = instruction.B
            local C = instruction.C
            local stack = stack
            if C == 0 then
                error("NYI: extended SETLIST")
            else
                local offset = (C - 1) * 50
                local t = stack[A]
                if B == 0 then
                    -- Every register above the table up to the stack top.
                    B = top - A
                end
                for i = 1, B do
                    t[offset + i] = stack[A + i]
                end
            end
        end,
        [35] = function(instruction) -- CLOSE
            -- Not implemented; intentionally a no-op.
            --io.stderr:write("NYI: CLOSE")
            --io.stderr:flush()
        end,
        [36] = function(instruction) -- CLOSURE
            local proto = prototypes[instruction.Bx]
            local instructions = instructions
            local stack = stack
            -- Maps upvalue number -> {segment, offset}; reads and writes of
            -- the new function's upvalues are forwarded to that slot.
            local indices = {}
            local new_upvals = setmetatable({},
                {
                    __index = function(t, k)
                        local upval = indices[k]
                        return upval.segment[upval.offset]
                    end,
                    __newindex = function(t, k, v)
                        local upval = indices[k]
                        upval.segment[upval.offset] = v
                    end
                }
            )
            -- The instructions following CLOSURE describe where each upvalue
            -- comes from; consume one per upvalue.
            for i = 1, proto.upvalues do
                local movement = instructions[IP]
                if movement.opcode == 0 then -- MOVE
                    indices[i - 1] = {segment = stack, offset = movement.B}
                elseif movement.opcode == 4 then -- GETUPVAL
                    indices[i - 1] = {segment = upvalues, offset = movement.B}
                end
                IP = IP + 1
            end
            local _, func = create_wrapper(proto, new_upvals)
            stack[instruction.A] = func
        end,
        [37] = function(instruction) -- VARARG
            local A = instruction.A
            local B = instruction.B
            local stack, vararg = stack, vararg
            -- B > 0 copies exactly B - 1 values; B == 0 copies all of them.
            local count = (B > 0) and (B - 1) or (vararg_size + 1)
            for i = 0, count - 1 do
                stack[A + i] = vararg[i]
            end
            if B == 0 then
                top = A + count - 1
            end
        end,
    }

    -- Report a runtime failure as "line:message". Calls customErrorHandler
    -- when one is installed, otherwise raises an error.
    -- @param b error value (any type)
    -- @param fault_IP optional value IP had when the failure happened;
    --        defaults to the current IP
    local function handle_error(b, fault_IP)
        -- IP is advanced before an instruction runs, so the instruction that
        -- was executing sits one slot behind it.
        local line = cache.debug.lines[(fault_IP or IP) - 1]
        local err = b
        if type(b) == "string" then
            -- Keep only the text after the last colon (drops position info).
            err = b:match("^.+:(.+)") or b
        end
        local message = tostring(line)..":"..tostring(err)
        if customErrorHandler then
            --coroutine.resume(customErrorHandler,tostring(line)..":"..tostring(err))
            customErrorHandler(message)
        else
            error(message, 3)
        end
    end

    -- Fetch/execute loop; runs until a handler reports completion or fails.
    -- @return list of return values (with field `n`), or nil
    local function loop()
        local instructions = instructions
        local instruction, a, b, ran
        while true do
            instruction = instructions[IP]
            --//print("IP: "..tostring(IP))
            --//print("INSTUCT: "..tostring(instruction))
            --//print("OPCODE: "..instruction.opcode)
            IP = IP + 1
            -- Dispatch on the opcode. (Article note: an extra mapping layer
            -- here is how a custom VM scrambles the opcode order.)
            ran, a, b = pcall(function()
                return opcode_funcs[instruction.opcode](instruction)
            end)
            if not ran then
                handle_error(a)
                break
            elseif a then
                return b
            end
        end
    end

    local debugging = {
        get_stack = function()
            return stack
        end;
        get_IP = function()
            return IP
        end
    }

    -- Run the prototype with the given arguments and return its results.
    local function func(...)
        -- Remember the enclosing activation's state (if any).
        local saved_stack, saved_top, saved_IP = stack, top, IP
        local saved_vararg, saved_vararg_size = vararg, vararg_size
        local saved_environment = environment
        activations = activations + 1

        local local_stack = {}
        local ghost_stack = {}
        top = -1
        -- `local_stack` stays empty, so every write goes through __newindex,
        -- which tracks the highest register holding a truthy value.
        stack = setmetatable(local_stack, {
            __index = ghost_stack;
            __newindex = function(t, k, v)
                if k > top and v then
                    top = k
                end
                ghost_stack[k] = v
            end;
        })

        local args = {...}
        local arg_count = select("#", ...)
        -- Arguments beyond the declared parameters form the vararg list.
        local fixed = cache.arguments or 0
        local extra = arg_count - fixed
        if extra < 0 then
            extra = 0
        end
        vararg = {}
        vararg_size = extra - 1
        for i = 0, arg_count - 1 do
            local_stack[i] = args[i + 1]
        end
        for i = 0, vararg_size do
            vararg[i] = args[fixed + i + 1]
        end

        environment = newEnv or getfenv()
        IP = 1
        local thread = coroutine.create(loop)
        local a, b = coroutine.resume(thread)
        local fault_IP = IP

        activations = activations - 1
        if activations > 0 then
            -- Nested activation: hand the state back to the caller. The
            -- outermost activation leaves its state in place so the
            -- debugging accessors can still inspect it afterwards.
            stack, top, IP = saved_stack, saved_top, saved_IP
            vararg, vararg_size = saved_vararg, saved_vararg_size
            environment = saved_environment
        end

        if a then
            if b then
                return unpack(b, 1, b.n)
            end
            return
        else
            if advanced_debug then
                --TODO advanced debugging
            else
                handle_error(b, fault_IP)
            end
        end
    end

    -- (Article note: all chunk information is reachable from here, which is
    -- very useful when analysing a protected script.)
    return debugging, func
end
--- Public interface of the interpreter module.
local exports = {}

--- Decode a binary chunk and wrap it in a callable Lua function.
-- Also installs the shared environment and error handler used by every
-- wrapper created in this file.
-- @param bytecode string containing the binary chunk
-- @param env optional environment table; defaults to the caller's
--        environment
-- @param customHandler optional function called with the error text instead
--        of an error being raised
-- @return function that executes the chunk
function exports.load_bytecode(bytecode, env, customHandler)
    newEnv = env or getfenv(2)
    customErrorHandler = customHandler
    local decoded = decode_bytecode(bytecode)
    local _, func = create_wrapper(decoded)
    return func
end

-- Utilities (Debug, Introspection, Testing, etc)
exports.utils = {
    decode_bytecode = decode_bytecode,
    create_wrapper = create_wrapper,
    -- Decode `bytecode` and return both the debugging accessors and the
    -- callable, exactly as create_wrapper produces them.
    debug_bytecode = function(bytecode)
        local decoded = decode_bytecode(bytecode)
        return create_wrapper(decoded)
    end,
}

return exports
decode_bytecode(bytecode);
Bytecode这个就是你的二进制信息,可改成其他形式。
看到这里,你可能头都大了,蒙圈了。这边虽然有decode,但是它解出来的不是完整的字节数组,只是得到了所有的chunk信息,对lua不够熟悉的话,基本就玩完了;熟悉的话倒还好,可以根据chunk信息去生成luac文件。但是这里还有个非常坑的地方,就在instructions这里。instructions说白了就是每条指令存储了4个字节,这4个字节可以解析成opcode、A、B、C,也可以解析成opcode、A、Bx,还可以解析成opcode、A、sBx。也就是说,虽然你可以dump出这4个字节,但是vm engine对这4个字节的解析跟标准的解析是不一样的,你需要拿vm engine解析出来的数据,去重新构成标准的指令。但是你又不知道到底是用opcode、A、Bx去构建,还是用opcode、A、B、C去构建。当然你可以说我可以根据opcode去判断,这当然可以,但是opcode的顺序又乱了,你又得去找opcode的顺序。这种动态的方式看起来非常复杂,但是有源码,虽然混淆了,还是可以看看的,还是有希望还原的,只是时间和精力的问题。好了,就这样,难得写这么多,可能很多人看不懂这个,或者根本看不到这篇文章吧。
以上是关于lua的llvm的反混淆与还原的主要内容,如果未能解决你的问题,请参考以下文章