x64 程序集优化

Posted

技术标签:

【中文标题】x64 程序集优化【英文标题】:x64 Assembly Optimization 【发布时间】:2017-04-25 12:52:43 【问题描述】:

我正在尝试优化几个组装程序的大小,我不关心速度。 我熟悉的优化有以下几种情况:

;the following two lines
    mov rbp, rsp
    add rbp, 50h
;can be changed to
    lea rbp, [rsp+50h]

我可以使用哪些其他优化来减少以下过程中的字节数? 我不是要求任何人完全优化这个过程,只是指出我可以改进的地方。

;get procedure address
asmGetProc proc
push rcx                    ;pointer to function name
push rdx                    ;DllBase address (IMAGE_DOS_HEADER pointer)
push r8                     ;pointer to IMAGE_EXPORT_DIRECTORY
push r9                     ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
                            ;IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals[r9]

push rbx                    ;saved pointer to function name

push r10                    ;pointer to IMAGE_EXPORT_DIRECTORY->AddressOfNames
                            ;pointer to IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals
                            ;pointer to IMAGE_EXPORT_DIRECTORY->AddressOfFunctions

mov rbx, rcx                ;save the function name pointer to rax

mov r8d, [rdx+3ch]          ;IMAGE_DOS_HEADER->e_lfanew (DWORD) (Offset to IMAGE_NT_HEADERS64)
add r8, rdx                 ;add DllBase to the e_lfanew offset
add r8, 88h                 ;18h - IMAGE_NT_HEADERS64->OptionalHeader (IMAGE_OPTIONAL_HEADER64) 18h bytes
                            ;70h - skip entire IMAGE_OPTIONAL_HEADER64 structure
                            ;r8 points to the IMAGE_DATA_DIRECTORY structure
mov r8d, [r8]               ;IMAGE_DATA_DIRECTORY->VirtualAddress (DWORD)
add r8, rdx                 ;add DllBase to VirtualAddress (IMAGE_EXPORT_DIRECTORY)

mov r9d, [r8+18h]           ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
mov r10d, [r8+20h]          ;IMAGE_EXPORT_DIRECTORY->AddressOfNames (DWORD)
add r10, rdx                ;add DllBase to AddressOfNames (DWORD)
for_each_function:
    ;decrement function name counter
    dec r9

    ;load current index of AddressOfNames into r11
    lea rcx, [r10 + 4 * r9]     ;AddressOfNames[i] - function string RVA (relative virtual address)
    mov ecx, [rcx]              ;r11d is the AddressOfName[r9] RVA (DWORD)
    add rcx, rdx                ;add DllBase to string RVA DWORD

    call asmHsh                 ;hash the function name
    cmp rax, rbx                ;compare the function name hash with the passed hash
jnz for_each_function           ;jump to top of loop is not a match


;r8  - export directory
;r9  - function name counter
;r10 - AddressOfNameOrdinals / AddressOfFunctions array
;rax - final point to function
mov r10d, [r8+24h]          ;IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals (DWORD)
add r10, rdx                ;add DllBase to AddressOfNameOrdinals DWORD
mov r9w, [r10+2*r9]         ;AddressOfNameOrdinals[2*r9] - (2*r9 = 2 bytes * function name counter)

mov r10d, [r8+1ch]          ;IMAGE_EXPORT_DIRECTORY->AddressOfFunctions (DWORD)
add r10, rdx                ;add DllBase to AddressOfFunctions DWORD
mov eax, [r10+r9*4]         ;AddressOfFunctions[4*r9] - (4*r9 = 4 bytes * function ordinal)
add rax, rdx                ;add DllBase to function ordinal RVA DWORD

pop r10
pop rbx
pop r9
pop r8
pop rdx
pop rcx

ret                         ;return from procedure
asmGetProc endp

编辑:添加 asmHsh(我的错)

;hash function (djb2)
asmHsh proc
;rcx - null terminated function name
push rcx
push rdx

mov rax, 5381d
hl:
    mov rdx, rax
    shl rax, 5
    add rax, rdx
    xor al, [rcx]
    inc rcx
;check for null termination
mov dl, [rcx]
cmp dl, 00h
jne short hl         

pop rdx
pop rcx
ret

asmHsh endp

【问题讨论】:

【参考方案1】:

在 64 位模式下为空间优化程序集应该: (1) 在足够的情况下使用 DWORD 宽度(减少前缀); (2) 坚持使用旧的 X86 寄存器 eax-edx / esi / edi / ebp (更严格的编码)。

希望下面所做的能说明这个想法。 ML64 将原始例程组装成 135 字节,修改后的版本组装成 103 字节。

变更示例:(1)使用rbp/rsi/rdi代替r8/r9/r10; (2) 可通过多组件地址模式完成的压缩指令序列; (3) 在已知数据为 32 位的情况下使用 DWORD dec; (4) 使用 IMUL 代替 shift/add。

" ;- " 在删除的行前面 " ;## delta " 附加到添加的行,其中 delta 是新代码产生的字节差。没有尝试调整 cmets。

;hash function (djb2)
asmHsh proc
;rcx - null terminated function name
push rcx
;-push rdx ;## -1

mov rax, 5381d
hl:
;-  mov rdx, rax
;-  shl rax, 5
;-  add rax, rdx
    imul rax,rax,33 ;## -6
    xor al, [rcx]
    inc rcx
;check for null termination
;-mov dl, [rcx]
;-cmp dl, 00h
cmp byte ptr [rcx], 00h ;## -2
jne short hl         

;-pop rdx ;## -1
pop rcx
ret
asmHsh endp

;get procedure address
asmGetProc proc
push rcx                    ;pointer to function name
push rdx                    ;DllBase address (IMAGE_DOS_HEADER pointer)
;-push r8                    ;pointer to IMAGE_EXPORT_DIRECTORY
push rbp ;## -1
;-push r9                     ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
push rsi ;## -1
                            ;IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals[r9]

push rbx                    ;saved pointer to function name

;-push r10                    ;pointer to IMAGE_EXPORT_DIRECTORY->AddressOfNames
push rdi ;## -1
                            ;pointer to IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals
                            ;pointer to IMAGE_EXPORT_DIRECTORY->AddressOfFunctions

mov rbx, rcx                ;save the function name pointer to rax

;-mov r8d, [rdx+3ch]          ;IMAGE_DOS_HEADER->e_lfanew (DWORD) (Offset to IMAGE_NT_HEADERS64)
mov ebp, [rdx+3ch] ;## -1
;-add r8, rdx                 ;add DllBase to the e_lfanew offset
;-add r8, 88h                 ;18h - IMAGE_NT_HEADERS64->OptionalHeader (IMAGE_OPTIONAL_HEADER64) 18h bytes
;-                            ;70h - skip entire IMAGE_OPTIONAL_HEADER64 structure
;-                            ;r8 points to the IMAGE_DATA_DIRECTORY structure
;-mov r8d, [r8]               ;IMAGE_DATA_DIRECTORY->VirtualAddress (DWORD)
mov ebp, [rbp+rdx+88h] ;## -5
;-add r8, rdx                 ;add DllBase to VirtualAddress (IMAGE_EXPORT_DIRECTORY)
add rbp, rdx ;## 0

;-mov r9d, [r8+18h]           ;IMAGE_EXPORT_DIRECTORY->NumberOfNames
mov esi, [rbp+18h] ;## -1
;-mov r10d, [r8+20h]          ;IMAGE_EXPORT_DIRECTORY->AddressOfNames (DWORD)
mov edi, [rbp+20h] ;## -1
;-add r10, rdx                ;add DllBase to AddressOfNames (DWORD)
add rdi, rdx ;## 0
for_each_function:
    ;decrement function name counter
;-  dec r9
    dec esi ;## -1

    ;load current index of AddressOfNames into r11
;-  lea rcx, [r10 + 4 * r9]     ;AddressOfNames[i] - function string RVA (relative virtual address)
;-  mov ecx, [rcx]              ;r11d is the AddressOfName[r9] RVA (DWORD)
    mov ecx, [rdi + 4 * rsi] ;## -3
    add rcx, rdx                ;add DllBase to string RVA DWORD

    call asmHsh                 ;hash the function name
    cmp rax, rbx                ;compare the function name hash with the passed hash
jnz for_each_function           ;jump to top of loop is not a match


;r8  - export directory
;r9  - function name counter
;r10 - AddressOfNameOrdinals / AddressOfFunctions array
;rax - final point to function
;-mov r10d, [r8+24h]          ;IMAGE_EXPORT_DIRECTORY->AddressOfNameOrdinals (DWORD)
mov edi, [rbp+24h];## -1
;-add r10, rdx                ;add DllBase to AddressOfNameOrdinals DWORD
add rdi, rdx; ## 0
;-mov r9w, [r10+2*r9]         ;AddressOfNameOrdinals[2*r9] - (2*r9 = 2 bytes * function name counter)
mov si, [rdi+2*rsi] ;## -1

;-mov r10d, [r8+1ch]          ;IMAGE_EXPORT_DIRECTORY->AddressOfFunctions (DWORD)
mov edi, [rbp+1ch] ;## -1
;-add r10, rdx                ;add DllBase to AddressOfFunctions DWORD
add rdi, rdx ;## 0
;-mov eax, [r10+r9*4]         ;AddressOfFunctions[4*r9] - (4*r9 = 4 bytes * function ordinal)
mov eax, [rdi+rsi*4] ; ## -1
add rax, rdx                ;add DllBase to function ordinal RVA DWORD

;-pop r10
pop rdi ; ## -1
pop rbx
;-pop r9
pop rsi
;-pop r8
pop rbp ;## -1
pop rdx
pop rcx

ret                         ;return from procedure
asmGetProc endp

【讨论】:

谢谢,这正是我想要的。我没有意识到使用 x86 寄存器更有效。为了完整起见,我在问题中添加了 asmHsh。 没问题。由于您提供了 asmHsh,我更新了我的答案,并尝试缩小 asmHsh。

以上是关于x64 程序集优化的主要内容,如果未能解决你的问题,请参考以下文章

使用 Side-by-Side 程序集加载 x64 或 x32 版本的 DLL

为啥我能够将 x64 程序集加载到 AnyCPU Prefer 32 位可执行文件中?

访问程序集 X64 函数中的结构字段

无法为 x64 和 x86 加载文件或程序集 'CefSharp.Wpf;只有一个作品

将 MVC 项目“任何 CPU”转换为 x64 - 无法加载程序集

.NET 4.0 NGEN x64 程序集上的符号解析不起作用