[C/C++] C语言的自增操作在不同编译器下的差别

Posted 2023-03-02 jiangwei0512

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了“[C/C++] C语言的自增操作在不同编译器下的差别”相关的知识，希望对你有一定的参考价值。

示例代码

代码如下：

#include <stdio.h>

#define product(x) ((x)*(x))

int main(void)
{
  int i = 3, j, k;
  j = product(i++);   // (i++) * (i++)
  k = product(++i);   // (++i) * (++i)

  /*
   * Demonstration program: product() expands its argument twice, so each of
   * the two statements above modifies i twice inside a single expression
   * with no sequencing between the modifications. That is undefined behavior
   * in C and is intentional here -- the printed values depend on the
   * compiler (the article reports j == 12 with GCC and j == 9 with VS2015).
   * Do not "fix" the expressions; they are the subject of the experiment.
   */
  printf("%d %d\n", j, k);

  return 0;
}

执行结果

在Ubuntu18.04下通过GCC编译和执行的结果：

注意第一个值是12。

在Windows10下通过VS2015编译和执行的结果：

注意第一个值是9。

也就是说同样的代码，在不同的编译器下执行的结果不同！

反汇编分析

通过汇编代码分析，首先查看GCC的反汇编：

jw@ubuntu:~/code/tmp$ gcc -g test.c
jw@ubuntu:~/code/tmp$ gdb a.out
GNU gdb (Ubuntu 8.1.1-0ubuntu1) 8.1.1
Copyright (C) 2018 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from a.out...done.
(gdb) l
1	#include<stdio.h>
2	
3	#define product(x) ((x)*(x))
4	
5	int main(void)
6	{
7	  int i = 3, j, k;
8	  j = product(i++);   // (i++) * (i++)
9	  k = product(++i);   // (++i) * (++i)
10	
(gdb) b 7
Breakpoint 1 at 0x652: file test.c, line 7.
(gdb) r
Starting program: /home/jw/code/tmp/a.out 

Breakpoint 1, main () at test.c:7
7	  int i = 3, j, k;
(gdb) set disassembly-flavor intel
(gdb) disas
Dump of assembler code for function main:
   0x000055555555464a <+0>:	push   rbp
   0x000055555555464b <+1>:	mov    rbp,rsp
   0x000055555555464e <+4>:	sub    rsp,0x10
=> 0x0000555555554652 <+8>:	mov    DWORD PTR [rbp-0xc],0x3		; 给i赋值为3
   0x0000555555554659 <+15>:	mov    edx,DWORD PTR [rbp-0xc]	; 将i的值赋值给edx，所以edx = 3
   0x000055555555465c <+18>:	lea    eax,[rdx+0x1]			; rdx的值就是edx的值，就是3，这里就是3 + 1赋值给eax，所以eax = 4
   0x000055555555465f <+21>:	mov    DWORD PTR [rbp-0xc],eax	; 将eax的值赋值给i，也就是说，这里i发生了一次自增，此时i = 4
   0x0000555555554662 <+24>:	mov    eax,DWORD PTR [rbp-0xc]	; 将i的值赋值给eax，所以eax = 4
   0x0000555555554665 <+27>:	lea    ecx,[rax+0x1]			; rax的值就是eax的值，所以这里ecx = 5
   0x0000555555554668 <+30>:	mov    DWORD PTR [rbp-0xc],ecx	; 将ecx的值赋值给i，这里又是一次i的自增，此时i = 5
   0x000055555555466b <+33>:	imul   eax,edx					; 完成相乘的操作并赋值给eax，此时eax = 4, edx = 3, 所以eax = 12
   0x000055555555466e <+36>:	mov    DWORD PTR [rbp-0x8],eax	; 将eax的值赋值给j，所以j = 12，到这里结果已经出来了
   0x0000555555554671 <+39>:	add    DWORD PTR [rbp-0xc],0x1
   0x0000555555554675 <+43>:	add    DWORD PTR [rbp-0xc],0x1
   0x0000555555554679 <+47>:	mov    eax,DWORD PTR [rbp-0xc]
   0x000055555555467c <+50>:	imul   eax,DWORD PTR [rbp-0xc]
   0x0000555555554680 <+54>:	mov    DWORD PTR [rbp-0x4],eax
   0x0000555555554683 <+57>:	mov    edx,DWORD PTR [rbp-0x4]
   0x0000555555554686 <+60>:	mov    eax,DWORD PTR [rbp-0x8]
   0x0000555555554689 <+63>:	mov    esi,eax
   0x000055555555468b <+65>:	lea    rdi,[rip+0xa2]        # 0x555555554734
   0x0000555555554692 <+72>:	mov    eax,0x0
   0x0000555555554697 <+77>:	call   0x555555554520 <printf@plt>
   0x000055555555469c <+82>:	mov    eax,0x0
   0x00005555555546a1 <+87>:	leave  
   0x00005555555546a2 <+88>:	ret    
---Type <return> to continue, or q <return> to quit---
End of assembler dump.
(gdb) q
A debugging session is active.

	Inferior 1 [process 20205] will be killed.

Quit anyway? (y or n) y

查看带注释的汇编代码，可以看到j的值是如何计算出来的。

然后分析VS的反汇编：

00E73D8E  mov         dword ptr [i],3  		; i赋值为3
	j = product(i++);   // (i++) * (i++)
00E73D95  mov         eax,dword ptr [i] 	; eax = 3 
00E73D98  imul        eax,dword ptr [i]  	; eax = 9
00E73D9C  mov         dword ptr [j],eax  	; j = 9
00E73D9F  mov         ecx,dword ptr [i]  
00E73DA2  add         ecx,1  
00E73DA5  mov         dword ptr [i],ecx  	; 完成i的一次自增
00E73DA8  mov         edx,dword ptr [i]  
00E73DAB  add         edx,1  
00E73DAE  mov         dword ptr [i],edx  	; 再完成i的一次自增
	k = product(++i);   // (++i) * (++i)
00E73DB1  mov         eax,dword ptr [i]  
00E73DB4  add         eax,1  
00E73DB7  mov         dword ptr [i],eax  
00E73DBA  mov         ecx,dword ptr [i]  
00E73DBD  add         ecx,1  
00E73DC0  mov         dword ptr [i],ecx  
00E73DC3  mov         edx,dword ptr [i]  
00E73DC6  imul        edx,dword ptr [i]  
00E73DCA  mov         dword ptr [k],edx

相比之下VS的汇编更直观，很容易就得到了9。

通过反汇编，可以看到两边到底是如何处理这个代码的，但是也仅此而已，只能证明两个编译器通过不同的方式执行了代码，但是具体哪一种是“对”的呢？汇编代码无法告诉我们。

进一步分析

从直觉上讲，问题应该是出在i++这个操作上，它是后置的自增操作，如果是简单的j = i++;，则GCC和VS的执行结果是一致的。

但是如果是i = i++;呢？将之前的代码修改：

#include <stdio.h>

/*
 * Minimal reproduction of the compiler-dependent result discussed in the
 * article. `i = i++` both assigns to i and increments i in one expression
 * with no sequencing between the two side effects, which is undefined
 * behavior in C. The article reports 3 with GCC and 4 with VS2015.
 * The undefined behavior is deliberate; do not rewrite the expression.
 */
int main(void)
{
	int i = 3;
	i = i++;
	printf("%d\n", i);

	return 0;
}

查看GCC和VS的结果，发现确实也有差别，在GCC中的结果是3，而VS中的结果是4！

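从这里可以推出：在同一个表达式中对同一个变量多次修改（或者既修改又读取）时，各个操作的执行顺序存在多种可能，具体结果跟编译器相关。通过查看c语言的标准（在Project status and milestones (open-std.org)可以查看c语言的标准，这里参考的是C11），里面有相关的说明（C11 6.5节第2段）："If a side effect on a scalar object is unsequenced relative to either a different side effect on the same scalar object or a value computation using the value of the same scalar object, the behavior is undefined."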

也就是说C语言标准实际上将这种行为规定为未定义行为（undefined behavior），所以不同的编译器就可以有不同的处理结果了。关于这个问题，在Stack Overflow有更多的说明，比如c - Why are these constructs using pre and post-increment undefined behavior? - Stack Overflow

总结

通常我们不会写i = i++;这样的代码，但是当有宏定义的时候，当它扩展之后还是存在未定义行为的情况，此时不同编译器实现可能导致不同的结果。

所以在使用++这种操作的时候需要小心，极端一点，甚至可以不用，实际上作为单独的语句时，i++和i = i + 1可能并没有什么差异，比如在VS里面通过反汇编查看两种情况时的汇编代码：

	int i = 3;
00BA1A6E  mov         dword ptr [i],3  
	i++;
00BA1A75  mov         eax,dword ptr [i]  
00BA1A78  add         eax,1  
00BA1A7B  mov         dword ptr [i],eax

和

	int i = 3;
00C61A6E  mov         dword ptr [i],3  
	i = i + 1;
00C61A75  mov         eax,dword ptr [i]  
00C61A78  add         eax,1  
00C61A7B  mov         dword ptr [i],eax

实际上的汇编代码根本没有区别（这里使用的是默认的编译参数，也许使用不同的优化参数会有不同结果）。

以上是关于“[C/C++] C语言的自增操作在不同编译器下的差别”的主要内容，如果未能解决你的问题，请参考以下文章