原子变量的性能问题

Posted CobbLiu

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了原子变量的性能问题相关的知识,希望对你有一定的参考价值。

#include <stdio.h>
#include <sys/time.h>

/* Number of increments performed by each timed loop. */
enum { ITERATIONS = 1000000 };

/*
 * Return the interval from *start to *end in microseconds.
 *
 * The arithmetic is done in long long so the result always matches the
 * "%lld" conversion used when printing it, whatever the widths of
 * time_t and suseconds_t are on the target platform.
 */
static long long elapsed_us(const struct timeval *start, const struct timeval *end)
{
    long long sec = (long long)end->tv_sec - (long long)start->tv_sec;
    long long usec = (long long)end->tv_usec - (long long)start->tv_usec;

    return sec * 1000000LL + usec;
}

/*
 * Micro-benchmark: time ITERATIONS plain increments of a volatile int
 * against the same number of __sync_fetch_and_add() calls, and print
 * both durations in microseconds.
 *
 * Returns 0.
 */
int main(void)
{
    /* volatile keeps the compiler from optimizing the m++ loop away.
     * Initialized so the loop does not read an indeterminate value. */
    volatile int m = 0;

    struct timeval start;
    gettimeofday(&start, NULL);
    for (int i = 0; i < ITERATIONS; i++) {
        m++;
    }
    struct timeval end;
    gettimeofday(&end, NULL);

    printf("add cost %lldus\n", elapsed_us(&start, &end));

    /* Target of the atomic increments; initialized for the same reason as m. */
    int n = 0;
    gettimeofday(&start, NULL);
    for (int i = 0; i < ITERATIONS; i++) {
        __sync_fetch_and_add(&n, 1);
    }
    gettimeofday(&end, NULL);
    printf("atomic cost %lldus\n", elapsed_us(&start, &end));

    return 0;
}

之所以用volatile修饰m,是为了阻止编译器对m++做优化(否则在-O2下,这个结果从未被使用的循环可能被整个消除,测不出耗时)。

使用O2编译并查看性能:

$gcc -O2 -std=c99 -o perf atomic_perf.c
$./perf
add cost 2638us
atomic cost 8510us

  可见,如果你的变量压根不会被多线程访问,并且对性能要求极为苛刻,就不要使用原子变量了。因为在有些平台上“A full memory barrier is created when this function is invoked”,即每次调用都会产生一个完整的内存屏障;例如在下面的x86-64汇编中,原子加法被编译成了带lock前缀的指令。

 

可以通过下面的方法看到m++和原子操作的汇编之间的区别:

$gcc -O2 -std=c99 -g -c atomic_perf.c
$objdump -Sl atomic_perf.o

atomic_perf.o:     file format elf64-x86-64

Disassembly of section .text:

0000000000000000 <main>:
main():
/home/admin/jinxin/test/atomic_perf.c:5
#include <stdio.h>
#include <sys/time.h>

int main()
{
   0: 55                    push   %rbp
/home/admin/jinxin/test/atomic_perf.c:9
    volatile int m;

    struct timeval start;
    gettimeofday(&start, NULL);
   1: 31 f6                 xor    %esi,%esi
/home/admin/jinxin/test/atomic_perf.c:5
   3: 53                    push   %rbx
   4: 48 83 ec 38           sub    $0x38,%rsp
/home/admin/jinxin/test/atomic_perf.c:9
   8: 48 8d 6c 24 10        lea    0x10(%rsp),%rbp
   d: 48 89 ef              mov    %rbp,%rdi
  10: e8 00 00 00 00        callq  15 <main+0x15>
  15: 31 d2                 xor    %edx,%edx
/home/admin/jinxin/test/atomic_perf.c:11
    for (int i = 0; i < 1000000; i++) {
        m++;
  17: 8b 44 24 2c           mov    0x2c(%rsp),%eax
/home/admin/jinxin/test/atomic_perf.c:10
  1b: 83 c2 01              add    $0x1,%edx
/home/admin/jinxin/test/atomic_perf.c:11
  1e: 83 c0 01              add    $0x1,%eax
/home/admin/jinxin/test/atomic_perf.c:10
  21: 81 fa 40 42 0f 00     cmp    $0xf4240,%edx
/home/admin/jinxin/test/atomic_perf.c:11
  27: 89 44 24 2c           mov    %eax,0x2c(%rsp)
/home/admin/jinxin/test/atomic_perf.c:10
  2b: 75 ea                 jne    17 <main+0x17>
/home/admin/jinxin/test/atomic_perf.c:14
    }
    struct timeval end;
    gettimeofday(&end, NULL);
  2d: 31 f6                 xor    %esi,%esi
  2f: 48 89 e7              mov    %rsp,%rdi
  32: e8 00 00 00 00        callq  37 <main+0x37>
/home/admin/jinxin/test/atomic_perf.c:16

    printf("add cost %lldus\n", (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec));
  37: 48 8b 04 24           mov    (%rsp),%rax
  3b: 48 2b 44 24 10        sub    0x10(%rsp),%rax
  40: bf 00 00 00 00        mov    $0x0,%edi
  45: 48 8b 74 24 08        mov    0x8(%rsp),%rsi
  4a: 48 2b 74 24 18        sub    0x18(%rsp),%rsi
  4f: 48 69 c0 40 42 0f 00  imul   $0xf4240,%rax,%rax
  56: 48 01 c6              add    %rax,%rsi
  59: 31 c0                 xor    %eax,%eax
  5b: e8 00 00 00 00        callq  60 <main+0x60>
/home/admin/jinxin/test/atomic_perf.c:19

    int n;
    gettimeofday(&start, NULL);
  60: 31 f6                 xor    %esi,%esi
  62: 48 89 ef              mov    %rbp,%rdi
  65: e8 00 00 00 00        callq  6a <main+0x6a>
  6a: 48 8d 54 24 28        lea    0x28(%rsp),%rdx
  6f: 31 c0                 xor    %eax,%eax
/home/admin/jinxin/test/atomic_perf.c:21
    for (int i = 0; i < 1000000; i++) {
        __sync_fetch_and_add(&n, 1);
  71: f0 83 02 01           lock addl $0x1,(%rdx)
/home/admin/jinxin/test/atomic_perf.c:20
  75: 83 c0 01              add    $0x1,%eax
  78: 3d 40 42 0f 00        cmp    $0xf4240,%eax
  7d: 75 f2                 jne    71 <main+0x71>
/home/admin/jinxin/test/atomic_perf.c:23
    }
    gettimeofday(&end, NULL);
  7f: 48 89 e7              mov    %rsp,%rdi
  82: 31 f6                 xor    %esi,%esi
  84: e8 00 00 00 00        callq  89 <main+0x89>
/home/admin/jinxin/test/atomic_perf.c:24
    printf("atomic cost %lldus\n", (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec));
  89: 48 8b 04 24           mov    (%rsp),%rax
  8d: 48 2b 44 24 10        sub    0x10(%rsp),%rax
  92: bf 00 00 00 00        mov    $0x0,%edi
  97: 48 8b 74 24 08        mov    0x8(%rsp),%rsi
  9c: 48 2b 74 24 18        sub    0x18(%rsp),%rsi
  a1: 48 69 c0 40 42 0f 00  imul   $0xf4240,%rax,%rax
  a8: 48 01 c6              add    %rax,%rsi
  ab: 31 c0                 xor    %eax,%eax
  ad: e8 00 00 00 00        callq  b2 <main+0xb2>
/home/admin/jinxin/test/atomic_perf.c:27

    return 0;
}
  b2: 48 83 c4 38           add    $0x38,%rsp
  b6: 31 c0                 xor    %eax,%eax
  b8: 5b                    pop    %rbx
  b9: 5d                    pop    %rbp
  ba: c3                    retq

  

以上是关于原子变量的性能问题的主要内容,如果未能解决你的问题,请参考以下文章

原子片段:原子编辑器中的多行片段

jedis解决高并发的一些学习

不同场景下的CUDA原子操作性能

C++多线程1.2-线程安全的保证——互斥量mutex(锁)和原子变量atomic

C++多线程1.2-线程安全的保证——互斥量mutex(锁)和原子变量atomic

Java多线程之Atomic:原子变量与原子类