为啥并行多线程代码执行比顺序慢?

Posted

技术标签:

【中文标题】为啥并行多线程代码执行比顺序慢?【英文标题】:Why Parallel Multithread code execution is slower than sequential?为什么并行多线程代码执行比顺序慢? 【发布时间】:2021-06-18 12:59:45 【问题描述】:

我想借助多线程,用矩形法和梯形法计算积分,以便更快地得到结果。 不幸的是,在我的例子中,多线程代码的执行速度比普通的顺序代码还慢。 使用多线程比单线程慢得多——难道不应该反过来吗? 感觉线程越多,代码执行得越慢。

此外,我注意到线程越多,积分结果越不精确。这在使用梯形法计算积分时尤其明显。

这是我的代码: https://dotnetfiddle.net/jEPURO

using System;
using System.Diagnostics;
using System.Threading;
using System.Threading.Tasks;

namespace ParallelProgramming.ConsoleApp
{
    /// <summary>
    /// Console driver: runs every integration method once, times it with a
    /// <see cref="System.Diagnostics.Stopwatch"/> and prints one table row per run.
    /// </summary>
    class Program
    {
        // IntegrationMethod, IsParallel and ThreadValue are not used by the code shown here;
        // they are kept because they are public members.
        public static string IntegrationMethod { get; set; }

        /// <summary>Value passed as <c>xp</c> to every integration method.</summary>
        public static double IntervalBegin { get; set; }

        /// <summary>Value passed as <c>xk</c> to every integration method.</summary>
        public static double IntervalEnd { get; set; }

        /// <summary>Number of sub-intervals (<c>n</c>) passed to every integration method.</summary>
        public static int NPrecisionValue { get; set; }

        public static bool IsParallel { get; set; }
        public static int ThreadValue { get; set; }

        /// <summary>Stopwatch of the most recent measurement.</summary>
        public static Stopwatch Stopwatch { get; set; }

        /// <summary>Integral estimate returned by the most recent measurement.</summary>
        public static double Result { get; set; }

        static void Main(string[] args)
        {
            Console.WriteLine("Function                                  | Elapsed Time     | Estimated Integral");
            Console.WriteLine("-----------------------------------------------------------------");

            IntervalBegin = 5;
            IntervalEnd = -2;
            NPrecisionValue = 100000000;

            // Rectangle rule: sequential first, then the parallel variant with 1..4 threads.
            Measure(
                $"{nameof(NumericalIntegrationMethods.RectangularIntegration)} – Sequential",
                m => m.RectangularIntegration(IntervalBegin, IntervalEnd, NPrecisionValue));

            for (int threads = 1; threads <= 4; threads++)
            {
                int threadCount = threads;
                Measure(
                    $"{nameof(NumericalIntegrationMethods.RectangularIntegrationParallel)} – {ThreadLabel(threadCount)}",
                    m => m.RectangularIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, threadCount));
            }

            // Trapezoidal rule: same sequence of runs.
            Measure(
                $"{nameof(NumericalIntegrationMethods.TrapezoidalIntegration)} – Sequential",
                m => m.TrapezoidalIntegration(IntervalBegin, IntervalEnd, NPrecisionValue));

            for (int threads = 1; threads <= 4; threads++)
            {
                int threadCount = threads;
                Measure(
                    $"{nameof(NumericalIntegrationMethods.TrapezoidalIntegrationParallel)} – {ThreadLabel(threadCount)}",
                    m => m.TrapezoidalIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, threadCount));
            }

            Console.WriteLine("Press any key to continue...");
            Console.ReadLine();
        }

        // Formats the thread-count part of a row label: "1 Thread", "2 Threads", ...
        private static string ThreadLabel(int threadCount)
        {
            return threadCount == 1 ? "1 Thread" : $"{threadCount} Threads";
        }

        // Runs one computation on a fresh NumericalIntegrationMethods instance, stores the
        // stopwatch and the result in the static properties, and prints the table row.
        private static void Measure(string label, Func<NumericalIntegrationMethods, double> compute)
        {
            NumericalIntegrationMethods integral = new();
            Stopwatch = Stopwatch.StartNew();
            Result = compute(integral);
            Stopwatch.Stop();

            Console.WriteLine($"{label} | {Stopwatch.Elapsed} | {Result}");
        }
    }

    /// <summary>
    /// Rectangle-rule and trapezoidal-rule estimates of the integral of
    /// f(x) = x^2 + 2x, each in a sequential and a <see cref="Parallel"/>-based variant.
    /// </summary>
    public class NumericalIntegrationMethods
    {
        // Integrand: f(x) = x^2 + 2x.
        double Function(double x)
        {
            return x * x + 2 * x;
        }

        /// <summary>
        /// Rectangle rule sampled at xp + i*dx for i = 1..n, where dx = (xk - xp) / n.
        /// </summary>
        /// <param name="xp">Start of the interval.</param>
        /// <param name="xk">End of the interval.</param>
        /// <param name="n">Number of sub-intervals.</param>
        /// <returns>The estimated integral; 0 when <paramref name="n"/> is less than 1 (no samples).</returns>
        public double RectangularIntegration(double xp, double xk, int n)
        {
            double dx, integral = 0;
            dx = (xk - xp) / n;

            for (int i = 1; i <= n; i++)
            {
                integral += dx * Function(xp + i * dx);
            }

            return integral;
        }

        /// <summary>
        /// Composite trapezoidal rule: dx * ( (f(xp) + f(xk)) / 2 + sum of f(xp + i*dx) for i = 1..n-1 ).
        /// </summary>
        /// <param name="xp">Start of the interval.</param>
        /// <param name="xk">End of the interval.</param>
        /// <param name="n">Number of sub-intervals.</param>
        /// <returns>The estimated integral.</returns>
        public double TrapezoidalIntegration(double xp, double xk, int n)
        {
            double dx, integral = 0;
            dx = (xk - xp) / n;

            // Interior points only: the end point xk (i == n) is added below with weight 1/2.
            // Looping up to i == n would count f(xk) one and a half times.
            for (int i = 1; i < n; i++)
            {
                integral += Function(xp + i * dx);
            }

            integral += (Function(xp) + Function(xk)) / 2;
            integral *= dx;

            return integral;
        }

        /// <summary>
        /// Rectangle rule computed with <see cref="Parallel.For(int, int, ParallelOptions, Action{int})"/>.
        /// </summary>
        /// <param name="xp">Start of the interval.</param>
        /// <param name="xk">End of the interval.</param>
        /// <param name="n">Number of sub-intervals.</param>
        /// <param name="maxThreads">Value assigned to <see cref="ParallelOptions.MaxDegreeOfParallelism"/>.</param>
        /// <returns>The estimated integral (see the note in the body about concurrent runs).</returns>
        public double RectangularIntegrationParallel(double xp, double xk, int n, int maxThreads)
        {
            double dx, integral = 0;
            dx = (xk - xp) / n;

            Parallel.For(1, n + 1, new ParallelOptions { MaxDegreeOfParallelism = maxThreads }, i =>
            {
                // NOTE: "integral +=" is an unsynchronized read-modify-write on a variable
                // captured by every iteration. With more than one worker, updates can be lost.
                // This is the behaviour the question asks about and is left as posted.
                integral += dx * Function(xp + i * dx);
            });

            return integral;
        }

        /// <summary>
        /// Trapezoidal rule computed with <see cref="Parallel.For(int, int, ParallelOptions, Action{int})"/>.
        /// </summary>
        /// <param name="xp">Start of the interval.</param>
        /// <param name="xk">End of the interval.</param>
        /// <param name="n">Number of sub-intervals.</param>
        /// <param name="maxThreads">Value assigned to <see cref="ParallelOptions.MaxDegreeOfParallelism"/>.</param>
        /// <returns>The estimated integral (see the note in the body about concurrent runs).</returns>
        public double TrapezoidalIntegrationParallel(double xp, double xk, int n, int maxThreads)
        {
            double dx, integral = 0;
            dx = (xk - xp) / n;

            // Upper bound is exclusive, so this visits the interior points i = 1..n-1 only;
            // the end points are added below with weight 1/2.
            Parallel.For(1, n, new ParallelOptions { MaxDegreeOfParallelism = maxThreads }, i =>
            {
                // NOTE: same unsynchronized shared "integral +=" as in
                // RectangularIntegrationParallel; left as posted.
                integral += Function(xp + i * dx);
            });

            integral += (Function(xp) + Function(xk)) / 2;
            integral *= dx;

            return integral;
        }
    }
}


这是输出:

Function                                  | Elapsed Time     | Estimated Integral
-----------------------------------------------------------------
RectangularIntegration – Sequential | 00:00:00.9284260 | -65.33333210831276
RectangularIntegrationParallel – 1 Thread | 00:00:01.7040507 | -65.33333210831276
RectangularIntegrationParallel – 2 Threads | 00:00:01.7191484 | -65.33333210831276
RectangularIntegrationParallel – 3 Threads | 00:00:01.6888398 | -57.73164823448317
RectangularIntegrationParallel – 4 Threads | 00:00:01.5530828 | -65.33333210831276
TrapezoidalIntegration – Sequential | 00:00:00.7278303 | -65.33333333332568
TrapezoidalIntegrationParallel – 1 Thread | 00:00:01.4265208 | -65.33333333332568
TrapezoidalIntegrationParallel – 2 Threads | 00:00:02.3009881 | -33.110522448239216
TrapezoidalIntegrationParallel – 3 Threads | 00:00:01.6062253 | -57.02137898750542
TrapezoidalIntegrationParallel – 4 Threads | 00:00:01.9967140 | -18.120285251376426

为什么会这样?我究竟做错了什么?毕竟,使用的线程越多,结果应该越快。 4 线程应该比 3 快,3 线程应该比 2 快,依此类推。 如何使用更多线程获得更快的结果?

【问题讨论】:

老实说,我根本不会相信这些数字——既没有对 JIT 预热,也没有让线程池先完成扩容。如果你关心性能,强烈推荐 BenchmarkDotNet(可在 NuGet 上免费获取;它很容易上手,GitHub 页面上有示例)。我也有兴趣看看这里是否可以使用 SIMD(通过 Span 和 Vector)。 @Neil C++ 与 C# 的差距并没有那么明显——例如,CLR 内部的很多代码最近已从 C++ 迁移到 C#,而且性能还提高了。当然,这取决于具体场景,但以当前的 JIT 和其他特性来看,这并不是一场简单的比赛。 除非 Parallel.For 有一些我不知道的魔法,否则看起来您的并行实现会围绕 integral 变量产生大量争用。每次循环迭代都非常简单和简短,但它们都在竞争读/写这个共享变量。我会尝试先把结果累加到各自的非共享变量中,等各个迭代完成后再把这些变量相加。此外,如果 Parallel 会生成新线程(不确定是否如此),也会产生大量开销。而且,正如前面指出的,要确保一切都已预热等。 PS:你的做法是线程安全的吗?"+=" 不是原子操作,除非幕后还有进一步的同步魔法,否则最终可能得到无效的结果。 @Bogey 没有魔法。OP 的并行代码不是线程安全的,会产生不正确的结果。 【参考方案1】:

这是一种并行计算的方法,该方法允许每个线程独立工作,同时使用线程本地状态(accumulator 参数)将来自其他线程的干扰降至最低。一般来说,每个线程越少相互干扰,并行代码的效率就越高。

/// <summary>
/// Rectangle rule computed in parallel over index sub-ranges. Each worker sums its
/// sub-ranges into a local accumulator, which is merged into the shared total under a lock.
/// Samples xp + i*dx for i = 1..n, the same points as the sequential RectangularIntegration.
/// </summary>
/// <param name="xp">Start of the interval.</param>
/// <param name="xk">End of the interval.</param>
/// <param name="n">Number of sub-intervals.</param>
/// <param name="maxThreads">Value assigned to ParallelOptions.MaxDegreeOfParallelism.</param>
/// <returns>The estimated integral; 0 when <paramref name="n"/> is less than 1 (no samples).</returns>
public double RectangularIntegrationParallel(double xp, double xk, int n, int maxThreads)
{
    double dx, integral = 0;

    // No samples to sum. This also avoids handing Partitioner.Create an empty range.
    if (n < 1)
    {
        return integral;
    }

    dx = (xk - xp) / n;
    var locker = new object();

    // The range is [1, n + 1), i.e. i = 1..n. Starting at 0 would add an extra
    // dx * Function(xp) term that the sequential version does not have.
    Parallel.ForEach(Partitioner.Create(1, n + 1), new ParallelOptions
    {
        MaxDegreeOfParallelism = maxThreads
    }, () => 0.0D, (range, state, accumulator) =>
    {
        // One lambda call per sub-range; the inner loop touches only the local accumulator.
        for (int i = range.Item1; i < range.Item2; i++)
        {
            accumulator += dx * Function(xp + i * dx);
        }
        return accumulator;
    }, accumulator =>
    {
        // Merge each local accumulator into the shared total under the lock.
        lock (locker) { integral += accumulator; }
    });
    return integral;
}
使用 Partitioner.Create 方法的目的是对工作负载进行分块。借助 Partitioner,您可以把计算的总范围拆分为若干子范围,并为每个子范围调用一次 lambda,而不是为计算的每一次小循环迭代都调用一次 lambda。编译器无法内联(cannot be inlined)对 lambda 的调用,因此通常应避免每秒调用 lambda 数百万次。

本示例中使用的 Parallel.ForEach 重载具有以下签名:

// Signature of the Parallel.ForEach overload called in the example above.
// Roles below are described as that example uses them.
public static ParallelLoopResult ForEach<TSource, TLocal>(
    // The example passes the result of Partitioner.Create (index sub-ranges).
    Partitioner<TSource> source,
    // The example sets MaxDegreeOfParallelism here.
    ParallelOptions parallelOptions,
    // Supplies the starting value of the local accumulator (0.0D in the example).
    Func<TLocal> localInit,
    // Receives a sub-range, the loop state and the accumulator; returns the updated accumulator.
    Func<TSource, ParallelLoopState, TLocal, TLocal> body,
    // Receives a final accumulator value; the example adds it to the shared total under a lock.
    Action<TLocal> localFinally);

Parallel 类的替代方法是使用PLINQ。一般来说,PLINQ 会生成更简洁、更易于理解的代码,但通常会产生一些额外的开销。

/// <summary>
/// Rectangle rule computed with PLINQ: the index range is split into sub-ranges,
/// each sub-range is summed independently, and the partial sums are added with Sum().
/// Samples xp + i*dx for i = 1..n, the same points as the sequential RectangularIntegration.
/// </summary>
/// <param name="xp">Start of the interval.</param>
/// <param name="xk">End of the interval.</param>
/// <param name="n">Number of sub-intervals.</param>
/// <param name="maxThreads">Value passed to WithDegreeOfParallelism.</param>
/// <returns>The estimated integral; 0 when <paramref name="n"/> is less than 1 (no samples).</returns>
public double RectangularIntegrationParallel(double xp, double xk, int n, int maxThreads)
{
    // No samples to sum. This also avoids handing Partitioner.Create an empty range.
    if (n < 1)
    {
        return 0.0;
    }

    double dx = (xk - xp) / n;

    // The range is [1, n + 1), i.e. i = 1..n. Starting at 0 would add an extra
    // dx * Function(xp) term that the sequential version does not have.
    return Partitioner.Create(1, n + 1)
        .AsParallel()
        .WithDegreeOfParallelism(maxThreads)
        .Select(range =>
        {
            // Partial sum for one sub-range, held in a local so nothing is shared.
            double integral = 0.0;
            for (int i = range.Item1; i < range.Item2; i++)
            {
                integral += dx * Function(xp + i * dx);
            }
            return integral;
        })
        .Sum();
}
【讨论】:

很好地解释了,我确实看到并行代码执行的性能显着提高。谢谢!

以上是关于为啥并行多线程代码执行比顺序慢?的主要内容,如果未能解决你的问题,请参考以下文章

为啥执行矩阵乘法的两个进程并行运行比连续运行慢?

为啥以下简单的并行化代码比 Python 中的简单循环慢得多?

你应该这样去开发接口:Java多线程并行计算

为啥在Python里推荐使用多进程而不是多线程

多线程在我的 c# 程序中执行比顺序执行需要更多时间

并行代码比串行代码慢(值函数迭代示例)