为啥并行多线程代码执行比顺序慢?
Posted
技术标签:
【中文标题】为啥并行多线程代码执行比顺序慢?【英文标题】:Why Parallel Multithread code execution is slower than sequential?为什么并行多线程代码执行比顺序慢? 【发布时间】:2021-06-18 12:59:45 【问题描述】:我想使用多线程使用矩形和梯形方法执行积分计算以获得更快的结果。 不幸的是,就我而言,执行多线程代码比标准顺序代码慢。 使用多线程比单线程慢得多——毕竟,不应该反过来吗? 感觉线程越多,代码执行的越慢。
此外,我注意到线程越多,积分结果越不精确。这在使用梯形法计算积分时尤其明显。
这是我的代码: https://dotnetfiddle.net/jEPURO
using System;
using System.Diagnostics;
using System.Threading;
using System.Threading.Tasks;
namespace ParallelProgramming.ConsoleApp
// Console benchmark driver: runs the sequential and the Parallel.For variants of the
// rectangular and trapezoidal integration methods, timing each call with a Stopwatch
// and printing one row per run.
// NOTE(review): this listing contains no { } at all (no block braces, no property
// accessor braces, no interpolation holes); the sample output printed for it shows real
// values, so the braces were presumably lost when the page was extracted - confirm
// against the original source before compiling.
class Program
// Not referenced anywhere else in the visible code.
public static string IntegrationMethod get; set;
// Start of the integration interval; passed to the integrators as xp.
public static double IntervalBegin get; set;
// End of the integration interval; passed to the integrators as xk.
public static double IntervalEnd get; set;
// Number of steps n used by every run.
public static int NPrecisionValue get; set;
// Not referenced anywhere else in the visible code.
public static bool IsParallel get; set;
// Not referenced anywhere else in the visible code.
public static int ThreadValue get; set;
// Timer that is replaced by a fresh Stopwatch.StartNew() before each measured call.
public static Stopwatch Stopwatch get; set;
// Value returned by the most recent integration call.
public static double Result get; set;
// Entry point: prints the table header, configures the interval and step count, then
// measures each integration variant in turn. Every measurement follows the same pattern:
// new integrator, start stopwatch, call, stop stopwatch, print a row.
static void Main(string[] args)
Console.WriteLine("Function | Elapsed Time | Estimated Integral");
Console.WriteLine("-----------------------------------------------------------------");
// The interval begins at 5 and ends at -2, so dx = (xk - xp) / n is negative in the integrators.
IntervalBegin = 5;
IntervalEnd = -2;
NPrecisionValue = 100000000;
//RectangularIntegration – Sequential
NumericalIntegrationMethods integral = new();
Stopwatch = Stopwatch.StartNew();
Result = integral.RectangularIntegration(IntervalBegin, IntervalEnd, NPrecisionValue);
Stopwatch.Stop();
// NOTE(review): as written, this interpolated string (and every one below) has no holes and
// would print its text literally - see the note at the top of the class.
Console.WriteLine($"nameof(integral.RectangularIntegration) – Sequential | Stopwatch.Elapsed | Result");
//RectangularIntegrationParallel - 1 thread
integral = new();
Stopwatch = Stopwatch.StartNew();
Result = integral.RectangularIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, 1);
Stopwatch.Stop();
Console.WriteLine($"nameof(integral.RectangularIntegrationParallel) – 1 Thread | Stopwatch.Elapsed | Result");
//RectangularIntegrationParallel - 2 threads
integral = new();
Stopwatch = Stopwatch.StartNew();
Result = integral.RectangularIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, 2);
Stopwatch.Stop();
Console.WriteLine($"nameof(integral.RectangularIntegrationParallel) – 2 Threads | Stopwatch.Elapsed | Result");
//RectangularIntegrationParallel - 3 threads
integral = new();
Stopwatch = Stopwatch.StartNew();
Result = integral.RectangularIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, 3);
Stopwatch.Stop();
Console.WriteLine($"nameof(integral.RectangularIntegrationParallel) – 3 Threads | Stopwatch.Elapsed | Result");
//RectangularIntegrationParallel - 4 threads
integral = new();
Stopwatch = Stopwatch.StartNew();
Result = integral.RectangularIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, 4);
Stopwatch.Stop();
Console.WriteLine($"nameof(integral.RectangularIntegrationParallel) – 4 Threads | Stopwatch.Elapsed | Result");
//TrapezoidalIntegration - Sequential
integral = new();
Stopwatch = Stopwatch.StartNew();
Result = integral.TrapezoidalIntegration(IntervalBegin, IntervalEnd, NPrecisionValue);
Stopwatch.Stop();
Console.WriteLine($"nameof(integral.TrapezoidalIntegration) – Sequential | Stopwatch.Elapsed | Result");
//TrapezoidalIntegrationParallel – 1 Thread
integral = new();
Stopwatch = Stopwatch.StartNew();
Result = integral.TrapezoidalIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, 1);
Stopwatch.Stop();
Console.WriteLine($"nameof(integral.TrapezoidalIntegrationParallel) – 1 Thread | Stopwatch.Elapsed | Result");
//TrapezoidalIntegrationParallel – 2 Threads
integral = new();
Stopwatch = Stopwatch.StartNew();
Result = integral.TrapezoidalIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, 2);
Stopwatch.Stop();
Console.WriteLine($"nameof(integral.TrapezoidalIntegrationParallel) – 2 Threads | Stopwatch.Elapsed | Result");
//TrapezoidalIntegrationParallel – 3 Threads
integral = new();
Stopwatch = Stopwatch.StartNew();
Result = integral.TrapezoidalIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, 3);
Stopwatch.Stop();
Console.WriteLine($"nameof(integral.TrapezoidalIntegrationParallel) – 3 Threads | Stopwatch.Elapsed | Result");
//TrapezoidalIntegrationParallel – 4 Threads
integral = new();
Stopwatch = Stopwatch.StartNew();
Result = integral.TrapezoidalIntegrationParallel(IntervalBegin, IntervalEnd, NPrecisionValue, 4);
Stopwatch.Stop();
Console.WriteLine($"nameof(integral.TrapezoidalIntegrationParallel) – 4 Threads | Stopwatch.Elapsed | Result");
// Keep the console window open until the user presses Enter.
Console.WriteLine("Press any key to continue...");
Console.ReadLine();
// Numerical integration of the fixed integrand Function over [xp, xk] using n equal steps
// of width dx = (xk - xp) / n. Provides a sequential and a Parallel.For variant of the
// rectangular and the trapezoidal method.
public class NumericalIntegrationMethods
// The integrand: f(x) = x^2 + 2x.
double Function(double x)
return x * x + 2 * x;
// Rectangular method, sequential: returns the sum of dx * f(xp + i * dx) for i = 1..n,
// i.e. each step is sampled at the point xp + i * dx.
// xp: interval start, xk: interval end, n: number of steps.
public double RectangularIntegration(double xp, double xk, int n)
double dx, integral = 0;
dx = (xk - xp) / n;
for (int i = 1; i <= n; i++)
integral += dx * Function(xp + i * dx);
// Disabled debug trace: Console.WriteLine("Sequentially - iteration 0 thread ID: 1", i, Thread.CurrentThread.ManagedThreadId);
return integral;
// Trapezoidal method, sequential: sums f(xp + i * dx) for i = 1..n, adds the average of
// the two endpoint values, and scales the total by dx.
// xp: interval start, xk: interval end, n: number of steps.
public double TrapezoidalIntegration(double xp, double xk, int n)
double dx, integral = 0;
dx = (xk - xp) / n;
// NOTE(review): the i = n term evaluates f at xp + n * dx (the end point xk) with full weight,
// and (f(xp) + f(xk)) / 2 is added again below; the textbook composite trapezoid rule sums
// only the interior points i = 1..n-1 - confirm whether the upper bound is intended.
for (int i = 1; i <= n; i++)
integral += Function(xp + i * dx);
// Disabled debug trace: Console.WriteLine("Sequentially - iteration 0 thread ID: 1", i, Thread.CurrentThread.ManagedThreadId);
integral += (Function(xp) + Function(xk)) / 2;
integral *= dx;
return integral;
// Rectangular method using Parallel.For over i = 1..n with at most maxThreads concurrent workers.
// The loop body is a lambda that is invoked once per iteration.
public double RectangularIntegrationParallel(double xp, double xk, int n, int maxThreads)
double dx, integral = 0;
dx = (xk - xp) / n;
Parallel.For(1, n + 1, new ParallelOptions MaxDegreeOfParallelism = maxThreads , i =>
// 'integral' is a captured local that every iteration updates with += and no lock or other
// synchronization, so when iterations run concurrently their read-modify-write sequences
// can overwrite each other and contributions can be lost.
integral += dx * Function(xp + i * dx);
// Disabled debug trace: Console.WriteLine("In parallel - iteration 0 thread ID: 1", i, Thread.CurrentThread.ManagedThreadId);
);
return integral;
// Trapezoidal method using Parallel.For over i = 1..n with at most maxThreads concurrent workers;
// the endpoint average and the scaling by dx are applied after the loop has finished.
public double TrapezoidalIntegrationParallel(double xp, double xk, int n, int maxThreads)
double dx, integral = 0;
dx = (xk - xp) / n;
Parallel.For(1, n + 1, new ParallelOptions MaxDegreeOfParallelism = maxThreads , i =>
// Same unsynchronized += on the captured local 'integral' as in the rectangular variant:
// concurrent iterations can overwrite each other's updates.
integral += Function(xp + i * dx);
// Disabled debug trace: Console.WriteLine("In parallel - iteration 0 thread ID: 1", i, Thread.CurrentThread.ManagedThreadId);
);
integral += (Function(xp) + Function(xk)) / 2;
integral *= dx;
return integral;
这是输出:
Function | Elapsed Time | Estimated Integral
-----------------------------------------------------------------
RectangularIntegration – Sequential | 00:00:00.9284260 | -65.33333210831276
RectangularIntegrationParallel – 1 Thread | 00:00:01.7040507 | -65.33333210831276
RectangularIntegrationParallel – 2 Threads | 00:00:01.7191484 | -65.33333210831276
RectangularIntegrationParallel – 3 Threads | 00:00:01.6888398 | -57.73164823448317
RectangularIntegrationParallel – 4 Threads | 00:00:01.5530828 | -65.33333210831276
TrapezoidalIntegration – Sequential | 00:00:00.7278303 | -65.33333333332568
TrapezoidalIntegrationParallel – 1 Thread | 00:00:01.4265208 | -65.33333333332568
TrapezoidalIntegrationParallel – 2 Threads | 00:00:02.3009881 | -33.110522448239216
TrapezoidalIntegrationParallel – 3 Threads | 00:00:01.6062253 | -57.02137898750542
TrapezoidalIntegrationParallel – 4 Threads | 00:00:01.9967140 | -18.120285251376426
为什么会这样?我究竟做错了什么?毕竟,使用的线程越多,结果应该越快。 4 线程应该比 3 快,3 线程应该比 2 快,依此类推。 如何使用更多线程获得更快的结果?
【问题讨论】:
老实说,我根本不会相信这些数字 - JIT 和线程池增长没有预热。如果你关心性能,强烈推荐 benchmarkdotnet(在 NuGet 上免费提供)(它很容易上手,GitHub 页面上有示例)。我也有兴趣看看这里是否可以使用 SIMD(通过跨度和向量) @Neil C++ 与 C# 的区别并不那么明显 - 例如,CLR 内部的大部分内容最近已从 C++ 迁移到 C#,并且性能提高了 .当然,它是上下文相关的,但当前的 JIT 和其他功能:意味着这不是一场简单的比赛。 除非 Parallel.For 有一些我不知道的魔法,看起来您的并行实现可能会围绕“积分”变量产生大量争用。每个循环都非常简单和简短,但它们都竞争这个共享变量的读/写。我会尝试总结非共享变量中的所有内容,然后在各个迭代完成后总结这些变量。此外,如果并行生成新线程(不确定是否是这种情况),这将产生大量开销。而且,正如所指出的,确保一切都已预热等。 ps:你正在做的事情甚至是线程安全的吗? "+=" 不是原子操作,除非幕后有一些进一步的同步魔法,否则您最终可能会得到无效的结果 @Bogey 没有魔法。 OP 的并行代码不是线程安全的,会产生不正确的结果。 【参考方案1】:这是一种并行计算的方法,该方法允许每个线程独立工作,同时使用线程本地状态(accumulator
参数)将来自其他线程的干扰降至最低。一般来说,每个线程越少相互干扰,并行代码的效率就越高。
/// <summary>
/// Rectangular integration of <c>Function</c> over [xp, xk] computed in parallel.
/// The index range 1..n is split into chunks; each worker sums its chunks into a
/// thread-local accumulator, and the per-worker totals are merged under a lock.
/// </summary>
/// <param name="xp">Start of the integration interval.</param>
/// <param name="xk">End of the integration interval.</param>
/// <param name="n">Number of steps; the step width is (xk - xp) / n.</param>
/// <param name="maxThreads">Upper bound passed to <c>MaxDegreeOfParallelism</c>.</param>
/// <returns>
/// The sum of dx * Function(xp + i * dx) for i = 1..n. The terms are added in a different
/// order than in the sequential loop, so the last few digits may differ.
/// </returns>
public double RectangularIntegrationParallel(double xp, double xk, int n, int maxThreads)
{
    // With no steps the sequential loop never runs and returns 0; do the same here,
    // because Partitioner.Create rejects an empty range.
    if (n <= 0)
    {
        return 0;
    }

    double dx = (xk - xp) / n;
    double integral = 0;
    var locker = new object();

    Parallel.ForEach(
        // Half-open range [1, n + 1) == i = 1..n, the same indices as the sequential
        // for (int i = 1; i <= n; i++) loop. Starting at 0 would add an extra
        // dx * Function(xp) term.
        Partitioner.Create(1, n + 1),
        new ParallelOptions { MaxDegreeOfParallelism = maxThreads },
        // Each worker starts with its own accumulator set to zero.
        () => 0.0D,
        // Called once per chunk, not once per index, so the per-call lambda overhead is amortized.
        (range, state, accumulator) =>
        {
            for (int i = range.Item1; i < range.Item2; i++)
            {
                accumulator += dx * Function(xp + i * dx);
            }

            return accumulator;
        },
        // Called once per worker: fold its total into the shared result under the lock.
        accumulator =>
        {
            lock (locker)
            {
                integral += accumulator;
            }
        });

    return integral;
}
使用 `Partitioner.Create` 方法的目的是对工作负载进行分块。您可以使用 `Partitioner` 将计算的总范围拆分为子范围，并为每个子范围调用一次 lambda，而不是为计算的每一次小循环迭代都调用一次 lambda。编译器无法内联（inline）lambda，因此通常应避免每秒调用 lambda 数百万次。
本示例中使用的 `Parallel.ForEach` 重载具有以下签名：
// Signature of the Parallel.ForEach overload called in the example above
// (partitioner source plus thread-local state).
public static ParallelLoopResult ForEach<TSource, TLocal>(
// The partitioner that supplies the work items; in the example, index ranges from Partitioner.Create.
Partitioner<TSource> source,
// Loop options; the example sets MaxDegreeOfParallelism.
ParallelOptions parallelOptions,
// Produces the initial local state; in the example, () => 0.0D.
Func<TLocal> localInit,
// Loop body: receives a work item, the loop state and the current local state, and returns the updated local state.
Func<TSource, ParallelLoopState, TLocal, TLocal> body,
// Receives the final local state; in the example it adds the accumulator to the shared total under a lock.
Action<TLocal> localFinally);
除了 `Parallel` 类之外，另一种选择是使用 PLINQ。一般来说，PLINQ 会生成更简洁、更易于理解的代码，但通常会产生一些额外的开销。
/// <summary>
/// Rectangular integration of <c>Function</c> over [xp, xk] computed with PLINQ.
/// The index range 1..n is split into chunks, each chunk is summed independently,
/// and the partial sums are added together.
/// </summary>
/// <param name="xp">Start of the integration interval.</param>
/// <param name="xk">End of the integration interval.</param>
/// <param name="n">Number of steps; the step width is (xk - xp) / n.</param>
/// <param name="maxThreads">Value passed to <c>WithDegreeOfParallelism</c>.</param>
/// <returns>
/// The sum of dx * Function(xp + i * dx) for i = 1..n. The terms are added in a different
/// order than in the sequential loop, so the last few digits may differ.
/// </returns>
public double RectangularIntegrationParallel(double xp, double xk, int n, int maxThreads)
{
    // With no steps the sequential loop never runs and returns 0; do the same here,
    // because Partitioner.Create rejects an empty range.
    if (n <= 0)
    {
        return 0;
    }

    double dx = (xk - xp) / n;

    // Half-open range [1, n + 1) == i = 1..n, the same indices as the sequential
    // for (int i = 1; i <= n; i++) loop. Starting at 0 would add an extra
    // dx * Function(xp) term.
    return Partitioner.Create(1, n + 1)
        .AsParallel()
        .WithDegreeOfParallelism(maxThreads)
        .Select(range =>
        {
            // Local to this chunk, so no synchronization is needed while summing.
            double partial = 0.0;
            for (int i = range.Item1; i < range.Item2; i++)
            {
                partial += dx * Function(xp + i * dx);
            }

            return partial;
        })
        .Sum();
}
【讨论】:
很好地解释了,我确实看到并行代码执行的性能显着提高。谢谢!以上是关于为啥并行多线程代码执行比顺序慢?的主要内容,如果未能解决你的问题,请参考以下文章