根据嵌套键值对对象数组进行排序的最快方法

Posted

技术标签:

【中文标题】根据嵌套键值对对象数组进行排序的最快方法【英文标题】:Fastest way to sort array of objects on the basis of nested key values 【发布时间】:2021-07-20 22:54:01 【问题描述】:

我试图根据深深嵌套在对象中的 key 值对包含大约 100 个大型实体(具有近 30 个键)的对象数组进行排序,为此我使用了 lodash 的 orderBy 方法:

// Sort-key accessors: each reads one nested field from a user record.
// `name` lower-cases the account name so the comparison ignores letter case.
let name = (user) => user.accountDetails.name.toLowerCase();
// `dob` returns the stored date-of-birth value unchanged.
let dob = (user) => user.personalProfile.dob;    
// Orders a deep copy of `data` by name, then by dob, using the two direction flags.
// NOTE(review): orderBy is lodash per the question text; cloneDeep is presumably lodash too — confirm.
orderBy(cloneDeep(data), [name, dob], [sortOrder1, sortOrder2])

*考虑 sortOrder 是 desc 或 asc

但是排序过程花费的时间相当长。有没有更快的方法,可以根据深埋在对象内部的键对对象数组进行排序?

示例数据(考虑像这样的 50 个条目至少有 40 个键)


{
    "list": "bugs42",
    "start-date": "2015-08-27",
    "accountDetails": {
        "name": "diamond",
        "text": "8 months",
        "milliseconds": 19936427304
    },
    "personalProfile": {
        "name": "stark",
        "dob": "2003-03-12T09:26:39.980Z"
    }
},
{
    "list": "bugs50",
    "start-date": "2015-08-27",
    "accountDetails": {
        "name": "ruby",
        "text": "8 months",
        "milliseconds": 19936427305
    },
    "personalProfile": {
        "name": "warmachine",
        "dob": "2007-03-31T09:26:39.980Z"
    }
}

【问题讨论】:

这能回答你的问题吗? How to sort a javascript array of objects by nested object property? 您不需要将字符串转换为 Date 对象。字符串将按照 ISO 8601 格式自然排序。 不,因为 .sort() 内部使用混合排序技术,并且由于对象非常大,我们宁愿采用一种以最少交换次数进行的方法,是的,我们可以删除显式日期转换@HereticMonkey 【参考方案1】:

1。使用 JavaScript 的内置 sort() 函数

我们可以使用 JavaScript 的内置数组 sort() 方法,该方法可以快速而准确地对所有内容进行排序。如果您希望原始数组保持不变,那么在数组的副本而不是数组本身上运行sort() 方法很重要。我们可以通过几个非常简单的方式做到这一点:

array.slice().sort(…) [...array].sort(…)

在下面的示例中,我选择使用后一种方式,即展开语法(spread syntax):

// Sample records; the sortable date lives at personalProfile.dob as an ISO 8601 string.
const data = [
  {
    list: "bugs42",
    startdate: "2015-08-27",
    accountDetails: { name: "diamond", text: "8 months", milliseconds: 19936427304 },
    personalProfile: { name: "stark", dob: "2003-03-12T09:26:39.980Z" },
  },
  {
    list: "bugs50",
    startdate: "2015-08-27",
    accountDetails: { name: "ruby", text: "8 months", milliseconds: 19936427305 },
    personalProfile: { name: "warmachine", dob: "2007-03-31T09:26:39.980Z" },
  },
];

/**
 * Returns a new array ordered by personalProfile.dob, earliest date first.
 * The input is copied with spread before sorting, so the caller's array keeps its order.
 */
const sortByDobAsc = data => [...data].sort((a, b) => new Date(a.personalProfile.dob) - new Date(b.personalProfile.dob));

/**
 * Returns a new array ordered by personalProfile.dob, latest date first.
 * The input is copied with spread before sorting, so the caller's array keeps its order.
 */
const sortByDobDes = data => [...data].sort((a, b) => new Date(b.personalProfile.dob) - new Date(a.personalProfile.dob));

console.log(sortByDobAsc(data), sortByDobDes(data));

有关 JavaScript 内置 sort() 方法的更多信息,请在此处查看 MDN 文档:Array.prototype.sort()

2。使用第三方排序功能

Hariyanto Lim 的This article 探索了替代排序方法,似乎有几种著名的自定义排序算法可供您选择,甚至可以在此基础上进行构建。

他比较中最快的似乎是 Chrome 和 Safari 中的 QuickInsertionSort,以及 Firefox 中的其他 quickSort 函数中的任何一个,其中 QuickInsertionSort 在某些情况下奇怪地变得与原生 JS sort() 方法一样慢。

这里是探索的所有三个替代函数的源代码:

1。 QuickInsertionSort()

/**
 * Sorts `arr` in place into ascending order, comparing elements with `>`.
 *
 * Ranges wider than 32 index steps are split with an in-line Hoare-style
 * partition; narrower ranges are finished with insertion sort. Pending ranges
 * are kept on a hand-rolled stack so the function never recurses.
 *
 * @param {Array} arr - Array to sort; it is modified in place.
 * @returns {Array|null} The same array instance once sorted, or null when
 *   `arr` is missing or empty.
 */
function QuickInsertionSort(arr) {
  'use strict';

  if (!arr || 1 > arr.length) {
    return null;
  }

  // Widest range (endIndex - startIndex) that is handed to insertion sort.
  // The original author reported 32 as a good value for random data.
  const INSERTION_SORT_SPAN = 32;

  let startIndex = 0;
  let endIndex = arr.length - 1;

  // Stack of pending ranges, held as two parallel arrays with a manual length
  // counter (the original author avoided Array.push()/pop() for speed).
  const startIndexes = [];
  const endIndexes = [];
  let stackLength = 0;

  do {
    if (INSERTION_SORT_SPAN >= endIndex - startIndex) {
      if (1 === endIndex - startIndex) {
        // Exactly two elements: a single compare-and-swap is enough.
        if (arr[startIndex] > arr[endIndex]) {
          const swapTemp = arr[startIndex];
          arr[startIndex] = arr[endIndex];
          arr[endIndex] = swapTemp;
        }
      } else {
        // Insertion sort over arr[startIndex..endIndex].
        for (let i = startIndex + 1; endIndex >= i; i++) {
          const key = arr[i];

          // Shift every element greater than key one slot to the right.
          let j = i - 1;
          while (j >= startIndex && arr[j] > key) {
            arr[j + 1] = arr[j];
            j--;
          }

          arr[j + 1] = key;
        }
      }

      // This range is done; continue with the next pending range, if any.
      if (stackLength > 0) {
        stackLength--;
        startIndex = startIndexes[stackLength];
        endIndex = endIndexes[stackLength];
      } else {
        break;
      }
    } else {
      // Pivot selection: take the first element, look from the end for a value
      // that differs from it, and use the smaller of the two as the pivot.
      let pivot = arr[startIndex];
      let j = endIndex;
      while (j >= startIndex && pivot === arr[j]) {
        j--;
      }
      if (j > startIndex && pivot > arr[j]) {
        pivot = arr[j];
      }

      // Hoare partition: afterwards everything in [startIndex..partitionIndex]
      // is <= pivot and everything in [partitionIndex + 1..endIndex] is >= pivot.
      let left = startIndex;
      let right = endIndex;
      let partitionIndex;

      for (;;) {
        while (pivot > arr[left]) {
          left++;
        }

        while (arr[right] > pivot) {
          right--;
        }

        if (left >= right) {
          partitionIndex = right;
          break;
        }

        const swapTemp = arr[left];
        arr[left] = arr[right];
        arr[right] = swapTemp;

        left++;
        right--;
      }

      if (partitionIndex > startIndex) {
        // A lower part exists: process it next and defer the upper part.
        if (endIndex > partitionIndex + 1) {
          startIndexes[stackLength] = partitionIndex + 1;
          endIndexes[stackLength] = endIndex;
          stackLength++;
        }

        endIndex = partitionIndex;
      } else if (endIndex > partitionIndex + 1) {
        // Only an upper part exists: narrow the current range to it.
        startIndex = partitionIndex + 1;
      }
    }
  } while (endIndex > startIndex);

  // Returned for parity with quickSort_by_Tony_Hoare_non_recursive and
  // quickSort_by_Nico_Lomuto, which both return the sorted array.
  return arr;
}

2。 quickSort_by_Tony_Hoare_non_recursive()

/**
 * Sorts `arr` in place with an iterative quicksort driven by an explicit stack
 * of pending ranges.
 *
 * Partitioning is delegated to `partition_by_Tony_Hoare(arr, startIndex, endIndex)`,
 * which is not defined in this snippet and must be supplied separately.
 * NOTE(review): from how its result is used here, the helper is assumed to
 * return an index p with startIndex <= p < endIndex that splits the range into
 * [startIndex..p] and [p + 1..endIndex] — confirm against its implementation.
 *
 * @param {Array} arr - Array to sort; it is modified in place.
 * @returns {Array|null} The same array instance, or null when `arr` is missing
 *   or empty.
 */
function quickSort_by_Tony_Hoare_non_recursive(arr) {
  'use strict';

  if (!arr || 1 > arr.length) {
    return null;
  }

  let startIndex = 0;
  let endIndex = arr.length - 1;

  // Pending ranges are stored in two parallel arrays with a manual length
  // counter (the original author avoided Array.push()/pop() for speed).
  const stackStartIndex = [];
  const stackEndIndex = [];
  let stackLength = 0;

  do {
    const partitionIndex = partition_by_Tony_Hoare(arr, startIndex, endIndex);

    if (partitionIndex > startIndex) {
      // A lower part exists. If an upper part exists too, defer it.
      if (endIndex > partitionIndex + 1) {
        stackStartIndex[stackLength] = partitionIndex + 1;
        stackEndIndex[stackLength] = endIndex;
        stackLength++;
      }

      // Continue with the lower part.
      endIndex = partitionIndex;
    } else if (endIndex > partitionIndex + 1) {
      // No lower part, only an upper part: narrow the range to it.
      startIndex = partitionIndex + 1;
    } else if (stackLength > 0) {
      // Current range is finished: resume a deferred range.
      stackLength--;
      startIndex = stackStartIndex[stackLength];
      endIndex = stackEndIndex[stackLength];
    } else {
      break; // nothing left to sort
    }
  } while (endIndex > startIndex);

  return arr;
}

3。 quickSort_by_Nico_Lomuto()

/**
 * Recursive quicksort using the Lomuto partition scheme; sorts
 * arr[startIndex..endIndex] in place.
 *
 * @param {Array} arr - Array to sort; it is modified in place.
 * @param {number} [startIndex=0] - First index of the range to sort.
 * @param {number} [endIndex=arr.length - 1] - Last index (inclusive) of the range.
 * @returns {Array} The same `arr` that was passed in.
 */
function quickSort_by_Nico_Lomuto(arr, startIndex = 0, endIndex = arr ? arr.length - 1 : -1) {
  // The defaults let callers pass only the array, like the other sort
  // functions here; previously omitted bounds made the call a silent no-op.
  if (endIndex > startIndex) {
    const partitionIndex = partition_by_Nico_Lomuto(arr, startIndex, endIndex);

    // The element at partitionIndex is left out of both recursive calls:
    // everything below it is <= it and everything above it is >= it.
    quickSort_by_Nico_Lomuto(arr, startIndex, partitionIndex - 1);
    quickSort_by_Nico_Lomuto(arr, partitionIndex + 1, endIndex);
  }

  return arr;
}


/**
 * Lomuto partition of arr[startIndex..endIndex], performed in place.
 *
 * The pivot comes from selectPivot(), called with doSwap = true so that the
 * pivot value sits at endIndex before the scan. Elements <= pivot are moved to
 * the front of the range, then the pivot is swapped into its final slot.
 *
 * @param {Array} arr - Array being sorted; it is modified in place.
 * @param {number} startIndex - First index of the range.
 * @param {number} endIndex - Last index (inclusive) of the range.
 * @returns {number} Index where the pivot ends up.
 */
function partition_by_Nico_Lomuto(arr, startIndex, endIndex) {
  // The original author notes that Lomuto partitioning hits its worst case
  // when the pivot is the largest value in the range; selectPivot avoids
  // picking the larger of the two values it compares.
  const pivot = selectPivot(arr, startIndex, endIndex, true); // true = must swap

  // Next slot for an element that is <= pivot.
  let i = startIndex;

  // Scan everything except the last position, which holds the pivot.
  // `j` is declared locally; it used to leak as an implicit global.
  for (let j = startIndex; endIndex > j; j++) {
    if (pivot >= arr[j]) {
      swap(arr, i, j);
      i++;
    }
  }

  // Move the pivot between the two partitions.
  swap(arr, i, endIndex);

  return i;
}


/**
 * Picks a pivot value for arr[startIndex..endIndex].
 *
 * Takes the first element, then scans backwards from endIndex for an element
 * that differs from it. The smaller of those two values becomes the pivot.
 * If every element in the range is the same, that value is returned and
 * nothing is moved.
 *
 * @param {Array} arr - Array being sorted.
 * @param {number} startIndex - First index of the range.
 * @param {number} endIndex - Last index (inclusive) of the range.
 * @param {boolean} doSwap - When truthy, the chosen pivot element is swapped
 *   into position endIndex.
 * @returns {*} The chosen pivot value.
 */
function selectPivot(arr, startIndex, endIndex, doSwap) {
  let pivot = arr[startIndex];

  // Look for an element that differs from the first one. The bounds check
  // comes first so no element outside the range is read.
  let j = endIndex;
  while (j >= startIndex && pivot === arr[j]) {
    j--;
  }

  if (startIndex > j) {
    // All elements are equal: nothing to choose between.
    return pivot;
  }

  if (pivot > arr[j]) {
    // arr[j] is the smaller value: use it and park it at endIndex.
    pivot = arr[j];
    if (doSwap) {
      swap(arr, j, endIndex);
    }
  } else if (doSwap) {
    // The first element is the smaller value: park it at endIndex.
    swap(arr, startIndex, endIndex);
  }

  return pivot;
}


/**
 * Exchanges the elements at indexes `a` and `b` of `arr`, in place.
 *
 * @param {Array} arr - Array to modify.
 * @param {number} a - Index of the first element.
 * @param {number} b - Index of the second element.
 */
function swap(arr, a, b) {
  // A temporary variable is used instead of an ES6 destructuring swap because
  // the original author measured the destructuring form as almost 3x slower.
  const held = arr[a];
  arr[a] = arr[b];
  arr[b] = held;
}

【讨论】:

Lodash 也在使用 javascript 的内置 .sort 方法,但我正在寻找比这种特定情况更快的方法。我宁愿考虑将对象展平,然后执行父级排序。但是扁平化也更昂贵,并且会导致更大的时间复杂度 @Stark 我明白了。阅读this article,看起来有几种自定义排序算法可供您选择甚至构建。他们比较中最快的似乎是 Chrome 和 Safari 中的 QuickInsertionSort,以及 Firefox 中的其他 quickSort 函数中的任何一个,其中 QuickInsertionSort 在某些情况下奇怪地变得与原生 JS sort() 方法一样慢。我也会更新我的答案以显示这一点。 嗯,我必须收集一些关于这种混合排序技术对大对象的性能的统计数据。谢谢@Brandon McConnell 有机会可以看看这篇文章。他展示了每个人的具体统计数据,每个人都展示了他如何相互比较。

以上是关于根据嵌套键值对对象数组进行排序的最快方法的主要内容,如果未能解决你的问题,请参考以下文章

排序数组以及遍历普通对象的键值对以及如何遍历一个不同键值的对象数组

ReactJS:如何根据道具的值对对象数组进行排序?

从对象数组制作树,保留其他键值对[重复]

如何根据 Aurelia/Typescript 中的嵌套属性对对象数组进行排序

通过具有日期值的单个键对对象数组进行排序

怎样从对象中获取键值并保存在对象中