根据元素的频率对数组进行排序

Posted

技术标签:

【中文标题】根据元素的频率对数组进行排序【英文标题】:Sort array based on frequency of elements 【发布时间】:2020-08-26 16:53:42 【问题描述】:

我有一个包含重复数据的数组。我想根据频率对其进行排序(首先是重复次数最高的元素),然后删除重复项。

我尝试了下面的代码,但得到的顺序不对。

// Sample data: records identified by `name`, several of which are repeated.
var arr = [
    { name: "Manage AAA Devices", sys_id: "7b491aad371adb003ef7a9c2b3990e22" },
    { name: "", sys_id: "" },
    { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
    { name: "Manage AAA Devices", sys_id: "7b491aad371adb003ef7a9c2b3990e22" },
    { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
    { name: "SNOW Change Request", sys_id: "325b08913783d6c4f4f4c97a43990e90" },
    { name: "", sys_id: "" },
    { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
    { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
    { name: "Incident Request", sys_id: "3ee543f61b363740f713ed7b2f4bcbc0" }
];

// Tally how many times each `name` occurs and print the tally.
// The result is a plain object mapping name -> count; it is not an array and
// is not ordered by frequency, which is why the printed order looks "wrong".
console.log(arr.reduce((x, y) => {
  if (x[y.name]) {
    // Name already seen: bump its counter.
    x[y.name]++;
    return x;
  } else {
    // First occurrence: start the counter at 1.
    var z = {};
    z[y.name] = 1;
    return Object.assign(x, z);
  }
}, {}));

【问题讨论】:

y.fruit 来自哪里?并根据哪个键识别重复项? 好吧,现在你已经修复了属性名称,你似乎有了一个可以运行的版本:) 请记住,对象的顺序没有指定,你得到的结果是一个对象,不再是数组,但也许现在您可以使用 name 属性对归约后的结果进行排序:) @Icepickle 抱歉,我发布了不同的数组。请现在检查我更新的代码。顺序仍然不正确。 @krish 我更新了我的评论,希望对你有足够的帮助 你的真实数据会有多大,krish?因为您接受的答案是 O(2N),因此随着数据的增长,排序所需的时间将呈指数级增长。你可以用我提供的种子生成器试试看区别 【参考方案1】:

不是最佳解决方案,但您可以以此为起点:

// Sample data: records identified by `name`, several of which are repeated.
const arr = [
  {
    "name": "Manage AAA Devices",
    "sys_id": "7b491aad371adb003ef7a9c2b3990e22"
  },
  {
    "name": "",
    "sys_id": ""
  },
  {
    "name": "Manage AAA - ISE Admin Functions",
    "sys_id": "dc5f99a2dbb2b74019d81ffa68961933"
  },
  {
    "name": "Manage AAA Devices",
    "sys_id": "7b491aad371adb003ef7a9c2b3990e22"
  },
  {
    "name": "Manage AAA - ISE Admin Functions",
    "sys_id": "dc5f99a2dbb2b74019d81ffa68961933"
  },
  {
    "name": "SNOW Change Request",
    "sys_id": "325b08913783d6c4f4f4c97a43990e90"
  },
  {
    "name": "",
    "sys_id": ""
  },
  {
    "name": "Manage AAA - ISE Admin Functions",
    "sys_id": "dc5f99a2dbb2b74019d81ffa68961933"
  },
  {
    "name": "Manage AAA - ISE Admin Functions",
    "sys_id": "dc5f99a2dbb2b74019d81ffa68961933"
  },
  {
    "name": "Incident Request",
    "sys_id": "3ee543f61b363740f713ed7b2f4bcbc0"
  }
];

// Tally occurrences per `name`: the result maps each name to its count.
const noOfOccurencies = arr.reduce((counts, item) => {
  counts[item.name] = (counts[item.name] || 0) + 1;
  return counts;
}, {});

console.log(noOfOccurencies);

/**
 * Order an array of objects based on another array's order.
 * Adapted from https://gist.github.com/ecarter/1423674
 *
 * The array is sorted in place and the same reference is returned.
 * Items whose `key` value does not appear in `order` are placed first;
 * items that share the same `key` value keep their relative order.
 *
 * @param {Object[]} array - Objects to reorder (mutated).
 * @param {Array} order - Values of `key` in the desired order.
 * @param {string} key - Property of each object to look up in `order`.
 * @returns {Object[]} The same `array`, now sorted.
 */
function mapOrder(array, order, key) {
  // Index each value once so the comparator does not rescan `order`
  // on every comparison. The first occurrence wins, like indexOf.
  const positions = new Map();
  order.forEach(function(value, index) {
    if (!positions.has(value)) {
      positions.set(value, index);
    }
  });

  // -1 mirrors what indexOf returns for a value that is not listed.
  const rank = function(item) {
    const value = item[key];
    return positions.has(value) ? positions.get(value) : -1;
  };

  // Returning the difference yields 0 for equal ranks, which keeps the
  // comparator consistent (the previous version returned -1 for ties).
  array.sort(function(a, b) {
    return rank(a) - rank(b);
  });
  return array;
}

// Names sorted by ascending occurrence count...
const itemOrder = Object.keys(noOfOccurencies).sort(function(a, b) {
  return noOfOccurencies[a] - noOfOccurencies[b];
});
// ...then reversed so the most frequent name comes first.
itemOrder.reverse();

console.log(itemOrder);

// Reorder the original array (in place) to follow itemOrder.
const ordered_array = mapOrder(arr, itemOrder, 'name');

// Keep only the first record for each name; a Set of the names already
// seen avoids rescanning the array for every element.
const seenNames = new Set();
const noDuplicatesArr = ordered_array.filter(function(record) {
  if (seenNames.has(record.name)) {
    return false;
  }
  seenNames.add(record.name);
  return true;
});

console.log(noDuplicatesArr);

【讨论】:

一旦你统计出了各项的数量,真的没有必要再次遍历原始数组,尤其是其余的元素都只是重复项。【参考方案2】:

// Sample data: records identified by `name`, several of which are repeated.
let arr = [
    { name: "Manage AAA Devices", sys_id: "7b491aad371adb003ef7a9c2b3990e22" },
    { name: "", sys_id: "" },
    { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
    { name: "Manage AAA Devices", sys_id: "7b491aad371adb003ef7a9c2b3990e22" },
    { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
    { name: "SNOW Change Request", sys_id: "325b08913783d6c4f4f4c97a43990e90" },
    { name: "", sys_id: "" },
    { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
    { name: "Manage AAA - ISE Admin Functions", sys_id: "dc5f99a2dbb2b74019d81ffa68961933" },
    { name: "Incident Request", sys_id: "3ee543f61b363740f713ed7b2f4bcbc0" },
];

/**
 * Collapse duplicate records and report how often each one occurs.
 *
 * Two records count as duplicates when their JSON.stringify output is
 * identical. The result is ordered from most to least frequent; records
 * with equal frequency appear in reverse order of first appearance.
 *
 * @param {Object[]} arrayWithDuplicates - Records that each carry a `name`.
 * @returns {{name: *, frequency: number}[]} One entry per distinct record.
 */
function sortByFrequency(arrayWithDuplicates) {
    // One pass: group by serialized form, remembering the name of the
    // first record seen for each group. A Map keeps insertion order.
    const groups = new Map();
    arrayWithDuplicates.forEach((item) => {
        const signature = JSON.stringify(item);
        const group = groups.get(signature);
        if (group) {
            group.frequency += 1;
        } else {
            groups.set(signature, { name: item.name, frequency: 1 });
        }
    });

    // Ascending stable sort followed by reverse puts the highest
    // frequency first.
    const summary = [...groups.values()];
    summary.sort((a, b) => {
        return a.frequency - b.frequency;
    });

    return summary.reverse();
}


// Print the result of sortByFrequency for the sample array.
console.log(sortByFrequency(arr));

【讨论】:

将要比较的完整对象字符串化会产生相当大的开销,尤其是在双重迭代时。对于当前的大小来说还可以,但是根据他的源数组的增长方式,这可能会花费一些时间【参考方案3】:

我认为最简单的方法可能是将原始项目保留为您正在创建的字典的一部分,然后您可以简单地使用该字典进行排序。这样,字典中每个键只会保存 1 个项目,并同时记录该项目出现的次数。

所以你会稍微改变reduce函数

/**
 * Group items by the value of one property, counting the occurrences.
 *
 * @param {Object[]} arr - Items to count.
 * @param {string} prop - Property whose value identifies duplicates.
 * @returns {Object} Dictionary keyed by the property value; each entry is
 *   `{ count, item }`, where `item` is the first item seen with that value.
 */
function reduceWithCount( arr, prop ) {
  // A prototype-less accumulator: with a plain `{}` a value such as
  // "toString" or "constructor" would hit an inherited member and be
  // treated as already counted.
  return arr.reduce( (agg, item) => {
    const key = item[prop];
    if (!agg[key]) {
      agg[key] = { count: 0, item };
    }
    agg[key].count++;
    return agg;
  }, Object.create( null ) );
}

要从中创建一个新数组,按您记录的频率排序,您只需将项目映射回来并使用字典对象中先前保存的计数

/**
 * Turn a counted dictionary back into an array of items, most frequent
 * first. Entries with the same count keep the dictionary's key order.
 *
 * @param {Object} countedDictionary - Dictionary whose values are
 *   `{ count, item }` entries.
 * @param {string} prop - Kept for backward compatibility; no longer
 *   needed because entries are sorted before the items are extracted.
 * @returns {Object[]} The stored items ordered by descending count.
 */
function createArrayFromObject( countedDictionary, prop ) {
  return Object.keys( countedDictionary )
    .map( key => countedDictionary[key] )
    // Sort the entries themselves so the comparator does not have to
    // look each item up in the dictionary again.
    .sort( (a, b) => b.count - a.count )
    .map( entry => entry.item );
}

然而,这意味着使用这些函数时,您必须将属性名(键)传递两次,但我认为这不会造成太大的开销。

我稍微更改了代码片段,使其不使用您的原始数组,而是使用随机生成的数组,以查看它处理 50,000 个条目时的表现。

只要原始数组中没有 50000 个不同的名称,使用简单的字典查找就可以了。

// Lookup table of sample names and their matching sys_id values.
const seed = {
  'Manage AAA Devices': '7b491aad371adb003ef7a9c2b3990e22',
  '': '',
  'Manage AAA - ISE Admin Functions': 'dc5f99a2dbb2b74019d81ffa68961933',
  'SNOW Change Request': '325b08913783d6c4f4f4c97a43990e90',
  'Incident Request': '3ee543f61b363740f713ed7b2f4bcbc0'
};

const seedKeys = Object.keys( seed );

// Build 50000 records, each picked at random from the seed table.
const arr = Array.from( { length: 50000 }, () => {
  // Math.floor instead of parseInt: parseInt stringifies its argument, so
  // a tiny product such as 5e-7 would be parsed as 5 (an invalid index).
  const name = seedKeys[Math.floor( Math.random() * seedKeys.length )];
  return { name, sysId: seed[name] };
} );

console.log(`array generated with ${arr.length} items`);

/**
 * Group items by the value of one property, counting the occurrences.
 *
 * @param {Object[]} arr - Items to count.
 * @param {string} prop - Property whose value identifies duplicates.
 * @returns {Object} Dictionary keyed by the property value; each entry is
 *   `{ count, item }`, where `item` is the first item seen with that value.
 */
function reduceWithCount( arr, prop ) {
  // A prototype-less accumulator: with a plain `{}` a value such as
  // "toString" or "constructor" would hit an inherited member and be
  // treated as already counted.
  return arr.reduce( (agg, item) => {
    const key = item[prop];
    if (!agg[key]) {
      agg[key] = { count: 0, item };
    }
    agg[key].count++;
    return agg;
  }, Object.create( null ) );
}


/**
 * Turn a counted dictionary back into an array of items, most frequent
 * first. Entries with the same count keep the dictionary's key order.
 *
 * @param {Object} countedDictionary - Dictionary whose values are
 *   `{ count, item }` entries.
 * @param {string} prop - Kept for backward compatibility; no longer
 *   needed because entries are sorted before the items are extracted.
 * @returns {Object[]} The stored items ordered by descending count.
 */
function createArrayFromObject( countedDictionary, prop ) {
  return Object.keys( countedDictionary )
    .map( key => countedDictionary[key] )
    // Sort the entries themselves so the comparator does not have to
    // look each item up in the dictionary again.
    .sort( (a, b) => b.count - a.count )
    .map( entry => entry.item );
}


// Time only the counting and sorting steps; the logging below stays
// outside the timed section.
console.time('removeAndSort');
// Dictionary keyed by name, as returned by reduceWithCount.
const removedDuplicates = reduceWithCount( arr, 'name' );
// Array built from that dictionary by createArrayFromObject.
const sortedArray = createArrayFromObject( removedDuplicates, 'name' );
console.timeEnd('removeAndSort');
console.log( removedDuplicates );
console.log( sortedArray );

【讨论】:

以上是关于根据元素的频率对数组进行排序的主要内容,如果未能解决你的问题,请参考以下文章

我需要帮助以这种特殊方式根据频率对 java 中的数组进行排序

我想要一些关于如何根据元素频率对列表进行排序的帮助[重复]

通过降低出现频率对元素进行排序

数据结构与算法——计数排序桶排序基数排序

算法按频率高低来进行排序

在python中,如何按元素的频率对列表进行排序