利用python将二值csv格式转换为矩阵

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了利用python将二值csv格式转换为矩阵相关的知识,希望对你有一定的参考价值。

#!/usr/bin/env python
# coding:utf-8
#import pandas as pd, numpy as np;

'''
将csv文件转换为对应的邻接矩阵mat
'''

from numpy import *;


def _read_pairs(path):
    """Parse ``source;target1,target2,...`` rows of *path*.

    Returns a list of ``(source, [targets])`` tuples in file order. Only the
    first two ``;``-separated fields of a row are used.

    Raises:
        IndexError: if a non-blank row contains no ``;`` separator.
    """
    pairs = []
    with open(path) as src:
        for raw in src:
            # Drop the line terminator; otherwise the last target of every
            # row keeps a trailing "\n" and is treated as a distinct name.
            line = raw.rstrip("\r\n")
            if not line.strip():
                continue  # tolerate blank lines (e.g. at end of file)
            fields = line.split(";")
            # An empty name (from "a;" or "a;x,,y") is not a real target.
            names = [name for name in fields[1].split(",") if name]
            pairs.append((fields[0], names))
    return pairs


def _write_interaction_matrix(input_path, row_path, col_path, matrix_path):
    """Build a 0/1 adjacency matrix from *input_path* and write it out.

    Rows are the sorted distinct first-field values, columns the sorted
    distinct target names. ``mat[r][c]`` is 1 when the row entity lists that
    target. Three files are appended to: *row_path* and *col_path* get one
    name per line, *matrix_path* gets one Python-list-formatted row per line.

    Returns:
        ``(row_names, col_names)`` as two sorted lists.

    NOTE(review): every row of the input is treated as data. The original
    comments mention dropping a title row but no code did so -- confirm
    whether the input files carry a header.
    NOTE(review): outputs are opened in append mode, as in the original, so
    re-running accumulates content in existing files.
    """
    pairs = _read_pairs(input_path)

    row_names = sorted(set(source for source, _names in pairs))
    col_names = sorted(set(name for _source, names in pairs for name in names))

    # Name -> position maps avoid a linear list.index() scan per cell.
    row_index = dict((name, pos) for pos, name in enumerate(row_names))
    col_index = dict((name, pos) for pos, name in enumerate(col_names))

    # mat is the adjacency matrix.
    mat = zeros((len(row_names), len(col_names)), dtype=int16)
    for source, names in pairs:
        row = row_index[source]
        for name in names:
            mat[row, col_index[name]] = 1

    with open(row_path, "a") as out:
        out.writelines("%s\n" % name for name in row_names)
    with open(col_path, "a") as out:
        out.writelines("%s\n" % name for name in col_names)
    with open(matrix_path, "a") as out:
        # tolist() yields plain Python ints, so each row prints as "[0, 1]".
        out.writelines("%s\n" % (cells,) for cells in mat.tolist())

    return row_names, col_names


def protein_complexes_trans():
    """Convert ``protein_complexes.csv`` into a complex/target matrix.

    Reads ``protein_complexes.csv`` from the current directory and appends
    to ``complexes``, ``targets(complexes)`` and
    ``protein_complexes_interaction_matrix``. Prints ``<rows>&<cols>``.

    Returns:
        0 on completion.
    """
    row_names, col_names = _write_interaction_matrix(
        "protein_complexes.csv",
        "complexes",
        "targets(complexes)",
        "protein_complexes_interaction_matrix",
    )
    print("%d&%d" % (len(row_names), len(col_names)))
    return 0


def drugs_targets_trans():
    """Convert ``drugs_targets_sum`` into a drug/target matrix.

    Reads ``drugs_targets_sum`` from the current directory and appends to
    ``drugs``, ``targets`` and ``drugs_targets_interaction_matrix``.
    Returns ``None``.
    """
    _write_interaction_matrix(
        "drugs_targets_sum",
        "drugs",
        "targets",
        "drugs_targets_interaction_matrix",
    )

# Script entry: build the drug/target matrix.
# (Call protein_complexes_trans() here instead for the protein-complex data.)
drugs_targets_trans()

 

以上是关于利用python将二值csv格式转换为矩阵的主要内容,如果未能解决你的问题,请参考以下文章

反序列化(unpickle)后,将二值化的 DataFrame 还原为原始分类值

如何使用 python 和 pandas 将 Csv 文件转换为 libsvm?

81 数字验证码识别实例

Python爬虫编程思想(157):使用Scrapy从CSV格式转换到JSON格式

Python爬虫编程思想(157):使用Scrapy从CSV格式转换到JSON格式

Python爬虫编程思想(157):使用Scrapy从CSV格式转换到JSON格式