python 连接 oracle 统计指定表格所有字段的缺失值数

Posted iUpoint

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python 连接 oracle 统计指定表格所有字段的缺失值数相关的知识,希望对你有一定的参考价值。

import cx_Oracle as orcl
class ConnectOracle:
    #scott xx 192.168.32.200 1521 oracle
    #sqlplus, system as sysdba, select instance_name from  V$instance;
    #oracle://qmcbrt:[email protected]:1521/tqmcbdb
    #cx_Oracle.connection(‘hr‘, ‘hrpwd‘, ‘localhost:1521/XE‘)
    ##cx_Oracle.connection(‘qmcbrt:[email protected]:1521/tqmcbdb‘)
    def __init__(self, username, passwd, host, port=‘1521‘, sid=‘oracle‘):
        # dsn = data source name
        self.login = {}
        self.db = None
        self.cursor = None
        self.login[‘username‘] = username
        self.login[‘passwd‘] = passwd
        self.login[‘dsn‘] = orcl.makedsn(host, port, sid)
        print(orcl.clientversion())

    def connect_oracle(self):
        try:
            #orcl.connect(‘load/[email protected]/ora11g‘)
            #orcl.connect(‘username‘,‘password‘,‘host/orcl‘)
            self.db = orcl.connect(self.login[‘username‘], self.login[‘passwd‘], self.login[‘dsn‘])  # 登录内搜数据库
            self.db.autocommit = False  # 关闭自动提交
            #orcl.Cursor(self.db)
            self.cursor = self.db.cursor()  # 设置cursor光标
            #print(self.db.dsn)
            #print(self.db.password)
            #print(self.db.ping())
            #print(self.db.tnsentry)
            #print(self.db.username);
            #print(self.db.version)
            #print(self.cursor.rowcount)
            return True
        except:
            print(‘can not connect oracle‘)
            return False

    def close_oracle(self):
        self.cursor.close()
        self.db.close()

    def select_oracle(self, sql, size=0, params=None):
        if self.connect_oracle():
            if params:
                # 执行多条sql命令
                # results = cur.executemany("select field from table where field = %s", (‘value1‘, ‘value2‘, ‘value3‘))
                #sql = "SELECT ENAME, trunc(SAL,0) FROM EMP                 #           WHERE deptno = :deptno and sal > :value"
                #cursor.execute(sql, deptno = 30, value = 1000);
                #params = {‘deptno‘:30, ‘sal‘:3000}
                #cur.execute(sql, params)
                self.cursor.executemany(sql, params)
            else:
                self.cursor.execute(sql)
            if size:
                content = self.cursor.fetchmany(size)
            else:
                content = self.cursor.fetchall()
            
            # 字段名
            #ss = sql.lower()
            #find默认查找从左向右第一个位置
            #string = sql[ss.find(‘select‘), ss.find(‘from‘)]
            columns = [i[0] for i in self.cursor.description]
            self.close_oracle()
            #if len(content)==1:
            #    return pd.Series(content[0], index=columns)
            return pd.DataFrame(content, columns=columns)
        return False

    def insert_oracle(self, sql, data=None):
        try:
            self.connect_oracle()
            if data:
                # 批量插入, [(),..]、((),..)
                # mysql自带load data infile ‘D:/../...txt‘ into table user(var1, ., .)
                #self.cursor.execute(‘insert into demo(v) values(:1)‘,[‘nice‘])
                self.cursor.executemany(sql, data)
            else:
                self.cursor.execute(sql, data)
        except:
            print("insert异常")
            self.db.rollback()  # 回滚
        finally:
            self.db.commit()  # 事务提交
            self.close_oracle()




# 批量查询数据缺失率
#sql_generator():
def missing_count(table_name, where_condition={}, **engine):
    #where 条件参数化, str或dict
    sql_tb_name = "select column_name from user_tab_columns         where table_name = ‘{}‘".format(table_name)

    db = ConnectOracle(**engine)
    #sql_select.encode(‘utf-8‘)
    columns = db.select_oracle(sql=sql_tb_name)
    
    #生成select语句
    ss = ‘‘
    for col in columns.COLUMN_NAME:
        ss += ‘sum(decode({},null, 1, 0)) as {}, ‘.format(col, col)
    ss = ss[:-2]
    
    #生成where条件
    wh = ‘‘
    if where_condition:
        wh += ‘ where ‘
        if type(where_condition)==str:
            wh += where_condition
        if type(where_condition)==dict:
            for key in where_condition.keys():
                if type(where_condition[key])!=str:
                    wh += (‘t.‘ + str(key) + ‘ = ‘ +
                           str(where_condition[key]) + ‘ and ‘)
                else:
                    wh += ("t." + str(key) + " = ‘" +
                           str(where_condition[key]) + "‘ and ")
            wh = wh[:-4]
    
    #print(ss)
    sql_select =  ‘‘‘select count(*) as counts, {}
                    from {} t {}
                    ‘‘‘.format(ss, table_name, wh)
    
    #print(sql_select)
    res = db.select_oracle(sql=sql_select)
    return pd.Series(res.values.tolist()[0], index=res.columns)




import numpy as np
import pandas as pd
from collections import Counter
config = {
        ‘username‘:‘qmcbrt‘,
        ‘passwd‘:‘qmcbrt‘,
        ‘host‘:‘localhost‘,
        ‘port‘:‘1521‘,
        ‘sid‘:‘tqmcbdb‘
        }
db = ConnectOracle(**config)
#sql_select.encode(‘utf-8‘)
db.select_oracle(sql=sql_select)
#colnames = [‘aac147‘, ‘aac058‘, ‘aac004‘, ‘aac005‘, ‘aac161‘, ‘aac006‘, ‘aac011‘,
#       ‘aac017‘, ‘aca111‘, ‘aac009‘, ‘aac154‘, ‘aab301‘, ‘bab004‘, ‘aac300‘,
#       ‘aac301‘, ‘bac020‘, ‘bac015‘, ‘aac204‘, ‘aac060‘, ‘bae185_zd‘,
#       ‘baz060_zd‘, ‘aac012‘, ‘aac031‘, ‘aac049‘, ‘aae030‘, ‘aae031‘, ‘aae116‘,
#       ‘aae201‘, ‘aae200‘, ‘aic161‘, ‘aic162‘, ‘is_zd‘, ‘is_dy‘, ‘label‘]
#qmcb_km_100 = pd.DataFrame(np.array(qmcb_km_100), columns=colnames)
#
#qmcb_km_100.to_csv(r‘e:/qmcb/qmcb_km_100.csv‘, index=False, encoding=‘utf-8‘)

where_condition = {
                 ‘is_normal‘: 1,
                 ‘is_below_16‘: 0,
                 ‘is_xs‘: 0,
                 ‘is_cj‘: 0,
                 ‘is_dead‘: 0,
                 ‘AAE138_is_not_null‘: 0,
                 ‘is_dc‘: 0,
                 ‘is_px‘: 0
                 }
# 计算 QMCB_KM_2019_1_31_1 表的数据缺失数
missing_count(‘QMCB_KM_2019_1_31_1‘, where_condition, **config)

  

以上是关于python 连接 oracle 统计指定表格所有字段的缺失值数的主要内容,如果未能解决你的问题,请参考以下文章

python操作execl表格常用方法-pandas

oracle统计各数据库总记录条数及所占容量

oracle数据同步 建同义表后报错:无法解析指定的连接标识符

如何统计日志里面访问次数最多的IP

Oracle sql - 连接所有列

jquery实现的统计table表格指定列的单元格值的和