数据清洗
Posted crazybird123
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了数据清洗相关的知识,希望对你有一定的参考价值。
1、WIDER FACE 数据
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 21 16:06:05 2019

@author: admin

Flatten the WIDER FACE training annotation file into one line per face box.

The input file (``wider_face_train_bbx_gt.txt``) is read as repeated groups:
an image path line containing ``jpg``, a line holding the number of faces, and
then that many box lines whose first four space-separated fields are
``x1 y1 w h`` (``x1, y1`` is the top-left corner of the face box).  Every box
is appended to ``widerTrainModify.txt`` as ``<image>.jpg x1 y1 w h``.

NOTE(review): this script was recovered from a copy whose line breaks and
quote characters had been mangled.  The ``"\\n"`` separators below are
reconstructed from context -- confirm against the original if available.
"""

# ``re`` and ``linecache`` were imported by the original script; they are kept
# so that any other code relying on them keeps working.
import re
import linecache
import os

# Windows-style directory prefix (with trailing separator) that is prepended
# to every file name.  Backslashes are escaped so that the last one no longer
# swallows the closing quote.
FILEDIR = "目录\\widerFace\\wider_face_split\\"
ANNOTATION_FILE = 'wider_face_train_bbx_gt.txt'
OUTPUT_FILE = "widerTrainModify.txt"


def count_lines(file):
    """Return the number of newline characters in *file* and close it.

    Args:
        file: An open, readable text file object.  It is consumed from its
            current position in large chunks and is always closed before
            returning, even when reading fails.

    Returns:
        The count of ``"\\n"`` characters read.  A final line without a
        trailing newline is therefore not counted.

    The single-pass conversion in ``main`` no longer needs a line count; the
    function is kept for callers of the original script.
    """
    lines_quantity = 0
    try:
        while True:
            buffer = file.read(1024 * 8192)
            if not buffer:
                break
            lines_quantity += buffer.count('\n')
    finally:
        file.close()
    return lines_quantity


def _parse_box(box_line):
    """Return the first four fields of *box_line* as ``"x1 y1 w h"``.

    Raises:
        ValueError: If the line holds fewer than four fields.
    """
    fields = box_line.split()
    if len(fields) < 4:
        raise ValueError("expected at least 4 box fields, got %r" % box_line)
    return " ".join(fields[:4])


def parse_annotations(lines):
    """Yield ``(file_name, boxes)`` for every image group found in *lines*.

    Args:
        lines: An iterable of annotation lines (for example an open file).

    Yields:
        Tuples of the image file name without folder or extension and a list
        of ``"x1 y1 w h"`` strings, one per face box.  Images declaring zero
        faces yield an empty list.

    Raises:
        ValueError: If a face count is not an integer, a box line has fewer
            than four fields, or the input ends in the middle of a group.
    """
    stream = iter(lines)
    for line in stream:
        # Only image path lines contain "jpg"; anything else at this level
        # (blank lines, stray box lines) is skipped, as in the original scan.
        if 'jpg' not in line:
            continue

        image_path = line.strip()
        _, separator, image_name = image_path.partition('/')
        if not separator:
            # No folder component: treat the whole line as the image name.
            image_name = image_path
        file_name = os.path.splitext(image_name)[0]

        count_line = next(stream, None)
        if count_line is None:
            raise ValueError("missing face count after %r" % image_path)
        face_count = int(count_line)

        boxes = []
        for _ in range(face_count):
            box_line = next(stream, None)
            if box_line is None:
                raise ValueError("missing box line for %r" % image_path)
            boxes.append(_parse_box(box_line))
        yield file_name, boxes


def main():
    """Convert the annotation file under ``FILEDIR`` and append the result.

    Prints each image's file name as it is processed and appends one
    ``"<file_name>.jpg x1 y1 w h"`` line per face box to the output file.

    The original script also carried a disabled variant that wrote the boxes
    of each image to ``wider_face_train\\<folder>\\<file_name>.txt`` instead.
    """
    source_path = FILEDIR + ANNOTATION_FILE
    target_path = FILEDIR + OUTPUT_FILE
    # Open the output once in append mode instead of once per box.
    with open(source_path, 'r', encoding="utf-8") as source, \
            open(target_path, 'a', encoding="utf-8") as target:
        for file_name, boxes in parse_annotations(source):
            print(file_name)
            target.writelines(
                file_name + ".jpg" + " " + coordinates + "\n"
                for coordinates in boxes
            )


if __name__ == "__main__":
    main()
以上是关于数据清洗的主要内容,如果未能解决你的问题,请参考以下文章: