图片验证码预处理

Posted by chenxiyuxiao

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了图片验证码预处理相关的知识,希望对你有一定的参考价值。

from copy import deepcopy

from PIL import Image
import numpy as np
from collections import Counter
import hashlib


def fileMd5(filePath):
    """Return the hexadecimal MD5 digest of the file at ``filePath``.

    The file is read in fixed-size chunks so that large files do not have to
    be loaded into memory in one piece.

    :param filePath: path of the file to hash
    :return: the digest as a lowercase hexadecimal string
    """
    digest = hashlib.md5()
    with open(filePath, 'rb') as f:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: f.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()


def parseImg_1(filePath, outPath='parsed.jpg', minDiff=50):
    """Binarise an image made up of a background colour and a text colour.

    The image is converted to greyscale, the most frequent grey level is
    taken as the first dominant colour, and the next most frequent level that
    differs from it by more than ``minDiff`` is taken as the second one.  The
    mean of the dominant levels is the threshold: pixels above it become 255,
    all others become 0.  The more monotone the image is, the better this
    works; it handles most colour images but does not remove interference
    lines.

    :param filePath: path of the image to process
    :param outPath: where the binarised image is written
    :param minDiff: minimum grey-level distance between the two dominant
        colours
    :return: None; the result is saved to ``outPath``
    """
    gray = np.asarray(Image.open(filePath).convert('L'))
    # tolist() yields plain Python ints, so the distance below is computed
    # with ordinary integer arithmetic rather than wrapping uint8 arithmetic.
    counts = Counter(gray.ravel().tolist())

    dominant = []
    for level, _ in counts.most_common():
        if not dominant:
            dominant.append(level)
        elif abs(level - dominant[0]) > minDiff:
            dominant.append(level)
            break

    # With a single dominant level the threshold is that level itself.
    threshold = np.mean(dominant)
    binary = np.where(gray > threshold, 255, 0)
    Image.fromarray(np.uint8(binary)).save(outPath)


def parseImg_2(filePath, transNum=35, outPath='parsed.jpg'):
    """Turn the background grey levels of an image white.

    The image is converted to greyscale and its ``transNum`` most frequent
    grey levels are replaced by 255; every other pixel keeps its value.

    :param filePath: path of the image to process
    :param transNum: how many of the most frequent grey levels count as
        background; use 1 when the background has a single colour.  Values
        larger than the number of distinct levels in the image are clamped.
    :param outPath: where the processed image is written
    :return: None; the result is saved to ``outPath``
    """
    gray = np.asarray(Image.open(filePath).convert('L'))
    counts = Counter(gray.ravel().tolist())
    # most_common(n) returns at most n entries, so asking for more levels
    # than the image contains no longer raises IndexError.
    background = [level for level, _ in counts.most_common(max(transNum, 0))]
    cleaned = np.where(np.isin(gray, background), 255, gray)
    Image.fromarray(np.uint8(cleaned)).save(outPath)


def parseImg_3(filePath):
    """Remove thin interference lines and small noise dots, in place.

    The image is converted to greyscale.  Every interior pixel that has more
    than two of its four direct neighbours (left, right, up, down) brighter
    than 150 is set to white.  The whole image is then binarised at 150 and
    written back to ``filePath``.

    This suits thin lines and small dots; thick lines or dots are not
    removed, and a single pass is often not enough.

    :param filePath: path of the image; the file is overwritten
    :return: None
    """
    gray = np.asarray(Image.open(filePath).convert('L'))
    # Array indexing is [row, column], i.e. [height, width].
    bright = (gray > 150).astype(np.uint8)

    # Number of bright 4-neighbours for every interior pixel, always taken
    # from the unmodified source image.
    neighbours = (
        bright[1:-1, :-2] + bright[1:-1, 2:]
        + bright[:-2, 1:-1] + bright[2:, 1:-1]
    )

    # Start from a copy of the source so that border pixels, which have no
    # full neighbourhood, keep a defined value instead of being left
    # uninitialised.
    result = gray.copy()
    interior = result[1:-1, 1:-1]
    interior[neighbours > 2] = 255

    result = np.where(result > 150, 255, 0)
    Image.fromarray(np.uint8(result)).save(filePath)


def parseImg_3_wanshanbanben(filepath, maxRounds=None):
    """Apply ``parseImg_3`` repeatedly until the file stops changing.

    After every pass the MD5 digest of the file is compared with the digest
    from before the pass; the loop ends when the two are equal.

    :param filepath: path of the image; the file is overwritten on each pass
    :param maxRounds: optional upper bound on the number of passes; ``None``
        (the default) keeps iterating until the file is stable
    :return: None
    """
    previous = fileMd5(filepath)
    rounds = 0
    while maxRounds is None or rounds < maxRounds:
        parseImg_3(filepath)
        rounds += 1
        current = fileMd5(filepath)
        if current == previous:
            break
        previous = current

def parseImg_4(filePath, outPath='parsed.jpg'):
    """Turn hollow (outlined) characters into solid ones.

    The image is converted to greyscale and pixels brighter than 200 are
    treated as white.  A flood fill over white pixels, using 8-connectivity,
    starts from the four image corners.  Every pixel the fill reaches is
    written as 255 and every other pixel as 0, so white areas that are not
    connected to a corner come out black together with the outlines.

    A corner only seeds the fill when it is itself white.

    :param filePath: path of the image to process
    :param outPath: where the processed image is written
    :return: None; the result is saved to ``outPath``
    """
    gray = np.asarray(Image.open(filePath).convert('L'))
    white = gray > 200
    # shape is (rows, columns), i.e. (height, width).
    rows, cols = white.shape
    reached = np.zeros(white.shape, dtype=bool)

    # Seed with the real corner pixels; the last valid index is size - 1.
    stack = []
    for corner in ((0, 0), (0, cols - 1), (rows - 1, 0), (rows - 1, cols - 1)):
        if white[corner] and not reached[corner]:
            reached[corner] = True
            stack.append(corner)

    steps = (
        (0, 1), (0, -1), (1, 0), (-1, 0),
        (-1, -1), (-1, 1), (1, -1), (1, 1),
    )
    # The visiting order does not affect which pixels are reached, so a
    # plain list used as a stack is sufficient.
    while stack:
        r, c = stack.pop()
        for dr, dc in steps:
            nr, nc = r + dr, c + dc
            if not (0 <= nr < rows and 0 <= nc < cols):
                continue
            if white[nr, nc] and not reached[nr, nc]:
                reached[nr, nc] = True
                stack.append((nr, nc))

    result = np.where(reached, 255, 0)
    Image.fromarray(np.uint8(result)).save(outPath)


if __name__ == '__main__':
    # Example invocations; enable the step(s) that fit the captcha at hand.
    # parseImg_1('pin.png')
    # parseImg_1('q.jpg')
    # # parseImg_2('ws.jpg')
    # # parseImg_3('parsed.jpg')
    # parseImg_3_wanshanbanben('parsed.jpg')
    parseImg_4('pin.png')
    # parseImg_1('pin2.png')
    # parseImg_3_wanshanbanben('parsed.jpg')

以上是关于图片验证码预处理的主要内容,如果未能解决你的问题,请参考以下文章:

12306购票处理验证码并完成登陆

第二百七十节,Tornado框架-生成验证码图片,以及验证码结合Session验证

图片验证码

图片验证码预处理

图片验证码识别算法

字符型图片验证码识别完整过程及Python实现