使用keras框架cnn+ctc_loss识别不定长字符图片操作

作者:xinfeng2005 时间:2022-05-13 22:15:42 

我就废话不多说了,大家还是直接看代码吧~


# -*- coding: utf-8 -*-
#keras==2.0.5
#tensorflow==1.1.0

import os,sys,string
import sys
import logging
import multiprocessing
import time
import json
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

import keras
import keras.backend as K
from keras.datasets import mnist
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.callbacks import *
from keras import backend as K
# from keras.utils.visualize_util import plot
from visual_callbacks import AccLossPlotter
# Training callback from the project-local visual_callbacks module; it is
# configured with the 'acc' and 'loss' graphs and told to save its graph to
# sys.path[0] (presumably it plots those metrics per epoch -- TODO confirm).
plotter = AccLossPlotter(graphs=['acc', 'loss'], save_graph=True, save_graph_path=sys.path[0])

# Character set the recognizer can output
char_ocr='0123456789' #string.digits
# Maximum length of a recognized string
seq_len=8
# Number of output classes: one per character in char_ocr plus one extra
# (presumably the CTC blank class -- TODO confirm)
label_count=len(char_ocr)+1

def get_label(filepath):
    """Build the label vector encoded in an image file name.

    The label text is the part of the file's base name that follows the last
    underscore and precedes the first dot (``img_0123.tif`` -> ``0123``).
    Each character is replaced by its index in ``char_ocr`` (``-1`` when the
    character is not in ``char_ocr``). Labels shorter than ``seq_len`` are
    right-padded with ``label_count``; longer labels are returned as they
    are, without truncation.
    """
    base_name = str(os.path.split(filepath)[-1])
    label_text = base_name.split('.')[0].split('_')[-1]
    encoded = [int(char_ocr.find(ch)) for ch in label_text]
    # A non-positive repeat count yields an empty list, so labels that are
    # already seq_len long (or longer) are left untouched.
    encoded.extend([label_count] * (seq_len - len(encoded)))
    return encoded

def gen_image_data(dir=r'data\train', file_list=None):
    """Load every ``.tif`` image under *dir* and build the model arrays.

    Args:
        dir: Root directory that is walked recursively for ``.tif`` files
            (the extension match is case-sensitive).
        file_list: Optional list that receives the path of every image found.
            Paths are appended in place so the caller can map rows of the
            returned arrays back to files; entries already in the list are
            loaded as well. When omitted, a fresh list is used for the call.

    Returns:
        ``(X, Y)``: ``X`` has shape ``(n_images, 150, 50, 1)`` and holds the
        inverted grayscale images scaled by 1/256; ``Y`` holds one
        ``get_label`` vector per image.

    Raises:
        IOError: If an image file cannot be read by OpenCV.
    """
    # A new list per call: a mutable default argument would be shared between
    # calls and keep accumulating paths from earlier invocations.
    if file_list is None:
        file_list = []

    for root, _dirs, files in os.walk(dir):
        for filename in files:
            if os.path.splitext(filename)[1] == '.tif':
                # os.path.join picks the platform's separator instead of a
                # hard-coded backslash.
                file_list.append(os.path.join(root, filename))

    print(len(file_list))
    X = []
    Y = []
    for path in file_list:
        img = cv2.imread(path, 0)  # flag 0: load as single-channel grayscale
        if img is None:
            # imread signals failure by returning None; fail with the file
            # name rather than with an opaque resize error further down.
            raise IOError('cannot read image: %s' % path)
        # Resize to 150 wide x 50 high, then transpose so the array becomes
        # 150 x 50, matching the (150, 50, 1) network input.
        img = cv2.resize(img, (150, 50), interpolation=cv2.INTER_CUBIC)
        img = cv2.transpose(img)
        img = cv2.flip(img, 1)
        # Invert the colors and scale; the float divisor keeps this a true
        # division on every Python version.
        img = (255 - img) / 256.0
        X.append([img])
        Y.append(get_label(path))

    # (n, 1, 150, 50) -> (n, 150, 50, 1): move the channel axis to the end.
    X = np.transpose(X, (0, 2, 3, 1))
    X = np.array(X)
    Y = np.array(Y)
    return X, Y

# the actual loss calc occurs here despite it not being
# an internal Keras loss function

def ctc_lambda_func(args):
    """Compute the batch CTC cost; meant to be wrapped in a Keras ``Lambda``.

    *args* is the 4-item sequence ``(y_pred, labels, input_length,
    label_length)``. The items are forwarded to ``K.ctc_batch_cost`` with the
    labels first.
    """
    predictions, labels, input_length, label_length = args
    # The widely copied version of this function drops the first two
    # timesteps (``[:, 2:, :]``) because the first RNN outputs "tend to be
    # garbage"; the author's note says that made no difference in testing, so
    # the full sequence is kept here.
    full_sequence = predictions[:, :, :]
    return K.ctc_batch_cost(labels, full_sequence, input_length, label_length)

if __name__ == '__main__':
    # Script entry point: build the CNN + GRU network, wrap it in a second
    # model whose only output is the CTC loss, train on the images returned by
    # gen_image_data(), and report recognition errors on data\test after every
    # epoch and once more at the end.

    # The network input is (height, width, 1); gen_image_data() returns images
    # in exactly this layout.
    height = 150
    width = 50
    input_tensor = Input((height, width, 1))
    x = input_tensor
    # Three conv + max-pool stages with 32, 64 and 128 filters.
    for i in range(3):
        x = Convolution2D(32 * 2 ** i, (3, 3), activation='relu', padding='same')(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

    conv_shape = x.get_shape()
    # Axis 1 of the conv output becomes the time axis of the recurrent part;
    # the remaining two axes are flattened into the per-step feature vector.
    time_steps = int(conv_shape[1])
    x = Reshape(target_shape=(time_steps, int(conv_shape[2] * conv_shape[3])))(x)

    x = Dense(32, activation='relu')(x)

    # NOTE(review): with go_backwards=True Keras is documented to return the
    # sequence in reversed time order, so the merges below may pair forward
    # step t with backward step T-1-t -- confirm this is intended.
    gru_1 = GRU(32, return_sequences=True, kernel_initializer='he_normal', name='gru1')(x)
    gru_1b = GRU(32, return_sequences=True, go_backwards=True, kernel_initializer='he_normal',
                 name='gru1_b')(x)
    gru1_merged = add([gru_1, gru_1b])

    gru_2 = GRU(32, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(32, return_sequences=True, go_backwards=True, kernel_initializer='he_normal',
                 name='gru2_b')(gru1_merged)
    x = concatenate([gru_2, gru_2b])
    x = Dropout(0.25)(x)
    x = Dense(label_count, kernel_initializer='he_normal', activation='softmax')(x)
    # base_model maps images to per-timestep class probabilities; it is the
    # model used for prediction in test().
    base_model = Model(inputs=input_tensor, outputs=x)

    labels = Input(name='the_labels', shape=[seq_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [x, labels, input_length, label_length])

    # The training model outputs the CTC loss itself, so the compiled "loss"
    # simply returns that output and ignores y_true.
    model = Model(inputs=[input_tensor, labels, input_length, label_length], outputs=[loss_out])
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='adadelta')
    model.summary()

    def test(base_model):
        """Decode every image under data\\test and print the mismatches.

        The expected text is taken from each file name (the part after the
        last underscore, before the first dot) and compared with the
        CTC-decoded prediction; a running error count is printed on every
        mismatch.
        """
        file_list = []
        X, Y = gen_image_data(r'data\test', file_list)
        y_pred = base_model.predict(X)
        n_samples, n_steps = y_pred.shape[0], y_pred.shape[1]
        # NOTE(review): K.ctc_decode is called on every invocation, which
        # presumably adds new ops to the backend graph each epoch -- confirm.
        decoded = K.ctc_decode(y_pred, input_length=np.ones(n_samples) * n_steps)[0][0]
        out = K.get_value(decoded)[:, :seq_len]
        print()
        error_count = 0
        for path, decoded_row in zip(file_list, out):
            print(path)
            str_src = str(os.path.split(path)[-1]).split('.')[0].split('_')[-1]
            print(decoded_row)
            # -1 entries are skipped; every other class index is mapped back
            # through char_ocr so decoding stays correct if the character set
            # is ever something other than '0123456789'.
            str_out = ''.join(char_ocr[k] for k in decoded_row if k != -1)
            print(str_src, str_out)
            if str_src != str_out:
                error_count += 1
                print('################################', error_count)

    class LossHistory(Callback):
        """Record per-batch loss; save weights and run test() after each epoch."""

        def on_train_begin(self, logs=None):
            self.losses = []

        def on_epoch_end(self, epoch, logs=None):
            model.save_weights('model_1018.w')
            base_model.save_weights('base_model_1018.w')
            test(base_model)

        def on_batch_end(self, batch, logs=None):
            # logs may be None; fall back to an empty dict instead of sharing
            # a mutable default between calls.
            self.losses.append((logs or {}).get('loss'))

    history = LossHistory()

    # To resume training, load previously saved weights here
    # (base_model_1018.w into base_model, model_1018.w into model).

    X, Y = gen_image_data()
    maxin = 4900      # the first `maxin` samples train, the rest validate
    batch_size = 20   # the value actually passed to fit()

    # Every sample uses all time steps produced by the CNN.
    input_lengths = np.ones(len(X)) * time_steps
    # Real label length per sample = entries that are not the padding value
    # get_label() appends. Passing seq_len for every sample would make CTC
    # treat that out-of-range padding as real labels for shorter strings.
    label_lengths = np.sum(Y != label_count, axis=1)

    train = slice(None, maxin)
    val = slice(maxin, None)
    # Y is passed as the target only to satisfy fit(); the compiled loss
    # ignores it.
    result = model.fit(
        [X[train], Y[train], input_lengths[train], label_lengths[train]], Y[train],
        batch_size=batch_size,
        epochs=1000,
        callbacks=[history, plotter, EarlyStopping(patience=10)],
        validation_data=([X[val], Y[val], input_lengths[val], label_lengths[val]], Y[val]),
    )

    test(base_model)

    K.clear_session()

补充知识:日常填坑之keras.backend.ctc_batch_cost参数问题

InvalidArgumentError sequence_length(0) <=30错误

下面的代码是在网上绝大多数文章给出的关于k.ctc_batch_cost()函数的使用代码


def ctc_lambda_func(args):
    """CTC loss wrapper in its widely published form (drops two timesteps).

    *args* is ``(y_pred, labels, input_length, label_length)``. This is the
    variant the surrounding text reports as failing with
    ``InvalidArgumentError sequence_length(0) <=30``.
    """
    predictions, labels, input_length, label_length = args
    # Skip the first two RNN outputs, which the original comment claims
    # "tend to be garbage"; input_length is passed on unchanged.
    trimmed = predictions[:, 2:, :]
    return K.ctc_batch_cost(labels, trimmed, input_length, label_length)

可以注意到有一句:y_pred = y_pred[:, 2:, :],这里把y_pred 的第二维(时间步)去掉了前两步,说人话:把送进lstm序列的step减了2步。后来偶然在一篇文章中看到,这里之所以减2是因为在将feature送入keras的lstm时输出序列自动少了2步,所以这里就写成这样了。估计是之前老版本的bug,现在的新版本已经修复了。如果依然按照上面的写法,y_pred的时间步数会比传入的input_length少2,于是会得到如下错误:

InvalidArgumentError sequence_length(0) <=30

'<='后面的数值 = 你cnn最后的输出维度 - 2。这个错误我找了很久,一直不明白30哪里来的,后来一行行地检查代码时发现这里很可疑,于是改成如下形式,错误解决。


def ctc_lambda_func(args):
    """CTC loss wrapper that keeps every timestep of the prediction.

    *args* is ``(y_pred, labels, input_length, label_length)``; the items are
    forwarded to ``K.ctc_batch_cost`` with the labels first.
    """
    predictions, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, predictions, input_length, label_length)

训练时出现ctc_loss_calculator.cc:144] No valid path found或loss: inf错误

熟悉CTC算法的话,这个提示应该是ctc没找到有效路径。既然是没找到有效路径,那肯定是label和input之间哪个地方又出问题了!和input相关的错误已经解决了,那么肯定就是label的问题了。再看ctc_batch_cost的四个参数,labels和label_length这两个地方有可疑。对于ctc_batch_cost()的参数,labels是整数类别索引(稀疏标签,不是one-hot编码),形状:[batch, max_labelLength],其中max_labelLength指预测的最大字符长度;label_length则是每个label实际的字符长度。我受之前tf.ctc_loss的影响,把label_length都设置成了最大长度,所以报错。

对于参数labels而言,max_labelLength是能预测的最大字符长度。这个值与送lstm的feature的第二维,即特征序列的max_step有关,表面上看只要max_labelLength<max_step即可,但是如果小的不多依然会出现上述错误。至于到底要小多少,还得从ctc算法里找,由于ctc算法会在标签的字符之间(以及首尾)插入空白符(blank),所以应该把这个长度考虑进去,所以有 max_labelLength < max_step//2。没仔细研究keras里ctc_batch_cost()函数的实现细节,上面是我的猜测。(更精确地说:CTC只要求相邻的重复字符之间必须隔一个blank,因此存在有效路径的条件是 max_step >= 标签长度 + 标签中相邻重复字符的个数;max_labelLength < max_step//2 是对任何标签都足够的保守上界。)如果有很明确的答案,还请麻烦告诉我一声,谢了先!

错误代码:

# Incorrect form, kept for illustration: gives every sample the maximum label
# length instead of its own length.
batch_label_length = np.ones(batch_size) * max_labelLength

正确打开方式:


# Every sample in the batch shares one input length: max_img_weigth // 8
# (presumably the image width divided by the CNN's downsampling factor --
# TODO confirm).
batch_input_length = (max_img_weigth // 8) * np.ones(batch_size)
batch_x, batch_y, batch_label_length = [], [], []
for j in range(i, i + batch_size):
    x, y = self.get_img_data(index_all[j])
    batch_x.append(x)
    batch_y.append(y)
    # Use the sample's own label length rather than the maximum.
    # NOTE(review): indexed with j while the image uses index_all[j] --
    # confirm both refer to the same sample.
    batch_label_length.append(self.label_length[j])

最后附一张我的crnn的模型图:

使用keras框架cnn+ctc_loss识别不定长字符图片操作

来源:https://blog.csdn.net/xinfeng2005/article/details/78278832

标签:keras,cnn,ctc,loss,字符图片
0
投稿

猜你喜欢

  • layui select获取自定义属性方法

    2024-06-23 14:14:21
  • python模块和包的应用BASE_PATH使用解析

    2021-04-15 09:46:36
  • javascript 将共享属性迁移到原型中去的实现方法

    2024-04-22 13:05:00
  • SQL SERVER 日志已满的处理方法

    2010-07-31 13:32:00
  • python实现二叉排序树

    2022-08-26 04:34:37
  • python利用numpy存取文件案例教程

    2023-05-22 03:55:53
  • colab中修改python版本的全过程

    2022-10-31 07:18:38
  • 如何从SQL数据库中调用图片?

    2009-11-15 19:59:00
  • vue+elementui+vuex+sessionStorage实现历史标签菜单的示例代码

    2024-04-30 10:26:30
  • Django forms表单 select下拉框的传值实例

    2023-04-21 02:23:53
  • 深入解析MS-SQL锁机制

    2024-01-27 19:03:21
  • PHP源码之 ext/mysql扩展部分

    2023-07-21 18:24:20
  • c++基础语法:虚继承

    2024-01-23 10:01:00
  • 详解python基础中的for循环

    2021-04-23 07:42:11
  • MySQL中无过滤条件的count详解

    2024-01-22 13:06:20
  • Python3中的真除和Floor除法用法分析

    2023-10-11 09:01:45
  • python mysql实现学生成绩管理系统

    2024-01-25 09:28:12
  • python数据可视化绘制世界人口地图

    2022-04-27 21:31:31
  • 轻松解决AJAX的中文乱码问题

    2008-09-03 12:55:00
  • asp如何编写一个最简单的聊天程序?

    2009-11-08 19:04:00
  • asp之家 网络编程 m.aspxhome.com