python多进程读图提取特征存npy

作者:业余狙击手19 时间:2022-09-05 11:38:06 

本文实例为大家分享了python多进程读图提取特征存npy的具体代码,供大家参考,具体内容如下


import multiprocessing
import os, time, random
import numpy as np
import cv2
import os
import sys
from time import ctime
import tensorflow as tf

# Module-level configuration. Everything below runs when the module is imported.
# NOTE(review): with the "spawn" start method each child process re-imports this
# module and would repeat the listdir/np.save below - confirm this is acceptable.
image_dir = r"D:/sxl/处理图片/汉字分类/train10/"  # root folder; each entry is walked as one class of images
data_type = 'test'  # not read anywhere in the visible code
save_path = r'E:/sxl_Programs/Python/CNN/npy/'  # prefix prepended to every saved .npy file name
data_name = 'Img10'        # base name used to build the feature/label .npy file names

char_set = np.array(os.listdir(image_dir))   # names of all entries in image_dir; the index of a name is its label
np.save(save_path+'ImgShuZi10.npy',char_set)   # persist the name array so a label index can be mapped back to its name
char_set_n = len(char_set)       # number of entries (classes) found

read_process_n = 1 # number of feature-extraction processes (also the number of end-of-data sentinels sent)
repate_n = 4   # number of random shifts per image; only referenced by commented-out augmentation code
data_size = 1000000 # number of feature vectors accumulated before one set of .npy files is written

shuffled = True  # flag for whether the data should be shuffled

# Image reader that also works for paths containing Chinese characters.
def cv_imread(file_path, type=0):
    """Load an image by reading the raw file bytes and decoding them in memory.

    The file is read with ``np.fromfile`` and decoded with ``cv2.imdecode``
    (unchanged mode), which is what allows paths with Chinese characters.

    Args:
        file_path: Path of the image file.
        type: When 0 (the default) a 3-dimensional decoded image is converted
            to grayscale; any other value returns the image as decoded.

    Returns:
        The decoded image array, or ``None`` when the data cannot be decoded
        (the same convention ``cv2.imread`` uses). Callers must check for
        ``None`` before using the result.
    """
    raw_bytes = np.fromfile(file_path, dtype=np.uint8)
    cv_img = cv2.imdecode(raw_bytes, cv2.IMREAD_UNCHANGED)

    # imdecode signals failure by returning None; touching .shape on it would
    # raise an unrelated AttributeError, so report the failure explicitly.
    if cv_img is None:
        return None

    if type == 0 and cv_img.ndim == 3:
        cv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
    return cv_img

# Shuffle several arrays with one shared permutation.
def ShuffledData(features, labels):
    """Randomly reorder features and labels while keeping them aligned.

    A single permutation of the first axis is drawn and applied to both
    arrays, so ``features[i]`` still belongs to ``labels[i]`` afterwards.

    Args:
        features: Array of samples; may have any number of dimensions
            (including 1-D or empty).
        labels: Array with one entry per sample along its first axis.

    Returns:
        Tuple ``(shuffled_features, shuffled_labels)``.

    Raises:
        ValueError: If the two arrays do not contain the same number of samples.
    """
    sample_count = features.shape[0]
    if sample_count != len(labels):
        raise ValueError(
            'features and labels must have the same length, got %d and %d'
            % (sample_count, len(labels))
        )

    permutation = np.random.permutation(sample_count)
    # Index along axis 0 only: `features[permutation, :]` fails for 1-D input,
    # which is what np.array([]) produces for an empty batch.
    return features[permutation], labels[permutation]

# Purpose: simple grid feature.
# Requirements: 1. independent of the input image size; 2. the input image is
# expected to be grayscale; 3. the input image is the only parameter.
# Returns: a flat feature vector of 64*64 values.
def GetFeature(image):
    """Resize a grayscale image to 64x64 and flatten it into an int16 vector.

    Args:
        image: 2-D (grayscale) image array of any size.

    Returns:
        1-D ``np.int16`` array of length 64*64 holding the resized pixels in
        row-major order (index ``row * width + column``).

    Raises:
        ValueError: If the resized image is not 2-D (e.g. a colour image).
    """
    # Normalise the image size.
    resized = cv2.resize(image, (64, 64))
    if resized.ndim != 2:
        raise ValueError(
            'GetFeature expects a grayscale (2-D) image, got %d dimensions'
            % resized.ndim
        )

    # Row-major flatten replaces the per-pixel Python loop. The old loop
    # computed the index as h*img_h+w, which only works for square images.
    return resized.astype(np.int16).reshape(-1)

# Code run by the producer process: reads images and puts them on the queue.
def read_image_to_queue(queue):
    """Walk every entry of ``char_set`` and enqueue ``(image, label)`` pairs.

    Each name in ``char_set`` is treated as a folder under ``image_dir``; all
    ``.jpg`` files found below it (extension matched case-insensitively) are
    read as grayscale via ``cv_imread`` and put on ``queue`` with the index of
    the folder name in ``char_set`` as the label.

    After reading, one ``(None, -1)`` sentinel per consumer process
    (``read_process_n``) is enqueued so every consumer knows the data has
    ended. The sentinels are sent even if reading fails, so consumers do not
    block forever on an empty queue.

    Args:
        queue: Multiprocessing queue shared with the consumer processes.

    Returns:
        True when all folders have been read.
    """
    print('Process to write: %s' % os.getpid())
    try:
        for j, dirname in enumerate(char_set):  # dirname is a folder name
            # Index of the folder name inside char_set, used as the class label.
            label = np.where(char_set == dirname)[0][0]
            print('序号:' + str(j), '读 ' + dirname + ' 文件夹...时间:', ctime())
            for parent, _, filenames in os.walk(os.path.join(image_dir, dirname)):
                for filename in filenames:
                    if not filename.lower().endswith('.jpg'):
                        continue
                    image_path = os.path.join(parent, filename)
                    image = cv_imread(image_path, 0)
                    if image is None:
                        # Undecodable file: skip it instead of handing the
                        # consumer something it cannot resize.
                        print('skip unreadable image:', image_path)
                        continue
                    queue.put((image, label))
    finally:
        # One end-of-data marker per consumer, sent on success and on failure.
        for _ in range(read_process_n):
            queue.put((None, -1))

    print('读图结束!')
    return True

# Code run by the consumer processes: feature extraction and saving.
def _save_batch(features, labels, lock, count):
    """Shuffle, split 80/20 and save one accumulated batch as four .npy files."""
    array_features = np.array(features)
    array_labels = np.array(labels)

    if shuffled:
        array_features, array_labels = ShuffledData(array_features, array_labels)

    # Split the data into a training part (first 80%) and a test part.
    split_x = int(array_features.shape[0] * 0.8)
    train_data, test_data = np.split(array_features, [split_x], axis=0)
    train_labels, test_labels = np.split(array_labels, [split_x], axis=0)

    # Only the shared counter needs protection; the context manager releases
    # the lock even if an exception is raised.
    with lock:
        count.value += 1
        file_index = str(count.value)

    outputs = (
        (data_name + '_features_train_' + file_index + '.npy', train_data),
        (data_name + '_labels_train_' + file_index + '.npy', train_labels),
        (data_name + '_features_test_' + file_index + '.npy', test_data),
        (data_name + '_labels_test_' + file_index + '.npy', test_labels),
    )
    for file_name, data in outputs:
        np.save(save_path + file_name, data)
    for file_name, _ in outputs:
        print(os.getpid(), 'save:', file_name)


def extract_feature(queue, lock, count):
    """Take images from the queue, extract features and save them in batches.

    Items are ``(image, label)`` tuples; a label of ``-1`` is the end-of-data
    sentinel. Features are buffered and written out (train/test split, four
    ``.npy`` files per batch) whenever the buffer holds ``data_size`` entries
    or the sentinel arrives. An empty buffer is never written, so a sentinel
    that arrives right after a flush (or with no images at all) neither
    crashes nor produces empty files.

    Args:
        queue: First-in-first-out multiprocessing queue filled by the producer.
        lock: Lock held while the shared file counter is incremented.
        count: Shared integer counter used to number the saved files.
    """
    print('Process %s start reading...' % os.getpid())

    features = []  # extracted feature vectors of the current batch
    labels = []    # labels matching `features` one to one

    while True:
        image, label = queue.get()  # next image and label from the queue
        finished = label == -1

        if features and (finished or len(features) >= data_size):
            _save_batch(features, labels, lock, count)
            features.clear()
            labels.clear()

        if finished:
            break

        # Feature vector of the grayscale image.
        features.append(GetFeature(image))
        labels.append(label)

        # NOTE: random-shift augmentation (repate_n shifted copies per image)
        # is disabled; it relied on helpers that are not defined in this file.

    print('Process %s is done!' % os.getpid())

if __name__ == '__main__':
    # Script entry point: one producer process reads images, read_process_n
    # consumer processes extract features and save them; the elapsed time is
    # reported in hours.
    time_start = time.time()  # start timing

    # Shared objects are created in the parent and handed to every child.
    image_queue = multiprocessing.Queue(maxsize=1000)  # bounded image queue
    lock = multiprocessing.Lock()                      # guards the file counter
    count = multiprocessing.Value('i', 0)              # shared file counter

    # Producer: puts images on the queue.
    write_sub_process = multiprocessing.Process(
        target=read_image_to_queue,
        args=(image_queue,),
    )

    # Consumers: take images off the queue.
    read_sub_processes = [
        multiprocessing.Process(
            target=extract_feature,
            args=(image_queue, lock, count),
        )
        for _ in range(read_process_n)
    ]

    # Start the producer first, then the consumers; wait in the same order.
    all_processes = [write_sub_process] + read_sub_processes
    for proc in all_processes:
        proc.start()
    for proc in all_processes:
        proc.join()

    elapsed_hours = (time.time() - time_start) / 3600
    print('用时:%.6f 小时' % elapsed_hours)
    print("读图提取特征存npy,运行结束!")

来源:https://blog.csdn.net/sxlsxl119/article/details/89340318

标签:python,多进程,提取特征
0
投稿

猜你喜欢

  • 三分钟教会你用Python+OpenCV批量裁剪xml格式标注的图片

    2023-07-09 14:11:01
  • Python3实现简单可学习的手写体识别(实例讲解)

    2021-10-05 14:24:05
  • python爬虫模拟登录之图片验证码实现详解

    2022-06-30 00:07:29
  • JavaScript 经典实例日常收集整理(常用经典)

    2023-09-15 07:40:56
  • 关于JavaScript中string 的replace

    2024-05-05 09:22:42
  • javascript设计模式 – 原型模式原理与应用实例分析

    2024-04-22 13:26:50
  • Linux 安装JDK Tomcat MySQL的教程(使用Mac远程访问)

    2024-01-16 12:12:47
  • Python OpenCV对图像像素进行操作

    2021-02-25 13:02:20
  • Python实现注册登录系统

    2021-10-21 20:01:05
  • PHP函数extension_loaded()用法实例

    2023-08-14 19:11:10
  • CentOS7.5 安装MySql的教程

    2024-01-13 07:08:16
  • Matplotlib中rcParams使用方法

    2022-12-14 03:41:46
  • 在python 中实现运行多条shell命令

    2023-07-30 10:40:11
  • TensorFlow:将ckpt文件固化成pb文件教程

    2021-01-24 11:33:45
  • Python实现的批量修改文件后缀名操作示例

    2021-08-28 08:34:58
  • Selenium元素的常用操作方法分析

    2021-09-21 14:51:54
  • Python asyncore socket客户端开发基本使用教程

    2021-01-25 11:06:39
  • docker下mysql 8.0.20 安装配置方法图文教程

    2024-01-19 05:02:39
  • python logging.info在终端没输出的解决

    2022-04-15 20:39:52
  • Jupyter安装链接aconda实现过程图解

    2022-02-04 21:33:57
  • asp之家 网络编程 m.aspxhome.com