Featured image of post 摆脱pysimplegui依赖,转向QT的拥抱

摆脱pysimplegui依赖,转向QT的拥抱

基于QT多线程,实现运行程序不卡顿,成功解决pyinstaller等打包工具的exe程序无响应的问题

前言

由于户口本信息识别的ocr算法需要用到百度家的飞桨框架,他们家的识别理论上目前也是国内做的最好的了,之前在使用pysimplegui打包成可执行文件(exe)时,发现一运行起来就会将整个界面锁死,目前也没有找到在simplegui上的多线程解法。想想为了以后程序的集成, qt这关还是得走一走的,索性学习了一段时间的pyqt5,还好现在资料还多一些,而且还得到了大佬的帮助,不然多线程这个是真的麻烦。现在就介绍本次户口本信息识别的技术流程。

主要思路

gui的设计基本是依靠Qt Designer,然后借助自动编译程序(pyuic5)将ui文件转为py文件,相关的教程其实网上很多,这里就不再赘述了。本文主要基于重写QThread类,并让主窗口类继承QMainWindow,完成界面不卡顿的实现,还使用了归一化数量实现百分比进度条。识别方法上,主要使用工业级框架paddlehub,在cv的特征匹配下,将信息准确映射,这个也是借助了一位大佬的帮助才得以实现,不然纯坐标的信息分类能让我敲上上千行代码。。。

GUI设计

先给出gui的.py代码:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'd:\Data\Codes\2022\ocrhukou\untitled.ui'
#
# Created by: PyQt5 UI code generator 5.15.4
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again.  Do not edit this file unless you know what you are doing.


from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_MainWindow(object):
    """Widget tree for the batch recognition window, as emitted by pyuic5.

    The class only builds and labels widgets; it attaches no click handlers
    itself. It is meant to be mixed into a ``QMainWindow`` subclass that calls
    ``setupUi(self)``.
    """

    def setupUi(self, MainWindow):
        """Create every widget and layout and install them on *MainWindow*.

        Layout, top to bottom inside a vertical layout:
        a 3x3 grid (label / line edit / button per row), a row with two
        buttons, a label above a text edit, and a row with a label and a
        progress bar. A status bar is attached to the window itself.
        """
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(800, 600)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        self.verticalLayout = QtWidgets.QVBoxLayout(self.centralwidget)
        self.verticalLayout.setObjectName("verticalLayout")
        # Grid of path pickers: column 0 = caption, 1 = path text, 2 = browse button.
        self.gridLayout = QtWidgets.QGridLayout()
        self.gridLayout.setObjectName("gridLayout")
        # Row 0: label / lineEdit / pushButton
        self.label = QtWidgets.QLabel(self.centralwidget)
        self.label.setObjectName("label")
        self.gridLayout.addWidget(self.label, 0, 0, 1, 1)
        self.lineEdit = QtWidgets.QLineEdit(self.centralwidget)
        self.lineEdit.setObjectName("lineEdit")
        self.gridLayout.addWidget(self.lineEdit, 0, 1, 1, 1)
        self.pushButton = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton.setObjectName("pushButton")
        self.gridLayout.addWidget(self.pushButton, 0, 2, 1, 1)
        # Row 1: label_2 / lineEdit_2 / pushButton_2
        self.label_2 = QtWidgets.QLabel(self.centralwidget)
        self.label_2.setObjectName("label_2")
        self.gridLayout.addWidget(self.label_2, 1, 0, 1, 1)
        self.lineEdit_2 = QtWidgets.QLineEdit(self.centralwidget)
        self.lineEdit_2.setObjectName("lineEdit_2")
        self.gridLayout.addWidget(self.lineEdit_2, 1, 1, 1, 1)
        self.pushButton_2 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_2.setObjectName("pushButton_2")
        self.gridLayout.addWidget(self.pushButton_2, 1, 2, 1, 1)
        # Row 2: label_3 / lineEdit_3 / pushButton_3
        self.label_3 = QtWidgets.QLabel(self.centralwidget)
        self.label_3.setObjectName("label_3")
        self.gridLayout.addWidget(self.label_3, 2, 0, 1, 1)
        self.lineEdit_3 = QtWidgets.QLineEdit(self.centralwidget)
        self.lineEdit_3.setObjectName("lineEdit_3")
        self.gridLayout.addWidget(self.lineEdit_3, 2, 1, 1, 1)
        self.pushButton_3 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_3.setObjectName("pushButton_3")
        self.gridLayout.addWidget(self.pushButton_3, 2, 2, 1, 1)
        self.verticalLayout.addLayout(self.gridLayout)
        # Horizontal row holding pushButton_4 and pushButton_5.
        self.horizontalLayout = QtWidgets.QHBoxLayout()
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.pushButton_4 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_4.setObjectName("pushButton_4")
        self.horizontalLayout.addWidget(self.pushButton_4)
        self.pushButton_5 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_5.setObjectName("pushButton_5")
        self.horizontalLayout.addWidget(self.pushButton_5)
        self.verticalLayout.addLayout(self.horizontalLayout)
        # Caption plus text edit below the button row.
        self.label_4 = QtWidgets.QLabel(self.centralwidget)
        self.label_4.setObjectName("label_4")
        self.verticalLayout.addWidget(self.label_4)
        self.textEdit = QtWidgets.QTextEdit(self.centralwidget)
        self.textEdit.setObjectName("textEdit")
        self.verticalLayout.addWidget(self.textEdit)
        # Bottom row: caption plus progress bar, initial value 0.
        self.horizontalLayout_2 = QtWidgets.QHBoxLayout()
        self.horizontalLayout_2.setObjectName("horizontalLayout_2")
        self.label_5 = QtWidgets.QLabel(self.centralwidget)
        self.label_5.setObjectName("label_5")
        self.horizontalLayout_2.addWidget(self.label_5)
        self.progressBar = QtWidgets.QProgressBar(self.centralwidget)
        self.progressBar.setProperty("value", 0)
        self.progressBar.setObjectName("progressBar")
        self.horizontalLayout_2.addWidget(self.progressBar)
        self.verticalLayout.addLayout(self.horizontalLayout_2)
        MainWindow.setCentralWidget(self.centralwidget)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)

        # Apply the visible texts, then let Qt wire up slots by object name.
        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Set the window title and the caption of every label and button.

        All strings pass through ``QCoreApplication.translate`` with the
        ``"MainWindow"`` context.
        """
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "户口本批量识别"))
        self.label.setText(_translate("MainWindow", "待处理路径"))
        self.pushButton.setText(_translate("MainWindow", "选择文件夹"))
        self.label_2.setText(_translate("MainWindow", "excel保存路径"))
        self.pushButton_2.setText(_translate("MainWindow", "选择保存路径"))
        self.label_3.setText(_translate("MainWindow", "识别模板"))
        self.pushButton_3.setText(_translate("MainWindow", "模板文件选择"))
        self.pushButton_4.setText(_translate("MainWindow", "开始识别"))
        self.pushButton_5.setText(_translate("MainWindow", "停止识别"))
        self.label_4.setText(_translate("MainWindow", "识别信息输出框"))
        self.label_5.setText(_translate("MainWindow", "进度条"))

程序界面

不得不说qt5确实比pysimplegui优雅很多,在gui的细节上充满诚意,当然这体量大了很多,两者并没有什么可比性。代码基本就是转换后直接用,并没有做修改,除非还有一些界面上细节函数的处理,那可以随便加上,比如在运行时某个按钮将其设置为不可点击的状态,可以通过判断if来设置该button的enable,下面给出一个我使用过的小函数:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
def caigou_selec_change(self, s):
    """Refill ``self.comboBox_2`` with the sub-categories matching *s*.

    The combo box is always cleared first. The two announcement types that
    have several sub-categories are filled with a single ``addItems`` call;
    the contract type gets one fixed entry, and any other value of *s*
    yields a single "please choose again" entry.

    Args:
        s: Text of the announcement type currently selected.
    """
    multi_choice_groups = (
        ("采购公告", [
            "公开招标",
            "邀请招标",
            "竞争性谈判",
            "询价",
            "单一来源",
            "竞争性磋商",
            "电子卖场",
            "其他",
        ]),
        ("结果公告", [
            "中标(成交)结果公告",
            "中标公告",
            "成交公告",
            "邀请招标资格入围公告",
            "废标公告",
            "终止公告",
            "公开招标资格入围公告",
            "其他采购结果公告",
            "结果公告总页",
        ]),
    )

    combo = self.comboBox_2
    combo.clear()

    # Types with several sub-categories: add the whole list in one call.
    for announcement_type, options in multi_choice_groups:
        if s == announcement_type:
            combo.addItems(options)
            return

    # Single-entry cases: the contract type, or the fallback prompt.
    single_entry = "采购合同公告" if s == "合同公告" else "请重选"
    combo.addItem(single_entry)

这个是我在做某个政府网站相关的信息爬取的时候使用的小函数,大意是这样,其他细节还需要读者上网自己学习一下,当然本次任务并没有需要到这些函数。

重写多线程类

QT本身自带多线程类的实现,但是需要开发人员继承后重写,这里给出代码:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52

class New_Thread(QThread):
    """Worker thread that recognises household-register scans and exports them.

    The input folder is expected to contain one sub-folder per household,
    each holding ``.jpg`` scans. Every recognised page becomes one row of an
    ``.xls`` sheet. Progress is reported through ``finishSignal`` so the GUI
    thread stays responsive.
    """

    # Progress in percent. Values below 100 are emitted while scanning;
    # 100 is emitted exactly once, after the workbook has been saved.
    finishSignal = pyqtSignal(int)

    def __init__(self, deal_path, excel_path, mobanfile):
        """Store the job parameters; no work happens until ``start()``.

        Args:
            deal_path: Folder whose sub-folders contain the scans.
            excel_path: Destination path of the generated workbook.
            mobanfile: Path of the template image used for feature matching.
        """
        super(New_Thread, self).__init__()
        self.input_file_path = deal_path
        self.excel_save_path = excel_path
        self.moban = mobanfile

    @staticmethod
    def _is_jpg(file_name):
        """Return True when the text after the last dot is ``jpg`` (any case)."""
        return file_name.lower().split('.')[-1] == 'jpg'

    def _collect_jobs(self):
        """Return ``[(folder, [jpg names])]`` for each first-level sub-folder.

        Plain files lying directly in the input folder are skipped; calling
        ``os.listdir`` on them would raise ``NotADirectoryError``.
        """
        jobs = []
        for entry in os.listdir(self.input_file_path):
            folder = os.path.join(self.input_file_path, entry)
            if not os.path.isdir(folder):
                continue
            jobs.append((folder, [n for n in os.listdir(folder) if self._is_jpg(n)]))
        return jobs

    def run(self):
        """Recognise every scan, write the rows, save the workbook.

        Emits ``finishSignal`` after each scan (capped at 99) and finally 100
        once the file is on disk, so a listener that reacts to 100 never
        announces completion before the save has succeeded.
        """
        sheet1, style_content, excelfile = self.excel_style()

        jobs = self._collect_jobs()
        # Count exactly the files that will be processed, so the percentage
        # is consistent with the loop below.
        total = sum(len(names) for _, names in jobs)

        seen = 0  # scans processed so far, recognised or not
        row = 0   # last data row written; row 0 holds the titles

        for folder, names in jobs:
            first_row = row + 1  # first row belonging to this household
            household_id = ''    # reset so a previous household's id cannot leak in
            sort_num = 2         # order number for the next non-head member

            for name in names:
                seen += 1
                infor = self.get_infor(os.path.join(folder, name), self.moban)
                if infor['Name'] != '' and infor['Gender'] != '':
                    row += 1
                    if infor['Relationship'] == '户主':
                        # The head of household is always number 1 and supplies
                        # the identifier: the last six characters of the ID.
                        sheet1.write(row, 1, '1', style_content)
                        household_id = infor['ID_number'][-6:]
                    else:
                        sheet1.write(row, 1, sort_num, style_content)
                        sort_num += 1
                    sheet1.write(row, 2, infor['Name'], style_content)
                    sheet1.write(row, 3, infor['Gender'], style_content)
                    sheet1.write(row, 4, infor['Relationship'], style_content)
                    sheet1.write(row, 5, infor['ID_number'], style_content)
                    sheet1.write(row, 6, infor['Native_place'], style_content)
                    print("the", row, "picture")
                # The signal is declared as int, so emit an int; keep 100
                # reserved for "workbook saved".
                self.finishSignal.emit(min(99, seen * 100 // total))

            # Back-fill the identifier column for exactly this household's
            # rows, whether or not a head-of-household page was recognised.
            for household_row in range(first_row, row + 1):
                sheet1.write(household_row, 0, household_id, style_content)

        excelfile.save(self.excel_save_path)
        self.finishSignal.emit(100)

这是最基本的写法,必须做的是继承QThread,并实现init和run函数,当然run所需要使用的辅助函数,可以在线程类里接着写,并不需要写到界面类里。

paddlehub识别

这个算法其实是大佬直接给我的,我也只是提前读者一步学习(笑死),直接的算法我就不透露了,思路主要是基于基本的模板,将户口本信息特征匹配到模板的固定位置,这样方便信息归类。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90

    def cv_imread(self, filePath):
        """Load an image by reading the file's bytes and decoding them in memory.

        The file is read with ``np.fromfile`` and decoded with
        ``cv2.imdecode`` using the "unchanged" flag, instead of passing the
        path to ``cv2.imread``.

        Args:
            filePath: Path of the image file.

        Returns:
            Whatever ``cv2.imdecode`` returns for the file's bytes.
        """
        raw_bytes = np.fromfile(filePath, dtype=np.uint8)
        return cv2.imdecode(raw_bytes, cv2.IMREAD_UNCHANGED)


    def cv_imread2(self, filePath):
        """Load an image from *filePath* and return it as a grayscale array.

        The bytes are read with ``np.fromfile`` and decoded in memory. Because
        decoding uses the "unchanged" flag, the result may have one, three or
        four channels; each case is converted appropriately.

        Args:
            filePath: Path of the image file.

        Returns:
            A 2-D grayscale image array.

        Raises:
            ValueError: If the file's bytes cannot be decoded as an image.
        """
        cv_img = cv2.imdecode(np.fromfile(filePath, dtype=np.uint8), -1)
        if cv_img is None:
            raise ValueError("cannot decode image file: %s" % filePath)
        if cv_img.ndim == 2:
            # Already single-channel; cvtColor would reject it.
            return cv_img
        if cv_img.shape[2] == 4:
            return cv2.cvtColor(cv_img, cv2.COLOR_BGRA2GRAY)
        # OpenCV decodes colour images in B, G, R channel order.
        return cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)

    def excel_style(self):
        """Create the output workbook with a styled title row.

        Returns:
            A ``(sheet, style, workbook)`` tuple: the sheet named ``sheet1``
            with its title row already written, the cell style to reuse for
            data rows, and the workbook that owns the sheet.
        """
        workbook = xlwt.Workbook()
        sheet1 = workbook.add_sheet(u'sheet1', cell_overwrite_ok=True)

        cell_font = xlwt.Font()
        cell_font.name = "宋体"
        cell_font.height = 20 * 12  # presumably 12 pt in 1/20-pt units; confirm against xlwt

        cell_alignment = xlwt.Alignment()
        cell_alignment.horz = 0x02  # presumably horizontal centre; confirm against xlwt
        cell_alignment.vert = 0x01  # presumably vertical centre; confirm against xlwt

        style_content = xlwt.XFStyle()
        style_content.font = cell_font
        style_content.alignment = cell_alignment

        # Same line style (1) and colour index (0x08) on all four sides.
        borders = style_content.borders
        for side in ('left', 'right', 'top', 'bottom'):
            setattr(borders, side, 1)
            setattr(borders, side + '_colour', 0x08)

        # Column titles, in sheet order.
        headers = ('标识', '顺序号', '姓名', '性别', '与户主关系', '身份证号', '住址')
        # Columns 4-6 get an explicit width; the others keep the default.
        wide_columns = (4, 5, 6)

        for column, header in enumerate(headers):
            if column in wide_columns:
                sheet1.col(column).width = 8000
            sheet1.write(0, column, header, style_content)

        return sheet1, style_content, workbook

    def get_infor(self, input_path, moban,  use_gpu=False):
        """Recognise one scan and return its fields as a dict.

        The scan is aligned to the template via ``self.feature_match``. When
        alignment succeeds, the aligned image is passed to the PaddleHub OCR
        module and the result is mapped to fields by
        ``self.get_registration_card_information``. When it fails, a dict
        with empty fields and ``'Code': '1'`` is returned.

        The OCR module is created on first use and kept on the instance,
        instead of being rebuilt for every image.

        Args:
            input_path: Path of the scan to recognise.
            moban: Path of the template image.
            use_gpu: Forwarded to ``recognize_text``.

        Returns:
            A dict of recognised fields; on alignment failure, the empty-field
            dict described above.
        """
        img = self.cv_imread(input_path)
        template_img = self.cv_imread2(moban)
        scale = 2 / 3
        flag, img_out = self.feature_match(template_img, img, scale)
        if not flag:
            return {'Name': '',
                    'Name_used_before': '',
                    'Relationship': '',
                    'Gender': '',
                    'Nation': '',
                    'Birthday': '',
                    'Birth_place': '',
                    'Native_place': '',
                    'ID_number': '',
                    'Code': '1'}

        # getattr: the attribute does not exist until the first successful match.
        ocr = getattr(self, '_ocr_module', None)
        if ocr is None:
            ocr = hub.Module(name="chinese_ocr_db_crnn_server", enable_mkldnn=True)
            self._ocr_module = ocr
        result = ocr.recognize_text(images=[img_out], use_gpu=use_gpu)
        return self.get_registration_card_information(result)


    

开头的两个imread是因为直接使用cv2.imread在遇到中文路径会报错,所以一般折中使用辅助函数读取图片。excel_style基于xlwt记录识别出来的信息,以excel形式导出,算是我很常用的数据整合的函数。get_infor主要是利用paddlehub的函数,非常轻松就能对图片所有出现的文本进行识别并给出坐标,其实和之前我写过的一篇文章里用的paddleocr算是师承一脉。其他的就不说了,大佬是使用特征匹配获取每个信息的相对位置,其实和用坐标范围判别是差不多的,而坐标范围判别在我其他文章中也写有具体算法。仔细的读者会发现,上文代码其实函数缩进多了一个tab,这个其实是因为这些函数也是放在New_Thread类里的。

主窗口类

核心算法算是完成了,接下来就是将上文内容利用起来。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from PyQt5.QtWidgets import QMainWindow, QApplication, QFileDialog, QMessageBox
from PyQt5.QtCore import QThread, pyqtSignal
from Ui_untitled import Ui_MainWindow
import os, xlwt, sys, copy, re, xlwt, cv2
import numpy as np
import paddlehub as hub


class MainWindow(QMainWindow, Ui_MainWindow):
    """Main window: collects the three paths and drives the worker thread."""

    def __init__(self):
        """Build the generated UI and connect each button to its handler."""
        super(MainWindow, self).__init__()
        self.setupUi(self)
        # Deliberately not called ``thread``: an instance attribute with that
        # name would shadow the inherited ``QObject.thread()`` method.
        self.worker = None
        self.pushButton_4.clicked.connect(self.Start)
        self.pushButton_5.clicked.connect(self.Stop)
        self.pushButton.clicked.connect(self.SelectFolder)
        self.pushButton_2.clicked.connect(self.save_excel)
        self.pushButton_3.clicked.connect(self.select_moban)

    def _is_busy(self):
        """Return True while a worker thread exists and is still running."""
        return self.worker is not None and self.worker.isRunning()

    def Stop(self):
        """Abort the running recognition job, if there is one."""
        print('End')
        if self._is_busy():
            self.worker.terminate()
            # Block until the thread has really stopped before returning.
            self.worker.wait()

    def Start(self):
        """Validate the three paths and launch the worker thread.

        A missing input folder or template aborts with an error dialog. A
        missing Excel path is replaced by ``<Desktop>/<input folder name>.xls``
        after a warning. Nothing happens if a job is already running.
        """
        dealpath = self.lineEdit.text()
        excelpath = self.lineEdit_2.text()
        mobanfile = self.lineEdit_3.text()

        if dealpath == '':
            self.msg6()
            return

        if excelpath == '':
            self.msg4()
            desktop = os.path.join(os.path.expanduser("~"), 'Desktop')
            # normpath drops a trailing separator, which would otherwise make
            # basename() return an empty string.
            basename = os.path.basename(os.path.normpath(dealpath))
            excelpath = os.path.join(desktop, basename + '.xls')
            self.lineEdit_2.setText(str(excelpath))

        # Checked independently of the Excel path, so a missing template is
        # always reported.
        if mobanfile == '':
            self.msg7()
            return

        # Replacing the reference to a running QThread would destroy it
        # mid-run, so ignore repeated clicks.
        if self._is_busy():
            return

        self.worker = New_Thread(dealpath, excelpath, mobanfile)
        # Connect before starting so no progress update can be missed.
        self.worker.finishSignal.connect(self.Change)
        self.worker.start()

    def Change(self, msg):
        """Show progress *msg* (percent); at 100, announce completion."""
        self.progressBar.setValue(msg)
        if msg == 100:
            self.msg5()

    def msg5(self):
        """Tell the user the job is done, then reset the progress bar."""
        QMessageBox.about(self,"完成","识别任务已完成")
        self.progressBar.setValue(0)

    def SelectFolder(self):
        """Let the user pick the input folder; a cancelled dialog changes nothing."""
        directory1 = QFileDialog.getExistingDirectory(None,"选取文件夹","C:/")
        if directory1:
            self.lineEdit.setText(str(directory1))

    def msg4(self):
        """Warn that the Excel path was empty and a default is being used."""
        QMessageBox.critical(self,"警告!","excel保存路径为空,已设置默认输出路径",QMessageBox.Yes|QMessageBox.No,QMessageBox.Yes)

    def msg6(self):
        """Report that the input folder is missing."""
        QMessageBox.critical(self,"错误!","待处理文件夹为空,请输入路径",QMessageBox.Yes|QMessageBox.No,QMessageBox.Yes)

    def msg7(self):
        """Report that the template file is missing."""
        QMessageBox.critical(self,"错误!","模板文件为空,该路径为必填项",QMessageBox.Yes|QMessageBox.No,QMessageBox.Yes)

    def save_excel(self):
        """Let the user pick the output file; a cancelled dialog changes nothing."""
        fileName2, ok2 = QFileDialog.getSaveFileName(None, "文件保存",  "C:/")
        if fileName2:
            self.lineEdit_2.setText(str(fileName2))

    def select_moban(self):
        """Let the user pick the template image; a cancelled dialog changes nothing."""
        fileName1, filetype = QFileDialog.getOpenFileName(self,
                                    "选取模板",
                                    "C:/",
                                    "Image Files (*.jpg)")   # file-type filter; separate several filters with ";;"
        if fileName1:
            self.lineEdit_3.setText(str(fileName1))
        
def main():
    """Create the application, show the main window and run the event loop."""
    app = QApplication(sys.argv)
    main_window = MainWindow()
    main_window.show()
    # Hand the event loop's return code back to the shell.
    sys.exit(app.exec_())


if __name__ == '__main__':
    main()

这个类也算是通用写法了,除了init函数,其他都是自定义函数,如每个按钮绑定的函数,以及一些警告的完善等等。

总结

其实也是得到群友的一些点拨,这个多线程自己做起来还真抽象,网上某些博客写的又太误导,不过真正实现了以后还是挺有成就感。最有收获的当然是大佬给的户口本识别函数,可惜一些原因不能和读者分享,主要思路也是给出来了,有兴趣的话自己实现一下也好。

Author by Jerrychoices
Built with Hugo
主题 StackJimmy 设计

本站访客数人次 总访问量 本文阅读量