ocr_tool.md

2023/10/08 categories:Code| tags:Code|

記事に戻る

# -*- coding: utf-8 -*-
import csv
import sys
from pathlib import Path
from poppler import Poppler
from PyQt5 import QtWidgets, QtCore, QtGui

from ocr_view import OCRView
from toolbar import ToolBar
from filelist import FileList
from tesseract_ocr import TesseractOCR
from iamge_processing import ImageProcessing

class MainWindow(QtWidgets.QMainWindow):
    def __init__(self):
        """Build the main window: OCR view, tool bar dock, file list dock and signal wiring."""
        super().__init__()
        # Suffixes offered by the "open" dialog filter (see fileOpen()).
        # '.pbm' replaces the former '.pdm', which is not an image format Qt
        # reads; the neighbouring '.pgm'/'.ppm' show the PNM family was meant.
        self.readableImages = ['.pdf', '.bmp', '.jpg', '.jpeg', '.png', '.pbm', '.pgm', '.ppm', '.xbm', '.xpm']
        # Clipboard for rect rows copied in the file list; all four are filled
        # together by childrenCopied() and replayed by childrenPaste().
        self.copiedChildrenDatas = None
        self.copiedChildrenTexts = None
        self.copiedChildrenPens  = None
        self.copiedChildrenPenTexts = None
        scriptDir = Path(__file__).resolve().parent
        self.poppler = Poppler(scriptDir)
        self.tesseractOCR = TesseractOCR(self, scriptDir)

        self.resize(800, 600)
        self.setWindowTitle('OCR tool')
        self.setCentralWidget( QtWidgets.QWidget(self) )
        self.ocrView = OCRView( self.centralWidget() )

        # NOTE(review): this attribute shadows QWidget.layout(); the name is
        # kept so existing users of `window.layout` keep working.
        self.layout = QtWidgets.QVBoxLayout( self.centralWidget() )
        self.layout.setContentsMargins(0, 0, 0, 0)
        self.layout.addWidget(self.ocrView)

        # Tool bar lives in a fixed dock at the top; the empty title bar
        # widget hides the dock's caption.
        self.toolBar = ToolBar( self.centralWidget() )
        self.toolBarDock = QtWidgets.QDockWidget('', self)
        self.toolBarDock.setWidget(self.toolBar)
        self.toolBarDock.setFloating(False)
        self.toolBarDock.setFeatures(QtWidgets.QDockWidget.NoDockWidgetFeatures)
        self.toolBarDock.setTitleBarWidget( QtWidgets.QWidget() )
        self.addDockWidget(QtCore.Qt.TopDockWidgetArea, self.toolBarDock)

        # File list lives in a fixed dock on the left.
        self.fileList = FileList(self)
        self.fileListDock = QtWidgets.QDockWidget('File list', self)
        self.fileListDock.setWidget(self.fileList)
        self.fileListDock.setFeatures(QtWidgets.QDockWidget.NoDockWidgetFeatures)
        self.fileListDock.setTitleBarWidget( QtWidgets.QWidget() )
        self.addDockWidget(QtCore.Qt.LeftDockWidgetArea, self.fileListDock)

        # Progress bar is only shown while an OCR job runs (see ocrAll()).
        self.progressBar = QtWidgets.QProgressBar()
        self.progressBar.hide()
        self.statusBar().addPermanentWidget(self.progressBar)

        # Signal wiring: tool bar buttons.
        self.toolBar.drawMenu.drawRectButton.clicked.connect(lambda flag : self.ocrView.setDrawRectFlag(flag))
        self.toolBar.drawMenu.deleteRectButton.clicked.connect(self.removeSelectedRects)
        self.toolBar.drawMenu.drawRectButton.setEnabled(False)
        self.toolBar.fileMenu.openButton.clicked.connect(self.fileOpen)
        self.toolBar.fileMenu.saveButton.clicked.connect(self.saveFiles)
        self.toolBar.fileMenu.clearButton.clicked.connect(self.fileClear)
        self.toolBar.imageMenu.recognizeButton.clicked.connect(self.recognizeAll)
        self.toolBar.viewMenu.rowHeight.valueChanged.connect(self.rowHeightChanged)
        self.toolBar.viewMenu.addToggleViewButton(self.fileListDock.toggleViewAction(), 0, 0, 1, 1)
        self.toolBar.ocrMenu.ocrButton.clicked.connect(self.ocrAll)
        # Signal wiring: view, file list and OCR worker.
        self.ocrView.mouseLeftReleasedSignal.connect(self.ocrViewClicked)
        self.fileList.tableClicked.connect(self.fileListClicked)
        self.fileList.childrenCopiedSignal.connect(self.childrenCopied)
        self.fileList.childrenPasteSignal.connect(self.childrenPaste)
        self.fileList.listUpdated.connect(self.refreshView)
        self.fileList.fileDropped.connect(lambda files : self.fileOpen(files))
        self.fileList.filePixmapNoneSignal.connect(lambda index : self.fileListClicked(index))
        self.tesseractOCR.nextSignal.connect(self.ocrNext)
        self.tesseractOCR.finished.connect( lambda : self.progressBar.hide() )

        # Apply the initial row padding shown in the tool bar.
        self.rowHeightChanged( self.toolBar.viewMenu.rowHeight.value() )

    def childrenCopied(self, childrenDatas, childrenTexts, childrenPens, childrenPenTexts):
        """Remember the copied rect rows so that childrenPaste() can replay them.

        Args:
            childrenDatas: Data values of the copied rows.
            childrenTexts: Display texts of the copied rows.
            childrenPens: Pens of the copied rows.
            childrenPenTexts: Pen names of the copied rows.
        """
        (
            self.copiedChildrenDatas,
            self.copiedChildrenTexts,
            self.copiedChildrenPens,
            self.copiedChildrenPenTexts,
        ) = (childrenDatas, childrenTexts, childrenPens, childrenPenTexts)

    def childrenPaste(self, parentIndexes):
        """Replace the children of each given file row with the copied rect rows.

        Does nothing when no data or texts have been copied yet.

        Args:
            parentIndexes: Model indexes of the file rows to paste into.
        """
        nothingCopied = self.copiedChildrenDatas is None or self.copiedChildrenTexts is None
        if nothingCopied:
            return

        for targetIndex in parentIndexes:
            self.fileList.removeAllChildren(targetIndex)
            targetRow = targetIndex.row()

            # Load the file's pixmap on demand so column 1 is populated.
            if self.fileList.item(targetRow, 1).data() is None:
                sourcePath = self.fileList.item(targetRow).data()
                loadedPixmap = self.pathToPixmap(targetIndex, sourcePath)
                self.fileList.setData(self.fileList.index(targetRow, 1), 'image', loadedPixmap)

            copiedRows = zip(
                self.copiedChildrenDatas,
                self.copiedChildrenTexts,
                self.copiedChildrenPens,
                self.copiedChildrenPenTexts,
            )
            for rectData, rectText, rectPen, rectPenText in copiedRows:
                self.fileList.appendChild(targetIndex, rectText, rectData, rectPen, rectPenText)

    def closeEvent(self, event):
        """Stop a running OCR job and delete its leftover files when the window closes.

        Nothing is done when no OCR job is running.

        Args:
            event: The close event delivered by Qt; it is not modified here.
        """
        if not self.tesseractOCR.isRunning():
            return

        self.tesseractOCR.terminate()
        # terminate() only requests termination; wait until the worker has
        # really stopped before touching its files.
        # NOTE(review): assumes tesseractOCR is a QThread (it exposes
        # isRunning/terminate/start/finished) -- confirm.
        self.tesseractOCR.wait()

        leftovers = [
            Path('__temp__.png'),
            Path('__temp__.txt'),
            Path('__whitelist__.txt'),
            # ocrAll() writes the whitelist next to this script, which is not
            # necessarily the current working directory.
            Path(__file__).resolve().parent / '__whitelist__.txt',
        ]
        for path in leftovers:
            if path.exists():
                path.unlink()

    def fileClear(self):
        """Empty the file list and the OCR view and switch rectangle drawing off."""
        # Called without a parent index; presumably this clears every file row.
        self.fileList.removeAllChildren()
        self.ocrView.clear()
        # Mirror the rect-row branch of fileListClicked(): disabling the button
        # alone would leave the view in drawing mode with no file selected.
        self.ocrView.drawRectFlag = False
        self.toolBar.drawMenu.drawRectButton.setEnabled(False)

    def fileListClicked(self, clickedIndex):
        """Show the file belonging to the clicked row and update drawing state.

        Clicking a file row (no valid parent) enables rectangle drawing and
        loads the file's pixmap on demand; clicking a rect row disables
        drawing and shows the parent file instead.

        Args:
            clickedIndex: Model index of the clicked file-list entry.
        """
        # Remember the selection; it is re-applied below before the view is refreshed.
        previousSelection = self.fileList.selectedIndexes()
        drawButton = self.toolBar.drawMenu.drawRectButton
        isFileRow = not clickedIndex.parent().isValid()

        if isFileRow:
            self.ocrView.drawRectFlag = drawButton.isChecked()

            fileRow = clickedIndex.row()
            if self.fileList.item(fileRow, 1).data() is None:
                sourcePath = self.fileList.item(fileRow).data()
                loadedPixmap = self.pathToPixmap(clickedIndex, sourcePath)
                self.fileList.setData(self.fileList.index(fileRow, 1), 'image', loadedPixmap)

            parentIndex = clickedIndex
        else:
            self.ocrView.drawRectFlag = False
            parentIndex = clickedIndex.parent()

        # The displayed pixmap depends on which image column is checked.
        pixmap = self.getFilePixmap(parentIndex)
        drawButton.setEnabled(isFileRow)

        selectionModel = self.fileList.view.selectionModel()
        for selected in previousSelection:
            selectionModel.select(selected, QtCore.QItemSelectionModel.Select)

        self.refreshView(pixmap, parentIndex)

    def fileOpen(self, files=None):
        """Append files to the file list, skipping paths that are already listed.

        Args:
            files: List of file paths to add. Any non-list value (for example
                the ``checked`` bool a button click passes, or the default
                ``None``) opens a file dialog to ask the user instead.
        """
        if not isinstance(files, list):
            nameFilter = 'Readable files (*' + ' *'.join(self.readableImages) + ')'
            files, _ = QtWidgets.QFileDialog.getOpenFileNames(None, 'Open PDF files', '', nameFilter)

        knownPaths = [ item.data() for item in self.fileList.items() ]
        for path in map(Path, files):
            if path in knownPaths:
                continue
            self.fileList.appendFile(path)
            # Track the new entry too, so a path repeated within the same
            # batch is not appended twice.
            knownPaths.append(path)

    def getFilePixmap(self, index):
        """Return the data of the first checked image column of a file row.

        Column 1 is tried before column 2.

        Args:
            index: Model index whose row identifies the file.

        Returns:
            The data of the first checked column, or None when neither
            column is checked.
        """
        fileRow = index.row()
        for column in (1, 2):
            candidate = self.fileList.item(fileRow, column)
            if candidate.checkState() == QtCore.Qt.Checked:
                return candidate.data()
        return None

    def getPen(self, text=None):
        """Create the pen for a named colour.

        Args:
            text: Colour name ('red', 'green', 'blue' or 'yellow'). When None,
                the current text of the tool bar's pen combo box is used.

        Returns:
            A ``(QPen, name)`` tuple. Unknown names fall back to the red pen
            and the name 'red'.
        """
        def makePen(red, green, blue):
            # Every pen uses alpha 100 and width 10.
            return QtGui.QPen( QtGui.QBrush( QtGui.QColor(red, green, blue, 100) ), 10 )

        if text is None:
            text = self.toolBar.drawMenu.rectPenComboBox.currentText()

        palette = (
            ('red',    (255,   0,   0)),
            ('green',  (  0, 255,   0)),
            ('blue',   (  0,   0, 255)),
            ('yellow', (255, 255,   0)),
        )
        for colourName, rgb in palette:
            if text == colourName:
                return makePen(*rgb), text

        return makePen(255, 0, 0), 'red'

    def ocrNext(self, parentRow, childRow, outputText):
        """Store one OCR result in the file list and advance the progress bar.

        Args:
            parentRow: Row of the file item.
            childRow: Row of the rect below that file item.
            outputText: Recognised text; surrounding whitespace is stripped.
        """
        resultCell = self.fileList.item(parentRow).child(childRow, 2)
        resultCell.setText(outputText.strip())

        bar = self.progressBar
        bar.setValue(bar.value() + 1)

    def ocrAll(self):
        """Write the character whitelist and start the OCR worker on all file items.

        Does nothing while a previous OCR job is still running, so the
        progress bar and the worker's item list are not reset mid-job.
        """
        if self.tesseractOCR.isRunning():
            return

        whiteListFilePath = Path(__file__).resolve().parent / '__whitelist__.txt'
        # Explicit UTF-8: the whitelist may hold non-ASCII characters, and the
        # platform default encoding could mangle them or fail to encode them.
        with open(whiteListFilePath, 'w', encoding='utf-8') as f:
            f.write( 'tessedit_char_whitelist ' + self.toolBar.whiteListMenu.text.toPlainText() )

        self.progressBar.setValue(0)
        self.progressBar.show()
        # One progress step per child row (rect) across all files; ocrNext()
        # advances the bar by one for each result.
        self.progressBar.setMaximum( sum( item.rowCount() for item in self.fileList.items() ) )
        self.tesseractOCR.whiteListFilePath = whiteListFilePath
        self.tesseractOCR.items = self.fileList.items()
        self.tesseractOCR.start()

    def ocrViewClicked(self, rectF):
        """Add a rectangle drawn in the view as a new child of the selected file row.

        Ignored unless the view is in rectangle-drawing mode and the file
        list has a selection.

        Args:
            rectF: The rectangle reported by the view.
        """
        if not self.ocrView.drawRectFlag:
            return

        selected = self.fileList.view.selectedIndexes()
        if not selected:
            # Nothing selected (e.g. the list was just cleared): there is no
            # row to attach the rectangle to.
            return

        parentIndex = selected[0]
        pen, text = self.getPen()
        self.fileList.appendChild(parentIndex, self.fileList.rectText(parentIndex), rectF, pen, text)
        self.ocrView.appendRect(rectF, pen)

    def pathToPixmap(self, index, filePath):
        """Load a file as a QPixmap, rendering PDFs through poppler first.

        Args:
            index: Unused; kept so existing callers keep working.
            filePath: ``pathlib.Path`` of the image or PDF file.

        Returns:
            The loaded QPixmap. For a PDF this is the first image produced by
            pdftocairo, or None when the conversion produced no files.
        """
        # Compare case-insensitively so that e.g. 'scan.PDF' is converted
        # instead of being handed to the image loader.
        if filePath.suffix.lower() != '.pdf':
            return QtGui.QPixmap( str(filePath) )

        # NOTE(review): 300 is presumably the render resolution in DPI -- confirm
        # against Poppler.pdftocairo.
        pagePaths = self.poppler.pdftocairo(filePath, Path('pdftocairo_temp.png'), 300)
        if len(pagePaths) == 0:
            return None

        # Only the first generated image is shown; every generated file is
        # temporary and removed once the pixmap has been loaded.
        pixmap = QtGui.QPixmap( str(pagePaths[0]) )
        for pagePath in pagePaths:
            pagePath.unlink()
        return pixmap

    def refreshView(self, pixmap=None, parentIndex=None):
        """Redraw the OCR view from scratch.

        Args:
            pixmap: Image to show, or None for no image.
            parentIndex: Index of the file row whose checked rects are drawn,
                or None to draw no rects.
        """
        view = self.ocrView
        view.clear()

        if pixmap is not None:
            view.addPixmap(pixmap)

        if parentIndex is not None:
            fileItem = self.fileList.item(parentIndex.row())
            highlightedRows = list(set(
                selected.row()
                for selected in self.fileList.selectedIndexes()
                if selected.parent() == parentIndex
            ))

            # First pass: selected rects, always drawn with the blue pen.
            for row in highlightedRows:
                rectItem = fileItem.child(row, 0)
                if rectItem.checkState() != QtCore.Qt.Unchecked:
                    rectF = rectItem.data()
                    highlightPen = self.getPen('blue')[0]
                    view.appendRect(rectF, highlightPen)

            # Second pass: remaining rects, drawn with the pen stored in column 3.
            for row in range(self.fileList.rowCount(parentIndex)):
                if row in highlightedRows:
                    continue
                rectItem = fileItem.child(row, 0)
                if rectItem.checkState() != QtCore.Qt.Unchecked:
                    rectF = rectItem.data()
                    storedPen = fileItem.child(row, 3).data()
                    view.appendRect(rectF, storedPen)

        view.update()

    def recognizeAll(self):
        """Replace the rects of every file with the ones found by table recognition.

        All existing children are removed first. Files whose image cannot be
        loaded are skipped instead of being passed to ImageProcessing.
        """
        for row in range( self.fileList.rowCount() ):
            self.fileList.removeAllChildren( self.fileList.index(row) )

        # The recognition parameters come from the tool bar and are the same
        # for every file, so read them once.
        imageMenu = self.toolBar.imageMenu
        area_range = ( imageMenu.contourAreaMin.value(), imageMenu.contourAreaMax.value() )
        dilate_size = ( imageMenu.dilate.value(), imageMenu.dilate.value() )

        for row in range( self.fileList.rowCount() ):
            parentIndex = self.fileList.index(row, 0)
            pixmap = self.fileList.item(row, 1).data()
            if pixmap is None:
                filePath = self.fileList.item(row).data()
                pixmap = self.pathToPixmap( parentIndex, filePath )
                if pixmap is None:
                    # pathToPixmap() returns None when a PDF yields no image.
                    continue
                self.fileList.setFileData(row, 1, 'image', pixmap)

            image_process = ImageProcessing(pixmap)
            # NOTE(review): `rects` is unused and the rectangles are built from
            # `crops` -- confirm that crops carries the four QRectF arguments.
            edge, rects, crops = image_process.recognize_table(area_range, dilate_size)

            self.fileList.setFileData(row, 2, 'edge image', edge, QtCore.Qt.Unchecked)

            for data in crops:
                pen, text = self.getPen()
                self.fileList.appendChild(
                    parentIndex,
                    self.fileList.rectText(parentIndex, 'Rect'),
                    QtCore.QRectF(data[0], data[1], data[2], data[3]),
                    pen, text
                )

        self.refreshView(None, None)

    def removeSelectedRects(self):
        """Delete the selected rects from the file list and redraw the view."""
        removedRects, parentIndex = self.fileList.removeSelectedRects()
        for rect in removedRects:
            self.ocrView.removeRect(rect)

        # The delete button is always enabled, so this can run with nothing
        # selected. NOTE(review): assumes FileList.removeSelectedRects() then
        # reports None or an invalid index as the parent -- confirm.
        if parentIndex is None or not parentIndex.isValid():
            return

        pixmap = self.getFilePixmap(parentIndex)
        self.refreshView(pixmap, parentIndex)
        
    def rowHeightChanged(self, value):
        """Apply ``value`` (in px) as the item padding of the file list's tree view.

        Args:
            value: Padding to put into the style sheet.
        """
        styleSheet = f'QTreeView::item {{ padding: {value!s}px }}'
        self.fileList.view.setStyleSheet(styleSheet)

    def saveFiles(self):
        """Export the OCR results of all files to a CSV file chosen by the user.

        The header is ``filename`` followed by every distinct rect name in
        first-seen order. Each file contributes one line; rects a file does
        not have are written as empty cells. Nothing is written when the
        dialog is cancelled.
        """
        # Ask for the target first so no table is built for a cancelled save.
        filepath, _ = QtWidgets.QFileDialog.getSaveFileName(None, 'Save CSV file', '', 'CSV file(*.csv)')
        if filepath == '':
            return

        keys = []
        seenKeys = set()  # O(1) membership test; `keys` keeps first-seen order
        fileRows = []
        for fileItem in self.fileList.items():
            # Rect name (column 0) -> recognised text (column 2); when a name
            # repeats within one file the last rect wins.
            datas = {}
            for rectRow in range(fileItem.rowCount()):
                key = fileItem.child(rectRow, 0).text().strip()
                datas[key] = fileItem.child(rectRow, 2).text().strip()
                if key not in seenKeys:
                    seenKeys.add(key)
                    keys.append(key)
            fileRows.append( (fileItem.text().strip(), datas) )

        csvDatas = [ ['filename'] + keys ]
        for fileName, datas in fileRows:
            csvDatas.append( [fileName] + [ datas.get(key, '') for key in keys ] )

        # NOTE(review): the file is written in the platform default encoding;
        # consider an explicit encoding if recognised text can fall outside it.
        with open(filepath, 'w', newline='') as f:
            writer = csv.writer(f)
            writer.writerows(csvDatas)

if __name__ == '__main__':
    # Script entry point: create the application, show the main window and
    # hand the event loop's return code to the shell as the exit status.
    app = QtWidgets.QApplication(sys.argv)
    window = MainWindow()
    window.show()
    sys.exit(app.exec())

Share post

Related Posts

コメント