ocr_tool.md
2023/10/08 categories:Code| tags:Code|
# -*- coding: utf-8 -*-
import csv
import sys
from pathlib import Path
from poppler import Poppler
from PyQt5 import QtWidgets, QtCore, QtGui
from ocr_view import OCRView
from toolbar import ToolBar
from filelist import FileList
from tesseract_ocr import TesseractOCR
from iamge_processing import ImageProcessing
class MainWindow(QtWidgets.QMainWindow):
    """Main window of the OCR tool.

    Hosts the OCR view as central widget, a tool bar docked at the top, a
    file list docked on the left and a progress bar in the status bar, and
    connects their signals to the handlers defined on this class.
    """

    # RGB components for every pen colour name understood by getPen().
    _PEN_COLORS = {
        'red': (255, 0, 0),
        'green': (0, 255, 0),
        'blue': (0, 0, 255),
        'yellow': (255, 255, 0),
    }
    # Names of scratch files removed when the window closes.
    _TEMP_FILE_NAMES = ('__temp__.png', '__temp__.txt', '__whitelist__.txt')

    def __init__(self):
        """Create the child widgets, dock them and connect all signals."""
        super(MainWindow, self).__init__()

        # Suffixes offered by the open dialog. '.pbm' used to be misspelled
        # '.pdm', which kept portable-bitmap files out of the file filter.
        self.readableImages = ['.pdf', '.bmp', '.jpg', '.jpeg', '.png', '.pbm', '.pgm', '.ppm', '.xbm', '.xpm']

        # Clipboard for rectangles copied in the file list (see childrenCopied).
        self.copiedChildrenDatas = None
        self.copiedChildrenTexts = None
        self.copiedChildrenPens = None
        self.copiedChildrenPenTexts = None

        scriptDir = Path(__file__).resolve().parent
        self.poppler = Poppler(scriptDir)
        self.tesseractOCR = TesseractOCR(self, scriptDir)

        self.resize(800, 600)
        self.setWindowTitle('OCR tool')

        # Central area: the OCR view inside a margin-less vertical layout.
        self.setCentralWidget(QtWidgets.QWidget(self))
        self.ocrView = OCRView(self.centralWidget())
        # NOTE(review): this attribute shadows QWidget.layout() on the
        # instance; the name is kept so existing users of it keep working.
        self.layout = QtWidgets.QVBoxLayout(self.centralWidget())
        self.layout.setContentsMargins(0, 0, 0, 0)
        self.layout.addWidget(self.ocrView)

        # Tool bar: fixed dock at the top, title bar replaced by an empty widget.
        self.toolBar = ToolBar(self.centralWidget())
        self.toolBarDock = QtWidgets.QDockWidget('', self)
        self.toolBarDock.setWidget(self.toolBar)
        self.toolBarDock.setFloating(False)
        self.toolBarDock.setFeatures(QtWidgets.QDockWidget.NoDockWidgetFeatures)
        self.toolBarDock.setTitleBarWidget(QtWidgets.QWidget())
        self.addDockWidget(QtCore.Qt.TopDockWidgetArea, self.toolBarDock)

        # File list: fixed dock on the left.
        self.fileList = FileList(self)
        self.fileListDock = QtWidgets.QDockWidget('File list', self)
        self.fileListDock.setWidget(self.fileList)
        self.fileListDock.setFeatures(QtWidgets.QDockWidget.NoDockWidgetFeatures)
        self.fileListDock.setTitleBarWidget(QtWidgets.QWidget())
        self.addDockWidget(QtCore.Qt.LeftDockWidgetArea, self.fileListDock)

        # Progress bar: hidden until an OCR run starts.
        self.progressBar = QtWidgets.QProgressBar()
        self.progressBar.hide()
        self.statusBar().addPermanentWidget(self.progressBar)

        # Tool bar signals.
        self.toolBar.drawMenu.drawRectButton.clicked.connect(lambda flag: self.ocrView.setDrawRectFlag(flag))
        self.toolBar.drawMenu.deleteRectButton.clicked.connect(self.removeSelectedRects)
        self.toolBar.drawMenu.drawRectButton.setEnabled(False)
        self.toolBar.fileMenu.openButton.clicked.connect(self.fileOpen)
        self.toolBar.fileMenu.saveButton.clicked.connect(self.saveFiles)
        self.toolBar.fileMenu.clearButton.clicked.connect(self.fileClear)
        self.toolBar.imageMenu.recognizeButton.clicked.connect(self.recognizeAll)
        self.toolBar.viewMenu.rowHeight.valueChanged.connect(self.rowHeightChanged)
        self.toolBar.viewMenu.addToggleViewButton(self.fileListDock.toggleViewAction(), 0, 0, 1, 1)
        self.toolBar.ocrMenu.ocrButton.clicked.connect(self.ocrAll)

        # View and file list signals.
        self.ocrView.mouseLeftReleasedSignal.connect(self.ocrViewClicked)
        self.fileList.tableClicked.connect(self.fileListClicked)
        self.fileList.childrenCopiedSignal.connect(self.childrenCopied)
        self.fileList.childrenPasteSignal.connect(self.childrenPaste)
        self.fileList.listUpdated.connect(self.refreshView)
        self.fileList.fileDropped.connect(lambda files: self.fileOpen(files))
        self.fileList.filePixmapNoneSignal.connect(lambda index: self.fileListClicked(index))

        # OCR worker signals.
        self.tesseractOCR.nextSignal.connect(self.ocrNext)
        self.tesseractOCR.finished.connect(lambda: self.progressBar.hide())

        # Apply the initial row height shown in the tool bar.
        self.rowHeightChanged(self.toolBar.viewMenu.rowHeight.value())

    def _ensurePixmapLoaded(self, fileIndex):
        """Load and store the image of a file row if it has none yet.

        Returns the pixmap held in column 1 of the row, which may still be
        None when pathToPixmap() could not produce one.
        """
        row = fileIndex.row()
        pixmap = self.fileList.item(row, 1).data()
        if pixmap is None:
            filePath = self.fileList.item(row).data()
            pixmap = self.pathToPixmap(fileIndex, filePath)
            self.fileList.setData(self.fileList.index(row, 1), 'image', pixmap)
        return pixmap

    def childrenCopied(self, childrenDatas, childrenTexts, childrenPens, childrenPenTexts):
        """Remember the copied rectangles so childrenPaste() can re-create them."""
        self.copiedChildrenDatas = childrenDatas
        self.copiedChildrenTexts = childrenTexts
        self.copiedChildrenPens = childrenPens
        self.copiedChildrenPenTexts = childrenPenTexts

    def childrenPaste(self, parentIndexes):
        """Replace the children of every given file row with the copied rectangles.

        Does nothing until childrenCopied() has stored a complete set.
        """
        copied = (
            self.copiedChildrenDatas,
            self.copiedChildrenTexts,
            self.copiedChildrenPens,
            self.copiedChildrenPenTexts,
        )
        # All four lists are zipped below, so all four must be present.
        if any(part is None for part in copied):
            return
        for parentIndex in parentIndexes:
            self.fileList.removeAllChildren(parentIndex)
            self._ensurePixmapLoaded(parentIndex)
            for data, text, pen, penText in zip(*copied):
                self.fileList.appendChild(parentIndex, text, data, pen, penText)

    def closeEvent(self, event):
        """Stop a running OCR worker and remove scratch files before closing."""
        if self.tesseractOCR.isRunning():
            self.tesseractOCR.terminate()
            # NOTE(review): the worker exposes the QThread API (isRunning /
            # terminate / start / finished); Qt asks for wait() after
            # terminate() so the thread is really gone before teardown.
            self.tesseractOCR.wait()

        # ocrAll() writes the whitelist next to this script, whereas the
        # original clean-up only looked in the working directory; check both.
        scriptDir = Path(__file__).resolve().parent
        for baseDir in (Path('.'), scriptDir):
            for name in self._TEMP_FILE_NAMES:
                path = baseDir / name
                try:
                    if path.exists():
                        path.unlink()
                except OSError:
                    # Best-effort clean-up: a locked or already-removed file
                    # must not keep the window from closing.
                    pass
        super(MainWindow, self).closeEvent(event)

    def fileClear(self):
        """Remove every entry from the file list and reset the view."""
        self.fileList.removeAllChildren()
        self.ocrView.clear()
        self.toolBar.drawMenu.drawRectButton.setEnabled(False)
        # Keep the view's draw mode in line with the now-disabled button.
        self.ocrView.drawRectFlag = False

    def fileListClicked(self, clickedIndex):
        """Show the clicked file (or the file owning the clicked rectangle).

        Drawing new rectangles is only enabled while a file row is clicked.
        """
        selectedIndexes = self.fileList.selectedIndexes()
        isFileRow = not clickedIndex.parent().isValid()
        if isFileRow:
            self.ocrView.drawRectFlag = self.toolBar.drawMenu.drawRectButton.isChecked()
            self._ensurePixmapLoaded(clickedIndex)
            parentIndex = clickedIndex
        else:
            self.ocrView.drawRectFlag = False
            parentIndex = clickedIndex.parent()
        pixmap = self.getFilePixmap(parentIndex)
        self.toolBar.drawMenu.drawRectButton.setEnabled(isFileRow)

        # Re-apply the selection captured on entry.
        selectionModel = self.fileList.view.selectionModel()
        for index in selectedIndexes:
            selectionModel.select(index, QtCore.QItemSelectionModel.Select)
        self.refreshView(pixmap, parentIndex)

    def fileOpen(self, files):
        """Append files to the file list, skipping paths already listed.

        files: list of paths to add. Any other value (for example the bool
        sent by a button's clicked signal) opens a file dialog instead.
        """
        if not isinstance(files, list):
            nameFilter = 'Readable files (*' + ' *'.join(self.readableImages) + ')'
            files, _ = QtWidgets.QFileDialog.getOpenFileNames(None, 'Open PDF files', '', nameFilter)
        knownPaths = [item.data() for item in self.fileList.items()]
        for path in map(Path, files):
            if path in knownPaths:
                continue
            self.fileList.appendFile(path)
            # Track additions too so a path repeated in `files` is added once.
            knownPaths.append(path)

    def getFilePixmap(self, index):
        """Return the data of the first checked image column (1, then 2), or None."""
        row = index.row()
        for column in (1, 2):
            item = self.fileList.item(row, column)
            if item.checkState() == QtCore.Qt.Checked:
                return item.data()
        return None

    def getPen(self, text=None):
        """Return (QPen, colour name) for a colour name.

        text: 'red', 'green', 'blue' or 'yellow'. None reads the tool bar's
        pen combo box; any unknown name falls back to 'red'.
        """
        if text is None:
            text = self.toolBar.drawMenu.rectPenComboBox.currentText()
        if text not in self._PEN_COLORS:
            text = 'red'
        red, green, blue = self._PEN_COLORS[text]
        # Alpha 100 and width 10 are shared by every colour.
        brush = QtGui.QBrush(QtGui.QColor(red, green, blue, 100))
        return QtGui.QPen(brush, 10), text

    def ocrNext(self, parentRow, childRow, outputText):
        """Store one OCR result in the list and advance the progress bar."""
        self.fileList.item(parentRow).child(childRow, 2).setText(outputText.strip())
        self.progressBar.setValue(self.progressBar.value() + 1)

    def ocrAll(self):
        """Write the whitelist config and start the OCR worker on all files."""
        # Starting again mid-run would reset the progress bar and swap the
        # worker's inputs underneath it, so ignore the request.
        if self.tesseractOCR.isRunning():
            return

        whiteListFilePath = Path(__file__).resolve().parent / '__whitelist__.txt'
        # Explicit UTF-8: the platform default encoding may be unable to
        # represent the whitelist characters.
        with open(whiteListFilePath, 'w', encoding='utf-8') as f:
            f.write('tessedit_char_whitelist ' + self.toolBar.whiteListMenu.text.toPlainText())

        items = self.fileList.items()
        self.progressBar.setValue(0)
        self.progressBar.show()
        # One progress step per child row of every file.
        self.progressBar.setMaximum(sum(item.rowCount() for item in items))
        self.tesseractOCR.whiteListFilePath = whiteListFilePath
        self.tesseractOCR.items = items
        self.tesseractOCR.start()

    def ocrViewClicked(self, rectF):
        """Add the rectangle just drawn in the view to the selected file."""
        if not self.ocrView.drawRectFlag:
            return
        selected = self.fileList.view.selectedIndexes()
        if not selected:
            # Nothing selected, so there is no file to attach the rectangle to.
            return
        parentIndex = selected[0]
        pen, text = self.getPen()
        self.fileList.appendChild(parentIndex, self.fileList.rectText(parentIndex), rectF, pen, text)
        self.ocrView.appendRect(rectF, pen)

    def pathToPixmap(self, index, filePath):
        """Load filePath as a QPixmap; PDFs are converted through poppler first.

        index is accepted for interface compatibility and is not used.
        Returns None when the PDF conversion yields no output file.
        """
        # Compare case-insensitively so '.PDF' is converted as well.
        if filePath.suffix.lower() != '.pdf':
            return QtGui.QPixmap(str(filePath))

        paths = self.poppler.pdftocairo(filePath, Path('pdftocairo_temp.png'), 300)
        if not paths:
            return None
        # Only the first generated file is displayed; all of them are
        # temporary and get removed.
        pixmap = QtGui.QPixmap(str(paths[0]))
        for path in paths:
            path.unlink()
        return pixmap

    def refreshView(self, pixmap=None, parentIndex=None):
        """Redraw the view with an optional pixmap and the rectangles of a file.

        Checked rectangles that are selected are drawn in blue; the other
        checked rectangles use the pen stored in column 3 of their row.
        """
        self.ocrView.clear()
        if pixmap is not None:
            self.ocrView.addPixmap(pixmap)
        if parentIndex is not None:
            parentItem = self.fileList.item(parentIndex.row())
            selectedRows = {
                i.row() for i in self.fileList.selectedIndexes() if i.parent() == parentIndex
            }
            for row in sorted(selectedRows):
                rectItem = parentItem.child(row, 0)
                if rectItem.checkState() == QtCore.Qt.Unchecked:
                    continue
                pen, _ = self.getPen('blue')
                self.ocrView.appendRect(rectItem.data(), pen)
            for row in range(self.fileList.rowCount(parentIndex)):
                if row in selectedRows:
                    continue
                rectItem = parentItem.child(row, 0)
                if rectItem.checkState() == QtCore.Qt.Unchecked:
                    continue
                self.ocrView.appendRect(rectItem.data(), parentItem.child(row, 3).data())
        self.ocrView.update()

    def recognizeAll(self):
        """Re-detect rectangles for every file with the image-processing settings."""
        rowCount = self.fileList.rowCount()
        for row in range(rowCount):
            self.fileList.removeAllChildren(self.fileList.index(row))

        # The settings do not change while this loop runs; read them once.
        imageMenu = self.toolBar.imageMenu
        area_range = (imageMenu.contourAreaMin.value(), imageMenu.contourAreaMax.value())
        dilate_size = (imageMenu.dilate.value(), imageMenu.dilate.value())

        for row in range(rowCount):
            parentIndex = self.fileList.index(row, 0)
            pixmap = self.fileList.item(row, 1).data()
            if pixmap is None:
                filePath = self.fileList.item(row).data()
                pixmap = self.pathToPixmap(parentIndex, filePath)
            self.fileList.setFileData(row, 1, 'image', pixmap)
            if pixmap is None:
                # pathToPixmap() found no image (failed PDF conversion), so
                # there is nothing to analyse for this file.
                continue

            image_process = ImageProcessing(pixmap)
            edge, rects, crops = image_process.recognize_table(area_range, dilate_size)
            self.fileList.setFileData(row, 2, 'edge image', edge, QtCore.Qt.Unchecked)
            for data in crops:
                # A fresh pen per rectangle, as each child stores its own.
                pen, text = self.getPen()
                self.fileList.appendChild(
                    parentIndex,
                    self.fileList.rectText(parentIndex, 'Rect'),
                    QtCore.QRectF(data[0], data[1], data[2], data[3]),
                    pen, text
                )
        self.refreshView(None, None)

    def removeSelectedRects(self):
        """Delete the selected rectangles from the list and from the view."""
        removedRects, parentIndex = self.fileList.removeSelectedRects()
        for rect in removedRects:
            self.ocrView.removeRect(rect)
        self.refreshView(self.getFilePixmap(parentIndex), parentIndex)

    def rowHeightChanged(self, value):
        """Apply value (pixels) as the item padding of the file list view."""
        self.fileList.view.setStyleSheet('QTreeView::item { padding: ' + str(value) + 'px }')

    def saveFiles(self):
        """Export the OCR results as CSV: one row per file, one column per rectangle name.

        Columns are the distinct rectangle names in first-seen order; a file
        without a given rectangle gets an empty cell. Nothing is written
        when the save dialog is cancelled.
        """
        keys = []
        seenKeys = set()
        perFile = []
        for fileItem in self.fileList.items():
            values = {}
            for rectRow in range(fileItem.rowCount()):
                key = fileItem.child(rectRow, 0).text().strip()
                # A later rectangle with the same name overrides an earlier one.
                values[key] = fileItem.child(rectRow, 2).text().strip()
                if key not in seenKeys:
                    seenKeys.add(key)
                    keys.append(key)
            perFile.append((fileItem.text().strip(), values))

        csvDatas = [['filename'] + keys]
        for fileName, values in perFile:
            csvDatas.append([fileName] + [values.get(key, '') for key in keys])

        filepath, _ = QtWidgets.QFileDialog.getSaveFileName(None, 'Save CSV file', '', 'CSV file(*.csv)')
        if filepath == '':
            return
        # NOTE(review): written in the platform default encoding, as before;
        # changing it would alter the file for existing consumers.
        with open(filepath, 'w', newline='') as f:
            csv.writer(f).writerows(csvDatas)
if __name__ == '__main__':
    # Script entry point: create the application, show the main window and
    # hand the event loop's exit status back to the operating system.
    app = QtWidgets.QApplication(sys.argv)
    window = MainWindow()
    window.show()
    sys.exit(app.exec())