实现批量图片文字识别(python+flask+EasyOCR)
话不多说,向上效果图
1)先说框架版本
为什么要先说框架版本呢,因为我在各种版本中尝试了两天,总算确定了如下版本适合我,至于其他的版本,各位自己去尝试
python 3.9.7
EasyOCR 1.7.2
flask 3.0.3
2)执行操作效果图
2.1)多选文件
2.2)图片预览
2.3)提取选中文件
2.4)提取所有文件
3.上代码
3.1)前端代码index.html
<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>Python OCR</title><link rel="stylesheet" href="/static/js/bootstrap/css/bootstrap.min.css"><style>.container {max-width: 1024px;margin-top: 20px;}.preview-image {max-width: 300px;max-height: 400px;margin-top: 10px;}.file-input {display: none;}</style>
</head>
<body><div class="container"><div class="row"><div class="col-md-12"><input type="file" id="fileInput" class="file-input" multiple></div></div><div class="row mt-3"><div class="col-md-6"><div class="list-group" id="picListBox"></div><button id="btnExtractSelected" class="btn btn-primary mt-2">提取选中文件</button><button id="btnDeleteSelected" class="btn btn-danger mt-2">删除选中文件</button></div><div class="col-md-6"><img id="lblImage" class="preview-image" src="" alt="暂未选择文件"></div></div><div class="row mt-3"><div class="col-md-12"><textarea id="txtPressInof" class="form-control" rows="5" readonly></textarea></div></div><div class="row mt-3"><div class="col-md-12 text-center"><button id="btnDo" class="btn btn-success">立即提取所有文件</button></div></div></div><script src="/static/js/jQuery-2.2.0.min.js"></script><script>$(document).ready(function () {// 处理文件选择$('#fileInput').change(function () {const files = $(this)[0].files;$('#picListBox').empty();for (let i = 0; i < files.length; i++) {const filename = files[i].name;const item = $('<a href="#" class="list-group-item list-group-item-action">' + filename + '</a>');item.data('file', files[i]);$('#picListBox').append(item);}});// 图片列表项点击事件$('#picListBox').on('click', 'a', function () {// 移除其他列表项的 active 类$('#picListBox a').removeClass('active');// 为当前点击的列表项添加 active 类$(this).addClass('active');const file = $(this).data('file');const reader = new FileReader();reader.onload = function (e) {$('#lblImage').attr('src', e.target.result);};reader.readAsDataURL(file);});// 删除选中文件按钮点击事件$('#btnDeleteSelected').click(function () {const selectedFileItem = $('#picListBox a.active');if (!selectedFileItem.length) {alert('请选择一个文件');return;}// 移除选中的文件项selectedFileItem.remove();// 清空 lblImage$('#lblImage').attr('src', '');$('#lblImage').attr('alt', '暂未选择文件');});// 提取选中文件按钮点击事件$('#btnExtractSelected').click(function () {const selectedFile = $('#picListBox a.active').data('file');if (!selectedFile) {alert('请选择一个文件');return;}const formData = new FormData();formData.append('file', selectedFile);$(this).prop('disabled', true).text('正在提取...');$.ajax({url: '/extract-selected', // 后端接口,处理选中的图片并返回提取结果type: 'POST',data: formData,contentType: false,processData: false,success: function (data) {$('#txtPressInof').val(data);$('#btnExtractSelected').prop('disabled', false).text('提取选中文件');}});});// 立即提取所有文件按钮点击事件$('#btnDo').click(function () {const formData = new FormData();$('#picListBox a').each(function () {formData.append('files', $(this).data('file'));});$(this).prop('disabled', true).text('正在提取...');$.ajax({url: '/process-pictures', // 后端接口,处理所有图片并返回提取结果type: 'POST',data: formData,contentType: false,processData: false,success: function (data) {$('#txtPressInof').val(data);$('#btnDo').prop('disabled', false).text('立即提取所有文件');}});});});</script>
</body>
</html>
3.2)后端代码
from flask import Flask, request, jsonify, render_template
import os
import easyocr
import time
import re
app = Flask(__name__)@app.route('/')
def index():return render_template('index.html')def clean_filename(filename):"""清理文件名,移除非法字符"""return re.sub(r'[\\/*?:"<>|]', "", filename)def get_unique_filename(target_folder, filename):"""确保文件名唯一,避免覆盖同名文件"""base, ext = os.path.splitext(filename)unique_filename = filenamecounter = 1while os.path.exists(os.path.join(target_folder, unique_filename)):unique_filename = f"{base}_{int(time.time())}{ext}" # 添加时间戳确保唯一性counter += 1return unique_filename@app.route('/process-pictures', methods=['POST'])
def process_pictures():# 指定保存图片的目标文件夹target_folder = "D:/OCR_Images" # 请确保该文件夹存在或在代码中创建它if not os.path.exists(target_folder):os.makedirs(target_folder)files = request.files.getlist('files')reader = easyocr.Reader(['ch_sim', 'en'], model_storage_directory='./model')result = ""for i, file in enumerate(files):# 获取文件名(不包含路径)filename = os.path.basename(file.filename)#print(filename)# 清理文件名safe_filename = clean_filename(filename)#print(safe_filename)# 确保文件名唯一unique_filename = get_unique_filename(target_folder, filename)#print(unique_filename)file_path = os.path.join(target_folder, unique_filename)file.save(file_path)result += f'【开始处理{i + 1}-{len(files)}张图片,{unique_filename}】\n'#print(file_path,result)ocr_result = reader.readtext(file_path)restxt = ""for ln in ocr_result:restxt += ln[1]restxt += "\n"result += restxtresult += "全部完成"return jsonify(result)@app.route('/extract-selected', methods=['POST'])
def extract_selected():# 指定保存图片的目标文件夹target_folder = "D:/OCR_Images" # 请确保该文件夹存在或在代码中创建它if not os.path.exists(target_folder):os.makedirs(target_folder)file = request.files['file']# 获取文件名(不包含路径)filename = os.path.basename(file.filename)# 清理文件名safe_filename = clean_filename(filename)# 确保文件名唯一unique_filename = get_unique_filename(target_folder, safe_filename)file_path = os.path.join(target_folder, unique_filename)file.save(file_path)reader = easyocr.Reader(['ch_sim', 'en'], model_storage_directory='./model')ocr_result = reader.readtext(file_path)result = f'【开始处理文件,{unique_filename}】\n'restxt = ""for ln in ocr_result:restxt += ln[1]restxt += "\n"result += restxtreturn jsonify(result)if __name__ == '__main__':app.run(debug=True)
最后提示:文字模型需自行下载保存到本地,我保存在自己的项目下的model目录下
模型的引用语句(有gpu的情况):