本文仍然基于医院检查单的需求:后期会有大量的样本图片,需要先分别归类,再去识别验证。为了减少人工分类的成本,这里用一段简单的代码实现程序自动分类。
先通过百度普通OCR识别图片中的文字信息,再遍历识别出的文字,看是否包含相关的关键字。如果百度识别不出来,就用本地的tesseract再识别一次;仍然识别不出的图片,统一放到一个目录中,由人工处理。
分类前:
分类后:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2022-7-8 09:09 AM
# @Author : damon.guo
# @File : picToClass.py
# @Software: PyCharm
"""Sort report images into folders named after a title line found by OCR.

Each image is first sent to the Baidu OCR service. If no line containing one
of the keywords in ``gettaglist`` is found, the image is OCR'd again locally
with tesseract. Images that still cannot be classified are copied into a
shared ``other`` folder for manual handling.
"""
import os
import shutil
import sys

import cv2
import pytesseract
from aip import AipOcr
from PIL import Image

# Keywords that mark a line as a usable title. Order matters: the first
# keyword (in list order) found in a line wins, so "报告单" must precede "报告".
gettaglist = ["报告单", "报告", "医院", "卫生院", "中心"]

# Folder name used when neither OCR engine yields a usable title.
FALLBACK_TAG = "other"

# Characters that cannot appear in a Windows file or directory name, plus
# ASCII control characters. OCR output is used as a folder name, so these
# must be removed before the folder is created.
_INVALID_NAME_CHARS = {ord(ch): None for ch in '<>:"/\\|?*'}
_INVALID_NAME_CHARS.update({code: None for code in range(32)})


def _safe_dir_name(tag):
    """Return *tag* reduced to a valid directory name.

    Forbidden characters are dropped and surrounding spaces/dots are stripped.
    If nothing is left (or *tag* is empty/None) ``FALLBACK_TAG`` is returned,
    so the image never lands directly in the classification root.
    """
    cleaned = (tag or "").translate(_INVALID_NAME_CHARS).strip(" .")
    return cleaned or FALLBACK_TAG


def picToClass(picturePath):
    """Classify an image with the local tesseract engine.

    The image is converted to grayscale, blurred and edge-detected before
    being handed to tesseract (simplified Chinese model).

    Args:
        picturePath: Path of the image file to recognise.

    Returns:
        The text of the first OCR line up to and including a keyword from
        ``gettaglist``, or ``FALLBACK_TAG`` when the image cannot be read or
        no line contains a keyword.
    """
    image = cv2.imread(picturePath)
    if image is None:
        # cv2.imread signals "unreadable / not an image" by returning None.
        return FALLBACK_TAG

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 75, 200)

    # Hand the edge image to tesseract in memory; no temporary file (and no
    # dependency on a fixed, pre-existing directory) is needed.
    text = pytesseract.image_to_string(Image.fromarray(edged), lang='chi_sim')
    print("二次识别", picturePath)

    for line in text.split("\n"):
        index = getTagIndex(line)
        if index == -1:
            continue
        news = line[:index]
        print("nes", news)
        cleaned = news.strip(" ").strip(":").strip("%")
        if cleaned:
            return cleaned

    # Still not recognised: goes to the shared fallback folder.
    return FALLBACK_TAG


def getclient(APP_ID, API_KEY, SECRET_KEY):
    """Build a Baidu OCR client from the account's App ID, API key and secret key."""
    client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
    return client


def _default_client():
    """Build a client from environment variables, falling back to the "xx" placeholders."""
    return getclient(
        os.environ.get("BAIDU_OCR_APP_ID", "xx"),
        os.environ.get("BAIDU_OCR_API_KEY", "xx"),
        os.environ.get("BAIDU_OCR_SECRET_KEY", "xx"),
    )


def get_file_content(imagepath):
    """Return the raw bytes of the file at *imagepath*."""
    with open(imagepath, 'rb') as fp:
        return fp.read()


def ocrsdk(imagepath, client=None):
    """Classify an image, trying Baidu OCR first and local tesseract second.

    Args:
        imagepath: Path of the image file to classify.
        client: Optional Baidu OCR client to reuse. When omitted, a new one is
            created from the ``BAIDU_OCR_APP_ID`` / ``BAIDU_OCR_API_KEY`` /
            ``BAIDU_OCR_SECRET_KEY`` environment variables (default "xx").

    Returns:
        The detected title text, or ``FALLBACK_TAG`` if both engines fail.
    """
    if client is None:
        client = _default_client()

    image = get_file_content(imagepath)
    # No optional request parameters are set; the original listed
    # templateSign, detect_direction, probability and language_type
    # as candidates.
    options = {}
    res_image = client.basicAccurate(image, options)
    # Log the path, not the image bytes.
    print("识别图片:", imagepath)

    tag = baiduocrreslut(res_image)
    if tag == FALLBACK_TAG:
        # Baidu found nothing usable: retry once with the local OCR engine.
        tag = picToClass(imagepath)
    return tag


def getTagIndex(strs):
    """Return the index just past the first matching keyword in *strs*.

    Keywords are tried in ``gettaglist`` order. The returned index is the end
    of the keyword, so ``strs[:index]`` keeps the whole keyword regardless of
    its length.

    Returns:
        The end index of the matched keyword, or -1 if no keyword occurs.
    """
    for keyword in gettaglist:
        position = strs.find(keyword)
        if position != -1:
            return position + len(keyword)
    return -1


def copyFile(src, dst):
    """Copy the file *src* to *dst*."""
    shutil.copy(src, dst)


def baiduocrreslut(res):
    """Extract a title line from a Baidu OCR response.

    Args:
        res: The dict returned by the Baidu OCR client. A response without
            ``words_result`` is treated as "nothing recognised".

    Returns:
        The first recognised line that contains a keyword from ``gettaglist``
        and is at least 4 characters long, or ``FALLBACK_TAG`` otherwise.
    """
    wordlist = (res or {}).get("words_result") or []
    for entry in wordlist:
        words = entry.get("words", "")
        if any(keyword in words for keyword in gettaglist):
            print(words)
            if len(words) >= 4:
                return words
    return FALLBACK_TAG


def main(path=r"F:\xbl\preclass", classpath=r"F:\xbl\class"):
    """Classify every file in *path* into a sub-folder of *classpath*.

    Args:
        path: Directory holding the images to classify.
        classpath: Root directory that receives one sub-folder per tag.

    Files are copied, not moved, and an existing destination file is left
    untouched. Sub-directories of *path* are skipped.
    """
    client = _default_client()  # one client for the whole batch
    for name in os.listdir(path):
        imagepath = os.path.join(path, name)
        if not os.path.isfile(imagepath):
            continue

        tag = _safe_dir_name(ocrsdk(imagepath, client))
        tagpath = os.path.join(classpath, tag)
        os.makedirs(tagpath, exist_ok=True)

        newimagepath = os.path.join(tagpath, name)
        if not os.path.exists(newimagepath):
            copyFile(imagepath, newimagepath)


if __name__ == "__main__":
    main()