IT数码 购物 网址 头条 软件 日历 阅读 图书馆
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
图片批量下载器
↓批量下载图片,美女图库↓
图片自动播放器
↓图片自动播放器↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁
 
   -> Python知识库 -> Python 腾讯云 OCR识别 -> 正文阅读

[Python知识库]Python 腾讯云 OCR识别

在网上查了一下用 Python 做 OCR 识别的方案:如果在本地识别,通常使用 tesseract,
但识别率似乎不高。

对比之后,还是用腾讯云的 OCR 实现起来比较快,而且每个月有 1000 次的免费额度。不过网上的示例不多,用 Python 写的也还是 Python 2.7 版本,代码大多是东拼西凑、比较杂乱,所以花了点时间改成了 Python 3 的版本。

Python 3.10

import hashlib, hmac, json, os, sys, time
import requests

from datetime import datetime


def heads():
    """Build the TC3-HMAC-SHA256 ``Authorization`` header for GeneralBasicOCR.

    The signed request body is the JSON encoding of a fixed ``ImageUrl``
    parameter. Its SHA-256 hash is part of the signed data, so whoever sends
    the request has to post exactly the same bytes.

    The intermediate values (canonical request, string to sign, signature,
    authorization header and an equivalent ``curl`` command) are printed to
    stdout.

    Returns:
        tuple: ``(timestamp, authorization)`` - the integer Unix timestamp
        used for signing and the ``Authorization`` header value.
    """
    # Key parameters (the values shown here are masked with asterisks).
    secret_id = "AKIDI1SfDx6vqcAHtnG14JjQpC*****"
    secret_key = "OH3ZSLemhaOs6yGqRSJwPFwlY****"

    service = "ocr"
    host = "ocr.tencentcloudapi.com"
    endpoint = "https://" + host
    region = "ap-guangzhou"
    action = "GeneralBasicOCR"
    version = "2018-11-19"
    algorithm = "TC3-HMAC-SHA256"
    timestamp = int(time.time())
    # UTC calendar date of the timestamp. time.gmtime() is used instead of
    # datetime.utcfromtimestamp(), which is deprecated since Python 3.12.
    date = time.strftime("%Y-%m-%d", time.gmtime(timestamp))
    params = {
        "ImageUrl": "https://main.qcloudimg.com/raw/929bf9094ce2012473bcc4233a383e01.png"
    }

    # ************* Step 1: build the canonical request *************
    http_request_method = "POST"
    canonical_uri = "/"
    canonical_querystring = ""
    ct = "application/json; charset=utf-8"
    payload = json.dumps(params)
    canonical_headers = "content-type:%s\nhost:%s\n" % (ct, host)
    signed_headers = "content-type;host"
    hashed_request_payload = hashlib.sha256(payload.encode("utf-8")).hexdigest()
    canonical_request = "\n".join([
        http_request_method,
        canonical_uri,
        canonical_querystring,
        canonical_headers,
        signed_headers,
        hashed_request_payload,
    ])
    print(canonical_request)

    # ************* Step 2: build the string to sign *************
    credential_scope = date + "/" + service + "/" + "tc3_request"
    hashed_canonical_request = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest()
    string_to_sign = "\n".join([
        algorithm,
        str(timestamp),
        credential_scope,
        hashed_canonical_request,
    ])
    print(string_to_sign)

    # ************* Step 3: compute the signature *************
    def sign(key, msg):
        """Return the raw HMAC-SHA256 digest of the text ``msg`` under ``key``."""
        return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()

    # Derive the signing key by chaining HMACs over date, service and the
    # fixed terminator string.
    secret_date = sign(("TC3" + secret_key).encode("utf-8"), date)
    secret_service = sign(secret_date, service)
    secret_signing = sign(secret_service, "tc3_request")
    signature = hmac.new(secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
    print(signature)

    # ************* Step 4: assemble the Authorization header *************
    authorization = (algorithm + " " +
                     "Credential=" + secret_id + "/" + credential_scope + ", " +
                     "SignedHeaders=" + signed_headers + ", " +
                     "Signature=" + signature)
    print(authorization)

    # Print an equivalent curl command for the same signed request.
    print('curl -X POST ' + endpoint
          + ' -H "Authorization: ' + authorization + '"'
          + ' -H "Content-Type: application/json; charset=utf-8"'
          + ' -H "Host: ' + host + '"'
          + ' -H "X-TC-Action: ' + action + '"'
          + ' -H "X-TC-Timestamp: ' + str(timestamp) + '"'
          + ' -H "X-TC-Version: ' + version + '"'
          + ' -H "X-TC-Region: ' + region + '"'
          + " -d '" + payload + "'")
    return timestamp, authorization


def req():
    """Send the signed GeneralBasicOCR request and print the recognised text.

    Prints the HTTP status code, then the concatenation of every
    ``DetectedText`` entry in the response.

    Raises:
        RuntimeError: if the response carries an ``Error`` object instead of
            ``TextDetections``.
    """
    timestamp, authorization = heads()
    headers = {
        'Authorization': str(authorization),
        'Content-Type': 'application/json; charset=utf-8',
        'Host': 'ocr.tencentcloudapi.com',
        'X-TC-Action': 'GeneralBasicOCR',
        'X-TC-Timestamp': str(timestamp),
        'X-TC-Version': '2018-11-19',
        'X-TC-Region': 'ap-guangzhou',
    }

    # Must stay byte-for-byte identical to the payload that heads() hashed
    # into the signature (json.dumps of the same ImageUrl parameter).
    data = '{"ImageUrl": "https://main.qcloudimg.com/raw/929bf9094ce2012473bcc4233a383e01.png"}'
    # A timeout keeps the call from blocking forever on a stalled connection.
    r = requests.post('https://ocr.tencentcloudapi.com', headers=headers, data=data, timeout=30)

    # JSON output
    status_code = r.status_code
    print(status_code)
    response = r.json()["Response"]
    if "Error" in response:
        # Surface the API's own error instead of failing with a KeyError on
        # the missing "TextDetections" key.
        error = response["Error"]
        raise RuntimeError("OCR request failed: %s: %s" % (error.get("Code"), error.get("Message")))
    ret = ''.join(item["DetectedText"] for item in response["TextDetections"])
    print(ret)

    # To inspect the raw response body instead, print r.content.decode('utf-8').


# Run the URL-based OCR request once when this script is executed.
req()

Base64 图片上传

import hashlib, hmac, json, os, sys, time
import requests

from datetime import datetime


def heads(image_path):
    """Build the TC3-HMAC-SHA256 ``Authorization`` header for an image file.

    The file at ``image_path`` is encoded by the ``base64`` helper of this
    script and sent as the ``ImageBase64`` parameter. The SHA-256 hash of the
    JSON payload is part of the signed data, so the caller has to post the
    returned payload unchanged.

    The intermediate values (canonical request, string to sign, signature,
    authorization header and an equivalent ``curl`` command, which includes
    the full payload) are printed to stdout.

    Args:
        image_path: Path of the image file to upload.

    Returns:
        tuple: ``(timestamp, authorization, payload)`` - the integer Unix
        timestamp used for signing, the ``Authorization`` header value and
        the JSON request body that was signed.
    """
    # Key parameters (the values shown here are masked with asterisks).
    secret_id = "AKIDI1SfDx6vqcAHtnG14JjQpC0p******"
    secret_key = "OH3ZSLemhaOs6yGqRSJwPFwlY*****"

    service = "ocr"
    host = "ocr.tencentcloudapi.com"
    endpoint = "https://" + host
    region = "ap-guangzhou"
    action = "GeneralBasicOCR"
    version = "2018-11-19"
    algorithm = "TC3-HMAC-SHA256"
    timestamp = int(time.time())
    # UTC calendar date of the timestamp. time.gmtime() is used instead of
    # datetime.utcfromtimestamp(), which is deprecated since Python 3.12.
    date = time.strftime("%Y-%m-%d", time.gmtime(timestamp))
    # Encode the image
    image_base64 = base64(image_path)
    params = {
        "ImageBase64": image_base64
    }

    # ************* Step 1: build the canonical request *************
    http_request_method = "POST"
    canonical_uri = "/"
    canonical_querystring = ""
    ct = "application/json; charset=utf-8"
    payload = json.dumps(params)
    canonical_headers = "content-type:%s\nhost:%s\n" % (ct, host)
    signed_headers = "content-type;host"
    hashed_request_payload = hashlib.sha256(payload.encode("utf-8")).hexdigest()
    canonical_request = "\n".join([
        http_request_method,
        canonical_uri,
        canonical_querystring,
        canonical_headers,
        signed_headers,
        hashed_request_payload,
    ])
    print(canonical_request)

    # ************* Step 2: build the string to sign *************
    credential_scope = date + "/" + service + "/" + "tc3_request"
    hashed_canonical_request = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest()
    string_to_sign = "\n".join([
        algorithm,
        str(timestamp),
        credential_scope,
        hashed_canonical_request,
    ])
    print(string_to_sign)

    # ************* Step 3: compute the signature *************
    def sign(key, msg):
        """Return the raw HMAC-SHA256 digest of the text ``msg`` under ``key``."""
        return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest()

    # Derive the signing key by chaining HMACs over date, service and the
    # fixed terminator string.
    secret_date = sign(("TC3" + secret_key).encode("utf-8"), date)
    secret_service = sign(secret_date, service)
    secret_signing = sign(secret_service, "tc3_request")
    signature = hmac.new(secret_signing, string_to_sign.encode("utf-8"), hashlib.sha256).hexdigest()
    print(signature)

    # ************* Step 4: assemble the Authorization header *************
    authorization = (algorithm + " " +
                     "Credential=" + secret_id + "/" + credential_scope + ", " +
                     "SignedHeaders=" + signed_headers + ", " +
                     "Signature=" + signature)
    print(authorization)

    # Print an equivalent curl command for the same signed request.
    print('curl -X POST ' + endpoint
          + ' -H "Authorization: ' + authorization + '"'
          + ' -H "Content-Type: application/json; charset=utf-8"'
          + ' -H "Host: ' + host + '"'
          + ' -H "X-TC-Action: ' + action + '"'
          + ' -H "X-TC-Timestamp: ' + str(timestamp) + '"'
          + ' -H "X-TC-Version: ' + version + '"'
          + ' -H "X-TC-Region: ' + region + '"'
          + " -d '" + payload + "'")
    return timestamp, authorization, payload


def req(image_path):
    """Send a signed GeneralBasicOCR request for an image file and print the text.

    Prints the HTTP status code and the decoded JSON response, then the
    concatenation of every ``DetectedText`` entry.

    Args:
        image_path: Path of the image file to recognise; passed to ``heads``.

    Raises:
        RuntimeError: if the response carries an ``Error`` object instead of
            ``TextDetections``.
    """
    # ``data`` is the exact JSON payload that heads() signed.
    timestamp, authorization, data = heads(image_path)
    headers = {
        'Authorization': str(authorization),
        'Content-Type': 'application/json; charset=utf-8',
        'Host': 'ocr.tencentcloudapi.com',
        'X-TC-Action': 'GeneralBasicOCR',
        'X-TC-Timestamp': str(timestamp),
        'X-TC-Version': '2018-11-19',
        'X-TC-Region': 'ap-guangzhou',
    }

    # A timeout keeps the call from blocking forever on a stalled connection.
    r = requests.post('https://ocr.tencentcloudapi.com', headers=headers, data=data, timeout=30)

    # JSON output
    status_code = r.status_code
    print(status_code)
    r_json = r.json()
    print("json:", r_json)
    response = r_json["Response"]
    if "Error" in response:
        # Surface the API's own error instead of failing with a KeyError on
        # the missing "TextDetections" key.
        error = response["Error"]
        raise RuntimeError("OCR request failed: %s: %s" % (error.get("Code"), error.get("Message")))
    ret = ''.join(item["DetectedText"] for item in response["TextDetections"])
    print(ret)

    # To inspect the raw response body instead, print r.content.decode('utf-8').


def base64(file_path):
    """Return the contents of a file as a base64 ``data:`` URI string.

    The prefix is always ``data:image/png;base64,`` regardless of the actual
    file type.

    Args:
        file_path: Path of the file to read in binary mode.

    Returns:
        str: ``"data:image/png;base64,"`` followed by the base64 text of the
        file's bytes.
    """
    # Imported locally under an alias because this function's own name is
    # the same as the standard-library module's.
    import base64 as b64

    # The context manager closes the file even if reading fails.
    with open(file_path, 'rb') as file:
        encoded = b64.b64encode(file.read()).decode()
    return "data:image/png;base64," + encoded


# Run the OCR request for the local image file "test_ocr.png" when this script is executed.
req("test_ocr.png")

参考

  Python知识库 最新文章
Python中String模块
【Python】 14-CSV文件操作
python的pandas库读写文件
使用Nordic的nrf52840实现蓝牙DFU过程
【Python学习记录】numpy数组用法整理
Python学习笔记
python字符串和列表
python如何从txt文件中解析出有效的数据
Python编程从入门到实践自学/3.1-3.2
python变量
上一篇文章      下一篇文章      查看所有文章
加:2022-10-22 21:11:19  更:2022-10-22 21:13:00 
 
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁

360图书馆 购物 三丰科技 阅读网 日历 万年历 2024年11日历 -2024/11/15 7:16:13-

图片自动播放器
↓图片自动播放器↓
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
图片批量下载器
↓批量下载图片,美女图库↓
  网站联系: qq:121756557 email:121756557@qq.com  IT数码