[开发测试] Python3通过Everything SDK访问本地文件

开发: C++知识库 Java知识库 JavaScript Python PHP知识库人工智能区块链大数据移动开发嵌入式开发工具数据结构与算法开发测试游戏开发网络协议系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑笔记本显卡显示器固态硬盘硬盘耳机手机 iphone vivo oppo 小米华为单反装机图拉丁

-> 开发测试 -> Python3通过Everything SDK访问本地文件 -> 正文阅读

[开发测试]Python3通过Everything SDK访问本地文件

作者:treeSkill

需求：

在本地文件中，查找书单《信息安全从业者书单》中列出的书籍。

原理：

遍历 README.md，并通过 Everything SDK 在本地查找其中的每本书。

1、计算文件CRC32

因为只是为了确定本地文件的唯一性，而 CRC32 的计算效率比 MD5 和 SHA1 更高，所以这里计算 CRC32。

#!/usr/bin/env python
#-*- coding:utf-8 -*-  

import zlib
import os

# Number of bytes read from a file per chunk (1 MiB).
block_size = 1 << 20


def crc32_simple(filepath):
    """Return the CRC32 of the first ``block_size`` bytes of a file.

    Only the leading chunk is hashed, so this is a quick fingerprint rather
    than a checksum of the whole file.

    Args:
        filepath: Path of the file to read.

    Returns:
        The value of ``zlib.crc32`` over the leading chunk, or 0 when the
        file cannot be read (the error is printed).
    """
    checksum = 0
    try:
        with open(filepath, 'rb') as stream:
            head = stream.read(block_size)
        checksum = zlib.crc32(head, 0)
    except Exception as err:
        print(str(err))
    return checksum

# Compute the CRC32 of a whole file.
def crc32_file(filepath, chunk_size=1024 * 1024):
    """Return the CRC32 of an entire file as an unsigned integer.

    The file is read chunk by chunk so that large files never have to fit
    in memory; the chunk size does not influence the result.

    Args:
        filepath: Path of the file to hash.
        chunk_size: Number of bytes read per iteration (default 1 MiB).

    Returns:
        The CRC32 as a decimal integer in the range 0..0xFFFFFFFF. 0 is
        returned for an empty file and also when the file cannot be read
        (the error is printed).
    """
    crc = 0
    try:
        # The context manager closes the handle even when a read fails.
        with open(filepath, 'rb') as fd:
            while True:
                buffer = fd.read(chunk_size)
                if not buffer:  # EOF (or empty file)
                    break
                # The mask keeps the value unsigned on every Python version.
                crc = zlib.crc32(buffer, crc) & 0xFFFFFFFF
    except Exception as e:
        # Deliberately best-effort: report the problem and signal it with 0
        # instead of aborting the caller's loop over many files.
        print(str(e))
        return 0
    return crc

2、文件大小自动变换单位

递归实现文件大小根据bytes，返回合理区间['B', 'KB', 'MB', 'GB', 'TB', 'PB']。eg : 16473740 bytes--> 15.727 MB

# Convert a byte count to a human-readable size, e.g. 16473740 bytes -> "15.710 MB".
def FormatSize(size):
    """Format a size in bytes using the largest fitting binary unit.

    The value is scaled by powers of 1024 and shown with exactly three
    decimal places (truncated, not rounded).

    Args:
        size: Size in bytes.

    Returns:
        A string such as ``"15.710 MB"``. Sizes beyond the largest known
        unit are still expressed in that unit (e.g. ``"1024.000 PB"``).
    """
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
    size = int(size)

    # Pick the largest unit that keeps the integer part >= 1, but never run
    # past the end of the unit table.
    level = 0
    while level < len(units) - 1 and size >= 1024 ** (level + 1):
        level += 1

    # Integer arithmetic in thousandths of the unit: the fractional part is a
    # true decimal fraction (0..999), not the remainder modulo 1024.
    scaled = size * 1000 // (1024 ** level)
    return '{}.{:03d} {}'.format(scaled // 1000, scaled % 1000, units[level])

3、调用Everything SDK，通过everything64.dll来完成交互。

import ctypes
import datetime
import struct

# DLL imports: load the 64-bit Everything SDK library through a relative path
# and declare the ctypes prototypes for the functions listed below.
everything_dll = ctypes.WinDLL (r"./Everything64.dll")
# Both getters take a result index and a pointer to an unsigned 64-bit
# variable that receives the value.
everything_dll.Everything_GetResultDateModified.argtypes = [ctypes.c_int,ctypes.POINTER(ctypes.c_ulonglong)]
everything_dll.Everything_GetResultSize.argtypes = [ctypes.c_int,ctypes.POINTER(ctypes.c_ulonglong)]
# The file-name getter takes a result index and returns a wide-character
# string instead of ctypes' default int.
everything_dll.Everything_GetResultFileNameW.argtypes = [ctypes.c_int]
everything_dll.Everything_GetResultFileNameW.restype = ctypes.c_wchar_p

# Convert a Windows FILETIME to a Python datetime.
def get_time(filetime):
    """Convert a Windows FILETIME value to a naive local ``datetime``.

    A FILETIME counts 100-nanosecond ticks since 1601-01-01. See
    https://stackoverflow.com/questions/39481221/convert-datetime-back-to-windows-64-bit-filetime

    Args:
        filetime: Either the tick count as an ``int`` or an 8-byte buffer
            (``bytes``, ``ctypes.c_ulonglong``, ...) holding it as a
            little-endian unsigned 64-bit integer.

    Returns:
        The ``datetime.datetime`` produced by ``datetime.fromtimestamp`` for
        the corresponding POSIX timestamp.

    Raises:
        OverflowError, OSError or ValueError: Propagated from
            ``datetime.fromtimestamp`` when the timestamp is outside the
            range supported by the platform.
    """
    ticks_per_second = 10 ** 7
    # Ticks between the Windows epoch (1601-01-01) and the POSIX epoch
    # (1970-01-01): 11644473600 seconds.
    epoch_diff_ticks = 11644473600 * ticks_per_second

    if isinstance(filetime, int):
        winticks = filetime
    else:
        winticks = struct.unpack('<Q', filetime)[0]

    # Split into whole seconds and leftover ticks with integer arithmetic so
    # no precision is lost on the ~1e17 tick values.
    whole_seconds, sub_ticks = divmod(winticks - epoch_diff_ticks, ticks_per_second)
    return datetime.datetime.fromtimestamp(whole_seconds + sub_ticks / 1e7)
    
# Defines: see Everything.h for the definitions.
# Request flags are single bits that are OR-ed together.
EVERYTHING_REQUEST_FILE_NAME = 1 << 0
EVERYTHING_REQUEST_PATH = 1 << 1
EVERYTHING_REQUEST_SIZE = 1 << 4
EVERYTHING_REQUEST_DATE_MODIFIED = 1 << 6

# Sort mode passed to Everything_SetSort.
EVERYTHING_SORT_SIZE_DESCENDING = 6

# Keyword search: look up one book title through Everything.
def searchfile(bookName):
    """Search Everything for ``bookName`` and record every matching file.

    Punctuation in the title is replaced by spaces to form the search
    keywords. Results are requested largest-first; for every hit that is not
    a ``.lnk`` or ``.txt`` file the full path, formatted size and CRC32 are
    printed and appended to the module-level ``fout`` file, framed by a
    header with the original title and a footer with the match count.

    Args:
        bookName: Raw title text (it may include the enclosing brackets).

    Returns:
        None. Nothing is searched when the title contains no keyword
        characters, and nothing is written when no file is reported.
    """
    # Characters replaced by spaces. The hyphen is escaped so that it is a
    # literal: unescaped, " -(" formed an accidental range that did not
    # contain "-" itself. The characters that range used to cover (!#$%&)
    # are listed explicitly so nothing that was stripped before leaks into
    # the query now.
    recom = re.compile(r'[《》:：、；.，,;— \-()（）【】\'\"!#$%&]')
    keyword = recom.sub(' ', bookName).strip()
    if not keyword:
        return

    # Largest files first.
    everything_dll.Everything_SetSort(EVERYTHING_SORT_SIZE_DESCENDING)
    everything_dll.Everything_SetSearchW(keyword)
    everything_dll.Everything_SetRequestFlags(
        EVERYTHING_REQUEST_FILE_NAME | EVERYTHING_REQUEST_PATH | EVERYTHING_REQUEST_SIZE)

    # Execute the query (1 = wait for the results). A false return value
    # means the query could not be run, so report it instead of silently
    # treating it as "no results".
    if not everything_dll.Everything_QueryW(1):
        print("Everything query failed for '{}', error code: {}".format(
            keyword, everything_dll.Everything_GetLastError()))
        return

    num_results = everything_dll.Everything_GetNumResults()
    result = "\nResult Count: {}\n".format(num_results)
    print(keyword,result)

    # Output buffers filled by the SDK for each result.
    max_path = 260
    file_name = ctypes.create_unicode_buffer(max_path)
    file_size = ctypes.c_ulonglong(0)

    # Shortcuts and text files are not books; extensions are compared
    # case-insensitively.
    skipped_suffixes = ('.lnk', '.txt')

    nCount = 0
    for i in range(num_results):
        everything_dll.Everything_GetResultFullPathNameW(i, file_name, max_path)
        everything_dll.Everything_GetResultSize(i, file_size)
        filepath = file_name.value
        if filepath.lower().endswith(skipped_suffixes):
            continue

        # CRC32 formatted as eight upper-case hex digits, e.g. 0x1122AAFF.
        filecrc = '0x{:08X}'.format(crc32_file(filepath))
        filesize = FormatSize(file_size.value)
        strInfo = "\nFilePath: {}\nSize: {}    CRC32:{}".format(filepath,filesize,filecrc)
        print(strInfo)

        if nCount == 0:
            # First hit for this title: write the section header once.
            fout.write("\n=======↓↓↓↓↓===========\n")
            fout.write(bookName)
            fout.write("\n-----------------")
        fout.write(strInfo)
        nCount += 1

    if nCount:
        fout.write("\nFind Count：{}".format(nCount))
        fout.write("\n=======↑↑↑↑↑===========\n")

完整代码

#!/usr/bin/env python
#-*- coding:utf-8 -*-  
""" 
@author:hiltonwei
@file: secBooksFind.py 
@time: 2021/12/06 
@desc: 
    信息安全从业者书单推荐 https://github.com/riusksk/secbook
    step1 读入 README.md,读取《》内书名
    step2 通过everything的sdk查找文件，并计算文件CRC32校验值,写入txt中
"""

import zlib
import os
import sys
import ctypes
import datetime
import struct
import io
import re

# DLL imports: load the 64-bit Everything SDK library through a relative path
# and declare the ctypes prototypes for the functions listed below.
everything_dll = ctypes.WinDLL (r"./Everything64.dll")
# Both getters take a result index and a pointer to an unsigned 64-bit
# variable that receives the value.
everything_dll.Everything_GetResultDateModified.argtypes = [ctypes.c_int,ctypes.POINTER(ctypes.c_ulonglong)]
everything_dll.Everything_GetResultSize.argtypes = [ctypes.c_int,ctypes.POINTER(ctypes.c_ulonglong)]
# The file-name getter takes a result index and returns a wide-character
# string instead of ctypes' default int.
everything_dll.Everything_GetResultFileNameW.argtypes = [ctypes.c_int]
everything_dll.Everything_GetResultFileNameW.restype = ctypes.c_wchar_p


# Result file, opened in append mode. It is written as UTF-8 so that any
# Unicode path or book title can be stored regardless of the platform's
# default encoding.
fout = io.open("secBooksFind.txt", 'a+', encoding='utf-8')

# Number of bytes read from a file per chunk (1 MiB).
block_size = 1 << 20


def crc32_simple(filepath):
    """Return the CRC32 of the first ``block_size`` bytes of a file.

    Only the leading chunk is hashed, so this is a quick fingerprint rather
    than a checksum of the whole file.

    Args:
        filepath: Path of the file to read.

    Returns:
        The value of ``zlib.crc32`` over the leading chunk, or 0 when the
        file cannot be read (the error is printed).
    """
    checksum = 0
    try:
        with open(filepath, 'rb') as stream:
            head = stream.read(block_size)
        checksum = zlib.crc32(head, 0)
    except Exception as err:
        print(str(err))
    return checksum

# Compute the CRC32 of a whole file.
def crc32_file(filepath, chunk_size=1024 * 1024):
    """Return the CRC32 of an entire file as an unsigned integer.

    The file is read chunk by chunk so that large files never have to fit
    in memory; the chunk size does not influence the result.

    Args:
        filepath: Path of the file to hash.
        chunk_size: Number of bytes read per iteration (default 1 MiB).

    Returns:
        The CRC32 as a decimal integer in the range 0..0xFFFFFFFF. 0 is
        returned for an empty file and also when the file cannot be read
        (the error is printed).
    """
    crc = 0
    try:
        # The context manager closes the handle even when a read fails.
        with open(filepath, 'rb') as fd:
            while True:
                buffer = fd.read(chunk_size)
                if not buffer:  # EOF (or empty file)
                    break
                # The mask keeps the value unsigned on every Python version.
                crc = zlib.crc32(buffer, crc) & 0xFFFFFFFF
    except Exception as e:
        # Deliberately best-effort: report the problem and signal it with 0
        # instead of aborting the caller's loop over many files.
        print(str(e))
        return 0
    return crc

# Convert a byte count to a human-readable size, e.g. 16473740 bytes -> "15.710 MB".
def FormatSize(size):
    """Format a size in bytes using the largest fitting binary unit.

    The value is scaled by powers of 1024 and shown with exactly three
    decimal places (truncated, not rounded).

    Args:
        size: Size in bytes.

    Returns:
        A string such as ``"15.710 MB"``. Sizes beyond the largest known
        unit are still expressed in that unit (e.g. ``"1024.000 PB"``).
    """
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
    size = int(size)

    # Pick the largest unit that keeps the integer part >= 1, but never run
    # past the end of the unit table.
    level = 0
    while level < len(units) - 1 and size >= 1024 ** (level + 1):
        level += 1

    # Integer arithmetic in thousandths of the unit: the fractional part is a
    # true decimal fraction (0..999), not the remainder modulo 1024.
    scaled = size * 1000 // (1024 ** level)
    return '{}.{:03d} {}'.format(scaled // 1000, scaled % 1000, units[level])

# Convert a Windows FILETIME to a Python datetime.
def get_time(filetime):
    """Convert a Windows FILETIME value to a naive local ``datetime``.

    A FILETIME counts 100-nanosecond ticks since 1601-01-01. See
    https://stackoverflow.com/questions/39481221/convert-datetime-back-to-windows-64-bit-filetime

    Args:
        filetime: Either the tick count as an ``int`` or an 8-byte buffer
            (``bytes``, ``ctypes.c_ulonglong``, ...) holding it as a
            little-endian unsigned 64-bit integer.

    Returns:
        The ``datetime.datetime`` produced by ``datetime.fromtimestamp`` for
        the corresponding POSIX timestamp.

    Raises:
        OverflowError, OSError or ValueError: Propagated from
            ``datetime.fromtimestamp`` when the timestamp is outside the
            range supported by the platform.
    """
    ticks_per_second = 10 ** 7
    # Ticks between the Windows epoch (1601-01-01) and the POSIX epoch
    # (1970-01-01): 11644473600 seconds.
    epoch_diff_ticks = 11644473600 * ticks_per_second

    if isinstance(filetime, int):
        winticks = filetime
    else:
        winticks = struct.unpack('<Q', filetime)[0]

    # Split into whole seconds and leftover ticks with integer arithmetic so
    # no precision is lost on the ~1e17 tick values.
    whole_seconds, sub_ticks = divmod(winticks - epoch_diff_ticks, ticks_per_second)
    return datetime.datetime.fromtimestamp(whole_seconds + sub_ticks / 1e7)
    
# Defines: see Everything.h for the definitions.
# Request flags are single bits that are OR-ed together.
EVERYTHING_REQUEST_FILE_NAME = 1 << 0
EVERYTHING_REQUEST_PATH = 1 << 1
EVERYTHING_REQUEST_SIZE = 1 << 4
EVERYTHING_REQUEST_DATE_MODIFIED = 1 << 6

# Sort mode passed to Everything_SetSort.
EVERYTHING_SORT_SIZE_DESCENDING = 6

# Keyword search: look up one book title through Everything.
def searchfile(bookName):
    """Search Everything for ``bookName`` and record every matching file.

    Punctuation in the title is replaced by spaces to form the search
    keywords. Results are requested largest-first; for every hit that is not
    a ``.lnk`` or ``.txt`` file the full path, formatted size and CRC32 are
    printed and appended to the module-level ``fout`` file, framed by a
    header with the original title and a footer with the match count.

    Args:
        bookName: Raw title text (it may include the enclosing brackets).

    Returns:
        None. Nothing is searched when the title contains no keyword
        characters, and nothing is written when no file is reported.
    """
    # Characters replaced by spaces. The hyphen is escaped so that it is a
    # literal: unescaped, " -(" formed an accidental range that did not
    # contain "-" itself. The characters that range used to cover (!#$%&)
    # are listed explicitly so nothing that was stripped before leaks into
    # the query now.
    recom = re.compile(r'[《》:：、；.，,;— \-()（）【】\'\"!#$%&]')
    keyword = recom.sub(' ', bookName).strip()
    if not keyword:
        return

    # Largest files first.
    everything_dll.Everything_SetSort(EVERYTHING_SORT_SIZE_DESCENDING)
    everything_dll.Everything_SetSearchW(keyword)
    everything_dll.Everything_SetRequestFlags(
        EVERYTHING_REQUEST_FILE_NAME | EVERYTHING_REQUEST_PATH | EVERYTHING_REQUEST_SIZE)

    # Execute the query (1 = wait for the results). A false return value
    # means the query could not be run, so report it instead of silently
    # treating it as "no results".
    if not everything_dll.Everything_QueryW(1):
        print("Everything query failed for '{}', error code: {}".format(
            keyword, everything_dll.Everything_GetLastError()))
        return

    num_results = everything_dll.Everything_GetNumResults()
    result = "\nResult Count: {}\n".format(num_results)
    print(keyword,result)

    # Output buffers filled by the SDK for each result.
    max_path = 260
    file_name = ctypes.create_unicode_buffer(max_path)
    file_size = ctypes.c_ulonglong(0)

    # Shortcuts and text files are not books; extensions are compared
    # case-insensitively.
    skipped_suffixes = ('.lnk', '.txt')

    nCount = 0
    for i in range(num_results):
        everything_dll.Everything_GetResultFullPathNameW(i, file_name, max_path)
        everything_dll.Everything_GetResultSize(i, file_size)
        filepath = file_name.value
        if filepath.lower().endswith(skipped_suffixes):
            continue

        # CRC32 formatted as eight upper-case hex digits, e.g. 0x1122AAFF.
        filecrc = '0x{:08X}'.format(crc32_file(filepath))
        filesize = FormatSize(file_size.value)
        strInfo = "\nFilePath: {}\nSize: {}    CRC32:{}".format(filepath,filesize,filecrc)
        print(strInfo)

        if nCount == 0:
            # First hit for this title: write the section header once.
            fout.write("\n=======↓↓↓↓↓===========\n")
            fout.write(bookName)
            fout.write("\n-----------------")
        fout.write(strInfo)
        nCount += 1

    if nCount:
        fout.write("\nFind Count：{}".format(nCount))
        fout.write("\n=======↑↑↑↑↑===========\n")

# Read the book list and search Everything for every 《...》 title in it.
def readMd(fileName):
    """Search for each book title listed in a Markdown file.

    Only lines starting with '·' are considered. The text from the first
    '《' up to and including the next '》' is passed to ``searchfile``; when
    the closing bracket is missing, the rest of the line is used instead.
    Bullet lines without an opening bracket are skipped.

    Args:
        fileName: Path of the UTF-8 encoded Markdown file.
    """
    with io.open(fileName, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.startswith('·'):
                continue
            start = line.find('《')
            if start == -1:
                # No title on this line; nothing to search for.
                continue
            # Look for the closing bracket only after the opening one.
            end = line.find('》', start)
            if end == -1:
                title = line[start:].rstrip()
            else:
                title = line[start:end + 1]
            searchfile(title)
        
if __name__ == "__main__":
    # Script entry point: process the book list, then always release the
    # result file, even when the run aborts with an error. Closing happens
    # only here so that importing this module leaves ``fout`` usable.
    try:
        readMd("README.md")
    finally:
        fout.close()

开发测试最新文章

pytest系列——allure之生成测试报告（Wind

某大厂软件测试岗一面笔试题+二面问答题面试

iperf 学习笔记

关于Python中使用selenium八大定位方法

【软件测试】为什么提升不了？8年测试总结再

加:2021-12-10 11:21:17 更:2021-12-10 11:22:24

360图书馆购物三丰科技阅读网日历万年历 2025年2日历

-2025/2/6 7:56:52-

图片自动播放器
↓图片自动播放器↓

TxT小说阅读器
↓语音阅读,小说下载,古典文学↓

一键清除垃圾
↓轻轻一点,清除系统垃圾↓

图片批量下载器
↓批量下载图片,美女图库↓

网站联系: qq:121756557 email:121756557@qq.com IT数码