需求:
在本地文件中,查找书单<信息安全从业者书单>中的书籍。
原理:
遍历 README.md,并通过 Everything SDK 在本地查找每本书。
1、计算文件CRC32
因为只是确定本地文件的唯一性,而 CRC32 的计算效率比 MD5 和 SHA-1 更高,所以这里计算 CRC32。
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import zlib
import os
# Number of bytes read per chunk when hashing files (1 MiB).
block_size = 1024 * 1024


# Read only the first block_size bytes of the file and compute their CRC32.
def crc32_simple(filepath):
    """Return the CRC32 of the first ``block_size`` bytes of a file.

    This is a cheap fingerprint: only the head of the file is read, so two
    files that share their first ``block_size`` bytes get the same value.

    Args:
        filepath: Path of the file to fingerprint.

    Returns:
        The CRC32 as an unsigned integer in ``[0, 2**32)``. If the file
        cannot be read, the error is printed and ``0`` is returned.
    """
    try:
        with open(filepath, 'rb') as f:
            head = f.read(block_size)
    except Exception as e:
        # Deliberately best-effort: report the problem and fall back to 0.
        print(str(e))
        return 0
    # Mask so the value is unsigned on every Python version, matching what
    # crc32_file returns (Python 2's zlib.crc32 may yield a negative int).
    return zlib.crc32(head, 0) & 0xFFFFFFFF
# Compute the CRC32 of an entire file.
def crc32_file(filepath, chunk_size=None):
    """Return the CRC32 of the whole file, read in fixed-size chunks.

    Args:
        filepath: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration. Defaults to the
            module-level ``block_size``. It only affects memory use, not
            the resulting checksum.

    Returns:
        The CRC32 as an unsigned decimal integer in ``[0, 2**32)``. If the
        file cannot be opened or read, the error is printed and ``0`` is
        returned.
    """
    # Local import: the import list accompanying this snippet only brings in
    # zlib and os, but the error path below needs sys.
    import sys

    if chunk_size is None:
        chunk_size = block_size
    crc = 0
    try:
        # The context manager closes the handle even when read() raises.
        with open(filepath, 'rb') as fd:
            while True:
                chunk = fd.read(chunk_size)
                if not chunk:  # EOF (or empty file)
                    break
                crc = zlib.crc32(chunk, crc)
    except Exception as e:
        # Best-effort: report and fall back to 0 instead of aborting a scan.
        if sys.version_info[0] < 3:
            error = unicode(e)
        else:
            error = str(e)
        print(error)
        return 0
    # Masking yields an unsigned value on both Python 2 and Python 3.
    return crc & 0xFFFFFFFF
2、文件大小自动变换单位
递归实现 文件大小根据bytes,返回合理区间['B', 'KB', 'MB', 'GB', 'TB', 'PB']。eg : 16473740 bytes--> 15.727 MB
# Render a byte count in the largest fitting unit, e.g. 16473740 -> "15.710 MB".
def FormatSize(size):
    """Format a byte count as ``"<whole>.<3 decimals> <unit>"``.

    The unit is the largest of B, KB, MB, GB, TB, PB (powers of 1024) that
    keeps the whole part at least 1; values of 1024 PB or more stay in PB.
    The fraction is a true decimal fraction truncated (not rounded) to three
    digits, computed with integer arithmetic so that it can never show up as
    ``1000``.

    Args:
        size: File size in bytes, as an integer.

    Returns:
        The formatted string, e.g. ``FormatSize(1536) == "1.500 KB"``.
    """
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
    level = 0
    unit = 1  # number of bytes represented by units[level]
    # Stop at the last unit so very large sizes are still reported correctly.
    while level < len(units) - 1 and size >= unit * 1024:
        unit *= 1024
        level += 1
    whole, rest = divmod(size, unit)
    # Convert the leftover bytes into thousandths of the chosen unit.
    thousandths = rest * 1000 // unit
    return '{}.{:03d} {}'.format(whole, thousandths, units[level])
3、调用Everything SDK,通过everything64.dll来完成交互。
import ctypes
import datetime
import struct
# Load the Everything SDK DLL (resolved relative to the current working
# directory) and declare the ctypes signatures of the result getters.
everything_dll = ctypes.WinDLL(r"./Everything64.dll")

# Both getters take (result index, pointer to an unsigned 64-bit out-value).
_ULONGLONG_PTR = ctypes.POINTER(ctypes.c_ulonglong)
everything_dll.Everything_GetResultDateModified.argtypes = [ctypes.c_int, _ULONGLONG_PTR]
everything_dll.Everything_GetResultSize.argtypes = [ctypes.c_int, _ULONGLONG_PTR]

# Takes a result index and returns a wide-character string.
everything_dll.Everything_GetResultFileNameW.argtypes = [ctypes.c_int]
everything_dll.Everything_GetResultFileNameW.restype = ctypes.c_wchar_p
# Time conversion.
def get_time(filetime):
    """Convert a Windows FILETIME tick count to a ``datetime.datetime``.

    Based on
    https://stackoverflow.com/questions/39481221/convert-datetime-back-to-windows-64-bit-filetime

    Args:
        filetime: An 8-byte buffer (for example a ``ctypes.c_ulonglong`` or
            ``bytes``) holding a little-endian unsigned 64-bit count of
            100-nanosecond ticks since 1601-01-01 00:00:00.

    Returns:
        A naive ``datetime.datetime`` as produced by
        ``datetime.datetime.fromtimestamp`` for the equivalent POSIX
        timestamp, with microsecond precision.
    """
    ticks_per_second = 10 ** 7  # one tick is 100 nanoseconds
    # Ticks between 1601-01-01 and 1970-01-01: 11644473600 s * 10**7.
    epoch_offset_ticks = 116444736000000000
    winticks = struct.unpack('<Q', filetime)[0]
    # Stay in integer arithmetic: tick counts exceed 2**53, so going through
    # float would silently drop the lowest ticks.
    seconds, sub_ticks = divmod(winticks - epoch_offset_ticks, ticks_per_second)
    return (datetime.datetime.fromtimestamp(seconds)
            + datetime.timedelta(microseconds=sub_ticks // 10))
# Constants mirrored from Everything.h.
# The REQUEST_* values are bit flags that get OR-ed together and passed to
# Everything_SetRequestFlags; the hex form from the header is kept alongside.
EVERYTHING_REQUEST_FILE_NAME = 1 << 0      # 0x00000001
EVERYTHING_REQUEST_PATH = 1 << 1           # 0x00000002
EVERYTHING_REQUEST_SIZE = 1 << 4           # 0x00000010
EVERYTHING_REQUEST_DATE_MODIFIED = 1 << 6  # 0x00000040
# Sort mode passed to Everything_SetSort.
EVERYTHING_SORT_SIZE_DESCENDING = 6
# Keyword search.
def searchfile(bookName):
    """Search the Everything index for a book title and log every hit.

    Punctuation in ``bookName`` is replaced by spaces to form the search
    keyword. Each result that is not a ``.lnk`` or ``.txt`` file is printed
    and appended to the module-level ``fout`` report together with its
    formatted size and the CRC32 of its content. Results are requested in
    descending size order.

    Args:
        bookName: Book title as found in the book list (may still contain
            the surrounding brackets and other punctuation).

    Returns:
        None. Nothing is written when the cleaned keyword is empty, when the
        query fails, or when no result survives the extension filter.
    """
    # Characters replaced by a space before querying. The hyphen is listed
    # explicitly: written as " -(" inside a regex class it formed a range and
    # was never stripped itself. The characters that accidental range did
    # match (space ! " # $ % & ' () are kept so nothing previously stripped
    # slips through. Full-width variants are spelled as escapes.
    strip_chars = (
        u"《》【】、—"
        u"\uff1a\uff1b\uff0c\uff0e\uff08\uff09"  # full-width : ; , . ( )
        u":;.,()'\"-"
        u" !#$%&"
    )
    keyword = bookName.translate({ord(ch): u' ' for ch in strip_chars}).strip()
    if not keyword:
        return

    # Largest files first.
    everything_dll.Everything_SetSort(EVERYTHING_SORT_SIZE_DESCENDING)
    everything_dll.Everything_SetSearchW(keyword)
    # The modified date was fetched but never reported, so it is no longer
    # requested.
    everything_dll.Everything_SetRequestFlags(
        EVERYTHING_REQUEST_FILE_NAME | EVERYTHING_REQUEST_PATH | EVERYTHING_REQUEST_SIZE)
    # Execute the query (argument 1 = wait for the reply, per the Everything
    # SDK) and stop early if it fails, e.g. when Everything is not running.
    if not everything_dll.Everything_QueryW(1):
        print("Everything query failed, error code: {}".format(
            everything_dll.Everything_GetLastError()))
        return

    num_results = everything_dll.Everything_GetNumResults()
    print(keyword, "\nResult Count: {}\n".format(num_results))

    max_path = 260  # capacity, in characters, of the path buffer
    path_buf = ctypes.create_unicode_buffer(max_path)
    size_out = ctypes.c_ulonglong(0)
    skipped_extensions = ('.lnk', '.txt')
    found = 0
    for i in range(num_results):
        everything_dll.Everything_GetResultFullPathNameW(i, path_buf, max_path)
        filepath = path_buf.value
        # Shortcuts and text files are not books; compare case-insensitively.
        if filepath.lower().endswith(skipped_extensions):
            continue
        everything_dll.Everything_GetResultSize(i, size_out)
        # CRC32 of the file content, formatted like 0x1122AAFF.
        filecrc = '0x{:08X}'.format(crc32_file(filepath))
        filesize = FormatSize(size_out.value)
        strInfo = "\nFilePath: {}\nSize: {} CRC32:{}".format(filepath, filesize, filecrc)
        print(strInfo)
        if found == 0:
            # Write the section header lazily, only once a real hit exists.
            fout.write("\n=======↓↓↓↓↓===========\n")
            fout.write(bookName)
            fout.write("\n-----------------")
        fout.write(strInfo)
        found += 1
    if found:
        fout.write("\nFind Count:{}".format(found))
        fout.write("\n=======↑↑↑↑↑===========\n")
完整代码
#!/usr/bin/env python
#-*- coding:utf-8 -*-
"""
@author:hiltonwei
@file: secBooksFind.py
@time: 2021/12/06
@desc:
信息安全从业者书单推荐 https://github.com/riusksk/secbook
step1 读入 README.md,读取《》内书名
step2 通过everything的sdk查找文件,并计算文件CRC32校验值,写入txt中
"""
import zlib
import os
import sys
import ctypes
import datetime
import struct
import io
import re
# Load the Everything SDK DLL (resolved relative to the current working
# directory) and declare the ctypes signatures of the result getters.
everything_dll = ctypes.WinDLL(r"./Everything64.dll")

# Both getters take (result index, pointer to an unsigned 64-bit out-value).
_ULONGLONG_PTR = ctypes.POINTER(ctypes.c_ulonglong)
everything_dll.Everything_GetResultDateModified.argtypes = [ctypes.c_int, _ULONGLONG_PTR]
everything_dll.Everything_GetResultSize.argtypes = [ctypes.c_int, _ULONGLONG_PTR]

# Takes a result index and returns a wide-character string.
everything_dll.Everything_GetResultFileNameW.argtypes = [ctypes.c_int]
everything_dll.Everything_GetResultFileNameW.restype = ctypes.c_wchar_p

# Report file, opened in append mode so repeated runs accumulate results.
# The encoding is pinned to UTF-8 because book titles, paths and the arrow
# separators written to it may not be representable in the locale's default
# encoding.
fout = io.open("secBooksFind.txt", 'a+', encoding='utf-8')
# Number of bytes read per chunk when hashing files (1 MiB).
block_size = 1024 * 1024


# Read only the first block_size bytes of the file and compute their CRC32.
def crc32_simple(filepath):
    """Return the CRC32 of the first ``block_size`` bytes of a file.

    This is a cheap fingerprint: only the head of the file is read, so two
    files that share their first ``block_size`` bytes get the same value.

    Args:
        filepath: Path of the file to fingerprint.

    Returns:
        The CRC32 as an unsigned integer in ``[0, 2**32)``. If the file
        cannot be read, the error is printed and ``0`` is returned.
    """
    try:
        with open(filepath, 'rb') as f:
            head = f.read(block_size)
    except Exception as e:
        # Deliberately best-effort: report the problem and fall back to 0.
        print(str(e))
        return 0
    # Mask so the value is unsigned on every Python version, matching what
    # crc32_file returns (Python 2's zlib.crc32 may yield a negative int).
    return zlib.crc32(head, 0) & 0xFFFFFFFF
# Compute the CRC32 of an entire file.
def crc32_file(filepath, chunk_size=None):
    """Return the CRC32 of the whole file, read in fixed-size chunks.

    Args:
        filepath: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration. Defaults to the
            module-level ``block_size``. It only affects memory use, not
            the resulting checksum.

    Returns:
        The CRC32 as an unsigned decimal integer in ``[0, 2**32)``. If the
        file cannot be opened or read, the error is printed and ``0`` is
        returned.
    """
    if chunk_size is None:
        chunk_size = block_size
    crc = 0
    try:
        # The context manager closes the handle even when read() raises.
        with open(filepath, 'rb') as fd:
            while True:
                chunk = fd.read(chunk_size)
                if not chunk:  # EOF (or empty file)
                    break
                crc = zlib.crc32(chunk, crc)
    except Exception as e:
        # Best-effort: report and fall back to 0 instead of aborting a scan.
        if sys.version_info[0] < 3:
            error = unicode(e)
        else:
            error = str(e)
        print(error)
        return 0
    # Masking yields an unsigned value on both Python 2 and Python 3.
    return crc & 0xFFFFFFFF
# Render a byte count in the largest fitting unit, e.g. 16473740 -> "15.710 MB".
def FormatSize(size):
    """Format a byte count as ``"<whole>.<3 decimals> <unit>"``.

    The unit is the largest of B, KB, MB, GB, TB, PB (powers of 1024) that
    keeps the whole part at least 1; values of 1024 PB or more stay in PB.
    The fraction is a true decimal fraction truncated (not rounded) to three
    digits, computed with integer arithmetic so that it can never show up as
    ``1000``.

    Args:
        size: File size in bytes, as an integer.

    Returns:
        The formatted string, e.g. ``FormatSize(1536) == "1.500 KB"``.
    """
    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB']
    level = 0
    unit = 1  # number of bytes represented by units[level]
    # Stop at the last unit so very large sizes are still reported correctly.
    while level < len(units) - 1 and size >= unit * 1024:
        unit *= 1024
        level += 1
    whole, rest = divmod(size, unit)
    # Convert the leftover bytes into thousandths of the chosen unit.
    thousandths = rest * 1000 // unit
    return '{}.{:03d} {}'.format(whole, thousandths, units[level])
# Time conversion.
def get_time(filetime):
    """Convert a Windows FILETIME tick count to a ``datetime.datetime``.

    Based on
    https://stackoverflow.com/questions/39481221/convert-datetime-back-to-windows-64-bit-filetime

    Args:
        filetime: An 8-byte buffer (for example a ``ctypes.c_ulonglong`` or
            ``bytes``) holding a little-endian unsigned 64-bit count of
            100-nanosecond ticks since 1601-01-01 00:00:00.

    Returns:
        A naive ``datetime.datetime`` as produced by
        ``datetime.datetime.fromtimestamp`` for the equivalent POSIX
        timestamp, with microsecond precision.
    """
    ticks_per_second = 10 ** 7  # one tick is 100 nanoseconds
    # Ticks between 1601-01-01 and 1970-01-01: 11644473600 s * 10**7.
    epoch_offset_ticks = 116444736000000000
    winticks = struct.unpack('<Q', filetime)[0]
    # Stay in integer arithmetic: tick counts exceed 2**53, so going through
    # float would silently drop the lowest ticks.
    seconds, sub_ticks = divmod(winticks - epoch_offset_ticks, ticks_per_second)
    return (datetime.datetime.fromtimestamp(seconds)
            + datetime.timedelta(microseconds=sub_ticks // 10))
# Constants mirrored from Everything.h.
# The REQUEST_* values are bit flags that get OR-ed together and passed to
# Everything_SetRequestFlags; the hex form from the header is kept alongside.
EVERYTHING_REQUEST_FILE_NAME = 1 << 0      # 0x00000001
EVERYTHING_REQUEST_PATH = 1 << 1           # 0x00000002
EVERYTHING_REQUEST_SIZE = 1 << 4           # 0x00000010
EVERYTHING_REQUEST_DATE_MODIFIED = 1 << 6  # 0x00000040
# Sort mode passed to Everything_SetSort.
EVERYTHING_SORT_SIZE_DESCENDING = 6
# Keyword search.
def searchfile(bookName):
    """Search the Everything index for a book title and log every hit.

    Punctuation in ``bookName`` is replaced by spaces to form the search
    keyword. Each result that is not a ``.lnk`` or ``.txt`` file is printed
    and appended to the module-level ``fout`` report together with its
    formatted size and the CRC32 of its content. Results are requested in
    descending size order.

    Args:
        bookName: Book title as found in the book list (may still contain
            the surrounding brackets and other punctuation).

    Returns:
        None. Nothing is written when the cleaned keyword is empty, when the
        query fails, or when no result survives the extension filter.
    """
    # Characters replaced by a space before querying. The hyphen is listed
    # explicitly: written as " -(" inside a regex class it formed a range and
    # was never stripped itself. The characters that accidental range did
    # match (space ! " # $ % & ' () are kept so nothing previously stripped
    # slips through. Full-width variants are spelled as escapes.
    strip_chars = (
        u"《》【】、—"
        u"\uff1a\uff1b\uff0c\uff0e\uff08\uff09"  # full-width : ; , . ( )
        u":;.,()'\"-"
        u" !#$%&"
    )
    keyword = bookName.translate({ord(ch): u' ' for ch in strip_chars}).strip()
    if not keyword:
        return

    # Largest files first.
    everything_dll.Everything_SetSort(EVERYTHING_SORT_SIZE_DESCENDING)
    everything_dll.Everything_SetSearchW(keyword)
    # The modified date was fetched but never reported, so it is no longer
    # requested.
    everything_dll.Everything_SetRequestFlags(
        EVERYTHING_REQUEST_FILE_NAME | EVERYTHING_REQUEST_PATH | EVERYTHING_REQUEST_SIZE)
    # Execute the query (argument 1 = wait for the reply, per the Everything
    # SDK) and stop early if it fails, e.g. when Everything is not running.
    if not everything_dll.Everything_QueryW(1):
        print("Everything query failed, error code: {}".format(
            everything_dll.Everything_GetLastError()))
        return

    num_results = everything_dll.Everything_GetNumResults()
    print(keyword, "\nResult Count: {}\n".format(num_results))

    max_path = 260  # capacity, in characters, of the path buffer
    path_buf = ctypes.create_unicode_buffer(max_path)
    size_out = ctypes.c_ulonglong(0)
    skipped_extensions = ('.lnk', '.txt')
    found = 0
    for i in range(num_results):
        everything_dll.Everything_GetResultFullPathNameW(i, path_buf, max_path)
        filepath = path_buf.value
        # Shortcuts and text files are not books; compare case-insensitively.
        if filepath.lower().endswith(skipped_extensions):
            continue
        everything_dll.Everything_GetResultSize(i, size_out)
        # CRC32 of the file content, formatted like 0x1122AAFF.
        filecrc = '0x{:08X}'.format(crc32_file(filepath))
        filesize = FormatSize(size_out.value)
        strInfo = "\nFilePath: {}\nSize: {} CRC32:{}".format(filepath, filesize, filecrc)
        print(strInfo)
        if found == 0:
            # Write the section header lazily, only once a real hit exists.
            fout.write("\n=======↓↓↓↓↓===========\n")
            fout.write(bookName)
            fout.write("\n-----------------")
        fout.write(strInfo)
        found += 1
    if found:
        fout.write("\nFind Count:{}".format(found))
        fout.write("\n=======↑↑↑↑↑===========\n")
# Read the book list and look up every bracketed title through Everything.
def readMd(fileName):
    """Search for the 《...》 title found on each bullet line of a file.

    The file is read completely as UTF-8 first. Then, for every line that
    starts with '·', the text from the first '《' up to and including the
    first '》' is handed to ``searchfile`` (which strips the punctuation).

    Args:
        fileName: Path of the Markdown book list to read.
    """
    with io.open(fileName, 'r', encoding='utf-8') as md:
        lines = md.readlines()
    for text in lines:
        if not text.startswith('·'):
            continue
        # Slice bounds come straight from find(); a missing bracket (-1) is
        # left as-is, only a found closing bracket is made inclusive.
        begin = text.find('《')
        stop = text.find('》')
        if stop != -1:
            stop += 1
        searchfile(text[begin:stop])
if __name__ == "__main__":
    # Close the report even if the scan aborts part-way, so results that
    # were already buffered still reach the file.
    try:
        readMd("README.md")
    finally:
        fout.close()
|