基于区块链的简易论文加密设计
本次实验的要求是针对给定的一篇论文设计一个简易的加密原型,使得可以通过查询区块链上的对应信息获得论文的基本信息以及上链的时间。以下我将整个操作拆分为5个步骤进行说明。
Step 1
由于论文的格式是PDF格式,第一步我们需要先对PDF文档进行处理,使用Python处理PDF文档的模块实现论文基本信息的提取,包括题目,作者,联系方式,出处,摘要等几个方面,并将该信息采用JSON格式存储。
from pdfminer.pdfparser import PDFParser,PDFDocument
from pdfminer.pdfinterp import PDFResourceManager,PDFPageInterpreter,PDFTextExtractionNotAllowed
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LTTextBoxHorizontal,LAParams,LTTextLineHorizontal,LTFigure,LTRect,LTLine,LTCurve
import json
import codecs
def flatten_json_line(raw):
    r"""Flatten one line of ``json.dump`` output into plain text.

    The final character of *raw* is dropped (for a dumped list this is the
    closing bracket; for a line that ends with a newline it is the newline).
    Every ``[``, ``]`` and ``"`` is then removed, and finally the escaped
    line breaks (the two characters ``\n``) are deleted; when a space
    precedes the escape, the space and the following comma go with it.

    NOTE: brackets and double quotes that belong to the paper text itself
    are removed as well.

    Args:
        raw: One line of JSON text, e.g. ``'["Title \n", "Author\n"]'``.

    Returns:
        The flattened text; an empty string for empty input.
    """
    kept = "".join(ch for ch in raw[:-1] if ch not in ']"[')
    return kept.replace(" \\n,", "").replace("\\n", "")


if __name__ == "__main__":
    pdf_path = r"C:\Users\zz\Desktop\bitcoin.pdf"
    text_path = r'C:\Users\zz\Desktop\linshichucun.txt'
    filename = 'C:/Users/zz/Desktop/bitcoinlinshi.json'
    realfile = 'C:/Users/zz/Desktop/bitcoin.json'

    # The scratch text file is opened with "w+" instead of "a+" so that a
    # rerun starts from an empty file rather than appending to old output.
    with open(pdf_path, "rb") as pd_file, \
            open(text_path, 'w+', encoding='utf-8') as file:
        parser = PDFParser(pd_file)
        document = PDFDocument()
        parser.set_document(document)
        document.set_parser(parser)
        document.initialize()
        if document.is_extractable:
            print(True)
        else:
            raise PDFTextExtractionNotAllowed

        src = PDFResourceManager()
        device = PDFPageAggregator(src, laparams=LAParams())
        inter = PDFPageInterpreter(src, device)
        for page in document.get_pages():
            inter.process_page(page)
            layout = device.get_result()
            for x in layout:
                # Only horizontal text boxes are kept.
                if isinstance(x, LTTextBoxHorizontal):
                    text = str(x.get_text())
                    print(text)
                    file.write(text)

        # Keep only the first 18 lines of the extracted text.
        file.seek(0)
        content = file.readlines()[0:18]

    # Intermediate JSON: the kept lines as a JSON list ("w" so that the
    # file holds exactly one list, even after reruns).
    with open(filename, 'w') as f_obj:
        json.dump(content, f_obj)

    with open(filename, 'r') as s:
        k = s.readlines()
    d = flatten_json_line(k[0])
    print(d)

    # Final JSON: the flattened text stored as a single JSON string.
    # The original opened an undefined name here (`realfile` was spelled
    # `readfile` at its definition), which raised NameError.
    with open(realfile, 'w') as refile:
        json.dump(d, refile)
Step 2
第二步是对文件进行加密,如添加数字指纹,进行数字签名。我选择使用非对称加密算法生成一对私钥和公钥,私钥进行签名,公钥进行签名验证。
import codecs
from ecdsa import SigningKey,SECP256k1
from pdfminer.pdfparser import PDFParser,PDFDocument
from pdfminer.pdfinterp import PDFResourceManager,PDFPageInterpreter,PDFTextExtractionNotAllowed
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LTTextBoxHorizontal,LAParams,LTTextLineHorizontal,LTFigure,LTRect,LTLine,LTCurve
import json
# Extract the text of the paper, then sign it with a freshly generated
# ECDSA key pair on the SECP256k1 curve and verify the signature.
with open(r"C:\Users\zz\Desktop\bitcoin.pdf", "rb") as pd_file:
    parser = PDFParser(pd_file)
    document = PDFDocument()
    parser.set_document(document)
    document.set_parser(parser)
    document.initialize()
    if document.is_extractable:
        print(True)
    else:
        raise PDFTextExtractionNotAllowed

    src = PDFResourceManager()
    device = PDFPageAggregator(src, laparams=LAParams())
    inter = PDFPageInterpreter(src, device)
    text_parts = []
    for page in document.get_pages():
        inter.process_page(page)
        layout = device.get_result()
        for x in layout:
            if isinstance(x, LTTextBoxHorizontal):
                text = str(x.get_text())
                print(text)
                text_parts.append(text)

# Sign every extracted text box, not just the loop variable left over from
# the last iteration: that element covers at most one box of the last page
# and may not be a text box at all (in which case it has no get_text()).
message = "".join(text_parts).encode('utf-8')

sk = SigningKey.generate(curve=SECP256k1)  # private key: signs
vk = sk.get_verifying_key()                # public key: verifies
signature = sk.sign(message)
# NOTE(review): per the ecdsa docs, verify() raises BadSignatureError when
# the signature does not match -- confirm against the installed version.
vk.verify(signature, message)
Step 3
第三步是设计一条区块链,利用加密技术,将论文信息添加到区块中。其中,代码的前一部分是构造区块和区块链,后半部分是将论文信息写入区块中。
import hashlib
from datetime import datetime
class Block:
    """A single block of the chain.

    Attributes:
        prev_hash: Hash of the parent block.
        data: Content stored in the block.
        timestamp: Creation time, formatted as "%Y-%m-%d %H:%M:%S".
        hash: SHA-256 hex digest computed over ``prev_hash``, ``data`` and
            ``timestamp``, in that order.
    """

    def __init__(self, data, prev_hash):
        """Create a block and compute its hash.

        Args:
            data: Content to store; converted with ``str()`` before hashing.
            prev_hash: Hash of the parent block; converted with ``str()``
                before hashing.
        """
        self.prev_hash = prev_hash
        self.data = data
        # Taken from the local clock at construction time, so two blocks
        # with the same data generally get different hashes.
        self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        message = hashlib.sha256()
        message.update(str(self.prev_hash).encode('utf-8'))
        message.update(str(self.data).encode('utf-8'))
        message.update(str(self.timestamp).encode('utf-8'))
        self.hash = message.hexdigest()
class BlockChain:
    """A minimal blockchain container.

    Attributes:
        blocks: The blocks of the chain, in insertion order.
    """

    def __init__(self):
        """Create an empty chain."""
        self.blocks = []

    def add_block(self, block):
        """Append *block* to the end of the chain.

        No validation is performed on the block.
        """
        self.blocks.append(block)
# Read the first line of the paper-information JSON file.
with open(r'C:\Users\zz\Desktop\bitcoin.json', 'r') as file:
    k = file.readlines()
a = k[0]

# Drop the final character, strip brackets and double quotes, then remove
# the escaped line breaks (literal backslash + "n") left in the JSON text.
b = "".join(ch for ch in a[:-1] if ch not in "]\"[")
c = b.replace(" \\n,", "")
d = c.replace("\\n", "")
print(d)

# The genesis block has no parent, hence the empty prev_hash.
genesis_block = Block(data=d, prev_hash="")
blockchain = BlockChain()
blockchain.add_block(genesis_block)

print('区块链包含区块个数: %d\n' % len(blockchain.blocks))
for block in blockchain.blocks:
    print("父区块区块哈希:%s" % block.prev_hash)
    print("区块内容:%s" % block.data)
    print("区块哈希:%s" % block.hash)
    print("\n")
Step 4
第四步是将论文在区块链中的地址写入原PDF文档,即把上面生成的区块哈希 921837a42e7157eb6eefaa4c7b083e15bbeb13d97d63f0de561a00ff942c3776 写入文档。
import PyPDF2
# Hash of the block that stores the paper information.
block_hash = "921837a42e7157eb6eefaa4c7b083e15bbeb13d97d63f0de561a00ff942c3776"

# Copy every page of the original PDF into a new document and record the
# block hash in the new document's metadata.
with open(r'C:\Users\zz\Desktop\bitcoin.pdf', mode='rb') as mypdf:
    pdfdoc = PyPDF2.PdfFileReader(mypdf)

    # One writer for the whole document: creating it after fetching each
    # page left only the last page in the output file.
    newbitcoin = PyPDF2.PdfFileWriter()
    for i in range(pdfdoc.numPages):
        newbitcoin.addPage(pdfdoc.getPage(i))

    # Custom document-information entry; keys must start with "/".
    newbitcoin.addMetadata({'/BlockHash': block_hash})

    # The source file stays open until the write has finished.
    with open(r'C:\Users\zz\Desktop\newbitcoin.pdf', 'wb') as pdfout:
        newbitcoin.write(pdfout)
Step 5
最后一步是通过区块链信息直接访问该论文信息。
import base64
# Read the first line of the stored paper information, then round-trip it
# through Base64 to show that the encoded form decodes to the same text.
with open(r'C:\Users\zz\Desktop\bitcoin.json', 'r') as file:
    read = file.readlines()

# Strip only a trailing newline. Unconditionally dropping the last
# character cut off the closing quote of the JSON string whenever the
# line had no trailing newline.
b = read[0].rstrip("\n")

result = base64.b64encode(b.encode('utf-8'))
print(result)
text = base64.b64decode(result)
print(text.decode('utf-8'))
结论:
通过与上文的对比可以看出,输出的结果就是我们在第一步中采集并以JSON格式存储的论文基础信息。
|