简介
- java解压文件的方式有很多种,Apache官方提供了一个工具,可以用来解压很多类型的文件。该工具可以解压和压缩带密码的7z文件,并支持ar, arj, cpio, dump, tar, zip 等文件的压缩和解压,对于后者而言我没找到压缩和解压带密码文件的api
- 官网
- 本文只探讨解压文件,不探讨压缩文件
- 依赖:
- 注意,这里需要引入两个依赖,第二个依赖(xz)在解压使用LZMA/LZMA2压缩的7z文件时会用到(下文的报错堆栈中可以看到 org.tukaani.xz.LZMA2InputStream)
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.21</version>
</dependency>
<dependency>
<groupId>org.tukaani</groupId>
<artifactId>xz</artifactId>
<version>1.9</version>
</dependency>
工具类
- 废话不多说,来看看工具类是怎么写的
- 这里头有几个要点
- 7z的解压的API与其他类型的文件不共用,7z可以解压带密码的文件,但其他类型文件没有提供相关API,所以下面这个工具类,需要提供密码的方法只有解压7z文件时才有用,其他文件如何解压带密码的,等研究研究再改进这个工具类。
- 还有,commonDecompression 需要提供编码方式:如果不指定,默认会使用utf8编码,在我的电脑上会导致文件名乱码、无法解压,所以我这里使用的编码方式是gbk。而7z的解压貌似没有提供配置编码方式的接口。
package com.wu.util;
import org.apache.commons.compress.archivers.*;
import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry;
import org.apache.commons.compress.archivers.sevenz.SevenZFile;
import org.apache.commons.compress.utils.IOUtils;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
/**
 * Extracts archive files with Apache Commons Compress.
 *
 * <p>7z archives are handled by {@link SevenZFile} (the only path that honours a password);
 * every other format is handled by the streaming API obtained from
 * {@link ArchiveStreamFactory}, selected by the file extension.
 *
 * <p>All public methods keep a non-throwing contract for I/O and archive errors: failures are
 * reported through {@link #log(String)} and the stack trace is printed, then the method returns.
 */
public class ArchiveUtil {

    /**
     * Emits a progress or diagnostic message. Subclasses may override it to redirect output.
     *
     * @param msg the message to print
     */
    protected void log(String msg) {
        System.out.println(msg);
    }

    /**
     * Returns the extension of the last path component of {@code fileName}.
     *
     * @param fileName a file name or path; both {@code /} and {@code \} are treated as separators
     * @return the text after the last dot of the final path component, or {@code ""} when that
     *         component has no dot or ends with a dot
     */
    protected String getFileExtension(String fileName) {
        // Only the final path component may contribute the extension; a dot inside a
        // directory name (e.g. "dir.v1/archive") must not be mistaken for one.
        int nameStart = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\')) + 1;
        int dot = fileName.lastIndexOf('.');
        if (dot < nameStart || dot == fileName.length() - 1) {
            return "";
        }
        return fileName.substring(dot + 1);
    }

    /**
     * Extracts {@code srcFile} into {@code destDir}, choosing the 7z or the generic
     * implementation from the file extension (compared case-insensitively).
     *
     * @param srcFile  path of the archive
     * @param destDir  directory that receives the extracted entries
     * @param password password for encrypted 7z archives, may be {@code null}; ignored for
     *                 other formats
     * @param charset  entry-name encoding for non-7z archives; ignored for 7z
     */
    public void decompression(String srcFile, String destDir, String password, String charset) {
        String fileExtension = getFileExtension(srcFile);
        if (ArchiveStreamFactory.SEVEN_Z.equalsIgnoreCase(fileExtension)) {
            un7z(srcFile, destDir, password, charset);
        } else {
            commonDecompression(srcFile, destDir, password, charset);
        }
    }

    /**
     * Extracts a non-7z archive through the streaming API. The archiver is selected by the
     * file extension of {@code srcFile}.
     *
     * @param srcFile  path of the archive
     * @param destDir  directory that receives the extracted entries
     * @param password unused: this code path does not pass a password to the library
     * @param charset  encoding passed to the archive stream factory
     */
    public void commonDecompression(String srcFile, String destDir, String password, String charset) {
        String fileExtension = getFileExtension(srcFile);
        // The raw stream is its own resource so it is closed even when the factory rejects
        // the archiver name and no ArchiveInputStream is ever created.
        try (InputStream raw = new BufferedInputStream(Files.newInputStream(Paths.get(srcFile)));
             ArchiveInputStream archive = new ArchiveStreamFactory()
                     .createArchiveInputStream(fileExtension, raw, charset)) {
            ArchiveEntry entry;
            while ((entry = archive.getNextEntry()) != null) {
                String entryName = entry.getName();
                if (!archive.canReadEntryData(entry)) {
                    log("不能解析文件:" + entryName);
                    continue;
                }
                File target = resolveEntry(destDir, entryName);
                if (entry.isDirectory()) {
                    ensureDirectory(target);
                    log("文件夹" + entryName + "创建成功!");
                } else {
                    ensureDirectory(target.getParentFile());
                    try (OutputStream out = Files.newOutputStream(target.toPath())) {
                        IOUtils.copy(archive, out);
                        log("文件" + entryName + "解压成功!");
                    }
                }
            }
        } catch (IOException | ArchiveException e) {
            // StreamingNotSupportedException is an ArchiveException and is handled here too.
            log("解压失败:" + srcFile);
            e.printStackTrace();
        }
    }

    /**
     * Extracts a 7z archive, optionally encrypted.
     *
     * @param srcFile  path of the 7z archive
     * @param destDir  directory that receives the extracted entries
     * @param password archive password, or {@code null} when the archive is not encrypted
     * @param charset  unused: this code path does not configure an encoding
     */
    public void un7z(String srcFile, String destDir, String password, String charset) {
        char[] passwordChars = password == null ? null : password.toCharArray();
        try (SevenZFile sevenZFile = new SevenZFile(new File(srcFile), passwordChars)) {
            SevenZArchiveEntry entry;
            while ((entry = sevenZFile.getNextEntry()) != null) {
                String entryName = entry.getName();
                File target = resolveEntry(destDir, entryName);
                if (entry.isDirectory()) {
                    ensureDirectory(target);
                    log("文件夹" + entryName + "创建成功!");
                } else {
                    // Archives are not required to list a directory before the files in it.
                    ensureDirectory(target.getParentFile());
                    try (OutputStream out = Files.newOutputStream(target.toPath())) {
                        IOUtils.copy(sevenZFile.getInputStream(entry), out);
                        log("文件" + entryName + "解压成功!");
                    }
                }
            }
        } catch (IOException e) {
            log("解压失败:" + srcFile);
            e.printStackTrace();
        }
    }

    /**
     * Maps an entry name onto the target directory and rejects names that would escape it
     * (for example {@code ../../evil} or an absolute path).
     */
    private File resolveEntry(String destDir, String entryName) throws IOException {
        File target = new File(destDir, entryName);
        String basePath = new File(destDir == null ? "." : destDir).getCanonicalPath();
        String targetPath = target.getCanonicalPath();
        String basePrefix = basePath.endsWith(File.separator) ? basePath : basePath + File.separator;
        if (!targetPath.equals(basePath) && !targetPath.startsWith(basePrefix)) {
            throw new IOException("entry is outside of the target directory: " + entryName);
        }
        return target;
    }

    /** Creates {@code dir} (and missing ancestors) unless it is {@code null} or already exists. */
    private void ensureDirectory(File dir) throws IOException {
        if (dir != null && !dir.isDirectory() && !dir.mkdirs()) {
            throw new IOException("failed to create directory " + dir);
        }
    }
}
使用示例
// Archive to extract; its "7z" extension makes decompression() take the un7z path.
String inFile = "F:\\测试\\测试.7z";
// Directory that receives the extracted entries.
String outFile = "F:\\测试\\测试结果";
// Arguments: source archive, target directory, password, charset name.
new ArchiveUtil().decompression(inFile,outFile,"456789","gbk");
7z源码研究
由于7z可以解压带密码的文件,我很好奇它是怎么解压的。于是我扒了一下源码。
错误的文件类型
我用压缩工具把文件压缩成zip格式,然后故意把扩展名改成.7z,想看看7z是怎么判断格式错误的。报错如下
java.io.IOException: Bad 7z signature
at org.apache.commons.compress.archivers.sevenz.SevenZFile.readHeaders(SevenZFile.java:443)
at org.apache.commons.compress.archivers.sevenz.SevenZFile.<init>(SevenZFile.java:343)
at org.apache.commons.compress.archivers.sevenz.SevenZFile.<init>(SevenZFile.java:135)
at org.apache.commons.compress.archivers.sevenz.SevenZFile.<init>(SevenZFile.java:122)
at com.wu.util.ArchiveUtil.un7z(ArchiveUtil.java:111)
at com.wu.util.ArchiveUtil.decompression(ArchiveUtil.java:47)
at com.wu.Application.main(Application.java:15)
我翻开了org.apache.commons.compress.archivers.sevenz.SevenZFile#readHeaders 的源码发现了这句话
// Excerpt from SevenZFile#readHeaders: the signature read from the file is compared with
// the expected constant; any difference aborts with "Bad 7z signature".
if (!Arrays.equals(signature, sevenZSignature)) {
throw new IOException("Bad 7z signature");
}
// Expected signature, six bytes: ASCII '7', 'z', then 0xBC 0xAF 0x27 0x1C.
static final byte[] sevenZSignature = {
(byte)'7', (byte)'z', (byte)0xBC, (byte)0xAF, (byte)0x27, (byte)0x1C
};
这才意识到7z文件有着标准的开头,我把正常的7z文件丢到winHex里,发现果然如此,文件的开头就是7z文件的标识符
如何解密的?
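为了研究这个,我先用压缩工具创建了一个带密码的7z文件,密码为"123456",但故意在程序中给定密码为"456789",结果报了如下错误(注意:从堆栈末尾的 Caused by 可以看出,这个异常的直接原因是 Illegal key size,也就是当前JDK限制了AES密钥长度,正如异常信息中提示的需要安装 JCE Unlimited Strength Jurisdiction Policy Files)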
java.io.IOException: Decryption error (do you have the JCE Unlimited Strength Jurisdiction Policy Files installed?)
at org.apache.commons.compress.archivers.sevenz.AES256SHA256Decoder$1.init(AES256SHA256Decoder.java:103)
at org.apache.commons.compress.archivers.sevenz.AES256SHA256Decoder$1.read(AES256SHA256Decoder.java:111)
at java.io.DataInputStream.readUnsignedByte(DataInputStream.java:288)
at org.tukaani.xz.LZMA2InputStream.decodeChunkHeader(Unknown Source)
at org.tukaani.xz.LZMA2InputStream.read(Unknown Source)
at org.apache.commons.compress.utils.BoundedInputStream.read(BoundedInputStream.java:64)
at org.apache.commons.compress.utils.ChecksumVerifyingInputStream.read(ChecksumVerifyingInputStream.java:88)
at org.apache.commons.compress.utils.ChecksumVerifyingInputStream.read(ChecksumVerifyingInputStream.java:74)
at org.apache.commons.compress.utils.IOUtils.copy(IOUtils.java:95)
at org.apache.commons.compress.utils.IOUtils.copy(IOUtils.java:70)
at com.wu.util.ArchiveUtil.un7z(ArchiveUtil.java:124)
at com.wu.util.ArchiveUtil.decompression(ArchiveUtil.java:47)
at com.wu.Application.main(Application.java:15)
Caused by: java.security.InvalidKeyException: Illegal key size
at javax.crypto.Cipher.checkCryptoPerm(Cipher.java:1039)
at javax.crypto.Cipher.implInit(Cipher.java:805)
at javax.crypto.Cipher.chooseProvider(Cipher.java:864)
at javax.crypto.Cipher.init(Cipher.java:1396)
at javax.crypto.Cipher.init(Cipher.java:1327)
at org.apache.commons.compress.archivers.sevenz.AES256SHA256Decoder$1.init(AES256SHA256Decoder.java:98)
... 12 more
眼尖的小伙伴可以看到堆栈中出现了AES256SHA256Decoder这个类,这说明7z解压带密码的文件时使用的是AES对称加密算法(7z的规范我不知道在哪个官网上有相关规定,如果有网友知道,请在评论区告诉我,我现在只能通过扒代码才知道是怎么回事)。但是有个问题:AES的密钥长度必须是16、24或32字节(AES-256为32字节),可是咱们设置的密码通常是任意长度,甚至可以输入中文,这是怎么回事?其实从AES256SHA256Decoder这个类名还可以看到SHA-256哈希(摘要)算法的身影。这时脑中自然就有一个猜想:程序先把咱们输入的密码通过SHA-256转化为32字节的密钥,然后再用它进行AES解密。咱们来扒一扒源码看看。我们来到刚才报错堆栈的顶部 org.apache.commons.compress.archivers.sevenz.AES256SHA256Decoder#decode,这个类只有这一个方法,并且方法里已经把整个过程写得很清楚了。
package org.apache.commons.compress.archivers.sevenz;
import java.io.IOException;
import java.io.InputStream;
import java.security.GeneralSecurityException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import javax.crypto.Cipher;
import javax.crypto.CipherInputStream;
import javax.crypto.SecretKey;
import javax.crypto.spec.IvParameterSpec;
import javax.crypto.spec.SecretKeySpec;
import org.apache.commons.compress.PasswordRequiredException;
/**
 * Coder whose {@code decode} wraps the given stream in an AES/CBC/NoPadding decrypting
 * stream. The AES key is derived from the salt stored in the coder properties and the
 * supplied password bytes, normally by repeated SHA-256 updates.
 */
class AES256SHA256Decoder extends CoderBase {
/**
 * Returns a stream that decrypts {@code in}. No cipher is created here: the key derivation
 * and cipher setup run inside {@code init()}, which is invoked by the first {@code read}.
 * NOTE(review): the encoding of {@code passwordBytes} is decided by the caller and is not
 * visible in this class.
 */
@Override
InputStream decode(final String archiveName, final InputStream in, final long uncompressedLength,
final Coder coder, final byte[] passwordBytes, final int maxMemoryLimitInKb) throws IOException {
return new InputStream() {
// Set once the cipher stream has been built successfully.
private boolean isInitialized;
private CipherInputStream cipherInputStream;
// Builds the decrypting stream on first use and returns the cached one afterwards.
private CipherInputStream init() throws IOException {
if (isInitialized) {
return cipherInputStream;
}
if (coder.properties == null) {
throw new IOException("Missing AES256 properties in " + archiveName);
}
if (coder.properties.length < 2) {
throw new IOException("AES256 properties too short in " + archiveName);
}
// Property layout: byte 0 carries numCyclesPower in its low 6 bits plus one flag bit each
// for IV (bit 6) and salt (bit 7); byte 1 carries the IV size in its low nibble and the
// salt size in its high nibble. Each flag bit is added to the corresponding nibble.
final int byte0 = 0xff & coder.properties[0];
final int numCyclesPower = byte0 & 0x3f;
final int byte1 = 0xff & coder.properties[1];
final int ivSize = ((byte0 >> 6) & 1) + (byte1 & 0x0f);
final int saltSize = ((byte0 >> 7) & 1) + (byte1 >> 4);
if (2 + saltSize + ivSize > coder.properties.length) {
throw new IOException("Salt size + IV size too long in " + archiveName);
}
// Salt and IV follow the two header bytes, salt first.
final byte[] salt = new byte[saltSize];
System.arraycopy(coder.properties, 2, salt, 0, saltSize);
// The IV buffer is always 16 bytes; bytes beyond ivSize keep their initial value of zero.
final byte[] iv = new byte[16];
System.arraycopy(coder.properties, 2 + saltSize, iv, 0, ivSize);
if (passwordBytes == null) {
throw new PasswordRequiredException(archiveName);
}
final byte[] aesKeyBytes;
if (numCyclesPower == 0x3f) {
// Special case: no hashing. The 32-byte key is the salt followed by as many password
// bytes as fit; any remaining bytes stay zero.
aesKeyBytes = new byte[32];
System.arraycopy(salt, 0, aesKeyBytes, 0, saltSize);
System.arraycopy(passwordBytes, 0, aesKeyBytes, saltSize,
Math.min(passwordBytes.length, aesKeyBytes.length - saltSize));
} else {
final MessageDigest digest;
try {
digest = MessageDigest.getInstance("SHA-256");
} catch (final NoSuchAlgorithmException noSuchAlgorithmException) {
throw new IOException("SHA-256 is unsupported by your Java implementation",
noSuchAlgorithmException);
}
// Feed salt, password and an 8-byte counter into one digest 2^numCyclesPower times.
final byte[] extra = new byte[8];
for (long j = 0; j < (1L << numCyclesPower); j++) {
digest.update(salt);
digest.update(passwordBytes);
digest.update(extra);
// Increment the counter with carry, lowest index first.
for (int k = 0; k < extra.length; k++) {
++extra[k];
if (extra[k] != 0) {
break;
}
}
}
// The digest output is used directly as the AES key.
aesKeyBytes = digest.digest();
}
final SecretKey aesKey = new SecretKeySpec(aesKeyBytes, "AES");
try {
final Cipher cipher = Cipher.getInstance("AES/CBC/NoPadding");
cipher.init(Cipher.DECRYPT_MODE, aesKey, new IvParameterSpec(iv));
cipherInputStream = new CipherInputStream(in, cipher);
isInitialized = true;
return cipherInputStream;
} catch (final GeneralSecurityException generalSecurityException) {
// Any JCA failure (including a rejected key size) is reported as an IOException with the
// original exception kept as its cause.
throw new IOException("Decryption error " +
"(do you have the JCE Unlimited Strength Jurisdiction Policy Files installed?)",
generalSecurityException);
}
}
// Both read variants trigger the lazy initialization before delegating.
@Override
public int read() throws IOException {
return init().read();
}
@Override
public int read(final byte[] b, final int off, final int len) throws IOException {
return init().read(b, off, len);
}
// Closes the cipher stream only if it was created; when nothing was ever read, this method
// does not touch the wrapped stream.
@Override
public void close() throws IOException {
if (cipherInputStream != null) {
cipherInputStream.close();
}
}
};
}
}
把这个类读懂,对了解解压的底层逻辑有很大帮助。之后我会尝试按照这里的逻辑,把zip的解压改写成能够带密码解压的形式。
|