关于编码转换
工作多年,好像一直没把编码转换搞通
JAVA做过,C#做过,就是碰到问题,随便一通转换就过了,没搞清楚过
很多年了,没深究,惭愧
就好像正则表达式,虽然做过一段时间前端,但是就是没做过爬虫,页面分析之类的,所以也没深究
今天刚好需要,搞了一下转换
一开始,网上找的方法都不好用,根本实现不了
测试了一下,发现甚至网上的方法都不能转换,难道直接Ctrl+C也能出错?
关键在于,必须先用本机编码(Encoding.Default)把乱码文本还原成字节,也就是先获得“文本源”,然后再做转换;原理还是没深究
byte[] temp;
//将 textBox1.Text 转换为 byte 数组
temp = Encoding.Default.GetBytes(currText);
temp = Encoding.Convert(code1,code2,temp);
整个代码:
/* 转码还原(多对比)参考,语法Java
* https://blog.csdn.net/weixin_30715523/article/details/99492142
* c# 字串码合集
* https://blog.csdn.net/luanpeng825485697/article/details/77622243
*
*/
using System;
using System.Collections.Generic;
using System.Text;
using I18N.CJK;
using UnityEngine;
using UnityEditor;
/// <summary>
/// Editor window (menu "Tools/TextCodeRecover") for experimenting with text that was
/// decoded with the wrong character encoding. The user picks two encodings, types or
/// pastes a string, and the result of the selected conversion is written to the console.
/// </summary>
public class TextCodeWnd : EditorWindow
{
    /// <summary>Selects which conversion routine the button runs.</summary>
    public enum TransferType
    {
        /// <summary>Runs <see cref="FinalTransferEncoding"/> (byte re-interpretation).</summary>
        Test,
        /// <summary>Runs <see cref="TransferEncoding(string, string, string)"/> (transcoding round trip).</summary>
        Src2Dest,
    }

    /// <summary>
    /// Encodings offered in the popups. A member name becomes an encoding name by
    /// replacing '_' with '-' (for example utf_8 becomes "utf-8").
    /// </summary>
    public enum FormatType
    {
        GBK,
        utf_8,
        GB2312,
    }

    /// <summary>Opens (or focuses) the window.</summary>
    [MenuItem("Tools/TextCodeRecover")]
    public static void ShowWindow()
    {
        GetWindow<TextCodeWnd>();
    }

    // Text currently shown in the input field.
    private string currText;

    // Candidate encoding names tried pairwise by RecoverTextAllCheck.
    // Names that the running platform does not support are skipped at use time.
    private List<string> charsetAttr = new List<string>
    {
        "GBK", "utf-8", "utf-16", "utf-16BE", "GB18030", "GB2312", "Windows-1252", "iso-8859-1",
    };

    private FormatType srcFormat;
    private FormatType destFormat;
    private TransferType transferType;

    private void Awake()
    {
        titleContent = new GUIContent("TextCodeRecover");
        Debug.Log("完美");

        // Pre-fill the field with a sample garbled string so the tool can be tried immediately.
        if (string.IsNullOrEmpty(currText))
        {
            currText = "閽╃埅";
        }
    }

    private void OnGUI()
    {
        transferType = (TransferType)EditorGUILayout.EnumPopup("转换方式", transferType);
        GUILayout.Space(5);

        GUILayout.BeginHorizontal();
        srcFormat = (FormatType)EditorGUILayout.EnumPopup("转换从", srcFormat);
        GUILayout.Label("->");
        destFormat = (FormatType)EditorGUILayout.EnumPopup("到", destFormat);
        GUILayout.EndHorizontal();

        currText = EditorGUILayout.TextField(currText);

        // The button is only offered when the two encodings differ.
        if (srcFormat == destFormat)
        {
            return;
        }

        if (!GUILayout.Button("ttt"))
        {
            return;
        }

        string keySrc = ToEncodingName(srcFormat);
        string keyDest = ToEncodingName(destFormat);

        if (transferType == TransferType.Test)
        {
            Debug.Log(FinalTransferEncoding(keySrc, keyDest, currText));
        }
        else
        {
            // Report the pair that is actually being used, not a fixed target.
            Debug.Log(keySrc + "-》" + keyDest);
            Debug.Log(TransferEncoding(keySrc, keyDest, currText));
        }
    }

    /// <summary>Maps a <see cref="FormatType"/> member to an encoding name ('_' becomes '-').</summary>
    private static string ToEncodingName(FormatType format)
    {
        return format.ToString().Replace("_", "-");
    }

    /// <summary>
    /// Re-interprets the bytes behind <paramref name="currText"/>: the text is encoded with
    /// <paramref name="keyDest"/> to get the underlying bytes, and those bytes are then
    /// decoded with <paramref name="keySrc"/>.
    /// </summary>
    /// <remarks>
    /// The previous implementation went through <c>Encoding.Default</c>, so its result
    /// depended on the machine's default code page. This form gives the same result as the
    /// old one whenever either encoding is the machine default, and does not depend on it
    /// otherwise.
    /// </remarks>
    /// <param name="keySrc">Name of the encoding the bytes are decoded with; '_' is treated as '-'.</param>
    /// <param name="keyDest">Name of the encoding the text is encoded with; '_' is treated as '-'.</param>
    /// <param name="currText">Text to convert.</param>
    /// <returns>The re-decoded text, or an empty string when the input is null or empty.</returns>
    private string FinalTransferEncoding(string keySrc, string keyDest, string currText)
    {
        if (string.IsNullOrEmpty(currText))
        {
            return string.Empty;
        }

        Encoding decodeWith = Encoding.GetEncoding(keySrc.Replace("_", "-"));
        Encoding encodeWith = Encoding.GetEncoding(keyDest.Replace("_", "-"));

        byte[] rawBytes = encodeWith.GetBytes(currText);
        return decodeWith.GetString(rawBytes);
    }

    /// <summary>
    /// Brute-force helper: for every ordered pair of distinct encodings in the candidate
    /// list, encodes <paramref name="str"/> with the first and decodes the bytes with the
    /// second, logging each result so the readable one can be picked out by eye.
    /// </summary>
    /// <remarks>
    /// The bytes are deliberately NOT passed through <c>Encoding.Convert</c>: transcoding
    /// before decoding would just reproduce the input text for every pair.
    /// </remarks>
    /// <param name="str">Garbled text to test.</param>
    private void RecoverTextAllCheck(string str)
    {
        if (string.IsNullOrEmpty(str) || charsetAttr == null)
        {
            return;
        }

        // Resolve every name once; unsupported names are reported and skipped.
        var candidates = new List<KeyValuePair<string, Encoding>>();
        foreach (string name in charsetAttr)
        {
            try
            {
                candidates.Add(new KeyValuePair<string, Encoding>(name, Encoding.GetEncoding(name)));
            }
            catch (ArgumentException)
            {
                Debug.LogWarning("Unsupported encoding skipped: " + name);
            }
            catch (NotSupportedException)
            {
                Debug.LogWarning("Unsupported encoding skipped: " + name);
            }
        }

        foreach (var wrong in candidates)
        {
            Encoding charset = wrong.Value;
            byte[] srcBytes = charset.GetBytes(str);

            foreach (var origin in candidates)
            {
                Encoding originCharset = origin.Value;
                if (originCharset.Equals(charset))
                {
                    continue;
                }

                string encodeText = originCharset.GetString(srcBytes);
                Debug.Log(string.Format("{0}=>({1}){2};结果:{3}", charset, origin.Key, originCharset, encodeText));
            }
        }
    }

    /// <summary>
    /// Name-based convenience overload of
    /// <see cref="TransferEncoding(Encoding, Encoding, string)"/>.
    /// </summary>
    /// <param name="src">Name of the source encoding.</param>
    /// <param name="dest">Name of the destination encoding.</param>
    /// <param name="str">Text to convert.</param>
    /// <returns>The text after the transcoding round trip.</returns>
    public static string TransferEncoding(string src, string dest, string str)
    {
        return TransferEncoding(Encoding.GetEncoding(src), Encoding.GetEncoding(dest), str);
    }

    /// <summary>
    /// Encodes <paramref name="srcStr"/> with <paramref name="srcEncoding"/>, transcodes the
    /// bytes to <paramref name="dstEncoding"/>, and decodes them with
    /// <paramref name="dstEncoding"/>.
    /// </summary>
    /// <remarks>
    /// This is a round trip, so the result equals the input except for characters that one
    /// of the two encodings cannot represent. It does not repair garbled text; use
    /// <see cref="FinalTransferEncoding"/> for that.
    /// </remarks>
    /// <param name="srcEncoding">Source encoding.</param>
    /// <param name="dstEncoding">Destination encoding.</param>
    /// <param name="srcStr">Text to convert.</param>
    /// <returns>The text after the round trip.</returns>
    public static string TransferEncoding(Encoding srcEncoding, Encoding dstEncoding, string srcStr)
    {
        byte[] srcBytes = srcEncoding.GetBytes(srcStr);
        byte[] dstBytes = Encoding.Convert(srcEncoding, dstEncoding, srcBytes);
        return dstEncoding.GetString(dstBytes);
    }
}
|