一. 字符串长度与大小
因为ASCII字符串以NULL或者说以'\0'结尾,UNICODE字符串以L'\0'结尾,那么字符串的大小和长度显然是不同的:以字符为单位,字符串大小 = 字符串长度 + 1;以字节为单位,还要再乘以单个字符所占的字节数(ASCII为1,UNICODE为2)。
获取一个字符串的长度采用的库函数是
size_t strlen( char const *string );
获取字符串大小的函数可以根据和长度的关系自行构造。下面列举UEFI中的两个函数的实现,
ASCII码
/**
  Returns the number of characters in a null-terminated ASCII string,
  not counting the terminator.

  Asserts that String is not NULL. When PcdMaximumAsciiStringLength is
  non-zero, also asserts that every counted position stays below it.

  @param  String  Pointer to a null-terminated ASCII string.

  @return The number of characters that precede the '\0' terminator.
**/
UINTN
EFIAPI
AsciiStrLen (
  IN CONST CHAR8  *String
  )
{
  UINTN  Length;

  ASSERT (String != NULL);

  Length = 0;
  while (String[Length] != '\0') {
    //
    // If PcdMaximumAsciiStringLength is not zero,
    // length should not be more than PcdMaximumAsciiStringLength
    //
    if (PcdGet32 (PcdMaximumAsciiStringLength) != 0) {
      ASSERT (Length < PcdGet32 (PcdMaximumAsciiStringLength));
    }

    Length++;
  }

  return Length;
}
/**
  Returns the size in bytes of a null-terminated ASCII string,
  including the terminator.

  @param  String  Pointer to a null-terminated ASCII string.

  @return (AsciiStrLen (String) + 1) multiplied by the size of one character.
**/
UINTN
EFIAPI
AsciiStrSize (
  IN CONST CHAR8  *String
  )
{
  UINTN  CharCount;

  //
  // Every character of the string plus one slot for the terminator.
  //
  CharCount = AsciiStrLen (String) + 1;

  return CharCount * sizeof (*String);
}
UNICODE码
/**
  Returns the number of characters in a null-terminated Unicode string,
  not counting the terminator.

  Asserts that String is not NULL and that bit 0 of its address is clear
  (2-byte alignment). When PcdMaximumUnicodeStringLength is non-zero, also
  asserts that every counted position stays below it.

  @param  String  Pointer to a null-terminated Unicode string.

  @return The number of characters that precede the L'\0' terminator.
**/
UINTN
EFIAPI
StrLen (
  IN CONST CHAR16  *String
  )
{
  UINTN  Length;

  ASSERT (String != NULL);
  ASSERT (((UINTN) String & BIT0) == 0);

  Length = 0;
  while (String[Length] != L'\0') {
    //
    // If PcdMaximumUnicodeStringLength is not zero,
    // length should not be more than PcdMaximumUnicodeStringLength
    //
    if (PcdGet32 (PcdMaximumUnicodeStringLength) != 0) {
      ASSERT (Length < PcdGet32 (PcdMaximumUnicodeStringLength));
    }

    Length++;
  }

  return Length;
}
/**
  Returns the size in bytes of a null-terminated Unicode string,
  including the terminator.

  @param  String  Pointer to a null-terminated Unicode string.

  @return (StrLen (String) + 1) multiplied by the size of one character.
**/
UINTN
EFIAPI
StrSize (
  IN CONST CHAR16  *String
  )
{
  UINTN  CharCount;

  //
  // Every character of the string plus one slot for the terminator.
  //
  CharCount = StrLen (String) + 1;

  return CharCount * sizeof (*String);
}
二. 字符串比较
所谓字符串比较,就是逐个字符地对比两个字符串:如果两者完全相同,或者其中一个是另一个的前缀,则一直比较到结束符为止;否则对比最先不匹配的字符,看哪个在字符集中的序数较小。函数原型是:
int strcmp( char const *s1, char const *s2 )
可以看到该函数的返回值类型为int:如果s1小于s2,返回值小于0;如果s1大于s2,返回值大于0;如果s1等于s2,返回值等于0。
UEFI中该库函数的实现如下:
ASCII码
/**
  Compares two null-terminated ASCII strings character by character.

  @param  FirstString   Pointer to a null-terminated ASCII string.
  @param  SecondString  Pointer to a null-terminated ASCII string.

  @retval 0       Both strings are identical.
  @return others  The first mismatched character of FirstString minus the
                  character at the same position in SecondString.
**/
INTN
EFIAPI
AsciiStrCmp (
  IN CONST CHAR8  *FirstString,
  IN CONST CHAR8  *SecondString
  )
{
  //
  // Computing the size walks each string through AsciiStrLen(), which is
  // where the PcdMaximumAsciiStringLength limit is asserted.
  //
  ASSERT (AsciiStrSize (FirstString));
  ASSERT (AsciiStrSize (SecondString));

  //
  // Advance both pointers until FirstString ends or the characters differ.
  //
  while (*FirstString != '\0') {
    if (*FirstString != *SecondString) {
      break;
    }

    FirstString++;
    SecondString++;
  }

  return *FirstString - *SecondString;
}
UNICODE码
/**
  Compares two null-terminated Unicode strings character by character.

  @param  FirstString   Pointer to a null-terminated Unicode string.
  @param  SecondString  Pointer to a null-terminated Unicode string.

  @retval 0       Both strings are identical.
  @return others  The first mismatched character of FirstString minus the
                  character at the same position in SecondString.
**/
INTN
EFIAPI
StrCmp (
  IN CONST CHAR16  *FirstString,
  IN CONST CHAR16  *SecondString
  )
{
  //
  // Computing the size walks each string through StrLen(), which is
  // where the PcdMaximumUnicodeStringLength limit is asserted.
  //
  ASSERT (StrSize (FirstString) != 0);
  ASSERT (StrSize (SecondString) != 0);

  //
  // Advance both pointers until FirstString ends or the characters differ.
  //
  while (*FirstString != L'\0') {
    if (*FirstString != *SecondString) {
      break;
    }

    FirstString++;
    SecondString++;
  }

  return *FirstString - *SecondString;
}
三. 字符串变换
这里的字符串变换包括不设置长度的strcpy/strcat,以及设置长度的strncpy/strncat。
strcpy和strncpy的函数原型如下:
char *strcpy( char *dst, char const *src );
char *strncpy( char *dst, char const *src, size_t len );
strcpy一定会把字符串最后的NULL也拷贝到dst中;而strncpy最多拷贝len个字符:如果src的长度小于len,dst中剩余的部分会用NULL(即'\0')填满;如果src的长度大于或等于len,则结尾的NULL不会被拷贝,dst也就不以NULL结尾。两者在UEFI中的实现如下:
ASCII码
/**
  Copies a null-terminated ASCII string, terminator included, into
  Destination.

  Asserts that Destination is not NULL and that the two buffers are more
  than AsciiStrLen (Source) characters apart in either direction.

  @param  Destination  Buffer that receives the copy.
  @param  Source       Pointer to a null-terminated ASCII string.

  @return Destination.
**/
CHAR8 *
EFIAPI
AsciiStrCpy (
  OUT CHAR8        *Destination,
  IN  CONST CHAR8  *Source
  )
{
  UINTN  Index;

  //
  // Destination cannot be NULL
  //
  ASSERT (Destination != NULL);

  //
  // Destination and source cannot overlap
  //
  ASSERT ((UINTN)(Destination - Source) > AsciiStrLen (Source));
  ASSERT ((UINTN)(Source - Destination) > AsciiStrLen (Source));

  //
  // Copy every character ahead of the terminator, then terminate the copy.
  //
  for (Index = 0; Source[Index] != 0; Index++) {
    Destination[Index] = Source[Index];
  }

  Destination[Index] = 0;

  return Destination;
}
/**
  Copies at most Length characters of a null-terminated ASCII string into
  Destination.

  When Length is 0 the function returns Destination at once, before any
  assertion is evaluated. If Source is shorter than Length, the unused part
  of the Length-character window is handed to ZeroMem(). If Source holds
  Length or more characters, no terminator is stored by this function.

  @param  Destination  Buffer that receives the copy.
  @param  Source       Pointer to a null-terminated ASCII string.
  @param  Length       Maximum number of characters to copy.

  @return Destination.
**/
CHAR8 *
EFIAPI
AsciiStrnCpy (
  OUT CHAR8        *Destination,
  IN  CONST CHAR8  *Source,
  IN  UINTN        Length
  )
{
  UINTN  Index;

  if (Length == 0) {
    return Destination;
  }

  //
  // Destination cannot be NULL
  //
  ASSERT (Destination != NULL);

  //
  // Destination and source cannot overlap
  //
  ASSERT ((UINTN)(Destination - Source) > AsciiStrLen (Source));
  ASSERT ((UINTN)(Source - Destination) >= Length);

  if (PcdGet32 (PcdMaximumAsciiStringLength) != 0) {
    ASSERT (Length <= PcdGet32 (PcdMaximumAsciiStringLength));
  }

  //
  // Copy until the source terminator is seen or Length characters are done.
  //
  Index = 0;
  while ((Source[Index] != 0) && (Index < Length)) {
    Destination[Index] = Source[Index];
    Index++;
  }

  //
  // Clear whatever is left of the Length-character window.
  //
  ZeroMem (Destination + Index, (Length - Index) * sizeof (*Destination));

  return Destination;
}
UNICODE码
/**
  Copies a null-terminated Unicode string, terminator included, into
  Destination.

  Asserts that Destination is not NULL, that bit 0 of its address is clear
  (2-byte alignment), and that the two buffers are more than
  StrLen (Source) characters apart in either direction.

  @param  Destination  Buffer that receives the copy.
  @param  Source       Pointer to a null-terminated Unicode string.

  @return Destination.
**/
CHAR16 *
EFIAPI
StrCpy (
  OUT CHAR16        *Destination,
  IN  CONST CHAR16  *Source
  )
{
  UINTN  Index;

  //
  // Destination cannot be NULL
  //
  ASSERT (Destination != NULL);
  ASSERT (((UINTN) Destination & BIT0) == 0);

  //
  // Destination and source cannot overlap
  //
  ASSERT ((UINTN)(Destination - Source) > StrLen (Source));
  ASSERT ((UINTN)(Source - Destination) > StrLen (Source));

  //
  // Copy every character ahead of the terminator, then terminate the copy.
  //
  for (Index = 0; Source[Index] != 0; Index++) {
    Destination[Index] = Source[Index];
  }

  Destination[Index] = 0;

  return Destination;
}
/**
  Copies at most Length characters of a null-terminated Unicode string into
  Destination.

  When Length is 0 the function returns Destination at once, before any
  assertion is evaluated. If Source is shorter than Length, the unused part
  of the Length-character window is handed to ZeroMem(). If Source holds
  Length or more characters, no terminator is stored by this function.

  @param  Destination  Buffer that receives the copy.
  @param  Source       Pointer to a null-terminated Unicode string.
  @param  Length       Maximum number of characters to copy.

  @return Destination.
**/
CHAR16 *
EFIAPI
StrnCpy (
  OUT CHAR16        *Destination,
  IN  CONST CHAR16  *Source,
  IN  UINTN         Length
  )
{
  UINTN  Index;

  if (Length == 0) {
    return Destination;
  }

  //
  // Destination cannot be NULL if Length is not zero
  //
  ASSERT (Destination != NULL);
  ASSERT (((UINTN) Destination & BIT0) == 0);

  //
  // Destination and source cannot overlap
  //
  ASSERT ((UINTN)(Destination - Source) > StrLen (Source));
  ASSERT ((UINTN)(Source - Destination) >= Length);

  if (PcdGet32 (PcdMaximumUnicodeStringLength) != 0) {
    ASSERT (Length <= PcdGet32 (PcdMaximumUnicodeStringLength));
  }

  //
  // Copy until the source terminator is seen or Length characters are done.
  //
  Index = 0;
  while ((Source[Index] != L'\0') && (Index < Length)) {
    Destination[Index] = Source[Index];
    Index++;
  }

  //
  // Clear whatever is left of the Length-character window.
  //
  ZeroMem (Destination + Index, (Length - Index) * sizeof (*Destination));

  return Destination;
}
strcat表示将src粘贴到dest的后面,strncat表示最多只粘贴src的前n个字符,其函数原型是
char *strcat(char *dest, const char *src)
char *strncat(char *dest, const char *src, size_t n)
在UEFI中的具体实现如下
ASCII码
/**
  Appends a null-terminated ASCII string to the end of another one.

  The copy is performed by AsciiStrCpy(), starting at the terminator of
  Destination.

  @param  Destination  Pointer to a null-terminated ASCII string that is
                       extended in place.
  @param  Source       Pointer to the null-terminated ASCII string to append.

  @return Destination.
**/
CHAR8 *
EFIAPI
AsciiStrCat (
  IN OUT CHAR8        *Destination,
  IN     CONST CHAR8  *Source
  )
{
  CHAR8  *Tail;

  //
  // Locate the current terminator and copy Source on top of it.
  //
  Tail = Destination + AsciiStrLen (Destination);
  AsciiStrCpy (Tail, Source);

  //
  // Size of the resulting string should never be zero.
  // PcdMaximumAsciiStringLength is tested inside AsciiStrLen().
  //
  ASSERT (AsciiStrSize (Destination) != 0);

  return Destination;
}
/**
  Appends at most Length characters of a null-terminated ASCII string to
  the end of another one.

  The copy is performed by AsciiStrnCpy(), starting at the terminator of
  Destination. Afterwards a '\0' is stored Length characters past the old
  end of Destination, whatever the length of Source.

  @param  Destination  Pointer to a null-terminated ASCII string that is
                       extended in place.
  @param  Source       Pointer to the null-terminated ASCII string to append.
  @param  Length       Maximum number of characters to take from Source.

  @return Destination.
**/
CHAR8 *
EFIAPI
AsciiStrnCat (
  IN OUT CHAR8        *Destination,
  IN     CONST CHAR8  *Source,
  IN     UINTN        Length
  )
{
  CHAR8  *Tail;

  //
  // Locate the current terminator, copy onto it, then terminate the result.
  //
  Tail = Destination + AsciiStrLen (Destination);
  AsciiStrnCpy (Tail, Source, Length);
  Tail[Length] = '\0';

  //
  // Size of the resulting string should never be zero.
  // PcdMaximumAsciiStringLength is tested inside AsciiStrLen().
  //
  ASSERT (AsciiStrSize (Destination) != 0);

  return Destination;
}
UNICODE码
/**
  Appends a null-terminated Unicode string to the end of another one.

  The copy is performed by StrCpy(), starting at the terminator of
  Destination.

  @param  Destination  Pointer to a null-terminated Unicode string that is
                       extended in place.
  @param  Source       Pointer to the null-terminated Unicode string to
                       append.

  @return Destination.
**/
CHAR16 *
EFIAPI
StrCat (
  IN OUT CHAR16        *Destination,
  IN     CONST CHAR16  *Source
  )
{
  CHAR16  *Tail;

  //
  // Locate the current terminator and copy Source on top of it.
  //
  Tail = Destination + StrLen (Destination);
  StrCpy (Tail, Source);

  //
  // Size of the resulting string should never be zero.
  // PcdMaximumUnicodeStringLength is tested inside StrLen().
  //
  ASSERT (StrSize (Destination) != 0);

  return Destination;
}
/**
  Appends at most Length characters of a null-terminated Unicode string to
  the end of another one.

  The copy is performed by StrnCpy(), starting at the terminator of
  Destination. Afterwards an L'\0' is stored Length characters past the old
  end of Destination, whatever the length of Source.

  @param  Destination  Pointer to a null-terminated Unicode string that is
                       extended in place.
  @param  Source       Pointer to the null-terminated Unicode string to
                       append.
  @param  Length       Maximum number of characters to take from Source.

  @return Destination.
**/
CHAR16 *
EFIAPI
StrnCat (
  IN OUT CHAR16        *Destination,
  IN     CONST CHAR16  *Source,
  IN     UINTN         Length
  )
{
  CHAR16  *Tail;

  //
  // Locate the current terminator, copy onto it, then terminate the result.
  //
  Tail = Destination + StrLen (Destination);
  StrnCpy (Tail, Source, Length);
  Tail[Length] = L'\0';

  //
  // Size of the resulting string should never be zero.
  // PcdMaximumUnicodeStringLength is tested inside StrLen().
  //
  ASSERT (StrSize (Destination) != 0);

  return Destination;
}
四. 字符串查找
字符串查找相关的函数比较多,有strstr/strchr/strrchr/strpbrk/strspn/strcspn等等。其中strstr表示在某一字符串中查找另一个字符串第一次出现的位置;strchr表示在某一字符串中查找某个字符第一次出现的位置;strrchr表示查找某个字符最后一次出现的位置(相当于从右往左查找第一个匹配的字符);strpbrk表示在字符串str1中查找字符集合str2中任意一个字符第一次出现的位置;strspn返回str1开头连续由str2中的字符组成的部分的长度,也就是str1中第一个不在str2中出现的字符的下标;strcspn返回str1开头连续不含str2中任何字符的部分的长度,也就是str1中第一个在str2中出现的字符的下标。
这些函数的声明如下:
char *strchr(const char *str, int c);
char *strrchr(const char *str, int c);
char *strstr(const char *haystack, const char *needle);
size_t strspn(const char *str1, const char *str2);
size_t strcspn(const char *str1, const char *str2);
char *strpbrk(const char *str1, const char *str2);
看起来关于查找的函数很多,但最常用的还是strstr,其在UEFI中的具体实现如下:
ASCII码
/**
  Finds the first occurrence of one null-terminated ASCII string inside
  another.

  @param  String        Pointer to the null-terminated ASCII string to
                        search in.
  @param  SearchString  Pointer to the null-terminated ASCII string to
                        search for.

  @retval NULL    SearchString does not occur in String.
  @return others  String itself when SearchString is empty; otherwise a
                  pointer to the first occurrence of SearchString in String.
**/
CHAR8 *
EFIAPI
AsciiStrStr (
  IN CONST CHAR8  *String,
  IN CONST CHAR8  *SearchString
  )
{
  UINTN  Offset;

  //
  // ASSERT both strings are less long than PcdMaximumAsciiStringLength
  //
  ASSERT (AsciiStrSize (String) != 0);
  ASSERT (AsciiStrSize (SearchString) != 0);

  if (*SearchString == '\0') {
    return (CHAR8 *) String;
  }

  //
  // Try every position of String as the start of a candidate match.
  //
  for ( ; *String != '\0'; String++) {
    Offset = 0;
    while ((String[Offset] == SearchString[Offset])
           && (String[Offset] != '\0')) {
      Offset++;
    }

    if (SearchString[Offset] == '\0') {
      return (CHAR8 *) String;
    }

    //
    // The scan ran into the end of String, so any later start would leave
    // even fewer characters to match against: give up.
    //
    if (String[Offset] == '\0') {
      return NULL;
    }
  }

  return NULL;
}
UNICODE码
/**
  Finds the first occurrence of one null-terminated Unicode string inside
  another.

  @param  String        Pointer to the null-terminated Unicode string to
                        search in.
  @param  SearchString  Pointer to the null-terminated Unicode string to
                        search for.

  @retval NULL    SearchString does not occur in String.
  @return others  String itself when SearchString is empty; otherwise a
                  pointer to the first occurrence of SearchString in String.
**/
CHAR16 *
EFIAPI
StrStr (
  IN CONST CHAR16  *String,
  IN CONST CHAR16  *SearchString
  )
{
  UINTN  Offset;

  //
  // ASSERT both strings are less long than PcdMaximumUnicodeStringLength.
  // Length tests are performed inside StrLen().
  //
  ASSERT (StrSize (String) != 0);
  ASSERT (StrSize (SearchString) != 0);

  if (*SearchString == L'\0') {
    return (CHAR16 *) String;
  }

  //
  // Try every position of String as the start of a candidate match.
  //
  for ( ; *String != L'\0'; String++) {
    Offset = 0;
    while ((String[Offset] == SearchString[Offset])
           && (String[Offset] != L'\0')) {
      Offset++;
    }

    if (SearchString[Offset] == L'\0') {
      return (CHAR16 *) String;
    }

    //
    // The scan ran into the end of String, so any later start would leave
    // even fewer characters to match against: give up.
    //
    if (String[Offset] == L'\0') {
      return NULL;
    }
  }

  return NULL;
}
五. 好用的strtok
经常会遇到某个字符串中出现某些字符或字串规律排放的情况,比如实际工程中遇到的:
/* Digit characters separated by ", ", stored as a NUL-terminated string. */
char str[] = "0, 1, 8, 9";
有时需要统计出现的数字字符个数,或者需要将每个数字字符取出来,那么这时strtok就再合适不过了:
char *strtok(char *str, const char *delim)
//分解字符串 str 为一组字符串,delim 为分隔符。
具体将0/1/8/9打印出来的方法如下:
char str[] = "0, 1, 8, 9";
/* Both the comma and the space act as delimiters, so each token is a bare
   digit ("0", "1", "8", "9") with no leading space. */
const char s[] = ", ";
char *token;
/* Fetch the first token; strtok overwrites delimiters in str with '\0'. */
token = strtok(str, s);
/* Keep fetching the remaining tokens until none are left. */
while (token != NULL) {
    printf("%s\n", token);
    token = strtok(NULL, s);
}
总结
用好C语言字符串相关的库函数,可以解决相当一部分字符输入输出等字符处理场景中的问题;UEFI中库函数的具体实现,也为我们自行实现库函数中没有但实际需要的功能提供了一种参考。祝大家在C的应用上越来越得心应手!
|