IT数码 购物 网址 头条 软件 日历 阅读 图书馆
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
图片批量下载器
↓批量下载图片,美女图库↓
图片自动播放器
↓图片自动播放器↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁
 
   -> C++知识库 -> C++11 boost::spirit::qi简单的XML解析器示例 -> 正文阅读

[C++知识库]C++11 boost::spirit::qi简单的XML解析器示例

boost::spirit::qi是一个解析器(parser)开发库,可以用来解析文本,并在此基础上构建解释器等。
笔者花了两天时间看完了README文档,并且照着Demo代码写了一遍。感觉语法很复杂。特别是最后的一个XML解析器,很容易就写错了。好在错误信息还是很好理解的。
现在把代码贴出来和大家共享一下。
代码结构如下,
boost::spirit解析XML代码结构
test/CMakeLists.txt

# CMAKE_CXX_STANDARD 20 is only understood from CMake 3.12 on, and newer CMake
# releases have dropped compatibility with very old policy versions such as 2.6.
cmake_minimum_required(VERSION 3.12)

# project() has to come first: the platform detection it triggers is what
# defines APPLE / UNIX, which the branches below depend on.
project(spirit_xml_error_handling)

# Point find_package() at the vcpkg-installed package configs.
# NOTE(review): these are machine-specific absolute paths; adjust per checkout.
if(APPLE)
    message(STATUS "This is Apple, do nothing.")
    set(CMAKE_MACOSX_RPATH 1)
    set(CMAKE_PREFIX_PATH /Users/aabjfzhu/software/vcpkg/ports/cppwork/vcpkg_installed/x64-osx/share )
elseif(UNIX)
    message(STATUS "This is linux, set CMAKE_PREFIX_PATH.")
    set(CMAKE_PREFIX_PATH /vcpkg/ports/cppwork/vcpkg_installed/x64-linux/share)
endif()

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")

# -g is a compiler option, not a preprocessor definition.
add_compile_options(-g)

# Third-party dependencies. Each one is linked unconditionally further down,
# so a missing package should stop configuration here with a clear message.

# ZLIB::ZLIB is linked into the library target, hence REQUIRED.
find_package(ZLIB REQUIRED)

find_package(OpenCV REQUIRED )
find_package(Arrow CONFIG REQUIRED)

find_package(unofficial-brotli REQUIRED)
find_package(unofficial-utf8proc CONFIG REQUIRED)
find_package(Thrift CONFIG REQUIRED)

find_package(glog REQUIRED)

find_package(OpenSSL REQUIRED)

find_package(Boost REQUIRED COMPONENTS
    system
    filesystem
    serialization
    program_options
    thread
    )

find_package(DataFrame REQUIRED)

# Header search paths per platform; the list is handed to
# include_directories() later in this script.
if(APPLE)
    message(STATUS "This is APPLE, set INCLUDE_DIRS")
    set(INCLUDE_DIRS
        ${Boost_INCLUDE_DIRS}
        /usr/local/include
        /usr/local/iODBC/include
        /opt/snowflake/snowflakeodbc/include/
        ${CMAKE_CURRENT_SOURCE_DIR}/../include/
        ${CMAKE_CURRENT_SOURCE_DIR}/../../../include)
elseif(UNIX)
    message(STATUS "This is linux, set INCLUDE_DIRS")
    set(INCLUDE_DIRS
        ${Boost_INCLUDE_DIRS}
        /usr/local/include
        ${CMAKE_CURRENT_SOURCE_DIR}/../include/
        ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/)
endif()


# Library search paths per platform; the list is handed to
# link_directories() later in this script.
if(APPLE)
    message(STATUS "This is APPLE, set LINK_DIRS")
    set(LINK_DIRS /usr/local/lib /usr/local/iODBC/lib /opt/snowflake/snowflakeodbc/lib/universal)
elseif(UNIX)
    message(STATUS "This is linux, set LINK_DIRS")
    # Linker search paths need Boost's library directories, not its headers.
    set(LINK_DIRS ${Boost_LIBRARY_DIRS} /usr/local/lib /vcpkg/ports/cppwork/vcpkg_installed/x64-linux/lib)
endif()

# ODBC driver-manager libraries to link, chosen per platform.
if(APPLE)
    message(STATUS "This is APPLE, set ODBC_LIBS")
    set(ODBC_LIBS iodbc iodbcinst)
elseif(UNIX)
    # This branch sets ODBC_LIBS; the status text previously named LINK_DIRS.
    message(STATUS "This is linux, set ODBC_LIBS")
    set(ODBC_LIBS odbc odbcinst ltdl)
endif()

include_directories(${INCLUDE_DIRS})
link_directories(${LINK_DIRS})

# Test sources in this directory; each one is built as its own executable by
# the foreach loop that follows this section.
file(GLOB test_file_list ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)

# Sources shared by all tests.
file(GLOB APP_SOURCES
    ${CMAKE_CURRENT_SOURCE_DIR}/../impl/*.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../include/*.h
    ${CMAKE_CURRENT_SOURCE_DIR}/../include/*.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/arr_/impl/*.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/http/impl/*.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/yaml/impl/*.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/df/impl/*.cpp
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/death_handler/impl/*.cpp)

# Shared helper library linked into every test executable.
# The former ${test_file} argument was dropped: that variable is only defined
# inside the later foreach loop, so here it always expanded to nothing.
add_library(${PROJECT_NAME}_lib SHARED ${APP_SOURCES})
target_link_libraries(${PROJECT_NAME}_lib ${Boost_LIBRARIES} ZLIB::ZLIB glog::glog DataFrame::DataFrame ${OpenCV_LIBS})
target_link_libraries(${PROJECT_NAME}_lib OpenSSL::SSL OpenSSL::Crypto libgtest.a pystring libyaml-cpp.a libgmock.a ${ODBC_LIBS} libnanodbc.a pthread dl backtrace libzstd.a libbz2.a libsnappy.a re2::re2 parquet lz4 unofficial::brotli::brotlidec-static unofficial::brotli::brotlienc-static unofficial::brotli::brotlicommon-static utf8proc thrift::thrift  arrow arrow_dataset)

# Build one executable per test source; the target is named after the source
# file with ".cpp" removed and links against the shared helper library.
foreach(test_source IN LISTS test_file_list)
    file(RELATIVE_PATH relative_source ${CMAKE_CURRENT_SOURCE_DIR} ${test_source})
    string(REPLACE ".cpp" "" target_name ${relative_source})
    add_executable(${target_name} ${test_source})
    target_link_libraries(${target_name} ${PROJECT_NAME}_lib)
endforeach()

test/spirit_xml_test.cpp

#include "spirit_xml_error_hand.hpp"

#include <glog/logging.h>
#include <gtest/gtest.h>

#include <fstream>

#include "death_handler/death_handler.h"

/// Test entry point: sets the glog flags, initialises glog and GoogleTest,
/// then runs every registered test and returns the GoogleTest result.
int main(int argc, char** argv) {
    FLAGS_log_dir = "./";
    FLAGS_alsologtostderr = true;
    // Log severities INFO, WARNING, ERROR, FATAL have the values 0, 1, 2, 3.
    FLAGS_minloglevel = 0;

    // NOTE(review): presumably installs crash/stack-trace handlers for the
    // lifetime of main - confirm against death_handler.h.
    Debug::DeathHandler dh;

    google::InitGoogleLogging("./logs.log");
    testing::InitGoogleTest(&argc, argv);
    return RUN_ALL_TESTS();
}

bool parse_xml(char const* filename) {
    std::ifstream in{filename, std::ios_base::in};
    if (!in) {
        std::cerr << "Error: could not open input file: " << filename
                  << std::endl;
        return false;
    }
    // We will read the contents here
    std::string storage;
    // Do not skip whitespace
    in.unsetf(std::ios::skipws);
    std::copy(std::istream_iterator<char>(in), std::istream_iterator<char>(),
              std::back_inserter(storage));

    namespace qi = boost::spirit::qi;
    using iterator_type = std::string::const_iterator;
    using mini_xml_grammar = client::mini_xml_grammar<iterator_type>;
    using boost::spirit::ascii::space;

    // Our grammar
    mini_xml_grammar xml;
    // Our tree
    client::mini_xml ast;
    iterator_type iter = storage.begin();
    iterator_type end = storage.end();

    bool r = qi::phrase_parse(iter, end, xml, space, ast);
    if (r && iter == end) {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        client::mini_xml_printer printer;
        printer(ast);
        std::cout << "-------------------------\n";
        return true;
    } else {
        std::string rest{iter, end};
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "Stopped at: \"" << rest << "\"\n";
        std::cout << "-------------------------\n";
        return false;
    }
}

// The well-formed sample document must be read and parsed completely.
// The result is asserted so that a missing file or a parse failure actually
// fails the test instead of being silently ignored.
GTEST_TEST(SpiritXMLTests, SpiritXML) {
    EXPECT_TRUE(parse_xml("../data/1.xml"));
}

// The second sample closes its tags in the wrong order, so parse_xml must
// report failure. The result is asserted so a regression that accepts the
// malformed document is caught.
GTEST_TEST(SpiritXMLTests, SpiritXMLError) {
    EXPECT_FALSE(parse_xml("../data/2.xml"));
}

include/spirit_xml_error_hand.hpp

#ifndef _FREDRIC_SPIRIT_XML_ERROR_HAND_HPP_
#define _FREDRIC_SPIRIT_XML_ERROR_HAND_HPP_

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/variant/recursive_variant.hpp>
#include <boost/foreach.hpp>

#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

namespace client {
// Short aliases for the Boost namespaces used throughout this header.
namespace fusion = boost::fusion;
namespace phoenix = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

/**
 * @brief Mini XML Tree representation
 *
 * Forward-declared here because mini_xml_node refers to it before the
 * struct itself is defined.
 */
struct mini_xml;

// A node is one of two things: a nested mini_xml element (held through
// recursive_wrapper because the type is recursive) or a std::string of text.
using mini_xml_node =
    boost::variant<boost::recursive_wrapper<mini_xml>, std::string>;

// One XML element: its tag name and its child nodes in order.
struct mini_xml {
    std::string name;                     // Tag name
    std::vector<mini_xml_node> children;  // Child elements and text nodes
};
};  // namespace client

// Adapt client::mini_xml as a Fusion sequence (name, children).
// The macro is invoked at global scope, outside namespace client.
BOOST_FUSION_ADAPT_STRUCT(client::mini_xml,
                          (std::string,
                           name)(std::vector<client::mini_xml_node>, children))

namespace client {
// Helpers for printing the mini XML tree.
// Number of spaces by which the printers increase the indent per level.
constexpr int tabsize = 4;

/// Writes `indent` space characters to std::cout; writes nothing when
/// `indent` is zero or negative.
///
/// Marked inline because the definition lives in a header: a non-inline
/// definition would be a duplicate symbol as soon as two translation units
/// include this file.
inline void tab(int indent) {
    for (int written = 0; written < indent; ++written) {
        std::cout << ' ';
    }
}

/// Function object that prints one mini_xml element and its children to
/// std::cout, starting at a given indentation.
struct mini_xml_printer {
    int indent;  ///< Number of spaces written before this element's lines.

    /// @param indent_ Starting indentation in spaces (defaults to 0).
    mini_xml_printer(int indent_ = 0) : indent(indent_) {}

    /// Prints `xml`; the definition appears later in this header.
    void operator()(mini_xml const& xml) const;
};

/// Visitor over mini_xml_node: prints a text node directly and hands a nested
/// element to mini_xml_printer, both one `tabsize` deeper than `indent`.
struct mini_xml_node_printer : boost::static_visitor<> {
    int indent;  ///< Indentation of the parent element, in spaces.

    /// @param indent_ Indentation of the enclosing element (defaults to 0).
    mini_xml_node_printer(int indent_ = 0) : indent(indent_) {}

    /// Text child: written as `text: "<content>"` on its own line.
    void operator()(std::string const& text) const {
        tab(indent + tabsize);
        std::cout << "text: \"" << text << '"' << std::endl;
    }

    /// Element child: printed recursively at the deeper indentation.
    void operator()(mini_xml const& xml) const {
        mini_xml_printer child_printer(indent + tabsize);
        child_printer(xml);
    }
};

/// Prints `xml` as
///     tag: <name>
///     {
///         ...children, one tabsize deeper...
///     }
/// with every line of this element preceded by `indent` spaces.
///
/// Marked inline because this out-of-class definition lives in a header: a
/// non-inline definition would be a duplicate symbol as soon as two
/// translation units include this file.
inline void mini_xml_printer::operator()(mini_xml const& xml) const {
    tab(indent);
    std::cout << "tag: " << xml.name << std::endl;
    tab(indent);

    std::cout << '{' << std::endl;
    // Each child is dispatched on its variant alternative (element or text).
    for (auto const& node : xml.children) {
        boost::apply_visitor(mini_xml_node_printer(indent), node);
    }
    tab(indent);
    std::cout << '}' << std::endl;
}

// Mini XML grammar with error handling.
// A rule-local variable (qi::locals) carries the opening tag's name to the
// point where the closing tag is checked.
// Refers to: https://www.boost.org/doc/libs/1_66_0/libs/spirit/doc/html/spirit/qi/tutorials/mini_xml___error_handling.html
//
// Grammar (whitespace between tokens is skipped by ascii::space):
//   xml       := start_tag node* end_tag(name of start_tag)
//   node      := xml | text
//   text      := one or more characters other than '<'
//   start_tag := '<' (not followed by '/') name '>'
//   end_tag   := "</" name '>'
//
// The sequences use the expectation operator `>` where a mismatch means the
// document is malformed. Only a failed expectation reaches the on_error
// handler registered below; with plain `>>` sequences the handler never runs.
template <typename Iterator>
struct mini_xml_grammar : qi::grammar<Iterator, mini_xml(), qi::locals<std::string>, ascii::space_type> {

    /// Builds the rules, names them for diagnostics and installs the error
    /// handler on the top-level `xml` rule.
    mini_xml_grammar() : mini_xml_grammar::base_type(xml, "xml_grammar") {
        using ascii::char_;
        using ascii::string;
        using qi::on_error;
        using qi::fail;
        using qi::lexeme;
        using qi::lit;
        using namespace qi::labels;

        using phoenix::val;
        using phoenix::construct;

        // lexeme[] turns skipping off so whitespace inside text is kept.
        text %= lexeme[+(char_ - '<')];
        node %= (xml | text);

        // '<' followed by '/' is a closing tag, which is an ordinary
        // non-match here; once we know it is an opening tag, the name and
        // the closing '>' are mandatory.
        start_tag %=
            '<' >> !lit('/') > lexeme[+(char_ - '>')] > '>';

        // _r1 is the inherited attribute: the tag name captured by start_tag.
        // After "</" the very same name and a '>' are mandatory.
        end_tag = "</" > string(_r1) > '>';

        // The synthesized attribute is the Fusion sequence
        //   0: the start_tag content (also stored in the local _a)
        //   1: the parsed child nodes
        // end_tag only verifies that the closing name equals _a; it does not
        // contribute to the attribute.
        xml %= start_tag[_a = _1] >
              *node > end_tag(_a);

        // Name each rule; the names are what the error handler reports.
        xml.name("xml");
        node.name("node");
        text.name("text");
        start_tag.name("start_tag");
        end_tag.name("end_tag");

        // On a failed expectation print what was expected and the remaining
        // input, then make the parse fail.
        on_error<fail>(
            xml, 
            std::cout << 
                val("Error! Expecting ")
                << _4   // What failed
                << val(" here\"")
                << construct<std::string>(_3, _2) // Iterators to error-pos, end
                << val("\"")
                << std::endl
        ); 
    }

    qi::rule<Iterator, std::string(), ascii::space_type> text;
    qi::rule<Iterator, mini_xml_node(), ascii::space_type> node;
    qi::rule<Iterator, std::string(), ascii::space_type> start_tag;
    // Takes the expected tag name as an inherited attribute so it can be
    // compared with the name from start_tag.
    qi::rule<Iterator, void(std::string), ascii::space_type> end_tag;
    qi::rule<Iterator, mini_xml(), qi::locals<std::string>, ascii::space_type> xml;
};
};  // namespace client

#endif

test/data/1.xml

<note>
<to>Tove</to>
<from>Jani</from>
<heading>Reminder</heading>
<body>Don't forget me this weekend!</body>
</note>

test/data/2.xml

<foo><bar></foo></bar>

程序输出如下,
boost::spirit解析XML输出

  C++知识库 最新文章
【C++】友元、嵌套类、异常、RTTI、类型转换
通讯录的思路与实现(C语言)
C++PrimerPlus 第七章 函数-C++的编程模块(
Problem C: 算法9-9~9-12:平衡二叉树的基本
MSVC C++ UTF-8编程
C++进阶 多态原理
简单string类c++实现
我的年度总结
【C语言】以深厚地基筑伟岸高楼-基础篇(六
c语言常见错误合集
上一篇文章      下一篇文章      查看所有文章
加:2022-05-11 16:14:51  更:2022-05-11 16:15:21 
 
开发: C++知识库 Java知识库 JavaScript Python PHP知识库 人工智能 区块链 大数据 移动开发 嵌入式 开发工具 数据结构与算法 开发测试 游戏开发 网络协议 系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑 笔记本 显卡 显示器 固态硬盘 硬盘 耳机 手机 iphone vivo oppo 小米 华为 单反 装机 图拉丁

360图书馆 购物 三丰科技 阅读网 日历 万年历 2025年1日历 -2025/1/11 2:41:27-

图片自动播放器
↓图片自动播放器↓
TxT小说阅读器
↓语音阅读,小说下载,古典文学↓
一键清除垃圾
↓轻轻一点,清除系统垃圾↓
图片批量下载器
↓批量下载图片,美女图库↓
  网站联系: qq:121756557 email:121756557@qq.com  IT数码