boost::spirit::qi 是一个用于开发解析器(parser)的库,可以用来解析文本、构建解释器等。笔者花了两天时间看完了 README 文档,并且照着 Demo 代码写了一遍,感觉语法很复杂,特别是最后的 XML 解析器,很容易就写错了,好在错误信息还是很好理解的。现在把代码贴出来和大家分享一下。代码结构如下:
test/CMakeLists.txt
# CMAKE_CXX_STANDARD 20 (set below) is only understood by CMake >= 3.12, and
# CMake 4.x rejects projects that request compatibility with versions < 3.5,
# so the declared minimum has to match what the script actually needs.
cmake_minimum_required(VERSION 3.12)

# Tell find_package() where the vcpkg-installed packages live on this host.
# The directory is appended rather than assigned so that a CMAKE_PREFIX_PATH
# supplied by the user (e.g. -DCMAKE_PREFIX_PATH=...) is kept, not overwritten.
if(APPLE)
  message(STATUS "This is Apple, do nothing.")
  set(CMAKE_MACOSX_RPATH 1)
  list(APPEND CMAKE_PREFIX_PATH
    /Users/aabjfzhu/software/vcpkg/ports/cppwork/vcpkg_installed/x64-osx/share)
elseif(UNIX)
  message(STATUS "This is linux, set CMAKE_PREFIX_PATH.")
  list(APPEND CMAKE_PREFIX_PATH
    /vcpkg/ports/cppwork/vcpkg_installed/x64-linux/share)
endif()

project(spirit_xml_error_handling)

set(CMAKE_CXX_STANDARD 20)
# Silence narrowing-conversion diagnostics for C++ sources only.
string(APPEND CMAKE_CXX_FLAGS " -Wno-narrowing")
# -g is a compiler option, not a preprocessor definition.
add_compile_options(-g)
# Third-party packages, located through CMAKE_PREFIX_PATH.
# ZLIB is REQUIRED because the imported target ZLIB::ZLIB is linked
# unconditionally by the library target; without REQUIRED a missing zlib
# would only surface later as a confusing "target not found" error.
find_package(ZLIB REQUIRED)
find_package(OpenCV REQUIRED)
find_package(Arrow CONFIG REQUIRED)
find_package(unofficial-brotli REQUIRED)
find_package(unofficial-utf8proc CONFIG REQUIRED)
find_package(Thrift CONFIG REQUIRED)
find_package(glog REQUIRED)
find_package(OpenSSL REQUIRED)
find_package(Boost REQUIRED COMPONENTS
  system
  filesystem
  serialization
  program_options
  thread
)
find_package(DataFrame REQUIRED)
# Per-platform header search paths, library search paths and ODBC libraries.
# On platforms that are neither APPLE nor UNIX all three variables stay unset.
if(APPLE)
  message(STATUS "This is APPLE, set INCLUDE_DIRS")
  set(INCLUDE_DIRS
    ${Boost_INCLUDE_DIRS}
    /usr/local/include
    /usr/local/iODBC/include
    /opt/snowflake/snowflakeodbc/include/
    ${CMAKE_CURRENT_SOURCE_DIR}/../include/
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../include)

  message(STATUS "This is APPLE, set LINK_DIRS")
  set(LINK_DIRS
    /usr/local/lib
    /usr/local/iODBC/lib
    /opt/snowflake/snowflakeodbc/lib/universal)

  message(STATUS "This is APPLE, set ODBC_LIBS")
  set(ODBC_LIBS iodbc iodbcinst)
elseif(UNIX)
  message(STATUS "This is linux, set INCLUDE_DIRS")
  set(INCLUDE_DIRS
    ${Boost_INCLUDE_DIRS}
    /usr/local/include
    ${CMAKE_CURRENT_SOURCE_DIR}/../include/
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/)

  message(STATUS "This is linux, set LINK_DIRS")
  # Library search path: use Boost's library directories here, not its
  # header directories (expands to nothing when Boost does not define it).
  set(LINK_DIRS
    ${Boost_LIBRARY_DIRS}
    /usr/local/lib
    /vcpkg/ports/cppwork/vcpkg_installed/x64-linux/lib)

  message(STATUS "This is linux, set ODBC_LIBS")
  set(ODBC_LIBS odbc odbcinst ltdl)
endif()

include_directories(${INCLUDE_DIRS})
link_directories(${LINK_DIRS})
# Every .cpp next to this file becomes its own test executable.
# CONFIGURE_DEPENDS makes the build re-run the glob so that added or removed
# files are noticed without a manual re-configure.
file(GLOB test_file_list CONFIGURE_DEPENDS
  ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)

# Shared implementation sources that all test executables link against.
file(GLOB APP_SOURCES CONFIGURE_DEPENDS
  ${CMAKE_CURRENT_SOURCE_DIR}/../impl/*.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/../include/*.h
  ${CMAKE_CURRENT_SOURCE_DIR}/../include/*.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/arr_/impl/*.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/http/impl/*.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/yaml/impl/*.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/df/impl/*.cpp
  ${CMAKE_CURRENT_SOURCE_DIR}/../../../include/death_handler/impl/*.cpp)

# The former "${test_file}" argument was expanded before the loop below ever
# defined it, so it always contributed nothing; it is dropped.
add_library(${PROJECT_NAME}_lib SHARED ${APP_SOURCES})

# PUBLIC keeps the transitive linking behaviour of the keyword-less signature.
target_link_libraries(${PROJECT_NAME}_lib PUBLIC
  ${Boost_LIBRARIES}
  ZLIB::ZLIB
  glog::glog
  DataFrame::DataFrame
  ${OpenCV_LIBS})
target_link_libraries(${PROJECT_NAME}_lib PUBLIC
  OpenSSL::SSL
  OpenSSL::Crypto
  libgtest.a
  pystring
  libyaml-cpp.a
  libgmock.a
  ${ODBC_LIBS}
  libnanodbc.a
  pthread
  dl
  backtrace
  libzstd.a
  libbz2.a
  libsnappy.a
  re2::re2
  parquet
  lz4
  unofficial::brotli::brotlidec-static
  unofficial::brotli::brotlienc-static
  unofficial::brotli::brotlicommon-static
  utf8proc
  thrift::thrift
  arrow
  arrow_dataset)

foreach(test_file IN LISTS test_file_list)
  file(RELATIVE_PATH filename ${CMAKE_CURRENT_SOURCE_DIR} ${test_file})
  # Strip only the trailing ".cpp" extension to obtain the executable name.
  string(REGEX REPLACE "\\.cpp$" "" test_name "${filename}")
  add_executable(${test_name} ${test_file})
  target_link_libraries(${test_name} PRIVATE ${PROJECT_NAME}_lib)
endforeach()
test/spirit_xml_test.cpp
#include "spirit_xml_error_hand.hpp"
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <fstream>
#include "death_handler/death_handler.h"
// Test-runner entry point: configures glog, then hands control to GoogleTest
// and returns its result as the process exit code.
int main(int argc, char** argv) {
    // glog settings: write log files to the current directory and mirror
    // every message to stderr.
    FLAGS_log_dir = "./";
    FLAGS_alsologtostderr = true;
    // Severity levels INFO, WARNING, ERROR and FATAL have the values 0, 1, 2 and 3.
    FLAGS_minloglevel = 0;

    // NOTE(review): presumably installs crash/stack-trace handlers for the
    // lifetime of main -- confirm against death_handler.h.
    Debug::DeathHandler dh;

    google::InitGoogleLogging("./logs.log");
    testing::InitGoogleTest(&argc, argv);

    return RUN_ALL_TESTS();
}
bool parse_xml(char const* filename) {
std::ifstream in{filename, std::ios_base::in};
if (!in) {
std::cerr << "Error: could not open input file: " << filename
<< std::endl;
return false;
}
// We will read the contents here
std::string storage;
// Do not skip whitespace
in.unsetf(std::ios::skipws);
std::copy(std::istream_iterator<char>(in), std::istream_iterator<char>(),
std::back_inserter(storage));
namespace qi = boost::spirit::qi;
using iterator_type = std::string::const_iterator;
using mini_xml_grammar = client::mini_xml_grammar<iterator_type>;
using boost::spirit::ascii::space;
// Our grammar
mini_xml_grammar xml;
// Our tree
client::mini_xml ast;
iterator_type iter = storage.begin();
iterator_type end = storage.end();
bool r = qi::phrase_parse(iter, end, xml, space, ast);
if (r && iter == end) {
std::cout << "-------------------------\n";
std::cout << "Parsing succeeded\n";
client::mini_xml_printer printer;
printer(ast);
std::cout << "-------------------------\n";
return true;
} else {
std::string rest{iter, end};
std::cout << "-------------------------\n";
std::cout << "Parsing failed\n";
std::cout << "Stopped at: \"" << rest << "\"\n";
std::cout << "-------------------------\n";
return false;
}
}
// A well-formed document must be accepted. The result of parse_xml is now
// checked; previously it was discarded, so the test could never fail.
// The relative path is resolved against the process working directory.
GTEST_TEST(SpiritXMLTests, SpiritXML) {
    EXPECT_TRUE(parse_xml("../data/1.xml"));
}
// A document whose closing tags do not match their opening tags must be
// rejected. The result of parse_xml is now checked; previously it was
// discarded, so the test could never fail.
GTEST_TEST(SpiritXMLTests, SpiritXMLError) {
    EXPECT_FALSE(parse_xml("../data/2.xml"));
}
include/spirit_xml_error_hand.hpp
#ifndef _FREDRIC_SPIRIT_XML_ERROR_HAND_HPP_
#define _FREDRIC_SPIRIT_XML_ERROR_HAND_HPP_
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/variant/recursive_variant.hpp>
#include <boost/foreach.hpp>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
namespace client {
namespace fusion = boost::fusion;
namespace phoenix = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

// Mini XML tree representation.
struct mini_xml;

// A child of an element is one of two things: a nested mini_xml element
// (held through boost::recursive_wrapper, since mini_xml is still incomplete
// at this point) or a std::string of text.
typedef boost::variant<boost::recursive_wrapper<mini_xml>, std::string>
    mini_xml_node;

// One XML element: its tag name followed by its children in document order.
struct mini_xml {
    std::string name;                     // tag name
    std::vector<mini_xml_node> children;  // child elements and text runs
};
}  // namespace client
// Expose client::mini_xml to Boost.Fusion as a two-element sequence
// (name, children). Must be invoked at global namespace scope.
BOOST_FUSION_ADAPT_STRUCT(
    client::mini_xml,
    (std::string, name)
    (std::vector<client::mini_xml_node>, children))
namespace client {
// Helpers for printing the mini XML tree.

// Number of columns added per nesting level.
int const tabsize = 4;

// Writes `indent` space characters to std::cout; writes nothing when
// `indent` is zero or negative.
// Declared inline because this definition lives in a header: without it,
// including the header from two translation units is a multiple-definition
// link error.
inline void tab(int indent) {
    for (int i = 0; i < indent; ++i) {
        std::cout << ' ';
    }
}
// Function object that prints one mini_xml element, starting at column
// `indent`. The call operator is defined after mini_xml_node_printer,
// which it depends on.
struct mini_xml_printer {
    int indent;  // column at which this element's lines begin

    mini_xml_printer(int indent_ = 0) : indent(indent_) {}

    void operator()(mini_xml const& xml) const;
};
// Variant visitor for mini_xml_node: prints the alternative it is handed
// one nesting level (tabsize columns) deeper than `indent`.
struct mini_xml_node_printer : boost::static_visitor<> {
    int indent;  // column of the enclosing element

    mini_xml_node_printer(int indent_ = 0) : indent(indent_) {}

    // Text child: a single quoted line.
    void operator()(std::string const& text) const {
        tab(indent + tabsize);
        std::cout << "text: \"" << text << '"' << std::endl;
    }

    // Element child: delegate to the element printer.
    void operator()(mini_xml const& xml) const {
        mini_xml_printer const nested(indent + tabsize);
        nested(xml);
    }
};
// Prints the element as
//     tag: <name>
//     {
//         ...children...
//     }
// with every line of this element starting at column `indent`.
// Declared inline because this out-of-class definition lives in a header:
// without it, including the header from two translation units is a
// multiple-definition link error.
inline void mini_xml_printer::operator()(mini_xml const& xml) const {
    tab(indent);
    std::cout << "tag: " << xml.name << std::endl;
    tab(indent);
    std::cout << '{' << std::endl;
    // Each child is dispatched on its variant alternative (element or text).
    for (auto&& node : xml.children) {
        boost::apply_visitor(mini_xml_node_printer(indent), node);
    }
    tab(indent);
    std::cout << '}' << std::endl;
}
// Mini XML grammar with error reporting.
// A rule-local variable (qi::locals) carries the opening tag's name to the
// point where the closing tag is checked.
// Refers to: https://www.boost.org/doc/libs/1_66_0/libs/spirit/doc/html/spirit/qi/tutorials/mini_xml___error_handling.html
//
// Template parameter:
//   Iterator - forward iterator over the input characters.
// Synthesized attribute: client::mini_xml. Skipper: ascii::space.
template <typename Iterator>
struct mini_xml_grammar
    : qi::grammar<Iterator, mini_xml(), qi::locals<std::string>,
                  ascii::space_type> {
    mini_xml_grammar() : mini_xml_grammar::base_type(xml, "xml_grammar") {
        using ascii::char_;
        using ascii::string;
        using qi::on_error;
        using qi::fail;
        using qi::lexeme;
        using qi::lit;
        using namespace qi::labels;
        using phoenix::val;
        using phoenix::construct;

        // Character data: everything up to the next '<'.
        text %= lexeme[+(char_ - '<')];

        // A child is either a nested element or a run of text.
        node %= (xml | text);

        // The expectation operator '>' (instead of the plain sequence '>>')
        // marks points of no return: once the operand to its left has
        // matched, a failure of the operand to its right throws
        // qi::expectation_failure. That exception is what triggers the
        // on_error handler registered below; with '>>' alone the handler
        // could never run.
        //
        // "<" not followed by "/" commits to an opening tag.
        start_tag %= '<' >> !lit('/') > lexeme[+(char_ - '>')] > '>';

        // _r1 is the inherited attribute: the name captured by start_tag,
        // which the closing tag must repeat.
        end_tag = "</" > string(_r1) > '>';

        // The element's attribute is the fusion sequence
        //   (start_tag content, zero or more nodes).
        // The tag name is stored in the local _a and passed to end_tag, which
        // only verifies the closing tag and contributes no attribute.
        xml %= start_tag[_a = _1] > *node > end_tag(_a);

        // Rule names appear in the diagnostics produced by on_error.
        xml.name("xml");
        node.name("node");
        text.name("text");
        start_tag.name("start_tag");
        end_tag.name("end_tag");

        on_error<fail>(
            xml,
            std::cout <<
                val("Error! Expecting ")
                << _4                               // What failed
                << val(" here\"")
                << construct<std::string>(_3, _2)   // Iterators to error-pos, end
                << val("\"")
                << std::endl
        );
    }

    qi::rule<Iterator, std::string(), ascii::space_type> text;
    qi::rule<Iterator, mini_xml_node(), ascii::space_type> node;
    qi::rule<Iterator, std::string(), ascii::space_type> start_tag;
    // Takes the expected tag name as an inherited attribute and checks it
    // against the closing tag.
    qi::rule<Iterator, void(std::string), ascii::space_type> end_tag;
    qi::rule<Iterator, mini_xml(), qi::locals<std::string>, ascii::space_type> xml;
};
}; // namespace client
#endif
test/data/1.xml
<note>
<to>Tove</to>
<from>Jani</from>
<heading>Reminder</heading>
<body>Don't forget me this weekend!</body>
</note>
test/data/2.xml
<foo><bar></foo></bar>
程序输出如下,
|