自主web服务器
背景
http协议被广泛使用,从移动端,pc端浏览器,http协议无疑是打开互联网应用窗口的重要协议,http在网络应用层中的地位不可撼动,是能准确区分前后台的重要协议。
目标
在对http协议的理论学习的基础上,从零开始完成web服务器开发,坐拥下三层协议,从技术到应用,让网络难点无处遁形。
描述
采用C/S模型,编写支持中小型应用的http,并结合mysql,理解常见互联网应用行为,做完该项目,你可以从技术上 完全理解从你上网开始,到关闭浏览器的所有操作中的技术细节!
技术特点
- 网络编程(TCP/IP协议, socket流式套接字,http协议)
- 多线程技术
- cgi技术
- 线程池
项目定位
研发岗
- 开发环境 centos 7 + vim/gcc/gdb + C/C++;
项目实现过程
由于我们编写的是HTTP_SERVER,因此我们只需要编写s端,c端我们使用浏览器进行访问即可;
我们需要对**应用层(主要)**和传输层进行代码编写,网络层及以下,会有对应的TCP/IP协议来保证数据的交互;
下图表示短连接下,C端发起请求,S端响应请求,一来一回 之后关闭sock;
创建HttpServer基础框架
先创建一个能接收到浏览器HTTP报文的socket框架;
TcpServer.hpp
这里将TcpServer中的socket,bind,listen进行了封装,用InitServer启动,同时设计了单例模式,一个HttpServer只需要一个监听listen_sock即可!
#pragma once
#include <iostream>
#include <cstdlib>
#include <cstring>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <pthread.h>
#include "Log.hpp"
using std::cout;
using std::endl;
#define BACKLOG 5
// Process exit codes passed to exit() when server start-up fails.
enum ERR
{
SOCK_ERR = 1, // socket() failed
BIND_ERR, // bind() failed
LISTEN_ERR, // listen() failed
USAGE // not referenced in the visible code; presumably a command-line usage error - TODO confirm
};
class TcpServer
{
private:
int port;
int listen_sock;
static TcpServer* svr;
private:
TcpServer(int _port):port(_port)
{
}
TcpServer(const TcpServer &s)
{
}
public:
static TcpServer *getinstance(int port)
{
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
if (nullptr == svr)
{
pthread_mutex_lock(&lock);
if (nullptr == svr)
{
svr = new TcpServer(port);
svr -> InitServer();
}
pthread_mutex_unlock(&lock);
}
return svr;
}
public:
void InitServer()
{
Socket();
Bind();
Listen();
LOG(INFO, "TcpServer begin");
}
void Socket()
{
listen_sock = socket(AF_INET, SOCK_STREAM, 0);
if (listen_sock < 0)
{
LOG(FATAL, "socket error");
exit(SOCK_ERR);
}
int opt = 1;
setsockopt(listen_sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
}
void Bind()
{
sockaddr_in local;
bzero(&local, sizeof(local));
local.sin_family = AF_INET;
local.sin_port = htons(port);
local.sin_addr.s_addr = INADDR_ANY;
if (bind(listen_sock, (sockaddr *)&local, sizeof(local)) < 0)
{
LOG(FATAL, "bind error");
exit(BIND_ERR);
}
}
void Listen()
{
if (listen(listen_sock, BACKLOG) < 0)
{
LOG(FATAL, "listen error");
exit(LISTEN_ERR);
}
}
int Sock()
{
return listen_sock;
}
~TcpServer()
{
if (listen_sock > 0)
close(listen_sock);
}
};
TcpServer *TcpServer::svr = nullptr;
HttpServer.hpp
#pragma once
#include <iostream>
#include <signal.h>
#include <pthread.h>
#include "Log.hpp"
#include "TcpServer.hpp"
#include "Protocol.hpp"
#define PORT 8080
class HttpServer
{
private:
int port;
bool stop;
public:
HttpServer(int _port = PORT) : port(_port), stop(false)
{
}
void InitServer()
{
}
void Loop()
{
TcpServer *tsvr = TcpServer::getinstance(port);
LOG(INFO, "Loop Begin");
while (!stop)
{
sockaddr_in peer;
socklen_t len = sizeof(peer);
int sock = accept(tsvr->Sock(), (sockaddr *)&peer, &len);
if (sock < 0)
continue;
LOG(INFO, "Get a new link");
pthread_t tid;
int *psock = new int(sock);
pthread_create(&tid,nullptr,Entrance::HandlerRequest,psock);
pthread_detach(tid);
}
}
~HttpServer() {}
};
Log.hpp
简易的日志系统
#pragma once
#include <iostream>
#include <string>
#include <ctime>
// Log levels. They are deliberately empty macros: LOG() stringizes the level
// token with '#', so only the spelling of the name is ever used.
#define INFO
#define WARNING
#define ERROR
#define FATAL
// Expands to a Log() call tagged with the level name and the call site.
#define LOG(level, message) Log(#level, message, __FILE__, __LINE__)

/// Writes one log record to std::cout in the form
/// "[level] [unix-timestamp] [message] [file] [line] ".
///
/// Declared inline because this header is included from several other headers;
/// a non-inline definition would be a multiple-definition error as soon as two
/// translation units include it.
/// @param level     level name, e.g. "INFO"
/// @param message   free-form text
/// @param file_name source file of the call site
/// @param line      source line of the call site
inline void Log(const std::string &level, const std::string &message, const std::string &file_name, int line)
{
    std::cout << "[" << level << "] " << "[" << time(nullptr) << "] " << "[" << message << "] " << "[" << file_name << "] " << "[" << line << "] " << std::endl;
}
Protocol.hpp
定制一系列的协议,用于操作http报文、构建响应等;
#pragma once
#include <iostream>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
using std::cout;
using std::endl;
/// Thread entry point used by the first version of the server: reads whatever
/// the client sent in one recv() call and dumps it to stdout.
class Entrance
{
public:
    /// @param psock heap-allocated int holding the connected socket; this
    ///              function takes ownership of both the int and the socket.
    /// @return always nullptr.
    static void *HandlerRequest(void *psock)
    {
        const int sock = *static_cast<int *>(psock);
        delete static_cast<int *>(psock);

        char buff[4022];
        // Leave room for the terminator and keep the signed result: recv()
        // returns 0 on orderly shutdown and -1 on error, and indexing the
        // buffer with either would write outside of it.
        const ssize_t s = recv(sock, buff, sizeof(buff) - 1, 0);
        if (s > 0)
        {
            buff[s] = '\0';
            std::cout << "===============begin===============" << std::endl;
            std::cout << buff << std::endl;
            std::cout << "===============end===============" << std::endl;
        }

        // Short-lived connection: this thread owns the socket, so release it.
        close(sock);
        return nullptr;
    }
};
运行结果
前三行是打印的日志信息,后面是c端浏览器访问我们server的时候发送的报文,我们将它打印出来了;
解析C端发来的HTTP报文
可见,报文都是一行一行的,我们需要按行读取,先来个按行读取的工具!
MSG_PEEK标志位
recv(sock, &c, 1, MSG_PEEK);
我们一般是设置为0,如果设置MSG_PEEK标志位,则仅仅是把tcp缓冲区中的数据拷贝式的读取到buf中,并没有把已读取的数据从tcp缓冲区中移除,相当于peek窥探一下; 这样我们就可以处理的同时,防止破坏下个报文的报头,造成数据报文不完整了;
Util.hpp
工具类Util
#pragma once
#include <iostream>
#include <string>
#include <sys/types.h>
#include <sys/socket.h>
using std::string;

/// Stateless socket helpers.
class Util
{
public:
    /// Appends one line read from `sock` to `out`, one byte at a time.
    ///
    /// "\r\n" and a lone "\r" are both stored as a single '\n', so a complete
    /// line always ends with '\n'.
    /// @return out.size() once a line terminator was read, 0 if the peer closed
    ///         the connection first (bytes read so far stay in `out`), -1 if
    ///         recv() failed.
    static int ReadLine(int sock, string &out)
    {
        for (;;)
        {
            char ch = 'X';
            const ssize_t got = recv(sock, &ch, 1, 0);
            if (got < 0)
            {
                return -1;
            }
            if (got == 0)
            {
                return 0;
            }

            if (ch == '\r')
            {
                // Look at the next byte without consuming it: only a following
                // '\n' belongs to this line terminator.
                recv(sock, &ch, 1, MSG_PEEK);
                if (ch != '\n')
                {
                    ch = '\n';
                }
                else
                {
                    recv(sock, &ch, 1, 0);
                }
            }

            out += ch;
            if (ch == '\n')
            {
                break;
            }
        }
        return out.size();
    }
};
用Entrance收到报文测试,然后调用按行读取一次,结果如下(调用一次,读取一行,即请求行)
构建请求与响应类
Protocol.hpp
/// Everything known about one incoming HTTP request: the raw text as it was
/// read from the socket plus the fields parsed out of it.
class HttpRequest
{
public:
    // Raw request, filled while receiving.
    std::string request_line;                // first line, without the trailing '\n'
    std::vector<std::string> request_header; // header lines, without the trailing '\n'
    std::string blank;                       // the empty line that ends the headers
    std::string request_body;

    // Split out of request_line.
    std::string method;
    std::string uri;
    std::string version;

    // Split out of request_header (header name -> value).
    std::unordered_map<std::string, std::string> header_kv;
    int content_length; // initialised to 0; not otherwise used by the visible code

    // Derived from the uri while building the response.
    std::string path;         // file-system path of the requested resource
    std::string suffix;       // file extension including the dot
    std::string query_string; // part of the uri after '?'
    bool cgi;                 // true when the request must be served by a CGI program
    int size;                 // body length taken from the Content-Length header

public:
    // `size` used to be left uninitialised; give every scalar a defined value.
    HttpRequest() : content_length(0), cgi(false), size(0) {}
    ~HttpRequest() {}
};
/// The response under construction for one request.
class HttpResponse
{
public:
    std::string status_line;
    std::vector<std::string> response_header;
    std::string blank; // line separating headers from the body
    std::string response_body;
    int status_code;
    int fd;   // descriptor of the file to send as body, -1 when there is none
    int size; // size in bytes of the file behind `fd`; the response builders
              // and the sender use it, but it was missing from the declaration

public:
    HttpResponse() : blank(LINE_END), status_code(OK), fd(-1), size(0) {}
    ~HttpResponse() {}
};
上述部分成员后续解析报文详细讲解;
读取,解析请求构建响应
读取请求
读取请求的目的为将整个报文按照一定的格式读入请求类中;
- 请求行放入string request_line
- 请求报头存入vector<string> request_header;
- 空行分隔符放入string blank
- 请求正文(如果有)放入request_body;
class EndPoint
{
private:
int sock;
HttpRequest http_request;
HttpResponse http_response;
bool stop;
public:
EndPoint(int _sock) : sock(_sock), stop(false)
{
}
public:
bool RecvHttpRequestLine()
{
auto &line = http_request.request_line;
if (Util::ReadLine(sock, line) <= 0)
{
stop = true;
}
else
{
line.resize(line.size() - 1);
LOG(INFO, http_request.request_line);
}
return stop;
}
bool RecvHttpRequestHeader()
{
auto &v = http_request.request_header;
while (1)
{
string line;
if (Util::ReadLine(sock, line) <= 0)
{
stop = true;
break;
}
if (line == "\n")
{
http_request.blank = line;
break;
}
line.resize(line.size() - 1);
http_request.request_header.push_back(line);
LOG(INFO, line);
}
return stop;
}
};
/// Decides whether a request body has to be read.
/// Only POST requests carrying a Content-Length header have one; when that is
/// the case the announced length is stored in http_request.size.
/// @return true when a body must be read.
bool IsNeedRecvHttpRequestBody()
{
    auto &method = http_request.method;
    auto &mp = http_request.header_kv;
    if (method == "POST")
    {
        // The header name was misspelled ("Content-Lenght"), so it never matched.
        auto found = mp.find("Content-Length");
        if (found != mp.end())
        {
            http_request.size = atoi(found->second.c_str());
            return true;
        }
    }
    // Every path returns a value now; falling off the end of a bool function
    // is undefined behaviour.
    return false;
}
/// Reads the request body, if there is one, into http_request.request_body.
/// @return the stop flag: true when the connection failed while reading.
bool RecvHttpRequestBody()
{
    if (IsNeedRecvHttpRequestBody())
    {
        const int len = http_request.size;
        // Must be a reference: a copy would be filled and then thrown away.
        auto &body = http_request.request_body;
        for (int i = 0; i < len; i++)
        {
            char c;
            // recv() needs the address of the byte, not the byte itself.
            const ssize_t s = recv(sock, &c, 1, 0);
            if (s > 0)
            {
                body += c;
            }
            else
            {
                stop = true;
                break;
            }
        }
    }
    // Returned on every path, including "no body expected".
    return stop;
}
/// Tells whether the current request carries a body.
/// That is the case only for a POST request that has a Content-Length header;
/// the header value is then parsed into http_request.size.
/// @return true when a body must be read.
bool IsNeedRecvHttpRequestBody()
{
    if (http_request.method != "POST")
    {
        return false;
    }

    auto &headers = http_request.header_kv;
    if (headers.find("Content-Length") == headers.end())
    {
        return false;
    }

    http_request.size = atoi(headers["Content-Length"].c_str());
    return true;
}
/// Reads the request body, if there is one, into http_request.request_body and
/// echoes it to stdout.
/// @return the stop flag: true when the connection failed while reading.
bool RecvHttpRequestBody()
{
    if (IsNeedRecvHttpRequestBody())
    {
        const int len = http_request.size;
        // Bind by reference: with `auto body = ...` the bytes went into a local
        // copy and http_request.request_body stayed empty.
        auto &body = http_request.request_body;
        for (int i = 0; i < len; i++)
        {
            char c;
            const ssize_t s = recv(sock, &c, 1, 0);
            if (s > 0)
            {
                body += c;
            }
            else
            {
                stop = true;
                break;
            }
        }
        cout << endl;
        cout << body << endl;
    }
    // Returned on every path; the no-body path used to fall off the end of a
    // non-void function.
    return stop;
}
注意正文的读取需要配合后面的parse先解析拿出参数,再判断有没有正文读取;
解析请求
解析请求的过程为将读取的request报文的对应属性和内容存入特定的请求类中;用于后续构建响应直接对照构建;
- 请求行的三个属性提取出来分别放入method,uri,version
- 请求报头数组中的一个个k:v分别提出来进行unordered_map的映射{k,v},方便后续直接查询
Util.hpp添加一个工具函数
/// Splits `target` at the first occurrence of `sep`.
/// @param target   text to split
/// @param sub1_out receives the part before the separator
/// @param sub2_out receives the part after the separator
/// @param sep      separator; it is not part of either output
/// @return true when the separator was found; otherwise false and both
///         outputs are left untouched.
static bool CutString(const std::string &target, std::string &sub1_out, std::string &sub2_out, std::string sep)
{
    const size_t where = target.find(sep);
    if (where == std::string::npos)
    {
        return false;
    }
    sub1_out = target.substr(0, where);
    sub2_out = target.substr(where + sep.size());
    return true;
}
stringstream类用法
/// Splits the request line into method, uri and version (whitespace
/// separated) and upper-cases the method so later comparisons against
/// "GET"/"POST" are case-insensitive.
void ParseHttpRequestLine()
{
    stringstream ss(http_request.request_line);
    ss >> http_request.method >> http_request.uri >> http_request.version;
    auto &method = http_request.method;
    // toupper() is only defined for values representable as unsigned char (or
    // EOF); go through unsigned char so bytes >= 0x80 in a hostile request
    // cannot trigger undefined behaviour.
    std::transform(method.begin(), method.end(), method.begin(),
                   [](unsigned char ch) { return static_cast<char>(::toupper(ch)); });
}
void ParseHttpRequestHeader()
{
auto &mp = http_request.header_kv;
auto &v = http_request.request_header;
for (auto &e : v)
{
string k, v;
Util::CutString(e, k, v, ":");
mp[k] = v;
}
}
构建响应
响应格式
stat系统函数
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
int stat(const char *path, struct stat *buf);
其中st_mode有:
/// Maps an HTTP status code to its reason phrase.
/// Covers every code this server produces (200, 400, 404, 500).
/// @return the reason phrase, or an empty string for an unknown code.
static std::string Code2Desc(int code)
{
    std::string desc;
    switch (code)
    {
    case 200:
        desc = "OK";
        break;
    case 400:
        desc = "Bad Request";
        break;
    case 404:
        desc = "Not Found";
        break;
    case 500:
        desc = "Internal Server Error";
        break;
    default:
        break;
    }
    return desc;
}
/// Maps a file suffix (including the leading dot) to the Content-Type value
/// sent to the client.
/// @return the media type, or "text/html" for an unknown suffix.
static std::string Suffix2Desc(const std::string &suffix)
{
    static const std::unordered_map<std::string, std::string> suffix2desc = {
        {".html", "text/html"},
        {".css", "text/css"},
        {".js", "application/javascript"},
        // "application/x-jpg" is not a registered type and makes browsers
        // download the image instead of rendering it.
        {".jpg", "image/jpeg"},
        {".jpeg", "image/jpeg"},
        {".png", "image/png"},
        {".xml", "application/xml"},
    };
    const auto iter = suffix2desc.find(suffix);
    if (iter != suffix2desc.end())
    {
        return iter->second;
    }
    return "text/html";
}
void BuildHttpResponse()
{
struct stat st;
int size;
ssize_t rfound;
string _path;
auto &status_code = http_response.status_code;
auto &method = http_request.method;
if (method != "GET" && method != "POST")
{
status_code = BAD_REQUEST;
LOG(WARNING, "method error!");
goto END;
}
if (method == "GET")
{
if (http_request.uri.find("?") != string::npos)
{
Util::CutString(http_request.uri, http_request.path, http_request.query_string, "?");
http_request.cgi = true;
}
else
{
http_request.path = http_request.uri;
}
}
else if (method == "POST")
{
http_request.path = http_request.uri;
http_request.cgi = true;
}
else
{
}
_path = http_request.path;
http_request.path = WEB_ROOT;
http_request.path += _path;
if (http_request.path.find('/') == http_request.path.size() - 1)
{
http_request.path += HOME_PAGE;
}
if (stat(http_request.path.c_str(), &st) == 0)
{
if (S_ISDIR(st.st_mode))
{
http_request.path += '/';
http_request.path += HOME_PAGE;
stat(http_request.path.c_str(), &st);
}
if ((st.st_mode & S_IXUSR) || (st.st_mode & S_IXGRP) || (st.st_mode & S_IXOTH))
{
http_request.cgi = true;
}
size = st.st_size;
}
else
{
LOG(WARNING, http_request.path + "Not Found!");
status_code = NOT_FOUND;
goto END;
}
rfound = http_request.path.rfind(".");
if (rfound == string::npos)
{
http_request.suffix = ".html";
}
else
{
http_request.suffix = http_request.path.substr(rfound);
}
if (http_request.cgi)
{
}
else
{
status_code = ProcessNonCgi(size);
}
END:
return;
BuildHttpResponseHelper();
}
int ProcessNonCgi(int size)
{
http_response.fd = open(http_request.path.c_str(), O_RDONLY);
if (http_response.fd >= 0)
{
http_response.status_line += HTTP_VERSION;
http_response.status_line += " ";
http_response.status_line += std::to_string(http_response.status_code);
http_response.status_line += " ";
http_response.status_line += Code2Desc(http_response.status_code);
http_response.status_line += LINE_END;
http_response.size = size;
string header_line = "Content-Type: ";
header_line += Suffix2Desc(http_request.suffix);
header_line += LINE_END;
http_response.response_header.push_back(header_line);
header_line = "Content-Length: ";
header_line += std::to_string(size);
header_line += LINE_END;
http_response.response_header.push_back(header_line);
http_response.blank = LINE_END;
return OK;
}
return 404;
}
发送响应
sendfile系统函数
sendfile函数在两个文件描述符之间传递数据(完全在内核中操作),从而避免了内核缓冲区和用户缓冲区之间的数据拷贝,效率很高,被称为零拷贝。函数定义为:
#include<sys/sendfile.h>
ssize_t sendfile(int out_fd,int in_fd,off_t* offset,size_t count);
void SendHttpResponset()
{
send(sock, http_response.status_line.c_str(), http_response.status_line.size(), 0);
for (auto iter : http_response.response_header)
{
send(sock, iter.c_str(), iter.size(), 0);
}
send(sock, "\n", 1, 0);
sendfile(sock, http_response.fd, nullptr, http_response.size);
close(http_response.fd);
}
运行效果:
上面是我们调用非Cgi技术返回本地静态网页的过程,这显然是不够的,有时候c端请求会带参数需要我们server端处理,这时候就需要引入Cgi技术了;
Cgi技术
简介CGI(Common Gateway Interface)公共网关接口,是外部扩展应用程序与 Web 服务器交互的一个标准接口。它可以使外部程序处理www上客户端送来的表单数据并对此作出反应,通过某些特定的方式处理数据返回给Web服务器进而返回给c端;
虽然我们是创建新线程执行每个c端请求的,但由于我们http_server的进程只有一个,想要到特定位置执行cgi程序,此处不能直接exec替换掉当前进程,否则httpserver直接没了;
那么就需要创建子进程进行一系列替换操作了;为了实现数据的交互,我们需要同时引入进程间通信,由于是父子之间,那就匿名管道!(因为管道是单向通信,我们要双向通信,所以搞两个管道)
可我们打开两个管道后,父子进程都可以看到它们;当子进程进行exec程序替换(只替换代码和数据)之后,原来保存管道fd的变量(input、output数组)随着数据被替换而消失了,但文件描述符表不会被替换,两个管道依然处于打开状态;问题在于被替换进来的新程序并不知道这两个管道对应的fd编号是多少,它只知道约定俗成的0,1,2号fd,那怎么处理呢?
采用如下设计(一种约定):
我们采用dup2把0,1号标准fd重定向到当前的两个管道(3,4号fd);之后再exec替换,exec替换的程序里是有0,1标准输入输出的,但是它们其实已经指向两个管道了,用0,1就可以完成server与cgi.exe的交互了;
cgi程序获取数据
- 当c端GET方法发送数据时,一般比较短,我们直接利用环境变量导入可以让cgi程序拿到;
- 当c端POST方法发送数据时,我们直接通过管道写入cgi;
- 当然至于是GET还是POST方法,我们需要导入一个METHOD方法环境变量,让cgi程序可以识别
int ProcessCgi()
{
auto &bin = http_request.path;
auto &method = http_request.method;
auto &body = http_request.request_body;
auto &querystring = http_request.query_string;
string query_string_env;
string method_env;
int input[2];
int output[2];
if (pipe(input) < 0)
{
LOG(ERROR, "pipe input error!");
return 404;
}
if (pipe(output) < 0)
{
LOG(ERROR, "pipe output error!");
return 404;
}
pid_t pid = fork();
if (pid == 0)
{
close(input[0]);
close(output[1]);
dup2(output[0], 0);
dup2(input[1], 1);
method_env = "METHOD=";
method_env += method;
putenv((char *)method_env.c_str());
if (method == "GET")
{
query_string_env = "QUERY_STRING=";
query_string_env += querystring;
putenv((char *)query_string_env.c_str());
}
execl(bin.c_str(), bin.c_str(), nullptr);
exit(1);
}
else if (pid < 0)
{
return 404;
LOG(ERROR, "fork error!");
}
else
{
close(input[1]);
close(output[0]);
if (method == "POST")
{
const char *start = body.c_str();
int total = 0;
int size = 0;
while ((size = write(output[1], start + total, body.size() - total)) > 0)
{
total += size;
}
}
waitpid(pid, nullptr, 0);
close(input[0]);
close(output[1]);
}
return OK;
}
test_cgi.cc
#include <iostream>
#include <cstdlib>
#include <unistd.h>
using namespace std;
// Debug CGI program: reports on stderr how the request parameters arrived.
// METHOD selects the source: QUERY_STRING for GET, CONTENT_LENGTH bytes of
// stdin for POST. Exits with 1 when METHOD is missing.
int main()
{
    std::cerr << "========================cgi begin===================" << std::endl;

    // getenv() returns a null pointer for an unset variable; building a
    // std::string from it is undefined behaviour, so check first.
    const char *method_env = getenv("METHOD");
    if (!method_env)
    {
        std::cerr << "METHOD is not set" << std::endl;
        return 1;
    }
    std::string method = method_env;
    std::cerr << "METHOD = " << method << std::endl;

    std::string query_string;
    if (method == "GET")
    {
        const char *query_env = getenv("QUERY_STRING");
        if (query_env)
        {
            query_string = query_env;
        }
        std::cerr << "GET DeBug query_string = " << query_string << std::endl;
    }
    else if (method == "POST")
    {
        const char *length_env = getenv("CONTENT_LENGTH");
        int count_length = length_env ? atoi(length_env) : 0;
        std::cerr << "Content-length = " << count_length << std::endl;
        while (count_length-- > 0)
        {
            char c;
            // Stop on EOF/error instead of appending an unset byte.
            if (read(0, &c, 1) <= 0)
            {
                break;
            }
            query_string += c;
        }
        std::cerr << "POST DeBug query_string = " << query_string << std::endl;
    }
    else
    {
    }
    std::cerr << "========================cgi end===================" << std::endl;
    return 0;
}
Makefile的封装
# Builds the HTTP server and the test CGI program, and assembles a
# self-contained "output" directory for deployment.
bin=server
cgi=test_cgi
cc=g++
LD_FLAGS=-std=c++11 -lpthread
curr=$(shell pwd)
src=main.cc

ALL:$(bin) $(cgi)
.PHONY:ALL

# Recipe lines must start with a TAB character.
$(bin):$(src)
	$(cc) -o $@ $^ $(LD_FLAGS)
$(cgi):cgi/test_cgi.cc
	$(cc) -o $@ $^

.PHONY:clean
clean:
	rm -f $(bin) $(cgi)
	rm -rf output

# Depends on ALL so that "make output" never copies missing or stale binaries.
.PHONY:output
output:ALL
	mkdir -p output
	cp $(bin) output
	cp -rf wwwroot output
	cp $(cgi) output/wwwroot
运行结果:
GET:
POST:
cgi程序处理并返回数据
cgi程序对读入的数据进行处理;再返回给http_server,进而返回给sock(c端链接)
test_cgi.cc
#include <iostream>
#include <cstdlib>
#include <unistd.h>
using namespace std;
// Fetches the request parameters handed over by the HTTP server.
// METHOD selects the source: GET reads QUERY_STRING, POST reads
// CONTENT_LENGTH bytes from stdin (appended to query_string).
// Returns true on success; false when METHOD is unset or unsupported, when
// the variable the method needs is unset, or when stdin ends early.
bool GetQueryString(std::string &query_string)
{
    // getenv() returns a null pointer for an unset variable; constructing a
    // std::string from it is undefined behaviour.
    const char *method_env = getenv("METHOD");
    if (!method_env)
    {
        std::cerr << "METHOD is not set" << std::endl;
        return false;
    }
    const std::string method = method_env;
    std::cerr << "METHOD = " << method << std::endl;

    if (method == "GET")
    {
        const char *query_env = getenv("QUERY_STRING");
        if (!query_env)
        {
            return false;
        }
        query_string = query_env;
        return true;
    }
    if (method == "POST")
    {
        const char *length_env = getenv("CONTENT_LENGTH");
        if (!length_env)
        {
            return false;
        }
        std::cerr << "Content-length = " << length_env << std::endl;
        int remaining = atoi(length_env);
        while (remaining-- > 0)
        {
            char c;
            if (read(0, &c, 1) <= 0)
            {
                return false; // body shorter than announced
            }
            query_string += c;
        }
        return true;
    }
    return false;
}
// Splits `in` at the first occurrence of `sep`: the text before goes to out1,
// the text after to out2. When `sep` does not occur, both outputs are left
// unchanged.
void CutString(const std::string &in, const std::string &sep, std::string &out1, std::string &out2)
{
    // Keep the position in the string's own size type; storing it in an int
    // truncates it and only matched npos by accident.
    const std::string::size_type index = in.find(sep);
    if (index == std::string::npos)
    {
        return;
    }
    out1 = in.substr(0, index);
    out2 = in.substr(index + sep.size());
}
int main()
{
cerr << "========================cgi begin===================" << endl;
string query_string;
GetQueryString(query_string);
string str1, str2;
string name1, value1;
string name2, value2;
CutString(query_string, "&", str1, str2);
CutString(str1, "=", name1, value1);
CutString(str2, "=", name2, value2);
cout << name1 << " : " << value1 << endl;
cout << name2 << " : " << value2 << endl;
cerr << name1 << " : " << value1 << endl;
cerr << name2 << " : " << value2 << endl;
cerr << "========================cgi end===================" << endl;
return 0;
}
http_server的父进程添加下列从子进程cgi读取数据的代码
// Fragment for the parent side of the CGI fork. `input`, `pid`,
// `response_body` and `code` are not declared here; presumably they come from
// the enclosing function - TODO confirm.

// Read the child's output one byte at a time until read() returns <= 0.
char c;
while (read(input[0], &c, 1) > 0)
{
response_body += c;
}
// Reap the child and translate its exit status into a status code:
// a normal exit with status 0 gives OK, any other outcome gives 404.
int status = 0;
pid_t ret = waitpid(pid, &status, 0);
// NOTE(review): when waitpid() does not return pid, `code` is not assigned in
// this fragment.
if (ret == pid)
{
if (WIFEXITED(status))
{
if (WEXITSTATUS(status) == 0)
{
code = OK;
}
else
{
code = 404;
}
}
else
{
code = 404;
}
}
数据解析测试:
C端:
S端:
cgi技术总结
下面这张图详细的解释了我们这个http_server所引用的cgi技术
可以看到:
子CGI程序的标准输入是浏览器!
子CGI程序的标准输出也是浏览器!
HTTP搭建了所有的通信细节
cgi程序可以用任何高级语言编写,以上http_server与cgi技术的设计高度解耦,是众多http_server都会使用的机制,众多与前端交互的高级语言,web开发的高级语言,如php,java,底层都引用了cgi技术;
也就意味着我们永远开发的是cgi程序,中间http_server的固定模式不用管,简化了我们开发只需要关心cgi程序,进行数据处理,不用再关心通信细节了(由HTTP完成);
(什么cookie session都能通过环境变量等传递给cgi… 进一步处理)
错误处理
- 逻辑错误(读取完毕了,需要给对方回应)-分析的时候出错eg请求资源不存在或者管道创建失败
- 读取错误(读取不一定完毕,读取的时候出错->不给对方回应->退出即可)-读取的时候出错eg读的时候浏览器sock断开
- 写入错误(send给c端的过程中,c端断开退出了,继续写就没意义了)
处理逻辑错误
请求出错,我们记录错误码,goto end:执行BuildHttpResponseHelper;
不管是cgi还是非cgi,其中有错误我们也记录错误码,进入BuildHttpResponseHelper;
这样在构建响应的时候,如果状态码不对,也能根据相应的状态码构建对应的返回网页,最后send回浏览器;
#define OK 200
#define NOT_FOUND 404
#define BAD_REQUEST 400
#define SERVER_ERROR 500
/// Prepares an error response whose body is the static page `page`.
/// Switches the request to non-CGI mode so the sender streams the file, and
/// adds Content-Type / Content-Length headers for it. When the page cannot be
/// opened or inspected, no headers are added and fd stays -1.
/// @param page file-system path of the HTML page to send
void HandlerError(string page)
{
    http_request.cgi = false;
    http_response.fd = open(page.c_str(), O_RDONLY);
    // 0 is a valid descriptor; only a negative value means failure.
    if (http_response.fd >= 0)
    {
        struct stat st;
        if (stat(page.c_str(), &st) != 0)
        {
            // Without the size a correct Content-Length cannot be sent.
            close(http_response.fd);
            http_response.fd = -1;
            return;
        }
        // Header names were misspelled ("Cntent-..."), so clients ignored them.
        string line = "Content-Type: text/html";
        line += LINE_END;
        http_response.response_header.push_back(line);
        line = "Content-Length: ";
        line += std::to_string(st.st_size);
        line += LINE_END;
        http_response.response_header.push_back(line);
        http_response.size = st.st_size;
    }
}
/// Adds the headers of a successful response: Content-Type derived from the
/// request suffix and Content-Length taken from the CGI output (CGI requests)
/// or from the size of the file to send (static requests).
void BuildOkResponse()
{
    // Was spelled "Cntent-Type", which clients do not recognise.
    string line = "Content-Type: ";
    line += Suffix2Desc(http_request.suffix);
    line += LINE_END;
    http_response.response_header.push_back(line);

    line = "Content-Length: ";
    if (http_request.cgi)
    {
        line += std::to_string(http_response.response_body.size());
    }
    else
    {
        line += std::to_string(http_response.size);
    }
    line += LINE_END;
    http_response.response_header.push_back(line);
}
/// Appends the status line for the current status code and then adds the
/// headers: BuildOkResponse() for OK, the PAGE_404 error page for NOT_FOUND,
/// BAD_REQUEST and SERVER_ERROR, nothing for any other code.
void BuildHttpResponseHelper()
{
    auto &code = http_response.status_code;
    auto &first_line = http_response.status_line;

    first_line += HTTP_VERSION;
    first_line += " ";
    first_line += std::to_string(code);
    first_line += " ";
    first_line += Code2Desc(code);
    first_line += LINE_END;

    string path = WEB_ROOT;
    if (code == OK)
    {
        BuildOkResponse();
    }
    else if (code == NOT_FOUND || code == BAD_REQUEST || code == SERVER_ERROR)
    {
        // All three error codes are answered with the same page.
        path += '/';
        path += PAGE_404;
        HandlerError(path);
    }
}
浏览器请求不存在资源:
HTTP_SERVER返回404:
处理读取错误
添加stop停止标记;
在Recv的过程中如果read等方法出错,stop设置为true,最终stop如果还是false证明recv成功,再执行Build 和 Send;
处理写入错误
写入出现问题,c端关闭,他的管道也就都没了,系统会给server发送sigpipe信号中断挂掉server,这显然是不行的!
我们需要忽略他,简单粗暴的处理,保证server继续运行;
引入线程池
我们都知道原先的方法是,来一个sock创建一个线程,这显然是不行的,如果海量请求来了,一直创建线程server是顶不住的,而且攻击者可以利用这个特点不断地发起sock连接并挂起,导致http_server崩溃;
这就要求软件硬件层面取平衡了,线程池是一个常常用来缓解这种情况的方式;
任务类,线程处理的task,我们将原先的Entrance改为CallBack,并且设置仿函数和回调函数,task类能直接回调执行sock处理!
Task.hpp
#pragma once
#include <iostream>
#include "Protocol.hpp"
/// One unit of work for the thread pool: a connected socket plus the callback
/// that serves it.
class Task
{
private:
    int sock; // -1 for a default-constructed (empty) task
    CallBack handler;

public:
    // The pool default-constructs a Task before filling it from the queue;
    // give `sock` a defined value instead of leaving it indeterminate.
    Task() : sock(-1) {}
    Task(int _sock) : sock(_sock)
    {
    }

    /// Runs the handler on this task's socket.
    void ProcessOn()
    {
        handler(sock);
    }
    ~Task() {}
};
ThreadPool.hpp
设计一个简易的:“线程池”
#pragma once
#include "Task.hpp"
#include <iostream>
#include <pthread.h>
#include <queue>
#include "Log.hpp"
using std::queue;
#define NUM 6
class Thread_Pool
{
private:
int num;
queue<Task> task_queue;
bool stop;
pthread_mutex_t lock;
pthread_cond_t cond;
static Thread_Pool *single_instance;
Thread_Pool(int _num = NUM) : num(_num), stop(false)
{
pthread_mutex_init(&lock, nullptr);
pthread_cond_init(&cond, nullptr);
}
Thread_Pool(const Thread_Pool &) {}
public:
static Thread_Pool *getinstance()
{
static pthread_mutex_t _mutex = PTHREAD_MUTEX_INITIALIZER;
if (single_instance == nullptr)
{
pthread_mutex_lock(&_mutex);
if (single_instance == nullptr)
{
single_instance = new Thread_Pool();
single_instance->InitThreadPool();
}
pthread_mutex_unlock(&_mutex);
}
return single_instance;
}
bool TaskQueueIsEmpty()
{
return task_queue.size()==0?true:false;
}
void Lock()
{
pthread_mutex_lock(&lock);
}
void Unlock()
{
pthread_mutex_unlock(&lock);
}
bool IsStop()
{
return stop;
}
void ThreadWait()
{
pthread_cond_wait(&cond, &lock);
}
void ThreadWakeup()
{
pthread_cond_signal(&cond);
}
static void *ThreadTRoutine(void *args)
{
Thread_Pool *tp = (Thread_Pool *)args;
while (true)
{
Task t;
tp->Lock();
while (tp->TaskQueueIsEmpty())
{
tp->ThreadWait();
}
tp->PopTask(t);
tp->Unlock();
t.ProcessOn();
}
}
bool InitThreadPool()
{
for (int i = 0; i < num; i++)
{
pthread_t tid;
if (pthread_create(&tid, nullptr, ThreadTRoutine, this) != 0)
{
LOG(FATAL, "create thread pool error");
}
}
LOG(INFO, "create thread pool success");
return true;
}
void PushTask(const Task &task)
{
Lock();
task_queue.push(task);
Unlock();
ThreadWakeup();
}
void PopTask(Task &task)
{
task = task_queue.front();
task_queue.pop();
}
~Thread_Pool()
{
pthread_mutex_destroy(&lock);
pthread_cond_destroy(&cond);
}
};
Thread_Pool *Thread_Pool::single_instance = nullptr;
提交表单测试
修改后的index.html如下:
<!DOCTYPE html>
<!-- Test page: a form with two text fields (data_x, data_y) submitted to /test_cgi with GET. -->
<html>
<head>
<meta charset='utf-8'>
<title>TEST SUBMIT</title>
</head>
<body>
<!-- action is the path the form is submitted to, method the HTTP method used. -->
<form action = "/test_cgi" method="GET">
x:<input type = "text" name = "data_x"><br>
y:<input type = "text" name = "data_y"><br><br>
<input type = "submit" value = "提交运算">
</form>
</body>
</html>
表单里的action是提交路径,method是提交方法(我们用GET or POST);
测试结果:
提交前:
点击提交后:
可以看到,提交按钮将我们输入的数据x:100,y:200 上传到了路径test_cgi中;
本质上是浏览器又向我们HTTP_SERVER发送了一个请求行是 GET /test_cgi?data_x=100&data_y=200 HTTP/1.0 的请求,之后cgi处理完数据将结果返回给浏览器 显示处理结果;
当<form>中的method ="POST"时,提交如下:
由于我们表单采用的是GET方法,所以直接在浏览器的请求uri中就能看到提交的数据;
如果是POST方法,那么就会有更好的私密性,提交的数据会在request.body中传递给HTTP_SERVER;
cgi返回网页
显然我们正常业务逻辑下HTTP_SERVER不可能只返回数据给C端,我们需要进行前端操作将数据处理以后嵌入网页返回给C端;(C++写这玩意有点麻烦,我们可以用javaweb php等写cgi程序,cgi程序支持所有语言的可执行程序,根据需求来)
test_cgi
#include <iostream>
#include <cstdlib>
#include <unistd.h>
using namespace std;
// Fetches the request parameters handed over by the HTTP server.
// METHOD selects the source: GET reads QUERY_STRING, POST reads
// CONTENT_LENGTH bytes from stdin (appended to query_string).
// Returns true on success; false when METHOD is unset or unsupported, when
// the variable the method needs is unset, or when stdin ends early.
bool GetQueryString(std::string &query_string)
{
    // getenv() returns a null pointer for an unset variable; constructing a
    // std::string from it is undefined behaviour.
    const char *method_env = getenv("METHOD");
    if (!method_env)
    {
        std::cerr << "METHOD is not set" << std::endl;
        return false;
    }
    const std::string method = method_env;
    std::cerr << "METHOD = " << method << std::endl;

    if (method == "GET")
    {
        const char *query_env = getenv("QUERY_STRING");
        if (!query_env)
        {
            return false;
        }
        query_string = query_env;
        return true;
    }
    if (method == "POST")
    {
        const char *length_env = getenv("CONTENT_LENGTH");
        if (!length_env)
        {
            return false;
        }
        std::cerr << "Content-length = " << length_env << std::endl;
        int remaining = atoi(length_env);
        while (remaining-- > 0)
        {
            char c;
            if (read(0, &c, 1) <= 0)
            {
                return false; // body shorter than announced
            }
            query_string += c;
        }
        return true;
    }
    return false;
}
// Splits `in` at the first occurrence of `sep`: the text before goes to out1,
// the text after to out2. When `sep` does not occur, both outputs are left
// unchanged.
void CutString(const std::string &in, const std::string &sep, std::string &out1, std::string &out2)
{
    // Keep the position in the string's own size type; storing it in an int
    // truncates it and only matched npos by accident.
    const std::string::size_type index = in.find(sep);
    if (index == std::string::npos)
    {
        return;
    }
    out1 = in.substr(0, index);
    out2 = in.substr(index + sep.size());
}
int main()
{
cerr << "========================cgi begin===================" << endl;
string query_string;
GetQueryString(query_string);
string str1, str2;
string name1, value1;
string name2, value2;
CutString(query_string, "&", str1, str2);
CutString(str1, "=", name1, value1);
CutString(str2, "=", name2, value2);
int x = atoi(value1.c_str());
int y = atoi(value2.c_str());
cout << "<html>";
cout << "<head><meta charset=\"utf-8\"></head>";
cout << "<body>";
cout << name1 << " : " << value1 << endl;
cout << name2 << " : " << value2 << endl;
cout << "<h3> " << value1 << " + " << value2 << " = " << x + y << "</h3>";
cout << "<h3> " << value1 << " - " << value2 << " = " << x - y << "</h3>";
cout << "<h3> " << value1 << " * " << value2 << " = " << x * y << "</h3>";
cout << "<h3> " << value1 << " / " << value2 << " = " << x / y << "</h3>";
cout << "</body>";
cout << "</html>";
cerr << "========================cgi end===================" << endl;
return 0;
}
运行结果:
提交前:
提交后:(GET方法)
表单总结
通过上述提交表单操作,我们能看出:
- GET通过uri传参,form提交的时候,会将参数自动拼接到request的请求uri中;
- POST通过正文传参,参数在request.body中;
GET因为通过uri传参,我们HTTP_SERVER内部对于get传参的方式优化为环境变量传参;但url长度是有限制的,所以GET方法的参数在某种程度上来说是短的,有限制的;
POST是通过request.body传参,底层通过管道,子进程cgi程序读取参数,所以可以参数很长,基本上不受限制;
补充数据库
数据是网络中的石油,实际业务场景中,需要存储数据日后查询使用的场景也很多,我们在此http_server的基础上引入一个简单的数据库,模拟一下用户注册用户名和密码时,后台连接数据库处理的流程!
需要下载安装好C链接mysql的套件;
创建存账户信息的数据库:
comm.hpp
编写完发现GetQueryString()和CutString()不论是普通cgi还是mysqlcgi都需要用到的处理数据的工具函数,我们把他俩单独封装入comm.hpp头文件中
#pragma once
#include <iostream>
#include <cstdlib>
#include <unistd.h>
using namespace std;
// Fetches the request parameters handed over by the HTTP server.
// METHOD selects the source: GET reads QUERY_STRING, POST reads
// CONTENT_LENGTH bytes from stdin (appended to query_string).
// Returns true on success; false when METHOD is unset or unsupported, when
// the variable the method needs is unset, or when stdin ends early.
// Declared inline because it is defined in a header.
inline bool GetQueryString(std::string &query_string)
{
    // getenv() returns a null pointer for an unset variable; constructing a
    // std::string from it is undefined behaviour.
    const char *method_env = getenv("METHOD");
    if (!method_env)
    {
        std::cerr << "METHOD is not set" << std::endl;
        return false;
    }
    const std::string method = method_env;
    std::cerr << "METHOD = " << method << std::endl;

    if (method == "GET")
    {
        const char *query_env = getenv("QUERY_STRING");
        if (!query_env)
        {
            return false;
        }
        query_string = query_env;
        return true;
    }
    if (method == "POST")
    {
        const char *length_env = getenv("CONTENT_LENGTH");
        if (!length_env)
        {
            return false;
        }
        std::cerr << "Content-length = " << length_env << std::endl;
        int remaining = atoi(length_env);
        while (remaining-- > 0)
        {
            char c;
            if (read(0, &c, 1) <= 0)
            {
                return false; // body shorter than announced
            }
            query_string += c;
        }
        return true;
    }
    return false;
}
// Splits `in` at the first occurrence of `sep`: the text before goes to out1,
// the text after to out2. When `sep` does not occur, both outputs are left
// unchanged. Declared inline because it is defined in a header.
inline void CutString(const std::string &in, const std::string &sep, std::string &out1, std::string &out2)
{
    // Keep the position in the string's own size type; storing it in an int
    // truncates it and only matched npos by accident.
    const std::string::size_type index = in.find(sep);
    if (index == std::string::npos)
    {
        return;
    }
    out1 = in.substr(0, index);
    out2 = in.substr(index + sep.size());
}
mysql_conn.cc
#include "comm.hpp"
#include "mysql.h"
bool InsertSql(string sql)
{
MYSQL *conn = mysql_init(nullptr);
mysql_set_character_set(conn, "utf8");
if (nullptr == mysql_real_connect(conn, "127.0.0.1", "http_test", "12345678", "http_test", 3306, nullptr, 0))
{
cerr << "connect mysql error!" << endl;
return 1;
}
cerr << "connect mysql success!" << endl;
cerr << "query : " << sql << endl;
int ret = mysql_query(conn, sql.c_str());
cerr << "ret : " << ret << endl;
mysql_close(conn);
return true;
}
int main()
{
string query_string;
if (GetQueryString(query_string))
{
cerr << "query_string : " << query_string.c_str() << endl;
string name;
string passwd;
CutString(query_string, "&", name, passwd);
string _name;
string sql_name;
CutString(name, "=", _name, sql_name);
string _passwd;
string sql_passwd;
CutString(passwd, "=", _passwd, sql_passwd);
string sql = "insert into user(name,passwd) values(\'";
sql += (sql_name + "\',");
sql += (sql_passwd + ")");
if (InsertSql(sql))
{
cout << "<html>";
cout << "<head><meta charset=\"utf-8\"></head>";
cout << "<body><h1>注册成功!信息已经插入后台数据库!</h1></body>";
}
}
return 0;
}
模拟注册运行展示
浏览器请求http_server,并填写账户信息准备提交注册:
http_server中的sql_conn程序执行结果:
http_server返回的网页给浏览器:
查看mysql中刚注册的账户信息:
项目源代码链接
Gitee仓库
项目总结
聚焦于处理HTTP的请求和构建对应响应; 我们主要研究基于 HTTP/1.0 短连接 的GET和POST方法;
获得请求,分析请求,错误处理等; 制定特定的网页src用于返回; 引入简单的日志系统
搭建CGI机制;
父子管道,设计dup2重定向,环境变量传参等
引入线程池;
采用多线程技术,缓解内存开销;
引入数据库;
链接mysql数据库,可以设计更多样的具体应用;
项目扩展方向
技术层面扩展
- 使用epoll机制(我们用的多线程只适用于中小型业务)
- redis;
- 请求转发服务器(代理功能,梯子)
应用层面扩展
- 在线博客(制定对应的格式text和前端功能,建立对应数据库,实现博客的上传查询与修改)
- 在线画图板(返回一个在线画图板网页,用户画完,存入指定路径path,path插入对应数据库用于下次查看)
- 在线音视频播放(已经支持了)
- 在线网络计算器(我们已经实现了简易的 + - * /)
|