文件I/O

直接使用系统调用的缺点:

影响系统性能

系统调用比普通函数调用开销大，因为系统调用要进行用户空间和内核空间的切换。

系统调用一次所能读写的数据量大小，受硬件的限制。

解决方案:使用带缓冲功能的标准I/O库，以减少系统调用的次数。

例如: fwrite、fread、fopen、fclose、fseek、fflush

文件系统接口

文件系统——一种把数据组织成文件和目录的存储方式，提供了基于文件的存取接口，并通过文件权限控制访问。

(一系列的接口。)

文件系统缓存

主存(通常是DRAM)的一块区域用来缓存文件系统的内容，包含各种数据和元数据。

标准文件访问方式

直接IO方式

open() + O_DIRECT = 绕过内核页缓存的直接访问，数据直接在用户缓冲区和磁盘之间传输，避免了在内核缓冲区与用户缓冲区之间多拷贝一次所带来的CPU和内存开销。要求内存边界对齐(用户缓冲区地址、读写长度和文件偏移通常都要按块大小对齐)。

直接IO绕过了内核页缓存，因此不会出现write成功后数据仍滞留在页缓存中、因掉电而丢失的情况。但O_DIRECT本身并不保证数据和元数据已经落盘，若需要严格的同步写保证，还应配合O_SYNC或fsync使用。

注意：直接IO的缺点就是如果访问的数据不在应用程序缓存中，那么每次数据都会直接从磁盘进行加载，这种直接加载会非常慢，通常直接IO跟异步IO结合使用会得到较好的性能。

示例:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

// Number of BUF_LEN-byte blocks that main() writes to the test file.
#define TOTAL 10

// Direct I/O has to take the characteristics of the hardware into account.
// The basic unit of a disk is the sector; this example assumes 512-byte sectors.
#define BUF_LEN 512


/*
 * Write len bytes from buf to the file descriptor fd with one write(2) call.
 *
 * fd  - descriptor to write to
 * buf - data to write
 * len - number of bytes to write; a negative value is rejected with EINVAL
 *
 * Returns the number of bytes write() reported as written (this may be less
 * than len), or -3 on failure. On failure a diagnostic is printed to stderr
 * and errno still holds the error code, so the caller can report it as well.
 */
int writeToFile(int fd,const char* buf,int len) {
	ssize_t written = -1;

	if (len < 0) {
		/* A negative length would otherwise be converted to a huge size_t. */
		errno = EINVAL;
	} else {
		written = write(fd, buf, (size_t)len);
	}

	if (written < 0) {
		/* fprintf may modify errno; keep the original value for the caller. */
		int saved_errno = errno;

		fprintf(stderr,"write to %d failed,reason:%s.\n",fd,strerror(saved_errno));
		errno = saved_errno;
		return -3;
	}

	return (int)written;
}

/*
 * Direct I/O write demo.
 *
 * Creates (or truncates) ./io_test.txt with O_DIRECT and writes TOTAL blocks
 * of BUF_LEN bytes from a 512-byte-aligned buffer, printing a running count.
 *
 * Returns 0 on success, -1 if the buffer cannot be allocated or the file
 * cannot be opened, and -2 if any write failed.
 */
int main(int argc, char** argv) {
	(void)argc;
	(void)argv;

	const char* filename = "./io_test.txt";
	void* mem = NULL;
	char* buf = NULL;
	int fd = -1;
	int rc = 0;
	int status = 0;
	int i = 0;

	/*
	 * O_DIRECT needs the user buffer to be aligned (512 bytes here), so plain
	 * malloc is not enough. posix_memalign returns the error number directly
	 * instead of setting errno.
	 */
	rc = posix_memalign(&mem,512,BUF_LEN);
	if (rc != 0) {
		fprintf(stderr, "posix_memalign failed,reason:%s.\n exit\n",strerror(rc));
		return -1;
	}
	buf = mem;

	/* Zero the whole block so no uninitialized heap bytes reach the file. */
	memset(buf,0,BUF_LEN);
	strcpy(buf,"This is test.\n");

	/* O_CREAT requires an explicit mode; without it the permissions are garbage. */
	fd = open(filename,O_RDWR | O_TRUNC | O_CREAT | O_DIRECT,0644);
	if (fd < 0) {
		fprintf(stderr, "open %s failed,reason:%s.\n exit\n",filename,strerror(errno));
		free(buf);
		return -1;
	}

	for (i = 0; i < TOTAL; i++) {
		if (writeToFile(fd, buf, BUF_LEN) < 0) {
			fprintf(stderr,"write to %s failed,reason: %s.\n exit\n",filename,strerror(errno));
			/* Keep going with the remaining blocks, but remember the failure. */
			status = -2;
		}
		printf("%d\n",i+1);
	}

	printf("finished.\n");

	free(buf);
	if (close(fd) < 0) {
		fprintf(stderr,"close %s failed,reason: %s.\n",filename,strerror(errno));
		status = -2;
	}
	return status;
}

直接IO和标准方式进行对比

**示例:** 测试20s内对同一文件的读取次数

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>


// Size in bytes of the aligned read buffer and of each read() request in main().
#define BUF_SIZE 512


/*
 * Read benchmark for comparing standard (page-cached) I/O with direct I/O.
 *
 * For about 20 seconds, repeatedly opens ./open_compare.txt, reads it to the
 * end in BUF_SIZE-byte chunks and closes it, then prints how many passes
 * were started.
 *
 * Returns 0 on success, -1 if the buffer cannot be allocated or the file
 * cannot be opened.
 */
int main(int argc, char** argv) {
	(void)argc;
	(void)argv;

	const char* filename = "./open_compare.txt";
	void* mem = NULL;
	char* buf = NULL;
	int fd = -1;
	time_t start;
	time_t cur;
	ssize_t rlen = 0;
	int ret = 0;
	int read_total = 0;

	/*
	 * The buffer is 512-byte aligned so the same program also works when the
	 * file is opened with O_DIRECT. posix_memalign reports failure through
	 * its return value and does not set errno.
	 */
	ret = posix_memalign(&mem,512,BUF_SIZE);
	if (ret) {
		fprintf(stderr,"posix_memalign failed.reason:%s\n",strerror(ret));
		return -1;
	}
	buf = mem;

	start = time(NULL);

	do
	{
		read_total++;
		/* To measure direct I/O instead, open with O_RDWR | O_DIRECT. */
		fd = open(filename,O_RDWR);

		if (fd < 0) {
			fprintf(stderr, "open %s failed,reason:%s.\n exit\n", filename, strerror(errno));
			free(buf);
			return -1;
		}

		/* Read until end of file (0) or an error (negative) ends this pass. */
		do
		{
			rlen = read(fd, buf, BUF_SIZE);
			if (rlen < 0) {
				fprintf(stderr, "read to %s failed,reason: %s.\n exit\n", filename, strerror(errno));
			}

		} while (rlen>0);
		close(fd);

		cur = time(NULL);
	} while ((cur-start) < 20);

	printf("total time:%d\n",read_total);

	free(buf);
	return 0;

}

直接IO

标准方式

(高速页缓存，多次读取速度快。)

O_SYNC

缓存同步

为了保证磁盘系统与缓冲区内容一致，Linux系统提供了sync,fsync,fdatasync三个函数。

函数描述:将缓冲区中已修改的数据同步到磁盘。fsync和fdatasync成功返回0，出错则返回-1并设置errno；sync没有返回值。
#include<unistd.h>

int fsync(int fd);
int fdatasync(int fd);
void sync(void);
说明:

sync——将所有修改过的块缓冲区排入写队列，然后就返回，它并不等待实际写磁盘操作结束。
fsync——将fd对应文件的块缓冲区立即写入磁盘，并等待实际写磁盘操作结束返回。
fdatasync——类似fsync,但只影响文件的数据部分。而除数据外，fsync还会同步更新文件属性。