glibc readv和writev函数改进

最近在改进公司内部网络发送库,发现了linux下高级io操作函数readv和writev,在glibc里面实现的。

使用这两个函数需要 #include <sys/uio.h>

ssize_t readv(int fd,const struct iovec *iov, int count); 

从文件描述符fd所对应的文件中读取数据,按数组顺序依次填入count个buffer中,每个buffer用一个iovec描述(count是iovec的个数,不是字节数)

ssize_t writev(int fd,const struct iovec *iov, int count);

把count个按顺序排列的数据buffer(每个用一个iovec描述)写入到文件描述符fd所对应的文件中

struct iovec结构在bits/uio.h中定义的,是一种向量形式的结构体。

/* Structure for scatter/gather I/O.  */
struct iovec
  {
    void *iov_base; /* Pointer to data.  */
    size_t iov_len; /* Length of data.  */
  };

能将本来需要多次发送的数据,聚合在一起,一次发送,提高IO效率。

但使用时发现了一些问题,readv一次不能完全接收到期望长度数据。查看glibc源码,发现readv、writev底层分别是基于read、write实现的,而read一次本来就可能获得不了期望长度数据。

It is not an error if this number is smaller than the number of bytes requested; this may happen for example because fewer bytes are actually available right now (maybe because we were close to end-of- file, or because we are reading from a pipe, or from a terminal), or because read() was interrupted by a signal.

查看glibc源码实现,发现里面并没有处理这个问题,所以才出现与期望不一致的问题。

以下是glibc readv实现,glibc/sysdeps/posix/readv.c,read只调用了一次。

#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <limits.h>
#include <stdbool.h>
#include <sys/param.h>
#include <sys/uio.h>
#include <errno.h>
#include <poll.h>
/* Cleanup handler: release the heap block whose address is stored in
   *PTRP.  It receives a pointer to the pointer variable because that is
   how a function named in __attribute__ ((__cleanup__ (...))) is
   invoked.  A NULL *PTRP is harmless, since free (NULL) does nothing.  */
static void
ifree (char **ptrp)
{
  char *block = *ptrp;

  free (block);
}
/* Read data from file descriptor FD, and put the result in the
   buffers described by VECTOR, which is a vector of COUNT 'struct iovec's.
   The buffers are filled in the order specified.
   Operates just like 'read' (see <unistd.h>) except that data are
   put in VECTOR instead of a contiguous buffer.

   Returns the number of bytes read, which may be smaller than the sum
   of the iov_len fields because the underlying read is issued exactly
   once.  Returns -1 if the total length would exceed SSIZE_MAX (errno
   is set to EINVAL), if the temporary buffer cannot be allocated, or
   if the read itself fails.  */
ssize_t
__readv (int fd, const struct iovec *vector, int count)
{
  /* Find the total number of bytes to be read.  */
  size_t bytes = 0;
  for (int i = 0; i < count; ++i)
    {
      /* Check for ssize_t overflow.  The test is written as a
         subtraction so the running sum itself can never wrap.  */
      if (SSIZE_MAX - bytes < vector[i].iov_len)
        {
          __set_errno (EINVAL);
          return -1;
        }
      bytes += vector[i].iov_len;
    }
  /* Allocate a temporary buffer to hold the data.  We should normally
     use alloca since it's faster and does not require synchronization
     with other threads.  But we cannot if the amount of memory
     required is too large.  */
  char *buffer;
  /* ifree runs on MALLOCED_BUFFER whenever it goes out of scope, so every
     return path below releases the heap buffer.  It stays NULL on the
     alloca path, where there is nothing to free.  */
  char *malloced_buffer __attribute__ ((__cleanup__ (ifree))) = NULL;
  if (__libc_use_alloca (bytes))
    buffer = (char *) __alloca (bytes);
  else
    {
      malloced_buffer = buffer = (char *) malloc (bytes);
      if (buffer == NULL)
        /* errno is not set here; presumably malloc has already set it.  */
        return -1;
    }
  /* Read the data.  This is a single call: whatever short count it
     returns is passed straight through to the caller.  */
  ssize_t bytes_read = __read (fd, buffer, bytes);
  if (bytes_read < 0)
    return -1;
  /* Copy the data from BUFFER into the memory specified by VECTOR.
     From here on BYTES counts the bytes still to be handed out.  */
  bytes = bytes_read;
  for (int i = 0; i < count; ++i)
    {
      /* Fill this entry completely, or with whatever is left.  */
      size_t copy = MIN (vector[i].iov_len, bytes);
      (void) memcpy ((void *) vector[i].iov_base, (void *) buffer, copy);
      buffer += copy;
      bytes -= copy;
      /* Stop once everything that was read has been distributed.  */
      if (bytes == 0)
        break;
    }
  return bytes_read;
}

所以需要把read调用改进一下,保证数据能读取完整。以下是改进

/* Local stand-in for the __set_errno macro used by the glibc sources,
   so that code copied from glibc compiles outside of glibc: it simply
   assigns VAL to errno.
   NOTE(review): identifiers starting with a double underscore are
   reserved for the implementation; consider assigning errno directly.  */
#define __set_errno(val) (errno = (val))

/* Cleanup handler for the __cleanup__ attribute: frees the heap block
   whose address is stored in *PTRP (a NULL pointer is a no-op).  */
static void
ifree (char **ptrp)
{
  free (*ptrp);
}

/* Largest total length my_readv accepts, i.e. the largest value its
   ssize_t return type can report.  <limits.h> only exposes SSIZE_MAX
   when POSIX feature macros are enabled, so fall back to half the
   size_t range in strict ISO C builds.  */
#ifdef SSIZE_MAX
# define MY_READV_TOTAL_MAX ((size_t) SSIZE_MAX)
#else
# define MY_READV_TOTAL_MAX (((size_t) -1) / 2)
#endif

/* Requests up to this many bytes are staged in a stack buffer instead
   of a heap allocation.  */
enum { MY_READV_STACK_BYTES = 128 };

/* Sleep until FD is reported readable (or in an error/hang-up state).
   Returns 0 when the caller should retry its read, -1 with errno set
   if poll itself failed for a reason other than EINTR.  */
static int
my_readv_wait_readable (int fd)
{
  struct pollfd pfd = { .fd = fd, .events = POLLIN };

  for (;;)
    {
      if (poll (&pfd, 1, -1) >= 0)
        return 0;
      if (errno != EINTR)
        return -1;
    }
}

/* Read exactly BYTES bytes from FD into BUFFER, retrying after short
   reads and signal interruptions, and waiting for readiness when a
   non-blocking descriptor has no data yet.  Stops early at end of file
   or on a hard error.  Returns the number of bytes stored, or -1 (errno
   set) if a hard error occurred before anything was read.  */
static ssize_t
my_readv_read_full (int fd, char *buffer, size_t bytes)
{
  size_t total = 0;

  while (total < bytes)
    {
      ssize_t got = read (fd, buffer + total, bytes - total);

      if (got > 0)
        {
          total += (size_t) got;
          continue;
        }
      if (got == 0)
        break;                  /* End of file: report what we have.  */
      if (errno == EINTR)
        continue;               /* Interrupted before any data: retry.  */
      /* No data available yet on a non-blocking descriptor: block in
         poll rather than spinning on read.  */
      if ((errno == EAGAIN || errno == EWOULDBLOCK)
          && my_readv_wait_readable (fd) == 0)
        continue;
      /* Hard error.  Data already received must still reach the caller,
         so only report failure when nothing was read at all.  */
      if (total == 0)
        return -1;
      break;
    }
  return (ssize_t) total;
}

/* Read data from file descriptor FD, and put the result in the
   buffers described by VECTOR, which is a vector of COUNT 'struct iovec's.
   The buffers are filled in the order specified.

   Unlike a single read, this keeps reading until every buffer is full,
   end of file is reached, or a hard error occurs.

   Returns the number of bytes stored.  This is smaller than the sum of
   the iov_len fields only at end of file or when a hard error cut the
   transfer short after some data had arrived (errno is then left set by
   the failing call).  Returns -1 with errno set if the total length
   does not fit in ssize_t (EINVAL), if the staging buffer cannot be
   allocated, or if an error occurred before any byte was read.  */
ssize_t
my_readv (int fd, const struct iovec *vector, int count)
{
  /* Find the total number of bytes to be read.  */
  size_t bytes = 0;
  for (int i = 0; i < count; ++i)
    {
      /* Check for ssize_t overflow without letting the sum wrap.  */
      if (MY_READV_TOTAL_MAX - bytes < vector[i].iov_len)
        {
          errno = EINVAL;
          return -1;
        }
      bytes += vector[i].iov_len;
    }

  /* Stage the data in one contiguous buffer: a fixed stack array for
     small requests, the heap otherwise.  ifree releases the heap buffer
     on every return path; it is a no-op while the pointer is NULL.  */
  char stack_buffer[MY_READV_STACK_BYTES];
  char *malloced_buffer __attribute__ ((__cleanup__ (ifree))) = NULL;
  char *buffer = stack_buffer;
  if (bytes > sizeof stack_buffer)
    {
      malloced_buffer = buffer = malloc (bytes);
      if (buffer == NULL)
        return -1;
    }

  /* Read the data.  */
  ssize_t bytes_read = my_readv_read_full (fd, buffer, bytes);
  if (bytes_read < 0)
    return -1;

  /* Copy the data from BUFFER into the memory specified by VECTOR.  */
  size_t remaining = (size_t) bytes_read;
  for (int i = 0; i < count && remaining > 0; ++i)
    {
      size_t copy = vector[i].iov_len < remaining
                    ? vector[i].iov_len : remaining;

      /* Skip empty entries: their iov_base may legitimately be NULL.  */
      if (copy > 0)
        memcpy (vector[i].iov_base, buffer, copy);

      buffer += copy;
      remaining -= copy;
    }
  return bytes_read;
}

完整code见https://github.com/zhangjun/my_notes/blob/master/linux/io

另外facebook  folly也有实现,见 https://github.com/facebook/folly/blob/master/folly/portability/SysUio.cpp

程序在内存中的分布

在现代的操作系统中,当我们说到内存,往往需要分两部分来讲:物理内存和虚拟内存。从硬件上讲,虚拟空间是CPU内部的寻址空间,位于MMU之前,物理空间是总线上的寻址空间,是经过MMU转换之后的空间。
一般我们所说的程序在内存中的分布指的就是程序在虚拟内存中的存储方式。
从低地址到高地址,可分为下面几段:
预留内存地址
(操作系统维护的内存地址,不可访问)
程序代码区(只读,存代码和一些其他的东西);
data段(存初始化的全局变量和static变量,另外还有文字常量区,常量字符串就是放在这里,程序结束后由系统释放);
bss段(存未初始化的全局变量和static变量);
堆(由低地址向高地址增长,一般new和malloc分配,由程序员分配释放);
共享库文件(调用的库文件,位于堆和栈之间);
栈(由高地址向低地址增长,和堆的增长方式相对,对不同的OS来说,栈的初始大小有规定,可以修改,目前默认一般为2M,由编译器自动分配释放);
再上面存的都是操作系统和内核调用的一些内存地址
如图所示:

C/C++ 宏

宏的一些用法:
1、#和##
#符号把一个符号直接转换为字符串,例如:
#define STRING(x) #x
const char *str = STRING( test_string ); str的内容就是"test_string",也就是说#会把其后的符号直接加上双引号。
##符号会连接两个符号,从而产生新的符号(词法层次),例如:
#define SIGN( x ) INT_##x
int SIGN( 1 ); 宏被展开后将成为:int INT_1;
2、变参宏 可以使你自定义类似printf的宏
#define LOG( format, ... ) printf( format, __VA_ARGS__ )
LOG( "%s %d", str, count );     __VA_ARGS__是预定义的标识符,在宏展开时被自动替换为传入的可变参数列表。
3、宏调用自己
为了防止宏无限递归展开,当宏展开遇到自己时,就停止展开。
#define TEST(X) (X+TEST(X))
则TEST(1) 最终会展开成(1+TEST(1))