glibc readv和writev函数改进

最近在改进公司内部网络发送库,发现了linux下高级io操作函数readv和writev,在glibc里面实现的。

使用这两函数需要include<sys/uio.h>

ssize_t readv(int fd,const struct iovec *iov, int count); 

从文件描述符fd所对应的文件中读取数据,并按数组顺序依次填充到count个buffer中;每个buffer用一个iovec描述(注意count是iovec数组的元素个数,而不是字节数)

ssize_t writev(int fd,const struct iovec *iov, int count);

把count个按指定顺序排列的数据buffer(使用iovec描述)依次写入到文件描述符fd所对应的文件中

struct iovec结构在bits/uio.h中定义的,是一种向量形式的结构体。

/* One segment of a scatter/gather I/O request: a buffer and its size.  */
struct iovec {
    void   *iov_base;   /* Start address of the buffer.  */
    size_t  iov_len;    /* Number of bytes available at iov_base.  */
};

能将本来需要多次发送的数据,聚合在一起,一次发送,提高IO效率。

但使用时发现了一些问题:readv一次不能完全接收到期望长度的数据。查看glibc源码,发现其POSIX通用实现(sysdeps/posix)中readv、writev分别是基于read、write实现的(Linux上glibc实际是直接调用同名系统调用,但同样可能出现short read),而read一次本来就可能读不到期望长度的数据。

It is not an error if this number is smaller than the number of bytes requested; this may happen for example because fewer bytes are actually available right now (maybe because we were close to end-of-file, or because we are reading from a pipe, or from a terminal), or because read() was interrupted by a signal.

查看glibc源码实现,发现里面并没有处理这个问题,所以才会出现读到的数据长度与期望不一致的问题。

以下是glibc的readv实现(glibc/sysdeps/posix/readv.c),可以看到read只调用了一次。

#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <limits.h>
#include <stdbool.h>
#include <sys/param.h>
#include <sys/uio.h>
#include <errno.h>
/* Cleanup handler: release the heap block whose pointer is stored at
   PTRP.  A null pointer is accepted, since free (NULL) does nothing.  */
static void
ifree (char **ptrp)
{
  char *block = *ptrp;

  free (block);
}
/* Read data from file descriptor FD, and put the result in the
   buffers described by VECTOR, which is a vector of COUNT 'struct iovec's.
   The buffers are filled in the order specified.
   Operates just like 'read' (see <unistd.h>) except that data are
   put in VECTOR instead of a contiguous buffer.

   Returns the number of bytes obtained by the single __read call below,
   which may be smaller than the combined size of the buffers.  Returns
   -1 if the combined size does not fit in ssize_t (errno is set to
   EINVAL), if the temporary buffer cannot be allocated, or if __read
   itself reports an error.  */
ssize_t
__readv (int fd, const struct iovec *vector, int count)
{
  /* Find the total number of bytes to be read.  */
  size_t bytes = 0;
  for (int i = 0; i < count; ++i)
    {
      /* Check for ssize_t overflow.  */
      if (SSIZE_MAX - bytes < vector[i].iov_len)
        {
          __set_errno (EINVAL);
          return -1;
        }
      bytes += vector[i].iov_len;
    }
  /* Allocate a temporary buffer to hold the data.  We should normally
     use alloca since it's faster and does not require synchronization
     with other threads.  But we cannot if the amount of memory
     required is too large.  */
  char *buffer;
  /* Stays NULL on the alloca path; on the malloc path the cleanup
     attribute passes its address to ifree when the variable goes out
     of scope, so every return below releases the heap buffer.  */
  char *malloced_buffer __attribute__ ((__cleanup__ (ifree))) = NULL;
  if (__libc_use_alloca (bytes))
    buffer = (char *) __alloca (bytes);
  else
    {
      malloced_buffer = buffer = (char *) malloc (bytes);
      if (buffer == NULL)
        return -1;
    }
  /* Read the data.  Only one __read call is made, so a short read is
     passed on to the caller unchanged.  */
  ssize_t bytes_read = __read (fd, buffer, bytes);
  if (bytes_read < 0)
    return -1;
  /* Copy the data from BUFFER into the memory specified by VECTOR.  */
  bytes = bytes_read;
  for (int i = 0; i < count; ++i)
    {
      /* Never copy more than what is left of the data actually read.  */
      size_t copy = MIN (vector[i].iov_len, bytes);
      (void) memcpy ((void *) vector[i].iov_base, (void *) buffer, copy);
      buffer += copy;
      bytes -= copy;
      /* Stop as soon as all received data has been distributed.  */
      if (bytes == 0)
        break;
    }
  return bytes_read;
}

所以需要把read调用改进一下,保证数据能读取完整。以下是改进

/* Local stand-in for the glibc-internal __set_errno macro: stores VAL
   in errno.  NOTE(review): identifiers beginning with "__" are reserved
   for the implementation; consider renaming outside of this example.  */
#define __set_errno(val) (errno = (val))

/* Cleanup handler for the temporary heap buffer used by my_readv:
   frees the block whose pointer is stored at PTRP (a null pointer is
   fine).  errno is saved and restored around the call because free is
   not guaranteed to leave it untouched on every C library, and
   my_readv's error return relies on it.  */
static void
ifree (char **ptrp)
{
  int saved_errno = errno;

  free (*ptrp);
  errno = saved_errno;
}

/* Read data from file descriptor FD into the buffers described by
   VECTOR, which is a vector of COUNT 'struct iovec's, filling them in
   the order specified.

   Unlike a plain 'readv', 'read' is called repeatedly until all
   buffers are full, end-of-file is reached, or an unrecoverable error
   occurs.

   Returns the total number of bytes stored in the buffers.  A value
   smaller than the combined buffer size means end-of-file was reached
   or a hard error occurred after some data had already been received
   (errno then holds that error).  Returns -1 with errno set when the
   combined size does not fit in ssize_t (EINVAL), when the temporary
   buffer cannot be allocated, or when 'read' fails before any data
   has been received.  */
ssize_t
my_readv (int fd, const struct iovec *vector, int count)
{
  /* Requests up to this many bytes are staged on the stack.  */
  enum { STACK_BUFFER_SIZE = 128 };

  /* Find the total number of bytes to be read.  */
  size_t bytes = 0;
  for (int i = 0; i < count; ++i)
    {
      /* Check for ssize_t overflow.  */
      if (SSIZE_MAX - bytes < vector[i].iov_len)
        {
          errno = EINVAL;
          return -1;
        }
      bytes += vector[i].iov_len;
    }

  /* Stage the data in one contiguous temporary buffer: a fixed stack
     array for small requests, the heap otherwise.  malloced_buffer
     stays NULL on the stack path; on the heap path ifree releases it
     on every return.  */
  char stack_buffer[STACK_BUFFER_SIZE];
  char *malloced_buffer __attribute__ ((__cleanup__ (ifree))) = NULL;
  char *buffer = stack_buffer;
  if (bytes > sizeof stack_buffer)
    {
      malloced_buffer = buffer = malloc (bytes);
      if (buffer == NULL)
        return -1;
    }

  /* Keep reading until the request is satisfied.  The counter is a
     size_t so that it can hold any total accepted above and compares
     cleanly against BYTES.  */
  size_t bytes_read = 0;
  while (bytes_read < bytes)
    {
      ssize_t ret = read (fd, buffer + bytes_read, bytes - bytes_read);
      if (ret > 0)
        {
          bytes_read += (size_t) ret;
          continue;
        }

      /* End of file: hand back whatever has been collected.  */
      if (ret == 0)
        break;

      /* Transient conditions are retried.  NOTE: on a non-blocking
         descriptor this retries immediately, i.e. it busy-waits until
         more data arrives.  */
      if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
        continue;

      /* Hard error.  With nothing received, report the failure;
         otherwise deliver the partial data and leave errno set.  */
      if (bytes_read == 0)
        return -1;
      break;
    }

  /* Distribute the staged data over the caller's buffers in order.  */
  const char *src = buffer;
  size_t remaining = bytes_read;
  for (int i = 0; i < count && remaining > 0; ++i)
    {
      size_t copy = vector[i].iov_len < remaining
                    ? vector[i].iov_len : remaining;

      /* Skip empty segments so memcpy is never handed a buffer
         pointer that may be null.  */
      if (copy > 0)
        memcpy (vector[i].iov_base, src, copy);

      src += copy;
      remaining -= copy;
    }

  return (ssize_t) bytes_read;
}

完整code见https://github.com/zhangjun/my_notes/blob/master/linux/io

另外facebook  folly也有实现,见 https://github.com/facebook/folly/blob/master/folly/portability/SysUio.cpp