linux标准输入输出

Posted 2020-10-19 zzfx

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了linux标准输入输出相关的知识，希望对你有一定的参考价值。

一简介

stdin, stdout, stderr分别称为标准输入、标准输出、标准错误输出，它们的声明如下：

/* Standard streams. */
extern FILE *stdin; /* Standard input stream. */
extern FILE *stdout; /* Standard output stream. */
extern FILE *stderr; /* Standard error output stream. */

可以看出它们是libc定义的指针变量，但是C89/C99规定它们应该是宏，于是就有了下面这段：

/* C89/C99 say they're macros. Make them happy. */
#define stdin stdin
#define stdout stdout
#define stderr stderr

很多时候应用程序io操作并没有指定操作的文件句柄，比如printf，puts，getchar(), scanf()等，这时就采用标准输入输出，看看printf()函数的实现：

int printf(const char * __restrict format, ...)
{
va_list arg;
int rv;

va_start(arg, format);
rv = vfprintf(stdout, format, arg);
va_end(arg);

return rv;
}

printf输出到stdout上，这是很好理解的。

二原理

初始化过程

stdin, stdout, stderr是在哪里初始化的呢，不难找到如下代码：

FILE *stdin = _stdio_streams;
FILE *stdout = _stdio_streams + 1;
FILE *stderr = _stdio_streams + 2;

也就是说它们的值在编译期就指定了，不需要运行时去设置，继续查看_stdio_streams的定义：

static FILE _stdio_streams[] = {
__STDIO_INIT_FILE_STRUCT(_stdio_streams[0], \
__FLAG_LBF|__FLAG_READONLY, \
0, \
_stdio_streams + 1, \
_fixed_buffers, \
BUFSIZ ),
__STDIO_INIT_FILE_STRUCT(_stdio_streams[1], \
__FLAG_LBF|__FLAG_WRITEONLY, \
1, \
_stdio_streams + 2, \
_fixed_buffers + BUFSIZ, \
BUFSIZ ),
__STDIO_INIT_FILE_STRUCT(_stdio_streams[2], \
__FLAG_NBF|__FLAG_WRITEONLY, \
2, \
NULL, \
NULL, \
0 )
};

特别要注意的是其中的0,1,2文件描述符，FILE是一个结构体类型，定义如下：

struct __STDIO_FILE_STRUCT {
unsigned short __modeflags;
/* There could be a hole here, but modeflags is used most.*/
unsigned char __ungot[2];
int __filedes;
#ifdef __STDIO_BUFFERS
unsigned char *__bufstart;/* pointer to buffer */
unsigned char *__bufend;/* pointer to 1 past end of buffer */
unsigned char *__bufpos;
unsigned char *__bufread; /* pointer to 1 past last buffered read char */

#ifdef __STDIO_GETC_MACRO
unsigned char *__bufgetc_u;/* 1 past last readable by getc_unlocked */
#endif /* __STDIO_GETC_MACRO */
#ifdef __STDIO_PUTC_MACRO
unsigned char *__bufputc_u;/* 1 past last writeable by putc_unlocked */
#endif /* __STDIO_PUTC_MACRO */

#endif /* __STDIO_BUFFERS */

......................
#if __STDIO_BUILTIN_BUF_SIZE > 0
unsigned char __builtinbuf[__STDIO_BUILTIN_BUF_SIZE];
#endif /* __STDIO_BUILTIN_BUF_SIZE > 0 */
};

可以看出_stdio_streams的buffer是固定的：

#ifdef __STDIO_BUFFERS
static unsigned char _fixed_buffers[2 * BUFSIZ];
#endif

BUFSIZ默认大小为4096，但是对于后来fopen打开的文件，缓冲区都是malloc分配的。

0,1,2文件描述符是在哪里打开的？一般来说是继承自父进程的，这样可以方便地实现重定向和管道操作：父进程先保存自己的0,1,2文件描述符，然后通过dup把0,1,2指向重定向的目标，启动子进程，之后父进程再还原保存的0,1,2文件描述符。当然libc启动时也对0,1,2文件描述符进行了检查：

__check_one_fd (STDIN_FILENO, O_RDONLY | O_NOFOLLOW);
__check_one_fd (STDOUT_FILENO, O_RDWR | O_NOFOLLOW);
__check_one_fd (STDERR_FILENO, O_RDWR | O_NOFOLLOW);

其中__check_one_fd ()定义为：

static void __check_one_fd(int fd, int mode)
{
/* Check if the specified fd is already open */
if (fcntl(fd, F_GETFD) == -1)
{
/* The descriptor is probably not open, so try to use /dev/null */
int nullfd = open(_PATH_DEVNULL, mode);
/* /dev/null is major=1 minor=3. Make absolutely certain
* that is in fact the device that we have opened and not
* some other wierd file... [removed in uclibc] */
if (nullfd!=fd)
{
abort();
}
}
}

当发现0,1,2没有打开时，打开/dev/null作为0,1,2；如果open()返回的描述符不是正在检查的那个fd，则调用abort()终止程序。

另外libc会调用_stdio_init()对_stdio_streams进行运行时初始化，因为其中有些参数无法在编译期指定，比如缓冲类型：

void attribute_hidden _stdio_init(void)
{
#ifdef __STDIO_BUFFERS
int old_errno = errno;
/* stdin and stdout uses line buffering when connected to a tty. */
if (!isatty(0))
_stdio_streams[0].__modeflags ^= __FLAG_LBF;
if (!isatty(1))
_stdio_streams[1].__modeflags ^= __FLAG_LBF;
__set_errno(old_errno);
#endif
#ifndef __UCLIBC__
/* _stdio_term is done automatically when exiting if stdio is used.
* See misc/internals/__uClibc_main.c and and stdlib/atexit.c. */
atexit(_stdio_term);
#endif
}

通过判断是否是tty来确定缓冲的类型：stdin和stdout初始时带有行缓冲标志，如果对应的描述符不是tty，就把__FLAG_LBF清掉，变成全缓冲。isatty判断的依据是ioctl(fd, TCGETS, &k_termios)能否成功，因为每个tty都对应一个termios，用于line discipline（行规程）配置。

缓冲的类型

#define __FLAG_FBF 0x0000U /* must be 0 */
#define __FLAG_LBF 0x0100U
#define __FLAG_NBF 0x0200U /* (__FLAG_LBF << 1) */

分别表示全缓冲(Full Buffer)、行缓冲(Line Buffer)和无缓冲(No Buffer)。全缓冲：只有当缓冲区满或没有足够的空间时，才进行真正的读写操作，常见于普通文件。行缓冲：读写以一行为基本单位，常见于tty设备。无缓冲：不进行缓冲，直接进行读写，常见于stderr，因为错误信息需要立即输出可见。

和open()的区别

标准IO只是在open()等系统调用的基础上进行了封装，中间加入了缓冲管理，最终还是通过系统调用实现真正的读写操作。这样的好处是：大部分用户的读写操作是直接操作缓冲的；由于系统调用执行较慢，尽可能减少系统调用的频率，可以大大提高程序执行的效率。

缓冲的管理

读写都是以单个字符为单位的，下面分别分析一下读写过程缓冲区的管理。
读取操作：

if (__STDIO_STREAM_BUFFER_RAVAIL(stream)) {/* Have buffered? */
return __STDIO_STREAM_BUFFER_GET(stream);
}

如果buffer中read available,则直接读取buffer中的字符返回，否则表明buffer中可读数据为空：

if (__STDIO_STREAM_BUFFER_SIZE(stream)) { /* Do we have a buffer? */
__STDIO_STREAM_DISABLE_GETC(stream);
if(__STDIO_FILL_READ_BUFFER(stream)) {/* Refill succeeded? */
__STDIO_STREAM_ENABLE_GETC(stream);/* FBF or LBF */
return __STDIO_STREAM_BUFFER_GET(stream);
}
} else {
unsigned char uc;
if (__stdio_READ(stream, &uc, 1)) {
return uc;
}
}

调用__STDIO_FILL_READ_BUFFER() 填充buffer

#define __STDIO_FILL_READ_BUFFER(S) __stdio_rfill((S))

size_t __stdio_rfill(register FILE *__restrict stream)

{

.......

rv = __stdio_READ(stream, stream->__bufstart,
stream->__bufend - stream->__bufstart);
stream->__bufpos = stream->__bufstart;
stream->__bufread = stream->__bufstart + rv;

}

对于全缓冲，尽可能填满整个buffer，对于行缓冲，则读取一行数据，至于tty怎么读取一行数据，这里不展开。在用户不断的读取数据后stream->__bufpos不断往后移动，当等于stream->__bufread时表明缓冲区读空了，然后再调用这个函数进行填充。

写入操作：

if (__STDIO_STREAM_BUFFER_SIZE(stream)) { /* Do we have a buffer? */
/* The buffer is full and/or the stream is line buffered. */
if (!__STDIO_STREAM_BUFFER_WAVAIL(stream) /* Buffer full? */
&& __STDIO_COMMIT_WRITE_BUFFER(stream) /* Commit failed! */
) {
goto BAD;
}

__STDIO_STREAM_BUFFER_ADD(stream, ((unsigned char) c));

if (__STDIO_STREAM_IS_LBF(stream)) {
if ((((unsigned char) c) == '\n')
&& __STDIO_COMMIT_WRITE_BUFFER(stream)) {
/* Commit failed! */
__STDIO_STREAM_BUFFER_UNADD(stream); /* Undo the write! */
goto BAD;
}

在写入单个字符之前判断buffer是否还有足够的空间写，如果没有则提交write系统调用，清空buffer。有足够的空间后，写入buffer，最后判断是否是行缓冲，且有行结束标志，如果是则提交write系统调用；对于全缓冲则不用管，尽量推迟写入操作，到下次没有足够空间写时才提交write系统调用。__STDIO_COMMIT_WRITE_BUFFER的过程如下：

if ((bufsize = __STDIO_STREAM_BUFFER_WUSED(stream)) != 0) {
stream->__bufpos = stream->__bufstart;
__stdio_WRITE(stream, stream->__bufstart, bufsize);
}

stream->__bufpos是当前写buffer的位置，提交后等于stream->__bufstart，表明清空buffer。

什么是ungot?

void scanf_buffer(void)

{

int a , b;

while( scanf("%d%d",&a,&b) != EOF )

printf("%d%d\n",a,b);

}

这是一种非常常见的用法，正常情况下没什么问题，但是如果用户误输入，比如输入CSDN 666666\n 会出现什么情况呢？好奇的可以运行试验下，结果是会死循环。为何会死循环呢？这就跟scanf()的实现有关了：scanf从缓冲区取出一个字符，%d表明需要的是数字才对，结果一看不对，又把取出的字符塞回去了，scanf函数错误返回（匹配失败时返回的是已成功赋值的个数0，而不是EOF，所以while的条件仍然成立），结果缓冲区中的内容仍然为CSDN 666666\n，所以下一次进来由于缓冲区有数据，不等用户输入就直接错误返回了，因此出现了死循环。

这就是ungot机制，当scanf()取出字符发现不对时，将字符退回缓冲区。另外user也可以调用ungetc()函数push back单个字符到缓冲区。

下面是libc中的一段注释，从中可以看出一二：
/***********************************************************************/
/* Having ungotten characters implies the stream is reading.
* The scheme used here treats the least significant 2 bits of
* the stream's modeflags member as follows:
* 0 0 Not currently reading.
* 0 1 Reading, but no ungetc() or scanf() push back chars.
* 1 0 Reading with one ungetc() char (ungot[1] is 1)
* or one scanf() pushed back char (ungot[1] is 0).
* 1 1 Reading with both an ungetc() char and a scanf()
* pushed back char. Note that this must be the result
* of a scanf() push back (in ungot[0]) _followed_ by
* an ungetc() call (in ungot[1]).
*
* Notes:
* scanf() can NOT use ungetc() to push back characters.
* (See section 7.19.6.2 of the C9X rationale -- WG14/N897.)
*/

if (__STDIO_STREAM_CAN_USE_BUFFER_GET(stream)
&& (c != EOF)
&& (stream->__bufpos > stream->__bufstart)
&& (stream->__bufpos[-1] == ((unsigned char)c))
) {
--stream->__bufpos;
__STDIO_STREAM_CLEAR_EOF(stream); /* Must clear end-of-file flag. */
}

else if (c != EOF) {
__STDIO_STREAM_DISABLE_GETC(stream);

/* Flag this as a user ungot, as scanf does the necessary fixup. */
stream->__ungot[1] = 1;
stream->__ungot[(++stream->__modeflags) & 1] = c;

__STDIO_STREAM_CLEAR_EOF(stream); /* Must clear end-of-file flag. */
}

如果push back的字符是刚刚读取的，则直接stream->__bufpos减一即可，对于大量使用getc()/ungetc()，可以明显的提高运行效率，但是如果push back的不是最后从缓冲区读取的，而是用户调用ungetc() push back一个其他字符，则走下面的流程，__STDIO_STREAM_DISABLE_GETC(stream)设置下次getc()首先从ungot slot中去读取，ungot slot就是指这里的stream->__ungot[2]，那么可以连续push back多少个字符呢，理论上只有一个，因为scanf只需要一个，但是根据这里的实现代码来看，可以有很多个：

stream->__modeflags 表示的含义：

高位 <------ 16bit（__modeflags为unsigned short） ------ bit3 | bit2 | bit1 | bit0 > 低位

各位含义（bit3到bit0）：Error | EOF | ungot | reading

0 0 1：表示reading，没有ungot

push back一个字符后，变为：

0 1 0：

stream->__ungot[1] = 1表示stream->__ungot[0]存放的是ungetc() push back的字符

stream->__ungot[1] = 0 表示 stream->__ungot[0]存放的是scanf() push back的字符

接着继续push back一个字符后，变为：

0 1 1：

stream->__ungot[0]存放的是scanf() push back的字符

stream->__ungot[1]存放的是ungetc() push back的字符

可以看出，连续push back两个字符是没什么问题的，但是如果接着push back一个字符会发生什么呢？值变成如下：

1 0 0：

stream->__ungot[0]存放的是ungetc() push back的字符，会覆盖前面push back的字符，并且__FLAG_UNGOT标志被清掉，这时去调用getc()是读取不到push back的字符的，getc()函数的部分代码如下：

if (stream->__modeflags & __FLAG_UNGOT) { /* Use ungots first. */
unsigned char uc = stream->__ungot[(stream->__modeflags--) & 1];
stream->__ungot[1] = 0;
__STDIO_STREAM_VALIDATE(stream);
return uc;
}

__STDIO_STREAM_CLEAR_EOF(stream)最后调用清除掉EOF标志，所以如果连续push back多次字符，并不会导致缓冲区溢出或死机，只是push back的字符不见了，程序运行逻辑可能出现问题，为了程序更好的移植性，连续ungetc()的次数不要超过1次。

锁保护

如果应用是单线程的，则可直接使用无锁版本的接口，busybox是典型的例子：

/* Busybox does not use threads, we can speed up stdio. */
#ifdef HAVE_UNLOCKED_STDIO
# undef getc
# define getc(stream) getc_unlocked(stream)
# undef getchar
# define getchar() getchar_unlocked()
# undef putc
# define putc(c, stream) putc_unlocked(c, stream)
# undef putchar
# define putchar(c) putchar_unlocked(c)
# undef fgetc
# define fgetc(stream) getc_unlocked(stream)
# undef fputc
# define fputc(c, stream) putc_unlocked(c, stream)
#endif
/* Above functions are required by POSIX.1-2008, below ones are extensions */
#ifdef HAVE_UNLOCKED_LINE_OPS
# undef fgets
# define fgets(s, n, stream) fgets_unlocked(s, n, stream)
# undef fputs
# define fputs(s, stream) fputs_unlocked(s, stream)
#endif

读写自动转换

如果stream->__modeflags没有设置readonly或writeonly标志，并且libc配置为支持读写自动转换，则读写转换不需要程序员关心，如果libc不支持自动读写转换，则需要注意了

/* C99: Output shall not be directly followed by input without an
intervening call to the fflush function or to a file positioning
function (fseek, fsetpos, or rewind). */

详细可参考_trans2r.c和_trans2w.c文件实现。

narrow & wide reading

主要跟宽字符相关，如果不支持wchar，则默认是narrow reading方式。narrow以单个字符（char）为单位，wide以宽字符（wchar_t）为单位。需要注意的是流的方向一旦设置后，不可进行改变，除非close后重新打开：

if (!(stream->__modeflags & oflag)) {
if (stream->__modeflags & (__FLAG_NARROW|__FLAG_WIDE)) {
__UNDEFINED_OR_NONPORTABLE;
goto DO_EBADF;
}
stream->__modeflags |= oflag;
}

三注意事项

scanf用法

这里有一篇写得还不错的blog，虽然其中有部分表述不正确，但大量的用法实例还是值得借鉴的：http://blog.csdn.net/kobesdu/article/details/39051399 ，其中要特别注意scanf引起的死循环问题，上面在ungot中已经分析过。

fflush清空缓冲区

对于输出，调用fflush立即执行write操作，同时清空缓冲区，但是对于输入，我见到的libc版本fflush()函数部分代码如下：

if (__STDIO_STREAM_IS_WRITING(stream)) {
if (!__STDIO_COMMIT_WRITE_BUFFER(stream)) {
__STDIO_STREAM_DISABLE_PUTC(stream);
__STDIO_STREAM_CLEAR_WRITING(stream);
} else {
retval = EOF;
}

__STDIO_STREAM_IS_WRITING()判断流是否处于写操作中，否则返回错误。C标准也规定对输入流调用fflush()的行为是未定义的，所以为了程序具有可移植性，最好是不要使用fflush来清空输入缓冲区，而应改用其他的方法。

结束语：这部分内容太过繁杂，精力有限，为了节省时间，感觉很多东西都描述得不太清楚，后面有时间再补充整理吧。

http://blog.csdn.net/whuzm08/article/details/73793688

以上是关于linux标准输入输出的主要内容，如果未能解决你的问题，请参考以下文章