관리-도구

편집 파일: system_calls.cpp

/*
 * OXT - OS eXtensions for boosT
 * Provides important functionality necessary for writing robust server software.
 *
 * Copyright (c) 2010-2018 Phusion Holding B.V.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "system_calls.hpp"
#include "detail/context.hpp"
#include <boost/thread.hpp>
#include <cerrno>

using namespace boost;
using namespace oxt;

/*************************************
 * oxt
 *************************************/

// Table of simulated-failure entries consulted by shouldSimulateFailure().
// It is installed by oxt::setup_random_failure_simulation() and stays NULL
// until that function has been called.
static const ErrorChance *errorChances = NULL;
// Number of entries in errorChances. While this is 0, failure simulation
// is effectively disabled.
static unsigned int nErrorChances = 0;

/*
 * Handler installed for INTERRUPTION_SIGNAL. It intentionally has no
 * effect: the signal only needs to be delivered, not acted upon.
 */
static void
interruption_signal_handler(int sig) {
	(void) sig;
}

/*
 * Prepares the process for system call interruption: clears the signal
 * mask, installs the no-op handler for INTERRUPTION_SIGNAL and asks for
 * system calls to be interrupted (rather than restarted) by that signal.
 * Every step is retried for as long as it fails with EINTR.
 */
void
oxt::setup_syscall_interruption_support() {
	sigset_t emptyMask;
	struct sigaction handlerSpec;
	int rc;

	/* Very important! The signal mask is inherited across fork()
	 * and exec() and we don't know what the parent process did to
	 * us. At least on OS X, having a signal mask blocking important
	 * signals can lead to stuff like waitpid() malfunction.
	 */
	sigemptyset(&emptyMask);
	while ((rc = sigprocmask(SIG_SETMASK, &emptyMask, NULL)) == -1
	    && errno == EINTR)
	{
		// Interrupted; try again.
	}

	handlerSpec.sa_handler = interruption_signal_handler;
	handlerSpec.sa_flags   = 0;
	sigemptyset(&handlerSpec.sa_mask);
	while ((rc = sigaction(INTERRUPTION_SIGNAL, &handlerSpec, NULL)) == -1
	    && errno == EINTR)
	{
		// Interrupted; try again.
	}

	while ((rc = siginterrupt(INTERRUPTION_SIGNAL, 1)) == -1
	    && errno == EINTR)
	{
		// Interrupted; try again.
	}
}

void
oxt::setup_random_failure_simulation(const ErrorChance *_errorChances, unsigned int n) {
	if (n > OXT_MAX_ERROR_CHANCES) {
		throw std::runtime_error("Number of error chances may not exceed OXT_MAX_ERROR_CHANCES");
	}

ErrorChance *storage = new ErrorChance[n];
	for (unsigned int i = 0; i < n; i++) {
		storage[i] = _errorChances[i];
	}
	errorChances = storage;
	nErrorChances = n;
}

static bool
shouldSimulateFailure() {
	if (OXT_UNLIKELY(nErrorChances > 0)) {
		double number = random() / (double) RAND_MAX;
		const ErrorChance *candidates[OXT_MAX_ERROR_CHANCES];
		unsigned int i, n = 0;

for (i = 0; i < nErrorChances; i++) {
			if (number <= errorChances[i].chance) {
				candidates[n] = &errorChances[i];
				n++;
			}
		}

if (n > 0) {
			int choice = random() % n;
			errno = candidates[choice]->errorCode;
			return true;
		} else {
			return false;
		}
	} else {
		return false;
	}
}

/*************************************
 * Passenger::syscalls
 *************************************/

/*
 * CHECK_INTERRUPTION(error_expression, allowSimulatingFailure,
 *                    error_assignment, code)
 *
 * Runs `code` (normally `ret = ::some_syscall(...)`) with interruption
 * handling wrapped around it.
 *
 *  - error_expression: expression that is true when `code` failed.
 *  - allowSimulatingFailure: whether shouldSimulateFailure() may be consulted.
 *  - error_assignment: statement that stores the failure value in the result
 *    variable; executed only when a failure is simulated.
 *  - code: the statement performing the actual call.
 *
 * Flow:
 *  1. If failure simulation is allowed and shouldSimulateFailure() returns
 *     true, `error_assignment` is executed and the macro ends; `code` is
 *     never run and errno keeps the value set by shouldSimulateFailure().
 *  2. Otherwise, if the thread has a thread_local_context, its
 *     syscall_interruption_lock is unlocked for the duration of the call.
 *  3. `code` is executed repeatedly for as long as it fails with EINTR and
 *     either system call interruption is disabled for this thread or no
 *     Boost thread interruption has been requested.
 *  4. The lock is taken again. If the last attempt failed with EINTR while
 *     interruption was enabled and requested, thread_interrupted is thrown.
 *  5. Otherwise errno is restored to the value observed right after `code`.
 *
 * The macro declares locals named ctx, _my_errno and _intr_requested, so
 * the arguments must not use those names for anything else.
 */
#define CHECK_INTERRUPTION(error_expression, allowSimulatingFailure, error_assignment, code) \
	do { \
		if (OXT_UNLIKELY(allowSimulatingFailure && shouldSimulateFailure())) { \
			error_assignment; \
			break; \
		} \
		thread_local_context *ctx = get_thread_local_context(); \
		if (OXT_LIKELY(ctx != NULL)) { \
			ctx->syscall_interruption_lock.unlock(); \
		} \
		int _my_errno; \
		bool _intr_requested = false; \
		do { \
			code; \
			_my_errno = errno; \
		} while ((error_expression) \
			&& _my_errno == EINTR \
			&& (!boost::this_thread::syscalls_interruptable() \
			    || !(_intr_requested = boost::this_thread::interruption_requested())) \
		); \
		if (OXT_LIKELY(ctx != NULL)) { \
			ctx->syscall_interruption_lock.lock(); \
		} \
		if ((error_expression) \
		 && _my_errno == EINTR \
		 && boost::this_thread::syscalls_interruptable() \
		 && _intr_requested) { \
			throw thread_interrupted(); \
		} \
		errno = _my_errno; \
	} while (false)

/*
 * ::open(path, oflag) run through CHECK_INTERRUPTION with failure
 * simulation enabled. Returns the descriptor from ::open(), or -1 when the
 * call failed or a failure was simulated.
 */
int
syscalls::open(const char *path, int oflag) {
	int newFd;
	CHECK_INTERRUPTION(newFd == -1, true, newFd = -1,
		newFd = ::open(path, oflag));
	return newFd;
}

/*
 * ::open(path, oflag, mode) run through CHECK_INTERRUPTION with failure
 * simulation enabled. Returns the descriptor from ::open(), or -1 when the
 * call failed or a failure was simulated.
 */
int
syscalls::open(const char *path, int oflag, mode_t mode) {
	int newFd;
	CHECK_INTERRUPTION(newFd == -1, true, newFd = -1,
		newFd = ::open(path, oflag, mode));
	return newFd;
}

/*
 * ::openat(dirfd, path, oflag) run through CHECK_INTERRUPTION with failure
 * simulation enabled. Returns the descriptor from ::openat(), or -1 when
 * the call failed or a failure was simulated.
 */
int
syscalls::openat(int dirfd, const char *path, int oflag) {
	int newFd;
	CHECK_INTERRUPTION(newFd == -1, true, newFd = -1,
		newFd = ::openat(dirfd, path, oflag));
	return newFd;
}

/*
 * ::openat(dirfd, path, oflag, mode) run through CHECK_INTERRUPTION with
 * failure simulation enabled. Returns the descriptor from ::openat(), or
 * -1 when the call failed or a failure was simulated.
 */
int
syscalls::openat(int dirfd, const char *path, int oflag, mode_t mode) {
	int newFd;
	CHECK_INTERRUPTION(newFd == -1, true, newFd = -1,
		newFd = ::openat(dirfd, path, oflag, mode));
	return newFd;
}

/*
 * ::read(fd, buf, count) run through CHECK_INTERRUPTION with failure
 * simulation enabled. Returns the value from ::read(), or -1 when the call
 * failed or a failure was simulated.
 */
ssize_t
syscalls::read(int fd, void *buf, size_t count) {
	ssize_t nread;
	CHECK_INTERRUPTION(nread == -1, true, nread = -1,
		nread = ::read(fd, buf, count));
	return nread;
}

/*
 * ::write(fd, buf, count) run through CHECK_INTERRUPTION with failure
 * simulation enabled. Returns the value from ::write(), or -1 when the
 * call failed or a failure was simulated.
 */
ssize_t
syscalls::write(int fd, const void *buf, size_t count) {
	ssize_t nwritten;
	CHECK_INTERRUPTION(nwritten == -1, true, nwritten = -1,
		nwritten = ::write(fd, buf, count));
	return nwritten;
}

/*
 * ::writev(fd, iov, iovcnt) run through CHECK_INTERRUPTION with failure
 * simulation enabled. Returns the value from ::writev(), or -1 when the
 * call failed or a failure was simulated.
 */
ssize_t
syscalls::writev(int fd, const struct iovec *iov, int iovcnt) {
	ssize_t nwritten;
	CHECK_INTERRUPTION(nwritten == -1, true, nwritten = -1,
		nwritten = ::writev(fd, iov, iovcnt));
	return nwritten;
}

/*
 * Interruption-aware close().
 *
 * When _hpux is defined the call goes through CHECK_INTERRUPTION like the
 * other wrappers. On every other platform ::close() is called exactly once
 * and is never retried on EINTR, for the reasons given below.
 *
 * Returns the result of ::close(), or -1 when a failure was simulated
 * (errno is then the value chosen by shouldSimulateFailure()).
 * Throws thread_interrupted if ::close() failed with EINTR while system
 * call interruption is enabled for this thread and an interruption was
 * requested.
 */
int
syscalls::close(int fd) {
	/* Apparently POSIX says that if close() returns EINTR the
	 * file descriptor will be left in an undefined state, so
	 * when coding for POSIX we can't just loop on EINTR or we
	 * could run into race conditions with other threads.
	 * http://www.daemonology.net/blog/2011-12-17-POSIX-close-is-broken.html
	 *
	 * On Linux, FreeBSD and OpenBSD, close() releases the file
	 * descriptor when it returns EINTR. HP-UX does not.
	 * http://news.ycombinator.com/item?id=3363884
	 *
	 * MacOS X is insane because although the system call does
	 * release the file descriptor, the close() function as
	 * implemented by libSystem may call pthread_testcancel() first
	 * which can also return EINTR. Whether this happens depends
	 * on whether unix2003 is enabled.
	 * http://www.reddit.com/r/programming/comments/ng6vt/posix_close2_is_broken/c38xrgu
	 */
	#if defined(_hpux)
		int ret;
		CHECK_INTERRUPTION(
			ret == -1,
			true,
			ret = -1,
			ret = ::close(fd)
		);
		return ret;
	#else
		// The descriptor is left open when a failure is simulated.
		if (shouldSimulateFailure()) {
			return -1;
		}

		// Release the interruption lock (if this thread has a context)
		// while the call is in progress.
thread_local_context *ctx = get_thread_local_context();
		if (OXT_UNLIKELY(ctx != NULL)) {
			ctx->syscall_interruption_lock.unlock();
		}

		/* TODO: If it's not known whether the OS releases the file
		 * descriptor on EINTR-on-close(), we should print some kind of
		 * warning here. This would actually explain why some people get
		 * mysterious EBADF errors. I think the best thing we can do is
		 * to manually whitelist operating systems as we find out their
		 * behaviors.
		 */
		int ret = ::close(fd);
		if (OXT_UNLIKELY(ctx != NULL)) {
			// Re-acquiring the lock must not disturb the errno
			// reported by ::close().
			int e = errno;
			ctx->syscall_interruption_lock.lock();
			errno = e;
		}
		if (ret == -1
		 && errno == EINTR
		 && boost::this_thread::syscalls_interruptable()
		 && boost::this_thread::interruption_requested()) {
			throw thread_interrupted();
		} else {
			return ret;
		}
	#endif
}

/*
 * ::pipe(filedes) run through CHECK_INTERRUPTION with failure simulation
 * enabled. Returns the value from ::pipe(), or -1 when the call failed or
 * a failure was simulated.
 */
int
syscalls::pipe(int filedes[2]) {
	int rc;
	CHECK_INTERRUPTION(rc == -1, true, rc = -1,
		rc = ::pipe(filedes));
	return rc;
}

/*
 * ::dup2(filedes, filedes2) run through CHECK_INTERRUPTION with failure
 * simulation enabled. Returns the value from ::dup2(), or -1 when the call
 * failed or a failure was simulated.
 */
int
syscalls::dup2(int filedes, int filedes2) {
	int rc;
	CHECK_INTERRUPTION(rc == -1, true, rc = -1,
		rc = ::dup2(filedes, filedes2));
	return rc;
}

/*
 * ::mkdir(pathname, mode) run through CHECK_INTERRUPTION with failure
 * simulation enabled. Returns the value from ::mkdir(), or -1 when the
 * call failed or a failure was simulated.
 */
int
syscalls::mkdir(const char *pathname, mode_t mode) {
	int rc;
	CHECK_INTERRUPTION(rc == -1, true, rc = -1,
		rc = ::mkdir(pathname, mode));
	return rc;
}

/*
 * ::chown(path, owner, group) run through CHECK_INTERRUPTION with failure
 * simulation enabled. Returns the value from ::chown(), or -1 when the
 * call failed or a failure was simulated.
 */
int
syscalls::chown(const char *path, uid_t owner, gid_t group) {
	int rc;
	CHECK_INTERRUPTION(rc == -1, true, rc = -1,
		rc = ::chown(path, owner, group));
	return rc;
}

/*
 * ::accept(sockfd, addr, addrlen) run through CHECK_INTERRUPTION with
 * failure simulation enabled. Returns the value from ::accept(), or -1
 * when the call failed or a failure was simulated.
 */
int
syscalls::accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen) {
	int clientFd;
	CHECK_INTERRUPTION(clientFd == -1, true, clientFd = -1,
		clientFd = ::accept(sockfd, addr, addrlen));
	return clientFd;
}

/*
 * ::bind(sockfd, addr, addrlen) run through CHECK_INTERRUPTION with
 * failure simulation enabled. Returns the value from ::bind(), or -1 when
 * the call failed or a failure was simulated.
 */
int
syscalls::bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen) {
	int rc;
	CHECK_INTERRUPTION(rc == -1, true, rc = -1,
		rc = ::bind(sockfd, addr, addrlen));
	return rc;
}

int
syscalls::connect(int sockfd, const struct sockaddr *serv_addr, socklen_t addrlen) {
	int ret;
	// FIXME: I don't think this is entirely correct.
	// http://www.madore.org/~david/computers/connect-intr.html
	CHECK_INTERRUPTION(
		ret == -1,
		true,
		ret = -1,
		ret = ::connect(sockfd, serv_addr, addrlen);
	);
	return ret;
}

/*
 * ::listen(sockfd, backlog) run through CHECK_INTERRUPTION with failure
 * simulation enabled. Returns the value from ::listen(), or -1 when the
 * call failed or a failure was simulated.
 */
int
syscalls::listen(int sockfd, int backlog) {
	int rc;
	CHECK_INTERRUPTION(rc == -1, true, rc = -1,
		rc = ::listen(sockfd, backlog));
	return rc;
}

/*
 * ::socket(domain, type, protocol) run through CHECK_INTERRUPTION with
 * failure simulation enabled. Returns the value from ::socket(), or -1
 * when the call failed or a failure was simulated.
 */
int
syscalls::socket(int domain, int type, int protocol) {
	int sockFd;
	CHECK_INTERRUPTION(sockFd == -1, true, sockFd = -1,
		sockFd = ::socket(domain, type, protocol));
	return sockFd;
}

/*
 * ::socketpair(d, type, protocol, sv) run through CHECK_INTERRUPTION with
 * failure simulation enabled. Returns the value from ::socketpair(), or -1
 * when the call failed or a failure was simulated.
 */
int
syscalls::socketpair(int d, int type, int protocol, int sv[2]) {
	int rc;
	CHECK_INTERRUPTION(rc == -1, true, rc = -1,
		rc = ::socketpair(d, type, protocol, sv));
	return rc;
}

/*
 * Receives a message through CHECK_INTERRUPTION with failure simulation
 * enabled. ::nrecvmsg() is used when _AIX53 is defined, ::recvmsg()
 * otherwise. Returns the value from that call, or -1 when it failed or a
 * failure was simulated.
 */
ssize_t
syscalls::recvmsg(int s, struct msghdr *msg, int flags) {
	ssize_t nreceived;
	#ifdef _AIX53
		CHECK_INTERRUPTION(nreceived == -1, true, nreceived = -1,
			nreceived = ::nrecvmsg(s, msg, flags));
	#else
		CHECK_INTERRUPTION(nreceived == -1, true, nreceived = -1,
			nreceived = ::recvmsg(s, msg, flags));
	#endif
	return nreceived;
}

/*
 * Sends a message through CHECK_INTERRUPTION with failure simulation
 * enabled. ::nsendmsg() is used when _AIX53 is defined, ::sendmsg()
 * otherwise. Returns the value from that call, or -1 when it failed or a
 * failure was simulated.
 */
ssize_t
syscalls::sendmsg(int s, const struct msghdr *msg, int flags) {
	ssize_t nsent;
	#ifdef _AIX53
		CHECK_INTERRUPTION(nsent == -1, true, nsent = -1,
			nsent = ::nsendmsg(s, msg, flags));
	#else
		CHECK_INTERRUPTION(nsent == -1, true, nsent = -1,
			nsent = ::sendmsg(s, msg, flags));
	#endif
	return nsent;
}

/*
 * ::setsockopt(s, level, optname, optval, optlen) run through
 * CHECK_INTERRUPTION with failure simulation enabled. Returns the value
 * from ::setsockopt(), or -1 when the call failed or a failure was
 * simulated.
 */
int
syscalls::setsockopt(int s, int level, int optname, const void *optval, socklen_t optlen) {
	int rc;
	CHECK_INTERRUPTION(rc == -1, true, rc = -1,
		rc = ::setsockopt(s, level, optname, optval, optlen));
	return rc;
}

/*
 * ::shutdown(s, how) run through CHECK_INTERRUPTION with failure
 * simulation enabled. Returns the value from ::shutdown(), or -1 when the
 * call failed or a failure was simulated.
 */
int
syscalls::shutdown(int s, int how) {
	int rc;
	CHECK_INTERRUPTION(rc == -1, true, rc = -1,
		rc = ::shutdown(s, how));
	return rc;
}

/*
 * ::select(nfds, readfds, writefds, errorfds, timeout) run through
 * CHECK_INTERRUPTION with failure simulation enabled. Returns the value
 * from ::select(), or -1 when the call failed or a failure was simulated.
 */
int
syscalls::select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *errorfds, struct timeval *timeout) {
	int nready;
	CHECK_INTERRUPTION(nready == -1, true, nready = -1,
		nready = ::select(nfds, readfds, writefds, errorfds, timeout));
	return nready;
}

/*
 * ::poll(fds, nfds, timeout) run through CHECK_INTERRUPTION with failure
 * simulation enabled. Returns the value from ::poll(), or -1 when the call
 * failed or a failure was simulated.
 */
int
syscalls::poll(struct pollfd fds[], nfds_t nfds, int timeout) {
	int nready;
	CHECK_INTERRUPTION(nready == -1, true, nready = -1,
		nready = ::poll(fds, nfds, timeout));
	return nready;
}

/*
 * ::fopen(path, mode) run through CHECK_INTERRUPTION with failure
 * simulation enabled. Returns the stream from ::fopen(), or NULL when the
 * call failed or a failure was simulated.
 */
FILE *
syscalls::fopen(const char *path, const char *mode) {
	FILE *stream;
	CHECK_INTERRUPTION(stream == NULL, true, stream = NULL,
		stream = ::fopen(path, mode));
	return stream;
}

/*
 * ::fread(ptr, size, nitems, stream) run through CHECK_INTERRUPTION with
 * failure simulation enabled.
 *
 * The call counts as failed when no items were read and the stream's
 * error indicator is set. Returns the number of items read as reported by
 * ::fread(), or 0 when a failure was simulated.
 */
size_t
syscalls::fread(void *ptr, size_t size, size_t nitems, FILE *stream) {
	// Must be size_t, like the return type of ::fread(): storing the item
	// count in an int would truncate (or turn negative) large counts.
	size_t ret;
	CHECK_INTERRUPTION(
		ret == 0 && ferror(stream),
		true,
		ret = 0,
		ret = ::fread(ptr, size, nitems, stream)
	);
	return ret;
}

int
syscalls::fclose(FILE *fp) {
	int ret;
	CHECK_INTERRUPTION(
		ret == EOF,
		true,
		ret = EOF,
		ret = ::fclose(fp)
	);
	return ret;
}

/*
 * ::unlink(pathname) run through CHECK_INTERRUPTION with failure
 * simulation enabled. Returns the value from ::unlink(), or -1 when the
 * call failed or a failure was simulated.
 */
int
syscalls::unlink(const char *pathname) {
	int rc;
	CHECK_INTERRUPTION(rc == -1, true, rc = -1,
		rc = ::unlink(pathname));
	return rc;
}

/*
 * ::stat(path, buf) run through CHECK_INTERRUPTION with failure simulation
 * enabled. Returns the value from ::stat(), or -1 when the call failed or
 * a failure was simulated.
 */
int
syscalls::stat(const char *path, struct stat *buf) {
	int rc;
	CHECK_INTERRUPTION(rc == -1, true, rc = -1,
		rc = ::stat(path, buf));
	return rc;
}

/*
 * ::lstat(path, buf) run through CHECK_INTERRUPTION with failure
 * simulation enabled. Returns the value from ::lstat(), or -1 when the
 * call failed or a failure was simulated.
 */
int
syscalls::lstat(const char *path, struct stat *buf) {
	int rc;
	CHECK_INTERRUPTION(rc == -1, true, rc = -1,
		rc = ::lstat(path, buf));
	return rc;
}

/*
 * ::time(t) run through CHECK_INTERRUPTION. Failure simulation is disabled
 * for this call, so the value returned always comes from ::time().
 */
time_t
syscalls::time(time_t *t) {
	time_t now;
	CHECK_INTERRUPTION(now == (time_t) -1, false, (void) 0,
		now = ::time(t));
	return now;
}

/*
 * Sleeps for the given number of seconds via syscalls::nanosleep().
 *
 * Returns 0 when the sleep completed. If syscalls::nanosleep() fails with
 * EINTR, the whole seconds of `rem` are returned. Any other failure also
 * yields 0 because this function has no way to report an error.
 * Exceptions thrown by syscalls::nanosleep() propagate to the caller.
 */
unsigned int
syscalls::sleep(unsigned int seconds) {
	// We use syscalls::nanosleep() here not only to reuse interruption
	// handling code, but also to avoid potential infinite loops
	// in combination with oxt::thread::interrupt_and_join().
	// Upon interruption sleep() returns the number of seconds unslept
	// but interrupt_and_join() keeps interrupting the thread every 10
	// msec. Depending on the implementation of sleep(), it might return
	// the same value as its original argument. A naive implementation
	// of syscalls::sleep() that sleeps again with the return value
	// could easily cause an infinite loop. nanosleep() has a large
	// enough resolution so it won't trigger the problem.
	struct timespec spec, rem;
	int ret;

	spec.tv_sec = seconds;
	spec.tv_nsec = 0;
	// syscalls::nanosleep() does not fill in `rem` when it fails, so give
	// it a defined value up front instead of reading an uninitialized
	// struct in the EINTR branch below.
	rem.tv_sec = 0;
	rem.tv_nsec = 0;

	ret = syscalls::nanosleep(&spec, &rem);
	if (ret == 0) {
		return 0;
	} else if (errno == EINTR) {
		return static_cast<unsigned int>(rem.tv_sec);
	} else {
		// Not sure what to do here. There's an error
		// but we can't return -1. Let's just hope
		// this never happens.
		return 0;
	}
}

/*
 * Sleeps for `usec` microseconds by converting the duration to a timespec
 * and delegating to syscalls::nanosleep(), whose result is returned.
 */
int
syscalls::usleep(useconds_t usec) {
	// Delegating to syscalls::nanosleep() reuses its logic for sleeping
	// the remaining amount of time when a signal was received but system
	// call interruption is disabled.
	const useconds_t USEC_PER_SEC = 1000000;
	struct timespec duration;

	duration.tv_sec = usec / USEC_PER_SEC;
	duration.tv_nsec = (usec % USEC_PER_SEC) * 1000;
	return syscalls::nanosleep(&duration, NULL);
}

/*
 * Interruption-aware nanosleep().
 *
 * Sleeps for *req. When ::nanosleep() is interrupted by a signal (EINTR)
 * and either system call interruption is disabled for this thread or no
 * interruption has been requested, sleeping resumes for the remaining
 * time.
 *
 * Returns 0 when the full duration was slept; *rem (if not NULL) is then
 * set to zero. Returns -1 with errno set when ::nanosleep() fails for a
 * reason other than EINTR; *rem is left untouched in that case.
 * Throws thread_interrupted if the sleep was interrupted while system call
 * interruption is enabled and an interruption was requested.
 *
 * Failure simulation is never applied to this function.
 */
int
syscalls::nanosleep(const struct timespec *req, struct timespec *rem) {
	struct timespec req2 = *req;
	struct timespec rem2;
	int ret, e;
	bool intr_requested = false;

	/* We never simulate failure in this function. */

	thread_local_context *ctx = get_thread_local_context();
	if (OXT_UNLIKELY(ctx != NULL)) {
		ctx->syscall_interruption_lock.unlock();
	}

	do {
		ret = ::nanosleep(&req2, &rem2);
		e = errno;
		// rem2 is only filled in by ::nanosleep() when it was interrupted,
		// so it must not be inspected for any other kind of failure.
		if (ret == -1 && e == EINTR) {
			/* nanosleep() on some systems is sometimes buggy. rem2
			 * could end up containing a tv_sec with a value near 2^32-1,
			 * probably because of integer wrapping bugs in the kernel.
			 * So we check for those: the remaining time is only trusted
			 * if it does not exceed the originally requested time. The
			 * nanosecond part is compared too, otherwise every sleep
			 * whose remaining tv_sec equals the requested tv_sec (for
			 * example any sub-second sleep) would be cut short.
			 */
			bool remainingIsSane =
				rem2.tv_sec < req->tv_sec
				|| (rem2.tv_sec == req->tv_sec
				    && rem2.tv_nsec <= req->tv_nsec);
			if (remainingIsSane) {
				req2 = rem2;
			} else {
				req2.tv_sec = 0;
				req2.tv_nsec = 0;
			}
		}
	} while (ret == -1
		&& e == EINTR
		&& (!boost::this_thread::syscalls_interruptable()
		    || !(intr_requested = boost::this_thread::interruption_requested()))
	);

	if (OXT_UNLIKELY(ctx != NULL)) {
		ctx->syscall_interruption_lock.lock();
	}

	if (ret == -1
	 && e == EINTR
	 && boost::this_thread::syscalls_interruptable()
	 && intr_requested) {
		throw thread_interrupted();
	}
	errno = e;
	if (ret == 0 && rem != NULL) {
		// The sleep ran to completion, so nothing remains. rem2 must not
		// be copied here: it is either uninitialized or left over from an
		// earlier interrupted attempt.
		rem->tv_sec = 0;
		rem->tv_nsec = 0;
	}
	return ret;
}

/*
 * Calls ::fork(), retrying for as long as it fails with EINTR. Returns
 * whatever the final ::fork() call returned. No failure simulation and no
 * interruption handling is applied.
 */
pid_t
syscalls::fork() {
	/* The syscall_interruption_lock is deliberately left alone here
	 * because touching it can cause an infinite loop. Suppose that we
	 * unlock syscall_interruption_lock, then another thread calls
	 * interrupt() on this thread (which in turn locks
	 * syscall_interruption_lock), and then we context switch back to
	 * this thread and the fork() proceeds. In the subprocess,
	 * syscall_interruption_lock will never be unlocked and so we're
	 * stuck forever trying to obtain the lock.
	 */
	pid_t pid;
	while ((pid = ::fork()) == -1 && errno == EINTR) {
		// Interrupted; try again.
	}
	return pid;
}

/*
 * ::kill(pid, sig) run through CHECK_INTERRUPTION with failure simulation
 * enabled. Returns the value from ::kill(), or -1 when the call failed or
 * a failure was simulated.
 */
int
syscalls::kill(pid_t pid, int sig) {
	int rc;
	CHECK_INTERRUPTION(rc == -1, true, rc = -1,
		rc = ::kill(pid, sig));
	return rc;
}

/*
 * ::killpg(pgrp, sig) run through CHECK_INTERRUPTION with failure
 * simulation enabled. Returns the value from ::killpg(), or -1 when the
 * call failed or a failure was simulated.
 */
int
syscalls::killpg(pid_t pgrp, int sig) {
	int rc;
	CHECK_INTERRUPTION(rc == -1, true, rc = -1,
		rc = ::killpg(pgrp, sig));
	return rc;
}

/*
 * ::waitpid(pid, status, options) run through CHECK_INTERRUPTION with
 * failure simulation enabled. Returns the value from ::waitpid(), or -1
 * when the call failed or a failure was simulated.
 */
pid_t
syscalls::waitpid(pid_t pid, int *status, int options) {
	pid_t reaped;
	CHECK_INTERRUPTION(reaped == -1, true, reaped = -1,
		reaped = ::waitpid(pid, status, options));
	return reaped;
}

/*************************************
 * boost::this_thread
 *************************************/

#ifdef OXT_THREAD_LOCAL_KEYWORD_SUPPORTED
	/* The per-thread flag is stored as an int rather than a bool, because
	 * a bug in GCC 4.6 can cause segmentation faults for bool TLS variables.
	 * https://code.google.com/p/phusion-passenger/issues/detail?id=902
	 * http://stackoverflow.com/questions/20410943/segmentation-fault-when-accessing-statically-initialized-thread-variable?noredirect=1#comment30483943_20410943
	 * https://bugzilla.redhat.com/show_bug.cgi?id=731228
	 */
	__thread int boost::this_thread::_syscalls_interruptable = 1;

	/* Tells whether system call interruption is enabled for the calling
	 * thread. The flag starts out enabled.
	 */
	bool
	boost::this_thread::syscalls_interruptable() {
		return _syscalls_interruptable != 0;
	}
#else
	boost::thread_specific_ptr<bool> boost::this_thread::_syscalls_interruptable;

	/* Tells whether system call interruption is enabled for the calling
	 * thread. A thread that has no flag stored yet counts as enabled.
	 */
	bool
	boost::this_thread::syscalls_interruptable() {
		const bool *flag = _syscalls_interruptable.get();
		return flag == NULL || *flag;
	}
#endif