...
 
Commits (19)
......@@ -850,9 +850,11 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
if (new)
blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
out_free_async_qe:
nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
sizeof(struct nvme_command), DMA_TO_DEVICE);
ctrl->async_event_sqe.data = NULL;
if (ctrl->async_event_sqe.data) {
nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
sizeof(struct nvme_command), DMA_TO_DEVICE);
ctrl->async_event_sqe.data = NULL;
}
out_free_queue:
nvme_rdma_free_queue(&ctrl->queues[0]);
return error;
......
......@@ -515,7 +515,7 @@ static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
return 1;
}
static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd)
static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
{
struct nvmet_tcp_queue *queue = cmd->queue;
int ret;
......@@ -523,9 +523,15 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd)
while (cmd->cur_sg) {
struct page *page = sg_page(cmd->cur_sg);
u32 left = cmd->cur_sg->length - cmd->offset;
int flags = MSG_DONTWAIT;
if ((!last_in_batch && cmd->queue->send_list_len) ||
cmd->wbytes_done + left < cmd->req.transfer_len ||
queue->data_digest || !queue->nvme_sq.sqhd_disabled)
flags |= MSG_MORE;
ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset,
left, MSG_DONTWAIT | MSG_MORE);
left, flags);
if (ret <= 0)
return ret;
......@@ -660,7 +666,7 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue,
}
if (cmd->state == NVMET_TCP_SEND_DATA) {
ret = nvmet_try_send_data(cmd);
ret = nvmet_try_send_data(cmd, last_in_batch);
if (ret <= 0)
goto done_send;
}
......
......@@ -540,9 +540,14 @@ static int alloc_fd(unsigned start, unsigned flags)
return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
}
int __get_unused_fd_flags(unsigned flags, unsigned long nofile)
{
return __alloc_fd(current->files, 0, nofile, flags);
}
int get_unused_fd_flags(unsigned flags)
{
return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE));
}
EXPORT_SYMBOL(get_unused_fd_flags);
......
......@@ -343,6 +343,7 @@ struct io_accept {
struct sockaddr __user *addr;
int __user *addr_len;
int flags;
unsigned long nofile;
};
struct io_sync {
......@@ -397,6 +398,7 @@ struct io_open {
struct filename *filename;
struct statx __user *buffer;
struct open_how how;
unsigned long nofile;
};
struct io_files_update {
......@@ -2577,6 +2579,7 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return ret;
}
req->open.nofile = rlimit(RLIMIT_NOFILE);
req->flags |= REQ_F_NEED_CLEANUP;
return 0;
}
......@@ -2618,6 +2621,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return ret;
}
req->open.nofile = rlimit(RLIMIT_NOFILE);
req->flags |= REQ_F_NEED_CLEANUP;
return 0;
}
......@@ -2636,7 +2640,7 @@ static int io_openat2(struct io_kiocb *req, struct io_kiocb **nxt,
if (ret)
goto err;
ret = get_unused_fd_flags(req->open.how.flags);
ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile);
if (ret < 0)
goto err;
......@@ -3321,6 +3325,7 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
accept->flags = READ_ONCE(sqe->accept_flags);
accept->nofile = rlimit(RLIMIT_NOFILE);
return 0;
#else
return -EOPNOTSUPP;
......@@ -3337,7 +3342,8 @@ static int __io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
file_flags = force_nonblock ? O_NONBLOCK : 0;
ret = __sys_accept4_file(req->file, file_flags, accept->addr,
accept->addr_len, accept->flags);
accept->addr_len, accept->flags,
accept->nofile);
if (ret == -EAGAIN && force_nonblock)
return -EAGAIN;
if (ret == -ERESTARTSYS)
......@@ -4131,6 +4137,9 @@ static int io_req_defer_prep(struct io_kiocb *req,
{
ssize_t ret = 0;
if (!sqe)
return 0;
if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req);
if (unlikely(ret))
......@@ -4907,6 +4916,11 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (sqe_flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) {
req->flags |= REQ_F_LINK;
INIT_LIST_HEAD(&req->link_list);
if (io_alloc_async_ctx(req)) {
ret = -EAGAIN;
goto err_req;
}
ret = io_req_defer_prep(req, sqe);
if (ret)
req->flags |= REQ_F_FAIL_LINK;
......
......@@ -85,6 +85,7 @@ extern int f_dupfd(unsigned int from, struct file *file, unsigned flags);
extern int replace_fd(unsigned fd, struct file *file, unsigned flags);
extern void set_close_on_exec(unsigned int fd, int flag);
extern bool get_close_on_exec(unsigned int fd);
extern int __get_unused_fd_flags(unsigned flags, unsigned long nofile);
extern int get_unused_fd_flags(unsigned flags);
extern void put_unused_fd(unsigned int fd);
......
......@@ -401,7 +401,8 @@ extern int __sys_sendto(int fd, void __user *buff, size_t len,
int addr_len);
extern int __sys_accept4_file(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags);
int __user *upeer_addrlen, int flags,
unsigned long nofile);
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol);
......
......@@ -1707,7 +1707,8 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
int __sys_accept4_file(struct file *file, unsigned file_flags,
struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags)
int __user *upeer_addrlen, int flags,
unsigned long nofile)
{
struct socket *sock, *newsock;
struct file *newfile;
......@@ -1738,7 +1739,7 @@ int __sys_accept4_file(struct file *file, unsigned file_flags,
*/
__module_get(newsock->ops->owner);
newfd = get_unused_fd_flags(flags);
newfd = __get_unused_fd_flags(flags, nofile);
if (unlikely(newfd < 0)) {
err = newfd;
sock_release(newsock);
......@@ -1807,7 +1808,8 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
f = fdget(fd);
if (f.file) {
ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
upeer_addrlen, flags);
upeer_addrlen, flags,
rlimit(RLIMIT_NOFILE));
if (f.flags)
fput(f.file);
}
......
......@@ -16,7 +16,7 @@ override CFLAGS += -D_FORTIFY_SOURCE=2
%: %.c
@mkdir -p $(BUILD_OUTPUT)
$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS)
$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap
.PHONY : clean
clean :
......
......@@ -30,7 +30,7 @@
#include <sched.h>
#include <time.h>
#include <cpuid.h>
#include <linux/capability.h>
#include <sys/capability.h>
#include <errno.h>
#include <math.h>
......@@ -304,6 +304,10 @@ int *irqs_per_cpu; /* indexed by cpu_num */
void setup_all_buffers(void);
char *sys_lpi_file;
char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
int cpu_is_not_present(int cpu)
{
return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
......@@ -2916,8 +2920,6 @@ int snapshot_gfx_mhz(void)
*
* record snapshot of
* /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
*
* return 1 if config change requires a restart, else return 0
*/
int snapshot_cpu_lpi_us(void)
{
......@@ -2941,17 +2943,14 @@ int snapshot_cpu_lpi_us(void)
/*
* snapshot_sys_lpi()
*
* record snapshot of
* /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
*
* return 1 if config change requires a restart, else return 0
* record snapshot of sys_lpi_file
*/
int snapshot_sys_lpi_us(void)
{
FILE *fp;
int retval;
fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r");
fp = fopen_or_die(sys_lpi_file, "r");
retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
if (retval != 1) {
......@@ -3151,28 +3150,42 @@ void check_dev_msr()
err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
}
void check_permissions()
/*
* check for CAP_SYS_RAWIO
* return 0 on success
* return 1 on fail
*/
int check_for_cap_sys_rawio(void)
{
struct __user_cap_header_struct cap_header_data;
cap_user_header_t cap_header = &cap_header_data;
struct __user_cap_data_struct cap_data_data;
cap_user_data_t cap_data = &cap_data_data;
extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
int do_exit = 0;
char pathname[32];
cap_t caps;
cap_flag_value_t cap_flag_value;
/* check for CAP_SYS_RAWIO */
cap_header->pid = getpid();
cap_header->version = _LINUX_CAPABILITY_VERSION;
if (capget(cap_header, cap_data) < 0)
err(-6, "capget(2) failed");
caps = cap_get_proc();
if (caps == NULL)
err(-6, "cap_get_proc\n");
if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
do_exit++;
if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
err(-6, "cap_get\n");
if (cap_flag_value != CAP_SET) {
warnx("capget(CAP_SYS_RAWIO) failed,"
" try \"# setcap cap_sys_rawio=ep %s\"", progname);
return 1;
}
if (cap_free(caps) == -1)
err(-6, "cap_free\n");
return 0;
}
void check_permissions(void)
{
int do_exit = 0;
char pathname[32];
/* check for CAP_SYS_RAWIO */
do_exit += check_for_cap_sys_rawio();
/* test file permissions */
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
if (euidaccess(pathname, R_OK)) {
......@@ -3265,6 +3278,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
case INTEL_FAM6_ATOM_TREMONT: /* EHL */
pkg_cstate_limits = glm_pkg_cstate_limits;
break;
default:
......@@ -3336,6 +3350,17 @@ int is_skx(unsigned int family, unsigned int model)
}
return 0;
}
int is_ehl(unsigned int family, unsigned int model)
{
if (!genuine_intel)
return 0;
switch (model) {
case INTEL_FAM6_ATOM_TREMONT:
return 1;
}
return 0;
}
int has_turbo_ratio_limit(unsigned int family, unsigned int model)
{
......@@ -3478,6 +3503,23 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
dump_nhm_cst_cfg();
}
static void dump_sysfs_file(char *path)
{
FILE *input;
char cpuidle_buf[64];
input = fopen(path, "r");
if (input == NULL) {
if (debug)
fprintf(outf, "NSFOD %s\n", path);
return;
}
if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input))
err(1, "%s: failed to read file", path);
fclose(input);
fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
}
static void
dump_sysfs_cstate_config(void)
{
......@@ -3491,6 +3533,15 @@ dump_sysfs_cstate_config(void)
if (!DO_BIC(BIC_sysfs))
return;
if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
fprintf(outf, "cpuidle not loaded\n");
return;
}
dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver");
dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor");
dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro");
for (state = 0; state < 10; ++state) {
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
......@@ -3894,6 +3945,20 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
else
BIC_PRESENT(BIC_PkgWatt);
break;
case INTEL_FAM6_ATOM_TREMONT: /* EHL */
do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
if (rapl_joules) {
BIC_PRESENT(BIC_Pkg_J);
BIC_PRESENT(BIC_Cor_J);
BIC_PRESENT(BIC_RAM_J);
BIC_PRESENT(BIC_GFX_J);
} else {
BIC_PRESENT(BIC_PkgWatt);
BIC_PRESENT(BIC_CorWatt);
BIC_PRESENT(BIC_RAMWatt);
BIC_PRESENT(BIC_GFXWatt);
}
break;
case INTEL_FAM6_SKYLAKE_L: /* SKL */
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
......@@ -4295,6 +4360,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
case INTEL_FAM6_ATOM_TREMONT: /* EHL */
return 1;
}
return 0;
......@@ -4324,6 +4390,7 @@ int has_c8910_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_TREMONT: /* EHL */
return 1;
}
return 0;
......@@ -4610,14 +4677,24 @@ unsigned int intel_model_duplicates(unsigned int model)
case INTEL_FAM6_SKYLAKE:
case INTEL_FAM6_KABYLAKE_L:
case INTEL_FAM6_KABYLAKE:
case INTEL_FAM6_COMETLAKE_L:
case INTEL_FAM6_COMETLAKE:
return INTEL_FAM6_SKYLAKE_L;
case INTEL_FAM6_ICELAKE_L:
case INTEL_FAM6_ICELAKE_NNPI:
case INTEL_FAM6_TIGERLAKE_L:
case INTEL_FAM6_TIGERLAKE:
return INTEL_FAM6_CANNONLAKE_L;
case INTEL_FAM6_ATOM_TREMONT_D:
return INTEL_FAM6_ATOM_GOLDMONT_D;
case INTEL_FAM6_ATOM_TREMONT_L:
return INTEL_FAM6_ATOM_TREMONT;
case INTEL_FAM6_ICELAKE_X:
return INTEL_FAM6_SKYLAKE_X;
}
return model;
}
......@@ -4872,7 +4949,8 @@ void process_cpuid()
do_slm_cstates = is_slm(family, model);
do_knl_cstates = is_knl(family, model);
if (do_slm_cstates || do_knl_cstates || is_cnl(family, model))
if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) ||
is_ehl(family, model))
BIC_NOT_PRESENT(BIC_CPU_c3);
if (!quiet)
......@@ -4907,10 +4985,16 @@ void process_cpuid()
else
BIC_NOT_PRESENT(BIC_CPU_LPI);
if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK))
if (!access(sys_lpi_file_sysfs, R_OK)) {
sys_lpi_file = sys_lpi_file_sysfs;
BIC_PRESENT(BIC_SYS_LPI);
else
} else if (!access(sys_lpi_file_debugfs, R_OK)) {
sys_lpi_file = sys_lpi_file_debugfs;
BIC_PRESENT(BIC_SYS_LPI);
} else {
sys_lpi_file_sysfs = NULL;
BIC_NOT_PRESENT(BIC_SYS_LPI);
}
if (!quiet)
decode_misc_feature_control();
......@@ -5306,7 +5390,7 @@ int get_and_dump_counters(void)
}
void print_version() {
fprintf(outf, "turbostat version 19.08.31"
fprintf(outf, "turbostat version 20.03.20"
" - Len Brown <lenb@kernel.org>\n");
}
......@@ -5323,9 +5407,9 @@ int add_counter(unsigned int msr_num, char *path, char *name,
}
msrp->msr_num = msr_num;
strncpy(msrp->name, name, NAME_BYTES);
strncpy(msrp->name, name, NAME_BYTES - 1);
if (path)
strncpy(msrp->path, path, PATH_BYTES);
strncpy(msrp->path, path, PATH_BYTES - 1);
msrp->width = width;
msrp->type = type;
msrp->format = format;
......