大流量、高并发的场景肯定是有很多tcp连接的。
ss命令可以查看这些连接,然而今天看到两台机器cpu负载明显变高了
然后发现ss -nap命令占用了一个cpu的资源,然后查了ss的源码
源码地址:https://github.com/shemminger/iproute2/blob/master/misc/ss.c
阿西吧,p参数会导致遍历/proc/目录下每个进程的fd目录,产生大量的读操作,同时还要创建一个hash表。p参数的作用是显示连接所属的process和pid。
/* Option 'p': bump the show_users counter, then run user_ent_hash_build()
 * (the /proc scan shown below in this article). */
case 'p':
show_users++;
user_ent_hash_build();
break;
show_users++;
user_ent_hash_build();
break;
/*
 * Scan the proc filesystem and register every socket file descriptor found.
 *
 * The proc root is taken from the PROC_ROOT environment variable, falling
 * back to "/proc/". For each numeric directory <pid> under the root, every
 * numeric entry in <pid>/fd/ is read with readlink(); links of the form
 * "socket:[<inode>]" are passed to user_ent_add() together with the process
 * name parsed from <pid>/stat, the pid and the fd number.
 *
 * user_ent_add() is defined elsewhere in the file; presumably it inserts the
 * entry into the user hash table -- confirm against its definition.
 *
 * Failures (unreadable directories, over-long paths, malformed links) are
 * skipped silently: the scan is best-effort.
 */
static void user_ent_hash_build(void)
{
	const char *env_root = getenv("PROC_ROOT");
	const char *root = env_root ? env_root : "/proc/";
	struct dirent *d;
	char name[1024];
	int nameoff;
	DIR *dir;

	/* Bounded copy: PROC_ROOT is caller-controlled and may be arbitrarily
	 * long. Keep one spare byte so a trailing '/' can always be appended. */
	nameoff = snprintf(name, sizeof(name), "%s", root);
	if (nameoff < 0 || (size_t)nameoff >= sizeof(name) - 1)
		return;
	if (nameoff == 0 || name[nameoff - 1] != '/') {
		name[nameoff++] = '/';
		name[nameoff] = '\0';
	}

	dir = opendir(name);
	if (!dir)
		return;

	while ((d = readdir(dir)) != NULL) {
		struct dirent *d1;
		char process[16];
		int pid, pos, n;
		DIR *dir1;
		char crap;

		/* Only purely numeric names are process directories; a trailing
		 * non-digit makes sscanf return 2 and the entry is skipped. */
		if (sscanf(d->d_name, "%d%c", &pid, &crap) != 1)
			continue;

		n = snprintf(name + nameoff, sizeof(name) - nameoff,
			     "%d/fd/", pid);
		if (n < 0 || (size_t)n >= sizeof(name) - nameoff)
			continue;
		pos = nameoff + n;

		if ((dir1 = opendir(name)) == NULL)
			continue;

		/* Process name is looked up lazily, on the first socket found. */
		process[0] = '\0';

		while ((d1 = readdir(dir1)) != NULL) {
			const char *pattern = "socket:[";
			unsigned int ino;
			char lnk[64];
			ssize_t link_len;
			int fd;

			if (sscanf(d1->d_name, "%d%c", &fd, &crap) != 1)
				continue;

			n = snprintf(name + pos, sizeof(name) - pos, "%d", fd);
			if (n < 0 || (size_t)n >= sizeof(name) - pos)
				continue;

			/* readlink() does not NUL-terminate; do it explicitly
			 * before treating the result as a C string. */
			link_len = readlink(name, lnk, sizeof(lnk) - 1);
			if (link_len < 0)
				continue;
			lnk[link_len] = '\0';

			if (strncmp(lnk, pattern, strlen(pattern)) != 0)
				continue;

			/* Skip links whose inode cannot be parsed rather than
			 * registering an uninitialised inode number. */
			if (sscanf(lnk, "socket:[%u]", &ino) != 1)
				continue;

			if (process[0] == '\0') {
				char tmp[1024];
				FILE *fp;

				snprintf(tmp, sizeof(tmp), "%s/%d/stat", root, pid);
				if ((fp = fopen(tmp, "r")) != NULL) {
					/* Width 15 keeps the name plus NUL
					 * inside process[16]. */
					if (fscanf(fp, "%*d (%15[^)])", process) != 1)
						process[0] = '\0';
					fclose(fp);
				}
			}

			user_ent_add(ino, process, pid, fd);
		}
		closedir(dir1);
	}
	closedir(dir);
}
{
const char *root = getenv("PROC_ROOT") ? : "/proc/";
struct dirent *d;
char name[1024];
int nameoff;
DIR *dir;
strcpy(name, root);
if (strlen(name) == 0 || name[strlen(name)-1] != '/')
strcat(name, "/");
nameoff = strlen(name);
dir = opendir(name);
if (!dir)
return;
while ((d = readdir(dir)) != NULL) {
struct dirent *d1;
char process[16];
int pid, pos;
DIR *dir1;
char crap;
if (sscanf(d->d_name, "%d%c", &pid, &crap) != 1)
continue;
sprintf(name + nameoff, "%d/fd/", pid);
pos = strlen(name);
if ((dir1 = opendir(name)) == NULL)
continue;
process[0] = '\0';
while ((d1 = readdir(dir1)) != NULL) {
const char *pattern = "socket:[";
unsigned int ino;
char lnk[64];
int fd;
if (sscanf(d1->d_name, "%d%c", &fd, &crap) != 1)
continue;
sprintf(name+pos, "%d", fd);
if (readlink(name, lnk, sizeof(lnk)-1) < 0 ||
strncmp(lnk, pattern, strlen(pattern)))
continue;
sscanf(lnk, "socket:[%u]", &ino);
if (process[0] == '\0') {
char tmp[1024];
FILE *fp;
snprintf(tmp, sizeof(tmp), "%s/%d/stat", root, pid);
if ((fp = fopen(tmp, "r")) != NULL) {
fscanf(fp, "%*d (%[^)])", process);
fclose(fp);
}
}
user_ent_add(ino, process, pid, fd);
}
closedir(dir1);
}
closedir(dir);
}
所以,不需要显示process和pid这两项信息的时候,不要加p参数
但是同学的命令是ss -nap|grep ESTAB|grep pid=1356,
可以发现是要查询pid=1356进程的ESTAB状态连接
然后呢
root 17642 0.0 0.1 496312 14828 ? SNl Jul20 0:14 /home/ops/bbmon/bbmon
root 10067 0.0 0.0 109680 1704 ? SN 15:09 0:00 \_ /bin/bash -c ss -nap|grep ESTAB|grep pid=1265,
root 10068 98.4 0.0 6292 748 ? RN 15:09 3:51 \_ ss -nap
root 10069 0.0 0.0 103256 868 ? SN 15:09 0:00 \_ grep ESTAB
root 10070 0.0 0.0 103252 868 ? SN 15:09 0:00 \_ grep pid=1265,
root 10067 0.0 0.0 109680 1704 ? SN 15:09 0:00 \_ /bin/bash -c ss -nap|grep ESTAB|grep pid=1265,
root 10068 98.4 0.0 6292 748 ? RN 15:09 3:51 \_ ss -nap
root 10069 0.0 0.0 103256 868 ? SN 15:09 0:00 \_ grep ESTAB
root 10070 0.0 0.0 103252 868 ? SN 15:09 0:00 \_ grep pid=1265,
bbmon看着像是一个监控的程序,看了ss的源码,可以考虑改改源码让这个命令更高效,加一个状态参数、一个pid参数,当然也可以自己造轮子。
转载请注明:小Y » ss -nap命令引起的cpu负载上升