Using CPU Affinity and How It Works -- Including How to Bind VCPUs to PCPUs in Xen
CPU affinity means that a process keeps running on a designated CPU for as long as possible without being migrated to other processors. The term is translated from "affinity", which is a little misleading: it sounds like a mere preference, when what it really expresses is an exclusive attachment, so "CPU binding" describes it better. In programmers' slang it is simply called pinning a process to a CPU, or binding cores.
On a multi-core machine, each CPU has its own cache holding data the process is using. If the OS scheduler migrates the process to another CPU, the cache hit rate drops. Once the process is bound to a CPU, it always runs there and the OS no longer schedules it elsewhere, which gives a measurable performance improvement.
Another reason to bind cores is to isolate important business processes. A real-time process with high scheduling priority can be bound to a dedicated core: this guarantees the real-time process gets scheduled, and also keeps it from disturbing the processes running on the other CPUs.
1. Using CPU affinity in user space
In user space, Linux exposes CPU affinity as a cpu_set_t bitmask; two functions set and query the mask:
#define _GNU_SOURCE             /* See feature_test_macros(7) */
#include <sched.h>

int sched_setaffinity(pid_t pid, size_t cpusetsize, const cpu_set_t *mask);
int sched_getaffinity(pid_t pid, size_t cpusetsize, cpu_set_t *mask);
sched_setaffinity sets the affinity mask of the process identified by pid, with mask as an input parameter; sched_getaffinity retrieves the affinity mask of pid, with mask as an output parameter. A pid of 0 refers to the calling thread.
cpusetsize is normally just sizeof(cpu_set_t).
cpu_set_t is a bitmask of 1024 bits, one bit per CPU core. The following macros operate on this mask; if needed, a process can be bound to several CPUs at once.
void CPU_ZERO(cpu_set_t *set);
void CPU_SET(int cpu, cpu_set_t *set);
void CPU_CLR(int cpu, cpu_set_t *set);
int  CPU_ISSET(int cpu, cpu_set_t *set);
The mask reads as follows: 0x23 is 00100011 in binary, which means the process is bound to core 0, core 1 and core 5.
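A minimal sketch (not part of the original example) of building exactly that 0x23 mask with the macros above:
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
    cpu_set_t set;

    CPU_ZERO(&set);    /* clear every bit */
    CPU_SET(0, &set);  /* bit 0 -> core 0 */
    CPU_SET(1, &set);  /* bit 1 -> core 1 */
    CPU_SET(5, &set);  /* bit 5 -> core 5; the mask is now 0x23 */

    /* CPU_ISSET tests a single bit; prints 1 here */
    printf("core 5 in set: %d\n", CPU_ISSET(5, &set) ? 1 : 0);
    return 0;
}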
One caveat when binding cores: a child process inherits the binding of its parent.
A code example:
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sched.h>
#include <pthread.h>
#include <sys/syscall.h>
#define gettid() syscall(__NR_gettid)
void *test_thread(void *arg)
{
cpu_set_t mask;
int loop = 0;
int cpu_num = 0;
cpu_num = sysconf(_SC_NPROCESSORS_CONF);
pthread_detach(pthread_self());
CPU_ZERO(&mask);
CPU_SET(1, &mask);
if(sched_setaffinity(0, sizeof(mask), &mask) == -1)
{
printf("set affinity failedn");
}
while(1)
{
CPU_ZERO(&mask);
if(sched_getaffinity(0, sizeof(mask), &mask) == -1)
{
printf("get failedn");
}
for(loop = 0; loop < cpu_num; loop++)
{
if(CPU_ISSET(loop, &mask))
{
printf("test thread %lu run on processor %dn",
gettid(), loop);
}
}
sleep(1);
}
}
void *child_thread(void *arg)
{
cpu_set_t mask;
int loop = 0;
int cpu_num = 0;
cpu_num = sysconf(_SC_NPROCESSORS_CONF);
pthread_detach(pthread_self());
while(1)
{
CPU_ZERO(&mask);
if(sched_getaffinity(0, sizeof(mask), &mask) == -1)
{
printf("get failedn");
}
for(loop = 0; loop < cpu_num; loop++)
{
if(CPU_ISSET(loop, &mask))
{
printf("child thread %lu run on processor %dn",
gettid(), loop);
}
}
sleep(1);
}
}
int main(int argc, char *argv[])
{
int cpu_num = 0;
pthread_t thread;
int cpuid = 0;
int ret = 0;
int loop = 0;
cpu_set_t mask_set;
cpu_set_t mask_get;
if(argc != 2)
{
printf("usage:cpu numn");
return -1;
}
cpuid = atoi(argv[1]);
/* get the number of CPUs in the system */
cpu_num = sysconf(_SC_NPROCESSORS_CONF);
printf("system has %i processor.n", cpu_num);
/* initialise mask_set */
CPU_ZERO(&mask_set);
CPU_SET(cpuid, &mask_set);
if(sched_setaffinity(0, sizeof(mask_set), &mask_set) == -1)
{
printf("Warning:set cpu %d affinity failedn", cpuid);
}
ret = pthread_create(&thread, NULL, child_thread, NULL);
if(ret)
{
printf("Error:pthread_create failedn");
return -1;
}
ret = pthread_create(&thread, NULL, test_thread, NULL);
if(ret)
{
printf("Error:pthread_create failedn");
return -1;
}
while(1)
{
CPU_ZERO(&mask_get);
if(sched_getaffinity(0, sizeof(mask_get), &mask_get) == -1)
{
printf("Warning:get cpu %d affinity failedn", cpuid);
}
for(loop = 0; loop < cpu_num; loop++)
{
if(CPU_ISSET(loop, &mask_get))
{
printf("this processor %lu is running on processor:
%dn", gettid(), loop);
}
}
sleep(1);
}
return 0;
}
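To try it (affinity.c is just an assumed file name), compile with the pthread library and pass the core to bind to:
gcc -o affinity affinity.c -lpthread
./affinity 3
main binds itself to the core given on the command line before creating the two threads, so child_thread inherits that mask and keeps reporting core 3, while test_thread immediately re-pins itself to core 1.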
After the program runs, its printed output together with /proc/<pid>/status lets you verify the binding; the status file contains
Cpus_allowed: 08
Cpus_allowed_list: 3
which shows the process's binding state clearly.
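You can query this directly (1234 stands for the pid of interest):
grep Cpus_allowed /proc/1234/status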
But if the process is already running and you cannot change its code, use the taskset tool to alter the CPU affinity instead.
taskset [options] -p [mask] pid
The mask has the same format described above; see the man page for further details.
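For example (pid 1234 is again a placeholder):
taskset -p 1234          # print the current affinity mask of pid 1234
taskset -p 0x23 1234     # bind pid 1234 to cores 0, 1 and 5
taskset -cp 0,1,5 1234   # the same binding, expressed as a CPU list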
2. The CPU-affinity mechanism in kernel space
The kernel's process descriptor, task_struct, contains a field:
cpumask_t cpus_allowed;
It records the task's CPU binding. The scheduler in particular consults it to guarantee that a task is never migrated to a CPU outside the mask:
static inline
int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
{
	int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);

	/*
	 * In order not to call set_task_cpu() on a blocking task we need
	 * to rely on ttwu() to place the task on a valid ->cpus_allowed
	 * cpu.
	 *
	 * Since this is common to all placement strategies, this lives here.
	 *
	 * [ this allows ->select_task() to simply return task_cpu(p) and
	 *   not worry about this generic constraint ]
	 */
	if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
		     !cpu_online(cpu)))
		cpu = select_fallback_rq(task_cpu(p), p);

	return cpu;
}
When selecting a run queue for a task, the scheduler only considers CPUs the task is allowed on, testing each candidate with cpumask_test_cpu.
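For reference, cpumask_test_cpu is just a bit test over the mask. A rough sketch of its shape in kernels of that era (simplified; the real definition also bounds-checks cpu):
static inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask)
{
	return test_bit(cpu, cpumask_bits(cpumask));
}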
3. Binding VCPUs to PCPUs in Xen
The xm vcpu-pin command pins a vcpu to a physical cpu:
xm vcpu-pin domname vcpu cpu
vcpu -- the virtual cpu number (the numbers can be obtained with cat /proc/cpuinfo, normally 0 to N-1, where N is the number of cores)
cpu -- the physical cpu number on the host
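For example, to pin vcpu 0 of a guest to physical cpu 3 (guest1 is a placeholder domain name):
xm vcpu-pin guest1 0 3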
A walk through the code:
/xen-4.0.0/tools/python/xen/xm/main.py
96 SUBCOMMAND_HELP = {
97 # common commands
------------------------------------------
160 'vcpu-pin' : ('<Domain> <VCPU|all> <CPUs|all>',
161 'Set which CPUs a VCPU can use.'),
------------------------------------------
261 }
-----------------------------------------
376 domain_commands = [
-----------------------------------------
408 "vcpu-pin",
-----------------------------------------
1455 def xm_vcpu_pin(args):
    arg_check(args, "vcpu-pin", 3)

    def cpu_make_map(cpulist):
        cpus = []
        for c in cpulist.split(','):
            if c.find('-') != -1:
                (x, y) = c.split('-')
                for i in range(int(x), int(y) + 1):
                    cpus.append(int(i))
            else:
                # remove this element from the list
                if c[0] == '^':
                    cpus = [x for x in cpus if x != int(c[1:])]
                else:
                    cpus.append(int(c))
        cpus.sort()
        return ",".join(map(str, cpus))

    dom = args[0]
    vcpu = args[1]
    if args[2] == 'all':
        cpumap = cpu_make_map('0-63')
    else:
        cpumap = cpu_make_map(args[2])

    if serverType == SERVER_XEN_API:
        server.xenapi.VM.add_to_VCPUs_params_live(
            get_single_vm(dom), "cpumap%i" % int(vcpu), cpumap)
    else:
        server.xend.domain.pincpu(dom, vcpu, cpumap)
-------------------------------------------------------------------------------
3445 commands = {
3475 # cpu commands
"vcpu-pin": xm_vcpu_pin,
"vcpu-list": xm_vcpu_list,
"vcpu-set": xm_vcpu_set,
------------------------------------------------- second layer -------------------------------------------------
/xen-4.0.0/tools/python/xen/xend/XendDomain.py
1564 def domain_pincpu(self, domid, vcpu, cpumap):
    """Set which cpus vcpu can use

    @param domid: Domain ID or Name
    @type domid: int or string.
    @param vcpu: vcpu to pin to
    @type vcpu: int
    @param cpumap: string repr of usable cpus
    @type cpumap: string
    @rtype: 0
    """
    dominfo = self.domain_lookup_nr(domid)
    if not dominfo:
        raise XendInvalidDomain(str(domid))

    # if vcpu is keyword 'all', apply the cpumap to all vcpus
    if str(vcpu).lower() == "all":
        vcpus = range(0, int(dominfo.getVCpuCount()))
    else:
        vcpus = [int(vcpu)]

    # set the same cpumask for all vcpus
    rc = 0
    cpus = dominfo.getCpus()
    cpumap = map(int, cpumap.split(","))
    for v in vcpus:
        try:
            if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED):
                rc = xc.vcpu_setaffinity(dominfo.getDomid(), v, cpumap)
            cpus[v] = cpumap
        except Exception, ex:
            log.exception(ex)
            raise XendError("Cannot pin vcpu: %d to cpu: %s - %s" %
                            (v, cpumap, str(ex)))
    dominfo.setCpus(cpus)
    self.managed_config_save(dominfo)
    return rc
--------------------------------------- third layer --------------------------------------------------
/xen-4.0.0/tools/python/xen/lowlevel/xc/xc.c
#define PKG "xen.lowlevel.xc"
#define CLS "xc"
static PyObject *pyxc_vcpu_setaffinity(XcObject *self,
                                       PyObject *args,
                                       PyObject *kwds)
{
    uint32_t dom;
    int vcpu = 0, i;
    uint64_t *cpumap;
    PyObject *cpulist = NULL;
    int nr_cpus, size;
    xc_physinfo_t info = {0};
    uint64_t cpumap_size = sizeof(*cpumap);

    static char *kwd_list[] = { "domid", "vcpu", "cpumap", NULL };

    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "i|iO", kwd_list,
                                      &dom, &vcpu, &cpulist) )
        return NULL;

    if ( xc_physinfo(self->xc_handle, &info) != 0 )
        return pyxc_error_to_exception();
    nr_cpus = info.nr_cpus;

    size = (nr_cpus + cpumap_size * 8 - 1) / (cpumap_size * 8);
    cpumap = malloc(cpumap_size * size);
    if ( cpumap == NULL )
        return pyxc_error_to_exception();

    if ( (cpulist != NULL) && PyList_Check(cpulist) )
    {
        for ( i = 0; i < size; i++ )
        {
            cpumap[i] = 0ULL;
        }
        for ( i = 0; i < PyList_Size(cpulist); i++ )
        {
            long cpu = PyInt_AsLong(PyList_GetItem(cpulist, i));
            cpumap[cpu / (cpumap_size * 8)] |= (uint64_t)1 << (cpu % (cpumap_size * 8));
        }
    }

    if ( xc_vcpu_setaffinity(self->xc_handle, dom, vcpu, cpumap, size * cpumap_size) != 0 )
    {
        free(cpumap);
        return pyxc_error_to_exception();
    }

    Py_INCREF(zero);
    free(cpumap);
    return zero;
}
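The inner loop packs the Python list of CPU numbers into an array of 64-bit words: pinning to CPUs 0, 1 and 5, for instance, leaves cpumap[0] = 0x23, the same bitmap layout as in the user-space example earlier. The result is then handed to libxc via xc_vcpu_setaffinity.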
---------------------------------------------------------------------------------
1755 static PyMethodDef pyxc_methods[] = {
-------------------------------------------------------
1829 "vcpu_setaffinity",
(PyCFunction)pyxc_vcpu_setaffinity,
METH_VARARGS | METH_KEYWORDS, "\n"
"Pin a VCPU to a specified set CPUs.\n"
" dom [int]: Identifier of domain to which VCPU belongs.\n"
" vcpu [int, 0]: VCPU being pinned.\n"
" cpumap [list, []]: list of usable CPUs.\n\n"
"Returns: [int] 0 on success; -1 on error.\n" },
--------------------------------------------------------
2268 };
--------------------------------------------------------------------------------------
/xen-4.0.0/tools/libxc/xc_domain.c
int xc_vcpu_setaffinity(int xc_handle,
                        uint32_t domid,
                        int vcpu,
                        uint64_t cpumap)
{
    DECLARE_DOMCTL;
    int ret = -1;
    uint8_t local[sizeof (cpumap)];

    domctl.cmd = XEN_DOMCTL_setvcpuaffinity;
    domctl.domain = (domid_t)domid;
    domctl.u.vcpuaffinity.vcpu = vcpu;

    bitmap_64_to_byte(local, &cpumap, sizeof(cpumap) * 8);

    set_xen_guest_handle(domctl.u.vcpuaffinity.cpumap.bitmap, local);

    domctl.u.vcpuaffinity.cpumap.nr_cpus = sizeof(cpumap) * 8;

    if ( lock_pages(local, sizeof(local)) != 0 )
    {
        PERROR("Could not lock memory for Xen hypercall");
        goto out;
    }

    ret = do_domctl(xc_handle, &domctl);

    unlock_pages(local, sizeof(local));

 out:
    return ret;
}
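bitmap_64_to_byte converts the 64-bit-word bitmap into the byte-granular bitmap the XEN_DOMCTL_setvcpuaffinity hypercall expects. A minimal sketch of that conversion (the real helper lives in the libxc headers; this is an illustration, not the verbatim source):
static void bitmap_64_to_byte(uint8_t *bp, const uint64_t *lp, int nbits)
{
    int i;

    /* byte i of the output carries bits 8*i .. 8*i+7 of the input words */
    for (i = 0; i < nbits / 8; i++)
        bp[i] = (uint8_t)(lp[i / 8] >> ((i % 8) * 8));
}
From there, do_domctl issues the hypercall and the hypervisor updates the vcpu's affinity mask, completing the chain from xm vcpu-pin on the command line down to the Xen scheduler.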