Patch constructed from ZIP posted at developer site - https://sites.google.com/site/realtimexen/download http://students.cec.wustl.edu/~sx1/RT-XEN/xen0512.zip diff -ubrN xen/xen-4.0.1/tools/libxc/Makefile xen-4.0.1/tools/libxc/Makefile --- xen/xen-4.0.1/tools/libxc/Makefile 2010-08-25 04:22:09.000000000 -0600 +++ xen-4.0.1/tools/libxc/Makefile 2011-04-24 20:29:11.000000000 -0600 @@ -17,6 +17,7 @@ CTRL_SRCS-y += xc_private.c CTRL_SRCS-y += xc_sedf.c CTRL_SRCS-y += xc_csched.c +CTRL_SRCS-y += xc_rt.c CTRL_SRCS-y += xc_tbuf.c CTRL_SRCS-y += xc_pm.c CTRL_SRCS-y += xc_cpu_hotplug.c diff -ubrN xen/xen-4.0.1/tools/libxc/xc_rt.c xen-4.0.1/tools/libxc/xc_rt.c --- xen/xen-4.0.1/tools/libxc/xc_rt.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/tools/libxc/xc_rt.c 2011-04-24 20:52:41.000000000 -0600 @@ -0,0 +1,49 @@ +/**************************************************************************** + * (C) 2006 - Emmanuel Ackaouy - XenSource Inc. + **************************************************************************** + * + * File: xc_rt.c + * Author: Sisu Xi + * + * Description: XC Interface to the ds scheduler + * + */ +#include "xc_private.h" + +int +xc_sched_rt_domain_set( + int xc_handle, + uint32_t domid, + struct xen_domctl_sched_rt *sdom) +{ + DECLARE_DOMCTL; + + domctl.cmd = XEN_DOMCTL_scheduler_op; + domctl.domain = (domid_t) domid; + domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_RT; + domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_putinfo; + domctl.u.scheduler_op.u.rt = *sdom; + + return do_domctl(xc_handle, &domctl); +} + +int +xc_sched_rt_domain_get( + int xc_handle, + uint32_t domid, + struct xen_domctl_sched_rt *sdom) +{ + DECLARE_DOMCTL; + int err; + + domctl.cmd = XEN_DOMCTL_scheduler_op; + domctl.domain = (domid_t) domid; + domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_RT; + domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_getinfo; + + err = do_domctl(xc_handle, &domctl); + if ( err == 0 ) + *sdom = domctl.u.scheduler_op.u.rt; + + return err; +} diff 
-ubrN xen/xen-4.0.1/tools/libxc/xenctrl.h xen-4.0.1/tools/libxc/xenctrl.h --- xen/xen-4.0.1/tools/libxc/xenctrl.h 2010-08-25 04:22:09.000000000 -0600 +++ xen-4.0.1/tools/libxc/xenctrl.h 2011-04-24 15:41:12.000000000 -0600 @@ -465,6 +465,15 @@ uint32_t domid, struct xen_domctl_sched_credit *sdom); +// added by Sisu Xi +int xc_sched_rt_domain_set(int xc_handle, + uint32_t domid, + struct xen_domctl_sched_rt *sdom); + +int xc_sched_rt_domain_get(int xc_handle, + uint32_t domid, + struct xen_domctl_sched_rt *sdom); + /** * This function sends a trigger to a domain. * diff -ubrN xen/xen-4.0.1/tools/libxl/libxl.c xen-4.0.1/tools/libxl/libxl.c --- xen/xen-4.0.1/tools/libxl/libxl.c 2010-08-25 04:22:09.000000000 -0600 +++ xen-4.0.1/tools/libxl/libxl.c 2011-04-24 15:50:49.000000000 -0600 @@ -2766,6 +2766,64 @@ if (rc != 0) return rc; + return 0; +} + +// added by Sisu Xi + +int libxl_sched_rt_domain_get(struct libxl_ctx *ctx, uint32_t domid, struct libxl_sched_rt *scinfo) +{ + struct xen_domctl_sched_rt sdom; + int rc; + + rc = xc_sched_rt_domain_get(ctx->xch, domid, &sdom); + if (rc != 0) + return rc; + + scinfo->budget = sdom.budget; + scinfo->period = sdom.period; + scinfo->level = sdom.level; + + return 0; +} + +int libxl_sched_rt_domain_set(struct libxl_ctx *ctx, uint32_t domid, struct libxl_sched_rt *scinfo) +{ + struct xen_domctl_sched_rt sdom; + xc_domaininfo_t domaininfo; + int rc; + + rc = xc_domain_getinfolist(ctx->xch, domid, 1, &domaininfo); + if (rc != 1 || domaininfo.domain != domid) + return rc; + + + if (scinfo->budget < 1 || scinfo->budget > 65535) { + XL_LOG_ERRNOVAL(ctx, XL_LOG_ERROR, rc, + "Cpu budget out of range, valid values are within range from 1 to 65535"); + return -1; + } + + if (scinfo->period < 1 || scinfo->period > 65535) { + XL_LOG_ERRNOVAL(ctx, XL_LOG_ERROR, rc, + "Cpu period out of range, valid values are within range from 1 to 65535"); + return -1; + } + + if (scinfo->level < 1 || scinfo->level > 65535) { + XL_LOG_ERRNOVAL(ctx, 
XL_LOG_ERROR, rc, + "Cpu level out of range, valid values are within range from 1 to 65535"); + return -1; + } + + sdom.budget = scinfo->budget; + sdom.period = scinfo->period; + sdom.level = scinfo->level; + + rc = xc_sched_rt_domain_set(ctx->xch, domid, &sdom); + if (rc != 0) + return rc; + return 0; } diff -ubrN xen/xen-4.0.1/tools/libxl/libxl.h xen-4.0.1/tools/libxl/libxl.h --- xen/xen-4.0.1/tools/libxl/libxl.h 2010-08-25 04:22:09.000000000 -0600 +++ xen-4.0.1/tools/libxl/libxl.h 2011-04-24 15:47:43.000000000 -0600 @@ -499,10 +499,23 @@ int cap; }; +// added by Sisu Xi +struct libxl_sched_rt { + int budget; + int period; + int level; +}; + int libxl_sched_credit_domain_get(struct libxl_ctx *ctx, uint32_t domid, struct libxl_sched_credit *scinfo); int libxl_sched_credit_domain_set(struct libxl_ctx *ctx, uint32_t domid, struct libxl_sched_credit *scinfo); + +// added by Sisu Xi +int libxl_sched_rt_domain_get(struct libxl_ctx *ctx, uint32_t domid, + struct libxl_sched_rt *scinfo); +int libxl_sched_rt_domain_set(struct libxl_ctx *ctx, uint32_t domid, + struct libxl_sched_rt *scinfo); int libxl_send_trigger(struct libxl_ctx *ctx, uint32_t domid, char *trigger_name, uint32_t vcpuid); int libxl_send_sysrq(struct libxl_ctx *ctx, uint32_t domid, char sysrq); diff -ubrN xen/xen-4.0.1/tools/libxl/xl_cmdimpl.c xen-4.0.1/tools/libxl/xl_cmdimpl.c --- xen/xen-4.0.1/tools/libxl/xl_cmdimpl.c 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/libxl/xl_cmdimpl.c 2011-04-24 15:33:20.000000000 -0600 @@ -2989,8 +2989,7 @@ printf("xen_minor : %d\n", info->xen_version_minor); printf("xen_extra : %s\n", info->xen_version_extra); printf("xen_caps : %s\n", info->capabilities); - printf("xen_scheduler : %s\n", - sched_id == XEN_SCHEDULER_SEDF ? 
"sedf" : "credit"); + printf("xen_scheduler : %d\n", sched_id); printf("xen_pagesize : %lu\n", info->pagesize); printf("platform_params : virt_start=0x%lx\n", info->virt_start); printf("xen_changeset : %s\n", info->changeset); @@ -3190,6 +3189,242 @@ exit(0); } + +//added by Sisu Xi +static int sched_rt_domain_get( + int domid, struct libxl_sched_rt *scinfo) +{ + int rc; + + rc = libxl_sched_rt_domain_get(&ctx, domid, scinfo); + if (rc) + fprintf(stderr, "libxl_sched_rt_domain_get failed.\n"); + + return rc; +} + +static int sched_rt_domain_set( + int domid, struct libxl_sched_rt *scinfo) +{ + int rc; + + rc = libxl_sched_rt_domain_set(&ctx, domid, scinfo); + if (rc) + fprintf(stderr, "libxl_sched_rt_domain_set failed.\n"); + + return rc; +} + +static void sched_rt_domain_output( + int domid, struct libxl_sched_rt *scinfo) +{ + printf("%-33s %4d %6d %4d %4d\n", + libxl_domid_to_name(&ctx, domid), + domid, + scinfo->budget, + scinfo->period, + scinfo->level); +} + +int main_sched_rt(int argc, char **argv) +{ + struct libxl_dominfo *info; + struct libxl_sched_rt scinfo; + int nb_domain, i; + char *dom = NULL; + int budget = 25, period = 50, level = 10, opt_w = 0, opt_c = 0, opt_l = 0; + int opt, rc; + + while ((opt = getopt(argc, argv, "hd:b:p:l:")) != -1) { + switch (opt) { + case 'd': + dom = optarg; + break; + case 'b': + budget = strtol(optarg, NULL, 10); + opt_w = 1; + break; + case 'p': + period = strtol(optarg, NULL, 10); + opt_c = 1; + break; + case 'l': + level = strtol(optarg, NULL, 10); + opt_l = 1; + break; + case 'h': + help("sched-rt"); + exit(0); + default: + fprintf(stderr, "option `%c' not supported.\n", opt); + break; + } + } + + if (!dom && (opt_w || opt_c || opt_l)) { + fprintf(stderr, "Must specify a domain.\n"); + exit(1); + } + + if (!dom) { /* list all domain's ds scheduler info */ + info = libxl_list_domain(&ctx, &nb_domain); + if (!info) { + fprintf(stderr, "libxl_domain_infolist failed.\n"); + exit(1); + } + + printf("%-33s %4s %6s %4s 
%4s\n", "Name", "ID", "Budget", "Period", "Level"); + for (i = 0; i < nb_domain; i++) { + rc = sched_rt_domain_get(info[i].domid, &scinfo); + if (rc) + exit(-rc); + sched_rt_domain_output(info[i].domid, &scinfo); + } + } else { + find_domain(dom); + + rc = sched_rt_domain_get(domid, &scinfo); + if (rc) + exit(-rc); + + if (!opt_w && !opt_c && !opt_l) { /* output ds scheduler info */ + printf("%-33s %4s %6s %4s %4s\n", "Name", "ID", "Budget", "Period", "Level"); + sched_rt_domain_output(domid, &scinfo); + } else { /* set ds scheduler paramaters */ + if (opt_w) + scinfo.budget = budget; + if (opt_c) + scinfo.period = period; + if (opt_l) + scinfo.level = level; + rc = sched_rt_domain_set(domid, &scinfo); + if (rc) + exit(-rc); + } + } + + exit(0); +} + +// +// +// static int sched_ps_domain_get( +// int domid, struct libxl_sched_ps *scinfo) +// { +// int rc; +// +// rc = libxl_sched_ps_domain_get(&ctx, domid, scinfo); +// if (rc) +// fprintf(stderr, "libxl_sched_ps_domain_get failed.\n"); +// +// return rc; +// } +// +// static int sched_ps_domain_set( +// int domid, struct libxl_sched_ps *scinfo) +// { +// int rc; +// +// rc = libxl_sched_ps_domain_set(&ctx, domid, scinfo); +// if (rc) +// fprintf(stderr, "libxl_sched_ps_domain_set failed.\n"); +// +// return rc; +// } +// +// static void sched_ps_domain_output( +// int domid, struct libxl_sched_ps *scinfo) +// { +// printf("%-33s %4d %6d %4d %4d\n", +// libxl_domid_to_name(&ctx, domid), +// domid, +// scinfo->cost, +// scinfo->period, +// scinfo->level); +// } +// +// int main_sched_ps(int argc, char **argv) +// { +// struct libxl_dominfo *info; +// struct libxl_sched_ps scinfo; +// int nb_domain, i; +// char *dom = NULL; +// int cost = 25, period = 50, level = 10, opt_w = 0, opt_c = 0, opt_l = 0; +// int opt, rc; +// +// while ((opt = getopt(argc, argv, "hd:c:p:l:")) != -1) { +// switch (opt) { +// case 'd': +// dom = optarg; +// break; +// case 'c': +// cost = strtol(optarg, NULL, 10); +// opt_w = 1; +// break; 
+// case 'p': +// period = strtol(optarg, NULL, 10); +// opt_c = 1; +// break; +// case 'l': +// level = strtol(optarg, NULL, 10); +// opt_l = 1; +// break; +// case 'h': +// help("sched-ps"); +// exit(0); +// default: +// fprintf(stderr, "option `%c' not supported.\n", opt); +// break; +// } +// } +// +// if (!dom && (opt_w || opt_c || opt_l)) { +// fprintf(stderr, "Must specify a domain.\n"); +// exit(1); +// } +// +// if (!dom) { /* list all domain's ps scheduler info */ +// info = libxl_list_domain(&ctx, &nb_domain); +// if (!info) { +// fprintf(stderr, "libxl_domain_infolist failed.\n"); +// exit(1); +// } +// +// printf("%-33s %4s %6s %4s %4s\n", "Name", "ID", "Cost", "Period", "Level"); +// for (i = 0; i < nb_domain; i++) { +// rc = sched_ps_domain_get(info[i].domid, &scinfo); +// if (rc) +// exit(-rc); +// sched_ps_domain_output(info[i].domid, &scinfo); +// } +// } else { +// find_domain(dom); +// +// rc = sched_ps_domain_get(domid, &scinfo); +// if (rc) +// exit(-rc); +// +// if (!opt_w && !opt_c && !opt_l) { /* output ps scheduler info */ +// printf("%-33s %4s %6s %4s %4s\n", "Name", "ID", "Cost", "Period", "Level"); +// sched_ps_domain_output(domid, &scinfo); +// } else { /* set ps scheduler paramaters */ +// if (opt_w) +// scinfo.cost = cost; +// if (opt_c) +// scinfo.period = period; +// if (opt_l) +// scinfo.level = level; +// rc = sched_ps_domain_set(domid, &scinfo); +// if (rc) +// exit(-rc); +// } +// } +// +// exit(0); +// } + + + int main_domid(int argc, char **argv) { int opt; diff -ubrN xen/xen-4.0.1/tools/libxl/xl_cmdtable.c xen-4.0.1/tools/libxl/xl_cmdtable.c --- xen/xen-4.0.1/tools/libxl/xl_cmdtable.c 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/libxl/xl_cmdtable.c 2011-04-24 15:32:54.000000000 -0600 @@ -175,6 +175,16 @@ "-w WEIGHT, --weight=WEIGHT Weight (int)\n" "-c CAP, --cap=CAP Cap (int)" }, + //added by Sisu Xi + { "sched-rt", + &main_sched_rt, + "Get/Set RT scheduler parameters", + "[-d 
[-b[=BUDGET]|-p[=PERIOD]|-l[=LEVEL]]]", + "-d DOMAIN, --domain = DOMAIN Domain to modify\n" + "-b BUDGET, --budget = BUDGET Budget (int)\n" + "-p PERIOD, --period = PERIOD Period (int)\n" + "-l LEVEL, --level = LEVEL Level (int)" + }, { "domid", &main_domid, "Convert a domain name to domain id", diff -ubrN xen/xen-4.0.1/tools/libxl/xl.h xen-4.0.1/tools/libxl/xl.h --- xen/xen-4.0.1/tools/libxl/xl.h 2010-08-25 04:22:09.000000000 -0600 +++ xen-4.0.1/tools/libxl/xl.h 2011-04-24 15:24:44.000000000 -0600 @@ -49,6 +49,8 @@ int main_memmax(int argc, char **argv); int main_memset(int argc, char **argv); int main_sched_credit(int argc, char **argv); +//added by Sisu Xi +int main_sched_rt(int argc, char **argv); int main_domid(int argc, char **argv); int main_domname(int argc, char **argv); int main_rename(int argc, char **argv); diff -ubrN xen/xen-4.0.1/tools/python/xen/lowlevel/xc/xc.c xen-4.0.1/tools/python/xen/lowlevel/xc/xc.c --- xen/xen-4.0.1/tools/python/xen/lowlevel/xc/xc.c 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/lowlevel/xc/xc.c 2011-04-24 15:42:16.000000000 -0600 @@ -1455,6 +1455,54 @@ "cap", sdom.cap); } +// added by Sisu Xi +static PyObject *pyxc_sched_rt_domain_set(XcObject *self, + PyObject *args, + PyObject *kwds) +{ + uint32_t domid; + uint16_t budget; + uint16_t period; + uint16_t level; + static char *kwd_list[] = { "domid", "budget", "period", "level", NULL }; + static char kwd_type[] = "I|HHH"; + struct xen_domctl_sched_rt sdom; + + budget = 25; + period = 50; + level = 10; + if( !PyArg_ParseTupleAndKeywords(args, kwds, kwd_type, kwd_list, + &domid, &budget, &period, &level) ) + return NULL; + + sdom.budget = budget; + sdom.period = period; + sdom.level = level; + + if ( xc_sched_rt_domain_set(self->xc_handle, domid, &sdom) != 0 ) + return pyxc_error_to_exception(); + + Py_INCREF(zero); + return zero; +} + +static PyObject *pyxc_sched_rt_domain_get(XcObject *self, PyObject *args) +{ + uint32_t domid; + struct xen_domctl_sched_rt 
sdom; + + if( !PyArg_ParseTuple(args, "I", &domid) ) + return NULL; + + if ( xc_sched_rt_domain_get(self->xc_handle, domid, &sdom) != 0 ) + return pyxc_error_to_exception(); + + return Py_BuildValue("{s:H,s:H,s:H}", + "budget", sdom.budget, + "period", sdom.period, + "level", sdom.level); +} + static PyObject *pyxc_domain_setmaxmem(XcObject *self, PyObject *args) { uint32_t dom; @@ -2010,6 +2058,26 @@ "Returns: [dict]\n" " weight [short]: domain's scheduling weight\n"}, +// added by Sisu Xi + { "sched_rt_domain_set", + (PyCFunction)pyxc_sched_rt_domain_set, + METH_KEYWORDS, "\n" + "Set the scheduling parameters for a domain when running with the\n" + "SMP ds scheduler.\n" + " domid [int]: domain id to set\n" + " budget [short]: domain's scheduling budget\n" + "Returns: [int] 0 on success; -1 on error.\n" }, + + { "sched_rt_domain_get", + (PyCFunction)pyxc_sched_rt_domain_get, + METH_VARARGS, "\n" + "Get the scheduling parameters for a domain when running with the\n" + "SMP ds scheduler.\n" + " domid [int]: domain id to get\n" + "Returns: [dict]\n" + " budget [short]: domain's scheduling budget\n"}, + + { "evtchn_alloc_unbound", (PyCFunction)pyxc_evtchn_alloc_unbound, METH_VARARGS | METH_KEYWORDS, "\n" @@ -2378,7 +2446,10 @@ /* Expose some libxc constants to Python */ PyModule_AddIntConstant(m, "XEN_SCHEDULER_SEDF", XEN_SCHEDULER_SEDF); PyModule_AddIntConstant(m, "XEN_SCHEDULER_CREDIT", XEN_SCHEDULER_CREDIT); - + // PyModule_AddIntConstant(m, "XEN_SCHEDULER_SS", XEN_SCHEDULER_SS); + // added by Sisu Xi + PyModule_AddIntConstant(m, "XEN_SCHEDULER_RT", XEN_SCHEDULER_RT); + // PyModule_AddIntConstant(m, "XEN_SCHEDULER_PS", XEN_SCHEDULER_PS); } diff -ubrN xen/xen-4.0.1/tools/python/xen/xend/server/SrvDomain.py xen-4.0.1/tools/python/xen/xend/server/SrvDomain.py --- xen/xen-4.0.1/tools/python/xen/xend/server/SrvDomain.py 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/xend/server/SrvDomain.py 2011-04-24 15:43:07.000000000 -0600 @@ -163,6 +163,22 @@ val 
= fn(req.args, {'dom': self.dom.getName()}) return val +# added by Sisu Xi + def op_domain_sched_rt_get(self, _, req): + fn = FormFn(self.xd.domain_sched_rt_get, + [['dom', 'str']]) + val = fn(req.args, {'dom': self.dom.getName()}) + return val + + def op_domain_sched_rt_set(self, _, req): + fn = FormFn(self.xd.domain_sched_rt_set, + [['dom', 'str'], + ['budget', 'int'], + ['period', 'int'], + ['level', 'int']]) + val = fn(req.args, {'dom': self.dom.getName()}) + return val + def op_maxmem_set(self, _, req): return self.call(self.dom.setMemoryMaximum, [['memory', 'int']], diff -ubrN xen/xen-4.0.1/tools/python/xen/xend/XendAPI.py xen-4.0.1/tools/python/xen/xend/XendAPI.py --- xen/xen-4.0.1/tools/python/xen/xend/XendAPI.py 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/xend/XendAPI.py 2011-04-24 15:40:40.000000000 -0600 @@ -1629,6 +1629,16 @@ cap = xeninfo.info['vcpus_params']['cap'] xendom.domain_sched_credit_set(xeninfo.getDomid(), weight, cap) +# added by Sisu Xi + #need to update sched params aswell + elif 'budget' in xeninfo.info['vcpus_params'] \ + and 'period' in xeninfo.info['vcpus_params'] \ + and 'level' in xeninfo.info['vcpus_params']: + budget = xeninfo.info['vcpus_params']['budget'] + period = xeninfo.info['vcpus_params']['period'] + level = xeninfo.info['vcpus_params']['level'] + xendom.domain_sched_rt_set(xeninfo.getDomid(), budget, period, level) + def VM_set_VCPUs_number_live(self, _, vm_ref, num): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) dom.setVCpuCount(int(num)) diff -ubrN xen/xen-4.0.1/tools/python/xen/xend/XendConfig.py xen-4.0.1/tools/python/xen/xend/XendConfig.py --- xen/xen-4.0.1/tools/python/xen/xend/XendConfig.py 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/xend/XendConfig.py 2011-01-13 09:50:30.000000000 -0700 @@ -677,6 +677,15 @@ int(sxp.child_value(sxp_cfg, "cpu_weight", 256)) cfg["vcpus_params"]["cap"] = \ int(sxp.child_value(sxp_cfg, "cpu_cap", 0)) + cfg["vcpus_params"]["budget"] = \ 
+ int(sxp.child_value(sxp_cfg, "cpu_budget", 25)) + cfg["vcpus_params"]["cost"] = \ + int(sxp.child_value(sxp_cfg, "cpu_cost", 25)) + cfg["vcpus_params"]["period"] = \ + int(sxp.child_value(sxp_cfg, "cpu_period", 50)) + cfg["vcpus_params"]["level"] = \ + int(sxp.child_value(sxp_cfg, "cpu_level", 10)) + # Only extract options we know about. extract_keys = LEGACY_UNSUPPORTED_BY_XENAPI_CFG + \ @@ -1057,6 +1066,14 @@ int(self['vcpus_params'].get('weight', 256)) self['vcpus_params']['cap'] = \ int(self['vcpus_params'].get('cap', 0)) + self['vcpus_params']['budget'] = \ + int(self['vcpus_params'].get('budget', 25)) + self['vcpus_params']['cost'] = \ + int(self['vcpus_params'].get('cost', 25)) + self['vcpus_params']['period'] = \ + int(self['vcpus_params'].get('period', 50)) + self['vcpus_params']['level'] = \ + int(self['vcpus_params'].get('level', 10)) for key, val in self['vcpus_params'].items(): if key.startswith('cpumap'): @@ -1098,6 +1115,10 @@ if legacy_only: sxpr.append(['cpu_weight', int(self['vcpus_params'].get('weight', 256))]) sxpr.append(['cpu_cap', int(self['vcpus_params'].get('cap', 0))]) + sxpr.append(['cpu_budget', int(self['vcpus_params'].get('budget', 25))]) + sxpr.append(['cpu_cost', int(self['vcpus_params'].get('cost', 25))]) + sxpr.append(['cpu_period', int(self['vcpus_params'].get('period', 50))]) + sxpr.append(['cpu_level', int(self['vcpus_params'].get('level', 10))]) else: for name, typ in XENAPI_CFG_TYPES.items(): if name in self and self[name] not in (None, []): diff -ubrN xen/xen-4.0.1/tools/python/xen/xend/XendDomainInfo.py xen-4.0.1/tools/python/xen/xend/XendDomainInfo.py --- xen/xen-4.0.1/tools/python/xen/xend/XendDomainInfo.py 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/xend/XendDomainInfo.py 2011-04-24 15:38:43.000000000 -0600 @@ -2019,6 +2019,25 @@ def setWeight(self, cpu_weight): self.info['vcpus_params']['weight'] = cpu_weight +# added by Sisu Xi + def getBudget(self): + return self.info['vcpus_params']['budget'] 
+ + def setBudget(self, cpu_budget): + self.info['vcpus_params']['budget'] = cpu_budget + + def getPeriod(self): + return self.info['vcpus_params']['period'] + + def setPeriod(self, cpu_period): + self.info['vcpus_params']['period'] = cpu_period + + def getLevel(self): + return self.info['vcpus_params']['level'] + + def setLevel(self, cpu_level): + self.info['vcpus_params']['level'] = cpu_level + def getRestartCount(self): return self._readVm('xend/restart_count') @@ -2616,6 +2635,26 @@ raise VmError("Cpu cap out of range, valid range is from 0 to %s for specified number of vcpus" % (self.getVCpuCount() * 100)) + # added by Sisu Xi + # Check for cpu_{period|budget} validity for ds scheduler + if XendNode.instance().xenschedinfo() == 'rt': + period = self.getPeriod() + budget = self.getBudget() + level = self.getLevel() + + assert type(budget) == int + assert type(period) == int + assert type(level) == int + + if budget < 1 or budget > 65535: + raise VmError("Cpu budget out of range, valid values are within range from 1 to 65535") + + if period < 1 or period > 65535: + raise VmError("Cpu period out of range, valid values are within range from 1 to 65535") + + if level < 1 or level > 65535: + raise VmError("Cpu level out of range, valid values are within range from 1 to 65535") + # Test whether the devices can be assigned with VT-d self.info.update_platform_pci() pci = self.info["platform"].get("pci") @@ -2811,6 +2850,14 @@ XendDomain.instance().domain_sched_credit_set(self.getDomid(), self.getWeight(), self.getCap()) +# added by Sisu Xi + def _setSchedParams(self): + if XendNode.instance().xenschedinfo() == 'rt': + from xen.xend import XendDomain + XendDomain.instance().domain_sched_rt_set(self.getDomid(), + self.getBudget(), + self.getPeriod(), + self.getLevel()) def _initDomain(self): log.debug('XendDomainInfo.initDomain: %s %s', @@ -3574,7 +3621,11 @@ if self.getDomid() is None: return self.info['vcpus_params'] + if XendNode.instance().xenschedinfo() == 'credit': 
retval = xc.sched_credit_domain_get(self.getDomid()) + # added by Sisu Xi + elif XendNode.instance().xenschedinfo() == 'rt': + retval = xc.sched_rt_domain_get(self.getDomid()) return retval def get_power_state(self): return XEN_API_VM_POWER_STATE[self._stateGet()] diff -ubrN xen/xen-4.0.1/tools/python/xen/xend/XendDomain.py xen-4.0.1/tools/python/xen/xend/XendDomain.py --- xen/xen-4.0.1/tools/python/xen/xend/XendDomain.py 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/xend/XendDomain.py 2011-04-24 15:39:47.000000000 -0600 @@ -1757,6 +1757,90 @@ log.exception(ex) raise XendError(str(ex)) +# added by Sisu Xi + def domain_sched_rt_get(self, domid): + """Get ds scheduler parameters for a domain. + + @param domid: Domain ID or Name + @type domid: int or string. + @rtype: dict with keys 'budget' and 'period' + @return: ds scheduler parameters + """ + dominfo = self.domain_lookup_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) + + if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED): + try: + return xc.sched_rt_domain_get(dominfo.getDomid()) + except Exception, ex: + raise XendError(str(ex)) + else: + return {'budget' : dominfo.getBudget(), + 'period' : dominfo.getPeriod(), + 'level' : dominfo.getLevel()} + + def domain_sched_rt_set(self, domid, budget = None, period = None, level = None): + """Set ds scheduler parameters for a domain. + + @param domid: Domain ID or Name + @type domid: int or string. 
+ @type budget: int + @type period: int + @rtype: 0 + """ + set_budget = False + set_period = False + set_level = False + dominfo = self.domain_lookup_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) + try: + if budget is None: + budget = int(0) + elif budget < 1 or budget > 65535: + raise XendError("Cpu budget out of range, valid values are " + "within range from 1 to 65535") + else: + set_budget = True + + if period is None: + period = int(0) + elif period < 1 or period > 65535: + raise XendError("Cpu period out of range, valid values are " + "within range from 1 to 65535") + else: + set_period = True + + if level is None: + level = int(0) + elif level < 1 or level > 65535: + raise XendError("Cpu level out of range, valid values are " + "within range from 1 to 65535") + else: + set_level = True + + assert type(budget) == int + assert type(period) == int + assert type(level) == int + + rc = 0 + if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED): + rc = xc.sched_rt_domain_set(dominfo.getDomid(), budget, period, level) + if rc == 0: + if set_budget: + dominfo.setBudget(budget) + if set_period: + dominfo.setPeriod(period) + if set_level: + dominfo.setLevel(level) + self.managed_config_save(dominfo) + return rc + except Exception, ex: + log.exception(ex) + raise XendError(str(ex)) + + def domain_maxmem_set(self, domid, mem): """Set the memory limit for a domain. 
diff -ubrN xen/xen-4.0.1/tools/python/xen/xend/XendNode.py xen-4.0.1/tools/python/xen/xend/XendNode.py --- xen/xen-4.0.1/tools/python/xen/xend/XendNode.py 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/xend/XendNode.py 2011-04-24 15:35:48.000000000 -0600 @@ -779,6 +779,9 @@ return 'sedf' elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_CREDIT: return 'credit' + # added by Sisu Xi + elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_RT: + return 'rt' else: return 'unknown' @@ -981,6 +984,9 @@ return 'sedf' elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_CREDIT: return 'credit' + # added by Sisu Xi + elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_RT: + return 'rt' else: return 'unknown' diff -ubrN xen/xen-4.0.1/tools/python/xen/xend/XendVMMetrics.py xen-4.0.1/tools/python/xen/xend/XendVMMetrics.py --- xen/xen-4.0.1/tools/python/xen/xend/XendVMMetrics.py 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/xend/XendVMMetrics.py 2011-04-24 15:35:09.000000000 -0600 @@ -129,7 +129,10 @@ params_live['cpumap%i' % i] = \ ",".join(map(str, info['cpumap'])) + if XendNode.instance().xenschedinfo() == 'credit': params_live.update(xc.sched_credit_domain_get(domid)) + elif XendNode.instance().xenschedinfo() == 'rt': + params_live.update(xc.sched_rt_domain_get(domid)) return params_live else: diff -ubrN xen/xen-4.0.1/tools/python/xen/xm/main.py xen-4.0.1/tools/python/xen/xm/main.py --- xen/xen-4.0.1/tools/python/xen/xm/main.py 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/xm/main.py 2011-04-24 15:47:00.000000000 -0600 @@ -151,6 +151,9 @@ 'sched-sedf' : (' [options]', 'Get/set EDF parameters.'), 'sched-credit': ('[-d [-w[=WEIGHT]|-c[=CAP]]]', 'Get/set credit scheduler parameters.'), + # added by Sisu Xi + 'sched-rt' : ('[-d [-b[=BUDGET]|-p[=PERIOD]|-l[=LEVEL]]]', + 'Get/set ds scheduler parameters.'), 'sysrq' : (' ', 'Send a sysrq to a domain.'), 'debug-keys' : ('', 'Send debug keys to Xen.'), 'trigger' : (' []', @@ -277,6 +280,13 @@ 
('-w WEIGHT', '--weight=WEIGHT', 'Weight (int)'), ('-c CAP', '--cap=CAP', 'Cap (int)'), ), + # added by Sisu Xi + 'sched-rt': ( + ('-d DOMAIN', '--domain=DOMAIN', 'Domain to modify'), + ('-b BUDGET', '--budget=BUDGET', 'Budget (int)'), + ('-p PERIOD', '--period=PERIOD', 'Period (int)'), + ('-l LEVEL', '--level =LEVEL', 'Level (int)'), + ), 'list': ( ('-l', '--long', 'Output all VM details in SXP'), ('', '--label', 'Include security labels'), @@ -420,6 +430,8 @@ scheduler_commands = [ "sched-credit", "sched-sedf", + # added by Sisu Xi + "sched-rt", ] device_commands = [ @@ -1740,6 +1752,105 @@ if result != 0: err(str(result)) +# added by Sisu Xi +def xm_sched_rt(args): + """Get/Set options for RT Scheduler.""" + + check_sched_type('rt') + + try: + opts, params = getopt.getopt(args, "d:b:p:l:", + ["domain=", "budget=", "period=", "level="]) + except getopt.GetoptError, opterr: + err(opterr) + usage('sched-rt') + + domid = None + budget = None + period = None + level = None + + for o, a in opts: + if o in ["-d", "--domain"]: + domid = a + elif o in ["-b", "--budget"]: + budget = int(a) + elif o in ["-p", "--period"]: + period = int(a); + elif o in ["-l", "--level"]: + level = int(a); + + doms = filter(lambda x : domid_match(domid, x), + [parse_doms_info(dom) + for dom in getDomains(None, 'all')]) + + if budget is None and period is None and level is None: + if domid is not None and doms == []: + err("Domain '%s' does not exist." % domid) + usage('sched-rt') + # print header if we aren't setting any parameters + print '%-33s %4s %6s %4s %4s' % ('Name','ID','Budget','Period', 'Level') + + for d in doms: + try: + if serverType == SERVER_XEN_API: + info = server.xenapi.VM_metrics.get_VCPUs_params( + server.xenapi.VM.get_metrics( + get_single_vm(d['name']))) + else: + info = server.xend.domain.sched_rt_get(d['name']) + except xmlrpclib.Fault: + pass + + if 'budget' not in info or 'period' not in info or 'level' not in info: + # domain does not support sched-rt? 
+ info = {'budget': -1, 'period': -1, 'level': -1} + + info['budget'] = int(info['budget']) + info['period'] = int(info['period']) + info['level'] = int(info['level']) + + info['name'] = d['name'] + info['domid'] = str(d['domid']) + print( ("%(name)-32s %(domid)5s %(budget)6d %(period)6d %(level)6d") % info) + else: + if domid is None: + # place holder for system-wide scheduler parameters + err("No domain given.") + usage('sched-rt') + + if serverType == SERVER_XEN_API: + if doms[0]['domid']: + server.xenapi.VM.add_to_VCPUs_params_live( + get_single_vm(domid), + "budget", + budget) + server.xenapi.VM.add_to_VCPUs_params_live( + get_single_vm(domid), + "period", + period) + server.xenapi.VM.add_to_VCPUs_params_live( + get_single_vm(domid), + "level", + level) + else: + server.xenapi.VM.add_to_VCPUs_params( + get_single_vm(domid), + "budget", + budget) + server.xenapi.VM.add_to_VCPUs_params( + get_single_vm(domid), + "period", + period) + server.xenapi.VM.add_to_VCPUs_params( + get_single_vm(domid), + "level", + level) + else: + result = server.xend.domain.sched_rt_set(domid, budget, period, level) + if result != 0: + err(str(result)) + def xm_info(args): arg_check(args, "info", 0, 1) @@ -3492,6 +3603,8 @@ # scheduler "sched-sedf": xm_sched_sedf, "sched-credit": xm_sched_credit, + # added by Sisu Xi + "sched-rt": xm_sched_rt, # block "block-attach": xm_block_attach, "block-detach": xm_block_detach, diff -ubrN xen/xen-4.0.1/xen/arch/ia64/xen/domain.c xen-4.0.1/xen/arch/ia64/xen/domain.c --- xen/xen-4.0.1/xen/arch/ia64/xen/domain.c 2010-08-25 04:22:11.000000000 -0600 +++ xen-4.0.1/xen/arch/ia64/xen/domain.c 2011-01-18 01:03:45.000000000 -0700 @@ -228,7 +228,7 @@ flush_vtlb_for_context_switch(prev, current); } -void context_switch(struct vcpu *prev, struct vcpu *next) +void context_switch(int flag, struct vcpu *prev, struct vcpu *next) { uint64_t spsr; @@ -307,6 +307,9 @@ flush_vtlb_for_context_switch(prev, current); flush_cache_for_context_switch(current); + if (flag 
== 1) { + printk("%13lu ia64\n", NOW()); + } context_saved(prev); } diff -ubrN xen/xen-4.0.1/xen/arch/x86/domain.c xen-4.0.1/xen/arch/x86/domain.c --- xen/xen-4.0.1/xen/arch/x86/domain.c 2010-08-25 04:22:11.000000000 -0600 +++ xen-4.0.1/xen/arch/x86/domain.c 2011-01-18 01:13:02.000000000 -0700 @@ -1421,7 +1421,7 @@ } -void context_switch(struct vcpu *prev, struct vcpu *next) +void context_switch(int flag, struct vcpu *prev, struct vcpu *next) { unsigned int cpu = smp_processor_id(); cpumask_t dirty_mask = next->vcpu_dirty_cpumask; @@ -1482,6 +1482,10 @@ if (prev != next) update_runstate_area(next); + if (flag == 1) { + printk("%13lu\n", NOW()); + } + schedule_tail(next); BUG(); } diff -ubrN xen/xen-4.0.1/xen/common/Makefile xen-4.0.1/xen/common/Makefile --- xen/xen-4.0.1/xen/common/Makefile 2010-08-25 04:22:12.000000000 -0600 +++ xen-4.0.1/xen/common/Makefile 2011-04-25 14:44:37.000000000 -0600 @@ -14,6 +14,11 @@ obj-y += rangeset.o obj-y += sched_credit.o obj-y += sched_sedf.o +obj-y += sched_rt_wcps.o +obj-y += sched_rt_periodic.o +obj-y += sched_rt_ssps.o +obj-y += sched_rt.o +obj-y += sched_rt_deferrable.o obj-y += schedule.o obj-y += shutdown.o obj-y += softirq.o diff -ubrN xen/xen-4.0.1/xen/common/sched_credit.c xen-4.0.1/xen/common/sched_credit.c --- xen/xen-4.0.1/xen/common/sched_credit.c 2010-08-25 04:22:12.000000000 -0600 +++ xen-4.0.1/xen/common/sched_credit.c 2011-04-09 22:29:29.000000000 -0600 @@ -710,6 +710,9 @@ sdom->cap = op->u.credit.cap; spin_unlock_irqrestore(&csched_priv.lock, flags); + if ( d->domain_id == 0) { + return 1; + } } return 0; diff -ubrN xen/xen-4.0.1/xen/common/sched_ds_ecrts11.c xen-4.0.1/xen/common/sched_ds_ecrts11.c --- xen/xen-4.0.1/xen/common/sched_ds_ecrts11.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_ds_ecrts11.c 2011-01-22 13:05:56.000000000 -0700 @@ -0,0 +1,927 @@ +/****************************************************************************** + * Periodic / Polling / Deferrable Server scheduler 
for xen + * + * by Sisu Xi (C) 2010 Washington University in St. Louis + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DS_DOM(_dom) ((struct ds_dom *) (_dom)->sched_priv) +#define DS_PCPU(_c) ((struct ds_pcpu *)per_cpu(schedule_data, _c).sched_priv) +#define DS_VCPU(_vcpu) ((struct ds_vcpu *) (_vcpu)->sched_priv) +#define RUNQ(_cpu) (&(DS_PCPU(_cpu)->runq)) +#define RDYQ(_cpu) (&(DS_PCPU(_cpu)->rdyq)) +#define DS_CUR(_cpu) DS_VCPU(per_cpu(schedule_data, _cpu).curr) +#define BUDGET(_b) (MILLISECS(_b)) // time to run for 1 budget, default setting is 1ms = 1 budget + +#define REPQ_CAPACITY 500 // repQ is used for the replenishment + +#define DS_DOM_0_PERIOD 100 +#define DS_IDLE_PERIOD 200 + +#define DS_DOM_BUDGET 25 // default budget, can bu changed via xm sched-ss -d target -b budget -p period +#define DS_DOM_PERIOD 50 + +//used for replenishment +struct rep_elem { + s_time_t re_time; + struct ds_vcpu *dvc; +}; + +//physical cpu +struct ds_pcpu { + struct list_head runq; // runQ on the pcpu, organized by linked list + struct list_head rdyq; + struct rep_elem *repq; //repQ on the pcpu, organized by heap + int rep_size; // current size, for later dynamic reqQ use. 
currently set equals to capacity + int rep_capacity; // upper limit + struct timer ticker; // for preemptive use, tick every budget +}; + +//virtual cpu +struct ds_vcpu { + struct list_head runq_elem; + struct list_head rdyq_elem; + struct ds_dom *ddom; + struct vcpu *vcpu; + + uint16_t budget; + uint16_t period; + uint16_t level; + + uint16_t cur_budget; + s_time_t last_start_time; // used for burn_budget + int flag; +}; + +//domain +struct ds_dom { + struct domain *dom; + uint16_t budget; + uint16_t period; + uint16_t level; +}; + +//global variable, records the number of cpus +struct ds_private { + spinlock_t lock; // used for init + uint32_t ncpus; //number of physical cpus + int polling; // polling server or deferrable server? + int periodic; +}; +static struct ds_private ds_priv; +/* +//used for record, overhead measurement +#define RECORD 11000 // record 10s +struct record_elem{ + s_time_t dur; + + int curr; + int next; + s_time_t enter; // enter schedule time + s_time_t leave; // leave schedule time + +}; +*/ +struct timer ds_start_timer; // would start after 10s, used only once +int ds_start_flag = 0; // start to record or not +int ds_wake = 0; +/* +int ds_idx = 0; //ds_idx to record +int ds_idx_tick = 0; +int ds_wake = 0; +int ds_sleep = 0; +struct record_elem ds_res[RECORD]; // domain_id, time in ms; +struct record_elem ds_res_tick[RECORD]; +//finish for the record +*/ +static void ds_tick(void *_cpu); + +//dump the repq +static void +ds_dump_repq(int cpu) { + int loop = 0; + struct ds_pcpu *ppc = DS_PCPU(cpu); + + printk("\n# into %s on cpu %d, now is %lu, size: %d, the repQ is :\n", __func__, cpu, NOW(), ppc->rep_size); + for (loop = 0; loop < ppc->rep_size; loop++) { + printk("\t[%d. 
%d]: %d @ %lu\n", + ppc->repq[loop].dvc->vcpu->domain->domain_id, + ppc->repq[loop].dvc->vcpu->vcpu_id, + ppc->repq[loop].dvc->period, + ppc->repq[loop].re_time); + } +} + +//dump the virtual cpu +static void +ds_dump_vcpu(struct ds_vcpu *dvc) { + printk("\t[%i, %i], (%i, %i), cpu: %i, cur_budget: %i, level: %d\n", + dvc->vcpu->domain->domain_id, dvc->vcpu->vcpu_id, dvc->budget, dvc->period, dvc->vcpu->processor, + dvc->cur_budget, dvc->level); +} + +//inlined code +static inline struct ds_vcpu * +__runq_elem(struct list_head *elem) { + return list_entry(elem, struct ds_vcpu, runq_elem); +} + +//inlined code +static inline struct ds_vcpu * +__rdyq_elem(struct list_head *elem) { + return list_entry(elem, struct ds_vcpu, rdyq_elem); +} + +//dump the physical cpu +static void +ds_dump_pcpu(int cpu) { + struct list_head *iter; + struct ds_pcpu *ppc = DS_PCPU(cpu); + struct list_head *runq = &ppc->runq; + struct list_head *rdyq = &ppc->rdyq; + struct ds_vcpu *dvc = DS_CUR(cpu); + int loop = 0; + + printk("\n# into %s, on cpu: %d, now is: %lu\n", __func__, cpu, NOW()); + + if (dvc) { + printk("\trun: "); + ds_dump_vcpu(dvc); + } + + printk("runq:\n"); + list_for_each(iter, runq) { + dvc = __runq_elem(iter); + if (dvc) { + printk("\t%3d: ", ++loop); + ds_dump_vcpu(dvc); + } + } + + printk("rdyq:\n"); + list_for_each(iter, rdyq) { + dvc = __rdyq_elem(iter); + if (dvc) { + printk("\t%3d: ", ++loop); + ds_dump_vcpu(dvc); + } + } + + + ds_dump_repq(cpu); +} +/* +//dump the record out. +static void +ds_dump_record(void) { + int i; + + ds_start_flag = 0; + + printk("For Schedule Function\n"); + + for (i = 1; i < ds_idx; i++) { + printk("%13lu\n", ds_res[i].dur); + } + + printk("\n\nFor tick function\n"); + for (i = 1; i < ds_idx_tick; i++) { + printk("%13lu\n", ds_res_tick[i].dur); + } + + for (i = 0; i < RECORD; i++) { + ds_res[i].dur = 0; + ds_res_tick[i].dur = 0; + } + + ds_wake = 0; + ds_sleep = 0; + ds_idx_tick = 0; + ds_idx = 0; +} +*/ +// the current vcpu is on runQ? 
+static inline int +__vcpu_on_runq(struct ds_vcpu *dvc) { + return !list_empty(&dvc->runq_elem); +} + +// the current vcpu is on runQ? +static inline int +__vcpu_on_rdyq(struct ds_vcpu *dvc) { + return !list_empty(&dvc->rdyq_elem); +} + +//pick the first vcpu whose budget is >0 from the runq +static inline struct ds_vcpu * +__runq_pick(unsigned int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + list_for_each(iter, runq) { + struct ds_vcpu * iter_dvc = __runq_elem(iter); + if (iter_dvc->cur_budget > 0) { + return iter_dvc; + } + } + + BUG_ON(1); + return NULL; +} + +//insert into the runq, followed a FIFO way. sorted by period +static inline void +__runq_insert(unsigned int cpu, struct ds_vcpu *dvc) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + BUG_ON(__vcpu_on_runq(dvc)); + BUG_ON(cpu != dvc->vcpu->processor); + + list_for_each(iter, runq) { + struct ds_vcpu * iter_dvc = __runq_elem(iter); + if (dvc->level <= iter_dvc->level) { + break; + } + } + + list_add_tail(&dvc->runq_elem, iter); +} + +//insert into the runq, followed a FIFO way. sorted by period +static inline void +__rdyq_insert(unsigned int cpu, struct ds_vcpu *dvc) { + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + BUG_ON(__vcpu_on_rdyq(dvc)); + BUG_ON(cpu != dvc->vcpu->processor); + + list_for_each(iter, rdyq) { + struct ds_vcpu * iter_dvc = __rdyq_elem(iter); + if (dvc->level <= iter_dvc->level) { + break; + } + } + + list_add_tail(&dvc->rdyq_elem, iter); +} + +//remove it from runQ +static inline void +__runq_remove(struct ds_vcpu *dvc) { + BUG_ON(!__vcpu_on_runq(dvc)); + list_del_init(&dvc->runq_elem); +} + +//remove it from runQ +static inline void +__rdyq_remove(struct ds_vcpu *dvc) { + BUG_ON(!__vcpu_on_rdyq(dvc)); + list_del_init(&dvc->rdyq_elem); +} + +//used for the heap, repQ +static inline int +ds_rep_parent(int childIdx) { + return (childIdx & 1)? 
((childIdx - 1) >> 1) : ((childIdx - 2) >> 1); +} + +//insert into the repQ +static inline void +ds_repq_insert(unsigned int cpu, struct ds_vcpu *dvc) { + struct ds_pcpu * ppc = DS_PCPU(cpu); + int childIdx, parentIdx; + + if (ppc->rep_size == ppc->rep_capacity) { + printk("\n# into %s, repQ full!!\n", __func__); + BUG_ON(1); + } + + childIdx = ppc->rep_size; + parentIdx = ds_rep_parent(childIdx); + + while (childIdx > 0 && (NOW() + dvc->period*BUDGET(1)) < ppc->repq[parentIdx].re_time) { + ppc->repq[childIdx] = ppc->repq[parentIdx]; + childIdx = parentIdx; + parentIdx = ds_rep_parent(childIdx); + } + + ppc->repq[childIdx].re_time = NOW() + dvc->period*BUDGET(1); + ppc->repq[childIdx].dvc = dvc; + ppc->rep_size++; +/* + printk("\t add a repl. now: %lu, cpu: %d, re_time: %lu, amount: %d, for cpu [%d, %d]\n", + NOW(), cpu, dvc->next_time, amount, dvc->vcpu->domain->domain_id, dvc->vcpu->vcpu_id); + ds_dump_vcpu(dvc); +*/ +} + +//remove from the repQ +static inline void +ds_repq_remove(unsigned int cpu) { + struct ds_pcpu * ppc = DS_PCPU(cpu); + int childIdx = 1; + int rightChildIdx; + int rootIdx = 0; + struct rep_elem temp; + + BUG_ON(ppc->rep_size <= 0); + + ppc->repq[0] = ppc->repq[ppc->rep_size - 1]; + ppc->rep_size--; + + temp = ppc->repq[0]; + + while (childIdx < ppc->rep_size) { + rightChildIdx = childIdx + 1; + if (rightChildIdx < ppc->rep_size && ppc->repq[rightChildIdx].re_time < ppc->repq[childIdx].re_time) { + childIdx = rightChildIdx; + } + if (ppc->repq[childIdx].re_time < temp.re_time) { + ppc->repq[rootIdx] = ppc->repq[childIdx]; + rootIdx = childIdx; + childIdx = 2 * rootIdx + 1; + } else { + break; + } + } + ppc->repq[rootIdx] = temp; +} + +//dump dump function +static void +ds_dump(void) { + printk("# into %s. 
Did Nothing\n", __func__); +} + +//burn the scurr budget +static void +burn_budgets(struct ds_vcpu *dvc, s_time_t now) { + s_time_t delta; + unsigned int consume; + struct list_head * rdyq = RDYQ(dvc->vcpu->processor); + struct list_head * iter; + + BUG_ON(dvc != DS_CUR(dvc->vcpu->processor)); + + if (dvc->last_start_time == 0) { + dvc->last_start_time = now; + return; + } + + delta = now - dvc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + if (consume > dvc->cur_budget) { + //printk("\n# into %s, consumed more than cur budget!\n", __func__); + consume = dvc->cur_budget; + } + + dvc->cur_budget -= consume; + + if (ds_priv.periodic == 1) { + list_for_each(iter, rdyq) { + struct ds_vcpu * iter_dvc = __rdyq_elem(iter); + //rdyQ has higher priority + if (dvc->level > iter_dvc->level) { + iter_dvc->cur_budget -= consume; + if (iter_dvc->cur_budget < 0) { + iter_dvc->cur_budget = 0; + } + } + } + } +} + +//init the physical cpu +static int +ds_pcpu_init(int cpu) { + struct ds_pcpu *ppc; + unsigned long flags; + + /* Allocate per-PCPU info */ + ppc = xmalloc(struct ds_pcpu); + if (ppc == NULL) + return -1; + memset(ppc, 0, sizeof (*ppc)); + + spin_lock_irqsave(&ds_priv.lock, flags); + + if (ds_priv.ncpus < cpu) + ds_priv.ncpus = cpu + 1; + + init_timer(&ppc->ticker, ds_tick, (void *) (unsigned long) cpu, cpu); + INIT_LIST_HEAD(&ppc->runq); + INIT_LIST_HEAD(&ppc->rdyq); + per_cpu(schedule_data, cpu).sched_priv = ppc; + + BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr)); + + ppc->rep_capacity = REPQ_CAPACITY; + ppc->repq = xmalloc_array(struct rep_elem, ppc->rep_capacity); + BUG_ON(ppc->repq == NULL); + ppc->rep_size = 0; + + spin_unlock_irqrestore(&ds_priv.lock, flags); + + printk("\n# finish %s, init cpu: %d\n", __func__, cpu); + + return 0; +} + +//check the vcpu +static inline void +__ds_vcpu_check(struct vcpu *vc) { + struct ds_vcpu * const dvc = DS_VCPU(vc); + struct ds_dom * const 
ddom = dvc->ddom; + + BUG_ON(dvc->vcpu != vc); + BUG_ON(ddom != DS_DOM(vc->domain)); + if (ddom) { + BUG_ON(is_idle_vcpu(vc)); + BUG_ON(ddom->dom != vc->domain); + } else { + BUG_ON(!is_idle_vcpu(vc)); + } +} +#define DS_VCPU_CHECK(_vc) (__ds_vcpu_check(_vc)) + +//pick a cpu to run, used to migrate from different cpus +static int +ds_cpu_pick(struct vcpu *vc) { + cpumask_t cpus; + int cpu; + + cpus_and(cpus, cpu_online_map, vc->cpu_affinity); + + if (vc->domain->domain_id == 0 && vc->processor != 0) { + return cycle_cpu(vc->processor, cpus); + } + + cpu = cpu_isset(vc->processor, cpus) + ? vc->processor + : cycle_cpu(vc->processor, cpus); + + return cpu; +} + +//check the current repQ to see if a repl needs to happen +static int +check_cpu_for_repl(int cpu) { + struct ds_pcpu * ppc = DS_PCPU(cpu); + int flag = 0; //used for interrupt + + while((ppc->rep_size != 0) && ppc->repq[0].re_time < NOW()) { + ppc->repq[0].dvc->cur_budget = ppc->repq[0].dvc->budget; + if (flag == 0 && ppc->repq[0].dvc->level < DS_CUR(cpu)->level) { + flag = 1; // need interrupt + } + ds_repq_insert(ppc->repq[0].dvc->vcpu->processor, ppc->repq[0].dvc); + ds_repq_remove(cpu); + } + + return flag; +} + +//init the virtual cpu +static int +ds_vcpu_init(struct vcpu *vc) { + struct domain * const dom = vc->domain; + struct ds_dom *ddom = DS_DOM(dom); + struct ds_vcpu *dvc; + + /* Allocate per-VCPU info */ + dvc = xmalloc(struct ds_vcpu); + if (dvc == NULL) { + return -1; + } + memset(dvc, 0, sizeof (*dvc)); + + INIT_LIST_HEAD(&dvc->runq_elem); + INIT_LIST_HEAD(&dvc->rdyq_elem); + dvc->ddom = ddom; + dvc->vcpu = vc; + dvc->budget = is_idle_vcpu(vc)? DS_IDLE_PERIOD: ddom->budget; + dvc->period = is_idle_vcpu(vc)? DS_IDLE_PERIOD: ddom->period; + dvc->level = is_idle_vcpu(vc)? 
DS_IDLE_PERIOD: ddom->level; + dvc->cur_budget = dvc->budget; + + dvc->last_start_time = 0; + dvc->flag = 0; + vc->sched_priv = dvc; + + /* Allocate per-PCPU info */ + if (unlikely(!DS_PCPU(vc->processor))) { + if (ds_pcpu_init(vc->processor) != 0) + return -1; + } + + DS_VCPU_CHECK(vc); + + printk("\n# into %s, vcpu init: ", __func__); + ds_dump_vcpu(dvc); + + return 0; +} + +//destory the vcpu +static void +ds_vcpu_destroy(struct vcpu *vc) { + struct ds_vcpu * const dvc = DS_VCPU(vc); + struct ds_dom * const ddom = dvc->ddom; + + printk("\n# into %s, vcpu destroy: ", __func__); + ds_dump_vcpu(dvc); + + BUG_ON(ddom == NULL); + BUG_ON(!list_empty(&dvc->runq_elem)); + + xfree(dvc); +} + +//sleep the vcpu +static void +ds_vcpu_sleep(struct vcpu *vc) { + struct ds_vcpu * const dvc = DS_VCPU(vc); + + BUG_ON(is_idle_vcpu(vc)); + + if (per_cpu(schedule_data, vc->processor).curr == vc) { + cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); + } else if (__vcpu_on_runq(dvc)) { + //polling server + if (ds_priv.polling == 1) { + dvc->cur_budget = 0; + } + __runq_remove(dvc); + } else if (__vcpu_on_rdyq(dvc)) { + __rdyq_remove(dvc); + } +} + +//wake up the vcpu, insert it into runq, raise a softirq +static void +ds_vcpu_wake(struct vcpu *vc) { + struct ds_vcpu * const dvc = DS_VCPU(vc); + const unsigned int cpu = vc->processor; + + BUG_ON(is_idle_vcpu(vc)); + + if (unlikely(per_cpu(schedule_data, cpu).curr == vc)) { + //printk("\n# why wake up running? migration?\n"); + return; + } + if (unlikely(__vcpu_on_runq(dvc))) { + //printk("\n# why wake up on runq ones? 
migration?\n"); + return; + } + +/* + if (smp_processor_id() == 1) { + printk("%s, domain %d, now %lu\n", __func__, vc->domain->domain_id, NOW()/1000000); + } +*/ + + if (__vcpu_on_rdyq(dvc)) { + __rdyq_remove(dvc); + } + + __runq_insert(cpu, dvc); + if (dvc->level < DS_CUR(cpu)->level) { + if (ds_start_flag == 1 && dvc->vcpu->processor == 1) { + ds_wake++; + } + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } +} + +static void +ds_ds_finish_timer(void * temp) { + ds_start_flag = 0; + printk("wake up %d times\n", ds_wake); + ds_wake = 0; +} + +//used for record data, for overhead measurement +static void +ds_ds_start_timer(void * temp) { + ds_start_flag = 1; + init_timer(&ds_start_timer, ds_ds_finish_timer, (void *) (unsigned int) 1, 1); + set_timer(&ds_start_timer, NOW() + MILLISECS(10000)); +} + +//adjust the domain's budget & period, also used to trigger the record +static int +ds_dom_cntl(struct domain *d, struct xen_domctl_scheduler_op *op) { + struct ds_dom * const ddom = DS_DOM(d); + unsigned long flags; + struct ds_vcpu *dvc = DS_VCPU(d->vcpu[0]); + + if (op->cmd == XEN_DOMCTL_SCHEDOP_getinfo) { + op->u.ds.budget = ddom->budget; + op->u.ds.period = ddom->period; + op->u.ds.level = ddom->level; + //ds_dump_vcpu(dvc); + } else { + BUG_ON(op->cmd != XEN_DOMCTL_SCHEDOP_putinfo); + + spin_lock_irqsave(&ds_priv.lock, flags); + if (op->u.ds.budget != 0) { + ddom->budget = op->u.ds.budget; + dvc->budget = op->u.ds.budget; + } + if (op->u.ds.period != 0) { + ddom->period = op->u.ds.period; + dvc->period = op->u.ds.period; + } + if (op->u.ds.level != 0) { + ddom->level = op->u.ds.level; + dvc->level = op->u.ds.level; + } + dvc->cur_budget = dvc->budget; + spin_unlock_irqrestore(&ds_priv.lock, flags); + + if (dvc->vcpu->domain->domain_id == 0) { + if (op->u.ds.budget == 100) { + if (ds_priv.polling == 0) { + ds_priv.polling = 1; + printk("running with the polling server!\n"); + } else if (ds_priv.polling == 1) { + ds_priv.polling = 0; + printk("running with the 
deferrable server!\n"); + } + } else if (op->u.ds.budget == 200) { + if (ds_priv.periodic == 0) { + ds_priv.periodic = 1; + ds_priv.polling = 0; + printk("running with the periodic server!\n"); + } else if (ds_priv.periodic == 1) { + ds_priv.periodic = 0; + if (ds_priv.polling == 1) { + printk("running with the polling server!\n"); + } else if (ds_priv.polling == 0) { + printk("running with the deferrable server!\n"); + } + } + } else if (op->u.ds.budget == 300) { + init_timer(&ds_start_timer, ds_ds_start_timer, (void *) (unsigned int) 1, 1); + set_timer(&ds_start_timer, NOW() + MILLISECS(5000)); + return 1; + } + } + } + + return 0; +} + +//init a dom +static int +ds_dom_init(struct domain *dom) { + struct ds_dom *ddom; + + printk("\n# into %s, domain id is: %d\n", __func__, dom->domain_id); + + if (is_idle_domain(dom)) { + printk("\t# init an idle domain\n"); + return 0; + } + + ddom = xmalloc(struct ds_dom); + if (ddom == NULL) + return -ENOMEM; + memset(ddom, 0, sizeof (*ddom)); + + /* Initialize budget and period */ + ddom->dom = dom; + + switch(dom->domain_id) { + case 32767: + ddom->budget = DS_IDLE_PERIOD; + ddom->period = DS_IDLE_PERIOD; + ddom->level = DS_IDLE_PERIOD; + break; + case 0: + ddom->budget = DS_DOM_0_PERIOD; + ddom->period = DS_DOM_0_PERIOD; + ddom->level = 1; + break; + default: + ddom->budget = DS_DOM_BUDGET; + ddom->period = DS_DOM_PERIOD; + ddom->level = 10; + break; + } + + dom->sched_priv = ddom; + + return 0; +} + +//destory a domain +static void +ds_dom_destroy(struct domain *dom) { + printk("\n# into %s, destroy domain: %d\n", __func__, dom->domain_id); + xfree(DS_DOM(dom)); +} + +//ticked by pcpu tick in pcpu. 
+static void +ds_tick(void *_cpu) { + unsigned int cpu = (unsigned long) _cpu; + struct ds_pcpu *ppc = DS_PCPU(cpu); +/* + if (smp_processor_id() == 1 && ds_start_flag == 1) { + ds_res_tick[ds_idx_tick].enter = NOW(); + } +*/ + BUG_ON(current->processor != cpu); + + if (check_cpu_for_repl(cpu)) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + if (ds_cpu_pick(current) != cpu) { + set_bit(_VPF_migrating, ¤t->pause_flags); + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + set_timer(&ppc->ticker, NOW() + BUDGET(1)); +/* + if (smp_processor_id() == 1 && ds_start_flag == 1) { + ds_res_tick[ds_idx_tick].leave = NOW(); + if (ds_idx_tick++ >= RECORD) { + printk("tick full!\n"); + ds_dump_record(); + } + } + */ +} + +// most important function, called every budget time +static struct task_slice +ds_schedule(s_time_t now) { + const int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + struct ds_vcpu *scurr = DS_VCPU(current); + struct ds_vcpu *snext; + struct task_slice ret; + + DS_VCPU_CHECK(current); +/* +// for record + if (smp_processor_id() == 1 && ds_start_flag == 1) { + if(is_idle_vcpu(scurr->vcpu)) ds_res[ds_idx].curr = 10; + else ds_res[ds_idx].curr = scurr->vcpu->domain->domain_id; + ds_res[ds_idx].enter = NOW(); + } +*/ + if (!is_idle_vcpu(scurr->vcpu) && scurr->vcpu->domain->domain_id != 0) { + //if (!is_idle_vcpu(scurr->vcpu)) { + burn_budgets(scurr, now); + if (scurr->flag == 0) { + scurr->flag = 1; + ds_repq_insert(scurr->vcpu->processor, scurr); + } + } + + if (vcpu_runnable(current)) { + __runq_insert(cpu, scurr); + } else { + BUG_ON(is_idle_vcpu(current) || list_empty(runq)); + // for the polling server + if (cpu == 1 && scurr->vcpu->domain->domain_id != 0 && ds_priv.polling == 1) { + scurr->cur_budget = 0; + } + __rdyq_insert(cpu, scurr); + } + + snext = __runq_pick(cpu); + + __runq_remove(snext); + + if (cpu == 1 && snext->vcpu->domain->domain_id != 0) { + snext->last_start_time = NOW(); + } + + ret.time = (is_idle_vcpu(snext->vcpu) 
? -1 : BUDGET(1)); + + //ret.time = BUDGET(1); + ret.task = snext->vcpu; + + DS_VCPU_CHECK(ret.task); + + BUG_ON(!vcpu_runnable(snext->vcpu)); + //printk("now is %lu\n", now); +/* +// for record + if (smp_processor_id() == 1 && ds_start_flag == 1) { + if(is_idle_vcpu(snext->vcpu)) ds_res[ds_idx].next = 10; + else ds_res[ds_idx].next = snext->vcpu->domain->domain_id; + ds_res[ds_idx].leave = NOW(); + if(ds_idx++ >= RECORD) { + printk("full!!\n"); + ds_dump_record(); + } + } + */ + + return ret; +} + +//init the global data +static void +ds_init(void) { + printk("\n# into %s\n", __func__); + spin_lock_init(&ds_priv.lock); + ds_priv.ncpus = 0; + ds_priv.polling = 0; + ds_priv.periodic = 0; +} + +/* Tickers cannot be kicked until SMP subsystem is alive. */ +static __init int +ds_start_tickers(void) { + struct ds_pcpu *ppc; + unsigned int cpu; + + printk("\n# into %s, start all tickers right now\n", __func__); + + if (ds_priv.ncpus == 0) + return 0; + + for_each_online_cpu(cpu) { + ppc = DS_PCPU(cpu); + set_timer(&ppc->ticker, NOW() + BUDGET(1)); + } + + return 0; +} +__initcall(ds_start_tickers); + +static void ds_tick_suspend(void) { + struct ds_pcpu *ppc; + + printk("\n# into %s, why is this called?\n", __func__); + + ppc = DS_PCPU(smp_processor_id()); + + stop_timer(&ppc->ticker); +} + +static void ds_tick_resume(void) { + struct ds_pcpu *ppc; + uint64_t now = NOW(); + + printk("\n# into %s, why is this called?\n", __func__); + + ppc = DS_PCPU(smp_processor_id()); + + set_timer(&ppc->ticker, now + BUDGET(1)); +} + +const struct scheduler sched_ds_def = { + .name = "Deferrable Server Scheduler", + .opt_name = "ds", + .sched_id = XEN_SCHEDULER_DS, + + .init_domain = ds_dom_init, + .destroy_domain = ds_dom_destroy, + + .init_vcpu = ds_vcpu_init, + .destroy_vcpu = ds_vcpu_destroy, + + .init = ds_init, + + .pick_cpu = ds_cpu_pick, + + .tick_suspend = ds_tick_suspend, + .tick_resume = ds_tick_resume, + + .do_schedule = ds_schedule, + + .sleep = ds_vcpu_sleep, + .wake = 
ds_vcpu_wake, + + .adjust = ds_dom_cntl, + + .dump_cpu_state = ds_dump_pcpu, + .dump_settings = ds_dump, +}; diff -ubrN xen/xen-4.0.1/xen/common/sched_ds_emsoft11.c xen-4.0.1/xen/common/sched_ds_emsoft11.c --- xen/xen-4.0.1/xen/common/sched_ds_emsoft11.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_ds_emsoft11.c 2011-04-10 11:56:00.000000000 -0600 @@ -0,0 +1,1136 @@ +/****************************************************************************** + * Periodic / Polling / Deferrable Server scheduler for xen + * + * by Sisu Xi (C) 2010 Washington University in St. Louis + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DS_DOM(_dom) ((struct ds_dom *) (_dom)->sched_priv) +#define DS_PCPU(_c) ((struct ds_pcpu *)per_cpu(schedule_data, _c).sched_priv) +#define DS_VCPU(_vcpu) ((struct ds_vcpu *) (_vcpu)->sched_priv) +#define RUNQ(_cpu) (&(DS_PCPU(_cpu)->runq)) +#define RDYQ(_cpu) (&(DS_PCPU(_cpu)->rdyq)) +#define DS_CUR(_cpu) DS_VCPU(per_cpu(schedule_data, _cpu).curr) +#define BUDGET(_b) (MILLISECS(10*_b)) // time to run for 1 budget, default setting is 1ms = 1 budget + +#define REPQ_CAPACITY 500 // repQ is used for the replenishment + +#define DS_DOM_0_PERIOD 100 +#define DS_IDLE_PERIOD 200 + +#define DS_DOM_BUDGET 25 // default budget, can bu changed via xm sched-ss -d target -b budget -p period +#define DS_DOM_PERIOD 50 + +//PES is the Simple PES, CS is the standard PES +enum server_type {DS, POS, PES, CS, SS}; + +//physical cpu +struct ds_pcpu { + struct list_head runq; // runQ on the pcpu, organized by linked list + struct list_head rdyq; + struct rep_elem *repq; //repQ on the pcpu, organized by heap + int rep_size; // current size, for later dynamic reqQ use. 
currently set equals to capacity + int rep_capacity; // upper limit + struct timer ticker; // for preemptive use, tick every budget +}; + +//virtual cpu +struct ds_vcpu { + struct list_head runq_elem; + struct list_head rdyq_elem; + struct list_head active_elem; //used to link all active vcpu except domain 0 and idle one! + struct ds_dom *ddom; + struct vcpu *vcpu; + + uint16_t repq_pending; // used to calculate how many items are on repq + + uint16_t budget; + uint16_t period; + uint16_t level; + + uint16_t cur_budget; + s_time_t last_start_time; // used for burn_budget + s_time_t next_time; //the next repl time + + int flag; // represent whether the vCPU has started or not + + uint16_t burn_total; // used only for Sporadic Server +}; + +//used for replenishment +struct rep_elem { + s_time_t re_time; + int16_t re_amount; + struct ds_vcpu *dvc; +}; + +//domain +struct ds_dom { + struct domain *dom; + uint16_t budget; + uint16_t period; + uint16_t level; +}; + +//global variable, records the number of cpus +struct ds_private { + spinlock_t lock; // used for init + uint32_t ncpus; //number of physical cpus + enum server_type type; // used for different type of servers + struct list_head active; //active_vcpu except domain 0 and idle vcpu! 
+}; +static struct ds_private ds_priv; + +static void ds_tick(void *_cpu); + +//dump the repq +static void +ds_dump_repq(int cpu) { + int loop = 0; + struct ds_pcpu *ppc = DS_PCPU(cpu); + + printk("repq: size: %d\n", ppc->rep_size); + for (loop = 0; loop < ppc->rep_size; loop++) { + printk("\t[%d, %d]: %d @ %lu\n", + ppc->repq[loop].dvc->vcpu->domain->domain_id, + ppc->repq[loop].dvc->vcpu->vcpu_id, + ppc->repq[loop].re_amount, + ppc->repq[loop].re_time); + } +} + +//dump the virtual cpu +static void +ds_dump_vcpu(struct ds_vcpu *dvc) { + printk("\t[%i, %i], cur: %i, rep: %d, last: %lu, next: %lu, \n", dvc->vcpu->domain->domain_id, dvc->vcpu->vcpu_id, dvc->cur_budget, dvc->repq_pending, dvc->last_start_time, dvc->next_time); +} + +//inlined code +static inline struct ds_vcpu * +__runq_elem(struct list_head *elem) { + return list_entry(elem, struct ds_vcpu, runq_elem); +} + +//inlined code +static inline struct ds_vcpu * +__rdyq_elem(struct list_head *elem) { + return list_entry(elem, struct ds_vcpu, rdyq_elem); +} + +//dump the physical cpu +static void +ds_dump_pcpu(int cpu) { + struct list_head *iter; + struct ds_pcpu *ppc = DS_PCPU(cpu); + struct list_head *runq = &ppc->runq; + struct list_head *rdyq = &ppc->rdyq; + struct ds_vcpu *dvc = DS_CUR(cpu); + int loop = 0; + + printk("### cpu: %d, now is: %lu\n", cpu, NOW()); + + if (dvc) { + printk("\trun: "); + ds_dump_vcpu(dvc); + } + + printk("runq:\n"); + list_for_each(iter, runq) { + dvc = __runq_elem(iter); + if (dvc) { + printk("\t%3d: ", ++loop); + ds_dump_vcpu(dvc); + } + } + + printk("rdyq:\n"); + list_for_each(iter, rdyq) { + dvc = __rdyq_elem(iter); + if (dvc) { + printk("\t%3d: ", ++loop); + ds_dump_vcpu(dvc); + } + } + + ds_dump_repq(cpu); + printk("\n"); +} + +// the current vcpu is on runQ? +static inline int +__vcpu_on_runq(struct ds_vcpu *dvc) { + return !list_empty(&dvc->runq_elem); +} + +// the current vcpu is on runQ? 
+static inline int +__vcpu_on_rdyq(struct ds_vcpu *dvc) { + return !list_empty(&dvc->rdyq_elem); +} + +//pick the first vcpu whose budget is >0 from the runq +static inline struct ds_vcpu * +__runq_pick(unsigned int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + list_for_each(iter, runq) { + struct ds_vcpu * iter_dvc = __runq_elem(iter); + if (iter_dvc->cur_budget > 0) { + return iter_dvc; + } + } + + BUG_ON(1); + return NULL; +} + +//pick the first one with budget > 0, regardless of runnable or not +static inline struct ds_vcpu * +__rdyq_pick(unsigned int cpu) { + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + list_for_each(iter, rdyq) { + struct ds_vcpu *iter_dvc = __rdyq_elem(iter); + if (iter_dvc->cur_budget > 0) { + return iter_dvc; + } + } + + return NULL; +} + +static inline struct ds_vcpu * +__runq_pick_idle(unsigned int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + list_for_each(iter, runq) { + struct ds_vcpu * iter_dvc = __runq_elem(iter); + if (is_idle_vcpu(iter_dvc->vcpu)) { + return iter_dvc; + } + } + + BUG_ON(1); + return NULL; +} + +//insert into the runq, followed a FIFO way. sorted by level +static inline void +__runq_insert(unsigned int cpu, struct ds_vcpu *dvc) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + BUG_ON(__vcpu_on_runq(dvc)); + BUG_ON(cpu != dvc->vcpu->processor); + + list_for_each(iter, runq) { + struct ds_vcpu * iter_dvc = __runq_elem(iter); + if (dvc->level < iter_dvc->level) { + break; + } + } + + list_add_tail(&dvc->runq_elem, iter); +} + +//insert into the runq, followed a FIFO way. 
sorted by level +static inline void +__rdyq_insert(unsigned int cpu, struct ds_vcpu *dvc) { + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + BUG_ON(__vcpu_on_rdyq(dvc)); + BUG_ON(cpu != dvc->vcpu->processor); + + list_for_each(iter, rdyq) { + struct ds_vcpu * iter_dvc = __rdyq_elem(iter); + if (dvc->level <= iter_dvc->level) { + break; + } + } + + list_add_tail(&dvc->rdyq_elem, iter); +} + +//remove it from runQ +static inline void +__runq_remove(struct ds_vcpu *dvc) { + BUG_ON(!__vcpu_on_runq(dvc)); + list_del_init(&dvc->runq_elem); +} + +//remove it from runQ +static inline void +__rdyq_remove(struct ds_vcpu *dvc) { + BUG_ON(!__vcpu_on_rdyq(dvc)); + list_del_init(&dvc->rdyq_elem); +} + +//used for the heap, repQ +static inline int +ds_rep_parent(int childIdx) { + return (childIdx & 1)? ((childIdx - 1) >> 1) : ((childIdx - 2) >> 1); +} + +//insert into the repQ +static inline void +ds_repq_insert(unsigned int cpu, struct ds_vcpu *dvc, int amount) { + struct ds_pcpu * ppc = DS_PCPU(cpu); + int childIdx, parentIdx; + + if (dvc->next_time == 0) { + printk("\n# in %s, ERROR! 
dvc is:", __func__); + ds_dump_vcpu(dvc); + ds_dump_pcpu(cpu); + dvc->next_time = NOW() + BUDGET(1) * dvc->period; + } + + if (amount == 0) { + return; + } + + if (ppc->rep_size == ppc->rep_capacity) { + printk("\n# into %s, repQ full!!\n", __func__); + BUG_ON(1); + } + + childIdx = ppc->rep_size; + parentIdx = ds_rep_parent(childIdx); + + while (childIdx > 0 && dvc->next_time < ppc->repq[parentIdx].re_time) { + ppc->repq[childIdx] = ppc->repq[parentIdx]; + childIdx = parentIdx; + parentIdx = ds_rep_parent(childIdx); + } + + ppc->repq[childIdx].re_time = dvc->next_time; + ppc->repq[childIdx].dvc = dvc; + ppc->repq[childIdx].re_amount = amount; + ppc->rep_size++; + + // dvc->next_time = 0; + dvc->repq_pending++; +} + +//remove from the repQ +static inline void +ds_repq_remove(unsigned int cpu) { + struct ds_pcpu * ppc = DS_PCPU(cpu); + int childIdx = 1; + int rightChildIdx; + int rootIdx = 0; + struct rep_elem temp; + + BUG_ON(ppc->rep_size <= 0); + + ppc->repq[0].dvc->repq_pending--; + ppc->repq[0] = ppc->repq[ppc->rep_size - 1]; + ppc->rep_size--; + + temp = ppc->repq[0]; + + while (childIdx < ppc->rep_size) { + rightChildIdx = childIdx + 1; + if (rightChildIdx < ppc->rep_size && ppc->repq[rightChildIdx].re_time < ppc->repq[childIdx].re_time) { + childIdx = rightChildIdx; + } + if (ppc->repq[childIdx].re_time < temp.re_time) { + ppc->repq[rootIdx] = ppc->repq[childIdx]; + rootIdx = childIdx; + childIdx = 2 * rootIdx + 1; + } else { + break; + } + } + ppc->repq[rootIdx] = temp; +} + +//dump dump function +static void +ds_dump(void) { + printk("# into %s. 
Did Nothing\n", __func__); +} + +//burn the scurr budget +//dom != 0 && !is_idle_vcpu(dvc) +static void +burn_budgets(struct ds_vcpu *dvc, s_time_t now) { + s_time_t delta; + unsigned int consume; + struct list_head * rdyq = RDYQ(dvc->vcpu->processor); + struct list_head * iter; + + BUG_ON(dvc != DS_CUR(dvc->vcpu->processor)); + + if (dvc->last_start_time == 0) { + dvc->last_start_time = now; + printk("\nset last_start_time to 0 in %s\n", __func__); + return; + } + + delta = now - dvc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + + dvc->cur_budget -= consume; + if (dvc->cur_budget < 0) dvc->cur_budget = 0; + + if(ds_priv.type == SS) { + dvc->burn_total += consume; + } + + // printk("\n\t%d @ burn\n", consume); +//used for simple PES, to burn all the VCPU's budget who has higher priority + if (ds_priv.type == PES) { + list_for_each(iter, rdyq) { + struct ds_vcpu * iter_dvc = __rdyq_elem(iter); + //rdyQ has higher priority + if (dvc->level > iter_dvc->level && iter_dvc->cur_budget > 0) { + iter_dvc->cur_budget -= consume; + if (iter_dvc->cur_budget < 0) { + iter_dvc->cur_budget = 0; + } + break; // the enhanced old periodic server + } + } + } +} + +//used for PES and CS, the dvc is the IDLE VCPU +//domain != 0, is_idle_vcpu(dvc) +static void +burn_extra(struct ds_vcpu *dvc, s_time_t now) { + s_time_t delta; + unsigned int consume; + struct list_head * rdyq = RDYQ(dvc->vcpu->processor); + struct list_head * iter; + + BUG_ON(dvc != DS_CUR(dvc->vcpu->processor)); + + if (dvc->last_start_time == 0) { + dvc->last_start_time = now; + return; + } + + if (ds_priv.type == DS || ds_priv.type == POS || ds_priv.type == SS) { + return; + } + + delta = now - dvc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + + if (ds_priv.type == PES) { + list_for_each(iter, rdyq) { + struct ds_vcpu * iter_dvc = __rdyq_elem(iter); + //rdyQ 
has higher priority + if (iter_dvc->cur_budget > 0) { + iter_dvc->cur_budget -= consume; + if (iter_dvc->cur_budget < 0) { + iter_dvc->cur_budget = 0; + } + break; // the enhanced old periodic server + } + } + } else { // now for the CS, need to consume the budget of the first element on rdyq + list_for_each(iter, rdyq) { + struct ds_vcpu * iter_dvc = __rdyq_elem(iter); + //rdyQ has higher priority + if (iter_dvc->cur_budget > 0) { + iter_dvc->cur_budget -= consume; + if (iter_dvc->cur_budget < 0) { + iter_dvc->cur_budget = 0; + } + break; + } + }// if no one runs, also check the rdyQ, and then returns + } +} + +//init the physical cpu +static int +ds_pcpu_init(int cpu) { + struct ds_pcpu *ppc; + unsigned long flags; + + /* Allocate per-PCPU info */ + ppc = xmalloc(struct ds_pcpu); + if (ppc == NULL) + return -1; + memset(ppc, 0, sizeof (*ppc)); + + spin_lock_irqsave(&ds_priv.lock, flags); + + if (ds_priv.ncpus < cpu) + ds_priv.ncpus = cpu + 1; + + init_timer(&ppc->ticker, ds_tick, (void *) (unsigned long) cpu, cpu); + INIT_LIST_HEAD(&ppc->runq); + INIT_LIST_HEAD(&ppc->rdyq); + per_cpu(schedule_data, cpu).sched_priv = ppc; + + BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr)); + + ppc->rep_capacity = REPQ_CAPACITY; + ppc->repq = xmalloc_array(struct rep_elem, ppc->rep_capacity); + BUG_ON(ppc->repq == NULL); + ppc->rep_size = 0; + + spin_unlock_irqrestore(&ds_priv.lock, flags); + + printk("\n# finish %s, init cpu: %d\n", __func__, cpu); + + return 0; +} + +//check the vcpu +static inline void +__ds_vcpu_check(struct vcpu *vc) { + struct ds_vcpu * const dvc = DS_VCPU(vc); + struct ds_dom * const ddom = dvc->ddom; + + BUG_ON(dvc->vcpu != vc); + BUG_ON(ddom != DS_DOM(vc->domain)); + if (ddom) { + BUG_ON(is_idle_vcpu(vc)); + BUG_ON(ddom->dom != vc->domain); + } else { + BUG_ON(!is_idle_vcpu(vc)); + } +} +#define DS_VCPU_CHECK(_vc) (__ds_vcpu_check(_vc)) + +//pick a cpu to run, used to migrate from different cpus +static int +ds_cpu_pick(struct vcpu *vc) { + 
cpumask_t cpus; + int cpu; + + cpus_and(cpus, cpu_online_map, vc->cpu_affinity); + + if (vc->domain->domain_id == 0 && vc->processor != 0) { + return cycle_cpu(vc->processor, cpus); + } + + cpu = cpu_isset(vc->processor, cpus) + ? vc->processor + : cycle_cpu(vc->processor, cpus); + + return cpu; +} + +//for PES or CS, when the +//check the current repQ to see if a repl needs to happen +static int +check_cpu_for_repl(int cpu) { + struct ds_pcpu * ppc = DS_PCPU(cpu); + int flag = 0; //used for interrupt + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + while((ppc->rep_size != 0) && ppc->repq[0].re_time < NOW()) { + ppc->repq[0].dvc->cur_budget += ppc->repq[0].re_amount; + if (ppc->repq[0].dvc->cur_budget > ppc->repq[0].dvc->budget) { + ppc->repq[0].dvc->cur_budget = ppc->repq[0].dvc->budget; + } + + if (ds_priv.type != SS) { // insert next repl + ppc->repq[0].dvc->next_time = NOW() + BUDGET(1) * ppc->repq[0].dvc->period; + ds_repq_insert(ppc->repq[0].dvc->vcpu->processor, ppc->repq[0].dvc, ppc->repq[0].dvc->budget); + } + + if (ds_priv.type != CS) { + if (ppc->repq[0].dvc->level < DS_CUR(cpu)->level) { + flag = 1; + } // raise interrupt + } else { // for the CS type + if (!is_idle_vcpu(current)) { + if (ppc->repq[0].dvc->level < DS_CUR(cpu)->level) { + flag = 1; + } + } else { // the idle VCPU + list_for_each(iter, rdyq) { + struct ds_vcpu * iter_dvc = __rdyq_elem(iter); + //rdyQ has higher priority + if (iter_dvc->cur_budget > 0) { + if (ppc->repq[0].dvc->level < iter_dvc->level) { + flag = 1; // higher priority + } + break; + } + } + } + } + + ds_repq_remove(cpu); + } + + return flag; +} + +//init the virtual cpu +static int +ds_vcpu_init(struct vcpu *vc) { + struct domain * const dom = vc->domain; + struct ds_dom *ddom = DS_DOM(dom); + struct ds_vcpu *dvc; + + /* Allocate per-VCPU info */ + dvc = xmalloc(struct ds_vcpu); + if (dvc == NULL) { + return -1; + } + memset(dvc, 0, sizeof (*dvc)); + + INIT_LIST_HEAD(&dvc->runq_elem); + 
INIT_LIST_HEAD(&dvc->rdyq_elem); + INIT_LIST_HEAD(&dvc->active_elem); // init for active list + dvc->ddom = ddom; + dvc->vcpu = vc; + dvc->budget = is_idle_vcpu(vc)? DS_IDLE_PERIOD: ddom->budget; + dvc->period = is_idle_vcpu(vc)? DS_IDLE_PERIOD: ddom->period; + dvc->level = is_idle_vcpu(vc)? DS_IDLE_PERIOD: ddom->level; + dvc->cur_budget = dvc->budget; + dvc->repq_pending = 0; + + dvc->last_start_time = 0; + dvc->flag = 0; + + dvc->burn_total = 0; + dvc->next_time = 0; + + vc->sched_priv = dvc; + + /* Allocate per-PCPU info */ + if (unlikely(!DS_PCPU(vc->processor))) { + if (ds_pcpu_init(vc->processor) != 0) + return -1; + } + + DS_VCPU_CHECK(vc); + + printk("\n# into %s, vcpu init: ", __func__); + ds_dump_vcpu(dvc); + + return 0; +} + +//destory the vcpu +static void +ds_vcpu_destroy(struct vcpu *vc) { + struct ds_vcpu * const dvc = DS_VCPU(vc); + struct ds_dom * const ddom = dvc->ddom; + + printk("\n# into %s, vcpu destroy: ", __func__); + ds_dump_vcpu(dvc); + + BUG_ON(ddom == NULL); + BUG_ON(!list_empty(&dvc->runq_elem)); + list_del_init(&dvc->active_elem); + + xfree(dvc); +} + +//sleep the vcpu +static void +ds_vcpu_sleep(struct vcpu *vc) { + struct ds_vcpu * const dvc = DS_VCPU(vc); + + BUG_ON(is_idle_vcpu(vc)); + + if (per_cpu(schedule_data, vc->processor).curr == vc) { + cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); + } else if (__vcpu_on_runq(dvc)) { + //polling server + if (ds_priv.type == POS) { + dvc->cur_budget = 0; + } + __runq_remove(dvc); + } else if (__vcpu_on_rdyq(dvc)) { + __rdyq_remove(dvc); + } +} + +//wake up the vcpu, insert it into runq, raise a softirq +static void +ds_vcpu_wake(struct vcpu *vc) { + struct ds_vcpu * const dvc = DS_VCPU(vc); + const unsigned int cpu = vc->processor; + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + BUG_ON(is_idle_vcpu(vc)); + + // if (vc->domain->domain_id != 0) { + // printk("wake vcpu: now %lu ", NOW()); + // ds_dump_vcpu(dvc); + // } + + if (unlikely(per_cpu(schedule_data, 
cpu).curr == vc)) { + if (vc->domain->domain_id != 0) { + printk("\nrunning\n"); + } + return; + } + if (unlikely(__vcpu_on_runq(dvc))) { + if (vc->domain->domain_id != 0) { + printk("\nrunq\n"); + } + return; + } + + if (__vcpu_on_rdyq(dvc)) { + __rdyq_remove(dvc); + } + + if (!__vcpu_on_runq(dvc)) { + __runq_insert(cpu, dvc); + } + + if (ds_priv.type != CS) { + if (dvc->level < DS_CUR(cpu)->level) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + return; + } + } else { + if (!is_idle_vcpu(current)) { + if (dvc->level < DS_CUR(cpu)->level) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + return; + } + } else { + list_for_each(iter, rdyq) { + struct ds_vcpu * iter_dvc = __rdyq_elem(iter); + //rdyQ has higher priority + if (iter_dvc->cur_budget > 0) { + if (dvc->level < iter_dvc->level) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + return; + } + break; + } + } + } + } + + return; +} + +static inline void +ds_reset(int cpu) { + struct ds_pcpu * ppc = DS_PCPU(cpu); + struct list_head * iter; + + printk("\nbefore reset\n"); + ds_dump_pcpu(cpu); + + // empty the RepQ + while(ppc->rep_size != 0) { + ds_repq_remove(cpu); + } + + printk("\nvcpu on list is:\n"); + //init each vcpu; + list_for_each(iter, &ds_priv.active) { + struct ds_vcpu * iter_dvc = list_entry(iter, struct ds_vcpu, active_elem); + if (__vcpu_on_runq(iter_dvc)) { + __runq_remove(iter_dvc); + } + if (__vcpu_on_rdyq(iter_dvc)) { + __rdyq_remove(iter_dvc); + } + iter_dvc->cur_budget = iter_dvc->budget; + iter_dvc->last_start_time = NOW(); + iter_dvc->next_time = NOW() + BUDGET(1) * iter_dvc->period; + iter_dvc->burn_total = 0; + ds_dump_vcpu(iter_dvc); + } + + printk("\nafter reset\n"); + ds_dump_pcpu(cpu); + + //insert into Queues + list_for_each(iter, &ds_priv.active) { + struct ds_vcpu * iter_dvc = list_entry(iter, struct ds_vcpu, active_elem); + if (vcpu_runnable(iter_dvc->vcpu)) { + if (!__vcpu_on_runq(iter_dvc)) { + __runq_insert(cpu, iter_dvc); + } + } else { + if (!__vcpu_on_rdyq(iter_dvc)) { + 
__rdyq_insert(cpu, iter_dvc); + } + } + if (ds_priv.type != SS) { + ds_repq_insert(iter_dvc->vcpu->processor, iter_dvc, iter_dvc->budget); + } + } + + printk("\nafter insert\n"); + ds_dump_pcpu(cpu); +} + +//adjust the domain's budget & period, also used to trigger the record +static int +ds_dom_cntl(struct domain *d, struct xen_domctl_scheduler_op *op) { + struct ds_dom * const ddom = DS_DOM(d); + unsigned long flags; + struct ds_vcpu *dvc = DS_VCPU(d->vcpu[0]); + int flag = 0; + + if (op->cmd == XEN_DOMCTL_SCHEDOP_getinfo) { + op->u.ds.budget = ddom->budget; + op->u.ds.period = ddom->period; + op->u.ds.level = ddom->level; + //ds_dump_vcpu(dvc); + } else { + BUG_ON(op->cmd != XEN_DOMCTL_SCHEDOP_putinfo); + + spin_lock_irqsave(&ds_priv.lock, flags); + if (op->u.ds.budget != 0) { + ddom->budget = op->u.ds.budget; + dvc->budget = op->u.ds.budget; + } + if (op->u.ds.period != 0) { + ddom->period = op->u.ds.period; + dvc->period = op->u.ds.period; + } + if (op->u.ds.level != 0) { + ddom->level = op->u.ds.level; + dvc->level = op->u.ds.level; + } + dvc->cur_budget = dvc->budget; + spin_unlock_irqrestore(&ds_priv.lock, flags); + + if (dvc->vcpu->domain->domain_id == 0) { + switch (op->u.ds.budget) { + case 100: + ds_priv.type = DS; + flag = 1; + break; + case 200: + ds_priv.type = CS; + flag = 1; + break; + case 300: + ds_priv.type = PES; + flag = 1; + break; + case 400: + ds_priv.type = POS; + flag = 1; + break; + case 500: + ds_priv.type = SS; + flag = 1; + break; + case 600: + return 1; // return to record the overhead! 
+ break; + case 700: + ds_dump_pcpu(1); + break; + default: + printk("set budget of Domain-0 to : 100 (DS), 200 (CS), 300 (PES), 400 (POS), 500 (SS), 600 (record overhead), 700 (dump PCPU)\n"); + break; + } + printk("Currently running with Scheduler "); + switch (ds_priv.type) { + case CS: + printk("CS\n"); + break; + case POS: + printk("POS\n"); + break; + case PES: + printk("PES\n"); + break; + case DS: + printk("DS\n"); + break; + case SS: + printk("SS\n"); + break; + default: + printk("Wrong!!!\n"); + break; + } + if (flag == 1) { + ds_reset(1); + cpu_raise_softirq(1, SCHEDULE_SOFTIRQ); + flag = 0; + } + } + } + + return 0; +} + +//init a dom +static int +ds_dom_init(struct domain *dom) { + struct ds_dom *ddom; + + printk("\n# into %s, domain id is: %d\n", __func__, dom->domain_id); + + if (is_idle_domain(dom)) { + printk("\t# init an idle domain\n"); + return 0; + } + + ddom = xmalloc(struct ds_dom); + if (ddom == NULL) + return -ENOMEM; + memset(ddom, 0, sizeof (*ddom)); + + /* Initialize budget and period */ + ddom->dom = dom; + + switch(dom->domain_id) { + case 32767: + ddom->budget = DS_IDLE_PERIOD; + ddom->period = DS_IDLE_PERIOD; + ddom->level = 100; + break; + case 0: + ddom->budget = DS_DOM_0_PERIOD; + ddom->period = DS_DOM_0_PERIOD; + ddom->level = 1; + break; + default: + ddom->budget = DS_DOM_BUDGET; + ddom->period = DS_DOM_PERIOD; + ddom->level = 10; + break; + } + + dom->sched_priv = ddom; + + return 0; +} + +//destory a domain +static void +ds_dom_destroy(struct domain *dom) { + printk("\n# into %s, destroy domain: %d\n", __func__, dom->domain_id); + xfree(DS_DOM(dom)); +} + +//ticked by pcpu tick in pcpu. 
+static void +ds_tick(void *_cpu) { + unsigned int cpu = (unsigned long) _cpu; + struct ds_pcpu *ppc = DS_PCPU(cpu); + + BUG_ON(current->processor != cpu); + + if (check_cpu_for_repl(cpu)) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + if (ds_cpu_pick(current) != cpu) { + set_bit(_VPF_migrating, ¤t->pause_flags); + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + set_timer(&ppc->ticker, NOW() + BUDGET(1)); +} + +// most important function, called every budget time +static struct task_slice +ds_schedule(s_time_t now) { + const int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + // struct list_head *rdyq = RDYQ(cpu); + struct ds_vcpu *scurr = DS_VCPU(current); + struct ds_vcpu *snext; + struct task_slice ret; + + DS_VCPU_CHECK(current); + +// need to consider idle_vcpu for CS and PES + if (scurr->vcpu->domain->domain_id != 0) { + if (!is_idle_vcpu(scurr->vcpu)) { + // for the first time the VCPU is executed + if (scurr->flag == 0) { + scurr->flag = 1; + BUG_ON(!list_empty(&scurr->active_elem)); + list_add(&scurr->active_elem, &ds_priv.active); + scurr->next_time = now + BUDGET(1) * scurr->period; + ds_repq_insert(scurr->vcpu->processor, scurr, scurr->budget); + } + if (cpu == 1) { + burn_budgets(scurr, now); + } + } else if (cpu == 1) { // scurr is the IDLE VCPU, have to deal with specially in CS and PES + burn_extra(scurr, now); + } + } + + if (vcpu_runnable(current)) { + if (!__vcpu_on_runq(scurr)) { + __runq_insert(cpu, scurr); + } + } else { + BUG_ON(is_idle_vcpu(current) || list_empty(runq)); + //for POS server + if (cpu == 1 && scurr->vcpu->domain->domain_id != 0 && ds_priv.type == POS) { + scurr->cur_budget = 0; + } + if (!__vcpu_on_rdyq(scurr)) { + __rdyq_insert(cpu, scurr); + } + } + + if (cpu != 1) { + snext = __runq_pick(cpu); + } else if (ds_priv.type != CS) { + snext = __runq_pick(cpu); + } else { // now runs CS scheduler + // printk("\n\trdy empty? 
%d, run: %d, rdy: %d\n", list_empty(rdyq), __runq_pick(cpu)->level, __rdyq_pick(cpu)->level); + if ( __rdyq_pick(cpu) == NULL || (__runq_pick(cpu)->level < __rdyq_pick(cpu)->level) ) { + snext = __runq_pick(cpu); //we are fine here + // printk("\npicked %d\n", snext->vcpu->domain->domain_id); + } else { + // if ( __rdyq_pick(cpu) == NULL ) { + // printk("\n\t\trdyq is null\n"); + // } else { + // printk("\n\t\trun: %d, rdy: %d\n", __runq_pick(cpu)->level, __rdyq_pick(cpu)->level); + // } + snext = __runq_pick_idle(cpu); // pick the IDLE VCPU for the VCPU on the RdyQ + // ds_dump_pcpu(1); + } + } + + if (cpu == 1 && snext->vcpu->domain->domain_id != 0) { + snext->last_start_time = NOW(); + } + + BUG_ON(!__vcpu_on_runq(snext)); + __runq_remove(snext); + + //context switch happens + if (cpu == 1 && snext != scurr) { + if (ds_priv.type == SS) { + if (!is_idle_vcpu(snext->vcpu)) { + snext->next_time = now + BUDGET(1) * snext->period; + } + if (!is_idle_vcpu(scurr->vcpu)) { + ds_repq_insert(cpu, scurr, scurr->burn_total); + scurr->burn_total = 0; + } + } + } + + // ret.time = is_idle_vcpu(snext->vcpu) ? BUDGET(1) : BUDGET(1) * snext->cur_budget; + ret.time = BUDGET(1); // used to test the enhanced old periodic server + ret.task = snext->vcpu; + + DS_VCPU_CHECK(ret.task); + + BUG_ON(!vcpu_runnable(snext->vcpu)); + + return ret; +} + +//init the global data +static void +ds_init(void) { + printk("\n# into %s\n", __func__); + spin_lock_init(&ds_priv.lock); + ds_priv.ncpus = 0; + ds_priv.type = DS; + INIT_LIST_HEAD(&ds_priv.active); +} + +/* Tickers cannot be kicked until SMP subsystem is alive. 
*/ +static __init int +ds_start_tickers(void) { + struct ds_pcpu *ppc; + unsigned int cpu; + + printk("\n# into %s, start all tickers right now\n", __func__); + + if (ds_priv.ncpus == 0) + return 0; + + for_each_online_cpu(cpu) { + ppc = DS_PCPU(cpu); + set_timer(&ppc->ticker, NOW() + BUDGET(1)); + } + + return 0; +} +__initcall(ds_start_tickers); + +static void ds_tick_suspend(void) { + struct ds_pcpu *ppc; + + printk("\n# into %s, why is this called?\n", __func__); + + ppc = DS_PCPU(smp_processor_id()); + + stop_timer(&ppc->ticker); +} + +static void ds_tick_resume(void) { + struct ds_pcpu *ppc; + uint64_t now = NOW(); + + printk("\n# into %s, why is this called?\n", __func__); + + ppc = DS_PCPU(smp_processor_id()); + + set_timer(&ppc->ticker, now + BUDGET(1)); +} + +const struct scheduler sched_ds_def = { + .name = "Deferrable Server Scheduler", + .opt_name = "ds", + .sched_id = XEN_SCHEDULER_DS, + + .init_domain = ds_dom_init, + .destroy_domain = ds_dom_destroy, + + .init_vcpu = ds_vcpu_init, + .destroy_vcpu = ds_vcpu_destroy, + + .init = ds_init, + + .pick_cpu = ds_cpu_pick, + + .tick_suspend = ds_tick_suspend, + .tick_resume = ds_tick_resume, + + .do_schedule = ds_schedule, + + .sleep = ds_vcpu_sleep, + .wake = ds_vcpu_wake, + + .adjust = ds_dom_cntl, + + .dump_cpu_state = ds_dump_pcpu, + .dump_settings = ds_dump, +}; diff -ubrN xen/xen-4.0.1/xen/common/sched_rt.c xen-4.0.1/xen/common/sched_rt.c --- xen/xen-4.0.1/xen/common/sched_rt.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_rt.c 2011-05-01 00:42:25.000000000 -0600 @@ -0,0 +1,584 @@ +/****************************************************************************** + * Real Time Xen scheduler Framework + * + * by Sisu Xi (C) 2010 Washington University in St. 
Louis + * based on code by Jaewoo Lee (C) 2010 University of Pennsylvania + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sched_rt.h" + +extern const struct rt_scheduler sched_deferrable_def; +extern const struct rt_scheduler sched_periodic_def; +extern const struct rt_scheduler sched_wcps_def; +extern const struct rt_scheduler sched_ssps_def; +//extern const struct rt_scheduler sched_polling_def; +//extern const struct rt_scheduler sched_sporadic_def; + +static struct rt_private rt_priv; + +static void rt_tick(void *_cpu); + + +//init the global data, picking schedulers! +static void +rt_init(void) { + printk("\n# into %s\n", __func__); + + spin_lock_init(&rt_priv.lock); + rt_priv.ncpus = 0; + rt_priv.type = DS; + INIT_LIST_HEAD(&rt_priv.active); +} + +//dump the physical cpu +static void +rt_dump_pcpu(int cpu) { + struct list_head *iter; + struct rt_pcpu *ppc = RT_PCPU(cpu); + struct list_head *runq = &ppc->runq; + struct list_head *rdyq = &ppc->rdyq; + struct rt_vcpu *dvc = RT_CUR(cpu); + int loop = 0; + + printk("### cpu: %d, now is: %lu\n", cpu, NOW()); + + if (dvc) { + printk("\trun: "); + rt_dump_vcpu(dvc); + } + + printk("runq:\n"); + list_for_each(iter, runq) { + dvc = __runq_elem(iter); + if (dvc) { + printk("\t%3d: ", ++loop); + rt_dump_vcpu(dvc); + } + } + + printk("rdyq:\n"); + list_for_each(iter, rdyq) { + dvc = __rdyq_elem(iter); + if (dvc) { + printk("\t%3d: ", ++loop); + rt_dump_vcpu(dvc); + } + } + + rt_dump_repq(cpu); + printk("\n"); +} + +//dump dump function +static void +rt_dump(void) { + rt_dump_pcpu(1); +} + +//init the physical cpu +static int +rt_pcpu_init(int cpu) { + struct rt_pcpu *ppc; + unsigned long flags; + + /* Allocate per-PCPU info */ + ppc = xmalloc(struct rt_pcpu); + if (ppc 
== NULL) + return -1; + memset(ppc, 0, sizeof (*ppc)); + + spin_lock_irqsave(&rt_priv.lock, flags); + + if (rt_priv.ncpus < cpu) + rt_priv.ncpus = cpu + 1; + + init_timer(&ppc->ticker, rt_tick, (void *) (unsigned long) cpu, cpu); + INIT_LIST_HEAD(&ppc->runq); + INIT_LIST_HEAD(&ppc->rdyq); + per_cpu(schedule_data, cpu).sched_priv = ppc; + + BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr)); + + ppc->rep_capacity = REPQ_CAPACITY; + ppc->repq = xmalloc_array(struct rep_elem, ppc->rep_capacity); + BUG_ON(ppc->repq == NULL); + ppc->rep_size = 0; + + spin_unlock_irqrestore(&rt_priv.lock, flags); + + printk("\n# finish %s, init cpu: %d\n", __func__, cpu); + + return 0; +} + +//pick a cpu to run, used to migrate from different cpus +static int +rt_cpu_pick(struct vcpu *vc) { + cpumask_t cpus; + int cpu; + + cpus_and(cpus, cpu_online_map, vc->cpu_affinity); + + if (vc->domain->domain_id == 0 && vc->processor != 0) { + return cycle_cpu(vc->processor, cpus); + } + + cpu = cpu_isset(vc->processor, cpus) + ? vc->processor + : cycle_cpu(vc->processor, cpus); + + return cpu; +} + +//init the virtual cpu +static int +rt_vcpu_init(struct vcpu *vc) { + struct domain * const dom = vc->domain; + struct rt_dom *ddom = RT_DOM(dom); + struct rt_vcpu *dvc; + + /* Allocate per-VCPU info */ + dvc = xmalloc(struct rt_vcpu); + if (dvc == NULL) { + return -1; + } + memset(dvc, 0, sizeof (*dvc)); + + INIT_LIST_HEAD(&dvc->runq_elem); + INIT_LIST_HEAD(&dvc->rdyq_elem); + INIT_LIST_HEAD(&dvc->active_elem); // init for active list + dvc->ddom = ddom; + dvc->vcpu = vc; + dvc->budget = is_idle_vcpu(vc)? RT_IDLE_PERIOD: ddom->budget; + dvc->period = is_idle_vcpu(vc)? RT_IDLE_PERIOD: ddom->period; + dvc->level = is_idle_vcpu(vc)? 
RT_IDLE_PERIOD: ddom->level; + dvc->cur_budget = dvc->budget; + dvc->repq_pending = 0; + + dvc->last_start_time = 0; + + dvc->burn_total = 0; + dvc->next_time = 0; + + vc->sched_priv = dvc; + + /* Allocate per-PCPU info */ + if (unlikely(!RT_PCPU(vc->processor))) { + if (rt_pcpu_init(vc->processor) != 0) + return -1; + } + + RT_VCPU_CHECK(vc); + + BUG_ON(!list_empty(&dvc->active_elem)); + list_add(&dvc->active_elem, &rt_priv.active); + dvc->next_time = NOW() + BUDGET(1) * dvc->period; + rt_repq_insert(dvc->vcpu->processor, dvc, dvc->budget); + + printk("\n# into %s, vcpu init: ", __func__); + rt_dump_vcpu(dvc); + + return 0; +} + +//destory the vcpu +static void +rt_vcpu_destroy(struct vcpu *vc) { + struct rt_vcpu * const dvc = RT_VCPU(vc); + struct rt_dom * const ddom = dvc->ddom; + + printk("\n# into %s, vcpu destroy: ", __func__); + rt_dump_vcpu(dvc); + + BUG_ON(ddom == NULL); + BUG_ON(!list_empty(&dvc->runq_elem)); + list_del_init(&dvc->active_elem); + + xfree(dvc); +} + +//init a dom +static int +rt_dom_init(struct domain *dom) { + struct rt_dom *ddom; + + printk("\n# into %s, domain id is: %d\n", __func__, dom->domain_id); + + if (is_idle_domain(dom)) { + printk("\t# init an idle domain\n"); + return 0; + } + + ddom = xmalloc(struct rt_dom); + if (ddom == NULL) + return -ENOMEM; + memset(ddom, 0, sizeof (*ddom)); + + /* Initialize budget and period */ + ddom->dom = dom; + + switch(dom->domain_id) { + case 32767: + ddom->budget = RT_IDLE_PERIOD; + ddom->period = RT_IDLE_PERIOD; + ddom->level = 100; + break; + case 0: + ddom->budget = RT_DOM_0_PERIOD; + ddom->period = RT_DOM_0_PERIOD; + ddom->level = 1; + break; + default: + ddom->budget = RT_DOM_BUDGET; + ddom->period = RT_DOM_PERIOD; + ddom->level = 10; + break; + } + + dom->sched_priv = ddom; + + return 0; +} + +//destory a domain +static void +rt_dom_destroy(struct domain *dom) { + printk("\n# into %s, destroy domain: %d\n", __func__, dom->domain_id); + xfree(RT_DOM(dom)); +} + +/* Tickers cannot be kicked 
until SMP subsystem is alive. */ +static __init int +rt_start_tickers(void) { + struct rt_pcpu *ppc; + unsigned int cpu; + + printk("\n# into %s, start all tickers right now\n", __func__); + + if (rt_priv.ncpus == 0) + return 0; + + for_each_online_cpu(cpu) { + ppc = RT_PCPU(cpu); + set_timer(&ppc->ticker, NOW() + BUDGET(1)); + } + + return 0; +} +__initcall(rt_start_tickers); + +static void rt_tick_suspend(void) { + struct rt_pcpu *ppc; + + printk("\n# into %s, why is this called?\n", __func__); + + ppc = RT_PCPU(smp_processor_id()); + + stop_timer(&ppc->ticker); +} + +static void rt_tick_resume(void) { + struct rt_pcpu *ppc; + uint64_t now = NOW(); + + printk("\n# into %s, why is this called?\n", __func__); + + ppc = RT_PCPU(smp_processor_id()); + + set_timer(&ppc->ticker, now + BUDGET(1)); +} + + +/********************************************* + * Four Subscheduler Specific Functions +*********************************************/ + +//sleep the vcpu +static void +rt_vcpu_sleep(struct vcpu *vc) { + //SCHED_OP(vcpu_sleep, vc); + struct rt_vcpu * const dvc = RT_VCPU(vc); + + BUG_ON(is_idle_vcpu(vc)); + + if (per_cpu(schedule_data, vc->processor).curr == vc) { + cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); + } else if (__vcpu_on_runq(dvc)) { + __runq_remove(dvc); + } else if (__vcpu_on_rdyq(dvc)) { + __rdyq_remove(dvc); + } + + return; +} + +//wake up the vcpu, insert it into runq, raise a softirq +static void +rt_vcpu_wake(struct vcpu *vc) { + //SCHED_OP(vcpu_wake, vc); + struct rt_vcpu * const dvc = RT_VCPU(vc); + const unsigned int cpu = vc->processor; + + BUG_ON(is_idle_vcpu(vc)); + + if (unlikely(per_cpu(schedule_data, cpu).curr == vc)) { + if (vc->domain->domain_id != 0) { + printk("\nwake running\n"); + } + return; + } + if (unlikely(__vcpu_on_runq(dvc))) { + if (vc->domain->domain_id != 0) { + printk("\nwake on runq\n"); + } + return; + } + + if (__vcpu_on_rdyq(dvc)) { + __rdyq_remove(dvc); + } + + if (!__vcpu_on_runq(dvc)) { + __runq_insert(cpu, 
dvc); + } + + if (dvc->level < RT_CUR(cpu)->level) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + return; +} + +//ticked by pcpu tick in pcpu. +static void +rt_tick(void *_cpu) { + unsigned int cpu = (unsigned long) _cpu; + switch (rt_priv.type) { + case DS: + sched_deferrable_def.tick(_cpu); + break; + case PPS: + sched_periodic_def.tick(_cpu); + break; + case WCPS: + sched_wcps_def.tick(_cpu); + break; + case SSPS: + sched_ssps_def.tick(_cpu); + break; + default: + printk("Wrong in %s\n", __func__); + sched_deferrable_def.tick(_cpu); + break; + } + + if (rt_cpu_pick(current) != cpu) { + set_bit(_VPF_migrating, ¤t->pause_flags); + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } +} + +// most important function, called every budget time +static struct task_slice +rt_schedule(s_time_t now) { + switch (rt_priv.type) { + case DS: + return sched_deferrable_def.schedule(now); + break; + case PPS: + return sched_periodic_def.schedule(now); + break; + case WCPS: + return sched_wcps_def.schedule(now); + break; + case SSPS: + return sched_ssps_def.schedule(now); + break; + default: + printk("Wrong in %s\n", __func__); + return sched_deferrable_def.schedule(now); + break; + } +} + +/********************************************* + * Used to Adjust Domain parameters and switch schedulers +*********************************************/ +/* +// !!! Need to consider SS! for the repl queue!! have not done yet! 
+static inline void +rt_reset(int cpu) { + struct rt_pcpu * ppc = RT_PCPU(cpu); + struct list_head * iter; + + printk("\nBefore reset\n"); + rt_dump_pcpu(cpu); + + // empty the RepQ + while(ppc->rep_size != 0) { + rt_repq_remove(cpu); + } + + printk("\nvcpu on list is:\n"); + //init each vcpu; + list_for_each(iter, &rt_priv.active) { + struct rt_vcpu * iter_dvc = list_entry(iter, struct rt_vcpu, active_elem); + if (__vcpu_on_runq(iter_dvc)) { + __runq_remove(iter_dvc); + } + if (__vcpu_on_rdyq(iter_dvc)) { + __rdyq_remove(iter_dvc); + } + iter_dvc->cur_budget = iter_dvc->budget; + iter_dvc->last_start_time = NOW(); + iter_dvc->next_time = NOW() + BUDGET(1) * iter_dvc->period; + iter_dvc->burn_total = 0; + rt_dump_vcpu(iter_dvc); + } + + //insert into Queues + list_for_each(iter, &rt_priv.active) { + struct rt_vcpu * iter_dvc = list_entry(iter, struct rt_vcpu, active_elem); + if (vcpu_runnable(iter_dvc->vcpu)) { + if (!__vcpu_on_runq(iter_dvc)) { + __runq_insert(cpu, iter_dvc); + } + } else { + if (!__vcpu_on_rdyq(iter_dvc)) { + __rdyq_insert(cpu, iter_dvc); + } + } + } + + printk("\nAfter Reset\n"); + rt_dump_pcpu(cpu); +} +*/ +//adjust the domain's budget & period, also used to trigger the record +static int +rt_dom_cntl(struct domain *d, struct xen_domctl_scheduler_op *op) { + struct rt_dom * const ddom = RT_DOM(d); + unsigned long flags; + struct rt_vcpu *dvc = RT_VCPU(d->vcpu[0]); + + if (op->cmd == XEN_DOMCTL_SCHEDOP_getinfo) { + op->u.rt.budget = ddom->budget; + op->u.rt.period = ddom->period; + op->u.rt.level = ddom->level; + } else { + BUG_ON(op->cmd != XEN_DOMCTL_SCHEDOP_putinfo); + + spin_lock_irqsave(&rt_priv.lock, flags); + + if (op->u.rt.budget != 0) { + ddom->budget = op->u.rt.budget; + dvc->budget = op->u.rt.budget; + } + + if (op->u.rt.period != 0) { + ddom->period = op->u.rt.period; + dvc->period = op->u.rt.period; + } + + if (op->u.rt.level != 0) { + ddom->level = op->u.rt.level; + dvc->level = op->u.rt.level; + } + dvc->cur_budget = dvc->budget; 
// reset its budget + spin_unlock_irqrestore(&rt_priv.lock, flags); + + if (dvc->vcpu->domain->domain_id == 0) { + switch (op->u.rt.budget) { + case 100: + printk("############################\n100: dump info\n200: DS\n300: PPS\n400: WC-PS\n500: SS-PS\n\n"); + rt_dump_pcpu(1); + break; + case 200: + rt_priv.type = DS; //change to DS; + //rt_reset(1); + break; + case 300: + rt_priv.type = PPS; // to PPS + //rt_reset(1); + break; + case 400: + rt_priv.type = WCPS; // WCPS + //rt_reset(1); + break; + case 500: + rt_priv.type = SSPS; // SSPS + //rt_reset(1); + break; + default: + printk("############################\n100: dump info\n200: DS\n300: PPS\n400: WC-PS\n500: SS-PS\n\n"); + break; + } + printk("Current Scheduler: "); + switch (rt_priv.type) { + case DS: + printk("%s\n", sched_deferrable_def.name); + break; + case PPS: + printk("%s\n", sched_periodic_def.name); + break; + case WCPS: + printk("%s\n", sched_wcps_def.name); + break; + case SSPS: + printk("%s\n", sched_ssps_def.name); + break; + default: + printk("wrong, reset to DS\n"); + rt_priv.type = DS; + break; + } + + } + } + + return 0; +} + +const struct scheduler sched_rt_def = { + .name = "Real Time Scheduler", + .opt_name = "rt", + .sched_id = XEN_SCHEDULER_RT, + + .init_domain = rt_dom_init, + .destroy_domain = rt_dom_destroy, + + .init_vcpu = rt_vcpu_init, + .destroy_vcpu = rt_vcpu_destroy, + + .init = rt_init, + + .pick_cpu = rt_cpu_pick, + + .tick_suspend = rt_tick_suspend, + .tick_resume = rt_tick_resume, + + .do_schedule = rt_schedule, + + .sleep = rt_vcpu_sleep, + .wake = rt_vcpu_wake, + + .adjust = rt_dom_cntl, + + .dump_cpu_state = rt_dump_pcpu, + .dump_settings = rt_dump, +}; diff -ubrN xen/xen-4.0.1/xen/common/sched_rt_deferrable.c xen-4.0.1/xen/common/sched_rt_deferrable.c --- xen/xen-4.0.1/xen/common/sched_rt_deferrable.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_rt_deferrable.c 2011-04-24 21:23:02.000000000 -0600 @@ -0,0 +1,79 @@ 
+/****************************************************************************** + * Real Time Xen scheduler Framework + * + * by Sisu Xi (C) 2010 Washington University in St. Louis + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sched_rt.h" +#include "sched_rt_repq.h" + + +// most important function, called every budget time +static struct task_slice +deferrable_schedule(s_time_t now) { + const int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + struct rt_vcpu *scurr = RT_VCPU(current); + struct rt_vcpu *snext; + struct task_slice ret; + + RT_VCPU_CHECK(current); + + if ((scurr->vcpu->domain->domain_id != 0) && (!is_idle_vcpu(scurr->vcpu))) { + repq_burn(scurr, now); + } + + if (vcpu_runnable(current)) { + if (!__vcpu_on_runq(scurr)) { + __runq_insert(cpu, scurr); + } + } else { + BUG_ON(is_idle_vcpu(current) || list_empty(runq)); + if (!__vcpu_on_rdyq(scurr)) { + __rdyq_insert(cpu, scurr); + } + } + + snext = __runq_pick(cpu); + + if (snext->vcpu->domain->domain_id != 0) { + snext->last_start_time = NOW(); + } + + BUG_ON(!__vcpu_on_runq(snext)); + __runq_remove(snext); + + ret.time = is_idle_vcpu(snext->vcpu) ? 
BUDGET(1) : BUDGET(1) * snext->cur_budget; + ret.task = snext->vcpu; + + RT_VCPU_CHECK(ret.task); + + BUG_ON(!vcpu_runnable(snext->vcpu)); + + return ret; +} + +const struct rt_scheduler sched_deferrable_def = { + .name = "Deferrable Server Scheduler", + .opt_name = "ds", + + .tick = repq_tick, + .vcpu_wake = NULL, + .vcpu_sleep = NULL, + .schedule = deferrable_schedule +}; diff -ubrN xen/xen-4.0.1/xen/common/sched_rt.h xen-4.0.1/xen/common/sched_rt.h --- xen/xen-4.0.1/xen/common/sched_rt.h 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_rt.h 2011-04-24 22:23:43.000000000 -0600 @@ -0,0 +1,348 @@ +/****************************************************************************** + * Real Time Xen scheduler Headfile, including the common data structures + * + * by Sisu Xi (C) 2010 Washington University in St. Louis + * based on code by Jaewoo Lee (C) 2010 University of Pennsylvania + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#define RT_DOM(_dom) ((struct rt_dom *) (_dom)->sched_priv) +#define RT_PCPU(_c) ((struct rt_pcpu *)per_cpu(schedule_data, _c).sched_priv) +#define RT_VCPU(_vcpu) ((struct rt_vcpu *) (_vcpu)->sched_priv) +#define RUNQ(_cpu) (&(RT_PCPU(_cpu)->runq)) +#define RDYQ(_cpu) (&(RT_PCPU(_cpu)->rdyq)) +#define RT_CUR(_cpu) RT_VCPU(per_cpu(schedule_data, _cpu).curr) +#define BUDGET(_b) (MILLISECS(1*_b)) // time to run for 1 budget, default setting is 1ms = 1 budget + +#define REPQ_CAPACITY 500 // repQ is used for the replenishment + +#define RT_DOM_0_PERIOD 100 +#define RT_IDLE_PERIOD 200 + +#define RT_DOM_BUDGET 25 // default budget, can bu changed via xm sched-ss -d target -b budget -p period +#define RT_DOM_PERIOD 50 + +enum server_type {DS, PPS, WCPS, SSPS}; + + +/********************************************* + * Data Structure +*********************************************/ + +//physical cpu +struct rt_pcpu { + struct 
list_head runq; // runQ on the pcpu, organized by linked list + struct list_head rdyq; + struct rep_elem *repq; //repQ on the pcpu, organized by heap + int rep_size; // current size, for later dynamic reqQ use. currently set equals to capacity + int rep_capacity; // upper limit + struct timer ticker; // for preemptive use, tick every budget +}; + +//virtual cpu +struct rt_vcpu { + struct list_head runq_elem; + struct list_head rdyq_elem; + struct list_head active_elem; //used to link all active vcpu except domain 0 and idle one! + struct rt_dom *ddom; + struct vcpu *vcpu; + + uint16_t repq_pending; // used to calculate how many items are on repq + + uint16_t budget; + uint16_t period; + uint16_t level; + + uint16_t cur_budget; + s_time_t last_start_time; // used for burn_budget + s_time_t next_time; //the next repl time + + uint16_t burn_total; // used only for Sporadic Server +}; + +//used for replenishment +struct rep_elem { + s_time_t re_time; + int16_t re_amount; + struct rt_vcpu *dvc; +}; + +//domain +struct rt_dom { + struct domain *dom; + uint16_t budget; + uint16_t period; + uint16_t level; +}; + +//global variable, records the number of cpus +struct rt_private { + spinlock_t lock; // used for init + uint32_t ncpus; //number of physical cpus + struct list_head active; //active_vcpu except domain 0 and idle vcpu! 
+ enum server_type type; //used to represent scheduler +}; + +struct rt_scheduler { + char *name; + char *opt_name; + + void (*vcpu_sleep)(struct vcpu *vc); + void (*tick)(void *_cpu); + struct task_slice (*schedule)(s_time_t); + void (*vcpu_wake)(struct vcpu *vc); +}; + +/********************************************* + * Common Code +*********************************************/ + +//check the vcpu +static inline void +__rt_vcpu_check(struct vcpu *vc) { + struct rt_vcpu * const dvc = RT_VCPU(vc); + struct rt_dom * const ddom = dvc->ddom; + + BUG_ON(dvc->vcpu != vc); + BUG_ON(ddom != RT_DOM(vc->domain)); + if (ddom) { + BUG_ON(is_idle_vcpu(vc)); + BUG_ON(ddom->dom != vc->domain); + } else { + BUG_ON(!is_idle_vcpu(vc)); + } +} +#define RT_VCPU_CHECK(_vc) (__rt_vcpu_check(_vc)) + +//inlined code +static inline struct rt_vcpu * +__runq_elem(struct list_head *elem) { + return list_entry(elem, struct rt_vcpu, runq_elem); +} + +//inlined code +static inline struct rt_vcpu * +__rdyq_elem(struct list_head *elem) { + return list_entry(elem, struct rt_vcpu, rdyq_elem); +} + +// the current vcpu is on runQ? +static inline int +__vcpu_on_runq(struct rt_vcpu *dvc) { + return !list_empty(&dvc->runq_elem); +} + +// the current vcpu is on runQ? 
+static inline int +__vcpu_on_rdyq(struct rt_vcpu *dvc) { + return !list_empty(&dvc->rdyq_elem); +} + +/********************************************* + * Dump Settings Related +*********************************************/ + +//dump the repq +static inline void +rt_dump_repq(int cpu) { + int loop = 0; + struct rt_pcpu *ppc = RT_PCPU(cpu); + + printk("repq: size: %d\n", ppc->rep_size); + for (loop = 0; loop < ppc->rep_size; loop++) { + printk("\t[%d, %d]: %d @ %lu\n", + ppc->repq[loop].dvc->vcpu->domain->domain_id, + ppc->repq[loop].dvc->vcpu->vcpu_id, + ppc->repq[loop].re_amount, + ppc->repq[loop].re_time); + } +} + +//dump the virtual cpu +static inline void +rt_dump_vcpu(struct rt_vcpu *dvc) { + printk("\t[%i, %i], cur: %i, rep: %d, last: %lu, next: %lu, \n", dvc->vcpu->domain->domain_id, dvc->vcpu->vcpu_id, dvc->cur_budget, dvc->repq_pending, dvc->last_start_time, dvc->next_time); +} + +/********************************************* + * RunQ, RdyQ, and RepQ Related +*********************************************/ + +//pick the first vcpu whose budget is >0 from the runq +static inline struct rt_vcpu * +__runq_pick(unsigned int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + list_for_each(iter, runq) { + struct rt_vcpu * iter_dvc = __runq_elem(iter); + if (iter_dvc->cur_budget > 0) { + return iter_dvc; + } + } + + BUG_ON(1); + return NULL; +} + +//pick the first one with budget > 0, regardless of runnable or not +static inline struct rt_vcpu * +__rdyq_pick(unsigned int cpu) { + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + list_for_each(iter, rdyq) { + struct rt_vcpu *iter_dvc = __rdyq_elem(iter); + if (iter_dvc->cur_budget > 0) { + return iter_dvc; + } + } + + return NULL; +} + +//pick the IDLE VCPU from RunQ, for Periodic Server +static inline struct rt_vcpu * +__runq_pick_idle(unsigned int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + list_for_each(iter, runq) { + struct rt_vcpu 
* iter_dvc = __runq_elem(iter); + if (is_idle_vcpu(iter_dvc->vcpu)) { + return iter_dvc; + } + } + + BUG_ON(1); + return NULL; +} + +//insert into the runq, followed a FIFO way. sorted by level +static inline void +__runq_insert(unsigned int cpu, struct rt_vcpu *dvc) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + BUG_ON(__vcpu_on_runq(dvc)); + BUG_ON(cpu != dvc->vcpu->processor); + + list_for_each(iter, runq) { + struct rt_vcpu * iter_dvc = __runq_elem(iter); + if (dvc->level < iter_dvc->level) { + break; + } + } + + list_add_tail(&dvc->runq_elem, iter); +} + +//insert into the runq, followed a FIFO way. sorted by level +static inline void +__rdyq_insert(unsigned int cpu, struct rt_vcpu *dvc) { + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + BUG_ON(__vcpu_on_rdyq(dvc)); + BUG_ON(cpu != dvc->vcpu->processor); + + list_for_each(iter, rdyq) { + struct rt_vcpu * iter_dvc = __rdyq_elem(iter); + if (dvc->level <= iter_dvc->level) { + break; + } + } + + list_add_tail(&dvc->rdyq_elem, iter); +} + +//remove it from runQ +static inline void +__runq_remove(struct rt_vcpu *dvc) { + BUG_ON(!__vcpu_on_runq(dvc)); + list_del_init(&dvc->runq_elem); +} + +//remove it from runQ +static inline void +__rdyq_remove(struct rt_vcpu *dvc) { + BUG_ON(!__vcpu_on_rdyq(dvc)); + list_del_init(&dvc->rdyq_elem); +} + +//used for the heap, repQ +static inline int +rt_rep_parent(int childIdx) { + return (childIdx & 1)? 
((childIdx - 1) >> 1) : ((childIdx - 2) >> 1); +} + +//insert into the repQ +static inline void +rt_repq_insert(unsigned int cpu, struct rt_vcpu *dvc, int amount) { + struct rt_pcpu * ppc = RT_PCPU(cpu); + int childIdx, parentIdx; + + if (dvc->next_time == 0) { + return; + } + + if (amount == 0) { + return; + } + + if (ppc->rep_size == ppc->rep_capacity) { + printk("\n# into %s, repQ full!!\n", __func__); + BUG_ON(1); + } + + childIdx = ppc->rep_size; + parentIdx = rt_rep_parent(childIdx); + + while (childIdx > 0 && dvc->next_time < ppc->repq[parentIdx].re_time) { + ppc->repq[childIdx] = ppc->repq[parentIdx]; + childIdx = parentIdx; + parentIdx = rt_rep_parent(childIdx); + } + + ppc->repq[childIdx].re_time = dvc->next_time; + ppc->repq[childIdx].dvc = dvc; + ppc->repq[childIdx].re_amount = amount; + ppc->rep_size++; + + // dvc->next_time = 0; + dvc->repq_pending++; +} + +//remove from the repQ +static inline void +rt_repq_remove(unsigned int cpu) { + struct rt_pcpu * ppc = RT_PCPU(cpu); + int childIdx = 1; + int rightChildIdx; + int rootIdx = 0; + struct rep_elem temp; + + BUG_ON(ppc->rep_size <= 0); + + ppc->repq[0].dvc->repq_pending--; + ppc->repq[0] = ppc->repq[ppc->rep_size - 1]; + ppc->rep_size--; + + temp = ppc->repq[0]; + + while (childIdx < ppc->rep_size) { + rightChildIdx = childIdx + 1; + if (rightChildIdx < ppc->rep_size && ppc->repq[rightChildIdx].re_time < ppc->repq[childIdx].re_time) { + childIdx = rightChildIdx; + } + if (ppc->repq[childIdx].re_time < temp.re_time) { + ppc->repq[rootIdx] = ppc->repq[childIdx]; + rootIdx = childIdx; + childIdx = 2 * rootIdx + 1; + } else { + break; + } + } + ppc->repq[rootIdx] = temp; +} + diff -ubrN xen/xen-4.0.1/xen/common/sched_rt_periodic.c xen-4.0.1/xen/common/sched_rt_periodic.c --- xen/xen-4.0.1/xen/common/sched_rt_periodic.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_rt_periodic.c 2011-04-24 21:23:44.000000000 -0600 @@ -0,0 +1,125 @@ 
+/****************************************************************************** + * Real Time Xen scheduler Framework + * + * by Sisu Xi (C) 2010 Washington University in St. Louis + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sched_rt.h" +#include "sched_rt_repq.h" + + + +//burn the extra budget on RdyQ +static void +repq_burn_rdyq(struct rt_vcpu *dvc, s_time_t now) { + s_time_t delta; + unsigned int consume; + struct list_head * rdyq = RDYQ(dvc->vcpu->processor); + struct list_head * iter; + + BUG_ON(dvc != RT_CUR(dvc->vcpu->processor)); + + if (dvc->last_start_time == 0) { + dvc->last_start_time = now; + return; + } + + delta = now - dvc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + + // burn budgets on RdyQ + list_for_each(iter, rdyq) { + struct rt_vcpu * iter_dvc = __rdyq_elem(iter); + if (iter_dvc->cur_budget > 0) { + iter_dvc->cur_budget -= consume; + if (iter_dvc->cur_budget < 0) { + iter_dvc->budget = 0; + } + return; // only burn one budget + } + } + + return; +} + + +// most important function, called every budget time +static struct task_slice +periodic_schedule(s_time_t now) { + const int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + struct rt_vcpu *scurr = RT_VCPU(current); + struct rt_vcpu *snext; + struct task_slice ret; + + RT_VCPU_CHECK(current); + + if ((scurr->vcpu->domain->domain_id != 0)) { + if (!is_idle_vcpu(scurr->vcpu)) { + repq_burn(scurr, now); + } else if (cpu == 1) { //scurr is the idle vcpu on cpu 1, need to deal with periodic server + repq_burn_rdyq(scurr, now); + } + } + + if (vcpu_runnable(current)) { + if (!__vcpu_on_runq(scurr)) { + __runq_insert(cpu, scurr); + } + } else { 
+ BUG_ON(is_idle_vcpu(current) || list_empty(runq)); + if (!__vcpu_on_rdyq(scurr)) { + __rdyq_insert(cpu, scurr); + } + } + + if ( (__rdyq_pick(cpu) == NULL) || (__runq_pick(cpu)->level < __rdyq_pick(cpu)->level) ) { + snext = __runq_pick(cpu); + } else { + snext = __runq_pick_idle(cpu); // pick the IDLE to mimic the as if budget idled away behavior + } + + if (snext->vcpu->domain->domain_id != 0) { + snext->last_start_time = NOW(); + } + + BUG_ON(!__vcpu_on_runq(snext)); + __runq_remove(snext); + + ret.time = BUDGET(1); + ret.task = snext->vcpu; + + RT_VCPU_CHECK(ret.task); + + BUG_ON(!vcpu_runnable(snext->vcpu)); + + return ret; +} + +const struct rt_scheduler sched_periodic_def = { + .name = "Pure Periodic Server Scheduler", + .opt_name = "pps", + + .tick = repq_tick, + .vcpu_sleep = NULL, + .vcpu_wake = NULL, + .schedule = periodic_schedule +}; diff -ubrN xen/xen-4.0.1/xen/common/sched_rt_repq.h xen-4.0.1/xen/common/sched_rt_repq.h --- xen/xen-4.0.1/xen/common/sched_rt_repq.h 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_rt_repq.h 2011-05-02 21:06:21.000000000 -0600 @@ -0,0 +1,86 @@ +/****************************************************************************** + * Real Time Xen scheduler Headfile, including the common data structures + * + * by Sisu Xi (C) 2010 Washington University in St. Louis + * based on code by Jaewoo Lee (C) 2010 University of Pennsylvania + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + + +/********************************************* + * Shared by the subschedulers +*********************************************/ + +//check the current repQ to see if a repl needs to happen +//Even if the IDLE VCPU is running, just raise an interrupt to trigger the schedule function!! 
+static int +check_cpu_for_repl(int cpu) { + struct rt_pcpu * ppc = RT_PCPU(cpu); + int flag = 0; //used for interrupt + + while((ppc->rep_size != 0) && ppc->repq[0].re_time < NOW()) { + ppc->repq[0].dvc->cur_budget += ppc->repq[0].re_amount; + if (ppc->repq[0].dvc->cur_budget > ppc->repq[0].dvc->budget) { + ppc->repq[0].dvc->cur_budget = ppc->repq[0].dvc->budget; + } + + while (ppc->repq[0].dvc->next_time <= NOW()) { + ppc->repq[0].dvc->next_time += BUDGET(1) * ppc->repq[0].dvc->period; + } + rt_repq_insert(ppc->repq[0].dvc->vcpu->processor, ppc->repq[0].dvc, ppc->repq[0].dvc->budget); + + if (ppc->repq[0].dvc->level < RT_CUR(cpu)->level) flag = 1; // need to raise an interrupt + + // bug fix 0501 + if (ppc->repq[0].dvc->level != RT_CUR(cpu)->level) { // do not change the current running one + ppc->repq[0].dvc->last_start_time = NOW(); + } + + rt_repq_remove(cpu); + } + + return flag; +} + +//ticked by pcpu tick in pcpu, used in the repq way +static void +repq_tick(void *_cpu) { + unsigned int cpu = (unsigned long) _cpu; + struct rt_pcpu *ppc = RT_PCPU(cpu); + + BUG_ON(current->processor != cpu); + + if (check_cpu_for_repl(cpu)) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + set_timer(&ppc->ticker, NOW() + BUDGET(1)); +} + +static int +repq_burn(struct rt_vcpu *dvc, s_time_t now) { + s_time_t delta; + unsigned int consume; + + BUG_ON(dvc != RT_CUR(dvc->vcpu->processor)); + + if (dvc->last_start_time == 0) { + dvc->last_start_time = now; + return 0; + } + + delta = now - dvc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + + if (consume > dvc->cur_budget) { + dvc->cur_budget = 0; + } else { + dvc->cur_budget -= consume; + } + + return consume; +} + diff -ubrN xen/xen-4.0.1/xen/common/sched_rt_ssps.c xen-4.0.1/xen/common/sched_rt_ssps.c --- xen/xen-4.0.1/xen/common/sched_rt_ssps.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_rt_ssps.c 2011-05-01 
19:41:06.000000000 -0600 @@ -0,0 +1,197 @@ +/****************************************************************************** + * Real Time Xen scheduler Framework + * + * by Sisu Xi (C) 2010 Washington University in St. Louis + * based on code by Jaewoo Lee (C) U Penn + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sched_rt.h" +#include "sched_rt_repq.h" + + +static int +check_rdyq(int cpu) { + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + list_for_each(iter, rdyq) { + struct rt_vcpu * iter_dvc = __rdyq_elem(iter); + if (iter_dvc->cur_budget > 0) { + return 1; + } + } + + return 0; +} + +//burn the extra budget on RdyQ +static void +ssps_burn_rdyq(struct rt_vcpu *dvc, s_time_t now) { + s_time_t delta; + unsigned int consume; + struct list_head * rdyq = RDYQ(dvc->vcpu->processor); + struct list_head * iter; + + BUG_ON(dvc != RT_CUR(dvc->vcpu->processor)); + + if (dvc->last_start_time == 0) { + dvc->last_start_time = now; + return; + } + + delta = now - dvc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + + // burn budgets on RdyQ + list_for_each(iter, rdyq) { + struct rt_vcpu * iter_dvc = __rdyq_elem(iter); + if (iter_dvc->cur_budget > 0) { + //bug fix 0501 + + delta = now - iter_dvc->last_start_time; + // just get repled, skip this VCPU + if (delta < BUDGET(1)) { + continue; + } + if (consume > iter_dvc->cur_budget) { + iter_dvc->cur_budget = 0; + } else { + iter_dvc->cur_budget -= consume; + } + return; // only burn one budget + } + } + + return; +} + +static void +ssps_burn_extra(int cpu, int consume) { + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + // burn budgets on RdyQ + 
list_for_each(iter, rdyq) { + struct rt_vcpu * iter_dvc = __rdyq_elem(iter); + if (iter_dvc->cur_budget > 0) { + if (consume > iter_dvc->cur_budget) { + iter_dvc->cur_budget = 0; + } else { + iter_dvc->cur_budget -= consume; + } + return; // only burn one budget + } + } + + return; +} + +//pick the first vcpu whose budget is >0 from the runq +static inline struct rt_vcpu * +__ssps_runq_pick(unsigned int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + list_for_each(iter, runq) { + struct rt_vcpu * iter_dvc = __runq_elem(iter); + return iter_dvc; + } + + BUG_ON(1); + return NULL; +} + +// most important function, called every budget time +static struct task_slice +ssps_schedule(s_time_t now) { + const int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + struct rt_vcpu *scurr = RT_VCPU(current); + struct rt_vcpu *snext; + struct task_slice ret; + int consume; + int old_budget; + + RT_VCPU_CHECK(current); + + if ((scurr->vcpu->domain->domain_id != 0)) { + if (!is_idle_vcpu(scurr->vcpu)) { + //check how many budget should burn + old_budget = scurr->cur_budget; + if (old_budget > 0) { + consume = repq_burn(scurr, now); + if (consume != 0 && __rdyq_pick(cpu) != NULL && __rdyq_pick(cpu)->level < scurr->level) { + scurr->cur_budget = old_budget; //restore its original budget + ssps_burn_extra(cpu, consume); // burn the one on rdyq instead + } + } else { + ssps_burn_rdyq(scurr, now); + } + } else { + ssps_burn_rdyq(scurr, now); // idle VCPU, still need to burn the ones on rdyq + } + } + + if (vcpu_runnable(current)) { + if (!__vcpu_on_runq(scurr)) { + __runq_insert(cpu, scurr); + } + } else { + BUG_ON(is_idle_vcpu(current) || list_empty(runq)); + if (!__vcpu_on_rdyq(scurr)) { + __rdyq_insert(cpu, scurr); + } + } + + snext = __runq_pick(cpu); + //slack stealing!! 
+ if (is_idle_vcpu(snext->vcpu)) { + if (check_rdyq(snext->vcpu->processor)) { + snext = __ssps_runq_pick(cpu); + } + } + + if (snext->vcpu->domain->domain_id != 0) { + snext->last_start_time = NOW(); + } + + BUG_ON(!__vcpu_on_runq(snext)); + __runq_remove(snext); + + ret.time = BUDGET(1); + ret.task = snext->vcpu; + + RT_VCPU_CHECK(ret.task); + + BUG_ON(!vcpu_runnable(snext->vcpu)); + + return ret; +} + + +const struct rt_scheduler sched_ssps_def = { + .name = "Slack Stealing Periodic Server Scheduler", + .opt_name = "ssps", + .tick = repq_tick, + .vcpu_sleep = NULL, + .vcpu_wake = NULL, + .schedule = ssps_schedule +}; diff -ubrN xen/xen-4.0.1/xen/common/sched_rt_wcps.c xen-4.0.1/xen/common/sched_rt_wcps.c --- xen/xen-4.0.1/xen/common/sched_rt_wcps.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_rt_wcps.c 2011-05-01 13:24:27.000000000 -0600 @@ -0,0 +1,146 @@ +/****************************************************************************** + * Real Time Xen scheduler Framework + * + * by Sisu Xi (C) 2010 Washington University in St. 
Louis + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sched_rt.h" +#include "sched_rt_repq.h" + +//burn the extra budget on RdyQ +static void +wcps_burn_rdyq(struct rt_vcpu *dvc, s_time_t now) { + s_time_t delta; + unsigned int consume; + struct list_head * rdyq = RDYQ(dvc->vcpu->processor); + struct list_head * iter; + + BUG_ON(dvc != RT_CUR(dvc->vcpu->processor)); + + if (dvc->last_start_time == 0) { + dvc->last_start_time = now; + return; + } + + delta = now - dvc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + + // burn budgets on RdyQ + list_for_each(iter, rdyq) { + struct rt_vcpu * iter_dvc = __rdyq_elem(iter); + if (iter_dvc->cur_budget > 0) { + if (consume > iter_dvc->cur_budget) { + iter_dvc->cur_budget = 0; + } else { + iter_dvc->cur_budget -= consume; + } + return; // only burn one budget + } + } + + return; +} + + +static void +wcps_burn_extra(int cpu, int consume) { + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + // burn budgets on RdyQ + list_for_each(iter, rdyq) { + struct rt_vcpu * iter_dvc = __rdyq_elem(iter); + if (iter_dvc->cur_budget > 0) { + if (consume > iter_dvc->cur_budget) { + iter_dvc->cur_budget = 0; + } else { + iter_dvc->cur_budget -= consume; + } + return; // only burn one budget + } + } + + return; +} + + +// most important function, called every budget time +static struct task_slice +wcps_schedule(s_time_t now) { + const int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + struct rt_vcpu *scurr = RT_VCPU(current); + struct rt_vcpu *snext; + struct task_slice ret; + int consume; + + RT_VCPU_CHECK(current); + + if ((scurr->vcpu->domain->domain_id != 0)) { + if 
(!is_idle_vcpu(scurr->vcpu)) { + consume = repq_burn(scurr, now); + if (consume != 0 && __rdyq_pick(cpu) != NULL && __rdyq_pick(cpu)->level < scurr->level) { + wcps_burn_extra(cpu, consume); // burn the extra budget on rdyq, mimic the idled away behavior + } + } else if (cpu == 1) { + wcps_burn_rdyq(scurr, now); + } + } + + if (vcpu_runnable(current)) { + if (!__vcpu_on_runq(scurr)) { + __runq_insert(cpu, scurr); + } + } else { + BUG_ON(is_idle_vcpu(current) || list_empty(runq)); + if (!__vcpu_on_rdyq(scurr)) { + __rdyq_insert(cpu, scurr); + } + } + + snext = __runq_pick(cpu); + + if (snext->vcpu->domain->domain_id != 0) { + snext->last_start_time = NOW(); + } + + BUG_ON(!__vcpu_on_runq(snext)); + __runq_remove(snext); + + ret.time = BUDGET(1); + ret.task = snext->vcpu; + + RT_VCPU_CHECK(ret.task); + + BUG_ON(!vcpu_runnable(snext->vcpu)); + + return ret; +} + +const struct rt_scheduler sched_wcps_def = { + .name = "Work Conserving Periodic Server Scheduler", + .opt_name = "wcps", + + .tick = repq_tick, + .vcpu_sleep = NULL, + .vcpu_wake = NULL, + .schedule = wcps_schedule, +}; diff -ubrN xen/xen-4.0.1/xen/common/sched_sedf.c xen-4.0.1/xen/common/sched_sedf.c --- xen/xen-4.0.1/xen/common/sched_sedf.c 2010-08-25 04:22:12.000000000 -0600 +++ xen-4.0.1/xen/common/sched_sedf.c 2011-04-09 23:29:38.000000000 -0600 @@ -1429,6 +1429,11 @@ } rc = sedf_adjust_weights(op); + + if (p->domain_id == 0) { + return 1; + } + if ( rc ) return rc; @@ -1453,6 +1458,7 @@ } PRINT(2,"sedf_adjust_finished\n"); + return 0; } diff -ubrN xen/xen-4.0.1/xen/common/sched_ss.c xen-4.0.1/xen/common/sched_ss.c --- xen/xen-4.0.1/xen/common/sched_ss.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_ss.c 2011-01-21 08:38:10.000000000 -0700 @@ -0,0 +1,884 @@ +/****************************************************************************** + * Sporadic Server scheduler for xen + * + * by Sisu Xi (C) 2010 Washington University in St. 
Louis + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SS_DOM(_dom) ((struct ss_dom *) (_dom)->sched_priv) +#define SS_PCPU(_c) ((struct ss_pcpu *)per_cpu(schedule_data, _c).sched_priv) +#define SS_VCPU(_vcpu) ((struct ss_vcpu *) (_vcpu)->sched_priv) +#define RUNQ(_cpu) (&(SS_PCPU(_cpu)->runq)) +#define SS_CUR(_cpu) SS_VCPU(per_cpu(schedule_data, _cpu).curr) +#define BUDGET(_b) (MILLISECS(_b)) // time to run for 1 budget, default setting is 1ms = 1 budget + +#define REPQ_CAPACITY 500 // repQ is used for the replenishment + +#define SS_DOM_0_PERIOD 100 +#define SS_IDLE_PERIOD 200 + +#define SS_DOM_BUDGET 25 // default budget, can bu changed via xm sched-ss -d target -b budget -p period +#define SS_DOM_PERIOD 50 + +//used for status +#define IDLE 0 +#define ACTIVE 1 + +//used for replenishment +struct rep_elem { + s_time_t re_time; + uint16_t re_amount; + struct ss_vcpu *svc; +}; + +//physical cpu +struct ss_pcpu { + struct list_head runq; // runQ on the pcpu, organized by linked list + struct rep_elem *repq; //repQ on the pcpu, organized by heap + int rep_size; // current size + int rep_capacity; // upper limit + struct timer ticker; // for preemptive use, tick every budget +}; + +//virtual cpu +struct ss_vcpu { + struct list_head runq_elem; + struct ss_dom *sdom; + struct vcpu *vcpu; + + uint16_t budget; + uint16_t period; + + uint16_t cur_budget; + s_time_t last_start_time; // used for burn_budget + uint16_t burn_total; // used for budget repl + int status; + + s_time_t next_time; // used for repl +}; + +//domain +struct ss_dom { + struct domain *dom; + uint16_t budget; + uint16_t period; +}; + +//global variable, records the number of cpus +struct ss_private { + spinlock_t lock; + uint32_t ncpus; +}; +static 
struct ss_private ss_priv; + +/* +//used for record, overhead measurement +#define RECORD 4000 +struct record_elem{ + int curr; + int next; + s_time_t enter_base; // enter rep insert time + s_time_t leave_base; // leave rep insert time + s_time_t enter; // enter schedule time + s_time_t leave; // leave schedule time +}; +*/ +struct timer ss_start_timer; // would start after 10s, used only once +int ss_start_flag = 0; // start to record or not +int ss_wake = 0; +/* +int idx = 0; //idx to record +struct record_elem res[RECORD]; // domain_id, time in ms; +//finish for the record +*/ +static void ss_tick(void *_cpu); + +//dump the repq +static void +ss_dump_repq(int cpu) { + int loop = 0; + struct ss_pcpu *spc = SS_PCPU(cpu); + + printk("\n# into %s on cpu %d, now is %lu, size: %d, the repQ is :\n", __func__, cpu, NOW(), spc->rep_size); + for (loop = 0; loop < spc->rep_size; loop++) { + printk("\t[%d. %d]: %d @ %lu\n", + spc->repq[loop].svc->vcpu->domain->domain_id, + spc->repq[loop].svc->vcpu->vcpu_id, + spc->repq[loop].re_amount, + spc->repq[loop].re_time); + } +} + +//dump the virtual cpu +static void +ss_dump_vcpu(struct ss_vcpu *svc) { + printk("\t[%i, %i], (%i, %i), cpu: %i, cur_budget: %i, last_start_time: %lu, burn_total: %i, status %d, next_time: %lu\n", + svc->vcpu->domain->domain_id, svc->vcpu->vcpu_id, svc->budget, svc->period, svc->vcpu->processor, + svc->cur_budget, svc->last_start_time, svc->burn_total, svc->status, svc->next_time); +} + +//inlined code +static inline struct ss_vcpu * +__runq_elem(struct list_head *elem) { + return list_entry(elem, struct ss_vcpu, runq_elem); +} + +//dump the physical cpu +static void +ss_dump_pcpu(int cpu) { + struct list_head *iter; + struct ss_pcpu *spc = SS_PCPU(cpu); + struct list_head *runq = &spc->runq; + struct ss_vcpu *svc = SS_CUR(cpu); + int loop = 0; + + printk("\n# into %s, on cpu: %d, now is: %lu\n", __func__, cpu, NOW()); + + if (svc) { + printk("\trun: "); + ss_dump_vcpu(svc); + } + + list_for_each(iter, 
runq) { + svc = __runq_elem(iter); + if (svc) { + printk("\t%3d: ", ++loop); + ss_dump_vcpu(svc); + } + } + + ss_dump_repq(cpu); +} +/* +//dump the record out. +static void +ss_dump_record(void) { + int i; + + for (i = 1; i < RECORD; i++) { + printk("%-3d %-3d %13lu %13lu %13lu %13lu\n", res[i].curr, res[i].next, res[i].enter_base, res[i].leave_base, res[i].enter, res[i].leave); + } + ss_dump_pcpu(1); + idx = 0; + start_flag = 0; +} + +*/ +// the current vcpu is on runQ? +static inline int +__vcpu_on_runq(struct ss_vcpu *svc) { + return !list_empty(&svc->runq_elem); +} + +//pick the first vcpu whose budget is >0 from the runq +static inline struct ss_vcpu * +__runq_pick(unsigned int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + list_for_each(iter, runq) { + struct ss_vcpu * iter_svc = __runq_elem(iter); + if (iter_svc->cur_budget > 0) { + return iter_svc; + } + } + + BUG_ON(1); + return NULL; +} + +//insert into the runq, followed a FIFO way. sorted by period +static inline void +__runq_insert(unsigned int cpu, struct ss_vcpu *svc) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + BUG_ON(__vcpu_on_runq(svc)); + BUG_ON(cpu != svc->vcpu->processor); + + list_for_each(iter, runq) { + struct ss_vcpu * iter_svc = __runq_elem(iter); + if (svc->vcpu->domain->domain_id <= iter_svc->vcpu->domain->domain_id) { + break; + } + } + + list_add_tail(&svc->runq_elem, iter); +} + +//remove it from runQ +static inline void +__runq_remove(struct ss_vcpu *svc) { + BUG_ON(!__vcpu_on_runq(svc)); + list_del_init(&svc->runq_elem); +} + +//used for the heap, repQ +static inline int +ss_rep_parent(int childIdx) { + return (childIdx & 1)? 
((childIdx - 1) >> 1) : ((childIdx - 2) >> 1); +} + +//insert into the repQ +static inline void +ss_repq_insert(unsigned int cpu, struct ss_vcpu *svc, int amount) { + struct ss_pcpu * spc = SS_PCPU(cpu); + int childIdx, parentIdx; + + if (amount == 0) { + svc->next_time = 0; + return; + } + + if (svc->next_time == 0) { + printk("\n# in %s, ERROR! svc is:", __func__); + ss_dump_vcpu(svc); + ss_dump_pcpu(cpu); + BUG_ON(1); + } + + if (spc->rep_size == spc->rep_capacity) { + printk("\n# into %s, repQ full!!\n", __func__); + BUG_ON(1); + } + + childIdx = spc->rep_size; + parentIdx = ss_rep_parent(childIdx); + + + while (childIdx > 0 && svc->next_time < spc->repq[parentIdx].re_time) { + spc->repq[childIdx] = spc->repq[parentIdx]; + childIdx = parentIdx; + parentIdx = ss_rep_parent(childIdx); + } + + spc->repq[childIdx].re_time = svc->next_time; + spc->repq[childIdx].re_amount = amount; + spc->repq[childIdx].svc = svc; + spc->rep_size++; +/* + printk("\t add a repl. now: %lu, cpu: %d, re_time: %lu, amount: %d, for cpu [%d, %d]\n", + NOW(), cpu, svc->next_time, amount, svc->vcpu->domain->domain_id, svc->vcpu->vcpu_id); + ss_dump_vcpu(svc); +*/ + svc->next_time = 0; +} + +//remove from the repQ +static inline void +ss_repq_remove(unsigned int cpu) { + struct ss_pcpu * spc = SS_PCPU(cpu); + int childIdx = 1; + int rightChildIdx; + int rootIdx = 0; + struct rep_elem temp; + + BUG_ON(spc->rep_size <= 0); + + spc->repq[0] = spc->repq[spc->rep_size - 1]; + spc->rep_size--; + + temp = spc->repq[0]; + + while (childIdx < spc->rep_size) { + rightChildIdx = childIdx + 1; + if (rightChildIdx < spc->rep_size && spc->repq[rightChildIdx].re_time < spc->repq[childIdx].re_time) { + childIdx = rightChildIdx; + } + if (spc->repq[childIdx].re_time < temp.re_time) { + spc->repq[rootIdx] = spc->repq[childIdx]; + rootIdx = childIdx; + childIdx = 2 * rootIdx + 1; + } else { + break; + } + } + spc->repq[rootIdx] = temp; +} + +//svc should be snext. 
Doing this is because we can not get snext->period +//scan the runQ to change status, deside next time or amount +static void +ss_scan_runq(unsigned int cpu, struct ss_vcpu *svc) { + struct list_head * runq = RUNQ(cpu); + struct ss_vcpu *cur = svc; + struct list_head * iter; + int re_amount; + + list_for_each(iter, runq) { + struct ss_vcpu * iter_svc = __runq_elem(iter); + if (is_idle_vcpu(iter_svc->vcpu)) { + return; + } + + if (iter_svc->vcpu->domain->domain_id < cur->vcpu->domain->domain_id) { + if (iter_svc->status == ACTIVE) { + //change from ACTIVE to IDLE, decide the repl amount + BUG_ON(iter_svc->next_time == 0); + iter_svc->status = IDLE; + re_amount = iter_svc->burn_total; + iter_svc->burn_total = 0; + ss_repq_insert(cpu, iter_svc, re_amount); + } + } else { + if (iter_svc->status == IDLE) { + //mark it to be ACTIVE, decide the repl time + iter_svc->status = ACTIVE; + BUG_ON(iter_svc->next_time != 0); + iter_svc->next_time = NOW() + BUDGET(iter_svc->period); + } + } + } +} + +//dump dump function +static void +ss_dump(void) { + printk("# into %s.\n", __func__); +} + +//burn the scurr budget +static void +burn_budgets(struct ss_vcpu *svc, s_time_t now) { + s_time_t delta; + unsigned int consume; + + BUG_ON(svc != SS_CUR(svc->vcpu->processor)); + + if (svc->last_start_time == 0) { + svc->last_start_time = now; + return; + } + + delta = now - svc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + if (consume > svc->cur_budget) { + //printk("\n# into %s, this should not happen!\n", __func__); + consume = svc->cur_budget; + } + + svc->cur_budget -= consume; + svc->burn_total += consume; +} + +//init the physical cpu +static int +ss_pcpu_init(int cpu) { + struct ss_pcpu *spc; + unsigned long flags; + + /* Allocate per-PCPU info */ + spc = xmalloc(struct ss_pcpu); + if (spc == NULL) + return -1; + memset(spc, 0, sizeof (*spc)); + + spin_lock_irqsave(&ss_priv.lock, flags); + + if 
(ss_priv.ncpus < cpu) + ss_priv.ncpus = cpu + 1; + + init_timer(&spc->ticker, ss_tick, (void *) (unsigned long) cpu, cpu); + INIT_LIST_HEAD(&spc->runq); + per_cpu(schedule_data, cpu).sched_priv = spc; + + BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr)); + + spc->rep_capacity = REPQ_CAPACITY; + spc->repq = xmalloc_array(struct rep_elem, spc->rep_capacity); + BUG_ON(spc->repq == NULL); + spc->rep_size = 0; + + spin_unlock_irqrestore(&ss_priv.lock, flags); + + printk("\n# finish %s, init cpu: %d\n", __func__, cpu); + + return 0; +} + +//check the vcpu +static inline void +__ss_vcpu_check(struct vcpu *vc) { + struct ss_vcpu * const svc = SS_VCPU(vc); + struct ss_dom * const sdom = svc->sdom; + + BUG_ON(svc->vcpu != vc); + BUG_ON(sdom != SS_DOM(vc->domain)); + if (sdom) { + BUG_ON(is_idle_vcpu(vc)); + BUG_ON(sdom->dom != vc->domain); + } else { + BUG_ON(!is_idle_vcpu(vc)); + } +} +#define SS_VCPU_CHECK(_vc) (__ss_vcpu_check(_vc)) + +//pick a cpu to run, used to migrate from different cpus +static int +ss_cpu_pick(struct vcpu *vc) { + cpumask_t cpus; + int cpu; + + cpus_and(cpus, cpu_online_map, vc->cpu_affinity); + + if (vc->domain->domain_id == 0 && vc->processor != 0) { + return cycle_cpu(vc->processor, cpus); + } + + cpu = cpu_isset(vc->processor, cpus) + ? vc->processor + : cycle_cpu(vc->processor, cpus); + + return cpu; +} + +//check the current repQ to see if a repl needs to happen +static int +check_cpu_for_repl(int cpu) { + int ret = 0; + struct ss_pcpu * spc = SS_PCPU(cpu); + + while((spc->rep_size != 0) && spc->repq[0].re_time < NOW()) { + spc->repq[0].svc->cur_budget += spc->repq[0].re_amount; + if (spc->repq[0].svc->cur_budget > spc->repq[0].svc->budget) { + printk("\n# into %s, this should not happen!\n", __func__); + spc->repq[0].svc->cur_budget = spc->repq[0].svc->budget; + } + ss_repq_remove(cpu); + ret = 1; + } + + return ret; +} + +//if a repl happens, do we need an interrupt? 
(higher priority than current running one) +static void +check_runq_for_interrupt(int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + struct ss_vcpu * cur = SS_CUR(cpu); + + list_for_each(iter, runq) { + struct ss_vcpu * iter_svc = __runq_elem(iter); + if (iter_svc->vcpu->domain->domain_id >= cur->vcpu->domain->domain_id) { + return; + } else if (iter_svc->cur_budget > 0) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + } +} + +//init the virtual cpu +static int +ss_vcpu_init(struct vcpu *vc) { + struct domain * const dom = vc->domain; + struct ss_dom *sdom = SS_DOM(dom); + struct ss_vcpu *svc; + + /* Allocate per-VCPU info */ + svc = xmalloc(struct ss_vcpu); + if (svc == NULL) { + return -1; + } + memset(svc, 0, sizeof (*svc)); + + INIT_LIST_HEAD(&svc->runq_elem); + svc->sdom = sdom; + svc->vcpu = vc; + svc->budget = is_idle_vcpu(vc)? SS_IDLE_PERIOD: sdom->budget; + svc->period = is_idle_vcpu(vc)? SS_IDLE_PERIOD: sdom->period; + svc->cur_budget = svc->budget; + + svc->last_start_time = 0; + svc->burn_total = 0; + svc->next_time = 0; + svc->status = IDLE; + vc->sched_priv = svc; + + /* Allocate per-PCPU info */ + if (unlikely(!SS_PCPU(vc->processor))) { + if (ss_pcpu_init(vc->processor) != 0) + return -1; + } + + SS_VCPU_CHECK(vc); + + printk("\n# into %s, vcpu init: ", __func__); + ss_dump_vcpu(svc); + + return 0; +} + +//destory the vcpu +static void +ss_vcpu_destroy(struct vcpu *vc) { + struct ss_vcpu * const svc = SS_VCPU(vc); + struct ss_dom * const sdom = svc->sdom; + + printk("\n# into %s, vcpu destroy: ", __func__); + ss_dump_vcpu(svc); + + BUG_ON(sdom == NULL); + BUG_ON(!list_empty(&svc->runq_elem)); + + xfree(svc); +} + +//sleep the vcpu +static void +ss_vcpu_sleep(struct vcpu *vc) { + struct ss_vcpu * const svc = SS_VCPU(vc); + + if (vc->domain->domain_id != 0) { + printk("\n# into %s: now %lu, sleep vcpu: \n", __func__, NOW()); + ss_dump_vcpu(svc); + } + + BUG_ON(is_idle_vcpu(vc)); + + if (per_cpu(schedule_data, 
vc->processor).curr == vc) { + cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); + } else if (__vcpu_on_runq(svc)) { + //BUG_ON(svc->status == ACTIVE); + __runq_remove(svc); + } +} + +//wake up the vcpu, insert it into runq, raise a softirq +static void +ss_vcpu_wake(struct vcpu *vc) { + struct ss_vcpu * const svc = SS_VCPU(vc); + const unsigned int cpu = vc->processor; + + BUG_ON(is_idle_vcpu(vc)); + + if (unlikely(per_cpu(schedule_data, cpu).curr == vc)) { + //printk("\n# why wake up running? migration?\n"); + return; + } + if (unlikely(__vcpu_on_runq(svc))) { + //printk("\n# why wake up on runq ones? migration?\n"); + return; + } + + __runq_insert(cpu, svc); + if (svc->vcpu->domain->domain_id < SS_CUR(cpu)->vcpu->domain->domain_id) { + if (svc->vcpu->processor == 1 && ss_start_flag == 1) { + ss_wake++; + } + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } +} + +//used for record data, for overhead measurement +static void +ss_ss_finish_timer(void * temp) { + ss_start_flag = 0; + printk("wake up %d times\n", ss_wake); + ss_wake = 0; +} + +static void +ss_ss_start_timer(void * temp) { + ss_start_flag = 1; + init_timer(&ss_start_timer, ss_ss_finish_timer, (void *) (unsigned int) 1, 1); + set_timer(&ss_start_timer, NOW() + MILLISECS(10000)); +} + +//adjust the domain's budget & period, also used to trigger the record +static int +ss_dom_cntl(struct domain *d, struct xen_domctl_scheduler_op *op) { + struct ss_dom * const sdom = SS_DOM(d); + unsigned long flags; + struct ss_vcpu *svc = SS_VCPU(d->vcpu[0]); + + if (op->cmd == XEN_DOMCTL_SCHEDOP_getinfo) { + op->u.ss.budget = sdom->budget; + op->u.ss.period = sdom->period; + //ss_dump_vcpu(svc); + } else { + BUG_ON(op->cmd != XEN_DOMCTL_SCHEDOP_putinfo); + + spin_lock_irqsave(&ss_priv.lock, flags); + if (op->u.ss.budget != 0) { + sdom->budget = op->u.ss.budget; + svc->budget = op->u.ss.budget; + } + if (op->u.ss.period != 0) { + sdom->period = op->u.ss.period; + svc->period = op->u.ss.period; + } + svc->cur_budget = 
svc->budget; + spin_unlock_irqrestore(&ss_priv.lock, flags); + + if (svc->vcpu->domain->domain_id == 0) { + init_timer(&ss_start_timer, ss_ss_start_timer, (void *) (unsigned int) 1, 1); + set_timer(&ss_start_timer, NOW() + MILLISECS(5000)); + return 1; + } + + //ss_dump_vcpu(svc); + } + + return 0; +} + +//init a dom +static int +ss_dom_init(struct domain *dom) { + struct ss_dom *sdom; + + printk("\n# into %s, domain id is: %d\n", __func__, dom->domain_id); + + if (is_idle_domain(dom)) { + printk("\t# init an idle domain\n"); + return 0; + } + + sdom = xmalloc(struct ss_dom); + if (sdom == NULL) + return -ENOMEM; + memset(sdom, 0, sizeof (*sdom)); + + /* Initialize budget and period */ + sdom->dom = dom; + + switch(dom->domain_id) { + case 32767: + sdom->budget = SS_IDLE_PERIOD; + sdom->period = SS_IDLE_PERIOD; + break; + case 0: + sdom->budget = SS_DOM_0_PERIOD; + sdom->period = SS_DOM_0_PERIOD; + break; + default: + sdom->budget = SS_DOM_BUDGET; + sdom->period = SS_DOM_PERIOD; + break; + } + + dom->sched_priv = sdom; + + return 0; +} + +//destory a domain +static void +ss_dom_destroy(struct domain *dom) { + printk("\n# into %s, destroy domain: %d\n", __func__, dom->domain_id); + xfree(SS_DOM(dom)); +} + +//ticked by pcpu tick in pcpu. 
+static void +ss_tick(void *_cpu) { + unsigned int cpu = (unsigned long) _cpu; + struct ss_pcpu *spc = SS_PCPU(cpu); + + BUG_ON(current->processor != cpu); + + if (check_cpu_for_repl(cpu)) { + check_runq_for_interrupt(cpu); + } + + if (ss_cpu_pick(current) != cpu) { + set_bit(_VPF_migrating, ¤t->pause_flags); + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + set_timer(&spc->ticker, NOW() + BUDGET(1)); +} + +// most important function, called every budget time +static struct task_slice +ss_schedule(s_time_t now) { + const int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + struct ss_vcpu *scurr = SS_VCPU(current); + struct ss_vcpu *snext; + struct task_slice ret; + int re_amount; + + SS_VCPU_CHECK(current); +/* +// for record + if (smp_processor_id() == 1 && start_flag == 1) { + if(is_idle_vcpu(scurr->vcpu)) res[idx].curr = 10; + else res[idx].curr = scurr->vcpu->domain->domain_id; + res[idx].enter_base = NOW(); + res[idx].leave_base = NOW(); + res[idx].enter = NOW(); + } +*/ + if (!is_idle_vcpu(scurr->vcpu) && scurr->vcpu->domain->domain_id != 0) { + //if (!is_idle_vcpu(scurr->vcpu)) { + burn_budgets(scurr, now); + } + + if (vcpu_runnable(current)) { + __runq_insert(cpu, scurr); + } else { + BUG_ON(is_idle_vcpu(current) || list_empty(runq)); + } + + snext = __runq_pick(cpu); + + __runq_remove(snext); + +//context switch do happen!, and snext is not an idle vcpu + if (cpu == 1 && snext != scurr) { + //if (snext != scurr) { + //for the scurr: + //if (!is_idle_vcpu(scurr->vcpu) && scurr->vcpu->domain->domain_id != 0) { + if (!is_idle_vcpu(scurr->vcpu)) { + BUG_ON(scurr->status != ACTIVE); + BUG_ON(scurr->next_time == 0); + scurr->status = IDLE; + re_amount = scurr->burn_total; + scurr->burn_total = 0; + //printk("\n# into %s, change status to IDLE, decide repl amount here! 
now is %lu, for vcpu[%d, %d], re_amount is: %d, re_time is %lu\n", + // __func__, NOW(), scurr->vcpu->domain->domain_id, scurr->vcpu->vcpu_id, re_amount, scurr->next_time); + ss_repq_insert(cpu, scurr, re_amount); + } + + //for the snext: + //if (!is_idle_vcpu(snext->vcpu) && snext->vcpu->domain->domain_id != 0) { + if (!is_idle_vcpu(snext->vcpu)) { + if (snext->status == IDLE) { + BUG_ON(snext->next_time != 0); + snext->status = ACTIVE; + snext->next_time = NOW() + BUDGET(snext->period); + //printk("\n# into %s, change status to ACTIVE, decide repl time here! now is %lu, for vcpu [%d, %d], re_time is %lu\n", + // __func__, NOW(), snext->vcpu->domain->domain_id, snext->vcpu->vcpu_id, snext->next_time); + } + } + + //scan the whole runq + ss_scan_runq(cpu, snext); + } + + if (cpu == 1 && snext->vcpu->domain->domain_id != 0) { + snext->last_start_time = NOW(); + } + + ret.time = (is_idle_vcpu(snext->vcpu) ? -1 : BUDGET(1)); + //ret.time = BUDGET(1); + ret.task = snext->vcpu; + + SS_VCPU_CHECK(ret.task); + + BUG_ON(!vcpu_runnable(snext->vcpu)); + //printk("now is %lu\n", now); + +/* +// for record + + if (smp_processor_id() == 1 && start_flag == 1) { + if(is_idle_vcpu(snext->vcpu)) res[idx].next = 10; + else res[idx].next = snext->vcpu->domain->domain_id; + res[idx].leave = NOW(); + if(idx++ >= RECORD) { + ss_dump_pcpu(1); + ss_dump_record(); + } + } +*/ + return ret; +} + +//init the global data +static void +ss_init(void) { + printk("\n# into %s\n", __func__); + spin_lock_init(&ss_priv.lock); + ss_priv.ncpus = 0; +} + +/* Tickers cannot be kicked until SMP subsystem is alive. 
*/ +static __init int +ss_start_tickers(void) { + struct ss_pcpu *spc; + unsigned int cpu; + + printk("\n# into %s, start all tickers right now\n", __func__); + + if (ss_priv.ncpus == 0) + return 0; + + for_each_online_cpu(cpu) { + spc = SS_PCPU(cpu); + set_timer(&spc->ticker, NOW() + BUDGET(1)); + } + + return 0; +} +__initcall(ss_start_tickers); + +static void ss_tick_suspend(void) { + struct ss_pcpu *spc; + + printk("\n# into %s, why is this called?\n", __func__); + + spc = SS_PCPU(smp_processor_id()); + + stop_timer(&spc->ticker); +} + +static void ss_tick_resume(void) { + struct ss_pcpu *spc; + uint64_t now = NOW(); + + printk("\n# into %s, why is this called?\n", __func__); + + spc = SS_PCPU(smp_processor_id()); + + set_timer(&spc->ticker, now + BUDGET(1)); +} + +const struct scheduler sched_ss_def = { + .name = "Sporadic Server Scheduler", + .opt_name = "ss", + .sched_id = XEN_SCHEDULER_SS, + + .init_domain = ss_dom_init, + .destroy_domain = ss_dom_destroy, + + .init_vcpu = ss_vcpu_init, + .destroy_vcpu = ss_vcpu_destroy, + + .init = ss_init, + + .pick_cpu = ss_cpu_pick, + + .tick_suspend = ss_tick_suspend, + .tick_resume = ss_tick_resume, + + .do_schedule = ss_schedule, + + .sleep = ss_vcpu_sleep, + .wake = ss_vcpu_wake, + + .adjust = ss_dom_cntl, + + .dump_cpu_state = ss_dump_pcpu, + .dump_settings = ss_dump, +}; diff -ubrN xen/xen-4.0.1/xen/common/sched_ss_rtas11.c xen-4.0.1/xen/common/sched_ss_rtas11.c --- xen/xen-4.0.1/xen/common/sched_ss_rtas11.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_ss_rtas11.c 2010-12-16 00:46:03.000000000 -0700 @@ -0,0 +1,893 @@ +/****************************************************************************** + * Sporadic Server scheduler for xen + * + * by Sisu Xi (C) 2010 Washington University in St. 
Louis + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SS_DOM(_dom) ((struct ss_dom *) (_dom)->sched_priv) +#define SS_PCPU(_c) ((struct ss_pcpu *)per_cpu(schedule_data, _c).sched_priv) +#define SS_VCPU(_vcpu) ((struct ss_vcpu *) (_vcpu)->sched_priv) +#define RUNQ(_cpu) (&(SS_PCPU(_cpu)->runq)) +#define SS_CUR(_cpu) SS_VCPU(per_cpu(schedule_data, _cpu).curr) +#define BUDGET(_b) (MILLISECS(_b)) // time to run for 1 budget, default setting is 1ms = 1 budget + +#define REPQ_CAPACITY 500 // repQ is used for the replenishment + +#define SS_DOM_0_PERIOD 100 +#define SS_IDLE_PERIOD 200 + +#define SS_DOM_BUDGET 25 // default budget, can bu changed via xm sched-ss -d target -b budget -p period +#define SS_DOM_PERIOD 50 + +//used for status +#define IDLE 0 +#define ACTIVE 1 + +//used for replenishment +struct rep_elem { + s_time_t re_time; + uint16_t re_amount; + struct ss_vcpu *svc; +}; + +//physical cpu +struct ss_pcpu { + struct list_head runq; // runQ on the pcpu, organized by linked list + struct rep_elem *repq; //repQ on the pcpu, organized by heap + int rep_size; // current size, for later dynamic reqQ use. 
currently set equals to capacity + int rep_capacity; // upper limit + struct timer ticker; // for preemptive use, tick every budget +}; + +//virtual cpu +struct ss_vcpu { + struct list_head runq_elem; + struct ss_dom *sdom; + struct vcpu *vcpu; + + uint16_t budget; + uint16_t period; + + uint16_t cur_budget; + s_time_t last_start_time; // used for burn_budget + uint16_t burn_total; // used for budget repl + int status; + + s_time_t next_time; // used for repl +}; + +//domain +struct ss_dom { + struct domain *dom; + uint16_t budget; + uint16_t period; +}; + +//global variable, records the number of cpus +struct ss_private { + spinlock_t lock; // used for init + uint32_t ncpus; //number of physical cpus +}; +static struct ss_private ss_priv; + +//used for record, overhead measurement +#define RECORD 4000 +struct record_elem{ + int curr; + int next; + s_time_t enter_base; // enter rep insert time + s_time_t leave_base; // leave rep insert time + s_time_t enter; // enter schedule time + s_time_t leave; // leave schedule time +}; + +struct timer start_timer; // would start after 10s, used only once +int start_flag = 0; // start to record or not +int idx = 0; //idx to record +struct record_elem res[RECORD]; // domain_id, time in ms; +//finish for the record + +static void ss_tick(void *_cpu); + +//dump the repq +static void +ss_dump_repq(int cpu) { + int loop = 0; + struct ss_pcpu *spc = SS_PCPU(cpu); + + printk("\n# into %s on cpu %d, now is %lu, size: %d, the repQ is :\n", __func__, cpu, NOW(), spc->rep_size); + for (loop = 0; loop < spc->rep_size; loop++) { + printk("\t[%d. 
%d]: %d @ %lu\n", + spc->repq[loop].svc->vcpu->domain->domain_id, + spc->repq[loop].svc->vcpu->vcpu_id, + spc->repq[loop].re_amount, + spc->repq[loop].re_time); + } +} + +//dump the virtual cpu +static void +ss_dump_vcpu(struct ss_vcpu *svc) { + printk("\t[%i, %i], (%i, %i), cpu: %i, cur_budget: %i, last_start_time: %lu, burn_total: %i, status %d, next_time: %lu\n", + svc->vcpu->domain->domain_id, svc->vcpu->vcpu_id, svc->budget, svc->period, svc->vcpu->processor, + svc->cur_budget, svc->last_start_time, svc->burn_total, svc->status, svc->next_time); +} + +//inlined code +static inline struct ss_vcpu * +__runq_elem(struct list_head *elem) { + return list_entry(elem, struct ss_vcpu, runq_elem); +} + +//dump the physical cpu +static void +ss_dump_pcpu(int cpu) { + struct list_head *iter; + struct ss_pcpu *spc = SS_PCPU(cpu); + struct list_head *runq = &spc->runq; + struct ss_vcpu *svc = SS_CUR(cpu); + int loop = 0; + + printk("\n# into %s, on cpu: %d, now is: %lu\n", __func__, cpu, NOW()); + + if (svc) { + printk("\trun: "); + ss_dump_vcpu(svc); + } + + list_for_each(iter, runq) { + svc = __runq_elem(iter); + if (svc) { + printk("\t%3d: ", ++loop); + ss_dump_vcpu(svc); + } + } + + ss_dump_repq(cpu); +} + +//dump the record out. +static void +ss_dump_record(void) { + int i; + + for (i = 1; i < RECORD; i++) { + printk("%-3d %-3d %13lu %13lu %13lu %13lu\n", res[i].curr, res[i].next, res[i].enter_base, res[i].leave_base, res[i].enter, res[i].leave); + } + //ss_dump_pcpu(1); + idx = 0; + start_flag = 0; +} + +// the current vcpu is on runQ? 
+static inline int +__vcpu_on_runq(struct ss_vcpu *svc) { + return !list_empty(&svc->runq_elem); +} + +//pick the first vcpu whose budget is >0 from the runq +static inline struct ss_vcpu * +__runq_pick(unsigned int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + list_for_each(iter, runq) { + struct ss_vcpu * iter_svc = __runq_elem(iter); + if (iter_svc->cur_budget > 0) { + return iter_svc; + } + } + + BUG_ON(1); + return NULL; +} + +//insert into the runq, followed a FIFO way. sorted by period +static inline void +__runq_insert(unsigned int cpu, struct ss_vcpu *svc) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + BUG_ON(__vcpu_on_runq(svc)); + BUG_ON(cpu != svc->vcpu->processor); + + list_for_each(iter, runq) { + struct ss_vcpu * iter_svc = __runq_elem(iter); + if (svc->period <= iter_svc->period) { + break; + } + } + + list_add_tail(&svc->runq_elem, iter); +} + +//remove it from runQ +static inline void +__runq_remove(struct ss_vcpu *svc) { + BUG_ON(!__vcpu_on_runq(svc)); + list_del_init(&svc->runq_elem); +} + +//used for the heap, repQ +static inline int +ss_rep_parent(int childIdx) { + return (childIdx & 1)? ((childIdx - 1) >> 1) : ((childIdx - 2) >> 1); +} + +//insert into the repQ +static inline void +ss_repq_insert(unsigned int cpu, struct ss_vcpu *svc, int amount) { + struct ss_pcpu * spc = SS_PCPU(cpu); + int childIdx, parentIdx; + + if (amount == 0) { + svc->next_time = 0; + return; + } + + if (svc->next_time == 0) { + printk("\n# in %s, ERROR! 
svc is:", __func__); + ss_dump_vcpu(svc); + ss_dump_pcpu(cpu); + BUG_ON(1); + } + + if (spc->rep_size == spc->rep_capacity) { + printk("\n# into %s, repQ full!!\n", __func__); + BUG_ON(1); + } + + childIdx = spc->rep_size; + parentIdx = ss_rep_parent(childIdx); + + + while (childIdx > 0 && svc->next_time < spc->repq[parentIdx].re_time) { + spc->repq[childIdx] = spc->repq[parentIdx]; + childIdx = parentIdx; + parentIdx = ss_rep_parent(childIdx); + } + + spc->repq[childIdx].re_time = svc->next_time; + spc->repq[childIdx].re_amount = amount; + spc->repq[childIdx].svc = svc; + spc->rep_size++; +/* + printk("\t add a repl. now: %lu, cpu: %d, re_time: %lu, amount: %d, for cpu [%d, %d]\n", + NOW(), cpu, svc->next_time, amount, svc->vcpu->domain->domain_id, svc->vcpu->vcpu_id); + ss_dump_vcpu(svc); +*/ + svc->next_time = 0; +} + +//remove from the repQ +static inline void +ss_repq_remove(unsigned int cpu) { + struct ss_pcpu * spc = SS_PCPU(cpu); + int childIdx = 1; + int rightChildIdx; + int rootIdx = 0; + struct rep_elem temp; + + BUG_ON(spc->rep_size <= 0); + + spc->repq[0] = spc->repq[spc->rep_size - 1]; + spc->rep_size--; + + temp = spc->repq[0]; + + while (childIdx < spc->rep_size) { + rightChildIdx = childIdx + 1; + if (rightChildIdx < spc->rep_size && spc->repq[rightChildIdx].re_time < spc->repq[childIdx].re_time) { + childIdx = rightChildIdx; + } + if (spc->repq[childIdx].re_time < temp.re_time) { + spc->repq[rootIdx] = spc->repq[childIdx]; + rootIdx = childIdx; + childIdx = 2 * rootIdx + 1; + } else { + break; + } + } + spc->repq[rootIdx] = temp; +} + +//svc should be snext. 
Doing this is because we can not get snext->period +//scan the runQ to change status, deside next time or amount +static void +ss_scan_runq(unsigned int cpu, struct ss_vcpu *svc) { + struct list_head * runq = RUNQ(cpu); + struct ss_vcpu *cur = svc; + struct list_head * iter; + int re_amount; + + list_for_each(iter, runq) { + struct ss_vcpu * iter_svc = __runq_elem(iter); + if (is_idle_vcpu(iter_svc->vcpu)) { + return; + } + + //those who has higher priority but run out of budget + if (iter_svc->period < cur->period) { + if (iter_svc->status == ACTIVE) { + //change from ACTIVE to IDLE, decide the repl amount + BUG_ON(iter_svc->next_time == 0); + iter_svc->status = IDLE; + re_amount = iter_svc->burn_total; + iter_svc->burn_total = 0; + ss_repq_insert(cpu, iter_svc, re_amount); + } + } + //those who has lower priority, should all be set to IDLE. On runQ means it has work to do!!! + else { + /* + if (iter_svc->status == IDLE) { + //mark it to be ACTIVE, decide the repl time + iter_svc->status = ACTIVE; + BUG_ON(iter_svc->next_time != 0); + iter_svc->next_time = NOW() + BUDGET(iter_svc->period); + } + */ + // modification made according to RTAS 10 paper + if (iter_svc->status == ACTIVE) { + //mark it to be IDLE, decide the repl amount + BUG_ON(iter_svc->next_time == 0); + iter_svc->status = IDLE; + re_amount = iter_svc->burn_total; + iter_svc->burn_total = 0; + ss_repq_insert(cpu, iter_svc, re_amount); + printk("# into %s, Mark lower running CPU to be IDLE!\n", __func__); + } + } + } +} + +//dump dump function +static void +ss_dump(void) { + printk("# into %s. 
Did Nothing\n", __func__); +} + +//burn the scurr budget +static void +burn_budgets(struct ss_vcpu *svc, s_time_t now) { + s_time_t delta; + unsigned int consume; + + BUG_ON(svc != SS_CUR(svc->vcpu->processor)); + + if (svc->last_start_time == 0) { + svc->last_start_time = now; + return; + } + + delta = now - svc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + if (consume > svc->cur_budget) { + printk("\n# into %s, consumed more than cur budget!\n", __func__); + consume = svc->cur_budget; + } + + svc->cur_budget -= consume; + svc->burn_total += consume; +} + +//init the physical cpu +static int +ss_pcpu_init(int cpu) { + struct ss_pcpu *spc; + unsigned long flags; + + /* Allocate per-PCPU info */ + spc = xmalloc(struct ss_pcpu); + if (spc == NULL) + return -1; + memset(spc, 0, sizeof (*spc)); + + spin_lock_irqsave(&ss_priv.lock, flags); + + if (ss_priv.ncpus < cpu) + ss_priv.ncpus = cpu + 1; + + init_timer(&spc->ticker, ss_tick, (void *) (unsigned long) cpu, cpu); + INIT_LIST_HEAD(&spc->runq); + per_cpu(schedule_data, cpu).sched_priv = spc; + + BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr)); + + spc->rep_capacity = REPQ_CAPACITY; + spc->repq = xmalloc_array(struct rep_elem, spc->rep_capacity); + BUG_ON(spc->repq == NULL); + spc->rep_size = 0; + + spin_unlock_irqrestore(&ss_priv.lock, flags); + + printk("\n# finish %s, init cpu: %d\n", __func__, cpu); + + return 0; +} + +//check the vcpu +static inline void +__ss_vcpu_check(struct vcpu *vc) { + struct ss_vcpu * const svc = SS_VCPU(vc); + struct ss_dom * const sdom = svc->sdom; + + BUG_ON(svc->vcpu != vc); + BUG_ON(sdom != SS_DOM(vc->domain)); + if (sdom) { + BUG_ON(is_idle_vcpu(vc)); + BUG_ON(sdom->dom != vc->domain); + } else { + BUG_ON(!is_idle_vcpu(vc)); + } +} +#define SS_VCPU_CHECK(_vc) (__ss_vcpu_check(_vc)) + +//pick a cpu to run, used to migrate from different cpus +static int +ss_cpu_pick(struct vcpu *vc) { + cpumask_t 
cpus; + int cpu; + + cpus_and(cpus, cpu_online_map, vc->cpu_affinity); + + if (vc->domain->domain_id == 0 && vc->processor != 0) { + return cycle_cpu(vc->processor, cpus); + } + + cpu = cpu_isset(vc->processor, cpus) + ? vc->processor + : cycle_cpu(vc->processor, cpus); + + return cpu; +} + +//check the current repQ to see if a repl needs to happen +static int +check_cpu_for_repl(int cpu) { +// int ret = 0; + struct ss_pcpu * spc = SS_PCPU(cpu); + int flag = 0; //used for interrupt + int priority = SS_CUR(cpu)->period; // current running vcpu's period + + while((spc->rep_size != 0) && spc->repq[0].re_time < NOW()) { + spc->repq[0].svc->cur_budget += spc->repq[0].re_amount; + if (spc->repq[0].svc->cur_budget > spc->repq[0].svc->budget) { + //printk("\n# into %s, repl to more than init budget!\n", __func__); + spc->repq[0].svc->cur_budget = spc->repq[0].svc->budget; + } + if (flag == 0 && spc->repq[0].svc->period < priority) { + flag = 1; // need interrupt + } + ss_repq_remove(cpu); +// ret = 1; + } + + return flag; +} + +/* +//if a repl happens, do we need an interrupt? (higher priority than current running one) +static void +check_runq_for_interrupt(int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + struct ss_vcpu * cur = SS_CUR(cpu); + + list_for_each(iter, runq) { + struct ss_vcpu * iter_svc = __runq_elem(iter); + if (iter_svc->period >= cur->period) { + return; + } else if (iter_svc->cur_budget > 0) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + } +} +*/ + +//init the virtual cpu +static int +ss_vcpu_init(struct vcpu *vc) { + struct domain * const dom = vc->domain; + struct ss_dom *sdom = SS_DOM(dom); + struct ss_vcpu *svc; + + /* Allocate per-VCPU info */ + svc = xmalloc(struct ss_vcpu); + if (svc == NULL) { + return -1; + } + memset(svc, 0, sizeof (*svc)); + + INIT_LIST_HEAD(&svc->runq_elem); + svc->sdom = sdom; + svc->vcpu = vc; + svc->budget = is_idle_vcpu(vc)? SS_IDLE_PERIOD: sdom->budget; + svc->period = is_idle_vcpu(vc)? 
SS_IDLE_PERIOD: sdom->period; + svc->cur_budget = svc->budget; + + svc->last_start_time = 0; + svc->burn_total = 0; + svc->next_time = 0; + svc->status = IDLE; + vc->sched_priv = svc; + + /* Allocate per-PCPU info */ + if (unlikely(!SS_PCPU(vc->processor))) { + if (ss_pcpu_init(vc->processor) != 0) + return -1; + } + + SS_VCPU_CHECK(vc); + + printk("\n# into %s, vcpu init: ", __func__); + ss_dump_vcpu(svc); + + return 0; +} + +//destory the vcpu +static void +ss_vcpu_destroy(struct vcpu *vc) { + struct ss_vcpu * const svc = SS_VCPU(vc); + struct ss_dom * const sdom = svc->sdom; + + printk("\n# into %s, vcpu destroy: ", __func__); + ss_dump_vcpu(svc); + + BUG_ON(sdom == NULL); + BUG_ON(!list_empty(&svc->runq_elem)); + + xfree(svc); +} + +//sleep the vcpu +static void +ss_vcpu_sleep(struct vcpu *vc) { + struct ss_vcpu * const svc = SS_VCPU(vc); + +/* + if (vc->domain->domain_id != 0) { + printk("\n# into %s: now %lu, sleep vcpu: \n", __func__, NOW()); + ss_dump_vcpu(svc); + } +*/ + BUG_ON(is_idle_vcpu(vc)); + + if (per_cpu(schedule_data, vc->processor).curr == vc) { + cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); + } else if (__vcpu_on_runq(svc)) { + //BUG_ON(svc->status == ACTIVE); + __runq_remove(svc); + } +} + +//wake up the vcpu, insert it into runq, raise a softirq +static void +ss_vcpu_wake(struct vcpu *vc) { + struct ss_vcpu * const svc = SS_VCPU(vc); + const unsigned int cpu = vc->processor; + + BUG_ON(is_idle_vcpu(vc)); + + if (unlikely(per_cpu(schedule_data, cpu).curr == vc)) { + //printk("\n# why wake up running? migration?\n"); + return; + } + if (unlikely(__vcpu_on_runq(svc))) { + //printk("\n# why wake up on runq ones? 
migration?\n"); + return; + } + +/* + if (smp_processor_id() == 1) { + printk("%s, domain %d, now %lu\n", __func__, vc->domain->domain_id, NOW()/1000000); + } +*/ + __runq_insert(cpu, svc); + //if (svc->period < SS_CUR(cpu)->period) + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); +} + +//used for record data, for overhead measurement +static void +ss_start_timer(void * temp) { + start_flag = 1; +} + +//adjust the domain's budget & period, also used to trigger the record +static int +ss_dom_cntl(struct domain *d, struct xen_domctl_scheduler_op *op) { + struct ss_dom * const sdom = SS_DOM(d); + unsigned long flags; + struct ss_vcpu *svc = SS_VCPU(d->vcpu[0]); + + if (op->cmd == XEN_DOMCTL_SCHEDOP_getinfo) { + op->u.ss.budget = sdom->budget; + op->u.ss.period = sdom->period; + //ss_dump_vcpu(svc); + } else { + BUG_ON(op->cmd != XEN_DOMCTL_SCHEDOP_putinfo); + + spin_lock_irqsave(&ss_priv.lock, flags); + if (op->u.ss.budget != 0) { + sdom->budget = op->u.ss.budget; + svc->budget = op->u.ss.budget; + } + if (op->u.ss.period != 0) { + sdom->period = op->u.ss.period; + svc->period = op->u.ss.period; + } + svc->cur_budget = svc->budget; + spin_unlock_irqrestore(&ss_priv.lock, flags); + + if (svc->vcpu->domain->domain_id == 0) { + printk("into %s, start to record now!\n", __func__); + init_timer(&start_timer, ss_start_timer, (void *) (unsigned int) 1, 1); + set_timer(&start_timer, NOW() + MILLISECS(10000)); + } + + //ss_dump_vcpu(svc); + } + + return 0; +} + +//init a dom +static int +ss_dom_init(struct domain *dom) { + struct ss_dom *sdom; + + printk("\n# into %s, domain id is: %d\n", __func__, dom->domain_id); + + if (is_idle_domain(dom)) { + printk("\t# init an idle domain\n"); + return 0; + } + + sdom = xmalloc(struct ss_dom); + if (sdom == NULL) + return -ENOMEM; + memset(sdom, 0, sizeof (*sdom)); + + /* Initialize budget and period */ + sdom->dom = dom; + + switch(dom->domain_id) { + case 32767: + sdom->budget = SS_IDLE_PERIOD; + sdom->period = SS_IDLE_PERIOD; + break; + 
case 0: + sdom->budget = SS_DOM_0_PERIOD; + sdom->period = SS_DOM_0_PERIOD; + break; + default: + sdom->budget = SS_DOM_BUDGET; + sdom->period = SS_DOM_PERIOD; + break; + } + + dom->sched_priv = sdom; + + return 0; +} + +//destroy a domain +static void +ss_dom_destroy(struct domain *dom) { + printk("\n# into %s, destroy domain: %d\n", __func__, dom->domain_id); + xfree(SS_DOM(dom)); +} + +//ticked by pcpu tick in pcpu. +static void +ss_tick(void *_cpu) { + unsigned int cpu = (unsigned long) _cpu; + struct ss_pcpu *spc = SS_PCPU(cpu); + + BUG_ON(current->processor != cpu); + + if (check_cpu_for_repl(cpu)) { +// check_runq_for_interrupt(cpu); + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + if (ss_cpu_pick(current) != cpu) { + set_bit(_VPF_migrating, &current->pause_flags); + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + set_timer(&spc->ticker, NOW() + BUDGET(1)); +} + +// most important function, called every budget time +static struct task_slice +ss_schedule(s_time_t now) { + const int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + struct ss_vcpu *scurr = SS_VCPU(current); + struct ss_vcpu *snext; + struct task_slice ret; + int re_amount; + + SS_VCPU_CHECK(current); + +// for record + if (smp_processor_id() == 1 && start_flag == 1) { + if(is_idle_vcpu(scurr->vcpu)) res[idx].curr = 10; + else res[idx].curr = scurr->vcpu->domain->domain_id; + res[idx].enter_base = NOW(); + res[idx].leave_base = NOW(); + res[idx].enter = NOW(); + } + + if (!is_idle_vcpu(scurr->vcpu) && scurr->vcpu->domain->domain_id != 0) { + //if (!is_idle_vcpu(scurr->vcpu)) { + burn_budgets(scurr, now); + } + + if (vcpu_runnable(current)) { + __runq_insert(cpu, scurr); + } else { + BUG_ON(is_idle_vcpu(current) || list_empty(runq)); + } + + snext = __runq_pick(cpu); + + __runq_remove(snext); + +//context switch do happen!, and snext is not an idle vcpu + if (cpu == 1 && snext != scurr) { + //if (snext != scurr) { + //for the scurr: + //if (!is_idle_vcpu(scurr->vcpu) && 
scurr->vcpu->domain->domain_id != 0) { + if (!is_idle_vcpu(scurr->vcpu)) { + BUG_ON(scurr->status != ACTIVE); + BUG_ON(scurr->next_time == 0); + scurr->status = IDLE; + re_amount = scurr->burn_total; + scurr->burn_total = 0; + //printk("\n# into %s, change status to IDLE, decide repl amount here! now is %lu, for vcpu[%d, %d], re_amount is: %d, re_time is %lu\n", + // __func__, NOW(), scurr->vcpu->domain->domain_id, scurr->vcpu->vcpu_id, re_amount, scurr->next_time); + ss_repq_insert(cpu, scurr, re_amount); + } + + //for the snext: + //if (!is_idle_vcpu(snext->vcpu) && snext->vcpu->domain->domain_id != 0) { + if (!is_idle_vcpu(snext->vcpu)) { + if (snext->status == IDLE) { + BUG_ON(snext->next_time != 0); + snext->status = ACTIVE; + snext->next_time = NOW() + BUDGET(snext->period); + //printk("\n# into %s, change status to ACTIVE, decide repl time here! now is %lu, for vcpu [%d, %d], re_time is %lu\n", + // __func__, NOW(), snext->vcpu->domain->domain_id, snext->vcpu->vcpu_id, snext->next_time); + } + } + + //scan the whole runq + ss_scan_runq(cpu, snext); + } + + if (cpu == 1 && snext->vcpu->domain->domain_id != 0) { + snext->last_start_time = NOW(); + } + + ret.time = (is_idle_vcpu(snext->vcpu) ? -1 : BUDGET(1)); + //ret.time = BUDGET(1); + ret.task = snext->vcpu; + + SS_VCPU_CHECK(ret.task); + + BUG_ON(!vcpu_runnable(snext->vcpu)); + //printk("now is %lu\n", now); + +// for record + if (smp_processor_id() == 1 && start_flag == 1) { + if(is_idle_vcpu(snext->vcpu)) res[idx].next = 10; + else res[idx].next = snext->vcpu->domain->domain_id; + res[idx].leave = NOW(); + if(idx++ >= RECORD) { + ss_dump_record(); + } + } + + return ret; +} + +//init the global data +static void +ss_init(void) { + printk("\n# into %s\n", __func__); + spin_lock_init(&ss_priv.lock); + ss_priv.ncpus = 0; +} + +/* Tickers cannot be kicked until SMP subsystem is alive. 
*/ +static __init int +ss_start_tickers(void) { + struct ss_pcpu *spc; + unsigned int cpu; + + printk("\n# into %s, start all tickers right now\n", __func__); + + if (ss_priv.ncpus == 0) + return 0; + + for_each_online_cpu(cpu) { + spc = SS_PCPU(cpu); + set_timer(&spc->ticker, NOW() + BUDGET(1)); + } + + return 0; +} +__initcall(ss_start_tickers); + +static void ss_tick_suspend(void) { + struct ss_pcpu *spc; + + printk("\n# into %s, why is this called?\n", __func__); + + spc = SS_PCPU(smp_processor_id()); + + stop_timer(&spc->ticker); +} + +static void ss_tick_resume(void) { + struct ss_pcpu *spc; + uint64_t now = NOW(); + + printk("\n# into %s, why is this called?\n", __func__); + + spc = SS_PCPU(smp_processor_id()); + + set_timer(&spc->ticker, now + BUDGET(1)); +} + +const struct scheduler sched_ss_def = { + .name = "Sporadic Server Scheduler", + .opt_name = "ss", + .sched_id = XEN_SCHEDULER_SS, + + .init_domain = ss_dom_init, + .destroy_domain = ss_dom_destroy, + + .init_vcpu = ss_vcpu_init, + .destroy_vcpu = ss_vcpu_destroy, + + .init = ss_init, + + .pick_cpu = ss_cpu_pick, + + .tick_suspend = ss_tick_suspend, + .tick_resume = ss_tick_resume, + + .do_schedule = ss_schedule, + + .sleep = ss_vcpu_sleep, + .wake = ss_vcpu_wake, + + .adjust = ss_dom_cntl, + + .dump_cpu_state = ss_dump_pcpu, + .dump_settings = ss_dump, +}; diff -ubrN xen/xen-4.0.1/xen/common/schedule.c xen-4.0.1/xen/common/schedule.c --- xen/xen-4.0.1/xen/common/schedule.c 2010-08-25 04:22:12.000000000 -0600 +++ xen-4.0.1/xen/common/schedule.c 2011-04-24 15:43:52.000000000 -0600 @@ -34,8 +34,8 @@ #include #include -/* opt_sched: scheduler - default to credit */ -static char __initdata opt_sched[10] = "credit"; +/* opt_sched: scheduler - default to rt */ +static char __initdata opt_sched[10] = "rt"; string_param("sched", opt_sched); /* if sched_smt_power_savings is set, @@ -56,12 +56,32 @@ extern const struct scheduler sched_sedf_def; extern const struct scheduler sched_credit_def; +// added by Sisu 
Xi +extern const struct scheduler sched_rt_def; static const struct scheduler *__initdata schedulers[] = { &sched_sedf_def, &sched_credit_def, + &sched_rt_def, NULL }; +//for record +#define RECORD 15000 +struct record_elem{ + int processor; // 1: idle to busy, 2: busy to idle, 3: busy to busy(dif), 4: same + int curr; + int next; + s_time_t dur_sub; + s_time_t dur; +}; + +//int sched_idx; +int sched_start_flag = 0; // to record data +struct timer sched_start_timer; +s_time_t temp_dur_sub; +s_time_t temp_dur; +//finish record + static struct scheduler __read_mostly ops; #define SCHED_OP(fn, ...) \ @@ -777,6 +797,37 @@ return ops.sched_id; } +//for record +static void +record_finish_timer(void * temp) { +// int i = 0; + + sched_start_flag = 0; +/* + for (i = 0; i < sched_idx; i++) { + printk("%d %5d %5d %7lu %7lu\n", sched_res[i].processor, sched_res[i].curr, sched_res[i].next, sched_res[i].dur_sub, sched_res[i].dur); + } + + for (i = 0; i < RECORD; i++) { + sched_res[i].processor = 0; + sched_res[i].curr = 0; + sched_res[i].next = 0; + sched_res[i].dur_sub = 0; + sched_res[i].dur = 0; + } + + sched_idx = 0; +*/ +} + +static void +record_start_timer(void * temp) { + sched_start_flag = 1; + init_timer(&sched_start_timer, record_finish_timer, (void *) (unsigned int) 1, 1); + set_timer(&sched_start_timer, NOW() + MILLISECS(10000)); +} +//finish recording + /* Adjust scheduling parameter for a given domain. */ long sched_adjust(struct domain *d, struct xen_domctl_scheduler_op *op) { @@ -810,9 +861,17 @@ if ( d == current->domain ) vcpu_schedule_lock_irq(current); - if ( (ret = SCHED_OP(adjust, d, op)) == 0 ) + if ( (ret = SCHED_OP(adjust, d, op)) >= 0 ) TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id); +//trigger recording!! 
+ if (ret == 1) { + printk("start!\n"); + init_timer(&sched_start_timer, record_start_timer, (void *) (unsigned int) 1, 1); + set_timer(&sched_start_timer, NOW() + MILLISECS(5000)); + ret = 0; + } + if ( d == current->domain ) vcpu_schedule_unlock_irq(current); @@ -860,6 +919,11 @@ struct schedule_data *sd; struct task_slice next_slice; +//record + if (prev->processor == 1 && sched_start_flag == 1) { + temp_dur = now; + } + ASSERT(!in_irq()); ASSERT(this_cpu(mc_state).flags == 0); @@ -871,8 +935,16 @@ stop_timer(&sd->s_timer); +//record + if (prev->processor == 1 && sched_start_flag == 1) { + temp_dur_sub = NOW(); + } /* get policy-specific decision on scheduling... */ next_slice = ops.do_schedule(now); + if (prev->processor == 1 && sched_start_flag == 1) { + printk("%7lu ", NOW() - temp_dur_sub); + //sched_res[sched_idx].dur_sub = NOW() - temp_dur_sub; + } next = next_slice.task; @@ -881,10 +953,19 @@ if ( next_slice.time >= 0 ) /* -ve means no limit */ set_timer(&sd->s_timer, now + next_slice.time); + if (prev->processor == 1 && sched_start_flag == 1) { + printk("%7d %7d %13lu ", prev->domain->domain_id, next->domain->domain_id, NOW()); + } + if ( unlikely(prev == next) ) { spin_unlock_irq(&sd->schedule_lock); trace_continue_running(next); + if (prev->processor == 1 && sched_start_flag == 1) { + printk("%13lu\n", NOW()); + //sched_res[sched_idx].dur = NOW() - temp_dur; + //sched_idx++; + } return continue_running(prev); } @@ -931,7 +1012,11 @@ update_vcpu_system_time(next); vcpu_periodic_timer_work(next); - context_switch(prev, next); + if (prev->processor == 1) { + context_switch(sched_start_flag, prev, next); + } else { + context_switch(0, prev, next); + } } void context_saved(struct vcpu *prev) diff -ubrN xen/xen-4.0.1/xen/drivers/char/console.c xen-4.0.1/xen/drivers/char/console.c --- xen/xen-4.0.1/xen/drivers/char/console.c 2010-08-25 04:22:12.000000000 -0600 +++ xen-4.0.1/xen/drivers/char/console.c 2011-01-15 10:57:46.000000000 -0700 @@ -63,7 +63,9 @@ 
static uint32_t __initdata opt_conring_size; size_param("conring_size", opt_conring_size); -#define _CONRING_SIZE 16384 +//#define _CONRING_SIZE 16384 +//Sisu xi +#define _CONRING_SIZE 1638400 #define CONRING_IDX_MASK(i) ((i)&(conring_size-1)) static char __initdata _conring[_CONRING_SIZE]; static char *__read_mostly conring = _conring; diff -ubrN xen/xen-4.0.1/xen/include/public/domctl.h xen-4.0.1/xen/include/public/domctl.h --- xen/xen-4.0.1/xen/include/public/domctl.h 2010-08-25 04:22:14.000000000 -0600 +++ xen-4.0.1/xen/include/public/domctl.h 2011-04-24 15:51:25.000000000 -0600 @@ -303,6 +303,9 @@ /* Scheduler types. */ #define XEN_SCHEDULER_SEDF 4 #define XEN_SCHEDULER_CREDIT 5 +// added by Sisu Xi +#define XEN_SCHEDULER_RT 7 + /* Set or get info? */ #define XEN_DOMCTL_SCHEDOP_putinfo 0 #define XEN_DOMCTL_SCHEDOP_getinfo 1 @@ -321,6 +324,12 @@ uint16_t weight; uint16_t cap; } credit; + // added by Sisu Xi + struct xen_domctl_sched_rt { + uint16_t budget; + uint16_t period; + uint16_t level; + } rt; } u; }; typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t; diff -ubrN xen/xen-4.0.1/xen/include/xen/sched.h xen-4.0.1/xen/include/xen/sched.h --- xen/xen-4.0.1/xen/include/xen/sched.h 2010-08-25 04:22:14.000000000 -0600 +++ xen-4.0.1/xen/include/xen/sched.h 2011-01-18 00:58:43.000000000 -0700 @@ -492,6 +492,7 @@ * sync_vcpu_execstate() will switch and commit @prev's state. */ void context_switch( + int flag, struct vcpu *prev, struct vcpu *next);