Patch constructed from ZIP posted at developer site - https://sites.google.com/site/realtimexen/download http://students.cec.wustl.edu/~sx1/RT-XEN/xen0512.zip diff -ubrN xen/xen-4.0.1/tools/libxc/Makefile xen-4.0.1/tools/libxc/Makefile --- xen/xen-4.0.1/tools/libxc/Makefile 2010-08-25 04:22:09.000000000 -0600 +++ xen-4.0.1/tools/libxc/Makefile 2011-04-24 20:29:11.000000000 -0600 @@ -17,6 +17,7 @@ CTRL_SRCS-y += xc_private.c CTRL_SRCS-y += xc_sedf.c CTRL_SRCS-y += xc_csched.c +CTRL_SRCS-y += xc_rt.c CTRL_SRCS-y += xc_tbuf.c CTRL_SRCS-y += xc_pm.c CTRL_SRCS-y += xc_cpu_hotplug.c diff -ubrN xen/xen-4.0.1/tools/libxc/xc_rt.c xen-4.0.1/tools/libxc/xc_rt.c --- xen/xen-4.0.1/tools/libxc/xc_rt.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/tools/libxc/xc_rt.c 2011-04-24 20:52:41.000000000 -0600 @@ -0,0 +1,49 @@ +/**************************************************************************** + * (C) 2006 - Emmanuel Ackaouy - XenSource Inc. + **************************************************************************** + * + * File: xc_rt.c + * Author: Sisu Xi + * + * Description: XC Interface to the ds scheduler + * + */ +#include "xc_private.h" + +int +xc_sched_rt_domain_set( + int xc_handle, + uint32_t domid, + struct xen_domctl_sched_rt *sdom) +{ + DECLARE_DOMCTL; + + domctl.cmd = XEN_DOMCTL_scheduler_op; + domctl.domain = (domid_t) domid; + domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_RT; + domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_putinfo; + domctl.u.scheduler_op.u.rt = *sdom; + + return do_domctl(xc_handle, &domctl); +} + +int +xc_sched_rt_domain_get( + int xc_handle, + uint32_t domid, + struct xen_domctl_sched_rt *sdom) +{ + DECLARE_DOMCTL; + int err; + + domctl.cmd = XEN_DOMCTL_scheduler_op; + domctl.domain = (domid_t) domid; + domctl.u.scheduler_op.sched_id = XEN_SCHEDULER_RT; + domctl.u.scheduler_op.cmd = XEN_DOMCTL_SCHEDOP_getinfo; + + err = do_domctl(xc_handle, &domctl); + if ( err == 0 ) + *sdom = domctl.u.scheduler_op.u.rt; + + return err; +} diff 
-ubrN xen/xen-4.0.1/tools/libxc/xenctrl.h xen-4.0.1/tools/libxc/xenctrl.h --- xen/xen-4.0.1/tools/libxc/xenctrl.h 2010-08-25 04:22:09.000000000 -0600 +++ xen-4.0.1/tools/libxc/xenctrl.h 2011-04-24 15:41:12.000000000 -0600 @@ -465,6 +465,15 @@ uint32_t domid, struct xen_domctl_sched_credit *sdom); +// added by Sisu Xi +int xc_sched_rt_domain_set(int xc_handle, + uint32_t domid, + struct xen_domctl_sched_rt *sdom); + +int xc_sched_rt_domain_get(int xc_handle, + uint32_t domid, + struct xen_domctl_sched_rt *sdom); + /** * This function sends a trigger to a domain. * diff -ubrN xen/xen-4.0.1/tools/libxl/libxl.c xen-4.0.1/tools/libxl/libxl.c --- xen/xen-4.0.1/tools/libxl/libxl.c 2010-08-25 04:22:09.000000000 -0600 +++ xen-4.0.1/tools/libxl/libxl.c 2011-04-24 15:50:49.000000000 -0600 @@ -2766,6 +2766,64 @@ if (rc != 0) return rc; + return 0; +} + +// added by Sisu Xi + +int libxl_sched_rt_domain_get(struct libxl_ctx *ctx, uint32_t domid, struct libxl_sched_rt *scinfo) +{ + struct xen_domctl_sched_rt sdom; + int rc; + + rc = xc_sched_rt_domain_get(ctx->xch, domid, &sdom); + if (rc != 0) + return rc; + + scinfo->budget = sdom.budget; + scinfo->period = sdom.period; + scinfo->level = sdom.level; + + return 0; +} + +int libxl_sched_rt_domain_set(struct libxl_ctx *ctx, uint32_t domid, struct libxl_sched_rt *scinfo) +{ + struct xen_domctl_sched_rt sdom; + xc_domaininfo_t domaininfo; + int rc; + + rc = xc_domain_getinfolist(ctx->xch, domid, 1, &domaininfo); + if (rc != 1 || domaininfo.domain != domid) + return rc; + + + if (scinfo->budget < 1 || scinfo->budget > 65535) { + XL_LOG_ERRNOVAL(ctx, XL_LOG_ERROR, rc, + "Cpu budget out of range, valid values are within range from 1 to 65535"); + return -1; + } + + if (scinfo->period < 1 || scinfo->period > 65535) { + XL_LOG_ERRNOVAL(ctx, XL_LOG_ERROR, rc, + "Cpu period out of range, valid values are within range from 1 to 65535"); + return -1; + } + + if (scinfo->level < 1 || scinfo->level > 65535) { + XL_LOG_ERRNOVAL(ctx, 
XL_LOG_ERROR, rc, + "Cpu level out of range, valid values are within range from 1 to 65535"); + return -1; + } + + sdom.budget = scinfo->budget; + sdom.period = scinfo->period; + sdom.level = scinfo->level; + + rc = xc_sched_rt_domain_set(ctx->xch, domid, &sdom); + if (rc != 0) + return rc; + return 0; } diff -ubrN xen/xen-4.0.1/tools/libxl/libxl.h xen-4.0.1/tools/libxl/libxl.h --- xen/xen-4.0.1/tools/libxl/libxl.h 2010-08-25 04:22:09.000000000 -0600 +++ xen-4.0.1/tools/libxl/libxl.h 2011-04-24 15:47:43.000000000 -0600 @@ -499,10 +499,23 @@ int cap; }; +// added by Sisu Xi +struct libxl_sched_rt { + int budget; + int period; + int level; +}; + int libxl_sched_credit_domain_get(struct libxl_ctx *ctx, uint32_t domid, struct libxl_sched_credit *scinfo); int libxl_sched_credit_domain_set(struct libxl_ctx *ctx, uint32_t domid, struct libxl_sched_credit *scinfo); + +// added by Sisu Xi +int libxl_sched_rt_domain_get(struct libxl_ctx *ctx, uint32_t domid, + struct libxl_sched_rt *scinfo); +int libxl_sched_rt_domain_set(struct libxl_ctx *ctx, uint32_t domid, + struct libxl_sched_rt *scinfo); int libxl_send_trigger(struct libxl_ctx *ctx, uint32_t domid, char *trigger_name, uint32_t vcpuid); int libxl_send_sysrq(struct libxl_ctx *ctx, uint32_t domid, char sysrq); diff -ubrN xen/xen-4.0.1/tools/libxl/xl_cmdimpl.c xen-4.0.1/tools/libxl/xl_cmdimpl.c --- xen/xen-4.0.1/tools/libxl/xl_cmdimpl.c 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/libxl/xl_cmdimpl.c 2011-04-24 15:33:20.000000000 -0600 @@ -2989,8 +2989,7 @@ printf("xen_minor : %d\n", info->xen_version_minor); printf("xen_extra : %s\n", info->xen_version_extra); printf("xen_caps : %s\n", info->capabilities); - printf("xen_scheduler : %s\n", - sched_id == XEN_SCHEDULER_SEDF ? 
"sedf" : "credit"); + printf("xen_scheduler : %d\n", sched_id); printf("xen_pagesize : %lu\n", info->pagesize); printf("platform_params : virt_start=0x%lx\n", info->virt_start); printf("xen_changeset : %s\n", info->changeset); @@ -3190,6 +3189,242 @@ exit(0); } + +//added by Sisu Xi +static int sched_rt_domain_get( + int domid, struct libxl_sched_rt *scinfo) +{ + int rc; + + rc = libxl_sched_rt_domain_get(&ctx, domid, scinfo); + if (rc) + fprintf(stderr, "libxl_sched_rt_domain_get failed.\n"); + + return rc; +} + +static int sched_rt_domain_set( + int domid, struct libxl_sched_rt *scinfo) +{ + int rc; + + rc = libxl_sched_rt_domain_set(&ctx, domid, scinfo); + if (rc) + fprintf(stderr, "libxl_sched_rt_domain_set failed.\n"); + + return rc; +} + +static void sched_rt_domain_output( + int domid, struct libxl_sched_rt *scinfo) +{ + printf("%-33s %4d %6d %4d %4d\n", + libxl_domid_to_name(&ctx, domid), + domid, + scinfo->budget, + scinfo->period, + scinfo->level); +} + +int main_sched_rt(int argc, char **argv) +{ + struct libxl_dominfo *info; + struct libxl_sched_rt scinfo; + int nb_domain, i; + char *dom = NULL; + int budget = 25, period = 50, level = 10, opt_w = 0, opt_c = 0, opt_l = 0; + int opt, rc; + + while ((opt = getopt(argc, argv, "hd:b:p:l:")) != -1) { + switch (opt) { + case 'd': + dom = optarg; + break; + case 'b': + budget = strtol(optarg, NULL, 10); + opt_w = 1; + break; + case 'p': + period = strtol(optarg, NULL, 10); + opt_c = 1; + break; + case 'l': + level = strtol(optarg, NULL, 10); + opt_l = 1; + break; + case 'h': + help("sched-rt"); + exit(0); + default: + fprintf(stderr, "option `%c' not supported.\n", opt); + break; + } + } + + if (!dom && (opt_w || opt_c || opt_l)) { + fprintf(stderr, "Must specify a domain.\n"); + exit(1); + } + + if (!dom) { /* list all domain's ds scheduler info */ + info = libxl_list_domain(&ctx, &nb_domain); + if (!info) { + fprintf(stderr, "libxl_domain_infolist failed.\n"); + exit(1); + } + + printf("%-33s %4s %6s %4s 
%4s\n", "Name", "ID", "Budget", "Period", "Level"); + for (i = 0; i < nb_domain; i++) { + rc = sched_rt_domain_get(info[i].domid, &scinfo); + if (rc) + exit(-rc); + sched_rt_domain_output(info[i].domid, &scinfo); + } + } else { + find_domain(dom); + + rc = sched_rt_domain_get(domid, &scinfo); + if (rc) + exit(-rc); + + if (!opt_w && !opt_c && !opt_l) { /* output ds scheduler info */ + printf("%-33s %4s %6s %4s %4s\n", "Name", "ID", "Budget", "Period", "Level"); + sched_rt_domain_output(domid, &scinfo); + } else { /* set ds scheduler paramaters */ + if (opt_w) + scinfo.budget = budget; + if (opt_c) + scinfo.period = period; + if (opt_l) + scinfo.level = level; + rc = sched_rt_domain_set(domid, &scinfo); + if (rc) + exit(-rc); + } + } + + exit(0); +} + +// +// +// static int sched_ps_domain_get( +// int domid, struct libxl_sched_ps *scinfo) +// { +// int rc; +// +// rc = libxl_sched_ps_domain_get(&ctx, domid, scinfo); +// if (rc) +// fprintf(stderr, "libxl_sched_ps_domain_get failed.\n"); +// +// return rc; +// } +// +// static int sched_ps_domain_set( +// int domid, struct libxl_sched_ps *scinfo) +// { +// int rc; +// +// rc = libxl_sched_ps_domain_set(&ctx, domid, scinfo); +// if (rc) +// fprintf(stderr, "libxl_sched_ps_domain_set failed.\n"); +// +// return rc; +// } +// +// static void sched_ps_domain_output( +// int domid, struct libxl_sched_ps *scinfo) +// { +// printf("%-33s %4d %6d %4d %4d\n", +// libxl_domid_to_name(&ctx, domid), +// domid, +// scinfo->cost, +// scinfo->period, +// scinfo->level); +// } +// +// int main_sched_ps(int argc, char **argv) +// { +// struct libxl_dominfo *info; +// struct libxl_sched_ps scinfo; +// int nb_domain, i; +// char *dom = NULL; +// int cost = 25, period = 50, level = 10, opt_w = 0, opt_c = 0, opt_l = 0; +// int opt, rc; +// +// while ((opt = getopt(argc, argv, "hd:c:p:l:")) != -1) { +// switch (opt) { +// case 'd': +// dom = optarg; +// break; +// case 'c': +// cost = strtol(optarg, NULL, 10); +// opt_w = 1; +// break; 
+// case 'p': +// period = strtol(optarg, NULL, 10); +// opt_c = 1; +// break; +// case 'l': +// level = strtol(optarg, NULL, 10); +// opt_l = 1; +// break; +// case 'h': +// help("sched-ps"); +// exit(0); +// default: +// fprintf(stderr, "option `%c' not supported.\n", opt); +// break; +// } +// } +// +// if (!dom && (opt_w || opt_c || opt_l)) { +// fprintf(stderr, "Must specify a domain.\n"); +// exit(1); +// } +// +// if (!dom) { /* list all domain's ps scheduler info */ +// info = libxl_list_domain(&ctx, &nb_domain); +// if (!info) { +// fprintf(stderr, "libxl_domain_infolist failed.\n"); +// exit(1); +// } +// +// printf("%-33s %4s %6s %4s %4s\n", "Name", "ID", "Cost", "Period", "Level"); +// for (i = 0; i < nb_domain; i++) { +// rc = sched_ps_domain_get(info[i].domid, &scinfo); +// if (rc) +// exit(-rc); +// sched_ps_domain_output(info[i].domid, &scinfo); +// } +// } else { +// find_domain(dom); +// +// rc = sched_ps_domain_get(domid, &scinfo); +// if (rc) +// exit(-rc); +// +// if (!opt_w && !opt_c && !opt_l) { /* output ps scheduler info */ +// printf("%-33s %4s %6s %4s %4s\n", "Name", "ID", "Cost", "Period", "Level"); +// sched_ps_domain_output(domid, &scinfo); +// } else { /* set ps scheduler paramaters */ +// if (opt_w) +// scinfo.cost = cost; +// if (opt_c) +// scinfo.period = period; +// if (opt_l) +// scinfo.level = level; +// rc = sched_ps_domain_set(domid, &scinfo); +// if (rc) +// exit(-rc); +// } +// } +// +// exit(0); +// } + + + int main_domid(int argc, char **argv) { int opt; diff -ubrN xen/xen-4.0.1/tools/libxl/xl_cmdtable.c xen-4.0.1/tools/libxl/xl_cmdtable.c --- xen/xen-4.0.1/tools/libxl/xl_cmdtable.c 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/libxl/xl_cmdtable.c 2011-04-24 15:32:54.000000000 -0600 @@ -175,6 +175,16 @@ "-w WEIGHT, --weight=WEIGHT Weight (int)\n" "-c CAP, --cap=CAP Cap (int)" }, + //added by Sisu Xi + { "sched-rt", + &main_sched_rt, + "Get/Set RT scheduler parameters", + "[-d 
[-b[=BUDGET]|-p[=PERIOD]|-l[=LEVEL]]]", + "-d DOMAIN, --domain = DOMAIN Domain to modify\n" + "-b BUDGET, --budget = BUDGET Budget (int)\n" + "-p PERIOD, --period = PERIOD Period (int)\n" + "-l LEVEL, --level = LEVEL Level (int)" + }, { "domid", &main_domid, "Convert a domain name to domain id", diff -ubrN xen/xen-4.0.1/tools/libxl/xl.h xen-4.0.1/tools/libxl/xl.h --- xen/xen-4.0.1/tools/libxl/xl.h 2010-08-25 04:22:09.000000000 -0600 +++ xen-4.0.1/tools/libxl/xl.h 2011-04-24 15:24:44.000000000 -0600 @@ -49,6 +49,8 @@ int main_memmax(int argc, char **argv); int main_memset(int argc, char **argv); int main_sched_credit(int argc, char **argv); +//added by Sisu Xi +int main_sched_rt(int argc, char **argv); int main_domid(int argc, char **argv); int main_domname(int argc, char **argv); int main_rename(int argc, char **argv); diff -ubrN xen/xen-4.0.1/tools/python/xen/lowlevel/xc/xc.c xen-4.0.1/tools/python/xen/lowlevel/xc/xc.c --- xen/xen-4.0.1/tools/python/xen/lowlevel/xc/xc.c 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/lowlevel/xc/xc.c 2011-04-24 15:42:16.000000000 -0600 @@ -1455,6 +1455,54 @@ "cap", sdom.cap); } +// added by Sisu Xi +static PyObject *pyxc_sched_rt_domain_set(XcObject *self, + PyObject *args, + PyObject *kwds) +{ + uint32_t domid; + uint16_t budget; + uint16_t period; + uint16_t level; + static char *kwd_list[] = { "domid", "budget", "period", "level", NULL }; + static char kwd_type[] = "I|HHH"; + struct xen_domctl_sched_rt sdom; + + budget = 25; + period = 50; + level = 10; + if( !PyArg_ParseTupleAndKeywords(args, kwds, kwd_type, kwd_list, + &domid, &budget, &period, &level) ) + return NULL; + + sdom.budget = budget; + sdom.period = period; + sdom.level = level; + + if ( xc_sched_rt_domain_set(self->xc_handle, domid, &sdom) != 0 ) + return pyxc_error_to_exception(); + + Py_INCREF(zero); + return zero; +} + +static PyObject *pyxc_sched_rt_domain_get(XcObject *self, PyObject *args) +{ + uint32_t domid; + struct xen_domctl_sched_rt 
sdom; + + if( !PyArg_ParseTuple(args, "I", &domid) ) + return NULL; + + if ( xc_sched_rt_domain_get(self->xc_handle, domid, &sdom) != 0 ) + return pyxc_error_to_exception(); + + return Py_BuildValue("{s:H,s:H,s:H}", + "budget", sdom.budget, + "period", sdom.period, + "level", sdom.level); +} + static PyObject *pyxc_domain_setmaxmem(XcObject *self, PyObject *args) { uint32_t dom; @@ -2010,6 +2058,26 @@ "Returns: [dict]\n" " weight [short]: domain's scheduling weight\n"}, +// added by Sisu Xi + { "sched_rt_domain_set", + (PyCFunction)pyxc_sched_rt_domain_set, + METH_KEYWORDS, "\n" + "Set the scheduling parameters for a domain when running with the\n" + "SMP ds scheduler.\n" + " domid [int]: domain id to set\n" + " budget [short]: domain's scheduling budget\n" + "Returns: [int] 0 on success; -1 on error.\n" }, + + { "sched_rt_domain_get", + (PyCFunction)pyxc_sched_rt_domain_get, + METH_VARARGS, "\n" + "Get the scheduling parameters for a domain when running with the\n" + "SMP ds scheduler.\n" + " domid [int]: domain id to get\n" + "Returns: [dict]\n" + " budget [short]: domain's scheduling budget\n"}, + + { "evtchn_alloc_unbound", (PyCFunction)pyxc_evtchn_alloc_unbound, METH_VARARGS | METH_KEYWORDS, "\n" @@ -2378,7 +2446,10 @@ /* Expose some libxc constants to Python */ PyModule_AddIntConstant(m, "XEN_SCHEDULER_SEDF", XEN_SCHEDULER_SEDF); PyModule_AddIntConstant(m, "XEN_SCHEDULER_CREDIT", XEN_SCHEDULER_CREDIT); - + // PyModule_AddIntConstant(m, "XEN_SCHEDULER_SS", XEN_SCHEDULER_SS); + // added by Sisu Xi + PyModule_AddIntConstant(m, "XEN_SCHEDULER_RT", XEN_SCHEDULER_RT); + // PyModule_AddIntConstant(m, "XEN_SCHEDULER_PS", XEN_SCHEDULER_PS); } diff -ubrN xen/xen-4.0.1/tools/python/xen/xend/server/SrvDomain.py xen-4.0.1/tools/python/xen/xend/server/SrvDomain.py --- xen/xen-4.0.1/tools/python/xen/xend/server/SrvDomain.py 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/xend/server/SrvDomain.py 2011-04-24 15:43:07.000000000 -0600 @@ -163,6 +163,22 @@ val 
= fn(req.args, {'dom': self.dom.getName()}) return val +# added by Sisu Xi + def op_domain_sched_rt_get(self, _, req): + fn = FormFn(self.xd.domain_sched_rt_get, + [['dom', 'str']]) + val = fn(req.args, {'dom': self.dom.getName()}) + return val + + def op_domain_sched_rt_set(self, _, req): + fn = FormFn(self.xd.domain_sched_rt_set, + [['dom', 'str'], + ['budget', 'int'], + ['period', 'int'], + ['level', 'int']]) + val = fn(req.args, {'dom': self.dom.getName()}) + return val + def op_maxmem_set(self, _, req): return self.call(self.dom.setMemoryMaximum, [['memory', 'int']], diff -ubrN xen/xen-4.0.1/tools/python/xen/xend/XendAPI.py xen-4.0.1/tools/python/xen/xend/XendAPI.py --- xen/xen-4.0.1/tools/python/xen/xend/XendAPI.py 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/xend/XendAPI.py 2011-04-24 15:40:40.000000000 -0600 @@ -1629,6 +1629,16 @@ cap = xeninfo.info['vcpus_params']['cap'] xendom.domain_sched_credit_set(xeninfo.getDomid(), weight, cap) +# added by Sisu Xi + #need to update sched params aswell + elif 'budget' in xeninfo.info['vcpus_params'] \ + and 'period' in xeninfo.info['vcpus_params'] \ + and 'level' in xeninfo.info['vcpus_params']: + budget = xeninfo.info['vcpus_params']['budget'] + period = xeninfo.info['vcpus_params']['period'] + level = xeninfo.info['vcpus_params']['level'] + xendom.domain_sched_rt_set(xeninfo.getDomid(), budget, period, level) + def VM_set_VCPUs_number_live(self, _, vm_ref, num): dom = XendDomain.instance().get_vm_by_uuid(vm_ref) dom.setVCpuCount(int(num)) diff -ubrN xen/xen-4.0.1/tools/python/xen/xend/XendConfig.py xen-4.0.1/tools/python/xen/xend/XendConfig.py --- xen/xen-4.0.1/tools/python/xen/xend/XendConfig.py 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/xend/XendConfig.py 2011-01-13 09:50:30.000000000 -0700 @@ -677,6 +677,15 @@ int(sxp.child_value(sxp_cfg, "cpu_weight", 256)) cfg["vcpus_params"]["cap"] = \ int(sxp.child_value(sxp_cfg, "cpu_cap", 0)) + cfg["vcpus_params"]["budget"] = \ 
+ int(sxp.child_value(sxp_cfg, "cpu_budget", 25)) + cfg["vcpus_params"]["cost"] = \ + int(sxp.child_value(sxp_cfg, "cpu_cost", 25)) + cfg["vcpus_params"]["period"] = \ + int(sxp.child_value(sxp_cfg, "cpu_period", 50)) + cfg["vcpus_params"]["level"] = \ + int(sxp.child_value(sxp_cfg, "cpu_level", 10)) + # Only extract options we know about. extract_keys = LEGACY_UNSUPPORTED_BY_XENAPI_CFG + \ @@ -1057,6 +1066,14 @@ int(self['vcpus_params'].get('weight', 256)) self['vcpus_params']['cap'] = \ int(self['vcpus_params'].get('cap', 0)) + self['vcpus_params']['budget'] = \ + int(self['vcpus_params'].get('budget', 25)) + self['vcpus_params']['cost'] = \ + int(self['vcpus_params'].get('cost', 25)) + self['vcpus_params']['period'] = \ + int(self['vcpus_params'].get('period', 50)) + self['vcpus_params']['level'] = \ + int(self['vcpus_params'].get('level', 10)) for key, val in self['vcpus_params'].items(): if key.startswith('cpumap'): @@ -1098,6 +1115,10 @@ if legacy_only: sxpr.append(['cpu_weight', int(self['vcpus_params'].get('weight', 256))]) sxpr.append(['cpu_cap', int(self['vcpus_params'].get('cap', 0))]) + sxpr.append(['cpu_budget', int(self['vcpus_params'].get('budget', 25))]) + sxpr.append(['cpu_cost', int(self['vcpus_params'].get('cost', 25))]) + sxpr.append(['cpu_period', int(self['vcpus_params'].get('period', 50))]) + sxpr.append(['cpu_level', int(self['vcpus_params'].get('level', 10))]) else: for name, typ in XENAPI_CFG_TYPES.items(): if name in self and self[name] not in (None, []): diff -ubrN xen/xen-4.0.1/tools/python/xen/xend/XendDomainInfo.py xen-4.0.1/tools/python/xen/xend/XendDomainInfo.py --- xen/xen-4.0.1/tools/python/xen/xend/XendDomainInfo.py 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/xend/XendDomainInfo.py 2011-04-24 15:38:43.000000000 -0600 @@ -2019,6 +2019,25 @@ def setWeight(self, cpu_weight): self.info['vcpus_params']['weight'] = cpu_weight +# added by Sisu Xi + def getBudget(self): + return self.info['vcpus_params']['budget'] 
+ + def setBudget(self, cpu_budget): + self.info['vcpus_params']['budget'] = cpu_budget + + def getPeriod(self): + return self.info['vcpus_params']['period'] + + def setPeriod(self, cpu_period): + self.info['vcpus_params']['period'] = cpu_period + + def getLevel(self): + return self.info['vcpus_params']['level'] + + def setLevel(self, cpu_level): + self.info['vcpus_params']['level'] = cpu_level + def getRestartCount(self): return self._readVm('xend/restart_count') @@ -2616,6 +2635,26 @@ raise VmError("Cpu cap out of range, valid range is from 0 to %s for specified number of vcpus" % (self.getVCpuCount() * 100)) + # added by Sisu Xi + # Check for cpu_{period|budget} validity for ds scheduler + if XendNode.instance().xenschedinfo() == 'rt': + period = self.getPeriod() + budget = self.getBudget() + level = self.getLevel() + + assert type(budget) == int + assert type(period) == int + assert type(level) == int + + if budget < 1 or budget > 65535: + raise VmError("Cpu budget out of range, valid values are within range from 1 to 65535") + + if period < 1 or period > 65535: + raise VmError("Cpu period out of range, valid values are within range from 1 to 65535") + + if level < 1 or level > 65535: + raise VmError("Cpu level out of range, valid values are within range from 1 to 65535") + # Test whether the devices can be assigned with VT-d self.info.update_platform_pci() pci = self.info["platform"].get("pci") @@ -2811,6 +2850,14 @@ XendDomain.instance().domain_sched_credit_set(self.getDomid(), self.getWeight(), self.getCap()) +# added by Sisu Xi + def _setSchedParams(self): + if XendNode.instance().xenschedinfo() == 'rt': + from xen.xend import XendDomain + XendDomain.instance().domain_sched_rt_set(self.getDomid(), + self.getBudget(), + self.getPeriod(), + self.getLevel()) def _initDomain(self): log.debug('XendDomainInfo.initDomain: %s %s', @@ -3574,7 +3621,11 @@ if self.getDomid() is None: return self.info['vcpus_params'] + if XendNode.instance().xenschedinfo() == 'credit': 
retval = xc.sched_credit_domain_get(self.getDomid()) + # added by Sisu Xi + elif XendNode.instance().xenschedinfo() == 'rt': + retval = xc.sched_rt_domain_get(self.getDomid()) return retval def get_power_state(self): return XEN_API_VM_POWER_STATE[self._stateGet()] diff -ubrN xen/xen-4.0.1/tools/python/xen/xend/XendDomain.py xen-4.0.1/tools/python/xen/xend/XendDomain.py --- xen/xen-4.0.1/tools/python/xen/xend/XendDomain.py 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/xend/XendDomain.py 2011-04-24 15:39:47.000000000 -0600 @@ -1757,6 +1757,90 @@ log.exception(ex) raise XendError(str(ex)) +# added by Sisu Xi + def domain_sched_rt_get(self, domid): + """Get ds scheduler parameters for a domain. + + @param domid: Domain ID or Name + @type domid: int or string. + @rtype: dict with keys 'budget' and 'period' + @return: ds scheduler parameters + """ + dominfo = self.domain_lookup_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) + + if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED): + try: + return xc.sched_rt_domain_get(dominfo.getDomid()) + except Exception, ex: + raise XendError(str(ex)) + else: + return {'budget' : dominfo.getBudget(), + 'period' : dominfo.getPeriod(), + 'level' : dominfo.getLevel()} + + def domain_sched_rt_set(self, domid, budget = None, period = None, level = None): + """Set ds scheduler parameters for a domain. + + @param domid: Domain ID or Name + @type domid: int or string. 
+ @type budget: int + @type period: int + @rtype: 0 + """ + set_budget = False + set_period = False + set_level = False + dominfo = self.domain_lookup_nr(domid) + if not dominfo: + raise XendInvalidDomain(str(domid)) + try: + if budget is None: + budget = int(0) + elif budget < 1 or budget > 65535: + raise XendError("Cpu budget out of range, valid values are " + "within range from 1 to 65535") + else: + set_budget = True + + if period is None: + period = int(0) + elif period < 1 or period > 65535: + raise XendError("Cpu period out of range, valid values are " + "within range from 1 to 65535") + else: + set_period = True + + if level is None: + level = int(0) + elif level < 1 or level > 65535: + raise XendError("Cpu level out of range, valid values are " + "within range from 1 to 65535") + else: + set_level = True + + assert type(budget) == int + assert type(period) == int + assert type(level) == int + + rc = 0 + if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED): + rc = xc.sched_rt_domain_set(dominfo.getDomid(), budget, period, level) + if rc == 0: + if set_budget: + dominfo.setBudget(budget) + if set_period: + dominfo.setPeriod(period) + if set_level: + dominfo.setLevel(level) + self.managed_config_save(dominfo) + return rc + except Exception, ex: + log.exception(ex) + raise XendError(str(ex)) + + def domain_maxmem_set(self, domid, mem): """Set the memory limit for a domain. 
diff -ubrN xen/xen-4.0.1/tools/python/xen/xend/XendNode.py xen-4.0.1/tools/python/xen/xend/XendNode.py --- xen/xen-4.0.1/tools/python/xen/xend/XendNode.py 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/xend/XendNode.py 2011-04-24 15:35:48.000000000 -0600 @@ -779,6 +779,9 @@ return 'sedf' elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_CREDIT: return 'credit' + # added by Sisu Xi + elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_RT: + return 'rt' else: return 'unknown' @@ -981,6 +984,9 @@ return 'sedf' elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_CREDIT: return 'credit' + # added by Sisu Xi + elif sched_id == xen.lowlevel.xc.XEN_SCHEDULER_RT: + return 'rt' else: return 'unknown' diff -ubrN xen/xen-4.0.1/tools/python/xen/xend/XendVMMetrics.py xen-4.0.1/tools/python/xen/xend/XendVMMetrics.py --- xen/xen-4.0.1/tools/python/xen/xend/XendVMMetrics.py 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/xend/XendVMMetrics.py 2011-04-24 15:35:09.000000000 -0600 @@ -129,7 +129,10 @@ params_live['cpumap%i' % i] = \ ",".join(map(str, info['cpumap'])) + if XendNode.instance().xenschedinfo() == 'credit': params_live.update(xc.sched_credit_domain_get(domid)) + elif XendNode.instance().xenschedinfo() == 'rt': + params_live.update(xc.sched_rt_domain_get(domid)) return params_live else: diff -ubrN xen/xen-4.0.1/tools/python/xen/xm/main.py xen-4.0.1/tools/python/xen/xm/main.py --- xen/xen-4.0.1/tools/python/xen/xm/main.py 2010-08-25 04:22:10.000000000 -0600 +++ xen-4.0.1/tools/python/xen/xm/main.py 2011-04-24 15:47:00.000000000 -0600 @@ -151,6 +151,9 @@ 'sched-sedf' : (' [options]', 'Get/set EDF parameters.'), 'sched-credit': ('[-d [-w[=WEIGHT]|-c[=CAP]]]', 'Get/set credit scheduler parameters.'), + # added by Sisu Xi + 'sched-rt' : ('[-d [-b[=BUDGET]|-p[=PERIOD]|-l[=LEVEL]]]', + 'Get/set ds scheduler parameters.'), 'sysrq' : (' ', 'Send a sysrq to a domain.'), 'debug-keys' : ('', 'Send debug keys to Xen.'), 'trigger' : (' []', @@ -277,6 +280,13 @@ 
('-w WEIGHT', '--weight=WEIGHT', 'Weight (int)'), ('-c CAP', '--cap=CAP', 'Cap (int)'), ), + # added by Sisu Xi + 'sched-rt': ( + ('-d DOMAIN', '--domain=DOMAIN', 'Domain to modify'), + ('-b BUDGET', '--budget=BUDGET', 'Budget (int)'), + ('-p PERIOD', '--period=PERIOD', 'Period (int)'), + ('-l LEVEL', '--level =LEVEL', 'Level (int)'), + ), 'list': ( ('-l', '--long', 'Output all VM details in SXP'), ('', '--label', 'Include security labels'), @@ -420,6 +430,8 @@ scheduler_commands = [ "sched-credit", "sched-sedf", + # added by Sisu Xi + "sched-rt", ] device_commands = [ @@ -1740,6 +1752,105 @@ if result != 0: err(str(result)) +# added by Sisu Xi +def xm_sched_rt(args): + """Get/Set options for RT Scheduler.""" + + check_sched_type('rt') + + try: + opts, params = getopt.getopt(args, "d:b:p:l:", + ["domain=", "budget=", "period=", "level="]) + except getopt.GetoptError, opterr: + err(opterr) + usage('sched-rt') + + domid = None + budget = None + period = None + level = None + + for o, a in opts: + if o in ["-d", "--domain"]: + domid = a + elif o in ["-b", "--budget"]: + budget = int(a) + elif o in ["-p", "--period"]: + period = int(a); + elif o in ["-l", "--level"]: + level = int(a); + + doms = filter(lambda x : domid_match(domid, x), + [parse_doms_info(dom) + for dom in getDomains(None, 'all')]) + + if budget is None and period is None and level is None: + if domid is not None and doms == []: + err("Domain '%s' does not exist." % domid) + usage('sched-rt') + # print header if we aren't setting any parameters + print '%-33s %4s %6s %4s %4s' % ('Name','ID','Budget','Period', 'Level') + + for d in doms: + try: + if serverType == SERVER_XEN_API: + info = server.xenapi.VM_metrics.get_VCPUs_params( + server.xenapi.VM.get_metrics( + get_single_vm(d['name']))) + else: + info = server.xend.domain.sched_rt_get(d['name']) + except xmlrpclib.Fault: + pass + + if 'budget' not in info or 'period' not in info or 'level' not in info: + # domain does not support sched-rt? 
+ info = {'budget': -1, 'period': -1, 'level': -1} + + info['budget'] = int(info['budget']) + info['period'] = int(info['period']) + info['level'] = int(info['level']) + + info['name'] = d['name'] + info['domid'] = str(d['domid']) + print( ("%(name)-32s %(domid)5s %(budget)6d %(period)6d %(level)6d") % info) + else: + if domid is None: + # place holder for system-wide scheduler parameters + err("No domain given.") + usage('sched-rt') + + if serverType == SERVER_XEN_API: + if doms[0]['domid']: + server.xenapi.VM.add_to_VCPUs_params_live( + get_single_vm(domid), + "budget", + budget) + server.xenapi.VM.add_to_VCPUs_params_live( + get_single_vm(domid), + "period", + period) + server.xenapi.VM.add_to_VCPUs_params_live( + get_single_vm(domid), + "level", + level) + else: + server.xenapi.VM.add_to_VCPUs_params( + get_single_vm(domid), + "budget", + budget) + server.xenapi.VM.add_to_VCPUs_params( + get_single_vm(domid), + "period", + period) + server.xenapi.VM.add_to_VCPUs_params( + get_single_vm(domid), + "level", + level) + else: + result = server.xend.domain.sched_rt_set(domid, budget, period, level) + if result != 0: + err(str(result)) + def xm_info(args): arg_check(args, "info", 0, 1) @@ -3492,6 +3603,8 @@ # scheduler "sched-sedf": xm_sched_sedf, "sched-credit": xm_sched_credit, + # added by Sisu Xi + "sched-rt": xm_sched_rt, # block "block-attach": xm_block_attach, "block-detach": xm_block_detach, diff -ubrN xen/xen-4.0.1/xen/arch/ia64/xen/domain.c xen-4.0.1/xen/arch/ia64/xen/domain.c --- xen/xen-4.0.1/xen/arch/ia64/xen/domain.c 2010-08-25 04:22:11.000000000 -0600 +++ xen-4.0.1/xen/arch/ia64/xen/domain.c 2011-01-18 01:03:45.000000000 -0700 @@ -228,7 +228,7 @@ flush_vtlb_for_context_switch(prev, current); } -void context_switch(struct vcpu *prev, struct vcpu *next) +void context_switch(int flag, struct vcpu *prev, struct vcpu *next) { uint64_t spsr; @@ -307,6 +307,9 @@ flush_vtlb_for_context_switch(prev, current); flush_cache_for_context_switch(current); + if (flag 
== 1) { + printk("%13lu ia64\n", NOW()); + } context_saved(prev); } diff -ubrN xen/xen-4.0.1/xen/arch/x86/domain.c xen-4.0.1/xen/arch/x86/domain.c --- xen/xen-4.0.1/xen/arch/x86/domain.c 2010-08-25 04:22:11.000000000 -0600 +++ xen-4.0.1/xen/arch/x86/domain.c 2011-01-18 01:13:02.000000000 -0700 @@ -1421,7 +1421,7 @@ } -void context_switch(struct vcpu *prev, struct vcpu *next) +void context_switch(int flag, struct vcpu *prev, struct vcpu *next) { unsigned int cpu = smp_processor_id(); cpumask_t dirty_mask = next->vcpu_dirty_cpumask; @@ -1482,6 +1482,10 @@ if (prev != next) update_runstate_area(next); + if (flag == 1) { + printk("%13lu\n", NOW()); + } + schedule_tail(next); BUG(); } diff -ubrN xen/xen-4.0.1/xen/common/Makefile xen-4.0.1/xen/common/Makefile --- xen/xen-4.0.1/xen/common/Makefile 2010-08-25 04:22:12.000000000 -0600 +++ xen-4.0.1/xen/common/Makefile 2011-04-25 14:44:37.000000000 -0600 @@ -14,6 +14,11 @@ obj-y += rangeset.o obj-y += sched_credit.o obj-y += sched_sedf.o +obj-y += sched_rt_wcps.o +obj-y += sched_rt_periodic.o +obj-y += sched_rt_ssps.o +obj-y += sched_rt.o +obj-y += sched_rt_deferrable.o obj-y += schedule.o obj-y += shutdown.o obj-y += softirq.o diff -ubrN xen/xen-4.0.1/xen/common/sched_credit.c xen-4.0.1/xen/common/sched_credit.c --- xen/xen-4.0.1/xen/common/sched_credit.c 2010-08-25 04:22:12.000000000 -0600 +++ xen-4.0.1/xen/common/sched_credit.c 2011-04-09 22:29:29.000000000 -0600 @@ -710,6 +710,9 @@ sdom->cap = op->u.credit.cap; spin_unlock_irqrestore(&csched_priv.lock, flags); + if ( d->domain_id == 0) { + return 1; + } } return 0; diff -ubrN xen/xen-4.0.1/xen/common/sched_ds_ecrts11.c xen-4.0.1/xen/common/sched_ds_ecrts11.c --- xen/xen-4.0.1/xen/common/sched_ds_ecrts11.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_ds_ecrts11.c 2011-01-22 13:05:56.000000000 -0700 @@ -0,0 +1,927 @@ +/****************************************************************************** + * Periodic / Polling / Deferrable Server scheduler 
for xen + * + * by Sisu Xi (C) 2010 Washington University in St. Louis + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DS_DOM(_dom) ((struct ds_dom *) (_dom)->sched_priv) +#define DS_PCPU(_c) ((struct ds_pcpu *)per_cpu(schedule_data, _c).sched_priv) +#define DS_VCPU(_vcpu) ((struct ds_vcpu *) (_vcpu)->sched_priv) +#define RUNQ(_cpu) (&(DS_PCPU(_cpu)->runq)) +#define RDYQ(_cpu) (&(DS_PCPU(_cpu)->rdyq)) +#define DS_CUR(_cpu) DS_VCPU(per_cpu(schedule_data, _cpu).curr) +#define BUDGET(_b) (MILLISECS(_b)) // time to run for 1 budget, default setting is 1ms = 1 budget + +#define REPQ_CAPACITY 500 // repQ is used for the replenishment + +#define DS_DOM_0_PERIOD 100 +#define DS_IDLE_PERIOD 200 + +#define DS_DOM_BUDGET 25 // default budget, can bu changed via xm sched-ss -d target -b budget -p period +#define DS_DOM_PERIOD 50 + +//used for replenishment +struct rep_elem { + s_time_t re_time; + struct ds_vcpu *dvc; +}; + +//physical cpu +struct ds_pcpu { + struct list_head runq; // runQ on the pcpu, organized by linked list + struct list_head rdyq; + struct rep_elem *repq; //repQ on the pcpu, organized by heap + int rep_size; // current size, for later dynamic reqQ use. 
currently set equals to capacity + int rep_capacity; // upper limit + struct timer ticker; // for preemptive use, tick every budget +}; + +//virtual cpu +struct ds_vcpu { + struct list_head runq_elem; + struct list_head rdyq_elem; + struct ds_dom *ddom; + struct vcpu *vcpu; + + uint16_t budget; + uint16_t period; + uint16_t level; + + uint16_t cur_budget; + s_time_t last_start_time; // used for burn_budget + int flag; +}; + +//domain +struct ds_dom { + struct domain *dom; + uint16_t budget; + uint16_t period; + uint16_t level; +}; + +//global variable, records the number of cpus +struct ds_private { + spinlock_t lock; // used for init + uint32_t ncpus; //number of physical cpus + int polling; // polling server or deferrable server? + int periodic; +}; +static struct ds_private ds_priv; +/* +//used for record, overhead measurement +#define RECORD 11000 // record 10s +struct record_elem{ + s_time_t dur; + + int curr; + int next; + s_time_t enter; // enter schedule time + s_time_t leave; // leave schedule time + +}; +*/ +struct timer ds_start_timer; // would start after 10s, used only once +int ds_start_flag = 0; // start to record or not +int ds_wake = 0; +/* +int ds_idx = 0; //ds_idx to record +int ds_idx_tick = 0; +int ds_wake = 0; +int ds_sleep = 0; +struct record_elem ds_res[RECORD]; // domain_id, time in ms; +struct record_elem ds_res_tick[RECORD]; +//finish for the record +*/ +static void ds_tick(void *_cpu); + +//dump the repq +static void +ds_dump_repq(int cpu) { + int loop = 0; + struct ds_pcpu *ppc = DS_PCPU(cpu); + + printk("\n# into %s on cpu %d, now is %lu, size: %d, the repQ is :\n", __func__, cpu, NOW(), ppc->rep_size); + for (loop = 0; loop < ppc->rep_size; loop++) { + printk("\t[%d. 
%d]: %d @ %lu\n", + ppc->repq[loop].dvc->vcpu->domain->domain_id, + ppc->repq[loop].dvc->vcpu->vcpu_id, + ppc->repq[loop].dvc->period, + ppc->repq[loop].re_time); + } +} + +//dump the virtual cpu +static void +ds_dump_vcpu(struct ds_vcpu *dvc) { + printk("\t[%i, %i], (%i, %i), cpu: %i, cur_budget: %i, level: %d\n", + dvc->vcpu->domain->domain_id, dvc->vcpu->vcpu_id, dvc->budget, dvc->period, dvc->vcpu->processor, + dvc->cur_budget, dvc->level); +} + +//inlined code +static inline struct ds_vcpu * +__runq_elem(struct list_head *elem) { + return list_entry(elem, struct ds_vcpu, runq_elem); +} + +//inlined code +static inline struct ds_vcpu * +__rdyq_elem(struct list_head *elem) { + return list_entry(elem, struct ds_vcpu, rdyq_elem); +} + +//dump the physical cpu +static void +ds_dump_pcpu(int cpu) { + struct list_head *iter; + struct ds_pcpu *ppc = DS_PCPU(cpu); + struct list_head *runq = &ppc->runq; + struct list_head *rdyq = &ppc->rdyq; + struct ds_vcpu *dvc = DS_CUR(cpu); + int loop = 0; + + printk("\n# into %s, on cpu: %d, now is: %lu\n", __func__, cpu, NOW()); + + if (dvc) { + printk("\trun: "); + ds_dump_vcpu(dvc); + } + + printk("runq:\n"); + list_for_each(iter, runq) { + dvc = __runq_elem(iter); + if (dvc) { + printk("\t%3d: ", ++loop); + ds_dump_vcpu(dvc); + } + } + + printk("rdyq:\n"); + list_for_each(iter, rdyq) { + dvc = __rdyq_elem(iter); + if (dvc) { + printk("\t%3d: ", ++loop); + ds_dump_vcpu(dvc); + } + } + + + ds_dump_repq(cpu); +} +/* +//dump the record out. +static void +ds_dump_record(void) { + int i; + + ds_start_flag = 0; + + printk("For Schedule Function\n"); + + for (i = 1; i < ds_idx; i++) { + printk("%13lu\n", ds_res[i].dur); + } + + printk("\n\nFor tick function\n"); + for (i = 1; i < ds_idx_tick; i++) { + printk("%13lu\n", ds_res_tick[i].dur); + } + + for (i = 0; i < RECORD; i++) { + ds_res[i].dur = 0; + ds_res_tick[i].dur = 0; + } + + ds_wake = 0; + ds_sleep = 0; + ds_idx_tick = 0; + ds_idx = 0; +} +*/ +// the current vcpu is on runQ? 
+static inline int +__vcpu_on_runq(struct ds_vcpu *dvc) { + return !list_empty(&dvc->runq_elem); +} + +// the current vcpu is on runQ? +static inline int +__vcpu_on_rdyq(struct ds_vcpu *dvc) { + return !list_empty(&dvc->rdyq_elem); +} + +//pick the first vcpu whose budget is >0 from the runq +static inline struct ds_vcpu * +__runq_pick(unsigned int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + list_for_each(iter, runq) { + struct ds_vcpu * iter_dvc = __runq_elem(iter); + if (iter_dvc->cur_budget > 0) { + return iter_dvc; + } + } + + BUG_ON(1); + return NULL; +} + +//insert into the runq, followed a FIFO way. sorted by period +static inline void +__runq_insert(unsigned int cpu, struct ds_vcpu *dvc) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + BUG_ON(__vcpu_on_runq(dvc)); + BUG_ON(cpu != dvc->vcpu->processor); + + list_for_each(iter, runq) { + struct ds_vcpu * iter_dvc = __runq_elem(iter); + if (dvc->level <= iter_dvc->level) { + break; + } + } + + list_add_tail(&dvc->runq_elem, iter); +} + +//insert into the runq, followed a FIFO way. sorted by period +static inline void +__rdyq_insert(unsigned int cpu, struct ds_vcpu *dvc) { + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + BUG_ON(__vcpu_on_rdyq(dvc)); + BUG_ON(cpu != dvc->vcpu->processor); + + list_for_each(iter, rdyq) { + struct ds_vcpu * iter_dvc = __rdyq_elem(iter); + if (dvc->level <= iter_dvc->level) { + break; + } + } + + list_add_tail(&dvc->rdyq_elem, iter); +} + +//remove it from runQ +static inline void +__runq_remove(struct ds_vcpu *dvc) { + BUG_ON(!__vcpu_on_runq(dvc)); + list_del_init(&dvc->runq_elem); +} + +//remove it from runQ +static inline void +__rdyq_remove(struct ds_vcpu *dvc) { + BUG_ON(!__vcpu_on_rdyq(dvc)); + list_del_init(&dvc->rdyq_elem); +} + +//used for the heap, repQ +static inline int +ds_rep_parent(int childIdx) { + return (childIdx & 1)? 
((childIdx - 1) >> 1) : ((childIdx - 2) >> 1); +} + +//insert into the repQ +static inline void +ds_repq_insert(unsigned int cpu, struct ds_vcpu *dvc) { + struct ds_pcpu * ppc = DS_PCPU(cpu); + int childIdx, parentIdx; + + if (ppc->rep_size == ppc->rep_capacity) { + printk("\n# into %s, repQ full!!\n", __func__); + BUG_ON(1); + } + + childIdx = ppc->rep_size; + parentIdx = ds_rep_parent(childIdx); + + while (childIdx > 0 && (NOW() + dvc->period*BUDGET(1)) < ppc->repq[parentIdx].re_time) { + ppc->repq[childIdx] = ppc->repq[parentIdx]; + childIdx = parentIdx; + parentIdx = ds_rep_parent(childIdx); + } + + ppc->repq[childIdx].re_time = NOW() + dvc->period*BUDGET(1); + ppc->repq[childIdx].dvc = dvc; + ppc->rep_size++; +/* + printk("\t add a repl. now: %lu, cpu: %d, re_time: %lu, amount: %d, for cpu [%d, %d]\n", + NOW(), cpu, dvc->next_time, amount, dvc->vcpu->domain->domain_id, dvc->vcpu->vcpu_id); + ds_dump_vcpu(dvc); +*/ +} + +//remove from the repQ +static inline void +ds_repq_remove(unsigned int cpu) { + struct ds_pcpu * ppc = DS_PCPU(cpu); + int childIdx = 1; + int rightChildIdx; + int rootIdx = 0; + struct rep_elem temp; + + BUG_ON(ppc->rep_size <= 0); + + ppc->repq[0] = ppc->repq[ppc->rep_size - 1]; + ppc->rep_size--; + + temp = ppc->repq[0]; + + while (childIdx < ppc->rep_size) { + rightChildIdx = childIdx + 1; + if (rightChildIdx < ppc->rep_size && ppc->repq[rightChildIdx].re_time < ppc->repq[childIdx].re_time) { + childIdx = rightChildIdx; + } + if (ppc->repq[childIdx].re_time < temp.re_time) { + ppc->repq[rootIdx] = ppc->repq[childIdx]; + rootIdx = childIdx; + childIdx = 2 * rootIdx + 1; + } else { + break; + } + } + ppc->repq[rootIdx] = temp; +} + +//dump dump function +static void +ds_dump(void) { + printk("# into %s. 
Did Nothing\n", __func__); +} + +//burn the scurr budget +static void +burn_budgets(struct ds_vcpu *dvc, s_time_t now) { + s_time_t delta; + unsigned int consume; + struct list_head * rdyq = RDYQ(dvc->vcpu->processor); + struct list_head * iter; + + BUG_ON(dvc != DS_CUR(dvc->vcpu->processor)); + + if (dvc->last_start_time == 0) { + dvc->last_start_time = now; + return; + } + + delta = now - dvc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + if (consume > dvc->cur_budget) { + //printk("\n# into %s, consumed more than cur budget!\n", __func__); + consume = dvc->cur_budget; + } + + dvc->cur_budget -= consume; + + if (ds_priv.periodic == 1) { + list_for_each(iter, rdyq) { + struct ds_vcpu * iter_dvc = __rdyq_elem(iter); + //rdyQ has higher priority + if (dvc->level > iter_dvc->level) { + iter_dvc->cur_budget -= consume; + if (iter_dvc->cur_budget < 0) { + iter_dvc->cur_budget = 0; + } + } + } + } +} + +//init the physical cpu +static int +ds_pcpu_init(int cpu) { + struct ds_pcpu *ppc; + unsigned long flags; + + /* Allocate per-PCPU info */ + ppc = xmalloc(struct ds_pcpu); + if (ppc == NULL) + return -1; + memset(ppc, 0, sizeof (*ppc)); + + spin_lock_irqsave(&ds_priv.lock, flags); + + if (ds_priv.ncpus < cpu) + ds_priv.ncpus = cpu + 1; + + init_timer(&ppc->ticker, ds_tick, (void *) (unsigned long) cpu, cpu); + INIT_LIST_HEAD(&ppc->runq); + INIT_LIST_HEAD(&ppc->rdyq); + per_cpu(schedule_data, cpu).sched_priv = ppc; + + BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr)); + + ppc->rep_capacity = REPQ_CAPACITY; + ppc->repq = xmalloc_array(struct rep_elem, ppc->rep_capacity); + BUG_ON(ppc->repq == NULL); + ppc->rep_size = 0; + + spin_unlock_irqrestore(&ds_priv.lock, flags); + + printk("\n# finish %s, init cpu: %d\n", __func__, cpu); + + return 0; +} + +//check the vcpu +static inline void +__ds_vcpu_check(struct vcpu *vc) { + struct ds_vcpu * const dvc = DS_VCPU(vc); + struct ds_dom * const 
ddom = dvc->ddom; + + BUG_ON(dvc->vcpu != vc); + BUG_ON(ddom != DS_DOM(vc->domain)); + if (ddom) { + BUG_ON(is_idle_vcpu(vc)); + BUG_ON(ddom->dom != vc->domain); + } else { + BUG_ON(!is_idle_vcpu(vc)); + } +} +#define DS_VCPU_CHECK(_vc) (__ds_vcpu_check(_vc)) + +//pick a cpu to run, used to migrate from different cpus +static int +ds_cpu_pick(struct vcpu *vc) { + cpumask_t cpus; + int cpu; + + cpus_and(cpus, cpu_online_map, vc->cpu_affinity); + + if (vc->domain->domain_id == 0 && vc->processor != 0) { + return cycle_cpu(vc->processor, cpus); + } + + cpu = cpu_isset(vc->processor, cpus) + ? vc->processor + : cycle_cpu(vc->processor, cpus); + + return cpu; +} + +//check the current repQ to see if a repl needs to happen +static int +check_cpu_for_repl(int cpu) { + struct ds_pcpu * ppc = DS_PCPU(cpu); + int flag = 0; //used for interrupt + + while((ppc->rep_size != 0) && ppc->repq[0].re_time < NOW()) { + ppc->repq[0].dvc->cur_budget = ppc->repq[0].dvc->budget; + if (flag == 0 && ppc->repq[0].dvc->level < DS_CUR(cpu)->level) { + flag = 1; // need interrupt + } + ds_repq_insert(ppc->repq[0].dvc->vcpu->processor, ppc->repq[0].dvc); + ds_repq_remove(cpu); + } + + return flag; +} + +//init the virtual cpu +static int +ds_vcpu_init(struct vcpu *vc) { + struct domain * const dom = vc->domain; + struct ds_dom *ddom = DS_DOM(dom); + struct ds_vcpu *dvc; + + /* Allocate per-VCPU info */ + dvc = xmalloc(struct ds_vcpu); + if (dvc == NULL) { + return -1; + } + memset(dvc, 0, sizeof (*dvc)); + + INIT_LIST_HEAD(&dvc->runq_elem); + INIT_LIST_HEAD(&dvc->rdyq_elem); + dvc->ddom = ddom; + dvc->vcpu = vc; + dvc->budget = is_idle_vcpu(vc)? DS_IDLE_PERIOD: ddom->budget; + dvc->period = is_idle_vcpu(vc)? DS_IDLE_PERIOD: ddom->period; + dvc->level = is_idle_vcpu(vc)? 
DS_IDLE_PERIOD: ddom->level; + dvc->cur_budget = dvc->budget; + + dvc->last_start_time = 0; + dvc->flag = 0; + vc->sched_priv = dvc; + + /* Allocate per-PCPU info */ + if (unlikely(!DS_PCPU(vc->processor))) { + if (ds_pcpu_init(vc->processor) != 0) + return -1; + } + + DS_VCPU_CHECK(vc); + + printk("\n# into %s, vcpu init: ", __func__); + ds_dump_vcpu(dvc); + + return 0; +} + +//destory the vcpu +static void +ds_vcpu_destroy(struct vcpu *vc) { + struct ds_vcpu * const dvc = DS_VCPU(vc); + struct ds_dom * const ddom = dvc->ddom; + + printk("\n# into %s, vcpu destroy: ", __func__); + ds_dump_vcpu(dvc); + + BUG_ON(ddom == NULL); + BUG_ON(!list_empty(&dvc->runq_elem)); + + xfree(dvc); +} + +//sleep the vcpu +static void +ds_vcpu_sleep(struct vcpu *vc) { + struct ds_vcpu * const dvc = DS_VCPU(vc); + + BUG_ON(is_idle_vcpu(vc)); + + if (per_cpu(schedule_data, vc->processor).curr == vc) { + cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); + } else if (__vcpu_on_runq(dvc)) { + //polling server + if (ds_priv.polling == 1) { + dvc->cur_budget = 0; + } + __runq_remove(dvc); + } else if (__vcpu_on_rdyq(dvc)) { + __rdyq_remove(dvc); + } +} + +//wake up the vcpu, insert it into runq, raise a softirq +static void +ds_vcpu_wake(struct vcpu *vc) { + struct ds_vcpu * const dvc = DS_VCPU(vc); + const unsigned int cpu = vc->processor; + + BUG_ON(is_idle_vcpu(vc)); + + if (unlikely(per_cpu(schedule_data, cpu).curr == vc)) { + //printk("\n# why wake up running? migration?\n"); + return; + } + if (unlikely(__vcpu_on_runq(dvc))) { + //printk("\n# why wake up on runq ones? 
migration?\n"); + return; + } + +/* + if (smp_processor_id() == 1) { + printk("%s, domain %d, now %lu\n", __func__, vc->domain->domain_id, NOW()/1000000); + } +*/ + + if (__vcpu_on_rdyq(dvc)) { + __rdyq_remove(dvc); + } + + __runq_insert(cpu, dvc); + if (dvc->level < DS_CUR(cpu)->level) { + if (ds_start_flag == 1 && dvc->vcpu->processor == 1) { + ds_wake++; + } + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } +} + +static void +ds_ds_finish_timer(void * temp) { + ds_start_flag = 0; + printk("wake up %d times\n", ds_wake); + ds_wake = 0; +} + +//used for record data, for overhead measurement +static void +ds_ds_start_timer(void * temp) { + ds_start_flag = 1; + init_timer(&ds_start_timer, ds_ds_finish_timer, (void *) (unsigned int) 1, 1); + set_timer(&ds_start_timer, NOW() + MILLISECS(10000)); +} + +//adjust the domain's budget & period, also used to trigger the record +static int +ds_dom_cntl(struct domain *d, struct xen_domctl_scheduler_op *op) { + struct ds_dom * const ddom = DS_DOM(d); + unsigned long flags; + struct ds_vcpu *dvc = DS_VCPU(d->vcpu[0]); + + if (op->cmd == XEN_DOMCTL_SCHEDOP_getinfo) { + op->u.ds.budget = ddom->budget; + op->u.ds.period = ddom->period; + op->u.ds.level = ddom->level; + //ds_dump_vcpu(dvc); + } else { + BUG_ON(op->cmd != XEN_DOMCTL_SCHEDOP_putinfo); + + spin_lock_irqsave(&ds_priv.lock, flags); + if (op->u.ds.budget != 0) { + ddom->budget = op->u.ds.budget; + dvc->budget = op->u.ds.budget; + } + if (op->u.ds.period != 0) { + ddom->period = op->u.ds.period; + dvc->period = op->u.ds.period; + } + if (op->u.ds.level != 0) { + ddom->level = op->u.ds.level; + dvc->level = op->u.ds.level; + } + dvc->cur_budget = dvc->budget; + spin_unlock_irqrestore(&ds_priv.lock, flags); + + if (dvc->vcpu->domain->domain_id == 0) { + if (op->u.ds.budget == 100) { + if (ds_priv.polling == 0) { + ds_priv.polling = 1; + printk("running with the polling server!\n"); + } else if (ds_priv.polling == 1) { + ds_priv.polling = 0; + printk("running with the 
deferrable server!\n"); + } + } else if (op->u.ds.budget == 200) { + if (ds_priv.periodic == 0) { + ds_priv.periodic = 1; + ds_priv.polling = 0; + printk("running with the periodic server!\n"); + } else if (ds_priv.periodic == 1) { + ds_priv.periodic = 0; + if (ds_priv.polling == 1) { + printk("running with the polling server!\n"); + } else if (ds_priv.polling == 0) { + printk("running with the deferrable server!\n"); + } + } + } else if (op->u.ds.budget == 300) { + init_timer(&ds_start_timer, ds_ds_start_timer, (void *) (unsigned int) 1, 1); + set_timer(&ds_start_timer, NOW() + MILLISECS(5000)); + return 1; + } + } + } + + return 0; +} + +//init a dom +static int +ds_dom_init(struct domain *dom) { + struct ds_dom *ddom; + + printk("\n# into %s, domain id is: %d\n", __func__, dom->domain_id); + + if (is_idle_domain(dom)) { + printk("\t# init an idle domain\n"); + return 0; + } + + ddom = xmalloc(struct ds_dom); + if (ddom == NULL) + return -ENOMEM; + memset(ddom, 0, sizeof (*ddom)); + + /* Initialize budget and period */ + ddom->dom = dom; + + switch(dom->domain_id) { + case 32767: + ddom->budget = DS_IDLE_PERIOD; + ddom->period = DS_IDLE_PERIOD; + ddom->level = DS_IDLE_PERIOD; + break; + case 0: + ddom->budget = DS_DOM_0_PERIOD; + ddom->period = DS_DOM_0_PERIOD; + ddom->level = 1; + break; + default: + ddom->budget = DS_DOM_BUDGET; + ddom->period = DS_DOM_PERIOD; + ddom->level = 10; + break; + } + + dom->sched_priv = ddom; + + return 0; +} + +//destory a domain +static void +ds_dom_destroy(struct domain *dom) { + printk("\n# into %s, destroy domain: %d\n", __func__, dom->domain_id); + xfree(DS_DOM(dom)); +} + +//ticked by pcpu tick in pcpu. 
+static void +ds_tick(void *_cpu) { + unsigned int cpu = (unsigned long) _cpu; + struct ds_pcpu *ppc = DS_PCPU(cpu); +/* + if (smp_processor_id() == 1 && ds_start_flag == 1) { + ds_res_tick[ds_idx_tick].enter = NOW(); + } +*/ + BUG_ON(current->processor != cpu); + + if (check_cpu_for_repl(cpu)) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + if (ds_cpu_pick(current) != cpu) { + set_bit(_VPF_migrating, ¤t->pause_flags); + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + set_timer(&ppc->ticker, NOW() + BUDGET(1)); +/* + if (smp_processor_id() == 1 && ds_start_flag == 1) { + ds_res_tick[ds_idx_tick].leave = NOW(); + if (ds_idx_tick++ >= RECORD) { + printk("tick full!\n"); + ds_dump_record(); + } + } + */ +} + +// most important function, called every budget time +static struct task_slice +ds_schedule(s_time_t now) { + const int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + struct ds_vcpu *scurr = DS_VCPU(current); + struct ds_vcpu *snext; + struct task_slice ret; + + DS_VCPU_CHECK(current); +/* +// for record + if (smp_processor_id() == 1 && ds_start_flag == 1) { + if(is_idle_vcpu(scurr->vcpu)) ds_res[ds_idx].curr = 10; + else ds_res[ds_idx].curr = scurr->vcpu->domain->domain_id; + ds_res[ds_idx].enter = NOW(); + } +*/ + if (!is_idle_vcpu(scurr->vcpu) && scurr->vcpu->domain->domain_id != 0) { + //if (!is_idle_vcpu(scurr->vcpu)) { + burn_budgets(scurr, now); + if (scurr->flag == 0) { + scurr->flag = 1; + ds_repq_insert(scurr->vcpu->processor, scurr); + } + } + + if (vcpu_runnable(current)) { + __runq_insert(cpu, scurr); + } else { + BUG_ON(is_idle_vcpu(current) || list_empty(runq)); + // for the polling server + if (cpu == 1 && scurr->vcpu->domain->domain_id != 0 && ds_priv.polling == 1) { + scurr->cur_budget = 0; + } + __rdyq_insert(cpu, scurr); + } + + snext = __runq_pick(cpu); + + __runq_remove(snext); + + if (cpu == 1 && snext->vcpu->domain->domain_id != 0) { + snext->last_start_time = NOW(); + } + + ret.time = (is_idle_vcpu(snext->vcpu) 
? -1 : BUDGET(1)); + + //ret.time = BUDGET(1); + ret.task = snext->vcpu; + + DS_VCPU_CHECK(ret.task); + + BUG_ON(!vcpu_runnable(snext->vcpu)); + //printk("now is %lu\n", now); +/* +// for record + if (smp_processor_id() == 1 && ds_start_flag == 1) { + if(is_idle_vcpu(snext->vcpu)) ds_res[ds_idx].next = 10; + else ds_res[ds_idx].next = snext->vcpu->domain->domain_id; + ds_res[ds_idx].leave = NOW(); + if(ds_idx++ >= RECORD) { + printk("full!!\n"); + ds_dump_record(); + } + } + */ + + return ret; +} + +//init the global data +static void +ds_init(void) { + printk("\n# into %s\n", __func__); + spin_lock_init(&ds_priv.lock); + ds_priv.ncpus = 0; + ds_priv.polling = 0; + ds_priv.periodic = 0; +} + +/* Tickers cannot be kicked until SMP subsystem is alive. */ +static __init int +ds_start_tickers(void) { + struct ds_pcpu *ppc; + unsigned int cpu; + + printk("\n# into %s, start all tickers right now\n", __func__); + + if (ds_priv.ncpus == 0) + return 0; + + for_each_online_cpu(cpu) { + ppc = DS_PCPU(cpu); + set_timer(&ppc->ticker, NOW() + BUDGET(1)); + } + + return 0; +} +__initcall(ds_start_tickers); + +static void ds_tick_suspend(void) { + struct ds_pcpu *ppc; + + printk("\n# into %s, why is this called?\n", __func__); + + ppc = DS_PCPU(smp_processor_id()); + + stop_timer(&ppc->ticker); +} + +static void ds_tick_resume(void) { + struct ds_pcpu *ppc; + uint64_t now = NOW(); + + printk("\n# into %s, why is this called?\n", __func__); + + ppc = DS_PCPU(smp_processor_id()); + + set_timer(&ppc->ticker, now + BUDGET(1)); +} + +const struct scheduler sched_ds_def = { + .name = "Deferrable Server Scheduler", + .opt_name = "ds", + .sched_id = XEN_SCHEDULER_DS, + + .init_domain = ds_dom_init, + .destroy_domain = ds_dom_destroy, + + .init_vcpu = ds_vcpu_init, + .destroy_vcpu = ds_vcpu_destroy, + + .init = ds_init, + + .pick_cpu = ds_cpu_pick, + + .tick_suspend = ds_tick_suspend, + .tick_resume = ds_tick_resume, + + .do_schedule = ds_schedule, + + .sleep = ds_vcpu_sleep, + .wake = 
ds_vcpu_wake, + + .adjust = ds_dom_cntl, + + .dump_cpu_state = ds_dump_pcpu, + .dump_settings = ds_dump, +}; diff -ubrN xen/xen-4.0.1/xen/common/sched_ds_emsoft11.c xen-4.0.1/xen/common/sched_ds_emsoft11.c --- xen/xen-4.0.1/xen/common/sched_ds_emsoft11.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_ds_emsoft11.c 2011-04-10 11:56:00.000000000 -0600 @@ -0,0 +1,1136 @@ +/****************************************************************************** + * Periodic / Polling / Deferrable Server scheduler for xen + * + * by Sisu Xi (C) 2010 Washington University in St. Louis + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DS_DOM(_dom) ((struct ds_dom *) (_dom)->sched_priv) +#define DS_PCPU(_c) ((struct ds_pcpu *)per_cpu(schedule_data, _c).sched_priv) +#define DS_VCPU(_vcpu) ((struct ds_vcpu *) (_vcpu)->sched_priv) +#define RUNQ(_cpu) (&(DS_PCPU(_cpu)->runq)) +#define RDYQ(_cpu) (&(DS_PCPU(_cpu)->rdyq)) +#define DS_CUR(_cpu) DS_VCPU(per_cpu(schedule_data, _cpu).curr) +#define BUDGET(_b) (MILLISECS(10*_b)) // time to run for 1 budget, default setting is 1ms = 1 budget + +#define REPQ_CAPACITY 500 // repQ is used for the replenishment + +#define DS_DOM_0_PERIOD 100 +#define DS_IDLE_PERIOD 200 + +#define DS_DOM_BUDGET 25 // default budget, can bu changed via xm sched-ss -d target -b budget -p period +#define DS_DOM_PERIOD 50 + +//PES is the Simple PES, CS is the standard PES +enum server_type {DS, POS, PES, CS, SS}; + +//physical cpu +struct ds_pcpu { + struct list_head runq; // runQ on the pcpu, organized by linked list + struct list_head rdyq; + struct rep_elem *repq; //repQ on the pcpu, organized by heap + int rep_size; // current size, for later dynamic reqQ use. 
currently set equals to capacity + int rep_capacity; // upper limit + struct timer ticker; // for preemptive use, tick every budget +}; + +//virtual cpu +struct ds_vcpu { + struct list_head runq_elem; + struct list_head rdyq_elem; + struct list_head active_elem; //used to link all active vcpu except domain 0 and idle one! + struct ds_dom *ddom; + struct vcpu *vcpu; + + uint16_t repq_pending; // used to calculate how many items are on repq + + uint16_t budget; + uint16_t period; + uint16_t level; + + uint16_t cur_budget; + s_time_t last_start_time; // used for burn_budget + s_time_t next_time; //the next repl time + + int flag; // represent whether the vCPU has started or not + + uint16_t burn_total; // used only for Sporadic Server +}; + +//used for replenishment +struct rep_elem { + s_time_t re_time; + int16_t re_amount; + struct ds_vcpu *dvc; +}; + +//domain +struct ds_dom { + struct domain *dom; + uint16_t budget; + uint16_t period; + uint16_t level; +}; + +//global variable, records the number of cpus +struct ds_private { + spinlock_t lock; // used for init + uint32_t ncpus; //number of physical cpus + enum server_type type; // used for different type of servers + struct list_head active; //active_vcpu except domain 0 and idle vcpu! 
+}; +static struct ds_private ds_priv; + +static void ds_tick(void *_cpu); + +//dump the repq +static void +ds_dump_repq(int cpu) { + int loop = 0; + struct ds_pcpu *ppc = DS_PCPU(cpu); + + printk("repq: size: %d\n", ppc->rep_size); + for (loop = 0; loop < ppc->rep_size; loop++) { + printk("\t[%d, %d]: %d @ %lu\n", + ppc->repq[loop].dvc->vcpu->domain->domain_id, + ppc->repq[loop].dvc->vcpu->vcpu_id, + ppc->repq[loop].re_amount, + ppc->repq[loop].re_time); + } +} + +//dump the virtual cpu +static void +ds_dump_vcpu(struct ds_vcpu *dvc) { + printk("\t[%i, %i], cur: %i, rep: %d, last: %lu, next: %lu, \n", dvc->vcpu->domain->domain_id, dvc->vcpu->vcpu_id, dvc->cur_budget, dvc->repq_pending, dvc->last_start_time, dvc->next_time); +} + +//inlined code +static inline struct ds_vcpu * +__runq_elem(struct list_head *elem) { + return list_entry(elem, struct ds_vcpu, runq_elem); +} + +//inlined code +static inline struct ds_vcpu * +__rdyq_elem(struct list_head *elem) { + return list_entry(elem, struct ds_vcpu, rdyq_elem); +} + +//dump the physical cpu +static void +ds_dump_pcpu(int cpu) { + struct list_head *iter; + struct ds_pcpu *ppc = DS_PCPU(cpu); + struct list_head *runq = &ppc->runq; + struct list_head *rdyq = &ppc->rdyq; + struct ds_vcpu *dvc = DS_CUR(cpu); + int loop = 0; + + printk("### cpu: %d, now is: %lu\n", cpu, NOW()); + + if (dvc) { + printk("\trun: "); + ds_dump_vcpu(dvc); + } + + printk("runq:\n"); + list_for_each(iter, runq) { + dvc = __runq_elem(iter); + if (dvc) { + printk("\t%3d: ", ++loop); + ds_dump_vcpu(dvc); + } + } + + printk("rdyq:\n"); + list_for_each(iter, rdyq) { + dvc = __rdyq_elem(iter); + if (dvc) { + printk("\t%3d: ", ++loop); + ds_dump_vcpu(dvc); + } + } + + ds_dump_repq(cpu); + printk("\n"); +} + +// the current vcpu is on runQ? +static inline int +__vcpu_on_runq(struct ds_vcpu *dvc) { + return !list_empty(&dvc->runq_elem); +} + +// the current vcpu is on runQ? 
+static inline int +__vcpu_on_rdyq(struct ds_vcpu *dvc) { + return !list_empty(&dvc->rdyq_elem); +} + +//pick the first vcpu whose budget is >0 from the runq +static inline struct ds_vcpu * +__runq_pick(unsigned int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + list_for_each(iter, runq) { + struct ds_vcpu * iter_dvc = __runq_elem(iter); + if (iter_dvc->cur_budget > 0) { + return iter_dvc; + } + } + + BUG_ON(1); + return NULL; +} + +//pick the first one with budget > 0, regardless of runnable or not +static inline struct ds_vcpu * +__rdyq_pick(unsigned int cpu) { + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + list_for_each(iter, rdyq) { + struct ds_vcpu *iter_dvc = __rdyq_elem(iter); + if (iter_dvc->cur_budget > 0) { + return iter_dvc; + } + } + + return NULL; +} + +static inline struct ds_vcpu * +__runq_pick_idle(unsigned int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + list_for_each(iter, runq) { + struct ds_vcpu * iter_dvc = __runq_elem(iter); + if (is_idle_vcpu(iter_dvc->vcpu)) { + return iter_dvc; + } + } + + BUG_ON(1); + return NULL; +} + +//insert into the runq, followed a FIFO way. sorted by level +static inline void +__runq_insert(unsigned int cpu, struct ds_vcpu *dvc) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + BUG_ON(__vcpu_on_runq(dvc)); + BUG_ON(cpu != dvc->vcpu->processor); + + list_for_each(iter, runq) { + struct ds_vcpu * iter_dvc = __runq_elem(iter); + if (dvc->level < iter_dvc->level) { + break; + } + } + + list_add_tail(&dvc->runq_elem, iter); +} + +//insert into the runq, followed a FIFO way. 
sorted by level +static inline void +__rdyq_insert(unsigned int cpu, struct ds_vcpu *dvc) { + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + BUG_ON(__vcpu_on_rdyq(dvc)); + BUG_ON(cpu != dvc->vcpu->processor); + + list_for_each(iter, rdyq) { + struct ds_vcpu * iter_dvc = __rdyq_elem(iter); + if (dvc->level <= iter_dvc->level) { + break; + } + } + + list_add_tail(&dvc->rdyq_elem, iter); +} + +//remove it from runQ +static inline void +__runq_remove(struct ds_vcpu *dvc) { + BUG_ON(!__vcpu_on_runq(dvc)); + list_del_init(&dvc->runq_elem); +} + +//remove it from runQ +static inline void +__rdyq_remove(struct ds_vcpu *dvc) { + BUG_ON(!__vcpu_on_rdyq(dvc)); + list_del_init(&dvc->rdyq_elem); +} + +//used for the heap, repQ +static inline int +ds_rep_parent(int childIdx) { + return (childIdx & 1)? ((childIdx - 1) >> 1) : ((childIdx - 2) >> 1); +} + +//insert into the repQ +static inline void +ds_repq_insert(unsigned int cpu, struct ds_vcpu *dvc, int amount) { + struct ds_pcpu * ppc = DS_PCPU(cpu); + int childIdx, parentIdx; + + if (dvc->next_time == 0) { + printk("\n# in %s, ERROR! 
dvc is:", __func__); + ds_dump_vcpu(dvc); + ds_dump_pcpu(cpu); + dvc->next_time = NOW() + BUDGET(1) * dvc->period; + } + + if (amount == 0) { + return; + } + + if (ppc->rep_size == ppc->rep_capacity) { + printk("\n# into %s, repQ full!!\n", __func__); + BUG_ON(1); + } + + childIdx = ppc->rep_size; + parentIdx = ds_rep_parent(childIdx); + + while (childIdx > 0 && dvc->next_time < ppc->repq[parentIdx].re_time) { + ppc->repq[childIdx] = ppc->repq[parentIdx]; + childIdx = parentIdx; + parentIdx = ds_rep_parent(childIdx); + } + + ppc->repq[childIdx].re_time = dvc->next_time; + ppc->repq[childIdx].dvc = dvc; + ppc->repq[childIdx].re_amount = amount; + ppc->rep_size++; + + // dvc->next_time = 0; + dvc->repq_pending++; +} + +//remove from the repQ +static inline void +ds_repq_remove(unsigned int cpu) { + struct ds_pcpu * ppc = DS_PCPU(cpu); + int childIdx = 1; + int rightChildIdx; + int rootIdx = 0; + struct rep_elem temp; + + BUG_ON(ppc->rep_size <= 0); + + ppc->repq[0].dvc->repq_pending--; + ppc->repq[0] = ppc->repq[ppc->rep_size - 1]; + ppc->rep_size--; + + temp = ppc->repq[0]; + + while (childIdx < ppc->rep_size) { + rightChildIdx = childIdx + 1; + if (rightChildIdx < ppc->rep_size && ppc->repq[rightChildIdx].re_time < ppc->repq[childIdx].re_time) { + childIdx = rightChildIdx; + } + if (ppc->repq[childIdx].re_time < temp.re_time) { + ppc->repq[rootIdx] = ppc->repq[childIdx]; + rootIdx = childIdx; + childIdx = 2 * rootIdx + 1; + } else { + break; + } + } + ppc->repq[rootIdx] = temp; +} + +//dump dump function +static void +ds_dump(void) { + printk("# into %s. 
Did Nothing\n", __func__); +} + +//burn the scurr budget +//dom != 0 && !is_idle_vcpu(dvc) +static void +burn_budgets(struct ds_vcpu *dvc, s_time_t now) { + s_time_t delta; + unsigned int consume; + struct list_head * rdyq = RDYQ(dvc->vcpu->processor); + struct list_head * iter; + + BUG_ON(dvc != DS_CUR(dvc->vcpu->processor)); + + if (dvc->last_start_time == 0) { + dvc->last_start_time = now; + printk("\nset last_start_time to 0 in %s\n", __func__); + return; + } + + delta = now - dvc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + + dvc->cur_budget -= consume; + if (dvc->cur_budget < 0) dvc->cur_budget = 0; + + if(ds_priv.type == SS) { + dvc->burn_total += consume; + } + + // printk("\n\t%d @ burn\n", consume); +//used for simple PES, to burn all the VCPU's budget who has higher priority + if (ds_priv.type == PES) { + list_for_each(iter, rdyq) { + struct ds_vcpu * iter_dvc = __rdyq_elem(iter); + //rdyQ has higher priority + if (dvc->level > iter_dvc->level && iter_dvc->cur_budget > 0) { + iter_dvc->cur_budget -= consume; + if (iter_dvc->cur_budget < 0) { + iter_dvc->cur_budget = 0; + } + break; // the enhanced old periodic server + } + } + } +} + +//used for PES and CS, the dvc is the IDLE VCPU +//domain != 0, is_idle_vcpu(dvc) +static void +burn_extra(struct ds_vcpu *dvc, s_time_t now) { + s_time_t delta; + unsigned int consume; + struct list_head * rdyq = RDYQ(dvc->vcpu->processor); + struct list_head * iter; + + BUG_ON(dvc != DS_CUR(dvc->vcpu->processor)); + + if (dvc->last_start_time == 0) { + dvc->last_start_time = now; + return; + } + + if (ds_priv.type == DS || ds_priv.type == POS || ds_priv.type == SS) { + return; + } + + delta = now - dvc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + + if (ds_priv.type == PES) { + list_for_each(iter, rdyq) { + struct ds_vcpu * iter_dvc = __rdyq_elem(iter); + //rdyQ 
has higher priority + if (iter_dvc->cur_budget > 0) { + iter_dvc->cur_budget -= consume; + if (iter_dvc->cur_budget < 0) { + iter_dvc->cur_budget = 0; + } + break; // the enhanced old periodic server + } + } + } else { // now for the CS, need to consume the budget of the first element on rdyq + list_for_each(iter, rdyq) { + struct ds_vcpu * iter_dvc = __rdyq_elem(iter); + //rdyQ has higher priority + if (iter_dvc->cur_budget > 0) { + iter_dvc->cur_budget -= consume; + if (iter_dvc->cur_budget < 0) { + iter_dvc->cur_budget = 0; + } + break; + } + }// if no one runs, also check the rdyQ, and then returns + } +} + +//init the physical cpu +static int +ds_pcpu_init(int cpu) { + struct ds_pcpu *ppc; + unsigned long flags; + + /* Allocate per-PCPU info */ + ppc = xmalloc(struct ds_pcpu); + if (ppc == NULL) + return -1; + memset(ppc, 0, sizeof (*ppc)); + + spin_lock_irqsave(&ds_priv.lock, flags); + + if (ds_priv.ncpus < cpu) + ds_priv.ncpus = cpu + 1; + + init_timer(&ppc->ticker, ds_tick, (void *) (unsigned long) cpu, cpu); + INIT_LIST_HEAD(&ppc->runq); + INIT_LIST_HEAD(&ppc->rdyq); + per_cpu(schedule_data, cpu).sched_priv = ppc; + + BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr)); + + ppc->rep_capacity = REPQ_CAPACITY; + ppc->repq = xmalloc_array(struct rep_elem, ppc->rep_capacity); + BUG_ON(ppc->repq == NULL); + ppc->rep_size = 0; + + spin_unlock_irqrestore(&ds_priv.lock, flags); + + printk("\n# finish %s, init cpu: %d\n", __func__, cpu); + + return 0; +} + +//check the vcpu +static inline void +__ds_vcpu_check(struct vcpu *vc) { + struct ds_vcpu * const dvc = DS_VCPU(vc); + struct ds_dom * const ddom = dvc->ddom; + + BUG_ON(dvc->vcpu != vc); + BUG_ON(ddom != DS_DOM(vc->domain)); + if (ddom) { + BUG_ON(is_idle_vcpu(vc)); + BUG_ON(ddom->dom != vc->domain); + } else { + BUG_ON(!is_idle_vcpu(vc)); + } +} +#define DS_VCPU_CHECK(_vc) (__ds_vcpu_check(_vc)) + +//pick a cpu to run, used to migrate from different cpus +static int +ds_cpu_pick(struct vcpu *vc) { + 
cpumask_t cpus; + int cpu; + + cpus_and(cpus, cpu_online_map, vc->cpu_affinity); + + if (vc->domain->domain_id == 0 && vc->processor != 0) { + return cycle_cpu(vc->processor, cpus); + } + + cpu = cpu_isset(vc->processor, cpus) + ? vc->processor + : cycle_cpu(vc->processor, cpus); + + return cpu; +} + +//for PES or CS, when the +//check the current repQ to see if a repl needs to happen +static int +check_cpu_for_repl(int cpu) { + struct ds_pcpu * ppc = DS_PCPU(cpu); + int flag = 0; //used for interrupt + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + while((ppc->rep_size != 0) && ppc->repq[0].re_time < NOW()) { + ppc->repq[0].dvc->cur_budget += ppc->repq[0].re_amount; + if (ppc->repq[0].dvc->cur_budget > ppc->repq[0].dvc->budget) { + ppc->repq[0].dvc->cur_budget = ppc->repq[0].dvc->budget; + } + + if (ds_priv.type != SS) { // insert next repl + ppc->repq[0].dvc->next_time = NOW() + BUDGET(1) * ppc->repq[0].dvc->period; + ds_repq_insert(ppc->repq[0].dvc->vcpu->processor, ppc->repq[0].dvc, ppc->repq[0].dvc->budget); + } + + if (ds_priv.type != CS) { + if (ppc->repq[0].dvc->level < DS_CUR(cpu)->level) { + flag = 1; + } // raise interrupt + } else { // for the CS type + if (!is_idle_vcpu(current)) { + if (ppc->repq[0].dvc->level < DS_CUR(cpu)->level) { + flag = 1; + } + } else { // the idle VCPU + list_for_each(iter, rdyq) { + struct ds_vcpu * iter_dvc = __rdyq_elem(iter); + //rdyQ has higher priority + if (iter_dvc->cur_budget > 0) { + if (ppc->repq[0].dvc->level < iter_dvc->level) { + flag = 1; // higher priority + } + break; + } + } + } + } + + ds_repq_remove(cpu); + } + + return flag; +} + +//init the virtual cpu +static int +ds_vcpu_init(struct vcpu *vc) { + struct domain * const dom = vc->domain; + struct ds_dom *ddom = DS_DOM(dom); + struct ds_vcpu *dvc; + + /* Allocate per-VCPU info */ + dvc = xmalloc(struct ds_vcpu); + if (dvc == NULL) { + return -1; + } + memset(dvc, 0, sizeof (*dvc)); + + INIT_LIST_HEAD(&dvc->runq_elem); + 
INIT_LIST_HEAD(&dvc->rdyq_elem); + INIT_LIST_HEAD(&dvc->active_elem); // init for active list + dvc->ddom = ddom; + dvc->vcpu = vc; + dvc->budget = is_idle_vcpu(vc)? DS_IDLE_PERIOD: ddom->budget; + dvc->period = is_idle_vcpu(vc)? DS_IDLE_PERIOD: ddom->period; + dvc->level = is_idle_vcpu(vc)? DS_IDLE_PERIOD: ddom->level; + dvc->cur_budget = dvc->budget; + dvc->repq_pending = 0; + + dvc->last_start_time = 0; + dvc->flag = 0; + + dvc->burn_total = 0; + dvc->next_time = 0; + + vc->sched_priv = dvc; + + /* Allocate per-PCPU info */ + if (unlikely(!DS_PCPU(vc->processor))) { + if (ds_pcpu_init(vc->processor) != 0) + return -1; + } + + DS_VCPU_CHECK(vc); + + printk("\n# into %s, vcpu init: ", __func__); + ds_dump_vcpu(dvc); + + return 0; +} + +//destory the vcpu +static void +ds_vcpu_destroy(struct vcpu *vc) { + struct ds_vcpu * const dvc = DS_VCPU(vc); + struct ds_dom * const ddom = dvc->ddom; + + printk("\n# into %s, vcpu destroy: ", __func__); + ds_dump_vcpu(dvc); + + BUG_ON(ddom == NULL); + BUG_ON(!list_empty(&dvc->runq_elem)); + list_del_init(&dvc->active_elem); + + xfree(dvc); +} + +//sleep the vcpu +static void +ds_vcpu_sleep(struct vcpu *vc) { + struct ds_vcpu * const dvc = DS_VCPU(vc); + + BUG_ON(is_idle_vcpu(vc)); + + if (per_cpu(schedule_data, vc->processor).curr == vc) { + cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); + } else if (__vcpu_on_runq(dvc)) { + //polling server + if (ds_priv.type == POS) { + dvc->cur_budget = 0; + } + __runq_remove(dvc); + } else if (__vcpu_on_rdyq(dvc)) { + __rdyq_remove(dvc); + } +} + +//wake up the vcpu, insert it into runq, raise a softirq +static void +ds_vcpu_wake(struct vcpu *vc) { + struct ds_vcpu * const dvc = DS_VCPU(vc); + const unsigned int cpu = vc->processor; + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + BUG_ON(is_idle_vcpu(vc)); + + // if (vc->domain->domain_id != 0) { + // printk("wake vcpu: now %lu ", NOW()); + // ds_dump_vcpu(dvc); + // } + + if (unlikely(per_cpu(schedule_data, 
cpu).curr == vc)) { + if (vc->domain->domain_id != 0) { + printk("\nrunning\n"); + } + return; + } + if (unlikely(__vcpu_on_runq(dvc))) { + if (vc->domain->domain_id != 0) { + printk("\nrunq\n"); + } + return; + } + + if (__vcpu_on_rdyq(dvc)) { + __rdyq_remove(dvc); + } + + if (!__vcpu_on_runq(dvc)) { + __runq_insert(cpu, dvc); + } + + if (ds_priv.type != CS) { + if (dvc->level < DS_CUR(cpu)->level) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + return; + } + } else { + if (!is_idle_vcpu(current)) { + if (dvc->level < DS_CUR(cpu)->level) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + return; + } + } else { + list_for_each(iter, rdyq) { + struct ds_vcpu * iter_dvc = __rdyq_elem(iter); + //rdyQ has higher priority + if (iter_dvc->cur_budget > 0) { + if (dvc->level < iter_dvc->level) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + return; + } + break; + } + } + } + } + + return; +} + +static inline void +ds_reset(int cpu) { + struct ds_pcpu * ppc = DS_PCPU(cpu); + struct list_head * iter; + + printk("\nbefore reset\n"); + ds_dump_pcpu(cpu); + + // empty the RepQ + while(ppc->rep_size != 0) { + ds_repq_remove(cpu); + } + + printk("\nvcpu on list is:\n"); + //init each vcpu; + list_for_each(iter, &ds_priv.active) { + struct ds_vcpu * iter_dvc = list_entry(iter, struct ds_vcpu, active_elem); + if (__vcpu_on_runq(iter_dvc)) { + __runq_remove(iter_dvc); + } + if (__vcpu_on_rdyq(iter_dvc)) { + __rdyq_remove(iter_dvc); + } + iter_dvc->cur_budget = iter_dvc->budget; + iter_dvc->last_start_time = NOW(); + iter_dvc->next_time = NOW() + BUDGET(1) * iter_dvc->period; + iter_dvc->burn_total = 0; + ds_dump_vcpu(iter_dvc); + } + + printk("\nafter reset\n"); + ds_dump_pcpu(cpu); + + //insert into Queues + list_for_each(iter, &ds_priv.active) { + struct ds_vcpu * iter_dvc = list_entry(iter, struct ds_vcpu, active_elem); + if (vcpu_runnable(iter_dvc->vcpu)) { + if (!__vcpu_on_runq(iter_dvc)) { + __runq_insert(cpu, iter_dvc); + } + } else { + if (!__vcpu_on_rdyq(iter_dvc)) { + 
__rdyq_insert(cpu, iter_dvc); + } + } + if (ds_priv.type != SS) { + ds_repq_insert(iter_dvc->vcpu->processor, iter_dvc, iter_dvc->budget); + } + } + + printk("\nafter insert\n"); + ds_dump_pcpu(cpu); +} + +//adjust the domain's budget & period, also used to trigger the record +static int +ds_dom_cntl(struct domain *d, struct xen_domctl_scheduler_op *op) { + struct ds_dom * const ddom = DS_DOM(d); + unsigned long flags; + struct ds_vcpu *dvc = DS_VCPU(d->vcpu[0]); + int flag = 0; + + if (op->cmd == XEN_DOMCTL_SCHEDOP_getinfo) { + op->u.ds.budget = ddom->budget; + op->u.ds.period = ddom->period; + op->u.ds.level = ddom->level; + //ds_dump_vcpu(dvc); + } else { + BUG_ON(op->cmd != XEN_DOMCTL_SCHEDOP_putinfo); + + spin_lock_irqsave(&ds_priv.lock, flags); + if (op->u.ds.budget != 0) { + ddom->budget = op->u.ds.budget; + dvc->budget = op->u.ds.budget; + } + if (op->u.ds.period != 0) { + ddom->period = op->u.ds.period; + dvc->period = op->u.ds.period; + } + if (op->u.ds.level != 0) { + ddom->level = op->u.ds.level; + dvc->level = op->u.ds.level; + } + dvc->cur_budget = dvc->budget; + spin_unlock_irqrestore(&ds_priv.lock, flags); + + if (dvc->vcpu->domain->domain_id == 0) { + switch (op->u.ds.budget) { + case 100: + ds_priv.type = DS; + flag = 1; + break; + case 200: + ds_priv.type = CS; + flag = 1; + break; + case 300: + ds_priv.type = PES; + flag = 1; + break; + case 400: + ds_priv.type = POS; + flag = 1; + break; + case 500: + ds_priv.type = SS; + flag = 1; + break; + case 600: + return 1; // return to record the overhead! 
+ break; + case 700: + ds_dump_pcpu(1); + break; + default: + printk("set budget of Domain-0 to : 100 (DS), 200 (CS), 300 (PES), 400 (POS), 500 (SS), 600 (record overhead), 700 (dump PCPU)\n"); + break; + } + printk("Currently running with Scheduler "); + switch (ds_priv.type) { + case CS: + printk("CS\n"); + break; + case POS: + printk("POS\n"); + break; + case PES: + printk("PES\n"); + break; + case DS: + printk("DS\n"); + break; + case SS: + printk("SS\n"); + break; + default: + printk("Wrong!!!\n"); + break; + } + if (flag == 1) { + ds_reset(1); + cpu_raise_softirq(1, SCHEDULE_SOFTIRQ); + flag = 0; + } + } + } + + return 0; +} + +//init a dom +static int +ds_dom_init(struct domain *dom) { + struct ds_dom *ddom; + + printk("\n# into %s, domain id is: %d\n", __func__, dom->domain_id); + + if (is_idle_domain(dom)) { + printk("\t# init an idle domain\n"); + return 0; + } + + ddom = xmalloc(struct ds_dom); + if (ddom == NULL) + return -ENOMEM; + memset(ddom, 0, sizeof (*ddom)); + + /* Initialize budget and period */ + ddom->dom = dom; + + switch(dom->domain_id) { + case 32767: + ddom->budget = DS_IDLE_PERIOD; + ddom->period = DS_IDLE_PERIOD; + ddom->level = 100; + break; + case 0: + ddom->budget = DS_DOM_0_PERIOD; + ddom->period = DS_DOM_0_PERIOD; + ddom->level = 1; + break; + default: + ddom->budget = DS_DOM_BUDGET; + ddom->period = DS_DOM_PERIOD; + ddom->level = 10; + break; + } + + dom->sched_priv = ddom; + + return 0; +} + +//destory a domain +static void +ds_dom_destroy(struct domain *dom) { + printk("\n# into %s, destroy domain: %d\n", __func__, dom->domain_id); + xfree(DS_DOM(dom)); +} + +//ticked by pcpu tick in pcpu. 
+static void +ds_tick(void *_cpu) { + unsigned int cpu = (unsigned long) _cpu; + struct ds_pcpu *ppc = DS_PCPU(cpu); + + BUG_ON(current->processor != cpu); + + if (check_cpu_for_repl(cpu)) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + if (ds_cpu_pick(current) != cpu) { + set_bit(_VPF_migrating, ¤t->pause_flags); + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + set_timer(&ppc->ticker, NOW() + BUDGET(1)); +} + +// most important function, called every budget time +static struct task_slice +ds_schedule(s_time_t now) { + const int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + // struct list_head *rdyq = RDYQ(cpu); + struct ds_vcpu *scurr = DS_VCPU(current); + struct ds_vcpu *snext; + struct task_slice ret; + + DS_VCPU_CHECK(current); + +// need to consider idle_vcpu for CS and PES + if (scurr->vcpu->domain->domain_id != 0) { + if (!is_idle_vcpu(scurr->vcpu)) { + // for the first time the VCPU is executed + if (scurr->flag == 0) { + scurr->flag = 1; + BUG_ON(!list_empty(&scurr->active_elem)); + list_add(&scurr->active_elem, &ds_priv.active); + scurr->next_time = now + BUDGET(1) * scurr->period; + ds_repq_insert(scurr->vcpu->processor, scurr, scurr->budget); + } + if (cpu == 1) { + burn_budgets(scurr, now); + } + } else if (cpu == 1) { // scurr is the IDLE VCPU, have to deal with specially in CS and PES + burn_extra(scurr, now); + } + } + + if (vcpu_runnable(current)) { + if (!__vcpu_on_runq(scurr)) { + __runq_insert(cpu, scurr); + } + } else { + BUG_ON(is_idle_vcpu(current) || list_empty(runq)); + //for POS server + if (cpu == 1 && scurr->vcpu->domain->domain_id != 0 && ds_priv.type == POS) { + scurr->cur_budget = 0; + } + if (!__vcpu_on_rdyq(scurr)) { + __rdyq_insert(cpu, scurr); + } + } + + if (cpu != 1) { + snext = __runq_pick(cpu); + } else if (ds_priv.type != CS) { + snext = __runq_pick(cpu); + } else { // now runs CS scheduler + // printk("\n\trdy empty? 
%d, run: %d, rdy: %d\n", list_empty(rdyq), __runq_pick(cpu)->level, __rdyq_pick(cpu)->level); + if ( __rdyq_pick(cpu) == NULL || (__runq_pick(cpu)->level < __rdyq_pick(cpu)->level) ) { + snext = __runq_pick(cpu); //we are fine here + // printk("\npicked %d\n", snext->vcpu->domain->domain_id); + } else { + // if ( __rdyq_pick(cpu) == NULL ) { + // printk("\n\t\trdyq is null\n"); + // } else { + // printk("\n\t\trun: %d, rdy: %d\n", __runq_pick(cpu)->level, __rdyq_pick(cpu)->level); + // } + snext = __runq_pick_idle(cpu); // pick the IDLE VCPU for the VCPU on the RdyQ + // ds_dump_pcpu(1); + } + } + + if (cpu == 1 && snext->vcpu->domain->domain_id != 0) { + snext->last_start_time = NOW(); + } + + BUG_ON(!__vcpu_on_runq(snext)); + __runq_remove(snext); + + //context switch happens + if (cpu == 1 && snext != scurr) { + if (ds_priv.type == SS) { + if (!is_idle_vcpu(snext->vcpu)) { + snext->next_time = now + BUDGET(1) * snext->period; + } + if (!is_idle_vcpu(scurr->vcpu)) { + ds_repq_insert(cpu, scurr, scurr->burn_total); + scurr->burn_total = 0; + } + } + } + + // ret.time = is_idle_vcpu(snext->vcpu) ? BUDGET(1) : BUDGET(1) * snext->cur_budget; + ret.time = BUDGET(1); // used to test the enhanced old periodic server + ret.task = snext->vcpu; + + DS_VCPU_CHECK(ret.task); + + BUG_ON(!vcpu_runnable(snext->vcpu)); + + return ret; +} + +//init the global data +static void +ds_init(void) { + printk("\n# into %s\n", __func__); + spin_lock_init(&ds_priv.lock); + ds_priv.ncpus = 0; + ds_priv.type = DS; + INIT_LIST_HEAD(&ds_priv.active); +} + +/* Tickers cannot be kicked until SMP subsystem is alive. 
*/ +static __init int +ds_start_tickers(void) { + struct ds_pcpu *ppc; + unsigned int cpu; + + printk("\n# into %s, start all tickers right now\n", __func__); + + if (ds_priv.ncpus == 0) + return 0; + + for_each_online_cpu(cpu) { + ppc = DS_PCPU(cpu); + set_timer(&ppc->ticker, NOW() + BUDGET(1)); + } + + return 0; +} +__initcall(ds_start_tickers); + +static void ds_tick_suspend(void) { + struct ds_pcpu *ppc; + + printk("\n# into %s, why is this called?\n", __func__); + + ppc = DS_PCPU(smp_processor_id()); + + stop_timer(&ppc->ticker); +} + +static void ds_tick_resume(void) { + struct ds_pcpu *ppc; + uint64_t now = NOW(); + + printk("\n# into %s, why is this called?\n", __func__); + + ppc = DS_PCPU(smp_processor_id()); + + set_timer(&ppc->ticker, now + BUDGET(1)); +} + +const struct scheduler sched_ds_def = { + .name = "Deferrable Server Scheduler", + .opt_name = "ds", + .sched_id = XEN_SCHEDULER_DS, + + .init_domain = ds_dom_init, + .destroy_domain = ds_dom_destroy, + + .init_vcpu = ds_vcpu_init, + .destroy_vcpu = ds_vcpu_destroy, + + .init = ds_init, + + .pick_cpu = ds_cpu_pick, + + .tick_suspend = ds_tick_suspend, + .tick_resume = ds_tick_resume, + + .do_schedule = ds_schedule, + + .sleep = ds_vcpu_sleep, + .wake = ds_vcpu_wake, + + .adjust = ds_dom_cntl, + + .dump_cpu_state = ds_dump_pcpu, + .dump_settings = ds_dump, +}; diff -ubrN xen/xen-4.0.1/xen/common/sched_rt.c xen-4.0.1/xen/common/sched_rt.c --- xen/xen-4.0.1/xen/common/sched_rt.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_rt.c 2011-05-01 00:42:25.000000000 -0600 @@ -0,0 +1,584 @@ +/****************************************************************************** + * Real Time Xen scheduler Framework + * + * by Sisu Xi (C) 2010 Washington University in St. 
Louis + * based on code by Jaewoo Lee (C) 2010 University of Pennsylvania + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sched_rt.h" + +extern const struct rt_scheduler sched_deferrable_def; +extern const struct rt_scheduler sched_periodic_def; +extern const struct rt_scheduler sched_wcps_def; +extern const struct rt_scheduler sched_ssps_def; +//extern const struct rt_scheduler sched_polling_def; +//extern const struct rt_scheduler sched_sporadic_def; + +static struct rt_private rt_priv; + +static void rt_tick(void *_cpu); + + +//init the global data, picking schedulers! +static void +rt_init(void) { + printk("\n# into %s\n", __func__); + + spin_lock_init(&rt_priv.lock); + rt_priv.ncpus = 0; + rt_priv.type = DS; + INIT_LIST_HEAD(&rt_priv.active); +} + +//dump the physical cpu +static void +rt_dump_pcpu(int cpu) { + struct list_head *iter; + struct rt_pcpu *ppc = RT_PCPU(cpu); + struct list_head *runq = &ppc->runq; + struct list_head *rdyq = &ppc->rdyq; + struct rt_vcpu *dvc = RT_CUR(cpu); + int loop = 0; + + printk("### cpu: %d, now is: %lu\n", cpu, NOW()); + + if (dvc) { + printk("\trun: "); + rt_dump_vcpu(dvc); + } + + printk("runq:\n"); + list_for_each(iter, runq) { + dvc = __runq_elem(iter); + if (dvc) { + printk("\t%3d: ", ++loop); + rt_dump_vcpu(dvc); + } + } + + printk("rdyq:\n"); + list_for_each(iter, rdyq) { + dvc = __rdyq_elem(iter); + if (dvc) { + printk("\t%3d: ", ++loop); + rt_dump_vcpu(dvc); + } + } + + rt_dump_repq(cpu); + printk("\n"); +} + +//dump dump function +static void +rt_dump(void) { + rt_dump_pcpu(1); +} + +//init the physical cpu +static int +rt_pcpu_init(int cpu) { + struct rt_pcpu *ppc; + unsigned long flags; + + /* Allocate per-PCPU info */ + ppc = xmalloc(struct rt_pcpu); + if (ppc 
== NULL) + return -1; + memset(ppc, 0, sizeof (*ppc)); + + spin_lock_irqsave(&rt_priv.lock, flags); + + if (rt_priv.ncpus < cpu) + rt_priv.ncpus = cpu + 1; + + init_timer(&ppc->ticker, rt_tick, (void *) (unsigned long) cpu, cpu); + INIT_LIST_HEAD(&ppc->runq); + INIT_LIST_HEAD(&ppc->rdyq); + per_cpu(schedule_data, cpu).sched_priv = ppc; + + BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr)); + + ppc->rep_capacity = REPQ_CAPACITY; + ppc->repq = xmalloc_array(struct rep_elem, ppc->rep_capacity); + BUG_ON(ppc->repq == NULL); + ppc->rep_size = 0; + + spin_unlock_irqrestore(&rt_priv.lock, flags); + + printk("\n# finish %s, init cpu: %d\n", __func__, cpu); + + return 0; +} + +//pick a cpu to run, used to migrate from different cpus +static int +rt_cpu_pick(struct vcpu *vc) { + cpumask_t cpus; + int cpu; + + cpus_and(cpus, cpu_online_map, vc->cpu_affinity); + + if (vc->domain->domain_id == 0 && vc->processor != 0) { + return cycle_cpu(vc->processor, cpus); + } + + cpu = cpu_isset(vc->processor, cpus) + ? vc->processor + : cycle_cpu(vc->processor, cpus); + + return cpu; +} + +//init the virtual cpu +static int +rt_vcpu_init(struct vcpu *vc) { + struct domain * const dom = vc->domain; + struct rt_dom *ddom = RT_DOM(dom); + struct rt_vcpu *dvc; + + /* Allocate per-VCPU info */ + dvc = xmalloc(struct rt_vcpu); + if (dvc == NULL) { + return -1; + } + memset(dvc, 0, sizeof (*dvc)); + + INIT_LIST_HEAD(&dvc->runq_elem); + INIT_LIST_HEAD(&dvc->rdyq_elem); + INIT_LIST_HEAD(&dvc->active_elem); // init for active list + dvc->ddom = ddom; + dvc->vcpu = vc; + dvc->budget = is_idle_vcpu(vc)? RT_IDLE_PERIOD: ddom->budget; + dvc->period = is_idle_vcpu(vc)? RT_IDLE_PERIOD: ddom->period; + dvc->level = is_idle_vcpu(vc)? 
RT_IDLE_PERIOD: ddom->level; + dvc->cur_budget = dvc->budget; + dvc->repq_pending = 0; + + dvc->last_start_time = 0; + + dvc->burn_total = 0; + dvc->next_time = 0; + + vc->sched_priv = dvc; + + /* Allocate per-PCPU info */ + if (unlikely(!RT_PCPU(vc->processor))) { + if (rt_pcpu_init(vc->processor) != 0) + return -1; + } + + RT_VCPU_CHECK(vc); + + BUG_ON(!list_empty(&dvc->active_elem)); + list_add(&dvc->active_elem, &rt_priv.active); + dvc->next_time = NOW() + BUDGET(1) * dvc->period; + rt_repq_insert(dvc->vcpu->processor, dvc, dvc->budget); + + printk("\n# into %s, vcpu init: ", __func__); + rt_dump_vcpu(dvc); + + return 0; +} + +//destory the vcpu +static void +rt_vcpu_destroy(struct vcpu *vc) { + struct rt_vcpu * const dvc = RT_VCPU(vc); + struct rt_dom * const ddom = dvc->ddom; + + printk("\n# into %s, vcpu destroy: ", __func__); + rt_dump_vcpu(dvc); + + BUG_ON(ddom == NULL); + BUG_ON(!list_empty(&dvc->runq_elem)); + list_del_init(&dvc->active_elem); + + xfree(dvc); +} + +//init a dom +static int +rt_dom_init(struct domain *dom) { + struct rt_dom *ddom; + + printk("\n# into %s, domain id is: %d\n", __func__, dom->domain_id); + + if (is_idle_domain(dom)) { + printk("\t# init an idle domain\n"); + return 0; + } + + ddom = xmalloc(struct rt_dom); + if (ddom == NULL) + return -ENOMEM; + memset(ddom, 0, sizeof (*ddom)); + + /* Initialize budget and period */ + ddom->dom = dom; + + switch(dom->domain_id) { + case 32767: + ddom->budget = RT_IDLE_PERIOD; + ddom->period = RT_IDLE_PERIOD; + ddom->level = 100; + break; + case 0: + ddom->budget = RT_DOM_0_PERIOD; + ddom->period = RT_DOM_0_PERIOD; + ddom->level = 1; + break; + default: + ddom->budget = RT_DOM_BUDGET; + ddom->period = RT_DOM_PERIOD; + ddom->level = 10; + break; + } + + dom->sched_priv = ddom; + + return 0; +} + +//destory a domain +static void +rt_dom_destroy(struct domain *dom) { + printk("\n# into %s, destroy domain: %d\n", __func__, dom->domain_id); + xfree(RT_DOM(dom)); +} + +/* Tickers cannot be kicked 
until SMP subsystem is alive. */ +static __init int +rt_start_tickers(void) { + struct rt_pcpu *ppc; + unsigned int cpu; + + printk("\n# into %s, start all tickers right now\n", __func__); + + if (rt_priv.ncpus == 0) + return 0; + + for_each_online_cpu(cpu) { + ppc = RT_PCPU(cpu); + set_timer(&ppc->ticker, NOW() + BUDGET(1)); + } + + return 0; +} +__initcall(rt_start_tickers); + +static void rt_tick_suspend(void) { + struct rt_pcpu *ppc; + + printk("\n# into %s, why is this called?\n", __func__); + + ppc = RT_PCPU(smp_processor_id()); + + stop_timer(&ppc->ticker); +} + +static void rt_tick_resume(void) { + struct rt_pcpu *ppc; + uint64_t now = NOW(); + + printk("\n# into %s, why is this called?\n", __func__); + + ppc = RT_PCPU(smp_processor_id()); + + set_timer(&ppc->ticker, now + BUDGET(1)); +} + + +/********************************************* + * Four Subscheduler Specific Functions +*********************************************/ + +//sleep the vcpu +static void +rt_vcpu_sleep(struct vcpu *vc) { + //SCHED_OP(vcpu_sleep, vc); + struct rt_vcpu * const dvc = RT_VCPU(vc); + + BUG_ON(is_idle_vcpu(vc)); + + if (per_cpu(schedule_data, vc->processor).curr == vc) { + cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); + } else if (__vcpu_on_runq(dvc)) { + __runq_remove(dvc); + } else if (__vcpu_on_rdyq(dvc)) { + __rdyq_remove(dvc); + } + + return; +} + +//wake up the vcpu, insert it into runq, raise a softirq +static void +rt_vcpu_wake(struct vcpu *vc) { + //SCHED_OP(vcpu_wake, vc); + struct rt_vcpu * const dvc = RT_VCPU(vc); + const unsigned int cpu = vc->processor; + + BUG_ON(is_idle_vcpu(vc)); + + if (unlikely(per_cpu(schedule_data, cpu).curr == vc)) { + if (vc->domain->domain_id != 0) { + printk("\nwake running\n"); + } + return; + } + if (unlikely(__vcpu_on_runq(dvc))) { + if (vc->domain->domain_id != 0) { + printk("\nwake on runq\n"); + } + return; + } + + if (__vcpu_on_rdyq(dvc)) { + __rdyq_remove(dvc); + } + + if (!__vcpu_on_runq(dvc)) { + __runq_insert(cpu, 
dvc); + } + + if (dvc->level < RT_CUR(cpu)->level) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + return; +} + +//ticked by pcpu tick in pcpu. +static void +rt_tick(void *_cpu) { + unsigned int cpu = (unsigned long) _cpu; + switch (rt_priv.type) { + case DS: + sched_deferrable_def.tick(_cpu); + break; + case PPS: + sched_periodic_def.tick(_cpu); + break; + case WCPS: + sched_wcps_def.tick(_cpu); + break; + case SSPS: + sched_ssps_def.tick(_cpu); + break; + default: + printk("Wrong in %s\n", __func__); + sched_deferrable_def.tick(_cpu); + break; + } + + if (rt_cpu_pick(current) != cpu) { + set_bit(_VPF_migrating, ¤t->pause_flags); + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } +} + +// most important function, called every budget time +static struct task_slice +rt_schedule(s_time_t now) { + switch (rt_priv.type) { + case DS: + return sched_deferrable_def.schedule(now); + break; + case PPS: + return sched_periodic_def.schedule(now); + break; + case WCPS: + return sched_wcps_def.schedule(now); + break; + case SSPS: + return sched_ssps_def.schedule(now); + break; + default: + printk("Wrong in %s\n", __func__); + return sched_deferrable_def.schedule(now); + break; + } +} + +/********************************************* + * Used to Adjust Domain parameters and switch schedulers +*********************************************/ +/* +// !!! Need to consider SS! for the repl queue!! have not done yet! 
+static inline void +rt_reset(int cpu) { + struct rt_pcpu * ppc = RT_PCPU(cpu); + struct list_head * iter; + + printk("\nBefore reset\n"); + rt_dump_pcpu(cpu); + + // empty the RepQ + while(ppc->rep_size != 0) { + rt_repq_remove(cpu); + } + + printk("\nvcpu on list is:\n"); + //init each vcpu; + list_for_each(iter, &rt_priv.active) { + struct rt_vcpu * iter_dvc = list_entry(iter, struct rt_vcpu, active_elem); + if (__vcpu_on_runq(iter_dvc)) { + __runq_remove(iter_dvc); + } + if (__vcpu_on_rdyq(iter_dvc)) { + __rdyq_remove(iter_dvc); + } + iter_dvc->cur_budget = iter_dvc->budget; + iter_dvc->last_start_time = NOW(); + iter_dvc->next_time = NOW() + BUDGET(1) * iter_dvc->period; + iter_dvc->burn_total = 0; + rt_dump_vcpu(iter_dvc); + } + + //insert into Queues + list_for_each(iter, &rt_priv.active) { + struct rt_vcpu * iter_dvc = list_entry(iter, struct rt_vcpu, active_elem); + if (vcpu_runnable(iter_dvc->vcpu)) { + if (!__vcpu_on_runq(iter_dvc)) { + __runq_insert(cpu, iter_dvc); + } + } else { + if (!__vcpu_on_rdyq(iter_dvc)) { + __rdyq_insert(cpu, iter_dvc); + } + } + } + + printk("\nAfter Reset\n"); + rt_dump_pcpu(cpu); +} +*/ +//adjust the domain's budget & period, also used to trigger the record +static int +rt_dom_cntl(struct domain *d, struct xen_domctl_scheduler_op *op) { + struct rt_dom * const ddom = RT_DOM(d); + unsigned long flags; + struct rt_vcpu *dvc = RT_VCPU(d->vcpu[0]); + + if (op->cmd == XEN_DOMCTL_SCHEDOP_getinfo) { + op->u.rt.budget = ddom->budget; + op->u.rt.period = ddom->period; + op->u.rt.level = ddom->level; + } else { + BUG_ON(op->cmd != XEN_DOMCTL_SCHEDOP_putinfo); + + spin_lock_irqsave(&rt_priv.lock, flags); + + if (op->u.rt.budget != 0) { + ddom->budget = op->u.rt.budget; + dvc->budget = op->u.rt.budget; + } + + if (op->u.rt.period != 0) { + ddom->period = op->u.rt.period; + dvc->period = op->u.rt.period; + } + + if (op->u.rt.level != 0) { + ddom->level = op->u.rt.level; + dvc->level = op->u.rt.level; + } + dvc->cur_budget = dvc->budget; 
// reset its budget + spin_unlock_irqrestore(&rt_priv.lock, flags); + + if (dvc->vcpu->domain->domain_id == 0) { + switch (op->u.rt.budget) { + case 100: + printk("############################\n100: dump info\n200: DS\n300: PPS\n400: WC-PS\n500: SS-PS\n\n"); + rt_dump_pcpu(1); + break; + case 200: + rt_priv.type = DS; //change to DS; + //rt_reset(1); + break; + case 300: + rt_priv.type = PPS; // to PPS + //rt_reset(1); + break; + case 400: + rt_priv.type = WCPS; // WCPS + //rt_reset(1); + break; + case 500: + rt_priv.type = SSPS; // SSPS + //rt_reset(1); + break; + default: + printk("############################\n100: dump info\n200: DS\n300: PPS\n400: WC-PS\n500: SS-PS\n\n"); + break; + } + printk("Current Scheduler: "); + switch (rt_priv.type) { + case DS: + printk("%s\n", sched_deferrable_def.name); + break; + case PPS: + printk("%s\n", sched_periodic_def.name); + break; + case WCPS: + printk("%s\n", sched_wcps_def.name); + break; + case SSPS: + printk("%s\n", sched_ssps_def.name); + break; + default: + printk("wrong, reset to DS\n"); + rt_priv.type = DS; + break; + } + + } + } + + return 0; +} + +const struct scheduler sched_rt_def = { + .name = "Real Time Scheduler", + .opt_name = "rt", + .sched_id = XEN_SCHEDULER_RT, + + .init_domain = rt_dom_init, + .destroy_domain = rt_dom_destroy, + + .init_vcpu = rt_vcpu_init, + .destroy_vcpu = rt_vcpu_destroy, + + .init = rt_init, + + .pick_cpu = rt_cpu_pick, + + .tick_suspend = rt_tick_suspend, + .tick_resume = rt_tick_resume, + + .do_schedule = rt_schedule, + + .sleep = rt_vcpu_sleep, + .wake = rt_vcpu_wake, + + .adjust = rt_dom_cntl, + + .dump_cpu_state = rt_dump_pcpu, + .dump_settings = rt_dump, +}; diff -ubrN xen/xen-4.0.1/xen/common/sched_rt_deferrable.c xen-4.0.1/xen/common/sched_rt_deferrable.c --- xen/xen-4.0.1/xen/common/sched_rt_deferrable.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_rt_deferrable.c 2011-04-24 21:23:02.000000000 -0600 @@ -0,0 +1,79 @@ 
+/****************************************************************************** + * Real Time Xen scheduler Framework + * + * by Sisu Xi (C) 2010 Washington University in St. Louis + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sched_rt.h" +#include "sched_rt_repq.h" + + +// most important function, called every budget time +static struct task_slice +deferrable_schedule(s_time_t now) { + const int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + struct rt_vcpu *scurr = RT_VCPU(current); + struct rt_vcpu *snext; + struct task_slice ret; + + RT_VCPU_CHECK(current); + + if ((scurr->vcpu->domain->domain_id != 0) && (!is_idle_vcpu(scurr->vcpu))) { + repq_burn(scurr, now); + } + + if (vcpu_runnable(current)) { + if (!__vcpu_on_runq(scurr)) { + __runq_insert(cpu, scurr); + } + } else { + BUG_ON(is_idle_vcpu(current) || list_empty(runq)); + if (!__vcpu_on_rdyq(scurr)) { + __rdyq_insert(cpu, scurr); + } + } + + snext = __runq_pick(cpu); + + if (snext->vcpu->domain->domain_id != 0) { + snext->last_start_time = NOW(); + } + + BUG_ON(!__vcpu_on_runq(snext)); + __runq_remove(snext); + + ret.time = is_idle_vcpu(snext->vcpu) ? 
BUDGET(1) : BUDGET(1) * snext->cur_budget; + ret.task = snext->vcpu; + + RT_VCPU_CHECK(ret.task); + + BUG_ON(!vcpu_runnable(snext->vcpu)); + + return ret; +} + +const struct rt_scheduler sched_deferrable_def = { + .name = "Deferrable Server Scheduler", + .opt_name = "ds", + + .tick = repq_tick, + .vcpu_wake = NULL, + .vcpu_sleep = NULL, + .schedule = deferrable_schedule +}; diff -ubrN xen/xen-4.0.1/xen/common/sched_rt.h xen-4.0.1/xen/common/sched_rt.h --- xen/xen-4.0.1/xen/common/sched_rt.h 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_rt.h 2011-04-24 22:23:43.000000000 -0600 @@ -0,0 +1,348 @@ +/****************************************************************************** + * Real Time Xen scheduler Headfile, including the common data structures + * + * by Sisu Xi (C) 2010 Washington University in St. Louis + * based on code by Jaewoo Lee (C) 2010 University of Pennsylvania + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#define RT_DOM(_dom) ((struct rt_dom *) (_dom)->sched_priv) +#define RT_PCPU(_c) ((struct rt_pcpu *)per_cpu(schedule_data, _c).sched_priv) +#define RT_VCPU(_vcpu) ((struct rt_vcpu *) (_vcpu)->sched_priv) +#define RUNQ(_cpu) (&(RT_PCPU(_cpu)->runq)) +#define RDYQ(_cpu) (&(RT_PCPU(_cpu)->rdyq)) +#define RT_CUR(_cpu) RT_VCPU(per_cpu(schedule_data, _cpu).curr) +#define BUDGET(_b) (MILLISECS(1*_b)) // time to run for 1 budget, default setting is 1ms = 1 budget + +#define REPQ_CAPACITY 500 // repQ is used for the replenishment + +#define RT_DOM_0_PERIOD 100 +#define RT_IDLE_PERIOD 200 + +#define RT_DOM_BUDGET 25 // default budget, can bu changed via xm sched-ss -d target -b budget -p period +#define RT_DOM_PERIOD 50 + +enum server_type {DS, PPS, WCPS, SSPS}; + + +/********************************************* + * Data Structure +*********************************************/ + +//physical cpu +struct rt_pcpu { + struct 
list_head runq; // runQ on the pcpu, organized by linked list + struct list_head rdyq; + struct rep_elem *repq; //repQ on the pcpu, organized by heap + int rep_size; // current size, for later dynamic reqQ use. currently set equals to capacity + int rep_capacity; // upper limit + struct timer ticker; // for preemptive use, tick every budget +}; + +//virtual cpu +struct rt_vcpu { + struct list_head runq_elem; + struct list_head rdyq_elem; + struct list_head active_elem; //used to link all active vcpu except domain 0 and idle one! + struct rt_dom *ddom; + struct vcpu *vcpu; + + uint16_t repq_pending; // used to calculate how many items are on repq + + uint16_t budget; + uint16_t period; + uint16_t level; + + uint16_t cur_budget; + s_time_t last_start_time; // used for burn_budget + s_time_t next_time; //the next repl time + + uint16_t burn_total; // used only for Sporadic Server +}; + +//used for replenishment +struct rep_elem { + s_time_t re_time; + int16_t re_amount; + struct rt_vcpu *dvc; +}; + +//domain +struct rt_dom { + struct domain *dom; + uint16_t budget; + uint16_t period; + uint16_t level; +}; + +//global variable, records the number of cpus +struct rt_private { + spinlock_t lock; // used for init + uint32_t ncpus; //number of physical cpus + struct list_head active; //active_vcpu except domain 0 and idle vcpu! 
+ enum server_type type; //used to represent scheduler +}; + +struct rt_scheduler { + char *name; + char *opt_name; + + void (*vcpu_sleep)(struct vcpu *vc); + void (*tick)(void *_cpu); + struct task_slice (*schedule)(s_time_t); + void (*vcpu_wake)(struct vcpu *vc); +}; + +/********************************************* + * Common Code +*********************************************/ + +//check the vcpu +static inline void +__rt_vcpu_check(struct vcpu *vc) { + struct rt_vcpu * const dvc = RT_VCPU(vc); + struct rt_dom * const ddom = dvc->ddom; + + BUG_ON(dvc->vcpu != vc); + BUG_ON(ddom != RT_DOM(vc->domain)); + if (ddom) { + BUG_ON(is_idle_vcpu(vc)); + BUG_ON(ddom->dom != vc->domain); + } else { + BUG_ON(!is_idle_vcpu(vc)); + } +} +#define RT_VCPU_CHECK(_vc) (__rt_vcpu_check(_vc)) + +//inlined code +static inline struct rt_vcpu * +__runq_elem(struct list_head *elem) { + return list_entry(elem, struct rt_vcpu, runq_elem); +} + +//inlined code +static inline struct rt_vcpu * +__rdyq_elem(struct list_head *elem) { + return list_entry(elem, struct rt_vcpu, rdyq_elem); +} + +// the current vcpu is on runQ? +static inline int +__vcpu_on_runq(struct rt_vcpu *dvc) { + return !list_empty(&dvc->runq_elem); +} + +// the current vcpu is on runQ? 
+static inline int +__vcpu_on_rdyq(struct rt_vcpu *dvc) { + return !list_empty(&dvc->rdyq_elem); +} + +/********************************************* + * Dump Settings Related +*********************************************/ + +//dump the repq +static inline void +rt_dump_repq(int cpu) { + int loop = 0; + struct rt_pcpu *ppc = RT_PCPU(cpu); + + printk("repq: size: %d\n", ppc->rep_size); + for (loop = 0; loop < ppc->rep_size; loop++) { + printk("\t[%d, %d]: %d @ %lu\n", + ppc->repq[loop].dvc->vcpu->domain->domain_id, + ppc->repq[loop].dvc->vcpu->vcpu_id, + ppc->repq[loop].re_amount, + ppc->repq[loop].re_time); + } +} + +//dump the virtual cpu +static inline void +rt_dump_vcpu(struct rt_vcpu *dvc) { + printk("\t[%i, %i], cur: %i, rep: %d, last: %lu, next: %lu, \n", dvc->vcpu->domain->domain_id, dvc->vcpu->vcpu_id, dvc->cur_budget, dvc->repq_pending, dvc->last_start_time, dvc->next_time); +} + +/********************************************* + * RunQ, RdyQ, and RepQ Related +*********************************************/ + +//pick the first vcpu whose budget is >0 from the runq +static inline struct rt_vcpu * +__runq_pick(unsigned int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + list_for_each(iter, runq) { + struct rt_vcpu * iter_dvc = __runq_elem(iter); + if (iter_dvc->cur_budget > 0) { + return iter_dvc; + } + } + + BUG_ON(1); + return NULL; +} + +//pick the first one with budget > 0, regardless of runnable or not +static inline struct rt_vcpu * +__rdyq_pick(unsigned int cpu) { + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + list_for_each(iter, rdyq) { + struct rt_vcpu *iter_dvc = __rdyq_elem(iter); + if (iter_dvc->cur_budget > 0) { + return iter_dvc; + } + } + + return NULL; +} + +//pick the IDLE VCPU from RunQ, for Periodic Server +static inline struct rt_vcpu * +__runq_pick_idle(unsigned int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + list_for_each(iter, runq) { + struct rt_vcpu 
* iter_dvc = __runq_elem(iter); + if (is_idle_vcpu(iter_dvc->vcpu)) { + return iter_dvc; + } + } + + BUG_ON(1); + return NULL; +} + +//insert into the runq, followed a FIFO way. sorted by level +static inline void +__runq_insert(unsigned int cpu, struct rt_vcpu *dvc) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + BUG_ON(__vcpu_on_runq(dvc)); + BUG_ON(cpu != dvc->vcpu->processor); + + list_for_each(iter, runq) { + struct rt_vcpu * iter_dvc = __runq_elem(iter); + if (dvc->level < iter_dvc->level) { + break; + } + } + + list_add_tail(&dvc->runq_elem, iter); +} + +//insert into the runq, followed a FIFO way. sorted by level +static inline void +__rdyq_insert(unsigned int cpu, struct rt_vcpu *dvc) { + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + BUG_ON(__vcpu_on_rdyq(dvc)); + BUG_ON(cpu != dvc->vcpu->processor); + + list_for_each(iter, rdyq) { + struct rt_vcpu * iter_dvc = __rdyq_elem(iter); + if (dvc->level <= iter_dvc->level) { + break; + } + } + + list_add_tail(&dvc->rdyq_elem, iter); +} + +//remove it from runQ +static inline void +__runq_remove(struct rt_vcpu *dvc) { + BUG_ON(!__vcpu_on_runq(dvc)); + list_del_init(&dvc->runq_elem); +} + +//remove it from runQ +static inline void +__rdyq_remove(struct rt_vcpu *dvc) { + BUG_ON(!__vcpu_on_rdyq(dvc)); + list_del_init(&dvc->rdyq_elem); +} + +//used for the heap, repQ +static inline int +rt_rep_parent(int childIdx) { + return (childIdx & 1)? 
((childIdx - 1) >> 1) : ((childIdx - 2) >> 1); +} + +//insert into the repQ +static inline void +rt_repq_insert(unsigned int cpu, struct rt_vcpu *dvc, int amount) { + struct rt_pcpu * ppc = RT_PCPU(cpu); + int childIdx, parentIdx; + + if (dvc->next_time == 0) { + return; + } + + if (amount == 0) { + return; + } + + if (ppc->rep_size == ppc->rep_capacity) { + printk("\n# into %s, repQ full!!\n", __func__); + BUG_ON(1); + } + + childIdx = ppc->rep_size; + parentIdx = rt_rep_parent(childIdx); + + while (childIdx > 0 && dvc->next_time < ppc->repq[parentIdx].re_time) { + ppc->repq[childIdx] = ppc->repq[parentIdx]; + childIdx = parentIdx; + parentIdx = rt_rep_parent(childIdx); + } + + ppc->repq[childIdx].re_time = dvc->next_time; + ppc->repq[childIdx].dvc = dvc; + ppc->repq[childIdx].re_amount = amount; + ppc->rep_size++; + + // dvc->next_time = 0; + dvc->repq_pending++; +} + +//remove from the repQ +static inline void +rt_repq_remove(unsigned int cpu) { + struct rt_pcpu * ppc = RT_PCPU(cpu); + int childIdx = 1; + int rightChildIdx; + int rootIdx = 0; + struct rep_elem temp; + + BUG_ON(ppc->rep_size <= 0); + + ppc->repq[0].dvc->repq_pending--; + ppc->repq[0] = ppc->repq[ppc->rep_size - 1]; + ppc->rep_size--; + + temp = ppc->repq[0]; + + while (childIdx < ppc->rep_size) { + rightChildIdx = childIdx + 1; + if (rightChildIdx < ppc->rep_size && ppc->repq[rightChildIdx].re_time < ppc->repq[childIdx].re_time) { + childIdx = rightChildIdx; + } + if (ppc->repq[childIdx].re_time < temp.re_time) { + ppc->repq[rootIdx] = ppc->repq[childIdx]; + rootIdx = childIdx; + childIdx = 2 * rootIdx + 1; + } else { + break; + } + } + ppc->repq[rootIdx] = temp; +} + diff -ubrN xen/xen-4.0.1/xen/common/sched_rt_periodic.c xen-4.0.1/xen/common/sched_rt_periodic.c --- xen/xen-4.0.1/xen/common/sched_rt_periodic.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_rt_periodic.c 2011-04-24 21:23:44.000000000 -0600 @@ -0,0 +1,125 @@ 
+/****************************************************************************** + * Real Time Xen scheduler Framework + * + * by Sisu Xi (C) 2010 Washington University in St. Louis + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sched_rt.h" +#include "sched_rt_repq.h" + + + +//burn the extra budget on RdyQ +static void +repq_burn_rdyq(struct rt_vcpu *dvc, s_time_t now) { + s_time_t delta; + unsigned int consume; + struct list_head * rdyq = RDYQ(dvc->vcpu->processor); + struct list_head * iter; + + BUG_ON(dvc != RT_CUR(dvc->vcpu->processor)); + + if (dvc->last_start_time == 0) { + dvc->last_start_time = now; + return; + } + + delta = now - dvc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + + // burn budgets on RdyQ + list_for_each(iter, rdyq) { + struct rt_vcpu * iter_dvc = __rdyq_elem(iter); + if (iter_dvc->cur_budget > 0) { + iter_dvc->cur_budget -= consume; + if (iter_dvc->cur_budget < 0) { + iter_dvc->budget = 0; + } + return; // only burn one budget + } + } + + return; +} + + +// most important function, called every budget time +static struct task_slice +periodic_schedule(s_time_t now) { + const int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + struct rt_vcpu *scurr = RT_VCPU(current); + struct rt_vcpu *snext; + struct task_slice ret; + + RT_VCPU_CHECK(current); + + if ((scurr->vcpu->domain->domain_id != 0)) { + if (!is_idle_vcpu(scurr->vcpu)) { + repq_burn(scurr, now); + } else if (cpu == 1) { //scurr is the idle vcpu on cpu 1, need to deal with periodic server + repq_burn_rdyq(scurr, now); + } + } + + if (vcpu_runnable(current)) { + if (!__vcpu_on_runq(scurr)) { + __runq_insert(cpu, scurr); + } + } else { 
+ BUG_ON(is_idle_vcpu(current) || list_empty(runq)); + if (!__vcpu_on_rdyq(scurr)) { + __rdyq_insert(cpu, scurr); + } + } + + if ( (__rdyq_pick(cpu) == NULL) || (__runq_pick(cpu)->level < __rdyq_pick(cpu)->level) ) { + snext = __runq_pick(cpu); + } else { + snext = __runq_pick_idle(cpu); // pick the IDLE to mimic the as if budget idled away behavior + } + + if (snext->vcpu->domain->domain_id != 0) { + snext->last_start_time = NOW(); + } + + BUG_ON(!__vcpu_on_runq(snext)); + __runq_remove(snext); + + ret.time = BUDGET(1); + ret.task = snext->vcpu; + + RT_VCPU_CHECK(ret.task); + + BUG_ON(!vcpu_runnable(snext->vcpu)); + + return ret; +} + +const struct rt_scheduler sched_periodic_def = { + .name = "Pure Periodic Server Scheduler", + .opt_name = "pps", + + .tick = repq_tick, + .vcpu_sleep = NULL, + .vcpu_wake = NULL, + .schedule = periodic_schedule +}; diff -ubrN xen/xen-4.0.1/xen/common/sched_rt_repq.h xen-4.0.1/xen/common/sched_rt_repq.h --- xen/xen-4.0.1/xen/common/sched_rt_repq.h 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_rt_repq.h 2011-05-02 21:06:21.000000000 -0600 @@ -0,0 +1,86 @@ +/****************************************************************************** + * Real Time Xen scheduler Headfile, including the common data structures + * + * by Sisu Xi (C) 2010 Washington University in St. Louis + * based on code by Jaewoo Lee (C) 2010 University of Pennsylvania + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + + +/********************************************* + * Shared by the subschedulers +*********************************************/ + +//check the current repQ to see if a repl needs to happen +//Even if the IDLE VCPU is running, just raise an interrupt to trigger the schedule function!! 
+static int +check_cpu_for_repl(int cpu) { + struct rt_pcpu * ppc = RT_PCPU(cpu); + int flag = 0; //used for interrupt + + while((ppc->rep_size != 0) && ppc->repq[0].re_time < NOW()) { + ppc->repq[0].dvc->cur_budget += ppc->repq[0].re_amount; + if (ppc->repq[0].dvc->cur_budget > ppc->repq[0].dvc->budget) { + ppc->repq[0].dvc->cur_budget = ppc->repq[0].dvc->budget; + } + + while (ppc->repq[0].dvc->next_time <= NOW()) { + ppc->repq[0].dvc->next_time += BUDGET(1) * ppc->repq[0].dvc->period; + } + rt_repq_insert(ppc->repq[0].dvc->vcpu->processor, ppc->repq[0].dvc, ppc->repq[0].dvc->budget); + + if (ppc->repq[0].dvc->level < RT_CUR(cpu)->level) flag = 1; // need to raise an interrupt + + // bug fix 0501 + if (ppc->repq[0].dvc->level != RT_CUR(cpu)->level) { // do not change the current running one + ppc->repq[0].dvc->last_start_time = NOW(); + } + + rt_repq_remove(cpu); + } + + return flag; +} + +//ticked by pcpu tick in pcpu, used in the repq way +static void +repq_tick(void *_cpu) { + unsigned int cpu = (unsigned long) _cpu; + struct rt_pcpu *ppc = RT_PCPU(cpu); + + BUG_ON(current->processor != cpu); + + if (check_cpu_for_repl(cpu)) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + set_timer(&ppc->ticker, NOW() + BUDGET(1)); +} + +static int +repq_burn(struct rt_vcpu *dvc, s_time_t now) { + s_time_t delta; + unsigned int consume; + + BUG_ON(dvc != RT_CUR(dvc->vcpu->processor)); + + if (dvc->last_start_time == 0) { + dvc->last_start_time = now; + return 0; + } + + delta = now - dvc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + + if (consume > dvc->cur_budget) { + dvc->cur_budget = 0; + } else { + dvc->cur_budget -= consume; + } + + return consume; +} + diff -ubrN xen/xen-4.0.1/xen/common/sched_rt_ssps.c xen-4.0.1/xen/common/sched_rt_ssps.c --- xen/xen-4.0.1/xen/common/sched_rt_ssps.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_rt_ssps.c 2011-05-01 
19:41:06.000000000 -0600 @@ -0,0 +1,197 @@ +/****************************************************************************** + * Real Time Xen scheduler Framework + * + * by Sisu Xi (C) 2010 Washington University in St. Louis + * based on code by Jaewoo Lee (C) U Penn + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sched_rt.h" +#include "sched_rt_repq.h" + + +static int +check_rdyq(int cpu) { + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + list_for_each(iter, rdyq) { + struct rt_vcpu * iter_dvc = __rdyq_elem(iter); + if (iter_dvc->cur_budget > 0) { + return 1; + } + } + + return 0; +} + +//burn the extra budget on RdyQ +static void +ssps_burn_rdyq(struct rt_vcpu *dvc, s_time_t now) { + s_time_t delta; + unsigned int consume; + struct list_head * rdyq = RDYQ(dvc->vcpu->processor); + struct list_head * iter; + + BUG_ON(dvc != RT_CUR(dvc->vcpu->processor)); + + if (dvc->last_start_time == 0) { + dvc->last_start_time = now; + return; + } + + delta = now - dvc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + + // burn budgets on RdyQ + list_for_each(iter, rdyq) { + struct rt_vcpu * iter_dvc = __rdyq_elem(iter); + if (iter_dvc->cur_budget > 0) { + //bug fix 0501 + + delta = now - iter_dvc->last_start_time; + // just get repled, skip this VCPU + if (delta < BUDGET(1)) { + continue; + } + if (consume > iter_dvc->cur_budget) { + iter_dvc->cur_budget = 0; + } else { + iter_dvc->cur_budget -= consume; + } + return; // only burn one budget + } + } + + return; +} + +static void +ssps_burn_extra(int cpu, int consume) { + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + // burn budgets on RdyQ + 
list_for_each(iter, rdyq) { + struct rt_vcpu * iter_dvc = __rdyq_elem(iter); + if (iter_dvc->cur_budget > 0) { + if (consume > iter_dvc->cur_budget) { + iter_dvc->cur_budget = 0; + } else { + iter_dvc->cur_budget -= consume; + } + return; // only burn one budget + } + } + + return; +} + +//pick the first vcpu whose budget is >0 from the runq +static inline struct rt_vcpu * +__ssps_runq_pick(unsigned int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + list_for_each(iter, runq) { + struct rt_vcpu * iter_dvc = __runq_elem(iter); + return iter_dvc; + } + + BUG_ON(1); + return NULL; +} + +// most important function, called every budget time +static struct task_slice +ssps_schedule(s_time_t now) { + const int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + struct rt_vcpu *scurr = RT_VCPU(current); + struct rt_vcpu *snext; + struct task_slice ret; + int consume; + int old_budget; + + RT_VCPU_CHECK(current); + + if ((scurr->vcpu->domain->domain_id != 0)) { + if (!is_idle_vcpu(scurr->vcpu)) { + //check how many budget should burn + old_budget = scurr->cur_budget; + if (old_budget > 0) { + consume = repq_burn(scurr, now); + if (consume != 0 && __rdyq_pick(cpu) != NULL && __rdyq_pick(cpu)->level < scurr->level) { + scurr->cur_budget = old_budget; //restore its original budget + ssps_burn_extra(cpu, consume); // burn the one on rdyq instead + } + } else { + ssps_burn_rdyq(scurr, now); + } + } else { + ssps_burn_rdyq(scurr, now); // idle VCPU, still need to burn the ones on rdyq + } + } + + if (vcpu_runnable(current)) { + if (!__vcpu_on_runq(scurr)) { + __runq_insert(cpu, scurr); + } + } else { + BUG_ON(is_idle_vcpu(current) || list_empty(runq)); + if (!__vcpu_on_rdyq(scurr)) { + __rdyq_insert(cpu, scurr); + } + } + + snext = __runq_pick(cpu); + //slack stealing!! 
+ if (is_idle_vcpu(snext->vcpu)) { + if (check_rdyq(snext->vcpu->processor)) { + snext = __ssps_runq_pick(cpu); + } + } + + if (snext->vcpu->domain->domain_id != 0) { + snext->last_start_time = NOW(); + } + + BUG_ON(!__vcpu_on_runq(snext)); + __runq_remove(snext); + + ret.time = BUDGET(1); + ret.task = snext->vcpu; + + RT_VCPU_CHECK(ret.task); + + BUG_ON(!vcpu_runnable(snext->vcpu)); + + return ret; +} + + +const struct rt_scheduler sched_ssps_def = { + .name = "Slack Stealing Periodic Server Scheduler", + .opt_name = "ssps", + .tick = repq_tick, + .vcpu_sleep = NULL, + .vcpu_wake = NULL, + .schedule = ssps_schedule +}; diff -ubrN xen/xen-4.0.1/xen/common/sched_rt_wcps.c xen-4.0.1/xen/common/sched_rt_wcps.c --- xen/xen-4.0.1/xen/common/sched_rt_wcps.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_rt_wcps.c 2011-05-01 13:24:27.000000000 -0600 @@ -0,0 +1,146 @@ +/****************************************************************************** + * Real Time Xen scheduler Framework + * + * by Sisu Xi (C) 2010 Washington University in St. 
Louis + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sched_rt.h" +#include "sched_rt_repq.h" + +//burn the extra budget on RdyQ +static void +wcps_burn_rdyq(struct rt_vcpu *dvc, s_time_t now) { + s_time_t delta; + unsigned int consume; + struct list_head * rdyq = RDYQ(dvc->vcpu->processor); + struct list_head * iter; + + BUG_ON(dvc != RT_CUR(dvc->vcpu->processor)); + + if (dvc->last_start_time == 0) { + dvc->last_start_time = now; + return; + } + + delta = now - dvc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + + // burn budgets on RdyQ + list_for_each(iter, rdyq) { + struct rt_vcpu * iter_dvc = __rdyq_elem(iter); + if (iter_dvc->cur_budget > 0) { + if (consume > iter_dvc->cur_budget) { + iter_dvc->cur_budget = 0; + } else { + iter_dvc->cur_budget -= consume; + } + return; // only burn one budget + } + } + + return; +} + + +static void +wcps_burn_extra(int cpu, int consume) { + struct list_head * rdyq = RDYQ(cpu); + struct list_head * iter; + + // burn budgets on RdyQ + list_for_each(iter, rdyq) { + struct rt_vcpu * iter_dvc = __rdyq_elem(iter); + if (iter_dvc->cur_budget > 0) { + if (consume > iter_dvc->cur_budget) { + iter_dvc->cur_budget = 0; + } else { + iter_dvc->cur_budget -= consume; + } + return; // only burn one budget + } + } + + return; +} + + +// most important function, called every budget time +static struct task_slice +wcps_schedule(s_time_t now) { + const int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + struct rt_vcpu *scurr = RT_VCPU(current); + struct rt_vcpu *snext; + struct task_slice ret; + int consume; + + RT_VCPU_CHECK(current); + + if ((scurr->vcpu->domain->domain_id != 0)) { + if 
(!is_idle_vcpu(scurr->vcpu)) { + consume = repq_burn(scurr, now); + if (consume != 0 && __rdyq_pick(cpu) != NULL && __rdyq_pick(cpu)->level < scurr->level) { + wcps_burn_extra(cpu, consume); // burn the extra budget on rdyq, mimic the idled away behavior + } + } else if (cpu == 1) { + wcps_burn_rdyq(scurr, now); + } + } + + if (vcpu_runnable(current)) { + if (!__vcpu_on_runq(scurr)) { + __runq_insert(cpu, scurr); + } + } else { + BUG_ON(is_idle_vcpu(current) || list_empty(runq)); + if (!__vcpu_on_rdyq(scurr)) { + __rdyq_insert(cpu, scurr); + } + } + + snext = __runq_pick(cpu); + + if (snext->vcpu->domain->domain_id != 0) { + snext->last_start_time = NOW(); + } + + BUG_ON(!__vcpu_on_runq(snext)); + __runq_remove(snext); + + ret.time = BUDGET(1); + ret.task = snext->vcpu; + + RT_VCPU_CHECK(ret.task); + + BUG_ON(!vcpu_runnable(snext->vcpu)); + + return ret; +} + +const struct rt_scheduler sched_wcps_def = { + .name = "Work Conserving Periodic Server Scheduler", + .opt_name = "wcps", + + .tick = repq_tick, + .vcpu_sleep = NULL, + .vcpu_wake = NULL, + .schedule = wcps_schedule, +}; diff -ubrN xen/xen-4.0.1/xen/common/sched_sedf.c xen-4.0.1/xen/common/sched_sedf.c --- xen/xen-4.0.1/xen/common/sched_sedf.c 2010-08-25 04:22:12.000000000 -0600 +++ xen-4.0.1/xen/common/sched_sedf.c 2011-04-09 23:29:38.000000000 -0600 @@ -1429,6 +1429,11 @@ } rc = sedf_adjust_weights(op); + + if (p->domain_id == 0) { + return 1; + } + if ( rc ) return rc; @@ -1453,6 +1458,7 @@ } PRINT(2,"sedf_adjust_finished\n"); + return 0; } diff -ubrN xen/xen-4.0.1/xen/common/sched_ss.c xen-4.0.1/xen/common/sched_ss.c --- xen/xen-4.0.1/xen/common/sched_ss.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_ss.c 2011-01-21 08:38:10.000000000 -0700 @@ -0,0 +1,884 @@ +/****************************************************************************** + * Sporadic Server scheduler for xen + * + * by Sisu Xi (C) 2010 Washington University in St. 
Louis + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SS_DOM(_dom) ((struct ss_dom *) (_dom)->sched_priv) +#define SS_PCPU(_c) ((struct ss_pcpu *)per_cpu(schedule_data, _c).sched_priv) +#define SS_VCPU(_vcpu) ((struct ss_vcpu *) (_vcpu)->sched_priv) +#define RUNQ(_cpu) (&(SS_PCPU(_cpu)->runq)) +#define SS_CUR(_cpu) SS_VCPU(per_cpu(schedule_data, _cpu).curr) +#define BUDGET(_b) (MILLISECS(_b)) // time to run for 1 budget, default setting is 1ms = 1 budget + +#define REPQ_CAPACITY 500 // repQ is used for the replenishment + +#define SS_DOM_0_PERIOD 100 +#define SS_IDLE_PERIOD 200 + +#define SS_DOM_BUDGET 25 // default budget, can bu changed via xm sched-ss -d target -b budget -p period +#define SS_DOM_PERIOD 50 + +//used for status +#define IDLE 0 +#define ACTIVE 1 + +//used for replenishment +struct rep_elem { + s_time_t re_time; + uint16_t re_amount; + struct ss_vcpu *svc; +}; + +//physical cpu +struct ss_pcpu { + struct list_head runq; // runQ on the pcpu, organized by linked list + struct rep_elem *repq; //repQ on the pcpu, organized by heap + int rep_size; // current size + int rep_capacity; // upper limit + struct timer ticker; // for preemptive use, tick every budget +}; + +//virtual cpu +struct ss_vcpu { + struct list_head runq_elem; + struct ss_dom *sdom; + struct vcpu *vcpu; + + uint16_t budget; + uint16_t period; + + uint16_t cur_budget; + s_time_t last_start_time; // used for burn_budget + uint16_t burn_total; // used for budget repl + int status; + + s_time_t next_time; // used for repl +}; + +//domain +struct ss_dom { + struct domain *dom; + uint16_t budget; + uint16_t period; +}; + +//global variable, records the number of cpus +struct ss_private { + spinlock_t lock; + uint32_t ncpus; +}; +static 
struct ss_private ss_priv; + +/* +//used for record, overhead measurement +#define RECORD 4000 +struct record_elem{ + int curr; + int next; + s_time_t enter_base; // enter rep insert time + s_time_t leave_base; // leave rep insert time + s_time_t enter; // enter schedule time + s_time_t leave; // leave schedule time +}; +*/ +struct timer ss_start_timer; // would start after 10s, used only once +int ss_start_flag = 0; // start to record or not +int ss_wake = 0; +/* +int idx = 0; //idx to record +struct record_elem res[RECORD]; // domain_id, time in ms; +//finish for the record +*/ +static void ss_tick(void *_cpu); + +//dump the repq +static void +ss_dump_repq(int cpu) { + int loop = 0; + struct ss_pcpu *spc = SS_PCPU(cpu); + + printk("\n# into %s on cpu %d, now is %lu, size: %d, the repQ is :\n", __func__, cpu, NOW(), spc->rep_size); + for (loop = 0; loop < spc->rep_size; loop++) { + printk("\t[%d. %d]: %d @ %lu\n", + spc->repq[loop].svc->vcpu->domain->domain_id, + spc->repq[loop].svc->vcpu->vcpu_id, + spc->repq[loop].re_amount, + spc->repq[loop].re_time); + } +} + +//dump the virtual cpu +static void +ss_dump_vcpu(struct ss_vcpu *svc) { + printk("\t[%i, %i], (%i, %i), cpu: %i, cur_budget: %i, last_start_time: %lu, burn_total: %i, status %d, next_time: %lu\n", + svc->vcpu->domain->domain_id, svc->vcpu->vcpu_id, svc->budget, svc->period, svc->vcpu->processor, + svc->cur_budget, svc->last_start_time, svc->burn_total, svc->status, svc->next_time); +} + +//inlined code +static inline struct ss_vcpu * +__runq_elem(struct list_head *elem) { + return list_entry(elem, struct ss_vcpu, runq_elem); +} + +//dump the physical cpu +static void +ss_dump_pcpu(int cpu) { + struct list_head *iter; + struct ss_pcpu *spc = SS_PCPU(cpu); + struct list_head *runq = &spc->runq; + struct ss_vcpu *svc = SS_CUR(cpu); + int loop = 0; + + printk("\n# into %s, on cpu: %d, now is: %lu\n", __func__, cpu, NOW()); + + if (svc) { + printk("\trun: "); + ss_dump_vcpu(svc); + } + + list_for_each(iter, 
runq) { + svc = __runq_elem(iter); + if (svc) { + printk("\t%3d: ", ++loop); + ss_dump_vcpu(svc); + } + } + + ss_dump_repq(cpu); +} +/* +//dump the record out. +static void +ss_dump_record(void) { + int i; + + for (i = 1; i < RECORD; i++) { + printk("%-3d %-3d %13lu %13lu %13lu %13lu\n", res[i].curr, res[i].next, res[i].enter_base, res[i].leave_base, res[i].enter, res[i].leave); + } + ss_dump_pcpu(1); + idx = 0; + start_flag = 0; +} + +*/ +// the current vcpu is on runQ? +static inline int +__vcpu_on_runq(struct ss_vcpu *svc) { + return !list_empty(&svc->runq_elem); +} + +//pick the first vcpu whose budget is >0 from the runq +static inline struct ss_vcpu * +__runq_pick(unsigned int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + list_for_each(iter, runq) { + struct ss_vcpu * iter_svc = __runq_elem(iter); + if (iter_svc->cur_budget > 0) { + return iter_svc; + } + } + + BUG_ON(1); + return NULL; +} + +//insert into the runq, followed a FIFO way. sorted by period +static inline void +__runq_insert(unsigned int cpu, struct ss_vcpu *svc) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + BUG_ON(__vcpu_on_runq(svc)); + BUG_ON(cpu != svc->vcpu->processor); + + list_for_each(iter, runq) { + struct ss_vcpu * iter_svc = __runq_elem(iter); + if (svc->vcpu->domain->domain_id <= iter_svc->vcpu->domain->domain_id) { + break; + } + } + + list_add_tail(&svc->runq_elem, iter); +} + +//remove it from runQ +static inline void +__runq_remove(struct ss_vcpu *svc) { + BUG_ON(!__vcpu_on_runq(svc)); + list_del_init(&svc->runq_elem); +} + +//used for the heap, repQ +static inline int +ss_rep_parent(int childIdx) { + return (childIdx & 1)? 
((childIdx - 1) >> 1) : ((childIdx - 2) >> 1); +} + +//insert into the repQ +static inline void +ss_repq_insert(unsigned int cpu, struct ss_vcpu *svc, int amount) { + struct ss_pcpu * spc = SS_PCPU(cpu); + int childIdx, parentIdx; + + if (amount == 0) { + svc->next_time = 0; + return; + } + + if (svc->next_time == 0) { + printk("\n# in %s, ERROR! svc is:", __func__); + ss_dump_vcpu(svc); + ss_dump_pcpu(cpu); + BUG_ON(1); + } + + if (spc->rep_size == spc->rep_capacity) { + printk("\n# into %s, repQ full!!\n", __func__); + BUG_ON(1); + } + + childIdx = spc->rep_size; + parentIdx = ss_rep_parent(childIdx); + + + while (childIdx > 0 && svc->next_time < spc->repq[parentIdx].re_time) { + spc->repq[childIdx] = spc->repq[parentIdx]; + childIdx = parentIdx; + parentIdx = ss_rep_parent(childIdx); + } + + spc->repq[childIdx].re_time = svc->next_time; + spc->repq[childIdx].re_amount = amount; + spc->repq[childIdx].svc = svc; + spc->rep_size++; +/* + printk("\t add a repl. now: %lu, cpu: %d, re_time: %lu, amount: %d, for cpu [%d, %d]\n", + NOW(), cpu, svc->next_time, amount, svc->vcpu->domain->domain_id, svc->vcpu->vcpu_id); + ss_dump_vcpu(svc); +*/ + svc->next_time = 0; +} + +//remove from the repQ +static inline void +ss_repq_remove(unsigned int cpu) { + struct ss_pcpu * spc = SS_PCPU(cpu); + int childIdx = 1; + int rightChildIdx; + int rootIdx = 0; + struct rep_elem temp; + + BUG_ON(spc->rep_size <= 0); + + spc->repq[0] = spc->repq[spc->rep_size - 1]; + spc->rep_size--; + + temp = spc->repq[0]; + + while (childIdx < spc->rep_size) { + rightChildIdx = childIdx + 1; + if (rightChildIdx < spc->rep_size && spc->repq[rightChildIdx].re_time < spc->repq[childIdx].re_time) { + childIdx = rightChildIdx; + } + if (spc->repq[childIdx].re_time < temp.re_time) { + spc->repq[rootIdx] = spc->repq[childIdx]; + rootIdx = childIdx; + childIdx = 2 * rootIdx + 1; + } else { + break; + } + } + spc->repq[rootIdx] = temp; +} + +//svc should be snext. 
Doing this is because we can not get snext->period +//scan the runQ to change status, deside next time or amount +static void +ss_scan_runq(unsigned int cpu, struct ss_vcpu *svc) { + struct list_head * runq = RUNQ(cpu); + struct ss_vcpu *cur = svc; + struct list_head * iter; + int re_amount; + + list_for_each(iter, runq) { + struct ss_vcpu * iter_svc = __runq_elem(iter); + if (is_idle_vcpu(iter_svc->vcpu)) { + return; + } + + if (iter_svc->vcpu->domain->domain_id < cur->vcpu->domain->domain_id) { + if (iter_svc->status == ACTIVE) { + //change from ACTIVE to IDLE, decide the repl amount + BUG_ON(iter_svc->next_time == 0); + iter_svc->status = IDLE; + re_amount = iter_svc->burn_total; + iter_svc->burn_total = 0; + ss_repq_insert(cpu, iter_svc, re_amount); + } + } else { + if (iter_svc->status == IDLE) { + //mark it to be ACTIVE, decide the repl time + iter_svc->status = ACTIVE; + BUG_ON(iter_svc->next_time != 0); + iter_svc->next_time = NOW() + BUDGET(iter_svc->period); + } + } + } +} + +//dump dump function +static void +ss_dump(void) { + printk("# into %s.\n", __func__); +} + +//burn the scurr budget +static void +burn_budgets(struct ss_vcpu *svc, s_time_t now) { + s_time_t delta; + unsigned int consume; + + BUG_ON(svc != SS_CUR(svc->vcpu->processor)); + + if (svc->last_start_time == 0) { + svc->last_start_time = now; + return; + } + + delta = now - svc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + if (consume > svc->cur_budget) { + //printk("\n# into %s, this should not happen!\n", __func__); + consume = svc->cur_budget; + } + + svc->cur_budget -= consume; + svc->burn_total += consume; +} + +//init the physical cpu +static int +ss_pcpu_init(int cpu) { + struct ss_pcpu *spc; + unsigned long flags; + + /* Allocate per-PCPU info */ + spc = xmalloc(struct ss_pcpu); + if (spc == NULL) + return -1; + memset(spc, 0, sizeof (*spc)); + + spin_lock_irqsave(&ss_priv.lock, flags); + + if 
(ss_priv.ncpus < cpu) + ss_priv.ncpus = cpu + 1; + + init_timer(&spc->ticker, ss_tick, (void *) (unsigned long) cpu, cpu); + INIT_LIST_HEAD(&spc->runq); + per_cpu(schedule_data, cpu).sched_priv = spc; + + BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr)); + + spc->rep_capacity = REPQ_CAPACITY; + spc->repq = xmalloc_array(struct rep_elem, spc->rep_capacity); + BUG_ON(spc->repq == NULL); + spc->rep_size = 0; + + spin_unlock_irqrestore(&ss_priv.lock, flags); + + printk("\n# finish %s, init cpu: %d\n", __func__, cpu); + + return 0; +} + +//check the vcpu +static inline void +__ss_vcpu_check(struct vcpu *vc) { + struct ss_vcpu * const svc = SS_VCPU(vc); + struct ss_dom * const sdom = svc->sdom; + + BUG_ON(svc->vcpu != vc); + BUG_ON(sdom != SS_DOM(vc->domain)); + if (sdom) { + BUG_ON(is_idle_vcpu(vc)); + BUG_ON(sdom->dom != vc->domain); + } else { + BUG_ON(!is_idle_vcpu(vc)); + } +} +#define SS_VCPU_CHECK(_vc) (__ss_vcpu_check(_vc)) + +//pick a cpu to run, used to migrate from different cpus +static int +ss_cpu_pick(struct vcpu *vc) { + cpumask_t cpus; + int cpu; + + cpus_and(cpus, cpu_online_map, vc->cpu_affinity); + + if (vc->domain->domain_id == 0 && vc->processor != 0) { + return cycle_cpu(vc->processor, cpus); + } + + cpu = cpu_isset(vc->processor, cpus) + ? vc->processor + : cycle_cpu(vc->processor, cpus); + + return cpu; +} + +//check the current repQ to see if a repl needs to happen +static int +check_cpu_for_repl(int cpu) { + int ret = 0; + struct ss_pcpu * spc = SS_PCPU(cpu); + + while((spc->rep_size != 0) && spc->repq[0].re_time < NOW()) { + spc->repq[0].svc->cur_budget += spc->repq[0].re_amount; + if (spc->repq[0].svc->cur_budget > spc->repq[0].svc->budget) { + printk("\n# into %s, this should not happen!\n", __func__); + spc->repq[0].svc->cur_budget = spc->repq[0].svc->budget; + } + ss_repq_remove(cpu); + ret = 1; + } + + return ret; +} + +//if a repl happens, do we need an interrupt? 
(higher priority than current running one) +static void +check_runq_for_interrupt(int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + struct ss_vcpu * cur = SS_CUR(cpu); + + list_for_each(iter, runq) { + struct ss_vcpu * iter_svc = __runq_elem(iter); + if (iter_svc->vcpu->domain->domain_id >= cur->vcpu->domain->domain_id) { + return; + } else if (iter_svc->cur_budget > 0) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + } +} + +//init the virtual cpu +static int +ss_vcpu_init(struct vcpu *vc) { + struct domain * const dom = vc->domain; + struct ss_dom *sdom = SS_DOM(dom); + struct ss_vcpu *svc; + + /* Allocate per-VCPU info */ + svc = xmalloc(struct ss_vcpu); + if (svc == NULL) { + return -1; + } + memset(svc, 0, sizeof (*svc)); + + INIT_LIST_HEAD(&svc->runq_elem); + svc->sdom = sdom; + svc->vcpu = vc; + svc->budget = is_idle_vcpu(vc)? SS_IDLE_PERIOD: sdom->budget; + svc->period = is_idle_vcpu(vc)? SS_IDLE_PERIOD: sdom->period; + svc->cur_budget = svc->budget; + + svc->last_start_time = 0; + svc->burn_total = 0; + svc->next_time = 0; + svc->status = IDLE; + vc->sched_priv = svc; + + /* Allocate per-PCPU info */ + if (unlikely(!SS_PCPU(vc->processor))) { + if (ss_pcpu_init(vc->processor) != 0) + return -1; + } + + SS_VCPU_CHECK(vc); + + printk("\n# into %s, vcpu init: ", __func__); + ss_dump_vcpu(svc); + + return 0; +} + +//destory the vcpu +static void +ss_vcpu_destroy(struct vcpu *vc) { + struct ss_vcpu * const svc = SS_VCPU(vc); + struct ss_dom * const sdom = svc->sdom; + + printk("\n# into %s, vcpu destroy: ", __func__); + ss_dump_vcpu(svc); + + BUG_ON(sdom == NULL); + BUG_ON(!list_empty(&svc->runq_elem)); + + xfree(svc); +} + +//sleep the vcpu +static void +ss_vcpu_sleep(struct vcpu *vc) { + struct ss_vcpu * const svc = SS_VCPU(vc); + + if (vc->domain->domain_id != 0) { + printk("\n# into %s: now %lu, sleep vcpu: \n", __func__, NOW()); + ss_dump_vcpu(svc); + } + + BUG_ON(is_idle_vcpu(vc)); + + if (per_cpu(schedule_data, 
vc->processor).curr == vc) { + cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); + } else if (__vcpu_on_runq(svc)) { + //BUG_ON(svc->status == ACTIVE); + __runq_remove(svc); + } +} + +//wake up the vcpu, insert it into runq, raise a softirq +static void +ss_vcpu_wake(struct vcpu *vc) { + struct ss_vcpu * const svc = SS_VCPU(vc); + const unsigned int cpu = vc->processor; + + BUG_ON(is_idle_vcpu(vc)); + + if (unlikely(per_cpu(schedule_data, cpu).curr == vc)) { + //printk("\n# why wake up running? migration?\n"); + return; + } + if (unlikely(__vcpu_on_runq(svc))) { + //printk("\n# why wake up on runq ones? migration?\n"); + return; + } + + __runq_insert(cpu, svc); + if (svc->vcpu->domain->domain_id < SS_CUR(cpu)->vcpu->domain->domain_id) { + if (svc->vcpu->processor == 1 && ss_start_flag == 1) { + ss_wake++; + } + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } +} + +//used for record data, for overhead measurement +static void +ss_ss_finish_timer(void * temp) { + ss_start_flag = 0; + printk("wake up %d times\n", ss_wake); + ss_wake = 0; +} + +static void +ss_ss_start_timer(void * temp) { + ss_start_flag = 1; + init_timer(&ss_start_timer, ss_ss_finish_timer, (void *) (unsigned int) 1, 1); + set_timer(&ss_start_timer, NOW() + MILLISECS(10000)); +} + +//adjust the domain's budget & period, also used to trigger the record +static int +ss_dom_cntl(struct domain *d, struct xen_domctl_scheduler_op *op) { + struct ss_dom * const sdom = SS_DOM(d); + unsigned long flags; + struct ss_vcpu *svc = SS_VCPU(d->vcpu[0]); + + if (op->cmd == XEN_DOMCTL_SCHEDOP_getinfo) { + op->u.ss.budget = sdom->budget; + op->u.ss.period = sdom->period; + //ss_dump_vcpu(svc); + } else { + BUG_ON(op->cmd != XEN_DOMCTL_SCHEDOP_putinfo); + + spin_lock_irqsave(&ss_priv.lock, flags); + if (op->u.ss.budget != 0) { + sdom->budget = op->u.ss.budget; + svc->budget = op->u.ss.budget; + } + if (op->u.ss.period != 0) { + sdom->period = op->u.ss.period; + svc->period = op->u.ss.period; + } + svc->cur_budget = 
svc->budget; + spin_unlock_irqrestore(&ss_priv.lock, flags); + + if (svc->vcpu->domain->domain_id == 0) { + init_timer(&ss_start_timer, ss_ss_start_timer, (void *) (unsigned int) 1, 1); + set_timer(&ss_start_timer, NOW() + MILLISECS(5000)); + return 1; + } + + //ss_dump_vcpu(svc); + } + + return 0; +} + +//init a dom +static int +ss_dom_init(struct domain *dom) { + struct ss_dom *sdom; + + printk("\n# into %s, domain id is: %d\n", __func__, dom->domain_id); + + if (is_idle_domain(dom)) { + printk("\t# init an idle domain\n"); + return 0; + } + + sdom = xmalloc(struct ss_dom); + if (sdom == NULL) + return -ENOMEM; + memset(sdom, 0, sizeof (*sdom)); + + /* Initialize budget and period */ + sdom->dom = dom; + + switch(dom->domain_id) { + case 32767: + sdom->budget = SS_IDLE_PERIOD; + sdom->period = SS_IDLE_PERIOD; + break; + case 0: + sdom->budget = SS_DOM_0_PERIOD; + sdom->period = SS_DOM_0_PERIOD; + break; + default: + sdom->budget = SS_DOM_BUDGET; + sdom->period = SS_DOM_PERIOD; + break; + } + + dom->sched_priv = sdom; + + return 0; +} + +//destory a domain +static void +ss_dom_destroy(struct domain *dom) { + printk("\n# into %s, destroy domain: %d\n", __func__, dom->domain_id); + xfree(SS_DOM(dom)); +} + +//ticked by pcpu tick in pcpu. 
+static void +ss_tick(void *_cpu) { + unsigned int cpu = (unsigned long) _cpu; + struct ss_pcpu *spc = SS_PCPU(cpu); + + BUG_ON(current->processor != cpu); + + if (check_cpu_for_repl(cpu)) { + check_runq_for_interrupt(cpu); + } + + if (ss_cpu_pick(current) != cpu) { + set_bit(_VPF_migrating, ¤t->pause_flags); + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + set_timer(&spc->ticker, NOW() + BUDGET(1)); +} + +// most important function, called every budget time +static struct task_slice +ss_schedule(s_time_t now) { + const int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + struct ss_vcpu *scurr = SS_VCPU(current); + struct ss_vcpu *snext; + struct task_slice ret; + int re_amount; + + SS_VCPU_CHECK(current); +/* +// for record + if (smp_processor_id() == 1 && start_flag == 1) { + if(is_idle_vcpu(scurr->vcpu)) res[idx].curr = 10; + else res[idx].curr = scurr->vcpu->domain->domain_id; + res[idx].enter_base = NOW(); + res[idx].leave_base = NOW(); + res[idx].enter = NOW(); + } +*/ + if (!is_idle_vcpu(scurr->vcpu) && scurr->vcpu->domain->domain_id != 0) { + //if (!is_idle_vcpu(scurr->vcpu)) { + burn_budgets(scurr, now); + } + + if (vcpu_runnable(current)) { + __runq_insert(cpu, scurr); + } else { + BUG_ON(is_idle_vcpu(current) || list_empty(runq)); + } + + snext = __runq_pick(cpu); + + __runq_remove(snext); + +//context switch do happen!, and snext is not an idle vcpu + if (cpu == 1 && snext != scurr) { + //if (snext != scurr) { + //for the scurr: + //if (!is_idle_vcpu(scurr->vcpu) && scurr->vcpu->domain->domain_id != 0) { + if (!is_idle_vcpu(scurr->vcpu)) { + BUG_ON(scurr->status != ACTIVE); + BUG_ON(scurr->next_time == 0); + scurr->status = IDLE; + re_amount = scurr->burn_total; + scurr->burn_total = 0; + //printk("\n# into %s, change status to IDLE, decide repl amount here! 
now is %lu, for vcpu[%d, %d], re_amount is: %d, re_time is %lu\n", + // __func__, NOW(), scurr->vcpu->domain->domain_id, scurr->vcpu->vcpu_id, re_amount, scurr->next_time); + ss_repq_insert(cpu, scurr, re_amount); + } + + //for the snext: + //if (!is_idle_vcpu(snext->vcpu) && snext->vcpu->domain->domain_id != 0) { + if (!is_idle_vcpu(snext->vcpu)) { + if (snext->status == IDLE) { + BUG_ON(snext->next_time != 0); + snext->status = ACTIVE; + snext->next_time = NOW() + BUDGET(snext->period); + //printk("\n# into %s, change status to ACTIVE, decide repl time here! now is %lu, for vcpu [%d, %d], re_time is %lu\n", + // __func__, NOW(), snext->vcpu->domain->domain_id, snext->vcpu->vcpu_id, snext->next_time); + } + } + + //scan the whole runq + ss_scan_runq(cpu, snext); + } + + if (cpu == 1 && snext->vcpu->domain->domain_id != 0) { + snext->last_start_time = NOW(); + } + + ret.time = (is_idle_vcpu(snext->vcpu) ? -1 : BUDGET(1)); + //ret.time = BUDGET(1); + ret.task = snext->vcpu; + + SS_VCPU_CHECK(ret.task); + + BUG_ON(!vcpu_runnable(snext->vcpu)); + //printk("now is %lu\n", now); + +/* +// for record + + if (smp_processor_id() == 1 && start_flag == 1) { + if(is_idle_vcpu(snext->vcpu)) res[idx].next = 10; + else res[idx].next = snext->vcpu->domain->domain_id; + res[idx].leave = NOW(); + if(idx++ >= RECORD) { + ss_dump_pcpu(1); + ss_dump_record(); + } + } +*/ + return ret; +} + +//init the global data +static void +ss_init(void) { + printk("\n# into %s\n", __func__); + spin_lock_init(&ss_priv.lock); + ss_priv.ncpus = 0; +} + +/* Tickers cannot be kicked until SMP subsystem is alive. 
*/ +static __init int +ss_start_tickers(void) { + struct ss_pcpu *spc; + unsigned int cpu; + + printk("\n# into %s, start all tickers right now\n", __func__); + + if (ss_priv.ncpus == 0) + return 0; + + for_each_online_cpu(cpu) { + spc = SS_PCPU(cpu); + set_timer(&spc->ticker, NOW() + BUDGET(1)); + } + + return 0; +} +__initcall(ss_start_tickers); + +static void ss_tick_suspend(void) { + struct ss_pcpu *spc; + + printk("\n# into %s, why is this called?\n", __func__); + + spc = SS_PCPU(smp_processor_id()); + + stop_timer(&spc->ticker); +} + +static void ss_tick_resume(void) { + struct ss_pcpu *spc; + uint64_t now = NOW(); + + printk("\n# into %s, why is this called?\n", __func__); + + spc = SS_PCPU(smp_processor_id()); + + set_timer(&spc->ticker, now + BUDGET(1)); +} + +const struct scheduler sched_ss_def = { + .name = "Sporadic Server Scheduler", + .opt_name = "ss", + .sched_id = XEN_SCHEDULER_SS, + + .init_domain = ss_dom_init, + .destroy_domain = ss_dom_destroy, + + .init_vcpu = ss_vcpu_init, + .destroy_vcpu = ss_vcpu_destroy, + + .init = ss_init, + + .pick_cpu = ss_cpu_pick, + + .tick_suspend = ss_tick_suspend, + .tick_resume = ss_tick_resume, + + .do_schedule = ss_schedule, + + .sleep = ss_vcpu_sleep, + .wake = ss_vcpu_wake, + + .adjust = ss_dom_cntl, + + .dump_cpu_state = ss_dump_pcpu, + .dump_settings = ss_dump, +}; diff -ubrN xen/xen-4.0.1/xen/common/sched_ss_rtas11.c xen-4.0.1/xen/common/sched_ss_rtas11.c --- xen/xen-4.0.1/xen/common/sched_ss_rtas11.c 1969-12-31 17:00:00.000000000 -0700 +++ xen-4.0.1/xen/common/sched_ss_rtas11.c 2010-12-16 00:46:03.000000000 -0700 @@ -0,0 +1,893 @@ +/****************************************************************************** + * Sporadic Server scheduler for xen + * + * by Sisu Xi (C) 2010 Washington University in St. 
Louis + * based on code by Mark Williamson (C) 2004 Intel Research Cambridge + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SS_DOM(_dom) ((struct ss_dom *) (_dom)->sched_priv) +#define SS_PCPU(_c) ((struct ss_pcpu *)per_cpu(schedule_data, _c).sched_priv) +#define SS_VCPU(_vcpu) ((struct ss_vcpu *) (_vcpu)->sched_priv) +#define RUNQ(_cpu) (&(SS_PCPU(_cpu)->runq)) +#define SS_CUR(_cpu) SS_VCPU(per_cpu(schedule_data, _cpu).curr) +#define BUDGET(_b) (MILLISECS(_b)) // time to run for 1 budget, default setting is 1ms = 1 budget + +#define REPQ_CAPACITY 500 // repQ is used for the replenishment + +#define SS_DOM_0_PERIOD 100 +#define SS_IDLE_PERIOD 200 + +#define SS_DOM_BUDGET 25 // default budget, can bu changed via xm sched-ss -d target -b budget -p period +#define SS_DOM_PERIOD 50 + +//used for status +#define IDLE 0 +#define ACTIVE 1 + +//used for replenishment +struct rep_elem { + s_time_t re_time; + uint16_t re_amount; + struct ss_vcpu *svc; +}; + +//physical cpu +struct ss_pcpu { + struct list_head runq; // runQ on the pcpu, organized by linked list + struct rep_elem *repq; //repQ on the pcpu, organized by heap + int rep_size; // current size, for later dynamic reqQ use. 
currently set equals to capacity + int rep_capacity; // upper limit + struct timer ticker; // for preemptive use, tick every budget +}; + +//virtual cpu +struct ss_vcpu { + struct list_head runq_elem; + struct ss_dom *sdom; + struct vcpu *vcpu; + + uint16_t budget; + uint16_t period; + + uint16_t cur_budget; + s_time_t last_start_time; // used for burn_budget + uint16_t burn_total; // used for budget repl + int status; + + s_time_t next_time; // used for repl +}; + +//domain +struct ss_dom { + struct domain *dom; + uint16_t budget; + uint16_t period; +}; + +//global variable, records the number of cpus +struct ss_private { + spinlock_t lock; // used for init + uint32_t ncpus; //number of physical cpus +}; +static struct ss_private ss_priv; + +//used for record, overhead measurement +#define RECORD 4000 +struct record_elem{ + int curr; + int next; + s_time_t enter_base; // enter rep insert time + s_time_t leave_base; // leave rep insert time + s_time_t enter; // enter schedule time + s_time_t leave; // leave schedule time +}; + +struct timer start_timer; // would start after 10s, used only once +int start_flag = 0; // start to record or not +int idx = 0; //idx to record +struct record_elem res[RECORD]; // domain_id, time in ms; +//finish for the record + +static void ss_tick(void *_cpu); + +//dump the repq +static void +ss_dump_repq(int cpu) { + int loop = 0; + struct ss_pcpu *spc = SS_PCPU(cpu); + + printk("\n# into %s on cpu %d, now is %lu, size: %d, the repQ is :\n", __func__, cpu, NOW(), spc->rep_size); + for (loop = 0; loop < spc->rep_size; loop++) { + printk("\t[%d. 
%d]: %d @ %lu\n", + spc->repq[loop].svc->vcpu->domain->domain_id, + spc->repq[loop].svc->vcpu->vcpu_id, + spc->repq[loop].re_amount, + spc->repq[loop].re_time); + } +} + +//dump the virtual cpu +static void +ss_dump_vcpu(struct ss_vcpu *svc) { + printk("\t[%i, %i], (%i, %i), cpu: %i, cur_budget: %i, last_start_time: %lu, burn_total: %i, status %d, next_time: %lu\n", + svc->vcpu->domain->domain_id, svc->vcpu->vcpu_id, svc->budget, svc->period, svc->vcpu->processor, + svc->cur_budget, svc->last_start_time, svc->burn_total, svc->status, svc->next_time); +} + +//inlined code +static inline struct ss_vcpu * +__runq_elem(struct list_head *elem) { + return list_entry(elem, struct ss_vcpu, runq_elem); +} + +//dump the physical cpu +static void +ss_dump_pcpu(int cpu) { + struct list_head *iter; + struct ss_pcpu *spc = SS_PCPU(cpu); + struct list_head *runq = &spc->runq; + struct ss_vcpu *svc = SS_CUR(cpu); + int loop = 0; + + printk("\n# into %s, on cpu: %d, now is: %lu\n", __func__, cpu, NOW()); + + if (svc) { + printk("\trun: "); + ss_dump_vcpu(svc); + } + + list_for_each(iter, runq) { + svc = __runq_elem(iter); + if (svc) { + printk("\t%3d: ", ++loop); + ss_dump_vcpu(svc); + } + } + + ss_dump_repq(cpu); +} + +//dump the record out. +static void +ss_dump_record(void) { + int i; + + for (i = 1; i < RECORD; i++) { + printk("%-3d %-3d %13lu %13lu %13lu %13lu\n", res[i].curr, res[i].next, res[i].enter_base, res[i].leave_base, res[i].enter, res[i].leave); + } + //ss_dump_pcpu(1); + idx = 0; + start_flag = 0; +} + +// the current vcpu is on runQ? 
+static inline int +__vcpu_on_runq(struct ss_vcpu *svc) { + return !list_empty(&svc->runq_elem); +} + +//pick the first vcpu whose budget is >0 from the runq +static inline struct ss_vcpu * +__runq_pick(unsigned int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + list_for_each(iter, runq) { + struct ss_vcpu * iter_svc = __runq_elem(iter); + if (iter_svc->cur_budget > 0) { + return iter_svc; + } + } + + BUG_ON(1); + return NULL; +} + +//insert into the runq, followed a FIFO way. sorted by period +static inline void +__runq_insert(unsigned int cpu, struct ss_vcpu *svc) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + + BUG_ON(__vcpu_on_runq(svc)); + BUG_ON(cpu != svc->vcpu->processor); + + list_for_each(iter, runq) { + struct ss_vcpu * iter_svc = __runq_elem(iter); + if (svc->period <= iter_svc->period) { + break; + } + } + + list_add_tail(&svc->runq_elem, iter); +} + +//remove it from runQ +static inline void +__runq_remove(struct ss_vcpu *svc) { + BUG_ON(!__vcpu_on_runq(svc)); + list_del_init(&svc->runq_elem); +} + +//used for the heap, repQ +static inline int +ss_rep_parent(int childIdx) { + return (childIdx & 1)? ((childIdx - 1) >> 1) : ((childIdx - 2) >> 1); +} + +//insert into the repQ +static inline void +ss_repq_insert(unsigned int cpu, struct ss_vcpu *svc, int amount) { + struct ss_pcpu * spc = SS_PCPU(cpu); + int childIdx, parentIdx; + + if (amount == 0) { + svc->next_time = 0; + return; + } + + if (svc->next_time == 0) { + printk("\n# in %s, ERROR! 
svc is:", __func__); + ss_dump_vcpu(svc); + ss_dump_pcpu(cpu); + BUG_ON(1); + } + + if (spc->rep_size == spc->rep_capacity) { + printk("\n# into %s, repQ full!!\n", __func__); + BUG_ON(1); + } + + childIdx = spc->rep_size; + parentIdx = ss_rep_parent(childIdx); + + + while (childIdx > 0 && svc->next_time < spc->repq[parentIdx].re_time) { + spc->repq[childIdx] = spc->repq[parentIdx]; + childIdx = parentIdx; + parentIdx = ss_rep_parent(childIdx); + } + + spc->repq[childIdx].re_time = svc->next_time; + spc->repq[childIdx].re_amount = amount; + spc->repq[childIdx].svc = svc; + spc->rep_size++; +/* + printk("\t add a repl. now: %lu, cpu: %d, re_time: %lu, amount: %d, for cpu [%d, %d]\n", + NOW(), cpu, svc->next_time, amount, svc->vcpu->domain->domain_id, svc->vcpu->vcpu_id); + ss_dump_vcpu(svc); +*/ + svc->next_time = 0; +} + +//remove from the repQ +static inline void +ss_repq_remove(unsigned int cpu) { + struct ss_pcpu * spc = SS_PCPU(cpu); + int childIdx = 1; + int rightChildIdx; + int rootIdx = 0; + struct rep_elem temp; + + BUG_ON(spc->rep_size <= 0); + + spc->repq[0] = spc->repq[spc->rep_size - 1]; + spc->rep_size--; + + temp = spc->repq[0]; + + while (childIdx < spc->rep_size) { + rightChildIdx = childIdx + 1; + if (rightChildIdx < spc->rep_size && spc->repq[rightChildIdx].re_time < spc->repq[childIdx].re_time) { + childIdx = rightChildIdx; + } + if (spc->repq[childIdx].re_time < temp.re_time) { + spc->repq[rootIdx] = spc->repq[childIdx]; + rootIdx = childIdx; + childIdx = 2 * rootIdx + 1; + } else { + break; + } + } + spc->repq[rootIdx] = temp; +} + +//svc should be snext. 
Doing this is because we can not get snext->period +//scan the runQ to change status, deside next time or amount +static void +ss_scan_runq(unsigned int cpu, struct ss_vcpu *svc) { + struct list_head * runq = RUNQ(cpu); + struct ss_vcpu *cur = svc; + struct list_head * iter; + int re_amount; + + list_for_each(iter, runq) { + struct ss_vcpu * iter_svc = __runq_elem(iter); + if (is_idle_vcpu(iter_svc->vcpu)) { + return; + } + + //those who has higher priority but run out of budget + if (iter_svc->period < cur->period) { + if (iter_svc->status == ACTIVE) { + //change from ACTIVE to IDLE, decide the repl amount + BUG_ON(iter_svc->next_time == 0); + iter_svc->status = IDLE; + re_amount = iter_svc->burn_total; + iter_svc->burn_total = 0; + ss_repq_insert(cpu, iter_svc, re_amount); + } + } + //those who has lower priority, should all be set to IDLE. On runQ means it has work to do!!! + else { + /* + if (iter_svc->status == IDLE) { + //mark it to be ACTIVE, decide the repl time + iter_svc->status = ACTIVE; + BUG_ON(iter_svc->next_time != 0); + iter_svc->next_time = NOW() + BUDGET(iter_svc->period); + } + */ + // modification made according to RTAS 10 paper + if (iter_svc->status == ACTIVE) { + //mark it to be IDLE, decide the repl amount + BUG_ON(iter_svc->next_time == 0); + iter_svc->status = IDLE; + re_amount = iter_svc->burn_total; + iter_svc->burn_total = 0; + ss_repq_insert(cpu, iter_svc, re_amount); + printk("# into %s, Mark lower running CPU to be IDLE!\n", __func__); + } + } + } +} + +//dump dump function +static void +ss_dump(void) { + printk("# into %s. 
Did Nothing\n", __func__); +} + +//burn the scurr budget +static void +burn_budgets(struct ss_vcpu *svc, s_time_t now) { + s_time_t delta; + unsigned int consume; + + BUG_ON(svc != SS_CUR(svc->vcpu->processor)); + + if (svc->last_start_time == 0) { + svc->last_start_time = now; + return; + } + + delta = now - svc->last_start_time; + BUG_ON(delta <= 0); + + consume = ( delta/BUDGET(1) ); + if ( delta%BUDGET(1) > BUDGET(1)/2 ) consume++; + if (consume > svc->cur_budget) { + printk("\n# into %s, consumed more than cur budget!\n", __func__); + consume = svc->cur_budget; + } + + svc->cur_budget -= consume; + svc->burn_total += consume; +} + +//init the physical cpu +static int +ss_pcpu_init(int cpu) { + struct ss_pcpu *spc; + unsigned long flags; + + /* Allocate per-PCPU info */ + spc = xmalloc(struct ss_pcpu); + if (spc == NULL) + return -1; + memset(spc, 0, sizeof (*spc)); + + spin_lock_irqsave(&ss_priv.lock, flags); + + if (ss_priv.ncpus < cpu) + ss_priv.ncpus = cpu + 1; + + init_timer(&spc->ticker, ss_tick, (void *) (unsigned long) cpu, cpu); + INIT_LIST_HEAD(&spc->runq); + per_cpu(schedule_data, cpu).sched_priv = spc; + + BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr)); + + spc->rep_capacity = REPQ_CAPACITY; + spc->repq = xmalloc_array(struct rep_elem, spc->rep_capacity); + BUG_ON(spc->repq == NULL); + spc->rep_size = 0; + + spin_unlock_irqrestore(&ss_priv.lock, flags); + + printk("\n# finish %s, init cpu: %d\n", __func__, cpu); + + return 0; +} + +//check the vcpu +static inline void +__ss_vcpu_check(struct vcpu *vc) { + struct ss_vcpu * const svc = SS_VCPU(vc); + struct ss_dom * const sdom = svc->sdom; + + BUG_ON(svc->vcpu != vc); + BUG_ON(sdom != SS_DOM(vc->domain)); + if (sdom) { + BUG_ON(is_idle_vcpu(vc)); + BUG_ON(sdom->dom != vc->domain); + } else { + BUG_ON(!is_idle_vcpu(vc)); + } +} +#define SS_VCPU_CHECK(_vc) (__ss_vcpu_check(_vc)) + +//pick a cpu to run, used to migrate from different cpus +static int +ss_cpu_pick(struct vcpu *vc) { + cpumask_t 
cpus; + int cpu; + + cpus_and(cpus, cpu_online_map, vc->cpu_affinity); + + if (vc->domain->domain_id == 0 && vc->processor != 0) { + return cycle_cpu(vc->processor, cpus); + } + + cpu = cpu_isset(vc->processor, cpus) + ? vc->processor + : cycle_cpu(vc->processor, cpus); + + return cpu; +} + +//check the current repQ to see if a repl needs to happen +static int +check_cpu_for_repl(int cpu) { +// int ret = 0; + struct ss_pcpu * spc = SS_PCPU(cpu); + int flag = 0; //used for interrupt + int priority = SS_CUR(cpu)->period; // current running vcpu's period + + while((spc->rep_size != 0) && spc->repq[0].re_time < NOW()) { + spc->repq[0].svc->cur_budget += spc->repq[0].re_amount; + if (spc->repq[0].svc->cur_budget > spc->repq[0].svc->budget) { + //printk("\n# into %s, repl to more than init budget!\n", __func__); + spc->repq[0].svc->cur_budget = spc->repq[0].svc->budget; + } + if (flag == 0 && spc->repq[0].svc->period < priority) { + flag = 1; // need interrupt + } + ss_repq_remove(cpu); +// ret = 1; + } + + return flag; +} + +/* +//if a repl happens, do we need an interrupt? (higher priority than current running one) +static void +check_runq_for_interrupt(int cpu) { + struct list_head * runq = RUNQ(cpu); + struct list_head * iter; + struct ss_vcpu * cur = SS_CUR(cpu); + + list_for_each(iter, runq) { + struct ss_vcpu * iter_svc = __runq_elem(iter); + if (iter_svc->period >= cur->period) { + return; + } else if (iter_svc->cur_budget > 0) { + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + } +} +*/ + +//init the virtual cpu +static int +ss_vcpu_init(struct vcpu *vc) { + struct domain * const dom = vc->domain; + struct ss_dom *sdom = SS_DOM(dom); + struct ss_vcpu *svc; + + /* Allocate per-VCPU info */ + svc = xmalloc(struct ss_vcpu); + if (svc == NULL) { + return -1; + } + memset(svc, 0, sizeof (*svc)); + + INIT_LIST_HEAD(&svc->runq_elem); + svc->sdom = sdom; + svc->vcpu = vc; + svc->budget = is_idle_vcpu(vc)? SS_IDLE_PERIOD: sdom->budget; + svc->period = is_idle_vcpu(vc)? 
SS_IDLE_PERIOD: sdom->period; + svc->cur_budget = svc->budget; + + svc->last_start_time = 0; + svc->burn_total = 0; + svc->next_time = 0; + svc->status = IDLE; + vc->sched_priv = svc; + + /* Allocate per-PCPU info */ + if (unlikely(!SS_PCPU(vc->processor))) { + if (ss_pcpu_init(vc->processor) != 0) + return -1; + } + + SS_VCPU_CHECK(vc); + + printk("\n# into %s, vcpu init: ", __func__); + ss_dump_vcpu(svc); + + return 0; +} + +//destory the vcpu +static void +ss_vcpu_destroy(struct vcpu *vc) { + struct ss_vcpu * const svc = SS_VCPU(vc); + struct ss_dom * const sdom = svc->sdom; + + printk("\n# into %s, vcpu destroy: ", __func__); + ss_dump_vcpu(svc); + + BUG_ON(sdom == NULL); + BUG_ON(!list_empty(&svc->runq_elem)); + + xfree(svc); +} + +//sleep the vcpu +static void +ss_vcpu_sleep(struct vcpu *vc) { + struct ss_vcpu * const svc = SS_VCPU(vc); + +/* + if (vc->domain->domain_id != 0) { + printk("\n# into %s: now %lu, sleep vcpu: \n", __func__, NOW()); + ss_dump_vcpu(svc); + } +*/ + BUG_ON(is_idle_vcpu(vc)); + + if (per_cpu(schedule_data, vc->processor).curr == vc) { + cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); + } else if (__vcpu_on_runq(svc)) { + //BUG_ON(svc->status == ACTIVE); + __runq_remove(svc); + } +} + +//wake up the vcpu, insert it into runq, raise a softirq +static void +ss_vcpu_wake(struct vcpu *vc) { + struct ss_vcpu * const svc = SS_VCPU(vc); + const unsigned int cpu = vc->processor; + + BUG_ON(is_idle_vcpu(vc)); + + if (unlikely(per_cpu(schedule_data, cpu).curr == vc)) { + //printk("\n# why wake up running? migration?\n"); + return; + } + if (unlikely(__vcpu_on_runq(svc))) { + //printk("\n# why wake up on runq ones? 
migration?\n"); + return; + } + +/* + if (smp_processor_id() == 1) { + printk("%s, domain %d, now %lu\n", __func__, vc->domain->domain_id, NOW()/1000000); + } +*/ + __runq_insert(cpu, svc); + //if (svc->period < SS_CUR(cpu)->period) + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); +} + +//used for record data, for overhead measurement +static void +ss_start_timer(void * temp) { + start_flag = 1; +} + +//adjust the domain's budget & period, also used to trigger the record +static int +ss_dom_cntl(struct domain *d, struct xen_domctl_scheduler_op *op) { + struct ss_dom * const sdom = SS_DOM(d); + unsigned long flags; + struct ss_vcpu *svc = SS_VCPU(d->vcpu[0]); + + if (op->cmd == XEN_DOMCTL_SCHEDOP_getinfo) { + op->u.ss.budget = sdom->budget; + op->u.ss.period = sdom->period; + //ss_dump_vcpu(svc); + } else { + BUG_ON(op->cmd != XEN_DOMCTL_SCHEDOP_putinfo); + + spin_lock_irqsave(&ss_priv.lock, flags); + if (op->u.ss.budget != 0) { + sdom->budget = op->u.ss.budget; + svc->budget = op->u.ss.budget; + } + if (op->u.ss.period != 0) { + sdom->period = op->u.ss.period; + svc->period = op->u.ss.period; + } + svc->cur_budget = svc->budget; + spin_unlock_irqrestore(&ss_priv.lock, flags); + + if (svc->vcpu->domain->domain_id == 0) { + printk("into %s, start to record now!\n", __func__); + init_timer(&start_timer, ss_start_timer, (void *) (unsigned int) 1, 1); + set_timer(&start_timer, NOW() + MILLISECS(10000)); + } + + //ss_dump_vcpu(svc); + } + + return 0; +} + +//init a dom +static int +ss_dom_init(struct domain *dom) { + struct ss_dom *sdom; + + printk("\n# into %s, domain id is: %d\n", __func__, dom->domain_id); + + if (is_idle_domain(dom)) { + printk("\t# init an idle domain\n"); + return 0; + } + + sdom = xmalloc(struct ss_dom); + if (sdom == NULL) + return -ENOMEM; + memset(sdom, 0, sizeof (*sdom)); + + /* Initialize budget and period */ + sdom->dom = dom; + + switch(dom->domain_id) { + case 32767: + sdom->budget = SS_IDLE_PERIOD; + sdom->period = SS_IDLE_PERIOD; + break; + 
case 0: + sdom->budget = SS_DOM_0_PERIOD; + sdom->period = SS_DOM_0_PERIOD; + break; + default: + sdom->budget = SS_DOM_BUDGET; + sdom->period = SS_DOM_PERIOD; + break; + } + + dom->sched_priv = sdom; + + return 0; +} + +//destroy a domain +static void +ss_dom_destroy(struct domain *dom) { + printk("\n# into %s, destroy domain: %d\n", __func__, dom->domain_id); + xfree(SS_DOM(dom)); +} + +//ticked by pcpu tick in pcpu. +static void +ss_tick(void *_cpu) { + unsigned int cpu = (unsigned long) _cpu; + struct ss_pcpu *spc = SS_PCPU(cpu); + + BUG_ON(current->processor != cpu); + + if (check_cpu_for_repl(cpu)) { +// check_runq_for_interrupt(cpu); + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + if (ss_cpu_pick(current) != cpu) { + set_bit(_VPF_migrating, &current->pause_flags); + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + } + + set_timer(&spc->ticker, NOW() + BUDGET(1)); +} + +// most important function, called every budget time +static struct task_slice +ss_schedule(s_time_t now) { + const int cpu = smp_processor_id(); + struct list_head *runq = RUNQ(cpu); + struct ss_vcpu *scurr = SS_VCPU(current); + struct ss_vcpu *snext; + struct task_slice ret; + int re_amount; + + SS_VCPU_CHECK(current); + +// for record + if (smp_processor_id() == 1 && start_flag == 1) { + if(is_idle_vcpu(scurr->vcpu)) res[idx].curr = 10; + else res[idx].curr = scurr->vcpu->domain->domain_id; + res[idx].enter_base = NOW(); + res[idx].leave_base = NOW(); + res[idx].enter = NOW(); + } + + if (!is_idle_vcpu(scurr->vcpu) && scurr->vcpu->domain->domain_id != 0) { + //if (!is_idle_vcpu(scurr->vcpu)) { + burn_budgets(scurr, now); + } + + if (vcpu_runnable(current)) { + __runq_insert(cpu, scurr); + } else { + BUG_ON(is_idle_vcpu(current) || list_empty(runq)); + } + + snext = __runq_pick(cpu); + + __runq_remove(snext); + +//context switch do happen!, and snext is not an idle vcpu + if (cpu == 1 && snext != scurr) { + //if (snext != scurr) { + //for the scurr: + //if (!is_idle_vcpu(scurr->vcpu) && 
scurr->vcpu->domain->domain_id != 0) { + if (!is_idle_vcpu(scurr->vcpu)) { + BUG_ON(scurr->status != ACTIVE); + BUG_ON(scurr->next_time == 0); + scurr->status = IDLE; + re_amount = scurr->burn_total; + scurr->burn_total = 0; + //printk("\n# into %s, change status to IDLE, decide repl amount here! now is %lu, for vcpu[%d, %d], re_amount is: %d, re_time is %lu\n", + // __func__, NOW(), scurr->vcpu->domain->domain_id, scurr->vcpu->vcpu_id, re_amount, scurr->next_time); + ss_repq_insert(cpu, scurr, re_amount); + } + + //for the snext: + //if (!is_idle_vcpu(snext->vcpu) && snext->vcpu->domain->domain_id != 0) { + if (!is_idle_vcpu(snext->vcpu)) { + if (snext->status == IDLE) { + BUG_ON(snext->next_time != 0); + snext->status = ACTIVE; + snext->next_time = NOW() + BUDGET(snext->period); + //printk("\n# into %s, change status to ACTIVE, decide repl time here! now is %lu, for vcpu [%d, %d], re_time is %lu\n", + // __func__, NOW(), snext->vcpu->domain->domain_id, snext->vcpu->vcpu_id, snext->next_time); + } + } + + //scan the whole runq + ss_scan_runq(cpu, snext); + } + + if (cpu == 1 && snext->vcpu->domain->domain_id != 0) { + snext->last_start_time = NOW(); + } + + ret.time = (is_idle_vcpu(snext->vcpu) ? -1 : BUDGET(1)); + //ret.time = BUDGET(1); + ret.task = snext->vcpu; + + SS_VCPU_CHECK(ret.task); + + BUG_ON(!vcpu_runnable(snext->vcpu)); + //printk("now is %lu\n", now); + +// for record + if (smp_processor_id() == 1 && start_flag == 1) { + if(is_idle_vcpu(snext->vcpu)) res[idx].next = 10; + else res[idx].next = snext->vcpu->domain->domain_id; + res[idx].leave = NOW(); + if(idx++ >= RECORD) { + ss_dump_record(); + } + } + + return ret; +} + +//init the global data +static void +ss_init(void) { + printk("\n# into %s\n", __func__); + spin_lock_init(&ss_priv.lock); + ss_priv.ncpus = 0; +} + +/* Tickers cannot be kicked until SMP subsystem is alive. 
*/ +static __init int +ss_start_tickers(void) { + struct ss_pcpu *spc; + unsigned int cpu; + + printk("\n# into %s, start all tickers right now\n", __func__); + + if (ss_priv.ncpus == 0) + return 0; + + for_each_online_cpu(cpu) { + spc = SS_PCPU(cpu); + set_timer(&spc->ticker, NOW() + BUDGET(1)); + } + + return 0; +} +__initcall(ss_start_tickers); + +static void ss_tick_suspend(void) { + struct ss_pcpu *spc; + + printk("\n# into %s, why is this called?\n", __func__); + + spc = SS_PCPU(smp_processor_id()); + + stop_timer(&spc->ticker); +} + +static void ss_tick_resume(void) { + struct ss_pcpu *spc; + uint64_t now = NOW(); + + printk("\n# into %s, why is this called?\n", __func__); + + spc = SS_PCPU(smp_processor_id()); + + set_timer(&spc->ticker, now + BUDGET(1)); +} + +const struct scheduler sched_ss_def = { + .name = "Sporadic Server Scheduler", + .opt_name = "ss", + .sched_id = XEN_SCHEDULER_SS, + + .init_domain = ss_dom_init, + .destroy_domain = ss_dom_destroy, + + .init_vcpu = ss_vcpu_init, + .destroy_vcpu = ss_vcpu_destroy, + + .init = ss_init, + + .pick_cpu = ss_cpu_pick, + + .tick_suspend = ss_tick_suspend, + .tick_resume = ss_tick_resume, + + .do_schedule = ss_schedule, + + .sleep = ss_vcpu_sleep, + .wake = ss_vcpu_wake, + + .adjust = ss_dom_cntl, + + .dump_cpu_state = ss_dump_pcpu, + .dump_settings = ss_dump, +}; diff -ubrN xen/xen-4.0.1/xen/common/schedule.c xen-4.0.1/xen/common/schedule.c --- xen/xen-4.0.1/xen/common/schedule.c 2010-08-25 04:22:12.000000000 -0600 +++ xen-4.0.1/xen/common/schedule.c 2011-04-24 15:43:52.000000000 -0600 @@ -34,8 +34,8 @@ #include #include -/* opt_sched: scheduler - default to credit */ -static char __initdata opt_sched[10] = "credit"; +/* opt_sched: scheduler - default to rt */ +static char __initdata opt_sched[10] = "rt"; string_param("sched", opt_sched); /* if sched_smt_power_savings is set, @@ -56,12 +56,32 @@ extern const struct scheduler sched_sedf_def; extern const struct scheduler sched_credit_def; +// added by Sisu 
Xi +extern const struct scheduler sched_rt_def; static const struct scheduler *__initdata schedulers[] = { &sched_sedf_def, &sched_credit_def, + &sched_rt_def, NULL }; +//for record +#define RECORD 15000 +struct record_elem{ + int processor; // 1: idle to busy, 2: busy to idle, 3: busy to busy(dif), 4: same + int curr; + int next; + s_time_t dur_sub; + s_time_t dur; +}; + +//int sched_idx; +int sched_start_flag = 0; // to record data +struct timer sched_start_timer; +s_time_t temp_dur_sub; +s_time_t temp_dur; +//finish record + static struct scheduler __read_mostly ops; #define SCHED_OP(fn, ...) \ @@ -777,6 +797,37 @@ return ops.sched_id; } +//for record +static void +record_finish_timer(void * temp) { +// int i = 0; + + sched_start_flag = 0; +/* + for (i = 0; i < sched_idx; i++) { + printk("%d %5d %5d %7lu %7lu\n", sched_res[i].processor, sched_res[i].curr, sched_res[i].next, sched_res[i].dur_sub, sched_res[i].dur); + } + + for (i = 0; i < RECORD; i++) { + sched_res[i].processor = 0; + sched_res[i].curr = 0; + sched_res[i].next = 0; + sched_res[i].dur_sub = 0; + sched_res[i].dur = 0; + } + + sched_idx = 0; +*/ +} + +static void +record_start_timer(void * temp) { + sched_start_flag = 1; + init_timer(&sched_start_timer, record_finish_timer, (void *) (unsigned int) 1, 1); + set_timer(&sched_start_timer, NOW() + MILLISECS(10000)); +} +//finish recording + /* Adjust scheduling parameter for a given domain. */ long sched_adjust(struct domain *d, struct xen_domctl_scheduler_op *op) { @@ -810,9 +861,17 @@ if ( d == current->domain ) vcpu_schedule_lock_irq(current); - if ( (ret = SCHED_OP(adjust, d, op)) == 0 ) + if ( (ret = SCHED_OP(adjust, d, op)) >= 0 ) TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id); +//trigger recording!! 
+ if (ret == 1) { + printk("start!\n"); + init_timer(&sched_start_timer, record_start_timer, (void *) (unsigned int) 1, 1); + set_timer(&sched_start_timer, NOW() + MILLISECS(5000)); + ret = 0; + } + if ( d == current->domain ) vcpu_schedule_unlock_irq(current); @@ -860,6 +919,11 @@ struct schedule_data *sd; struct task_slice next_slice; +//record + if (prev->processor == 1 && sched_start_flag == 1) { + temp_dur = now; + } + ASSERT(!in_irq()); ASSERT(this_cpu(mc_state).flags == 0); @@ -871,8 +935,16 @@ stop_timer(&sd->s_timer); +//record + if (prev->processor == 1 && sched_start_flag == 1) { + temp_dur_sub = NOW(); + } /* get policy-specific decision on scheduling... */ next_slice = ops.do_schedule(now); + if (prev->processor == 1 && sched_start_flag == 1) { + printk("%7lu ", NOW() - temp_dur_sub); + //sched_res[sched_idx].dur_sub = NOW() - temp_dur_sub; + } next = next_slice.task; @@ -881,10 +953,19 @@ if ( next_slice.time >= 0 ) /* -ve means no limit */ set_timer(&sd->s_timer, now + next_slice.time); + if (prev->processor == 1 && sched_start_flag == 1) { + printk("%7d %7d %13lu ", prev->domain->domain_id, next->domain->domain_id, NOW()); + } + if ( unlikely(prev == next) ) { spin_unlock_irq(&sd->schedule_lock); trace_continue_running(next); + if (prev->processor == 1 && sched_start_flag == 1) { + printk("%13lu\n", NOW()); + //sched_res[sched_idx].dur = NOW() - temp_dur; + //sched_idx++; + } return continue_running(prev); } @@ -931,7 +1012,11 @@ update_vcpu_system_time(next); vcpu_periodic_timer_work(next); - context_switch(prev, next); + if (prev->processor == 1) { + context_switch(sched_start_flag, prev, next); + } else { + context_switch(0, prev, next); + } } void context_saved(struct vcpu *prev) diff -ubrN xen/xen-4.0.1/xen/drivers/char/console.c xen-4.0.1/xen/drivers/char/console.c --- xen/xen-4.0.1/xen/drivers/char/console.c 2010-08-25 04:22:12.000000000 -0600 +++ xen-4.0.1/xen/drivers/char/console.c 2011-01-15 10:57:46.000000000 -0700 @@ -63,7 +63,9 @@ 
static uint32_t __initdata opt_conring_size; size_param("conring_size", opt_conring_size); -#define _CONRING_SIZE 16384 +//#define _CONRING_SIZE 16384 +//Sisu xi +#define _CONRING_SIZE 1638400 #define CONRING_IDX_MASK(i) ((i)&(conring_size-1)) static char __initdata _conring[_CONRING_SIZE]; static char *__read_mostly conring = _conring; diff -ubrN xen/xen-4.0.1/xen/include/public/domctl.h xen-4.0.1/xen/include/public/domctl.h --- xen/xen-4.0.1/xen/include/public/domctl.h 2010-08-25 04:22:14.000000000 -0600 +++ xen-4.0.1/xen/include/public/domctl.h 2011-04-24 15:51:25.000000000 -0600 @@ -303,6 +303,9 @@ /* Scheduler types. */ #define XEN_SCHEDULER_SEDF 4 #define XEN_SCHEDULER_CREDIT 5 +// added by Sisu Xi +#define XEN_SCHEDULER_RT 7 + /* Set or get info? */ #define XEN_DOMCTL_SCHEDOP_putinfo 0 #define XEN_DOMCTL_SCHEDOP_getinfo 1 @@ -321,6 +324,12 @@ uint16_t weight; uint16_t cap; } credit; + // added by Sisu Xi + struct xen_domctl_sched_rt { + uint16_t budget; + uint16_t period; + uint16_t level; + } rt; } u; }; typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t; diff -ubrN xen/xen-4.0.1/xen/include/xen/sched.h xen-4.0.1/xen/include/xen/sched.h --- xen/xen-4.0.1/xen/include/xen/sched.h 2010-08-25 04:22:14.000000000 -0600 +++ xen-4.0.1/xen/include/xen/sched.h 2011-01-18 00:58:43.000000000 -0700 @@ -492,6 +492,7 @@ * sync_vcpu_execstate() will switch and commit @prev's state. */ void context_switch( + int flag, struct vcpu *prev, struct vcpu *next);