From 800f879a77716ad833d229ccc058e700c698b039 Mon Sep 17 00:00:00 2001 From: Aaron Taylor Date: Sun, 25 Apr 2021 18:24:22 -0700 Subject: [PATCH] Initial commit of files contained in `mpss-modules-3.8.6.tar.bz2` for Intel Xeon Phi. --- .mpss-metadata | 2 + COPYING | 339 +++ Kbuild | 106 + Makefile | 106 + dma/Kbuild | 5 + dma/mic_dma_lib.c | 1792 ++++++++++++++ dma/mic_dma_md.c | 522 +++++ dma/mic_sbox_md.c | 57 + host/Makefile | 47 + host/acptboot.c | 194 ++ host/ioctl.c | 186 ++ host/linpm.c | 232 ++ host/linpsmi.c | 152 ++ host/linscif_host.c | 315 +++ host/linsysfs.c | 766 ++++++ host/linux.c | 796 +++++++ host/linvcons.c | 687 ++++++ host/linvnet.c | 802 +++++++ host/micpsmi.c | 184 ++ host/micscif_pm.c | 1062 +++++++++ host/pm_ioctl.c | 603 +++++ host/pm_pcstate.c | 1107 +++++++++ host/tools_support.c | 978 ++++++++ host/uos_download.c | 1950 ++++++++++++++++ host/vhost/mic_blk.c | 665 ++++++ host/vhost/mic_vhost.c | 697 ++++++ host/vhost/vhost.h | 261 +++ host/vmcore.c | 821 +++++++ include/mic/bootparams.h | 170 ++ include/mic/compl_buf_ring.h | 220 ++ include/mic/io_interface.h | 217 ++ include/mic/mic_dma_api.h | 170 ++ include/mic/mic_dma_lib.h | 207 ++ include/mic/mic_dma_md.h | 462 ++++ include/mic/mic_macaddr.h | 104 + include/mic/mic_pm.h | 442 ++++ include/mic/mic_sbox_md.h | 90 + include/mic/mic_virtio.h | 70 + include/mic/micbaseaddressdefine.h | 111 + include/mic/micdboxdefine.h | 48 + include/mic/micpsmi.h | 62 + include/mic/micsboxdefine.h | 255 ++ include/mic/micscif.h | 900 ++++++++ include/mic/micscif_intr.h | 52 + include/mic/micscif_kmem_cache.h | 62 + include/mic/micscif_map.h | 276 +++ include/mic/micscif_nm.h | 234 ++ include/mic/micscif_nodeqp.h | 200 ++ include/mic/micscif_rb.h | 170 ++ include/mic/micscif_rma.h | 960 ++++++++ include/mic/micscif_rma_list.h | 151 ++ include/mic/micscif_smpt.h | 120 + include/mic/micscif_va_gen.h | 86 + include/mic/micscif_va_node.h | 115 + include/mic/micvcons.h | 164 ++ include/mic/micveth.h | 145 ++ include/mic/micveth_common.h | 69 + include/mic/micveth_dma.h | 279 +++ include/mic/ringbuffer.h | 195 ++ include/mic_common.h | 769 ++++++ include/mic_interrupts.h | 118 + include/micint.h | 114 + include/scif.h | 1743 ++++++++++++++ include/scif_ioctl.h | 225 ++ mic.conf | 32 + mic.modules | 5 + micscif/Kbuild | 21 + micscif/micscif_api.c | 3464 ++++++++++++++++++++++++++++ micscif/micscif_debug.c | 1005 ++++++++ micscif/micscif_fd.c | 528 +++++ micscif/micscif_intr.c | 159 ++ micscif/micscif_main.c | 606 +++++ micscif/micscif_nm.c | 1740 ++++++++++++++ micscif/micscif_nodeqp.c | 2902 +++++++++++++++++++++++ micscif/micscif_ports.c | 376 +++ micscif/micscif_rb.c | 372 +++ micscif/micscif_rma.c | 2633 +++++++++++++++++++++ micscif/micscif_rma_dma.c | 982 ++++++++ micscif/micscif_rma_list.c | 533 +++++ micscif/micscif_select.c | 446 ++++ micscif/micscif_smpt.c | 457 ++++ micscif/micscif_sysfs.c | 234 ++ micscif/micscif_va_gen.c | 480 ++++ micscif/micscif_va_node.c | 187 ++ mpssboot/Kbuild | 1 + mpssboot/mpssboot.c | 238 ++ pm_scif/Kbuild | 1 + pm_scif/pm_scif.c | 439 ++++ pm_scif/pm_scif.h | 48 + ramoops/Kbuild | 1 + ramoops/ramoops.c | 163 ++ ras/Kbuild | 6 + ras/Makefile | 210 ++ ras/micmca_api.h | 135 ++ ras/micpm_api.h | 307 +++ ras/micras.h | 536 +++++ ras/micras_api.h | 1006 ++++++++ ras/micras_common.c | 968 ++++++++ ras/micras_core.c | 973 ++++++++ ras/micras_elog.c | 3136 +++++++++++++++++++++++++ ras/micras_knc.c | 2794 ++++++++++++++++++++++ ras/micras_knf.c | 1432 ++++++++++++ ras/micras_main.c | 2650 
+++++++++++++++++++++ ras/micras_pm.c | 1050 +++++++++ ras/micras_uncore.c | 1194 ++++++++++ ras/monahan.h | 201 ++ trace_capture/Kbuild | 1 + trace_capture/Makefile | 34 + trace_capture/docapture.c | 70 + trace_capture/tc_host.c | 366 +++ trace_capture/tc_memcvt.c | 85 + trace_capture/trace_capture.c | 2031 ++++++++++++++++ trace_capture/trace_capture.h | 245 ++ udev-mic.rules | 9 + vcons/Kbuild | 3 + vcons/hvc_console.h | 119 + vcons/hvc_mic.c | 341 +++ virtio/Kbuild | 2 + virtio/mic_virtblk.c | 862 +++++++ vnet/Kbuild | 3 + vnet/mic.h | 108 + vnet/micveth.c | 869 +++++++ vnet/micveth_dma.c | 1642 +++++++++++++ vnet/micveth_param.c | 95 + 124 files changed, 66745 insertions(+) create mode 100644 .mpss-metadata create mode 100644 COPYING create mode 100644 Kbuild create mode 100644 Makefile create mode 100644 dma/Kbuild create mode 100644 dma/mic_dma_lib.c create mode 100644 dma/mic_dma_md.c create mode 100644 dma/mic_sbox_md.c create mode 100644 host/Makefile create mode 100644 host/acptboot.c create mode 100644 host/ioctl.c create mode 100644 host/linpm.c create mode 100644 host/linpsmi.c create mode 100644 host/linscif_host.c create mode 100644 host/linsysfs.c create mode 100644 host/linux.c create mode 100644 host/linvcons.c create mode 100644 host/linvnet.c create mode 100644 host/micpsmi.c create mode 100644 host/micscif_pm.c create mode 100644 host/pm_ioctl.c create mode 100644 host/pm_pcstate.c create mode 100644 host/tools_support.c create mode 100644 host/uos_download.c create mode 100644 host/vhost/mic_blk.c create mode 100644 host/vhost/mic_vhost.c create mode 100644 host/vhost/vhost.h create mode 100644 host/vmcore.c create mode 100644 include/mic/bootparams.h create mode 100644 include/mic/compl_buf_ring.h create mode 100644 include/mic/io_interface.h create mode 100644 include/mic/mic_dma_api.h create mode 100644 include/mic/mic_dma_lib.h create mode 100644 include/mic/mic_dma_md.h create mode 100644 include/mic/mic_macaddr.h create mode 100644 include/mic/mic_pm.h create mode 100644 include/mic/mic_sbox_md.h create mode 100644 include/mic/mic_virtio.h create mode 100644 include/mic/micbaseaddressdefine.h create mode 100644 include/mic/micdboxdefine.h create mode 100644 include/mic/micpsmi.h create mode 100644 include/mic/micsboxdefine.h create mode 100644 include/mic/micscif.h create mode 100644 include/mic/micscif_intr.h create mode 100644 include/mic/micscif_kmem_cache.h create mode 100644 include/mic/micscif_map.h create mode 100644 include/mic/micscif_nm.h create mode 100644 include/mic/micscif_nodeqp.h create mode 100644 include/mic/micscif_rb.h create mode 100644 include/mic/micscif_rma.h create mode 100644 include/mic/micscif_rma_list.h create mode 100644 include/mic/micscif_smpt.h create mode 100644 include/mic/micscif_va_gen.h create mode 100644 include/mic/micscif_va_node.h create mode 100644 include/mic/micvcons.h create mode 100644 include/mic/micveth.h create mode 100644 include/mic/micveth_common.h create mode 100644 include/mic/micveth_dma.h create mode 100644 include/mic/ringbuffer.h create mode 100644 include/mic_common.h create mode 100644 include/mic_interrupts.h create mode 100644 include/micint.h create mode 100644 include/scif.h create mode 100644 include/scif_ioctl.h create mode 100644 mic.conf create mode 100755 mic.modules create mode 100644 micscif/Kbuild create mode 100644 micscif/micscif_api.c create mode 100644 micscif/micscif_debug.c create mode 100644 micscif/micscif_fd.c create mode 100644 micscif/micscif_intr.c create mode 100644 
micscif/micscif_main.c create mode 100644 micscif/micscif_nm.c create mode 100644 micscif/micscif_nodeqp.c create mode 100644 micscif/micscif_ports.c create mode 100644 micscif/micscif_rb.c create mode 100644 micscif/micscif_rma.c create mode 100644 micscif/micscif_rma_dma.c create mode 100644 micscif/micscif_rma_list.c create mode 100644 micscif/micscif_select.c create mode 100644 micscif/micscif_smpt.c create mode 100644 micscif/micscif_sysfs.c create mode 100644 micscif/micscif_va_gen.c create mode 100644 micscif/micscif_va_node.c create mode 100644 mpssboot/Kbuild create mode 100644 mpssboot/mpssboot.c create mode 100644 pm_scif/Kbuild create mode 100644 pm_scif/pm_scif.c create mode 100644 pm_scif/pm_scif.h create mode 100644 ramoops/Kbuild create mode 100644 ramoops/ramoops.c create mode 100644 ras/Kbuild create mode 100644 ras/Makefile create mode 100644 ras/micmca_api.h create mode 100644 ras/micpm_api.h create mode 100644 ras/micras.h create mode 100644 ras/micras_api.h create mode 100644 ras/micras_common.c create mode 100644 ras/micras_core.c create mode 100644 ras/micras_elog.c create mode 100644 ras/micras_knc.c create mode 100644 ras/micras_knf.c create mode 100644 ras/micras_main.c create mode 100644 ras/micras_pm.c create mode 100644 ras/micras_uncore.c create mode 100644 ras/monahan.h create mode 100644 trace_capture/Kbuild create mode 100644 trace_capture/Makefile create mode 100644 trace_capture/docapture.c create mode 100644 trace_capture/tc_host.c create mode 100644 trace_capture/tc_memcvt.c create mode 100644 trace_capture/trace_capture.c create mode 100644 trace_capture/trace_capture.h create mode 100644 udev-mic.rules create mode 100644 vcons/Kbuild create mode 100644 vcons/hvc_console.h create mode 100644 vcons/hvc_mic.c create mode 100644 virtio/Kbuild create mode 100644 virtio/mic_virtblk.c create mode 100644 vnet/Kbuild create mode 100644 vnet/mic.h create mode 100644 vnet/micveth.c create mode 100644 vnet/micveth_dma.c create mode 100644 vnet/micveth_param.c diff --git a/.mpss-metadata b/.mpss-metadata new file mode 100644 index 0000000..66c84b0 --- /dev/null +++ b/.mpss-metadata @@ -0,0 +1,2 @@ +3.8.6-1 +e8ef53c4fa26582ac37b5e0101b7451a70263f6c diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..d159169 --- /dev/null +++ b/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. 
+ +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. 
+ +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/Kbuild b/Kbuild new file mode 100644 index 0000000..f56e01c --- /dev/null +++ b/Kbuild @@ -0,0 +1,106 @@ +not-y := n +not-n := y +m-not-y := n +m-not-n := m + +ifeq ($(CONFIG_X86_MICPCI),) +CONFIG_X86_MICPCI := n +endif +ifeq ($(CONFIG_X86_MICPCI)$(MIC_CARD_ARCH),n) +$(error building for host, but $$(MIC_CARD_ARCH) is unset) +endif +ifneq ($(MIC_CARD_ARCH),$(firstword $(filter l1om k1om,$(MIC_CARD_ARCH)))) +$(error $$(MIC_CARD_ARCH) must be l1om or k1om) +endif + +# Force optimization to -O2 in case the kernel was configured to use +# -Os. The main reason is pretty dumb -- -Os has a warning -O2 doesn't, +# and we compile with -Werror internally. Another reason is that -O2 is +# what we're used to in terms of validation and performance analysis. We +# should probably get rid of this, though. +subdir-ccflags-y += -O2 + +# Makes it easy to inject "-Werror" from the environment +subdir-ccflags-y += $(KERNWARNFLAGS) + +# Bake some information about who built the module(s), and what version +# of the source code they started with. Possibly useful during debug. +subdir-ccflags-y += -DBUILD_NUMBER=\"'$(MPSS_BUILDNO)'\" +subdir-ccflags-y += -DBUILD_BYWHOM=\"'$(MPSS_BUILTBY)'\" +subdir-ccflags-y += -DBUILD_ONDATE=\"'$(MPSS_BUILTON)'\" +subdir-ccflags-y += -DBUILD_SCMVER=\"'$(MPSS_COMMIT)'\" +subdir-ccflags-y += -DBUILD_VERSION=\"'$(or $(MPSS_VERSION),0.0) ($(MPSS_BUILTBY))'\" + +# Code common with the host mustn't use CONFIG_M[LK]1OM directly. +# But of course it does anyway. Arrgh. +subdir-ccflags-$(CONFIG_ML1OM) += -DMIC_IS_L1OM +subdir-ccflags-$(CONFIG_MK1OM) += -DMIC_IS_K1OM +ifeq ($(MIC_CARD_ARCH),l1om) +subdir-ccflags-y += -DMIC_IS_L1OM -DCONFIG_ML1OM +endif +ifeq ($(MIC_CARD_ARCH),k1om) +subdir-ccflags-y += -DMIC_IS_K1OM -DCONFIG_MK1OM +endif + +# a shorthand for "runs on the card"? +subdir-ccflags-$(CONFIG_X86_MICPCI) += -D_MIC_SCIF_ + +# "runs on the host" +subdir-ccflags-$(not-$(CONFIG_X86_MICPCI)) += -DHOST -DUSE_VCONSOLE + +# always set? what's this thing's purpose? 
+subdir-ccflags-y += -D__LINUX_GPL__ -D_MODULE_SCIF_ + +subdir-ccflags-y += -I$(M)/include + +obj-$(CONFIG_X86_MICPCI) += dma/ micscif/ pm_scif/ ras/ +obj-$(CONFIG_X86_MICPCI) += vcons/ vnet/ mpssboot/ ramoops/ virtio/ + +obj-$(m-not-$(CONFIG_X86_MICPCI)) += mic.o + +mic-objs := +mic-objs += dma/mic_dma_lib.o +mic-objs += dma/mic_dma_md.o +mic-objs += host/acptboot.o +mic-objs += host/ioctl.o +mic-objs += host/linpm.o +mic-objs += host/linpsmi.o +mic-objs += host/linscif_host.o +mic-objs += host/linsysfs.o +mic-objs += host/linux.o +mic-objs += host/linvcons.o +mic-objs += host/linvnet.o +mic-objs += host/micpsmi.o +mic-objs += host/micscif_pm.o +mic-objs += host/pm_ioctl.o +mic-objs += host/pm_pcstate.o +mic-objs += host/tools_support.o +mic-objs += host/uos_download.o +mic-objs += host/vhost/mic_vhost.o +mic-objs += host/vhost/mic_blk.o +mic-objs += host/vmcore.o +mic-objs += micscif/micscif_api.o +mic-objs += micscif/micscif_debug.o +mic-objs += micscif/micscif_fd.o +mic-objs += micscif/micscif_intr.o +mic-objs += micscif/micscif_nm.o +mic-objs += micscif/micscif_nodeqp.o +mic-objs += micscif/micscif_ports.o +mic-objs += micscif/micscif_rb.o +mic-objs += micscif/micscif_rma_dma.o +mic-objs += micscif/micscif_rma_list.o +mic-objs += micscif/micscif_rma.o +mic-objs += micscif/micscif_select.o +mic-objs += micscif/micscif_smpt.o +mic-objs += micscif/micscif_sysfs.o +mic-objs += micscif/micscif_va_gen.o +mic-objs += micscif/micscif_va_node.o +mic-objs += vnet/micveth_dma.o +mic-objs += vnet/micveth_param.o + +version-le = $(shell printf '%s\n' $(1) | sort -t. -k 1,1n -k 2,2n -k 3,3n -k 4,4n -c >/dev/null 2>&1 && echo t) +ifeq ($(call version-le, 2.6.23 $(KERNELRELEASE)),t) +ccflags-y += $(mic-cflags) +else +$(error building against kernels <= 2.6.23 is broken) +endif diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..fa50e3d --- /dev/null +++ b/Makefile @@ -0,0 +1,106 @@ +# Copyright 2010-2017 Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2, +# as published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# Disclaimer: The codes contained in these modules may be specific to +# the Intel Software Development Platform codenamed Knights Ferry, +# and the Intel product codenamed Knights Corner, and are not backward +# compatible with other Intel products. Additionally, Intel will NOT +# support the codes or instruction set in future products. +# +# Intel offers no warranty of any kind regarding the code. This code is +# licensed on an "AS IS" basis and Intel is not obligated to provide +# any support, assistance, installation, training, or other services +# of any kind. Intel is also not obligated to provide any updates, +# enhancements or extensions. Intel specifically disclaims any warranty +# of merchantability, non-infringement, fitness for any particular +# purpose, and any other warranty. +# +# Further, Intel disclaims all liability of any kind, including but +# not limited to liability for infringement of any proprietary rights, +# relating to the use of the code, even if Intel is notified of the +# possibility of such liability. 
Except as expressly stated in an Intel +# license agreement provided with this code and agreed upon with Intel, +# no license, express or implied, by estoppel or otherwise, to any +# intellectual property rights is granted herein. + +MPSS_COMMIT ?= $(or $(shell sed -ne '2 p' .mpss-metadata 2>/dev/null), \ + $(error .mpss-metadata file is missing or incorrect)) +MPSS_VERSION ?= $(or $(shell sed -ne '1 p' .mpss-metadata 2>/dev/null), \ + $(error .mpss-metadata file is missing or incorrect)) +MPSS_BUILDNO ?= 0 +export MPSS_COMMIT := $(MPSS_COMMIT) +export MPSS_VERSION := $(MPSS_VERSION) +export MPSS_BUILDNO := $(MPSS_BUILDNO) +export MPSS_BUILTBY := $(shell echo "`whoami`@`uname -n`") +export MPSS_BUILTON := $(shell date +'%F %T %z') + +KERNEL_VERSION := $(shell uname -r) +KERNEL_SRC = /lib/modules/$(KERNEL_VERSION)/build + +INSTALL = install +INSTALL_d = $(INSTALL) -d +INSTALL_x = $(INSTALL) +INSTALL_f = $(INSTALL) -m644 + +prefix = /usr/local +sysconfdir = $(prefix)/etc +includedir = $(prefix)/include + +kmodinstalldir = /lib/modules/$(KERNEL_VERSION) +kmodincludedir = $(realpath $(KERNEL_SRC))/include/modules + +# If building the host's driver for a MIC co-processor card, which card +# $(ARCH) it should support +export MIC_CARD_ARCH + +.PHONY: all install modules +.PHONY: modules_install conf_install dev_install kdev_install + +all: modules + +install: modules_install conf_install kdev_install + +modules modules_install: %: + $(MAKE) -C $(KERNEL_SRC) M=$(CURDIR) $* \ + INSTALL_MOD_PATH=$(DESTDIR) + +conf_install: +ifneq ($(MIC_CARD_ARCH),) + $(INSTALL_d) $(DESTDIR)$(sysconfdir)/sysconfig/modules + $(INSTALL_x) mic.modules $(DESTDIR)$(sysconfdir)/sysconfig/modules + $(INSTALL_d) $(DESTDIR)$(sysconfdir)/modprobe.d + $(INSTALL_f) mic.conf $(DESTDIR)$(sysconfdir)/modprobe.d +endif + $(INSTALL_d) $(DESTDIR)$(sysconfdir)/udev/rules.d + $(INSTALL_f) udev-mic.rules $(DESTDIR)$(sysconfdir)/udev/rules.d/50-udev-mic.rules + +dev_install: + $(INSTALL_d) $(DESTDIR)$(includedir)/mic + $(INSTALL_f) include/scif_ioctl.h $(DESTDIR)$(includedir) + $(INSTALL_f) include/mic/io_interface.h $(DESTDIR)$(includedir)/mic + $(INSTALL_f) include/mic/mic_pm.h $(DESTDIR)$(includedir)/mic + $(INSTALL_f) ras/micras_api.h $(DESTDIR)$(includedir)/mic + $(INSTALL_f) ras/micmca_api.h $(DESTDIR)$(includedir)/mic +ifeq ($(MIC_CARD_ARCH),) # Card side + $(INSTALL_f) ras/micpm_api.h $(DESTDIR)$(includedir)/mic + $(INSTALL_f) ras/micras.h $(DESTDIR)$(includedir)/mic +else # Host side + $(INSTALL_f) include/mic/micbaseaddressdefine.h $(DESTDIR)$(includedir)/mic + $(INSTALL_f) include/mic/micsboxdefine.h $(DESTDIR)$(includedir)/mic + $(INSTALL_f) include/mic/micdboxdefine.h $(DESTDIR)$(includedir)/mic + $(INSTALL_f) ras/micpm_api.h $(DESTDIR)$(includedir)/mic +endif + +kdev_install: + $(INSTALL_d) $(DESTDIR)$(kmodinstalldir) + $(INSTALL_f) Module.symvers $(DESTDIR)$(kmodinstalldir)/scif.symvers + $(INSTALL_d) $(DESTDIR)$(kmodincludedir) + $(INSTALL_f) include/scif.h $(DESTDIR)$(kmodincludedir) diff --git a/dma/Kbuild b/dma/Kbuild new file mode 100644 index 0000000..4db196d --- /dev/null +++ b/dma/Kbuild @@ -0,0 +1,5 @@ +ccflags-y += -DDMA_CHAN_MIC_OWNER=0 + +obj-m := dma_module.o + +dma_module-objs := mic_dma_lib.o mic_dma_md.o mic_sbox_md.o diff --git a/dma/mic_dma_lib.c b/dma/mic_dma_lib.c new file mode 100644 index 0000000..80a4a0b --- /dev/null +++ b/dma/mic_dma_lib.c @@ -0,0 +1,1792 @@ +/* + * Copyright 2010-2017 Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef _MIC_SCIF_ +#include +#ifdef CONFIG_PAGE_CACHE_DMA +#include +#endif +#endif + +#ifndef _MIC_SCIF_ +#include +#include "mic_common.h" +#endif + +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); + +#ifdef MIC_IS_EMULATION +#define DMA_TO (INT_MAX) +#define DMA_FENCE_TIMEOUT_CNT (INT_MAX) +#else +#define DMA_TO (5 * HZ) +#define DMA_SLOWEST_BW (300) // 300Mbps +// the maximum size for each decriptor entry is 2M +#define DMA_FENCE_TIMEOUT_CNT (2 * MIC_MAX_NUM_DESC_PER_RING /DMA_SLOWEST_BW/ (DMA_TO/HZ)) +#endif + +#ifdef _MIC_SCIF_ +#define MAX_DMA_XFER_SIZE MIC_MAX_DMA_XFER_SIZE +#else +/* Use 512K as the maximum descriptor transfer size for Host */ +#define MAX_DMA_XFER_SIZE (((1U) * 1024 * 1024) >> 1) +#endif +#ifndef KASSERT +#define KASSERT(x, y, ...) \ + do { \ + if(!x) \ + printk(y, ##__VA_ARGS__);\ + BUG_ON(!x); \ + } while(0) +#endif +/* + * Arrary of per device DMA contexts. The card only uses index 0. The host uses one + * context per card starting from 0. + */ +static struct mic_dma_ctx_t *mic_dma_context[MAX_BOARD_SUPPORTED + 1]; +static struct mutex lock_dma_dev_init[MAX_BOARD_SUPPORTED + 1]; + +enum mic_desc_format_type { + NOP, + MEMCOPY, + STATUS, + GENERAL, + KEYNONCECNT, + KEY +}; +char proc_dma_reg[]="mic_dma_registers_"; +char proc_dma_ring[]="mic_dma_ring_"; + +#define PR_PREFIX "DMA_LIB_MI:" +#define DMA_DESC_RING_SIZE MIC_MAX_NUM_DESC_PER_RING +#define MAX_POLLING_BUFFERS DMA_DESC_RING_SIZE + +#define DMA_PROC +static void mic_dma_proc_init(struct mic_dma_ctx_t *dma_ctx); +static void mic_dma_proc_uninit(struct mic_dma_ctx_t *dma_ctx); + +/* + * TODO: This is size of s/w interrupt ring. 
+ * We need to figure out a value so that we don't run out of memory in + * interrupt ring and at the same time don't waste memory + */ +#define NUM_COMP_BUFS (((PAGE_SIZE/sizeof(struct dma_completion_cb*)) - 10) * 10) + +struct intr_compl_buf_ring { + struct dma_completion_cb **comp_cb_array; + struct compl_buf_ring ring; + int old_tail; +}; + +struct mic_dma_ctx_t; /* Forward Declaration */ + +struct dma_channel { + int ch_num;/*Duplicated in md_mic_dma_chan struct too*/ + struct md_mic_dma_chan *chan; + atomic_t flags; + wait_queue_head_t intr_wq; + wait_queue_head_t access_wq; + union md_mic_dma_desc *desc_ring_bak; + union md_mic_dma_desc *desc_ring; + phys_addr_t desc_ring_phys; + uint64_t next_write_index; /* next write index into desc ring */ + struct intr_compl_buf_ring intr_ring; + struct compl_buf_ring poll_ring; + struct mic_dma_ctx_t *dma_ctx; /* Pointer to parent DMA context */ +}; + +/* Per MIC device (per MIC board) DMA context */ +struct mic_dma_ctx_t { + struct dma_channel dma_channels[MAX_NUM_DMA_CHAN]; + int last_allocated_dma_channel_num; + struct mic_dma_device dma_dev; + int device_num; + atomic_t ref_count; /* Reference count */ + atomic_t ch_num; +}; + +/* DMA Library Init/Uninit Routines */ +static int mic_dma_lib_init(uint8_t *mmio_va_base, struct mic_dma_ctx_t *dma_ctx); +static void mic_dma_lib_uninit(struct mic_dma_ctx_t *dma_ctx); + +int get_chan_num(struct dma_channel *chan) +{ + return chan->ch_num; +} +EXPORT_SYMBOL(get_chan_num); + +void initdmaglobalvar(void) +{ + memset(mic_dma_context, 0, sizeof(struct mic_dma_ctx_t *) * (MAX_BOARD_SUPPORTED + 1)); +} + +static void +ack_dma_interrupt(struct dma_channel *ch) +{ + md_mic_dma_chan_mask_intr(&ch->dma_ctx->dma_dev, ch->chan); + md_mic_dma_chan_unmask_intr(&ch->dma_ctx->dma_dev, ch->chan); +} + +/* Returns true if the next write index is "within" bounds */ +static inline bool verify_next_write_index(struct dma_channel *ch) +{ + bool ret = false; + + if (ch->next_write_index < DMA_DESC_RING_SIZE) + ret = true; + else + printk(KERN_ERR "%s %d OOB ch_num 0x%x next_write_index 0x%llx\n", + __func__, __LINE__, + ch->ch_num, ch->next_write_index); + return ret; +} + +/* TODO: + * See if we can use __get_free_pages or something similar + * get_free_pages expects a power of 2 number of pages + */ +static void +alloc_dma_desc_ring_mem(struct dma_channel *ch, struct mic_dma_ctx_t *dma_ctx) +{ +#ifndef _MIC_SCIF_ + struct pci_dev *pdev; +#endif + /* Is there any kernel allocator which provides the + * option to give the alignment?? 
+ */ + ch->desc_ring = kzalloc( + (DMA_DESC_RING_SIZE * sizeof(*ch->desc_ring)) + PAGE_SIZE, GFP_KERNEL); + ch->desc_ring_bak = ch->desc_ring; + ch->desc_ring = (union md_mic_dma_desc *)ALIGN( + (uint64_t)ch->desc_ring, PAGE_SIZE); +#ifdef _MIC_SCIF_ + ch->desc_ring_phys = virt_to_phys(ch->desc_ring); +#else + micscif_pci_dev(dma_ctx->device_num, &pdev); + ch->desc_ring_phys = mic_map_single(dma_ctx->device_num - 1, pdev, (void *)ch->desc_ring, + (DMA_DESC_RING_SIZE * sizeof(*ch->desc_ring)) + PAGE_SIZE); + BUG_ON(pci_dma_mapping_error(pdev, ch->desc_ring_phys)); +#endif +} + +/* + * Call completion cb functions: + * Take care of case where we allocated temp buf + */ +static void +mic_dma_lib_interrupt_handler(struct dma_channel *chan) +{ + int i = 0; + int ring_size = chan->intr_ring.ring.size; + struct dma_completion_cb **temp = chan->intr_ring.comp_cb_array; + struct dma_completion_cb *cb; + int new_tail, old_tail; + + if (mic_hw_family(chan->dma_ctx->device_num) == FAMILY_KNC && + mic_hw_stepping(chan->dma_ctx->device_num) >= KNC_B0_STEP) { + unsigned long error = *((uint32_t*)chan->chan->dstat_wb_loc); + if (unlikely(test_bit(31, &error))) + printk(KERN_ERR "DMA h/w error - %s %d, dstatwb=%lx\n", + __func__, __LINE__, error); + } + new_tail = read_tail(&chan->intr_ring.ring); + old_tail = chan->intr_ring.old_tail; + + for (; i < ring_size && old_tail != new_tail; + old_tail = incr_rb_index(old_tail, ring_size), i++) { + cb = (struct dma_completion_cb *)xchg(&temp[old_tail], NULL); + if (cb) { + cb->dma_completion_func(cb->cb_cookie); + } + } + chan->intr_ring.old_tail = new_tail; + update_tail(&chan->intr_ring.ring, new_tail); + wake_up(&chan->intr_wq); + if (i == ring_size && old_tail != new_tail) { + printk(KERN_ERR PR_PREFIX "Something went wrong, old tail = %d, new tail = %d\n", + old_tail, new_tail); + } +} + +#ifdef _MIC_SCIF_ +/* + * TODO; + * Maybe move the logic into slow interrupt handler + */ +static irqreturn_t +dma_interrupt_handler(int irq, void *dev_id) +{ + struct dma_channel *chan = ((struct dma_channel*)dev_id); + + ack_dma_interrupt(chan); + mic_dma_lib_interrupt_handler(chan); + + return IRQ_HANDLED; +} +#else + +#define SBOX_SICR0_DMA(x) (((x) >> 8) & 0xff) + +/* + * TODO; + * Maybe move the logic into slow interrupt handler + */ +void +host_dma_interrupt_handler(mic_dma_handle_t dma_handle, uint32_t sboxSicr0reg) +{ + struct mic_dma_ctx_t *dma_ctx = (struct mic_dma_ctx_t *) dma_handle; + uint32_t dma_chan_id; + struct dma_channel *ch; + + for (dma_chan_id = 0; dma_chan_id < 8; dma_chan_id++) { + if (SBOX_SICR0_DMA(sboxSicr0reg) & (0x1 << dma_chan_id)) { + ch = &dma_ctx->dma_channels[dma_chan_id]; + if (ch->desc_ring) + host_dma_lib_interrupt_handler(ch); + } + } +} + +void +host_dma_lib_interrupt_handler(struct dma_channel *chan) +{ + ack_dma_interrupt(chan); + mic_dma_lib_interrupt_handler(chan); +} +#endif + +static void +mi_mic_dma_chan_setup(struct dma_channel *ch, struct mic_dma_ctx_t *dma_ctx) +{ + ch->next_write_index = ch->chan->cached_tail; + + init_ring(&ch->poll_ring, MAX_POLLING_BUFFERS, dma_ctx->device_num); + + ch->intr_ring.comp_cb_array = + kzalloc(sizeof(*ch->intr_ring.comp_cb_array) * NUM_COMP_BUFS, GFP_KERNEL); + init_ring(&ch->intr_ring.ring, NUM_COMP_BUFS, dma_ctx->device_num); + ch->intr_ring.old_tail = 0; +} + +static void +mi_mic_dma_chan_destroy(struct dma_channel *ch, struct mic_dma_ctx_t *dma_ctx) +{ + uninit_ring(&ch->intr_ring.ring, dma_ctx->device_num); + kfree(ch->intr_ring.comp_cb_array); + uninit_ring(&ch->poll_ring, 
dma_ctx->device_num); +} + +int +open_dma_device(int device_num, uint8_t *mmio_va_base, mic_dma_handle_t* dma_handle) +{ + int result = 0; + + if (device_num >= MAX_BOARD_SUPPORTED) + return -EINVAL; + + mutex_lock(&lock_dma_dev_init[device_num]); + if (!mic_dma_context[device_num]) { + mic_dma_context[device_num] = kzalloc(sizeof(struct mic_dma_ctx_t), GFP_KERNEL); + BUG_ON(!mic_dma_context[device_num]); + + mic_dma_context[device_num]->device_num = device_num; + + result = mic_dma_lib_init(mmio_va_base, mic_dma_context[device_num]); + BUG_ON(result); + } + + atomic_inc(&mic_dma_context[device_num]->ref_count); + *dma_handle = mic_dma_context[device_num]; + mutex_unlock(&lock_dma_dev_init[device_num]); + + return result; +} +EXPORT_SYMBOL(open_dma_device); + +void +close_dma_device(int device_num, mic_dma_handle_t *dma_handle) +{ + struct mic_dma_ctx_t *dma_ctx; + + if (device_num >= MAX_BOARD_SUPPORTED) + return; + + mutex_lock(&lock_dma_dev_init[device_num]); + dma_ctx = (struct mic_dma_ctx_t *) *dma_handle; + if (dma_ctx && + atomic_read(&dma_ctx->ref_count) && + atomic_dec_and_test(&dma_ctx->ref_count)) { + mic_dma_lib_uninit(dma_ctx); + mic_dma_context[dma_ctx->device_num] = 0; + *dma_handle = NULL; + kfree(dma_ctx); + } + mutex_unlock(&lock_dma_dev_init[device_num]); +} +EXPORT_SYMBOL(close_dma_device); + +void mi_mic_dma_chan_set_dstat_wb(struct mic_dma_ctx_t *dma_ctx, + struct md_mic_dma_chan *chan) +{ +#ifndef _MIC_SCIF_ + struct pci_dev *pdev; +#endif + if (!chan->dstat_wb_phys) { + chan->dstat_wb_loc = kzalloc(sizeof(uint32_t), GFP_KERNEL); + +#ifdef _MIC_SCIF_ + chan->dstat_wb_phys = virt_to_phys(chan->dstat_wb_loc); +#else + micscif_pci_dev(dma_ctx->device_num, &pdev); + chan->dstat_wb_phys = mic_map_single(dma_ctx->device_num - 1, pdev, chan->dstat_wb_loc, + sizeof(uint32_t)); + BUG_ON(pci_dma_mapping_error(pdev, chan->dstat_wb_phys)); +#endif + } + md_mic_dma_chan_set_dstat_wb(&dma_ctx->dma_dev, chan); +} + +void +md_mic_dma_chan_setup(struct mic_dma_ctx_t *dma_ctx, struct dma_channel *ch) +{ + md_mic_dma_chan_unmask_intr(&dma_ctx->dma_dev, ch->chan); + + /* + * Disable the channel, update desc ring base and size, write new head + * and then enable the channel. + */ + if (mic_hw_family(ch->dma_ctx->device_num) == FAMILY_KNC && + mic_hw_stepping(ch->dma_ctx->device_num) >= KNC_B0_STEP) { + mi_mic_dma_chan_set_dstat_wb(dma_ctx, ch->chan); + md_mic_dma_chan_set_dcherr_msk(&dma_ctx->dma_dev, ch->chan, 0); + } + md_mic_dma_chan_set_desc_ring(&dma_ctx->dma_dev, ch->chan, + ch->desc_ring_phys, + DMA_DESC_RING_SIZE); + + wmb(); + + md_mic_dma_chan_unmask_intr(&dma_ctx->dma_dev, ch->chan); +} + +int +mic_dma_lib_init(uint8_t *mmio_va_base, struct mic_dma_ctx_t *dma_ctx) +{ + int i; +#ifdef _MIC_SCIF_ + int ret_value; +#endif + struct dma_channel *ch; + enum md_mic_dma_chan_owner owner, currentOwner; + + //pr_debug(PR_PREFIX "Initialized the dma mmio va=%p\n", mmio_va_base); + // Using this to check where the DMA lib is at for now. + currentOwner = mmio_va_base == 0 ? MIC_DMA_CHAN_MIC_OWNED : MIC_DMA_CHAN_HOST_OWNED; + + // TODO: multi-card support + md_mic_dma_init(&dma_ctx->dma_dev, mmio_va_base); + + for (i = 0 ; i < MAX_NUM_DMA_CHAN; i++) { + ch = &dma_ctx->dma_channels[i]; + + /* Initialize pointer to parent */ + ch->dma_ctx = dma_ctx; + + owner = i > __LAST_HOST_CHAN_NUM ? 
MIC_DMA_CHAN_MIC_OWNED + : MIC_DMA_CHAN_HOST_OWNED; + + // This has to be done from card side + ch->chan = md_mic_dma_request_chan(&dma_ctx->dma_dev, owner); + KASSERT((ch->chan != NULL), "dummy\n"); + ch->ch_num = ch->chan->ch_num; + +#ifdef _MIC_SCIF_ + /* + * Host driver would have executed by now and thus setup the + * desc. ring + */ + if (ch->chan->owner == MIC_DMA_CHAN_HOST_OWNED) + md_mic_dma_enable_chan(&dma_ctx->dma_dev, i, true); +#endif + + atomic_set(&(ch->flags), CHAN_INUSE); // Mark as used by default + if (currentOwner == owner) { + alloc_dma_desc_ring_mem(ch, dma_ctx); + +#ifdef _MIC_SCIF_ // DMA now shares the IRQ handler with other system interrupts + ret_value = request_irq(i, dma_interrupt_handler, IRQF_DISABLED, + "dma channel", ch); + ret_value = ret_value; + //pr_debug(PR_PREFIX "Interrupt handler ret value for chan %d = %d\n", i, ret_value); +#endif + md_mic_dma_chan_setup(dma_ctx, ch); + + mi_mic_dma_chan_setup(ch, dma_ctx); + + init_waitqueue_head(&ch->intr_wq); + init_waitqueue_head(&ch->access_wq); + // Only mark owned channel to be available + atomic_set(&(ch->flags), CHAN_AVAILABLE); + md_mic_dma_print_debug(&dma_ctx->dma_dev, ch->chan); + } else { + ch->desc_ring = NULL; + } + } + + /* Initialize last_allocated_dma_channel */ + dma_ctx->last_allocated_dma_channel_num = -1; + //pr_debug(PR_PREFIX "Initialized the dma channels\n"); + mic_dma_proc_init(dma_ctx); + return 0; +} + +void +mic_dma_lib_uninit(struct mic_dma_ctx_t *dma_ctx) +{ + int i; + struct dma_channel *ch; +#ifndef _MIC_SCIF_ + struct pci_dev *pdev; +#endif + + mic_dma_proc_uninit(dma_ctx); + for (i = 0 ; i < MAX_NUM_DMA_CHAN; i++) { + ch = &dma_ctx->dma_channels[i]; + if (!ch->desc_ring) + continue; + drain_dma_intr(ch); + /* Request the channel but don't free it. Errors are okay */ + request_dma_channel(ch); +#ifdef _MIC_SCIF_ // DMA now shares the IRQ handler with other system interrupts + free_irq(i, ch); +#endif + mi_mic_dma_chan_destroy(ch, dma_ctx); +#ifndef _MIC_SCIF_ + micscif_pci_dev(dma_ctx->device_num, &pdev); + mic_unmap_single(dma_ctx->device_num - 1, pdev, ch->desc_ring_phys, + (DMA_DESC_RING_SIZE * sizeof(*ch->desc_ring)) + PAGE_SIZE); +#endif + + kfree(ch->desc_ring_bak); + ch->desc_ring_bak = NULL; + ch->desc_ring = NULL; + if (mic_hw_family(ch->dma_ctx->device_num) == FAMILY_KNC && + mic_hw_stepping(dma_ctx->device_num) >= KNC_B0_STEP) { +#ifndef _MIC_SCIF_ + mic_unmap_single(dma_ctx->device_num - 1, pdev, ch->chan->dstat_wb_phys, + sizeof(uint32_t)); +#endif + kfree(ch->chan->dstat_wb_loc); + ch->chan->dstat_wb_loc = NULL; + ch->chan->dstat_wb_phys = 0; + } + md_mic_dma_free_chan(&dma_ctx->dma_dev, ch->chan); + } +#ifndef MIC_IS_EMULATION + /* Ensure that all waiters for DMA channels time out */ + msleep(DMA_TO/HZ * 1000); +#endif + md_mic_dma_uninit(&dma_ctx->dma_dev); + //pr_debug(PR_PREFIX "Uninitialized the dma channels\n"); +} + +/* + * reserve_dma_channel - reserve a given dma channel for exclusive use + * + * @dma_handle - handle to DMA device returned by open_dma_device + * @chan_num - Channel number to be reserved + * @chan - set to point to the dma channel reserved by the call + * + * Returns < 1 on error (errorno) + * Returns 0 on success + * + * NOTES: Should this function sleep waiting for the lock? 
+ * TODO: + * Maybe there should be a blocking and non-blocking versions of this function + */ +int +reserve_dma_channel(mic_dma_handle_t dma_handle, int chan_num, struct dma_channel **chan) +{ + struct mic_dma_ctx_t *dma_ctx = (struct mic_dma_ctx_t *) dma_handle; + + /* + * Do we need to do acquire the lock for statically allocated channels? + * I am assuming we dont have to lock + */ + if (CHAN_AVAILABLE == atomic_cmpxchg(&(dma_ctx->dma_channels[chan_num].flags), + CHAN_AVAILABLE, CHAN_INUSE)) { + *chan = &dma_ctx->dma_channels[chan_num]; + return 0; + } + return -1; +} +EXPORT_SYMBOL(reserve_dma_channel); + +/* + * allocate_dma_channel - dynamically allocate a dma channel (for a short while). Will + * search for, choose, and lock down one channel for use by the calling thread. + * + * @dma_handle - handle to DMA device returned by open_dma_device + * @chan - Returns the dma_channel pointer that was allocated by the call + * + * Returns < 1 on error + * Returns 0 on success + * + * NOTE: This function grabs a lock before exiting -- the calling thread MUST NOT + * sleep, and must call free_dma_channel before returning to user-space or switching + * volantarily to another thread. Similarly, this function cannot be called from + * an interrupt context at this time. + * + * TODO: How do we pick a dma channel? + * For now I am doing it in round robin fashion. + */ +int +allocate_dma_channel(mic_dma_handle_t dma_handle, struct dma_channel **chan) +{ + struct mic_dma_ctx_t *dma_ctx = (struct mic_dma_ctx_t *) dma_handle; + int i, j; + + if (!dma_ctx) + return -ENODEV; + + j = dma_ctx->last_allocated_dma_channel_num + 1; + + for (i = 0; i < MAX_NUM_DMA_CHAN; i++, j++) { + if (CHAN_AVAILABLE == atomic_cmpxchg(&(dma_ctx->dma_channels[j % + MAX_NUM_DMA_CHAN].flags), + CHAN_AVAILABLE, CHAN_INUSE)) { + *chan = &(dma_ctx->dma_channels[j % MAX_NUM_DMA_CHAN]); + dma_ctx->last_allocated_dma_channel_num = j % MAX_NUM_DMA_CHAN; + return 0; + } + } + return -1; +} +EXPORT_SYMBOL(allocate_dma_channel); + +/* + * request_dma_channel - Request a specific DMA channel. + * + * @chan - Returns the dma_channel pointer that was requested + * + * Returns: 0 on success and -ERESTARTSYS if the wait was interrupted + * or -EBUSY if the channel was not available. + * + * NOTE: This function must call free_dma_channel before returning to + * user-space. + */ +int request_dma_channel(struct dma_channel *chan) +{ + int ret; + + ret = wait_event_interruptible_timeout(chan->access_wq, + CHAN_AVAILABLE == atomic_cmpxchg(&chan->flags, + CHAN_AVAILABLE, CHAN_INUSE), DMA_TO); + if (!ret) { + printk(KERN_ERR "%s %d TO chan 0x%x\n", __func__, __LINE__, chan->ch_num); + ret = -EBUSY; + } + if (ret > 0) + ret = 0; + return ret; +} +EXPORT_SYMBOL(request_dma_channel); + +/* + * free_dma_channel - after allocating a channel, used to + * free the channel after DMAs are submitted + * + * @chan - pointer to the dma_channel struct that was allocated + * + * Returns 0 on success, < 1 on error (errorno) + * + * NOTE: This function must be called after all do_dma calls are finished, + * but can be called before the DMAs actually complete (as long as the comp_cb() + * handler in do_dma don't refer to the dma_channel struct). If called with a + * dynamically allocated dma_chan, the caller must be the thread that called + * allocate_dma_chan. When operating on a dynamic channel, free unlocks the + * mutex locked in allocate. 
Statically allocated channels cannot be freed, + * and calling this function with that type of channel will return an error. + */ +int +free_dma_channel(struct dma_channel *chan) +{ + /* + * Why can't we use this function with channels that were statically allocated?? + */ + BUG_ON(CHAN_INUSE != + atomic_cmpxchg(&chan->flags, CHAN_INUSE, CHAN_AVAILABLE)); + wake_up(&chan->access_wq); + return 0; +} +EXPORT_SYMBOL(free_dma_channel); + +static __always_inline uint32_t +get_dma_tail_pointer(struct dma_channel *chan) +{ + struct mic_dma_device *dma_dev; + dma_dev = &chan->dma_ctx->dma_dev; + return md_mic_dma_chan_read_tail(dma_dev, chan->chan); +} +/* + * Return -1 in case of error + */ +static int +program_memcpy_descriptors(struct dma_channel *chan, uint64_t src, uint64_t dst, size_t len) +{ + size_t current_transfer_len; + bool is_astep = false; + unsigned long ts = jiffies; + + if (mic_hw_family(chan->dma_ctx->device_num) == FAMILY_KNC) { + if (mic_hw_stepping(chan->dma_ctx->device_num) == KNC_A_STEP) + is_astep = true; + } else { + is_astep = true; + } + do { + current_transfer_len = (len > MAX_DMA_XFER_SIZE) ? + MAX_DMA_XFER_SIZE : len; + + ts = jiffies; + while (!md_avail_desc_ring_space(&chan->dma_ctx->dma_dev, is_astep, chan->chan, + (uint32_t)chan->next_write_index, 1)) { + if (time_after(jiffies,ts + DMA_TO)) { + printk(KERN_ERR "%s %d TO chan 0x%x\n", __func__, __LINE__, chan->ch_num); + return -ENOMEM; + } + } + + //pr_debug("src_phys=0x%llx, dst_phys=0x%llx, size=0x%zx\n", src_phys_addr, dst_phys_addr, current_transfer_len); + md_mic_dma_memcpy_desc(&chan->desc_ring[chan->next_write_index], + src, dst, current_transfer_len); + chan->next_write_index = incr_rb_index((int)chan->next_write_index, + chan->chan->num_desc_in_ring); + len -= current_transfer_len; + dst = dst + current_transfer_len; + src = src + current_transfer_len; + } while(len > 0); + + return 0; +} + +/* + * do_dma - main dma function: perform a dma memcpy, len bytes from src to dst + * + * @chan - DMA channel to use for the transfer. The channel can be allocated + * dynamically by calling allocate_dma_chan, or statically by + * reserve_dma_chan. Using a channel not allocated in this way will + * result in undefined behavior. + * @flags - ATOMIC, called from an interrupt context (no blocking) + * @src - src physical address + * @dst - dst physical address + * @len - Length of the dma + * @comp_cb - When the DMA is complete, the struct's function will be called. NOTE! + * comp_cb(cb_cookie) is called from an interrupt context, so the + * function must not sleep or block. + * + * TODO: Figure out proper value instead of -2 + * Return < 0 on error + * Return = -2 copy was done successfully, no need to wait + * Return >= 0: DMA has been queued. Return value can be polled on for completion + * if DO_DMA_POLLING was sent in flags + * (poll cookie). An example (simplified w/ no error handling). + * int cookie = do_dma(...); + * while (poll_dma_completion(cookie) == 0); + * printf("DMA now complete\n"); + */ +int +do_dma(struct dma_channel *chan, int flags, uint64_t src, + uint64_t dst, size_t len, struct dma_completion_cb *comp_cb) +{ + /* + * TODO: + * Do we need to assert the ownership of channel?? 
+ */ + int poll_ring_index = -1; + int intr_ring_index = -1; + uint32_t num_status_desc = 0; + bool is_astep = false; + unsigned long ts = jiffies; + + might_sleep(); + if (flags & DO_DMA_INTR && !comp_cb) + return -EINVAL; + + if (!verify_next_write_index(chan)) + return -ENODEV; + + //pr_debug(PR_PREFIX "Current transfer src = 0x%llx,dst = 0x%llx, len = 0x%zx\n", src, dst, len); + if (flags & DO_DMA_INTR) { + int err; + err = wait_event_interruptible_timeout(chan->intr_wq, + (-1 != (intr_ring_index = allocate_buffer(&chan->intr_ring.ring))), + DMA_TO); + if (!err) { + printk(KERN_ERR "%s %d TO chan 0x%x\n", __func__, __LINE__, chan->ch_num); + err = -ENOMEM; + } + if (err > 0) + err = 0; + if (!err) { + chan->intr_ring.comp_cb_array[intr_ring_index] = comp_cb; + num_status_desc++; +#ifdef CONFIG_MK1OM + num_status_desc++; +#endif + } else { + return err; + } + //pr_debug(PR_PREFIX "INTR intr_ring_index=%d, chan_num=%lx\n", intr_ring_index, (chan - dma_channels)); + } + + if (flags & DO_DMA_POLLING) { + poll_ring_index = allocate_buffer(&chan->poll_ring); + if (-1 == poll_ring_index) + return -ENOMEM; + num_status_desc++; + //pr_debug(PR_PREFIX "polling poll_ring_index=%d\n", poll_ring_index); + } + if (len && -ENOMEM == program_memcpy_descriptors(chan, src, dst, len)) { + //pr_debug(PR_PREFIX "ERROR: do_dma: No available space from program_memcpy_descriptors\n"); + return -ENOMEM; + } + + if (mic_hw_family(chan->dma_ctx->device_num) == FAMILY_KNC) { + if (mic_hw_stepping(chan->dma_ctx->device_num) == KNC_A_STEP) + is_astep = true; + } else { + is_astep = true; + } + + ts = jiffies; + + while (num_status_desc && num_status_desc > md_avail_desc_ring_space(&chan->dma_ctx->dma_dev, + is_astep, chan->chan, (uint32_t)chan->next_write_index, num_status_desc)) { + if (time_after(jiffies,ts + DMA_TO)) { + printk(KERN_ERR "%s %d TO chan 0x%x\n", __func__, __LINE__, chan->ch_num); + return -ENOMEM; + } + //pr_debug(PR_PREFIX "ERROR: do_dma: No available space from md_avail_desc_ring_space\n"); + } + + if (flags & DO_DMA_POLLING) { + incr_head(&chan->poll_ring); + md_mic_dma_prep_status_desc(&chan->desc_ring[chan->next_write_index], + poll_ring_index, + chan->poll_ring.tail_phys, + false); + chan->next_write_index = incr_rb_index((int)chan->next_write_index, + chan->chan->num_desc_in_ring); + } + + if (flags & DO_DMA_INTR) { + incr_head(&chan->intr_ring.ring); +#ifdef CONFIG_MK1OM + md_mic_dma_prep_status_desc(&chan->desc_ring[chan->next_write_index], + intr_ring_index, + chan->intr_ring.ring.tail_phys, + false); + chan->next_write_index = incr_rb_index((int)chan->next_write_index, + chan->chan->num_desc_in_ring); +#endif + md_mic_dma_prep_status_desc(&chan->desc_ring[chan->next_write_index], + intr_ring_index, + chan->intr_ring.ring.tail_phys, + true); + chan->next_write_index = incr_rb_index((int)chan->next_write_index, + chan->chan->num_desc_in_ring); + } + + /* + * TODO: + * Maybe it is better if we update the head pointer for every descriptor?? 
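+ * (A single update below means one MMIO doorbell write per do_dma() call;
+ * a per-descriptor update could let the engine start earlier at the cost
+ * of one MMIO write per descriptor.)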
+ */ + md_mic_dma_chan_write_head(&chan->dma_ctx->dma_dev, chan->chan, (uint32_t)chan->next_write_index); + //pr_debug(PR_PREFIX "in HW chan->next_write_index=%lld\n", chan->next_write_index); + + if (DO_DMA_POLLING & flags) + return poll_ring_index; + return 0; +} +EXPORT_SYMBOL(do_dma); + +/* + * poll_dma_completion - check if a DMA is complete + * + * @poll_cookie - value returned from do_dma + * + * Returns + * 0 -> DMA pending + * 1 -> DMA completed + * + * Note: This is mostly useful after calling do_dma with a NULL comp_cb parameter, as + * it will allow the caller to wait for DMA completion. + */ +int +poll_dma_completion(int poll_cookie, struct dma_channel *chan) +{ + if (!chan) + return -EINVAL; + /* + * In case of interrupts the ISR runs and reads the value + * of the tail location. If we are polling then we need + * to read the value of the tail location before checking + * if the entry is processed. + */ + chan->poll_ring.tail = read_tail(&chan->poll_ring); + return is_entry_processed(&chan->poll_ring, poll_cookie); +} +EXPORT_SYMBOL(poll_dma_completion); + +/* + * do_status_update: Update physical address location with the value provided. + * Ensures all previous DMA descriptors submitted on this DMA + * channel are executed. + * @chan - DMA channel to use for the transfer. The channel can be allocated + * dynamically by calling allocate_dma_channel, or statically by + * reserve_dma_channel. Using a channel not allocated in this way will + * result in undefined behavior. + * @phys - physical address + * @value - Value to be programmed + */ +int do_status_update(struct dma_channel *chan, uint64_t phys, uint64_t value) +{ + unsigned long ts = jiffies; + bool is_astep = false; + + if (!verify_next_write_index(chan)) + return -ENODEV; + + if (mic_hw_family(chan->dma_ctx->device_num) == FAMILY_KNC) { + if (mic_hw_stepping(chan->dma_ctx->device_num) == KNC_A_STEP) + is_astep = true; + } else { + is_astep = true; + } + /* + * TODO: + * Do we need to assert the ownership of channel?? + */ + ts = jiffies; + while (!md_avail_desc_ring_space(&chan->dma_ctx->dma_dev, + is_astep, chan->chan, (uint32_t) chan->next_write_index, 1)) { + cpu_relax(); + if (time_after(jiffies,ts + DMA_TO)) { + printk(KERN_ERR "%s %d TO chan 0x%x\n", __func__, __LINE__, chan->ch_num); + return -EBUSY; + } + } + + md_mic_dma_prep_status_desc(&chan->desc_ring[chan->next_write_index], + value, + phys, + false); + + chan->next_write_index = incr_rb_index((int)chan->next_write_index, + chan->chan->num_desc_in_ring); + + md_mic_dma_chan_write_head(&chan->dma_ctx->dma_dev, + chan->chan, (uint32_t)chan->next_write_index); + return 0; +} +EXPORT_SYMBOL(do_status_update); + +/* + * get_dma_mark: Obtain current value of DMA mark + * @chan - DMA channel to use for the transfer. The channel can be allocated + * dynamically by calling allocate_dma_channel, or statically by + * reserve_dma_channel. Using a channel not allocated in this way will + * result in undefined behavior. + */ +int get_dma_mark(struct dma_channel *chan) +{ + if (chan) + return chan->intr_ring.ring.head; + else + return -1; +} +EXPORT_SYMBOL(get_dma_mark); + +/* + * program_dma_mark: Increment the current value of the DMA mark for a DMA channel + * and program an interrupt status update descriptor which ensures that all DMA + * descriptors programmed uptil this point in time are completed. + * @chan - DMA channel to use for the transfer. 
The channel can be allocated + * dynamically by calling allocate_dma_channel, or statically by + * reserve_dma_channel. Using a channel not allocated in this way will + * result in undefined behavior. + */ +int program_dma_mark(struct dma_channel *chan) +{ + /* + * TODO: + * Do we need to assert the ownership of channel?? + */ + int intr_ring_index; + int err; + unsigned long ts = jiffies; + uint32_t num_status_desc = 1; + bool is_astep = false; + + if (!verify_next_write_index(chan)) + return -ENODEV; + + if (mic_hw_family(chan->dma_ctx->device_num) == FAMILY_KNC) { + if (mic_hw_stepping(chan->dma_ctx->device_num) == KNC_A_STEP) + is_astep = true; + } else { + is_astep = true; + } + might_sleep(); + err = wait_event_interruptible_timeout(chan->intr_wq, + (-1 != (intr_ring_index = allocate_buffer(&chan->intr_ring.ring))), + DMA_TO); + if (!err) + err = -EBUSY; + if (err > 0) + err = 0; + if (err) + return err; + +#ifdef CONFIG_MK1OM + num_status_desc++; +#endif + ts = jiffies; + while (num_status_desc > md_avail_desc_ring_space(&chan->dma_ctx->dma_dev, + is_astep, chan->chan, (uint32_t)chan->next_write_index, num_status_desc)) { + cpu_relax(); + if (time_after(jiffies,ts + DMA_TO)) { + printk(KERN_ERR "%s %d TO chan 0x%x\n", __func__, __LINE__, chan->ch_num); + return -EBUSY; + } + } + + chan->intr_ring.comp_cb_array[intr_ring_index] = NULL; + + incr_head(&chan->intr_ring.ring); +#ifdef CONFIG_MK1OM + md_mic_dma_prep_status_desc(&chan->desc_ring[chan->next_write_index], + intr_ring_index, + chan->intr_ring.ring.tail_phys, + false); + chan->next_write_index = incr_rb_index((int)chan->next_write_index, + chan->chan->num_desc_in_ring); +#endif + md_mic_dma_prep_status_desc(&chan->desc_ring[chan->next_write_index], + intr_ring_index, + chan->intr_ring.ring.tail_phys, + true); + chan->next_write_index = incr_rb_index((int)chan->next_write_index, + chan->chan->num_desc_in_ring); + + md_mic_dma_chan_write_head(&chan->dma_ctx->dma_dev, chan->chan, (uint32_t)chan->next_write_index); + return intr_ring_index; +} +EXPORT_SYMBOL(program_dma_mark); + +/* + * is_current_dma_mark: Check if the dma mark provided is the current DMA mark. + * @chan - DMA channel + * @mark - DMA mark + * + * Return true on success and false on failure. + */ +bool is_current_dma_mark(struct dma_channel *chan, int mark) +{ + return (get_dma_mark(chan) == mark); +} +EXPORT_SYMBOL(is_current_dma_mark); + +/* + * is_dma_mark_processed: Check if the dma mark provided has been processed. + * @chan - DMA channel + * @mark - DMA mark + * + * Return true on success and false on failure. + */ +bool is_dma_mark_processed(struct dma_channel *chan, int mark) +{ + return is_entry_processed(&chan->intr_ring.ring, mark); +} +EXPORT_SYMBOL(is_dma_mark_processed); + +/* + * dma_mark_wait: + * @chan - DMA channel + * @mark - DMA mark + * @is_interruptible - Use wait_event_interruptible() or not. + * + * Wait for the dma mark to complete. + * Return 0 on success and appropriate error value on error. 
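+ *
+ * A typical fence sequence, mirroring drain_dma_intr() below (error
+ * handling omitted):
+ *
+ *	if (!request_dma_channel(chan)) {
+ *		int mark = program_dma_mark(chan);
+ *		free_dma_channel(chan);
+ *		if (mark >= 0)
+ *			dma_mark_wait(chan, mark, false);
+ *	}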
+ */ +int dma_mark_wait(struct dma_channel *chan, int mark, bool is_interruptible) +{ + int err = 0; + uint32_t prev_tail = 0, new_tail; + uint32_t count = 0; + + if (chan) { + might_sleep(); +__retry: + if (is_interruptible) + err = wait_event_interruptible_timeout( + chan->intr_wq, + is_dma_mark_processed(chan, mark), + DMA_TO); + else + err = wait_event_timeout(chan->intr_wq, + is_dma_mark_processed(chan, mark), DMA_TO); + + if (!err) { // 0 is timeout + new_tail = get_dma_tail_pointer(chan); + if ((count <= DMA_FENCE_TIMEOUT_CNT) && + (!count || new_tail != prev_tail)) { // For performance, prev_tail is not read at the begining + prev_tail = new_tail; + count++; + pr_debug("DMA fence wating is still ongoing, waiting for %d seconds\n", DMA_TO/HZ *count); + goto __retry; + } else { + printk(KERN_ERR "%s %d TO chan 0x%x\n", __func__, __LINE__, chan->ch_num); + err = -EBUSY; + } + } + if (err > 0) + err = 0; + } + return err; +} +EXPORT_SYMBOL(dma_mark_wait); + +/* + * drain_dma_poll - Drain all outstanding DMA operations for a particular + * DMA channel via polling. + * @chan - DMA channel + * Return 0 on success and -errno on error. + */ +int drain_dma_poll(struct dma_channel *chan) +{ + int cookie, err; + unsigned long ts; + uint32_t prev_tail = 0, new_tail, count = 0; + if (chan) { + if ((err = request_dma_channel(chan))) + goto error; + if ((cookie = do_dma(chan, + DO_DMA_POLLING, 0, 0, 0, NULL)) < 0) { + err = cookie; + free_dma_channel(chan); + goto error; + } + free_dma_channel(chan); + ts = jiffies; + while (1 != poll_dma_completion(cookie, chan)) { + cpu_relax(); + if (time_after(jiffies,ts + DMA_TO)) { + new_tail = get_dma_tail_pointer(chan); + if ((!count || new_tail != prev_tail) && (count <= DMA_FENCE_TIMEOUT_CNT)) { + prev_tail = new_tail; + ts = jiffies; + count++; + pr_debug("polling DMA is still ongoing, wating for %d seconds\n", DMA_TO/HZ * count); + } else { + err = -EBUSY; + break; + } + } + } +error: + if (err) + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + } else { + err = -EINVAL; + } + return err; +} +EXPORT_SYMBOL(drain_dma_poll); + +/* + * drain_dma_intr - Drain all outstanding DMA operations for a particular + * DMA channel via interrupt based blocking wait. + * @chan - DMA channel + * Return 0 on success and -errno on error. + */ +int drain_dma_intr(struct dma_channel *chan) +{ + int cookie, err; + + if (chan) { + if ((err = request_dma_channel(chan))) + goto error; + if ((cookie = program_dma_mark(chan)) < 0) { + err = cookie; + free_dma_channel(chan); + goto error; + } + free_dma_channel(chan); + err = dma_mark_wait(chan, cookie, false); +error: + if (err) + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + } else { + err = -EINVAL; + } + return err; +} +EXPORT_SYMBOL(drain_dma_intr); + +/* + * drain_dma_global - Drain all outstanding DMA operations for + * all online DMA channel. + * Return none + */ +int drain_dma_global(mic_dma_handle_t dma_handle) +{ + int i, err = -EINVAL; + struct dma_channel *chan; + struct mic_dma_ctx_t *dma_ctx = (struct mic_dma_ctx_t *)dma_handle; + + if (!dma_ctx) + return err; + + might_sleep(); + for (i = 0 ; i < MAX_NUM_DMA_CHAN; i++) { + chan = &dma_ctx->dma_channels[i]; + if (chan->desc_ring == NULL) + continue; + if ((err = drain_dma_intr(chan))) + break; + } + return err; +} +EXPORT_SYMBOL(drain_dma_global); + +#ifdef _MIC_SCIF_ +/* + * dma_suspend: DMA tasks before transition to low power state. + * @dma_handle: Handle for a DMA driver context. 
+ * + * Perform the following tasks before the device transitions + * to a low power state: + * 1) Store away the DMA descriptor ring physical address base for + * all DMA channels (both host/uOS owned) since the value would be + * required to reinitialize the DMA channels upon transition from + * low power to active state. + * + * Return: none + * Notes: Invoked only on MIC. + */ +void dma_suspend(mic_dma_handle_t dma_handle) +{ + int i; + struct dma_channel *ch; + struct mic_dma_ctx_t *dma_ctx = (struct mic_dma_ctx_t *)dma_handle; + struct mic_dma_device *dma_dev = &dma_ctx->dma_dev; + + for (i = 0; i < MAX_NUM_DMA_CHAN; i++) { + ch = &dma_ctx->dma_channels[i]; + ch->desc_ring_phys = + md_mic_dma_chan_get_desc_ring_phys(dma_dev, ch->chan); + ch->chan->dstat_wb_phys = + md_mic_dma_chan_get_dstatwb_phys(dma_dev, ch->chan); + } +} +EXPORT_SYMBOL(dma_suspend); + +/* + * dma_resume: DMA tasks after wake up from low power state. + * @dma_handle: Handle for a DMA driver context. + * + * Performs the following tasks before the device transitions + * from a low power state to active state: + * 1) As a test, reset the value in DMA configuration register. + * 2) Reset the next_write_index for the DMA descriptor ring to 0 + * since the DMA channel will be reset shortly. + * 3) Reinitialize the DMA MD layer for the channel. + * + * Return: none + * Notes: + * Notes: Invoked only on MIC. + */ +void dma_resume(mic_dma_handle_t dma_handle) +{ + int i; + struct dma_channel *ch; + struct mic_dma_ctx_t *dma_ctx = (struct mic_dma_ctx_t *)dma_handle; + struct mic_dma_device *dma_dev = &dma_ctx->dma_dev; + + /* TODO: Remove test write to SBOX_DCR */ + mic_sbox_write_mmio(dma_dev->mm_sbox, SBOX_DCR, 0); + for (i = 0; i < MAX_NUM_DMA_CHAN; i++) { + ch = &dma_ctx->dma_channels[i]; + ch->next_write_index = 0; + md_mic_dma_chan_init_attr(dma_dev, ch->chan); + md_mic_dma_chan_setup(dma_ctx, ch); + } +} +EXPORT_SYMBOL(dma_resume); + +#else + +/* + * dma_prep_suspend: DMA tasks required on host before a device can transition + * to a low power state. + * @dma_handle: Handle for a DMA driver context. + * + * Performs the following tasks on the host before the device can be allowed + * to transiti to a low power state. + * 1) Reset the next_Write_index for the DMA descriptor ring to 0 + * since the DMA channel will be reset shortly. This is required primarily + * for Host owned DMA channels since MIC does not have access to this + * information. + * Return: none + * Invoked only on Host. 
+ */ +void dma_prep_suspend(mic_dma_handle_t dma_handle) +{ + int i; + struct dma_channel *ch; + struct mic_dma_ctx_t *dma_ctx = (struct mic_dma_ctx_t *)dma_handle; + + for (i = 0; i < MAX_NUM_DMA_CHAN; i++) { + ch = &dma_ctx->dma_channels[i]; + ch->next_write_index = 0; + } +} +EXPORT_SYMBOL(dma_prep_suspend); +#endif + +#ifdef CONFIG_PAGE_CACHE_DMA +#ifdef _MIC_SCIF_ +static const struct dma_operations dma_operations_fast_copy = { + .do_dma = do_dma, + .poll_dma_completion = poll_dma_completion, + .free_dma_channel = free_dma_channel, + .open_dma_device = open_dma_device, + .close_dma_device = close_dma_device, + .allocate_dma_channel = allocate_dma_channel, + .program_descriptors = program_memcpy_descriptors, + .do_dma_polling = DO_DMA_POLLING, +}; + +static const struct file_dma fdma_callback = { + .dmaops = &dma_operations_fast_copy, +}; +#endif +#endif + +#ifdef _MIC_SCIF_ +static int +#else +int +#endif +mic_dma_init(void) +{ + int i; + + for (i = 0; i < MAX_BOARD_SUPPORTED; i++) + mutex_init (&lock_dma_dev_init[i]); +#ifdef CONFIG_PAGE_CACHE_DMA +#ifdef _MIC_SCIF_ + register_dma_for_fast_copy(&fdma_callback); +#endif +#endif + return 0; +} + +#ifdef _MIC_SCIF_ +static void mic_dma_uninit(void) +{ +#ifdef CONFIG_PAGE_CACHE_DMA + unregister_dma_for_fast_copy(); +#endif +} + +module_init(mic_dma_init); +module_exit(mic_dma_uninit); +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) +static int +mic_dma_proc_ring_show(struct seq_file *m, void *data) +{ + struct mic_dma_ctx_t *dma_ctx = m->private; + mic_ctx_t *mic_ctx = get_per_dev_ctx(dma_ctx->device_num - 1); + int i, err; + struct compl_buf_ring *ring; + + if ((err = micpm_get_reference(mic_ctx, true))) { + printk(KERN_ERR "%s %d: unable to get micpm reference: %d\n", + __func__, __LINE__, err); + return err; + } + + seq_printf(m, "Intr rings\n"); + seq_printf(m, "%-10s%-12s%-12s%-12s%-25s%-18s%-25s\n", + "Chan", "Head", "Tail", "Size", "Tail loc", "Actual tail", "In Use"); + for (i = first_dma_chan(); i <= last_dma_chan(); i++) { + ring = &dma_ctx->dma_channels[i].intr_ring.ring; + seq_printf(m, "%-#10x%-#12x%-#12x%-#12x%-#25llx%-#18x%-#18x\n", + i, ring->head, ring->tail, ring->size, + ring->tail_location, *(int*)ring->tail_location, + atomic_read(&dma_ctx->dma_channels[i].flags)); + } + seq_printf(m, "Poll rings\n"); + seq_printf(m, "%-10s%-12s%-12s%-12s%-25s%-18s\n", + "Chan", "Head", "Tail", "Size", "Tail loc", "Actual tail"); + for (i = first_dma_chan(); i <= last_dma_chan(); i++) { + ring = &dma_ctx->dma_channels[i].poll_ring; + seq_printf(m, "%-#10x%-#12x%-#12x%-#12x%-#25llx%-#18x\n", + i, ring->head, ring->tail, ring->size, + ring->tail_location, *(int*)ring->tail_location); + } + seq_printf(m, "Next_Write_Index\n"); + seq_printf(m, "%-10s%-12s\n", "Chan", "Next_Write_Index"); + for (i = 0; i < MAX_NUM_DMA_CHAN; i++) { + seq_printf(m, "%-#10x%-#12llx\n", + i, dma_ctx->dma_channels[i].next_write_index); + } + micpm_put_reference(mic_ctx); + return 0; +} + +static int +mic_dma_proc_ring_open(struct inode *inode, struct file *file) +{ + return single_open(file, mic_dma_proc_ring_show, PDE_DATA(inode)); +} + +static int +mic_dma_proc_reg_show(struct seq_file *m, void *data) +{ + int i, j, chan_num, size, dtpr, err; + struct mic_dma_ctx_t *dma_ctx = m->private; + mic_ctx_t *mic_ctx = get_per_dev_ctx(dma_ctx->device_num - 1); + struct mic_dma_device *dma_dev = &dma_ctx->dma_dev; + struct dma_channel *curr_chan; + union md_mic_dma_desc desc; + + if ((err = micpm_get_reference(mic_ctx, true))) { + printk(KERN_ERR "%s %d: 
unable to get micpm reference: %d\n", + __func__, __LINE__, err); + return err; + } + + seq_printf(m, "========================================" + "=======================================\n"); + seq_printf(m, "SBOX_DCR: %#x\n", + mic_sbox_read_mmio(dma_dev->mm_sbox, SBOX_DCR)); + seq_printf(m, "DMA Channel Registers\n"); + seq_printf(m, "========================================" + "=======================================\n"); + seq_printf(m, "%-10s| %-10s %-10s %-10s %-10s %-10s %-10s" +#ifdef CONFIG_MK1OM + " %-10s %-11s %-14s %-10s" +#endif + "\n", "Channel", "DCAR", "DTPR", "DHPR", + "DRAR_HI", "DRAR_LO", +#ifdef CONFIG_MK1OM + "DSTATWB_LO", "DSTATWB_HI", "DSTAT_CHERR", "DSTAT_CHERRMSK", +#endif + "DSTAT"); + seq_printf(m, "========================================" + "=======================================\n"); + +#ifdef _MIC_SCIF_ + for (i = 0; i < MAX_NUM_DMA_CHAN; i++) { +#else + for (i = first_dma_chan(); i <= last_dma_chan(); i++) { +#endif + curr_chan = &dma_ctx->dma_channels[i]; + chan_num = curr_chan->ch_num; + seq_printf(m, "%-10i| %-#10x %-#10x %-#10x %-#10x" + " %-#10x" +#ifdef CONFIG_MK1OM + " %-#10x %-#11x %-#10x %-#14x" +#endif + " %-#10x\n", chan_num, + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DCAR), + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DTPR), + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DHPR), + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DRAR_HI), + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DRAR_LO), +#ifdef CONFIG_MK1OM + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DSTATWB_LO), + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DSTATWB_HI), + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DCHERR), + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DCHERRMSK), +#endif + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DSTAT)); + } + + seq_printf(m, "\nDMA Channel Descriptor Rings\n"); + seq_printf(m, "========================================" + "=======================================\n"); + + for (i = first_dma_chan(); i <= last_dma_chan(); i++) { + curr_chan = &dma_ctx->dma_channels[i]; + chan_num = curr_chan->ch_num; + dtpr = md_mic_dma_read_mmio(dma_dev, chan_num, REG_DTPR); + seq_printf(m, "Channel %i: [", chan_num); + size = ((int) md_mic_dma_read_mmio(dma_dev, chan_num, REG_DHPR) + - dtpr) % curr_chan->chan->num_desc_in_ring; + /* + * In KNC B0, empty condition is tail = head -1 + */ + if (mic_hw_family(dma_ctx->device_num) == FAMILY_KNC && + mic_hw_stepping(dma_ctx->device_num) >= KNC_B0_STEP) + size -= 1; + + for (j = 0; j < size; j++) { + desc = curr_chan->desc_ring[(j+dtpr) % + curr_chan->chan->num_desc_in_ring]; + + switch (desc.desc.nop.type){ + case NOP: + seq_printf(m," {Type: NOP, 0x%#llx" + " %#llx} ", desc.qwords.qw0, + desc.qwords.qw1); + case MEMCOPY: + seq_printf(m," {Type: MEMCOPY, SAP:" + " 0x%#llx, DAP: %#llx, length: %#llx} ", + (uint64_t) desc.desc.memcopy.sap, + (uint64_t) desc.desc.memcopy.dap, + (uint64_t) desc.desc.memcopy.length); + break; + case STATUS: + seq_printf(m," {Type: STATUS, data:" + " 0x%#llx, DAP: %#llx, intr: %lli} ", + (uint64_t) desc.desc.status.data, + (uint64_t) desc.desc.status.dap, + (uint64_t) desc.desc.status.intr); + break; + case GENERAL: + seq_printf(m," {Type: GENERAL, " + "DAP: %#llx, dword: %#llx} ", + (uint64_t) desc.desc.general.dap, + (uint64_t) desc.desc.general.data); + break; + case KEYNONCECNT: + seq_printf(m," {Type: KEYNONCECNT, sel: " + "%lli, h: %lli, index: %lli, cs: %lli," + " value: %#llx} ", + (uint64_t) desc.desc.keynoncecnt.sel, + (uint64_t) desc.desc.keynoncecnt.h, + (uint64_t) 
desc.desc.keynoncecnt.index, + (uint64_t) desc.desc.keynoncecnt.cs, + (uint64_t) desc.desc.keynoncecnt.data); + break; + case KEY: + seq_printf(m," {Type: KEY, dest_ind" + "ex: %lli, ski: %lli, skap: %#llx ", + (uint64_t) desc.desc.key.di, + (uint64_t) desc.desc.key.ski, + (uint64_t) desc.desc.key.skap); + break; + default: + seq_printf(m," {Uknown Type=%lli ," + "%#llx %#llx} ",(uint64_t) desc.desc.nop.type, + (uint64_t) desc.qwords.qw0, + (uint64_t) desc.qwords.qw1); + } + } + seq_printf(m, "]\n"); + if (mic_hw_family(dma_ctx->device_num) == FAMILY_KNC && + mic_hw_stepping(dma_ctx->device_num) >= KNC_B0_STEP && + curr_chan->chan->dstat_wb_loc) + seq_printf(m, "DSTAT_WB = 0x%x\n", + *((uint32_t*)curr_chan->chan->dstat_wb_loc)); + } + micpm_put_reference(mic_ctx); + + return 0; +} + +static int +mic_dma_proc_reg_open(struct inode *inode, struct file *file) +{ + return single_open(file, mic_dma_proc_reg_show, PDE_DATA(inode)); +} + +struct file_operations micdma_ring_fops = { + .open = mic_dma_proc_ring_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +struct file_operations micdma_reg_fops = { + .open = mic_dma_proc_reg_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void +mic_dma_proc_init(struct mic_dma_ctx_t *dma_ctx) +{ + char name[64]; + + snprintf(name, 63, "%s%d", proc_dma_ring, dma_ctx->device_num); + if (!proc_create_data(name, S_IFREG | S_IRUGO, NULL, &micdma_ring_fops, dma_ctx)) + printk("micdma: unable to register /proc/%s\n", name); + + snprintf(name, 63, "%s%d", proc_dma_reg, dma_ctx->device_num); + if (!proc_create_data(name, S_IFREG | S_IRUGO, NULL, &micdma_reg_fops, dma_ctx)) + printk("micdma: unable to register /proc/%s\n", name); + +} +#else // LINUX VERSION +static int +mic_dma_proc_read_fn(char *buf, char **start, off_t offset, int count, int *eof, void *data) +{ + struct mic_dma_ctx_t *dma_ctx = data; + int i, len = 0; + struct compl_buf_ring *ring; + + len += sprintf(buf + len, "Intr rings\n"); + len += sprintf(buf + len, "%-10s%-12s%-12s%-12s%-25s%-18s%-25s\n", + "Chan", "Head", "Tail", "Size", "Tail loc", "Actual tail", "In Use"); + for (i = first_dma_chan(); i <= last_dma_chan(); i++) { + ring = &dma_ctx->dma_channels[i].intr_ring.ring; + len += sprintf(buf + len, "%-#10x%-#12x%-#12x%-#12x%-#25llx%-#18x%-#18x\n", + i, ring->head, ring->tail, ring->size, + ring->tail_location, *(int*)ring->tail_location, + atomic_read(&dma_ctx->dma_channels[i].flags)); + } + len += sprintf(buf + len, "Poll rings\n"); + len += sprintf(buf + len, "%-10s%-12s%-12s%-12s%-25s%-18s\n", + "Chan", "Head", "Tail", "Size", "Tail loc", "Actual tail"); + for (i = first_dma_chan(); i <= last_dma_chan(); i++) { + ring = &dma_ctx->dma_channels[i].poll_ring; + len += sprintf(buf + len, "%-#10x%-#12x%-#12x%-#12x%-#25llx%-#18x\n", + i, ring->head, ring->tail, ring->size, + ring->tail_location, *(int*)ring->tail_location); + } + len += sprintf(buf + len, "Next_Write_Index\n"); + len += sprintf(buf + len, "%-10s%-12s\n", "Chan", "Next_Write_Index"); + for (i = 0; i < MAX_NUM_DMA_CHAN; i++) { + len += sprintf(buf + len, "%-#10x%-#12llx\n", + i, dma_ctx->dma_channels[i].next_write_index); + } + return len; +} + +static int +mic_dma_proc_read_registers_fn(char *buf, char **start, off_t offset, int count, + int *eof, void *data) +{ + int i, j, chan_num, size, dtpr, len = 0; + struct mic_dma_ctx_t *dma_ctx = data; + struct mic_dma_device *dma_dev = &dma_ctx->dma_dev; + struct dma_channel *curr_chan; + union md_mic_dma_desc desc; 
+ + len += sprintf(buf + len, "========================================" + "=======================================\n"); + len += sprintf(buf + len, "SBOX_DCR: %#x\n", + mic_sbox_read_mmio(dma_dev->mm_sbox, SBOX_DCR)); + len += sprintf(buf + len, "DMA Channel Registers\n"); + len += sprintf(buf + len, "========================================" + "=======================================\n"); + len += sprintf(buf + len, "%-10s| %-10s %-10s %-10s %-10s %-10s %-10s" +#ifdef CONFIG_MK1OM + " %-10s %-11s %-14s %-10s" +#endif + "\n", "Channel", "DCAR", "DTPR", "DHPR", + "DRAR_HI", "DRAR_LO", +#ifdef CONFIG_MK1OM + "DSTATWB_LO", "DSTATWB_HI", "DSTAT_CHERR", "DSTAT_CHERRMSK", +#endif + "DSTAT"); + len += sprintf(buf + len, "========================================" + "=======================================\n"); + +#ifdef _MIC_SCIF_ + for (i = 0; i < MAX_NUM_DMA_CHAN; i++) { +#else + for (i = first_dma_chan(); i <= last_dma_chan(); i++) { +#endif + curr_chan = &dma_ctx->dma_channels[i]; + chan_num = curr_chan->ch_num; + len += sprintf(buf + len, "%-10i| %-#10x %-#10x %-#10x %-#10x" + " %-#10x" +#ifdef CONFIG_MK1OM + " %-#10x %-#11x %-#10x %-#14x" +#endif + " %-#10x\n", chan_num, + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DCAR), + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DTPR), + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DHPR), + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DRAR_HI), + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DRAR_LO), +#ifdef CONFIG_MK1OM + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DSTATWB_LO), + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DSTATWB_HI), + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DCHERR), + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DCHERRMSK), +#endif + md_mic_dma_read_mmio(dma_dev, chan_num, REG_DSTAT)); + } + + len += sprintf(buf + len, "\nDMA Channel Descriptor Rings\n"); + len += sprintf(buf + len, "========================================" + "=======================================\n"); + + for (i = first_dma_chan(); i <= last_dma_chan(); i++) { + curr_chan = &dma_ctx->dma_channels[i]; + chan_num = curr_chan->ch_num; + dtpr = md_mic_dma_read_mmio(dma_dev, chan_num, REG_DTPR); + len += sprintf(buf + len, "Channel %i: [", chan_num); + size = ((int) md_mic_dma_read_mmio(dma_dev, chan_num, REG_DHPR) + - dtpr) % curr_chan->chan->num_desc_in_ring; + /* + * In KNC B0, empty condition is tail = head -1 + */ + if (mic_hw_family(dma_ctx->device_num) == FAMILY_KNC && + mic_hw_stepping(dma_ctx->device_num) >= KNC_B0_STEP) + size -= 1; + + for (j = 0; j < size; j++) { + desc = curr_chan->desc_ring[(j+dtpr) % + curr_chan->chan->num_desc_in_ring]; + + switch (desc.desc.nop.type){ + case NOP: + len += sprintf(buf + len," {Type: NOP, 0x%#llx" + " %#llx} ", desc.qwords.qw0, + desc.qwords.qw1); + case MEMCOPY: + len += sprintf(buf + len," {Type: MEMCOPY, SAP:" + " 0x%#llx, DAP: %#llx, length: %#llx} ", + (uint64_t) desc.desc.memcopy.sap, + (uint64_t) desc.desc.memcopy.dap, + (uint64_t) desc.desc.memcopy.length); + break; + case STATUS: + len += sprintf(buf + len," {Type: STATUS, data:" + " 0x%#llx, DAP: %#llx, intr: %lli} ", + (uint64_t) desc.desc.status.data, + (uint64_t) desc.desc.status.dap, + (uint64_t) desc.desc.status.intr); + break; + case GENERAL: + len += sprintf(buf + len," {Type: GENERAL, " + "DAP: %#llx, dword: %#llx} ", + (uint64_t) desc.desc.general.dap, + (uint64_t) desc.desc.general.data); + break; + case KEYNONCECNT: + len += sprintf(buf + len," {Type: KEYNONCECNT, sel: " + "%lli, h: %lli, index: %lli, cs: %lli," + " value: %#llx} ", + (uint64_t) 
desc.desc.keynoncecnt.sel, + (uint64_t) desc.desc.keynoncecnt.h, + (uint64_t) desc.desc.keynoncecnt.index, + (uint64_t) desc.desc.keynoncecnt.cs, + (uint64_t) desc.desc.keynoncecnt.data); + break; + case KEY: + len += sprintf(buf + len," {Type: KEY, dest_ind" + "ex: %lli, ski: %lli, skap: %#llx ", + (uint64_t) desc.desc.key.di, + (uint64_t) desc.desc.key.ski, + (uint64_t) desc.desc.key.skap); + break; + default: + len += sprintf(buf + len," {Uknown Type=%lli ," + "%#llx %#llx} ",(uint64_t) desc.desc.nop.type, + (uint64_t) desc.qwords.qw0, + (uint64_t) desc.qwords.qw1); + } + } + len += sprintf(buf + len, "]\n"); + if (mic_hw_family(dma_ctx->device_num) == FAMILY_KNC && + mic_hw_stepping(dma_ctx->device_num) >= KNC_B0_STEP && + curr_chan->chan->dstat_wb_loc) + len += sprintf(buf + len, "DSTAT_WB = 0x%x\n", + *((uint32_t*)curr_chan->chan->dstat_wb_loc)); + } + return len; +} + +static void +mic_dma_proc_init(struct mic_dma_ctx_t *dma_ctx) +{ + struct proc_dir_entry *dma_proc; + char name[64]; + + snprintf(name, 63, "%s%d", proc_dma_ring, dma_ctx->device_num); + if ((dma_proc = create_proc_entry(name, S_IFREG | S_IRUGO, NULL)) != NULL) { + dma_proc->read_proc = mic_dma_proc_read_fn; + dma_proc->data = dma_ctx; + } + snprintf(name, 63, "%s%d", proc_dma_reg, dma_ctx->device_num); + if ((dma_proc = create_proc_entry(name, S_IFREG | S_IRUGO, NULL)) != NULL) { + dma_proc->read_proc = mic_dma_proc_read_registers_fn; + dma_proc->data = dma_ctx; + } + +} +#endif // LINUX VERSION + +static void +mic_dma_proc_uninit(struct mic_dma_ctx_t *dma_ctx) +{ + char name[64]; + + snprintf(name, 63, "%s%d", proc_dma_reg, dma_ctx->device_num); + remove_proc_entry(name, NULL); + snprintf(name, 63, "%s%d", proc_dma_ring, dma_ctx->device_num); + remove_proc_entry(name, NULL); +} diff --git a/dma/mic_dma_md.c b/dma/mic_dma_md.c new file mode 100644 index 0000000..705c504 --- /dev/null +++ b/dma/mic_dma_md.c @@ -0,0 +1,522 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. 
Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#define PR_PREFIX "DMA_LIB_MD:" + +#ifdef CONFIG_ML1OM +#define MIC_DMA_AES_CHAN_NUM 7 +#define is_AES_channel(n) ((n) == MIC_DMA_AES_CHAN_NUM) +#else +#define is_AES_channel(n) ((void)(n), 0) +#endif + +#define DMA_CHAN_COOKIE 0xdeadc0d + +#define SBOX_DCAR_IM0 (0x1 << 24) // APIC Interrupt mask bit +#define SBOX_DCAR_IM1 (0x1 << 25) // MSI-X Interrupt mask bit +#define SBOX_DCAR_IS0 (0x1 << 26) // Interrupt status + +#define SBOX_DRARHI_SYS_MASK (0x1 << 26) + +#ifdef _MIC_SCIF_ +static inline uint32_t chan_to_dcr_mask(uint32_t dcr, struct md_mic_dma_chan *chan, struct mic_dma_device *dma_dev) +{ + uint32_t chan_num = chan->ch_num; + uint32_t owner; + + if (!is_AES_channel(chan_num)) + owner = chan->owner; + else + owner = chan->endianness; + + return ((dcr & ~(0x1 << (chan_num * 2))) | (owner << (chan_num * 2))); +} +#endif + +static inline uint32_t drar_hi_to_ba_bits(uint32_t drar_hi) +{ + /* + * Setting bits 3:2 should generate a DESC_ADDR_ERR but the hardware ignores + * these bits currently and doesn't generate the error. + */ +#ifdef _MIC_SCIF_ + return drar_hi & 0xf; +#else + return drar_hi & 0x3; +#endif +} + +static inline uint32_t physaddr_to_drarhi_ba(phys_addr_t phys_addr) +{ + return drar_hi_to_ba_bits((uint32_t)(phys_addr >> 32)); +} + +static inline uint32_t size_to_drar_hi_size(uint32_t size) +{ + return (size & 0x1ffff) << 4; +} + +static inline uint32_t addr_to_drar_hi_smpt_bits(phys_addr_t mic_phys_addr) +{ + return ((mic_phys_addr >> MIC_SYSTEM_PAGE_SHIFT) & 0x1f) << 21; +} + +static inline uint32_t drar_hi_to_smpt(uint32_t drar_hi, uint32_t chan_num) +{ + return ((drar_hi >> 21) & 0x1f); +} + +void md_mic_dma_enable_chan(struct mic_dma_device *dma_dev, uint32_t chan_num, bool enable); + + +#ifdef _MIC_SCIF_ +/** + * md_mic_dma_chan_init_attr - Set channel attributes like owner and endianness + * @chan: The DMA channel handle + */ +void md_mic_dma_chan_init_attr(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan) +{ + uint32_t dcr; + + CHECK_CHAN(chan); + + dcr = mic_sbox_read_mmio(dma_dev->mm_sbox, SBOX_DCR); + dcr = chan_to_dcr_mask(dcr, chan, dma_dev); + mic_sbox_write_mmio(dma_dev->mm_sbox, SBOX_DCR, dcr); +} +#endif + +/* One time DMA Init API */ +void md_mic_dma_init(struct mic_dma_device *dma_dev, uint8_t *mmio_va_base) +{ + int i; +#ifdef _MIC_SCIF_ + dma_dev->mm_sbox = mic_sbox_md_init(); +#else + dma_dev->mm_sbox = mmio_va_base; +#endif + //pr_debug("sbox: va=%p\n", dma_dev.mm_sbox); + + for (i = 0; i < MAX_NUM_DMA_CHAN; i++) { + atomic_set(&(dma_dev->chan_info[i].in_use), CHAN_AVAILABLE); + dma_dev->chan_info[i].cookie = DMA_CHAN_COOKIE; + dma_dev->chan_info[i].dstat_wb_phys = 0; + dma_dev->chan_info[i].dstat_wb_loc = NULL; + } + return; +} + +/* One time DMA Uninit API */ +void md_mic_dma_uninit(struct mic_dma_device *dma_dev) +{ + return; +} + +/** + * md_mic_dma_request_chan + * @owner: DMA channel owner: MIC or Host + * + * Return - The DMA channel handle or NULL if failed + * + * Note: Allocating a Host owned channel is not allowed currently + */ +struct md_mic_dma_chan *md_mic_dma_request_chan(struct mic_dma_device *dma_dev, + enum md_mic_dma_chan_owner owner) +{ + struct md_mic_dma_chan *tmp = NULL; + int i; + + for (i = 0; i < 
MAX_NUM_DMA_CHAN; i++) { + if (CHAN_AVAILABLE == atomic_cmpxchg(&(dma_dev->chan_info[i].in_use), + CHAN_AVAILABLE, CHAN_INUSE)) { + tmp = &dma_dev->chan_info[i]; + tmp->owner = owner; + tmp->ch_num = i; + /* + * Setting endianness by default to MIC_LITTLE_ENDIAN + * in case the AES channel is used for clear transfers + * This is a don't care for clear transfers. + */ + tmp->endianness = MIC_LITTLE_ENDIAN; +#ifdef _MIC_SCIF_ + md_mic_dma_chan_init_attr(dma_dev, tmp); +#endif + break; + } + } + return tmp; +} + +/** + * md_mic_dma_free_chan - Frees up a DMA channel + * @chan: The DMA channel handle + */ +void md_mic_dma_free_chan(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan) +{ + CHECK_CHAN(chan); + atomic_set(&(chan->in_use), CHAN_AVAILABLE); + md_mic_dma_enable_chan(dma_dev, chan->ch_num, false); +} + +/** + * md_mic_dma_enable_chan - Enable/disable the DMA channel + * @chan_num: The DMA channel + * @enable: enable/disable + * + * Must set desc ring and update head pointer only + * after disabling the channel + */ +void md_mic_dma_enable_chan(struct mic_dma_device *dma_dev, + uint32_t chan_num, bool enable) +{ + uint32_t dcr = mic_sbox_read_mmio(dma_dev->mm_sbox, SBOX_DCR); + + /* + * There is a separate bit for every channel. + * Look up sboxDcrReg. + */ + if (enable) { + dcr |= 2 << (chan_num << 1); + } else { + dcr &= ~(2 << (chan_num << 1)); + } + mic_sbox_write_mmio(dma_dev->mm_sbox, SBOX_DCR, dcr); +} + +#if 0 +uint32_t md_mic_dma_chan_read_completion_count(struct mic_dma_device *dma_dev, struct md_mic_dma_chan *chan) +{ + CHECK_CHAN(chan); + + return (md_mic_dma_read_mmio(dma_dev, chan->ch_num, REG_DSTAT) & 0xffff); +} + + +/* This function needs to be used only in error case */ +void update_compcount_and_tail(struct mic_dma_device *dma_dev, struct md_mic_dma_chan *chan) +{ + chan->completion_count = md_mic_dma_chan_read_completion_count(dma_dev, chan); + chan->cached_tail = md_mic_dma_chan_read_tail(dma_dev, chan); +} +#endif +void md_mic_dma_chan_set_dstat_wb(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan) +{ + uint32_t dstat_wb, dstat_wb_hi; + CHECK_CHAN(chan); + + dstat_wb = (uint32_t)chan->dstat_wb_phys; + dstat_wb_hi = chan->dstat_wb_phys >> 32; + md_mic_dma_write_mmio(dma_dev, chan->ch_num, REG_DSTATWB_LO, dstat_wb); + md_mic_dma_write_mmio(dma_dev, chan->ch_num, REG_DSTATWB_HI, dstat_wb_hi); +} + +void md_mic_dma_chan_set_dcherr_msk(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan, uint32_t mask) +{ + CHECK_CHAN(chan); + md_mic_dma_write_mmio(dma_dev, chan->ch_num, REG_DCHERRMSK, mask); +} +#if 0 +uint32_t md_mic_dma_chan_get_dcherr_msk(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan) +{ + CHECK_CHAN(chan); + return md_mic_dma_read_mmio(dma_dev, chan->ch_num, REG_DCHERRMSK); +} + +uint32_t md_mic_dma_chan_get_dcherr(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan) +{ + CHECK_CHAN(chan); + return md_mic_dma_read_mmio(dma_dev, chan->ch_num, REG_DCHERR); +} + +void md_mic_dma_chan_set_dcherr(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan, uint32_t value) +{ + CHECK_CHAN(chan); + md_mic_dma_write_mmio(dma_dev, chan->ch_num, REG_DCHERR, value); + printk("dcherr = %d\n", md_mic_dma_read_mmio(dma_dev, chan->ch_num, REG_DCHERR)); +} +#endif + +/** + * md_mic_dma_chan_set_desc_ring - Configures the DMA channel desc ring + * @chan: The DMA channel handle + * @desc_ring_phys_addr: Physical address of the desc ring base. Needs to be + * physically contiguous and wired down memory. 
+ * @num_desc: Number of descriptors must be a multiple of cache line size. + * Descriptor size should be determined using sizeof(union md_mic_dma_desc). + * The maximum number of descriptors is defined by + * MIC_MAX_NUM_DESC_PER_RING. + */ +void md_mic_dma_chan_set_desc_ring(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan, + phys_addr_t desc_ring_phys_addr, + uint32_t num_desc) +{ + uint32_t chan_num; + uint32_t drar_lo = 0; + uint32_t drar_hi = 0; + + CHECK_CHAN(chan); + chan_num = chan->ch_num; + /* + * TODO: Maybe the 2nd condition should be different considering the + * size of union md_mic_dma_desc? + */ + KASSERT((((num_desc) <= MIC_MAX_NUM_DESC_PER_RING) && + (ALIGN((num_desc - (L1_CACHE_BYTES - 1)), L1_CACHE_BYTES) == num_desc)), + "num_desc > max or not multiple of cache line num 0x%x", num_desc); + + md_mic_dma_enable_chan(dma_dev, chan_num, false); + + drar_hi = size_to_drar_hi_size(num_desc); + + if (MIC_DMA_CHAN_HOST_OWNED == chan->owner) { + drar_hi |= SBOX_DRARHI_SYS_MASK; + drar_hi |= addr_to_drar_hi_smpt_bits(desc_ring_phys_addr); + } + drar_lo = (uint32_t)desc_ring_phys_addr; + drar_hi |= physaddr_to_drarhi_ba(desc_ring_phys_addr); + md_mic_dma_write_mmio(dma_dev, chan_num, REG_DRAR_LO, drar_lo); + md_mic_dma_write_mmio(dma_dev, chan_num, REG_DRAR_HI, drar_hi); + chan->num_desc_in_ring = num_desc; + pr_debug("md_mic_dma_chan_set_desc_ring addr=0x%llx num=%d drar_hi.bits.pageno 0x%x\n", + desc_ring_phys_addr, num_desc, + (uint32_t)(desc_ring_phys_addr >> MIC_SYSTEM_PAGE_SHIFT)); + chan->cached_tail = md_mic_dma_chan_read_tail(dma_dev, chan); + + md_mic_dma_enable_chan(dma_dev, chan_num, true); +} + +uint32_t md_mic_dma_chan_read_head(struct mic_dma_device *dma_dev, struct md_mic_dma_chan *chan) +{ + CHECK_CHAN(chan); + + return md_mic_dma_read_mmio(dma_dev, chan->ch_num, REG_DHPR); +} + +uint32_t md_mic_dma_chan_read_tail(struct mic_dma_device *dma_dev, struct md_mic_dma_chan *chan) +{ + CHECK_CHAN(chan); + + return md_mic_dma_read_mmio(dma_dev, chan->ch_num, REG_DTPR); +} + +/** + * md_mic_dma_chan_intr_pending - Reads interrupt status to figure out + * if an interrupt is pending. + * @chan: The DMA channel handle. + */ +bool md_mic_dma_chan_intr_pending(struct mic_dma_device *dma_dev, struct md_mic_dma_chan *chan) +{ + uint32_t dcar; + CHECK_CHAN(chan); + + dcar = md_mic_dma_read_mmio(dma_dev, chan->ch_num, REG_DCAR); + return (dcar >> 26) & 0x1; +} + +/** + * md_mic_dma_chan_mask_intr - Mask or disable interrupts + * @chan: The DMA channel handle + * + * Masking interrupts will also acknowledge any pending + * interrupts on the channel. + */ +void md_mic_dma_chan_mask_intr(struct mic_dma_device *dma_dev, struct md_mic_dma_chan *chan) +{ + uint32_t dcar; + uint32_t chan_num; + CHECK_CHAN(chan); + chan_num = chan->ch_num; + + dcar = md_mic_dma_read_mmio(dma_dev, chan_num, REG_DCAR); + + if (MIC_DMA_CHAN_MIC_OWNED == chan->owner) + dcar |= SBOX_DCAR_IM0; + else + dcar |= SBOX_DCAR_IM1; + + md_mic_dma_write_mmio(dma_dev, chan_num, REG_DCAR, dcar); + /* + * This read is completed only after previous write is completed. + * It guarantees that, interrupts has been acknowledged to SBOX DMA + * This read forces previous write to be commited in memory. + * This is the actual fix for HSD# 3497216 based on theoretical + * hypothesis that somehow previous write is not truly completed + * since for writes as long as transactions are accepted by SBOX + * ( not necessarily commited in memory) those write transactions + * reported as complete. 
+ */ + dcar = md_mic_dma_read_mmio(dma_dev, chan_num, REG_DCAR); +} + +/** + * md_mic_dma_chan_unmask_intr - Unmask or enable interrupts + * @chan: The DMA channel handle + */ +void md_mic_dma_chan_unmask_intr(struct mic_dma_device *dma_dev, struct md_mic_dma_chan *chan) +{ + uint32_t dcar; + uint32_t chan_num; + CHECK_CHAN(chan); + chan_num = chan->ch_num; + + dcar = md_mic_dma_read_mmio(dma_dev, chan_num, REG_DCAR); + + if (MIC_DMA_CHAN_MIC_OWNED == chan->owner) + dcar &= ~SBOX_DCAR_IM0; + else + dcar &= ~SBOX_DCAR_IM1; + + md_mic_dma_write_mmio(dma_dev, chan_num, REG_DCAR, dcar); + /* + * This read is completed only after previous write is completed. + * It guarantees that, interrupts has been acknowledged to SBOX DMA + * This read forces previous write to be commited in memory. + * This is the actual fix for HSD# 3497216 based on theoretical + * hypothesis that somehow previous write is not truly completed + * since for writes as long as transactions are accepted by SBOX + * ( not necessarily commited in memory) those write transactions + * reported as complete. + */ + dcar = md_mic_dma_read_mmio(dma_dev, chan_num, REG_DCAR); +} + +/** + * md_mic_dma_chan_get_desc_ring_phys - Compute the value of the descriptor ring + * base physical address from the descriptor ring attributes register. + * @dma_dev: DMA device. + * @chan: The DMA channel handle + */ +phys_addr_t +md_mic_dma_chan_get_desc_ring_phys(struct mic_dma_device *dma_dev, struct md_mic_dma_chan *chan) +{ + phys_addr_t phys, phys_hi; + uint32_t phys_lo, chan_num, drar_hi; + + CHECK_CHAN(chan); + chan_num = chan->ch_num; + phys_lo = md_mic_dma_read_mmio(dma_dev, chan_num, REG_DRAR_LO); + drar_hi = md_mic_dma_read_mmio(dma_dev, chan_num, REG_DRAR_HI); + phys_hi = drar_hi_to_ba_bits(drar_hi); + phys_hi |= drar_hi_to_smpt(drar_hi, chan_num) << 2; + + phys = phys_lo | (phys_hi << 32); + return phys; +} + +/** + * md_mic_dma_chan_get_dstatwb_phys - Compute the value of the DSTAT write back + * physical address. + * @dma_dev: DMA device. + * @chan: The DMA channel handle + */ +phys_addr_t md_mic_dma_chan_get_dstatwb_phys(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan) +{ + uint32_t reg, chan_num; + phys_addr_t phys; + + CHECK_CHAN(chan); + chan_num = chan->ch_num; + reg = md_mic_dma_read_mmio(dma_dev, chan_num, REG_DSTATWB_HI); + phys = reg; + reg = md_mic_dma_read_mmio(dma_dev, chan_num, REG_DSTATWB_LO); + + phys = phys << 32 | reg; + return phys; +} + +/** + * md_mic_dma_prep_nop_desc - Prepares a NOP descriptor. + * @desc: Descriptor to be populated. + * + * This descriptor is used to pad a cacheline if the previous + * descriptor does not end on a cacheline boundary. 
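+ *
+ * Padding sketch (ring, next_write and num_desc stand for the caller's
+ * descriptor ring, write index and ring size; assumes the descriptor size
+ * evenly divides L1_CACHE_BYTES):
+ *
+ *	while ((next_write * sizeof(union md_mic_dma_desc)) % L1_CACHE_BYTES) {
+ *		md_mic_dma_prep_nop_desc(&ring[next_write]);
+ *		next_write = (next_write + 1) % num_desc;
+ *	}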
+ */ +void md_mic_dma_prep_nop_desc(union md_mic_dma_desc *desc) +{ + KASSERT((desc != 0), ("NULL desc")); + + desc->qwords.qw0 = 0; + desc->qwords.qw1 = 0; + desc->desc.nop.type = 0; +} + +/* Only Debug Code Below */ + +/** + * md_mic_dma_print_debug - Print channel debug information + * @chan: The DMA channel handle + * @sbuf: Print to an sbuf if not NULL else prints to console + */ +void md_mic_dma_print_debug(struct mic_dma_device *dma_dev, struct md_mic_dma_chan *chan) +{ + uint32_t dcr; + uint32_t dcar; + uint32_t dtpr; + uint32_t dhpr; + uint32_t drar_lo; + uint32_t drar_hi; + uint32_t dstat; + uint32_t chan_num = chan->ch_num; + + dcr = mic_sbox_read_mmio(dma_dev->mm_sbox, SBOX_DCR); + dcar = md_mic_dma_read_mmio(dma_dev, chan_num, REG_DCAR); + dtpr = md_mic_dma_read_mmio(dma_dev, chan_num, REG_DTPR); + dhpr = md_mic_dma_read_mmio(dma_dev, chan_num, REG_DHPR); + drar_lo = md_mic_dma_read_mmio(dma_dev, chan_num, REG_DRAR_LO); + drar_hi = md_mic_dma_read_mmio(dma_dev, chan_num, REG_DRAR_HI); + dstat = md_mic_dma_read_mmio(dma_dev, chan_num, REG_DSTAT); + pr_debug(PR_PREFIX "Chan_Num 0x%x DCR 0x%x DCAR 0x%x DTPR 0x%x" + "DHPR 0x%x DRAR_HI 0x%x DRAR_LO 0x%x DSTAT 0x%x\n", + chan_num, dcr, dcar, dtpr, dhpr, drar_hi, drar_lo, dstat); + pr_debug(PR_PREFIX "DCR 0x%x\n", dcr); +} diff --git a/dma/mic_sbox_md.c b/dma/mic_sbox_md.c new file mode 100644 index 0000000..98118c2 --- /dev/null +++ b/dma/mic_sbox_md.c @@ -0,0 +1,57 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#include +#include +#include + +#include +#include + +#define PR_PREFIX "SBOX:" + +extern void *mic_sbox_mmio_va; + +void *mic_sbox_md_init(void) +{ + return mic_sbox_mmio_va; +} + +void mic_sbox_md_uninit(void *mic_sbox_mmio_va) +{ + iounmap(mic_sbox_mmio_va); + pr_debug(PR_PREFIX "Uninitialized sbox md\n"); +} + diff --git a/host/Makefile b/host/Makefile new file mode 100644 index 0000000..52e6745 --- /dev/null +++ b/host/Makefile @@ -0,0 +1,47 @@ +# +# Manycore Throughput Linux Driver +# Copyright (c) 2010, Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. +# +# + +KERNELDIR = /lib/modules/$(shell uname -r)/build +KBUILD := $(MAKE) -C $(KERNELDIR) M=$(CURDIR) +EXTRADIR = $(shell readlink -f $(KERNELDIR)) + +ifneq ($(DESTDIR),) +INSTALL_MOD_PATH = $(DESTDIR) +endif + +.PHONY: default modules install modules_install clean + +default: modules +install: modules_install udev + +modules: + +$(KBUILD) $@ + +modules_install: + +$(KBUILD) INSTALL_MOD_PATH=$(DESTDIR) modules_install + mkdir -p $(DESTDIR)$(EXTRADIR)/include + install -m644 include/scif.h $(DESTDIR)$(EXTRADIR)/include + install -m644 Module.symvers $(DESTDIR)$(EXTRADIR)/Module.symvers.mic + +udev: udev-scif.rules + mkdir -p $(DESTDIR)/etc/udev/rules.d + cp $< $(DESTDIR)/etc/udev/rules.d/50-$< + +clean: + +$(KBUILD) clean diff --git a/host/acptboot.c b/host/acptboot.c new file mode 100644 index 0000000..be56f8d --- /dev/null +++ b/host/acptboot.c @@ -0,0 +1,194 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. 
+ * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#define ACPT_BACKLOG 120 +#define ACPT_POLL_MS 2000 + +#define ACPT_BOOTED 1 +#define ACPT_BOOT_ACK 2 +#define ACPT_NACK_VERSION 3 +#define ACPT_REQUEST_TIME 4 +#define ACPT_TIME_DATA 5 + +#define ACPT_VERSION 1 + +static acptboot_data_t *acptboot_data; + + +void acptboot_getconn(struct work_struct *work) +{ + mic_ctx_t *node_ctx; + struct scif_portID data; + scif_epd_t conn_epd; + struct timespec tod; + int proto; + int version; + int err; + + if ((err = scif_accept(acptboot_data->listen_epd, &data, &conn_epd, + SCIF_ACCEPT_SYNC))) { + pr_debug("ACPTBOOT: scif_accept_failed %d\n", err); + return; + + //goto requeue_accept; + } + + if (!data.node) { + printk(KERN_ERR "ACPTBOOT: connect received from invalid dev %d\n", + -EINVAL); + goto close_epd; + } + + if ((err = scif_recv(conn_epd, &version, sizeof(version), SCIF_RECV_BLOCK)) != sizeof(version)) { + printk(KERN_ERR "ACPTBOOT: failed to recieve version number err %d\n", err); + goto close_epd; + } + + if ((err = scif_recv(conn_epd, &proto, sizeof(proto), SCIF_RECV_BLOCK)) != sizeof(proto)) { + printk(KERN_ERR "ACPTBOOT: failed to recieve proto id %d\n", err); + goto close_epd; + } + + switch (proto) { + case ACPT_BOOTED: + node_ctx = get_per_dev_ctx(data.node - 1); + mic_setstate(node_ctx, MIC_ONLINE); + node_ctx->boot_count++; + + proto = ACPT_BOOT_ACK; + scif_send(conn_epd, &proto, sizeof(proto), SCIF_SEND_BLOCK); + break; + + case ACPT_REQUEST_TIME: + getnstimeofday(&tod); + proto = ACPT_TIME_DATA; + scif_send(conn_epd, &proto, sizeof(proto), SCIF_SEND_BLOCK); + scif_send(conn_epd, &tod, sizeof(tod), SCIF_SEND_BLOCK); + break; + } + +close_epd: + if ((err = scif_close(conn_epd))) + printk(KERN_ERR "ACPTBOOT: scif_close failed %d\n", err); + +//requeue_accept: + queue_work(acptboot_data->acptbootwq, &acptboot_data->acptbootwork); +} + +void acptboot_exit(void) +{ + int err = 0; + if (acptboot_data) { + if (acptboot_data->listen_epd) + if ((err = scif_close(acptboot_data->listen_epd)) < 0) + pr_debug("scif_close failed %d\n", err); + destroy_workqueue(acptboot_data->acptbootwq); + + kfree(acptboot_data); + } +} + +int +acptboot_init(void) +{ + int err, ret; + + acptboot_data = (acptboot_data_t *)kzalloc(sizeof(*acptboot_data), GFP_KERNEL); + + if (!acptboot_data) { + printk(KERN_ERR "ACPTBOOT: memory allocation failure\n"); + return -ENOMEM; + } + + acptboot_data->listen_epd = scif_open(); + + if (!acptboot_data->listen_epd) { + printk(KERN_ERR "ACPTBOOT: scif_open() failed!\n"); + err = -ENOMEM; + goto error; + } + + err = scif_bind(acptboot_data->listen_epd, MIC_NOTIFY); + if (err < 0) { + pr_debug("ACPTBOOT: scif_bind() failed! %d\n", err); + goto error; + } + + acptboot_data->acptboot_pn = err; + + err = scif_listen(acptboot_data->listen_epd, ACPT_BACKLOG); + if (err < 0) { + pr_debug("scif_listen() failed! 
%d\n", err); + goto error; + + } + + pr_debug("ACPT endpoint listening port %d\n", + acptboot_data->acptboot_pn); + + // Create workqueue + acptboot_data->acptbootwq = __mic_create_singlethread_workqueue( + "ACPTBOOT_WQ"); + + if (!acptboot_data->acptbootwq) { + printk(KERN_ERR "%s %d wq creation failed!\n", __func__, __LINE__); + goto error; + } + + INIT_WORK(&acptboot_data->acptbootwork, acptboot_getconn); + queue_work(acptboot_data->acptbootwq, + &acptboot_data->acptbootwork); + return 0; + +error: + + if (acptboot_data->listen_epd) + if ((ret = scif_close(acptboot_data->listen_epd)) < 0) + pr_debug("ACPTBOOT: scif_close() failed! %d\n", ret); + + kfree(acptboot_data); + + return err; +} + diff --git a/host/ioctl.c b/host/ioctl.c new file mode 100644 index 0000000..f4a8296 --- /dev/null +++ b/host/ioctl.c @@ -0,0 +1,186 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +/* contains code to handle MIC IO control codes */ + +#include "mic_common.h" + +static int do_send_flash_cmd(mic_ctx_t *mic_ctx, struct ctrlioctl_flashcmd *args); +static int get_card_mem(mic_ctx_t *mic_ctx, struct ctrlioctl_cardmemcpy *args); + +/* + DESCRIPTION:: Gets the opcode from the input buffer and call appropriate method + PARAMETERS:: + [in]mic_ctx_t *mic_ctx - pointer to the mic private context + [in]void *in_buffer - input buffer containing opcode + ioctl arguments, + RETURN_VALUE:: 0 if successful, non-zero if failure +*/ +int +adapter_do_ioctl(uint32_t cmd, uint64_t arg) +{ + int status = 0; + mic_ctx_t *mic_ctx = NULL; + + void __user *argp = (void __user *)arg; + switch (cmd) { + + case IOCTL_FLASHCMD: + { + struct ctrlioctl_flashcmd args = {0}; + + if (copy_from_user(&args, argp, sizeof(struct ctrlioctl_flashcmd))) { + return -EFAULT; + } + + if (args.brdnum >= (uint32_t)mic_data.dd_numdevs) { + printk(KERN_ERR "IOCTL error: given board num is invalid\n"); + return -EINVAL; + } + + mic_ctx = get_per_dev_ctx(args.brdnum); + if (!mic_ctx) { + printk(KERN_ERR "IOCTL error: null mic context\n"); + return -ENODEV; + } + + /* Make sure we are running in flash mode */ + if (mic_ctx->mode != MODE_FLASH || mic_ctx->state != MIC_ONLINE) { + printk(KERN_ERR "%s Card is not online in flash mode or online state\n", __func__); + return -EPERM; + } + + if (mic_ctx->bi_family != FAMILY_KNC) { + printk(KERN_ERR "%s IOCTL_FLASHCMD not supported for non KNC family cards\n", __func__); + return -EPERM; + } + + status = do_send_flash_cmd(mic_ctx, &args); + if (status) { + printk(KERN_ERR "IOCTL error: failed to complete IOCTL for bdnum %d\n", args.brdnum); + return status; + } + + if (copy_to_user(argp, &args, sizeof(struct ctrlioctl_flashcmd))) { + return -EFAULT; + } + + break; + } + + case IOCTL_CARDMEMCPY: + { + struct ctrlioctl_cardmemcpy args = {0}; + + if (copy_from_user(&args, argp, sizeof(struct ctrlioctl_cardmemcpy))) { + return -EFAULT; + } + + if (args.brdnum >= (uint32_t)mic_data.dd_numdevs) { + printk(KERN_ERR "IOCTL error: given board num is invalid\n"); + return -EINVAL; + } + mic_ctx = get_per_dev_ctx(args.brdnum); + if (!mic_ctx) { + printk(KERN_ERR "IOCTL error: null mic context\n"); + return -ENODEV; + } + + if(mic_ctx->state != MIC_ONLINE || mic_ctx->mode != MODE_LINUX) { + status = -EPERM; + printk("Error ! 
Card not in linux mode or online state!\n"); + return status; + } + + status = get_card_mem(mic_ctx, &args); + if (status) { + printk(KERN_ERR "IOCTL error: failed to complete IOCTL for bdnum %d\n", args.brdnum); + return status; + } + + ; + break; + } + + default: + printk("Invalid IOCTL\n"); + status = -EINVAL; + break; + } + + return status; +} + +int +do_send_flash_cmd(mic_ctx_t *mic_ctx, struct ctrlioctl_flashcmd *args) +{ + int status = 0; + + if(!capable(CAP_SYS_ADMIN)) { + printk(KERN_ERR "Cannot execute unless sysadmin\n"); + return -EACCES; + } + + pr_debug("%s\n IN:: brdnum = %d, type = %x, data = %p, len = %x\n", + __func__, args->brdnum, args->type, args->data, args->len); + + status = send_flash_cmd(mic_ctx, args->type, args->data, args->len); + + return status; +} + + +int +get_card_mem(mic_ctx_t *mic_ctx, struct ctrlioctl_cardmemcpy *args) +{ + int32_t status = 0; + + if(!capable(CAP_SYS_ADMIN)) { + printk(KERN_ERR "Cannot execute unless sysadmin\n"); + return -EACCES; + } + + if (args->dest == NULL) { + status = EINVAL; + goto exit; + } + pr_debug("%s\n IN:: brdnum = %d, start = %qx, size = %qx, dest = %p\n", + __func__, args->brdnum, args->start, args->size, args->dest); + + status = get_cardside_mem(mic_ctx, args->start, args->size, args->dest); + +exit: + return status; + +} diff --git a/host/linpm.c b/host/linpm.c new file mode 100644 index 0000000..43d2e9a --- /dev/null +++ b/host/linpm.c @@ -0,0 +1,232 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "micint.h" +#include "mic/micveth.h" + +/* + * Retrieves the device context for a particular device + */ +mic_ctx_t * +get_device_context(struct pci_dev *dev) { + int i = 0; + mic_ctx_t *mic_ctx = NULL; + for (i = (mic_data.dd_numdevs -1); i >= 0; i--) { + mic_ctx = &mic_data.dd_bi[i]->bi_ctx; + if (mic_ctx!= NULL) { + //TODO: Is bus number enough to uniquely identify a + //pci_dev struct in mic_ctx? + if (mic_ctx->bi_pdev->bus->number == + dev->bus->number) { + + //Bus number matches + break; + } + } + } + return mic_ctx; +} + +/* + * Notifier callback with event specifying the actual power management + * event to have happened.Our events of Interest right now are: + * PM_HIBERNATION_PREPARE and PM_POST_RESTORE + */ +int +micpm_notifier_block(struct notifier_block *nb, unsigned long event, void *dummy) +{ + int i; + mic_ctx_t *mic_ctx; + switch (event) { + case PM_POST_RESTORE: + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + pr_debug("%s Calling MIC resume\n", __func__); + for(i = 0; i < mic_data.dd_numdevs; i++) { + mic_ctx = get_per_dev_ctx(i); + if (mic_ctx && mic_ctx->micpm_ctx.resume.wq) { + queue_work(mic_ctx->micpm_ctx.resume.wq, + &mic_ctx->micpm_ctx.resume.work); + } + } + break; + default: + pr_debug("%s: Unrecognized event %lu\n", __func__, event); + break; + } +return 0; +} + +/* + * Called by the OS when going into suspend. + * Puts our device to D3Cold. + */ +int +micpm_suspend(struct device *pdev) +{ + struct pci_dev *pci_dev = to_pci_dev(pdev); + mic_ctx_t *mic_ctx = get_device_context(pci_dev); + + if (!pci_dev) { + pr_debug("Not initialized, aborting suspend.\n"); + return -ENODEV; + } + + pr_debug("pm_stop_device called for dev: %d:%d:%d\n", pci_dev->bus->number, + PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn)); + pm_stop_device(mic_ctx); + pci_save_state(pci_dev); + pci_disable_device(pci_dev); + if (pci_set_power_state(pci_dev, PCI_D3cold)) + pr_debug("Not able to set to D3Cold state\n"); + pr_debug("Returning from mic_suspend\n"); + return 0; +} + +/* + * Called by the OS when coming out of suspend. + * Puts our device to D0 and starts driver components. + */ +int +micpm_resume(struct device *pdev) +{ + struct pci_dev *pci_dev = to_pci_dev(pdev); + if (!pci_dev) { + pr_debug("Device not initialized. aborting resume"); + return -ENODEV; + } + + pci_set_power_state(pci_dev, PCI_D0); + if (pci_enable_device(pci_dev)) { + pr_debug("Failed to wake-up device.\n"); + return -EIO; + } + pci_restore_state(pci_dev); + pci_set_master(pci_dev); + pr_debug("pm_start_device called for dev: %d:%d:%d\n", pci_dev->bus->number, + PCI_SLOT(pci_dev->devfn), PCI_FUNC(pci_dev->devfn)); + return 0; +} + +int micpm_suspend_noirq(struct device *pdev) { + + struct pci_dev *pci_dev = to_pci_dev(pdev); + mic_ctx_t *mic_ctx; + bd_info_t *bd_info; + + if (!pci_dev) { + pr_debug("Device not initialized. aborting suspend"); + return -ENODEV; + } + + mic_ctx = get_device_context(pci_dev); + if(mic_ctx) { + bd_info = mic_ctx->bd_info; + /* MSI interrupts do not work on resume. + * See http://www.digipedia.pl/usenet/thread/18815/2513/ + * for a discussion on this issue. 
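+ * As a workaround, the MSI-X vector registered at probe time is freed
+ * here in the noirq suspend phase and requested again with request_irq()
+ * in micpm_resume_noirq() below.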
+ */ + if (mic_ctx->msie) { + free_irq(bd_info->bi_msix_entries[0].vector, &bd_info->bi_ctx); + } + } + return 0; +} + +int micpm_resume_noirq(struct device *pdev) { + + struct pci_dev *pci_dev = to_pci_dev(pdev); + mic_ctx_t *mic_ctx; + bd_info_t *bd_info; + int err; + + if (!pci_dev) { + pr_debug("Device not initialized. aborting resume"); + return -ENODEV; + } + mic_ctx = get_device_context(pci_dev); + if(mic_ctx) { + bd_info = mic_ctx->bd_info; + + /* MSI interrupts do not work on resume. + * See http://www.digipedia.pl/usenet/thread/18815/2513/ + * for a discussion on this issue. + */ + if (mic_ctx->msie) { + err = request_irq(bd_info->bi_msix_entries[0].vector, + mic_irq_isr, 0, "mic", mic_ctx); + if (err) { + pr_debug("%s: %d Error inititalizing MSI interrupts\n", + __func__, __LINE__); + return 0; + } + } + + } + return 0; +} + diff --git a/host/linpsmi.c b/host/linpsmi.c new file mode 100644 index 0000000..8c2780e --- /dev/null +++ b/host/linpsmi.c @@ -0,0 +1,152 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#include "micint.h" + +int mic_psmi_open(struct file *filp) +{ + bd_info_t *bd_info = mic_data.dd_bi[0]; + if (!bd_info->bi_ctx.bi_psmi.enabled) + return -EINVAL; + ((filp)->private_data) = &bd_info->bi_ctx; + return 0; +} + +extern int usagemode_param; + +ssize_t mic_psmi_read(struct file * filp, char __user *buf, + size_t count, loff_t *pos) +{ + ssize_t total_bytes = 0; + unsigned int pg_no, pg_off, bytes; + mic_ctx_t *mic_ctx = ((filp)->private_data); + struct mic_psmi_ctx *psmi_ctx = &mic_ctx->bi_psmi; + loff_t mem_size; + + if (!psmi_ctx->enabled) + return -EINVAL; + if (FAMILY_ABR == mic_ctx->bi_family && + USAGE_MODE_NORMAL != usagemode_param) + mem_size = MIC_APERTURE_SIZE; + else + mem_size = psmi_ctx->dma_mem_size; + if (*pos >= mem_size || count <= 0) + return 0; + if (*pos + count > mem_size) + count = mem_size - *pos; + /* read aperture memory */ + if (USAGE_MODE_NORMAL != usagemode_param) { + if (copy_to_user(buf, + mic_ctx->aper.va + *pos, count)) + return -EFAULT; + goto read_exit; + } + /* read host memory allocated for psmi handler */ + pg_no = *pos / MIC_PSMI_PAGE_SIZE; + pg_off = *pos % MIC_PSMI_PAGE_SIZE; + while (total_bytes < count) { + pci_dma_sync_single_for_cpu(mic_ctx->bi_pdev, + psmi_ctx->dma_tbl[pg_no + 1].pa, + MIC_PSMI_PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + bytes = MIC_PSMI_PAGE_SIZE - pg_off; + if (total_bytes + bytes > count) + bytes = count - total_bytes; + if (copy_to_user(buf, + (void *)psmi_ctx->va_tbl[pg_no].pa + pg_off, bytes)) + return -EFAULT; + total_bytes += bytes; + buf += bytes; + pg_no++; + /* Only the first page needs an offset */ + pg_off = 0; + } +read_exit: + *pos += count; + return count; +} + +static ssize_t show_mem_size(struct device *dev, + struct device_attribute *attr, char *buf) +{ + mic_ctx_t *mic_ctx = dev_get_drvdata(dev); + struct mic_psmi_ctx *psmi_ctx = &mic_ctx->bi_psmi; + + return snprintf(buf, PAGE_SIZE, "%ld\n", + (unsigned long)psmi_ctx->dma_mem_size); +} +static DEVICE_ATTR(mem_size, S_IRUGO, show_mem_size, NULL); + +static struct attribute *psmi_attributes[] = { + &dev_attr_mem_size.attr, + NULL +}; + +struct attribute_group psmi_attr_group = { + .attrs = psmi_attributes +}; + +#if (defined(RHEL_RELEASE_CODE) && \ + (LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32))) || \ + LINUX_VERSION_CODE > KERNEL_VERSION(2,6,34) +static ssize_t mic_psmi_read_ptes(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t pos, size_t size) +#else +static ssize_t mic_psmi_read_ptes(struct kobject *kobj, + struct bin_attribute *attr, char *buf, loff_t pos, size_t size) +#endif +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct mic_psmi_ctx *psmi_ctx = + &((mic_ctx_t *)dev_get_drvdata(dev))->bi_psmi; + + if (pos >= psmi_ctx->dma_tbl_size || size <= 0) + return 0; + if (pos + size > psmi_ctx->dma_tbl_size) + size = psmi_ctx->dma_tbl_size - pos; + memcpy(buf, psmi_ctx->dma_tbl, size); + return size; +} + +struct bin_attribute mic_psmi_ptes_attr = { + .attr = { + .name = "psmi_ptes", + .mode = S_IRUSR + }, + .read = mic_psmi_read_ptes +}; + +extern bool mic_psmi_enable; +module_param_named(psmi, mic_psmi_enable, bool, S_IRUSR); +MODULE_PARM_DESC(psmi, "Enable/disable mic psmi"); diff --git a/host/linscif_host.c b/host/linscif_host.c new file mode 100644 index 0000000..233f8ea --- /dev/null +++ b/host/linscif_host.c @@ -0,0 +1,315 @@ +/* + * Copyright 2010-2017 Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include "mic_common.h" +#include "mic/micscif_smpt.h" +#include "mic/micscif_nodeqp.h" +#include "mic/micscif_intr.h" +#include "mic/micscif_nm.h" +#include "micint.h" + +struct micscif_info ms_info; +struct micscif_dev scif_dev[MAX_BOARD_SUPPORTED + 1]; + +bool mic_watchdog_enable = 1; +bool mic_watchdog_auto_reboot = 1; +bool mic_crash_dump_enabled = 1; + +int +micscif_init(void) +{ + int err; + ms_info.mi_nodeid = 0; // Host is node 0 + ms_info.mi_maxid = 0; // Host is at start the max card ID + ms_info.mi_total = 1; // Host will know about this many MIC cards + ms_info.mi_mask = 1; // first bit in the mask is the host node + + mutex_init (&ms_info.mi_conflock); + spin_lock_init(&ms_info.mi_eplock); + spin_lock_init(&ms_info.mi_connlock); + spin_lock_init(&ms_info.mi_rmalock); + mutex_init (&ms_info.mi_fencelock); + mutex_init (&ms_info.mi_event_cblock); + spin_lock_init(&ms_info.mi_nb_connect_lock); + INIT_LIST_HEAD(&ms_info.mi_uaccept); + INIT_LIST_HEAD(&ms_info.mi_listen); + INIT_LIST_HEAD(&ms_info.mi_zombie); + INIT_LIST_HEAD(&ms_info.mi_connected); + INIT_LIST_HEAD(&ms_info.mi_disconnected); + INIT_LIST_HEAD(&ms_info.mi_rma); + INIT_LIST_HEAD(&ms_info.mi_rma_tc); +#ifdef CONFIG_MMU_NOTIFIER + INIT_LIST_HEAD(&ms_info.mi_mmu_notif_cleanup); +#endif + INIT_LIST_HEAD(&ms_info.mi_fence); + INIT_LIST_HEAD(&ms_info.mi_event_cb); + INIT_LIST_HEAD(&ms_info.mi_nb_connect_list); + ms_info.mi_watchdog_to = DEFAULT_WATCHDOG_TO; +#ifdef MIC_IS_EMULATION + ms_info.mi_watchdog_enabled = 0; + ms_info.mi_watchdog_auto_reboot = 0; +#else + ms_info.mi_watchdog_enabled = mic_watchdog_enable; + ms_info.mi_watchdog_auto_reboot = mic_watchdog_auto_reboot; +#endif +#ifdef RMA_DEBUG + ms_info.rma_unaligned_cpu_cnt = (atomic_long_t) ATOMIC_LONG_INIT(0); + ms_info.rma_alloc_cnt = 
(atomic_long_t) ATOMIC_LONG_INIT(0); + ms_info.rma_pin_cnt = (atomic_long_t) ATOMIC_LONG_INIT(0); +#ifdef CONFIG_MMU_NOTIFIER + ms_info.mmu_notif_cnt = (atomic_long_t) ATOMIC_LONG_INIT(0); +#endif +#endif + ms_info.mi_misc_wq = __mic_create_singlethread_workqueue("SCIF_MISC"); + if (!ms_info.mi_misc_wq) { + err = -ENOMEM; + goto wq_error; + } + INIT_WORK(&ms_info.mi_misc_work, micscif_misc_handler); +#ifdef CONFIG_MMU_NOTIFIER + ms_info.mi_mmu_notif_wq = create_singlethread_workqueue("SCIF_MMU"); + if (!ms_info.mi_mmu_notif_wq) { + err = -ENOMEM; + goto wq_error; + } + INIT_WORK(&ms_info.mi_mmu_notif_work, micscif_mmu_notif_handler); +#endif + ms_info.mi_conn_wq = __mic_create_singlethread_workqueue("SCIF_NB_CONN"); + if (!ms_info.mi_conn_wq) { + err = -ENOMEM; + goto wq_error; + } + INIT_WORK(&ms_info.mi_conn_work, micscif_conn_handler); + + //pr_debug("micscif_create(%d) \n", num_bds); + + // Setup information for self aka loopback. + scif_dev[SCIF_HOST_NODE].sd_node = SCIF_HOST_NODE; + micscif_setup_loopback_qp(&scif_dev[SCIF_HOST_NODE]); + scif_dev[SCIF_HOST_NODE].sd_state = SCIFDEV_RUNNING; + scif_dev[SCIF_HOST_NODE].scif_ref_cnt = + (atomic_long_t) ATOMIC_LONG_INIT(0); + scif_dev[SCIF_HOST_NODE].scif_map_ref_cnt = 0; + init_waitqueue_head(&scif_dev[SCIF_HOST_NODE].sd_wq); + init_waitqueue_head(&scif_dev[SCIF_HOST_NODE].sd_mmap_wq); + mutex_init (&scif_dev[SCIF_HOST_NODE].sd_lock); + ms_info.mi_rma_tc_limit = SCIF_RMA_TEMP_CACHE_LIMIT; + ms_info.en_msg_log = 0; + scif_proc_init(); + return 0; +wq_error: + if (ms_info.mi_misc_wq) + destroy_workqueue(ms_info.mi_misc_wq); +#ifdef CONFIG_MMU_NOTIFIER + if (ms_info.mi_mmu_notif_wq) + destroy_workqueue(ms_info.mi_mmu_notif_wq); +#endif + if (ms_info.mi_conn_wq) + destroy_workqueue(ms_info.mi_conn_wq); + return err; +} + +void +micscif_destroy(void) +{ + struct list_head *pos, *unused; + struct scif_callback *temp; +#ifdef CONFIG_MMU_NOTIFIER + destroy_workqueue(ms_info.mi_mmu_notif_wq); +#endif + destroy_workqueue(ms_info.mi_misc_wq); + destroy_workqueue(ms_info.mi_conn_wq); + micscif_destroy_loopback_qp(&scif_dev[SCIF_HOST_NODE]); + scif_proc_cleanup(); + mic_debug_uninit(); + list_for_each_safe(pos, unused, &ms_info.mi_event_cb) { + temp = list_entry(pos, struct scif_callback, list_member); + list_del(pos); + kfree(temp); + } + mutex_destroy(&ms_info.mi_event_cblock); +} + +int +micscif_host_doorbell_intr_handler(mic_ctx_t *mic_ctx, int doorbell) +{ + struct micscif_dev *dev = &scif_dev[mic_ctx->bi_id + 1]; + + queue_work(dev->sd_intr_wq, &dev->sd_intr_bh); + return 0; +} + +int micscif_setup_host_qp(mic_ctx_t *mic_ctx, struct micscif_dev *scifdev); + +void +micscif_probe(mic_ctx_t *mic_ctx) +{ + struct micscif_dev *scifdev = &scif_dev[mic_ctx->bi_id + 1]; + + // The host needs to keep track of scif_dev interfaces for all boards in + // the system. Host is node zero for MIC board 0 is SCIF node 1, etc. + // This will need to become more dynamic if hot plug is supported + + scifdev->sd_node = mic_ctx->bi_id + 1; + scifdev->sd_state = SCIFDEV_STOPPED; + scifdev->mm_sbox = mic_ctx->mmio.va + HOST_SBOX_BASE_ADDRESS; + + /* This workqueue thread will handle all card->host interrupt processing. 
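+ * The doorbell 0 handler registered at the end of this function,
+ * micscif_host_doorbell_intr_handler(), simply queues sd_intr_bh on
+ * this workqueue; all of the actual card->host message processing is
+ * done from the work item.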
*/ + micscif_setup_interrupts(scifdev); + + init_waitqueue_head(&scifdev->sd_mmap_wq); + init_waitqueue_head(&scifdev->sd_wq); + mutex_init (&scifdev->sd_lock); + INIT_LIST_HEAD(&scifdev->sd_p2p); + + init_waitqueue_head(&scifdev->sd_watchdog_wq); + snprintf(scifdev->sd_ln_wqname, sizeof(scifdev->sd_intr_wqname), + "SCIF LOSTNODE %d", scifdev->sd_node); + if (!(scifdev->sd_ln_wq = + __mic_create_singlethread_workqueue(scifdev->sd_ln_wqname))) + printk(KERN_ERR "%s %d wq creation failed\n", __func__, __LINE__); + INIT_DELAYED_WORK(&scifdev->sd_watchdog_work, micscif_watchdog_handler); + /* + * Register function for doorbell 0 which will + * basically kick off the workqueue. + */ + mic_reg_irqhandler(mic_ctx, 0, "SCIF DoorBell 0", + micscif_host_doorbell_intr_handler); +} + +void +micscif_start(mic_ctx_t *mic_ctx) +{ + struct micscif_dev *scifdev = &scif_dev[mic_ctx->bi_id + 1]; + + scifdev->scif_ref_cnt = (atomic_long_t) ATOMIC_LONG_INIT(0); + scifdev->scif_map_ref_cnt = 0; + + scifdev->sd_state = SCIFDEV_INIT; + + + /* Sets up bd_bs and the host side of the queuepair */ + pr_debug("micscif_probe: host setting up qp \n"); + micscif_setup_host_qp(mic_ctx, scifdev); +} + +void micscif_removehost_respose(struct micscif_dev *scifdev, struct nodemsg *msg); + +void +micscif_stop(mic_ctx_t *mic_ctx) +{ + struct micscif_dev *scifdev = &scif_dev[mic_ctx->bi_id + 1]; + + if (scifdev->sd_state == SCIFDEV_STOPPED || scifdev->sd_state == SCIFDEV_INIT) + return; + + micscif_disconnect_node(scifdev->sd_node, NULL, DISCONN_TYPE_LOST_NODE); +} + +void +micscif_remove(mic_ctx_t *mic_ctx) +{ + struct micscif_dev *scifdev = &scif_dev[mic_ctx->bi_id + 1]; + struct micscif_qp *qp = &scifdev->qpairs[0]; + + destroy_workqueue(scifdev->sd_intr_wq); + scifdev->sd_intr_wq = 0; + cancel_delayed_work_sync(&scifdev->sd_watchdog_work); + if (scifdev->sd_ln_wq){ + destroy_workqueue(scifdev->sd_ln_wq); + scifdev->sd_ln_wq = 0; + } + mic_unreg_irqhandler(mic_ctx, 0x0, "SCIF DoorBell 0"); + + if (qp) { + mic_ctx_unmap_single(mic_ctx, qp->local_buf, qp->inbound_q.size); + mic_ctx_unmap_single(mic_ctx, qp->local_qp, sizeof(struct micscif_qp)); + kfree((void*)(qp->inbound_q.rb_base)); + } + + if (scifdev->qpairs) { + kfree(scifdev->qpairs); + scifdev->qpairs = NULL; + } +} + +int +scif_get_node_status(int node_id) +{ + struct micscif_dev *scifdev = &scif_dev[node_id]; + + return scifdev->sd_state; +} + +struct scatterlist * +micscif_p2p_mapsg(void *va, int page_size, int page_cnt) +{ + struct scatterlist *sg; + struct page *page; + int i; + + if ((sg = kcalloc(page_cnt, sizeof(struct scatterlist), GFP_KERNEL)) == NULL) { + return NULL; + } + + sg_init_table(sg, page_cnt); + + for (i = 0; i < page_cnt; i++) { +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0)) + phys_addr_t phys; + phys = slow_virt_to_phys(va); + + if ((page = pfn_to_page(phys >> PAGE_SHIFT)) == NULL) + goto p2p_sg_err; +#else + if ((page = vmalloc_to_page(va)) == NULL) + goto p2p_sg_err; +#endif + sg_set_page(&sg[i], page, page_size, 0); + va += page_size; + } + + return sg; + +p2p_sg_err: + kfree(sg); + return NULL; +} + +void +micscif_p2p_freesg(struct scatterlist *sg) +{ + kfree(sg); +} diff --git a/host/linsysfs.c b/host/linsysfs.c new file mode 100644 index 0000000..70c261f --- /dev/null +++ b/host/linsysfs.c @@ -0,0 +1,766 @@ +/* + * Copyright 2010-2017 Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include "micint.h" +#include +#include +#include +#include "mic/micveth.h" + +#define SBOX_SCR9_VENDORID(x) ((x) & 0xf) +#define SBOX_SCR9_REVISION(x) (((x) >> 4) & 0xf) +#define SBOX_SCR9_DENSITY(x) (((x) >> 8) & 0x3) +#define SBOX_SCR9_ECC(x) (((x) >> 29) & 0x1) + +bd_info_t * +dev_to_bdi(struct device *dev) +{ + struct list_head *pos, *tmpq; + bd_info_t *bdi = NULL; + list_for_each_safe(pos, tmpq, &mic_data.dd_bdlist) { + bdi = list_entry(pos, bd_info_t, bi_list); + if (bdi->bi_sysfsdev == dev) + break; + } + return bdi; +} + +/* + * sysfs entries in lieu of MMIO ioctl + */ + +struct device_attribute_sbox { + struct device_attribute devattr; + uint32_t offset, mask, shift; +}; + +uint32_t +bd_sbox_read(bd_info_t *bdi, uint32_t offset) +{ + uint32_t reg_value, ret; + ret = micpm_get_reference(&bdi->bi_ctx, true); + if (ret) + return -EAGAIN; + reg_value = SBOX_READ(bdi->bi_ctx.mmio.va, offset); + ret = micpm_put_reference(&bdi->bi_ctx); + if (ret) + return -EAGAIN; + + return reg_value; +} + +#define DEVICE_ATTR_SBOX(_name, _mode, _offset, _mask, _shift) \ +struct device_attribute_sbox sbox_attr_##_name = \ +{ __ATTR(_name, _mode, show_sbox_register, NULL), _offset, _mask, _shift } + +ssize_t +show_sbox_register(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct device_attribute_sbox *attr_sbox = container_of(attr, + struct device_attribute_sbox, devattr); + bd_info_t *bdi = dev_to_bdi(dev); + return snprintf(buf, PAGE_SIZE, "%x\n", + (bd_sbox_read(bdi, attr_sbox->offset) >> attr_sbox->shift) & attr_sbox->mask); +} + +#ifdef CONFIG_ML1OM +static DEVICE_ATTR_SBOX(corevoltage, S_IRUGO, SBOX_COREVOLT, MASK_COREVOLT, SHIFT_COREVOLT); +static DEVICE_ATTR_SBOX(corefrequency, S_IRUGO, SBOX_COREFREQ, MASK_COREFREQ, SHIFT_COREFREQ); +#endif 
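+/*
+ * Each DEVICE_ATTR_SBOX() entry publishes one bit-field of an SBOX register
+ * as a read-only sysfs attribute: show_sbox_register() returns
+ * ((SBOX_READ(mmio, offset) >> shift) & mask), and bd_sbox_read() brackets
+ * the MMIO read with micpm_get_reference()/micpm_put_reference() so the
+ * register is only touched while the card is accessible. From user space
+ * these attributes appear under the per-board device node, e.g.
+ * /sys/class/mic/mic0/memsize (path inferred from the "mic" class and
+ * "mic%d" device names created in host/linux.c).
+ */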
+static DEVICE_ATTR_SBOX(memoryvoltage, S_IRUGO, SBOX_MEMVOLT, MASK_MEMVOLT, SHIFT_MEMVOLT); +static DEVICE_ATTR_SBOX(memoryfrequency, S_IRUGO, SBOX_MEMORYFREQ, MASK_MEMORYFREQ, SHIFT_MEMORYFREQ); +static DEVICE_ATTR_SBOX(memsize, S_IRUGO, SBOX_SCRATCH0, MASK_MEMSIZE, SHIFT_MEMSIZE); +static DEVICE_ATTR_SBOX(flashversion, S_IRUGO, SBOX_SCRATCH7, MASK_FLASHVERSION, SHIFT_FLASHVERSION); + +/* HW Info */ +static DEVICE_ATTR_SBOX(substepping_data, S_IRUGO, SBOX_SCRATCH13, MASK_SUBSTEPPING_DATA, SHIFT_SUBSTEPPING_DATA); +static DEVICE_ATTR_SBOX(stepping_data, S_IRUGO, SBOX_SCRATCH13, MASK_STEPPING_DATA, SHIFT_STEPPING_DATA); +static DEVICE_ATTR_SBOX(model, S_IRUGO, SBOX_SCRATCH13, MASK_MODEL, SHIFT_MODEL); +static DEVICE_ATTR_SBOX(family_data, S_IRUGO, SBOX_SCRATCH13, MASK_FAMILY_DATA, SHIFT_FAMILY_DATA); +static DEVICE_ATTR_SBOX(processor, S_IRUGO, SBOX_SCRATCH13, MASK_PROCESSOR, SHIFT_PROCESSOR); +static DEVICE_ATTR_SBOX(platform, S_IRUGO, SBOX_SCRATCH13, MASK_PLATFORM, SHIFT_PLATFORM); +static DEVICE_ATTR_SBOX(extended_model, S_IRUGO, SBOX_SCRATCH13, MASK_EXTENDED_MODEL, SHIFT_EXTENDED_MODEL); +static DEVICE_ATTR_SBOX(extended_family, S_IRUGO, SBOX_SCRATCH13, MASK_EXTENDED_FAMILY, SHIFT_EXTENDED_FAMILY); +/* copy of fuse_configuration_revision [129:120] */ +static DEVICE_ATTR_SBOX(fuse_config_rev, S_IRUGO, SBOX_SCRATCH7, MASK_FUSE_CONFIG_REV, SHIFT_FUSE_CONFIG_REV); + +static DEVICE_ATTR_SBOX(active_cores, S_IRUGO, SBOX_SCRATCH4, MASK_ACTIVE_CORES, SHIFT_ACTIVE_CORES); +static DEVICE_ATTR_SBOX(fail_safe_offset, S_IRUSR, SBOX_FAIL_SAFE_OFFSET, MASK_FAIL_SAFE, SHIFT_FAIL_SAFE); + +ssize_t show_flash_update(struct device *dev, struct device_attribute *attr, char *buf) +{ + uint32_t value, ret; + bd_info_t *bdi = dev_to_bdi(dev); + ret = micpm_get_reference(&bdi->bi_ctx, true); + if (ret) + return -EAGAIN; + value = DBOX_READ(bdi->bi_ctx.mmio.va, DBOX_SWF0X0); + ret = micpm_put_reference(&bdi->bi_ctx); + if (ret) + return -EAGAIN; + + return snprintf(buf, PAGE_SIZE, "%x\n", value); +} + +static ssize_t +set_flash_update(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + unsigned long value; + int ret; + bd_info_t *bdi = dev_to_bdi(dev); +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,39) + ret = kstrtoul(buf, 0, &value); + if (ret) + return count; +#else + value = simple_strtoul(buf, NULL, 10); +#endif + ret = micpm_get_reference(&bdi->bi_ctx, true); + if (ret) + return -EAGAIN; + DBOX_WRITE((unsigned int)value, bdi->bi_ctx.mmio.va, DBOX_SWF0X0); + ret = micpm_put_reference(&bdi->bi_ctx); + if (ret) + return -EAGAIN; + + return count; + +} +static DEVICE_ATTR(flash_update, S_IRUSR | S_IWUSR, show_flash_update, set_flash_update); + +ssize_t +show_meminfo(struct device *dev, struct device_attribute *attr, char *buf) +{ + uint32_t value; + bd_info_t *bdi = dev_to_bdi(dev); + value = bd_sbox_read(bdi, SBOX_SCRATCH9); + return snprintf(buf, PAGE_SIZE, "vendor:%x,revision:%x" + ",density:%x,ecc_enable:%x", + SBOX_SCR9_VENDORID(value), SBOX_SCR9_REVISION(value), + SBOX_SCR9_DENSITY(value), SBOX_SCR9_ECC(value)); +} +static DEVICE_ATTR(meminfo, S_IRUGO, show_meminfo, NULL); + +ssize_t +show_sku(struct device *dev, struct device_attribute *attr, char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + return snprintf(buf, PAGE_SIZE, "%s\n", bdi->bi_ctx.sku_name); +} +static DEVICE_ATTR(sku, S_IRUGO, show_sku, NULL); +/******************************************************************************/ + +static ssize_t +show_version(struct device *dev, struct device_attribute 
*attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%s\n", BUILD_VERSION); +} +static DEVICE_ATTR(version, S_IRUGO, show_version, NULL); + +static ssize_t +show_p2p(struct device *dev, struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%s\n", mic_p2p_enable? "enable" : "disable"); +} +static DEVICE_ATTR(peer2peer, S_IRUGO, show_p2p, NULL); + +static struct attribute *host_attributes[] = { + &dev_attr_version.attr, + &dev_attr_peer2peer.attr, + NULL +}; + +struct attribute_group host_attr_group = { + .attrs = host_attributes +}; + +static ssize_t +show_family(struct device *dev, struct device_attribute *attr, char *buf) +{ + static const char KNF[] = "Knights Ferry"; + static const char KNC[] = "x100"; + bd_info_t *bdi = dev_to_bdi(dev); + const char *card = NULL; + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + + if (mic_ctx->bi_family == FAMILY_ABR) + card = KNF; + else + card = KNC; + + if (card) + return snprintf(buf, PAGE_SIZE, "%s\n", card); + else + return snprintf(buf, PAGE_SIZE, "Unknown\n"); +} +static DEVICE_ATTR(family, S_IRUGO, show_family, NULL); + +static ssize_t +show_stepping(struct device *dev, struct device_attribute *attr, char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + char string[3]; + show_stepping_comm(&bdi->bi_ctx,string); + return snprintf(buf, PAGE_SIZE, "%s\n", string); +} +static DEVICE_ATTR(stepping, S_IRUGO, show_stepping, NULL); + +char *micstates[] = {"ready", "booting", "no response", "boot failed", + "online", "shutdown", "lost", "resetting", "reset failed", "invalid"}; +static ssize_t +show_micstate(struct device *dev, struct device_attribute *attr, char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + + if (bdi->bi_ctx.state >= MIC_INVALID) + mic_setstate(&bdi->bi_ctx, MIC_INVALID); + return snprintf(buf, PAGE_SIZE, "%s", micstates[bdi->bi_ctx.state]); +} + +static int +match_micstate(const char **buf, const char *string) +{ + size_t len = strlen(string); + if (!strncmp(*buf, string, len)) { + *buf += len; + return true; + } + return false; +} + +static ssize_t +set_micstate(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + const char *default_mm_image = "/usr/share/mpss/boot/rasmm-kernel.from-eeprom.elf"; + + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + int mode; + size_t len; + char *arg, *arg2 = NULL; + int err = 0; + + /* parse the new state */ + if (match_micstate(&buf, "boot:linux:")) { + mode = MODE_LINUX; + } else if (match_micstate(&buf, "boot:elf:")) { + mode = MODE_ELF; + } else if (match_micstate(&buf, "boot:flash:")) { + mode = MODE_FLASH; + } else if (sysfs_streq(buf, "reset")) { + + mutex_lock(&mic_ctx->state_lock); + if (mic_ctx->state == MIC_READY) { + mutex_unlock(&mic_ctx->state_lock); + return -EINVAL; + } + + mutex_unlock(&mic_ctx->state_lock); + adapter_stop_device(mic_ctx, 1, 0); + return count; + } else if (sysfs_streq(buf, "reset:force")) { + int reattempt = !RESET_REATTEMPT; + + mutex_lock(&mic_ctx->state_lock); + if (mic_ctx->state == MIC_READY) + reattempt = RESET_REATTEMPT; + + mutex_unlock(&mic_ctx->state_lock); + adapter_stop_device(mic_ctx, 1, reattempt); + return count; + } else if (sysfs_streq(buf, "shutdown")) { + adapter_shutdown_device(mic_ctx); + return count; + } else { + return -EINVAL; + } + + /* we're booting something; a filename follows the colon */ + len = strlen(buf); + if (buf && buf[0] == '\n') { + len = 0; + } + if (!len && mode == MODE_FLASH) { + buf = default_mm_image; + len = strlen(buf); + } + if (!(arg = 
kmalloc(len + 1, GFP_KERNEL))) + return -ENOMEM; + memcpy(arg, buf, len + 1); + if (arg[len - 1] == '\n') + arg[len - 1] = '\0'; + + /* if booting linux, there may be yet another filename */ + if (mode == MODE_LINUX && (arg2 = strchr(arg, ':'))) + *arg2++ = '\0'; + + /* atomically change the state */ + mutex_lock(&mic_ctx->state_lock); + if (mic_ctx->state == MIC_READY) { + kfree(mic_ctx->image); + mic_ctx->mode = mode; + mic_ctx->image = arg; + mic_ctx->initramfs = arg2; + mic_setstate(mic_ctx, MIC_BOOT); + mutex_unlock(&mic_ctx->state_lock); + printk("mic image: %s\n", mic_ctx->image); + } else { + kfree(arg); + printk(KERN_ERR "Error! Card not in offline/ready state. Cannot change mode\n"); + mutex_unlock(&mic_ctx->state_lock); + return -EIO; + } + + /* actually perform the boot */ + if (mode == MODE_LINUX) { + mic_ctx->card_usage_mode = USAGE_MODE_NORMAL; + err = boot_linux_uos(mic_ctx, mic_ctx->image, mic_ctx->initramfs); + if (!err) + adapter_post_boot_device(mic_ctx); + } else { + err = boot_micdev_app(mic_ctx, mic_ctx->image); + } + + if (!err) + return count; + printk("booting failed %d\n", err); + return err; +} +static DEVICE_ATTR(state, S_IRUGO|S_IWUSR, show_micstate, set_micstate); + +char *micmodes[] = {"N/A", "linux", "elf", "flash"}; + +static ssize_t +show_mode(struct device *dev, struct device_attribute *attr, char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + + if (bdi->bi_ctx.mode > MODE_FLASH) + bdi->bi_ctx.mode = MODE_NONE; + return snprintf(buf, PAGE_SIZE, "%s", micmodes[bdi->bi_ctx.mode]); +} +static DEVICE_ATTR(mode, S_IRUGO, show_mode, NULL); + +int scif_get_node_status(int node_id); +static char *scif_status_stings[] = {"not present", "initializing", "online", + "sleeping", "stopping", "stopped"}; +static ssize_t +show_scif_status(struct device *dev, struct device_attribute *attr, char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + int scif_status; + + scif_status = scif_get_node_status(bdi->bi_ctx.bi_id + 1); + return snprintf(buf, PAGE_SIZE, "%s\n", scif_status_stings[scif_status]); +} +static DEVICE_ATTR(scif_status, S_IRUGO, show_scif_status, NULL); + +static ssize_t +show_image(struct device *dev, struct device_attribute *attr, char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + return snprintf(buf, PAGE_SIZE, "%s", bdi->bi_ctx.image); +} +static DEVICE_ATTR(image, S_IRUGO, show_image, NULL); + +static ssize_t +show_initramfs(struct device *dev, struct device_attribute *attr, char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + return snprintf(buf, PAGE_SIZE, "%s", bdi->bi_ctx.initramfs); +} +static DEVICE_ATTR(initramfs, S_IRUGO, show_initramfs, NULL); + +static ssize_t +show_postcode(struct device *dev, struct device_attribute *attr, char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + uint32_t postcode; + + if ((micpm_get_reference(mic_ctx, true))) { + PM_DEBUG("get_reference failed. 
Node may be lost\n"); + return -EBUSY; + } + postcode = mic_getpostcode(mic_ctx); + if (postcode == 0xffffffff) { + printk("Invalid Postcode : %c%c\n", postcode & 0xff, (postcode >> 8) & 0xff); + micpm_put_reference(mic_ctx); + return -ENXIO; + } + + if (postcode == 0x0) { + printk("Postcode : %c%c\n", postcode & 0xff, (postcode >> 8) & 0xff); + micpm_put_reference(mic_ctx); + return -EAGAIN; + } + micpm_put_reference(mic_ctx); + return snprintf(buf, PAGE_SIZE, "%c%c", postcode & 0xff, (postcode >> 8) & 0xff); +} +static DEVICE_ATTR(post_code, S_IRUGO, show_postcode, NULL); + +static ssize_t +show_boot_count(struct device *dev, struct device_attribute *attr, char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + + return snprintf(buf, PAGE_SIZE, "%d", mic_ctx->boot_count); +} +static DEVICE_ATTR(boot_count, S_IRUGO, show_boot_count, NULL); + +static ssize_t +show_crash_count(struct device *dev, struct device_attribute *attr, char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + + return snprintf(buf, PAGE_SIZE, "%d", mic_ctx->crash_count); +} +static DEVICE_ATTR(crash_count, S_IRUGO, show_crash_count, NULL); + +static ssize_t +show_cmdline(struct device *dev, struct device_attribute *attr, char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + char *cmdline = mic_ctx->sysfs_info.cmdline; + + if (cmdline == NULL) { + return snprintf(buf, PAGE_SIZE, "not set\n"); + } else { + return snprintf(buf, PAGE_SIZE, "%s\n", cmdline); + } + return 0; +} + +static ssize_t +set_cmdline(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + + if (mic_ctx->sysfs_info.cmdline != NULL) + kfree(mic_ctx->sysfs_info.cmdline); + + if ((mic_ctx->sysfs_info.cmdline = kmalloc(count + 100, GFP_ATOMIC)) == NULL) + return -ENOMEM; + strcpy(mic_ctx->sysfs_info.cmdline, buf); + + if (mic_ctx->sysfs_info.cmdline[count - 1] == '\n') + mic_ctx->sysfs_info.cmdline[count - 1] = '\0'; + + return count; +} +static DEVICE_ATTR(cmdline, S_IRUGO|S_IWUSR, show_cmdline, set_cmdline); + +static ssize_t +show_kernel_cmdline(struct device *dev, struct device_attribute *attr, char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + char *cmdline = mic_ctx->sysfs_info.kernel_cmdline; + + if ((mic_ctx->state == MIC_READY) || (cmdline == NULL)) { + return snprintf(buf, PAGE_SIZE, "ready\n"); + } else { + return snprintf(buf, PAGE_SIZE, "%s\n", cmdline); + } +} +static DEVICE_ATTR(kernel_cmdline, S_IRUGO, show_kernel_cmdline, NULL); + +static ssize_t show_pc3_enabled(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + return snprintf(buf, PAGE_SIZE, "%d\n", mic_ctx->micpm_ctx.pc3_enabled); +} +static ssize_t +store_pc3_enabled(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + int i, ret; + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + + if(sscanf(buf, "%d", &i) != 1) { + ret = -EINVAL; + goto exit; + } + + if (i < 0) { + ret = -EINVAL; + goto exit; + } + + ret = micpm_update_pc3(mic_ctx, (i) ? 
true : false); + if (ret) + goto exit; + + pr_debug("pc3_enabled = %d\n", mic_ctx->micpm_ctx.pc3_enabled); + ret = count; +exit: + return ret; +} +static DEVICE_ATTR(pc3_enabled, S_IRUGO | S_IWUSR, show_pc3_enabled, store_pc3_enabled); + +static ssize_t show_pc6_enabled(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + return snprintf(buf, PAGE_SIZE, "%d\n", mic_ctx->micpm_ctx.pc6_enabled); +} + +static ssize_t +store_pc6_enabled(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + int i, ret; + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + + if(sscanf(buf, "%d", &i) != 1) { + ret = -EINVAL; + goto exit; + } + + if (i < 0) { + ret = -EINVAL; + goto exit; + } + + ret = micpm_update_pc6(mic_ctx, (i) ? true : false); + if (ret) + goto exit; + + pr_debug("pc6_enabled = %d\n", mic_ctx->micpm_ctx.pc6_enabled); + ret = count; +exit: + return ret; +} + +static DEVICE_ATTR(pc6_enabled, S_IRUGO | S_IWUSR, show_pc6_enabled, store_pc6_enabled); + +static ssize_t show_pc6_timeout(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + return snprintf(buf, PAGE_SIZE, "%u\n", mic_ctx->micpm_ctx.pc6_timeout); +} +static ssize_t +store_pc6_timeout(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + int i, ret; + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + + if(sscanf(buf, "%d", &i) != 1) { + ret = -EINVAL; + goto exit; + } + + if (i < 0) { + ret = -EINVAL; + goto exit; + } + + if (mic_ctx->micpm_ctx.pc6_timeout != i) { + mic_ctx->micpm_ctx.pc6_timeout = i; + } + pr_debug("pc6 timeout set to %us\n", mic_ctx->micpm_ctx.pc6_timeout); + ret = count; +exit: + return ret; +} +static DEVICE_ATTR(pc6_timeout, S_IRUGO | S_IWUSR, show_pc6_timeout, store_pc6_timeout); + +static ssize_t show_log_buf_addr(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + + return snprintf(buf, PAGE_SIZE, "%p\n", mic_ctx->log_buf_addr); +} + +static ssize_t +store_log_buf_addr(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + int ret; + uint64_t addr; + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + + if (sscanf(buf, "%llx", &addr) != 1) { + ret = -EINVAL; + goto exit; + } + + mic_ctx->log_buf_addr = (void*)addr; + ret = count; +exit: + return ret; +} +static DEVICE_ATTR(log_buf_addr, S_IRUGO | S_IWUSR, show_log_buf_addr, store_log_buf_addr); + +static ssize_t show_log_buf_len(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + + return snprintf(buf, PAGE_SIZE, "%p\n", mic_ctx->log_buf_len); +} + +static ssize_t +store_log_buf_len(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + int ret; + uint64_t addr; + bd_info_t *bdi = dev_to_bdi(dev); + mic_ctx_t *mic_ctx = &bdi->bi_ctx; + + if (sscanf(buf, "%llx", &addr) != 1) { + ret = -EINVAL; + goto exit; + } + + mic_ctx->log_buf_len = (int*)addr; + ret = count; +exit: + return ret; +} +static DEVICE_ATTR(log_buf_len, S_IRUGO | S_IWUSR, show_log_buf_len, store_log_buf_len); + +union serialnum { + uint32_t values[3]; + char serial[13]; +}; + +static ssize_t +show_serialnumber(struct device *dev, struct 
device_attribute *attr, char *buf) +{ + bd_info_t *bdi = dev_to_bdi(dev); + union serialnum serial; + uint32_t ret; + + memset(serial.serial, 0, sizeof(serial.serial)); + ret = micpm_get_reference(&bdi->bi_ctx, true); + if (ret) + return -EAGAIN; + serial.values[0] = DBOX_READ(bdi->bi_ctx.mmio.va, DBOX_SWF1X0); + serial.values[1] = DBOX_READ(bdi->bi_ctx.mmio.va, DBOX_SWF1X1); + serial.values[2] = DBOX_READ(bdi->bi_ctx.mmio.va, DBOX_SWF1X2); + ret = micpm_put_reference(&bdi->bi_ctx); + if (ret) + return -EAGAIN; + return snprintf(buf, PAGE_SIZE, "%s", serial.serial); +} +static DEVICE_ATTR(serialnumber, S_IRUGO, show_serialnumber, NULL); + +static ssize_t +show_interface_version(struct device *dev, struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%s", LINUX_INTERFACE_VERSION); +} +static DEVICE_ATTR(interface_version, S_IRUGO, show_interface_version, NULL); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,34) || \ + defined(RHEL_RELEASE_CODE) +extern ssize_t show_virtblk_file(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t store_virtblk_file(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count); +static DEVICE_ATTR(virtblk_file, S_IRUGO | S_IWUSR, show_virtblk_file, store_virtblk_file); +#endif + +static struct attribute *bd_attributes[] = { + &dev_attr_family.attr, + &dev_attr_stepping.attr, + &dev_attr_state.attr, + &dev_attr_mode.attr, + &dev_attr_image.attr, + &dev_attr_initramfs.attr, + &dev_attr_post_code.attr, + &dev_attr_boot_count.attr, + &dev_attr_crash_count.attr, + &dev_attr_cmdline.attr, + &dev_attr_kernel_cmdline.attr, + &dev_attr_serialnumber.attr, + &dev_attr_scif_status.attr, + &dev_attr_meminfo.attr, + &dev_attr_pc3_enabled.attr, + &dev_attr_pc6_enabled.attr, + &dev_attr_pc6_timeout.attr, + &dev_attr_flash_update.attr, + &dev_attr_log_buf_addr.attr, + &dev_attr_log_buf_len.attr, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,34) || \ + defined(RHEL_RELEASE_CODE) + &dev_attr_virtblk_file.attr, +#endif + &dev_attr_sku.attr, + &dev_attr_interface_version.attr, + +#ifdef CONFIG_ML1OM + &sbox_attr_corevoltage.devattr.attr, + &sbox_attr_corefrequency.devattr.attr, +#endif + &sbox_attr_memoryvoltage.devattr.attr, + &sbox_attr_memoryfrequency.devattr.attr, + &sbox_attr_memsize.devattr.attr, + &sbox_attr_flashversion.devattr.attr, + &sbox_attr_substepping_data.devattr.attr, + &sbox_attr_stepping_data.devattr.attr, + &sbox_attr_model.devattr.attr, + &sbox_attr_family_data.devattr.attr, + &sbox_attr_processor.devattr.attr, + &sbox_attr_platform.devattr.attr, + &sbox_attr_extended_model.devattr.attr, + &sbox_attr_extended_family.devattr.attr, + &sbox_attr_fuse_config_rev.devattr.attr, + &sbox_attr_active_cores.devattr.attr, + &sbox_attr_fail_safe_offset.devattr.attr, + NULL +}; + +struct attribute_group bd_attr_group = { + .attrs = bd_attributes +}; diff --git a/host/linux.c b/host/linux.c new file mode 100644 index 0000000..fd0411a --- /dev/null +++ b/host/linux.c @@ -0,0 +1,796 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include + +#include "mic/micscif_kmem_cache.h" +#include "micint.h" +#include "mic_common.h" +#include "mic/io_interface.h" +#include "mic/mic_pm.h" +#include "mic/micveth.h" + +MODULE_LICENSE("GPL"); +MODULE_INFO(build_number, BUILD_NUMBER); +MODULE_INFO(build_bywhom, BUILD_BYWHOM); +MODULE_INFO(build_ondate, BUILD_ONDATE); +MODULE_INFO(build_scmver, BUILD_SCMVER); + +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,34)) +#include +#endif + +struct kmem_cache *unaligned_cache; +mic_lindata_t mic_lindata; + +module_param_named(ulimit, mic_ulimit_check, bool, 0600); +MODULE_PARM_DESC(ulimit, "SCIF ulimit check"); + +module_param_named(reg_cache, mic_reg_cache_enable, bool, 0600); +MODULE_PARM_DESC(reg_cache, "SCIF registration caching"); + +module_param_named(huge_page, mic_huge_page_enable, bool, 0600); +MODULE_PARM_DESC(huge_page, "SCIF Huge Page Support"); + +extern bool mic_p2p_enable; +module_param_named(p2p, mic_p2p_enable, bool, 0600); +MODULE_PARM_DESC(p2p, "SCIF peer-to-peer"); + +extern bool mic_p2p_proxy_enable; +module_param_named(p2p_proxy, mic_p2p_proxy_enable, bool, 0600); +MODULE_PARM_DESC(p2p_proxy, "SCIF peer-to-peer proxy DMA support"); + +extern bool mic_watchdog_enable; +module_param_named(watchdog, mic_watchdog_enable, bool, 0600); +MODULE_PARM_DESC(watchdog, "SCIF Watchdog"); + +extern bool mic_watchdog_auto_reboot; +module_param_named(watchdog_auto_reboot, mic_watchdog_auto_reboot, bool, 0600); +MODULE_PARM_DESC(watchdog_auto_reboot, "SCIF Watchdog auto reboot"); + +bool mic_msi_enable = 1; +module_param_named(msi, mic_msi_enable, bool, 0600); +MODULE_PARM_DESC(mic_msi_enable, "To enable MSIx in the driver."); + +int mic_pm_qos_cpu_dma_lat = -1; +module_param_named(pm_qos_cpu_dma_lat, mic_pm_qos_cpu_dma_lat, int, 0600); +MODULE_PARM_DESC(mic_pm_qos_cpu_dma_lat, "PM QoS CPU DMA latency in usecs."); + +extern int ramoops_count; +module_param_named(ramoops_count, ramoops_count, int, 0600); +MODULE_PARM_DESC(ramoops_count, "Maximum frame count for the ramoops driver."); + +extern bool mic_crash_dump_enabled; +module_param_named(crash_dump, mic_crash_dump_enabled, bool, 0600); +MODULE_PARM_DESC(mic_crash_dump_enabled, "MIC Crash Dump enabled."); + +#define GET_FILE_SIZE_FROM_INODE(fp) 
i_size_read((fp)->f_path.dentry->d_inode) + +int usagemode_param = 0; + +static int +mic_open(struct inode *inode, struct file *filp) +{ + dev_t dev = inode->i_rdev; + + switch (MINOR(dev)) { + case 0: + return 0; + case 1: + return scif_fdopen(filp); + case 2: + return mic_psmi_open(filp); + } + + return -EINVAL; +} + +static int +mic_release(struct inode *inode, struct file *filp) +{ + dev_t dev = inode->i_rdev; + int rc = 0; + + switch (MINOR(dev)) { + case 0: + if (filp->private_data == filp) { + // Fasync is set + rc = fasync_helper(-1, filp, 0, &mic_data.dd_fasync); + mic_data.dd_fasync = NULL; + } + return rc; + case 1: + return scif_fdclose(filp); + case 2: + // psmi access to device + return 0; + } + + return -EINVAL; +} + +extern ssize_t mic_psmi_read(struct file * filp, char __user *buf, + size_t count, loff_t *pos); +static ssize_t +mic_read(struct file * filp, char __user *buf, + size_t count, loff_t *pos) +{ + dev_t dev = filp->f_path.dentry->d_inode->i_rdev; + if (MINOR(dev) == 2) + return mic_psmi_read(filp, buf, count, pos); + + return -EINVAL; +} + +static long +mic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + dev_t dev; + int status = 0; + + dev = filp->f_path.dentry->d_inode->i_rdev; + if (MINOR(dev) == 1) + return scif_process_ioctl(filp, cmd, arg); + + if (MINOR(dev) == 2) + return -EINVAL; + + status = adapter_do_ioctl(cmd, arg); + return status; +} + +static int +mic_fasync(int fd, struct file *filp, int on) +{ + int rc; + + if ((rc = fasync_helper(fd, filp, on, &mic_data.dd_fasync)) < 0) { + return rc; + } + + if (on) { +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)) + rc = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); +#else + __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); +#endif + filp->private_data = filp; + } else { + filp->private_data = NULL; + } + + return rc; +} + +int +mic_mmap(struct file *f, struct vm_area_struct *vma) +{ + dev_t dev = f->f_path.dentry->d_inode->i_rdev; + if (MINOR(dev) == 1) + return micscif_mmap(f, vma); + + return -EINVAL; +} + +unsigned int +mic_poll(struct file *f, poll_table *wait) +{ + dev_t dev = f->f_path.dentry->d_inode->i_rdev; + if (MINOR(dev) == 1) + return micscif_poll(f, wait); + + return -EINVAL; +} + +int +mic_flush(struct file *f, fl_owner_t id) +{ + dev_t dev = f->f_path.dentry->d_inode->i_rdev; + if (MINOR(dev) == 1) + return micscif_flush(f, id); + + return -EINVAL; +} + +irqreturn_t +mic_irq_isr(int irq, void *data) +{ + if (((mic_ctx_t *)data)->msie) + adapter_imsr((mic_ctx_t *)data); + else if (adapter_isr((mic_ctx_t *)data) < 0 ){ + return IRQ_NONE; + } + + return IRQ_HANDLED; +} + +extern struct attribute_group bd_attr_group; +extern struct attribute_group host_attr_group; +extern struct attribute_group scif_attr_group; +extern struct attribute_group psmi_attr_group; +extern struct bin_attribute mic_psmi_ptes_attr; + +static int +mic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + int brdnum = mic_data.dd_numdevs; + int err = 0; + bd_info_t *bd_info; + mic_ctx_t *mic_ctx; +#ifdef CONFIG_PCI_MSI + int i=0; +#endif + if ((bd_info = (bd_info_t *)kzalloc(sizeof(bd_info_t), GFP_KERNEL)) == NULL) { + printk("MIC: probe failed allocating memory for bd_info\n"); + return -ENOSPC; + } + + mic_ctx = &bd_info->bi_ctx; + mic_ctx->bd_info = bd_info; + mic_ctx->bi_id = brdnum; + mic_ctx->bi_pdev = pdev; + mic_ctx->msie = 0; + mic_data.dd_bi[brdnum] = bd_info; + + if ((err = pci_enable_device(pdev))) { + printk("pci_enable failed board #%d\n", brdnum); + goto 
probe_freebd; + } + + pci_set_master(pdev); + err = pci_reenable_device(pdev); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) { + printk("mic %d: ERROR DMA not available\n", brdnum); + goto probe_freebd; + } + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) { + printk("mic %d: ERROR pci_set_consistent_dma_mask(64) %d\n", brdnum, err); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) { + printk("mic %d: ERROR pci_set_consistent_dma_mask(32) %d\n", brdnum, err); + goto probe_freebd; + } + } + + // Allocate bar 4 for MMIO and GTT + bd_info->bi_ctx.mmio.pa = pci_resource_start(pdev, DLDR_MMIO_BAR); + bd_info->bi_ctx.mmio.len = pci_resource_len(pdev, DLDR_MMIO_BAR); + if (request_mem_region(bd_info->bi_ctx.mmio.pa, + bd_info->bi_ctx.mmio.len, "mic") == NULL) { + printk("mic %d: failed to reserve mmio space\n", brdnum); + goto probe_freebd; + } + + // Allocate bar 0 for access Aperture + bd_info->bi_ctx.aper.pa = pci_resource_start(pdev, DLDR_APT_BAR); + bd_info->bi_ctx.aper.len = pci_resource_len(pdev, DLDR_APT_BAR); + if (request_mem_region(bd_info->bi_ctx.aper.pa, + bd_info->bi_ctx.aper.len, "mic") == NULL) { + printk("mic %d: failed to reserve aperture space\n", brdnum); + goto probe_relmmio; + } + +#ifdef CONFIG_PCI_MSI + if (mic_msi_enable){ + for (i = 0; i < MIC_NUM_MSIX_ENTRIES; i ++) + bd_info->bi_msix_entries[i].entry = i; + err = pci_enable_msix(mic_ctx->bi_pdev, bd_info->bi_msix_entries, + MIC_NUM_MSIX_ENTRIES); + if (err == 0 ) { + // Only support 1 MSIx for now + err = request_irq(bd_info->bi_msix_entries[0].vector, + mic_irq_isr, 0, "mic", mic_ctx); + if (err != 0) { + printk("MIC: Error in request_irq %d\n", err); + goto probe_relaper; + } + mic_ctx->msie = 1; + } + } +#endif + + // TODO: this needs to be hardened and actually return errors + if ((err = adapter_init_device(mic_ctx)) != 0) { + printk("MIC: Adapter init device failed %d\n", err); + goto probe_relaper; + } + + // Adding sysfs entries + set_sysfs_entries(mic_ctx); + + bd_info->bi_sysfsdev = device_create(mic_lindata.dd_class, &pdev->dev, + mic_lindata.dd_dev + 2 + mic_ctx->bd_info->bi_ctx.bi_id, + NULL, "mic%d", mic_ctx->bd_info->bi_ctx.bi_id); + err = sysfs_create_group(&mic_ctx->bd_info->bi_sysfsdev->kobj, &bd_attr_group); + mic_ctx->sysfs_state = sysfs_get_dirent(mic_ctx->bd_info->bi_sysfsdev->kobj.sd, +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,35) && LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0)) + NULL, +#endif + "state"); + + dev_set_drvdata(mic_ctx->bd_info->bi_sysfsdev, mic_ctx); + + if (!mic_ctx->msie) + if ((err = request_irq(mic_ctx->bi_pdev->irq, mic_irq_isr, + IRQF_SHARED, "mic", mic_ctx)) != 0) { + printk("MIC: Error in request_irq %d\n", err); + goto probe_unmapaper; + } + + adapter_probe(&bd_info->bi_ctx); + + if (mic_ctx->bi_psmi.enabled) { + err = sysfs_create_group(&mic_ctx->bd_info->bi_sysfsdev->kobj, + &psmi_attr_group); + err = device_create_bin_file(mic_ctx->bd_info->bi_sysfsdev, + &mic_psmi_ptes_attr); + } + + adapter_wait_reset(mic_ctx); + + // Adding a board instance so increment the total number of MICs in the system. 
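+ // Done last, after adapter init, sysfs setup and IRQ registration have
+ // succeeded, so a partially probed board is never left on dd_bdlist.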
+ list_add_tail(&bd_info->bi_list, &mic_data.dd_bdlist); + mic_data.dd_numdevs++; + printk("mic_probe %d:%d:%d as board #%d\n", pdev->bus->number, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), brdnum); + return 0; + +probe_unmapaper: + wait_event(mic_ctx->ioremapwq, mic_ctx->aper.va || mic_ctx->state == MIC_RESETFAIL); + if (mic_ctx->aper.va) + iounmap((void *)bd_info->bi_ctx.aper.va); + iounmap((void *)bd_info->bi_ctx.mmio.va); + +probe_relaper: + release_mem_region(bd_info->bi_ctx.aper.pa, bd_info->bi_ctx.aper.len); + +probe_relmmio: + release_mem_region(bd_info->bi_ctx.mmio.pa, bd_info->bi_ctx.mmio.len); + +probe_freebd: + kfree(bd_info); + return err; +} + +static void +mic_remove(struct pci_dev *pdev) +{ + int32_t brdnum; + bd_info_t *bd_info; + + if (mic_data.dd_numdevs - 1 < 0) + return; + mic_data.dd_numdevs--; + brdnum = mic_data.dd_numdevs; + + /* Make sure boards are shutdown and not available. */ + bd_info = mic_data.dd_bi[brdnum]; + + spin_lock_bh(&bd_info->bi_ctx.sysfs_lock); + sysfs_put(bd_info->bi_ctx.sysfs_state); + bd_info->bi_ctx.sysfs_state = NULL; + spin_unlock_bh(&bd_info->bi_ctx.sysfs_lock); + + if (bd_info->bi_ctx.bi_psmi.enabled) { + device_remove_bin_file(bd_info->bi_sysfsdev, &mic_psmi_ptes_attr); + sysfs_remove_group(&bd_info->bi_sysfsdev->kobj, &psmi_attr_group); + } + sysfs_remove_group(&bd_info->bi_sysfsdev->kobj, &bd_attr_group); + + free_sysfs_entries(&bd_info->bi_ctx); + device_destroy(mic_lindata.dd_class, + mic_lindata.dd_dev + 2 + bd_info->bi_ctx.bi_id); + + adapter_stop_device(&bd_info->bi_ctx, 1, 0); + /* + * Need to wait for reset since accessing the card while GDDR training + * is ongoing by adapter_remove(..) below for example can be fatal. + */ + wait_for_reset(&bd_info->bi_ctx); + + mic_disable_interrupts(&bd_info->bi_ctx); + + if (!bd_info->bi_ctx.msie) { + free_irq(bd_info->bi_ctx.bi_pdev->irq, &bd_info->bi_ctx); +#ifdef CONFIG_PCI_MSI + } else { + free_irq(bd_info->bi_msix_entries[0].vector, &bd_info->bi_ctx); + pci_disable_msix(bd_info->bi_ctx.bi_pdev); +#endif + } + adapter_remove(&bd_info->bi_ctx); + release_mem_region(bd_info->bi_ctx.aper.pa, bd_info->bi_ctx.aper.len); + release_mem_region(bd_info->bi_ctx.mmio.pa, bd_info->bi_ctx.mmio.len); + pci_disable_device(bd_info->bi_ctx.bi_pdev); + kfree(bd_info); +} + +static void +mic_shutdown(struct pci_dev *pdev) { + mic_ctx_t *mic_ctx; + mic_ctx = get_device_context(pdev); + + if(!mic_ctx) + return; + + adapter_stop_device(mic_ctx, !RESET_WAIT , !RESET_REATTEMPT); + return; +} +static const struct file_operations mic_fops = { + .open = mic_open, + .release = mic_release, + .read = mic_read, + .unlocked_ioctl = mic_ioctl, + .fasync = mic_fasync, + .mmap = mic_mmap, + .poll = mic_poll, + .flush = mic_flush, + .owner = THIS_MODULE, +}; + +static const struct dev_pm_ops pci_dev_pm_ops = { + .suspend = micpm_suspend, + .resume = micpm_resume, + .freeze = micpm_suspend, + .restore = micpm_resume, + .suspend_noirq = micpm_suspend_noirq, + .resume_noirq = micpm_resume_noirq, + .freeze_noirq = micpm_suspend_noirq, + .restore_noirq = micpm_resume_noirq, +}; + +static struct notifier_block mic_pm_notifer = { + .notifier_call = micpm_notifier_block, +}; + +static struct pci_device_id mic_pci_tbl[] = { +#ifdef CONFIG_ML1OM + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ABR_2249, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ABR_224a, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, +#endif +#ifdef CONFIG_MK1OM + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_KNC_2250, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, + { 
PCI_VENDOR_ID_INTEL, PCI_DEVICE_KNC_2251, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_KNC_2252, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_KNC_2253, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_KNC_2254, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_KNC_2255, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_KNC_2256, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_KNC_2257, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_KNC_2258, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_KNC_2259, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_KNC_225a, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_KNC_225b, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_KNC_225c, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_KNC_225d, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_KNC_225e, PCI_ANY_ID, PCI_ANY_ID, + 0, 0, 0 }, + +#endif + { 0, } +}; + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,31) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0) +#define MODE_T umode_t +#else +#define MODE_T mode_t +#endif +static char * +mic_devnode(struct device *dev, MODE_T *mode) +{ + return kasprintf(GFP_KERNEL, "mic/%s", dev_name(dev)); +} +#undef MODE_T +#endif + +static int __init +mic_init(void) +{ + int ret, i; + + adapter_init(); + + unaligned_cache = micscif_kmem_cache_create(); + if (!unaligned_cache) { + ret = -ENOMEM; + goto init_free_ports; + } + + mic_lindata.dd_pcidriver.name = "mic"; + mic_lindata.dd_pcidriver.id_table = mic_pci_tbl; + mic_lindata.dd_pcidriver.probe = mic_probe; + mic_lindata.dd_pcidriver.remove = mic_remove; + mic_lindata.dd_pcidriver.driver.pm = &pci_dev_pm_ops; + mic_lindata.dd_pcidriver.shutdown = mic_shutdown; + + + if ((ret = alloc_chrdev_region(&mic_lindata.dd_dev, + 0, MAX_DLDR_MINORS, "mic") != 0)) { + printk("Error allocating device nodes: %d\n", ret); + goto init_free_ports; + } + + cdev_init(&mic_lindata.dd_cdev, &mic_fops); + mic_lindata.dd_cdev.owner = THIS_MODULE; + mic_lindata.dd_cdev.ops = &mic_fops; + + if ((ret = cdev_add(&mic_lindata.dd_cdev, + mic_lindata.dd_dev, MAX_DLDR_MINORS) != 0)) { + kobject_put(&mic_lindata.dd_cdev.kobj); + goto init_free_region; + } + + mic_lindata.dd_class = class_create(THIS_MODULE, "mic"); + if (IS_ERR(mic_lindata.dd_class)) { + printk("MICDLDR: Error createing mic class\n"); + cdev_del(&mic_lindata.dd_cdev); + ret = PTR_ERR(mic_lindata.dd_class); + goto init_free_region; + } + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,31) + mic_lindata.dd_class->devnode = mic_devnode; +#endif + + mic_lindata.dd_hostdev = device_create(mic_lindata.dd_class, NULL, + mic_lindata.dd_dev, NULL, "ctrl"); + mic_lindata.dd_scifdev = device_create(mic_lindata.dd_class, NULL, + mic_lindata.dd_dev + 1, NULL, "scif"); + ret = sysfs_create_group(&mic_lindata.dd_hostdev->kobj, &host_attr_group); + ret = sysfs_create_group(&mic_lindata.dd_scifdev->kobj, &scif_attr_group); + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,31) + mic_lindata.dd_class->devnode = NULL; +#endif + + if (micveth_init(mic_lindata.dd_hostdev)) + printk(KERN_ERR "%s: micveth_init failed\n", __func__); + + ret = pci_register_driver(&mic_lindata.dd_pcidriver); + if (ret) { + micscif_destroy(); + printk("mic: failed to register pci driver %d\n", ret); + goto 
clean_unregister; + } + + if (!mic_data.dd_numdevs) { + printk("mic: No MIC boards present. SCIF available in loopback mode\n"); + } else { + printk("mic: number of devices detected %d \n", mic_data.dd_numdevs); + } + + for (i = 0; i < mic_data.dd_numdevs; i++) { + mic_ctx_t *mic_ctx = get_per_dev_ctx(i); + wait_event(mic_ctx->ioremapwq, + mic_ctx->aper.va || mic_ctx->state == MIC_RESETFAIL); + destroy_workqueue(mic_ctx->ioremapworkq); + } + + micveth_init_legacy(mic_data.dd_numdevs, mic_lindata.dd_hostdev); + + ret = acptboot_init(); + +#ifdef USE_VCONSOLE + micvcons_create(mic_data.dd_numdevs); +#endif + + /* Initialize Data structures for PM Disconnect */ + ret = micpm_disconn_init(mic_data.dd_numdevs + 1); + if (ret) + printk(KERN_ERR "%s: Failed to initialize PM disconnect" + " data structures. PM may not work as expected." + " ret = %d\n", __func__, ret); + register_pm_notifier(&mic_pm_notifer); +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,34)) + ret = pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, "mic", mic_pm_qos_cpu_dma_lat); + if (ret) { + printk(KERN_ERR "%s %d mic_pm_qos_cpu_dma_lat %d ret %d\n", + __func__, __LINE__, mic_pm_qos_cpu_dma_lat, ret); + ret = 0; + /* Dont fail driver load due to PM QoS API. Fall through */ + } +#endif + return 0; + +clean_unregister: + device_destroy(mic_lindata.dd_class, mic_lindata.dd_dev + 1); + device_destroy(mic_lindata.dd_class, mic_lindata.dd_dev); + class_destroy(mic_lindata.dd_class); + cdev_del(&mic_lindata.dd_cdev); + unregister_pm_notifier(&mic_pm_notifer); +init_free_region: + unregister_chrdev_region(mic_lindata.dd_dev, MAX_DLDR_MINORS); +init_free_ports: + micpm_uninit(); + return ret; +} + +static void __exit +mic_exit(void) +{ + /* Close endpoints related to reverse registration */ + acptboot_exit(); + +#ifdef USE_VCONSOLE + micvcons_destroy(mic_data.dd_numdevs); +#endif + + pci_unregister_driver(&mic_lindata.dd_pcidriver); + micpm_uninit(); + + /* Uninit data structures for PM disconnect */ + micpm_disconn_uninit(mic_data.dd_numdevs + 1); + +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,34)) + pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, "mic"); +#endif + micscif_kmem_cache_destroy(); + vmcore_exit(); + micveth_exit(); + micscif_destroy(); + ramoops_exit(); + + device_destroy(mic_lindata.dd_class, mic_lindata.dd_dev + 1); + device_destroy(mic_lindata.dd_class, mic_lindata.dd_dev); + class_destroy(mic_lindata.dd_class); + cdev_del(&mic_lindata.dd_cdev); + unregister_chrdev_region(mic_lindata.dd_dev, MAX_DLDR_MINORS); + unregister_pm_notifier(&mic_pm_notifer); + return; +} + +void +set_sysfs_entries(mic_ctx_t *mic_ctx) +{ + memset(&mic_ctx->sysfs_info, 0, sizeof(mic_ctx->sysfs_info)); +} + +void +free_sysfs_entries(mic_ctx_t *mic_ctx) +{ + if (mic_ctx->image != NULL) + kfree(mic_ctx->image); /* mic_ctx->initramfs points into this buffer */ + if (mic_ctx->sysfs_info.cmdline != NULL) + kfree(mic_ctx->sysfs_info.cmdline); + if (mic_ctx->sysfs_info.kernel_cmdline != NULL) + kfree(mic_ctx->sysfs_info.kernel_cmdline); +} + +mic_ctx_t * +get_per_dev_ctx(uint16_t node) +{ + /* TODO: Its important to check the upper bound of the dd_bi array as well. + * Cannot be done currently since not all calling functions to get_per_dev_ctx + * has the dd_numdevs set correctly. (See mic_ctx_map_single call in adapter_init_device + * thats callled even before dd_numdevs is incremented. 
*/ + return &mic_data.dd_bi[node]->bi_ctx; +} + +int +get_num_devs(mic_ctx_t *mic_ctx, uint32_t *num_devs) +{ + if (num_devs == NULL) + return -EINVAL; + if (copy_to_user(num_devs, &mic_data.dd_numdevs, sizeof(uint32_t))) + return -EFAULT; + return 0; +} + +int +mic_get_file_size(const char* fn, uint32_t* file_len) +{ + struct file *filp; + loff_t filp_size; + uint32_t status = 0; + mm_segment_t fs = get_fs(); + + set_fs(get_ds()); + + if (!fn || IS_ERR(filp = filp_open(fn, 0, 0))) { + status = EINVAL; + goto cleanup_fs; + } + + filp_size = GET_FILE_SIZE_FROM_INODE(filp); + if (filp_size <= 0) { + status = EINVAL; + goto cleanup_filp; + } + + *file_len = filp_size; +cleanup_filp: + filp_close(filp, current->files); +cleanup_fs: + set_fs(fs); + return status; +} + +// loads file from hdd into pci physical memory +int +mic_load_file(const char* fn, uint8_t* buffer, uint32_t max_size) +{ + long c; + int status = 0; + struct file *filp; + loff_t filp_size, pos = 0; + + mm_segment_t fs = get_fs(); + set_fs(get_ds()); + + if (!fn || IS_ERR(filp = filp_open(fn, 0, 0))) { + status = EINVAL; + goto cleanup_fs; + } + + filp_size = GET_FILE_SIZE_FROM_INODE(filp); + if (filp_size <= 0) { + goto cleanup_filp; + } + + c = vfs_read(filp, buffer, filp_size, &pos); + if(c != (long)filp_size) { + status = -1; //FIXME + goto cleanup_filp; + } + +cleanup_filp: + filp_close(filp, current->files); +cleanup_fs: + set_fs(fs); + + return status; +} + +module_init(mic_init); +module_exit(mic_exit); diff --git a/host/linvcons.c b/host/linvcons.c new file mode 100644 index 0000000..556a9b5 --- /dev/null +++ b/host/linvcons.c @@ -0,0 +1,687 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#include "micint.h" + +/* TODO: Improve debug messages */ + +static int micvcons_open(struct tty_struct * tty, struct file * filp); +static void micvcons_close(struct tty_struct * tty, struct file * filp); +static int micvcons_write(struct tty_struct * tty, const unsigned char *buf, + int count); +static int micvcons_write_room(struct tty_struct *tty); +static void micvcons_set_termios(struct tty_struct *tty, struct ktermios * old); +static void micvcons_timeout(unsigned long); +static void micvcons_throttle(struct tty_struct *tty); +static void micvcons_unthrottle(struct tty_struct *tty); +static void micvcons_wakeup_readbuf(struct work_struct *work); +static int micvcons_resume(struct _mic_ctx_t *mic_ctx); + +static struct tty_operations micvcons_tty_ops = { + .open = micvcons_open, + .close = micvcons_close, + .write = micvcons_write, + .write_room = micvcons_write_room, + .set_termios = micvcons_set_termios, + .throttle = micvcons_throttle, + .unthrottle = micvcons_unthrottle, +}; + +static struct tty_driver *micvcons_tty = NULL; +static u16 extra_timeout = 0; +static u8 restart_timer_flag = MICVCONS_TIMER_RESTART; +static struct timer_list vcons_timer; +static struct list_head timer_list_head; +static spinlock_t timer_list_lock; + +int +micvcons_create(int num_bds) +{ + micvcons_port_t *port; + bd_info_t *bd_info; + int bd, ret = 0; + char wq_name[14]; + struct device *dev; + + INIT_LIST_HEAD(&timer_list_head); + + if (micvcons_tty) + goto exit; + + micvcons_tty = alloc_tty_driver(num_bds); + if (!micvcons_tty) { + ret = -ENOMEM; + goto exit; + } + micvcons_tty->owner = THIS_MODULE; + micvcons_tty->driver_name = MICVCONS_DEVICE_NAME; + micvcons_tty->name = MICVCONS_DEVICE_NAME; + micvcons_tty->major = 0; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)) + micvcons_tty->minor_num = num_bds; +#endif + micvcons_tty->minor_start = 0; + micvcons_tty->type = TTY_DRIVER_TYPE_SERIAL; + micvcons_tty->subtype = SERIAL_TYPE_NORMAL; + micvcons_tty->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; + micvcons_tty->init_termios = tty_std_termios; + micvcons_tty->init_termios.c_iflag = IGNCR; + micvcons_tty->init_termios.c_oflag = 0; + micvcons_tty->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; + micvcons_tty->init_termios.c_lflag = 0; + + tty_set_operations(micvcons_tty, &micvcons_tty_ops); + + if ((ret = tty_register_driver(micvcons_tty)) != 0) { + printk("Failed to register vcons tty driver\n"); + put_tty_driver(micvcons_tty); + micvcons_tty = NULL; + goto exit; + } + + for (bd = 0; bd < num_bds; bd++) { + port = &mic_data.dd_ports[bd]; + port->dp_bdinfo = mic_data.dd_bi[bd]; + + spin_lock_init(&port->dp_lock); + mutex_init (&port->dp_mutex); + + bd_info = (bd_info_t *)port->dp_bdinfo; + bd_info->bi_port = port; + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + tty_port_init(&port->port); + dev = tty_port_register_device(&port->port, micvcons_tty, bd, NULL); +#else + dev = tty_register_device(micvcons_tty, bd, NULL); + if (IS_ERR(dev)) { + printk("Failed to register vcons tty device\n"); + micvcons_destroy(bd); + ret = PTR_ERR(dev); + goto exit; + } +#endif + snprintf(wq_name, sizeof(wq_name), "VCONS MIC %d", bd); + port->dp_wq = __mic_create_singlethread_workqueue(wq_name); + if (!port->dp_wq) { + printk(KERN_ERR "%s: create_singlethread_workqueue\n", + __func__); + tty_unregister_device(micvcons_tty, bd); + micvcons_destroy(bd); + ret = -ENOMEM; + goto exit; + } + INIT_WORK(&port->dp_wakeup_read_buf, micvcons_wakeup_readbuf); + } + vcons_timer.function = 
micvcons_timeout; + vcons_timer.data = (unsigned long)(&timer_list_head); + init_timer(&vcons_timer); +exit: + return ret; +} + +void micvcons_destroy(int num_bds) +{ + int bd, ret; + micvcons_port_t *port; + + if (!micvcons_tty) + return; + for (bd = 0; bd < num_bds; bd++) { + port = &mic_data.dd_ports[bd]; + destroy_workqueue(port->dp_wq); + tty_unregister_device(micvcons_tty, bd); + } + ret = tty_unregister_driver(micvcons_tty); + put_tty_driver(micvcons_tty); + micvcons_tty = NULL; + + if (ret) + printk(KERN_ERR "tty unregister_driver failed with code %d\n", ret); +} + +static int +micvcons_open(struct tty_struct * tty, struct file * filp) +{ + micvcons_port_t *port = &mic_data.dd_ports[tty->index]; + int ret = 0; + mic_ctx_t *mic_ctx = get_per_dev_ctx(tty->index); + + tty->driver_data = port; + + mutex_lock(&port->dp_mutex); + spin_lock_bh(&port->dp_lock); + + if ((filp->f_flags & O_ACCMODE) != O_RDONLY) { + if (port->dp_writer) { + ret = -EBUSY; + goto exit_locked; + } + port->dp_writer = filp; + port->dp_bytes = 0; + } + + if ((filp->f_flags & O_ACCMODE) != O_WRONLY) { + if (port->dp_reader) { + ret = -EBUSY; + goto exit_locked; + } + port->dp_reader = filp; + port->dp_canread = 1; + } + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)) + tty->low_latency = 0; +#endif + + if (!port->dp_tty) + port->dp_tty = tty; + if (!port->dp_vcons) + port->dp_vcons = &mic_ctx->bi_vcons; + if (tty->count == 1) { + ret = micvcons_start(mic_ctx); + if (ret != 0) + goto exit_locked; + spin_lock(&timer_list_lock); + list_add_tail_rcu(&port->list_member, &timer_list_head); + if (list_is_singular(&timer_list_head)) { + restart_timer_flag = MICVCONS_TIMER_RESTART; + mod_timer(&vcons_timer, jiffies + + msecs_to_jiffies(MICVCONS_SHORT_TIMEOUT)); + } + spin_unlock(&timer_list_lock); + } + +exit_locked: + spin_unlock_bh(&port->dp_lock); + mutex_unlock(&port->dp_mutex); + return ret; +} + +static inline void +micvcons_del_timer_entry(micvcons_port_t *port) +{ + spin_lock(&timer_list_lock); + list_del_rcu(&port->list_member); + if (list_empty(&timer_list_head)) { + restart_timer_flag = MICVCONS_TIMER_SHUTDOWN; + spin_unlock(&timer_list_lock); + del_timer_sync(&vcons_timer); + } else { + spin_unlock(&timer_list_lock); + } + synchronize_rcu(); +} + +static void +micvcons_close(struct tty_struct * tty, struct file * filp) +{ + micvcons_port_t *port = (micvcons_port_t *)tty->driver_data; + + mutex_lock(&port->dp_mutex); + if (tty->count == 1) { + micvcons_del_timer_entry(port); + flush_workqueue(port->dp_wq); + } + spin_lock_bh(&port->dp_lock); + if (port->dp_reader == filp) + port->dp_reader = 0; + + if (port->dp_writer == filp) + port->dp_writer = 0; + + if (tty->count == 1) + port->dp_tty = 0; + spin_unlock_bh(&port->dp_lock); + mutex_unlock(&port->dp_mutex); +} + +static int +micvcons_write(struct tty_struct * tty, const unsigned char *buf, int count) +{ + micvcons_port_t *port = (micvcons_port_t *)tty->driver_data; + mic_ctx_t *mic_ctx = get_per_dev_ctx(tty->index); + int bytes=0, status; + struct vcons_buf *vcons_host_header; + u8 card_alive = 1; + + spin_lock_bh(&port->dp_lock); + vcons_host_header = (struct vcons_buf *)port->dp_vcons->dc_hdr_virt; + if (vcons_host_header->mic_magic == MIC_VCONS_SLEEPING) { + status = micvcons_resume(mic_ctx); + if (status != 0) { + /* If card can not wakeup, it is dead. 
*/ + card_alive = 0; + goto exit; + } + } + if (vcons_host_header->mic_magic != MIC_VCONS_READY) + goto exit; + bytes = micvcons_port_write(port, buf, count); + if (bytes) { + mic_send_hvc_intr(mic_ctx); + extra_timeout = 0; + } +exit: + spin_unlock_bh(&port->dp_lock); + if (!card_alive) + micvcons_del_timer_entry(port); + return bytes; +} + +static int +micvcons_write_room(struct tty_struct *tty) +{ + micvcons_port_t *port = (micvcons_port_t *)tty->driver_data; + int room; + + spin_lock_bh(&port->dp_lock); + if (port->dp_out) + room = micscif_rb_space(port->dp_out); + else + room = 0; + spin_unlock_bh(&port->dp_lock); + + return room; +} + +static void +micvcons_set_termios(struct tty_struct *tty, struct ktermios * old) +{ +} + +static int +micvcons_readchars(micvcons_port_t *port) +{ + int len, ret, get_count; + int bytes_total = 0; + int bytes_read = 0; + char buf[64]; + + for (;;) { + len = micscif_rb_count(port->dp_in, sizeof(buf)); + if (!len) + break; + get_count = min(len, (int)sizeof(buf)); + ret = micscif_rb_get_next(port->dp_in, buf, get_count); + micscif_rb_update_read_ptr(port->dp_in); + if (port->dp_reader && port->dp_canread) { +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + if ((bytes_read = tty_insert_flip_string( + &port->port, buf, get_count)) != 0) + tty_flip_buffer_push(&port->port); +#else + bytes_read = tty_insert_flip_string(port->dp_tty, + buf, get_count); + tty_flip_buffer_push(port->dp_tty); +#endif + bytes_total += bytes_read; + if (bytes_read != get_count) { + printk(KERN_WARNING "dropping characters: \ + bytes_read %d, get_count %d\n", + bytes_read, get_count); + break; + } + } + } + return bytes_total; +} + +static int +micvcons_initport(micvcons_port_t *port) +{ + struct vcons_buf *vcons_host_header; + struct vcons_mic_header *vcons_mic_header; + char *mic_hdr, *mic_buf, *host_buf; + + vcons_host_header = (struct vcons_buf *)port->dp_vcons->dc_hdr_virt; + if (!vcons_host_header) { + printk(KERN_ERR "vcons_host_header NULL\n"); + return -EFAULT; + } + + host_buf = (char *)port->dp_vcons->dc_buf_virt; + if (!host_buf) { + printk(KERN_ERR "host_buf NULL\n"); + return -EFAULT; + } + + if (port->dp_bdinfo->bi_ctx.bi_family == FAMILY_ABR) { + set_pci_aperture(&port->dp_bdinfo->bi_ctx, + (port->dp_bdinfo->bi_ctx.aper.len - PAGE_SIZE) >> PAGE_SHIFT, + vcons_host_header->i_hdr_addr & PAGE_MASK, PAGE_SIZE); + mic_hdr = port->dp_bdinfo->bi_ctx.aper.va + + port->dp_bdinfo->bi_ctx.aper.len - PAGE_SIZE; + mic_buf = mic_hdr + PAGE_SIZE/2; + } else { + mic_hdr = port->dp_bdinfo->bi_ctx.aper.va + vcons_host_header->i_hdr_addr; + mic_buf = port->dp_bdinfo->bi_ctx.aper.va + vcons_host_header->i_buf_addr; + } + + port->dp_in = kmalloc(sizeof(struct micscif_rb), GFP_ATOMIC); + if (port->dp_in) + port->dp_out = kmalloc(sizeof(struct micscif_rb), GFP_ATOMIC); + else + return -ENOMEM; + + if (port->dp_out) { + vcons_mic_header = (struct vcons_mic_header *)mic_hdr; + micscif_rb_init(port->dp_in, + &vcons_mic_header->o_rd, + &vcons_host_header->o_wr, + host_buf, + vcons_host_header->o_size); + micscif_rb_init(port->dp_out, &vcons_host_header->i_rd, + &vcons_mic_header->i_wr, + mic_buf, + vcons_host_header->i_size); + wmb(); + writel(MIC_VCONS_HOST_OPEN, &vcons_mic_header->host_status); + } else { + kfree(port->dp_in); + return -ENOMEM; + } + return 0; +} + +static int +micvcons_readport(micvcons_port_t *port) +{ + int num_chars_read = 0, status; + static uint32_t prev_mic_magic; + struct vcons_buf *vcons_host_header; + + if (!port || !port->dp_vcons) + return 0; + + 
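+	/*
+	 * Everything below runs under dp_lock: the shared vcons header is
+	 * checked for the READY/SLEEPING magic, the ring buffers are set up
+	 * lazily on first use, and reads from a sleeping card are deferred to
+	 * the port workqueue, which wakes the card (see micvcons_wakeup_readbuf)
+	 * before draining the buffer.
+	 */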
spin_lock_bh(&port->dp_lock); + if (!port->dp_tty) { + spin_unlock_bh(&port->dp_lock); + return 0; + } + + vcons_host_header = (struct vcons_buf *)port->dp_vcons->dc_hdr_virt; + if ((vcons_host_header->mic_magic != MIC_VCONS_READY) && + (vcons_host_header->mic_magic != MIC_VCONS_SLEEPING)) { + if ((vcons_host_header->mic_magic == MIC_VCONS_RB_VER_ERR) + && (vcons_host_header->mic_magic != prev_mic_magic)) { + printk(KERN_ERR "Card and host ring buffer versions mismatch."); + printk(KERN_ERR "Card version: %d, Host version: %d \n", + vcons_host_header->mic_rb_ver, + vcons_host_header->host_rb_ver); + } + goto exit; + } + if (!port->dp_in) { + status = micvcons_initport(port); + if (status != 0) { + spin_unlock_bh(&port->dp_lock); + return status; + } + } + + if (port->dp_in) { + if (vcons_host_header->mic_magic == MIC_VCONS_SLEEPING) { + /* + * If the card is sleeping and there is data in the + * buffer, schedule work in a work queue to wake-up + * the card and read from the buffer. + */ + if (micscif_rb_count(port->dp_in, 1)) + queue_work(port->dp_wq, + &port->dp_wakeup_read_buf); + } else { + num_chars_read = micvcons_readchars(port); + tty_wakeup(port->dp_tty); + } + } +exit: + prev_mic_magic = vcons_host_header->mic_magic; + spin_unlock_bh(&port->dp_lock); + return num_chars_read; +} + +static void +micvcons_wakeup_readbuf(struct work_struct *work) +{ + u8 card_alive = 1; + int status; + micvcons_port_t *port; + struct vcons_buf *vcons_host_header; + + port = container_of(work, micvcons_port_t, dp_wakeup_read_buf); + + vcons_host_header = (struct vcons_buf *)port->dp_vcons->dc_hdr_virt; + spin_lock_bh(&port->dp_lock); + status = micvcons_resume(get_per_dev_ctx(port->dp_tty->index)); + if (status == 0) { + micvcons_readchars(port); + tty_wakeup(port->dp_tty); + } else { + /* If card can not wakeup, it is dead. */ + card_alive = 0; + } + spin_unlock_bh(&port->dp_lock); + if (!card_alive) + micvcons_del_timer_entry(port); +} + +static void +micvcons_timeout(unsigned long data) +{ + struct list_head *timer_list_ptr = (struct list_head *)data; + micvcons_port_t *port; + u8 console_active = 0; + int num_chars_read = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(port, timer_list_ptr, list_member) { + num_chars_read = micvcons_readport(port); + if (num_chars_read != 0) + console_active = 1; + } + rcu_read_unlock(); + + spin_lock(&timer_list_lock); + if (restart_timer_flag == MICVCONS_TIMER_RESTART) { + extra_timeout = (console_active ? 
0 : + extra_timeout + MICVCONS_SHORT_TIMEOUT); + extra_timeout = min(extra_timeout, (u16)MICVCONS_MAX_TIMEOUT); + mod_timer(&vcons_timer, jiffies + + msecs_to_jiffies(MICVCONS_SHORT_TIMEOUT+extra_timeout)); + } + spin_unlock(&timer_list_lock); +} + +static void +micvcons_throttle(struct tty_struct *tty) +{ + micvcons_port_t *port = (micvcons_port_t *)tty->driver_data; + port->dp_canread = 0; +} + +static void +micvcons_unthrottle(struct tty_struct *tty) +{ + micvcons_port_t *port = (micvcons_port_t *)tty->driver_data; + port->dp_canread = 1; +} + +int micvcons_start(mic_ctx_t *mic_ctx) +{ + struct vcons_buf *vcons_host_header; + int status; + micvcons_port_t *port = mic_ctx->bd_info->bi_port; + + vcons_host_header = (struct vcons_buf *)port->dp_vcons->dc_hdr_virt; + if (vcons_host_header->mic_magic == MIC_VCONS_SLEEPING) { + status = micvcons_resume(mic_ctx); + if (status != 0) + return status; + } + if (vcons_host_header->mic_magic == MIC_VCONS_READY) { + if (!port->dp_in) { + status = micvcons_initport(port); + if (status != 0) + return status; + } + } + return 0; +} + +int micvcons_port_write(struct micvcons_port *port, const unsigned char *buf, + int count) +{ + int ret; + uint32_t bytes = 0; + + if (port->dp_out) { + bytes = min(count, micscif_rb_space(port->dp_out)); + ret = micscif_rb_write(port->dp_out, (void *)buf, bytes); + BUG_ON(ret); + port->dp_bytes += bytes; + micscif_rb_commit(port->dp_out); + } + return bytes; +} + +/** + * micvcons_stop - cleans up before a node is rebooted + * @ mic_ctx: node to clean up + * + * Called before rebooting a node, reads remaining characters + * from the node's vcons output buffer, resets the input/output + * ring buffers so that things work when the node comes up again + */ +void +micvcons_stop(mic_ctx_t *mic_ctx) +{ + micvcons_port_t *port; + struct vcons_buf *vcons_host_header; + + port = mic_ctx->bd_info->bi_port; + micvcons_readport(port); + spin_lock_bh(&port->dp_lock); + if (port->dp_in) { + vcons_host_header = (struct vcons_buf *)port->dp_vcons->dc_hdr_virt; + vcons_host_header->mic_magic = 0; + kfree(port->dp_in); + kfree(port->dp_out); + port->dp_in = NULL; + port->dp_out = NULL; + } + spin_unlock_bh(&port->dp_lock); +} + +/** + * micvcons_resume - sets the state of a node's console to ready + * @ mic_ctx: node to clean up + * + * @ return: zero if successful. + * called before resuming a node from PC6. MUST acquire the spinlock + * port->dp_lock with bottom-halves disabled before calling this function. + */ +static int +micvcons_resume(mic_ctx_t *mic_ctx) +{ + int status = 0; + micvcons_port_t *port; + struct vcons_buf *vcons_host_header; + + port = mic_ctx->bd_info->bi_port; + vcons_host_header = mic_ctx->bi_vcons.dc_hdr_virt; + if (vcons_host_header->mic_magic == MIC_VCONS_SLEEPING) { + do { + vcons_host_header->mic_magic = MIC_VCONS_WAKINGUP; + spin_unlock_bh(&port->dp_lock); + status = micscif_connect_node(mic_get_scifnode_id(mic_ctx), false); + spin_lock_bh(&port->dp_lock); + } while ((status == 0) && + (vcons_host_header->mic_magic == MIC_VCONS_SLEEPING)); + if (status == 0) + vcons_host_header->mic_magic = MIC_VCONS_READY; + } + return status; +} + +/** + * micvcons_pm_disconnect_node - Check if a card can be put to sleep in case + * there is any activity on the virtual console. If yes, it also sets the + * internal state of a node's console to sleeping. + * @ node_bitmask: bits set indicate which cards to check. + * Bit-1 for the first, Bit-2 for the second,... + * Ignore Bit-0 which indicates host. 
+ * @ return: bits set indicating which cards can sleep. + * This is called from PM to check if a card can be put to sleep (PC-6 state). + * This is called when the node is disconnected from the SCIF network + * before putting it into the PC6 state where it should no longer + * receive an PCIe transactions until woken up by the host driver. + */ +int +micvcons_pm_disconnect_node(uint8_t *node_bitmask, enum disconn_type type) +{ + int err = 0; + if ((type == DISCONN_TYPE_POWER_MGMT) && (node_bitmask)) { + int i = 0; + mic_ctx_t *mic_ctx; + micvcons_port_t *port; + struct vcons_buf *vcons_host_header; + + for (i = 0; i <= mic_data.dd_numdevs; i++) { + if (!get_nodemask_bit(node_bitmask, i)) + continue; + + if (!(mic_ctx = get_per_dev_ctx(i - 1))) + continue; + + port = mic_ctx->bd_info->bi_port; + micvcons_readport(port); + /* + * If this function is called when virtual console is + * not active, port->dp_vcons needs to be initialized. + */ + if (!port->dp_vcons) + port->dp_vcons = &mic_ctx->bi_vcons; + + vcons_host_header = (struct vcons_buf *)port->dp_vcons->dc_hdr_virt; + spin_lock_bh(&port->dp_lock); + vcons_host_header->mic_magic = MIC_VCONS_SLEEPING; + spin_unlock_bh(&port->dp_lock); + } + } + + return err; +} + diff --git a/host/linvnet.c b/host/linvnet.c new file mode 100644 index 0000000..8082e41 --- /dev/null +++ b/host/linvnet.c @@ -0,0 +1,802 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include "micint.h" +#include "mic_common.h" +#include +#include +#include +#include +#include "mic/micveth.h" + +#define PWR_MGMT_NO_POLL_AFTER_LINKS_UP 1 + +/* + In intr/poll modes, mic_smpt_uninit has already been called before + micveth_destroy is called during rmmod. This results in host driver crash. 
The + current workaround is, given the 'legacy' nature of VNET intr/poll modes, to + not call mic_ctx_unmap_single() at rmmod. This workaround will result in some + unmapped memory and a warn_on from micscif_smpt.c. + */ +#define WA_UNMAP_AT_RMMOD 0 + +static void micveth_clientpoll(struct work_struct *work); +static void micveth_poll(struct work_struct *work); +static int micvnet_host_doorbell_intr_handler(mic_ctx_t *mic_ctx, int doorbell); +static void micvnet_intr_bh_handler(struct work_struct *work); +void micveth_send_intr(micveth_info_t *veth_info); + +micveth_t micveth; + +void dump_skb(struct sk_buff *skb, int xmit); + +static inline +mic_ctx_t *veth_to_ctx(micveth_info_t *veth_info) +{ + return veth_info->mic_ctx; +} + +static int +micveth_set_address(struct net_device *dev, void *p) +{ + struct sockaddr *sa = p; + + if (!is_valid_ether_addr(sa->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(dev->dev_addr, sa->sa_data, ETH_ALEN); + return 0; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) +static void +micveth_multicast_list(struct net_device *dev) +{ +} +#endif + +static int +micveth_deliver(struct sk_buff *skb, struct net_device *dev, micveth_info_t *veth_info) +{ + veth_ring_t *ring; + ring_queue_t *tx_queue; + ring_desc_t *desc; + ring_packet_t *packet; + int next_tail; + + //dump_skb(skb, 1); + + spin_lock(&veth_info->vi_txlock); + ring = &veth_info->vi_ring.ring; + tx_queue = &ring->r_tx; + + next_tail = (tx_queue->rq_tail + 1) % tx_queue->rq_length; + if (next_tail == tx_queue->rq_head) { + // queue_full situation - just drop the packet and let the stack retry + spin_unlock(&veth_info->vi_txlock); + return 1; + } + + desc = &tx_queue->rq_descs[tx_queue->rq_tail]; + packet = &veth_info->vi_tx_desc[tx_queue->rq_tail]; + packet->pd_skb = skb; + packet->pd_phys = mic_ctx_map_single(veth_to_ctx(veth_info), + skb->data, skb->len); + packet->pd_length = skb->len; + desc->rd_phys = packet->pd_phys; + desc->rd_length = skb->len; + desc->rd_valid = 1; + + /* + * Need a write memory barrier between copying the skb data to + * the buffer and updating the tail pointer. NOT an smp_wmb(), + * because this memory barrier needs to be done even if there is + * a single CPU in the system. 
+ */ + wmb(); + tx_queue->rq_tail = (tx_queue->rq_tail + 1) % tx_queue->rq_length; + spin_unlock(&veth_info->vi_txlock); + + if (mic_vnet_mode == VNET_MODE_INTR) { + micveth_send_intr(veth_info); + } + + return 0; +} + +static int +micveth_xmit(struct sk_buff *skb, struct net_device *dev) +{ + micveth_info_t *veth_info; + + if (be16_to_cpu(skb->protocol) == ETH_P_IPV6) { + kfree_skb(skb); + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + veth_info = dev->ml_priv; + + if (veth_info->vi_state != VETH_STATE_LINKUP) { + kfree_skb(skb); + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + if (micveth_deliver(skb, dev, veth_info)) { + kfree_skb(skb); + dev->stats.tx_dropped++; + } + + return NETDEV_TX_OK; +} + +static int +micveth_change_mtu(struct net_device *dev, int new_mtu) +{ + dev->mtu = new_mtu; + return 0; +} + +/* Start callback */ +static int +micveth_start_dev(struct net_device *dev) +{ + micveth_info_t *veth_info = dev->ml_priv; + + micveth_start(veth_info->mic_ctx); + return 0; +} + +/* Stop callback */ +static int +micveth_stop_dev(struct net_device *dev) +{ + micveth_info_t *veth_info = dev->ml_priv; + + micveth_stop(veth_info->mic_ctx); + return 0; +} + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,28) +static const struct net_device_ops veth_netdev_ops = { + .ndo_open = micveth_start_dev, + .ndo_stop = micveth_stop_dev, + .ndo_start_xmit = micveth_xmit, + .ndo_validate_addr = eth_validate_addr, +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) + .ndo_set_multicast_list = micveth_multicast_list, +#endif + .ndo_set_mac_address = micveth_set_address, + .ndo_change_mtu = micveth_change_mtu, +}; +#endif + +static void +micveth_setup(struct net_device *dev) +{ +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,28) + dev->hard_start_xmit = micveth_xmit; + dev->set_multicast_list = micveth_multicast_list; + dev->set_mac_address = micveth_set_address; +#endif + ether_setup(dev); + + /* Initialize the device structure. */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,28) + dev->netdev_ops = &veth_netdev_ops; +#endif + dev->destructor = free_netdev; + + /* Fill in device structure with ethernet-generic values. 
*/ + dev->mtu = (MICVETH_MAX_PACKET_SIZE); + dev->tx_queue_len = 0; + dev->flags &= ~IFF_MULTICAST; + random_ether_addr(dev->dev_addr); +} + +static int +micveth_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + return 0; +} + +static struct rtnl_link_ops micveth_link_ops __read_mostly = { + .kind = "micveth", + .setup = micveth_setup, + .validate = micveth_validate, +}; + +static int +micveth_probe_int(micveth_info_t *veth_info, mic_ctx_t *mic_ctx) +{ + struct net_device *dev_veth; + ring_queue_t *queue; + ring_desc_t *desc; + ring_packet_t *packet; + int idx; + int err = 0; + + veth_info->vi_pdev = mic_ctx->bi_pdev; + veth_info->vi_sbox = (uint8_t *)((unsigned long)mic_ctx->mmio.va + + HOST_SBOX_BASE_ADDRESS); + veth_info->vi_scratch14 = (uint32_t *)((unsigned long)mic_ctx->mmio.va + + HOST_SBOX_BASE_ADDRESS + SBOX_SCRATCH14); + veth_info->vi_scratch15 = (uint32_t *)((unsigned long)mic_ctx->mmio.va + + HOST_SBOX_BASE_ADDRESS + SBOX_SCRATCH15); + veth_info->mic_ctx = mic_ctx; + mic_ctx->bi_vethinfo = (void *)veth_info; + + spin_lock_init(&veth_info->vi_txlock); + spin_lock_init(&veth_info->vi_rxlock); + + if (mic_vnet_mode == VNET_MODE_POLL) + INIT_DELAYED_WORK(&veth_info->vi_poll, micveth_poll); + + // Set the current sk_buff allocation size + veth_info->vi_skb_mtu = MICVETH_MAX_PACKET_SIZE + 32; + + // Get the physical memory address for the ring descriptors + veth_info->vi_ring.phys = mic_ctx_map_single(veth_to_ctx(veth_info), &veth_info->vi_ring.ring, + sizeof(veth_ring_t)); + veth_info->vi_ring.length = sizeof(veth_ring_t); + + queue = &veth_info->vi_ring.ring.r_tx; + queue->rq_head = 0; + queue->rq_tail = 0; + queue->rq_length = MICVETH_TRANSFER_FIFO_SIZE; + + veth_info->vi_pend = 0; + + packet = &veth_info->vi_tx_desc[0]; + for (idx = 0; idx < queue->rq_length; idx++) { + desc = &queue->rq_descs[idx]; + packet[idx].pd_skb = NULL; + packet[idx].pd_phys = 0; + packet[idx].pd_length = 0; + + desc->rd_phys = 0; + desc->rd_length = 0; + desc->rd_valid = 0; + } + + // This is the recieve end. 
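+	// Every RX slot is populated up front: an skb of vi_skb_mtu bytes is
+	// allocated, DMA-mapped and its descriptor marked valid, so the card
+	// can start filling buffers as soon as the link comes up.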
+ queue = &veth_info->vi_ring.ring.r_rx; + queue->rq_head = 0; + queue->rq_tail = 0; + queue->rq_length = MICVETH_TRANSFER_FIFO_SIZE; + + packet = &veth_info->vi_rx_desc[0]; + for (idx = 0; idx < queue->rq_length; idx++) { + desc = &queue->rq_descs[idx]; + if (!(packet[idx].pd_skb = dev_alloc_skb(veth_info->vi_skb_mtu))) + return -ENOMEM; + packet[idx].pd_phys = mic_ctx_map_single(veth_to_ctx(veth_info), packet[idx].pd_skb->data, + veth_info->vi_skb_mtu); + packet[idx].pd_length = veth_info->vi_skb_mtu; + + desc->rd_phys = packet[idx].pd_phys; + desc->rd_length = packet[idx].pd_length; + desc->rd_valid = 1; + } +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + if ((dev_veth = alloc_netdev(sizeof(micveth_info_t), "mic%d", micveth_setup)) == NULL) { +#else + if ((dev_veth = alloc_netdev(sizeof(micveth_info_t), "mic%d", NET_NAME_UNKNOWN, micveth_setup)) == NULL) { +#endif + return -ENOMEM; + } + + veth_info->vi_netdev = dev_veth; + dev_veth->ml_priv = veth_info; + dev_veth->rtnl_link_ops = &micveth_link_ops; + + if ((err = register_netdev(dev_veth)) < 0) { + printk("register netdev failed %d\n", err); + free_netdev(dev_veth); + return err; + } + + veth_info->vi_state = VETH_STATE_INITIALIZED; + return 0; +} + +static ssize_t show_veth(struct device *dev, + struct device_attribute *attr, char *buf); +DEVICE_ATTR(veth, S_IRUGO, show_veth, NULL); + +static int +micveth_init_int(int num_bds, struct device *dev) +{ + int bd; + int err = 0; + + micveth.lv_num_interfaces = num_bds; + micveth.lv_num_clients = num_bds; + micveth.lv_active_clients = 0; + micveth.lv_num_links_remaining = num_bds; + + BUG_ON(rtnl_link_register(&micveth_link_ops)); + + // Allocate space for the control of each device in the system. + micveth.lv_info = kmalloc(sizeof(micveth_info_t) * num_bds, GFP_KERNEL); + + // Initialize state mutex. Overloaded use for several fields. + mutex_init(&micveth.lv_state_mutex); + + // Setup of timer for probeing active mic clients. When the total active board + // count is zero the poll is not running. + micveth.lv_pollstate = CLIENT_POLL_STOPPED; + INIT_DELAYED_WORK(&micveth.lv_poll, micveth_clientpoll); + init_waitqueue_head(&micveth.lv_wq); + + // Init each of the existing boards. 
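+	// One micveth_info_t slot (from lv_info above) per board; the return
+	// value of micveth_probe_int() is not checked here, only the sysfs
+	// attribute creation below is reported back to the caller.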
+ for (bd = 0; bd < num_bds; bd++) { + micveth_probe_int(&micveth.lv_info[bd], &mic_data.dd_bi[bd]->bi_ctx); + } + + err = device_create_file(dev, &dev_attr_veth); + return err; +} + +static void +micveth_exit_int(void) +{ + mic_ctx_t *mic_ctx = kmalloc(sizeof(mic_ctx_t), GFP_KERNEL); + micveth_info_t *veth_info; + ring_packet_t *packet; + int bd; + int idx; + + rtnl_link_unregister(&micveth_link_ops); + + for (bd = 0; bd < micveth.lv_num_clients; bd++) { + veth_info = &micveth.lv_info[bd]; + + /* veth_info->mic_ctx == mic_data.dd_bi[bd] is freed in + remove so cannot be used in exit */ + mic_ctx->bi_vethinfo = veth_info; + micveth_stop(mic_ctx); + +#if WA_UNMAP_AT_RMMOD + mic_ctx_unmap_single(veth_to_ctx(veth_info), veth_info->vi_ring.phys, + sizeof(veth_ring_t)); +#endif + + for (idx = 0; idx < veth_info->vi_ring.ring.r_tx.rq_length; idx++) { + packet = &veth_info->vi_tx_desc[idx]; + if (packet->pd_skb != NULL) { +#if WA_UNMAP_AT_RMMOD + mic_ctx_unmap_single(veth_to_ctx(veth_info), packet->pd_phys, + packet->pd_skb->len); +#endif + kfree_skb(packet->pd_skb); + } + } + + for (idx = 0; idx < veth_info->vi_ring.ring.r_rx.rq_length; idx++) { + packet = &veth_info->vi_rx_desc[idx]; +#if WA_UNMAP_AT_RMMOD + mic_ctx_unmap_single(veth_to_ctx(veth_info), packet->pd_phys, packet->pd_skb->len); +#endif + kfree_skb(packet->pd_skb); + } + } + + kfree(mic_ctx); + kfree(micveth.lv_info); +} + +static int +micveth_start_int(mic_ctx_t *mic_ctx) +{ + micveth_info_t *veth_info = &micveth.lv_info[mic_ctx->bi_id]; + + // Eventuall (very soon) most of the descriptor allocation for a board will be done here + if (veth_info->vi_state != VETH_STATE_INITIALIZED) + return 0; + + mutex_lock(&micveth.lv_state_mutex); + + if (micveth.lv_pollstate == CLIENT_POLL_STOPPED) { + schedule_delayed_work(&micveth.lv_poll, msecs_to_jiffies(MICVETH_CLIENT_TIMER_DELAY)); + micveth.lv_pollstate = CLIENT_POLL_RUNNING; + } + + micveth.lv_active_clients++; + mutex_unlock(&micveth.lv_state_mutex); + + veth_info->vi_pend = 0; + + veth_info->vi_ring.ring.r_tx.rq_head = 0; + veth_info->vi_ring.ring.r_tx.rq_tail = 0; + + veth_info->vi_ring.ring.r_rx.rq_head = 0; + veth_info->vi_ring.ring.r_rx.rq_tail = 0; + veth_info->vi_state = VETH_STATE_LINKDOWN; + + if (mic_vnet_mode == VNET_MODE_INTR) { + snprintf(veth_info->vi_wqname, sizeof(veth_info->vi_wqname), + "VNET INTR %d\n", mic_ctx->bi_id); + veth_info->vi_wq = create_singlethread_workqueue(veth_info->vi_wqname); + INIT_WORK(&veth_info->vi_bh, micvnet_intr_bh_handler); + + // Install interrupt handler on doorbell 3 + mic_reg_irqhandler(mic_ctx, 3, "Host DoorBell 3", + micvnet_host_doorbell_intr_handler); + } + + return 0; +} + +static void +micveth_stop_int(mic_ctx_t *mic_ctx) +{ + micveth_info_t *veth_info = (micveth_info_t *)(mic_ctx->bi_vethinfo); + + if (veth_info->vi_state == VETH_STATE_INITIALIZED) + return; + + mutex_lock(&micveth.lv_state_mutex); + + if (mic_vnet_mode == VNET_MODE_INTR) { + // Remove interrupt handler on doorbell 3 + mic_unreg_irqhandler(mic_ctx, 3, "Host DoorBell 3"); + + destroy_workqueue(veth_info->vi_wq); + } + + micveth.lv_active_clients--; + veth_info->vi_state = VETH_STATE_INITIALIZED; + + if (micveth.lv_active_clients) { + mutex_unlock(&micveth.lv_state_mutex); + return; + } + + micveth.lv_num_links_remaining = micveth.lv_num_clients; + +#if PWR_MGMT_NO_POLL_AFTER_LINKS_UP + micveth.lv_pollstate = CLIENT_POLL_STOPPED; + mutex_unlock(&micveth.lv_state_mutex); +#else + micveth.lv_pollstate = CLIENT_POLL_STOPPING; + mutex_unlock(&micveth.lv_state_mutex); + 
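+	/* Wait until the client-poll work observes CLIENT_POLL_STOPPING and
+	 * transitions the state to CLIENT_POLL_STOPPED before returning. */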
wait_event(micveth.lv_wq, micveth.lv_pollstate == CLIENT_POLL_STOPPED); +#endif +} + +#define NO_SRATCHREGREAD_AFTER_CONNECT 1 +static void +micveth_clientpoll(struct work_struct *work) +{ + micveth_info_t *veth_info; + uint32_t transRingHi; + uint32_t transRingLo; + uint32_t scratch14 = 0; + uint32_t scratch15 = 0; + int bd; + static int enter = 0; + + if (enter == 0) + { + printk("micveth is polling\n"); + enter = 1; + } + + mutex_lock(&micveth.lv_state_mutex); + if (micveth.lv_pollstate == CLIENT_POLL_STOPPING) { + micveth.lv_pollstate = CLIENT_POLL_STOPPED; + mutex_unlock(&micveth.lv_state_mutex); + wake_up(&micveth.lv_wq); + return; + } + + // Check for state changes for each board in the system + for (bd = 0; bd < micveth.lv_num_clients; bd++) { + veth_info = &micveth.lv_info[bd]; + + // Do not poll boards that have not had the interface started. + if (veth_info->vi_state == VETH_STATE_INITIALIZED) { + break; + } + +#ifdef NO_SRATCHREGREAD_AFTER_CONNECT + if(veth_info->vi_state != VETH_STATE_LINKUP) { +#endif + scratch14 = readl(veth_info->vi_scratch14); + scratch15 = readl(veth_info->vi_scratch15); +#ifdef NO_SRATCHREGREAD_AFTER_CONNECT + } +#endif + + if (veth_info->vi_state == VETH_STATE_LINKUP) { + if (scratch14 == MICVETH_LINK_DOWN_MAGIC) { + veth_info->vi_state = VETH_STATE_LINKDOWN; + } + } else if (veth_info->vi_state == VETH_STATE_LINKDOWN) { + if (scratch14 == MICVETH_LINK_UP_MAGIC) { + // Write the transfer ring address. + transRingHi = (uint32_t)(veth_info->vi_ring.phys >> 32); + transRingLo = (uint32_t)(veth_info->vi_ring.phys & 0xffffffff); + + writel(transRingLo, veth_info->vi_scratch14); + writel(transRingHi, veth_info->vi_scratch15); + + veth_info->vi_state = VETH_STATE_LINKUP; + printk("MIC virtual ethernet up for board %d\n", bd); +#ifdef MIC_IS_EMULATION + printk("Card wrote Magic: It must be UP!\n"); +#endif + + if (mic_vnet_mode == VNET_MODE_POLL) { + schedule_delayed_work(&veth_info->vi_poll, + msecs_to_jiffies(MICVETH_POLL_TIMER_DELAY)); + } + + micveth.lv_num_links_remaining--; + } +#ifdef MIC_IS_EMULATION + else if (scratch14) { + printk("---> 0x%x \n", scratch14); + writel(0x0, veth_info->vi_scratch14); + } +#endif + } + } + + mutex_unlock(&micveth.lv_state_mutex); + +#if PWR_MGMT_NO_POLL_AFTER_LINKS_UP + if (micveth.lv_num_links_remaining) +#endif + schedule_delayed_work(&micveth.lv_poll, msecs_to_jiffies(MICVETH_CLIENT_TIMER_DELAY)); +} + +static int +micvnet_host_doorbell_intr_handler(mic_ctx_t *mic_ctx, int doorbell) +{ + micveth_info_t *veth_info; + veth_info = &micveth.lv_info[mic_ctx->bi_id]; + queue_work(veth_info->vi_wq, &veth_info->vi_bh); + return 0; +} + +void +micveth_send_intr(micveth_info_t *veth_info) +{ + mic_ctx_t *mic_ctx = veth_info->mic_ctx; + mic_send_vnet_intr(mic_ctx); +} + +void +_micveth_process_descriptors(micveth_info_t *veth_info) +{ + veth_ring_t *ring = &veth_info->vi_ring.ring; + ring_queue_t *rx_queue = &ring->r_rx; + ring_queue_t *tx_queue = &ring->r_tx; + ring_desc_t *desc; + ring_packet_t *packet; + struct sk_buff *skb; + int receive_skb = 0; + int err; + + if (veth_info->vi_state != VETH_STATE_LINKUP) { + return; + } + + spin_lock_bh(&veth_info->vi_rxlock); + + while (rx_queue->rq_head != rx_queue->rq_tail) { + desc = &rx_queue->rq_descs[rx_queue->rq_head]; + + veth_info->vi_netdev->stats.rx_packets++; + veth_info->vi_netdev->stats.rx_bytes += desc->rd_length; + + packet = &veth_info->vi_rx_desc[rx_queue->rq_head]; + + skb = packet->pd_skb; + skb_put(skb, desc->rd_length); + + //dump_skb(skb, 0); + 
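+	/*
+	 * The filled skb is detached from the ring and replaced by a freshly
+	 * allocated, DMA-mapped skb before being passed to netif_receive_skb(),
+	 * so the descriptor slot is never left without a buffer. Note that the
+	 * dev_alloc_skb() return value is not checked here.
+	 */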
mic_ctx_unmap_single(veth_to_ctx(veth_info), packet->pd_phys, veth_info->vi_skb_mtu); + packet->pd_skb = dev_alloc_skb(veth_info->vi_skb_mtu); + packet->pd_phys = mic_ctx_map_single(veth_to_ctx(veth_info), packet->pd_skb->data, + veth_info->vi_skb_mtu); + desc->rd_phys = packet->pd_phys; + desc->rd_length = packet->pd_length; + + skb->dev = veth_info->vi_netdev; + skb->protocol = eth_type_trans(skb, skb->dev); + skb->ip_summed = CHECKSUM_NONE; + + err = netif_receive_skb(skb); + /* + * Need a general memory barrier between copying the data from + * the buffer and updating the head pointer. It's the general + * mb() because we're ordering the read of the data with the write. + */ + mb(); + rx_queue->rq_head = (rx_queue->rq_head + 1) % rx_queue->rq_length; + receive_skb++; + } + + /* Send intr to TX so that pending SKB's can be freed */ + if (receive_skb && mic_vnet_mode == VNET_MODE_INTR) { + micveth_send_intr(veth_info); + } + + spin_unlock_bh(&veth_info->vi_rxlock); + + spin_lock_bh(&veth_info->vi_txlock); + + // Also handle completed tx requests + while (veth_info->vi_pend != tx_queue->rq_head) { + desc = &tx_queue->rq_descs[veth_info->vi_pend]; + packet = &veth_info->vi_tx_desc[veth_info->vi_pend]; + + skb = packet->pd_skb; + packet->pd_skb = NULL; + + mic_ctx_unmap_single(veth_to_ctx(veth_info), packet->pd_phys, skb->len); + packet->pd_phys = 0; + + kfree_skb(skb); + + veth_info->vi_pend = (veth_info->vi_pend + 1) % tx_queue->rq_length; + } + + spin_unlock_bh(&veth_info->vi_txlock); + + if (mic_vnet_mode == VNET_MODE_POLL) { + schedule_delayed_work(&veth_info->vi_poll, msecs_to_jiffies(MICVETH_POLL_TIMER_DELAY)); + } +} + +static void +micvnet_intr_bh_handler(struct work_struct *work) +{ + micveth_info_t *veth_info = container_of(work, micveth_info_t, vi_bh); + _micveth_process_descriptors(veth_info); +} + +static void +micveth_poll(struct work_struct *work) +{ + micveth_info_t *veth_info = container_of(work, micveth_info_t, vi_poll.work); + + _micveth_process_descriptors(veth_info); +} + +static ssize_t +show_veth(struct device *dev, struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%s\n", + micveth.lv_pollstate == CLIENT_POLL_RUNNING ? + "running" : "stopped"); +} + +/* + VNET driver public API. These are simply wrappers which either invoke the old + interrupt/poll mode functions or the new DMA mode functions. These are temporary and + will be phased out with the old interrupt/poll mode so only the DMA mode will be around + eventually. 
+ */ +int __init +micveth_init(struct device *dev) +{ + printk("vnet: mode: %s, buffers: %d\n", + mic_vnet_modes[mic_vnet_mode], vnet_num_buffers); + + if (mic_vnet_mode == VNET_MODE_DMA) + return micvnet_init(dev); + /* Intr/poll modes use micveth_init_legacy */ + return 0; +} + +int __init +micveth_init_legacy(int num_bds, struct device *dev) +{ + if (mic_vnet_mode != VNET_MODE_DMA) + return micveth_init_int(num_bds, dev); + /* DMA mode uses micveth_init */ + return 0; +} + +void +micveth_exit(void) +{ + if (mic_vnet_mode == VNET_MODE_DMA) + micvnet_exit(); + else + micveth_exit_int(); +} + +int +micveth_probe(mic_ctx_t *mic_ctx) +{ + if (mic_vnet_mode == VNET_MODE_DMA) + return micvnet_probe(mic_ctx); + /* No support for micveth_probe in legacy intr/poll modes */ + return 0; +} + +void +micveth_remove(mic_ctx_t *mic_ctx) +{ + if (mic_vnet_mode == VNET_MODE_DMA) + micvnet_remove(mic_ctx); + /* No support for micveth_remove in legacy intr/poll modes */ +} + +int +micveth_start(mic_ctx_t *mic_ctx) +{ + micveth_info_t *veth_info = mic_ctx->bi_vethinfo; + int err; + + if (mic_vnet_mode == VNET_MODE_DMA) + err = micvnet_start(mic_ctx); + else + err = micveth_start_int(mic_ctx); + + if (!err) + netif_carrier_on(veth_info->vi_netdev); + + return err; +} + +void +micveth_stop(mic_ctx_t *mic_ctx) +{ + micveth_info_t *veth_info = mic_ctx->bi_vethinfo; + + if (mic_vnet_mode == VNET_MODE_DMA) + micvnet_stop(mic_ctx); + else + micveth_stop_int(mic_ctx); + + if (veth_info) + netif_carrier_off(veth_info->vi_netdev); +} diff --git a/host/micpsmi.c b/host/micpsmi.c new file mode 100644 index 0000000..3db1b64 --- /dev/null +++ b/host/micpsmi.c @@ -0,0 +1,184 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#include "micint.h" + +bool mic_psmi_enable = 0; + +extern struct bin_attribute mic_psmi_ptes_attr; + +static __always_inline void +mic_psmi_free_pte(mic_ctx_t *mic_ctx, int i) +{ + pci_unmap_single(mic_ctx->bi_pdev, + mic_ctx->bi_psmi.dma_tbl[i].pa, MIC_PSMI_PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + free_pages(mic_ctx->bi_psmi.va_tbl[i - 1].pa, MIC_PSMI_PAGE_ORDER); +} + +static int mic_psmi_alloc_buffer(mic_ctx_t *mic_ctx) +{ + int i, j, ret; + void *va; + dma_addr_t dma_hndl; + struct mic_psmi_ctx *psmi_ctx = &mic_ctx->bi_psmi; + + /* allocate psmi page tables */ + psmi_ctx->nr_dma_pages = + ALIGN(psmi_ctx->dma_mem_size, + MIC_PSMI_PAGE_SIZE) / MIC_PSMI_PAGE_SIZE; + if ((psmi_ctx->va_tbl = + kmalloc(psmi_ctx->nr_dma_pages * + sizeof(struct mic_psmi_pte), GFP_KERNEL)) == NULL) { + printk("mic: psmi va table alloc failed\n"); + return -ENOMEM; + } + psmi_ctx->dma_tbl_size = + (psmi_ctx->nr_dma_pages + 2) * sizeof(struct mic_psmi_pte); + if ((psmi_ctx->dma_tbl = + kmalloc(psmi_ctx->dma_tbl_size, GFP_KERNEL)) == NULL) { + printk("mic: psmi dma table alloc failed\n"); + ret = -ENOMEM; + goto free_va_tbl; + } + psmi_ctx->dma_tbl_hndl = + pci_map_single(mic_ctx->bi_pdev, + psmi_ctx->dma_tbl, psmi_ctx->dma_tbl_size, PCI_DMA_BIDIRECTIONAL); + if (pci_dma_mapping_error(mic_ctx->bi_pdev, + psmi_ctx->dma_tbl_hndl)) { + printk("mic: psmi dma table mapping failed\n"); + ret = -ENOMEM; + goto free_dma_tbl; + } + + /* allocate psmi pages */ + for (i = 0; i < psmi_ctx->nr_dma_pages; i++) { + if ((va = (void *)__get_free_pages( + GFP_KERNEL | __GFP_HIGHMEM, + MIC_PSMI_PAGE_ORDER)) == NULL) { + printk("mic: psmi page alloc failed: %d\n", i); + ret = -ENOMEM; + goto free_ptes; + } + memset(va, 0, MIC_PSMI_PAGE_SIZE); + dma_hndl = pci_map_single(mic_ctx->bi_pdev, va, + MIC_PSMI_PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + if (pci_dma_mapping_error(mic_ctx->bi_pdev, dma_hndl)) { + printk("mic: psmi page mapping failed: %d\n", i); + free_pages((unsigned long)va, MIC_PSMI_PAGE_ORDER); + ret = -ENOMEM; + goto free_ptes; + } + psmi_ctx->dma_tbl[i + 1].pa = dma_hndl; + psmi_ctx->va_tbl[i].pa = (uint64_t)va; + } + psmi_ctx->dma_tbl[0].pa = MIC_PSMI_SIGNATURE; + psmi_ctx->dma_tbl[psmi_ctx->nr_dma_pages + 1].pa = MIC_PSMI_SIGNATURE; + printk("mic: psmi #%d, %ld bytes, " + "dma_tbl va=0x%lx hndl=0x%lx\n", mic_ctx->bi_id + 1, + (unsigned long)psmi_ctx->dma_mem_size, + (unsigned long)psmi_ctx->dma_tbl, + (unsigned long)psmi_ctx->dma_tbl_hndl); + return 0; +free_ptes: + for (j = 1; j < i; j++) + mic_psmi_free_pte(mic_ctx, j); + pci_unmap_single(mic_ctx->bi_pdev, + psmi_ctx->dma_tbl_hndl, psmi_ctx->dma_tbl_size, PCI_DMA_BIDIRECTIONAL); +free_dma_tbl: + kfree(psmi_ctx->dma_tbl); + psmi_ctx->dma_tbl = NULL; +free_va_tbl: + kfree(psmi_ctx->va_tbl); + psmi_ctx->va_tbl = NULL; + return ret; +} + +static void mic_psmi_free_buffer(mic_ctx_t *mic_ctx) +{ + struct mic_psmi_ctx *psmi_ctx = &mic_ctx->bi_psmi; + int i; + + for (i = 1; i <= psmi_ctx->nr_dma_pages; i++) + mic_psmi_free_pte(mic_ctx, i); + pci_unmap_single(mic_ctx->bi_pdev, + psmi_ctx->dma_tbl_hndl, psmi_ctx->dma_tbl_size, PCI_DMA_BIDIRECTIONAL); + kfree(psmi_ctx->dma_tbl); + psmi_ctx->dma_tbl = NULL; + kfree(psmi_ctx->va_tbl); + psmi_ctx->va_tbl = NULL; + printk("mic: psmi freed %ld bytes for board #%d\n", + (unsigned long)psmi_ctx->dma_mem_size, mic_ctx->bi_id + 1); +} + +extern int usagemode_param; + +int mic_psmi_init(mic_ctx_t *mic_ctx) +{ + int ret; + int status = 0; + uint32_t scratch0; + struct mic_psmi_ctx * psmi_ctx = &mic_ctx->bi_psmi; + + psmi_ctx->enabled = 0; + 
/* Only initialize psmi for the first board */ + if (!mic_psmi_enable || mic_ctx->bi_id) + return 0; + if(!(scratch0 = SBOX_READ(mic_ctx->mmio.va, SBOX_SCRATCH0))) { + status = wait_for_bootstrap(mic_ctx->mmio.va); + scratch0 = SBOX_READ(mic_ctx->mmio.va, SBOX_SCRATCH0); + } + /* Memory size includes 512K reserved for VGA & GTT table */ + psmi_ctx->dma_mem_size = + SCRATCH0_MEM_SIZE_KB(scratch0) * ((1) * 1024) + + MIC_PSMI_PAGE_SIZE; + if (USAGE_MODE_NORMAL == usagemode_param) { + if ((ret = mic_psmi_alloc_buffer(mic_ctx))) + return ret; + mic_psmi_ptes_attr.size = psmi_ctx->dma_tbl_size; + } + psmi_ctx->enabled = 1; + return 0; +} + +void mic_psmi_uninit(mic_ctx_t *mic_ctx) +{ + struct mic_psmi_ctx * psmi_ctx = &mic_ctx->bi_psmi; + + if (!psmi_ctx->enabled) + return; + if (USAGE_MODE_NORMAL == usagemode_param) + mic_psmi_free_buffer(mic_ctx); + psmi_ctx->enabled = 0; +} diff --git a/host/micscif_pm.c b/host/micscif_pm.c new file mode 100644 index 0000000..95e229d --- /dev/null +++ b/host/micscif_pm.c @@ -0,0 +1,1062 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include "mic_common.h" +#include "scif.h" +#include "mic/micscif.h" +#include "mic/mic_pm.h" +#include "mic/micveth.h" + +extern int set_host_state(mic_ctx_t *mic_ctx, PM_IDLE_STATE state); +extern int pc6_entry_start(mic_ctx_t *mic_ctx); + +/* Function that decrements the count of number of PM clients connected + * to the host. 
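+ * The matching increment is done in micpm_start() when a new PM
+ * connection is accepted, so a negative count indicates an unbalanced
+ * start/stop sequence and is only reported through PM_DEBUG.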
+ */ +void +micpm_decrement_clients(void) +{ + if(unlikely(atomic_dec_return(&mic_data.dd_pm.connected_clients) < 0)) { + PM_DEBUG("connected_clients is negative (%d)\n", + atomic_read(&mic_data.dd_pm.connected_clients)); + } + return; +} + +static char *pm_message_types[PM_MESSAGE_MAX+1] = {"PM_MESSAGE_PC3READY", + "PM_MESSAGE_OPEN", + "PM_MESSAGE_OPEN_ACK", + "PM_MESSAGE_CLOSE", + "PM_MESSAGE_CLOSE_ACK", + "PM_MESSAGE_TEST", + "PM_MESSAGE_MAX"}; +void +micpm_display_message(mic_ctx_t *mic_ctx, void *header, void *msg, const char* label) { + pm_msg_header *header_ref; + int msg_len; + int i=0; + char *payload; + scif_epd_t epd = mic_ctx->micpm_ctx.pm_epd; + header_ref = (pm_msg_header *)header; + msg_len = header_ref->len; + + if(!epd) + return; + + if(0 <= header_ref->opcode && header_ref->opcode < PM_MESSAGE_MAX) { + if(strcmp(label,"SENT")==0) { + printk("%s: Msg type %s, SrcNode:SrcPort %d:%d, DestNode:DestPort %d:%d", label, + pm_message_types[header_ref->opcode], epd->port.node, epd->port.port, + epd->peer.node, epd->peer.port); + } + else + printk("%s: Msg type %s, DestNode:DestPort %d:%d, SrcNode:SrcPort %d:%d", label, + pm_message_types[header_ref->opcode], epd->port.node, epd->port.port, + epd->peer.node, epd->peer.port); + } + + + if(msg != NULL) { + payload = (char *)msg; + printk(" Payload"); + for(i=0;imicpm_ctx.pm_options.pc6_enabled) { + if (set && !mic_ctx->micpm_ctx.pc6_enabled) { + mic_ctx->micpm_ctx.pc6_enabled = set; + queue_delayed_work(mic_ctx->micpm_ctx.pc6_entry_wq, + &mic_ctx->micpm_ctx.pc6_entry_work, + mic_ctx->micpm_ctx.pc6_timeout*HZ); + } + if (set == false) { + mic_ctx->micpm_ctx.pc6_enabled = set; + micpm_get_reference(mic_ctx, true); + micpm_put_reference(mic_ctx); + } + } else { + if (set) + err = -EINVAL; + else + mic_ctx->micpm_ctx.pc6_enabled = set; + } + return err; +} + +int micpm_update_pc3(mic_ctx_t *mic_ctx, bool set) +{ + int err = 0; + if (mic_ctx->micpm_ctx.pm_options.pc3_enabled) { + if (set) { + mic_ctx->micpm_ctx.pc3_enabled = set; + } else { + mic_ctx->micpm_ctx.pc3_enabled = set; + micpm_get_reference(mic_ctx, true); + micpm_put_reference(mic_ctx); + } + } else { + if (set) + err = -EINVAL; + else + mic_ctx->micpm_ctx.pc3_enabled = set; + } + return err; +} + +/* + * Wraper to scif_send that takes in the buffer to be sent + * as input. + */ +int +mic_pm_send(mic_ctx_t *mic_ctx, void *msg, uint32_t len) +{ + int err; + scif_epd_t epd; + + if(mic_ctx == NULL) { + PM_DEBUG("Mic context not Initialized\n"); + return -EINVAL; + } + + if((msg == NULL) || (len == 0)) { + PM_DEBUG("Invalid Parameters\n"); + return -EINVAL; + } + + epd = mic_ctx->micpm_ctx.pm_epd; + if(epd == NULL) { + PM_DEBUG("Scif Endpoint Undefined\n"); + return -EINVAL; + } + + if ((mic_ctx->micpm_ctx.con_state != PM_CONNECTING) && + (mic_ctx->micpm_ctx.con_state != PM_CONNECTED)) { + PM_DEBUG("Endpoint not in connected state\n"); + return -EINVAL; + } + + err = scif_send(epd, msg, len, PM_SEND_MODE); + /*scif_send returns the number of bytes returned on success */ + if(err <= 0) { + PM_DEBUG("scif_send to node: %d port: %d failed with error %d\n", + epd->peer.node, epd->peer.port, err); + } else { + PM_DEBUG("Bytes sent = %d\n",err); + err = 0; + } + + return err; +} + +/* + * Wrapper to scif_recv. 
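+ * Validates the device context, buffer and connection state, then
+ * calls scif_recv() in PM_RECV_MODE.  A return of 0 bytes is treated
+ * as a lost peer and mapped to -ENXIO; a positive byte count is
+ * collapsed to 0 (success).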
+ */ +int +mic_pm_recv(mic_ctx_t *mic_ctx, void *msg, uint32_t len) +{ + int err; + scif_epd_t epd; + + if(mic_ctx == NULL) { + PM_DEBUG("Mic context not Initialized\n"); + return -EINVAL; + } + + if((msg == NULL) || (len == 0)) { + PM_DEBUG("Invalid Parameters\n"); + return -EINVAL; + } + + epd = mic_ctx->micpm_ctx.pm_epd; + if(epd == NULL) { + PM_DEBUG("Scif Endpoint Undefined\n"); + return -EINVAL; + } + + if ((mic_ctx->micpm_ctx.con_state != PM_CONNECTING) && + (mic_ctx->micpm_ctx.con_state != PM_CONNECTED)) { + PM_DEBUG("Endpoint not in connected state\n"); + return -EINVAL; + } + + err = scif_recv(epd, msg, len, PM_RECV_MODE); + + if(err <= 0) { + pr_debug("scif_recv failed with error %d\n", err); + if(err == 0) { + /*0 bytes were sent */ + err = -ENXIO; + } + } else { + PM_DEBUG("Bytes received = %d\n",err); + err = 0; + } + return err; +} + +/* + * Function to send a Power Management message over scif. Gets the message type + * as input and builds a message header. It then creates a single message buffer + * with this header and body and sends it to the receiving node. + */ +int +mic_pm_send_msg(mic_ctx_t *mic_ctx, PM_MESSAGE type, void *msg, uint32_t len) +{ + pm_msg_header header; + char *send_msg = NULL; + int err = 0; + + header.opcode = type; + header.len = len; + + send_msg = kmalloc(len + sizeof(pm_msg_header), GFP_KERNEL); + if(send_msg == NULL) { + PM_DEBUG("error allocating memory"); + err = -ENOMEM; + return err; + } + memcpy(send_msg , &header, sizeof(pm_msg_header)); + if((len != 0) && (msg != NULL)) { + memcpy((send_msg + sizeof(pm_msg_header)), msg, len); + } + + if(mic_data.dd_pm.enable_pm_logging) { + if((len != 0) && (msg != NULL)) + micpm_display_message(mic_ctx,send_msg,send_msg+sizeof(pm_msg_header),"SENT"); + else + micpm_display_message(mic_ctx,send_msg,NULL,"SENT"); + } + err = mic_pm_send(mic_ctx, send_msg, len + sizeof(pm_msg_header)); + kfree(send_msg); + return err; +} + +/* + * Handler invoked when receiving a PC3 ready message. + */ +int +handle_pc3_ready(mic_ctx_t *mic_ctx) +{ + int err = 0; + PM_ENTRY; + err = pm_pc3_entry(mic_ctx); + PM_EXIT; + return err; +} + +/* + * Handler invoked when receiving the latency response message + */ +int +handle_open_ack(mic_ctx_t *mic_ctx, pm_msg_pm_options *msg) +{ + int err = 0; + PM_ENTRY; + + if ((mic_ctx == NULL) || (msg == NULL)) { + err = EINVAL; + goto inval; + } + + if ((msg->version.major_version != PM_MAJOR_VERSION) || + (msg->version.minor_version != PM_MINOR_VERSION)) { + printk(KERN_ERR "PM Driver version mismatch. " + "Expected version: %d.%d Received version %d.%d\n", + PM_MAJOR_VERSION, PM_MINOR_VERSION, + msg->version.major_version, msg->version.minor_version); + schedule_work(&mic_ctx->micpm_ctx.pm_close); + goto inval; + } + + mic_ctx->micpm_ctx.pm_options.pc3_enabled = msg->pc3_enabled; + mic_ctx->micpm_ctx.pm_options.pc6_enabled = msg->pc6_enabled; + + mic_ctx->micpm_ctx.pc3_enabled = + (mic_ctx->micpm_ctx.pm_options.pc3_enabled)? true : false; + mic_ctx->micpm_ctx.pc6_enabled = + (mic_ctx->micpm_ctx.pm_options.pc6_enabled)? true : false; + + mic_ctx->micpm_ctx.con_state = PM_CONNECTED; + +inval: + PM_EXIT; + return err; +} + +/* + * Message handler invoked by the per device receive workqueue when it receives + * a message from the device. 
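+ * Dispatches on msg_header.opcode: PM_MESSAGE_PC3READY is handed to
+ * handle_pc3_ready(), PM_MESSAGE_OPEN_ACK is length-checked against
+ * sizeof(pm_msg_pm_options) before handle_open_ack(), and any other
+ * opcode is logged and dropped.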
+ */ +int +mic_pm_handle_message(mic_ctx_t *mic_ctx, pm_recv_msg_t *recv_msg) +{ + int res = 0; + + if(mic_ctx == NULL) { + return -EINVAL; + } + + if(recv_msg == NULL) { + PM_DEBUG("Undefined message\n"); + return -EINVAL; + } + + switch(recv_msg->msg_header.opcode) { + case PM_MESSAGE_PC3READY: + res = handle_pc3_ready(mic_ctx); + break; + case PM_MESSAGE_OPEN_ACK: + /*Size of the payload needs to be equal to what the + * host is trying to cast it to + */ + if (sizeof(pm_msg_pm_options) != recv_msg->msg_header.len) { + printk(KERN_ERR "Incompatible PM message. Opcode = %d\n", + recv_msg->msg_header.opcode); + return -EINVAL; + } + res = handle_open_ack(mic_ctx, + ((pm_msg_pm_options *) recv_msg->msg_body)); + break; + default: + printk(KERN_ERR "Unknown PM message. Opcode = %d\n", + recv_msg->msg_header.opcode); + break; + } + return res; +} + +/* + * retrieve_msg: + * + * Retrieve message from the head of list. + * @mic_ctx: The device context + * Returns the retrieved message. + */ +pm_recv_msg_t * +pm_retrieve_msg(mic_ctx_t *mic_ctx) { + + pm_recv_msg_t *recv_msg = NULL; + struct list_head *pos, *tmpq; + bool msg_found = false; + + mutex_lock(&mic_ctx->micpm_ctx.msg_mutex); + if (!list_empty_careful(&mic_ctx->micpm_ctx.msg_list)) + { + list_for_each_safe(pos, tmpq, &mic_ctx->micpm_ctx.msg_list) { + recv_msg = list_entry(pos, pm_recv_msg_t, msg); + /*Do not touch the message if its a test message */ + if (recv_msg->msg_header.opcode != PM_MESSAGE_TEST) { + list_del(&recv_msg->msg); + msg_found = true; + break; + } + } + } + + if (msg_found == false) + recv_msg = NULL; + + mutex_unlock(&mic_ctx->micpm_ctx.msg_mutex); + return recv_msg; +} + +/* + * pm_process_msg_list: + * + * Process the message list of a node and handle each message in the list. + * @mic_ctx[in]: The deive context whose message list is to be processed + * Returns: None + */ +void +pm_process_msg_list(mic_ctx_t *mic_ctx) { + + pm_recv_msg_t *process_msg = NULL; + int ret = 0; + + if(mic_ctx == NULL) { + PM_DEBUG("Cannot get device handle \n"); + return; + } + + while(!list_empty(&mic_ctx->micpm_ctx.msg_list)) { + process_msg = pm_retrieve_msg(mic_ctx); + if(!process_msg) { + PM_DEBUG("No Message to process.\n"); + return; + } + + ret = mic_pm_handle_message(mic_ctx, process_msg); + if(ret) { + PM_DEBUG("Power Management message not processed" + " successfully.\n"); + } + + if(process_msg->msg_body != NULL) { + kfree(process_msg->msg_body); + } + kfree(process_msg); + } +} + +/* + * Retrieves each message from the message list and calls the handler + * for the same. After the handler returns, the message is removed + * from the list and deleted. + */ +static void +mic_pm_msg_handle_work(struct work_struct *msg_handle_work) +{ + pm_wq_t *pm_wq = container_of(msg_handle_work, pm_wq_t, work); + micpm_ctx_t *pm_ctx = container_of(pm_wq, micpm_ctx_t, handle_msg); + mic_ctx_t *mic_ctx = container_of(pm_ctx, mic_ctx_t, micpm_ctx); + pm_process_msg_list(mic_ctx); + return; +} + +static void +pc6_entry_work(struct work_struct *work) +{ + int err; + micpm_ctx_t *pm_ctx = + container_of(to_delayed_work(work), + micpm_ctx_t, pc6_entry_work); + mic_ctx_t *mic_ctx = container_of(pm_ctx, mic_ctx_t, micpm_ctx); + + err = pc6_entry_start(mic_ctx); + if (err == -EAGAIN) + queue_delayed_work(mic_ctx->micpm_ctx.pc6_entry_wq, + &mic_ctx->micpm_ctx.pc6_entry_work, + mic_ctx->micpm_ctx.pc6_timeout*HZ); + return; +} + +/* + * Called when a device creates a PM connection to Host. 
There can be + * only one PM connection between Host and a device. The function checks + * for an existing connection and rejects this new request if present. + */ +static void +mic_pm_accept_work(struct work_struct *work) +{ + scif_epd_t newepd; + struct scif_portID portID; + int err; + uint16_t i; + mic_ctx_t *mic_ctx; + mic_data_t *mic_data_p = &mic_data; + + PM_DEBUG("Accept thread waiting for new PM connections\n"); + err = scif_accept(mic_data.dd_pm.epd, &portID, &newepd, SCIF_ACCEPT_SYNC); + if (err == -EBUSY || err == -ENODEV) { + PM_DEBUG("scif_accept error %d\n", err); + goto continue_accepting; + } + else if (err < 0) { + PM_DEBUG("scif_accept failed with errno %d\n", err); + goto exit; + + } + PM_DEBUG("Connection request received. \n"); + + mutex_lock(&mic_data.dd_pm.pm_accept_mutex); + + if (newepd->peer.node == SCIF_HOST_NODE) { + /* Reject connection request from HOST itself */ + PM_DEBUG("PM: Peer node cannot be HOST. Peer Node = %d Peer Port = %d", + newepd->peer.node, newepd->peer.port); + scif_close(newepd); + mutex_unlock(&mic_data.dd_pm.pm_accept_mutex); + goto continue_accepting; + } + + /*Only one Power Management connection per node. */ + for (i = 0; i < mic_data_p->dd_numdevs; i++) { + mic_ctx = get_per_dev_ctx(i); + if (mic_ctx != NULL) { + if (mic_ctx->micpm_ctx.pm_epd != NULL) { + if (mic_ctx->micpm_ctx.pm_epd->peer.node == newepd->peer.node) { + PM_DEBUG("There is already Power Management connection" + " established from this node. Rejecting request.\n"); + PM_DEBUG("Peer Node = %d, Peer Port = %d\n", + mic_ctx->micpm_ctx.pm_epd->peer.node, + mic_ctx->micpm_ctx.pm_epd->peer.port); + scif_close(newepd); + mutex_unlock(&mic_data.dd_pm.pm_accept_mutex); + goto continue_accepting; + } + } + } + + } + mutex_unlock(&mic_data.dd_pm.pm_accept_mutex); + mic_ctx = get_per_dev_ctx(newepd->peer.node -1); + mic_ctx->micpm_ctx.pm_epd = newepd; + micpm_start(mic_ctx); + + +continue_accepting: + mutex_lock(&mic_data.dd_pm.pm_accept_mutex); + queue_work(mic_data.dd_pm.accept.wq, + &mic_data.dd_pm.accept.work); + mutex_unlock(&mic_data.dd_pm.pm_accept_mutex); +exit: + return; +} + +/* + * Work item function that waits for incoming PM messages from + * a node. The function adds the message to a per device message + * list that is later processed by the message handler. + */ +static void +mic_pm_recv_work(struct work_struct *recv_work) +{ + int err = 0; + int size = 0; + + pm_wq_t *pm_wq = container_of(recv_work, pm_wq_t, work); + micpm_ctx_t *pm_ctx = container_of(pm_wq, micpm_ctx_t, recv); + mic_ctx_t *mic_ctx = container_of(pm_ctx, mic_ctx_t, micpm_ctx); + pm_recv_msg_t *recv_msg = NULL; + + if (mic_ctx == NULL || pm_ctx == NULL) { + PM_DEBUG("Error retrieving driver context \n"); + goto unqueue; + } + + size = sizeof(pm_msg_header); + recv_msg = (void *)kmalloc(sizeof(pm_recv_msg_t), GFP_KERNEL); + + if (recv_msg == NULL) { + PM_DEBUG("Error allocating memory to save receive message.\n"); + goto unqueue; + } + INIT_LIST_HEAD(&recv_msg->msg); + recv_msg->msg_body = NULL; + + /*Get the header */ + err = mic_pm_recv(mic_ctx, &recv_msg->msg_header, size); + if (err < 0) { + PM_DEBUG("Error in scif_recv while waiting for PM header message.\n"); + if (err == -ECONNRESET) { + /*Remote node is not in a connected state. 
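+			 * The peer endpoint has gone away, so queue the
+			 * pm_close work to tear down this connection.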
*/ + schedule_work(&mic_ctx->micpm_ctx.pm_close); + } + goto unqueue; + + } + + if(recv_msg->msg_header.len != 0) { + PM_DEBUG("Retrieving %d bytes of message body\n", recv_msg->msg_header.len); + recv_msg->msg_body = (void *)kmalloc((sizeof(char) * recv_msg->msg_header.len), GFP_KERNEL); + if (recv_msg->msg_body == NULL) { + PM_DEBUG("Error allocating memory to receive PM Message\n"); + goto unqueue; + } + err = mic_pm_recv(mic_ctx, recv_msg->msg_body, recv_msg->msg_header.len); + if (err < 0) { + PM_DEBUG("Error in scif_recv while waiting for PM message body\n"); + if (err == -ECONNRESET) { + /*Remote node is not in a connected state. */ + schedule_work(&mic_ctx->micpm_ctx.pm_close); + } + goto unqueue; + } + } + + if(mic_data.dd_pm.enable_pm_logging) { + micpm_display_message(mic_ctx,&recv_msg->msg_header, + recv_msg->msg_body,"RECV"); + } + + if ((recv_msg->msg_header.opcode != PM_MESSAGE_CLOSE) && + ((recv_msg->msg_header.opcode != PM_MESSAGE_CLOSE_ACK))){ + PM_DEBUG("Adding received message from node %d to list.\n", + mic_ctx->bi_id+1); + mutex_lock(&mic_ctx->micpm_ctx.msg_mutex); + list_add_tail(&recv_msg->msg , &mic_ctx->micpm_ctx.msg_list); + mutex_unlock(&mic_ctx->micpm_ctx.msg_mutex); + + if(likely(recv_msg->msg_header.opcode != PM_MESSAGE_TEST)) { + PM_DEBUG("Queue message handler work for node: %d\n",mic_ctx->bi_id+1); + queue_work(mic_ctx->micpm_ctx.handle_msg.wq, + &mic_ctx->micpm_ctx.handle_msg.work); + } + + queue_work(mic_ctx->micpm_ctx.recv.wq, + &mic_ctx->micpm_ctx.recv.work); + } else { + + if (recv_msg->msg_header.opcode == PM_MESSAGE_CLOSE) { + mic_pm_send_msg(mic_ctx , PM_MESSAGE_CLOSE_ACK, NULL, 0); + mic_ctx->micpm_ctx.con_state = PM_DISCONNECTING; + schedule_work(&mic_ctx->micpm_ctx.pm_close); + } else { + mic_ctx->micpm_ctx.con_state = PM_DISCONNECTING; + wake_up(&mic_ctx->micpm_ctx.disc_wq); + } + goto unqueue; + } + return; +unqueue: + if (recv_msg) { + if (recv_msg->msg_body) + kfree(recv_msg->msg_body); + kfree(recv_msg); + } + return; +} + +/* + * Work item to handle closing of PM end point to a device and all the + * related receive workqueues. + */ +static void +mic_pm_close_work(struct work_struct *work) +{ + micpm_ctx_t *pm_ctx = container_of(work, micpm_ctx_t, pm_close); + mic_ctx_t *mic_ctx = container_of(pm_ctx, mic_ctx_t, micpm_ctx); + micpm_stop(mic_ctx); + return; +} + +static void +mic_pm_resume_work(struct work_struct *resume_work) +{ + int err; + pm_wq_t *pm_wq = container_of(resume_work, pm_wq_t, work); + micpm_ctx_t *pm_ctx = container_of(pm_wq, micpm_ctx_t, resume); + mic_ctx_t *mic_ctx = container_of(pm_ctx, mic_ctx_t, micpm_ctx); + + if (mic_ctx != NULL) { + err = pm_start_device(mic_ctx); + if (err) { + PM_DEBUG("Failed to start device %d after resume\n", + mic_ctx->bi_id); + } + } else { + PM_DEBUG("Error retrieving node context.\n"); + } +} + +/* Create PM specific workqueues during driver probe. + * + * Receive workqueue will store the received message and kick-off + * a message handler workqueue which will process them. + * + * Resume workqueue handles the task of booting uOS rduring + * OSPM resume/restore phase. 
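+ * Four single-threaded workqueues are created per device: resume,
+ * receive, message-handler and PC6-entry.  The corresponding work
+ * items, including the delayed PC6 entry work and the pm_close work,
+ * are initialized here as well.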
+ */ +int +setup_pm_workqueues(mic_ctx_t *mic_ctx) +{ + int err = 0; + + if(!mic_ctx) { + PM_DEBUG("Failed to retrieve device context\n"); + err = -EINVAL; + goto err; + } + + /* setup resume wq */ + snprintf(mic_ctx->micpm_ctx.resume.wq_name, + sizeof(mic_ctx->micpm_ctx.resume.wq_name), + "PM_RESUME_WQ %d", mic_get_scifnode_id(mic_ctx)); + + if (!(mic_ctx->micpm_ctx.resume.wq + = __mic_create_singlethread_workqueue( + mic_ctx->micpm_ctx.resume.wq_name))) { + err = -ENOMEM; + goto err; + } + + /* Setup Receive wq */ + snprintf(mic_ctx->micpm_ctx.recv.wq_name, + sizeof(mic_ctx->micpm_ctx.recv.wq_name), + "RECV_WORK_Q %d", mic_get_scifnode_id(mic_ctx)); + + if (!(mic_ctx->micpm_ctx.recv.wq + = __mic_create_singlethread_workqueue( + mic_ctx->micpm_ctx.recv.wq_name))) { + err = -ENOMEM; + goto err; + } + + /* Setup Msg handler wq */ + snprintf(mic_ctx->micpm_ctx.handle_msg.wq_name, + sizeof(mic_ctx->micpm_ctx.handle_msg.wq_name), + "MSG_HANDLER_WQ %d", mic_get_scifnode_id(mic_ctx)); + + if (!(mic_ctx->micpm_ctx.handle_msg.wq + = __mic_create_singlethread_workqueue( + mic_ctx->micpm_ctx.handle_msg.wq_name))) { + err = -ENOMEM; + goto err; + } + + /* Setup pc6 entry wq */ + snprintf(mic_ctx->micpm_ctx.pc6_wq_name, + sizeof(mic_ctx->micpm_ctx.pc6_wq_name), + "PC6_WORK_Q %d", mic_get_scifnode_id(mic_ctx)); + + if (!(mic_ctx->micpm_ctx.pc6_entry_wq + = __mic_create_singlethread_workqueue( + mic_ctx->micpm_ctx.pc6_wq_name))) { + err = -ENOMEM; + goto err; + } + INIT_WORK(&mic_ctx->micpm_ctx.recv.work, mic_pm_recv_work); + INIT_WORK(&mic_ctx->micpm_ctx.handle_msg.work, mic_pm_msg_handle_work); + INIT_WORK(&mic_ctx->micpm_ctx.pm_close, mic_pm_close_work); + INIT_WORK(&mic_ctx->micpm_ctx.resume.work, mic_pm_resume_work); + INIT_DELAYED_WORK(&mic_ctx->micpm_ctx.pc6_entry_work, pc6_entry_work); + +err: + return err; +} +/*Power Management Initialization function. Sets up SCIF + * end points and accept threads. + */ +int micpm_init() +{ + scif_epd_t epd; + int con_port; + int err = 0; + + epd = scif_open(); + if (epd == SCIF_OPEN_FAILED || epd == NULL) { + PM_DEBUG("scif_open failed\n"); + return -1; + } + + if ((con_port = scif_bind(epd, SCIF_PM_PORT_0)) < 0) { + PM_DEBUG("scif_bind to port failed with error %d\n", con_port); + err = con_port; + goto exit_close; + } + + /*No real upper limit on number of connections. + Once scif_listen accepts 0 as an acceptable parameter for max + connections(to mean tht there is no upper limit), change this. 
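+ A backlog of 100 is passed to scif_listen() below as a stand-in for
+ an unlimited number of connections.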
*/ + if ((err = scif_listen(epd, 100)) < 0) { + PM_DEBUG("Listen ioctl failed with error %d\n", err); + goto exit_close; + } + mic_data.dd_pm.epd = epd; + + snprintf(mic_data.dd_pm.accept.wq_name, + sizeof(mic_data.dd_pm.accept.wq_name),"PM ACCEPT"); + + mic_data.dd_pm.accept.wq = + __mic_create_singlethread_workqueue(mic_data.dd_pm.accept.wq_name); + if (!mic_data.dd_pm.accept.wq){ + err = -ENOMEM; + PM_DEBUG("create workqueue returned null\n"); + goto exit_close; + } + INIT_WORK(&mic_data.dd_pm.accept.work, mic_pm_accept_work); + mutex_init (&mic_data.dd_pm.pm_accept_mutex); + mutex_init (&mic_data.dd_pm.pm_idle_mutex); + atomic_set(&mic_data.dd_pm.connected_clients, 0); + + /*Add work to the work queue */ + queue_work(mic_data.dd_pm.accept.wq, + &mic_data.dd_pm.accept.work); + mic_data.dd_pm.enable_pm_logging = 0; + atomic_set(&mic_data.dd_pm.wakeup_in_progress, 0); + + micpm_dbg_parent_init(); + + return err; + +exit_close: + scif_close(epd); + return err; +} + +/* + * Close the SCIF acceptor endpoint and uninit a lot of driver level + * data structures including accept threads, + */ +void +micpm_uninit(void) +{ + int err; + scif_epd_t epd = mic_data.dd_pm.epd; + + if(atomic_read(&mic_data.dd_pm.connected_clients) > 0) { + PM_DEBUG("connected_clients is nonzero (%d)\n", + atomic_read(&mic_data.dd_pm.connected_clients)); + } + err = scif_close(epd); + if (err != 0) { + PM_DEBUG("Scif_close failed with error %d\n",err); + } + + if (mic_data.dd_pm.accept.wq != NULL) { + PM_DEBUG("Flushing accept workqueue\n"); + flush_workqueue(mic_data.dd_pm.accept.wq); + destroy_workqueue(mic_data.dd_pm.accept.wq); + mic_data.dd_pm.accept.wq = NULL; + } + + mutex_destroy(&mic_data.dd_pm.pm_accept_mutex); + mutex_destroy(&mic_data.dd_pm.pm_idle_mutex); + + debugfs_remove_recursive(mic_data.dd_pm.pmdbgparent_dir); + +} + +/* + * Open the Per device Power Management context. 
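+ * micpm_probe() seeds the per-device micpm_ctx (endpoint, idle state,
+ * PC3/PC6 flags), creates the PM workqueues, initializes the message
+ * list and PM reference count, and registers the per-device debugfs
+ * entries.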
+ */ +int +micpm_probe(mic_ctx_t * mic_ctx) { + + int err = 0; + + mic_ctx->micpm_ctx.pm_epd = NULL; + mic_ctx->micpm_ctx.idle_state = PM_IDLE_STATE_PC0; + mic_ctx->micpm_ctx.recv.wq = NULL; + mic_ctx->micpm_ctx.handle_msg.wq = NULL; + mic_ctx->micpm_ctx.mic_suspend_state = MIC_RESET; + mic_ctx->micpm_ctx.pc3_enabled = true; + mic_ctx->micpm_ctx.pc6_enabled = true; + mic_ctx->micpm_ctx.pm_options.pc3_enabled = 0; + mic_ctx->micpm_ctx.pm_options.pc6_enabled = 0; + + if ((err = setup_pm_workqueues(mic_ctx))) + goto err; + + mutex_init (&mic_ctx->micpm_ctx.msg_mutex); + INIT_LIST_HEAD(&mic_ctx->micpm_ctx.msg_list); + init_waitqueue_head(&mic_ctx->micpm_ctx.disc_wq); + atomic_set(&mic_ctx->micpm_ctx.pm_ref_cnt, 0); + mic_ctx->micpm_ctx.pc6_timeout = PC6_TIMER; + + /* create debugfs entries*/ + micpm_dbg_init(mic_ctx); + +err: + return err; +} + +int +micpm_remove(mic_ctx_t * mic_ctx) { + + debugfs_remove_recursive(mic_ctx->micpm_ctx.pmdbg_dir); + + if (mic_ctx->micpm_ctx.resume.wq != NULL) { + destroy_workqueue(mic_ctx->micpm_ctx.resume.wq); + mic_ctx->micpm_ctx.resume.wq = NULL; + } + + if(mic_ctx->micpm_ctx.pc6_entry_wq != NULL) { + destroy_workqueue(mic_ctx->micpm_ctx.pc6_entry_wq); + mic_ctx->micpm_ctx.pc6_entry_wq = NULL; + } + + if(mic_ctx->micpm_ctx.recv.wq != NULL) { + destroy_workqueue(mic_ctx->micpm_ctx.recv.wq); + mic_ctx->micpm_ctx.recv.wq = NULL; + } + + if(mic_ctx->micpm_ctx.handle_msg.wq != NULL) { + destroy_workqueue(mic_ctx->micpm_ctx.handle_msg.wq); + mic_ctx->micpm_ctx.handle_msg.wq = NULL; + } + + micpm_nodemask_uninit(mic_ctx); + + mutex_destroy(&mic_ctx->micpm_ctx.msg_mutex); + return 0; +} + +int +micpm_start(mic_ctx_t *mic_ctx) { + + int ref_cnt; + mic_ctx->micpm_ctx.con_state = PM_CONNECTING; + + /* queue receiver */ + queue_work(mic_ctx->micpm_ctx.recv.wq, + &mic_ctx->micpm_ctx.recv.work); + + atomic_inc(&mic_data.dd_pm.connected_clients); + if ((ref_cnt = atomic_read(&mic_ctx->micpm_ctx.pm_ref_cnt))) + printk("Warning: PM ref_cnt is non-zero during start. " + "ref_cnt = %d PM features may not work as expected\n", + ref_cnt); + mic_ctx->micpm_ctx.idle_state = PM_IDLE_STATE_PC0; + set_host_state(mic_ctx, PM_IDLE_STATE_PC0); + return mic_pm_send_msg(mic_ctx , PM_MESSAGE_OPEN, NULL, 0); +} + +/* + * Close the per device Power management context here. + * It does various things such as: closing scif endpoints, + * delete pending work items and wait for those that are + * executing to complete, delete pending messages in the + * message list, delete pending timers and wait for runnig + * timers to complete. The function can block. + */ +int +micpm_stop(mic_ctx_t *mic_ctx) { + + int err = 0; + int node_lost = 0; + if(mic_ctx == NULL) { + PM_DEBUG("Mic context not Initialized\n"); + return -EINVAL; + } + + if ((micpm_get_reference(mic_ctx, true))) { + PM_DEBUG("get_reference failed. 
Node may be lost\n"); + node_lost = 1; + } + + mutex_lock(&mic_data.dd_pm.pm_accept_mutex); + if ((mic_ctx->micpm_ctx.con_state == PM_CONNECTED) && + (mic_ctx->state != MIC_LOST)) { + if (!mic_pm_send_msg(mic_ctx, PM_MESSAGE_CLOSE, NULL, 0)) { + err = wait_event_timeout( + mic_ctx->micpm_ctx.disc_wq, + mic_ctx->micpm_ctx.con_state == PM_DISCONNECTING, + NODE_ALIVE_TIMEOUT); + if (!err) { + PM_DEBUG("Timed out waiting CLOSE ACK" + " from node.\n"); + } + } + } + + if(mic_ctx->micpm_ctx.pm_epd != NULL) { + PM_DEBUG("Power Management: Closing connection to" + " node: %d port:%d\n", mic_ctx->micpm_ctx.pm_epd->peer.node, + mic_ctx->micpm_ctx.pm_epd->peer.port); + err = scif_close(mic_ctx->micpm_ctx.pm_epd); + if(err!= 0) + PM_DEBUG("Scif_close failed with error %d\n",err); + mic_ctx->micpm_ctx.pm_epd = NULL; + micpm_decrement_clients(); + } + mic_ctx->micpm_ctx.con_state = PM_DISCONNECTED; + mic_ctx->micpm_ctx.idle_state = PM_IDLE_STATE_PC0; + flush_workqueue(mic_ctx->micpm_ctx.resume.wq); + flush_workqueue(mic_ctx->micpm_ctx.recv.wq); + flush_workqueue(mic_ctx->micpm_ctx.handle_msg.wq); + cancel_delayed_work_sync(&mic_ctx->micpm_ctx.pc6_entry_work); + + /* Process messages in message queue */ + pm_process_msg_list(mic_ctx); + + if (!node_lost) + micpm_put_reference(mic_ctx); + mutex_unlock(&mic_data.dd_pm.pm_accept_mutex); + return err; +} + +/* + * Function to load the uOS and start all the driver components + * after a resume/restore operation + */ +int +pm_start_device(mic_ctx_t *mic_ctx) +{ + if (!mic_ctx) { + PM_DEBUG("Error retreving driver context\n"); + return 0; + } + + PM_DEBUG("Resume MIC device:%d\n", mic_ctx->bi_id); + /* Make sure the Power reset during Resume/Restore is complete*/ + adapter_wait_reset(mic_ctx); + wait_for_reset(mic_ctx); + + /*Perform software reset */ + adapter_reset(mic_ctx, RESET_WAIT, !RESET_REATTEMPT); + wait_for_reset(mic_ctx); + + /* Boot uOS only if it was online before suspend */ + if (MIC_ONLINE == mic_ctx->micpm_ctx.mic_suspend_state) { + if(adapter_start_device(mic_ctx)) { + PM_DEBUG("booting uos... failed\n"); + } + } + + return 0; +} + +/* + * Function to stop all the driver components and unload the uOS + * during a suspend/hibernate operation + */ +int +pm_stop_device(mic_ctx_t *mic_ctx) +{ + if (!mic_ctx) { + PM_DEBUG("Error retreving driver context\n"); + return 0; + } + + mic_ctx->micpm_ctx.mic_suspend_state = mic_ctx->state; + + PM_DEBUG("Suspend MIC device:#%d\n", mic_ctx->bi_id); + if (MIC_ONLINE == mic_ctx->micpm_ctx.mic_suspend_state) { + adapter_shutdown_device(mic_ctx); + if (!wait_for_shutdown_and_reset(mic_ctx)) { + /* Shutdown failed. Fall back on forced reset */ + adapter_stop_device(mic_ctx, RESET_WAIT, !RESET_REATTEMPT); + wait_for_reset(mic_ctx); + } + } + else { + /* If card is in any state but ONLINE, make sure card stops */ + adapter_stop_device(mic_ctx, RESET_WAIT, !RESET_REATTEMPT); + wait_for_reset(mic_ctx); + } + + mutex_lock(&mic_ctx->state_lock); + mic_ctx->state = MIC_RESET; + mutex_unlock(&mic_ctx->state_lock); + return 0; +} diff --git a/host/pm_ioctl.c b/host/pm_ioctl.c new file mode 100644 index 0000000..139d820 --- /dev/null +++ b/host/pm_ioctl.c @@ -0,0 +1,603 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* contains code to handle MIC IO control codes */ + + +#include "mic_common.h" +#include + +/* helper methods for debugging/unit testing /*/ +static int check_test_msg(mic_ctx_t *mic_ctx, void *buf, uint32_t len); + + + +#define PM_MMIO_REGVALUE_GET(_name, _offset) \ +int get_##_name(void *data, uint64_t *value) \ +{ \ + uint64_t bid; \ + mic_ctx_t *mic_ctx; \ + \ + bid = (uint64_t)data; \ + if (bid >= mic_data.dd_numdevs) { \ + return -EINVAL; \ + } \ + mic_ctx = get_per_dev_ctx(bid); \ + if (!mic_ctx) { \ + printk("DD"); \ + return -EINVAL; \ + } \ + \ + *value = pm_reg_read(mic_ctx, _offset); \ + return 0; \ +} \ +DEFINE_SIMPLE_ATTRIBUTE(fops_##_name, get_##_name, NULL, "%llu"); \ + +static PM_MMIO_REGVALUE_GET(svidctrl, SBOX_SVID_CONTROL); +static PM_MMIO_REGVALUE_GET(pcuctrl, SBOX_PCU_CONTROL); +static PM_MMIO_REGVALUE_GET(hoststate,SBOX_HOST_PMSTATE); +static PM_MMIO_REGVALUE_GET(cardstate, SBOX_UOS_PMSTATE); +static PM_MMIO_REGVALUE_GET(wtimer, SBOX_C3WAKEUP_TIMER); +static PM_MMIO_REGVALUE_GET(gpmctrl, GBOX_PM_CTRL); +static PM_MMIO_REGVALUE_GET(core_volt, SBOX_COREVOLT); +static PM_MMIO_REGVALUE_GET(uos_pcuctrl, SBOX_UOS_PCUCONTROL); + +static int depgraph_j2i_show(struct seq_file *s, void *pos) +{ + uint64_t bid = (uint64_t)s->private; + mic_ctx_t *mic_ctx; + int i, j; + + mic_ctx = get_per_dev_ctx(bid); + if (!mic_ctx) { + return -EINVAL; + } + + seq_printf(s,"=================================================================\n"); + seq_printf(s,"%-10s |%-25s\n", "Scif Node" , "dependent nodes"); + seq_printf(s,"=================================================================\n"); + + for ( i = 0; i <= ms_info.mi_maxid; i++) { + seq_printf(s, "%-10d |", i); + for (j = 0; j <= ms_info.mi_maxid; j++) { + switch(ms_info.mi_depmtrx[j][i]) { + case DEP_STATE_DEPENDENT: + { + /* (A) - active dependency on node i */ + seq_printf(s, "%d(A),", j); + break; + } + case DEP_STATE_DISCONNECT_READY: + { + /* (R) - node j has sent PC6 ready message to the 
host + * dependency is not active so node i can go idle + */ + seq_printf(s, "%d(R),", j); + break; + } + case DEP_STATE_DISCONNECTED: + { + /* (D) - node j is in idle state. + * dependency is not active so node i can go idle + */ + seq_printf(s, "%d(D),", j); + break; + } + } + } + seq_printf(s,"\n=================================================================\n"); + } + + return 0; +} + +static int depgraph_j2i_open(struct inode *inode, struct file *file) +{ + return single_open(file, depgraph_j2i_show, inode->i_private); +} + +static struct file_operations depgraph_j2i_file_ops = { + .owner = THIS_MODULE, + .open = depgraph_j2i_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release +}; + +static int depgraph_i2j_show(struct seq_file *s, void *pos) +{ + uint64_t bid = (uint64_t)s->private; + mic_ctx_t *mic_ctx; + int i, j; + + mic_ctx = get_per_dev_ctx(bid); + if (!mic_ctx) { + return -EINVAL; + } + + seq_printf(s,"=================================================================\n"); + seq_printf(s,"%-10s |%-25s\n", "Scif Node" , "is dependent on Nodes"); + seq_printf(s,"=================================================================\n"); + + for ( i = 0; i <= ms_info.mi_maxid; i++) { + seq_printf(s, "%-10d |", i); + for (j = 0; j <= ms_info.mi_maxid; j++) { + switch(ms_info.mi_depmtrx[i][j]) { + case DEP_STATE_DEPENDENT: + { + /* (A) - active dependency on node j */ + seq_printf(s, "%d(A),", j); + break; + } + case DEP_STATE_DISCONNECT_READY: + { + /* (R) - node j has sent PC6 ready message to the host */ + seq_printf(s, "%d(R),", j); + break; + } + case DEP_STATE_DISCONNECTED: + { + /* (D) - node j is in idle state. + * This should not happen unless node i itself is in idle state + */ + seq_printf(s, "%d(D),", j); + break; + } + } + } + seq_printf(s,"\n=================================================================\n"); + } + + return 0; +} + +static int depgraph_i2j_open(struct inode *inode, struct file *file) +{ + return single_open(file, depgraph_i2j_show, inode->i_private); +} + +static struct file_operations depgraph_i2j_file_ops = { + .owner = THIS_MODULE, + .open = depgraph_i2j_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release +}; + +static int connection_info_show(struct seq_file *s, void *pos) { + + uint64_t bid = (uint64_t)s->private; + mic_ctx_t *mic_ctx; + int count = 0; + struct list_head *position, *tmpq; + + mic_ctx = get_per_dev_ctx(bid); + if (!mic_ctx) { + return -EINVAL; + } + + seq_printf(s,"=========================================================================\n"); + if(mic_ctx->micpm_ctx.pm_epd != NULL) { + seq_printf(s, "%-35s | %35d\n", "Local Node", mic_ctx->micpm_ctx.pm_epd->port.node); + seq_printf(s, "%-35s | %35d\n", "Local Port", mic_ctx->micpm_ctx.pm_epd->port.port); + seq_printf(s, "%-35s | %35d\n", "Remote Node", mic_ctx->micpm_ctx.pm_epd->peer.node); + seq_printf(s, "%-35s | %35d\n", "Remote Port", mic_ctx->micpm_ctx.pm_epd->peer.port); + seq_printf(s, "%-35s | %35d\n", "Connection state", mic_ctx->micpm_ctx.pm_epd->state); + if(!list_empty(&mic_ctx->micpm_ctx.msg_list)) { + list_for_each_safe(position, tmpq, &mic_ctx->micpm_ctx.msg_list) { + count++; + } + } else { + count = 0; + } + seq_printf(s, "%-35s | %35d\n", "Messages in queue", count); + } else { + seq_printf(s, "%s\n", "No PM connection found"); + } + seq_printf(s,"=========================================================================\n"); + + return 0; +} + +static int connection_info_open(struct inode *inode, struct file *file) 
+{ + return single_open(file, connection_info_show, inode->i_private); +} + +static struct file_operations connection_info_file_ops = { + .owner = THIS_MODULE, + .open = connection_info_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release +}; + +static int active_set_show(struct seq_file *s, void *pos) { + + uint64_t bid = (uint64_t)s->private; + mic_ctx_t *mic_ctx; + int i, j = 0; + uint8_t *nodemask; + uint8_t *temp_buf_ptr; + + mic_ctx = get_per_dev_ctx(bid); + if (!mic_ctx) { + return -EINVAL; + } + + nodemask = (uint8_t*) kzalloc(mic_ctx->micpm_ctx.nodemask.len, GFP_KERNEL); + if (!nodemask) { + seq_printf(s, "%s\n", "Cannot allocate buffer"); + return 0; + } + + if ((micscif_get_activeset(mic_ctx->bi_id + 1, nodemask))) { + seq_printf(s, "%s\n", "Cannot calculate activation set"); + kfree(nodemask); + return 0; + } + + seq_printf(s, "%s\n", "Nodes in activation set:"); + temp_buf_ptr = nodemask; + for ( i = 0; i < mic_ctx->micpm_ctx.nodemask.len; i++) { + temp_buf_ptr = nodemask + i; + for (j = 0; j < 8; j++) { + if (*temp_buf_ptr & (1ULL << j)) + seq_printf(s, "%d ", j + (i * 8)); + } + } + seq_printf(s, "\n"); + kfree(nodemask); + return 0; +} + +static int active_set_open(struct inode *inode, struct file *file) +{ + return single_open(file, active_set_show, inode->i_private); +} + +static struct file_operations activation_set_file_ops = { + .owner = THIS_MODULE, + .open = active_set_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release +}; + +static int deactive_set_show(struct seq_file *s, void *pos) { + + uint64_t bid = (uint64_t)s->private; + mic_ctx_t *mic_ctx; + int i, j; + uint8_t *nodemask; + uint8_t *temp_buf_ptr; + + mic_ctx = get_per_dev_ctx(bid); + if (!mic_ctx) { + return -EINVAL; + } + + nodemask = (uint8_t*) kzalloc(mic_ctx->micpm_ctx.nodemask.len, GFP_KERNEL); + if (!nodemask) { + seq_printf(s, "%s\n", "Cannot allocate buffer"); + return 0; + } + + if ((micscif_get_deactiveset(mic_ctx->bi_id +1, nodemask, 1))) { + seq_printf(s, "%s\n", "Cannot calculate activation set"); + kfree(nodemask); + return 0; + } + + seq_printf(s, "%s\n", "Nodes in deactivation set:"); + temp_buf_ptr = nodemask; + for ( i = 0; i < mic_ctx->micpm_ctx.nodemask.len; i++) { + temp_buf_ptr = nodemask + i; + for (j = 0; j < 8; j++) { + if (*temp_buf_ptr & (1ULL << j)) + seq_printf(s, "%d ", j + (i * 8)); + } + } + seq_printf(s, "\n"); + kfree(nodemask); + return 0; +} + +static int deactive_set_open(struct inode *inode, struct file *file) +{ + return single_open(file, deactive_set_show, inode->i_private); +} + +static struct file_operations deactivation_set_file_ops = { + .owner = THIS_MODULE, + .open = deactive_set_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release +}; + +static int ospm_restart_show(struct seq_file *s, void *pos) { + + uint64_t bid = (uint64_t)s->private; + mic_ctx_t *mic_ctx; + int err; + + mic_ctx = get_per_dev_ctx(bid); + if (!mic_ctx) { + return -EINVAL; + } + + err = pm_stop_device(mic_ctx); + if(err) { + seq_printf(s, "%s:%d\n", "Error calling pm_stop_device.", err); + return err; + } + + err = pm_start_device(mic_ctx); + if(err) { + seq_printf(s, "%s:%d\n", "Error calling pm_start_device.", err); + return err; + } + + return 0; +} + +static int ospm_restart_open(struct inode *inode, struct file *file) +{ + return single_open(file, ospm_restart_show, inode->i_private); +} + +static struct file_operations ospm_restart_file_ops = { + .owner = THIS_MODULE, + .open = ospm_restart_open, + .read = 
seq_read, + .llseek = seq_lseek, + .release = single_release +}; + +static int testmsg_set(void *data, uint64_t value) +{ + uint64_t bid; + mic_ctx_t *mic_ctx; + int err; + + bid = (uint64_t)data; + if (bid >= mic_data.dd_numdevs) { + return -EINVAL; + } + + mic_ctx = get_per_dev_ctx(bid); + if (!mic_ctx) { + return -EINVAL; + } + + if (value == 0) { + return -EINVAL; + } + + err = mic_pm_send_msg(mic_ctx ,PM_MESSAGE_TEST, PM_TEST_MSG_BODY, sizeof(PM_TEST_MSG_BODY)); + return err; +} + +static int testmsg_get(void *data, uint64_t *value) +{ + uint64_t bid; + mic_ctx_t *mic_ctx; + int err; + + bid = (uint64_t)data; + if (bid >= mic_data.dd_numdevs) { + return -EINVAL; + } + + mic_ctx = get_per_dev_ctx(bid); + if (!mic_ctx) { + return -EINVAL; + } + + err = check_test_msg(mic_ctx,PM_TEST_MSG_BODY, sizeof(PM_TEST_MSG_BODY)); + *value = err; + + return err; +} +DEFINE_SIMPLE_ATTRIBUTE(testmsg_fops, testmsg_get, testmsg_set, "%llu"); + +int +micpm_dbg_init(mic_ctx_t *mic_ctx) +{ + /* directory name will be in format micpmXXXXX + * so assuming the name string wont excceed 12 characters */ + const uint32_t DBG_DIRNAME_LENGTH = 12; + char pmdbg_dir_name[DBG_DIRNAME_LENGTH]; + micpm_ctx_t *micpm_ctx = &mic_ctx->micpm_ctx; + struct dentry *mmiodir; + uint64_t id = mic_ctx->bi_id; + + + if(!mic_data.dd_pm.pmdbgparent_dir) { + printk(KERN_ERR "%s: %d Parent debugfs directory does not exist.\n" + "debugfs may not be supported in kernel", __func__, __LINE__); + return -EOPNOTSUPP; + } + + snprintf(pmdbg_dir_name, sizeof(pmdbg_dir_name), "micpm%d", mic_ctx->bi_id); + micpm_ctx->pmdbg_dir = debugfs_create_dir + (pmdbg_dir_name, mic_data.dd_pm.pmdbgparent_dir); + if (!micpm_ctx->pmdbg_dir) { + printk(KERN_ERR "%s: %d Failed in creating debugfs directory\n" + "debugfs may noe be supported in kernel", __func__, __LINE__); + return -EOPNOTSUPP; + } + + /* Create debugfs entry to get/set idle state of the card known by host*/ + debugfs_create_u32("idle_state", S_IRUGO | S_IWUSR, micpm_ctx->pmdbg_dir, &micpm_ctx->idle_state); + + /* + * Create debugfs entry for sending PM_TEST_MESSAGE for testing communication to card + * set value = PM_MESSAGE_TEST to send the message to card + * get value to verfy that message was successfully sent, looped back by card and received.(0 = success) + */ + debugfs_create_file("testmsg", S_IRUGO | S_IWUSR, micpm_ctx->pmdbg_dir, (void*)id, &testmsg_fops); + + /* Create debugfs entry for showing for each node 'i' , all nodes 'j' i is dependent on */ + debugfs_create_file("depgraph_i2j", + S_IRUGO, + micpm_ctx->pmdbg_dir, + (void*)id, + &depgraph_i2j_file_ops); + + /* Create debugfs entry for showing for each node 'i', all nodes 'j' which are dependent on 'i' */ + debugfs_create_file("depgraph_j2i", + S_IRUGO, + micpm_ctx->pmdbg_dir, + (void*)id, + &depgraph_j2i_file_ops); + + /* Create debugfs entry for showing connection info for a node */ + debugfs_create_file("connection_info", + S_IRUGO, + micpm_ctx->pmdbg_dir, + (void*)id, + &connection_info_file_ops); + + /* Create debugfs entry to initiate OSPM restart for a node */ + debugfs_create_file("ospm_restart", + S_IRUGO, + micpm_ctx->pmdbg_dir, + (void*)id, + &ospm_restart_file_ops); + + /* Create debugfs entry to display activation set for a node */ + debugfs_create_file("activation_set", + S_IRUGO, + micpm_ctx->pmdbg_dir, + (void*)id, + &activation_set_file_ops); + + /* Create debugfs entry to display de-activation set for a node */ + debugfs_create_file("deactivation_set", + S_IRUGO, + micpm_ctx->pmdbg_dir, + (void*)id, + 
&deactivation_set_file_ops); + + /* Create debugfs entries for reading power management status/control register value in MMIO region */ + mmiodir = debugfs_create_dir("mmio", micpm_ctx->pmdbg_dir); + if (!mmiodir) { + printk(KERN_ERR "%s: %d Failed in creating debugfs directory\n" + "debugfs may noe be supported in kernel", __func__, __LINE__); + return -EOPNOTSUPP; + } + debugfs_create_file("svidctrl", S_IRUGO, mmiodir,(void*)id, &fops_svidctrl); + debugfs_create_file("pcuctrl", S_IRUGO, mmiodir,(void*)id, &fops_pcuctrl); + debugfs_create_file("hoststate", S_IRUGO, mmiodir,(void*)id, &fops_hoststate); + debugfs_create_file("cardstate", S_IRUGO, mmiodir,(void*)id, &fops_cardstate); + debugfs_create_file("wtimer", S_IRUGO, mmiodir,(void*)id, &fops_wtimer); + debugfs_create_file("gpmctrl", S_IRUGO, mmiodir,(void*)id, &fops_gpmctrl); + debugfs_create_file("core_volt", S_IRUGO, mmiodir,(void*)id, &fops_core_volt); + debugfs_create_file("uos_pcuctrl", S_IRUGO, mmiodir,(void*)id, &fops_uos_pcuctrl); + + return 0; +} + +void micpm_dbg_parent_init(void) { + mic_data.dd_pm.pmdbgparent_dir = debugfs_create_dir("micpm", NULL); + if (!mic_data.dd_pm.pmdbgparent_dir) { + PM_DEBUG("%s: %d Failed in creating debugfs directory\n" + "debugfs may not be supported in kernel", __func__, __LINE__); + } + + debugfs_create_u32("enable_pm_logging", S_IRUGO | S_IWUSR, + mic_data.dd_pm.pmdbgparent_dir, &mic_data.dd_pm.enable_pm_logging); + + return; +} + + +/* + * Test message is looped back to driver and lives in the message list. + * This function retrieves the message and send it to user space which + * can check if its the same message as that was sent. + */ +static int +check_test_msg(mic_ctx_t *mic_ctx, void *buf, uint32_t len) +{ + int err = -EINVAL; + pm_recv_msg_t *recv_msg = NULL; + struct list_head *pos = NULL, *tmpq = NULL; + bool msg_found = false; + + if(len != sizeof(pm_msg_unit_test)) { + pr_debug("Invalid Args: Size of buffer\n"); + return -EINVAL; + } + + mutex_lock(&mic_ctx->micpm_ctx.msg_mutex); + if(!list_empty_careful(&mic_ctx->micpm_ctx.msg_list)) { + list_for_each_safe(pos, tmpq, &mic_ctx->micpm_ctx.msg_list) { + recv_msg = list_entry(pos, pm_recv_msg_t, msg); + /*Do not touch the message if its not a test message */ + if (recv_msg->msg_header.opcode == PM_MESSAGE_TEST) { + list_del(&recv_msg->msg); + msg_found = true; + break; + } + } + } else { + pr_debug("empty message list \n"); + goto no_msg; + } + + if (msg_found == false) { + pr_debug("Test msg not found \n"); + goto no_msg; + } + + if(recv_msg->msg_body == NULL) { + pr_debug("Invalid source buffer\n"); + goto list_free; + } + + err = strncmp((char*)recv_msg->msg_body, (char*)buf, len); + kfree(recv_msg->msg_body); + +list_free: + kfree(recv_msg); + +no_msg: + mutex_unlock(&mic_ctx->micpm_ctx.msg_mutex); + return err; + +} diff --git a/host/pm_pcstate.c b/host/pm_pcstate.c new file mode 100644 index 0000000..00c9ec4 --- /dev/null +++ b/host/pm_pcstate.c @@ -0,0 +1,1107 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include "mic_common.h" +#include "scif.h" +#include "mic/micscif.h" +#include "mic/mic_pm.h" +#include "mic/micveth_dma.h" +#include +#include "linux/virtio_blk.h" +#include "mic/mic_virtio.h" + +//few helper functions +int pm_reg_read(mic_ctx_t *mic_ctx, uint32_t regoffset) { + uint32_t regval = 0; +if (mic_ctx->bi_family == FAMILY_ABR) + regval = DBOX_READ(mic_ctx->mmio.va, regoffset); +else if (mic_ctx->bi_family == FAMILY_KNC) + regval = SBOX_READ(mic_ctx->mmio.va, regoffset); + + return regval; +} + +int pm_reg_write(uint32_t value, mic_ctx_t *mic_ctx, uint32_t regoffset) { + int err = 0; +if (mic_ctx->bi_family == FAMILY_ABR) + DBOX_WRITE(value, mic_ctx->mmio.va, regoffset); +else if (mic_ctx->bi_family == FAMILY_KNC) + SBOX_WRITE(value, mic_ctx->mmio.va, regoffset); + + return err; +} + +int hw_idle(mic_ctx_t *mic_ctx) { + + uint8_t is_ring_active; + sbox_pcu_ctrl_t ctrl_regval = {0}; + uint32_t idle_wait_cnt; + + for(idle_wait_cnt = 0; idle_wait_cnt <= MAX_HW_IDLE_WAIT_COUNT; + idle_wait_cnt++) { + ctrl_regval.value = pm_reg_read(mic_ctx,SBOX_PCU_CONTROL); + is_ring_active = ctrl_regval.bits.mclk_enabled; + if(likely(!is_ring_active)) + return !is_ring_active; + msleep(1); + } + + PM_DEBUG("Timing out waiting for HW to become idle\n"); + return !is_ring_active; +} + +int hw_active(mic_ctx_t *mic_ctx) { + uint8_t is_ring_active; + sbox_pcu_ctrl_t ctrl_regval; + uint32_t idle_wait_cnt; + + for(idle_wait_cnt = 0; idle_wait_cnt <= MAX_HW_IDLE_WAIT_COUNT; + idle_wait_cnt++) { + ctrl_regval.value = pm_reg_read(mic_ctx,SBOX_PCU_CONTROL); + is_ring_active = ctrl_regval.bits.mclk_enabled; + if (likely(is_ring_active)) + return is_ring_active; + msleep(10); + } + + PM_DEBUG("Timing out waiting for HW to become active\n"); + return is_ring_active; + +} + +PM_IDLE_STATE get_card_state(mic_ctx_t *mic_ctx) { + + PM_IDLE_STATE state; + sbox_uos_pm_state_t upmstate_regval = {0}; + upmstate_regval.value = pm_reg_read(mic_ctx, SBOX_UOS_PMSTATE); + state = (PM_IDLE_STATE)(upmstate_regval.bits.uos_pm_state); + return state; + +} + +PM_IDLE_STATE get_host_state(mic_ctx_t *mic_ctx) { + + PM_IDLE_STATE state; + sbox_host_pm_state_t hpmstate_regval = {0}; + hpmstate_regval.value = pm_reg_read(mic_ctx, 
SBOX_HOST_PMSTATE); + state = (PM_IDLE_STATE)(hpmstate_regval.bits.host_pm_state); + return state; + +} + +int set_host_state(mic_ctx_t *mic_ctx, PM_IDLE_STATE state) { + + int err = 0; + sbox_host_pm_state_t hpmstate_regval = {0}; + hpmstate_regval.value = pm_reg_read(mic_ctx, SBOX_HOST_PMSTATE); + hpmstate_regval.bits.host_pm_state = 0; + hpmstate_regval.bits.host_pm_state = state; + pm_reg_write(hpmstate_regval.value, mic_ctx, SBOX_HOST_PMSTATE); + return err; +} + +int check_card_state(mic_ctx_t *mic_ctx, PM_IDLE_STATE state) { + PM_IDLE_STATE card_state = get_card_state(mic_ctx); + return (state == card_state) ? 1 : 0; +} + +int check_host_state(mic_ctx_t *mic_ctx, PM_IDLE_STATE state) { + PM_IDLE_STATE host_state = get_host_state(mic_ctx); + return (state == host_state) ? 1 : 0; +} + +uint32_t svid_cmd_fmt(unsigned int bits) +{ + unsigned int bits_set,bmask; + + bmask = bits; + + for (bits_set = 0; bmask; bits_set++) { + /* Zero the least significant bit that is set */ + bmask &= (bmask - 1); + } + bits <<= 1; /* Make way for the parity bit */ + if (bits_set & 1) { /* odd number of 1s */ + bits |= 1; + } + + return bits; +} + +void set_vid(mic_ctx_t *mic_ctx, sbox_svid_control svidctrl_regval, unsigned int vidcode) { + + uint32_t temp; + uint32_t svid_cmd = 0; + uint32_t svid_dout = 0; + temp = svid_cmd_fmt((KNC_SVID_ADDR << 13) | + (KNC_SETVID_SLOW << 8) | vidcode); + svid_cmd = (KNC_SVID_ADDR << 5) | KNC_SETVID_SLOW; + svidctrl_regval.bits.svid_cmd = 0x0e0; + svidctrl_regval.bits.svid_cmd = svid_cmd; + + svid_dout = temp & 0x1ff; + svidctrl_regval.bits.svid_dout = 0; + svidctrl_regval.bits.svid_dout = svid_dout; + + svidctrl_regval.bits.cmd_start = 0x1; + pm_reg_write(svidctrl_regval.value, mic_ctx, + SBOX_SVID_CONTROL); + + msleep(10); + + return; +} + +int set_vid_knc(mic_ctx_t *mic_ctx, unsigned int vidcode) +{ + uint32_t status = 0; + + sbox_svid_control svidctrl_regval = {0}; + uint32_t svid_idle = 0; + uint32_t svid_error = 0; + int i = 0; + uint32_t wait_cnt = 0; + sbox_core_volt_t core_volt_regval = {0}; + int retry = 0; + + if (mic_ctx->bi_stepping >= KNC_B0_STEP) { + for (retry = 0; retry < SET_VID_RETRY_COUNT; retry++) { + status = 0; + for (i = 0; i < KNC_SETVID_ATTEMPTS; i++) { + svidctrl_regval.value = pm_reg_read(mic_ctx,SBOX_SVID_CONTROL); + svid_idle = svidctrl_regval.bits.svid_idle; + + if (svid_idle) { + set_vid(mic_ctx, svidctrl_regval, vidcode); + svidctrl_regval.value = + pm_reg_read(mic_ctx,SBOX_SVID_CONTROL); + svid_idle = svidctrl_regval.bits.svid_idle; + svid_error = svidctrl_regval.bits.svid_error; + + if (!svid_idle) { + printk(KERN_ERR "%s SVID command failed - Idle not set\n", + __func__); + msleep(10); + continue; + } + + if (svid_error) { + if (SBOX_SVIDCTRL_ACK1ACK0(svidctrl_regval.value) == 0x2) { + printk(KERN_ERR "%s SVID command failed - rx parity error\n", + __func__); + } else { + printk(KERN_ERR "%s SVID command failed - tx parity error\n", + __func__); + } + status = -EINVAL; + goto exit; + } else { + PM_DEBUG("SVID Command Successful - VID set to %d\n",vidcode); + break; + } + } + } + + if (i == KNC_SETVID_ATTEMPTS) { + printk(KERN_ERR "%s Timed out waiting for SVID idle\n", __func__); + status = -EINVAL; + goto exit; + } + + /* Verify that the voltage is set */ + for(wait_cnt = 0; wait_cnt <= 100; wait_cnt++) { + core_volt_regval.value = pm_reg_read(mic_ctx, SBOX_COREVOLT); + if(vidcode == core_volt_regval.bits.vid) { + return status; + } + msleep(10); + PM_DEBUG("Retry: %d Voltage not set yet. 
vidcode = 0x%x Current vid = 0x%x\n", + retry, vidcode, core_volt_regval.bits.vid); + } + + PM_PRINT("Retry: %d Failed to set vid for node %d. vid code = 0x%x Current vid = 0x%x.\n", + retry, mic_get_scifnode_id(mic_ctx), vidcode, core_volt_regval.bits.vid); + status = -ENODEV; + } + } else { + set_vid(mic_ctx, svidctrl_regval, vidcode); + + /* SBOX_COREVOLT does not reflect the correct vid + * value on A0. Just wait here for sometime to + * allow for the vid to be set. + */ + msleep(20); + } + +exit: + return status; +} + +/* @print_nodemaskbuf + * + * @param - buf - the nodemask buffer + * + * prints the nodes in the nodemask. + * + * @returns - none + */ +void print_nodemaskbuf(uint8_t* buf) { + + uint8_t *temp_buf_ptr; + uint32_t i,j; + + temp_buf_ptr = buf; + PM_DEBUG("Nodes in nodemask: "); + for(i = 0; i <= ms_info.mi_maxid; i++) { + temp_buf_ptr = buf + i; + for (j = 0; j < 8; j++) { + if (get_nodemask_bit(temp_buf_ptr, j)) + pr_debug("%d ", j + (i * 8)); + } + } +} + +void restore_pc6_registers(mic_ctx_t *mic_ctx, bool from_dpc3) { + sbox_pcu_ctrl_t ctrl_regval = {0}; + sbox_uos_pcu_ctrl_t uos_ctrl_regval = {0}; + gbox_pm_control pmctrl_reg = {0}; + sbox_core_freq_t core_freq_reg = {0}; + + if (!from_dpc3) { + if(KNC_A_STEP == mic_ctx->bi_stepping) { + ctrl_regval.value = pm_reg_read(mic_ctx, SBOX_PCU_CONTROL); + ctrl_regval.bits.enable_mclk_pl_shutdown = 0; + pm_reg_write(ctrl_regval.value, mic_ctx, SBOX_PCU_CONTROL); + } else { + uos_ctrl_regval.value = pm_reg_read(mic_ctx,SBOX_UOS_PCUCONTROL); + uos_ctrl_regval.bits.enable_mclk_pll_shutdown = 0; + pm_reg_write(uos_ctrl_regval.value, mic_ctx, SBOX_UOS_PCUCONTROL); + } + + + ctrl_regval.value = pm_reg_read(mic_ctx, SBOX_PCU_CONTROL); + ctrl_regval.bits.prevent_auto_c3_exit = 0; + pm_reg_write(ctrl_regval.value, mic_ctx, SBOX_PCU_CONTROL); + } + + pmctrl_reg.value = pm_reg_read(mic_ctx, GBOX_PM_CTRL); + pmctrl_reg.bits.in_pckgc6 = 0; + pm_reg_write(pmctrl_reg.value, mic_ctx, GBOX_PM_CTRL); + + ctrl_regval.value = pm_reg_read(mic_ctx, SBOX_PCU_CONTROL); + ctrl_regval.bits.grpB_pwrgood_mask = 0; + pm_reg_write(ctrl_regval.value, mic_ctx, SBOX_PCU_CONTROL); + + core_freq_reg.value = pm_reg_read(mic_ctx, SBOX_COREFREQ); + core_freq_reg.bits.booted = 1; + pm_reg_write(core_freq_reg.value, mic_ctx, SBOX_COREFREQ); +} + +void program_mclk_shutdown(mic_ctx_t *mic_ctx, bool set) +{ + sbox_uos_pcu_ctrl_t uos_ctrl_regval; + sbox_pcu_ctrl_t ctrl_regval; + + if(KNC_A_STEP == mic_ctx->bi_stepping) { + ctrl_regval.value = pm_reg_read(mic_ctx,SBOX_PCU_CONTROL); + ctrl_regval.bits.enable_mclk_pl_shutdown = (set ? 1: 0); + pm_reg_write(ctrl_regval.value, mic_ctx, SBOX_PCU_CONTROL); + } else { + uos_ctrl_regval.value = pm_reg_read(mic_ctx, + SBOX_UOS_PCUCONTROL); + uos_ctrl_regval.bits.enable_mclk_pll_shutdown = (set ? 1: 0); + pm_reg_write(uos_ctrl_regval.value, + mic_ctx, SBOX_UOS_PCUCONTROL); + } +} + +void program_prevent_C3Exit(mic_ctx_t *mic_ctx, bool set) +{ + sbox_pcu_ctrl_t ctrl_regval; + ctrl_regval.value = pm_reg_read(mic_ctx,SBOX_PCU_CONTROL); + ctrl_regval.bits.prevent_auto_c3_exit = (set ? 1: 0); + pm_reg_write(ctrl_regval.value, mic_ctx, SBOX_PCU_CONTROL); + +} + +int pm_pc3_to_pc6_entry(mic_ctx_t *mic_ctx) +{ + int err; + sbox_pcu_ctrl_t ctrl_regval; + gbox_pm_control pmctrl_reg; + sbox_core_freq_t core_freq_reg; + + if ((get_card_state(mic_ctx)) != PM_IDLE_STATE_PC3) { + PM_DEBUG("Card not ready to go to PC6. 
\n"); + err = -EAGAIN; + goto exit; + } + + if (atomic_cmpxchg(&mic_ctx->gate_interrupt, 0, 1) == 1) { + PM_DEBUG("Cannot gate interrupt handler while it is in use\n"); + err = -EFAULT; + goto exit; + } + + program_prevent_C3Exit(mic_ctx, true); + program_mclk_shutdown(mic_ctx, true); + + /* Wait for uos to become idle. */ + if (!hw_idle(mic_ctx)) { + program_mclk_shutdown(mic_ctx, false); + if (!hw_idle(mic_ctx)) { + program_prevent_C3Exit(mic_ctx, false); + PM_DEBUG("Card not ready to go to PC6. \n"); + err = -EAGAIN; + goto intr_ungate; + } else { + program_mclk_shutdown(mic_ctx, true); + } + } + + pmctrl_reg.value = pm_reg_read(mic_ctx, GBOX_PM_CTRL); + pmctrl_reg.bits.in_pckgc6 = 1; + pm_reg_write(pmctrl_reg.value, mic_ctx, GBOX_PM_CTRL); + + core_freq_reg.value = pm_reg_read(mic_ctx, SBOX_COREFREQ); + core_freq_reg.bits.booted = 0; + pm_reg_write(core_freq_reg.value, mic_ctx, SBOX_COREFREQ); + + udelay(500); + + ctrl_regval.value = pm_reg_read(mic_ctx, SBOX_PCU_CONTROL); + ctrl_regval.bits.grpB_pwrgood_mask = 1; + pm_reg_write(ctrl_regval.value, mic_ctx, SBOX_PCU_CONTROL); + + err = set_vid_knc(mic_ctx, 0); + if (err != 0) { + PM_DEBUG("Aborting PC6 entry...Failed to set VID\n"); + restore_pc6_registers(mic_ctx, true); + goto intr_ungate; + } + + mic_ctx->micpm_ctx.idle_state = PM_IDLE_STATE_PC6; + set_host_state(mic_ctx, PM_IDLE_STATE_PC6); + + dma_prep_suspend(mic_ctx->dma_handle); + + PM_PRINT("Node %d entered PC6\n", + mic_get_scifnode_id(mic_ctx)); + + return err; + +intr_ungate: + atomic_set(&mic_ctx->gate_interrupt, 0); + tasklet_schedule(&mic_ctx->bi_dpc); +exit: + return err; +} + +/* + * pm_pc6_exit: + * + * Execute pc6 exit for a node. + * mic_ctx: The driver context of the node. + */ +int pm_pc6_exit(mic_ctx_t *mic_ctx) +{ + + int err = 0; + + sbox_host_pm_state_t hpmstate_regval; + sbox_pcu_ctrl_t ctrl_regval; + uint8_t tdp_vid = 0; + uint8_t is_pll_locked; + uint32_t wait_cnt; + int i; + + + if (!check_host_state(mic_ctx, PM_IDLE_STATE_PC6)) { + PM_DEBUG("Wrong Host PM state. 
State = %d\n", + get_host_state(mic_ctx)); + err = -EINVAL; + goto restore_registers; + } + + hpmstate_regval.value = pm_reg_read(mic_ctx, SBOX_HOST_PMSTATE); + tdp_vid = hpmstate_regval.bits.tdp_vid; + PM_DEBUG("TDP_VID value obtained from Host PM Register = %d",tdp_vid); + + PM_DEBUG("Setting voltage to %dV using SVID Control\n",tdp_vid); + err = set_vid_knc(mic_ctx, tdp_vid); + if (err != 0) { + printk(KERN_ERR "%s Failed PC6 entry...error in setting VID\n", + __func__); + goto restore_registers; + } + + ctrl_regval.value = pm_reg_read(mic_ctx, SBOX_PCU_CONTROL); + + program_mclk_shutdown(mic_ctx, false); + program_prevent_C3Exit(mic_ctx, false); + + for(wait_cnt = 0; wait_cnt < 200; wait_cnt++) { + ctrl_regval.value = pm_reg_read(mic_ctx,SBOX_PCU_CONTROL); + is_pll_locked = ctrl_regval.bits.mclk_pll_lock; + if(likely(is_pll_locked)) + break; + msleep(10); + } + + if(wait_cnt >= 200) { + PM_DEBUG("mclk_pll_locked bit is not set.\n"); + err = -EAGAIN; + goto restore_registers; + } + + ctrl_regval.bits.grpB_pwrgood_mask = 0; + pm_reg_write(ctrl_regval.value, mic_ctx, SBOX_PCU_CONTROL); + + if (!hw_active(mic_ctx)) { + PM_DEBUG("Timing out waiting for hw to become active"); + goto restore_registers; + } + + for(wait_cnt = 0; wait_cnt < 200; wait_cnt++) { + if ((get_card_state(mic_ctx)) == PM_IDLE_STATE_PC0) + break; + msleep(10); + } + + if(wait_cnt >= 200) { + PM_DEBUG("PC6 Exit not complete.\n"); + err = -EFAULT; + goto restore_registers; + } + + mic_ctx->micpm_ctx.idle_state = PM_IDLE_STATE_PC0; + + for (i = 0; i <= mic_data.dd_numdevs; i++) { + if (micscif_get_nodedep(mic_get_scifnode_id(mic_ctx), i) == + DEP_STATE_DISCONNECTED) { + micscif_set_nodedep(mic_get_scifnode_id(mic_ctx), i, + DEP_STATE_DEPENDENT); + } + } + + PM_PRINT("Node %d exited PC6\n", + mic_get_scifnode_id(mic_ctx)); + goto exit; + +restore_registers: + restore_pc6_registers(mic_ctx, false); +exit: + atomic_set(&mic_ctx->gate_interrupt, 0); + tasklet_schedule(&mic_ctx->bi_dpc); + return err; +} + +/* + * setup_pm_dependency: + * + * Function sets up the dependency matrix by populating + * the matrix with node depency information. + * + * Returns 0 on success. Appropriate error on failure. + */ +int setup_pm_dependency(void){ + int err = 0; + uint16_t i; + uint16_t j; + mic_ctx_t *mic_ctx; + + for (i = 0; i < mic_data.dd_numdevs; i++) { + mic_ctx = get_per_dev_ctx(i); + if (!mic_ctx) { + PM_DEBUG("Failed to retrieve driver context\n"); + return -EFAULT; + } + if (mic_ctx->micpm_ctx.idle_state == + PM_IDLE_STATE_PC3_READY) { + for (j = 0; j < mic_data.dd_numdevs; j++) { + if (micscif_get_nodedep(mic_get_scifnode_id(mic_ctx),j+1) == + DEP_STATE_DEPENDENT) { + micscif_set_nodedep(mic_get_scifnode_id(mic_ctx),j+1, + DEP_STATE_DISCONNECT_READY); + } + } + } + } + return err; +} + +/* + * teardown_pm_dependency + * + * Function resets dependency matrix by removing all depenendy info + * from it. + * + * Returns 0 on success. Appropriate error on failure. + */ +int teardown_pm_dependency(void) { + int err = 0; + int i; + int j; + + for (i = 0; i < mic_data.dd_numdevs; i++) { + for (j = 0; j < mic_data.dd_numdevs; j++) { + + if (micscif_get_nodedep(i+1,j+1) == DEP_STATE_DISCONNECT_READY) { + micscif_set_nodedep(i+1,j+1, DEP_STATE_DEPENDENT); + } + } + } + return err; +} + +/* + * revert_idle_entry_trasaction: + * + * @node_disc_bitmask: Bitmask of nodes which were involved in the + * transaction + * + * Function Reverts idle state changes made to nodes when an idle + * state trasaction fails. 
+ */ +int revert_idle_entry_trasaction(uint8_t *node_disc_bitmask) { + int err = 0; + mic_ctx_t *node_ctx; + uint32_t node_id = 0; + + for(node_id = 0; node_id <= ms_info.mi_maxid; node_id++) { + if (node_id == SCIF_HOST_NODE) + continue; + + if (!get_nodemask_bit(node_disc_bitmask, node_id)) + continue; + + node_ctx = get_per_dev_ctx(node_id - 1); + if (!node_ctx) { + PM_DEBUG("Failed to retrieve node context."); + err = -EINVAL; + goto exit; + } + + if (node_ctx->micpm_ctx.idle_state == PM_IDLE_STATE_PC3) { + err = pm_pc3_exit(node_ctx); + if (err) { + PM_DEBUG("Wakeup of Node %d failed. Node is lost" + " and is to be disconnected",node_id); + node_ctx->micpm_ctx.idle_state = PM_IDLE_STATE_LOST; + /* Since node is lost, ref_cnt increment(decement) through the + * pm_get(put)_reference interface is prevented by idle_state. + * We still need to ensure the ref_cnt iself is reset + * back to 0 so that pm_get(put)_reference will work after the + * lost node interface recovers the node. */ + atomic_set(&node_ctx->micpm_ctx.pm_ref_cnt, 0); + } + } + } +exit: + return err; +} + +/* pm_node_disconnect + * + * Called during idlestate entry. + * + * Function checks the pm_ref_cnt and returns ACK + * or NACK depending on the pm_ref_cnt value. + */ +int pm_node_disconnect(uint8_t *nodemask) { + + uint32_t node_id; + mic_ctx_t *mic_ctx; + int ret = 0; + int err = 0; + + for (node_id = 0; node_id <= ms_info.mi_maxid; node_id++) { + if (node_id == SCIF_HOST_NODE) + continue; + + if (!get_nodemask_bit(nodemask, node_id)) + continue; + + mic_ctx = get_per_dev_ctx(node_id - 1); + if (!mic_ctx) { + set_nodemask_bit(nodemask, node_id, 0); + return -EAGAIN; + } + + if (mic_ctx->state != MIC_ONLINE) { + set_nodemask_bit(nodemask, node_id, 0); + return -EAGAIN; + } + + ret = atomic_cmpxchg(&mic_ctx->micpm_ctx.pm_ref_cnt, + 0, PM_NODE_IDLE); + if (((ret != 0) && (ret != PM_NODE_IDLE)) + || atomic_read(&mic_data.dd_pm.wakeup_in_progress)) { + set_nodemask_bit(nodemask, node_id, 0); + return -EAGAIN; + } + } + + return err; +} + +/* + * pm_pc3_entry: + * + * Execute pc3 entry for a node. + * mic_ctx: The driver context of the node. + */ +int pm_pc3_entry(mic_ctx_t *mic_ctx) +{ + int err = 0; + if (mic_ctx == NULL) { + err = -EINVAL; + goto exit; + } + + if (((!check_host_state(mic_ctx, PM_IDLE_STATE_PC0))) || + (mic_ctx->micpm_ctx.idle_state != PM_IDLE_STATE_PC0)) { + PM_DEBUG("Wrong host state. register state = %d" + " idle state = %d\n", get_host_state(mic_ctx), + mic_ctx->micpm_ctx.idle_state); + goto send_wakeup; + } + + /* cancel pc6 entry work that may be scheduled. We need to + * do this either here or after a pervious pc3 exit */ + cancel_delayed_work_sync(&mic_ctx->micpm_ctx.pc6_entry_work); + + if ((mic_ctx->micpm_ctx.con_state != PM_CONNECTED) || + (!mic_ctx->micpm_ctx.pc3_enabled)) + goto send_wakeup; + + mic_ctx->micpm_ctx.idle_state = PM_IDLE_STATE_PC3_READY; + err = do_idlestate_entry(mic_ctx); + if (err) + goto exit; + if ((mic_ctx->micpm_ctx.pc6_enabled) && + (KNC_C_STEP <= mic_ctx->bi_stepping) && + (KNC_B1_STEP != mic_ctx->bi_stepping)) { + queue_delayed_work(mic_ctx->micpm_ctx.pc6_entry_wq, + &mic_ctx->micpm_ctx.pc6_entry_work, + mic_ctx->micpm_ctx.pc6_timeout*HZ); + } + + goto exit; + +send_wakeup: + mutex_lock(&mic_data.dd_pm.pm_idle_mutex); + pm_pc3_exit(mic_ctx); + mutex_unlock(&mic_data.dd_pm.pm_idle_mutex); +exit: + return err; +} + +/* + * pm_pc3_exit: + * Calling function needs to grab idle_state mutex. + * + * Execute pc3 exit for a node. + * mic_ctx: The driver context of the node. 
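+ * Returns 0 on success, or -EFAULT if the card does not report PC0 within PC3_EXIT_WAIT_COUNT polls.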
+ */ +int pm_pc3_exit(mic_ctx_t *mic_ctx) +{ + int err; + int wait_cnt; + + WARN_ON(!mutex_is_locked(&mic_data.dd_pm.pm_idle_mutex)); + mic_send_pm_intr(mic_ctx); + for (wait_cnt = 0; wait_cnt < PC3_EXIT_WAIT_COUNT; wait_cnt++) { + if (check_card_state(mic_ctx, PM_IDLE_STATE_PC0)) + break; + msleep(1); + } + + + if(wait_cnt >= PC3_EXIT_WAIT_COUNT) { + PM_DEBUG("Syncronization with card failed." + " Node is lost\n"); + err = -EFAULT; + goto exit; + } + + set_host_state(mic_ctx, PM_IDLE_STATE_PC0); + mic_ctx->micpm_ctx.idle_state = PM_IDLE_STATE_PC0; + PM_DEBUG("Node %d exited PC3\n", mic_get_scifnode_id(mic_ctx)); + + return 0; +exit: + return err; +} + +/* + * do_idlestate_entry: + * + * Function to start the idle state entry transaction for a node. Puts a node + * and all the nodes that are dependent on this node to idle state if + * it is possible. + * + * mic_ctx: The device context of node that needs to be put in idle state + * Returs 0 in success. Appropriate error code on failure + */ +int do_idlestate_entry(mic_ctx_t *mic_ctx) +{ + int err = 0; + uint32_t node_id = 0; + mic_ctx_t *node_ctx; + uint8_t *nodemask_buf; + + if(!mic_ctx) + return -EINVAL; + + mutex_lock(&mic_data.dd_pm.pm_idle_mutex); + + if ((err = setup_pm_dependency())) { + PM_DEBUG("Failed to set up PM specific dependencies"); + goto unlock; + } + + nodemask_buf = (uint8_t *) + kzalloc(mic_ctx->micpm_ctx.nodemask.len, GFP_KERNEL); + if(!nodemask_buf) { + PM_DEBUG("Error allocating nodemask buffer\n"); + err = ENOMEM; + goto dep_teardown; + } + + err = micscif_get_deactiveset(mic_get_scifnode_id(mic_ctx), + nodemask_buf, 1); + if (err) { + PM_DEBUG("Node disconnection failed " + "during deactivation set calculation"); + goto free_buf; + } + + print_nodemaskbuf(nodemask_buf); + + if ((err = micscif_disconnect_node(mic_get_scifnode_id(mic_ctx), + nodemask_buf, DISCONN_TYPE_POWER_MGMT))) { + PM_DEBUG("SCIF Node disconnect failed. err: %d", err); + goto free_buf; + } + + if ((err = pm_node_disconnect(nodemask_buf))) { + PM_DEBUG("PM Node disconnect failed. err = %d\n", err); + goto free_buf; + } + + if ((err = micvcons_pm_disconnect_node(nodemask_buf, + DISCONN_TYPE_POWER_MGMT))) { + PM_DEBUG("VCONS Node disconnect failed. err = %d\n", err); + goto free_buf; + } + + for(node_id = 0; node_id <= ms_info.mi_maxid; node_id++) { + if (node_id == SCIF_HOST_NODE) + continue; + if (!get_nodemask_bit(nodemask_buf, node_id)) + continue; + node_ctx = get_per_dev_ctx(node_id - 1); + if (!node_ctx) { + PM_DEBUG("Failed to retrieve node context."); + err = -EINVAL; + goto revert; + } + + if (node_ctx->micpm_ctx.idle_state == + PM_IDLE_STATE_PC3_READY) { + set_host_state(node_ctx, PM_IDLE_STATE_PC3); + node_ctx->micpm_ctx.idle_state = + PM_IDLE_STATE_PC3; + PM_DEBUG("Node %d entered PC3\n", + mic_get_scifnode_id(node_ctx)); + } else { + PM_DEBUG("Invalid idle state \n"); + err = -EINVAL; + goto revert; + } + } + +revert: + if (err) + revert_idle_entry_trasaction(nodemask_buf); +free_buf: + kfree(nodemask_buf); +dep_teardown: + teardown_pm_dependency(); +unlock: + if (err && (mic_ctx->micpm_ctx.idle_state != PM_IDLE_STATE_PC0)) + pm_pc3_exit(mic_ctx); + + mutex_unlock(&mic_data.dd_pm.pm_idle_mutex); + return err; +} + +/** + * is_idlestate_exit_needed: + * + * @node_id[in]: node to wakeup. + * + * Method responsible for checking if idle state exit is required + * In some situation we would like to know whether node is idle or not before + * making decision to bring the node out of idle state. + * For example - Lost node detection. 
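+ * The idle state is sampled under pm_idle_mutex, so it cannot change while the decision is being made.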
+ * returns false if the node is not in IDLE state, returns true otherwise + */ +int +is_idlestate_exit_needed(mic_ctx_t *mic_ctx) +{ + int ret = 0; + mutex_lock(&mic_data.dd_pm.pm_idle_mutex); + + switch (mic_ctx->micpm_ctx.idle_state) + { + case PM_IDLE_STATE_PC0: + case PM_IDLE_STATE_LOST: + break; + case PM_IDLE_STATE_PC3: + case PM_IDLE_STATE_PC3_READY: + case PM_IDLE_STATE_PC6: + { + ret = 1; + break; + } + default: + ret = 1; + } + + mutex_unlock(&mic_data.dd_pm.pm_idle_mutex); + return ret; +} + +/* do_idlestate_exit: + * + * Initiate idle state exits for nodes specified + * by the bitmask. + * + * mic_ctx: The device context. + * get_ref: Set to true if the entity that wants to wake + * a node up also wantes to get a reference to the node. + * + * Returs 0 on success. Appropriate error on failure. + * + */ +int do_idlestate_exit(mic_ctx_t *mic_ctx, bool get_ref) { + int err = 0; + uint32_t node_id = 0; + mic_ctx_t *node_ctx; + uint8_t *nodemask_buf; + + if(!mic_ctx) + return -EINVAL; + + might_sleep(); + /* If the idle_state_mutex is already obtained by another thread + * try to wakeup the thread which MAY be waiting for REMOVE_NODE + * responses. This way, we give priority to idle state exits than + * idle state entries. + */ + if (!mutex_trylock(&mic_data.dd_pm.pm_idle_mutex)) { + atomic_inc(&mic_data.dd_pm.wakeup_in_progress); + wake_up(&ms_info.mi_disconn_wq); + mutex_lock(&mic_data.dd_pm.pm_idle_mutex); + atomic_dec(&mic_data.dd_pm.wakeup_in_progress); + } + + nodemask_buf = (uint8_t *)kzalloc(mic_ctx->micpm_ctx.nodemask.len, GFP_KERNEL); + if(!nodemask_buf) { + PM_DEBUG("Error allocating nodemask buffer\n"); + mutex_unlock(&mic_data.dd_pm.pm_idle_mutex); + err = ENOMEM; + goto abort_node_wake; + } + + if ((err = micscif_get_activeset(mic_get_scifnode_id(mic_ctx), nodemask_buf))) { + PM_DEBUG("Node connect failed during Activation set calculation for node\n"); + mutex_unlock(&mic_data.dd_pm.pm_idle_mutex); + err = -EINVAL; + goto free_buf; + } + + print_nodemaskbuf(nodemask_buf); + + for(node_id = 0; node_id <= ms_info.mi_maxid; node_id++) { + if (node_id == SCIF_HOST_NODE) + continue; + + if (!get_nodemask_bit(nodemask_buf, node_id)) + continue; + + node_ctx = get_per_dev_ctx(node_id - 1); + if (!node_ctx) { + mutex_unlock(&mic_data.dd_pm.pm_idle_mutex); + goto free_buf; + } + + switch (node_ctx->micpm_ctx.idle_state) { + case PM_IDLE_STATE_PC3: + case PM_IDLE_STATE_PC3_READY: + if ((err = pm_pc3_exit(node_ctx))) { + PM_DEBUG("Wakeup of Node %d failed." + "Node to be disconnected",node_id); + set_nodemask_bit(nodemask_buf, node_id, 0); + node_ctx->micpm_ctx.idle_state = PM_IDLE_STATE_LOST; + /* Since node is lost, ref_cnt increment(decement) through the + * pm_get(put)_reference interface is prevented by idle_state. + * We still need to ensure the ref_cnt iself is reset + * back to 0 so that pm_get(put)_reference will work after the + * lost node interface recovers the node. */ + atomic_set(&node_ctx->micpm_ctx.pm_ref_cnt, 0); + } else { + if ((mic_ctx == node_ctx) && get_ref) + if (atomic_cmpxchg(&mic_ctx->micpm_ctx.pm_ref_cnt, PM_NODE_IDLE, 1) != + PM_NODE_IDLE) + atomic_inc(&mic_ctx->micpm_ctx.pm_ref_cnt); + } + break; + case PM_IDLE_STATE_PC6: + if ((err = pm_pc6_exit(node_ctx))) { + PM_DEBUG("Wakeup of Node %d failed." 
+ "Node to be disconnected",node_id); + set_nodemask_bit(nodemask_buf, node_id, 0); + node_ctx->micpm_ctx.idle_state = PM_IDLE_STATE_LOST; + /* Since node is lost, ref_cnt increment(decement) through the + * pm_get(put)_reference interface is prevented by idle_state. + * We still need to ensure the ref_cnt iself is reset + * back to 0 so that pm_get(put)_reference will work after the + * lost node interface recovers the node. */ + atomic_set(&node_ctx->micpm_ctx.pm_ref_cnt, 0); + } else { + if ((mic_ctx == node_ctx) && get_ref) + if (atomic_cmpxchg(&mic_ctx->micpm_ctx.pm_ref_cnt, PM_NODE_IDLE, 1) != + PM_NODE_IDLE) + atomic_inc(&mic_ctx->micpm_ctx.pm_ref_cnt); + } + break; + case PM_IDLE_STATE_PC0: + PM_DEBUG("Node %d is in state %d " + "and already out of package state.\n",node_id, + node_ctx->micpm_ctx.idle_state); + if ((mic_ctx == node_ctx) && get_ref) + if (atomic_cmpxchg(&mic_ctx->micpm_ctx.pm_ref_cnt, PM_NODE_IDLE, 1) != + PM_NODE_IDLE) + atomic_inc(&mic_ctx->micpm_ctx.pm_ref_cnt); + break; + default: + PM_DEBUG("Invalid idle state of node %d." + " State = %d \n", node_id, + node_ctx->micpm_ctx.idle_state); + mutex_unlock(&mic_data.dd_pm.pm_idle_mutex); + err = -ENODEV; + goto free_buf; + } + } + + /* Idle state exit of nodes are complete. + * Set the register state now for those nodes + * that are successfully up. + */ + for(node_id = 0; node_id <= ms_info.mi_maxid; node_id++) { + if (node_id == SCIF_HOST_NODE) + continue; + + if (!get_nodemask_bit(nodemask_buf, node_id)) + continue; + + node_ctx = get_per_dev_ctx(node_id - 1); + if (!node_ctx) { + PM_DEBUG("Failed to retrieve node context."); + continue; + } + + + if (node_ctx->micpm_ctx.idle_state == PM_IDLE_STATE_PC0) + set_host_state(node_ctx, PM_IDLE_STATE_PC0); + } + + mutex_unlock(&mic_data.dd_pm.pm_idle_mutex); +free_buf: + kfree(nodemask_buf); +abort_node_wake: + return err; +} + +int pc6_entry_start(mic_ctx_t *mic_ctx) { + + int err = 0; + + if (mic_ctx->micpm_ctx.idle_state == PM_IDLE_STATE_PC0) { + PM_DEBUG("Node not in PC3\n"); + err = -EFAULT; + goto exit; + } + + mutex_lock(&mic_data.dd_pm.pm_idle_mutex); + + if (mic_ctx->micpm_ctx.idle_state != PM_IDLE_STATE_PC3) { + PM_DEBUG("PC6 transition failed. Node not in PC3\n"); + mutex_unlock(&mic_data.dd_pm.pm_idle_mutex); + err = -EINVAL; + goto exit; + } + + if ((err = pm_pc3_to_pc6_entry(mic_ctx))) { + PM_DEBUG("PC6 transition from PC3 failed for node %d\n", + mic_get_scifnode_id(mic_ctx)); + mutex_unlock(&mic_data.dd_pm.pm_idle_mutex); + goto exit; + } + mutex_unlock(&mic_data.dd_pm.pm_idle_mutex); +exit: + return err; + +} + +/* + * mic_get_scifnode_id: + * + * Function to retrieve node id of a scif node. + * + * mic_ctx: The driver context of the specified node. + * Returns the scif node_id of the specified node. + */ +uint32_t mic_get_scifnode_id(mic_ctx_t *mic_ctx) { + /* NOTE: scif node_id cannot assumed to be a simple increment + * of the bi_id of the driver context. This function is really + * a placeholder for the board_id to node_id conversion that + * we need to do in the host driver. + */ + return (uint32_t)mic_ctx->bi_id + 1; +} diff --git a/host/tools_support.c b/host/tools_support.c new file mode 100644 index 0000000..93922f8 --- /dev/null +++ b/host/tools_support.c @@ -0,0 +1,978 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + *
 + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + *
 + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* code to send escape calls to uOS; meant to test the ring buffer */ + +#include "mic_common.h" +#include "mic/mic_dma_lib.h" +#include "mic/mic_dma_api.h" +#include +#include +
 +// constants defined for flash commands for setting PCI aperture +#define RASMM_DEFAULT_OFFSET 0x4000000 +#define RASMM_FLASH_SIZE 0x200000 +#define MAX_CORE_INDEX 61 +#define SKU_MEM_DIVIDE 4 +#define SKU_LOW_MEM 0 +#define SKU_HIGH_MEM 1 +#define FREQ_2P4 0x630 +#define FREQ_4P5 0x65A +#define FREQ_5P0 0x664 +#define FREQ_5P5 0x66E +#define MASK_MEMFREQ 0xfff +#define SHIFT_MEMFREQ 16 +
 +int +mic_unpin_user_pages(struct page **pages, uint32_t nf_pages) +{ + uint32_t j = 0; + uint32_t status = 0; + if (pages) { + for (j = 0; j < nf_pages; j++) { + if (pages[j]) { + SetPageDirty(pages[j]); + page_cache_release(pages[j]); + } + } + kfree(pages); + } + + return status; +} +
 +int +mic_pin_user_pages (void *data, struct page **pages, uint32_t len, int32_t *nf_pages, int32_t nr_pages) +{ + + int32_t status = 0; + + + if (!(pages)) { + printk("%s Failed to allocate memory for pages\n", __func__); + status = -ENOMEM; + return status; + + } + + // pin the user pages; use semaphores on linux for doing the same + down_read(&current->mm->mmap_sem); + *nf_pages = (int32_t)get_user_pages(current, current->mm, (uint64_t)data, + nr_pages, PROT_WRITE, 1, pages, NULL); + up_read(&current->mm->mmap_sem); + + // compare if the no of final pages is equal to no of requested pages + if ((*nf_pages) < nr_pages) { + printk("%s failed to do _get_user_pages\n", __func__); + status = -EFAULT; + mic_unpin_user_pages(pages, *nf_pages); + return status; + } + + + return status; + +} +
 +int +send_flash_cmd(mic_ctx_t *mic_ctx, MIC_FLASH_CMD_TYPE type, void *data, uint32_t len) +{ + int32_t status = 0; + uint8_t *mmio_va = mic_ctx->mmio.va; + sbox_scratch1_reg_t scratch1reg = {0}; + sbox_scratch2_reg_t scratch2reg = {0}; + uint32_t ret = 0; + void *src; + struct timeval t; + struct flash_stat *statbuf = NULL; + uint64_t
temp; + uint32_t i = 0; + struct version_struct *verbuf = NULL; + int32_t offset = 0; + uint8_t cmddata = 0; + + scratch1reg.bits.status = FLASH_CMD_INVALID; + switch (type) { + case FLASH_CMD_READ: + + /* + * image address = the upper 20 bits of the 32-bit of scracth2 register + * is card side physical address where the flash image resides + * program scratch2 register to notify the image address + */ + scratch2reg.bits.image_addr = RASMM_DEFAULT_OFFSET >> 12; + SBOX_WRITE(scratch2reg.value, mmio_va, SBOX_SCRATCH2); + + /* set command */ + scratch1reg.bits.command = FLASH_CMD_READ; + SBOX_WRITE(scratch1reg.value, mmio_va, SBOX_SCRATCH1); + + mic_send_bootstrap_intr(mic_ctx); + break; + + case FLASH_CMD_READ_DATA: + + /* + * flash read_data command : set pci aperture to 128MB + * read the value of scratch2 in a variable + */ + ret = SBOX_READ(mmio_va, SBOX_SCRATCH2); + scratch2reg.value = ret; + + /* + * convert physical to virtual address + * image address = the upper 20 bits of the 32-bit KNC side physical + * address where the flash image resides + */ + offset = scratch2reg.bits.image_addr << 12 ; + if (len == 0) { + status = -EINVAL; + goto exit; + } + + if (len > (mic_ctx->aper.len - offset)) { + status = -EINVAL; + goto exit; + } + src = mic_ctx->aper.va + offset; + + temp = copy_to_user(data, src, len); + if (temp > 0) { + printk("error while copy to user \n"); + status = -EFAULT; + goto exit; + } + break; + + case FLASH_CMD_ABORT: + + scratch1reg.bits.command = FLASH_CMD_ABORT; + SBOX_WRITE(scratch1reg.value, mmio_va, SBOX_SCRATCH1); + + mic_send_bootstrap_intr(mic_ctx); + break; + + case FLASH_CMD_VERSION: + + /* + * image address = the upper 20 bits of the 32-bit of scracth2 register + * is card side physical address where the flash image resides + */ + scratch2reg.bits.image_addr = RASMM_DEFAULT_OFFSET >> 12; + SBOX_WRITE(scratch2reg.value, mmio_va, SBOX_SCRATCH2); + + /* + * flash version command : similar to read_data command. + * Instead of get_user_pages(), use kmalloc() as we are allocating + * buffer of lesser size + */ + scratch1reg.bits.command = FLASH_CMD_VERSION; + SBOX_WRITE(scratch1reg.value, mmio_va, SBOX_SCRATCH1); + + mic_send_bootstrap_intr(mic_ctx); + + /* poll for completion */ + while(scratch1reg.bits.status != FLASH_CMD_COMPLETED) { + ret = SBOX_READ(mmio_va, SBOX_SCRATCH1); + scratch1reg.value = ret; + msleep(1); + i++; + printk("Looping for status (time = %d ms)\n", i); + if(i > 3000) { + status = -ETIME; + goto exit; + } + + } + + src = mic_ctx->aper.va + RASMM_DEFAULT_OFFSET; + + if (len == 0) { + status = -EINVAL; + goto exit; + } + verbuf = kmalloc(len, GFP_KERNEL); + if (!verbuf) { + status = -ENOMEM; + goto exit; + } + + memcpy(verbuf, src, len); + + printk("header verbuf is : %x\n", verbuf->hdr_ver); + printk("odm verbuf is : %x\n", verbuf->odm_ver); + printk("uptd time bcd is : %llu\n", verbuf->upd_time_bcd); + printk("updated verbuf is : %d\n", *((int*)(&verbuf->upd_ver))); + printk("mfg time bcd is : %llu\n", verbuf->mfg_time_bcd); + printk("mfg verbuf is : %d\n", *((int*)(&verbuf->mfg_ver))); + + temp = copy_to_user(data, verbuf, len); + if(temp > 0) { + printk("error while copy to user \n"); + status = -EFAULT; + if(verbuf) { + kfree(verbuf); + } + goto exit; + } + + if(verbuf) { + kfree(verbuf); + } + + break; + + case FLASH_CMD_WRITE: + + /* flash write command : pin user pages for the data buffer which contains + * the image. + * For the write command, we provide the offset for writing. + * GTT is set to 64MB and offset = 0. 
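+ * The image is copied into the aperture at RASMM_DEFAULT_OFFSET; that offset (in 4KB units) is published through SCRATCH2 and the write command through SCRATCH1 before the bootstrap interrupt is raised.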
+ */ + if (len > (mic_ctx->aper.len - RASMM_DEFAULT_OFFSET)) { + status = -EINVAL; + goto exit; + } + src = mic_ctx->aper.va + RASMM_DEFAULT_OFFSET; + if (len == 0) { + status = -EINVAL; + goto exit; + } + temp = copy_from_user(src, data, len); + if (temp > 0) { + printk("error while copying from user \n"); + status = -EFAULT; + goto exit; + } + + /* image address = the upper 20 bits of the 32-bit KNC side physical + * address where the flash image resides + */ + scratch2reg.bits.image_addr = RASMM_DEFAULT_OFFSET >> 12; + SBOX_WRITE(scratch2reg.value, mmio_va, SBOX_SCRATCH2); + + scratch1reg.bits.command = FLASH_CMD_WRITE; + SBOX_WRITE(scratch1reg.value, mmio_va, SBOX_SCRATCH1); + + mic_send_bootstrap_intr(mic_ctx); + ; + + break; + + case RAS_CMD_CORE_DISABLE: + case RAS_CMD_CORE_ENABLE: + if (copy_from_user(&cmddata, data, sizeof(cmddata))) { + status = -EFAULT; + goto exit; + } + scratch1reg.bits.cmd_data = cmddata; + if (cmddata > MAX_CORE_INDEX) { + printk("Parameter given is greater than physical core index\n"); + status = -EINVAL; + goto exit; + } + + case RAS_CMD: + case RAS_CMD_INJECT_REPAIR: + case RAS_CMD_ECC_DISABLE: + case RAS_CMD_ECC_ENABLE: + case RAS_CMD_EXIT: + do_gettimeofday(&t); + SBOX_WRITE(t.tv_sec, mmio_va, SBOX_SCRATCH3); + scratch1reg.bits.command = type; + SBOX_WRITE(scratch1reg.value, mmio_va, SBOX_SCRATCH1); + + mic_send_bootstrap_intr(mic_ctx); + + break; + + case FLASH_CMD_STATUS: + + /* status command : mmio read of SCRATCH1 register + * The percentage completion is only updated on the + * Flash Write function as currently implemented. + * The other functions are expected to complete almost instantly + */ + if(len != sizeof(struct flash_stat)) { + status = -EINVAL; + goto exit; + } + if (len == 0) { + status = -EINVAL; + goto exit; + } + statbuf = kmalloc(len, GFP_KERNEL); + if(!statbuf) { + status = -ENOMEM; + goto exit; + } + + temp = SBOX_READ(mmio_va, SBOX_SCRATCH1); + scratch1reg.value = (uint32_t)temp; + + statbuf->status = scratch1reg.bits.status; + statbuf->percent = scratch1reg.bits.percent; + statbuf->smc_status = scratch1reg.bits.smc_status; + statbuf->cmd_data = scratch1reg.bits.cmd_data; + statbuf->mm_debug = scratch1reg.bits.mm_debug; + + temp = copy_to_user(data, statbuf, len); + if(temp > 0) { + printk("Error copying data to user buffer\n"); + status = -EFAULT; + if(statbuf) { + kfree(statbuf); + } + goto exit; + } + + if(statbuf) { + kfree(statbuf); + } + + break; + + default: + printk(KERN_ERR "Unknown command\n"); + status = -EOPNOTSUPP; + break; + + } + + exit : + return status; +} + +int get_cardside_mem(mic_ctx_t *mic_ctx, uint64_t start, uint64_t size, void *dest) +{ + int32_t status = 0; + uint64_t len; + uint64_t dest_pa; + struct dma_channel *ch = NULL; + int flags = 0; + int poll_cookie; + int i, next_page; + int j; + uint64_t num_pages; + uint64_t card_pa; + int32_t nf_pages = 0; + uint64_t nr_pages = 0; + struct page **pages = NULL; + void *pg_virt_add; + unsigned long t = jiffies; + int dma_ret = 0; + card_pa = start; + len = size; + + if (len % PAGE_SIZE) + nr_pages = (len >> PAGE_SHIFT) + 1; + else + nr_pages = len >> PAGE_SHIFT; + + flags |= DO_DMA_POLLING; + num_pages = len / PAGE_SIZE; + next_page = 0; + + pages = kmalloc(nr_pages * sizeof(struct page*), GFP_KERNEL); + if (!pages) + return -ENOMEM; + status = mic_pin_user_pages(dest, pages, (uint32_t)len, &nf_pages, (int32_t)nr_pages); + + if (status) + goto exit; + + /* allocate_dma_channel should fail in 2 cases : 1. if it doesnt get dma channel + * then it times out 2. 
there is no device present + */ + status = micpm_get_reference(mic_ctx, true); + if (status) + goto exit; + + while ((dma_ret = allocate_dma_channel(mic_ctx->dma_handle, &ch)) != 0) { + if (dma_ret == -ENODEV) { + printk("No device present\n"); + status = -ENODEV; + goto put_ref; + } + msleep(1); + if (time_after(jiffies,t + NODE_ALIVE_TIMEOUT)) { + printk("dma channel allocation error\n"); + status = -EBUSY; + goto put_ref; + } + } + + for(j = 0; j < num_pages; j++) { + i = 0; + pg_virt_add = lowmem_page_address(pages[j]); + /* get card side address */ + dest_pa = mic_ctx_map_single(mic_ctx, pg_virt_add, PAGE_SIZE); + + /* do dma and keep polling for completion */ + poll_cookie = do_dma(ch, flags, card_pa + next_page, dest_pa, PAGE_SIZE, NULL); + pr_debug("Poll cookie %d\n", poll_cookie); + if (0 > poll_cookie) { + printk("Error programming the dma descriptor\n"); + status = poll_cookie; + goto put_ref; + } else if (-2 == poll_cookie) { + printk( "Copy was done successfully, check for validity\n"); + } else if(-1 != poll_cookie) { + while (i < 10000 && 1 != poll_dma_completion(poll_cookie, ch)) { + i++; + } + if (i == 10000) { + printk("DMA timed out \n"); + } else { + pr_debug("DMA SUCCESS at %d\n", i); + /* increment by PAGE_SIZE on DMA SUCCESS to transfer next page */ + next_page = next_page + PAGE_SIZE; + } + } + mic_ctx_unmap_single(mic_ctx, (dma_addr_t)dest_pa, PAGE_SIZE); + } + +put_ref: + micpm_put_reference(mic_ctx); +exit: + mic_unpin_user_pages(pages, nf_pages); + if (ch) + free_dma_channel(ch); + return status; +} + +/* SKU functions */ +void +sku_swap_list(struct list_head *in, struct list_head *out) +{ + struct list_head *pos, *tmp; + sku_info_t *node; + list_for_each_safe(pos, tmp, in) { + node = list_entry(pos, sku_info_t, sku); + list_del(pos); + list_add_tail(&node->sku, out); + } +} + +int +sku_create_node(uint32_t fuserev_low, + uint32_t fuserev_high, uint32_t mem_size, + uint32_t mem_freq, char *sku_name, + sku_info_t ** newnode) +{ + sku_info_t *temp; + + temp = kmalloc(sizeof(sku_info_t), GFP_KERNEL); + if (temp == NULL) + return -ENOMEM; + temp->fuserev_low = fuserev_low; + temp->fuserev_high = fuserev_high; + temp->memsize = mem_size; + temp->memfreq = mem_freq; + strncpy(temp->sku_name, sku_name, SKU_NAME_LEN - 1); + temp->sku_name[SKU_NAME_LEN - 1] = '\0'; + *newnode = temp; + return 0; +} + +void +sku_destroy_table() +{ + int i; + sku_info_t *node; + struct list_head *pos, *tmp; + for (i = 0; i < MAX_DEV_IDS; i++) + list_for_each_safe(pos, tmp, &mic_data.sku_table[i]) { + node = list_entry(pos, sku_info_t, sku); + list_del(pos); + kfree(node); + } +} + +int +sku_find(mic_ctx_t *mic_ctx, uint32_t device_id) +{ + int ret = 0; + uint32_t cnt = 0; + sku_info_t *match, *newnode = NULL, *skunode; + struct list_head skulist_memsize_in; + struct list_head skulist_memfreq_in; + struct list_head skulist_out; + uint32_t fuse_rev, memsize, memfreq; + struct list_head *pos, *tmp; + const char *invalid = "INVALID SKU"; + + /* Use the LSB as index to the array of pointers to the SKU table*/ + device_id = device_id & 0xf; + + if (device_id > MAX_DEV_IDS) { + strncpy(mic_ctx->sku_name, invalid, SKU_NAME_LEN - 1); + mic_ctx->sku_name[SKU_NAME_LEN - 1] = '\0'; + return -EINVAL; + } + + INIT_LIST_HEAD(&skulist_memsize_in); + INIT_LIST_HEAD(&skulist_memfreq_in); + INIT_LIST_HEAD(&skulist_out); + + /* Search by fuse_config_rev */ + fuse_rev = SBOX_READ(mic_ctx->mmio.va, SBOX_SCRATCH7); + fuse_rev = (fuse_rev >> SHIFT_FUSE_CONFIG_REV) & MASK_FUSE_CONFIG_REV; + + 
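+ /* The lookup narrows in passes: first by fuse_config_rev range, then by memory size class (<= 4GB vs > 4GB), then by memory frequency, stopping as soon as exactly one candidate remains. For example, device id 0x225C with fuse_rev 300 falls only in the 256-350 range of its table and resolves to "C0PRQ-7120 P/A/X/D" after the first pass. */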
list_for_each_safe(pos, tmp, &mic_data.sku_table[device_id]) { + match = list_entry(pos, sku_info_t, sku); + if ((match->fuserev_low <= fuse_rev) && (match->fuserev_high >= fuse_rev)) { + cnt++; + ret = sku_create_node(match->fuserev_low, match->fuserev_high, + match->memsize, match->memfreq, match->sku_name, &newnode); + if (ret) { + strncpy(mic_ctx->sku_name, invalid, SKU_NAME_LEN - 1); + mic_ctx->sku_name[SKU_NAME_LEN - 1] = '\0'; + goto cleanup; + } + list_add_tail(&newnode->sku, &skulist_out); + } + } + /* If only one node is present, the match has been found */ + if (cnt == 1) { + strncpy(mic_ctx->sku_name, newnode->sku_name, SKU_NAME_LEN - 1); + mic_ctx->sku_name[SKU_NAME_LEN - 1] = '\0'; + goto cleanup; + } + + sku_swap_list(&skulist_out, &skulist_memsize_in); + /* Search by memsize */ + memsize = SBOX_READ(mic_ctx->mmio.va, SBOX_SCRATCH0); + memsize = (memsize >> SHIFT_MEMSIZE) & MASK_MEMSIZE; + memsize = memsize >> 20; + if (memsize > SKU_MEM_DIVIDE) + memsize = SKU_HIGH_MEM; + else + memsize = SKU_LOW_MEM; + + cnt = 0; + list_for_each_safe(pos, tmp, &skulist_memsize_in) { + match = list_entry(pos, sku_info_t, sku); + /* Use the MSB for comparison */ + /* Assumption - From the latest documentation, a particular + * combination of device id and fuse_rev can either have memory + * <=4GB (SKU_LOW_MEM) or > 4GB (SKU_HIGH_MEM) + */ + if (memsize == match->memsize) { + cnt++; + ret = sku_create_node(match->fuserev_low, match->fuserev_high, + match->memsize, match->memfreq, match->sku_name, &newnode); + if (ret) { + strncpy(mic_ctx->sku_name, invalid, SKU_NAME_LEN - 1); + mic_ctx->sku_name[SKU_NAME_LEN - 1] = '\0'; + goto cleanup; + } + list_add_tail(&newnode->sku, &skulist_out); + } + + } + list_for_each_safe(pos, tmp, &skulist_memsize_in) { + skunode = list_entry(pos, sku_info_t, sku); + list_del(pos); + kfree(skunode); + } + if (cnt == 1) { + strncpy(mic_ctx->sku_name, newnode->sku_name, SKU_NAME_LEN - 1); + mic_ctx->sku_name[SKU_NAME_LEN - 1] = '\0'; + goto cleanup; + } + + sku_swap_list(&skulist_out, &skulist_memfreq_in); + /* Search by memfreq */ + memfreq = SBOX_READ(mic_ctx->mmio.va, SBOX_SCRATCH9); + memfreq = (memfreq >> SHIFT_MEMFREQ) & MASK_MEMFREQ; + + cnt = 0; + list_for_each_safe(pos, tmp, &skulist_memfreq_in) { + match = list_entry(pos, sku_info_t, sku); + if (memfreq == match->memfreq) { + cnt++; + ret = sku_create_node(match->fuserev_low, match->fuserev_high, + match->memsize, match->memfreq, match->sku_name, &newnode); + if (ret) { + strncpy(mic_ctx->sku_name, invalid, SKU_NAME_LEN - 1); + mic_ctx->sku_name[SKU_NAME_LEN - 1] = '\0'; + goto cleanup; + } + list_add_tail(&newnode->sku, &skulist_out); + } + + } + list_for_each_safe(pos, tmp, &skulist_memfreq_in) { + skunode = list_entry(pos, sku_info_t, sku); + list_del(pos); + kfree(skunode); + } + if (cnt == 1) { + strncpy(mic_ctx->sku_name, newnode->sku_name, SKU_NAME_LEN - 1); + mic_ctx->sku_name[SKU_NAME_LEN - 1] = '\0'; + } else { + strncpy(mic_ctx->sku_name, invalid, SKU_NAME_LEN - 1); + mic_ctx->sku_name[SKU_NAME_LEN - 1] = '\0'; + } + + +cleanup: + list_for_each_safe(pos, tmp, &skulist_out) { + skunode = list_entry(pos, sku_info_t, sku); + list_del(pos); + kfree(skunode); + } + + return ret; +} + + +int +sku_build_table(void) +{ + int i = 0; + sku_info_t *newnode = NULL; + + for ( i = 0; i < MAX_DEV_IDS; i++) + INIT_LIST_HEAD(&mic_data.sku_table[i]); + + /*2250*/ + if (sku_create_node(0, 1, SKU_LOW_MEM, FREQ_2P4, "A0PO-SKU1", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + 
if (sku_create_node(0, 1, SKU_HIGH_MEM, FREQ_2P4, "A0PO-SKU1", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(2, 3, SKU_LOW_MEM, FREQ_4P5,"ES1-SKU2", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(2, 3, SKU_HIGH_MEM, FREQ_4P5, "ES1-SKU2", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(4, 49, SKU_HIGH_MEM, FREQ_4P5, "ES1B-SKU2", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(50, 100, SKU_HIGH_MEM, FREQ_4P5, "B0PO-SKU2", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(101, 150, SKU_HIGH_MEM, FREQ_5P0, "ES2-P1640", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(153, 154, SKU_HIGH_MEM, FREQ_5P0, "B1PO-5110P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(151, 152, SKU_HIGH_MEM, FREQ_5P0, "B1PO-P1640/D1650", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(156, 156, SKU_HIGH_MEM, FREQ_5P0, "B1PRQ-5110P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(155, 155, SKU_HIGH_MEM, FREQ_5P0, "B1QS-5110P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(157, 157, SKU_HIGH_MEM, FREQ_5P0, "B1PRQ-5110P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(158, 250, SKU_HIGH_MEM, FREQ_5P0, "B1PRQ-5110P/5120D", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(251, 253, SKU_HIGH_MEM, FREQ_5P0, "C0-5110P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(254, 255, SKU_HIGH_MEM, FREQ_5P0, "C0QS-5110P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(251, 253, SKU_HIGH_MEM, FREQ_5P5, "C0-5120D", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(254, 255, SKU_HIGH_MEM, FREQ_5P5, "C0QS-5120D", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(256, 350, SKU_HIGH_MEM, FREQ_5P0, "C0PRQ-5110P/5140P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + if (sku_create_node(256, 350, SKU_HIGH_MEM, FREQ_5P5, "C0PRQ-5120D/5140D", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[0]); + + /*2251*/ + if (sku_create_node(0, 1, SKU_LOW_MEM, FREQ_2P4, "A0PO-SKU2", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[1]); + + if (sku_create_node(0, 1, SKU_HIGH_MEM, FREQ_2P4, "A0PO-SKU2", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[1]); + + /*2252*/ + if (sku_create_node(0, 1, SKU_LOW_MEM, FREQ_2P4, "A0PO-SKU3", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[2]); + + /*2253*/ + if (sku_create_node(0, 1, SKU_LOW_MEM, FREQ_2P4, "A0PO-SKU4", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[3]); + + if (sku_create_node(2, 3, SKU_LOW_MEM, FREQ_2P4, "ES1-SKU5", &newnode)) + return -ENOMEM; + 
list_add_tail(&newnode->sku, &mic_data.sku_table[3]); + + if (sku_create_node(4, 49, SKU_LOW_MEM, FREQ_2P4, "ES1B-SKU5", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[3]); + + if (sku_create_node(50, 100, SKU_LOW_MEM, FREQ_4P5, "B0PO-SKU5", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[3]); + + /*2254*/ + + /*2255*/ + if (sku_create_node(0, 1, SKU_LOW_MEM, FREQ_2P4, "A0PO-SKUX", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[5]); + + /*2256*/ + if (sku_create_node(0, 1, SKU_LOW_MEM, FREQ_2P4, "A0PO-SKU5", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[6]); + + /*2257*/ + if (sku_create_node(0, 1, SKU_LOW_MEM, FREQ_2P4, "A0PO-SKUZ", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[7]); + + /*2258*/ + if (sku_create_node(2, 3, SKU_LOW_MEM, FREQ_4P5, "ES1-SKU1", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[8]); + if (sku_create_node(2, 3, SKU_HIGH_MEM, FREQ_4P5, "ES1-SKU1", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[8]); + if (sku_create_node(4, 49, SKU_HIGH_MEM, FREQ_5P5, "ES1B-SKU1", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[8]); + if (sku_create_node(50, 100, SKU_HIGH_MEM, FREQ_5P5, "B0PO-SKU1", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[8]); + + /*2259*/ + if (sku_create_node(2, 3, SKU_LOW_MEM, FREQ_4P5, "ES1-SKU3", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[9]); + + if (sku_create_node(2, 3, SKU_HIGH_MEM, FREQ_4P5, "ES1-SKU3", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[9]); + + /*225A*/ + if (sku_create_node(2, 3, SKU_LOW_MEM, FREQ_4P5, "ES1-SKU4", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[10]); + + if (sku_create_node(4, 49, SKU_LOW_MEM, FREQ_5P0, "ES1B-SKU4", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[10]); + + if (sku_create_node(50, 100, SKU_LOW_MEM, FREQ_5P0, "B0PO-SKU4", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[10]); + + if (sku_create_node(101, 150, SKU_LOW_MEM, FREQ_5P0, "ES2-SKU4", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[10]); + + /*225B*/ + if (sku_create_node(4, 49, SKU_HIGH_MEM, FREQ_5P5, "ES1B-SKU3cs", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[11]); + + if (sku_create_node(4, 49, SKU_LOW_MEM, FREQ_5P5, "ES1B-SKU3ncs", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[11]); + + if (sku_create_node(50, 100, SKU_HIGH_MEM, FREQ_5P5, "B0PO-SKU3cs", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[11]); + + if (sku_create_node(50, 100, SKU_LOW_MEM, FREQ_5P5, "B0PO-SKU3ncs", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[11]); + + /*225C*/ + if (sku_create_node(101, 150, SKU_HIGH_MEM, FREQ_5P5, "ES2-P/A/X 1750", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[12]); + + if (sku_create_node(153, 154, SKU_HIGH_MEM, FREQ_5P5, "B1PO-7110 P/A/X", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[12]); + + if (sku_create_node(155, 155, SKU_HIGH_MEM, FREQ_5P5, "B1QS-7110 P/A/X", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[12]); + + if 
(sku_create_node(151, 152, SKU_HIGH_MEM, FREQ_5P0, "B1PO-P/A 1750", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[12]); + + if (sku_create_node(156, 156, SKU_HIGH_MEM, FREQ_5P5, "B1PRQ-7110 P/A/X", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[12]); + + if (sku_create_node(157, 157, SKU_HIGH_MEM, FREQ_5P5, "B1PRQ-7110 P/A/X", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[12]); + + if (sku_create_node(158, 202, SKU_HIGH_MEM, FREQ_5P5, "B1PRQ-7110 P/X", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[12]); + + if (sku_create_node(203, 250, SKU_HIGH_MEM, FREQ_5P5, "B1PRQ-SE10 P/X", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[12]); + + if (sku_create_node(251, 253, SKU_HIGH_MEM, FREQ_5P5, "C0-7120 P/A/X/D", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[12]); + + if (sku_create_node(254, 255, SKU_HIGH_MEM, FREQ_5P5, "C0QS-7120 P/A/X/D", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[12]); + + if (sku_create_node(256, 350, SKU_HIGH_MEM, FREQ_5P5, "C0PRQ-7120 P/A/X/D", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[12]); + + /*225D*/ + if (sku_create_node(101, 150, SKU_LOW_MEM, FREQ_5P0, "ES2-P1310", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[13]); + + if (sku_create_node(101, 150, SKU_HIGH_MEM, FREQ_5P0, "ES2-A1330", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[13]); + + if (sku_create_node(153, 154, SKU_LOW_MEM, FREQ_5P0, "B1PO-3110P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[13]); + + if (sku_create_node(153, 154, SKU_HIGH_MEM, FREQ_5P0, "B1PO-3115A", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[13]); + + if (sku_create_node(157, 157, SKU_LOW_MEM, FREQ_5P0, "B1PRQ-3110P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[13]); + + if (sku_create_node(157, 157, SKU_HIGH_MEM, FREQ_5P0, "B1PRQ-3115A", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[13]); + + if (sku_create_node(156, 156, SKU_LOW_MEM, FREQ_5P0, "B1PRQ-3110P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[13]); + + if (sku_create_node(156, 156, SKU_HIGH_MEM, FREQ_5P0, "B1PRQ-3115A", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[13]); + + if (sku_create_node(155, 155, SKU_HIGH_MEM, FREQ_5P0, "B1QS-3115A", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[13]); + + if (sku_create_node(155, 155, SKU_LOW_MEM, FREQ_5P0, "B1QS-3110P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[13]); + + if (sku_create_node(158, 250, SKU_HIGH_MEM, FREQ_5P0, "B1PRQ-3120P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[13]); + + if (sku_create_node(251, 253, SKU_HIGH_MEM, FREQ_5P0, "C0-3120 P/A", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[13]); + + if (sku_create_node(254, 255, SKU_HIGH_MEM, FREQ_5P0, "C0QS-3120 P/A", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[13]); + + if (sku_create_node(256, 350, SKU_HIGH_MEM, FREQ_5P0, "C0PRQ-3120/3140 P/A", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[13]); + + /*225E*/ + if 
(sku_create_node(157, 157, SKU_HIGH_MEM, FREQ_5P0, "B1PRQ-31S1P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[14]); + + if (sku_create_node(158, 250, SKU_HIGH_MEM, FREQ_5P0, "B1PRQ-31S1P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[14]); + + if (sku_create_node(251, 253, SKU_HIGH_MEM, FREQ_5P0, "C0-31S1P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[14]); + + if (sku_create_node(254, 255, SKU_HIGH_MEM, FREQ_5P0, "C0QS-31S1P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[14]); + + if (sku_create_node(256, 350, SKU_HIGH_MEM, FREQ_5P0, "C0PRQ-31S1P", &newnode)) + return -ENOMEM; + list_add_tail(&newnode->sku, &mic_data.sku_table[14]); + + return 0; // Successed +} diff --git a/host/uos_download.c b/host/uos_download.c new file mode 100644 index 0000000..6a323c7 --- /dev/null +++ b/host/uos_download.c @@ -0,0 +1,1950 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +/* contains code to download uos on MIC card */ + +#include "mic_common.h" +#include +#include "micint.h" +#include +#include +#include "mic/mic_virtio.h" +#include +#include "mic/micveth.h" + + +#define APERTURE_SEGMENT_SIZE ((1) * 1024 * 1024 * 1024ULL) + +#define UOS_RESERVE_SIZE_MIN ((128) * 1024 * 1024) +#define OS_RESERVE_SIZE_MIN ((32) * 1024 * 1024) +#define UOS_RESERVE_SIZE_MAX (((4) * 1024 * 1024 * 1024ULL) - ((4) * 1024)) +#define UOS_RESERVE_PERCENT 50 + +#define UOS_WATCHDOG_TIMEOUT 5000 // default watchdog timeout in milliseconds + +#define PCIE_CLASS_CODE(x) ((x) >> 24 ) + +/* zombie class code as per the HAS is 0xFF + * but on KNC, we found it as 0x03 + */ +#define ZOMBIE_CLASS_CODE 0x03 +#define DISABLE_BAR 0x02 +#define RESET_FAILED_F2 12870 +#define RESET_FAILED_F4 13382 + +void ramoops_remove(mic_ctx_t *mic_ctx); + +static struct proc_dir_entry *ramoops_dir; +struct proc_dir_entry *vmcore_dir; + + +static void adapter_dpc(unsigned long dpc); +extern int mic_vhost_blk_probe(bd_info_t *bd_info); +extern void mic_vhost_blk_remove(bd_info_t *bd_info); + +/* driver wide global common data */ +mic_data_t mic_data; +extern int usagemode_param; +extern bool mic_crash_dump_enabled; +extern bool mic_watchdog_auto_reboot; + +static int64_t etc_comp = 0; + +static uint64_t +etc_read(uint8_t *mmio_va) +{ + uint32_t low; + uint32_t hi1,hi2; + + do { + hi1 = SBOX_READ(mmio_va, SBOX_ELAPSED_TIME_HIGH); + low = SBOX_READ(mmio_va, SBOX_ELAPSED_TIME_LOW); + hi2 = SBOX_READ(mmio_va, SBOX_ELAPSED_TIME_HIGH); + } while(hi1 != hi2); + + return((uint64_t)((((uint64_t)hi1 << 32) | low) >> 5)); +} + +static int64_t +calc_deltaf(mic_ctx_t *mic_ctx) +{ + const int64_t ETC_CLK_FREQ = 15625000; + const uint32_t TIME_DELAY_IN_SEC = 10; + const int64_t etc_cnt1 = ETC_CLK_FREQ * TIME_DELAY_IN_SEC; + int64_t etc_cnt2; + + uint64_t cnt1, cnt2; + int64_t deltaf_in_ppm, deltaf; + + /* + * (etc_freq2 / etc_freq1) = (etc_count2 / etc_count1) + * etc_freq1 = ETC_CLK_FREQ + * => etc_count1 = TIME_DELAY_IN_SEC * ETC_CLK_FREQ + * (etc_freq2 / etc_freq1) = (etc_count2 / etc_count1) + * etc_freq2 = etc_freq1 * (etc_count2 / etc_count1) + * etc_freq2 - etc_freq1 = etc_freq1((etc_count2 / etc_count1) - 1) + * deltaf = etc_freq1(etc_count2 - etc_count1)/etc_count1 + * deltaf_in_ppm = deltaf * 10 ^ 6 / etc_freq1 + * deltaf_in_ppm = ((etc_count2 - etc_count1) * 10 ^ 6) / etc_count1 + */ + /* Need to implement the monotonic/irqsave logic for windows */ + unsigned long flags; + struct timespec ts1, ts2; + int64_t mono_ns; + int i = 0; + do { + local_irq_save(flags); + cnt1 = etc_read(mic_ctx->mmio.va); + getrawmonotonic(&ts1); + local_irq_restore(flags); + mdelay(TIME_DELAY_IN_SEC * 1000); + local_irq_save(flags); + cnt2 = etc_read(mic_ctx->mmio.va); + getrawmonotonic(&ts2); + local_irq_restore(flags); + etc_cnt2 = cnt2 - cnt1; + ts2 = timespec_sub(ts2, ts1); + mono_ns = timespec_to_ns(&ts2); + /* Recalculate etc_cnt2 based on getrawmonotonic */ + etc_cnt2 = (etc_cnt2 * TIME_DELAY_IN_SEC * 1000 * 1000 * 1000) / mono_ns; + deltaf = ( ETC_CLK_FREQ * (etc_cnt2 - etc_cnt1)) / etc_cnt1; + deltaf_in_ppm = (1000 * 1000 * (etc_cnt2 - etc_cnt1)) / etc_cnt1; + i++; + /* + * HSD #4844900 + * On some of the systems deltaf_in_ppm is turning out + * way higher than expected. 
The only reasons I can think of + * are: + * i) mmio traffic cauing variable delays for mmio read + * ii) NMIs affecting this code + */ + } while (i < 10 && (deltaf_in_ppm > 2700 || deltaf_in_ppm < -2700)); + + pr_debug("etc deltaf: %lld\n", deltaf); + /* + * For intel chipsets, Spread Spectrum Clocking (SSC) (in the limit) + * is downspread with a frequency of 30hz and an amplitude of 0.5% + * which translates to 2500ppm. This is also the ppm observed on KNC + CrownPass + * Hence, if ppm > 2500, the code would need to retry to eliminate any chance of error + * Added an error margin of 1ppm (etc mmio reads can take really long time) + */ + if (deltaf_in_ppm > 2700 || deltaf_in_ppm < -2700) { + printk(KERN_ERR "ETC timer compensation(%lldppm) is much higher" + "than expected\n", deltaf_in_ppm); + /* + * HSD #4844900 + * Clamp etc compensation to 2500ppm + */ + if (deltaf_in_ppm > 2700) + deltaf_in_ppm = 2500; + else + deltaf_in_ppm = -2500; + deltaf = (ETC_CLK_FREQ * deltaf_in_ppm) / (1000 * 1000); + } + if (deltaf > 0 && deltaf <= 10) + deltaf = 0; + return deltaf; +} + +void +calculate_etc_compensation(mic_ctx_t *mic_ctx) +{ + if (mic_ctx->bi_family == FAMILY_KNC) { + if (!etc_comp) + etc_comp = calc_deltaf(mic_ctx); + mic_ctx->etc_comp = etc_comp; + } +} + +/* + DESCRIPTION:: waits for bootstrap loader is finished + PARAMETERS:: + [in]void *mmio_va - virtual address to access MMIO registers + RETURN_VALUE:: 0 if successful, non-zero if failure +*/ +int +wait_for_bootstrap(uint8_t *mmio_va) +{ + uint32_t scratch2 = 0; + int count = 0; +#ifdef MIC_IS_EMULATION + int wait_time = 0; +#endif + + // Wait until the boot loader is finished + while (!SCRATCH2_DOWNLOAD_STATUS(scratch2)) { + msleep(100); + if (count == 600) { +#ifndef MIC_IS_EMULATION + printk("Firmware is not responding with ready bit\n"); + return -EIO; +#else + /* We don't want to be polling too often on the emulator, it is SLOW! */ + pr_debug("Wait for bootstrap: %d min(s) \n", wait_time++); + count = 0; +#endif + } + + count++; + scratch2 = SBOX_READ(mmio_va, SBOX_SCRATCH2); + } + + return 0; +} + +/* + DESCRIPTION::gets adapter memory size. 
calculates size based on scratch register 0 + PARAMETERS:: + [in]void *mmio_va - virtual address to access MMIO registers + [out]uint32_t *adapter_mem_size - adapter memory size + RETURN_VALUE:: none +*/ +void +get_adapter_memsize(uint8_t *mmio_va, uint32_t *adapter_mem_size) +{ + uint32_t memsize = 0; + uint32_t scratch0 = {0}; + + scratch0 = SBOX_READ(mmio_va, SBOX_SCRATCH0); + memsize = SCRATCH0_MEM_SIZE_KB(scratch0) * ((1) * 1024); + + // Adjust the memory size based on the memory usage + switch (SCRATCH0_MEM_USAGE(scratch0)) { + case SCR0_MEM_ALL: + // Do nothing + break; + + case SCR0_MEM_HALF: + memsize /= 2; + break; + + case SCR0_MEM_THIRD: + memsize /= 3; + break; + + case SCR0_MEM_FOURTH: + memsize /= 4; + break; + + default: + // DBG_ASSERT_MSG(false, "Invalid memory usage specified by the bootstrap.\n"); + break; + } + + *adapter_mem_size = memsize; +} + +/* + DESCRIPTION:: gets uos load offset from scratch register 2 + PARAMETERS:: + [in]void *mmio_va - virtual address to access MMIO registers + [out]uint32_t *uos_load_offset - offset at which uos will be loaded + RETURN_VALUE:: none +*/ +void +get_uos_loadoffset(uint8_t *mmio_va, uint32_t *uos_load_offset) +{ + uint32_t scratch2 = 0; + + scratch2 = SBOX_READ(mmio_va, SBOX_SCRATCH2); + *uos_load_offset = SCRATCH2_DOWNLOAD_ADDR(scratch2); +} + +/* + DESCRIPTION:: gets reserved size for uos + PARAMETERS:: + [out]uint32_t *uos_reserve_size - reserved uos size + RETURN_VALUE:: none +*/ +void +get_uos_reserved_size(uint8_t* mmio_va, uint32_t adapter_memsize, uint32_t *uos_reserve_size) +{ + uint32_t reserve_size = 0; + + // Only calculate if not explicitly specified by the user + reserve_size = (uint32_t)(adapter_memsize * UOS_RESERVE_PERCENT / 100); + + // Make sure there is at least WINDOWS_RESERVE_SIZE_MIN bytes + reserve_size = GET_MIN(reserve_size, adapter_memsize - OS_RESERVE_SIZE_MIN); + + // Keep in mind maximum uos reserve size is uint32_t, so we never overflow + reserve_size = GET_MIN(reserve_size, UOS_RESERVE_SIZE_MAX); + reserve_size = GET_MAX(reserve_size, UOS_RESERVE_SIZE_MIN); + + // Always align uos reserve size to a page + reserve_size = (uint32_t)AlignLow(reserve_size, ((4) * 1024)); + + *uos_reserve_size = reserve_size; +} + +/* + DESCRIPTION:: gets APIC ID from scratch register 2 + PARAMETERS:: + [in]void *mmio_va - virtual address to access MMIO registers + [out]uint32_t *apic_id - apic id + RETURN_VALUE:: none +*/ +void +get_apic_id(uint8_t *mmio_va, uint32_t *apic_id) +{ + uint32_t scratch2 = 0; + + scratch2 = SBOX_READ(mmio_va, SBOX_SCRATCH2); + *apic_id = SCRATCH2_APIC_ID(scratch2); +} + +/* + DESCRIPTION::program the PCI aperture as a contiguous window. 
(only supports up to 4GB of memory) + PARAMETERS:: + [in]mic_ctx_t *mic_ctx - mic ctx + [in]uint32_t gtt_index - beginning gtt entry index + [in]uint64_t phy_addr - physical address for PCI aperture + [in]uint32_t num_bytes - size of PCI aperture + RETURN_VALUE:: None + */ +void +set_pci_aperture(mic_ctx_t *mic_ctx, uint32_t gtt_index, uint64_t phy_addr, uint32_t num_bytes) +{ + uint32_t num_pages; + uint32_t gtt_entry; + uint32_t i; + + num_pages = ALIGN(num_bytes, PAGE_SIZE) >> PAGE_SHIFT; + + for (i = 0; i < num_pages; i++) { + + gtt_entry = ((uint32_t)(phy_addr >> PAGE_SHIFT) + i) << 1 | 0x1u; + GTT_WRITE(gtt_entry, mic_ctx->mmio.va, (gtt_index + i)*sizeof(gtt_entry)); + } + + // XPU_RACE_CONDITION: + // Writing GttTlbFlushReg DOES NOT flush all write transactions from SBOX to GDDR + // because GttTlbFlushReg is an SBOX register and the transaction terminates in the SBOX. + // An MMIO write must use the MIC ringbus to be serializing. + // Writing the GTT itself DOES serialize: the GTT is in MMIO space, and the write goes to the ringbus. + // The memory barrier makes sure all writes make it to GDDR before the tlbFlush write. + smp_mb(); // FIXME: only needs SFENCE + + // write any value to cause a flush + SBOX_WRITE(1, mic_ctx->mmio.va, SBOX_TLB_FLUSH); +}
+ +/* + DESCRIPTION:: Programs a scratch register that the bootstrap reads to determine + how large the uOS image is. + PARAMETERS:: + [in]void *mmio_va - virtual address to mmio register, + [in]uint32_t uos_size - size of uos image + RETURN_VALUE:: none +*/ +void +set_uos_size(uint8_t *mmio_va, uint32_t uos_size) +{ + uint32_t scratch5; + + scratch5 = uos_size; + // XPU_RACE_CONDITION: write to MMIO space is uncached and flushes WC buffers + SBOX_WRITE(scratch5, mmio_va, SBOX_SCRATCH5); +}
+ +/* + DESCRIPTION:: Programs a scratch register that the uOS reads to determine how + much memory to reserve. + PARAMETERS:: + [in]void *mmio_va - virtual address to mmio register, + [in]uint32_t uos_reserved_size - size of memory to be reserved by uos. + RETURN_VALUE:: none +*/ +void +set_uos_reserved_size(uint8_t *mmio_va, uint32_t uos_reserved_size) +{ + uint32_t scratch3; + + scratch3 = uos_reserved_size; + // XPU_RACE_CONDITION: write to MMIO space is uncached and flushes WC buffers + SBOX_WRITE(scratch3, mmio_va, SBOX_SCRATCH3); +}
+ +/* + DESCRIPTION:: determines the product family from the PCI device ID.
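+ Device IDs 0x2249/0x224a map to FAMILY_ABR, the 0x2250-0x225e range maps to FAMILY_KNC, and any other ID is reported as FAMILY_UNKNOWN (see the switch below).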
+ PARAMETERS:: + [in]uint32_t device_id - device ID, + RETURN_VALUE:: family type +*/ +product_family_t +get_product_family(uint32_t device_id) +{ + product_family_t product_family; + + switch (device_id) { + case PCI_DEVICE_ABR_2249: + case PCI_DEVICE_ABR_224a: + product_family = FAMILY_ABR; + break; + + case PCI_DEVICE_KNC_2250: + case PCI_DEVICE_KNC_2251: + case PCI_DEVICE_KNC_2252: + case PCI_DEVICE_KNC_2253: + case PCI_DEVICE_KNC_2254: + case PCI_DEVICE_KNC_2255: + case PCI_DEVICE_KNC_2256: + case PCI_DEVICE_KNC_2257: + case PCI_DEVICE_KNC_2258: + case PCI_DEVICE_KNC_2259: + case PCI_DEVICE_KNC_225a: + case PCI_DEVICE_KNC_225b: + case PCI_DEVICE_KNC_225c: + case PCI_DEVICE_KNC_225d: + case PCI_DEVICE_KNC_225e: + product_family = FAMILY_KNC; + break; + + default: + pr_debug( "Invalid/Unknown device ID %d\r\n", device_id); + product_family = FAMILY_UNKNOWN; + break; + } + + return product_family; +} + +/* + DESCRIPTION:: loads uos image at given path into gddr + PARAMETERS:: + [in]mic_ctx_t *mic_ctx - mic context + [in]imgname - file path for uos file to be loaded + [out]uos_size - size of uos image + */ +int +load_uos_into_gddr(mic_ctx_t *mic_ctx, char *imgname, uint32_t* uos_size, uint64_t *uos_cmd_offset) +{ + void *aperture_va; + uint8_t *mmio_va; + uint32_t apic_id = 0; + uint32_t uos_load_offset = 0; + uint32_t adapter_memsize = 0; + int status = 0; + + aperture_va = mic_ctx->aper.va; + mmio_va = mic_ctx->mmio.va; + + if (mic_ctx->state != MIC_BOOT) { + printk("Not in booting state\n"); + return -EPERM; + } + + status = mic_get_file_size(imgname, uos_size); + + if (status) { + mic_ctx->state = MIC_BOOTFAIL; + printk("Linux image not found at %s , status returned %d\n", imgname, status); + return status; + } + + get_uos_loadoffset(mmio_va, &uos_load_offset); + // Determine the uOS reserve size after we have the m_pXpu interface + get_adapter_memsize(mmio_va, &adapter_memsize); + + get_apic_id(mmio_va, &apic_id); + // store apic_id in adapter context for later use + mic_ctx->apic_id = apic_id; + + if (mic_ctx->bi_family == FAMILY_ABR){ + // Program the PCI aperture as a contiguous window + // Need an extra page to provide enough buffer space for command line arguments. + set_pci_aperture(mic_ctx, 0, uos_load_offset, *uos_size + PAGE_SIZE); + uos_load_offset = 0; + } + + // transfer uOs image file to gddr + status = mic_load_file(imgname, ((uint8_t*)aperture_va) + uos_load_offset, *uos_size); + + // for the emulator we want to skip "downloading" the file + *uos_cmd_offset = (uint64_t)uos_load_offset + *uos_size; + + // This only applies to KNF bootstrap, it is NOT needed for KNC + if (mic_ctx->bi_family == FAMILY_ABR) { + // clear UOS load offset register after uOS was uploaded + SBOX_WRITE(0, mmio_va, SBOX_SCRATCH2); + SBOX_READ(mmio_va, SBOX_SCRATCH2); + } + + return status; +} + +/* + DESCRIPTION:: loads uos initramfs image at given path into gddr for KNC. 
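+ The initramfs is placed at twice the kernel load offset, and its GDDR offset and size are then patched into the loaded kernel image at offsets 0x218 and 0x21c, which are the ramdisk_image and ramdisk_size fields of the x86 boot protocol setup header.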
+ PARAMETERS:: + [in]mic_ctx_t *mic_ctx - mic context + [in]initramfsname - file path for uos initramfs file to be loaded + */ +int +load_initramfs(mic_ctx_t *mic_ctx, char *initramfsname, uint32_t *initramfs_image, uint32_t *initramfs_size) +{ + uint8_t *aperture_va; + uint8_t *mmio_va; + uint32_t apic_id = 0; + uint32_t uos_load_offset = 0; + uint32_t file_load_offset = 0; + uint32_t adapter_memsize = 0; + uint32_t file_size = 0; + int status = 0; + uint32_t *ramfs_addr_ptr; + + aperture_va = mic_ctx->aper.va; + mmio_va = mic_ctx->mmio.va; + + if (mic_ctx->state != MIC_BOOT) { + printk("Not in booting state\n"); + return -EPERM; + } + + status = mic_get_file_size(initramfsname, &file_size); + + if (status) { + mic_ctx->state = MIC_BOOTFAIL; + printk("Init ram disk image not found at %s , status returned %d\n", initramfsname, status); + return status; + } + + get_uos_loadoffset(mmio_va, &uos_load_offset); + file_load_offset = uos_load_offset << 1; /* Place initramfs higher than kernel; 128MB is ok */ + + *initramfs_size = file_size; + *initramfs_image = file_load_offset; + + // Determine the uOS reserve size after we have the m_pXpu interface + get_adapter_memsize(mmio_va, &adapter_memsize); + get_apic_id(mmio_va, &apic_id); + + // store apic_id in adapter context for later use + mic_ctx->apic_id = apic_id; + + // transfer uOs image file to gddr + status = mic_load_file(initramfsname, aperture_va + file_load_offset, file_size); + + // write the initramfs load address and size to the fields in the kernel header + ramfs_addr_ptr = (uint32_t *)(aperture_va + uos_load_offset + 0x218); + *ramfs_addr_ptr = file_load_offset; + ramfs_addr_ptr = (uint32_t *)(aperture_va + uos_load_offset + 0x21c); + *ramfs_addr_ptr = *initramfs_size; + + return status; +} + +struct tmpqp { + uint64_t ep; + uint64_t magic; +}; + +int +load_command_line(mic_ctx_t *mic_ctx, uint64_t uos_cmd_offset) +{ + void *cmd_line_va = mic_ctx->aper.va + uos_cmd_offset; + uint32_t cmdlen = 0; + char *buf = NULL; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,34)) || defined(RHEL_RELEASE_CODE) + struct board_info *bi = mic_ctx->bd_info; +#endif + +#ifdef USE_VCONSOLE + micvcons_t *vcons = &mic_ctx->bi_vcons; + dma_addr_t vc_hdr_dma_addr = 0; +#endif + + /* + * mic_ctx->boot_mem will also be set in IOCTL to boot the card in restricted memory + * FIXME::This code is added to keep the backward compatibility with IOCTLs + */ + if (mic_ctx->bi_family == FAMILY_KNC) + if (mic_ctx->boot_mem == 0 || mic_ctx->boot_mem > mic_ctx->aper.len >> 20) + mic_ctx->boot_mem = (uint32_t)(mic_ctx->aper.len >> 20); + if (!(buf = kzalloc(MIC_CMDLINE_BUFSIZE, GFP_KERNEL))) { + printk(KERN_ERR "failed to allocate %d bytes for uOS command line\n", + MIC_CMDLINE_BUFSIZE); + return -ENOMEM; + } + + cmdlen = snprintf(buf, MIC_CMDLINE_BUFSIZE, "card=%d vnet=%s scif_id=%d scif_addr=0x%llx", + mic_ctx->bi_id, mic_vnet_modes[mic_vnet_mode], + mic_ctx->bi_id + 1, mic_ctx->bi_scif.si_pa); + + if (mic_vnet_mode == VNET_MODE_DMA) { + struct micvnet_info *vnet_info = mic_ctx->bi_vethinfo; + cmdlen += snprintf(buf + cmdlen, MIC_CMDLINE_BUFSIZE - cmdlen, + " vnet_addr=0x%llx", vnet_info->vi_rp_phys); + } + +#ifdef USE_VCONSOLE + if (vcons->dc_enabled) + vc_hdr_dma_addr = vcons->dc_hdr_dma_addr; + + cmdlen += snprintf(buf + cmdlen, MIC_CMDLINE_BUFSIZE - cmdlen, + " vcons_hdr_addr=0x%llx", vc_hdr_dma_addr); +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,34)) || defined(RHEL_RELEASE_CODE) + cmdlen += snprintf(buf + cmdlen, MIC_CMDLINE_BUFSIZE - cmdlen, " 
virtio_addr=0x%llx", + mic_ctx_map_single(mic_ctx, bi->bi_virtio, sizeof(struct vb_shared))); +#endif + + if (mic_ctx->boot_mem) + cmdlen += snprintf(buf + cmdlen, MIC_CMDLINE_BUFSIZE - cmdlen, + " mem=%dM", mic_ctx->boot_mem); + mic_ctx->boot_mem = 0; + + if (mic_ctx->ramoops_size) + cmdlen += snprintf(buf + cmdlen, MIC_CMDLINE_BUFSIZE - cmdlen, + " ramoops_size=%d ramoops_addr=0x%llx", + mic_ctx->ramoops_size, mic_ctx->ramoops_pa[0]); + + cmdlen += snprintf(buf + cmdlen, MIC_CMDLINE_BUFSIZE - cmdlen, + " p2p=%d p2p_proxy=%d", mic_p2p_enable, mic_p2p_proxy_enable); + + if (mic_ctx->bi_family == FAMILY_KNC) + cmdlen += snprintf(buf + cmdlen, MIC_CMDLINE_BUFSIZE - cmdlen, + " etc_comp=%lld", mic_ctx->etc_comp); + + cmdlen += snprintf(buf + cmdlen, MIC_CMDLINE_BUFSIZE - cmdlen, + " reg_cache=%d", mic_reg_cache_enable); + cmdlen += snprintf(buf + cmdlen, MIC_CMDLINE_BUFSIZE - cmdlen, + " ulimit=%d", mic_ulimit_check); + cmdlen += snprintf(buf + cmdlen, MIC_CMDLINE_BUFSIZE - cmdlen, + " huge_page=%d", mic_huge_page_enable); + if (mic_crash_dump_enabled) + cmdlen += snprintf(buf + cmdlen, MIC_CMDLINE_BUFSIZE - cmdlen, + " crashkernel=1M@80M"); + /* + * Limitations in the Intel Jaketown and Ivytown platforms require SCIF + * to proxy P2P DMA read transfers in order to convert them into a P2P DMA + * write for better performance. The SCIF module on MIC needs the + * numa node the MIC is connected to on the host to make decisions + * about whether to proxy P2P DMA reads or not based on whether the two MIC + * devices are connected to the same QPI/socket/numa node or not. + * The assumption here is that a socket/QPI will have a unique + * numa node number. + */ + pr_debug("CPU family = %d, CPU model = %d\n", boot_cpu_data.x86, boot_cpu_data.x86_model); + + if (mic_p2p_proxy_enable && (boot_cpu_data.x86==6) && + (boot_cpu_data.x86_model == 45 || boot_cpu_data.x86_model == 62)) { + int numa_node = dev_to_node(&mic_ctx->bi_pdev->dev); + if (-1 != numa_node) { + if (boot_cpu_data.x86_model == 45) + ms_info.mi_proxy_dma_threshold = SCIF_PROXY_DMA_THRESHOLD_JKT; + if (boot_cpu_data.x86_model == 62) + ms_info.mi_proxy_dma_threshold = SCIF_PROXY_DMA_THRESHOLD_IVT; + cmdlen += snprintf(buf + cmdlen, MIC_CMDLINE_BUFSIZE - cmdlen, + " numa_node=%d", numa_node); + cmdlen += snprintf(buf + cmdlen, MIC_CMDLINE_BUFSIZE - cmdlen, + " p2p_proxy_thresh=%lld", ms_info.mi_proxy_dma_threshold); + } + } + + if (mic_ctx->sysfs_info.cmdline != NULL) + snprintf(buf + cmdlen, MIC_CMDLINE_BUFSIZE - cmdlen, + " %s", mic_ctx->sysfs_info.cmdline); + else + snprintf(buf + cmdlen, MIC_CMDLINE_BUFSIZE - cmdlen, + " hostname=mic%d ipaddr=171.31.%d.2 quiet console=ttyS0,115200n8", + mic_ctx->bi_id, mic_ctx->bi_id + 1); + + memcpy_toio(cmd_line_va, buf, strlen(buf) + 1); + + if (mic_ctx->sysfs_info.kernel_cmdline != NULL) + kfree(mic_ctx->sysfs_info.kernel_cmdline); + + if ((mic_ctx->sysfs_info.kernel_cmdline = kmalloc(strlen(buf) + 1, GFP_KERNEL)) != NULL) + strcpy(mic_ctx->sysfs_info.kernel_cmdline, buf); + + kfree(buf); + return 0; +} + +/* + DESCRIPTION:: method responsible for programming scratch register with uos image size + and notifying bootstrap to start booting uos + PARAMETERS:: + [in]mic_ctx_t *mic_ctx - mic context + [in]uint32_t uos_size - size of uos image + */ +int +notify_uosboot(mic_ctx_t *mic_ctx, uint32_t uos_size) +{ + int status = 0; + uint32_t adapter_memsize = 0; + uint32_t uos_reserved_size = 0; + uint8_t* mmio_va = mic_ctx->mmio.va; + + // Program the register with uOS image size for bootstrap + 
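// (the uOS image size goes into SBOX_SCRATCH5 and the uOS reserved-memory size into SBOX_SCRATCH3; see set_uos_size()/set_uos_reserved_size() above) +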
set_uos_size(mmio_va, uos_size); + + get_adapter_memsize(mmio_va, &adapter_memsize); + + // Program the register to inform the uOS of how much space to reserve + get_uos_reserved_size(mmio_va, adapter_memsize, &uos_reserved_size); + set_uos_reserved_size(mmio_va, uos_reserved_size); + + mic_send_bootstrap_intr(mic_ctx); + + return status; +}
+ +/* + DESCRIPTION :: boots the Linux OS on the card + PARAMETERS :: + [in]mic_ctx_t *mic_ctx - mic context + [in]char *imgname - file path for uos image to be loaded on the card + RETURN_VALUE:: 0 if successful, non-zero if failure +*/ +int +boot_linux_uos(mic_ctx_t *mic_ctx, char *imgname, char *initramfsname) +{ + int status = 0; + uint32_t uos_size = 0; + uint64_t uos_cmd_offset = 0; + uint32_t initramfs_image = 0; + uint32_t initramfs_size = 0; + + printk("MIC %d Booting\n", mic_ctx->bi_id); + + if (mic_ctx->state != MIC_BOOT) { + printk(KERN_ERR "MIC %d is not in offline mode\n", mic_ctx->bi_id); + return -EPERM; + } + + // load the uos image at the given path into gddr + if ((status = load_uos_into_gddr(mic_ctx, imgname, &uos_size, &uos_cmd_offset)) != 0) { + printk("Cannot load uos in gddr\n"); + return status; + } + + if (initramfsname && (status = load_initramfs(mic_ctx, initramfsname, &initramfs_image, &initramfs_size)) != 0) { + printk("Cannot load initramfs in gddr\n"); + return status; + } + + status = load_command_line(mic_ctx, uos_cmd_offset); + + // program the scratch register with the uos image size and notify the bootstrap + status = notify_uosboot(mic_ctx, uos_size); + + return status; +}
+ +/* + DESCRIPTION :: boots the maintenance mode handler on the card + PARAMETERS :: + [in]mic_ctx_t *mic_ctx - mic context + [in]char *imgname - file path for uos image to be loaded on the card + RETURN_VALUE:: 0 if successful, non-zero if failure +*/ +int boot_micdev_app(mic_ctx_t *mic_ctx, char *imgname) +{ + int status = 0; + uint32_t uos_size = 0; + uint8_t *mmio_va = 0; + uint64_t uos_cmd_offset = 0; + int32_t temp_scratch2 = 0; + + printk("MIC %d Booting\n", mic_ctx->bi_id); + mmio_va = mic_ctx->mmio.va; + status = load_uos_into_gddr(mic_ctx, imgname, &uos_size, &uos_cmd_offset); + if (status) { + printk("Cannot load uos in gddr\n"); + goto exit; + } + + temp_scratch2 = SBOX_READ(mmio_va, SBOX_SCRATCH2); + /* clear download bit */ + temp_scratch2 = SCRATCH2_CLEAR_DOWNLOAD_STATUS(temp_scratch2); + SBOX_WRITE(temp_scratch2, mmio_va, SBOX_SCRATCH2); + + // program the scratch register with the uos image size and notify the bootstrap + status = notify_uosboot(mic_ctx, uos_size); + if (status) + goto exit; + status = wait_for_bootstrap(mmio_va); +exit: + if (status) { + mic_setstate(mic_ctx, MIC_BOOTFAIL); + } else { + mic_setstate(mic_ctx, MIC_ONLINE); + mic_ctx->boot_count++; + printk("ELF booted successfully\n"); + } + return status; +}
+ +/* Timer callback that polls for completion of a hardware reset of the device */ +void +reset_timer(unsigned long arg) +{ + mic_ctx_t *mic_ctx = (mic_ctx_t *)arg; + uint32_t scratch2 = 0; + uint32_t postcode = mic_getpostcode(mic_ctx); + + printk("mic%d: Resetting (Post Code %c%c)\n", mic_ctx->bi_id, + postcode & 0xff, (postcode >> 8) & 0xff); + mic_ctx->reset_count++; + + /* Assuming that the bootstrap takes around 90 seconds to reset, + * we fail after 300 seconds, thus allowing 3 attempts to reset + */ + if (mic_ctx->reset_count == RESET_FAIL_TIME || + !postcode || 0xffffffff == postcode || mic_ctx->state == MIC_RESETFAIL) { + mic_ctx->reset_count = 0; + mic_setstate(mic_ctx, MIC_RESETFAIL); + wake_up(&mic_ctx->resetwq); + printk("MIC %d RESETFAIL postcode %c%c %d\n",
mic_ctx->bi_id, + postcode & 0xff, (postcode >> 8) & 0xff, postcode); + return; + } + + /* check for F2 or F4 error codes from bootstrap */ + if ((postcode == RESET_FAILED_F2) || (postcode == RESET_FAILED_F4)) { + if (mic_ctx->resetworkq) { + queue_work(mic_ctx->resetworkq, &mic_ctx->resetwork); + } else { + mic_ctx->reset_count = 0; + mic_setstate(mic_ctx, MIC_RESETFAIL); + wake_up(&mic_ctx->resetwq); + return; + } + } + + /* checking if bootstrap is ready or still resetting */ + scratch2 = SBOX_READ(mic_ctx->mmio.va, SBOX_SCRATCH2); + if (SCRATCH2_DOWNLOAD_STATUS(scratch2)) { + mic_ctx->boot_start = 0; + mic_setstate(mic_ctx, MIC_READY); + + if (mic_ctx->msie) + mic_enable_msi_interrupts(mic_ctx); + mic_enable_interrupts(mic_ctx); + mic_smpt_restore(mic_ctx); + micscif_start(mic_ctx); + + wake_up(&mic_ctx->resetwq); + mic_ctx->reset_count = 0; + + return; + } + + mic_ctx->boot_timer.function = reset_timer; + mic_ctx->boot_timer.data = (unsigned long)mic_ctx; + mic_ctx->boot_timer.expires = jiffies + HZ; + + add_timer(&mic_ctx->boot_timer); +} + +void +adapter_wait_reset(mic_ctx_t *mic_ctx) +{ + mic_ctx->boot_timer.function = reset_timer; + mic_ctx->boot_timer.data = (unsigned long)mic_ctx; + mic_ctx->boot_timer.expires = jiffies + HZ; + mic_ctx->boot_start = jiffies; + + add_timer(&mic_ctx->boot_timer); +} + +void +adapter_reset(mic_ctx_t *mic_ctx, int wait_reset, int reattempt) +{ + uint32_t resetReg; + mutex_lock(&mic_ctx->state_lock); + /* TODO: check state for lost node as well once design is done */ + if ((mic_ctx->state == MIC_RESET || mic_ctx->state == MIC_READY) && (reattempt == 0)) { + if (wait_reset == 0) { + mic_setstate(mic_ctx, MIC_INVALID); + del_timer_sync(&mic_ctx->boot_timer); + mutex_unlock(&mic_ctx->state_lock); + return; + } + mutex_unlock(&mic_ctx->state_lock); + return; + } + + mic_setstate(mic_ctx, MIC_RESET); + + mutex_unlock(&mic_ctx->state_lock); + + del_timer_sync(&mic_ctx->boot_timer); + + //Write 0 to uos download status otherwise we might continue booting + //before reset has completed... + SBOX_WRITE(0, mic_ctx->mmio.va, SBOX_SCRATCH2); + + // Virtual network link value should be 0 before reset + SBOX_WRITE(0, mic_ctx->mmio.va, SBOX_SCRATCH14); + + // Data from Doorbell1 about restart/shutdown should be 0 before reset + SBOX_WRITE(0, mic_ctx->mmio.va, SBOX_SDBIC1); + + //This will trigger reset + resetReg = SBOX_READ(mic_ctx->mmio.va, SBOX_RGCR); + resetReg |= 0x1; + SBOX_WRITE(resetReg, mic_ctx->mmio.va, SBOX_RGCR); + + /* At least of KNF it seems we really want to delay at least 1 second */ + /* after touching reset to prevent a lot of problems. */ + msleep(1000); + + if (!wait_reset) { + return; + } + + adapter_wait_reset(mic_ctx); + +} + +void ramoops_flip(mic_ctx_t *mic_ctx); + +int +adapter_shutdown_device(mic_ctx_t *mic_ctx) +{ + ; + + if (micpm_get_reference(mic_ctx, true)) + return 0; + + mutex_lock(&mic_ctx->state_lock); + if (mic_ctx->state == MIC_ONLINE) { + mic_setstate(mic_ctx, MIC_SHUTDOWN); + + /* + * Writing to SBOX RDMASR0 will generate an interrupt + * on the uOS which will initiate orderly shutdown. 
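+ * A PM reference is taken with micpm_get_reference() before raising the doorbell and released afterwards, presumably to keep the card out of low-power states while the shutdown request is delivered.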
+ */ + mic_send_sht_intr(mic_ctx); + } + mutex_unlock(&mic_ctx->state_lock); + + micpm_put_reference(mic_ctx); + return 0; +}
+ +int +adapter_stop_device(mic_ctx_t *mic_ctx, int wait_reset, int reattempt) +{ + micvcons_stop(mic_ctx); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,34) || \ + defined(RHEL_RELEASE_CODE) + mic_vhost_blk_stop(mic_ctx->bd_info); +#endif + micveth_stop(mic_ctx); + + micpm_stop(mic_ctx); + micscif_stop(mic_ctx); + vmcore_remove(mic_ctx); + close_dma_device(mic_ctx->bi_id + 1, &mic_ctx->dma_handle); + ramoops_flip(mic_ctx); + + /* Calling adapter_reset after issuing a host shutdown/reboot + * leads to random NMIs. These are not related to any card in + * particular but occur on the PCI bridge. */ + if ((system_state == SYSTEM_POWER_OFF) || + (system_state == SYSTEM_RESTART) || + (system_state == SYSTEM_HALT)) + return 0; + adapter_reset(mic_ctx, wait_reset, reattempt); + + return 0; +}
+ +static void +destroy_reset_workqueue(mic_ctx_t *mic_ctx) +{ + struct workqueue_struct *tempworkq; + tempworkq = mic_ctx->resetworkq; + mic_ctx->resetworkq = NULL; + destroy_workqueue(tempworkq); + del_timer_sync(&mic_ctx->boot_timer); +}
+ +int +adapter_remove(mic_ctx_t *mic_ctx) +{ + +#ifdef USE_VCONSOLE + if (mic_ctx->bi_vcons.dc_hdr_virt) { + mic_ctx_unmap_single(mic_ctx, mic_ctx->bi_vcons.dc_hdr_dma_addr, + sizeof(struct vcons_buf)); + kfree(mic_ctx->bi_vcons.dc_hdr_virt); + mic_ctx->bi_vcons.dc_hdr_virt = NULL; + } + + if (mic_ctx->bi_vcons.dc_buf_virt) { + mic_ctx_unmap_single(mic_ctx, mic_ctx->bi_vcons.dc_dma_addr, + MICVCONS_BUF_SIZE); + free_pages((uint64_t)mic_ctx->bi_vcons.dc_buf_virt, 0); + mic_ctx->bi_vcons.dc_buf_virt = NULL; + } +#endif + + mic_psmi_uninit(mic_ctx); + micpm_remove(mic_ctx); + micscif_remove(mic_ctx); +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,34)) || defined(RHEL_RELEASE_CODE) + mic_vhost_blk_remove(mic_ctx->bd_info); +#endif + micveth_remove(mic_ctx); + mic_unreg_irqhandler(mic_ctx, 0x1, "MIC SHUTDOWN DoorBell 1"); + + ramoops_remove(mic_ctx); + vmcore_remove(mic_ctx); + mic_smpt_uninit(mic_ctx); + /* Make sure that no reset timer is running after the workqueue is destroyed */ + destroy_reset_workqueue(mic_ctx); + + if (mic_ctx->mmio.va) { + iounmap((void *)mic_ctx->mmio.va); + mic_ctx->mmio.va = 0; + } + + if (mic_ctx->aper.va) { + iounmap((void *)mic_ctx->aper.va); + mic_ctx->aper.va = 0; + } + + return 0; +}
+ +#define MIC_MAX_BOOT_TIME 180 // Maximum number of seconds to wait for boot to complete + +static void +online_timer(unsigned long arg) +{ + mic_ctx_t *mic_ctx = (mic_ctx_t *)arg; + uint64_t delay = (jiffies - mic_ctx->boot_start) / HZ; + + if (mic_ctx->state == MIC_ONLINE) + return; + + if (delay > MIC_MAX_BOOT_TIME) { + printk("Failed to boot MIC %d. Wait time exceeded %d seconds\n", mic_ctx->bi_id, MIC_MAX_BOOT_TIME); + mic_ctx->state = MIC_BOOTFAIL; + return; + } + + mic_ctx->boot_timer.function = online_timer; + mic_ctx->boot_timer.data = (unsigned long)mic_ctx; + mic_ctx->boot_timer.expires = jiffies + HZ; + add_timer(&mic_ctx->boot_timer); + + if (!(delay % 5)) + printk("Waiting for MIC %d boot %lld\n", mic_ctx->bi_id, delay); +}
+ +static void +boot_timer(unsigned long arg) +{ + mic_ctx_t *mic_ctx = (mic_ctx_t *)arg; + struct micvnet_info *vnet_info = (struct micvnet_info *) mic_ctx->bi_vethinfo; + uint64_t delay = (jiffies - mic_ctx->boot_start) / HZ; + bool timer_restart = false; + + if ((mic_ctx->state != MIC_BOOT) && (mic_ctx->state != MIC_ONLINE)) { + return; + } + + if (delay > MIC_MAX_BOOT_TIME) { + printk("Failed to boot MIC %d. Wait time exceeded %d seconds\n", mic_ctx->bi_id, MIC_MAX_BOOT_TIME); + mic_ctx->state = MIC_BOOTFAIL; + return; + } + + if (!(delay % 5)) + printk("Waiting for MIC %d boot %lld\n", mic_ctx->bi_id, delay); + + if (mic_vnet_mode != VNET_MODE_DMA) + timer_restart = (SBOX_READ(mic_ctx->mmio.va, SBOX_SCRATCH14) == 0) ? + true : false; + else if (atomic_read(&vnet_info->vi_state) != MICVNET_STATE_LINKUP) + timer_restart = (mic_ctx->state != MIC_ONLINE) ? true : false; + + if (timer_restart) { + mic_ctx->boot_timer.function = boot_timer; + mic_ctx->boot_timer.data = (unsigned long)mic_ctx; + mic_ctx->boot_timer.expires = jiffies + HZ; + + add_timer(&mic_ctx->boot_timer); + return; + } + + mic_ctx->boot_timer.function = online_timer; + mic_ctx->boot_timer.data = (unsigned long)mic_ctx; + mic_ctx->boot_timer.expires = jiffies + HZ; + add_timer(&mic_ctx->boot_timer); + + printk("MIC %d Network link is up\n", mic_ctx->bi_id); + schedule_work(&mic_ctx->boot_ws); +}
+ +void +post_boot_startup(struct work_struct *work) +{ + mic_ctx_t *mic_ctx + = container_of(work, mic_ctx_t, boot_ws); + + if (micpm_get_reference(mic_ctx, true) != 0) + return; + + // We should only enable DMA after the uos is booted + BUG_ON(open_dma_device(mic_ctx->bi_id+1, + mic_ctx->mmio.va + HOST_SBOX_BASE_ADDRESS, + &mic_ctx->dma_handle)); + if (micveth_start(mic_ctx)) + printk(KERN_ERR "%s: micveth_start failed\n", __FUNCTION__); + micpm_put_reference(mic_ctx); +}
+ +void +attempt_reset(struct work_struct *work) +{ + mic_ctx_t *mic_ctx + = container_of(work, mic_ctx_t, resetwork); + printk("Reattempting reset after F2/F4 failure\n"); + adapter_reset(mic_ctx, RESET_WAIT, RESET_REATTEMPT); +}
+ +static void +ioremap_work(struct work_struct *work) +{ + mic_ctx_t *mic_ctx + = container_of(work, mic_ctx_t, ioremapwork); + mic_ctx->aper.va = ioremap_wc(mic_ctx->aper.pa, mic_ctx->aper.len); + if (mic_ctx->aper.va == NULL) { + printk(KERN_ERR "mic %d: failed to map aperture space\n", mic_ctx->bi_id); + mutex_lock(&mic_ctx->state_lock); + mic_setstate(mic_ctx, MIC_RESETFAIL); + mutex_unlock(&mic_ctx->state_lock); + } + wake_up(&mic_ctx->ioremapwq); +}
+ +int +adapter_post_boot_device(mic_ctx_t *mic_ctx) +{ + mic_ctx->boot_timer.function = boot_timer; + mic_ctx->boot_timer.data = (unsigned long)mic_ctx; + mic_ctx->boot_timer.expires = jiffies + HZ; + mic_ctx->boot_start = jiffies; + + add_timer(&mic_ctx->boot_timer); + return 0; +}
+ +int +mic_shutdown_host_doorbell_intr_handler(mic_ctx_t *mic_ctx, int doorbell) +{ + struct micscif_dev *dev = &scif_dev[mic_get_scifnode_id(mic_ctx)]; + mic_ctx->sdbic1 = SBOX_READ(mic_ctx->mmio.va, SBOX_SDBIC1); + SBOX_WRITE(0x0, mic_ctx->mmio.va, SBOX_SDBIC1); + if (mic_ctx->sdbic1) + queue_delayed_work(dev->sd_ln_wq, +
&dev->sd_watchdog_work, 0); + return 0; +} + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) +static int +ramoops_proc_show(struct seq_file *m, void *data) +{ + uint64_t id = ((uint64_t)data) & 0xffffffff; + uint64_t entry = ((uint64_t)data) >> 32; + struct list_head *pos, *tmpq; + bd_info_t *bd = NULL; + mic_ctx_t *mic_ctx = NULL; + char *record; + char *end; + int size = 0; + int l = 0; + char *output; + unsigned long flags; + + list_for_each_safe(pos, tmpq, &mic_data.dd_bdlist) { + bd = list_entry(pos, bd_info_t, bi_list); + mic_ctx = &bd->bi_ctx; + if (mic_ctx->bi_id == id) + break; + } + + if (mic_ctx == NULL) + return 0; + + spin_lock_irqsave(&mic_ctx->ramoops_lock, flags); + + record = mic_ctx->ramoops_va[entry]; + if (record == NULL) { + spin_unlock_irqrestore(&mic_ctx->ramoops_lock, flags); + return -EEXIST; + } + + size = mic_ctx->ramoops_size; + end = record + size; + + if ((output = kzalloc(size, GFP_ATOMIC)) == NULL) { + spin_unlock_irqrestore(&mic_ctx->ramoops_lock, flags); + return -ENOMEM; + } + + l += scnprintf(output, size, "%s", record); + + spin_unlock_irqrestore(&mic_ctx->ramoops_lock, flags); + + seq_printf(m, "%s", output); + return 0; +} + +static int +ramoops_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ramoops_proc_show, NULL); +} + +struct file_operations ramoops_proc_fops = { + .open = ramoops_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +#else // LINUX VERSION +static int +ramoops_read(char *buf, char **start, off_t offset, int len, int *eof, void *data) +{ + uint64_t id = ((uint64_t)data) & 0xffffffff; + uint64_t entry = ((uint64_t)data) >> 32; + struct list_head *pos, *tmpq; + bd_info_t *bd = NULL; + mic_ctx_t *mic_ctx = NULL; + char *record; + char *end; + int size = 0; + int l = 0; + int left_to_read; + char *output; + unsigned long flags; + + list_for_each_safe(pos, tmpq, &mic_data.dd_bdlist) { + bd = list_entry(pos, bd_info_t, bi_list); + mic_ctx = &bd->bi_ctx; + if (mic_ctx->bi_id == id) + break; + } + + if (mic_ctx == NULL) + return 0; + + spin_lock_irqsave(&mic_ctx->ramoops_lock, flags); + + record = mic_ctx->ramoops_va[entry]; + if (record == NULL) { + spin_unlock_irqrestore(&mic_ctx->ramoops_lock, flags); + *eof = 1; + return 0; + } + + size = mic_ctx->ramoops_size; + end = record + size; + + if ((output = kzalloc(size, GFP_ATOMIC)) == NULL) { + spin_unlock_irqrestore(&mic_ctx->ramoops_lock, flags); + return -ENOMEM; + } + + l += scnprintf(output, size, "%s", record); + + spin_unlock_irqrestore(&mic_ctx->ramoops_lock, flags); + + left_to_read = l - offset; + if (left_to_read < 0) + left_to_read = 0; + if (left_to_read == 0) + *eof = 1; + + left_to_read = min(len, left_to_read); + memcpy(buf, output + offset, left_to_read); + kfree(output); + *start = buf; + return left_to_read; +} +#endif // LINUX VERSION + +int +set_ramoops_pa(mic_ctx_t *mic_ctx) +{ + if (mic_ctx->ramoops_pa[0] == 0L) { + kfree(mic_ctx->ramoops_va[0]); + mic_ctx->ramoops_size = 0; + mic_ctx->ramoops_va[0] = NULL; + return 1; + } + return 0; +} + +int ramoops_count = 4; + +void +ramoops_probe(mic_ctx_t *mic_ctx) +{ + char name[64]; + + mic_ctx->ramoops_size = ramoops_count * PAGE_SIZE; + if ((mic_ctx->ramoops_va[0] = kzalloc(mic_ctx->ramoops_size, GFP_KERNEL)) != NULL) { + spin_lock_init(&mic_ctx->ramoops_lock); + mic_ctx->ramoops_va[1] = NULL; + + mic_ctx->ramoops_pa[0] = mic_ctx_map_single(mic_ctx, mic_ctx->ramoops_va[0], + mic_ctx->ramoops_size); + if (set_ramoops_pa(mic_ctx)) + return; + +#if 
(LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + snprintf(name, 64, "mic%d", mic_ctx->bi_id); + proc_create_data(name, 0444, ramoops_dir, &ramoops_proc_fops, + (void *)(long)mic_ctx->bi_id); + + snprintf(name, 64, "mic%d_prev", mic_ctx->bi_id); + proc_create_data(name, 0444, ramoops_dir, &ramoops_proc_fops, + (void *)((long)mic_ctx->bi_id | (1L << 32))); +#else // LINUX VERSION + snprintf(name, 64, "mic%d", mic_ctx->bi_id); + if (create_proc_read_entry(name, 0444, ramoops_dir, ramoops_read, + (void *)(long)mic_ctx->bi_id) == NULL) + printk("Failed to intialize /proc/mic_ramoops/%s\n", name); + + snprintf(name, 64, "mic%d_prev", mic_ctx->bi_id); + if (create_proc_read_entry(name, 0444, ramoops_dir, ramoops_read, + (void *)((long)mic_ctx->bi_id | (1L << 32))) == NULL) + printk("Failed to intialize /proc/mic_ramoops/%s\n", name); +#endif //LINUX VERSION + } else { + mic_ctx->ramoops_size = 0; + } +} + +void +ramoops_flip(mic_ctx_t *mic_ctx) +{ + unsigned long flags; + + if (mic_ctx->ramoops_size == 0) + return; + + spin_lock_irqsave(&mic_ctx->ramoops_lock, flags); + if (mic_ctx->ramoops_va[1] != NULL) { + mic_ctx_unmap_single(mic_ctx, mic_ctx->ramoops_pa[1], mic_ctx->ramoops_size); + kfree(mic_ctx->ramoops_va[1]); + } + + mic_ctx->ramoops_pa[1] = mic_ctx->ramoops_pa[0]; + mic_ctx->ramoops_va[1] = mic_ctx->ramoops_va[0]; + if ((mic_ctx->ramoops_va[0] = kzalloc(mic_ctx->ramoops_size, GFP_ATOMIC)) != NULL) { + mic_ctx->ramoops_pa[0] = mic_ctx_map_single(mic_ctx, mic_ctx->ramoops_va[0], + mic_ctx->ramoops_size); + set_ramoops_pa(mic_ctx); + } + spin_unlock_irqrestore(&mic_ctx->ramoops_lock, flags); +} + +int +adapter_probe(mic_ctx_t *mic_ctx) +{ + int db; + uint32_t scratch13; + int32_t status = 0; + + // Init the irq information + atomic_set(&mic_ctx->bi_irq.mi_received, 0); + spin_lock_init(&mic_ctx->bi_irq.mi_lock); + tasklet_init(&mic_ctx->bi_dpc, adapter_dpc, (unsigned long)&mic_ctx->bi_dpc); + + for (db = 0; db < MIC_NUM_DB; db++) { + INIT_LIST_HEAD(&mic_ctx->bi_irq.mi_dblist[db]); + } + + if (mic_ctx->msie) + mic_enable_msi_interrupts(mic_ctx); + + scratch13 = SBOX_READ(mic_ctx->mmio.va, SBOX_SCRATCH13); + mic_ctx->bi_stepping = SCRATCH13_STEP_ID(scratch13); + mic_ctx->bi_substepping = SCRATCH13_SUB_STEP(scratch13); +#ifdef MIC_IS_EMULATION + mic_ctx->bi_platform = PLATFORM_EMULATOR; +#else + mic_ctx->bi_platform = SCRATCH13_PLATFORM_ID(scratch13); +#endif + + mic_enable_interrupts(mic_ctx); + if (micveth_probe(mic_ctx)) + printk(KERN_ERR "%s: micveth_probe failed\n", __FUNCTION__); + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,34)) || defined(RHEL_RELEASE_CODE) + if (mic_vhost_blk_probe(mic_ctx->bd_info)) + printk(KERN_ERR "%s: mic_vhost_blk_probe failed\n", __FUNCTION__); +#endif + micscif_probe(mic_ctx); + if(micpm_probe(mic_ctx)) + printk(KERN_ERR "%s: micpm_probe failed\n", __FUNCTION__); + + mic_reg_irqhandler(mic_ctx, 1, "MIC SHUTDOWN DoorBell 1", + mic_shutdown_host_doorbell_intr_handler); + + ramoops_probe(mic_ctx); + if (status) { + printk("boot_linux_uos failed \n"); + return status; + } + + // We should only enable DMA after uos is booted + //mic_dma_lib_init(mic_ctx->mmio.va+HOST_SBOX_BASE_ADDRESS); + + return status; +} + +int +adapter_start_device(mic_ctx_t *mic_ctx) +{ + int ret; + + mutex_lock(&mic_ctx->state_lock); + if (mic_ctx->state == MIC_READY) { + mic_setstate(mic_ctx, MIC_BOOT); + } else { + mutex_unlock(&mic_ctx->state_lock); + /* TODO: Unknown state handling? 
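A state other than MIC_READY here (for example a card that is still resetting, already booted, or in a failed state) currently just causes the boot request to be rejected with -EINVAL.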
*/ + printk(KERN_ERR "%s %d state %d??\n", + __func__, __LINE__, mic_ctx->state); + ret = -EINVAL; + goto exit; + } + mutex_unlock(&mic_ctx->state_lock); + mic_ctx->mode = MODE_LINUX; + ret = boot_linux_uos(mic_ctx, mic_ctx->image, mic_ctx->initramfs); + if (ret) { + printk(KERN_ERR "boot_linux_uos failed %d\n", ret); + goto exit; + } + + ret = adapter_post_boot_device(mic_ctx); + if (ret) { + printk(KERN_ERR "adapter post boot failed %d\n", ret); + goto exit; + } + + pr_debug("adapter started successfully\n"); +exit: + return ret; +} + +int +adapter_init_device(mic_ctx_t *mic_ctx) +{ +#ifdef USE_VCONSOLE + struct vcons_buf *vcons_buf; +#endif + uint32_t mmio_data_cc; /* mmio data from class code register */ + uint32_t mmio_data_bar; /* mmio data from bar enable register */ + uint32_t device_id; + int err = 0; + + spin_lock_init(&mic_ctx->sysfs_lock); + mic_setstate(mic_ctx, MIC_RESET); + mic_ctx->mode = MODE_NONE; + mic_ctx->reset_count = 0; + mutex_init (&mic_ctx->state_lock); + init_waitqueue_head(&mic_ctx->resetwq); + init_waitqueue_head(&mic_ctx->ioremapwq); + init_timer(&mic_ctx->boot_timer); + if (!(mic_ctx->resetworkq = __mic_create_singlethread_workqueue("RESET WORK"))) + return -ENOMEM; + if (!(mic_ctx->ioremapworkq = __mic_create_singlethread_workqueue("IOREMAP_WORK"))) { + err = -EINVAL; + goto destroy_reset_wq; + } + INIT_WORK(&mic_ctx->ioremapwork, ioremap_work); + INIT_WORK(&mic_ctx->boot_ws, post_boot_startup); + INIT_WORK(&mic_ctx->resetwork, attempt_reset); + atomic_set(&mic_ctx->gate_interrupt, 0); + + device_id = mic_ctx->bi_pdev->device; + mic_ctx->bi_family = get_product_family(device_id); + + if ((mic_ctx->mmio.va = ioremap_nocache(mic_ctx->mmio.pa, + mic_ctx->mmio.len)) == NULL) { + printk("mic %d: failed to map mmio space\n", mic_ctx->bi_id); + err = -ENOMEM; + goto destroy_remap_wq; + } + + if (mic_ctx->aper.pa == 0) { + /* + * Read class code from SBOX_PCIE_PCI_REVISION_ID_AND_C_0X8 register + * If the mode is zombie, then + * 1> Aperture is not available + * 2> Register 0x5CD4 is written to 0x00000002 to disable all BARs except MMIO + * 3> Register 0x5808 is written to 0xFF0000XX to set the class ID to a generic PCI device. 
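+ * The checks below compare the values read back over MMIO against ZOMBIE_CLASS_CODE and DISABLE_BAR before putting the board into USAGE_MODE_ZOMBIE.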
+ */ + mmio_data_cc = SBOX_READ(mic_ctx->mmio.va, SBOX_PCIE_PCI_REVISION_ID_AND_C_0X8); + mmio_data_cc = PCIE_CLASS_CODE(mmio_data_cc); + mmio_data_bar = SBOX_READ(mic_ctx->mmio.va, SBOX_PCIE_BAR_ENABLE); + + if((mmio_data_cc == ZOMBIE_CLASS_CODE) && (mmio_data_bar == DISABLE_BAR)) { + mic_ctx->card_usage_mode = USAGE_MODE_ZOMBIE; + usagemode_param = USAGE_MODE_ZOMBIE; + } else { + printk("Error: Not in zombie mode and aperture is 0\n"); + err = -EINVAL; + goto adap_init_unmapmmio; + } + } else { + if (mic_ctx->ioremapworkq) { + queue_work(mic_ctx->ioremapworkq, &mic_ctx->ioremapwork); + } else { + if ((mic_ctx->aper.va = ioremap_wc(mic_ctx->aper.pa, mic_ctx->aper.len)) == NULL) { + printk("mic %d: failed to map aperture space\n", mic_ctx->bi_id); + err = -EINVAL; + goto adap_init_unmapmmio; + } + } + } + + mic_debug_init(mic_ctx); + mic_smpt_init(mic_ctx); +#ifdef USE_VCONSOLE + // Allocate memory for PCI serial console + mic_ctx->bi_vcons.dc_buf_virt = (void *)get_zeroed_page(GFP_KERNEL); + mic_ctx->bi_vcons.dc_hdr_virt = kzalloc(sizeof(struct vcons_buf), GFP_KERNEL); + + if ((!mic_ctx->bi_vcons.dc_buf_virt) || (!mic_ctx->bi_vcons.dc_hdr_virt)) { + printk(KERN_ERR "mic %d: failed to allocate memory for vcons buffer\n", + mic_ctx->bi_id); + mic_ctx->bi_vcons.dc_enabled = 0; + if (mic_ctx->bi_vcons.dc_buf_virt) + free_pages((uint64_t)mic_ctx->bi_vcons.dc_buf_virt, 0); + if (mic_ctx->bi_vcons.dc_hdr_virt) + kfree(mic_ctx->bi_vcons.dc_hdr_virt); + } else { + mic_ctx->bi_vcons.dc_hdr_dma_addr = mic_ctx_map_single(mic_ctx, + mic_ctx->bi_vcons.dc_hdr_virt, + sizeof(struct vcons_buf)); + mic_ctx->bi_vcons.dc_dma_addr = mic_ctx_map_single(mic_ctx, + mic_ctx->bi_vcons.dc_buf_virt, + MICVCONS_BUF_SIZE); + if ((!mic_ctx->bi_vcons.dc_dma_addr) || + (!mic_ctx->bi_vcons.dc_hdr_dma_addr)) + mic_ctx->bi_vcons.dc_enabled = 0; + else + mic_ctx->bi_vcons.dc_enabled = 1; + mic_ctx->bi_vcons.dc_size = MICVCONS_BUF_SIZE; + vcons_buf = (struct vcons_buf *)(mic_ctx->bi_vcons.dc_hdr_virt); + vcons_buf->o_buf_dma_addr = mic_ctx->bi_vcons.dc_dma_addr; + vcons_buf->o_size = MICVCONS_BUF_SIZE; + smp_wmb(); + vcons_buf->host_magic = MIC_HOST_VCONS_READY; + vcons_buf->host_rb_ver = micscif_rb_get_version(); + } +#endif // USE_VCONSOLE + mic_ctx->boot_mem = 0; + mic_psmi_init(mic_ctx); + mic_ctx->dma_handle = NULL; + mic_ctx->sdbic1 = 0; + // To avoid hazard on Windows, sku_build_table is done on DriverEntry + sku_build_table(); + device_id = mic_ctx->bi_pdev->device; + sku_find(mic_ctx, device_id); + // To avoid hazard on Windows, sku_destroy_table is done on MicUnload + sku_destroy_table(); + + /* Determine the amount of compensation that needs to be applied to MIC's ETC timer */ + calculate_etc_compensation(mic_ctx); + + return 0; + +adap_init_unmapmmio: + iounmap(mic_ctx->mmio.va); +destroy_remap_wq: + destroy_workqueue(mic_ctx->ioremapworkq); +destroy_reset_wq: + destroy_workqueue(mic_ctx->resetworkq); + return err; +} + +void +mic_enable_interrupts(mic_ctx_t *mic_ctx) +{ + ENABLE_MIC_INTERRUPTS(mic_ctx->mmio.va); +} + +void +mic_disable_interrupts(mic_ctx_t *mic_ctx) +{ + uint32_t sboxSice0reg; + + sboxSice0reg = SBOX_READ(mic_ctx->mmio.va, SBOX_SICE0); + SBOX_WRITE(sboxSice0reg, mic_ctx->mmio.va, SBOX_SICC0); +} + +void +mic_enable_msi_interrupts(mic_ctx_t *mic_ctx) +{ + uint32_t sboxMXARreg; + + // Only support single MSI interrupt for now + sboxMXARreg = SBOX_SICE0_DBR_BITS(0xf) | SBOX_SICE0_DMA_BITS(0xff); + if (mic_ctx->bi_family == FAMILY_KNC) + SBOX_WRITE(sboxMXARreg, mic_ctx->mmio.va, SBOX_MXAR0_K1OM); + 
else + SBOX_WRITE(sboxMXARreg, mic_ctx->mmio.va, SBOX_MXAR0); +} + +int +mic_reg_irqhandler(mic_ctx_t *mic_ctx, int doorbell, char *idstring, + int (*irqfunc)(mic_ctx_t *mic_ctx, int doorbell)) +{ + mic_irqhandler_t *irqhandle; + unsigned long flags; + + if (doorbell > MIC_IRQ_MAX) { + return EINVAL; + } + + if (!(irqhandle = kmalloc(sizeof(mic_irqhandler_t), GFP_ATOMIC))) + goto memerror1; + + if (!(irqhandle->ih_idstring = kmalloc(strlen(idstring) + 1, GFP_ATOMIC))) + goto memerror2; + + irqhandle->ih_func = irqfunc; + strcpy(irqhandle->ih_idstring, idstring); + + spin_lock_irqsave(&mic_ctx->bi_irq.mi_lock, flags); + list_add_tail(&irqhandle->ih_list, &mic_ctx->bi_irq.mi_dblist[doorbell]); + spin_unlock_irqrestore(&mic_ctx->bi_irq.mi_lock, flags); + return 0; + +memerror2: + kfree(irqhandle); +memerror1: + return -ENOMEM; +} + +int +mic_unreg_irqhandler(mic_ctx_t *mic_ctx, int doorbell, char *idstring) +{ + mic_irqhandler_t *irqhandle; + struct list_head *pos, *tmpq; + unsigned long flags; + + spin_lock_irqsave(&mic_ctx->bi_irq.mi_lock, flags); + list_for_each_safe(pos, tmpq, &mic_ctx->bi_irq.mi_dblist[doorbell]) { + irqhandle = list_entry(pos, mic_irqhandler_t, ih_list); + if (strcmp(idstring, irqhandle->ih_idstring) == 0) { + list_del(pos); + kfree(irqhandle->ih_idstring); + kfree(irqhandle); + } + } + spin_unlock_irqrestore(&mic_ctx->bi_irq.mi_lock, flags); + + return 0; +} + +static __always_inline +void adapter_process_one_interrupt(mic_ctx_t *mic_ctx, uint32_t events) +{ + mic_irqhandler_t *irqhandle; + struct list_head *pos; + int doorbell; + + atomic_inc(&mic_ctx->bi_irq.mi_received); + + if (SBOX_SICR0_DBR(events)) { + for (doorbell = 0; doorbell < 4; doorbell++) { + if (SBOX_SICR0_DBR(events) & (0x1 << doorbell)) { + spin_lock(&mic_ctx->bi_irq.mi_lock); + list_for_each(pos, &mic_ctx->bi_irq.mi_dblist[doorbell]) { + irqhandle = list_entry(pos, mic_irqhandler_t, ih_list); + irqhandle->ih_func(mic_ctx, doorbell); + } + spin_unlock(&mic_ctx->bi_irq.mi_lock); + } + } + + } + + if (SBOX_SICR0_DMA(events)) + host_dma_interrupt_handler(mic_ctx->dma_handle, events); +} + +int +adapter_isr(mic_ctx_t *mic_ctx) +{ + volatile uint32_t sboxSicr0reg; + if (atomic_cmpxchg(&mic_ctx->gate_interrupt, 0, 1) == 1) + return -1; + + sboxSicr0reg = SBOX_READ(mic_ctx->mmio.va, SBOX_SICR0); + + if (unlikely(!sboxSicr0reg)) { + // Spurious interrupt + atomic_set(&mic_ctx->gate_interrupt, 0); + return -1; + } + + // tell mic that we recived interrupt otherwise it will keep sending them + SBOX_WRITE(sboxSicr0reg, mic_ctx->mmio.va, SBOX_SICR0); + + // This only applies to KNC B0 + if (FAMILY_KNC == mic_ctx->bi_family && + mic_ctx->bi_stepping >= KNC_B0_STEP) + mic_enable_interrupts(mic_ctx); + + atomic_set(&mic_ctx->gate_interrupt, 0); + adapter_process_one_interrupt(mic_ctx, sboxSicr0reg); + return 0; +} + +int +adapter_imsr(mic_ctx_t *mic_ctx) +{ +#if 0 /* TODO: disable interrupt when KNC auto-enable isn't used */ + mic_disable_interrupts(mic_ctx); +#endif + tasklet_schedule(&mic_ctx->bi_dpc); + return 0; +} + +static void adapter_dpc(unsigned long dpc) +{ + mic_ctx_t *mic_ctx = + container_of((struct tasklet_struct *)dpc, mic_ctx_t, bi_dpc); + + volatile uint32_t sboxSicr0reg; + + if (atomic_cmpxchg(&mic_ctx->gate_interrupt, 0, 1) == 1) + return; + + /* Clear pending bit array */ + if (FAMILY_KNC == mic_ctx->bi_family) { + if (KNC_A_STEP == mic_ctx->bi_stepping) + SBOX_WRITE(1, mic_ctx->mmio.va, SBOX_MSIXPBACR_K1OM); + } else + SBOX_WRITE(1, mic_ctx->mmio.va, SBOX_MSIXPBACR); + + sboxSicr0reg = 
SBOX_READ(mic_ctx->mmio.va, SBOX_SICR0); + if (unlikely(!sboxSicr0reg)) { + atomic_set(&mic_ctx->gate_interrupt, 0); + return; + } + + SBOX_WRITE(sboxSicr0reg, mic_ctx->mmio.va, SBOX_SICR0); + + // This only applies to KNC B0 + if (FAMILY_KNC == mic_ctx->bi_family && + mic_ctx->bi_stepping >= KNC_B0_STEP) + mic_enable_interrupts(mic_ctx); + + atomic_set(&mic_ctx->gate_interrupt, 0); + adapter_process_one_interrupt(mic_ctx, sboxSicr0reg); +} + +void ramoops_init(void) +{ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + ramoops_dir = proc_mkdir("mic_ramoops", NULL); +#else + ramoops_dir = create_proc_entry("mic_ramoops", S_IFDIR | S_IRUGO, NULL); +#endif +} + +void ramoops_exit(void) +{ + remove_proc_entry("mic_ramoops", NULL); +} + +void ramoops_remove(mic_ctx_t *mic_ctx) +{ + char name[64]; + int i; + + snprintf(name, 64, "mic%d", mic_ctx->bi_id); + remove_proc_entry(name, ramoops_dir); + + snprintf(name, 64, "mic%d_prev", mic_ctx->bi_id); + remove_proc_entry(name, ramoops_dir); + if (mic_ctx->ramoops_size == 0) + return; + + for (i = 0; i < 2; i++) { + if (mic_ctx->ramoops_va[i] != NULL) { + mic_ctx_unmap_single(mic_ctx, mic_ctx->ramoops_pa[i], + mic_ctx->ramoops_size); + kfree(mic_ctx->ramoops_va[i]); + } + } +} + +void vmcore_init(void) +{ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + vmcore_dir = proc_mkdir("mic_vmcore", NULL); +#else + vmcore_dir = create_proc_entry("mic_vmcore", S_IFDIR | S_IRUGO, NULL); +#endif +} + +void vmcore_exit(void) +{ + if (vmcore_dir) { + remove_proc_entry("mic_vmcore", NULL); + vmcore_dir = NULL; + } +} + +void vmcore_remove(mic_ctx_t *mic_ctx) +{ + char name[64]; + + snprintf(name, 64, "mic%d", mic_ctx->bi_id); + if (mic_ctx->vmcore_dir) { + remove_proc_entry(name, vmcore_dir); + mic_ctx->vmcore_dir = NULL; + } + if (mic_ctx->elfcorebuf) { + kfree(mic_ctx->elfcorebuf); + mic_ctx->elfcorebuf = NULL; + mic_ctx->elfcorebuf_sz = 0; + mic_ctx->vmcore_size = 0; + } +} + + +void +adapter_init(void) +{ + // Per driver init ONLY. + mic_dma_init(); + micscif_init(); + micpm_init(); + ramoops_init(); + vmcore_init(); + INIT_LIST_HEAD(&mic_data.dd_bdlist); +} + + +void show_stepping_comm(mic_ctx_t *mic_ctx,char *buf) +{ +#define STEPINGSTRSIZE 3 + char string[STEPINGSTRSIZE]; + switch (mic_ctx->bi_family) { + case FAMILY_ABR: + switch (mic_ctx->bi_stepping) { + case 0: + string[0] = 'A'; + string[1] = mic_ctx->bi_substepping + '0'; + break; + case 2: + string[0] = 'B'; + string[1] = '0'; + break; + case 3: + string[0] = 'B'; + string[1] = '1'; + break; + case 4: + string[0] = 'C'; + string[1] = '0'; + break; + case 5: + string[0] = 'C'; + string[1] = '1'; + break; + case 6: + string[0] = 'D'; + string[1] = '0'; + break; + default: + string[0] = '?'; + string[1] = '?'; + break; + } + break; + case FAMILY_KNC: + switch (mic_ctx->bi_stepping) { + case KNC_A_STEP: + string[0] = 'A'; + string[1] = '0'; + break; + case KNC_B0_STEP: + string[0] = 'B'; + string[1] = '0'; + break; + case KNC_B1_STEP: + string[0] = 'B'; + string[1] = '1'; + break; + case KNC_C_STEP: + string[0] = 'C'; + string[1] = '0'; + break; + default: + string[0] = '?'; + string[1] = '?'; + break; + } + break; + default: + string[0] = '?'; + string[1] = '?'; + break; + } + + string[2] = '\0'; + + strncpy(buf,string,STEPINGSTRSIZE); +} + + diff --git a/host/vhost/mic_blk.c b/host/vhost/mic_blk.c new file mode 100644 index 0000000..9ac2cb8 --- /dev/null +++ b/host/vhost/mic_blk.c @@ -0,0 +1,665 @@ + /* + * Copyright (C) 2009 Red Hat, Inc. + * Author: Michael S. 
Tsirkin + * + * This work is licensed under the terms of the GNU GPL, version 2. + + * (C) Badari Pulavarty pbadari@us.ibm.com 2010 with the following comment. + * He posted on http://lwn.net/Articles/382543/ + + * virtio-block server in host kernel. + * Inspired by vhost-net and shamlessly ripped code from it :) + + * For adapting to MIC + * (C) Copyright 2012 Intel Corporation + * Author: Caz Yokoyama + */ +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,34) || \ + defined(RHEL_RELEASE_CODE) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef VIRTIO_RING_F_EVENT_IDX /* virtio_ring.h of rhel6.0 does not define */ +#define VIRTIO_RING_F_EVENT_IDX 29 +#endif +#include "mic_common.h" +#include "mic/micveth_dma.h" +#include "vhost.h" +#include "mic/mic_virtio.h" + +#define SECTOR_SHIFT 9 +#define SECTOR_SIZE (1UL << SECTOR_SHIFT) +#define VIRTIO_BLK_QUEUE_SIZE 128 +#define DISK_SEG_MAX (VIRTIO_BLK_QUEUE_SIZE - 2) + +#define VHOST_BLK_VQ_MAX 1 +#define WQNAME_SIZE 16 + +struct vhost_blk { + struct vhost_dev dev; + struct vhost_virtqueue vqs[VHOST_BLK_VQ_MAX]; + struct vhost_poll poll[VHOST_BLK_VQ_MAX]; + struct workqueue_struct *vb_wq; + char vb_wqname[WQNAME_SIZE]; + struct work_struct vb_ws_bh; + struct workqueue_struct *vblk_workqueue; + struct board_info *bd_info; + char *file_name; + struct file *virtblk_file; +}; + +struct vhost_blk_io { + struct list_head list; + struct work_struct work; + struct vhost_blk *blk; + struct file *file; + int head; + uint32_t type; + uint32_t nvecs; + uint64_t sector; + uint64_t len; + struct iovec iov[0]; +}; + +#define mic_addr_in_host(va, pa) ((u8 *)(va) + (u64)(pa)) + +static LIST_HEAD(write_queue); +static LIST_HEAD(read_queue); + +static void +cleanup_vblk_workqueue(struct vhost_blk_io *vbio, struct vhost_virtqueue *vq) +{ + struct list_head single, *head, *node, *tmp; + int need_free; + struct vhost_blk_io *entry; + + if (vbio->head != -1) { + INIT_LIST_HEAD(&single); + list_add(&vbio->list, &single); + head = &single; + need_free = 0; + } else { + head = &vbio->list; + need_free = 1; + } + + mutex_lock(&vq->mutex); + list_for_each_safe(node, tmp, head) { + entry = list_entry(node, struct vhost_blk_io, list); + list_del(node); + kfree(entry); + } + mutex_unlock(&vq->mutex); + + if (need_free) + kfree(vbio); +} + +static void handle_io_work(struct work_struct *work) +{ + struct vhost_blk_io *vbio, *entry; + struct vhost_virtqueue *vq; + struct vhost_blk *blk; + struct list_head single, *head, *node, *tmp; + struct iovec *iov; + uint8_t *aper_va; + struct vring *vring; + unsigned int num; + + int need_free, ret = 0; + loff_t pos; + uint8_t status = 0; + + vbio = container_of(work, struct vhost_blk_io, work); + blk = vbio->blk; + vq = &blk->dev.vqs[0]; + pos = vbio->sector << SECTOR_SHIFT; + aper_va = blk->bd_info->bi_ctx.aper.va; + + vring = &((struct mic_virtblk *)blk->bd_info->bi_virtio)->vb_shared.vring; + num = readl(&vring->num); + if (num == 0 || micpm_get_reference(&blk->bd_info->bi_ctx, true)) { + cleanup_vblk_workqueue(vbio, vq); + return; + } + + if (atomic64_read(&vbio->file->f_count) == 0) { /* file is closed */ + ret = -1; + } else if (vbio->type & VIRTIO_BLK_T_FLUSH) { +#ifdef RHEL_RELEASE_CODE +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + ret = vfs_fsync(vbio->file, 1); +#else + ret = vfs_fsync(vbio->file, vbio->file->f_path.dentry, 1); +#endif +#else + ret = vfs_fsync(vbio->file, 1); +#endif + } else if (vbio->type & 
VIRTIO_BLK_T_OUT) { + for (iov = vbio->iov; iov < &vbio->iov[vbio->nvecs]; iov++) { + iov->iov_base = mic_addr_in_host(aper_va, iov->iov_base); + } + ret = vfs_writev(vbio->file, vbio->iov, vbio->nvecs, &pos); + } else { + for (iov = vbio->iov; iov < &vbio->iov[vbio->nvecs]; iov++) { + iov->iov_base = mic_addr_in_host(aper_va, iov->iov_base); + } + ret = vfs_readv(vbio->file, vbio->iov, vbio->nvecs, &pos); + } + status = (ret < 0) ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK; + if (vbio->head != -1) { + INIT_LIST_HEAD(&single); + list_add(&vbio->list, &single); + head = &single; + need_free = 0; + } else { + head = &vbio->list; + need_free = 1; + } + list_for_each_entry(entry, head, list) { + memcpy_toio(mic_addr_in_host(aper_va, entry->iov[entry->nvecs].iov_base), &status, sizeof(status)); + } + mutex_lock(&vq->mutex); + list_for_each_safe(node, tmp, head) { + entry = list_entry(node, struct vhost_blk_io, list); + vhost_add_used_and_signal(&blk->dev, vq, entry->head, ret); + list_del(node); + kfree(entry); + } + mutex_unlock(&vq->mutex); + if (need_free) + kfree(vbio); + micpm_put_reference(&blk->bd_info->bi_ctx); +}
+ +static struct vhost_blk_io *allocate_vbio(int nvecs) +{ + struct vhost_blk_io *vbio; + int size = sizeof(struct vhost_blk_io) + nvecs * sizeof(struct iovec); + vbio = kmalloc(size, GFP_KERNEL); + if (vbio) { + INIT_WORK(&vbio->work, handle_io_work); + INIT_LIST_HEAD(&vbio->list); + } + return vbio; +}
+ +static void merge_and_handoff_work(struct list_head *queue) +{ + struct vhost_blk_io *vbio, *entry; + int nvecs = 0; + int entries = 0; + + list_for_each_entry(entry, queue, list) { + nvecs += entry->nvecs; + entries++; + } + + if (entries == 1) { + vbio = list_first_entry(queue, struct vhost_blk_io, list); + list_del(&vbio->list); + queue_work(vbio->blk->vblk_workqueue, &vbio->work); + return; + } + + vbio = allocate_vbio(nvecs); + if (!vbio) { + /* Unable to allocate memory - submit IOs individually */ + list_for_each_entry(vbio, queue, list) { + queue_work(vbio->blk->vblk_workqueue, &vbio->work); + } + INIT_LIST_HEAD(queue); + return; + } + + entry = list_first_entry(queue, struct vhost_blk_io, list); + vbio->nvecs = nvecs; + vbio->blk = entry->blk; + vbio->file = entry->file; + vbio->type = entry->type; + vbio->sector = entry->sector; + vbio->head = -1; + vbio->len = 0; + nvecs = 0; + + list_for_each_entry(entry, queue, list) { + memcpy(&vbio->iov[nvecs], entry->iov, entry->nvecs * sizeof(struct iovec)); + nvecs += entry->nvecs; + vbio->len += entry->len; + } + list_replace_init(queue, &vbio->list); + queue_work(vbio->blk->vblk_workqueue, &vbio->work); +}
+ +static void start_io(struct list_head *queue) +{ + struct list_head start; + struct vhost_blk_io *vbio = NULL, *entry; + + if (list_empty(queue)) + return; + + list_for_each_entry(entry, queue, list) { + if (!vbio) { + vbio = entry; + continue; + } + if (vbio->sector + (vbio->len >> SECTOR_SHIFT) == entry->sector) { + vbio = entry; + } else { + INIT_LIST_HEAD(&start); + list_cut_position(&start, queue, &vbio->list); + merge_and_handoff_work(&start); + vbio = entry; + } + } + if (!list_empty(queue)) + merge_and_handoff_work(queue); +}
+ +static uint64_t calculate_len(struct iovec *iov, int nvecs) +{ + uint64_t len = 0; + int i; + + for (i = 0; i < nvecs; i++) + len += iov[i].iov_len; + return len; +} + +static void insert_to_queue(struct vhost_blk_io *vbio, struct list_head *queue) +{ + struct vhost_blk_io *entry; + + /* keep the queue sorted by starting sector */ + list_for_each_entry(entry, queue, list) { + if (entry->sector > vbio->sector) + break; + } + list_add_tail(&vbio->list, &entry->list); +}
+ +static int handoff_io(struct vhost_blk *blk, int head, + uint32_t type, uint64_t sector, + struct iovec *iov, int nvecs) +{ + struct vhost_virtqueue *vq = &blk->dev.vqs[0]; + struct vhost_blk_io
*vbio; + + vbio = allocate_vbio(nvecs+1); + if (!vbio) { + return -ENOMEM; + } + vbio->blk = blk; + vbio->head = head; + vbio->file = vq->private_data; + vbio->type = type; + vbio->sector = sector; + vbio->nvecs = nvecs; + vbio->len = calculate_len(iov, nvecs); + memcpy(vbio->iov, iov, (nvecs + 1) * sizeof(struct iovec)); + + if (vbio->type & VIRTIO_BLK_T_FLUSH) { +#if 0 + /* Sync called - do I need to submit IOs in the queue ? */ + start_io(&read_queue); + start_io(&write_queue); +#endif + queue_work(blk->vblk_workqueue, &vbio->work); + } else if (vbio->type & VIRTIO_BLK_T_OUT) { + insert_to_queue(vbio, &write_queue); + } else { + insert_to_queue(vbio, &read_queue); + } + return 0; +} + +static void handle_blk(struct vhost_blk *blk) +{ + struct vhost_virtqueue *vq = &blk->dev.vqs[0]; + unsigned head, out, in; + struct virtio_blk_outhdr hdr; + int nvecs; + struct board_info *bd_info = blk->bd_info; + struct vring *vring; + + vring = &((struct mic_virtblk *)bd_info->bi_virtio)->vb_shared.vring; + if (vring == 0 || readl(&vring->num) == 0) { + printk("request comes in while card side driver is not loaded yet. Ignore\n"); + return; + } + /* the first time since the card side driver becomes ready */ + if (vq->desc == NULL || readb(&((struct mic_virtblk *)bd_info->bi_virtio)->vb_shared.update)) { + vq->num = readl(&vring->num); + vq->desc = (struct vring_desc *)readq(&vring->desc); + vq->avail = (struct vring_avail *)readq(&vring->avail); + vq->used = (struct vring_used *)readq(&vring->used); + vq->last_avail_idx = 0; + vq->avail_idx = 0; + vq->last_used_idx = 0; + vq->signalled_used = 0; + vq->signalled_used_valid = false; + vq->done_idx = 0; + writeb(false, &((struct mic_virtblk *)bd_info->bi_virtio)->vb_shared.update); + } + + if (micpm_get_reference(&blk->bd_info->bi_ctx, true)) + return; + + mutex_lock(&vq->mutex); + + vhost_disable_notify(&blk->dev, vq); + + for (;;) { + head = vhost_get_vq_desc(&blk->dev, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in, NULL, NULL); + if ((head == vq->num) || (head == -EFAULT) || (head == -EINVAL)) { + if (unlikely(vhost_enable_notify(&blk->dev, vq))) { + vhost_disable_notify(&blk->dev, vq); + continue; + } + start_io(&read_queue); + start_io(&write_queue); + break; + } + + BUG_ON(vq->iov[0].iov_len != 16); + + memcpy_fromio(&hdr, mic_addr_in_host(bd_info->bi_ctx.aper.va, vq->iov[0].iov_base), + sizeof(hdr)); + + nvecs = out - 1; + if (hdr.type == VIRTIO_BLK_T_IN) + nvecs = in - 1; + + BUG_ON(vq->iov[nvecs+1].iov_len != 1); + if (handoff_io(blk, head, hdr.type, hdr.sector, &vq->iov[1], nvecs) < 0) { + vhost_discard_vq_desc(vq, 1); + continue; + } + } + mutex_unlock(&vq->mutex); + micpm_put_reference(&blk->bd_info->bi_ctx); +} + +static void handle_blk_kick(struct work_struct *work) +{ + struct vhost_blk *vblk; + + vblk = container_of(work, struct vhost_blk, vb_ws_bh); + handle_blk(vblk); +} + +#if 0 +static void handle_rq_blk(struct vhost_work *work) +{ + struct vhost_blk *blk; + + blk = container_of(work, struct vhost_blk, poll[0].work); + handle_blk(blk); +} +#endif + +static int +vhost_doorbell_intr_handler(mic_ctx_t *mic_ctx, int doorbell) +{ + struct board_info *bi; + struct vhost_blk *vblk; + + bi = container_of(mic_ctx, struct board_info, bi_ctx); + vblk = ((struct mic_virtblk *)bi->bi_virtio)->vblk; + queue_work(vblk->vb_wq, &vblk->vb_ws_bh); + + return 0; +} + +static long vhost_blk_set_backend(struct vhost_blk *vblk) +{ + struct vhost_virtqueue *vq; + struct board_info *bd_info = vblk->bd_info; + unsigned index = bd_info->bi_ctx.bi_id; + struct 
vb_shared *vb_shared; + int ret = 0; + struct kstat stat; + unsigned int virtio_blk_features = (1U << VIRTIO_BLK_F_SEG_MAX) | + (1U << VIRTIO_BLK_F_BLK_SIZE); + + if (index >= MAX_BOARD_SUPPORTED) { + ret = -ENOBUFS; + goto _exit_; + } + if (vblk->virtblk_file == NULL) { + ret = -EBADF; + goto _exit_; + } + + vq = &vblk->vqs[0]; + mutex_lock(&vq->mutex); + rcu_assign_pointer(vq->private_data, vblk->virtblk_file); + mutex_unlock(&vq->mutex); + + snprintf(vblk->vb_wqname, sizeof(vblk->vb_wqname), + "virtblk wq %d", index); + vblk->vb_wq = __mic_create_singlethread_workqueue(vblk->vb_wqname); + if (vblk->vb_wq == NULL) { + ret = -ENOMEM; + goto _exit_; + } + INIT_WORK(&vblk->vb_ws_bh, handle_blk_kick); + + /* They have to be accessed from "struct vhost_virtqueue *vq" in mic_vhost.c. + They are not used in vhost block. I don't modify vhost.h. */ + vq->log_base = (void __user *)&bd_info->bi_ctx; + vq->log_addr = (u64)bd_info->bi_ctx.aper.va; + + vb_shared = &((struct mic_virtblk *)bd_info->bi_virtio)->vb_shared; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3,6,0)) + virtio_blk_features |= (1U << VIRTIO_BLK_F_FLUSH); +#endif + writel(virtio_blk_features, &vb_shared->host_features); + writel(DISK_SEG_MAX, &vb_shared->blk_config.seg_max); + writel(SECTOR_SIZE, &vb_shared->blk_config.blk_size); + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,9,0)) + ret = vfs_getattr(&vblk->virtblk_file->f_path, &stat); +#else + ret = vfs_getattr(vblk->virtblk_file->f_path.mnt, + vblk->virtblk_file->f_path.dentry, &stat); +#endif + if (ret < 0) + goto _exit_; + + if (S_ISBLK(stat.mode)) { + writel(i_size_read(I_BDEV(vblk->virtblk_file->f_mapping->host)->bd_inode) / SECTOR_SIZE, + &vb_shared->blk_config.capacity); + } else { + writel(stat.size / SECTOR_SIZE, &vb_shared->blk_config.capacity); + } + + ret = mic_reg_irqhandler(&bd_info->bi_ctx, MIC_IRQ_DB2, "Host DoorBell 2", + vhost_doorbell_intr_handler); + +_exit_: + return ret; +} + +void +mic_vhost_blk_stop(bd_info_t *bd_info) +{ + struct vring *vring = &((struct mic_virtblk *)bd_info->bi_virtio)->vb_shared.vring; + + writel(0, &vring->num); /* reject subsequent request from MIC card */ +} + +extern bd_info_t *dev_to_bdi(struct device *dev); + +ssize_t +show_virtblk_file(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct board_info *bd_info = dev_to_bdi(dev); + struct mic_virtblk *mic_virtblk; + struct vhost_blk *vblk; + + BUG_ON(bd_info == NULL); + mic_virtblk = bd_info->bi_virtio; + BUG_ON(mic_virtblk == NULL); + vblk = mic_virtblk->vblk; + BUG_ON(vblk == NULL); + + if (vblk->file_name != NULL) + return snprintf(buf, PAGE_SIZE, "%s\n", vblk->file_name); + else + return 0; +} + +ssize_t +store_virtblk_file(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + int ret = 0; + struct board_info *bd_info = dev_to_bdi(dev); + struct mic_virtblk *mic_virtblk; + struct vhost_blk *vblk; + struct vhost_virtqueue *vq; + char *p; + struct file *virtblk_file; + + BUG_ON(bd_info == NULL); + mic_virtblk = bd_info->bi_virtio; + BUG_ON(mic_virtblk == NULL); + vblk = mic_virtblk->vblk; + BUG_ON(vblk == NULL); + vq = &vblk->vqs[0]; + BUG_ON(vq == NULL); + + if (buf == NULL) { + ret = -EINVAL; + goto _return_; + } + if (count <= 1) { + ret = -EINVAL; + goto _return_; + } + + p = strchr(buf, '\n'); + if (p != NULL) + *p = '\0'; + + mutex_lock(&vq->mutex); + if (vblk->virtblk_file != NULL) { /* if virtblk file is already assigned */ + printk(KERN_ALERT "you are changing virtblk file: %s -> %s.\n", vblk->file_name, buf); + 
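/* drop the name and the file reference of the previously attached backing store before switching to the new one */ +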
kfree(vblk->file_name); + vblk->file_name = NULL; + filp_close(vblk->virtblk_file, current->files); + vblk->virtblk_file = NULL; + } + + vblk->file_name = kmalloc(count + 1, GFP_KERNEL); + strcpy(vblk->file_name, buf); + virtblk_file = filp_open(vblk->file_name, O_RDWR|O_LARGEFILE, 0); + if (IS_ERR(virtblk_file)) { + ret = PTR_ERR(virtblk_file); + mutex_unlock(&vq->mutex); + goto free_file_name; + } + vblk->virtblk_file = virtblk_file; + mutex_unlock(&vq->mutex); + + ret = vhost_blk_set_backend(vblk); + if (ret < 0) + goto close_virtblk_file; + + return count; + + close_virtblk_file: + filp_close(vblk->virtblk_file, current->files); + free_file_name: + kfree(vblk->file_name); + _return_: + return ret; +} + +int mic_vhost_blk_probe(bd_info_t *bd_info) +{ + int ret = 0; + char wq_name[8]; + struct mic_virtblk *mic_virtblk; + struct vhost_blk *vblk; + + mic_virtblk = kzalloc(sizeof(*mic_virtblk), GFP_KERNEL); + if (mic_virtblk == NULL) { + ret = -ENOMEM; + goto err_vblk; + } + bd_info->bi_virtio = mic_virtblk; + + vblk = kzalloc(sizeof *vblk, GFP_KERNEL); + if (vblk == NULL) { + ret = -ENOMEM; + goto free_mic_virtblk; + } + mic_virtblk->vblk = vblk; + vblk->bd_info = bd_info; + + ret = vhost_dev_init(&vblk->dev, vblk->vqs, VHOST_BLK_VQ_MAX); + if (ret < 0) + goto free_vblk; + +#if 0 + vhost_poll_init(vblk->poll, handle_rq_blk, POLLOUT|POLLIN, &vblk->dev); +#endif + + BUG_ON(bd_info->bi_ctx.bi_id >= 1000); + snprintf(wq_name, ARRAY_SIZE(wq_name), "vblk%03d", bd_info->bi_ctx.bi_id); + vblk->vblk_workqueue = __mic_create_singlethread_workqueue(wq_name); + if (vblk->vblk_workqueue == NULL) { + ret = -ENOMEM; + goto free_vblk; + } + + return ret; + + free_vblk: + kfree(vblk); + free_mic_virtblk: + kfree(mic_virtblk); + err_vblk: + return ret; +} + +void mic_vhost_blk_remove(bd_info_t *bd_info) +{ + struct mic_virtblk *mic_virtblk = bd_info->bi_virtio; + struct vhost_blk *vblk = mic_virtblk->vblk; + struct vb_shared *vb_shared = &mic_virtblk->vb_shared; + + if (vblk->virtblk_file != NULL) { + mic_unreg_irqhandler(&bd_info->bi_ctx, MIC_IRQ_DB2, "Host DoorBell 2"); + memset(&vb_shared->blk_config, 0, sizeof(vb_shared->blk_config)); + destroy_workqueue(vblk->vb_wq); + if (vblk->vqs[0].private_data != NULL) + fput(vblk->vqs[0].private_data); + kfree(vblk->file_name); + filp_close(vblk->virtblk_file, current->files); + } + vhost_dev_cleanup(&vblk->dev); + destroy_workqueue(vblk->vblk_workqueue); + kfree(vblk); + kfree(mic_virtblk); +} +#endif diff --git a/host/vhost/mic_vhost.c b/host/vhost/mic_vhost.c new file mode 100644 index 0000000..1aa946b --- /dev/null +++ b/host/vhost/mic_vhost.c @@ -0,0 +1,697 @@ +/* Copyright (C) 2009 Red Hat, Inc. + * Copyright (C) 2006 Rusty Russell IBM Corporation + * + * Author: Michael S. Tsirkin + * + * (C) Badari Pulavarty pbadari@us.ibm.com 2010 with the following comment. + * Inspiration, some code, and most witty comments come from + * Documentation/lguest/lguest.c, by Rusty Russell + * + * This work is licensed under the terms of the GNU GPL, version 2. + + * For adapting to MIC + * (C) Copyright 2012 Intel Corporation + * Author: Caz Yokoyama + * + * Generic code for virtio server in host kernel. 
+ */ + +#include +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,34)) || \ + defined(RHEL_RELEASE_CODE) + +#include +#ifdef RHEL_RELEASE_CODE +#include +#else +#include "./linux/vhost.h" +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#ifndef VIRTIO_RING_F_EVENT_IDX /* virtio_ring.h of rhel6.0 does not define */ +#define VIRTIO_RING_F_EVENT_IDX 29 +#endif +#include "vhost.h" +#include "mic/micveth_dma.h" + +#define mic_addr_in_host(va, pa) ((u8 *)(va) + (u64)(pa)) + +enum { + VHOST_MEMORY_MAX_NREGIONS = 64, + VHOST_MEMORY_F_LOG = 0x1, +}; + +#if 0 +static unsigned vhost_zcopy_mask __read_mostly; +#endif + +static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, + poll_table *pt) +{ + struct vhost_poll *poll; + poll = container_of(pt, struct vhost_poll, table); + + poll->wqh = wqh; + add_wait_queue(wqh, &poll->wait); +} + +static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync, + void *key) +{ + struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait); + + if (!((unsigned long)key & poll->mask)) + return 0; + + vhost_poll_queue(poll); + return 0; +} + +static void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn) +{ + INIT_LIST_HEAD(&work->node); + work->fn = fn; + init_waitqueue_head(&work->done); + work->flushing = 0; + work->queue_seq = work->done_seq = 0; +} + +/* Init poll structure */ +void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, + unsigned long mask, struct vhost_dev *dev) +{ + init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup); + init_poll_funcptr(&poll->table, vhost_poll_func); + poll->mask = mask; + poll->dev = dev; + + vhost_work_init(&poll->work, fn); +} + +#if 0 +/* Start polling a file. We add ourselves to file's wait queue. The caller must + * keep a reference to a file until after vhost_poll_stop is called. */ +void vhost_poll_start(struct vhost_poll *poll, struct file *file) +{ + unsigned long mask; + mask = file->f_op->poll(file, &poll->table); + if (mask) + vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); +} +#endif + +/* Stop polling a file. After this function returns, it becomes safe to drop the + * file reference. You must also flush afterwards. */ +void vhost_poll_stop(struct vhost_poll *poll) +{ + remove_wait_queue(poll->wqh, &poll->wait); +} + +static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work, + unsigned seq) +{ + int left; + spin_lock_irq(&dev->work_lock); + left = seq - work->done_seq; + spin_unlock_irq(&dev->work_lock); + return left <= 0; +} + +static void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work) +{ + unsigned seq; + int flushing; + + spin_lock_irq(&dev->work_lock); + seq = work->queue_seq; + work->flushing++; + spin_unlock_irq(&dev->work_lock); + wait_event(work->done, vhost_work_seq_done(dev, work, seq)); + spin_lock_irq(&dev->work_lock); + flushing = --work->flushing; + spin_unlock_irq(&dev->work_lock); + BUG_ON(flushing < 0); +} + +/* Flush any work that has been scheduled. When calling this, don't hold any + * locks that are also used by the callback. 
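[Illustrative aside, not part of the patch] vhost_work_flush() above never joins the worker thread: it snapshots queue_seq under the work lock and then sleeps until done_seq has moved past that snapshot. A minimal user-space sketch of the wrap-safe comparison behind vhost_work_seq_done(); seq_done() and the sample values are invented for the example:

#include <stdio.h>

/* Wrap-safe "has the worker caught up to my snapshot?" test. */
static int seq_done(unsigned int done_seq, unsigned int snapshot)
{
        int left = snapshot - done_seq;   /* signed distance survives 32-bit wrap */

        return left <= 0;                 /* worker has reached the snapshot */
}

int main(void)
{
        printf("%d\n", seq_done(5, 5));           /* 1: already done        */
        printf("%d\n", seq_done(4, 5));           /* 0: still pending       */
        printf("%d\n", seq_done(3, 0xfffffffeu)); /* 1: done after wrapping */
        return 0;
}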
*/ +void vhost_poll_flush(struct vhost_poll *poll) +{ + vhost_work_flush(poll->dev, &poll->work); +} + +static inline void vhost_work_queue(struct vhost_dev *dev, + struct vhost_work *work) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->work_lock, flags); + if (list_empty(&work->node)) { + list_add_tail(&work->node, &dev->work_list); + work->queue_seq++; + wake_up_process(dev->worker); + } + spin_unlock_irqrestore(&dev->work_lock, flags); +} + +void vhost_poll_queue(struct vhost_poll *poll) +{ + vhost_work_queue(poll->dev, &poll->work); +} + +static void vhost_vq_reset(struct vhost_dev *dev, + struct vhost_virtqueue *vq) +{ + vq->num = 1; + vq->desc = NULL; + vq->avail = NULL; + vq->used = NULL; + vq->last_avail_idx = 0; + vq->avail_idx = 0; + vq->last_used_idx = 0; + vq->signalled_used = 0; + vq->signalled_used_valid = false; + vq->used_flags = 0; + vq->log_used = false; + vq->log_addr = -1ull; + vq->vhost_hlen = 0; + vq->sock_hlen = 0; + vq->private_data = NULL; + vq->log_base = NULL; + vq->error_ctx = NULL; + vq->error = NULL; + vq->kick = NULL; + vq->call_ctx = NULL; + vq->call = NULL; + vq->log_ctx = NULL; + vq->upend_idx = 0; + vq->done_idx = 0; + vq->ubufs = NULL; +} + +static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) +{ + kfree(vq->indirect); + vq->indirect = NULL; + kfree(vq->log); + vq->log = NULL; + kfree(vq->heads); + vq->heads = NULL; + kfree(vq->ubuf_info); + vq->ubuf_info = NULL; +} + +#if 0 +void vhost_enable_zcopy(int vq) +{ + vhost_zcopy_mask |= 0x1 << vq; +} +#endif + +static void vhost_dev_free_iovecs(struct vhost_dev *dev) +{ + int i; + for (i = 0; i < dev->nvqs; ++i) + vhost_vq_free_iovecs(&dev->vqs[i]); +} + +long vhost_dev_init(struct vhost_dev *dev, + struct vhost_virtqueue *vqs, int nvqs) +{ + int i; + + dev->vqs = vqs; + dev->nvqs = nvqs; + mutex_init(&dev->mutex); + dev->log_ctx = NULL; + dev->log_file = NULL; + dev->memory = NULL; + dev->mm = NULL; + spin_lock_init(&dev->work_lock); + INIT_LIST_HEAD(&dev->work_list); + dev->worker = NULL; + + for (i = 0; i < dev->nvqs; ++i) { + dev->vqs[i].log = NULL; + dev->vqs[i].indirect = NULL; + dev->vqs[i].heads = NULL; + dev->vqs[i].ubuf_info = NULL; + dev->vqs[i].dev = dev; + mutex_init(&dev->vqs[i].mutex); + vhost_vq_reset(dev, dev->vqs + i); + if (dev->vqs[i].handle_kick) + vhost_poll_init(&dev->vqs[i].poll, + dev->vqs[i].handle_kick, POLLIN, dev); + } + + return 0; +} + +#if 0 +/* Caller should have device mutex */ +long vhost_dev_check_owner(struct vhost_dev *dev) +{ + /* Are you the owner? If not, I don't think you mean to do that */ + return dev->mm == current->mm ? 0 : -EPERM; +} +#endif + +struct vhost_attach_cgroups_struct { + struct vhost_work work; + struct task_struct *owner; + int ret; +}; + +#if 0 +/* Caller should have device mutex */ +long vhost_dev_reset_owner(struct vhost_dev *dev) +{ + struct vhost_memory *memory; + + /* Restore memory to default empty mapping. */ + memory = kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL); + if (!memory) + return -ENOMEM; + + vhost_dev_cleanup(dev); + + memory->nregions = 0; + dev->memory = memory; + return 0; +} +#endif + +/* In case of DMA done not in order in lower device driver for some reason. + * upend_idx is used to track end of used idx, done_idx is used to track head + * of used idx. Once lower device DMA done contiguously, we will signal KVM + * guest used idx. 
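[Illustrative aside, not part of the patch] The function that follows scans the outstanding zero-copy buffers from done_idx toward upend_idx and publishes only a contiguous prefix of completed ones, so the guest's used index never skips an in-flight request. A stand-alone model of that catch-up scan; RING_SIZE, the heads[] encoding and signal_used() are assumptions made for this example, not driver code:

#include <stdio.h>

#define RING_SIZE       8
#define DMA_DONE        1
#define DMA_PENDING     0

static int heads[RING_SIZE];    /* completion state of each outstanding buffer */

static int signal_used(int *done_idx, int upend_idx)
{
        int i, published = 0;

        for (i = *done_idx; i != upend_idx; i = (i + 1) % RING_SIZE) {
                if (heads[i] != DMA_DONE)
                        break;   /* first still-pending entry stops the scan */
                published++;     /* here the driver would add_used and signal */
        }
        if (published)
                *done_idx = i;
        return published;
}

int main(void)
{
        int done_idx = 6, upend_idx = 3;   /* five buffers outstanding, wrapping */
        int n;

        heads[6] = heads[7] = heads[0] = DMA_DONE;   /* first three finished   */
        heads[1] = heads[2] = DMA_PENDING;           /* rest still in flight   */

        n = signal_used(&done_idx, upend_idx);
        printf("published %d, done_idx now %d\n", n, done_idx);  /* 3 and 1 */
        return 0;
}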
+ */ +int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq) +{ + int i; + int j = 0; + + for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) { + if ((vq->heads[i].len == VHOST_DMA_DONE_LEN)) { + vq->heads[i].len = VHOST_DMA_CLEAR_LEN; + vhost_add_used_and_signal(vq->dev, vq, + vq->heads[i].id, 0); + ++j; + } else + break; + } + if (j) + vq->done_idx = i; + return j; +} + +/* Caller should have device mutex */ +void vhost_dev_cleanup(struct vhost_dev *dev) +{ + int i; + for (i = 0; i < dev->nvqs; ++i) { + if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { + vhost_poll_stop(&dev->vqs[i].poll); + vhost_poll_flush(&dev->vqs[i].poll); + } + BUG_ON(dev->vqs[i].ubufs != NULL); + + /* Signal guest as appropriate. */ + vhost_zerocopy_signal_used(&dev->vqs[i]); + + if (dev->vqs[i].error_ctx) + eventfd_ctx_put(dev->vqs[i].error_ctx); + if (dev->vqs[i].error) + fput(dev->vqs[i].error); + if (dev->vqs[i].kick) + fput(dev->vqs[i].kick); + if (dev->vqs[i].call_ctx) + eventfd_ctx_put(dev->vqs[i].call_ctx); + if (dev->vqs[i].call) + fput(dev->vqs[i].call); + vhost_vq_reset(dev, dev->vqs + i); + } + vhost_dev_free_iovecs(dev); + if (dev->log_ctx) + eventfd_ctx_put(dev->log_ctx); + dev->log_ctx = NULL; + if (dev->log_file) + fput(dev->log_file); + dev->log_file = NULL; + /* No one will access memory at this point */ + kfree(dev->memory); + dev->memory = NULL; + WARN_ON(!list_empty(&dev->work_list)); + if (dev->worker) { + kthread_stop(dev->worker); + dev->worker = NULL; + } + if (dev->mm) + mmput(dev->mm); + dev->mm = NULL; +} + +#if 0 +/* Caller must have device mutex */ +long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg) +{ + return 0; +} +#endif + +static int vhost_update_used_flags(struct vhost_virtqueue *vq) +{ + iowrite16(vq->used_flags, mic_addr_in_host(vq->log_addr, &vq->used->flags)); + return 0; +} + +#if 0 +int vhost_init_used(struct vhost_virtqueue *vq) +{ + int r; + if (!vq->private_data) + return 0; + + r = vhost_update_used_flags(vq); + if (r) + return r; + vq->signalled_used_valid = false; + vq->last_used_idx = ioread16(mic_addr_in_host(vq->log_addr, &vq->used->idx)); + return 0; +} +#endif + +/* Each buffer in the virtqueues is actually a chain of descriptors. This + * function returns the next descriptor in the chain, + * or -1U if we're at the end. */ +static unsigned next_desc(struct vring_desc *desc) +{ + unsigned int next; + + /* If this descriptor says it doesn't chain, we're done. */ + if (!(desc->flags & VRING_DESC_F_NEXT)) + return -1U; + + /* Check they're not leading us off end of descriptors. */ + next = desc->next; + /* Make sure compiler knows to grab that: we don't want it changing! */ + /* We will use the result as an index in an array, so most + * architectures only need a compiler barrier here. */ + read_barrier_depends(); + + return next; +} + +/* This looks in the virtqueue and for the first available buffer, and converts + * it to an iovec for convenient access. Since descriptors consist of some + * number of output then some number of input descriptors, it's actually two + * iovecs, but we pack them into one and note how many of each there were. + * + * This function returns the descriptor number found, or vq->num (which is + * never a valid descriptor number) if none was found. A negative code is + * returned on error. 
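[Illustrative aside, not part of the patch] The comment above describes how a descriptor chain becomes one iovec; a self-contained user-space sketch of that flattening follows, with struct and flag definitions mirroring the virtio ring layout and a sample chain shaped like the virtio-blk requests handle_blk() sees (16-byte header, data buffer, 1-byte status). chain_to_iov() itself is hypothetical, not the driver's code:

#include <stdio.h>
#include <stdint.h>
#include <sys/uio.h>

#define DESC_F_NEXT     1
#define DESC_F_WRITE    2

struct desc {
        uint64_t addr;
        uint32_t len;
        uint16_t flags;
        uint16_t next;
};

static int chain_to_iov(const struct desc *table, unsigned num, unsigned head,
                        struct iovec *iov, unsigned *out, unsigned *in)
{
        unsigned i = head, found = 0;

        *out = *in = 0;
        for (;;) {
                if (i >= num || ++found > num)
                        return -1;          /* bad index or loop in the chain */
                iov[*out + *in].iov_base = (void *)(uintptr_t)table[i].addr;
                iov[*out + *in].iov_len = table[i].len;
                if (table[i].flags & DESC_F_WRITE)
                        (*in)++;            /* device-writable buffer */
                else if (*in)
                        return -1;          /* out descriptor after an in one */
                else
                        (*out)++;
                if (!(table[i].flags & DESC_F_NEXT))
                        return (int)head;   /* end of chain */
                i = table[i].next;
        }
}

int main(void)
{
        /* request header (out), data buffer (in), status byte (in) */
        struct desc ring[3] = {
                { 0x1000, 16,  DESC_F_NEXT, 1 },
                { 0x2000, 512, DESC_F_NEXT | DESC_F_WRITE, 2 },
                { 0x3000, 1,   DESC_F_WRITE, 0 },
        };
        struct iovec iov[3];
        unsigned out, in;

        chain_to_iov(ring, 3, 0, iov, &out, &in);
        printf("out=%u in=%u\n", out, in);   /* out=1 in=2 */
        return 0;
}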
*/ +int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num) +{ + struct vring_desc desc; + unsigned int i, head, found = 0; + u16 last_avail_idx; + int ret; + + /* Check it isn't doing very strange things with descriptor numbers. */ + last_avail_idx = vq->last_avail_idx; + vq->avail_idx = ioread16(mic_addr_in_host(vq->log_addr, &vq->avail->idx)); + + if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { + vq_err(vq, "Guest moved used index from %u to %u", + last_avail_idx, vq->avail_idx); + return -EFAULT; + } + + /* If there's nothing new since last we looked, return invalid. */ + if (vq->avail_idx == last_avail_idx) + return vq->num; + + /* Only get avail ring entries after they have been exposed by guest. */ + smp_rmb(); + + /* Grab the next descriptor number they're advertising, and increment + * the index we've seen. */ + head = ioread16(mic_addr_in_host(vq->log_addr, + &vq->avail->ring[last_avail_idx % vq->num])); + + /* If their number is silly, that's an error. */ + if (unlikely(head >= vq->num)) { + vq_err(vq, "Guest says index %u > %u is available", + head, vq->num); + return -EINVAL; + } + + /* When we start there are none of either input nor output. */ + *out_num = *in_num = 0; + if (unlikely(log)) + *log_num = 0; + + i = head; + do { + unsigned iov_count = *in_num + *out_num; + if (unlikely(i >= vq->num)) { + vq_err(vq, "Desc index is %u > %u, head = %u", + i, vq->num, head); + return -EINVAL; + } + if (unlikely(++found > vq->num)) { + vq_err(vq, "Loop detected: last one at %u " + "vq size %u head %u\n", + i, vq->num, head); + return -EINVAL; + } + memcpy_fromio(&desc, mic_addr_in_host(vq->log_addr, vq->desc + i), sizeof(desc)); + + (iov + iov_count)->iov_base = (void *)desc.addr; + (iov + iov_count)->iov_len = desc.len; + ret = 1; + if (desc.flags & VRING_DESC_F_WRITE) { + /* If this is an input descriptor, + * increment that count. */ + *in_num += ret; + if (unlikely(log)) { + log[*log_num].addr = desc.addr; + log[*log_num].len = desc.len; + ++*log_num; + } + } else { + /* If it's an output descriptor, they're all supposed + * to come before any input descriptors. */ + if (unlikely(*in_num)) { + vq_err(vq, "Descriptor has out after in: " + "idx %d\n", i); + return -EINVAL; + } + *out_num += ret; + } + } while ((i = next_desc(&desc)) != -1); + + /* On success, increment avail index. */ + vq->last_avail_idx++; + + /* Assume notifications from guest are disabled at this point, + * if they aren't we would need to update avail_event index. */ + BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); + return head; +} + +/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ +void vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n) +{ + vq->last_avail_idx -= n; +} + +/* After we've used one of their buffers, we tell them about it. We'll then + * want to notify the guest, using eventfd. */ +int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) +{ + struct vring_used_elem __user *used; + + /* The virtqueue contains a ring of used buffers. Get a pointer to the + * next entry in that used ring. */ + used = &vq->used->ring[vq->last_used_idx % vq->num]; + iowrite16(head, mic_addr_in_host(vq->log_addr, &used->id)); + iowrite16(len, mic_addr_in_host(vq->log_addr, &used->len)); + /* Make sure buffer is written before we update index. 
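[Illustrative aside, not part of the patch] The comment above is the whole contract of vhost_add_used(): fill the used-ring slot first, then let the barrier that follows order it before the index update that exposes the slot to the guest. A minimal single-producer analogue using a C11 release store in place of smp_wmb(); QSZ, add_used() and the sample values are invented for the example:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define QSZ 4

struct used_elem { uint32_t id; uint32_t len; };

static struct used_elem ring[QSZ];
static _Atomic uint16_t used_idx;
static uint16_t last_used_idx;

static void add_used(uint32_t id, uint32_t len)
{
        ring[last_used_idx % QSZ] = (struct used_elem){ id, len };
        /* slot contents must be visible before the index that exposes them */
        atomic_store_explicit(&used_idx, ++last_used_idx, memory_order_release);
}

int main(void)
{
        add_used(7, 512);
        printf("idx=%u id=%u\n",
               (unsigned)atomic_load(&used_idx), (unsigned)ring[0].id);  /* idx=1 id=7 */
        return 0;
}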
*/ + smp_wmb(); + ioread16(mic_addr_in_host(vq->log_addr, &used->id)); + iowrite16(vq->last_used_idx + 1, mic_addr_in_host(vq->log_addr, &vq->used->idx)); + + vq->last_used_idx++; + + /* If the driver never bothers to signal in a very long while, + * used index might wrap around. If that happens, invalidate + * signalled_used index we stored. TODO: make sure driver + * signals at least once in 2^16 and remove this. */ + if (unlikely(vq->last_used_idx == vq->signalled_used)) + vq->signalled_used_valid = false; + return 0; +} + +static int __vhost_add_used_n(struct vhost_virtqueue *vq, + struct vring_used_elem *heads, + unsigned count) +{ + struct vring_used_elem __user *used; + u16 old, new; + int start; + + start = vq->last_used_idx % vq->num; + used = vq->used->ring + start; + memcpy_toio(mic_addr_in_host(vq->log_addr, used), heads, count * sizeof(*used)); + old = vq->last_used_idx; + new = (vq->last_used_idx += count); + /* If the driver never bothers to signal in a very long while, + * used index might wrap around. If that happens, invalidate + * signalled_used index we stored. TODO: make sure driver + * signals at least once in 2^16 and remove this. */ + if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old))) + vq->signalled_used_valid = false; + return 0; +} + +/* After we've used one of their buffers, we tell them about it. We'll then + * want to notify the guest, using eventfd. */ +int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, + unsigned count) +{ + int start, n, r; + + start = vq->last_used_idx % vq->num; + n = vq->num - start; + if (n < count) { + r = __vhost_add_used_n(vq, heads, n); + if (r < 0) + return r; + heads += n; + count -= n; + } + r = __vhost_add_used_n(vq, heads, count); + + /* Make sure buffer is written before we update index. */ + smp_wmb(); + iowrite16(vq->last_used_idx, mic_addr_in_host(vq->log_addr, &vq->used->idx)); + return r; +} + +static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) +{ + __u16 old, new; + bool v; + /* Flush out used index updates. This is paired + * with the barrier that the Guest executes when enabling + * interrupts. */ + smp_mb(); + + if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) && + unlikely(vq->avail_idx == vq->last_avail_idx)) + return true; + + if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + __u16 flags; + flags = ioread16(mic_addr_in_host(vq->log_addr, &vq->avail->flags)); + return !(flags & VRING_AVAIL_F_NO_INTERRUPT); + } + old = vq->signalled_used; + v = vq->signalled_used_valid; + new = vq->signalled_used = vq->last_used_idx; + vq->signalled_used_valid = true; + + if (unlikely(!v)) + return true; + + return false; +} + +/* This actually signals the guest, using eventfd. */ +void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) +{ + /* Signal the Guest tell them we used something up. */ + if (vq->log_base && vhost_notify(dev, vq)) + mic_send_virtio_intr((struct _mic_ctx_t *)vq->log_base); +} + +/* And here's the combo meal deal. Supersize me! 
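[Illustrative aside, not part of the patch] The (u16)(new - signalled_used) < (u16)(new - old) test in __vhost_add_used_n() above is easy to misread: it simply asks whether the used index stepped onto or past the last signalled value while advancing from old to new, wrap-around included, in which case the cached signalled_used must be invalidated. A stand-alone check of that predicate; passed() and the sample indices are invented for the example:

#include <stdint.h>
#include <stdio.h>

static int passed(uint16_t old, uint16_t new, uint16_t signalled)
{
        return (uint16_t)(new - signalled) < (uint16_t)(new - old);
}

int main(void)
{
        printf("%d\n", passed(10, 14, 12));        /* 1: 12 lies in (10,14]      */
        printf("%d\n", passed(10, 14, 9));         /* 0: 9 was not crossed       */
        printf("%d\n", passed(65530, 4, 65533));   /* 1: crossed across the wrap */
        return 0;
}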
*/ +void vhost_add_used_and_signal(struct vhost_dev *dev, + struct vhost_virtqueue *vq, + unsigned int head, int len) +{ + vhost_add_used(vq, head, len); + vhost_signal(dev, vq); +} + +#if 0 +/* multi-buffer version of vhost_add_used_and_signal */ +void vhost_add_used_and_signal_n(struct vhost_dev *dev, + struct vhost_virtqueue *vq, + struct vring_used_elem *heads, unsigned count) +{ + vhost_add_used_n(vq, heads, count); + vhost_signal(dev, vq); +} +#endif + +/* OK, now we need to know about added descriptors. */ +bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) +{ + u16 avail_idx; + int r; + if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) + return false; + vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; + if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + r = vhost_update_used_flags(vq); + if (r) { + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); + return false; + } + } + /* They could have slipped one in as we were doing that: make + * sure it's written, then check again. */ + smp_mb(); + avail_idx = ioread16(mic_addr_in_host(vq->log_addr, &vq->avail->idx)); + + return avail_idx != vq->avail_idx; +} + +/* We don't need to be notified again. */ +void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) +{ + int r; + if (vq->used_flags & VRING_USED_F_NO_NOTIFY) + return; + vq->used_flags |= VRING_USED_F_NO_NOTIFY; + if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + r = vhost_update_used_flags(vq); + if (r) + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); + } +} +#endif diff --git a/host/vhost/vhost.h b/host/vhost/vhost.h new file mode 100644 index 0000000..9bb1653 --- /dev/null +++ b/host/vhost/vhost.h @@ -0,0 +1,261 @@ +/* + This is the exact copy of linux-2.6.32-220.7.1.el6.x86_64/drivers/vhost/vhost.h + except for this comment. + */ +#ifndef _VHOST_H +#define _VHOST_H + +#include +#ifdef RHEL_RELEASE_CODE +#include +#else +#include "./linux/vhost.h" +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* This is for zerocopy, used buffer len is set to 1 when lower device DMA + * done */ +#define VHOST_DMA_DONE_LEN 1 +#define VHOST_DMA_CLEAR_LEN 0 + +struct vhost_device; + +struct vhost_work; +typedef void (*vhost_work_fn_t)(struct vhost_work *work); + +struct vhost_work { + struct list_head node; + vhost_work_fn_t fn; + wait_queue_head_t done; + int flushing; + unsigned queue_seq; + unsigned done_seq; +}; + +/* Poll a file (eventfd or socket) */ +/* Note: there's nothing vhost specific about this structure. 
*/ +struct vhost_poll { + poll_table table; + wait_queue_head_t *wqh; + wait_queue_t wait; + struct vhost_work work; + unsigned long mask; + struct vhost_dev *dev; +}; + +void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, + unsigned long mask, struct vhost_dev *dev); +void vhost_poll_start(struct vhost_poll *poll, struct file *file); +void vhost_poll_stop(struct vhost_poll *poll); +void vhost_poll_flush(struct vhost_poll *poll); +void vhost_poll_queue(struct vhost_poll *poll); + +struct vhost_log { + u64 addr; + u64 len; +}; + +struct vhost_virtqueue; + +struct vhost_ubuf_ref { + struct kref kref; + wait_queue_head_t wait; + struct vhost_virtqueue *vq; +}; + +struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *, bool zcopy); +void vhost_ubuf_put(struct vhost_ubuf_ref *); +void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *); + +/* The virtqueue structure describes a queue attached to a device. */ +struct vhost_virtqueue { + struct vhost_dev *dev; + + /* The actual ring of buffers. */ + struct mutex mutex; + unsigned int num; + struct vring_desc __user *desc; + struct vring_avail __user *avail; + struct vring_used __user *used; + struct file *kick; + struct file *call; + struct file *error; + struct eventfd_ctx *call_ctx; + struct eventfd_ctx *error_ctx; + struct eventfd_ctx *log_ctx; + + struct vhost_poll poll; + + /* The routine to call when the Guest pings us, or timeout. */ + vhost_work_fn_t handle_kick; + + /* Last available index we saw. */ + u16 last_avail_idx; + + /* Caches available index value from user. */ + u16 avail_idx; + + /* Last index we used. */ + u16 last_used_idx; + + /* Used flags */ + u16 used_flags; + + /* Last used index value we have signalled on */ + u16 signalled_used; + + /* Last used index value we have signalled on */ + bool signalled_used_valid; + + /* Log writes to used structure. */ + bool log_used; + u64 log_addr; + + struct iovec iov[UIO_MAXIOV]; + /* hdr is used to store the virtio header. + * Since each iovec has >= 1 byte length, we never need more than + * header length entries to store the header. */ + struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)]; + struct iovec *indirect; + size_t vhost_hlen; + size_t sock_hlen; + struct vring_used_elem *heads; + /* We use a kind of RCU to access private pointer. + * All readers access it from worker, which makes it possible to + * flush the vhost_work instead of synchronize_rcu. Therefore readers do + * not need to call rcu_read_lock/rcu_read_unlock: the beginning of + * vhost_work execution acts instead of rcu_read_lock() and the end of + * vhost_work execution acts instead of rcu_read_lock(). + * Writers use virtqueue mutex. */ + void *private_data; + /* Log write descriptors */ + void __user *log_base; + struct vhost_log *log; + /* vhost zerocopy support fields below: */ + /* last used idx for outstanding DMA zerocopy buffers */ + int upend_idx; + /* first used idx for DMA done zerocopy buffers */ + int done_idx; + /* an array of userspace buffers info */ + struct ubuf_info *ubuf_info; + /* Reference counting for outstanding ubufs. + * Protected by vq mutex. Writers must also take device mutex. */ + struct vhost_ubuf_ref *ubufs; +}; + +struct vhost_dev { + /* Readers use RCU to access memory table pointer + * log base pointer and features. 
+ * Writers use mutex below.*/ + struct vhost_memory *memory; + struct mm_struct *mm; + struct mutex mutex; + unsigned acked_features; + struct vhost_virtqueue *vqs; + int nvqs; + struct file *log_file; + struct eventfd_ctx *log_ctx; + spinlock_t work_lock; + struct list_head work_list; + struct task_struct *worker; +}; + +long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs); +long vhost_dev_check_owner(struct vhost_dev *); +long vhost_dev_reset_owner(struct vhost_dev *); +void vhost_dev_cleanup(struct vhost_dev *); +long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, unsigned long arg); +int vhost_vq_access_ok(struct vhost_virtqueue *vq); +int vhost_log_access_ok(struct vhost_dev *); + +int vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *, + struct iovec iov[], unsigned int iov_count, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num); +void vhost_discard_vq_desc(struct vhost_virtqueue *, int n); + +int vhost_init_used(struct vhost_virtqueue *); +int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len); +int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads, + unsigned count); +void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *, + unsigned int id, int len); +void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *, + struct vring_used_elem *heads, unsigned count); +void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); +void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *); +bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); + +int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, + unsigned int log_num, u64 len); +void vhost_zerocopy_callback(void *arg); +int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq); + +#define vq_err(vq, fmt, ...) 
do { \ + pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ + if ((vq)->error_ctx) \ + eventfd_signal((vq)->error_ctx, 1);\ + } while (0) + +#ifndef __rcu_dereference_index_check +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4,3,0)) +#define __rcu_dereference_index_check(p, c) \ + ({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + rcu_lockdep_assert(c, \ + "suspicious rcu_dereference_index_check()" \ + " usage"); \ + smp_read_barrier_depends(); \ + (_________p1); \ + }) +#else +#define __rcu_dereference_index_check(p, c) \ + ({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + RCU_LOCKDEP_WARN(c, \ + "suspicious rcu_dereference_index_check()" \ + " usage"); \ + smp_read_barrier_depends(); \ + (_________p1); \ + }) +#endif +#endif + +enum { + VHOST_FEATURES = (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | + (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | + (1ULL << VIRTIO_RING_F_EVENT_IDX) | + (1ULL << VHOST_F_LOG_ALL) | + (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | + (1ULL << VIRTIO_NET_F_MRG_RXBUF), +}; + +static inline int vhost_has_feature(struct vhost_dev *dev, int bit) +{ +#ifdef RHEL_RELEASE_CODE +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + unsigned acked_features = rcu_dereference_index_check(dev->acked_features, rcu_read_lock_held()); +#else + unsigned acked_features = rcu_dereference(dev->acked_features); +#endif +#else +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4,2,0)) + unsigned acked_features = rcu_dereference_index_check(dev->acked_features, rcu_read_lock_held()); +#else + unsigned acked_features = __rcu_dereference_index_check(dev->acked_features, rcu_read_lock_held()); +#endif +#endif + return acked_features & (1 << bit); +} + +void vhost_enable_zcopy(int vq); + +#endif diff --git a/host/vmcore.c b/host/vmcore.c new file mode 100644 index 0000000..fb5819d --- /dev/null +++ b/host/vmcore.c @@ -0,0 +1,821 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. 
Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * fs/proc/vmcore.c Interface for accessing the crash + * dump from the system's previous life. + * Heavily borrowed from fs/proc/kcore.c + * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) + * Copyright (C) IBM Corporation, 2004. All rights reserved + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) +#include +#endif +#include "mic_common.h" + +extern struct proc_dir_entry *vmcore_dir; + +/* Stores the physical address of elf header of crash image. */ +unsigned long long elfcorehdr_addr = 0x50e9000; + +/** + * mic_copy_oldmem_page - copy one page from "oldmem" + * @pfn: page frame number to be copied + * @buf: target memory address for the copy; this can be in kernel address + * space or user address space (see @userbuf) + * @csize: number of bytes to copy + * @offset: offset in bytes into the page (based on pfn) to begin the copy + * @userbuf: if set, @buf is in user address space, use copy_to_user(), + * otherwise @buf is in kernel address space, use memcpy(). + * + * Copy a page from "oldmem". For this page, there is no pte mapped + * in the current kernel. We stitch up a pte, similar to kmap_atomic. + */ +ssize_t mic_copy_oldmem_page(mic_ctx_t *mic_ctx, + unsigned long pfn, char *buf, + size_t csize, unsigned long offset, int userbuf) +{ + void *vaddr, *tmp; + int err; + struct dma_channel *dma_chan; + dma_addr_t mic_dst_phys_addr; + + vaddr = mic_ctx->aper.va + (pfn << PAGE_SHIFT); + + if (!csize) + return 0; + if (csize == PAGE_SIZE && !offset) { + if (!(tmp = (void*)__get_free_pages(GFP_KERNEL, get_order(PAGE_SIZE)))) { + printk(KERN_ERR "%s: tmp buffer allocation failed\n", __func__); + return -ENOMEM; + } + mic_dst_phys_addr = mic_ctx_map_single(mic_ctx, tmp, csize); + if (mic_map_error(mic_dst_phys_addr)) { + printk(KERN_ERR "%s: mic_ctx_map_single failed\n", __func__); + free_pages((unsigned long)tmp, get_order(PAGE_SIZE)); + return -ENOMEM; + } + + if ((allocate_dma_channel(mic_ctx->dma_handle, &dma_chan))) { + printk(KERN_ERR "%s: allocate_dma_channel failed\n", __func__); + mic_ctx_unmap_single(mic_ctx, mic_dst_phys_addr, csize); + free_pages((unsigned long)tmp, get_order(PAGE_SIZE)); + return -EBUSY; + } + + err = do_dma(dma_chan, + 0, + pfn << PAGE_SHIFT, + mic_dst_phys_addr, + csize, + NULL); + if (err) { + printk(KERN_ERR "DMA do_dma err %s %d err %d src 0x%lx " + "dst 0x%llx csize 0x%lx\n", + __func__, __LINE__, err, pfn << PAGE_SHIFT, + mic_dst_phys_addr, csize); + free_dma_channel(dma_chan); + mic_ctx_unmap_single(mic_ctx, mic_dst_phys_addr, csize); + free_pages((unsigned long)tmp, get_order(PAGE_SIZE)); + return err; + } + free_dma_channel(dma_chan); + err = drain_dma_poll(dma_chan); + if (err) { + printk(KERN_ERR "DMA poll err %s %d err %d src 0x%lx i" + "dst 0x%llx csize 0x%lx\n", + __func__, __LINE__, err, pfn << PAGE_SHIFT, + mic_dst_phys_addr, csize); + mic_ctx_unmap_single(mic_ctx, mic_dst_phys_addr, csize); + free_pages((unsigned long)tmp, get_order(PAGE_SIZE)); + return err; + } + if (userbuf) { + if (copy_to_user(buf, tmp, csize)) { + mic_ctx_unmap_single(mic_ctx, mic_dst_phys_addr, csize); + free_pages((unsigned long)tmp, get_order(PAGE_SIZE)); + return 
-EFAULT; + } + } else { + memcpy(buf, tmp, csize); + } + smp_mb(); + mic_ctx_unmap_single(mic_ctx, mic_dst_phys_addr, csize); + free_pages((unsigned long)tmp, get_order(PAGE_SIZE)); + } else { + if (userbuf) { + if (copy_to_user(buf, vaddr + offset, csize)) + return -EFAULT; + } else + memcpy_fromio(buf, vaddr + offset, csize); + } + return csize; +} + +/* Reads a page from the oldmem device from given offset. */ +static ssize_t read_from_oldmem(mic_ctx_t *mic_ctx, + char *buf, size_t count, + u64 *ppos, int userbuf) +{ + unsigned long pfn, offset; + size_t nr_bytes; + ssize_t read = 0, tmp; + + if (!count) + return 0; + + offset = (unsigned long)(*ppos % PAGE_SIZE); + pfn = (unsigned long)(*ppos / PAGE_SIZE); + + do { + if (count > (PAGE_SIZE - offset)) + nr_bytes = PAGE_SIZE - offset; + else + nr_bytes = count; + + tmp = mic_copy_oldmem_page(mic_ctx, pfn, buf, nr_bytes, offset, userbuf); + if (tmp < 0) + return tmp; + *ppos += nr_bytes; + count -= nr_bytes; + buf += nr_bytes; + read += nr_bytes; + ++pfn; + offset = 0; + } while (count); + + return read; +} + +/* Maps vmcore file offset to respective physical address in memroy. */ +static u64 map_offset_to_paddr(loff_t offset, struct list_head *vc_list, + struct vmcore **m_ptr) +{ + struct vmcore *m; + u64 paddr; + + list_for_each_entry(m, vc_list, list) { + u64 start, end; + start = m->offset; + end = m->offset + m->size - 1; + if (offset >= start && offset <= end) { + paddr = m->paddr + offset - start; + *m_ptr = m; + return paddr; + } + } + *m_ptr = NULL; + return 0; +} + +/* Read from the ELF header and then the crash dump. On error, negative value is + * returned otherwise number of bytes read are returned. + */ +static ssize_t read_vmcore(struct file *file, char __user *buffer, + size_t buflen, loff_t *fpos) +{ + ssize_t acc = 0, tmp; + size_t tsz; + u64 start, nr_bytes; + struct vmcore *curr_m = NULL; + struct inode *inode = file->f_path.dentry->d_inode; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + mic_ctx_t *mic_ctx = PDE_DATA(inode); +#else + struct proc_dir_entry *entry = PDE(inode); + mic_ctx_t *mic_ctx = entry->data; +#endif + + if (buflen == 0 || *fpos >= mic_ctx->vmcore_size) + return 0; + + /* trim buflen to not go beyond EOF */ + if (buflen > mic_ctx->vmcore_size - *fpos) + buflen = mic_ctx->vmcore_size - *fpos; + + /* Read ELF core header */ + if (*fpos < mic_ctx->elfcorebuf_sz) { + tsz = mic_ctx->elfcorebuf_sz - *fpos; + if (buflen < tsz) + tsz = buflen; + if (copy_to_user(buffer, mic_ctx->elfcorebuf + *fpos, tsz)) + return -EFAULT; + buflen -= tsz; + *fpos += tsz; + buffer += tsz; + acc += tsz; + + /* leave now if filled buffer already */ + if (buflen == 0) + return acc; + } + + start = map_offset_to_paddr(*fpos, &mic_ctx->vmcore_list, &curr_m); + if (!curr_m) + return -EINVAL; + if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen) + tsz = buflen; + + /* Calculate left bytes in current memory segment. */ + nr_bytes = (curr_m->size - (start - curr_m->paddr)); + if (tsz > nr_bytes) + tsz = nr_bytes; + + while (buflen) { + tmp = read_from_oldmem(mic_ctx,buffer, tsz, &start, 1); + if (tmp < 0) + return tmp; + buflen -= tsz; + *fpos += tsz; + buffer += tsz; + acc += tsz; + if (start >= (curr_m->paddr + curr_m->size)) { + if (curr_m->list.next == &mic_ctx->vmcore_list) + return acc; /*EOF*/ + curr_m = list_entry(curr_m->list.next, + struct vmcore, list); + start = curr_m->paddr; + } + if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen) + tsz = buflen; + /* Calculate left bytes in current memory segment. 
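[Illustrative aside, not part of the patch] read_from_oldmem() above serves an arbitrary byte range by splitting it at page boundaries: only the first chunk may start mid-page, every later chunk starts at offset 0 of the next frame. A simplified user-space model of that loop; PAGE_SZ, copy_page() and read_old() are stand-ins invented for the example:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

#define PAGE_SZ 4096UL

static void copy_page(unsigned long pfn, unsigned long offset, size_t n)
{
        printf("pfn %lu offset %lu bytes %zu\n", pfn, offset, n);
}

static size_t read_old(uint64_t pos, size_t count)
{
        unsigned long pfn = pos / PAGE_SZ;
        unsigned long offset = pos % PAGE_SZ;
        size_t done = 0;

        while (count) {
                size_t n = count > PAGE_SZ - offset ? PAGE_SZ - offset : count;

                copy_page(pfn, offset, n);
                done += n;
                count -= n;
                pfn++;
                offset = 0;     /* only the first chunk starts mid-page */
        }
        return done;
}

int main(void)
{
        /* 6000 bytes starting 100 bytes into page 2: chunks of 3996 and 2004 */
        printf("total %zu\n", read_old(2 * PAGE_SZ + 100, 6000));
        return 0;
}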
*/ + nr_bytes = (curr_m->size - (start - curr_m->paddr)); + if (tsz > nr_bytes) + tsz = nr_bytes; + } + return acc; +} + +static const struct file_operations proc_vmcore_operations = { + .read = read_vmcore, +}; + +static struct vmcore* get_new_element(void) +{ + return kzalloc(sizeof(struct vmcore), GFP_KERNEL); +} + +static u64 get_vmcore_size_elf64(char *elfptr) +{ + int i; + u64 size; + Elf64_Ehdr *ehdr_ptr; + Elf64_Phdr *phdr_ptr; + + ehdr_ptr = (Elf64_Ehdr *)elfptr; + phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); + size = sizeof(Elf64_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr)); + for (i = 0; i < ehdr_ptr->e_phnum; i++) { + size += phdr_ptr->p_memsz; + phdr_ptr++; + } + return size; +} + +static u64 get_vmcore_size_elf32(char *elfptr) +{ + int i; + u64 size; + Elf32_Ehdr *ehdr_ptr; + Elf32_Phdr *phdr_ptr; + + ehdr_ptr = (Elf32_Ehdr *)elfptr; + phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); + size = sizeof(Elf32_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr)); + for (i = 0; i < ehdr_ptr->e_phnum; i++) { + size += phdr_ptr->p_memsz; + phdr_ptr++; + } + return size; +} + +/* Merges all the PT_NOTE headers into one. */ +static int merge_note_headers_elf64(mic_ctx_t *mic_ctx, + char *elfptr, size_t *elfsz, + struct list_head *vc_list) +{ + int i, nr_ptnote=0, rc=0; + char *tmp; + Elf64_Ehdr *ehdr_ptr; + Elf64_Phdr phdr, *phdr_ptr; + Elf64_Nhdr *nhdr_ptr; + u64 phdr_sz = 0, note_off; + + ehdr_ptr = (Elf64_Ehdr *)elfptr; + phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); + for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + int j; + void *notes_section; + struct vmcore *new; + u64 offset, max_sz, sz, real_sz = 0; + if (phdr_ptr->p_type != PT_NOTE) + continue; + nr_ptnote++; + max_sz = phdr_ptr->p_memsz; + offset = phdr_ptr->p_offset; + notes_section = kmalloc(max_sz, GFP_KERNEL); + if (!notes_section) + return -ENOMEM; + rc = read_from_oldmem(mic_ctx, notes_section, max_sz, &offset, 0); + if (rc < 0) { + kfree(notes_section); + return rc; + } + nhdr_ptr = notes_section; + for (j = 0; j < max_sz; j += sz) { + if (nhdr_ptr->n_namesz == 0) + break; + sz = sizeof(Elf64_Nhdr) + + ((nhdr_ptr->n_namesz + 3) & ~3) + + ((nhdr_ptr->n_descsz + 3) & ~3); + real_sz += sz; + nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz); + } + + /* Add this contiguous chunk of notes section to vmcore list.*/ + new = get_new_element(); + if (!new) { + kfree(notes_section); + return -ENOMEM; + } + new->paddr = phdr_ptr->p_offset; + new->size = real_sz; + list_add_tail(&new->list, vc_list); + phdr_sz += real_sz; + kfree(notes_section); + } + + /* Prepare merged PT_NOTE program header. */ + phdr.p_type = PT_NOTE; + phdr.p_flags = 0; + note_off = sizeof(Elf64_Ehdr) + + (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr); + phdr.p_offset = note_off; + phdr.p_vaddr = phdr.p_paddr = 0; + phdr.p_filesz = phdr.p_memsz = phdr_sz; + phdr.p_align = 0; + + /* Add merged PT_NOTE program header*/ + tmp = elfptr + sizeof(Elf64_Ehdr); + memcpy(tmp, &phdr, sizeof(phdr)); + tmp += sizeof(phdr); + + /* Remove unwanted PT_NOTE program headers. */ + i = (nr_ptnote - 1) * sizeof(Elf64_Phdr); + *elfsz = *elfsz - i; + memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr))); + + /* Modify e_phnum to reflect merged headers. */ + ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; + + return 0; +} + +/* Merges all the PT_NOTE headers into one. 
*/ +static int merge_note_headers_elf32(mic_ctx_t *mic_ctx, + char *elfptr, size_t *elfsz, + struct list_head *vc_list) +{ + int i, nr_ptnote=0, rc=0; + char *tmp; + Elf32_Ehdr *ehdr_ptr; + Elf32_Phdr phdr, *phdr_ptr; + Elf32_Nhdr *nhdr_ptr; + u64 phdr_sz = 0, note_off; + + ehdr_ptr = (Elf32_Ehdr *)elfptr; + phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); + for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + int j; + void *notes_section; + struct vmcore *new; + u64 offset, max_sz, sz, real_sz = 0; + if (phdr_ptr->p_type != PT_NOTE) + continue; + nr_ptnote++; + max_sz = phdr_ptr->p_memsz; + offset = phdr_ptr->p_offset; + notes_section = kmalloc(max_sz, GFP_KERNEL); + if (!notes_section) + return -ENOMEM; + rc = read_from_oldmem(mic_ctx, notes_section, max_sz, &offset, 0); + if (rc < 0) { + kfree(notes_section); + return rc; + } + nhdr_ptr = notes_section; + for (j = 0; j < max_sz; j += sz) { + if (nhdr_ptr->n_namesz == 0) + break; + sz = sizeof(Elf32_Nhdr) + + ((nhdr_ptr->n_namesz + 3) & ~3) + + ((nhdr_ptr->n_descsz + 3) & ~3); + real_sz += sz; + nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz); + } + + /* Add this contiguous chunk of notes section to vmcore list.*/ + new = get_new_element(); + if (!new) { + kfree(notes_section); + return -ENOMEM; + } + new->paddr = phdr_ptr->p_offset; + new->size = real_sz; + list_add_tail(&new->list, vc_list); + phdr_sz += real_sz; + kfree(notes_section); + } + + /* Prepare merged PT_NOTE program header. */ + phdr.p_type = PT_NOTE; + phdr.p_flags = 0; + note_off = sizeof(Elf32_Ehdr) + + (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr); + phdr.p_offset = note_off; + phdr.p_vaddr = phdr.p_paddr = 0; + phdr.p_filesz = phdr.p_memsz = phdr_sz; + phdr.p_align = 0; + + /* Add merged PT_NOTE program header*/ + tmp = elfptr + sizeof(Elf32_Ehdr); + memcpy(tmp, &phdr, sizeof(phdr)); + tmp += sizeof(phdr); + + /* Remove unwanted PT_NOTE program headers. */ + i = (nr_ptnote - 1) * sizeof(Elf32_Phdr); + *elfsz = *elfsz - i; + memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr))); + + /* Modify e_phnum to reflect merged headers. */ + ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1; + + return 0; +} + +/* Add memory chunks represented by program headers to vmcore list. Also update + * the new offset fields of exported program headers. */ +static int process_ptload_program_headers_elf64(char *elfptr, + size_t elfsz, + struct list_head *vc_list) +{ + int i; + Elf64_Ehdr *ehdr_ptr; + Elf64_Phdr *phdr_ptr; + loff_t vmcore_off; + struct vmcore *new; + + ehdr_ptr = (Elf64_Ehdr *)elfptr; + phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */ + + /* First program header is PT_NOTE header. */ + vmcore_off = sizeof(Elf64_Ehdr) + + (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr) + + phdr_ptr->p_memsz; /* Note sections */ + + for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + if (phdr_ptr->p_type != PT_LOAD) + continue; + + /* Add this contiguous chunk of memory to vmcore list.*/ + new = get_new_element(); + if (!new) + return -ENOMEM; + new->paddr = phdr_ptr->p_offset; + new->size = phdr_ptr->p_memsz; + list_add_tail(&new->list, vc_list); + + /* Update the program header offset. 
*/ + phdr_ptr->p_offset = vmcore_off; + vmcore_off = vmcore_off + phdr_ptr->p_memsz; + } + return 0; +} + +static int process_ptload_program_headers_elf32(char *elfptr, + size_t elfsz, + struct list_head *vc_list) +{ + int i; + Elf32_Ehdr *ehdr_ptr; + Elf32_Phdr *phdr_ptr; + loff_t vmcore_off; + struct vmcore *new; + + ehdr_ptr = (Elf32_Ehdr *)elfptr; + phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */ + + /* First program header is PT_NOTE header. */ + vmcore_off = sizeof(Elf32_Ehdr) + + (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr) + + phdr_ptr->p_memsz; /* Note sections */ + + for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) { + if (phdr_ptr->p_type != PT_LOAD) + continue; + + /* Add this contiguous chunk of memory to vmcore list.*/ + new = get_new_element(); + if (!new) + return -ENOMEM; + new->paddr = phdr_ptr->p_offset; + new->size = phdr_ptr->p_memsz; + list_add_tail(&new->list, vc_list); + + /* Update the program header offset */ + phdr_ptr->p_offset = vmcore_off; + vmcore_off = vmcore_off + phdr_ptr->p_memsz; + } + return 0; +} + +/* Sets offset fields of vmcore elements. */ +static void set_vmcore_list_offsets_elf64(char *elfptr, + struct list_head *vc_list) +{ + loff_t vmcore_off; + Elf64_Ehdr *ehdr_ptr; + struct vmcore *m; + + ehdr_ptr = (Elf64_Ehdr *)elfptr; + + /* Skip Elf header and program headers. */ + vmcore_off = sizeof(Elf64_Ehdr) + + (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr); + + list_for_each_entry(m, vc_list, list) { + m->offset = vmcore_off; + vmcore_off += m->size; + } +} + +/* Sets offset fields of vmcore elements. */ +static void set_vmcore_list_offsets_elf32(char *elfptr, + struct list_head *vc_list) +{ + loff_t vmcore_off; + Elf32_Ehdr *ehdr_ptr; + struct vmcore *m; + + ehdr_ptr = (Elf32_Ehdr *)elfptr; + + /* Skip Elf header and program headers. */ + vmcore_off = sizeof(Elf32_Ehdr) + + (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr); + + list_for_each_entry(m, vc_list, list) { + m->offset = vmcore_off; + vmcore_off += m->size; + } +} + +static int parse_crash_elf64_headers(mic_ctx_t *mic_ctx) +{ + int rc=0; + Elf64_Ehdr ehdr; + u64 addr; + + addr = elfcorehdr_addr; + + /* Read Elf header */ + rc = read_from_oldmem(mic_ctx, (char*)&ehdr, sizeof(Elf64_Ehdr), &addr, 0); + if (rc < 0) + return rc; + + /* Do some basic Verification. */ + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || + (ehdr.e_type != ET_CORE) || +#ifdef CONFIG_CRASH_DUMP +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,36)) + !vmcore_elf64_check_arch(&ehdr) || +#else + !vmcore_elf_check_arch(&ehdr) || +#endif +#else + !elf_check_arch(&ehdr) || +#endif + ehdr.e_ident[EI_CLASS] != ELFCLASS64 || + ehdr.e_ident[EI_VERSION] != EV_CURRENT || + ehdr.e_version != EV_CURRENT || + ehdr.e_ehsize != sizeof(Elf64_Ehdr) || + ehdr.e_phentsize != sizeof(Elf64_Phdr) || + ehdr.e_phnum == 0) { + printk(KERN_WARNING "Warning: Core image elf header is not" + "sane\n"); + return -EINVAL; + } + + WARN_ON(mic_ctx->elfcorebuf); + /* Read in all elf headers. */ + mic_ctx->elfcorebuf_sz = sizeof(Elf64_Ehdr) + ehdr.e_phnum * sizeof(Elf64_Phdr); + mic_ctx->elfcorebuf = kmalloc(mic_ctx->elfcorebuf_sz, GFP_KERNEL); + if (!mic_ctx->elfcorebuf) + return -ENOMEM; + addr = elfcorehdr_addr; + rc = read_from_oldmem(mic_ctx, mic_ctx->elfcorebuf, mic_ctx->elfcorebuf_sz, &addr, 0); + if (rc < 0) { + kfree(mic_ctx->elfcorebuf); + mic_ctx->elfcorebuf = NULL; + return rc; + } + + /* Merge all PT_NOTE headers into one. 
*/ + rc = merge_note_headers_elf64(mic_ctx, mic_ctx->elfcorebuf, &mic_ctx->elfcorebuf_sz, &mic_ctx->vmcore_list); + if (rc) { + kfree(mic_ctx->elfcorebuf); + mic_ctx->elfcorebuf = NULL; + return rc; + } + rc = process_ptload_program_headers_elf64(mic_ctx->elfcorebuf, mic_ctx->elfcorebuf_sz, + &mic_ctx->vmcore_list); + if (rc) { + kfree(mic_ctx->elfcorebuf); + mic_ctx->elfcorebuf = NULL; + return rc; + } + set_vmcore_list_offsets_elf64(mic_ctx->elfcorebuf, &mic_ctx->vmcore_list); + return 0; +} + +static int parse_crash_elf32_headers(mic_ctx_t *mic_ctx) +{ + int rc=0; + Elf32_Ehdr ehdr; + u64 addr; + + addr = elfcorehdr_addr; + + /* Read Elf header */ + rc = read_from_oldmem(mic_ctx, (char*)&ehdr, sizeof(Elf32_Ehdr), &addr, 0); + if (rc < 0) + return rc; + + /* Do some basic Verification. */ + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || + (ehdr.e_type != ET_CORE) || + !elf_check_arch(&ehdr) || + ehdr.e_ident[EI_CLASS] != ELFCLASS32|| + ehdr.e_ident[EI_VERSION] != EV_CURRENT || + ehdr.e_version != EV_CURRENT || + ehdr.e_ehsize != sizeof(Elf32_Ehdr) || + ehdr.e_phentsize != sizeof(Elf32_Phdr) || + ehdr.e_phnum == 0) { + printk(KERN_WARNING "Warning: Core image elf header is not" + "sane\n"); + return -EINVAL; + } + + WARN_ON(mic_ctx->elfcorebuf); + /* Read in all elf headers. */ + mic_ctx->elfcorebuf_sz = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr); + mic_ctx->elfcorebuf = kmalloc(mic_ctx->elfcorebuf_sz, GFP_KERNEL); + if (!mic_ctx->elfcorebuf) + return -ENOMEM; + addr = elfcorehdr_addr; + rc = read_from_oldmem(mic_ctx, mic_ctx->elfcorebuf, mic_ctx->elfcorebuf_sz, &addr, 0); + if (rc < 0) { + kfree(mic_ctx->elfcorebuf); + mic_ctx->elfcorebuf = NULL; + return rc; + } + + /* Merge all PT_NOTE headers into one. */ + rc = merge_note_headers_elf32(mic_ctx, mic_ctx->elfcorebuf, &mic_ctx->elfcorebuf_sz, &mic_ctx->vmcore_list); + if (rc) { + kfree(mic_ctx->elfcorebuf); + mic_ctx->elfcorebuf = NULL; + return rc; + } + rc = process_ptload_program_headers_elf32(mic_ctx->elfcorebuf, mic_ctx->elfcorebuf_sz, + &mic_ctx->vmcore_list); + if (rc) { + kfree(mic_ctx->elfcorebuf); + mic_ctx->elfcorebuf = NULL; + return rc; + } + set_vmcore_list_offsets_elf32(mic_ctx->elfcorebuf, &mic_ctx->vmcore_list); + return 0; +} + +static int parse_crash_elf_headers(mic_ctx_t *mic_ctx) +{ + unsigned char e_ident[EI_NIDENT]; + u64 addr; + int rc=0; + + addr = elfcorehdr_addr; + rc = read_from_oldmem(mic_ctx, e_ident, EI_NIDENT, &addr, 0); + if (rc < 0) + return rc; + if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) { + printk(KERN_WARNING "Warning: Core image elf header" + " not found\n"); + return -EINVAL; + } + + if (e_ident[EI_CLASS] == ELFCLASS64) { + rc = parse_crash_elf64_headers(mic_ctx); + if (rc) + return rc; + + /* Determine vmcore size. */ + mic_ctx->vmcore_size = get_vmcore_size_elf64(mic_ctx->elfcorebuf); + } else if (e_ident[EI_CLASS] == ELFCLASS32) { + rc = parse_crash_elf32_headers(mic_ctx); + if (rc) + return rc; + + /* Determine vmcore size. */ + mic_ctx->vmcore_size = get_vmcore_size_elf32(mic_ctx->elfcorebuf); + } else { + printk(KERN_WARNING "Warning: Core image elf header is not" + " sane\n"); + return -EINVAL; + } + return 0; +} + +/* Init function for vmcore module. 
*/ +int vmcore_create(mic_ctx_t *mic_ctx) +{ + int rc = 0; + char name[64]; + if (!vmcore_dir) { + rc = -ENOMEM; + return rc; + } + INIT_LIST_HEAD(&mic_ctx->vmcore_list); + rc = parse_crash_elf_headers(mic_ctx); + if (rc) { + printk(KERN_WARNING "Kdump: vmcore not initialized\n"); + if (mic_ctx->vmcore_dir) { + remove_proc_entry(name, vmcore_dir); + mic_ctx->vmcore_dir = NULL; + } + return rc; + } + snprintf(name, 64, "mic%d", mic_ctx->bi_id); + if (!mic_ctx->vmcore_dir) { + mic_ctx->vmcore_dir = proc_create_data(name, S_IRUSR, + vmcore_dir, &proc_vmcore_operations, mic_ctx); + if (!mic_ctx->vmcore_dir) { + printk(KERN_WARNING "Kdump: proc creation for %s failed\n", name); + rc = -ENOMEM; + return rc; + } + } +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) +#else + if (mic_ctx->vmcore_dir) + mic_ctx->vmcore_dir->size = mic_ctx->vmcore_size; +#endif + return 0; +} diff --git a/include/mic/bootparams.h b/include/mic/bootparams.h new file mode 100644 index 0000000..2102362 --- /dev/null +++ b/include/mic/bootparams.h @@ -0,0 +1,170 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MIC_BOOT_PARAM_HEADER_VERSION 8 + +#define MIC_OS_BOOTSTATUS_SUCCESS 1 +#define MIC_OS_BOOTSTATUS_BOOT_0 2 // Initial state of uOS boot +#define MIC_OS_BOOTSTATUS_ERROR_VERSION_MISMATCH 3 +#define MIC_OS_BOOTSTATUS_ERROR 4 + +#define MIC_HOST_DEFAULT 6 // Only value accepted so do not change + +#define MIC_ENG_APPLICATION 0 +#define MIC_ENG_PAGING 1 +#define MIC_ENG_VIDEO 2 +#define MIC_ENG_HIGHPRIORITY 3 +#define MIC_ENG_MAX_SUPPORTED_ENGINES 4 + +struct ringbuf_memdesc +{ + uint64_t address; // Location of the ring buffer + uint32_t size; // size of ring buffer + uint32_t reserved; // pad +}; + +struct mic_bootparam +{ + uint64_t bp_version; + + union + { + uint32_t bp_bootstatus; + uint64_t bp_reserved; + }; + + uint64_t bp_vcons_addr; + uint64_t bp_vcons_size; + uint64_t bp_shdata_addr; + uint64_t bp_shdata_size; + struct ringbuf_memdesc bp_ringbuf[MIC_ENG_MAX_SUPPORTED_ENGINES]; + + uint64_t bp_unused0; + uint64_t bp_unused1; + uint64_t bp_unused2; + uint64_t bp_unused3; + uint64_t bp_unused4; + uint64_t bp_unused5; + uint64_t bp_unused6; + uint64_t bp_unused7; + + uint64_t bp_engstate_addr; + + struct ringbuf_memdesc bp_unused8; + + uint64_t bp_unused9; + uint64_t bp_unused10; + uint64_t bp_unused11; + +}; + +struct host_bootparam +{ + uint64_t bp_version; + + union + { + uint64_t bp_host_type; + uint64_t bp_reserved; + }; + + uint64_t bp_vcons_addr; + uint64_t bp_vcons_size; + + uint64_t bp_unused0; + + uint64_t bp_engstate_addr; + + struct ringbuf_memdesc bp_ringbuf[MIC_ENG_MAX_SUPPORTED_ENGINES]; + + uint64_t bp_dmabuf_size[MIC_ENG_MAX_SUPPORTED_ENGINES]; + + uint64_t bp_unused1; + uint64_t bp_unused2; + + uint64_t bp_aper_size; + + uint8_t bp_unused3[36]; + uint64_t bp_unused4; + + struct ringbuf_memdesc bp_unused5; + + uint64_t bp_unused6; + uint64_t bp_unused7; + + uint32_t bp_watchdog_timeout; +}; + +struct enginestate_mic +{ + uint32_t writeOffset __attribute__((aligned(64))); + uint32_t lastCompletedFence __attribute__((aligned(64))); + uint32_t fenceWhenPreempted __attribute__((aligned(64))); + uint32_t preemptOffset __attribute__((aligned(64))); +}; + diff --git a/include/mic/compl_buf_ring.h b/include/mic/compl_buf_ring.h new file mode 100644 index 0000000..4882525 --- /dev/null +++ b/include/mic/compl_buf_ring.h @@ -0,0 +1,220 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. 
This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef COMPL_BUF_RING_H +#define COMPL_BUF_RING_H +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mic_dma_md.h" +#ifndef _MIC_SCIF_ +#include "micscif.h" +#include "micscif_smpt.h" +#endif +#define MAX_POLL_TAIL_READ_RETRIES 20 + +/* + * Assuming read/write to int is atomic + * This can't be used as generic ring because of update_tail() + * One entry is left in the ring to differentiate between ring being empty and + * full + */ +struct compl_buf_ring { + int head; + int tail; + int size; + uint64_t tail_location; + dma_addr_t tail_phys; +}; + +/* + * FIXME: + * Function calls pci_map_single etc, return type needs to indicate + * an error + */ +static __always_inline void init_ring(struct compl_buf_ring *ring, int size, + int device_num) +{ +#ifndef _MIC_SCIF_ + struct pci_dev *pdev; +#endif + ring->head = 0; + ring->tail = 0; + ring->size = size; + ring->tail_location = (uint64_t) kmalloc(sizeof(uint64_t), GFP_ATOMIC); + BUG_ON(!ring->tail_location); + *(int*)ring->tail_location = -1; +#ifdef _MIC_SCIF_ + ring->tail_phys = virt_to_phys((void*)ring->tail_location); +#else + micscif_pci_dev(device_num, &pdev); + + ring->tail_phys = mic_map_single(device_num - 1, pdev, (void *)ring->tail_location, + sizeof(uint64_t)); + if (mic_map_error(ring->tail_phys)) + printk(KERN_ERR "mic_map returned error please help\n"); +#endif +} + +static __always_inline void uninit_ring(struct compl_buf_ring *ring, + int device_num) +{ +#ifndef _MIC_SCIF_ + struct pci_dev *pdev; +#endif + ring->head = 0; + ring->tail = 0; + ring->size = 0; +#ifndef _MIC_SCIF_ + micscif_pci_dev(device_num, &pdev); + mic_unmap_single(device_num - 1, pdev, ring->tail_phys, sizeof(uint64_t)); +#endif + kfree((void *)ring->tail_location); +} + +static __always_inline int incr_rb_index(int cur_index, int ring_size) +{ + return((cur_index + 1) % ring_size); +} + +/* + * Tail location has the index that has been recently processed by dma engine + * But, tail has to point to the index that will be processed next + * So increment the tail + */ +static __always_inline void update_tail(struct compl_buf_ring *ring, int new_tail) +{ + ring->tail = new_tail; +} + +static __always_inline int read_tail(struct compl_buf_ring *ring) +{ + return incr_rb_index(*(volatile int*)ring->tail_location, ring->size); +} + +/* + * This fn. 
assumes no one else is updating head + * Returns - avaliable space + * 0 - if no space is available + */ +static __always_inline bool avail_space_in_ring(struct compl_buf_ring *ring) +{ + int count = 0, max_num_retries = MAX_POLL_TAIL_READ_RETRIES, num_retries = 0; + int head = ring->head, tail = ring->tail; +retry: + if (head > tail) + count = (tail - 0) + (ring->size - head); + else if (tail > head) + count = tail - head; + else + return ring->size - 1; + + if (1 != count) + return count - 1; + + num_retries++; + if (num_retries == max_num_retries) + return 0; + cpu_relax(); + + ring->tail = read_tail(ring); + tail = ring->tail; + + goto retry; +} + +/* + * Used for polling + */ +static __always_inline bool is_entry_processed(struct compl_buf_ring *ring, int index) +{ + int head = ring->head, tail = ring->tail; + if (head < tail) { + if (index >= head && index < tail) + return 1; + } else { + if (index >= head || index < tail) + return 1; + } + return 0; +} + +static __always_inline void incr_head(struct compl_buf_ring *ring) +{ + ring->head = incr_rb_index(ring->head, ring->size); +} + +/* + * This function is not reentrant + * It is expected that the user of this func, will call incr_head() if allocated + * buffer is used + */ +static __always_inline int allocate_buffer(struct compl_buf_ring *ring) +{ + if (avail_space_in_ring(ring)) + return ring->head; + else + return -1; +} +#endif diff --git a/include/mic/io_interface.h b/include/mic/io_interface.h new file mode 100644 index 0000000..755a381 --- /dev/null +++ b/include/mic/io_interface.h @@ -0,0 +1,217 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
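The compl_buf_ring.h helpers above form a small producer-side protocol: reserve a slot with allocate_buffer(), commit it with incr_head(), and let the DMA engine report progress by writing the last-processed index to tail_phys, which read_tail()/update_tail() fold back into the ring. A minimal sketch of that flow, assuming the card-side (_MIC_SCIF_) build; the 128-entry ring size and device number 0 are placeholders, not values from the patch:

static void compl_ring_example(void)
{
        struct compl_buf_ring ring;
        int idx;

        init_ring(&ring, 128, 0);             /* 128-entry ring, device 0     */

        idx = allocate_buffer(&ring);         /* reserve the next free slot   */
        if (idx >= 0) {
                /* program a DMA status-update descriptor that writes 'idx'
                 * to ring.tail_phys once the preceding work has completed */
                incr_head(&ring);             /* commit the slot just used    */
        }

        update_tail(&ring, read_tail(&ring)); /* pick up progress made by HW  */
        if (idx >= 0 && is_entry_processed(&ring, idx))
                pr_debug("entry %d completed\n", idx);

        uninit_ring(&ring, 0);                /* free and unmap the tail word */
}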
+ */ + +/* Contains common definitions for Windows and Linux IO Interface */ + +#ifndef __IO_INTERFACE_H__ +#define __IO_INTERFACE_H__ + +/* + * The host driver exports sysfs entries in + * /sys/class/mic/micX/ + * The "/sys/class/mic/micX/state" entry reflects the state of the + * card as it transitions from hardware reset through booting an image + * + * All the other entries have valid values when the state entry is either + * "ready" or "online" + */ + +/* + * ----------------------------------------- + * IOCTL interface information + * ----------------------------------------- + */ + +#define IOCTL_FLASHCMD _IOWR('c', 5, struct ctrlioctl_flashcmd *) +#define IOCTL_CARDMEMCPY _IOWR('c', 8, struct ctrlioctl_cardmemcpy *) + +typedef enum _product_knc_stepping_t +{ + KNC_A_STEP, + KNC_B0_STEP, + KNC_C_STEP, + KNC_B1_STEP +} product_knc_stepping_t; + +typedef enum { + FLASH_CMD_ABORT, + FLASH_CMD_READ, + FLASH_CMD_WRITE, + FLASH_CMD_VERSION, + RAS_CMD, + RAS_CMD_INJECT_REPAIR, + RAS_CMD_CORE_DISABLE, + RAS_CMD_CORE_ENABLE, + RAS_CMD_ECC_DISABLE = 0xD, + RAS_CMD_ECC_ENABLE = 0xE, + RAS_CMD_EXIT = 0xF, + /* Driver only commands that are not passed to RASMM */ + FLASH_CMD_READ_DATA, + FLASH_CMD_STATUS, +} MIC_FLASH_CMD_TYPE; + +/** + * struct ctrlioctl_flashcmd: + * + * \param brdnum board for which IOCLT is requested + * \param type arguments needed for the uos escape call + * \param data size of escape arguments + * \param len uos escape opecode + * + * This structure is used for IOCTL_FLASHCMD. + * + * This IOCTL can only be issued when /sys/class/mic/mic0/state returns "online" + * after it has been set to "boot:flash" + */ +struct ctrlioctl_flashcmd { + uint32_t brdnum; + MIC_FLASH_CMD_TYPE type; + void *data; + uint32_t len; +}; + + +/* + * IN/OUT structure used by MIC_FLASH_CMD_TYPE FLASH_CMD_VERSION + * This structure is passed in as data in above command + */ +#define MAX_FLASH_VER_STRLEN 16 +struct version_struct { + uint16_t hdr_ver; + uint16_t odm_ver;//revision for ODM change for flash + uint64_t upd_time_bcd; + uint8_t upd_ver[MAX_FLASH_VER_STRLEN]; // 16 bytes for flash version + uint64_t mfg_time_bcd; + uint8_t mfg_ver[MAX_FLASH_VER_STRLEN]; // 16 bytes for flash version +}; + +/* + * status values returned in MIC_FLASH_CMD_TYPE FLASH_CMD_STATUS + */ +typedef enum { + FLASH_IDLE, + FLASH_CMD_IN_PROGRESS, + FLASH_CMD_COMPLETED, + FLASH_CMD_FAILED, + FLASH_CMD_AUTH_FAILED, + FLASH_SMC_CMD_IN_PROGRESS, + FLASH_SMC_CMD_COMPLETE, + FLASH_SMC_CMD_FAILED, + FLASH_SMC_CMD_AUTH_FAILED, + FLASH_CMD_INVALID = 0xF, +} MIC_FLASH_STATUS; + +struct flash_stat { + MIC_FLASH_STATUS status; + uint32_t percent; + uint32_t smc_status; + uint32_t cmd_data; + uint32_t mm_debug; +}; + +typedef enum { + DBOX, + SBOX, +} MMIO_REGISTER_TYPE; + +/** + * struct ctrlioctl_cardmemcpy: + * + * \param brdnum board for which IOCLT is requested + * \param start card side physical address from which the copy will start + * \param size offset of the register from data is to be read + * \param dest user buffer in which data is to be copied + * + * This structure is used for IOCTL_MMIOREAD. 
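A minimal user-space sketch of the IOCTL_FLASHCMD path described above, issuing FLASH_CMD_VERSION once the card state has been set to "boot:flash" and reports "online". The device node path "/dev/mic/ctrl" and board number 0 are assumptions made purely for illustration; only the structures and the ioctl code come from this header:

/* assumes the definitions from io_interface.h are visible */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

int query_flash_version(void)
{
        struct version_struct ver = { 0 };
        struct ctrlioctl_flashcmd cmd = {
                .brdnum = 0,                  /* board mic0                  */
                .type   = FLASH_CMD_VERSION,
                .data   = &ver,
                .len    = sizeof(ver),
        };
        int fd = open("/dev/mic/ctrl", O_RDWR);  /* assumed node name        */

        if (fd < 0)
                return -1;
        if (ioctl(fd, IOCTL_FLASHCMD, &cmd) == 0)
                printf("flash version: %s\n", (char *)ver.upd_ver);
        close(fd);
        return 0;
}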
+ */ +struct ctrlioctl_cardmemcpy { + uint32_t brdnum; + uint64_t start; + uint64_t size; + void *dest; +}; + +/* + * FIXME:: All the typedefines and structures below and their references need + * to be cleaned up from the driver code + *--------------------------------------------------------------------------- + */ + +typedef enum _product_family_t +{ + FAMILY_UNKNOWN = 0, + FAMILY_ABR, + FAMILY_KNC +} product_family_t; + +typedef enum { + USAGE_MODE_NORMAL = 0, + USAGE_MODE_MAINTENANCE, + USAGE_MODE_ZOMBIE, + USAGE_MODE_MEMDIAG, + USAGE_MODE_NORMAL_RESTRICTED, + USAGE_MODE_NOP, + USAGE_MODE_MAX, + +} CARD_USAGE_MODE; + +/* + * SBOX register definitions + * TODO: Remove the bit fields and replace them with bitwise operators + */ +typedef union sbox_scratch1_reg { + uint32_t value; + struct { + uint32_t percent : 7; + uint32_t status : 4; + uint32_t command : 4; + uint32_t smc_status : 4; + uint32_t reserved : 5; + uint32_t cmd_data : 7; + uint32_t mm_debug : 1; + } bits; +} sbox_scratch1_reg_t; + +typedef union sbox_scratch2_reg { + uint32_t value; + struct { + uint32_t bootstrap_ready : 1; + uint32_t bsp_apic_id : 9; + uint32_t reserved : 2; + uint32_t image_addr : 20; + } bits; +} sbox_scratch2_reg_t; + +#endif //!__IO_INTERFACE_H__ diff --git a/include/mic/mic_dma_api.h b/include/mic/mic_dma_api.h new file mode 100644 index 0000000..f9caffa --- /dev/null +++ b/include/mic/mic_dma_api.h @@ -0,0 +1,170 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef MIC_DMA_API_H +#define MIC_DMA_API_H + +struct dma_channel; +/* API exported by the DMA library */ + +/* + * Per MIC device (per MIC card) DMA handle. The card opens the handle to its own device. + * The host opens the handle to the DMA devices of one of the cards. 
+ */ +typedef void * mic_dma_handle_t; + +/* DMA Library Init/Uninit Routines */ +int open_dma_device(int device_num, uint8_t *mmio_va_base, mic_dma_handle_t* dma_handle); + +void close_dma_device(int device_num, mic_dma_handle_t *dma_handle); + +/* + * reserve_dma_channel - reserve a given dma channel for exclusive use + * + * @dma_handle - handle to DMA device returned by open_dma_device + * @chan_num - Channel number to be reserved + * @chan - set to point to the dma channel reserved by the call + * + * Returns < 1 on error (errorno) + * Returns 0 on success + */ +int reserve_dma_channel(mic_dma_handle_t dma_handle, int chan_num, struct dma_channel **chan); + +/* + * allocate_dma_channel - dynamically allocate a dma channel (for a short while). Will + * search for, choose, and lock down one channel for use by the calling thread. + * + * @dma_handle - handle to DMA device returned by open_dma_device + * @chan - Returns the dma_channel pointer that was allocated by the call + * + * Returns < 1 on error + * Returns 0 on success + * + * NOTE: This function grabs a lock before exiting -- the calling thread MUST NOT + * sleep, and must call free_dma_channel before returning to user-space or switching + * volantarily to another thread. Similarly, this function cannot be called from + * an interrupt context at this time. + */ +int allocate_dma_channel(mic_dma_handle_t dma_handle, struct dma_channel **chan); + +/* + * request_dma_channel - Request a specific DMA channel. + * + * @dma_handle - handle to DMA device returned by open_dma_device + * @chan - Returns the dma_channel pointer that was requested + * + * Returns: 0 on success and -ERESTARTSYS if the wait was interrupted + * or -EBUSY if the channel was not available. + * + * NOTE: This function grabs a lock before exiting -- the calling thread MUST NOT + * sleep, and must call free_dma_channel before returning to user-space or switching + * volantarily to another thread. Similarly, this function cannot be called from + * an interrupt context at this time. + */ +int request_dma_channel(struct dma_channel *chan); + +/* + * free_dma_channel - after allocating a channel, used to + * free the channel after DMAs are submitted + * + * @chan - pointer to the dma_channel struct that was allocated + * + * Returns 0 on success, < 1 on error (errorno) + * + * NOTE: This function must be called after all do_dma calls are finished, + * but can be called before the DMAs actually complete (as long as the comp_cb() + * handler in do_dma don't refer to the dma_channel struct). If called with a + * dynamically allocated dma_channel, the caller must be the thread that called + * allocate_dma_channel. When operating on a dynamic channel, free unlocks the + * mutex locked in allocate. Statically allocated channels cannot be freed, + * and calling this function with that type of channel will return an error. + */ +int free_dma_channel(struct dma_channel *chan); + +/* + * drain_dma_poll - Drain all outstanding DMA operations for a particular + * DMA channel via polling. + * @chan - DMA channel + * Return 0 on success and -errno on error. + */ +int drain_dma_poll(struct dma_channel *chan); + +/* + * drain_dma_intr - Drain all outstanding DMA operations for a particular + * DMA channel via interrupt based blocking wait. + * @chan - DMA channel + * Return 0 on success and -errno on error. + */ +int drain_dma_intr(struct dma_channel *chan); + +/* + * drain_dma_global - Drain all outstanding DMA operations for + * all online DMA channel. 
+ * @block - Is it okay to block while operations are drained? + * Return 0 on success and -errno on error. + */ +int drain_dma_global(mic_dma_handle_t dma_handle); + +#ifdef _MIC_SCIF_ +/* + * dma_suspend: DMA tasks before transition to low power state. + * @dma_handle: Handle for a DMA driver context. + */ +void dma_suspend(mic_dma_handle_t dma_handle); + +/* + * dma_resume: DMA tasks after wake up from low power state. + * @dma_handle: Handle for a DMA driver context. + */ +void dma_resume(mic_dma_handle_t dma_handle); +#else +/* + * dma_prep_suspend: DMA tasks required on host before a device can transition + * to a low power state. + * @dma_handle: Handle for a DMA driver context. + */ +void dma_prep_suspend(mic_dma_handle_t dma_handle); +#endif + +static inline void mic_dma_thread_free_chan(struct dma_channel *chan) +{ + free_dma_channel(chan); +} +#ifndef _MIC_SCIF_ +//extern struct mutex lock_dma_dev_init; +void host_dma_interrupt_handler(mic_dma_handle_t dma_handle, uint32_t sboxSicr0Reg); +#endif + +#endif /* MIC_DMA_API_H */ diff --git a/include/mic/mic_dma_lib.h b/include/mic/mic_dma_lib.h new file mode 100644 index 0000000..7b7d30a --- /dev/null +++ b/include/mic/mic_dma_lib.h @@ -0,0 +1,207 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#ifndef MIC_DMA_LIB_H +#define MIC_DMA_LIB_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Program SUD for poll ring */ +#define DO_DMA_POLLING (1<<0) +/* Program SUD for interrupt ring */ +#define DO_DMA_INTR (1<<1) + +struct dma_channel; + +struct dma_completion_cb { + void (*dma_completion_func) (uint64_t cookie); + uint64_t cb_cookie; + uint8_t *temp_buf; + uint8_t *temp_buf_to_free; + bool is_cache; + uint64_t dst_offset; + uint64_t tmp_offset; + struct reg_range_t *dst_window; + size_t len; + dma_addr_t temp_phys; + int remote_node; + int header_padding; +}; + +int get_chan_num(struct dma_channel *chan); +/* + * do_dma - main dma function: perform a dma memcpy, len bytes from src to dst + * + * @chan - DMA channel to use for the transfer. The channel can be allocated + * dynamically by calling allocate_dma_channel, or statically by + * reserve_dma_channel. Using a channel not allocated in this way will + * result in undefined behavior. + * @flags - ATOMIC, called from an interrupt context (no blocking) + * @src - src physical address + * @dst - dst physical address + * @len - Length of the dma + * @comp_cb - When the DMA is complete, the struct's function will be called. NOTE! + * comp_cb(cb_cookie) is called from an interrupt context, so the + * function must not sleep or block. + * + * Return < 1 on error + * Return 0 on success and DMA is completed + * Return > 1: DMA has been queued. Return value can be polled on for completion + * (poll cookie). An example (simplified w/ no error handling). + * int cookie = do_dma(...); + * while (poll_dma_completion(cookie) == 0); + * printf("DMA now complete\n"); + */ +int do_dma(struct dma_channel *chan, int flags, + uint64_t src, uint64_t dst, size_t len, + struct dma_completion_cb *comp_cb); +/* + * poll_dma_completion - check if a DMA is complete + * + * @poll_cookie - value returned from do_dma + * + * Returns + * < 0 -> error (e.g., invalid cookie) + * 0 -> DMA pending + * 1 -> DMA completed + * + * Note: This is mostly useful after calling do_dma with a NULL comp_cb parameter, as + * it will allow the caller to wait for DMA completion. + */ +int poll_dma_completion(int poll_cookie, struct dma_channel *chan); + +/* + * do_status_update: Update physical address location with the value provided. + * Ensures all previous DMA descriptors submitted on this DMA + * channel are executed. + * @chan - DMA channel to use for the transfer. The channel can be allocated + * dynamically by calling allocate_dma_channel, or statically by + * reserve_dma_channel. Using a channel not allocated in this way will + * result in undefined behavior. + * @phys - physical address + * @value - Value to be programmed + * + * Return 0 on success and appropriate error value on error. + */ +int do_status_update(struct dma_channel *chan, uint64_t phys, uint64_t value); + +/* + * get_dma_mark: Obtain current value of DMA mark + * @chan - DMA channel to use for the transfer. The channel can be allocated + * dynamically by calling allocate_dma_channel, or statically by + * reserve_dma_channel. Using a channel not allocated in this way will + * result in undefined behavior. + * + * Return mark. 
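Putting the pieces above together, a hedged sketch of the poll-based pattern that the do_dma() comment outlines, with the error handling the simplified example omits. The flags value of 0 and the caller-supplied physical addresses are placeholders; the channel comes from allocate_dma_channel() declared in mic_dma_api.h:

static int copy_with_dma(mic_dma_handle_t dma_handle,
                         uint64_t src_phys, uint64_t dst_phys, size_t len)
{
        struct dma_channel *chan;
        int cookie, err;

        err = allocate_dma_channel(dma_handle, &chan);
        if (err)
                return err;

        /* NULL comp_cb: completion is detected by polling the cookie */
        cookie = do_dma(chan, 0 /* no ATOMIC flag assumed */, src_phys,
                        dst_phys, len, NULL);
        if (cookie > 0)                      /* queued; spin until done      */
                while (poll_dma_completion(cookie, chan) == 0)
                        cpu_relax();

        free_dma_channel(chan);              /* releases the channel lock    */
        return cookie < 0 ? cookie : 0;
}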
+ */ +int get_dma_mark(struct dma_channel *chan); + +/* + * is_current_dma_mark: Check if the dma mark provided is the current DMA mark. + * @chan - DMA channel + * @mark - DMA mark + * + * Return true on success and false on failure. + */ +bool is_current_dma_mark(struct dma_channel *chan, int mark); + +/* + * program_dma_mark: Increment the current value of the DMA mark for a DMA channel + * and program an interrupt status update descriptor which ensures that all DMA + * descriptors programmed until this point in time are completed. + * @chan - DMA channel to use for the transfer. The channel can be allocated + * dynamically by calling allocate_dma_channel, or statically by + * reserve_dma_channel. Using a channel not allocated in this way will + * result in undefined behavior. + * + * Return mark upon success and appropriate negative error value on error. + */ +int program_dma_mark(struct dma_channel *chan); + +/* + * is_dma_mark_wait: Check if the dma mark provided has been processed. + * @chan - DMA channel + * @mark - DMA mark + * + * Return true on success and false on failure. + */ +bool is_dma_mark_processed(struct dma_channel *chan, int mark); + +/* + * dma_mark_wait: Wait for the dma mark to complete. + * @chan - DMA channel + * @mark - DMA mark + * @is_interruptible - Use wait_event_interruptible() or not. + * + * Return 0 on success and appropriate error value on error. + */ +int dma_mark_wait(struct dma_channel *chan, int mark, bool is_interruptible); + +#ifndef _MIC_SCIF_ +void host_dma_lib_interrupt_handler(struct dma_channel *chan); +#endif + +#endif /* MIC_DMA_LIB_H */ diff --git a/include/mic/mic_dma_md.h b/include/mic/mic_dma_md.h new file mode 100644 index 0000000..bc8af28 --- /dev/null +++ b/include/mic/mic_dma_md.h @@ -0,0 +1,462 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. 
Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef MIC_DMA_MD_H +#define MIC_DMA_MD_H + +#include "mic_sbox_md.h" +#include "micsboxdefine.h" + +#define MAX_NUM_DMA_CHAN 8 +/* + * WE ASSUME 0 to __LAST_HOST_CHAN_NUM are owned by host + * Keep this in mind when changing this value + */ +#define __LAST_HOST_CHAN_NUM 3 + +#ifdef _MIC_SCIF_ +static inline int first_dma_chan(void) +{ + return __LAST_HOST_CHAN_NUM + 1; +} + +static inline int last_dma_chan(void) +{ + return MAX_NUM_DMA_CHAN - 1; +} +#else +static inline int first_dma_chan(void) +{ + return 0; +} + +static inline int last_dma_chan(void) +{ + return __LAST_HOST_CHAN_NUM; +} +#endif +enum md_mic_dma_chan_reg { + REG_DCAR = 0, + REG_DHPR, + REG_DTPR, + REG_DAUX_HI, + REG_DAUX_LO, + REG_DRAR_HI, + REG_DRAR_LO, + REG_DITR, + REG_DSTAT, + REG_DSTATWB_LO, + REG_DSTATWB_HI, + REG_DCHERR, + REG_DCHERRMSK, +}; + + +/* Pre-defined L1_CACHE_SHIFT is 6 on RH and 7 on Suse */ +#undef L1_CACHE_SHIFT +#define L1_CACHE_SHIFT 6 +#undef L1_CACHE_BYTES +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +enum dma_chan_flags { + CHAN_AVAILABLE = 2, + CHAN_INUSE = 3 +}; + +/* Maximum DMA transfer size for a single memory copy descriptor */ +#define MIC_MAX_DMA_XFER_SIZE (((1U) * 1024 * 1024) - L1_CACHE_BYTES) + +/* TODO: + * I think it should be 128K - 64 (even 128k - 4 may work). + * SIVA: Check this in the end + */ +/* + * The maximum number of descriptors in the DMA descriptor queue is + * 128K - 1 but since it needs to be a multiple of cache lines it is 128K - 64 + */ +#define MIC_MAX_NUM_DESC_PER_RING ((128 * 1024) - L1_CACHE_BYTES) + +/** + * enum md_mic_dma_chan_owner - Memory copy DMA channels can be Host or MIC owned. + * AES channel can only be MIC owned. + */ +enum md_mic_dma_chan_owner { + MIC_DMA_CHAN_MIC_OWNED = 0, + MIC_DMA_CHAN_HOST_OWNED +}; + +/** + * enum md_mic_dma_aes_endianness - Endianness needs to be provided + * only for the AES channel + */ +enum md_mic_dma_aes_endianness { + /* + * The following two bits are opposite of what is given in + * content protection HAS but this is how it is implemented in RTL. + */ + MIC_BIG_ENDIAN = 0, + MIC_LITTLE_ENDIAN +}; + + +/** + * struct md_mic_dma_chan - Opaque data structure for DMA channel specific fields. + */ +/* + * struct md_mic_dma_chan: DMA channel specific structure + * @in_use - true if the channel is in use and false otherwise + * @owner - host or MIC required for masking/unmasking + * interrupts and enabling channels + * @endianness - required for enabling AES channel + * @cookie - Debug cookie to identify this structure + * @num_desc_in_ring - Number of descriptors in the descriptor + * ring for this channel. 
+ */ +struct md_mic_dma_chan { + int ch_num; + atomic_t in_use; + enum md_mic_dma_chan_owner owner; + enum md_mic_dma_aes_endianness endianness; + int cookie; + uint32_t num_desc_in_ring; + uint32_t cached_tail; + uint32_t completion_count; + void *dstat_wb_loc; + dma_addr_t dstat_wb_phys; + /* Add debug/profiling stats here */ +}; + + +/* + * struct mic_dma_device - MIC DMA Device specific structure + * @chan_info - static array of MIC DMA channel specific structures + * @lock - MTX_DEF lock to synchronize allocation/deallocation of DMA channels + */ +struct mic_dma_device { + struct md_mic_dma_chan chan_info[MAX_NUM_DMA_CHAN]; + void *mm_sbox; +}; + + +/** + * union md_mic_dma_desc - Opaque data structure for DMA descriptor format. + */ +/* TODO: Change bitfields to portable masks */ +union md_mic_dma_desc { + union { + struct { + uint64_t rsvd0; + uint64_t rsvd1:60; + uint64_t type:4; + } nop; + struct { + uint64_t sap:40; + uint64_t index:3; + uint64_t rsvd0:3; + uint64_t length:14; + uint64_t rsvd1:4; + uint64_t dap:40; + uint64_t resd:15; + uint64_t twb:1; + uint64_t intr:1; + uint64_t c:1; + uint64_t co:1; + uint64_t ecy:1; + uint64_t type:4; + } memcopy; + struct { + uint64_t data; + uint64_t dap:40; + uint64_t rsvdr0:19; + uint64_t intr:1; + uint64_t type:4; + } status; + struct { + uint64_t data:32; + uint64_t rsvd0:32; + uint64_t dap:40; + uint64_t rsvd1:20; + uint64_t type:4; + } general; + struct { + uint64_t data; + uint64_t rsvd0:53; + uint64_t cs:1; + uint64_t index:3; + uint64_t h:1; + uint64_t sel:2; + uint64_t type:4; + } keynoncecnt; + struct { + uint64_t skap:40; + uint64_t ski:3; + uint64_t rsvd0:21; + uint64_t rsvd1:51; + uint64_t di:3; + uint64_t rsvd2:6; + uint64_t type:4; + } key; + } desc; + struct { + uint64_t qw0; + uint64_t qw1; + } qwords; +}; + +/* Initialization functions */ +void md_mic_dma_init(struct mic_dma_device *dma_dev, uint8_t *mmio_va_base); +void md_mic_dma_uninit(struct mic_dma_device *dma_dev); +void md_mic_dma_chan_init_attr(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan); +void md_mic_dma_chan_mask_intr(struct mic_dma_device *dma_dev, struct md_mic_dma_chan *chan); +void md_mic_dma_chan_unmask_intr(struct mic_dma_device *dma_dev, struct md_mic_dma_chan *chan); +void md_mic_dma_chan_set_desc_ring(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan, + phys_addr_t desc_ring_phys_addr, + uint32_t num_desc); +void md_mic_dma_enable_chan(struct mic_dma_device *dma_dev, uint32_t chan_num, bool enable); +/* API */ +struct md_mic_dma_chan *md_mic_dma_request_chan(struct mic_dma_device *dma_dev, + enum md_mic_dma_chan_owner owner); +void md_mic_dma_free_chan(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan); + +static uint32_t mic_dma_reg[8][13] = { + {SBOX_DCAR_0, SBOX_DHPR_0, SBOX_DTPR_0, SBOX_DAUX_HI_0, SBOX_DAUX_LO_0, SBOX_DRAR_HI_0, + SBOX_DRAR_LO_0, SBOX_DITR_0, SBOX_DSTAT_0, + SBOX_DSTATWB_LO_0, SBOX_DSTATWB_HI_0, SBOX_DCHERR_0, SBOX_DCHERRMSK_0}, + {SBOX_DCAR_1, SBOX_DHPR_1, SBOX_DTPR_1, SBOX_DAUX_HI_1, SBOX_DAUX_LO_1, SBOX_DRAR_HI_1, + SBOX_DRAR_LO_1, SBOX_DITR_1, SBOX_DSTAT_1, + SBOX_DSTATWB_LO_1, SBOX_DSTATWB_HI_1, SBOX_DCHERR_1, SBOX_DCHERRMSK_1}, + {SBOX_DCAR_2, SBOX_DHPR_2, SBOX_DTPR_2, SBOX_DAUX_HI_2, SBOX_DAUX_LO_2, SBOX_DRAR_HI_2, + SBOX_DRAR_LO_2, SBOX_DITR_2, SBOX_DSTAT_2, + SBOX_DSTATWB_LO_2, SBOX_DSTATWB_HI_2, SBOX_DCHERR_2, SBOX_DCHERRMSK_2}, + {SBOX_DCAR_3, SBOX_DHPR_3, SBOX_DTPR_3, SBOX_DAUX_HI_3, SBOX_DAUX_LO_3, SBOX_DRAR_HI_3, + SBOX_DRAR_LO_3, SBOX_DITR_3, SBOX_DSTAT_3, + 
SBOX_DSTATWB_LO_3, SBOX_DSTATWB_HI_3, SBOX_DCHERR_3, SBOX_DCHERRMSK_3}, + {SBOX_DCAR_4, SBOX_DHPR_4, SBOX_DTPR_4, SBOX_DAUX_HI_4, SBOX_DAUX_LO_4, SBOX_DRAR_HI_4, + SBOX_DRAR_LO_4, SBOX_DITR_4, SBOX_DSTAT_4, + SBOX_DSTATWB_LO_4, SBOX_DSTATWB_HI_4, SBOX_DCHERR_4, SBOX_DCHERRMSK_4}, + {SBOX_DCAR_5, SBOX_DHPR_5, SBOX_DTPR_5, SBOX_DAUX_HI_5, SBOX_DAUX_LO_5, SBOX_DRAR_HI_5, + SBOX_DRAR_LO_5, SBOX_DITR_5, SBOX_DSTAT_5, + SBOX_DSTATWB_LO_5, SBOX_DSTATWB_HI_5, SBOX_DCHERR_5, SBOX_DCHERRMSK_5}, + {SBOX_DCAR_6, SBOX_DHPR_6, SBOX_DTPR_6, SBOX_DAUX_HI_6, SBOX_DAUX_LO_6, SBOX_DRAR_HI_6, + SBOX_DRAR_LO_6, SBOX_DITR_6, SBOX_DSTAT_6, + SBOX_DSTATWB_LO_6, SBOX_DSTATWB_HI_6, SBOX_DCHERR_6, SBOX_DCHERRMSK_6}, + {SBOX_DCAR_7, SBOX_DHPR_7, SBOX_DTPR_7, SBOX_DAUX_HI_7, SBOX_DAUX_LO_7, SBOX_DRAR_HI_7, + SBOX_DRAR_LO_7, SBOX_DITR_7, SBOX_DSTAT_7, + SBOX_DSTATWB_LO_7, SBOX_DSTATWB_HI_7, SBOX_DCHERR_7, SBOX_DCHERRMSK_7} +}; + +static __always_inline uint32_t +md_mic_dma_read_mmio(struct mic_dma_device *dma_dev, + int chan, enum md_mic_dma_chan_reg reg) +{ + return mic_sbox_read_mmio(dma_dev->mm_sbox, mic_dma_reg[chan][reg]); +} + +static __always_inline void +md_mic_dma_write_mmio(struct mic_dma_device *dma_dev, int chan, + enum md_mic_dma_chan_reg reg, uint32_t value) +{ + mic_sbox_write_mmio(dma_dev->mm_sbox, mic_dma_reg[chan][reg], value); +} + +#ifdef DEBUG +#ifndef KASSERT +#define KASSERT(x, y, ...) \ + do { \ + if(!x) \ + printk(y, ##__VA_ARGS__);\ + BUG_ON(!x); \ + } while(0) +#endif +#define CHECK_CHAN(chan) \ + do { \ + KASSERT((chan), "NULL DMA channel\n"); \ + KASSERT((DMA_CHAN_COOKIE == chan->cookie), \ + "Bad DMA channel cookie 0x%x\n", chan->cookie); \ + KASSERT(atomic_read(&(chan->in_use)), "DMA Channel not in use\n"); \ + } while(0) +#else // DEBUG +#ifndef KASSERT +#define KASSERT(x, y, ...) 
\ + do { \ + if(!x) \ + printk(y, ##__VA_ARGS__);\ + BUG_ON(!x); \ + } while(0) +#endif +#define CHECK_CHAN(chan) + +#endif // DEBUG + +struct mic_dma_ctx_t; +void md_mic_dma_chan_set_dstat_wb(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan); + +void md_mic_dma_chan_set_dcherr_msk(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan, uint32_t mask); + +static __always_inline void +md_mic_dma_chan_write_head(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan, uint32_t head) +{ + uint32_t chan_num; + CHECK_CHAN(chan); + chan_num = chan->ch_num; + KASSERT((head < chan->num_desc_in_ring), + "head 0x%x > num_desc_in_ring 0x%x chan_num %d\n", + head, chan->num_desc_in_ring, chan_num); + md_mic_dma_write_mmio(dma_dev, chan_num, REG_DHPR, head); +} + +uint32_t md_mic_dma_chan_read_head(struct mic_dma_device *dma_dev, struct md_mic_dma_chan *chan); +uint32_t md_mic_dma_chan_read_tail(struct mic_dma_device *dma_dev, struct md_mic_dma_chan *chan); + +#define TAIL_PTR_READ_RETRIES 500000 +#define HW_CMP_CNT_MASK 0x1ffff +static __always_inline uint32_t +md_avail_desc_ring_space(struct mic_dma_device *dma_dev, bool is_astep, + struct md_mic_dma_chan *chan, uint32_t head, uint32_t required) +{ + uint32_t count = 0, max_num_retries = TAIL_PTR_READ_RETRIES, num_retries = 0; + uint32_t tail = chan->cached_tail; +retry: + if (head > tail) + count = (tail - 0) + (chan->num_desc_in_ring - head); + else if (tail > head) + count = tail - head; + else + return (chan->num_desc_in_ring - 1); + + if (count > required) { + return count - 1; + } else { + if (is_astep) + tail = md_mic_dma_chan_read_tail(dma_dev, chan); + else + tail = HW_CMP_CNT_MASK & md_mic_dma_read_mmio(dma_dev, chan->ch_num, REG_DSTAT); + } + chan->cached_tail = tail; + num_retries++; + if (num_retries == max_num_retries) + return 0; + cpu_relax(); + goto retry; +} + +bool md_mic_dma_chan_intr_pending(struct mic_dma_device *dma_dev, struct md_mic_dma_chan *chan); +phys_addr_t md_mic_dma_chan_get_desc_ring_phys(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan); +phys_addr_t md_mic_dma_chan_get_dstatwb_phys(struct mic_dma_device *dma_dev, + struct md_mic_dma_chan *chan); +inline uint32_t md_mic_dma_read_mmio(struct mic_dma_device *dma_dev, + int chan, enum md_mic_dma_chan_reg reg); + +/* Descriptor programming helpers */ +void md_mic_dma_prep_nop_desc(union md_mic_dma_desc *desc); + +/** + * md_mic_dma_memcpy_desc - Prepares a memory copy descriptor + * @src_phys: Source Physical Address must be cache line aligned + * @dst_phys: Destination physical address must be cache line aligned + * @size: Size of the transfer should not be 0 and must be a multiple + * of cache line size + */ +static __always_inline void +md_mic_dma_memcpy_desc(union md_mic_dma_desc *desc, + uint64_t src_phys, + uint64_t dst_phys, + uint64_t size) +{ + KASSERT((desc != 0), ("NULL desc")); + KASSERT((ALIGN(src_phys - (L1_CACHE_BYTES - 1), L1_CACHE_BYTES) == src_phys), + "src not cache line aligned 0x%llx\n", (unsigned long long)src_phys); + KASSERT((ALIGN(dst_phys - (L1_CACHE_BYTES - 1), L1_CACHE_BYTES) == dst_phys), + "dst not cache line aligned 0x%llx\n", (unsigned long long)dst_phys); + KASSERT(((size != 0) && (size <= MIC_MAX_DMA_XFER_SIZE) && + (ALIGN(size - (L1_CACHE_BYTES - 1), L1_CACHE_BYTES) == size)), + "size > MAX_DMA_XFER_SIZE size 0x%llx", (unsigned long long)size); + + desc->qwords.qw0 = 0; + desc->qwords.qw1 = 0; + desc->desc.memcopy.type = 1; + desc->desc.memcopy.sap = src_phys; + desc->desc.memcopy.dap = 
dst_phys; + desc->desc.memcopy.length = (size >> L1_CACHE_SHIFT); +} + +/** + * md_mic_dma_prep_status_desc - Prepares a status descriptor + * @data - Value to be updated by the DMA engine @ dst_phys + * @dst_phys: Destination physical address + * @generate_intr: Interrupt must be generated when the DMA HW + * completes processing this descriptor + */ +static __always_inline void +md_mic_dma_prep_status_desc(union md_mic_dma_desc *desc, uint64_t data, + uint64_t dst_phys, bool generate_intr) +{ + KASSERT((desc != 0), ("NULL desc")); + + desc->qwords.qw0 = 0; + desc->qwords.qw1 = 0; + desc->desc.memcopy.type = 2; + desc->desc.status.data = data; + desc->desc.status.dap = dst_phys; + if (generate_intr) + desc->desc.status.intr = 1; +} + +/** + * md_mic_dma_prep_gp_desc - Prepares a general purpose descriptor + * @data - Value to be updated by the DMA engine @ dst_phys + * @dst_phys: Destination physical address + */ +static __always_inline void +md_mic_dma_prep_gp_desc(union md_mic_dma_desc *desc, uint32_t data, uint64_t dst_phys) +{ + KASSERT((desc != 0), ("NULL desc")); + + desc->qwords.qw0 = 0; + desc->qwords.qw1 = 0; + desc->desc.general.type = 3; + desc->desc.general.data = data; + desc->desc.general.dap = dst_phys; +} +/* Debug functions */ +void md_mic_dma_print_debug(struct mic_dma_device *dma_dev, struct md_mic_dma_chan *chan); +#endif diff --git a/include/mic/mic_macaddr.h b/include/mic/mic_macaddr.h new file mode 100644 index 0000000..520d735 --- /dev/null +++ b/include/mic/mic_macaddr.h @@ -0,0 +1,104 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
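As an illustration of the descriptor helpers from mic_dma_md.h above, the following hedged sketch fills two consecutive ring slots: a memory-copy descriptor followed by a status descriptor that raises an interrupt once the copy has executed. The ring pointer, addresses and length are placeholders, ring wrap-around is omitted for brevity, and src/dst/len must respect the cache-line alignment the KASSERTs enforce:

static void fill_copy_and_status(union md_mic_dma_desc *ring, uint32_t head,
                                 uint64_t src_phys, uint64_t dst_phys,
                                 uint64_t len, uint64_t done_flag_phys)
{
        md_mic_dma_memcpy_desc(&ring[head], src_phys, dst_phys, len);
        md_mic_dma_prep_status_desc(&ring[head + 1], 1 /* done marker */,
                                    done_flag_phys, true /* interrupt */);
        /* the new head (head + 2) would then be handed to hardware with
         * md_mic_dma_chan_write_head() */
}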
+ */ + +#ifndef __MIC_MACADDR_H__ +#define __MIC_MACADDR_H__ + +#define MAC_RUN_SHIFT 1 +#define MAC_DATE_SHIFT 16 + +/** + * mic_get_mac_from_serial - Create MAC address from serial number string + * \param serial string containing serial number + * \param mac data space to place MAC address + * \param host if true set least significant bit for hosts MAC + * + * mic_get_mac_from_serial() creates a MAC address from a MIC host's serial number. + * + * A MAC address contains 6 bytes of which the first 3 are either assigned by IEEE + * or bit 2 of the first byte is set to indicate locally created. While awaiting + * our assigned values, the first the bytes have been set to 'MIC' with the local + * bit also being set and multicast not. The result is actually seeing "NIC". + * + * The last 3 bytes, or 24 bits are set in the pattern: + * o 8 bits are created by subtracting 1 from the cards year character mulitplied + * by the work week field. By subtracting 1 the year starts at 2012 and there + * is enough room to accout for MIC cards build through 2017 + * o 15 bits are the work week running number from the serail number. This allows + * space for 32k of boards to be build in any one week. + * o 1 bit is used to indicated whether it is the host or card end of the virtual + * network connection. The bit being set is the card MAC address. + * + * Upon successful completion, mic_get_mac_from_serial returns zero. If the serial + * number does not have "KC" (for Knights Corner) as the 3rd and 4th characters + * then the serial number is invalid and a non zero value is returned. + */ + +static int +mic_get_mac_from_serial(char *serial, unsigned char *mac, int host) +{ + unsigned long final; + int y; + int ww; + + if ((serial == NULL) || (serial[2] != 'K') || (serial[3] != 'C')) + return 1; + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,39) + y = kstrtoul(&serial[7], 10, &final); // y is to shutup Suse build +#else + final = simple_strtoul(&serial[7], NULL, 10); +#endif + + final = final << MAC_RUN_SHIFT; /* Card side will add one */ + + y = (serial[4] - '1'); /* start year 2012 end year 2016 */ + ww = ((serial[5] - '0') * 10) + (serial[6] - '0'); + + final += (y * ww) << MAC_DATE_SHIFT; + + if (host) /* least bit indicates host MAC */ + final++; + + mac[0] = 0x4c; + mac[1] = 0x79; + mac[2] = 0xba; + mac[3] = (final >> 16) & 0xff; + mac[4] = (final >> 8) & 0xff; + mac[5] = final & 0xff; + return 0; +} + +#endif /* __MIC_MACADDR_H__ */ diff --git a/include/mic/mic_pm.h b/include/mic/mic_pm.h new file mode 100644 index 0000000..12b492c --- /dev/null +++ b/include/mic/mic_pm.h @@ -0,0 +1,442 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. 
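A short sketch of how mic_get_mac_from_serial() above would typically be called, once for each end of the virtual ethernet link. The serial string is a made-up value following the "..KC<year><workweek><run>" layout the function parses and is not a real board serial:

static void mac_from_serial_example(void)
{
        unsigned char host_mac[6], card_mac[6];
        char serial[] = "ADKC31340012345";   /* hypothetical serial string */

        if (mic_get_mac_from_serial(serial, host_mac, 1) ||
            mic_get_mac_from_serial(serial, card_mac, 0)) {
                printk(KERN_ERR "invalid MIC serial number\n");
                return;
        }
        /* host_mac and card_mac now differ only in the lowest bit of byte 5 */
}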
This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* common power management specific header defines for host and card */ + +#include "io_interface.h" + +#if !defined(__MIC_PM_H) +#define __MIC_PM_H + +#define PC6_TIMER 10 + +#define IOCTL_PM_SendIoctl _IOC(_IOC_READ|_IOC_WRITE, 'l', 2, 0) + +#define MAX_HW_IDLE_WAIT_COUNT 100 +#define PC3_EXIT_WAIT_COUNT 1000 +#define PM_SEND_MODE SCIF_SEND_BLOCK +#define PM_RECV_MODE SCIF_RECV_BLOCK +#define SET_VID_RETRY_COUNT 3 + +#define PM_NODE_MAGIC_BIT 31 +#define PM_NODE_IDLE (1 << PM_NODE_MAGIC_BIT) + +#define PM_PRINT(fmt, ...) printk("[ %s : %d ]:"fmt, \ + __func__, __LINE__, ##__VA_ARGS__) + +#define PM_DEBUG(fmt, ...) pr_debug("[ %s : %d ]:"fmt, \ + __func__, __LINE__, ##__VA_ARGS__) + +#define PM_ENTRY PM_DEBUG("==> %s\n", __func__) +#define PM_EXIT PM_DEBUG("<== %s\n", __func__) +#define PM_MAJOR_VERSION 1 +#define PM_MINOR_VERSION 0 + + +typedef enum _PM_MESSAGE { + PM_MESSAGE_PC3READY, + PM_MESSAGE_OPEN, + PM_MESSAGE_OPEN_ACK, + PM_MESSAGE_CLOSE, + PM_MESSAGE_CLOSE_ACK, + PM_MESSAGE_TEST, + PM_MESSAGE_MAX, +} PM_MESSAGE; + +typedef enum _PM_IDLE_STATE { + PM_IDLE_STATE_PC0, + PM_IDLE_STATE_PC3_READY, + PM_IDLE_STATE_PC3, + PM_IDLE_STATE_PC6, + PM_IDLE_STATE_LOST, + PM_IDLE_STATE_MAX, +} PM_IDLE_STATE; + +#ifndef _MIC_SCIF_ +typedef enum { + IOCTL_pm_send, + IOCTL_pm_recv, + IOCTL_pm_send_check, + IOCTL_pm_get_idle_state, + IOCTL_pm_exit_idle_state, + // For emulator testing + IOCTL_pmemu_pc3_entry, + IOCTL_pmemu_pc3_exit, + IOCTL_pmemu_pc6_entry, + IOCTL_pmemu_pc6_exit, + IOCTL_pmemu_dpc3_entry, + IOCTL_pmemu_dpc3_exit, + IOCTL_get_dependency_graph, + IOCTL_get_dependency_set, + IOCTL_pm_toggle_connection, + IOCTL_pm_idlestate_exit, + IOCTL_pm_enable_dpc3_testing, + IOCTL_pm_device_restart, +} PM_IOCTL_TYPE; + +struct pm_ioctl_header { + uint32_t node; + PM_IOCTL_TYPE opcode; + uint64_t arglen; +}; +#define PM_TEST_MSG_BODY "PM Test Message" +#endif + +//Generic PM Header. Has message type and length of message. 
+typedef struct _pm_msg_header { + PM_MESSAGE opcode; + uint32_t len; +} pm_msg_header; + +typedef struct _pm_msg_unit_test +{ + pm_msg_header header; + void * buf; +} pm_msg_unit_test; + +typedef struct _pm_version +{ + uint16_t major_version; + uint16_t minor_version; + +} pm_version; + +typedef struct _pm_msg_pm_options +{ + uint8_t pc3_enabled; + uint8_t pc6_enabled; + pm_version version; +} pm_msg_pm_options; + +#ifndef _MIC_SCIF_ +// PM IOCTLs +struct pm_scif_send { + struct pm_ioctl_header header; + uint32_t length; + void *buf; +}; + +struct pm_scif_recv { + struct pm_ioctl_header header; + uint32_t length; + void *buf; +}; + +struct pm_scif_send_check { + struct pm_ioctl_header header; + uint32_t length; + void *buf; +}; + +typedef struct pm_get_idle_state { + struct pm_ioctl_header header; + PM_IDLE_STATE *idle_state; +} pm_get_idle_state_t; + +typedef struct pm_exit_idle_state { + struct pm_ioctl_header header; + PM_IDLE_STATE idle_state; +}pm_exit_idlestate_t; + +typedef struct dependency_graph { + struct pm_ioctl_header header; + uint32_t** depmtrx; +} dependency_graph_t; + +struct io_dependency_set { + struct pm_ioctl_header header; + int is_active_set; + uint64_t dep_set; +}; + +struct io_enable_dpc3_test { + struct pm_ioctl_header header; + uint32_t enable_test; + uint32_t state; +}; + +typedef struct _pm_status { + uint32_t hoststate_reg; + uint32_t cardstate_reg; + uint32_t c3waketimer_reg; + uint32_t pcucontrol_reg; + uint32_t uos_pcucontrol_reg; + uint32_t corevolt_reg; + uint32_t gpmctrl_reg; + uint32_t idle_state; + uint32_t board_id; +} pm_status_t; + +typedef struct _test_msg_ctrl { + uint32_t action; +} test_msg_ctrl_t; + +typedef struct _connection_info { + int32_t conn_state; + int32_t local_port; + int32_t local_node; + int32_t remote_port; + int32_t remote_node; + int32_t num_messages_queued; +} connection_info_t; + +#endif //_MIC_SCIF_ + +#if defined(CONFIG_MK1OM) + +#define SBOX_SVID_CONTROL 0x00004110 +#define SBOX_PCU_CONTROL 0x00004114 +#define SBOX_HOST_PMSTATE 0x00004118 +#define SBOX_UOS_PMSTATE 0x0000411c +#define SBOX_C3WAKEUP_TIMER 0x00004120 +#define GBOX_PM_CTRL 0x0000413C +#define SBOX_UOS_PCUCONTROL 0x0000412C + +#elif defined(CONFIG_ML1OM) || defined(WINDOWS) + +#define DBOX_SWFOX1 0x00002414 +#define DBOX_SWFOX2 0x00002418 +#define DBOX_SWFOX3 0x0000241C +#define DBOX_SWFOX4 0x00002420 +#define DBOX_SWFOX5 0x00002424 +#define DBOX_SWFOX6 0x00002428 +#define DBOX_SWFOX7 0x0000242C +#define DBOX_SWF0X8 0x00002430 + +#define SBOX_SVID_CONTROL DBOX_SWFOX1 +#define SBOX_PCU_CONTROL DBOX_SWFOX2 +#define SBOX_HOST_PMSTATE DBOX_SWFOX3 +#define SBOX_UOS_PMSTATE DBOX_SWFOX4 +#define SBOX_C3WAKEUP_TIMER DBOX_SWFOX5 +#define GBOX_PM_CTRL DBOX_SWFOX6 +#define SBOX_UOS_PCUCONTROL DBOX_SWFOX7 + +#else +#error Neither CONFIG_ML1OM nor CONFIG_MK1OM defined +#endif + +#define SBOX_SVIDCTRL_SVID_DOUT(x) ((x) & 0x1ff) +#define SBOX_SVIDCTRL_SVID_DOUT_BITS(x) ((x) & 0x1ff) +#define SBOX_SVIDCTRL_SVID_CMD(x) (((x) >> 9) & 0x1ff) +#define SBOX_SVIDCTRL_SVID_CMD_BITS(x) (((x) & 0x1ff) << 9) +#define SBOX_SVIDCTRL_SVID_DIN(x) (((x) >> 18) & 0x3ff) +#define SBOX_SVIDCTRL_SVID_ERROR(x) (((x) >> 29) & 0x1) +#define SBOX_SVIDCTRL_SVID_IDLE(x) (((x) >> 30) & 0x1) +#define SBOX_SVIDCTRL_CMD_START(x) (((x) >> 31) & 0x1) +#define SBOX_SVIDCTRL_CMD_START_BITS(x) (((x) & 0x1) << 31) +// This is not a register field, but we need to check these bits to determine parity error +#define SBOX_SVIDCTRL_ACK1ACK0(x) (((x) >> 27) & 0x11) + +#define SBOX_PCUCTRL_ENABLE_MCLK_SHUTDWN(x) ((x) 
& 0x1) +#define SBOX_PCUCTRL_ENABLE_MCLK_SHUTDWN_BITS(x) ((x) & 0x1) +#define SBOX_PCUCTRL_RING_ACTIVE(x) (((x) >> 2) & 0x1) +#define SBOX_PCUCTRL_RING_ACTIVE_BITS(x) (((x) & 0x1) << 2) +#define SBOX_PCUCTRL_PREVENT_AUTOC3_EXIT(x) (((x) >> 3) & 0x1) +#define SBOX_PCUCTRL_PREVENT_AUTOC3_EXIT_BITS(x) (((x) & 0x1) << 3) +#define SBOX_PCUCTRL_PWRGOOD_MASK(x) (((x) >> 17) & 0x1) +#define SBOX_PCUCTRL_PWRGOOD_MASK_BITS(x) (((x) & 0x1) << 17) +#define SBOX_PCUCTRL_MCLK_PLL_LCK(x) (((x) >> 16) & 0x1) +#define SBOX_THERMAL_STS_ALERT_LOG(x) (((x) >> 3) & 0x1) +#define SBOX_THERMAL_STS_ALERT_LOG_BITS(x) (((x) & 0x1) << 3) + +// used by host to communicate card idle state to uos +#define SBOX_HPMSTATE_STATUS(x) ((x) & 0xff) +#define SBOX_HPMSTATE_STATUS_BITS(x) ((x) & 0xff) +#define SBOX_HPMSTATE_MINVID(x) (((x) >> 8) & 0xff) +#define SBOX_HPMSTATE_TDPVID(x) (((x) >> 16) & 0xff) +// used by uos to communicate card idle state to host +#define SBOX_UPMSTATE_STATUS(x) ((x) & 0xff) +#define SBOX_UPMSTATE_STATUS_BITS(x) ((x) & 0xff) + +#define SBOX_C3WAKEUP_TIME(x) ((x) & 0xffff) +#define SBOX_C3WAKEUP_TIME_BITS(x) ((x) & 0xffff) + +#define IN_PCKGC6_BITS(x) (((x) & 0x1) << 1) +#define KNC_SVID_ADDR 0 +#define KNC_SETVID_FAST 1 +#define KNC_SETVID_SLOW 2 +#define KNC_SETVID_ATTEMPTS 50 + + +typedef union _sbox_pcu_ctrl { + uint32_t value; + struct { + uint32_t enable_mclk_pl_shutdown :1; + uint32_t mclk_enabled :1; + uint32_t ring_active :1; + uint32_t prevent_auto_c3_exit :1; + uint32_t ghost_active :1; + uint32_t tcu_active :1; + uint32_t itp_scllk_gate_disable :1; + uint32_t itp_pkg_c3_disable :1; + uint32_t scratch :1; + uint32_t unallocated_1 :1; + uint32_t sysint_active :1; + uint32_t sclk_grid_off_disable :1; + uint32_t icc_dvo_ssc_cg_enable :1; + uint32_t icc_core_ref_clk_cg_enable :1; + uint32_t icc_gddr_ssc_cg_enable :1; + uint32_t icc_pll_disable :1; + uint32_t mclk_pll_lock :1; + uint32_t grpB_pwrgood_mask :1; + uint32_t unallocated_2 :14; + } bits; + +} sbox_pcu_ctrl_t; + +typedef union _sbox_host_pm_state { + uint32_t value; + struct { + uint32_t host_pm_state :7; + uint32_t abort_not_processed :1; + uint32_t min_vid :8; + uint32_t tdp_vid :8; + uint32_t unallocated :8; + } bits; + +} sbox_host_pm_state_t; + +typedef union _sbox_uos_pm_state { + uint32_t value; + struct { + uint32_t uos_pm_state :8; + uint32_t unallocated :24; + }bits; + +} sbox_uos_pm_state_t; + +typedef union _c3_wakeup_timer { + uint32_t value; + struct { + uint32_t c3_wake_time :16; + uint32_t unallocated_1 :1; + uint32_t c3_wake_timeout :1; + uint32_t unallocated_2 :14; + } bits; + +} c3_wakeup_timer_t; + +typedef union _sbox_svid_control { + uint32_t value; + struct { + uint32_t svid_dout :9; + uint32_t svid_cmd :9; + uint32_t svid_din :11; + uint32_t svid_error :1; + uint32_t svid_idle :1; + uint32_t cmd_start :1; + } bits; + +} sbox_svid_control; + +typedef union _gbox_pm_control { + uint32_t value; + struct { + uint32_t c6_disable :1; + uint32_t in_pckgc6 :1; + uint32_t gbox_inM3 :2; + uint32_t unallocated :28; + } bits; + +} gbox_pm_control; + +typedef union _sbox_thermal_sts_interrupt { + uint32_t value; + struct { + uint32_t mclk_ratio_status :1; + uint32_t mclk_ratio_log :1; + uint32_t alert_status :1; + uint32_t alert_log :1; + uint32_t gpu_hot_status :1; + uint32_t gpu_hot_log :1; + uint32_t pwr_alert_status :1; + uint32_t pwr_alert_log :1; + uint32_t pmu_status :1; + uint32_t pmu_log :1; + uint32_t etc_freeze :1; + uint32_t unallocated :21; + }bits; + +} sbox_thermal_sts_interrupt; + +typedef union 
_sboxUosPcucontrolReg +{ + uint32_t value; + struct + { + uint32_t c3_wakeuptimer_enable :1; + uint32_t enable_mclk_pll_shutdown :1; + uint32_t spi_clk_disable :1; + uint32_t unallocated :29; + } bits; + +} sbox_uos_pcu_ctrl_t; + +typedef union _sboxCorefreqReg +{ + uint32_t value; + struct + { + uint32_t ratio :12; // bit 0-11 Ratio + uint32_t rsvd0 : 3; // bit 12-14 + uint32_t fuseratio : 1; // bit 15 If overclocking is enabled, setting this bit will default the goal ratio to the fuse value. + uint32_t asyncmode : 1; // bit 16 Async Mode Bit 16, Reserved Bits 20:17 used to be ExtClkFreq, + uint32_t rsvd1 : 9; // bit 17-25 + uint32_t ratiostep : 4; // bit 26-29 Power throttle ratio-step + uint32_t jumpratio : 1; // bit 30 Power throttle jump at once + uint32_t booted : 1; // bit 31 Booted: This bit selects between the default MCLK Ratio (600MHz) and the programmable MCLK ratio. 0=default 1=programmable. + } bits; + +} sbox_core_freq_t; + +typedef union _sboxCoreVoltReg +{ + uint32_t value; + struct + { + uint32_t vid :8; + uint32_t unallocated :24; + } bits; + +} sbox_core_volt_t; + +typedef enum _PM_CONNECTION_STATE { + PM_CONNECTING, + PM_CONNECTED, + PM_DISCONNECTING, + PM_DISCONNECTED +} PM_CONNECTION_STATE; + +#endif //__MIC_PM_H diff --git a/include/mic/mic_sbox_md.h b/include/mic/mic_sbox_md.h new file mode 100644 index 0000000..4ad8cf9 --- /dev/null +++ b/include/mic/mic_sbox_md.h @@ -0,0 +1,90 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#ifndef MIC_SBOX_MD_H +#define MIC_SBOX_MD_H +/* + * TODO: SBOX MCA Handling + */ +#ifdef _MIC_SCIF_ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#endif // _MIC_SCIF_ + +#ifdef _MIC_SCIF_ +void *mic_sbox_md_init(void); +void mic_sbox_md_uninit(void *mic_sbox_mmio_va); +#endif + +static inline uint32_t mic_sbox_read_mmio(void *mic_sbox_mmio_va, uint32_t offset) +{ + return readl((uint8_t *)mic_sbox_mmio_va + offset); +} + +static inline void mic_sbox_write_mmio(void *mic_sbox_mmio_va, uint32_t offset, uint32_t value) +{ + writel(value, (uint8_t *)mic_sbox_mmio_va + offset); +} +#endif diff --git a/include/mic/mic_virtio.h b/include/mic/mic_virtio.h new file mode 100644 index 0000000..4222e7d --- /dev/null +++ b/include/mic/mic_virtio.h @@ -0,0 +1,70 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + Structures which are passed from host to MIC card through + uOS kernel command line option, virtio_addr. 
+ + (C) Copyright 2012 Intel Corporation + Author: Caz Yokoyama + */ +#ifndef MIC_VIRTIO_H +#define MIC_VIRTIO_H + +struct vb_shared { + uint32_t host_features; + uint32_t client_features; + bool update; + struct vring vring; + struct virtio_blk_config blk_config; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3,6,0)) + uint32_t unused; +#endif +} __attribute__((aligned(8))); + +struct mic_virtblk { +#ifdef HOST + struct vb_shared vb_shared; + void *vblk; /* keep vblk in vhost for virtblk */ +#else + struct vb_shared *vb_shared; + void *vdev; /* keep vdev in virtio for virtblk */ +#endif +}; + +uint64_t mic_vhost_pm_disconnect_node(uint64_t node_bitmask, enum disconn_type type); +void mic_vhost_blk_stop(bd_info_t *bd_info); + +#endif // MIC_VIRTIO_H diff --git a/include/mic/micbaseaddressdefine.h b/include/mic/micbaseaddressdefine.h new file mode 100644 index 0000000..15e3991 --- /dev/null +++ b/include/mic/micbaseaddressdefine.h @@ -0,0 +1,111 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +/* "Raw" register offsets & bit specifications for MIC */ +#ifndef _MIC_MICBASEDEFINE_REGISTERS_H_ +#define _MIC_MICBASEDEFINE_REGISTERS_H_ + +#define COMMON_MMIO_BOX_SIZE (1<<16) + +/* CBOX register base defines */ +#define CBOX_BASE 0x0000000000ULL + +/* TXS register base defines */ +#define TXS0_BASE 0x0800780000ULL +#define TXS1_BASE 0x0800770000ULL +#define TXS2_BASE 0x0800760000ULL +#define TXS3_BASE 0x0800750000ULL +#define TXS4_BASE 0x0800740000ULL +#define TXS5_BASE 0x0800730000ULL +#define TXS6_BASE 0x0800720000ULL +#define TXS7_BASE 0x0800710000ULL +#define TXS8_BASE 0x08006E0000ULL + +/* GBOX register base defines */ +#define GBOX0_BASE 0x08007A0000ULL +#define GBOX1_BASE 0x0800790000ULL +#define GBOX2_BASE 0x0800700000ULL +#define GBOX3_BASE 0x08006F0000ULL + +#define GBOX_CHANNEL0_BASE 0x00000000 +#define GBOX_CHANNEL1_BASE 0x00000800 +#define GBOX_CHANNEL2_BASE 0x00001000 + +/* VBOX register base defines */ +#define VBOX_BASE 0x08007B0000ULL + +/* DBOX register base defines */ +#define DBOX_BASE 0x08007C0000ULL + +/* SBOX register base defines */ +#define SBOX_BASE 0x08007D0000ULL + +#define MIC_GTT_BASE 0x0800800000ULL +#define MIC_GTT_TOP 0x080083FFFFULL +#define MIC_GTT_SIZE (MIC_GTT_TOP - MIC_GTT_BASE + 1) + +/* Aperture defines */ +#define MIC_APERTURE_BASE 0x0900000000ULL +#define MIC_APERTURE_TOP 0x090FFFFFFFULL +#define MIC_APERTURE_SIZE (MIC_APERTURE_TOP - MIC_APERTURE_BASE + 1) + +/* SPI flash defines */ +#define MIC_SPI_BOOTLOADER_BASE 0x0FFFFF0000ULL +#define MIC_SPI_BOOTLOADER_TOP 0x0FFFFFFFFFULL +#define MIC_SPI_BOOTLOADER_SIZE (MIC_SPI_BOOTLOADER_TOP - MIC_SPI_BOOTLOADER_BASE + 1) +#define MIC_SPI_2ND_STAGE_BASE 0x0FFFFE0000ULL +#define MIC_SPI_2ND_STAGE_TOP 0x0FFFFEFFFFULL +#define MIC_SPI_2ND_STAGE_SIZE (MIC_SPI_2ND_STAGE_TOP - MIC_SPI_2ND_STAGE_BASE + 1) +#define MIC_SPI_PARAMETER_BASE 0x0FFFFDC000ULL +#define MIC_SPI_PARAMETER_TOP 0x0FFFFDFFFFULL +#define MIC_SPI_PARAMETER_SIZE (MIC_SPI_PARAMETER_TOP - MIC_SPI_PARAMETER_BASE + 1) + +/* remote defines */ +#define MIC_REMOTE_BASE 0x1000000000ULL +#define MIC_REMOTE_TOP 0x7FFFFFFFFFULL +#define MIC_REMOTE_SIZE (MIC_REMOTE_TOP - MIC_REMOTE_BASE + 1) + +/* system defines */ +#define MIC_SYSTEM_BASE 0x8000000000ULL +#define MIC_SYSTEM_TOP 0xFFFFFFFFFFULL +#define MIC_SYSTEM_PAGE_SIZE 0x0400000000ULL +#define MIC_SYSTEM_SIZE (MIC_SYSTEM_TOP - MIC_SYSTEM_BASE + 1) + +#define MIC_PHYSICAL_ADDRESS_BITS 40 +#define MIC_PHYSICAL_ADDRESS_SPACE_SIZE ( 1ULL << MIC_PHYSICAL_ADDRESS_BITS ) + +#define MIC_HOST_MMIO_BASE DBOX_BASE + +#endif diff --git a/include/mic/micdboxdefine.h b/include/mic/micdboxdefine.h new file mode 100644 index 0000000..cba2c7a --- /dev/null +++ b/include/mic/micdboxdefine.h @@ -0,0 +1,48 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. 
Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* "Raw" register offsets & bit specifications for Intel MIC (KNF) */ +#ifndef _MIC_DBOXDEFINE_REGISTERS_H_ +#define _MIC_DBOXDEFINE_REGISTERS_H_ + +#define DBOX_SWF0X0 0x00002410 + + +#define DBOX_SWF1X0 0x00003410 +#define DBOX_SWF1X1 0x00003414 +#define DBOX_SWF1X2 0x00003418 +#define DBOX_SWF1X3 0x0000341C + +#endif diff --git a/include/mic/micpsmi.h b/include/mic/micpsmi.h new file mode 100644 index 0000000..f9c3b90 --- /dev/null +++ b/include/mic/micpsmi.h @@ -0,0 +1,62 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#ifndef _MIC_PSMI_H +#define _MIC_PSMI_H + +struct mic_psmi_pte { + uint64_t pa; +}; + +struct mic_psmi_ctx +{ + unsigned char enabled; + + struct mic_psmi_pte *dma_tbl; + int dma_tbl_size; + dma_addr_t dma_tbl_hndl; + uint64_t dma_mem_size; + int nr_dma_pages; + + struct mic_psmi_pte *va_tbl; +}; + +#define MIC_PSMI_PAGE_ORDER (7) +#define MIC_PSMI_PAGE_SIZE (PAGE_SIZE << MIC_PSMI_PAGE_ORDER) +#define MIC_PSMI_SIGNATURE 0x4B434F52494D5350L + +int mic_psmi_open(struct file *filp); + +#endif /* _MIC_PSMI_H */ diff --git a/include/mic/micsboxdefine.h b/include/mic/micsboxdefine.h new file mode 100644 index 0000000..36b1b30 --- /dev/null +++ b/include/mic/micsboxdefine.h @@ -0,0 +1,255 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +/* "Raw" register offsets & bit specifications for Intel MIC (KNF) */ +#ifndef _MIC_SBOXDEFINE_REGISTERS_H_ +#define _MIC_SBOXDEFINE_REGISTERS_H_ + + +#define SBOX_OC_I2C_ICR 0x00001000 +#define SBOX_THERMAL_STATUS 0x00001018 +#define SBOX_THERMAL_INTERRUPT_ENABLE 0x0000101C +#define SBOX_STATUS_FAN1 0x00001024 +#define SBOX_STATUS_FAN2 0x00001028 +#define SBOX_SPEED_OVERRIDE_FAN 0x0000102C +#define SBOX_BOARD_TEMP1 0x00001030 +#define SBOX_BOARD_TEMP2 0x00001034 +#define SBOX_BOARD_VOLTAGE_SENSE 0x00001038 +#define SBOX_CURRENT_DIE_TEMP0 0x0000103C +#define SBOX_CURRENT_DIE_TEMP1 0x00001040 +#define SBOX_CURRENT_DIE_TEMP2 0x00001044 +#define SBOX_MAX_DIE_TEMP0 0x00001048 +#define SBOX_MAX_DIE_TEMP1 0x0000104C +#define SBOX_MAX_DIE_TEMP2 0x00001050 +#define SBOX_ELAPSED_TIME_LOW 0x00001074 +#define SBOX_ELAPSED_TIME_HIGH 0x00001078 +#define SBOX_FAIL_SAFE_OFFSET 0x00002004 +#define SBOX_CURRENT_CLK_RATIO 0x00003004 +#define SBOX_SMPT00 0x00003100 +#define SBOX_SMPT02 0x00003108 +#define SBOX_RGCR 0x00004010 +#define SBOX_DSTAT 0x00004014 +#define SBOX_PCIE_PCI_REVISION_ID_AND_C_0X8 0x00005808 +#define SBOX_PCIE_BAR_ENABLE 0x00005CD4 +#define SBOX_SICR0 0x00009004 +#define SBOX_SICE0 0x0000900C +#define SBOX_SICC0 0x00009010 +#define SBOX_SICR1 0x0000901C +#define SBOX_SICC1 0x00009028 +#ifdef CONFIG_MK1OM +#define SBOX_PMU_PERIOD_SEL 0x00001070 +#define SBOX_THERMAL_STATUS_INTERRUPT 0x0000107C +#define SBOX_THERMAL_STATUS_2 0x00001080 +#define SBOX_THERMAL_TEST_2 0x00001084 +#define SBOX_COREFREQ 0x00004100 +#define SBOX_COREVOLT 0x00004104 +#define SBOX_MEMORYFREQ 0x00004108 +#define SBOX_MEMVOLT 0x0000410C +//add defines used by drivers that are the same as DOORBELL_INTX +#define SBOX_SDBIC0 0x0000CC90 +#define SBOX_SDBIC1 0x0000CC94 +#define SBOX_SDBIC2 0x0000CC98 +#define SBOX_SDBIC3 0x0000CC9C +#else +#define SBOX_SDBIC0 0x00009030 +#define SBOX_SDBIC1 0x00009034 +#define SBOX_SDBIC2 0x00009038 +#define SBOX_SDBIC3 0x0000903C +#define SBOX_COREFREQ 0x00004040 +#define SBOX_COREVOLT 0x00004044 +#define SBOX_MEMORYFREQ 0x00004048 +#define SBOX_MEMVOLT 0x0000404C +#define SBOX_RSC0 0x0000CC10 +#define SBOX_RSC1 0x0000CC14 + +#endif +#define SBOX_MXAR0 0x00009040 +#define SBOX_MXAR0_K1OM 0x00009044 +#define SBOX_MXAR1 0x00009044 +#define SBOX_MXAR2 0x00009048 +#define SBOX_MXAR3 0x0000904C +#define SBOX_MXAR4 0x00009050 +#define SBOX_MXAR5 0x00009054 +#define SBOX_MXAR6 0x00009058 +#define SBOX_MXAR7 0x0000905C +#define SBOX_MXAR8 0x00009060 +#define SBOX_MXAR9 0x00009064 +#define SBOX_MXAR10 0x00009068 +#define SBOX_MXAR11 0x0000906C +#define SBOX_MXAR12 0x00009070 +#define SBOX_MXAR13 0x00009074 +#define SBOX_MXAR14 0x00009078 +#define SBOX_MXAR15 0x0000907C +#define SBOX_MSIXPBACR 0x00009080 +#define SBOX_MSIXPBACR_K1OM 0x00009084 +#define SBOX_DCAR_0 0x0000A000 +#define SBOX_DHPR_0 0x0000A004 +#define SBOX_DTPR_0 0x0000A008 +#define SBOX_DAUX_LO_0 0x0000A00C +#define SBOX_DAUX_HI_0 0x0000A010 +#define SBOX_DRAR_LO_0 0x0000A014 +#define SBOX_DRAR_HI_0 0x0000A018 +#define SBOX_DITR_0 0x0000A01C +#define SBOX_DSTAT_0 0x0000A020 +#define SBOX_DSTATWB_LO_0 0x0000A024 +#define SBOX_DSTATWB_HI_0 0x0000A028 +#define SBOX_DCHERR_0 0x0000A02C +#define SBOX_DCHERRMSK_0 0x0000A030 +#define SBOX_DCAR_1 0x0000A040 +#define SBOX_DHPR_1 0x0000A044 +#define SBOX_DTPR_1 0x0000A048 +#define SBOX_DAUX_LO_1 0x0000A04C +#define SBOX_DAUX_HI_1 0x0000A050 +#define SBOX_DRAR_LO_1 0x0000A054 +#define SBOX_DRAR_HI_1 0x0000A058 +#define SBOX_DITR_1 0x0000A05C +#define SBOX_DSTAT_1 0x0000A060 +#define 
SBOX_DSTATWB_LO_1 0x0000A064 +#define SBOX_DSTATWB_HI_1 0x0000A068 +#define SBOX_DCHERR_1 0x0000A06C +#define SBOX_DCHERRMSK_1 0x0000A070 +#define SBOX_DCAR_2 0x0000A080 +#define SBOX_DHPR_2 0x0000A084 +#define SBOX_DTPR_2 0x0000A088 +#define SBOX_DAUX_LO_2 0x0000A08C +#define SBOX_DAUX_HI_2 0x0000A090 +#define SBOX_DRAR_LO_2 0x0000A094 +#define SBOX_DRAR_HI_2 0x0000A098 +#define SBOX_DITR_2 0x0000A09C +#define SBOX_DSTAT_2 0x0000A0A0 +#define SBOX_DSTATWB_LO_2 0x0000A0A4 +#define SBOX_DSTATWB_HI_2 0x0000A0A8 +#define SBOX_DCHERR_2 0x0000A0AC +#define SBOX_DCHERRMSK_2 0x0000A0B0 +#define SBOX_DCAR_3 0x0000A0C0 +#define SBOX_DHPR_3 0x0000A0C4 +#define SBOX_DTPR_3 0x0000A0C8 +#define SBOX_DAUX_LO_3 0x0000A0CC +#define SBOX_DAUX_HI_3 0x0000A0D0 +#define SBOX_DRAR_LO_3 0x0000A0D4 +#define SBOX_DRAR_HI_3 0x0000A0D8 +#define SBOX_DITR_3 0x0000A0DC +#define SBOX_DSTAT_3 0x0000A0E0 +#define SBOX_DSTATWB_LO_3 0x0000A0E4 +#define SBOX_DSTATWB_HI_3 0x0000A0E8 +#define SBOX_DCHERR_3 0x0000A0EC +#define SBOX_DCHERRMSK_3 0x0000A0F0 +#define SBOX_DCAR_4 0x0000A100 +#define SBOX_DHPR_4 0x0000A104 +#define SBOX_DTPR_4 0x0000A108 +#define SBOX_DAUX_LO_4 0x0000A10C +#define SBOX_DAUX_HI_4 0x0000A110 +#define SBOX_DRAR_LO_4 0x0000A114 +#define SBOX_DRAR_HI_4 0x0000A118 +#define SBOX_DITR_4 0x0000A11C +#define SBOX_DSTAT_4 0x0000A120 +#define SBOX_DSTATWB_LO_4 0x0000A124 +#define SBOX_DSTATWB_HI_4 0x0000A128 +#define SBOX_DCHERR_4 0x0000A12C +#define SBOX_DCHERRMSK_4 0x0000A130 +#define SBOX_DCAR_5 0x0000A140 +#define SBOX_DHPR_5 0x0000A144 +#define SBOX_DTPR_5 0x0000A148 +#define SBOX_DAUX_LO_5 0x0000A14C +#define SBOX_DAUX_HI_5 0x0000A150 +#define SBOX_DRAR_LO_5 0x0000A154 +#define SBOX_DRAR_HI_5 0x0000A158 +#define SBOX_DITR_5 0x0000A15C +#define SBOX_DSTAT_5 0x0000A160 +#define SBOX_DSTATWB_LO_5 0x0000A164 +#define SBOX_DSTATWB_HI_5 0x0000A168 +#define SBOX_DCHERR_5 0x0000A16C +#define SBOX_DCHERRMSK_5 0x0000A170 +#define SBOX_DCAR_6 0x0000A180 +#define SBOX_DHPR_6 0x0000A184 +#define SBOX_DTPR_6 0x0000A188 +#define SBOX_DAUX_LO_6 0x0000A18C +#define SBOX_DAUX_HI_6 0x0000A190 +#define SBOX_DRAR_LO_6 0x0000A194 +#define SBOX_DRAR_HI_6 0x0000A198 +#define SBOX_DITR_6 0x0000A19C +#define SBOX_DSTAT_6 0x0000A1A0 +#define SBOX_DSTATWB_LO_6 0x0000A1A4 +#define SBOX_DSTATWB_HI_6 0x0000A1A8 +#define SBOX_DCHERR_6 0x0000A1AC +#define SBOX_DCHERRMSK_6 0x0000A1B0 +#define SBOX_DCAR_7 0x0000A1C0 +#define SBOX_DHPR_7 0x0000A1C4 +#define SBOX_DTPR_7 0x0000A1C8 +#define SBOX_DAUX_LO_7 0x0000A1CC +#define SBOX_DAUX_HI_7 0x0000A1D0 +#define SBOX_DRAR_LO_7 0x0000A1D4 +#define SBOX_DRAR_HI_7 0x0000A1D8 +#define SBOX_DITR_7 0x0000A1DC +#define SBOX_DSTAT_7 0x0000A1E0 +#define SBOX_DSTATWB_LO_7 0x0000A1E4 +#define SBOX_DSTATWB_HI_7 0x0000A1E8 +#define SBOX_DCHERR_7 0x0000A1EC +#define SBOX_DCHERRMSK_7 0x0000A1F0 +#define SBOX_DCR 0x0000A280 +#define SBOX_APICICR0 0x0000A9D0 +#define SBOX_APICICR1 0x0000A9D8 +#define SBOX_APICICR2 0x0000A9E0 +#define SBOX_APICICR3 0x0000A9E8 +#define SBOX_APICICR4 0x0000A9F0 +#define SBOX_APICICR5 0x0000A9F8 +#define SBOX_APICICR6 0x0000AA00 +#define SBOX_APICICR7 0x0000AA08 +#define SBOX_SCRATCH0 0x0000AB20 +#define SBOX_SCRATCH1 0x0000AB24 +#define SBOX_SCRATCH2 0x0000AB28 +#define SBOX_SCRATCH3 0x0000AB2C +#define SBOX_SCRATCH4 0x0000AB30 +#define SBOX_SCRATCH5 0x0000AB34 +#define SBOX_SCRATCH6 0x0000AB38 +#define SBOX_SCRATCH7 0x0000AB3C +#define SBOX_SCRATCH8 0x0000AB40 +#define SBOX_SCRATCH9 0x0000AB44 +#define SBOX_SCRATCH10 0x0000AB48 +#define SBOX_SCRATCH11 0x0000AB4C +#define 
SBOX_SCRATCH12 0x0000AB50 +#define SBOX_SCRATCH13 0x0000AB54 +#define SBOX_SCRATCH14 0x0000AB58 +#define SBOX_SCRATCH15 0x0000AB5C +#define SBOX_RDMASR0 0x0000B180 +#define SBOX_SBQ_FLUSH 0x0000B1A0 // Pseudo-register, not autogen, must add manually +#define SBOX_TLB_FLUSH 0x0000B1A4 +#define SBOX_GTT_PHY_BASE 0x0000C118 +#define SBOX_EMON_CNT0 0x0000CC28 +#define SBOX_EMON_CNT1 0x0000CC2C +#define SBOX_EMON_CNT2 0x0000CC30 +#define SBOX_EMON_CNT3 0x0000CC34 + +#endif diff --git a/include/mic/micscif.h b/include/mic/micscif.h new file mode 100644 index 0000000..c0b6223 --- /dev/null +++ b/include/mic/micscif.h @@ -0,0 +1,900 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#ifndef MICSCIF_H +#define MICSCIF_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _MODULE_SCIF_ +#include +#include +#include +#include +#include +#include +#endif /* MODULE_SCIF */ + +#include +#include "scif.h" +#include "mic/micbaseaddressdefine.h" +#include "mic/micsboxdefine.h" + +/* The test runs in a separate thread context from the bottom + * half that processes messages from the card and setup p2p + * when these run concurrently, p2p messages get lost since they + * may be consumed by the test thread + */ +//#define ENABLE_TEST // Used to enable testing at board connect +#ifdef MIC_IS_EMULATION +#define TEST_LOOP 2 +#else +#define TEST_LOOP 2000 +#endif + +//#define P2P_HACK 0 +#include "scif.h" +#include "scif_ioctl.h" + +#define SCIF_READY_MAGIC_NUM 0x1eedfee0 + +#ifndef SCIF_MAJOR +#define SCIF_MAJOR 0 /* Use dynamic major number by default */ +#endif + +#define SCIF_HOST_NODE 0 // By default the host is always node zero + +#define SCIF_RMA_TEMP_CACHE_LIMIT 0x20000 +/* + * The overhead for proxying a P2P DMA read to convert it to + * a DMA write by sending a SCIF Node QP message has been + * seen to be higher than programming a P2P DMA Read on self + * for transfer sizes less than the PROXY_DMA_THRESHOLD. + * The minimum threshold is different for Jaketown versus + * Ivytown and tuned for best DMA performance. + */ +#define SCIF_PROXY_DMA_THRESHOLD_JKT (32 * 1024ULL) +#define SCIF_PROXY_DMA_THRESHOLD_IVT (1024 * 1024ULL) + +//#define RMA_DEBUG 0 + +/* Pre-defined L1_CACHE_SHIFT is 6 on RH and 7 on Suse */ +#undef L1_CACHE_SHIFT +#define L1_CACHE_SHIFT 6 +#undef L1_CACHE_BYTES +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#define MI_EPLOCK_HELD (true) +#define MAX_RDMASR 8 + +// Device wide SCIF information +struct micscif_info { + uint32_t mi_nodeid; // Node ID this node is to others. + + struct mutex mi_conflock; // Configuration lock (used in p2p setup) + uint32_t mi_maxid; // Max known board ID + uint32_t mi_total; // Total number of running interfaces + uint32_t mi_nr_zombies; // Keep track of the number of zombie EP. + unsigned long mi_mask; // bit mask of online scif interfaces + uint64_t mi_nr_ioremap; // Keep track of number of ioremap() calls on the host + // to decide when to purge aliases for performance. + spinlock_t mi_eplock; + spinlock_t mi_connlock; + spinlock_t mi_rmalock; // Synchronize access to list of temporary registered + // windows to be destroyed. + struct mutex mi_fencelock; // Synchronize access to list of remote fences requested. + struct mutex mi_event_cblock; + spinlock_t mi_nb_connect_lock; + + struct list_head mi_uaccept; // List of user acceptreq waiting for acceptreg + struct list_head mi_listen; // List of listening end points + struct list_head mi_zombie; // List of zombie end points with pending RMA's. + struct list_head mi_connected; // List of end points in connected state + struct list_head mi_disconnected; // List of end points in disconnected state + struct list_head mi_rma; // List of temporary registered windows to be destroyed. + struct list_head mi_rma_tc; // List of temporary + // registered & cached windows + // to be destroyed. + struct list_head mi_fence; // List of remote fence requests. 
+ struct list_head mi_event_cb; /* List of event handlers registered */ + struct list_head mi_nb_connect_list; +#ifdef CONFIG_MMU_NOTIFIER + struct list_head mi_mmu_notif_cleanup; +#endif + struct notifier_block mi_panic_nb; +#ifndef _MIC_SCIF_ + /* The host needs to keep track of node dependencies in form of graph. + * This will need to be dynamically grown to support hotplug. + */ + uint32_t **mi_depmtrx; + /* + * Wait queue used for blocking while waiting for nodes + * to respond for disconnect message sent from host. + */ + wait_queue_head_t mi_disconn_wq; + /* stus of node remove operation*/ + uint64_t mi_disconnect_status; + atomic_long_t mi_unique_msgid; +#endif + /* + * Watchdog timeout on the host. Timer expiry will result in the host + * treating the remote node as a lost node. Default value is + * DEFAULT_WATCHDOG_TO and can be modified to a value greater than 1 + * second via SCIF sysfs watchdog_to entry. + */ + int mi_watchdog_to; // Watchdog timeout + int mi_watchdog_enabled; // Watchdog timeout enabled + int mi_watchdog_auto_reboot; // Watchdog auto reboot enabled + struct workqueue_struct *mi_misc_wq; // Workqueue for miscellaneous SCIF tasks. + struct work_struct mi_misc_work; +#ifdef CONFIG_MMU_NOTIFIER + struct workqueue_struct *mi_mmu_notif_wq; // Workqueue for MMU notifier cleanup tasks. + struct work_struct mi_mmu_notif_work; +#endif + int nr_gtt_entries; // GTT Debug Counter to detect leaks + uint64_t nr_2mb_pages; // Debug Counter for number of 2mb pages. + uint64_t nr_4k_pages; // Debug Counter for number of 4K pages + uint8_t en_msg_log; + wait_queue_head_t mi_exitwq; + unsigned long mi_rma_tc_limit; + uint64_t mi_proxy_dma_threshold; +#ifdef RMA_DEBUG + atomic_long_t rma_mm_cnt; + atomic_long_t rma_unaligned_cpu_cnt; + atomic_long_t rma_alloc_cnt; + atomic_long_t rma_pin_cnt; +#ifdef CONFIG_MMU_NOTIFIER + atomic_long_t mmu_notif_cnt; +#endif +#endif +#ifdef _MIC_SCIF_ + int mi_intr_rcnt[MAX_RDMASR]; // Ref count to track SCIF Interrupt Handlers +#endif + struct workqueue_struct *mi_conn_wq; + struct work_struct mi_conn_work; +}; + +extern struct micscif_info ms_info; + +#define SCIF_NODE_MAGIC_BIT 63 +/* Magic value used to indicate a remote idle node without grabbing any locks */ +#define SCIF_NODE_IDLE (1ULL << SCIF_NODE_MAGIC_BIT) + +enum scif_state { + SCIFDEV_NOTPRESENT, + SCIFDEV_INIT, + SCIFDEV_RUNNING, + SCIFDEV_SLEEPING, + SCIFDEV_STOPPING, + SCIFDEV_STOPPED +}; + +extern bool mic_p2p_enable; +extern bool mic_p2p_proxy_enable; +extern bool mic_reg_cache_enable; +extern bool mic_ulimit_check; +/* p2p mapping from node id to peer id */ +struct scif_p2p_info { + int ppi_peer_id; + struct scatterlist *ppi_sg[2]; + uint64_t sg_nentries[2]; // no of entries in scatterlists + dma_addr_t ppi_pa[2]; // one for mmio; one for aper + dma_addr_t ppi_mic_addr[2]; // one for mmio; one for aper + uint64_t ppi_len[2]; +#define PPI_MMIO 0 +#define PPI_APER 1 + enum scif_state ppi_disc_state; //Disconnection state of this peer node. 
+ struct list_head ppi_list; +}; + +/* one per remote node */ +struct micscif_dev { + uint16_t sd_node; + enum scif_state sd_state; + volatile void *mm_sbox; + uint64_t sd_base_addr; /* Remote node's base bus addr + * for the local node's aperture + */ +#ifndef _MIC_SCIF_ + struct list_head sd_p2p; /* List of bus addresses for + * other nodes, these are allocated + * by the host driver and are + * valid only on the host node + */ + struct delayed_work sd_watchdog_work; + wait_queue_head_t sd_watchdog_wq; + struct workqueue_struct *sd_ln_wq; + char sd_ln_wqname[16]; +#endif + + int n_qpairs; /* FIXME: + * This is always set to 1, + */ + + struct micscif_qp *qpairs; /* Same FIXME as above + * There is single qp established + * with this remote node + */ + + struct workqueue_struct *sd_intr_wq; /* sd_intr_wq & sd_intr_bh + * together constitute the workqueue + * infrastructure needed to + * run the bottom half handler + * for messages received from + * this node + */ + char sd_intr_wqname[16]; + struct work_struct sd_intr_bh; + unsigned int sd_intr_handle; + uint32_t sd_rdmasr; + struct workqueue_struct *sd_loopb_wq; + char sd_loopb_wqname[16]; + struct work_struct sd_loopb_work; + struct list_head sd_loopb_recv_q; + /* Lock to synchronize remote node state transitions */ + struct mutex sd_lock; + /* + * Global Ref count per SCIF device tracking all SCIF API's which + * might communicate across PCIe. + */ + atomic_long_t scif_ref_cnt; + /* + * Global Ref count per SCIF device tracking scif_mmap()/ + * scif_get_pages(). sd_lock protects scif_map_ref_cnt + * hence it does not need to be an atomic operation. Note that + * scif_mmap()/scif_get_pages() is not in the critical + * perf path. + */ + int scif_map_ref_cnt; + /* + * Wait queue used for blocking while waiting for nodes + * to wake up or to be removed. + */ + wait_queue_head_t sd_wq; + uint64_t sd_wait_status; +#ifdef _MIC_SCIF_ + wait_queue_head_t sd_p2p_wq; + bool sd_proxy_dma_reads; + struct delayed_work sd_p2p_dwork; + int sd_p2p_retry; +#endif + /* + * The NUMA node the peer is attached to on the host. + */ + int sd_numa_node; + /* + * Waitqueue for blocking while waiting for remote memory + * mappings to drop to zero. + */ + wait_queue_head_t sd_mmap_wq; + + /* When a nodeqp message is received, this is set. + * And it is reset by the watchdog time */ + atomic_t sd_node_alive; + int num_active_conn; +#ifdef ENABLE_TEST + struct workqueue_struct *producer; + struct workqueue_struct *consumer; + char producer_name[16]; + char consumer_name[16]; + struct work_struct producer_work; + struct work_struct consumer_work; + int count; + int test_done; +#endif // ENABLE_TEST +}; + +extern struct micscif_dev scif_dev[]; + +#include "mic/micscif_nodeqp.h" +#include "mic/micscif_nm.h" +#include "mic/micscif_smpt.h" +#include "mic/micscif_va_gen.h" +#include "mic/mic_dma_api.h" +#include "mic/mic_dma_lib.h" +#include "mic/micscif_rma.h" +#include "mic/micscif_rma_list.h" + +/* + * data structure used to sync SCIF_GET_NODE_INFO messaging + */ +struct get_node_info { + enum micscif_msg_state state; + wait_queue_head_t wq; +}; + +static inline uint64_t align_low(uint64_t data, uint32_t granularity) +{ + return ALIGN(data - (granularity - 1), granularity); +} + +#define SCIF_MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define SCIF_MAX(a, b) (((a) > (b)) ? 
(a) : (b)) + +enum endptstate { + SCIFEP_CLOSED, // Internal state + SCIFEP_UNBOUND, // External state + SCIFEP_BOUND, // External state + SCIFEP_LISTENING, // External state + SCIFEP_CONNECTED, // External state + SCIFEP_CONNECTING, // Internal state + SCIFEP_MAPPING, // Internal state + SCIFEP_CLOSING, // Internal state + SCIFEP_CLLISTEN, // Internal state + SCIFEP_DISCONNECTED, // Internal state + SCIFEP_ZOMBIE // Internal state +}; + +extern char *scif_ep_states[]; + +// Used for coordinating connection accept sequence. This is the data structure +// for the conlist in the endpoint. +struct conreq { + struct nodemsg msg; + struct list_head list; +}; + +/* Size of the RB for the Node QP */ +#define NODE_QP_SIZE 0x10000 +/* Size of the RB for the Endpoint QP */ +#define ENDPT_QP_SIZE 0x1000 + +struct endpt_qp_info { + /* Qpair for this endpoint */ + struct micscif_qp *qp; + /* + * Physical addr of the QP for Host or + * GTT offset of the QP for MIC. + * Required for unmapping the QP during close. + */ + dma_addr_t qp_offset; + /* + * Payload in a SCIF_CNCT_GNT message containing the + * physical address of the remote_qp. + */ + dma_addr_t cnct_gnt_payload; +}; + +#define SCIFEP_MAGIC 0x5c1f000000005c1f + +struct endpt { + volatile enum endptstate state; + spinlock_t lock; + + struct scif_portID port; + struct scif_portID peer; + + int backlog; + + struct endpt_qp_info qp_info; + struct endpt_rma_info rma_info; + /* + * scifdev used by this endpt to communicate with remote node. + */ + struct micscif_dev *remote_dev; + uint64_t remote_ep; + /* + * Keep track of number of connection requests. + */ + int conreqcnt; + /* + * Cache remote SCIF device state. + */ + enum scif_state sd_state; + /* + * True if the endpoint was created + * via scif_accept(..). + */ + bool accepted_ep; + /* + * Open file information used to match the id passed + * in with the flush routine. + */ + struct files_struct *files; + /* + * Reference count for functions using this endpoint. 
+ */
+ struct kref ref_count;
+ struct list_head conlist;
+ wait_queue_head_t conwq;
+ wait_queue_head_t disconwq;
+ wait_queue_head_t diswq;
+ wait_queue_head_t sendwq;
+ wait_queue_head_t recvwq;
+ struct mutex sendlock;
+ struct mutex recvlock;
+ struct list_head list;
+
+#ifdef CONFIG_MMU_NOTIFIER
+ struct list_head mmu_list;
+#endif
+
+ struct list_head li_accept; /* pending ACCEPTREG */
+ int acceptcnt; /* pending ACCEPTREG cnt */
+ struct list_head liacceptlist; /* link to listen accept */
+ struct list_head miacceptlist; /* link to mi_uaccept */
+ struct endpt *listenep; /* associated listen ep */
+
+ /* Non-blocking connect */
+ struct work_struct conn_work;
+ struct scif_portID conn_port;
+ int conn_err;
+ int conn_async_state;
+ wait_queue_head_t conn_pend_wq;
+ struct list_head conn_list;
+};
+
+static __always_inline void
+micscif_queue_for_cleanup(struct reg_range_t *window, struct list_head *list)
+{
+ struct endpt *ep = (struct endpt *)window->ep;
+ INIT_LIST_HEAD(&window->list_member);
+ window->dma_mark = get_dma_mark(ep->rma_info.dma_chan);
+ spin_lock(&ms_info.mi_rmalock);
+ list_add_tail(&window->list_member, list);
+ spin_unlock(&ms_info.mi_rmalock);
+ queue_work(ms_info.mi_misc_wq, &ms_info.mi_misc_work);
+}
+
+static __always_inline void
+__micscif_rma_destroy_tcw_helper(struct reg_range_t *window)
+{
+ list_del(&window->list_member);
+ micscif_queue_for_cleanup(window, &ms_info.mi_rma_tc);
+}
+
+void print_ep_state(struct endpt *ep, char *label);
+
+// Function prototypes needed by Unix/Linux drivers linking to scif
+int scif_fdopen(struct file *f);
+int scif_fdclose(struct file *f);
+int scif_process_ioctl(struct file *f, unsigned int cmd, uint64_t arg);
+int micscif_mmap(struct file *file, struct vm_area_struct *vma);
+int scif_mmap(struct vm_area_struct *vma, scif_epd_t epd);
+void scif_munmap(struct vm_area_struct *vma);
+void scif_proc_init(void);
+void scif_proc_cleanup(void);
+int scif_user_send(scif_epd_t epd, void *msg, int len, int flags);
+int scif_user_recv(scif_epd_t epd, void *msg, int len, int flags);
+int __scif_pin_pages(void *addr, size_t len, int *out_prot,
+ int map_flags, scif_pinned_pages_t *pages);
+scif_epd_t __scif_open(void);
+int __scif_bind(scif_epd_t epd, uint16_t pn);
+int __scif_listen(scif_epd_t epd, int backlog);
+int __scif_connect(scif_epd_t epd, struct scif_portID *dst, bool non_block);
+int __scif_accept(scif_epd_t epd, struct scif_portID *peer, scif_epd_t
+*newepd, int flags);
+int __scif_close(scif_epd_t epd);
+int __scif_send(scif_epd_t epd, void *msg, int len, int flags);
+int __scif_recv(scif_epd_t epd, void *msg, int len, int flags);
+off_t __scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
+int prot_flags, int map_flags);
+int __scif_unregister(scif_epd_t epd, off_t offset, size_t len);
+int __scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t
+roffset, int rma_flags);
+int __scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t
+roffset, int rma_flags);
+int __scif_fence_mark(scif_epd_t epd, int flags, int *mark);
+int __scif_fence_wait(scif_epd_t epd, int mark);
+int __scif_fence_signal(scif_epd_t epd, off_t loff, uint64_t lval, off_t roff,
+uint64_t rval, int flags);
+off_t __scif_register_pinned_pages(scif_epd_t epd,
+scif_pinned_pages_t pinned_pages, off_t offset, int map_flags);
+int __scif_get_pages(scif_epd_t epd, off_t offset, size_t len,
+struct scif_range **pages);
+int __scif_put_pages(struct scif_range *pages);
+int __scif_flush(scif_epd_t epd);
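
/*
 * Editorial example (not part of the original Intel sources): a hedged
 * sketch of how a kernel-mode client might drive the __scif_* entry points
 * declared above to reach a connected endpoint and exchange one message.
 * The helper name, the peer node/port values, and the reduced error
 * handling are illustrative assumptions; SCIF_SEND_BLOCK and
 * SCIF_RECV_BLOCK are the blocking flags from scif.h.
 */
static inline int example_scif_kernel_client(uint16_t peer_node, uint16_t peer_port)
{
	struct scif_portID dst = { .node = peer_node, .port = peer_port };
	scif_epd_t epd;
	int msg = 0;
	int err;

	epd = __scif_open();
	if (!epd)
		return -ENOMEM;

	err = __scif_bind(epd, 0);              /* 0: let SCIF assign a local port */
	if (err < 0)
		goto out;
	err = __scif_connect(epd, &dst, false); /* blocking connect */
	if (err < 0)
		goto out;
	err = __scif_send(epd, &msg, sizeof(msg), SCIF_SEND_BLOCK);
	if (err < 0)
		goto out;
	err = __scif_recv(epd, &msg, sizeof(msg), SCIF_RECV_BLOCK);
out:
	__scif_close(epd);
	return err;
}

+
+void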
micscif_misc_handler(struct work_struct *work); +void micscif_conn_handler(struct work_struct *work); + +uint16_t rsrv_scif_port(uint16_t port); +uint16_t get_scif_port(void); +void put_scif_port(uint16_t port); + +void micscif_send_exit(void); + +void scif_ref_rel(struct kref *kref_count); + +#ifdef _MODULE_SCIF_ +unsigned int micscif_poll(struct file *f, poll_table *wait); +unsigned int scif_pollfd(struct file *f, poll_table *wait, scif_epd_t epd); +unsigned int __scif_pollfd(struct file *f, poll_table *wait, struct endpt *ep); +int micscif_flush(struct file *f, fl_owner_t id); +#endif + +#ifdef _MIC_SCIF_ +void mic_debug_init(void); +void micscif_get_node_info(void); +void scif_poll_qp_state(struct work_struct *work); +#endif +void mic_debug_uninit(void); + +#define serializing_request(x) ((void)*(volatile uint8_t*)(x)) + +// State list helper functions. +// Each of these functions must be called with the end point lock unlocked. If +// the end point is found on the list the end point returned will have its lock +// set and sflags will return the value to be used to do an unlock_irqrestore +// at the end of the calling function. +static inline struct endpt * +micscif_find_listen_ep(uint16_t port, unsigned long *sflags) +{ + struct endpt *ep = NULL; + struct list_head *pos, *tmpq; + unsigned long flags; + + spin_lock_irqsave(&ms_info.mi_eplock, flags); + list_for_each_safe(pos, tmpq, &ms_info.mi_listen) { + ep = list_entry(pos, struct endpt, list); + if (ep->port.port == port) { + *sflags = flags; + spin_lock(&ep->lock); + spin_unlock(&ms_info.mi_eplock); + return ep; + } + } + spin_unlock_irqrestore(&ms_info.mi_eplock, flags); + return (struct endpt *)NULL; +} + +// Must be called with end point locked +static inline struct conreq * +miscscif_get_connection_request(struct endpt *ep, uint64_t payload) +{ + struct conreq *conreq; + struct list_head *pos, *tmpq; + + list_for_each_safe(pos, tmpq, &ep->conlist) { + conreq = list_entry(pos, struct conreq, list); + if (conreq->msg.payload[0] == payload) { + list_del(pos); + ep->conreqcnt--; + return conreq; + } + } + return (struct conreq *)NULL; +} + +// There is no requirement for the callee to have the end point +// locked like other API's above. +static inline void +micscif_remove_zombie_ep(struct endpt *ep) +{ + struct list_head *pos, *tmpq; + unsigned long sflags; + struct endpt *tmpep; + + spin_lock_irqsave(&ms_info.mi_eplock, sflags); + list_for_each_safe(pos, tmpq, &ms_info.mi_zombie) { + tmpep = list_entry(pos, struct endpt, list); + if (tmpep == ep) { + list_del(pos); + ms_info.mi_nr_zombies--; + } + } + spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); +} + +static inline void +micscif_cleanup_zombie_epd(void) +{ + struct list_head *pos, *tmpq; + unsigned long sflags; + struct endpt *ep; + + spin_lock_irqsave(&ms_info.mi_eplock, sflags); + list_for_each_safe(pos, tmpq, &ms_info.mi_zombie) { + ep = list_entry(pos, struct endpt, list); + if (micscif_rma_ep_can_uninit(ep)) { + list_del(pos); + ms_info.mi_nr_zombies--; + va_gen_destroy(&ep->rma_info.va_gen); + kfree(ep); + } + } + spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); +} + +#define SCIF_WAKE_UP_SEND (1 << 1) +#define SCIF_WAKE_UP_RECV (1 << 2) + +/** + * scif_wakeup_ep() - Wake up all clients based on the type + * requested i.e. threads blocked in scif_send(..) and/or scif_recv(..). 
+ */
+static inline void
+scif_wakeup_ep(int type)
+{
+ struct endpt *ep;
+ unsigned long sflags;
+ struct list_head *pos, *tmpq;
+
+ spin_lock_irqsave(&ms_info.mi_connlock, sflags);
+ list_for_each_safe(pos, tmpq, &ms_info.mi_connected) {
+ ep = list_entry(pos, struct endpt, list);
+ if (type & SCIF_WAKE_UP_SEND)
+ wake_up_interruptible(&ep->sendwq);
+ if (type & SCIF_WAKE_UP_RECV)
+ wake_up_interruptible(&ep->recvwq);
+ }
+ spin_unlock_irqrestore(&ms_info.mi_connlock, sflags);
+}
+
+/*
+ * is_self_scifdev:
+ * @dev: The remote SCIF Device
+ *
+ * Returns true if the SCIF Device passed is the self aka Loopback SCIF device.
+ */
+static inline int is_self_scifdev(struct micscif_dev *dev)
+{
+ return dev->sd_node == ms_info.mi_nodeid;
+}
+
+/*
+ * is_p2p_scifdev:
+ * @dev: The remote SCIF Device
+ *
+ * Returns true if the SCIF Device is a MIC Peer to Peer SCIF device.
+ */
+static inline bool is_p2p_scifdev(struct micscif_dev *dev)
+{
+#ifdef _MIC_SCIF_
+ return dev != &scif_dev[SCIF_HOST_NODE] && !is_self_scifdev(dev);
+#else
+ return false;
+#endif
+}
+
+/*
+ * get_conn_count:
+ * @dev: The remote SCIF Device
+ *
+ * Increments the number of active SCIF connections. Callee is expected
+ * to synchronize calling this API with put_conn_count.
+ */
+static __always_inline void
+get_conn_count(struct micscif_dev *dev)
+{
+ dev->num_active_conn++;
+}
+
+/*
+ * put_conn_count:
+ * @dev: The remote SCIF Device
+ *
+ * Decrements the number of active connections. Callee is expected
+ * to synchronize calling this API with get_conn_count.
+ */
+static __always_inline void
+put_conn_count(struct micscif_dev *dev)
+{
+ dev->num_active_conn--;
+ BUG_ON(dev->num_active_conn < 0);
+}
+
+/*
+ * get_kref_count:
+ * epd: SCIF endpoint
+ *
+ * Increments kmod endpoint reference count. Callee is expected
+ * to synchronize calling this API with put_kref_count.
+ */
+static __always_inline void
+get_kref_count(scif_epd_t epd)
+{
+ kref_get(&(epd->ref_count));
+}
+
+/*
+ * put_kref_count:
+ * epd: SCIF endpoint
+ *
+ * Decrements kmod endpoint reference count. Callee is expected
+ * to synchronize calling this API with get_kref_count.
+ */
+static __always_inline void
+put_kref_count(scif_epd_t epd)
+{
+ kref_put(&(epd->ref_count), scif_ref_rel);
+}
+
+/*
+ * scifdev_alive:
+ * @ep: The SCIF endpoint
+ *
+ * Returns true if the remote SCIF Device is running or sleeping for
+ * this endpoint.
+ */
+static inline int scifdev_alive(struct endpt *ep)
+{
+ return (((SCIFDEV_RUNNING == ep->remote_dev->sd_state) ||
+ (SCIFDEV_SLEEPING == ep->remote_dev->sd_state)) &&
+ SCIFDEV_RUNNING == ep->sd_state);
+}
+
+/*
+ * verify_epd:
+ * ep: SCIF endpoint
+ *
+ * Checks several generic error conditions and returns the
+ * appropriate error.
+ */
+static inline int verify_epd(struct endpt *ep)
+{
+ if (ep->state == SCIFEP_DISCONNECTED)
+ return -ECONNRESET;
+
+ if (ep->state != SCIFEP_CONNECTED)
+ return -ENOTCONN;
+
+ if (!scifdev_alive(ep))
+ return -ENODEV;
+
+ return 0;
+}
+
+/**
+ * scif_invalidate_ep() - Set remote SCIF device state for all connected
+ * and disconnected endpoints for a particular node to SCIFDEV_STOPPED,
+ * change endpoint state to disconnected and wake up all send/recv/con
+ * waitqueues.
+ */ +static inline void +scif_invalidate_ep(int node) +{ + struct endpt *ep; + unsigned long sflags; + struct list_head *pos, *tmpq; + + spin_lock_irqsave(&ms_info.mi_connlock, sflags); + list_for_each_safe(pos, tmpq, &ms_info.mi_disconnected) { + ep = list_entry(pos, struct endpt, list); + if (ep->remote_dev->sd_node == node) { + spin_lock(&ep->lock); + ep->sd_state = SCIFDEV_STOPPED; + spin_unlock(&ep->lock); + } + } + list_for_each_safe(pos, tmpq, &ms_info.mi_connected) { + ep = list_entry(pos, struct endpt, list); + if (ep->remote_dev->sd_node == node) { + list_del(pos); + put_conn_count(ep->remote_dev); + spin_lock(&ep->lock); + ep->state = SCIFEP_DISCONNECTED; + list_add_tail(&ep->list, &ms_info.mi_disconnected); + ep->sd_state = SCIFDEV_STOPPED; + wake_up_interruptible(&ep->sendwq); + wake_up_interruptible(&ep->recvwq); + wake_up_interruptible(&ep->conwq); + spin_unlock(&ep->lock); + } + } + spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); + flush_workqueue(ms_info.mi_conn_wq); +} + +/* + * Only Debug Functions Below + */ +#define SCIF_CRUMB pr_debug("%s %d\n", __func__, __LINE__) + +static inline void +micscif_display_all_zombie_ep(void) +{ + struct list_head *pos, *tmpq; + unsigned long sflags; + struct endpt *ep; + + pr_debug("Zombie Info Start\n"); + spin_lock_irqsave(&ms_info.mi_eplock, sflags); + list_for_each_safe(pos, tmpq, &ms_info.mi_zombie) { + ep = list_entry(pos, struct endpt, list); + if (!list_empty(&ep->rma_info.reg_list)) + micscif_display_all_windows(&ep->rma_info.reg_list); + if (!list_empty(&ep->rma_info.remote_reg_list)) + micscif_display_all_windows( + &ep->rma_info.remote_reg_list); + } + spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); + pr_debug("Zombie Info End\n"); +} + +static inline void dump_ep(scif_epd_t epd, const char *func, int line) +{ + struct endpt *ep = (struct endpt *)epd; + pr_debug("%s %d state %d lock %p port.node 0x%x" + "port.port 0x%x peer.node 0x%x peer.port 0x%x backlog %d qp %p" + "qp_offset 0x%llx cnct_gnt_payload 0x%llx remote_dev %p\n", + func, line, ep->state, &ep->lock, ep->port.node, + ep->port.port, ep->peer.node, ep->peer.port, ep->backlog, + ep->qp_info.qp, ep->qp_info.qp_offset, + ep->qp_info.cnct_gnt_payload, ep->remote_dev); +} + +static inline void dump_qp(volatile struct micscif_qp *qp, const char *func, int line) +{ + pr_debug("%s %d qp %p local_buf 0x%llx" + " local_qp 0x%llx remote_buf 0x%llx remote_qp %p ep 0x%llx\n", + func, line, qp, qp->local_buf, + qp->local_qp, qp->remote_buf, qp->remote_qp, qp->ep); +} + +static inline void dump_rb(struct micscif_rb *rb, const char *func, int line) +{ + pr_debug("%s %d rb %p rb_base %p *read_ptr 0x%x" + " *write_ptr 0x%x size 0x%x" + " cro 0x%x cwo 0x%x ocro 0x%x ocwo 0x%x\n", + func, line, rb, rb->rb_base, *rb->read_ptr, + *rb->write_ptr, rb->size, rb->current_read_offset, + rb->current_write_offset, + rb->old_current_read_offset, + rb->old_current_write_offset); +} + +#endif /* MICSCIF_H */ diff --git a/include/mic/micscif_intr.h b/include/mic/micscif_intr.h new file mode 100644 index 0000000..204d7b5 --- /dev/null +++ b/include/mic/micscif_intr.h @@ -0,0 +1,52 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef MICSCIF_INTR_H +#define MICSCIF_INTR_H +#define SBOX_SDBIC0_DBSTAT_BIT 0x40000000 +#define SBOX_SDBIC0_DBREQ_BIT 0x80000000 + +/* RDMASR Info */ +#define RDMASR_IRQ_BASE 17 +#define get_rdmasr_irq(m) ((RDMASR_IRQ_BASE) + (m)) +#define get_rdmasr_offset(m) (((m) << 2) + (SBOX_RDMASR0)) + +#ifdef _MIC_SCIF_ +int register_scif_intr_handler(struct micscif_dev *dev); +void deregister_scif_intr_handler(struct micscif_dev *dev); +#endif +int micscif_setup_interrupts(struct micscif_dev *dev); +void micscif_destroy_interrupts(struct micscif_dev *scifdev); +#endif /* MICSCIF_INTR_H */ diff --git a/include/mic/micscif_kmem_cache.h b/include/mic/micscif_kmem_cache.h new file mode 100644 index 0000000..3f40e29 --- /dev/null +++ b/include/mic/micscif_kmem_cache.h @@ -0,0 +1,62 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. 
Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef MIC_KMEM_CACHE_H +#define MIC_KMEM_CACHE_H +#define MAX_UNALIGNED_BUF_SIZE (1024 * 1024ULL) +#define KMEM_UNALIGNED_BUF_SIZE (MAX_UNALIGNED_BUF_SIZE + (L1_CACHE_BYTES << 1)) +#include +extern struct kmem_cache *unaligned_cache; + +static inline void micscif_kmem_cache_free(void *buffer) +{ + kmem_cache_free(unaligned_cache, buffer); +} + +static inline void *micscif_kmem_cache_alloc(void) +{ + return kmem_cache_alloc(unaligned_cache, GFP_KERNEL|GFP_ATOMIC); +} + +static inline struct kmem_cache *micscif_kmem_cache_create(void) +{ + return kmem_cache_create("Unaligned_DMA", KMEM_UNALIGNED_BUF_SIZE, 0, SLAB_HWCACHE_ALIGN, NULL); +} + +static inline void micscif_kmem_cache_destroy(void) +{ + kmem_cache_destroy(unaligned_cache); +} +#endif diff --git a/include/mic/micscif_map.h b/include/mic/micscif_map.h new file mode 100644 index 0000000..ef2f9a5 --- /dev/null +++ b/include/mic/micscif_map.h @@ -0,0 +1,276 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#ifndef MICSCIF_MAP_H +#define MICSCIF_MAP_H + +static __always_inline +void *get_local_va(off_t off, struct reg_range_t *window, size_t len) +{ + struct page **pages = window->pinned_pages->pages; + + uint64_t page_nr = ((off - window->offset) >> PAGE_SHIFT); + + off_t page_off = off & ~PAGE_MASK; + + return (void *)((uint64_t) + (page_address(pages[page_nr])) | page_off); +} + +static __always_inline void +scif_iounmap(void *virt, size_t len, struct micscif_dev *dev) +{ +#ifdef _MIC_SCIF_ + if (!is_self_scifdev(dev)) + iounmap(virt); +#endif +} + +#ifdef _MIC_SCIF_ +/* FIXME: fix the documentation and functions names since these are also + * used in p2p + */ +/* + * Maps the VA passed in local to the aperture and returns the + * corresponding GTT index in offset by reference. + * In the loopback case simply return the physical address. + */ +static __always_inline int +map_virt_into_aperture(phys_addr_t *out_offset, + void *local, + struct micscif_dev *dev, + size_t size) +{ + if (is_self_scifdev(dev)) + *out_offset = virt_to_phys(local); + else { + /* Error unwinding code relies on return value being zero */ + *out_offset = virt_to_phys(local); + if (dev != &scif_dev[0]) + *out_offset = *out_offset + dev->sd_base_addr; + } + + return 0; +} + +/* + * Maps the struct page passed in page to the aperture and returns the + * corresponding GTT index in offset by reference. + * In the loopback case simply return the physical address. + */ +static __always_inline int +map_page_into_aperture(phys_addr_t *out_offset, + struct page *page, + struct micscif_dev *dev) +{ + if (is_self_scifdev(dev)) + *out_offset = page_to_phys(page); + else { + /* Error unwinding code relies on return value being zero */ + *out_offset = page_to_phys(page); + if (dev != &scif_dev[0]) + *out_offset = *out_offset + dev->sd_base_addr; + } + return 0; +} + +/* + * Nothing to do on card side + */ +static __always_inline void +unmap_from_aperture(phys_addr_t local, + struct micscif_dev *dev, + size_t size) +{ +} + +/* + * Maps Host physical address passed in phys to MIC. + * In the loopback case simply return the VA from the PA. + */ +static __always_inline void * +scif_ioremap(phys_addr_t phys, size_t size, struct micscif_dev *dev) +{ + void *out_virt; + + if (is_self_scifdev(dev)) + out_virt = phys_to_virt(phys); + else + out_virt = ioremap_nocache(phys, size); + + return out_virt; +} + +/* + * Get the system physical address from the physical address passed + * by the host. In the case of loopback simply return the physical + * address. + */ +static __always_inline phys_addr_t +get_phys_addr(phys_addr_t phys, struct micscif_dev *dev) +{ + return phys; +} + +#else /* !_MIC_SCIF_ */ +/* + * Maps the VA passed in local to the aperture and returns the + * corresponding physical address in offset. + * In the loopback case simply return the physical address. + */ +static __always_inline int +map_virt_into_aperture(phys_addr_t *out_offset, + void *local, + struct micscif_dev *dev, + size_t size) +{ + int err = 0; + int bid; + struct pci_dev *hwdev; + + if (is_self_scifdev(dev)) + *(out_offset) = virt_to_phys((local)); + else { + + bid = dev->sd_node - 1; + hwdev = get_per_dev_ctx(bid)->bi_pdev; + *out_offset = mic_map_single(bid, hwdev, local, size); + if (mic_map_error(*out_offset)) + err = -ENOMEM; + } + + if (err) + *out_offset = 0; + + return err; +} +/* + * Maps the struct page passed in page to the aperture and returns the + * corresponding physical address in offset. 
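get_local_va() in micscif_map.h above converts a window offset into a kernel virtual address by indexing the pinned-page array and re-attaching the sub-page offset. The stand-alone sketch below (illustrative values, assuming 4 KB pages) shows the same index/offset split with concrete numbers:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12              /* assumption: 4 KB pages */
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
    uint64_t window_offset = 0x40000;   /* offset the window was registered at */
    uint64_t off           = 0x42a30;   /* offset an RMA wants to touch */

    uint64_t page_nr  = (off - window_offset) >> PAGE_SHIFT; /* index into pages[] */
    uint64_t page_off = off & ~PAGE_MASK;                    /* offset inside that page */

    /* Prints: page_nr=2 page_off=0xa30 */
    printf("page_nr=%llu page_off=0x%llx\n",
           (unsigned long long)page_nr, (unsigned long long)page_off);
    return 0;
}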
+ * In the loopback case simply return the physical address. + */ +static __always_inline int +map_page_into_aperture(phys_addr_t *out_offset, + struct page *page, + struct micscif_dev *dev) +{ + int err = 0; + int bid; + dma_addr_t mic_addr; + struct pci_dev *hwdev; + + if (is_self_scifdev(dev)) + *out_offset = page_to_phys(page); + else { + + bid = dev->sd_node - 1; + hwdev = get_per_dev_ctx(bid)->bi_pdev; + + *out_offset = pci_map_page(hwdev, page, 0x0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + if (pci_dma_mapping_error(hwdev, *out_offset)) { + err = -EINVAL; + } else { + if (!(mic_addr = mic_map(bid, *out_offset, PAGE_SIZE))) { + printk(KERN_ERR "mic_map failed board id %d\ + addr %#016llx size %#016zx\n", + bid, *out_offset, PAGE_SIZE); + pci_unmap_single(hwdev, *out_offset, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + err = -EINVAL; + } else + *out_offset = mic_addr; + } + } + + if (err) + *out_offset = 0; + + return err; +} + +/* + * Unmaps the physical address passed in lo/al from the PCIe aperture. + * Nothing to do in the loopback case. + */ +static __always_inline void +unmap_from_aperture(phys_addr_t local, + struct micscif_dev *dev, + size_t size) +{ + + if (!is_self_scifdev(dev)) + mic_ctx_unmap_single(get_per_dev_ctx(dev->sd_node - 1), + local, size); +} + +/* + * TODO: I'm thinking maybe we should take the apt_phys offset off of this macro + * and have it be outside ... + * Maps the page corresponding to the GTT offset passed in phys. + * In the loopback case simply return the VA from the PA. + */ +static __always_inline void * +scif_ioremap(phys_addr_t phys, size_t size, struct micscif_dev *dev) +{ + void *out_virt; + + if (is_self_scifdev(dev)) + out_virt = phys_to_virt(phys); + else { + out_virt = get_per_dev_ctx(dev->sd_node - 1)->aper.va + phys; + } + return out_virt; +} + +static __always_inline phys_addr_t +get_phys_addr(phys_addr_t phys, struct micscif_dev *dev) +{ + phys_addr_t out_phys; + + if (is_self_scifdev(dev)) + out_phys = phys; + else { + phys_addr_t __apt_base = + (phys_addr_t)get_per_dev_ctx(dev->sd_node - 1)->aper.pa; + out_phys = phys + __apt_base; + } + + return out_phys; +} + +#endif /* !_MIC_SCIF_ */ + +#endif /* MICSCIF_MAP_H */ diff --git a/include/mic/micscif_nm.h b/include/mic/micscif_nm.h new file mode 100644 index 0000000..9f2ff48 --- /dev/null +++ b/include/mic/micscif_nm.h @@ -0,0 +1,234 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. 
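On the host side, the aperture map helpers above return 0 on success and a negative errno on failure, and they zero *out_offset on failure because the error-unwinding code depends on that. A hedged sketch of the expected calling pattern; the function name and buffer are hypothetical:

/* Illustrative host-side sketch only -- not part of the patch. */
static int example_map_buffer(struct micscif_dev *dev, void *buf, size_t len)
{
    phys_addr_t aper_off = 0;
    int err;

    err = map_virt_into_aperture(&aper_off, buf, dev, len);
    if (err)
        return err;         /* aper_off was zeroed by the helper */

    /* ... hand aper_off to the DMA engine / the remote node ... */

    unmap_from_aperture(aper_off, dev, len);
    return 0;
}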
Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef MICSCIF_NM_H +#define MICSCIF_NM_H + +#include + +#ifdef MIC_IS_EMULATION +#define DEFAULT_WATCHDOG_TO (INT_MAX) +#define NODE_ALIVE_TIMEOUT (INT_MAX) +#define NODE_QP_TIMEOUT (INT_MAX) +#define NODE_ACCEPT_TIMEOUT (INT_MAX) +#define NODEQP_SEND_TO_MSEC (INT_MAX) +#else +#define DEFAULT_WATCHDOG_TO (30) +#define NODE_ALIVE_TIMEOUT (ms_info.mi_watchdog_to * HZ) +#define NODE_QP_TIMEOUT (100) +#define NODE_ACCEPT_TIMEOUT (3 * HZ) +#define NODEQP_SEND_TO_MSEC (3 * 1000) +#endif + +#define SCIF_ENABLE_PM 1 + +#define DESTROY_WQ (true) + +enum disconn_type { + DISCONN_TYPE_POWER_MGMT, + DISCONN_TYPE_LOST_NODE, + DISCONN_TYPE_MAINTENANCE_MODE, +}; + +/* + * Notify the host about a new dependency with the remote SCIF device. + * Dependencies are created during scif_mmap()/scif_get_pages(). + */ +void micscif_create_node_dep(struct micscif_dev *dev, int nr_pages); + +/* + * Notify the host that an existing dependency with the remote SCIF + * device no longer exists. + */ +void micscif_destroy_node_dep(struct micscif_dev *dev, int nr_pages); + +/** + * micscif_inc_node_refcnt: + * + * @dev: Remote SCIF device. + * @count: ref count + * + * Increment the global activity ref count for the remote SCIF device. + * If the remote SCIF device is idle, then notify the host to wake up + * the remote SCIF device and then wait for an ACK. + */ +static __always_inline void +micscif_inc_node_refcnt(struct micscif_dev *dev, long cnt) +{ +#ifdef SCIF_ENABLE_PM + if (unlikely(dev && !atomic_long_add_unless(&dev->scif_ref_cnt, + cnt, SCIF_NODE_IDLE))) { + /* + * This code path would not be entered unless the remote + * SCIF device has actually been put to sleep by the host. + */ + mutex_lock(&dev->sd_lock); + if (SCIFDEV_STOPPED == dev->sd_state || + SCIFDEV_STOPPING == dev->sd_state || + SCIFDEV_INIT == dev->sd_state) + goto bail_out; + if (test_bit(SCIF_NODE_MAGIC_BIT, + &dev->scif_ref_cnt.counter)) { + /* Notify host that the remote node must be woken */ + struct nodemsg notif_msg; + + dev->sd_wait_status = OP_IN_PROGRESS; + notif_msg.uop = SCIF_NODE_WAKE_UP; + notif_msg.src.node = ms_info.mi_nodeid; + notif_msg.dst.node = SCIF_HOST_NODE; + notif_msg.payload[0] = dev->sd_node; + /* No error handling for Host SCIF device */ + micscif_nodeqp_send(&scif_dev[SCIF_HOST_NODE], + ¬if_msg, NULL); + /* + * A timeout is not required since only the cards can + * initiate this message. The Host is expected to be alive. + * If the host has crashed then so will the cards. + */ + wait_event(dev->sd_wq, + dev->sd_wait_status != OP_IN_PROGRESS); + /* + * Aieee! The host could not wake up the remote node. + * Bail out for now. + */ + if (dev->sd_wait_status == OP_COMPLETED) { + dev->sd_state = SCIFDEV_RUNNING; + clear_bit(SCIF_NODE_MAGIC_BIT, + &dev->scif_ref_cnt.counter); + } + } + /* The ref count was not added if the node was idle. 
*/ + atomic_long_add(cnt, &dev->scif_ref_cnt); +bail_out: + mutex_unlock(&dev->sd_lock); + } +#endif +} + +/** + * micscif_dec_node_refcnt: + * + * @dev: Remote SCIF device. + * @nr_pages: number of pages + * + * Decrement the global activity ref count for the remote SCIF device. + * Assert if the ref count drops to negative. + */ +static __always_inline void +micscif_dec_node_refcnt(struct micscif_dev *dev, long cnt) +{ +#ifdef SCIF_ENABLE_PM + if (dev) { + if (unlikely((atomic_long_sub_return(cnt, + &dev->scif_ref_cnt)) < 0)) { + printk(KERN_ERR "%s %d dec dev %p node %d ref %ld " + " caller %p Lost Node?? \n", + __func__, __LINE__, dev, dev->sd_node, + atomic_long_read(&dev->scif_ref_cnt), + __builtin_return_address(0)); + atomic_long_add_unless(&dev->scif_ref_cnt, cnt, + SCIF_NODE_IDLE); + } + } +#endif +} + +/* Handle a SCIF_NODE_REMOVE message */ +uint64_t micscif_handle_remove_node(uint64_t mask, uint64_t flags); +void micscif_cleanup_scifdev(struct micscif_dev *dev, bool destroy_wq); + +void micscif_node_add_callback(int node); + +void set_nodemask_bit(uint8_t* nodemask, uint32_t node_id, int val); +int get_nodemask_bit(uint8_t* nodemask, uint32_t node_id); + +#ifndef _MIC_SCIF_ + +/* definition of stack node used in activation/deactivation set algorithms*/ +struct stack_node { + struct list_head next; + uint32_t node_id; +}; + +enum dependency_state { + DEP_STATE_NOT_DEPENDENT, + DEP_STATE_DEPENDENT, + DEP_STATE_DISCONNECT_READY, + DEP_STATE_DISCONNECTED +}; + + +uint64_t micscif_send_pm_rmnode_msg(int node, uint64_t nodemask_addr, + uint64_t nodemask_size, int orig_node); +uint64_t micscif_send_lost_node_rmnode_msg(int node, int orig_node); + +/* definitions of stack methods used in activation/deactivation set algorithms */ +int init_depgraph_stack(struct list_head *stack_ptr); +int uninit_depgraph_stack(struct list_head *stack_ptr); +int is_stack_empty(struct list_head *stack_ptr); +int stack_push_node(struct list_head *stack_ptr, uint32_t node_id); +int stack_pop_node(struct list_head *stack_ptr, uint32_t *node_id); +int micscif_get_activeset(uint32_t node_id, uint8_t *nodemask); +int micscif_get_minimal_deactiveset(uint32_t node_id, uint8_t *nodemask, uint8_t *visited); +int micscif_get_deactiveset(uint32_t node_id, uint8_t *nodemask, int max_possible); +void micscif_update_p2p_state(uint32_t node_id, uint32_t peer_id, enum scif_state state); + +/* Method responsible for disconnecting node from the scif network */ +int micscif_disconnect_node(uint32_t node_id, uint8_t *nodemask, enum disconn_type type); +int micscif_connect_node(uint32_t node_id, bool get_ref); + +void micscif_set_nodedep(uint32_t src_node, uint32_t dst_node, enum dependency_state state); +enum dependency_state micscif_get_nodedep(uint32_t src_node, uint32_t dst_node); +uint64_t micscif_send_node_alive(int node); +void micscif_watchdog_handler(struct work_struct *work); +int micscif_handle_lostnode(uint32_t nodeid); +#endif /*_MIC_SCIF_*/ + +/* SCIF tasks before transition to low power state */ +int micscif_suspend_handler(struct notifier_block *notif, + unsigned long event, void *ptr); + +/* + * SCIF tasks if a previous low power state transition + * has failed after a suspend call. 
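micscif_inc_node_refcnt() and micscif_dec_node_refcnt() above bracket any activity that needs the remote node awake: the increment can block until the host wakes the peer, and every increment must be balanced by a decrement or the node can never idle again. A minimal sketch of that pairing (illustrative only):

/* Illustrative sketch only -- not part of the patch. */
static void example_touch_remote(struct micscif_dev *dev)
{
    /* Take one activity reference; may block until the host wakes the node. */
    micscif_inc_node_refcnt(dev, 1);

    /* ... issue the node QP message or RMA that needs the peer awake ... */

    /* Balance the reference so the node may be put back to sleep. */
    micscif_dec_node_refcnt(dev, 1);
}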
+ */ +int micscif_fail_suspend_handler(struct notifier_block *notif, + unsigned long event, void *ptr); + +/* SCIF tasks after wake up from low power state */ +int micscif_resume_handler(struct notifier_block *notif, + unsigned long event, void *ptr); + +#endif /* MICSCIF_NM_H */ diff --git a/include/mic/micscif_nodeqp.h b/include/mic/micscif_nodeqp.h new file mode 100644 index 0000000..a69ec93 --- /dev/null +++ b/include/mic/micscif_nodeqp.h @@ -0,0 +1,200 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef MICSCIF_NODEQP +#define MICSCIF_NODEQP + +#include "micscif_rb.h" + + /* Payload Description */ +#define SCIF_INIT 1 /* Address of node's node First message sent by a node to + * array the host, and host to node + */ +#define SCIF_EXIT 2 /* Last message telling the host the driver is exiting */ +#define SCIF_NODE_ADD 3 /* Tell Online nodes a new node exists */ +#define SCIF_NODE_ADD_ACK 4 /* Confirm to host sequence is finished TODO Needed???
*/ +#define SCIF_CNCT_REQ 5 /* Phys addr of Request connection to a port */ +#define SCIF_CNCT_GNT 6 /* Phys addr of new Grant connection request */ +#define SCIF_CNCT_GNTACK 7 /* Error type Reject a connection request */ +#define SCIF_CNCT_GNTNACK 8 /* Error type Reject a connection request */ +#define SCIF_CNCT_REJ 9 /* Error type Reject a connection request */ +#define SCIF_CNCT_TERM 10 /* Terminate type Terminate a connection request */ +#define SCIF_TERM_ACK 11 /* Terminate type Terminate a connection request */ +#define SCIF_DISCNCT 12 /* Notify peer that connection is being terminated */ +#define SCIF_DISCNT_ACK 13 /* Notify peer that connection is being terminated */ +#define SCIF_REGISTER 14 /* Tell peer about a new registered window */ +#define SCIF_REGISTER_ACK 15 /* Notify peer about unregistration success */ +#define SCIF_REGISTER_NACK 16 /* Notify peer about registration success */ +#define SCIF_UNREGISTER 17 /* Tell peer about unregistering a registered window */ +#define SCIF_UNREGISTER_ACK 18 /* Notify peer about registration failure */ +#define SCIF_UNREGISTER_NACK 19 /* Notify peer about unregistration failure */ +#define SCIF_ALLOC_REQ 20 /* Request a mapped buffer */ +#define SCIF_ALLOC_GNT 21 /* Notify peer about allocation success */ +#define SCIF_ALLOC_REJ 22 /* Notify peer about allocation failure */ +#define SCIF_FREE_PHYS 23 /* Free previously allocated GTT/PCI mappings */ +#define SCIF_FREE_VIRT 24 /* Free previously allocated virtual memory */ +#define SCIF_CLIENT_SENT 25 /* Notify the peer that a data message has been written to the RB */ +#define SCIF_CLIENT_RCVD 26 /* Notify the peer that a data message has been read from the RB */ +#define SCIF_MUNMAP 27 /* Acknowledgment for a SCIF_MMAP request */ +#define SCIF_MARK 28 /* SCIF Remote Fence Mark Request */ +#define SCIF_MARK_ACK 29 /* SCIF Remote Fence Mark Success */ +#define SCIF_MARK_NACK 30 /* SCIF Remote Fence Mark Failure */ +#define SCIF_WAIT 31 /* SCIF Remote Fence Wait Request */ +#define SCIF_WAIT_ACK 32 /* SCIF Remote Fence Wait Success */ +#define SCIF_WAIT_NACK 33 /* SCIF Remote Fence Wait Failure */ +#define SCIF_SIG_LOCAL 34 /* SCIF Remote Fence Local Signal Request */ +#define SCIF_SIG_REMOTE 35 /* SCIF Remote Fence Remote Signal Request */ +#define SCIF_SIG_ACK 36 /* SCIF Remote Fence Remote Signal Success */ +#define SCIF_SIG_NACK 37 /* SCIF Remote Fence Remote Signal Failure */ +#define SCIF_NODE_CREATE_DEP 42 /* Notify the Host that a new dependency is + * being created between two nodes + */ +#define SCIF_NODE_DESTROY_DEP 43 /* Notify the Host that an existing dependency is + * being destroyed between two nodes + */ +#define SCIF_NODE_REMOVE 44 /* Request to deactivate a set of remote SCIF nodes */ +#define SCIF_NODE_REMOVE_ACK 45 /* Response to a SCIF_NODE_REMOVE message */ +#define SCIF_NODE_WAKE_UP 46 /* Notification to the Host to wake up a remote node */ +#define SCIF_NODE_WAKE_UP_ACK 47 /* Response to SCIF_NODE_WAKE_UP message */ +#define SCIF_NODE_WAKE_UP_NACK 48 /* Response to SCIF_NODE_WAKE_UP message. 
Think Lost Node */ +#define SCIF_NODE_ALIVE 49 /* Check if kn* card is alive */ +#define SCIF_NODE_ALIVE_ACK 50 /* ACK the for above message */ +#define SMPT_SET 51 /* Add a smpt entry */ +#define SCIF_PROXY_DMA 56 /* Proxies DMA read requests to peer for performance */ +#define SCIF_PROXY_ORDERED_DMA 57 /* Proxies DMA read requests to peer for performance */ +#define SCIF_NODE_CONNECT 58 /* Setup a p2p connection b/w two nodes */ +#define SCIF_NODE_CONNECT_NACK 59 /* p2p connection is not successful */ +#define SCIF_NODE_ADD_NACK 60 /* SCIF_NODE_ADD failed report to the waiting thread(s) */ +#define SCIF_GET_NODE_INFO 61 /* Get current node mask from the host*/ +#define SCIF_TEST 62 /* Test value Used for test only */ +#define SCIF_MAX_MSG SCIF_TEST + + +/* + * The *only* reason we need 2 uint64_t for payload + * right now is because the SCIF_CNCT_GNT message needs + * to send across both the QP offset and the QP id. + * + * Now we have to increase this to 3 uint64_t because + * the Alloc message requires the remote EP, allocation size + * and the allocation handle. + * + * Increased to 4 uint64_t because SCIF_FENCE requires + * ep, offset, len and the waitqueue pointer to wake up. + */ +struct nodemsg { + struct scif_portID src; + struct scif_portID dst; + uint32_t uop; + uint64_t payload[4]; +} __attribute__ ((packed)); + + +/* + * Generic state used for certain node QP message exchanges + * like Unregister, Alloc etc. + */ +enum micscif_msg_state { + OP_IDLE = 1, + OP_IN_PROGRESS, + OP_COMPLETED, + OP_FAILED +}; + +/* + * Generic structure used for exchanging ALLOC_REQ/GNT messages. + */ +struct allocmsg { + dma_addr_t phys_addr; + void *vaddr; + uint32_t uop; + size_t size; + enum micscif_msg_state state; + wait_queue_head_t allocwq; +}; + +/* Interesting structure -- a little difficult because we can only + * write across the PCIe, so any r/w pointer we need to read is + * local. We only need to read the read pointer on the inbound_q + * and read the write pointer in the outbound_q + */ +struct micscif_qp { + uint64_t ep; + uint64_t magic; + uint64_t blast; +#define SCIFEP_MAGIC 0x5c1f000000005c1f + struct micscif_rb outbound_q; + struct micscif_rb inbound_q; + /* FIXME cache align local_write/read */ + uint32_t local_write; /* For local inbound */ + uint32_t local_read; /* For local outbound */ + volatile struct micscif_qp *remote_qp; + dma_addr_t local_buf; /* Local BS */ + dma_addr_t local_qp; + dma_addr_t remote_buf; /* Remote BS */ + volatile uint32_t qp_state; +#define QP_OFFLINE 0xdead +#define QP_ONLINE 0xc0de + uint16_t scif_version; + spinlock_t qp_send_lock; + spinlock_t qp_recv_lock; +}; + +/* + * An element in the loopback Node QP message list. 
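struct nodemsg above is the fixed-size unit carried over the node queue pairs: uop selects one of the SCIF_* opcodes and payload[] holds up to four 64-bit operands. A hedged sketch of filling one in, modelled on the SCIF_NODE_WAKE_UP request built in micscif_inc_node_refcnt() earlier; the field values are illustrative:

/* Illustrative sketch only -- mirrors the SCIF_NODE_WAKE_UP request built
 * in micscif_inc_node_refcnt() above; values are made up. */
static void example_build_wakeup_msg(struct nodemsg *msg,
                                     uint16_t self_node, uint16_t sleeping_node)
{
    memset(msg, 0, sizeof(*msg));
    msg->uop        = SCIF_NODE_WAKE_UP;  /* opcode 46 in the table above */
    msg->src.node   = self_node;          /* struct scif_portID src */
    msg->dst.node   = SCIF_HOST_NODE;     /* the host brokers wake-ups */
    msg->payload[0] = sleeping_node;      /* which node should be woken */
}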
+ */ +struct loopb_msg { + struct nodemsg msg; + struct list_head list_member; +}; + +struct micscif_qp *micscif_nodeqp_nextmsg(struct micscif_dev *scifdev); +int micscif_nodeqp_send(struct micscif_dev *scifdev, struct nodemsg *msg, struct endpt *ep); +int micscif_nodeqp_intrhandler(struct micscif_dev *scifdev, struct micscif_qp *qp); +int micscif_loopb_msg_handler(struct micscif_dev *scifdev, struct micscif_qp *qp); + +// Card side only functions +int micscif_setup_card_qp(phys_addr_t host_phys, struct micscif_dev *dev); + +int micscif_setuphost_response(struct micscif_dev *scifdev, uint64_t payload); +int micscif_setup_qp_connect(struct micscif_qp *qp, dma_addr_t *qp_offset, int local_size, struct micscif_dev *scifdev); +int micscif_setup_qp_accept(struct micscif_qp *qp, dma_addr_t *qp_offset, dma_addr_t phys, int local_size, struct micscif_dev *scifdev); +int micscif_setup_qp_connect_response(struct micscif_dev *scifdev, struct micscif_qp *qp, uint64_t payload); +int micscif_setup_loopback_qp(struct micscif_dev *scifdev); +int micscif_destroy_loopback_qp(struct micscif_dev *scifdev); +void micscif_teardown_ep(void *endpt); +void micscif_add_epd_to_zombie_list(struct endpt *ep, bool mi_eplock_held); + +#endif /* MICSCIF_NODEQP */ diff --git a/include/mic/micscif_rb.h b/include/mic/micscif_rb.h new file mode 100644 index 0000000..20a5fe7 --- /dev/null +++ b/include/mic/micscif_rb.h @@ -0,0 +1,170 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef _SCIF_RING_BUFFER_DEFINE +#define _SCIF_RING_BUFFER_DEFINE + +/* + * This describes a general purpose, byte based + * ring buffer. It handles multiple readers or + * writers using a lock -- it is lockless between + * producer and consumer (so it can handle being + * used across the PCIe bus). 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * This version is used to ensure component compatibility between the host and + * card driver modules that use the ring buffer functions. This version should + * be incremented whenever there is a change to the ring buffer module that + * affects the functionality of the ring buffer. + */ +#define RING_BUFFER_VERSION 1 + +/* Two of these actually form a single queue -- one on each side of the PCIe + * bus + * + * NOTE! This only works if the queue (pointed to at rb_base) exists in the + * consumer's memory. The code does not do any wbinvd after writing to the + * buffer, which assumes that the memory is not cached on the writers side. + * + * If this structure were to be used across the PCIe bus with the buffer + * living on the other side of the bus, it wouldn't work (would require a + * wbinvd or use the linux dma streaming buffer API) + */ +struct micscif_rb { + volatile void *rb_base; + volatile uint32_t *read_ptr; /* Points to the read offset */ + volatile uint32_t *write_ptr; /* Points to the write offset */ + uint32_t size; + uint32_t current_read_offset; /* cache it to improve performance */ + uint32_t current_write_offset; /* cache it to improve performance */ + uint32_t old_current_read_offset; + uint32_t old_current_write_offset; +}; + +/** + * methods used by both + */ +void micscif_rb_init(struct micscif_rb *rb, volatile uint32_t *read_ptr, + volatile uint32_t *write_ptr, volatile void *rb_base, + const uint32_t size); + +/** + * writer-only methods + */ +/* + * write a new command, then micscif_rb_commit() + */ +int micscif_rb_write(struct micscif_rb *rb, void *msg, uint32_t size); +/* + * After write(), then micscif_rb_commit() + */ +void micscif_rb_commit(struct micscif_rb *rb); +/* + * used on power state change to reset cached pointers + */ +void micscif_rb_reset(struct micscif_rb *rb); + +/* + * Query space available for writing to a RB. + */ +int micscif_rb_space(struct micscif_rb *rb); +/** + * reader-only methods + */ +/* + * uses (updates) the cached read pointer to get the next command, + * so writer doesnt see the command as consumed. + * + * Returns number of bytes read + * + * Size is IN -- the caller passes in a size (the max size that + * the function will read out) + * + * msg is OUT, but the caller is responsible for allocating space to + * read into. The max size this function will read is what is passed + * into by size, so the buffer pointer to by msg MUST be at least size + * bytes long. + */ +int micscif_rb_get_next (struct micscif_rb *rb, void *msg, uint32_t size); + +/* + * updates the control block read pointer, + * which will be visible to the writer so it can re-use the space + */ +void micscif_rb_update_read_ptr(struct micscif_rb *rb); + +/* + * Count the number of empty slots in the RB + */ +uint32_t micscif_rb_count(struct micscif_rb *rb, uint32_t size); + +/** + * Return the ring buffer module version. + */ +uint16_t micscif_rb_get_version(void); +#endif diff --git a/include/mic/micscif_rma.h b/include/mic/micscif_rma.h new file mode 100644 index 0000000..275e086 --- /dev/null +++ b/include/mic/micscif_rma.h @@ -0,0 +1,960 @@ +/* + * Copyright 2010-2017 Intel Corporation. 
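The ring-buffer API declared just above splits into a writer half (micscif_rb_space/write/commit) and a reader half (micscif_rb_get_next/update_read_ptr). A rough sketch of one message crossing a queue pair, assuming the rb was already set up with micscif_rb_init() and ignoring the doorbell/interrupt side:

/* Illustrative sketch only -- error handling reduced to the essentials. */
static int example_send(struct micscif_rb *out_q, struct nodemsg *msg)
{
    int err;

    if (micscif_rb_space(out_q) < (int)sizeof(*msg))
        return -EAGAIN;                   /* no room, retry later */
    err = micscif_rb_write(out_q, msg, sizeof(*msg));
    if (err)
        return err;
    micscif_rb_commit(out_q);             /* publish the new write pointer */
    return 0;
}

static int example_recv(struct micscif_rb *in_q, struct nodemsg *msg)
{
    if (micscif_rb_get_next(in_q, msg, sizeof(*msg)) <= 0)
        return -EAGAIN;
    micscif_rb_update_read_ptr(in_q);     /* let the writer reuse the space */
    return 0;
}

Because the read and write pointers each live in the consumer's memory, producer and consumer never contend on a lock; only multiple writers (or multiple readers) on the same side need the qp_send_lock/qp_recv_lock held around these calls.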
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef MICSCIF_RMA_H +#define MICSCIF_RMA_H + +#ifdef CONFIG_MMU_NOTIFIER +#include +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#include +#endif +#ifdef CONFIG_HUGETLB_PAGE +#include +#endif +#endif +#include "scif.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mic/micscif_kmem_cache.h" + +struct rma_mmu_notifier { +#ifdef CONFIG_MMU_NOTIFIER + struct mmu_notifier ep_mmu_notifier; +#endif + bool ep_mn_registered; + /* List of temp registration windows for self */ + struct list_head tc_reg_list; + struct mm_struct *mm; + struct endpt *ep; + struct list_head list_member; +}; + +/* Per Endpoint Remote Memory Access Information */ +struct endpt_rma_info { + /* List of registration windows for self */ + struct list_head reg_list; + /* List of registration windows for peer */ + struct list_head remote_reg_list; + /* Offset generator */ + struct va_gen_addr va_gen; + /* + * Synchronizes access to self/remote list and also + * protects the window from being destroyed while + * RMAs are in progress. + */ + struct mutex rma_lock; + /* + * Synchronizes access to temporary cached windows list + * for SCIF Registration Caching. + */ + spinlock_t tc_lock; + /* + * Synchronizes access to the list of MMU notifiers + * registered for this SCIF endpoint. + */ + struct mutex mmn_lock; + /* + * Synchronizes access to the SCIF registered address space + * offset generator. 
+ */ + struct mutex va_lock; + /* + * Keeps track of number of outstanding temporary registered + * windows created by scif_vreadfrom/scif_vwriteto which have + * not been destroyed. tcw refers to the number of temporary + * cached windows and total number of pages pinned. + */ + atomic_t tw_refcount; + atomic_t tw_total_pages; + atomic_t tcw_refcount; + atomic_t tcw_total_pages; + /* + * MMU notifier so that we can destroy the windows when there is + * a change + */ + struct list_head mmn_list; + /* + * Keeps track of number of outstanding remote fence requests + * which have been received by the peer. + */ + int fence_refcount; + /* + * The close routine blocks on this wait queue to ensure that all + * remote fence requests have been serviced. + */ + wait_queue_head_t fence_wq; + /* + * DMA channel used for all DMA transfers for this endpoint. + */ + struct dma_channel *dma_chan; + /* Detect asynchronous list entry deletion */ + int async_list_del; +#ifdef _MIC_SCIF_ + /* Local P2P proxy DMA virtual address for SUD updates by peer */ + void *proxy_dma_va; + /* Local P2P proxy DMA physical address location for SUD updates */ + dma_addr_t proxy_dma_phys; + /* Remote P2P proxy DMA physical address location for SUD updates */ + dma_addr_t proxy_dma_peer_phys; +#endif + /* List of tasks which have remote memory mappings */ + struct list_head task_list; +}; + +/* Information used for tracking remote fence requests */ +struct fence_info { + /* State of this transfer */ + enum micscif_msg_state state; + + /* Fences wait on this queue */ + wait_queue_head_t wq; + + /* Used for storing the DMA mark */ + int dma_mark; +}; + +/* Per remote fence wait request */ +struct remote_fence_info { + /* The SCIF_WAIT message */ + struct nodemsg msg; + + struct list_head list_member; +}; + +/* Self or Peer window */ +enum rma_window_type { + RMA_WINDOW_SELF = 0x1, + RMA_WINDOW_PEER +}; + +/* The number of physical addresses that can be stored in a PAGE. */ +#define NR_PHYS_ADDR_IN_PAGE (PAGE_SIZE >> 3) + +/* + * Store an array of lookup offsets. Each offset in this array maps + * one 4K page containing 512 physical addresses i.e. 2MB. 512 such + * offsets in a 4K page will correspond to 1GB of registered address space. + */ +struct rma_lookup { + /* Array of offsets */ + dma_addr_t *lookup; + /* Offset used to map lookup array */ + dma_addr_t offset; +}; + + +/* + * A set of pinned pages obtained with scif_pin_pages() which could be part + * of multiple registered windows across different end points. + */ +struct scif_pinned_pages { + int64_t nr_pages; + int prot; + int map_flags; + atomic_t ref_count; + uint64_t magic; + /* + * Array of pointers to struct pages populated + * with get_user_pages(..) + */ + struct page **pages; + int *num_pages; + int64_t nr_contig_chunks; + /* Only for Hosts without THP but with Huge TLB FS Like SuSe11 SP1 */ + struct vm_area_struct **vma; +}; + +/* + * Information about a particular task which has remote memory mappings + * created via scif_mmap(..). + */ +struct rma_task_info { + /* + * Stores the pid struct of the grp_leader task structure which + * scif_mmap(..)'d the remote window. 
+ */ + struct pid *pid; + int ref_count; + struct list_head list_member; +}; + +/* Registration Window for Self */ +struct reg_range_t { + int64_t nr_pages; + /* Number of contiguous physical chunks */ + int64_t nr_contig_chunks; + int prot; + int ref_count; + /* Cookie to detect corruption */ + uint64_t magic; + uint64_t offset; + /* va address that this window represents + * Useful for only for temp windows*/ + void *va_for_temp; + /* Used for temporary windows*/ + int dma_mark; + /* + * Pointer to EP. Useful for passing EP around + * with messages to avoid expensive list + * traversals. + */ + uint64_t ep; + + struct list_head list_member; + + enum rma_window_type type; + + /* + * Pointer to peer window. Useful for sending + * messages to peer without requiring an + * extra list traversal + */ + uint64_t peer_window; + + /* Unregistration state */ + enum micscif_msg_state unreg_state; + + /* + * True for temporary windows created via + * scif_vreadfrom/scif_vwriteto. + */ + bool temp; + + bool offset_freed; + + /* Local P2P proxy DMA physical address location for SUD updates */ + dma_addr_t proxy_dma_phys; + + union { + /* Self RAS */ + struct { + /* The set of pinned_pages backing this window */ + struct scif_pinned_pages *pinned_pages; + + /* Handle for sending ALLOC_REQ */ + struct allocmsg alloc_handle; + + /* Wait Queue for an registration (N)ACK */ + wait_queue_head_t regwq; + + /* Registration state */ + enum micscif_msg_state reg_state; + + /* Wait Queue for an unregistration (N)ACK */ + wait_queue_head_t unregwq; + }; + /* Peer RAS specific window elements */ + struct { +#ifdef CONFIG_ML1OM + /* Lookup for physical addresses used for mmap */ + struct rma_lookup phys_addr_lookup; + + /* Lookup for temp physical addresses used for mmap */ + struct rma_lookup temp_phys_addr_lookup; + + /* Mmap state */ + enum micscif_msg_state gttmap_state; + + /* Wait Queue for an unregistration (N)ACK */ + wait_queue_head_t gttmapwq; + + /* Ref count per page */ + int *page_ref_count; +#endif + /* Lookup for physical addresses used for DMA */ + struct rma_lookup dma_addr_lookup; + + /* Number of entries in lookup */ + int nr_lookup; + + /* Offset used to map the window by the peer */ + dma_addr_t mapped_offset; + + /* Ref count for tracking scif_get_pages */ + int get_put_ref_count; + }; + }; +#ifdef CONFIG_ML1OM + /* Array of physical addresses used for creating VtoP mappings */ + /* FIXME: these are phys_addr as seen by the peer node, node at the + * opposite end of the endpt + */ + dma_addr_t *phys_addr; + + /* Temporary array for storing physical addresses for performance */ + dma_addr_t *temp_phys_addr; +#endif + + /* Array of physical addresses used for Host & MIC initiated DMA */ + dma_addr_t *dma_addr; + + /* Array specifying number of pages for each physical address */ + int *num_pages; + struct mm_struct *mm; +} __attribute__ ((packed)); + + +#define RMA_MAGIC(x) BUG_ON(x->magic != SCIFEP_MAGIC) + +/* If this bit is set then the mark is a remote fence mark */ +#define SCIF_REMOTE_FENCE_BIT 30 +/* Magic value used to indicate a remote fence request */ +#define SCIF_REMOTE_FENCE (1ULL << SCIF_REMOTE_FENCE_BIT) + +enum rma_direction { + LOCAL_TO_REMOTE, + REMOTE_TO_LOCAL +}; + +/* Initialize RMA for this EP */ +int micscif_rma_ep_init(struct endpt *ep); + +/* Check if epd can be uninitialized */ +int micscif_rma_ep_can_uninit(struct endpt *ep); + +/* Obtain a new offset. 
Callee must grab RMA lock */ +int micscif_get_window_offset(struct endpt *ep, int flags, + uint64_t offset, size_t len, uint64_t *out_offset); + +/* Free offset. Callee must grab RMA lock */ +void micscif_free_window_offset(struct endpt *ep, + uint64_t offset, size_t len); + +/* Create self registration window */ +struct reg_range_t *micscif_create_window(struct endpt *ep, + int64_t nr_pages, uint64_t offset, bool temp); + +/* Create a set of pinned pages */ +struct scif_pinned_pages *micscif_create_pinned_pages(int nr_pages, int prot); + +/* Destroy a set of pinned pages */ +int micscif_destroy_pinned_pages(struct scif_pinned_pages *pages); + +/* Destroy self registration window.*/ +int micscif_destroy_window(struct endpt *ep, struct reg_range_t *window); + +int micscif_destroy_incomplete_window(struct endpt *ep, struct reg_range_t *window); + +/* Map pages of self window to Aperture/PCI */ +int micscif_map_window_pages(struct endpt *ep, struct reg_range_t *window, bool temp); + +/* Unregister a self window */ +int micscif_unregister_window(struct reg_range_t *window); + +/* Create remote registration window */ +struct reg_range_t *micscif_create_remote_window(struct endpt *ep, int nr_pages); + +/* Destroy remote registration window */ +void micscif_destroy_remote_window(struct endpt *ep, struct reg_range_t *window); + +int micscif_send_alloc_request(struct endpt *ep, struct reg_range_t *window); + +/* Prepare a remote registration window */ +int micscif_prep_remote_window(struct endpt *ep, struct reg_range_t *window); + +/* Create remote lookup entries for physical addresses */ +int micscif_create_remote_lookup(struct endpt *ep, struct reg_range_t *window); + +/* Destroy remote lookup entries for physical addresses */ +void micscif_destroy_remote_lookup(struct endpt *ep, struct reg_range_t *window); + +/* Send a SCIF_REGISTER message and wait for an ACK */ +int micscif_send_scif_register(struct endpt *ep, struct reg_range_t *window); + +/* Send a SCIF_UNREGISTER message */ +int micscif_send_scif_unregister(struct endpt *ep, struct reg_range_t *window); + +/* RMA copy API */ +int micscif_rma_copy(scif_epd_t epd, off_t loffset, void *addr, size_t len, + off_t roffset, int flags, enum rma_direction dir, bool last_chunk); + +/* Sends a remote fence mark request */ +int micscif_send_fence_mark(scif_epd_t epd, int *out_mark); + +/* Sends a remote fence wait request */ +int micscif_send_fence_wait(scif_epd_t epd, int mark); + +/* Sends a remote fence signal request */ +int micscif_send_fence_signal(scif_epd_t epd, off_t roff, uint64_t rval, + off_t loff, uint64_t lval, int flags); + +/* Setup a DMA mark for an endpoint */ +int micscif_fence_mark(scif_epd_t epd); + +void ep_unregister_mmu_notifier(struct endpt *ep); +#ifdef CONFIG_MMU_NOTIFIER +void micscif_mmu_notif_handler(struct work_struct *work); +#endif + +void micscif_rma_destroy_temp_windows(void); +void micscif_rma_destroy_tcw_ep(struct endpt *ep); +void micscif_rma_destroy_tcw_invalid(struct list_head *list); + +void micscif_rma_handle_remote_fences(void); + +/* Reserve a DMA channel for a particular endpoint */ +int micscif_reserve_dma_chan(struct endpt *ep); + +/* Program DMA SUD's after verifying the registered offset */ +int micscif_prog_signal(scif_epd_t epd, off_t offset, uint64_t val, + enum rma_window_type type); + +/* Kill any applications which have valid remote memory mappings */ +void micscif_kill_apps_with_mmaps(int node); + +/* Query if any applications have remote memory mappings */ +bool 
micscif_rma_do_apps_have_mmaps(int node); + +/* Get a reference to the current task which is creating a remote memory mapping */ +int micscif_rma_get_task(struct endpt *ep, int nr_pages); + +/* Release a reference to the current task which is destroying a remote memory mapping */ +void micscif_rma_put_task(struct endpt *ep, int nr_pages); + +/* Cleanup remote registration lists for zombie endpoints */ +void micscif_cleanup_rma_for_zombies(int node); + +#ifdef _MIC_SCIF_ +void micscif_teardown_proxy_dma(struct endpt *ep); +#endif + +static __always_inline +bool is_unaligned(off_t src_offset, off_t dst_offset) +{ + src_offset = src_offset & (L1_CACHE_BYTES - 1); + dst_offset = dst_offset & (L1_CACHE_BYTES - 1); + if (src_offset == dst_offset) + return false; + else + return true; +} + +static __always_inline +int __scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, + off_t roffset, int flags) +{ + int err; + + pr_debug("SCIFAPI readfrom: ep %p loffset 0x%lx len 0x%lx" + " offset 0x%lx flags 0x%x\n", + epd, loffset, len, roffset, flags); + + if (is_unaligned(loffset, roffset)) { + while(len > MAX_UNALIGNED_BUF_SIZE) { + err = micscif_rma_copy(epd, loffset, NULL, + MAX_UNALIGNED_BUF_SIZE, + roffset, flags, REMOTE_TO_LOCAL, false); + if (err) + goto readfrom_err; + loffset += MAX_UNALIGNED_BUF_SIZE; + roffset += MAX_UNALIGNED_BUF_SIZE; + len -=MAX_UNALIGNED_BUF_SIZE; + } + } + err = micscif_rma_copy(epd, loffset, NULL, len, + roffset, flags, REMOTE_TO_LOCAL, true); +readfrom_err: + return err; +} + +static __always_inline +int __scif_writeto(scif_epd_t epd, off_t loffset, size_t len, + off_t roffset, int flags) +{ + int err; + + pr_debug("SCIFAPI writeto: ep %p loffset 0x%lx len 0x%lx" + " roffset 0x%lx flags 0x%x\n", + epd, loffset, len, roffset, flags); + + if (is_unaligned(loffset, roffset)) { + while(len > MAX_UNALIGNED_BUF_SIZE) { + err = micscif_rma_copy(epd, loffset, NULL, + MAX_UNALIGNED_BUF_SIZE, + roffset, flags, LOCAL_TO_REMOTE, false); + if (err) + goto writeto_err; + loffset += MAX_UNALIGNED_BUF_SIZE; + roffset += MAX_UNALIGNED_BUF_SIZE; + len -= MAX_UNALIGNED_BUF_SIZE; + } + } + err = micscif_rma_copy(epd, loffset, NULL, len, + roffset, flags, LOCAL_TO_REMOTE, true); +writeto_err: + return err; +} + +static __always_inline +int __scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset, int flags) +{ + int err; + + pr_debug("SCIFAPI vreadfrom: ep %p addr %p len 0x%lx" + " roffset 0x%lx flags 0x%x\n", + epd, addr, len, roffset, flags); + + if (is_unaligned((off_t)addr, roffset)) { + if (len > MAX_UNALIGNED_BUF_SIZE) + flags &= ~SCIF_RMA_USECACHE; + + while(len > MAX_UNALIGNED_BUF_SIZE) { + err = micscif_rma_copy(epd, 0, addr, + MAX_UNALIGNED_BUF_SIZE, + roffset, flags, REMOTE_TO_LOCAL, false); + if (err) + goto vreadfrom_err; + addr = (void *)((uint64_t)addr + MAX_UNALIGNED_BUF_SIZE); + roffset += MAX_UNALIGNED_BUF_SIZE; + len -= MAX_UNALIGNED_BUF_SIZE; + } + } + err = micscif_rma_copy(epd, 0, addr, len, + roffset, flags, REMOTE_TO_LOCAL, true); +vreadfrom_err: + return err; +} + +static __always_inline +int __scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset, int flags) +{ + int err; + + pr_debug("SCIFAPI vwriteto: ep %p addr %p len 0x%lx" + " roffset 0x%lx flags 0x%x\n", + epd, addr, len, roffset, flags); + + if (is_unaligned((off_t)addr, roffset)) { + if (len > MAX_UNALIGNED_BUF_SIZE) + flags &= ~SCIF_RMA_USECACHE; + + while(len > MAX_UNALIGNED_BUF_SIZE) { + err = micscif_rma_copy(epd, 0, addr, + MAX_UNALIGNED_BUF_SIZE, + roffset, flags, 
LOCAL_TO_REMOTE, false); + if (err) + goto vwriteto_err; + addr = (void *)((uint64_t)addr + MAX_UNALIGNED_BUF_SIZE); + roffset += MAX_UNALIGNED_BUF_SIZE; + len -= MAX_UNALIGNED_BUF_SIZE; + } + } + err = micscif_rma_copy(epd, 0, addr, len, + roffset, flags, LOCAL_TO_REMOTE, true); +vwriteto_err: + return err; +} + +void micscif_rma_completion_cb(uint64_t data); + +int micscif_pci_dev(uint16_t node, struct pci_dev **pdev); +#ifndef _MIC_SCIF_ +int micscif_pci_info(uint16_t node, struct scif_pci_info *dev); +#endif + +/* + * nr_pages in a 2MB page is specified via the top 12 bits in the + * physical address. + */ + +/* Check all parenthesis in these macros. See if putting in bottom makes sense? */ +#define RMA_HUGE_NR_PAGE_SHIFT ((52)) +#define RMA_HUGE_NR_PAGE_MASK (((0xFFFULL) << RMA_HUGE_NR_PAGE_SHIFT)) +#define RMA_GET_NR_PAGES(addr) ((addr) >> RMA_HUGE_NR_PAGE_SHIFT) +#define RMA_SET_NR_PAGES(addr, nr_pages) ((addr) = (((nr_pages) & 0xFFFULL) << RMA_HUGE_NR_PAGE_SHIFT) | ((uint64_t)(addr))) +#define RMA_GET_ADDR(addr) ((addr) & ~(RMA_HUGE_NR_PAGE_MASK)) + +extern bool mic_huge_page_enable; + +#define SCIF_HUGE_PAGE_SHIFT 21 + +/* + * micscif_is_huge_page: + * @page: A physical page. + */ +static __always_inline int +micscif_is_huge_page(struct scif_pinned_pages *pinned_pages, int index) +{ + int huge = 0; + struct page *page = pinned_pages->pages[index]; + + if (compound_order(page) + PAGE_SHIFT == SCIF_HUGE_PAGE_SHIFT) + huge = 1; + if (huge) + ms_info.nr_2mb_pages++; + if (!mic_huge_page_enable) + huge = 0; +#ifdef RMA_DEBUG + WARN_ON(!page_count(page)); + WARN_ON(page_mapcount(page) < 0); +#endif + return huge; +} + +/* + * micscif_detect_large_page: + * @pinned_pages: A set of pinned pages. + */ +static __always_inline int +micscif_detect_large_page(struct scif_pinned_pages *pinned_pages, char *addr) +{ + int i = 0, nr_pages, huge; + char *next_huge, *end; + char *end_addr = addr + (pinned_pages->nr_pages << PAGE_SHIFT); + + while (addr < end_addr) { + huge = micscif_is_huge_page(pinned_pages, i); + if (huge) { + next_huge = (char *)ALIGN( + (unsigned long)(addr + 1), + PMD_SIZE); + end = next_huge > end_addr ? end_addr : next_huge; + nr_pages = (int)((end - addr) >> PAGE_SHIFT); + pinned_pages->num_pages[i] = (int)nr_pages; + addr = end; + i += (int)nr_pages; + + } else { + pinned_pages->num_pages[i] = 1; + i++; + addr += PAGE_SIZE; + ms_info.nr_4k_pages++; + } + pinned_pages->nr_contig_chunks++; + } + return 0; +} + +/** + * micscif_set_nr_pages: + * @ep: end point + * @window: self registration window + * + * Set nr_pages in every entry of physical address/dma address array + * and also remove nr_pages information from physical addresses. + */ +static __always_inline void +micscif_set_nr_pages(struct micscif_dev *dev, struct reg_range_t *window) +{ + int j; +#ifdef CONFIG_ML1OM + int l = 0, k; +#endif + + for (j = 0; j < window->nr_contig_chunks; j++) { + window->num_pages[j] = RMA_GET_NR_PAGES(window->dma_addr[j]); + if (window->num_pages[j]) + window->dma_addr[j] = RMA_GET_ADDR(window->dma_addr[j]); + else + break; +#ifdef CONFIG_ML1OM + for (k = 0; k < window->num_pages[j]; k++) + if (window->temp_phys_addr[j]) + window->phys_addr[l + k] = + RMA_GET_ADDR(window->temp_phys_addr[j]) + (k << PAGE_SHIFT); + l += window->num_pages[j]; +#endif + } +} + +#ifdef CONFIG_ML1OM +/* + * micscif_get_phys_addr: + * Obtain the phys_addr given the window and the offset. + * @window: Registered window. + * @off: Window offset. 
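The RMA_*_NR_PAGES macros above pack the page count of a contiguous chunk into bits 63:52 of a DMA address, as the comment notes. A stand-alone sketch (illustrative values) showing the round trip:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the macros above: the chunk's page count lives in bits 63:52. */
#define RMA_HUGE_NR_PAGE_SHIFT 52
#define RMA_HUGE_NR_PAGE_MASK  (0xFFFULL << RMA_HUGE_NR_PAGE_SHIFT)
#define RMA_GET_NR_PAGES(addr) ((addr) >> RMA_HUGE_NR_PAGE_SHIFT)
#define RMA_SET_NR_PAGES(addr, nr_pages) \
    ((addr) = (((nr_pages) & 0xFFFULL) << RMA_HUGE_NR_PAGE_SHIFT) | (uint64_t)(addr))
#define RMA_GET_ADDR(addr)     ((addr) & ~(RMA_HUGE_NR_PAGE_MASK))

int main(void)
{
    uint64_t dma_addr = 0x76543000ULL;   /* illustrative address */

    RMA_SET_NR_PAGES(dma_addr, 512);     /* one 2 MB chunk = 512 x 4 KB pages */
    printf("packed   0x%llx\n", (unsigned long long)dma_addr);
    printf("nr_pages %llu\n",  (unsigned long long)RMA_GET_NR_PAGES(dma_addr));
    printf("addr     0x%llx\n", (unsigned long long)RMA_GET_ADDR(dma_addr));
    return 0;
}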
+ */ +static __always_inline dma_addr_t +micscif_get_phys_addr(struct reg_range_t *window, uint64_t off) +{ + int page_nr = (off - window->offset) >> PAGE_SHIFT; + off_t page_off = off & ~PAGE_MASK; + return window->phys_addr[page_nr] | page_off; +} +#endif + +#define RMA_ERROR_CODE (~(dma_addr_t)0x0) + +/* + * micscif_get_dma_addr: + * Obtain the dma_addr given the window and the offset. + * @window: Registered window. + * @off: Window offset. + * @nr_bytes: Return the number of contiguous bytes till next DMA addr index. + * @index: Return the index of the dma_addr array found. + * @start_off: start offset of index of the dma addr array found. + * The nr_bytes provides the callee an estimate of the maximum possible + * DMA xfer possible while the index/start_off provide faster lookups + * for the next iteration. + */ +static __always_inline dma_addr_t +micscif_get_dma_addr(struct reg_range_t *window, uint64_t off, size_t *nr_bytes, int *index, uint64_t *start_off) +{ + if (window->nr_pages == window->nr_contig_chunks) { + int page_nr = (int)((off - window->offset) >> PAGE_SHIFT); + off_t page_off = off & ~PAGE_MASK; + if (nr_bytes) + *nr_bytes = PAGE_SIZE - page_off; + if (page_nr >= window->nr_pages) { + printk(KERN_ERR "%s dma_addr out of boundary\n", __FUNCTION__); + } + return window->dma_addr[page_nr] | page_off; + } else { + int i = index ? *index : 0; + uint64_t end; + uint64_t start = start_off ? *start_off : window->offset; + for (; i < window->nr_contig_chunks; i++) { + end = start + (window->num_pages[i] << PAGE_SHIFT); + if (off >= start && off < end) { + if (index) + *index = i; + if (start_off) + *start_off = start; + if (nr_bytes) + *nr_bytes = end - off; + return (window->dma_addr[i] + (off - start)); + } + start += (window->num_pages[i] << PAGE_SHIFT); + } + } +#ifdef CONFIG_MK1OM + printk(KERN_ERR "%s %d BUG. Addr not found? window %p off 0x%llx\n", __func__, __LINE__, window, off); + BUG_ON(1); +#endif + return RMA_ERROR_CODE; +} + +/* + * scif_memset: + * @va: kernel virtual address + * @c: The byte used to fill the memory + * @size: Buffer size + * + * Helper API which fills size bytes of memory pointed to by va with the + * constant byte c. This API fills the memory in chunks of 4GB - 1 bytes + * for a single invocation of memset(..) to work around a kernel bug in + * x86_64 @ https://bugzilla.kernel.org/show_bug.cgi?id=27732 + * where memset(..) does not do "ANY" work for size >= 4GB. + * This kernel bug has been fixed upstream in v3.2 via the commit + * titled "x86-64: Fix memset() to support sizes of 4Gb and above" + * but has not been backported to distributions like RHEL 6.3 yet. + */ +static __always_inline void scif_memset(char *va, int c, size_t size) +{ + size_t loop_size; + const size_t four_gb = 4 * 1024 * 1024 * 1024ULL; + + while (size) { + loop_size = min(size, four_gb - 1); + memset(va, c, loop_size); + size -= loop_size; + va += loop_size; + } +} + +/* + * scif_zalloc: + * @size: Size of the allocation request. + * + * Helper API which attempts to allocate zeroed pages via + * __get_free_pages(..) first and then falls back on + * vmalloc(..) if that fails. This is required because + * vmalloc(..) is *slow*. 
+ */ +static __always_inline void *scif_zalloc(size_t size) +{ + void *ret; + size_t align = ALIGN(size, PAGE_SIZE); + + if (!align) + return NULL; + + if (align <= (1 << (MAX_ORDER + PAGE_SHIFT - 1))) + if ((ret = (void*)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(align)))) + goto done; + if (!(ret = vmalloc(align))) + return NULL; + + /* TODO: Use vzalloc once kernel supports it */ + scif_memset(ret, 0, size); +done: +#ifdef RMA_DEBUG + atomic_long_add_return(align, &ms_info.rma_alloc_cnt); +#endif + return ret; +} + +/* + * scif_free: + * @addr: Address to be freed. + * @size: Size of the allocation. + * Helper API which frees memory allocated via scif_zalloc(). + */ +static __always_inline void scif_free(void *addr, size_t size) +{ + size_t align = ALIGN(size, PAGE_SIZE); + + if (unlikely(is_vmalloc_addr(addr))) + vfree(addr); + else { + free_pages((unsigned long)addr, get_order(align)); + } +#ifdef RMA_DEBUG + WARN_ON(atomic_long_sub_return(align, &ms_info.rma_alloc_cnt) < 0); +#endif +} + +static __always_inline void +get_window_ref_count(struct reg_range_t *window, int64_t nr_pages) +{ + window->ref_count += (int)nr_pages; +} + +static __always_inline void +put_window_ref_count(struct reg_range_t *window, int64_t nr_pages) +{ + window->ref_count -= (int)nr_pages; + BUG_ON(window->nr_pages < 0); +} + +static __always_inline void +set_window_ref_count(struct reg_range_t *window, int64_t nr_pages) +{ + window->ref_count = (int)nr_pages; +} + +/* Debug API's */ +void micscif_display_window(struct reg_range_t *window, const char *s, int line); +static inline struct mm_struct *__scif_acquire_mm(void) +{ + if (mic_ulimit_check) { +#ifdef RMA_DEBUG + atomic_long_add_return(1, &ms_info.rma_mm_cnt); +#endif + return get_task_mm(current); + } + return NULL; +} + +static inline void __scif_release_mm(struct mm_struct *mm) +{ + if (mic_ulimit_check && mm) { +#ifdef RMA_DEBUG + WARN_ON(atomic_long_sub_return(1, &ms_info.rma_mm_cnt) < 0); +#endif + mmput(mm); + } +} + +static inline int __scif_dec_pinned_vm_lock(struct mm_struct *mm, + int64_t nr_pages, bool try_lock) +{ + if (mm && nr_pages && mic_ulimit_check) { + if (try_lock) { + if (!down_write_trylock(&mm->mmap_sem)) { + return -1; + } + } else { + down_write(&mm->mmap_sem); + } +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)) + mm->pinned_vm -= nr_pages; +#else + mm->locked_vm -= nr_pages; +#endif + up_write(&mm->mmap_sem); + } + return 0; +} + +static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm, + int64_t nr_pages) +{ + if (mm && mic_ulimit_check && nr_pages) { + unsigned long locked, lock_limit; + locked = nr_pages; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)) + locked += mm->pinned_vm; +#else + locked += mm->locked_vm; +#endif + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { + pr_debug("locked(%lu) > lock_limit(%lu)\n", + locked, lock_limit); + return -ENOMEM; + } else { +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 1, 0)) + mm->pinned_vm = locked; +#else + mm->locked_vm = locked; +#endif + } + } + return 0; +} +#endif diff --git a/include/mic/micscif_rma_list.h b/include/mic/micscif_rma_list.h new file mode 100644 index 0000000..c7f25ed --- /dev/null +++ b/include/mic/micscif_rma_list.h @@ -0,0 +1,151 @@ +/* + * Copyright 2010-2017 Intel Corporation. 
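scif_zalloc() and scif_free() above must be used as a pair: the allocation may come from the page allocator or fall back to vmalloc, and the free path checks is_vmalloc_addr() and needs the same size to recompute the page order. A hedged sketch with a hypothetical array allocation:

/* Illustrative kernel-side sketch only -- not part of the patch. */
static dma_addr_t *example_alloc_table(size_t nr_entries)
{
    /* Zeroed allocation; may be backed by pages or by vmalloc. */
    return scif_zalloc(nr_entries * sizeof(dma_addr_t));
}

static void example_free_table(dma_addr_t *table, size_t nr_entries)
{
    /* Must pass the same size so the page-order math matches the allocation. */
    scif_free(table, nr_entries * sizeof(dma_addr_t));
}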
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef MICSCIF_RMA_LIST_H +#define MICSCIF_RMA_LIST_H + +/* + * RMA Linked List Manipulation API's. + * Callee must Hold RMA lock to call the API's below. + * When and if RMA uses RB trees for log(n) search, + * similar API's should be implemented. + */ + +/* + * Specifies whether an RMA operation can span + * across partial windows, a single window or multiple + * contiguous windows. + * Mmaps can span across partial windows. + * Unregistration can span across complete windows. + * scif_get_pages() can span a single window. + */ +enum range_request { + WINDOW_PARTIAL, + WINDOW_SINGLE, + WINDOW_FULL +}; + +/* Self Registration list RMA Request query */ +struct micscif_rma_req { + struct reg_range_t **out_window; + uint64_t offset; + size_t nr_bytes; + int prot; + enum range_request type; + struct list_head *head; + void *va_for_temp; +}; + +/** + * struct mic_copy_work: + * + * Work for DMA copy thread is provided by allocating and preparing + * struct mic_copy_work and calling mic_enqueue_copy_work. + */ +struct mic_copy_work { + uint64_t src_offset; + + uint64_t dst_offset; + + /* Starting src registered window */ + struct reg_range_t *src_window; + + /* Starting dst registered window */ + struct reg_range_t *dst_window; + + /* Is this transfer a loopback transfer? */ + int loopback; + + size_t len; + /* DMA copy completion callback.
Details in mic_dma_lib.h */ + struct dma_completion_cb *comp_cb; + + struct micscif_dev *remote_dev; + + /* DO_DMA_POLLING or DO_DMA_INTR or none */ + int fence_type; + + bool ordered; + +#ifdef CONFIG_ML1OM + /* GTT map state */ + enum micscif_msg_state gttmap_state; + + /* Wait Queue for a GTT map (N)ACK */ + wait_queue_head_t gttmapwq; + + uint64_t gtt_offset; + + uint64_t gtt_length; + +#endif + bool dma_chan_released; + struct list_head list_member; +}; + +/* Insert */ +void micscif_insert_window(struct reg_range_t *window, struct list_head *head); +void micscif_insert_tcw(struct reg_range_t *window, + struct list_head *head); + +/* Query */ +int micscif_query_window(struct micscif_rma_req *request); +int micscif_query_tcw(struct endpt *ep, struct micscif_rma_req *request); + +/* Called from close to unregister all self windows */ +int micscif_unregister_all_windows(scif_epd_t epd); + +/* Traverse list and munmap */ +void micscif_rma_list_munmap(struct reg_range_t *window, uint64_t offset, int nr_pages); +/* Traverse list and mmap */ +int micscif_rma_list_mmap(struct reg_range_t *start_window, + uint64_t offset, int nr_pages, struct vm_area_struct *vma); +/* Traverse list and unregister */ +int micscif_rma_list_unregister(struct reg_range_t *window, uint64_t offset, int nr_pages); + +/* CPU copy */ +int micscif_rma_list_cpu_copy(struct mic_copy_work *work); + +/* Traverse remote RAS and ensure none of the get_put_ref_counts are +ve */ +int micscif_rma_list_get_pages_check(struct endpt *ep); + +/* Debug API's */ +void micscif_display_all_windows(struct list_head *head); + +int micscif_rma_list_dma_copy_wrapper(struct endpt *epd, struct mic_copy_work *work, struct dma_channel *chan, off_t loffset); + +void micscif_rma_local_cpu_copy(uint64_t offset, struct reg_range_t *window, uint8_t *temp, size_t remaining_len, bool to_temp); + +#endif /* MICSCIF_RMA_LIST_H */ diff --git a/include/mic/micscif_smpt.h b/include/mic/micscif_smpt.h new file mode 100644 index 0000000..7c3c0f9 --- /dev/null +++ b/include/mic/micscif_smpt.h @@ -0,0 +1,120 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. 
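As a hedged illustration of the micscif_rma_list.h query API above: a caller that already holds the RMA lock fills a micscif_rma_req and lets micscif_query_window() walk the registration list. The list head is passed in explicitly here because the endpoint field that holds it is not part of this header; the protection flags are only an example.

static int example_lookup_window(struct list_head *reg_list, uint64_t offset,
				 size_t nr_bytes, struct reg_range_t **out)
{
	struct micscif_rma_req req = {
		.out_window = out,
		.offset     = offset,
		.nr_bytes   = nr_bytes,
		.prot       = SCIF_PROT_READ | SCIF_PROT_WRITE,
		.type       = WINDOW_PARTIAL,	/* request may span partial windows */
		.head       = reg_list,
	};

	/* RMA lock must be held by the caller, per the comment above */
	return micscif_query_window(&req);
}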
+ * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef MIC_SMPT_H +#define MIC_SMPT_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_BOARD_SUPPORTED 256 + +#define SNOOP_ON (0 << 0) +#define SNOOP_OFF (1 << 0) +#define NUM_SMPT_REGISTERS 32 +#define NUM_SMPT_ENTRIES_IN_USE 32 +#define SMPT_MASK 0x1F +#define MIC_SYSTEM_PAGE_SHIFT 34ULL +#define MIC_SYSTEM_PAGE_MASK ((1ULL << MIC_SYSTEM_PAGE_SHIFT) - 1ULL) + +struct _mic_ctx_t; +struct pci_dev; + +typedef struct mic_smpt { + dma_addr_t dma_addr; + int64_t ref_count; +} mic_smpt_t; + + +/* Sbox Smpt Reg Bits: + * Bits 31:2 Host address + * Bits 1 RSVD + * Bits 0 No snoop + */ +#define BUILD_SMPT(NO_SNOOP, HOST_ADDR) \ + (uint32_t)(((((HOST_ADDR)<< 2) & (~0x03)) | ((NO_SNOOP) & (0x01)))) + +bool is_syspa(dma_addr_t hostmic_pa); + +dma_addr_t mic_map(int bid, dma_addr_t dma_addr, size_t size); +void mic_unmap(int bid, dma_addr_t dma_addr, size_t size); + +dma_addr_t mic_map_single(int bid, struct pci_dev *hwdev, void *p, size_t size); +void mic_unmap_single(int bid, struct pci_dev *hwdev, dma_addr_t mic_addr, + size_t size); + +dma_addr_t mic_ctx_map_single(struct _mic_ctx_t *mic_ctx, void *p, size_t size); +void mic_ctx_unmap_single(struct _mic_ctx_t *mic_ctx, dma_addr_t dma_addr, + size_t size); + +dma_addr_t mic_to_dma_addr(int bid, dma_addr_t mic_addr); +void mic_smpt_set(volatile void *mm_sbox, uint64_t dma_addr, uint64_t index); + +static inline +bool mic_map_error(dma_addr_t mic_addr) +{ + return !mic_addr; +} +#endif // MIC_SMPT_H diff --git a/include/mic/micscif_va_gen.h b/include/mic/micscif_va_gen.h new file mode 100644 index 0000000..b1df13b --- /dev/null +++ b/include/mic/micscif_va_gen.h @@ -0,0 +1,86 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. 
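To make the SMPT layout above concrete: each of the 32 entries maps one 2^34-byte (16 GB) system page of host memory, and BUILD_SMPT() packs the page number into bits 31:2 alongside the no-snoop control in bit 0. The arithmetic below is only a sketch of that packing; real slot selection and ref-counting live in the SMPT code itself.

static uint32_t example_smpt_register(dma_addr_t host_pa, int no_snoop)
{
	/* Which 16 GB system page does this host address fall into? */
	uint64_t sys_page = host_pa >> MIC_SYSTEM_PAGE_SHIFT;

	/* Pack the page number and the snoop control bit, per the
	 * register layout comment above */
	return BUILD_SMPT(no_snoop ? SNOOP_OFF : SNOOP_ON, sys_page);
}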
Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* generate a virtual address for a given size */ + +#ifndef MICSCIF_VA_GEN_H +#define MICSCIF_VA_GEN_H + +#include "micscif_va_node.h" + +/* + * To avoid collisions with user applications trying to use + * MAP_FIXED with scif_register(), the following window address space + * allocation scheme is used. + * + * 1) (0) - (2 ^ 62 - 1)) + * Window Address Space that can be claimed using MAP_FIXED. + * 2) (2 ^ 62) - (2 ^ 63) - 1) + * Window address space used for allocations by the SCIF driver + * when MAP_FIXED is not passed. + */ +#define VA_GEN_MIN 0x4000000000000000 +#define VA_GEN_RANGE 0x3f00000000000000 + +#define INVALID_VA_GEN_ADDRESS 0xff00000000000000 +#define INVALID_VA_PAGE_INDEX 0xff00000000000 + +struct va_gen_addr { + struct va_node_allocator allocator; + uint32_t hole_list; + uint32_t claims_list; + uint64_t base; +}; + +/* + * return a base for the range + * caller trusted to keep track of both base and range + */ +uint64_t va_gen_alloc(struct va_gen_addr *addr, + uint64_t num_bytes, uint32_t align_bytes); + +/* Claim ownership of memory region. Fails if already occupied */ +uint64_t va_gen_claim(struct va_gen_addr *addr, + uint64_t address, uint64_t num_bytes); + +/* release ownership of a base/range */ +void va_gen_free(struct va_gen_addr *addr, + uint64_t address, uint64_t num_bytes); + +int va_gen_init(struct va_gen_addr *addr, uint64_t base, uint64_t range); + +void va_gen_destroy(struct va_gen_addr *addr); + +#endif diff --git a/include/mic/micscif_va_node.h b/include/mic/micscif_va_node.h new file mode 100644 index 0000000..659f62f --- /dev/null +++ b/include/mic/micscif_va_node.h @@ -0,0 +1,115 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. 
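A short sketch of driving the generator above with the constants from the comment; whether the driver seeds it with exactly these values, and whether allocation failure is reported as INVALID_VA_GEN_ADDRESS, are assumptions here.

static int example_va_gen(void)
{
	struct va_gen_addr gen;
	uint64_t off;
	int err;

	/* Manage the non-MAP_FIXED half of the window address space */
	if ((err = va_gen_init(&gen, VA_GEN_MIN, VA_GEN_RANGE)))
		return err;

	/* Page-aligned allocation out of the generator-managed range */
	off = va_gen_alloc(&gen, 2 * PAGE_SIZE, PAGE_SIZE);
	if (off == INVALID_VA_GEN_ADDRESS) {	/* assumed failure value */
		va_gen_destroy(&gen);
		return -ENOMEM;
	}

	va_gen_free(&gen, off, 2 * PAGE_SIZE);
	va_gen_destroy(&gen);
	return 0;
}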
Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* generate a virtual address for a given size */ +#ifndef MICSCIF_VA_NODE_H +#define MICSCIF_VA_NODE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define invalid_va_node_index ((uint32_t)(-1)) + +struct va_node { + uint32_t next; + uint64_t base; + uint64_t range; +}; + +struct va_node_allocator { + /* Emulated variable-size array + * is implemented as a sequence of fixed-sized slabs. + * SlabDirectory keeps the sequence. + * Slab is a contiguous block of nodes -- saves number of allocations + * when allocing a new slab of nodes, alloc this size + */ + uint32_t slab_shift; + uint32_t nodes_in_slab; + uint32_t slab_mask; + struct va_node **pp_slab_directory; + uint32_t num_slabs; + uint32_t num_free_slabs; + uint32_t free_list; +}; + +int va_node_is_valid(uint32_t index); + +/* + * get the node corresponding to a NodePtr + * We are emulating a variable-size array + */ +struct va_node *va_node_get(struct va_node_allocator *node, uint32_t index); + +/* returns an NodePtr to a free node */ +int va_node_alloc(struct va_node_allocator *node, uint32_t *out_alloc); + +/* put a node back into the free pool, by NodePtr */ +void va_node_free(struct va_node_allocator *node, uint32_t index); + +void va_node_init(struct va_node_allocator *node); + +void va_node_destroy(struct va_node_allocator *node); + +#endif diff --git a/include/mic/micvcons.h b/include/mic/micvcons.h new file mode 100644 index 0000000..26e60a5 --- /dev/null +++ b/include/mic/micvcons.h @@ -0,0 +1,164 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. 
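The slab-directory comment above implies a simple split of a node index into (slab, offset-within-slab) via slab_shift and slab_mask. The lookup below is a reading of that structure, not the actual va_node_get() implementation.

static struct va_node *example_node_lookup(struct va_node_allocator *a,
					   uint32_t index)
{
	uint32_t slab = index >> a->slab_shift;	/* which fixed-size slab */
	uint32_t node = index & a->slab_mask;	/* offset inside that slab */

	if (!va_node_is_valid(index) || slab >= a->num_slabs)
		return NULL;

	return &a->pp_slab_directory[slab][node];
}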
Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef MICVCONS_H +#define MICVCONS_H + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MICVCONS_DEVICE_NAME "ttyMIC" + +#define MICVCONS_BUF_SIZE PAGE_SIZE +#define MICDCONS_MAX_OUTPUT_BYTES 64 +#define MICVCONS_SHORT_TIMEOUT 100 +#define MICVCONS_MAX_TIMEOUT 500 + +#define MIC_VCONS_READY 0xc0de +#define MIC_VCONS_SLEEPING 0xd00d +#define MIC_VCONS_WAKINGUP 0xd12d +#define MIC_HOST_VCONS_READY 0xbad0 +#define MIC_VCONS_HOST_OPEN 0xbad1 +#define MIC_VCONS_RB_VER_ERR 0xbad2 + +#define MICVCONS_TIMER_RESTART 1 +#define MICVCONS_TIMER_SHUTDOWN 0 + +typedef struct micvcons { + int dc_enabled; + void *dc_hdr_virt; + void *dc_buf_virt; + dma_addr_t dc_hdr_dma_addr; + dma_addr_t dc_dma_addr; + uint32_t dc_size; +} micvcons_t; + +typedef struct micvcons_port { + struct board_info *dp_bdinfo; + struct micvcons *dp_vcons; + struct micscif_rb *dp_in; + struct micscif_rb *dp_out; + struct tty_struct *dp_tty; + struct list_head list_member; + /* + * work queue to schedule work that wakes up a sleeping card + * and read the data from the buffer. + */ + struct workqueue_struct *dp_wq; + struct work_struct dp_wakeup_read_buf; + + spinlock_t dp_lock; + struct mutex dp_mutex; + + volatile int dp_bytes; + volatile uint32_t dp_canread; + + volatile struct file *dp_reader; + volatile struct file *dp_writer; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + struct tty_port port; +#endif +} micvcons_port_t; + +/* vcons IPC layout */ +struct vcons_buf +{ + uint32_t host_magic; + uint32_t mic_magic; + + uint16_t host_rb_ver; + uint16_t mic_rb_ver; + + /* mic o/p buffer */ + dma_addr_t o_buf_dma_addr; /* host buf dma addr*/ + uint32_t o_wr; + uint32_t o_size; + + /* mic i/p buffer */ + uint64_t i_hdr_addr; /* mic hdr addr */ + uint64_t i_buf_addr; /* mic buf addr */ + uint32_t i_rd; + uint32_t i_size; +}; + +struct vcons_mic_header +{ + uint32_t o_rd; + uint32_t i_wr; + uint32_t host_status; +}; + +int micvcons_start(struct _mic_ctx_t *mic_ctx); +int micvcons_port_write(struct micvcons_port *port, const unsigned char *buf, + int count); +struct _mic_ctx_t; +void micvcons_stop(struct _mic_ctx_t *mic_ctx); +int micvcons_pm_disconnect_node(uint8_t *node_bitmask, enum disconn_type type); +#endif /* MICVCONS_H */ diff --git a/include/mic/micveth.h b/include/mic/micveth.h new file mode 100644 index 0000000..c4e65a6 --- /dev/null +++ b/include/mic/micveth.h @@ -0,0 +1,145 @@ +/* + * Copyright 2010-2017 Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef MICVETH_H +#define MICVETH_H + +#include "micveth_dma.h" + +#include "micint.h" +#include "micveth_common.h" + +#define MICVETH_MAX_PACKET_SIZE (63 * 1024) +#define MICVETH_TRANSFER_FIFO_SIZE 128 + +#define MICVETH_LINK_UP_MAGIC 0x1A77ABEE +#define MICVETH_LINK_DOWN_MAGIC 0x1DEADBEE + +#define MICVETH_POLL_TIMER_DELAY 1 +#define MICVETH_CLIENT_TIMER_DELAY 10 + +typedef struct ring_packet { + struct sk_buff *pd_skb; + uint64_t pd_phys; + uint64_t pd_length; +} ring_packet_t; + +typedef struct ring_desc { + uint64_t rd_phys; + uint64_t rd_length; + uint32_t rd_valid; +} ring_desc_t; + +typedef struct ring_queue { + uint32_t rq_head; + uint32_t rq_tail; + uint32_t rq_length; + ring_desc_t rq_descs[MICVETH_TRANSFER_FIFO_SIZE]; +} ring_queue_t; + +typedef struct ring { + ring_queue_t r_tx; + ring_queue_t r_rx; +} veth_ring_t; + +#define VETH_STATE_INITIALIZED 0 +#define VETH_STATE_LINKUP 1 +#define VETH_STATE_LINKDOWN 2 + + +typedef struct micveth_info { + struct pci_dev *vi_pdev; + struct net_device *vi_netdev; + uint8_t *vi_sbox; + uint8_t *vi_dbox; + uint32_t *vi_scratch14; + uint32_t *vi_scratch15; + mic_ctx_t *mic_ctx; + volatile uint32_t vi_state; + uint32_t vi_skb_mtu; + + struct delayed_work vi_poll; + + struct workqueue_struct *vi_wq; + char vi_wqname[16]; + struct work_struct vi_bh; + struct work_struct vi_txws; + + spinlock_t vi_rxlock; + spinlock_t vi_txlock; + + struct { + veth_ring_t ring; + uint64_t phys; + uint64_t length; + } vi_ring; + + veth_ring_t *ring_ptr; + + ring_packet_t vi_tx_desc[MICVETH_TRANSFER_FIFO_SIZE]; + ring_packet_t vi_rx_desc[MICVETH_TRANSFER_FIFO_SIZE]; + uint32_t vi_pend; +} micveth_info_t; + +enum { + CLIENT_POLL_STOPPED, + CLIENT_POLL_RUNNING, + CLIENT_POLL_STOPPING, +}; + +typedef struct micveth { + int 
lv_num_interfaces; + int lv_num_clients; + int lv_active_clients; + int lv_num_links_remaining; + micveth_info_t *lv_info; + + struct mutex lv_state_mutex; + + uint32_t lv_pollstate; + struct delayed_work lv_poll; + wait_queue_head_t lv_wq; + +} micveth_t; + +int micveth_init(struct device *dev); +int micveth_init_legacy(int num_bds, struct device *dev); +void micveth_exit(void); +int micveth_probe(mic_ctx_t *mic_ctx); +void micveth_remove(mic_ctx_t *mic_ctx); +int micveth_start(mic_ctx_t *mic_ctx); +void micveth_stop(mic_ctx_t *mic_ctx); + +#endif /* MICVETH_H */ diff --git a/include/mic/micveth_common.h b/include/mic/micveth_common.h new file mode 100644 index 0000000..5df0afb --- /dev/null +++ b/include/mic/micveth_common.h @@ -0,0 +1,69 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef MICVETHCOMMON_H +#define MICVETHCOMMON_H + +#ifndef ETH_HLEN +#define ETH_HLEN 14 +#endif + +typedef enum micvnet_state { + MICVNET_STATE_UNDEFINED, + MICVNET_STATE_UNINITIALIZED, + MICVNET_STATE_LINKUP, + MICVNET_STATE_LINK_DOWN, + MICVNET_STATE_BEGIN_UNINIT, + MICVNET_STATE_TRANSITIONING, +}micvnet_state; + + +/* + * Fancy way of defining an enumeration and the mapping between them and + * the module parameter--they're guaranteed to be in sync this way. + */ +#define VNET_MODES \ + __VNET_MODE(POLL, poll) \ + __VNET_MODE(INTR, intr) \ + __VNET_MODE(DMA, dma) \ + /* end */ +#define __VNET_MODE(u, l) VNET_MODE_##u , +enum { VNET_MODES }; +#undef __VNET_MODE + +extern char *mic_vnet_modes[]; +extern int mic_vnet_mode; + +#endif /* MICVETHCOMMON_H */ diff --git a/include/mic/micveth_dma.h b/include/mic/micveth_dma.h new file mode 100644 index 0000000..d48598f --- /dev/null +++ b/include/mic/micveth_dma.h @@ -0,0 +1,279 @@ +/* + * Copyright 2010-2017 Intel Corporation. 
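The VNET_MODES macro above is an X-macro: each __VNET_MODE(u, l) expansion contributes one upper-case enum constant and one lower-case module-parameter string, so the two lists cannot drift apart. The companion string table is presumably generated the same way in a .c file, roughly:

/* Re-expand VNET_MODES with a string-producing __VNET_MODE to build the
 * table indexed by VNET_MODE_POLL / VNET_MODE_INTR / VNET_MODE_DMA */
#define __VNET_MODE(u, l) #l ,
char *mic_vnet_modes[] = { VNET_MODES };	/* { "poll", "intr", "dma" } */
#undef __VNET_MODE

so that mic_vnet_modes[mic_vnet_mode] names the currently selected mode.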
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef MICVETH_DMA_H +#define MICVETH_DMA_H + +#include +#include "micint.h" + +#include "mic_common.h" +#include "mic_dma_lib.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + Define this if only DMA mode is supported without legacy POLL/INTR modes + (i.e if only micveth_dma.c is included in the host/card side drivers, i.e + when linvnet.c is excluded from host side driver and micveth.c from card + side driver). This will ensure that other global symbols which are at + present common with legacy modes (in linvnet.c/micveth.c) are all included + in micveth_dma.c. +*/ +#undef STANDALONE_VNET_DMA + +/*******************************************************/ +#define MICVNET_MSG_RB_SIZE 128 +#define DMA_ALIGNMENT L1_CACHE_BYTES +#define VNET_MAX_SKBS 62 + +/* The maximum total number of outstanding messages possible in the current + implementation is 2 * VNET_MAX_SKBS + 1. 
*/ +#if (MICVNET_MSG_RB_SIZE < 2 * VNET_MAX_SKBS + 2) +#error "MICVNET_MSG_RB_SIZE should be at least (2 * VNET_MAX_SKBS + 2)" +#endif + +#if (MICVNET_MSG_RB_SIZE & (MICVNET_MSG_RB_SIZE - 1)) +#error "MICVNET_MSG_RB_SIZE should be power of 2" +#endif + +enum micvnet_msg_id { + MICVNET_MSG_ADD_DMA_BUFFER, + MICVNET_MSG_DMA_COMPLETE, + MICVNET_MSG_LINK_DOWN, + MICVNET_MSG_LINK_UP, +}; + +struct micvnet_msg_add_dma_buffer { + uint64_t buf_phys; + uint64_t buf_size; +}; + +struct micvnet_msg_dma_complete { + uint64_t dst_phys; + uint64_t size; + uint64_t dma_offset; +}; + +#define VNET_DRIVER_VERSION 1 +struct micvnet_msg_link_up { + uint64_t vnet_driver_version; +}; + +union micvnet_msg_body { + struct micvnet_msg_add_dma_buffer micvnet_msg_add_dma_buffer; + struct micvnet_msg_dma_complete micvnet_msg_dma_complete; + struct micvnet_msg_link_up micvnet_msg_link_up; +}; + +struct micvnet_msg { + uint64_t msg_id; + union micvnet_msg_body body; +}; + +struct micvnet_msg_rb { + struct micvnet_msg buf[MICVNET_MSG_RB_SIZE]; + volatile uint32_t head; + volatile uint32_t tail; + uint32_t size; + volatile uint32_t prev_head; + volatile uint32_t prev_tail; +}; + +struct micvnet_msg_ring_pair { + struct micvnet_msg_rb rb_tx; + struct micvnet_msg_rb rb_rx; +}; + +struct micvnet_msg_qp { + struct micvnet_msg_rb *tx; + struct micvnet_msg_rb *rx; +}; + +/*******************************************************/ + +/* Restict micvnet mtu to 63K because ping does not work on RHEL 6.3 with 64K + MTU - HSD [4118026] */ +#define MICVNET_MAX_MTU (63 * 1024) +#define MICVNET_CARD_UP_MAGIC 0x1A77BBEE + +struct rx_node { + struct list_head list; + struct sk_buff *skb; + uint64_t phys; + uint64_t size; +}; + +struct dma_node { + struct list_head list; + uint64_t phys; + uint64_t size; +}; + +struct tx_node { + struct list_head list; + struct sk_buff *skb; +}; + +struct sched_node { + struct list_head list; + struct sk_buff *skb; + unsigned char *skb_data_aligned; + uint64_t dma_src_phys; + uint64_t dma_size; + uint64_t dma_offset; + uint64_t dst_phys; +}; + +struct obj_list { + char *buf; + int size; + size_t obj_size; + volatile uint32_t head; + volatile uint32_t tail; +}; + +struct micvnet_info { + struct pci_dev *vi_pdev; + struct net_device *vi_netdev; + uint8_t *vi_sbox; + uint8_t *vi_dbox; + uint32_t *vi_scratch14; + mic_ctx_t *mic_ctx; + atomic_t vi_state; + + struct workqueue_struct *vi_wq; + char vi_wqname[16]; + struct work_struct vi_ws_bh; + struct work_struct vi_ws_tx; + struct work_struct vi_ws_dmacb; + struct work_struct vi_ws_link_down; + struct work_struct vi_ws_stop; + struct work_struct vi_ws_start; + + spinlock_t vi_rxlock; + spinlock_t vi_txlock; + +#ifdef HOST + struct micvnet_msg_ring_pair vi_rp; +#else + struct micvnet_msg_ring_pair *ring_ptr; +#endif + uint64_t vi_rp_phys; + struct micvnet_msg_qp vi_qp; + + struct obj_list dnode_list; + + struct list_head vi_rx_skb; + struct list_head vi_dma_buf; + struct list_head vi_tx_skb; + struct list_head vi_sched_skb; + + mic_dma_handle_t dma_handle; + struct dma_channel *dma_chan; + struct dma_completion_cb dma_cb; + atomic_t cnt_dma_complete; + + atomic_t cnt_dma_buf_avail; + bool link_down_initiator; + atomic_t cnt_tx_pending; + wait_queue_head_t stop_waitq; +}; + + +struct micvnet { + atomic_t lv_active_clients; + int created; +}; + +int micvnet_init(struct device *dev); +void micvnet_exit(void); +int micvnet_probe(mic_ctx_t *mic_ctx); +void micvnet_remove(mic_ctx_t *mic_ctx); +int micvnet_xmit(struct sk_buff *skb, struct net_device *dev); + +int 
micvnet_start(mic_ctx_t *mic_ctx); +void micvnet_stop(mic_ctx_t *mic_ctx); + +#ifndef HOST +int __init micvnet_module_init(void); +void __exit micvnet_module_exit(void); +#endif + +#ifdef STANDALONE_VNET_DMA +#define micveth_init micvnet_init +#define micveth_exit micvnet_exit +#define micveth_probe micvnet_probe +#define micveth_remove micvnet_remove +#define micveth_start micvnet_start +#define micveth_stop micvnet_stop +#endif + +extern int vnet_num_buffers; +#ifndef HOST +extern ulong vnet_addr; +#endif +#endif // MICVETH_DMA_H diff --git a/include/mic/ringbuffer.h b/include/mic/ringbuffer.h new file mode 100644 index 0000000..5fe81af --- /dev/null +++ b/include/mic/ringbuffer.h @@ -0,0 +1,195 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* +Description: This is a generic ring buffer implementation to be used by +anyone who needs a ring buffer. The ring buffer is maipulated +using Read and Write functions. These functions perform all of +the necessary space checks and only complete the operation if +if the requested number of items can be read or written. A +return value of false indicates that either the ring buffer +contains less then the requested number of items (for Read) or +there isn't enough space left in the ring buffer (for Write). 
+*/ + +#ifndef _MICHOST_RING_BUFFER_DEFINE + +#define _MICHOST_RING_BUFFER_DEFINE + +// +// Requirements: +// Ring base should be already aligned properly +// Ring size should be just multiple of the alignment size +// All packets should be at least multiple of 4 bytes for the purpose of padding +// + +#define RINGBUFFER_ALIGNMENT_SIZE 64 // in byte + +typedef struct _ringbuffer +{ + uint8_t *ringbuff_ptr; + volatile uint32_t *readptr; // Points to the read offset + volatile uint32_t *writeptr; // Points to the write offset + uint32_t ringbuffsize; + uint32_t curr_readoffset; // cache it to improve performance. + uint32_t curr_writeoffset; // cache it to improve performance. + uint32_t old_readoffset; + uint32_t old_writeoffset; +} ringbuffer; + +// Commands common across all ring buffers +typedef enum _rb_cmdopcode +{ + // note: don't use 0, because the ring buffer + // is initialized to a bunch of 0's that aren't really commands. + MIC_RBCT_ERROR = 0x0, // an error has occurred if encountered + MIC_RBCT_NOP, // Used to skip empty space in the ringbuffer. + MIC_RBCT_DMAEXEC, // DMA buffer to transfer/execute + MIC_RBCT_SHUTDOWN, // bus power-down eminent + MIC_RBCT_CREATESTDPROCESS, // Launches an executable on the ramdisk. + MIC_RBCT_CREATENATIVEPROCESS, // Launches a native process. + // NRFIX : not implemented. If native apps are launched by loading shared + // libraries(DLLs) into a standard stub app then this command goes away. + MIC_RBCT_DESTROYPROCESS, // Destroys a process. + MIC_RBCT_VIRTUALALLOC, // Creates a uOS virtual address range + MIC_RBCT_MAPHOSTMEMORY, // Used by implement host kernel mode driver services + MIC_RBCT_UNMAPHOSTMEMORY, // Unmaps host memory + MIC_RBCT_UOSESCAPE, // Used to pass uOS escapes from the host + MIC_RBCT_RESERVED1, // Reserved for future use + MIC_RBCT_RESERVED2, // Reserved for future use + MIC_RBCT_UPLOADSTDAPPLICATION, // Uploads a standard application to the uOS + MIC_RBCT_CREATEUOSRESOURCE, // Creates a DPT page cache + MIC_RBCT_DESTROYUOSRESOURCE, // Destroys a DPT page cache + MIC_RBCT_RESERVE_RING_BANDWIDTH_DBOX_TRAFFIC, // Reserves a ring bandwidth for DBOX traffic + + // Following commands are from MIC->Host (CRBT => CPU ring buffer.) + MIC_CRBT_LOG_INFO, // Host logs information sent by the uOS. + + // Always make these the last ones in the list +#if defined(MIC_DEBUG) || defined(ENABLE_GFXDEBUG) + MIC_RBCT_READPHYSICALMEMORY = 0x8000, // Used by debug tools to read memory on the device + MIC_RBCT_WRITEPHYSICALMEMORY, // Used by debug tools to write memory on the device +#endif // defined(MIC_DEBUG) || defined(ENABLE_GFXDEBUG) + MIC_RBCT_CMD_MAX // No valid OpCodes above this one +}ringbuff_cmdop; + +typedef struct _ringbuff_cmdhdr +{ + ringbuff_cmdop opcode:16; + uint32_t size:16; +}ringbuff_cmdhdr; + +#ifdef __cplusplus +extern "C" { +#endif + +//--------------------------------- +// methods used by both +//--------------------------------- +// initialize cached ring buffer structure +void rb_initialize(ringbuffer* ringbuff, volatile uint32_t* readptr, + volatile uint32_t* writeptr, void *buff, const uint32_t size); + +//--------------------------------- +// writer-only methods +//--------------------------------- +// write a new command. 
Must follow with fence/MMIO, then RingBufferCommit() +int rb_write(ringbuffer* ringbuff, ringbuff_cmdhdr* cmd_header); +// After write(), do an mfence(), an MMIO write to serialize, then Commit() +void rb_commit(ringbuffer* ringbuff); +// used on power state change to reset cached pointers +void rb_reset(ringbuffer* ringbuff); +// used to determine the largest possible command that could be sent next +uint32_t rb_get_available_space(ringbuffer* ringbuff); + +// TODO: It may be more optimal to have "Reserve" function exposed to the client +// instead of requiring it to create a command that will be copied into the ring buffer. + + +//--------------------------------- +// reader-only methods +//--------------------------------- +// uses (updates) the cached read pointer to get the next command, so writer doesn't +// see the command as consumed +ringbuff_cmdhdr* rb_get_next_cmd(ringbuffer* ringbuff); +// updates the control block read pointer, which will be visible to the writer so it +// can re-use the space +void rb_update_readptr(ringbuffer* ringbuff, ringbuff_cmdhdr* cmd_header); +// reader skips all commands, updating its next read offset +void rb_skip_to_offset(ringbuffer* ringbuff, uint32_t new_readptr); + +// uOS used this method to determine if RingBuffer is empty or not before attempting +// to fetch command out of ring buffer If ringbuffer is empty, means uOS would have +// fetched it earlier. +uint32_t rb_empty(ringbuffer* ringbuff); + +// only used by host simulator +void rb_sync(ringbuffer* ringbuff); + +#ifdef __cplusplus +} +#endif + +#ifdef __LINUX_GPL__ +//============================================================================== +// FUNCTION: AlignLow +// +// DESCRIPTION: Returns trunk(in_data / in_granularity) * in_granularity +// +// PARAMETERS: +// in_data - Data to be aligned +// in_granularity - Alignment chunk size - must be a power of 2 +#if defined(__cplusplus) +template +#else // no C++ +#define TData uint64_t +#endif // if C++ + +static inline TData AlignLow(TData in_data, uintptr_t in_granularity) +{ + TData mask = (TData)(in_granularity-1); // 64 -> 0x3f + + // floor to granularity + TData low = in_data & ~mask; + + return low; +} + +#if !defined(__cplusplus) +#undef TData +#endif // if no C++ +#endif // __LINUX_GPL_ + +#endif //_MICHOST_RING_BUFFER_DEFINE diff --git a/include/mic_common.h b/include/mic_common.h new file mode 100644 index 0000000..92554ad --- /dev/null +++ b/include/mic_common.h @@ -0,0 +1,769 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. 
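A hedged sketch of the writer-side sequence the ringbuffer.h comments above spell out: rb_write() the command, fence, perform the serializing MMIO write, then rb_commit() to publish the new write offset to the reader. The doorbell write is left as a placeholder.

static int example_rb_send_nop(ringbuffer *rb)
{
	ringbuff_cmdhdr hdr;

	hdr.opcode = MIC_RBCT_NOP;
	hdr.size   = sizeof(hdr);	/* a bare header is the whole command here */

	if (!rb_write(rb, &hdr))	/* false: not enough space in the ring */
		return -ENOSPC;

	smp_mb();			/* stands in for the mfence mentioned above */
	/* ... serializing MMIO write / doorbell to the card goes here ... */
	rb_commit(rb);			/* make the new write offset visible */
	return 0;
}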
This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#if !defined(__MIC_COMMON_H) +#define __MIC_COMMON_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef USE_VCONSOLE +#include +#endif +#include +#include +#include +#include +#include +#include + +#define GET_MAX(a, b) ( ((a) > (b)) ? (a) : (b) ) +#define GET_MIN(a, b) ( ((a) < (b)) ? (a) : (b) ) + +// System Interrupt Cause Read Register 0 +#define SBOX_SICR0_DBR(x) ((x) & 0xf) +#define SBOX_SICR0_DMA(x) (((x) >> 8) & 0xff) + +// System Interrupt Cause Enable Register 0 +#define SBOX_SICE0_DBR(x) ((x) & 0xf) +#define SBOX_SICE0_DBR_BITS(x) ((x) & 0xf) +#define SBOX_SICE0_DMA(x) (((x) >> 8) & 0xff) +#define SBOX_SICE0_DMA_BITS(x) (((x) & 0xff) << 8) + +// System Interrupt Cause Read Register 1 +#define SBOX_SICR1_SBOXERR(x) ((x) & 0x1) +#define SBOX_SICR1_SPIDONE(x) (((x) >> 4) & 0x1) + +// System Interrupt Cause Set Register 1 +#define SBOX_SICC1_SBOXERR(x) ((x) & 0x1) +#define SBOX_SICC1_SPIDONE(x) (((x) >> 4) & 0x1) + +// Offsets in the MMIO Range for register segments +#define HOST_DBOX_BASE_ADDRESS 0x00000000 +#define HOST_SBOX_BASE_ADDRESS 0x00010000 +#define HOST_GTT_BASE_ADDRESS 0x00040000 + +#define SCRATCH0_MEM_TEST_DISABLE(x) ((x) & 0x1) +#define SCRATCH0_MEM_USAGE(x) (((x) >> 1) & 0x3) +#define SCR0_MEM_ALL 0x0 +#define SCR0_MEM_HALF 0x1 +#define SCR0_MEM_THIRD 0x2 +#define SCR0_MEM_FOURTH 0x3 +#define SCRATCH0_MEM_SIZE_KB(x) ((x) >> 0x3) + +#define SCRATCH2_DOWNLOAD_STATUS(x) ((x) & 0x1) + +#define SCRATCH2_CLEAR_DOWNLOAD_STATUS(x) ((x) & ~0x1) +#define SCRATCH2_APIC_ID(x) (((x) >> 1) & 0x1ff) +#define SCRATCH2_DOWNLOAD_ADDR(x) ((x) & 0xfffff000) + +#define SCRATCH13_SUB_STEP(x) ((x) & 0xf) +#define SCRATCH13_STEP_ID(x) (((x) >> 4) & 0xf) +#define SCRATCH13_PLATFORM_ID(x) (((x) >> 18) & 0x3) + + +#define MEMVOLT_MEMVOLT(x) (((x) >>SHIFT_MEMVOLT) & MASK_MEMVOLT) +#define MEMFREQ_MEMFREQ(x) (((x) >>SHIFT_MEMORYFREQ) & MASK_MEMORYFREQ) +#define FAILSAFEOFFSET_FAILSAFE(x) (((x) >>SHIFT_FAIL_SAFE) & MASK_FAIL_SAFE) + +#define SCRATCH4_ACTIVE_CORES(x) (((x) >>SHIFT_ACTIVE_CORES) & MASK_ACTIVE_CORES) +#define SCRATCH0_MEMSIZE(x) (((x) >>SHIFT_MEMSIZE) & MASK_MEMSIZE) +#define SCRATCH7_FLASHVERSION(x) (((x) >>SHIFT_FLASHVERSION) & MASK_FLASHVERSION) +#define SCRATCH7_FUSECONFIGREV(x) (((x) >>SHIFT_FUSE_CONFIG_REV) & MASK_FUSE_CONFIG_REV) 
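The SICR0/SICE0 helpers above simply slice fields out of the 32-bit interrupt cause and enable registers; given a cause value read from the SBOX, the doorbell and DMA sources separate as sketched below (the register read itself and the logging are schematic).

static void example_decode_sicr0(uint32_t sicr0)
{
	uint32_t doorbells = SBOX_SICR0_DBR(sicr0);	/* bits 3:0  - doorbell sources */
	uint32_t dma_chans = SBOX_SICR0_DMA(sicr0);	/* bits 15:8 - DMA channel sources */

	if (doorbells & 0x1)
		pr_debug("doorbell 0 raised\n");
	if (dma_chans)
		pr_debug("DMA interrupt(s) pending: 0x%x\n", dma_chans);
}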
+#define SCRATCH13_MODEL(x) (((x) >>SHIFT_MODEL) & MASK_MODEL) +#define SCRATCH13_FAMILY_DATA(x) (((x) >>SHIFT_FAMILY_DATA) & MASK_FAMILY_DATA) +#define SCRATCH13_PROCESSOR(x) (((x) >>SHIFT_PROCESSOR) & MASK_PROCESSOR) +#define SCRATCH13_EXTENDED_MODEL(x) (((x) >>SHIFT_EXTENDED_MODEL) & MASK_EXTENDED_MODEL) +#define SCRATCH13_EXTENDED_FAMILY(x) (((x) >>SHIFT_EXTENDED_FAMILY) & MASK_EXTENDED_FAMILY) + + +#define DBOX_READ(mmio, offset) \ + readl((uint32_t*)((uint8_t*)(mmio) + (HOST_DBOX_BASE_ADDRESS + (offset)))) +#define DBOX_WRITE(value, mmio, offset) \ + writel((value), (uint32_t*)((uint8_t*)(mmio) + (HOST_DBOX_BASE_ADDRESS + (offset)))) + +#define SBOX_READ(mmio, offset) \ + readl((uint32_t*)((uint8_t*)(mmio) + (HOST_SBOX_BASE_ADDRESS + (offset)))) +#define SBOX_WRITE(value, mmio, offset) \ + writel((value), (uint32_t*)((uint8_t*)(mmio) + (HOST_SBOX_BASE_ADDRESS + (offset)))) + +#define SET_BUS_DEV_FUNC(bus, device, function, reg_offset) \ + (( bus << 16 ) | ( device << 11 ) | ( function << 8 ) | reg_offset) + +#define GTT_READ(mmio, offset) \ + readl((uint32_t*)((uint8_t*)(mmio) + (HOST_GTT_BASE_ADDRESS + (offset)))) +#define GTT_WRITE(value, mmio, offset) \ + writel((value), (uint32_t*)((uint8_t*)(mmio) + (HOST_GTT_BASE_ADDRESS + (offset)))) + + +#define ENABLE_MIC_INTERRUPTS(mmio) { \ + uint32_t sboxSice0reg = SBOX_READ((mmio), SBOX_SICE0); \ + sboxSice0reg |= SBOX_SICE0_DBR_BITS(0xf) | SBOX_SICE0_DMA_BITS(0xff); \ + SBOX_WRITE(sboxSice0reg, (mmio), SBOX_SICE0); } + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) +#endif + +#define DLDR_APT_BAR 0 +#define DLDR_MMIO_BAR 4 + +#define PCI_VENDOR_INTEL 0x8086 + +#define PCI_DEVICE_ABR_2249 0x2249 +#define PCI_DEVICE_ABR_224a 0x224a + +#define PCI_DEVICE_KNC_2250 0x2250 +#define PCI_DEVICE_KNC_2251 0x2251 +#define PCI_DEVICE_KNC_2252 0x2252 +#define PCI_DEVICE_KNC_2253 0x2253 +#define PCI_DEVICE_KNC_2254 0x2254 +#define PCI_DEVICE_KNC_2255 0x2255 +#define PCI_DEVICE_KNC_2256 0x2256 +#define PCI_DEVICE_KNC_2257 0x2257 +#define PCI_DEVICE_KNC_2258 0x2258 +#define PCI_DEVICE_KNC_2259 0x2259 +#define PCI_DEVICE_KNC_225a 0x225a + +#define PCI_DEVICE_KNC_225b 0x225b +#define PCI_DEVICE_KNC_225c 0x225c +#define PCI_DEVICE_KNC_225d 0x225d +#define PCI_DEVICE_KNC_225e 0x225e + +#define MIC_CMDLINE_BUFSIZE 1024 +#define RESET_FAIL_TIME 300 + +/* Masks for sysfs entries */ +#ifdef CONFIG_ML1OM +#define MASK_COREVOLT 0xff +#define MASK_COREFREQ 0xfff +#endif +#define MASK_MEMVOLT 0xff +#define MASK_MEMORYFREQ 0xff +#define MASK_MEMSIZE 0x1fffffff +#define MASK_FLASHVERSION 0xffff +#define MASK_SUBSTEPPING_DATA 0xf +#define MASK_STEPPING_DATA 0xf +#define MASK_MODEL 0xf +#define MASK_FAMILY_DATA 0xf +#define MASK_PROCESSOR 0x3 +#define MASK_PLATFORM 0x3 +#define MASK_EXTENDED_MODEL 0xf +#define MASK_EXTENDED_FAMILY 0xff +#define MASK_FUSE_CONFIG_REV 0x3ff +#define MASK_ACTIVE_CORES 0x3f +#define MASK_FAIL_SAFE 0xffffffff +#define MASK_FLASH_UPDATE 0xffffffff +/* Shifts for sysfs entries */ +#ifdef CONFIG_ML1OM +#define SHIFT_COREVOLT 0 +#define SHIFT_COREFREQ 0 +#endif +#define SHIFT_MEMVOLT 0 +#define SHIFT_MEMORYFREQ 0 +#define SHIFT_MEMSIZE 3 +#define SHIFT_FLASHVERSION 16 +#define SHIFT_SUBSTEPPING_DATA 0 +#define SHIFT_STEPPING_DATA 4 +#define SHIFT_MODEL 8 +#define SHIFT_FAMILY_DATA 12 +#define SHIFT_PROCESSOR 16 +#define SHIFT_PLATFORM 18 +#define SHIFT_EXTENDED_MODEL 20 +#define SHIFT_EXTENDED_FAMILY 24 +#define SHIFT_FUSE_CONFIG_REV 0 +#define SHIFT_ACTIVE_CORES 10 +#define SHIFT_FAIL_SAFE 0 +#define SHIFT_FLASH_UPDATE 0 
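The SCRATCH13 accessors above pair with the SHIFT_/MASK_ definitions that follow them; decoding a raw scratch value into its CPUID-style fields looks roughly like this (the raw value would come from an SBOX scratch-register read, which is not shown).

static void example_decode_scratch13(uint32_t scratch13)
{
	uint32_t model     = SCRATCH13_MODEL(scratch13);
	uint32_t family    = SCRATCH13_FAMILY_DATA(scratch13);
	uint32_t ext_model = SCRATCH13_EXTENDED_MODEL(scratch13);
	uint32_t ext_fam   = SCRATCH13_EXTENDED_FAMILY(scratch13);
	uint32_t stepping  = SCRATCH13_STEP_ID(scratch13);
	uint32_t substep   = SCRATCH13_SUB_STEP(scratch13);

	pr_debug("family %u.%u model %u.%u stepping %u.%u\n",
		 ext_fam, family, ext_model, model, stepping, substep);
}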
+ +#define SKU_NAME_LEN 20 + +/* Should be updated to reflect the latest interface version in sysfs and wmi property */ +#define LINUX_INTERFACE_VERSION "1.0" +#define WINDOWS_INTERFACE_VERSION "1.0" + +typedef enum mic_modes +{ + MODE_NONE, + MODE_LINUX, + MODE_ELF, + MODE_FLASH +} MIC_MODES; + +typedef enum mic_status +{ + MIC_READY, + MIC_BOOT, + MIC_NORESPONSE, + MIC_BOOTFAIL, + MIC_ONLINE, + MIC_SHUTDOWN, + MIC_LOST, + MIC_RESET, + MIC_RESETFAIL, + MIC_INVALID +} MIC_STATUS; + +typedef enum _product_platform_t +{ + PLATFORM_SILICON = 0, + PLATFORM_EMULATOR = 2, +}product_platform_t; + + +typedef enum _platform_resource_type +{ + PCI_APERTURE, + MMIO, + MAX_RESOURCE_TYPE +}platform_resource_type; + +typedef struct _platform_resource_t +{ + uint8_t* va; // mapped by driver + uint64_t pa; // from PCI config space + uint64_t len;// from PCI config space +}platform_resource_t; + + +typedef struct micscifhost_info { + dma_addr_t si_pa; + struct delayed_work si_bs_check; + uint32_t si_bs_wait_count; +} scifhost_info_t; + +#define MIC_NUM_DB 4 +typedef struct mic_irq { + spinlock_t mi_lock; + struct list_head mi_dblist[MIC_NUM_DB]; // The 4 doorbell interrupts. + atomic_t mi_received; +} mic_irq_t; + +typedef struct sysfs_info { + char *cmdline; + char *kernel_cmdline; +} sysfs_info_t; + +typedef struct pm_recv_msg { + struct list_head msg; + pm_msg_header msg_header; + void * msg_body; +} pm_recv_msg_t; + +typedef struct pm_wq { + struct workqueue_struct *wq; + struct work_struct work; + char wq_name[20]; +} pm_wq_t; + +/* + * Driver wide power management context + * common power management context for all the devices + */ +typedef struct micscif_pm { + scif_epd_t epd; + atomic_t connected_clients; + pm_wq_t accept; + struct mutex pm_accept_mutex; + struct mutex pm_idle_mutex; + struct dentry *pmdbgparent_dir; + uint32_t enable_pm_logging; + atomic_t wakeup_in_progress; + uint8_t *nodemask; + uint32_t nodemask_len; +} micscif_pm_t; + +/* per device power management context */ +typedef struct micpm_ctx +{ + scif_epd_t pm_epd; + PM_IDLE_STATE idle_state; + struct mutex msg_mutex; + struct list_head msg_list; + uint32_t pc6_timeout; + struct work_struct pm_close; + MIC_STATUS mic_suspend_state; + bool pc3_enabled; + bool pc6_enabled; + pm_msg_pm_options pm_options; + atomic_t pm_ref_cnt; + platform_resource_t nodemask; + pm_wq_t recv; + pm_wq_t handle_msg; + pm_wq_t resume; + struct workqueue_struct *pc6_entry_wq; + struct delayed_work pc6_entry_work; + char pc6_wq_name[20]; + struct dentry *pmdbg_dir; + PM_CONNECTION_STATE con_state; + wait_queue_head_t disc_wq; +} micpm_ctx_t; + +typedef struct _mic_ctx_t { + platform_resource_t mmio; + platform_resource_t aper; + uint32_t apic_id; + uint32_t msie; + ringbuffer ringbuff[MIC_ENG_MAX_SUPPORTED_ENGINES]; + uint32_t rb_readoff __attribute__((aligned(64))); + micpm_ctx_t micpm_ctx; + CARD_USAGE_MODE card_usage_mode; + uint64_t adptr_base_pa; + + int32_t bi_id; + mic_irq_t bi_irq; + struct tasklet_struct bi_dpc; + scifhost_info_t bi_scif; +#ifdef USE_VCONSOLE + micvcons_t bi_vcons; +#endif + void *bi_vethinfo; + struct mic_psmi_ctx bi_psmi; + struct pci_dev *bi_pdev; + + MIC_STATUS state; + struct mutex state_lock; + MIC_MODES mode; + wait_queue_head_t resetwq; + char *image; + char *initramfs; + struct timer_list boot_timer; + unsigned long boot_start; + struct work_struct boot_ws; + + struct workqueue_struct *resetworkq; + struct work_struct resetwork; + struct workqueue_struct *ioremapworkq; + struct work_struct ioremapwork; + wait_queue_head_t 
ioremapwq; + uint32_t reset_count; + + atomic_t bi_irq_received; + uint8_t bi_stepping; + uint8_t bi_substepping; + product_platform_t bi_platform; + product_family_t bi_family; + struct board_info *bd_info; + sysfs_info_t sysfs_info; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,14,0)) + struct kernfs_node *sysfs_state; +#else + struct sysfs_dirent *sysfs_state; +#endif + spinlock_t sysfs_lock; + mic_dma_handle_t dma_handle; + uint32_t boot_mem; + mic_smpt_t *mic_smpt; + spinlock_t smpt_lock; + uint32_t sdbic1; + int64_t etc_comp; + spinlock_t ramoops_lock; + void *ramoops_va[2]; + int ramoops_size; + dma_addr_t ramoops_pa[2]; + struct proc_dir_entry *ramoops_dir; + struct proc_dir_entry *vmcore_dir; + /* + * List representing chunks of contiguous memory areas and + * their offsets in vmcore file. + */ + struct list_head vmcore_list; + /* Stores the pointer to the buffer containing kernel elf core headers */ + char *elfcorebuf; + size_t elfcorebuf_sz; + /* Total size of vmcore file. */ + uint64_t vmcore_size; + int crash_count; + int boot_count; + void *log_buf_addr; + int *log_buf_len; + char sku_name[SKU_NAME_LEN]; + atomic_t disconn_rescnt; + atomic_t gate_interrupt; + uint16_t numa_node; +} mic_ctx_t; + + +typedef struct mic_irqhander { + int (*ih_func)(mic_ctx_t *mic_ctx, int doorbell); + struct list_head ih_list; + char *ih_idstring; +} mic_irqhandler_t; + +/* SKU related definitions and declarations */ +#define MAX_DEV_IDS 16 +typedef struct sku_info { + uint32_t fuserev_low; + uint32_t fuserev_high; + uint32_t memsize; + uint32_t memfreq; + char sku_name[SKU_NAME_LEN]; + struct list_head sku; +} sku_info_t; + +int sku_create_node(uint32_t fuserev_low, + uint32_t fuserev_high, uint32_t mem_size, + uint32_t mem_freq, char *sku_name, + sku_info_t ** newnode); + +int sku_build_table(void); +void sku_destroy_table(void); +int sku_find(mic_ctx_t *mic_ctx, uint32_t device_id); + +/* End SKU related definitions and declarations */ + +#define MIC_NUM_MSIX_ENTRIES 1 +typedef struct mic_data { + int32_t dd_numdevs; + int32_t dd_inuse; +#ifdef USE_VCONSOLE + micvcons_port_t dd_ports[MAX_BOARD_SUPPORTED]; +#endif + struct board_info *dd_bi[MAX_BOARD_SUPPORTED]; + struct list_head dd_bdlist; + micscif_pm_t dd_pm; + uint64_t sysram; + struct fasync_struct *dd_fasync; + struct list_head sku_table[MAX_DEV_IDS]; +} mic_data_t; + +#include "mic_interrupts.h" +extern mic_data_t mic_data; +extern struct micscif_dev scif_dev[]; + +typedef struct acptboot_data { + scif_epd_t listen_epd; + uint16_t acptboot_pn; + struct workqueue_struct *acptbootwq; + struct work_struct acptbootwork; +}acptboot_data_t; + +void acptboot_exit(void); +int acptboot_init(void); +void adapter_init(void); +int adapter_isr(mic_ctx_t *mic_ctx); +int adapter_imsr(mic_ctx_t *mic_ctx); +int adapter_remove(mic_ctx_t *mic_ctx); +int adapter_do_ioctl(uint32_t cmd, uint64_t arg); +int adapter_stop_device(mic_ctx_t *mic_ctx, int wait_reset, int reattempt); +int adapter_shutdown_device(mic_ctx_t *mic_ctx); +void calculate_etc_compensation(mic_ctx_t *mic_ctx); +int adapter_probe(mic_ctx_t *mic_ctx); +int adapter_post_boot_device(mic_ctx_t *mic_ctx); +int adapter_start_device(mic_ctx_t *mic_ctx); +int adapter_restart_device(mic_ctx_t *mic_ctx); +int adapter_init_device(mic_ctx_t *mic_ctx); +int pm_adapter_do_ioctl(mic_ctx_t *mic_ctx, void *in_buffer); +int adapter_reset_depgraph(mic_ctx_t *mic_ctx); + +/* + * RESET_WAIT : launch the timer thread and wait for reset to complete + * The caller has to add itself to the resetwq by calling 
wait_for_reset + * RESET_REATTEMPT : Reattempt reset after detecting failures in reset + */ +#define RESET_WAIT 1 +#define RESET_REATTEMPT 1 +void adapter_reset(mic_ctx_t *mic_ctx, int wait_reset, int reattempt); + +void adapter_wait_reset(mic_ctx_t *mic_ctx); +void get_adapter_memsize(uint8_t *mmio_va, uint32_t *adapter_mem_size); +int wait_for_bootstrap(uint8_t *mmio_va); +void post_boot_startup(struct work_struct *work); +void attempt_reset(struct work_struct *work); + +int send_uos_escape(mic_ctx_t *mic_ctx, uint32_t uos_op, + uint32_t data_size, void *escape_data); +int boot_linux_uos(mic_ctx_t *mic_ctx, char *imgname, char *initramfsname); + +int boot_micdev_app(mic_ctx_t *mic_ctx, char *imgname); +int allocate_tools_buffer(mic_ctx_t *mic_ctx, uint32_t databuf_size, + uint32_t stsbuf_size, uint64_t *gddr_data_ptr, + uint64_t *gddr_stsbuf_ptr); + +int micpm_init(void); +void micpm_uninit(void); +int micpm_stop(mic_ctx_t *mic_ctx); +int micpm_start(mic_ctx_t *mic_ctx); +int micpm_probe(mic_ctx_t *mic_ctx); +int micpm_remove(mic_ctx_t *mic_ctx); +void micpm_nodemask_uninit(mic_ctx_t* mic_ctx); +int micpm_nodemask_init(uint32_t num_devs, mic_ctx_t* mic_ctx); +int micpm_disconn_init(uint32_t num_nodes); +int micpm_disconn_uninit(uint32_t num_nodes); +int micpm_dbg_init(mic_ctx_t *mic_ctx); +void micpm_dbg_parent_init(void); +int pm_reg_read(mic_ctx_t *mic_ctx, uint32_t regoffset); +int micpm_update_pc6(mic_ctx_t *mic_ctx, bool set); +int micpm_update_pc3(mic_ctx_t *mic_ctx, bool set); +int pm_start_device(mic_ctx_t *mic_ctx); +int pm_stop_device(mic_ctx_t *mic_ctx); +int mic_pm_recv(mic_ctx_t *mic_ctx, void *msg, uint32_t len); +int mic_pm_send_msg(mic_ctx_t *mic_ctx, PM_MESSAGE type, + void *msg, uint32_t len); + +int pm_pc3_entry(mic_ctx_t *mic_ctx); +int pm_pc3_exit(mic_ctx_t *mic_ctx); +int do_idlestate_entry(mic_ctx_t *mic_ctx); +int do_idlestate_exit(mic_ctx_t *mic_ctx, bool get_ref); +int is_idlestate_exit_needed(mic_ctx_t *mic_ctx); +uint32_t mic_get_scifnode_id(mic_ctx_t *mic_ctx); + +mic_ctx_t* get_per_dev_ctx(uint16_t node); +int get_num_devs(mic_ctx_t *mic_ctx, uint32_t *num_devs); + + +void adapter_uninit(void); +void adapter_add(mic_ctx_t *mic_ctx); +void adapter_start(mic_ctx_t *mic_ctx); +int send_flash_cmd(mic_ctx_t *mic_ctx, MIC_FLASH_CMD_TYPE type, void *data, + uint32_t len); +int cmdline_mem(mic_ctx_t *mic_ctx, uint32_t mem); +int get_cardside_mem(mic_ctx_t *mic_ctx, uint64_t start, uint64_t size, void *dest); + +int mic_pin_user_pages (void *data, struct page **pages, uint32_t len, int32_t *nf_pages, int32_t nr_pages); +int mic_unpin_user_pages(struct page **pages, uint32_t nf_pages); +product_family_t get_product_family(uint32_t device_id); +void show_stepping_comm(mic_ctx_t *mic_ctx,char *buf); +void micscif_destroy_p2p(mic_ctx_t *mic_ctx); + +#ifdef HOST +void mic_smpt_init(mic_ctx_t *mic_ctx); +void mic_smpt_restore(mic_ctx_t *mic_ctx); +#endif +void mic_smpt_uninit(mic_ctx_t *mic_ctx); +int mic_dma_init(void); + +#ifndef _MIC_SCIF_ +static __always_inline int micpm_get_reference(mic_ctx_t *mic_ctx, bool force_wakeup) { + int err; + if (!mic_ctx) + return -EINVAL; + + if (mic_ctx->micpm_ctx.idle_state == PM_IDLE_STATE_LOST) + return -ENODEV; + + if (unlikely(!atomic_add_unless(&mic_ctx->micpm_ctx.pm_ref_cnt, + 1, PM_NODE_IDLE))) { + if (!force_wakeup) { + if (is_idlestate_exit_needed(mic_ctx)) { + return -EAGAIN; + } + } + + if ((err = micscif_connect_node(mic_get_scifnode_id(mic_ctx), true)) != 0) + return -ENODEV; + } + return 0; +} +#endif + +static 
__always_inline int micpm_put_reference(mic_ctx_t *mic_ctx) { + int ret; + + if(!mic_ctx) + return -EINVAL; + + if (mic_ctx->micpm_ctx.idle_state == PM_IDLE_STATE_LOST) + return -ENODEV; + + if (unlikely((ret = atomic_sub_return(1, + &mic_ctx->micpm_ctx.pm_ref_cnt)) < 0)) { + printk(KERN_ERR "%s %d Invalid PM ref_cnt %d \n", + __func__, __LINE__, atomic_read(&mic_ctx->micpm_ctx.pm_ref_cnt)); + } + + return 0; + +} + +static __always_inline int +mic_hw_family(int node_id) { + mic_ctx_t *mic_ctx; + + /* For Host Loopback */ + if (!node_id) + return -EINVAL; + + mic_ctx = get_per_dev_ctx(node_id - 1); + return mic_ctx->bi_family; +} + +static __always_inline void +wait_for_reset(mic_ctx_t *mic_ctx) +{ + int ret = 0; + while (!ret) { + ret = wait_event_timeout(mic_ctx->resetwq, + mic_ctx->state != MIC_RESET, RESET_FAIL_TIME * HZ); + } +} + +/* Called only by host PM suspend */ +static __always_inline int +wait_for_shutdown_and_reset(mic_ctx_t *mic_ctx) +{ + int ret; + ret = wait_event_interruptible_timeout(mic_ctx->resetwq, + mic_ctx->state != MIC_RESET && mic_ctx->state != MIC_SHUTDOWN, + RESET_FAIL_TIME * HZ); + return ret; +} + +static __always_inline void +mic_signal_daemon(void) +{ + if (mic_data.dd_fasync != NULL) + kill_fasync(&mic_data.dd_fasync, SIGIO, POLL_IN); +} + +extern char *micstates[]; + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) +#define __mic_create_singlethread_workqueue(name) alloc_ordered_workqueue(name, 0) +#else +#define __mic_create_singlethread_workqueue(name) create_singlethread_workqueue(name) +#endif + +static __always_inline void +mic_setstate(mic_ctx_t *mic_ctx, enum mic_status newstate) +{ + printk("mic%d: Transition from state %s to %s\n", mic_ctx->bi_id, + micstates[mic_ctx->state], micstates[newstate]); + mic_ctx->state = newstate; + spin_lock_bh(&mic_ctx->sysfs_lock); + if (mic_ctx->sysfs_state) + sysfs_notify_dirent(mic_ctx->sysfs_state); + spin_unlock_bh(&mic_ctx->sysfs_lock); +} + +#define MICREG_POSTCODE 0x242c + +static __always_inline uint32_t +mic_getpostcode(mic_ctx_t *mic_ctx) +{ + return DBOX_READ(mic_ctx->mmio.va, MICREG_POSTCODE); +} + +static __always_inline int +mic_hw_stepping(int node_id) { + mic_ctx_t *mic_ctx; + + /* For Host Loopback */ + if (!node_id) + return -EINVAL; + + mic_ctx = get_per_dev_ctx(node_id - 1); + return mic_ctx->bi_stepping; +} + +#define MIC_IRQ_DB0 0 +#define MIC_IRQ_DB1 1 +#define MIC_IRQ_DB2 2 +#define MIC_IRQ_DB3 3 +#define MIC_IRQ_MAX MIC_IRQ_DB3 + +int mic_reg_irqhandler(mic_ctx_t *mic_ctx, int doorbell, char *idstring, + int (*irqfunc)(mic_ctx_t *mic_ctx, int doorbell)); +int mic_unreg_irqhandler(mic_ctx_t *mic_ctx, int doorbell, char *idstring); +void mic_enable_interrupts(mic_ctx_t *mic_ctx); +void mic_disable_interrupts(mic_ctx_t *mic_ctx); +void mic_enable_msi_interrupts(mic_ctx_t *mic_ctx); + +int micscif_init(void); +void micscif_destroy(void); +void micscif_probe(mic_ctx_t *mic_ctx); +void micscif_remove(mic_ctx_t *mic_ctx); +void micscif_start(mic_ctx_t *mic_ctx); +void micscif_stop(mic_ctx_t *mic_ctx); + +mic_ctx_t *get_device_context(struct pci_dev *dev); +void ramoops_exit(void); +void vmcore_exit(void); +int vmcore_create(mic_ctx_t *mic_ctx); +void vmcore_remove(mic_ctx_t *mic_ctx); + +// loads file into memory +int mic_get_file_size(const char *path, uint32_t *file_length); +int mic_load_file(const char *fn, uint8_t *buffer, uint32_t max_size); +#ifndef _MIC_SCIF_ +void mic_debug_init(mic_ctx_t *mic_ctx); +#endif +void mic_debug_uninit(void); +void +set_pci_aperture(mic_ctx_t *mic_ctx, 
uint32_t gtt_index, uint64_t phy_addr, uint32_t num_bytes); +#ifdef __cplusplus +}; +#endif + +#endif // __MIC_COMMON_H + diff --git a/include/mic_interrupts.h b/include/mic_interrupts.h new file mode 100644 index 0000000..b3c6b60 --- /dev/null +++ b/include/mic_interrupts.h @@ -0,0 +1,118 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#include "mic_common.h" + +/* vnet/mic_shutdown/hvc/virtio */ +#define VNET_SBOX_INT_IDX 0 +#define MIC_SHT_SBOX_INT_IDX 1 +#define HVC_SBOX_INT_IDX 2 +#define VIRTIO_SBOX_INT_IDX 3 +#define PM_SBOX_INT_IDX 4 + +#define MIC_BSP_INTERRUPT_VECTOR 229 // Host->Card(bootstrap) Interrupt Vector# +/* + * Current usage of MIC interrupts: + * APICICR1 - mic shutdown interrupt + * APCICR0 - rest + * + * Planned Usage: + * SCIF - rdmasrs + * vnet/hvc/virtio - APICICR0 + * mic shutdown interrupt - APICICR1 + */ +static void __mic_send_intr(mic_ctx_t *mic_ctx, int i) +{ + uint32_t apicicr_low; + uint64_t apic_icr_offset = SBOX_APICICR0 + i * 8; + + apicicr_low = SBOX_READ(mic_ctx->mmio.va, apic_icr_offset); + /* for KNC we need to make sure we "hit" the send_icr bit (13) */ + if (mic_ctx->bi_family == FAMILY_KNC) + apicicr_low = (apicicr_low | (1 << 13)); + + /* MIC card only triggers when we write the lower part of the + * address (upper bits) + */ + SBOX_WRITE(apicicr_low, mic_ctx->mmio.va, apic_icr_offset); +} + +static inline void mic_send_vnet_intr(mic_ctx_t *mic_ctx) +{ + __mic_send_intr(mic_ctx, VNET_SBOX_INT_IDX); +} + +static inline void mic_send_hvc_intr(mic_ctx_t *mic_ctx) +{ + __mic_send_intr(mic_ctx, HVC_SBOX_INT_IDX); +} + +static inline void mic_send_scif_intr(mic_ctx_t *mic_ctx) +{ + __mic_send_intr(mic_ctx, 0); +} + +static inline void mic_send_virtio_intr(mic_ctx_t *mic_ctx) +{ + __mic_send_intr(mic_ctx, VIRTIO_SBOX_INT_IDX); +} + +static inline void mic_send_sht_intr(mic_ctx_t *mic_ctx) +{ + __mic_send_intr(mic_ctx, 1); +} + +static inline void mic_send_pm_intr(mic_ctx_t *mic_ctx) +{ + __mic_send_intr(mic_ctx, PM_SBOX_INT_IDX); +} + +static inline void mic_send_bootstrap_intr(mic_ctx_t *mic_ctx) +{ + uint32_t apicicr_low; + uint64_t apic_icr_offset = SBOX_APICICR7; + int vector = MIC_BSP_INTERRUPT_VECTOR; + + if (mic_ctx->bi_family == FAMILY_ABR){ + apicicr_low = vector; + } else { + /* for KNC we need to make sure we "hit" the send_icr bit (13) */ + apicicr_low = (vector | (1 << 13)); + } + + SBOX_WRITE(mic_ctx->apic_id, mic_ctx->mmio.va, apic_icr_offset + 4); + // MIC card only triggers when we write the lower part of the address (upper bits) + SBOX_WRITE(apicicr_low, mic_ctx->mmio.va, apic_icr_offset); +} diff --git a/include/micint.h b/include/micint.h new file mode 100644 index 0000000..bf3f095 --- /dev/null +++ b/include/micint.h @@ -0,0 +1,114 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. 
Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef MICINT_H +#define MICINT_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mic_common.h" +#include + +#define MAX_DLDR_MINORS 68 +typedef struct mic_lindata { + dev_t dd_dev; + struct cdev dd_cdev; + struct device *dd_hostdev; + struct device *dd_scifdev; + struct class *dd_class; + struct pci_driver dd_pcidriver; +}mic_lindata_t; + +typedef struct board_info { + struct device *bi_sysfsdev; +#ifdef CONFIG_PCI_MSI + struct msix_entry bi_msix_entries[MIC_NUM_MSIX_ENTRIES]; +#endif +#ifdef USE_VCONSOLE + micvcons_port_t *bi_port; +#endif + void *bi_virtio; /* for virtio */ + + struct list_head bi_list; + mic_ctx_t bi_ctx; +} bd_info_t; + +extern mic_lindata_t mic_lindata; + +#ifdef USE_VCONSOLE +int micvcons_create(int num_bds); +void micvcons_destroy(int num_bds); +#endif + +int micpm_suspend(struct device *pdev); +int micpm_resume(struct device *pdev); +int micpm_suspend_noirq(struct device *pdev); +int micpm_resume_noirq(struct device *pdev); +int micpm_notifier_block(struct notifier_block *nb, unsigned long event, void *dummy); +irqreturn_t mic_irq_isr(int irq, void *data); + +int mic_psmi_init(mic_ctx_t *mic_ctx); +void mic_psmi_uninit(mic_ctx_t *mic_ctx); + +void set_sysfs_entries(mic_ctx_t *mic_ctx); +void free_sysfs_entries(mic_ctx_t *mic_ctx); +#endif // MICINT_H diff --git a/include/scif.h b/include/scif.h new file mode 100644 index 0000000..934bc82 --- /dev/null +++ b/include/scif.h @@ -0,0 +1,1743 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. 
Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * Revised 15:05 11/24/2010 + * Derived from SCIF SAS v0.41 with additional corrections + */ + +#ifndef __SCIF_H__ +#define __SCIF_H__ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define SCIF_ACCEPT_SYNC 1 +#define SCIF_SEND_BLOCK 1 +#define SCIF_RECV_BLOCK 1 + +/* Start: Deprecated Temporary definition for compatability */ +#define ACCEPT_SYNC SCIF_ACCEPT_SYNC +#define SEND_BLOCK SCIF_SEND_BLOCK +#define RECV_BLOCK SCIF_RECV_BLOCK +/* End: Deprecated Temporary definition for compatability */ + +enum { + SCIF_PROT_READ = (1<<0), + SCIF_PROT_WRITE = (1<<1) +}; + +/* 0x40 is used internally by scif */ +enum { + SCIF_MAP_FIXED = 0x10, + SCIF_MAP_KERNEL = 0x20, +}; + +enum { + SCIF_FENCE_INIT_SELF = (1<<0), + SCIF_FENCE_INIT_PEER = (1<<1) +}; + +enum { + SCIF_FENCE_RAS_SELF = (1<<2), + SCIF_FENCE_RAS_PEER = (1<<3) +}; + +enum { + SCIF_SIGNAL_LOCAL = (1<<4), + SCIF_SIGNAL_REMOTE = (1<<5) +}; + +#define SCIF_RMA_USECPU 1 +#define SCIF_RMA_USECACHE (1<<1) +#define SCIF_RMA_SYNC (1<<2) +#define SCIF_RMA_ORDERED (1<<3) +//! 
@cond (Prevent doxygen from including these) +#define SCIF_POLLIN POLLIN +#define SCIF_POLLOUT POLLOUT +#define SCIF_POLLERR POLLERR +#define SCIF_POLLHUP POLLHUP +#define SCIF_POLLNVAL POLLNVAL + +/* SCIF Reserved Ports */ +/* COI */ +#define SCIF_COI_PORT_0 40 +#define SCIF_COI_PORT_1 41 +#define SCIF_COI_PORT_2 42 +#define SCIF_COI_PORT_3 43 +#define SCIF_COI_PORT_4 44 +#define SCIF_COI_PORT_5 45 +#define SCIF_COI_PORT_6 46 +#define SCIF_COI_PORT_7 47 +#define SCIF_COI_PORT_8 48 +#define SCIF_COI_PORT_9 49 + +/* OFED */ +#define SCIF_OFED_PORT_0 60 +#define SCIF_OFED_PORT_1 61 +#define SCIF_OFED_PORT_2 62 +#define SCIF_OFED_PORT_3 63 +#define SCIF_OFED_PORT_4 64 +#define SCIF_OFED_PORT_5 65 +#define SCIF_OFED_PORT_6 66 +#define SCIF_OFED_PORT_7 67 +#define SCIF_OFED_PORT_8 68 +#define SCIF_OFED_PORT_9 69 + +/* NETDEV */ +#define SCIF_NETDEV_PORT_0 80 +#define SCIF_NETDEV_PORT_1 81 +#define SCIF_NETDEV_PORT_2 82 +#define SCIF_NETDEV_PORT_3 83 +#define SCIF_NETDEV_PORT_4 84 +#define SCIF_NETDEV_PORT_5 85 +#define SCIF_NETDEV_PORT_6 86 +#define SCIF_NETDEV_PORT_7 87 +#define SCIF_NETDEV_PORT_8 88 +#define SCIF_NETDEV_PORT_9 89 + +/* RAS */ +#define SCIF_RAS_PORT_0 100 +#define SCIF_RAS_PORT_1 101 +#define SCIF_RAS_PORT_2 102 +#define SCIF_RAS_PORT_3 103 +#define SCIF_RAS_PORT_4 104 +#define SCIF_RAS_PORT_5 105 +#define SCIF_RAS_PORT_6 106 +#define SCIF_RAS_PORT_7 107 +#define SCIF_RAS_PORT_8 108 +#define SCIF_RAS_PORT_9 109 + +/* Power Management */ +#define SCIF_PM_PORT_0 120 +#define SCIF_PM_PORT_1 121 +#define SCIF_PM_PORT_2 122 +#define SCIF_PM_PORT_3 123 +#define SCIF_PM_PORT_4 124 +#define SCIF_PM_PORT_5 125 +#define SCIF_PM_PORT_6 126 +#define SCIF_PM_PORT_7 127 +#define SCIF_PM_PORT_8 128 +#define SCIF_PM_PORT_9 129 + +/* Board Tools */ +#define SCIF_BT_PORT_0 130 +#define SCIF_BT_PORT_1 131 +#define SCIF_BT_PORT_2 132 +#define SCIF_BT_PORT_3 133 +#define SCIF_BT_PORT_4 134 +#define SCIF_BT_PORT_5 135 +#define SCIF_BT_PORT_6 136 +#define SCIF_BT_PORT_7 137 +#define SCIF_BT_PORT_8 138 +#define SCIF_BT_PORT_9 139 + +/* MIC Boot/Configuration support */ +#define MPSSD_MONRECV 160 +#define MIC_NOTIFY 161 +#define MPSSD_CRED 162 +#define MPSSD_MONSEND 163 +#define MPSSD_MICCTRL 164 +#define MPSSD_RESV5 165 +#define MPSSD_RESV6 166 +#define MPSSD_RESV7 167 +#define MPSSD_RESV8 168 +#define MPSSD_RESV9 169 + +#define SCIF_ADMIN_PORT_END 1024 + +/* MYO */ +#define SCIF_MYO_PORT_0 1025 +#define SCIF_MYO_PORT_1 1026 +#define SCIF_MYO_PORT_2 1027 +#define SCIF_MYO_PORT_3 1028 +#define SCIF_MYO_PORT_4 1029 +#define SCIF_MYO_PORT_5 1030 +#define SCIF_MYO_PORT_6 1031 +#define SCIF_MYO_PORT_7 1032 +#define SCIF_MYO_PORT_8 1033 +#define SCIF_MYO_PORT_9 1034 + +/* SSG Tools */ +#define SCIF_ST_PORT_0 1044 +#define SCIF_ST_PORT_1 1045 +#define SCIF_ST_PORT_2 1046 +#define SCIF_ST_PORT_3 1047 +#define SCIF_ST_PORT_4 1048 +#define SCIF_ST_PORT_5 1049 +#define SCIF_ST_PORT_6 1050 +#define SCIF_ST_PORT_7 1051 +#define SCIF_ST_PORT_8 1052 +#define SCIF_ST_PORT_9 1053 + +/* End of SCIF Reserved Ports */ +#define SCIF_PORT_RSVD 1088 +//! 
@endcond + +typedef struct endpt *scif_epd_t; + +typedef struct scif_pinned_pages *scif_pinned_pages_t; + +struct scif_range { + void *cookie; /* cookie */ + int nr_pages; /* Number of Pages */ + int prot_flags; /* R/W protection */ + /* Arrays phys_addr/va below are virtually contiguous */ + dma_addr_t *phys_addr; /* Array of physical addresses */ + void **va; /* Array of virtual addresses + * and populated only when called + * on the host for a remote SCIF + * connection on MIC. + */ +}; + +struct scif_pollepd { + scif_epd_t epd; /* endpoint descriptor */ + short events; /* requested events */ + short revents; /* returned events */ +}; +enum scif_event_type { + SCIF_NODE_ADDED = 1<<0, + SCIF_NODE_REMOVED = 1<<1 +}; + +union eventd { + uint16_t scif_node_added; + uint16_t scif_node_removed; +}; + +typedef void (*scif_callback_t)(enum scif_event_type event, union eventd +data); + +struct scif_callback { + struct list_head list_member; + scif_callback_t callback_handler; +}; + +#define SCIF_OPEN_FAILED ((scif_epd_t)-1) +#define SCIF_REGISTER_FAILED ((off_t)-1) +#define SCIF_MMAP_FAILED ((void *)-1) + +struct scif_portID { + uint16_t node; /* node on which port resides */ + uint16_t port; /* Local port number */ +}; + +/* Start: Deprecated Temporary definition for compatability */ +#define portID scif_portID +typedef struct portID portID_t; +/* End: Deprecated Temporary definition for compatability */ + +/** + * scif_open - Create an endpoint + * + *\return + * The scif_open() function creates a new endpoint. + * + * Upon successful completion, scif_open() returns an endpoint descriptor to + * be used in subsequent SCIF functions calls to refer to that endpoint; + * otherwise: in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is + * returned and errno is set to indicate the error; in kernel mode a NULL + * scif_epd_t is returned. + * + *\par Errors: + *- ENOMEM + * - Insufficient kernel memory was available. + *- ENXIO + * - Version mismatch between micscif driver and libscif. + */ +scif_epd_t scif_open(void); + +/** + * scif _bind - Bind an endpoint to a port + * \param epd endpoint descriptor + * \param pn port number + * + * scif_bind() binds endpoint epd to port pn, where pn is a port number on the + * local node. If pn is zero, a port number greater than or equal to + * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to + * exactly one local port. Ports less than 1024 when requested can only be bound + * by system (or root) processes or by processes executed by privileged users. + * + *\return + * Upon successful completion, scif_bind() returns the port number to which epd + * is bound; otherwise: in user mode -1 is returned and errno is set to + * indicate the error; in kernel mode the negative of one of the following + * errors is returned. + * + *\par Errors: + *- EBADF + * - epd is not a valid endpoint descriptor + *- EINVAL + * - epd is not a valid endpoint descriptor, or + * - The endpoint or the port are already bound. + *- EISCONN + * - The endpoint is already connected. + *- ENOSPC + * - No port number available for assignment (when pn==0). + *- ENOTTY + * - epd is not a valid endpoint descriptor + *- EACCES + * - The port requested is protected and the user is not the superuser. 
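+ *
+ *\par Example:
+ * (Editorial sketch added for illustration; not part of the original header.
+ * User-mode code, error handling abbreviated; the snippet is assumed to live
+ * inside a function returning int.)
+ *
+ *     scif_epd_t epd = scif_open();
+ *     if (epd == SCIF_OPEN_FAILED)
+ *         return -1;                  /* user mode: errno holds the cause */
+ *     /* pn == 0 asks SCIF to assign a port >= SCIF_PORT_RSVD */
+ *     int pn = scif_bind(epd, 0);
+ *     if (pn < 0) {
+ *         scif_close(epd);
+ *         return -1;
+ *     }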
+*/ +int scif_bind(scif_epd_t epd, uint16_t pn); + +/** + * scif_listen - Listen for connections on an endpoint + * + * \param epd endpoint descriptor + * \param backlog maximum pending connection requests + * + * scif_listen() marks the endpoint epd as a listening endpoint - that is, as + * an endpoint that will be used to accept incoming connection requests. Once + * so marked, the endpoint is said to be in the listening state and may not be + * used as the endpoint of a connection. + * + * The endpoint, epd, must have been bound to a port. + * + * The backlog argument defines the maximum length to which the queue of + * pending connections for epd may grow. If a connection request arrives when + * the queue is full, the client may receive an error with an indication that + * the connection was refused. + * + *\return + * Upon successful completion, scif_listen() returns 0; otherwise: in user mode + * -1 is returned and errno is set to indicate the error; in kernel mode the + * negative of one of the following errors is returned. + * + *\par Errors: + *- EBADF + * - epd is not a valid endpoint descriptor + *- EINVAL + * - epd is not a valid endpoint descriptor, or + * - The endpoint is not bound to a port + *- EISCONN + * - The endpoint is already connected or listening + *- ENOTTY + * - epd is not a valid endpoint descriptor +*/ +int scif_listen(scif_epd_t epd, int backlog); + +/** + * scif_connect - Initiate a connection on a port + * \param epd endpoint descriptor + * \param dst global id of port to which to connect + * + * The scif_connect() function requests the connection of endpoint epd to remote + * port dst. If the connection is successful, a peer endpoint, bound to dst, is + * created on node dst.node. On successful return, the connection is complete. + * + * If the endpoint epd has not already been bound to a port, scif_connect() + * will bind it to an unused local port. + * + * A connection is terminated when an endpoint of the connection is closed, + * either explicitly by scif_close(), or when a process that owns one of the + * endpoints of a connection is terminated. + * + *\return + * Upon successful completion, scif_connect() returns the port ID to which the + * endpoint, epd, is bound; otherwise: in user mode -1 is returned and errno is + * set to indicate the error; in kernel mode the negative of one of the + * following errors is returned. + * + *\par Errors: + *- EBADF + * - epd is not a valid endpoint descriptor + *- ECONNREFUSED + * - The destination was not listening for connections or refused the + * connection request. + *- EINTR + * - Interrupted function + *- EINVAL + * - epd is not a valid endpoint descriptor, or + * - dst.port is not a valid port ID + *- EISCONN + * - The endpoint is already connected + *- ENOBUFS + * - No buffer space is available + *- ENODEV + * - The destination node does not exist, or + * - The node is lost. + *- ENOSPC + * - No port number available for assignment (when pn==0). + *- ENOTTY + * - epd is not a valid endpoint descriptor + *- EOPNOTSUPP + * - The endpoint is listening and cannot be connected +*/ +int scif_connect(scif_epd_t epd, struct scif_portID *dst); + +/** + * scif_accept - Accept a connection on an endpoint + * \param epd endpoint descriptor + * \param peer global id of port to which connected + * \param newepd new connected endpoint descriptor + * \param flags flags + * + * The scif_accept() call extracts the first connection request on the queue of + * pending connections for the port on which epd is listening. 
scif_accept() + * creates a new endpoint, bound to the same port as epd, and allocates a new + * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new + * endpoint is connected to the endpoint through which the connection was + * requested. epd is unaffected by this call, and remains in the listening + * state. + * + * On successful return, peer holds the global port identifier (node id and + * local port number) of the port which requested the connection. + * + * If the peer endpoint which requested the connection is closed, the endpoint + * returned by scif_accept() is closed. + * + * The number of connections that can (subsequently) be accepted on epd is only + * limited by system resources (memory). + * + * The flags argument is formed by OR'ing together zero or more of the + * following values: + *- SCIF_ACCEPT_SYNC: block until a connection request is presented. If + * SCIF_ACCEPT_SYNC is not in flags, and no pending + * connections are present on the queue, scif_accept()fails + * with an EAGAIN error + * + * On Linux in user mode, the select() and poll() functions can be used to + * determine when there is a connection request. On Microsoft Windows* and on + * Linux in kernel mode, the scif_poll() function may be used for this purpose. + * A readable event will be delivered when a connection is requested. + * + *\return + * Upon successful completion, scif_accept() returns 0; otherwise: in user mode + * -1 is returned and errno is set to indicate the error; in kernel mode the + * negative of one of the following errors is returned. + * + *\par Errors: + *- EAGAIN + * - SCIF_ACCEPT_SYNC is not set and no connections are present to be accepted, or + * - SCIF_ACCEPT_SYNC is not set and remote node failed to complete its + * connection request + *- EBADF + * - epd is not a valid endpoint descriptor + *- EINTR + * - Interrupted function + *- EINVAL + * - epd is not a valid endpoint descriptor, or + * - epd is not a listening endpoint + * - flags is invalid + * - peer is NULL + * - newepd is NULL + *- ENOBUFS + * - No buffer space is available + *- ENODEV + * - The requesting node is lost. + *- ENOMEM + * - Not enough space + *- ENOTTY + * - epd is not a valid endpoint descriptor + *- ENOENT + * - Secondary part of epd registeration failed. +*/ +int scif_accept(scif_epd_t epd, struct scif_portID *peer, scif_epd_t +*newepd, int flags); + +/** + * scif_close - Close an endpoint + * \param epd endpoint descriptor + * + * scif_close() closes an endpoint and performs necessary teardown of + * facilities associated with that endpoint. + * + * If epd is a listening endpoint then it will no longer accept connection + * requests on the port to which it is bound. Any pending connection requests + * are rejected. + * + * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs + * which are in-process through epd or its peer endpoint will complete before + * scif_close() returns. Registered windows of the local and peer endpoints are + * released as if scif_unregister() was called against each window. + * + * Closing an endpoint does not affect mappings to remote memory. These remain + * until explicitly removed by calling scif_munmap(). + * + * If the peer endpoint's receive queue is not empty at the time that epd is + * closed, then the peer endpoint can be passed as the endpoint parameter to + * scif_recv() until the receive queue is empty. + * + * If epd is bound to a port, then the port is returned to the pool of + * available ports. 
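+ *
+ *\par Example:
+ * (Editorial sketch illustrating the connection setup described for
+ * scif_listen(), scif_connect() and scif_accept() above; not part of the
+ * original header. The node and port numbers are arbitrary assumptions and
+ * error handling is omitted.)
+ *
+ *     /* Listening side */
+ *     scif_epd_t lepd = scif_open();
+ *     scif_bind(lepd, 2000);                       /* example port number */
+ *     scif_listen(lepd, 5);
+ *     struct scif_portID peer;
+ *     scif_epd_t cepd;
+ *     scif_accept(lepd, &peer, &cepd, SCIF_ACCEPT_SYNC);
+ *
+ *     /* Connecting side (typically another node or process) */
+ *     scif_epd_t epd = scif_open();
+ *     struct scif_portID dst = { .node = 1, .port = 2000 };
+ *     scif_connect(epd, &dst);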
+ * + * epd is freed and may no longer be accessed. + * + *\return + * Upon successful completion, scif_close() returns 0; otherwise: in user mode + * -1 is returned and errno is set to indicate the error; in kernel mode the + * negative of one of the following errors is returned. + * + *\par Errors: + *- EBADF + * - epd is not a valid endpoint descriptor + *- EINVAL + * - epd is not a valid endpoint descriptor + */ +int scif_close(scif_epd_t epd); + +/** + * scif_send - Send a message + * \param epd endpoint descriptor + * \param msg message buffer address + * \param len message length + * \param flags blocking mode flags + * + * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data + * are copied from memory starting at address msg. On successful execution the + * return value of scif_send() is the number of bytes that were sent, and is + * zero if no bytes were sent because len was zero. scif_send() may be called + * only when the endpoint is in a connected state. + * + * If a scif_send() call is non-blocking, then it sends only those bytes which + * can be sent without waiting, up to a maximum of len bytes. + * + * If a scif_send() call is blocking, then it normally returns after sending + * all len bytes. If a blocking call is interrupted or the connection is + * forcibly closed, the call is considered successful if some bytes were sent + * or len is zero, otherwise the call is considered unsuccessful. + * + * On Linux in user mode, the select() and poll() functions can be used to + * determine when the send queue is not full. On Microsoft Windows* and on + * Linux in kernel mode, the scif_poll() function may be used for this purpose. + * + * It is recommended that scif_send()/scif_recv() only be used for short + * control-type message communication between SCIF endpoints. The SCIF RMA + * APIs are expected to provide better performance for transfer sizes of + * 1024 bytes or longer. + * + * The flags argument is formed by ORing together zero or more of the following + * values: + *- SCIF_SEND_BLOCK: block until the entire message is sent. + * + *\return + * Upon successful completion, scif_send() returns the number of bytes sent; + * otherwise: in user mode -1 is returned and errno is set to indicate the + * error; in kernel mode the negative of one of the following errors is + * returned. + * + *\par Errors: + *- EBADF + * - epd is not a valid endpoint descriptor + *- ECONNRESET + * - A connection was forcibly closed by a peer. + *- EFAULT + * - An invalid address was specified for a parameter. + *- EINTR + * - epd was closed by scif_close() + *- EINVAL + * - epd is not a valid endpoint descriptor, or + * - flags is invalid + * - len is negative + *- ENODEV + * - The remote node is lost. + *- ENOMEM + * - Not enough space + *- ENOTCONN + * - The endpoint is not connected + *- ENOTTY + * - epd is not a valid endpoint descriptor + */ +int scif_send(scif_epd_t epd, void *msg, int len, int flags); + +/** + * scif_recv - Receive a message + * \param epd endpoint descriptor + * \param msg message buffer address + * \param len message buffer length + * \param flags blocking mode flags + * + * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of + * data are copied to memory starting at address msg. On successful execution + * the return value of scif_recv() is the number of bytes that were received, + * and is zero if no bytes were received because len was zero. scif_recv() may + * be called only when the endpoint is in a connected state. 
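+ *
+ *\par Example:
+ * (Editorial sketch of a blocking message exchange over a connected
+ * endpoint; not part of the original header. The endpoint epd is assumed to
+ * have been connected as in the earlier example, and the snippet is assumed
+ * to live inside a function returning int.)
+ *
+ *     char req[64] = "ping";
+ *     char rsp[64];
+ *     if (scif_send(epd, req, sizeof(req), SCIF_SEND_BLOCK) < 0)
+ *         return -1;
+ *     if (scif_recv(epd, rsp, sizeof(rsp), SCIF_RECV_BLOCK) < 0)
+ *         return -1;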
+ * + * If a scif_recv() call is non-blocking, then it receives only those bytes + * which can be received without waiting, up to a maximum of len bytes. + * + * If a scif_recv() call is blocking, then it normally returns after receiving + * all len bytes. If a blocking call is interrupted or the connection is + * forcibly closed, the call is considered successful if some bytes were + * received or len is zero, otherwise the call is considered unsuccessful; + * subsequent calls to scif_recv() will successfully receive all data sent + * through peer endpoint interruption or the connection was forcibly closed. + * + * On Linux in user mode, the select() and poll() functions can be used to + * determine when data is available to be received. On Microsoft Windows* and + * on Linux in kernel mode, the scif_poll() function may be used for this + * purpose. + * + * It is recommended that scif_send()/scif_recv() only be used for short + * control-type message communication between SCIF endpoints. The SCIF RMA + * APIs are expected to provide better performance for transfer sizes of + * 1024 bytes or longer. + * + * The flags argument is formed by ORing together zero or more of the following + * values: + *- SCIF_RECV_BLOCK: block until the entire message is received. + * + *\return + * Upon successful completion, scif_recv() returns the number of bytes + * received; otherwise: in user mode -1 is returned and errno is set to + * indicate the error; in kernel mode the negative of one of the following + * errors is returned. + * + *\par Errors: + *- EAGAIN + * - The destination node is returning from a low power state. + *- EBADF + * - epd is not a valid endpoint descriptor . + *- ECONNRESET + * - A connection was forcibly closed by a peer. + *- EFAULT + * - An invalid address was specified for a parameter. + *- EINVAL + * - epd is not a valid endpoint descriptor, or + * - flags is invalid, or + * - len is negative. + *- ENODEV + * - The remote node is lost. + *- ENOMEM + * - Not enough space. + *- ENOTCONN + * - The endpoint is not connected. + *- ENOTTY + * - epd is not a valid endpoint descriptor + */ +int scif_recv(scif_epd_t epd, void *msg, int len, int flags); + +/** + * scif_register - Mark a memory region for remote access. + * \param epd endpoint descriptor + * \param addr starting virtual address + * \param len length of range + * \param offset offset of window + * \param prot_flags read/write protection flags + * \param map_flags mapping flags + * + * The scif_register() function opens a window, a range of whole pages of the + * registered address space of the endpoint epd, starting at offset po and + * continuing for len bytes. The value of po, further described below, is a + * function of the parameters offset and len, and the value of map_flags. Each + * page of the window represents the physical memory page which backs the + * corresponding page of the range of virtual address pages starting at addr + * and continuing for len bytes. addr and len are constrained to be multiples + * of the page size. addr is interpreted as a user space address. A successful + * scif_register() call returns po as the return value. + * + * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset + * exactly, and offset is constrained to be a multiple of the page size. The + * mapping established by scif_register() will not replace any existing + * registration; an error is returned if any page within the range [offset, + * offset+len-1] intersects an existing window. 
+ * Note: When SCIF_MAP_FIXED is set the current implementation limits + * offset to the range [0..2^62-1] and returns EADDRINUSE if the offset + * requested with SCIF_MAP_FIXED is in the range [2^62..2^63-1]. + * + * When SCIF_MAP_FIXED is not set, the implementation uses offset in an + * implementation-defined manner to arrive at po. The po value so chosen will + * be an area of the registered address space that the implementation deems + * suitable for a mapping of len bytes. An offset value of 0 is interpreted as + * granting the implementation complete freedom in selecting po, subject to + * constraints described below. A non-zero value of offset is taken to be a + * suggestion of an offset near which the mapping should be placed. When the + * implementation selects a value for po, it does not replace any extant + * window. In all cases, po will be a multiple of the page size. + * + * The physical pages which are so represented by a window are available for + * access in calls to scif_mmap(), scif_readfrom(), scif_writeto(), + * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the + * physical pages represented by the window will not be reused by the memory + * subsystem for any other purpose. Note that the same physical page may be + * represented by multiple windows. + * + * Subsequent operations which change the memory pages to which virtual + * addresses are mapped (such as mmap(), munmap(), scif_mmap() and + * scif_munmap()) have no effect on existing windows. + * + * On Linux, if the process will fork(), it is recommended that the registered + * virtual address range be marked with MADV_DONTFORK. Doing so will prevent + * problems due to copy-on-write semantics. + * + * The prot_flags argument is formed by OR'ing together one or more of the + * following values: + *- SCIF_PROT_READ: allow read operations from the window + *- SCIF_PROT_WRITE: allow write operations to the window + * + * The map_flags argument is formed by OR'ing together zero or more of + * the following values: + *- SCIF_MAP_FIXED: interpret offset exactly + * + *\return + * Upon successful completion, scif_register() returns the offset at which the + * mapping was placed (po); otherwise: in user mode SCIF_REGISTER_FAILED (that + * is (off_t *)-1) is returned and errno is set to indicate the error; in + * kernel mode the negative of one of the following errors is returned. + * + *\par Errors: + *- EADDRINUSE + * - SCIF_MAP_FIXED is set in map_flags, and pages in the range [offset, + * offset+len-1] are already registered + *- EAGAIN + * - The mapping could not be performed due to lack of resources + *- EBADF + * - epd is not a valid endpoint descriptor + *- ECONNRESET + * - A connection was forcibly closed by a peer. + *- EFAULT + * - Addresses in the range [addr , addr + len - 1] are invalid + *- EINVAL + * - epd is not a valid endpoint descriptor, or + * - map_flags is invalid, or + * - prot_flags is invalid, or + * - SCIF_MAP_FIXED is set in flags, and offset is not a multiple of + * the page size, or + * - addr is not a multiple of the page size, or + * - len is not a multiple of the page size, or is 0, or + * - offset is negative + *- ENODEV + * - The remote node is lost. 
+ *- ENOMEM + * - Not enough space + *- ENOTCONN + * - The endpoint is not connected + *- ENOTTY + * - epd is not a valid endpoint descriptor + */ +off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, +int prot_flags, int map_flags); + +/** + * scif_unregister - Mark a memory region for remote access. + * \param epd endpoint descriptor + * \param offset start of range to unregister + * \param len length of range to unregister + * + * The scif_unregister() function closes those previously registered windows + * which are entirely within the range [offset,offset+len-1]. It is an error to + * specify a range which intersects only a subrange of a window. + * + * On a successful return, pages within the window may no longer be specified + * in calls to scif_mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(), + * scif_vwriteto(), scif_get_pages, and scif_fence_signal(). The window, however, + * continues to exist until all previous references against it are removed. A + * window is referenced if there is a mapping to it created by scif_mmap(), or if + * scif_get_pages() was called against the window (and the pages have not been + * returned via scif_put_pages()). A window is also referenced while an RMA, in + * which some range of the window is a source or destination, is in progress. + * Finally a window is referenced while some offset in that window was specified + * to scif_fence_signal(), and the RMAs marked by that call to + * scif_fence_signal() have not completed. While a window is in this state, its + * registered address space pages are not available for use in a new registered + * window. + * + * When all such references to the window have been removed, its references to + * all the physical pages which it represents are removed. Similarly, the + * registered address space pages of the window become available for + * registration in a new window. + * + *\return + * Upon successful completion, scif_unregister() returns 0; otherwise: in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. In the event of an + * error, no windows are unregistered. + * + *\par Errors: + *- EBADF + * - epd is not a valid endpoint descriptor + *- ECONNRESET + * - A connection was forcibly closed by a peer. + *- EINVAL + * - epd is not a valid endpoint descriptor, or + * - The range [offset,offset+len-1] intersects a subrange of a window, or + * - offset is negative + *- ENODEV + * -The remote node is lost. + *- ENOTCONN + * - The endpoint is not connected + *- ENOTTY + * - epd is not a valid endpoint descriptor + *- ENXIO + * - Addresses in the range [offset,offset+len-1] are invalid for the + * registered address space of epd. + */ +int scif_unregister(scif_epd_t epd, off_t offset, size_t len); + + +/** + * scif_readfrom - Copy from a remote address space + * \param epd endpoint descriptor + * \param loffset offset in local registered address space to + * which to copy + * \param len length of range to copy + * \param roffset offset in remote registered address space + * from which to copy + * \param rma_flags transfer mode flags + * + * scif_readfrom() copies len bytes from the remote registered address space of + * the peer of endpoint epd, starting at the offset roffset to the local + * registered address space of epd, starting at the offset loffset. 
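+ *
+ *\par Example:
+ * (Editorial sketch of window registration followed by a synchronous RMA
+ * read; not part of the original header. It assumes a 4 KiB page size, a
+ * connected user-mode endpoint epd, and a remote offset roffset that the
+ * peer is assumed to have communicated for a window it registered.)
+ *
+ *     size_t len = 0x1000;
+ *     void *buf;
+ *     if (posix_memalign(&buf, 0x1000, len))
+ *         return -1;
+ *     off_t loffset = scif_register(epd, buf, len, 0,
+ *                                   SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
+ *     if (loffset == SCIF_REGISTER_FAILED)
+ *         return -1;
+ *     /* Pull len bytes from the peer's window into the local window */
+ *     if (scif_readfrom(epd, loffset, len, roffset, SCIF_RMA_SYNC) < 0)
+ *         return -1;
+ *     scif_unregister(epd, loffset, len);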
+ * + * Each of the specified ranges [loffset,loffset+len-1] and [roffset,roffset+ + * len-1] must be within some registered window or windows of the local and + * remote nodes respectively. A range may intersect multiple registered + * windows, but only if those windows are contiguous in the registered address + * space. + * + * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using + * programmed read/writes. Otherwise the data is copied using DMA. If rma_- + * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the + * transfer is complete. Otherwise, the transfer may be performed asynchron- + * ously. The order in which any two aynchronous RMA operations complete + * is non-deterministic. The synchronization functions, scif_fence_mark()/ + * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to + * the completion of asynchronous RMA operations. + * + * The DMA transfer of individual bytes is not guaranteed to complete in + * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last + * cacheline or partial cacheline of the source range will become visible on + * the destination node after all other transferred data in the source + * range has become visible on the destination node. + * + * The optimal DMA performance will likely be realized if both + * loffset and roffset are cacheline aligned (are a multiple of 64). Lower + * performance will likely be realized if loffset and roffset are not + * cacheline aligned but are separated by some multiple of 64. The lowest level + * of performance is likely if loffset and roffset are not separated by a + * multiple of 64. + * + * The rma_flags argument is formed by ORing together zero or more of the + * following values: + *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA + * engine. + *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the + * transfer has completed. Passing this flag might result in + * the API busy waiting and consuming CPU cycles while the DMA + * transfer is in progress. + *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of + * the source range becomes visible on the destination node + * after all other transferred data in the source range has + * become visible on the destination + * + *\return + * Upon successful completion, scif_readfrom() returns 0; otherwise: in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + *\par Errors + *- EACCESS + * - Attempt to write to a read-only range or read from a write-only range + *- EBADF + * - epd is not a valid endpoint descriptor + *- ECONNRESET + * - A connection was forcibly closed by a peer. + *- EINVAL + * - epd is not a valid endpoint descriptor, or + * - rma_flags is invalid + *- ENODEV + * -The remote node is lost. 
+ *- ENOTCONN + * - The endpoint is not connected + *- ENOTTY + * - epd is not a valid endpoint descriptor + *- ENXIO + * - The range [loffset,loffset+len-1] is invalid for the registered address + * space of epd, or, + * - The range [roffset,roffset+len-1] is invalid for the registered address + * space of the peer of epd, or + * - loffset or roffset is negative +*/ +int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t +roffset, int rma_flags); + +/** + * scif_writeto - Copy to a remote address space + * \param epd endpoint descriptor + * \param loffset offset in local registered address space + * from which to copy + * \param len length of range to copy + * \param roffset offset in remote registered address space to + * which to copy + * \param rma_flags transfer mode flags + * + * scif_writeto() copies len bytes from the local registered address space of + * epd, starting at the offset loffset to the remote registered address space + * of the peer of endpoint epd, starting at the offset roffset. + * + * Each of the specified ranges [loffset,loffset+len-1] and [roffset,roffset+ + * len-1] must be within some registered window or windows of the local and + * remote nodes respectively. A range may intersect multiple registered + * windows, but only if those windows are contiguous in the registered address + * space. + * + * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using + * programmed read/writes. Otherwise the data is copied using DMA. If rma_- + * flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the + * transfer is complete. Otherwise, the transfer may be performed asynchron- + * ously. The order in which any two aynchronous RMA operations complete + * is non-deterministic. The synchronization functions, scif_fence_mark()/ + * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to + * the completion of asynchronous RMA operations. + * + * The DMA transfer of individual bytes is not guaranteed to complete in + * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last + * cacheline or partial cacheline of the source range will become visible on + * the destination node after all other transferred data in the source + * range has become visible on the destination node. + * + * The optimal DMA performance will likely be realized if both + * loffset and roffset are cacheline aligned (are a multiple of 64). Lower + * performance will likely be realized if loffset and roffset are not cacheline + * aligned but are separated by some multiple of 64. The lowest level of + * performance is likely if loffset and roffset are not separated by a multiple + * of 64. + * + * The rma_flags argument is formed by ORing together zero or more of the + * following values: + *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA + * engine. + *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the + * transfer has completed. Passing this flag might result in + * the API busy waiting and consuming CPU cycles while the DMA + * transfer is in progress. 
+ *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of + * the source range becomes visible on the destination node + * after all other transferred data in the source range has + * become visible on the destination + * + *\return + * Upon successful completion, scif_readfrom() returns 0; otherwise: in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + *\par Errors: + *- EACCESS + * - Attempt to write to a read-only range or read from a write-only range + *- EBADF + * - epd is not a valid endpoint descriptor + *- ECONNRESET + * - A connection was forcibly closed by a peer. + *- EINVAL + * - epd is not a valid endpoint descriptor, or + * - rma_flags is invalid + *- ENODEV + * - The remote node is lost. + *- ENOTCONN + * - The endpoint is not connected + *- ENOTTY + * - epd is not a valid endpoint descriptor + *- ENXIO + * - The range [loffset,loffset+len-1] is invalid for the registered address + * space of epd, or, + * - The range [roffset , roffset + len -1] is invalid for the registered + * address space of the peer of epd, or + * - loffset or roffset is negative + */ +int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t +roffset, int rma_flags); + +/** + * scif_vreadfrom - Copy from a remote address space + * \param epd endpoint descriptor + * \param addr address to which to copy + * \param len length of range to copy + * \param roffset offset in remote registered address space + * from which to copy + * \param rma_flags transfer mode flags + * + * scif_vreadfrom() copies len bytes from the remote registered address + * space of the peer of endpoint epd, starting at the offset roffset, to local + * memory, starting at addr. addr is interpreted as a user space address. + * + * The specified range [roffset,roffset+len-1] must be within some registered + * window or windows of the remote nodes respectively. The range may intersect + * multiple registered windows, but only if those windows are contiguous in the + * registered address space. + * + * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using + * programmed read/writes. Otherwise the data is copied using DMA. If rma_- + * flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the + * transfer is complete. Otherwise, the transfer may be performed asynchron- + * ously. The order in which any two aynchronous RMA operations complete + * is non-deterministic. The synchronization functions, scif_fence_mark()/ + * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to + * the completion of asynchronous RMA operations. + * + * The DMA transfer of individual bytes is not guaranteed to complete in + * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last + * cacheline or partial cacheline of the source range will become visible on + * the destination node after all other transferred data in the source + * range has become visible on the destination node. + * + * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back + * the specified local memory range may be remain in a pinned state even after + * the specified transfer completes. This may reduce overhead if some or all of + * the same virtual address range is referenced in a subsequent call of + * scif_vreadfrom() or scif_vwriteto(). + * + * The optimal DMA performance will likely be realized if both + * loffset and roffset are cacheline aligned (are a multiple of 64). 
Lower + * performance will likely be realized if loffset and roffset are not + * cacheline aligned but are separated by some multiple of 64. The lowest level + * of performance is likely if loffset and roffset are not separated by a + * multiple of 64. + * + * The rma_flags argument is formed by ORing together zero or more of the + * following values: + *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA + * engine. + *- SCIF_RMA_USECACHE: enable registration caching + *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the + * transfer has completed. Passing this flag might result in + * the API busy waiting and consuming CPU cycles while the DMA + * transfer is in progress. + *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of + * the source range becomes visible on the destination node + * after all other transferred data in the source range has + * become visible on the destination + * + *\return + * Upon successful completion, scif_vreadfrom() returns 0; otherwise: in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + *\par Errors: + *- EACCESS + * - Attempt to write to a read-only range or read from a write-only range + *- EBADF + * - epd is not a valid endpoint descriptor + *- ECONNRESET + * - A connection was forcibly closed by a peer. + *- EFAULT + * - Addresses in the range [addr,addr+len-1] are invalid + *- EINVAL + * - epd is not a valid endpoint descriptor, or + * - rma_flags is invalid + *- ENODEV + * - The remote node is lost. + *- ENOTCONN + * - The endpoint is not connected + *- ENOTTY + * - epd is not a valid endpoint descriptor + *- ENXIO + * - Addresses in the range [roffset,roffset+len-1] are invalid for the + * registered address space of epd. + */ +int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t offset, +int rma_flags); + +/** + * scif_vwriteto - Copy to a remote address space + * \param epd endpoint descriptor + * \param addr address from which to copy + * \param len length of range to copy + * \param roffset offset in remote registered address space to + * which to copy + * \param rma_flags transfer mode flags + * + * scif_vwriteto() copies len bytes from the local memory, starting at addr, to + * the remote registered address space of the peer of endpoint epd, starting at + * the offset roffset. addr is interpreted as a user space address. + * + * The specified range [roffset,roffset+len-1] must be within some registered + * window or windows of the remote nodes respectively. The range may intersect + * multiple registered windows, but only if those windows are contiguous in the + * registered address space. + * + * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using + * programmed read/writes. Otherwise the data is copied using DMA. If rma_- + * flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the + * transfer is complete. Otherwise, the transfer may be performed asynchron- + * ously. The order in which any two aynchronous RMA operations complete + * is non-deterministic. The synchronization functions, scif_fence_mark()/ + * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to + * the completion of asynchronous RMA operations. + * + * The DMA transfer of individual bytes is not guaranteed to complete in + * address order. 
If rma_flags includes SCIF_RMA_ORDERED, then the last + * cacheline or partial cacheline of the source range will become visible on + * the destination node after all other transferred data in the source + * range has become visible on the destination node. + * + * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back + * the specified local memory range may be remain in a pinned state even after + * the specified transfer completes. This may reduce overhead if some or all of + * the same virtual address range is referenced in a subsequent call of + * scif_vreadfrom() or scif_vwriteto(). + * + * The optimal DMA performance will likely be realized if both + * addr and offset are cacheline aligned (are a multiple of 64). Lower + * performance will likely be realized if addr and offset are not cacheline + * aligned but are separated by some multiple of 64. The lowest level of + * performance is likely if addr and offset are not separated by a multiple of + * 64. + * + * The rma_flags argument is formed by ORing together zero or more of the + * following values: + *- SCIF_RMA_USECPU: perform the transfer using the CPU, otherwise use the DMA + * engine. + *- SCIF_RMA_USECACHE: allow registration caching + *- SCIF_RMA_SYNC: perform the transfer synchronously, returning after the + * transfer has completed. Passing this flag might result in + * the API busy waiting and consuming CPU cycles while the DMA + * transfer is in progress. + *- SCIF_RMA_ORDERED: ensure that the last cacheline or partial cacheline of + * the source range becomes visible on the destination node + * after all other transferred data in the source range has + * become visible on the destination + * + *\return + * Upon successful completion, scif_vwriteto () returns 0; otherwise: in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + *\par Errors: + *- EACCESS + * - Attempt to write to a read-only range or read from a write-only range + *- EBADF + * - epd is not a valid endpoint descriptor + *- ECONNRESET + * - A connection was forcibly closed by a peer. + *- EFAULT + * - Addresses in the range [addr,addr+len-1] are invalid + *- EINVAL + * - epd is not a valid endpoint descriptor, or + * - rma_flags is invalid + *- ENODEV + * - The remote node is lost. + *- ENOTCONN + * - The endpoint is not connected + *- ENOTTY + * - epd is not a valid endpoint descriptor + *- ENXIO + * - Addresses in the range [roffset,roffset+len-1] are invalid for the + * registered address space of epd. + */ +int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t offset, +int rma_flags); + +/** + * scif_fence_mark - Mark previously issued RMAs + * \param epd endpoint descriptor + * \param flags control flags + * \param mark marked handle returned as output. + * + * scif_fence_mark() returns after marking the current set of all uncompleted + * RMAs initiated through the endpoint epd or the current set of all + * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are + * marked with a value returned at mark. The application may subsequently call + * scif_fence_wait(), passing the value returned at mark, to await completion + * of all RMAs so marked. 
+ * + * The flags argument has exactly one of the following values: + *- SCIF_FENCE_INIT_SELF: RMA operations initiated through endpoint + * epd are marked + *- SCIF_FENCE_INIT_PEER: RMA operations initiated through the peer + * of endpoint epd are marked + * + * \return + * Upon successful completion, scif_fence_mark() returns 0; otherwise: in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + *\par Errors: + *- EBADF + * - epd is not a valid endpoint descriptor + *- ECONNRESET + * - A connection was forcibly closed by a peer. + *- EINVAL + * - flags is invalid, or + * - epd is not a valid endpoint descriptor, or + *- ENODEV + * - The remote node is lost. + *- ENOTCONN + * - The endpoint is not connected + *- ENOMEM + * - Insufficient kernel memory was available. + *- ENOTTY + * - epd is not a valid endpoint descriptor + */ +int scif_fence_mark(scif_epd_t epd, int flags, int *mark); + +/** + * scif_fence_wait - Wait for completion of marked RMAs + * + * \param epd endpoint descriptor + * \param mark mark request + * + * scif_fence_wait() returns after all RMAs marked with mark have completed. + * The value passed in mark must have been obtained in a previous call to + * scif_fence_mark(). + * + *\return + * Upon successful completion, scif_fence_wait() returns 0; otherwise: in user + * mode -1 is returned and errno is set to indicate the error; in kernel mode + * the negative of one of the following errors is returned. + * + *\par Errors: + *- EBADF + * - epd is not a valid endpoint descriptor + *- ECONNRESET + * - A connection was forcibly closed by a peer. + *- EINVAL + * - epd is not a valid endpoint descriptor, or + *- ENODEV + * - The remote node is lost. + *- ENOTCONN + * - The endpoint is not connected + *- ENOMEM + * - Insufficient kernel memory was available. + *- ENOTTY + * - epd is not a valid endpoint descriptor + */ +int scif_fence_wait(scif_epd_t epd, int mark); + +/** + * scif_fence_signal - Request a signal on completion of RMAs + * \param loff local offset + * \param lval local value to write to loffset + * \param roff remote offset + * \param rval remote value to write to roffset + * \param flags flags + * + * scif_fence_signal() returns after marking the current set of all uncompleted + * RMAs initiated through the endpoint epd or marking the current set of all + * uncompleted RMAs initiated through the peer of endpoint epd. + * + * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the + * marked set, lval is written to memory at the address corresponding to offset + * loff in the local registered address space of epd. loff must be within a + * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion + * of the RMAs in the marked set, rval is written to memory at the * address + * corresponding to offset roff in the remote registered address space of epd. + * roff must be within a remote registered window of the peer of epd. Note + * that any specified offset must be DWORD (4 byte / 32 bit) aligned. 
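+ *
+ * As an illustrative sketch only (epd and loff are assumptions; loff must
+ * fall inside a local registered window; error handling elided), a
+ * doorbell-style signal using the flag values described below might be
+ * requested as:
+ * \code
+ *	// ask for the value 1 to be written at local offset loff once all
+ *	// RMAs initiated so far through epd have completed
+ *	err = scif_fence_signal(epd, loff, 1, 0, 0,
+ *				SCIF_FENCE_INIT_SELF | SCIF_SIGNAL_LOCAL);
+ * \endcode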
+ * + * The flags argument is formed by OR'ing together the following: + *- Exactly one of the following values: + * - SCIF_FENCE_INIT_SELF: RMA operations initiated through endpoint + * epd are marked + * - SCIF_FENCE_INIT_PEER: RMA operations initiated through the peer + * of endpoint epd are marked + *- One or more of the following values: + * - SCIF_SIGNAL_LOCAL: On completion of the marked set of RMAs, write lval to + * memory at the address corresponding to offset loff in the local registered + * address space of epd. + * - SCIF_SIGNAL_REMOTE: On completion of the marked set of RMAs, write lval to + * memory at the address corresponding to offset roff in the remote registered + * address space of epd. + * + *\return + * Upon successful completion, scif_fence_signal() returns 0; otherwise: in + * user mode -1 is returned and errno is set to indicate the error; in kernel + * mode the negative of one of the following errors is returned. + *\par Errors: + *- EBADF + * - epd is not a valid endpoint descriptor + *- ECONNRESET + * - A connection was forcibly closed by a peer. + *- EINVAL + * - epd is not a valid endpoint descriptor, or + * - flags is invalid, or + * - loff or roff are not DWORD aligned + *- ENODEV + * - The remote node is lost. + *- ENOTCONN + * - The endpoint is not connected + *- ENOTTY + * - epd is not a valid endpoint descriptor + *- ENXIO + * - loff is invalid for the registered address of epd, or + * - roff is invalid for the registered address space, of the peer of epd + */ +int scif_fence_signal(scif_epd_t epd, off_t loff, uint64_t lval, off_t roff, +uint64_t rval, int flags); + +/** + * scif_get_nodeIDs - Return information about online nodes + * \param nodes array in which to return online node IDs + * \param len number of entries in the nodes array + * \param self address to place the node ID of the local node + * + * scif_get_nodeIDs() fills in the nodes array with up to len node IDs of the + * nodes in the SCIF network. If there is not enough space in nodes, as + * indicated by the len parameter, only len node IDs are returned in nodes. The + * return value of scif_get_nodeID() is the total number of nodes currently in + * the SCIF network. By checking the return value against the len parameter, the user may + * determine if enough space for nodes was allocated. + * + * The node ID of the local node is returned at self. + * + *\return + * Upon successful completion, scif_get_nodeIDs() returns the actual number of + * online nodes in the SCIF network including 'self'; otherwise: in user mode + * -1 is returned and errno is set to indicate the error; in kernel mode no + * errors are returned. + * + *\par Errors: + *- EFAULT + * - Bad address + */ +int scif_get_nodeIDs(uint16_t *nodes, int len, uint16_t *self); + + +/** + * scif_pin_pages - Pin a set of pages + * \param addr Virtual address of range to pin + * \param len Length of range to pin + * \param prot_flags Page protection flags + * \param map_flags Page classification flags + * \param pinned_pages Opaque handle of pinned pages + * + * scif_pin_pages() pins (locks in physical memory) the physical pages which + * back the range of virtual address pages starting at addr and continuing for + * len bytes. addr and len are constrained to be multiples of the page size. A + * successful scif_register() call returns an opaque pointer value at + * pinned_pages which may be used in subsequent calls to + * scif_register_pinned_pages(). 
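+ *
+ * As an illustrative kernel-mode sketch only (epd is an assumed connected
+ * endpoint; error handling elided), pinning and registering four pages with
+ * the prot_flags and map_flags values listed below might look like:
+ * \code
+ *	scif_pinned_pages_t pp;
+ *	void *buf = (void *)__get_free_pages(GFP_KERNEL, 2);	// 4 pages
+ *	off_t win;
+ *	int err;
+ *
+ *	err = scif_pin_pages(buf, 4 * PAGE_SIZE,
+ *			SCIF_PROT_READ | SCIF_PROT_WRITE,
+ *			SCIF_MAP_KERNEL, &pp);
+ *	win = scif_register_pinned_pages(epd, pp, 0, 0);
+ *	// ... RMAs against the window at offset win ...
+ *	err = scif_unregister(epd, win, 4 * PAGE_SIZE);
+ *	err = scif_unpin_pages(pp);
+ * \endcode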
+ * + * The pages will remain pinned as long as there is a reference against the + * scif_pinned_pages_t value returned by scif_pin_pages() and until + * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A + * reference is added to a scif_pinned_pages_t value each time a window is + * created by calling scif_register_pinned_pages() and passing the + * scif_pinned_pages_t value. A reference is removed from a scif_pinned_pages_t value + * each time such a window is deleted. + * + * Subsequent operations which change the memory pages to which virtual + * addresses are mapped (such as mmap(), munmap(), scif_mmap() and + * scif_munmap()) have no effect on the scif_pinned_pages_t value or windows + * created against it. + * + * On Linux, if the process will fork(), it is recommended that the registered + * virtual address range be marked with MADV_DONTFORK. Doing so will prevent + * problems due to copy-on-write semantics. + * + * The prot_flags argument is formed by OR'ing together one or more of the + * following values: + *- SCIF_PROT_READ: allow read operations against the pages + *- SCIF_PROT_WRITE: allow write operations against the pages + * The map_flags argument is formed by OR'ing together zero or more of the + * following values: + *- SCIF_MAP_KERNEL: interpret addr as a kernel space address. By default, addr + * is interpreted as a user space address. + * + *\return + * Upon successful completion, scif_register() returns 0; otherwise the + * negative of one of the following errors is returned. + *\par Errors: + *- EFAULT + * - Addresses in the range [addr,addr+len-1] are invalid + *- EINVAL + * - prot_flags is invalid, + * - map_flags is invalid, or + * - offset is negative + *- ENOMEM + * - Not enough space + */ +int +scif_pin_pages( + void *addr, + size_t len, + int prot_flags, + int map_flags, + scif_pinned_pages_t *pinned_pages); + +/** + * scif_unpin_pages - Unpin a set of pages + * \param pinned_pages Opaque handle of pages to be unpinned + * + * scif_unpin_pages() prevents scif_register_pinned_pages()from registering new + * windows against pinned_pages. The physical pages represented by pinned_pages + * will remain pinned until all windows previously registered against + * pinned_pages are deleted (the window is scif_unregister()'d and all + * references to the window are removed (see scif_unregister()). + * + * pinned_pages must have been obtain from a previous call to scif_pin_pages(). + * After calling scif_unpin_pages(), it is an error to pass pinned_pages to + * scif_register_pinned_pages(). + * + *\return: + * Upon successful completion, scif_unpin_pages() returns 0; otherwise the + * negative of one of the following errors is returned. + * + *\par Errors: + *- EINVAL + * - pinned_pages is not valid + */ +int +scif_unpin_pages( + scif_pinned_pages_t pinned_pages); + +/** + * scif_register_pinned_pages - Mark a memory region for remote access. + * \param epd Endpoint descriptor + * \param pinned_pages Opaque handle of pinned pages + * \param offset Registered address space offset + * \param map_flags Flags which control where pages are mapped + * + * The scif_register_pinned_pages() function opens a window, a range of whole + * pages of the registered address space of the endpoint epd, starting at + * offset po. The value of po, further described below, is a function of the + * parameters offset and pinned_pages, and the value of map_flags. 
Each page of + * the window represents a corresponding physical memory page of the range + * represented by pinned_pages; the length of the window is the same as the + * length of range represented by pinned_pages. A successful scif_register() + * call returns po as the return value. + * + * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset + * exactly, and offset is constrained to be a multiple of the page size. The + * mapping established by scif_register() will not replace any existing + * registration; an error is returned if any page of the new window would + * intersect an existing window. + * + * When SCIF_MAP_FIXED is not set, the implementation uses offset in an + * implementation-defined manner to arrive at po. The po so chosen will be an + * area of the registered address space that the implementation deems suitable + * for a mapping of the required size. An offset value of 0 is interpreted as + * granting the implementation complete freedom in selecting po, subject to + * constraints described below. A non-zero value of offset is taken to be a + * suggestion of an offset near which the mapping should be placed. When the + * implementation selects a value for po, it does not replace any extant + * window. In all cases, po will be a multiple of the page size. + * + * The physical pages which are so represented by a window are available for + * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(), + * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the + * physical pages represented by the window will not be reused by the memory + * subsytem for any other purpose. Note that the same physical page may be + * represented by multiple windows. + * + * Windows created by scif_register_pinned_pages() are unregistered by + * scif_unregister(). + * + * The map_flags argument is formed by OR'ing together zero or more of the + * following values: + *- SCIF_MAP_FIXED: interpret offset exactly + * + *\return + * Upon successful completion, scif_register_pinned_pages() returns the offset + * at which the mapping was placed (po); otherwise the negative of one of the + * following errors is returned. + *\par Errors: + *- EADDRINUSE + * - SCIF_MAP_FIXED is set in map_flags and pages in the new + * window would intersect an existing window + *- EAGAIN + * - The mapping could not be performed due to lack of resources + *- ECONNRESET + * - A connection was forcibly closed by a peer. + *- EINVAL + * - epd is not a valid endpoint descriptor, or + * - map_flags is invalid, or + * - SCIF_MAP_FIXED is set in map_flags, and offset is not a + * multiple of the page size, or + * - offset is negative + *- ENODEV + * - The remote node is lost. + *- ENOMEM + * - Not enough space + *- ENOTCONN + * - The endpoint is not connected + */ +off_t +scif_register_pinned_pages( + scif_epd_t epd, + scif_pinned_pages_t pinned_pages, + off_t offset, + int map_flags); + +/** + * scif_get_pages - Add references to remote registered pages + * \param epd endpoint descriptor + * \param offset registered address space offset + * \param len length of range of pages + * \param pages returned scif_range structure + * + * scif_get_pages() returns the addresses of the physical pages represented by + * those pages of the registered address space of the peer of epd, starting at + * offset and continuing for len bytes. offset and len are constrained to be + * multiples of the page size. 
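+ *
+ * As an illustrative kernel-mode sketch only (epd and roffset are
+ * assumptions; roffset must fall inside a single remote window; error
+ * handling elided):
+ * \code
+ *	struct scif_range *range;
+ *	int i, err;
+ *
+ *	err = scif_get_pages(epd, roffset, 2 * PAGE_SIZE, &range);
+ *	for (i = 0; !err && i < range->nr_pages; i++)
+ *		pr_debug("page %d at %#llx\n", i,
+ *			 (unsigned long long)range->phys_addr[i]);
+ *	if (!err)
+ *		err = scif_put_pages(range);
+ * \endcode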
+ * + * All of the pages in the specified range [offset,offset+len-1] must be within + * a single window of the registered address space of the peer of epd. + * + * The addresses are returned as a virtually contiguous array pointed to by the + * phys_addr component of the scif_range structure whose address is returned in + * pages. The nr_pages component of scif_range is the length of the array. The + * prot_flags component of scif_range holds the protection flag value passed + * when the pages were registered. + * + * Each physical page whose address is returned by scif_get_pages() remains + * available and will not be released for reuse until the scif_range structure + * is returned in a call to scif_put_pages(). The scif_range structure returned + * by scif_get_pages() must be unmodified. + * + * It is an error to call scif_close() on an endpoint on which a scif_range + * structure of that endpoint has not been returned to scif_put_pages(). + * + *\return + * Upon successful completion, scif_get_pages() returns 0; otherwise the + * negative of one of the following errors is returned. + *\par Errors: + *- ECONNRESET + * - A connection was forcibly closed by a peer. + *- EINVAL + * - epd is not a valid endpoint descriptor, or + * - offset is not a multiple of the page size, or + * - offset is negative, or + * - len is not a multiple of the page size + *- ENODEV + * -The remote node is lost. + *- ENOTCONN + * - The endpoint is not connected + *- ENXIO + * - Addresses in the range [offset,offset+len-1] are invalid + * for the registered address space of the peer epd. + */ +int scif_get_pages( + scif_epd_t epd, + off_t offset, + size_t len, + struct scif_range **pages); + +/** + * scif_put_pages - Remove references from remote registered pages + * \param pages pages to be returned + * + * scif_put_pages() releases a scif_range structure previously obtained by + * calling scif_get_pages(). The physical pages represented by pages may + * be reused when the window which represented those pages is unregistered. + * Therefore, those pages must not be accessed after calling scif_put_pages(). + * + *\return + * Upon successful completion, scif_put_pages() returns 0; otherwise the + * negative of one of the following errors is returned. + *\par Errors: + *- EINVAL + * - pages does not point to a valid scif_range structure, or + * - the scif_range structure pointed to by pages was already returned. + *- ENODEV + * - The remote node is lost. + *- ENOTCONN + * - The endpoint is not connected. + */ +int scif_put_pages( + struct scif_range *pages); + +/** + * scif_poll - Wait for some event on an endpoint + * \param epds Array of endpoint descriptors + * \param nepds Length of epds + * \param timeout Upper limit on time for which scif_poll() will + * block + * + * scif_poll() waits for one of a set of endpoints to become ready to perform + * an I/O operation. scif_poll() exposes a subset of the functionality of the + * POSIX standard poll() function. + * + * The epds argument specifies the endpoint descriptors to be examined and the + * events of interest for each endpoint descriptor. epds is a pointer to an + * array with one member for each open endpoint descriptor of interest. + * + * The number of items in the epds array is specified in nepds. The epd field + * of scif_pollepd is an endpoint descriptor of an open endpoint. The field + * events is a bitmask specifying the events which the application is + * interested in. 
The field revents is an output parameter, filled by the + * kernel with the events that actually occurred. The bits returned in revents + * can include any of those specified in events, or one of the values + * SCIF_POLLERR, SCIF_POLLHUP, or SCIF_POLLNVAL. (These three bits are + * meaningless in the events field, and will be set in the revents field + * whenever the corresponding condition is true.) + * + * If none of the events requested (and no error) has occurred for any of the + * endpoint descriptors, then scif_poll() blocks until one of the events occurs. + * + * The timeout argument specifies an upper limit on the time for which + * scif_poll() will block, in milliseconds. Specifying a negative value in + * timeout means an infinite timeout. + * + * The following bits may be set in events and returned in revents: + *- SCIF_POLLIN: Data may be received without blocking. For a connected + * endpoint, this means that scif_recv() may be called without blocking. For a + * listening endpoint, this means that scif_accept() may be called without + * blocking. + *- SCIF_POLLOUT: Data may be sent without blocking. For a connected endpoint, + * this means that scif_send() may be called without blocking. This bit value + * has no meaning for a listening endpoint and is ignored if specified. + * + * The following bits are only returned in revents, and are ignored if set in + * events: + *- SCIF_POLLERR: An error occurred on the endpoint + *- SCIF_POLLHUP: The connection to the peer endpoint was disconnected + *- SCIF_POLLNVAL: The specified endpoint descriptor is invalid. + * + *\return + * Upon successful completion, scif_poll()returns a non-negative value. A + * positive value indicates the total number of endpoint descriptors that have + * been selected (that is, endpoint descriptors for which the revents member is + * non-zero. A value of 0 indicates that the call timed out and no endpoint + * descriptors have been selected. Otherwise: in user mode -1 is returned and + * errno is set to indicate the error; in kernel mode the negative of one of + * the following errors is returned. + * + *\par Errors: + *- EFAULT + * - The array given as argument was not contained in the calling program's + * address space. + *- EINTR + * - A signal occurred before any requested event. + *- EINVAL + * - The nepds argument is greater than {OPEN_MAX} + *- ENOMEM + * - There was no space to allocate file descriptor tables. +*/ +int +scif_poll( + struct scif_pollepd *epds, + unsigned int nepds, + long timeout); + +/** + * scif_event_register - Register an event handler + * \param handler Event handler to be registered + * + * scif_event_register() registers a routine, handler, to be called when some + * event occurs. The event parameter to handler indicates the type of event + * which has occurred, and the corresponding component of the data parameter to + * handler provides additional data about the event. + * + * The following events are defined: + *- SCIF_NODE_ADDED: A node has been added to the SCIF network. The + * scif_node_added component of the data parameter to handler identifies the + * node. This event is informational. There are no requirements on the event + * handler. + *- SCIF_NODE_REMOVED: A node is being removed from the SCIF network. The + * scif_node_removed component of the data parameter to handler identifies the + * node. 
Upon being called, and before returning, the event handler must + * return, using scif_put_pages(), all structures obtained using + * scif_get_pages() against an endpoint connected to the lost node. It is + * recommended and expected that the handler will also scif_close() all + * endpoints connected to the lost node. + * + *\return + * Upon successful completion scif_event_register() returns 0. + * + *\par Errors: + *- ENOMEM + * - There was no space to allocate file descriptor tables. +*/ + +int +scif_event_register( + scif_callback_t handler); + +/** + * scif_event_unregister - Unregister event handler + * \param handler Event handler to be unregistered + * + * scif_event_unregister() unregisters the handler which was registered + * previously by using scif_event_register(). + * + * WARNING: scif_event_unregister must be called before the module + * (that registered handles) exits for every handler that is registered. + * Failure to do so will result in crash of the scif module. + * + *\return + * Upon successful completion scif_event_unregister() returns 0. + *\par Errors: + *- EINVAL + * -If the event handler was not found/registered. +*/ +int +scif_event_unregister( + scif_callback_t handler); + +/* + * Note: The callee can use pci_resource_start(dev, index) and + * pci_resource_len(dev, index) to obtain the PCI resource starting + * physical address and length for valid non null indexes of the va + * array. MMIO bars will not have IORESOURCE_PREFETCH set in the + * flags obtained from pci_resource_flags(dev, index). va[index] + * will be set to NULL for invalid resources. + */ +struct scif_pci_info { + /* pci_dev pointer associated with a node */ + struct pci_dev *pdev; + /* Ioremapped virtual address base for every valid PCIe resource */ + void __iomem *va[PCI_NUM_RESOURCES]; +}; + +/** + * scif_pci_info - Populate the scif_pci_info structure for a node. + * \param node The node to query + * \param dev The scif_pci_info structure to populate. + * + * scif_pci_info() populates the provided scif_pci_info structure + * associated with a node. The requested node ID cannot be the same as + * the current node. This routine will only return success when called from + * the host. + * + *\return + * Upon successful completion, scif_pci_info() returns 0; otherwise the + * negative of one of the following errors is returned. + * + *\par Errors: + *- EINVAL + * - The requested node is not valid. + * - Called on MIC instead of the host. + *- ENODEV + * - No pci_dev association exists for the node. + */ +int +scif_pci_info( + uint16_t node, + struct scif_pci_info *dev); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __SCIF_H__ */ diff --git a/include/scif_ioctl.h b/include/scif_ioctl.h new file mode 100644 index 0000000..fd72fc4 --- /dev/null +++ b/include/scif_ioctl.h @@ -0,0 +1,225 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * ----------------------------------------- + * SCIF IOCTL interface information + * ----------------------------------------- + */ +#if defined(_WIN32) && !defined(_WIN64) +#define ptr64_t __ptr64 +#else +#define ptr64_t +#endif + +/** + * The purpose of SCIF_VERSION is to check for compatibility between host and + * card SCIF modules and also between SCIF driver and libscif. This version + * should be incremented whenever a change is made to SCIF that affects the + * interface between SCIF driver and libscif or between the card and host SCIF + * driver components. + */ +#define SCIF_VERSION 1 + +/** + * struct scifioctl_connect: + * + * \param self used to read back the assigned portID + * \param peer destination node and port to connect to + * + * This structure is used for CONNECT IOCTL. + */ +struct scifioctl_connect { + struct scif_portID self; + struct scif_portID peer; +}; + + +/** + * struct scifioctl_accept: + * + * \param flags flags + * \param peer global id of peer endpoint + * \param newepd new connected endpoint descriptor + * + * This structure is used for SCIF_ACCEPTREQ IOCTL. + */ +struct scifioctl_accept { + int flags; + struct scif_portID peer; + void * ptr64_t endpt; +}; + +/** + * struct scifioctl_msg: + * + * \param msg message buffer address + * \param len message length + * \param flags flags + * \param out_len Number of bytes sent/received. + * + * This structure is used for SCIF_SEND/SCIF_RECV IOCTL. + */ +struct scifioctl_msg { + void * ptr64_t msg; + int len; + int flags; + int out_len; +}; + +/** + * struct scifioctl_reg: + * + * \param addr starting virtual address + * \param len length of range + * \param offset offset of window + * \param prot read/write protection + * \param flags flags + * \param out_len offset returned. + * + * This structure is used for SCIF_REG IOCTL. + */ +struct scifioctl_reg { + void * ptr64_t addr; + uint64_t len; + off_t offset; + int prot; + int flags; + off_t out_offset; +}; + +/** + * struct scifioctl_unreg: + * + * \param offset start of range to unregister + * \param len length of range to unregister + * + * This structure is used for SCIF_UNREG IOCTL. 
+ */ +struct scifioctl_unreg { + off_t offset; + uint64_t len; +}; + +/** + * struct scifioctl_copy: + * + * \param loffset offset in local registered address space to/from +which to copy + * \param len length of range to copy + * \param roffset offset in remote registered address space to/from +which to copy + * \param addr user virtual address to/from which to copy + * \param flags flags + * + * This structure is used for SCIF_READFROM, SCIF_WRITETO, SCIF_VREADFROM +and + * SCIF_VREADFROM IOCTL's. + */ +struct scifioctl_copy { + off_t loffset; + uint64_t len; + off_t roffset; + uint8_t * ptr64_t addr; + int flags; +}; + +/** + * struct scifioctl_fence_mark: + * + * \param flags flags + * \param mark Fence handle returned by reference. + * + * This structure is used from SCIF_FENCE_MARK IOCTL. + */ +struct scifioctl_fence_mark { + int flags; + int *mark; +}; + +/** + * struct scifioctl_fence_signal: + * + * \param loff local offset + * \param lval local value to write to loffset + * \param roff remote offset + * \param rval remote value to write to roffset + * \param flags flags + * + * This structure is used for SCIF_FENCE_SIGNAL IOCTL. + */ +struct scifioctl_fence_signal { + off_t loff; + uint64_t lval; + off_t roff; + uint64_t rval; + int flags; +}; + +/** + * struct scifioctl_nodeIDs: + * + * \param nodes pointer to an array of nodeIDs + * \param len length of array + * \param self ID of the current node + * + * This structure is used for the SCIF_GET_NODEIDS ioctl + */ +struct scifioctl_nodeIDs { + uint16_t * ptr64_t nodes; + int len; + uint16_t * ptr64_t self; +}; + + +#define SCIF_BIND _IOWR('s', 1, int *) +#define SCIF_LISTEN _IOW('s', 2, int) +#define SCIF_CONNECT _IOWR('s', 3, struct scifioctl_connect *) +#define SCIF_ACCEPTREQ _IOWR('s', 4, struct scifioctl_accept *) +#define SCIF_ACCEPTREG _IOWR('s', 5, void *) +#define SCIF_SEND _IOWR('s', 6, struct scifioctl_msg *) +#define SCIF_RECV _IOWR('s', 7, struct scifioctl_msg *) +#define SCIF_REG _IOWR('s', 8, struct scifioctl_reg *) +#define SCIF_UNREG _IOWR('s', 9, struct scifioctl_unreg *) +#define SCIF_READFROM _IOWR('s', 10, struct scifioctl_copy *) +#define SCIF_WRITETO _IOWR('s', 11, struct scifioctl_copy *) +#define SCIF_VREADFROM _IOWR('s', 12, struct scifioctl_copy *) +#define SCIF_VWRITETO _IOWR('s', 13, struct scifioctl_copy *) +#define SCIF_GET_NODEIDS _IOWR('s', 14, struct scifioctl_nodeIDs *) +#define SCIF_FENCE_MARK _IOWR('s', 15, struct scifioctl_fence_mark *) +#define SCIF_FENCE_WAIT _IOWR('s', 16, int) +#define SCIF_FENCE_SIGNAL _IOWR('s', 17, struct scifioctl_fence_signal *) + +#define SCIF_GET_VERSION _IO('s', 23) diff --git a/mic.conf b/mic.conf new file mode 100644 index 0000000..a661522 --- /dev/null +++ b/mic.conf @@ -0,0 +1,32 @@ +# Options for the Intel Many Integrated Core Co-processor card driver +# +# p2p enables the use of the SCIF interface peer to peer communication +# 1 to enable or 0 to disable +# +# p2p_proxy enables the use of SCIF P2P Proxy DMA which converts DMA +# reads into DMA writes for performance on certain Intel platforms. +# 1 to enable or 0 to disable +# +# reg_cache enables SCIF Registration Caching +# 1 to enable or 0 to disable +# +# huge_page enables SCIF Huge Page Support +# 1 to enable or 0 to disable +# +# watchdog enables the SCIF watchdog for Lost Node detection. +# 1 to enable or 0 to disable +# +# watchdog_auto_reboot configures the behavior of the MIC host driver +# upon detection of a lost node. This option is a nop if watchdog=0. 
+# 1 Allow the host driver to reboot the node back to "online" state +# 0 Allow the host driver to reset the node back to "ready" state. +# It will be upto the user to reboot the node or not. +# +# crash_dump enables uOS Kernel Crash Dump Captures +# 1 to enable or 0 to disable +# +# ulimit enables ulimit checks on max locked memory for scif_register +# 1 to enable or 0 to disable +# +options mic reg_cache=1 huge_page=1 watchdog=1 watchdog_auto_reboot=1 crash_dump=1 p2p=1 p2p_proxy=1 ulimit=0 +options mic_host reg_cache=1 huge_page=1 watchdog=1 watchdog_auto_reboot=1 crash_dump=1 p2p=1 p2p_proxy=1 ulimit=0 diff --git a/mic.modules b/mic.modules new file mode 100755 index 0000000..a95f8b0 --- /dev/null +++ b/mic.modules @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ ! -d /sys/class/mic ]; then + exec /sbin/modprobe mic >/dev/null 2>&1 +fi diff --git a/micscif/Kbuild b/micscif/Kbuild new file mode 100644 index 0000000..1171632 --- /dev/null +++ b/micscif/Kbuild @@ -0,0 +1,21 @@ +obj-m := ringbuffer.o +obj-m += micscif.o + +ringbuffer-objs := micscif_rb.o + +micscif-objs := micscif_main.o +micscif-objs += micscif_sysfs.o +micscif-objs += micscif_smpt.o +micscif-objs += micscif_intr.o +micscif-objs += micscif_api.o +micscif-objs += micscif_fd.o +micscif-objs += micscif_nodeqp.o +micscif-objs += micscif_va_node.o +micscif-objs += micscif_va_gen.o +micscif-objs += micscif_rma.o +micscif-objs += micscif_rma_list.o +micscif-objs += micscif_rma_dma.o +micscif-objs += micscif_debug.o +micscif-objs += micscif_ports.o +micscif-objs += micscif_select.o +micscif-objs += micscif_nm.o diff --git a/micscif/micscif_api.c b/micscif/micscif_api.c new file mode 100644 index 0000000..e13e59d --- /dev/null +++ b/micscif/micscif_api.c @@ -0,0 +1,3464 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#include +#include +#include +#include +#include +#include +#include "scif.h" +#include "mic/micscif.h" +#ifndef _MIC_SCIF_ +#include "mic_common.h" +#endif +#include "mic/micscif_map.h" + +#define SCIF_MAP_ULIMIT 0x40 + +bool mic_ulimit_check = 0; + +char *scif_ep_states[] = { + "Closed", + "Unbound", + "Bound", + "Listening", + "Connected", + "Connecting", + "Mapping", + "Closing", + "Close Listening", + "Disconnected", + "Zombie"}; + +enum conn_async_state { + ASYNC_CONN_IDLE = 1, /* ep setup for async connect */ + ASYNC_CONN_INPROGRESS, /* async connect in progress */ + ASYNC_CONN_FLUSH_WORK /* async work flush in progress */ +}; + +/** + * scif_open() - Create a SCIF end point + * + * Create a SCIF end point and set the state to UNBOUND. This function + * returns the address of the end point data structure. + */ +scif_epd_t +__scif_open(void) +{ + struct endpt *ep; + + might_sleep(); + if ((ep = (struct endpt *)kzalloc(sizeof(struct endpt), GFP_KERNEL)) == NULL) { + printk(KERN_ERR "SCIFAPI open: kzalloc fail on scif end point descriptor\n"); + goto err_ep_alloc; + } + + if ((ep->qp_info.qp = (struct micscif_qp *) + kzalloc(sizeof(struct micscif_qp), GFP_KERNEL)) == NULL) { + printk(KERN_ERR "SCIFAPI open: kzalloc fail on scif end point queue pointer\n"); + goto err_qp_alloc; + } + + spin_lock_init(&ep->lock); + mutex_init (&ep->sendlock); + mutex_init (&ep->recvlock); + + if (micscif_rma_ep_init(ep) < 0) { + printk(KERN_ERR "SCIFAPI _open: RMA EP Init failed\n"); + goto err_rma_init; + } + + ep->state = SCIFEP_UNBOUND; + pr_debug("SCIFAPI open: ep %p success\n", ep); + return (scif_epd_t)ep; + +err_rma_init: + kfree(ep->qp_info.qp); +err_qp_alloc: + kfree(ep); +err_ep_alloc: + return NULL; +} + +scif_epd_t +scif_open(void) +{ + struct endpt *ep; + ep = (struct endpt *)__scif_open(); + if (ep) + kref_init(&(ep->ref_count)); + return (scif_epd_t)ep; +} +EXPORT_SYMBOL(scif_open); + +/** + * scif_close() - Terminate a SCIF end point + * @epd: The end point address returned from scif_open() + * + * The function terminates a scif connection. It must ensure all traffic on + * the connection is finished before removing it. + * + * On Connection with memory mapped this become more difficult. Once normal + * DMA and message traffic has ended the end point must be placed in a zombie + * state and wait for the other side to also release it's memory references. 
+ */ +int +__scif_close(scif_epd_t epd) +{ + struct endpt *ep = (struct endpt *)epd; + struct endpt *tmpep; + struct list_head *pos, *tmpq; + unsigned long sflags; + enum endptstate oldstate; + int err; + bool flush_conn; + + pr_debug("SCIFAPI close: ep %p %s\n", ep, scif_ep_states[ep->state]); + + might_sleep(); + + spin_lock(&ep->lock); + flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS); + spin_unlock(&ep->lock); + + if (flush_conn) + flush_workqueue(ms_info.mi_conn_wq); + + micscif_inc_node_refcnt(ep->remote_dev, 1); + + spin_lock_irqsave(&ep->lock, sflags); + oldstate = ep->state; + + ep->state = SCIFEP_CLOSING; + + switch (oldstate) { + case SCIFEP_ZOMBIE: + BUG_ON(SCIFEP_ZOMBIE == oldstate); + case SCIFEP_CLOSED: + case SCIFEP_DISCONNECTED: + spin_unlock_irqrestore(&ep->lock, sflags); + micscif_unregister_all_windows(epd); + // Remove from the disconnected list + spin_lock_irqsave(&ms_info.mi_connlock, sflags); + list_for_each_safe(pos, tmpq, &ms_info.mi_disconnected) { + tmpep = list_entry(pos, struct endpt, list); + if (tmpep == ep) { + list_del(pos); + break; + } + } + spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); + break; + case SCIFEP_UNBOUND: + case SCIFEP_BOUND: + case SCIFEP_CONNECTING: + spin_unlock_irqrestore(&ep->lock, sflags); + break; + case SCIFEP_MAPPING: + case SCIFEP_CONNECTED: + case SCIFEP_CLOSING: + { + struct nodemsg msg; + struct endpt *fep = NULL; + struct endpt *tmpep; + unsigned long ts = jiffies; + struct list_head *pos, *tmpq; + + // Very short time before mapping completes and state becomes connected + // and does a standard teardown. + ts = jiffies; + while (ep->state == SCIFEP_MAPPING) { + cpu_relax(); + if (time_after((unsigned long)jiffies,ts + NODE_ALIVE_TIMEOUT)) { + printk(KERN_ERR "%s %d ep->state %d\n", __func__, __LINE__, ep->state); + ep->state = SCIFEP_BOUND; + break; + } + } + + init_waitqueue_head(&ep->disconwq); // Wait for connection queue + spin_unlock_irqrestore(&ep->lock, sflags); + + micscif_unregister_all_windows(epd); + + // Remove from the connected list + spin_lock_irqsave(&ms_info.mi_connlock, sflags); + list_for_each_safe(pos, tmpq, &ms_info.mi_connected) { + tmpep = list_entry(pos, struct endpt, list); + if (tmpep == ep) { + list_del(pos); + put_conn_count(ep->remote_dev); + fep = tmpep; + spin_lock(&ep->lock); + break; + } + } + + if (fep == NULL) { + // The other side has completed the disconnect before + // the end point can be removed from the list. Therefore + // the ep lock is not locked, traverse the disconnected list + // to find the endpoint, release the conn lock and + // proceed to teardown the end point below. + list_for_each_safe(pos, tmpq, &ms_info.mi_disconnected) { + tmpep = list_entry(pos, struct endpt, list); + if (tmpep == ep) { + list_del(pos); + break; + } + } + spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); + break; + } + + spin_unlock(&ms_info.mi_connlock); + + // Now we are free to close out the connection + msg.uop = SCIF_DISCNCT; + msg.src = ep->port; + msg.dst = ep->peer; + msg.payload[0] = (uint64_t)ep; + msg.payload[1] = ep->remote_ep; + + err = micscif_nodeqp_send(ep->remote_dev, &msg, ep); + spin_unlock_irqrestore(&ep->lock, sflags); + + if (!err) + /* Now wait for the remote node to respond */ + wait_event_timeout(ep->disconwq, + (ep->state == SCIFEP_DISCONNECTED), NODE_ALIVE_TIMEOUT); + /* + * Grab and release the ep lock to synchronize with the + * thread waking us up. 
If we dont grab this lock, then + * the ep might be freed before the wakeup completes + * resulting in potential memory corruption. + */ + spin_lock_irqsave(&ep->lock, sflags); + spin_unlock_irqrestore(&ep->lock, sflags); + break; + } + case SCIFEP_LISTENING: + case SCIFEP_CLLISTEN: + { + struct conreq *conreq; + struct nodemsg msg; + struct endpt *aep; + + spin_unlock_irqrestore(&ep->lock, sflags); + spin_lock_irqsave(&ms_info.mi_eplock, sflags); + + // remove from listen list + list_for_each_safe(pos, tmpq, &ms_info.mi_listen) { + tmpep = list_entry(pos, struct endpt, list); + if (tmpep == ep) { + list_del(pos); + } + } + // Remove any dangling accepts + while (ep->acceptcnt) { + aep = list_first_entry(&ep->li_accept, struct endpt, liacceptlist); + BUG_ON(!aep); + list_del(&aep->liacceptlist); + if (aep->port.port && !aep->accepted_ep) + put_scif_port(aep->port.port); + list_for_each_safe(pos, tmpq, &ms_info.mi_uaccept) { + tmpep = list_entry(pos, struct endpt, miacceptlist); + if (tmpep == aep) { + list_del(pos); + break; + } + } + spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); + spin_lock_irqsave(&ms_info.mi_connlock, sflags); + list_for_each_safe(pos, tmpq, &ms_info.mi_connected) { + tmpep = list_entry(pos, struct endpt, list); + if (tmpep == aep) { + list_del(pos); + put_conn_count(aep->remote_dev); + break; + } + } + list_for_each_safe(pos, tmpq, &ms_info.mi_disconnected) { + tmpep = list_entry(pos, struct endpt, list); + if (tmpep == aep) { + list_del(pos); + break; + } + } + spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); + micscif_teardown_ep(aep); + spin_lock_irqsave(&ms_info.mi_eplock, sflags); + micscif_add_epd_to_zombie_list(aep, MI_EPLOCK_HELD); + ep->acceptcnt--; + } + + spin_lock(&ep->lock); + spin_unlock(&ms_info.mi_eplock); + + // Remove and reject any pending connection requests. + while (ep->conreqcnt) { + conreq = list_first_entry(&ep->conlist, struct conreq, list); + list_del(&conreq->list); + + msg.uop = SCIF_CNCT_REJ; + msg.dst.node = conreq->msg.src.node; + msg.dst.port = conreq->msg.src.port; + msg.payload[0] = conreq->msg.payload[0]; + msg.payload[1] = conreq->msg.payload[1]; + /* + * No Error Handling on purpose for micscif_nodeqp_send(). + * If the remote node is lost we still want free the connection + * requests on the self node. 
+ */ + micscif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg, ep); + + ep->conreqcnt--; + kfree(conreq); + } + + // If a kSCIF accept is waiting wake it up + wake_up_interruptible(&ep->conwq); + spin_unlock_irqrestore(&ep->lock, sflags); + break; + } + } + if (ep->port.port && !ep->accepted_ep) + put_scif_port(ep->port.port); + micscif_dec_node_refcnt(ep->remote_dev, 1); + micscif_teardown_ep(ep); + micscif_add_epd_to_zombie_list(ep, !MI_EPLOCK_HELD); + return 0; +} + +void +scif_ref_rel(struct kref *kref_count) +{ + struct endpt *epd; + epd = container_of(kref_count, struct endpt, ref_count); + __scif_close((scif_epd_t)epd); +} + +int +scif_close(scif_epd_t epd) +{ + __scif_flush(epd); + put_kref_count(epd); + return 0; +} +EXPORT_SYMBOL(scif_close); + +/** + * scif_flush() - Flush the endpoint + * @epd: The end point address returned from scif_open() + * + */ +int +__scif_flush(scif_epd_t epd) +{ + struct endpt *ep = (struct endpt *)epd; + struct endpt *tmpep; + struct list_head *pos, *tmpq; + unsigned long sflags; + int err; + + might_sleep(); + + micscif_inc_node_refcnt(ep->remote_dev, 1); + + spin_lock_irqsave(&ep->lock, sflags); + + switch (ep->state) { + case SCIFEP_CONNECTED: + { + struct nodemsg msg; + struct endpt *fep = NULL; + + init_waitqueue_head(&ep->disconwq); // Wait for connection queue + WARN_ON(ep->files); // files should never be set while connected + spin_unlock_irqrestore(&ep->lock, sflags); + spin_lock_irqsave(&ms_info.mi_connlock, sflags); + + list_for_each_safe(pos, tmpq, &ms_info.mi_connected) { + tmpep = list_entry(pos, struct endpt, list); + if (tmpep == ep) { + list_del(pos); + put_conn_count(ep->remote_dev); + fep = tmpep; + spin_lock(&ep->lock); + break; + } + } + + if (fep == NULL) { + // The other side has completed the disconnect before + // the end point can be removed from the list. Therefore + // the ep lock is not locked, traverse the disconnected list + // to find the endpoint, release the conn lock. + list_for_each_safe(pos, tmpq, &ms_info.mi_disconnected) { + tmpep = list_entry(pos, struct endpt, list); + if (tmpep == ep) { + list_del(pos); + break; + } + } + spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); + break; + } + + spin_unlock(&ms_info.mi_connlock); + + msg.uop = SCIF_DISCNCT; + msg.src = ep->port; + msg.dst = ep->peer; + msg.payload[0] = (uint64_t)ep; + msg.payload[1] = ep->remote_ep; + + err = micscif_nodeqp_send(ep->remote_dev, &msg, ep); + + spin_unlock_irqrestore(&ep->lock, sflags); + if (!err) + /* Now wait for the remote node to respond */ + wait_event_timeout(ep->disconwq, + (ep->state == SCIFEP_DISCONNECTED), NODE_ALIVE_TIMEOUT); + spin_lock_irqsave(&ms_info.mi_connlock, sflags); + spin_lock(&ep->lock); + list_add_tail(&ep->list, &ms_info.mi_disconnected); + ep->state = SCIFEP_DISCONNECTED; + spin_unlock(&ep->lock); + spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); + // Wake up threads blocked in send and recv + wake_up_interruptible(&ep->sendwq); + wake_up_interruptible(&ep->recvwq); + break; + } + case SCIFEP_LISTENING: + { + ep->state = SCIFEP_CLLISTEN; + + // If an accept is waiting wake it up + wake_up_interruptible(&ep->conwq); + spin_unlock_irqrestore(&ep->lock, sflags); + break; + } + default: + spin_unlock_irqrestore(&ep->lock, sflags); + break; + } + micscif_dec_node_refcnt(ep->remote_dev, 1); + return 0; +} + +/** + * scif_bind() - Bind a SCIF end point to a port ID. 
+ * @epd: The end point address returned from scif_open()
+ * @pn: Port ID (number) to bind to
+ *
+ * Set the port ID associated with the end point and place it in the bound state.
+ * If a port ID of zero is requested, a non-zero port ID is allocated for it.
+ *
+ * Upon successful completion the port ID (number) will be returned.
+ *
+ * If the end point is not in the unbound state, -EISCONN is returned.
+ *
+ * If port ID zero is specified and allocation of a port ID fails -ENOSPC
+ * will be returned.
+ */
+int
+__scif_bind(scif_epd_t epd, uint16_t pn)
+{
+	struct endpt *ep = (struct endpt *)epd;
+	unsigned long sflags;
+	int ret = 0;
+	int tmp;
+
+	pr_debug("SCIFAPI bind: ep %p %s requested port number %d\n",
+		ep, scif_ep_states[ep->state], pn);
+
+	might_sleep();
+
+	if (pn) {
+		/*
+		 * Modeled on http://www.ietf.org/rfc/rfc1700.txt?number=1700
+		 * SCIF ports below SCIF_ADMIN_PORT_END can only be bound by
+		 * system (or root) processes or by processes executed by
+		 * privileged users.
+		 */
+		if ( pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
+			ret = -EACCES;
+			goto scif_bind_admin_exit;
+		}
+	}
+
+	spin_lock_irqsave(&ep->lock, sflags);
+	if (ep->state == SCIFEP_BOUND) {
+		ret = -EINVAL;
+		goto scif_bind_exit;
+	} else if (ep->state != SCIFEP_UNBOUND) {
+		ret = -EISCONN;
+		goto scif_bind_exit;
+	}
+
+	if (pn) {
+		if ((tmp = rsrv_scif_port(pn)) != pn) {
+			ret = -EINVAL;
+			goto scif_bind_exit;
+		}
+	} else {
+		pn = get_scif_port();
+		if (!pn) {
+			ret = -ENOSPC;
+			goto scif_bind_exit;
+		}
+	}
+
+	ep->state = SCIFEP_BOUND;
+	ep->port.node = ms_info.mi_nodeid;
+	ep->port.port = pn;
+	ep->conn_async_state = ASYNC_CONN_IDLE;
+	ret = pn;
+	pr_debug("SCIFAPI bind: bound to port number %d\n", pn);
+
+scif_bind_exit:
+	spin_unlock_irqrestore(&ep->lock, sflags);
+scif_bind_admin_exit:
+	return ret;
+}
+
+int
+scif_bind(scif_epd_t epd, uint16_t pn)
+{
+	int ret;
+	get_kref_count(epd);
+	ret = __scif_bind(epd, pn);
+	put_kref_count(epd);
+	return ret;
+}
+EXPORT_SYMBOL(scif_bind);
+
+/**
+ * scif_listen() - Place the end point in the listening state
+ * @epd: The end point address returned from scif_open()
+ * @backlog: Maximum number of pending connection requests.
+ *
+ * The end point is placed in the listening state ready to accept connection
+ * requests. The backlog parameter is saved to indicate the maximum number of
+ * pending connection requests from remote nodes to queue. The end point is
+ * placed on a list of listening end points to allow a connection request to
+ * find it.
+ *
+ * Upon successful completion a zero is returned.
+ *
+ * If the end point is not in the bound state -EINVAL or -EISCONN is returned.
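+ *
+ * As an illustrative kernel-mode sketch only (port 2000 and backlog 5 are
+ * arbitrary values; error handling elided), a listening endpoint is
+ * typically set up as:
+ * \code
+ *	scif_epd_t lep = scif_open();
+ *	scif_epd_t nep;
+ *	struct scif_portID peer;
+ *	int ret, err;
+ *
+ *	ret = scif_bind(lep, 2000);	// returns the bound port number
+ *	err = scif_listen(lep, 5);
+ *	err = scif_accept(lep, &peer, &nep, SCIF_ACCEPT_SYNC);
+ *	// ... scif_send()/scif_recv() on nep ...
+ *	err = scif_close(nep);
+ *	err = scif_close(lep);
+ * \endcode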
+ * + */ +int +__scif_listen(scif_epd_t epd, int backlog) +{ + struct endpt *ep = (struct endpt *)epd; + unsigned long sflags; + + pr_debug("SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]); + + might_sleep(); + spin_lock_irqsave(&ep->lock, sflags); + switch (ep->state) { + case SCIFEP_ZOMBIE: + BUG_ON(SCIFEP_ZOMBIE == ep->state); + case SCIFEP_CLOSED: + case SCIFEP_CLOSING: + case SCIFEP_CLLISTEN: + case SCIFEP_UNBOUND: + case SCIFEP_DISCONNECTED: + spin_unlock_irqrestore(&ep->lock, sflags); + return -EINVAL; + case SCIFEP_LISTENING: + case SCIFEP_CONNECTED: + case SCIFEP_CONNECTING: + case SCIFEP_MAPPING: + spin_unlock_irqrestore(&ep->lock, sflags); + return -EISCONN; + case SCIFEP_BOUND: + break; + } + + ep->state = SCIFEP_LISTENING; + ep->backlog = backlog; + + ep->conreqcnt = 0; + ep->acceptcnt = 0; + INIT_LIST_HEAD(&ep->conlist); // List of connection requests + init_waitqueue_head(&ep->conwq); // Wait for connection queue + INIT_LIST_HEAD(&ep->li_accept); // User ep list for ACCEPTREG calls + spin_unlock_irqrestore(&ep->lock, sflags); + + // Listen status is complete so delete the qp information not needed + // on a listen before placing on the list of listening ep's + micscif_teardown_ep((void *)ep); + ep->qp_info.qp = NULL; + + spin_lock_irqsave(&ms_info.mi_eplock, sflags); + list_add_tail(&ep->list, &ms_info.mi_listen); + spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); + return 0; +} + +int +scif_listen(scif_epd_t epd, int backlog) +{ + int ret; + get_kref_count(epd); + ret = __scif_listen(epd, backlog); + put_kref_count(epd); + return ret; +} +EXPORT_SYMBOL(scif_listen); + +#ifdef _MIC_SCIF_ +/* + * scif_p2p_connect: + * @node: destination node id + * + * Try to setup a p2p connection between the current + * node and the desitination node. We need host to + * setup the initial p2p connections. So we send + * this message to the host which acts like proxy + * in setting up p2p connection. 
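+ *
+ * Summarizing the flow implemented below (as an aid to the reader): a
+ * SCIF_NODE_CONNECT message naming the destination node is sent to the
+ * host node queue pair, after which this node waits on sd_p2p_wq, bounded
+ * by NODE_ALIVE_TIMEOUT, for the host to move the peer either to
+ * SCIFDEV_RUNNING (P2P link usable) or to SCIFDEV_NOTPRESENT.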
+ */ +static int scif_p2p_connect(int node) +{ + struct micscif_dev *remote_dev = &scif_dev[node]; + struct nodemsg msg; + int err; + + pr_debug("%s:%d SCIF_NODE_CONNECT to host\n", __func__, __LINE__); + micscif_inc_node_refcnt(&scif_dev[SCIF_HOST_NODE], 1); + + msg.dst.node = SCIF_HOST_NODE; + msg.payload[0] = node; + msg.uop = SCIF_NODE_CONNECT; + + if ((err = micscif_nodeqp_send(&scif_dev[SCIF_HOST_NODE], + &msg, NULL))) { + printk(KERN_ERR "%s:%d error while sending SCIF_NODE_CONNECT to" + " node %d\n", __func__, __LINE__, node); + micscif_dec_node_refcnt(&scif_dev[SCIF_HOST_NODE], 1); + goto error; + } + + wait_event_interruptible_timeout(remote_dev->sd_p2p_wq, + (remote_dev->sd_state == SCIFDEV_RUNNING) || + (remote_dev->sd_state == SCIFDEV_NOTPRESENT), NODE_ALIVE_TIMEOUT); + + pr_debug("%s:%d SCIF_NODE_CONNECT state:%d\n", __func__, __LINE__, + remote_dev->sd_state); + micscif_dec_node_refcnt(&scif_dev[SCIF_HOST_NODE], 1); +error: + return err; +} +#endif + +static int scif_conn_func(struct endpt *ep) +{ + int err = 0; + struct nodemsg msg; + unsigned long sflags; + int term_sent = 0; + + if ((err = micscif_reserve_dma_chan(ep))) { + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + ep->state = SCIFEP_BOUND; + goto connect_error_simple; + } + // Initiate the first part of the endpoint QP setup + err = micscif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset, + ENDPT_QP_SIZE, ep->remote_dev); + if (err) { + printk(KERN_ERR "%s err %d qp_offset 0x%llx\n", + __func__, err, ep->qp_info.qp_offset); + ep->state = SCIFEP_BOUND; + goto connect_error_simple; + } + + micscif_inc_node_refcnt(ep->remote_dev, 1); + + // Format connect message and send it + msg.src = ep->port; + msg.dst = ep->conn_port; + msg.uop = SCIF_CNCT_REQ; + msg.payload[0] = (uint64_t)ep; + msg.payload[1] = ep->qp_info.qp_offset; + if ((err = micscif_nodeqp_send(ep->remote_dev, &msg, ep))) { + micscif_dec_node_refcnt(ep->remote_dev, 1); + goto connect_error_simple; + } + // Wait for request to be processed. 
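+	/*
+	 * The interruptible wait below can end three ways: the peer acts on
+	 * the request (state leaves SCIFEP_CONNECTING), the wait times out,
+	 * or a signal arrives. In the last two cases a SCIF_CNCT_TERM is
+	 * sent once to cancel the request, a grant that raced in while
+	 * terminating is answered with SCIF_CNCT_GNTNACK, and the endpoint
+	 * is returned to the SCIFEP_BOUND state.
+	 */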
+ while ((err = wait_event_interruptible_timeout(ep->conwq, + (ep->state != SCIFEP_CONNECTING), NODE_ALIVE_TIMEOUT)) <= 0) { + if (!err) + err = -ENODEV; + + pr_debug("SCIFAPI connect: ep %p ^C detected\n", ep); + // interrupted out of the wait + if (!term_sent++) { + int bak_err = err; + msg.uop = SCIF_CNCT_TERM; + if (!(err = micscif_nodeqp_send(ep->remote_dev, &msg, ep))) { +retry: + err = wait_event_timeout(ep->diswq, + (ep->state != SCIFEP_CONNECTING), NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + if (!err) + err = -ENODEV; + if (err > 0) + err = 0; + } + if (ep->state == SCIFEP_MAPPING) { + micscif_setup_qp_connect_response(ep->remote_dev, + ep->qp_info.qp, ep->qp_info.cnct_gnt_payload); + // Send grant nack + msg.uop = SCIF_CNCT_GNTNACK; + msg.payload[0] = ep->remote_ep; + /* No error handling for Notification messages */ + micscif_nodeqp_send(ep->remote_dev, &msg, ep); + } + // Ensure after that even after a timeout the state of the end point is bound + ep->state = SCIFEP_BOUND; + if (bak_err) + err = bak_err; + break; + } + } + + if (err > 0) + err = 0; + + if (term_sent || err) { + micscif_dec_node_refcnt(ep->remote_dev, 1); + goto connect_error_simple; + } + + if (ep->state == SCIFEP_MAPPING) { + err = micscif_setup_qp_connect_response(ep->remote_dev, + ep->qp_info.qp, ep->qp_info.cnct_gnt_payload); + + // If the resource to map the queue are not available then we need + // to tell the other side to terminate the accept + if (err) { + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + + // Send grant nack + msg.uop = SCIF_CNCT_GNTNACK; + msg.payload[0] = ep->remote_ep; + /* No error handling for Notification messages */ + micscif_nodeqp_send(ep->remote_dev, &msg, ep); + + ep->state = SCIFEP_BOUND; + micscif_dec_node_refcnt(ep->remote_dev, 1); + goto connect_error_simple; + } + + // Send a grant ack to inform the accept we are done mapping its resources. 
+		msg.uop = SCIF_CNCT_GNTACK;
+		msg.payload[0] = ep->remote_ep;
+		if (!(err = micscif_nodeqp_send(ep->remote_dev, &msg, ep))) {
+			ep->state = SCIFEP_CONNECTED;
+			spin_lock_irqsave(&ms_info.mi_connlock, sflags);
+			list_add_tail(&ep->list, &ms_info.mi_connected);
+			get_conn_count(ep->remote_dev);
+			spin_unlock_irqrestore(&ms_info.mi_connlock, sflags);
+			pr_debug("SCIFAPI connect: ep %p connected\n", ep);
+		} else
+			ep->state = SCIFEP_BOUND;
+		micscif_dec_node_refcnt(ep->remote_dev, 1);
+		goto connect_error_simple;
+
+	} else if (ep->state == SCIFEP_BOUND) {
+		pr_debug("SCIFAPI connect: ep %p connection refused\n", ep);
+		err = -ECONNREFUSED;
+		micscif_dec_node_refcnt(ep->remote_dev, 1);
+		goto connect_error_simple;
+
+	} else {
+		pr_debug("SCIFAPI connect: ep %p connection interrupted\n", ep);
+		err = -EINTR;
+		micscif_dec_node_refcnt(ep->remote_dev, 1);
+		goto connect_error_simple;
+	}
+	micscif_dec_node_refcnt(ep->remote_dev, 1);
+connect_error_simple:
+	return err;
+}
+
+/*
+ * micscif_conn_handler:
+ *
+ * Workqueue handler for servicing non-blocking SCIF connect
+ *
+ */
+void micscif_conn_handler(struct work_struct *work)
+{
+	struct endpt *ep;
+
+	do {
+		ep = NULL;
+		spin_lock(&ms_info.mi_nb_connect_lock);
+		if (!list_empty(&ms_info.mi_nb_connect_list)) {
+			ep = list_first_entry(&ms_info.mi_nb_connect_list,
+					struct endpt, conn_list);
+			list_del(&ep->conn_list);
+		}
+		spin_unlock(&ms_info.mi_nb_connect_lock);
+		if (ep) {
+			ep->conn_err = scif_conn_func(ep);
+			wake_up_interruptible(&ep->conn_pend_wq);
+		}
+	} while (ep);
+}
+
+/**
+ * scif_connect() - Request a connection to a remote node
+ * @epd: The end point address returned from scif_open()
+ * @dst: Remote node address information
+ *
+ * The function requests a scif connection to the remote node
+ * identified by the dst parameter. "dst" contains the remote node and
+ * port ids.
+ *
+ * Upon successful completion zero will be returned.
+ *
+ * If the end point is not in the bound state -EINVAL will be returned.
+ *
+ * If resource allocation fails during the connection sequence, -ENOMEM
+ * will be returned.
+ *
+ * If the remote side is not responding to connection requests the caller may
+ * terminate this function with a signal. If so, -EINTR will be returned.
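+ *
+ * As an illustrative kernel-mode sketch only (node 1 and port 2000 are
+ * arbitrary values; error handling elided), a connecting endpoint is
+ * typically set up as:
+ * \code
+ *	scif_epd_t epd = scif_open();
+ *	struct scif_portID dst = { .node = 1, .port = 2000 };
+ *	int ret, err;
+ *
+ *	ret = scif_bind(epd, 0);	// let SCIF pick a local port
+ *	err = scif_connect(epd, &dst);
+ *	// ... scif_send()/scif_recv() on epd ...
+ *	err = scif_close(epd);
+ * \endcode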
+ */ +int +__scif_connect(scif_epd_t epd, struct scif_portID *dst, bool non_block) +{ + struct endpt *ep = (struct endpt *)epd; + unsigned long sflags; + int err = 0; +#ifdef _MIC_SCIF_ + struct micscif_dev *remote_dev; +#endif + + pr_debug("SCIFAPI connect: ep %p %s\n", ep, + scif_ep_states[ep->state]); + + if (dst->node > MAX_BOARD_SUPPORTED) + return -ENODEV; + + might_sleep(); + +#ifdef _MIC_SCIF_ + remote_dev = &scif_dev[dst->node]; + if ((SCIFDEV_INIT == remote_dev->sd_state || + SCIFDEV_STOPPED == remote_dev->sd_state) && mic_p2p_enable) + if ((err = scif_p2p_connect(dst->node))) + return err; +#endif + + if (SCIFDEV_RUNNING != scif_dev[dst->node].sd_state && + SCIFDEV_SLEEPING != scif_dev[dst->node].sd_state) + return -ENODEV; + + spin_lock_irqsave(&ep->lock, sflags); + switch (ep->state) { + case SCIFEP_ZOMBIE: + BUG_ON(SCIFEP_ZOMBIE == ep->state); + + case SCIFEP_CLOSED: + case SCIFEP_CLOSING: + err = -EINVAL; + break; + + case SCIFEP_DISCONNECTED: + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) + ep->conn_async_state = ASYNC_CONN_FLUSH_WORK; + else + err = -EINVAL; + break; + + case SCIFEP_LISTENING: + case SCIFEP_CLLISTEN: + err = -EOPNOTSUPP; + break; + + case SCIFEP_CONNECTING: + case SCIFEP_MAPPING: + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) + err = -EINPROGRESS; + else + err = -EISCONN; + break; + + case SCIFEP_CONNECTED: + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) + ep->conn_async_state = ASYNC_CONN_FLUSH_WORK; + else + err = -EISCONN; + break; + + case SCIFEP_UNBOUND: + if ((ep->port.port = get_scif_port()) == 0) + err = -ENOSPC; + else { + ep->port.node = ms_info.mi_nodeid; + ep->conn_async_state = ASYNC_CONN_IDLE; + } + /* Fall through */ + case SCIFEP_BOUND: + /* + * If a non-blocking connect has been already initiated (conn_async_state + * is either ASYNC_CONN_INPROGRESS or ASYNC_CONN_FLUSH_WORK), the end point + * could end up in SCIF_BOUND due an error in the connection + * process (e.g., connnection refused) + * If conn_async_state is ASYNC_CONN_INPROGRESS - transition to + * ASYNC_CONN_FLUSH_WORK so that the error status can be collected. + * If the state is already ASYNC_CONN_FLUSH_WORK - then set the error + * to EINPROGRESS since some other thread is waiting to collect error status. 
+ */ + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) + ep->conn_async_state = ASYNC_CONN_FLUSH_WORK; + else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) + err = -EINPROGRESS; + else { + ep->conn_port = *dst; + init_waitqueue_head(&ep->sendwq); + init_waitqueue_head(&ep->recvwq); + init_waitqueue_head(&ep->conwq); + init_waitqueue_head(&ep->diswq); + ep->conn_async_state = 0; + + if (unlikely(non_block)) + ep->conn_async_state = ASYNC_CONN_INPROGRESS; + } + break; + } + + if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) + goto connect_simple_unlock1; + + ep->state = SCIFEP_CONNECTING; + ep->remote_dev = &scif_dev[dst->node]; + ep->sd_state = SCIFDEV_RUNNING; + ep->qp_info.qp->magic = SCIFEP_MAGIC; + ep->qp_info.qp->ep = (uint64_t)ep; + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { + init_waitqueue_head(&ep->conn_pend_wq); + spin_lock(&ms_info.mi_nb_connect_lock); + list_add_tail(&ep->conn_list, + &ms_info.mi_nb_connect_list); + spin_unlock(&ms_info.mi_nb_connect_lock); + err = -EINPROGRESS; + queue_work(ms_info.mi_conn_wq, &ms_info.mi_conn_work); + } +connect_simple_unlock1: + spin_unlock_irqrestore(&ep->lock, sflags); + + if (err) + return err; + else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) { + flush_workqueue(ms_info.mi_conn_wq); + err = ep->conn_err; + spin_lock_irqsave(&ep->lock, sflags); + ep->conn_async_state = ASYNC_CONN_IDLE; + spin_unlock_irqrestore(&ep->lock, sflags); + } else { + err = scif_conn_func(ep); + } + return err; +} + +int +scif_connect(scif_epd_t epd, struct scif_portID *dst) +{ + int ret; + get_kref_count(epd); + ret = __scif_connect(epd, dst, false); + put_kref_count(epd); + return ret; +} +EXPORT_SYMBOL(scif_connect); + +/** + * scif_accept() - Accept a connection request from the remote node + * @epd: The end point address returned from scif_open() + * @peer: Filled in with pear node and port information + * @newepd: New end point created for connection + * @flags: Indicates sychronous or asynchronous mode + * + * The function accepts a connection request from the remote node. Successful + * complete is indicate by a new end point being created and passed back + * to the caller for future reference. + * + * Upon successful complete a zero will be returned and the peer information + * will be filled in. + * + * If the end point is not in the listening state -EINVAL will be returned. + * + * If during the connection sequence resource allocation fails the -ENOMEM + * will be returned. + * + * If the function is called asynchronously and not connection request are + * pending it will return -EAGAIN. + * + * If the remote side is not sending any connection requests the caller may + * terminate this funciton with a signal. If so a -EINTR will be returned. 
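+ *
+ * Illustrative usage on the listening side (editor's sketch, not part of the
+ * original commit; the port number and backlog are arbitrary and error
+ * handling is abbreviated):
+ *
+ *     struct scif_portID peer;
+ *     scif_epd_t lep = scif_open(), nep;
+ *
+ *     scif_bind(lep, 2000);
+ *     scif_listen(lep, 10);
+ *     if (scif_accept(lep, &peer, &nep, SCIF_ACCEPT_SYNC) == 0)
+ *             pr_debug("accepted node %d port %d\n", peer.node, peer.port);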
+ */ +int +__scif_accept(scif_epd_t epd, struct scif_portID *peer, scif_epd_t *newepd, int flags) +{ + struct endpt *lep = (struct endpt *)epd; + struct endpt *cep; + struct conreq *conreq; + struct nodemsg msg; + unsigned long sflags; + int err; + + pr_debug("SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]); + + // Error if flags other than SCIF_ACCEPT_SYNC are set + if (flags & ~SCIF_ACCEPT_SYNC) { + pr_debug("SCIFAPI accept: ep %p invalid flags %x\n", lep, flags & ~SCIF_ACCEPT_SYNC); + return -EINVAL; + } + + if (!peer || !newepd) { + pr_debug("SCIFAPI accept: ep %p peer %p or newepd %p NULL\n", + lep, peer, newepd); + return -EINVAL; + } + + might_sleep(); + spin_lock_irqsave(&lep->lock, sflags); + if (lep->state != SCIFEP_LISTENING) { + pr_debug("SCIFAPI accept: ep %p not listending\n", lep); + spin_unlock_irqrestore(&lep->lock, sflags); + return -EINVAL; + } + + if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) { + // No connection request present and we do not want to wait + pr_debug("SCIFAPI accept: ep %p async request with nothing pending\n", lep); + spin_unlock_irqrestore(&lep->lock, sflags); + return -EAGAIN; + } + +retry_connection: + spin_unlock_irqrestore(&lep->lock, sflags); + lep->files = current ? current->files : NULL; + if ((err = wait_event_interruptible(lep->conwq, + (lep->conreqcnt || (lep->state != SCIFEP_LISTENING)))) != 0) { + // wait was interrupted + pr_debug("SCIFAPI accept: ep %p ^C detected\n", lep); + return err; // -ERESTARTSYS + } + + if (lep->state != SCIFEP_LISTENING) { + return -EINTR; + } + + spin_lock_irqsave(&lep->lock, sflags); + + if (!lep->conreqcnt) { + goto retry_connection; + } + + // Get the first connect request off the list + conreq = list_first_entry(&lep->conlist, struct conreq, list); + list_del(&conreq->list); + lep->conreqcnt--; + spin_unlock_irqrestore(&lep->lock, sflags); + + // Fill in the peer information + peer->node = conreq->msg.src.node; + peer->port = conreq->msg.src.port; + + // Create the connection endpoint + cep = (struct endpt *)kzalloc(sizeof(struct endpt), GFP_KERNEL); + if (!cep) { + pr_debug("SCIFAPI accept: ep %p new end point allocation failed\n", lep); + err = -ENOMEM; + goto scif_accept_error_epalloc; + } + spin_lock_init(&cep->lock); + mutex_init (&cep->sendlock); + mutex_init (&cep->recvlock); + cep->state = SCIFEP_CONNECTING; + cep->remote_dev = &scif_dev[peer->node]; + cep->remote_ep = conreq->msg.payload[0]; + cep->sd_state = SCIFDEV_RUNNING; + + if (!scifdev_alive(cep)) { + err = -ENODEV; + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + goto scif_accept_error_qpalloc; + } + + if (micscif_rma_ep_init(cep) < 0) { + pr_debug("SCIFAPI accept: ep %p new %p RMA EP init failed\n", lep, cep); + err = -ENOMEM; + goto scif_accept_error_qpalloc; + } + + if ((err = micscif_reserve_dma_chan(cep))) { + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + goto scif_accept_error_qpalloc; + } + + cep->qp_info.qp = (struct micscif_qp *)kzalloc(sizeof(struct micscif_qp), GFP_KERNEL); + if (!cep->qp_info.qp) { + printk(KERN_ERR "Port Qp Allocation Failed\n"); + err = -ENOMEM; + goto scif_accept_error_qpalloc; + } + + cep->qp_info.qp->magic = SCIFEP_MAGIC; + cep->qp_info.qp->ep = (uint64_t)cep; + micscif_inc_node_refcnt(cep->remote_dev, 1); + err = micscif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset, + conreq->msg.payload[1], ENDPT_QP_SIZE, cep->remote_dev); + if (err) { + pr_debug("SCIFAPI accept: ep %p new %p micscif_setup_qp_accept %d qp_offset 0x%llx\n", + lep, cep, err, 
cep->qp_info.qp_offset); + micscif_dec_node_refcnt(cep->remote_dev, 1); + goto scif_accept_error_map; + } + + cep->port.node = lep->port.node; + cep->port.port = lep->port.port; + cep->peer.node = peer->node; + cep->peer.port = peer->port; + cep->accepted_ep = true; + init_waitqueue_head(&cep->sendwq); // Wait for data to be consumed + init_waitqueue_head(&cep->recvwq); // Wait for data to be produced + init_waitqueue_head(&cep->conwq); // Wait for connection request + + // Return the grant message + msg.uop = SCIF_CNCT_GNT; + msg.src = cep->port; + msg.payload[0] = cep->remote_ep; + msg.payload[1] = cep->qp_info.qp_offset; + msg.payload[2] = (uint64_t)cep; + + err = micscif_nodeqp_send(cep->remote_dev, &msg, cep); + + micscif_dec_node_refcnt(cep->remote_dev, 1); + if (err) + goto scif_accept_error_map; +retry: + err = wait_event_timeout(cep->conwq, + (cep->state != SCIFEP_CONNECTING), NODE_ACCEPT_TIMEOUT); + if (!err && scifdev_alive(cep)) + goto retry; + + if (!err) { + err = -ENODEV; + goto scif_accept_error_map; + } + + if (err > 0) + err = 0; + + kfree(conreq); + + spin_lock_irqsave(&cep->lock, sflags); + + if (cep->state == SCIFEP_CONNECTED) { + // Connect sequence complete return new endpoint information + *newepd = (scif_epd_t)cep; + spin_unlock_irqrestore(&cep->lock, sflags); + pr_debug("SCIFAPI accept: ep %p new %p returning new epnd point\n", lep, cep); + return 0; + } + + if (cep->state == SCIFEP_CLOSING) { + // Remote failed to allocate resources and NAKed the grant. + // There is at this point nothing referencing the new end point. + spin_unlock_irqrestore(&cep->lock, sflags); + micscif_teardown_ep((void *)cep); + kfree(cep); + + // If call with sync flag then go back and wait. + if (flags & SCIF_ACCEPT_SYNC) { + spin_lock_irqsave(&lep->lock, sflags); + goto retry_connection; + } + + pr_debug("SCIFAPI accept: ep %p new %p remote failed to allocate resources\n", lep, cep); + return -EAGAIN; + } + + // While connect was in progress the other side closed and sent a disconnect + // so set the end point status to closed but return anyway. This will allow + // the caller to drain anything the other side may have put in the message queue. + *newepd = (scif_epd_t)cep; + spin_unlock_irqrestore(&cep->lock, sflags); + return 0; + + // Error allocating or mapping resources +scif_accept_error_map: + kfree(cep->qp_info.qp); + +scif_accept_error_qpalloc: + kfree(cep); + +scif_accept_error_epalloc: + micscif_inc_node_refcnt(&scif_dev[conreq->msg.src.node], 1); + // New reject the connection request due to lack of resources + msg.uop = SCIF_CNCT_REJ; + msg.dst.node = conreq->msg.src.node; + msg.dst.port = conreq->msg.src.port; + msg.payload[0] = conreq->msg.payload[0]; + msg.payload[1] = conreq->msg.payload[1]; + /* No error handling for Notification messages */ + micscif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg, NULL); + micscif_dec_node_refcnt(&scif_dev[conreq->msg.src.node], 1); + + kfree(conreq); + return err; +} + +int +scif_accept(scif_epd_t epd, struct scif_portID *peer, scif_epd_t *newepd, int flags) +{ + int ret; + get_kref_count(epd); + ret = __scif_accept(epd, peer, newepd, flags); + if (ret == 0) { + kref_init(&((*newepd)->ref_count)); + } + put_kref_count(epd); + return ret; +} +EXPORT_SYMBOL(scif_accept); + +/* + * scif_msg_param_check: + * @epd: The end point address returned from scif_open() + * @len: Length to receive + * @flags: Syncronous or asynchronous access + * + * Validate parameters for messaging APIs scif_send(..)/scif_recv(..). 
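+ *
+ * Editor's note (not part of the original commit): the check accepts a flags
+ * value of 0 or SCIF_RECV_BLOCK and rejects a negative len with -EINVAL. The
+ * same helper is reused on the send path, which appears to rely on
+ * SCIF_SEND_BLOCK and SCIF_RECV_BLOCK being defined to the same value in
+ * scif.h.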
+ */ +static inline int +scif_msg_param_check(scif_epd_t epd, int len, int flags) +{ + int ret = -EINVAL; + + if (len < 0) + goto err_ret; + + if (flags && (!(flags & SCIF_RECV_BLOCK))) + goto err_ret; + + ret = 0; + +err_ret: + return ret; +} + +#define SCIF_BLAST (1 << 1) /* Use bit 1 of flags field */ + +#ifdef SCIF_BLAST +/* + * Added a temporary implementation of the exception path. + * The cost to the normal path is 1 local variable (set once and + * tested once) plus 2 tests for the 'blast' flag. + * This only apply to the card side kernel API. + */ +#ifndef _MIC_SCIF_ +#undef SCIF_BLAST +#endif +#endif + +/** + * _scif_send() - Send data to connection queue + * @epd: The end point address returned from scif_open() + * @msg: Address to place data + * @len: Length to receive + * @flags: Syncronous or asynchronous access + * + * This function sends a packet of data to the queue * created by the + * connection establishment sequence. It returns when the packet has + * been completely sent. + * + * Successful completion returns the number of bytes sent. + * + * If the end point is not in the connect state returns -ENOTCONN; + * + * This function may be interrupted by a signal and will return -EINTR. + */ +int +_scif_send(scif_epd_t epd, void *msg, int len, int flags) +{ + struct endpt *ep = (struct endpt *)epd; + struct nodemsg notif_msg; + unsigned long sflags; + size_t curr_xfer_len = 0; + size_t sent_len = 0; + size_t write_count; + int ret; +#ifdef SCIF_BLAST + int tl; +#endif + + if (flags & SCIF_SEND_BLOCK) + might_sleep(); + +#ifdef SCIF_BLAST + if (flags & SCIF_BLAST) { + /* + * Do a decent try to acquire lock (~100 uSec) + */ + for (ret = tl = 0; ret < 100 && !tl; ret++) { + tl = spin_trylock_irqsave(&ep->lock, sflags); + cpu_relax(); + } + } else { + tl = 1; + spin_lock_irqsave(&ep->lock, sflags); + } +#else + spin_lock_irqsave(&ep->lock, sflags); +#endif + + while (sent_len != len) { + if (ep->state == SCIFEP_DISCONNECTED) { + ret = (int)(sent_len ? sent_len : -ECONNRESET); + goto unlock_dec_return; + } + if (ep->state != SCIFEP_CONNECTED) { + ret = (int)(sent_len ? sent_len : -ENOTCONN); + goto unlock_dec_return; + } + if (!scifdev_alive(ep)) { + ret = (int) (sent_len ? sent_len : -ENODEV); + goto unlock_dec_return; + } + write_count = micscif_rb_space(&ep->qp_info.qp->outbound_q); + if (write_count) { + /* + * Best effort to send as much data as there + * is space in the RB particularly important for the + * Non Blocking case. + */ + curr_xfer_len = min(len - sent_len, write_count); + ret = micscif_rb_write(&ep->qp_info.qp->outbound_q, msg, + (uint32_t)curr_xfer_len); + if (ret < 0) { + ret = -EFAULT; + goto unlock_dec_return; + } + if (ret) { + spin_unlock_irqrestore(&ep->lock, sflags); + /* + * If there is space in the RB and we have the + * EP lock held then writing to the RB should + * succeed. Releasing spin lock before asserting + * to avoid deadlocking the system. + */ + BUG_ON(ret); + } + /* + * Success. Update write pointer. + */ + micscif_rb_commit(&ep->qp_info.qp->outbound_q); +#ifdef SCIF_BLAST + if (flags & SCIF_BLAST) { + /* + * Bypass-path; set flag int the host side node_qp + * and ring the doorbell. Host will wake-up all + * listeners, such that the message will be seen. + * Need micscif_send_host_intr() to be non-static. + */ + extern int micscif_send_host_intr(struct micscif_dev *, uint32_t); + ep->remote_dev->qpairs->remote_qp->blast = 1; + smp_wmb(); /* Sufficient or need sfence? 
*/ + micscif_send_host_intr(ep->remote_dev, 0); + } else { + /* + * Normal path: send notification on the + * node_qp ring buffer and ring the doorbell. + */ + notif_msg.src = ep->port; + notif_msg.uop = SCIF_CLIENT_SENT; + notif_msg.payload[0] = ep->remote_ep; + if ((ret = micscif_nodeqp_send(ep->remote_dev, ¬if_msg, ep))) { + ret = sent_len ? sent_len : ret; + goto unlock_dec_return; + } + } +#else + /* + * Send a notification to the peer about the + * produced data message. + */ + notif_msg.src = ep->port; + notif_msg.uop = SCIF_CLIENT_SENT; + notif_msg.payload[0] = ep->remote_ep; + if ((ret = micscif_nodeqp_send(ep->remote_dev, ¬if_msg, ep))) { + ret = (int)(sent_len ? sent_len : ret); + goto unlock_dec_return; + } +#endif + sent_len += curr_xfer_len; + msg = (char *)msg + curr_xfer_len; + continue; + } + curr_xfer_len = min(len - sent_len, (size_t)(ENDPT_QP_SIZE - 1)); + /* + * Not enough space in the RB. Return in the Non Blocking case. + */ + if (!(flags & SCIF_SEND_BLOCK)) { + ret = (int)sent_len; + goto unlock_dec_return; + } +#ifdef SCIF_BLAST + /* + * Flags SCIF_BLAST and SCIF_SEND_BLOCK are mutually + * exclusive, so if we get here we know that SCIF_BLAST + * was not set and thus we _do_ have the spinlock. + * No need to check variable tl here + */ +#endif + spin_unlock_irqrestore(&ep->lock, sflags); + /* + * Wait for a message now in the Blocking case. + */ + if ((ret = wait_event_interruptible(ep->sendwq, + (SCIFEP_CONNECTED != ep->state) || + (micscif_rb_space(&ep->qp_info.qp->outbound_q) + >= curr_xfer_len) || (!scifdev_alive(ep))))) { + ret = (int) (sent_len ? sent_len : ret); + goto dec_return; + } + spin_lock_irqsave(&ep->lock, sflags); + } + ret = len; +unlock_dec_return: +#ifdef SCIF_BLAST + if (tl) +#endif + spin_unlock_irqrestore(&ep->lock, sflags); +dec_return: + return ret; +} + +/** + * _scif_recv() - Recieve data from connection queue + * @epd: The end point address returned from scif_open() + * @msg: Address to place data + * @len: Length to receive + * @flags: Syncronous or asynchronous access + * @touser: package send to user buffer or kernel + * + * This function requests to receive a packet of data from the queue + * created by the connection establishment sequence. It reads the amount + * of data requested before returning. + * + * This function differs from the scif_send() by also returning data if the + * end point is in the disconnected state and data is present. + * + * Successful completion returns the number of bytes read. + * + * If the end point is not in the connect state or in the disconnected state + * with data prosent it returns -ENOTCONN; + * + * This function may be interrupted by a signal and will return -EINTR. + */ +int +_scif_recv(scif_epd_t epd, void *msg, int len, int flags) +{ + int read_size; + struct endpt *ep = (struct endpt *)epd; + unsigned long sflags; + struct nodemsg notif_msg; + size_t curr_recv_len = 0; + size_t remaining_len = len; + size_t read_count; + int ret; + + if (flags & SCIF_RECV_BLOCK) + might_sleep(); + + micscif_inc_node_refcnt(ep->remote_dev, 1); + spin_lock_irqsave(&ep->lock, sflags); + while (remaining_len) { + if (ep->state != SCIFEP_CONNECTED && + ep->state != SCIFEP_DISCONNECTED) { + ret = (int) (len - remaining_len) ? 
+ (int) (len - remaining_len) : -ENOTCONN; + goto unlock_dec_return; + } + read_count = micscif_rb_count(&ep->qp_info.qp->inbound_q, + (int) remaining_len); + if (read_count) { + /* + * Best effort to recv as much data as there + * are bytes to read in the RB particularly + * important for the Non Blocking case. + */ + curr_recv_len = min(remaining_len, read_count); + read_size = micscif_rb_get_next( + &ep->qp_info.qp->inbound_q, + msg, (int) curr_recv_len); + if (read_size < 0){ + /* only could happen when copy to USER buffer + */ + ret = -EFAULT; + goto unlock_dec_return; + } + if (read_size != curr_recv_len) { + spin_unlock_irqrestore(&ep->lock, sflags); + /* + * If there are bytes to be read from the RB and + * we have the EP lock held then reading from + * RB should succeed. Releasing spin lock before + * asserting to avoid deadlocking the system. + */ + BUG_ON(read_size != curr_recv_len); + } + if (ep->state == SCIFEP_CONNECTED) { + /* + * Update the read pointer only if the endpoint is + * still connected else the read pointer might no + * longer exist since the peer has freed resources! + */ + micscif_rb_update_read_ptr(&ep->qp_info.qp->inbound_q); + /* + * Send a notification to the peer about the + * consumed data message only if the EP is in + * SCIFEP_CONNECTED state. + */ + notif_msg.src = ep->port; + notif_msg.uop = SCIF_CLIENT_RCVD; + notif_msg.payload[0] = ep->remote_ep; + if ((ret = micscif_nodeqp_send(ep->remote_dev, ¬if_msg, ep))) { + ret = (len - (int)remaining_len) ? + (len - (int)remaining_len) : ret; + goto unlock_dec_return; + } + } + remaining_len -= curr_recv_len; + msg = (char *)msg + curr_recv_len; + continue; + } + curr_recv_len = min(remaining_len, (size_t)(ENDPT_QP_SIZE - 1)); + /* + * Bail out now if the EP is in SCIFEP_DISCONNECTED state else + * we will keep looping forever. + */ + if (ep->state == SCIFEP_DISCONNECTED) { + ret = (len - (int)remaining_len) ? + (len - (int)remaining_len) : -ECONNRESET; + goto unlock_dec_return; + } + /* + * Return in the Non Blocking case if there is no data + * to read in this iteration. + */ + if (!(flags & SCIF_RECV_BLOCK)) { + ret = len - (int)remaining_len; + goto unlock_dec_return; + } + spin_unlock_irqrestore(&ep->lock, sflags); + micscif_dec_node_refcnt(ep->remote_dev, 1); + /* + * Wait for a message now in the Blocking case. + * or until other side disconnects. + */ + if ((ret = wait_event_interruptible(ep->recvwq, + (SCIFEP_CONNECTED != ep->state) || + (micscif_rb_count(&ep->qp_info.qp->inbound_q, + curr_recv_len) >= curr_recv_len) || (!scifdev_alive(ep))))) { + ret = (len - remaining_len) ? + (len - (int)remaining_len) : ret; + goto dec_return; + } + micscif_inc_node_refcnt(ep->remote_dev, 1); + spin_lock_irqsave(&ep->lock, sflags); + } + ret = len; +unlock_dec_return: + spin_unlock_irqrestore(&ep->lock, sflags); + micscif_dec_node_refcnt(ep->remote_dev, 1); +dec_return: + return ret; +} + + +/** + * scif_user_send() - Send data to connection queue + * @epd: The end point address returned from scif_open() + * @msg: Address to place data + * @len: Length to receive + * @flags: Syncronous or asynchronous access + * + * This function is called from the driver IOCTL entry point + * only and is a wrapper for _scif_send(). 
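+ *
+ * Editor's note (not part of the original commit): user data is staged
+ * through a kmalloc'ed bounce buffer of at most
+ * 1 << (MAX_ORDER + PAGE_SHIFT - 1) bytes per chunk; with the common values
+ * MAX_ORDER = 11 and PAGE_SHIFT = 12 that is 1 << 22 bytes, so larger
+ * transfers are copied and sent in 4 MiB pieces.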
+ */ +int +scif_user_send(scif_epd_t epd, void *msg, int len, int flags) +{ + struct endpt *ep = (struct endpt *)epd; + int err = 0; + int sent_len = 0; + char *tmp; + int loop_len; + int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));; + pr_debug("SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]); + + if (!len) + return 0; + + if ((err = scif_msg_param_check(epd, len, flags))) + goto send_err; + + if (!(tmp = kmalloc(chunk_len, GFP_KERNEL))) { + err = -ENOMEM; + goto send_err; + } + err = 0; + micscif_inc_node_refcnt(ep->remote_dev, 1); + /* + * Grabbing the lock before breaking up the transfer in + * multiple chunks is required to ensure that messages do + * not get fragmented and reordered. + */ + mutex_lock(&ep->sendlock); + + while (sent_len != len) { + msg = (void *)((char *)msg + err); + loop_len = len - sent_len; + loop_len = min(chunk_len, loop_len); + if (copy_from_user(tmp, msg, loop_len)) { + err = -EFAULT; + goto send_free_err; + } + err = _scif_send(epd, (void *)tmp, loop_len, flags); + if (err < 0) { + goto send_free_err; + } + sent_len += err; + if (err !=loop_len) { + goto send_free_err; + } + } +send_free_err: + mutex_unlock(&ep->sendlock); + micscif_dec_node_refcnt(ep->remote_dev, 1); + kfree(tmp); +send_err: + return err < 0 ? err : sent_len; +} + +/** + * scif_user_recv() - Recieve data from connection queue + * @epd: The end point address returned from scif_open() + * @msg: Address to place data + * @len: Length to receive + * @flags: Syncronous or asynchronous access + * + * This function is called from the driver IOCTL entry point + * only and is a wrapper for _scif_recv(). + */ +int +scif_user_recv(scif_epd_t epd, void *msg, int len, int flags) +{ + struct endpt *ep = (struct endpt *)epd; + int err = 0; + int recv_len = 0; + char *tmp; + int loop_len; + int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));; + pr_debug("SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]); + + if (!len) + return 0; + + if ((err = scif_msg_param_check(epd, len, flags))) + goto recv_err; + + if (!(tmp = kmalloc(chunk_len, GFP_KERNEL))) { + err = -ENOMEM; + goto recv_err; + } + err = 0; + /* + * Grabbing the lock before breaking up the transfer in + * multiple chunks is required to ensure that messages do + * not get fragmented and reordered. + */ + mutex_lock(&ep->recvlock); + + while (recv_len != len) { + msg = (void *)((char *)msg + err); + loop_len = len - recv_len; + loop_len = min(chunk_len, loop_len); + if ((err = _scif_recv(epd, tmp, loop_len, flags)) < 0) + goto recv_free_err; + if (copy_to_user(msg, tmp, err)) { + err = -EFAULT; + goto recv_free_err; + } + recv_len += err; + if (err !=loop_len) { + goto recv_free_err; + } + } +recv_free_err: + mutex_unlock(&ep->recvlock); + kfree(tmp); +recv_err: + return err < 0 ? err : recv_len; +} + +#ifdef SCIF_BLAST +/* + * Added a temporary implementation of the exception path. + * The cost to the normal path testing of 2 flag bits instead + * of just one and a change to condition for node-wakeup. + */ +#endif + +/** + * scif_send() - Send data to connection queue + * @epd: The end point address returned from scif_open() + * @msg: Address to place data + * @len: Length to receive + * @flags: Syncronous or asynchronous access + * + * This function is called from the kernel mode only and is + * a wrapper for _scif_send(). 
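+ *
+ * Illustrative usage (editor's sketch, not part of the original commit;
+ * assumes epd is an already connected endpoint and error handling is
+ * abbreviated):
+ *
+ *     char buf[64] = "hello";
+ *     int n;
+ *
+ *     n = scif_send(epd, buf, sizeof(buf), SCIF_SEND_BLOCK);
+ *     if (n == sizeof(buf))
+ *             n = scif_recv(epd, buf, sizeof(buf), SCIF_RECV_BLOCK);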
+ */ +int +__scif_send(scif_epd_t epd, void *msg, int len, int flags) +{ + struct endpt *ep = (struct endpt *)epd; + int ret; + + pr_debug("SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]); + if (!len) + return 0; + +#ifdef SCIF_BLAST + /* + * KAA: this is same code as scif_msg_param_check(), + * but since that routine is shared with scif_recv + * I thought is safer to replicate code here. + */ + if (len < 0) + return -EINVAL; + + if (flags && !(flags & (SCIF_SEND_BLOCK | SCIF_BLAST))) + return -EINVAL; + + if ((flags & (SCIF_SEND_BLOCK | SCIF_BLAST)) == + (SCIF_SEND_BLOCK | SCIF_BLAST)) + return -EINVAL; +#else + if ((ret = scif_msg_param_check(epd, len, flags))) + return ret; +#endif + /* + * Cannot block while waiting for node to wake up + * if non blocking messaging mode is requested. Return + * ENODEV if the remote node is idle. + */ + if (!(flags & SCIF_SEND_BLOCK) && ep->remote_dev && + SCIF_NODE_IDLE == atomic_long_read( + &ep->remote_dev->scif_ref_cnt)) + return -ENODEV; + + micscif_inc_node_refcnt(ep->remote_dev, 1); + + /* + * Grab the mutex lock in the blocking case only + * to ensure messages do not get fragmented/reordered. + * The non blocking mode is protected using spin locks + * in _scif_send(). + */ + if (flags & SCIF_SEND_BLOCK) + mutex_lock(&ep->sendlock); + + ret = _scif_send(epd, msg, len, flags); + + if (flags & SCIF_SEND_BLOCK) + mutex_unlock(&ep->sendlock); + + micscif_dec_node_refcnt(ep->remote_dev, 1); + return ret; +} + +int +scif_send(scif_epd_t epd, void *msg, int len, int flags) +{ + int ret; + get_kref_count(epd); + ret = __scif_send(epd, msg, len, flags); + put_kref_count(epd); + return ret; +} +EXPORT_SYMBOL(scif_send); + +/** + * scif_recv() - Recieve data from connection queue + * @epd: The end point address returned from scif_open() + * @msg: Address to place data + * @len: Length to receive + * @flags: Syncronous or asynchronous access + * + * This function is called from the kernel mode only and is + * a wrapper for _scif_recv(). + */ +int +__scif_recv(scif_epd_t epd, void *msg, int len, int flags) +{ + struct endpt *ep = (struct endpt *)epd; + int ret; + + pr_debug("SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]); + + if (!len) + return 0; + + if ((ret = scif_msg_param_check(epd, len, flags))) + return ret; + + /* + * Cannot block while waiting for node to wake up + * if non blocking messaging mode is requested. Return + * ENODEV if the remote node is idle. + */ + if (!flags && ep->remote_dev && + SCIF_NODE_IDLE == atomic_long_read( + &ep->remote_dev->scif_ref_cnt)) + return -ENODEV; + + /* + * Grab the mutex lock in the blocking case only + * to ensure messages do not get fragmented/reordered. + * The non blocking mode is protected using spin locks + * in _scif_send(). + */ + if (flags & SCIF_RECV_BLOCK) + mutex_lock(&ep->recvlock); + + ret = _scif_recv(epd, msg, len, flags); + + if (flags & SCIF_RECV_BLOCK) + mutex_unlock(&ep->recvlock); + + return ret; +} + +int +scif_recv(scif_epd_t epd, void *msg, int len, int flags) +{ + int ret; + get_kref_count(epd); + ret = __scif_recv(epd, msg, len, flags); + put_kref_count(epd); + return ret; +} +EXPORT_SYMBOL(scif_recv); + +/** + * __scif_pin_pages - __scif_pin_pages() pins the physical pages which back + * the range of virtual address pages starting at addr and continuing for + * len bytes. addr and len are constrained to be multiples of the page size. 
+ * A successful scif_register() call returns an opaque pointer value + * which may be used in subsequent calls to scif_register_pinned_pages(). + * + * Return Values + * Upon successful completion, __scif_pin_pages() returns a + * scif_pinned_pages_t value else an apt error is returned as documented + * in scif.h. Protections of the set of pinned pages are also returned by + * reference via out_prot. + */ +int +__scif_pin_pages(void *addr, size_t len, int *out_prot, + int map_flags, scif_pinned_pages_t *pages) +{ + struct scif_pinned_pages *pinned_pages; + int nr_pages, err = 0, i; + bool vmalloc_addr = false; + bool try_upgrade = false; + int prot = *out_prot; + int ulimit = 0; + struct mm_struct *mm = NULL; + + /* Unsupported flags */ + if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT)) + return -EINVAL; + ulimit = !!(map_flags & SCIF_MAP_ULIMIT); + + /* Unsupported protection requested */ + if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE)) + return -EINVAL; + + /* addr/len must be page aligned. len should be non zero */ + if ((!len) || + (align_low((uint64_t)addr, PAGE_SIZE) != (uint64_t)addr) || + (align_low((uint64_t)len, PAGE_SIZE) != (uint64_t)len)) + return -EINVAL; + + might_sleep(); + + nr_pages = (int)(len >> PAGE_SHIFT); + + /* Allocate a set of pinned pages */ + if (!(pinned_pages = micscif_create_pinned_pages(nr_pages, prot))) + return -ENOMEM; + + if (unlikely(map_flags & SCIF_MAP_KERNEL)) { + if (is_vmalloc_addr(addr)) + vmalloc_addr = true; + + for (i = 0; i < nr_pages; i++) { + if (unlikely(vmalloc_addr)) + pinned_pages->pages[i] = + vmalloc_to_page((char *)addr + (i * PAGE_SIZE) ); + else + pinned_pages->pages[i] = + virt_to_page((char *)addr + (i * PAGE_SIZE) ); + pinned_pages->num_pages[i] = 1; + pinned_pages->nr_contig_chunks++; + } + pinned_pages->nr_pages = nr_pages; + pinned_pages->map_flags = SCIF_MAP_KERNEL; + } else { + if (prot == SCIF_PROT_READ) + try_upgrade = true; + prot |= SCIF_PROT_WRITE; +retry: + mm = current->mm; + down_write(&mm->mmap_sem); + if (ulimit) { + err = __scif_check_inc_pinned_vm(mm, nr_pages); + if (err) { + up_write(&mm->mmap_sem); + pinned_pages->nr_pages = 0; + goto error_unmap; + } + } + + pinned_pages->nr_pages = get_user_pages( + current, + mm, + (uint64_t)addr, + nr_pages, + !!(prot & SCIF_PROT_WRITE), + 0, + pinned_pages->pages, + pinned_pages->vma); + up_write(&mm->mmap_sem); + if (nr_pages == pinned_pages->nr_pages) { +#ifdef RMA_DEBUG + atomic_long_add_return(nr_pages, &ms_info.rma_pin_cnt); +#endif + micscif_detect_large_page(pinned_pages, addr); + } else { + if (try_upgrade) { + if (ulimit) + __scif_dec_pinned_vm_lock(mm, nr_pages, 0); +#ifdef RMA_DEBUG + WARN_ON(atomic_long_sub_return(1, + &ms_info.rma_mm_cnt) < 0); +#endif + /* Roll back any pinned pages */ + for (i = 0; i < pinned_pages->nr_pages; i++) { + if (pinned_pages->pages[i]) + page_cache_release(pinned_pages->pages[i]); + } + prot &= ~SCIF_PROT_WRITE; + try_upgrade = false; + goto retry; + } + } + pinned_pages->map_flags = 0; + } + + if (pinned_pages->nr_pages < nr_pages) { + err = -EFAULT; + pinned_pages->nr_pages = nr_pages; + goto dec_pinned; + } + + *out_prot = prot; + atomic_set(&pinned_pages->ref_count, nr_pages); + *pages = pinned_pages; + return err; +dec_pinned: + if (ulimit) + __scif_dec_pinned_vm_lock(mm, nr_pages, 0); + /* Something went wrong! 
Rollback */ +error_unmap: + pinned_pages->nr_pages = nr_pages; + micscif_destroy_pinned_pages(pinned_pages); + *pages = NULL; + pr_debug("%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len); + return err; + +} + +/** + * scif_pin_pages - scif_pin_pages() pins the physical pages which back + * the range of virtual address pages starting at addr and continuing for + * len bytes. addr and len are constrained to be multiples of the page size. + * A successful scif_register() call returns an opaque pointer value + * which may be used in subsequent calls to scif_register_pinned_pages(). + * + * Return Values + * Upon successful completion, scif_register() returns a + * scif_pinned_pages_t value else an apt error is returned as documented + * in scif.h + */ +int +scif_pin_pages(void *addr, size_t len, int prot, + int map_flags, scif_pinned_pages_t *pages) +{ + return __scif_pin_pages(addr, len, &prot, map_flags, pages); +} +EXPORT_SYMBOL(scif_pin_pages); + +/** + * scif_unpin_pages: Unpin a set of pages + * + * Return Values: + * Upon successful completion, scif_unpin_pages() returns 0; + * else an apt error is returned as documented in scif.h + */ +int +scif_unpin_pages(scif_pinned_pages_t pinned_pages) +{ + int err = 0, ret; + + if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic) + return -EINVAL; + + ret = atomic_sub_return((int32_t)pinned_pages->nr_pages, + &pinned_pages->ref_count); + BUG_ON(ret < 0); + + /* + * Destroy the window if the ref count for this set of pinned + * pages has dropped to zero. If it is positive then there is + * a valid registered window which is backed by these pages and + * it will be destroyed once all such windows are unregistered. + */ + if (!ret) + err = micscif_destroy_pinned_pages(pinned_pages); + + return err; +} +EXPORT_SYMBOL(scif_unpin_pages); + +/** + * scif_register_pinned_pages: Mark a memory region for remote access. + * + * The scif_register_pinned_pages() function opens a window, a range + * of whole pages of the registered address space of the endpoint epd, + * starting at offset po. The value of po, further described below, is + * a function of the parameters offset and pinned_pages, and the value + * of map_flags. Each page of the window represents a corresponding + * physical memory page of pinned_pages; the length of the window is + * the same as the length of pinned_pages. A successful scif_register() + * call returns po as the return value. + * + * Return Values + * Upon successful completion, scif_register_pinned_pages() returns + * the offset at which the mapping was placed (po); + * else an apt error is returned as documented in scif.h + */ +off_t +__scif_register_pinned_pages(scif_epd_t epd, + scif_pinned_pages_t pinned_pages, off_t offset, int map_flags) +{ + struct endpt *ep = (struct endpt *)epd; + uint64_t computed_offset; + struct reg_range_t *window; + int err; + size_t len; + +#ifdef DEBUG + /* Bad EP */ + if (!ep || !pinned_pages || pinned_pages->magic != SCIFEP_MAGIC) + return -EINVAL; +#endif + /* Unsupported flags */ + if (map_flags & ~SCIF_MAP_FIXED) + return -EINVAL; + + len = pinned_pages->nr_pages << PAGE_SHIFT; + + /* + * Offset is not page aligned/negative or offset+len + * wraps around with SCIF_MAP_FIXED. 
+ */ + if ((map_flags & SCIF_MAP_FIXED) && + ((align_low(offset, PAGE_SIZE) != offset) || + (offset < 0) || + (offset + (off_t)len < offset))) + return -EINVAL; + + might_sleep(); + + if ((err = verify_epd(ep))) + return err; + + /* Compute the offset for this registration */ + if ((err = micscif_get_window_offset(ep, map_flags, offset, + len, &computed_offset))) + return err; + + /* Allocate and prepare self registration window */ + if (!(window = micscif_create_window(ep, pinned_pages->nr_pages, + computed_offset, false))) { + micscif_free_window_offset(ep, computed_offset, len); + return -ENOMEM; + } + + window->pinned_pages = pinned_pages; + window->nr_pages = pinned_pages->nr_pages; + window->nr_contig_chunks = pinned_pages->nr_contig_chunks; + window->prot = pinned_pages->prot; + + /* + * This set of pinned pages now belongs to this window as well. + * Assert if the ref count is zero since it is an error to + * pass pinned_pages to scif_register_pinned_pages() after + * calling scif_unpin_pages(). + */ + if (!atomic_add_unless(&pinned_pages->ref_count, + (int32_t)pinned_pages->nr_pages, 0)) + BUG_ON(1); + + micscif_inc_node_refcnt(ep->remote_dev, 1); + + if ((err = micscif_send_alloc_request(ep, window))) { + micscif_dec_node_refcnt(ep->remote_dev, 1); + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + goto error_unmap; + } + + /* Prepare the remote registration window */ + if ((err = micscif_prep_remote_window(ep, window))) { + micscif_dec_node_refcnt(ep->remote_dev, 1); + micscif_set_nr_pages(ep->remote_dev, window); + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + goto error_unmap; + } + + /* Tell the peer about the new window */ + if ((err = micscif_send_scif_register(ep, window))) { + micscif_dec_node_refcnt(ep->remote_dev, 1); + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + goto error_unmap; + } + + micscif_dec_node_refcnt(ep->remote_dev, 1); + + /* No further failures expected. Insert new window */ + mutex_lock(&ep->rma_info.rma_lock); + set_window_ref_count(window, pinned_pages->nr_pages); + micscif_insert_window(window, &ep->rma_info.reg_list); + mutex_unlock(&ep->rma_info.rma_lock); + + return computed_offset; +error_unmap: + micscif_destroy_window(ep, window); + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + return err; +} + +off_t +scif_register_pinned_pages(scif_epd_t epd, + scif_pinned_pages_t pinned_pages, off_t offset, int map_flags) +{ + off_t ret; + get_kref_count(epd); + ret = __scif_register_pinned_pages(epd, pinned_pages, offset, map_flags); + put_kref_count(epd); + return ret; +} +EXPORT_SYMBOL(scif_register_pinned_pages); + +/** + * scif_get_pages - Add references to remote registered pages + * + * scif_get_pages() returns the addresses of the physical pages represented + * by those pages of the registered address space of the peer of epd, starting + * at offset offset and continuing for len bytes. offset and len are constrained + * to be multiples of the page size. + * + * Return Values + * Upon successful completion, scif_get_pages() returns 0; + * else an apt error is returned as documented in scif.h. 
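+ *
+ * Illustrative usage (editor's sketch, not part of the original commit;
+ * assumes the peer has registered a window of at least one page at offset 0
+ * and error handling is abbreviated):
+ *
+ *     struct scif_range *range;
+ *
+ *     if (!scif_get_pages(epd, 0, PAGE_SIZE, &range)) {
+ *             pr_debug("first page pa %#llx prot 0x%x\n",
+ *                     (unsigned long long)range->phys_addr[0],
+ *                     range->prot_flags);
+ *             scif_put_pages(range);
+ *     }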
+ */ +int +__scif_get_pages(scif_epd_t epd, off_t offset, size_t len, struct scif_range **pages) +{ + struct endpt *ep = (struct endpt *)epd; + struct micscif_rma_req req; + struct reg_range_t *window = NULL; + int nr_pages, err, i; + + pr_debug("SCIFAPI get_pinned_pages: ep %p %s offset 0x%lx len 0x%lx\n", + ep, scif_ep_states[ep->state], offset, len); + + if ((err = verify_epd(ep))) + return err; + + if ((!len) || + (offset < 0) || + (offset + len < offset) || + (align_low((uint64_t)offset, PAGE_SIZE) != (uint64_t)offset) || + (align_low((uint64_t)len, PAGE_SIZE) != (uint64_t)len)) + return -EINVAL; + + nr_pages = len >> PAGE_SHIFT; + + req.out_window = &window; + req.offset = offset; + req.prot = 0; + req.nr_bytes = len; + req.type = WINDOW_SINGLE; + req.head = &ep->rma_info.remote_reg_list; + + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + if ((err = micscif_query_window(&req))) { + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + goto error; + } + RMA_MAGIC(window); + + /* Allocate scif_range */ + if (!(*pages = kzalloc(sizeof(struct scif_range), GFP_KERNEL))) { + err = -ENOMEM; + goto error; + } + + /* Allocate phys addr array */ + if (!((*pages)->phys_addr = scif_zalloc(nr_pages * sizeof(dma_addr_t)))) { + err = -ENOMEM; + goto error; + } + +#ifndef _MIC_SCIF_ + /* Allocate virtual address array */ + if (!((*pages)->va = scif_zalloc(nr_pages * sizeof(void *)))) { + err = -ENOMEM; + goto error; + } +#endif + /* Populate the values */ + (*pages)->cookie = window; + (*pages)->nr_pages = nr_pages; + (*pages)->prot_flags = window->prot; + + for (i = 0; i < nr_pages; i++) { + (*pages)->phys_addr[i] = +#if !defined(_MIC_SCIF_) && defined(CONFIG_ML1OM) + is_self_scifdev(ep->remote_dev) ? + micscif_get_dma_addr(window, offset + (i * PAGE_SIZE), + NULL, NULL, NULL) : window->phys_addr[i]; +#else + get_phys_addr(micscif_get_dma_addr(window, offset + (i * PAGE_SIZE), + NULL, NULL, NULL), ep->remote_dev); +#endif +#ifndef _MIC_SCIF_ + if (!is_self_scifdev(ep->remote_dev)) + (*pages)->va[i] = + get_per_dev_ctx(ep->remote_dev->sd_node - 1)->aper.va + + (*pages)->phys_addr[i] - + get_per_dev_ctx(ep->remote_dev->sd_node - 1)->aper.pa; +#endif + } + + window->get_put_ref_count += nr_pages; + get_window_ref_count(window, nr_pages); +error: + mutex_unlock(&ep->rma_info.rma_lock); + if (err) { + if (*pages) { + if ((*pages)->phys_addr) + scif_free((*pages)->phys_addr, nr_pages * sizeof(dma_addr_t)); +#ifndef _MIC_SCIF_ + if ((*pages)->va) + scif_free((*pages)->va, nr_pages * sizeof(void *)); +#endif + kfree(*pages); + *pages = NULL; + } + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + } else { + micscif_create_node_dep(ep->remote_dev, nr_pages); + } + return err; +} + +int +scif_get_pages(scif_epd_t epd, off_t offset, size_t len, struct scif_range **pages) +{ + int ret; + get_kref_count(epd); + ret = __scif_get_pages(epd, offset, len, pages); + put_kref_count(epd); + return ret; +} +EXPORT_SYMBOL(scif_get_pages); + +/** + * scif_put_pages - Remove references from remote registered pages + * + * scif_put_pages() returns a scif_range structure previously obtained by + * calling scif_get_pages(). When control returns, the physical pages may + * become available for reuse if and when the window which represented + * those pages is unregistered. Therefore, those pages must never be accessed. + * + * Return Values + * Upon success, zero is returned. + * else an apt error is returned as documented in scif.h. 
+ */ +int +__scif_put_pages(struct scif_range *pages) +{ + struct endpt *ep; + struct reg_range_t *window; + struct nodemsg msg; + + if (!pages || !pages->cookie) + return -EINVAL; + + window = pages->cookie; + + if (!window || window->magic != SCIFEP_MAGIC || + !window->get_put_ref_count) + return -EINVAL; + + ep = (struct endpt *)window->ep; + + /* + * If the state is SCIFEP_CONNECTED or SCIFEP_DISCONNECTED then the + * callee should be allowed to release references to the pages, + * else the endpoint was not connected in the first place, + * hence the ENOTCONN. + */ + if (ep->state != SCIFEP_CONNECTED && ep->state != SCIFEP_DISCONNECTED) + return -ENOTCONN; + + /* + * TODO: Re-enable this check once ref counts for kernel mode APIs + * have been implemented and node remove call backs are called before + * the node is removed. This check results in kernel mode APIs not + * being able to release pages correctly since node remove callbacks + * are called after the node is removed currently. + * if (!scifdev_alive(ep)) + * return -ENODEV; + */ + + micscif_inc_node_refcnt(ep->remote_dev, 1); + mutex_lock(&ep->rma_info.rma_lock); + + /* Decrement the ref counts and check for errors */ + window->get_put_ref_count -= pages->nr_pages; + BUG_ON(window->get_put_ref_count < 0); + put_window_ref_count(window, pages->nr_pages); + + /* Initiate window destruction if ref count is zero */ + if (!window->ref_count) { + drain_dma_intr(ep->rma_info.dma_chan); + /* Inform the peer about this window being destroyed. */ + msg.uop = SCIF_MUNMAP; + msg.src = ep->port; + msg.payload[0] = window->peer_window; + /* No error handling for notification messages */ + micscif_nodeqp_send(ep->remote_dev, &msg, ep); + list_del(&window->list_member); + /* Destroy this window from the peer's registered AS */ + micscif_destroy_remote_window(ep, window); + } + mutex_unlock(&ep->rma_info.rma_lock); + + micscif_dec_node_refcnt(ep->remote_dev, 1); + micscif_destroy_node_dep(ep->remote_dev, pages->nr_pages); + scif_free(pages->phys_addr, pages->nr_pages * sizeof(dma_addr_t)); +#ifndef _MIC_SCIF_ + scif_free(pages->va, pages->nr_pages * sizeof(void*)); +#endif + kfree(pages); + return 0; +} + +int +scif_put_pages(struct scif_range *pages) +{ + int ret; + struct reg_range_t *window = pages->cookie; + struct endpt *ep = (struct endpt *)window->ep; + if (atomic_read(&(&(ep->ref_count))->refcount) > 0) { + kref_get(&(ep->ref_count)); + } else { + WARN_ON(1); + } + ret = __scif_put_pages(pages); + if (atomic_read(&(&(ep->ref_count))->refcount) > 0) { + kref_put(&(ep->ref_count), scif_ref_rel); + } else { + //WARN_ON(1); + } + return ret; +} +EXPORT_SYMBOL(scif_put_pages); + +int scif_event_register(scif_callback_t handler) +{ + /* Add to the list of event handlers */ + struct scif_callback *cb = kmalloc(sizeof(*cb), GFP_KERNEL); + if (!cb) + return -ENOMEM; + mutex_lock(&ms_info.mi_event_cblock); + cb->callback_handler = handler; + list_add_tail(&cb->list_member, &ms_info.mi_event_cb); + mutex_unlock(&ms_info.mi_event_cblock); + return 0; +} +EXPORT_SYMBOL(scif_event_register); + +int scif_event_unregister(scif_callback_t handler) +{ + struct list_head *pos, *unused; + struct scif_callback *temp; + int err = -EINVAL; + + mutex_lock(&ms_info.mi_event_cblock); + list_for_each_safe(pos, unused, &ms_info.mi_event_cb) { + temp = list_entry(pos, struct scif_callback, list_member); + if (temp->callback_handler == handler) { + err = 0; + list_del(pos); + kfree(temp); + break; + } + } + + mutex_unlock(&ms_info.mi_event_cblock); + return err; +} 
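+
+/*
+ * Editor's note (not part of the original commit): a kernel client would
+ * typically pair the two calls above as
+ *
+ *     scif_event_register(my_handler);
+ *     ...
+ *     scif_event_unregister(my_handler);
+ *
+ * where my_handler is a hypothetical callback matching the scif_callback_t
+ * prototype declared in scif.h.
+ */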
+EXPORT_SYMBOL(scif_event_unregister); + +/** + * scif_register - Mark a memory region for remote access. + * @epd: endpoint descriptor + * @addr: starting virtual address + * @len: length of range + * @offset: offset of window + * @prot: read/write protection + * @map_flags: flags + * + * Return Values + * Upon successful completion, scif_register() returns the offset + * at which the mapping was placed else an apt error is returned + * as documented in scif.h. + */ +off_t +__scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, + int prot, int map_flags) +{ + scif_pinned_pages_t pinned_pages; + off_t err; + struct endpt *ep = (struct endpt *)epd; + uint64_t computed_offset; + struct reg_range_t *window; + struct mm_struct *mm = NULL; + + pr_debug("SCIFAPI register: ep %p %s addr %p len 0x%lx" + " offset 0x%lx prot 0x%x map_flags 0x%x\n", + epd, scif_ep_states[epd->state], addr, len, offset, prot, map_flags); + + /* Unsupported flags */ + if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL)) + return -EINVAL; + + /* Unsupported protection requested */ + if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE)) + return -EINVAL; + + /* addr/len must be page aligned. len should be non zero */ + if ((!len) || + (align_low((uint64_t)addr, PAGE_SIZE) != (uint64_t)addr) || + (align_low((uint64_t)len, PAGE_SIZE) != (uint64_t)len)) + return -EINVAL; + + /* + * Offset is not page aligned/negative or offset+len + * wraps around with SCIF_MAP_FIXED. + */ + if ((map_flags & SCIF_MAP_FIXED) && + ((align_low(offset, PAGE_SIZE) != offset) || + (offset < 0) || + (offset + (off_t)len < offset))) + return -EINVAL; + + + might_sleep(); + +#ifdef DEBUG + /* Bad EP */ + if (!ep) + return -EINVAL; +#endif + + if ((err = verify_epd(ep))) + return err; + + /* Compute the offset for this registration */ + if ((err = micscif_get_window_offset(ep, map_flags, offset, + len, &computed_offset))) + return err; + + /* Allocate and prepare self registration window */ + if (!(window = micscif_create_window(ep, len >> PAGE_SHIFT, + computed_offset, false))) { + micscif_free_window_offset(ep, computed_offset, len); + return -ENOMEM; + } + + micscif_inc_node_refcnt(ep->remote_dev, 1); + + window->nr_pages = len >> PAGE_SHIFT; + + if ((err = micscif_send_alloc_request(ep, window))) { + micscif_destroy_incomplete_window(ep, window); + micscif_dec_node_refcnt(ep->remote_dev, 1); + return err; + } + + if (!(map_flags & SCIF_MAP_KERNEL)) { + mm = __scif_acquire_mm(); + map_flags |= SCIF_MAP_ULIMIT; + } + /* Pin down the pages */ + if ((err = scif_pin_pages(addr, len, prot, + map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT), + &pinned_pages))) { + micscif_destroy_incomplete_window(ep, window); + micscif_dec_node_refcnt(ep->remote_dev, 1); + __scif_release_mm(mm); + goto error; + } + + window->pinned_pages = pinned_pages; + window->nr_contig_chunks = pinned_pages->nr_contig_chunks; + window->prot = pinned_pages->prot; + window->mm = mm; + + /* Prepare the remote registration window */ + if ((err = micscif_prep_remote_window(ep, window))) { + micscif_dec_node_refcnt(ep->remote_dev, 1); + micscif_set_nr_pages(ep->remote_dev, window); + printk(KERN_ERR "%s %d err %ld\n", __func__, __LINE__, err); + goto error_unmap; + } + + /* Tell the peer about the new window */ + if ((err = micscif_send_scif_register(ep, window))) { + micscif_dec_node_refcnt(ep->remote_dev, 1); + printk(KERN_ERR "%s %d err %ld\n", __func__, __LINE__, err); + goto error_unmap; + } + + micscif_dec_node_refcnt(ep->remote_dev, 1); + + /* No further failures expected. 
Insert new window */ + mutex_lock(&ep->rma_info.rma_lock); + set_window_ref_count(window, pinned_pages->nr_pages); + micscif_insert_window(window, &ep->rma_info.reg_list); + mutex_unlock(&ep->rma_info.rma_lock); + + pr_debug("SCIFAPI register: ep %p %s addr %p" + " len 0x%lx computed_offset 0x%llx\n", + epd, scif_ep_states[epd->state], addr, len, computed_offset); + return computed_offset; +error_unmap: + micscif_destroy_window(ep, window); +error: + printk(KERN_ERR "%s %d err %ld\n", __func__, __LINE__, err); + return err; +} + +off_t +scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, + int prot, int map_flags) +{ + off_t ret; + get_kref_count(epd); + ret = __scif_register(epd, addr, len, offset, prot, map_flags); + put_kref_count(epd); + return ret; +} +EXPORT_SYMBOL(scif_register); + +/** + * scif_unregister - Release a memory region registered for remote access. + * @epd: endpoint descriptor + * @offset: start of range to unregister + * @len: length of range to unregister + * + * Return Values + * Upon successful completion, scif_unegister() returns zero + * else an apt error is returned as documented in scif.h. + */ +int +__scif_unregister(scif_epd_t epd, off_t offset, size_t len) +{ + struct endpt *ep = (struct endpt *)epd; + struct reg_range_t *window = NULL; + struct micscif_rma_req req; + int nr_pages, err; + + pr_debug("SCIFAPI unregister: ep %p %s offset 0x%lx len 0x%lx\n", + ep, scif_ep_states[ep->state], offset, len); + + /* len must be page aligned. len should be non zero */ + if ((!len) || + (align_low((uint64_t)len, PAGE_SIZE) != (uint64_t)len)) + return -EINVAL; + + /* Offset is not page aligned or offset+len wraps around */ + if ((align_low(offset, PAGE_SIZE) != offset) || + (offset + (off_t)len < offset)) + return -EINVAL; + + if ((err = verify_epd(ep))) + return err; + + might_sleep(); + nr_pages = (int)(len >> PAGE_SHIFT); + + req.out_window = &window; + req.offset = offset; + req.prot = 0; + req.nr_bytes = len; + req.type = WINDOW_FULL; + req.head = &ep->rma_info.reg_list; + + micscif_inc_node_refcnt(ep->remote_dev, 1); + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? 
*/ + if ((err = micscif_query_window(&req))) { + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + goto error; + } + /* Unregister all the windows in this range */ + if ((err = micscif_rma_list_unregister(window, offset, nr_pages))) + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); +error: + mutex_unlock(&ep->rma_info.rma_lock); + micscif_dec_node_refcnt(ep->remote_dev, 1); + return err; +} + +int +scif_unregister(scif_epd_t epd, off_t offset, size_t len) +{ + int ret; + get_kref_count(epd); + ret = __scif_unregister(epd, offset, len); + put_kref_count(epd); + return ret; +} +EXPORT_SYMBOL(scif_unregister); + +unsigned int scif_pollfd(struct file *f, poll_table *wait, scif_epd_t epd) +{ + unsigned int ret; + get_kref_count(epd); + ret = __scif_pollfd(f, wait, (struct endpt *)epd); + put_kref_count(epd); + return ret; +} + +unsigned int __scif_pollfd(struct file *f, poll_table *wait, struct endpt *ep) +{ + unsigned int mask = 0; + unsigned long sflags; + + pr_debug("SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]); + + micscif_inc_node_refcnt(ep->remote_dev, 1); + spin_lock_irqsave(&ep->lock, sflags); + + if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) { +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + if (!wait || poll_requested_events(wait) & SCIF_POLLOUT) { +#else + if (!wait || wait->key & SCIF_POLLOUT) { +#endif + poll_wait(f, &ep->conn_pend_wq, wait); + if (ep->state == SCIFEP_CONNECTED || + ep->state == SCIFEP_DISCONNECTED || + ep->conn_err) { + mask |= SCIF_POLLOUT; + } + goto return_scif_poll; + } + } + + /* Is it OK to use wait->key?? */ + if (ep->state == SCIFEP_LISTENING) { +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + if (!wait || poll_requested_events(wait) & SCIF_POLLIN) { +#else + if (!wait || wait->key & SCIF_POLLIN) { +#endif + spin_unlock_irqrestore(&ep->lock, sflags); + poll_wait(f, &ep->conwq, wait); + spin_lock_irqsave(&ep->lock, sflags); + if (ep->conreqcnt) + mask |= SCIF_POLLIN; + } else { + mask |= SCIF_POLLERR; + } + goto return_scif_poll; + } + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + if (!wait || poll_requested_events(wait) & SCIF_POLLIN) { +#else + if (!wait || wait->key & SCIF_POLLIN) { +#endif + if (ep->state != SCIFEP_CONNECTED && + ep->state != SCIFEP_LISTENING && + ep->state != SCIFEP_DISCONNECTED) { + mask |= SCIF_POLLERR; + goto return_scif_poll; + } + + spin_unlock_irqrestore(&ep->lock, sflags); + poll_wait(f, &ep->recvwq, wait); + spin_lock_irqsave(&ep->lock, sflags); + if (micscif_rb_count(&ep->qp_info.qp->inbound_q, 1)) + mask |= SCIF_POLLIN; + } + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + if (!wait || poll_requested_events(wait) & SCIF_POLLOUT) { +#else + if (!wait || wait->key & SCIF_POLLOUT) { +#endif + if (ep->state != SCIFEP_CONNECTED && + ep->state != SCIFEP_LISTENING) { + mask |= SCIF_POLLERR; + goto return_scif_poll; + } + + spin_unlock_irqrestore(&ep->lock, sflags); + poll_wait(f, &ep->sendwq, wait); + spin_lock_irqsave(&ep->lock, sflags); + if (micscif_rb_space(&ep->qp_info.qp->outbound_q)) + mask |= SCIF_POLLOUT; + } + +return_scif_poll: + /* If the endpoint is in the diconnected state then return hangup instead of error */ + if (ep->state == SCIFEP_DISCONNECTED) { + mask &= ~SCIF_POLLERR; + mask |= SCIF_POLLHUP; + } + + spin_unlock_irqrestore(&ep->lock, sflags); + micscif_dec_node_refcnt(ep->remote_dev, 1); + return mask; +} + +/* + * The private data field of each VMA used to mmap a remote window + * points to an instance of struct vma_pvt + */ +struct vma_pvt { 
+ struct endpt *ep; /* End point for remote window */ + uint64_t offset; /* offset within remote window */ + bool valid_offset; /* offset is valid only if the original + * mmap request was for a single page + * else the offset within the vma is + * the correct offset + */ + struct kref ref; +}; + +static void vma_pvt_release(struct kref *ref) +{ + struct vma_pvt *vmapvt = container_of(ref, struct vma_pvt, ref); + kfree(vmapvt); +} + +/** + * scif_vma_open - VMA open driver callback + * @vma: VMM memory area. + * The open method is called by the kernel to allow the subsystem implementing + * the VMA to initialize the area. This method is invoked any time a new + * reference to the VMA is made (when a process forks, for example). + * The one exception happens when the VMA is first created by mmap; + * in this case, the driver's mmap method is called instead. + * This function is also invoked when an existing VMA is split by the kernel + * due to a call to munmap on a subset of the VMA resulting in two VMAs. + * The kernel invokes this function only on one of the two VMAs. + * + * Return Values: None. + */ +static void scif_vma_open(struct vm_area_struct *vma) +{ + struct vma_pvt *vmapvt = ((vma)->vm_private_data); + pr_debug("SCIFAPI vma open: vma_start 0x%lx vma_end 0x%lx\n", + ((vma)->vm_start), ((vma)->vm_end)); + kref_get(&vmapvt->ref); +} + +/** + * scif_munmap - VMA close driver callback. + * @vma: VMM memory area. + * When an area is destroyed, the kernel calls its close operation. + * Note that there's no usage count associated with VMA's; the area + * is opened and closed exactly once by each process that uses it. + * + * Return Values: None. + */ +void scif_munmap(struct vm_area_struct *vma) +{ + struct endpt *ep; + struct vma_pvt *vmapvt = ((vma)->vm_private_data); + int nr_pages = (int)( (((vma)->vm_end) - ((vma)->vm_start)) >> PAGE_SHIFT ); + uint64_t offset; + struct micscif_rma_req req; + struct reg_range_t *window = NULL; + int err; + + might_sleep(); + pr_debug("SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n", + ((vma)->vm_start), ((vma)->vm_end)); + /* used to be a BUG_ON(), prefer keeping the kernel alive */ + if (!vmapvt) { + WARN_ON(1); + printk(KERN_ERR "SCIFAPI munmap: vma_start 0x%lx vma_end 0x%lx\n", + ((vma)->vm_start), ((vma)->vm_end)); + return; + } + + ep = vmapvt->ep; + offset = vmapvt->valid_offset ? vmapvt->offset : + ((vma)->vm_pgoff) << PAGE_SHIFT; + pr_debug("SCIFAPI munmap: ep %p %s nr_pages 0x%x offset 0x%llx\n", + ep, scif_ep_states[ep->state], nr_pages, offset); + + req.out_window = &window; + req.offset = offset; + req.nr_bytes = ((vma)->vm_end) - ((vma)->vm_start); + req.prot = ((vma)->vm_flags) & (VM_READ | VM_WRITE); + req.type = WINDOW_PARTIAL; + req.head = &ep->rma_info.remote_reg_list; + + micscif_inc_node_refcnt(ep->remote_dev, 1); + mutex_lock(&ep->rma_info.rma_lock); + + if ((err = micscif_query_window(&req))) + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + else + micscif_rma_list_munmap(window, offset, nr_pages); + + mutex_unlock(&ep->rma_info.rma_lock); + micscif_dec_node_refcnt(ep->remote_dev, 1); + + micscif_destroy_node_dep(ep->remote_dev, nr_pages); + + /* + * The kernel probably zeroes these out but we still want + * to clean up our own mess just in case. 
+ */ + vma->vm_ops = NULL; + ((vma)->vm_private_data) = NULL; + kref_put(&vmapvt->ref, vma_pvt_release); + micscif_rma_put_task(ep, nr_pages); +} + +static const struct vm_operations_struct micscif_vm_ops = { + .open = scif_vma_open, + .close = scif_munmap, +}; + +/** + * scif_mmap - Map pages in virtual address space to a remote window. + * @vma: VMM memory area. + * @epd: endpoint descriptor + * + * Return Values + * Upon successful completion, scif_mmap() returns zero + * else an apt error is returned as documented in scif.h. + */ +int +scif_mmap(struct vm_area_struct *vma, scif_epd_t epd) +{ + struct micscif_rma_req req; + struct reg_range_t *window = NULL; + struct endpt *ep = (struct endpt *)epd; + uint64_t start_offset = ((vma)->vm_pgoff) << PAGE_SHIFT; + int nr_pages = (int)( (((vma)->vm_end) - ((vma)->vm_start)) >> PAGE_SHIFT); + int err; + struct vma_pvt *vmapvt; + + pr_debug("SCIFAPI mmap: ep %p %s start_offset 0x%llx nr_pages 0x%x\n", + ep, scif_ep_states[ep->state], start_offset, nr_pages); + + if ((err = verify_epd(ep))) + return err; + + might_sleep(); + + if ((err = micscif_rma_get_task(ep, nr_pages))) + return err; + + if (!(vmapvt = kzalloc(sizeof(*vmapvt), GFP_KERNEL))) { + micscif_rma_put_task(ep, nr_pages); + return -ENOMEM; + } + + vmapvt->ep = ep; + kref_init(&vmapvt->ref); + + micscif_create_node_dep(ep->remote_dev, nr_pages); + + req.out_window = &window; + req.offset = start_offset; + req.nr_bytes = ((vma)->vm_end) - ((vma)->vm_start); + req.prot = ((vma)->vm_flags) & (VM_READ | VM_WRITE); + req.type = WINDOW_PARTIAL; + req.head = &ep->rma_info.remote_reg_list; + + micscif_inc_node_refcnt(ep->remote_dev, 1); + mutex_lock(&ep->rma_info.rma_lock); + /* Does a valid window exist? */ + if ((err = micscif_query_window(&req))) { + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + goto error; + } + RMA_MAGIC(window); + + /* Default prot for loopback */ + if (!is_self_scifdev(ep->remote_dev)) { +#ifdef _MIC_SCIF_ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); +#else + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); +#endif + } + + /* + * VM_DONTCOPY - Do not copy this vma on fork + * VM_DONTEXPAND - Cannot expand with mremap() + * VM_RESERVED - Count as reserved_vm like IO + * VM_PFNMAP - Page-ranges managed without "struct page" + * VM_IO - Memory mapped I/O or similar + * + * We do not want to copy this VMA automatically on a fork(), + * expand this VMA due to mremap() or swap out these pages since + * the VMA is actually backed by physical pages in the remote + * node's physical memory and not via a struct page. + */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_PFNMAP; +#else + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP; +#endif + + if (!is_self_scifdev(ep->remote_dev)) + ((vma)->vm_flags) |= VM_IO; + + /* Map this range of windows */ + if ((err = micscif_rma_list_mmap(window, + start_offset, nr_pages, vma))) { + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + goto error; + } + /* Set up the driver call back */ + vma->vm_ops = &micscif_vm_ops; + ((vma)->vm_private_data) = vmapvt; + /* + * For 1 page sized VMAs the kernel (remap_pfn_range) replaces the + * offset in the VMA with the pfn, so in that case save off the + * original offset, since the page sized VMA can't be split into + * smaller VMAs the offset is not going to change. 
+ */ + if (nr_pages == 1) { + vmapvt->offset = start_offset; + vmapvt->valid_offset = true; + } + err = 0; +error: + mutex_unlock(&ep->rma_info.rma_lock); + micscif_dec_node_refcnt(ep->remote_dev, 1); + if (err) { + micscif_destroy_node_dep(ep->remote_dev, nr_pages); + kfree(vmapvt); + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + micscif_rma_put_task(ep, nr_pages); + } + return err; +} + +/** + * scif_readfrom() - Read SCIF offset data from remote connection + * @epd: endpoint descriptor + * @loffset: offset in local registered address space to which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space from which to copy + * @flags: flags + * + * Return Values + * Upon successful completion, scif_readfrom() returns zero + * else an apt error is returned as documented in scif.h. + */ +int +scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, + off_t roffset, int flags) +{ + int ret; + get_kref_count(epd); + ret = __scif_readfrom(epd, loffset, len, roffset, flags); + put_kref_count(epd); + return ret; +} +EXPORT_SYMBOL(scif_readfrom); + +/** + * scif_writeto() - Send SCIF offset data to remote connection + * @epd: endpoint descriptor + * @loffset: offset in local registered address space from which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space to which to copy + * @flags: flags + * + * Return Values + * Upon successful completion, scif_writeto() returns zero + * else an apt error is returned as documented in scif.h. + * + */ +int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, + off_t roffset, int flags) +{ + int ret; + get_kref_count(epd); + ret = __scif_writeto(epd, loffset, len, roffset, flags); + put_kref_count(epd); + return ret; +} +EXPORT_SYMBOL(scif_writeto); + +#define HOST_LOOPB_MAGIC_MARK 0xdead + +/** + * scif_fence_mark: + * @epd: endpoint descriptor + * @flags: control flags + * @mark: marked handle returned as output. + * + * scif_fence_mark() returns after marking the current set of all uncompleted + * RMAs initiated through the endpoint epd or marking the current set of all + * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are + * marked with a value returned in mark. The application may subsequently + * await completion of all RMAs so marked. + * + * Return Values + * Upon successful completion, scif_fence_mark() returns 0; + * else an apt error is returned as documented in scif.h. + */ +int __scif_fence_mark(scif_epd_t epd, int flags, int *mark) +{ + struct endpt *ep = (struct endpt *)epd; + int err = 0; + + pr_debug("SCIFAPI fence_mark: ep %p %s flags 0x%x mark 0x%x\n", + ep, scif_ep_states[ep->state], flags, *mark); + + if ((err = verify_epd(ep))) + return err; + + /* Invalid flags? */ + if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER)) + return -EINVAL; + + /* At least one of init self or peer RMA should be set */ + if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))) + return -EINVAL; + + /* Exactly one of init self or peer RMA should be set but not both */ + if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER)) + return -EINVAL; + +#ifndef _MIC_SCIF_ + /* + * Host Loopback does not need to use DMA. + * Return a valid mark to be symmetric. 
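+ * The mark handed back is HOST_LOOPB_MAGIC_MARK, the only value __scif_fence_wait() accepts for the loopback device.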
+ */ + if (is_self_scifdev(ep->remote_dev)) { + *mark = HOST_LOOPB_MAGIC_MARK; + return 0; + } +#endif + + if (flags & SCIF_FENCE_INIT_SELF) { + if ((*mark = micscif_fence_mark(epd)) < 0) + err = *mark; + } else { + micscif_inc_node_refcnt(ep->remote_dev, 1); + err = micscif_send_fence_mark(ep, mark); + micscif_dec_node_refcnt(ep->remote_dev, 1); + } + if (err) + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + + pr_debug("SCIFAPI fence_mark: ep %p %s flags 0x%x mark 0x%x err %d\n", + ep, scif_ep_states[ep->state], flags, *mark, err); + return err; +} + +int scif_fence_mark(scif_epd_t epd, int flags, int *mark) +{ + int ret; + get_kref_count(epd); + ret = __scif_fence_mark(epd, flags, mark); + put_kref_count(epd); + return ret; +} +EXPORT_SYMBOL(scif_fence_mark); + +/** + * scif_fence_wait: + * @epd: endpoint descriptor + * @mark: mark request. + * + * scif_fence_wait() returns after all RMAs marked with mark have completed. + * + * Return Values + * Upon successful completion, scif_fence_wait() returns 0; + * else an apt error is returned as documented in scif.h. + */ +int __scif_fence_wait(scif_epd_t epd, int mark) +{ + struct endpt *ep = (struct endpt *)epd; + int err = 0; + + pr_debug("SCIFAPI fence_wait: ep %p %s mark 0x%x\n", + ep, scif_ep_states[ep->state], mark); + + if ((err = verify_epd(ep))) + return err; + +#ifndef _MIC_SCIF_ + /* + * Host Loopback does not need to use DMA. + * The only valid mark provided is 0 so simply + * return success if the mark is valid. + */ + if (is_self_scifdev(ep->remote_dev)) { + if (HOST_LOOPB_MAGIC_MARK == mark) + return 0; + else + return -EINVAL; + } +#endif + if (mark & SCIF_REMOTE_FENCE) { + micscif_inc_node_refcnt(ep->remote_dev, 1); + err = micscif_send_fence_wait(epd, mark); + micscif_dec_node_refcnt(ep->remote_dev, 1); + } else { + err = dma_mark_wait(epd->rma_info.dma_chan, mark, true); + if (!err && atomic_read(&ep->rma_info.tw_refcount)) + queue_work(ms_info.mi_misc_wq, &ms_info.mi_misc_work); + } + + if (err < 0) + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + return err; +} + +int scif_fence_wait(scif_epd_t epd, int mark) +{ + int ret; + get_kref_count(epd); + ret = __scif_fence_wait(epd, mark); + put_kref_count(epd); + return ret; +} +EXPORT_SYMBOL(scif_fence_wait); + +/* + * scif_fence_signal: + * @loff: local offset + * @lval: local value to write to loffset + * @roff: remote offset + * @rval: remote value to write to roffset + * @flags: flags + * + * scif_fence_signal() returns after marking the current set of all + * uncompleted RMAs initiated through the endpoint epd or marking + * the current set of all uncompleted RMAs initiated through the peer + * of endpoint epd. + * + * Return Values + * Upon successful completion, scif_fence_signal() returns 0; + * else an apt error is returned as documented in scif.h. + */ +int __scif_fence_signal(scif_epd_t epd, off_t loff, uint64_t lval, + off_t roff, uint64_t rval, int flags) +{ + struct endpt *ep = (struct endpt *)epd; + int err = 0; + + pr_debug("SCIFAPI fence_signal: ep %p %s loff 0x%lx lval 0x%llx " + "roff 0x%lx rval 0x%llx flags 0x%x\n", + ep, scif_ep_states[ep->state], loff, lval, roff, rval, flags); + + if ((err = verify_epd(ep))) + return err; + + /* Invalid flags? 
*/ + if (flags & ~(SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER | + SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE)) + return -EINVAL; + + /* At least one of init self or peer RMA should be set */ + if (!(flags & (SCIF_FENCE_INIT_SELF | SCIF_FENCE_INIT_PEER))) + return -EINVAL; + + /* Exactly one of init self or peer RMA should be set but not both */ + if ((flags & SCIF_FENCE_INIT_SELF) && (flags & SCIF_FENCE_INIT_PEER)) + return -EINVAL; + + /* At least one of SCIF_SIGNAL_LOCAL or SCIF_SIGNAL_REMOTE required */ + if (!(flags & (SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE))) + return -EINVAL; + + /* Only Dword offsets allowed */ + if ((flags & SCIF_SIGNAL_LOCAL) && (loff & (sizeof(uint32_t) - 1))) + return -EINVAL; + + /* Only Dword aligned offsets allowed */ + if ((flags & SCIF_SIGNAL_REMOTE) && (roff & (sizeof(uint32_t) - 1))) + return -EINVAL; + + if (flags & SCIF_FENCE_INIT_PEER) { + micscif_inc_node_refcnt(ep->remote_dev, 1); + err = micscif_send_fence_signal(epd, roff, + rval, loff, lval, flags); + micscif_dec_node_refcnt(ep->remote_dev, 1); + } else { + /* Local Signal in Local RAS */ + if (flags & SCIF_SIGNAL_LOCAL) + if ((err = micscif_prog_signal(epd, loff, + lval, RMA_WINDOW_SELF))) + goto error_ret; + + /* Signal in Remote RAS */ + if (flags & SCIF_SIGNAL_REMOTE) { + micscif_inc_node_refcnt(ep->remote_dev, 1); + err = micscif_prog_signal(epd, roff, + rval, RMA_WINDOW_PEER); + micscif_dec_node_refcnt(ep->remote_dev, 1); + } + } +error_ret: + if (err) + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + else if (atomic_read(&ep->rma_info.tw_refcount)) + queue_work(ms_info.mi_misc_wq, &ms_info.mi_misc_work); + return err; +} + +int scif_fence_signal(scif_epd_t epd, off_t loff, uint64_t lval, + off_t roff, uint64_t rval, int flags) +{ + int ret; + get_kref_count(epd); + ret = __scif_fence_signal(epd, loff, lval, roff, rval, flags); + put_kref_count(epd); + return ret; +} +EXPORT_SYMBOL(scif_fence_signal); + +/** + * scif_get_nodeIDs - Return information about online nodes + * @nodes: array space reserved for returning online node IDs + * @len: number of entries on the nodes array + * @self: address to place the node ID of this system + * + * Return Values + * scif_get_nodeIDs() returns the total number of scif nodes + * (including host) in the system + */ +int +scif_get_nodeIDs(uint16_t *nodes, int len, uint16_t *self) +{ + int online = 0; + int offset = 0; + int node; +#ifdef _MIC_SCIF_ + micscif_get_node_info(); +#endif + + *self = ms_info.mi_nodeid; + mutex_lock(&ms_info.mi_conflock); + len = SCIF_MIN(len, (int32_t)ms_info.mi_total); + for (node = 0; node <=(int32_t)ms_info.mi_maxid; node++) { + if (ms_info.mi_mask & (1UL << node)) { + online++; + if (offset < len) + nodes[offset++] = node; + } + } + pr_debug("SCIFAPI get_nodeIDs total %d online %d filled in %d nodes\n", + ms_info.mi_total, online, len); + mutex_unlock(&ms_info.mi_conflock); + + return online; +} + +EXPORT_SYMBOL(scif_get_nodeIDs); + +/** + * micscif_pci_dev: + * @node: node ID + * + * Return the pci_dev associated with a node. + */ +int micscif_pci_dev(uint16_t node, struct pci_dev **pdev) +{ +#ifdef _MIC_SCIF_ + /* This *is* a PCI device, therefore no pdev to return. */ + return -ENODEV; +#else + mic_ctx_t *mic_ctx = get_per_dev_ctx(node - 1); + *pdev = mic_ctx->bi_pdev; + return 0; +#endif +} + +#ifndef _MIC_SCIF_ +/** + * micscif_pci_info: + * @node: node ID + * + * Populate the pci device info pointer associated with a node. 
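+ * Host-side helper: fills dev->pdev and maps each PCI BAR to the card's aperture or MMIO virtual address; prefetchable BARs other than the aperture are reported as NULL.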
+ */ +int micscif_pci_info(uint16_t node, struct scif_pci_info *dev) +{ + int i; + mic_ctx_t *mic_ctx = get_per_dev_ctx(node - 1); + struct pci_dev *pdev; + + if (!mic_ctx) + return -ENODEV; + + dev->pdev = pdev = mic_ctx->bi_pdev; + for (i = 0; i < PCI_NUM_RESOURCES; i++) { + if (!pci_resource_start(pdev, i)) { + dev->va[i] = NULL; + continue; + } + if (pci_resource_flags(pdev, i) & IORESOURCE_PREFETCH) { + /* TODO: Change comparison check for KNL. */ + if (pci_resource_start(pdev, i) == mic_ctx->aper.pa) + dev->va[i] = mic_ctx->aper.va; + else + dev->va[i] = NULL; + } else { + dev->va[i] = mic_ctx->mmio.va; + } + } + return 0; +} +#endif + +/** + * scif_pci_info - Populate the pci device info pointer associated with a node + * @node: the node to query + * @scif_pdev: The scif_pci_info structure to populate. + * + * scif_pci_info() populates the provided scif_pci_info structure + * associated with a node. The requested node ID cannot be the same as + * the current node. This routine may only return success when called from + * the host. + * + * Return Values + * Upon successful completion, scif_pci_info() returns 0; otherwise the + * an appropriate error is returned as documented in scif.h. + */ +int scif_pci_info(uint16_t node, struct scif_pci_info *dev) +{ +#ifdef _MIC_SCIF_ + return -EINVAL; +#else + if (node > ms_info.mi_maxid) + return -EINVAL; + + if ((scif_dev[node].sd_state == SCIFDEV_NOTPRESENT) || + is_self_scifdev(&scif_dev[node])) + return -ENODEV; + + return micscif_pci_info(node, dev); +#endif +} +EXPORT_SYMBOL(scif_pci_info); + +/* + * DEBUG helper functions + */ +void +print_ep_state(struct endpt *ep, char *label) +{ + if (ep) + printk("%s: EP %p state %s\n", + label, ep, scif_ep_states[ep->state]); + else + printk("%s: EP %p\n state ?\n", label, ep); +} + diff --git a/micscif/micscif_debug.c b/micscif/micscif_debug.c new file mode 100644 index 0000000..7f26f5a --- /dev/null +++ b/micscif/micscif_debug.c @@ -0,0 +1,1005 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. 
Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include "mic/micscif.h" +#ifndef _MIC_SCIF_ +#include "mic_common.h" +#endif +#include "scif.h" +#include +#include + +#include + +static char *window_type[] = { + "NONE", + "SELF", + "PEER"}; + +static char *scifdev_state[] = { + "SCIFDEV_NOTPRESENT", + "SCIFDEV_INIT", + "SCIFDEV_RUNNING", + "SCIFDEV_SLEEPING", + "SCIFDEV_STOPPING", + "SCIFDEV_STOPPED"}; + +static struct proc_dir_entry *scif_proc; +static struct dentry *mic_debug = NULL; + +#define DEBUG_LEN 10 + +static int +scif_ep_show(struct seq_file *m, void *data) +{ + struct endpt *ep; + struct list_head *pos; + unsigned long sflags; + + seq_printf(m, "EP Address State Port Peer Remote Ep Address\n"); + seq_printf(m, "=================================================================\n"); + spin_lock_irqsave(&ms_info.mi_eplock, sflags); + list_for_each(pos, &ms_info.mi_listen) { + ep = list_entry(pos, struct endpt, list); + seq_printf(m, "%p %s %6d\n", + ep, scif_ep_states[ep->state], ep->port.port); + } + spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); + + spin_lock_irqsave(&ms_info.mi_connlock, sflags); + list_for_each(pos, &ms_info.mi_connected) { + ep = list_entry(pos, struct endpt, list); + seq_printf(m, "%p %s %6d %2d:%-6d %p\n", + ep, scif_ep_states[ep->state], ep->port.port, ep->peer.node, + ep->peer.port, (void *)ep->remote_ep); + } + list_for_each(pos, &ms_info.mi_disconnected) { + ep = list_entry(pos, struct endpt, list); + seq_printf(m, "%p %s %6d %2d:%-6d %p\n", + ep, scif_ep_states[ep->state], ep->port.port, ep->peer.node, + ep->peer.port, (void *)ep->remote_ep); + } + spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); + + seq_printf(m, "EP Address State Port Peer Remote Ep Address reg_list " + "remote_reg_list mmn_list tw_refcount tcw_refcount mi_rma mi_rma_tc " + "task_list mic_mmu_notif_cleanup\n"); + seq_printf(m, "=================================================================\n"); + spin_lock_irqsave(&ms_info.mi_eplock, sflags); + list_for_each(pos, &ms_info.mi_zombie) { + ep = list_entry(pos, struct endpt, list); + seq_printf(m, "%p %s %6d %2d:%-6d %p %d %d %d %d %d %d %d %d %d\n", + ep, scif_ep_states[ep->state], ep->port.port, ep->peer.node, + ep->peer.port, (void *)ep->remote_ep, + list_empty(&ep->rma_info.reg_list), + list_empty(&ep->rma_info.remote_reg_list), + list_empty(&ep->rma_info.mmn_list), + atomic_read(&ep->rma_info.tw_refcount), + atomic_read(&ep->rma_info.tcw_refcount), + list_empty(&ms_info.mi_rma), + list_empty(&ms_info.mi_rma_tc), + list_empty(&ep->rma_info.task_list), +#ifdef CONFIG_MMU_NOTIFIER + list_empty(&ms_info.mi_mmu_notif_cleanup) +#else + -1 +#endif + ); + } + spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); + + return 0; +} + +static int +scif_ep_open(struct inode *inode, struct file *file) +{ + return single_open(file, scif_ep_show, NULL); +} + +struct file_operations scif_ep_fops = { + .open = scif_ep_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + +static int +scif_rma_window_show(struct seq_file *m, void *data) +{ + struct endpt *ep; + struct list_head *pos, *item, *tmp; + unsigned long sflags; + struct reg_range_t *window; + + seq_printf(m, "SCIF Connected EP RMA Window Info\n"); + seq_printf(m, 
"=================================================================\n"); + seq_printf(m, "%-16s\t%-16s %-16s %-16s %-8s %-8s %-8s\n", + "Endpoint", "Type", "Offset", "NumPages", "Prot", "Ref_Count", "Unreg State"); + spin_lock_irqsave(&ms_info.mi_connlock, sflags); + list_for_each(pos, &ms_info.mi_connected) { + ep = list_entry(pos, struct endpt, list); + if (mutex_trylock(&ep->rma_info.rma_lock)) { + list_for_each_safe(item, tmp, &ep->rma_info.reg_list) { + window = list_entry(item, struct reg_range_t, list_member); + seq_printf(m, + "%-16p\t%-16s 0x%-16llx %-16lld %-8d %-8d %-8d\n", + ep, window_type[window->type], window->offset, + window->nr_pages, window->prot, window->ref_count, + window->unreg_state); + } + list_for_each_safe(item, tmp, &ep->rma_info.remote_reg_list) { + window = list_entry(item, struct reg_range_t, list_member); + seq_printf(m, + "%-16p\t%-16s 0x%-16llx %-16lld %-8d %-8d %-8d\n", + ep, window_type[window->type], window->offset, + window->nr_pages, window->prot, window->ref_count, + window->unreg_state); + } + mutex_unlock(&ep->rma_info.rma_lock); + } else + seq_printf(m, + "Try Again, some other thread has the RMA lock for ep %p\n", + ep); + } + spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); + + seq_printf(m, "=================================================================\n"); + seq_printf(m, "SCIF Zombie EP RMA Window Info\n"); + spin_lock_irqsave(&ms_info.mi_eplock, sflags); + list_for_each(pos, &ms_info.mi_zombie) { + ep = list_entry(pos, struct endpt, list); + if (mutex_trylock(&ep->rma_info.rma_lock)) { + list_for_each_safe(item, tmp, &ep->rma_info.reg_list) { + window = list_entry(item, struct reg_range_t, list_member); + seq_printf(m, + "%-16p\t%-16s 0x%-16llx %-16lld %-8d %-8d %-8d\n", + ep, window_type[window->type], window->offset, + window->nr_pages, window->prot, window->ref_count, + window->unreg_state); + } + list_for_each_safe(item, tmp, &ep->rma_info.remote_reg_list) { + window = list_entry(item, struct reg_range_t, list_member); + seq_printf(m, + "%-16p\t%-16s 0x%-16llx %-16lld %-8d %-8d %-8d\n", + ep, window_type[window->type], window->offset, + window->nr_pages, window->prot, window->ref_count, + window->unreg_state); + } + mutex_unlock(&ep->rma_info.rma_lock); + } else + seq_printf(m, + "Try Again, some other thread has the RMA lock for ep %p\n", + ep); + } + spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); + seq_printf(m, "=================================================================\n"); + seq_printf(m, "%-16s\t%-16s %-16s %-16s %-8s %-8s %-8s\n", + "Endpoint", "Type", "Offset", "NumPages", "Prot", "Ref_Count", "Unreg State"); + spin_lock(&ms_info.mi_rmalock); + list_for_each_safe(item, tmp, &ms_info.mi_rma) { + window = list_entry(item, + struct reg_range_t, list_member); + ep = (struct endpt *)window->ep; + seq_printf(m, "%-16p\t%-16s 0x%-16llx %-16lld %-8d %-8d %-8d\n", + ep, window_type[window->type], window->offset, + window->nr_pages, window->prot, window->ref_count, + window->unreg_state); + } + spin_unlock(&ms_info.mi_rmalock); + + return 0; +} + +static int +scif_rma_window_open(struct inode *inode, struct file *file) +{ + return single_open(file, scif_rma_window_show, NULL); +} + +struct file_operations scif_rma_window_fops = { + .open = scif_rma_window_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int +scif_rma_xfer_show(struct seq_file *m, void *data) +{ + struct endpt *ep; + struct list_head *pos; + unsigned long sflags; + + seq_printf(m, "SCIF RMA Debug\n"); + 
seq_printf(m, "=================================================================\n"); + seq_printf(m, "%-16s\t %-16s %-16s %-16s\n", + "Endpoint", "Fence Ref Count", "Temp Window Ref Count", "DMA CHANNEL"); + spin_lock_irqsave(&ms_info.mi_connlock, sflags); + list_for_each(pos, &ms_info.mi_connected) { + ep = list_entry(pos, struct endpt, list); + seq_printf(m, "%-16p\t%-16d %-16d %-16d\n", + ep, ep->rma_info.fence_refcount, + atomic_read(&ep->rma_info.tw_refcount), + ep->rma_info.dma_chan ? get_chan_num(ep->rma_info.dma_chan): -1); + } + spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); + return 0; +} + +static int +scif_rma_xfer_open(struct inode *inode, struct file *file) +{ + return single_open(file, scif_rma_xfer_show, NULL); +} + +struct file_operations scif_rma_xfer_fops = { + .open = scif_rma_xfer_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int +scif_dev_show(struct seq_file *m, void *data) +{ + int node; + + seq_printf(m, "Total Nodes %d Self Node Id %d Maxid %d\n", + ms_info.mi_total, ms_info.mi_nodeid, ms_info.mi_maxid); + + seq_printf(m, "%-16s\t%-16s %-16s\t%-16s\t%-8s\t%-8s\t%-8s\n", + "node_id", "state", "scif_ref_cnt", "scif_map_ref_cnt", + "wait_status", "conn count", "numa_node"); + + for (node = 0; node <= ms_info.mi_maxid; node++) + seq_printf(m, "%-16d\t%-16s\t0x%-16lx\t%-16d\t%-16lld\t%-16d\t%-16d\n", + scif_dev[node].sd_node, scifdev_state[scif_dev[node].sd_state], + atomic_long_read(&scif_dev[node].scif_ref_cnt), + scif_dev[node].scif_map_ref_cnt, + scif_dev[node].sd_wait_status, + scif_dev[node].num_active_conn, + scif_dev[node].sd_numa_node); + + return 0; +} + +static int +scif_dev_open(struct inode *inode, struct file *file) +{ + return single_open(file, scif_dev_show, NULL); +} + +struct file_operations scif_dev_fops = { + .open = scif_dev_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int +scif_debug_show(struct seq_file *m, void *data) +{ + seq_printf(m, "Num gtt_entries %d\n", ms_info.nr_gtt_entries); + /* + * Tracking the number of zombies for debug. + * Need to make sure they are not being left behind forever. + */ + seq_printf(m, "Num Zombie Endpoints %d\n", ms_info.mi_nr_zombies); + seq_printf(m, "Watchdog timeout %d\n", ms_info.mi_watchdog_to); + seq_printf(m, "Watchdog enabled %d\n", ms_info.mi_watchdog_enabled); + seq_printf(m, "Watchdog auto reboot %d\n", ms_info.mi_watchdog_auto_reboot); + seq_printf(m, "Huge Pages Enabled %d Detected 2mb %lld 4k %lld\n", + mic_huge_page_enable, ms_info.nr_2mb_pages, ms_info.nr_4k_pages); +#ifdef RMA_DEBUG + seq_printf(m, "rma_alloc_cnt %ld rma_pin_cnt %ld mmu_notif %ld rma_unaligned_cpu_cnt %ld\n", + atomic_long_read(&ms_info.rma_alloc_cnt), + atomic_long_read(&ms_info.rma_pin_cnt), + atomic_long_read(&ms_info.mmu_notif_cnt), + atomic_long_read(&ms_info.rma_unaligned_cpu_cnt)); +#endif + seq_printf(m, "List empty? 
mi_uaccept %d mi_listen %d mi_zombie %d " + "mi_connected %d mi_disconnected %d\n", + list_empty(&ms_info.mi_uaccept), + list_empty(&ms_info.mi_listen), + list_empty(&ms_info.mi_zombie), + list_empty(&ms_info.mi_connected), + list_empty(&ms_info.mi_disconnected)); + + return 0; +} + +static int +scif_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, scif_debug_show, NULL); +} + +struct file_operations scif_debug_fops = { + .open = scif_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int +scif_suspend_show(struct seq_file *m, void *data) +{ + int node; + uint64_t ret; + seq_printf(m, "Removing Nodes mask 0x7\n"); + + for (node = 1; node < ms_info.mi_total; node++) { + ret = micscif_disconnect_node(node, 0 , 1); + seq_printf(m, "Node %d requested disconnect. ret = %lld\n", + node, ret); + } + + return 0; +} + +static int +scif_suspend_open(struct inode *inode, struct file *file) +{ + return single_open(file, scif_suspend_show, NULL); +} + +struct file_operations scif_suspend_fops = { + .open = scif_suspend_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int +scif_cache_limit_show(struct seq_file *m, void *data) +{ + seq_printf(m, "reg_cache_limit = 0x%lx\n", ms_info.mi_rma_tc_limit); + return 0; +} + +static int +scif_cache_limit_open(struct inode *inode, struct file *file) +{ + return single_open(file, scif_cache_limit_show, NULL); +} + +struct file_operations scif_cache_limit_fops = { + .open = scif_cache_limit_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +#else // LINUX VERSION 3.10 + +static int +scif_rma_window_read(char *buf, char **start, off_t offset, int len, int *eof, void *data) +{ + struct endpt *ep; + struct list_head *pos, *item, *tmp; + unsigned long sflags; + int l = 0; + struct reg_range_t *window; + + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , + "SCIF Connected EP RMA Window Info\n"); + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , + "=================================================================\n"); + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , + "%-16s\t%-16s %-16s %-16s %-8s %-8s %-8s\n", + "Endpoint", "Type", "Offset", "NumPages", "Prot", "Ref_Count", "Unreg State"); + spin_lock_irqsave(&ms_info.mi_connlock, sflags); + list_for_each(pos, &ms_info.mi_connected) { + ep = list_entry(pos, struct endpt, list); + if (mutex_trylock(&ep->rma_info.rma_lock)) { + list_for_each_safe(item, tmp, &ep->rma_info.reg_list) { + window = list_entry(item, struct reg_range_t, list_member); + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , + "%-16p\t%-16s 0x%-16llx %-16lld %-8d %-8d %-8d\n", + ep, window_type[window->type], window->offset, + window->nr_pages, window->prot, window->ref_count, + window->unreg_state); + } + list_for_each_safe(item, tmp, &ep->rma_info.remote_reg_list) { + window = list_entry(item, struct reg_range_t, list_member); + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , + "%-16p\t%-16s 0x%-16llx %-16lld %-8d %-8d %-8d\n", + ep, window_type[window->type], window->offset, + window->nr_pages, window->prot, window->ref_count, + window->unreg_state); + } + mutex_unlock(&ep->rma_info.rma_lock); + } else + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , + "Try Again, some other thread has the RMA lock for ep %p\n", + ep); + } + spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); + + l += snprintf(buf + l, len - l > 0 ? 
len - l : 0 , + "=================================================================\n"); + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , + "SCIF Zombie EP RMA Window Info\n"); + spin_lock_irqsave(&ms_info.mi_eplock, sflags); + list_for_each(pos, &ms_info.mi_zombie) { + ep = list_entry(pos, struct endpt, list); + if (mutex_trylock(&ep->rma_info.rma_lock)) { + list_for_each_safe(item, tmp, &ep->rma_info.reg_list) { + window = list_entry(item, struct reg_range_t, list_member); + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , + "%-16p\t%-16s 0x%-16llx %-16lld %-8d %-8d %-8d\n", + ep, window_type[window->type], window->offset, + window->nr_pages, window->prot, window->ref_count, + window->unreg_state); + } + list_for_each_safe(item, tmp, &ep->rma_info.remote_reg_list) { + window = list_entry(item, struct reg_range_t, list_member); + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , + "%-16p\t%-16s 0x%-16llx %-16lld %-8d %-8d %-8d\n", + ep, window_type[window->type], window->offset, + window->nr_pages, window->prot, window->ref_count, + window->unreg_state); + } + mutex_unlock(&ep->rma_info.rma_lock); + } else + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , + "Try Again, some other thread has the RMA lock for ep %p\n", + ep); + } + spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , + "=================================================================\n"); + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , + "%-16s\t%-16s %-16s %-16s %-8s %-8s %-8s\n", + "Endpoint", "Type", "Offset", "NumPages", "Prot", "Ref_Count", "Unreg State"); + spin_lock(&ms_info.mi_rmalock); + list_for_each_safe(item, tmp, &ms_info.mi_rma) { + window = list_entry(item, + struct reg_range_t, list_member); + ep = (struct endpt *)window->ep; + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , + "%-16p\t%-16s 0x%-16llx %-16lld %-8d %-8d %-8d\n", + ep, window_type[window->type], window->offset, + window->nr_pages, window->prot, window->ref_count, + window->unreg_state); + } + spin_unlock(&ms_info.mi_rmalock); + + *eof = 1; + return l; +} + +static int +scif_rma_xfer_read(char *buf, char **start, off_t offset, int len, int *eof, void *data) +{ + struct endpt *ep; + struct list_head *pos; + unsigned long sflags; + int l = 0; + + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , "SCIF RMA Debug\n"); + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , + "=================================================================\n"); + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , "%-16s\t %-16s %-16s %-16s\n", + "Endpoint", "Fence Ref Count", "Temp Window Ref Count", "DMA CHANNEL"); + spin_lock_irqsave(&ms_info.mi_connlock, sflags); + list_for_each(pos, &ms_info.mi_connected) { + ep = list_entry(pos, struct endpt, list); + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , "%-16p\t%-16d %-16d %-16d\n", + ep, ep->rma_info.fence_refcount, + atomic_read(&ep->rma_info.tw_refcount), + ep->rma_info.dma_chan ? get_chan_num(ep->rma_info.dma_chan): -1); + } + spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); + + *eof = 1; + return l; +} + +/* Place Holder for generic SCIF debug information */ +static int +scif_debug_read(char *buf, char **start, off_t offset, int len, int *eof, void *data) +{ + int l = 0; + + l += snprintf(buf + l, len - l > 0 ? len - l : 0, + "Num gtt_entries %d\n", ms_info.nr_gtt_entries); + /* + * Tracking the number of zombies for debug. + * Need to make sure they are not being left behind forever. + */ + l += snprintf(buf + l, len - l > 0 ? 
len - l : 0, + "Num Zombie Endpoints %d\n", ms_info.mi_nr_zombies); + + l += snprintf(buf + l, len - l > 0 ? len - l : 0, + "Watchdog timeout %d\n", ms_info.mi_watchdog_to); + + l += snprintf(buf + l, len - l > 0 ? len - l : 0, + "Watchdog enabled %d\n", ms_info.mi_watchdog_enabled); + + l += snprintf(buf + l, len - l > 0 ? len - l : 0, + "Watchdog auto reboot %d\n", ms_info.mi_watchdog_auto_reboot); + + l += snprintf(buf + l, len - l > 0 ? len - l : 0, + "Huge Pages Enabled %d Detected 2mb %lld 4k %lld\n", + mic_huge_page_enable, ms_info.nr_2mb_pages, ms_info.nr_4k_pages); +#ifdef RMA_DEBUG + l += snprintf(buf + l, len - l > 0 ? len - l : 0, + "mm ref cnt %ld rma_alloc_cnt %ld rma_pin_cnt %ld mmu_notif %ld rma_unaligned_cpu_cnt %ld\n", + atomic_long_read(&ms_info.rma_mm_cnt), + atomic_long_read(&ms_info.rma_alloc_cnt), + atomic_long_read(&ms_info.rma_pin_cnt), + atomic_long_read(&ms_info.mmu_notif_cnt), + atomic_long_read(&ms_info.rma_unaligned_cpu_cnt)); +#endif + l += snprintf(buf + l, len - l > 0 ? len - l : 0, + "List empty? mi_uaccept %d mi_listen %d mi_zombie %d " + "mi_connected %d mi_disconnected %d\n", + list_empty(&ms_info.mi_uaccept), + list_empty(&ms_info.mi_listen), + list_empty(&ms_info.mi_zombie), + list_empty(&ms_info.mi_connected), + list_empty(&ms_info.mi_disconnected)); + + *eof = 1; + return l; +} + +static int +scif_dev_info(char *buf, char **start, off_t offset, int len, int *eof, void *data) +{ + int l = 0; + int node; + +#ifdef _MIC_SCIF_ + micscif_get_node_info(); + + mutex_lock(&ms_info.mi_conflock); +#endif + l += snprintf(buf + l, len - l > 0 ? len - l : 0, + "Total Nodes %d Self Node Id %d Maxid %d\n", + ms_info.mi_total, ms_info.mi_nodeid, ms_info.mi_maxid); + + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , + "%-16s\t%-16s %-16s\t%-16s\t%-8s\t%-8s\t%-8s\n", + "node_id", "state", "scif_ref_cnt", "scif_map_ref_cnt", + "wait_status", "conn count", "numa_node"); + + for (node = 0; node <= ms_info.mi_maxid; node++) + l += snprintf(buf + l, len - l > 0 ? len - l : 0, + "%-16d\t%-16s\t0x%-16lx\t%-16d\t%-16lld\t%-16d\t%-16d\n", + scif_dev[node].sd_node, scifdev_state[scif_dev[node].sd_state], + atomic_long_read(&scif_dev[node].scif_ref_cnt), + scif_dev[node].scif_map_ref_cnt, + scif_dev[node].sd_wait_status, + scif_dev[node].num_active_conn, + scif_dev[node].sd_numa_node); +#ifdef _MIC_SCIF_ + mutex_unlock(&ms_info.mi_conflock); +#endif + + *eof = 1; + return l; +} + +static int +scif_suspend(char *buf, char **start, off_t offset, int len, int *eof, void *data) +{ + int l = 0; + +#ifdef _MIC_SCIF_ + micscif_suspend_handler(NULL, 0, NULL); +#else + { + int node; + uint64_t ret; + l += snprintf(buf + l, len - l > 0 ? len - l : 0, + "Removing Nodes mask 0x7\n"); + for (node = 1; node < ms_info.mi_total; node++) { + ret = micscif_disconnect_node(node, 0 , 1); + l += snprintf(buf + l, len - l > 0 ? len - l : 0, + "Node %d requested disconnect. ret = %lld\n", + node, ret); + } + } +#endif + + *eof = 1; + return l; +} + +#ifdef _MIC_SCIF_ +static int +scif_crash(char *buf, char **start, off_t offset, int len, int *eof, void *data) +{ + int l = 0; + l += snprintf(buf + l, len - l > 0 ? len - l : 0, + "%s %d Crash the Card to test Lost Nodes\n", __func__, __LINE__); + panic("Test Lost Node! Crash the card intentionally\n"); + *eof = 1; + return l; +} + +static int +scif_bugon(char *buf, char **start, off_t offset, int len, int *eof, void *data) +{ + int l = 0; + l += snprintf(buf + l, len - l > 0 ? 
len - l : 0, + "%s %d Bug on the Card to test Lost Nodes\n", __func__, __LINE__); + BUG_ON(1); + *eof = 1; + return l; +} +#endif + +static int +scif_fail_suspend(char *buf, char **start, off_t offset, int len, int *eof, void *data) +{ + int l = 0; + +#ifdef _MIC_SCIF_ + micscif_fail_suspend_handler(NULL, 0, NULL); + l += snprintf(buf + l, len - l > 0 ? len - l : 0, + "Failing Suspend\n"); +#endif + + *eof = 1; + return l; +} + +static int +scif_resume(char *buf, char **start, off_t offset, int len, int *eof, void *data) +{ + int l = 0; + +#ifdef _MIC_SCIF_ + micscif_resume_handler(NULL, 0, NULL); + l += snprintf(buf + l, len - l > 0 ? len - l : 0, + "Resuming/Waking up node\n"); +#endif + + *eof = 1; + return l; +} + +static int +scif_get_reg_cache_limit(char *buf, char **start, off_t offset, int len, int *eof, void *data) +{ + int l = 0; + + l += snprintf(buf + l, len - l > 0 ? len - l : 0 , + "reg_cache_limit = 0x%lx\n", ms_info.mi_rma_tc_limit); + *eof = 1; + return l; +} + +static int +scif_set_reg_cache_limit(struct file *file, const char __user *buffer, + unsigned long len, void *unused) +{ + unsigned long data = 0; + char *p; + if (!(p = kzalloc(len, GFP_KERNEL))) + return -ENOMEM; + if (copy_from_user(p, buffer, len)) + return -EFAULT; + data = simple_strtoul(p, NULL, 0); + ms_info.mi_rma_tc_limit = data; + return len; +} +#endif + +#ifdef _MIC_SCIF_ +static int smpt_seq_show(struct seq_file *s, void *pos) +{ + volatile uint8_t *mm_sbox = scif_dev[SCIF_HOST_NODE].mm_sbox; + uint32_t smpt_reg_offset = SBOX_SMPT00; + uint32_t smpt_reg_val; + int i; + + seq_printf(s, + "=================================================================\n"); + seq_printf(s,"%-11s| %-15s %-14s %-5s \n", + "SMPT entry", "SMPT reg value", "DMA addr", "SNOOP"); + seq_printf(s, + "=================================================================\n"); + + for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) { + smpt_reg_val = readl(mm_sbox + smpt_reg_offset); + seq_printf(s,"%-11d| %-#15x %-#14llx %-5s \n", + i, smpt_reg_val, ((uint64_t)smpt_reg_val >> 2ULL) << MIC_SYSTEM_PAGE_SHIFT, + (smpt_reg_val & 0x1) ? 
"OFF" : "ON"); + smpt_reg_offset += 4; + } + + seq_printf(s, + "=================================================================\n"); + return 0; +} + +#else +static int smpt_seq_show(struct seq_file *s, void *pos) +{ + uint64_t bid = (uint64_t)s->private; + mic_ctx_t *mic_ctx; + int i; + unsigned long flags; + + mic_ctx = get_per_dev_ctx(bid); + seq_printf(s, + "=================================================================\n"); + seq_printf(s,"Board %-2d |%-10s| %-14s %-10s \n", + (int)bid + 1, "SMPT entry", "DMA addr", "Reference Count"); + seq_printf(s, + "=================================================================\n"); + + if (mic_ctx && mic_ctx->mic_smpt) { + spin_lock_irqsave(&mic_ctx->smpt_lock, flags); + for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) { + seq_printf(s,"%9s|%-10d| %-#14llx %-10lld \n", + " ", i, mic_ctx->mic_smpt[i].dma_addr, mic_ctx->mic_smpt[i].ref_count); + } + spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags); + } + + seq_printf(s, + "================================================================X\n"); + return 0; +} +#endif + +static int smpt_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, smpt_seq_show, inode->i_private); +} + +static int smpt_debug_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static struct file_operations smpt_file_ops = { + .owner = THIS_MODULE, + .open = smpt_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = smpt_debug_release +}; + +#ifndef _MIC_SCIF_ +static int log_buf_seq_show(struct seq_file *s, void *pos) +{ + uint64_t bid = (uint64_t)s->private; + mic_ctx_t *mic_ctx; + void *log_buf_len_va, *log_buf_va; + struct micscif_dev *dev; + + mic_ctx = get_per_dev_ctx(bid); + if (!mic_ctx || !mic_ctx->log_buf_addr || !mic_ctx->log_buf_len) + goto done; + + if (mic_ctx->bi_family == FAMILY_ABR) { + seq_printf(s, "log buffer display not supported for KNF\n"); + goto done; + } + + dev = &scif_dev[mic_get_scifnode_id(mic_ctx)]; + log_buf_len_va = virt_to_phys(mic_ctx->log_buf_len) + mic_ctx->aper.va; + log_buf_va = virt_to_phys(mic_ctx->log_buf_addr) + mic_ctx->aper.va; + + mutex_lock(&mic_ctx->state_lock); + switch (mic_ctx->state) { + case MIC_BOOT: + case MIC_BOOTFAIL: + case MIC_ONLINE: + case MIC_SHUTDOWN: + case MIC_LOST: + micscif_inc_node_refcnt(dev, 1); + seq_write(s, log_buf_va, *(int*)log_buf_len_va); + micscif_dec_node_refcnt(dev, 1); + break; + case MIC_NORESPONSE: + case MIC_READY: + /* Cannot access GDDR while reset is ongoing */ + case MIC_RESET: + case MIC_RESETFAIL: + case MIC_INVALID: + default: + break; + } + mutex_unlock(&mic_ctx->state_lock); +done: + return 0; +} + +static int log_buf_open(struct inode *inode, struct file *file) +{ + return single_open(file, log_buf_seq_show, inode->i_private); +} + +static int log_buf_release(struct inode *inode, struct file *file) +{ + return single_release(inode, file); +} + +static struct file_operations log_buf_ops = { + .owner = THIS_MODULE, + .open = log_buf_open, + .read = seq_read, + .llseek = seq_lseek, + .release = log_buf_release +}; +#endif + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) +void +scif_proc_init(void) +{ + if ((scif_proc = proc_mkdir("scif", NULL)) != NULL) { + proc_create_data("ep", 0444, scif_proc, &scif_ep_fops, NULL); + proc_create_data("rma_window", 0444, scif_proc, &scif_rma_window_fops, NULL); + proc_create_data("rma_xfer", 0444, scif_proc, &scif_rma_xfer_fops, NULL); + proc_create_data("scif_dev", 0444, scif_proc, &scif_dev_fops, NULL); + 
proc_create_data("debug", 0444, scif_proc, &scif_debug_fops, NULL); + proc_create_data("suspend", 0444, scif_proc, &scif_suspend_fops, NULL); + proc_create("reg_cache_limit", S_IFREG | S_IRUGO | S_IWUGO, scif_proc, + &scif_cache_limit_fops); + } +} +#else +void +scif_proc_init(void) +{ + struct proc_dir_entry *reg_cache_limit_entry; + struct proc_dir_entry *ep_entry; + + if ((scif_proc = create_proc_entry("scif", S_IFDIR | S_IRUGO, NULL)) != NULL) { + create_proc_read_entry("rma_window", 0444, scif_proc, scif_rma_window_read, NULL); + create_proc_read_entry("rma_xfer", 0444, scif_proc, scif_rma_xfer_read, NULL); + create_proc_read_entry("scif_dev", 0444, scif_proc, scif_dev_info, NULL); + create_proc_read_entry("debug", 0444, scif_proc, scif_debug_read, NULL); + create_proc_read_entry("suspend", 0444, scif_proc, scif_suspend, NULL); + create_proc_read_entry("fail_suspend", 0444, scif_proc, scif_fail_suspend, NULL); + create_proc_read_entry("resume", 0444, scif_proc, scif_resume, NULL); +#ifdef _MIC_SCIF_ + create_proc_read_entry("crash", 0444, scif_proc, scif_crash, NULL); + create_proc_read_entry("bugon", 0444, scif_proc, scif_bugon, NULL); +#endif + if ((reg_cache_limit_entry = create_proc_entry("reg_cache_limit", S_IFREG | S_IRUGO | S_IWUGO, scif_proc))) { + reg_cache_limit_entry->write_proc = scif_set_reg_cache_limit; + reg_cache_limit_entry->read_proc = scif_get_reg_cache_limit; + reg_cache_limit_entry->data = NULL; + } + if ((ep_entry = create_proc_entry("ep", S_IFREG | S_IRUGO | S_IWUGO, scif_proc))) { + ep_entry->proc_fops = &scif_ep_fops; + } + + + } +} +#endif // LINUX VERSION + +#ifdef _MIC_SCIF_ +void +mic_debug_init(void) +{ + if ((mic_debug = debugfs_create_dir("mic_debug", NULL))) { + debugfs_create_file("smpt", 0444, mic_debug, NULL, &smpt_file_ops); + debugfs_create_u8("enable_msg_logging", 0666, mic_debug, &(ms_info.en_msg_log)); + } +} +#else +void +mic_debug_init(mic_ctx_t *mic_ctx) +{ + char name[DEBUG_LEN]; + uint64_t id = mic_ctx->bi_id; + struct dentry *child; + + if (!mic_debug) + mic_debug = debugfs_create_dir("mic_debug", NULL); + + if (mic_debug) { + snprintf(name, DEBUG_LEN, "mic%d", (int)id); + if ((child = debugfs_create_dir(name, mic_debug))) { + debugfs_create_file("smpt", 0444, child, (void*)id, &smpt_file_ops); + debugfs_create_file("log_buf", 0444, child, (void*)id, &log_buf_ops); + } + debugfs_create_u8("enable_msg_logging", 0666, mic_debug, &(ms_info.en_msg_log)); + } +} +#endif + +void +mic_debug_uninit(void) +{ + debugfs_remove_recursive(mic_debug); +} + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) +void +scif_proc_cleanup(void) +{ + if (scif_proc) + remove_proc_subtree("scif", NULL); +} +#else +void +scif_proc_cleanup(void) +{ + if (scif_proc) { + remove_proc_entry("reg_cache_limit", scif_proc); + remove_proc_entry("ep", scif_proc); + remove_proc_entry("rma_window", scif_proc); + remove_proc_entry("rma_xfer", scif_proc); + remove_proc_entry("scif_dev", scif_proc); + remove_proc_entry("debug", scif_proc); + remove_proc_entry("suspend", scif_proc); + remove_proc_entry("fail_suspend", scif_proc); + remove_proc_entry("resume", scif_proc); +#ifdef _MIC_SCIF_ + remove_proc_entry("crash", scif_proc); + remove_proc_entry("bugon", scif_proc); +#endif + remove_proc_entry("scif", NULL); + scif_proc = NULL; + } +} +#endif + +#ifdef _MIC_SCIF_ +extern int micscif_max_msg_id; + +/* + * Test entry point for error injection + */ +int +micscif_error_inject(int scenario) +{ + switch (scenario) { + case 1: + micscif_max_msg_id = 0; + break; + default: + 
pr_debug("Illegal error injection scenario %d\n", scenario); + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(micscif_error_inject); +#endif // _MIC_SCIF_ diff --git a/micscif/micscif_fd.c b/micscif/micscif_fd.c new file mode 100644 index 0000000..9a4eb9c --- /dev/null +++ b/micscif/micscif_fd.c @@ -0,0 +1,528 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include "mic/micscif.h" + +struct mic_priv { + scif_epd_t epd; +}; + + +int +scif_fdopen(struct file *f) +{ + struct mic_priv *priv = (struct mic_priv *) + kmalloc(sizeof(struct mic_priv), GFP_KERNEL); + /* + * Not a valid errno as defined in scif.h but should be? + */ + if (!priv) + return -ENOMEM; + + /* SCIF device */ + if (!(priv->epd = __scif_open())) { + kfree(priv); + return -ENOMEM; + } + + ((f)->private_data) = priv; + return 0; +} + +int +scif_fdclose(struct file *f) +{ + struct mic_priv *priv = ((f)->private_data); + int err = 0; + + /* Only actually request of tear down of end point if file reference + * count is greater than 1. This accounts for the fork() issue. 
+ */ + if (atomic64_read(&f->f_count) == 0) { + err = __scif_close(priv->epd); + kfree(priv); + } + return err; +} + +int +micscif_mmap(struct file *f, struct vm_area_struct *vma) +{ + struct mic_priv *priv = (struct mic_priv *)((f)->private_data); + return scif_mmap(vma, priv->epd); +} + +unsigned int +micscif_poll(struct file *f, poll_table *wait) +{ + struct mic_priv *priv = (struct mic_priv *)((f)->private_data); + return __scif_pollfd(f, wait, (struct endpt *)priv->epd); +} + +int +micscif_flush(struct file *f, fl_owner_t id) +{ + struct mic_priv *priv; + dev_t dev; + struct endpt *ep; + + priv = (struct mic_priv *)f->private_data; + dev = f->f_path.dentry->d_inode->i_rdev; + if (MINOR(dev) != 1) // SCIF MINOR + return 0; + + ep = priv->epd; + + /* Handles fork issue, making suer an endpoint only closes when the original + * thread that created it tries to close it, or when there are no more + * references to it. + */ + if (ep->files == id) + __scif_flush(ep); + + return 0; +} + + +static __always_inline void +scif_err_debug(int err, const char *str) +{ + /* + * ENOTCONN is a common uninteresting error which is + * flooding debug messages to the console unnecessarily. + */ + if (err < 0 && err != -ENOTCONN) + pr_debug("%s err %d\n", str, err); +} + + + +int +scif_process_ioctl(struct file *f, unsigned int cmd, uint64_t arg) +{ + struct mic_priv *priv = ((f)->private_data); + void __user *argp = (void __user *)arg; + int err = 0; + struct scifioctl_msg request; + bool non_block = false; + + non_block = !!(f->f_flags & O_NONBLOCK); + + switch (cmd) { + case SCIF_BIND: + { + int pn; + + if (copy_from_user(&pn, argp, sizeof(pn))) { + return -EFAULT; + } + + if ((pn = __scif_bind(priv->epd, pn)) < 0) { + return pn; + } + + if (copy_to_user(argp, &pn, sizeof(pn))) { + return -EFAULT; + } + + return 0; + } + case SCIF_LISTEN: + return __scif_listen(priv->epd, arg); + case SCIF_CONNECT: + { + struct scifioctl_connect req; + struct endpt *ep = (struct endpt *)priv->epd; + + if (copy_from_user(&req, argp, sizeof(struct scifioctl_connect))) { + return -EFAULT; + } + + if ((err = __scif_connect(priv->epd, &req.peer, non_block)) < 0) { + return err; + } + + req.self.node = ep->port.node; + req.self.port = ep->port.port; + + if (copy_to_user(argp, &req, sizeof(struct scifioctl_connect))) { + return -EFAULT; + } + + + return 0; + } + // Accept is done in two halves. Thes request ioctl does the basic functility of accepting + // the request and returning the information about it including the internal ID of the + // end point. The register is done with the internID on a new file desciptor opened by the + // requesting process. + case SCIF_ACCEPTREQ: + { + struct scifioctl_accept request; + unsigned long sflags; + scif_epd_t *ep = (scif_epd_t *)&request.endpt; + + if (copy_from_user(&request, argp, sizeof(struct scifioctl_accept))) { + return -EFAULT; + } + + if ((err = __scif_accept(priv->epd, &request.peer, ep, request.flags)) < 0) { + return err; + } + + if (copy_to_user(argp, &request, sizeof(struct scifioctl_accept))) { + scif_close(*ep); + return -EFAULT; + } + + // Add to the list of user mode eps where the second half of the accept + // is not yet completed. 
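+ // The second half, SCIF_ACCEPTREG, later removes the endpoint from these lists and attaches it to the new file descriptor's private data.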
+ spin_lock_irqsave(&ms_info.mi_eplock, sflags); + list_add_tail(&((*ep)->miacceptlist), &ms_info.mi_uaccept); + list_add_tail(&((*ep)->liacceptlist), &priv->epd->li_accept); + (*ep)->listenep = priv->epd; + priv->epd->acceptcnt++; + spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); + + return 0; + } + case SCIF_ACCEPTREG: + { + struct mic_priv *priv = (struct mic_priv *)((f)->private_data); + struct endpt *newep; + struct endpt *lisep; + struct endpt *ep; + struct endpt *fep = NULL; + struct endpt *tmpep; + struct list_head *pos, *tmpq; + unsigned long sflags; + + // Finally replace the pointer to the accepted endpoint + if (copy_from_user(&newep, argp, sizeof(void *))) + return -EFAULT; + + // Remove form the user accept queue + spin_lock_irqsave(&ms_info.mi_eplock, sflags); + list_for_each_safe(pos, tmpq, &ms_info.mi_uaccept) { + tmpep = list_entry(pos, struct endpt, miacceptlist); + if (tmpep == newep) { + list_del(pos); + fep = tmpep; + break; + } + } + + if (fep == NULL) { + spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); + return -ENOENT; + } + + lisep = newep->listenep; + list_for_each_safe(pos, tmpq, &lisep->li_accept) { + tmpep = list_entry(pos, struct endpt, liacceptlist); + if (tmpep == newep) { + list_del(pos); + lisep->acceptcnt--; + break; + } + } + + spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); + + // Free the resources automatically created from the open. + micscif_teardown_ep(priv->epd); + micscif_add_epd_to_zombie_list(priv->epd, !MI_EPLOCK_HELD); + priv->epd = newep; + ep = (struct endpt *)priv->epd; + ep = ep; + return 0; + } + case SCIF_SEND: + { + struct mic_priv *priv = (struct mic_priv *)((f)->private_data); + + if (copy_from_user(&request, argp, + sizeof(struct scifioctl_msg))) { + err = -EFAULT; + goto send_err; + } + + if ((err = scif_user_send(priv->epd, request.msg, + request.len, request.flags)) < 0) + goto send_err; + + if (copy_to_user(&((struct scifioctl_msg*)argp)->out_len, + &err, sizeof(err))) { + err = -EFAULT; + goto send_err; + } + err = 0; +send_err: + scif_err_debug(err, "scif_send"); + return err; + } + case SCIF_RECV: + { + struct mic_priv *priv = (struct mic_priv *)((f)->private_data); + + if (copy_from_user(&request, argp, + sizeof(struct scifioctl_msg))) { + err = -EFAULT; + goto recv_err; + } + + if ((err = scif_user_recv(priv->epd, request.msg, + request.len, request.flags)) < 0) + goto recv_err; + + if (copy_to_user(&((struct scifioctl_msg*)argp)->out_len, + &err, sizeof(err))) { + err = -EFAULT; + goto recv_err; + } + err = 0; +recv_err: + scif_err_debug(err, "scif_recv"); + return err; + } + case SCIF_REG: + { + struct mic_priv *priv = (struct mic_priv *)((f)->private_data); + struct scifioctl_reg reg; + off_t ret; + + if (copy_from_user(®, argp, sizeof(reg))) { + err = -EFAULT; + goto reg_err; + } + if (reg.flags & SCIF_MAP_KERNEL) { + err = -EINVAL; + goto reg_err; + } + if ((ret = __scif_register(priv->epd, reg.addr, reg.len, + reg.offset, reg.prot, reg.flags)) < 0) { + err = (int)ret; + goto reg_err; + } + + if (copy_to_user(&((struct scifioctl_reg*)argp)->out_offset, + &ret, sizeof(reg.out_offset))) { + err = -EFAULT; + goto reg_err; + } + err = 0; +reg_err: + scif_err_debug(err, "scif_register"); + return err; + } + case SCIF_UNREG: + { + struct mic_priv *priv = (struct mic_priv *)((f)->private_data); + struct scifioctl_unreg unreg; + + if (copy_from_user(&unreg, argp, sizeof(unreg))) { + err = -EFAULT; + goto unreg_err; + } + err = __scif_unregister(priv->epd, unreg.offset, unreg.len); +unreg_err: + scif_err_debug(err, 
"scif_unregister"); + return err; + } + case SCIF_READFROM: + { + struct mic_priv *priv = (struct mic_priv *)((f)->private_data); + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto readfrom_err; + } + err = __scif_readfrom(priv->epd, + copy.loffset, + copy.len, + copy.roffset, + copy.flags); +readfrom_err: + scif_err_debug(err, "scif_readfrom"); + return err; + } + case SCIF_WRITETO: + { + struct mic_priv *priv = (struct mic_priv *)((f)->private_data); + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto writeto_err; + } + err = __scif_writeto(priv->epd, + copy.loffset, + copy.len, + copy.roffset, + copy.flags); +writeto_err: + scif_err_debug(err, "scif_writeto"); + return err; + } + case SCIF_VREADFROM: + { + struct mic_priv *priv = (struct mic_priv *)((f)->private_data); + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto vreadfrom_err; + } + err = __scif_vreadfrom(priv->epd, + copy.addr, + copy.len, + copy.roffset, + copy.flags); +vreadfrom_err: + scif_err_debug(err, "scif_vreadfrom"); + return err; + } + case SCIF_VWRITETO: + { + struct mic_priv *priv = (struct mic_priv *)((f)->private_data); + struct scifioctl_copy copy; + + if (copy_from_user(©, argp, sizeof(copy))) { + err = -EFAULT; + goto vwriteto_err; + } + err = __scif_vwriteto(priv->epd, + copy.addr, + copy.len, + copy.roffset, + copy.flags); +vwriteto_err: + scif_err_debug(err, "scif_vwriteto"); + return err; + } + case SCIF_GET_NODEIDS: + { + struct scifioctl_nodeIDs nodeIDs; + int entries; + uint16_t *nodes; + uint16_t self; + + if (copy_from_user(&nodeIDs, argp, sizeof(nodeIDs))) { + err = -EFAULT; + goto getnodes_err2; + } + + entries = SCIF_MIN(MAX_BOARD_SUPPORTED, nodeIDs.len); + + nodes = kmalloc(sizeof(uint16_t) * entries, GFP_KERNEL); + if ( (entries != 0) && (!nodes) ){ + err = -ENOMEM; + goto getnodes_err2; + } + nodeIDs.len = scif_get_nodeIDs(nodes, entries, &self); + + if (copy_to_user(nodeIDs.nodes, + nodes, sizeof(uint16_t) * entries)) { + err = -EFAULT; + goto getnodes_err1; + } + + if (copy_to_user(nodeIDs.self, + &self, sizeof(uint16_t))) { + err = -EFAULT; + goto getnodes_err1; + } + + if (copy_to_user(argp, &nodeIDs, sizeof(nodeIDs))) { + err = -EFAULT; + goto getnodes_err1; + } +getnodes_err1: + kfree(nodes); +getnodes_err2: + return err; + } + case SCIF_FENCE_MARK: + { + struct mic_priv *priv = (struct mic_priv *)((f)->private_data); + struct scifioctl_fence_mark mark; + int tmp_mark = 0; + + if (copy_from_user(&mark, argp, sizeof(mark))) { + err = -EFAULT; + goto fence_mark_err; + } + if ((err = __scif_fence_mark(priv->epd, + mark.flags, &tmp_mark))) + goto fence_mark_err; + if (copy_to_user(mark.mark, &tmp_mark, sizeof(tmp_mark))) { + err = -EFAULT; + goto fence_mark_err; + } +fence_mark_err: + scif_err_debug(err, "scif_fence_mark"); + return err; + } + case SCIF_FENCE_WAIT: + { + struct mic_priv *priv = (struct mic_priv *)((f)->private_data); + err = __scif_fence_wait(priv->epd, arg); + scif_err_debug(err, "scif_fence_wait"); + return err; + } + case SCIF_FENCE_SIGNAL: + { + struct mic_priv *priv = (struct mic_priv *)((f)->private_data); + struct scifioctl_fence_signal signal; + + if (copy_from_user(&signal, argp, sizeof(signal))) { + err = -EFAULT; + goto fence_signal_err; + } + + err = __scif_fence_signal(priv->epd, signal.loff, + signal.lval, signal.roff, signal.rval, signal.flags); +fence_signal_err: + scif_err_debug(err, "scif_fence_signal"); + 
return err; + } + case SCIF_GET_VERSION: + { + return SCIF_VERSION; + } + } + return -EINVAL; +} diff --git a/micscif/micscif_intr.c b/micscif/micscif_intr.c new file mode 100644 index 0000000..10268e0 --- /dev/null +++ b/micscif/micscif_intr.c @@ -0,0 +1,159 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#include "mic/micscif.h" +#include "mic/micscif_intr.h" +#include "mic/micscif_nodeqp.h" +#include "mic_common.h" + +/* Runs in the context of sd_intr_wq */ +static void micscif_intr_bh_handler(struct work_struct *work) +{ + struct micscif_dev *scifdev = + container_of(work, struct micscif_dev, sd_intr_bh); + + /* figure out which qp we got a recv on */ + struct micscif_qp *qp = micscif_nodeqp_nextmsg(scifdev); + if (qp != NULL) { + if (is_self_scifdev(scifdev)) + micscif_loopb_msg_handler(scifdev, qp); + else + micscif_nodeqp_intrhandler(scifdev, qp); + } +} + +int micscif_setup_interrupts(struct micscif_dev *scifdev) +{ + if (!scifdev->sd_intr_wq) { + snprintf(scifdev->sd_intr_wqname, sizeof(scifdev->sd_intr_wqname), + "SCIF INTR %d", scifdev->sd_node); + + /* FIXME: Fix windows */ + if (!(scifdev->sd_intr_wq = + __mic_create_singlethread_workqueue(scifdev->sd_intr_wqname))) + return -ENOMEM; + + INIT_WORK(&scifdev->sd_intr_bh, micscif_intr_bh_handler); + } + return 0; +} + +void micscif_destroy_interrupts(struct micscif_dev *scifdev) +{ + destroy_workqueue(scifdev->sd_intr_wq); +} + +#ifdef _MIC_SCIF_ +irqreturn_t micscif_intr_handler(int irq, void *dev_id) +{ + struct micscif_dev *dev = (struct micscif_dev *)dev_id; + queue_work(dev->sd_intr_wq, &dev->sd_intr_bh); + return IRQ_HANDLED; +} + +/* + * register_scif_intr_handler() - Registers SCIF interrupt handler with + * appropriate IRQ + * @dev: per node dev structure to store the intr handle + * + * IRQ 17 - 24 Corresponds to RDMASR registers RDMASR0 - RRDMASR7. + * RDMASR registers are chosen based on the lowest ref count. + * There are 8 RDMASRS for the host and the nodes. So When the number of + * nodes added to the current node's p2p network increases beyond + * 7, it starts sharing the interrupt. 
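As an illustration of the slot selection implemented below: if the eight RDMASR slots currently hold reference counts {2, 1, 1, 1, 1, 1, 1, 1}, the scan settles on handle 1 (the first slot with the minimum count), increments that count, and attaches this node's handler to get_rdmasr_irq(1) with IRQF_SHARED, so a ninth peer shares the least-loaded interrupt rather than failing to register.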
+ */ +int +register_scif_intr_handler(struct micscif_dev *dev) +{ + unsigned int handle = 0; + unsigned int i; + int ret; + + mutex_lock(&ms_info.mi_conflock); + + /* Find the first lowest ref count */ + for (i = 0; i < MAX_RDMASR; i++) + if (ms_info.mi_intr_rcnt[handle] > + ms_info.mi_intr_rcnt[i]) + handle = i; + + if ((ret = request_irq(get_rdmasr_irq(handle), micscif_intr_handler, + IRQF_SHARED, dev->sd_intr_wqname, dev))) { + printk(KERN_ERR "Cannot request irq number %d, ret = %d\n" + , get_rdmasr_irq(handle), ret); + goto error; + } + + ms_info.mi_intr_rcnt[handle]++; + dev->sd_intr_handle = handle; + + printk("Registered interrupt handler for node %d, for IRQ = %d," + "handle = %d\n", dev->sd_node, get_rdmasr_irq(handle), handle); + +error: + mutex_unlock(&ms_info.mi_conflock); + return ret; +} + +/* + * deregister_scif_intr_handler() - Deregisters SCIF interrupt + * handler from appropriate IRQ + * @dev: per node dev structure to retrieve the intr handle + * + */ +void +deregister_scif_intr_handler(struct micscif_dev *dev) +{ + unsigned int handle = dev->sd_intr_handle; + + if (handle >= MAX_RDMASR) + return; + + mutex_lock(&ms_info.mi_conflock); + ms_info.mi_intr_rcnt[handle]--; + + if (ms_info.mi_intr_rcnt[handle] < 0) { + printk("scif intr deregister negative ref count" + " for node %d, handle = %d, IRQ = %d\n", dev->sd_node, + handle, get_rdmasr_irq(handle)); + WARN_ON(1); + } + + mutex_unlock(&ms_info.mi_conflock); + free_irq(get_rdmasr_irq(handle), dev); + printk("Deregistered interrupt handler for node %d, for IRQ = %d," + "handle = %d\n", dev->sd_node, get_rdmasr_irq(handle), handle); +} +#endif /* _MIC_SCIF_ */ diff --git a/micscif/micscif_main.c b/micscif/micscif_main.c new file mode 100644 index 0000000..45d5bf4 --- /dev/null +++ b/micscif/micscif_main.c @@ -0,0 +1,606 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. 
Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include +#include +#include +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,34)) +#include +#endif + +#include +#include +#include +#include +//#include +#include +#include +#include +/* Include this for suspend/resume notifications from pm driver */ +#include + +#ifdef CONFIG_MK1OM +#define MICPM_DEVEVENT_SUSPEND 1 +#define MICPM_DEVEVENT_RESUME 2 +#define MICPM_DEVEVENT_FAIL_SUSPEND 3 +extern void micpm_device_register(struct notifier_block *n); +extern void micpm_device_unregister(struct notifier_block *n); +#endif + +int scif_id = 0; +module_param(scif_id, int, 0400); +MODULE_PARM_DESC(scif_id, "Set scif driver node ID"); + +ulong scif_addr = 0; +module_param(scif_addr, ulong, 0400); +MODULE_PARM_DESC(scif_addr, "Set scif driver host address"); + +struct kmem_cache *unaligned_cache; + +struct mic_info { + dev_t m_dev; + struct cdev m_cdev; + struct class * m_class; + struct device * m_scifdev; +} micinfo; + +int micscif_major = SCIF_MAJOR; +int micscif_minor = 0; + +struct micscif_info ms_info; + +// MAX MIC cards + 1 for the Host +struct micscif_dev scif_dev[MAX_BOARD_SUPPORTED + 1]; + +extern mic_dma_handle_t mic_dma_handle; + +static int mic_pm_qos_cpu_dma_lat = -1; +static int mic_host_numa_node = -1; +static unsigned long mic_p2p_proxy_thresh = -1; + +#ifdef CONFIG_MK1OM +static int micscif_devevent_handler(struct notifier_block *nb, + unsigned long event, + void *msg) +{ + if (event == MICPM_DEVEVENT_SUSPEND) + return micscif_suspend_handler(nb, event, msg); + else if (event == MICPM_DEVEVENT_RESUME) + return micscif_resume_handler(nb, event, msg); + else if (event == MICPM_DEVEVENT_FAIL_SUSPEND) + return micscif_fail_suspend_handler(nb, event, msg); + return 0; +} + +static struct notifier_block mic_deviceevent = { + .notifier_call = micscif_devevent_handler, +}; +#endif + +static int micscif_open(struct inode *in, struct file *f) +{ + dev_t dev = in->i_rdev; + + switch (MINOR(dev)) { + case 0: + /* base mic device access for testing */ + return 0; + case 1: + return scif_fdopen(f); + } + + return -EINVAL; +} + +static int micscif_ioctl(struct inode *in, struct file *f, + unsigned int cmd, unsigned long arg) +{ + dev_t dev = in->i_rdev; + + if (MINOR(dev) == 1) { + /* SCIF device */ + return scif_process_ioctl(f, cmd, arg); + } + return -EINVAL; +} + +static long micscif_unlocked_ioctl(struct file *f, + unsigned int cmd, unsigned long arg) +{ + return (long) micscif_ioctl(f->f_path.dentry->d_inode, f, cmd, arg); +} + +static int micscif_release(struct inode *in, struct file *f) +{ + dev_t dev = in->i_rdev; + + switch (MINOR(dev)) { + case 0: + /* base mic device access for testing */ + return 0; + case 1: + return scif_fdclose(f); + } + + return -EINVAL; +} + +/* TODO: Need to flush the queue, grab some lock, and probably + * notify the remote node we're going down ... 
right now, we're + * just freeing things, which is probably a bad idea :-) + */ +static int micscif_uninit_qp(struct micscif_dev *scifdev) +{ + int i; + /* first, iounmap/unmap/free any memory we mapped */ + for (i = 0; i < scifdev->n_qpairs; i++) { + iounmap(scifdev->qpairs[i].remote_qp); + iounmap(scifdev->qpairs[i].outbound_q.rb_base); + kfree((void *)scifdev->qpairs[i].inbound_q.rb_base); + } + kfree(scifdev->qpairs); + scifdev->n_qpairs = 0; + + return 0; +} + +static int micscif_reboot(struct notifier_block *notifier, unsigned long unused1, void *unused2); + +static struct notifier_block micscif_reboot_notifier = { + .notifier_call = micscif_reboot, + .priority = 0, +}; + +extern struct attribute_group scif_attr_group; + +void micscif_destroy_base(void) +{ +#ifdef CONFIG_MMU_NOTIFIER + destroy_workqueue(ms_info.mi_mmu_notif_wq); +#endif + destroy_workqueue(ms_info.mi_misc_wq); + destroy_workqueue(ms_info.mi_conn_wq); + + sysfs_remove_group(&micinfo.m_scifdev->kobj, &scif_attr_group); + device_destroy(micinfo.m_class, micinfo.m_dev + 1); + device_destroy(micinfo.m_class, micinfo.m_dev); + class_destroy(micinfo.m_class); + cdev_del(&(micinfo.m_cdev)); + unregister_chrdev_region(micinfo.m_dev, 2); +} + +static void _micscif_exit(void) +{ + struct list_head *pos, *unused; + struct scif_callback *temp; + struct micscif_dev *dev; + int i; + + pr_debug("Goodbye SCIF!\n"); + /* Cleanup P2P Node Qp/ Interrupt Handlers */ + for (i = SCIF_HOST_NODE + 1; i <= MAX_BOARD_SUPPORTED; i++) { + dev = &scif_dev[i]; + + if (is_self_scifdev(dev)) + continue; + + micscif_cleanup_scifdev(dev, DESTROY_WQ); + } + + list_for_each_safe(pos, unused, &ms_info.mi_event_cb) { + temp = list_entry(pos, struct scif_callback, list_member); + list_del(pos); + kfree(temp); + } + mutex_destroy(&ms_info.mi_event_cblock); + +#ifdef CONFIG_MK1OM + micpm_device_unregister(&mic_deviceevent); +#endif + + scif_dev[ms_info.mi_nodeid].sd_state = SCIFDEV_STOPPING; + scif_dev[SCIF_HOST_NODE].sd_state = SCIFDEV_STOPPING; + + /* The EXIT message is the last message from MIC to the Host */ + micscif_send_exit(); + + /* + * Deliberate infinite wait for a host response during driver + * unload since the host must inform other SCIF nodes about + * this node going away and then only send a response back + * to this node to avoid this nodes host shutdown handler racing + * with disconnection from the SCIF network. There is a timeout + * on the host for sending a response back so a response will + * be sent else the host has crashed. 
+ */ + wait_event(ms_info.mi_exitwq, + scif_dev[ms_info.mi_nodeid].sd_state == SCIFDEV_STOPPED); + scif_proc_cleanup(); + mic_debug_uninit(); + micscif_kmem_cache_destroy(); + + micscif_destroy_base(); + + /* Disable interrupts */ + deregister_scif_intr_handler(&scif_dev[SCIF_HOST_NODE]); + destroy_workqueue(scif_dev[SCIF_HOST_NODE].sd_intr_wq); + micscif_destroy_loopback_qp(&scif_dev[ms_info.mi_nodeid]); + + /* Close DMA device */ + close_dma_device(0, &mic_dma_handle); + + micscif_uninit_qp(&scif_dev[SCIF_HOST_NODE]); + iounmap(scif_dev[SCIF_HOST_NODE].mm_sbox); +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,34)) + pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, "micscif"); +#endif +} + +static void micscif_exit(void) +{ + unregister_reboot_notifier(&micscif_reboot_notifier); + _micscif_exit(); +} + +static int micscif_reboot(struct notifier_block *notifier, unsigned long unused1, void *unused2) +{ + _micscif_exit(); + return NOTIFY_OK; +} + +struct file_operations micscif_ops = { + .owner = THIS_MODULE, + .unlocked_ioctl = micscif_unlocked_ioctl, + .mmap = micscif_mmap, + .poll = micscif_poll, + .flush = micscif_flush, + .open = micscif_open, + .release = micscif_release, +}; + +static char * scif_devnode(struct device *dev, mode_t *mode) +{ + return kasprintf(GFP_KERNEL, "mic/%s", dev_name(dev)); +} + +// Setup the base informaiton for the driver. No interface specific code. +static int micscif_setup_base(void) +{ + long int result; + + if (micscif_major) { + micinfo.m_dev = MKDEV(micscif_major, micscif_minor); + result = register_chrdev_region(micinfo.m_dev, 2, "micscif"); + } else { + result = alloc_chrdev_region(&micinfo.m_dev, micscif_minor, 2, "micscif"); + micscif_major = MAJOR(micinfo.m_dev); + } + + if (result >= 0) { + cdev_init(&(micinfo.m_cdev), &micscif_ops); + micinfo.m_cdev.owner = THIS_MODULE; + if ((result = cdev_add(&(micinfo.m_cdev), micinfo.m_dev, 2))) + goto unreg_chrdev; + } else { + goto unreg_chrdev; + } + + micinfo.m_class = class_create(THIS_MODULE, "micscif"); + if (IS_ERR(micinfo.m_class)) { + result = PTR_ERR(micinfo.m_class); + goto del_m_dev; + } + + micinfo.m_class->devnode = scif_devnode; + if (IS_ERR((int *)(result = + (long int)device_create(micinfo.m_class, NULL, micinfo.m_dev, NULL, "mic")))) { + result = PTR_ERR((int *)result); + goto class_destroy; + } + if (IS_ERR(micinfo.m_scifdev = + device_create(micinfo.m_class, NULL, micinfo.m_dev + 1, NULL, "scif"))) { + result = PTR_ERR(micinfo.m_scifdev); + goto device_destroy; + } + if ((result = sysfs_create_group(&micinfo.m_scifdev->kobj, &scif_attr_group))) + goto device_destroy1; + + spin_lock_init(&ms_info.mi_eplock); + spin_lock_init(&ms_info.mi_connlock); + spin_lock_init(&ms_info.mi_rmalock); + mutex_init(&ms_info.mi_fencelock); + spin_lock_init(&ms_info.mi_nb_connect_lock); + INIT_LIST_HEAD(&ms_info.mi_uaccept); + INIT_LIST_HEAD(&ms_info.mi_listen); + INIT_LIST_HEAD(&ms_info.mi_zombie); + INIT_LIST_HEAD(&ms_info.mi_connected); + INIT_LIST_HEAD(&ms_info.mi_disconnected); + INIT_LIST_HEAD(&ms_info.mi_rma); + INIT_LIST_HEAD(&ms_info.mi_rma_tc); + INIT_LIST_HEAD(&ms_info.mi_nb_connect_list); + +#ifdef CONFIG_MMU_NOTIFIER + INIT_LIST_HEAD(&ms_info.mi_mmu_notif_cleanup); +#endif + INIT_LIST_HEAD(&ms_info.mi_fence); + if (!(ms_info.mi_misc_wq = create_singlethread_workqueue("SCIF_MISC"))) { + result = -ENOMEM; + goto remove_group; + } + INIT_WORK(&ms_info.mi_misc_work, micscif_misc_handler); + if (!(ms_info.mi_conn_wq = create_singlethread_workqueue("SCIF_NB_CONN"))) { + result = -ENOMEM; + goto 
destroy_misc_wq; + } + INIT_WORK(&ms_info.mi_conn_work, micscif_conn_handler); +#ifdef CONFIG_MMU_NOTIFIER + if (!(ms_info.mi_mmu_notif_wq = create_singlethread_workqueue("SCIF_MMU"))) { + result = -ENOMEM; + goto destroy_conn_wq; + } + INIT_WORK(&ms_info.mi_mmu_notif_work, micscif_mmu_notif_handler); +#endif + ms_info.mi_watchdog_to = DEFAULT_WATCHDOG_TO; +#ifdef MIC_IS_EMULATION + ms_info.mi_watchdog_enabled = 0; +#else + ms_info.mi_watchdog_enabled = 1; +#endif + ms_info.mi_rma_tc_limit = SCIF_RMA_TEMP_CACHE_LIMIT; + ms_info.mi_proxy_dma_threshold = mic_p2p_proxy_thresh; + ms_info.en_msg_log = 0; + return result; +#ifdef CONFIG_MMU_NOTIFIER +destroy_conn_wq: + destroy_workqueue(ms_info.mi_conn_wq); +#endif +destroy_misc_wq: + destroy_workqueue(ms_info.mi_misc_wq); +remove_group: + sysfs_remove_group(&micinfo.m_scifdev->kobj, &scif_attr_group); +device_destroy1: + device_destroy(micinfo.m_class, micinfo.m_dev + 1); +device_destroy: + device_destroy(micinfo.m_class, micinfo.m_dev); +class_destroy: + class_destroy(micinfo.m_class); +del_m_dev: + cdev_del(&(micinfo.m_cdev)); +unreg_chrdev: + unregister_chrdev_region(micinfo.m_dev, 2); +//error: + return result; +} + +#define SBOX_MMIO_LENGTH 0x10000 + +static int micscif_init(void) +{ + int result = 0; + int i; + phys_addr_t host_queue_phys; + phys_addr_t gtt_phys_base; + + pr_debug("HELLO SCIF!\n"); + +#if defined(CONFIG_ML1OM) + pr_debug("micscif_init(): Hello KNF!\n"); +#elif defined(CONFIG_MK1OM) + pr_debug("micscif_init(): Hello KNC!\n"); +#endif + + if (!scif_id || !scif_addr) { + printk(KERN_ERR "%s %d scif_id 0x%x scif_addr 0x%lx" + "not provided as module parameter. Fail module load", + __func__, __LINE__, scif_id, scif_addr); + return -EINVAL; + } + + for (i = 1; i <= MAX_BOARD_SUPPORTED; i++) { + scif_dev[i].sd_state = SCIFDEV_INIT; + scif_dev[i].sd_node = i; + scif_dev[i].sd_numa_node = -1; + mutex_init (&scif_dev[i].sd_lock); + init_waitqueue_head(&scif_dev[i].sd_mmap_wq); + init_waitqueue_head(&scif_dev[i].sd_wq); + init_waitqueue_head(&scif_dev[i].sd_p2p_wq); + INIT_DELAYED_WORK(&scif_dev[i].sd_p2p_dwork, + scif_poll_qp_state); + scif_dev[i].sd_p2p_retry = 0; + } + + // Setup the host node access information + // Initially only talks to the host => node 0 + scif_dev[SCIF_HOST_NODE].sd_node = SCIF_HOST_NODE; + scif_dev[SCIF_HOST_NODE].sd_state = SCIFDEV_RUNNING; + if (!(scif_dev[SCIF_HOST_NODE].mm_sbox = + ioremap_nocache(SBOX_BASE, SBOX_MMIO_LENGTH))) { + result = -ENOMEM; + goto error; + } + scif_dev[SCIF_HOST_NODE].scif_ref_cnt = (atomic_long_t) ATOMIC_LONG_INIT(0); + scif_dev[SCIF_HOST_NODE].scif_map_ref_cnt = 0; + init_waitqueue_head(&scif_dev[SCIF_HOST_NODE].sd_wq); + init_waitqueue_head(&scif_dev[SCIF_HOST_NODE].sd_mmap_wq); + mutex_init(&scif_dev[SCIF_HOST_NODE].sd_lock); + gtt_phys_base = readl(scif_dev[SCIF_HOST_NODE].mm_sbox + SBOX_GTT_PHY_BASE); + gtt_phys_base *= ((4) * 1024); + pr_debug("GTT PHY BASE in GDDR 0x%llx\n", gtt_phys_base); + pr_debug("micscif_init(): gtt_phy_base x%llx\n", gtt_phys_base); + + /* Get handle to DMA device */ + if ((result = open_dma_device(0, 0, &mic_dma_handle))) + goto unmap_sbox; + + ms_info.mi_nodeid = scif_id; + ms_info.mi_maxid = scif_id; + ms_info.mi_total = 2; // Host plus this card + +#ifdef RMA_DEBUG + ms_info.rma_unaligned_cpu_cnt = (atomic_long_t) ATOMIC_LONG_INIT(0); + ms_info.rma_alloc_cnt = (atomic_long_t) ATOMIC_LONG_INIT(0); + ms_info.rma_pin_cnt = (atomic_long_t) ATOMIC_LONG_INIT(0); +#ifdef CONFIG_MMU_NOTIFIER + ms_info.mmu_notif_cnt = (atomic_long_t) 
ATOMIC_LONG_INIT(0); +#endif +#endif + + pr_debug("micscif_init(): setup_card_qp \n"); + host_queue_phys = scif_addr; + mutex_init(&ms_info.mi_event_cblock); + mutex_init(&ms_info.mi_conflock); + INIT_LIST_HEAD(&ms_info.mi_event_cb); + + pr_debug("micscif_init(): setup_interrupts \n"); + /* + * Set up the workqueue thread for interrupt handling + */ + if ((result = micscif_setup_interrupts(&scif_dev[SCIF_HOST_NODE]))) + goto close_dma; + + pr_debug("micscif_init(): host_intr_handler \n"); + if ((result = micscif_setup_card_qp(host_queue_phys, &scif_dev[SCIF_HOST_NODE]))) { + if (result == -ENXIO) + goto uninit_qp; + else + goto destroy_intr_wq; + } + /* need to do this last -- as soon as the dev is setup, userspace + * can try to use the device + */ + pr_debug("micscif_init(): setup_base \n"); + if ((result = micscif_setup_base())) + goto uninit_qp; + /* + * Register the interrupt + */ + if ((result = register_scif_intr_handler(&scif_dev[SCIF_HOST_NODE]))) + goto destroy_base; + + // Setup information for self aka loopback. + scif_dev[ms_info.mi_nodeid].sd_node = ms_info.mi_nodeid; + scif_dev[ms_info.mi_nodeid].sd_numa_node = mic_host_numa_node; + scif_dev[ms_info.mi_nodeid].mm_sbox = scif_dev[SCIF_HOST_NODE].mm_sbox; + scif_dev[ms_info.mi_nodeid].scif_ref_cnt = (atomic_long_t) ATOMIC_LONG_INIT(0); + scif_dev[ms_info.mi_nodeid].scif_map_ref_cnt = 0; + init_waitqueue_head(&scif_dev[ms_info.mi_nodeid].sd_wq); + init_waitqueue_head(&scif_dev[ms_info.mi_nodeid].sd_mmap_wq); + mutex_init(&scif_dev[ms_info.mi_nodeid].sd_lock); + if ((result = micscif_setup_loopback_qp(&scif_dev[ms_info.mi_nodeid]))) + goto dereg_intr_handle; + scif_dev[ms_info.mi_nodeid].sd_state = SCIFDEV_RUNNING; + + unaligned_cache = micscif_kmem_cache_create(); + if (!unaligned_cache) { + result = -ENOMEM; + goto destroy_loopb; + } + scif_proc_init(); + mic_debug_init(); + + pr_debug("micscif_init(): Setup successful: 0x%llx \n", host_queue_phys); + +#ifdef CONFIG_MK1OM + micpm_device_register(&mic_deviceevent); +#endif + if ((result = register_reboot_notifier(&micscif_reboot_notifier))) + goto cache_destroy; + +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,34)) + result = pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, "micscif", mic_pm_qos_cpu_dma_lat); + if (result) { + printk("%s %d mic_pm_qos_cpu_dma_lat %d result %d\n", + __func__, __LINE__, mic_pm_qos_cpu_dma_lat, result); + result = 0; + /* Dont fail driver load due to PM QoS API. 
Fall through */ + } +#endif + + return result; +cache_destroy: +#ifdef CONFIG_MK1OM + micpm_device_unregister(&mic_deviceevent); +#endif + micscif_kmem_cache_destroy(); +destroy_loopb: + micscif_destroy_loopback_qp(&scif_dev[ms_info.mi_nodeid]); +dereg_intr_handle: + deregister_scif_intr_handler(&scif_dev[SCIF_HOST_NODE]); +destroy_base: + pr_debug("Unable to finish scif setup for some reason: %d\n", result); + micscif_destroy_base(); +uninit_qp: + micscif_uninit_qp(&scif_dev[SCIF_HOST_NODE]); +destroy_intr_wq: + micscif_destroy_interrupts(&scif_dev[SCIF_HOST_NODE]); +close_dma: + close_dma_device(0, &mic_dma_handle); +unmap_sbox: + iounmap(scif_dev[SCIF_HOST_NODE].mm_sbox); +error: + return result; +} + +module_init(micscif_init); +module_exit(micscif_exit); + +module_param_named(huge_page, mic_huge_page_enable, bool, 0600); +MODULE_PARM_DESC(huge_page, "SCIF Huge Page Support"); + +module_param_named(ulimit, mic_ulimit_check, bool, 0600); +MODULE_PARM_DESC(ulimit, "SCIF ulimit check"); + +module_param_named(reg_cache, mic_reg_cache_enable, bool, 0600); +MODULE_PARM_DESC(reg_cache, "SCIF registration caching"); +module_param_named(p2p, mic_p2p_enable, bool, 0600); +MODULE_PARM_DESC(p2p, "SCIF peer-to-peer"); + +module_param_named(p2p_proxy, mic_p2p_proxy_enable, bool, 0600); +MODULE_PARM_DESC(p2p_proxy, "SCIF peer-to-peer proxy DMA support"); + +module_param_named(pm_qos_cpu_dma_lat, mic_pm_qos_cpu_dma_lat, int, 0600); +MODULE_PARM_DESC(pm_qos_cpu_dma_lat, "PM QoS CPU DMA latency in usecs."); + +module_param_named(numa_node, mic_host_numa_node, int, 0600); +MODULE_PARM_DESC(numa_node, "Host Numa node to which MIC is attached"); + +module_param_named(p2p_proxy_thresh, mic_p2p_proxy_thresh, ulong, 0600); +MODULE_PARM_DESC(p2p_proxy_thresh, "Transfer size after which Proxy DMA helps DMA perf"); + +MODULE_LICENSE("GPL"); +MODULE_INFO(build_number, BUILD_NUMBER); +MODULE_INFO(build_bywhom, BUILD_BYWHOM); +MODULE_INFO(build_ondate, BUILD_ONDATE); +MODULE_INFO(build_scmver, BUILD_SCMVER); diff --git a/micscif/micscif_nm.c b/micscif/micscif_nm.c new file mode 100644 index 0000000..7d34942 --- /dev/null +++ b/micscif/micscif_nm.c @@ -0,0 +1,1740 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty.
+ * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* SCIF Node Management */ + +#include "mic/micscif.h" +#ifndef _MIC_SCIF_ +#include "mic_common.h" + +#endif +#include "mic/micscif_map.h" +#include "mic/micscif_intr.h" +#ifdef _MIC_SCIF_ +extern mic_dma_handle_t mic_dma_handle; +#else +extern bool mic_crash_dump_enabled; +#endif + + +/** + * micscif_create_node_dep: + * + * @dev: Remote SCIF device. + * @nr_pages: number of pages + * + * Increment the map SCIF device ref count and notify the host if this is the + * first dependency being created between the two nodes. + */ +void +micscif_create_node_dep(struct micscif_dev *dev, int nr_pages) +{ +#ifdef SCIF_ENABLE_PM + struct nodemsg notif_msg; + + if (dev) { + mutex_lock(&dev->sd_lock); + if (!dev->scif_map_ref_cnt) { + /* Notify Host if this is the first dependency being created */ + notif_msg.uop = SCIF_NODE_CREATE_DEP; + notif_msg.src.node = ms_info.mi_nodeid; + notif_msg.payload[0] = dev->sd_node; + /* No error handling for Host SCIF device */ + micscif_nodeqp_send(&scif_dev[SCIF_HOST_NODE], &notif_msg, NULL); + } + dev->scif_map_ref_cnt += nr_pages; + mutex_unlock(&dev->sd_lock); + } +#endif +} + +/** + * micscif_destroy_node_dep: + * + * @dev: Remote SCIF device. + * @nr_pages: number of pages + * + * Decrement the map SCIF device ref count and notify the host if a dependency + * no longer exists between two nodes. + */ +void +micscif_destroy_node_dep(struct micscif_dev *dev, int nr_pages) +{ +#ifdef SCIF_ENABLE_PM + struct nodemsg notif_msg; + + if (dev) { + mutex_lock(&dev->sd_lock); + dev->scif_map_ref_cnt -= nr_pages; + if (!dev->scif_map_ref_cnt) { + /* Notify Host if all dependencies have been destroyed */ + notif_msg.uop = SCIF_NODE_DESTROY_DEP; + notif_msg.src.node = ms_info.mi_nodeid; + notif_msg.payload[0] = dev->sd_node; + /* No error handling for Host SCIF device */ + micscif_nodeqp_send(&scif_dev[SCIF_HOST_NODE], &notif_msg, NULL); + } + mutex_unlock(&dev->sd_lock); + } +#endif +} + +/** + * micscif_callback: + * + * @node: node id of the node added/removed. + * @event_type: SCIF_NODE_ADDED if a new node is added + * SCIF_NODE_REMOVED if a node is removed + * + * Calls the callback function whenever a node is added/removed + */ +static void micscif_callback(uint16_t node, enum scif_event_type event_type) +{ + struct list_head *pos; + struct scif_callback *temp; + union eventd event; + + switch (event_type) { + case SCIF_NODE_ADDED: + event.scif_node_added = node; + break; + case SCIF_NODE_REMOVED: + event.scif_node_removed = node; + break; + default: + return; + } + + mutex_lock(&ms_info.mi_event_cblock); + list_for_each(pos, &ms_info.mi_event_cb) { + temp = list_entry(pos, struct scif_callback, list_member); + temp->callback_handler(event_type, event); + } + mutex_unlock(&ms_info.mi_event_cblock); +} + +/** + * micscif_node_remove_callback: + * + * @node: node id of the node removed.
+ * + * Calls the callback function whenever a new node is removed + */ +static void micscif_node_remove_callback(int node) +{ + micscif_callback((uint16_t)node, SCIF_NODE_REMOVED); +} + +/** + * micscif_node_add_callback: + * + * @node: node id of the node added. + * + * Calls the callback function whenever a new node is added + */ +void micscif_node_add_callback(int node) +{ + micscif_callback((uint16_t)node, SCIF_NODE_ADDED); +} + +void micscif_cleanup_qp(struct micscif_dev *dev) +{ + struct micscif_qp *qp; + + qp = &dev->qpairs[0]; + + if (!qp) + return; + + scif_iounmap((void*)qp->remote_qp, sizeof(struct micscif_qp), dev); + scif_iounmap((void*)dev->qpairs[0].outbound_q.rb_base, sizeof(struct micscif_qp), dev); + qp->remote_qp = NULL; + dev->qpairs[0].local_write = 0; + dev->qpairs[0].inbound_q.current_write_offset = 0; + dev->qpairs[0].inbound_q.current_read_offset = 0; +#ifdef _MIC_SCIF_ + kfree((void*)(qp->inbound_q.rb_base)); + kfree(dev->qpairs); + qp = NULL; +#endif +} + +/* + * micscif_cleanup_scifdev + * + * @dev: Remote SCIF device. + * Uninitialize SCIF data structures for remote SCIF device. + */ +void micscif_cleanup_scifdev(struct micscif_dev *dev, bool destroy_wq) +{ + int64_t ret; +#ifndef _MIC_SCIF_ + mic_ctx_t *mic_ctx; +#endif + if (SCIFDEV_NOTPRESENT == dev->sd_state) { +#ifdef _MIC_SCIF_ + /* + * If there are any stale qp allocated due to + * p2p connection failures then cleanup now + */ + micscif_cleanup_qp(dev); +#endif + return; + } + + dev->sd_wait_status = OP_FAILED; + wake_up(&dev->sd_wq); + +#ifdef _MIC_SCIF_ + /* + * Need to protect destruction of the workqueue since this code + * can be called from two contexts: + * a) Remove Node Handling. + * b) SCIF driver unload + */ + mutex_lock(&dev->sd_lock); + if ((SCIFDEV_RUNNING != dev->sd_state) && (SCIFDEV_SLEEPING != dev->sd_state)) + goto unlock; + dev->sd_state = SCIFDEV_STOPPED; + wake_up(&dev->sd_p2p_wq); + mutex_unlock(&dev->sd_lock); + deregister_scif_intr_handler(dev); + if (destroy_wq && dev->sd_intr_wq) { + destroy_workqueue(dev->sd_intr_wq); + dev->sd_intr_wq = NULL; + } +#endif + + mutex_lock(&dev->sd_lock); +#ifndef _MIC_SCIF_ + if ((SCIFDEV_RUNNING != dev->sd_state) && (SCIFDEV_SLEEPING != dev->sd_state)) + goto unlock; + dev->sd_state = SCIFDEV_STOPPED; +#endif + /* + * Change the state of the remote SCIF device + * to idle as soon as the activity counter is + * zero. The node state and ref count is + * maintained within a single atomic_long_t. + * No timeout for this tight loop since we expect + * the node to complete the API it is currently + * executing following which the scif_ref_count + * will drop to zero. + */ + do { + ret = atomic_long_cmpxchg( + &dev->scif_ref_cnt, 0, SCIF_NODE_IDLE); + cpu_relax(); + } while (ret && ret != SCIF_NODE_IDLE); + + mutex_unlock(&dev->sd_lock); + /* Cleanup temporary registered windows */ + flush_workqueue(ms_info.mi_misc_wq); + mutex_lock(&dev->sd_lock); + +#ifdef _MIC_SCIF_ + drain_dma_global(mic_dma_handle); +#else + mic_ctx = get_per_dev_ctx(dev->sd_node - 1); + drain_dma_global(mic_ctx->dma_handle); + micscif_destroy_p2p(mic_ctx); +#endif + scif_invalidate_ep(dev->sd_node); + micscif_kill_apps_with_mmaps(dev->sd_node); + + micscif_cleanup_qp(dev); + mutex_unlock(&dev->sd_lock); +#ifndef _MIC_SCIF_ + mutex_lock(&ms_info.mi_conflock); + ms_info.mi_mask &= ~(0x1 << dev->sd_node); + ms_info.mi_total--; + mutex_unlock(&ms_info.mi_conflock); +#endif + + /* Wait for all applications to unmap remote memory mappings. 
*/ + wait_event(dev->sd_mmap_wq, + !micscif_rma_do_apps_have_mmaps(dev->sd_node)); + micscif_cleanup_rma_for_zombies(dev->sd_node); + micscif_node_remove_callback(dev->sd_node); + return; +unlock: + mutex_unlock(&dev->sd_lock); +} + +/* + * micscif_remove_node: + * + * @mask: bitmask of nodes in the deactivation set. + * @flags: Type of deactivation set i.e. Power Management, + * RAS, Maintenance Mode etc. + * @block: Can block. + * + * Attempt to deactivate a set of remote SCIF devices nodes passed in mask. + * If the SCIF activity ref count is positive for a remote node then + * the approporiate bit in the input bitmask is reset and the resultant + * bitmask is returned. + */ +uint64_t micscif_handle_remove_node(uint64_t mask, uint64_t payload) +{ + int64_t ret; + int err = 0; + uint32_t i; + struct micscif_dev *dev; + uint64_t flags = 0; + flags = payload & 0x00000000FFFFFFFF; + + switch(flags) { + case DISCONN_TYPE_POWER_MGMT: + { + uint8_t *nodemask_buf = NULL; + int size = payload >> 32; + +#ifndef _MIC_SCIF_ + nodemask_buf = mic_data.dd_pm.nodemask; +#else + nodemask_buf = scif_ioremap(mask, size, &scif_dev[SCIF_HOST_NODE]); +#endif + if (!nodemask_buf) { + err = EAGAIN; + break; + } + + for (i = 0; i <= ms_info.mi_maxid; i++) { + dev = &scif_dev[i]; + if (!get_nodemask_bit(nodemask_buf , i)) + continue; + /* + * Try for the SCIF device lock. Bail out if + * it is already grabbed since some other + * thread is already working on some other + * node state transition for this remote SCIF device. + */ + if (mutex_trylock(&dev->sd_lock)) { + + if (SCIFDEV_RUNNING != dev->sd_state) { + mutex_unlock(&dev->sd_lock); + continue; + } + /* + * Change the state of the remote SCIF device + * to idle only if the activity counter is + * already zero. The node state and ref count + * is maintained within a single atomic_long_t. + */ + ret = atomic_long_cmpxchg( + &dev->scif_ref_cnt, 0, SCIF_NODE_IDLE); + + if (!ret || ret == SCIF_NODE_IDLE) { + if (!ret) { +#ifdef _MIC_SCIF_ + drain_dma_global(mic_dma_handle); +#else + mic_ctx_t *mic_ctx = get_per_dev_ctx(dev->sd_node - 1); + drain_dma_global(mic_ctx->dma_handle); +#endif + } + /* + * Turn off the remote SCIF device. + * Any communication to this SCIF + * after this point will require a + * wake up message to the host. + */ + dev->sd_state = SCIFDEV_SLEEPING; + err = 0; + } + else { + /* + * Cannot put the remote SCIF device + * to sleep. + */ + err = EAGAIN; + mutex_unlock(&dev->sd_lock); + break; + } + mutex_unlock(&dev->sd_lock); + } else { + err = EAGAIN; + break; + } + } + +#ifndef _MIC_SCIF_ + scif_iounmap(nodemask_buf, size, &scif_dev[SCIF_HOST_NODE]); +#endif + + break; + } + case DISCONN_TYPE_LOST_NODE: + { + /* In the case of lost node, first paramater + * is the node id and not a mask. + */ + dev = &scif_dev[mask]; + micscif_cleanup_scifdev(dev, !DESTROY_WQ); + break; + } + default: + { + /* Unknown remove node flags */ + BUG_ON(1); + } + } + + return err; +} + +/** + * set_nodemask_bit: + * + * @node_id[in]: node id to be set in the mask + * + * Set bit in the nodemask. each bit represents node. 
Set the bit to add the node to the + * activation/de-activation set + */ +//void +//set_nodemask_bit(uint64_t *nodemask, uint32_t node_id) +void +set_nodemask_bit(uint8_t* nodemask, uint32_t node_id, int val) +{ + int index = 0; + uint8_t *temp_mask; + + index = (int) node_id / 8; + temp_mask = nodemask + index; + node_id = node_id - (index * 8); + if (val) + *temp_mask |= (1ULL << node_id); + else + *temp_mask &= ~(1ULL << node_id); +} + +/** + * get_nodemask_bit: + * + * @node_id[in]: node id to be checked in the mask + * + * Check if a bit in the nodemask corresponding to a + * node id is set. + * + * return 1 if the bit is set. 0 if the bit is cleared. + */ +int +get_nodemask_bit(uint8_t* nodemask, uint32_t node_id) { + int index = 0; + uint8_t *temp_mask; + + index = (int) node_id / 8; + temp_mask = nodemask + index; + node_id = node_id - (index * 8); + return *temp_mask & (1ULL << node_id); + +} +/** +* nodemask_isvalid - Check if a nodemask is valid after +* calculating the de-activation set. +* +* @nodemask[in]: The nodemask to be checked. +* +* Returns true if valid. +*/ +bool nodemask_isvalid(uint8_t* nodemask) { + uint32_t i; + for (i = 0; i <= ms_info.mi_maxid; i++) { + if (get_nodemask_bit(nodemask, i)) + return true; + } + + return false; +} + +#ifndef _MIC_SCIF_ +/* + * micscif_send_pm_rmnode_msg: + * + * @node: Destination node for the deactivation request. + * @nodemask_addr: Address of the nodemask describing the deactivation set. + * @nodemask_size: Size of the nodemask in bytes. + * @orig_node: The node which triggered this remove node message. + * + * Sends a deactivation request to the valid nodes not included in the + * deactivation set from the Host and waits for a response. + * Returns the response mask received from the node. + */ +uint64_t micscif_send_pm_rmnode_msg(int node, uint64_t nodemask_addr, + uint64_t nodemask_size, int orig_node) { + + uint64_t ret; + struct nodemsg notif_msg; + struct micscif_dev *dev = &scif_dev[node]; + + /* + * Send remove node msg only to running nodes. + * An idle node need not know about another _lost_ node + * until it wakes up. When it does, it will request the + * host to wake up the _lost_ node to which the host will + * respond with a NACK + */ + + if (SCIFDEV_RUNNING != dev->sd_state) + return -ENODEV; + + notif_msg.uop = SCIF_NODE_REMOVE; + notif_msg.src.node = ms_info.mi_nodeid; + notif_msg.dst.node = node; + notif_msg.payload[0] = nodemask_addr; + notif_msg.payload[1] = DISCONN_TYPE_POWER_MGMT; + notif_msg.payload[1] |= (nodemask_size << 32); + notif_msg.payload[2] = atomic_long_read(&ms_info.mi_unique_msgid); + notif_msg.payload[3] = orig_node; + /* Send the request to remove a set of nodes */ + pr_debug("Send PM rmnode msg for node %d to node %d\n", orig_node, node); + ret = micscif_nodeqp_send(dev, &notif_msg, NULL); + + return ret; +} + +uint64_t micscif_send_lost_node_rmnode_msg(int node, int orig_node) { + uint64_t ret; + struct nodemsg notif_msg; + struct micscif_dev *dev = &scif_dev[node]; + + /* + * Send remove node msg only to running nodes. + * An idle node need not know about another _lost_ node + * until it wakes up.
When it does, it will request the + * host to wake up the _lost_ node to which the host will + * respond with a NACK + */ + if (SCIFDEV_RUNNING != dev->sd_state) + return -ENODEV; + + micscif_inc_node_refcnt(dev, 1); + notif_msg.uop = SCIF_NODE_REMOVE; + notif_msg.src.node = ms_info.mi_nodeid; + notif_msg.dst.node = node; + notif_msg.payload[0] = orig_node; + notif_msg.payload[1] = DISCONN_TYPE_LOST_NODE; + notif_msg.payload[3] = orig_node; + /* Send the request to remove a set of nodes */ + ret = micscif_nodeqp_send(dev, &notif_msg, NULL); + micscif_dec_node_refcnt(dev, 1); + + return ret; +} + +/* + * micpm_nodemask_uninit: + * @mic_ctx - per-card context of the node to uninitialize + * + * Deallocate memory for per-card nodemask buffer +*/ +void +micpm_nodemask_uninit(mic_ctx_t* mic_ctx) +{ + if (mic_ctx && mic_ctx->micpm_ctx.nodemask.va) { + mic_ctx_unmap_single(mic_ctx, mic_ctx->micpm_ctx.nodemask.pa, + mic_ctx->micpm_ctx.nodemask.len); + kfree(mic_ctx->micpm_ctx.nodemask.va); + } +} + +/* + * micpm_nodemask_init: + * @num_devs - number of SCIF nodes including the host + * @mic_ctx - per-card context of the node to initialize + * + * Allocate memory for per-card nodemask buffer +*/ +int +micpm_nodemask_init(uint32_t num_devs, mic_ctx_t* mic_ctx) +{ + if (!mic_ctx) + return 0; + + mic_ctx->micpm_ctx.nodemask.len = ((int) (num_devs / 8) + + ((num_devs % 8) ? 1 : 0)); + mic_ctx->micpm_ctx.nodemask.va = (uint8_t *) + kzalloc(mic_ctx->micpm_ctx.nodemask.len, GFP_KERNEL); + + if (!mic_ctx->micpm_ctx.nodemask.va) { + PM_DEBUG("Error allocating nodemask buffer\n"); + return -ENOMEM; + } + + mic_ctx->micpm_ctx.nodemask.pa = mic_ctx_map_single(mic_ctx, + mic_ctx->micpm_ctx.nodemask.va, + mic_ctx->micpm_ctx.nodemask.len); + + if(mic_map_error(mic_ctx->micpm_ctx.nodemask.pa)) { + PM_PRINT("Error Mapping nodemask buffer\n"); + kfree(mic_ctx->micpm_ctx.nodemask.va); + } + return 0; +} + +/** + * micpm_disconn_uninit: + * @num_devs - number of SCIF nodes including the host + * Note - cannot use ms_info.mi_total (total number of SCIF nodes) as it is updated after the driver load is complete + * + * Reset/re-initialize data structures needed for PM disconnection. This is necessary every time the board is reset. + * Since the host (node 0) represents one of the nodes in the network, it is necessary to clear the host's dependency on the given node + */ +int +micpm_disconn_uninit(uint32_t num_devs) +{ + uint32_t i; + uint32_t status = 0; + + /* + * ms_info.mi_total is updated after the driver load is complete + * switching back to static allocation of max nodes + */ + + if (ms_info.mi_depmtrx) { + + for (i = 0; i < (int)num_devs; i++) { + if (ms_info.mi_depmtrx[i]) { + kfree(ms_info.mi_depmtrx[i]); + } + } + kfree(ms_info.mi_depmtrx); + } + + if (mic_data.dd_pm.nodemask) + kfree(mic_data.dd_pm.nodemask); + + return status; +} + +/** + * micpm_disconn_init: + * @num_devs - number of SCIF nodes including the host + * Note - cannot use ms_info.mi_total (total number of SCIF nodes) as it is updated after the driver load is complete + * + * Allocate memory for dependency graph. Initialize dependencies for the node. + * The memory allocated is based on the number of devices present during driver load.
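As a worked example of the nodemask sizing used by micpm_nodemask_init() above: with num_devs = 9 (the host plus eight cards) the buffer length works out to 9/8 + 1 = 2 bytes, and node 8 then occupies bit 0 of the second byte, which is exactly the index = node_id / 8, bit = node_id % 8 split that set_nodemask_bit() and get_nodemask_bit() earlier in this file apply.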
+ */ +int +micpm_disconn_init(uint32_t num_devs) +{ + uint32_t i; + uint32_t status = 0; + mic_ctx_t *mic_ctx; + + if (ms_info.mi_depmtrx) + return status; + + ms_info.mi_depmtrx = (uint32_t**)kzalloc(sizeof(uint32_t*) * num_devs, GFP_KERNEL); + if (!ms_info.mi_depmtrx) { + pr_debug("dependency graph initialization failed\n"); + status = -ENOMEM; + goto exit; + } + + for (i = 0; i < (int)num_devs; i++) { + ms_info.mi_depmtrx[i] = (uint32_t*)kzalloc(sizeof(uint32_t) * num_devs, GFP_KERNEL); + if (!ms_info.mi_depmtrx[i]) { + micpm_disconn_uninit(num_devs); + pr_debug("dependency graph initialization failed\n"); + status = -ENOMEM; + goto exit; + } + } + init_waitqueue_head(&ms_info.mi_disconn_wq); + atomic_long_set(&ms_info.mi_unique_msgid, 0); + + //In Windows, this code is executed during micpm_probe + for(i = 0; i < (num_devs - 1); i++) { + mic_ctx = get_per_dev_ctx(i); + status = micpm_nodemask_init(num_devs, mic_ctx); + if (status) + goto exit; + } + + /* Set up a nodemask buffer for Host scif node in a common pm_ctx */ + mic_data.dd_pm.nodemask_len = ((int) (num_devs / 8) + + ((num_devs % 8) ? 1 : 0)); + mic_data.dd_pm.nodemask = (uint8_t *) + kzalloc(mic_data.dd_pm.nodemask_len, GFP_KERNEL); + + if (!mic_data.dd_pm.nodemask) { + PM_DEBUG("Error allocating nodemask buffer\n"); + status = -ENOMEM; + goto exit; + } + +exit: + return status; +} + +/** + * micscif_set_nodedep: + * + * @src_node: node which is creating dependency. + * @dst_node: node on which dependency is being created + * + * sets the given value in dependency graph for src_node -> dst_node + */ +void +micscif_set_nodedep(uint32_t src_node, uint32_t dst_node, enum dependency_state state) +{ + /* We dont need to lock dependency graph while updating + * as every node will modify its own row + */ + if (ms_info.mi_depmtrx) + ms_info.mi_depmtrx[src_node][dst_node] = state; +} + +/** + * micscif_get_nodedep: + * + * @src_node: node which has/has not created dependency. + * @dst_node: node on which dependency was/was not created + * + * gets the current value in dependency graph for src_node -> dst_node + */ +enum dependency_state +micscif_get_nodedep(uint32_t src_node, uint32_t dst_node) +{ + enum dependency_state state = DEP_STATE_NOT_DEPENDENT; + if (ms_info.mi_depmtrx) + state = ms_info.mi_depmtrx[src_node][dst_node]; + return state; +} + +/** + * init_depgraph_stack: + * + * @stack_ptr: list head. + * + * Initialize linked list to be used as stack + */ +int +init_depgraph_stack(struct list_head *stack_ptr) +{ + int status = 0; + + if (!stack_ptr) { + pr_debug("%s argument stack_ptr is invalid\n", __func__); + status = -EINVAL; + goto exit; + } + /* Initialize stack */ + INIT_LIST_HEAD(stack_ptr); + +exit: + return status; +} + +/** + * uninit_depgraph_stack: + * + * @stack_ptr: list head for linked list(stack). + * + * Empty stack(linked list). Pop all the nodes left in the stack. + */ +int +uninit_depgraph_stack(struct list_head *stack_ptr) +{ + int status = 0; + uint32_t node_id; + if (!stack_ptr) { + pr_debug("%s argument stack_ptr is invalid\n", __func__); + status = -EINVAL; + goto exit; + } + + /* pop all the nodes left in the stack */ + while (!is_stack_empty(stack_ptr)) { + status = stack_pop_node(stack_ptr, &node_id); + if (status) { + pr_debug("%s error while cleaning up depgraph stack\n", __func__); + status = -EINVAL; + goto exit; + } + } + +exit: + return status; +} + +/** + * is_stack_empty: + * + * @stack_ptr: list head for linked list(stack). + * + * returns true if the stack is empty. 
+ */ +int +is_stack_empty(struct list_head *stack_ptr) +{ + if(list_empty(stack_ptr)) { + return 1; + } + return 0; +} + +/** + * stack_push_node: + * + * @stack_ptr[in]: list head for linked list(stack). + * @node_id[in]: node id to be pushed + * + * Push node in to the stack i.e. create node and add it at the start of linked list + */ +int +stack_push_node(struct list_head *stack_ptr, uint32_t node_id) +{ + int status = 0; + struct stack_node *datanode = NULL; + + datanode = kmalloc(sizeof(struct stack_node), GFP_KERNEL); + if (!datanode) { + pr_debug("%s error allocating memory to stack node.\n", __func__); + status = -ENOMEM; + goto exit; + } + + datanode->node_id = node_id; + list_add(&datanode->next, stack_ptr); +exit: + return status; +} + +/** + * stack_pop_node: + * + * @stack_ptr[in]: list head for linked list(stack). + * @node_id[out]: pointer to the node id to be popped + * + * Pop node from the stack i.e. delete first entry of linked list and return its data. + */ +int +stack_pop_node(struct list_head *stack_ptr, uint32_t *node_id) +{ + int status = 0; + struct stack_node *datanode = NULL; + + if(is_stack_empty(stack_ptr)) { + pr_debug("%s stack found empty when tried to pop\n", __func__); + status = -EFAULT; + goto exit; + } + + datanode = list_first_entry(stack_ptr, struct stack_node, next); + if (!datanode) { + pr_debug("%s Unable to pop from stack\n", __func__); + status = -EFAULT; + goto exit; + } + *node_id = datanode->node_id; + + list_del(&datanode->next); + if (datanode) { + kfree(datanode); + } + +exit: + return status; +} + +/** + * micscif_get_activeset: + * + * @node_id[in]: source node id. + * @nodemask[out]: bitmask of nodes present in activation set + * + * Algorithm to find out activation set for the given source node. Activation set is used to re-connect node into + * the scif network. + */ +int +micscif_get_activeset(uint32_t node_id, uint8_t *nodemask) +{ + int status = 0; + uint32_t i = 0; + struct list_head stack; + uint8_t visited[128] = {0}; // 128 is max number of nodes. 
+ uint32_t num_nodes = ms_info.mi_maxid + 1; + mic_ctx_t *mic_ctx; + + if (!ms_info.mi_depmtrx) { + status = -EINVAL; + goto exit; + } + + status = init_depgraph_stack(&stack); + if (status) { + pr_debug("%s failed to initilize depgraph stack\n", __func__); + goto exit; + } + + status = stack_push_node(&stack, node_id); + if (status) { + pr_debug("%s error while running activation set algorithm\n", __func__); + goto exit; + } + + /* mark node visited to avoid repetition of the algorithm for the same node */ + visited[node_id] = 1; + + while (!is_stack_empty(&stack)) { + status = stack_pop_node(&stack, &node_id); + if (status) { + pr_debug("%s error while running activation set algorithm\n", __func__); + goto exit; + } + + /* include node_id in the activation set*/ + set_nodemask_bit(nodemask, node_id, 1); + + for (i = 0; i < num_nodes; i++) { + /* check if node has dependency on any node 'i' which is also disconnected at this time*/ + if ((!visited[i]) && (ms_info.mi_depmtrx[node_id][i] == DEP_STATE_DISCONNECTED)) { + visited[i] = 1; + if (i == 0) + continue; + mic_ctx = get_per_dev_ctx(i - 1); + if ((mic_ctx->micpm_ctx.idle_state == PM_IDLE_STATE_PC3) || + (mic_ctx->micpm_ctx.idle_state == PM_IDLE_STATE_PC6)) { + status = stack_push_node(&stack, i); + if (status) { + pr_debug("%s error while running activation set algorithm\n", __func__); + goto exit; + } + } + } + } + } /* end of while (!is_stack_empty(&stack)) */ +exit: + uninit_depgraph_stack(&stack); + return status; +} + +/** + * micscif_get_minimal_deactiveset: + * + * @node_id[in]: source node id. + * @nodemask[out]: bitmask of nodes present in de-activation set + * @visited[in/out]: information of which nodes are already visited in de-activation set algorithm + * + * Algorithm to find out minimum/must de-activation set for the given source node. This method is part of and used by + * micscif_get_deactiveset. 
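To make the micscif_get_activeset() traversal above concrete: suppose node 2 is waking up and mi_depmtrx[2][3] is DEP_STATE_DISCONNECTED because nodes 2 and 3 idled while a dependency existed between them. The loop marks node 2 visited, pops it, sets bit 2 in the nodemask, and, provided card 3 is sitting in PC3 or PC6, pushes node 3 so that bit 3 is set on the next iteration; both nodes are therefore reconnected together, while the host (i == 0) is marked visited but never pushed.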
+ */ +int micscif_get_minimal_deactiveset(uint32_t node_id, uint8_t *nodemask, uint8_t *visited) +{ + int status = 0; + uint32_t i = 0; + struct list_head stack; + uint32_t num_nodes = ms_info.mi_maxid + 1; + + if (!ms_info.mi_depmtrx) { + status = -EINVAL; + goto exit; + } + + status = init_depgraph_stack(&stack); + if (!visited) { + pr_debug("%s invalid parameter visited", __func__); + status = -EINVAL; + goto exit_pop; + } + + if (status) { + pr_debug("%s failed to initilize depgraph stack\n", __func__); + goto exit_pop; + } + + status = stack_push_node(&stack, node_id); + if (status) { + pr_debug("%s error while running de-activation set algorithm\n", __func__); + goto exit_pop; + } + + /* mark node visited to avoid repetition of the algorithm for the same node */ + visited[node_id] = 1; + + while (!is_stack_empty(&stack)) { + + status = stack_pop_node(&stack, &node_id); + if (status) { + pr_debug("%s error while running de-activation set algorithm\n", __func__); + goto exit_pop; + } + + /* include node_id in the activation set*/ + set_nodemask_bit(nodemask, node_id, 1); + + for (i = 0; i < num_nodes; i++) { + if (!visited[i]) { + if (ms_info.mi_depmtrx[i][node_id] == DEP_STATE_DEPENDENT) { + /* The algorithm terminates, if we find any dependent node active */ + status = -EOPNOTSUPP; + goto exit_pop; + } else if(ms_info.mi_depmtrx[i][node_id] == DEP_STATE_DISCONNECT_READY) { + /* node is dependent but ready to get disconnected */ + visited[i] = 1; + status = stack_push_node(&stack, i); + if (status) { + pr_debug("%s error while running de-activation set algorithm\n", __func__); + goto exit_pop; + } + } + } + } + }/*end of while(!is_stack_empty(&stack))*/ + +exit_pop: + while (!is_stack_empty(&stack)) { + status = stack_pop_node(&stack, &node_id); + if (status) { + pr_debug("%s error while running activation set algorithm\n", __func__); + break; + } + if (visited) + visited[node_id] = 0; + } +exit: + return status; +} + +/** + * micscif_get_deactiveset: + * + * @node_id[in]: source node id. + * @nodemask[out]: bitmask of nodes present in de-activation set + * @max_disconn: flag to restrict de-activation set algoritthm to minimum/must set. + * True value indicates maximum de-activation set + * + * Algorithm to find out de-activation set for the given source node. De-activation set is used to disconnect node into + * the scif network. The algorithm can find out maximum possible de-activation set(required in situations like + * power management)if the max_possible flag is set. 
+ */ +int +micscif_get_deactiveset(uint32_t node_id, uint8_t *nodemask, int max_disconn) +{ + int status = 0; + uint32_t i = 0; + struct list_head stack; + uint8_t *visited = NULL; + uint8_t cont_next_step = 0; + uint32_t num_nodes = ms_info.mi_maxid + 1; + mic_ctx_t *mic_ctx; + + if (!ms_info.mi_depmtrx) { + status = -EINVAL; + goto exit; + } + + status = init_depgraph_stack(&stack); + if (status) { + pr_debug("%s failed to initilize depgraph stack\n", __func__); + goto exit; + } + + visited = kzalloc(sizeof(uint8_t) * num_nodes, GFP_KERNEL); + if (!visited) { + pr_debug("%s failed to allocated memory for visited array", __func__); + status = -ENOMEM; + goto exit; + } + + status = stack_push_node(&stack, node_id); + if (status) { + pr_debug("%s error while running de-activation set algorithm\n", __func__); + goto exit; + } + + while (!is_stack_empty(&stack)) { + + status = stack_pop_node(&stack, &node_id); + if (status) { + pr_debug("%s error while running de-activation set algorithm\n", __func__); + goto exit; + } + + /* check if we want to find out maximum possible de-activation set */ + if (max_disconn) { + cont_next_step = 1; + } + + if (!visited[node_id]) { + status = micscif_get_minimal_deactiveset(node_id, nodemask, visited); + if (status) { + if (status == -EOPNOTSUPP) { + pr_debug("%s No deactivation set found for node %d", __func__, node_id); + cont_next_step = 0; + } + else { + pr_debug("%s Failed to calculate deactivation set", __func__); + goto exit; + } + } + + } /* end for if (!visited[node_id]) */ + + if (cont_next_step) { + for (i = 0; i < num_nodes; i++) { + /* check if we can put more nodes 'i' in de-activation set if this node(dependent node) + * is de-activating + */ + if ((!visited[i]) && + (ms_info.mi_depmtrx[node_id][i] == DEP_STATE_DISCONNECT_READY)) { + if (i == 0) + continue; + mic_ctx = get_per_dev_ctx(i - 1); + if (mic_ctx->micpm_ctx.idle_state == + PM_IDLE_STATE_PC3_READY) { + /* This node might be able to get into deactivation set */ + status = stack_push_node(&stack, i); + if (status) { + pr_debug("%s error while running de-activation set algorithm\n", __func__); + goto exit; + } + } + } + } + } + } /* end for while (!is_stack_empty(&stack)) */ + + if (!nodemask_isvalid(nodemask)) { + pr_debug("%s No deactivation set found for node %d", + __func__, node_id); + status = -EOPNOTSUPP; + } +exit: + if (visited) { + kfree(visited); + } + uninit_depgraph_stack(&stack); + return status; +} + +/* micscif_update_p2p_state: + * + * Update the p2p_disc_state of peer node peer_id in the p2p list of node node_id. + * + * @node_id: The node id whose p2p list needs to be updated. + * @peer_id: The node id in the p2p list of the node_id that will get updated. + * @scif_state: The state to be updated to. + * + */ +void micscif_update_p2p_state(uint32_t node_id, uint32_t peer_id, enum scif_state state) { + + struct micscif_dev *dev; + struct list_head *pos, *tmp; + struct scif_p2p_info *p2p; + + dev = &scif_dev[node_id]; + if (!list_empty(&dev->sd_p2p)) { + list_for_each_safe(pos, tmp, &dev->sd_p2p) { + p2p = list_entry(pos, struct scif_p2p_info, + ppi_list); + if(p2p->ppi_peer_id == peer_id) { + p2p->ppi_disc_state = state; + break; + } + } + } +} + +/* micscif_p2p_node_exists: Check if a node exists in the + * list of nodes that have been sent an rmnode message. + * + * node_list: The list that contains the nodes that has been + * sent the rmnode message for this transaction. + * node_id: the node to be searched for. 
+ * + * returns: true of the node exists.False otherwise + */ +bool micscif_rmnode_msg_sent(struct list_head *node_list, uint32_t node_id) { + + struct list_head *pos1, *tmp1; + struct stack_node *added_node; + + if (!list_empty(node_list)) { + list_for_each_safe(pos1, tmp1, node_list) { + added_node = list_entry(pos1, struct stack_node, next); + if(added_node->node_id == node_id) + return true; + } + } + return false; +} + +/** + * micscif_execute_disconnecte: Perform PM disconnection of a node + * with its neighboring nodes. + * + * node_id: The node to be disconnected. + * nodemask: Mask containing the list of nodes (including node_id) + * to be disconnected. + * node_list: List of nodes that received the disconnection message. + */ +int micscif_execute_disconnect(uint32_t node_id, + uint8_t *nodemask, + struct list_head *node_list) +{ + uint32_t status = 0; + int ret; + uint64_t msg_cnt = 0; + uint32_t i = 0; + int pending_wakeups = 0; + mic_ctx_t *send_rmnode_ctx; + uint32_t node; + mic_ctx_t *mic_ctx = get_per_dev_ctx(node_id - 1); + struct scif_p2p_info *p2p; + struct list_head *pos, *tmp; + struct micscif_dev *dev; + + + /* Always send rmnode msg to SCIF_HOST_NODE */ + memcpy(mic_data.dd_pm.nodemask, nodemask, + mic_data.dd_pm.nodemask_len); + ret = (int) micscif_send_pm_rmnode_msg(SCIF_HOST_NODE, 0, mic_data.dd_pm.nodemask_len, + node_id); + /* Add this node to msg list. */ + if(!ret) { + msg_cnt++; + stack_push_node(node_list, SCIF_HOST_NODE); + } + + if((ret == 0)||(ret == -ENODEV)) { + status = 0; + } + + /* For each node in the nodemask, traverse its p2p list + * and send rmnode_msg to those nodes 1) That are not also + * in the node mask and 2) That have not been already sent + * rmnode messages in this transaction and 3) That have + * their disconnection state as RUNNING. + */ + for (i = 0; i <= ms_info.mi_maxid; i++) { + /* verify if the node is present in deactivation set */ + if (!get_nodemask_bit(nodemask, i)) + continue; + + /* Get to the p2p list of this node */ + dev = &scif_dev[i]; + list_for_each_safe(pos, tmp, &dev->sd_p2p) { + p2p = list_entry(pos, struct scif_p2p_info, + ppi_list); + + if (get_nodemask_bit(nodemask, p2p->ppi_peer_id)) + continue; + if (p2p->ppi_disc_state == SCIFDEV_SLEEPING) + continue; + + if(micscif_rmnode_msg_sent(node_list, p2p->ppi_peer_id)) + continue; + send_rmnode_ctx = get_per_dev_ctx(p2p->ppi_peer_id - 1); + if (!send_rmnode_ctx->micpm_ctx.nodemask.va) { + status = -EINVAL; + goto list_cleanup; + } + + memcpy(send_rmnode_ctx->micpm_ctx.nodemask.va, nodemask, + send_rmnode_ctx->micpm_ctx.nodemask.len); + ret = (int) micscif_send_pm_rmnode_msg(p2p->ppi_peer_id, + send_rmnode_ctx->micpm_ctx.nodemask.pa, + send_rmnode_ctx->micpm_ctx.nodemask.len,node_id); + + /* Add this node to msg list. */ + if(!ret) { + msg_cnt++; + stack_push_node(node_list, p2p->ppi_peer_id); + } + + if((ret == 0)||(ret == -ENODEV)) { + status = 0; + } + } + } + + ret = wait_event_timeout(ms_info.mi_disconn_wq, + (atomic_read(&mic_ctx->disconn_rescnt) == msg_cnt) || + (pending_wakeups = atomic_read(&mic_data.dd_pm.wakeup_in_progress)), + NODE_ALIVE_TIMEOUT); + if ((!ret) || (atomic_read(&mic_ctx->disconn_rescnt) != msg_cnt) + || (ms_info.mi_disconnect_status == OP_FAILED)) { + pr_debug("SCIF disconnect failed. 
" + "remove_node messages sent: = %llu " + "remove_node acks received: %d " + "Pending wakeups: %d ret = %d\n", msg_cnt, + atomic_read(&mic_ctx->disconn_rescnt), + pending_wakeups, ret); + + status = -EAGAIN; + goto list_cleanup; + } + return status; + +list_cleanup: + while (!is_stack_empty(node_list)) + stack_pop_node(node_list, &node); + return status; +} + +/** + * micscif_node_disconnect: + * + * @node_id[in]: source node id. + * @nodemask[out]: bitmask of nodes that have to be disconnected together. + * it represents node_id + * @disconn_type[in]: flag to identify disconnection type. (for example - power mgmt, lost node, maintenance mode etc) + * + * Method responsible for disconnecting node from the scif network. considers dependencies with other node. + * finds out deactivation set. Sends node queue pair messages to all the scif nodes outside deactivation set + * returns error if node can not be disconnected from the network. + */ +int micscif_disconnect_node(uint32_t node_id, uint8_t *nodemask, enum disconn_type type) +{ + uint32_t status = 0; + int ret; + uint64_t msg_cnt = 0; + uint32_t i = 0; + mic_ctx_t *mic_ctx = 0; + struct list_head node_list; + uint32_t node; + + if (!node_id) + return -EINVAL; + + mic_ctx = get_per_dev_ctx(node_id - 1); + + if (!mic_ctx) + return -EINVAL; + + switch(type) { + case DISCONN_TYPE_POWER_MGMT: + { + if (!nodemask) + return -EINVAL; + + atomic_long_add(1, &ms_info.mi_unique_msgid); + atomic_set(&mic_ctx->disconn_rescnt, 0); + ms_info.mi_disconnect_status = OP_IN_PROGRESS; + INIT_LIST_HEAD(&node_list); + + status = micscif_execute_disconnect(node_id, + nodemask, &node_list); + if (status) + return status; + + /* Reset unique msg_id */ + atomic_long_set(&ms_info.mi_unique_msgid, 0); + + while (!is_stack_empty(&node_list)) { + status = stack_pop_node(&node_list, &node); + if (status) + break; + + for (i = 0; i <= ms_info.mi_maxid; i++) { + if (!get_nodemask_bit(nodemask, i)) + continue; + micscif_update_p2p_state(i, node, SCIFDEV_SLEEPING); + } + } + break; + } + case DISCONN_TYPE_LOST_NODE: + { + atomic_long_add(1, &ms_info.mi_unique_msgid); + atomic_set(&mic_ctx->disconn_rescnt, 0); + + for (i = 0; ((i <= ms_info.mi_maxid) && (i != node_id)); i++) { + ret = (int)micscif_send_lost_node_rmnode_msg(i, node_id); + if(!ret) + msg_cnt++; + if((ret == 0)||(ret == -ENODEV)) { + status = 0; + } + } + + ret = wait_event_timeout(ms_info.mi_disconn_wq, + (atomic_read(&mic_ctx->disconn_rescnt) == msg_cnt), + NODE_ALIVE_TIMEOUT); + break; + } + default: + status = -EINVAL; + } + + return status; +} + +/** + * micscif_node_connect: + * + * @node_id[in]: node to wakeup. + * @bool get_ref[in]: Also get node reference after wakeup by incrementing the PM reference count + * + * Method responsible for connecting node into the scif network. considers dependencies with other node. + * finds out activation set. connects all the depenendent nodes in the activation set + * returns error if node can not be connected from the network. 
+ */ +int +micscif_connect_node(uint32_t node_id, bool get_ref) +{ + return do_idlestate_exit(get_per_dev_ctx(node_id - 1), get_ref); +} + +uint64_t micscif_send_node_alive(int node) +{ + struct nodemsg alive_msg; + struct micscif_dev *dev = &scif_dev[node]; + int err; + + alive_msg.uop = SCIF_NODE_ALIVE; + alive_msg.src.node = ms_info.mi_nodeid; + alive_msg.dst.node = node; + pr_debug("node alive msg sent to node %d\n", node); + micscif_inc_node_refcnt(dev, 1); + err = micscif_nodeqp_send(dev, &alive_msg, NULL); + micscif_dec_node_refcnt(dev, 1); + return err; +} + +int micscif_handle_lostnode(uint32_t node_id) +{ + mic_ctx_t *mic_ctx; + uint32_t status = -EOPNOTSUPP; +#ifdef MM_HANDLER_ENABLE + uint8_t *mmio_va; + sbox_scratch1_reg_t scratch1reg = {0}; +#endif + + printk("%s %d node %d\n", __func__, __LINE__, node_id); + mic_ctx = get_per_dev_ctx(node_id - 1); + + if (mic_ctx->state != MIC_ONLINE && mic_ctx->state != MIC_SHUTDOWN) + return 0; + + if (mic_crash_dump_enabled) { + if (!(status = vmcore_create(mic_ctx))) + printk("%s %d node %d ready for crash dump!\n", + __func__, __LINE__, node_id); + else + printk(KERN_ERR "%s %d node %d crash dump failed status %d\n", + __func__, __LINE__, node_id, status); + } + + mic_ctx->crash_count++; + mutex_lock(&mic_ctx->state_lock); + if (mic_ctx->state == MIC_ONLINE || + mic_ctx->state == MIC_SHUTDOWN) + mic_setstate(mic_ctx, MIC_LOST); + mutex_unlock(&mic_ctx->state_lock); + + /* mpssd will handle core dump and reset/auto reboot */ + if (mic_crash_dump_enabled && !status) + return status; + + printk("%s %d stopping node %d to recover lost node!\n", + __func__, __LINE__, node_id); + status = adapter_stop_device(mic_ctx, 1, !RESET_REATTEMPT); + wait_for_reset(mic_ctx); + + if (!ms_info.mi_watchdog_auto_reboot) { + printk("%s %d cannot boot node %d to recover lost node since auto_reboot is off\n", + __func__, __LINE__, node_id); + return status; + } + +/* Disabling MM handler invocation till it is ready to handle errors + * till then we just reboot the card + */ +#ifdef MM_HANDLER_ENABLE + mmio_va = mic_ctx->mmio.va; + scratch1reg.bits.status = FLASH_CMD_INVALID; + + if(mic_ctx->bi_family == FAMILY_ABR) { + printk("Node %d lost. Cannot recover in KNF\n", node_id); + status = adapter_start_device(mic_ctx); + return status; + } + + printk("Booting maintenance mode handler\n"); + status = set_card_usage_mode(mic_ctx, USAGE_MODE_MAINTENANCE, NULL, 0); + if(status) { + printk("Unable to boot maintenance mode\n"); + return status; + } + + status = send_flash_cmd(mic_ctx, RAS_CMD, NULL, 0); + if(status) { + printk("Unable to recover node\n"); + return status; + } + while(scratch1reg.bits.status != FLASH_CMD_COMPLETED) { + ret = SBOX_READ(mmio_va, SBOX_SCRATCH1); + scratch1reg.value = ret; + msleep(1); + i++; + printk("Looping for status (time = %d ms)\n", i); + if(i > NODE_ALIVE_TIMEOUT) { + status = -ETIME; + printk("Unable to recover node. 
Status bit is : %d\n", + scratch1reg.bits.status); + return status; + } + + } +#endif + printk("%s %d booting node %d to recover lost node!\n", + __func__, __LINE__, node_id); + status = adapter_start_device(mic_ctx); + return status; +} + +void micscif_watchdog_handler(struct work_struct *work) +{ + struct micscif_dev *dev = + container_of(to_delayed_work(work), + struct micscif_dev, sd_watchdog_work); + struct _mic_ctx_t *mic_ctx; + int i = dev->sd_node, err, ret; + + mic_ctx = get_per_dev_ctx(i - 1); + + switch (mic_ctx->sdbic1) { + case SYSTEM_HALT: + case SYSTEM_POWER_OFF: + { + adapter_stop_device(mic_ctx, 1, !RESET_REATTEMPT); + wait_for_reset(mic_ctx); + mic_ctx->sdbic1 = 0; + break; + } + case SYSTEM_RESTART: + { + mic_setstate(mic_ctx, MIC_LOST); + mic_ctx->sdbic1 = 0; + break; + } + case SYSTEM_BOOTING: + case SYSTEM_RUNNING: +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)) + case SYSTEM_SUSPEND_DISK: +#endif + break; + case 0xdead: + if (mic_crash_dump_enabled) + micscif_handle_lostnode(i); + mic_ctx->sdbic1 = 0; + break; + default: + break; + } + + switch (mic_ctx->state) { + case MIC_ONLINE: + break; + case MIC_BOOT: + goto restart_timer; + case MIC_SHUTDOWN: + case MIC_LOST: + case MIC_READY: + case MIC_NORESPONSE: + case MIC_BOOTFAIL: + case MIC_RESET: + case MIC_RESETFAIL: + case MIC_INVALID: + return; + } + + if (!ms_info.mi_watchdog_enabled) + return; + + err = micpm_get_reference(mic_ctx, false); + if (err == -EAGAIN) { + goto restart_timer; + } else if (err == -ENODEV) { + micscif_handle_lostnode(i); + goto restart_timer; + } + + if (1 != atomic_cmpxchg(&dev->sd_node_alive, 1, 0)) { + + err = (int)(micscif_send_node_alive(i)); + + if (err) { + micpm_put_reference(mic_ctx); + goto restart_timer; + } + + ret = wait_event_timeout(dev->sd_watchdog_wq, + (atomic_cmpxchg(&dev->sd_node_alive, 1, 0) == 1), + NODE_ALIVE_TIMEOUT); + if (!ret || err) + micscif_handle_lostnode(i); + } + micpm_put_reference(mic_ctx); + +restart_timer: + if (dev->sd_ln_wq) + queue_delayed_work(dev->sd_ln_wq, + &dev->sd_watchdog_work, NODE_ALIVE_TIMEOUT); +} +#else + +long micscif_suspend(uint8_t* nodemask) { + long ret = 0; + int i; + struct micscif_dev *dev; + + for (i = 0; i <= ms_info.mi_maxid; i++) { + if (get_nodemask_bit(nodemask , i)) { + dev = &scif_dev[i]; + if (SCIFDEV_RUNNING != dev->sd_state) + continue; + + ret = atomic_long_cmpxchg( + &dev->scif_ref_cnt, 0, SCIF_NODE_IDLE); + if (!ret || ret == SCIF_NODE_IDLE) { + dev->sd_state = SCIFDEV_SLEEPING; + ret = 0; + } + else { + set_nodemask_bit(nodemask, i, 0); + ret = EAGAIN; + } + } + } + return ret; +} +/* + * scif_suspend_handler - SCIF tasks before transition to low power state. + */ +int micscif_suspend_handler(struct notifier_block *this, + unsigned long event, void *ptr) +{ + int ret = 0; +#ifdef SCIF_ENABLE_PM + int node = 0; + int size; + uint8_t *nodemask_buf; + + size = ((int) ((ms_info.mi_maxid + 1) / 8) + + (((ms_info.mi_maxid + 1) % 8) ? 1 : 0)); + nodemask_buf = (uint8_t*)kzalloc(size, GFP_ATOMIC); + if(!nodemask_buf) + return -ENOMEM; + + for (node = 0; node <= ms_info.mi_maxid; node++) { + if ((node != SCIF_HOST_NODE) && (node != ms_info.mi_nodeid)) + set_nodemask_bit(nodemask_buf, node, 1); + } + + if (micscif_suspend(nodemask_buf)){ + ret = -EBUSY; + goto clean_up; + } + + dma_suspend(mic_dma_handle); +clean_up: + kfree(nodemask_buf); +#endif + return ret; +} + +/* + * micscif_resume_handler - SCIF tasks after wake up from low power state. 
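+ *
+ * The (notifier_block, event, ptr) signature is that of a standard kernel
+ * notifier callback; how the callback is registered is not shown in this
+ * file, so the following is only an assumed sketch of the wiring:
+ *
+ *	static struct notifier_block micscif_pm_notifier = {
+ *		.notifier_call = micscif_resume_handler,
+ *	};
+ *	register_pm_notifier(&micscif_pm_notifier);
+ *
+ * On resume the card side kicks its misc work queue and DMA is resumed via
+ * dma_resume(mic_dma_handle).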
+ */ +int micscif_resume_handler(struct notifier_block *this, + unsigned long event, void *ptr) +{ +#ifdef SCIF_ENABLE_PM +#ifdef _MIC_SCIF_ + queue_work(ms_info.mi_misc_wq, &ms_info.mi_misc_work); +#endif + dma_resume(mic_dma_handle); +#endif + return 0; +} + +/* + * scif_fail_suspend_handler - SCIF tasks if a previous scif_suspend call has + * failed since a low power state transition could not be completed. + */ +int micscif_fail_suspend_handler(struct notifier_block *this, + unsigned long event, void *ptr) +{ +/* Stub out function since it is an optimization that isn't working properly */ +#if 0 +#ifdef SCIF_ENABLE_PM + int node = 0; + long ret; + struct micscif_dev *dev; + + for (node = 0; node <= ms_info.mi_maxid; node++) { + dev = &scif_dev[node]; + ret = atomic_long_cmpxchg(&dev->scif_ref_cnt, SCIF_NODE_IDLE, 0); + if (ret != SCIF_NODE_IDLE) + continue; + if (SCIFDEV_SLEEPING == dev->sd_state) + dev->sd_state = SCIFDEV_RUNNING; + } +#endif +#endif + return 0; +} + +void micscif_get_node_info(void) +{ + struct nodemsg msg; + struct get_node_info node_info; + + init_waitqueue_head(&node_info.wq); + node_info.state = OP_IN_PROGRESS; + micscif_inc_node_refcnt(&scif_dev[SCIF_HOST_NODE], 1); + msg.uop = SCIF_GET_NODE_INFO; + msg.src.node = ms_info.mi_nodeid; + msg.dst.node = SCIF_HOST_NODE; + msg.payload[3] = (uint64_t)&node_info; + + if ((micscif_nodeqp_send(&scif_dev[SCIF_HOST_NODE], &msg, NULL))) + goto done; + + wait_event(node_info.wq, node_info.state != OP_IN_PROGRESS); +done: + micscif_dec_node_refcnt(&scif_dev[SCIF_HOST_NODE], 1); + /* Synchronize with the thread waking us up */ + mutex_lock(&ms_info.mi_conflock); + mutex_unlock(&ms_info.mi_conflock); + ; +} +#endif /* _MIC_SCIF_ */ diff --git a/micscif/micscif_nodeqp.c b/micscif/micscif_nodeqp.c new file mode 100644 index 0000000..7dc5350 --- /dev/null +++ b/micscif/micscif_nodeqp.c @@ -0,0 +1,2902 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. 
Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include "mic/micscif.h" +#include "mic/micscif_smpt.h" +#include "mic/micscif_nodeqp.h" +#include "mic/micscif_intr.h" +#include "mic/micscif_nm.h" +#include "mic_common.h" +#include "mic/micscif_map.h" + +#define SBOX_MMIO_LENGTH 0x10000 +/* FIXME: HW spefic, define someplace else */ +/* SBOX Offset in MMIO space */ +#define SBOX_OFFSET 0x10000 + +#ifdef ENABLE_TEST +static void micscif_qp_testboth(struct micscif_dev *scifdev); +#endif + +bool mic_p2p_enable = 1; +bool mic_p2p_proxy_enable = 1; + +void micscif_teardown_ep(void *endpt) +{ + struct endpt *ep = (struct endpt *)endpt; + struct micscif_qp *qp = ep->qp_info.qp; + if (qp) { + if (qp->outbound_q.rb_base) + scif_iounmap((void *)qp->outbound_q.rb_base, + qp->outbound_q.size, ep->remote_dev); + if (qp->remote_qp) + scif_iounmap((void *)qp->remote_qp, + sizeof(struct micscif_qp), ep->remote_dev); + if (qp->local_buf) { + unmap_from_aperture( + qp->local_buf, + ep->remote_dev, ENDPT_QP_SIZE); + } + if (qp->local_qp) { + unmap_from_aperture(qp->local_qp, ep->remote_dev, + sizeof(struct micscif_qp)); + } + if (qp->inbound_q.rb_base) + kfree((void *)qp->inbound_q.rb_base); + kfree(qp); +#ifdef _MIC_SCIF_ + micscif_teardown_proxy_dma(endpt); +#endif + WARN_ON(!list_empty(&ep->rma_info.task_list)); + } +} + +/* + * Enqueue the endpoint to the zombie list for cleanup. + * The endpoint should not be accessed once this API returns. + */ +void micscif_add_epd_to_zombie_list(struct endpt *ep, bool mi_eplock_held) +{ + unsigned long sflags = 0; + + /* + * It is an error to call scif_close() on an endpoint on which a + * scif_range structure of that endpoint has not been returned + * after a call to scif_get_pages() via scif_put_pages(). 
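+ * In other words, every successful scif_get_pages() on the endpoint must have
+ * been balanced by scif_put_pages() before close; the BUG_ON() below enforces
+ * this for endpoints that are already closing, closed or disconnected.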
+ */ + if (SCIFEP_CLOSING == ep->state || + SCIFEP_CLOSED == ep->state || + SCIFEP_DISCONNECTED == ep->state) + BUG_ON(micscif_rma_list_get_pages_check(ep)); + + if (list_empty(&ep->rma_info.task_list) && ep->remote_dev) + wake_up(&ep->remote_dev->sd_mmap_wq); + if (!mi_eplock_held) + spin_lock_irqsave(&ms_info.mi_eplock, sflags); + spin_lock(&ep->lock); + ep->state = SCIFEP_ZOMBIE; + spin_unlock(&ep->lock); + list_add_tail(&ep->list, &ms_info.mi_zombie); + ms_info.mi_nr_zombies++; + if (!mi_eplock_held) + spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); + queue_work(ms_info.mi_misc_wq, &ms_info.mi_misc_work); +} + +/* Initializes "local" data structures for the QP + * + * Allocates the QP ring buffer (rb), initializes the "in bound" queue + * For the host generate bus addresses for QP rb & qp, in the card's case + * map these into the pci aperture + */ +int micscif_setup_qp_connect(struct micscif_qp *qp, dma_addr_t *qp_offset, + int local_size, struct micscif_dev *scifdev) +{ + void *local_q = NULL; + int err = 0; + volatile uint32_t tmp_rd; + + spin_lock_init(&qp->qp_send_lock); + spin_lock_init(&qp->qp_recv_lock); + + if (!qp->inbound_q.rb_base) { + /* we need to allocate the local buffer for the incoming queue */ + local_q = kzalloc(local_size, GFP_ATOMIC); + if (!local_q) { + printk(KERN_ERR "Ring Buffer Allocation Failed\n"); + err = -ENOMEM; + return err; + } + /* to setup the inbound_q, the buffer lives locally (local_q), + * the read pointer is remote (in remote_qp's local_read) + * the write pointer is local (in local_write) + */ + tmp_rd = 0; + micscif_rb_init(&qp->inbound_q, + &tmp_rd, /* No read ptr right now ... */ + &(scifdev->qpairs[0].local_write), + (volatile void *) local_q, + local_size); + qp->inbound_q.read_ptr = NULL; /* it is unsafe to use the ring buffer until this changes! */ + } + + if (!qp->local_buf) { + err = map_virt_into_aperture(&qp->local_buf, local_q, scifdev, local_size); + if (err) { + printk(KERN_ERR "%s %d error %d\n", + __func__, __LINE__, err); + return err; + } + } + + if (!qp->local_qp) { + err = map_virt_into_aperture(qp_offset, qp, scifdev, sizeof(struct micscif_qp)); + if (err) { + printk(KERN_ERR "%s %d error %d\n", + __func__, __LINE__, err); + return err; + } + qp->local_qp = *qp_offset; + } else { + *qp_offset = qp->local_qp; + } + return err; +} + +/* When the other side has already done it's allocation, this is called */ +/* TODO: Replace reads that go across the bus somehow ... 
*/ +int micscif_setup_qp_accept(struct micscif_qp *qp, dma_addr_t *qp_offset, dma_addr_t phys, int local_size, struct micscif_dev *scifdev) +{ + void *local_q; + volatile void *remote_q; + struct micscif_qp *remote_qp; + int remote_size; + int err = 0; + + spin_lock_init(&qp->qp_send_lock); + spin_lock_init(&qp->qp_recv_lock); + /* Start by figuring out where we need to point */ + remote_qp = scif_ioremap(phys, sizeof(struct micscif_qp), scifdev); + qp->remote_qp = remote_qp; + qp->remote_buf = remote_qp->local_buf; + /* To setup the outbound_q, the buffer lives in remote memory (at scifdev->bs->buf phys), + * the read pointer is local (in local's local_read) + * the write pointer is remote (In remote_qp's local_write) + */ + remote_size = qp->remote_qp->inbound_q.size; /* TODO: Remove this read for p2p */ + remote_q = scif_ioremap(qp->remote_buf, remote_size, scifdev); + + BUG_ON(qp->remote_qp->magic != SCIFEP_MAGIC); + + qp->remote_qp->local_write = 0; + micscif_rb_init(&(qp->outbound_q), + &(qp->local_read), /*read ptr*/ + &(qp->remote_qp->local_write), /*write ptr*/ + remote_q, /*rb_base*/ + remote_size); + /* to setup the inbound_q, the buffer lives locally (local_q), + * the read pointer is remote (in remote_qp's local_read) + * the write pointer is local (in local_write) + */ + local_q = kzalloc(local_size, GFP_KERNEL); + if (!local_q) { + printk(KERN_ERR "Ring Buffer Allocation Failed\n"); + err = -ENOMEM; + return err; + } + + qp->remote_qp->local_read = 0; + micscif_rb_init(&(qp->inbound_q), + &(qp->remote_qp->local_read), + &(qp->local_write), + local_q, + local_size); + err = map_virt_into_aperture(&qp->local_buf, local_q, scifdev, local_size); + if (err) { + printk(KERN_ERR "%s %d error %d\n", + __func__, __LINE__, err); + return err; + } + err = map_virt_into_aperture(qp_offset, qp, scifdev, sizeof(struct micscif_qp)); + if (err) { + printk(KERN_ERR "%s %d error %d\n", + __func__, __LINE__, err); + return err; + } + qp->local_qp = *qp_offset; + return err; +} + +int micscif_setup_qp_connect_response(struct micscif_dev *scifdev, struct micscif_qp *qp, uint64_t payload) +{ + int err = 0; + void *r_buf; + int remote_size; + phys_addr_t tmp_phys; + + qp->remote_qp = scif_ioremap(payload, sizeof(struct micscif_qp), scifdev); + + if (!qp->remote_qp) { + err = -ENOMEM; + goto error; + } + + if (qp->remote_qp->magic != SCIFEP_MAGIC) { + printk(KERN_ERR "SCIFEP_MAGIC doesnot match between node %d " + "(self) and %d (remote)\n", scif_dev[ms_info.mi_nodeid].sd_node, + scifdev->sd_node); + WARN_ON(1); + err = -ENODEV; + goto error; + } + + tmp_phys = readq(&(qp->remote_qp->local_buf)); + remote_size = readl(&qp->remote_qp->inbound_q.size); + r_buf = scif_ioremap(tmp_phys, remote_size, scifdev); + +#if 0 + pr_debug("payload = 0x%llx remote_qp = 0x%p tmp_phys=0x%llx \ + remote_size=%d r_buf=%p\n", payload, qp->remote_qp, + tmp_phys, remote_size, r_buf); +#endif + + micscif_rb_init(&(qp->outbound_q), + &(qp->local_read), + &(qp->remote_qp->local_write), + r_buf, + remote_size); + /* resetup the inbound_q now that we know where the inbound_read really is */ + micscif_rb_init(&(qp->inbound_q), + &(qp->remote_qp->local_read), + &(qp->local_write), + qp->inbound_q.rb_base, + qp->inbound_q.size); +error: + return err; +} + +#ifdef _MIC_SCIF_ +extern int micscif_send_host_intr(struct micscif_dev *, uint32_t); + +int micscif_send_host_intr(struct micscif_dev *dev, uint32_t doorbell) +{ + uint32_t db_reg; + + if (doorbell > 3) + return -EINVAL; + + db_reg = readl(dev->mm_sbox + + (SBOX_SDBIC0 + (4 
* doorbell))) | SBOX_SDBIC0_DBREQ_BIT; + writel(db_reg, dev->mm_sbox + (SBOX_SDBIC0 + (4 * doorbell))); + return 0; +} +#endif + +/* + * Interrupts remote mic + */ +static void +micscif_send_mic_intr(struct micscif_dev *dev) +{ + /* Writes to RDMASR triggers the interrupt */ + writel(0, (uint8_t *)dev->mm_sbox + dev->sd_rdmasr); +} + +/* scifdev - remote scif device + * also needs the local scif device so that we can decide which RMASR + * to target on the remote mic + */ +static __always_inline void +scif_send_msg_intr(struct micscif_dev *scifdev) +{ +#ifdef _MIC_SCIF_ + if (scifdev == &scif_dev[0]) + micscif_send_host_intr(scifdev, 0); + else +#endif + micscif_send_mic_intr(scifdev); +} + +#ifdef _MIC_SCIF_ +int micscif_setup_card_qp(phys_addr_t host_phys, struct micscif_dev *scifdev) +{ + int local_size; + dma_addr_t qp_offset; + int err = 0; + struct nodemsg tmp_msg; + uint16_t host_scif_ver; + + pr_debug("Got 0x%llx from the host\n", host_phys); + + local_size = NODE_QP_SIZE; + + /* FIXME: n_qpairs is always 1 OK to get rid of it ? */ + scifdev->n_qpairs = 1; + scifdev->qpairs = kzalloc(sizeof(struct micscif_qp), GFP_KERNEL); + if (!scifdev->qpairs) { + printk(KERN_ERR "Node QP Allocation failed\n"); + err = -ENOMEM; + return err; + } + + scifdev->qpairs->magic = SCIFEP_MAGIC; + pr_debug("micscif_card(): called qp_accept\n"); + err = micscif_setup_qp_accept(&scifdev->qpairs[0], &qp_offset, host_phys, local_size, scifdev); + + if (!err) { + host_scif_ver = readw(&(&scifdev->qpairs[0])->remote_qp->scif_version); + if (host_scif_ver != SCIF_VERSION) { + printk(KERN_ERR "Card and host SCIF versions do not match. \n"); + printk(KERN_ERR "Card version: %u, Host version: %u \n", + SCIF_VERSION, host_scif_ver); + err = -ENXIO; + goto error_exit; + } + /* now that everything is setup and mapped, we're ready to tell the + * host where our queue's location + */ + tmp_msg.uop = SCIF_INIT; + tmp_msg.payload[0] = qp_offset; + tmp_msg.payload[1] = get_rdmasr_offset(scifdev->sd_intr_handle); + tmp_msg.dst.node = 0; /* host */ + + pr_debug("micscif_setup_card_qp: micscif_setup_qp_accept, INIT message\n"); + err = micscif_nodeqp_send(scifdev, &tmp_msg, NULL); + } +error_exit: + if (err) + printk(KERN_ERR "%s %d error %d\n", + __func__, __LINE__, err); + return err; +} + + +void micscif_send_exit(void) +{ + struct nodemsg msg; + struct micscif_dev *scifdev = &scif_dev[SCIF_HOST_NODE]; + + init_waitqueue_head(&ms_info.mi_exitwq); + + msg.uop = SCIF_EXIT; + msg.src.node = ms_info.mi_nodeid; + msg.dst.node = scifdev->sd_node; + /* No error handling for Host SCIF device */ + micscif_nodeqp_send(scifdev, &msg, NULL); +} + +#else /* !_MIC_SCIF_ */ +static uint32_t tmp_r_ptr; +int micscif_setup_host_qp(mic_ctx_t *mic_ctx, struct micscif_dev *scifdev) +{ + int err = 0; + int local_size; + + /* Bail out if the node QP is already setup */ + if (scifdev->qpairs) + return err; + + local_size = NODE_QP_SIZE; + + /* for now, assume that we only have one queue-pair -- with the host */ + scifdev->n_qpairs = 1; + scifdev->qpairs = (struct micscif_qp *)kzalloc(sizeof(struct micscif_qp), GFP_ATOMIC); + if (!scifdev->qpairs) { + printk(KERN_ERR "Node QP Allocation failed\n"); + err = -ENOMEM; + return err; + } + + scifdev->qpairs->magic = SCIFEP_MAGIC; + scifdev->qpairs->scif_version = SCIF_VERSION; + err = micscif_setup_qp_connect(&scifdev->qpairs[0], &(mic_ctx->bi_scif.si_pa), local_size, scifdev); + /* fake the read pointer setup so we can use the inbound q */ + scifdev->qpairs[0].inbound_q.read_ptr = &tmp_r_ptr; + + /* 
We're as setup as we can be ... the inbound_q is setup, w/o + * a usable outbound q. When we get a message, the read_ptr will + * be updated, so we know there's something here. When that happens, + * we finish the setup (just point the write pointer to the real + * write pointer that lives on the card), and pull the message off + * the card. + * Tell the card where we are. + */ + printk("My Phys addrs: 0x%llx and scif_addr 0x%llx\n", scifdev->qpairs[0].local_buf, + mic_ctx->bi_scif.si_pa); + + if (err) printk(KERN_ERR "%s %d error %d\n", __func__, __LINE__, err); + return err; +} + + +/* FIXME: add to header */ +struct scatterlist * micscif_p2p_mapsg(void *va, int page_size, int page_cnt); +void micscif_p2p_freesg(struct scatterlist *); +mic_ctx_t* get_per_dev_ctx(uint16_t node); + +/* Init p2p mappings required to access peerdev from scifdev */ +static struct scif_p2p_info * +init_p2p_info(struct micscif_dev *scifdev, struct micscif_dev *peerdev) +{ + struct _mic_ctx_t *mic_ctx_peer; + struct _mic_ctx_t *mic_ctx; + struct scif_p2p_info *p2p; + int num_mmio_pages; + int num_aper_pages; + + mic_ctx = get_per_dev_ctx(scifdev->sd_node - 1); + mic_ctx_peer = get_per_dev_ctx(peerdev->sd_node - 1); + + num_mmio_pages = (int) (mic_ctx_peer->mmio.len >> PAGE_SHIFT); + num_aper_pages = (int) (mic_ctx_peer->aper.len >> PAGE_SHIFT); + + // First map the peer board addresses into the new board + p2p = kzalloc(sizeof(struct scif_p2p_info), GFP_KERNEL); + + if (p2p){ + int sg_page_shift = get_order(min(mic_ctx_peer->aper.len,(uint64_t)(1 << 30))); + /* FIXME: check return codes below */ + p2p->ppi_sg[PPI_MMIO] = micscif_p2p_mapsg(mic_ctx_peer->mmio.va, PAGE_SIZE, + num_mmio_pages); + p2p->sg_nentries[PPI_MMIO] = num_mmio_pages; + p2p->ppi_sg[PPI_APER] = micscif_p2p_mapsg(mic_ctx_peer->aper.va, 1 << sg_page_shift, + num_aper_pages >> (sg_page_shift - PAGE_SHIFT)); + p2p->sg_nentries[PPI_APER] = num_aper_pages >> (sg_page_shift - PAGE_SHIFT); + + pci_map_sg(mic_ctx->bi_pdev, p2p->ppi_sg[PPI_MMIO], num_mmio_pages, PCI_DMA_BIDIRECTIONAL); + pci_map_sg(mic_ctx->bi_pdev, p2p->ppi_sg[PPI_APER], + num_aper_pages >> (sg_page_shift - PAGE_SHIFT), PCI_DMA_BIDIRECTIONAL); + + p2p->ppi_pa[PPI_MMIO] = sg_dma_address(p2p->ppi_sg[PPI_MMIO]); + p2p->ppi_pa[PPI_APER] = sg_dma_address(p2p->ppi_sg[PPI_APER]); + p2p->ppi_len[PPI_MMIO] = num_mmio_pages; + p2p->ppi_len[PPI_APER] = num_aper_pages; + p2p->ppi_disc_state = SCIFDEV_RUNNING; + p2p->ppi_peer_id = peerdev->sd_node; + + } + return (p2p); +} + + +int micscif_setuphost_response(struct micscif_dev *scifdev, uint64_t payload) +{ + int read_size; + struct nodemsg msg; + int err = 0; + + pr_debug("micscif_setuphost_response: scif node %d\n", scifdev->sd_node); + err = micscif_setup_qp_connect_response(scifdev, &scifdev->qpairs[0], payload); + if (err) { + printk(KERN_ERR "%s %d error %d\n", __func__, __LINE__, err); + return err; + } + /* re-recieve the bootstrap message after re-init call */ + pr_debug("micscif_host(): reading INIT message after re-init call\n"); + read_size = micscif_rb_get_next(&(scifdev->qpairs[0].inbound_q), &msg, + sizeof(struct nodemsg)); + micscif_rb_update_read_ptr(&(scifdev->qpairs[0].inbound_q)); + + scifdev->sd_rdmasr = (uint32_t)msg.payload[1]; + + /* for testing, send a message back to the card */ + msg.uop = SCIF_INIT; + msg.payload[0] = 0xdeadbeef; + msg.dst.node = scifdev->sd_node; /* card */ + if ((err = micscif_nodeqp_send(scifdev, &msg, NULL))) { + printk(KERN_ERR "%s %d error %d\n", __func__, __LINE__, err); + return err; + } + +#ifdef 
ENABLE_TEST + /* Launch the micscif_rb test */ + pr_debug("micscif_host(): starting TEST\n"); + micscif_qp_testboth(scifdev); +#endif + + /* + * micscif_nodeqp_intrhandler(..) increments the ref_count before calling + * this API hence clamp the scif_ref_cnt to 1. This is required to + * handle the SCIF module load/unload case on MIC. The SCIF_EXIT message + * keeps the ref_cnt clamped to SCIF_NODE_IDLE during module unload. + * Setting the ref_cnt to 1 during SCIF_INIT ensures that the ref_cnt + * returns back to 0 once SCIF module load completes. + */ +#ifdef SCIF_ENABLE_PM + scifdev->scif_ref_cnt = (atomic_long_t) ATOMIC_LONG_INIT(1); +#endif + mutex_lock(&ms_info.mi_conflock); + ms_info.mi_mask |= 0x1 << scifdev->sd_node; + ms_info.mi_maxid = SCIF_MAX(scifdev->sd_node, ms_info.mi_maxid); + ms_info.mi_total++; + scifdev->sd_state = SCIFDEV_RUNNING; + mutex_unlock(&ms_info.mi_conflock); + + micscif_node_add_callback(scifdev->sd_node); + return err; +} + +void +micscif_removehost_respose(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + mic_ctx_t *mic_ctx = get_per_dev_ctx(scifdev->sd_node -1); + int err; + + if (scifdev->sd_state != SCIFDEV_RUNNING) + return; + + micscif_stop(mic_ctx); + + if ((err = micscif_nodeqp_send(scifdev, msg, NULL))) + printk(KERN_ERR "%s %d error %d\n", __func__, __LINE__, err); + + scifdev->sd_state = SCIFDEV_INIT; +} +#endif + +/* TODO: Fix the non-symmetric use of micscif_dev on the host and the card. Right + * now, the card's data structures are shaping up such that there is a single + * micscif_dev structure with multiple qp's. The host ends up with multiple + * micscif_devs (one per card). We should unify the way this will work. + */ +static struct micscif_qp *micscif_nodeqp_find(struct micscif_dev *scifdev, uint8_t node) +{ + struct micscif_qp *qp = NULL; +#ifdef _MIC_SCIF_ + /* This is also a HACK. Even though the code is identical with the host right + * now, I broke it into two parts because they will likely not be identical + * moving forward + */ + qp = scifdev->qpairs; +#else + /* HORRIBLE HACK! Since we only have one card, and one scifdev, we + * can just grab the scifdev->qp to find the qp. 
We don't actually have to + * do any kind of looking for it + */ + qp = scifdev->qpairs; +#endif /* !_MIC_SCIF_ */ + return qp; +} + +static char *scifdev_state[] = {"SCIFDEV_NOTPRESENT", + "SCIFDEV_INIT", + "SCIFDEV_RUNNING", + "SCIFDEV_SLEEPING", + "SCIFDEV_STOPPING", + "SCIFDEV_STOPPED"}; + +static char *message_types[] = {"BAD", + "INIT", + "EXIT", + "SCIF_NODE_ADD", + "SCIF_NODE_ADD_ACK", + "CNCT_REQ", + "CNCT_GNT", + "CNCT_GNTACK", + "CNCT_GNTNACK", + "CNCT_REJ", + "CNCT_TERM", + "TERM_ACK", + "DISCNCT", + "DISCNT_ACK", + "REGISTER", + "REGISTER_ACK", + "REGISTER_NACK", + "UNREGISTER", + "UNREGISTER_ACK", + "UNREGISTER_NACK", + "ALLOC_REQ", + "ALLOC_GNT", + "ALLOC_REJ", + "FREE_PHYS", + "FREE_VIRT", + "CLIENT_SENT", + "CLIENT_RCVD", + "MUNMAP", + "MARK", + "MARK_ACK", + "MARK_NACK", + "WAIT", + "WAIT_ACK", + "WAIT_NACK", + "SIGNAL_LOCAL", + "SIGNAL_REMOTE", + "SIG_ACK", + "SIG_NACK", + "MAP_GTT", + "MAP_GTT_ACK", + "MAP_GTT_NACK", + "UNMAP_GTT", + "CREATE_NODE_DEP", + "DESTROY_NODE_DEP", + "REMOVE_NODE", + "REMOVE_NODE_ACK", + "WAKE_UP_NODE", + "WAKE_UP_NODE_ACK", + "WAKE_UP_NODE_NACK", + "SCIF_NODE_ALIVE", + "SCIF_NODE_ALIVE_ACK", + "SCIF_SMPT", + "SCIF_GTT_DMA_MAP", + "SCIF_GTT_DMA_ACK", + "SCIF_GTT_DMA_NACK", + "SCIF_GTT_DMA_UNMAP", + "SCIF_PROXY_DMA", + "SCIF_PROXY_ORDERED_DMA", + "SCIF_NODE_CONNECT", + "SCIF_NODE_CONNECT_NACK", + "SCIF_NODE_ADD_NACK", + "SCIF_GET_NODE_INFO", + "TEST"}; + +static void +micscif_display_message(struct micscif_dev *scifdev, struct nodemsg *msg, + const char *label) +{ + if (!ms_info.en_msg_log) + return; + if (msg->uop > SCIF_MAX_MSG) { + pr_debug("%s: unknown msg type %d\n", label, msg->uop); + return; + } + if (msg->uop == SCIF_TEST) + return; + + printk("%s: %s msg type %s, src %d:%d, dest %d:%d " + "payload 0x%llx:0x%llx:0x%llx:0x%llx\n", + label, scifdev_state[scifdev->sd_state], + message_types[msg->uop], msg->src.node, msg->src.port, + msg->dst.node, msg->dst.port, msg->payload[0], msg->payload[1], + msg->payload[2], msg->payload[3]); +} + +/** + * micscif_nodeqp_send - Send a message on the Node Qp. + * @scifdev: Scif Device. + * @msg: The message to be sent. + * + * This function will block till a message is not sent to the destination + * scif device. + */ +int micscif_nodeqp_send(struct micscif_dev *scifdev, + struct nodemsg *msg, struct endpt *ep) +{ + struct micscif_qp *qp; + int err = -ENOMEM, loop_cnt = 0; + + if (oops_in_progress || + (SCIF_INIT != msg->uop && + SCIF_EXIT != msg->uop && + SCIFDEV_RUNNING != scifdev->sd_state && + SCIFDEV_SLEEPING != scifdev->sd_state) || + (ep && SCIFDEV_STOPPED == ep->sd_state)) { + err = -ENODEV; + goto error; + } + + micscif_display_message(scifdev, msg, "Sent"); + + qp = micscif_nodeqp_find(scifdev, (uint8_t)msg->dst.node); + if (!qp) { + err = -EINVAL; + goto error; + } + spin_lock(&qp->qp_send_lock); + + while ((err = micscif_rb_write(&qp->outbound_q, + msg, sizeof(struct nodemsg)))) { + cpu_relax(); + mdelay(1); + if (loop_cnt++ > (NODEQP_SEND_TO_MSEC)) { + err = -ENODEV; + break; + } + } + if (!err) + micscif_rb_commit(&qp->outbound_q); + spin_unlock(&qp->qp_send_lock); + if (!err) { + if (is_self_scifdev(scifdev)) + /* + * For loopback we need to emulate an interrupt by queueing + * work for the queue handling real Node Qp interrupts. 
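+ * A message destined for a real remote node is signalled through
+ * scif_send_msg_intr() instead, which rings a host doorbell (SDBIC0) or
+ * writes the peer's RDMASR register, as set up earlier in this file.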
+ */ + + queue_work(scifdev->sd_intr_wq, &scifdev->sd_intr_bh); + else + scif_send_msg_intr(scifdev); + } +error: + if (err) + pr_debug("%s %d error %d uop %d\n", + __func__, __LINE__, err, msg->uop); + return err; +} + +/* TODO: Make this actually figure out where the interrupt came from. For host, it can + * be a little easier (one "vector" per board). For the cards, we'll have to do some + * scanning, methinks + */ +struct micscif_qp *micscif_nodeqp_nextmsg(struct micscif_dev *scifdev) +{ + return &scifdev->qpairs[0]; +} + +/* + * micscif_misc_handler: + * + * Work queue handler for servicing miscellaneous SCIF tasks. + * Examples include: + * 1) Remote fence requests. + * 2) Destruction of temporary registered windows + * created during scif_vreadfrom()/scif_vwriteto(). + * 3) Cleanup of zombie endpoints. + */ +void micscif_misc_handler(struct work_struct *work) +{ + micscif_rma_handle_remote_fences(); + micscif_rma_destroy_temp_windows(); +#ifdef _MIC_SCIF_ + vm_unmap_aliases(); +#endif + micscif_rma_destroy_tcw_invalid(&ms_info.mi_rma_tc); + micscif_cleanup_zombie_epd(); +} + +/** + * scif_init_resp() - Respond to SCIF_INIT interrupt message + * @scifdev: Other node device to respond to + * @msg: Interrupt message + * + * Loading the driver on the MIC card sends an INIT message to the host + * with the PCI bus memory information it needs. This function receives + * that message, finishes its intialization and echoes it back to the card. + * + * When the card receives the message this function starts a connection test. + */ +static __always_inline void +scif_init_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ +#ifdef _MIC_SCIF_ + if (msg->payload[0] != 0xdeadbeef) + printk(KERN_ERR "Bad payload 0x%llx\n", msg->payload[0]); +#ifdef ENABLE_TEST + else + micscif_qp_testboth(scifdev); +#endif +#else + pr_debug("scifhost(): sending response to INIT\n"); + micscif_setuphost_response(scifdev, msg->payload[0]); + atomic_set(&scifdev->sd_node_alive, 0); + if (scifdev->sd_ln_wq) + queue_delayed_work(scifdev->sd_ln_wq, + &scifdev->sd_watchdog_work, NODE_ALIVE_TIMEOUT); +#endif +} + +/** + * scif_exit_resp() - Respond to SCIF_EXIT interrupt message + * @scifdev: Other node device to respond to + * @msg: Interrupt message + * + * Loading the driver on the MIC card sends an INIT message to the host + * with the PCI bus memory information it needs. This function receives + * that message, finishes its intialization and echoes it back to the card. + * + * When the card receives the message this function starts a connection test. + */ +static __always_inline void +scif_exit_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ +#ifdef _MIC_SCIF_ + printk("card: scif node %d exiting\n", ms_info.mi_nodeid); + scif_dev[ms_info.mi_nodeid].sd_state = SCIFDEV_STOPPED; + wake_up(&ms_info.mi_exitwq); +#else + printk("host: scif node %d exiting\n", msg->src.node); + /* The interrupt handler that received the message would have + * bumped up the ref_cnt by 1. micscif_removehost_response + * calls micscif_cleanup_scifdev which loops forever for the ref_cnt + * to drop to 0 thereby leading to a soft lockup. To prevent + * that, decrement the ref_cnt here. + */ + micscif_dec_node_refcnt(scifdev, 1); + micscif_removehost_respose(scifdev, msg); + /* increment the ref_cnt here. The interrupt handler will now + * decrement it, leaving the ref_cnt to 0 if everything + * works as expected. 
Note that its not absolutely necessary + * to do this execpt to make sure ref_cnt is 0 and to catch + * errors that may happen if ref_cnt drops to a negative value. + */ + micscif_inc_node_refcnt(scifdev, 1); + +#endif +} + +/** + * scif_nodeadd_resp() - Respond to SCIF_NODE_ADD interrupt message + * @scifdev: Other node device to respond to + * @msg: Interrupt message + * + * When the host driver has finished initializing a MIC node queue pair it + * marks the board as online. It then looks for all currently online MIC + * cards and send a SCIF_NODE_ADD message to identify the ID of the new card for + * peer to peer initialization + * + * The local node allocates its incoming queue and sends its address in the + * SCIF_NODE_ADD_ACK message back to the host, the host "reflects" this message + * to the new node + */ +static __always_inline void +scif_nodeadd_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ +#ifdef _MIC_SCIF_ + struct micscif_dev *newdev; + dma_addr_t qp_offset; + int qp_connect; + + pr_debug("Scifdev %d:%d received NODE_ADD msg for node %d\n", + scifdev->sd_node, msg->dst.node, msg->src.node); + pr_debug("Remote address for this node's aperture %llx\n", + msg->payload[0]); + printk("Remote node's sbox %llx\n", msg->payload[1]); + + newdev = &scif_dev[msg->src.node]; + newdev->sd_node = msg->src.node; + + if (micscif_setup_interrupts(newdev)) { + printk(KERN_ERR "failed to setup interrupts for %d\n", msg->src.node); + goto interrupt_setup_error; + } + + newdev->mm_sbox = ioremap_nocache(msg->payload[1] + SBOX_OFFSET, SBOX_MMIO_LENGTH); + + if (!newdev->mm_sbox) { + printk(KERN_ERR "failed to map mmio for %d\n", msg->src.node); + goto mmio_map_error; + } + + if (!(newdev->qpairs = kzalloc(sizeof(struct micscif_qp), GFP_KERNEL))) { + printk(KERN_ERR "failed to allocate qpair for %d\n", msg->src.node); + goto qp_alloc_error; + } + + /* Set the base address of the remote node's memory since it gets + * added to qp_offset + */ + newdev->sd_base_addr = msg->payload[0]; + + if ((qp_connect = micscif_setup_qp_connect(newdev->qpairs, &qp_offset, + NODE_QP_SIZE, newdev))) { + printk(KERN_ERR "failed to setup qp_connect %d\n", qp_connect); + goto qp_connect_error; + } + + if (register_scif_intr_handler(newdev)) + goto qp_connect_error; + + newdev->scif_ref_cnt = (atomic_long_t) ATOMIC_LONG_INIT(0); + micscif_node_add_callback(msg->src.node); + newdev->qpairs->magic = SCIFEP_MAGIC; + newdev->qpairs->qp_state = QP_OFFLINE; + wmb(); + + msg->uop = SCIF_NODE_ADD_ACK; + msg->dst.node = msg->src.node; + msg->src.node = ms_info.mi_nodeid; + msg->payload[0] = qp_offset; + msg->payload[2] = get_rdmasr_offset(newdev->sd_intr_handle); + msg->payload[3] = scif_dev[ms_info.mi_nodeid].sd_numa_node; + micscif_nodeqp_send(&scif_dev[SCIF_HOST_NODE], msg, NULL); + return; + +qp_connect_error: + kfree(newdev->qpairs); + newdev->qpairs = NULL; +qp_alloc_error: + iounmap(newdev->mm_sbox); + newdev->mm_sbox = NULL; +mmio_map_error: +interrupt_setup_error: + printk(KERN_ERR "node add failed for node %d\n", msg->src.node); + /* + * Update self with NODE ADD failure and send + * nack to update the peer. 
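+ * The SCIF_NODE_ADD_NACK is routed through the host (SCIF_HOST_NODE), just
+ * like the SCIF_NODE_ADD_ACK on the success path above, presumably so the
+ * host can forward it and the peer can abandon its half of the P2P queue
+ * pair setup.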
+ */ + mutex_lock(&newdev->sd_lock); + newdev->sd_state = SCIFDEV_NOTPRESENT; + mutex_unlock(&newdev->sd_lock); + wake_up_interruptible(&newdev->sd_p2p_wq); + msg->uop = SCIF_NODE_ADD_NACK; + msg->dst.node = msg->src.node; + msg->src.node = ms_info.mi_nodeid; + micscif_nodeqp_send(&scif_dev[SCIF_HOST_NODE], msg, NULL); +#endif +} + +#ifdef _MIC_SCIF_ +static inline void scif_p2pdev_uninit(struct micscif_dev *peerdev) +{ + deregister_scif_intr_handler(peerdev); + iounmap(peerdev->mm_sbox); + mutex_lock(&peerdev->sd_lock); + peerdev->sd_state = SCIFDEV_NOTPRESENT; + mutex_unlock(&peerdev->sd_lock); +} + +void scif_poll_qp_state(struct work_struct *work) +{ +#define NODE_QP_RETRY 100 + struct micscif_dev *peerdev = container_of(work, struct micscif_dev, + sd_p2p_dwork.work); + struct micscif_qp *qp = &peerdev->qpairs[0]; + + if (SCIFDEV_RUNNING != peerdev->sd_state) + return; + if (qp->qp_state == QP_OFFLINE) { + if (peerdev->sd_p2p_retry++ == NODE_QP_RETRY) { + printk(KERN_ERR "Warning: QP check timeout with " + "state %d\n", qp->qp_state); + goto timeout; + } + schedule_delayed_work(&peerdev->sd_p2p_dwork, + msecs_to_jiffies(NODE_QP_TIMEOUT)); + return; + } + wake_up(&peerdev->sd_p2p_wq); + return; +timeout: + printk(KERN_ERR "%s %d remote node %d offline, state = 0x%x\n", + __func__, __LINE__, peerdev->sd_node, qp->qp_state); + micscif_inc_node_refcnt(peerdev, 1); + qp->remote_qp->qp_state = QP_OFFLINE; + micscif_dec_node_refcnt(peerdev, 1); + scif_p2pdev_uninit(peerdev); + wake_up(&peerdev->sd_p2p_wq); +} +#endif + +/** + * scif_nodeaddack_resp() - Respond to SCIF_NODE_ADD_ACK interrupt message + * @scifdev: Other node device to respond to + * @msg: Interrupt message + * + * After a MIC node receives the SCIF_LINK_ADD_ACK message it send this + * message to the host to confirm the sequeuce is finished. + * + */ +static __always_inline void +scif_nodeaddack_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ +#ifdef _MIC_SCIF_ + struct micscif_dev *peerdev; + struct micscif_qp *qp; +#else + struct micscif_dev *dst_dev = &scif_dev[msg->dst.node]; +#endif + pr_debug("Scifdev %d received SCIF_NODE_ADD_ACK msg for src %d dst %d\n", + scifdev->sd_node, msg->src.node, msg->dst.node); + pr_debug("payload %llx %llx %llx %llx\n", msg->payload[0], msg->payload[1], + msg->payload[2], msg->payload[3]); +#ifndef _MIC_SCIF_ + + /* the lock serializes with micscif_setuphost_response + * The host is forwarding the NODE_ADD_ACK message from src to dst + * we need to make sure that the dst has already received a NODE_ADD + * for src and setup its end of the qp to dst + */ + mutex_lock(&ms_info.mi_conflock); + msg->payload[1] = ms_info.mi_maxid; + mutex_unlock(&ms_info.mi_conflock); + micscif_inc_node_refcnt(dst_dev, 1); + micscif_nodeqp_send(dst_dev, msg, NULL); + micscif_dec_node_refcnt(dst_dev, 1); +#else + peerdev = &scif_dev[msg->src.node]; + peerdev->sd_node = msg->src.node; + + if (peerdev->sd_state == SCIFDEV_NOTPRESENT) + return; + + qp = &peerdev->qpairs[0]; + + if ((micscif_setup_qp_connect_response(peerdev, &peerdev->qpairs[0], + msg->payload[0]))) + goto local_error; + + mutex_lock(&peerdev->sd_lock); + peerdev->sd_numa_node = msg->payload[3]; + /* + * Proxy the DMA only for P2P reads with transfer size + * greater than proxy DMA threshold. Proxying reads to convert + * them into writes is only required for host jaketown platforms + * when the two MIC devices are connected to the same + * QPI/IOH/numa node. 
The host will not pass the numa node + * information for non Intel Jaketown platforms and it will + * be -1 in that case. + */ + peerdev->sd_proxy_dma_reads = + mic_p2p_proxy_enable && + scif_dev[ms_info.mi_nodeid].sd_numa_node != -1 && + (peerdev->sd_numa_node == + scif_dev[ms_info.mi_nodeid].sd_numa_node); + peerdev->sd_state = SCIFDEV_RUNNING; + mutex_unlock(&peerdev->sd_lock); + + mutex_lock(&ms_info.mi_conflock); + ms_info.mi_maxid = msg->payload[1]; + peerdev->sd_rdmasr = msg->payload[2]; + mutex_unlock(&ms_info.mi_conflock); + + /* accessing the peer qp. Make sure the peer is awake*/ + micscif_inc_node_refcnt(peerdev, 1); + qp->remote_qp->qp_state = QP_ONLINE; + micscif_dec_node_refcnt(peerdev, 1); + schedule_delayed_work(&peerdev->sd_p2p_dwork, + msecs_to_jiffies(NODE_QP_TIMEOUT)); + return; +local_error: + scif_p2pdev_uninit(peerdev); + wake_up(&peerdev->sd_p2p_wq); +#endif +} + +/** + * scif_cnctreq_resp() - Respond to SCIF_CNCT_REQ interrupt message + * @msg: Interrupt message + * + * This message is initiated by the remote node to request a connection + * to the local node. This function looks for an end point in the + * listen state on the requested port id. + * + * If it finds a listening port it places the connect request on the + * listening end points queue and wakes up any pending accept calls. + * + * If it does not find a listening end point it sends a connection + * reject message to the remote node. + */ +static __always_inline void +scif_cnctreq_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct endpt *ep = NULL; + struct conreq *conreq; + unsigned long sflags; + + if ((conreq = (struct conreq *)kmalloc(sizeof(struct conreq), GFP_KERNEL)) == NULL) { + // Lack of resources so reject the request. + goto conreq_sendrej; + } + + if ((ep = micscif_find_listen_ep(msg->dst.port, &sflags)) == NULL) { + // Send reject due to no listening ports + goto conreq_sendrej_free; + } + + if (ep->backlog <= ep->conreqcnt) { + // Send reject due to too many pending requests + spin_unlock_irqrestore(&ep->lock, sflags); + goto conreq_sendrej_free; + } + + conreq->msg = *msg; + list_add_tail(&conreq->list, &ep->conlist); + ep->conreqcnt++; + spin_unlock_irqrestore(&ep->lock, sflags); + + wake_up_interruptible(&ep->conwq); + return; + +conreq_sendrej_free: + kfree(conreq); +conreq_sendrej: + msg->uop = SCIF_CNCT_REJ; + micscif_nodeqp_send(&scif_dev[msg->src.node], msg, NULL); +} + +/** + * scif_cnctgnt_resp() - Respond to SCIF_CNCT_GNT interrupt message + * @msg: Interrupt message + * + * An accept() on the remote node has occured and sent this message + * to indicate success. Place the end point in the MAPPING state and + * save the remote nodes memory information. Then wake up the connect + * request so it can finish. + */ +static __always_inline void +scif_cnctgnt_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + unsigned long sflags; + struct endpt *ep = (struct endpt *)msg->payload[0]; + + spin_lock_irqsave(&ep->lock, sflags); + if (SCIFEP_CONNECTING == ep->state) { + ep->peer.node = msg->src.node; + ep->peer.port = msg->src.port; + ep->qp_info.cnct_gnt_payload = msg->payload[1]; + ep->remote_ep = msg->payload[2]; + ep->state = SCIFEP_MAPPING; + + wake_up_interruptible(&ep->conwq); + wake_up(&ep->diswq); + } + spin_unlock_irqrestore(&ep->lock, sflags); +} + +/** + * scif_cnctgntack_resp() - Respond to SCIF_CNCT_GNTACK interrupt message + * @msg: Interrupt message + * + * The remote connection request has finished mapping the local memmory. 
+ * Place the connection in the connected state and wake up the pending + * accept() call. + */ +static __always_inline void +scif_cnctgntack_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + unsigned long sflags; + struct endpt *ep = (struct endpt *)msg->payload[0]; + + spin_lock_irqsave(&ms_info.mi_connlock, sflags); + spin_lock(&ep->lock); + // New ep is now connected with all resouces set. + ep->state = SCIFEP_CONNECTED; + list_add_tail(&ep->list, &ms_info.mi_connected); + get_conn_count(scifdev); + wake_up(&ep->conwq); + spin_unlock(&ep->lock); + spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); +} + +/** + * scif_cnctgntnack_resp() - Respond to SCIF_CNCT_GNTNACK interrupt message + * @msg: Interrupt message + * + * The remote connection request failed to map the local memory it was sent. + * Place the end point in the CLOSING state to indicate it and wake up + * the pending accept(); + */ +static __always_inline void +scif_cnctgntnack_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct endpt *ep = (struct endpt *)msg->payload[0]; + unsigned long sflags; + + spin_lock_irqsave(&ep->lock, sflags); + ep->state = SCIFEP_CLOSING; + wake_up(&ep->conwq); + spin_unlock_irqrestore(&ep->lock, sflags); +} + +/** + * scif_cnctrej_resp() - Respond to SCIF_CNCT_REJ interrupt message + * @msg: Interrupt message + * + * The remote end has rejected the connection request. Set the end + * point back to the bound state and wake up the pending connect(). + */ +static __always_inline void +scif_cnctrej_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct endpt *ep = (struct endpt *)msg->payload[0]; + unsigned long sflags; + + spin_lock_irqsave(&ep->lock, sflags); + if (SCIFEP_CONNECTING == ep->state) { + ep->state = SCIFEP_BOUND; + wake_up_interruptible(&ep->conwq); + } + spin_unlock_irqrestore(&ep->lock, sflags); +} + +/** + * scif_cnctterm_resp() - Respond to SCIF_CNCT_TERM interrupt message + * @msg: Interrupt message + * + * The remote connect() has waited to long for an accept() to occur and + * is removing the connection request. + * + * If the connection request is not found then it is currently being + * processed and a NACK is sent to indicate to the remote connect() to + * wait for connection to complete. + * + * Otherwise the request is removed and an ACK is returned to indicate + * success. + */ +static __always_inline void +scif_cnctterm_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + unsigned long sflags; + struct endpt *ep = NULL; + struct conreq *conreq = NULL; + + ep = micscif_find_listen_ep(msg->dst.port, &sflags); + + if (ep != NULL) { + conreq = miscscif_get_connection_request(ep, msg->payload[0]); + spin_unlock_irqrestore(&ep->lock, sflags); + } + + if (conreq != NULL) { + kfree(conreq); + msg->uop = SCIF_TERM_ACK; + micscif_nodeqp_send(&scif_dev[msg->src.node], msg, NULL); + } +} + +/** + * scif_termack_resp() - Respond to SCIF_TERM_ACK interrupt message + * @msg: Interrupt message + * + * Connection termination has been confirmed so set the end point + * to bound and allow the connection request to error out. 
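+ * (This is the tail of the sequence described for scif_cnctterm_resp() above:
+ * connect() gave up waiting for an accept(), sent SCIF_CNCT_TERM, and the
+ * peer has now acknowledged the termination with SCIF_TERM_ACK.)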
+ */ +static __always_inline void +scif_termack_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct endpt *ep = (struct endpt *)msg->payload[0]; + unsigned long sflags; + + spin_lock_irqsave(&ep->lock, sflags); + if (ep->state != SCIFEP_BOUND) { + ep->state = SCIFEP_BOUND; + wake_up(&ep->diswq); + } + spin_unlock_irqrestore(&ep->lock, sflags); +} + +/** + * scif_discnct_resp() - Respond to SCIF_DISCNCT interrupt message + * @msg: Interrupt message + * + * The remote node has indicated close() has been called on its end + * point. Remove the local end point from the connected list, set its + * state to disconnected and ensure accesses to the remote node are + * shutdown. + * + * When all accesses to the remote end have completed then send a + * DISCNT_ACK to indicate it can remove its resources and complete + * the close routine. + */ +static __always_inline void +scif_discnct_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + unsigned long sflags; + struct endpt *ep = NULL; + struct endpt *tmpep; + struct list_head *pos, *tmpq; + + spin_lock_irqsave(&ms_info.mi_connlock, sflags); + list_for_each_safe(pos, tmpq, &ms_info.mi_connected) { + tmpep = list_entry(pos, struct endpt, list); + if (((uint64_t)tmpep == msg->payload[1]) && ((uint64_t)tmpep->remote_ep == msg->payload[0])) { + list_del(pos); + put_conn_count(scifdev); + ep = tmpep; + spin_lock(&ep->lock); + break; + } + } + + // If the terminated end is not found then this side started closing + // before the other side sent the disconnect. If so the ep will no + // longer be on the connected list. Reguardless the other side + // needs to be acked to let it know close is complete. + if (ep == NULL) { + // Need to unlock conn lock and restore irq state + spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); + goto discnct_resp_ack; + } + + ep->state = SCIFEP_DISCONNECTED; + list_add_tail(&ep->list, &ms_info.mi_disconnected); + + // TODO Cause associated resources to be freed. + // First step: wake up threads blocked in send and recv + wake_up_interruptible(&ep->sendwq); + wake_up_interruptible(&ep->recvwq); + wake_up_interruptible(&ep->conwq); + spin_unlock(&ep->lock); + spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); + +discnct_resp_ack: + msg->uop = SCIF_DISCNT_ACK; + micscif_nodeqp_send(&scif_dev[msg->src.node], msg, NULL); +} + +/** + * scif_discnctack_resp() - Respond to SCIF_DISCNT_ACK interrupt message + * @msg: Interrupt message + * + * Remote side has indicated it has not more references to local resources + */ +static __always_inline void +scif_discntack_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct endpt *ep = (struct endpt *)msg->payload[0]; + unsigned long sflags; + + spin_lock_irqsave(&ep->lock, sflags); + ep->state = SCIFEP_DISCONNECTED; + wake_up(&ep->disconwq); + spin_unlock_irqrestore(&ep->lock, sflags); +} + +/** + * scif_clientsend_resp() - Respond to SCIF_CLIENT_SEND interrupt message + * @msg: Interrupt message + * + * Remote side is confirming send or recieve interrupt handling is complete. + */ +static __always_inline void +scif_clientsend_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct endpt *ep = (struct endpt *)msg->payload[0]; + + if (SCIFEP_CONNECTED == ep->state) { + wake_up_interruptible(&ep->recvwq); + } +} + +/** + * scif_clientrcvd_resp() - Respond to SCIF_CLIENT_RCVD interrupt message + * @msg: Interrupt message + * + * Remote side is confirming send or recieve interrupt handling is complete. 
+ */ +static __always_inline void +scif_clientrcvd_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct endpt *ep = (struct endpt *)msg->payload[0]; + + if (SCIFEP_CONNECTED == ep->state) { + wake_up_interruptible(&ep->sendwq); + } +} + +/** + * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message + * @msg: Interrupt message + * + * Remote side is requesting a memory allocation. + */ +static __always_inline void +scif_alloc_req(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + int err, opcode = (int)msg->payload[3]; + struct reg_range_t *window = 0; + size_t nr_pages = msg->payload[1]; + struct endpt *ep = (struct endpt *)msg->payload[0]; + + might_sleep(); + + if (SCIFEP_CONNECTED != ep->state) { + err = -ENOTCONN; + goto error; + } + + switch (opcode) { + case SCIF_REGISTER: + if (!(window = micscif_create_remote_window(ep, + (int)nr_pages))) { + err = -ENOMEM; + goto error; + } + break; + default: + /* Unexpected allocation request */ + printk(KERN_ERR "Unexpected allocation request opcode 0x%x ep = 0x%p " + " scifdev->sd_state 0x%x scifdev->sd_node 0x%x\n", + opcode, ep, scifdev->sd_state, scifdev->sd_node); + err = -EINVAL; + goto error; + }; + + /* The peer's allocation request is granted */ + msg->uop = SCIF_ALLOC_GNT; + msg->payload[0] = (uint64_t)window; + msg->payload[1] = window->mapped_offset; + if ((err = micscif_nodeqp_send(ep->remote_dev, msg, ep))) + micscif_destroy_remote_window(ep, window); + return; +error: + /* The peer's allocation request is rejected */ + printk(KERN_ERR "%s %d error %d alloc_ptr %p nr_pages 0x%lx\n", + __func__, __LINE__, err, window, nr_pages); + msg->uop = SCIF_ALLOC_REJ; + micscif_nodeqp_send(ep->remote_dev, msg, ep); +} + +/** + * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message + * @msg: Interrupt message + * + * Remote side responded to a memory allocation. + */ +static __always_inline void +scif_alloc_gnt_rej(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct allocmsg *handle = (struct allocmsg *)msg->payload[2]; + switch (handle->uop) { + case SCIF_REGISTER: + { + handle->vaddr = (void *)msg->payload[0]; + handle->phys_addr = msg->payload[1]; + if (msg->uop == SCIF_ALLOC_GNT) + handle->state = OP_COMPLETED; + else + handle->state = OP_FAILED; + wake_up(&handle->allocwq); + break; + } + default: + { + printk(KERN_ERR "Bug Unknown alloc uop 0x%x\n", handle->uop); + } + } +} + +/** + * scif_free_phys: Respond to SCIF_FREE_PHYS interrupt message + * @msg: Interrupt message + * + * Remote side is done accessing earlier memory allocation. + * Remove GTT/PCI mappings created earlier. + */ +static __always_inline void +scif_free_phys(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + return; +} + +/** + * scif_free_phys: Respond to SCIF_FREE_VIRT interrupt message + * @msg: Interrupt message + * + * Free up memory kmalloc'd earlier. + */ +static __always_inline void +scif_free_virt(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct endpt *ep = (struct endpt *)msg->payload[0]; + int opcode = (int)msg->payload[3]; + struct reg_range_t *window = + (struct reg_range_t *)msg->payload[1]; + + switch (opcode) { + case SCIF_REGISTER: + micscif_destroy_remote_window(ep, window); + break; + default: + /* Unexpected allocation request */ + BUG_ON(opcode != SCIF_REGISTER); + }; +} + +/** + * scif_recv_register: Respond to SCIF_REGISTER interrupt message + * @msg: Interrupt message + * + * Update remote window list with a new registered window. 
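+ *
+ * The handling below can be summarised as (informal sketch):
+ *
+ *	peer                          local node
+ *	SCIF_REGISTER  ------------>  endpoint connected?
+ *	                              yes: reply SCIF_REGISTER_ACK and insert
+ *	                                   the window into remote_reg_list
+ *	                              no:  reply SCIF_REGISTER_NACK and destroy
+ *	                                   the remote window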
+ */ +static __always_inline void +scif_recv_register(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + unsigned long sflags; + struct endpt *ep = (struct endpt *)msg->payload[0]; + struct reg_range_t *window = + (struct reg_range_t *)msg->payload[1]; + + might_sleep(); + RMA_MAGIC(window); + mutex_lock(&ep->rma_info.rma_lock); + /* FIXME: + * ep_lock lock needed ? rma_lock is already held + */ + spin_lock_irqsave(&ep->lock, sflags); + if (SCIFEP_CONNECTED == ep->state) { + msg->uop = SCIF_REGISTER_ACK; + micscif_nodeqp_send(ep->remote_dev, msg, ep); + micscif_set_nr_pages(ep->remote_dev, window); + /* No further failures expected. Insert new window */ + micscif_insert_window(window, + &ep->rma_info.remote_reg_list); + } else { + msg->uop = SCIF_REGISTER_NACK; + micscif_nodeqp_send(ep->remote_dev, msg, ep); + } + spin_unlock_irqrestore(&ep->lock, sflags); + mutex_unlock(&ep->rma_info.rma_lock); + /* + * We could not insert the window but we need to + * destroy the window. + */ + if (SCIF_REGISTER_NACK == msg->uop) + micscif_destroy_remote_window(ep, window); + else { +#ifdef _MIC_SCIF_ + micscif_destroy_remote_lookup(ep, window); +#endif + } +} + +/** + * scif_recv_unregister: Respond to SCIF_UNREGISTER interrupt message + * @msg: Interrupt message + * + * Remove window from remote registration list; + */ +static __always_inline void +scif_recv_unregister(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct micscif_rma_req req; + struct reg_range_t *window = NULL; + struct reg_range_t *recv_window = + (struct reg_range_t *)msg->payload[0]; + struct endpt *ep; + int del_window = 0; + + might_sleep(); + RMA_MAGIC(recv_window); + ep = (struct endpt *)recv_window->ep; + req.out_window = &window; + req.offset = recv_window->offset; + req.prot = 0; + req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT; + req.type = WINDOW_FULL; + req.head = &ep->rma_info.remote_reg_list; + msg->payload[0] = ep->remote_ep; + + mutex_lock(&ep->rma_info.rma_lock); + /* + * Does a valid window exist? + */ + if (micscif_query_window(&req)) { + printk(KERN_ERR "%s %d -ENXIO\n", __func__, __LINE__); + msg->uop = SCIF_UNREGISTER_ACK; + goto error; + } + if (window) { + RMA_MAGIC(window); + if (window->ref_count) + put_window_ref_count(window, window->nr_pages); + window->unreg_state = OP_COMPLETED; + if (!window->ref_count) { + msg->uop = SCIF_UNREGISTER_ACK; + atomic_inc(&ep->rma_info.tw_refcount); + atomic_add_return((int32_t)window->nr_pages, &ep->rma_info.tw_total_pages); + ep->rma_info.async_list_del = 1; + list_del(&window->list_member); + window->offset = INVALID_VA_GEN_ADDRESS; + del_window = 1; + } else + /* NACK! There are valid references to this window */ + msg->uop = SCIF_UNREGISTER_NACK; + } else { + /* The window did not make its way to the list at all. ACK */ + msg->uop = SCIF_UNREGISTER_ACK; + micscif_destroy_remote_window(ep, recv_window); + } +error: + mutex_unlock(&ep->rma_info.rma_lock); + if (del_window) + drain_dma_intr(ep->rma_info.dma_chan); + micscif_nodeqp_send(ep->remote_dev, msg, ep); + if (del_window) + micscif_queue_for_cleanup(window, &ms_info.mi_rma); + return; +} + +/** + * scif_recv_register_ack: Respond to SCIF_REGISTER_ACK interrupt message + * @msg: Interrupt message + * + * Wake up the window waiting to complete registration. 
+ */ +static __always_inline void +scif_recv_register_ack(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct reg_range_t *window = + (struct reg_range_t *)msg->payload[2]; + RMA_MAGIC(window); + window->reg_state = OP_COMPLETED; + wake_up(&window->regwq); +} + +/** + * scif_recv_register_nack: Respond to SCIF_REGISTER_NACK interrupt message + * @msg: Interrupt message + * + * Wake up the window waiting to inform it that registration + * cannot be completed. + */ +static __always_inline void +scif_recv_register_nack(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct reg_range_t *window = + (struct reg_range_t *)msg->payload[2]; + RMA_MAGIC(window); + window->reg_state = OP_FAILED; + wake_up(&window->regwq); +} +/** + * scif_recv_unregister_ack: Respond to SCIF_UNREGISTER_ACK interrupt message + * @msg: Interrupt message + * + * Wake up the window waiting to complete unregistration. + */ +static __always_inline void +scif_recv_unregister_ack(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct reg_range_t *window = + (struct reg_range_t *)msg->payload[1]; + RMA_MAGIC(window); + window->unreg_state = OP_COMPLETED; + wake_up(&window->unregwq); +} + +/** + * scif_recv_unregister_nack: Respond to SCIF_UNREGISTER_NACK interrupt message + * @msg: Interrupt message + * + * Wake up the window waiting to inform it that unregistration + * cannot be completed immediately. + */ +static __always_inline void +scif_recv_unregister_nack(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct reg_range_t *window = + (struct reg_range_t *)msg->payload[1]; + RMA_MAGIC(window); + window->unreg_state = OP_FAILED; + wake_up(&window->unregwq); +} + +static __always_inline void +scif_recv_munmap(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct micscif_rma_req req; + struct reg_range_t *window = NULL; + struct reg_range_t *recv_window = + (struct reg_range_t *)msg->payload[0]; + struct endpt *ep; + int del_window = 0; + + might_sleep(); + RMA_MAGIC(recv_window); + ep = (struct endpt *)recv_window->ep; + req.out_window = &window; + req.offset = recv_window->offset; + req.prot = recv_window->prot; + req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT; + req.type = WINDOW_FULL; + req.head = &ep->rma_info.reg_list; + msg->payload[0] = ep->remote_ep; + + mutex_lock(&ep->rma_info.rma_lock); + /* + * Does a valid window exist? + */ + if (micscif_query_window(&req)) { + printk(KERN_ERR "%s %d -ENXIO\n", __func__, __LINE__); + msg->uop = SCIF_UNREGISTER_ACK; + goto error; + } + + RMA_MAGIC(window); + + if (window->ref_count) + put_window_ref_count(window, window->nr_pages); + + if (!window->ref_count) { + atomic_inc(&ep->rma_info.tw_refcount); + atomic_add_return((int32_t)window->nr_pages, &ep->rma_info.tw_total_pages); + ep->rma_info.async_list_del = 1; + list_del(&window->list_member); + micscif_free_window_offset(ep, window->offset, + window->nr_pages << PAGE_SHIFT); + window->offset_freed = true; + del_window = 1; + } +error: + mutex_unlock(&ep->rma_info.rma_lock); + if (del_window) + micscif_queue_for_cleanup(window, &ms_info.mi_rma); +} + +/** + * scif_recv_mark: Handle SCIF_MARK request + * @msg: Interrupt message + * + * The peer has requested a mark. 
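[Editor's note, not part of the patch: scif_recv_register_ack/nack and scif_recv_unregister_ack/nack all follow the same completion idiom: record OP_COMPLETED or OP_FAILED in the window, then wake the wait queue on which the registering thread is presumably sleeping (the waiting side is not shown in this file). Below is a rough userspace equivalent of that idiom using a pthread condition variable in place of the kernel wait queue; every name is made up.]

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

enum op_state { OP_IN_PROGRESS, OP_COMPLETED, OP_FAILED };

struct reg_completion {
    pthread_mutex_t lock;
    pthread_cond_t  wq;      /* stands in for window->regwq */
    enum op_state   state;   /* stands in for window->reg_state */
};

/* ACK/NACK handler side: record the outcome, then wake the waiter. */
static void complete_registration(struct reg_completion *c, enum op_state outcome)
{
    pthread_mutex_lock(&c->lock);
    c->state = outcome;
    pthread_cond_broadcast(&c->wq);
    pthread_mutex_unlock(&c->lock);
}

/* Registering side: sleep until the peer's ACK or NACK arrives. */
static enum op_state wait_for_registration(struct reg_completion *c)
{
    enum op_state s;
    pthread_mutex_lock(&c->lock);
    while (c->state == OP_IN_PROGRESS)
        pthread_cond_wait(&c->wq, &c->lock);
    s = c->state;
    pthread_mutex_unlock(&c->lock);
    return s;
}

static void *fake_peer(void *arg)
{
    sleep(1);                                   /* pretend the ACK takes a while */
    complete_registration(arg, OP_COMPLETED);
    return NULL;
}

int main(void)
{
    struct reg_completion c = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, OP_IN_PROGRESS
    };
    pthread_t t;

    pthread_create(&t, NULL, fake_peer, &c);
    printf("registration %s\n",
           wait_for_registration(&c) == OP_COMPLETED ? "completed" : "failed");
    pthread_join(t, NULL);
    return 0;   /* build with -pthread */
}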
+ */ +static __always_inline void +scif_recv_mark(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct endpt *ep = (struct endpt *)msg->payload[0]; + int mark; + + if (SCIFEP_CONNECTED != ep->state) { + msg->payload[0] = ep->remote_ep; + msg->uop = SCIF_MARK_NACK; + micscif_nodeqp_send(ep->remote_dev, msg, ep); + return; + } + + if ((mark = micscif_fence_mark(ep)) < 0) + msg->uop = SCIF_MARK_NACK; + else + msg->uop = SCIF_MARK_ACK; + msg->payload[0] = ep->remote_ep; + msg->payload[2] = mark; + micscif_nodeqp_send(ep->remote_dev, msg, ep); +} + +/** + * scif_recv_mark_resp: Handle SCIF_MARK_(N)ACK messages. + * @msg: Interrupt message + * + * The peer has responded to a SCIF_MARK message. + */ +static __always_inline void +scif_recv_mark_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct endpt *ep = (struct endpt *)msg->payload[0]; + struct fence_info *fence_req = (struct fence_info *)msg->payload[1]; + + mutex_lock(&ep->rma_info.rma_lock); + if (SCIF_MARK_ACK == msg->uop) { + fence_req->state = OP_COMPLETED; + fence_req->dma_mark = (int)msg->payload[2]; + } else + fence_req->state = OP_FAILED; + wake_up(&fence_req->wq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +/** + * scif_recv_wait: Handle SCIF_WAIT request + * @msg: Interrupt message + * + * The peer has requested waiting on a fence. + */ +static __always_inline void +scif_recv_wait(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct endpt *ep = (struct endpt *)msg->payload[0]; + struct remote_fence_info *fence; + + /* + * Allocate structure for remote fence information and + * send a NACK if the allocation failed. The peer will + * return ENOMEM upon receiving a NACK. + */ + if (!(fence = (struct remote_fence_info *)kmalloc( + sizeof(struct remote_fence_info), GFP_KERNEL))) { + msg->payload[0] = ep->remote_ep; + msg->uop = SCIF_WAIT_NACK; + micscif_nodeqp_send(ep->remote_dev, msg, ep); + return; + } + + /* Prepare the fence request */ + memcpy(&fence->msg, msg, sizeof(struct nodemsg)); + INIT_LIST_HEAD(&fence->list_member); + + /* Insert to the global remote fence request list */ + mutex_lock(&ms_info.mi_fencelock); + ep->rma_info.fence_refcount++; + list_add_tail(&fence->list_member, &ms_info.mi_fence); + mutex_unlock(&ms_info.mi_fencelock); + + queue_work(ms_info.mi_misc_wq, &ms_info.mi_misc_work); +} + +/** + * scif_recv_wait_resp: Handle SCIF_WAIT_(N)ACK messages. + * @msg: Interrupt message + * + * The peer has responded to a SCIF_WAIT message. + */ +static __always_inline void +scif_recv_wait_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct endpt *ep = (struct endpt *)msg->payload[0]; + struct fence_info *fence_req = (struct fence_info *)msg->payload[1]; + + mutex_lock(&ep->rma_info.rma_lock); + if (SCIF_WAIT_ACK == msg->uop) + fence_req->state = OP_COMPLETED; + else + fence_req->state = OP_FAILED; + wake_up(&fence_req->wq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +/** + * scif_recv_local_signal: Handle SCIF_SIG_LOCAL request + * @msg: Interrupt message + * + * The peer has requested a signal on a local offset. 
+ */ +static __always_inline void +scif_recv_signal_local(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + int err = 0; + struct endpt *ep = (struct endpt *)msg->payload[0]; + + err = micscif_prog_signal(ep, + msg->payload[1], + msg->payload[2], + RMA_WINDOW_SELF); + if (err) + msg->uop = SCIF_SIG_NACK; + else + msg->uop = SCIF_SIG_ACK; + msg->payload[0] = ep->remote_ep; + if ((err = micscif_nodeqp_send(ep->remote_dev, msg, ep))) + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); +} + +/** + * scif_recv_signal_remote: Handle SCIF_SIGNAL_REMOTE request + * @msg: Interrupt message + * + * The peer has requested a signal on a remote offset. + */ +static __always_inline void +scif_recv_signal_remote(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + int err = 0; + struct endpt *ep = (struct endpt *)msg->payload[0]; + + err = micscif_prog_signal(ep, + msg->payload[1], + msg->payload[2], + RMA_WINDOW_PEER); + if (err) + msg->uop = SCIF_SIG_NACK; + else + msg->uop = SCIF_SIG_ACK; + msg->payload[0] = ep->remote_ep; + if ((err = micscif_nodeqp_send(ep->remote_dev, msg, ep))) + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); +} + +/** + * scif_recv_signal_remote: Handle SCIF_SIG_(N)ACK messages. + * @msg: Interrupt message + * + * The peer has responded to a signal request. + */ +static __always_inline void +scif_recv_signal_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct endpt *ep = (struct endpt *)msg->payload[0]; + struct fence_info *fence_req = (struct fence_info *)msg->payload[3]; + + mutex_lock(&ep->rma_info.rma_lock); + if (SCIF_SIG_ACK == msg->uop) + fence_req->state = OP_COMPLETED; + else + fence_req->state = OP_FAILED; + wake_up(&fence_req->wq); + mutex_unlock(&ep->rma_info.rma_lock); +} + +/* + * scif_node_wake_up_ack: Handle SCIF_NODE_WAKE_UP_ACK message + * @msg: Interrupt message + * + * Response for a SCIF_NODE_WAKE_UP message. + */ +static __always_inline void +scif_node_wake_up_ack(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + scif_dev[msg->payload[0]].sd_wait_status = OP_COMPLETED; + wake_up(&scif_dev[msg->payload[0]].sd_wq); +} + +/* + * scif_node_wake_up_nack: Handle SCIF_NODE_WAKE_UP_NACK message + * @msg: Interrupt message + * + * Response for a SCIF_NODE_WAKE_UP message. + */ +static __always_inline void +scif_node_wake_up_nack(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + scif_dev[msg->payload[0]].sd_wait_status = OP_FAILED; + wake_up(&scif_dev[msg->payload[0]].sd_wq); +} + +/* + * scif_node_remove: Handle SCIF_NODE_REMOVE message + * @msg: Interrupt message + * + * Handle node removal. + */ +static __always_inline void +scif_node_remove(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + msg->payload[0] = micscif_handle_remove_node(msg->payload[0], msg->payload[1]); + msg->uop = SCIF_NODE_REMOVE_ACK; + msg->src.node = ms_info.mi_nodeid; + micscif_nodeqp_send(&scif_dev[SCIF_HOST_NODE], msg, NULL); +} + +#ifndef _MIC_SCIF_ +/* + * scif_node_remove_ack: Handle SCIF_NODE_REMOVE_ACK message + * @msg: Interrupt message + * + * The peer has acked a SCIF_NODE_REMOVE message. 
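[Editor's note, not part of the patch: the signal handlers above illustrate a pattern used throughout this file: the received nodemsg is recycled as the reply. The handler flips msg->uop to the ACK or NACK opcode and overwrites payload[0], which arrived holding the local endpoint pointer, with ep->remote_ep so the peer can find its own endpoint, then sends the same buffer back. A shape-only sketch with invented types:]

#include <stdint.h>

enum uop_model { SIG_LOCAL, SIG_ACK, SIG_NACK };

struct nodemsg_model { enum uop_model uop; uint64_t payload[4]; };
struct endpt_model   { uint64_t remote_ep; };

/* Turn the received request into its reply in place: flip the opcode and
 * point payload[0] back at the peer's endpoint before sending the same
 * message buffer back out on the node queue pair. */
void make_reply(struct nodemsg_model *msg, struct endpt_model *ep, int err)
{
    msg->uop = err ? SIG_NACK : SIG_ACK;
    msg->payload[0] = ep->remote_ep;
}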
+ */ +static __always_inline void +scif_node_remove_ack(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + bool ack_is_current = true; + int orig_node = (int)msg->payload[3]; + + if ((msg->payload[1] << 32) == DISCONN_TYPE_POWER_MGMT) { + if (msg->payload[2] != atomic_long_read(&ms_info.mi_unique_msgid)) + ack_is_current = false; + } + + if (ack_is_current) { + mic_ctx_t *mic_ctx = get_per_dev_ctx(orig_node - 1); + if (!mic_ctx) { + printk(KERN_ERR "%s %d mic_ctx %p orig_node %d\n", + __func__, __LINE__, mic_ctx, orig_node); + return; + } + + if (msg->payload[0]) { + pr_debug("%s failed to get remove ack from node id %d", __func__, msg->src.node); + ms_info.mi_disconnect_status = OP_FAILED; + } + + atomic_inc(&mic_ctx->disconn_rescnt); + wake_up(&ms_info.mi_disconn_wq); + } +} + +/* + * scif_node_create_ack: Handle SCIF_NODE_CREATE_DEP message + * @msg: Interrupt message + * + * Notification about a new SCIF dependency between two nodes. + */ +static __always_inline void +scif_node_create_dep(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + uint32_t src_node = msg->src.node; + uint32_t dst_node = (uint32_t)msg->payload[0]; + /* + * Host driver updates dependency graph. + * src_node created dependency on dst_node + * src_node -> dst_node + */ + micscif_set_nodedep(src_node, dst_node, DEP_STATE_DEPENDENT); +} + +/* + * scif_node_destroy_ack: Handle SCIF_NODE_DESTROY_DEP message + * @msg: Interrupt message + * + * Notification about tearing down an existing SCIF dependency + * between two nodes. + */ +static __always_inline void +scif_node_destroy_dep(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + uint32_t src_node = msg->src.node; + uint32_t dst_node = (uint32_t)msg->payload[0]; + /* + * Host driver updates dependency graph. + * src_node removed dependency on dst_node + */ + micscif_set_nodedep(src_node, dst_node, DEP_STATE_NOT_DEPENDENT); +} + +/* + * scif_node_wake_up: Handle SCIF_NODE_WAKE_UP message + * @msg: Interrupt message + * + * The host has received a request to wake up a remote node. + */ +static __always_inline void +scif_node_wake_up(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + /* + * Host Driver now needs to wake up the remote node + * available in msg->payload[0]. 
+ */ + uint32_t ret = 0; + ret = micscif_connect_node((uint32_t)msg->payload[0], false); + + if(!ret) { + msg->uop = SCIF_NODE_WAKE_UP_ACK; + micscif_update_p2p_state((uint32_t)msg->payload[0], + msg->src.node, SCIFDEV_RUNNING); + } else { + msg->uop = SCIF_NODE_WAKE_UP_NACK; + } + micscif_nodeqp_send(&scif_dev[msg->src.node], msg, NULL); +} +#endif + +#ifdef _MIC_SCIF_ +static __always_inline void +scif_node_alive_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + msg->uop = SCIF_NODE_ALIVE_ACK; + msg->src.node = ms_info.mi_nodeid; + msg->dst.node = SCIF_HOST_NODE; + micscif_nodeqp_send(&scif_dev[SCIF_HOST_NODE], msg, NULL); + pr_debug("node alive ack sent from node %d oops_in_progress %d\n", + ms_info.mi_nodeid, oops_in_progress); +} +#else +static __always_inline void +scif_node_alive_ack(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + pr_debug("node alive ack received from node %d\n", msg->src.node); + atomic_set(&scif_dev[msg->src.node].sd_node_alive, 1); + wake_up(&scifdev->sd_watchdog_wq); +} +#endif + + +#ifdef _MIC_SCIF_ +static __always_inline void +_scif_proxy_dma(struct micscif_dev *scifdev, struct nodemsg *msg, int flags) +{ + struct endpt *ep = (struct endpt *)msg->payload[0]; + off_t loffset = msg->payload[1]; + off_t roffset = msg->payload[2]; + size_t len = msg->payload[3]; + struct dma_channel *chan = ep->rma_info.dma_chan; + struct endpt_rma_info *rma = &ep->rma_info; + int err = __scif_writeto(ep, loffset, len, roffset, flags); + + if (!err && rma->proxy_dma_peer_phys && + !request_dma_channel(chan)) { + do_status_update(chan, rma->proxy_dma_peer_phys, OP_COMPLETED); + free_dma_channel(chan); + } + if (!rma->proxy_dma_peer_phys) + /* The proxy DMA physical address should have been set up? */ + WARN_ON(1); +} + +/** + * scif_proxy_dma: Handle SCIF_PROXY_DMA request. + * @msg: Interrupt message + * + * The peer has requested a Proxy DMA. + */ +static __always_inline void +scif_proxy_dma(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + _scif_proxy_dma(scifdev, msg, 0x0); +} + +/** + * scif_proxy_ordered_dma: Handle SCIF_PROXY_ORDERED_DMA request. + * @msg: Interrupt message + * + * The peer has requested an ordered Proxy DMA. + */ +static __always_inline void +scif_proxy_ordered_dma(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + _scif_proxy_dma(scifdev, msg, SCIF_RMA_ORDERED); +} +#endif + +#ifndef _MIC_SCIF_ +/** + * scif_node_connect: Respond to SCIF_NODE_CONNECT interrupt message + * @msg: Interrupt message + * + * Connect the src and dst node by setting up the p2p connection + * between them. Host here acts like a proxy. 
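[Editor's note, not part of the patch: the SCIF_NODE_ALIVE / SCIF_NODE_ALIVE_ACK pair above is a heartbeat: the card answers the ping, and the host-side ACK handler sets sd_node_alive and wakes the watchdog. A minimal single-threaded model of one watchdog round is sketched below; in the driver the wait is a sleep on sd_watchdog_wq with a timeout, not the busy poll used here, and all names are invented.]

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct node_model { volatile bool alive; };   /* alive stands in for sd_node_alive */

/* Host side: the ALIVE_ACK handler records that the node answered. */
static void alive_ack_handler(struct node_model *n) { n->alive = true; }

/* Watchdog: after sending the ping, wait for the flag or give up at the deadline. */
static bool wait_for_ack(struct node_model *n, double timeout_sec)
{
    time_t start = time(NULL);
    while (!n->alive) {
        if (difftime(time(NULL), start) > timeout_sec)
            return false;
    }
    return true;
}

int main(void)
{
    struct node_model card = { false };
    alive_ack_handler(&card);               /* pretend the ACK already arrived */
    printf("node %s\n", wait_for_ack(&card, 1.0) ? "alive" : "lost");
    return 0;
}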
+ */ +static __always_inline void +scif_node_connect_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct micscif_dev *dev_j = scifdev; + struct micscif_dev *dev_i = NULL; + struct scif_p2p_info *p2p_ij = NULL; /* bus addr for j from i */ + struct scif_p2p_info *p2p_ji = NULL; /* bus addr for i from j */ + struct scif_p2p_info *p2p; + struct list_head *pos, *tmp; + uint32_t bid = (uint32_t)msg->payload[0]; + int err; + uint64_t tmppayload; + + pr_debug("%s:%d SCIF_NODE_CONNECT from %d connecting to %d \n", + __func__, __LINE__, scifdev->sd_node, bid); + + mutex_lock(&ms_info.mi_conflock); + if (bid < 1 || bid > ms_info.mi_maxid) { + printk(KERN_ERR "%s %d unknown bid %d\n", __func__, __LINE__, bid); + goto nack; + } + + dev_i = &scif_dev[bid]; + mutex_unlock(&ms_info.mi_conflock); + micscif_inc_node_refcnt(dev_i, 1); + mutex_lock(&ms_info.mi_conflock); + + if (dev_i->sd_state != SCIFDEV_RUNNING) + goto ref_nack; + + /* + * If the p2p connection is already setup or in the process of setting up + * then just ignore this request. The requested node will get informed + * by SCIF_NODE_ADD_ACK or SCIF_NODE_ADD_NACK + */ + if (!list_empty(&dev_i->sd_p2p)) { + list_for_each_safe(pos, tmp, &dev_i->sd_p2p) { + p2p = list_entry(pos, struct scif_p2p_info, + ppi_list); + if (p2p->ppi_peer_id == dev_j->sd_node) { + mutex_unlock(&ms_info.mi_conflock); + micscif_dec_node_refcnt(dev_i, 1); + return; + } + } + } + + p2p_ij = init_p2p_info(dev_i, dev_j); + p2p_ji = init_p2p_info(dev_j, dev_i); + + list_add_tail(&p2p_ij->ppi_list, &dev_i->sd_p2p); + list_add_tail(&p2p_ji->ppi_list, &dev_j->sd_p2p); + + /* Send a SCIF_NODE_ADD to dev_i, pass it its bus address + * as seen from dev_j + */ + msg->uop = SCIF_NODE_ADD; + msg->src.node = dev_j->sd_node; + msg->dst.node = dev_i->sd_node; + + p2p_ji->ppi_mic_addr[PPI_APER] = mic_map(msg->src.node - 1, + p2p_ji->ppi_pa[PPI_APER], + p2p_ji->ppi_len[PPI_APER] << PAGE_SHIFT); + msg->payload[0] = p2p_ji->ppi_mic_addr[PPI_APER]; + + /* addresses for node j */ + p2p_ij->ppi_mic_addr[PPI_MMIO] = mic_map(msg->dst.node - 1, + p2p_ij->ppi_pa[PPI_MMIO], + p2p_ij->ppi_len[PPI_MMIO] << PAGE_SHIFT); + msg->payload[1] = p2p_ij->ppi_mic_addr[PPI_MMIO]; + + p2p_ij->ppi_mic_addr[PPI_APER] = mic_map(msg->dst.node - 1, + p2p_ij->ppi_pa[PPI_APER], + p2p_ij->ppi_len[PPI_APER] << PAGE_SHIFT); + msg->payload[2] = p2p_ij->ppi_mic_addr[PPI_APER]; + + msg->payload[3] = p2p_ij->ppi_len[PPI_APER] << PAGE_SHIFT; + + if ((err = micscif_nodeqp_send(dev_i, msg, NULL))) { + printk(KERN_ERR "%s %d error %d\n", __func__, __LINE__, err); + goto ref_nack; + } + + /* Same as above but to dev_j */ + msg->uop = SCIF_NODE_ADD; + msg->src.node = dev_i->sd_node; + msg->dst.node = dev_j->sd_node; + + tmppayload = msg->payload[0]; + msg->payload[0] = msg->payload[2]; + msg->payload[2] = tmppayload; + + p2p_ji->ppi_mic_addr[PPI_MMIO] = mic_map(msg->dst.node - 1, p2p_ji->ppi_pa[PPI_MMIO], + p2p_ji->ppi_len[PPI_MMIO] << PAGE_SHIFT); + msg->payload[1] = p2p_ji->ppi_mic_addr[PPI_MMIO]; + msg->payload[3] = p2p_ji->ppi_len[PPI_APER] << PAGE_SHIFT; + + if ((err = micscif_nodeqp_send(dev_j, msg, NULL))) { + printk(KERN_ERR "%s %d error %d\n", __func__, __LINE__, err); + goto ref_nack; + } + + mutex_unlock(&ms_info.mi_conflock); + micscif_dec_node_refcnt(dev_i, 1); + return; +ref_nack: + micscif_dec_node_refcnt(dev_i, 1); +nack: + mutex_unlock(&ms_info.mi_conflock); + msg->uop = SCIF_NODE_CONNECT_NACK; + msg->dst.node = dev_j->sd_node; + msg->payload[0] = bid; + if ((err = micscif_nodeqp_send(dev_j, msg, 
NULL))) + printk(KERN_ERR "%s %d error %d\n", __func__, __LINE__, err); +} +#endif /* SCIF */ + +#ifdef _MIC_SCIF_ +/** + * scif_node_connect_nack_resp: Respond to SCIF_NODE_CONNECT_NACK interrupt message + * @msg: Interrupt message + * + * Tell the node that initiated SCIF_NODE_CONNECT earlier has failed. + */ +static __always_inline void +scif_node_connect_nack_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + struct micscif_dev *peerdev; + unsigned int bid = msg->payload[0]; + + if (bid > MAX_BOARD_SUPPORTED) { + printk(KERN_ERR "recieved a nack for invalid bid %d\n", bid); + WARN_ON(1); + return; + } + + peerdev = &scif_dev[bid]; + mutex_lock(&peerdev->sd_lock); + peerdev->sd_state = SCIFDEV_NOTPRESENT; + mutex_unlock(&peerdev->sd_lock); + wake_up(&peerdev->sd_p2p_wq); +} +#endif + +/** + * scif_node_add_nack_resp: Respond to SCIF_NODE_ADD_NACK interrupt message + * @msg: Interrupt message + * + * SCIF_NODE_ADD failed, so inform the waiting wq. + */ +static __always_inline void +scif_node_add_nack_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ +#ifndef _MIC_SCIF_ + struct micscif_dev *dst_dev = &scif_dev[msg->dst.node]; + pr_debug("SCIF_NODE_ADD_NACK recieved from %d \n", scifdev->sd_node); + micscif_inc_node_refcnt(dst_dev, 1); + micscif_nodeqp_send(dst_dev, msg, NULL); + micscif_dec_node_refcnt(dst_dev, 1); +#else + struct micscif_dev *peerdev; + + peerdev = &scif_dev[msg->src.node]; + + if (peerdev->sd_state == SCIFDEV_NOTPRESENT) + return; + + mutex_lock(&peerdev->sd_lock); + peerdev->sd_state = SCIFDEV_NOTPRESENT; + mutex_unlock(&peerdev->sd_lock); + wake_up(&peerdev->sd_p2p_wq); +#endif +} + +/** + * scif_get_node_info_resp: Respond to SCIF_GET_NODE_INFO interrupt message + * @msg: Interrupt message + * + * Retrieve node info i.e maxid, total and node mask from the host. + */ +static __always_inline void +scif_get_node_info_resp(struct micscif_dev *scifdev, struct nodemsg *msg) +{ +#ifdef _MIC_SCIF_ + struct get_node_info *node_info = (struct get_node_info *)msg->payload[3]; + + mutex_lock(&ms_info.mi_conflock); + ms_info.mi_mask = msg->payload[0]; + ms_info.mi_maxid = msg->payload[1]; + ms_info.mi_total = msg->payload[2]; + + node_info->state = OP_COMPLETED; + wake_up(&node_info->wq); + mutex_unlock(&ms_info.mi_conflock); +#else + swap(msg->dst.node, msg->src.node); + mutex_lock(&ms_info.mi_conflock); + msg->payload[0] = ms_info.mi_mask; + msg->payload[1] = ms_info.mi_maxid; + msg->payload[2] = ms_info.mi_total; + mutex_unlock(&ms_info.mi_conflock); + + if (micscif_nodeqp_send(scifdev, msg, NULL)) + printk(KERN_ERR "%s %d error \n", __func__, __LINE__); +#endif +} + +#ifdef ENABLE_TEST +static void +scif_test(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + if (msg->payload[0] != scifdev->count) { + printk(KERN_ERR "Con fail: payload == %llx\n", msg->payload[0]); + scifdev->test_done = -1; + } else if (scifdev->count == TEST_LOOP) { + pr_debug("Test success state %d!\n", scifdev->sd_state); + scifdev->test_done = 1; + } + + if (scifdev->test_done != 0) { + while (scifdev->test_done != 2) { + cpu_relax(); + schedule(); + } + + destroy_workqueue(scifdev->producer); + destroy_workqueue(scifdev->consumer); + pr_debug("Destroyed workqueue state %d!\n", scifdev->sd_state); + } + scifdev->count++; +} +#endif /* ENABLE_TEST */ + +static void +scif_msg_unknown(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + /* Bogus Node Qp Message? 
*/ + printk(KERN_ERR "Unknown message 0x%xn scifdev->sd_state 0x%x " + "scifdev->sd_node 0x%x\n", + msg->uop, scifdev->sd_state, scifdev->sd_node); + BUG_ON(1); +} + +#ifdef _MIC_SCIF_ +static void +smpt_set(struct micscif_dev *scifdev, struct nodemsg *msg) +{ + printk("msd recvd : smpt add\n"); + printk("dma_addr = 0x%llX, entry = 0x%llX\n", msg->payload[0], msg->payload[1]); + mic_smpt_set(scif_dev->mm_sbox, msg->payload[0], msg->payload[1]); +} +#endif + +void (*scif_intr_func[SCIF_MAX_MSG + 1])(struct micscif_dev *, struct nodemsg *msg) = { + scif_msg_unknown, // Error + scif_init_resp, // SCIF_INIT + scif_exit_resp, // SCIF_EXIT + scif_nodeadd_resp, // SCIF_NODE_ADD + scif_nodeaddack_resp, // SCIF_NODE_ADD_ACK + scif_cnctreq_resp, // SCIF_CNCT_REQ + scif_cnctgnt_resp, // SCIF_CNCT_GNT + scif_cnctgntack_resp, // SCIF_CNCT_GNTACK + scif_cnctgntnack_resp, // SCIF_CNCT_GNTNACK + scif_cnctrej_resp, // SCIF_CNCT_REJ + scif_cnctterm_resp, // SCIF_CNCT_TERM 10 + scif_termack_resp, // SCIF_TERM_ACK + scif_discnct_resp, // SCIF_DISCNCT + scif_discntack_resp, // SCIF_DISCNT_ACK + scif_recv_register, // SCIF_REGISTER + scif_recv_register_ack, // SCIF_REGISTER_ACK + scif_recv_register_nack, // SCIF_REGISTER_NACK + scif_recv_unregister, // SCIF_UNREGISTER + scif_recv_unregister_ack, // SCIF_UNREGISTER_ACK + scif_recv_unregister_nack, // SCIF_UNREGISTER_NACK + scif_alloc_req, // SCIF_ALLOC_REQ 20 + scif_alloc_gnt_rej, // SCIF_ALLOC_GNT + scif_alloc_gnt_rej, // SCIF_ALLOC_REJ + scif_free_phys, // SCIF_FREE_PHYS + scif_free_virt, // SCIF_FREE_VIRT + scif_clientsend_resp, // SCIF_CLIENT_SENT + scif_clientrcvd_resp, // SCIF_CLIENT_RCVD + scif_recv_munmap, // SCIF_MUNMAP + scif_recv_mark, // SCIF_MARK + scif_recv_mark_resp, // SCIF_MARK_ACK + scif_recv_mark_resp, // SCIF_MARK_NACK 30 + scif_recv_wait, // SCIF_WAIT + scif_recv_wait_resp, // SCIF_WAIT_ACK + scif_recv_wait_resp, // SCIF_WAIT_NACK + scif_recv_signal_local, // SCIF_SIG_LOCAL + scif_recv_signal_remote, // SCIF_SIG_REMOTE + scif_recv_signal_resp, // SCIF_SIG_ACK + scif_recv_signal_resp, // SCIF_SIG_NACK +#ifdef _MIC_SCIF_ + scif_msg_unknown, + scif_msg_unknown, + scif_msg_unknown, + scif_msg_unknown, + scif_msg_unknown, // SCIF_NODE_CREATE_DEP Not on card + scif_msg_unknown, // SCIF_NODE_DESTROY_DEP Not on card +#else + scif_msg_unknown, + scif_msg_unknown, + scif_msg_unknown, + scif_msg_unknown, + scif_node_create_dep, // SCIF_NODE_CREATE_DEP + scif_node_destroy_dep, // SCIF_NODE_DESTROY_DEP +#endif + scif_node_remove, // SCIF_NODE_REMOVE +#ifdef _MIC_SCIF_ + scif_msg_unknown, // SCIF_NODE_REMOVE_ACK Not on card + scif_msg_unknown, // SCIF_NODE_WAKE_UP Not on card +#else + scif_node_remove_ack, // SCIF_NODE_REMOVE_ACK + scif_node_wake_up, // SCIF_NODE_WAKE_UP +#endif + scif_node_wake_up_ack, // SCIF_NODE_WAKE_UP_ACK + scif_node_wake_up_nack, // SCIF_NODE_WAKE_UP_NACK +#ifdef _MIC_SCIF_ + scif_node_alive_resp, // SCIF_NODE_ALIVE + scif_msg_unknown, // SCIF_NODE_ALIVE_ACK not on card + smpt_set, // SMPT_SET +#else + scif_msg_unknown, // SCIF_NODE_ALIVE not on Host + scif_node_alive_ack, // SCIF_NODE_ALIVE_ACK + scif_msg_unknown, // SCIF_NODE_ALIVE not on Host +#endif + scif_msg_unknown, + scif_msg_unknown, + scif_msg_unknown, + scif_msg_unknown, +#ifdef _MIC_SCIF_ + scif_proxy_dma, // SCIF_PROXY_DMA only for MIC + scif_proxy_ordered_dma, // SCIF_PROXY_ORDERED_DMA only for MIC +#else + scif_msg_unknown, + scif_msg_unknown, +#endif +#ifdef _MIC_SCIF_ + scif_msg_unknown, + scif_node_connect_nack_resp, //SCIF_NODE_CONNECT_NACK +#else + 
scif_node_connect_resp, //SCIF_NODE_CONNECT + scif_msg_unknown, +#endif + scif_node_add_nack_resp, //SCIF_NODE_ADD_NACK + scif_get_node_info_resp, //SCIF_GET_NODE_INFO +#ifdef ENABLE_TEST + scif_test // SCIF_TEST +#else + scif_msg_unknown +#endif +}; + +/** + * scif_nodeqp_msg_hander() - Common handler for node messages + * @scifdev: Remote device to respond to + * @qp: Remote memory pointer + * @msg: The message to be handled. + * + * This routine calls the appriate routine to handle a Node Qp message receipt. + */ +int micscif_max_msg_id = SCIF_MAX_MSG; + +static void +micscif_nodeqp_msg_handler(struct micscif_dev *scifdev, struct micscif_qp *qp, struct nodemsg *msg) +{ + micscif_display_message(scifdev, msg, "Rcvd"); + + if (msg->uop > (uint32_t)micscif_max_msg_id) { + /* Bogus Node Qp Message? */ + printk(KERN_ERR "Unknown message 0x%xn scifdev->sd_state 0x%x " + "scifdev->sd_node 0x%x\n", + msg->uop, scifdev->sd_state, scifdev->sd_node); + BUG_ON(1); + } + + scif_intr_func[msg->uop](scifdev, msg); +} + +/** + * scif_nodeqp_intrhander() - Interrupt handler for node messages + * @scifdev: Remote device to respond to + * @qp: Remote memory pointer + * + * This routine is triggered by the interrupt mechanism. It reads + * messages from the node queue RB and calls the Node QP Message handling + * routine. + */ +int +micscif_nodeqp_intrhandler(struct micscif_dev *scifdev, struct micscif_qp *qp) +{ + struct nodemsg msg; + int read_size; + + do { +#ifndef _MIC_SCIF_ + if (qp->blast) { + scif_wakeup_ep(SCIF_WAKE_UP_RECV); + qp->blast = 0; + } +#endif + if (SCIFDEV_STOPPED == scifdev->sd_state) + return 0; + read_size = micscif_rb_get_next(&qp->inbound_q, &msg, + sizeof(msg)); + /* Stop handling messages if an oops is in progress */ + if (read_size != sizeof(msg) || oops_in_progress) + break; +#ifndef _MIC_SCIF_ + atomic_set(&scifdev->sd_node_alive, 1); +#endif + + micscif_inc_node_refcnt(scifdev, 1); + micscif_nodeqp_msg_handler(scifdev, qp, &msg); + /* + * The reference count is reset to SCIF_NODE_IDLE + * during scif device cleanup so decrementing the + * reference count further is not required. + */ + if (SCIFDEV_INIT == scifdev->sd_state) + return 0; + if (SCIFDEV_STOPPED == scifdev->sd_state) { + micscif_dec_node_refcnt(scifdev, 1); + return 0; + } + micscif_rb_update_read_ptr(&qp->inbound_q); + micscif_dec_node_refcnt(scifdev, 1); + } while (read_size == sizeof(msg)); +#ifdef _MIC_SCIF_ + /* + * Keep polling the Node QP RB in case there are active SCIF + * P2P connections to provide better Node QP responsiveness + * in anticipation of P2P Proxy DMA requests for performance. + */ + if (scifdev->sd_proxy_dma_reads && + scifdev->num_active_conn && + SCIFDEV_STOPPED != scifdev->sd_state) { + queue_work(scifdev->sd_intr_wq, &scifdev->sd_intr_bh); + schedule(); + } +#endif + return read_size; +} + +/** + * micscif_loopb_wq_handler - Loopback Workqueue Handler. + * @work: loop back work + * + * This work queue routine is invoked by the loopback work queue handler. + * It grabs the recv lock, dequeues any available messages from the head + * of the loopback message list, calls the node QP message handler, + * waits for it to return, then frees up this message and dequeues more + * elements of the list if available. 
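[Editor's note, not part of the patch: scif_intr_func[] above is a plain uop-indexed function-pointer table: slot 0 and every opcode not valid on the current side point at scif_msg_unknown, and micscif_nodeqp_msg_handler bounds-checks msg->uop against micscif_max_msg_id before indexing. A compact userspace model of that dispatch shape, with made-up opcodes and handlers:]

#include <stdint.h>
#include <stdio.h>

struct msg_model { uint32_t uop; uint64_t payload[4]; };

static void handle_unknown(struct msg_model *m) { printf("unknown uop %u\n", m->uop); }
static void handle_ping(struct msg_model *m)
{
    printf("ping, payload %llu\n", (unsigned long long)m->payload[0]);
}

#define MAX_UOP 2
static void (*dispatch[MAX_UOP + 1])(struct msg_model *) = {
    handle_unknown,     /* 0: never a valid opcode */
    handle_ping,        /* 1 */
    handle_unknown,     /* 2: reserved on this side */
};

static void msg_handler(struct msg_model *m)
{
    if (m->uop > MAX_UOP) {       /* bogus message: never index past the table */
        handle_unknown(m);
        return;
    }
    dispatch[m->uop](m);
}

int main(void)
{
    struct msg_model m = { 1, { 42, 0, 0, 0 } };
    msg_handler(&m);
    m.uop = 99;
    msg_handler(&m);
    return 0;
}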
+ */ +static void micscif_loopb_wq_handler(struct work_struct *work) +{ + struct micscif_dev *scifdev = + container_of(work, struct micscif_dev, sd_loopb_work); + struct micscif_qp *qp = micscif_nodeqp_nextmsg(scifdev); + struct loopb_msg *msg; + + do { + msg = NULL; + spin_lock(&qp->qp_recv_lock); + if (!list_empty(&scifdev->sd_loopb_recv_q)) { + msg = list_first_entry(&scifdev->sd_loopb_recv_q, + struct loopb_msg, list_member); + list_del(&msg->list_member); + } + spin_unlock(&qp->qp_recv_lock); + + if (msg) { + micscif_nodeqp_msg_handler(scifdev, qp, &msg->msg); + kfree(msg); + } + } while (msg); +} + +/** + * micscif_loopb_msg_handler() - Workqueue handler for loopback messages. + * @scifdev: SCIF device + * @qp: Queue pair. + * + * This work queue routine is triggered when a loopback message is received. + * + * We need special handling for receiving Node Qp messages on a loopback SCIF + * device via two workqueues for receiving messages. + * + * The reason we need the extra workqueue which is not required with *normal* + * non-loopback SCIF devices is the potential classic deadlock described below: + * + * Thread A tries to send a message on a loopback SCIF devide and blocks since + * there is no space in the RB while it has the qp_send_lock held or another + * lock called lock X for example. + * + * Thread B: The Loopback Node QP message receive workqueue receives the message + * and tries to send a message (eg an ACK) to the loopback SCIF device. It tries + * to grab the send lock again or lock X and deadlocks with Thread A. The RB + * cannot be drained any further due to this classic deadlock. + * + * In order to avoid deadlocks as mentioned above we have an extra level of + * indirection achieved by having two workqueues. + * 1) The first workqueue whose handler is micscif_loopb_msg_handler reads + * messages from the Node QP RB, adds them to a list and queues work for the + * second workqueue. + * + * 2) The second workqueue whose handler is micscif_loopb_wq_handler dequeues + * messages from the list, handles them, frees up the memory and dequeues + * more elements from the list if possible. + */ +int +micscif_loopb_msg_handler(struct micscif_dev *scifdev, struct micscif_qp *qp) +{ + int read_size; + struct loopb_msg *msg; + + do { + if (!(msg = kmalloc(sizeof(struct loopb_msg), GFP_KERNEL))) { + printk(KERN_ERR "%s %d ENOMEM\n", __func__, __LINE__); + return -ENOMEM; + } + + read_size = micscif_rb_get_next(&qp->inbound_q, &msg->msg, + sizeof(struct nodemsg)); + + if (read_size != sizeof(struct nodemsg)) { + kfree(msg); + micscif_rb_update_read_ptr(&qp->inbound_q); + break; + } + + spin_lock(&qp->qp_recv_lock); + list_add_tail(&msg->list_member, &scifdev->sd_loopb_recv_q); + spin_unlock(&qp->qp_recv_lock); + queue_work(scifdev->sd_loopb_wq, &scifdev->sd_loopb_work); + micscif_rb_update_read_ptr(&qp->inbound_q); + } while (read_size == sizeof(struct nodemsg)); + return read_size; +} + +/** + * micscif_setup_loopback_qp - One time setup work for Loopback Node Qp. + * @scifdev: SCIF device + * + * Sets up the required loopback workqueues, queue pairs, ring buffers + * and also tests out the Queue Pairs. 
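[Editor's note, not part of the patch: the two-workqueue scheme described above boils down to splitting receive into two stages so that message handling, which may itself need the send path, never runs while the ring is being drained. Stage 1 only copies messages off the ring into a list; stage 2 pops the list and handles each message. A minimal userspace model of that split, with a plain singly linked queue and invented names (the real code protects the list with qp->qp_recv_lock and runs the stages on separate workqueues):]

#include <stdlib.h>
#include <stdio.h>

struct msg_model { int uop; };

struct loopb_item {
    struct msg_model msg;
    struct loopb_item *next;
};

static struct loopb_item *recv_q_head, *recv_q_tail;

/* Stage 1: drain the ring buffer into the list; no message handling here. */
static void stage1_enqueue(const struct msg_model *m)
{
    struct loopb_item *it = malloc(sizeof(*it));
    if (!it)
        return;
    it->msg = *m;
    it->next = NULL;
    if (recv_q_tail)
        recv_q_tail->next = it;
    else
        recv_q_head = it;
    recv_q_tail = it;
}

/* Stage 2: handle queued messages; handlers are free to send replies. */
static void stage2_drain(void (*handler)(struct msg_model *))
{
    while (recv_q_head) {
        struct loopb_item *it = recv_q_head;
        recv_q_head = it->next;
        if (!recv_q_head)
            recv_q_tail = NULL;
        handler(&it->msg);
        free(it);
    }
}

static void print_handler(struct msg_model *m) { printf("handled uop %d\n", m->uop); }

int main(void)
{
    struct msg_model m = { 7 };
    stage1_enqueue(&m);            /* role of micscif_loopb_msg_handler */
    stage2_drain(print_handler);   /* role of micscif_loopb_wq_handler */
    return 0;
}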
+ */ +int micscif_setup_loopback_qp(struct micscif_dev *scifdev) +{ + int err = 0; + void *local_q; + struct micscif_qp *qp; + + /* Set up the work queues */ + if ((err = micscif_setup_interrupts(scifdev))) + goto error; + + INIT_LIST_HEAD(&scifdev->sd_loopb_recv_q); + snprintf(scifdev->sd_loopb_wqname, sizeof(scifdev->sd_loopb_wqname), + "SCIF LOOPB %d", scifdev->sd_node); + if (!(scifdev->sd_loopb_wq = + __mic_create_singlethread_workqueue(scifdev->sd_loopb_wqname))){ + err = -ENOMEM; + goto destroy_intr_wq; + } + INIT_WORK(&scifdev->sd_loopb_work, micscif_loopb_wq_handler); + /* Allocate Self Qpair */ + scifdev->n_qpairs = 1; + scifdev->qpairs = (struct micscif_qp *)kzalloc(sizeof(struct micscif_qp), GFP_KERNEL); + if (!scifdev->qpairs) { + printk(KERN_ERR "Node QP Allocation failed\n"); + err = -ENOMEM; + goto destroy_loopb_wq; + } + + qp = scifdev->qpairs; + qp->magic = SCIFEP_MAGIC; + spin_lock_init(&qp->qp_send_lock); + spin_lock_init(&qp->qp_recv_lock); + init_waitqueue_head(&scifdev->sd_mmap_wq); + + local_q = kzalloc(NODE_QP_SIZE, GFP_KERNEL); + if (!local_q) { + printk(KERN_ERR "Ring Buffer Allocation Failed\n"); + err = -ENOMEM; + goto free_qpairs; + } + + /* + * For loopback the inbound_q and outbound_q are essentially the same + * since the Node sends a message on the loopback interface to the + * outbound_q which is then received on the inbound_q. + */ + micscif_rb_init(&qp->outbound_q, + &(scifdev->qpairs[0].local_read), + &(scifdev->qpairs[0].local_write), + local_q, + NODE_QP_SIZE); + + micscif_rb_init(&(qp->inbound_q), + &(scifdev->qpairs[0].local_read), + &(scifdev->qpairs[0].local_write), + local_q, + NODE_QP_SIZE); + + /* Launch the micscif_rb test */ +#ifdef ENABLE_TEST + micscif_qp_testboth(scifdev); +#endif + return err; +free_qpairs: + kfree(scifdev->qpairs); +destroy_loopb_wq: + destroy_workqueue(scifdev->sd_loopb_wq); +destroy_intr_wq: + destroy_workqueue(scifdev->sd_intr_wq); +error: + return err; +} + +/** + * micscif_destroy_loopback_qp - One time uninit work for Loopback Node Qp + * @scifdev: SCIF device + * + * Detroys the workqueues and frees up the Ring Buffer and Queue Pair memory. 
+ */ +int micscif_destroy_loopback_qp(struct micscif_dev *scifdev) +{ + micscif_destroy_interrupts(scifdev); + destroy_workqueue(scifdev->sd_loopb_wq); + kfree((void *)scifdev->qpairs->outbound_q.rb_base); + kfree(scifdev->qpairs); + return 0; +} + +#ifndef _MIC_SCIF_ +void micscif_destroy_p2p(mic_ctx_t *mic_ctx) +{ + mic_ctx_t * mic_ctx_peer; + struct micscif_dev *mic_scif_dev; + struct micscif_dev *peer_dev; + struct scif_p2p_info *p2p; + struct list_head *pos, *tmp; + uint32_t bd; + + if (!mic_p2p_enable) + return; + + + /* FIXME: implement node deletion */ + mic_scif_dev = &scif_dev[mic_get_scifnode_id(mic_ctx)]; + + /* Free P2P mappings in the given node for all its peer nodes */ + list_for_each_safe(pos, tmp, &mic_scif_dev->sd_p2p) { + p2p = list_entry(pos, struct scif_p2p_info, + ppi_list); + + mic_unmap(mic_ctx->bi_id, p2p->ppi_mic_addr[PPI_MMIO], + p2p->ppi_len[PPI_MMIO] << PAGE_SHIFT); + mic_unmap(mic_ctx->bi_id, p2p->ppi_mic_addr[PPI_APER], + p2p->ppi_len[PPI_APER] << PAGE_SHIFT); + pci_unmap_sg(mic_ctx->bi_pdev, + p2p->ppi_sg[PPI_MMIO], p2p->sg_nentries[PPI_MMIO], PCI_DMA_BIDIRECTIONAL); + micscif_p2p_freesg(p2p->ppi_sg[PPI_MMIO]); + pci_unmap_sg(mic_ctx->bi_pdev, + p2p->ppi_sg[PPI_APER], p2p->sg_nentries[PPI_APER], PCI_DMA_BIDIRECTIONAL); + micscif_p2p_freesg(p2p->ppi_sg[PPI_APER]); + list_del(pos); + kfree(p2p); + } + + /* Free P2P mapping created in the peer nodes for the given node */ + for (bd = SCIF_HOST_NODE + 1; bd <= ms_info.mi_maxid; bd++) { + peer_dev = &scif_dev[bd]; + + list_for_each_safe(pos, tmp, &peer_dev->sd_p2p) { + p2p = list_entry(pos, struct scif_p2p_info, + ppi_list); + if (p2p->ppi_peer_id == mic_get_scifnode_id(mic_ctx)) { + + mic_ctx_peer = get_per_dev_ctx(peer_dev->sd_node - 1); + mic_unmap(mic_ctx_peer->bi_id, p2p->ppi_mic_addr[PPI_MMIO], + p2p->ppi_len[PPI_MMIO] << PAGE_SHIFT); + mic_unmap(mic_ctx_peer->bi_id, p2p->ppi_mic_addr[PPI_APER], + p2p->ppi_len[PPI_APER] << PAGE_SHIFT); + pci_unmap_sg(mic_ctx_peer->bi_pdev, + p2p->ppi_sg[PPI_MMIO], p2p->sg_nentries[PPI_MMIO], PCI_DMA_BIDIRECTIONAL); + micscif_p2p_freesg(p2p->ppi_sg[PPI_MMIO]); + pci_unmap_sg(mic_ctx_peer->bi_pdev, p2p->ppi_sg[PPI_APER], + p2p->sg_nentries[PPI_APER], PCI_DMA_BIDIRECTIONAL); + micscif_p2p_freesg(p2p->ppi_sg[PPI_APER]); + list_del(pos); + kfree(p2p); + } + } + } +} +#endif + +/** + * ONLY TEST CODE BELOW + */ +#ifdef ENABLE_TEST +#include +#include +#include "mic/micscif_nodeqp.h" + +static void micscif_rb_trigger_consumer(struct work_struct *work) +{ + struct micscif_dev *scifdev = container_of(work, struct micscif_dev, consumer_work); + + while (scifdev->test_done == 0) { + cpu_relax(); + schedule(); + } + if (scifdev->test_done != 1) + printk(KERN_ERR "Consumer failed!\n"); + else + pr_debug("Test finished: Success\n"); + scifdev->test_done = 2; +} + +/** + * micscif_rb_trigger_producer + * This is the producer thread to create messages and update the + * RB write offset accordingly. + */ +static void micscif_rb_trigger_producer(struct work_struct *work) +{ + struct nodemsg msg; + int count = 0; + struct micscif_dev *scifdev = container_of(work, struct micscif_dev, producer_work); + + msg.dst.node = scifdev->sd_node; + msg.uop = SCIF_TEST; + + while (count <= TEST_LOOP) { + msg.payload[0] = count++; + micscif_nodeqp_send(scifdev, &msg, NULL); + /* pr_debug(("Prod payload %llu\n", msg.payload[0]); */ + } +} + +/* this is called from the host and the card at the same time on a queue pair. 
+ * Each sets up a producer and a consumer and spins on the queue pair until done + */ +static void micscif_qp_testboth(struct micscif_dev *scifdev) +{ + scifdev->count = 0; + scifdev->test_done = 0; + snprintf(scifdev->producer_name, sizeof(scifdev->producer_name), + "PRODUCER %d", scifdev->sd_node); + snprintf(scifdev->consumer_name, sizeof(scifdev->consumer_name), + "CONSUMER %d", scifdev->sd_node); + scifdev->producer = + __mic_create_singlethread_workqueue(scifdev->producer_name); + scifdev->consumer = + __mic_create_singlethread_workqueue(scifdev->consumer_name); + + INIT_WORK(&scifdev->producer_work, micscif_rb_trigger_producer); + INIT_WORK(&scifdev->consumer_work, micscif_rb_trigger_consumer); + + queue_work(scifdev->producer, &scifdev->producer_work); + queue_work(scifdev->consumer, &scifdev->consumer_work); +} +#endif diff --git a/micscif/micscif_ports.c b/micscif/micscif_ports.c new file mode 100644 index 0000000..2a59410 --- /dev/null +++ b/micscif/micscif_ports.c @@ -0,0 +1,376 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * Port reservation mechnism. + * Since this goes with SCIF it must be available for any OS + * and should not consume IP ports. Therefore, roll our own. + * This is not required to be high performance, so a simple bit + * array should do just fine. + * + * API specification (loosely): + * + * uint16_t port + * Port number is a 16 bit unsigned integer + * + * uint16_t rsrv_scif_port(uint16_t) + * reserve specified port # + * returns port #, or 0 if port unavailable. 
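[Editor's note, not part of the patch: the port reservation scheme introduced above keeps one bit per port, with a set bit meaning the port is free; rsrv_scif_port() clears one specific bit, get_scif_port() finds and clears the first free bit above the reserved range, and put_scif_port() sets it again. The sketch below is a much smaller userspace model of those semantics with made-up pool sizes; it scans linearly from the reserved boundary instead of tracking a first_free index the way the driver does.]

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PORT_COUNT 128
#define PORT_RSVD  32              /* get_port() never hands these out */
#define BITS       64

static uint64_t bits[PORT_COUNT / BITS];

static void ports_init(void)
{
    memset(bits, 0xff, sizeof(bits));   /* set bit == port is free */
}

/* Reserve one specific port (1..PORT_COUNT-1); returns 0 if unavailable. */
static uint16_t rsrv_port(uint16_t port)
{
    uint64_t msk = 1ULL << (port % BITS);
    if (!port || port >= PORT_COUNT || !(bits[port / BITS] & msk))
        return 0;
    bits[port / BITS] &= ~msk;
    return port;
}

/* Grab any non-reserved port; returns 0 if the pool is empty. */
static uint16_t get_port(void)
{
    for (uint16_t p = PORT_RSVD; p < PORT_COUNT; p++)
        if (rsrv_port(p))
            return p;
    return 0;
}

static void put_port(uint16_t port)
{
    if (port && port < PORT_COUNT)
        bits[port / BITS] |= 1ULL << (port % BITS);
}

int main(void)
{
    ports_init();
    uint16_t a = rsrv_port(5), b = get_port();
    printf("reserved %u, dynamic %u\n", a, b);
    put_port(b);
    return 0;
}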
+ * + * uint16_t get_scif_port(void) + * reserve any available port # + * returns port #, or 0 if no ports available + * + * void put_scif_port(uint16_t) + * release port # + * + * Reserved ports comes from the lower end of the allocatable range, + * and is reserved only in the sense that get_scif_port() won't use + * them and there is only a predefined count of them available. + */ + +#include + +/* + * Manifests + * Port counts must be an integer multiple of 64 + */ + +#define SCIF_PORT_BASE 0x0000 /* Start port (port reserved if 0) */ +#define SCIF_PORT_COUNT 0x10000 /* Ports available */ + +#if SCIF_PORT_RSVD > (SCIF_PORT_COUNT/2) +#error "No more than half of scif ports can be reserved !!" +#endif +#if (SCIF_PORT_BASE + SCIF_PORT_COUNT) > (2 << 16) +#error "Scif ports cannot exceed 16 bit !!" +#endif + +#include +#include +static spinlock_t port_lock = __SPIN_LOCK_UNLOCKED(port_lock); + +/* + * Data structures + * init_array Flag for initialize (mark as init_code?) + * port_bits 1 bit representing each possible port. + * first_free Index into port_bits for free area + * port_lock Lock for exclusive access + * port_rsvd Total of successful "get/resv" calls. + * port_free Total of successful "free" calls. + * port_err Total of unsuccessfull calls. + */ + +#define BITS_PR_PORT (8 * sizeof(uint64_t)) +#define PORTS_ARRAY_SIZE ((SCIF_PORT_COUNT + (BITS_PR_PORT - 1)) / BITS_PR_PORT) + + +static int init_array = 1; +static uint16_t first_free; +static uint64_t port_bits[PORTS_ARRAY_SIZE]; +static uint64_t port_rsvd; +static uint64_t port_free; +static uint64_t port_err; + + +/* + * Bitfield handlers. + * + * Need 3 bit-fiddlers to operate on individual bits within + * one 64 bit word in memory (always passing a pointer). + * Individual bits are enumerated from 1, allowing for use + * of value 0 to indicate an error condition. + * + * 1) __scif_ffsclr() returns index of first set bit in the + * 64 bit word and clears it. A return value 0 means there + * were no set bits in the word. + * + * 2) __scif_clrbit() clears a specified bit in the 64 bit word + * The bit index is returned if bit was previously set and a + * value 0 is returned if it was previously clear. + * + * 3) __scif_setbit() sets a specified bit in the 64 bit word. + * + * Two versions, one should work for you. + */ + +#if 1 && (defined(__GNUC__) || defined(ICC)) +/* + * Use GNU style inline assembly for bit operations. + * + * Gcc complains about uninitialized use of variables + * big_bit in ffsclr and avl in clrbit. Generated code + * is correct, just haven't figured out the correct + * contraints yet. + * + * gcc -O2: + * __scif_ffsclr: 40 bytes + * __scif_clrbit: 34 bytes + * __scif_setbit: 17 bytes + */ + +static int +__scif_ffsclr(uint64_t *word) +{ + uint64_t big_bit = 0; + uint64_t field = *word; + + asm volatile ( + "bsfq %1,%0\n\t" + "jnz 1f\n\t" + "movq $-1,%0\n" + "jmp 2f\n\t" + "1:\n\t" + "btrq %2,%1\n\t" + "2:" + : "=r" (big_bit), "=r" (field) + : "0" (big_bit), "1" (field) + ); + + if (big_bit == -1) + return 0; + + *word = field; + return big_bit + 1; +} + +static int +__scif_clrbit(uint64_t *word, uint16_t bit) +{ + uint64_t field = *word; + uint64_t big_bit = bit; + int avl = 0; + + big_bit--; + asm volatile ( + "xorl %2,%2\n\t" + "btrq %3,%1\n\t" + "rcll $1,%2\n\t" + : "=Ir" (big_bit), "=r" (field), "=r" (avl) + : "0" (big_bit), "1" (field), "2" (avl) + ); + + *word = field; + return avl ? 
bit : 0; +} + +static void +__scif_setbit(uint64_t *word, uint16_t bit) +{ + uint64_t field = *word; + uint64_t big_bit = bit; + + big_bit--; + asm volatile ( + "btsq %2,%1" + : "=r" (field) + : "0" (field), "Jr" (big_bit) + ); + + *word = field; +} +#else +/* + * C inliners for bit operations. + * + * gcc -O2: + * __scif_ffsclr: 50 bytes + * __scif_clrbit: 45 bytes + * __scif_setbit: 18 bytes + * + * WARNING: + * 1) ffsll() may be glibc specific + * 2) kernel ffs() use cmovz instruction that may not + * work in uOS kernel (see arch/x86/include/asm/bitops.h) + * + */ + + +static int +__scif_ffsclr(uint64_t *word) +{ + int bit; +/* + * ffsll() Find 1st bit in 64 bit word + */ + + bit = ffsll(*word); + if (bit) + *word &= ~(1LL << (bit - 1)); + + return bit; +} + +static int +__scif_clrbit(uint64_t *word, uint16_t bit) +{ + uint64_t msk = (1LL << (bit - 1)); + + if (*word & msk) { + *word &= ~msk; + return bit; + } + return 0; +} + +static void +__scif_setbit(uint64_t *word, uint16_t bit) +{ + *word |= (1LL << (bit - 1)); +} +#endif + + +static void +init_scif_array(void) +{ + spin_lock(&port_lock); + if (init_array) { + int i; + for (i = 0; i < PORTS_ARRAY_SIZE; i++) + port_bits[i] = ~0; + first_free = SCIF_PORT_RSVD / BITS_PR_PORT; + if (!SCIF_PORT_BASE) + port_bits[0] ^= 1; + port_rsvd = 0; + port_free = 0; + port_err = 0; + init_array = 0; + } + spin_unlock(&port_lock); + pr_debug("SCIF port array init:\n" + " %d ports available starting at %d, %d reserved\n" + " Array consists of %ld %ld-bit wide integers\n", + SCIF_PORT_BASE ? SCIF_PORT_COUNT : SCIF_PORT_COUNT - 1, + SCIF_PORT_BASE ? SCIF_PORT_BASE : 1, SCIF_PORT_RSVD, + PORTS_ARRAY_SIZE, BITS_PR_PORT); +} + + +/* + * Reserve a specified port for SCIF + * TBD: doxyfy this header + */ +uint16_t +rsrv_scif_port(uint16_t port) +{ + uint16_t port_ix; + + if (!port) { + pr_debug("rsrv_scif_port: invalid port %d\n", port); + port_err++; + return 0; + } + + if (init_array) + init_scif_array(); + + port -= SCIF_PORT_BASE; + port_ix = port / BITS_PR_PORT; + + spin_lock(&port_lock); + port = __scif_clrbit(port_bits + port_ix, 1 + (port % BITS_PR_PORT)); + if (port) { + port = port - 1 + BITS_PR_PORT * port_ix + SCIF_PORT_BASE; + port_rsvd++; + } else { + port_err++; + } + spin_unlock(&port_lock); + + return port; +} + + +/* + * Get and reserve any port # for SCIF + * TBD: doxyfy this header + */ +uint16_t +get_scif_port(void) +{ + uint16_t port; + + if (init_array) + init_scif_array(); + + spin_lock(&port_lock); + if (first_free >= PORTS_ARRAY_SIZE) { /* Pool is empty */ + port = 0; + port_err++; + goto out; + } + port = __scif_ffsclr(port_bits + first_free); + if (port) { + port = port - 1 + BITS_PR_PORT * first_free + SCIF_PORT_BASE; + while ((first_free < PORTS_ARRAY_SIZE) && !port_bits[first_free]) + first_free++; + port_rsvd++; + } else + port_err++; +out: + spin_unlock(&port_lock); + return port; +} + + +/* + * Release a reserved port # for SCIF + * For now, just ignore release on unreserved port + * TBD: doxyfy this header + */ + +void +put_scif_port(uint16_t port) +{ + uint16_t port_ix; + + if (!port) { + pr_debug("put_scif_port: invalid port %d\n", port); + port_err++; + return; + } + + port -= SCIF_PORT_BASE; + port_ix = port / BITS_PR_PORT; + + spin_lock(&port_lock); + __scif_setbit(port_bits + port_ix, 1 + (port % BITS_PR_PORT)); + if (port >= SCIF_PORT_RSVD && port_ix < first_free) + first_free = port_ix; + port_free++; + spin_unlock(&port_lock); +} + diff --git a/micscif/micscif_rb.c b/micscif/micscif_rb.c new file mode 100644 
index 0000000..3fdbf8f --- /dev/null +++ b/micscif/micscif_rb.c @@ -0,0 +1,372 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#include "mic/micscif.h" +#include "mic/micscif_rb.h" + +#include +#include +#define count_in_ring(head, tail, size) CIRC_CNT(head, tail, size) +#define space_in_ring(head, tail, size) CIRC_SPACE(head, tail, size) + +MODULE_LICENSE("GPL"); + +static void *micscif_rb_get(struct micscif_rb *rb, uint32_t size); + +/** + * micscif_rb_init - To Initialize the RingBuffer + * @rb: The RingBuffer context + * @read_ptr: A pointer to the memory location containing + * the updated read pointer + * @write_ptr: A pointer to the memory location containing + * the updated write pointer + * @rb_base: The pointer to the ring buffer + * @size: The size of the ring buffer + */ +void micscif_rb_init(struct micscif_rb *rb, + volatile uint32_t *read_ptr, + volatile uint32_t *write_ptr, + volatile void *rb_base, + const uint32_t size) +{ + /* Size must be a power of two -- all logic assoicated with + * incrementing the read and write pointers relies on the size + * being a power of 2 + */ + BUG_ON((size & (size-1)) != 0); + rb->rb_base = rb_base; + rb->size = size; + rb->read_ptr = read_ptr; + rb->write_ptr = write_ptr; + rb->current_read_offset = *read_ptr; + rb->current_write_offset = *write_ptr; +} +EXPORT_SYMBOL(micscif_rb_init); + +/** + * micscif_rb_reset - To reset the RingBuffer + * @rb - The RingBuffer context + */ +void micscif_rb_reset(struct micscif_rb *rb) +{ + /* + * XPU_RACE_CONDITION: write followed by read + * MFENCE after write + * Read should take care of SBOX sync + * Ponters are volatile (see RingBuffer declaration) + */ + *rb->read_ptr = 0x0; + *rb->write_ptr = 0x0; + smp_mb(); + rb->current_write_offset = *rb->write_ptr; + rb->current_read_offset = *rb->read_ptr; +} +EXPORT_SYMBOL(micscif_rb_reset); + +/* Copies a message to the ring buffer -- handles the wrap around case */ +static int memcpy_torb(struct micscif_rb *rb, void *header, + void *msg, uint32_t size) +{ + /* Need to call two copies if it wraps around */ + uint32_t size1, size2; + if ((char*)header + size >= (char*)rb->rb_base + rb->size) { + size1 = (uint32_t) ( ((char*)rb->rb_base + rb->size) - (char*)header); + size2 = size - size1; + memcpy_toio(header, msg, size1); + memcpy_toio(rb->rb_base, (char*)msg+size1, size2); + } else { + memcpy_toio(header, msg, size); + } + return 0; +} + +/* Copies a message from the ring buffer -- handles the wrap around case */ +static int memcpy_fromrb(struct micscif_rb *rb, void *header, + void *msg, uint32_t size) +{ + /* Need to call two copies if it wraps around */ + uint32_t size1, size2; + if ((char*)header + size >= (char*)rb->rb_base + rb->size) { + size1 = (uint32_t) ( ((char*)rb->rb_base + rb->size) - (char*)header ); + size2 = size - size1; + memcpy_fromio(msg, header, size1); + memcpy_fromio((char*)msg+size1, rb->rb_base, size2); + } else { + memcpy_fromio(msg, header, size); + } + return 0; +} + +/** + * micscif_rb_space - + * Query space available for writing to the given RB. + * + * @rb - The RingBuffer context + * + * Returns: size available for writing to RB in bytes. 
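[Editor's note, not part of the patch: micscif_rb relies on two ideas visible in memcpy_torb/memcpy_fromrb and the CIRC_CNT/CIRC_SPACE macros: the buffer size is a power of two so offsets advance with a mask, and a copy that runs past the end is split into two memcpy calls that wrap to the base. The self-contained userspace model below shows just those two mechanics for a single producer and consumer in one address space; it leaves out the shared read/write pointers, memcpy_toio/fromio, and all ordering concerns.]

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define RB_SIZE 64                      /* must be a power of two */

struct rb_model {
    uint8_t  base[RB_SIZE];
    uint32_t read_off, write_off;
};

static uint32_t rb_space(const struct rb_model *rb)   /* like CIRC_SPACE */
{
    return (rb->read_off - rb->write_off - 1) & (RB_SIZE - 1);
}

static uint32_t rb_count(const struct rb_model *rb)   /* like CIRC_CNT */
{
    return (rb->write_off - rb->read_off) & (RB_SIZE - 1);
}

/* Copy, possibly wrapping past the end, then advance the offset with a mask. */
static int rb_write(struct rb_model *rb, const void *msg, uint32_t size)
{
    uint32_t first;
    if (rb_space(rb) < size)
        return -1;
    first = RB_SIZE - rb->write_off;
    if (first > size)
        first = size;
    memcpy(rb->base + rb->write_off, msg, first);
    memcpy(rb->base, (const uint8_t *)msg + first, size - first);
    rb->write_off = (rb->write_off + size) & (RB_SIZE - 1);
    return 0;
}

static int rb_read(struct rb_model *rb, void *msg, uint32_t size)
{
    uint32_t first;
    if (rb_count(rb) < size)
        return -1;
    first = RB_SIZE - rb->read_off;
    if (first > size)
        first = size;
    memcpy(msg, rb->base + rb->read_off, first);
    memcpy((uint8_t *)msg + first, rb->base, size - first);
    rb->read_off = (rb->read_off + size) & (RB_SIZE - 1);
    return 0;
}

int main(void)
{
    struct rb_model rb = { {0}, 0, 0 };
    char out[16] = "";
    rb_write(&rb, "hello", 6);
    rb_read(&rb, out, 6);
    printf("%s\n", out);
    return 0;
}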
+ */ +int micscif_rb_space(struct micscif_rb *rb) +{ + rb->old_current_read_offset = rb->current_read_offset; + + rb->current_read_offset = *rb->read_ptr; + return space_in_ring(rb->current_write_offset, + rb->current_read_offset, rb->size); +} +EXPORT_SYMBOL(micscif_rb_space); + +/** + * micscif_rb_write - Write one package to the given ring buffer + * @rb - The RingBuffer context + * @msg - The package to be put in the ring buffer + * @size - the size (in bytes) you want to copy + * + * This API does not block if there isn't enough space in the RB. + */ +int micscif_rb_write(struct micscif_rb *rb, + void *msg, + uint32_t size) +{ + void *header; + int ret = 0; + if ((uint32_t)micscif_rb_space(rb) < size) + return -ENOMEM; + header = (char*)rb->rb_base + rb->current_write_offset; + ret = memcpy_torb(rb, header, msg, size); + if (!ret) { + /* + * XPU_RACE_CONDITION: Don't do anything here! + * Wait until micscif_rb_commit() + * Update the local ring buffer data, not the shared data until commit. + */ + rb->old_current_write_offset = rb->current_write_offset; + rb->current_write_offset = (rb->current_write_offset + size) & (rb->size - 1); + } + return ret; +} +EXPORT_SYMBOL(micscif_rb_write); + +/* + * micscif_rb_get_next + * Read from ring buffer. + * @rb - The RingBuffer context + * @msg - buffer to hold the message. Must be at least size bytes long + * @size - Size to be read out passed in, actual bytes read + * is returned. + * RETURN: + * Returns the number of bytes possible to read -- if retVal != size, then + * the read does not occur. + */ +int micscif_rb_get_next (struct micscif_rb *rb, void *msg, uint32_t size) +{ + void *header = NULL; + int read_size = 0; + /* + * warning: RingBufferGet() looks at the shared write pointer + */ + header = micscif_rb_get(rb, size); + if (header) { + uint32_t next_cmd_offset = + (rb->current_read_offset + size) & (rb->size - 1); + read_size = size; + rb->old_current_read_offset = rb->current_read_offset; + rb->current_read_offset = next_cmd_offset; + if (memcpy_fromrb(rb, header, msg, size)) // add check here + return -EFAULT; + } + return read_size; +} +EXPORT_SYMBOL(micscif_rb_get_next); + +/** + * micscif_rb_update_read_ptr + * @rb - The RingBuffer context + */ +void micscif_rb_update_read_ptr(struct micscif_rb *rb) +{ + uint32_t old_offset; + uint32_t new_offset; + smp_mb(); + old_offset = rb->old_current_read_offset; + new_offset = rb->current_read_offset; + + /* + * XPU_RACE_CONDITION: + * pReadPointer is ready to move + * Moving read pointer transfers ownership to MIC + * What if MICCPU starts writing to buffer before all + * writes were flushed? + * Need to flush out all pending writes before pointer update + */ + smp_mb(); + +#ifdef CONFIG_ML1OM + serializing_request((volatile uint8_t*) rb->rb_base+old_offset); +#endif + + *rb->read_ptr = new_offset; +#ifdef CONFIG_ML1OM + /* + * Readback since KNF doesn't guarantee that PCI ordering is maintained. + * Need a memory barrier on the host before the readback so the readback + * doesn't load from the write combining buffer but will go across to the + * PCI bus that will then flush the posted write to the device. + */ + smp_mb(); + serializing_request(rb->read_ptr); +#endif +#if defined(CONFIG_MK1OM) && defined(_MIC_SCIF_) + /* + * KNC Si HSD 3853952: For the case where a Core is performing an EXT_WR + * followed by a Doorbell Write, the Core must perform two EXT_WR to the + * same address with the same data before it does the Doorbell Write. 
+ * This way, if ordering is violate for the Interrupt Message, it will + * fall just behind the first Posted associated with the first EXT_WR. + */ + *rb->read_ptr = new_offset; +#endif + smp_mb(); +} +EXPORT_SYMBOL(micscif_rb_update_read_ptr); + +/** + * micscif_rb_count + * @rb - The RingBuffer context + * RETURN: number of empty slots in the RB + */ +uint32_t micscif_rb_count(struct micscif_rb *rb, uint32_t size) +{ + if (count_in_ring(rb->current_write_offset, + rb->current_read_offset, + rb->size) < size) { + /* + * Update from the HW write pointer if empty + */ + rb->old_current_write_offset = rb->current_write_offset; + rb->current_write_offset = *rb->write_ptr; + } + return count_in_ring(rb->current_write_offset, + rb->current_read_offset, + rb->size); +} +EXPORT_SYMBOL(micscif_rb_count); + +/** + * micscif_rb_commit + * To submit the buffer to let the uOS to fetch it + * @rb - The RingBuffer context + */ +void micscif_rb_commit(struct micscif_rb *rb) +{ + /* + * XPU_RACE_CONDITION: + * Writing to ringbuffer memory before updating the pointer + * can be out-of-order and write combined. + * This is the point where we start to care about + * consistency of the data. + * There are two race conditions below: + * (1) Ring buffer pointer moves before all data is flushed: + * if uOS is late taking the interrupt for the previous transaction, + * it may take the new write pointer immediately + * and start accessing data in the ringbuffer. + * Ring buffer data must be consistent before we update the write + * pointer. We read back the address at oldCurrentWriteOffset + * -- this is the location in memory written during the last + * ring buffer operation; keep in mind that ring buffers and ring buffer + * pointers can be in different kinds of memory (host vs MIC, + * depending on currently active workaround flags. + * (2) If uOS takes interrupt while write pointer value is still + * in-flight may result in uOS reading old value, message being lost, + * and the deadlock. Must put another memory barrier after readback -- + * revents read-passing-read from later read + */ + smp_mb(); +#ifdef CONFIG_ML1OM + /* + * Also makes sure the following read is not reordered + */ + serializing_request((char*)rb->rb_base + rb->current_write_offset); +#endif + *rb->write_ptr = rb->current_write_offset; +#ifdef CONFIG_ML1OM + /* + * Readback since KNF doesn't guarantee that PCI ordering is maintained. + * Need a memory barrier on the host before the readback so the readback + * doesn't load from the write combining buffer but will go across to the + * PCI bus that will then flush the posted write to the device. + */ + smp_mb(); + serializing_request(rb->write_ptr); +#endif +#if defined(CONFIG_MK1OM) && defined(_MIC_SCIF_) + /* + * KNC Si HSD 3853952: For the case where a Core is performing an EXT_WR + * followed by a Doorbell Write, the Core must perform two EXT_WR to the + * same address with the same data before it does the Doorbell Write. + * This way, if ordering is violate for the Interrupt Message, it will + * fall just behind the first Posted associated with the first EXT_WR. 
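[Editor's note, not part of the patch: stripped of the PCI readbacks, the KNF serializing_request() calls, and the KNC doorbell workaround, the ordering rule micscif_rb_commit and micscif_rb_update_read_ptr enforce is the usual publish/consume pairing: make every payload store visible before publishing the new offset, and read the offset before touching the payload. The fragment below expresses that rule with C11 release/acquire atomics instead of smp_mb(); it ignores wrap-around (shown earlier) and does not model the PCI-specific barriers, and all names are invented.]

#include <stdatomic.h>
#include <stdint.h>
#include <string.h>

#define RB_SIZE 64

struct shared_rb {
    uint8_t buf[RB_SIZE];
    _Atomic uint32_t write_off;   /* published by the producer */
    _Atomic uint32_t read_off;    /* published by the consumer */
};

static void producer_commit(struct shared_rb *rb, const void *msg,
                            uint32_t size, uint32_t local_write)
{
    memcpy(rb->buf + local_write, msg, size);            /* payload first */
    atomic_store_explicit(&rb->write_off,
                          (local_write + size) & (RB_SIZE - 1),
                          memory_order_release);         /* then publish */
}

static uint32_t consumer_peek(struct shared_rb *rb)
{
    /* Acquire pairs with the release above: seeing the new offset guarantees
     * the payload bytes behind it are visible too. */
    return atomic_load_explicit(&rb->write_off, memory_order_acquire);
}

int main(void)
{
    static struct shared_rb rb;
    producer_commit(&rb, "ping", 5, 0);
    return (int)consumer_peek(&rb);   /* 5 */
}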
+ */ + *rb->write_ptr = rb->current_write_offset; +#endif + smp_mb(); +} +EXPORT_SYMBOL(micscif_rb_commit); + +/** + * micscif_rb_get + * To get next packet from the ring buffer + * @rb - The RingBuffer context + * RETURN: + * NULL if no packet in the ring buffer + * Otherwise The pointer of the next packet + */ +static void *micscif_rb_get(struct micscif_rb *rb, uint32_t size) +{ + void *header = NULL; + + if (micscif_rb_count(rb, size) >= size) + header = (char*)rb->rb_base + rb->current_read_offset; + return header; +} + +/** + * micscif_rb_get_version + * Return the ring buffer module version + */ +uint16_t micscif_rb_get_version(void) +{ + return RING_BUFFER_VERSION; +} +EXPORT_SYMBOL(micscif_rb_get_version); diff --git a/micscif/micscif_rma.c b/micscif/micscif_rma.c new file mode 100644 index 0000000..9c6de2e --- /dev/null +++ b/micscif/micscif_rma.c @@ -0,0 +1,2633 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#include "mic/micscif.h" +#include "mic/micscif_smpt.h" +#include "mic/micscif_kmem_cache.h" +#include "mic/micscif_rma_list.h" +#ifndef _MIC_SCIF_ +#include "mic_common.h" +#endif +#include "mic/mic_dma_api.h" +#include "mic/micscif_map.h" + +bool mic_reg_cache_enable = 0; + +bool mic_huge_page_enable = 1; + +#ifdef _MIC_SCIF_ +mic_dma_handle_t mic_dma_handle; +#endif +static inline +void micscif_rma_destroy_tcw(struct rma_mmu_notifier *mmn, + struct endpt *ep, bool inrange, + uint64_t start, uint64_t len); +#ifdef CONFIG_MMU_NOTIFIER +static void scif_mmu_notifier_release(struct mmu_notifier *mn, + struct mm_struct *mm); +static void scif_mmu_notifier_invalidate_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address); +static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end); +static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end); +static const struct mmu_notifier_ops scif_mmu_notifier_ops = { + .release = scif_mmu_notifier_release, + .clear_flush_young = NULL, + .change_pte = NULL,/*TODO*/ + .invalidate_page = scif_mmu_notifier_invalidate_page, + .invalidate_range_start = scif_mmu_notifier_invalidate_range_start, + .invalidate_range_end = scif_mmu_notifier_invalidate_range_end}; + +static void scif_mmu_notifier_release(struct mmu_notifier *mn, + struct mm_struct *mm) +{ + struct endpt *ep; + struct rma_mmu_notifier *mmn; + mmn = container_of(mn, struct rma_mmu_notifier, ep_mmu_notifier); + ep = mmn->ep; + micscif_rma_destroy_tcw(mmn, ep, false, 0, 0); + pr_debug("%s\n", __func__); + return; +} + +static void scif_mmu_notifier_invalidate_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + struct endpt *ep; + struct rma_mmu_notifier *mmn; + mmn = container_of(mn, struct rma_mmu_notifier, ep_mmu_notifier); + ep = mmn->ep; + micscif_rma_destroy_tcw(mmn, ep, true, address, PAGE_SIZE); + pr_debug("%s address 0x%lx\n", __func__, address); + return; +} + +static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct endpt *ep; + struct rma_mmu_notifier *mmn; + mmn = container_of(mn, struct rma_mmu_notifier, ep_mmu_notifier); + ep = mmn->ep; + micscif_rma_destroy_tcw(mmn, ep, true, (uint64_t)start, (uint64_t)(end - start)); + pr_debug("%s start=%lx, end=%lx\n", __func__, start, end); + return; +} + +static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + /* Nothing to do here, everything needed was done in invalidate_range_start */ + pr_debug("%s\n", __func__); + return; +} +#endif + +#ifdef CONFIG_MMU_NOTIFIER +void ep_unregister_mmu_notifier(struct endpt *ep) +{ + struct endpt_rma_info *rma = &ep->rma_info; + struct rma_mmu_notifier *mmn = NULL; + struct list_head *item, *tmp; + mutex_lock(&ep->rma_info.mmn_lock); + list_for_each_safe(item, tmp, &rma->mmn_list) { + mmn = list_entry(item, + struct rma_mmu_notifier, list_member); + mmu_notifier_unregister(&mmn->ep_mmu_notifier, mmn->mm); +#ifdef RMA_DEBUG + BUG_ON(atomic_long_sub_return(1, &ms_info.mmu_notif_cnt) < 0); +#endif + list_del(item); + kfree(mmn); + } + mutex_unlock(&ep->rma_info.mmn_lock); +} + +static void init_mmu_notifier(struct rma_mmu_notifier *mmn, struct mm_struct *mm, struct endpt *ep) +{ + mmn->ep = ep; + 
mmn->mm = mm; + mmn->ep_mmu_notifier.ops = &scif_mmu_notifier_ops; + INIT_LIST_HEAD(&mmn->list_member); + INIT_LIST_HEAD(&mmn->tc_reg_list); +} + +static struct rma_mmu_notifier *find_mmu_notifier(struct mm_struct *mm, struct endpt_rma_info *rma) +{ + struct rma_mmu_notifier *mmn; + struct list_head *item; + list_for_each(item, &rma->mmn_list) { + mmn = list_entry(item, + struct rma_mmu_notifier, list_member); + if (mmn->mm == mm) + return mmn; + } + return NULL; +} +#endif + +/** + * micscif_rma_ep_init: + * @ep: end point + * + * Initialize RMA per EP data structures. + */ +int micscif_rma_ep_init(struct endpt *ep) +{ + int ret; + struct endpt_rma_info *rma = &ep->rma_info; + + mutex_init (&rma->rma_lock); + if ((ret = va_gen_init(&rma->va_gen, + VA_GEN_MIN, VA_GEN_RANGE)) < 0) + goto init_err; + spin_lock_init(&rma->tc_lock); + mutex_init (&rma->mmn_lock); + mutex_init (&rma->va_lock); + INIT_LIST_HEAD(&rma->reg_list); + INIT_LIST_HEAD(&rma->remote_reg_list); + atomic_set(&rma->tw_refcount, 0); + atomic_set(&rma->tw_total_pages, 0); + atomic_set(&rma->tcw_refcount, 0); + atomic_set(&rma->tcw_total_pages, 0); + init_waitqueue_head(&rma->fence_wq); + rma->fence_refcount = 0; + rma->async_list_del = 0; + rma->dma_chan = NULL; + INIT_LIST_HEAD(&rma->mmn_list); + INIT_LIST_HEAD(&rma->task_list); +init_err: + return ret; +} + +/** + * micscif_rma_ep_can_uninit: + * @ep: end point + * + * Returns 1 if an endpoint can be uninitialized and 0 otherwise. + */ +int micscif_rma_ep_can_uninit(struct endpt *ep) +{ + int ret = 0; + + /* Destroy RMA Info only if both lists are empty */ + if (list_empty(&ep->rma_info.reg_list) && + list_empty(&ep->rma_info.remote_reg_list) && +#ifdef CONFIG_MMU_NOTIFIER + list_empty(&ep->rma_info.mmn_list) && +#endif + !atomic_read(&ep->rma_info.tw_refcount) && + !atomic_read(&ep->rma_info.tcw_refcount)) + ret = 1; + return ret; +} + +#ifdef _MIC_SCIF_ +/** + * __micscif_setup_proxy_dma: + * @ep: SCIF endpoint descriptor. + * + * Sets up data structures for P2P Proxy DMAs. + */ +static int __micscif_setup_proxy_dma(struct endpt *ep) +{ + struct endpt_rma_info *rma = &ep->rma_info; + int err = 0; + uint64_t *tmp = NULL; + + mutex_lock(&rma->rma_lock); + if (is_p2p_scifdev(ep->remote_dev) && !rma->proxy_dma_va) { + if (!(tmp = scif_zalloc(PAGE_SIZE))) { + err = -ENOMEM; + goto error; + } + if ((err = map_virt_into_aperture(&rma->proxy_dma_phys, + tmp, + ep->remote_dev, PAGE_SIZE))) { + scif_free(tmp, PAGE_SIZE); + goto error; + } + *tmp = OP_IDLE; + rma->proxy_dma_va = tmp; + } +error: + mutex_unlock(&rma->rma_lock); + return err; +} + +static __always_inline int micscif_setup_proxy_dma(struct endpt *ep) +{ + if (ep->rma_info.proxy_dma_va) + return 0; + + return __micscif_setup_proxy_dma(ep); +} + +/** + * micscif_teardown_proxy_dma: + * @ep: SCIF endpoint descriptor. + * + * Tears down data structures setup for P2P Proxy DMAs. + */ +void micscif_teardown_proxy_dma(struct endpt *ep) +{ + struct endpt_rma_info *rma = &ep->rma_info; + mutex_lock(&rma->rma_lock); + if (rma->proxy_dma_va) { + unmap_from_aperture(rma->proxy_dma_phys, ep->remote_dev, PAGE_SIZE); + scif_free(rma->proxy_dma_va, PAGE_SIZE); + rma->proxy_dma_va = NULL; + } + mutex_unlock(&rma->rma_lock); +} + +/** + * micscif_proxy_dma: + * @ep: SCIF endpoint descriptor. + * @copy_work: DMA copy work information. 
+ * + * This API does the following: + * 1) Sends the peer a SCIF Node QP message with the information + * required to program a proxy DMA to covert a P2P Read to a Write + * which will initiate a DMA transfer from the peer card to self. + * The reason for this special code path is KNF and KNC P2P read + * performance being much lower than P2P write performance on Crown + * Pass platforms. + * 2) Poll for an update of the known proxy dma VA to OP_COMPLETED + * via a SUD by the peer. + */ +static int micscif_proxy_dma(scif_epd_t epd, struct mic_copy_work *work) +{ + struct endpt *ep = (struct endpt *)epd; + struct nodemsg msg; + unsigned long ts = jiffies; + struct endpt_rma_info *rma = &ep->rma_info; + int err; + volatile uint64_t *proxy_dma_va = rma->proxy_dma_va; + + mutex_lock(&ep->rma_info.rma_lock); + /* + * Bail out if there is a Proxy DMA already in progress + * for this endpoint. The callee will fallback on self + * DMAs upon an error. + */ + if (*proxy_dma_va != OP_IDLE) { + mutex_unlock(&ep->rma_info.rma_lock); + err = -EBUSY; + goto error; + } + *proxy_dma_va = OP_IN_PROGRESS; + mutex_unlock(&ep->rma_info.rma_lock); + + msg.src = ep->port; + msg.uop = work->ordered ? SCIF_PROXY_ORDERED_DMA : SCIF_PROXY_DMA; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = work->src_offset; + msg.payload[2] = work->dst_offset; + msg.payload[3] = work->len; + + if ((err = micscif_nodeqp_send(ep->remote_dev, &msg, ep))) + goto error_init_va; + + while (*proxy_dma_va != OP_COMPLETED) { + schedule(); + if (time_after(jiffies, + ts + NODE_ALIVE_TIMEOUT)) { + err = -EBUSY; + goto error_init_va; + } + } + err = 0; +error_init_va: + *proxy_dma_va = OP_IDLE; +error: + return err; +} +#endif + +/** + * micscif_create_pinned_pages: + * @nr_pages: number of pages in window + * @prot: read/write protection + * + * Allocate and prepare a set of pinned pages. + */ +struct scif_pinned_pages *micscif_create_pinned_pages(int nr_pages, int prot) +{ + struct scif_pinned_pages *pinned_pages; + + might_sleep(); + if (!(pinned_pages = scif_zalloc(sizeof(*pinned_pages)))) + goto error; + + if (!(pinned_pages->pages = scif_zalloc(nr_pages * + sizeof(*(pinned_pages->pages))))) + goto error_free_pinned_pages; + + if (!(pinned_pages->num_pages = scif_zalloc(nr_pages * + sizeof(*(pinned_pages->num_pages))))) + goto error_free_pages; + +#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_HUGETLB_PAGE) && !defined(_MIC_SCIF_) + if (!(pinned_pages->vma = scif_zalloc(nr_pages * + sizeof(*(pinned_pages->vma))))) + goto error_free_num_pages; +#endif + + pinned_pages->prot = prot; + pinned_pages->magic = SCIFEP_MAGIC; + pinned_pages->nr_contig_chunks = 0; + return pinned_pages; + +#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_HUGETLB_PAGE) && !defined(_MIC_SCIF_) +error_free_num_pages: + scif_free(pinned_pages->num_pages, + pinned_pages->nr_pages * sizeof(*(pinned_pages->num_pages))); +#endif +error_free_pages: + scif_free(pinned_pages->pages, + pinned_pages->nr_pages * sizeof(*(pinned_pages->pages))); +error_free_pinned_pages: + scif_free(pinned_pages, sizeof(*pinned_pages)); +error: + return NULL; +} + +/** + * micscif_destroy_pinned_pages: + * @pinned_pages: A set of pinned pages. + * + * Deallocate resources for pinned pages. 
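+ *
+ * Marks writeable user pages dirty, drops the page references taken at
+ * pin time and frees the bookkeeping arrays. Within this file it is
+ * reached from micscif_destroy_window() once the pinned-page reference
+ * count drops to zero.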
+ */ +int micscif_destroy_pinned_pages(struct scif_pinned_pages *pinned_pages) +{ + int j; + int writeable = pinned_pages->prot & SCIF_PROT_WRITE; + int kernel = SCIF_MAP_KERNEL & pinned_pages->map_flags; + + for (j = 0; j < pinned_pages->nr_pages; j++) { + if (pinned_pages->pages[j]) { + if (!kernel) { + if (writeable) + SetPageDirty(pinned_pages->pages[j]); +#ifdef RMA_DEBUG + BUG_ON(!page_count(pinned_pages->pages[j])); + BUG_ON(atomic_long_sub_return(1, &ms_info.rma_pin_cnt) < 0); +#endif + page_cache_release(pinned_pages->pages[j]); + } + } + } + +#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_HUGETLB_PAGE) && !defined(_MIC_SCIF_) + scif_free(pinned_pages->vma, + pinned_pages->nr_pages * sizeof(*(pinned_pages->vma))); +#endif + scif_free(pinned_pages->pages, + pinned_pages->nr_pages * sizeof(*(pinned_pages->pages))); + scif_free(pinned_pages->num_pages, + pinned_pages->nr_pages * sizeof(*(pinned_pages->num_pages))); + scif_free(pinned_pages, sizeof(*pinned_pages)); + return 0; +} + +/* + * micscif_create_window: + * @ep: end point + * @pinned_pages: Set of pinned pages which wil back this window. + * @offset: offset hint + * + * Allocate and prepare a self registration window. + */ +struct reg_range_t *micscif_create_window(struct endpt *ep, + int64_t nr_pages, uint64_t offset, bool temp) +{ + struct reg_range_t *window; + + might_sleep(); + if (!(window = scif_zalloc(sizeof(struct reg_range_t)))) + goto error; + +#ifdef CONFIG_ML1OM + if (!temp) { + if (!(window->phys_addr = scif_zalloc(nr_pages * + sizeof(*(window->phys_addr))))) + goto error_free_window; + + if (!(window->temp_phys_addr = scif_zalloc(nr_pages * + sizeof(*(window->temp_phys_addr))))) + goto error_free_window; + } +#endif + + if (!(window->dma_addr = scif_zalloc(nr_pages * + sizeof(*(window->dma_addr))))) + goto error_free_window; + + if (!(window->num_pages = scif_zalloc(nr_pages * + sizeof(*(window->num_pages))))) + goto error_free_window; + + window->offset = offset; + window->ep = (uint64_t)ep; + window->magic = SCIFEP_MAGIC; + window->reg_state = OP_IDLE; + init_waitqueue_head(&window->regwq); + window->unreg_state = OP_IDLE; + init_waitqueue_head(&window->unregwq); + INIT_LIST_HEAD(&window->list_member); + window->type = RMA_WINDOW_SELF; + window->temp = temp; +#ifdef _MIC_SCIF_ + micscif_setup_proxy_dma(ep); +#endif + return window; + +error_free_window: + if (window->dma_addr) + scif_free(window->dma_addr, nr_pages * sizeof(*(window->dma_addr))); +#ifdef CONFIG_ML1OM + if (window->temp_phys_addr) + scif_free(window->temp_phys_addr, nr_pages * sizeof(*(window->temp_phys_addr))); + if (window->phys_addr) + scif_free(window->phys_addr, nr_pages * sizeof(*(window->phys_addr))); +#endif + scif_free(window, sizeof(*window)); +error: + return NULL; +} + +/** + * micscif_destroy_incomplete_window: + * @ep: end point + * @window: registration window + * + * Deallocate resources for self window. 
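+ *
+ * Intended for teardown paths where an allocation request may have gone
+ * out but registration never completed: wait for the outstanding
+ * SCIF_ALLOC_REQ to settle, send SCIF_FREE_VIRT so the peer can release
+ * its backing allocation if it had completed, then free only the local
+ * window metadata -- the pinned pages, if any, remain the caller's
+ * responsibility.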
+ */ +int micscif_destroy_incomplete_window(struct endpt *ep, struct reg_range_t *window) +{ + int err; + int64_t nr_pages = window->nr_pages; + struct allocmsg *alloc = &window->alloc_handle; + struct nodemsg msg; + + RMA_MAGIC(window); +retry: + err = wait_event_timeout(alloc->allocwq, alloc->state != OP_IN_PROGRESS, NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + + if (OP_COMPLETED == alloc->state) { + msg.uop = SCIF_FREE_VIRT; + msg.src = ep->port; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = (uint64_t)window->alloc_handle.vaddr; + msg.payload[2] = (uint64_t)window; + msg.payload[3] = SCIF_REGISTER; + micscif_nodeqp_send(ep->remote_dev, &msg, ep); + } + + micscif_free_window_offset(ep, window->offset, + window->nr_pages << PAGE_SHIFT); + if (window->dma_addr) + scif_free(window->dma_addr, nr_pages * + sizeof(*(window->dma_addr))); + if (window->num_pages) + scif_free(window->num_pages, nr_pages * + sizeof(*(window->num_pages))); +#ifdef CONFIG_ML1OM + if (window->phys_addr) + scif_free(window->phys_addr, window->nr_pages * + sizeof(*(window->phys_addr))); + if (window->temp_phys_addr) + scif_free(window->temp_phys_addr, nr_pages * + sizeof(*(window->temp_phys_addr))); +#endif + scif_free(window, sizeof(*window)); + return 0; +} + +/** + * micscif_destroy_window: + * @ep: end point + * @window: registration window + * + * Deallocate resources for self window. + */ +int micscif_destroy_window(struct endpt *ep, struct reg_range_t *window) +{ + int j; + struct scif_pinned_pages *pinned_pages = window->pinned_pages; + int64_t nr_pages = window->nr_pages; + + might_sleep(); + RMA_MAGIC(window); + if (!window->temp && window->mm) { + __scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0); + __scif_release_mm(window->mm); + window->mm = NULL; + } + + if (!window->offset_freed) + micscif_free_window_offset(ep, window->offset, + window->nr_pages << PAGE_SHIFT); + for (j = 0; j < window->nr_contig_chunks; j++) { + if (window->dma_addr[j]) { + unmap_from_aperture( + window->dma_addr[j], + ep->remote_dev, + window->num_pages[j] << PAGE_SHIFT); + } + } + + /* + * Decrement references for this set of pinned pages from + * this window. + */ + j = atomic_sub_return((int32_t)pinned_pages->nr_pages, + &pinned_pages->ref_count); + BUG_ON(j < 0); + /* + * If the ref count for pinned_pages is zero then someone + * has already called scif_unpin_pages() for it and we should + * destroy the page cache. + */ + if (!j) + micscif_destroy_pinned_pages(window->pinned_pages); + if (window->dma_addr) + scif_free(window->dma_addr, nr_pages * + sizeof(*(window->dma_addr))); + if (window->num_pages) + scif_free(window->num_pages, nr_pages * + sizeof(*(window->num_pages))); +#ifdef CONFIG_ML1OM + if (window->phys_addr) + scif_free(window->phys_addr, window->nr_pages * + sizeof(*(window->phys_addr))); + if (window->temp_phys_addr) + scif_free(window->temp_phys_addr, nr_pages * + sizeof(*(window->temp_phys_addr))); +#endif + window->magic = 0; + scif_free(window, sizeof(*window)); + return 0; +} + +/** + * micscif_create_remote_lookup: + * @ep: end point + * @window: remote window + * + * Allocate and prepare lookup entries for the remote + * end to copy over the physical addresses. + * Returns 0 on success and appropriate errno on failure. 
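+ *
+ * The lookup is sized in 2MB steps: nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
+ * 2MB) >> 21, and each lookup slot maps one page worth of address entries
+ * (NR_PHYS_ADDR_IN_PAGE of them). Worked example, assuming 4KB pages and
+ * 8-byte dma_addr_t entries: a 4MB window has nr_pages = 1024, giving
+ * nr_lookup = 2, with each of the two mapped pages carrying 512 addresses.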
+ */ +int micscif_create_remote_lookup(struct endpt *ep, struct reg_range_t *window) +{ + int i, j, err = 0; + int64_t nr_pages = window->nr_pages; + bool vmalloc_dma_phys; +#ifdef CONFIG_ML1OM + bool vmalloc_temp_phys = false; + bool vmalloc_phys = false; +#endif + might_sleep(); + + /* Map window */ + err = map_virt_into_aperture(&window->mapped_offset, + window, ep->remote_dev, sizeof(*window)); + if (err) + goto error_window; + + /* Compute the number of lookup entries. 21 == 2MB Shift */ + window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE, + ((2) * 1024 * 1024)) >> 21; + + if (!(window->dma_addr_lookup.lookup = + scif_zalloc(window->nr_lookup * + sizeof(*(window->dma_addr_lookup.lookup))))) + goto error_window; + + /* Map DMA physical addess lookup array */ + err = map_virt_into_aperture(&window->dma_addr_lookup.offset, + window->dma_addr_lookup.lookup, ep->remote_dev, + window->nr_lookup * + sizeof(*window->dma_addr_lookup.lookup)); + if (err) + goto error_window; + + vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]); + +#ifdef CONFIG_ML1OM + if (ep->remote_dev != &scif_dev[SCIF_HOST_NODE] && !is_self_scifdev(ep->remote_dev)) { + if (!(window->temp_phys_addr_lookup.lookup = + scif_zalloc(window->nr_lookup * + sizeof(*(window->temp_phys_addr_lookup.lookup))))) + goto error_window; + + /* Map physical addess lookup array */ + err = map_virt_into_aperture(&window->temp_phys_addr_lookup.offset, + window->temp_phys_addr_lookup.lookup, ep->remote_dev, + window->nr_lookup * + sizeof(*window->temp_phys_addr_lookup.lookup)); + if (err) + goto error_window; + + if (!(window->phys_addr_lookup.lookup = + scif_zalloc(window->nr_lookup * + sizeof(*(window->phys_addr_lookup.lookup))))) + goto error_window; + + /* Map physical addess lookup array */ + err = map_virt_into_aperture(&window->phys_addr_lookup.offset, + window->phys_addr_lookup.lookup, ep->remote_dev, + window->nr_lookup * + sizeof(*window->phys_addr_lookup.lookup)); + if (err) + goto error_window; + + vmalloc_phys = is_vmalloc_addr(&window->phys_addr[0]); + vmalloc_temp_phys = is_vmalloc_addr(&window->temp_phys_addr[0]); + } +#endif + + /* Now map each of the pages containing physical addresses */ + for (i = 0, j = 0; i < nr_pages; i += NR_PHYS_ADDR_IN_PAGE, j++) { +#ifdef CONFIG_ML1OM + if (ep->remote_dev != &scif_dev[SCIF_HOST_NODE] && !is_self_scifdev(ep->remote_dev)) { + err = map_page_into_aperture( + &window->temp_phys_addr_lookup.lookup[j], + vmalloc_temp_phys ? + vmalloc_to_page(&window->temp_phys_addr[i]) : + virt_to_page(&window->temp_phys_addr[i]), + ep->remote_dev); + if (err) + goto error_window; + + err = map_page_into_aperture( + &window->phys_addr_lookup.lookup[j], + vmalloc_phys ? + vmalloc_to_page(&window->phys_addr[i]) : + virt_to_page(&window->phys_addr[i]), + ep->remote_dev); + if (err) + goto error_window; + } +#endif + err = map_page_into_aperture( + &window->dma_addr_lookup.lookup[j], + vmalloc_dma_phys ? + vmalloc_to_page(&window->dma_addr[i]) : + virt_to_page(&window->dma_addr[i]), + ep->remote_dev); + if (err) + goto error_window; + } + return 0; +error_window: + return err; +} + +/** + * micscif_destroy_remote_lookup: + * @ep: end point + * @window: remote window + * + * Destroy lookup entries used for the remote + * end to copy over the physical addresses. 
+ */ +void micscif_destroy_remote_lookup(struct endpt *ep, struct reg_range_t *window) +{ + int i, j; + + RMA_MAGIC(window); + if (window->nr_lookup) { + for (i = 0, j = 0; i < window->nr_pages; + i += NR_PHYS_ADDR_IN_PAGE, j++) { + if (window->dma_addr_lookup.lookup && + window->dma_addr_lookup.lookup[j]) { + unmap_from_aperture( + window->dma_addr_lookup.lookup[j], + ep->remote_dev, PAGE_SIZE); + } + } + if (window->dma_addr_lookup.offset) { + unmap_from_aperture( + window->dma_addr_lookup.offset, + ep->remote_dev, window->nr_lookup * + sizeof(*window->dma_addr_lookup.lookup)); + } + if (window->dma_addr_lookup.lookup) + scif_free(window->dma_addr_lookup.lookup, window->nr_lookup * + sizeof(*(window->dma_addr_lookup.lookup))); + if (window->mapped_offset) { + unmap_from_aperture(window->mapped_offset, + ep->remote_dev, sizeof(*window)); + } + window->nr_lookup = 0; + } +} + +/** + * micscif_create_remote_window: + * @ep: end point + * @nr_pages: number of pages in window + * + * Allocate and prepare a remote registration window. + */ +struct reg_range_t *micscif_create_remote_window(struct endpt *ep, int nr_pages) +{ + struct reg_range_t *window; + + might_sleep(); + if (!(window = scif_zalloc(sizeof(struct reg_range_t)))) + goto error_ret; + + window->magic = SCIFEP_MAGIC; + window->nr_pages = nr_pages; + +#if !defined(_MIC_SCIF_) && defined(CONFIG_ML1OM) + if (!(window->page_ref_count = scif_zalloc(nr_pages * + sizeof(*(window->page_ref_count))))) + goto error_window; +#endif + + if (!(window->dma_addr = scif_zalloc(nr_pages * + sizeof(*(window->dma_addr))))) + goto error_window; + + if (!(window->num_pages = scif_zalloc(nr_pages * + sizeof(*(window->num_pages))))) + goto error_window; + +#ifdef CONFIG_ML1OM + if (!(window->phys_addr = scif_zalloc(nr_pages * + sizeof(*(window->phys_addr))))) + goto error_window; + + if (!(window->temp_phys_addr = scif_zalloc(nr_pages * + sizeof(*(window->temp_phys_addr))))) + goto error_window; +#endif + + if (micscif_create_remote_lookup(ep, window)) + goto error_window; + + window->ep = (uint64_t)ep; + window->type = RMA_WINDOW_PEER; + set_window_ref_count(window, nr_pages); + window->get_put_ref_count = 0; + window->unreg_state = OP_IDLE; +#if !defined(_MIC_SCIF_) && defined(CONFIG_ML1OM) + window->gttmap_state = OP_IDLE; + init_waitqueue_head(&window->gttmapwq); +#endif +#ifdef _MIC_SCIF_ + micscif_setup_proxy_dma(ep); + window->proxy_dma_phys = ep->rma_info.proxy_dma_phys; +#endif + return window; +error_window: + micscif_destroy_remote_window(ep, window); +error_ret: + return NULL; +} + +/** + * micscif_destroy_remote_window: + * @ep: end point + * @window: remote registration window + * + * Deallocate resources for remote window. 
+ */ +void micscif_destroy_remote_window(struct endpt *ep, struct reg_range_t *window) +{ + RMA_MAGIC(window); + micscif_destroy_remote_lookup(ep, window); + if (window->dma_addr) + scif_free(window->dma_addr, window->nr_pages * + sizeof(*(window->dma_addr))); + if (window->num_pages) + scif_free(window->num_pages, window->nr_pages * + sizeof(*(window->num_pages))); +#ifdef CONFIG_ML1OM + if (window->phys_addr) + scif_free(window->phys_addr, window->nr_pages * + sizeof(*(window->phys_addr))); + if (window->temp_phys_addr) + scif_free(window->temp_phys_addr, window->nr_pages * + sizeof(*(window->temp_phys_addr))); +#endif + +#if !defined(_MIC_SCIF_) && defined(CONFIG_ML1OM) + if (window->page_ref_count) + scif_free(window->page_ref_count, window->nr_pages * + sizeof(*(window->page_ref_count))); +#endif + window->magic = 0; + scif_free(window, sizeof(*window)); +} + +/** + * micscif_map_window_pages: + * @ep: end point + * @window: self registration window + * @tmp_wnd: is a temporary window? + * + * Map pages of a window into the aperture/PCI. + * Also compute physical addresses required for DMA. + */ +int micscif_map_window_pages(struct endpt *ep, struct reg_range_t *window, bool tmp_wnd) +{ + int j, i, err = 0, nr_pages; + scif_pinned_pages_t pinned_pages; + + might_sleep(); + RMA_MAGIC(window); + + pinned_pages = window->pinned_pages; + for (j = 0, i = 0; j < window->nr_contig_chunks; j++, i += nr_pages) { + nr_pages = pinned_pages->num_pages[i]; +#ifdef _MIC_SCIF_ +#ifdef CONFIG_ML1OM + /* phys_addr[] holds addresses as seen from the remote node + * these addressed are then copied into the remote card's + * window structure + * when the remote node is the host and the card is knf + * these addresses are only created at the point of mapping + * the card physical address into gtt (for the KNC the + * the gtt code path returns the local address) + * when the remote node is loopback - the address remains + * the same + * when the remote node is a kn* - the base address of the local + * card as seen from the remote node is added in + */ + if (!tmp_wnd) { + if(ep->remote_dev != &scif_dev[SCIF_HOST_NODE]) { + if ((err = map_virt_into_aperture( + &window->temp_phys_addr[j], + phys_to_virt(page_to_phys(pinned_pages->pages[i])), + ep->remote_dev, + nr_pages << PAGE_SHIFT))) { + int k,l; + + for (l = k = 0; k < i; l++) { + nr_pages = pinned_pages->num_pages[k]; + window->temp_phys_addr[l] + &= ~RMA_HUGE_NR_PAGE_MASK; + unmap_from_aperture( + window->temp_phys_addr[l], + ep->remote_dev, + nr_pages << PAGE_SHIFT); + k += nr_pages; + window->temp_phys_addr[l] = 0; + } + return err; + } + if (!tmp_wnd) + RMA_SET_NR_PAGES(window->temp_phys_addr[j], nr_pages); + } + } +#endif + window->dma_addr[j] = + page_to_phys(pinned_pages->pages[i]); + if (!tmp_wnd) + RMA_SET_NR_PAGES(window->dma_addr[j], nr_pages); +#else + err = map_virt_into_aperture(&window->dma_addr[j], + phys_to_virt(page_to_phys(pinned_pages->pages[i])), + ep->remote_dev, nr_pages << PAGE_SHIFT); + if (err) + return err; + if (!tmp_wnd) + RMA_SET_NR_PAGES(window->dma_addr[j], nr_pages); +#endif + window->num_pages[j] = nr_pages; + } + return err; +} + + +/** + * micscif_unregister_window: + * @window: self registration window + * + * Send an unregistration request and wait for a response. 
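+ *
+ * unreg_state acts as a small state machine: from OP_IDLE a SCIF_UNREGISTER
+ * message is sent and the state moves to OP_IN_PROGRESS; OP_IN_PROGRESS
+ * waits on unregwq, retrying the timeout while the peer is alive;
+ * OP_FAILED is promoted to OP_COMPLETED once the peer is known to be dead;
+ * and on OP_COMPLETED the remaining window references are dropped and, at
+ * zero, the window is queued for cleanup.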
+ */ +int micscif_unregister_window(struct reg_range_t *window) +{ + int err = 0; + struct endpt *ep = (struct endpt *)window->ep; + bool send_msg = false; + + might_sleep(); + BUG_ON(!mutex_is_locked(&ep->rma_info.rma_lock)); + + switch (window->unreg_state) { + case OP_IDLE: + { + window->unreg_state = OP_IN_PROGRESS; + send_msg = true; + /* fall through */ + } + case OP_IN_PROGRESS: + { + get_window_ref_count(window, 1); + mutex_unlock(&ep->rma_info.rma_lock); + if (send_msg && (err = micscif_send_scif_unregister(ep, window))) { + window->unreg_state = OP_COMPLETED; + goto done; + } +retry: + err = wait_event_timeout(window->unregwq, + window->unreg_state != OP_IN_PROGRESS, NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + if (!err) { + err = -ENODEV; + window->unreg_state = OP_COMPLETED; + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + } + if (err > 0) + err = 0; +done: + mutex_lock(&ep->rma_info.rma_lock); + put_window_ref_count(window, 1); + break; + } + case OP_FAILED: + { + if (!scifdev_alive(ep)) { + err = -ENODEV; + window->unreg_state = OP_COMPLETED; + } + break; + } + case OP_COMPLETED: + break; + default: + /* Invalid opcode? */ + BUG_ON(1); + } + + if (OP_COMPLETED == window->unreg_state && + window->ref_count) + put_window_ref_count(window, window->nr_pages); + + if (!window->ref_count) { + atomic_inc(&ep->rma_info.tw_refcount); + atomic_add_return((int32_t)window->nr_pages, &ep->rma_info.tw_total_pages); + list_del(&window->list_member); + micscif_free_window_offset(ep, window->offset, + window->nr_pages << PAGE_SHIFT); + window->offset_freed = true; + mutex_unlock(&ep->rma_info.rma_lock); + if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) + && scifdev_alive(ep)) { + drain_dma_intr(ep->rma_info.dma_chan); + } else { + if (!__scif_dec_pinned_vm_lock(window->mm, + window->nr_pages, 1)) { + __scif_release_mm(window->mm); + window->mm = NULL; + } + } + micscif_queue_for_cleanup(window, &ms_info.mi_rma); + mutex_lock(&ep->rma_info.rma_lock); + } + return err; +} + +/** + * micscif_send_alloc_request: + * @ep: end point + * @window: self registration window + * + * Send a remote window allocation request + */ +int micscif_send_alloc_request(struct endpt *ep, struct reg_range_t *window) +{ + struct nodemsg msg; + struct allocmsg *alloc = &window->alloc_handle; + + /* Set up the Alloc Handle */ + alloc->uop = SCIF_REGISTER; + alloc->state = OP_IN_PROGRESS; + init_waitqueue_head(&alloc->allocwq); + + /* Send out an allocation request */ + msg.uop = SCIF_ALLOC_REQ; + msg.src = ep->port; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = window->nr_pages; + msg.payload[2] = (uint64_t)&window->alloc_handle; + msg.payload[3] = SCIF_REGISTER; + return micscif_nodeqp_send(ep->remote_dev, &msg, ep); +} + +/** + * micscif_prep_remote_window: + * @ep: end point + * @window: self registration window + * + * Send a remote window allocation request, wait for an allocation response, + * prepare the remote window and notify the peer to unmap it once done. 
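+ *
+ * Rough sequence implemented below: map the self window's pages, wait on
+ * allocwq for the peer's reply to the earlier SCIF_ALLOC_REQ, ioremap the
+ * peer's window descriptor and its lookup pages, copy the DMA (and, for
+ * KNF P2P, the temporary physical) address arrays across in chunks of
+ * NR_PHYS_ADDR_IN_PAGE entries, then fill in the peer window's offset,
+ * protection and chunk count before unmapping everything again.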
+ */ +int micscif_prep_remote_window(struct endpt *ep, struct reg_range_t *window) +{ + struct nodemsg msg; + struct reg_range_t *remote_window; + struct allocmsg *alloc = &window->alloc_handle; + dma_addr_t *dma_phys_lookup, *tmp; + int i = 0, j = 0; + int nr_contig_chunks, loop_nr_contig_chunks, remaining_nr_contig_chunks, nr_lookup; +#if defined(_MIC_SCIF_) && defined(CONFIG_ML1OM) + dma_addr_t *phys_lookup = 0; +#endif + int err, map_err; + + nr_contig_chunks = remaining_nr_contig_chunks = (int)window->nr_contig_chunks; + + if ((map_err = micscif_map_window_pages(ep, window, false))) { + printk(KERN_ERR "%s %d map_err %d\n", __func__, __LINE__, map_err); + } +retry: + /* Now wait for the response */ + err = wait_event_timeout(alloc->allocwq, alloc->state != OP_IN_PROGRESS, NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + + if (!err) + err = -ENODEV; + + if (err > 0) + err = 0; + else + return err; + + /* Bail out. The remote end rejected this request */ + if (OP_FAILED == alloc->state) + return -ENOMEM; + + if (map_err) { + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, map_err); + msg.uop = SCIF_FREE_VIRT; + msg.src = ep->port; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = (uint64_t)window->alloc_handle.vaddr; + msg.payload[2] = (uint64_t)window; + msg.payload[3] = SCIF_REGISTER; + if (!(err = micscif_nodeqp_send(ep->remote_dev, &msg, ep))) + err = -ENOTCONN; + else + err = map_err; + return err; + } + + + remote_window = scif_ioremap(alloc->phys_addr, + sizeof(*window), ep->remote_dev); + + RMA_MAGIC(remote_window); + + /* Compute the number of lookup entries. 21 == 2MB Shift */ + nr_lookup = ALIGN(nr_contig_chunks * PAGE_SIZE, ((2) * 1024 * 1024)) >> 21; +#if defined(_MIC_SCIF_) && defined(CONFIG_ML1OM) + if (is_p2p_scifdev(ep->remote_dev)) + phys_lookup = scif_ioremap(remote_window->temp_phys_addr_lookup.offset, + nr_lookup * + sizeof(*remote_window->temp_phys_addr_lookup.lookup), + ep->remote_dev); +#endif + + dma_phys_lookup = scif_ioremap(remote_window->dma_addr_lookup.offset, + nr_lookup * + sizeof(*remote_window->dma_addr_lookup.lookup), + ep->remote_dev); + + while (remaining_nr_contig_chunks) { + loop_nr_contig_chunks = min(remaining_nr_contig_chunks, (int)NR_PHYS_ADDR_IN_PAGE); + /* #1/2 - Copy physical addresses over to the remote side */ + +#if defined(_MIC_SCIF_) && defined(CONFIG_ML1OM) + /* If the remote dev is self or is any node except the host + * its OK to copy the bus address to the remote window + * in the case of the host (for KNF only) the bus address + * is generated at the time of mmap(..) into card memory + * and does not exist at this time + */ + /* Note: + * the phys_addr[] holds MIC address for remote cards + * -> GTT offset for the host (KNF) + * -> local address for the host (KNC) + * -> local address for loopback + * this is done in map_window_pages(..) 
except for GTT + * offset for KNF + */ + if (is_p2p_scifdev(ep->remote_dev)) { + tmp = scif_ioremap(phys_lookup[j], + loop_nr_contig_chunks * sizeof(*window->temp_phys_addr), + ep->remote_dev); + memcpy_toio(tmp, &window->temp_phys_addr[i], + loop_nr_contig_chunks * sizeof(*window->temp_phys_addr)); + serializing_request(tmp); + smp_mb(); + scif_iounmap(tmp, PAGE_SIZE, ep->remote_dev); + } +#endif + /* #2/2 - Copy DMA addresses (addresses that are fed into the DMA engine) + * We transfer bus addresses which are then converted into a MIC physical + * address on the remote side if it is a MIC, if the remote node is a host + * we transfer the MIC physical address + */ + tmp = scif_ioremap( + dma_phys_lookup[j], + loop_nr_contig_chunks * sizeof(*window->dma_addr), + ep->remote_dev); +#ifdef _MIC_SCIF_ + if (is_p2p_scifdev(ep->remote_dev)) { + /* knf: + * send the address as mapped through the GTT (the remote node's + * base address for this node is already added in) + * knc: + * add remote node's base address for this node to convert it + * into a MIC address + */ + int m; + dma_addr_t dma_addr; + for (m = 0; m < loop_nr_contig_chunks; m++) { +#ifdef CONFIG_ML1OM + dma_addr = window->temp_phys_addr[i + m]; +#else + dma_addr = window->dma_addr[i + m] + + ep->remote_dev->sd_base_addr; +#endif + writeq(dma_addr, &tmp[m]); + } + } else + /* Host node or loopback - transfer DMA addresses as is, this is + * the same as a MIC physical address (we use the dma_addr + * and not the phys_addr array since the phys_addr is only setup + * if there is a mmap() request from the host) + */ + memcpy_toio(tmp, &window->dma_addr[i], + loop_nr_contig_chunks * sizeof(*window->dma_addr)); +#else + /* Transfer the physical address array - this is the MIC address + * as seen by the card + */ + memcpy_toio(tmp, &window->dma_addr[i], + loop_nr_contig_chunks * sizeof(*window->dma_addr)); +#endif + remaining_nr_contig_chunks -= loop_nr_contig_chunks; + i += loop_nr_contig_chunks; + j++; + serializing_request(tmp); + smp_mb(); + scif_iounmap(tmp, PAGE_SIZE, ep->remote_dev); + } + + /* Prepare the remote window for the peer */ + remote_window->peer_window = (uint64_t)window; + remote_window->offset = window->offset; + remote_window->prot = window->prot; + remote_window->nr_contig_chunks = nr_contig_chunks; +#ifdef _MIC_SCIF_ + if (!ep->rma_info.proxy_dma_peer_phys) + ep->rma_info.proxy_dma_peer_phys = remote_window->proxy_dma_phys; +#endif +#if defined(_MIC_SCIF_) && defined(CONFIG_ML1OM) + if (is_p2p_scifdev(ep->remote_dev)) + scif_iounmap(phys_lookup, + nr_lookup * + sizeof(*remote_window->temp_phys_addr_lookup.lookup), + ep->remote_dev); +#endif + scif_iounmap(dma_phys_lookup, + nr_lookup * + sizeof(*remote_window->dma_addr_lookup.lookup), + ep->remote_dev); + scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev); + window->peer_window = (uint64_t)alloc->vaddr; + return err; +} + +/** + * micscif_send_scif_register: + * @ep: end point + * @window: self registration window + * + * Send a SCIF_REGISTER message if EP is connected and wait for a + * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT + * message so that the peer can free its remote window allocated earlier. 
+ */ +int micscif_send_scif_register(struct endpt *ep, struct reg_range_t *window) +{ + int err = 0; + struct nodemsg msg; + + msg.src = ep->port; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = (uint64_t)window->alloc_handle.vaddr; + msg.payload[2] = (uint64_t)window; + if (SCIFEP_CONNECTED == ep->state) { + msg.uop = SCIF_REGISTER; + window->reg_state = OP_IN_PROGRESS; + if (!(err = micscif_nodeqp_send(ep->remote_dev, &msg, ep))) { + micscif_set_nr_pages(ep->remote_dev, window); +retry: + err = wait_event_timeout(window->regwq, + window->reg_state != OP_IN_PROGRESS, NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + if (!err) + err = -ENODEV; + if (err > 0) + err = 0; + if (OP_FAILED == window->reg_state) + err = -ENOTCONN; + } else { + micscif_set_nr_pages(ep->remote_dev, window); + } + } else { + msg.uop = SCIF_FREE_VIRT; + msg.payload[3] = SCIF_REGISTER; + if (!(err = micscif_nodeqp_send(ep->remote_dev, &msg, ep))) + err = -ENOTCONN; + micscif_set_nr_pages(ep->remote_dev, window); + } + return err; +} + +/** + * micscif_send_scif_unregister: + * @ep: end point + * @window: self registration window + * + * Send a SCIF_UNREGISTER message. + */ +int micscif_send_scif_unregister(struct endpt *ep, struct reg_range_t *window) +{ + struct nodemsg msg; + + RMA_MAGIC(window); + msg.uop = SCIF_UNREGISTER; + msg.src = ep->port; + msg.payload[0] = (uint64_t)window->alloc_handle.vaddr; + msg.payload[1] = (uint64_t)window; + return micscif_nodeqp_send(ep->remote_dev, &msg, ep); +} + +/** + * micscif_get_window_offset: + * @epd: end point descriptor + * @flags: flags + * @offset: offset hint + * @len: length of range + * @out_offset: computed offset returned by reference. + * + * Compute/Claim a new offset for this EP. The callee is supposed to grab + * the RMA mutex before calling this API. + */ +int micscif_get_window_offset(struct endpt *ep, int flags, + uint64_t offset, size_t len, uint64_t *out_offset) +{ + uint64_t computed_offset; + int err = 0; + + might_sleep(); + mutex_lock(&ep->rma_info.va_lock); + if (flags & SCIF_MAP_FIXED) { + computed_offset = va_gen_claim(&ep->rma_info.va_gen, + (uint64_t)offset, len); + if (INVALID_VA_GEN_ADDRESS == computed_offset) + err = -EADDRINUSE; + } else { + computed_offset = va_gen_alloc(&ep->rma_info.va_gen, + len, PAGE_SIZE); + if (INVALID_VA_GEN_ADDRESS == computed_offset) + err = -ENOMEM; + } + *out_offset = computed_offset; + mutex_unlock(&ep->rma_info.va_lock); + return err; +} + +/** + * micscif_free_window_offset: + * @offset: offset hint + * @len: length of range + * + * Free offset for this EP. The callee is supposed to grab + * the RMA mutex before calling this API. + */ +void micscif_free_window_offset(struct endpt *ep, + uint64_t offset, size_t len) +{ + mutex_lock(&ep->rma_info.va_lock); + va_gen_free(&ep->rma_info.va_gen, offset, len); + mutex_unlock(&ep->rma_info.va_lock); +} + +/** + * scif_register_temp: + * @epd: End Point Descriptor. + * @addr: virtual address to/from which to copy + * @len: length of range to copy + * @out_offset: computed offset returned by reference. + * @out_window: allocated registered window returned by reference. + * + * Create a temporary registered window. The peer will not know about this + * window. This API is used for scif_vreadfrom()/scif_vwriteto() API's. 
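+ *
+ * The registration itself is page granular; the sub-page part of @addr is
+ * folded back into the returned offset. Illustrative arithmetic (values
+ * made up): for addr = 0x7f2a10003234 and len = 0x100, aligned_len is one
+ * page, the window is created at a page aligned out_offset, and the caller
+ * gets back out_offset | 0x234.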
+ */ +static int +micscif_register_temp(scif_epd_t epd, void *addr, size_t len, int prot, + off_t *out_offset, struct reg_range_t **out_window) +{ + struct endpt *ep = (struct endpt *)epd; + int err; + scif_pinned_pages_t pinned_pages; + size_t aligned_len; + + aligned_len = ALIGN(len, PAGE_SIZE); + + if ((err = __scif_pin_pages((void *)((uint64_t)addr & + PAGE_MASK), + aligned_len, &prot, 0, &pinned_pages))) + return err; + + pinned_pages->prot = prot; + + /* Compute the offset for this registration */ + if ((err = micscif_get_window_offset(ep, 0, 0, + aligned_len, (uint64_t *)out_offset))) + goto error_unpin; + + /* Allocate and prepare self registration window */ + if (!(*out_window = micscif_create_window(ep, aligned_len >> PAGE_SHIFT, + *out_offset, true))) { + micscif_free_window_offset(ep, *out_offset, aligned_len); + err = -ENOMEM; + goto error_unpin; + } + + (*out_window)->pinned_pages = pinned_pages; + (*out_window)->nr_pages = pinned_pages->nr_pages; + (*out_window)->nr_contig_chunks = pinned_pages->nr_contig_chunks; + (*out_window)->prot = pinned_pages->prot; + + (*out_window)->va_for_temp = (void*)((uint64_t)addr & PAGE_MASK); + if ((err = micscif_map_window_pages(ep, *out_window, true))) { + /* Something went wrong! Rollback */ + micscif_destroy_window(ep, *out_window); + *out_window = NULL; + } else + *out_offset |= ((uint64_t)addr & ~PAGE_MASK); + + return err; +error_unpin: + if (err) + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + scif_unpin_pages(pinned_pages); + return err; +} + +/** + * micscif_rma_completion_cb: + * @data: RMA cookie + * + * RMA interrupt completion callback. + */ +void micscif_rma_completion_cb(uint64_t data) +{ + struct dma_completion_cb *comp_cb = (struct dma_completion_cb *)data; +#ifndef _MIC_SCIF_ + struct pci_dev *pdev; +#endif + + /* Free DMA Completion CB. */ + if (comp_cb && comp_cb->temp_buf) { + if (comp_cb->dst_window) { + micscif_rma_local_cpu_copy(comp_cb->dst_offset, + comp_cb->dst_window, comp_cb->temp_buf + comp_cb->header_padding, + comp_cb->len, false); + } +#ifndef _MIC_SCIF_ + micscif_pci_dev(comp_cb->remote_node, &pdev); + mic_ctx_unmap_single(get_per_dev_ctx(comp_cb->remote_node - 1), + comp_cb->temp_phys, KMEM_UNALIGNED_BUF_SIZE); +#endif + if (comp_cb->is_cache) + micscif_kmem_cache_free(comp_cb->temp_buf_to_free); + else + kfree(comp_cb->temp_buf_to_free); + } + kfree(comp_cb); +} + +static void __micscif_rma_destroy_tcw_ep(struct endpt *ep); +static +bool micscif_rma_tc_can_cache(struct endpt *ep, size_t cur_bytes) +{ + if ((cur_bytes >> PAGE_SHIFT) > ms_info.mi_rma_tc_limit) + return false; + if ((atomic_read(&ep->rma_info.tcw_total_pages) + + (cur_bytes >> PAGE_SHIFT)) > + ms_info.mi_rma_tc_limit) { + printk(KERN_ALERT "%s %d total=%d, current=%zu reached max\n", + __func__, __LINE__, + atomic_read(&ep->rma_info.tcw_total_pages), + (1 + (cur_bytes >> PAGE_SHIFT))); + micscif_rma_destroy_tcw_invalid(&ms_info.mi_rma_tc); + __micscif_rma_destroy_tcw_ep(ep); + } + return true; +} + +/** + * micscif_rma_copy: + * @epd: end point descriptor. + * @loffset: offset in local registered address space to/from which to copy + * @addr: user virtual address to/from which to copy + * @len: length of range to copy + * @roffset: offset in remote registered address space to/from which to copy + * @flags: flags + * @dir: LOCAL->REMOTE or vice versa. + * + * Validate parameters, check if src/dst registered ranges requested for copy + * are valid and initiate either CPU or DMA copy. 
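+ *
+ * Illustrative call (a sketch, not lifted from the actual callers): a
+ * synchronous read of @len bytes from a remote registered range into a
+ * local registered range would look roughly like
+ *
+ *	err = micscif_rma_copy(epd, loffset, NULL, len, roffset,
+ *			SCIF_RMA_SYNC, REMOTE_TO_LOCAL, true);
+ *
+ * whereas passing a non-NULL @addr (the scif_vreadfrom()/scif_vwriteto()
+ * case) makes the routine pin and register a temporary local window
+ * around @addr first.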
+ */ +int micscif_rma_copy(scif_epd_t epd, off_t loffset, void *addr, size_t len, + off_t roffset, int flags, enum rma_direction dir, bool last_chunk) +{ + struct endpt *ep = (struct endpt *)epd; + struct micscif_rma_req remote_req; + struct micscif_rma_req req; + struct reg_range_t *window = NULL; + struct reg_range_t *remote_window = NULL; + struct mic_copy_work copy_work; + bool loopback; + int err = 0; + struct dma_channel *chan; + struct rma_mmu_notifier *mmn = NULL; + bool insert_window = false; + bool cache = false; + + if ((err = verify_epd(ep))) + return err; + + if (flags && !(flags & (SCIF_RMA_USECPU | SCIF_RMA_USECACHE | SCIF_RMA_SYNC | SCIF_RMA_ORDERED))) + return -EINVAL; + + if (!len) + return -EINVAL; + loopback = is_self_scifdev(ep->remote_dev) ? true : false; + copy_work.fence_type = ((flags & SCIF_RMA_SYNC) && last_chunk) ? DO_DMA_POLLING : 0; + copy_work.ordered = !!((flags & SCIF_RMA_ORDERED) && last_chunk); + +#ifdef CONFIG_MMU_NOTIFIER + if (!mic_reg_cache_enable) + flags &= ~SCIF_RMA_USECACHE; +#else + flags &= ~SCIF_RMA_USECACHE; +#endif +#ifndef _MIC_SCIF_ +#ifdef CONFIG_ML1OM + /* Use DMA Copies even if CPU copy is requested on KNF MIC from Host */ + if (flags & SCIF_RMA_USECPU) { + flags &= ~SCIF_RMA_USECPU; + if (last_chunk) + copy_work.fence_type = DO_DMA_POLLING; + } +#endif + /* Use CPU for Host<->Host Copies */ + if (loopback) { + flags |= SCIF_RMA_USECPU; + copy_work.fence_type = 0x0; + } +#endif + + cache = flags & SCIF_RMA_USECACHE; + + /* Trying to wrap around */ + if ((loffset && (loffset + (off_t)len < loffset)) || + (roffset + (off_t)len < roffset)) + return -EINVAL; + + remote_req.out_window = &remote_window; + remote_req.offset = roffset; + remote_req.nr_bytes = len; + /* + * If transfer is from local to remote then the remote window + * must be writeable and vice versa. + */ + remote_req.prot = LOCAL_TO_REMOTE == dir ? VM_WRITE : VM_READ; + remote_req.type = WINDOW_PARTIAL; + remote_req.head = &ep->rma_info.remote_reg_list; + +#ifdef CONFIG_MMU_NOTIFIER + if (addr && cache) { + mutex_lock(&ep->rma_info.mmn_lock); + mmn = find_mmu_notifier(current->mm, &ep->rma_info); + if (!mmn) { + mmn = kzalloc(sizeof(*mmn), GFP_KERNEL); + if (!mmn) { + mutex_unlock(&ep->rma_info.mmn_lock); + return -ENOMEM; + } + init_mmu_notifier(mmn, current->mm, ep); + if (mmu_notifier_register(&mmn->ep_mmu_notifier, current->mm)) { + mutex_unlock(&ep->rma_info.mmn_lock); + kfree(mmn); + return -EBUSY; + } +#ifdef RMA_DEBUG + atomic_long_add_return(1, &ms_info.mmu_notif_cnt); +#endif + list_add(&mmn->list_member, &ep->rma_info.mmn_list); + } + mutex_unlock(&ep->rma_info.mmn_lock); + } +#endif + + micscif_inc_node_refcnt(ep->remote_dev, 1); +#ifdef _MIC_SCIF_ + if (!(flags & SCIF_RMA_USECPU)) { + /* + * Proxy the DMA only for P2P reads with transfer size + * greater than proxy DMA threshold. scif_vreadfrom(..) + * and scif_vwriteto(..) is not supported since the peer + * does not have the page lists required to perform the + * proxy DMA. 
+ */ + if (ep->remote_dev->sd_proxy_dma_reads && + !addr && dir == REMOTE_TO_LOCAL && + ep->rma_info.proxy_dma_va && + len >= ms_info.mi_proxy_dma_threshold) { + copy_work.len = len; + copy_work.src_offset = roffset; + copy_work.dst_offset = loffset; + /* Fall through if there were errors */ + if (!(err = micscif_proxy_dma(epd, ©_work))) + goto error; + } + } +#endif + mutex_lock(&ep->rma_info.rma_lock); + if (addr) { + req.out_window = &window; + req.nr_bytes = ALIGN(len + ((uint64_t)addr & ~PAGE_MASK), PAGE_SIZE); + if (mmn) + req.head = &mmn->tc_reg_list; + req.va_for_temp = (void*)((uint64_t)addr & PAGE_MASK); + req.prot = (LOCAL_TO_REMOTE == dir ? VM_READ : VM_WRITE | VM_READ); + /* Does a valid local window exist? */ + + pr_debug("%s %d req.va_for_temp %p addr %p req.nr_bytes 0x%lx len 0x%lx\n", + __func__, __LINE__, req.va_for_temp, addr, req.nr_bytes, len); + spin_lock(&ep->rma_info.tc_lock); + if (!mmn || (err = micscif_query_tcw(ep, &req))) { + pr_debug("%s %d err %d req.va_for_temp %p addr %p req.nr_bytes 0x%lx len 0x%lx\n", + __func__, __LINE__, err, req.va_for_temp, addr, req.nr_bytes, len); + spin_unlock(&ep->rma_info.tc_lock); + mutex_unlock(&ep->rma_info.rma_lock); + if (cache) + if (!micscif_rma_tc_can_cache(ep, req.nr_bytes)) + cache = false; + if ((err = micscif_register_temp(epd, req.va_for_temp, req.nr_bytes, + req.prot, + &loffset, &window))) { + goto error; + } + mutex_lock(&ep->rma_info.rma_lock); + pr_debug("New temp window created addr %p\n", addr); + if (cache) { + atomic_inc(&ep->rma_info.tcw_refcount); + atomic_add_return((int32_t)window->nr_pages, &ep->rma_info.tcw_total_pages); + if (mmn) { + spin_lock(&ep->rma_info.tc_lock); + micscif_insert_tcw(window, &mmn->tc_reg_list); + spin_unlock(&ep->rma_info.tc_lock); + } + } + insert_window = true; + } else { + spin_unlock(&ep->rma_info.tc_lock); + pr_debug("window found for addr %p\n", addr); + BUG_ON(window->va_for_temp > addr); + } + loffset = window->offset + ((uint64_t)addr - (uint64_t)window->va_for_temp); + pr_debug("%s %d addr %p loffset 0x%lx window->nr_pages 0x%llx" + " window->va_for_temp %p\n", __func__, __LINE__, + addr, loffset, window->nr_pages, window->va_for_temp); + RMA_MAGIC(window); + } + + /* Does a valid remote window exist? */ + if ((err = micscif_query_window(&remote_req))) { + pr_debug("%s %d err %d roffset 0x%lx len 0x%lx\n", + __func__, __LINE__, err, roffset, len); + mutex_unlock(&ep->rma_info.rma_lock); + goto error; + } + RMA_MAGIC(remote_window); + if (!addr) { + req.out_window = &window; + req.offset = loffset; + /* + * If transfer is from local to remote then the self window + * must be readable and vice versa. + */ + req.prot = LOCAL_TO_REMOTE == dir ? VM_READ : VM_WRITE; + req.nr_bytes = len; + req.type = WINDOW_PARTIAL; + req.head = &ep->rma_info.reg_list; + /* Does a valid local window exist? */ + if ((err = micscif_query_window(&req))) { + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + mutex_unlock(&ep->rma_info.rma_lock); + goto error; + } + RMA_MAGIC(window); + } + + /* + * Preprare copy_work for submitting work to the DMA kernel thread + * or CPU copy routine. 
+ */ + copy_work.len = len; + copy_work.loopback = loopback; + copy_work.remote_dev = ep->remote_dev; + copy_work.dma_chan_released = false; + if (LOCAL_TO_REMOTE == dir) { + copy_work.src_offset = loffset; + copy_work.src_window = window; + copy_work.dst_offset = roffset; + copy_work.dst_window = remote_window; + } else { + copy_work.src_offset = roffset; + copy_work.src_window = remote_window; + copy_work.dst_offset = loffset; + copy_work.dst_window = window; + } + + if (!(flags & SCIF_RMA_USECPU)) { + chan = ep->rma_info.dma_chan; + if ((err = request_dma_channel(chan))) { + mutex_unlock(&ep->rma_info.rma_lock); + goto error; + } + err = micscif_rma_list_dma_copy_wrapper(epd, ©_work, + chan, loffset); + if (!copy_work.dma_chan_released) + free_dma_channel(chan); + } + if (flags & SCIF_RMA_USECPU) { + /* Initiate synchronous CPU copy */ + micscif_rma_list_cpu_copy(©_work); + } + if (insert_window && !cache) { + atomic_inc(&ep->rma_info.tw_refcount); + atomic_add_return((int32_t)window->nr_pages, &ep->rma_info.tw_total_pages); + } + + mutex_unlock(&ep->rma_info.rma_lock); + + if (last_chunk) { + if (DO_DMA_POLLING == copy_work.fence_type) + err = drain_dma_poll(ep->rma_info.dma_chan); + else if (DO_DMA_INTR == copy_work.fence_type) + err = drain_dma_intr(ep->rma_info.dma_chan); + } + + micscif_dec_node_refcnt(ep->remote_dev, 1); + if (insert_window && !cache) + micscif_queue_for_cleanup(window, &ms_info.mi_rma); + return err; +error: + if (err) { + if (addr && window && !cache) + micscif_destroy_window(ep, window); + printk(KERN_ERR "%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len); + } + micscif_dec_node_refcnt(ep->remote_dev, 1); + return err; +} + +/** + * micscif_send_fence_mark: + * @epd: end point descriptor. + * @out_mark: Output DMA mark reported by peer. + * + * Send a remote fence mark request. + */ +int micscif_send_fence_mark(scif_epd_t epd, int *out_mark) +{ + int err; + struct nodemsg msg; + struct fence_info *fence_req; + struct endpt *ep = (struct endpt *)epd; + + if (!(fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL))) { + err = -ENOMEM; + goto error; + } + + fence_req->state = OP_IN_PROGRESS; + init_waitqueue_head(&fence_req->wq); + + msg.src = ep->port; + msg.uop = SCIF_MARK; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = (uint64_t)fence_req; + + if ((err = micscif_nodeqp_send(ep->remote_dev, &msg, ep))) + goto error; + +retry: + err = wait_event_timeout(fence_req->wq, + (OP_IN_PROGRESS != fence_req->state), NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + if (!err) + err = -ENODEV; + if (err > 0) + err = 0; + if (err < 0) { + mutex_lock(&ep->rma_info.rma_lock); + if (OP_IN_PROGRESS == fence_req->state) + fence_req->state = OP_FAILED; + mutex_unlock(&ep->rma_info.rma_lock); + } + if (OP_COMPLETED == fence_req->state) + *out_mark = SCIF_REMOTE_FENCE | fence_req->dma_mark; + + if (OP_FAILED == fence_req->state && !err) + err = -ENOMEM; + mutex_lock(&ep->rma_info.rma_lock); + mutex_unlock(&ep->rma_info.rma_lock); + kfree(fence_req); +error: + return err; +} + +/** + * micscif_send_fence_wait: + * @epd: end point descriptor. + * @mark: DMA mark to wait for. + * + * Send a remote fence wait request. 
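+ *
+ * Sketch of the intended pairing with micscif_send_fence_mark()
+ * (illustrative only; the real callers live outside this file and are
+ * assumed to strip the SCIF_REMOTE_FENCE tag from the returned mark, as
+ * shown):
+ *
+ *	int mark = 0, err;
+ *	err = micscif_send_fence_mark(epd, &mark);
+ *	if (!err)
+ *		err = micscif_send_fence_wait(epd, mark & ~SCIF_REMOTE_FENCE);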
+ */ +int micscif_send_fence_wait(scif_epd_t epd, int mark) +{ + int err; + struct nodemsg msg; + struct fence_info *fence_req; + struct endpt *ep = (struct endpt *)epd; + + if (!(fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL))) { + err = -ENOMEM; + goto error; + } + + fence_req->state = OP_IN_PROGRESS; + init_waitqueue_head(&fence_req->wq); + + msg.src = ep->port; + msg.uop = SCIF_WAIT; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = (uint64_t)fence_req; + msg.payload[2] = mark; + + if ((err = micscif_nodeqp_send(ep->remote_dev, &msg, ep))) + goto error; +retry: + err = wait_event_timeout(fence_req->wq, + (OP_IN_PROGRESS != fence_req->state), NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry; + if (!err) + err = -ENODEV; + if (err > 0) + err = 0; + if (err < 0) { + mutex_lock(&ep->rma_info.rma_lock); + if (OP_IN_PROGRESS == fence_req->state) + fence_req->state = OP_FAILED; + mutex_unlock(&ep->rma_info.rma_lock); + } + if (OP_FAILED == fence_req->state && !err) + err = -ENOMEM; + mutex_lock(&ep->rma_info.rma_lock); + mutex_unlock(&ep->rma_info.rma_lock); + kfree(fence_req); +error: + return err; +} + +/** + * micscif_send_fence_signal: + * @epd - endpoint descriptor + * @loff - local offset + * @lval - local value to write to loffset + * @roff - remote offset + * @rval - remote value to write to roffset + * @flags - flags + * + * Sends a remote fence signal request + */ +int micscif_send_fence_signal(scif_epd_t epd, off_t roff, uint64_t rval, + off_t loff, uint64_t lval, int flags) +{ + int err = 0; + struct nodemsg msg; + struct fence_info *fence_req; + struct endpt *ep = (struct endpt *)epd; + + if (!(fence_req = kmalloc(sizeof(*fence_req), GFP_KERNEL))) { + err = -ENOMEM; + goto error; + } + + fence_req->state = OP_IN_PROGRESS; + init_waitqueue_head(&fence_req->wq); + + msg.src = ep->port; + if (flags & SCIF_SIGNAL_LOCAL) { + msg.uop = SCIF_SIG_LOCAL; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = roff; + msg.payload[2] = rval; + msg.payload[3] = (uint64_t)fence_req; + if ((err = micscif_nodeqp_send(ep->remote_dev, &msg, ep))) + goto error_free; +retry1: + err = wait_event_timeout(fence_req->wq, + (OP_IN_PROGRESS != fence_req->state), NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry1; + if (!err) + err = -ENODEV; + if (err > 0) + err = 0; + if (err < 0) { + mutex_lock(&ep->rma_info.rma_lock); + if (OP_IN_PROGRESS == fence_req->state) + fence_req->state = OP_FAILED; + mutex_unlock(&ep->rma_info.rma_lock); + } + if (OP_FAILED == fence_req->state && !err) { + err = -ENXIO; + goto error_free; + } + } + fence_req->state = OP_IN_PROGRESS; + + if (flags & SCIF_SIGNAL_REMOTE) { + msg.uop = SCIF_SIG_REMOTE; + msg.payload[0] = ep->remote_ep; + msg.payload[1] = loff; + msg.payload[2] = lval; + msg.payload[3] = (uint64_t)fence_req; + if ((err = micscif_nodeqp_send(ep->remote_dev, &msg, ep))) + goto error_free; +retry2: + err = wait_event_timeout(fence_req->wq, + (OP_IN_PROGRESS != fence_req->state), NODE_ALIVE_TIMEOUT); + if (!err && scifdev_alive(ep)) + goto retry2; + if (!err) + err = -ENODEV; + if (err > 0) + err = 0; + if (err < 0) { + mutex_lock(&ep->rma_info.rma_lock); + if (OP_IN_PROGRESS == fence_req->state) + fence_req->state = OP_FAILED; + mutex_unlock(&ep->rma_info.rma_lock); + } + if (OP_FAILED == fence_req->state && !err) { + err = -ENXIO; + goto error_free; + } + } +error_free: + mutex_lock(&ep->rma_info.rma_lock); + mutex_unlock(&ep->rma_info.rma_lock); + kfree(fence_req); +error: + return err; +} + +/* + * micscif_fence_mark: + * + 
* @epd - endpoint descriptor + * Set up a mark for this endpoint and return the value of the mark. + */ +int micscif_fence_mark(scif_epd_t epd) +{ + int mark = 0; + struct endpt *ep = (struct endpt *)epd; + struct dma_channel *chan = ep->rma_info.dma_chan; + + if ((mark = request_dma_channel(chan))) + goto error; + + mark = program_dma_mark(chan); + + free_dma_channel(chan); +error: + return mark; +} + +/** + * micscif_rma_destroy_temp_windows: + * + * This routine destroys temporary registered windows created + * by scif_vreadfrom() and scif_vwriteto(). + */ +void micscif_rma_destroy_temp_windows(void) +{ + struct list_head *item, *tmp; + struct reg_range_t *window; + struct endpt *ep; + struct dma_channel *chan; + might_sleep(); +restart: + spin_lock(&ms_info.mi_rmalock); + list_for_each_safe(item, tmp, &ms_info.mi_rma) { + window = list_entry(item, + struct reg_range_t, list_member); + ep = (struct endpt *)window->ep; + chan = ep->rma_info.dma_chan; + + list_del(&window->list_member); + spin_unlock(&ms_info.mi_rmalock); + micscif_inc_node_refcnt(ep->remote_dev, 1); + if (!chan || + !scifdev_alive(ep) || + (!is_current_dma_mark(chan, window->dma_mark) && + is_dma_mark_processed(chan, window->dma_mark)) || + !drain_dma_intr(chan)) { + micscif_dec_node_refcnt(ep->remote_dev, 1); + /* Remove window from global list */ + window->unreg_state = OP_COMPLETED; + } else { + micscif_dec_node_refcnt(ep->remote_dev, 1); + /* DMA engine hung ?? */ + printk(KERN_ERR "%s %d DMA channel %d hung ep->state %d " + "window->dma_mark 0x%x channel_mark 0x%x\n", + __func__, __LINE__, get_chan_num(chan), + ep->sd_state, window->dma_mark, get_dma_mark(chan)); + WARN_ON(1); + micscif_queue_for_cleanup(window, &ms_info.mi_rma); + goto restart; + } + + if (OP_COMPLETED == window->unreg_state) { + BUG_ON(atomic_sub_return((int32_t)window->nr_pages, + &ep->rma_info.tw_total_pages) < 0); + if (RMA_WINDOW_SELF == window->type) + micscif_destroy_window(ep, window); + else + micscif_destroy_remote_window(ep, window); + BUG_ON(atomic_dec_return( + &ep->rma_info.tw_refcount) < 0); + } + goto restart; + } + spin_unlock(&ms_info.mi_rmalock); +} + +/** + * micscif_rma_destroy_tcw: + * + * This routine destroys temporary registered windows created + * by scif_vreadfrom() and scif_vwriteto(). 
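For context, a minimal user-side sketch, not part of this patch: scif_vreadfrom() on a buffer that was never scif_register()ed makes the driver pin the pages into a temporary cached window, which the cleanup routines above later tear down. A connected endpoint epd and a peer window registered at roffset are assumed; SCIF_RMA_USECACHE is the rma_flags bit from scif.h that asks the driver to keep the temporary window cached.

#include <stdio.h>
#include <stdlib.h>
#include <scif.h>

/* Read len bytes from the peer window at roffset into a plain malloc()ed
 * buffer; the driver services this through a temporary cached window. */
static int read_via_temp_window(scif_epd_t epd, off_t roffset, size_t len)
{
    void *buf = malloc(len);
    int err;

    if (!buf)
        return -1;
    err = scif_vreadfrom(epd, buf, len, roffset, SCIF_RMA_USECACHE);
    if (err < 0)
        perror("scif_vreadfrom");
    free(buf);
    return err;
}
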
+ */ +static +void __micscif_rma_destroy_tcw(struct rma_mmu_notifier *mmn, + struct endpt *ep, bool inrange, + uint64_t start, uint64_t len) +{ + struct list_head *item, *tmp; + struct reg_range_t *window; + uint64_t start_va, end_va; + uint64_t end = start + len; + list_for_each_safe(item, tmp, &mmn->tc_reg_list) { + window = list_entry(item, + struct reg_range_t, list_member); + ep = (struct endpt *)window->ep; + if (inrange) { + if (0 == len) + break; + start_va = (uint64_t)window->va_for_temp; + end_va = start_va+ (window->nr_pages << PAGE_SHIFT); + if (start < start_va) { + if (end <= start_va) { + break; + } else { + } + + } else { + if (start >= end_va) { + continue; + } else { + } + } + } + __micscif_rma_destroy_tcw_helper(window); + } +} + +static inline +void micscif_rma_destroy_tcw(struct rma_mmu_notifier *mmn, + struct endpt *ep, bool inrange, + uint64_t start, uint64_t len) +{ + unsigned long sflags; + + spin_lock_irqsave(&ep->rma_info.tc_lock, sflags); + __micscif_rma_destroy_tcw(mmn, ep, inrange, start, len); + spin_unlock_irqrestore(&ep->rma_info.tc_lock, sflags); +} + +static void __micscif_rma_destroy_tcw_ep(struct endpt *ep) +{ + struct list_head *item, *tmp; + struct rma_mmu_notifier *mmn; + spin_lock(&ep->rma_info.tc_lock); + list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) { + mmn = list_entry(item, + struct rma_mmu_notifier, list_member); + __micscif_rma_destroy_tcw(mmn, ep, false, 0, 0); + } + spin_unlock(&ep->rma_info.tc_lock); +} + +void micscif_rma_destroy_tcw_ep(struct endpt *ep) +{ + struct list_head *item, *tmp; + struct rma_mmu_notifier *mmn; + list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) { + mmn = list_entry(item, + struct rma_mmu_notifier, list_member); + micscif_rma_destroy_tcw(mmn, ep, false, 0, 0); + } +} + +/** + * micscif_rma_destroy_tcw: + * + * This routine destroys temporary registered windows created + * by scif_vreadfrom() and scif_vwriteto(). + */ +void micscif_rma_destroy_tcw_invalid(struct list_head *list) +{ + struct list_head *item, *tmp; + struct reg_range_t *window; + struct endpt *ep; + struct dma_channel *chan; + might_sleep(); +restart: + spin_lock(&ms_info.mi_rmalock); + list_for_each_safe(item, tmp, list) { + window = list_entry(item, + struct reg_range_t, list_member); + ep = (struct endpt *)window->ep; + chan = ep->rma_info.dma_chan; + list_del(&window->list_member); + spin_unlock(&ms_info.mi_rmalock); + micscif_inc_node_refcnt(ep->remote_dev, 1); + mutex_lock(&ep->rma_info.rma_lock); + if (!chan || + !scifdev_alive(ep) || + (!is_current_dma_mark(chan, window->dma_mark) && + is_dma_mark_processed(chan, window->dma_mark)) || + !drain_dma_intr(chan)) { + micscif_dec_node_refcnt(ep->remote_dev, 1); + BUG_ON(atomic_sub_return((int32_t)window->nr_pages, + &ep->rma_info.tcw_total_pages) < 0); + micscif_destroy_window(ep, window); + BUG_ON(atomic_dec_return( + &ep->rma_info.tcw_refcount) < 0); + } else { + /* DMA engine hung ?? */ + printk(KERN_ERR "%s %d DMA channel %d hung ep->state %d " + "window->dma_mark 0x%x channel_mark 0x%x\n", + __func__, __LINE__, get_chan_num(chan), + ep->sd_state, window->dma_mark, get_dma_mark(chan)); + WARN_ON(1); + mutex_unlock(&ep->rma_info.rma_lock); + micscif_dec_node_refcnt(ep->remote_dev, 1); + micscif_queue_for_cleanup(window, &ms_info.mi_rma); + goto restart; + } + mutex_unlock(&ep->rma_info.rma_lock); + goto restart; + } + spin_unlock(&ms_info.mi_rmalock); +} + +/** + * micscif_rma_handle_remote_fences: + * + * This routine services remote fence requests. 
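As a small illustration, assuming only the SCIF_REMOTE_FENCE flag bit already used by the code in this file: a remotely initiated fence carries its DMA mark with that bit set, so the handler below can distinguish it from a locally generated mark and strip it before waiting.

/* Sender side: the mark travels in msg.payload[2] with the flag bit set. */
static inline int ex_tag_remote_mark(int mark)
{
    return mark | SCIF_REMOTE_FENCE;
}

/* Receiver side: the flag bit is cleared before calling dma_mark_wait(). */
static inline int ex_untag_remote_mark(int tagged)
{
    return tagged & ~SCIF_REMOTE_FENCE;
}
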
+ */ +void micscif_rma_handle_remote_fences(void) +{ + struct list_head *item, *tmp; + struct remote_fence_info *fence; + struct endpt *ep; + int mark; + + might_sleep(); + mutex_lock(&ms_info.mi_fencelock); + list_for_each_safe(item, tmp, &ms_info.mi_fence) { + fence = list_entry(item, + struct remote_fence_info, list_member); + /* Remove fence from global list */ + list_del(&fence->list_member); + + /* Initiate the fence operation */ + ep = (struct endpt *)fence->msg.payload[0]; + mark = (int)fence->msg.payload[2]; + BUG_ON(!(mark & SCIF_REMOTE_FENCE)); + if (dma_mark_wait(ep->rma_info.dma_chan, + mark & ~SCIF_REMOTE_FENCE, false)) { + printk(KERN_ERR "%s %d err\n", __func__, __LINE__); + fence->msg.uop = SCIF_WAIT_NACK; + } else { + fence->msg.uop = SCIF_WAIT_ACK; + } + micscif_inc_node_refcnt(ep->remote_dev, 1); + fence->msg.payload[0] = ep->remote_ep; + /* No error handling for Notification messages. */ + micscif_nodeqp_send(ep->remote_dev, &fence->msg, ep); + micscif_dec_node_refcnt(ep->remote_dev, 1); + kfree(fence); + /* + * Decrement ref count and wake up + * any thread blocked in the EP close routine waiting + * for all such remote fence requests to complete. + */ + ep->rma_info.fence_refcount--; + wake_up(&ep->rma_info.fence_wq); + } + mutex_unlock(&ms_info.mi_fencelock); +} + +#ifdef CONFIG_MMU_NOTIFIER +void micscif_mmu_notif_handler(struct work_struct *work) +{ + struct list_head *pos, *tmpq; + struct endpt *ep; +restart: + micscif_rma_destroy_tcw_invalid(&ms_info.mi_rma_tc); + spin_lock(&ms_info.mi_rmalock); + list_for_each_safe(pos, tmpq, &ms_info.mi_mmu_notif_cleanup) { + ep = list_entry(pos, struct endpt, mmu_list); + list_del(&ep->mmu_list); + spin_unlock(&ms_info.mi_rmalock); + BUG_ON(list_empty(&ep->rma_info.mmn_list)); + + micscif_rma_destroy_tcw_ep(ep); + ep_unregister_mmu_notifier(ep); + queue_work(ms_info.mi_misc_wq, &ms_info.mi_misc_work); + goto restart; + } + spin_unlock(&ms_info.mi_rmalock); +} +#endif + +/** + * micscif_reserve_dma_chan: + * @ep: Endpoint Descriptor. + * + * This routine reserves a DMA channel for a particular + * endpoint. All DMA transfers for an endpoint are always + * programmed on the same DMA channel. + */ +int micscif_reserve_dma_chan(struct endpt *ep) +{ + int err = 0; +#ifndef _MIC_SCIF_ + /* + * Host Loopback cannot use DMA by design and hence + * reserving DMA channels is a nop. + */ + if (is_self_scifdev(ep->remote_dev)) + return 0; +#endif + mutex_lock(&ep->rma_info.rma_lock); + if (!ep->rma_info.dma_chan) { + struct dma_channel **chan = &ep->rma_info.dma_chan; + unsigned long ts = jiffies; +#ifndef _MIC_SCIF_ + mic_ctx_t *mic_ctx = + get_per_dev_ctx(ep->remote_dev->sd_node - 1); + BUG_ON(!ep->remote_dev->sd_node); +#endif + while (true) { + if (!(err = allocate_dma_channel((struct mic_dma_ctx_t *) +#ifdef _MIC_SCIF_ + mic_dma_handle, +#else + mic_ctx->dma_handle, +#endif + chan))) + break; + schedule(); + if (time_after(jiffies, + ts + NODE_ALIVE_TIMEOUT)) { + err = -EBUSY; + goto error; + } + } + mic_dma_thread_free_chan(*chan); + } +error: + mutex_unlock(&ep->rma_info.rma_lock); + return err; +} + +/* + * micscif_prog_signal: + * @epd - Endpoint Descriptor + * @offset - registered address + * @val - Value to be programmed in SUD. + * @type - Type of the window. + * + * Program a status update descriptor adter ensuring that the offset + * provided is indeed valid. 
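For orientation, a hedged caller-side sketch: scif_fence_signal() is the user-visible entry point whose local and remote writes are ultimately programmed by micscif_prog_signal() below. The SCIF_FENCE_INIT_SELF flag name is taken from scif.h and is an assumption here; epd, loff and roff are assumed to be a connected endpoint and offsets registered with SCIF_PROT_WRITE.

#include <scif.h>

/* Once DMAs initiated on this endpoint complete, write 1 into the local
 * window at loff and 1 into the peer's window at roff. */
static int signal_both_sides(scif_epd_t epd, off_t loff, off_t roff)
{
    return scif_fence_signal(epd, loff, 1 /* lval */, roff, 1 /* rval */,
                             SCIF_FENCE_INIT_SELF |
                             SCIF_SIGNAL_LOCAL | SCIF_SIGNAL_REMOTE);
}
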
+ */ +int micscif_prog_signal(scif_epd_t epd, off_t offset, uint64_t val, + enum rma_window_type type) +{ + struct endpt *ep = (struct endpt *)epd; + struct dma_channel *chan = ep->rma_info.dma_chan; + struct reg_range_t *window = NULL; + struct micscif_rma_req req; + int err; + dma_addr_t phys; + + mutex_lock(&ep->rma_info.rma_lock); + req.out_window = &window; + req.offset = offset; + req.nr_bytes = sizeof(uint64_t); + req.prot = SCIF_PROT_WRITE; + req.type = WINDOW_SINGLE; + if (RMA_WINDOW_SELF == type) + req.head = &ep->rma_info.reg_list; + else + req.head = &ep->rma_info.remote_reg_list; + /* Does a valid window exist? */ + if ((err = micscif_query_window(&req))) { + printk(KERN_ERR "%s %d err %d\n", + __func__, __LINE__, err); + goto unlock_ret; + } + RMA_MAGIC(window); + +#ifndef _MIC_SCIF_ + if (unlikely(is_self_scifdev(ep->remote_dev))) { + void *dst_virt; + if (RMA_WINDOW_SELF == type) + dst_virt = get_local_va(offset, window, + sizeof(uint32_t)); + else { + struct page **pages = ((struct reg_range_t *) + (window->peer_window))->pinned_pages->pages; + int page_nr = (int) ( (offset - window->offset) >> PAGE_SHIFT ); + off_t page_off = offset & ~PAGE_MASK; + dst_virt = (void *)((uint64_t)phys_to_virt(page_to_phys( + pages[page_nr])) | page_off); + } + *(uint64_t*)dst_virt = val; + goto unlock_ret; + } +#endif + phys = micscif_get_dma_addr(window, offset, NULL, NULL, NULL); + if ((err = request_dma_channel(chan))) + goto unlock_ret; + err = do_status_update(chan, phys, val); + free_dma_channel(chan); +unlock_ret: + mutex_unlock(&ep->rma_info.rma_lock); + return err; +} + +/* + * __micscif_kill_apps_with_mmaps: + * @ep - The SCIF endpoint + * + * Kill the applications which have valid remote memory mappings + * created via scif_mmap(..). + */ +static void __micscif_kill_apps_with_mmaps(struct endpt *ep) +{ + struct list_head *item; + struct rma_task_info *info; + + spin_lock(&ep->lock); + list_for_each(item, &ep->rma_info.task_list) { + info = list_entry(item, struct rma_task_info, list_member); + kill_pid(info->pid, SIGKILL, 1); + pr_debug("%s ep %p pid %p ref %d\n", + __func__, ep, info->pid, info->ref_count); + } + spin_unlock(&ep->lock); +} + +/* + * _micscif_kill_apps_with_mmaps: + * @node - remote node id. + * @head - head of the list of endpoints to kill. + * + * Traverse the list of endpoints for a particular remote node and + * kill applications with valid remote memory mappings. + */ +static void _micscif_kill_apps_with_mmaps(int node, struct list_head *head) +{ + struct endpt *ep; + unsigned long sflags; + struct list_head *item; + + spin_lock_irqsave(&ms_info.mi_connlock, sflags); + list_for_each(item, head) { + ep = list_entry(item, struct endpt, list); + if (ep->remote_dev->sd_node == node) + __micscif_kill_apps_with_mmaps(ep); + } + spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); +} + +/* + * micscif_kill_apps_with_mmaps: + * @node - remote node id. + * + * Wrapper for killing applications with valid remote memory mappings + * for a particular node. This API is called by peer nodes as part of + * handling a lost node. + */ +void micscif_kill_apps_with_mmaps(int node) +{ + _micscif_kill_apps_with_mmaps(node, &ms_info.mi_connected); + _micscif_kill_apps_with_mmaps(node, &ms_info.mi_disconnected); +} + +/* + * micscif_query_apps_with_mmaps: + * @node - remote node id. + * @head - head of the list of endpoints to query. + * + * Query if any applications for a remote node have valid remote memory + * mappings. 
+ */ +static bool micscif_query_apps_with_mmaps(int node, struct list_head *head) +{ + struct endpt *ep; + unsigned long sflags; + struct list_head *item; + bool ret = false; + + spin_lock_irqsave(&ms_info.mi_connlock, sflags); + list_for_each(item, head) { + ep = list_entry(item, struct endpt, list); + if (ep->remote_dev->sd_node == node && + !list_empty(&ep->rma_info.task_list)) { + ret = true; + break; + } + } + spin_unlock_irqrestore(&ms_info.mi_connlock, sflags); + return ret; +} + +/* + * micscif_rma_do_apps_have_mmaps: + * @node - remote node id. + * + * Wrapper for querying if any applications have remote memory mappings + * for a particular node. + */ +bool micscif_rma_do_apps_have_mmaps(int node) +{ + return (micscif_query_apps_with_mmaps(node, &ms_info.mi_connected) || + micscif_query_apps_with_mmaps(node, &ms_info.mi_disconnected)); +} + +/* + * __micscif_cleanup_rma_for_zombies: + * @ep - The SCIF endpoint + * + * This API is only called while handling a lost node: + * a) Remote node is dead. + * b) All endpoints with remote memory mappings have been killed. + * So we can traverse the remote_reg_list without any locks. Since + * the window has not yet been unregistered we can drop the ref count + * and queue it to the cleanup thread. + */ +static void __micscif_cleanup_rma_for_zombies(struct endpt *ep) +{ + struct list_head *pos, *tmp; + struct reg_range_t *window; + + list_for_each_safe(pos, tmp, &ep->rma_info.remote_reg_list) { + window = list_entry(pos, struct reg_range_t, list_member); + /* If unregistration is complete then why is it on the list? */ + WARN_ON(window->unreg_state == OP_COMPLETED); + if (window->ref_count) + put_window_ref_count(window, window->nr_pages); + if (!window->ref_count) { + atomic_inc(&ep->rma_info.tw_refcount); + atomic_add_return((int32_t)window->nr_pages, + &ep->rma_info.tw_total_pages); + list_del(&window->list_member); + micscif_queue_for_cleanup(window, &ms_info.mi_rma); + } + } +} + +/* + * micscif_cleanup_rma_for_zombies: + * @node - remote node id. + * + * Cleanup remote registration lists for zombie endpoints. + */ +void micscif_cleanup_rma_for_zombies(int node) +{ + struct endpt *ep; + unsigned long sflags; + struct list_head *item; + + spin_lock_irqsave(&ms_info.mi_eplock, sflags); + list_for_each(item, &ms_info.mi_zombie) { + ep = list_entry(item, struct endpt, list); + if (ep->remote_dev && ep->remote_dev->sd_node == node) { + /* + * If the zombie endpoint remote node matches the lost + * node then the scifdev should not be alive. + */ + WARN_ON(scifdev_alive(ep)); + __micscif_cleanup_rma_for_zombies(ep); + } + } + spin_unlock_irqrestore(&ms_info.mi_eplock, sflags); +} + +/* + * micscif_rma_get_task: + * + * Store the parent task struct and bump up the number of remote mappings. + * If this is the first remote memory mapping for this endpoint then + * create a new rma_task_info entry in the epd task list. + */ +int micscif_rma_get_task(struct endpt *ep, int nr_pages) +{ + struct list_head *item; + struct rma_task_info *info; + int err = 0; + + spin_lock(&ep->lock); + list_for_each(item, &ep->rma_info.task_list) { + info = list_entry(item, struct rma_task_info, list_member); + if (info->pid == task_tgid(current)) { + info->ref_count += nr_pages; + pr_debug("%s ep %p existing pid %p ref %d\n", + __func__, ep, info->pid, info->ref_count); + goto unlock; + } + } + spin_unlock(&ep->lock); + + /* A new task is mapping this window. 
Create a new entry */ + if (!(info = kzalloc(sizeof(*info), GFP_KERNEL))) { + err = -ENOMEM; + goto done; + } + + info->pid = get_pid(task_tgid(current)); + info->ref_count = nr_pages; + pr_debug("%s ep %p new pid %p ref %d\n", + __func__, ep, info->pid, info->ref_count); + spin_lock(&ep->lock); + list_add_tail(&info->list_member, &ep->rma_info.task_list); +unlock: + spin_unlock(&ep->lock); +done: + return err; +} + +/* + * micscif_rma_put_task: + * + * Bump down the number of remote mappings. if the ref count for this + * particular task drops to zero then remove the rma_task_info from + * the epd task list. + */ +void micscif_rma_put_task(struct endpt *ep, int nr_pages) +{ + struct list_head *item; + struct rma_task_info *info; + + spin_lock(&ep->lock); + list_for_each(item, &ep->rma_info.task_list) { + info = list_entry(item, struct rma_task_info, list_member); + if (info->pid == task_tgid(current)) { + info->ref_count -= nr_pages; + pr_debug("%s ep %p pid %p ref %d\n", + __func__, ep, info->pid, info->ref_count); + if (!info->ref_count) { + list_del(&info->list_member); + put_pid(info->pid); + kfree(info); + } + goto done; + } + } + /* Why was the task not found? This is a bug. */ + WARN_ON(1); +done: + spin_unlock(&ep->lock); + return; +} + +/* Only debug API's below */ +void micscif_display_window(struct reg_range_t *window, const char *s, int line) +{ + int j; + + printk("%s %d window %p type %d temp %d offset 0x%llx" + " nr_pages 0x%llx nr_contig_chunks 0x%llx" + " prot %d ref_count %d magic 0x%llx peer_window 0x%llx" + " unreg_state 0x%x va_for_temp %p\n", + s, line, window, window->type, window->temp, + window->offset, window->nr_pages, window->nr_contig_chunks, + window->prot, window->ref_count, window->magic, + window->peer_window, window->unreg_state, window->va_for_temp); + + for (j = 0; j < window->nr_contig_chunks; j++) + pr_debug("page[%d] = dma_addr 0x%llx num_pages 0x%x\n", + j, + window->dma_addr[j], + window->num_pages[j]); + + if (RMA_WINDOW_SELF == window->type && window->pinned_pages) + for (j = 0; j < window->nr_pages; j++) + pr_debug("page[%d] = pinned_pages %p address %p\n", + j, window->pinned_pages->pages[j], + page_address(window->pinned_pages->pages[j])); + +#ifdef CONFIG_ML1OM + if (window->temp_phys_addr) + for (j = 0; j < window->nr_contig_chunks; j++) + pr_debug("page[%d] = temp_phys_addr 0x%llx\n", + j, window->temp_phys_addr[j]); + if (window->phys_addr) + for (j = 0; j < window->nr_pages; j++) + pr_debug("page[%d] = phys_addr 0x%llx\n", + j, window->phys_addr[j]); +#endif + RMA_MAGIC(window); +} diff --git a/micscif/micscif_rma_dma.c b/micscif/micscif_rma_dma.c new file mode 100644 index 0000000..9fafc4c --- /dev/null +++ b/micscif/micscif_rma_dma.c @@ -0,0 +1,982 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. 
Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include "mic/micscif.h" +#include "mic/micscif_smpt.h" +#include "mic/mic_dma_api.h" +#include "mic/micscif_kmem_cache.h" +#include "mic/micscif_rma.h" +#include "mic/micscif_rma_list.h" +#if !defined(WINDOWS) && !defined(CONFIG_PREEMPT) +#include +#endif +#include +#ifndef _MIC_SCIF_ +#include "mic_common.h" +#endif + +static __always_inline +void *get_local_va(off_t off, struct reg_range_t *window, size_t len) +{ + uint64_t page_nr = (off - window->offset) >> PAGE_SHIFT; + off_t page_off = off & ~PAGE_MASK; + void *va; + + if (RMA_WINDOW_SELF == window->type) { + struct page **pages = window->pinned_pages->pages; + va = (void *)((uint64_t) + (page_address(pages[page_nr])) | page_off); + } else { + dma_addr_t phys = micscif_get_dma_addr(window, off, NULL, NULL, NULL); +#ifdef CONFIG_ML1OM + if (RMA_ERROR_CODE == phys) + return NULL; +#endif + va = (void *)((uint64_t) (phys_to_virt(phys))); + } + return va; +} + +#ifdef _MIC_SCIF_ +static __always_inline +void *ioremap_remote(off_t off, struct reg_range_t *window, + size_t len, bool loopback, struct micscif_dev *dev, int *index, uint64_t *start_off) +{ + void *ret; + dma_addr_t phys = micscif_get_dma_addr(window, off, NULL, index, start_off); + +#ifdef CONFIG_ML1OM + if (RMA_ERROR_CODE == phys) + return NULL; +#endif + if (!loopback) + ret = ioremap_nocache(phys, len); + else + ret = (void *)((uint64_t)phys_to_virt(phys)); + return ret; +} + +static __always_inline +void *ioremap_remote_gtt(off_t off, struct reg_range_t *window, + size_t len, bool loopback, struct micscif_dev *dev, int ch_num, struct mic_copy_work *work) +{ + return ioremap_remote(off, window, len, loopback, dev, NULL, NULL); +} +#else +static __always_inline +void *ioremap_remote_gtt(off_t off, struct reg_range_t *window, + size_t len, bool loopback, struct micscif_dev *dev, int ch_num, struct mic_copy_work *work) +{ + void *ret; + uint64_t page_nr = (off - window->offset) >> PAGE_SHIFT; + off_t page_off = off & ~PAGE_MASK; + if (!loopback) { + dma_addr_t phys = micscif_get_dma_addr(window, off, NULL, NULL, NULL); + /* Ideally there should be a helper to do the +/-1 */ + ret = get_per_dev_ctx(dev->sd_node - 1)->aper.va + phys; + } else { + struct page **pages = ((struct reg_range_t *) + (window->peer_window))->pinned_pages->pages; + ret = (void *)((uint64_t)phys_to_virt(page_to_phys(pages[page_nr])) + | page_off); + } + return ret; +} + +static __always_inline +void *ioremap_remote(off_t off, struct reg_range_t *window, + size_t len, bool 
loopback, struct micscif_dev *dev, int *index, uint64_t *start_off) +{ + void *ret; + int page_nr = (int)((off - window->offset) >> PAGE_SHIFT); + off_t page_off = off & ~PAGE_MASK; + + if (!loopback) { + dma_addr_t phys; + mic_ctx_t *mic_ctx = get_per_dev_ctx(dev->sd_node - 1); + phys = micscif_get_dma_addr(window, off, NULL, index, start_off); + ret = mic_ctx->aper.va + phys; + } else { + struct page **pages = ((struct reg_range_t *) + (window->peer_window))->pinned_pages->pages; + ret = (void *)((uint64_t)phys_to_virt(page_to_phys(pages[page_nr])) + | page_off); + } + return ret; +} +#endif + +static __always_inline void +iounmap_remote(void *virt, size_t size, struct mic_copy_work *work) +{ +#ifdef _MIC_SCIF_ + if (!work->loopback) + iounmap(virt); +#endif +} + +/* + * Takes care of ordering issue caused by + * 1. Hardware: Only in the case of cpu copy from host to card because of WC memory. + * 2. Software: If memcpy reorders copy instructions for optimization. This could happen + * at both host and card. + */ +static inline void ordered_memcpy(volatile char *dst, + const char *src, size_t count) +{ + if (!count) + return; + + memcpy_toio(dst, src, --count); + wmb(); + *(dst + count) = *(src + count); +} + +static inline void micscif_unaligned_memcpy(volatile char *dst, + const char *src, size_t count, bool ordered) +{ + if (unlikely(ordered)) + ordered_memcpy(dst, src, count); + else + memcpy_toio(dst, src, count); +} + +/* + * Copy between rma window and temporary buffer + */ +void micscif_rma_local_cpu_copy(uint64_t offset, struct reg_range_t *window, uint8_t *temp, size_t remaining_len, bool to_temp) +{ + void *window_virt; + size_t loop_len; + int offset_in_page; + uint64_t end_offset; + struct list_head *item; + + BUG_ON(RMA_WINDOW_SELF != window->type); + + offset_in_page = offset & ~PAGE_MASK; + loop_len = PAGE_SIZE - offset_in_page; + + if (remaining_len < loop_len) + loop_len = remaining_len; + + if (!(window_virt = get_local_va(offset, window, loop_len))) + return; + if (to_temp) + memcpy(temp, window_virt, loop_len); + else + memcpy(window_virt, temp, loop_len); + + offset += loop_len; + temp += loop_len; + remaining_len -= loop_len; + + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + while (remaining_len) { + if (offset == end_offset) { + item = ( + &window->list_member)->next; + window = list_entry(item, + struct reg_range_t, + list_member); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + } + + loop_len = min(PAGE_SIZE, remaining_len); + + if (!(window_virt = get_local_va(offset, window, loop_len))) + return; + + if (to_temp) + memcpy(temp, window_virt, loop_len); + else + memcpy(window_virt, temp, loop_len); + + offset += loop_len; + temp += loop_len; + remaining_len -= loop_len; + } +} + +/* + * Comment this + * + */ +static int micscif_rma_list_dma_copy_unaligned(struct mic_copy_work *work, uint8_t *temp, struct dma_channel *chan, bool src_local) +{ + struct dma_completion_cb *comp_cb = work->comp_cb; + dma_addr_t window_dma_addr, temp_dma_addr; +#ifndef _MIC_SCIF_ + dma_addr_t temp_phys = comp_cb->temp_phys; +#endif + size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len; + int offset_in_page; + uint64_t end_offset = 0, offset = 0; + struct reg_range_t *window = NULL; + struct list_head *item = NULL; + int ret = 0; + void *window_virt_addr = NULL; + size_t tail_len = 0; + + if (src_local) { + offset = work->dst_offset; + window = work->dst_window; + } else { + offset = work->src_offset; + window = work->src_window; 
+ } + + offset_in_page = offset & (L1_CACHE_BYTES - 1); + if (offset_in_page) { + loop_len = L1_CACHE_BYTES - offset_in_page; + loop_len = min(loop_len, remaining_len); + + if (!(window_virt_addr = ioremap_remote_gtt(offset, window, loop_len, + work->loopback, work->remote_dev, + get_chan_num(chan), work))) + return -ENOMEM; + + if (src_local) { + micscif_unaligned_memcpy(window_virt_addr, temp, loop_len, work->ordered && + !(remaining_len - loop_len)); + serializing_request(window_virt_addr); + } else { + memcpy_fromio(temp, window_virt_addr, loop_len); + serializing_request(temp); + } +#ifdef RMA_DEBUG + atomic_long_add_return(loop_len, &ms_info.rma_unaligned_cpu_cnt); +#endif + smp_mb(); + iounmap_remote(window_virt_addr, loop_len, work); + + offset += loop_len; + temp += loop_len; +#ifndef _MIC_SCIF_ + temp_phys += loop_len; +#endif + remaining_len -= loop_len; + } + + offset_in_page = offset & ~PAGE_MASK; + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + + tail_len = remaining_len & (L1_CACHE_BYTES - 1); + remaining_len -= tail_len; + while (remaining_len) { + if (offset == end_offset) { + item = (&window->list_member)->next; + window = list_entry(item, + struct reg_range_t, + list_member); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + } +#ifndef _MIC_SCIF_ + temp_dma_addr = temp_phys; +#else + temp_dma_addr = (dma_addr_t)virt_to_phys(temp); +#endif + window_dma_addr = micscif_get_dma_addr(window, offset, &nr_contig_bytes, NULL, NULL); + +#ifdef CONFIG_ML1OM + if (RMA_ERROR_CODE == window_dma_addr) + return -ENXIO; +#endif + loop_len = min(nr_contig_bytes, remaining_len); + + if (src_local) { + if (unlikely(work->ordered && !tail_len && + !(remaining_len - loop_len) && + loop_len != L1_CACHE_BYTES)) { + /* + * Break up the last chunk of the transfer into two steps + * if there is no tail to gurantee DMA ordering. + * Passing DO_DMA_POLLING inserts a status update descriptor + * in step 1 which acts as a double sided synchronization + * fence for the DMA engine to ensure that the last cache line + * in step 2 is updated last. + */ + /* Step 1) DMA: Body Length - L1_CACHE_BYTES. 
*/ + ret = do_dma(chan, DO_DMA_POLLING, temp_dma_addr, window_dma_addr, + loop_len - L1_CACHE_BYTES, NULL); + if (ret < 0) { + printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n", + __func__, __LINE__, ret); + return ret; + } + offset += (loop_len - L1_CACHE_BYTES); + temp_dma_addr += (loop_len - L1_CACHE_BYTES); + window_dma_addr += (loop_len - L1_CACHE_BYTES); + remaining_len -= (loop_len - L1_CACHE_BYTES); + loop_len = remaining_len; + + /* Step 2) DMA: L1_CACHE_BYTES */ + ret = do_dma(chan, 0, temp_dma_addr, window_dma_addr, + loop_len, NULL); + if (ret < 0) { + printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n", + __func__, __LINE__, ret); + return ret; + } + } else { + int flags = 0; + if (remaining_len == loop_len + L1_CACHE_BYTES) + flags = DO_DMA_POLLING; + ret = do_dma(chan, flags, temp_dma_addr, window_dma_addr, + loop_len, NULL); + } + } else { + ret = do_dma(chan, 0, window_dma_addr, temp_dma_addr, + loop_len, NULL); + } + if (ret < 0) { + printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n", + __func__, __LINE__, ret); + return ret; + } + offset += loop_len; + temp += loop_len; +#ifndef _MIC_SCIF_ + temp_phys += loop_len; +#endif + remaining_len -= loop_len; + offset_in_page = 0; + } + if (tail_len) { + if (offset == end_offset) { + item = (&window->list_member)->next; + window = list_entry(item, + struct reg_range_t, + list_member); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + } + if (!(window_virt_addr = ioremap_remote_gtt(offset, window, tail_len, + work->loopback, work->remote_dev, + get_chan_num(chan), work))) + return -ENOMEM; + + /* + * The CPU copy for the tail bytes must be initiated only once previous + * DMA transfers for this endpoint have completed to guarantee + * ordering. + */ + if (unlikely(work->ordered)) { + free_dma_channel(chan); + work->dma_chan_released = true; + if ((ret = drain_dma_intr(chan))) + return ret; + } + + if (src_local) { + micscif_unaligned_memcpy(window_virt_addr, temp, tail_len, work->ordered); + serializing_request(window_virt_addr); + } else { + memcpy_fromio(temp, window_virt_addr, tail_len); + serializing_request(temp); + } +#ifdef RMA_DEBUG + atomic_long_add_return(tail_len, &ms_info.rma_unaligned_cpu_cnt); +#endif + smp_mb(); + iounmap_remote(window_virt_addr, tail_len, work); + } + if (work->dma_chan_released) { + if ((ret = request_dma_channel(chan))) + return ret; + /* Callee frees the DMA channel lock, if it is held */ + work->dma_chan_released = false; + } + ret = do_dma(chan, DO_DMA_INTR, 0, 0, 0, comp_cb); + if (ret < 0) { + printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n", + __func__, __LINE__, ret); + return ret; + } + return 0; +} + +static inline bool is_local_dma_addr(uint64_t addr) +{ +#ifdef _MIC_SCIF_ + return (addr >> PAGE_SHIFT < num_physpages); +#else + return is_syspa(addr); +#endif +} + +/* + * micscif_rma_list_dma_copy_aligned: + * + * Traverse all the windows and perform DMA copy. 
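A standalone sketch of the split performed below, with EX_CACHE_BYTES standing in for L1_CACHE_BYTES and not part of the driver: when source and destination share the same cache-line misalignment, the transfer is broken into a CPU-copied head up to the next cache line, a DMA body of whole cache lines, and a CPU-copied tail.

#define EX_CACHE_BYTES 64UL   /* stand-in for L1_CACHE_BYTES */

static void ex_split_copy(unsigned long offset, unsigned long len,
                          unsigned long *head, unsigned long *body,
                          unsigned long *tail)
{
    unsigned long mis = offset & (EX_CACHE_BYTES - 1);

    *head = mis ? EX_CACHE_BYTES - mis : 0;   /* CPU copy to reach alignment */
    if (*head > len)
        *head = len;
    len -= *head;
    *tail = len & (EX_CACHE_BYTES - 1);       /* CPU copy after the last full line */
    *body = len - *tail;                      /* whole cache lines, done by DMA */
}
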
+ */ +static int micscif_rma_list_dma_copy_aligned(struct mic_copy_work *work, struct dma_channel *chan) +{ + dma_addr_t src_dma_addr, dst_dma_addr; + size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0, dst_contig_bytes = 0; + int src_cache_off, dst_cache_off, src_last_index = 0, dst_last_index = 0; + uint64_t end_src_offset, end_dst_offset; + void *src_virt, *dst_virt; + struct reg_range_t *src_window = work->src_window; + struct reg_range_t *dst_window = work->dst_window; + uint64_t src_offset = work->src_offset, dst_offset = work->dst_offset; + uint64_t src_start_offset = src_window->offset, dst_start_offset = dst_window->offset; + struct list_head *item; + int ret = 0; + + remaining_len = work->len; + + src_cache_off = src_offset & (L1_CACHE_BYTES - 1); + dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1); + if (src_cache_off != dst_cache_off) { + BUG_ON(1); + } else if (src_cache_off != 0) { + /* Head */ + loop_len = L1_CACHE_BYTES - src_cache_off; + loop_len = min(loop_len, remaining_len); + src_dma_addr = micscif_get_dma_addr(src_window, src_offset, NULL, NULL, NULL); + dst_dma_addr = micscif_get_dma_addr(dst_window, dst_offset, NULL, NULL, NULL); +#ifdef CONFIG_ML1OM + if (RMA_ERROR_CODE == src_dma_addr) + return -ENXIO; + if (RMA_ERROR_CODE == dst_dma_addr) + return -ENXIO; + get_window_ref_count(src_window, 1); + get_window_ref_count(dst_window, 1); +#endif + if (is_local_dma_addr(src_dma_addr)) + src_virt = get_local_va(src_offset, src_window, loop_len); + else + src_virt = ioremap_remote_gtt(src_offset, src_window, + loop_len, work->loopback, + work->remote_dev, get_chan_num(chan), work); + if (!src_virt) { +#ifdef CONFIG_ML1OM + put_window_ref_count(src_window, 1); + put_window_ref_count(dst_window, 1); +#endif + return -ENOMEM; + } + if (is_local_dma_addr(dst_dma_addr)) + dst_virt = get_local_va(dst_offset, dst_window, loop_len); + else + dst_virt = ioremap_remote_gtt(dst_offset, dst_window, + loop_len, work->loopback, + work->remote_dev, get_chan_num(chan), work); +#ifdef CONFIG_ML1OM + put_window_ref_count(src_window, 1); + put_window_ref_count(dst_window, 1); +#endif + if (!dst_virt) { + if (!is_local_dma_addr(src_dma_addr)) + iounmap_remote(src_virt, loop_len, work); + return -ENOMEM; + } + if (is_local_dma_addr(src_dma_addr)){ + micscif_unaligned_memcpy(dst_virt, src_virt, loop_len, + remaining_len == loop_len ? 
work->ordered : false); + } + else{ + memcpy_fromio(dst_virt, src_virt, loop_len); + } + serializing_request(dst_virt); + smp_mb(); + if (!is_local_dma_addr(src_dma_addr)) + iounmap_remote(src_virt, loop_len, work); + if (!is_local_dma_addr(dst_dma_addr)) + iounmap_remote(dst_virt, loop_len, work); + src_offset += loop_len; + dst_offset += loop_len; + remaining_len -= loop_len; + } + + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + tail_len = remaining_len & (L1_CACHE_BYTES - 1); + remaining_len -= tail_len; + while (remaining_len) { + if (src_offset == end_src_offset) { + item = (&src_window->list_member)->next; + src_window = list_entry(item, + struct reg_range_t, + list_member); + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + src_last_index = 0; + src_start_offset = src_window->offset; + } + if (dst_offset == end_dst_offset) { + item = (&dst_window->list_member)->next; + dst_window = list_entry(item, struct reg_range_t, list_member); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + dst_last_index = 0; + dst_start_offset = dst_window->offset; + } + + /* compute dma addresses for transfer */ + src_dma_addr = micscif_get_dma_addr(src_window, src_offset, &src_contig_bytes, &src_last_index, &src_start_offset); + dst_dma_addr = micscif_get_dma_addr(dst_window, dst_offset, &dst_contig_bytes, &dst_last_index, &dst_start_offset); +#ifdef CONFIG_ML1OM + if (RMA_ERROR_CODE == src_dma_addr) + return -ENXIO; + if (RMA_ERROR_CODE == dst_dma_addr) + return -ENXIO; +#endif + loop_len = min(src_contig_bytes, dst_contig_bytes); + loop_len = min(loop_len, remaining_len); + if (unlikely(work->ordered && !tail_len && + !(remaining_len - loop_len) && + loop_len != L1_CACHE_BYTES)) { + /* + * Break up the last chunk of the transfer into two steps + * if there is no tail to gurantee DMA ordering. + * Passing DO_DMA_POLLING inserts a status update descriptor + * in step 1 which acts as a double sided synchronization + * fence for the DMA engine to ensure that the last cache line + * in step 2 is updated last. + */ + /* Step 1) DMA: Body Length - L1_CACHE_BYTES. 
*/ + ret = do_dma(chan, DO_DMA_POLLING, src_dma_addr, dst_dma_addr, + loop_len - L1_CACHE_BYTES, NULL); + if (ret < 0) { + printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n", + __func__, __LINE__, ret); + return ret; + } + src_offset += (loop_len - L1_CACHE_BYTES); + dst_offset += (loop_len - L1_CACHE_BYTES); + src_dma_addr += (loop_len - L1_CACHE_BYTES); + dst_dma_addr += (loop_len - L1_CACHE_BYTES); + remaining_len -= (loop_len - L1_CACHE_BYTES); + loop_len = remaining_len; + + /* Step 2) DMA: L1_CACHE_BYTES */ + ret = do_dma(chan, 0, src_dma_addr, dst_dma_addr, + loop_len, NULL); + if (ret < 0) { + printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n", + __func__, __LINE__, ret); + return ret; + } + } else { + int flags = 0; + if (remaining_len == loop_len + L1_CACHE_BYTES) + flags = DO_DMA_POLLING; + ret = do_dma(chan, flags, src_dma_addr, dst_dma_addr, + loop_len, NULL); + if (ret < 0) { + printk(KERN_ERR "%s %d Desc Prog Failed ret %d\n", + __func__, __LINE__, ret); + return ret; + } + } + src_offset += loop_len; + dst_offset += loop_len; + remaining_len -= loop_len; + } +#ifdef CONFIG_MK1OM + BUG_ON(remaining_len != 0); +#endif +#ifdef CONFIG_ML1OM + if (remaining_len) + return - ENXIO; +#endif + remaining_len = tail_len; + if (remaining_len) { + loop_len = remaining_len; + if (src_offset == end_src_offset) { + item = (&src_window->list_member)->next; + src_window = list_entry(item, + struct reg_range_t, + list_member); + } + if (dst_offset == end_dst_offset) { + item = (&dst_window->list_member)->next; + dst_window = list_entry(item, struct reg_range_t, list_member); + } + + src_dma_addr = micscif_get_dma_addr(src_window, src_offset, NULL, NULL, NULL); + dst_dma_addr = micscif_get_dma_addr(dst_window, dst_offset, NULL, NULL, NULL); +#ifdef CONFIG_ML1OM + if (RMA_ERROR_CODE == src_dma_addr) + return -ENXIO; + if (RMA_ERROR_CODE == dst_dma_addr) + return -ENXIO; +#endif + /* + * The CPU copy for the tail bytes must be initiated only once previous + * DMA transfers for this endpoint have completed to guarantee + * ordering. 
+ */ + if (unlikely(work->ordered)) { + free_dma_channel(chan); + work->dma_chan_released = true; + if ((ret = drain_dma_poll(chan))) + return ret; + } +#ifdef CONFIG_ML1OM + get_window_ref_count(src_window, 1); + get_window_ref_count(dst_window, 1); +#endif + if (is_local_dma_addr(src_dma_addr)) + src_virt = get_local_va(src_offset, src_window, loop_len); + else + src_virt = ioremap_remote_gtt(src_offset, src_window, + loop_len, work->loopback, + work->remote_dev, get_chan_num(chan), work); + if (!src_virt) { +#ifdef CONFIG_ML1OM + put_window_ref_count(src_window, 1); + put_window_ref_count(dst_window, 1); +#endif + return -ENOMEM; + } + + if (is_local_dma_addr(dst_dma_addr)) + dst_virt = get_local_va(dst_offset, dst_window, loop_len); + else + dst_virt = ioremap_remote_gtt(dst_offset, dst_window, + loop_len, work->loopback, + work->remote_dev, get_chan_num(chan), work); +#ifdef CONFIG_ML1OM + put_window_ref_count(src_window, 1); + put_window_ref_count(dst_window, 1); +#endif + if (!dst_virt) { + if (!is_local_dma_addr(src_dma_addr)) + iounmap_remote(src_virt, loop_len, work); + return -ENOMEM; + } + + if (is_local_dma_addr(src_dma_addr)){ + micscif_unaligned_memcpy(dst_virt, src_virt, loop_len, work->ordered); + } + else{ + memcpy_fromio(dst_virt, src_virt, loop_len); + } + serializing_request(dst_virt); + smp_mb(); + if (!is_local_dma_addr(src_dma_addr)) + iounmap_remote(src_virt, loop_len, work); + + if (!is_local_dma_addr(dst_dma_addr)) + iounmap_remote(dst_virt, loop_len, work); + + remaining_len -= loop_len; +#ifdef CONFIG_MK1OM + BUG_ON(remaining_len != 0); +#endif +#ifdef CONFIG_ML1OM + if (remaining_len) + return - ENXIO; +#endif + } + + return ret; +} + +int micscif_rma_list_dma_copy_wrapper(struct endpt *epd, struct mic_copy_work *work, struct dma_channel *chan, off_t loffset) +{ + int src_cache_off, dst_cache_off; + uint64_t src_offset = work->src_offset, dst_offset = work->dst_offset; + uint8_t *temp = NULL; + bool src_local = true, dst_local = false; + struct dma_completion_cb *comp_cb; + dma_addr_t src_dma_addr, dst_dma_addr; +#ifndef _MIC_SCIF_ + struct pci_dev *pdev; +#endif + + src_cache_off = src_offset & (L1_CACHE_BYTES - 1); + dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1); + if (dst_cache_off == src_cache_off) + return micscif_rma_list_dma_copy_aligned(work, chan); + + if (work->loopback) { +#ifdef _MIC_SCIF_ + BUG_ON(micscif_rma_list_cpu_copy(work)); + return 0; +#else + BUG_ON(1); +#endif + } + + src_dma_addr = micscif_get_dma_addr(work->src_window, src_offset, NULL, NULL, NULL); + dst_dma_addr = micscif_get_dma_addr(work->dst_window, dst_offset, NULL, NULL, NULL); + + if (is_local_dma_addr(src_dma_addr)) + src_local = true; + else + src_local = false; + + if (is_local_dma_addr(dst_dma_addr)) + dst_local = true; + else + dst_local = false; + + dst_local = dst_local; + BUG_ON(work->len + (L1_CACHE_BYTES << 1) > KMEM_UNALIGNED_BUF_SIZE); + + /* Allocate dma_completion cb */ + if (!(comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL))) + goto error; + + work->comp_cb = comp_cb; + comp_cb->cb_cookie = (uint64_t)comp_cb; + comp_cb->dma_completion_func = &micscif_rma_completion_cb; + + if (work->len + (L1_CACHE_BYTES << 1) < KMEM_UNALIGNED_BUF_SIZE) { + comp_cb->is_cache = false; + if (!(temp = kmalloc(work->len + (L1_CACHE_BYTES << 1), GFP_KERNEL))) + goto free_comp_cb; + comp_cb->temp_buf_to_free = temp; + /* kmalloc(..) 
does not guarantee cache line alignment */ + if ((uint64_t)temp & (L1_CACHE_BYTES - 1)) + temp = (uint8_t*)ALIGN((uint64_t)temp, L1_CACHE_BYTES); + } else { + comp_cb->is_cache = true; + if (!(temp = micscif_kmem_cache_alloc())) + goto free_comp_cb; + comp_cb->temp_buf_to_free = temp; + } + + if (src_local) { + temp += dst_cache_off; + comp_cb->tmp_offset = dst_cache_off; + micscif_rma_local_cpu_copy(work->src_offset, work->src_window, temp, work->len, true); + } else { + comp_cb->dst_window = work->dst_window; + comp_cb->dst_offset = work->dst_offset; + work->src_offset = work->src_offset - src_cache_off; + comp_cb->len = work->len; + work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES); + comp_cb->header_padding = src_cache_off; + } + comp_cb->temp_buf = temp; + +#ifndef _MIC_SCIF_ + micscif_pci_dev(work->remote_dev->sd_node, &pdev); + comp_cb->temp_phys = mic_map_single(work->remote_dev->sd_node - 1, + pdev, temp, KMEM_UNALIGNED_BUF_SIZE); + + if (mic_map_error(comp_cb->temp_phys)) { + goto free_temp_buf; + } + + comp_cb->remote_node = work->remote_dev->sd_node; +#endif + if (0 > micscif_rma_list_dma_copy_unaligned(work, temp, chan, src_local)) + goto free_temp_buf; + if (!src_local) + work->fence_type = DO_DMA_INTR; + return 0; +free_temp_buf: + if (comp_cb->is_cache) + micscif_kmem_cache_free(comp_cb->temp_buf_to_free); + else + kfree(comp_cb->temp_buf_to_free); +free_comp_cb: + kfree(comp_cb); +error: + printk(KERN_ERR "Unable to malloc %s %d\n", __func__, __LINE__); + return -ENOMEM; +} + +#if !defined(WINDOWS) && !defined(CONFIG_PREEMPT) +static int softlockup_threshold = 60; +static void avert_softlockup(unsigned long data) +{ + *(unsigned long*)data = 1; +} + +/* + * Add a timer to handle the case of hogging the cpu for + * time > softlockup_threshold. + * Add the timer every softlockup_threshold / 3 so that even if + * there is a huge delay in running our timer, we will still don't hit + * the softlockup case.(softlockup_tick() is run in hardirq() context while + * timers are run at softirq context) + * + */ +static inline void add_softlockup_timer(struct timer_list *timer, unsigned long *data) +{ + setup_timer(timer, avert_softlockup, (unsigned long) data); + timer->expires = jiffies + usecs_to_jiffies(softlockup_threshold * 1000000 / 3); + add_timer(timer); +} + +static inline void del_softlockup_timer(struct timer_list *timer) +{ + /* We need delete synchronously since the variable being touched by + * timer interrupt is on the stack + */ + del_timer_sync(timer); +} +#endif + +/* + * micscif_rma_list_cpu_copy: + * + * Traverse all the windows and perform CPU copy. 
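A small sketch of the chunking rule used in the copy loop below, assuming 4 KiB pages: each pass copies only up to the nearer of the two page boundaries, so one temporary mapping per side is enough for each iteration.

static unsigned long ex_cpu_copy_chunk(unsigned long src_off,
                                       unsigned long dst_off,
                                       unsigned long remaining)
{
    unsigned long src_in_page = src_off & (4096UL - 1);
    unsigned long dst_in_page = dst_off & (4096UL - 1);
    unsigned long worst = src_in_page > dst_in_page ? src_in_page : dst_in_page;
    unsigned long chunk = 4096UL - worst;     /* bytes left to the nearer boundary */

    return chunk < remaining ? chunk : remaining;
}
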
+ */ +int micscif_rma_list_cpu_copy(struct mic_copy_work *work) +{ + void *src_virt, *dst_virt; + size_t loop_len, remaining_len; + int src_cache_off, dst_cache_off; + uint64_t src_offset = work->src_offset, dst_offset = work->dst_offset; + struct reg_range_t *src_window = work->src_window; + struct reg_range_t *dst_window = work->dst_window; + uint64_t end_src_offset, end_dst_offset; + struct list_head *item; + int srcchunk_ind = 0; + int dstchunk_ind = 0; + uint64_t src_start_offset, dst_start_offset; + int ret = 0; +#if !defined(WINDOWS) && !defined(CONFIG_PREEMPT) + unsigned long timer_fired = 0; + struct timer_list timer; + int cpu = smp_processor_id(); + add_softlockup_timer(&timer, &timer_fired); +#endif + + remaining_len = work->len; + src_start_offset = src_window->offset; + dst_start_offset = dst_window->offset; + + while (remaining_len) { +#if !defined(WINDOWS) && !defined(CONFIG_PREEMPT) + /* Ideally we should call schedule only if we didn't sleep + * in between. But there is no way to know that. + */ + if (timer_fired) { + timer_fired = 0; + if (smp_processor_id() == cpu) + touch_softlockup_watchdog(); + else + cpu = smp_processor_id(); + add_softlockup_timer(&timer, &timer_fired); + } +#endif + src_cache_off = src_offset & ~PAGE_MASK; + dst_cache_off = dst_offset & ~PAGE_MASK; + loop_len = PAGE_SIZE - + ((src_cache_off > dst_cache_off) ? + src_cache_off : dst_cache_off); + if (remaining_len < loop_len) + loop_len = remaining_len; + + if (RMA_WINDOW_SELF == src_window->type) + src_virt = get_local_va(src_offset, src_window, loop_len); + else + src_virt = ioremap_remote(src_offset, + src_window, loop_len, work->loopback, work->remote_dev, &srcchunk_ind, &src_start_offset); + if (!src_virt) { + ret = -ENOMEM; + goto error; + } + + if (RMA_WINDOW_SELF == dst_window->type) + dst_virt = get_local_va(dst_offset, dst_window, loop_len); + else + dst_virt = ioremap_remote(dst_offset, + dst_window, loop_len, work->loopback, work->remote_dev, &dstchunk_ind, &dst_start_offset); + if (!dst_virt) { + if (RMA_WINDOW_PEER == src_window->type) + iounmap_remote(src_virt, loop_len, work); + ret = -ENOMEM; + goto error; + } + + if (work->loopback) + memcpy(dst_virt, src_virt, loop_len); + else { + + if (RMA_WINDOW_SELF == src_window->type){ + memcpy_toio(dst_virt, src_virt, loop_len); + } + else{ + memcpy_fromio(dst_virt, src_virt, loop_len); + } + serializing_request(dst_virt); + smp_mb(); + } + if (RMA_WINDOW_PEER == src_window->type) + iounmap_remote(src_virt, loop_len, work); + + if (RMA_WINDOW_PEER == dst_window->type) + iounmap_remote(dst_virt, loop_len, work); + + src_offset += loop_len; + dst_offset += loop_len; + remaining_len -= loop_len; + if (remaining_len) { + end_src_offset = src_window->offset + + (src_window->nr_pages << PAGE_SHIFT); + end_dst_offset = dst_window->offset + + (dst_window->nr_pages << PAGE_SHIFT); + if (src_offset == end_src_offset) { + item = ( + &src_window->list_member)->next; + src_window = list_entry(item, + struct reg_range_t, + list_member); + srcchunk_ind = 0; + src_start_offset = src_window->offset; + } + if (dst_offset == end_dst_offset) { + item = ( + &dst_window->list_member)->next; + dst_window = list_entry(item, + struct reg_range_t, + list_member); + dstchunk_ind = 0; + dst_start_offset = dst_window->offset; + } + } + } +error: +#if !defined(WINDOWS) && !defined(CONFIG_PREEMPT) + del_softlockup_timer(&timer); +#endif + return ret; +} diff --git a/micscif/micscif_rma_list.c b/micscif/micscif_rma_list.c new file mode 100644 index 0000000..9052c1f --- 
/dev/null +++ b/micscif/micscif_rma_list.c @@ -0,0 +1,533 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include "mic/micscif.h" +#include "mic/micscif_smpt.h" +#include "mic/mic_dma_api.h" +#include "mic/micscif_kmem_cache.h" +#ifdef CONFIG_MMU_NOTIFIER +#include +#include +#endif +#ifndef _MIC_SCIF_ +#include "mic_common.h" +#endif +#include "mic/micscif_map.h" + +/* + * micscif_insert_tcw: + * + * Insert a temp window to the temp registration list sorted by va_for_temp. + * RMA lock must be held. + */ +void micscif_insert_tcw(struct reg_range_t *window, + struct list_head *head) +{ + struct reg_range_t *curr = NULL, *prev = NULL; + struct list_head *item; + BUG_ON(!window); + INIT_LIST_HEAD(&window->list_member); + /* + * HSD 4845254 + * Hack for worst case performance + * Compare with tail and if the new entry is new tail add it to the end + */ + if (!list_empty(head)) { + curr = list_entry(head->prev, struct reg_range_t, list_member); + if ((uint64_t) curr->va_for_temp < (uint64_t) window->va_for_temp) { + list_add_tail(&window->list_member, head); + return; + } + } + /* + * We don't need the if(!prev) code but I am gonna leave it as + * is for now. If someone touches the above code it is likely that they + * will miss that they have to add if(!prev) block + */ + list_for_each(item, head) { + curr = list_entry(item, struct reg_range_t, list_member); + if ((uint64_t) curr->va_for_temp > (uint64_t) window->va_for_temp) + break; + prev = curr; + } + if (!prev) + list_add(&window->list_member, head); + else + list_add(&window->list_member, &prev->list_member); +} +/* + * micscif_insert_window: + * + * Insert a window to the self registration list sorted by offset. + * RMA lock must be held. 
+ */ +void micscif_insert_window(struct reg_range_t *window, struct list_head *head) +{ + struct reg_range_t *curr = NULL, *prev = NULL; + struct list_head *item; + BUG_ON(!window); + INIT_LIST_HEAD(&window->list_member); + list_for_each(item, head) { + curr = list_entry(item, struct reg_range_t, list_member); + if (curr->offset > window->offset) + break; + prev = curr; + } + if (!prev) + list_add(&window->list_member, head); + else + list_add(&window->list_member, &prev->list_member); +} + +/* + * micscif_query_tcw: + * + * Query the temp cached registration list of ep and check if a valid contiguous + * range of windows exist. + * If there is a partial overlap, delete the existing window and create a new one + * that encompasses the previous window and a new range + * RMA lock must be held. + */ +int micscif_query_tcw(struct endpt *ep, struct micscif_rma_req *req) +{ + struct list_head *item, *temp; + struct reg_range_t *window; + uint64_t start_va_window, start_va_req = (uint64_t) req->va_for_temp; + uint64_t end_va_window, end_va_req = start_va_req + req->nr_bytes; + + /* + * HSD 4845254 + * Hack for the worst case scenario + * Avoid traversing the entire list to find out that there is no + * entry that matches + */ + if (!list_empty(req->head)) { + temp = req->head->prev; + window = list_entry(temp, + struct reg_range_t, list_member); + end_va_window = (uint64_t) window->va_for_temp + + (window->nr_pages << PAGE_SHIFT); + if (start_va_req > end_va_window) + return -ENXIO; + } + list_for_each_safe(item, temp, req->head) { + window = list_entry(item, + struct reg_range_t, list_member); + start_va_window = (uint64_t) window->va_for_temp; + end_va_window = (uint64_t) window->va_for_temp + + (window->nr_pages << PAGE_SHIFT); + pr_debug("%s %d start_va_window 0x%llx end_va_window 0x%llx" + " start_va_req 0x%llx end_va_req 0x%llx req->nr_bytes 0x%lx\n", + __func__, __LINE__, start_va_window, end_va_window, + start_va_req, end_va_req, req->nr_bytes); + if (start_va_req < start_va_window) { + if (end_va_req < start_va_window) { + /* No overlap */ + } else { + if ((window->prot & req->prot) != req->prot) { + + } else { + req->nr_bytes += ((end_va_req > end_va_window) ? 0:(end_va_window - end_va_req)); + pr_debug("%s %d Extend req->va_for_temp %p req->nr_byte 0x%lx\n", + __func__, __LINE__, req->va_for_temp, req->nr_bytes); + } + __micscif_rma_destroy_tcw_helper(window); + } + break; + } else { + if (start_va_req > end_va_window) { + /* No overlap */ + continue; + } else { + if ((window->prot & req->prot) != req->prot) { + __micscif_rma_destroy_tcw_helper(window); + break; + } + if (end_va_req > end_va_window) { + req->va_for_temp = (void*) start_va_window; + req->nr_bytes = end_va_req - start_va_window; + pr_debug("%s %d Extend req->va_for_temp %p req->nr_byte 0x%lx\n", + __func__, __LINE__, req->va_for_temp, req->nr_bytes); + __micscif_rma_destroy_tcw_helper(window); + return -ENXIO; + } else { + *(req->out_window) = window; + return 0; + } + } + } + } + pr_debug("%s %d ENXIO\n", __func__, __LINE__); + return -ENXIO; +} + +/* + * micscif_query_window: + * + * Query the registration list and check if a valid contiguous + * range of windows exist. + * RMA lock must be held. 
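A usage sketch mirroring micscif_prog_signal() earlier in this patch, with every field and constant name taken from that call site: the caller fills a micscif_rma_req and lets micscif_query_window() locate the window backing a single 8-byte offset.

static int ex_find_window(struct endpt *ep, off_t offset,
                          struct reg_range_t **out_window)
{
    struct micscif_rma_req req;

    req.out_window = out_window;
    req.offset     = offset;
    req.nr_bytes   = sizeof(uint64_t);
    req.prot       = SCIF_PROT_WRITE;
    req.type       = WINDOW_SINGLE;
    req.head       = &ep->rma_info.reg_list;   /* self registration list */

    return micscif_query_window(&req);          /* 0, -ENXIO or -EPERM */
}
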
+ */ +int micscif_query_window(struct micscif_rma_req *req) +{ + struct list_head *item; + struct reg_range_t *window; + uint64_t end_offset, offset = req->offset; + uint64_t tmp_min, nr_bytes_left = req->nr_bytes; + + list_for_each(item, req->head) { + window = list_entry(item, + struct reg_range_t, list_member); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + if (offset < window->offset) + /* Offset not found! */ + return -ENXIO; + if (offset < end_offset) { + /* Check read/write protections. */ + if ((window->prot & req->prot) != req->prot) + return -EPERM; + if (nr_bytes_left == req->nr_bytes) + /* Store the first window */ + *(req->out_window) = window; + tmp_min = min(end_offset - offset, nr_bytes_left); + nr_bytes_left -= tmp_min; + offset += tmp_min; + /* + * Range requested encompasses + * multiple windows contiguously. + */ + if (!nr_bytes_left) { + /* Done for partial window */ + if (req->type == WINDOW_PARTIAL || + req->type == WINDOW_SINGLE) + return 0; + /* Extra logic for full windows */ + if (offset == end_offset) + /* Spanning multiple whole windows */ + return 0; + /* Not spanning multiple whole windows */ + return -ENXIO; + } + if (req->type == WINDOW_SINGLE) + break; + } + } + printk(KERN_ERR "%s %d ENXIO\n", __func__, __LINE__); + return -ENXIO; +} + +/* + * micscif_rma_list_mmap: + * + * Traverse the remote registration list starting from start_window: + * 1) Check read/write protections. + * 2) Create VtoP mappings via remap_pfn_range(..) + * 3) Once step 1) and 2) complete successfully then traverse the range of + * windows again and bump the reference count. + * RMA lock must be held. + */ +int micscif_rma_list_mmap(struct reg_range_t *start_window, + uint64_t offset, int nr_pages, struct vm_area_struct *vma) +{ + struct list_head *item, *head; + uint64_t end_offset, loop_offset = offset; + struct reg_range_t *window; + int64_t start_page_nr, loop_nr_pages, nr_pages_left = nr_pages; + struct endpt *ep = (struct endpt *)start_window->ep; + int i, err = 0; + uint64_t j =0; + dma_addr_t phys_addr; + + might_sleep(); + BUG_ON(!mutex_is_locked(&ep->rma_info.rma_lock)); + + /* Start traversing from the previous link in the list */ + head = ((&start_window->list_member))->prev; + list_for_each(item, head) { + window = list_entry(item, struct reg_range_t, + list_member); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + start_page_nr = (loop_offset - window->offset) >> PAGE_SHIFT; + loop_nr_pages = min((int64_t)((end_offset - loop_offset) >> PAGE_SHIFT), + nr_pages_left); + for (i = (int)start_page_nr; + i < ((int)start_page_nr + (int)loop_nr_pages); i++, j++) { + + phys_addr = +#if !defined(_MIC_SCIF_) && defined(CONFIG_ML1OM) + is_self_scifdev(ep->remote_dev) ? + micscif_get_dma_addr(window, loop_offset, + NULL, NULL, NULL) : window->phys_addr[i]; +#else + get_phys_addr(micscif_get_dma_addr(window, loop_offset, + NULL, NULL, NULL), ep->remote_dev); +#endif + /* + * Note: + * 1) remap_pfn_rnage returns an error if there is an + * attempt to create MAP_PRIVATE COW mappings. + */ + if ((err = remap_pfn_range(vma, + ((vma)->vm_start) + (j * PAGE_SIZE), + phys_addr >> PAGE_SHIFT, + PAGE_SIZE, + ((vma)->vm_page_prot)))) + goto error; + loop_offset += PAGE_SIZE; + } + nr_pages_left -= loop_nr_pages; + if (!nr_pages_left) + break; + } + BUG_ON(nr_pages_left); + /* + * No more failures expected. Bump up the ref count for all + * the windows. 
Another traversal from start_window required + * for handling errors encountered across windows during + * remap_pfn_range(..). + */ + loop_offset = offset; + nr_pages_left = nr_pages; + head = (&(start_window->list_member))->prev; + list_for_each(item, head) { + window = list_entry(item, struct reg_range_t, + list_member); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + start_page_nr = (loop_offset - window->offset) >> PAGE_SHIFT; + loop_nr_pages = min((int64_t)((end_offset - loop_offset) >> PAGE_SHIFT), + nr_pages_left); + get_window_ref_count(window, loop_nr_pages); + nr_pages_left -= loop_nr_pages; + loop_offset += (loop_nr_pages << PAGE_SHIFT); + if (!nr_pages_left) + break; + } + BUG_ON(nr_pages_left); +error: + if (err) + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + return err; +} + +/* + * micscif_rma_list_munmap: + * + * Traverse the remote registration list starting from window: + * 1) Decrement ref count. + * 2) If the ref count drops to zero then send a SCIF_MUNMAP message to peer. + * RMA lock must be held. + */ +void micscif_rma_list_munmap(struct reg_range_t *start_window, + uint64_t offset, int nr_pages) +{ + struct list_head *item, *tmp, *head; + struct nodemsg msg; + uint64_t loop_offset = offset, end_offset; + int64_t loop_nr_pages, nr_pages_left = nr_pages; + struct endpt *ep = (struct endpt *)start_window->ep; + struct reg_range_t *window; + + BUG_ON(!mutex_is_locked(&ep->rma_info.rma_lock)); + + msg.uop = SCIF_MUNMAP; + msg.src = ep->port; + loop_offset = offset; + nr_pages_left = nr_pages; + /* Start traversing from the previous link in the list */ + head = (&(start_window->list_member))->prev; + list_for_each_safe(item, tmp, head) { + window = list_entry(item, struct reg_range_t, + list_member); + RMA_MAGIC(window); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + loop_nr_pages = min((int64_t)((end_offset - loop_offset) >> PAGE_SHIFT), + nr_pages_left); + put_window_ref_count(window, loop_nr_pages); + if (!window->ref_count) { + if (scifdev_alive(ep)) + drain_dma_intr(ep->rma_info.dma_chan); + /* Inform the peer about this munmap */ + msg.payload[0] = window->peer_window; + /* No error handling for Notification messages. */ + micscif_nodeqp_send(ep->remote_dev, &msg, ep); + list_del(&window->list_member); + /* Destroy this window from the peer's registered AS */ + micscif_destroy_remote_window(ep, window); + } + nr_pages_left -= loop_nr_pages; + loop_offset += (loop_nr_pages << PAGE_SHIFT); + if (!nr_pages_left) + break; + } + BUG_ON(nr_pages_left); +} + +/* + * micscif_rma_list_unregister: + * + * Traverse the self registration list starting from window: + * 1) Call micscif_unregister_window(..) + * RMA lock must be held. 
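+ * Returns 0 on success, or the error from the first
+ * micscif_unregister_window(..) call that fails.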
+ */ +int micscif_rma_list_unregister(struct reg_range_t *window, + uint64_t offset, int nr_pages) +{ + struct list_head *item, *tmp, *head; + uint64_t end_offset; + int err = 0; + int64_t loop_nr_pages; + struct endpt *ep = (struct endpt *)window->ep; + + BUG_ON(!mutex_is_locked(&ep->rma_info.rma_lock)); + /* Start traversing from the previous link in the list */ + head = (&window->list_member)->prev; + list_for_each_safe(item, tmp, head) { + window = list_entry(item, struct reg_range_t, + list_member); + RMA_MAGIC(window); + end_offset = window->offset + + (window->nr_pages << PAGE_SHIFT); + loop_nr_pages = min((int)((end_offset - offset) >> PAGE_SHIFT), + nr_pages); + if ((err = micscif_unregister_window(window))) + return err; + nr_pages -= (int)loop_nr_pages; + offset += (loop_nr_pages << PAGE_SHIFT); + if (!nr_pages) + break; + } + BUG_ON(nr_pages); + return 0; +} + +/* + * micscif_unregister_all_window: + * + * Traverse all the windows in the self registration list and: + * 1) Call micscif_unregister_window(..) + * RMA lock must be held. + */ +int micscif_unregister_all_windows(scif_epd_t epd) +{ + struct list_head *item, *tmp; + struct reg_range_t *window; + struct endpt *ep = (struct endpt *)epd; + struct list_head *head = &ep->rma_info.reg_list; + int err = 0; + + queue_work(ms_info.mi_misc_wq, &ms_info.mi_misc_work); + mutex_lock(&ep->rma_info.rma_lock); +retry: + item = NULL; + tmp = NULL; + list_for_each_safe(item, tmp, head) { + window = list_entry(item, + struct reg_range_t, list_member); + ep->rma_info.async_list_del = 0; + if ((err = micscif_unregister_window(window))) + pr_debug("%s %d err %d\n", + __func__, __LINE__, err); + /* + * Need to restart list traversal if there has been + * an asynchronous list entry deletion. + */ + if (ep->rma_info.async_list_del) + goto retry; + } + mutex_unlock(&ep->rma_info.rma_lock); + + /* + * The following waits cannot be interruptible since they are + * from the driver release() entry point. + */ + err = wait_event_timeout(ep->rma_info.fence_wq, + !ep->rma_info.fence_refcount, NODE_ALIVE_TIMEOUT); + /* Timeout firing is unexpected. Is the DMA engine hung? */ + if (!err) + printk(KERN_ERR "%s %d err %d\n", __func__, __LINE__, err); + +#ifdef CONFIG_MMU_NOTIFIER + if (!list_empty(&ep->rma_info.mmn_list)) { + spin_lock(&ms_info.mi_rmalock); + list_add_tail(&ep->mmu_list, &ms_info.mi_mmu_notif_cleanup); + spin_unlock(&ms_info.mi_rmalock); + queue_work(ms_info.mi_mmu_notif_wq, &ms_info.mi_mmu_notif_work); + } +#endif + return err; +} + +/* + * micscif_rma_list_get_pages_check: + * + * Traverse the remote registration list and return 0 if all the + * scif_get_pages()/scif_put_pages() ref_counts are zero else return -1. 
+ */ +int micscif_rma_list_get_pages_check(struct endpt *ep) +{ + struct list_head *item, *head = &ep->rma_info.remote_reg_list; + struct reg_range_t *window; + int err = 0; + + mutex_lock(&ep->rma_info.rma_lock); + list_for_each(item, head) { + window = list_entry(item, + struct reg_range_t, list_member); + if (window->get_put_ref_count) { + err = -1; + break; + } + } + mutex_unlock(&ep->rma_info.rma_lock); + return err; +} + +/* Only debug API's below */ +void micscif_display_all_windows(struct list_head *head) +{ + struct list_head *item; + struct reg_range_t *window; + pr_debug("\nWindow List Start\n"); + list_for_each(item, head) { + window = list_entry(item, + struct reg_range_t, list_member); + micscif_display_window(window, __func__, __LINE__); + } + pr_debug("Window List End\n\n"); +} diff --git a/micscif/micscif_select.c b/micscif/micscif_select.c new file mode 100644 index 0000000..c6f125f --- /dev/null +++ b/micscif/micscif_select.c @@ -0,0 +1,446 @@ +/* + * Implementation of select and poll + * + * Copyright 2011-2012 Intel Corporation. + * + * This file is a derivative of fs/select.c from within the Linux kernel + * source distribution, version 2.6.34; it has been modified (starting + * in May 2011) to work within the context of the SCIF driver. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA. + * + * Initial comment from fs/select.c: + * + * This file contains the procedures for the handling of select and poll + * + * Created for Linux based loosely upon Mathius Lattner's minix + * patches by Peter MacDonald. Heavily edited by Linus. + * + * 4 February 1994 + * COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS + * flag set in its personality we do *not* modify the given timeout + * parameter to reflect time remaining. + * + * 24 January 2000 + * Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation + * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian). + */ + +#include +#include +#include +#include +#include + +#include "mic/micscif.h" + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) +#include +#endif + +struct poll_table_page { + struct poll_table_page *next; + struct poll_table_entry *entry; + struct poll_table_entry entries[0]; +}; + +/* + * Estimate expected accuracy in ns from a timeval. + * + * After quite a bit of churning around, we've settled on + * a simple thing of taking 0.1% of the timeout as the + * slack, with a cap of 100 msec. + * "nice" tasks get a 0.5% slack instead. + * + * Consider this comment an open invitation to come up with even + * better solutions.. 
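+ * For example, a 1 second timeout gets roughly 1 ms of slack (0.1%);
+ * the 100 msec cap is only reached for timeouts of 100 seconds or more
+ * (20 seconds for "nice" tasks at 0.5%).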
+ */ + +#define MAX_SLACK (100 * NSEC_PER_MSEC) + +static long __estimate_accuracy(struct timespec *tv) +{ + long slack; + int divfactor = 1000; + + if (tv->tv_sec < 0) + return 0; + + if (task_nice(current) > 0) + divfactor = divfactor / 5; + + if (tv->tv_sec > MAX_SLACK / (NSEC_PER_SEC/divfactor)) + return MAX_SLACK; + + slack = tv->tv_nsec / divfactor; + slack += tv->tv_sec * (NSEC_PER_SEC/divfactor); + + if (slack > MAX_SLACK) + return MAX_SLACK; + + return slack; +} + +static long estimate_accuracy(struct timespec *tv) +{ + unsigned long ret; + struct timespec now; + + /* + * Realtime tasks get a slack of 0 for obvious reasons. + */ + + if (rt_task(current)) + return 0; + + ktime_get_ts(&now); + now = timespec_sub(*tv, now); + ret = __estimate_accuracy(&now); + if (ret < current->timer_slack_ns) + return current->timer_slack_ns; + return ret; +} + +#define POLL_TABLE_FULL(table) \ + ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table)) + +/* + * Ok, Peter made a complicated, but straightforward multiple_wait() function. + * I have rewritten this, taking some shortcuts: This code may not be easy to + * follow, but it should be free of race-conditions, and it's practical. If you + * understand what I'm doing here, then you understand how the linux + * sleep/wakeup mechanism works. + * + * Two very simple procedures, poll_wait() and poll_freewait() make all the + * work. poll_wait() is an inline-function defined in , + * as all select/poll functions have to call it to add an entry to the + * poll table. + */ +static void __pollwait(struct file *filp __attribute__((unused)), wait_queue_head_t *wait_address, + poll_table *p); + +static void scif_poll_initwait(struct poll_wqueues *pwq) +{ + init_poll_funcptr(&pwq->pt, __pollwait); + pwq->polling_task = current; + pwq->triggered = 0; + pwq->error = 0; + pwq->table = NULL; + pwq->inline_index = 0; +} + +static void free_poll_entry(struct poll_table_entry *entry) +{ + remove_wait_queue(entry->wait_address, &entry->wait); +} + +static void scif_poll_freewait(struct poll_wqueues *pwq) +{ + struct poll_table_page * p = pwq->table; + int i; + for (i = 0; i < pwq->inline_index; i++) + free_poll_entry(pwq->inline_entries + i); + while (p) { + struct poll_table_entry *entry; + struct poll_table_page *old; + + entry = p->entry; + do { + entry--; + free_poll_entry(entry); + } while (entry > p->entries); + old = p; + p = p->next; + free_page((unsigned long) old); + } +} + +static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p) +{ + struct poll_table_page *table = p->table; + + if (p->inline_index < N_INLINE_POLL_ENTRIES) + return p->inline_entries + p->inline_index++; + + if (!table || POLL_TABLE_FULL(table)) { + struct poll_table_page *new_table; + + new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL); + if (!new_table) { + p->error = -ENOMEM; + return NULL; + } + new_table->entry = new_table->entries; + new_table->next = table; + p->table = new_table; + table = new_table; + } + + return table->entry++; +} + +static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct poll_wqueues *pwq = wait->private; + DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task); + + /* + * Although this function is called under waitqueue lock, LOCK + * doesn't imply write barrier and the users expect write + * barrier semantics on wakeup functions. The following + * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() + * and is paired with set_mb() in poll_schedule_timeout. 
+ */ + smp_wmb(); + pwq->triggered = 1; + + /* + * Perform the default wake up operation using a dummy + * waitqueue. + * + * TODO: This is hacky but there currently is no interface to + * pass in @sync. @sync is scheduled to be removed and once + * that happens, wake_up_process() can be used directly. + */ + return default_wake_function(&dummy_wait, mode, sync, key); +} + +static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct poll_table_entry *entry; + + entry = container_of(wait, struct poll_table_entry, wait); + if (key && !((unsigned long)key & entry->key)) + return 0; + return __pollwake(wait, mode, sync, key); +} + +/* Add a new entry */ +static void __pollwait(struct file *filp __attribute__((unused)), wait_queue_head_t *wait_address, + poll_table *p) +{ + struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt); + struct poll_table_entry *entry = poll_get_entry(pwq); + if (!entry) + return; + entry->filp = NULL; + entry->wait_address = wait_address; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + entry->key = p->_key; +#else + entry->key = p->key; +#endif + init_waitqueue_func_entry(&entry->wait, pollwake); + entry->wait.private = pwq; + add_wait_queue(wait_address, &entry->wait); +} + +int poll_schedule_timeout(struct poll_wqueues *pwq, int state, + ktime_t *expires, unsigned long slack) +{ + int rc = -EINTR; + + set_current_state(state); + if (!pwq->triggered) + rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); + __set_current_state(TASK_RUNNING); + + /* + * Prepare for the next iteration. + * + * The following set_mb() serves two purposes. First, it's + * the counterpart rmb of the wmb in pollwake() such that data + * written before wake up is always visible after wake up. + * Second, the full barrier guarantees that triggered clearing + * doesn't pass event check of the next iteration. Note that + * this problem doesn't exist for the first iteration as + * add_wait_queue() has full barrier semantics. + */ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0)) + smp_store_mb(pwq->triggered, 0); +#else + set_mb(pwq->triggered, 0); +#endif + + return rc; +} + +static unsigned int scif_poll_kernel(poll_table *pwait, struct endpt *ep) +{ + return __scif_pollfd(NULL, pwait, ep); +} + +/* + * Fish for pollable events on the pollfd->fd file descriptor. We're only + * interested in events matching the pollfd->events mask, and the result + * matching that mask is both recorded in pollfd->revents and returned. The + * pwait poll_table will be used by the fd-provided poll handler for waiting, + * if non-NULL. + */ +static inline unsigned int do_pollfd(struct scif_pollepd *pollfd, poll_table *pwait) +{ + unsigned int mask; + scif_epd_t epd; + + mask = 0; + epd = pollfd->epd; + if (epd) { + mask = POLLNVAL; + mask = DEFAULT_POLLMASK; + if (pwait) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0)) + pwait->_key = pollfd->events | POLLERR | POLLHUP; +#else + pwait->key = pollfd->events | POLLERR | POLLHUP; +#endif + mask = scif_poll_kernel(pwait, epd); + /* Mask out unneeded events. 
*/ + mask &= pollfd->events | POLLERR | POLLHUP; + } + pollfd->revents = mask; + + return mask; +} + +static int do_poll(unsigned int nfds, struct scif_pollepd *ufds, + struct poll_wqueues *wait, struct timespec *end_time) +{ + poll_table* pt = &wait->pt; + ktime_t expire, *to = NULL; + int timed_out = 0, count = 0, i = 0; + unsigned long slack = 0; + + /* Optimise the no-wait case */ + if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { + pt = NULL; + timed_out = 1; + } + + if (end_time && !timed_out) + slack = estimate_accuracy(end_time); + + for (;;) { + for (i = 0; i < nfds; i++) { + /* + * Fish for events. If we found one, record it + * and kill the poll_table, so we don't + * needlessly register any other waiters after + * this. They'll get immediately deregistered + * when we break out and return. + */ + if (do_pollfd(ufds + i, pt)) { + count++; + pt = NULL; + } + } + /* + * All waiters have already been registered, so don't provide + * a poll_table to them on the next loop iteration. + */ + pt = NULL; + if (!count) { + count = wait->error; + if (signal_pending(current)) + count = -EINTR; + } + if (count || timed_out) + break; + + /* + * If this is the first loop and we have a timeout + * given, then we convert to ktime_t and set the to + * pointer to the expiry value. + */ + if (end_time && !to) { + expire = timespec_to_ktime(*end_time); + to = &expire; + } + + if (!poll_schedule_timeout(wait, TASK_INTERRUPTIBLE, to, slack)) + timed_out = 1; + } + return count; +} + +static int do_scif_poll(struct scif_pollepd *ufds, unsigned int nfds, + struct timespec *end_time) +{ + struct poll_wqueues table; + int epdcount; + + scif_poll_initwait(&table); + epdcount = do_poll(nfds, ufds, &table, end_time); + scif_poll_freewait(&table); + + return epdcount; +} + +/* + * Add two timespec values and do a safety check for overflow. + * It's assumed that both values are valid (>= 0) + */ +static struct timespec scif_timespec_add_safe(const struct timespec lhs, + const struct timespec rhs) +{ + struct timespec res; + + set_normalized_timespec(&res, lhs.tv_sec + rhs.tv_sec, + lhs.tv_nsec + rhs.tv_nsec); + + if (res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec) + res.tv_sec = TIME_T_MAX; + + return res; +} +/** + * poll_select_set_timeout - helper function to setup the timeout value + * @to: pointer to timespec variable for the final timeout + * @sec: seconds (from user space) + * @nsec: nanoseconds (from user space) + * + * Note, we do not use a timespec for the user space value here, That + * way we can use the function for timeval and compat interfaces as well. + * + * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0. 
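+ * scif_poll() below calls this with timeout_msecs split into whole
+ * seconds and the remaining nanoseconds.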
+ */ +static int scif_poll_select_set_timeout(struct timespec *to, long sec, long nsec) +{ + struct timespec ts = {.tv_sec = sec, .tv_nsec = nsec}; + + if (!timespec_valid(&ts)) + return -EINVAL; + + /* Optimize for the zero timeout value here */ + if (!sec && !nsec) { + to->tv_sec = to->tv_nsec = 0; + } else { + ktime_get_ts(to); + *to = scif_timespec_add_safe(*to, ts); + } + return 0; +} + +int scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs) +{ + struct timespec end_time, *to = NULL; + if (timeout_msecs >= 0) { + to = &end_time; + scif_poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC, + NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC)); + } + + return do_scif_poll(ufds, nfds, to); +} +EXPORT_SYMBOL(scif_poll); diff --git a/micscif/micscif_smpt.c b/micscif/micscif_smpt.c new file mode 100644 index 0000000..35c0ec2 --- /dev/null +++ b/micscif/micscif_smpt.c @@ -0,0 +1,457 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#include +#include +#if defined(HOST) || defined(WINDOWS) +#include "mic_common.h" +#endif + +struct _mic_ctx_t; +// Figure out which SMPT entry based on the host addr +#define SYSTEM_ADDR_TO_SMPT(sysaddr) ((sysaddr) >> (MIC_SYSTEM_PAGE_SHIFT)) +#define HOSTMIC_PA_TO_SMPT(hostmic_pa) (((hostmic_pa) - MIC_SYSTEM_BASE)\ + >> MIC_SYSTEM_PAGE_SHIFT) + +#define NUM_SMPT_ENTRIES_IN_USE 32 +#define SMPT_TO_MIC_PA(smpt_index) (MIC_SYSTEM_BASE + ((smpt_index) * \ + MIC_SYSTEM_PAGE_SIZE)) +#define MAX_HOST_MEMORY ((NUM_SMPT_ENTRIES_IN_USE) * MIC_SYSTEM_PAGE_SIZE) +#define MAX_SYSTEM_ADDR ((MIC_SYSTEM_BASE) + (MAX_HOST_MEMORY) - (1)) +#define IS_MIC_SYSTEM_ADDR(addr) (((addr) >= MIC_SYSTEM_BASE) && \ + ((addr) <= MAX_SYSTEM_ADDR)) + +#define _PAGE_OFFSET(x) ((x) & ((PAGE_SIZE) - (1ULL))) +#define SMPT_OFFSET(x) ((x) & MIC_SYSTEM_PAGE_MASK) +#define PAGE_ALIGN_LOW(x) ALIGN(((x) - ((PAGE_SIZE) - 1ULL)), (PAGE_SIZE)) +#define PAGE_ALIGN_HIGH(x) ALIGN((x), (PAGE_SIZE)) +#define SMPT_ALIGN_LOW(x) ALIGN(((x) - (MIC_SYSTEM_PAGE_MASK)), \ + (MIC_SYSTEM_PAGE_SIZE)) +#define SMPT_ALIGN_HIGH(x) ALIGN((x), (MIC_SYSTEM_PAGE_SIZE)) + +#if defined(HOST) +#define SMPT_LOGGING 0 +#if SMPT_LOGGING +static int64_t smpt_ref_count_g[MAX_BOARD_SUPPORTED]; +static int64_t map_count_g; +static int64_t unmap_count_g; +#endif +#endif + +void mic_smpt_set(volatile void *mm_sbox, uint64_t dma_addr, uint64_t index) +{ + uint32_t smpt_reg_val = BUILD_SMPT(SNOOP_ON, dma_addr >> MIC_SYSTEM_PAGE_SHIFT); + writel(smpt_reg_val, (uint8_t*)mm_sbox + SBOX_SMPT00 + (4 * index)); +} + +#if defined(HOST) +/* + * Called once per board as part of starting a MIC + * to restore the SMPT state to the previous values + * as stored in SMPT SW data structures. + */ +void mic_smpt_restore(mic_ctx_t *mic_ctx) +{ + int i; + dma_addr_t dma_addr; + uint32_t *smpt = (uint32_t*)(mic_ctx->mmio.va + + HOST_SBOX_BASE_ADDRESS + SBOX_SMPT00); + uint32_t smpt_reg_val; + + for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) { + dma_addr = mic_ctx->mic_smpt[i].dma_addr; + if (mic_ctx->bi_family == FAMILY_KNC) { + smpt_reg_val = BUILD_SMPT(SNOOP_ON, + dma_addr >> MIC_SYSTEM_PAGE_SHIFT); + writel(smpt_reg_val, &smpt[i]); + } + } +} + +/* + * Called once per board as part of smpt init + * This does a 0-512G smpt mapping, + */ +void mic_smpt_init(mic_ctx_t *mic_ctx) +{ + int i; + dma_addr_t dma_addr; + uint32_t *smpt = (uint32_t*)(mic_ctx->mmio.va + + HOST_SBOX_BASE_ADDRESS + SBOX_SMPT00); + uint32_t smpt_reg_val; +#if SMPT_LOGGING + smpt_ref_count_g[mic_ctx->bi_id] = 0; +#endif + + spin_lock_init(&mic_ctx->smpt_lock); + mic_ctx->mic_smpt = kmalloc(sizeof(mic_smpt_t) + * NUM_SMPT_ENTRIES_IN_USE, GFP_KERNEL); + + for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) { + dma_addr = i * MIC_SYSTEM_PAGE_SIZE; + mic_ctx->mic_smpt[i].dma_addr = dma_addr; + mic_ctx->mic_smpt[i].ref_count = 0; + if (mic_ctx->bi_family == FAMILY_KNC) { + smpt_reg_val = BUILD_SMPT(SNOOP_ON, + dma_addr >> MIC_SYSTEM_PAGE_SHIFT); + writel(smpt_reg_val, &smpt[i]); + } + } +} + +/* + * Called during mic exit per ctx (i.e once for every board) + * If ref count is non-zero, then it means that some module + * did not call mic_unmap_single/mic_ctx_unmap_single correctly. 
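+ * On DEBUG builds every entry whose ref_count is still non-zero triggers
+ * a WARN_ON so such leaks are visible at teardown.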
+ */ +void +mic_smpt_uninit(mic_ctx_t *mic_ctx) +{ +#if SMPT_LOGGING + printk("global ref count for node = %d is %lld\n", + mic_ctx->bi_id+1, smpt_ref_count_g[mic_ctx->bi_id]); + printk("mic map calls = %lld, mic unmap calls = %lld \n", + map_count_g, unmap_count_g); + + for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) { + printk("[smpt_san%d] smpt_entry[%d] dma_addr = 0x%llX" + " ref_count = %lld \n", mic_ctx->bi_id+1, i, + mic_ctx->mic_smpt[i].dma_addr, + mic_ctx->mic_smpt[i].ref_count); + } +#endif +#ifdef DEBUG + { + int i; + for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) + WARN_ON(mic_ctx->mic_smpt[i].ref_count); + } +#endif + + kfree(mic_ctx->mic_smpt); + mic_ctx->mic_smpt = NULL; + ; +} + +dma_addr_t mic_ctx_map_single(mic_ctx_t *mic_ctx, void *p, size_t size) +{ + struct pci_dev *hwdev = mic_ctx->bi_pdev; + int bid = mic_ctx->bi_id; + + return mic_map_single(bid, hwdev, p, size); +} + +void mic_unmap_single(int bid, struct pci_dev *hwdev, dma_addr_t mic_addr, + size_t size) +{ + dma_addr_t dma_addr = mic_to_dma_addr(bid, mic_addr); + mic_unmap(bid, mic_addr, size); + pci_unmap_single(hwdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); +} + +void mic_ctx_unmap_single(mic_ctx_t *mic_ctx, dma_addr_t dma_addr, + size_t size) +{ + struct pci_dev *hwdev = mic_ctx->bi_pdev; + int bid = mic_ctx->bi_id; + mic_unmap_single(bid, hwdev, dma_addr, size); +} + +dma_addr_t mic_map_single(int bid, struct pci_dev *hwdev, void *p, + size_t size) +{ + dma_addr_t mic_addr = 0; + dma_addr_t dma_addr; + + dma_addr = pci_map_single(hwdev, p, size, PCI_DMA_BIDIRECTIONAL); + + if (!pci_dma_mapping_error(hwdev, dma_addr)) + if (!(mic_addr = mic_map(bid, dma_addr, size))) { + printk(KERN_ERR "mic_map failed board id %d\ + addr %#016llx size %#016zx\n", + bid, dma_addr, size); + pci_unmap_single(hwdev, dma_addr, + size, PCI_DMA_BIDIRECTIONAL); + } + return mic_addr; +} + +void add_smpt_entry(int spt, int64_t *ref, uint64_t dma_addr, int entries, mic_ctx_t *mic_ctx) +{ + + struct nodemsg msg; + dma_addr_t addr = dma_addr; + mic_smpt_t *mic_smpt = mic_ctx->mic_smpt; + int dev_id = mic_ctx->bi_id + 1; + void *mm_sbox = mic_ctx->mmio.va + HOST_SBOX_BASE_ADDRESS; + int i; + + for (i = spt; i < spt + entries; i++, addr += MIC_SYSTEM_PAGE_SIZE) { +#ifdef CONFIG_ML1OM + /* + * For KNF if the ref count is 0 and the entry number is greater + * than 16 then we must resend a SMPT_SET message in case the uOS + * was rebooted and lost SMPT register state (example during host + * suspend/hibernate. + */ + if (!mic_smpt[i].ref_count && i >= (NUM_SMPT_ENTRIES_IN_USE >> 1)) { +#else + if (!mic_smpt[i].ref_count && (mic_smpt[i].dma_addr != addr)) { +#endif + /* + * ref count was zero and dma_addr requested did not + * match the dma address in the table. So, this is a + * new entry in the table. + * KNF: Send a message to the card + * to update its smpt table with a new value. + * KNC: write to the SMPT registers from host since + * they are accessible. 
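+ * The FAMILY_ABR branch below takes the KNF message path; any other
+ * family writes the SMPT register directly via mic_smpt_set() (the
+ * KNC case).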
+ */ + if (mic_ctx->bi_family == FAMILY_ABR) { + msg.uop = SMPT_SET; + msg.payload[0] = addr; + msg.payload[1] = i; + msg.dst.node = scif_dev[dev_id].sd_node; + msg.src.node = 0; +#if SMPT_LOGGING + printk("[smpt_node%d] ==> sending msg to " + " node = %d dma_addr = 0x%llX, entry =" + "0x%llX\n" , mic_ctx->bi_id + 1, + scif_dev[dev_id].sd_node, + msg.payload[0], msg.payload[1]); +#endif + micscif_inc_node_refcnt(&scif_dev[dev_id], 1); + micscif_nodeqp_send(&scif_dev[dev_id], &msg, NULL); + micscif_dec_node_refcnt(&scif_dev[dev_id], 1); + } + else + mic_smpt_set(mm_sbox, addr, i); + mic_smpt[i].dma_addr = addr; + } + mic_smpt[i].ref_count += ref[i - spt]; + } +} + +dma_addr_t smpt_op(int bid, uint64_t dma_addr, + int entries, int64_t *ref) +{ + int spt = -1; /* smpt index */ + int ee = 0; /* existing entries */ + int fe = 0; /* free entries */ + int i; + unsigned long flags; + dma_addr_t mic_addr = 0; + dma_addr_t addr = dma_addr; + mic_ctx_t *mic_ctx = get_per_dev_ctx(bid); + mic_smpt_t *mic_smpt = mic_ctx->mic_smpt; + + if (micpm_get_reference(mic_ctx, true)) + goto exit; + spin_lock_irqsave(&mic_ctx->smpt_lock, flags); + + /* find existing entries */ + for (i = 0; i < NUM_SMPT_ENTRIES_IN_USE; i++) { + if (mic_smpt[i].dma_addr == addr) { + ee++; + addr += MIC_SYSTEM_PAGE_SIZE; + } + else if (ee) /* cannot find contiguous entries */ + goto not_found; + + if (ee == entries) + goto found; + } + + /* find free entry */ +#ifdef CONFIG_ML1OM + /* + * For KNF the SMPT registers are not host accessible so we maintain a + * 1:1 map for SMPT registers from 0-256GB i.e. the first 16 entries and + * look for SMPT entries for P2P and IB etc from the 16th entry onwards. + * This allows the KNF card to boot on Host systems with < 256GB system + * memory and access VNET/SCIF buffers without crashing. P2P and IB SMPT + * entries are setup after SCIF driver load/reload via SCIF Node QP + * SMPT_SET messages. + */ + for (i = NUM_SMPT_ENTRIES_IN_USE / 2 ; i < NUM_SMPT_ENTRIES_IN_USE; i++) { +#else + for (i = 0 ; i < NUM_SMPT_ENTRIES_IN_USE; i++) { +#endif + fe = (mic_smpt[i].ref_count == 0) ? 
fe + 1: 0; + if (fe == entries) + goto found; + } + +not_found: + spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags); + micpm_put_reference(mic_ctx); +exit: + return mic_addr; +found: + spt = i - entries + 1; + mic_addr = SMPT_TO_MIC_PA(spt); + add_smpt_entry(spt, ref, dma_addr, entries, mic_ctx); + spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags); + micpm_put_reference(mic_ctx); + return mic_addr; +} + + +/* + * Returns number of smpt entries needed for dma_addr to dma_addr + size + * also returns the reference count array for each of those entries + * and the starting smpt address + */ +int get_smpt_ref_count(int64_t *ref, dma_addr_t dma_addr, size_t size, + uint64_t *smpt_start) +{ + uint64_t start = dma_addr; + uint64_t end = dma_addr + size; + int i = 0; + + while (start < end) { + ref[i++] = min(SMPT_ALIGN_HIGH(start + 1), end) - start; + start = SMPT_ALIGN_HIGH(start + 1); + } + + if (smpt_start) + *smpt_start = SMPT_ALIGN_LOW(dma_addr); + + return i; +} + +/* + * Maps dma_addr to dma_addr + size memory in the smpt table + * of board bid + */ +dma_addr_t mic_map(int bid, dma_addr_t dma_addr, size_t size) +{ + dma_addr_t mic_addr = 0; + int entries; + int64_t ref[NUM_SMPT_ENTRIES_IN_USE]; + uint64_t smpt_start; +#if SMPT_LOGGING + unsigned long flags; + mic_ctx_t *mic_ctx = get_per_dev_ctx(bid); + spin_lock_irqsave(&mic_ctx->smpt_lock, flags); + map_count_g++; + smpt_ref_count_g[bid] += (int64_t)size; + spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags); +#endif + if (!size) + return mic_addr; + + /* + * Get number of smpt entries to be mapped, ref count array + * and the starting smpt address to start the search for + * free or existing smpt entries. + */ + entries = get_smpt_ref_count(ref, dma_addr, size, &smpt_start); + + /* Set the smpt table appropriately and get 16G aligned mic address */ + mic_addr = smpt_op(bid, smpt_start, entries, ref); + + /* + * If mic_addr is zero then its a error case + * since mic_addr can never be zero. 
+ * else generate mic_addr by adding the 16G offset in dma_addr + */ + if (!mic_addr) { + WARN_ON(1); + return mic_addr; + } + else + return (mic_addr + (dma_addr & MIC_SYSTEM_PAGE_MASK)); +} + +/* + * Unmaps mic_addr to mic_addr + size memory in the smpt table + * of board bid + */ +void mic_unmap(int bid, dma_addr_t mic_addr, size_t size) +{ + mic_ctx_t *mic_ctx = get_per_dev_ctx(bid); + mic_smpt_t *mic_smpt = mic_ctx->mic_smpt; + int64_t ref[NUM_SMPT_ENTRIES_IN_USE]; + int num_smpt; + int spt = HOSTMIC_PA_TO_SMPT(mic_addr); + int i; + unsigned long flags; + + if (!size) + return; + + if (!IS_MIC_SYSTEM_ADDR(mic_addr)) { + WARN_ON(1); + return; + } + + /* Get number of smpt entries to be mapped, ref count array */ + num_smpt = get_smpt_ref_count(ref, mic_addr, size, NULL); + + spin_lock_irqsave(&mic_ctx->smpt_lock, flags); + +#if SMPT_LOGGING + unmap_count_g++; + smpt_ref_count_g[bid] -= (int64_t)size; +#endif + + for (i = spt; i < spt + num_smpt; i++) { + mic_smpt[i].ref_count -= ref[i - spt]; + WARN_ON(mic_smpt[i].ref_count < 0); + } + spin_unlock_irqrestore(&mic_ctx->smpt_lock, flags); +} + +dma_addr_t mic_to_dma_addr(int bid, dma_addr_t mic_addr) +{ + mic_ctx_t *mic_ctx = get_per_dev_ctx(bid); + int spt = HOSTMIC_PA_TO_SMPT(mic_addr); + dma_addr_t dma_addr; + + if (!IS_MIC_SYSTEM_ADDR(mic_addr)) { + WARN_ON(1); + return 0; + } + dma_addr = mic_ctx->mic_smpt[spt].dma_addr + SMPT_OFFSET(mic_addr); + return dma_addr; +} + +#endif + +bool is_syspa(dma_addr_t pa) +{ + return IS_MIC_SYSTEM_ADDR(pa); +} diff --git a/micscif/micscif_sysfs.c b/micscif/micscif_sysfs.c new file mode 100644 index 0000000..c38a383 --- /dev/null +++ b/micscif/micscif_sysfs.c @@ -0,0 +1,234 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#include + +unsigned long scif_get_maxid(void); +static ssize_t show_scif_maxid(struct device *dev, struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", ms_info.mi_maxid); +} +static DEVICE_ATTR(maxnode, S_IRUGO, show_scif_maxid, NULL); + +unsigned long scif_get_total(void); +static ssize_t show_scif_total(struct device *dev, struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", ms_info.mi_total); +} +static DEVICE_ATTR(total, S_IRUGO, show_scif_total, NULL); + +unsigned long scif_get_nodes(void); +static ssize_t show_scif_nodes(struct device *dev, struct device_attribute *attr, char *buf) +{ + int len = 0; + int node; + + len += snprintf(buf + len, PAGE_SIZE, "%d:", ms_info.mi_total); + len += snprintf(buf + len, PAGE_SIZE, "%d", ms_info.mi_nodeid); + + for (node = 0; node <= ms_info.mi_maxid; node++) { + if (scif_dev[node].sd_state == SCIFDEV_RUNNING || + scif_dev[node].sd_state == SCIFDEV_SLEEPING || + is_self_scifdev(&scif_dev[node])) { + len += snprintf(buf + len, PAGE_SIZE, ",%d", scif_dev[node].sd_node); + } + } + + len += snprintf(buf + len, PAGE_SIZE, "\n"); + return len; +} +static DEVICE_ATTR(nodes, S_IRUGO, show_scif_nodes, NULL); + +static ssize_t show_watchdog_to(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", ms_info.mi_watchdog_to); +} + +static ssize_t store_watchdog_to(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int i, ret; + + if (sscanf(buf, "%d", &i) != 1) + goto invalid; + + if (i <= 0) + goto invalid; + + ms_info.mi_watchdog_to = i; + ret = strlen(buf); + printk("Current watchdog timeout %d seconds\n", ms_info.mi_watchdog_to); + goto bail; + +invalid: + printk(KERN_ERR "Attempt to set invalid watchdog timeout\n"); + ret = -EINVAL; +bail: + return ret; +} +static DEVICE_ATTR(watchdog_to, S_IRUGO | S_IWUSR, show_watchdog_to, store_watchdog_to); + +static ssize_t show_watchdog_enabled(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", ms_info.mi_watchdog_enabled); +} + +static ssize_t store_watchdog_enabled(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int i, ret; +#ifndef _MIC_SCIF_ + struct micscif_dev *scifdev; + int node; +#endif + + if (sscanf(buf, "%d", &i) != 1) + goto invalid; + + if (i < 0) + goto invalid; + + if (i && !ms_info.mi_watchdog_enabled) { + ms_info.mi_watchdog_enabled = 1; +#ifndef _MIC_SCIF_ + for (node = 1; node <= ms_info.mi_maxid; node++) { + scifdev = &scif_dev[node]; + if (scifdev->sd_ln_wq) + queue_delayed_work(scifdev->sd_ln_wq, + &scifdev->sd_watchdog_work, NODE_ALIVE_TIMEOUT); + } +#endif + } + + if (!i) + ms_info.mi_watchdog_enabled = 0; + + ret = strlen(buf); + printk("Watchdog timeout enabled = %d\n", ms_info.mi_watchdog_enabled); + goto bail; +invalid: + ret = -EINVAL; +bail: + return ret; +} +static DEVICE_ATTR(watchdog_enabled, S_IRUGO | S_IWUSR, show_watchdog_enabled, store_watchdog_enabled); + +static ssize_t show_watchdog_auto_reboot(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", ms_info.mi_watchdog_auto_reboot); +} + +static ssize_t store_watchdog_auto_reboot(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int i, ret; + + if (sscanf(buf, "%d", &i) != 1) + goto invalid; + + if (i < 0) + goto invalid; + + if (i && 
!ms_info.mi_watchdog_auto_reboot) + ms_info.mi_watchdog_auto_reboot = 1; + + if (!i) + ms_info.mi_watchdog_auto_reboot = 0; + + ret = strlen(buf); + printk("Watchdog auto reboot enabled = %d\n", ms_info.mi_watchdog_auto_reboot); + goto bail; +invalid: + ret = -EINVAL; +bail: + return ret; +} +static DEVICE_ATTR(watchdog_auto_reboot, S_IRUGO | S_IWUSR, show_watchdog_auto_reboot, store_watchdog_auto_reboot); + +static ssize_t show_proxy_dma_threshold(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%lld\n", ms_info.mi_proxy_dma_threshold); +} + +static ssize_t store_proxy_dma_threshold(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int ret; + uint64_t i; + + if (sscanf(buf, "%lld", &i) != 1) + goto invalid; + + ms_info.mi_proxy_dma_threshold = i; + ret = strlen(buf); + printk("P2P proxy DMA Threshold = %lld bytes\n", ms_info.mi_proxy_dma_threshold); + goto bail; +invalid: + ret = -EINVAL; +bail: + return ret; +} +static DEVICE_ATTR(proxy_dma_threshold, S_IRUGO | S_IWUSR, show_proxy_dma_threshold, store_proxy_dma_threshold); + +static struct attribute *scif_attributes[] = { + &dev_attr_maxnode.attr, + &dev_attr_total.attr, + &dev_attr_nodes.attr, + &dev_attr_watchdog_to.attr, + &dev_attr_watchdog_enabled.attr, + &dev_attr_watchdog_auto_reboot.attr, + &dev_attr_proxy_dma_threshold.attr, + NULL +}; + +struct attribute_group scif_attr_group = { + .attrs = scif_attributes +}; diff --git a/micscif/micscif_va_gen.c b/micscif/micscif_va_gen.c new file mode 100644 index 0000000..7338a57 --- /dev/null +++ b/micscif/micscif_va_gen.c @@ -0,0 +1,480 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +/* ************************************************************************* *\ +generate a virtual address for a given size +\* ************************************************************************* */ +#include "mic/micscif.h" + +/* ************************************************************************* *\ +FUNCTION: VaGenAddress::Initialize + +DESCRIPTION: Initialize VaGenAddress to point to one node of size = range +\* ************************************************************************* */ +static int +va_gen_init_internal(struct va_gen_addr *addr, uint64_t range) +{ + struct va_node *node; + int err; + + va_node_init(&addr->allocator); + if ((err = va_node_alloc(&addr->allocator, &addr->hole_list)) < 0) + goto init_err; + if (va_node_is_valid(addr->hole_list)) { + node = va_node_get(&addr->allocator, addr->hole_list); + node->next = invalid_va_node_index; + node->base = 0; + node->range = range; + } + addr->claims_list = invalid_va_node_index; +init_err: + return err; +} + +/* ************************************************************************* *\ +FUNCTION: VaGenAddress::Alloc +Allocate virtual memory by searching through free virtual memory +linked list for first range >= desired range. + +Note: Free list is sorted by base, we are searching for range. + +Return: Offset to allocated virtual address if successful (in pages). +INVALID_VA_PAGE_INDEX if failed +\* ************************************************************************* */ +static uint64_t +va_gen_alloc_internal(struct va_gen_addr *addr, uint64_t range) +{ + //========================================================================== + // Search for a sufficiently large memory hole (first-fit). + //-------------------------------------------------------------------------- + + // Search for first available hole of sufficient size. + uint32_t index = addr->hole_list; + struct va_node *pFind; + // Used to handle case of an exact range match. + struct va_node *pPrev = 0; + uint64_t base; + + if (0 == range || !va_node_is_valid(addr->hole_list)) + return INVALID_VA_PAGE_INDEX; + + pFind = va_node_get(&addr->allocator, index); + + for ( ; ; ) { + if (pFind->range >= range) + break; + else { + index = pFind->next; + // No hole sufficiently large. + if (!va_node_is_valid(index)) + return INVALID_VA_PAGE_INDEX; + pPrev = pFind; + pFind = va_node_get(&addr->allocator, index); + } + } + + // Found an adequate hole. Get its base. + base = pFind->base; + + //============================================================================ + // Uncommon case: pFind->range == in_range + // Remove node from the hole list when exact fit. Note, could leave the + // hole list empty. + //---------------------------------------------------------------------------- + + if (pFind->range == range) { + // first node? + if (addr->hole_list == index) + addr->hole_list = pFind->next; + else { + BUG_ON(!pPrev); + pPrev->next = pFind->next; + } + va_node_free(&addr->allocator, index); + return base; + } + + //================================================================================ + // Shrink an existing node that is too large. + //-------------------------------------------------------------------------------- + + else { + pFind->base += range; + pFind->range -= range; + } + + return base; +} + +/* ************************************************************************* *\ +FUNCTION: VaGenAddress::FreeClaim + +DESCRIPTION: +Removes claimed range from the claims list. 
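+Three cases are handled: the freed range starts at a claim node's base,
+ends at the node's end, or falls in the middle, in which case the node
+is split into two nodes.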
+\* ************************************************************************* */ +static void +va_gen_free_claim(struct va_gen_addr *addr, uint64_t base, uint64_t range) +{ + struct va_node *pNode = 0; + struct va_node *pPrev = 0; + uint32_t index, new_index; + struct va_node *pNewNode; + int err; + + if (0 == range) + return; + + for (index = addr->claims_list; va_node_is_valid(index); index = pNode->next) { + pNode = va_node_get(&addr->allocator, index); + + if (pNode->base <= base && pNode->base + pNode->range >= base + range) { + if (pNode->base == base) { + pNode->base += range; + pNode->range -= range; + if (0 == pNode->range) { + if (pPrev) + pPrev->next = pNode->next; + else + addr->claims_list = pNode->next; + va_node_free(&addr->allocator, index); + } + } else if (pNode->base + pNode->range == base + range) { + pNode->range -= range; + } else { + err = va_node_alloc(&addr->allocator, &new_index); + BUG_ON(err < 0); + pNewNode = va_node_get(&addr->allocator, new_index); + pNewNode->base = base + range; + pNewNode->range = pNode->range - pNewNode->base; + pNewNode->next = pNode->next; + pNode->range = base - pNode->base; + pNode->next = new_index; + } + return; + } + if (pNode->base > base + range) { + pr_debug("Freed claim not found in the list\n"); + return; + } + + if ((pNode->base < base) ? + (pNode->base + pNode->range > base) : + (base + range > pNode->base)) { + pr_debug("Freed claim partially overlaps the list\n"); + return; + } + pPrev = pNode; + } +} + +/* ************************************************************************* *\ +FUNCTION: VaGenAddress::InsertAndCoalesce + +DESCRIPTION: +O(n) search through free list sorted by base +should average O(n/2), and free list should be much less than the # allocated +coalesce with node before/after if possible +3 possible outcomes: +1. freed node is inserted into list (0 deallocated) +2. freed node range coalesced with existing node, +so freed node can be deallocated (1 deallocated) +3. freed node + another node are coalesced + deallocated +(2 deallocated) +Fails if there is full or partial overlap between inserted +range and ranges in the list + +returns false if insert failed +\* ************************************************************************* */ +static int +va_gen_insert_and_coalesce(struct va_node_allocator *allocator, uint32_t *list, + uint64_t base, uint64_t range) +{ + // search through free list, insert ordered + // also check for coalesce + uint32_t findPtr = *list; + uint32_t prev = *list; + uint64_t end_range = base + range; + uint32_t nextPtr, ptr; + struct va_node *nextNode, *node; + int err; + + while (va_node_is_valid(findPtr)) { + struct va_node *find = va_node_get(allocator, findPtr); + // overlap? + // A.start < B.start && A.end > B.start A-B==A-B A-B==B-A otherwise A-A B-B + // B.start < A.start && B.end > A.start B-A==B-A B-A==A-B otherwise B-B A-A + // => + // A.start < B.start ? A.end > B.start : B.end > A.start + + if ((find->base < base) ? + (find->base + find->range > base) : + (end_range > find->base)) { + return -1; + } + //---------------------------------------------------------- + // coalesce? 2 possibilities: + // 1. (pFind->base + pFind->range) == current.base + // coalesce, check next node base = endrange, + // coalesce with next if possible, deallocate next, exit + // 2. 
end_range == pFind->base + // coalesce, exit + if (end_range == find->base) { + // pr_debug("Coalesce base %lld before %lld\n", base, find->base); + find->base = base; + find->range += range; + return 0; + } else if ((find->base + find->range) == base) { + // pr_debug("Coalesce base %lld after %lld\n", base, find->base); + // leave the base unchanged + find->range += range; + // check the next node to see if it coalesces too + nextPtr = find->next; + if (va_node_is_valid(nextPtr)) { + nextNode = va_node_get(allocator, nextPtr); + // end_range is the same after prior coalesce + if (nextNode->base == end_range) { + // pr_debug("Double Coalesce index %d before %d\n", findPtr, nextPtr); + find->range += nextNode->range; + find->next = nextNode->next; + va_node_free(allocator, nextPtr); + } + } + return 0; + } + // end coalesce + + //---------------------------------------------------------- + // insert if found a node at a greater address + else if (find->base > end_range) + // exit loop, insert node + break; + // nothing found yet, next index + prev = findPtr; + findPtr = find->next; + } + + //---------------------------------------------------------- + // insert or append if node + // could be at the end or empty free list (find index = INVALID) + // or, next node has larger base + //---------------------------------------------------------- + err = va_node_alloc(allocator, &ptr); + BUG_ON(err < 0); + if (!va_node_is_valid(ptr)) { + printk(KERN_ERR "FAILED to add hole! base = %lld, range = %lld\n", base, range); + return 0; + } + node = va_node_get(allocator, ptr); + node->base = base; + node->range = range; + node->next = findPtr; + // First node or empty list (Alloc() can empty the list) + if (findPtr == *list) + // pr_debug("List now starts with %d\n", ptr); + *list = ptr; + else { // reached the end of the list or insertion + BUG_ON(!va_node_is_valid(prev)); + // pr_debug("Append index %d after %d\n", ptr, prev); + (va_node_get(allocator, prev))->next = ptr; + } + return 0; +} + +/* ************************************************************************* *\ +FUNCTION: VaGenAddress::Free + +DESCRIPTION: +Frees allocated Virtual Address. Inserts freed range in the list of holes +(available virtual addresses) +\* ************************************************************************* */ +static void +va_gen_free_internal(struct va_gen_addr *addr, uint64_t base, uint64_t range) +{ + int result = va_gen_insert_and_coalesce(&addr->allocator, &addr->hole_list, base, range); + BUG_ON(result < 0); +} + +/* ************************************************************************* *\ +FUNCTION: VaGenAddress::Alloc +Allocate virtual memory space. + +Note: "Quick and dirty" implementation of aligned Alloc on top of +non-aligned Alloc. + +Return: Offset to allocated virtual address if successful (in pages). 
+INVALID_VA_PAGE_INDEX if failed +\* ************************************************************************* */ +static uint64_t +va_gen_alloc_aligned(struct va_gen_addr *addr, uint64_t range, uint32_t unit_align) +{ + uint64_t base_address = va_gen_alloc_internal(addr, range + unit_align - 1); + uint64_t aligned_base = base_address; + if (0 == range || 0 == unit_align) + return INVALID_VA_PAGE_INDEX; + //BUG_ON(IsPowerOfTwo(in_unitAlign)); + + if (unit_align == 1 || base_address == INVALID_VA_PAGE_INDEX) + return base_address; + + if (aligned_base > base_address) + va_gen_free_internal(addr, base_address, aligned_base - base_address); + + if (aligned_base + range < base_address + unit_align - 1) + va_gen_free_internal(addr, aligned_base + range, + base_address + unit_align - 1 - aligned_base - range); + return aligned_base; +} + +/* ************************************************************************* *\ +FUNCTION: VaGenAddress::Claim + +DESCRIPTION: +Claims a SVAS range. Checks if range was claimed before; if not, records +the claim in the claims list + +returns false if claim failed +\* ************************************************************************* */ +static int +va_gen_claim_internal(struct va_gen_addr *addr, uint64_t base, uint64_t range) +{ + return va_gen_insert_and_coalesce(&addr->allocator, &addr->claims_list, base, range); +} + +/* ************************************************************************* *\ +FUNCTION: VaGenAddressMutex::Alloc +Allocate virtual memory space. + +Note: Wrapper for unit-testable address generator to add critical +section and convert bytes to pages. +Note: Free() selects between Free[Alloc] and FreeClaim based on +the address range of the freed address. + +Return: Allocated virtual address if successful (in bytes) +INVALID_VA_GEN_ADDRESS if failed +\* ************************************************************************* */ +uint64_t +va_gen_alloc(struct va_gen_addr *addr, uint64_t num_bytes, uint32_t align_bytes) +{ + // Convert input bytes to pages which is our unit for the address generator. + uint64_t num_pages = (uint64_t)(((PAGE_SIZE - 1) + num_bytes) / PAGE_SIZE); + uint64_t align_pages = align_bytes / PAGE_SIZE; + uint64_t va_page_index, ret; + + if (align_bytes < PAGE_SIZE) { + ret = INVALID_VA_GEN_ADDRESS; + WARN_ON(1); + goto done; + } + + if (num_bytes > (0xffffffffULL * PAGE_SIZE)) { + ret = INVALID_VA_GEN_ADDRESS; + WARN_ON(1); + goto done; + } + va_page_index = va_gen_alloc_aligned(addr, num_pages, (uint32_t)(align_pages % 0xffffffff) ); + + if (va_page_index == INVALID_VA_PAGE_INDEX) + return INVALID_VA_GEN_ADDRESS; + + // Convert page number to virtual address, adding base. 
+ ret = va_page_index << PAGE_SHIFT; + ret += addr->base; +done: + return ret; +} + +// Claims ownership of a memory region +uint64_t +va_gen_claim(struct va_gen_addr *addr, uint64_t address, uint64_t num_bytes) +{ + uint64_t va, num_pages; + int result; + + if (address + num_bytes > addr->base) + address = INVALID_VA_GEN_ADDRESS; + else if (address & (PAGE_SIZE - 1)) + // address not aligned + address = INVALID_VA_GEN_ADDRESS; + else { + va = (uint64_t)(address >> PAGE_SHIFT); + // pr_debug("%s %d (%#llx,%llx)\n", __func__, __LINE__, va, num_bytes); + // convert input bytes to pages, our unit for the address generator + num_pages = (uint64_t)(((PAGE_SIZE-1) + num_bytes) / PAGE_SIZE); + if ((result = va_gen_claim_internal(addr, va, num_pages)) < 0) + address = INVALID_VA_GEN_ADDRESS; + } + return address; +} + +// frees the address range so the pages may be re-assigned +void +va_gen_free(struct va_gen_addr *addr, uint64_t address, uint64_t num_bytes) +{ + uint64_t va, num_pages; + + if (address >= addr->base) { + // convert virtual address to page number, subtracting base + address -= addr->base; + va = (uint64_t)(address >> PAGE_SHIFT); + // pr_debug("%s %d (%#llx,%llx)\n", __func__, __LINE__, va, num_bytes); + // convert input bytes to pages, our unit for the address generator + num_pages = (uint64_t)(((PAGE_SIZE-1) + num_bytes) / PAGE_SIZE); + va_gen_free_internal(addr, va, num_pages); + } else { + va = (uint64_t)(address >> PAGE_SHIFT); + // pr_debug("%s %d (%#llx,%llx)\n", __func__, __LINE__, va, num_bytes); + // convert input bytes to pages, our unit for the address generator + num_pages = (uint64_t)(((PAGE_SIZE-1) + num_bytes) / PAGE_SIZE); + va_gen_free_claim(addr, va, num_pages); + } +} + +// base and range in bytes, though internal va generator works in pages +int +va_gen_init(struct va_gen_addr *addr, uint64_t base, uint64_t range) +{ + uint64_t rangeInPages = (uint64_t)(range >> PAGE_SHIFT); + int ret; + + if (!(ret = va_gen_init_internal(addr, rangeInPages))) + addr->base = base; + return ret; +} + +void +va_gen_destroy(struct va_gen_addr *addr) +{ + va_node_destroy(&addr->allocator); +} diff --git a/micscif/micscif_va_node.c b/micscif/micscif_va_node.c new file mode 100644 index 0000000..363b471 --- /dev/null +++ b/micscif/micscif_va_node.c @@ -0,0 +1,187 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. 
Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/***************************************************************************\ +manage available nodes for VaGenAddress +\***************************************************************************/ +#include "mic/micscif.h" + +/***************************************************************************\ +FUNCTION: va_node_init + +DESCRIPTION: constructor for allocator for GfxGenAddress +\***************************************************************************/ +void va_node_init(struct va_node_allocator *node) +{ + node->pp_slab_directory = 0; + node->slab_shift = 7; /* 2^7 -> 128 nodes in the slab */ + node->nodes_in_slab = 1 << node->slab_shift; + node->slab_mask = (node->nodes_in_slab-1); + node->num_slabs = 0; + node->num_free_slabs = 0; + node->free_list = invalid_va_node_index; +} + +int va_node_is_valid(uint32_t index) +{ + return invalid_va_node_index != index; +} + +/************************************************************************** *\ +FUNCTION: va_node_destroy + +DESCRIPTION: destructor for allocator for GfxGenAddress +\************************************************************************** */ +void va_node_destroy(struct va_node_allocator *node) +{ + uint32_t i; + if (node->pp_slab_directory) { + for (i = 0; i < node->num_slabs; i++) { + kfree(node->pp_slab_directory[i]); + node->pp_slab_directory[i] = NULL; + } + kfree(node->pp_slab_directory); + node->pp_slab_directory = NULL; + } +} + +/* ************************************************************************* *\ +FUNCTION: va_node_realloc + +DESCRIPTION: va_node_realloc to add more node arrays +\* ************************************************************************* */ +static int va_node_realloc(struct va_node_allocator *node) +{ + uint32_t growSlabs = 2 * (node->num_slabs) + 1; + struct va_node **ppGrowDirectory = + kzalloc(sizeof(struct va_node *) * growSlabs, GFP_KERNEL); + uint32_t i; + + if (!ppGrowDirectory) + return -ENOMEM; + + if (node->num_slabs) { + for (i = 0; i < node->num_slabs; i++) + ppGrowDirectory[i] = node->pp_slab_directory[i]; + kfree(node->pp_slab_directory); + node->pp_slab_directory = NULL; + } + node->pp_slab_directory = ppGrowDirectory; + node->num_free_slabs = growSlabs - node->num_slabs; + return 0; +} + +/* ************************************************************************* *\ +FUNCTION: va_node_grow + +DESCRIPTION: add a node array +\* ************************************************************************* */ +static int va_node_grow(struct va_node_allocator *node) +{ + struct va_node *pNewSlab; + uint32_t i, start; + int ret; + + if (!node->num_free_slabs) + if ((ret = va_node_realloc(node)) < 0) + return ret; + + pNewSlab = kzalloc(sizeof(struct va_node) * + node->nodes_in_slab, GFP_KERNEL); + if (pNewSlab) + node->pp_slab_directory[node->num_slabs] = pNewSlab; + else + return -ENOMEM; + + 
/*-------------------------------------------------------- + * add new nodes to free list + * slightly better than just calling free() for each index + */ + start = node->num_slabs * node->nodes_in_slab; + for (i = 0; i < (node->nodes_in_slab-1); i++) + /* we could optimize this, but why bother? */ + pNewSlab[i].next = start + i + 1; + /* add new allocations to start of list */ + pNewSlab[node->nodes_in_slab-1].next = node->free_list; + node->free_list = start; + /*-------------------------------------------------------*/ + + /* update bookkeeping for array of arrays */ + node->num_slabs++; + node->num_free_slabs--; + return 0; +} + +/* ************************************************************************* *\ +FUNCTION: va_node_get + +DESCRIPTION: return a node reference from index +\* ************************************************************************* */ +struct va_node *va_node_get(struct va_node_allocator *node, uint32_t index) +{ + uint32_t slabIndex = index >> node->slab_shift; + uint32_t nodeIndex = index & node->slab_mask; + + return &node->pp_slab_directory[slabIndex][nodeIndex]; +} + +/* ************************************************************************* *\ +FUNCTION: va_node_alloc + +DESCRIPTION: return 0 on success with valid index in out_alloc or errno on failure. +\* ************************************************************************* */ +int va_node_alloc(struct va_node_allocator *node, uint32_t *out_alloc) +{ + int ret; + + if (!va_node_is_valid(node->free_list)) + if ((ret = va_node_grow(node)) < 0) + return ret; + *out_alloc = node->free_list; + node->free_list = (va_node_get(node, *out_alloc))->next; + return 0; +} + +/* ************************************************************************* *\ +FUNCTION: va_node_free + +DESCRIPTION: make a node available +\* ************************************************************************* */ +void va_node_free(struct va_node_allocator *node, uint32_t index) +{ + struct va_node *tmp = va_node_get(node, index); + tmp->next = node->free_list; + node->free_list = index; +} diff --git a/mpssboot/Kbuild b/mpssboot/Kbuild new file mode 100644 index 0000000..c58d6c8 --- /dev/null +++ b/mpssboot/Kbuild @@ -0,0 +1 @@ +obj-m := mpssboot.o diff --git a/mpssboot/mpssboot.c b/mpssboot/mpssboot.c new file mode 100644 index 0000000..7939613 --- /dev/null +++ b/mpssboot/mpssboot.c @@ -0,0 +1,238 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. 
Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ACPT_BOOTED 1 +#define ACPT_BOOT_ACK 2 +#define ACPT_NACK_VERSION 3 +#define ACPT_REQUEST_TIME 4 +#define ACPT_TIME_DATA 5 + +#define ACPT_VERSION 1 + +static dev_t dev; +static struct class *class; +static struct device *mbdev; + +static int host_notified; +static struct timespec tod; +static int timeset = 0; + +static ssize_t +show_timesync(struct device *dev, struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "Time: %s\n", timeset? "set" : "not set"); +} + +static ssize_t +set_synctime(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct scif_portID port = {0, MIC_NOTIFY}; + static scif_epd_t epd; + int proto = ACPT_REQUEST_TIME; + int version = ACPT_VERSION; + int err; + + epd = scif_open(); + + if ((err = scif_connect(epd, &port))) { + printk("MPSSBOOT error, synctime connect failed: %d\n", err); + goto close_synctime; + } + + if ((err = scif_send(epd, &version, sizeof(version), 0)) != sizeof(version)) { + printk("MPSSBOOT send version failed: %d\n", err); + goto close_synctime; + } + + if ((err = scif_send(epd, &proto, sizeof(proto), 0)) != sizeof(proto)) { + printk("MPSSBOOT send boot finished failed: %d\n", err); + goto close_synctime; + } + + if ((err = scif_recv(epd, &proto, sizeof(proto), SCIF_RECV_BLOCK)) != sizeof(proto)) { + printk("MPSSBOOT protocol recv ack failed: %d\n", err); + goto close_synctime; + } + + if (proto != ACPT_TIME_DATA) { + printk("MPSSBOOT failed to receive time data packet %d\n", proto); + goto close_synctime; + } + + if ((err = scif_recv(epd, &tod, sizeof(tod), SCIF_RECV_BLOCK)) != sizeof(tod)) { + printk("MPSSBOOT time data read size failed: %d\n", err); + goto close_synctime; + } + + do_settimeofday(&tod); + printk("MPSSBOOT Time of day sycned with host\n"); + timeset = 1; + +close_synctime: + scif_close(epd); + return count; +} +static DEVICE_ATTR(synctime, S_IRUGO | S_IWUSR, show_timesync, set_synctime); + +static ssize_t +show_host_notified(struct device *dev, struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", host_notified); +} + +static ssize_t +set_host_notified(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) +{ + struct scif_portID port = {0, MIC_NOTIFY}; + static scif_epd_t epd; + int proto = ACPT_BOOTED; + int version = ACPT_VERSION; + int err; + + epd = scif_open(); + + if ((err = scif_connect(epd, &port))) { + printk("MPSSBOOT error, notify 
connect failed: %d\n", err); + goto close_notify; + } + + if ((err = scif_send(epd, &version, sizeof(version), 0)) != sizeof(version)) { + printk("MPSSBOOT send version failed: %d\n", err); + goto close_notify; + } + + if ((err = scif_send(epd, &proto, sizeof(proto), 0)) != sizeof(proto)) { + printk("MPSSBOOT send boot finished failed: %d\n", err); + goto close_notify; + } + + if ((err = scif_recv(epd, &proto, sizeof(proto), SCIF_RECV_BLOCK)) != sizeof(proto)) { + printk("MPSSBOOT protocol recv ack failed: %d\n", err); + goto close_notify; + } + + if (proto != ACPT_BOOT_ACK) + printk("MPSSBOOT failed to receive boot ACK, got %d\n", proto); + else + printk("MPSSBOOT Boot acknowledged\n"); + +close_notify: + scif_close(epd); + return count; +} +static DEVICE_ATTR(host_notified, S_IRUGO | S_IWUSR, show_host_notified, set_host_notified); + +static struct attribute *mb_attributes[] = { + &dev_attr_synctime.attr, + &dev_attr_host_notified.attr, + NULL +}; + +struct attribute_group mb_attr_group = { + .attrs = mb_attributes +}; + +/* This function closes the endpoint established on init */ +static void +mpssboot_exit(void) +{ + sysfs_remove_group(&mbdev->kobj, &mb_attr_group); + device_destroy(class, dev); + class_destroy(class); +} + +static char * +mpssboot_devnode(struct device *dev, mode_t *mode) +{ + return kasprintf(GFP_KERNEL, "%s", dev_name(dev)); +} + +/* This function initializes a SCIF connection to the host */ +static int +mpssboot_init(void) +{ + //static struct device dev; + int result; + + alloc_chrdev_region(&dev, 0, 2, "micnotify"); + class = class_create(THIS_MODULE, "micnotify"); + class->devnode = mpssboot_devnode; + mbdev = device_create(class, NULL, dev, NULL, "notify"); + + result = sysfs_create_group(&mbdev->kobj, &mb_attr_group); + result = result; + return 0; +} + +module_init(mpssboot_init); +module_exit(mpssboot_exit); +MODULE_LICENSE("GPL"); + diff --git a/pm_scif/Kbuild b/pm_scif/Kbuild new file mode 100644 index 0000000..4f49d0d --- /dev/null +++ b/pm_scif/Kbuild @@ -0,0 +1 @@ +obj-m := pm_scif.o diff --git a/pm_scif/pm_scif.c b/pm_scif/pm_scif.c new file mode 100644 index 0000000..aa18b7a --- /dev/null +++ b/pm_scif/pm_scif.c @@ -0,0 +1,439 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. 
+ * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pm_scif.h" + +#define PM_DB(fmt, ...) printk(KERN_ALERT"[ %s : %d ]:"fmt,__func__, __LINE__, ##__VA_ARGS__) +#define FUNCTION_ENTRY PM_DB("==> %s\n", __func__) +#define FUNCTION_EXIT PM_DB("<== %s\n", __func__) + +#define PM_SCIF_RETRY_COUNT 5 + +DEFINE_RWLOCK(pmscif_send); + +static atomic_t epinuse = ATOMIC_INIT(0); +void pm_scif_exit(void); + +typedef struct _mic_pm_scif { + scif_epd_t ep; + int lport; + struct scif_portID rport_id; + struct workqueue_struct *pm_recvq; + struct work_struct pm_recv; + PM_CONNECTION_STATE con_state; +} mic_pm_scif; + +mic_pm_scif *pm_scif; + +void +pm_dump(char *buf, size_t len) +{ + int i = 0; + + for ( i=0; i < len; i++) { + + if (i % 8) + printk(KERN_ALERT"\n"); + printk(KERN_ALERT"%x ", buf[i]); + } +} + +static void pm_handle_open (void *msg, size_t len) +{ + FUNCTION_ENTRY; + pm_dump((char*)msg, len); +} + +static void pm_handle_test (void *msg, size_t len) +{ + FUNCTION_ENTRY; + pm_dump((char*)msg, len); + +} +typedef void (*_pm_msg_handler)(void*, size_t); + +typedef struct _pm_msg_call { + _pm_msg_handler handler; + char *name; +}pm_msg_call; + +#define PM_HANDLE_ADD(opcode, function) [(opcode)] = {(function), #function} + +pm_msg_call pm_msg_caller[PM_MESSAGE_MAX] = { + PM_HANDLE_ADD(PM_MESSAGE_OPEN, pm_handle_open), + PM_HANDLE_ADD(PM_MESSAGE_TEST, pm_handle_test) +}; + +int +pm_send_to_host(PM_MESSAGE opcode, void *msg, size_t len) +{ +// FUNCTION_ENTRY; + int err = 0; + size_t psize = sizeof(pm_msg_header) + len; + char *payload; + unsigned long flags; + + if (pm_scif->con_state != PM_CONNECTED) { + err = -EINVAL; + goto error; + } + + if (!(payload = kmalloc(psize, GFP_ATOMIC))) { + err = -ENOMEM; + goto error; + } + read_lock_irqsave(&pmscif_send,flags); + + if (atomic_xchg(&epinuse,1) != 0) { + read_unlock_irqrestore(&pmscif_send,flags); + kfree(payload); + return -1; + } + + ((pm_msg_header*)payload)->opcode = opcode; + ((pm_msg_header*)payload)->len = len; + if (len) + memcpy((char*)payload + sizeof(pm_msg_header), msg, len); + + //0 for non blocking + if ((err = scif_send(pm_scif->ep, payload, psize, 0)) < 0) { + PM_DB("scif_recv failed\n"); + } + atomic_set(&epinuse,0); + //for (i = 0; i < psize; i++) + // printk(KERN_ALERT" buff: %X\n", payload[i]); + read_unlock_irqrestore(&pmscif_send,flags); + kfree(payload); +// FUNCTION_EXIT; +error: + return err; +} + +EXPORT_SYMBOL(pm_send_to_host); + +static struct mic_pmscif_handle micpmscif = { + .pm_scif_uos2host = pm_send_to_host, + .pm_scif_host2uos = NULL, + .owner = THIS_MODULE, +}; + + + +static void pm_send_to_uos(pm_msg_header *header, char *msg) +{ + if(micpmscif.pm_scif_host2uos) { + micpmscif.pm_scif_host2uos(header, msg); + } +} + +static void +pm_recv_from_host(struct work_struct *work) +{ + int err = 0; + char *msg = NULL; + pm_msg_header *header; + mic_pm_scif *pm_scif_info = container_of(work, mic_pm_scif, pm_recv); + + FUNCTION_ENTRY; + if (pm_scif->con_state != PM_CONNECTED) + goto exit; + + 
header = kmalloc(sizeof(pm_msg_header), GFP_KERNEL); + + if ((err = scif_recv(pm_scif_info->ep, header, sizeof(pm_msg_header), + SCIF_RECV_BLOCK)) < 0) { + PM_DB("scif_recv failed\n"); + goto end_con; + } + + msg = kmalloc(header->len, GFP_KERNEL); + + if ((err = scif_recv(pm_scif_info->ep, msg, header->len, + SCIF_RECV_BLOCK)) < 0) { + PM_DB("scif_recv failed\n"); + goto end_con; + } + if(header->opcode < PM_MESSAGE_MAX) { + if ((header->opcode != PM_MESSAGE_CLOSE) && + (header->opcode != PM_MESSAGE_CLOSE_ACK)) { + if(pm_msg_caller[header->opcode].handler) + pm_msg_caller[header->opcode].handler(msg, header->len); + pm_send_to_uos(header, msg); + } else { + if (header->opcode == PM_MESSAGE_CLOSE) { + pm_send_to_uos(header,msg); + pm_send_to_host(PM_MESSAGE_CLOSE_ACK, NULL, 0); + } + pm_scif->con_state = PM_DISCONNECTING; + goto end_con; + } + } + else + printk("pm_scif: Recvd scif message with bad opcode %d\n", + header->opcode); + kfree(header); + kfree(msg); + queue_work(pm_scif->pm_recvq, &pm_scif->pm_recv); + return; + +end_con: + kfree(header); + kfree(msg); +exit: + FUNCTION_EXIT; +} + +#ifdef PM_SCIF_IOCTL +static int +spm_ioctl(struct inode *in, struct file *f, unsigned int cmd, unsigned long arg) +{ + int i = 0; + uint32_t payload = 0xc0de0000; + + FUNCTION_ENTRY; + for (i = 0; i < PM_MESSAGE_TEST; i++) { + payload++; + //PM_DB("sending %s with payload = %x \n", + // pm_msg_caller[i].name, payload); + pm_send_to_host(i, &payload, sizeof(payload)); + } + + return 0; +} + +static long +spm_unlocked_ioctl(struct file *f, unsigned int cmd, unsigned long arg) +{ + return (long) spm_ioctl(f->f_path.dentry->d_inode, f, cmd, arg); +} + +static int +spm_release(struct inode *in, struct file *f) +{ + return 0; +} + +static char * +spm_devnode(struct device *dev, mode_t *mode) +{ + return kasprintf(GFP_KERNEL, "spm/%s", dev_name(dev)); +} + + +static int +spm_open(struct inode *in, struct file *f) +{ + return 0; +} + +struct file_operations spm_ops = { + .owner = THIS_MODULE, + .unlocked_ioctl = spm_unlocked_ioctl, + .open = spm_open, + .release = spm_release, +}; + +int spm_major; +int spm_minor; +dev_t spmdev; +struct cdev spmcdev; +struct class *spmclass; + +static void +spm_dev_deinit(void) +{ + device_destroy(spmclass,spmdev); + class_destroy(spmclass); + cdev_del(&spmcdev); + unregister_chrdev_region(spmdev, 1); +} + +static int +spm_dev_init(void) +{ + int err = 0; + + if (spm_major) { + spmdev = MKDEV(spm_major, spm_minor); + err = register_chrdev_region(spmdev, 1, "spm"); + } + else { + err = alloc_chrdev_region(&spmdev, spm_minor, 1, "spm"); + spm_major = MAJOR(spmdev); + } + + if (err < 0) { + unregister_chrdev_region(spmdev, 1); + goto done; + } + + spmdev = MKDEV(spm_major, spm_minor); + cdev_init(&spmcdev, &spm_ops); + spmcdev.owner = THIS_MODULE; + err = cdev_add(&spmcdev, spmdev, 1); + + if (err) + goto err; + + spmclass = class_create(THIS_MODULE, "spm"); + if (IS_ERR(spmclass)) { + err = PTR_ERR(spmclass); + goto err; + } + + spmclass->devnode = spm_devnode; + device_create(spmclass, NULL, spmdev, NULL, "spm"); + if (IS_ERR(spmclass)) { + err = PTR_ERR(spmclass); + goto err; + } +done: + return err; +err: + spm_dev_deinit(); + return err; +} +#endif + +int pm_scif_init(void) +{ + int err = 1; + int retry = 0; + + FUNCTION_ENTRY; + PM_DB("pm_scif insmoded \n"); +#ifdef PM_SCIF_IOCTL + if ((err = spm_dev_init())) { + PM_DB(" spm_dev_init failed\n"); + goto done; + } +#endif + atomic_set(&epinuse,0); + pm_scif = kzalloc(sizeof(mic_pm_scif), GFP_KERNEL); + + if (!pm_scif) { + 
err = -ENOMEM; + goto end_con; + } + + pm_scif_register(&micpmscif); + + if ((pm_scif->ep = scif_open()) == NULL) { + PM_DB(" scif_open failed\n"); + goto end_con; + } + + if ((pm_scif->lport = scif_bind(pm_scif->ep, 0)) < 0) { + PM_DB(" scif_bind failed\n"); + goto end_con; + } + + PM_DB(" scif_bind successfull. Local port number = %d, ep = \n", + pm_scif->lport); + dump_ep(pm_scif->ep, __func__,__LINE__); + pm_scif->rport_id.node = 0; + pm_scif->rport_id.port = SCIF_PM_PORT_0; + + while ((err = scif_connect(pm_scif->ep, &pm_scif->rport_id)) != 0) { + PM_DB(" scif_connect failed with err = %d ep %p\n",err, + pm_scif->ep); + msleep(1000); + if (retry++ > PM_SCIF_RETRY_COUNT) + goto end_con; + } + + pm_scif->pm_recvq = create_singlethread_workqueue("pm_recvq"); + INIT_WORK(&pm_scif->pm_recv, pm_recv_from_host); + queue_work(pm_scif->pm_recvq, &pm_scif->pm_recv); + pm_scif->con_state = PM_CONNECTED; + err = 0; +#ifdef PM_SCIF_IOCTL +done: +#endif + return err; +end_con: + pm_scif_exit(); + FUNCTION_EXIT; + return err; +} +EXPORT_SYMBOL(pm_scif_init); + +void pm_scif_exit(void) +{ + unsigned long flags; + + FUNCTION_ENTRY; + PM_DB("Good Bye!, pm scif \n"); + + pm_send_to_host(PM_MESSAGE_CLOSE, NULL, 0); + write_lock_irqsave(&pmscif_send,flags); + atomic_set(&epinuse,1); + write_unlock_irqrestore(&pmscif_send,flags); + + if (pm_scif) { + if(pm_scif->pm_recvq) { + flush_workqueue(pm_scif->pm_recvq); + PM_DB("calling destroy\n"); + destroy_workqueue(pm_scif->pm_recvq); + } + + PM_DB("closing ep \n"); + if (pm_scif->ep) + scif_close(pm_scif->ep); + + pm_scif_unregister(&micpmscif); + pm_scif->con_state = PM_DISCONNECTED; + kfree(pm_scif); + } + #ifdef PM_SCIF_IOCTL + spm_dev_deinit(); + #endif + FUNCTION_EXIT; +} + +EXPORT_SYMBOL(pm_scif_exit); + +module_init(pm_scif_init); +module_exit(pm_scif_exit); +MODULE_LICENSE("GPL"); diff --git a/pm_scif/pm_scif.h b/pm_scif/pm_scif.h new file mode 100644 index 0000000..ca275cd --- /dev/null +++ b/pm_scif/pm_scif.h @@ -0,0 +1,48 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. 
+ * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#if !defined(__PM_SCIF_H) +#define __PM_SCIF_H + +struct mic_pmscif_handle{ + int (*pm_scif_uos2host)(PM_MESSAGE opcode, void *msg, size_t len); + int (*pm_scif_host2uos)(pm_msg_header *header, void *msg); + struct module *owner; +}; + +extern int pm_scif_register(struct mic_pmscif_handle *pmscif); +extern void pm_scif_unregister(struct mic_pmscif_handle *pmscif); + +#endif //__PM_SCIF_H diff --git a/ramoops/Kbuild b/ramoops/Kbuild new file mode 100644 index 0000000..53b0def --- /dev/null +++ b/ramoops/Kbuild @@ -0,0 +1 @@ +obj-m := ramoops.o diff --git a/ramoops/ramoops.c b/ramoops/ramoops.c new file mode 100644 index 0000000..76cd53f --- /dev/null +++ b/ramoops/ramoops.c @@ -0,0 +1,163 @@ +/* + * RAM Oops/Panic logger + * + * Copyright (C) 2009 Marco Stornelli + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/kmsg_dump.h> +#include <linux/time.h> +#include <linux/io.h> +#include <linux/ioport.h> + +#define RAMOOPS_KERNMSG_HDR "====" +#define RAMOOPS_HEADER_SIZE (5 + sizeof(struct timeval)) + +#define RECORD_SIZE 4096 + +static ulong mem_address; +module_param(mem_address, ulong, 0600); +MODULE_PARM_DESC(mem_address, + "start of reserved RAM used to store oops/panic logs"); + +static ulong mem_size; +module_param(mem_size, ulong, 0600); +MODULE_PARM_DESC(mem_size, + "size of reserved RAM used to store oops/panic logs"); + +static int dump_oops = 1; +module_param(dump_oops, int, 0600); +MODULE_PARM_DESC(dump_oops, + "set to 1 to dump oopses, 0 to only dump panics (default 1)"); + +static struct ramoops_context { + struct kmsg_dumper dump; + void *virt_addr; + phys_addr_t phys_addr; + unsigned long size; + int count; + int max_count; +} oops_cxt; + +static void ramoops_do_dump(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason, const char *s1, unsigned long l1, + const char *s2, unsigned long l2) +{ + struct ramoops_context *cxt = container_of(dumper, + struct ramoops_context, dump); + unsigned long s1_start, s2_start; + unsigned long l1_cpy, l2_cpy; + int res; + char *buf; + struct timeval timestamp; + + /* Only dump oopses if dump_oops is set */ + if ((reason != KMSG_DUMP_OOPS) || !dump_oops) + return; + + buf = (char *)(cxt->virt_addr + (cxt->count * RECORD_SIZE)); + memset(buf, '\0', RECORD_SIZE); + res = sprintf(buf, "%s", RAMOOPS_KERNMSG_HDR); + buf += res; + do_gettimeofday(&timestamp); + res = sprintf(buf, "%lu.%lu\n", (long)timestamp.tv_sec, (long)timestamp.tv_usec); + buf += res; + + l2_cpy = min(l2, 
(unsigned long)(RECORD_SIZE - RAMOOPS_HEADER_SIZE)); + l1_cpy = min(l1, (unsigned long)(RECORD_SIZE - RAMOOPS_HEADER_SIZE) - l2_cpy); + + s2_start = l2 - l2_cpy; + s1_start = l1 - l1_cpy; + + memcpy(buf, s1 + s1_start, l1_cpy); + memcpy(buf + l1_cpy, s2 + s2_start, l2_cpy); + + cxt->count = (cxt->count + 1) % cxt->max_count; +} + +static int __init ramoops_init(void) +{ + struct ramoops_context *cxt = &oops_cxt; + int err = -EINVAL; + + if (!mem_size) { + printk(KERN_ERR "Invalid size specification"); + goto fail3; + } + + rounddown_pow_of_two(mem_size); + + if (mem_size < RECORD_SIZE) { + printk(KERN_ERR "size too small"); + goto fail3; + } + + cxt->max_count = mem_size / RECORD_SIZE; + cxt->count = 0; + cxt->size = mem_size; + cxt->phys_addr = mem_address; + + if (!request_mem_region(cxt->phys_addr, cxt->size, "ramoops")) { + printk(KERN_ERR "ramoops: request mem region failed"); + err = -EINVAL; + goto fail3; + } + + cxt->virt_addr = ioremap(cxt->phys_addr, cxt->size); + if (!cxt->virt_addr) { + printk(KERN_ERR "ramoops: ioremap failed"); + goto fail2; + } + + cxt->dump.dump = ramoops_do_dump; + err = kmsg_dump_register(&cxt->dump); + if (err) { + printk(KERN_ERR "ramoops: registering kmsg dumper failed"); + goto fail1; + } + + return 0; + +fail1: + iounmap(cxt->virt_addr); +fail2: + release_mem_region(cxt->phys_addr, cxt->size); +fail3: + return err; +} + +static void __exit ramoops_exit(void) +{ + struct ramoops_context *cxt = &oops_cxt; + + if (kmsg_dump_unregister(&cxt->dump) < 0) + printk(KERN_WARNING "ramoops: could not unregister kmsg_dumper\n"); + + iounmap(cxt->virt_addr); + release_mem_region(cxt->phys_addr, cxt->size); +} + + +module_init(ramoops_init); +module_exit(ramoops_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Marco Stornelli "); +MODULE_DESCRIPTION("RAM Oops/Panic logger/driver"); + diff --git a/ras/Kbuild b/ras/Kbuild new file mode 100644 index 0000000..7778866 --- /dev/null +++ b/ras/Kbuild @@ -0,0 +1,6 @@ +obj-m := micras.o + +micras-y := micras_main.o micras_common.o +micras-y += micras_core.o micras_uncore.o micras_elog.o +micras-$(CONFIG_ML1OM) += micras_knf.o +micras-$(CONFIG_MK1OM) += micras_knc.o micras_pm.o diff --git a/ras/Makefile b/ras/Makefile new file mode 100644 index 0000000..f5550a7 --- /dev/null +++ b/ras/Makefile @@ -0,0 +1,210 @@ +# +# Build RAS drivers +# +# In Linux 2.6 kernels modules must be built by the kernel's kbuild +# system, with a path to the kernel module source directory. Kbuild +# expects a general purpose Makefile to exist and optionally an extra +# file named Kbuild with the kernel module build details. +# This Makefile is a 'backwards compatible' (see file "modules.txt"). +# +DEBUG = n + +ifneq ($(KERNELRELEASE),) + +# +# Kbuild backwards compatibility part: +# Load Kbuild to specify module targets and options. 
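+# (Editor's note: when kbuild re-invokes this Makefile, KERNELRELEASE is
+# set, so only the include of Kbuild below runs and the cross-compile
+# setup in the else branch is skipped.)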
+# +include Kbuild + +else + +# +# Standard invocation: +# +# Export variables to environment and pass control to kernel tools +# ARCH Target architecture: l1om or k1om +# KERNELDIR Top of MIC kernel tree (not repo source tree) +# DRIVERDIR Top of MPSS drivers build tree (not repo source tree) +# + +ARCH := $(or $(ARCH), $(shell cat $(CURDIR)/../.arch 2>/dev/null)) + +ifeq ($(DRIVERDIR),) +ifeq ($(shell /usr/bin/test -d ../source-root/$(ARCH)-hybrid && echo Y),Y) +DRIVERDIR = $(PWD)/../source-root/$(ARCH)-hybrid +KERNELDIR ?= $(DRIVERDIR)/card/kernel +else ifeq ($(shell /usr/bin/test -d ../source-root/$(ARCH)-internal && echo Y),Y) +DRIVERDIR = $(PWD)/../source-root/$(ARCH)-internal +KERNELDIR ?= $(DRIVERDIR)/card/kernel +endif +endif +KERNELDIR ?= ../../miclinux + +SCIF_SYM = $(DRIVERDIR)/card/driver/Module.symvers +SCIF_LIB = $(DRIVERDIR)/host/scif_lib +SCIF_HEADER = $(DRIVERDIR)/include + +EXTRA_CFLAGS += $(KERNWARNFLAGS) +ifeq ($(ARCH),l1om) + EXTRA_CFLAGS += -DMIC_IS_L1OM +else ifeq ($(ARCH),k1om) + EXTRA_CFLAGS += -DMIC_IS_K1OM +else + $(error $$(ARCH) must be l1om or k1om) +endif +EXTRA_CFLAGS += -DINTERNAL_REG=1 -Wall +EXTRA_CFLAGS += $(SPOOKY_MIC_CFLAGS) + +CROSS_COMPILE = x86_64-$(ARCH)-linux- + +ifeq ($(shell which $(CROSS_COMPILE)gcc 2>/dev/null),) + ifeq ($(shell which ../cross/bin/$(CROSS_COMPILE)gcc 2>/dev/null),) + $(error $$(PATH) must include $(CROSS_COMPILE)gcc) + else + CROSS_COMPILE = $(PWD)/../cross/bin/x86_64-$(ARCH)-linux- + endif +endif + +default: modules tests + +modules: + @ echo "$(MAKE) -C $(KERNELDIR) M=$(CURDIR) ARCH=$(ARCH) modules" + @ $(MAKE) -C $(KERNELDIR) M=$(CURDIR) ARCH=$(ARCH) \ + V=0 DEBUG=$(DEBUG) \ + SPOOKY_MIC_CFLAGS=$(SPOOKY_MIC_CFLAGS) \ + CROSS_COMPILE=$(CROSS_COMPILE) \ + KBUILD_EXTRA_SYMBOLS=$(SCIF_SYM) \ + modules + +install: modules_install + +modules_install: + @ echo "$(MAKE) -C $(KERNELDIR) M=$(CURDIR) ARCH=$(ARCH) install" + @ $(MAKE) -C $(KERNELDIR) M=$(CURDIR) ARCH=$(ARCH) \ + V=0 DEBUG=$(DEBUG) \ + CROSS_COMPILE=$(CROSS_COMPILE) \ + SPOOKY_MIC_CFLAGS=$(SPOOKY_MIC_CFLAGS) \ + KBUILD_EXTRA_SYMBOLS=$(SCIF_SYM) \ + INSTALL_MOD_PATH=$(DESTDIR) \ + modules_install + +# +# Test programs, expects that compilers and SCIF libraries are present. 
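+# (Editor's note, example invocation assuming the cross toolchain and the
+# SCIF headers/libraries are installed: "make ARCH=k1om tests" builds the
+# host tools, host tests and card-side test binaries listed below.)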
+# +host-tools = edecode gdecode +host-tests = cp mc ttl tmp cutl proc ukill fan smc fsc pm trbo ptrig cp32 p-in-host p-out-host +card-tests = p-in-card p-out-card suid load + +tests: $(host-tools) $(host-tests) $(card-tests) + +cp: cp.c micras_api.h + @ echo gcc -O2 cp.c -o cp -lscif + @ gcc -O2 cp.c -o cp $(EXTRA_CFLAGS) -I$(SCIF_HEADER) -L$(SCIF_LIB) -lscif + +cp32: cp32.c micras_api.h + @ echo gcc -O2 cp32.c -o cp32 -lscif + @ gcc -O2 cp32.c -o cp32 $(EXTRA_CFLAGS) -I$(SCIF_HEADER) -L$(SCIF_LIB) -lscif + +mc: mc.c micmca_api.h + @ echo gcc -O2 mc.c -o mc -lscif + @ gcc -O2 mc.c -o mc $(EXTRA_CFLAGS) -I$(SCIF_HEADER) -L$(SCIF_LIB) -lscif + +cutl: cutl.c micras_api.h + @ echo gcc -O2 cutl.c -o cutl -lscif -lncurses + @ gcc -O2 cutl.c -o cutl $(EXTRA_CFLAGS) -I$(SCIF_HEADER) -L$(SCIF_LIB) -lscif -lncurses + +cutl2: cutl2.c micras_api.h + @ echo gcc -O2 cutl2.c -o cutl2 -lscif -lncurses + @ gcc -O2 cutl2.c -o cutl2 $(EXTRA_CFLAGS) -I$(SCIF_HEADER) -L$(SCIF_LIB) -lscif -lncurses + +proc: proc.c micras_api.h + @ echo gcc -O2 proc.c -o proc -lscif -lncurses + @ gcc -O2 proc.c -o proc $(EXTRA_CFLAGS) -I$(SCIF_HEADER) -L$(SCIF_LIB) -lscif -lncurses + +trbo: trbo.c micras_api.h + @ echo gcc -O2 trbo.c -o trbo -lscif + @ gcc -O2 trbo.c -o trbo $(EXTRA_CFLAGS) -I$(SCIF_HEADER) -L$(SCIF_LIB) -lscif + +fan: fan.c micras_api.h + @ echo gcc -O2 fan.c -o fan -lscif + @ gcc -O2 fan.c -o fan $(EXTRA_CFLAGS) -I$(SCIF_HEADER) -L$(SCIF_LIB) -lscif + +smc: smc.c micras_api.h + @ echo gcc -O2 smc.c -o smc -lscif + @ gcc -O2 smc.c -o smc $(EXTRA_CFLAGS) -I$(SCIF_HEADER) -L$(SCIF_LIB) -lscif + +pm_tst: pm_tst.c micras_api.h + @ echo gcc -O2 pm_tst.c -o pm_tst -lscif + @ gcc -O2 pm_tst.c -o pm_tst $(EXTRA_CFLAGS) -I$(SCIF_HEADER) -L$(SCIF_LIB) -lscif + +fsc: fsc.c micras_api.h + @ echo gcc -O2 fsc.c -o fsc -lscif + @ gcc -O2 fsc.c -o fsc $(EXTRA_CFLAGS) -I$(SCIF_HEADER) -L$(SCIF_LIB) -lscif + +ptrig: ptrig.c micras_api.h + @ echo gcc -O2 ptrig.c -o ptrig -lscif + @ gcc -O2 ptrig.c -o ptrig $(EXTRA_CFLAGS) -I$(SCIF_HEADER) -L$(SCIF_LIB) -lscif + +pm: pm.c micras_api.h micpm_api.h + @ echo gcc -O2 pm.c -o pm -lscif + @ gcc -O2 pm.c -o pm $(EXTRA_CFLAGS) -I$(SCIF_HEADER) -L$(SCIF_LIB) -lscif + +tmp: tmp.c micras_api.h + @ echo gcc -O2 tmp.c -o tmp -lscif + @ gcc -O2 tmp.c -o tmp $(EXTRA_CFLAGS) -I$(SCIF_HEADER) -L$(SCIF_LIB) -lscif + +ttl: ttl.c micras_api.h micpm_api.h + @ echo gcc -O2 ttl.c -o ttl -lscif + @ gcc -O2 ttl.c -o ttl $(EXTRA_CFLAGS) -I$(SCIF_HEADER) -L$(SCIF_LIB) -lscif + +ukill: ukill.c micras_api.h + @ echo gcc -O2 ukill.c -o ukill -lscif + @ gcc -O2 ukill.c -o ukill $(EXTRA_CFLAGS) -I$(SCIF_HEADER) -L$(SCIF_LIB) -lscif + +edecode: edecode.c + @ echo gcc -O2 edecode.c -o edecode + @ gcc -O2 -Wall edecode.c -o edecode + +gdecode: gdecode.c + @ echo gcc -O2 gdecode.c -o gdecode + @ gcc -O2 -Wall gdecode.c -o gdecode + +p-in-host: p-in.c Makefile + @ echo gcc -O2 p-in.c -o p-in-host + @ gcc -O2 p-in.c -o p-in-host -DIOK=16 + +p-out-host: p-out.c Makefile + @ echo gcc -O2 p-out.c -o p-out-host + @ gcc -O2 p-out.c -o p-out-host -DIOK=16 -DTXG=64 + +suid: suid.c + @ echo cross-gcc -O2 suid.c -o suid + @ $(CROSS_COMPILE)gcc -O2 suid.c -o suid + +p-in-card: p-in.c Makefile + @ echo cross-gcc -O2 p-in.c -o p-in-card + @ $(CROSS_COMPILE)gcc -O2 p-in.c -o p-in-card -DIOK=64 + +p-out-card: p-out.c Makefile + @ echo cross-gcc -O2 p-out.c -o p-out-card + @ $(CROSS_COMPILE)gcc -O2 p-out.c -o p-out-card -DIOK=64 -DTXG=16 + +load: load.c + @ echo cross-gcc load.c -o load -pthread -lpthread + @ $(CROSS_COMPILE)gcc 
load.c -o load $(EXTRA_CFLAGS) -pthread -lpthread + +cpptest: + @ echo Dumping compiler defines + @ echo > nil.c + @ $(CROSS_COMPILE)gcc -E -dM nil.c | sort + @ rm nil.c + +endif + +clean: + @ echo " Cleaning .." + @ rm -fr *.o *~ core .*.sw? .depend .*.cmd *.ko *.mod.c \ + .tmp_versions modules.order Module.symvers + @ rm -f $(host-tools) $(host-tests) $(card-tests) + diff --git a/ras/micmca_api.h b/ras/micmca_api.h new file mode 100644 index 0000000..8008ad0 --- /dev/null +++ b/ras/micmca_api.h @@ -0,0 +1,135 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * Definition of the public MC interface. + * Access to MC event features provided through SCIF only. + */ + +#ifndef _MICMCA_API_H_ +#define _MICMCA_API_H_ 1 + +#ifdef __cplusplus +extern "C" { /* C++ guard */ +#endif + +/* + * Configuration manifests + */ + +#pragma pack(push, 4) /* Windows requirement */ + + +/* + * Machine check info is reported on this port. Only one consumer can + * (and must) connect in order to be notified about MC events. + */ + +#define MR_MCE_PORT SCIF_RAS_PORT_1 + + +/* + * MC events are provide in raw form, i.e. as close to the + * contents of MCA register banks as possible. It is not + * the responsibility of the MCA event handler to perform + * analysis and interpretation of these registers, beyond + * determining whether the event was deadly to the uOS. + * + * Any data or context corruption _IS_ deadly by definition! 
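+ *
+ * Illustrative example (editor's note): a fatal SBOX event that was also
+ * recorded in EEPROM would arrive with org = MC_ORG_SBOX, id = 0 and
+ * flags = MC_FLG_FATAL | MC_FLG_LOG, using the encodings defined below.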
+ * + * Source identifiers: + * org id + * 0 Bank 0 CPU #, core event, range 0..CPU_MAX + * 1 Bank 1 CPU #, core event, range 0..CPU_MAX + * 2 Bank 2 CPU #, core event, range 0..CPU_MAX + * 3 DBOX #, uncore event, range 0..DBOX_MAX + * 4 SBOX, uncore event, range 0 + * 5 GBOX #, uncore event, range 0..GBOX_MAX + * 6 TBOX #, uncore event, range 0..TBOX_MAX + * + * Report flags bits (when set) representing: + * [31:5] Unused (and reserved) + * [4] Filter event, uOS side disabled this event + * [3] Status event, no failure (just MCA bank dump) + * [2] Injected or artificially generated event + * [1] This event has been recorded in EEPROM + * [0] Fatal, the uOS is toast (card needs reset) + * + * MCA bank register sizes are not the same on all banks: + * + * CTL STATUS ADDR MISC Notes + * CPU 0: 32 64 - - A,M not implemented, always 0 + * CPU 1: 32 64 64 32 + * CPU 2: 32 64 64 - M not implemented, always 0 + * DBOX: 32 64 64 - M not implemented, always 0 + * SBOX: 32 64 64 64 + * GBOX: 64 64 64 32 + * TBOX: 64 64 32 - M not implemented, not there + */ + +#define MC_ORG_BNK0 0 +#define MC_ORG_BNK1 1 +#define MC_ORG_BNK2 2 +#define MC_ORG_DBOX 3 +#define MC_ORG_SBOX 4 +#define MC_ORG_GBOX 5 +#define MC_ORG_TBOX 6 + +#define MC_FLG_FATAL (1 << 0) +#define MC_FLG_LOG (1 << 1) +#define MC_FLG_FALSE (1 << 2) +#define MC_FLG_STATUS (1 << 3) +#define MC_FLG_FILTER (1 << 4) + +typedef struct mce_info { + uint16_t org; /* Source of event */ + uint16_t id; /* Identifier of source */ + uint16_t flags; /* Report flags */ + uint16_t pid; /* Alternate source ID */ + uint64_t stamp; /* Time stamp of event */ + uint64_t ctl; /* MCA bank register 'CTL' */ + uint64_t status; /* MCA bank register 'STATUS' */ + uint64_t addr; /* MCA bank register 'ADDR' */ + uint64_t misc; /* MCA bank register 'MISC' */ +} MceInfo; + + +#pragma pack(pop) /* Restore to entry conditions */ + +#ifdef __cplusplus +} /* C++ guard */ +#endif + +#endif /* Recursion block */ diff --git a/ras/micpm_api.h b/ras/micpm_api.h new file mode 100644 index 0000000..d86ceeb --- /dev/null +++ b/ras/micpm_api.h @@ -0,0 +1,307 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. 
+ * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * Definition of the PM interface to the RAS module. + * + * Throttle event interface is similar to the MC interface. + * If a connection is made to MR_TTL_PORT then event records + * will be sent to the host. Events are sent non-blocking, + * so if the SCIF buffer runs full, events are dropped until + * the block disappear (or the session is closed). + * + * Queries are technically implemented as an extension to the + * MT interface, and thus are accessible from the host. + * Except for the risk of conflicting commands written to the + * two power limit registers, there are no side effects from + * host side access via SCIF. + * + * Currently there are no plans to expose this in SysFs nodes. + * These routines are just wrappers for read/write access to + * SMC registers. No precious IP here. + */ + +#ifndef _MICPM_API_H_ +#define _MICPM_API_H_ 1 + +#ifdef __cplusplus +extern "C" { /* C++ guard */ +#endif + + +/* +** +** Configuration manifests +** +*/ + +#pragma pack(push, 4) /* Weird Windos requirement */ + + +/* + * Throttle events are reported on this port. Only one consumer can + * connect in order to be notified about PM throttling events. + */ + +#define MR_TTL_PORT SCIF_RAS_PORT_2 + + +/* + * Throttle events are provided in raw form, i.e. with as + * little processing on the card side as possible. + * For nicer throttle state display, use MT command MR_REQ_TTL. + * + * To compensate for the chance of lost events, the full + * throttle state is transfered in one byte on every message: + * + * Bit# Content + * 0 Power trottle state changed + * 1 New/Current power throttle state + * 2 Thermal throttle state changed + * 3 New/Current thermal throttle state + * 4 Power alert state changed + * 5 New/Current power alert state + * + * By definition, when power and thermal throttle are in effect + * the KnC is forced to run at reduced speed (600 MHz or so) and + * with lower operating voltages, i.e. software is not in control. + * During power alerts the KnC is consuming more power than PLim1 + * and the PM module can reduce speed and/or voltages to reduce + * power consumption. If power consumption goes beyond PLim0, the + * hardware (SMC really) will start real power throttles. + * In effect time spent in power throttle, will also be counted + * as being in the power alert state. See MT request MR_REQ_TTL. + */ + +#define PM_PWR_TTL_CHG (1 << 0) /* Power throttle change */ +#define PM_PWR_TTL (1 << 1) /* Power Trottle state */ +#define PM_TRM_TTL_CHG (1 << 2) /* Thermal throttle change */ +#define PM_TRM_TTL (1 << 3) /* Thermal Trottle state */ +#define PM_ALRT_TTL_CHG (1 << 4) /* Power alert change */ +#define PM_ALRT_TTL (1 << 5) /* Power alert state */ + +typedef struct ttl_info { + uint8_t upd; /* Throttle state update */ + uint8_t die; /* Die temperature (as per SBOX) */ +} TtlInfo; + + + +/* + * PM specific MT opcodes + * Leave one empty slot in callout table between + * this and the official MT API entries. 
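+ * (Editor's note: hence the first PM opcode below, PM_REQ_PL0, is
+ * MR_REQ_MAX + 2, and the slot MR_REQ_MAX + 1 stays unused.)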
+ */ + +#define PM_REQ_PL0 (MR_REQ_MAX + 2) /* Get power limit 0 */ +#define PM_SET_PL0 (MR_REQ_MAX + 3) /* Set power limit 0 */ +#define PM_REQ_PL1 (MR_REQ_MAX + 4) /* Get power limit 1 */ +#define PM_SET_PL1 (MR_REQ_MAX + 5) /* Set power limit 1 */ +#define PM_REQ_PAVG (MR_REQ_MAX + 6) /* Get average power */ +#define PM_REQ_PTTL (MR_REQ_MAX + 7) /* Get power throttle */ +#define PM_REQ_VOLT (MR_REQ_MAX + 8) /* Get voltage */ +#define PM_REQ_TEMP (MR_REQ_MAX + 9) /* Get temperatures */ +#define PM_REQ_TACH (MR_REQ_MAX + 10) /* Get fan tachometer */ +#define PM_REQ_TTTL (MR_REQ_MAX + 11) /* Get thermal throttle */ +#define PM_REQ_FTTL (MR_REQ_MAX + 12) /* Get force throttle */ +#define PM_SET_FTTL (MR_REQ_MAX + 13) /* Set force throttle */ +#define PM_REQ_MAX PM_SET_FTTL /* Last PM command */ + + +/* +** +** Response container structures below. +** +*/ + + +/* + * Get power limit + * REQ_PL{0/1} notes: + * - Only power limit 0 have a guard band defined. + */ +typedef struct pm_rsp_plim { + uint32_t pwr_lim; /* Power limit, in Watt */ + uint32_t time_win; /* Time Window, in mSec */ + uint32_t guard_band; /* Guard band, in Watt */ +} PmRspPlim; + + +/* + * Set power limit + */ +typedef struct pm_cmd_plim { + uint32_t pwr_lim; /* Power limit, in Watt */ + uint32_t time_win; /* Time Window, in mSec */ +} PmCmdPlim; + + +/* + * Get average power + * REQ_PAVG notes: + * - Both values are subject to availability in the SMC. + * The top two status bit of each SMC register is provided + * separately (and stripped from the read value). Decode as + * 00 Data OK + * 01 Lower threshold reached + * 10 Upper threshold reached + * 11 Data unavailable + * It is unclear if data is good if outside thresholds. + */ +typedef struct pm_rsp_pavg { + uint8_t stat_0; /* Status bits for window 0 */ + uint8_t stat_1; /* Status bits for window 1 */ + uint32_t pwr_0; /* Average over window 0, in Watt */ + uint32_t pwr_1; /* Average over window 1, in Watt */ +} PmRspPavg; + + +/* + * Get Power throttle status + * REQ_PTTL notes: + * - Duration value is subject to availability in the SMC. + * The top two status bit of this SMC register is provided + * separately (and stripped from the read value). Decode as + * 00 Data OK + * 01 Reserved + * 10 Reserved + * 11 Data unavailable + */ +typedef struct pm_rsp_pttl { + uint8_t pwr_ttl; /* Power throttle asserted */ + uint8_t stat_dur; /* Status bits duration */ + uint32_t duration; /* Power throttle duration, in mSec */ +} PmRspPttl; + + +/* + * Get voltages + * REQ_VOLT notes: + * - VR values are subject to availability in the SMC. + * The top two status bit of each SMC register is provided + * separately (and stripped from the read value). Decode as + * 00 Data OK + * 01 Lower threshold reached + * 10 Upper threshold reached + * 11 Data unavailable + * It is unclear if data is good if outside thresholds. + */ +typedef struct pm_rsp_volt { + uint8_t stat_vccp; /* Status bits for Vddc */ + uint8_t stat_vddg; /* Status bits for Vddg */ + uint8_t stat_vddq; /* Status bits for Vddq */ + uint32_t vccp; /* Vccp, in mV */ + uint32_t vddg; /* Vddg, in mV */ + uint32_t vddq; /* Vddq, in mV */ +} PmRspVolt; + + +/* + * Get temperatures + * REQ_TEMP notes: + * - These values are subject to availability in the SMC. + * The top two status bit of each SMC register is provided + * separately (and stripped from the read value). 
Decode as + * 00 Data OK + * 01 Lower threshold reached + * 10 Upper threshold reached + * 11 Data unavailable + * It is unclear if data is good if outside thresholds. + */ +typedef struct pm_rsp_temp { + uint8_t stat_cpu; /* Status bits for Tcpu */ + uint8_t stat_vccp; /* Status bits for Tvddc */ + uint8_t stat_vddg; /* Status bits for Tvddg */ + uint8_t stat_vddq; /* Status bits for Tvddq */ + uint32_t cpu; /* CPU temp, in C */ + uint32_t vccp; /* Vccp VR temp, in C */ + uint32_t vddg; /* Vddg VR temp, in C */ + uint32_t vddq; /* Vddq VR temp, in C */ +} PmRspTemp; + + +/* + * Get fan tachometer + * REQ_TACH notes: + * - These values are subject to availability in the SMC. + * The top two status bit of each SMC register is provided + * separately (and stripped from the read value). Decode as + * 00 Data OK + * 01 Lower threshold reached (tach only) + * 10 Reserved + * 11 Data unavailable + * It is unclear if data is good if outside thresholds. + */ +typedef struct pm_rsp_tach { + uint8_t stat_pwm; /* Status bits for PWM */ + uint8_t stat_tach; /* Status bits for TACH */ + uint32_t fan_pwm; /* Fan power, in % */ + uint32_t fan_tach; /* Fan speed, in RPM */ +} PmRspTach; + + +/* + * Get thermal throttle status + * REQ_THRM notes: + * - Duration value is subject to availability in the SMC. + * The top two status bit of this SMC register is provided + * separately (and stripped from the read value). Decode as + * 00 Data OK + * 01 Reserved + * 10 Reserved + * 11 Data unavailable + */ +typedef struct pm_rsp_tttl { + uint8_t thrm_ttl; /* Power throttle asserted */ + uint8_t stat_dur; /* Status bits duration */ + uint32_t duration; /* Thermal throttle duration, in mSec */ +} PmRspTttl; + + +/* + * Get/Set force trottle control + */ +typedef struct pm_rsp_fttl { + uint8_t forced; /* Forced power throttle asserted */ +} PmRspFttl; + + +#pragma pack(pop) /* Restore to sane conditions */ + +#ifdef __cplusplus +} /* C++ guard */ +#endif + +#endif /* Recursion block */ diff --git a/ras/micras.h b/ras/micras.h new file mode 100644 index 0000000..faa3e91 --- /dev/null +++ b/ras/micras.h @@ -0,0 +1,536 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. 
+ * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * RAS module common internal declarations + * + * Configuration flags, constants and function prototypes + * for the RAS sysfs, MT and MC module. + */ + +#ifndef _MICRAS_H_ +#define _MICRAS_H_ 1 + + +/* + * Public APIs first. + * Must be self-contained and independent of local tunables. + */ + +#include "micras_api.h" +#include "micmca_api.h" +#include "micpm_api.h" + + +/* + * Local configurables & tunables + */ + +#define USE_PM 1 /* Support power management */ + +#define RAS_HALT 1 /* Panic on uncorrectable MCAs */ + +#define I2C_SLOW 1 /* Default to lowest speed on I2C */ + +#define USE_FSC 1 /* Allow using FSC MGBR/MGBSR protocol */ +#define USE_SVID 0 /* Allow using SVID for VR info */ +#define USE_SMC 1 /* Prefer SMC over SBOX (telemetry) */ + +#define MT_TIMER 1 /* Enable periodic wakeup */ +#define MT_PERIOD 999 /* Period sleep (mS) */ + +#define MCU_NMI 1 /* Use NMI in SBOX redirection table */ + +#define EE_VERIFY 0 /* Verify all EEPROM writes */ +#define EE_PROC 1 /* Enable access to EEPROM from /proc/elog */ +#define EE_PROC_NEW 0 /* Only display events between head & tail */ +#define EE_INJECT 0 /* Enable writes to EEPROM via /proc/elog */ + +#define BEAM_TEST 0 /* Neuter MC handling for beam test */ + +#define MT_VERBOSE 0 /* Track MT activity in kernel log */ +#define MC_VERBOSE 0 /* Track MC activity in kernel log */ +#define PM_VERBOSE 0 /* Track PM activity in kernel log */ + +#define GBOX_WORKING 0 /* Set to one when GBOX writes are stable */ + +#define WA_4845465 0 /* Use HSD #4845465 workaround */ + +#define ADD_DIE_TEMP 1 /* Embed die temperature in event reports */ + +#define NOT_YET 0 /* 'Hide' code that's not currently in use */ + + +/* + * Useful macros + *TBD: Cast everything to 64 bit (ULL)? + * For now all is 32 bit (U) + */ + +#define GET_BITS(l,r,v) (((v) >> (r)) & ((1U << ((l) - (r) +1)) -1)) +#define PUT_BITS(l,r,v) (((v) & ((1U << ((l) - (r) +1)) -1)) << (r)) + +#define GET_BIT(n,v) GET_BITS((n), (n), (v)) +#define PUT_BIT(n,v) PUT_BITS((n), (n), (v)) + + +/* + * Init/Exit functions + */ + +extern void mr_mt_init(void); +extern void mr_mt_exit(void); +extern void mr_mt_card_init(void); +extern void mr_mt_card_exit(void); + + +/* + * Command line options (exported from generic MCE handler) + */ + +extern int mce_disabled; + + +/* + * MT opcode/function table. + * Resides in micras_main() and gates access though sysctls and SCIF. 
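+ * (Editor's note: each fnc_tab entry below binds one MT opcode (cmd) to a
+ * handler (fnc), with flags marking the call as simple and/or privileged.)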
+ */ + +struct fnc_tab { + uint16_t cmd; + uint8_t simple; + uint8_t privileged; + int (*fnc)(void *); +}; + +extern int micras_priv; +extern int micras_mt_call(uint16_t, void *); + + +/* + * MT get functions + * Spread over micras_{common,knf,knc}.c + */ +extern int mr_get_hwinf(void *); +extern int mr_get_vers(void *); +extern int mr_get_pver(void *); +extern int mr_get_freq(void *); +extern int mr_get_volt(void *); +extern int mr_get_power(void *); +extern int mr_get_plim(void *); +extern int mr_get_clst(void *); +extern int mr_get_gddr(void *); +extern int mr_get_gfreq(void *); +extern int mr_get_gvolt(void *); +extern int mr_get_temp(void *); +extern int mr_get_fan(void *); +extern int mr_get_ecc(void *); +extern int mr_get_trc(void *); +extern int mr_get_trbo(void *); +extern int mr_get_oclk(void *); +extern int mr_get_cutl(void *); +extern int mr_get_mem(void *); +extern int mr_get_os(void *); +extern int mr_get_proc(void *); +extern int mr_get_pmcfg(void *); + +/* + * MT set functions + * Spread over micras_{common,knf,knc}.c + */ +extern int mr_set_freq(void *); +extern int mr_set_volt(void *); +extern int mr_set_plim(void *); +extern int mr_set_gfreq(void *); +extern int mr_set_gvolt(void *); +extern int mr_set_fan(void *); +extern int mr_set_trc(void *); +extern int mr_set_trbo(void *); +extern int mr_set_oclk(void *); + + +/* + * MT cmd functions + */ +extern int mr_cmd_pkill(void *); +extern int mr_cmd_ukill(void *); + + +#if defined(CONFIG_ML1OM) && USE_FSC +/* + * MT FSC access functions + * KnF specific, located in micras_knf.c + */ +extern int mr_get_fsc(void *); +extern int mr_set_fsc(void *); +#endif + +#if defined(CONFIG_MK1OM) +/* + * MT SMC access functions + * KnC specific, located in micras_knc.c + */ +extern int mr_get_smc(void *); +extern int mr_get_led(void *); +extern int mr_get_prochot(void *); +extern int mr_get_pwralt(void *); +extern int mr_get_perst(void *); +extern int mr_get_ttl(void *); + +extern int mr_set_smc(void *); +extern int mr_set_led(void *); +extern int mr_set_prochot(void *); +extern int mr_set_pwralt(void *); +extern int mr_set_perst(void *); +#endif + + +#if defined(CONFIG_MK1OM) && USE_PM +/* + * PM get functions + */ +extern int pm_get_pl0(void *); +extern int pm_get_pl1(void *); +extern int pm_get_pavg(void *); +extern int pm_get_pttl(void *); +extern int pm_get_volt(void *); +extern int pm_get_temp(void *); +extern int pm_get_tach(void *); +extern int pm_get_tttl(void *); +extern int pm_get_fttl(void *); + +/* + * PM set functions + */ +extern int pm_set_pl0(void *); +extern int pm_set_pl1(void *); +extern int pm_set_fttl(void *); +#endif + + +/* + * MC & TTL event distribution functions + * Spread over micras_{main,elog,core}.c + */ + +#ifdef MR_MCE_PORT +extern int micras_mc_send(struct mce_info *, int); +extern void micras_mc_ipmi(struct mce_info *, int); +extern void micras_mc_log(struct mce_info *); +extern uint32_t micras_mc_filter(struct mce_info *, uint64_t, int); +#endif +#ifdef MR_TTL_PORT +extern void micras_ttl_send(struct ttl_info *); +#endif + + +/* + * BOX constants (card variations). 
+ */ + +#ifdef CONFIG_ML1OM +#define DBOX_NUM 1 +#define GBOX_NUM 4 +#endif + +#ifdef CONFIG_MK1OM +#define DBOX_NUM 2 +#define GBOX_NUM 8 /* Max count, SKU dependent */ +#define TBOX_NUM 8 /* Max count, SKU dependent */ +#endif + +#ifndef COMMON_MMIO_BOX_SIZE +#define COMMON_MMIO_BOX_SIZE (1<<16) +#endif + + +/* + * BOX utility functions + * Most located in micras_main.c + */ + +extern char *mr_sku(void); +extern int mr_mch(void); +extern int mr_txs(void); + +extern uint8_t *micras_sbox; +extern uint8_t *micras_dbox[DBOX_NUM]; +extern uint8_t *micras_gbox[GBOX_NUM]; +#ifdef CONFIG_MK1OM +extern uint8_t *micras_tbox[TBOX_NUM]; +#endif + +extern uint8_t *mr_sbox_base(int); +extern uint32_t mr_sbox_rl(int, uint32_t); +extern void mr_sbox_wl(int, uint32_t, uint32_t); +extern uint64_t mr_sbox_rq(int, uint32_t); +extern void mr_sbox_wq(int, uint32_t, uint64_t); + +extern uint8_t *mr_dbox_base(int); +extern uint32_t mr_dbox_rl(int, uint32_t); +extern void mr_dbox_wl(int, uint32_t, uint32_t); +extern uint64_t mr_dbox_rq(int, uint32_t); +extern void mr_dbox_wq(int, uint32_t, uint64_t); + +extern uint8_t *mr_gbox_base(int); +extern uint32_t mr_gbox_rl(int, uint32_t); +extern void mr_gbox_wl(int, uint32_t, uint32_t); +extern uint64_t mr_gbox_rq(int, uint32_t); +extern void mr_gbox_wq(int, uint32_t, uint64_t); + +#ifdef CONFIG_MK1OM +extern uint8_t *mr_tbox_base(int); +extern uint32_t mr_tbox_rl(int, uint32_t); +extern void mr_tbox_wl(int, uint32_t, uint32_t); +extern uint64_t mr_tbox_rq(int, uint32_t); +extern void mr_tbox_wq(int, uint32_t, uint64_t); +#endif + + +/* + * Un-core MCA register offsets + * Some #defines stolen from FreeBSD uOS. + * + *TBD: check again when we get real register include files + */ + +#define SBOX_MCX_CTL_LO 0x00003090 +#define SBOX_MCX_STATUS_LO 0x00003098 +#define SBOX_MCX_STATUS_HI 0x0000309C +#define SBOX_MCX_ADDR_LO 0x000030A0 +#define SBOX_MCX_ADDR_HI 0x000030A4 +#define SBOX_MCX_MISC 0x000030A8 +#define SBOX_MCX_MISC2 0x000030AC +#define SBOX_MCA_INT_STAT 0x0000AB00 +#define SBOX_MCA_INT_EN 0x0000AB04 +#define SBOX_COMPONENT_ID 0x00004134 + +#define DBOX_MC2_CTL 0x00000340 +#define DBOX_MC2_STATUS 0x00000348 +#define DBOX_MC2_ADDR 0x00000350 + +#define GBOX_FBOX_MCA_CTL_LO 0x0000005C +#define GBOX_FBOX_MCA_CTL_HI 0x00000060 +#define GBOX_FBOX_MCA_STATUS_LO 0x00000064 +#define GBOX_FBOX_MCA_STATUS_HI 0x00000068 +#define GBOX_FBOX_MCA_ADDR_LO 0x0000006C +#define GBOX_FBOX_MCA_ADDR_HI 0x00000070 +#define GBOX_FBOX_MCA_MISC 0x00000074 + +#ifdef CONFIG_MK1OM +#define TXS_MCX_CONTROL 0x00003700 +#define TXS_MCX_STATUS 0x00003740 +#define TXS_MCX_ADDRESS 0x00003780 +#endif + + +/* + * Thermal register offsets + */ + +#if defined(CONFIG_MK1OM) && WA_4845465 +#ifndef SBOX_MICROCONTROLLER_FAN_STATUS +#define SBOX_MICROCONTROLLER_FAN_STATUS 0x1020 +#endif +#endif +#if defined(CONFIG_MK1OM) && (WA_4845465 || ADD_DIE_TEMP || USE_PM) +#ifndef SBOX_THERMAL_STATUS_2 +#define SBOX_THERMAL_STATUS_2 0x1080 +#endif +#endif + + +/* + * SMP utilities + * Located in micras_main.c + */ + +extern uint32_t rd_cr4_on_cpu(int); +extern void set_in_cr4_on_cpu(int, uint32_t); +extern void clear_in_cr4_on_cpu(int, uint32_t); +extern uint64_t rdtsc(void); + + +/* + * General EEPROM and POST card UART access + * Located in micras_elog.c + */ + +#define EE_BUF_COUNT 100 +#define EE_BUF_LINELEN 256 +extern char ee_buf[]; +extern atomic_t ee_msg; +extern atomic_t ee_seen; + +extern char * ee_fmt(char *, va_list); +extern int ee_printk(char *, ...); +extern int ee_print(char *, ...); +#ifdef 
CONFIG_MK1OM
+extern void ee_list(void);
+extern void ee_wipe(void);
+#endif
+extern int ee_init(void);
+extern int ee_exit(void);
+
+extern void myDELAY(uint64_t);
+
+
+/*
+ * SMC access API
+ * Provided by the kernel
+ */
+
+extern int gmbus_i2c_read(uint8_t, uint8_t, uint8_t, uint8_t *, uint16_t);
+extern int gmbus_i2c_write(uint8_t, uint8_t, uint8_t, uint8_t *, uint16_t);
+
+
+/*
+ * RAS core MCA handling
+ * Located in micras_core.c
+ */
+
+extern uint8_t xlat_cpu[NR_CPUS];
+extern void mcc_sync(void);
+extern int mcc_init(void);
+extern int mcc_exit(void);
+extern void mcc_flt_parm(uint8_t *);
+
+
+/*
+ * RAS un-core MCA handling
+ * Located in micras_uncore.c
+ */
+
+extern void box_reset(int);
+extern int mcu_init(void);
+extern int mcu_exit(void);
+
+
+#if defined(CONFIG_MK1OM) && USE_PM
+/*
+ * RAS PM handling
+ * Located in micras_pm.c
+ *
+ * Power management registration exchange records:
+ * The RAS module populates a 'params' record and passes it to
+ * the PM module through the micpm_ras_register() function.
+ * In return the PM module populates the passed 'callbacks' record.
+ * The PM module is responsible for populating the lists of
+ * supported core frequencies and core voltages. In contrast to
+ * KnF, where the lists reflect the hardware capabilities, these
+ * reflect the actual frequencies and voltages that the core-freq
+ * module can use to lower power consumption.
+ */
+
+struct micpm_params {
+ uint32_t * freq_lst; /* Core frequency list */
+ uint32_t * freq_len; /* Core freq count */
+ uint32_t freq_siz; /* Space in core freq list */
+ uint32_t * volt_lst; /* Core voltage list */
+ uint32_t * volt_len; /* Core voltage count */
+ uint32_t volt_siz; /* Space in core volt list */
+ int (* mt_call)(uint16_t, void *); /* Access MT function */
+ void (* mt_ttl)(int, int); /* Throttle notifier */
+};
+
+struct micpm_callbacks {
+ int (*micpm_get_turbo)(void); /* Get PM turbo setting */
+ void (*micpm_set_turbo)(int); /* Notify PM of new turbo setting */
+ void (*micpm_vf_refresh)(void); /* Refresh core V/F lists */
+ int (*micpm_get_pmcfg)(void); /* Get PM operating mode */
+};
+
+extern struct micpm_params pm_reg;
+extern struct micpm_callbacks pm_cb;
+
+
+/*
+ * Args for mt_ttl() function
+ */
+
+#define TTL_OFF 0
+#define TTL_ON 1
+
+#define TTL_POWER 0
+#define TTL_THERMAL 1
+
+
+/*
+ * Bit locations for micpm_get_turbo() and micpm_set_turbo()
+ */
+
+#define MR_PM_MODE (1 << 0) /* Turbo mode */
+#define MR_PM_STATE (1 << 1) /* Current turbo state */
+#define MR_PM_AVAIL (1 << 2) /* Turbo mode available */
+
+
+/*
+ * Bit positions for the different features turned on/off
+ * in the uOS PM configuration, for micpm_get_pmcfg().
+ */
+
+#define PMCFG_PSTATES_BIT 0
+#define PMCFG_COREC6_BIT 1
+#define PMCFG_PC3_BIT 2
+#define PMCFG_PC6_BIT 3
+
+
+/*
+ * Register/Unregister functions in the micpm driver that RAS calls
+ * during module init/exit. Pointers to the exchanged data
+ * structures are passed during registration.
+ * The RAS module guarantees that the pointers are valid until
+ * the unregister function is called. That way the PM module can
+ * modify the core frequency/voltage lists if they get changed.
+ * The callbacks must always either be a valid function pointer
+ * or a null pointer.
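+ *
+ * Illustrative sketch only (the throttle notifier name is hypothetical,
+ * not part of this source): module init code would wire the two
+ * records up roughly as
+ *
+ *   pm_reg.mt_call = micras_mt_call;
+ *   pm_reg.mt_ttl  = my_ttl_notify;
+ *   micpm_ras_register(&pm_cb, &pm_reg);
+ *
+ * after which pm_cb holds whatever callbacks the PM module filled in,
+ * each either a valid function pointer or NULL as stated above; the
+ * int return of micpm_ras_register() should be checked for errors.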
+ */ + +extern int micpm_ras_register(struct micpm_callbacks *, struct micpm_params *); +extern void micpm_ras_unregister(void); + +extern int mr_pm_ttl(struct mr_rsp_ttl *); +extern int pm_init(void); +extern void pm_exit(void); +#endif + + +/* + * Debug tools + */ + +extern void dmp_hex(void *, int, const char *, ...); + +#endif /* Recursion block */ diff --git a/ras/micras_api.h b/ras/micras_api.h new file mode 100644 index 0000000..7456fb2 --- /dev/null +++ b/ras/micras_api.h @@ -0,0 +1,1006 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * Definition of the public RAS Monitoring Thread interface. + * Access to RAS features are expected from SCIF and through + * nodes under '/sys/class/micras'. Both interfaces ends up + * in the same code and thus present the exact same data. + * + * Some information that are available elsewhere through standard + * Linux mechanism are included in this API, though things like + * process status (/proc//stat), cpu status (/proc/stat), + * and memory status (/proc/vmstat) are better from the source. + */ + +#ifndef _MICRAS_API_H_ +#define _MICRAS_API_H_ 1 + +#ifdef __cplusplus +extern "C" { /* C++ guard */ +#endif + +/* +** +** Configuration manifests +** +*/ + +#pragma pack(push, 4) /* Windos requirement */ + + +/* + * RAS module version info: M.NP + */ + +#define RAS_MAJOR "1" +#define RAS_MINOR "0" +#define RAS_PATCH " " +#define RAS_VER RAS_MAJOR "." RAS_MINOR RAS_PATCH + + +/* + * RAS services in uOS kernel listens on this port for incoming queries. + * Consumers may establish multiple connections to this port, though no + * guarantee on connection processing order will be given. Transactions + * on a connection will be processed and replied to in order recieved. 
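+ *
+ * Host-side sketch (illustrative only; assumes a SCIF endpoint 'ep'
+ * already connected to MR_MON_PORT, using the scif_send/scif_recv
+ * calls declared in include/scif.h):
+ *
+ *   struct mr_hdr q = { MR_REQ_VERS, 0, 0, 0, 0 }, a;
+ *   struct mr_rsp_vers vers;
+ *
+ *   scif_send(ep, &q, sizeof(q), SCIF_SEND_BLOCK);
+ *   scif_recv(ep, &a, sizeof(a), SCIF_RECV_BLOCK);
+ *   scif_recv(ep, &vers, a.len, SCIF_RECV_BLOCK);
+ *
+ * The reply header has MR_RESP (or MR_ERROR) set in a.cmd and is
+ * followed by a.len bytes of payload, here a MrRspVers.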
+ */ + +#define MR_MON_PORT SCIF_RAS_PORT_0 +#define MR_SCIF_MAX 32 + + +/* + * Some array max sizes. + * These may be replaced by system wide constants + * if they become available in the source tree. + */ + +#define MR_VERS_LEN 120 /* Version string lengths */ +#define MR_GUID_LEN 16 /* Global unique ID length (bytes) */ +#define MR_SENO_LEN 12 /* Serial number length (bytes) */ +#define MR_PVER_LEN 8 /* API version string length */ +#define MR_PTAB_LEN 64 /* PM freq/volt pairs */ +#define MR_DIES_LEN 9 /* Die temperatures */ +#define MR_BRDS_LEN 4 /* Board temp sensors */ +#define MR_GVND_LEN 16 /* GDDR vendor string length */ +#define MR_CORE_LEN 62 /* Max number of CPU cores */ + + +/* +** Transaction header for requests and responses is a fixed size +** record followed by an optional variable length data block. +** +** Fields usage: +** cmd [15] data field is error record +** cmd [14] response to opcode +** cmd [13:0] opcode +** len length of payload +** parm command parameter +** stamp host side cookie, performance monitoring +** spent processing time, performance monitoring +** +** Command codes: +** Codes that directly relate to cores may set the 'parm' field to a +** non-zero value to address one core (base 1) instead of them all. +** +*/ + +typedef struct mr_hdr { + uint16_t cmd; /* Command field */ + uint16_t len; /* Size of data payload */ + uint32_t parm; /* Parameter field */ + uint64_t stamp; /* Time stamp of 'send' (set by host) */ + uint64_t spent; /* Time used on response (rdtsc delta) */ +} MrHdr; + +#define MR_RESP (1 << 14) /* Response bit */ +#define MR_ERROR (1 << 15) /* Error bit */ +#define MR_OP_MASK (MR_RESP - 1) /* Opcode mask */ + +#define MR_REQ_HWINF 1 /* Get hardware info */ +#define MR_REQ_VERS 2 /* Get version strings */ +#define MR_REQ_CFREQ 3 /* Get core frequencies */ +#define MR_SET_CFREQ 4 /* Set core frequency */ +#define MR_REQ_CVOLT 5 /* Get core voltages */ +#define MR_SET_CVOLT 6 /* Set core voltage */ +#define MR_REQ_PWR 7 /* Get power metrics */ +#define MR_REQ_PLIM 8 /* Get power limit */ +#define MR_SET_PLIM 9 /* Set power limit */ +#define MR_REQ_CLST 10 /* Get core list */ +#define MR_ENB_CORE 11 /* Enable core */ +#define MR_DIS_CORE 12 /* Disable core */ +#define MR_REQ_GDDR 13 /* Get GDDR device info */ +#define MR_REQ_GFREQ 14 /* Get GDDR frequencies */ +#define MR_SET_GFREQ 15 /* Set GDDR frequency */ +#define MR_REQ_GVOLT 16 /* Get GDDR voltages */ +#define MR_SET_GVOLT 17 /* Set GDDR voltage */ +#define MR_REQ_TEMP 18 /* Get board temperatures */ +#define MR_REQ_FAN 19 /* Get fan status */ +#define MR_SET_FAN 20 /* Set fan power */ +#define MR_REQ_ECC 21 /* Get ECC mode */ +#define MR_SET_ECC 22 /* Set ECC mode */ +#define MR_REQ_TRC 23 /* Get debug trace level */ +#define MR_SET_TRC 24 /* Set debug trace level */ +#define MR_REQ_TRBO 25 /* Get turbo mode status */ +#define MR_SET_TRBO 26 /* Set turbo mode status */ +#define MR_REQ_OCLK 27 /* Get overclocking status */ +#define MR_SET_OCLK 28 /* Set overclocking status */ +#define MR_REQ_CUTL 29 /* Get core utilization */ +#define MR_REQ_MEM 30 /* Get memory utilization */ +#define MR_REQ_OS 31 /* Get OS status & process list */ +#define MR_REQ_PROC 32 /* Get process details */ +#define MR_REQ_THRD 33 /* Get thread details */ +#define MR_REQ_PVER 34 /* Get API version */ +#define MR_CMD_PKILL 35 /* Kill process */ +#define MR_CMD_UKILL 36 /* Kill processes owned by user */ +#define MR_GET_SMC 37 /* Get SMC register */ +#define MR_SET_SMC 38 /* Write SMC register */ +#define 
MR_REQ_PMCFG 39 /* Get PM config mode */ +#define MR_REQ_LED 40 /* Get LED mode */ +#define MR_SET_LED 41 /* Set LED mode */ +#define MR_REQ_PROCHOT 42 /* Get PROC hot trigger */ +#define MR_SET_PROCHOT 43 /* Set PROC hot trigger */ +#define MR_REQ_GPUHOT 42 /* Get GPU hot trigger */ +#define MR_SET_GPUHOT 43 /* Set GPU hot trigger */ +#define MR_REQ_PWRALT 44 /* Get power alert trigger */ +#define MR_SET_PWRALT 45 /* Set power alert trigger */ +#define MR_REQ_PERST 46 /* Get persistent triggers flag */ +#define MR_SET_PERST 47 /* Set persistent triggers flag */ +#define MR_REQ_TTL 48 /* Get Throttle state */ +#define MR_REQ_MAX 48 /* Max command code */ + + +/* +** +** Transaction error record: +** If an error occurs during the handling of a request, an +** error record is returned, possibly with supplemental info. +** +** Fields usage: +** err code indication error condition +** len size of additional data +** +** For now there is no definition on what supplemental info +** should look like, but the idea is to open for a possibility +** of giving very precise specification on what the error was. +** Consider it a place holder for future use. +** +** Error codes: +** Code 'NOMEM' means that space for response generation was unavailable. +** Code 'NOVAL' is used to indicate that a valid request (i.e. a query +** on something temporarily unavailable, like processor utilization on +** a core in a sleep state) has no valid response. +** +*/ + +typedef struct mr_err { + uint16_t err; /* Error code field */ + uint16_t len; /* Length of additional error info */ +} MrErr; + +#define MR_ERR_INVOP 1 /* Dofus, command/opcode invalid */ +#define MR_ERR_INVLEN 2 /* Dofus, length not valid for opcode */ +#define MR_ERR_INVAUX 3 /* Dofus, parm field not valid for opcode */ +#define MR_ERR_INVDATA 4 /* Dofus, content of data block invalid */ +#define MR_ERR_PERM 5 /* Failure, privileged command */ +#define MR_ERR_NOMEM 6 /* Failure, out of memory */ +#define MR_ERR_SMC 7 /* Failure, SMC communication */ +#define MR_ERR_NOVAL 8 /* Failure, no valid value to report */ +#define MR_ERR_UNSUP 9 /* Failure, not implemented (temporary) */ +#define MR_ERR_RANGE 10 /* Failure, parameter out of range */ +#define MR_ERR_PEND 11 /* Pending, internal use only */ + + +/* +** +** Response container structures below. +** +** Strings are returned in Pascal format (why?), i.e. pre-fixed +** with a 1 byte length field and post-fixed with a 0 byte. +** +*/ + + +/* + * MIC Hardware Info + * REQ_HWINF Notes: + * - no idea how to determine PCI-E slot, it's a host side thing. + * - assume revision is same as model ID in the component ID register + * - unique ID not available in all flash versions + * - Hardware version codes are reported as-is, anticipating + * recipient to know what the codes means. + */ + +typedef struct mr_rsp_hwinf { + uint8_t guid[MR_GUID_LEN]; /* Unique ID, from SMC */ + uint8_t board; /* Board type, SMC HW 17:16 */ + uint8_t fab; /* Fab version, SMC HW 10:8 */ + uint8_t sku; /* SKU #, SMC HW 2:0 */ + uint8_t slot; /* PCI-E slot, get from where ? */ + uint8_t rev; /* Revision, component ID 16:19 */ + uint8_t step; /* Stepping, component ID 12:15 */ + uint8_t substep; /* Sub-stepping, component ID 8:11 */ + uint8_t serial[MR_SENO_LEN]; /* Serial number, from SMC */ +} MrRspHwInf; + + + +/* + * MIC API version + * REQ_PVER Notes: + * - returns RAS_VER string the module was built with. 
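+ * - the string follows the Pascal convention noted above, so a host
+ *   client could print it as (illustrative only):
+ *       printf("RAS API %.*s\n", rsp.api[0], rsp.api + 1);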
+ */ + +typedef struct mr_rsp_pver { + char api[MR_PVER_LEN]; /* Ras module version */ +} MrRspPver; + + + +/* + * MIC uOS/Flash version + * REQ_VERS Notes: + * - unclear at this point what the lengths of these strings are. + * The limit of 128 bytes is a 'best safe guess' and may change. + * - KnF: My card has 3 flash strings, for now that's the count. + * - KnC: Has fewer defined version strings, currently only fboot0 + * string has been defined. + */ + +typedef struct mr_rsp_vers { + char fboot0[MR_VERS_LEN]; /* Fboot 0 version */ + char fboot1[MR_VERS_LEN]; /* Fboot 1 version */ + char flash[3][MR_VERS_LEN]; /* Flash block versions */ + char uos[MR_VERS_LEN]; /* uOS kernel version */ + char fsc[MR_VERS_LEN]; /* Fan controller version */ +} MrRspVers; + + + +/* + * Core frequency + * REQ_CFREQ Notes: + * - current is clock read from CURRENTRATIO register. + * - default/requested clock is read from COREFREQ register. + * In KnF, the CURRENTRATIO is not used and therefore + * COREFREQ s reported as current speed and the default + * is simply the first value registered (at module load). + * - supported speeds are part of freq/voltage pairs maintained + * by the cpu_freq driver as part of PM (cpu_freq driver). + * - unclear if we should allow manual control (writes). + */ + +typedef struct mr_rsp_freq { + uint32_t cur; /* Actual core speed in kHz */ + uint32_t def; /* Set core speed in kHz */ + uint32_t slen; /* Supported count */ + uint32_t supt[MR_PTAB_LEN]; /* Supported speed list in kHz */ +} MrRspFreq; + +/* + * Set core frequency + * New frequency (in kHz) passed in MrHdr.parm + * SET_CFREQ Notes: + * - need to turn off PM for this to stick + */ + + + +/* + * Core voltage + * REQ_CVOLT Notes: + * - KnF: Two core voltages; current voltage set from COREVOLT + * register and sense1 read in the BOARD_VOLTAGE_SENSE register. + * - KnC: 3 potential sources; SVID, SMC, and SBOX registers. + * SBOX regs require SMC telemetry which is uncertain. + * SVID does not work in A0, B0 is TBD. + * SMC will eventually relay VR data. + * Only SVID gives both set and actual values. + * Only SMC sets c_val field, zero is good. + * - Supported voltages are either determined from what the VRs + * can support or if PM is active it is part of the freq/voltage pairs + * maintained by the cpu_freq driver as part of PM (cpu_freq driver). + */ + +typedef struct mr_rsp_volt { + uint32_t cur; /* Core voltage read in uV */ + uint32_t set; /* Core voltage set in uV */ + uint8_t c_val; /* Valid bits, volt read */ + uint32_t slen; /* Supported count */ + uint32_t supt[MR_PTAB_LEN]; /* Supported voltage list in uV */ +} MrRspVolt; + +/* + * Set core voltage + * New voltage passed in MrHdr.parm + * SET_CVOLT Notes: + * - need to turn off PM for this to stick + * - Unclear if we should allow manual control through this API. + */ + + + +/* + * Card power + * REQ_PWR Notes + * - Power status only avalable on KnC via SMC query + * - VR status on KnC may come from VRs directly or from SMC query + * - VR status on KnF comes from SBOX registers (telemtry) + * - If available, status bits from query is provided, zero is good. 
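+ * - illustrative host-side check (sketch, 'pwr' being a MrRspPower
+ *   reply): a reading is usable when its valid bits are zero, e.g.
+ *       if (pwr.tot0.p_val == 0)
+ *               printf("Total power: %u W\n", pwr.tot0.prr / 1000000);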
+ */ + +typedef struct mr_rsp_pws { /* Power sensor status */ + uint32_t prr; /* Current reading, in uW */ + uint8_t p_val; /* Valid bits, power */ +} MrRspPws; + +typedef struct mr_rsp_vrr { /* Voltage regulator status */ + uint32_t pwr; /* Power reading, in uW */ + uint32_t cur; /* Current, in uA */ + uint32_t volt; /* Voltage, in uV */ + uint8_t p_val; /* Valid bits, power */ + uint8_t c_val; /* Valid bits, current */ + uint8_t v_val; /* Valid bits, voltage */ +} MrRspVrr; + +typedef struct mr_rsp_power { + MrRspPws tot0; /* Total power, win 0 */ + MrRspPws tot1; /* Total power, win 1 */ + MrRspPws inst; /* Instantaneous power */ + MrRspPws imax; /* Max instantaneous power */ + MrRspPws pcie; /* PCI-E connector power */ + MrRspPws c2x3; /* 2x3 connector power */ + MrRspPws c2x4; /* 2x4 connector power */ + MrRspVrr vccp; /* Core rail */ + MrRspVrr vddg; /* Uncore rail */ + MrRspVrr vddq; /* Memory subsystem rail */ +} MrRspPower; + + + +/* + * Power envelope + * REQ_PLIM Notes: + * - power envelope is a PM property. A physical limit + * is given to PM, which then calculate derivative high + * and low water mark figures. + * - values are retrieved from PM module + */ + +typedef struct mr_rsp_plim { + uint32_t phys; /* Physical limit, in W */ + uint32_t hmrk; /* High water mark, in W */ + uint32_t lmrk; /* Low water mark, in W */ +} MrRspPlim; + +/*TBD + * Set power envelope + * New value passed in MrHdr.parm + * SET_PLIM Notes: + * - not sure if setting this should be allowed at all. + */ + + + +/* + * Core information + * REQ_CLST Notes: + * - for the average user a core count is all required, since + * logically the cores are _always_ enumerated 0 .. -1. + * Physical enumeration, such as ring stop, are not useful. + * - perhaps this request should return the CPU bitfields from + * the uOS of offline, online, possible, and present masks. + * Would allow watching of PM activity. + */ + +typedef struct mr_rsp_clst { + uint16_t count; /* Cores present */ + uint16_t thr; /* Threads per core */ +} MrRspClst; + + +/* + * Set core enable/disable + * Core id & set/reset value passed in MrHdr.parm + * ENB_CORE/DIS_CORE Notes: + * - uOS Linux does not have write access to HW config in SPI flash. + * No way to enable/disable cores + * - only listed here since if compatibility with FreeBSD is needed. + */ + + + +/* + * Memory device info + * REQ_GDDR Notes: + * - This is read from scratch9, i.e. provided by bootstrap. + */ + +typedef struct mr_rsp_gddr { + char dev[MR_GVND_LEN]; /* Device vendor */ + uint16_t rev; /* Device revision */ + uint32_t size; /* Device size, in Mbit/device */ + uint32_t speed; /* Transactions speed, kT/sec */ +} MrRspGddr; + + + +/* + * GDDR frequencies + * REQ_GFREQ Notes: + * - current clock can be read from MEMORYFREQ register + * - the GDDR nominal frequency is reported + * - the supported frequency list contains values that PLLs + * are capable of producing. Info is of limited use, since + * there is no way to control the GDDR frequency (locked by fuses). + */ + +typedef struct mr_rsp_gfreq { + uint32_t cur; /* Current GDDR speed in kHz */ + uint32_t def; /* Default GDDR speed in kHz */ + uint32_t slen; /* Supported count */ + uint32_t supt[MR_PTAB_LEN]; /* Supported speeds list in kHz */ +} MrRspGfreq; + +/* + * Set GDDR frequency + * New frequency passed in MrHdr.parm + * SET_GFREQ Notes: + * - uOS cannot alter the PLLs because it requires retraining, which + * causes loss of memory content. 
+ * - KnF: uOS does not have write access to SPI flash, which is required + * to modify the GDDR frequency at next reboot. + * - KnC: GDDR frequency is hard locked by fuses, cannot change, ever!!! + */ + + + +/* + * GDDR voltages + * REQ_GVOLT Notes: + * - KnF: Two GDDR voltages; current voltage set from MEMVOLT + * register and sense2 from BOARD_VOLTAGE_SENSE register. + * MEMVOLT register always returns zero, only sense2 + * actually returns something useful in current Si. + * - KnC: 3 potential sources; SVID, SMC, and SBOX registers. + * SBOX regs require SMC telemetry which is uncertain. + * SVID does not work in A0, B0 is TBD. + * SMC will eventually relay VR data + * Only SVID gives both set and actual values. + * Only SMC sets c_val field, zero is good. + * - Supported voltages reported are voltages the VRs can be programmed + * to supply. Info is of limited use, since there is no way to control + * the GDDR voltage (locked by fuses). + */ + +typedef struct mr_rsp_gvolt { + uint32_t cur; /* GDDR voltage read in uV */ + uint32_t set; /* GDDR voltage set in uV */ + uint8_t c_val; /* Valid bits, volt read */ + uint32_t slen; /* Supported count */ + uint32_t supt[MR_PTAB_LEN]; /* Supported voltage list in uV */ +} MrRspGvolt; + +/* + * Set GDDR voltage + * New voltage passed in MrHdr.parm + * SET_GVOLT Notes: + * - uOS cannot alter the VR settings at all. Even if it could + * then it still clash with the need to retrain and memory loss. + * - KnF: uOS does not have write access to SPI flash, which is required + * to modify the GDDR voltage at next reboot. + * - KnC: GDDR voltage is hard locked by fuses, cannot change, ever!!! + */ + + + +/* + * Board temperatures + * REQ_TEMP Notes: + * - CPU die temps can be read from THERMAL_STATUS (highest + * of several sensors) and CURRENT_DIE_TEMP registers. + * The die sensors values do not match the status + * value, so the conversion formula or calibration + * needs a re-visit. + * - If we could get at them, we could provide readings + * from the following devices, but are they all useful? + * Fan inlet sensor + * Fan exhaust sensor + * GDDR temp (one chip is measured) sensor + * Vccp VR + * Vddg VR + * Vddq VR + * - most devices report current and maximum temperatures in + * degrees Celcius as a signed integer, 9 bits for die temp + * and 8 bits for voltage regulators, 12 bit for sensors. + */ + +typedef struct mr_rsp_tsns { + int16_t cur; /* Current temperature, in C */ + int8_t c_val; /* Valid bits, if available */ +} MrRspTsns; + +typedef struct mr_rsp_tdie { + int16_t cur; /* Current temperature, in C */ + int16_t max; /* Maximum temperature, in C */ +} MrRspTdie; + +typedef struct mr_rsp_temp { + MrRspTsns die; /* Highest on-die measure */ + MrRspTdie dies[MR_DIES_LEN]; /* All on-die measures */ + MrRspTsns brd; /* Highest board measure */ + MrRspTsns fin; /* Fan inlet */ + MrRspTsns fout; /* Fan outlet */ + MrRspTsns gddr; /* Gddr device */ + MrRspTsns vccp; /* Vccp VR */ + MrRspTsns vddg; /* Vddg VR */ + MrRspTsns vddq; /* Vddq VR */ +} MrRspTemp; + + + +/* + * Fan speed + * REQ_FAN Notes: + * - fan status is reported in RPM and it's control is + * a pulse with modulation ratio to 255, i.e. 0 is min, + * 127 is ~50% and 255 is max. + * - the card has logic for controlling two fans. + * Only one is used and we only report status for one. 
+ */
+
+typedef struct mr_rsp_fan {
+ uint16_t rpm; /* Fan speed, rpm */
+ uint8_t pwm; /* Active PWM ratio, 0..255 */
+ uint8_t override; /* Override flag */
+ uint8_t r_val; /* Valid bits, speed */
+ uint8_t p_val; /* Valid bits, PWM */
+} MrRspFan;
+
+/*
+ * Set fan speed
+ * Control is passed in MrHdr.parm (struct fits into 32 bit)
+ * SET_FAN Notes:
+ * - this may collide with OOB methods (such as IPMI)
+ *   that have priority, no guarantee this will stick.
+ * - changing fan speed parameters may interfere
+ *   with PM in undefined ways.
+ */
+
+typedef struct mr_set_fan {
+ uint8_t override; /* Override enable flag */
+ uint8_t pwm; /* Force PWM ratio, 0..255 */
+} MrSetFan;
+
+
+
+/*
+ * Error correction mode
+ * REQ_ECC Notes:
+ * - retrieve this info from one (any) of the gboxes.
+ */
+
+typedef struct mr_rsp_ecc {
+ uint32_t enable; /* ECC mode: 1 enabled, 0 disabled */
+} MrRspEcc;
+
+/*
+ * Set error correction mode
+ * New mode passed in MrHdr.parm
+ * SET_ECC Notes:
+ * - ECC cannot be changed on the fly by uOS, requires retraining
+ *   of GDDR which causes loss of memory content.
+ * - uOS Linux does not have write access to HW config in SPI flash.
+ *   No way to change ECC enable/disable setting.
+ */
+
+
+
+/*
+ * Trace level
+ * REQ_TRC Notes:
+ * - No idea what support this has in uOS Linux.
+ */
+
+typedef struct mr_rsp_trc {
+ uint32_t lvl; /* Debug trace level */
+} MrRspTrc;
+
+/*
+ * Set trace level
+ * New level passed in MrHdr.parm
+ * SET_TRC Notes:
+ * - No idea what this does in uOS Linux (nothing yet).
+ */
+
+
+
+/*
+ * Turbo setting
+ * REQ_TRBO Notes:
+ * - Retrieve current actual turbo mode and state
+ * - 'set' value: 1 if enabled, 0 otherwise
+ * - 'state' value: 1 if active, 0 otherwise
+ * - 'avail' value: 1 if TRBO supported, 0 otherwise
+ */
+
+typedef struct mr_rsp_trbo {
+ uint8_t set; /* Turbo mode */
+ uint8_t state; /* Turbo state */
+ uint8_t avail; /* Turbo mode available */
+ uint8_t pad; /* Pad to 32 bit */
+} MrRspTrbo;
+
+/*
+ * Set turbo mode
+ * New mode passed in MrHdr.parm
+ * SET_TRBO Notes:
+ * - Set always allowed, but silently ignored if not available.
+ */
+
+
+
+/*
+ * LED override
+ * REQ_LED Notes:
+ * - KnC: Retrieve current LED mode setting, 0=normal, 1=identify
+ * - KnF: not implemented (error MR_ERR_UNSUP)
+ */
+
+typedef struct mr_rsp_led {
+ uint32_t led; /* LED mode setting */
+} MrRspLed;
+
+/*
+ * Set LED mode
+ * New mode passed in MrHdr.parm
+ * SET_LED Notes:
+ * - KnC: Mode values
+ *   0 is normal SMC control (fast blink)
+ *   1 is identify mode (2 blinks every 2 seconds)
+ * - KnF: not implemented (error MR_ERR_UNSUP)
+ */
+
+
+
+/*
+ * Overclocking
+ * REQ_OCLK Notes:
+ * - Currently no idea how to represent overclocking state
+ * - Overclocking not supported, return MR_RSP_NOVAL
+ */
+
+typedef struct mr_rsp_oclk {
+ uint32_t freq; /* Overclocking setting */
+} MrRspOclk;
+
+/*
+ * Set overclocking mode
+ * New mode passed in MrHdr.parm
+ * SET_OCLK Notes:
+ * - Overclocking not supported, return MR_RSP_NOVAL
+ */
+
+
+
+/*
+ * Processor utilization (OS status)
+ * REQ_CUTL Notes:
+ * - returned info is a simple sum of 4 logical CPUs
+ * - the counter units returned are Linux kernel jiffies,
+ *   typically in range 1 - 10 ms, based on continuous
+ *   counters maintained by the kernel. The number of
+ *   jiffies per second is reported for scaling purposes.
+ * In order to get a current 'utilization' figure, the + * host needs to query the counters at regular intervals + * and use this formula to achieve a percentage: + * u = ((c2 - c1) / (t2 - t1)) * 100 + * or + * u = ((c2 - c1) * 100) / (t2 - t1) + * where t2 - t1 = elapsed jiffies between samples + * c2 - c1 = usage jiffy counts between samples + * - the listed counters does not add up to cover the + * wall clock time exactly, sampling errors do occur. + * - counters for iowait, irq, and softirq are not included. + * - jiffy counters are updated by the timer tick interrupt + * handler. It's accuracy is known to be limited, see + * Documentation/cpu-load.txt for details. + * - counters are reported regardless of core sleep states + */ + +typedef struct mr_rsp_ccnt { + uint64_t user; /* Normal user mode jiffies */ + uint64_t nice; /* 'Nice' user mode jiffies */ + uint64_t sys; /* System mode jiffies */ + uint64_t idle; /* Idle time jiffies */ +} MrRspCcnt; + +typedef struct mr_rsp_cutl { + uint32_t tck; /* Actual jiffs/sec (scaled by 256) */ + uint16_t core; /* Cores reported on */ + uint16_t thr; /* Threads per core */ + uint64_t jif; /* Jiffy counter at query time */ + MrRspCcnt sum; /* System wide counters */ + MrRspCcnt cpu[MR_CORE_LEN]; /* Counters per core */ +} MrRspCutl; + + + +/* + * Memory utilization (OS status) + * REQ_MEM Notes: + * - memory snapshot is obtained from kernel structs. + * No walk of page descriptors is performed. + * - Not all memory stats are visible (exported to) modules. + * + *TBD: + * - Need clarification on what memory utilization means. + * For now the total, free and buffer memory is reported. + */ + +typedef struct mr_rsp_mem { + uint32_t total; /* Total usable RAM in kB */ + uint32_t free; /* Free memory in kB */ + uint32_t bufs; /* Buffer storage in kB */ +} MrRspMem; + + + +/* + * Process management (OS status) + * REQ_OS/REQ_PROC/REQ_THRD Notes: + * - split in 3 levels of detail: + * 1) Get set of applications (exclude kernel processes and threads) + * 2) Get details on specified application (pid in MrHdr.parm), + * which includes a thread pid list (up to 256 threads). + * 3) Get details on specific thread (thread id in MrHdr.parm) + * Opcodes 2 and 3 will, apart from thread list, mostly report the same + * set of details. What needs monitoring (see 'man proc', section on + * /proc//stat and /proc//status for what's available)? + * - process time counters are continuous, so if any ratio between + * the time a process/thread spends and actual wall clock time is + * to be calculated, the same logic for dynamic display applies as + * for the CUTL counters. I.e. a jiffy stamp is needed in the reply. + *TBD: + * - Introduce some sanity in time measurements. + * - Level 3 (thread details) is not implemented (is it needed ?). + * - Add ppid & credentials in MrRspProc? Needed to make a "top" display. 
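+ *
+ * Illustrative host-side use of the CUTL formula above (sketch, not
+ * part of the interface), given two MrRspCutl samples a and b taken
+ * some seconds apart:
+ *
+ *   busy = (b.sum.user + b.sum.nice + b.sum.sys)
+ *        - (a.sum.user + a.sum.nice + a.sum.sys);
+ *   util = (100 * busy) / (b.jif - a.jif);
+ *
+ * Summed over all CPUs the result can exceed 100; the same delta
+ * logic applies per core via the cpu[] counters and to the
+ * utime/stime fields of MrRspProc.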
+ */ + +typedef struct mr_rsp_os { + uint64_t uptime; /* Seconds since OS boot */ + uint64_t loads[3]; /* 1, 5, 15 minute load average */ + uint32_t alen; /* Application count */ + uint32_t apid[256]; /* Application PIDs */ +} MrRspOs; + +typedef struct mr_rsp_proc { + uint32_t pid; /* Process ID */ + char name[16]; /* Program name (less path) */ + uint64_t utime; /* User time in uS */ + uint64_t stime; /* System time in uS */ + uint64_t etime; /* Elapsed time in uS */ + uint32_t rss; /* Resident set, in kB */ + uint32_t vm; /* VM size, in kB */ + uint32_t tlen; /* Thread count */ + uint32_t tpid[256]; /* Process threads */ +} MrRspProc; + + + +/* + * Terminate process + * Signal passed in MrHdr.parm bits 31:24 (see 'kill -l') + * Process ID passed in MrHdr.parm bits 23:0 (see /proc/sys/kernel/pid_max) + * CMD_PKILL Notes: + * - This is specifically for MPI style cluster managers + * who wants to rid the card of a specific process. + * - Processes owned by users ID's less than 500 are immune to this. + */ + + + +/* + * Terminate user + * Signal passed in MrHdr.parm bits 31:24 (see 'kill -l') + * User ID passed in MrHdr.parm bits 23:0 (see /etc/login.defs). + * CMD_UKILL Notes: + * - This is specifically for MPI style cluster managers to + * rid the card of processes owned by a specific user ID. + * - User ID's below 500 will silently be ignored. + */ + + + +/* + * Read SMC register + * MR_GET_SMC Notes: + * - Both SMC and FSC devices are accessed through I2C busses, which + * means that retrieval will be slow (order of milli seconds). + * - KnC: allows direct access to the SMC CSRs, which can be read + * or written in any random order. + * SMC CSR definitions are not within the scope of this API. + * Register number passed in MrHdr.parm bits 7:0 (8 bits). + * SMC registers are 32 bit, except one (UUID) that is 16 byte. + * - KnF: allows direct access to the fan speed controller (FSC) + * status registers on board temp and power sensors. + * The FSC execute command register every 50 mSec, which means + * that register needs 'SET' and hold for 50 mSec before any + * value can be returned. For telemetry data the SET is done + * implicitly, all other has to execute a 'SET' before running + * a 'GET' command. + * + FSC register definitions are not within the scope of this API. + * All sensor data returns are 8 bit wide. + */ + +typedef struct mr_rsp_smc { + uint8_t reg; /* Register number */ + uint16_t width; /* Valid return bytes (4 or 16) */ + union { + uint32_t val; /* Requested register value */ + uint8_t uuid[16]; /* Unique identifier */ + uint8_t serial[12]; /* Card serial number */ + } rtn; +} MrRspSmc; + +/* + * Write SMC register + * Register number passed in MrHdr.parm bits 31:24 (8-bit address decode). + * Register value passed in MrHdr.parm bits 23:0 (24 bit data). + * MR_SET_SMC Notes: + * - Improper use of this command can cause thermal shutdown of the card. + * - Improper use can interfere with power management. + * - KnC: For security reasons only the following registers are writeable: + * 20, 22 IPMI + * 2b, 2c, 2d, 2f, 30, 31, 32, 33 PM control parameters + * 4b Fan Adder + * 60 LED control + * No SMC registers of interest are more than 16 bits wide. + * - KnF: For security reasons only the followingregisters are writable: + * 0 Fan 1 Speed Override + * 1 Power Management and Control Config + * 11 General Status command + * Selector is 8 bits wide and only valid values are + * 20, 21, 22, 23 Power sensors, 1s avg. 
+ * 30, 31, 32, 33 Power sensors, 1 sample + * a1, a2, a3, a4, a5 Max temps + */ + + + +/* + * Get PM config mode + * REQ_PMCFG notes: + * - Return value is reported 'as-is' from the PM module. + */ + +typedef struct mr_rsp_pmcfg { + uint32_t mode; /* Current PM operation mode */ +} MrRspPmcfg; + + + +/* + * Read Power triggers + * Consist of two trigger points (power,time), which can be calculated + * from SKU at card power-on or be persistent across reboots. + * At trigger (PROCHOT), GPU Hot gets asserted + * At trigger (PWRALT), Power Alert gets asserted + * + * MR_REQ_PROCHOT, MR_REQ_PWRALT Notes: + * - KnC: Read SMC registers for trigger 0 and 1 respectively. + * GPUHOT: registers 0x2c and 0x2d + * PWRALT: registers 0x2f and 0x30 + * - KnF: not implemented (error MR_ERR_UNSUP) + */ + +typedef struct mr_rsp_ptrig { + uint16_t power; /* Power limit, Watt */ + uint16_t time; /* Time windows, mSec */ +} MrRspPtrig; + +/* + * Write Power triggers + * MR_SET_PROCHOT, MR_SET_PWRALT Notes + * Structure MrRspPtrig passed in MrHdr.parm + * Trigger PROCHOT.power must be higher than trigger PWRALT.power. + * - KnC: Write SMC registers for trigger 0 and 1 respectively. + * GPUHOT: registers 0x2c and 0x2d + * PWRALT: registers 0x2f and 0x30 + * - KnF: not implemented (error MR_ERR_UNSUP) + * Warning: MT does not check for GPUHOT.power >= PWRALT.power. + *TBD: Should it? + * It is anticipated that changes follows reads, i.e. checking + * can be checked in application software. + */ + + + +/* + * Read Persistent Power triggers flag + * If set, changes to Power Triggers will be permanent + * MR_REQ_PERST Notes: + * - KnC: Reads bit 0 of SMC register 0x32 + * - KnF: not implemented (error MR_ERR_UNSUP) + */ + +typedef struct mr_rsp_perst { + uint32_t perst; /* Persistent power triggers */ +} MrRspPerst; + +/* + * Write Persistent Power triggers flag + * New value passed in MrHdr.parm + * MR_SET_PERST Notes: + * - KnC: Writes bit 0 of SMC register 0x32 + * - KnF: not implemented (error MR_ERR_UNSUP) + */ + + +/* + * Read Throttle states + * Returns status of current and previous throttle state + * retrieved from the card side PM module. + * MR_REQ_TTL Notes: + * - KnC: Calls PM for latest information. + * Note that the 'active' flags can toggle very often, + * which may make it less informative for display. + * Time tracked in jiffies, not true mSec resolution. + * - KnF: not implemented (error MR_ERR_UNSUP) + */ + +typedef struct mr_rsp_tstat { + uint8_t active; /* Currently active */ + uint32_t since; /* Length of current throttle, mSec */ + uint32_t count; /* Number of throttles */ + uint32_t time; /* Total time throttled, mSec */ +} MrRspTstat; + +typedef struct mr_rsp_ttl { + MrRspTstat thermal; /* Thermal throttle state */ + MrRspTstat power; /* Power throttle state */ + MrRspTstat alert; /* Power alert state */ +} MrRspTtl; + + +#pragma pack(pop) /* Restore to entry conditions */ + +#ifdef __cplusplus +} /* C++ guard */ +#endif + +#endif /* Recursion block */ diff --git a/ras/micras_common.c b/ras/micras_common.c new file mode 100644 index 0000000..4011ec0 --- /dev/null +++ b/ras/micras_common.c @@ -0,0 +1,968 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * RAS MT module, common code + * + * Code and data structures to handle get/set tasks for KnC and KnF. + * Parties accessing the data structures are supposed to use the + * micras_mt_tsk() routines to ensure integrity and consistency. + * Particularly important when handling sysfs nodes and actions + * requested from SCIF connections must use that method in order + * to guarantee serialized access. + * + * Even if read-only access to latest valid data is required, + * it should go through micras_mt_tsk() using dedicated handlers + * in this module. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "micras.h" + + +/* + * Persistent data accessible through the CP api. + * Some functions just read/modify hardware CSRs + * and thus need no storage between invocations. + */ + + struct mr_rsp_hwinf hwinf; /* Card specific */ + struct mr_rsp_vers vers; /* Card specific */ +static struct mr_rsp_pver pver; + struct mr_rsp_freq freq; /* Card specific */ + struct mr_rsp_volt volt; /* Card specific */ + struct mr_rsp_power power; /* Card specific */ + struct mr_rsp_plim plim; /* Card specific */ +static struct mr_rsp_clst clst; + struct mr_rsp_gddr gddr; + struct mr_rsp_gfreq gfreq; /* Card Specific */ + struct mr_rsp_gvolt gvolt; /* Card specific */ + struct mr_rsp_temp temp; /* Card specific */ + struct mr_rsp_ecc ecc; /* Card specific */ +static struct mr_rsp_trc trc; + struct mr_rsp_trbo trbo; /* Card specific */ + struct mr_rsp_pmcfg pmcfg; /* Card specific */ + + +/* + * Map of SKUs for KnX cards (currently known, will change) + * The SKU is identified solely from the PCIe ID and sub-ID. + * A zero sub-ID is a don't care. + * + *TBD: core counts in KnF needs update, not all have 32. 
+ * + * Notes: + * - Unless the PCIe subID differs, there are two 2250 cards + * that can't be distinguished from each other, one has 8 TXs + * and the other has none. PO cards -> impact only internal. + * - Not sure exactly what 2254 is, suspect MPI prototype. + */ + +#define VD(v, d) (PUT_BITS(15,0,(v)) | PUT_BITS(31,16,(d))) + +static struct sku { + uint32_t devID; /* PCIe Vendor and device ID */ + uint32_t subID; /* PCIe Sub- Vendor and device ID */ + uint8_t revNo; /* PCIe Revision number */ + uint8_t cr; /* Core count */ + uint8_t ch; /* Memory channels */ + uint8_t tx; /* TX samplers (only in KnC) */ + char * name; /* SKU name */ +} skuList[] = { + { VD(0x8086, 0x2240), 0, 0x00, 32, 8, 0, "E1" }, /* KnF */ + { VD(0x8086, 0x2241), 0, 0x00, 32, 8, 0, "E2" }, /* KnF */ + { VD(0x8086, 0x2242), 0, 0x00, 32, 8, 0, "E3" }, /* KnF */ + { VD(0x8086, 0x2243), 0, 0x00, 32, 8, 0, "E3" }, /* KnF */ + { VD(0x8086, 0x2249), VD(0x8086, 0xed08), 0, 32, 4, 0, "Ed" }, /* KnF */ + { VD(0x8086, 0x2249), VD(0x8086, 0xed0a), 0, 32, 4, 0, "Eb" }, /* KnF */ + { VD(0x8086, 0x224a), 0, 0x00, 32, 8, 0, "Eb" }, /* KnF */ + + { VD(0x8086, 0x2250), 0, 0x00, 60, 16, 0, "SKU1/SKU2" }, /* KnC: ES1, ES1B */ + { VD(0x8086, 0x2250), 0, 0x10, 60, 16, 0, "SKU2" }, /* KnC: ES2 */ + { VD(0x8086, 0x2250), 0, 0x11, 60, 16, 0, "SKU2" }, /* KnC: Mkt2 */ + { VD(0x8086, 0x2250), 0, 0x20, 60, 16, 0, "SKU2" }, + { VD(0x8086, 0x2251), 0, 0x00, 48, 16, 8, "SKU2" }, + { VD(0x8086, 0x2252), 0, 0x00, 48, 16, 0, "SKU3" }, + { VD(0x8086, 0x2253), 0, 0x00, 40, 8, 0, "SKU4/SKU5" }, /* KnC: ES0, ES1 */ + { VD(0x8086, 0x2253), 0, 0x10, 40, 8, 0, "SKU5" }, + { VD(0x8086, 0x2254), 0, 0x00, 62, 16, 0, "??" }, /* KnC: ?? */ + { VD(0x8086, 0x2255), 0, 0x00, 62, 16, 8, "SKUX" }, /* KnC: A0-PO */ + { VD(0x8086, 0x2256), 0, 0x00, 48, 12, 7, "SKU5" }, /* KnC: A0-PO */ + { VD(0x8086, 0x2257), 0, 0x00, 4, 16, 0, "SKUZ" }, + { VD(0x8086, 0x2258), 0, 0x00, 62, 16, 0, "SKU1" }, /* KnC: ES1, ES1B */ + { VD(0x8086, 0x2258), 0, 0x10, 62, 16, 0, "SKU1" }, + { VD(0x8086, 0x2259), 0, 0x00, 52, 16, 0, "SKU3" }, /* KnC: ES1 */ + { VD(0x8086, 0x225a), 0, 0x00, 48, 12, 0, "SKU4" }, /* KnC: ES1, ES1B */ + { VD(0x8086, 0x225a), 0, 0x10, 48, 12, 0, "SKU4" }, /* KnC: ES2 */ + { VD(0x8086, 0x225a), 0, 0x11, 48, 12, 0, "SKU4" }, /* KnC: Int5 */ + { VD(0x8086, 0x225b), 0, 0x00, 52, 12, 0, "SKU3" }, + { VD(0x8086, 0x225b), 0, 0x10, 52, 12, 0, "SKU3" }, + { VD(0x8086, 0x225c), 0, 0x10, 61, 16, 0, "SKU1" }, /* KnC: Mkt1 */ + { VD(0x8086, 0x225c), 0, 0x11, 61, 16, 0, "SKU1" }, /* KnC: Mkt1 */ + { VD(0x8086, 0x225c), 0, 0x20, 61, 16, 0, "SKU1" }, /* KnC: Mkt1 */ + { VD(0x8086, 0x225d), 0, 0x10, 57, 12, 0, "SKU4" }, /* KnC: Mkt4 */ + { VD(0x8086, 0x225d), 0, 0x11, 57, 12, 0, "SKU4" }, /* KnC: Mkt3, Mkt4 */ + { VD(0x8086, 0x225d), 0, 0x20, 57, 12, 0, "SKU4" }, + { VD(0x8086, 0x225e), 0, 0x11, 57, 16, 0, "GZ" }, + { VD(0x8086, 0x225e), 0, 0x20, 57, 16, 0, "GZ" }, +}; + + +/* + * Map of GDDR vendor ID vs company names + */ + +static struct { + int id; + char * vendor; +} GddrVendors[] = { + { 1, "Samsung" }, + { 2, "Quimonda" }, + { 3, "Elpida" }, + { 6, "Hynix" }, +}; + + + +/* +** +** Initializations +** +** This has two intended purposes: +** - Do a on-time effort to collect info on properties that +** are not going to change after the initial setup by +** either bootstrap or kernel initialization. +** - Collect initial values on things we can modify. +** Intent is that unloading the ras module should reset +** all state to that of the time the module was loaded. 
+** +*/ + +void __init +mr_mt_init(void) +{ + static int only_once = 1; + uint32_t scr4, scr9, scr13; + uint32_t eax, ebx, ecx, edx; + uint32_t thr, hwt; + uint32_t id; + int i; + + if (! only_once) + return; + only_once = 0; + + /* + * HWINF: + * Scratch register 13 has more info than the hwinf record + * currently can contain, may revisit. + * 3:0 Substepping + * 7:4 Stepping (0 A, 2&3 B, 4 C, 6 D) + * 11:8 Model + * 15:12 Family (11 KnF) + * 17:16 Processor + * 19:18 Platform (0 Silicon, 1 FSIM, 2 MCEMU) + * 23:20 Extended model + * 31:24 Extended family + * + * Valid KnF steppings (Step + Substep): + * "A0" (0 + 0), "A1" (0 + 1), "A2" (0 + 2), + * "B0" (2 + 0), "B1" (3 + 1), "C0" (4 + 0), + * "D0" (6 + 0) + * Valid KnC steppings (Step + Substep): + * TBD: + */ + scr13 = mr_sbox_rl(0, SBOX_SCRATCH13); + hwinf.rev = GET_BITS(11, 8, scr13); + hwinf.step = GET_BITS( 7, 4, scr13); + hwinf.substep = GET_BITS( 3, 0, scr13); + + /* + * VERS: + * Add OS version + */ + vers.uos[0] = scnprintf(vers.uos + 1, MR_VERS_LEN -2, + "Linux version: %s (build %s)", + init_uts_ns.name.release, + init_uts_ns.name.version); + + /* + * PVERS: + * Make MicRas version available + */ + pver.api[0] = scnprintf(pver.api + 1, MR_PVER_LEN -2, + "%s", RAS_VER); + + /* + * CLST: + * On regular CPU's this is read from CPUID 2 (htt cores) + * and CPUID 4 (die cores), threads per cores is htt/die. + * This does not work the same way in MIC, cores & threads + * per core on various SKUs is not reflected by the CPUIDs. + * All we have is the number of registered APIC IDs, which + * happens to be the same as logical CPUs (htt cores). + * The threads per core (die cores) is given by bootstrap in + * scratch register #4 as a bit field. + * 3:0 Threads per core (mask) + * 5:4 Cache size (0,1,2: 512K, 3: 256K) + * 9:6 GBOX channel count (0 based) + * 29:25 ICC divider for MCLK + * 30 Soft reset boot + * 31 Internal flash build + */ + cpuid(1, &eax, &ebx, &ecx, &edx); + hwt = GET_BITS(23, 16, ebx); + if (hwt > nr_cpu_ids) + hwt = nr_cpu_ids; + scr4 = mr_sbox_rl(0, SBOX_SCRATCH4); + thr = GET_BITS(3, 0, scr4); + thr = bitmap_weight((const unsigned long *) &thr, 4); + if (thr) { + if (hwt % thr) + printk("mr_mt_init: cpu/thr mismatch: hwt %d, thr %d, cor %d, (%d)\n", + hwt, thr, hwt / thr, hwt % thr); + clst.thr = thr; + } + else { + printk("Who trashed scratch #4? Val 0x%08x => 0 threads/core?\n", scr4); + clst.thr = 4; /* Best guess */ + } + clst.count = hwt / 4; + + /* + * GDDR: + * Bootstrap leaves information in scratch register #9 + * about the GDDR devices. The layout is: + * 3:0 Vendor ID, see table GddrVendors above + * 7:4 Revision + * 9:8 Density (00 = 512, 01 = 1024, 02 = 2048) + * 11:10 FIFO depth + * 15:12 DRAM info ?? 
+ * 29 ECC enable + */ + scr9 = mr_sbox_rl(0, SBOX_SCRATCH9); + id = GET_BITS(3, 0, scr9); + for(i = 0; i < ARRAY_SIZE(GddrVendors); i++) + if (GddrVendors[i].id == id) { + gddr.dev[0] = scnprintf(gddr.dev +1, MR_GVND_LEN -2, + "%s", GddrVendors[i].vendor); + break; + } + if (i == ARRAY_SIZE(GddrVendors)) + gddr.dev[0] = scnprintf(gddr.dev +1, MR_GVND_LEN -2, "Vendor %d", id); + gddr.rev = GET_BITS(7, 4, scr9); + gddr.size = 512 * (1 << GET_BITS(9, 8, scr9)); + + /* + * Card specific initialization + */ + mr_mt_card_init(); + + /* + *TBD: Save commmon registers this module may change + */ +} + +void __exit +mr_mt_exit(void) +{ + /* + * Card specific clean-up + */ + mr_mt_card_exit(); + + /* + *TBD: Restore commmon registers this module may change + */ +} + + +/* + * Return SKU properties for this card (as string) + * Processor can be identified on it's own easily, + * but the SKU reflects the impact of fuse changes + * which don't alter the CPU id. + * + * SKU properties: + * - name Name of sku (if known) + * - mch Number of memory channels + * - txs Number of texture samplers + */ + +/* + * Why are these not defined in the includes? + */ + +#ifndef SBOX_PCIE_VENDOR_ID_DEVICE_ID +#define SBOX_PCIE_VENDOR_ID_DEVICE_ID 0x00005800 +#endif +#ifndef SBOX_PCIE_PCI_SUBSYSTEM +#define SBOX_PCIE_PCI_SUBSYSTEM 0x0000582c +#endif +#ifndef SBOX_PCIE_PCI_REVISION_ID_AND_C_0X8 +#define SBOX_PCIE_PCI_REVISION_ID_AND_C_0X8 0x00005808 +#endif + +static struct sku * +get_sku(void) +{ + static struct sku * sku; + uint32_t dev, sub, rev, fuse; + char * grp; + int i; + + if (sku) + return sku; + + dev = mr_sbox_rl(0, SBOX_PCIE_VENDOR_ID_DEVICE_ID); + rev = mr_sbox_rl(0, SBOX_PCIE_PCI_REVISION_ID_AND_C_0X8); + sub = mr_sbox_rl(0, SBOX_PCIE_PCI_SUBSYSTEM); + fuse = mr_sbox_rl(0, SBOX_SCRATCH7); + rev = GET_BITS(7, 0, rev); + fuse = GET_BITS(15, 0, fuse); + + /* + * Usually the fuse revision define a group of SKUs. + * Once that's determined we'll use the other details + * to identify the SKU within that group. + */ + if (fuse >= 0 && fuse <= 1) + grp = "A0 PO"; + else if (fuse >= 2 && fuse <= 3) + grp = "A0 ES1"; + else if (fuse >= 4 && fuse <= 50) + grp = "A0 ES1B"; + else if (fuse >= 51 && fuse <= 100) + grp = "B0 PO"; + else if (fuse >= 101 && fuse <= 150) + grp = "B0 ES2"; + else if (fuse >= 151 && fuse <= 152) + grp = "B1 PO"; + else if (fuse >= 153 && fuse <= 154) + grp = "B1 PO"; + else if (fuse == 155) + grp = "B1 QS"; + else if (fuse == 156) + grp = "B1 PRQ"; + else if (fuse == 157) + grp = "B1 PRQ/GZ"; + else if (fuse >= 158 && fuse <= 159) + grp = "B1 PRQ"; + else if (fuse >= 201 && fuse <= 203) + grp = "B2 PRQ/QS"; + else if (fuse == 253) + grp = "C0 PO"; + else if (fuse == 254) + grp = "C0 QS"; + else + grp = "???"; + + /* + * Now determine which member of the group. + * Take hints from PCIe device ID and revision. + * Device ID mappings is a mess, see table above. 
+ * Revision has a simple mapping (follows fuses): + * 0x00 => A0 cards + * 0x10 => B0 cards + * 0x11 => B1 cards + * 0x20 => C0 cards + * 0x21 => C1 cards (if ever to be made) + */ + for(i = 0; i < ARRAY_SIZE(skuList); i++) { + if (dev == skuList[i].devID) { + if (skuList[i].subID && sub != skuList[i].subID) + continue; + if (rev != skuList[i].revNo) + continue; + + /* + * Found one, this is the place to cross reference it + * - memory channels should match SCR4 bits 9:6 + */ + break; + } + } + + if (i < ARRAY_SIZE(skuList)) { + sku = skuList + i; + printk("RAS: card %x:%x:%x is a \"%s %s\" (%d cores, %d memch, %d txs)\n", + dev, sub, rev, grp, sku->name, sku->cr, sku->ch, sku->tx); + } + + return sku; +} + +#if NOT_YET +char * +mr_sku(void) +{ + struct sku * sku; + + sku = get_sku(); + return sku ? sku->name : 0; +} +#endif + +int +mr_mch(void) +{ + struct sku * sku; + + sku = get_sku(); + return sku ? sku->ch : 0; +} + +int +mr_txs(void) +{ + struct sku * sku; + + sku = get_sku(); + return sku ? sku->tx : 0; +} + + +/* +** +** MT Get functions +** +** All works the same way; they get an opague pointer to +** a place where the return structure can be placed. The +** return value is either the amount (bytes) to be shipped +** back in response or one of the MR_* error codes. +** +*/ + +int +mr_get_hwinf(void * p) +{ + struct mr_rsp_hwinf * r; + + r = (struct mr_rsp_hwinf *) p; + *r = hwinf; + return sizeof(*r); +} + + +int +mr_get_vers(void * p) +{ + struct mr_rsp_vers * r; + + r = (struct mr_rsp_vers *) p; + *r = vers; + return sizeof(*r); +} + + +int +mr_get_pver(void * p) +{ + struct mr_rsp_pver * r; + + r = (struct mr_rsp_pver *) p; + *r = pver; + return sizeof(*r); +} + + +int +mr_get_clst(void * p) +{ + struct mr_rsp_clst * r; + + r = (struct mr_rsp_clst *) p; + *r = clst; + return sizeof(*r); +} + + +int +mr_get_gddr(void * p) +{ + struct mr_rsp_gddr * r; + + r = (struct mr_rsp_gddr *) p; + *r = gddr; + return sizeof(*r); +} + + +int +mr_get_trc(void * p) +{ + struct mr_rsp_trc * r; + + r = (struct mr_rsp_trc *) p; + *r = trc; + return sizeof(*r); +} + + +int +mr_get_cutl(void * p) +{ + struct mr_rsp_cutl * r; + struct timespec tp; + struct cpu_usage_stat * u; + uint64_t user, nice, sys, idle; + int i, n; + + r = (struct mr_rsp_cutl *) p; + memset(r, '\0', sizeof(*r)); + r->tck = ACTHZ; + r->core = clst.count; + r->thr = clst.thr; + ktime_get_ts(&tp); + monotonic_to_bootbased(&tp); + r->jif = timespec_to_jiffies(&tp); + + for_each_possible_cpu(i) { + u = & kstat_cpu(i).cpustat; + + user = u->user; + nice = u->nice; + sys = u->system + u->irq + u->softirq; + idle = u->idle + u->iowait; + + r->sum.user += user; + r->sum.nice += nice; + r->sum.sys += sys; + r->sum.idle += idle; + + /* + * Currently the boot processor is thread 0 of the last + * enabled core. Thus, on a 32 core machine, we get: + * + * cpu # 0 1 2 3 4 5 .. 124 125 126 127 + * core # 31 0 0 0 0 1 .. 30 31 31 31 + * apic ID 124 0 1 2 3 4 .. 123 125 126 127 + * + * The core is included in the per-cpu CpuInfo struct, + * and it should be safe to get it from there. 
+ */ + n = cpu_data(i).cpu_core_id; + if (n < r->core) { + r->cpu[n].user += user; + r->cpu[n].nice += nice; + r->cpu[n].sys += sys; + r->cpu[n].idle += idle; + } + } + + return sizeof(*r); +} + + +int +mr_get_mem(void * p) +{ + struct mr_rsp_mem * r; + struct sysinfo si; + + si_meminfo(&si); + + r = (struct mr_rsp_mem *) p; + memset(r, '\0', sizeof(*r)); + r->total = si.totalram << (PAGE_SHIFT - 10); + r->free = si.freeram << (PAGE_SHIFT - 10); + r->bufs = si.bufferram << (PAGE_SHIFT - 10); + + return sizeof(*r); +} + + +int +mr_get_os(void * p) +{ + struct mr_rsp_os * r; + uint16_t i; + struct timespec tp; + struct task_struct * t; + + ktime_get_ts(&tp); + monotonic_to_bootbased(&tp); + + r = (struct mr_rsp_os *) p; + memset(r, '\0', sizeof(*r)); + r->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); + r->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); + r->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); + r->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); + + /* + * Walk process list and indentify processes that + * are associated with user programs. For now we + * exclude kernel threads and non-stable processes. + * + *TBD: Really just wanted to take the task_lock, but + * it is not exported to modules. It seems to be + * tied into the RCU logic, so locking the whole + * RCU should do the trick as long as it's just + * for a very short time. + */ + i = 0; + rcu_read_lock(); + for_each_process(t) { + if ((t->flags & (PF_KTHREAD | PF_STARTING | PF_EXITING)) || + (t->group_leader && t->group_leader != t)) + continue; + + if (i < ARRAY_SIZE(r->apid)) + r->apid[i] = t->pid; + i++; + } + rcu_read_unlock(); + r->alen = i; + + return sizeof(*r); +} + + +int +mr_get_proc(void * p) +{ + struct mr_rsp_proc * r; + struct task_struct * t, * s; + struct mm_struct * mm; + struct timespec uptime, start, ts; + cputime_t utime, stime; + pid_t pid; + int err, i; + + err = -MR_ERR_NOVAL; + pid = * (uint32_t *) p; + if (! pid) + return err; + + r = (struct mr_rsp_proc *) p; + memset(r, '\0', sizeof(*r)); + do_posix_clock_monotonic_gettime(&uptime); + + rcu_read_lock(); + t = pid_task(find_pid_ns(pid, &init_pid_ns), PIDTYPE_PID); + if (t) { + /* + * Found process, get base stats + */ + r->pid = pid; + strncpy(r->name +1, t->comm, sizeof(r->name) -1); + start = t->start_time; + utime = t->utime; + stime = t->stime; + mm = get_task_mm(t); + if (mm) { +#ifdef SPLIT_RSS_COUNTING + r->rss = atomic_long_read(& mm->rss_stat.count[MM_FILEPAGES]) + + atomic_long_read(& mm->rss_stat.count[MM_ANONPAGES]); +#else + r->rss = mm->rss_stat.count[MM_FILEPAGES] + + mm->rss_stat.count[MM_ANONPAGES]; +#endif + r->vm = mm->total_vm; + mmput(mm); + } + + /* + * Next try get list of threads (if any) + */ + i = 0; + if (!t->group_leader || t->group_leader == t) { + s = t; + do { + if (s->pid != pid) { + if (i < ARRAY_SIZE(r->tpid)) + r->tpid[i++] = s->pid; + } + } while_each_thread(t, s); + } + r->tlen = i; + err = sizeof(*r); + } + rcu_read_unlock(); + + /* + * Convert values into API formats (uSec, kB). + */ + if (err > 0) { + r->name[0] = strlen(r->name +1); + ts = timespec_sub(uptime, start); + r->etime = timespec_to_ns(&ts) / NSEC_PER_USEC; + r->utime = jiffies_to_usecs(utime); + r->stime = jiffies_to_usecs(stime); + r->vm = r->vm << (PAGE_SHIFT - 10); + r->rss = r->rss << (PAGE_SHIFT - 10); + } + + return err; +} + + + +/* +** +** MT Set functions +** +** All works the same way; they get an opague pointer to +** a location where the 'set' parameter from the request is +** placed. 
Return code is one of the MR_* error codes. +** +** Input screening takes place here (to the extent possible). +** +*/ + + +#if NOT_YET +int +mr_set_gvolt(void * p) +{ + /* + * Cannot be set from uOS, pretend success + */ + return 0; +} + + +int +mr_set_gfreq(void * p) +{ + /* + * Cannot be set from uOS, pretend success + */ + return 0; +} +#endif + + +int +mr_set_trc(void * p) +{ + /* + * No idea on what to do with this + */ + trc.lvl = *(uint32_t *) p; + return 0; +} + + + +/* +** +** MT Process controls +** +*/ + +int +mr_cmd_pkill(void * p) +{ + struct task_struct * t; + const struct cred * cred; + pid_t pid; + uint32_t val; + int sig, ret; + + val = *(uint32_t *) p; + pid = GET_BITS(23, 0, val); + sig = GET_BITS(31, 24, val); + + ret = -MR_ERR_INVAUX; + rcu_read_lock(); + t = pid_task(find_pid_ns(pid, &init_pid_ns), PIDTYPE_PID); + if (t) { + if (!(t->flags & (PF_KTHREAD | PF_STARTING | PF_EXITING)) && + !(t->group_leader && t->group_leader != t)) { + + cred = __task_cred(t); + if (cred->euid >= 500) { + if (!send_sig(sig, t, 1)) + ret = 0; + } + else + ret = -MR_ERR_PERM; + } + } + rcu_read_unlock(); + + return ret; +} + + +int +mr_cmd_ukill(void * p) +{ + struct task_struct * t; + const struct cred * cred; + uid_t uid; + uint32_t val; + int sig, ret; + + val = *(uint32_t *) p; + uid = GET_BITS(23, 0, val); + sig = GET_BITS(31, 24, val); + + if (uid < 500) + return -MR_ERR_PERM; + + ret = 0; + rcu_read_lock(); + for_each_process(t) { + if ((t->flags & (PF_KTHREAD | PF_STARTING | PF_EXITING)) || + (t->group_leader && t->group_leader != t)) + continue; + + cred = __task_cred(t); + if (cred->euid == uid) { + ret = send_sig(sig, t, 1); + if (ret) + break; + } + } + rcu_read_unlock(); + + return ret ? -MR_ERR_INVAUX : 0; +} + + +/* +** +** Debug utilities. +** Remove or comment out when development complete! +** +*/ + +#if EE_VERIFY +/* + * Hex dumper + */ + +#include + +#define ALEN 9 /* Digits of address shown */ + +void +dmp_hex(void *ptr, int len, const char *msg, ...) +{ + unsigned char * d; + unsigned char * prev; + int n, m; + int star; + char asc[16 + 1]; + + star = 0; + prev = 0; + + /* + * Print message (if any). + * It is treated as a 'printf' format strings with arguments. + */ + if (msg) { + va_list ap; + + va_start(ap, msg); + vprintk(msg, ap); + va_end(ap); + printk("\n"); + } + + /* + * Loop trying to dump 16 bytes at a time + */ + for(d = (unsigned char *) ptr;; d += 16) { + + /* + * Locate dump area from input buffer; + */ + n = (len > 16) ? 16 : len; + len -= n; + + /* + * Skip repeated lines. + * I want the last line shown on the output. + */ + if (d != ptr && n == 16 && !memcmp(d, prev, 16)) { + if (len) { + if (!star) { + star = 1; + printk("%*s\n", ALEN + 3, "*"); + } + continue; + } + } + + /* + * Print one line of hex dump. + */ + if (n) { + printk("%*lx ", ALEN, ((long) d) & ((1L << 4 * ALEN) - 1)); + for(m = 0; m < n; m++) { + printk("%02x ", d[m]); + if (m == 7) + printk(" "); + asc[m] = (isascii(d[m]) && isprint(d[m])) ? d[m] : '.'; + } + asc[m] = '\0'; + printk("%*s %s\n", 3 * (16 - m) + (m < 8), "", asc); + } + + /* + * We are done when end of buffer reached + */ + if (!len) + break; + + /* + * Reset repeat line suppression + */ + star = 0; + prev = d; + } +} +#endif diff --git a/ras/micras_core.c b/ras/micras_core.c new file mode 100644 index 0000000..2cdbb4b --- /dev/null +++ b/ras/micras_core.c @@ -0,0 +1,973 @@ +/* + * Copyright 2010-2017 Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * RAS handler for core MC events + * + * Contains code to intercept MC events, collect information + * from core MCA banks on originating core and possibly on + * all active cores if necessary. + * + * In case of a severe event, defined by corrupted context, + * the handler will add a record of the event in the designated + * EEPROM hanging off the Over-Clocking I2C bus. Next a message + * will be sent to the SMC (enabling IPMI notifications) and at + * last a message is sent to host via the MC SCIF connection + * (if MC SCIF session has been established). + * + * Lesser events will also be sent to the host on a 'FYI' basis, + * but no record will be stored in the event log, nor will the + * SMC be notified. + * + * Special cases of high rate correctable errors may also cause + * events to be recorded in EEPROM on the assumption that the + * root cause will be detectable from maintenance mode. + * + * The handler cannot expect any support from the OS while in + * exception (NMI) context. Therefore, NMI-safe routines has + * been added to mimic some kernel services, e.g. ee_print(). + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "micras.h" + + +/* +** +** Brief design notes: +** There are two ways this code normally will be entered. +** +** 1) From standard interrupt context (bottom-half). +** This is supporting MC events picked up by the +** machine_check_poll(), i.e. events that aren't +** causing state corrruption (UC bit not set). +** +** 2) From exception/NMI context. +** This handles errors that _did_ flag processor +** state corruption (UC bit set, or other condition +** causing the kernel exception handler to pick it up). 
+** +** Both cases can happen simultaneously on different CPUs, +** which requires careful consideration about re-entrant code +** behaviour here. Particularly nasty is exception context where +** normal spinlocks won't work (FYI: x86 spinlocks assume interrupt +** disable can protect a critical region, an assumption that is +** false when an exception/NMI occurs). +** +** Standard interrupt context entries occur when non-fatal and +** thus non-critical MC events are handled. In most cases this just +** results in a regular SCIF send of McInfo structs to the host. +** Note that the call chain origin is a callout from the timer +** thread, not from an interrupt service routine, so to name +** it as standard interrupt context is somewhat misleading. +** +** Exception context messages are usually fatal and must be +** dealt with immediately, because otherwise the generic machine +** handler may panic() the system when exiting exception handler +** (default behavior, may be tweaked by altering 'threshold'). +** +** In order to proceed we can either implement a locking mechanism +** at every API function entry, or we can let every function do its +** thing independently. The latter is preferred, though it gets +** somewhat complicated because the API between the generic MC +** handling and RAS module is in fact composed of several calls. +** +** If state between API calls needs to be tracked then that can be +** done by means of pre-allocated arrays, similar to the generic +** handling in the Linux kernel. Currently the only state variable +** is the mask of CPUs that have been sent an IPI. +** +** Core MC events can be simulated by using the 'mce-inject' tool, +** consisting of a kernel module and a text mode application program. +** The 'mce-inject' module knows the difference between fatal and +** non-fatal events (defined by the UC bit) and acts differently +** in the two cases. Non-fatal injections cause machine_check_poll() +** to be called on all CPUs, resulting in events being reported to +** function mce_poll(). Fatal injections cause do_machine_check() +** to be called on all CPUs, resulting in calls to the mcc_exc_* +** routines below. Activities triggered by mce-inject are flagged +** as 'fake', and shall _NOT_ be logged in the EEPROM. +** +** Warning: +** Controls in the generic MC handling may cause the kernel to +** panic, _ALSO_ even if no event was found in any MCA banks!! +** Not sure exactly how to capture that sort of event. +** +** Warning: +** The 'mce-inject' module uses different methods of invoking error +** handling routines, depending on the mce record (inject_flags). +** Specifically, the 'mce-inject' module may make use of broadcast NMIs +** to invoke machine_check_poll() or do_machine_check() on all CPUs, +** which will make these functions execute in exception context. +** The NMI broadcast mechanism is based on registering a handler on +** the 'die' notifier chain and then doing an +** apic->send_IPI_mask(.., NMI_VECTOR), +** knowing that do_nmi() will invoke this notifier chain when no +** genuine cause of NMI was found (i.e. if inb(61) returns 0xc0, +** [which is SERR + IOCHK on chipset register NSR]). +** Long story short: if 'mce-inject' is used we cannot expect that +** polling is done in standard interrupt context, and need to set +** the 'in exception context' flag for SCIF access. +** +*/ + + +/* + * Hooks placed in the native machine check handler + * See file arch/x86/kernel/cpu/mcheck/mce.c for placement.
+ * + * poll After entering a non-UC event into mce_log. + * This happens in normal thread context, which + * means that kernel services are avaialble. + * exc_flt Filter on correctable errors. If events occur + * at a very high rate they can severely slow + * down the system and/or crash it entirely. + * Logic here will disable reporting of some + * events if they are seen too often. + * exc_entry Entering MC exception handler. + * Called _after_ reading MCG_STATUS and the early + * severity assesment by mce_severity() has been + * performed on all banks, such that we get to + * know if the native MC handler will panic. + * exc_log After entering a UC event into mce_log. + * The logged mce record has all available + * details on the event, and this point is the + * best place to perform our RAS activities. + * exc_panic Right before the MC exception handler calls + * the panic function. + * exc_exit Exit the MC exception handler + * print Exception context safe printf to POST-card UART + */ + +extern void (*mca_poll)(struct mce *, uint64_t, int); +extern void (*mca_exc_flt)(struct mce *, uint64_t, int); +extern void (*mca_exc_entry)(struct mce *, int, int, int, char *); +extern void (*mca_exc_log)(struct mce *, uint64_t, int, int, char *, int, int); +extern void (*mca_exc_panic)(struct mce *, char *, char *, int); +extern void (*mca_exc_exit)(struct mce *, int, int, int, int); +extern int (*mca_print)(char *, ...); + +extern struct mce_log mcelog; /* Export from kernel */ +extern struct mutex mce_read_mutex; /* Export from kernel */ +static unsigned mcc_seen; /* Last event in kernel log */ +int in_sync; /* Flag when sync'ing */ + + +/* + * Convert a kernel mce record into a MC API format + */ + +static void +mcc_conv(struct mce * mce, struct mce_info * mc) +{ + mc->org = mce->bank; + mc->id = mce->extcpu; +#ifdef CONFIG_MK1OM + mc->pid = xlat_cpu[cpu_data(mc->id).apicid]; +#endif + mc->stamp = mce->time; + mc->status = mce->status; + mc->addr = mce->addr; + mc->misc = mce->misc; + mc->flags = (mc->status & MCI_STATUS_UC) ? MC_FLG_FATAL : 0; +} + + +/* + * Filter for correctable errors, may modify CTL value. + * The filter is pretty crude, we just want to protect + * ourselves from being run over by fast recurring events. + * We keep tabs of events seen in a static array. + * + * Algorithm is like this: + * - test if event is in filter list; if not exit filter. + * - search for instance of this event in history. + * - if not found, insert event in history (strike 1). + * - if found but time since last seen exceeds window, + * then treat event as new in history (new strike 1). + * - if found and within time window, bump strike counter. + * - if strike counter reach maximum, we're fed up and + * turn this event off by clearing the associated + * bit in the offending MCA bank's CTL register and + * send a 'filter' event notification to the host. + * + * Advantages of this design is: + * - individual parameters for every filtered event. + * - only one event history array. + * - no periodic aging of events in history array. + * - no averaging over time required. + * - no moving/reordering of event history entries. + * - new events do not replace older seen event + * - filter reacts immediately when max reached. + * + * Disadvantages are: + * - linear search through filter array. + * - linear search through history array. + * - time parameter not obvious, it's really a limit + * on how old events in history are allowed to be. 
+ * - in pathological cases the filter's reaction time + * will be max * window (when events trickle in at + * a rate just below the window size). + * - data in ADDR and MISC registers are not used to + * match current event with history. Should they be? + * + * For now, both lists are short enough that introducing + * more advanced searches probably are not going to help. + * + * On KnC the flash may have overrides of the mc_turnoff table. + */ + +#define FT ((17 * 60) + 30) * 60 /* Default time window: 17.5 hours */ + +static struct mc_hist { + uint32_t count; /* How many times seen */ + uint64_t last; /* TSC last time seen */ + struct mce_info mc; /* Local MC event record */ +} mc_history[32]; + +static struct mc_disc { + uint8_t bank, ctl; /* Bank selector and control bit # */ + uint16_t win; /* Time window (seconds) */ + uint16_t max; /* Max count */ + uint16_t mca_code; /* MCA code, status[15:0] */ + uint16_t mdl_code; /* Model code, status[31:16] */ +} mc_turnoff[] = { + { 0, 3, FT, 2, 0x0150, 0x0000 }, /* MC0: J-Cache error */ + { 1, 0, FT, 2, 0x010a, 0x0001 }, /* MC1: L2 Tag error */ + { 1, 4, FT, 2, 0x010a, 0x0010 }, /* MC1: L2 Data error */ + { 2, 2, FT, 2, 0x010d, 0x0100 }, /* MC2: Tag State, ext TD */ + { 2, 2, FT, 2, 0x010d, 0x0101 }, /* MC2: Tag State, int TD */ + { 2, 3, FT, 2, 0x012d, 0x0110 }, /* MC2: Core Valid, ext TD */ + { 2, 3, FT, 2, 0x012d, 0x0111 }, /* MC2: Core Valid, int TD */ + { 3, 2, FT, 2, 0x010d, 0x0100 }, /* DBOX: Tag State error, ext TD */ + { 3, 2, FT, 2, 0x010d, 0x0101 }, /* DBOX: Tag State error, int TD */ + { 3, 3, FT, 2, 0x012d, 0x0110 }, /* DBOX: Core Valid error, ext TD */ + { 3, 3, FT, 2, 0x012d, 0x0111 }, /* DBOX: Core Valid error, int TD */ + { 4, 4, FT, 2, 0x0e0b, 0x0030 }, /* SBOX: PCI-e */ + { 5, 0, FT, 2, 0x0001, 0x0000 }, /* GBOX: Ch-0 retraining */ + { 5, 1, FT, 2, 0x0001, 0x0001 }, /* GBOX: Ch-1 retraining */ + { 5, 2, FT, 2, 0x0001, 0x0002 }, /* GBOX: Ch-0 ECC error */ + { 5, 3, FT, 2, 0x0001, 0x0003 }, /* GBOX: Ch-1 ECC error */ + { 6, 3, FT, 2, 0x010e, 0x0008 }, /* TBOX: T2 CRC error */ +}; + + +#ifdef CONFIG_MK1OM + +#define MC_FLT_SIG1 0x0e13c20f /* Start signature */ +#define MC_FLT_SIG2 0xf1ec3df0 /* End signature */ +#define MC_FLT_SIZE 0x200 /* Filter block length */ + +void +mcc_flt_parm(uint8_t * p) +{ + uint16_t fnum; + + /* + * Check signatures + */ + if (*((uint32_t *) p) != MC_FLT_SIG1 || + *((uint32_t *)(p + MC_FLT_SIZE - 4)) != MC_FLT_SIG2) { + printk("mcc_flt_parm: signatures not found, (%08x, %08x)\n", + *((uint32_t *) p), *((uint32_t *)(p + MC_FLT_SIZE - 4))); + return; + } + + /* + * After start signature comes filter count (uint16_t) + * followed by 'count' filter descriptors (struct mc_disc). + */ + fnum = *(uint16_t *)(p + 4); + if (fnum > ARRAY_SIZE(mc_turnoff) || + fnum * sizeof(struct mc_disc) + 10 > MC_FLT_SIZE) { + printk("mcc_flt_parm: filter count %d not valid\n", fnum); + return; + } + + /* + * Seems the table is legit, copy it over defaults. 
+ */ + memset(mc_turnoff, '\0', sizeof(mc_turnoff)); + memcpy(mc_turnoff, p + 6, fnum * sizeof(struct mc_disc)); +#if MC_VERBOSE + { + int i; + + for(i = 0; i < ARRAY_SIZE(mc_turnoff); i++) { + printk("Filter %2d: bank %d, ctl %d, win %d, max %d, mca %04x, mdl %04x\n", + i, mc_turnoff[i].bank, mc_turnoff[i].ctl, mc_turnoff[i].win, + mc_turnoff[i].max, mc_turnoff[i].mca_code, mc_turnoff[i].mdl_code); + } + } +#endif +} + +#endif + + +/* + * Frequency filter for core and un-core MC events + */ + +uint32_t +micras_mc_filter(struct mce_info * mc, uint64_t tsc, int exc) +{ + struct mc_disc * dsc; + struct mc_hist * hst; + uint64_t ostamp; + int i, oldest; + + if (mc->status & MCI_STATUS_UC) + return 0; + + /* + * Check if this event may be filtered + */ + dsc = mc_turnoff; + for(i = 0; i < ARRAY_SIZE(mc_turnoff); i++) { + if (dsc->bank == mc->org && + dsc->mca_code == GET_BITS(15, 0, mc->status) && + dsc->mdl_code == GET_BITS(31, 16, mc->status)) + break; + dsc++; + } + if (i == ARRAY_SIZE(mc_turnoff)) + return 0; + + /* + * Have a candidate for filter. + * Have we seen this one before? + */ + oldest = 0; + ostamp = tsc; + hst = mc_history; + for(i = 0; i < ARRAY_SIZE(mc_history); i++) { + /* + * While scanning, find the oldest event too + */ + if (hst->last < ostamp) { + ostamp = hst->last; + oldest = i; + } + + /* + * Does this match event in filter history? + * TBD: how much needs to match? + * For now: cpu (or box), bank, mca_code and model_code. + */ + if (hst->last && + hst->mc.id == mc->id && + hst->mc.org == mc->org && + GET_BITS(15, 0, hst->mc.status) == GET_BITS(15, 0, mc->status) && + GET_BITS(31, 16, hst->mc.status) == GET_BITS(31, 16, mc->status)) + break; + hst++; + } + if (i == ARRAY_SIZE(mc_history)) { + /* + * Not seen this event before. + * 'oldest' is where to store this event. + */ + hst = mc_history + oldest; + hst->count = 1; + hst->last = tsc; + hst->mc = *mc; + return 0; + } + + /* + * Already 'on file in history', test expiration date + */ + if (hst->last + dsc->win * (cpu_khz * 1000LL) < tsc) { + /* + * Matching history element had expired, just overwrite it + */ + hst->count = 1; + hst->last = tsc; + hst->mc = *mc; + return 0; + } + + /* + * Filter element active, bump count and set last seen. + * We do _NOT_ want injected events to enter the EEPROM, + * so that flag is preserved over all event history + */ + hst->count++; + if (mc->flags & MC_FLG_FALSE) + hst->mc.flags |= MC_FLG_FALSE; + if (hst->count < dsc->max) { + hst->last = tsc; + return 0; + } + + /* + * Threshold reached, event source needs to be silenced. + * Store a record of this in the EEPROM and send a + * notification to host about it. Once duly reported, clear + * event from the filter; it is not expected to show up again. + * Note: we report the _first_ event seen, not the + * event at hand. We could save array space + * by sending latest event (less info to keep). + */ + ee_printk("RAS: MCE filter #%d: bank %d, bit %d, limit %d, delta %d (mS)\n", + dsc - mc_turnoff, dsc->bank, dsc->ctl, dsc->max, (tsc - hst->last) / cpu_khz); + hst->mc.flags |= MC_FLG_FILTER; +#ifdef CONFIG_MK1OM + if (!(hst->mc.flags & MC_FLG_FALSE)) { + micras_mc_log(&hst->mc); + hst->mc.flags |= MC_FLG_LOG; + } +#endif + micras_mc_send(&hst->mc, exc); + hst->last = 0; + + /* + * MC events are disabled by caller when a + * non-zero mask is returned by this routine. + */ + return (1 << dsc->ctl); +} + + +/* + * Remove/mask an 'enable-bit' from a core MCA bank. + * Note: This applies to _current_ cpu only. 
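Editorial note: the strike/window logic implemented by micras_mc_filter above can be exercised in isolation. The sketch below is a simplified user-space model (a single filter rule, plain integer timestamps instead of TSC reads, no event matching) showing how a source is admitted until 'max' occurrences fall inside the time window, after which the filter fires once and resets:

#include <stdio.h>
#include <stdint.h>

struct rule { uint64_t win; uint32_t max; };       /* window & strike limit */
struct hist { uint32_t count; uint64_t last; };    /* one history slot */

/* Return non-zero when the event source should be silenced. */
static int filter(struct rule *r, struct hist *h, uint64_t now)
{
    if (!h->last || now - h->last > r->win) {      /* new or expired: strike 1 */
        h->count = 1;
        h->last = now;
        return 0;
    }
    h->count++;                                    /* within window: bump strikes */
    if (h->count < r->max) {
        h->last = now;
        return 0;
    }
    h->last = 0;                                   /* threshold hit: report & reset */
    return 1;
}

int main(void)
{
    struct rule r = { .win = 100, .max = 3 };
    struct hist h = { 0, 0 };
    uint64_t t[] = { 10, 50, 90, 500, 520, 540, 560 };

    for (unsigned i = 0; i < sizeof(t) / sizeof(t[0]); i++)
        printf("t=%3llu -> %s\n", (unsigned long long)t[i],
               filter(&r, &h, t[i]) ? "SILENCE" : "pass");
    return 0;
}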
It is not explicitly + * linked to the cpu that was ID'd in the incoming mce struct. + * Happens to be OK for mcc_exc_flt() and mcc_poll() and mcc_exc_log(). + */ + +static void +mcc_ctl_mask(int bank, uint32_t msk) +{ + uint32_t ctl_lo, ctl_hi; + + rdmsr(MSR_IA32_MCx_CTL(bank), ctl_lo, ctl_hi); + ctl_lo &= ~msk; + wrmsr(MSR_IA32_MCx_CTL(bank), ctl_lo, ctl_hi); + +#if MC_VERBOSE + ee_printk("RAS: ctl mask CPU %d, MC%d_CTL -> %x\n", smp_processor_id(), bank, ctl_lo); +#endif +} + + +/* + * Filtering of correctable core MC events + * Called from the exception handler. + */ + +static void +mcc_exc_flt(struct mce * mce, uint64_t ctl, int fake) +{ + struct mce_info mc; + uint32_t msk; + + if (!mce) + return; + + if (mce->status & MCI_STATUS_UC) + return; + + mcc_conv(mce, &mc); + mc.ctl = ctl; + mc.flags = fake ? MC_FLG_FALSE : 0; + msk = micras_mc_filter(&mc, mce->tsc, 1); + if (msk) + mcc_ctl_mask(mce->bank, msk); +} + + +/* + * Only action required for polled MC events is to + * pass the event on to the SCIF channel (if connected). + * The event should already have caused an excption (the + * exception handler choses to ignore corrected errors) + * which means it already has been filtered. + * Injected corrected events do not cause MCE exceptions + * and thus escaped filtering, so we'll filter them here. + */ + +static void +mcc_poll(struct mce * mce, uint64_t ctl, int fake) +{ + struct mce_info mc; + +#if MC_VERBOSE + ee_printk("RAS: poll %d, fake %d, status %llx\n", mce->extcpu, fake, mce->status); +#endif + + mcc_conv(mce, &mc); + mc.ctl = ctl; + mc.flags = fake ? MC_FLG_FALSE : 0; + +#if BEAM_TEST + /* + * Under beam test we only want to send the SCIF message + */ + micras_mc_send(&mc, fake); + return; +#endif + + if (micras_mc_send(&mc, fake)) + mcc_seen = mcelog.next; + + /* + * According to MCA HAS the MCI_STATUS_VAL will only + * be set when an event's enable bit is set, in which + * case it is difficult to imagine how events without + * the MCI_STATUS_EN can appear here. The second clause + * of the test may never actually happen on Kn{F,C}. + * Note: MC polling does not capture TSCs + */ + if (fake || !(mc.status & MCI_STATUS_EN)) { + uint32_t msk; + + msk = micras_mc_filter(&mc, rdtsc(), fake); + if (msk) + mcc_ctl_mask(mce->bank, msk); + } +} + + +/* + * One CPU entered do_machine_check(). + * We get the initial mce record (which has cpu ID), early + * control variables and whether the event is injected. + * + * Since KnF and KnC deviate from the standard IA by not + * having the core MCAs broadcast to all CPU's we'll try + * to fake standard behavior in order to keep the generic + * machine check code intact. + * Therefore, if event is real (fake flag unset) and this + * CPU is the first seeing it (mcc_exc_mask is empty), + * then send IPI to all other CPU's listed in the online + * cpumask for vector #18. Later CPUs will see themselves + * marked in mcc_exc_mask and return quickly. + */ + +struct cpumask mcc_exc_mask; /* CPU's in mce ctx */ +static atomic_t ipi_lock = ATOMIC_INIT(0); /* Lock on exc mask */ + +static void +mcc_exc_entry(struct mce * mce, int fake, int no_way_out, int entry, char * msg) +{ + unsigned int cpu; + + /* + *TBD: should we use 'extcpu' from the MCE record instead? + */ + cpu = smp_processor_id(); + + /* + * Injected events invokes all CPUs automatically + * by hooking into the NMI notify_die call_chain. + * Nothing to do here. 
+ */ + if (fake) + return; + +#if 1 + /* + * Avoid the IPI corralling circus on corrected errors, + * based on assessment entirely done by mce_severity(). + * If the result (no_way_out) is MCE_NO_SEVERITY (=0), then + * at worst we may have a correctable error, and that does + * not warrant the system lockdown managed by mce_start() + * and mce_end(). + * Note that MICs do not support newer status bits (MCG_SER_P) + * which causes variable mce_ser always to be zero and thus + * the test in the inner loop of do_machine_check() will be + * reduced to just testing for the UC bit. + */ + if (! no_way_out) + return; +#endif + + /* + * Test for entry from MT thread IPIs (testing) + * or a 'soft' exception from a IPI issued from + * the handler of the first exception. + * No further action needed in both cases. + */ + if (cpumask_test_cpu(cpu, &mcc_exc_mask)) + return; + + /* + * Create mcc_exc_mask to flag which CPU's are + * to be included in the IPI. This mask is later + * used to determine who needs to EOI the local + * APIC after MC event handling. + */ + while(atomic_xchg(&ipi_lock, 1)) + cpu_relax(); + smp_rmb(); + if (cpumask_test_cpu(cpu, &mcc_exc_mask)) { + /* + * Another CPU got here first + */ + atomic_xchg(&ipi_lock, 0); + return; + } + cpumask_copy(&mcc_exc_mask, cpu_online_mask); + cpumask_clear_cpu(cpu, &mcc_exc_mask); + smp_wmb(); + atomic_xchg(&ipi_lock, 0); + + /* + * Simulate a broadcast ny sending IPI to all + * other CPUs. + */ + // apic->send_IPI_mask(&mcc_exc_mask, MCE_VECTOR); + apic->send_IPI_allbutself(MCE_VECTOR); +} + + +/* + * In do_machine_check() bank scan loop. + * Called from a lockdown, no synchronization needed. + * MC bank scan is complete and the mce event has been + * entered into the kernel MC log + * + *TBD: revise logic on HALT on UC events? + * From a state corruption point of view this + * _is_ a fatal error because UC bit was set. + * However, if the tolerance setting is set + * high enough, the generic MC handler may + * not chose to panic on this event. + * We currently do not have the tolerance value + * when recording this event, nor do we have + * other factors that mce_reign() use to determine + * what to do after reporting event to the host. + */ + +static void +mcc_exc_log(struct mce * mce, uint64_t ctl, int fake, + int no_way_out, char * msg, int severity, int worst) +{ + struct mce_info mc; + uint32_t msk; + +#if MC_VERBOSE + ee_printk("RAS: log %d, wall %lld, nwo %d (%s), sev %d, wst %d\n", + mce->extcpu, mce->time, no_way_out, msg, severity, worst); +#endif + + /* + * Create a message for the host. + */ + mcc_conv(mce, &mc); + mc.ctl = ctl; + mc.flags |= fake ? MC_FLG_FALSE : 0; + +#if BEAM_TEST + /* + * Under beam test we only want to send the SCIF message + * This is guaranteed not to be called re-entrantly. + */ + micras_mc_send(&mc, 1); + return; +#endif + +#ifdef CONFIG_MK1OM + /* + * If this is a true event then log it in the EEPROM and + * notify SMC that we've had a serious machine check error. + */ + if ((mc.flags & (MC_FLG_FALSE | MC_FLG_FATAL)) == MC_FLG_FATAL) { + micras_mc_log(&mc); + mc.flags |= MC_FLG_LOG; + + /* + *TBD: Should this be deferred until the actual panic? + * The user can raise tolerance such that we in + * fact continue operating; in which case the SMC + * notification would be (somewhat) misleading. 
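Editorial note: because ordinary spinlocks cannot be trusted in exception/NMI context, mcc_exc_entry above guards mcc_exc_mask with a bare atomic exchange on ipi_lock. A user-space sketch of the same idea using C11 atomics (the kernel code uses atomic_xchg and cpu_relax instead):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int lock;                 /* 0 = free, 1 = taken */

static void xchg_lock(atomic_int *l)
{
    /* Spin until the previous value was 0, i.e. we flipped it 0 -> 1. */
    while (atomic_exchange(l, 1))
        ;                               /* kernel version calls cpu_relax() here */
}

static void xchg_unlock(atomic_int *l)
{
    atomic_exchange(l, 0);              /* release the lock */
}

int main(void)
{
    xchg_lock(&lock);
    printf("critical section: build the IPI mask here\n");
    xchg_unlock(&lock);
    return 0;
}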
+ */ + micras_mc_ipmi(&mc, 1); + } +#endif + + /* + * Always notify host and sync to kernel log + */ + if (micras_mc_send(&mc, 1)) + mcc_seen = mcelog.next; + +#if RAS_HALT + if ((mc.flags & MC_FLG_FATAL) && !fake) + panic("FATAL core machine check event:\n" + "bnk %d, id %d, ctl %llx, stat %llx, addr %llx, misc %llx\n", + mc.org, mc.id, mc.ctl, mc.status, mc.addr, mc.misc); +#endif + + /* + * Correctable events can in fact reach us here if + * mce_no_way_out() tags them as critical (for other + * reasons than the UC flag, e.g. MCIP missing). + * If the tolerance setting is high enough to prevent + * such events to panic, we'd still want filtering. + */ + msk = micras_mc_filter(&mc, mce->tsc, 1); + if (msk) + mcc_ctl_mask(mce->bank, msk); +} + + +/* + * In mce_panic(). + * Current event is about to make the kernel panic. + * Sources of this call are + * do_machine_check(), when no_way_out set + * mce_timed_out(), CPU rendez-vous failed + * mce_reign(), when severety high, a CPU hung, or no events + */ + +static void +mcc_exc_panic(struct mce * mce, char * msg, char * exp, int fake) +{ + /* + * Should host be notified in this case? + * And if so, how should be presented, we might not + * even have a mce record to show when this happens! + * If an mce is passed, it has already been seen and + * reported to the host by a call to mcc_exc_log(). + * If mce is NULL, then this _is_ an MC relatedi panic, + * but we have no data fitting for a host notification. + * Create a pseudo event and ship that? + */ + ee_printk("RAS: panic %d, wall %lld, msg %s, exp %s, fake %d\n", + mce->extcpu, mce->time, msg, exp, fake); +} + + +/* + * A CPU is leaving do_machine_check(). + * We get this after the monarch has 'reigned' and + * the response to the event has been completed. + */ + +static void +mcc_exc_exit(struct mce * mce, int no_way_out, int worst, int entry, int order) +{ + unsigned int cpu; + int eoi; + + cpu = smp_processor_id(); + + /* + * Assuming test_and_clear_bit() is atomic. + */ + smp_rmb(); + eoi = cpumask_test_and_clear_cpu(cpu, &mcc_exc_mask); + smp_wmb(); + if (eoi) + ack_APIC_irq(); +} + + +/* + * Routine to scan the kernel's MC log. + * Called when SCIF MC session has been created, to bring the host + * side up to date with prior unreported MC events, such as events + * occurring when MC session was not active (no peer was listening + * on the host) and events occurring before RAS module was loaded. + * + * Notes: + * - This is always called in thread context. + * - There are no injection flags in the kernel + * MC log, i.e. no guarantee events are genuine. + * - The MC kernel log has been exported explicitly for this. + * + * On synchronization (or the lack thereof): + * Effectively the mcelog holds a static array of mce's where the + * 'finished' flag says whether mce content is valid or not. The + * 'next' field is the index of the first element in the array that + * has not been assigned for an MC event. It is incremented when a + * new event is entered, and reset to zero on reads to /dev/mcelog. + * The kernel's event log does not wrap, so it is safe to use it as + * an indicator of how many events (finished or not) are in it. + * The mcelog's next field is protected by RCU style mechanisms + * in the kernel MCA handler (see arch/x86/kernel/cpu/mcheck/mce.c). + * For obvious reasons it is not genuine RCU, e.g. access to 'next' + * isn't within rcu_read_lock()/rcu_read_unlock() pair, just a clever + * masking use of a lock in an RCU macro definition. 
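Editorial note: the mcelog hand-off described in the comment above (and continued below) boils down to a publish protocol: the producer fills an entry, issues a write barrier, then sets 'finished'; the consumer only trusts entries whose 'finished' flag it has observed. A minimal single-producer model, with C11 release/acquire ordering standing in for the kernel's wmb()/rmb() and a fixed ring in place of the real mcelog:

#include <stdatomic.h>
#include <stdio.h>

struct entry {
    int data;
    atomic_int finished;       /* 0 until the record is complete */
};

static struct entry log_ring[8];
static atomic_int next_idx;    /* index of first unused slot */

static void produce(int val)
{
    int i = atomic_fetch_add(&next_idx, 1);
    log_ring[i].data = val;                                    /* fill record  */
    atomic_store_explicit(&log_ring[i].finished, 1,
                          memory_order_release);               /* then publish */
}

static void consume(void)
{
    int n = atomic_load(&next_idx);
    for (int i = 0; i < n; i++) {
        if (!atomic_load_explicit(&log_ring[i].finished,
                                  memory_order_acquire))
            continue;                                          /* not ready yet */
        printf("entry %d: %d\n", i, log_ring[i].data);
    }
}

int main(void)
{
    produce(42);
    produce(43);
    consume();
    return 0;
}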
+ * There is no RCU moving data around, the mce array does not move, + * and the 'finished' flag is set after a wmb() on the mce contents + * which means this routine will not clash with the MCE handler. + * Collisions with memset() on reads from /dev/mcelog are prevented + * by locking of mce_read_mutex. + */ + +void +mcc_sync(void) +{ + struct mce_info mc; + unsigned seen; + + if (mce_disabled) + return; + +#if 0 + /* + * Can't do this until bootstrap scrubs MC banks on all cards. + * It has been observed that MCA banks may _not_ be reset on card + * reboot which means events picked up by the kernel before loading + * the RAS module may have occured in a previous uOS run. + * Should be OK post early Jan '12 (flash ver 262, HSD 4115351). + */ + return; +#endif + + /* + * Lock out kernel log access through /dev/mcelog + */ + mutex_lock(&mce_read_mutex); + + /* + * Start over if the log has been cleared cleared + */ + if (mcc_seen > mcelog.next) + mcc_seen = 0; + + for(seen = mcc_seen; seen < mcelog.next; seen++) { + /* + * Basic checks. Index, CPU & bank must be reasonable. + */ + if (mcelog.entry[seen].finished) { + if (mcelog.entry[seen].cpu >= NR_CPUS || + mcelog.entry[seen].bank >= 3) { + printk("mcc_sync: entry %d contains garbage, cpu %d, bank %d\n", + seen, mcelog.entry[seen].cpu, mcelog.entry[seen].bank); + continue; + } + + /* + * Have good entry, can be UC, but it is 'old'. + */ + mcc_conv(&mcelog.entry[seen], &mc); + mc.ctl = 0; + +#ifdef CONFIG_MK1OM + /* + * Log this event in the eeprom and notify + * that we've had a serious machine check error. + */ + if (mc.flags & MC_FLG_FATAL) { + in_sync = 1; + micras_mc_log(&mc); + in_sync = 0; + mc.flags |= MC_FLG_LOG; + micras_mc_ipmi(&mc, 0); + } +#endif + + /* + * Notify host about this too + */ + if (! micras_mc_send(&mc, 0)) + break; + } + } + mcc_seen = mcelog.next; + + /* + * Done, release lock + */ + mutex_unlock(&mce_read_mutex); +} + + +/* + * Setup excetion handlers by hooking into the + * kernel's native MCA handler. + */ + +int __init +mcc_init(void) +{ + if (mce_disabled) { + printk("RAS.core: disabled\n"); + } + else { + mca_poll = mcc_poll; + mca_exc_flt = mcc_exc_flt; + mca_exc_entry = mcc_exc_entry; + mca_exc_log = mcc_exc_log; + mca_exc_panic = mcc_exc_panic; + mca_exc_exit = mcc_exc_exit; + mca_print = 0; /* For debug: ee_printk; */ + printk("RAS.core: init complete\n"); + } + + return 0; +} + + +/* + * Cleanup for module unload. + * Clear/restore hooks in the native MCA handler. + */ + +int __exit +mcc_exit(void) +{ + mca_poll = 0; + mca_exc_flt = 0; + mca_exc_entry = 0; + mca_exc_log = 0; + mca_exc_panic = 0; + mca_exc_exit = 0; + mca_print = 0; + + /* + * Links from kernel's MCE handler cut, + * wait for everybody in handler to leave. + */ + while(atomic_read(&mce_entry)) + cpu_relax(); + + printk("RAS.core: exit complete\n"); + return 0; +} + diff --git a/ras/micras_elog.c b/ras/micras_elog.c new file mode 100644 index 0000000..349c4cb --- /dev/null +++ b/ras/micras_elog.c @@ -0,0 +1,3136 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * RAS EEPROM log driver + * + * Contains code to handle creation of MC event records in + * the designated EEPROM hanging off the 'OverClocking' I2C bus. + * + * Since it is not clear for the moment for how long the serial + * port on the POST card needs to (or will) be supported, it is + * not safe to assume we just can tap into the Linux I2C frame + * work to access the 'OverClocking' I2C bus. + * + * Furthermore, we need access from exception context, and cannot + * run a driver that has spinlocks, mutexes and sleeps in it's path + * like the current PXA-derived driver has. + * + * Therefore, a local exception safe driver is included here. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "micras.h" + +#ifdef MIC_IS_EMULATION +/* + * Emulation does not handle I2C busses. + * Therefore all code that deals with I2C needs to be + * replaced with harmless substitutes in emulation. + * The following stubs are for emulation only. + */ + +#if 0 +/* + * Probably don't need exclusive locks in emulation + */ +atomic_t pxa_block = ATOMIC_INIT(0); + +static void +ee_lock(void) +{ + while(atomic_xchg(&pxa_block, 1)) + myDELAY(50); +} + +static void +ee_unlock(void) +{ + atomic_xchg(&pxa_block, 0); +} +#endif + +char ee_buf[EE_BUF_COUNT * EE_BUF_LINELEN]; +atomic_t ee_msg = ATOMIC_INIT(-1); +atomic_t ee_seen = ATOMIC_INIT(0); +int ee_rdy; + +char * +ee_fmt(char * fmt, va_list args) +{ + char * buf; + int msg_id, msg_btm; + + msg_btm = atomic_read(&ee_seen); + msg_id = atomic_inc_return(&ee_msg); + if ((msg_id - msg_btm) < (EE_BUF_COUNT - 1)) { + buf = ee_buf + (msg_id % EE_BUF_COUNT) * EE_BUF_LINELEN; + vsnprintf(buf, EE_BUF_LINELEN - 1, fmt, args); + return buf; + } + return 0; +} + +int +ee_printk(char * fmt, ...) +{ + va_list args; + char * buf; + + va_start(args, fmt); + buf = ee_fmt(fmt, args); + va_end(args); + + return buf ? strlen(buf) : 0; +} + +int +ee_print(char * fmt, ...) +{ + va_list args; + char * buf; + + va_start(args, fmt); + buf = ee_fmt(fmt, args); + va_end(args); + + return buf ? 
strlen(buf) : 0; +} +EXPORT_SYMBOL_GPL(ee_print); + + +int +ee_init(void) +{ + ee_rdy = 1; + + if (mce_disabled) + printk("RAS.elog (EMU): disabled\n"); + else + printk("RAS.elog (EMU): init complete\n"); + return 0; +} + +int +ee_exit(void) +{ + ee_rdy = 0; + + printk("RAS.elog (EMU): exit complete\n"); + return 0; +} + +void +micras_mc_log(struct mce_info * event) +{ + if (mce_disabled) + return; + + /* + * Print entry on serial console (copy in kernel log) + */ + ee_printk("RAS.elog (EMU): bnk %d, id %d, ctl %llx, stat %llx, addr %llx, misc %llx\n", + event->org, event->id, event->ctl, event->status, event->addr, event->misc); +} + +#else + +/* +** +** Exception safe I2C driver for the 'OverClocking' bus. +** The driver is a derivative of the FreeBSD driver that +** Ben W wrote. I.e. it is safe to re-use here because we +** wrote it in the first place, copyright is ours. +** +** NOTE: This I2C bus is usually run by the PXA driver, +** which means that the activities of this driver +** may interrupt the PXA driver's activity, i.e. +** interrupt the serial console. +** This is by design, the alternative was major +** hacking of the PXA driver to support use in +** exception context. +** +** NOTE: This code is currently exclusively designed to +** run on a KnF or KnC device, i.e. we know what +** hardware is present and we know the location +** of the CSRs. This code does very little for +** niceties like device discovery and registration. +** +** NOTE: Timing is altered slightly from the FreeBSD code. +** The I2C bus should run in 400 kHz mode, which at +** optimal conditions can transmit a byte in about +** 25 uSec (8 bits + ack/nak + a little overhead). +** Therefore it does not make much sense to poll +** much faster than 1 uSec anywhere in this driver. +** However, experiments show that timing is far +** from optimal, though it is not clear whether +** it is the UART or the controller that's slow. +** Update: In fact some of the boards cannot run +** reliably at 400 kHz, so we switched to 100 kHz. +*/ + +#define REG_DBG 0 /* Debug I2C Layer 1 */ +#define I2C_DBG 0 /* Debug I2C Layer 2 */ +#define XFR_DBG 0 /* Debug I2C Layer 3 */ +#define CON_DBG 0 /* Debug I2C UART */ +#define EPR_DBG 0 /* Debug EEPROM log */ + +#if REG_DBG +#define REG_REG reg_dmp +#else +#define REG_REG(s); /* As nothing */ +#endif + +#if I2C_DBG +#define I2C_PRT ee_printk +#else +#define I2C_PRT(s,...); /* As nothing */ +#endif + +#if XFR_DBG +#define XFR_PRT ee_printk +#else +#define XFR_PRT(s,...); /* As nothing */ +#endif + +#if CON_DBG +#define CON_PRT ee_printk +#else +#define CON_PRT(s,...); /* As nothing */ +#endif + +#if EPR_DBG +#define EPR_PRT ee_printk +#else +#define EPR_PRT(s,...); /* As nothing */ +#endif + + +#include +#include "monahan.h" + + +/* + *TBD: Get rid of Pascal relics! + */ + +#ifndef FALSE +#define FALSE false +#endif +#ifndef TRUE +#define TRUE true +#endif + + +/* + * Local timer routine. + * Similar to the udelay function, just simpler. + * + * The delay instruction can only go upto 1023 clocks, + * and larger delay needs to be split into two or more + * delay instructions. + * According to Kn{F|C} errata, delay disables interrupts. + * Want to play nice and allow interrupts every 250 clocks. + * For now the overhead of the loop is ignored. + */ + +#define MAX_DELAY 250 + +void +myDELAY(uint64_t usec) +{ + uint64_t num_cpu_clks, tick; + + /* + * Convert usec count into CPU clock cycles. 
+ * Similar to set_cyc2ns_scale() we have: + * us = cycles / (freq / us_per_sec) + * us = cycles * (us_per_sec / freq) + * us = cycles * (10^6 / (cpu_khz * 10^3)) + * us = cycles * (10^3 / cpu_khz) + * cycles = us / ((10^3 / cpu_khz)) + * cycles = (us * cpu_khz) / 10^3 + */ + num_cpu_clks = (usec * tsc_khz) / 1000; + + if (num_cpu_clks <= MAX_DELAY) { + __asm__ __volatile__("delay %0"::"r"(num_cpu_clks):"memory"); + } else { + for(tick = MAX_DELAY; num_cpu_clks > tick; num_cpu_clks -= tick) + __asm__ __volatile__("delay %0"::"r"(tick):"memory"); + __asm__ __volatile__("delay %0"::"r"(num_cpu_clks):"memory"); + } +} + + +/* + * Layer 1 abstraction: device bus (controller register access) + * + * Access API to provide read/write to the I2C controller. + * Simply use a local copy of the SBOX MMIO routines, where the + * 'OverClocking' I2C controller CSRs starts at offset 0x1000. + * We use a local copy in order to not mix I2C register traces + * with those of the SBOX MMIO routines in micras_main.c. + * + *TBD: Shall debug features stay in the code? + */ + +#if REG_DBG + +/* + * I2C controller register dump utilities. + * Traces go to the kernel log. + */ + +struct bits { + uint32_t mask; + char *set; + char *unset; +}; + +#define PXA_BIT(m, s, u) { .mask = m, .set = s, .unset = u } + +static struct bits icr_bits[] = { + PXA_BIT(ICR_START, "START", 0), + PXA_BIT(ICR_STOP, "STOP", 0), + PXA_BIT(ICR_ACKNAK, "NAK", "ACK"), + PXA_BIT(ICR_TB, "TB", 0), + PXA_BIT(ICR_MA, "MA", 0), + PXA_BIT(ICR_SCLE, "SCLE", 0), + PXA_BIT(ICR_IUE, "IUE", 0), + PXA_BIT(ICR_GCD, "GCD", 0), + PXA_BIT(ICR_ITEIE, "ITEIE", 0), + PXA_BIT(ICR_DRFIE, "DRFIE", 0), + PXA_BIT(ICR_BEIE, "BEIE", 0), + PXA_BIT(ICR_SSDIE, "SSDIE", 0), + PXA_BIT(ICR_ALDIE, "ALDIE", 0), + PXA_BIT(ICR_SADIE, "SADIE", 0), + PXA_BIT(ICR_UR, "UR", 0), +}; + +static struct bits isr_bits[] = { + PXA_BIT(ISR_RWM, "RX", "TX"), + PXA_BIT(ISR_ACKNAK, "NAK", "ACK"), + PXA_BIT(ISR_UB, "UB", 0), + PXA_BIT(ISR_IBB, "IBB", 0), + PXA_BIT(ISR_SSD, "SSD", 0), + PXA_BIT(ISR_ALD, "ALD", 0), + PXA_BIT(ISR_ITE, "ITE", 0), + PXA_BIT(ISR_IRF, "IRF", 0), + PXA_BIT(ISR_GCAD, "GCAD", 0), + PXA_BIT(ISR_SAD, "SAD", 0), + PXA_BIT(ISR_BED, "BED", 0), +}; + + +static void +decode_bits(char *prefix, struct bits *bits, int num, uint32_t val) +{ + char * str; + + printk(" %s: ", prefix); + while (num--) { + str = (val & bits->mask) ? bits->set : bits->unset; + if (str) + printk("%s ", str); + bits++; + } +} + +static void reg_ICR(uint32_t val) +{ + decode_bits("ICR", icr_bits, ARRAY_SIZE(icr_bits), val); + printk("\n"); +} + +static void reg_ISR(uint32_t val) +{ + decode_bits("ISR", isr_bits, ARRAY_SIZE(isr_bits), val); + printk("\n"); +} + + +static void +reg_dmp(char * str) +{ + printk("%s: ICR %08x, ISR %08x, ISAR %08x, IDBR %08x, IBMR %08x\n", str, + mr_sbox_rl(0, SBOX_OC_I2C_ICR + ICR_OFFSET), + mr_sbox_rl(0, SBOX_OC_I2C_ICR + ISR_OFFSET), + mr_sbox_rl(0, SBOX_OC_I2C_ICR + ISAR_OFFSET), + mr_sbox_rl(0, SBOX_OC_I2C_ICR + IDBR_OFFSET), + mr_sbox_rl(0, SBOX_OC_I2C_ICR + IBMR_OFFSET)); +} + +#endif /* REG_DBG */ + + +/* + * Local versions of SBOX access routines, that + * does not leave trace messages in kernel log. 
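Editorial note: myDELAY above converts a microsecond count into clock cycles with cycles = (usec * tsc_khz) / 1000 and then issues the cycle budget in chunks of at most MAX_DELAY, because the card's 'delay' instruction takes a bounded operand and blocks interrupts while it runs. The chunking arithmetic can be checked on its own; the sketch below only counts how many chunks a request decomposes into (there is no 'delay' instruction in user space):

#include <stdio.h>
#include <stdint.h>

#define MAX_DELAY 250                          /* max cycles per chunk, as above */

/* Return how many delay chunks a request needs, given an assumed tsc_khz. */
static unsigned chunks(uint64_t usec, uint64_t tsc_khz)
{
    uint64_t clks = (usec * tsc_khz) / 1000;   /* usec -> CPU clock cycles */
    unsigned n = 0;

    while (clks > MAX_DELAY) {                 /* full MAX_DELAY chunks */
        clks -= MAX_DELAY;
        n++;
    }
    return n + 1;                              /* plus the final remainder */
}

int main(void)
{
    /* e.g. 25 us at an assumed 1.1 GHz clock (tsc_khz = 1100000) */
    printf("25 us -> %u chunks\n", chunks(25, 1100000));
    printf("1 us  -> %u chunks\n", chunks(1, 1100000));
    return 0;
}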
+ */ + +uint32_t +lmr_sbox_rl(int dummy, uint32_t roff) +{ + uint32_t val; + + val = * (volatile uint32_t *)(micras_sbox + roff); + return val; +} + +void +lmr_sbox_wl(int dummy, uint32_t roff, uint32_t val) +{ + * (volatile uint32_t *)(micras_sbox + roff) = val; +} + +static uint32_t +reg_read(uint32_t reg) +{ + uint32_t val; + + val = lmr_sbox_rl(0, SBOX_OC_I2C_ICR + reg); + +#if REG_DBG + printk("%s: %4x -> %08x", "rd", SBOX_OC_I2C_ICR + reg, val); + switch(reg) { + case ICR_OFFSET: reg_ICR(val); break; + case ISR_OFFSET: reg_ISR(val); break; + default: + printk("\n"); + } +#endif + + return val; +} + +static void +reg_write(uint32_t reg, uint32_t val) +{ +#if REG_DBG + printk("%s: %4x <- %08x", "wr", SBOX_OC_I2C_ICR + reg, val); + switch(reg) { + case ICR_OFFSET: reg_ICR(val); break; + default: + printk("\n"); + } +#endif + + lmr_sbox_wl(0, SBOX_OC_I2C_ICR + reg, val); +} + + +/* + * Layer 2 abstraction: I2C bus driver (byte access to I2C bus) + * + * Mostly a re-implementation of Ben W's low level FreeBSD driver. + * Provides an API to control what goes onto the I2C bus on a + * per individual byte basis. + * + * i2c_reset Reset bus controller + * i2c_init Setup trasaction parameters (speed & mode) + * i2c_start Send slave address + R/W bit + * i2c_rd_byte Read data byte + * i2c_wr_byte Send data byte + * i2c_stop Stop current transaction + * + * NOTE: It seems that the controller lacks means to reset the + * I2C bus (i.e. other devices on it). The controller + * resets fine, but at least the UART has been seen + * locking up and blocking the bus entirely. + */ + +static uint8_t hnd_addr = 0; /* Target address */ +static int hnd_freq = FREQ_100K; /* Target speed */ + +static uint8_t bus_slave_addr = ISAR_SLADDR; /* Our I2C slave address */ +static int bus_start_op = I2C_NOP; /* Bus command: R or W */ +static int bus_freq = 0; /* Bus speed (actual) */ +static int bus_inited = 0; /* Bus initialized */ + + +/* + * Master abort. + * Flip the ICR:MA bit long enough for current + * byte transfer to clock in/out on the wire. + */ + +static int +i2c_master_abort(void) { + I2C_PRT("i2c_master_abort: entry\n"); + + reg_write(ICR_OFFSET, reg_read(ICR_OFFSET) | ICR_MA); + myDELAY(25); + reg_write(ICR_OFFSET, reg_read(ICR_OFFSET) & ~ICR_MA); + + I2C_PRT("i2c_master_abort: exit\n"); + return 0; +} + + +/* + * Receive completion helper. + * Transmission ended (we got IRF), check if it was OK. + * We get ISR and whether a stop condition was expected. + */ + +static int +check_rx_isr(uint32_t isr, bool stop) +{ + I2C_PRT("check_rx_isr: entry, isr %02x, stop %d\n", isr, stop); + REG_REG("+check_rx_isr"); + + if (stop) { + /* + * Last byte read, controller is expected to give a + * NAK to slave. Verify that indeed is set in ISR. + */ + if (!(isr & ISR_ACKNAK)) { + REG_REG("-check_rx_isr"); + I2C_PRT("check_rx_isr: !ISR_ACKNAK, rtn %d\n", RX_SEVERE_ERROR); + return RX_SEVERE_ERROR; + } + + /* + * The controller is expected to set the STOP condition. + * Once completed the controller clears the RWM bit of the ISR. + * Wait for this to happen in max 200 uSec. + */ + if (isr & ISR_RWM) { + int counter; + + I2C_PRT("check_rx_isr: RWM\n"); + counter = 100; + while((reg_read(ISR_OFFSET) & ISR_RWM) && --counter) + myDELAY(2); + if(! 
counter) { + REG_REG("-check_rx_isr"); + I2C_PRT("check_rx_isr: timeout, RWM wait %d uSec, rtn %d\n", 2 * 100, RX_BIZARRE_ERROR); + return RX_BIZARRE_ERROR; + } + I2C_PRT("check_rx_isr: RWM clear, waited %d uSec\n", 2 * (100 - counter)); + } + } else { + /* + * Mid-message, verify that unit is still busy, received + * no NAK and that message operation is still 'read'. + */ + if (!(isr & ISR_UB)) { + REG_REG("-check_rx_isr"); + I2C_PRT("check_rx_isr: !UB, rtn %d\n", RX_SEVERE_ERROR); + return RX_SEVERE_ERROR; + } + + if (isr & ISR_ACKNAK) { + REG_REG("-check_rx_isr"); + I2C_PRT("check_rx_isr: ISR_ACKNAK, rtn %d\n", RX_SEVERE_ERROR); + return RX_SEVERE_ERROR; + } + + if (!(isr & ISR_RWM)) { + REG_REG("-check_rx_isr"); + I2C_PRT("check_rx_isr: !ISR_RWM, rtn %d\n", RX_BIZARRE_ERROR); + return RX_BIZARRE_ERROR; + } + } + + REG_REG("-check_rx_isr"); + I2C_PRT("check_rx_isr: done, rtn %d\n", XFER_SUCCESS); + return XFER_SUCCESS; +} + +/* + * Wait for receive completion. + * We get if stop condition expected. + */ + +static int +i2c_wait_rx_full(bool stop) +{ + int uwt, counter, err; + uint32_t temp; + + I2C_PRT("i2c_wait_rx_full: entry, stop %d\n", stop); + REG_REG("+i2c_wait_rx_full"); + + /* + * Guess on how long one I2C clock cycle is (in uSec) + */ + uwt = (bus_freq == FREQ_400K) ? 3 : 10; + + /* + * Wait for receive to end (IRF set). + * Since slave can hold the SCL to reduce the speed + * we wait longer than we expect the receive to last. + */ + counter = 100; + err = INCOMPLETE_XFER; + while(counter) { + temp = reg_read(ISR_OFFSET); + if (temp & ISR_IRF) { + I2C_PRT("i2c_wait_rx_full: IRF, ISR %02x\n", temp); + err = check_rx_isr(temp, stop); + reg_write(ISR_OFFSET, reg_read(ISR_OFFSET) | ISR_IRF); + switch(err) { + case XFER_SUCCESS: + break; + case RX_SEVERE_ERROR: + break; + case RX_END_WITHOUT_STOP: + i2c_master_abort(); + break; + default: + /* + * This is odd/unexpected, but not + * something we can do anything about. + */ + err = XFER_SUCCESS; + } + break; + } + myDELAY(uwt); + counter--; + } + + REG_REG("-i2c_wait_rx_full"); + I2C_PRT("i2c_wait_rx_full: done, IRF wait %d uSec, err %d\n", uwt * (100 - counter), err); + return err; +} + + +/* + * Transmit completion helper. + * Transmission ended (we got ITE), check if it was OK. + * We get ISR, the current operation and whether a stop + * condition was expected (last byte of transmission). + */ + +static int +check_tx_isr(uint32_t isr, bool stop, int op) +{ + I2C_PRT("check_tx_isr: entry, isr %02x, stop %d, op %d\n", isr, stop, op); + REG_REG("+check_tx_isr"); + + if (isr & ISR_BED) { /* Bus error */ + REG_REG("-check_tx_isr"); + I2C_PRT("check_tx_isr: BED, rtn %d\n", TX_NAK); + return TX_NAK; + } + + if(stop) { + /* + * Last byte write, controller expected to + * set the stop condition. This may take a + * while to complete, controller holds the + * UB flag of ISR until finished. + */ + if(isr & ISR_UB) { + int counter; + + I2C_PRT("check_rx_isr: UB\n"); + counter = 100; + while((reg_read(ISR_OFFSET) & ISR_UB) && --counter) + myDELAY(2); + if (! counter) { + REG_REG("-check_tx_isr"); + I2C_PRT("check_tx_isr: UB, timeout %d uSec, rtn %d\n", 2 * 100, TX_CONTROLLER_ERROR); + return TX_CONTROLLER_ERROR; + } + I2C_PRT("check_tx_isr: !UB, waited %d uSec\n", 2 * (100 - counter)); + } + } else { + /* + * Mid-message, the bus is expected to be busy. 
+ */ + if(!(isr & ISR_UB)) { + REG_REG("-check_tx_isr"); + I2C_PRT("check_tx_isr: !UB, rtn %d\n", TX_CONTROLLER_ERROR); + return TX_CONTROLLER_ERROR; + } + } + + /* + * Assert that message operation hasn't changed + */ + if ((isr & 0x1) != op) { + REG_REG("-check_tx_isr"); + I2C_PRT("check_tx_isr: ISR %d != %d, rtn %d\n", isr & 0x1, op, TX_CONTROLLER_ERROR); + return TX_CONTROLLER_ERROR; + } + + REG_REG("-check_tx_isr"); + I2C_PRT("check_tx_isr: done, rtn %d\n", XFER_SUCCESS); + return XFER_SUCCESS; +} + +/* + * Wait for transmit completion + * We get the current operation and if a stop + * condition was expected (last byte of transmission). + */ + +static int +i2c_wait_tx_empty(bool stop, int op) +{ + int counter, uwt, err; + uint32_t temp; + + I2C_PRT("i2c_wait_tx_empty: entry, stop %d, op %d\n", stop, op); + REG_REG("+i2c_wait_tx_empty"); + + /* + * Guess on how long one I2C clock cycle is (in uSec) + */ + uwt = (bus_freq == FREQ_400K) ? 3 : 10; + + /* + * Wait for transmission to end (ITE set) + * Since slave can hold the SCL to lower the speed + * we wait longer than we expect the transmission + * to last. + */ + counter = 100; + err = INCOMPLETE_XFER; + while(counter) { + temp = reg_read(ISR_OFFSET); + if (temp & ISR_ITE) { + I2C_PRT("i2c_wait_tx_empty: ITE, ISR %02x\n", temp); + myDELAY(uwt); + temp = reg_read(ISR_OFFSET); + err = check_tx_isr(temp, stop, op); + reg_write(ISR_OFFSET, reg_read(ISR_OFFSET) | ISR_ITE); + break; + } + myDELAY(uwt); + counter--; + } + + REG_REG("-i2c_wait_tx_empty"); + I2C_PRT("i2c_wait_tx_empty: done, ITE wait %d uSec, err %d\n", uwt * (100 - counter), err); + return err; +} + + +/* + * Setup for a transaction. + * Determine transmission speed and program ICR accordingly. + * Also sets ISAR, though we probably don't neeed that. + */ + +static int +i2c_init(uint8_t slave_addr) +{ + uint32_t speed; + + I2C_PRT("i2c_init: entry, slave_addr %02x, hnd_speed %d\n", slave_addr, hnd_freq); + REG_REG("+i2c_init"); + + switch(hnd_freq) { + case FREQ_MAX: + speed = I2C_HS_FAST; + break; + case FREQ_400K: + speed = I2C_FAST; + break; + case FREQ_100K: + speed = I2C_STANDARD; + break; + case FREQ_AUTO: +#if I2C_SLOW + hnd_freq = FREQ_100K; + speed = I2C_STANDARD; +#else + hnd_freq = FREQ_400K; + speed = I2C_FAST; +#endif + break; + default: + return -EINVAL; + } + if (bus_inited && hnd_freq == bus_freq) { + REG_REG("-i2c_init"); + I2C_PRT("i2c_init: exit, bus_inited %d, hnd_freq %d\n", bus_inited, hnd_freq); + return 0; + } + I2C_PRT("i2c_init: speed %d, hnd_freq %d\n", bus_inited, hnd_freq); + + bus_slave_addr = ISAR_SLADDR; + reg_write(ISAR_OFFSET, bus_slave_addr); + reg_write(ICR_OFFSET, (reg_read(ICR_OFFSET) & ~ICR_MODE) | ICR_ON | speed); + bus_freq = hnd_freq; + bus_inited = 1; + + REG_REG("-i2c_init"); + I2C_PRT("i2c_init: done, bus_inited %d, bus_freq %d\n", bus_inited, bus_freq); + return 0; +} + + +/* + * Stop current transaction. + * If transmitting then do a master abort, otherwise + * just ensure that no new transmission starts. 
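Editorial note: i2c_wait_rx_full and i2c_wait_tx_empty above share one pattern: poll a status register for a flag, waiting roughly one I2C byte-time per iteration, and give up after a fixed number of tries so a wedged slave cannot hang the caller. A generic user-space sketch of that bounded-poll helper, with the register read and the delay stubbed out as assumptions:

#include <stdio.h>
#include <stdint.h>

static uint32_t fake_isr;                         /* stand-in for reg_read(ISR) */
static uint32_t read_status(void) { return fake_isr; }
static void     tiny_delay(int usec) { (void)usec; /* would be myDELAY() */ }

/* Poll for 'bit' in the status register; 0 on success, -1 on timeout. */
static int wait_for_bit(uint32_t bit, int tries, int usec_per_try)
{
    while (tries--) {
        if (read_status() & bit)
            return 0;
        tiny_delay(usec_per_try);
    }
    return -1;
}

int main(void)
{
    fake_isr = 0x40;                              /* pretend IRF is already set */
    printf("wait IRF: %d\n", wait_for_bit(0x40, 100, 10));
    printf("wait ITE: %d\n", wait_for_bit(0x80, 100, 10));   /* times out */
    return 0;
}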
+ */ + +static int +i2c_stop(void) +{ + I2C_PRT("i2c_stop: entry, bus_inited %d, bus_start_op %d\n", bus_inited, bus_start_op); + REG_REG("+i2c_stop"); + + if (reg_read(ISR_OFFSET) & ISR_UB) { + I2C_PRT("i2c_stop: Unit busy\n"); + i2c_master_abort(); + } + + switch(bus_start_op) { + case I2C_WRITE: + I2C_PRT("i2c_stop: Stop Write\n"); + reg_write(ICR_OFFSET, reg_read(ICR_OFFSET) & ~(ICR_STOP | ICR_TB)); + break; + case I2C_READ: + I2C_PRT("i2c_stop: Stop Read\n"); + reg_write(ICR_OFFSET, reg_read(ICR_OFFSET) & ~(ICR_STOP | ICR_TB | ICR_ACKNAK)); + break; + } + bus_start_op = I2C_NOP; + + REG_REG("-i2c_stop"); + I2C_PRT("i2c_stop: bus_start_op %d\n", bus_start_op); + return 0; +} + + +/* + * Reset I2C controller. + * Try to be nice and wait for current transaction to finish + */ + +static int +i2c_reset(void) +{ + I2C_PRT("i2c_reset: entry, bus_inited %d\n", bus_inited); + REG_REG("+i2c_reset"); + + i2c_stop(); + + reg_write(ICR_OFFSET, ICR_UR); + myDELAY(1); + reg_write(ISR_OFFSET, ~ISR_RESERVED); + myDELAY(1); + reg_write(ICR_OFFSET, 0); + myDELAY(1); + reg_write(ISAR_OFFSET, 0); + myDELAY(1); + reg_write(ICR_OFFSET, ICR_INIT_BITS); + bus_inited = 0; + + REG_REG("-i2c_reset"); + I2C_PRT("i2c_reset: exit, bus_inited %d\n", bus_inited); + return 0; +} + + +/* + * Start transaction using current setup. + * This is always a send of the target id and the R/W bit. + */ + +static int +i2c_start(int rw) +{ + int err; + uint32_t temp; + + I2C_PRT("i2c_start: entry, rw %d, bus_slave_addr %02x, bus_start_op %d\n", rw, bus_slave_addr, bus_start_op); + REG_REG("+i2c_start"); + + if (hnd_addr == bus_slave_addr) { + bus_slave_addr = bus_slave_addr - 1; + I2C_PRT("i2c_start: reset slave %02x\n", bus_slave_addr); + reg_write(ISAR_OFFSET, bus_slave_addr); + } + + reg_write(IDBR_OFFSET, (hnd_addr << 1) | rw); + temp = reg_read(ICR_OFFSET); + temp |= ICR_START | ICR_TB; + temp &= ~(ICR_STOP | ICR_ALDIE); + reg_write(ISR_OFFSET, ~ISR_RESERVED); + reg_write(ICR_OFFSET, temp); + + err = i2c_wait_tx_empty(FALSE, rw); + if (err) { + i2c_reset(); + I2C_PRT("i2c_start: exit, err %d\n", err); + REG_REG("-i2c_start"); + return err; + } + bus_start_op = rw; + + REG_REG("-i2c_start"); + I2C_PRT("i2c_start: done, bus_start_op %d\n", bus_start_op); + return 0; +} + + +/* + * Read next byte of transaction + * Must follow a 'start' in READ mode. + */ + +static int +i2c_rd_byte(bool sendStop, uint8_t *data) +{ + int retval; + uint32_t temp; + + I2C_PRT("i2c_rd_byte: entry, stop %d\n", sendStop); + + if (bus_start_op != I2C_READ) { + I2C_PRT("i2c_rd_byte: exit, called during WR\n"); + return -EINVAL; + } + + REG_REG("+i2c_rd_byte"); + + temp = reg_read(ICR_OFFSET); + temp |= (ICR_ALDIE | ICR_TB); + temp &= ~(ICR_START | ICR_STOP | ICR_ACKNAK); + if (sendStop) + temp |= ICR_STOP | ICR_ACKNAK; + + reg_write(ISR_OFFSET, ~ISR_RESERVED); + reg_write(ICR_OFFSET, temp); + retval = i2c_wait_rx_full(sendStop); + if (retval) { + REG_REG("-i2c_rd_byte"); + I2C_PRT("i2c_rd_byte: exit, err %d\n", retval); + return retval; + } + + temp = reg_read(IDBR_OFFSET); + if (data) + *data = temp; + + if (sendStop) + i2c_stop(); + + REG_REG("-i2c_rd_byte"); + I2C_PRT("i2c_rd_byte: done, data %02x\n", temp); + return 0; +} + +/* + * Write next byte of transaction + * Must follow a 'start' in WRITE mode. 
+ */ + +static int +i2c_wr_byte(bool sendStop, uint8_t data) +{ + int retval; + uint32_t temp; + + I2C_PRT("i2c_wr_byte: entry, stop %d, data %02x\n", sendStop, data); + + if (bus_start_op != I2C_WRITE) { + I2C_PRT("i2c_wr_byte: exit, called during RD\n"); + return EINVAL; + } + + REG_REG("+i2c_wr_byte"); + + reg_write(IDBR_OFFSET, data); + + temp = reg_read(ICR_OFFSET); + temp |= (ICR_ALDIE | ICR_TB); + temp &= ~(ICR_START | ICR_STOP); + if (sendStop) + temp |= ICR_STOP; + + reg_write(ISR_OFFSET, ~ISR_RESERVED); + reg_write(ICR_OFFSET, temp); + retval = i2c_wait_tx_empty(sendStop, I2C_WRITE); + if (retval) { + REG_REG("-i2c_wr_byte"); + I2C_PRT("i2c_wr_byte: exit, err %d\n", retval); + return retval; + } + + if (sendStop) + i2c_stop(); + + REG_REG("-i2c_wr_byte"); + I2C_PRT("i2c_wr_byte: done\n"); + return 0; +} + + +/* + * Get exclusive access to the I2C bus at _any_ given time. + * + * If a transaction is in progress then try to complete it + * in a non-destructive way. We know that the interupted + * activity was from the console access to the UART, which + * boils down to just two possible sequences, read UART + * register or write UART register. The acting code paths is + * sc16is_serial_in() + * -> i2c_smbus_read_byte_data + * -> i2c_smbus_xfer + * -> i2c_smbus_xfer_emulated + * -> i2c_transfer + * -> i2c_pxa_pio_xfer + * -> i2c_pxa_do_pio_xfer + * -> i2c_pxa_set_master + * -> i2c_pxa_start_message + * -> i2c_pxa_handler (repeat for all bytes) + * -> i2c_pxa_irq_txempty (on writes) + * -> i2c_pxa_irq_rxfull (on reads) + * -> i2c_pxa_stop_message + * + * Function i2c_pxa_handler (designed as an interrupt handler) + * is polled every 10 uSec, which is pretty fast for a line that + * clocks at 400 kHz (minimum 20 uSec to send one byte). + * + * The two sequences on the I2C bus for the UART are: + * + * Write: S A A A P + * Read: S A A Sr A A P + * + * where + * S Start sequence + * P Stop sequence + * Sr Repeated start + * W Write flag + * R Read flag + * A Ack (send or recv) + * + * We need the abilitity to 'borrow' the I2C bus from the PXA driver + * both when it is running (say on another CPU) or when it has been + * interrupted (NMI and Exception context). + * + * From trackers in the PXA driver we get to know the current state + * of the I2C transaction with the following granularity: + * + * '-' Idle + * 'B' Waiting for bus free + * 'I' Initiating transfer (i.e. send addr & direction flag) + * 'S' Sending byte + * 'R' Receving byte + * + * Last byte of the transaction can be identified by the STOP flag. + * + * The take-over sequence starts by setting an atomic variable which + * tells the PXA driver to wait (and retry the I2C transaction when + * the variable gets cleared). Then we look at the controller status + * and command registers to determine whether it is active or not. + * + * Simple cases: + * ------------- + * state = '-' + * Controller is not in use by PXA driver. + * + * state 'B' + * Controller not actively in use yet. + * At worst the SCLE bit will be set, which won't affect + * anything in this driver since we always run as master. + * + * STOP bit set + * This is last byte of a transaction, we have two cases: + * a) Last part of a write UART register transaction. + * - Wait for the byte to clock out + * b) Last part of a read UART register transaction. + * - Wait for the byte to clock in, then preserve IDBR. 
+ * + * Other cases: + * ------------ + * state 'I' + * Starting an I2C command (Start or Start-Repeat), + * we have 3 sub-cases of this: + * a) Starting a write UART register transaction: + * - Wait for the byte to clock out, then transmit a + * 0 byte with STOP bit set. This selects RX/TX + * UART register without accessing it. + * b) Starting a read UART register transaction: + * - Same as case a), turn it into a NOP. + * c) Reversing direction during read UART register, + * probably need to finish the read operation: + * - Wait for the byte to clock out, send STOP + ACK + * and wait for the receive to clock in. + * + * state 'S' + * Since STOP bit is not set, then this is the + * index being transfered, two sub-cases: + * a) Sending of a write UART register. + * - Wait for the byte to clock out, then transmit a + * 0 byte with the STOP bit set. This inadvertantly + * and temporarily clears a random UART register, + * which may result in a null byte transmitted + * Since there is a retry associated, the intended + * register value will be written later. + * b) Sending of a read UART register. + * - Same as state 'I' case c). + * + * state 'R' + * Should not occur, because communications with the + * UART only have single byte reads, which always is + * accompanied by a STOP bit, and thus is covered by + * the simple case above. If multi-byte reads were to + * be used then we'd have to terminate it: + * - Wait for the byte to clock in, send STOP + ACK + * and wait for the 2nd byte to clock in. + * Both bytes received can be discarded, as there + * is no easy way to pass them to the PXA driver. + * + * Warning: + * Beyond this being an ugly hack, it is also not re-entrant. + * It can reliably interrupt the console and return it without + * causing too much breakage, but it cannot grab the I2C bus + * from itself due to the use of global variables. + * + * Warning: + * The synchronization between i2c_grap/i2c_release and the + * PXA driver can still wreck the I2C controller. Cause not + * known, but when it happens the PXA driver ends up repeating + * these log messages: + * i2c: error: pxa_pio_set_master: timeout + * i2c: msg_num: 0 msg_idx: 1 msg_ptr: 0 + * i2c: ICR: 000017e0 ISR: 00000044 + * i2c: log: [000000c6:000017e0:00:9a] + * i2c i2c-0: i2c_pxa: timeout waiting for bus free + * pxa_do_pio_xfer: timeout to become master + * pxa_pio_set_master 'B': ISR 00044, ICR 7e0, IDBR 28, IBMR 1 + * Looks like the I2C controller gets stuck, ISR: IRF + IBB, + * The code failing is i2c_pxa_pio_set_master(), which points + * to the I2C UART as the culprit. One such case was during + * module load on KnF, where the only activity in the module + * was one ee_lock/ee_release pair, which in state 'B' should + * be straight forward to handle. 
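+ *
+ * Timing note (back-of-envelope, not measured): at 400 kHz one byte
+ * needs at least ~20 uSec on the wire, while the grab loop below polls
+ * ISR about once per I2C clock for up to 100 iterations (~300 uSec in
+ * fast mode, ~1 mSec in standard mode), so a byte already in flight
+ * should have ample time to complete unless the slave stretches SCL
+ * excessively.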
+ */ + +#ifdef CONFIG_I2C_PXA +#define PXA_SYNC 1 +#else +#define PXA_SYNC 0 +#endif + +#if PXA_SYNC +static uint32_t sv_icr, sv_isr, sv_isar, sv_idbr, ee_term; +extern char pxa_state; +extern atomic_t pxa_block; +#endif + +static void +i2c_grab(void) +{ + int uwt, n; + uint32_t icr, isr; + char * w; + + I2C_PRT("i2c_grab: entry\n"); + REG_REG("+i2c_grab"); + +#if PXA_SYNC + sv_isar = reg_read(ISAR_OFFSET); + sv_idbr = reg_read(IDBR_OFFSET); + sv_icr = reg_read(ICR_OFFSET); + isr = sv_isr = reg_read(ISR_OFFSET); + if ((pxa_state == '-' || pxa_state == 'B') && !(isr & ISR_UB)) { + REG_REG("-i2c_grab"); + I2C_PRT("i2c_grab: controller idle, isr %08x\n", isr); + return; + } + ee_term = 1; + I2C_PRT("i2c_grab: controller active, pxa %c\n", pxa_state); +#else + isr = reg_read(ISR_OFFSET); + if (!(isr & ISR_UB)) { + REG_REG("-i2c_grab"); + I2C_PRT("i2c_grab: controller idle, isr %08x\n", isr); + return; + } + I2C_PRT("i2c_grab: controller active\n"); + w = "-"; +#endif + + /* + * Guess on how long one I2C clock cycle is (in uSec) + * Note: ignore High-Speed modes, they are not used. + */ + icr = reg_read(ICR_OFFSET); + uwt = (icr & ICR_FAST_MODE) ? 3 : 10; + + /* + * Wait here long enough that current byte transaction + * on the I2C controller must have clocked all on its bus. + * Imperically, we've determined that length of this wait + * can to be in range up to a dozen I2C clocks. + * We probe state once per I2C clock cycle. + */ + for(n = 0; n < 100 && (isr & ISR_UB); n++) { + /* + * Controller busy doing something. Whatever it is + * doing, it should set either ITE or IRF when done. + * Need to check for this independently because UB + * is asserted all the way from START thru STOP. + */ + if (isr & (ISR_ITE | ISR_IRF)) + break; + myDELAY(uwt); + isr = reg_read(ISR_OFFSET); + } + I2C_PRT("i2c_grab: ITE/IRF wait %d uSec, isr %02x, UB %d\n", + n * uwt, isr, (isr & ISR_UB) == ISR_UB); + + /* + * Controller should have finished current byte transfer by now. + * If it was last byte of a transaction, we are done. + * In read mode we preserve the received data. + */ + if (icr & ICR_STOP) { +#if PXA_SYNC + if (isr & ISR_RWM) + sv_idbr = reg_read(IDBR_OFFSET); +#endif + for(n = 0; n < 100 && (isr & ISR_UB); n++) { + myDELAY(uwt); + isr = reg_read(ISR_OFFSET); + } + + REG_REG("-i2c_grab"); + I2C_PRT("i2c_grab: easy case, UB wait %d uSec, bus %sclear, icr %08x, isr %08x\n", + n * uwt, (isr & ISR_UB) ? "NOT " : "", icr, isr); + return; + } + +#if PXA_SYNC + w = "?"; + + if (pxa_state == 'I') { + isr &= ~ISR_INTS; + reg_write(ISR_OFFSET, isr); + + if (isr & ISR_RWM) { + /* + * Sub-case c) + * Start byte read and send nak+stop when received. + */ + I2C_PRT("i2c_grab: state 'I', sub-case c\n"); + icr = (icr & ~ICR_START) | (ICR_STOP | ICR_ACKNAK | ICR_TB); + reg_write(ICR_OFFSET, icr); + w = "c"; + } + else { + /* + * Sub-case a) and b) + * Send a null byte and stop the transaction. 
+ */ + I2C_PRT("i2c_grab: state 'I', sub-case a & b\n"); + icr = (icr & ~ICR_START) | (ICR_STOP | ICR_TB); + reg_write(IDBR_OFFSET, 0); + reg_write(ICR_OFFSET, icr); + w = "a & b"; + } + + myDELAY(8 * uwt); + isr = reg_read(ISR_OFFSET); + for(n = 0; n < 100 && (isr & ISR_UB); n++) { + myDELAY(uwt); + isr = reg_read(ISR_OFFSET); + } + if (*w == 'c') + sv_idbr = reg_read(IDBR_OFFSET); + } + + if (pxa_state == 'S') { + isr &= ~ISR_INTS; + reg_write(ISR_OFFSET, isr); + + if (isr & ISR_RWM) { + I2C_PRT("i2c_grab: state 'S', sub-case b\n"); + icr = (icr & ~ICR_START) | (ICR_STOP | ICR_ACKNAK | ICR_TB); + reg_write(ICR_OFFSET, icr); + w = "b"; + } + else { + I2C_PRT("i2c_grab: state 'S', sub-case a\n"); + icr = (icr & ~ICR_START) | (ICR_STOP | ICR_TB); + reg_write(IDBR_OFFSET, 0); + reg_write(ICR_OFFSET, icr); + w = "a"; + } + + myDELAY(8 * uwt); + isr = reg_read(ISR_OFFSET); + for(n = 0; n < 100 && (isr & ISR_UB); n++) { + myDELAY(uwt); + isr = reg_read(ISR_OFFSET); + } + if (*w == 'b') + sv_idbr = reg_read(IDBR_OFFSET); + } +#endif /* PXA_SYNC */ + + REG_REG("-i2c_grab"); + I2C_PRT("i2c_grab: controller %sclear, icr %08x, isr %08x, w %s\n", + (isr & ISR_UB) ? "NOT " : "", icr, isr, w); +} + +static void +i2c_release(void) +{ + I2C_PRT("i2c_release: entry\n"); + REG_REG("+i2c_release"); + +#if PXA_SYNC +#if 0 + /* + * Reset I2C controller before returning it to PXA driver + *TBD: Usually not necessary, remove? + */ + if (ee_term) { + I2C_PRT("i2c_release: resetting bus\n"); + reg_write(ICR_OFFSET, ICR_UR); + myDELAY(2); + reg_write(ICR_OFFSET, 0); + } +#endif + + I2C_PRT("i2c_release: restore controller state\n"); + reg_write(ISR_OFFSET, sv_isr); + reg_write(ICR_OFFSET, sv_icr & ~ICR_TB); + reg_write(ISAR_OFFSET, sv_isar); + reg_write(IDBR_OFFSET, sv_idbr); + + if (ee_term) + ee_term = 0; +#endif /* PXA_SYNC */ + + if (reg_read(IBMR_OFFSET) != 3) + I2C_PRT("i2c_release: WARNING: bus active!!!\n"); + + REG_REG("-i2c_release"); + I2C_PRT("i2c_release: exit\n"); +} + + +/* + * Layer 3 abstraction: I2C driver API (message passing). + * + * Controls data transfers to/from devices on the I2C bus. + * This is what device drivers should use. 
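+ *
+ * Illustration only (hypothetical caller, assuming the routines
+ * summarized below; 'reg' and 'val' are placeholders), a one-byte
+ * register read from a device at slave address 0x4d:
+ *
+ *   xfr_configure(0x4d, FREQ_AUTO);   select target and bus speed
+ *   xfr_start(I2C_WRITE);             S + addr + W
+ *   xfr_write(FALSE, 1, &reg);        send register index, no STOP
+ *   xfr_rept_start(I2C_READ);         Sr + addr + R
+ *   xfr_read(TRUE, 1, &val);          read one byte, then STOP
+ *
+ * cons_getreg() further down follows exactly this pattern.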
+ * + * xfr_configure Set target address and speed + * xfr_start Start R/W operation + * xfr_write Write buffer to target + * xfr_read Read buffer from target + * xfr_rept_start Repeat-start new R/W operation + * xfr_reset Reset driver + */ + +static int +xfr_configure(uint8_t addr, int freq) +{ + XFR_PRT("xfr_configure: entry, addr %02x, freq %d\n", addr, freq); + + if (freq > FREQ_AUTO || freq <= FREQ_MAX) { + XFR_PRT("xfr_configure: exit, invalid freq\n"); + return -EINVAL; + } + + if (addr & 0x80) { + XFR_PRT("xfr_configure: exit, invalid addr\n"); + return -EINVAL; + } + + hnd_addr = addr; + hnd_freq = freq; + XFR_PRT("xfr_configure: done, hnd_addr %02x, hnd_freq %d\n", hnd_addr, hnd_freq); + return 0; +} + + +static int +xfr_start(int rw) +{ + int err; + + XFR_PRT("xfr_start: entry, rw %d, hnd_addr %02x\n", rw, hnd_addr); + + if (rw != I2C_WRITE && rw != I2C_READ) { + XFR_PRT("xfr_start: exit, op invalid\n"); + return -EINVAL; + } + + if (hnd_addr & 0x80) { + XFR_PRT("xfr_start: exit, hnd_addr %02x invalid\n", hnd_addr); + return -EINVAL; + } + + err = i2c_init(hnd_addr); + if (err) { + XFR_PRT("xfr_start: i2c_init failed, err %d\n", err); + i2c_reset(); + return -EIO; + } + + err = i2c_start(rw); + if (err) + XFR_PRT("xfr_start: i2c_start failed, err %d\n", err); + switch(err) { + case INCOMPLETE_XFER: + i2c_stop(); + err = -EBUSY; + break; + case TX_CONTROLLER_ERROR: + i2c_reset(); + err = -ENODEV; + break; + case TX_NAK: + i2c_stop(); + err = -ENXIO; + break; + } + + XFR_PRT("xfr_start: done, err %d\n", err); + return err; +} + + +static int +xfr_rept_start(int rw) +{ + int err; + + XFR_PRT("xfr_rept_start: entry, rw %d, bus_start_op %d\n", rw, bus_start_op); + + if (bus_start_op != I2C_READ && bus_start_op != I2C_WRITE) { + XFR_PRT("xfr_rept_start: exit, mode change %d\n", -ENXIO); + return -ENXIO; + } + + err = i2c_start(rw); + if (err) + XFR_PRT("xfr_rept_start: i2c_start err %d\n", err); + switch(err) { + case INCOMPLETE_XFER: + i2c_stop(); + err = -EBUSY; + break; + case TX_CONTROLLER_ERROR: + i2c_reset(); + err = -ENODEV; + break; + case TX_NAK: + i2c_stop(); + err = -ENXIO; + break; + } + + XFR_PRT("xfr_rept_start: done, err %d\n", err); + return err; +} + + +static int +xfr_write(bool sendStop, int cnt, uint8_t *data) +{ + int retval, i; + + XFR_PRT("xfr_write: entry, sendStop %d, cnt %d\n", sendStop, cnt); + + if (cnt < 0) { + XFR_PRT("xfr_write: exit, bad count %d\n", cnt); + return -EINVAL; + } + + if (! 
cnt) { + XFR_PRT("xfr_write: null write\n"); + retval = i2c_stop(); + goto out; + } + + if (cnt == 1) { + XFR_PRT("xfr_write: 1-byte write, '%02x'\n", *data); + retval = i2c_wr_byte(sendStop, *data); + goto out; + } + + for (i = 0; i < cnt - 1; i++) { + XFR_PRT("xfr_write: multi-byte write %d, '%02x'\n", i, data[i]); + retval = i2c_wr_byte(FALSE, data[i]); + if (retval) + goto out; + } + + XFR_PRT("xfr_write: last of multi-byte write %d, '%02x'\n", cnt - 1, data[cnt - 1]); + retval = i2c_wr_byte(sendStop, data[cnt - 1]); + +out: + if (retval) + XFR_PRT("xfr_write: post val %d\n", retval); + switch(retval) { + case INCOMPLETE_XFER: + i2c_stop(); + retval = -EBUSY; + break; + case TX_CONTROLLER_ERROR: + i2c_reset(); + retval = -ENODEV; + break; + case TX_NAK: + i2c_stop(); + retval = -ENXIO; + break; + } + + XFR_PRT("xfr_write: done, val %d\n", retval); + return retval; +} + + +static int +xfr_read(bool sendStop, int cnt, uint8_t *data) +{ + int retval, i; + + XFR_PRT("xfr_read: entry, stop %d, cnt %d\n", sendStop, cnt); + + if (cnt < 0) { + XFR_PRT("xfr_read: exit, bad count %d\n", cnt); + return -EINVAL; + } + + if (! cnt) { + XFR_PRT("xfr_read: null read\n"); + retval = i2c_stop(); + goto out; + } + + if (cnt == 1) { + XFR_PRT("xfr_read: 1-byte read\n"); + retval = i2c_rd_byte(sendStop, data); + goto out; + } + + for (i = 0; i < cnt - 1; i++) { + XFR_PRT("xfr_read: multi-byte read %d\n", i); + retval = i2c_rd_byte(FALSE, data ? &data[i] : data); + if (retval) + goto out; + } + + XFR_PRT("xfr_read: last of multi-byte read %d\n", cnt - 1); + retval = i2c_rd_byte(sendStop, data ? &data[cnt - 1] : data); + +out: + if (retval) { + XFR_PRT("xfr_read: post val %d\n", retval); + i2c_reset(); + retval = -ENXIO; + } + + XFR_PRT("xfr_read: done, err %d\n", retval); + return retval; +} + + +#if NOT_YET +static void +xfr_reset(void) +{ + i2c_reset(); +} +#endif + + + +/* +** +** UART support for printing from exception context. +** A somewhat crude implementation of two low level +** routines that write/read CSRs on the I2C UART. +** On top of these two functions, a set of mid-layer +** routines adds init/exit and character based I/O. +** We try not to alter the UART's transmission setup +** in order lower the risk of corrupting normal use. +** +** All UART support routines assume I2C controller +** to be initialized by xfr_configure() and expects +** exclusive access to the device +** +*/ + + +/* + * Weird way to say that the I2C UART has slave address + * 0x4D (or 0x48) and the UART registers are in bits + * [6:3] of the register address byte. + * KnF has both I2C UART address pins wired to Vss. + * KnC MPI has the address pins wired to Vdd instead. + *TBD: That's according to the schematics, in reality + * on A0 CRBs the address of the onboard UART is + * 0x4D, which matches address pins wired to Vss. + * Not sure why that changed. + */ + +#ifdef CONFIG_ML1OM +#define SC16IS_ADDR_0 1 +#define SC16IS_ADDR_1 1 +#endif +#ifdef CONFIG_MK1OM /* KAA: MPI specific or KnC specific ? */ +#define SC16IS_ADDR_0 1 +#define SC16IS_ADDR_1 1 +#endif +#define SC16IS_ADDR(a1, a0) \ + (0x40 | (((a1 + 8) + (a1 * 3)) | a0)) +#define SC16IS_SUBADDR(addr, ch) \ + ((addr & 0xf) << 3) | ((ch & 3) << 1) + + +static uint8_t +cons_getreg(int reg) +{ + uint8_t sub, val; + int err; + + CON_PRT("cons_getreg: reg %02x\n", reg); + + /* + * The SC16IS740 device reads 8-bit UART registers + * by first writing the register index and then in + * an subsequent read operation gets the register + * value. 
The two operations can (and probably + * should) be joined by a repeated start to save + * the intermediate stop signaling. + */ + val = 0; + sub = (uint8_t) SC16IS_SUBADDR(reg, 0); + err = xfr_start(I2C_WRITE); + if (err) { + CON_PRT("cons_getreg: xfr_start (WR) err %d\n", err); + return 0; + } + err = xfr_write(FALSE, 1, &sub); + if (err) { + CON_PRT("cons_getreg: xfr_write (%02x) err %d\n", sub, err); + return 0; + } + err = xfr_rept_start(I2C_READ); + if (err) { + CON_PRT("cons_getreg: xfr_rept_start (RD) err %d\n", err); + return 0; + } + err = xfr_read(TRUE, 1, &val); + if (err) { + CON_PRT("cons_getreg: xfr_read err %d\n", err); + return 0; + } + + CON_PRT("cons_getreg: reg %02x, val %02x\n", reg, val); + return val; +} + + +static void +cons_setreg(int reg, int val) +{ + uint8_t payload[2]; + int err; + + CON_PRT("cons_setreg: reg %02x, val %02x\n", reg, val); + + payload[0] = (uint8_t) SC16IS_SUBADDR(reg, 0); + payload[1] = (uint8_t) val; + CON_PRT("cons_setreg: I2C payload %02x, %02x\n", payload[0], payload[1]); + err = xfr_start(I2C_WRITE); + if (err) { + CON_PRT("cons_setreg: xfr_start (WR) err %d\n", err); + return; + } + err = xfr_write(TRUE, 2, payload); + if (err) + CON_PRT("cons_getreg: xfr_write (%02x, %02x) err %d\n", payload[0], payload[1], err); +} + + +static void +cons_init(void) +{ + /* + * For now assume that the kernel LXA driver or the + * bootstrap code has setup the I2C uart properly, i.e. + * we don't need to alter speed/databits/stopbits/parity + * or any other serial properties. + * + *WARNING: Since the switch of console from the I2C uart to + * the virtual console, the uart is left with default + * serial port speed of 9600 baud. Bootstrap blasts + * it's messages at 115200 baud, so now the choice + * of getting garbage from this routine or from the + * bootstrap. Using program stty from userspace may + * set any baudrate, we cannot override it here! + * # stty 115200 < /dev/ttyS0 + *TBD: make 115200 baud default on I2C uart! + */ + CON_PRT("cons_init: pass\n"); +} + + +static void +cons_exit(void) +{ + CON_PRT("cons_exit: pass\n"); +} + + +#if NOT_YET +static int +cons_rxrdy(void) +{ + int val; + + CON_PRT("cons_rxrdy: check console RxRdy\n"); + + val = (cons_getreg(UART_LSR) & UART_LSR_DR) ? 1 : 0; + + CON_PRT("cons_rxrdy: RxRdy %d\n", val); + return val; +} + + +static int +cons_getc(void) +{ + int c; + + CON_PRT("cons_getc: rd from console\n"); + + while((cons_getreg(UART_LSR) & UART_LSR_DR) == 0) + myDELAY(1000); + c = cons_getreg(UART_RX); + + CON_PRT("cons_getc: read '%02x'\n", c); + return c; +} +#endif + + +static void +cons_putc(int c) +{ + int limit; + + CON_PRT("cons_putc: wr '%02x' to console\n", c); + + limit = 10; + while((cons_getreg(UART_LSR) & UART_LSR_THRE) == 0 && --limit) ; + CON_PRT("cons_putc: THRE ready, limit %d\n", limit); + cons_setreg(UART_TX, c); + +#if 0 + /* + * No reason to wait for it to clock out + */ + limit = 10; + while((cons_getreg(UART_LSR) & UART_LSR_TEMT) == 0 && --limit) ; + CON_PRT("cons_putc: TEMT ready, limit %d\n", limit); +#endif + + CON_PRT("cons_putc: done printing '%02x'\n", c); +} + + +/* + * Simple exclusive access method for the 'OverClock' I2C bus. + * The POST-card UART is the only known other party using this + * bus under normal circumstances (because it is the console). + * If the POST-card UART is built into the kernel, the lock is + * in file 'drivers/serial/8250_sc16is7xx.c'. Otherwise the lock + * is local to the RAS module. 
+ * + * Warning: + * This locking works perfectly in standard contexts and in + * the MCA handling contexts. However, they do not mix safely. + * If the ee_lock is taken from standard context, then an + * MCA event may hang because it cannot get the lock, ever! + * This can happen when/if ee_print() is used. + */ + +#ifdef CONFIG_I2C_PXA +extern atomic_t pxa_block; +extern char pxa_state; +#else +atomic_t pxa_block = ATOMIC_INIT(0); +char pxa_state = '-'; +#endif + +static void +ee_lock(void) +{ + /* + * Wait here until lock ackquired + */ + while(atomic_xchg(&pxa_block, 1)) + myDELAY(50); + + /* + * Lock taken, I2C transaction could be underway. + * Wait for it to end or forcefully terminate it. + */ + i2c_grab(); +} + +static void +ee_unlock(void) +{ + i2c_release(); + atomic_xchg(&pxa_block, 0); +} + + +/* + * Printf to the POST card UART. + * + * Function ee_printk() and ee_print() both creates + * a message into a local buffer from where the RAS + * timer will synch them into the kernel log about + * once a second. ee_printk() is thread safe. + * + * Function ee_print() will also attempt to write to + * the POST card serial port, which may be useful + * from exception context where OS services are out + * of the question. + * + * WARNING: ee_print() takes the same lock as + * the machine checks does, so if a machine check + * happens while a standard context thread are in + * this code we'll have an instant kernel hang. + */ + +char ee_buf[EE_BUF_COUNT * EE_BUF_LINELEN]; +atomic_t ee_msg = ATOMIC_INIT(-1); +atomic_t ee_seen = ATOMIC_INIT(-1); +int ee_rdy; + +#define EE_TSC 0 /* 1 to get rdtsc() included */ + +char * +ee_fmt(char * fmt, va_list args) +{ + char * buf; + int msg_id, tsl; +#if EE_TSC + uint64_t ts = rdtsc(); +#endif + + msg_id = atomic_inc_return(&ee_msg); + buf = ee_buf + (msg_id % EE_BUF_COUNT) * EE_BUF_LINELEN; + if (! *buf) { +#if EE_TSC + tsl = snprintf(buf, EE_BUF_LINELEN - 1, "[%lld] ", ts); +#else + tsl = 0; +#endif + vsnprintf(buf + tsl, EE_BUF_LINELEN - 1 - tsl, fmt, args); + return buf; + } + return 0; +} + +int +ee_printk(char * fmt, ...) +{ + va_list args; + char * buf; + + va_start(args, fmt); + buf = ee_fmt(fmt, args); + va_end(args); + + return buf ? strlen(buf) : 0; +} + +int +ee_print(char * fmt, ...) +{ + char ch, * buf; + va_list args; + int len; + + va_start(args, fmt); + buf = ee_fmt(fmt, args); + va_end(args); + + len = 0; + if (ee_rdy && buf) { + /* + * Get I2C bus exclusive access, + * setup for targeting the UART and + * send string one byte at a time + * with lf -> lr/cr translation. + */ + ee_lock(); + xfr_configure(SC16IS_ADDR(SC16IS_ADDR_1, SC16IS_ADDR_0), FREQ_AUTO); + while((ch = *(buf++))) { + if (ch == '\n') { + cons_putc('\r'); + len++; + } + cons_putc(ch); + len++; + } + ee_unlock(); + } + + return len; +} +EXPORT_SYMBOL_GPL(ee_print); + + + +/* +** +** EEPROM support routines +** +** The device is a 1 Mbit Atmel AT24C1024 which has 128 +** KByte addressable storage over 2 slave addresses. +** Lower 64 KB is at slave address 0x54 and upper +** 64KB is at slave address 0x55, i.e. it uses LSB of +** the slave address as bit 16 of the byte address. 
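+**
+** For example (illustration only): linear byte address 0x1A2B3 has
+** bit 16 set, so it is reached through slave 0x55 at in-device
+** offset 0xA2B3, whereas address 0x0FFFF stays on slave 0x54 at
+** offset 0xFFFF.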
+** +** All EEPROM support routines assume I2C controller +** to be initialized by xfr_configure() and expects +** exclusive access to the device +** +** Only KnC has this storage +*/ + +#ifdef CONFIG_MK1OM + +#define MR_ELOG_SIZE (128 * 1024) /* 1 Mbit */ +#define MR_ELOG_ADDR_LO 0x54 /* Lo 64K slave */ +#define MR_ELOG_ADDR_HI 0x55 /* Hi 64K slave */ +#define EE_PG_SIZ 256 /* Device page size */ + + +/* + * Layout of the EEPROM is roughly like this: + * + * Bytes Content + * 0 - 15 Fixed log header + * 16 - 17 Log head index (last written) + * 18 - 19 Log tail index (last read) + * 20 - end Log entries + * + * By definition, the log is fully read when head and + * tail pointer are equal (initial value: last entry). + * The effective log size is + * (device_size - sizeof(McaHeader))/sizeof(McaRecord). + * + * Fields of interest in the log entry 'id' are + * bits 7:0 Source index, 8 bit + * bits 18:16 Source type, 3 bit + * bits 22:22 Injected error flag + * bits 23:23 Repaired flag + * bits 24:24 Filtered flag + * bits 31:31 Valid flag + * + * Enumeration details are in file micras_mca.h + * + * Time stamps in the MCA header and event records are supposed to be + * standard 32-bit Unix format, i.e. seconds since 00:00 Jan 1 1979 GMT. + * This will wrap some time Jan 19th 2038, which is about 25 years from + * the release of KnC. Given the use of 386's (introduced 1985) in the + * modern data center anno '12, 32 bit will last for all practical purposes. + */ + +typedef struct _mca_header { + uint8_t signature[8]; /* Magic */ + uint8_t header_ver; /* Format revision */ + uint8_t rec_start; /* Offset of 1st record */ + uint16_t rec_size; /* Size of an MCA record */ + uint16_t entries; /* Log size */ + uint8_t logfull; /* Log has wrapped (reserved) */ + uint8_t hwtype; /* Board type (reserved) */ + uint16_t rec_head; /* Head index */ + uint16_t rec_tail; /* Tail index */ +} McaHeader; + +typedef struct _mca_record { + uint32_t id; /* Event origin & flags */ + uint32_t stamp; /* Low 32 bit of system time */ + uint64_t ctl; /* MCA bank register 'CTL' */ + uint64_t status; /* MCA bank register 'STATUS' */ + uint64_t addr; /* MCA bank register 'ADDR' */ + uint64_t misc; /* MCA bank register 'MISC' */ +} McaRecord; + + +/* + * Header to drop onto un-initalized EEPROM + * By definition, the EEPROM is uninitialised + * if the magic signature is wrong. + */ + +#define MR_ELOG_NUM (MR_ELOG_SIZE - sizeof(McaHeader))/sizeof(McaRecord) + +static McaHeader elog_preset = { + .signature = {"MCA_LOG"}, + .header_ver = 1, + .rec_start = sizeof(McaHeader), + .rec_size = sizeof(McaRecord), + .entries = MR_ELOG_NUM, + .logfull = -1, + .hwtype = 0, + .rec_head = MR_ELOG_NUM - 1, + .rec_tail = MR_ELOG_NUM - 1, +}; + +static uint16_t ee_num, ee_head, ee_tail; /* Cached log state */ + + +#if EPR_DBG || EE_VERIFY +/* + * Printk from EEPROM code. + * We have the lock, and the I2C target address is + * set for the Atmel device, we must reset I2C for + * the UART on every entry, and reset it back to the + * EEPROM in order to keep this function transparent. + * + * Warning: this call is highly risky, particularly + * in error conditions where the I2C bus is involved. + * Do not call it during an EEPROM I2C transaction!! + * Use for internal debug _ONLY_ and at own risk. + */ + +int +elog_print(char * fmt, ...) +{ + char * buf, ch; + va_list args; + int len; + + va_start(args, fmt); + buf = ee_fmt(fmt, args); + va_end(args); + + if (! 
buf) + return 0; + + xfr_configure(SC16IS_ADDR(SC16IS_ADDR_1, SC16IS_ADDR_0), FREQ_AUTO); + + len = 0; + while((ch = *(buf++))) { + if (ch == '\n') { + cons_putc('\r'); + len++; + } + cons_putc(ch); + len++; + } + + return len; +} +#endif /* EPR_DBG */ + + +/* + * Write block of data to EEPROM + * The Atmel device does not allow writes to cross the + * internal page size, which is 256 bytes on the 1 Mbit part. + * Given the size of an McaRecord this is likely to occur, but + * cannot happen more than once per call. + * Must preset slave address on every call. + */ + +static void +ee_wr(uint8_t addr, uint16_t ofs, uint8_t *buf, uint8_t len) +{ + uint16_t pix, swp; + uint8_t wl; + int err; + + if (mce_disabled) + return; + + if ((ofs + len) < ofs) { + EPR_PRT("ee_wr: address overrun\n"); + return; + } + + xfr_configure(addr, FREQ_AUTO); + + pix = ofs & (EE_PG_SIZ - 1); + while(len) { + wl = (uint8_t) min((uint16_t)len, (uint16_t)(EE_PG_SIZ - pix)); + + err = xfr_start(I2C_WRITE); + if (err) { + EPR_PRT("ee_wr: xfr_start (WR) err %d\n", err); + return; + } + + /* + * Byte swap, send Most significant byte first + */ + swp = (ofs >> 8) | (ofs << 8); + err = xfr_write(FALSE, 2, (uint8_t *) &swp); + if (err) { + EPR_PRT("ee_wr: xfr_write offset (%02x, %02x) err %d\n", ofs >> 8, ofs & 0xff, err); + return; + } + + /* + * Write payload to device + */ + err = xfr_write(TRUE, wl, buf); + if (err) { + EPR_PRT("ee_wr: xfr_write %d bytes (%02x, %02x ..) err %d\n", wl, buf[0], buf[1], err); + return; + } + ofs += wl; + buf += wl; + len -= wl; + pix = 0; + + /* + * Data sheet says wait 5 mSec before next + * transaction to the device after a write. + */ + myDELAY(5000); + } +} + + +/* + * Read block of data from EEPROM + * Must preset slave address on every call. + */ + +static void +ee_rd(uint8_t addr, uint16_t ofs, uint8_t *buf, uint8_t len) +{ + uint16_t swp; + int err; + + if ((ofs + len) < ofs) { + EPR_PRT("ee_rd: address overrun\n"); + return; + } + + xfr_configure(addr, FREQ_AUTO); + + err = xfr_start(I2C_WRITE); + if (err) { + EPR_PRT("ee_rd: xfr_start (WR) err %d\n", err); + return; + } + + /* + * Byte swap, send Most significant byte first + */ + swp = (ofs >> 8) | (ofs << 8); + err = xfr_write(FALSE, 2, (uint8_t *) &swp); + if (err) { + EPR_PRT("ee_rd: xfr_write (%02x, %02x) err %d\n", ofs >> 8, ofs & 0xff, err); + return; + } + + /* + * Change bus direction and read payload + */ + err = xfr_rept_start(I2C_READ); + if (err) { + EPR_PRT("ee_rd: xfr_rept_start (RD) err %d\n", err); + return; + } + err = xfr_read(TRUE, len, buf); + if (err) { + EPR_PRT("ee_rd: xfr_read err %d\n", err); + return; + } +} + + +/* + * Read one MCA event record from EEPROM + * Handles crossing device addresses. + */ + +static void +ee_get(McaRecord * rec, int no) +{ + uint32_t pos, mid, low; + + mid = MR_ELOG_SIZE / 2; + memset(rec, '\0', sizeof(*rec)); + pos = sizeof(McaHeader) + no * sizeof(McaRecord); + if (pos < (mid - sizeof(McaRecord))) { + /* + * Record fit entirely in lower half of EEPROM + */ + ee_rd(MR_ELOG_ADDR_LO, pos, (uint8_t *) rec, sizeof(*rec)); + } + else + if (pos > mid) { + /* + * Record fit entirely in upper half of EEPROM + */ + ee_rd(MR_ELOG_ADDR_HI, pos - mid, (uint8_t *) rec, sizeof(*rec)); + } + else { + /* + * Record spans both halves, need 2 reads. + */ + low = mid - pos; + ee_rd(MR_ELOG_ADDR_LO, pos, (uint8_t *) rec, low); + ee_rd(MR_ELOG_ADDR_HI, 0, ((uint8_t *) rec) + low, sizeof(*rec) - low); + } +} + + +/* + * Write one MCA event record to EEPROM + * Handles crossing device addresses. 
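+ *
+ * Worked example (illustration, assuming the structures pack to 20 and
+ * 40 bytes with no padding): record 1637 starts at byte 20 + 1637 * 40
+ * = 65500, i.e. within 40 bytes of the 64 KB boundary (65536), so its
+ * first 36 bytes are written via slave 0x54 at offset 65500 and the
+ * remaining 4 bytes via slave 0x55 at offset 0.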
+ */ + +static void +ee_put(McaRecord * rec, int no) +{ + uint32_t loc, mid, low; + + mid = MR_ELOG_SIZE / 2; + loc = sizeof(McaHeader) + no * sizeof(McaRecord); + if (loc < (mid - sizeof(McaRecord))) { + /* + * Record fit entirely in lower half of EEPROM + */ + ee_wr(MR_ELOG_ADDR_LO, loc, (uint8_t *) rec, sizeof(*rec)); + } + else + if (loc > mid) { + /* + * Record fit entirely in upper half of EEPROM + */ + ee_wr(MR_ELOG_ADDR_HI, loc - mid, (uint8_t *) rec, sizeof(*rec)); + } + else { + /* + * Record spans both halves, need 2 writes. + */ + low = mid - loc; + ee_wr(MR_ELOG_ADDR_LO, loc, (uint8_t *) rec, low); + ee_wr(MR_ELOG_ADDR_HI, 0, ((uint8_t *) rec) + low, sizeof(*rec) - low); + } +} + + +/* + * Add one MCA event to the EEPROM + * Store the passed event info in the EEPROM, and update write + * position to next entry, just in case if there are more than + * one MC event detected that needs checking in maintenance mode. + * + * This can be called in exception context, and therefore must + * work without any kernel support whatsoever. We must assume + * kernel services are not reliable at this point. + */ + +void +micras_mc_log(struct mce_info * event) +{ + McaRecord mr; + uint16_t nxt, id; + + if (mce_disabled) + return; + + /* + * Print entry on serial console (copy in kernel log) + */ +#if MC_VERBOSE + ee_printk("RAS.elog: bnk %d, id %d, ctl %llx, stat %llx, addr %llx, misc %llx\n", + event->org, event->id, event->ctl, event->status, event->addr, event->misc); +#endif + + /* + * Bail if EEPROM not in order (I2C lock-up or faulty device) + */ + if (! ee_num) + return; + + /* + * Prepare MCA error log record. + * We use the pysical CPU ID in the EEPROM records. + */ + id = (event->org <= 2) ? event->pid : event->id; + mr.id = PUT_BITS( 7, 0, id) | + PUT_BITS(18, 16, event->org) | + PUT_BIT(22, (event->flags & MC_FLG_FALSE) != 0) | + PUT_BIT(24, (event->flags & MC_FLG_FILTER) != 0) | + PUT_BIT(31, 1); + mr.stamp = (uint32_t) event->stamp; + mr.ctl = event->ctl; + mr.status = event->status; + mr.addr = event->addr; + mr.misc = event->misc; + +#if ADD_DIE_TEMP + { + uint32_t tmp; + tmp = mr_sbox_rl(0, SBOX_THERMAL_STATUS_2); + mr.id |= PUT_BITS(15, 8, GET_BITS(19, 10, tmp)); + } +#endif + + /* + * Get I2C bus exclusive access + */ + ee_lock(); + +#if EE_VERIFY + { + /* + * Check for header corruption. + * Time sink, only enable for debugging + */ + extern int in_sync; + McaHeader hdr; + + ee_rd(MR_ELOG_ADDR_LO, 0, (uint8_t *) &hdr, sizeof(hdr)); + if (memcmp(hdr.signature, elog_preset.signature, + sizeof(elog_preset.signature))) { + if (in_sync) { + printk("mc_log: Header corruption detected\n"); + dmp_hex(&hdr, sizeof(hdr), "mc_log: EEPROM header (entry)"); + } + else { + elog_print("mc_log: Header corruption detected (entry)\n"); + elog_print("EEPROM header: signature bad, ver %d, type %d\n", + hdr.header_ver, hdr.hwtype); + elog_print("EEPROM capacity: %d events, size %d, start %d\n", + hdr.entries, hdr.rec_size, hdr.rec_start); + elog_print("EEPROM state: head %d, tail %d, full %d\n", + hdr.rec_head, hdr.rec_tail, hdr.logfull); + } + } + } +#endif + + nxt = (ee_head + 1) % ee_num; + if (nxt == ee_tail) { + ee_printk("RAS.elog: EEPROM full, dropping event\n"); + ee_unlock(); + return; + } + ee_put(&mr, nxt); + +#if EE_VERIFY + { + /* + * Read back and verify with memory buffer + * Note: only works on 1st half of device. 
+ * Time sink, only enable for debugging + */ + McaRecord tst; + + ee_rd(MR_ELOG_ADDR_LO, loc, (uint8_t *) &tst, sizeof(tst)); + if (memcmp(&mr, &tst, sizeof(tst))) + elog_print("Write event verify failed\n"); + else + elog_print("Write event verify OK\n"); + } +#endif + + /* + * Update head pointer in EEPROM header + */ + ee_wr(MR_ELOG_ADDR_LO, offsetof(McaHeader, rec_head), (uint8_t *) &nxt, sizeof(nxt)); + ee_head = nxt; + +#if EE_VERIFY + { + /* + * Read back and verify with memory buffer + * Time sink, only enable for debugging + */ + uint16_t tst; + + ee_rd(MR_ELOG_ADDR_LO, 16, (uint8_t *) &tst, 2); + if (tst != nxt) + elog_print("Write index verify failed\n"); + else + elog_print("Write index verify OK\n"); + } + + { + /* + * Check again for header corruption + * Time sink, only enable for debugging + */ + extern int in_sync; + McaHeader hdr; + + ee_rd(MR_ELOG_ADDR_LO, 0, (uint8_t *) &hdr, sizeof(hdr)); + if (memcmp(hdr.signature, elog_preset.signature, + sizeof(elog_preset.signature))) { + if (in_sync) { + printk("mc_log: Header corruption detected (exit)\n"); + dmp_hex(&hdr, sizeof(hdr), "mc_log: EEPROM header"); + } + else { + elog_print("mc_log: Header corruption detected (exit)\n"); + elog_print("EEPROM header: signature bad, ver %d, type %d\n", + hdr.header_ver, hdr.hwtype); + elog_print("EEPROM capacity: %d events, size %d, start %d\n", + hdr.entries, hdr.rec_size, hdr.rec_start); + elog_print("EEPROM state: head %d, tail %d, full %d\n", + hdr.rec_head, hdr.rec_tail, hdr.logfull); + } + } + } +#endif + + /* + * Release I2C bus exclusive lock + */ + ee_unlock(); +} + + +/* + * Reset the EEPROM to mint condition + */ + +#define BSIZ 0xf0 + +static void +ee_mint(void) +{ + uint8_t buf[EE_PG_SIZ]; + McaHeader hdr; + uint32_t loc, mid; + uint16_t ofs; + uint8_t addr; + + + if (ee_rdy && ! mce_disabled) { + printk("EEPROM erase started ..\n"); + memset(buf, 0xff, sizeof(buf)); + + ee_lock(); + + /* + * Several cheats in this loop. + * - Despite maximum transfer per write command is 255 (8 bit count), + * we send only half a 'page', i.e. 128 byte, per call to ee_wr(). + * - Picking exactly half a page, starting page aligned, ensures there + * will be no writes across a page boundary, i.e. ee_wr() will always + * result in exactly one I2C write command per call. + * - We know that MR_ELOG_SIZE / (EE_PG_SIZ / 2) is a clean integer, + * and therefore will be no end condition to special case. + * - Same will be true for the 'mid-chip' limit where the target + * address is bumped by one. + */ + mid = MR_ELOG_SIZE / 2; + for(loc = 0; loc < MR_ELOG_SIZE; loc += (EE_PG_SIZ / 2)) { + addr = (loc < mid) ? MR_ELOG_ADDR_LO : MR_ELOG_ADDR_HI; + ofs = loc & 0xffff; + // printk(" -- loc %5x: addr %2x, offs %4x, len %4x\n", loc, addr, ofs, EE_PG_SIZ / 2); + ee_wr(addr, ofs, buf, EE_PG_SIZ / 2); + } + + /* + * Put in a fresh header + */ + ee_wr(MR_ELOG_ADDR_LO, 0, (uint8_t *) &elog_preset, sizeof(elog_preset)); + ee_rd(MR_ELOG_ADDR_LO, 0, (uint8_t *) &hdr, sizeof(hdr)); + printk("EEPROM erase complete\n"); + + ee_unlock(); + + /* + * Verify that the header stuck. + * If not, then complain to kernel log and set event capacity to 0 + */ + if (memcmp(hdr.signature, elog_preset.signature, sizeof(elog_preset.signature)) || + hdr.header_ver != elog_preset.header_ver || + hdr.rec_start != elog_preset.rec_start || + hdr.rec_size != elog_preset.rec_size || + hdr.hwtype != elog_preset.hwtype) { + /* + * Write EEPROM header failed. + * Leave a message in the kernel log about it. 
+ */ + printk("Error: EEPROM initialization failed!\n"); + printk("MCA events cannot be logged to EEPROM\n"); + ee_num = 0; + } + else { + ee_num = hdr.entries; + ee_head = hdr.rec_head; + ee_tail = hdr.rec_tail; + printk("EEPROM ready!\n"); + } + + + } +} + + +#if EE_PROC +/* + * Support for user space access to the EEPROM event log. + * Implemented as a 'proc' file named elog, who returns + * MCE events on read and on writes of 6 hex values + * per line creates new event(s) to be entered. + * + * Compile time configurable for disabling writes and + * choice of whether to dump new events or everything. + */ + +static struct proc_dir_entry * elog_pe; + +/* + * Write is just a simple file operation. + * We do not care about file offset since the specified event is to + * be added to the EEPROM at head+1, not at any arbitrary location. + */ + +static ssize_t +elog_write(struct file * file, const char __user * buff, size_t len, loff_t * off) +{ + char * buf; + uint16_t nxt; + McaRecord mr; + uint64_t ull[6]; + char * ep, * cp; + int i, err; + + /* + * Get input line into kernel space + */ + if (len > PAGE_SIZE -1) + len = PAGE_SIZE -1; + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (! buf) + return -ENOMEM; + if (copy_from_user(buf, buff, len)) { + err = -EFAULT; + goto wr_out; + } + buf[len] = '\0'; + cp = ep = (char *) buf; + + /* + * Special case EEPROM reset option, + * first 5 letters form the word 'reset' + */ + if (!strncmp(buf, "reset", 5)) { + ee_mint(); + goto wr_one; + } + + /* + * Need 6 numbers for an event record + */ + for(i = 0; i < 6; i++) { + while(isspace(*cp)) + cp++; + ull[i] = simple_strtoull(cp, &ep, 16); + if (ep == cp || (*ep != '\0' && !isspace(*ep))) { + err = -EINVAL; + goto wr_out; + } + cp = ep; + } + +#if 0 + /* + * If we were to screen this the we should ensure that + * id[7:0] < CPU_MAX on org 0, 1, 2 + * < DBOX_NUM on org 3 + * == 0 on org 4 + * < GBOX_NUM on org 5 + * < TBOX_NUM on org 6 + * id[18:16] <= 6 + * id[23] == 0 + * id[31] == 1 + */ +#endif + + if (ee_num) { + mr.id = (uint32_t) ull[0]; + mr.stamp = (uint32_t) ull[1]; + mr.ctl = ull[2]; + mr.status = ull[3]; + mr.addr = ull[4]; + mr.misc = ull[5]; + + /* + * Add event record under I2C bus exclusive access + */ + ee_lock(); + nxt = (ee_head + 1) % ee_num; + ee_put(&mr, nxt); + ee_wr(MR_ELOG_ADDR_LO, offsetof(McaHeader, rec_head), (uint8_t *) &nxt, sizeof(nxt)); + ee_head = nxt; + ee_unlock(); + } + + /* + * Swallow any trailing junk up to next newline + */ +wr_one: + ep = strchr(buf, '\n'); + if (ep) + cp = ep + 1; + err = cp - buf; + +wr_out: + kfree(buf); + return err; +} + + +/* + * Use the sequencer to read one event at a time, + * in order of occurrence in the EEPROM. Sequence + * position is event index in range 0 .. ee_num, + * which will be offset by (ee_tail + 1) modulo + * ee_num if EE_PROC_NEW flag is set. + */ + +static int elog_eof; /* Elog end-of-file marker */ + +static int +elog_seq_show(struct seq_file * f, void * v) +{ + McaRecord mr; + int pos, nxt; + static int inv; + + pos = *(loff_t *) v; + + /* + * Print nice header on 1st read from /proc/elog + */ + if (! 
pos) { + extern struct mr_rsp_hwinf hwinf; + struct mr_rsp_hwinf * r = &hwinf; + + inv = 0; + seq_printf(f, "Card %c%c%c%c%c%c%c%c%c%c%c%c: " + "brd %d, fab %d, sku %d, rev %d, stp %d, sub %d\n", + r->serial[0], r->serial[1], r->serial[2], r->serial[3], + r->serial[4], r->serial[5], r->serial[6], r->serial[7], + r->serial[8], r->serial[9], r->serial[10], r->serial[11], + r->board, r->fab, r->sku, r->rev, r->step, r->substep); + if (ee_num) { + seq_printf(f, "Head %d, tail %d, cap %d\n", ee_head, ee_tail, ee_num); + seq_printf(f, "%5s %8s %12s %8s %16s %16s %16s %16s\n", + "index", "id", "id decode", "time", "ctrl", "status", "addr", "misc"); + } + else + seq_printf(f, "Error: EEPROM not initialized\n"); + } + + /* + * Set EOF and quit if EEPROM not accessible + */ + if (! ee_num) { + elog_eof = 1; + return 0; + } + + /* + * Get event under I2C bus exclusive access + */ +#if EE_PROC_NEW + nxt = (pos + ee_tail + 1) % ee_num; +#else + nxt = pos; +#endif + ee_lock(); + ee_get(&mr, nxt); + ee_unlock(); + +#if ! EE_PROC_NEW + /* + * We refuse to print invalid entries. + * However, a freshly reset EEPROM contains all 1s and + * therefore we won't rely on the valid-bit alone. + * Instead rely on the unused areas of 'id' to be 0s. + * Probably need to stop sequencer once a bad entry is + * seen because in all likelihood we've reached the + * log end and reading the remainder of the EEPROM will + * just be waste of time. + */ + if (GET_BITS(30, 25, mr.id) == 0x3f && + GET_BITS(21, 19, mr.id) == 0x07 && + GET_BITS(15, 8, mr.id) == 0xff) { + if (inv++ > 10) + elog_eof = 1; + return 0; + } +#endif + + seq_printf(f, "%5d %08x [%d %3d %c%c%c%c] %08x %016llx %016llx %016llx %016llx\n", + nxt, mr.id, + GET_BITS(18,16,mr.id), + GET_BITS(7,0,mr.id), + GET_BIT(22,mr.id) ? 'I' : ' ', + GET_BIT(23,mr.id) ? 'R' : ' ', + GET_BIT(24,mr.id) ? 'F' : ' ', + GET_BIT(31,mr.id) ? 'V' : ' ', + mr.stamp, mr.ctl, mr.status, mr.addr, mr.misc); + + return 0; +} + +static void * +elog_seq_start(struct seq_file * f, loff_t * pos) +{ + if (ee_num) { + if (*pos >= ee_num) + return NULL; +#if EE_PROC_NEW + /* + * Skip checks if we are dumping full log + */ + if (ee_head == ee_tail) + return NULL; + if (*pos && ((*pos + ee_tail) % ee_num) == ee_head) + return NULL; +#endif + } + + elog_eof = 0; + + return pos; +} + +static void * +elog_seq_next(struct seq_file * f, void * v, loff_t * pos) +{ + if (elog_eof) + return NULL; + + (*pos)++; + if (*pos >= ee_num) + return NULL; + +#if EE_PROC_NEW + /* + * No wrap checks if we are dumping full log + */ + { + int nxt; + + nxt = ((*pos) + ee_tail) % ee_num; + if (nxt == ee_head) + return NULL; + } +#endif + + return pos; +} + +static void +elog_seq_stop(struct seq_file * f, void * v) +{ +} + +static const struct seq_operations elog_seq_ops = { + .start = elog_seq_start, + .next = elog_seq_next, + .stop = elog_seq_stop, + .show = elog_seq_show, +}; + +static int +elog_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &elog_seq_ops); +} + +static struct file_operations proc_elog_operations = { + .open = elog_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .write = elog_write, +}; + +#endif /* EE_PROC */ + + + +/* +** +** Validation hooks. +** +** ee_list List EEPROM contents to kernel log +** ee_wipe Clear EEPROM (after RAS testing) +** +** Used by validation, exported entry point +** Do not enable this in production code. 
+** +*/ + +void +ee_list(void) +{ + McaHeader hdr; + McaRecord rec; + int pos, i; + + /* + * Get I2C bus exclusive access + */ + ee_lock(); + + /* + * Read header + */ + ee_rd(MR_ELOG_ADDR_LO, 0, (uint8_t *) &hdr, sizeof(hdr)); + if (! strncmp(hdr.signature, "MCA_LOG", sizeof(hdr.signature))) { + printk("MCE log header: signature OK, ver %d, type %d\n", + hdr.header_ver, hdr.hwtype); + printk("MCE log capacity: %d events, size %d, start %d\n", + hdr.entries, hdr.rec_size, hdr.rec_start); + printk("MCE log state: head %d, tail %d, full %d\n", + hdr.rec_head, hdr.rec_tail, hdr.logfull); + if (hdr.entries != MR_ELOG_NUM) { + printk("MCE log check: invalid capacity, expected %ld\n", MR_ELOG_NUM); + goto ee_bad; + } + if (hdr.rec_size != sizeof(McaRecord)) { + printk("MCE log check: invalid rec size, expected %ld\n", sizeof(McaRecord)); + goto ee_bad; + } + if (hdr.rec_tail != ee_tail || + hdr.rec_head != ee_head) { + printk("MCE log check: cached h/t mismatch %d/%d\n", ee_head, ee_tail); + goto ee_bad; + } + if (hdr.entries != ee_num) { + printk("MCE log check: cached capacity mismatch %d\n", ee_num); + goto ee_bad; + } + + /* + * Header looks OK, + * Dump all valid entries in eeprom + */ + for(i = 0; i < hdr.entries; i++) { + ee_get(&rec, i); + + /* + * Uninitialized parts have all FFs in them, + * need to screen those before testing the valid bit + */ + if (rec.id != 0xffffffff && GET_BIT(31, rec.id)) { +#if EE_VERIFY + dmp_hex(&rec, sizeof(rec), "ee_list: Entry[%d]", i); +#endif + pos = hdr.rec_start + i * hdr.rec_size; + printk("Log %4d (pos %06x): id %08x, " + "ctrl %016llx, stat %016llx, addr %016llx, misc %016llx, time %d\n", + i, pos, rec.id, rec.ctl, rec.status, + rec.addr, rec.misc, rec.stamp); + } + } + } + else { + printk("MCE log header: bad signature %02x%02x%02x%02x%02x%02x%02x%02x\n", + hdr.signature[0], hdr.signature[1], hdr.signature[2], hdr.signature[3], + hdr.signature[4], hdr.signature[5], hdr.signature[6], hdr.signature[7]); + } + +ee_bad: + /* + * Release I2C bus exclusive lock + */ + ee_unlock(); +} +EXPORT_SYMBOL_GPL(ee_list); + +void +ee_wipe(void) +{ +#if 1 + printk("Wiping EEPROM disabled, call ignored\n"); +#else + ee_mint(); +#endif +} +EXPORT_SYMBOL_GPL(ee_wipe); +#endif /* CONFIG_MK1OM */ + + +/* +** +** Setup access to the EEPROM on KnC +** This include initializing the local I2C driver and +** locating the next write position in the EEPROM. +** We want to limit the exception time activity to +** a minimum and thus make preparations up front. +** This is expected to happen before enabling the +** MC event intercepts. +** +*/ + +int +ee_init(void) +{ +#if 0 + /* + * Clocking the delay loop. + * Average results over 3 runs: + * uSec % off + * 1 12.46 + * 2 6.22 + * 4 4.34 + * 8 3.41 + * 16 2.90 + * 32 2.65 + * 64 2.52 + * 128 2.46 + * 256 2.43 + * 512 2.41 + * 1024 2.41 + * 2048 6.30 + * 4096 2.43 + * 8192 3.28 + * 16384 3.30 + * 32768 3.42 + * , which is fine for the purposes in this driver. + */ + { + uint64_t t1, t2; + uint64_t usec, pwr; + + printk("RAS.test: tsc_khz %d\n", tsc_khz); + for(pwr = 0; pwr < 16; pwr++) { + usec = 1UL << pwr; + t1 = rdtsc(); + myDELAY(usec); + t2 = rdtsc(); + printk("RAS.test: myDelay(%lld) => %lld clocks\n", usec, t2 - t1); + } + } +#endif + +#ifdef CONFIG_MK1OM + if (! mce_disabled) { + McaHeader hdr; + +#ifndef CONFIG_I2C_PXA + /* + * Reset I2C controller if PXA driver is not included in the kernel. + */ + i2c_reset(); +#endif + + /* + * Get I2C bus exclusive access + */ + ee_lock(); + + /* + * Paranoia!! 
+ * At this point the I2C controller should be inactive and + * the I2C bus should be idle. Verify this to be true. + * Note: This check is only applied on this very first + * access to the I2C controller. If it passed the + * two criterias we _assume_ we have good hardware. + * TBD: should we assume that the I2C subsystem can go bad + * at runtime and add more checking? + */ + ee_num = 0; + if ((reg_read(ISR_OFFSET) & ISR_UB) || (reg_read(IBMR_OFFSET) != 3)) { + printk("RAS.elog: I2C unit out of control, cannot access EEPROM\n"); + } + else { + /* + * Get EEPROM header and cache log state. + */ + ee_rd(MR_ELOG_ADDR_LO, 0, (uint8_t *) &hdr, sizeof(hdr)); + if (memcmp(hdr.signature, elog_preset.signature, sizeof(elog_preset.signature)) || + hdr.header_ver != elog_preset.header_ver || + hdr.rec_start != elog_preset.rec_start || + hdr.rec_size != elog_preset.rec_size || + hdr.hwtype != elog_preset.hwtype) { + printk("RAS.elog: Found un-initialized EEPROM, initializing ..\n"); + ee_wr(MR_ELOG_ADDR_LO, 0, (uint8_t *) &elog_preset, sizeof(elog_preset)); + ee_rd(MR_ELOG_ADDR_LO, 0, (uint8_t *) &hdr, sizeof(hdr)); + } + + if (memcmp(hdr.signature, elog_preset.signature, sizeof(elog_preset.signature)) || + hdr.header_ver != elog_preset.header_ver || + hdr.rec_start != elog_preset.rec_start || + hdr.rec_size != elog_preset.rec_size || + hdr.hwtype != elog_preset.hwtype) { + /* + * Write to EEPROM header failed. + * Leave a message in the kernel log about it and set capacity to 0. + */ + printk("RAS.elog: Error: EEPROM initialization failed!\n"); + } + else { + ee_num = hdr.entries; + ee_head = hdr.rec_head; + ee_tail = hdr.rec_tail; + printk("RAS.elog: rev %d, size %d, head %d, tail %d\n", + hdr.header_ver, ee_num, ee_head, ee_tail); + if (ee_head != ee_tail) { + /* + *TBD: should we be aggressive and replay these events to the host + * when it opens the MC SCIF channel to force the issue? + */ + printk("RAS.elog: Warning: MCA log has unprocessed entries\n"); + } + } + } + if (!ee_num) + printk("RAS.elog: MCA events cannot be logged to EEPROM\n"); + + /* + * Release I2C bus exclusive lock + */ + ee_unlock(); + } +#endif /* CONFIG_MK1OM */ + + /* + * Reset I2C bus & UART (sort of, internal reset only) + */ + xfr_configure(SC16IS_ADDR(SC16IS_ADDR_1, SC16IS_ADDR_0), FREQ_AUTO); + cons_init(); + ee_rdy = 1; + +#if defined(CONFIG_MK1OM) && EE_PROC + /* + * Create proc file + * We allow writes if EE_INJECT is defined or during manufacturing. + */ + { + int mode; +#if EE_INJECT + mode = 0644; +#else + uint32_t smc_err, smc_val, smc_fwv; + + /* + * HSD 4846538 + * Needs SMC FW 1.8 or later to be safe to use. + * Read FW version; if failed then not at manufacturing. + * If FW version 1.8 or later go read Zombie register. 
+ * If zombie register responded we're at manufacturing, + */ + mode = 0444; + smc_err = gmbus_i2c_read(2, 0x28, 0x11, (uint8_t *) &smc_fwv, sizeof(smc_fwv)); + if (smc_err == sizeof(smc_fwv) && GET_BITS(31, 16, smc_fwv) >= 0x0108) { + smc_err = gmbus_i2c_read(2, 0x28, 0x1b, (uint8_t *) &smc_val, sizeof(smc_val)); + if (smc_err == sizeof(uint32_t)) + mode = 0644; + } + if (mode == 0444) + proc_elog_operations.write = 0; +#endif + elog_pe = proc_create("elog", mode, 0, &proc_elog_operations); + } +#endif + +#if 0 + /* + * Say hello on the console + */ + ee_printk("RAS: ee_print ready, uart adr %02x\n", + SC16IS_ADDR(SC16IS_ADDR_1, SC16IS_ADDR_0)); +#endif + + if (mce_disabled) + printk("RAS.elog: disabled\n"); + else + printk("RAS.elog: init complete\n"); + return 0; +} + + +/* + * Cleanup for module unload. + * Free any resources held by this driver + */ + +int +ee_exit(void) +{ +#if defined(CONFIG_MK1OM) && EE_PROC + if (elog_pe) { + remove_proc_entry("elog", 0); + elog_pe = 0; + } +#endif + + + /* + * Reset I2C bus & UART (sort of, internal reset only) + */ + ee_rdy = 0; + xfr_configure(SC16IS_ADDR(SC16IS_ADDR_1, SC16IS_ADDR_0), FREQ_AUTO); + cons_exit(); + + printk("RAS.elog: exit complete\n"); + return 0; +} + +#endif /* EMULATION */ diff --git a/ras/micras_knc.c b/ras/micras_knc.c new file mode 100644 index 0000000..86ec013 --- /dev/null +++ b/ras/micras_knc.c @@ -0,0 +1,2794 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * RAS MT module driver + * + * Code and data structures to handle get/set tasks for KnC. + * Parties accessing the data structures are supposed to use the + * micras_mt_tsk() routines to ensure integrity and consistency. 
+ * Particularly important when handling sysfs nodes and actions + * requested from SCIF connections must use that method in order + * to guarantee serialized access. + * + * Even if read-only access to latest valid data is required, + * it should go through micras_mt_tsk() using dedicated handlers + * in this module. + * + * Apologies for the messy code, but hardware support to report + * board properties at this time (Power-On of A0) is so erratic + * that odd ways of obtaining the info had to replace the POR + * methods. The SMC support is sporadic, A0 has issues with SVID + * and some SBOX registers are invalid because they depend on + * TMU telemetry transmissions from the SMC which some reason + * has been forgotten/missed/defeatured (does not happen). + * + * TBD: Once the dust settles there will be code to remove. + * But until then, lots of #ifdef's remains. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "micras.h" + + +/* + * Persistent data accessible through the CP api. + * Some functions just read/modify hardware CSRs + * and thus need no storage between invocations. + */ + +extern struct mr_rsp_hwinf hwinf; +extern struct mr_rsp_vers vers; +extern struct mr_rsp_volt volt; +extern struct mr_rsp_freq freq; +extern struct mr_rsp_power power; +extern struct mr_rsp_plim plim; +extern struct mr_rsp_gddr gddr; +extern struct mr_rsp_gvolt gvolt; +extern struct mr_rsp_gfreq gfreq; +extern struct mr_rsp_temp temp; +extern struct mr_rsp_ecc ecc; +extern struct mr_rsp_trbo trbo; +extern struct mr_rsp_pmcfg pmcfg; + +#if USE_SVID +static uint8_t vccp_cap, vddq_cap, vddg_cap; +static uint8_t vccp_imax, vddq_imax, vddg_imax; +#endif + +uint8_t xlat_cpu[NR_CPUS]; + +#define FIX_DBOX 1 + +#if FIX_DBOX +/* + * Pre-emptive restoring DBOX-0 register access. + * A glitch during clock speed changes (PM or GPU_HOT) + * may under some rare circumstances break access to DBOX + * registers. It is very rare, requires hours of tailored + * simulation to reproduce, never seen in the wild (yet). + * The gmbus controller sits in the DBOX and is affected. + * Calling this routine prior to every gmbus read/write + * reduces risk of hitting this bug to a single SMC register, + * which has been deemed acceptable for B-step KnCs. + * Only alternative is to perform repeated transaction(s) + * until a stable result is obtained, which will be costly + * in performance. + */ +static void +mr_smc_deglitch(void) +{ + mr_dbox_rl(0, 0x600); + mr_dbox_rl(0, 0x2440); +} +#else +#define mr_smc_deglitch(); /* As nothing */ +#endif + + +/* +** +** Conversion between CP formats (uV, MHz, etc.) +** and hardware register formats (SMC and VRs mostly). +** +*/ + + +/* + * PLL tables used to map between hw scale register + * value and actual frequencies given a fixed base. + * + * The core frequency (MCLK) formula is + * freq = Icc * (Feedback / Feedforward) + * where + * Icc = Frequency generated from ICC, nominal 200 MHz + * FeedBack = ratio bits 8:1 (valid range: 8 .. 16) + * FeedForward = ratio bits 10:9 (01 -> 4, 10 -> 2, 11 -> 1) + * + * The gddr frequency (PGCLK) formula is + * freq = (X / 2) * Feedback / Feedforward + * where + * X = SBPLL (ICC) Table 1, FB range 10..22 + * X = LCVCO (ICC/2) Table 2, FB range 44..65 + * X = Bypass (ICC/2) Table 3, FB range 20..44 + * which is why there's three gddr tables. The divide by 2 of + * 'X' is represented as doubling the FF dividers in the tables. 
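+ *
+ * Worked example (illustrative numbers only): with ICC at its nominal
+ * 200 MHz, a core ratio with feedforward code 11 (divide by 1) and
+ * feedback 11 encodes 200 * 11 / 1 = 2200 MHz; a gddr ratio from the
+ * first table with doubled divider 2 and feedback 20 encodes
+ * 200 * 20 / 2 = 2000 MHz.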
+ * + * Overlapping ranges over feedback and feedforward values are + * handled by range table(s) below such that lower frequencies + * can be selected at a finer granularity. The tables themselves + * do not allow overlaps, i.e. two ways to specify the same + * PLL output frequency. + * + * Note that ICC clocks have their own PLL built in which uses + * the PCI-E 100 MHz clock, adds SSC and scale it by a pair of + * dividers. One divider is (I'm told) fixed at 40, the other + * is fused, and none of them can be read from uOS at runtime. + * The fused dividers are nominally 20, which is what the + * tables below is based on. Some SKUs tweak the core ICC PLL + * by fuses, so to counter it that divider is reported in scr #4. + * No means to know if gddr ICC PLL gets tweaked too. + * + *WARNING: there are overlabs on the divider codes for GDDR PLLs, + * which theoretically can cause false reporting of GDDR + * device speeds (example: FB dividers 20, 21, and 22 are + * defined both in gddr_tab1 and gddr_tab3). Currently + * there is no way to determine which table is used. + */ + +struct pll_tab { + uint8_t clk_div; /* Feed forward */ + uint8_t min_mul; /* Lower feedback */ + uint8_t max_mul; /* Upper feedback */ + uint16_t min_clk; /* Lower frequency */ + uint16_t max_clk; /* Upper frequency */ + uint8_t step_size; /* Granularity */ +} cpu_tab[] = { /* CPU PLL, ICC @ ~200 MHz */ + {1, 8, 16, 1600, 3200, 200}, + {2, 8, 15, 800, 1500, 100}, + {4, 8, 15, 400, 750, 50}, +}, gddr_tab1[] = { /* GDDR PLL, ICC @ 200 MHz */ + {2, 10, 22, 1000, 2200, 100}, + {4, 10, 22, 500, 1100, 50}, + {8, 10, 22, 250, 550, 25}, +}, gddr_tab2[] = { /* GDDR PLL, LCVCO @ 100 MHz */ + {2, 44, 65, 2200, 3250, 50}, +}, gddr_tab3[] = { /* GDDR PLL, ICC bypass @ 100 MHz */ + {2, 20, 44, 1000, 2200, 100}, + {4, 20, 44, 500, 1100, 50}, + {8, 20, 44, 250, 550, 25}, +}; + +#define ICC_NOM 20 /* Nominal ICC feed back divider */ + +static uint16_t +ratio2freq(uint16_t ratio, struct pll_tab * tab, int tablen, uint16_t base) +{ + uint16_t fwd, bck; + + fwd = GET_BITS(10, 9, ~ratio); + bck = GET_BITS(8, 1, ratio); + + if (tab == gddr_tab3 && (bck & 1)) + return 0; + + if (fwd < tablen && bck >= tab[fwd].min_mul && bck <= tab[fwd].max_mul) + return (base * bck) / tab[fwd].clk_div; + + return 0; +} + +static uint16_t +freq2ratio(uint16_t freq, struct pll_tab * tab, int tablen, uint16_t base) +{ + int fwd; + + for(fwd = tablen - 1; fwd >= 0; fwd--) { + if (freq >= tab[fwd].min_clk && freq <= tab[fwd].max_clk) { + /* + * Why bother check for accurate input? + * Ignoring it just rounds down to nearest supported! + */ + if (freq % tab[fwd].step_size) + break; + + return PUT_BITS(10, 9, ~fwd) | + PUT_BITS( 8, 1, (freq * tab[fwd].clk_div) / base); + } + } + + return 0; +} + +static uint32_t +icc_fwd(void) +{ + uint32_t scr4, div; + + scr4 = mr_sbox_rl(0, SBOX_SCRATCH4); + div = GET_BITS(29, 25, scr4); + + return div ? div : ICC_NOM; +} + +static uint32_t +mr_mt_gf_r2f(uint16_t pll) +{ + uint64_t freq; + + /* + * As per HSD 4118175, ICC clock at 200 MHz is currently not + * used on any SKUs, and is unlikely to be used in the future. + * Therefore, the 100 MHz tables are searched first. + */ + freq = ratio2freq(pll, gddr_tab3, ARRAY_SIZE(gddr_tab3), 100); + if (! freq) + freq = ratio2freq(pll, gddr_tab2, ARRAY_SIZE(gddr_tab2), 100); + if (! 
freq) + freq = ratio2freq(pll, gddr_tab1, ARRAY_SIZE(gddr_tab1), 200); + + return 1000 * freq; +} + +static uint32_t +mr_mt_cf_r2f(uint16_t pll) +{ + uint64_t freq; + + freq = ratio2freq(pll, cpu_tab, ARRAY_SIZE(cpu_tab), 200); + + return (1000 * freq * ICC_NOM) / icc_fwd(); +} + + +#if USE_SVID +/* + * VRM12 voltage converters + * Only bits 7:0 are being used as follows: + * Volt = Min + Res * (Bits -1) + * Bits = 1 + (Volt - Min) / Res + * Bits value of 0 reserved for turning VR off. + */ + +#define VRM12_MAX 1520000 /* 1.52 V */ +#define VRM12_MIN 250000 /* 250 mV */ +#define VRM12_RES 5000 /* 5.0 mV */ + +static uint32_t +svid2volt(uint8_t svid) +{ + uint32_t bits; + + bits = GET_BITS(7, 0, svid); + if (bits) + return VRM12_MIN + VRM12_RES * (bits - 1); + else + return 0; +} + +static uint8_t +volt2svid(uint32_t uv) +{ + uint32_t delta, bits; + + bits = 0; + if (uv >= VRM12_MIN && uv <= VRM12_MAX) { + delta = uv - VRM12_MIN; + /* + * Why bother check for accurate input? + * Ignoring it just rounds up to nearest! + */ + if (! (delta % VRM12_RES)) + bits = 1 + delta / VRM12_RES; + } + return PUT_BITS(7, 0, bits); +} + + +/* + * SVID register scaling: + * + * Vin = SVID_REG(0x1A) + * Iin = SVID_REG(0x19) 1:1 A + * Pin = SVID_REG(0x1B) 1:1 W + * Vout = SVID_REG(0x16) / 128 V + * Iout = SVID_REG(0x15) 1:1 A + * Pout = SVID_REG(0x18) 1:1 W + * Iout = (SVID_REG(0x15) / ADCmax) * (SVID_REG(0x21) A + * Temp = SVID_REG(0x17) 1:1 C + * + * Note: SVID_REG(0x06) bit 7 tells Iout formula. + * Assuming 8-bit ADC => ADCmax to be 0xff. + * + * Inputs are SVID register values, outputs are u{V|A|W}. + */ + +static uint32_t +vout2volt(uint8_t vout) +{ + /* + * Linear range from 0 to 2 volt + */ + return (((uint32_t) vout) * 1000000) / 128; +} + +static uint32_t +vin2volt(uint8_t vin) +{ + /* + * Formula not known. + */ + return (((uint32_t) vin) * 1000000) / 128; +} + +static uint32_t +one2one(uint8_t in) +{ + return ((uint32_t) in) * 1000000; +} + +static uint32_t +iout2amp(uint8_t iout, uint8_t cap, uint8_t imax) +{ + if (GET_BITS(7, 7, cap)) + return (((uint32_t) iout) * ((uint32_t) imax) * 1000000) / 256; + else + return one2one(iout); +} + +#define iin2amp(iin) one2one(iin) +#define pin2watt(pin) one2one(pin) +#define pout2watt(pout) one2one(pout) + + + +/* +** +** Simple SVIDCONTROL interface. +** +** 0 Parity bit out +** 8:1 SVID data out +** 13:9 SVID command +** 17:14 SVID address +** 18 Parity bit in (if any) +** 26:19 SVID data in (if any) +** 27 ACK #0 +** 28 ACK #1 +** 29 SVID Error +** 30 CTL Idle +** 31 CMD Start +** +** See SBOX HAS for more details. +** One transaction is expected to finish +** in less than 2 uSec (15.625 MHz clock) +** and busy waiting here should be OK. 
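+**
+** Worked example (illustrative, using the encodings above and the
+** VR12 defines below): reading register 0x16 (Vout) from the VCCP
+** rail uses dev 0x0, command 0x07, data 0x16, so bits 17:14 = 0,
+** 13:9 = 0x07 and 8:1 = 0x16, i.e. 0x0E2C. Six bits are set, so the
+** even-parity bit 0 stays 0 and the transaction is started by writing
+** 0x0E2C with bit 31 set to SVIDCONTROL.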
+** +** Return values: +** 0 OK +** 1-7 Controller bits 29:27 +** 8 Parameter error (invalid device or opcode) +** +*/ + +/* + * SVID command set + * Source: SVID Protocol rev 1.5 + */ +#define VR12Cmd_Extend 0x00 /* Req */ +#define VR12Cmd_SetVID_Fast 0x01 /* Req */ +#define VR12Cmd_SetVID_Slow 0x02 /* Req */ +#define VR12Cmd_SetVID_Decay 0x03 /* Req */ +#define VR12Cmd_SetPS 0x04 /* Req */ +#define VR12Cmd_SetRegADR 0x05 /* Req */ +#define VR12Cmd_SetRegDAT 0x06 /* Req */ +#define VR12Cmd_GetReg 0x07 /* Req */ +#define VR12Cmd_TestMode 0x08 /* Req */ + +/* + * SVID registers of interest + * Source: SVID Protocol rev 1.5 + * + * Notes on the capability register: + * bit 0 Iout (0x15) + * bit 1 Vout (0x16) + * bit 2 Pout (0x18) + * bit 3 Iin (0x19) + * bit 4 Vin (0x1a) + * bit 5 Pin (0x1b) + * bit 6 Temp (0x17) + * bit 7 Iout format of register 0x15 + * 0 -> value in Amps + * 1 -> value scaled to Icc_Max + */ + +#define VR12Reg_VendorID 0x00 /* Req */ +#define VR12Reg_ProductID 0x01 /* Req */ +#define VR12Reg_ProductRev 0x02 /* Req */ +#define VR12Reg_ProductDate 0x03 /* Opt */ +#define VR12Reg_LotCode 0x04 /* Opt */ +#define VR12Reg_ProtocolID 0x05 /* Req */ +#define VR12Reg_Capability 0x06 /* Req */ +#define VR12Reg_Iout 0x15 /* Req */ +#define VR12Reg_Vout 0x16 /* Opt */ +#define VR12Reg_Temp 0x17 /* Opt */ +#define VR12Reg_Pout 0x18 /* Opt */ +#define VR12Reg_Iin 0x19 /* Opt */ +#define VR12Reg_Vin 0x1a /* Opt */ +#define VR12Reg_Pin 0x1b /* Opt */ +#define VR12Reg_Icc_Max 0x21 /* Req */ +#define VR12Reg_Temp_Max 0x22 /* Req */ +#define VR12Reg_Vout_Max 0x30 /* Req */ +#define VR12Reg_VID_Set 0x31 /* Req */ + +/* + * SVID addresses on KnC + */ +#define SVID_VCCP 0x0 /* Core rail */ +#define SVID_VDDQ 0x2 /* Memory rail (1st loop) */ +#define SVID_VDDG 0x3 /* Uncore rail (2nd loop) */ + +static DEFINE_SPINLOCK(svidcontrol_lock); + +static int +SvidCmd(uint8_t dev, uint8_t op, uint8_t in) +{ + uint32_t cmd, ret, err; + + /* + * The SVID Controller does not work in A0 (HSD 3498464) + * Pretend success, but return 0 always + */ + return 0; + + /* + * For now just check that command can be contructed. + * + *TBD: Add stricter parameter check? + */ + if (dev > GET_BITS(17, 14, ~0) || + op > GET_BITS(13, 9, ~0)) + return -MR_ERR_SMC; + + /* + * Craft 18 bit command with even parity + */ + cmd = PUT_BITS( 8, 1, in) | + PUT_BITS(13, 9, op) | + PUT_BITS(17, 14, dev); + if (bitmap_weight((unsigned long *) &cmd, 18) & 1) + cmd |= 1; + + /* + * Wait until controller in idle state, + * write command + start bit and then + * wait for controller to be idle again. + */ + spin_lock(&svidcontrol_lock); + for( ;; ) { + ret = mr_sbox_rl(0, SBOX_SVIDCONTROL); + if (GET_BITS(31, 30, ret) == 0x1) + break; + } + mr_sbox_wl(0, SBOX_SVIDCONTROL, cmd | PUT_BIT(31, 1)); + for( ;; ) { + ret = mr_sbox_rl(0, SBOX_SVIDCONTROL); + if (GET_BITS(31, 30, ret) == 0x1) + break; + } + spin_lock(&svidcontrol_lock); + + /* + * Report command status + * Only if SVID_Error = 0, Ack #1 = 1, and Ack #0 = 0 + * did we have a successful transfer, and have data + * to return (SBOX HAS table 9). + */ + err = GET_BITS(29, 27, ret); + return (err == 0x2) ? GET_BITS(26, 19, ret) : -MR_ERR_SMC; +} +#endif + + + +/* +** +** SMC API +** +** See "Knights Corner System Managment Architecture Specification" +** for details on the SMC internals and supported APIs. 
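+**
+** For orientation (register value assumed purely for illustration,
+** field layout as used in mr_mt_card_init below): a 4-byte read of
+** register 0x11 returning 0x01080014 would decode as SMC firmware
+** 1.8, build 20 (major in bits 31:24, minor in 23:16, build in 15:0).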
+** +** This module is based on rev 0.31 +** +*/ + +#define MR_SMC_ADDR 0x28 /* SMC DVO-B Slave address */ + +#define MR_SMC_PCI_VID 0x00 /* PCI Vendor ID, 4 */ +#define MR_SMC_PCI_DID 0x02 /* PCI Device ID, 4 */ +#define MR_SMC_PCI_BCC 0x04 /* PCI Base Class Code, 4 */ +#define MR_SMC_PCI_SCC 0x05 /* PCI Sub Class Code, 4 */ +#define MR_SMC_PCI_PI 0x06 /* PCI Programming Interface, 4 */ +#define MR_SMC_PCI_SMBA 0x07 /* PCI MBus Manageability Address, 4 */ +#define MR_SMC_UUID 0x10 /* Universally Unique Identification, 16 */ +#define MR_SMC_FW_VERSION 0x11 /* SMC Firmware Version, 4 */ +#define MR_SMC_EXE_DOMAIN 0x12 /* SMC Execution Domain, 4 */ +#define MR_SMC_STS_SELFTEST 0x13 /* SMC Self-Test Results, 4 */ +#define MR_SMC_HW_REVISION 0x14 /* SMC Hardware Revision, 4 */ +#define MR_SMC_SERIAL 0x15 /* Card serial number, 12 */ +#define MR_SMC_SMB_RESTRT 0x17 /* Restart SMBus addr negotiation, 4 */ + +#define MR_SMC_CPU_POST 0x1a /* POST Register, 4 */ +#define MR_SMC_ZOMBIE 0x1b /* Zombie Mode Enable, 4 */ +#define MR_SMC_CPU_ID 0x1c /* CPU Identifier, 4 */ + +#define MR_SMC_SEL_ENTRY_SEL 0x20 /* SEL Entry Selection Register, 4 */ +#define MR_SMC_SEL_DATA 0x21 /* SEL Data register, */ +#define MR_SMC_SDR_ENTRY_SEL 0x22 /* SDR Entry Selection Register, 4 */ +#define MR_SMC_SDR_DATA 0x23 /* SDR Data register, */ + +#define MR_SMC_PWR_PCIE 0x28 /* PCIe Power Reading, 4 */ +#define MR_SMC_PWR_2X3 0x29 /* 2x3 Power Reading, 4 */ +#define MR_SMC_PWR_2X4 0x2a /* 2x4 Power Reading, 4 */ +#define MR_SMC_FORCE_TTL 0x2b /* Forced Throttle, 4 */ +#define MR_SMC_PWR_LIM_0 0x2c /* Power Limit 0, 4 */ +#define MR_SMC_TIME_WIN_0 0x2d /* Time Window 0, 4 */ +#define MR_SMC_PWR_LIM0_GRD 0x2e /* Power Limit 0 Guardband, 4 */ +#define MR_SMC_PWR_LIM_1 0x2f /* Power Limit 1, 4 */ +#define MR_SMC_TIME_WIN_1 0x30 /* Time Window 1, 4 */ +#define MR_SMC_INCL_3V3 0x31 /* Include 3.3 V, 4 */ +#define MR_SMC_PWR_LIM_PERS 0x32 /* Power Limit Persistence, 4 */ +#define MR_SMC_CLAMP_MODE 0x33 /* Clamp Mode, 4 */ +#define MR_SMC_ENERGY_STS_0 0x34 /* Energy Status 0, 4 */ +#define MR_SMC_AVG_PWR_0 0x35 /* Average Power 0, 4 */ +#define MR_SMC_AVG_PWR_1 0x36 /* Average Power 1, 4 */ +#define MR_SMC_MIN_PWR 0x37 /* Min Power, 4 */ +#define MR_SMC_PWR_TTL_DUR 0x38 /* Power Throttle Duration, 4 */ +#define MR_SMC_PWR_TTL 0x39 /* Power Throttling, 4 */ +#define MR_SMC_PWR_INST 0x3a /* Instantaneous Power Reading, 4 */ +#define MR_SMC_PWR_IMAX 0x3b /* Maximum Power Reading, 4 */ +#define MR_SMC_VOLT_VCCP 0x3c /* VCCP VR Output Voltage, 4 */ +#define MR_SMC_VOLT_VDDQ 0x3d /* VDDQ VR Output Voltage, 4 */ +#define MR_SMC_VOLT_VDDG 0x3e /* VDDG VR Output Voltage, 4 */ + +#define MR_SMC_TEMP_CPU 0x40 /* CPU DIE Temperature, 4 */ +#define MR_SMC_TEMP_EXHAUST 0x41 /* Card Exhaust Temperature, 4 */ +#define MR_SMC_TEMP_INLET 0x42 /* Card Inlet Temperature, 4 */ +#define MR_SMC_TEMP_VCCP 0x43 /* VCCP VR Temperature, 4 */ +#define MR_SMC_TEMP_VDDG 0x44 /* VDDG VR Temperature, 4 */ +#define MR_SMC_TEMP_VDDQ 0x45 /* VDDQ VR Temperature, 4 */ +#define MR_SMC_TEMP_GDDR 0x46 /* GDDR Temperature, 4 */ +#define MR_SMC_TEMP_EAST 0x47 /* East Temperature, 4 */ +#define MR_SMC_TEMP_WEST 0x48 /* West Temperature, 4 */ +#define MR_SMC_FAN_TACH 0x49 /* Fan RPM, 4 */ +#define MR_SMC_FAN_PWM 0x4a /* Fan PWM Percent, 4 */ +#define MR_SMC_FAN_PWM_ADD 0x4b /* Fan PWM Adder, 4 */ +#define MR_SMC_TCRITICAL 0x4c /* KNC Tcritical temperature, 4 */ +#define MR_SMC_TCONTROL 0x4d /* KNC Tcontrol temperature, 4 */ +#define MR_SMC_TRM_TTL_DUR 0x4e /* Thermal 
Throttle Duration, 4 */ +#define MR_SMC_TRM_TTL 0x4f /* Thermal Throttling, 4 */ +#define MR_SMC_TRM_PUSH 0x50 /* Target for die temp push, 4 */ + +#define MR_SMC_PWR_VCCP 0x58 /* VCCP VR Output Power, 4 */ +#define MR_SMC_PWR_VDDQ 0x59 /* VDDQ VR Output Power, 4 */ +#define MR_SMC_PWR_VDDG 0x5a /* VDDG VR Output Power, 4 */ + +#define MR_SMC_LED_CODE 0x60 /* LED blink code, 4 */ + + +/* + * Simple I/O access routines for most SMC registers. + * All but UUID & SERIAL are 4 bytes in size. + */ +#define SMC_TRACK 0 + +#if SMC_TRACK +#define RL printk("%s: %2x -> %08x, rtn %d\n", __FUNCTION__, reg, *val, rl) +#define WL printk("%s: %2x <- %08x, rtn %d\n", __FUNCTION__, reg, *val, rl) +#else +#define RL /* As nothing */ +#define WL /* As nothing */ +#endif + +#ifdef MIC_IS_EMULATION +/* + * Emulation does not handle I2C busses. + * Therefore all code that deals with I2C needs to be + * replaced with harmless substitutes in emulation. + * The following stubs are for emulation only. + */ +int +gmbus_i2c_read(uint8_t d, uint8_t a, uint8_t r, uint8_t * v, uint16_t l) +{ + if (v && l) + memset(v, 0, l); + return l; +} + +int +gmbus_i2c_write(uint8_t d, uint8_t a, uint8_t r, uint8_t * v, uint16_t l) +{ + return l; +} +#endif /* EMULATION */ + +static char * +gm_err(int err) +{ + char * str = "unknown"; + + switch(err) { + case -1: str = "timeout"; break; + case -2: str = "ack timeout"; break; + case -3: str = "interrupted"; break; + case -4: str = "invalid command"; break; + } + + return str; +} + + +int +mr_smc_rd(uint8_t reg, uint32_t * val) +{ + int rl; + + mr_smc_deglitch(); + rl = gmbus_i2c_read(2, MR_SMC_ADDR, reg, (uint8_t *) val, sizeof(*val)); + RL; + if (rl == sizeof(uint32_t)) + return 0; + + /* + * Something failed, do a dummy read to get I2C bus in a known good state. + *TBD: Do retries, and if so how many? + */ + printk("smc_rd: error %d (%s), reg %02x\n", rl, gm_err(rl), reg); + mr_smc_deglitch(); + gmbus_i2c_read(2, MR_SMC_ADDR, MR_SMC_FW_VERSION, (uint8_t *) &rl, sizeof(rl)); + *val = 0; + return 1; +} + +int +mr_smc_wr(uint8_t reg, uint32_t * val) +{ + int rl; + + WL; + mr_smc_deglitch(); + rl = gmbus_i2c_write(2, MR_SMC_ADDR, reg, (uint8_t *) val, sizeof(*val)); + if (rl == sizeof(uint32_t)) + return 0; + + /* + * Something failed, do a dummy read to get I2C bus in a known good state. + *TBD: Do retries, and if so how many? + */ + printk("smc_wr: error %d (%s), reg %02x\n", rl, gm_err(rl), reg); + mr_smc_deglitch(); + gmbus_i2c_read(2, MR_SMC_ADDR, MR_SMC_FW_VERSION, (uint8_t *) &rl, sizeof(rl)); + return 0; +} +#undef RL +#undef WL + + +/* + * Bypass for SMC access. + * Kind of a backdoor really as it allows for raw access to the SMC which + * may be device dependent and vary significantly between SMC firmware + * revisions. This is intended for host side tools that (hopefully) know + * what they are receiving through this interface. There is a 'set' command + * too, which we screen heavily since the SMC controls board cooling and + * therefore is critical for the cards safe operation envolope. + */ + +int +mr_get_smc(void * p) +{ + int rtn; + uint32_t parm; + struct mr_rsp_smc * r; + + parm = * (uint32_t *) p; + if (GET_BITS(31, 8, parm)) + return -MR_ERR_RANGE; + r = (struct mr_rsp_smc *) p; + + r->reg = GET_BITS(7, 0, parm); + + /* + * These cannot be read by anybody + */ + if (r->reg > MR_SMC_LED_CODE || + r->reg == MR_SMC_ZOMBIE) + return -MR_ERR_PERM; + + /* + * These can only be read by root + */ + if (! 
micras_priv) + switch(r->reg) { + case MR_SMC_SEL_ENTRY_SEL: + case MR_SMC_SEL_DATA: + case MR_SMC_SDR_ENTRY_SEL: + case MR_SMC_SDR_DATA: + return -MR_ERR_PERM; + } + + /* + * Determine how wide the SMC register is + */ + switch(r->reg) { + case MR_SMC_UUID: + r->width = 16; + break; + case MR_SMC_SERIAL: + r->width = 12; + break; + default: + r->width = 4; + } + + mr_smc_deglitch(); + rtn = gmbus_i2c_read(2, MR_SMC_ADDR, r->reg, (uint8_t *) &r->rtn, r->width); +#if SMC_TRACK + printk("%s: %2x -> %08x, rtn %d\n", __FUNCTION__, r->reg, r->rtn.val, rtn); +#endif + if (rtn != r->width) { + /* + * Failed once, try one more time + *TBD: insert a known good read before the actual retry? + */ + mr_smc_deglitch(); + rtn = gmbus_i2c_read(2, MR_SMC_ADDR, r->reg, (uint8_t *) &r->rtn, r->width); +#if SMC_TRACK + printk("%s: %2x -> %08x, rtn %d\n", __FUNCTION__, r->reg, r->rtn.val, rtn); +#endif + + if (r->reg == MR_SMC_SERIAL) { + memcpy((uint8_t *) &r->rtn, hwinf.serial, r->width); + rtn = r->width; + } + } + + if (rtn != r->width) + return -MR_ERR_SMC; + + return sizeof(*r); +} + + +int +mr_set_smc(void * p) +{ + uint8_t reg; + uint16_t width; + int rtn; + uint32_t val, parm; + + parm = * (uint32_t *) p; + reg = GET_BITS(31, 24, parm); + + /* + * Screen for registers we allow setting. + * POST register is accessible to everyone, + * only root can 'SET' anything beyond that. + */ + if (micras_priv) { + switch (reg) { + case MR_SMC_CPU_POST: + case MR_SMC_SEL_ENTRY_SEL: + case MR_SMC_SDR_ENTRY_SEL: + case MR_SMC_SMB_RESTRT: + case MR_SMC_FORCE_TTL: + case MR_SMC_PWR_LIM_0: + case MR_SMC_TIME_WIN_0: + case MR_SMC_PWR_LIM_1: + case MR_SMC_TIME_WIN_1: + case MR_SMC_INCL_3V3: + case MR_SMC_PWR_LIM_PERS: + case MR_SMC_CLAMP_MODE: + case MR_SMC_FAN_PWM_ADD: + case MR_SMC_LED_CODE: + break; + default: + return -MR_ERR_PERM; + } + } + else { + switch (reg) { + case MR_SMC_CPU_POST: + break; + default: + return -MR_ERR_PERM; + } + } + + /* + * Screen against known SMC register widths. + * We insist that unused upper bits are zeros + */ + switch (reg) { + case MR_SMC_SEL_ENTRY_SEL: + case MR_SMC_SDR_ENTRY_SEL: + case MR_SMC_FAN_PWM_ADD: + val = GET_BITS(7, 0, parm); /* 8-bit registers */ + break; + case MR_SMC_PWR_LIM_0: + case MR_SMC_TIME_WIN_0: + case MR_SMC_PWR_LIM_1: + case MR_SMC_TIME_WIN_1: + val = GET_BITS(15, 0, parm); /* 16 bit registers */ + break; + case MR_SMC_CPU_POST: + val = GET_BITS(23, 0, parm); /* 24 bit registers */ + break; + default: + val = GET_BIT(0, parm); /* Booleans */ + } + if (val != GET_BITS(23, 0, parm)) + return -MR_ERR_INVAUX; + + width = 4; + mr_smc_deglitch(); + rtn = gmbus_i2c_write(2, MR_SMC_ADDR, reg, (uint8_t *) & val, width); +#if SMC_TRACK + printk("%s: %2x <- %08x, rtn %d\n", __FUNCTION__, reg, val, rtn); +#endif + if (rtn != width) + return -MR_ERR_SMC; + + return 0; +} + + +/* + * IPMI interface. + * The SMC has a connection to the host's board management software, which + * usually resides in a dedicated Board Management Controller, of which the + * SMC is supposed to be a registered satellite controller (aka. additional + * management controller). As such the SMC can receive controls originating + * from any valid IPMI session on things like power limits, but it can also + * add events to the non-volatile IPMI System Events Log for things like + * reporting catastrophic failures that otherwise might be lost because the + * main processors might be disabled (section 1.7.6 in IPMI spec 2.0 E5). 
+ * In RAS context we'd want to let the SM know if fatal MC events occur + * and possibly also if the uOS crashes, such that remote management can + * be alerted via standard IPMI mechanisms. + * + * Input to this routine is an MceInfo record and an 'in-exception context' + * flag. It is still TBD what exactly to tell the SMC, but it is expected + * that all relevant info is in the MceInfo record. + */ + +void +micras_mc_ipmi(struct mce_info * mc, int ctx) +{ +} + + +#if !(USE_SVID || USE_SMC) +/* + * Board voltage sense converter + * Two 10 bit read-outs from SBOX register 0x1038. + * The format is very poorly documented, so no + * warranty on this conversion. Assumption is + * the reading is a binary fixed point number. + * bit 15 Valid reading if set + * bit 9:8 2 bit integer part + * bit 7:0 8 bit fraction part + * Return value is 0 (invalid) or voltage i uV. + */ + +uint32_t +bvs2volt(uint16_t sense) +{ + uint32_t res, f, msk; + + if (! GET_BIT(15, sense)) + return 0; + + /* + * First get integer contribution + * Then accumulate fraction contributions. + * Divide and add fraction if corresponding bit set. + */ + res = 1000000 * GET_BITS(9, 8, sense); + for(msk = (1 << 7), f = 1000000/2; msk && f; msk >>= 1, f >>= 1) + if (sense & msk) + res += f; + + return res; +} +#endif + + + +/* +** +** Initializations +** +** This has two intended purposes: +** - Do a on-time effort to collect info on properties that +** are not going to change after the initial setup by +** either bootstrap or kernel initialization. +** - Collect initial values on things we can modify. +** Intent is that unloading the ras module should reset +** all state to that of the time the module was loaded. +** +*/ + + +/* + *TBD: substitute with official defines when availble. + */ +#define KNC_FLASH_TAB 0x0FFF76000 /* Yes, it's below 4GB */ +#define KNC_FLASH_FILT 0x400 /* Correctable MC event filter */ +#define KNC_FLASH_BASE 0x0FFFA8000 /* Yes, it's below 4GB */ +#define KNC_FLASH_SIZE 0x2000 /* 8 KB according to Scott */ +#define KNC_FLASH_BOOT1 0x1274 /* Fboot1 version string */ +#define KNC_FLASH_BOOTB 0x02b8 /* Fboot1 backup version string */ +#define KNC_MP_PHYS 0x9e000 /* Location of MP table */ +#define KNC_MPF_SIG 0xa0afb2a0 /* String "_PM_" inverted */ +#define KNC_MPC_SIG 0x504d4350 /* String "PCMP" */ + +static void +get_cpu_table(void) +{ + struct mpf_intel * mpf; + struct mpc_table * mpc; + struct mpc_cpu * mpp; + uint8_t * ptr, * ep; + + mpf = phys_to_virt((phys_addr_t) KNC_MP_PHYS); + if (mpf) { + if (*((uint32_t *) mpf->signature) != KNC_MPF_SIG) { + printk("MP FP signature not found, %02x %02x %02x %02x\n", + mpf->signature[0], mpf->signature[1], + mpf->signature[2], mpf->signature[3]); + return; + } + mpc = phys_to_virt((phys_addr_t) mpf->physptr); + if (mpc) { + if (*((uint32_t *) mpc->signature) != KNC_MPC_SIG) { + printk("MP header signature not found, %02x %02x %02x %02x\n", + mpc->signature[0], mpc->signature[1], + mpc->signature[2], mpc->signature[3]); + return; + } + ptr = (uint8_t *)(mpc + 1); + ep = ptr + mpc->length; + while(ptr < ep) { + switch(*ptr) { + case 0x00: /* CPU */ + mpp = (struct mpc_cpu *) ptr; + if (GET_BIT(0, mpp->cpuflag) && mpp->apicid < nr_cpu_ids) + xlat_cpu[mpp->apicid] = GET_BITS(7, 0, mpp->reserved[1]); + ptr += 20; + break; + case 0x01: /* BUS */ + ptr += 8; + break; + case 0x02: /* I/O-APIC */ + ptr += 8; + break; + case 0x03: /* INT source */ + ptr += 8; + break; + case 0x04: /* LINT source */ + ptr += 8; + break; + default: /* Table out of spec */ + ptr = ep; + } + 
} + } +#if 0 + { + uint32_t eax, ebx, ecx, edx; + uint32_t hwt, i; + + cpuid(1, &eax, &ebx, &ecx, &edx); + hwt = GET_BITS(23, 16, ebx); + if (hwt > nr_cpu_ids) + hwt = nr_cpu_ids; + printk("RAS.card: CPU thread table:\n"); + for(i=0; i < hwt; i++) + printk(" cpu %d -> thr %d\n", i, xlat_cpu[i]); + } +#endif + } +} + + +static void __init +mr_mk_cf_lst(void) +{ + int i, n; + uint16_t f; + + /* + * If PM module interface is in place, then the + * core voltage list may already be populated. + */ + if (freq.supt[0] && freq.slen) + return; + + n = 0; + for(i = ARRAY_SIZE(cpu_tab) -1; i >= 0; i--) { + for(f = cpu_tab[i].min_clk; + f <= cpu_tab[i].max_clk; + f += cpu_tab[i].step_size) { + freq.supt[n] = 1000 * f; + freq.slen = ++n; + if (n >= MR_PTAB_LEN) + return; + } + } +} + +static void __init +mr_mk_gf_lst(void) +{ + int i, n; + uint16_t f; + + n = 0; + for(i = ARRAY_SIZE(gddr_tab1) -1; i >= 0; i--) { + for(f = gddr_tab1[i].min_clk; + f <= gddr_tab1[i].max_clk; + f += gddr_tab1[i].step_size) { + gfreq.supt[n] = 1000 * f; + gfreq.slen = ++n; + if (n == MR_PTAB_LEN) + return; + } + } + for(i = ARRAY_SIZE(gddr_tab2) -1; i >= 0; i--) { + for(f = gddr_tab2[i].min_clk; + f <= gddr_tab2[i].max_clk; + f += gddr_tab2[i].step_size) { + gfreq.supt[n] = 1000 * f; + gfreq.slen = ++n; + if (n == MR_PTAB_LEN) + return; + } + } +} + +/* + * We can only list 64 values in this list, but on + * a VRM12 device there is 256 values to chose from. + * For now we'll list values from 0.7 to 1.3 volt + * in 10 mV increments (61 values). + */ + +#define VRM_MIN 600000 +#define VRM_MAX 1300000 +#define VRM_RES 10000 + +static void __init +mr_mk_cv_lst(void) +{ + int n; + uint32_t cv; + + /* + * If PM module interface is in place, then the + * core voltage list may already be populated. + */ + if (volt.supt[0] && volt.slen) + return; + + n = 0; + for(cv = VRM_MIN; cv <= VRM_MAX; cv += VRM_RES) { + volt.supt[n] = cv; + volt.slen = ++n; + if (n >= MR_PTAB_LEN) + return; + } +} + + +void __init +mr_mt_card_init(void) +{ + uint32_t scr7, scr9, cf; + uint32_t smc, ci; + int rtn; +#ifndef MIC_IS_EMULATION + uint8_t * parm; +#endif +#if ! USE_SMC + uint32_t gv; +#endif +#if USE_SVID + int svid; + uint8_t vr; +#else +#if ! USE_SMC + uint32_t cv; +#endif +#endif +#if USE_PM + int (* fnc)(void); +#endif + + /* + * Make CPU->phys ID translation table + */ + get_cpu_table(); + + /* + * Build numbers for fboot0 and fboot 1 repectively + */ + scr7 = mr_sbox_rl(0, SBOX_SCRATCH7); + + /* + * VERS: + * Map flash and look for version strings. + */ +#ifdef MIC_IS_EMULATION + vers.fboot1[0] = scnprintf(vers.fboot1 + 1, MR_VERS_LEN -2, + "No emulation flash version string (build %d)", + GET_BITS(31, 16, scr7)); +#else + parm = ioremap(KNC_FLASH_BASE, KNC_FLASH_SIZE); + if (!parm) { + printk("mr_mt_card_init: ioremap failure: parm %x\n", KNC_FLASH_BASE); + goto fail_iomap; + } + + /* + * The fboot0 version (hardwired in the chip) is placed in flash + * by bootstrap at a fixed location, and is less than 16 byte long. 
+ */ + if (strnlen(parm + KNC_FLASH_BOOT1, 16) < 16) + vers.fboot1[0] = scnprintf(vers.fboot1 + 1, MR_VERS_LEN -2, + "fboot1 version: %s (build %d)", + parm + KNC_FLASH_BOOT1, GET_BITS(31, 16, scr7)); + else + vers.fboot1[0] =scnprintf(vers.fboot1 + 1, MR_VERS_LEN -2, + "No valid version string found"); + iounmap(parm); + + /* + * While at it, check if there is a MC filter list in flash + */ + parm = ioremap(KNC_FLASH_TAB, KNC_FLASH_SIZE); + if (!parm) { + printk("mr_mt_card_init: ioremap failure: parm %x\n", KNC_FLASH_TAB); + goto fail_iomap; + } + mcc_flt_parm(parm + KNC_FLASH_FILT); + iounmap(parm); + +fail_iomap: +#endif + + /* + * Retrieve ID details from the SMC + * UUID, 16 byte + * serial, 12 byte + * FW version, + * 15:0 Build number + * 23:16 Minor version + * 31:24 Major version + * Note: Ancient systems, like Berta, runs on cards with an older + * version on the SMC firmware that does not support serial. + */ + mr_smc_deglitch(); + rtn = gmbus_i2c_read(2, MR_SMC_ADDR, MR_SMC_UUID, hwinf.guid, 16); +#if SMC_TRACK + printk("%s: %2x -> %08x, rtn %d\n", __FUNCTION__, MR_SMC_UUID, *(uint32_t *) hwinf.guid, rtn); +#endif + if (rtn != 16) + memset(hwinf.guid, '\0', 16); + mr_smc_deglitch(); + rtn = gmbus_i2c_read(2, MR_SMC_ADDR, MR_SMC_SERIAL, hwinf.serial, 12); +#if SMC_TRACK + printk("%s: %2x -> %08x, rtn %d\n", __FUNCTION__, MR_SMC_SERIAL, *(uint32_t *) hwinf.serial, rtn); +#endif + if (rtn != 12) + memcpy(hwinf.serial, "Update_SMC!!", sizeof(hwinf.serial)); + if (! mr_smc_rd(MR_SMC_FW_VERSION, &smc)) + vers.fsc[0] = scnprintf(vers.fsc + 1, MR_VERS_LEN -2, + "SMC firmware rev. %d.%d (build %d)", + GET_BITS(31, 24, smc), + GET_BITS(23, 16, smc), + GET_BITS(15, 0, smc)); + + /* + * HWINF: + * Get processor details from SBOX componentID. + * 19:16 Model ID => aka revision + * 15:12 Stepping ID => stepping + * 11:8 Substepping ID => substep + * + * Get Card Revision details from the SMC. + * 17:16 board (0=MPI, CRB, SFF, Product) + * 10:8 fab version (0='A' .. 7='H') + * 2:0 PBA SKU # (need name table here?) + */ + ci = mr_sbox_rl(0, SBOX_COMPONENT_ID); + hwinf.rev = GET_BITS(19, 16, ci); + hwinf.step = GET_BITS(15, 12, ci); + hwinf.substep = GET_BITS(11, 8, ci); + if (! mr_smc_rd(MR_SMC_HW_REVISION, &smc)) { + hwinf.board = GET_BITS(17, 16, smc); + hwinf.fab = GET_BITS(10, 8, smc); + hwinf.sku = GET_BITS( 2, 0, smc); + } + + /* + * VOLT: + * By definition, reference voltage is 1st value seen. + * Order of preference is SVID, then SMC and lastly SBOX. + * SMC register bits 15:0 is voltage in mV. + * SBOX_COREVOLT should be in SVID voltage format. + */ +#if USE_SVID + svid = SvidCmd(SVID_VCCP, VR12Cmd_GetReg, VR12Reg_VID_Set); + if (svid >= 0) + volt.set = svid2volt(svid); +#else +#if USE_SMC + if (!mr_smc_rd(MR_SMC_VOLT_VCCP, &smc) && GET_BITS(31, 30, smc) != 0x3) + volt.set = GET_BITS(15, 0, smc) * 1000; +#else + cv = mr_sbox_rl(0, SBOX_COREVOLT); + volt.set = svid2volt(GET_BITS(7, 0, cv)); +#endif +#endif + mr_mk_cv_lst(); + + /* + * FREQ + * By definition, reference frequency is 1st value seen. + */ + cf = mr_sbox_rl(0, SBOX_COREFREQ); + freq.def = mr_mt_cf_r2f(GET_BITS(11, 0, cf)); + mr_mk_cf_lst(); + + /* + * GDDR: + * See layout of scratch #9 in 'common'. + * 26:16 Clock ratio encoding + * 27 ClamShell + */ + scr9 = mr_sbox_rl(0, SBOX_SCRATCH9); + gddr.speed = 2 * mr_mt_gf_r2f(GET_BITS(26, 16, scr9)); + + /* + * GVOLT: + * Report all values the hardware can set, kind + * of silly as these cannot be changed from uOS. + * Order of preference is SVID, then SMC and lastly SBOX. 
+ * SMC register bits 15:0 is voltage in mV. + * + *TBD: Seriously suspect SBOX register to be wrong. + */ +#if USE_SVID + svid = SvidCmd(SVID_VDDQ, VR12Cmd_GetReg, VR12Reg_VID_Set); + if (svid >= 0) + gvolt.set = svid2volt(svid); +#else +#if USE_SMC + if (!mr_smc_rd(MR_SMC_VOLT_VDDQ, &smc) && GET_BITS(31, 30, smc) != 0x3) + gvolt.set = GET_BITS(15, 0, smc) * 1000; +#else + gv = mr_sbox_rl(0, SBOX_MEMVOLT); + gvolt.set = svid2volt(GET_BITS(7, 0, gv)); +#endif +#endif + + /* + * GFREQ: + * Report all values the hardware can set, kind + * of silly as these cannot be changed from uOS. + */ + gfreq.def = mr_mt_gf_r2f(GET_BITS(26, 16, scr9)); + mr_mk_gf_lst(); + + /* + * PWR: + * If we are going to use SVID registers we'd need + * to know the VRs capabilities and ICC_MAX setting. + */ +#if USE_SVID + vr = SvidCmd(SVID_VCCP, VR12Cmd_GetReg, VR12Reg_Capability); + if (vr >= 0) + vccp_cap = vr; + vr = SvidCmd(SVID_VDDQ, VR12Cmd_GetReg, VR12Reg_Capability); + if (vr >= 0) + vddq_cap = vr; + vr = SvidCmd(SVID_VDDG, VR12Cmd_GetReg, VR12Reg_Capability); + if (vr >= 0) + vddg_cap = vr; + vr = SvidCmd(SVID_VCCP, VR12Cmd_GetReg, VR12Reg_Icc_Max); + if (vr >= 0) + vccp_imax = vr; + vr = SvidCmd(SVID_VDDQ, VR12Cmd_GetReg, VR12Reg_Icc_Max); + if (vr >= 0) + vddq_imax = vr; + vr = SvidCmd(SVID_VDDG, VR12Cmd_GetReg, VR12Reg_Icc_Max); + if (vr >= 0) + vddg_imax = vr; +#endif + + /* + * ECC: + * + *TBD: Where to find ECC setting? + * There are several GBOX registers that has something + * named ECC in them. Scott to tell once PO is done. + */ + ecc.enable = GET_BIT(29, scr9); + + /* + * TRBO + * The PM module have the inital turbo mode setting. + * Get it now, so we don't need to call PM to report it. + */ +#if USE_PM + fnc = pm_cb.micpm_get_turbo; + if (fnc) + trbo.set = fnc(); +#endif + + /* + *TBD: Save registers this module may change + */ +} + +void __exit +mr_mt_card_exit(void) +{ + /* + *TBD: Restore registers this module may change + */ +} + + + +/* +** +** Card specific 'Get' functions +** +*/ + +int +mr_get_volt(void * p) +{ + struct mr_rsp_volt * r; +#if USE_PM + void (* fnc)(void); +#endif + + /* + * Preference is VR out. + * Not sure if board sensors work in KnC + */ +#if USE_SVID + { + int vout; + + vout = SvidCmd(SVID_VCCP, VR12Cmd_GetReg, VR12Reg_VID_Set); + if (vout < 0) + return vout; + volt.set = svid2volt(vout); + + vout = SvidCmd(SVID_VCCP, VR12Cmd_GetReg, VR12Reg_Vout); + if (vout < 0) + return vout; + volt.cur = vout2volt(vout); + } +#else +#if USE_SMC + { + uint32_t smc; + + volt.cur = 0; + volt.c_val = 3; + if (! mr_smc_rd(MR_SMC_VOLT_VCCP, &smc)) { + volt.c_val = GET_BITS(31, 30, smc); + if (volt.c_val != 0x3) + volt.cur = GET_BITS(15, 0, smc) * 1000; + } + + /* + *TBD: override 'set' value ? 
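+ *
+ * Illustrative decode (raw value assumed): a VCCP reading of
+ * 0x000003E8 has status bits 31:30 = 00 (data OK) and bits 15:0 =
+ * 1000, i.e. 1000 mV, reported here as 1000000 uV.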
+ */ + } +#else + { + uint32_t fsc, cv; + + cv = mr_sbox_rl(0, SBOX_COREVOLT); + volt.set = svid2volt(GET_BITS(7, 0, cv)); + + fsc = mr_sbox_rl(0, SBOX_BOARD_VOLTAGE_SENSE); + volt.cur = bvs2volt(GET_BITS(15, 0, fsc)); + } +#endif +#endif + +#if USE_PM + /* + * Ask PM for table refresh + */ + fnc = pm_cb.micpm_vf_refresh; + if (fnc) + fnc(); +#endif + + r = (struct mr_rsp_volt *) p; + *r = volt; + return sizeof(*r); +} + + +int +mr_get_freq(void * p) +{ + struct mr_rsp_freq * r; + uint32_t cf, cr; +#if USE_PM + void (* fnc)(void); +#endif + + /* + * Current Ratio: + * 11:0 Current core ratio + * 15 Enable 600 MHz + * 27:16 Goal ratio + * 31 OC disable + * Goal ratio is a product of base ratio and fuse overrides + * Current ration is a product of goal, fuse limits and themal throttle + * + * Core Frequency: + * 11:0 Base ratio + * 15 Fuse override + * 31 Select ratio + * Base ratio accepted only if (bit 15 | bit 31 | OC disble) == 010 + * + *TBD: How to detect clock bypasses? + * ICC bypass cuts the core and reference base in half. + */ + cr = mr_sbox_rl(0, SBOX_CURRENTRATIO); + cf = mr_sbox_rl(0, SBOX_COREFREQ); + freq.cur = mr_mt_cf_r2f(GET_BITS(11, 0, cr)); + freq.def = mr_mt_cf_r2f(GET_BITS(11, 0, cf)); + if (GET_BITS(11, 0, cf) != GET_BITS(11, 0, cr)) + printk("RAS.get_freq: core not running at expected frequency\n"); + +#if USE_PM + /* + * Ask PM for table refresh + */ + fnc = pm_cb.micpm_vf_refresh; + if (fnc) + fnc(); +#endif + + r = (struct mr_rsp_freq *) p; + *r = freq; + return sizeof(*r); +} + + +#if USE_SVID +int +mr_get_svid(uint8_t vr, uint8_t cap, uint8_t imax, struct mr_rsp_vrr * vrr) +{ + int v, a, p; + + p = SvidCmd(vr, VR12Cmd_GetReg, VR12Reg_Pout); + a = SvidCmd(vr, VR12Cmd_GetReg, VR12Reg_Iout); + v = SvidCmd(vr, VR12Cmd_GetReg, VR12Reg_Vout); + + if (p < 0 || a < 0 || v < 0) + return -MR_ERR_SMC; + + vrr->pwr = pout2watt(p); + vrr->cur = iout2amp(a, cap, imax); + vrr->volt = vout2volt(v); + + return 0; +} +#endif + +#define KNC_DFF_BOARD 2 /* DFF/SFF board */ + +int +mr_get_power(void * p) +{ + struct mr_rsp_power * r; +#if USE_SMC + static struct mr_rsp_vrr vnil = { 0, 0, 0, 3, 3, 3 }; + static struct mr_rsp_pws pnil = { 0, 3 }; + uint32_t vccp, vddg, vddq; + uint32_t prd0, prd1, pcie, p2x3, p2x4; +#endif + +#if USE_SVID + /* + * Get VR status over SVID. + */ + if (mr_get_svid(SVID_VCCP, vccp_cap, vccp_imax, &power.vccp) < 0 || + mr_get_svid(SVID_VDDQ, vddq_cap, vddq_imax, &power.vddq) < 0 || + mr_get_svid(SVID_VDDG, vddg_cap, vddg_imax, &power.vddq) < 0) + return -MR_ERR_SMC; +#else +#if USE_SMC + /* + * Get VR status from SMC. + * Only voltages are available currently. + * Still need to screen for good data. + * Top 2 bits decode as + * 00 Data OK + * 01 Upper threshold reached + * 10 Lower threshold reached + * 11 Data unavailable + * Assume data is valid even if a threshold reached + */ + power.vccp = power.vddg = power.vddq = vnil; + if (! mr_smc_rd(MR_SMC_VOLT_VCCP, &vccp)) { + power.vccp.v_val = GET_BITS(31, 30, vccp); + if (power.vccp.v_val != 0x3) + power.vccp.volt = 1000 * GET_BITS(15, 0, vccp); + } + if (! mr_smc_rd(MR_SMC_VOLT_VDDG, &vddg)) { + power.vddg.v_val = GET_BITS(31, 30, vddg); + if (power.vddg.v_val != 0x3) + power.vddg.volt = 1000 * GET_BITS(15, 0, vddg); + } + if (! mr_smc_rd(MR_SMC_VOLT_VDDQ, &vddq)) { + power.vddq.v_val = GET_BITS(31, 30, vddq); + if (power.vddq.v_val != 0x3) + power.vddq.volt = 1000 * GET_BITS(15, 0, vddq); + } + if (! 
mr_smc_rd(MR_SMC_PWR_VCCP, &vccp)) { + power.vccp.p_val = GET_BITS(31, 30, vccp); + if (power.vccp.p_val != 0x3) + power.vccp.pwr = 1000000 * GET_BITS(15, 0, vccp); + } + if (! mr_smc_rd(MR_SMC_PWR_VDDG, &vddg)) { + power.vddg.p_val = GET_BITS(31, 30, vddg); + if (power.vddg.p_val != 0x3) + power.vddg.pwr = 1000000 * GET_BITS(15, 0, vddg); + } + if (! mr_smc_rd(MR_SMC_PWR_VDDQ, &vddq)) { + power.vddq.p_val = GET_BITS(31, 30, vddq); + if (power.vddq.p_val != 0x3) + power.vddq.pwr = 1000000 * GET_BITS(15, 0, vddq); + } +#endif +#endif + +#if USE_SMC + /* + * Get reads on VRs and power sensors from SMC. + * This is a mess: + * - total power may or may not include 3.3 V rail. + * If it is then it's not measured, just "guessed". + * - there are two averaging windows for total power, + * though it is not clear who controls these windows. + * For now we assume window 0 is shorter than window 1 + * and thus power 0 is 'current' reading and power 1 + * is the '20 sec' reading. + * TBD: Who controls the time windows and is is true + * that Window 0 is shorter than Window 1? + * - No specifics on how power sensors are averaged, + * i.e. is Window 0/1 used or is is a third window. + * Need to know, otherwise Ptot may not be sum(sources). + * - There still is no 'max' value from SMC + * + * Still need to screen for good data. + * Top 2 bits decode as + * 00 Data OK + * 01 Upper threshold reached + * 10 Lower threshold reached + * 11 Data unavailable + * Assume data is valid even if a threshold reached + */ + power.tot0 = power.tot1 = + power.inst = power.imax = + power.pcie = power.c2x3 = power.c2x4 = pnil; + + if (! mr_smc_rd(MR_SMC_AVG_PWR_0, &prd0)) { + power.tot0.p_val = GET_BITS(31, 30, prd0); + if (power.tot0.p_val != 0x3) + power.tot0.prr = 1000000 * GET_BITS(29, 0, prd0); + } + if (! mr_smc_rd(MR_SMC_AVG_PWR_1, &prd1)) { + power.tot1.p_val = GET_BITS(31, 30, prd1); + if (power.tot1.p_val != 0x3) + power.tot1.prr = 1000000 * GET_BITS(29, 0, prd1); + } + power.inst = power.imax = pnil; + if (! mr_smc_rd(MR_SMC_PWR_INST, &prd0)) { + power.inst.p_val = GET_BITS(31, 30, prd0); + if (power.inst.p_val != 0x3) + power.inst.prr = 1000000 * GET_BITS(29, 0, prd0); + } + if (! mr_smc_rd(MR_SMC_PWR_IMAX, &prd1)) { + power.imax.p_val = GET_BITS(31, 30, prd1); + if (power.imax.p_val != 0x3) + power.imax.prr = 1000000 * GET_BITS(29, 0, prd1); + } + if (! mr_smc_rd(MR_SMC_PWR_PCIE, &pcie)) { + power.pcie.p_val = GET_BITS(31, 30, pcie); + if (power.pcie.p_val != 0x3) + power.pcie.prr = 1000000 * GET_BITS(15, 0, pcie); + } + if (hwinf.board != KNC_DFF_BOARD) { + if (! mr_smc_rd(MR_SMC_PWR_2X3, &p2x3)) { + power.c2x3.p_val = GET_BITS(31, 30, p2x3); + if (power.c2x3.p_val != 0x3) + power.c2x3.prr = 1000000 * GET_BITS(15, 0, p2x3); + } + if (! mr_smc_rd(MR_SMC_PWR_2X4, &p2x4)) { + power.c2x4.p_val = GET_BITS(31, 30, p2x4); + if (power.c2x4.p_val != 0x3) + power.c2x4.prr = 1000000 * GET_BITS(15, 0, p2x4); + } + } +#endif + + r = (struct mr_rsp_power *) p; + *r = power; + return sizeof(*r); +} + + +int +mr_get_plim(void * p) +{ + uint32_t pl0, pl1, grd; + struct mr_rsp_plim * r; + + /* + * Get values from PM + */ + if (! mr_smc_rd(MR_SMC_PWR_LIM_0, &pl0)) + plim.hmrk = GET_BITS(15, 0, pl0); + + if (! mr_smc_rd(MR_SMC_PWR_LIM_1, &pl1)) + plim.lmrk = GET_BITS(15, 0, pl1); + + if (! 
mr_smc_rd(MR_SMC_PWR_LIM0_GRD, &grd)) + plim.phys = plim.hmrk + GET_BITS(15, 0, grd); + + r = (struct mr_rsp_plim *) p; + *r = plim; + return sizeof(*r); +} + + +int +mr_get_gfreq(void * p) +{ + struct mr_rsp_gfreq * r; + uint32_t gbr; + + /* + * SBOX register MEMFREQ bits 7:0 now holds 10 x rate in GTps. + */ + gbr = mr_sbox_rl(0, SBOX_MEMORYFREQ); + gfreq.cur = GET_BITS(7, 0, gbr) * 100000 / 2; + + r = (struct mr_rsp_gfreq *) p; + *r = gfreq; + return sizeof(*r); +} + + +int +mr_get_gvolt(void * p) +{ + struct mr_rsp_gvolt * r; + + /* + * Preference is VR out. + * Not sure if board sensors work in KnC + */ +#if USE_SVID + { + int vout; + + vout = SvidCmd(SVID_VDDQ, VR12Cmd_GetReg, VR12Reg_VID_Set); + if (vout < 0) + return vout; + gvolt.set = svid2volt(vout); + + vout = SvidCmd(SVID_VDDQ, VR12Cmd_GetReg, VR12Reg_Vout); + if (vout < 0) + return vout; + gvolt.cur = vout2volt(vout); + } +#else +#if USE_SMC + { + uint32_t smc; + + gvolt.cur = 0; + gvolt.c_val = 3; + if (! mr_smc_rd(MR_SMC_VOLT_VDDQ, &smc)) { + gvolt.c_val = GET_BITS(31, 30, smc); + if (gvolt.c_val != 0x3) + gvolt.cur = GET_BITS(15, 0, smc) * 1000; + } + if (!gvolt.set) + gvolt.set = gvolt.cur; + } +#else + { + uint32_t bvs; + + bvs = mr_sbox_rl(0, SBOX_BOARD_VOLTAGE_SENSE); + gvolt.cur = bvs2volt(GET_BITS(31, 16, bvs)); + } +#endif +#endif + + r = (struct mr_rsp_gvolt *) p; + *r = gvolt; + return sizeof(*r); +} + + +/* + * Card has 3 dedicated temp sensors (read from SMC): + * 0 Air Inlet (aka West) + * 1 Air exhaust (aka East) + * 2 GDDR memory (not sure which chip) + * + * VRs can measure temperature too, which may be read + * from SMC (via I2C bus) or the VRs directly (via SVID). + * 3 Vccp VR (IR3538) temp + * 4 Vddq VR (IR3541, loop 1) temp + * 5 Vddg VR (IR3541, loop 2) temp + * Note: Vddg and Vddq are measured on the same VR, + * likely will be the same reading (or very close). + * + * SBOX board temperature sensors are not connected + * in KnC (SBOX HAS vol 1, section 1.40.1). Instead it + * relies on SMC to 'broadcast' sensor telemetry into + * the KnC's TMU unit via it's I2C bus. + * Currently it doesn't, though a DCR has been filed. + */ + +int +mr_get_temp(void * p) +{ + struct mr_rsp_temp * r; + uint32_t die1, die2, die3; /* Die temps */ + uint32_t dmx1, dmx2, dmx3; /* Max die temps */ +#if USE_SVID + int tvccp, tvddq, tvddg; /* VR temps */ +#endif +#if USE_SMC + static struct mr_rsp_tsns tnil = { 0, 3 }; +#endif + +#if USE_SVID + /* + * Get VR temperatures over SVID. + * These are _all_ positive numbers. + */ + tvccp = SvidCmd(SVID_VCCP, VR12Cmd_GetReg, VR12Reg_Temp); + tvddq = SvidCmd(SVID_VDDQ, VR12Cmd_GetReg, VR12Reg_Temp); + tvddg = SvidCmd(SVID_VDDG, VR12Cmd_GetReg, VR12Reg_Temp); + if (tvccp < 0 || tvddq < 0 || tvddg < 0) + return -MR_ERR_SMC; + temp.vccp.cur = GET_BITS(7, 0, tvccp); + temp.vddq.cur = GET_BITS(7, 0, tvddq); + temp.vddg.cur = GET_BITS(7, 0, tvddg); +#endif + +#if USE_SMC + /* + * Get temp sensor readings from SMC. + * According to MAS 0.30 it presents + * - CPU die temp (just one value) + * - Fan exhaust temp + * - Fan inlet temp + * - Vccp VR temp + * - Vddg VR temp + * - Vddq VR temp + * - GDDR temp + * + * Still need to screen for good data. 
+ * Top 2 bits decode as + * 00 Data OK + * 01 Upper threshold reached + * 10 Lower threshold reached + * 11 Data unavailable + * Assume data is valid even if a threshold reached + */ + { + uint32_t fin, fout, gddr; /* Sensor temps */ + uint32_t vccp, vddg, vddq; /* VR temps */ + uint32_t die; /* Die summary */ + + temp.die = temp.fin = temp.fout = + temp.vccp = temp.vddg = temp.vddq = tnil; + if (! mr_smc_rd(MR_SMC_TEMP_CPU, &die)) { + temp.die.c_val = GET_BITS(31, 30, die); + if (temp.die.c_val != 0x3) + temp.die.cur = GET_BITS(15, 0, die); + } + if (! mr_smc_rd(MR_SMC_TEMP_EXHAUST, &fout)) { + temp.fout.c_val = GET_BITS(31, 30, fout); + if (temp.fout.c_val != 0x3) + temp.fout.cur = GET_BITS(15, 0, fout); + } + if (! mr_smc_rd(MR_SMC_TEMP_INLET, &fin)) { + temp.fin.c_val = GET_BITS(31, 30, fin); + if (temp.fin.c_val != 0x3) + temp.fin.cur = GET_BITS(15, 0, fin); + } + if (! mr_smc_rd(MR_SMC_TEMP_VCCP, &vccp)) { + temp.vccp.c_val = GET_BITS(31, 30, vccp); + if (temp.vccp.c_val != 0x3) + temp.vccp.cur = GET_BITS(15, 0, vccp); + } + if (! mr_smc_rd(MR_SMC_TEMP_VDDG, &vddg)) { + temp.vddg.c_val = GET_BITS(31, 30, vddg); + if (temp.vddg.c_val != 0x3) + temp.vddg.cur = GET_BITS(15, 0, vddg); + } + if (! mr_smc_rd(MR_SMC_TEMP_VDDQ, &vddq)) { + temp.vddq.c_val = GET_BITS(31, 30, vddq); + if (temp.vddq.c_val != 0x3) + temp.vddq.cur = GET_BITS(15, 0, vddq); + } + if (! mr_smc_rd(MR_SMC_TEMP_GDDR, &gddr)) { + temp.gddr.c_val = GET_BITS(31, 30, gddr); + if (temp.gddr.c_val != 0x3) + temp.gddr.cur = GET_BITS(15, 0, gddr); + } + } +#else + /* + * The TMU registers relies on telemetry broadcasts from + * the SMC in order to report current data, early SMC + * firmware does not provide telemetry at all. + * Mapping of 'board temps' to physical sensors isn't + * really defined anywhere. Based on FreeBSD comments + * they map is: + * 0 Air Inlet + * 1 VCCP VR + * 2 GDDR (not sure which chip) + * 3 GDDR VR + * + *TBD: verify map on actual CRB + */ + { + uint32_t btr1, btr2; /* Board temps */ + uint32_t tsta; /* Thermal status */ + uint32_t fsc; /* Fan controller status */ + + fsc = mr_sbox_rl(0, SBOX_STATUS_FAN2); + btr1 = mr_sbox_rl(0, SBOX_BOARD_TEMP1); + btr2 = mr_sbox_rl(0, SBOX_BOARD_TEMP2); + tsta = mr_sbox_rl(0, SBOX_THERMAL_STATUS); + temp.fin.cur = (btr1 & (1 << 15)) ? GET_BITS( 8, 0, btr1) : 0; + temp.vccp.cur = (btr1 & (1 << 31)) ? GET_BITS(24, 16, btr1) : 0; + temp.gddr.cur = (btr2 & (1 << 15)) ? GET_BITS( 8, 0, btr2) : 0; + temp.vddq.cur = (btr2 & (1 << 31)) ? GET_BITS(24, 16, btr2) : 0; + temp.vddg.cur = GET_BITS(19, 12, fsc); + temp.brd.cur = 0; + if (temp.fin.cur > temp.brd.cur) + temp.brd.cur = temp.fin.cur; + if (temp.vccp.cur > temp.brd.cur) + temp.brd.cur = temp.vccp.cur; + if (temp.gddr.cur > temp.brd.cur) + temp.brd.cur = temp.gddr.cur; + if (temp.vddq.cur > temp.brd.cur) + temp.brd.cur = temp.vddq.cur; + if (tsta & (1 << 31)) + temp.die.cur = GET_BITS(30, 22, tsta); + } +#endif + + /* + * Raw SBOX data for die temperatures. + * + *TBD: do these depend on SMC telemetry? + * If so they probably won't work until DCR in place. + */ + die1 = mr_sbox_rl(0, SBOX_CURRENT_DIE_TEMP0); + die2 = mr_sbox_rl(0, SBOX_CURRENT_DIE_TEMP1); + die3 = mr_sbox_rl(0, SBOX_CURRENT_DIE_TEMP2); + dmx1 = mr_sbox_rl(0, SBOX_MAX_DIE_TEMP0); + dmx2 = mr_sbox_rl(0, SBOX_MAX_DIE_TEMP1); + dmx3 = mr_sbox_rl(0, SBOX_MAX_DIE_TEMP2); + + /* + * Die temperatures. 
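+ * Each CURRENT/MAX_DIE_TEMP register packs three 10-bit sensor
+ * fields (bits 9:0, 19:10 and 29:20), unpacked one per element below.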
+ * Always positive numbers (or zero for unfused parts) + */ + temp.dies[0].cur = GET_BITS( 9, 0, die1); + temp.dies[1].cur = GET_BITS(19, 10, die1); + temp.dies[2].cur = GET_BITS(29, 20, die1); + temp.dies[3].cur = GET_BITS( 9, 0, die2); + temp.dies[4].cur = GET_BITS(19, 10, die2); + temp.dies[5].cur = GET_BITS(29, 20, die2); + temp.dies[6].cur = GET_BITS( 9, 0, die3); + temp.dies[7].cur = GET_BITS(19, 10, die3); + temp.dies[8].cur = GET_BITS(29, 20, die3); + + /* + * Die max temp (probably 0 for unfused parts) + */ + temp.dies[0].max = GET_BITS( 9, 0, dmx1); + temp.dies[1].max = GET_BITS(19, 10, dmx1); + temp.dies[2].max = GET_BITS(29, 20, dmx1); + temp.dies[3].max = GET_BITS( 9, 0, dmx2); + temp.dies[4].max = GET_BITS(19, 10, dmx2); + temp.dies[5].max = GET_BITS(29, 20, dmx2); + temp.dies[6].max = GET_BITS( 9, 0, dmx3); + temp.dies[7].max = GET_BITS(19, 10, dmx3); + temp.dies[8].max = GET_BITS(29, 20, dmx3); + + r = (struct mr_rsp_temp *) p; + *r = temp; + return sizeof(*r); +} + + +int +mr_get_fan(void * p) +{ + struct mr_rsp_fan * r; + uint32_t fs, fp; +#if USE_SMC + uint32_t fa; +#endif + + r = (struct mr_rsp_fan *) p; + + /* + * Preference is SMC data. + * Not sure if SBOX registers work sensors work in KnC + */ +#if USE_SMC + /* + * Read fan state from SMC. + * No info on override available. + */ + r->override = 0; + r->r_val = r->p_val = 3; + if (mr_smc_rd(MR_SMC_FAN_TACH, &fs)) + fs = PUT_BITS(31, 30, 3); + if (mr_smc_rd(MR_SMC_FAN_PWM, &fp)) + fp = PUT_BITS(31, 30, 3); + if (mr_smc_rd(MR_SMC_FAN_PWM_ADD, &fa)) + fa = PUT_BITS(31, 30, 3); + + /* + * Still need to screen for good data. + * Top 2 bits decode as + * 00 Data OK + * 01 Reserved + * 10 Lower threshold reached (or reserved) + * 11 Data unavailable + * Assume data is still valid if a threshold reached + */ + if (GET_BITS(31, 30, fs) != 0x3) { + /* + * The override concept from KnF (and SBOX registers) + * seems to have been replaced with a PWM adder. + * Propose to set override flag if adder is non-zero. + */ + r->r_val = 0; + r->rpm = GET_BITS(15, 0, fs); + if (GET_BITS(31, 30, fp) != 0x3) { + r->p_val = 0; + r->pwm = GET_BITS(7, 0, fp); + if (GET_BITS(31, 30, fa) != 0x3) { + fa = GET_BITS(7, 0, fa); + if (fa) { + r->override = 1; + r->pwm += fa; + if (r->pwm > 100) + r->pwm = 100; + } + } + } + } +#else + /* + * Read fan state from SBOX registers + * Require SMC telemetry to work. + */ + fs = mr_sbox_rl(0, SBOX_STATUS_FAN1); + fp = mr_sbox_rl(0, SBOX_SPEED_OVERRIDE_FAN); + + r->override = GET_BIT(15, fp); + r->rpm = GET_BITS(15, 0, fs); + if (r->override) + r->pwm = GET_BITS( 7, 0, fp); + else + r->pwm = GET_BITS(23, 16, fs); +#endif + + return sizeof(*r); +} + + +int +mr_get_ecc(void * p) +{ + struct mr_rsp_ecc * r; + + r = (struct mr_rsp_ecc *) p; + *r = ecc; + return sizeof(*r); +} + + +int +mr_get_trbo(void * p) +{ + struct mr_rsp_trbo * r; + + /* + * Get current value from PM + */ +#if USE_PM + int (* fnc)(void); + + fnc = pm_cb.micpm_get_turbo; + if (fnc) { + uint32_t pm; + + pm = fnc(); + trbo.state = GET_BIT(1, pm); + trbo.avail = GET_BIT(2, pm); + if (! 
trbo.avail) + trbo.set = 0; + } +#endif + + r = (struct mr_rsp_trbo *) p; + *r = trbo; + return sizeof(*r); +} + + +int +mr_get_pmcfg(void * p) +{ + struct mr_rsp_pmcfg * r; + +#if USE_PM + int (* fnc)(void); + + fnc = pm_cb.micpm_get_pmcfg; + if (fnc) + pmcfg.mode = fnc(); +#endif + + r = (struct mr_rsp_pmcfg *) p; + *r = pmcfg; + return sizeof(*r); +} + + +int +mr_get_led(void * p) +{ + struct mr_rsp_led * r; + uint32_t led; + + if (mr_smc_rd(MR_SMC_LED_CODE, &led)) + return -MR_ERR_SMC; + + r = (struct mr_rsp_led *) p; + r->led = GET_BIT(0, led); + return sizeof(*r); +} + + +int +mr_get_prochot(void * p) +{ + struct mr_rsp_ptrig * r; + uint32_t pwr0; + uint32_t time0; + + if (mr_smc_rd(MR_SMC_PWR_LIM_0, &pwr0) || + mr_smc_rd(MR_SMC_TIME_WIN_0, &time0)) + return -MR_ERR_SMC; + + r = (struct mr_rsp_ptrig *) p; + r->power = GET_BITS(15, 0, pwr0); + r->time = GET_BITS(15, 0, time0); + return sizeof(*r); +} + + +int +mr_get_pwralt(void * p) +{ + struct mr_rsp_ptrig * r; + uint32_t pwr1; + uint32_t time1; + + if (mr_smc_rd(MR_SMC_PWR_LIM_1, &pwr1) || + mr_smc_rd(MR_SMC_TIME_WIN_1, &time1)) + return -MR_ERR_SMC; + + r = (struct mr_rsp_ptrig *) p; + r->power = GET_BITS(15, 0, pwr1); + r->time = GET_BITS(15, 0, time1); + return sizeof(*r); +} + + +int +mr_get_perst(void * p) +{ + struct mr_rsp_perst * r; + uint32_t perst; + + if (mr_smc_rd(MR_SMC_PWR_LIM_PERS, &perst)) + return -MR_ERR_SMC; + + r = (struct mr_rsp_perst *) p; + r->perst = GET_BIT(0, perst); + return sizeof(*r); +} + + +int +mr_get_ttl(void * p) +{ + struct mr_rsp_ttl * r; + + r = (struct mr_rsp_ttl *) p; + +#if USE_PM + mr_pm_ttl(r); +#endif + + return sizeof(*r); +} + + +/* +** +** Card specific 'Set' functions +** Input screening takes place here (to the extent possible). +** +*/ + + +int +mr_set_volt(void * p) +{ +#if USE_SVID + uint32_t err, val; + uint8_t svid; + + /* + * Ensure it's a supported value + * Which limits to use, physical or PM list? + */ + val = *(uint32_t *) p; + svid = volt2svid(val); +#if 1 + { + if (!svid) + return -MR_ERR_RANGE; + } +#else + { + int i; + + for(i = 0; i < MR_PTAB_LEN; i++) + if (volt.supt[i] == val) + break; + if (i == MR_PTAB_LEN) + return -MR_ERR_RANGE; + } +#endif + + /* + * Read-modify-write the core voltage VID register + */ + err = SvidCmd(SVID_VCCP, VR12Cmd_SetVID_Slow, svid); + printk("SetVolt: %d -> %08x (err %08x)\n", val, svid, err); + + return err ? -MR_ERR_SMC : 0; +#else + return -MR_ERR_INVOP; +#endif +} + + +int +mr_set_freq(void * p) +{ + uint32_t cf, msk, new, val; + uint16_t rat; + int i; + + /* + * Ensure it's a supported value + */ + val = *(uint32_t *) p; + for(i = 0; i < MR_PTAB_LEN; i++) + if (freq.supt[i] == val) + break; + if (i == MR_PTAB_LEN) + return -MR_ERR_RANGE; + + /* + * Core Frequency: + * 11:0 Base ratio + * 15 Fuse override + * 31 Select ratio + * Base ratio accepted only if (bit 15 | bit 31 | OC disble) == 010 + * Pre-scale frequency to counter for any ICC trickery. + * Not nice, makes exact table matches difficult!! + */ + val = (val * icc_fwd()) / ICC_NOM; + rat = freq2ratio(val/1000, cpu_tab, ARRAY_SIZE(cpu_tab), 200); + cf = mr_sbox_rl(0, SBOX_COREFREQ); + msk = ~(PUT_BITS(11, 0, ~0) | PUT_BIT(15, 1) | PUT_BIT(31, 1)); + new = (cf & msk) | PUT_BITS(11, 0, rat) | PUT_BIT(31, 1); + mr_sbox_wl(0, SBOX_COREFREQ, new); + printk("SetFreq: %d -> %08x (%08x)\n", val, new, cf); + + /* + *TBD: + * We just changed the system's base clock without + * re-calibrating the APIC timer tick counters. 
+ * There is probably a function call for the cpu-freq + * driver to deal with this, so should we call it? + */ + + return 0; +} + + +int +mr_set_plim(void * p) +{ + plim.phys = *(uint32_t *) p; + + /* + * Notify PM of change + *TBD: not supported, remove? + */ + return 0; +} + + +int +mr_set_fan(void * p) +{ + struct mr_set_fan * fc; + + /* + * Ensure operation is valid, i.e. no garbage + * in override flag (only 1 and 0 allowed) and + * that pwm in in range 0 through 99. + */ + fc = (struct mr_set_fan *) p; + if (GET_BITS(7, 1, fc->override) || fc->pwm >= 100) + return -MR_ERR_RANGE; + +#if USE_SMC + { + uint32_t dat; + + /* + * Determine the PWM-adder value, and send it to the SMC. + * Subsequent 'GET' fan will add the calculated PWM and + * this adder to report current PWM percentage. + * Only way to retrieve the adder is via GET_SMC(0x4b). + */ + if (fc->override) + dat = fc->pwm; + else + dat = 0; + + if (mr_smc_wr(MR_SMC_FAN_PWM_ADD, &dat)) + return -MR_ERR_SMC; + } +#else + /* + * Read-modify-write the fan override register + * Control of fan #1 only, don't touch #2 + * Note: require SMC to support SBOX registers + * which is not on the radar right now. + */ + { + uint32_t fcor, fco1, fco2; + + fcor = mr_sbox_rl(0, SBOX_SPEED_OVERRIDE_FAN); + fco2 = GET_BITS(31, 16, fcor); + if (fc->override) + fco1 = PUT_BIT(15, 1) | fc->pwm; + else + fco1 = 0; + mr_sbox_wl(0, SBOX_SPEED_OVERRIDE_FAN, + PUT_BITS(31, 16, fco2) | PUT_BITS(15, 0, fco1)); + } +#endif + + return 0; +} + + +int +mr_set_trbo(void * p) +{ + uint32_t tmp; +#if USE_PM + void (* fnc)(int); +#endif + + /* + * Only values 0 and 1 allowed + */ + tmp = *(uint32_t *) p; + if (GET_BITS(31, 1, tmp)) + return -MR_ERR_RANGE; + trbo.set = tmp; + +#if USE_PM + /* + * Notify PM of new value + */ + fnc = pm_cb.micpm_set_turbo; + if (fnc) + fnc(trbo.set); +#endif + + return 0; +} + + +int +mr_set_led(void * p) +{ + uint32_t led; + + /* + * Only values 0 and 1 allowed + */ + led = *(uint32_t *) p; + if (GET_BITS(31, 1, led)) + return -MR_ERR_RANGE; + + if (mr_smc_wr(MR_SMC_LED_CODE, &led)) + return -MR_ERR_SMC; + + return 0; +} + + +int +mr_set_prochot(void * p) +{ + struct mr_rsp_ptrig * trig; + uint32_t pwr0; + uint32_t time0; + + trig = (struct mr_rsp_ptrig *) p; + pwr0 = trig->power; + time0 = trig->time; + + /* + * Check for sane values + *TBD: check pwr0 higher than current pwr1? + */ + if (pwr0 < 50 || pwr0 > 400) + return -MR_ERR_RANGE; + if (time0 < 50 || time0 > 1000) + return -MR_ERR_RANGE; + + if (mr_smc_wr(MR_SMC_PWR_LIM_0, &pwr0) || + mr_smc_wr(MR_SMC_TIME_WIN_0, &time0)) + return -MR_ERR_SMC; + + return 0; +} + + +int +mr_set_pwralt(void * p) +{ + struct mr_rsp_ptrig * trig; + uint32_t pwr1; + uint32_t time1; + + trig = (struct mr_rsp_ptrig *) p; + pwr1 = trig->power; + time1 = trig->time; + + /* + * Check for sane values + *TBD: check pwr1 lower than current pwr0? 
+ */ + if (pwr1 < 50 || pwr1 > 400) + return -MR_ERR_RANGE; + if (time1 < 50 || time1 > 1000) + return -MR_ERR_RANGE; + + if (mr_smc_wr(MR_SMC_PWR_LIM_1, &pwr1) || + mr_smc_wr(MR_SMC_TIME_WIN_1, &time1)) + return -MR_ERR_SMC; + + return 0; +} + + +int +mr_set_perst(void * p) +{ + uint32_t perst; + + /* + * Only values 0 and 1 allowed + */ + perst = *(uint32_t *) p; + if (GET_BITS(31, 1, perst)) + return -MR_ERR_RANGE; + + if (mr_smc_wr(MR_SMC_PWR_LIM_PERS, &perst)) + return -MR_ERR_SMC; + + return 0; +} + + +#if USE_PM +/* +** +** API functions dedicated for PM support +** +** These functions are embedded within the MT callout table +** and thus needs to follow the calling convention, which +** for 'get' functions is to pass an opague pointer to a buffer +** to hold retrieved data and on return get a staus code (positive +** on success, negative on failures) and for 'put' functions is +** to pass an opague pointer to a buffer holding input data. +** +** Function list as per PM needs: +** +** pm_get_pl0 reads 0x2c, 0x2d and 0x2e +** pm_set_pl0 writes 0x2c and 0x2d +** +** pm_get_pl1 reads 0x2f and 0x30 +** pm_set_pl1 writes 0x2f and 0x30 +** +** pm_get_pavg reads 0x35 and 0x36 +** +** pm_get_pttl reads 0x38 and 0x39 +** +** pm_get_volt reads 0x3c, 0x3d and 0x3e +** +** pm_get_temp reads 0x40, 0x43, 0x44 and 0x45 +** +** pm_get_tach reads 0x49 and 0x4a +** +** pm_get_tttl reads 0x4e and 0x4f +** +** pm_get_fttl reads 0x2b +** pm_set_fttl writes 0x2b +** +*/ + +#include "micpm_api.h" + +int +pm_get_pl0(void * p) +{ + struct pm_rsp_plim * r; + uint32_t lim, win, grd; + + lim = 0; + win = 0; + grd = 0; + mr_smc_rd(MR_SMC_PWR_LIM_0, &lim); + mr_smc_rd(MR_SMC_TIME_WIN_0, &win); + mr_smc_rd(MR_SMC_PWR_LIM0_GRD, &grd); + + r = (struct pm_rsp_plim *) p; + r->pwr_lim = GET_BITS(15, 0, lim); + r->time_win = GET_BITS(15, 0, win); + r->guard_band = GET_BITS(15, 0, grd); + + return sizeof(*r); +} + +int +pm_set_pl0(void * p) +{ + struct pm_cmd_plim * r; + + /* + * Only lower 16 bit used + */ + r = (struct pm_cmd_plim *) p; + if (GET_BITS(31, 16, r->pwr_lim)) + return -MR_ERR_RANGE; + if (GET_BITS(31, 16, r->time_win)) + return -MR_ERR_RANGE; + + /* + * This does not allow caller to tell which failed. + *TBD: do we care? + */ + if (mr_smc_wr(MR_SMC_PWR_LIM_0, &r->pwr_lim)) + return -MR_ERR_SMC; + if (mr_smc_wr(MR_SMC_TIME_WIN_0, &r->time_win)) + return -MR_ERR_SMC; + + return 0; +} + +int +pm_get_pl1(void * p) +{ + struct pm_rsp_plim * r; + uint32_t lim, win; + + lim = 0; + win = 0; + mr_smc_rd(MR_SMC_PWR_LIM_1, &lim); + mr_smc_rd(MR_SMC_TIME_WIN_1, &win); + + r = (struct pm_rsp_plim *) p; + r->pwr_lim = GET_BITS(15, 0, lim); + r->time_win = GET_BITS(15, 0, win); + r->guard_band = 0; + + return sizeof(*r); +} + +int +pm_set_pl1(void * p) +{ + struct pm_cmd_plim * r; + + /* + * Only lower 16 bit used + */ + r = (struct pm_cmd_plim *) p; + if (GET_BITS(31, 16, r->pwr_lim)) + return -MR_ERR_RANGE; + if (GET_BITS(31, 16, r->time_win)) + return -MR_ERR_RANGE; + + /* + * This does not allow caller to tell which failed. + *TBD: do we care? 
+ */ + if (mr_smc_wr(MR_SMC_PWR_LIM_1, &r->pwr_lim)) + return -MR_ERR_SMC; + if (mr_smc_wr(MR_SMC_TIME_WIN_1, &r->time_win)) + return -MR_ERR_SMC; + + return 0; +} + +int +pm_get_pavg(void * p) +{ + struct pm_rsp_pavg * r; + uint32_t pwr0, pwr1; + + pwr0 = PUT_BITS(31, 30, 3); + pwr1 = PUT_BITS(31, 30, 3); + mr_smc_rd(MR_SMC_AVG_PWR_0, &pwr0); + mr_smc_rd(MR_SMC_AVG_PWR_1, &pwr1); + + r = (struct pm_rsp_pavg *) p; + r->stat_0 = GET_BITS(31, 30, pwr0); + r->stat_1 = GET_BITS(31, 30, pwr1); + r->pwr_0 = GET_BITS(29, 0, pwr0); + r->pwr_1 = GET_BITS(29, 0, pwr1); + + return sizeof(*r); +} + +int +pm_get_pttl(void * p) +{ + struct pm_rsp_pttl * r; + uint32_t dur, ttl; + + if (mr_smc_rd(MR_SMC_PWR_TTL, &ttl)) + return -MR_ERR_SMC; + + r = (struct pm_rsp_pttl *) p; + r->pwr_ttl = GET_BIT(0, ttl); + dur = PUT_BITS(31, 30, 3); + if (r->pwr_ttl) + mr_smc_rd(MR_SMC_PWR_TTL_DUR, &dur); + r->stat_dur = GET_BITS(31, 30, dur); + r->duration = GET_BITS(15, 0, dur); + + return sizeof(*r); +} + +int +pm_get_volt(void * p) +{ + struct pm_rsp_volt * r; + uint32_t vccp, vddg, vddq; + + vccp = PUT_BITS(31, 30, 3); + vddg = PUT_BITS(31, 30, 3); + vddq = PUT_BITS(31, 30, 3); + mr_smc_rd(MR_SMC_VOLT_VCCP, &vccp); + mr_smc_rd(MR_SMC_VOLT_VDDG, &vddg); + mr_smc_rd(MR_SMC_VOLT_VDDQ, &vddq); + + r = (struct pm_rsp_volt *) p; + r->stat_vccp = GET_BITS(31, 30, vccp); + r->stat_vddg = GET_BITS(31, 30, vddg); + r->stat_vddq = GET_BITS(31, 30, vddq); + r->vccp = GET_BITS(15, 0, vccp); + r->vddg = GET_BITS(15, 0, vddg); + r->vddq = GET_BITS(15, 0, vddq); + + return sizeof(*r); +} + +int +pm_get_temp(void * p) +{ + struct pm_rsp_temp * r; + uint32_t cpu, vccp, vddg, vddq; + + cpu = PUT_BITS(31, 30, 3); + vccp = PUT_BITS(31, 30, 3); + vddg = PUT_BITS(31, 30, 3); + vddq = PUT_BITS(31, 30, 3); + mr_smc_rd(MR_SMC_TEMP_CPU, &cpu); + mr_smc_rd(MR_SMC_TEMP_VCCP, &vccp); + mr_smc_rd(MR_SMC_TEMP_VDDG, &vddg); + mr_smc_rd(MR_SMC_TEMP_VDDQ, &vddq); + + r = (struct pm_rsp_temp *) p; + r->stat_cpu = GET_BITS(31, 30, cpu); + r->stat_vccp = GET_BITS(31, 30, vccp); + r->stat_vddg = GET_BITS(31, 30, vddg); + r->stat_vddq = GET_BITS(31, 30, vddq); + r->cpu = GET_BITS(15, 0, cpu); + r->vccp = GET_BITS(15, 0, vccp); + r->vddg = GET_BITS(15, 0, vddg); + r->vddq = GET_BITS(15, 0, vddq); + + return sizeof(*r); +} + +int +pm_get_tach(void * p) +{ + struct pm_rsp_tach * r; + uint32_t pwm, tach; + + pwm = PUT_BITS(31, 30, 3); + tach = PUT_BITS(31, 30, 3); + mr_smc_rd(MR_SMC_FAN_PWM, &pwm); + mr_smc_rd(MR_SMC_FAN_TACH, &tach); + + r = (struct pm_rsp_tach *) p; + r->stat_pwm = GET_BITS(31, 30, pwm); + r->stat_tach = GET_BITS(31, 30, tach); + r->fan_pwm = GET_BITS( 7, 0, pwm); + r->fan_tach = GET_BITS(15, 0, tach); + + return sizeof(*r); +} + +int +pm_get_tttl(void * p) +{ + struct pm_rsp_tttl * r; + uint32_t dur, ttl; + + if (mr_smc_rd(MR_SMC_TRM_TTL, &ttl)) + return -MR_ERR_SMC; + + r = (struct pm_rsp_tttl *) p; + r->thrm_ttl = GET_BIT(0, ttl); + dur = PUT_BITS(31, 30, 3); + if (r->thrm_ttl) + mr_smc_rd(MR_SMC_TRM_TTL_DUR, &dur); + r->stat_dur = GET_BITS(31, 30, dur); + r->duration = GET_BITS(15, 0, dur); + + return sizeof(*r); +} + +int +pm_get_fttl(void * p) +{ + struct pm_rsp_fttl * r; + uint32_t ttl; + + if (mr_smc_rd(MR_SMC_FORCE_TTL, &ttl)) + return MR_ERR_SMC; + + r = (struct pm_rsp_fttl *) p; + r->forced = GET_BIT(0, ttl); + + return sizeof(*r); +} + +int +pm_set_fttl(void * p) +{ + uint32_t ttl; + + /* + * Only values 0 and 1 allowed + */ + ttl = ((struct pm_rsp_fttl *) p)->forced; + if (GET_BITS(31, 1, ttl)) + return -MR_ERR_RANGE; + + if 
(mr_smc_wr(MR_SMC_FORCE_TTL, &ttl)) + return -MR_ERR_SMC; + + return 0; +} + +#endif diff --git a/ras/micras_knf.c b/ras/micras_knf.c new file mode 100644 index 0000000..cda0637 --- /dev/null +++ b/ras/micras_knf.c @@ -0,0 +1,1432 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * RAS MT module driver + * + * Code and data structures to handle get/set tasks for KnF. + * Parties accessing the data structures are supposed to use the + * micras_mt_tsk() routines to ensure integrity and consistency. + * Particularly important when handling sysfs nodes and actions + * requested from SCIF connections must use that method in order + * to guarantee serialized access. + * + * Even if read-only access to latest valid data is required, + * it should go through micras_mt_tsk() using dedicated handlers + * in this module. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "micras_api.h" +#include "micmca_api.h" +#include "micras.h" + + +/* + * Persistent data accessible through the CP api. + * Some functions just read/modify hardware CSRs + * and thus need no storage between invocations. 
+ */ + +extern struct mr_rsp_vers vers; +extern struct mr_rsp_volt volt; +extern struct mr_rsp_freq freq; +extern struct mr_rsp_power power; +extern struct mr_rsp_plim plim; +extern struct mr_rsp_gddr gddr; +extern struct mr_rsp_gvolt gvolt; +extern struct mr_rsp_gfreq gfreq; +extern struct mr_rsp_temp temp; +extern struct mr_rsp_ecc ecc; +extern struct mr_rsp_trbo trbo; +extern struct mr_rsp_pmcfg pmcfg; + +#if USE_FSC +/* +** +** FSC API +** +** The FSC has a back-door communication channel, not documented +** anywhere in the register spec nor in any HAS or LLD that is +** available on recent KnF cards (later than rev ??). +** Found a .35 proposal for it, so it better do. In short, this +** backdoor relies on fan #2 is not used on KnF and the fact that +** controls for fan #2 is transmitted over I2C to the fan speed +** controller (FSC) unaltered, such that it can chose an alternate +** interpretation of received data. +** +** The Fan Speed Override register (SBOX 0x8007d102c) has this +** definition in the register spec: +** +** Bit(s) Usage +** ------ ---------- +** 7:0 Fan 1 override ratio +** 14 Fan 1 Set max speed +** 15 Fan 1 Enable override +** 23:16 Fan 2 override ratio +** 30 Fan 2 Set max speed +** 31 Fan 2 Enable override +** +** This register has been repurposed into a Message Gain Bit Bang Register +** (MGBR) with a 4 bit command and a 16 bit data field, layout is: +** +** Bit(s) Usage +** ------ ---------- +** 7:0 MGBR data 7:0 +** 21:14 MGBR data 15:8 +** 23:22 MGBR command 1:0 +** 31:30 MBGR command 3:2 +** +** Command Usage +** 0 Fan 1 Speed Override +** 1 Power Management and Control Config +** 7 PMC PCIe Alert Override +** 8 PMC 2x3 Alert Override +** 9 PMC 2x4 Alert Override +** 10 Temperature Override Command +** 11 General Status Command +** 12-15 PID Gain Command(s) +** +** Fan 1 control works as MGBR command 0, though the spec is unclear on +** whether the resulting FSO register format is same as the original spec. +** Specifically, old spec has Fan 1 override enable in FSO bit 15, whereas +** the MGBR spec has it in MGBR data bit 15 (corresponds to FSO bit 20). +** Test shows it has to be MGBR bit 9, i.e. compatible with register spec. +** +** Fan #2 Status Register (SBOX 0x8007d1028) has been redefined into a +** Message Gain Bit Bang Status (MGBSR) used to hold return data from +** the MGBR General Status command in this layout: +** +** Bit(s) Usage +** ------ ---------- +** 23:0 MGBSR data +** 31:28 MGBR Gen. Sts. selector (bits 23:0 source). +** +** To get access to KnF telemetry data, only MGBR command 11 is needed. +** Bits 7:0 of MGBR data for this command selects the sensor which FSC +** will report to MGBSR (not sure if one-time or repeatedly). 
The actual
+** encoding is as follows:
+**
+**	0x00	Fan2Status
+**	0x01	PMC Configuration Command Settings
+**	0x07	Reads the 2x4 IR3275 Configuration Register
+**	0x08	Reads the 2x3 IR3275 Configuration Register
+**	0x09	Reads the PCIe IR3275 Configuration Register
+**	0x0A	Reads the Temperature Command Settings
+**	0x20	Maximum Total Card Power - 1s Moving Average (20 Samples)
+**	0x21	Maximum 2x4 Connector Power - 1s Moving Average (20 Samples)
+**	0x22	Maximum 2x3 Connector Power - 1s Moving Average (20 Samples)
+**	0x23	Maximum PCIe Connector Power - 1s Moving Average (20 Samples)
+**	0x30	Maximum Total Card Power - Single Sample
+**	0x31	Maximum 2x4 Connector Power - Single Sample
+**	0x32	Maximum 2x3 Connector Power - Single Sample
+**	0x33	Maximum PCIe Connector Power - Single Sample
+**	0xA0	Returns the current Fan Tcontrol setting for the GPU temperature
+**	0xA1	Maximum Temperature for Temperature Sensor 1 - VCCP
+**	0xA2	Maximum Temperature for Temperature Sensor 2 - Air Inlet
+**	0xA3	Maximum Temperature for Temperature Sensor 3 - NW GDDR
+**	0xA4	Maximum Temperature for Temperature Sensor 4 - V1P5 VDD VR
+**	0xA5	Maximum Temperature for Temperature Sensor 5 - Display Transmitter
+**	0xA6	Maximum Temperature for GPU
+**
+** The 'return' values in MGBSR are 16 bit only, power in Watts, Temp in C.
+**
+** Implementation notes:
+**  > The MGBR API is timing sensitive. FSC reads the MGBR register
+**    at ~50 mSec intervals over an I2C bus and performs the command
+**    on every read, which in case of the General Status command will
+**    result in writing FSC internal data to the MGBSR register.
+**    A delay is required after every write to MGBR in order to
+**    ensure the FSC actually sees it.
+**
+**  > I2C bus reads are 7 bytes, writes are 6 bytes, 1 clock at 100 kHz
+**    is 10 uSec, 1 byte roughly translates to 10 bits, so the minimum
+**    delay on I2C from a command being written until a valid return
+**    value is available becomes
+**	10 * (6 + 7) * 10 uSec = 1.3 mSec
+**    The I2C bus on KnF runs slower than 100 kHz, causing transfers
+**    to take more time than that to finish.
+**    After the initial delay, we may need to wait for a result
+**    to arrive in the MGBSR register.
+**
+**  > It seems that fan 1 override is a dynamic act, i.e. for it to
+**    be in effect the MGBR command needs to be set accordingly.
+**    Therefore, when reading telemetry, the MGBR command is set
+**    just for a period long enough for it to be seen by FSC and the
+**    result to be latched into the MGBSR register. After that period
+**    (when fan speed override is active) the MGBR is rewritten to
+**    restore the fan 1 override.
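+**
+** As a worked example of the layout above: selecting sensor 0x20
+** (Maximum Total Card Power, 1s average) via the General Status
+** command (11 = 0b1011) packs into MGBR as
+**
+**	bits 31:30 = command 3:2 = 0b10
+**	bits 23:22 = command 1:0 = 0b11
+**	bits 21:14 = data 15:8   = 0x00
+**	bits  7:0  = data 7:0    = 0x20
+**
+** i.e. a register value of 0x80c00020; this is exactly what
+** fsc_mgbr_write() below constructs with PUT_BITS().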
+** +*/ + +#define MR_FSC_MGBR_OVR_CMD 0 /* Fan 1 Speed Override */ +#define MR_FSC_MGBR_GEN_CMD 11 /* General Status command */ + +#define MR_FSC_STATUS 0x00 /* FSC Status & version */ +#define MR_FSC_PMC_CFG 0x01 /* PMC Configuration */ + +#define MR_FSC_PWR_TOT 0x20 /* Total Power (1 sec avg) */ +#define MR_FSC_PWR_2X4 0x21 /* 2x4 Power (1 sec avg) */ +#define MR_FSC_PWR_2X3 0x22 /* 2x3 Power (1 sec avg) */ +#define MR_FSC_PWR_PCIE 0x23 /* PCIe Power (1 sec avg) */ + +#define MR_FSC_PWR1_TOT 0x30 /* Total Power (single sample) */ +#define MR_FSC_PWR1_2X4 0x31 /* 2x4 Power (single sample) */ +#define MR_FSC_PWR1_2X3 0x32 /* 2x3 Power (single sample) */ +#define MR_FSC_PWR1_PCIE 0x33 /* PCIe Power (single sample) */ + +#define MR_FSC_TEMP_VCCP 0xA1 /* VCCP VR Temperature */ +#define MR_FSC_TEMP_INLET 0xA2 /* Card Inlet Temperature */ +#define MR_FSC_TEMP_GDDR 0xA3 /* GDDR Temperature */ +#define MR_FSC_TEMP_VDD 0xA4 /* VDD VR Temperature */ +#define MR_FSC_TEMP_DISP 0xA5 /* Display Transmitter */ + + +/* + * Simple I/O access routines for FSC registers + */ + +#ifdef MIC_IS_EMULATION +/* + * Emulation does not handle I2C busses in general. + * Not sure if FSC is emulated, but won't rely on it. + * The following stubs are for emulation only. + */ + +int +fsc_mgbr_read(uint32_t * v) +{ + if (v) + memset(v, 0, 4); + + return 0; +} + +void +fsc_mgbr_write(uint8_t c, uint32_t v) +{ +} + +#else + +#if 0 +#define RL printk("%s: %2x -> %08x\n", __FUNCTION__, mgbr_cmd, *val) +#define WL printk("%s: %2x <- %08x\n", __FUNCTION__, mgbr_cmd, *val) +#else +#define RL /* As nothing */ +#define WL /* As nothing */ +#endif + +static uint8_t mgbr_cmd; /* Last MGBR command */ +static uint32_t mgbr_dat; /* Last MGBR data */ +static uint32_t fan1_ovr; /* Current fan 1 override command */ + +/* + * Read MGBSR from SBOX + * + * This function only support MGBR commands MR_FSC_MGBR_{OVR|GEN}_CMD. + * The operation mode is that the command is written to MGBR and after + * a while the response shows up in MGBSR, which has fields that tell + * which command caused the response (bits 31:28), and for GEN command + * also which sensor was read. This function checks both fields. + * + * We'll poll at 1 mSec rate and allow up to 200 mSec for the + * FSC to provide the measure in the SBOX register. + */ + +int +fsc_mgbsr_read(uint32_t * val) +{ + uint32_t mgbsr; + int n; + + for(n = 0; n < 200; n++) { + mgbsr = mr_sbox_rl(0, SBOX_STATUS_FAN2); + if ((GET_BITS(31, 28, mgbsr) == mgbr_cmd) || + mgbr_cmd != MR_FSC_MGBR_GEN_CMD || mgbr_dat == 0) { + if (mgbr_cmd != MR_FSC_MGBR_GEN_CMD || + mgbr_dat <= 1) { + *val = GET_BITS(23, 0, mgbsr); + RL; + return 0; + } + if (GET_BITS(23, 16, mgbsr) == mgbr_dat) { + *val = GET_BITS(15, 0, mgbsr); + RL; + return 0; + } + } + myDELAY(1000); + } + + /* + * Timeout + */ + return 1; +} + + +/* + * Write MGBR on SBOX + * + * This function only support MGBR commands MR_FSC_MGBR_{OVR|GEN}_CMD. + * The OVR command only when fan 1 speed override is active. + * The GEN command is meant to cause a new selectable telemetry to be + * pushed into the MBGSR register by the FSC. Any necessary delays + * are handled here. Not by the read function. 
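+ *
+ * For illustration, a typical telemetry poll with this write/read
+ * pair (as done by mr_get_power()/get_fsc_pwr() further below):
+ *
+ *	uint32_t sel = MR_FSC_PWR_TOT, raw;
+ *
+ *	fsc_mgbr_write(MR_FSC_MGBR_GEN_CMD, &sel);
+ *	if (! fsc_mgbsr_read(&raw))
+ *		... reading is GET_BITS(15, 0, raw), power in Watts ...
+ *	if (fan1_ovr)
+ *		fsc_mgbr_write(MR_FSC_MGBR_OVR_CMD, &fan1_ovr);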
+ */ + +void +fsc_mgbr_write(uint8_t c, uint32_t * val) +{ + uint32_t prev_cmd, prev_dat; + uint32_t mgbr_reg, mgbr_sel; + uint32_t mgbsr, n; + + prev_cmd = mgbr_cmd; + prev_dat = mgbr_dat; + mgbr_cmd = GET_BITS(3, 0, c); + mgbr_dat = GET_BITS(15, 0, *val); + + mgbr_reg = PUT_BITS(31, 30, (mgbr_cmd >> 2)) | + PUT_BITS(23, 22, mgbr_cmd) | + PUT_BITS(21, 14, (mgbr_dat >> 8)) | + PUT_BITS( 7, 0, mgbr_dat); + WL; + mr_sbox_wl(0, SBOX_SPEED_OVERRIDE_FAN, mgbr_reg); + + /* + * Special for Set Fan Speed, we keep track of that one + */ + if (mgbr_cmd == MR_FSC_MGBR_OVR_CMD) { + if (GET_BIT(9, mgbr_dat)) + fan1_ovr = GET_BITS(9, 0, mgbr_dat); + else + fan1_ovr = 0; + } + + /* + * If the command issued is the same as the previous command, + * there is no way to determine if the MGBSR register is result + * of this or the previous command. It is not possible to clear + * MGBSR (read-only register), so if it is the same register, + * we'll just have to wait long enough for FSC to respond. + * Not all MGBR commands are mirrored into top 4 bits of MGBSR, + * those gets the simple delay treatment. + */ + if ((mgbr_cmd == prev_cmd && mgbr_dat == prev_dat) || + mgbr_cmd != MR_FSC_MGBR_GEN_CMD || mgbr_dat <= 1) { + myDELAY(100 * 1000); + return; + } + mgbr_sel = GET_BITS(7, 0, mgbr_dat); + for(n = 0; n < 200; n++) { + mgbsr = mr_sbox_rl(0, SBOX_STATUS_FAN2); + if (GET_BITS(31, 28, mgbsr) == mgbr_cmd) { + if (mgbr_cmd != MR_FSC_MGBR_GEN_CMD) + return; + if (GET_BITS(23, 16, mgbsr) == mgbr_sel) + return; + } + myDELAY(1000); + } +} +#undef RL +#undef WL +#endif /* EMULATION */ + + +/* + * Bypass for FSC access. + * Somewhat bizarre backdoor to the FSC's MGBR and MGBSR registers. + * The FSC interface is asymmetrical by nature since only the General + * Status MGBR command can cause data to be returned through MGBSR. + * To make it appear as telemetry registers can be read directly + * and without need for privileges, the Read operation is rigged to + * issue the appropriate MGBR registers itself when necessary. + * + * To protect the FSC integrity, the SET command are restricted + * to privileged users and is only accepting commands that cannot + * harm the FSC integrity. For now the whitelist consists of + * 0 Fan 1 Speed Override + * 1 Power Management and Control Config + * 11 General Status command + * + * To read back the response from a SET command the exact same value + * of 'parm' must be passed to a subsequent GET, in which case the + * the GET routine will not insert it's own MGBR command to select + * contents of the MGBSR to return. + * + * Notice that FSC read is equivalent of reading Fan #2 Status register + * and FSC write is equivalent of writing Fan Speed Override register. + * + * This reuse the SMC interface structs, but the semantics are different. + * + * Return: + * r->reg MGBSR sensor select (if applicable) or 0 + * r->width always 3 (24 bit wide field) + * r->rtn.val MGBSR sensor data + * + * Input: + * parm 31:24 MGBR command (must be 0xb) + * parm 15:0 MGBR data (sensor select) + */ + +int +mr_get_fsc(void * p) +{ + int rtn; + uint32_t raw; + struct mr_rsp_smc * r; + uint8_t cmd; + uint32_t dat, parm; + + /* + * Extract MGBR command and dat + */ + parm = * (uint32_t *) p; + cmd = GET_BITS(31, 24, parm); + dat = GET_BITS(15, 0, parm); + + /* + * If the request is different from the last issued + * 'SET' command in any way then 'GET' will issue the + * corresponding MGBR command, if allowed. 
+ */ + if (mgbr_cmd != cmd || mgbr_dat != dat) { + /* + * Only allow 'General Status' command + */ + if (cmd != MR_FSC_MGBR_GEN_CMD) + return -MR_ERR_PERM; + + /* + * Screen against known FSC register widths. + * All commands seems to be 16 bit wide. + * We insist that unused upper bits are zeros. + */ + if (dat != GET_BITS(23, 0, parm)) + return -MR_ERR_INVAUX; + + /* + * Better way to single out these numbers? + * 0 1 20 21 22 23 30 31 32 33 a1 a2 a3 a4 a5 + */ + if (! ((dat <= 1) || + (dat >= 0x20 && dat <= 0x23) || + (dat >= 0x30 && dat <= 0x33) || + (dat >= 0xa1 && dat <= 0xa5))) + return -MR_ERR_PERM; + + /* + * Write MGBR command + */ + fsc_mgbr_write(cmd, &dat); + } + + /* + * Read MGBSR result + */ + rtn = fsc_mgbsr_read(&raw); + if (rtn) + return -MR_ERR_SMC; + + /* + * Revert to normal if fan 1 speed override mode if needed. + */ + if (fan1_ovr) + fsc_mgbr_write(MR_FSC_MGBR_OVR_CMD, &fan1_ovr); + + r = (struct mr_rsp_smc *) p; + if (cmd == MR_FSC_MGBR_GEN_CMD) + r->reg = GET_BITS(7, 0, dat); + r->width = 3; + r->rtn.val = GET_BITS(23, 0, raw); + + return sizeof(*r); +} + + +int +mr_set_fsc(void * p) +{ + uint8_t cmd; + uint32_t dat, parm; + + parm = * (uint32_t *) p; + cmd = GET_BITS(31, 24, parm); + dat = GET_BITS(15, 0, parm); + + /* + * Screen against known FSC register widths. + * All commands seems to be 16 bit wide. + * We insist that unused upper bits are zeros. + */ + if (dat != GET_BITS(23, 0, parm)) + return -MR_ERR_INVAUX; + + /* + * 4-bit command code for FSC. + * Mask of valid codes needs just 16 bits. + * Max valid codes 0..1, 7..15, mask 0xff83. + * Non-debug registers reduce mask to 0x0803. + */ + if (! ((1 << cmd) & 0x0803)) + return -MR_ERR_PERM; + + /* + * Write MGBR command and revert to fan 1 speed override mode + * if needed (override in effect). Side effect of reverting + * is that any reponse in MGBSR must to be read before next + * FSC sample happens, i.e. within 50 mSec. + */ + fsc_mgbr_write(cmd, &dat); + if (fan1_ovr) + fsc_mgbr_write(MR_FSC_MGBR_OVR_CMD, &fan1_ovr); + + return 0; +} +#endif + + +/* +** +** Conversion between CP formats (uV, MHz, etc.) +** and hardware register formats (SBOX mostly). +** +*/ + + +/* + * VRM11 voltage converters + * Only bits 6:1 are being used as follows: + * Volt = Max - Res * (Bits -1) + * Bits = 1 + (Max - Volt) / Res + * The delta divided by resolution is 62. + * Bits value of 0 reserved for turning VR off. + */ + +#define VRM11_MAX 1600000 /* 1.60 V */ +#define VRM11_MIN 825000 /* 825 mV */ +#define VRM11_RES 12500 /* 12.5 mV */ + +uint32_t +vid2volt(uint8_t vid) +{ + uint32_t bits; + + bits = GET_BITS(6, 1, vid); + if (bits) + return VRM11_MAX - VRM11_RES * (bits - 1); + else + return 0; +} + +uint8_t +volt2vid(uint32_t uv) +{ + uint32_t delta, bits; + + bits = 0; + if (uv >= VRM11_MIN && uv <= VRM11_MAX) { + delta = VRM11_MAX - uv; + /* + * Why bother check for accurate input? + * Ignoring it just rounds up to nearest! + */ + if (! (delta % VRM11_RES)) + bits = 1 + delta / VRM11_RES; + } + return PUT_BITS(6, 1, bits); +} + + +/* + * PLL tables used to map between hw scale register + * value and actual frequencies given a fixed base. 
+ * The formula is (probably KnF specific) + * freq = Base * Feedback / Feedforward + * where + * Base = 100 MHz + * FeedBack = ratio bits 5:0 + * FeedForward = ratio bits 7:6 (00 -> 8, 01 -> 4, 10 -> 2, 11 -> 1) + * + * Overlapping ranges over feedback and feedforward values are + * handled by range table(s) below such that lower frequencies + * can be selected at a finer granularity. + */ + +struct pll_tab { + uint8_t clk_div; /* Feed forward */ + uint8_t min_mul; /* Lower feedback */ + uint8_t max_mul; /* Upper feedback */ + uint16_t min_clk; /* Lower frequency */ + uint16_t max_clk; /* Upper frequency */ + uint8_t step_size; /* Granularity */ +} cpu_tab[] = { /* CPU PLL */ + { 1, 20, 40, 2000, 4000, 100}, + { 2, 20, 39, 1000, 1950, 50}, + { 4, 20, 39, 500, 975, 25}, +}, gddr_tab[] = { /* GDDR PLL */ + {1, 14, 30, 1400, 3000, 100}, + {2, 12, 27, 600, 1350, 50}, +}; + +#define B_CLK 100 /* Base clock (MHz) */ + +static uint16_t +ratio2freq(uint8_t ratio, struct pll_tab * tab, int tablen) +{ + uint16_t fwd, bck; + + fwd = GET_BITS(7, 6, ~ratio); + bck = GET_BITS(5, 0, ratio); + + if (fwd < tablen && bck >= tab[fwd].min_mul && bck <= tab[fwd].max_mul) + return (B_CLK * bck) / tab[fwd].clk_div; + + return 0; +} + +static uint8_t +freq2ratio(uint16_t freq, struct pll_tab * tab, int tablen) +{ + int fwd; + + for(fwd = tablen - 1; fwd >= 0; fwd--) { + if (freq >= tab[fwd].min_clk && freq <= tab[fwd].max_clk) { + /* + * Why bother check for accurate input? + * Ignoring just rounds down to nearest supported! + */ + if (freq % tab[fwd].step_size) + break; + + return PUT_BITS(7, 6, ~fwd) | + PUT_BITS(5, 0, (freq * tab[fwd].clk_div) / B_CLK); + } + } + + return 0; +} + +static uint32_t +mr_mt_gf_r2f(uint8_t pll) +{ + return 1000 * ratio2freq(pll, gddr_tab, ARRAY_SIZE(gddr_tab)); +} + +static uint32_t +mr_mt_cf_r2f(uint8_t pll) +{ + return 1000 * ratio2freq(pll, cpu_tab, ARRAY_SIZE(cpu_tab)); +} + + +/* + * Board voltage sense converter + * Two 10 bit read-outs from SBOX register 0x1038. + * The format is very poorly documented, so no + * warranty on this conversion. Assumption is + * the reading is a binary fixed point number. + * bit 15 Valid reading if set + * bit 9:8 2 bit integer part + * bit 7:0 8 bit fraction part + * Return value is 0 (invalid) or voltage i uV. + */ + +uint32_t +bvs2volt(uint16_t sense) +{ + uint32_t res, f, msk; + + if (! GET_BIT(15, sense)) + return 0; + + /* + * First get integer contribution + * Then accumulate fraction contributions. + * Divide and add fraction if corresponding bit set. + */ + res = 1000000 * GET_BITS(9, 8, sense); + for(msk = (1 << 7), f = 1000000/2; msk && f; msk >>= 1, f >>= 1) + if (sense & msk) + res += f; + + return res; +} + + + +/* +** +** Initializations +** +** This has two intended purposes: +** - Do a on-time effort to collect info on properties that +** are not going to change after the initial setup by +** either bootstrap or kernel initialization. +** - Collect initial values on things we can modify. +** Intent is that unloading the ras module should reset +** all state to that of the time the module was loaded. 
+** +*/ + +static void __init +mr_mk_cf_lst(void) +{ + int i, n; + uint16_t f; + + n = 0; + for(i = ARRAY_SIZE(cpu_tab) -1; i >= 0; i--) { + for(f = cpu_tab[i].min_clk; + f <= cpu_tab[i].max_clk; + f += cpu_tab[i].step_size) { + freq.supt[n] = 1000 * f; + freq.slen = ++n; + if (n >= MR_PTAB_LEN) + return; + } + } +} + +static void __init +mr_mk_gf_lst(void) +{ + int i, n; + uint16_t f; + + n = 0; + for(i = ARRAY_SIZE(gddr_tab) -1; i >= 0; i--) { + for(f = gddr_tab[i].min_clk; + f <= gddr_tab[i].max_clk; + f += gddr_tab[i].step_size) { + gfreq.supt[n] = 1000 * f; + gfreq.slen = ++n; + if (n == MR_PTAB_LEN) + return; + } + } +} + +static void __init +mr_mk_cv_lst(void) +{ + int n; + uint32_t cv; + + n = 0; + for(cv = VRM11_MIN; cv <= VRM11_MAX; cv += VRM11_RES) { + volt.supt[n] = cv; + volt.slen = ++n; + if (n >= MR_PTAB_LEN) + return; + } +} + + +void __init +mr_mt_card_init(void) +{ + uint8_t * boot, * stage2, * parm; + uint32_t scr7, scr9, fsc; + uint32_t cv, cf, gv; + int i, j; + + /* + * VERS: + * Map flash and scan for version strings. + * Different methods for KnF and KnC. + */ + boot = ioremap(MIC_SPI_BOOTLOADER_BASE, MIC_SPI_BOOTLOADER_SIZE); + stage2 = ioremap(MIC_SPI_2ND_STAGE_BASE, MIC_SPI_2ND_STAGE_SIZE); + parm = ioremap(MIC_SPI_PARAMETER_BASE, MIC_SPI_PARAMETER_SIZE); + if (!boot || !stage2 || !parm) { + printk("mr_mt_init: ioremap failure: boot %p, stage2 %p, par %p\n", + boot, stage2, parm); + goto fail_iomap; + } + + /* + * Build numbers for fboot0 and fboot 1 repectively + */ + scr7 = mr_sbox_rl(0, SBOX_SCRATCH7); + + /* + * Boot block scan: + * Scan for string 'fboot0 version:' or use a 16 bit offset af offset 0xfff8. + * The latter points directly to the numeral, not to the string mentioned. + */ + for(i = 0; i < MIC_SPI_BOOTLOADER_SIZE - 32; i++) { + if (boot[i] != 'f') + continue; + + if (! memcmp(boot + i, "fboot0 version:", 15)) { + vers.fboot0[0] = scnprintf(vers.fboot0 + 1, MR_VERS_LEN -2, + "%s (build %d)", boot + i, GET_BITS(15, 0, scr7)); + break; + } + } + + /* + * Stage 2 scan: + * Scan for the magic string that locates the bootstrap version. This + * area is formatted as ' (<\0>, )', so the string we are + * looking for is 23 bytes later. + */ + for(i = 0; i < MIC_SPI_2ND_STAGE_SIZE - 32; i++) { + if (stage2[i] != 'L') + continue; + + if (! memcmp(stage2 + i, "Larrabee bootstrap", 18)) { + vers.fboot1[0] = scnprintf(vers.fboot1 + 1, MR_VERS_LEN -2, + "fboot1 version: %s", stage2 + i + 23); + vers.fboot1[0] = scnprintf(vers.fboot1 + vers.fboot1[0], MR_VERS_LEN -2, + " (build %d)", GET_BITS(31, 16, scr7)); + break; + } + } + + /* + * Parameter block scan: + * On 4 byte aligned locations, look for chars 'EOB_'. + * Numerical values for that string is 0x5f424f45. + */ + for(i = j = 0; i < MIC_SPI_PARAMETER_SIZE; i += sizeof(uint32_t)) + if (*(uint32_t *)(parm + i) == 0x5f424f45) { + vers.flash[j][0] = scnprintf(vers.flash[j] + 1, MR_VERS_LEN -2, + "flash %c%c%c%c version: %s", + parm[i+4], parm[i+5], parm[i+6], parm[i+7], parm + i + 32); + if (++j >= ARRAY_SIZE(vers.flash)) + break; + } + +fail_iomap: + if (boot) + iounmap(boot); + if (stage2) + iounmap(stage2); + if (parm) + iounmap(parm); + +#if USE_FSC + /* + * Reset SMC registers to default (MGBR cmd 0, data 0). + */ + mr_sbox_wl(0, SBOX_SPEED_OVERRIDE_FAN, 0); + + /* + * The MGBR Status has this layout for (MGBR command 0). 
+ * 7:0	Firmware version
+ * 10:8	Card straps
+ * 11	Fan disable
+ * 20:12	Temperature sensor 5
+ * 27:21	Reserved
+ * 31:28	Command (0)
+ */
+#else
+ /*
+ * Contrary to register spec, the fan speed controller
+ * 2 status register has been redefined to hold version
+ * information of the FSC firmware.
+ * 7:0	Revision
+ * 10:8	FSC straps
+ * 11	Fan disable
+ * 19:12	Temperature sensor 5
+ * 27:20	Reserved
+ * 28	BIOS clear
+ * 31:29	Reserved
+ * This is probably an early version of the MGBR hack.
+ */
+#endif
+
+	/*
+	 * Retrieve FSC version and strap config
+	 */
+	fsc = mr_sbox_rl(0, SBOX_STATUS_FAN2);
+	vers.fsc[0] = scnprintf(vers.fsc + 1, MR_VERS_LEN -2,
+		"FSC firmware revision: %02x, straps %x",
+		GET_BITS(7, 0, fsc), GET_BITS(10, 8, fsc));
+
+	/*
+	 * VOLT:
+	 * Report all voltages the hardware can set.
+	 */
+	cv = mr_sbox_rl(0, SBOX_COREVOLT);
+	volt.set = vid2volt(GET_BITS(7, 0, cv));
+	mr_mk_cv_lst();
+
+	/*
+	 * FREQ:
+	 * In FreeBSD uOS the reference (nominal) frequency
+	 * is simply the value read from the SBOX at boot time.
+	 * We'll do the same and set 'def' to the same as 'current'.
+	 * Report all frequencies the hardware can set.
+	 */
+	cf = mr_sbox_rl(0, SBOX_COREFREQ);
+	freq.def = mr_mt_cf_r2f(GET_BITS(7, 0, cf));
+	mr_mk_cf_lst();
+
+	/*
+	 * GDDR:
+	 * See layout of scratch #9 in 'common'.
+	 * 23:16	Clock ratio encoding
+	 * 28:24	External clock frequency
+	 */
+	scr9 = mr_sbox_rl(0, SBOX_SCRATCH9);
+	gddr.speed = 2 * mr_mt_gf_r2f(GET_BITS(23, 16, scr9));
+
+	/*
+	 * GVOLT:
+	 * Report all voltages the hardware can set.
+	 * Kind of silly as these cannot be changed from uOS.
+	 * Cheat and set 'def' to the same as 'current'.
+	 */
+	gv = mr_sbox_rl(0, SBOX_MEMVOLT);
+	gvolt.set = vid2volt(GET_BITS(7, 0, gv));
+
+	/*
+	 * GFREQ:
+	 * Report all values the hardware can set.
+	 * Kind of silly as these cannot be changed from uOS.
+	 * Cheat and set 'ref' to the same as 'current'.
+	 */
+	gfreq.def = mr_mt_gf_r2f(GET_BITS(23, 16, scr9));
+	mr_mk_gf_lst();
+
+	/*
+	 * POWER:
+	 * In case the FSC is not working, or FSC support is not
+	 * compiled in, preset all power readings as invalid.
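+	 *
+	 * (The 3's in the initializer below are the status fields being
+	 * preset to "invalid"; get_fsc_pwr() later in this file sets
+	 * p_val = 3 in the same way when an FSC read fails.)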
+ */ + { + struct mr_rsp_power tmp = {{0, 3}, {0, 3}, {0, 3}, + {0, 3}, {0, 3}, {0, 3}, {0, 3}, + {0, 0, 0, 3, 3, 3}, + {0, 0, 0, 3, 3, 3}, + {0, 0, 0, 3, 3, 3}}; + power = tmp; + } + + /* + *TBD: Save card registers this module may change + */ +} + +void __exit +mr_mt_card_exit(void) +{ + /* + *TBD: Restore card registers this module may change + */ +} + + + +/* +** +** Card specific 'Get' functions +** +*/ + +int +mr_get_volt(void * p) +{ + struct mr_rsp_volt * r; + uint32_t cv, fsc; + + + cv = mr_sbox_rl(0, SBOX_COREVOLT); + volt.set = vid2volt(GET_BITS(7, 0, cv)); + + fsc = mr_sbox_rl(0, SBOX_BOARD_VOLTAGE_SENSE); + volt.cur = bvs2volt(GET_BITS(15, 0, fsc)); + + r = (struct mr_rsp_volt *) p; + *r = volt; + return sizeof(*r); +} + +int +mr_get_freq(void * p) +{ + struct mr_rsp_freq * r; + uint32_t cf; + + cf = mr_sbox_rl(0, SBOX_COREFREQ); + freq.cur = mr_mt_cf_r2f(GET_BITS(7, 0, cf)); + + r = (struct mr_rsp_freq *) p; + *r = freq; + return sizeof(*r); +} + +#if USE_FSC +/* + * Get Power stats from the FSC + */ +static void +get_fsc_pwr(uint32_t req, struct mr_rsp_pws * pws) +{ + uint32_t fsc; + + /* + * Read the FSC status + */ + fsc_mgbr_write(MR_FSC_MGBR_GEN_CMD, &req); + if (fsc_mgbsr_read(&fsc)) + pws->p_val = 3; + else { + pws->p_val = 0; + pws->prr = 1000000 * GET_BITS(15, 0, fsc); + } +} +#endif + +int +mr_get_power(void * p) +{ + struct mr_rsp_power * r; + +#if USE_FSC + uint8_t prev_cmd; + uint32_t prev_dat; + + /* + * Backup current OVERRIDE register + */ + prev_cmd = mgbr_cmd; + prev_dat = mgbr_dat; + + /* + * Get Power stats from the FSC + */ + get_fsc_pwr(MR_FSC_PWR_TOT, &power.tot0); + get_fsc_pwr(MR_FSC_PWR1_TOT, &power.inst); + get_fsc_pwr(MR_FSC_PWR_PCIE, &power.pcie); + get_fsc_pwr(MR_FSC_PWR_2X3, &power.c2x3); + get_fsc_pwr(MR_FSC_PWR_2X4, &power.c2x4); + + /* + * Revert to normal or fan 1 speed override mode if needed. + */ + if (fan1_ovr) + fsc_mgbr_write(MR_FSC_MGBR_OVR_CMD, &fan1_ovr); + else + fsc_mgbr_write(prev_cmd, &prev_dat); +#endif + + r = (struct mr_rsp_power *) p; + *r = power; + return sizeof(*r); +} + + +int +mr_get_plim(void * p) +{ + struct mr_rsp_plim * r; + +#if USE_FSC + uint32_t fsc, req, ofs; + + /* + * Read the FSC status + */ + req = MR_FSC_PMC_CFG; + fsc_mgbr_write(MR_FSC_MGBR_GEN_CMD, &req); + if (! 
fsc_mgbsr_read(&fsc)) { + ofs = 5 * GET_BITS(3, 0, fsc); + if (GET_BIT(4, fsc)) + plim.phys = 300 - ofs; + else + plim.phys = 300 + ofs; + plim.hmrk = plim.lmrk = plim.phys; + } +#endif + + r = (struct mr_rsp_plim *) p; + *r = plim; + return sizeof(*r); +} + + +int +mr_get_gfreq(void * p) +{ + struct mr_rsp_gfreq * r; + uint32_t gbr; + + gbr = mr_sbox_rl(0, SBOX_MEMORYFREQ); + gfreq.cur = mr_mt_gf_r2f(GET_BITS(7, 0, gbr)); + + r = (struct mr_rsp_gfreq *) p; + *r = gfreq; + return sizeof(*r); +} + + +int +mr_get_gvolt(void * p) +{ + struct mr_rsp_gvolt * r; + uint32_t gv, fsc; + + gv = mr_sbox_rl(0, SBOX_MEMVOLT); + gvolt.set = vid2volt(GET_BITS(7, 0, gv)); + + fsc = mr_sbox_rl(0, SBOX_BOARD_VOLTAGE_SENSE); + gvolt.cur = bvs2volt(GET_BITS(31, 16, fsc)); + + r = (struct mr_rsp_gvolt *) p; + *r = gvolt; + return sizeof(*r); +} + +int +mr_get_temp(void * p) +{ + struct mr_rsp_temp * r; + uint32_t btr1, btr2; /* Board temps */ + uint32_t die1, die2, die3; /* Die temps */ + uint32_t dmx1, dmx2, dmx3; /* Max die temps */ + uint32_t tsta, fsc; /* Thermal status */ + + btr1 = mr_sbox_rl(0, SBOX_BOARD_TEMP1); + btr2 = mr_sbox_rl(0, SBOX_BOARD_TEMP2); + die1 = mr_sbox_rl(0, SBOX_CURRENT_DIE_TEMP0); + die2 = mr_sbox_rl(0, SBOX_CURRENT_DIE_TEMP1); + die3 = mr_sbox_rl(0, SBOX_CURRENT_DIE_TEMP2); + dmx1 = mr_sbox_rl(0, SBOX_MAX_DIE_TEMP0); + dmx2 = mr_sbox_rl(0, SBOX_MAX_DIE_TEMP1); + dmx3 = mr_sbox_rl(0, SBOX_MAX_DIE_TEMP2); + tsta = mr_sbox_rl(0, SBOX_THERMAL_STATUS); + fsc = mr_sbox_rl(0, SBOX_STATUS_FAN2); + + /* + * Board temperatures. + * No idea of where on the board they are located, but + * guessing from FreeBSD comments they are: + * 0 Air Inlet + * 1 VCCP VR + * 2 GDDR (not sure which chip) + * 3 GDDR VR + * The temperature read from FSC #2 seems valid, but + * there's no mention of where it's measured. + * The readings does not make much sense. + * Sample readings are like this: + * fin 32 + * vccp 28 (vccp VR) + * vddq 33 (gddr VR) + * vddg 28 (FSC 2) + * So, at least 'fin' is wrong (or fan in reverse). + */ + temp.fin.cur = (btr1 & (1 << 15)) ? GET_BITS( 8, 0, btr1) : 0; + temp.vccp.cur = (btr1 & (1 << 31)) ? GET_BITS(24, 16, btr1) : 0; + temp.gddr.cur = (btr2 & (1 << 15)) ? GET_BITS( 8, 0, btr2) : 0; + temp.vddq.cur = (btr2 & (1 << 31)) ? GET_BITS(24, 16, btr2) : 0; + temp.vddg.cur = GET_BITS(19, 12, fsc); + temp.brd.cur = 0; + if (temp.fin.cur > temp.brd.cur) + temp.brd.cur = temp.fin.cur; + if (temp.vccp.cur > temp.brd.cur) + temp.brd.cur = temp.vccp.cur; + if (temp.gddr.cur > temp.brd.cur) + temp.brd.cur = temp.gddr.cur; + if (temp.vddq.cur > temp.brd.cur) + temp.brd.cur = temp.vddq.cur; + temp.fout.c_val = 3; + temp.gddr.c_val = 3; + + /* + * Die temperatures. + */ + temp.die.cur = (tsta & (1 << 31)) ? GET_BITS(30, 22, tsta) : 0; + temp.dies[0].cur = GET_BITS( 8, 0, die1); + temp.dies[1].cur = GET_BITS(17, 9, die1); + temp.dies[2].cur = GET_BITS(26, 18, die1); + temp.dies[3].cur = GET_BITS( 8, 0, die2); + temp.dies[4].cur = GET_BITS(17, 9, die2); + temp.dies[5].cur = GET_BITS(26, 18, die2); + temp.dies[6].cur = GET_BITS( 8, 0, die3); + temp.dies[7].cur = GET_BITS(17, 9, die3); + temp.dies[8].cur = GET_BITS(26, 18, die3); + + /* + * Die max temp (min is not reported to CP). 
+ */ + temp.dies[0].max = GET_BITS( 8, 0, dmx1); + temp.dies[1].max = GET_BITS(17, 9, dmx1); + temp.dies[2].max = GET_BITS(26, 18, dmx1); + temp.dies[3].max = GET_BITS( 8, 0, dmx2); + temp.dies[4].max = GET_BITS(17, 9, dmx2); + temp.dies[5].max = GET_BITS(26, 18, dmx2); + temp.dies[6].max = GET_BITS( 8, 0, dmx3); + temp.dies[7].max = GET_BITS(17, 9, dmx3); + temp.dies[8].max = GET_BITS(26, 18, dmx3); + + r = (struct mr_rsp_temp *) p; + *r = temp; + return sizeof(*r); +} + + +int +mr_get_fan(void * p) +{ + struct mr_rsp_fan * r; + uint32_t fan1, fovr; + + r = (struct mr_rsp_fan *) p; + fan1 = mr_sbox_rl(0, SBOX_STATUS_FAN1); + +#if USE_FSC + fovr = fan1_ovr; + r->override = GET_BIT(9, fovr); +#else + fovr = mr_sbox_rl(0, SBOX_SPEED_OVERRIDE_FAN); + r->override = GET_BIT(15, fovr); +#endif + + r->rpm = GET_BITS(15, 0, fan1); + if (r->override) + r->pwm = GET_BITS( 7, 0, fovr); + else + r->pwm = GET_BITS(23, 16, fan1); + + return sizeof(*r); +} + + +int +mr_get_ecc(void * p) +{ + struct mr_rsp_ecc * r; + + r = (struct mr_rsp_ecc *) p; + *r = ecc; + return sizeof(*r); +} + + +int +mr_get_trbo(void * p) +{ + struct mr_rsp_trbo * r; + + r = (struct mr_rsp_trbo *) p; + *r = trbo; + return sizeof(*r); +} + + +int +mr_get_pmcfg(void * p) +{ + struct mr_rsp_pmcfg * r; + + r = (struct mr_rsp_pmcfg *) p; + *r = pmcfg; + return sizeof(*r); +} + + +/* +** +** Card specific 'Set' functions +** Input screening takes place here (to the extent possible). +** +*/ + + +int +mr_set_volt(void * p) +{ + uint32_t cv, msk, new, val; + uint8_t vid; + int i; + + /* + * Ensure it's a supported value + */ + val = *(uint32_t *) p; + for(i = 0; i < MR_PTAB_LEN; i++) + if (volt.supt[i] == val) + break; + if (i == MR_PTAB_LEN) + return -MR_ERR_RANGE; + + /* + * Read-modify-write the core voltage VID register + */ + vid = volt2vid(val); + cv = mr_sbox_rl(0, SBOX_COREVOLT); + msk = ~PUT_BITS(7, 0, ~0); + new = (cv & msk) | PUT_BITS(7, 0, vid); + mr_sbox_wl(0, SBOX_COREVOLT, new); + printk("SetVolt: %d -> %08x (%08x)\n", val, new, cv); + + return 0; +} + + +int +mr_set_freq(void * p) +{ + uint32_t cf, msk, new, val; + uint8_t rat; + int i; + + /* + * Ensure it's a supported value + */ + val = *(uint32_t *) p; + for(i = 0; i < MR_PTAB_LEN; i++) + if (freq.supt[i] == val) + break; + if (i == MR_PTAB_LEN) + return -MR_ERR_RANGE; + + /* + * Read-modify-write the core frequency PLL register + * + *TBD: or should we just overwrite it? + * Register fields (of relevance): + * 7:0 New PLL encoding + * 16 Async Operation + * 31 Override fuse setting + */ + rat = freq2ratio(val/1000, cpu_tab, ARRAY_SIZE(cpu_tab)); + cf = mr_sbox_rl(0, SBOX_COREFREQ); + msk = ~(PUT_BITS(7, 0, ~0) | PUT_BIT(16, 1) | PUT_BIT(31, 1)); + new = (cf & msk) | PUT_BITS(7, 0, rat) | PUT_BIT(31, 1); + mr_sbox_wl(0, SBOX_COREFREQ, new); + printk("SetFreq: %d -> %08x (%08x)\n", val, new, cf); + + /* + *TBD: + * We just changed the system's base clock without + * re-calibrating the APIC timer tick counters. + * There is probably a function call for the cpu-freq + * driver to deal with this, so should we call it? + */ + + return 0; +} + + +int +mr_set_plim(void * p) +{ + plim.phys = *(uint32_t *) p; + return 0; +} + + +int +mr_set_fan(void * p) +{ + struct mr_set_fan * fc; + + /* + * Ensure operation is valid, i.e. no garbage + * in override flag (only 1 and 0 allowed) and + * that pwm is not zero (or above lower limit?) 
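+ *
+ * For example (values for illustration only): override = 1, pwm = 70
+ * is passed to the FSC below as OVR command data PUT_BIT(9, 1) | 70,
+ * forcing fan 1 to that PWM setting, while override = 0 clears the
+ * override and presumably returns fan 1 to automatic speed control.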
+ */ + fc = (struct mr_set_fan *) p; + if (GET_BITS(7, 1, fc->override) || !fc->pwm) + return -MR_ERR_RANGE; + +#if USE_FSC + { + uint32_t dat; + + /* + * Craft the default OVERRIDE command and write it to FSC + * through the MGBR register (command 0). + * This does not change the telemetry in MGBSR, so only way + * to ensure it gets registered by FSC is to wait it out + * (happens in fsc_mgbr_write function). + */ + if (fc->override) + dat = PUT_BIT(9, 1) | fc->pwm; + else + dat = 0; + fsc_mgbr_write(MR_FSC_MGBR_OVR_CMD, &dat); + } +#else + /* + * Read-modify-write the fan override register + * Control of fan #1 only, don't touch #2 + */ + { + uint32_t fcor, fco1, fco2; + + fcor = mr_sbox_rl(0, SBOX_SPEED_OVERRIDE_FAN); + fco2 = GET_BITS(31, 16, fcor); + if (fc->override) + fco1 = PUT_BIT(15, 1) | fc->pwm; + else + fco1 = 0; + mr_sbox_wl(0, SBOX_SPEED_OVERRIDE_FAN, + PUT_BITS(31, 16, fco2) | PUT_BITS(15, 0, fco1)); + } +#endif + + return 0; +} + + +int +mr_set_trbo(void * p) +{ + return 0; +} + diff --git a/ras/micras_main.c b/ras/micras_main.c new file mode 100644 index 0000000..7e92fed --- /dev/null +++ b/ras/micras_main.c @@ -0,0 +1,2650 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * RAS module driver + * + * Contains code to handle module install/deinstall + * and handling proper registration(s) to SCIF, sysfs + * pseudo file system, timer ticks, I2C driver and + * other one-time tasks. 
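+ *
+ * Other kernel modules can reach the same query/set functionality
+ * through the exported micras_mt_call() defined below.  A minimal
+ * usage sketch, assuming the caller provides a buffer large enough
+ * for the response struct:
+ *
+ *	struct mr_rsp_temp temp;
+ *	int err;
+ *
+ *	err = micras_mt_call(MR_REQ_TEMP, &temp);
+ *	if (err < 0)
+ *		... failed, err is a -MR_ERR_* code ...
+ *	else
+ *		... temp now holds die/board temperatures ...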
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "micras.h" + +#if MT_VERBOSE || MC_VERBOSE || PM_VERBOSE +/* + * For making scif_epd_t non-opague + */ +#define _MIC_MICBASEDEFINE_REGISTERS_H_ 1 +#include +#endif + +/* +** Lookup table to map API opcode into MT function. +** +** As we have to deal with both KnF and KnC, functions to +** retrieve information may be generic, in micras_common.c, +** or platform specific, in micras_kn{cf}.c. +** Code location is transparent to this table. +** +** Some MT functions can safely be called without +** serialization, e.g. if they are read-only or use +** atomics to get/set variables. The 'simple' flag tells +** which functions are safe to call without serialization. +** Other functions should be called thru micras_mt_call(). +** +** See micras_api.h and micpm_api.h for function details. +*/ + +static struct fnc_tab fnc_map[] = { + { 0, 0, 0, 0 }, + { MR_REQ_HWINF, 1, 0, mr_get_hwinf }, + { MR_REQ_VERS, 1, 0, mr_get_vers }, + { MR_REQ_CFREQ, 0, 0, mr_get_freq }, + { MR_SET_CFREQ, 0, 1, mr_set_freq }, + { MR_REQ_CVOLT, 0, 0, mr_get_volt }, + { MR_SET_CVOLT, 0, 1, mr_set_volt }, + { MR_REQ_PWR, 0, 0, mr_get_power }, + { MR_REQ_PLIM, 0, 0, mr_get_plim }, + { MR_SET_PLIM, 0, 1, mr_set_plim }, + { MR_REQ_CLST, 0, 0, mr_get_clst }, + { MR_ENB_CORE, 0, 1, 0 }, + { MR_DIS_CORE, 0, 1, 0 }, + { MR_REQ_GDDR, 1, 0, mr_get_gddr }, + { MR_REQ_GFREQ, 1, 0, mr_get_gfreq }, + { MR_SET_GFREQ, 1, 1, 0 }, + { MR_REQ_GVOLT, 1, 0, mr_get_gvolt }, + { MR_SET_GVOLT, 1, 1, 0 }, + { MR_REQ_TEMP, 0, 0, mr_get_temp }, + { MR_REQ_FAN, 0, 0, mr_get_fan }, + { MR_SET_FAN, 0, 1, mr_set_fan }, + { MR_REQ_ECC, 1, 0, mr_get_ecc }, + { MR_SET_ECC, 0, 1, 0 }, + { MR_REQ_TRC, 1, 0, mr_get_trc }, + { MR_SET_TRC, 1, 1, mr_set_trc }, + { MR_REQ_TRBO, 0, 0, mr_get_trbo }, + { MR_SET_TRBO, 0, 1, mr_set_trbo }, + { MR_REQ_OCLK, 0, 0, 0 }, + { MR_SET_OCLK, 0, 1, 0 }, + { MR_REQ_CUTL, 0, 0, mr_get_cutl }, + { MR_REQ_MEM, 0, 0, mr_get_mem }, + { MR_REQ_OS, 0, 0, mr_get_os }, + { MR_REQ_PROC, 0, 0, mr_get_proc }, + { MR_REQ_THRD, 0, 0, 0 }, + { MR_REQ_PVER, 1, 0, mr_get_pver }, + { MR_CMD_PKILL, 0, 1, mr_cmd_pkill }, + { MR_CMD_UKILL, 0, 1, mr_cmd_ukill }, +#if defined(CONFIG_MK1OM) + { MR_GET_SMC, 0, 0, mr_get_smc }, + { MR_SET_SMC, 0, 0, mr_set_smc }, +#else +#if defined(CONFIG_ML1OM) && USE_FSC + { MR_GET_SMC, 0, 0, mr_get_fsc }, + { MR_SET_SMC, 0, 1, mr_set_fsc }, +#else + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, +#endif +#endif + { MR_REQ_PMCFG, 0, 0, mr_get_pmcfg }, +#if defined(CONFIG_MK1OM) + { MR_REQ_LED, 0, 0, mr_get_led }, + { MR_SET_LED, 0, 1, mr_set_led }, + { MR_REQ_PROCHOT, 0, 0, mr_get_prochot }, + { MR_SET_PROCHOT, 0, 1, mr_set_prochot }, + { MR_REQ_PWRALT, 0, 0, mr_get_pwralt }, + { MR_SET_PWRALT, 0, 1, mr_set_pwralt }, + { MR_REQ_PERST, 0, 0, mr_get_perst }, + { MR_SET_PERST, 0, 1, mr_set_perst }, + { MR_REQ_TTL, 0, 0, mr_get_ttl }, +#else + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, + { 0, 0, 0, 0 }, +#endif +#if defined(CONFIG_MK1OM) && USE_PM + { 0, 0, 0, 0 }, + { PM_REQ_PL0, 1, 0, pm_get_pl0 }, + { PM_SET_PL0, 1, 1, pm_set_pl0 }, + { PM_REQ_PL1, 1, 0, pm_get_pl1 }, + { PM_SET_PL1, 1, 1, pm_set_pl1 }, + { PM_REQ_PAVG, 1, 0, pm_get_pavg }, + { PM_REQ_PTTL, 1, 0, 
pm_get_pttl }, + { PM_REQ_VOLT, 1, 0, pm_get_volt }, + { PM_REQ_TEMP, 1, 0, pm_get_temp }, + { PM_REQ_TACH, 1, 0, pm_get_tach }, + { PM_REQ_TTTL, 1, 0, pm_get_tttl }, + { PM_REQ_FTTL, 1, 0, pm_get_fttl }, + { PM_SET_FTTL, 1, 1, pm_set_fttl }, +#endif +}; + + + +/* +** +** The monitoring thread. +** In fact this is a work_queue, that receive work items +** from several independent parties, such as SCIF, sysfs, +** out of band telemetry, PM and possibly timers. +** +** These parties pass a structure with information necessary +** for the call-out function called by the MT thread to operate. +** These structures must include the work item structure, such +** that the container_of() mechanism can be used to locate it. +** +** The MT thread does not by itself provide any feed-back on +** when a task was executed nor the results from it. Therefore +** if a feedback is requred, then the callout needs to provide +** their own methods, such as the wait queue used by function +** micras_mt_data() below. Experiments has shown that it is not +** safe to place work item or the wait queue on a stack (no +** idea why, could be a bug). +** +*/ + +static int micras_stop; /* Module shutdown */ +static struct delayed_work micras_wq_init; /* Setup work item */ +static struct delayed_work micras_wq_tick; /* Timer tick token */ +static struct workqueue_struct * micras_wq; /* Monitor thread */ + int micras_priv; /* Call-out privileged */ + + +typedef struct wq_task { + int req; /* Request opcode */ + int rtn; /* Return value */ + int priv; /* Privileged */ + void * ptr; /* Response buffer */ + int (* fnc)(void *); /* Call out */ + struct work_struct wrk; /* Work item */ + wait_queue_head_t wqh; /* Wait queue header */ +} WqTask; + + +#if defined(CONFIG_MK1OM) && WA_4845465 +/* + * SMC die temp update job. + * + * As per HSD #4845465 we push the die temperature + * to the SMC instead of the usual reverse direction. + * This has to happen at around 50 mSec intervals, which should + * be possible with a work queue implementation. If that turns out + * not to be reliable enough we may need a more direct approach. + * During the experiment, we want to override the pushed temp. + */ + +#define DIE_PROC 1 /* Enable die temp override */ +#define SMC_PERIOD 50 /* SMC update interval, mSec */ +#define JITTER_STATS 1 /* Enable jitter measurements */ + +static struct delayed_work micras_wq_smc; /* SMC update token */ +static int smc_4845465; /* SMC push capable */ +#if DIE_PROC +static int die_override; /* Temperature override */ +#endif + +static void +micras_mt_smc(struct work_struct *work) +{ + extern int mr_smc_wr(uint8_t, uint32_t *); + static uint64_t n; + uint32_t tmp; + uint32_t ts2, mfs; + + if (! micras_stop) { + /* + * Re-arm for a callback in about 1 second. + * There is no guarantee this will be more than approximate. + */ + queue_delayed_work(micras_wq, &micras_wq_smc, msecs_to_jiffies(SMC_PERIOD)); + } + +#if JITTER_STATS + /* + * Time the interval in order to get some + * measurement on what jitter to expect. + * Leave a log message once every minute. + */ + { + static uint64_t d, t1, t2, s, hi, lo = ~0; + + t2 = rdtsc(); + if (n) { + d = t2 - t1; + s += d; + if (d > hi) + hi = d; + if (d < lo) + lo = d; +#if 1 + { + /* + * Show jitter in buckets representing 5 mSec. + * The center (#20) represent +- 2.5 mSec from reference. + * It is assumed TSC running at 1.1 GHz here, if PM kicks + * in the mesurements may be way off because it manipulate + * the system clock and indirectly the jiffy counter. 
+ * It is assumed TSC running at 1.1 GHz here. + */ + static uint64_t buckets[41]; + int bkt; + int64_t err; + + err = ((d * 10) / 11) - (50 * 1000 * 1000); + if (err < -(25 * 100 * 1000)) + bkt = 19 + (err + (25 * 100 * 1000)) / (5 * 1000 * 1000); + else + if (err > (25 * 100 * 1000)) + bkt = 21 + (err - (25 * 100 * 1000)) / (5 * 1000 * 1000); + else + bkt = 20; + if (bkt < 0) + bkt = 0; + if (bkt > 40) + bkt = 40; + buckets[bkt]++; + if ((n % ((10 * 1000)/SMC_PERIOD)) == ((10 * 1000)/SMC_PERIOD) - 1) { + printk("smc_upd: dist"); + for(bkt = 0; bkt < 41; bkt++) { + if (bkt == 20) + printk(" | %lld |", buckets[bkt]); + else + printk(" %lld", buckets[bkt]); + } + printk("\n"); + } + } +#endif + if ((n % ((60 * 1000)/SMC_PERIOD)) == ((60 * 1000)/SMC_PERIOD) - 1) + printk("smc_upd: %lld, min %lld, max %lld, avg %lld\n", n, lo, hi, s / n); + } + t1 = t2; + } +#endif /* JITTER_STATS */ + + /* + * Send update to SMC to register 0x50. + * The value to push at the SMC must have following content + * + * Bits 9:0 Device Temperature + * -> THERMAL_STATUS_2 bits 19:10 + * Bit 10 Valid bit + * -> THERMAL_STATUS_2 bit 31 + * Bits 20:11 Thermal Monitor Control value + * -> THERMAL_STATUS_2 bits 9:0 + * Bits 30:21 Fan Thermal Control value + * -> MICROCONTROLLER_FAN_STATUS bits 17:8 + */ + + n++; + ts2 = mr_sbox_rl(0, SBOX_THERMAL_STATUS_2); + mfs = mr_sbox_rl(0, SBOX_MICROCONTROLLER_FAN_STATUS); + +#if DIE_PROC + if (die_override) + tmp = GET_BITS(9, 0, die_override); + else +#endif + tmp = PUT_BITS(9, 0, GET_BITS(19, 10, ts2)); + tmp |= PUT_BIT(10, GET_BIT(31, ts2)) | + PUT_BITS(20, 11, GET_BITS(9, 0, ts2)) | + PUT_BITS(30, 21, GET_BITS(17, 8, mfs)); + + if (mr_smc_wr(0x50, &tmp)) + printk("smc_upd: %lld, tmp %d, SMC write failed\n", n, tmp); +} + + +#if DIE_PROC +/* + * Test proc file to override die temperature push. + * A value of 0 means no override, any other value is + * pushed as if it was a 'device temperature'. + */ + +static struct proc_dir_entry * die_pe; + +/* + * On writes: scan input line for single number. + */ + +static ssize_t +die_write(struct file * file, const char __user * buff, size_t len, loff_t * off) +{ + char * buf; + char * ep, * cp; + unsigned long ull; + int err; + + /* + * Get input line into kernel space + */ + if (len > PAGE_SIZE -1) + len = PAGE_SIZE -1; + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (! buf) + return -ENOMEM; + if (copy_from_user(buf, buff, len)) { + err = -EFAULT; + goto wr_out; + } + buf[len] = '\0'; + cp = ep = (char *) buf; + + /* + * Read a number in strtoul format 0. + */ + while(isspace(*cp)) + cp++; + ull = simple_strtoull(cp, &ep, 0); + if (ep == cp || (*ep != '\0' && !isspace(*ep))) { + printk("Invalid die temp given\n"); + err = -EINVAL; + goto wr_out; + } + + die_override = GET_BITS(9, 0, ull); + printk("Die temp override set to %d C\n", die_override); + + /* + * Swallow any trailing junk up to next newline + */ + ep = strchr(buf, '\n'); + if (ep) + cp = ep + 1; + err = cp - buf; + +wr_out: + kfree(buf); + return err; +} + + +/* + * On reads: return string of current override temp. 
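+ *
+ * Usage sketch (the proc node is created elsewhere; the path below
+ * is only a placeholder):
+ *
+ *	echo 85 > /proc/<die-temp-node>		push 85 C instead of sensor
+ *	echo 0  > /proc/<die-temp-node>		revert to real die readings
+ *	cat /proc/<die-temp-node>		show current override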
+ */ + +static ssize_t +die_read(struct file * file, char __user * buff, size_t count, loff_t *ppos) +{ + char buf[32]; + size_t len; + + len = snprintf(buf, sizeof(buf), "%d\n", die_override); + return simple_read_from_buffer(buff, count, ppos, buf, len); +} + + +static const struct file_operations proc_die_operations = { + .read = die_read, + .write = die_write, + .llseek = no_llseek, +}; +#endif /* DIE_PROC */ +#endif /* WA_4845465 */ + + +/* + * Timer tick job + * + * This is for periodic updates from the SMC, + * which (with a little luck) can be avoided + * at the cost of I2C communications during + * actual CP queries. + */ + +static void +micras_mt_tick(struct work_struct *work) +{ +#if MT_TIMER + static int n; + + n++; + if (! micras_stop) { + /* + * Re-arm for a callback in about 1 second. + * There is no guarantee this will be more than approximate. + */ + queue_delayed_work(micras_wq, &micras_wq_tick, msecs_to_jiffies(MT_PERIOD)); + } + + /* + * Dump elog prints into the kernel log + *TBD: debug tool, time-shifts messages, remove eventually. + */ + { + int msg_top, msg_id; + char * buf; + + msg_id = atomic_read(&ee_seen); + msg_top = atomic_read(&ee_msg); + while(++msg_id <= msg_top) { + buf = ee_buf + (msg_id % EE_BUF_COUNT) * EE_BUF_LINELEN; + if (! *buf) + break; + printk("%s", buf); + *buf = '\0'; + atomic_inc(&ee_seen); + } + } +#endif +} + + +/* + * Handle SCIF & sysfs show/store requests + * + * By convention we know that the work item is member of + * a larger struct, which can readily be found using the + * container_of mechanism. + * + * Otherwise this routine just calls the function stored + * in the larger struct's mt_data element, and on its + * return wake up whoever is waiting for it's completion. + */ + +static void +micras_mt_data(struct work_struct * work) +{ + struct wq_task * wq; + + wq = container_of(work, struct wq_task, wrk); + micras_priv = wq->priv; + wq->rtn = wq->fnc(wq->ptr); + micras_priv = 0; + wake_up_all(& wq->wqh); +} + + +/* + * Helper to pass jobs (work items) to the monitoring thread. + * + * As input it receives details on function to be called, one + * argument to pass to that function, the opcode associated + * with the function and a function return value. The latter + * will be set to -MR_ERR_PEND, and we'll expect the callout + * function to change it. + * + * The work item is the only piece of information passed to + * the work queue callout, so we'll wrap it into a larger + * structure along with the received details such that the + * work queue can perform a function call on our behalf. + */ + +static int +micras_mt_tsk(struct wq_task * wq) +{ + int err; + +#if MT_VERBOSE + uint64_t start, stop; + start = rdtsc(); +#endif + + /* + * Create a work item for the RAS thread, + * enqueue and wait for it's completion. + * + *TBD: Timeout length to be revisited + */ + wq->rtn = -MR_ERR_PEND; + INIT_WORK_ONSTACK(&wq->wrk, micras_mt_data); + init_waitqueue_head(&wq->wqh); + queue_work(micras_wq, &wq->wrk); + err = wait_event_interruptible_timeout(wq->wqh, + wq->rtn != -MR_ERR_PEND, msecs_to_jiffies(1000)); + + /* + * Check for potential errors, which for now can only be + * "interrupted" or "timeout". In both cases try cancel the work + * item from MT thread. If cancel succeds (returns true) then + * the work item was still "pending" and is now removed from the + * work queue, i.e. it is safe to continue (with error). 
+ * Otherwise, the cancel operation will wait for the work item's + * call-out function to finish, which kind of defies the purpose + * of "interruptable". However, we cannot leave until it is certain + * that it will not be accessed by the RAS thread. + */ + if (err == -ERESTARTSYS || err == 0) { + printk("MT tsk: interrupted or failure, err %d\n", err); + printk("MT tsk: FAILED: cmd %d, rtn %d, fnc %p, ptr %p\n", + wq->req, wq->rtn, wq->fnc, wq->ptr); + + err = cancel_work_sync(&wq->wrk); + printk("MT tsk: work canceled (%d)\n", err); + } + + /* + * Completed, turn interrupts and timeouts into MR errors. + */ + err = wq->rtn; + if (err == -MR_ERR_PEND) + err = -MR_ERR_NOVAL; + +#if MT_VERBOSE + stop = rdtsc(); + printk("MT tsk: cmd %d, err %d, time %llu\n", wq->req, err, stop - start); +#endif + return err; +} + + +/* + * Public interface to the MT functions + * Caller responsible for passing a buffer large enough + * to hold data for reads or writes (1 page will do, + * but structs matching the commands are recommended). + * Returned data are structs defined in micras.h + */ + +int +micras_mt_call(uint16_t cmd, void * buf) +{ + struct wq_task * wq; + int err; + + if (micras_stop) + return -MR_ERR_UNSUP; + + if (cmd > MR_REQ_MAX) + return -MR_ERR_INVOP; + + err = -MR_ERR_UNSUP; + if (fnc_map[cmd].fnc) { + if (fnc_map[cmd].simple) { + /* + * Fast access, just call function + */ + err = fnc_map[cmd].fnc(buf); + } + else { + /* + * Slow access, go through serializer. + * We allocate a work queue task for the MT thread, + * stuff arguments in it, run task, and then free + * work queue task. + */ + wq = kmalloc(sizeof(* wq), GFP_KERNEL); + if (! wq) { + printk("Scif: CP work task alloc failed\n"); + return -MR_ERR_NOMEM; + } + + memset(wq, '\0', sizeof(*wq)); + wq->req = cmd; + wq->priv = 1; + wq->fnc = (int (*)(void *)) fnc_map[cmd].fnc; + wq->ptr = buf; + err = micras_mt_tsk(wq); + + kfree(wq); + } + } + + return err; +} +EXPORT_SYMBOL_GPL(micras_mt_call); + + + +/* +** +** The sysfs nodes provided by this module is not really associated +** with a 'struct device', since we don't create device entries for +** access through '/dev'. Instead we register a 'struct class' +** with nodes defined with the CLASS_ATTR macro. +** Reasons for this choice are: +** - we don't want a device node created +** - we don't need (at least now) to create udev events +** - we don't act on suspend/resume transitions +** - we don't want to have our files unnecessarily deep +** in the sysfs file system. +** +** The sysfs layout is intended to look like: +** +** /sys/class/micras/ Root of this driver +** /clst Core information +** /cutl Core utilization +** /ecc Error correction mode +** /fan Fan controller +** /freq Core frequency +** /gddr GDDR devices +** /gfreq GDDR speed +** /gvolt GDDR voltage +** /hwinf Hardware Info +** /mem Memory utilization +** /os OS status +** /plim Power envelope +** /power Card power +** /temp Board tempearatures +** /trbo Turbo mode +** /trc Trace level +** /vers uOS/Flash version +** /volt Core voltage +** +** The following should be removed as there are better tools +** available in /proc//{stat|status|smap}, /proc/meminfo, +** /proc/stat, /proc/uptime, /proc/loadavg, and /proc/cpuinfo: +** clst, cutl, mem, os +** +** Below we hand-craft a 'micras' class to sit under '/sys/class' +** with attribute nodes directly under it. 
Each attribute may +** have a 'show' and a 'store' handler, both called with a reference +** to its class (ras_class, may hold private data), it's class_attribute, +** a buffer reference, and for 'store's a string length. The buffer +** passed to 'show' is one page (PAGE_SIZE, 4096) which sets the +** upper limit on the return string(s). Return value of 'store' +** has to be either an error code (negative) or the count of bytes +** consumed. If consumed less than what's passed in, the store routine +** will be called again until all input data has been consumed. +** +** Function pointers are hardwired by the macros below since it +** is easy and simpler than using the fnc_map table. This may +** change if the command set expands uncontrolled. +** We have local helper funtions to handle array prints. +** Any locking required is handled in called routines, not here. +** +** Note: This is not coded for maximum performance, since the +** use of the MT thread to serialize access to card data +** has a cost of two task switches attached, both which +** may cause delays due to other system activity. +** +*/ + + +/* + * Hack alert! + * Formatting routines for arrays of 16/32/64 bit unsigned ints. + * This reduces the printf argument list in _SHOW() macros below + * considerably, though perhaps at a cost in code efficiency. + * They need a scratch buffer in order to construct long lines. + * A quick swag at the largest possible response tells that we'll + * never exceed half if the page we are given to scribble into. + * So, instead of allocating print space, we'll simply use 2nd + * half of the page as scratch buffer. + */ + +#define BP (buf + (PAGE_SIZE/2)) /* Scratch pad location */ +#define BL (PAGE_SIZE/2 - 1) /* Scratch size */ + + +static char * +arr16(int16_t * arr, int len, char * buf, int siz) +{ + int n, bs; + + bs = 0; + for(n = 0; n < len && bs < siz; n++) + bs += scnprintf(buf + bs, siz - bs, "%s%u", n ? " " : "", arr[n]); + buf[bs] = '\0'; + + return buf; +} + + +static char * +arr32(uint32_t * arr, int len, char * buf, int siz) +{ + int n, bs; + + bs = 0; + for(n = 0; n < len && bs < siz; n++) + bs += scnprintf(buf + bs, siz - bs, "%s%u", n ? " " : "", arr[n]); + buf[bs] = '\0'; + + return buf; +} + + +static char * +arr64(uint64_t * arr, int len, char * buf, int siz) +{ + int n, bs; + + bs = 0; + for(n = 0; n < len && bs < siz; n++) + bs += scnprintf(buf + bs, siz - bs, "%s%llu", n ? " " : "", arr[n]); + buf[bs] = '\0'; + + return buf; +} + + +#define _SHOW(op,rec,nam,str...) \ + static ssize_t \ + micras_show_##nam(struct class *class, \ + struct class_attribute *attr, \ + char *buf) \ + { \ + struct mr_rsp_##rec * r; \ + struct wq_task * wq; \ + int len; \ + int err; \ +\ + wq = kmalloc(sizeof(* wq) + sizeof(* r), GFP_KERNEL); \ + if (! 
wq) \ + return -ENOMEM; \ +\ + memset(wq, '\0', sizeof(* wq)); \ + r = (struct mr_rsp_##rec *)(wq + 1); \ + wq->req = MR_REQ_##op; \ + wq->fnc = (int (*)(void *)) mr_get_##nam; \ + wq->ptr = r; \ + err = micras_mt_tsk(wq); \ +\ + if (err < 0) { \ + len = 0; \ + *buf = '\0'; \ + } \ + else { \ + len = scnprintf(buf, PAGE_SIZE, ##str); \ + } \ +\ + kfree(wq); \ + return len; \ + } + +_SHOW(HWINF, hwinf, hwinf, "%u %u %u %u %u %u " + "%c%c%c%c%c%c%c%c%c%c%c%c " + "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", + r->rev, r->step, r->substep, r->board, r->fab, r->sku, + r->serial[0], r->serial[1], r->serial[2], r->serial[3], + r->serial[4], r->serial[5], r->serial[6], r->serial[7], + r->serial[8], r->serial[9], r->serial[10], r->serial[11], + r->guid[0], r->guid[1], r->guid[2], r->guid[3], + r->guid[4], r->guid[5], r->guid[6], r->guid[7], + r->guid[8], r->guid[9], r->guid[10], r->guid[11], + r->guid[12], r->guid[13], r->guid[14], r->guid[15]); + +_SHOW(VERS, vers, vers, "\"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\"\n", + r->fboot0 +1, r->fboot1 +1, r->flash[0] +1, + r->flash[1] +1, r->flash[2] +1, r->fsc +1, r->uos +1) + +_SHOW(CFREQ, freq, freq, "%u %u %s\n", + r->cur, r->def, arr32(r->supt, r->slen, BP, BL)) + +_SHOW(CVOLT, volt, volt, "%u %u %s\n", + r->cur, r->set, arr32(r->supt, r->slen, BP, BL)) + +#if defined(CONFIG_MK1OM) || (defined(CONFIG_ML1OM) && USE_FSC) +_SHOW(PWR, power, power, "%d\n%d\n%d\n%d\n%d\n%d\n%d\n%s\n%s\n%s\n", + r->tot0.prr, + r->tot1.prr, + r->inst.prr, + r->imax.prr, + r->pcie.prr, + r->c2x3.prr, + r->c2x4.prr, + arr32(&r->vccp.pwr, 3, BP, 32), + arr32(&r->vddg.pwr, 3, BP + 32, 32), + arr32(&r->vddq.pwr, 3, BP + 64, 32)) + +_SHOW(PLIM, plim, plim, "%u %u %u\n", + r->phys, r->hmrk, r->lmrk) +#endif + +_SHOW(CLST, clst, clst, "%u %u\n", + r->count, r->thr) + +_SHOW(GDDR, gddr, gddr, "\"%s\" %u %u %u\n", + r->dev +1, r->rev, r->size, r->speed) + +_SHOW(GFREQ, gfreq, gfreq, "%u %u\n", + r->cur, r->def) + +_SHOW(GVOLT, gvolt, gvolt, "%u %u\n", + r->cur, r->set) + +_SHOW(TEMP, temp, temp, "%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n", + arr16(&r->die.cur, 2, BP, 32), + arr16(&r->brd.cur, 2, BP + 32, 32), + arr16(&r->fin.cur, 2, BP + 64, 32), + arr16(&r->fout.cur, 2, BP + 96, 32), + arr16(&r->gddr.cur, 2, BP + 128, 32), + arr16(&r->vccp.cur, 2, BP + 160, 32), + arr16(&r->vddg.cur, 2, BP + 224, 32), + arr16(&r->vddq.cur, 2, BP + 256, 32)) + +_SHOW(FAN, fan, fan, "%u %u %u\n", + r->override, r->pwm, r->rpm) + +#ifdef CONFIG_MK1OM +_SHOW(ECC, ecc, ecc, "%d\n", + r->enable) +#endif + +_SHOW(TRC, trc, trc, "%d\n", + r->lvl) + +_SHOW(TRBO, trbo, trbo, "%d %d %d\n", + r->set, r->state, r->avail) + +#ifdef CONFIG_MK1OM +_SHOW(LED, led, led, "%d\n", + r->led) + +_SHOW(PROCHOT, ptrig, prochot, "%d %d\n", + r->power, r->time); + +_SHOW(PWRALT, ptrig, pwralt, "%d %d\n", + r->power, r->time); + +_SHOW(PERST, perst, perst, "%d\n", + r->perst); + +_SHOW(TTL, ttl, ttl, "%u %u %u %u\n%u %u %u %u\n%u %u %u %u\n", + r->thermal.active, r->thermal.since, r->thermal.count, r->thermal.time, + r->power.active, r->power.since, r->power.count, r->power.time, + r->alert.active, r->alert.since, r->alert.count, r->alert.time); +#endif + +_SHOW(CUTL, cutl, cutl, "%u %u %u %llu\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n...\n", + r->tck, r->core, r->thr, r->jif, + arr64(&r->sum.user, 4, BP, 80), + arr64(&r->cpu[0].user, 4, BP + 80, 80), + arr64(&r->cpu[1].user, 4, BP + 160, 80), + arr64(&r->cpu[2].user, 4, BP + 240, 80), + arr64(&r->cpu[3].user, 4, BP + 320, 80), + arr64(&r->cpu[4].user, 4, BP + 
400, 80), + arr64(&r->cpu[5].user, 4, BP + 480, 80), + arr64(&r->cpu[6].user, 4, BP + 560, 80), + arr64(&r->cpu[7].user, 4, BP + 640, 80)) + +_SHOW(MEM, mem, mem, "%u %u %u\n", + r->total, r->free, r->bufs) + +_SHOW(OS, os, os, "%llu %llu %llu %llu %u [%s]\n", + r->uptime, r->loads[0], r->loads[1], r->loads[2], + r->alen, arr32(r->apid, r->alen, BP, BL)) + + +/* + * Ensure caller's creditials is root on all 'set' files. + * Even though file creation mode should prevent writes? + * + *TBD: + * - How many of the 'store's are to be permitted? + */ + +#define _STORE(op, nam) \ + static ssize_t \ + micras_store_##nam (struct class *class, \ + struct class_attribute *attr, \ + const char *buf, \ + size_t count) \ + { \ + struct wq_task * wq; \ + size_t ocount; \ + uint32_t val; \ + int err; \ + char * ep; \ +\ + if (current_euid() != 0) \ + return -EPERM; \ +\ + ocount = count; \ + if (count && buf[count - 1] == '\n') \ + ((char *) buf)[--count] = '\0'; \ +\ + err = -EINVAL; \ + if (count && *buf) { \ + val = simple_strtoul(buf, &ep, 0); \ + if (ep != buf && !*ep) { \ + wq = kmalloc(sizeof(* wq), GFP_KERNEL); \ + if (! wq) \ + return -ENOMEM; \ +\ + wq->req = MR_SET_##op; \ + wq->fnc = (int (*)(void *)) mr_set_##nam; \ + wq->ptr = (void *) &val; \ + if (! micras_mt_tsk(wq)) \ + err = ocount; \ + kfree(wq); \ + } \ + } \ +\ + return err; \ + } + +_STORE(CFREQ, freq) +_STORE(CVOLT, volt) + +#if defined(CONFIG_MK1OM) || (defined(CONFIG_ML1OM) && USE_FSC) +_STORE(PLIM, plim) +#endif + +_STORE(FAN, fan) +_STORE(TRC, trc) +_STORE(TRBO, trbo) + +#ifdef CONFIG_MK1OM +_STORE(LED, led) +_STORE(PERST, perst) +#endif + + +/* + *TBD: + * - Remove entries clst, cutl, mem, and os. + * Only included here for comparison with what cp/micinfo displays. + * They really need to go. + */ + +static struct class_attribute micras_attr[] = { + __ATTR(hwinf, 0444, micras_show_hwinf, 0), + __ATTR(vers, 0444, micras_show_vers, 0), + __ATTR(freq, 0644, micras_show_freq, micras_store_freq), + __ATTR(volt, 0644, micras_show_volt, micras_store_volt), +#if defined(CONFIG_MK1OM) || (defined(CONFIG_ML1OM) && USE_FSC) + __ATTR(power, 0444, micras_show_power, 0), + __ATTR(plim, 0644, micras_show_plim, micras_store_plim), +#endif + __ATTR(clst, 0444, micras_show_clst, 0), + __ATTR(gddr, 0444, micras_show_gddr, 0), + __ATTR(gfreq, 0444, micras_show_gfreq, 0), + __ATTR(gvolt, 0444, micras_show_gvolt, 0), + __ATTR(fan, 0644, micras_show_fan, micras_store_fan), + __ATTR(temp, 0444, micras_show_temp, 0), +#ifdef CONFIG_MK1OM + __ATTR(ecc, 0444, micras_show_ecc, 0), +#endif + __ATTR(trc, 0644, micras_show_trc, micras_store_trc), + __ATTR(trbo, 0644, micras_show_trbo, micras_store_trbo), +#ifdef CONFIG_MK1OM + __ATTR(led, 0644, micras_show_led, micras_store_led), + __ATTR(prochot, 0444, micras_show_prochot, 0), + __ATTR(pwralt, 0444, micras_show_pwralt, 0), + __ATTR(perst, 0644, micras_show_perst, micras_store_perst), + __ATTR(ttl, 0444, micras_show_ttl, 0), +#endif + __ATTR(cutl, 0444, micras_show_cutl, 0), + __ATTR(mem, 0444, micras_show_mem, 0), + __ATTR(os, 0444, micras_show_os, 0), + __ATTR_NULL, +}; + + +static struct class ras_class = { + .name = "micras", + .owner = THIS_MODULE, + .class_attrs = micras_attr, +}; + + + +/* +** +** SCIF interface & services are mostly handled here, including +** all aspects of setting up and tearing down SCIF channels. +** We create three listening SCIF sockets and create a workqueue +** with the initial task of waiting for 'accept's to happen. 
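+**
+** For orientation: after scif_connect() to the CP port (MR_MON_PORT)
+** a host-side peer speaks a simple request/response protocol. The
+** fragment below is only an illustrative sketch; the field names
+** follow the mr_hdr/mr_err usage in the session loop further down,
+** and the exact layouts live in micras.h:
+**
+**   struct mr_hdr q, a;
+**   q.cmd = MR_REQ_HWINF;                 opcode, MR_RESP bit clear
+**   q.len = 0;                            requests carry no payload
+**   q.parm = 0;                           new value for 'set' opcodes
+**   scif_send(ep, &q, sizeof(q), SCIF_SEND_BLOCK);
+**   scif_recv(ep, &a, sizeof(a), SCIF_RECV_BLOCK);
+**   if (a.len > 0)
+**     scif_recv(ep, buf, a.len, SCIF_RECV_BLOCK);
+**
+** In the reply a.cmd is q.cmd with MR_RESP set; if MR_ERROR is set
+** as well, the a.len payload is a struct mr_err rather than the
+** response struct defined in micras.h.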
+** +** When TTL or MC accept incoming connections, their workqueue +** task spawns one thread just to detect if/when peer closes +** the session and will block any further connects until thes +** service thread terminates (peer closes session). +** The TTL or MC event handler, executing in interrupt context, +** will check for an open session and if one is present, deliver +** their event record(s) on it by using scif_send(). +** +** When CP accept incoming connections, its workqueue task spawns +** a new thread to run a session with the peer and then proceeds +** to accepting a new connection. Thus, there are no strict +** bounds on number of incoming connections, but for internal +** house-keeping sessions are limited to MR_SCIF_MAX (32). +** Accepted requests from the peer are fulfilled through the +** MT thread in a similar fashion as the sysctl interface, i.e. +** though function micras_mt_tsk(), who guarantee synchronized +** (serialized) access to MT core data and handle waits as needed. +** Function pointers corresponding to request opcodes are found +** by lookup in the fnc_map table. +** +** Note: This is not coded for maximum performance, since the +** use of the MT thread to serialize access to card data +** has a cost of two task switches attached, both which +** may cause delays due to other system activity. +*/ + + +static scif_epd_t micras_cp_lstn; /* CP listener handle */ +static struct workqueue_struct * micras_cp_wq; /* CP listener thread */ +static atomic_t micras_cp_rst; /* CP listener restart */ +static struct delayed_work micras_cp_tkn; /* CP accept token */ +static DECLARE_BITMAP(micras_cp_fd, MR_SCIF_MAX); /* CP free slots */ +static volatile struct scif_portID micras_cp_si[MR_SCIF_MAX]; /* CP sessions */ +static volatile struct task_struct * micras_cp_kt[MR_SCIF_MAX]; /* CP threads */ +static volatile scif_epd_t micras_cp_ep[MR_SCIF_MAX]; /* CP handles */ + +static scif_epd_t micras_mc_lstn; /* MC listener handle */ +static struct workqueue_struct * micras_mc_wq; /* MC listener thread */ +static struct delayed_work micras_mc_tkn; /* MC accept token */ +static volatile struct task_struct * micras_mc_kt; /* MC session */ +static volatile scif_epd_t micras_mc_ep; /* MC handle */ + +static scif_epd_t micras_ttl_lstn; /* TTL listener handle */ +static struct workqueue_struct * micras_ttl_wq; /* TTL listener thread */ +static struct delayed_work micras_ttl_tkn; /* TTL accept token */ +static volatile struct task_struct * micras_ttl_kt; /* TTL session */ +static volatile scif_epd_t micras_ttl_ep; /* TTL handle */ + + +/* + * SCIF CP session thread + */ + +static int +micras_cp_sess(void * _slot) +{ + struct wq_task * wq; + struct mr_hdr q, a; + scif_epd_t ep; + uint32_t slot; + void * buf; + uint64_t start, stop; + int blen, len, priv; + + slot = (uint32_t)((uint64_t) _slot); + priv = (micras_cp_si[slot].port < 1024) ? 1 : 0; +#if MT_VERBOSE + printk("Scif: CP session %d running%s\n", slot, priv ? " privileged" : ""); +#endif + + /* + * Allocate local buffer from kernel + * Since the I/O buffers in SCIF is just one page, + * we'd never expect to need larger buffers here. + */ + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (! buf) { + printk("Scif: CP scratch pad alloc failed\n"); + return 0; + } + + /* + * Allocate a work queue task for the MT thread + */ + wq = kmalloc(sizeof(* wq), GFP_KERNEL); + if (! 
wq) { + printk("Scif: CP work task alloc failed\n"); + goto cp_sess_end; + } + + /* + * Start servicing MT protocol + */ + ep = micras_cp_ep[slot]; + for( ;; ) { + + /* + * Get a message header + */ + len = scif_recv(ep, &q, sizeof(q), SCIF_RECV_BLOCK); + start = rdtsc(); + if (len < 0) { + if (len != -ECONNRESET) + printk("Scif: CP recv error %d\n", len); + goto cp_sess_end; + } + if (len != sizeof(q)) { + printk("Scif: CP short recv (%d), discarding\n", len); + continue; + } + + /* + * Validate the query: + * - known good opcode, + * - expected length (zero) + * - have callout in jump table + * - check requestor's port ID on privileged opcodes. + * + *TBD: opcodes above MR_REQ_MAX is really only meant for + * use by the PM module. Should it be host accessible? + */ + blen = 0; + if (q.cmd < MR_REQ_HWINF || +#if defined(CONFIG_MK1OM) && USE_PM + q.cmd > PM_REQ_MAX +#else + q.cmd > MR_REQ_MAX +#endif + ) { + printk("Scif: CP opcode %d invalid\n", q.cmd); + blen = -MR_ERR_INVOP; + } + else + if (q.len != 0) { + printk("Scif: CP command length %d invalid\n", q.len); + blen = -MR_ERR_INVLEN; + } + else + if (! fnc_map[q.cmd].fnc) { + printk("Scif: CP opcode %d un-implemented\n", q.cmd); + blen = -MR_ERR_UNSUP; + } + else + if (fnc_map[q.cmd].privileged && !priv) { + printk("Scif: CP opcode %d privileged, remote %d:%d\n", + q.cmd, micras_cp_si[slot].node, micras_cp_si[slot].port); + blen = -MR_ERR_PERM; + } + + /* + *TBD: If there is an error at this point, it might + * be a good idea to drain the SCIF channel. + * If garbage has entered the channel somehow, + * then how else can we get in sync such that + * next recv really is a command header? + * More radical solution is closing this session. + */ + + /* + * If header is OK (blen still zero) then pass + * a work queue item to MT and wait for response. + * The result will end up in buf (payload for response) + * or an error code that can be sent back to requestor. + * Since we don't want to care about whether it is a + * get or set command here, the 'parm' value is copied + * into buf prior to passing the work item to MT. + * Thus, functions expecting an 'uint32_t *' to + * point to a new value will be satisfied. + */ + if (blen == 0) { + if (fnc_map[q.cmd].simple) { + *((uint32_t *) buf) = q.parm; + blen = fnc_map[q.cmd].fnc(buf); + } + else { + memset(wq, '\0', sizeof(*wq)); + wq->req = q.cmd; + wq->priv = priv; + wq->fnc = (int (*)(void *)) fnc_map[q.cmd].fnc; + wq->ptr = buf; + *((uint32_t *) buf) = q.parm; + blen = micras_mt_tsk(wq); + } + } + stop = rdtsc(); + + /* + * Craft response header + */ + a.cmd = q.cmd | MR_RESP; + if (blen < 0) { + /* + * MT thread reported a failure. 
+ * Set error bit and make error record in buf + */ + a.cmd |= MR_ERROR; + ((struct mr_err *) buf)->err = -blen; + ((struct mr_err *) buf)->len = 0; + a.len = sizeof(struct mr_err); + } + else { + /* + * Payload size is set by call-out + */ + a.len = blen; + } + a.stamp = q.stamp; + a.spent = stop - start; + + /* + * Send response header (always) + */ + len = scif_send(ep, &a, sizeof(a), SCIF_SEND_BLOCK); + if (len < 0) { + printk("Scif: header send error %d\n", len); + goto cp_sess_end; + } + if (len != sizeof(a)) { + printk("Scif: CP short header send (%d of %lu)\n", len, sizeof(a)); + goto cp_sess_end; + } + + /* + * Send payload (if any, defined by a.len) + */ + if (a.len > 0) { + len = scif_send(ep, buf, a.len, SCIF_SEND_BLOCK); + if (len < 0) { + printk("Scif: CP payload send error %d\n", len); + goto cp_sess_end; + } + if (len != a.len) { + printk("Scif: CP short payload send (%d of %d)\n", len, a.len); + goto cp_sess_end; + } + } + + } + +cp_sess_end: + if (wq) + kfree(wq); + if (buf) + kfree(buf); + ep = (scif_epd_t) atomic64_xchg((atomic64_t *)(micras_cp_ep + slot), 0); + if (ep) + scif_close(ep); + micras_cp_kt[slot] = 0; + set_bit(slot, micras_cp_fd); +#if MT_VERBOSE + printk("Scif: CP session %d terminated, sess mask %lx\n", slot, micras_cp_fd[0]); +#endif + + if (atomic_xchg(&micras_cp_rst, 0)) { + printk("Scif: resume listener\n"); + queue_delayed_work(micras_cp_wq, &micras_cp_tkn, 0); + } + + return 0; +} + + +/* + * SCIF CP session launcher + */ + +static void +micras_cp(struct work_struct * work) +{ + struct task_struct * thr; + scif_epd_t sess_ep; + struct scif_portID sess_id; + int slot; + int err; + + /* + * Wait for somebody to connect to us + * We stop listening on any error whatsoever + */ + err = scif_accept(micras_cp_lstn, &sess_id, &sess_ep, SCIF_ACCEPT_SYNC); + if (err == -EINTR) { + printk("Scif: CP accept interrupted, error %d\n", err); + return; + } + if (err < 0) { + printk("Scif: CP accept failed, error %d\n", err); + return; + } +#if MT_VERBOSE + printk("Scif: CP accept: remote %d:%d, local %d:%d\n", + sess_id.node, sess_id.port, + micras_cp_lstn->port.node, micras_cp_lstn->port.port); +#endif + + /* + * Spawn a new thread to run session with connecting peer + * We support only a limited number of connections, so first + * get a free "slot" for this session. + * The use of non-atomic ffs() below is safe as long as this + * function is never run by more than one thread at a time + * and all other manipulations of micras_cp_fd are atomic. + */ + slot = find_first_bit(micras_cp_fd, MR_SCIF_MAX); + if (slot < MR_SCIF_MAX) { + if (micras_cp_kt[slot] || micras_cp_ep[slot]) { + printk("Scif: CP slot %d busy (bug)\n", slot); + return; + } + + clear_bit(slot, micras_cp_fd); + micras_cp_ep[slot] = sess_ep; + micras_cp_si[slot] = sess_id; + thr = kthread_create(micras_cp_sess, (void *)(uint64_t) slot, "RAS CP svc %d", slot); + if (IS_ERR(thr)) { + printk("Scif: CP service thread creation failed\n"); + scif_close(sess_ep); + micras_cp_ep[slot] = 0; + set_bit(slot, micras_cp_fd); + return; + } + micras_cp_kt[slot] = thr; +#if MT_VERBOSE + printk("Scif: CP session %d launched, pid %d\n", slot, thr->pid); +#endif + wake_up_process(thr); + } + else { + printk("Scif: No open session slots, closing session\n"); + scif_close(sess_ep); + } + + /* + * Keep listening until session limit reached. 
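+ * When no free slot remains the accept work is deliberately not
+ * re-queued; micras_cp_rst is set instead, and the session thread
+ * that terminates next re-queues micras_cp_tkn from the tail of
+ * micras_cp_sess(), which resumes the listener.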
+ */ + if (bitmap_weight(micras_cp_fd, MR_SCIF_MAX)) + queue_delayed_work(micras_cp_wq, &micras_cp_tkn, 0); + else { + printk("Scif: CP connection limit reached\n"); + atomic_xchg(&micras_cp_rst, 1); + } +} + + +/* + * SCIF MC session thread + */ + +static int +micras_mc_sess(void * dummy) +{ + scif_epd_t ep; + char buf[8]; + int len; + +#if MC_VERBOSE + printk("Scif: MC session running\n"); +#endif + + /* + * Start servicing. + * This is just to get indication if peer closes connection + */ + for( ;; ) { + /* + * Sync with kernel MC event log. + */ + mcc_sync(); + + /* + * Try read 1 byte from host (turns into a wait-point + * keeping the connection open till host closes it) + */ + len = scif_recv(micras_mc_ep, buf, 1, SCIF_RECV_BLOCK); + if (len < 0) { + if (len != -ECONNRESET) + printk("Scif: MC recv error %d\n", len); + goto mc_sess_end; + } + + /* + * Ignore any received content. + */ + } + +mc_sess_end: + ep = (scif_epd_t) atomic64_xchg((atomic64_t *) &micras_mc_ep, 0); + if (ep) + scif_close(ep); + micras_mc_kt = 0; +#if MC_VERBOSE + printk("Scif: MC session terminated\n"); +#endif + return 0; +} + + +/* + * SCIF MC session launcher + */ + +static void +micras_mc(struct work_struct * work) +{ + struct task_struct * thr; + scif_epd_t sess_ep; + struct scif_portID sess_id; + int err; + + /* + * Wait for somebody to connect to us + * We stop listening on any error whatsoever + */ + err = scif_accept(micras_mc_lstn, &sess_id, &sess_ep, SCIF_ACCEPT_SYNC); + if (err == -EINTR) { + printk("Scif: MC accept interrupted, error %d\n", err); + return; + } + if (err < 0) { + printk("Scif: MC accept failed, error %d\n", err); + return; + } +#if MC_VERBOSE + printk("Scif: MC accept: remote %d:%d, local %d:%d\n", + sess_ep->peer.node, sess_ep->peer.port, + sess_ep->port.node, sess_ep->port.port); +#endif + + /* + * Spawn a new thread to run session with connecting peer + * We support only one connection, so if one already is + * running this one will be rejected. + */ + if (! micras_mc_ep) { + micras_mc_ep = sess_ep; + thr = kthread_create(micras_mc_sess, 0, "RAS MC svc"); + if (IS_ERR(thr)) { + printk("Scif: MC service thread creation failed\n"); + scif_close(sess_ep); + micras_mc_ep = 0; + return; + } + micras_mc_kt = thr; + wake_up_process(thr); + } + else { + printk("Scif: MC connection limit reached\n"); + scif_close(sess_ep); + } + + /* + * Keep listening + */ + queue_delayed_work(micras_mc_wq, &micras_mc_tkn, 0); +} + + +/* + * Ship a pre-packaged machine check event record to host + */ + +#ifndef SCIF_BLAST +#define SCIF_BLAST 2 +#endif + +int +micras_mc_send(struct mce_info * mce, int exc) +{ + if (micras_mc_ep) { + int err; + +#if ADD_DIE_TEMP + err = mr_sbox_rl(0, SBOX_THERMAL_STATUS_2); + mce->flags |= PUT_BITS(15, 8, GET_BITS(19, 10, err)); +#endif + + if (exc) { + /* + * Exception context SCIF access, can't sleep and can't + * wait on spinlocks either. May be detrimental to + * other scif communications, but this _is_ an emergency + * and we _do_ need to ship this message to the host. + */ + err = scif_send(micras_mc_ep, mce, sizeof(*mce), SCIF_BLAST); + if (err != sizeof(*mce)) + ee_printk("micras_mc_send: scif_send failed, err %d\n", err); + } + else { + /* + * Thread context SCIF access. + * Just send message. 
+ */ + err = scif_send(micras_mc_ep, mce, sizeof(*mce), SCIF_SEND_BLOCK); + if (err != sizeof(*mce)) + printk("micras_mc_send: scif_send failed, err %d\n", err); + } + return err == sizeof(*mce); + } + return 0; +} + + +/* + * SCIF TTL session thread + */ + +static int +micras_ttl_sess(void * dummy) +{ + scif_epd_t ep; + char buf[8]; + int len; + +#if PM_VERBOSE + printk("Scif: TTL session running\n"); +#endif + + /* + * Start servicing. + * This is just to get indication if peer closes connection + */ + for( ;; ) { + /* + * Try read 1 byte from host (turns into a wait-point + * keeping the connection open till host closes it) + */ + len = scif_recv(micras_ttl_ep, buf, 1, SCIF_RECV_BLOCK); + if (len < 0) { + if (len != -ECONNRESET) + printk("Scif: TTL recv error %d\n", len); + goto ttl_sess_end; + } + + /* + * Ignore any received content. + */ + } + +ttl_sess_end: + ep = (scif_epd_t) atomic64_xchg((atomic64_t *) &micras_ttl_ep, 0); + if (ep) + scif_close(ep); + micras_ttl_kt = 0; +#if PM_VERBOSE + printk("Scif: TTL session terminated\n"); +#endif + return 0; +} + + +/* + * SCIF TTL session launcher + */ + +static void +micras_ttl(struct work_struct * work) +{ + struct task_struct * thr; + scif_epd_t sess_ep; + struct scif_portID sess_id; + int err; + + /* + * Wait for somebody to connect to us + * We stop listening on any error whatsoever + */ + err = scif_accept(micras_ttl_lstn, &sess_id, &sess_ep, SCIF_ACCEPT_SYNC); + if (err == -EINTR) { + printk("Scif: TTL accept interrupted, error %d\n", err); + return; + } + if (err < 0) { + printk("Scif: TTL accept failed, error %d\n", err); + return; + } +#if PM_VERBOSE + printk("Scif: TTL accept: remote %d:%d, local %d:%d\n", + sess_ep->peer.node, sess_ep->peer.port, + sess_ep->port.node, sess_ep->port.port); +#endif + + /* + * Spawn a new thread to run session with connecting peer + * We support only one connection, so if one already is + * running this one will be rejected. + */ + if (! micras_ttl_ep) { + micras_ttl_ep = sess_ep; + thr = kthread_create(micras_ttl_sess, 0, "RAS TTL svc"); + if (IS_ERR(thr)) { + printk("Scif: TTL service thread creation failed\n"); + scif_close(sess_ep); + micras_ttl_ep = 0; + return; + } + micras_ttl_kt = thr; + wake_up_process(thr); + } + else { + printk("Scif: TTL connection limit reached\n"); + scif_close(sess_ep); + } + + /* + * Keep listening + */ + queue_delayed_work(micras_ttl_wq, &micras_ttl_tkn, 0); +} + + +/* + * Ship a pre-packaged throttle event record to host + */ + +void +micras_ttl_send(struct ttl_info * ttl) +{ + static struct ttl_info split_rec; + static int split_rem; + int err; + char * cp; + + if (micras_ttl_ep) { + + if (split_rem) { + cp = ((char *) &split_rec) + (sizeof(*ttl) - split_rem); + err = scif_send(micras_ttl_ep, cp, split_rem, 0); + if (err == split_rem) { + /* + * Tx of pendig buffer complete + */ + split_rem = 0; + } + else { + if (err < 0) { + /* + * SCIF failed squarely, just drop the message. + * TBD: close end point? + */ + } + else { + /* + * Another partial send + */ + split_rem -= err; + } + } + } + + if (! split_rem) { + /* + * Send message + */ + err = scif_send(micras_ttl_ep, ttl, sizeof(*ttl), 0); + if (err != sizeof(*ttl)) { + /* + * Did not send all the message + */ + if (err < 0) { + /* + * SCIF failed squarely, drop the message. + * TBD: close end point? 
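+ * A short (but non-negative) return is handled in the else branch
+ * below: the record is parked in split_rec and split_rem counts the
+ * bytes still outstanding, so the next call flushes that tail before
+ * it transmits a new record.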
+ */ + } + else { + split_rec = *ttl; + split_rem = sizeof(*ttl) - err; + } + } + } + } +} + + + +/* +** +** MMIO regions used by RAS module +** Until some common strategy on access to BOXes and other CSRs +** we'll map them ourselves. All MMIO accesses are performed +** through 32 bit unsigned integers, but a 64 bit abstraction +** is provided for convenience (low 32 bit done first). +** +** We need access to the SBOX, all GBOXs, TBOXs and DBOXs. +** +** Note: I2C driver code for exception context in micras_elog.c +** has its own set of I/O routines in order to allow +** separate debugging. +** +*/ + +uint8_t * micras_sbox; /* SBOX mmio region */ +uint8_t * micras_dbox[DBOX_NUM]; /* DBOX mmio region */ +uint8_t * micras_gbox[GBOX_NUM]; /* GBOX mmio regions */ +#ifdef CONFIG_MK1OM +uint8_t * micras_tbox[TBOX_NUM]; /* TBOX mmio regions */ +#endif + +/* + * Specials: some defines are currently missing + */ + +#ifdef CONFIG_MK1OM +#define DBOX1_BASE 0x0800620000ULL + +#define GBOX4_BASE 0x08006D0000ULL +#define GBOX5_BASE 0x08006C0000ULL +#define GBOX6_BASE 0x08006B0000ULL +#define GBOX7_BASE 0x08006A0000ULL +#endif + + +/* + * MMIO I/O dumpers (for debug) + * Exception mode code needs to use the ee_print dumpers + * because printk is not safe to use (works most of the time + * though, but may hang the system eventually). + */ +#if 0 +#if 0 +extern atomic_t pxa_block; +#define RL if (! atomic_read(&pxa_block)) ee_print("%s: %4x -> %08x\n", __FUNCTION__, roff, val) +#define RQ if (! atomic_read(&pxa_block)) ee_print("%s: %4x -> %016llx\n", __FUNCTION__, roff, val) +#define WL if (! atomic_read(&pxa_block)) ee_print("%s: %4x <- %08x\n", __FUNCTION__, roff, val) +#define WQ if (! atomic_read(&pxa_block)) ee_print("%s: %4x <- %016llx\n", __FUNCTION__, roff, val) +#else +#define RL printk("%s: %4x -> %08x\n", __FUNCTION__, roff, val) +#define RQ printk("%s: %4x -> %016llx\n", __FUNCTION__, roff, val) +#define WL printk("%s: %4x <- %08x\n", __FUNCTION__, roff, val) +#define WQ printk("%s: %4x <- %016llx\n", __FUNCTION__, roff, val) +#endif +#else +#define RL /* As nothing */ +#define RQ /* As nothing */ +#define WL /* As nothing */ +#define WQ /* As nothing */ +#endif + + +/* + * SBOX MMIO I/O routines + * mr_sbox_base Return SBOX MMIO region + * mr_sbox_rl Read 32-bit register + * mr_sbox_rq Read 64-bit register (really two 32-bit reads) + * mr_sbox_wl Write 32-bit register + * mr_sbox_wq Write 64-bit register (really two 32-bit writes) + */ + +#if NOT_YET +uint8_t * +mr_sbox_base(int dummy) +{ + return micras_sbox; +} +#endif + +uint32_t +mr_sbox_rl(int dummy, uint32_t roff) +{ + uint32_t val; + + val = * (volatile uint32_t *)(micras_sbox + roff); + RL; + return val; +} + +uint64_t +mr_sbox_rq(int dummy, uint32_t roff) +{ + uint32_t hi, lo; + uint64_t val; + + lo = * (volatile uint32_t *)(micras_sbox + roff); + hi = * (volatile uint32_t *)(micras_sbox + roff + 4); + val = ((uint64_t) hi << 32) | (uint64_t) lo; + RQ; + return val; +} + +void +mr_sbox_wl(int dummy, uint32_t roff, uint32_t val) +{ + WL; + * (volatile uint32_t *)(micras_sbox + roff) = val; +} + +void +mr_sbox_wq(int dummy, uint32_t roff, uint64_t val) +{ + uint32_t hi, lo; + + WQ; + lo = val; + hi = val >> 32; + + * (volatile uint32_t *)(micras_sbox + roff) = lo; + * (volatile uint32_t *)(micras_sbox + roff + 4) = hi; +} + + +/* + * DBOX MMIO I/O routines + * mr_dbox_base Return DBOX MMIO region + * mr_dbox_rl Read 32-bit register + * mr_dbox_rq Read 64-bit register (really two 32-bit reads) + * mr_dbox_wl Write 32-bit register + * 
mr_dbox_wq Write 64-bit register (really two 32-bit writes) + */ + +#if NOT_YET +uint8_t * +mr_dbox_base(int unit) +{ + return micras_dbox[unit]; +} +#endif + +uint32_t +mr_dbox_rl(int unit, uint32_t roff) +{ + uint32_t val; + + val = * (volatile uint32_t *)(micras_dbox[unit] + roff); + RL; + return val; +} + +uint64_t +mr_dbox_rq(int unit, uint32_t roff) +{ + uint32_t hi, lo; + uint64_t val; + + lo = * (volatile uint32_t *)(micras_dbox[unit] + roff); + hi = * (volatile uint32_t *)(micras_dbox[unit] + roff + 4); + val = ((uint64_t) hi << 32) | (uint64_t) lo; + RQ; + return val; +} + +void +mr_dbox_wl(int unit, uint32_t roff, uint32_t val) +{ + WL; + * (volatile uint32_t *)(micras_dbox[unit] + roff) = val; +} + +void +mr_dbox_wq(int unit, uint32_t roff, uint64_t val) +{ + uint32_t hi, lo; + + WQ; + lo = val; + hi = val >> 32; + + * (volatile uint32_t *)(micras_dbox[unit] + roff) = lo; + * (volatile uint32_t *)(micras_dbox[unit] + roff + 4) = hi; +} + + +/* + * GBOX MMIO I/O routines + * mr_gbox_base Return GBOX MMIO region + * mr_gbox_rl Read 32-bit register + * mr_gbox_rq Read 64-bit register (really two 32-bit reads) + * mr_gbox_wl Write 32-bit register + * mr_gbox_wq Write 64-bit register (really two 32-bit writes) + * + * Due to a Si bug, MMIO writes can be dropped by the GBOXs + * during heavy DMA activity (HSD #4844222). The risk of it + * happening is low enough that a 'repeat until it sticks' + * workaround is sufficient. No 'read' issues so far. + * + *TBD: Ramesh asked that GBOX MMIOs check for sleep states. + * Not sure how to do that, but here is a good spot to + * add such check, as all GBOX access comes thru here. + */ + +#if NOT_YET +uint8_t * +mr_gbox_base(int unit) +{ + return micras_gbox[unit]; +} +#endif + +uint32_t +mr_gbox_rl(int unit, uint32_t roff) +{ + uint32_t val; + + val = * (volatile uint32_t *)(micras_gbox[unit] + roff); + RL; + return val; +} + +uint64_t +mr_gbox_rq(int unit, uint32_t roff) +{ + uint32_t hi, lo; + uint64_t val; + + lo = * (volatile uint32_t *)(micras_gbox[unit] + roff); + if (roff == 0x5c) { + /* + * Instead of placing HI part of MCA_STATUS + * at 0x60 to form a natural 64-bit register, + * it located at 0xac, against all conventions. + */ + hi = * (volatile uint32_t *)(micras_gbox[unit] + 0xac); + } + else + hi = * (volatile uint32_t *)(micras_gbox[unit] + roff + 4); + val = ((uint64_t) hi << 32) | (uint64_t) lo; + RQ; + return val; +} + +void +mr_gbox_wl(int unit, uint32_t roff, uint32_t val) +{ +#if !GBOX_WORKING + { + int rpt; + uint32_t rb; + + /* + * Due to bug HSD 4844222 loop until value sticks + */ + for(rpt = 10; rpt-- ; ) { +#endif + + WL; + * (volatile uint32_t *)(micras_gbox[unit] + roff) = val; + +#if !GBOX_WORKING + rb = mr_gbox_rl(unit, roff); + if (rb == val) + break; + } + } +#endif +} + +void +mr_gbox_wq(int unit, uint32_t roff, uint64_t val) +{ + uint32_t hi, lo; + + lo = val; + hi = val >> 32; + +#if !GBOX_WORKING + { + int rpt; + uint64_t rb; + + /* + * Due to bug HSD 4844222 loop until value sticks + * Note: this may result in bad things happening if + * wrinting to a MMIO MCA STATUS register + * since there is a non-zero chance that the + * NMI handler can fire and change the register + * inside this loop. Require that the caller + * is on same CPU as the NMI handler (#0). 
+ */ + for(rpt = 10; rpt-- ; ) { +#endif + + WQ; + * (volatile uint32_t *)(micras_gbox[unit] + roff) = lo; + if (roff == 0x5c) { + /* + * Instead of placing HI part of MCA_STATUS + * at 0x60 to form a natural 64-bit register, + * it located at 0xac, against all conventions. + */ + * (volatile uint32_t *)(micras_gbox[unit] + 0xac) = hi; + } + else + * (volatile uint32_t *)(micras_gbox[unit] + roff + 4) = hi; + +#if !GBOX_WORKING + rb = mr_gbox_rq(unit, roff); + if (rb == val) + break; + } + } +#endif +} + + +#ifdef CONFIG_MK1OM +/* + * TBOX MMIO I/O routines + * mr_tbox_base Return TBOX MMIO region + * mr_tbox_rl Read 32-bit register + * mr_tbox_rq Read 64-bit register (really two 32-bit reads) + * mr_tbox_wl Write 32-bit register + * mr_tbox_wq Write 64-bit register (really two 32-bit writes) + * + * Some SKUs don't have TBOXs, in which case the + * micras_tbox array will contain null pointers. + * We do not test for this here, but expect that + * caller either know what he's doing or consult + * the mr_tbox_base() function first. + */ + +#if NOT_YET +uint8_t * +mr_tbox_base(int unit) +{ + return micras_tbox[unit]; +} +#endif + +uint32_t +mr_tbox_rl(int unit, uint32_t roff) +{ + uint32_t val; + + val = * (volatile uint32_t *)(micras_tbox[unit] + roff); + RL; + return val; +} + +uint64_t +mr_tbox_rq(int unit, uint32_t roff) +{ + uint32_t hi, lo; + uint64_t val; + + lo = * (volatile uint32_t *)(micras_tbox[unit] + roff); + hi = * (volatile uint32_t *)(micras_tbox[unit] + roff + 4); + val = ((uint64_t) hi << 32) | (uint64_t) lo; + RQ; + return val; +} + +void +mr_tbox_wl(int unit, uint32_t roff, uint32_t val) +{ + WL; + * (volatile uint32_t *)(micras_tbox[unit] + roff) = val; +} + +void +mr_tbox_wq(int unit, uint32_t roff, uint64_t val) +{ + uint32_t hi, lo; + + WQ; + lo = val; + hi = val >> 32; + + * (volatile uint32_t *)(micras_tbox[unit] + roff) = lo; + * (volatile uint32_t *)(micras_tbox[unit] + roff + 4) = hi; +} +#endif + + + +/* +** +** SMP utilities for CP and MC. +** The kernel offers routines for MSRs, but as far +** as I could find then there isn't any for some +** CPU registers we need, like CR4. +** +** rd_cr4_on_cpu Read a CR4 value on CPU +** set_in_cr4_on_cpu Set bits in CR4 on a CPU +** clear_in_cr4_on_cpu Guess... +** rdtsc Read time stamp counter +** +**TBD: Special case when CPU happens to be current? +*/ + +#if NOT_YET +static void +_rd_cr4_on_cpu(void * p) +{ + *((uint32_t *) p) = read_cr4(); +} + +uint32_t +rd_cr4_on_cpu(int cpu) +{ + uint32_t cr4; + + smp_call_function_single(cpu, _rd_cr4_on_cpu, &cr4, 1); + return cr4; +} + +static void +_set_in_cr4_on_cpu(void * p) +{ + uint32_t cr4; + + cr4 = read_cr4(); + cr4 |= * (uint32_t *) p; + write_cr4(cr4); +} + +void +set_in_cr4_on_cpu(int cpu, uint32_t m) +{ + smp_call_function_single(cpu, _set_in_cr4_on_cpu, &m, 1); +} + +static void +_clear_in_cr4_on_cpu(void * p) +{ + uint32_t cr4; + + cr4 = read_cr4(); + cr4 &= ~ *(uint32_t *) p; + write_cr4(cr4); +} + +void +clear_in_cr4_on_cpu(int cpu, uint32_t m) +{ + smp_call_function_single(cpu, _clear_in_cr4_on_cpu, &m, 1); +} +#endif + +uint64_t +rdtsc(void) { + uint32_t lo, hi; + __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); + return ((uint64_t) hi) << 32 | lo; +} + + + +/* +** +** Module load/unload logic +** +*/ + + +/* + * Startup job (run by MT thread) + * Intended to handle tasks that cannot impact + * module load status, such as kicking off service + * work queues, etc. 
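+ * It runs once on the MT work queue, roughly half a second after
+ * micras_init() queues it, and the module_put() at its end pairs
+ * with the try_module_get() taken in micras_init() just before the
+ * work is queued.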
+ */ + +static void +micras_init2(struct work_struct * work) +{ + /* + * Make MT one-time setup and kick + * off 1 sec timer and SCIF listeners + */ + if (! micras_stop) { + + INIT_DELAYED_WORK(&micras_wq_tick, micras_mt_tick); + queue_delayed_work(micras_wq, &micras_wq_tick, msecs_to_jiffies(5000)); + + bitmap_fill(micras_cp_fd, MR_SCIF_MAX); + INIT_DELAYED_WORK(&micras_cp_tkn, micras_cp); + queue_delayed_work(micras_cp_wq, &micras_cp_tkn, 0); + + INIT_DELAYED_WORK(&micras_mc_tkn, micras_mc); + queue_delayed_work(micras_mc_wq, &micras_mc_tkn, 0); + + INIT_DELAYED_WORK(&micras_ttl_tkn, micras_ttl); + queue_delayed_work(micras_ttl_wq, &micras_ttl_tkn, 0); + +#if defined(CONFIG_MK1OM) && WA_4845465 && DIE_PROC + if (smc_4845465) + die_pe = proc_create("die", 0644, 0, &proc_die_operations); +#endif + + printk("RAS.init: module operational\n"); + module_put(THIS_MODULE); + } +} + + +static int __init +micras_init(void) +{ + int i; + int err; + + printk("Loading RAS module ver %s. Build date: %s\n", RAS_VER, __DATE__); + + /* + * Create work queue for the monitoring thread + * and pass it some initial work to start with. + */ +#if defined(CONFIG_MK1OM) && WA_4845465 + micras_wq = alloc_workqueue("RAS MT", WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, 1); +#else + micras_wq = create_singlethread_workqueue("RAS MT"); +#endif + if (! micras_wq) { + err = -ESRCH; + printk("RAS.init: cannot start work queue, error %d\n", err); + goto fail_wq; + } + + /* + * Register top sysfs class (directory) and attach attributes (files) + * beneath it. No 'device's involved. + */ + err = class_register(&ras_class); + if (err) { + printk("RAS.init: cannot register class 'micras', error %d\n", err); + goto fail_class; + } + + /* + * Setup CP SCIF port in listening mode + */ + micras_cp_lstn = scif_open(); + if (! micras_cp_lstn) { + printk("RAS.init: cannot get SCIF CP endpoint\n"); + goto fail_cp; + } + err = scif_bind(micras_cp_lstn, MR_MON_PORT); + if (err < 0) { + printk("RAS.init: cannot bind SCIF CP endpoint, error %d\n", err); + goto fail_cp_ep; + } + err = scif_listen(micras_cp_lstn, MR_SCIF_MAX); + if (err < 0) { + printk("RAS.init: cannot make SCIF CP listen, error %d\n", err); + goto fail_cp_ep; + } + micras_cp_wq = create_singlethread_workqueue("RAS CP listen"); + if (! micras_cp_wq) { + err = -ESRCH; + printk("RAS.init: cannot start CP listener work queue, error %d\n", err); + goto fail_cp_ep; + } + + /* + * Setup MC SCIF port in listening mode + */ + micras_mc_lstn = scif_open(); + if (! micras_mc_lstn) { + printk("RAS.init: cannot get SCIF MC endpoint\n"); + goto fail_mc; + } + err = scif_bind(micras_mc_lstn, MR_MCE_PORT); + if (err < 0) { + printk("RAS.init: cannot bind SCIF MC endpoint, error %d\n", err); + goto fail_mc_ep; + } + err = scif_listen(micras_mc_lstn, MR_SCIF_MAX); + if (err < 0) { + printk("RAS.init: cannot make SCIF MC listen, error %d\n", err); + goto fail_mc_ep; + } + micras_mc_wq = create_singlethread_workqueue("RAS MC listen"); + if (! micras_mc_wq) { + err = -ESRCH; + printk("RAS.init: cannot start listener work queue, error %d\n", err); + goto fail_mc_ep; + } + + /* + * Setup TTL SCIF port in listening mode + */ + micras_ttl_lstn = scif_open(); + if (! 
micras_ttl_lstn) { + printk("RAS.init: cannot get SCIF TTL endpoint\n"); + goto fail_ttl; + } + err = scif_bind(micras_ttl_lstn, MR_TTL_PORT); + if (err < 0) { + printk("RAS.init: cannot bind SCIF TTL endpoint, error %d\n", err); + goto fail_ttl_ep; + } + err = scif_listen(micras_ttl_lstn, MR_SCIF_MAX); + if (err < 0) { + printk("RAS.init: cannot make SCIF TTL listen, error %d\n", err); + goto fail_ttl_ep; + } + micras_ttl_wq = create_singlethread_workqueue("RAS TTL listen"); + if (! micras_ttl_wq) { + err = -ESRCH; + printk("RAS.init: cannot start listener work queue, error %d\n", err); + goto fail_ttl_ep; + } + + /* + * Make the MMIO maps we need. + */ + micras_sbox = ioremap(SBOX_BASE, COMMON_MMIO_BOX_SIZE); + if (! micras_sbox) + goto fail_iomap; + + micras_dbox[0] = ioremap(DBOX0_BASE, COMMON_MMIO_BOX_SIZE); + if (! micras_dbox[0]) + goto fail_iomap; + +#ifdef CONFIG_MK1OM + micras_dbox[1] = ioremap(DBOX1_BASE, COMMON_MMIO_BOX_SIZE); + if (! micras_dbox[1]) + goto fail_iomap; +#endif + + micras_gbox[0] = ioremap(GBOX0_BASE, COMMON_MMIO_BOX_SIZE); + micras_gbox[1] = ioremap(GBOX1_BASE, COMMON_MMIO_BOX_SIZE); + micras_gbox[2] = ioremap(GBOX2_BASE, COMMON_MMIO_BOX_SIZE); + micras_gbox[3] = ioremap(GBOX3_BASE, COMMON_MMIO_BOX_SIZE); + if (!micras_gbox[0] || !micras_gbox[1] || + !micras_gbox[2] || !micras_gbox[3]) + goto fail_iomap; + +#ifdef CONFIG_MK1OM + micras_gbox[4] = ioremap(GBOX4_BASE, COMMON_MMIO_BOX_SIZE); + micras_gbox[5] = ioremap(GBOX5_BASE, COMMON_MMIO_BOX_SIZE); + micras_gbox[6] = ioremap(GBOX6_BASE, COMMON_MMIO_BOX_SIZE); + micras_gbox[7] = ioremap(GBOX7_BASE, COMMON_MMIO_BOX_SIZE); + if (!micras_gbox[4] || !micras_gbox[5] || + !micras_gbox[6] || !micras_gbox[7]) + goto fail_iomap; +#endif + +#ifdef CONFIG_MK1OM + /* + * Most SKUs don't have TBOXes. + * If not, then don't map to their MMIO space + */ + if (mr_txs()) { + micras_tbox[0] = ioremap(TXS0_BASE, COMMON_MMIO_BOX_SIZE); + micras_tbox[1] = ioremap(TXS1_BASE, COMMON_MMIO_BOX_SIZE); + micras_tbox[2] = ioremap(TXS2_BASE, COMMON_MMIO_BOX_SIZE); + micras_tbox[3] = ioremap(TXS3_BASE, COMMON_MMIO_BOX_SIZE); + micras_tbox[4] = ioremap(TXS4_BASE, COMMON_MMIO_BOX_SIZE); + micras_tbox[5] = ioremap(TXS5_BASE, COMMON_MMIO_BOX_SIZE); + micras_tbox[6] = ioremap(TXS6_BASE, COMMON_MMIO_BOX_SIZE); + micras_tbox[7] = ioremap(TXS7_BASE, COMMON_MMIO_BOX_SIZE); + if (!micras_tbox[0] || !micras_tbox[1] || + !micras_tbox[2] || !micras_tbox[3] || + !micras_tbox[4] || !micras_tbox[5] || + !micras_tbox[6] || !micras_tbox[7]) + goto fail_iomap; + } +#endif + + /* + * Setup non-volatile MC error logging device. + */ + if (ee_init()) + goto fail_iomap; + + /* + * Setup core MC event handler. + * If this can't fail, move into micras_wq_init instead. + */ + if (mcc_init()) + goto fail_ee; + + /* + * Setup un-core MC event handler. + * If this can't fail, move into micras_wq_init instead. + */ + if (mcu_init()) + goto fail_core; + + /* + * Prepare MT drivers + */ + mr_mt_init(); + +#if defined(CONFIG_MK1OM) && USE_PM + /* + * Setup PM interface + */ + if (pm_init()) + goto fail_uncore; +#endif + +#if defined(CONFIG_MK1OM) && WA_4845465 + /* + * Launch SMC temperature push work. + * Supported by SMC firmware later than 121.11 (build 4511). 
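+ * The revision word packs major, minor and build number into one
+ * 32-bit value; assuming PUT_BITS(hi, lo, v) places v in bits hi..lo,
+ * the 121.11/4511 threshold computed below works out to 0x790B119F
+ * (121 = 0x79, 11 = 0x0B, 4511 = 0x119F).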
+ */ + { + extern int mr_smc_rd(uint8_t, uint32_t *); + int rev, ref; + + mr_smc_rd(0x11, &rev); + if (rev) { + ref = PUT_BITS(31, 24, 121) | + PUT_BITS(23, 16, 11) | + PUT_BITS(15, 0, 4511); + + if (rev >= ref) + smc_4845465 = rev; + } + + if (smc_4845465) { + INIT_DELAYED_WORK(&micras_wq_smc, micras_mt_smc); + queue_delayed_work(micras_wq, &micras_wq_smc, 0); + printk("RAS.init: HSD 4845465 workaround active, fw %x\n", rev); + } + else + printk("RAS.init: SMC too old for HSD 4845465 workaround, fw %x\n", rev); + } +#endif + + /* + * Launch deferable setup work + */ + try_module_get(THIS_MODULE); + INIT_DELAYED_WORK(&micras_wq_init, micras_init2); + queue_delayed_work(micras_wq, &micras_wq_init, msecs_to_jiffies(500)); + printk("RAS module load completed\n"); + return err; + + /* + * Error exits: unwind all setup done so far and return failure + * + *TBD: consider calling exit function. Requires that it can tell + * with certainty what has been setup and what hasn't. + */ +#if defined(CONFIG_MK1OM) && USE_PM +fail_uncore: + mr_mt_exit(); + mcu_exit(); +#endif +fail_core: + mcc_exit(); +fail_ee: +#ifdef CONFIG_MK1OM + ee_exit(); +#endif +fail_iomap: + if (micras_sbox) + iounmap(micras_sbox); + for(i = 0; i < ARRAY_SIZE(micras_dbox); i++) + if (micras_dbox[i]) + iounmap(micras_dbox[i]); + for(i = 0; i < ARRAY_SIZE(micras_gbox); i++) + if (micras_gbox[i]) + iounmap(micras_gbox[i]); +#ifdef CONFIG_MK1OM + for(i = 0; i < ARRAY_SIZE(micras_tbox); i++) + if (micras_tbox[i]) + iounmap(micras_tbox[i]); +#endif + + destroy_workqueue(micras_ttl_wq); + +fail_ttl_ep: + scif_close(micras_ttl_lstn); + +fail_ttl: + destroy_workqueue(micras_mc_wq); + +fail_mc_ep: + scif_close(micras_mc_lstn); + +fail_mc: + destroy_workqueue(micras_cp_wq); + +fail_cp_ep: + scif_close(micras_cp_lstn); + +fail_cp: + class_unregister(&ras_class); + +fail_class: + micras_stop = 1; + flush_workqueue(micras_wq); + destroy_workqueue(micras_wq); + +fail_wq: + printk("RAS module load failed\n"); + return err; +} + + +static void __exit +micras_exit(void) +{ + int i; + scif_epd_t ep; + + printk("Unloading RAS module\n"); + micras_stop = 1; + + /* + * Disconnect MC event handlers and + * close the I2C eeprom interfaces. + */ + mcu_exit(); + mcc_exit(); + ee_exit(); + + /* + * Close SCIF listeners (no more connects). + */ + scif_close(micras_cp_lstn); + scif_close(micras_mc_lstn); + scif_close(micras_ttl_lstn); + msleep(10); + destroy_workqueue(micras_cp_wq); + destroy_workqueue(micras_mc_wq); + destroy_workqueue(micras_ttl_wq); + + /* + * Terminate active sessions by closing their end points. + * Session threads then should clean up after themselves. 
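+ * Closing an end point makes the blocking scif_recv() in the owning
+ * session thread return with an error, so the loops below just poll,
+ * a millisecond at a time for up to a second, until the CP slot
+ * bitmap is full again and the MC and TTL task pointers are cleared.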
+ */ + for(i = 0; i < MR_SCIF_MAX; i++) { + if (micras_cp_kt[i]) { + printk("RAS.exit: force closing CP session %d\n", i); + ep = (scif_epd_t) atomic64_xchg((atomic64_t *)(micras_cp_ep + i), 0); + if (ep) + scif_close(ep); + } + } + for(i = 0; i < 1000; i++) { + if (bitmap_weight(micras_cp_fd, MR_SCIF_MAX) == MR_SCIF_MAX) + break; + msleep(1); + } + if (micras_mc_kt) { + printk("RAS.exit: force closing MC session\n"); + ep = (scif_epd_t) atomic64_xchg((atomic64_t *) &micras_mc_ep, 0); + if (ep) + scif_close(ep); + for(i = 0; (i < 1000) && micras_mc_kt; i++) + msleep(1); + } + if (micras_ttl_kt) { + printk("RAS.exit: force closing TTL session\n"); + ep = (scif_epd_t) atomic64_xchg((atomic64_t *) &micras_ttl_ep, 0); + if (ep) + scif_close(ep); + for(i = 0; (i < 1000) && micras_ttl_kt; i++) + msleep(1); + } + + /* + * Tear down sysfs class and its nodes + */ + class_unregister(&ras_class); + +#if defined(CONFIG_MK1OM) && USE_PM + /* + * De-register with the PM module. + */ + pm_exit(); +#endif + + /* + * Shut down the work queues + */ +#if defined(CONFIG_MK1OM) && WA_4845465 + if (smc_4845465) + cancel_delayed_work(&micras_wq_smc); +#endif + cancel_delayed_work(&micras_wq_tick); + cancel_delayed_work(&micras_wq_init); + flush_workqueue(micras_wq); + destroy_workqueue(micras_wq); + + /* + * Restore MT state + */ + mr_mt_exit(); + + /* + * Remove MMIO region maps + */ + iounmap(micras_sbox); + for(i = 0; i < ARRAY_SIZE(micras_dbox); i++) + if (micras_dbox[i]) + iounmap(micras_dbox[i]); + for(i = 0; i < ARRAY_SIZE(micras_gbox); i++) + if (micras_gbox[i]) + iounmap(micras_gbox[i]); +#ifdef CONFIG_MK1OM + for(i = 0; i < ARRAY_SIZE(micras_tbox); i++) + if (micras_tbox[i]) + iounmap(micras_tbox[i]); +#endif + +#if defined(CONFIG_MK1OM) && WA_4845465 && DIE_PROC + if (smc_4845465 && die_pe) { + remove_proc_entry("die", 0); + die_pe = 0; + } +#endif + + printk("RAS module unload completed\n"); +} + +module_init(micras_init); +module_exit(micras_exit); + +MODULE_AUTHOR("Intel Corp. 2013 (" __DATE__ ") ver " RAS_VER); +MODULE_DESCRIPTION("RAS and HW monitoring module for MIC"); +MODULE_LICENSE("GPL"); + diff --git a/ras/micras_pm.c b/ras/micras_pm.c new file mode 100644 index 0000000..77172aa --- /dev/null +++ b/ras/micras_pm.c @@ -0,0 +1,1050 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. 
Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * RAS PM interface + * + * Contains code to handle interaction with the PM driver. + * This includes the initial upload of core voltages and + * frequencies, handling of 'turbo' mode, and accounting + * for and reporting of card throttles. + * This really is for KnC only. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "micras.h" +#include "monahan.h" +#include + +#if USE_PM + +static atomic_t pm_entry; /* Active calls from PM */ + + +/* + * Local variables to keep track of throttle states + * + * onoff Set to 1 if throttling is in effect, otherwise 0 + * count Count of complete throttles (not counting current). + * time Time spent in complete throttles + * start Time when current throttle started (or 0) + * + * Units of time is measured in jiffies and converted to mSecs + * at the end of a throttle period. Jiffies are lower resolution + * than mSec. If a throttle starts and ends within same jiffy, + * a standard penalty of 1/2 jiffy gets added. + * + *TBD: perhaps it's better simply to add 1/2 jiffy to every throttle + * period to compensate for rounding down errors. Would be fair + * if average throttle period is more than 1 jiffy long. + * + *TBD: Using atomics may be overkill. Calls from the RAS MT thread + * will be serialized (guaranteed), i.e. the report routine needs + * not to care about re-entrancy. + */ + +static atomic_t tmp_onoff; +static atomic_t tmp_count; +static atomic_long_t tmp_time; +static atomic_long_t tmp_start; + +static atomic_t pwr_onoff; +static atomic_t pwr_count; +static atomic_long_t pwr_time; +static atomic_long_t pwr_start; + +static atomic_t alrt_onoff; +static atomic_t alrt_count; +static atomic_long_t alrt_time; +static atomic_long_t alrt_start; + + +static void +mr_pwr_enter(void) +{ + if (atomic_xchg(&pwr_onoff, 1)) + return; + + atomic_long_set(&pwr_start, jiffies); +} + +static void +mr_pwr_leave(void) { + unsigned long then; + + if (! atomic_xchg(&pwr_onoff, 0)) + return; + + then = atomic_long_xchg(&pwr_start, 0); + atomic_inc(&pwr_count); + + if (jiffies == then) + atomic_long_add(jiffies_to_msecs(1) / 2, &pwr_time); + else + atomic_long_add(jiffies_to_msecs(jiffies - then), &pwr_time); +} + + +static void +mr_tmp_enter(void) +{ + if (atomic_xchg(&tmp_onoff, 1)) + return; + + atomic_long_set(&tmp_start, jiffies); +} + +static void +mr_tmp_leave(void) +{ + unsigned long then; + + if (! 
atomic_xchg(&tmp_onoff, 0)) + return; + + then = atomic_long_xchg(&tmp_start, 0); + atomic_inc(&tmp_count); + if (jiffies == then) + atomic_long_add(jiffies_to_msecs(1) / 2, &tmp_time); + else + atomic_long_add(jiffies_to_msecs(jiffies - then), &tmp_time); +} + + +static void +mr_alrt_enter(void) +{ + if (atomic_xchg(&alrt_onoff, 1)) + return; + + atomic_long_set(&alrt_start, jiffies); +} + +static void +mr_alrt_leave(void) +{ + unsigned long then; + + if (! atomic_xchg(&alrt_onoff, 0)) + return; + + then = atomic_long_xchg(&alrt_start, 0); + atomic_inc(&alrt_count); + if (jiffies == then) + atomic_long_add(jiffies_to_msecs(1) / 2, &alrt_time); + else + atomic_long_add(jiffies_to_msecs(jiffies - then), &alrt_time); +} + + + +/* + * Report current throttle state(s) to MT. + * Simple copy of local variables, except for the time + * measurement, where current throttle (if any) is included. + * Don't want a lock to gate access to the local variables, + * so the atomics needs to be read in the correct order. + * First throttle state, then adder if throttle is in + * progress, then counters. If PM enters or leave throttle + * while reading stats, the worst is that time for the + * current trottle is not included until next read. + */ + +int +mr_pm_ttl(struct mr_rsp_ttl * rsp) +{ + unsigned long then; + + rsp->power.since = 0; + rsp->power.active = (uint8_t) atomic_read(&pwr_onoff); + if (rsp->power.active) { + then = atomic_long_read(&pwr_start); + if (then) + rsp->power.since = jiffies_to_msecs(jiffies - then); + } + rsp->power.count = atomic_read(&pwr_count); + rsp->power.time = atomic_long_read(&pwr_time); + + rsp->thermal.since = 0; + rsp->thermal.active = (uint8_t) atomic_read(&tmp_onoff); + if (rsp->thermal.active) { + then = atomic_long_read(&tmp_start); + if (then) + rsp->thermal.since = jiffies_to_msecs(jiffies - then); + } + rsp->thermal.count = atomic_read(&tmp_count); + rsp->thermal.time = atomic_long_read(&tmp_time); + + rsp->alert.since = 0; + rsp->alert.active = (uint8_t) atomic_read(&alrt_onoff); + if (rsp->alert.active) { + then = atomic_long_read(&alrt_start); + if (then) + rsp->alert.since = jiffies_to_msecs(jiffies - then); + } + rsp->alert.count = atomic_read(&alrt_count); + rsp->alert.time = atomic_long_read(&alrt_time); + + return 0; +} + + +/* + * Throttle signaling function (call from PM) + */ + +static int ttl_tcrit; + +void +mr_throttle(int which, int state) +{ + struct ttl_info ttl; + uint32_t tmp; + + atomic_inc(&pm_entry); + + tmp = mr_sbox_rl(0, SBOX_THERMAL_STATUS_2); + ttl.die = GET_BITS(19, 10, tmp); + + /* + * PM is weird in the destinction of thermal and power throttle. + * Power below PLIM should be quiet. Power between PLim1 and PLim0 + * results in TTL_POWER events. Power above PLim0 results in both + * TTL_POWER and TTL_THERMAL events, _even_ if temperature is well + * below Tcrit. We handle this by maintaining 3 throttle related + * event types: thermal throttles, power throttles and power alert. + * The power alert is flaggend on entry as TTL_POWER, no problems. + * The two throttles both come in as TTL_THERMAL, so we use current + * die temperature to determine whether it was a thermal threshold + * or the power limit that was exceeded. Point is power throttles + * arriving while temperature is above Tcrit _will_ be counted as + * thermal throttles, period. + */ + ttl.upd = 0; + switch(which) { + case TTL_POWER: + (state == TTL_OFF) ? mr_alrt_leave() : mr_alrt_enter(); + ttl.upd |= PM_ALRT_TTL_CHG; + ttl.upd |= atomic_read(&alrt_onoff) ? 
PM_ALRT_TTL : 0; + break; + + case TTL_THERMAL: +#if 1 + /* + * Careful here: may get throttle ON while die > tcrit + * and select thermal throttle correctly and then get + * the corresponding throttle OFF when die has fallen + * below tcrit in which case we must de-assert thermal + * trottle. + * As a shortcut, we deassert both throttles if the + * GPU_HOT signal gets de-asserted (which is correct). + */ + if (state == TTL_OFF) { + if (atomic_read(&pwr_onoff)) + ttl.upd |= PM_PWR_TTL_CHG; + if (atomic_read(&tmp_onoff)) + ttl.upd |= PM_TRM_TTL_CHG; + mr_pwr_leave(); + mr_tmp_leave(); + } + else { + if (ttl_tcrit && ttl.die < ttl_tcrit) { + if (! atomic_read(&pwr_onoff)) + ttl.upd |= (PM_PWR_TTL_CHG | PM_PWR_TTL); + mr_pwr_enter(); + } + else { + if (! atomic_read(&tmp_onoff)) + ttl.upd |= (PM_TRM_TTL_CHG | PM_TRM_TTL); + mr_tmp_enter(); + } + } +#else + if (ttl_tcrit && ttl.die < ttl_tcrit) { + (state == TTL_OFF) ? mr_pwr_leave() : mr_pwr_enter(); + ttl.upd |= PM_PWR_TTL_CHG; + ttl.upd |= atomic_read(&pwr_onoff) ? PM_PWR_TTL : 0; + } + else { + (state == TTL_OFF) ? mr_tmp_leave() : mr_tmp_enter(); + ttl.upd |= PM_TRM_TTL_CHG; + ttl.upd |= atomic_read(&tmp_onoff) ? PM_TRM_TTL : 0; + } +#endif + break; + } + + micras_ttl_send(&ttl); + +#if 0 + printk("ttl - args: which %d, state %d\n", which, state); + + printk("ttl - therm: on %d, count %d, time %ld, start %ld\n", + atomic_read(&tmp_onoff), atomic_read(&tmp_count), + atomic_long_read(&tmp_time), atomic_long_read(&tmp_start)); + + printk("ttl - power: on %d, count %d, time %ld, start %ld\n", + atomic_read(&pwr_onoff), atomic_read(&pwr_count), + atomic_long_read(&pwr_time), atomic_long_read(&pwr_start)); + + printk("ttl - alert: on %d, count %d, time %ld, start %ld\n", + atomic_read(&alrt_onoff), atomic_read(&alrt_count), + atomic_long_read(&alrt_time), atomic_long_read(&alrt_start)); +#endif + + atomic_dec(&pm_entry); +} + + +/* + * Throttle signaling function (call from notifier chain) + * + * TBD: should we test for odd state transitions and recursions? + */ + +static int +mr_pm_throttle_callback(struct notifier_block *nb, unsigned long event, void *msg) +{ + atomic_inc(&pm_entry); + + switch(event) { + + case EVENT_PROCHOT_ON: + mr_throttle(TTL_THERMAL, TTL_ON); + break; + + case EVENT_PROCHOT_OFF: + mr_throttle(TTL_THERMAL, TTL_OFF); + break; + + case EVENT_PWR_ALERT_ON: + mr_throttle(TTL_POWER, TTL_ON); + break; + + case EVENT_PWR_ALERT_OFF: + mr_throttle(TTL_POWER, TTL_OFF); + break; + + default: + /* + * Ignore whatever else is sent this way + */ + break; + } + + atomic_dec(&pm_entry); + return 0; +} + + + + +/* +** +** Power management routines +** +** one_mmio_rd Read one MMIO register into memory safe +** one_mmio_wr Write one MMIO register from memory safe +** +** one_msr_rd Read one MSR register into memory safe +** one_msr_wr Write one MSR register from memory safe +** +** mc_suspend Prepare for suspend, preserve CSRs to safe +** mc_suspend_cancel Suspend canceled, restore operating mode +** mc_resume Recover from suspend, restore CSRs from safe +** +** For now this stores all registers that are used by this module. +** In reality, only those registers on power planes turned off in +** deep sleep states needs to be stored, but at this point it is +** not known which registers are in that group. This is a table +** driven mechanism that _only_ handles RAS related registers. +** +**TBD: Turn off MC handlers while in suspend? 
+** Both pro's and con's on this one, such as +** + Disabling uncore is easy, just clear INT_EN +** + prevents MC to interfere with PM state transitions +** - can hide corruption due to UC errors +** - requires a lot of IPIs to shut down core MC handling +** + there's nobody to handle MCs when cores are asleep. +** ? can events hide in *BOX banks during suspend/resume +** and fire when restoring the INT_EN register? +** - Disabling core is not that easy (from a module). +** Enabling core MCEs requires setting flag X86_CR4_MCE +** in CR4 on every core _and_ writing ~0 to MSR IA32_MCG_CAP +** on every CPU. Probably better to let per-CPU routines +** like mce_suspend() and mce_resume() handle it, with +** some care because we'd want to save all CTLs before +** mce_suspend() runs and restore them after mce_resume(). +** Problem is how to get at these functions; they are not +** exported and seems not to be hooked into the kernel's PM +** call chains. Perhaps sysclass abstraction ties into PM. +** Even so, who's to invoke it and how? +*/ + +#define SAVE_BLOCK_MCA 1 /* Disable MC handling in suspend */ +#define RAS_SAVE_MSR 1 /* Include global MSRs in suspend */ +#define RAS_SAVE_CPU_MSR 0 /* Include per-CPU MSRs in suspend */ + +#define SBOX 1 /* SBOX register (index 0) */ +#define DBOX 2 /* DBOX register (index 0..1) */ +#define GBOX 3 /* GBOX register (index 0..7) */ +#define TBOX 4 /* TBOX register (index 0..7) */ +#define GMSR 5 /* Global MSR (index 0) */ +#define LMSR 6 /* Per-CPU MSR (index 0..CONFIG_NR_CPUS-1) */ + +#define W64 (1 << 6) /* 64 bit MMIO register (32 bit default) */ +#define VLD (1 << 7) /* Register value valid, can be restored */ + +typedef struct _regrec { + uint8_t box; /* Box type + width bit + valid bit */ + uint8_t num; /* Box index (or 0) */ + uint16_t ofs; /* MMIO byte offset / MSR number */ + uint64_t reg; /* Register value */ +} RegRec; + + +/* + * Rumor has it that SBOX CSRs below 0x7000 will survive deep sleep + * Think it's safer to save/restore CSRs that RAS writes to anyways. + * We'll leave out a bunch of RO CSRs, most of which are HW status. + * SCRATCH CSRs are above 0x7000 and needs to be preserved. + * + *TBD: Somebody else to preserve scratch CSRs not used by RAS? + * For now I'll save and restore all of them. 
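+ *
+ * Editor's note (illustrative addition, not part of the original source):
+ * each entry in the table below is consumed by one_mmio_rd() and
+ * one_mmio_wr() further down. For example
+ *
+ *     { GBOX | W64, 3, GBOX_FBOX_MCA_CTL_LO, 0 }
+ *
+ * requests a 64-bit save/restore of GBOX instance 3 at that offset; the
+ * VLD flag is set by the save pass and cleared again on restore.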
+ */ + +static RegRec susp_mmio[] = { /* Used in file */ + { SBOX, 0, SBOX_MCA_INT_EN, 0 }, /* Uncore, must be 1st */ + { SBOX, 0, SBOX_SCRATCH0, 0 }, /* - */ + { SBOX, 0, SBOX_SCRATCH1, 0 }, /* - */ + { SBOX, 0, SBOX_SCRATCH2, 0 }, /* - */ + { SBOX, 0, SBOX_SCRATCH3, 0 }, /* - */ + { SBOX, 0, SBOX_SCRATCH4, 0 }, /* Common, knc, */ + { SBOX, 0, SBOX_SCRATCH5, 0 }, /* - */ + { SBOX, 0, SBOX_SCRATCH6, 0 }, /* - */ + { SBOX, 0, SBOX_SCRATCH7, 0 }, /* Knc, knf */ + { SBOX, 0, SBOX_SCRATCH8, 0 }, /* - */ + { SBOX, 0, SBOX_SCRATCH9, 0 }, /* Common, knc, knf */ + { SBOX, 0, SBOX_SCRATCH10, 0 }, /* - */ + { SBOX, 0, SBOX_SCRATCH11, 0 }, /* - */ + { SBOX, 0, SBOX_SCRATCH12, 0 }, /* - */ + { SBOX, 0, SBOX_SCRATCH13, 0 }, /* Common */ + { SBOX, 0, SBOX_SCRATCH14, 0 }, /* - */ + { SBOX, 0, SBOX_SCRATCH15, 0 }, /* - */ +// { SBOX, 0, SBOX_COMPONENT_ID, 0 }, /* Knc */ +// { SBOX, 0, SBOX_SVIDCONTROL, 0 }, /* Knc */ +// { SBOX, 0, SBOX_PCIE_PCI_SUBSYSTEM, 0 }, /* Common */ +// { SBOX, 0, SBOX_PCIE_VENDOR_ID_DEVICE_ID, 0 }, /* Common */ +// { SBOX, 0, SBOX_PCIE_PCI_REVISION_ID_AND_C_0X8, 0 },/* Common */ + { SBOX, 0, SBOX_OC_I2C_ICR + ICR_OFFSET, 0 }, /* Elog */ + { SBOX, 0, SBOX_OC_I2C_ICR + ISR_OFFSET, 0 }, /* Elog */ + { SBOX, 0, SBOX_OC_I2C_ICR + ISAR_OFFSET, 0 }, /* Elog */ + { SBOX, 0, SBOX_OC_I2C_ICR + IDBR_OFFSET, 0 }, /* Elog */ +// { SBOX, 0, SBOX_OC_I2C_ICR + IBMR_OFFSET, 0 }, /* Elog */ +// { SBOX, 0, SBOX_COREVOLT, 0 }, /* Knc, knf */ +// { SBOX, 0, SBOX_COREFREQ, 0 }, /* Knc, knf */ +// { SBOX, 0, SBOX_MEMVOLT, 0 }, /* Knc, knf */ +// { SBOX, 0, SBOX_MEMORYFREQ, 0 }, /* Knc, knf */ +// { SBOX, 0, SBOX_CURRENTRATIO, 0 }, /* Knc */ +// { SBOX, 0, SBOX_BOARD_VOLTAGE_SENSE, 0 }, /* Knc, knf */ +// { SBOX, 0, SBOX_THERMAL_STATUS, 0 }, /* Knc, knf */ +// { SBOX, 0, SBOX_BOARD_TEMP1, 0 }, /* Knc, knf */ +// { SBOX, 0, SBOX_BOARD_TEMP2, 0 }, /* Knc, knf */ +// { SBOX, 0, SBOX_CURRENT_DIE_TEMP0, 0 }, /* Knc, knf */ +// { SBOX, 0, SBOX_CURRENT_DIE_TEMP1, 0 }, /* Knc, knf */ +// { SBOX, 0, SBOX_CURRENT_DIE_TEMP2, 0 }, /* Knc, knf */ +// { SBOX, 0, SBOX_MAX_DIE_TEMP0, 0 }, /* Knc, knf */ +// { SBOX, 0, SBOX_MAX_DIE_TEMP1, 0 }, /* Knc, knf */ +// { SBOX, 0, SBOX_MAX_DIE_TEMP2, 0 }, /* Knc, knf */ +// { SBOX, 0, SBOX_STATUS_FAN1, 0 }, /* Knc, knf */ +// { SBOX, 0, SBOX_STATUS_FAN2, 0 }, /* Knc, knf */ +// { SBOX, 0, SBOX_SPEED_OVERRIDE_FAN, 0 }, /* Knc, knf */ + { SBOX, 0, SBOX_MCA_INT_STAT, 0 }, /* Uncore */ +// { SBOX, 0, SBOX_APICRT16, 0 }, /* Uncore */ + { SBOX, 0, SBOX_MCX_CTL_LO, 0 }, /* Uncore */ + { DBOX, 0, DBOX_MC2_CTL, 0 }, /* Uncore */ +#ifdef CONFIG_MK1OM + { DBOX, 1, DBOX_MC2_CTL, 0 }, /* Uncore */ +#endif + { GBOX | W64, 0, GBOX_FBOX_MCA_CTL_LO, 0 }, /* Uncore */ + { GBOX | W64, 1, GBOX_FBOX_MCA_CTL_LO, 0 }, /* Uncore */ + { GBOX | W64, 2, GBOX_FBOX_MCA_CTL_LO, 0 }, /* Uncore */ + { GBOX | W64, 3, GBOX_FBOX_MCA_CTL_LO, 0 }, /* Uncore */ +#ifdef CONFIG_MK1OM + { GBOX | W64, 4, GBOX_FBOX_MCA_CTL_LO, 0 }, /* Uncore */ + { GBOX | W64, 5, GBOX_FBOX_MCA_CTL_LO, 0 }, /* Uncore */ + { GBOX | W64, 6, GBOX_FBOX_MCA_CTL_LO, 0 }, /* Uncore */ + { GBOX | W64, 7, GBOX_FBOX_MCA_CTL_LO, 0 }, /* Uncore */ +#endif +#ifdef CONFIG_MK1OM + { TBOX | W64, 0, TXS_MCX_CONTROL, 0 }, /* Uncore */ + { TBOX | W64, 1, TXS_MCX_CONTROL, 0 }, /* Uncore */ + { TBOX | W64, 2, TXS_MCX_CONTROL, 0 }, /* Uncore */ + { TBOX | W64, 3, TXS_MCX_CONTROL, 0 }, /* Uncore */ + { TBOX | W64, 4, TXS_MCX_CONTROL, 0 }, /* Uncore */ + { TBOX | W64, 5, TXS_MCX_CONTROL, 0 }, /* Uncore */ + { TBOX | W64, 6, TXS_MCX_CONTROL, 0 }, /* Uncore */ + 
{ TBOX | W64, 7, TXS_MCX_CONTROL, 0 }, /* Uncore */ +#endif +}; + +#if RAS_SAVE_MSR +static RegRec susp_msr[] = { /* Used in file */ + { GMSR, 0, MSR_IA32_MCG_STATUS, 0 }, /* Uncore, kernel */ +}; + +#if RAS_SAVE_CPU_MSR +static RegRec susp_lcl_msr[4 * CONFIG_NR_CPUS] = { /* Used in file */ + { LMSR, 0, MSR_IA32_MCx_CTL(0), 0 }, /* Core, kernel */ + { LMSR, 0, MSR_IA32_MCx_CTL(1), 0 }, /* Core, kernel */ + { LMSR, 0, MSR_IA32_MCx_CTL(2), 0 }, /* Core, kernel */ + { LMSR, 0, MSR_IA32_MCG_CTL, 0 }, /* kernel */ + /* + * The remaining entries is setup/replicated by pm_init() + */ +}; +#endif +#endif + + +static void +one_mmio_rd(RegRec * r) +{ + switch(r->box & 0xf) { + case SBOX: + if (r->box & W64) + r->reg = mr_sbox_rq(0, r->ofs); + else + r->reg = (uint64_t) mr_sbox_rl(0, r->ofs); + break; + case DBOX: + if (r->box & W64) + r->reg = mr_dbox_rq(r->num, r->ofs); + else + r->reg = (uint64_t) mr_dbox_rl(r->num, r->ofs); + break; + case GBOX: + if (r->box & W64) + r->reg = mr_gbox_rq(r->num, r->ofs); + else + r->reg = (uint64_t) mr_gbox_rl(r->num, r->ofs); + break; + case TBOX: + if (mr_txs()) { + if (r->box & W64) + r->reg = mr_tbox_rq(r->num, r->ofs); + else + r->reg = (uint64_t) mr_tbox_rl(r->num, r->ofs); + } + break; + default: + r->box &= ~VLD; + return; + } + r->box |= VLD; + +#if PM_VERBOSE + printk("mmio_rd: box %d, idx %3d, ofs %04x -> %llx\n", + r->box & 0xf, r->num, r->ofs, r->reg); +#endif +} + +static void +one_mmio_wr(RegRec * r) +{ + if (! (r->box & VLD)) + return; + + switch(r->box & 0xf) { + case SBOX: + if (r->box & W64) + mr_sbox_wq(0, r->ofs, r->reg); + else + mr_sbox_wl(0, r->ofs, (uint32_t) r->reg); + break; + case DBOX: + if (r->box & W64) + mr_dbox_wq(r->num, r->ofs, r->reg); + else + mr_dbox_wl(r->num, r->ofs, (uint32_t) r->reg); + break; + case GBOX: + if (r->box & W64) + mr_gbox_wq(r->num, r->ofs, r->reg); + else + mr_gbox_wl(r->num, r->ofs, (uint32_t) r->reg); + break; + case TBOX: + if (mr_txs()) { + if (r->box & W64) + mr_tbox_wq(r->num, r->ofs, r->reg); + else + mr_tbox_wl(r->num, r->ofs, (uint32_t) r->reg); + } + break; + } + r->box &= ~VLD; + +#if PM_VERBOSE + printk("mmio_wr: box %d, idx %3d, ofs %04x <- %llx\n", + r->box & 0xf, r->num, r->ofs, r->reg); +#endif +} + + +#if RAS_SAVE_MSR +static void +one_msr_rd(RegRec * r) +{ + uint32_t hi, lo; + + switch(r->box & 0xf) { + case GMSR: + rdmsr(r->ofs, lo, hi); + break; +#if RAS_SAVE_CPU_MSR + case LMSR: + rdmsr_on_cpu(r->num, r->ofs, &lo, &hi); + break; +#endif + default: + r->box &= ~VLD; + return; + } + r->reg = ((uint64_t) hi) << 32 | (uint64_t) lo; + r->box |= VLD; + +#if PM_VERBOSE + printk("msr_rd: box %d, idx %3d, ofs %04x -> %llx\n", + r->box & 0xf, r->num, r->ofs, r->reg); +#endif +} + +static void +one_msr_wr(RegRec * r) +{ + uint32_t hi, lo; + + if (! (r->box & VLD)) + return; + + hi = r->reg >> 32; + lo = r->reg & 0xffffffff; + switch(r->box & 0xf) { + case GMSR: + wrmsr(r->ofs, lo, hi); + break; +#if RAS_SAVE_CPU_MSR + case LMSR: + wrmsr_on_cpu(r->num, r->ofs, lo, hi); + break; +#endif + } + r->box &= ~VLD; + +#if PM_VERBOSE + printk("msr_wr: box %d, idx %3d, ofs %04x <- %llx\n", + r->box & 0xf, r->num, r->ofs, r->reg); +#endif +} +#endif /* RAS_SAVE_MSR */ + + +/* + * Preserve all HW registers that will be lost in + * deep sleep states. This will be SBOX registers + * above offset 0x7000 and all other BOX registers. + */ + +static void +mr_suspend(void) +{ + int i; + + atomic_inc(&pm_entry); + + /* + * Save SBOX_MCA_INT_EN first and clear it. + * No more uncore MCAs will get through. 
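+ *
+ * (Editor's note, added for clarity: mr_resume() below mirrors this
+ * ordering - it restores every other saved register first and writes
+ * SBOX_MCA_INT_EN back last, so uncore MCAs stay blocked until the
+ * rest of the state has been put back.)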
+ */ + one_mmio_rd(susp_mmio + 0); +#if SAVE_BLOCK_MCA + mr_sbox_wl(0, SBOX_MCA_INT_EN, 0); +#endif + + /* + * Save remaining BOX MMIOs + */ + for(i = 1; i < ARRAY_SIZE(susp_mmio); i++) + one_mmio_rd(susp_mmio + i); + +#if RAS_SAVE_MSR + /* + * Save global MSRs and set MCIP + * No new exceptions will be asserted + */ + for(i = 0; i < ARRAY_SIZE(susp_msr); i++) + one_msr_rd(susp_msr + i); +#if SAVE_BLOCK_MCA + wrmsr(MSR_IA32_MCG_STATUS, MCG_STATUS_MCIP, 0); +#endif + +#if RAS_SAVE_CPU_MSR + /* + * Save per-CPU MSRs + */ + for(i = 0; i < ARRAY_SIZE(susp_lcl_msr); i++) + one_msr_rd(susp_lcl_msr + i); +#endif +#endif + + atomic_dec(&pm_entry); +} + + +/* + * Undo side effects of a suspend call. + * Nothing to do unless we turned MC handlers off. + */ + +static void +mr_cancel(void) +{ + int i; + + atomic_inc(&pm_entry); + + /* + * Restore SBOX_MCA_INT_EN to unblock uncore MCs + * Invalidate all other saved MMIO registers. + */ + one_mmio_wr(susp_mmio + 0); + for(i = 1; i < ARRAY_SIZE(susp_mmio); i++) + susp_mmio[i].box &= ~VLD; + +#if RAS_SAVE_MSR + /* + * Restore IA32_MCG_STATUS to unblock core MCs + * Invalidate all other saved MSR registers. + */ + one_msr_wr(susp_msr + 0); + for(i = 1; i < ARRAY_SIZE(susp_msr); i++) + susp_msr[i].box &= ~VLD; + +#if RAS_SAVE_CPU_MSR + for(i = 0; i < ARRAY_SIZE(susp_lcl_msr); i++) + susp_lcl_msr[i].box &= ~VLD; +#endif +#endif + + atomic_dec(&pm_entry); +} + + +/* + * Restore all HW registers that we use. + */ + +static void +mr_resume(void) +{ + int i; + + atomic_inc(&pm_entry); + + /* + * Clear uncore MCA banks (just in case) + */ + if (susp_mmio[0].box & VLD) + box_reset(0); + + /* + * Restore all BOX MMIOs but SBOX_MCA_INT_EN + */ + for(i = 1; i < ARRAY_SIZE(susp_mmio); i++) + one_mmio_wr(susp_mmio + i); + + /* + * Then restore SBOX_MCA_INT_EN to enable uncore MCAs + */ + one_mmio_wr(susp_mmio + 0); + +#if RAS_SAVE_MSR + /* + * Restore all global MSRs but IA32_MCG_STATUS + */ + for(i = 1; i < ARRAY_SIZE(susp_msr); i++) + one_msr_wr(susp_msr + i); + + /* + * Then restore IA32_MCG_STATUS to allow core MCAs + */ + one_msr_wr(susp_msr + 0); + +#if RAS_SAVE_CPU_MSR + /* + * Restore all per-cpu MSRs + */ + for(i = 0; i < ARRAY_SIZE(susp_lcl_msr); i++) + one_msr_wr(susp_lcl_msr + i); +#endif +#endif + + atomic_dec(&pm_entry); +} + + +/* + * Callback from PM notifier chain. + * TBD: should we test for odd state transitions and recursions? + */ + +static int +mr_pm_callback(struct notifier_block *nb, unsigned long event, void *msg) +{ + + switch(event) { + case MICPM_DEVEVENT_SUSPEND: + mr_suspend(); + break; + + case MICPM_DEVEVENT_RESUME: + mr_resume(); + break; + + case MICPM_DEVEVENT_FAIL_SUSPEND: + mr_cancel(); + break; + + default: + /* + * Ignore whatever else is sent this way + */ + break; + } + + return 0; +} + + + +/* +** +** The PM module loads before RAS, so we must setup +** the API to support power management, i.e register. +** PM needs: +** - Notification when MT changes certain variables. +** Provided by a call-out list that the PM sets +** at registration time. +** - Access to MT calls. +** The PM module can use micras_mt_call() for access. +** Since PM loads first, this function needs to +** be passed at registration time. +** RAS needs: +** - list of core voltages (for CVOLT query). +** We pass a pointer to the voltage list and the +** voltage list counter to PM module, who will +** fill in the actual values (not available until +** core-freq driver loads). +** - list of core frequencies (for CFREQ query). +** Same solution as for CVOLT. 
+** - Notifications for throttle state changes. +** - Power management notifications for suspend/resume. +** +** Note: can one notifier block be inserted in multiple +** chains? Its assume not, which require two blocks +** both pointing to the same local function. +*/ + +extern struct mr_rsp_freq freq; +extern struct mr_rsp_volt volt; + +struct micpm_params pm_reg; /* Our data for PM */ +struct micpm_callbacks pm_cb; /* PM data for us */ + +extern void micpm_device_register(struct notifier_block *n); +extern void micpm_device_unregister(struct notifier_block *n); +extern void micpm_atomic_notifier_register(struct notifier_block *n); +extern void micpm_atomic_notifier_unregister(struct notifier_block *n); + +static struct notifier_block ras_deviceevent = { + .notifier_call = mr_pm_callback, +}; + +static struct notifier_block ras_throttle_event_ns = { + .notifier_call = mr_pm_throttle_callback, +}; + +static struct notifier_block ras_throttle_event = { + .notifier_call = mr_pm_throttle_callback, +}; + + +/* + * Setup PM callbacks and SCIF handler. + */ + +static int +pm_mt_call(uint16_t cmd, void * buf) +{ + int err; + + atomic_inc(&pm_entry); + err = micras_mt_call(cmd, buf); + atomic_dec(&pm_entry); + + return err; +} + + +int __init +pm_init(void) +{ + extern int mr_smc_rd(uint8_t, uint32_t *); + +#if RAS_SAVE_CPU_MSR + /* + * Preset MCA bank MSR register descriptions + * + *TBD: We have to use IPIs to read MSRs, which will wake + * up cores at sleep when this function is called. + * PM module may not like this at all. + */ + int i, j; + for(i = 1; i < nr_cpu_ids; i++) { + j = 4 * i; + susp_lcl_msr[j] = susp_lcl_msr[0]; + susp_lcl_msr[j + 1] = susp_lcl_msr[1]; + susp_lcl_msr[j + 2] = susp_lcl_msr[2]; + susp_lcl_msr[j + 3] = susp_lcl_msr[3]; + susp_lcl_msr[j].num = i; + susp_lcl_msr[j + 1].num = i; + susp_lcl_msr[j + 2].num = i; + susp_lcl_msr[j + 3].num = i; + } +#endif + + /* + * Get temperature where power throttle becomes thermal throttle + */ + mr_smc_rd(0x4c, &ttl_tcrit); + + /* + * Register with the MIC Power Management driver. + */ + pm_reg.volt_lst = volt.supt; + pm_reg.volt_len = &volt.slen; + pm_reg.volt_siz = ARRAY_SIZE(volt.supt); + pm_reg.freq_lst = freq.supt; + pm_reg.freq_len = &freq.slen; + pm_reg.freq_siz = ARRAY_SIZE(freq.supt); + pm_reg.mt_call = pm_mt_call; + pm_reg.mt_ttl = mr_throttle; + if (micpm_ras_register(&pm_cb, &pm_reg)) + goto fail_pm; + + /* + * Get into the PM notifier lists + * MicPm reports events in 2 chains, one atomic and one + * blocking. Our callback will not block! + */ + micpm_atomic_notifier_register(&ras_throttle_event_ns); + micpm_notifier_register(&ras_throttle_event); + + if (boot_cpu_data.x86_mask == KNC_C_STEP) + micpm_device_register(&ras_deviceevent); + + printk("RAS.pm: init complete\n"); + return 0; + +fail_pm: + printk("RAS.pm: init failed\n"); + return 1; +} + + +/* + * Cleanup for module unload. + * Clear/restore hooks in the native MCA handler. + */ + +void __exit +pm_exit(void) +{ + /* + * Get off the PM notifier list + */ + micpm_atomic_notifier_unregister(&ras_throttle_event_ns); + micpm_notifier_unregister(&ras_throttle_event); + + if (boot_cpu_data.x86_mask == KNC_C_STEP) + micpm_device_unregister(&ras_deviceevent); + + /* + * De-register with the PM module. + */ + micpm_ras_unregister(); + + /* + * Wait for an calls to module to finish. 
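+ *
+ * (Editor's note, added for clarity: pm_entry is incremented and
+ * decremented around every path the PM module can call into - the
+ * throttle and device notifier callbacks, mr_throttle() and
+ * pm_mt_call() - so spinning here until it drops to zero ensures no
+ * such call is still executing inside this module when it unloads.)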
+ */ + while(atomic_read(&pm_entry)) + cpu_relax(); + + printk("RAS.pm: exit complete\n"); +} + +#endif /* USE_PM */ diff --git a/ras/micras_uncore.c b/ras/micras_uncore.c new file mode 100644 index 0000000..af1e3a4 --- /dev/null +++ b/ras/micras_uncore.c @@ -0,0 +1,1194 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * RAS handler for uncore MC events + * + * Contains code to intercept MC events, collect information + * from uncore MCA banks and handle the situation. + * + * In case of a severe event, defined by corrupted context, + * the handler will add a record of the event in the designated + * EEPROM hanging off the Over Clocking I2C bus. After that + * a message will be sent to the SMC (enabling IPMI notifications) + * and at last a message is sent to the host via the MC SCIF + * connection. + * + * Lesser events will also be sent to the host on a 'FYI' basis, + * but no rocord will be stored in the event log. + * + * This is in all aspects similar to the reaction to a severe + * core MC event. Differences are in the MC bank access (mmio), + * and that the event is delivered via an interrupt instead of + * an exception. Still, the handler cannot expect any support + * from the OS. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "micras.h" + + +/* + * Hooks placed in the native machine check handler + * See file arch/x86/kernel/traps.c for placement + * + * nmi Entered NMI exception handler. + * Called before any other tests, which allow us + * to test for and handle un-core MCA events before + * the traditional NMI handling. + * Note that the mce-inject mechanism also uses + * NMI's to distribute calls to do_machine_check(). 
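+ *
+ * Editor's sketch (an assumption - the patched traps.c is not shown in
+ * this file): the hook is expected to be consulted at the top of the
+ * kernel's NMI path, roughly as
+ *
+ *     if (mca_nmi && mca_nmi(smp_processor_id()))
+ *             return;         /* un-core event handled, swallow the NMI */
+ *
+ * so a non-zero return from mcu_nmi() below claims the NMI.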
+ */ + +extern int (*mca_nmi)(int); + + + +/* + * Table of un-core MCA banks. + * Though there are differences in register count and sizes, un-core bank + * registers are always spaced 8 bytes apart, so all we need to know is + * the location of the first MCA bank register (CTL) to find them. + * If bank is present, the bank register offsets for ctl, status, addr, + * and misc are thus 0, 8, 16, and 24 respectively. + * Default CTL masks pulled from the register documentation + * Some SKUs don't have support for all BOXs but that will be handled + * at runtime in the support code, not at compile time by this table. + */ + + +#ifdef CONFIG_ML1OM +#define SBOX_DEF 0x000e /* All (7) */ +#define DBOX_DEF 0x0003 /* All (2) */ +#define GBOX_DEF 0x0003 /* All (2) */ +#endif +#ifdef CONFIG_MK1OM +#define SBOX_DEF 0x03ce /* All - PCIe errors (7) */ +#define DBOX_DEF 0x000f /* All (4) */ +#define GBOX_DEF 0x3ffffffff /* All (34) */ +#define TBOX_DEF 0x001f /* All (5) */ +#endif + +#define MCU_CTL_64 (1 << 0) /* Bank has 64 bit CTL register */ +#define MCU_NO_ADDR (1 << 1) /* Bank has no ADDR register */ +#define MCU_ADDR_32 (1 << 2) /* Bank has 32 bit ADDR register */ +#define MCU_NO_MISC (1 << 3) /* Bank has no MISC register */ +#define MCU_MISC_64 (1 << 4) /* Bank has 64 bit MISC register */ + +#define MCU_CTRL 0 +#define MCU_STAT 8 +#define MCU_ADDR 16 +#define MCU_MISC 24 + +typedef struct _mcu_rec { + uint8_t num; /* 'BOX' count */ + uint8_t org; /* Origin code */ + uint8_t qflg; /* Quirk flags */ + uint16_t ofs; /* MCA bank base offset */ + uint64_t ctl; /* Initial CTL mask */ + uint32_t (*rl)(int, uint32_t); /* 32-bit MMIO read */ + void (*wl)(int, uint32_t, uint32_t); /* 32-bit MMIO write */ + uint64_t (*rq)(int, uint32_t); /* 64-bit MMIO read */ + void (*wq)(int, uint32_t, uint64_t); /* 64-bit MMIO write */ +} McuRec; + + +static McuRec mcu_src[] = { + { 1, MC_ORG_SBOX, MCU_MISC_64, SBOX_MCX_CTL_LO, + SBOX_DEF, mr_sbox_rl, mr_sbox_wl, mr_sbox_rq, mr_sbox_wq }, + { DBOX_NUM, MC_ORG_DBOX, MCU_NO_MISC, DBOX_MC2_CTL, + DBOX_DEF, mr_dbox_rl, mr_dbox_wl, mr_dbox_rq, mr_dbox_wq }, + { GBOX_NUM, MC_ORG_GBOX, MCU_CTL_64, GBOX_FBOX_MCA_CTL_LO, + GBOX_DEF, mr_gbox_rl, mr_gbox_wl, mr_gbox_rq, mr_gbox_wq }, +#ifdef CONFIG_MK1OM + { TBOX_NUM, MC_ORG_TBOX, MCU_CTL_64 | MCU_NO_MISC | MCU_ADDR_32, TXS_MCX_CONTROL, + TBOX_DEF, mr_tbox_rl, mr_tbox_wl, mr_tbox_rq, mr_tbox_wq }, +#endif +}; + +#define GBOX_BROKEN 1 /* Set if GBOX MCA bank is borken */ + +#if GBOX_BROKEN +/* + * Si design managed to break the GBOX MCA bank concept + * by not filling useful data into ADDR and MISC registers. + * Instead they use a bunch of registers in another part + * of the GBOX (mbox to be specific) to hold this info. + * In order to get at the right register it is necesary + * to partially decode the STATUS register and from there + * select an GBOX.MBOX register. + * Since the new registers are all 32 bits wide, we'll stick + * the value into MISC register if Misc_V bit of STATUS is + * not set. 
The following table is used for register selection + * + * model code base width Chan Notes + * 0 017c 32 0 26 bit address, CRC (retrain) + * 1 097c 32 1 26 bit address, CRC (retrain) + * 2 01e0 32 0 26 bit address, ECC + * 3 09e0 32 1 26 bit address, ECC + * 4 01dc 32 0 26 bit address, UC CAPE + * 5 09dc 32 1 26 bit address, UC CAPE + * 31 01a4 32 0 26 bit address, UC ECC + * 32 09a4 32 1 26 bit address, UC ECC + * + * Note: model code is simply the enable bit number in CTL + */ + +static struct liu { + uint16_t mcode; + uint16_t base; +} liu[] = { + { 0, 0x17c }, /* Correctable CRC (retrain) ch 0 */ + { 1, 0x97c }, /* Correctable CRC (retrain) ch 1 */ + { 2, 0x1e0 }, /* Correctable ECC, ch 0 */ + { 3, 0x9e0 }, /* Correctable ECC, ch 1 */ + { 4, 0x1dc }, /* Uncorrectable CAPE, ch 0 */ + { 5, 0x9dc }, /* Uncorrectable CAPE, ch 1 */ + { 31, 0x1a4 }, /* Uncorrectable ECC, ch 0 */ + { 32, 0x9a4 } /* Uncorrectable ECC, ch 1 */ +}; + +static void +mcu_gbox_fixup(McuRec * mr, int num, MceInfo * mi) +{ + int i; + uint16_t mcode; + + /* + * Skip if Status.Misc_v set + */ + if (mi->status & (1ULL << 59)) + return; + + /* + * Get model code and if it's in the array, then read + * the addressed register into MISC. We don't set the + * Status.Misc_v bit because we want to distinguish + * this hack from the real MCA bank register. + */ + mcode = GET_BITS(31, 16, mi->status); + for(i = 0; i < ARRAY_SIZE(liu); i++) + if (liu[i].mcode == mcode) { + mi->misc = (uint64_t) mr->rl(num, liu[i].base); + break; + } +} +#endif + +/* + * Read Ctrl, Addr and Misc registers from an un-core MCA bank. + * The Status register is read/cleared in mcu_scan(). + */ + +static void +mcu_read(McuRec * mr, int num, MceInfo * mi) +{ + if (mr->qflg & MCU_CTL_64) + mi->ctl = mr->rq(num, mr->ofs + MCU_CTRL); + else + mi->ctl = (uint64_t) mr->rl(num, mr->ofs + MCU_CTRL); + + if (mr->qflg & MCU_NO_ADDR) + mi->addr = 0; + else { + if (mr->qflg & MCU_ADDR_32) + mi->addr = (uint64_t) mr->rl(num, mr->ofs + MCU_ADDR); + else + mi->addr = mr->rq(num, mr->ofs + MCU_ADDR); + } + + if (mr->qflg & MCU_NO_MISC) + mi->misc = 0; + else { + if (mr->qflg & MCU_MISC_64) + mi->misc = mr->rq(num, mr->ofs + MCU_MISC); + else + mi->misc = (uint64_t) mr->rl(num, mr->ofs + MCU_MISC); + } + +#if GBOX_BROKEN + if (mr->org == MC_ORG_GBOX) + mcu_gbox_fixup(mr, num, mi); +#endif +} + + +/* + * Reset one un-core MCA bank + * Any quirks go here. + */ + +static void +mcu_reset(McuRec * mr, int num, int arm) +{ + uint64_t ctl; + + mr->wq(num, mr->ofs + MCU_STAT, 0); + + if (! (mr->qflg & MCU_NO_ADDR)) { + if (mr->qflg & MCU_ADDR_32) + mr->wl(num, mr->ofs + MCU_ADDR, 0); + else + mr->wq(num, mr->ofs + MCU_ADDR, 0); + } + + if (! (mr->qflg & MCU_NO_MISC)) { + if (mr->qflg & MCU_MISC_64) + mr->wq(num, mr->ofs + MCU_MISC, 0); + else + mr->wl(num, mr->ofs + MCU_MISC, 0); + } + + ctl = arm ? mr->ctl : 0; + +#ifdef CONFIG_MK1OM + if (ctl && mr->org == MC_ORG_SBOX && mic_hw_stepping(0) == KNC_A_STEP) + ctl &= ~PUT_BIT(3, 1); /* A0 SBOX 'unclaimed address' bug */ + + if (ctl && mr->org == MC_ORG_GBOX && mr_mch() != 16) + ctl &= ~(uint64_t) PUT_BIT(6, 1); /* B0 GBOX 'Invalid Channel' (SKU 3 & 4) */ +#endif + + if (mr->qflg & MCU_CTL_64) + mr->wq(num, mr->ofs + MCU_CTRL, ctl); + else + mr->wl(num, mr->ofs + MCU_CTRL, ctl); +} + + +/* + * Un-core MC bank pre-scan + * Walk through all un-core MC sources to see if any events are pending. + * Stops on 1st match where STATUS has both VAL bit set. 
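+ *
+ * (Editor's note, added for clarity: the pre-scan below tests
+ * MCI_STATUS_VAL only; the EN bit is evaluated later in mcu_scan(),
+ * which is also where STATUS gets cleared.)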
On some BOXes, + * like GBOX, interrupt may be signalled without the EN bit being set. + * See HSD 4116374 for details. + */ + +static int +mcu_prescan(void) +{ + int i, j; + uint64_t status; + struct _mcu_rec * mr; + + for(i = 0; i < ARRAY_SIZE(mcu_src); i++) { + mr = mcu_src + i; + +#ifdef CONFIG_MK1OM + if (mr->org == MC_ORG_TBOX && !mr_txs()) + continue; +#endif + + for(j = 0; j < mr->num; j++) { + status = mr->rq(j, mr->ofs + MCU_STAT); + if (status & MCI_STATUS_VAL) + return 1; + } + } + + return 0; +} + + +/* + * Un-core MC bank scanner. + * Walks through all un-core MC sources for new events. + * If any found, then process them same way as core events. + */ + +static int +mcu_scan(void) +{ + MceInfo mc, uc; + int gone, seen; + int i, j; + struct _mcu_rec * mr; + + /* + * Walk list of known un-core MC sources + */ + gone = seen = 0; + memset(&uc, 0, sizeof(uc)); + for(i = 0; i < ARRAY_SIZE(mcu_src); i++) { + mr = mcu_src + i; + +#ifdef CONFIG_MK1OM + if (mr->org == MC_ORG_TBOX && !mr_txs()) + continue; +#endif + + for(j = 0; j < mr->num; j++) { + + /* + * Read status to see if we have something of interest. + * As per HSD 4116374 the status register is cleared + * after read, if it had valid content. + *TBD: Clear unconditionally? + */ + mc.status = mr->rq(j, mr->ofs + MCU_STAT); + if (mc.status & MCI_STATUS_VAL) + mr->wq(j, mr->ofs + MCU_STAT, 0); + else + continue; + + /* + * Bank had valid content (VAL bit set). + * Verify the event was subscribed to (EN bit set). + * If not, the event is ignored. + */ + if (! (mc.status & MCI_STATUS_EN)) + continue; + + /* + * Valid and enabled event, read remaining bank registers. + */ + seen++; + mcu_read(mr, j, &mc); + + /* + * Fill out blanks in the MceInfo record + */ + mc.org = mr->org; + mc.id = j; + mc.stamp = get_seconds(); + mc.flags = (mc.status & MCI_STATUS_UC) ? MC_FLG_FATAL : 0; + + /* + * If any way to detect injected errors then this is + * the place to do so and indicate by MC_FLG_FALSE flag + */ + + if (mc.flags & MC_FLG_FATAL) { +#ifdef CONFIG_MK1OM +#if MC_VERBOSE + ee_printk("Uncore fatal MC: org %d, id %d, status %lx\n", mc.org, mc.id, mc.status); +#endif + + /* + * Log UC events in the eeprom. + */ + micras_mc_log(&mc); + mc.flags |= MC_FLG_LOG; + + /* + * Notify SMC that we've had a serious machine check error. + */ + micras_mc_ipmi(&mc, 1); +#endif + /* + * Remember 1st fatal (UC) event + */ + if (! gone++) + uc = mc; + } + + /* + * Notify host + */ + micras_mc_send(&mc, 1); + + /* + * Filter corrected errors. + */ + if (! (mc.flags & MC_FLG_FATAL)) { + uint64_t tsc, msk; + + tsc = rdtsc(); + msk = micras_mc_filter(&mc, tsc, 1); + if (msk) { +#if MC_VERBOSE + ee_printk("Uncore filter: org %d, id %d, ctrl %lx, mask %lx\n", mc.org, mc.id, mc.ctl, msk); +#endif + if (mr->qflg & MCU_CTL_64) + mr->wq(j, mr->ofs + MCU_CTRL, mc.ctl & ~msk); + else + mr->wl(j, mr->ofs + MCU_CTRL, (uint32_t)(mc.ctl & ~msk)); + } + } + + /* + * Any event post processing goes here. + * This would be things like cache line refresh and such. + * Actual algorithms are TBD. + */ + } + } + +#if RAS_HALT + if (gone) { + atomic_inc(&mce_entry); + panic("FATAL un-core machine check event:\n" + "bnk %d, id %d, ctl %llx, stat %llx, addr %llx, misc %llx\n", + uc.org, uc.id, uc.ctl, uc.status, uc.addr, uc.misc); + } +#endif + + return seen; +} + + +/* + * NMI handler. + * + * Once we get control in 1st interrupt (NMI or regular), we'll + * use IPIs from the local APIC to force all active CPU's into + * our RAS NMI handler, similar to the core MC handler. 
+ * After that, the same logic as for the generic MC handler is + * applied to corral all CPU's through well defined rendez-vous + * points where only one cpu gets to run the un-core MC event + * scan while everybody else are sitting in a holding pen. + * If containment wasn't an issue we could simply let the BP + * run the scan without involving other CPUs at all. + */ + +#define SPINUNIT 50 +#define SERIAL_MCU 0 + +struct cpumask mcu_exc_mask; /* NMI recipients */ +static int mcu_cpu = -1; /* SBOX target CPU */ +#if MCU_NMI +static uint64_t mcu_redir; /* SBOX I/O-APIC redirection entry */ +static uint64_t mcu_old_redir; /* Restore value for redirection entry */ +#else +unsigned int mcu_eoi; /* 1st interrupt from local APIC */ +#endif +static atomic_t mcu_callin; /* Entry rendez-vous gate */ +static atomic_t mcu_leavin; /* Hold rendez-vous gate */ + + +static int +mcu_timed_out(int64_t * timeout) +{ + if (*timeout < SPINUNIT) + return 1; + + *timeout -= SPINUNIT; + touch_nmi_watchdog(); + ndelay(SPINUNIT); + + return 0; +} + + +static int +mcu_wait(void) +{ + int cpus, order; + int64_t timeout; + + cpus = num_online_cpus(); + timeout = 1 * NSEC_PER_SEC; /* 1 Second */ + + /* + * Flush all caches + */ + + /* + * 'Entry' rendez-vous point. + * Wait here until all CPUs has entered. + */ + order = atomic_inc_return(&mcu_callin); + while(atomic_read(&mcu_callin) != cpus) { + if (mcu_timed_out(&timeout)) { + /* + * Timout waiting for CPU enter rendez-vous + */ + return -1; + } + } + + /* + * 'Hold' rendez-vous point. + * All CPUs drop by here 'simultaneously'. + * The first CPU that 'enter'ed (order of 1) will + * fall thru while the others wait until their + * number number comes up in the 'leavin' counter + * (or if a timeout happens). This also has a + * serializing effect, where one CPU leaves this + * loop at a time. + */ + if (order == 1) { +#if SERIAL_MCU + atomic_set(&mcu_leavin, 1); +#endif + } + else { + while(atomic_read(&mcu_leavin) < order) { + if (mcu_timed_out(&timeout)) { + /* + * Timout waiting in CPU hold rendez-vous + */ + return -1; + } + } + } + + return order; +} + + +static int +mcu_go(int order) +{ + int ret; + int64_t timeout; + + ret = -1; + if (order < 0) + goto mcu_reset; + +#if SERIAL_MCU + /* + * If any 'per-CPU' activity is needed in isolation + * (one CPU at a time) then that code needs to go here. + */ + + atomic_inc(&mcu_leavin); /* Next CPU out of hold */ +#endif + + timeout = NSEC_PER_SEC; /* 1 Second */ + if (order == 1) { + int cpus; + + /* + * The first CPU that entered (order of 1) waits here + * for the others to leave the 'hold' loop in mca_wait() + * and enter the 'exit' rendez-vous loop below. + * Once they are there, it will run the uncore MCA bank + * scan while the others are parked in 'exit' loop below. + */ + cpus = num_online_cpus(); +#if SERIAL_MCU + while(atomic_read(&mcu_leavin) <= cpus) { + if (mcu_timed_out(&timeout)) { + /* + * Timout waiting for CPU exit rendez-vous + */ + goto mcu_reset; + } + } +#else + atomic_set(&mcu_leavin, cpus); +#endif + mcu_scan(); + ret = 0; + } + else { + /* + * Exit rendez-vous point. + */ + while(atomic_read(&mcu_leavin) != 0) { + if (mcu_timed_out(&timeout)) { + /* + * Timout waiting in CPU exit rendez-vous + */ + goto mcu_reset; + } + } + return 0; + } + + /* + * Reset rendez-vous counters, letting all CPUs + * leave this function 'simultaneously'. 
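+ *
+ * (Editor's note, added for clarity: clearing mcu_leavin is what
+ * releases the CPUs still spinning in the exit loop above, and
+ * mcu_callin must be zeroed as well so the next un-core event starts
+ * with a fresh entry gate.)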
+ */ +mcu_reset: + atomic_set(&mcu_callin, 0); + atomic_set(&mcu_leavin, 0); + return ret; +} + + +/* + * NMI exception handler + * Uncertain if all cpumask_* functions implies barriers, + * so erroring on the safe side explicit barriers is used. + */ + +#if BEAM_TEST +static int +mcu_nmi(int cpu) +{ +#ifdef CONFIG_MK1OM + uint32_t mcg_status_lo, mcg_status_hi; +#endif + struct _mcu_rec * mr; + MceInfo mc; + int i, j; + + if (cpu != mcu_cpu) + return 0; + + if (! mcu_prescan()) + return 0; + + wbinvd(); + +#ifdef CONFIG_MK1OM + rdmsr(MSR_IA32_MCG_STATUS, mcg_status_lo, mcg_status_hi); + wrmsr(MSR_IA32_MCG_STATUS, mcg_status_lo | MCG_STATUS_MCIP, mcg_status_hi); +#endif + + for(i = 0; i < ARRAY_SIZE(mcu_src); i++) { + mr = mcu_src + i; + +#ifdef CONFIG_MK1OM + if (mr->org == MC_ORG_TBOX && !mr_txs()) + continue; +#endif + + for(j = 0; j < mr->num; j++) { + mc.status = mr->rq(j, mr->ofs + MCU_STAT); + + if (! (mc.status & MCI_STATUS_VAL)) + continue; + + if (! (mc.status & MCI_STATUS_EN)) { + mr->wq(j, mr->ofs + MCU_STAT, 0); + continue; + } + + mcu_read(mr, j, &mc); + mr->wq(j, mr->ofs + MCU_STAT, 0); + + mc.org = mr->org; + mc.id = j; + mc.stamp = get_seconds(); + mc.flags = (mc.status & MCI_STATUS_UC) ? MC_FLG_FATAL : 0; + + micras_mc_send(&mc, 1); + } + } + +#ifdef CONFIG_MK1OM + wrmsr(MSR_IA32_MCG_STATUS, mcg_status_lo, mcg_status_hi); +#endif + return 1; + + /* + * Damn compiler options !!!!!! + * Don't want more changes than this routine, so + * added dummies to shut up gcc about unused code. + */ + i = mcu_wait(); + mcu_go(i); +} +#else + +static atomic_t mcu_entry; + +static int +mcu_nmi(int cpu) +{ +#ifdef CONFIG_MK1OM + uint32_t mcg_status_lo, mcg_status_hi; +#endif + int order, eoi; + + atomic_inc(&mcu_entry); + + /* + * Get MCA status from SBOX. + */ +#if 0 + /* + * If no source bits set, this was not an un-core MCA + * This would work if the SBOX_MCA_INT_STAT actually worked + * as described both in HAS and register specification. + * Unfortunately, it doesn't, as per tribal knowledge errata. + */ + uint32_t int_stat, int_en; + + int_en = mr_sbox_rl(0, SBOX_MCA_INT_EN); + int_stat = mr_sbox_rl(0, SBOX_MCA_INT_STAT); + if (! (int_en & int_stat)) { + atomic_dec(&mcu_entry); + return 0; + } +#else + /* + * Instead of having a single source of pending un-core MCA events, + * we now have to walk all BOXes to check if there is a valid event + * pending in one of them. That is much more expensive as we have + * to check this on all NMIs, including our own cascade NMIs used + * to corrall all CPUs in their rendezvouz point(s). We try to avoid + * this scan if there already is an un-core NMI in progress. + * We know that: + * un-core MCA NMIs are sent to just one CPU, mcu_cpu + * CPUs targeted in the cascade are in mcu_exc_mask + * non-zero atomic variable 'mcu_callin' tells cascade is in progress + */ + if (!cpumask_empty(&mcu_exc_mask)) + goto invited; + if (cpu != mcu_cpu) { + atomic_dec(&mcu_entry); + return 0; + } + + /* + * On CPU 0 and no un-core handling in progress! + * Then scan all BOXes for valid events pending, + * If there wasn't any, this is a false alarm and + * we'll re-connect MC lines and return. + */ + if (! mcu_prescan()) { + atomic_dec(&mcu_entry); + return 0; + } + +invited: +#endif + + /* + * Flush all caches. + * This is uncore so it should not be necessary to + * empty internal (L1) caches, doesn't harm either. + */ + wbinvd(); + + /* + * We do not want to be interrupted by a core MC + * exception while handling an NMI. 
We can block + * core MC events by setting the MCG_STATUS_MCIP. + * This is a MSR, so it has to be done on all CPUs. + * On KnC that is, KnF does not have that MSR. + */ +#ifdef CONFIG_MK1OM + rdmsr(MSR_IA32_MCG_STATUS, mcg_status_lo, mcg_status_hi); + wrmsr(MSR_IA32_MCG_STATUS, mcg_status_lo | MCG_STATUS_MCIP, mcg_status_hi); +#endif + + /* + * Special for the SBOX NMI target CPU: + * - disconnect un-core MC lines from SBOX I/O-APIC, such + * that we don't get stacked NMIs in the Local APICs. + * - simulate a NMI broadcast by sending NMI to all _other_ + * active CPUs via IPIs. The SBOX could do a broadcast, + * but that will send NMIs to sleeping CPUs too, which + * we prefer to avoid if possible. + *TBD: should creating the mcu_exc_mask be protected by + * lock, similar to core events? Who can interfere? + */ + if (cpu == mcu_cpu) { + mr_sbox_wl(0, SBOX_MCA_INT_EN, 0); + cpumask_copy(&mcu_exc_mask, cpu_online_mask); + cpumask_clear_cpu(cpu, &mcu_exc_mask); + smp_wmb(); + // apic->send_IPI_mask(&mcu_exc_mask, NMI_VECTOR); + apic->send_IPI_allbutself(NMI_VECTOR); +#if !MCU_NMI + if (mcu_eoi) { + smp_rmb(); + cpumask_set_cpu(cpu, &mcc_exc_mask); + smp_wmb(); + mcu_eoi = 0; + } +#endif + } + + /* + * Corral all CPUs through the rendez-vous point maze. + * It guarantees that: + * - No CPU leaves mcu_wait() until all has entered. + * - One CPU leaves mcu_wait() at a time. + * - No CPU leaves mcu_go() until all has entered. + * - While one CPU is in transit between mcu_wait() + * and mcu_go(), all other CPUs are sitting in + * tight busy-wait loops in either function. + * - All CPUs leaves mcu_go() at the same time. + * If there is any 'per-cpu' activity that needs to be + * run in isolation, it must be placed between mcu_wait() + * and mcu_go(). + */ + order = mcu_wait(); + if (mcu_go(order)) { + /* + * Timeout waiting at one of the rendez-vous points. + * Scan the un-core MCA banks just in case. + */ + mcu_scan(); + } + + /* + * Special for the SBOX NMI target CPU: + * - reconnect un-core MC lines through to SBOX I/O-APIC. + * If new events already are pending, then this will + * result in a 'rising-edge' trigger to the I/O-APIC. + */ + if (cpu == mcu_cpu) + mr_sbox_wl(0, SBOX_MCA_INT_EN, mr_txs() ? 0x0fffff07 : 0xff07); + + /* + * If this CPU got its NMI from an IPI, then it must + * send an ACK to its local APIC (I think). + */ + smp_rmb(); + eoi = cpumask_test_and_clear_cpu(cpu, &mcu_exc_mask); + smp_wmb(); + if (eoi) + ack_APIC_irq(); + + /* + * Restore core MCG status and return 1 indicating to the + * kernel NMI handler we've handled it. + *TBD: reduce to one write per core instead of one per thread? + */ +#ifdef CONFIG_MK1OM + wrmsr(MSR_IA32_MCG_STATUS, mcg_status_lo, mcg_status_hi); +#endif + atomic_dec(&mcu_entry); + return 1; +} +#endif + + +#if !MCU_NMI +/* + * MCA handler if using standard interrupts + * It's just a trampoline to convert a regular interrupt + * into an NMI, which is only needed if the I/O-APIC can't + * generate and NMI. + * + *TBD: remove all this? It is not used on KnC, and the KnF's + * I've tested this on all have been OK sending NMIs. + */ + +static irqreturn_t +sbox_handler(int irq, void * tag) +{ + /* + * Convert this regular interrupt into an NMI. 
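+ *
+ * (Editor's note, added for clarity: recording this CPU in mcu_cpu lets
+ * the self-NMI raised below pass the "cpu != mcu_cpu" test in mcu_nmi(),
+ * and mcu_eoi flags that the NMI arrived via the local APIC and so
+ * needs an ack_APIC_irq() on the way out.)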
+ */ + mcu_cpu = smp_processor_id(); + mcu_eoi = 1; + apic->send_IPI_self(NMI_VECTOR); + return IRQ_HANDLED; +} +#endif + + +/* + * Reset all uncore MCA banks to defaults + */ + +void +box_reset(int arm) +{ + int i, j; + struct _mcu_rec * mr; + + for(i = 0; i < ARRAY_SIZE(mcu_src); i++) { + mr = mcu_src + i; + +#ifdef CONFIG_MK1OM + if (mr->org == MC_ORG_TBOX && !mr_txs()) + continue; +#endif + + for(j = 0; j < mr->num; j++) { + uint64_t status; + + /* + *TBD: Do we want to pick up existing MCA events or drop + * them because we don't know _when_ they occurred? + * Reporting them would require internal buffer because + * it's unlikely the SCIF MC session is up at this point. + * For now we just enter events into the system log. + */ + status = mr->rq(j, mr->ofs + MCU_STAT); + if (status & MCI_STATUS_VAL) { + MceInfo mc; + + mcu_read(mr, j, &mc); + printk("RAS.uncore: discard MC event:\n" + "bnk %d, id %d, ctl %llx, stat %llx, addr %llx, misc %llx\n", + mr->org, j, mc.ctl, status, mc.addr, mc.misc); + } + + /* + * Reset MCA bank registers. + */ + mcu_reset(mr, j, arm); + } + } +} + + +/* + * Setup interrupt handlers by hooking into the SBOX's I/O-APIC. + * For now, we send an NMI to single CPU, and let it process the + * event. This may need to be expanded into a broadcast NMI similar + * to what the generic core MC event handler does in order to keep + * containment at high as we possibly can. + * + *TBD: code a dual rendez-vous mechanism on all active CPUs. + */ + +int __init +mcu_init(void) +{ +#if MC_VERBOSE + int i, j; +#endif + + if (mce_disabled) { + printk("RAS.uncore: disabled\n"); + } + else { + /* + * Clear rendez-vous counters + */ + atomic_set(&mcu_callin, 0); + atomic_set(&mcu_leavin, 0); + +#if MC_VERBOSE + /* + * For debug only: + * Record all SBOX I/O-APIC registers to kernel log + */ + printk("SBOX_APICIDR: %lx\n", mr_sbox_rl(0, SBOX_APICIDR)); + printk("SBOX_APICVER: %lx\n", mr_sbox_rl(0, SBOX_APICVER)); + printk("SBOX_APICAPR: %lx\n", mr_sbox_rl(0, SBOX_APICAPR)); + for(i = 0; i < 26 ; i++) + printk("APICCRT%d: %llx\n", i, mr_sbox_rq(0, SBOX_APICRT0 + (8 * i))); + for(i = 0; i < 8 ; i++) + printk("APICICR%d: %llx\n", i, mr_sbox_rq(0, SBOX_APICICR0 + (8 * i))); + printk("SBOX_MCA_INT_EN: %lx\n", mr_sbox_rl(0, SBOX_MCA_INT_EN)); + printk("SBOX_MCA_INT_STAT: %lx\n", mr_sbox_rl(0, SBOX_MCA_INT_STAT)); +#endif + + /* + * Disconnect un-core MC lines from SBOX I/O-APIC, setup the + * individual BOXes, and clear any un-core MC pending flags + * from SBOX I/O-APIC + */ + mr_sbox_wl(0, SBOX_MCA_INT_EN, 0); + box_reset(1); + mr_sbox_wl(0, SBOX_MCA_INT_STAT, 0); + + /* + * Setup the SBOX I/O-APIC. + * Un-core MC events are routed through a mask in register + * SBOX_MCA_INT_EN into I/O APIC redirection table entry #16. + * Ideally we want all uncore MC events to be handled similar + * to core MCAs, which means we'd like an NMI on all CPUs. + * On KnF the I/O-APIC may not trigger an NMI (PoC security) + * and on KnC where NMI delivery is possible, it appears not + * to be ideal to broadcast it to all CPUs because it could + * wake up cores put to sleep bu power management rules. + * See MCA HAS, SBOX HAS Vol 4, and A0 Vol 2 for details. 
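+ *
+ * (Editor's worked example, assuming the BP's APIC ID is 0: the NMI
+ * entry programmed below, PUT_BITS(10, 8, 4) | PUT_BITS(47, 32, 0),
+ * evaluates to 0x400 - delivery mode 100b (NMI), physical destination 0.)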
+ * + * The redirection table entry has the following format: + * 47:32 Destination ID field + * 17 Interrrupt set (testing: trigger an interrupt) + * 16 Interrupt mask (0=enable, 1=disable) + * 15 Trigger mode (0=edge, 1=level) + * 14 Remote IRR (0=inactive, 1=accepted) + * 13 Interrupt polarity (0=active_high, 1=active_low) + * 12 Delivery status (0=idle, 1=send_pending) + * 11 Destination mode (0=physical, 1=logical) + * 10:8 Delivery mode (0=fixed, low, SMI, rsvd, NMI, INIT, rsvd, ext) + * 7:0 Interrupt vector + * + * The I/O-APIC input is 'rising edge', so we'd need to select + * it to be edge triggered, active high. + */ +#if MCU_NMI + /* + * If event delivery by NMI is preferred, we want it delivered on + * the BP. There is already an NMI handler present, so we have to + * tap into the existing NMI handler for the event notifications. + * + * The bit-fiddling below says: + * NMI delivery | Destination CPU APIC ID + */ + mcu_cpu = 0; + mcu_redir = PUT_BITS(10, 8, 4) | PUT_BITS(47, 32, (uint64_t) cpu_data(mcu_cpu).apicid); + mcu_old_redir = mr_sbox_rq(0, SBOX_APICRT16); + mr_sbox_wq(0, SBOX_APICRT16, mcu_redir | PUT_BITS(16, 16, 1)); + mr_sbox_wq(0, SBOX_APICRT16, mcu_redir); +#else + /* + * If event delivery by regular interrupt is preferred, then all + * I/O-APIC setup will be handled by calling request_irq(16,..). + * There is no guarantee that the event will be sent to the BP + * (though it's more than likely) so we'll defer indentifying the + * event handling CPU (mcu_cpu) till we receive the callback from + * the interrupt handling sus-system. + * The sbox_handler() function just converts the callback into an + * NMI because the only way containment can be achieved is to be + * able to lock down the system completely, which is not realistic + * using regular interrupts. + */ + mcu_eoi = 0; + (void) request_irq(16, sbox_handler, IRQF_TRIGGER_HIGH, "un-core mce", (void *) 42); +#endif + + /* + * Finally, place hook in NMI handler in case there's + * an un-core event pending and connect un-core MC lines + * through to SBOX I/O-APIC. From this point onwards we + * can get uncore MC events at any time. + */ + mca_nmi = mcu_nmi; + mr_sbox_wl(0, SBOX_MCA_INT_EN, mr_txs() ? 0x0fffff07 : 0xff07); + +#if MC_VERBOSE + /* + * For debug only + * Record initial uncore MCA banks to kernel log. 
+ */ + printk("RAS.uncore: dumping all banks\n"); + + /* + * Dump all MCA registers we set to kernel log + */ + for(i = 0; i < ARRAY_SIZE(mcu_src); i++) { + char * boxname; + struct _mcu_rec * mr; + uint64_t ctl, stat, addr, misc; + + mr = mcu_src + i; +#ifdef CONFIG_MK1OM + if (mr->org == MC_ORG_TBOX && !mr_txs()) + continue; +#endif + switch(mr->org) { + case MC_ORG_SBOX: boxname = "SBOX"; break; + case MC_ORG_DBOX: boxname = "DBOX"; break; + case MC_ORG_GBOX: boxname = "GBOX"; break; + case MC_ORG_TBOX: boxname = "TBOX"; break; + default: boxname = "??"; /* Damn compiler */ + } + + for(j = 0; j < mr->num; j++) { + + if (mr->qflg & MCU_CTL_64) + ctl = mr->rq(j, mr->ofs + MCU_CTRL); + else + ctl = (uint64_t) mr->rl(j, mr->ofs + MCU_CTRL); + + stat = mr->rq(j, mr->ofs + MCU_STAT); + + if (mr->qflg & MCU_NO_ADDR) + addr = 0; + else { + if (mr->qflg & MCU_ADDR_32) + addr = (uint64_t) mr->rl(j, mr->ofs + MCU_ADDR); + else + addr = mr->rq(j, mr->ofs + MCU_ADDR); + } + + if (mr->qflg & MCU_NO_MISC) + misc = 0; + else { + if (mr->qflg & MCU_MISC_64) + misc = mr->rq(j, mr->ofs + MCU_MISC); + else + misc = (uint64_t) mr->rl(j, mr->ofs + MCU_MISC); + } + + printk("RAS.uncore: %s[%d] = { %llx, %llx, %llx, %llx }\n", + boxname, j, ctl, stat, addr, misc); + } + } + printk("RAS.uncore: MCA_INT_EN = %x\n", mr_sbox_rl(0, SBOX_MCA_INT_EN)); + printk("RAS.uncore: APICRT16 = %llx\n", mr_sbox_rq(0, SBOX_APICRT16)); +#endif + + printk("RAS.uncore: init complete\n"); + } + + return 0; +} + + +/* + * Cleanup for module unload. + * Clear/restore hooks in the SBOX's I/O-APIC. + */ + +int __exit +mcu_exit(void) +{ + if (! mce_disabled) { + + /* + * Disconnect uncore MC lines from SBOX I/O-APIC. + * No new uncore MC interrupts will be made. + */ + mr_sbox_wl(0, SBOX_MCA_INT_EN, 0); + + /* + * Disconnect exception handler. + */ +#if MCU_NMI + mcu_redir = 0; + mr_sbox_wq(0, SBOX_APICRT16, mcu_old_redir); +#else + mcu_eoi = 0; + free_irq(16, (void *) 42); +#endif + + /* + * Cut link from kernel's NMI handler and + * wait for everybody in handler to leave. + */ + mca_nmi = 0; + while(atomic_read(&mcu_entry)) + cpu_relax(); + mcu_cpu = -1; + + /* + * No more events will be received, clear + * MC reporting in all BOXes (just in case) + */ + box_reset(0); + } + + printk("RAS.uncore: exit complete\n"); + return 0; +} + diff --git a/ras/monahan.h b/ras/monahan.h new file mode 100644 index 0000000..4f3fd1f --- /dev/null +++ b/ras/monahan.h @@ -0,0 +1,201 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. 
Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * The Monahan GX processor implementation of the I2C unit does not support + * the hardware general call, 10-bit slave addressing or CBUS compatibility. + * Otherwise it is compliant with I2C spec version 2.1. + * + * This is the SBOX 'OverClock' bus controller, which for reference is + * mostly like the I2C controller on PXA270 with the above limitations. + */ + +#ifndef _MONAHAN_H_ +#define _MONAHAN_H_ 1 + +/* +** +** Layer 1 stuff +** +** Offsets and bit definitions for the Monahans I2C controller. +** This is equivalent to defines in 'i2c-pxa.c', but kept separate. +*/ + +/* + * Register locations (base SBOX register SBOX_OC_I2C_ICR) + */ +#define ICR_OFFSET 0x00 +#define ISR_OFFSET 0x04 +#define ISAR_OFFSET 0x08 +#define IDBR_OFFSET 0x0c +#define IBMR_OFFSET 0x10 + +/* + * I2C Control Register bits + */ +#define ICR_START 0x00000001 /* Start bit */ +#define ICR_STOP 0x00000002 /* Stop bit */ +#define ICR_ACKNAK 0x00000004 /* Send ACK(0) or NAK(1) */ +#define ICR_TB 0x00000008 /* Transfer byte bit */ +#define ICR_MA 0x00000010 /* Master abort */ +#define ICR_SCLE 0x00000020 /* Master clock enable */ +#define ICR_IUE 0x00000040 /* Unit enable */ +#define ICR_GCD 0x00000080 /* General call disable */ +#define ICR_ITEIE 0x00000100 /* Enable tx interrupts */ +#define ICR_DRFIE 0x00000200 /* Enable rx interrupts */ +#define ICR_BEIE 0x00000400 /* Enable bus error ints */ +#define ICR_SSDIE 0x00000800 /* Slave STOP detected int enable */ +#define ICR_ALDIE 0x00001000 /* Enable arbitration interrupt */ +#define ICR_SADIE 0x00002000 /* Slave address detected int enable */ +#define ICR_UR 0x00004000 /* Unit reset */ +#define ICR_MODE 0x00018000 /* Bus speed mode */ +#define ICR_RESERVED 0xfffe0000 /* Unused */ + +/* + * Bus speed control values + * High speed modes are not supported by controller. 
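+ *
+ * (Editor's note, added for clarity: the four values defined below
+ * populate the two-bit ICR_MODE field at bits 16:15; only the first
+ * two - standard and fast mode - are usable on this controller.)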
+ */ +#define ICR_STANDARD_MODE 0x00000000 /* 100k operation */ +#define ICR_FAST_MODE 0x00008000 /* 400k operation */ +#define ICR_HS_STANDARD_MODE 0x00010000 /* 3.4M/100k operation */ +#define ICR_HS_FAST_MODE 0x00018000 /* 3.4M/400k operation */ + +/* + * Shorthands + */ +#define ICR_ON (ICR_IUE | ICR_SCLE) /* Turn unit on */ +#define ICR_INIT_BITS (ICR_ITEIE | \ + ICR_DRFIE | \ + ICR_BEIE | \ + ICR_SADIE | \ + ICR_FAST_MODE | \ + ICR_ON) /* Init flags */ + +/* + * I2C Status Register bits + */ +#define ISR_RWM 0x00000001 /* Read(1)/write(0) mode */ +#define ISR_ACKNAK 0x00000002 /* Ack(0)/nak(1) sent or received */ +#define ISR_UB 0x00000004 /* Unit busy */ +#define ISR_IBB 0x00000008 /* Bus busy */ +#define ISR_SSD 0x00000010 /* Slave stop detected */ +#define ISR_ALD 0x00000020 /* Arbitration loss detected */ +#define ISR_ITE 0x00000040 /* Tx buffer empty */ +#define ISR_IRF 0x00000080 /* Rx buffer full */ +#define ISR_GCAD 0x00000100 /* General call address detected */ +#define ISR_SAD 0x00000200 /* Slave address detected */ +#define ISR_BED 0x00000400 /* Bus error no ACK/NAK */ +#define ISR_RESERVED 0xfffff800 /* Unused */ + +#define ISR_INTS (ISR_SSD | \ + ISR_ALD | \ + ISR_ITE | \ + ISR_IRF | \ + ISR_SAD | \ + ISR_BED) /* Interrupt flags */ +/* + * I2C Slave Address Register bits + */ +#define ISAR_SLADDR 0x0000007f /* 7-bit address for slave-receive mode */ +#define ISAR_RESERVED 0xffffff80 /* Unused */ + +/* + * I2C Data Buffer Register bits + */ +#define IDBR_DATA 0x000000ff /* 8-bit data buffer */ +#define IDBR_RESERVED 0xffffff00 /* Unused */ + +/* + * I2C Bus Monitor Register bits + */ +#define IBMR_SDA 0x00000001 /* State of SDA pin */ +#define IBMR_SCL 0x00000002 /* State of SCL pin */ +#define IBMR_RESERVED 0xfffffffc /* Unused */ + + +/* +** +** Layer 2 stuff +** +*/ + +/* + * Bus speed selections + */ +#define I2C_STANDARD ICR_STANDARD_MODE +#define I2C_FAST ICR_FAST_MODE +#define I2C_HS_STANDARD ICR_HS_STANDARD_MODE +#define I2C_HS_FAST ICR_HS_FAST_MODE + +/* + * Command types + */ +#define I2C_INVALID -1 /* Internal, not to be used */ +#define I2C_WRITE 0 /* Next transfer will be outgoing */ +#define I2C_READ 1 /* Next transfer will be incoming */ +#define I2C_NOP 2 /* Idle state */ + +/* + * Return codes + */ +#define XFER_SUCCESS 0 /* All OK */ +#define INCOMPLETE_XFER -1 /* Basic timeout */ +#define TX_CONTROLLER_ERROR -2 /* Requires reset */ +#define TX_NAK -3 /* NAK, master to send a stop */ +#define RX_SEVERE_ERROR -4 /* Requires reset */ +#define RX_END_WITHOUT_STOP -5 /* Deprecated */ +#define RX_BIZARRE_ERROR -6 /* Doesn't require reset */ + + +/* +** +** Layer 3 stuff +** +*/ + +/* + * Frequency selections + */ +#define FREQ_MAX -3 /* As fast as possible */ +#define FREQ_400K -2 /* 400 kHz */ +#define FREQ_100K -1 /* 100 kHz */ +#define FREQ_AUTO 0 /* Default speed */ + +/* + * Return codes: standard kernel codes used + * EBUSY, ENODEV, ENXIO, EINVAL, EIO + */ + +#endif /* Recursion block */ diff --git a/trace_capture/Kbuild b/trace_capture/Kbuild new file mode 100644 index 0000000..bc12e70 --- /dev/null +++ b/trace_capture/Kbuild @@ -0,0 +1 @@ +obj-m := trace_capture.o diff --git a/trace_capture/Makefile b/trace_capture/Makefile new file mode 100644 index 0000000..199953a --- /dev/null +++ b/trace_capture/Makefile @@ -0,0 +1,34 @@ +# +# Trace Capture module +# + +export ARCH = l1om + +KERNELDIR = $(CURDIR)/../../mic_linux +KBUILD := $(MAKE) -C $(KERNELDIR) ARCH=$(ARCH) M=$(CURDIR) + +ifneq ($(DESTDIR),) +INSTALL_MOD_PATH = $(DESTDIR) +endif + +ifeq 
($(shell \which x86_64-$(ARCH)-linux-gcc 2>/dev/null),) +export PATH := $(PATH):$(CURDIR)/../cross/bin +endif + +.PHONY: default modules install modules_install clean + +default: modules tests + +modules: + +$(KBUILD) $@ + +install: modules_install + +modules_install: + +$(KBUILD) INSTALL_MOD_PATH=$(DESTDIR) modules_install + +clean: + +$(KBUILD) clean + +tests: + echo no tests diff --git a/trace_capture/docapture.c b/trace_capture/docapture.c new file mode 100644 index 0000000..587bebe --- /dev/null +++ b/trace_capture/docapture.c @@ -0,0 +1,70 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include +#include +#include /* open */ +#include /* exit */ +#include /* ioctl */ + +#include "trace_capture.h" + +void +ioctl_start_capture(int file_desc, long trigger) +{ + ioctl(file_desc, MICTC_START_CAPTURE, trigger); +} + +int +main (int argc, char *argv[]) +{ + int file_desc; + long trigger = 1; + + if ((file_desc = open(MICTC_FILE_NAME, 0)) < 0) { + printf("Can't open device file: %s\n", MICTC_FILE_NAME); + exit(-1); + } + + if (argc == 2) { + trigger = atoi(argv[1]); + printf("Trigger %ld\n", trigger); + } + + ioctl_start_capture(file_desc, trigger); + printf("Done.\n"); + + close(file_desc); +} diff --git a/trace_capture/tc_host.c b/trace_capture/tc_host.c new file mode 100644 index 0000000..5eecf7c --- /dev/null +++ b/trace_capture/tc_host.c @@ -0,0 +1,366 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include +#include +#include +#include +#include +#include +#include "../include/scif.h" +#include "trace_capture.h" + +#define BARRIER(epd, string) { \ + printf("%s\n", string); \ + if ((err = scif_send(epd, &control_msg, sizeof(control_msg), 1)) <= 0) { \ + printf("scif_send failed with err %d\n", errno); \ + fflush(stdout); \ + goto close; \ + } \ + if ((err = scif_recv(epd, &control_msg, sizeof(control_msg), 1)) <= 0) { \ + printf("scif_recv failed with err %d\n", errno); \ + fflush(stdout); \ + goto close; \ + } \ +} + +#if 0 +// These are common to the Host App +// and the MIC driver Trace Capture Feature +// COMMON DEFINES START HERE +enum TRACE_COMMAND { + TRACE_NOP = 100, + TRACE_DATA, + TRACE_HOST_READY, + TRACE_DONE, + TRACE_ERROR, + TRACE_PRINT, + TRACE_GET_FILE, + TRACE_PAGE_READY, + TRACE_REG_COMPLETE, + TRACE_MEM_COMPLETE, + TRACE_COMPLETE +}; + +#define TRACE_STATUS_OFFSET 8 +#define TRACE_SIZE_OFFSET 12 + +// Enable/Disable Memory Test. +// This MUST be enabled simultaneously on Host App as well. +#define MIC_TRACE_CAPTURE_MEMORY_TEST 0 + +#if MIC_TRACE_CAPTURE_MEMORY_TEST +#define TRACE_CHECKSUM_OFFSET 16 +#endif + +#define TRACE_TRIGGER_OFFSET 20 +#define TRACE_DATA_OFFSET 4096 + +// Types of Triggers - Refer to uOS Trace Capture Wiki for Usage +// Generic counter +#define TRACE_HOST_GENERIC_COUNTER 0x1 +// Async Flip counter +#define TRACE_HOST_FRAME_COUNTER 0x2 +// COMMON DEFINES END HERE +#endif + +// End points for SCIF +//static scif_epd_t mictc_epd_cmd; +static scif_epd_t mictc_epd_data; + +// SCIF ports - temp hack; move to scif.h +#define MICTC_SCIF_PORT_DATA 300 + +static volatile uint64_t *g_traceBufferStatusOffset = NULL; +static volatile uint64_t *g_traceBufferSizeOffset = NULL; +static volatile uint32_t *g_traceBufferDataOffset = NULL; +static volatile uint32_t *g_traceBufferTriggerOffset = NULL; + +// This is an array of trigger numbers. The value TRACE_EOL is ignored. 
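+// Filled in from the command line by main(); unused slots stay TRACE_EOL.
+// open_scif_channels() copies every slot into the shared XML buffer at
+// TRACE_TRIGGER_OFFSET so the card-side driver can decide whether the
+// trigger that fired should actually produce a capture; a trigger we did
+// not ask for comes back as TRACE_ABORTED and the host simply re-arms.
+// For illustration: running this tool with arguments "1 3" captures only
+// triggers 1 and 3, while running it with no arguments accepts everything.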
+static uint32_t g_traceTriggers[TRACE_TRIGGER_MAX]; + +static struct scif_portID portID_data; +static scif_epd_t mictc_newepd; + +static void *g_mictc_buffer_base; +static void *g_mictc_buffer_offset_xml; +static off_t g_mictc_buffer_offset_mem; + +FILE *fp; + +static +int open_scif_channels(void) +{ + int err; + struct pollfd spollfd; + int control_msg = 0; + long scif_offset_dst; + int timeout = 0; + int page_count = 0; + int i; + + if ((err = posix_memalign(&g_mictc_buffer_base, 0x1000, MICTC_MEM_BUFFER_SIZE))) { + fprintf(stderr, "posix_memalign failed failed with %d\n", err); + return 0; + } + // Data channel + if ((mictc_epd_data = scif_open()) == SCIF_OPEN_FAILED) { + fprintf(stderr, "scif_open failed with ENOMEM\n", errno); + return 0; + } + + if (scif_bind(mictc_epd_data, MICTC_SCIF_PORT_DATA) == -1) { + fprintf(stderr, "scif_bind failed with error %d\n", errno); + return 0; + } + + portID_data.node = 1; + portID_data.port = MICTC_SCIF_PORT_DATA; + + if (scif_listen(mictc_epd_data, 1) == -1) { + fprintf(stderr, "scif_listen failed with error %d\n", errno); + return 0; + } + + while (1) { + printf("scif_accept in poll mode until a connect request is found\n"); + err = 1; + while (err) { + spollfd.fd = scif_get_fd(mictc_epd_data); + spollfd.events = POLLIN; + spollfd.revents = 0; + if ((err = poll(&spollfd, 1, -1)) < 0) { + printf("poll failed with err %d\n", errno); + } + if (((err = scif_accept(mictc_epd_data, &portID_data, &mictc_newepd, 0)) < 0) && (errno != EAGAIN)) { + printf("scif_accept failed with err %d\n", errno); + return 0; + } + } + + printf("scif_accept from port %d complete\n", portID_data.port); + + if ((g_mictc_buffer_offset_mem = scif_register(mictc_newepd, g_mictc_buffer_base, MICTC_MEM_BUFFER_SIZE, 0, // suggested_offset, + SCIF_PROT_READ | SCIF_PROT_WRITE, 0)) < 0) { + fprintf(stderr, "scif_register failed with err %d\n", errno); + return 0; + } + + printf("After scif_register, g_mictc_buffer_offset_mem = %llx\n", + (unsigned long long)g_mictc_buffer_offset_mem); + fflush(stdout); + + // printf("Before scif_send\n"); + // fflush(stdout); + + BARRIER(mictc_newepd, "before barrier"); + + if ((err = + scif_send(mictc_newepd, &g_mictc_buffer_offset_mem, sizeof(g_mictc_buffer_offset_mem), + SCIF_SEND_BLOCK)) <= 0) { + printf("scif_send failed with err %d\n", errno); + fflush(stdout); + goto close; + } + // BARRIER(mictc_newepd, "scif_send"); + + // printf("scif_offset = %lx\n", scif_offset); + // fflush(stdout); + + printf("Before scif_recv\n"); + fflush(stdout); + + if ((err = scif_recv(mictc_newepd, &scif_offset_dst, sizeof(scif_offset_dst), SCIF_RECV_BLOCK)) <= 0) { + printf("scif_recv failed with err %d\n", errno); + fflush(stdout); + goto close; + } + printf("scif_offset_dst = %lx\n", scif_offset_dst); + + printf("Before scif_mmap\n"); + + if ((g_mictc_buffer_offset_xml = scif_mmap(0, // physical address + MICTC_XML_BUFFER_SIZE, // length + SCIF_PROT_READ | SCIF_PROT_WRITE, // protection + 0, // flags + mictc_newepd, // endpoint + scif_offset_dst) // offset + ) == (void *)-1) { + fprintf(stderr, "scif_mmap failed with err %d\n", errno); + return 0; + } + + g_traceBufferStatusOffset = (uint64_t *) (g_mictc_buffer_offset_xml + TRACE_STATUS_OFFSET); + g_traceBufferSizeOffset = (uint64_t *) (g_mictc_buffer_offset_xml + TRACE_SIZE_OFFSET); + g_traceBufferDataOffset = (uint32_t *) (g_mictc_buffer_offset_xml + TRACE_DATA_OFFSET); + g_traceBufferTriggerOffset = (uint32_t *) (g_mictc_buffer_offset_xml + TRACE_TRIGGER_OFFSET); + + for (i = 0; i < TRACE_TRIGGER_MAX; i++) 
{ + *g_traceBufferTriggerOffset = g_traceTriggers[i]; + g_traceBufferTriggerOffset++; + } + + *g_traceBufferStatusOffset = TRACE_HOST_READY; + + printf("Before fopen\n"); + + if ((fp = fopen("cpu.xml", "w")) == NULL) { + fprintf(stderr, "Cannot open file cpu.xml.\n"); + } + + printf("Waiting for TRACE_REG_COMPLETE or TRACE_ABORTED"); + fflush(stdout); + + while (*g_traceBufferStatusOffset != TRACE_REG_COMPLETE) { + printf("."); + fflush(stdout); + sleep(1); + if (timeout++ >= 200) { + // Hmmm, something is hung up. Save everything in the buffer ignoring length. + printf("Punt!\n"); + fprintf(fp, "%s\n", (char *)g_traceBufferDataOffset); + *g_traceBufferStatusOffset = TRACE_GET_FILE; + fclose(fp); + sleep(5); + goto close; // and quit + } + // If this happens the current trigger was not one we want -- reset and wait. + if (*g_traceBufferStatusOffset == TRACE_ABORTED) { + printf("\nAborted trace\n"); + fflush(stdout); + goto close2; + } + } + printf("\n"); + + { + int j; + + asm volatile ("lfence" ::: "memory"); + j = *g_traceBufferSizeOffset; + fprintf(fp, "%*s\n", j, (char *)g_traceBufferDataOffset); + } + *g_traceBufferStatusOffset = TRACE_GET_FILE; + fclose(fp); + sleep(5); + + // Memory dump + + if ((fp = fopen("mem.dat", "w")) == NULL) { + fprintf(stderr, "Cannot open file mem.dat.\n"); + } + + printf("Waiting for memory pages\n"); + fflush(stdout); + + timeout = 0; + + { + long i = 0; + + while (*g_traceBufferStatusOffset != TRACE_MEM_COMPLETE) { + //printf("status %d\n", *g_traceBufferStatusOffset); + + if (*g_traceBufferStatusOffset == TRACE_PAGE_READY) { + printf(" %ld", i++); + fflush(stdout); + asm volatile ("lfence" ::: "memory"); + + if (fwrite(g_mictc_buffer_base, *g_traceBufferSizeOffset, 1, fp) != 1) { + fprintf(stderr, "\nCannot write file mem.dat. error = %d\n", ferror(fp)); + return 0; + } + *g_traceBufferStatusOffset = TRACE_HOST_READY; // Get next page + timeout = 0; + } else { + // printf("."); + // fflush(stdout); + usleep(10000); + + if (timeout++ >= 2000) { + // Hmmm, something is hung up. Just close and quit. + printf("Punt!\n"); + fclose(fp); + sleep(5); + goto close; // and quit + } + } + } + } + close1: + printf("\nClosing memory dump file.\n"); + fflush(stdout); + fclose(fp); + *g_traceBufferStatusOffset = TRACE_COMPLETE; // File is closed; tell driver we are done. + printf("Done.\n"); + fflush(stdout); + close2: + sleep(2); + scif_munmap(g_mictc_buffer_offset_xml, MICTC_XML_BUFFER_SIZE); + scif_unregister(mictc_newepd, (off_t) g_mictc_buffer_base, MICTC_MEM_BUFFER_SIZE); + scif_close(mictc_newepd); + } // while (1) + close: + scif_munmap(g_mictc_buffer_offset_xml, MICTC_XML_BUFFER_SIZE); + scif_close(mictc_newepd); + scif_close(mictc_epd_data); + free(g_mictc_buffer_base); + return 1; +} + +int main(int argc, char *argv[]) +{ + int i; + + for (i = 0; i < TRACE_TRIGGER_MAX; i++) { + g_traceTriggers[i] = TRACE_EOL; + } + + if (argc >= 2) { + for (i = 1; i < argc; i++) { + if (i > TRACE_TRIGGER_MAX) break; + + g_traceTriggers[i - 1] = atoi(argv[i]); + printf("Trigger %d\n", g_traceTriggers[i - 1]); + } + } else { + printf("No triggers -- accept everything\n"); + } + + if (!open_scif_channels()) + exit(1); + + exit(0); +} diff --git a/trace_capture/tc_memcvt.c b/trace_capture/tc_memcvt.c new file mode 100644 index 0000000..38efce1 --- /dev/null +++ b/trace_capture/tc_memcvt.c @@ -0,0 +1,85 @@ +/* + * Copyright 2010-2017 Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include +#include +#include +#include +#include +#include +#include "../include/scif.h" + +// Use 2MB for KNF and 4MB for KNC. +#define MICTC_XML_BUFFER_SIZE (2 * 1024 * 1024) + +// Memory transfer window. 1GB +#define MICTC_MEM_BUFFER_SIZE (1 * 1024UL * 1024UL * 1024UL) + +FILE *ip; +FILE *op; + + +int main(void) +{ + long srcPhysAddr = 0; + uint32_t page_buf[4096/4]; + long i = 0; + int size; + char dest[64]; + + if ((ip = fopen("mem.dat", "r")) == NULL) { + fprintf(stderr, "Cannot open file mem.dat.\n"); + } + + if ((op = fopen("memfmt.txt", "w")) == NULL) { + fprintf(stderr, "Cannot open file memfmt.txt.\n"); + } + + while (! feof(ip)) { + fread(page_buf, sizeof(page_buf), 1, ip); // check for error + + size = sprintf(dest, "origin %lx\n", srcPhysAddr); + fwrite(dest, size, 1, op); + + for (i = 0; i < 4096/4; i++) { + size = sprintf(dest, "%x\n", page_buf[i]); + fwrite(dest, size, 1, op); + } + + srcPhysAddr += 4096; + } + fclose(ip); + fclose(op); +} diff --git a/trace_capture/trace_capture.c b/trace_capture/trace_capture.c new file mode 100644 index 0000000..5cf1bfe --- /dev/null +++ b/trace_capture/trace_capture.c @@ -0,0 +1,2031 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * Trace Capture Driver + * + * Contains code to handle trace_capture syscall, stop all cpus + * and dump their state, then dump all physical memeory. + */ + +#include "trace_capture.h" + +//#define DEBUG + +int always_false = 0; + +#define BARRIER(epd, string) { \ + printk("%s\n", string); \ + if ((err = scif_send(epd, &control_msg, sizeof(control_msg), 1)) <= 0) { \ + pr_crit("%s:%s:%d scif_send failed with err %ld\n", __FILE__, __FUNCTION__, __LINE__, err); \ + goto close; \ + } \ + if ((err = scif_recv(epd, &control_msg, sizeof(control_msg), 1)) <= 0) { \ + pr_crit("%s:%s:%d scif_recv failed with err %ld\n", __FILE__, __FUNCTION__, __LINE__, err); \ + goto close; \ + } \ +} + +/* SPU privileged gates (per specification) */ +#define SPU_SPBA_OFFSET 0x1000 /* offset of Privileged gates in SPU MMIO */ +#define SPU_XQ_SIZE 0x040 +#define SPU_XQ_BASE 0x080 +#define SPU_XQ_INDEX 0x0C0 +#define SPU_CR 0x100 +#define SPU_CONTROL 0x100 +#define SPU_SAMPLER_BASE 0x140 +#define SPU_ABORT 0x180 +#define SPU_ABORT_STATUS 0x1C0 +#define SPU_FLUSH 0x200 +#define SPU_FLUSH_STATUS 0x240 +#define SPU_INVALPG_4K 0x280 +#define SPU_INVALPG_64K 0x2C0 +#define SPU_INVALPG_2M 0x300 +#define SPU_EMPTY 0x340 +#define SPU_ACTIVE 0x340 +#define SPU_FULL 0x380 +#define SPU_SOFT_RESET 0x3C0 +#define SPU_PMU_EVENT_SEL 0x400 +#define SPU_CONTROL2 0x440 +#define SPU_CONTROL3 0x480 + +#define SPU_MEM_BW_LIMIT 0x4C0 // This is 64 bit register + +#define SPU_TCU_CREDITS 0x700 +#define SPU_FER 0x800 +#define SPU_ALT_FER 0x840 +#define SPU_MATCH_ACTION 0x880 +#define SPU_INVAL 0xB00 +#define SPU_COUNTER0_SET 0x500 +#define SPU_COUNTER1_SET 0x540 +#define SPU_COUNTER2_SET 0x580 +#define SPU_COUNTER3_SET 0x5C0 +#define SPU_COUNTER4_SET 0x600 +#define SPU_COUNTER5_SET 0x640 +#define SPU_COUNTER6_SET 0x680 +#define SPU_COUNTER7_SET 0x6C0 + +#define CBOX_SPU_PA_MSR 0x0000017E +#define CBOX_SPU_SAMPLER_BIND_MSR 0x0000017F + +#define MSR_SF_MASK 0xc0000084 /* syscall flags mask */ +#define MSR_FSBASE 0xc0000100 /* base address of the %fs "segment" */ +#define MSR_GSBASE 0xc0000101 /* base address of the %gs "segment" */ +#define MSR_KGSBASE 0xc0000102 /* base address of the kernel %gs */ + +// MSR's defined in the trace file sent during 
REQs +// Are these all valid for L1OM?? +#define P6_CR_TSC 0x10 +#define X86_CR_APICBASE 0x1b +#define MIC_CR_SPUBASE 0x1c +#define IA32_CR_MISC 0x1a0 +#define WMT_CR_LASTBRANCH_0 0x1db +#define WMT_CR_LASTBRANCH_1 0x1dc +#define X86_CR_MTRRphysMask0 0x201 +#define X86_CR_MTRRphysMask1 0x203 +#define X86_CR_MTRRphysMask2 0x205 +#define X86_CR_MTRRphysMask3 0x207 +#define X86_CR_MTRRphysMask4 0x209 +#define X86_CR_MTRRphysMask5 0x20b +#define X86_CR_MTRRphysMask6 0x20d +#define X86_CR_MTRRphysMask7 0x20f +#define IA32_CR_PAT 0x277 +#define IA32_MTRR_DEF_TYPE 0x2ff +#define VMX_MSR_BASE 0x480 +#define VMX_MSR_BASE_PLUS_1 0x481 +#define VMX_MSR_BASE_PLUS_2 0x482 +#define VMX_MSR_BASE_PLUS_3 0x483 +#define VMX_MSR_BASE_PLUS_4 0x484 +#define VMX_MSR_BASE_PLUS_5 0x485 +#define VMX_MSR_BASE_PLUS_6 0x486 +#define VMX_MSR_BASE_PLUS_7 0x487 +#define VMX_MSR_BASE_PLUS_8 0x488 +#define VMX_MSR_BASE_PLUS_9 0x489 +#define TIME 0x4711 +#define PINFO 0x4712 +#define X86_CR_MTRRdefType 0x2ff +#define X86_CR_MTRRcap 0xfe +#define X86_CR_MTRRphysBase0 0x200 +#define X86_CR_MTRRphysBase1 0x202 +#define X86_CR_MTRRphysBase2 0x204 +#define X86_CR_MTRRphysBase3 0x206 +#define X86_CR_MTRRphysBase4 0x208 +#define X86_CR_MTRRphysBase5 0x20a +#define X86_CR_MTRRphysBase6 0x20c +#define X86_CR_MTRRphysBase7 0x20e +#define X86_CR_MTRRfix64K_00000 0x250 +#define X86_CR_MTRRfix16K_80000 0x258 +#define X86_CR_MTRRfix16K_A0000 0x259 +#define X86_CR_MTRRfix4K_C0000 0x268 +#define X86_CR_MTRRfix4K_C8000 0x269 +#define X86_CR_MTRRfix4K_D0000 0x26a +#define X86_CR_MTRRfix4K_D8000 0x26b +#define X86_CR_MTRRfix4K_E0000 0x26c +#define X86_CR_MTRRfix4K_E8000 0x26d +#define X86_CR_MTRRfix4K_F0000 0x26e +#define X86_CR_MTRRfix4K_F8000 0x26f +#define P5_MC_ADDR 0x0 +#define P5_MC_TYPE 0x1 +#define MSR_TR1 0x2 +#define MSR_TR2 0x4 +#define MSR_TR3 0x5 +#define MSR_TR4 0x6 +#define MSR_TR5 0x7 +#define MSR_TR6 0x8 +#define MSR_TR7 0x9 +#define MSR_TR9 0xb +#define MSR_TR10 0xc +#define MSR_TR11 0xd +#define MSR_TR12 0xe +#define IA32_APIC_BASE 0x1b +#define IA32_TIME_STAMP_COUNTER 0x10 +#define IA32_PerfCntr0 0x20 +#define IA32_PerfCntr1 0x21 +#define IA32_PerfCntr2 0x22 +#define IA32_PerfCntr3 0x23 +#define PerfFilteredCntr0 0x24 +#define PerfFilteredCntr1 0x25 +#define PerfFilteredCntr2 0x26 +#define PerfFilteredCntr3 0x27 +#define IA32_PerfEvtSel0 0x28 +#define IA32_PerfEvtSel1 0x29 +#define IA32_PerfEvtSel2 0x2a +#define IA32_PerfEvtSel3 0x2b +#define PerfFilterMask 0x2c +#define IA32_PERF_GLOBAL_STATUS 0x2d +#define IA32_PERF_GLOBAL_OVF_CONTROL 0x2e +#define IA32_PERF_GLOBAL_CTRL 0x2f +#define IA32_MCG_CTL 0x17b +#define IA32_MC0_CTRL 0x400 +#define IA32_MC0_STAT 0x401 +#define IA32_MC0_ADDR 0x402 +#define IA32_MC0_MISC 0x403 +#define IA32_MC1_CTRL 0x404 +#define IA32_MC1_STAT 0x405 +#define IA32_MC1_ADDR 0x406 +#define IA32_MC1_MISC 0x407 +#define STAR 0xc0000081 +#define LSTAR 0xc0000082 +#define SYSCALL_FLAG_MASK 0xc0000084 +#define X86_PAT 0x277 +#define SPU_BASE 0x1C + +// Kernel virtual address to physical page at 0xfee03000 +// This is created by an ioremap outside of interrupt context. 
+static uint8_t *spu_addr; + +struct mictc_seg { + struct desc_struct desc; + char zero[8]; + u16 selector; + uint64_t base; +}; + +struct mictc_tss { + tss_desc desc; + u16 selector; + uint64_t base; +}; + +struct mictc_segment_reg +{ + struct mictc_seg cs; + struct mictc_seg ds; + struct mictc_seg es; + struct mictc_seg ss; + struct mictc_seg fs; + struct mictc_seg gs; + struct mictc_tss ldtr; + struct mictc_tss tr; +}; + +#define MAX_SEG_REG 8 + +static char *SegRegNames[MAX_SEG_REG] = {"CS","DS","ES","SS", "FS","GS","LDTR","TR"}; + +//static struct i387_fxsave_struct fpu; + +struct mictc_trace +{ + struct mictc_segment_reg segment; + struct vpustate_struct vpustate; + struct i387_fxsave_struct fpu; +}; + +struct mictc_trace *trace; + +// fxsave definition copied from fpu.c +//#define mictc_fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) +#define mictc_fxsave(addr) __asm __volatile("fxsave (%0)" : "=a" (addr) : [fx] "a" (addr)) + + +// Spinlock to serialize access in IPI handler +static DEFINE_SPINLOCK(mictc_lock); + +// Used to count the cpus waiting +static atomic_t cpus_stopped = ATOMIC_INIT(0); + +// Used to count the cpus released +static atomic_t cpus_released = ATOMIC_INIT(0); + +// End points for SCIF +//static scif_epd_t mictc_endp_cmd; +static scif_epd_t mictc_endp_data; + +// SCIF ports - temp hack; move to scif.h +#define MICTC_SCIF_PORT_DATA 300 + +// Used to prevent concurent access into the same device . +static int Device_Open = 0; + +#define PS_BUF_SIZE 150 +//static char print_string_buf[PS_BUF_SIZE] = ""; + +#define print_str(fmt, ...) \ +{ \ + snprintf(print_string_buf, PS_BUF_SIZE, fmt, ##__VA_ARGS__); \ + print_string(print_string_buf); \ +} + +//#define printk(fmt, ...) print_str(fmt, ##__VA_ARGS__) +//#undef pr_crit +//#define pr_crit(fmt, ...) print_str(fmt, ##__VA_ARGS__) + +// Interrupts off / on +#define cli __asm (" cli\n") +#define sti __asm (" sti\n") + +// Debug code to display low 16 bits of eflags register. +#define print_eflags \ + {unsigned long kernel_eflags; \ + raw_local_save_flags(kernel_eflags); \ + printk("%s:%d eflags %lx\n", __FUNCTION__, __LINE__, kernel_eflags); \ + } + + +// Find another definition of this in some .h file +static __inline void +mictc_cpuid(u_int ax, u_int *p) +{ + __asm __volatile("cpuid" + : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "0" (ax)); +} + +static inline +uint32_t get_dr(int regno) +{ + unsigned long val = 0; /* Damn you, gcc! 
*/ + + switch (regno) { + case 0: + asm("mov %%db0, %0" :"=r" (val)); + break; + case 1: + asm("mov %%db1, %0" :"=r" (val)); + break; + case 2: + asm("mov %%db2, %0" :"=r" (val)); + break; + case 3: + asm("mov %%db3, %0" :"=r" (val)); + break; + case 4: + asm("mov %%db4, %0" :"=r" (val)); + break; + case 5: + asm("mov %%db5, %0" :"=r" (val)); + break; + case 6: + asm("mov %%db6, %0" :"=r" (val)); + break; + case 7: + asm("mov %%db7, %0" :"=r" (val)); + break; + default: + BUG(); + } + return val; +} + + +static inline void mictc_store_ldt(u16 *dtr) +{ + asm volatile("sldt %0":"=m" (*dtr)); +} + + +static inline void mictc_store_tr(u16 *dtr) +{ + asm volatile("str %0":"=m" (*dtr)); +} + + +static inline void read_gdt_entry(struct desc_struct *gdt, int entry, + void *desc, int type) +{ + unsigned int size; + switch (type) { + case DESC_TSS: + size = sizeof(tss_desc); + break; + case DESC_LDT: + size = sizeof(ldt_desc); + break; + default: + size = sizeof(struct desc_struct); + break; + } + memcpy(desc, &gdt[entry], size); +#if 0 // Helpful for debug + { u64 *p = (u64 *)&gdt[entry]; + printk("GDT[entry] = %p %llx %llx\n", &gdt[entry], p[0], p[1]); + } +#endif +} + + +static inline void __get_tss_desc(unsigned cpu, unsigned int entry, void *dest) +{ + struct desc_struct *d = get_cpu_gdt_table(cpu); + read_gdt_entry(d, entry, dest, DESC_TSS); +} + +#define get_tss_desc(cpu, addr) __get_tss_desc(cpu, GDT_ENTRY_TSS, addr) + + +static inline void __get_seg_desc(unsigned cpu, unsigned int entry, void *dest) +{ + struct desc_struct *d = get_cpu_gdt_table(cpu); + + read_gdt_entry(d, entry, dest, 0); +} + +#define get_seg_desc(cpu, seg, addr) __get_seg_desc(cpu, ((seg & 0xffff) >> 3), addr) + +// Redefine rdmsr to work like BSD. + +//#undef rdmsr +//#define rdmsr(msr) tc_msr((msr)) + +static inline +uint64_t tc_rdmsr(uint32_t msrid) +{ + uint32_t lower, upper; + rdmsr(msrid, lower, upper); + return (uint64_t)upper << 32 | lower; +} + +// Number of Retries before it is assumed that the Host will not respond +#define TRACE_CAPTURE_TIMEOUT 50000000 + +static void *g_traceBufferAllocated; + +// Global variable used by initiator to wait for everyone to complete trace captures +//static volatile u32 g_smpTraceCaptureWait; + +// Global variable to keep track of how much data we are writing to the shared buffer +// with the Host. +static volatile u64 g_sizeXferred = 0; + +static s64 g_triggerFound = -1; + +static volatile u64 *g_traceBufferStatusOffset = NULL; +static volatile u64 *g_traceBufferSizeOffset = NULL; +static volatile u32 *g_traceBufferDataOffset = 0; +static volatile u32 *g_traceBufferTriggerOffset = NULL; + +// This is an array of trigger numbers. The value TRACE_EOL is ignored. +static u32 g_traceTriggers[TRACE_TRIGGER_MAX]; +static u32 g_traceCurrentTrigger; + +static long scif_offset_xml; +//static long scif_offset_xml_dst; +static long scif_offset_mem; +static long scif_offset_dst; + +#if MIC_TRACE_CAPTURE_MEMORY_TEST +static volatile u64 *g_traceBufferChecksumOffset = NULL; + +// The maximum size allowed for a DMA transfer is 1MB - 4K. The size of this array +// is 1MB to allow this to be used as the dst memory while dumping entire GDDR +// For Debug purposes only. +static u32 g_dstMemoryDump[4096/sizeof(u32)] __attribute__ ((aligned(4096))); +#endif + +#define TRACE_SPRINTF(...) 
\ + (g_sizeXferred += sprintf(((char*)g_traceBufferDataOffset + g_sizeXferred), __VA_ARGS__)) + +#define ADD_SPU_REG_TO_HEADER(x) \ + TRACE_SPRINTF("\t\t\t\t\n\t\t\t\t\t%s\n\t\t\t\t\n", (x), #x) + +#define ADD_MSR_TO_HEADER(x) \ + TRACE_SPRINTF("\t\t\t\t\n", (x)) + +#define TRACE_SPRINTF_MSR(x) \ + TRACE_SPRINTF("\t\t\t\t0x%llx\n", (x), tc_rdmsr((x))) + +#define TRACE_SPRINTF_SPU(x) \ + TRACE_SPRINTF("\t\t\t\t0x%llx\n", (x), *(volatile u64*)((u8*)spu_addr + (x))) + +#define TRACE_SPRINTF_VECTOR(x, vpu) \ + PrintVector((u8*)&(vpu), (x)) + + +//------------------------------------------------------------------------------ +// FUNCTION: mictc_trace_capture_prep_SPU_header +// +// DESCRIPTION: +// Perform all the tasks related to preparing the SPU Trace Header +// +// PARAMETERS: None +// +// RETURNS: None +// +// TODOS: +// +static void +mictc_trace_capture_prep_SPU_header(void) +{ + TRACE_SPRINTF("\t\t\t\n"); + ADD_SPU_REG_TO_HEADER(SPU_XQ_SIZE); + ADD_SPU_REG_TO_HEADER(SPU_XQ_BASE); + ADD_SPU_REG_TO_HEADER(SPU_XQ_INDEX); + ADD_SPU_REG_TO_HEADER(SPU_CONTROL); + ADD_SPU_REG_TO_HEADER(SPU_SAMPLER_BASE); + ADD_SPU_REG_TO_HEADER(SPU_PMU_EVENT_SEL); + ADD_SPU_REG_TO_HEADER(SPU_CONTROL2); + ADD_SPU_REG_TO_HEADER(SPU_CONTROL3); + TRACE_SPRINTF("\t\t\t\n"); +} + + +//------------------------------------------------------------------------------ +// FUNCTION: mictc_trace_capture_prep_cpuid_header +// +// DESCRIPTION: +// Perform all the tasks related to preparing the CPUID Trace Header +// +// PARAMETERS: None +// +// RETURNS: None +// +// TODOS: +// +static void +mictc_trace_capture_prep_cpuid_header(void) +{ + u_int regs[4]; + int i =0; + TRACE_SPRINTF("\t\t\t\n"); + for (i = 0; i < 0x4; i++) + { + mictc_cpuid(i, regs); + TRACE_SPRINTF("\t\t\t\t0x%x-0x%x-0x%x-0x%x\n", + i, regs[0], regs[1], regs[2], regs[3]); + } + TRACE_SPRINTF("\t\t\t\n"); +} + + +//------------------------------------------------------------------------------ +// FUNCTION: mictc_trace_capture_prep_msr_header +// +// DESCRIPTION: +// Perform all the tasks related to preparing the MSR Trace Header +// +// PARAMETERS: None +// +// RETURNS: None +// +// TODOS: +// +static void +mictc_trace_capture_prep_msr_header(void) +{ + TRACE_SPRINTF("\t\t\t\n"); + ADD_MSR_TO_HEADER(P6_CR_TSC); + ADD_MSR_TO_HEADER(X86_CR_APICBASE); + ADD_MSR_TO_HEADER(CBOX_SPU_PA_MSR); + ADD_MSR_TO_HEADER(SPU_BASE); + ADD_MSR_TO_HEADER(CBOX_SPU_SAMPLER_BIND_MSR); + ADD_MSR_TO_HEADER(X86_CR_MTRRphysMask0); + ADD_MSR_TO_HEADER(X86_CR_MTRRphysMask1); + ADD_MSR_TO_HEADER(X86_CR_MTRRphysMask2); + ADD_MSR_TO_HEADER(X86_CR_MTRRphysMask3); + ADD_MSR_TO_HEADER(X86_CR_MTRRphysMask4); + ADD_MSR_TO_HEADER(X86_CR_MTRRphysMask5); + ADD_MSR_TO_HEADER(X86_CR_MTRRphysMask6); + ADD_MSR_TO_HEADER(X86_CR_MTRRphysMask7); + ADD_MSR_TO_HEADER(MSR_EFER); + ADD_MSR_TO_HEADER(MSR_SF_MASK); + ADD_MSR_TO_HEADER(MSR_FSBASE); + ADD_MSR_TO_HEADER(MSR_GSBASE); + ADD_MSR_TO_HEADER(X86_CR_MTRRdefType); + ADD_MSR_TO_HEADER(X86_CR_MTRRcap); + ADD_MSR_TO_HEADER(X86_CR_MTRRphysBase2); + ADD_MSR_TO_HEADER(X86_CR_MTRRphysBase0); + ADD_MSR_TO_HEADER(X86_CR_MTRRphysBase1); + ADD_MSR_TO_HEADER(X86_CR_MTRRphysBase3); + ADD_MSR_TO_HEADER(X86_CR_MTRRphysBase4); + ADD_MSR_TO_HEADER(X86_CR_MTRRphysBase5); + ADD_MSR_TO_HEADER(X86_CR_MTRRphysBase6); + ADD_MSR_TO_HEADER(X86_CR_MTRRphysBase7); + ADD_MSR_TO_HEADER(STAR); + ADD_MSR_TO_HEADER(LSTAR); + ADD_MSR_TO_HEADER(MSR_KGSBASE); + + // The following MSR's are currently ifdef'd out + // because LarrySim barfs on these. + // We might need these later. 
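+  // Note that the header only names the MSRs the trace will contain; the
+  // values themselves are emitted per CPU by mictc_capture_MSR() via
+  // TRACE_SPRINTF_MSR(), so the two lists should be kept in sync.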
+#if 0 + ADD_MSR_TO_HEADER(X86_CR_MTRRfix64K_00000); + ADD_MSR_TO_HEADER(X86_CR_MTRRfix16K_80000); + ADD_MSR_TO_HEADER(X86_CR_MTRRfix16K_A0000); + ADD_MSR_TO_HEADER(X86_CR_MTRRfix4K_C0000); + ADD_MSR_TO_HEADER(X86_CR_MTRRfix4K_C8000); + ADD_MSR_TO_HEADER(X86_CR_MTRRfix4K_D0000); + ADD_MSR_TO_HEADER(X86_CR_MTRRfix4K_D8000); + ADD_MSR_TO_HEADER(X86_CR_MTRRfix4K_E0000); + ADD_MSR_TO_HEADER(X86_CR_MTRRfix4K_E8000); + ADD_MSR_TO_HEADER(X86_CR_MTRRfix4K_F0000); + ADD_MSR_TO_HEADER(X86_CR_MTRRfix4K_F8000); + ADD_MSR_TO_HEADER(P5_MC_ADDR); + ADD_MSR_TO_HEADER(P5_MC_TYPE); + ADD_MSR_TO_HEADER(MSR_TR1); + ADD_MSR_TO_HEADER(MSR_TR2); + ADD_MSR_TO_HEADER(MSR_TR3); + ADD_MSR_TO_HEADER(MSR_TR4); + ADD_MSR_TO_HEADER(MSR_TR5); + ADD_MSR_TO_HEADER(MSR_TR6); + ADD_MSR_TO_HEADER(MSR_TR7); + ADD_MSR_TO_HEADER(MSR_TR9); + ADD_MSR_TO_HEADER(MSR_TR10); + ADD_MSR_TO_HEADER(MSR_TR11); + ADD_MSR_TO_HEADER(MSR_TR12); + ADD_MSR_TO_HEADER(IA32_APIC_BASE); + ADD_MSR_TO_HEADER(IA32_TIME_STAMP_COUNTER); + ADD_MSR_TO_HEADER(IA32_PerfCntr0); + ADD_MSR_TO_HEADER(IA32_PerfCntr1); + ADD_MSR_TO_HEADER(IA32_PerfCntr2); + ADD_MSR_TO_HEADER(IA32_PerfCntr3); + ADD_MSR_TO_HEADER(PerfFilteredCntr0); + ADD_MSR_TO_HEADER(PerfFilteredCntr1); + ADD_MSR_TO_HEADER(PerfFilteredCntr2); + ADD_MSR_TO_HEADER(PerfFilteredCntr3); + ADD_MSR_TO_HEADER(IA32_PerfEvtSel0); + ADD_MSR_TO_HEADER(IA32_PerfEvtSel1); + ADD_MSR_TO_HEADER(IA32_PerfEvtSel2); + ADD_MSR_TO_HEADER(IA32_PerfEvtSel3); + ADD_MSR_TO_HEADER(PerfFilterMask); + ADD_MSR_TO_HEADER(IA32_PERF_GLOBAL_STATUS); + ADD_MSR_TO_HEADER(IA32_PERF_GLOBAL_OVF_CONTROL); + ADD_MSR_TO_HEADER(IA32_PERF_GLOBAL_CTRL); + ADD_MSR_TO_HEADER(IA32_MCG_CTL); + ADD_MSR_TO_HEADER(IA32_MC0_CTRL); + ADD_MSR_TO_HEADER(IA32_MC0_STAT); + ADD_MSR_TO_HEADER(IA32_MC0_ADDR); + ADD_MSR_TO_HEADER(IA32_MC0_MISC); + ADD_MSR_TO_HEADER(IA32_MC1_CTRL); + ADD_MSR_TO_HEADER(IA32_MC1_STAT); + ADD_MSR_TO_HEADER(IA32_MC1_ADDR); + ADD_MSR_TO_HEADER(IA32_MC1_MISC); + ADD_MSR_TO_HEADER(SYSCALL_FLAG_MASK); + ADD_MSR_TO_HEADER(X86_PAT); +#endif + TRACE_SPRINTF("\t\t\t\n"); +} + + +//------------------------------------------------------------------------------ +// FUNCTION: mictc_prep_header +// +// DESCRIPTION: +// Perform all the tasks related to preparing the Trace Header +// +// PARAMETERS: None +// +// RETURNS: None +// +// TODOS: +// +static void +mictc_prep_header(void) +{ + int i; + + TRACE_SPRINTF("\n"); + TRACE_SPRINTF("\n"); + TRACE_SPRINTF("\n"); + TRACE_SPRINTF("\t
\n"); + TRACE_SPRINTF("\t\t1.0\n"); + TRACE_SPRINTF("\t\tNov 19 2009\n"); + TRACE_SPRINTF("\t\t1.1\n"); + TRACE_SPRINTF("\t\tOct 21 2009\n"); + TRACE_SPRINTF("\t\tarchlib\n"); + TRACE_SPRINTF("\t\tWarnings! This is based on the state available in archlib.\n"); + TRACE_SPRINTF("\t\t This state dump is primarily good for capturing frequently used architectural register state.\n"); + TRACE_SPRINTF("\t\t Support for CPUId, MSRs, APIC, and x87 state is currently incomplete.\n"); + TRACE_SPRINTF("\t\t There is no support for state not specifically modeled in archlib.\n"); + TRACE_SPRINTF("\t\t Have also noticed inconsistencies in the final value of the RFLAGS reg.\n"); + if (g_triggerFound != -1) + { + TRACE_SPRINTF("\t\t This capture is generated for HOST BASED TRIGGER # %lld.\n", g_triggerFound); + g_triggerFound = -1; + } + TRACE_SPRINTF("\t
\n"); + TRACE_SPRINTF("\t\n"); + TRACE_SPRINTF("\t\t%d\n", num_online_cpus()); + TRACE_SPRINTF("\n"); + + for (i = 0; i < num_online_cpus(); i++) + { + TRACE_SPRINTF("\t\t\n", i); +// SPU is not supported in Linux + if (always_false) mictc_trace_capture_prep_SPU_header(); + mictc_trace_capture_prep_cpuid_header(); + mictc_trace_capture_prep_msr_header(); + TRACE_SPRINTF("\t\t\n"); + } + + TRACE_SPRINTF("\t\n"); + TRACE_SPRINTF("\t\n"); + TRACE_SPRINTF("\t\t\n"); + TRACE_SPRINTF("\t\n"); + TRACE_SPRINTF("\t\n"); + TRACE_SPRINTF("\n"); +} + + +//------------------------------------------------------------------------------ +// FUNCTION: mictc_capture_general_purpose_reg +// +// DESCRIPTION: +// Capture all general purpose registers. +// +// PARAMETERS: None +// +// RETURNS: None +// +// TODOS: +// +static void +mictc_capture_general_purpose_reg(struct pt_regs *regs) +{ + // printk("starting reg dump regs=%llx\n", (uint64_t)regs); + + if (!regs) { + printk("Null pointer found. cpu %d %s\n", smp_processor_id(), current->comm); + return; + } + + TRACE_SPRINTF("\t\t\t\n"); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->ax); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->bx); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->cx); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->dx); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->bp); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->sp); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->si); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->di); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->r8); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->r9); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->r10); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->r11); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->r12); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->r13); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->r14); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->r15); +// In cases where a CPU is halted and is woken up from halt by the trace capture IPI +// we want to report the RIP as the one pointing to the halt instruction itself +// and not the one on the trap frame. This is to avoid the condition where the simulator-run +// for these halted CPUs ends up running extra cycles (before going back idle) +// which would not happen under actual conditions. Problem reported by Jason S. +//// if(regs->tf_rip == (register_t)ExitIdle) +//// TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->ip-1); +//// else + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->ip); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", regs->flags); + TRACE_SPRINTF("\t\t\t\n"); + + // printk("ending reg dump\n"); +} + + +//------------------------------------------------------------------------------ +// FUNCTION: mictc_capture_segment_reg +// +// DESCRIPTION: +// Capture all segment registers. +// +// PARAMETERS: None +// +// RETURNS: None +// +// TODOS: +// +static void +mictc_capture_segment_reg(struct mictc_segment_reg *segment, struct pt_regs *regs) +{ + int i, v; + struct desc_ptr gdtr; + struct desc_ptr idtr; + struct mictc_seg *segreg; + +// printk("Segment registers on cpu %d\n", smp_processor_id()); + + // This is only useful during initial development. + if (!regs) { + printk("Null pointer found. 
cpu %d %s\n", smp_processor_id(), current->comm); + return; + } + + segment->cs.selector = (u16)regs->cs; + segment->ss.selector = (u16)regs->ss; +#if 0 + if (ISPL(regs->tf_cs) == SEL_KPL && curthread->td_pcb->pcb_ds == 0x0) { + // Specifically required for kernel IDLE thread + segment->ds = 0x10; + segment->es = 0x10; + segment->fs = 0x10; + segment->gs = 0x10; + } else { +#endif + asm("movl %%ds,%0" : "=r" (v)); segment->ds.selector = v; + asm("movl %%es,%0" : "=r" (v)); segment->es.selector = v; + segment->fs.selector = current->thread.fs; + segment->gs.selector = current->thread.gs; +// } + mictc_store_tr(&(segment->tr.selector)); + get_tss_desc(smp_processor_id(), &(segment->tr.desc)); + store_gdt(&gdtr); + store_idt(&idtr); + mictc_store_ldt(&(segment->ldtr.selector)); + // LDT is not used, so zeros will be printed. + + TRACE_SPRINTF("\t\t\t\n"); + segreg = (struct mictc_seg *)&(segment->cs); + + for(i=0; i < MAX_SEG_REG; i++) { + if (strcmp(SegRegNames[i], "GS") == 0) { + segreg->base = tc_rdmsr(MSR_KGSBASE); + } + if (strcmp(SegRegNames[i], "FS") == 0) { + segreg->base = tc_rdmsr(MSR_FSBASE); + } + + // Fill in the segment descriptor for cs to gs + if (i <= 5) { + get_seg_desc(smp_processor_id(), segreg->selector, &(segreg->desc)); + } + + TRACE_SPRINTF("\t\t\t\t\n",SegRegNames[i]); + if (i > 5) { // LDT and TSS + struct mictc_tss *segreg1 =(struct mictc_tss *)segreg; + + TRACE_SPRINTF("\t\t\t\t\t0x%llx\n", ((uint64_t)segreg1->desc.base3 << 32) | (uint64_t)((segreg1->desc.base2 << 24) | (segreg1->desc.base1 << 16) | segreg1->desc.base0)); + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", (segreg1->desc.limit1 << 16) | segreg1->desc.limit0); + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", segreg1->selector); + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", segreg1->desc.g); + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", 0); // double word of base and limit + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", 0); + TRACE_SPRINTF("\t\t\t\t\t0x0\n");//AVL bit not populated in the gdt[] array + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", segreg1->desc.p); + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", segreg1->desc.dpl); + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", segreg1->desc.type & 0x10 ? 1 : 0); //The S bit (descriptor type) is clubbed along with the ssd_type element. + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", (segreg1->desc.type & 0xf)); + } else { + if (segreg->base) { + TRACE_SPRINTF("\t\t\t\t\t0x%llx\n", segreg->base); + } else { + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", (segreg->desc.base2 << 24) | (segreg->desc.base1 << 16) |segreg->desc.base0); + } + if (segreg->desc.l) segreg->desc.a = 0; + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", (segreg->desc.limit << 16) | segreg->desc.limit0); + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", segreg->selector); + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", segreg->desc.g); + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", segreg->desc.a & 1); // double word of base and limit + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", segreg->desc.l); + TRACE_SPRINTF("\t\t\t\t\t0x0\n");//AVL bit not populated in the gdt[] array + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", segreg->desc.p); + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", segreg->desc.dpl); + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", segreg->desc.type & 0x10 ? 1 : 0); //The S bit (descriptor type) is clubbed along with the ssd_type element. 
+ TRACE_SPRINTF("\t\t\t\t\t0x%x\n", (segreg->desc.type & 0xf)); + } + TRACE_SPRINTF("\t\t\t\t\n"); + segreg++; + } + + TRACE_SPRINTF("\t\t\t\t\n"); + TRACE_SPRINTF("\t\t\t\t\t0x%lx\n", gdtr.address); + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", gdtr.size); + TRACE_SPRINTF("\t\t\t\t\n"); + TRACE_SPRINTF("\t\t\t\t\n"); + TRACE_SPRINTF("\t\t\t\t\t0x%lx\n", idtr.address); + TRACE_SPRINTF("\t\t\t\t\t0x%x\n", idtr.size); + TRACE_SPRINTF("\t\t\t\t\n"); + + TRACE_SPRINTF("\t\t\t\n"); + +// printk("End of mictc_capture_segment_reg\n"); + +} + + +//------------------------------------------------------------------------------ +// FUNCTION: mictc_capture_debug_reg +// +// DESCRIPTION: +// Capture all debug registers. +// +// PARAMETERS: None +// +// RETURNS: None +// +// TODOS: +// +static void +mictc_capture_debug_reg(void) +{ + TRACE_SPRINTF("\t\t\t\n"); + TRACE_SPRINTF("\t\t\t\t0x%x\n", get_dr(0)); + TRACE_SPRINTF("\t\t\t\t0x%x\n", get_dr(1)); + TRACE_SPRINTF("\t\t\t\t0x%x\n", get_dr(2)); + TRACE_SPRINTF("\t\t\t\t0x%x\n", get_dr(3)); +// These don't exist. +// TRACE_SPRINTF("\t\t\t\t0x%x\n", get_dr(4)); +// TRACE_SPRINTF("\t\t\t\t0x%x\n", get_dr(5)); + TRACE_SPRINTF("\t\t\t\t0x%x\n", get_dr(6)); + TRACE_SPRINTF("\t\t\t\t0x%x\n", get_dr(7)); + TRACE_SPRINTF("\t\t\t\n"); +} + + +//------------------------------------------------------------------------------ +// FUNCTION: mictc_capture_control_reg +// +// DESCRIPTION: +// Capture all control registers. +// +// PARAMETERS: None +// +// RETURNS: None +// +// TODOS: +// +static void +mictc_capture_control_reg(void) +{ + TRACE_SPRINTF("\t\t\t\n"); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", (read_cr0()) & 0xffffffff); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", read_cr2()); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", read_cr3()); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", (read_cr4()) & 0xffffffff); + TRACE_SPRINTF("\t\t\t\t0x%lx\n", read_cr8()); + TRACE_SPRINTF("\t\t\t\n"); +} + + +//------------------------------------------------------------------------------ +// FUNCTION: mictc_capture_SPU_reg +// +// DESCRIPTION: +// Capture all SPU registers. 
+// +// PARAMETERS: None +// +// RETURNS: None +// +// TODOS: +// +static void +mictc_capture_SPU_reg(void) +{ +#if 0 + // FIXME - The SPU is not setup currently in Linux + + TRACE_SPRINTF("\t\t\t\n"); + TRACE_SPRINTF_SPU(SPU_XQ_SIZE); + TRACE_SPRINTF_SPU(SPU_XQ_BASE); + TRACE_SPRINTF_SPU(SPU_XQ_INDEX); + TRACE_SPRINTF_SPU(SPU_CONTROL); + TRACE_SPRINTF_SPU(SPU_SAMPLER_BASE); + TRACE_SPRINTF_SPU(SPU_PMU_EVENT_SEL); + TRACE_SPRINTF_SPU(SPU_CONTROL2); + TRACE_SPRINTF_SPU(SPU_CONTROL3); + TRACE_SPRINTF("\t\t\t\n"); +#endif +} + + +//------------------------------------------------------------------------------ +// FUNCTION: PrintVector +// +// DESCRIPTION: +// Prints _m512 vectors +// +// PARAMETERS: None +// +// RETURNS: None +// +// TODOS: +// +static void +PrintVector(u8 *res_mem, int reg_num) +{ + TRACE_SPRINTF("\t\t\t\t0x" + "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x" + "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x" + "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x" + "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", + reg_num, + res_mem[63], res_mem[62], res_mem[61], res_mem[60], res_mem[59], res_mem[58], res_mem[57], res_mem[56], + res_mem[55], res_mem[54], res_mem[53], res_mem[52], res_mem[51], res_mem[50], res_mem[49], res_mem[48], + res_mem[47], res_mem[46], res_mem[45], res_mem[44], res_mem[43], res_mem[42], res_mem[41], res_mem[40], + res_mem[39], res_mem[38], res_mem[37], res_mem[36], res_mem[35], res_mem[34], res_mem[33], res_mem[32], + res_mem[31], res_mem[30], res_mem[29], res_mem[28], res_mem[27], res_mem[26], res_mem[25], res_mem[24], + res_mem[23], res_mem[22], res_mem[21], res_mem[20], res_mem[19], res_mem[18], res_mem[17], res_mem[16], + res_mem[15], res_mem[14], res_mem[13], res_mem[12], res_mem[11], res_mem[10], res_mem[9], res_mem[8], + res_mem[7], res_mem[6], res_mem[5], res_mem[4], res_mem[3], res_mem[2], res_mem[1], res_mem[0]); +} + + +//------------------------------------------------------------------------------ +// FUNCTION: PrintFPRegister +// +// DESCRIPTION: +// Prints 10 byte FP register contents +// +// PARAMETERS: None +// +// RETURNS: None +// +// TODOS: +// +static void +PrintFPRegister(u8 *res_mem, int reg_num) +{ + TRACE_SPRINTF("\t\t\t\t0x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", + reg_num, + res_mem[9], + res_mem[8], + res_mem[7], + res_mem[6], + res_mem[5], + res_mem[4], + res_mem[3], + res_mem[2], + res_mem[1], + res_mem[0]); +} + + +// VPU Instructions + +#ifdef CONFIG_ML1OM +#define VSTORED_DISP32_EAX(v, disp32) " vstored %%v" #v "," #disp32 "(%%rax)\n" + +#define VKSTORE_DISP32_EAX(k, disp32) \ + " vkmov %%k" #k ",%%ebx\n" \ + " movw %%bx, " #disp32 "(%%rax)\n" + +#define STVXCSR_DISP32_EAX(disp32) " stvxcsr " #disp32 "(%%rax)\n" + +#else +// For K1OM +#define VSTORED_DISP32_EAX(v, disp32) " vpackstorelps %%zmm" #v "," #disp32 "(%%rax)\n" + +#define VKSTORE_DISP32_EAX(k, disp32) \ + " kmov %%k" #k ",%%ebx\n" \ + " movw %%bx, " #disp32 "(%%rax)\n" + +#define STVXCSR_DISP32_EAX(disp32) " stmxcsr " #disp32 "(%%rax)\n" +#endif + +static inline void save_vpu(struct vpustate_struct *vpustate) +{ + asm volatile( + VSTORED_DISP32_EAX(0, 0x00) + VSTORED_DISP32_EAX(1, 0x40) + VSTORED_DISP32_EAX(2, 0x80) + VSTORED_DISP32_EAX(3, 0xc0) + VSTORED_DISP32_EAX(4, 0x100) + VSTORED_DISP32_EAX(5, 0x140) + VSTORED_DISP32_EAX(6, 0x180) + VSTORED_DISP32_EAX(7, 0x1c0) + VSTORED_DISP32_EAX(8, 0x200) + VSTORED_DISP32_EAX(9, 0x240) + VSTORED_DISP32_EAX(10, 0x280) + VSTORED_DISP32_EAX(11, 
0x2c0) + VSTORED_DISP32_EAX(12, 0x300) + VSTORED_DISP32_EAX(13, 0x340) + VSTORED_DISP32_EAX(14, 0x380) + VSTORED_DISP32_EAX(15, 0x3c0) + VSTORED_DISP32_EAX(16, 0x400) + VSTORED_DISP32_EAX(17, 0x440) + VSTORED_DISP32_EAX(18, 0x480) + VSTORED_DISP32_EAX(19, 0x4c0) + VSTORED_DISP32_EAX(20, 0x500) + VSTORED_DISP32_EAX(21, 0x540) + VSTORED_DISP32_EAX(22, 0x580) + VSTORED_DISP32_EAX(23, 0x5c0) + VSTORED_DISP32_EAX(24, 0x600) + VSTORED_DISP32_EAX(25, 0x640) + VSTORED_DISP32_EAX(26, 0x680) + VSTORED_DISP32_EAX(27, 0x6c0) + VSTORED_DISP32_EAX(28, 0x700) + VSTORED_DISP32_EAX(29, 0x740) + VSTORED_DISP32_EAX(30, 0x780) + VSTORED_DISP32_EAX(31, 0x7c0) + VKSTORE_DISP32_EAX(0, 0x800) + VKSTORE_DISP32_EAX(1, 0x802) + VKSTORE_DISP32_EAX(2, 0x804) + VKSTORE_DISP32_EAX(3, 0x806) + VKSTORE_DISP32_EAX(4, 0x808) + VKSTORE_DISP32_EAX(5, 0x80a) + VKSTORE_DISP32_EAX(6, 0x80c) + VKSTORE_DISP32_EAX(7, 0x80e) + STVXCSR_DISP32_EAX(0x810) + : "=m" (vpustate) : [fx] "a" (vpustate) : "ebx" + ); +} + + +//------------------------------------------------------------------------------ +// FUNCTION: mictc_capture_vector_reg +// +// DESCRIPTION: +// Capture all vector registers. +// +// PARAMETERS: None +// +// RETURNS: None +// +// TODOS: +// +static void +mictc_capture_vector_reg(struct vpustate_struct *vpustate) +{ + // printk("vpustate = %p\n", vpustate); + + save_vpu(vpustate); + + TRACE_SPRINTF("\t\t\t\n"); + TRACE_SPRINTF("\t\t\t\t0x%x\n", vpustate->k[0]); + TRACE_SPRINTF("\t\t\t\t0x%x\n", vpustate->k[1]); + TRACE_SPRINTF("\t\t\t\t0x%x\n", vpustate->k[2]); + TRACE_SPRINTF("\t\t\t\t0x%x\n", vpustate->k[3]); + TRACE_SPRINTF("\t\t\t\t0x%x\n", vpustate->k[4]); + TRACE_SPRINTF("\t\t\t\t0x%x\n", vpustate->k[5]); + TRACE_SPRINTF("\t\t\t\t0x%x\n", vpustate->k[6]); + TRACE_SPRINTF("\t\t\t\t0x%x\n", vpustate->k[7]); + TRACE_SPRINTF_VECTOR(0, vpustate->vector_space[0]); + TRACE_SPRINTF_VECTOR(1, vpustate->vector_space[16]); + TRACE_SPRINTF_VECTOR(2, vpustate->vector_space[32]); + TRACE_SPRINTF_VECTOR(3, vpustate->vector_space[48]); + TRACE_SPRINTF_VECTOR(4, vpustate->vector_space[64]); + TRACE_SPRINTF_VECTOR(5, vpustate->vector_space[80]); + TRACE_SPRINTF_VECTOR(6, vpustate->vector_space[96]); + TRACE_SPRINTF_VECTOR(7, vpustate->vector_space[112]); + TRACE_SPRINTF_VECTOR(8, vpustate->vector_space[128]); + TRACE_SPRINTF_VECTOR(9, vpustate->vector_space[144]); + TRACE_SPRINTF_VECTOR(10, vpustate->vector_space[160]); + TRACE_SPRINTF_VECTOR(11, vpustate->vector_space[176]); + TRACE_SPRINTF_VECTOR(12, vpustate->vector_space[192]); + TRACE_SPRINTF_VECTOR(13, vpustate->vector_space[208]); + TRACE_SPRINTF_VECTOR(14, vpustate->vector_space[224]); + TRACE_SPRINTF_VECTOR(15, vpustate->vector_space[240]); + TRACE_SPRINTF_VECTOR(16, vpustate->vector_space[256]); + TRACE_SPRINTF_VECTOR(17, vpustate->vector_space[272]); + TRACE_SPRINTF_VECTOR(18, vpustate->vector_space[288]); + TRACE_SPRINTF_VECTOR(19, vpustate->vector_space[304]); + TRACE_SPRINTF_VECTOR(20, vpustate->vector_space[320]); + TRACE_SPRINTF_VECTOR(21, vpustate->vector_space[336]); + TRACE_SPRINTF_VECTOR(22, vpustate->vector_space[352]); + TRACE_SPRINTF_VECTOR(23, vpustate->vector_space[368]); + TRACE_SPRINTF_VECTOR(24, vpustate->vector_space[384]); + TRACE_SPRINTF_VECTOR(25, vpustate->vector_space[400]); + TRACE_SPRINTF_VECTOR(26, vpustate->vector_space[416]); + TRACE_SPRINTF_VECTOR(27, vpustate->vector_space[432]); + TRACE_SPRINTF_VECTOR(28, vpustate->vector_space[448]); + TRACE_SPRINTF_VECTOR(29, vpustate->vector_space[464]); + TRACE_SPRINTF_VECTOR(30, 
vpustate->vector_space[480]); + TRACE_SPRINTF_VECTOR(31, vpustate->vector_space[496]); + TRACE_SPRINTF("\t\t\t\t0x%x\n", vpustate->vxcsr); + TRACE_SPRINTF("\t\t\t\n"); +} + +//------------------------------------------------------------------------------ +// FUNCTION: mictc_capture_FPU_reg +// +// DESCRIPTION: +// Capture all FPU registers. +// +// PARAMETERS: None +// +// RETURNS: None +// +// TODOS: +// +static void +mictc_capture_FPU_reg(struct i387_fxsave_struct *fpu) +{ + +/* + Get FPU contents from the registers instead of the PCB. + fxsave on L1OM saves only the x87 FPU registers and not the SSE2 and MMX registers. + For format of the data below refer Intel 64 and IA-32 Arch. SDM Vol 2A Instr Set Ref A-M + tables 3-59 & 3-60. +*/ + mictc_fxsave(fpu); + + TRACE_SPRINTF("\t\t\t\n"); + TRACE_SPRINTF("\t\t\t\t0x%x\n", fpu->cwd); + TRACE_SPRINTF("\t\t\t\t0x%x\n", fpu->swd); + TRACE_SPRINTF("\t\t\t\t0x%x\n", (fpu->twd)); + TRACE_SPRINTF("\t\t\t\t0x%x\n", (fpu->fcs & 0xffff)); + TRACE_SPRINTF("\t\t\t\t0x%x\n", fpu->fop); + TRACE_SPRINTF("\t\t\t\t0x%x\n", (fpu->fos & 0xffff)); + TRACE_SPRINTF("\t\t\t\t0x%x\n", fpu->fip); + TRACE_SPRINTF("\t\t\t\t0x%x\n", (fpu->foo)); + PrintFPRegister((u8 *)&(fpu->st_space[0]), 0); + PrintFPRegister((u8 *)&(fpu->st_space[4]), 1); + PrintFPRegister((u8 *)&(fpu->st_space[8]), 2); + PrintFPRegister((u8 *)&(fpu->st_space[12]), 3); + PrintFPRegister((u8 *)&(fpu->st_space[16]), 4); + PrintFPRegister((u8 *)&(fpu->st_space[20]), 5); + PrintFPRegister((u8 *)&(fpu->st_space[24]), 6); + PrintFPRegister((u8 *)&(fpu->st_space[28]), 7); + TRACE_SPRINTF("\t\t\t\n"); + +#if 0 + printk("00 %08x %08x\n", ((u32*)fpu)[0], ((u32*)fpu)[1]); + printk("08 %08x %08x\n", ((u32*)fpu)[2], ((u32*)fpu)[3]); + printk("10 %08x %08x\n", ((u32*)fpu)[4], ((u32*)fpu)[5]); + printk("18 %08x %08x\n", ((u32*)fpu)[6], ((u32*)fpu)[7]); + printk("20 %08x %08x\n", ((u32*)fpu)[8], ((u32*)fpu)[9]); + printk("28 %08x %08x\n", ((u32*)fpu)[10], ((u32*)fpu)[11]); + printk("30 %08x %08x\n", ((u32*)fpu)[12], ((u32*)fpu)[13]); + printk("38 %08x %08x\n", ((u32*)fpu)[14], ((u32*)fpu)[15]); + printk("40 %08x %08x\n", ((u32*)fpu)[16], ((u32*)fpu)[17]); + printk("48 %08x %08x\n", ((u32*)fpu)[18], ((u32*)fpu)[19]); + printk("50 %08x %08x\n", ((u32*)fpu)[20], ((u32*)fpu)[21]); + printk("58 %08x %08x\n", ((u32*)fpu)[22], ((u32*)fpu)[23]); + printk("60 %08x %08x\n", ((u32*)fpu)[24], ((u32*)fpu)[25]); + printk("68 %08x %08x\n", ((u32*)fpu)[26], ((u32*)fpu)[27]); + printk("70 %08x %08x\n", ((u32*)fpu)[28], ((u32*)fpu)[29]); + printk("78 %08x %08x\n", ((u32*)fpu)[30], ((u32*)fpu)[31]); + printk("80 %08x %08x\n", ((u32*)fpu)[32], ((u32*)fpu)[33]); + printk("88 %08x %08x\n", ((u32*)fpu)[34], ((u32*)fpu)[35]); + printk("90 %08x %08x\n", ((u32*)fpu)[36], ((u32*)fpu)[37]); + printk("98 %08x %08x\n", ((u32*)fpu)[38], ((u32*)fpu)[39]); +#endif +} + + +//------------------------------------------------------------------------------ +// FUNCTION: mictc_capture_MSR +// +// DESCRIPTION: +// Capture all MSR +// +// PARAMETERS: None +// +// RETURNS: None +// +// TODOS: +// +static void +mictc_capture_MSR(void) +{ + // u32 me_cpu = PCPU_GET(cpuid); +#if 0 + //msr->msrMIC_CR_SPUBASE = tc_rdmsr(MIC_CR_SPUBASE); + //msr->msrIA32_CR_MISC = tc_rdmsr(IA32_CR_MISC); + //msr->msrWMT_CR_LASTBRANCH_0 = tc_rdmsr(WMT_CR_LASTBRANCH_0); + //msr->msrWMT_CR_LASTBRANCH_1 = tc_rdmsr(WMT_CR_LASTBRANCH_1); + msr->msrVMX_MSR_BASE = tc_rdmsr(VMX_MSR_BASE); + msr->msrVMX_MSR_BASE_PLUS_1 = tc_rdmsr(VMX_MSR_BASE_PLUS_1); + msr->msrVMX_MSR_BASE_PLUS_2 = 
tc_rdmsr(VMX_MSR_BASE_PLUS_2); + msr->msrVMX_MSR_BASE_PLUS_3 = tc_rdmsr(VMX_MSR_BASE_PLUS_3); + msr->msrVMX_MSR_BASE_PLUS_4 = tc_rdmsr(VMX_MSR_BASE_PLUS_4); + msr->msrVMX_MSR_BASE_PLUS_5 = tc_rdmsr(VMX_MSR_BASE_PLUS_5); + msr->msrVMX_MSR_BASE_PLUS_6 = tc_rdmsr(VMX_MSR_BASE_PLUS_6); + msr->msrVMX_MSR_BASE_PLUS_7 = tc_rdmsr(VMX_MSR_BASE_PLUS_7); + msr->msrVMX_MSR_BASE_PLUS_8 = tc_rdmsr(VMX_MSR_BASE_PLUS_8); + msr->msrVMX_MSR_BASE_PLUS_9 = tc_rdmsr(VMX_MSR_BASE_PLUS_9); + msr->msrTIME = tc_rdmsr(TIME); + msr->msrPINFO = tc_rdmsr(PINFO); +#endif + TRACE_SPRINTF("\t\t\t\n"); + TRACE_SPRINTF_MSR(P6_CR_TSC); + TRACE_SPRINTF_MSR(X86_CR_APICBASE); + TRACE_SPRINTF_MSR(CBOX_SPU_PA_MSR); + // This is being added since it is included in the ITP dump as well. + TRACE_SPRINTF("\t\t\t\t0x%llx\n", SPU_BASE, (tc_rdmsr(CBOX_SPU_PA_MSR) & 0x7fffffffffffffff) + 0x1000); + TRACE_SPRINTF_MSR(CBOX_SPU_SAMPLER_BIND_MSR); + TRACE_SPRINTF_MSR(X86_CR_MTRRphysMask0); + TRACE_SPRINTF_MSR(X86_CR_MTRRphysMask1); + TRACE_SPRINTF_MSR(X86_CR_MTRRphysMask2); + TRACE_SPRINTF_MSR(X86_CR_MTRRphysMask3); + TRACE_SPRINTF_MSR(X86_CR_MTRRphysMask4); + TRACE_SPRINTF_MSR(X86_CR_MTRRphysMask5); + TRACE_SPRINTF_MSR(X86_CR_MTRRphysMask6); + TRACE_SPRINTF_MSR(X86_CR_MTRRphysMask7); + TRACE_SPRINTF_MSR(MSR_EFER & ~0x800); // Force bit 11 to 0 + TRACE_SPRINTF_MSR(MSR_SF_MASK); + TRACE_SPRINTF_MSR(MSR_FSBASE); + TRACE_SPRINTF_MSR(MSR_GSBASE); + TRACE_SPRINTF_MSR(X86_CR_MTRRcap); + TRACE_SPRINTF_MSR(X86_CR_MTRRdefType); + TRACE_SPRINTF_MSR(X86_CR_MTRRphysBase2); + TRACE_SPRINTF_MSR(X86_CR_MTRRphysBase0); + TRACE_SPRINTF_MSR(X86_CR_MTRRphysBase1); + TRACE_SPRINTF_MSR(X86_CR_MTRRphysBase3); + TRACE_SPRINTF_MSR(X86_CR_MTRRphysBase4); + TRACE_SPRINTF_MSR(X86_CR_MTRRphysBase5); + TRACE_SPRINTF_MSR(X86_CR_MTRRphysBase6); + TRACE_SPRINTF_MSR(X86_CR_MTRRphysBase7); + TRACE_SPRINTF_MSR(STAR); + TRACE_SPRINTF_MSR(LSTAR); + + // MSR_KGSBASE needs some special handling. + // On Silicon when a thread transitions from Ring 3->Ring 0 the + // first instruction it executes is swapgs which swaps the value + // of the current GSBase (which could be 0x0) with the value in + // MSR_KGSBASE to get to the per cpu data structure and onwards to the kernel stack. + // On Silicon, when the same thread transitions from Ring 0->Ring 3 MSR_KGSBASE gets + // the right value as a result of another swapgs on the way back. + // Where Trace Capture differs from Silicon is that we take a snapshot while executing + // in Ring 0 (when MSR_KGSBASE could be 0x0) but the first instruction + // which executes on LarrySim is a Ring 3 instruction. + // On the first syscall in LarrySim when it executes a swapgs it sees a MSR_KGSBASE value of 0x0. + // LarrySim cannot get to the kernel stack and we correctly hit a double fault (Bang!). + // The correct fix is to ensure that LarrySim sees a correct value of + // MSR_KGSBASE when it is provided a snapshot. +//FIXME +// TRACE_SPRINTF("\t\t\t\t0x%lx\n", MSR_KGSBASE, &__pcpu[me_cpu]); + + // The following MSR's are currently ifdef'd out + // because LarrySim barfs on these. + // We might need these later. 
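+  // This disabled list mirrors the one in mictc_trace_capture_prep_msr_header();
+  // if an MSR is re-enabled here it should also be re-enabled in the header.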
+#if 0 + TRACE_SPRINTF_MSR(X86_CR_MTRRfix64K_00000); + TRACE_SPRINTF_MSR(X86_CR_MTRRfix16K_80000); + TRACE_SPRINTF_MSR(X86_CR_MTRRfix16K_A0000); + TRACE_SPRINTF_MSR(X86_CR_MTRRfix4K_C0000); + TRACE_SPRINTF_MSR(X86_CR_MTRRfix4K_C8000); + TRACE_SPRINTF_MSR(X86_CR_MTRRfix4K_D0000); + TRACE_SPRINTF_MSR(X86_CR_MTRRfix4K_D8000); + TRACE_SPRINTF_MSR(X86_CR_MTRRfix4K_E0000); + TRACE_SPRINTF_MSR(X86_CR_MTRRfix4K_E8000); + TRACE_SPRINTF_MSR(X86_CR_MTRRfix4K_F0000); + TRACE_SPRINTF_MSR(X86_CR_MTRRfix4K_F8000); + TRACE_SPRINTF_MSR(P5_MC_ADDR); + TRACE_SPRINTF_MSR(P5_MC_TYPE); + TRACE_SPRINTF_MSR(MSR_TR1); + TRACE_SPRINTF_MSR(MSR_TR2); + TRACE_SPRINTF_MSR(MSR_TR3); + TRACE_SPRINTF_MSR(MSR_TR4); + TRACE_SPRINTF_MSR(MSR_TR5); + TRACE_SPRINTF_MSR(MSR_TR6); + TRACE_SPRINTF_MSR(MSR_TR7); + TRACE_SPRINTF_MSR(MSR_TR9); + TRACE_SPRINTF_MSR(MSR_TR10); + TRACE_SPRINTF_MSR(MSR_TR11); + TRACE_SPRINTF_MSR(MSR_TR12); + TRACE_SPRINTF_MSR(IA32_APIC_BASE); + TRACE_SPRINTF_MSR(IA32_TIME_STAMP_COUNTER); + TRACE_SPRINTF_MSR(IA32_PerfCntr0); + TRACE_SPRINTF_MSR(IA32_PerfCntr1); + TRACE_SPRINTF_MSR(IA32_PerfCntr2); + TRACE_SPRINTF_MSR(IA32_PerfCntr3); + TRACE_SPRINTF_MSR(PerfFilteredCntr0); + TRACE_SPRINTF_MSR(PerfFilteredCntr1); + TRACE_SPRINTF_MSR(PerfFilteredCntr2); + TRACE_SPRINTF_MSR(PerfFilteredCntr3); + TRACE_SPRINTF_MSR(IA32_PerfEvtSel0); + TRACE_SPRINTF_MSR(IA32_PerfEvtSel1); + TRACE_SPRINTF_MSR(IA32_PerfEvtSel2); + TRACE_SPRINTF_MSR(IA32_PerfEvtSel3); + TRACE_SPRINTF_MSR(PerfFilterMask); + TRACE_SPRINTF_MSR(IA32_PERF_GLOBAL_STATUS); + TRACE_SPRINTF_MSR(IA32_PERF_GLOBAL_OVF_CONTROL); + TRACE_SPRINTF_MSR(IA32_PERF_GLOBAL_CTRL); + TRACE_SPRINTF_MSR(IA32_MCG_CTL); + TRACE_SPRINTF_MSR(IA32_MC0_CTRL); + TRACE_SPRINTF_MSR(IA32_MC0_STAT); + TRACE_SPRINTF_MSR(IA32_MC0_ADDR); + TRACE_SPRINTF_MSR(IA32_MC0_MISC); + TRACE_SPRINTF_MSR(IA32_MC1_CTRL); + TRACE_SPRINTF_MSR(IA32_MC1_STAT); + TRACE_SPRINTF_MSR(IA32_MC1_ADDR); + TRACE_SPRINTF_MSR(IA32_MC1_MISC); + TRACE_SPRINTF_MSR(SYSCALL_FLAG_MASK); + TRACE_SPRINTF_MSR(X86_PAT); +#endif + TRACE_SPRINTF("\t\t\t\n"); +} + + +//u64 rdtsccount = 0, dmasetuptime = 0, dmacomptime=0, hostacktime=0; + +#if MIC_TRACE_CAPTURE_MEMORY_TEST +// Local function to count the number of bytes in a U32 +// This is only used for the memory test. +static U32 AddBytes(U32 add) +{ + U32 sum = 0x0; + for (int i=0; i < sizeof(U32); i++) + { + sum += (add & 0xFF); + add = (add >> 8); + } + return sum; +} +#endif + + +//------------------------------------------------------------------------------ +// FUNCTION: mictc_capture_memory +// +// DESCRIPTION: +// Trace Capture IPI Handler +// +// PARAMETERS: None +// +// RETURNS: None +// +// TODOS: +// +static int +mictc_capture_memory(void) +{ + long err; + long i; + long delay_count; + long total_transfered = 0; + + g_sizeXferred = 0; + + // Transfer a full buffer. 
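+  // Physical memory is pushed to the host in MICTC_MEM_BUFFER_SIZE chunks:
+  // each chunk is copied into the host's registered window with scif_writeto(),
+  // its length is published through the size word of the shared XML buffer,
+  // and scif_fence_signal() writes TRACE_PAGE_READY into the status word once
+  // the transfer completes.  The host side (tc_host.c) appends the chunk to
+  // mem.dat and sets the status back to TRACE_HOST_READY, releasing the next
+  // chunk.  Rough per-chunk handshake:
+  //   card: scif_writeto() ; *size = len ; status = TRACE_PAGE_READY
+  //   host: fwrite(chunk)  ; status = TRACE_HOST_READY
+  // After the final chunk the card sets TRACE_MEM_COMPLETE and spins until
+  // the host acknowledges with TRACE_COMPLETE.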
+ for (i = 0; total_transfered < (max_pfn << PAGE_SHIFT); i++) { + printk("before scif_writeto, i = %ld\n", i); + + // Transfer any remainder + if ((max_pfn << PAGE_SHIFT) - total_transfered < MICTC_MEM_BUFFER_SIZE) { + long remainder = ((uint64_t)max_pfn << PAGE_SHIFT) % MICTC_MEM_BUFFER_SIZE; + + printk("Writing %ld bytes, max_pfn = %ld\n", remainder, max_pfn); + + if ((err = scif_writeto(mictc_endp_data, scif_offset_mem + (i * MICTC_MEM_BUFFER_SIZE), + remainder, scif_offset_dst, 0)) < 0) { + pr_crit("%s:%s:%d scif_writeto failed with error %ld\n", __FILE__, __FUNCTION__, __LINE__, err); + return 1; + } + total_transfered += remainder; + g_sizeXferred = remainder; + } else { + if ((err = scif_writeto(mictc_endp_data, scif_offset_mem + (i * MICTC_MEM_BUFFER_SIZE), + MICTC_MEM_BUFFER_SIZE, scif_offset_dst, 0)) < 0) { + pr_crit("%s:%s:%d scif_writeto failed with error %ld\n", __FILE__, __FUNCTION__, __LINE__, err); + return 1; + } + total_transfered += MICTC_MEM_BUFFER_SIZE; + g_sizeXferred = MICTC_MEM_BUFFER_SIZE; + } + *g_traceBufferSizeOffset = g_sizeXferred; + printk("before fence\n"); + err = scif_fence_signal(mictc_endp_data, (off_t)scif_offset_xml + TRACE_STATUS_OFFSET, + TRACE_PAGE_READY, 0, 0, SCIF_FENCE_INIT_SELF | SCIF_SIGNAL_LOCAL); + + if (err < 0) { + printk("scif_fence_signal failed. err = %ld\n", err); + return 1; + } + printk("TRACE_PAGE_READY %lld bytes\n", g_sizeXferred); + g_sizeXferred = 0; + + delay_count = 0; + printk("waiting for TRACE_HOST_READY\n"); + + while (*g_traceBufferStatusOffset != TRACE_HOST_READY) { + cpu_relax(); + delay_count++; + if (delay_count == TRACE_CAPTURE_TIMEOUT) { + printk("Memory Dump Timeout. Host did not update @physAddr 0x%lx\n", i << PAGE_SHIFT); + return -EBUSY; + } + } + } + *g_traceBufferSizeOffset = 0; + *g_traceBufferStatusOffset = TRACE_MEM_COMPLETE; + + delay_count = 0; + + while (*g_traceBufferStatusOffset != TRACE_COMPLETE) { + cpu_relax(); + delay_count++; + if (delay_count == TRACE_CAPTURE_TIMEOUT) { + printk("Trace completion timeout.\n"); + return -EBUSY; + } + } + + return 0; +} + + +//------------------------------------------------------------------------------ +// FUNCTION: mictc_trace_capture +// +// DESCRIPTION: +// Perform all the tasks related to Trace Capture +// for a particular Hardware Thread. +// The tasks currently include: +// General purpose registers +// Segment registers +// Debug registers +// Control registers +// VPU registers +// MSRs +// +// Note: The SPU is not setup in Linux. +// +// PARAMETERS: regs - pointer to the task's registers +// +// RETURNS: None +// +// TODOS: +// +static void +mictc_trace_capture(struct pt_regs *regs) +{ + long delay_count; + +// printk("Entering mictc_trace_capture on cpu %d, for process = %s\n", smp_processor_id(), current->comm); + + // Logic to let threads in one by one in order + + while (atomic_read(&cpus_stopped) != smp_processor_id()) { + cpu_relax(); +//STH touch_nmi_watchdog(); + } + + if (smp_processor_id() == 0) + { + // CPU0 is responsible for preparing the + // Trace Capture Header. 
+ mictc_prep_header(); + } + + TRACE_SPRINTF("\t\t\n", smp_processor_id()); + mictc_capture_general_purpose_reg(regs); + mictc_capture_segment_reg(&(trace->segment), regs); + mictc_capture_debug_reg(); + mictc_capture_control_reg(); + mictc_capture_vector_reg(&(trace->vpustate)); + +//STH touch_nmi_watchdog(); // Just to be safe + + // The SPU is not setup currently in Linux + if (always_false) mictc_capture_SPU_reg(); + + mictc_capture_FPU_reg(&(trace->fpu)); + mictc_capture_MSR(); + +// printk("In mictc_trace_capture on cpu %d, after MSRs\n", smp_processor_id()); + + TRACE_SPRINTF("\t\t\n"); + + // Each core should flush their caches + // as the initiator is going to take a memory + // dump soon after. + // Not required since DMA should snoop the caches. + //wbinvd(); + +// printk("In mictc_trace_capture on cpu %d, before check for last cpu\n", smp_processor_id()); + + if (smp_processor_id() == (num_online_cpus() - 1)) + { + // The last CPU is responsible for preparing the + // Trace Capture Trailer. + TRACE_SPRINTF("\t\n"); + + TRACE_SPRINTF("
\n"); + + // Update the size as the Host App needs this information. + *g_traceBufferSizeOffset = g_sizeXferred; + + g_sizeXferred = 0; + + // Update the status for the Host App. The CPU register state has been written by all + // the hardware threads. The host app polls for this status. + *g_traceBufferStatusOffset = TRACE_REG_COMPLETE; + + printk("Completed Arch Dump. Now Beginning Memory Dump. Be patient (~1 min is ETA)..\n"); + + delay_count = 0; + + while (*g_traceBufferStatusOffset != TRACE_GET_FILE) + { + cpu_relax(); + delay_count++; + if (delay_count == TRACE_CAPTURE_TIMEOUT) + { + printk("Arch Dump Timeout. Host did not update status.\n"); + break; + } + } + printk("%s out of wait loop.\n", __FUNCTION__); + } + +// printk("Exiting mictc_trace_capture on cpu %d\n", smp_processor_id()); +} + + +// Starting point for trace_capture. +static void +mictc_start_capture(void) +{ + long ret; + long err; + struct scif_portID portID_data; + int control_msg = 0; + int i; + int found_it = 0; + + spin_lock(&mictc_lock); + printk("Starting tracecapture on cpu %d. Taking lock.\n", smp_processor_id()); + + if (!(g_traceBufferAllocated = kmalloc(MICTC_XML_BUFFER_SIZE, GFP_KERNEL))) { + pr_crit("%s:%s:%d kmalloc failed failed with ENOMEM\n", __FILE__, __FUNCTION__, __LINE__); + goto done0; + } + + pr_crit("%s:%s:%d kmalloc returned %llx\n", __FILE__, __FUNCTION__, __LINE__, (uint64_t)g_traceBufferAllocated); + + g_traceBufferStatusOffset = (u64*)((u64)g_traceBufferAllocated + TRACE_STATUS_OFFSET); + g_traceBufferSizeOffset = (u64*)((u64)g_traceBufferAllocated + TRACE_SIZE_OFFSET); + g_traceBufferDataOffset = (u32*)((u64)g_traceBufferAllocated + TRACE_DATA_OFFSET); + g_traceBufferTriggerOffset = (u32*)((u64)g_traceBufferAllocated + TRACE_TRIGGER_OFFSET); + + *g_traceBufferStatusOffset = TRACE_DATA; +#if MIC_TRACE_CAPTURE_MEMORY_TEST + g_traceBufferChecksumOffset = (u64*)((u64)g_traceBufferAllocated + TRACE_CHECKSUM_OFFSET); +#endif + + if (!(trace = (struct mictc_trace *)kmalloc(sizeof(struct mictc_trace), GFP_KERNEL))) { + pr_crit("%s:%s:%d kmalloc failed failed with ENOMEM\n", __FILE__, __FUNCTION__, __LINE__); + goto done1a; + } + + pr_crit("%s:%s:%d kmalloc returned %llx\n", __FILE__, __FUNCTION__, __LINE__, (uint64_t)trace); + + memset(trace, 0, sizeof(struct mictc_trace)); + + pr_crit("g_traceBufferStatusOffset %llx\n", (uint64_t)g_traceBufferStatusOffset); + pr_crit("g_traceBufferSizeOffset %llx\n", (uint64_t)g_traceBufferSizeOffset); + pr_crit("g_traceBufferDataOffset %llx\n", (uint64_t)g_traceBufferDataOffset); + + // Data channel + if (!(mictc_endp_data = scif_open())) { + pr_crit("%s:%s:%d scif_open failed with ENOMEM\n", __FILE__, __FUNCTION__, __LINE__); + return; + } + + if ((ret = scif_bind(mictc_endp_data, MICTC_SCIF_PORT_DATA)) < 0) { + pr_crit("%s:%s:%d scif_bind failed with error %ld\n", __FILE__, __FUNCTION__, __LINE__, ret); + goto done1; + } + + portID_data.node = 0; + portID_data.port = MICTC_SCIF_PORT_DATA; + + if ((ret = scif_connect(mictc_endp_data, &portID_data)) < 0) { + pr_crit("%s:%s:%d scif_connect failed with error %ld\n", __FILE__, __FUNCTION__, __LINE__, ret); + goto done1; + } + + if ((ret = (long)scif_register(mictc_endp_data, + g_traceBufferAllocated, + MICTC_XML_BUFFER_SIZE, + 0, // suggested_offset, + SCIF_PROT_READ | SCIF_PROT_WRITE, + SCIF_MAP_KERNEL)) < 0) { + if (ret > -300) { + pr_crit("%s:%s:%d scif_register failed failed with %ld\n", __FILE__, __FUNCTION__, __LINE__, ret); + goto done2; + } + } + scif_offset_xml = ret; + pr_crit("%s:%s:%d scif_register 
scif_offset_xml = %lx\n", __FILE__, __FUNCTION__, __LINE__, scif_offset_xml); + + // Register all of physical memory. + if ((ret = (long)scif_register(mictc_endp_data, + __va(0), // Physical page 0 + max_pfn << PAGE_SHIFT, + 0, // suggested_offset, + SCIF_PROT_READ | SCIF_PROT_WRITE, + SCIF_MAP_KERNEL)) < 0) { + if (ret > -300) { + pr_crit("%s:%s:%d scif_register failed failed with %ld\n", __FILE__, __FUNCTION__, __LINE__, ret); + goto done2; + } + } + scif_offset_mem = ret; + pr_crit("%s:%s:%d scif_register scif_offset_mem = %lx\n", __FILE__, __FUNCTION__, __LINE__, scif_offset_mem); + + BARRIER(mictc_endp_data, "before barrier"); + + if ((err = scif_recv(mictc_endp_data, &scif_offset_dst, sizeof(scif_offset_dst), SCIF_RECV_BLOCK)) <= 0) { + pr_crit("%s:%s:%d scif_recv failed with err %ld\n", __FILE__, __FUNCTION__, __LINE__, err); + goto close; + } + +// g_traceBufferDataOffset = (u32 *)ret; +// pr_crit("%s:%s:%d scif_register ret %lx\n", __FILE__, __FUNCTION__, __LINE__, scif_offset); + + if ((err = scif_send(mictc_endp_data, &scif_offset_xml, sizeof(scif_offset_xml), SCIF_SEND_BLOCK)) <= 0) { + pr_crit("%s:%s:%d scif_send failed with err %ld\n", __FILE__, __FUNCTION__, __LINE__, err); + goto close; + } + + while (*g_traceBufferStatusOffset != TRACE_HOST_READY) + { + msleep(100); + touch_nmi_watchdog(); + } + + // Get trigger data. + for (i = 0; i < TRACE_TRIGGER_MAX; i++) { + g_traceTriggers[i] = *g_traceBufferTriggerOffset; + printk("Found trace trigger %d\n", g_traceTriggers[i]); + g_traceBufferTriggerOffset++; + + if (g_traceTriggers[i] == TRACE_EOL) break; + } + + // Is the trigger data empty? If so, accept everything. + if (g_traceTriggers[0] == TRACE_EOL) { + printk("Trace trigger data is empty.\n"); + found_it = 1; + } else if (g_traceTriggers[0] == TRACE_IGNORE) { + printk("Ignoring current trace."); + } else { + // See if g_traceCurrentTrigger is in the trigger data. + // If not, abort this trace. + for (i = 0; i < TRACE_TRIGGER_MAX; i++) { + if (g_traceTriggers[i] == TRACE_EOL) break; + + if (g_traceTriggers[i] == g_traceCurrentTrigger) { + found_it = 1; + printk("Matched trace trigger %d\n", g_traceTriggers[i]); + break; + } + } + } + + if (!found_it) { + // Abort this trace + printk("Trace trigger did not match -- aborting.\n"); + *g_traceBufferStatusOffset = TRACE_ABORTED; + goto done3; + } + + if (always_false) { + // Mmap memory at 0xfee03000 physical. + spu_addr = ioremap(0xfee03000, 0x1000); + if (! spu_addr) { + pr_crit("%s ioremap failed.\n", __FUNCTION__); + goto done3; + } + printk("CPU ioremap %p\n", spu_addr); + } + + cli; // Interrupts off + atomic_set(&cpus_stopped, 0); + atomic_set(&cpus_released, 0); + // Send IPI to capture all other cpus. + apic->send_IPI_allbutself(NMI_VECTOR); + mictc_trace_capture(task_pt_regs(current)); + atomic_inc(&cpus_stopped); + + pr_debug("start_capture: Entering wait loop until lock count %d >= %d on cpu %d\n", atomic_read(&cpus_stopped), num_online_cpus() - 1, smp_processor_id()); + + { int ctr = 0; + // Wait for every other CPU to finish its trace capture tasks. + while (atomic_read(&cpus_stopped) < num_online_cpus()) { + cpu_relax(); +//STH touch_nmi_watchdog(); + if (ctr++ > 1000000) { + ctr = 0; + printk("%s:%d *** waiting loop cpus_stopped = %d\n", __FUNCTION__, __LINE__, atomic_read(&cpus_stopped)); + } + } + } + + printk("%s out of wait loop.\n", __FUNCTION__); + + // Get a memory dump here before exiting. + err = mictc_capture_memory(); + + printk("Completed Memory Dump.\n"); +// printk("Completed Memory Dump. 
DMASetuptime = %ld , DMATime = %ld, HostAckTime = %ld\n", dmasetuptime, dmacomptime, hostacktime); + + // Now release all cores. + atomic_set(&cpus_stopped, num_online_cpus() + 1); + + // Wait for every other CPU to be released + while (atomic_read(&cpus_released) < num_online_cpus() - 1) { + // msleep(2000); + cpu_relax(); + touch_nmi_watchdog(); + } + sti; // Interrupts on + + // FIXME This cleanup probably needs to be checked. + close: + if (always_false) { + iounmap(spu_addr); + } + done3: +// scif_unregister(mictc_endp_data, scif_offset, MICTC_XML_BUFFER_SIZE); + done2: + done1: + scif_close(mictc_endp_data); + kfree(trace); + done1a: + kfree(g_traceBufferAllocated); + spin_unlock(&mictc_lock); + done0: + printk("Ending tracecapture on cpu %d. Releasing lock.\n", smp_processor_id()); +} +EXPORT_SYMBOL(mictc_start_capture); + + +/* + * mictc_handle_exception() - main entry point from a kernel exception + * + * Locking hierarchy: + * interface locks, if any (begin_session) + */ +int +mictc_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) +{ + // Interrupts are off. + + // printk("Entering mictc_handle_exception on cpu %d pid: %d, name: %s\n", smp_processor_id(), current->pid, current->comm); + + mictc_trace_capture(regs); + atomic_inc(&cpus_stopped); + pr_debug("handler: Entering wait loop until lock count %d >= %d on cpu %d\n", atomic_read(&cpus_stopped), num_online_cpus() - 1, smp_processor_id()); + // Wait for every other CPU to finish its Trace Capture Tasks. + // This test is for num_online_cpus+1 to hold all threads that are + // in interrupt context so that the main thread can dump memory. + while (atomic_read(&cpus_stopped) < num_online_cpus() + 1) { + cpu_relax(); +//STH touch_nmi_watchdog(); + } + + atomic_inc(&cpus_released); + + printk("Exiting mictc_handle_exception on cpu %d %s\n", smp_processor_id(), current->comm); + return 1; +} + + +static int __mictc_notify(struct die_args *args, unsigned long cmd) +{ + struct pt_regs *regs = args->regs; +#if 0 + switch (cmd) { + case DIE_NMI: + if (atomic_read(&kgdb_active) != -1) { + /* KGDB CPU roundup */ + kgdb_nmicallback(smp_processor_id(), regs); + was_in_debug_nmi[smp_processor_id()] = 1; + touch_nmi_watchdog(); + return NOTIFY_STOP; + } + return NOTIFY_DONE; + + case DIE_NMIUNKNOWN: + if (was_in_debug_nmi[smp_processor_id()]) { + was_in_debug_nmi[smp_processor_id()] = 0; + return NOTIFY_STOP; + } + return NOTIFY_DONE; + + case DIE_DEBUG: + if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { + if (user_mode(regs)) + return single_step_cont(regs, args); + break; + } else if (test_thread_flag(TIF_SINGLESTEP)) + /* This means a user thread is single stepping + * a system call which should be ignored + */ + return NOTIFY_DONE; + /* fall through */ + default: + if (user_mode(regs)) + return NOTIFY_DONE; + } +#endif + if (cmd == DIE_NMI) { + if (mictc_handle_exception(args->trapnr, args->signr, cmd, regs)) { + touch_nmi_watchdog(); + return NOTIFY_STOP; + } + } else { + touch_nmi_watchdog(); + return NOTIFY_DONE; + } + + /* Must touch watchdog before return to normal operation */ + touch_nmi_watchdog(); + return NOTIFY_STOP; +} + + +static int +mictc_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = __mictc_notify(ptr, cmd); + local_irq_restore(flags); + + return ret; +} + + +/* + * This function is called whenever a process tries to do an ioctl on our + * device file. 
We get two extra parameters (additional to the inode and file + * structures, which all device functions get): the number of the ioctl called + * and the parameter given to the ioctl function. + * + * If the ioctl is write or read/write (meaning output is returned to the + * calling process), the ioctl call returns the output of this function. + * + */ +long device_ioctl( + struct file *file, /* ditto */ + unsigned int ioctl_num, /* number and param for ioctl */ + unsigned long ioctl_param) +{ + // Switch according to the ioctl called + switch (ioctl_num) { + case MICTC_START_CAPTURE: + + // ioctl_param contains the trace trigger number. + // Save it to check against the g_traceTrigger array. + g_traceCurrentTrigger = (u32)ioctl_param; + printk("IOCTL trace trigger %ld\n", ioctl_param); + mictc_start_capture(); + break; + default: + printk("Invalid ioctl.\n"); + return -ENXIO; + } + return 0; +} + + +/* + * This is called whenever a process attempts to open the device file + */ +static int device_open(struct inode *inode, struct file *file) +{ +#ifdef DEBUG + printk(KERN_INFO "device_open(%p)\n", file); +#endif + + /* + * We don't want to talk to two processes at the same time + */ + if (Device_Open) + return -EBUSY; + + Device_Open++; + try_module_get(THIS_MODULE); + return 0; +} + +static int device_release(struct inode *inode, struct file *file) +{ +#ifdef DEBUG + printk(KERN_INFO "device_release(%p,%p)\n", inode, file); +#endif + + /* + * We're now ready for our next caller + */ + Device_Open--; + + module_put(THIS_MODULE); + return 0; +} + + +/* + * This structure will hold the functions to be called + * when a process does something to the device we + * created. Since a pointer to this structure is kept in + * the devices table, it can't be local to + * init_module. NULL is for unimplemented functions. + */ +struct file_operations Fops = { + // .read = device_read, + // .write = device_write, + .unlocked_ioctl = device_ioctl, + .open = device_open, + .release = device_release, /* a.k.a. close */ +}; + +static struct notifier_block mictc_notifier = { + .notifier_call = mictc_notify, + .priority = 0x7fffffff /* we need to be notified first */ +}; + + +/* + * mictc_init - Register our notifier + * + */ +static +int mictc_init(void) +{ + int ret_val; + /* + * Register the character device (atleast try) + */ + ret_val = register_chrdev(MICTC_MAJOR_NUM, MICTC_DEVICE_NAME, &Fops); + + /* + * Negative values signify an error + */ + if (ret_val < 0) { + printk(KERN_ALERT "%s failed with %d\n", + "Sorry, registering the character device ", ret_val); + return ret_val; + } + + printk(KERN_INFO "%s The major device number is %d.\n", + "Registeration is a success", MICTC_MAJOR_NUM); + printk(KERN_INFO "To use trace capture you'll have to create a device file:\n"); + printk(KERN_INFO "mknod %s c %d 0\n", MICTC_FILE_NAME, MICTC_MAJOR_NUM); + + return register_die_notifier(&mictc_notifier); + +} + + +static +void mictc_exit(void) +{ + return; +} + +module_init(mictc_init); +module_exit(mictc_exit); + +MODULE_AUTHOR("Intel Corp. 2011 (sth " __DATE__ ") ver " TC_VER); +MODULE_DESCRIPTION("Trace Capture module for K1OM"); +MODULE_LICENSE("GPL"); diff --git a/trace_capture/trace_capture.h b/trace_capture/trace_capture.h new file mode 100644 index 0000000..b793dff --- /dev/null +++ b/trace_capture/trace_capture.h @@ -0,0 +1,245 @@ +/* + * Copyright 2010-2017 Intel Corporation. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +/* + * Trace Capture module common declarations + * + * Contains configuration, constants and function prototypes + * for the Trace Capture module. + */ + +#ifndef _MICTC_H_ +#define _MICTC_H_ 1 + +#include +#include +#include +#include +//#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include // for get_user and put_user +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef __SCIF_H__ +#include +#endif + +/* + * Version info: M.NP + */ + +#define TC_MAJOR "0" +#define TC_MINOR "1" +#define TC_PATCH "a" +#define TC_VER TC_MAJOR "." TC_MINOR TC_PATCH + +// These are common to the Host App +// and the MIC driver Trace Capture Feature +// COMMON DEFINES START HERE +enum TRACE_COMMAND +{ + TRACE_NOP = 100, + TRACE_DATA, + TRACE_HOST_READY, + TRACE_DONE, + TRACE_ERROR, + TRACE_PRINT, + TRACE_GET_FILE, + TRACE_PAGE_READY, + TRACE_REG_COMPLETE, + TRACE_MEM_COMPLETE, + TRACE_COMPLETE, + TRACE_ABORTED +}; + +// IOCTL +#define MICTC_MAJOR_NUM 's' +#define MICTC_DEVICE_NAME "trace_capture" +#define MICTC_FILE_NAME "/dev/trace_capture" + +#define MICTC_START_CAPTURE _IOW(MICTC_MAJOR_NUM, 0xff, int) + +// Use 2MB for KNF and 4MB for K1OM (auto-detected). +#define MICTC_XML_BUFFER_SIZE (2 * 1024UL * 1024UL) + +#define MICTC_MEM_BUFFER_SIZE (1 * 1024UL * 1024UL * 1024UL) + +// Shared memory constants +#define TRACE_STATUS_OFFSET 8 +#define TRACE_SIZE_OFFSET 16 + +// Enable/Disable Memory Test. +// This MUST be enabled simultaneously on Host App as well. 
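+// When enabled, the card and the Host App presumably exchange a simple byte-sum
+// checksum of each transferred chunk through TRACE_CHECKSUM_OFFSET below (see the
+// AddBytes() helper in trace_capture.c), so each memory chunk can be verified.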
+#define MIC_TRACE_CAPTURE_MEMORY_TEST 0 + +#if MIC_TRACE_CAPTURE_MEMORY_TEST +#define TRACE_CHECKSUM_OFFSET 24 +#endif + +#define TRACE_TRIGGER_MAX 10 +#define TRACE_TRIGGER_OFFSET 28 +#define TRACE_DATA_OFFSET 4096 + +// Used to indicate the end of the list for trace triggers. +#define TRACE_EOL 0xffffffff +// Used for trace counts to indicate that the driver should ignore current trace. +// Only meaningful when it is first in the list of trace triggers -- the entries +// after it are ignored. Trace counts supersede trace triggers. +#define TRACE_IGNORE 0xfffffffe + +// Types of Triggers - Refer to uOS Trace Capture Wiki for Usage +// Generic counter +#define TRACE_HOST_GENERIC_COUNTER 0x1 +// Async Flip counter +#define TRACE_HOST_FRAME_COUNTER 0x2 +// COMMON DEFINES END HERE + +// MSR's defined in the trace file sent during REQs +// Are these all valid for L1OM?? +#define P6_CR_TSC 0x10 +#define X86_CR_APICBASE 0x1b +#define MIC_CR_SPUBASE 0x1c +#define IA32_CR_MISC 0x1a0 +#define WMT_CR_LASTBRANCH_0 0x1db +#define WMT_CR_LASTBRANCH_1 0x1dc +#define X86_CR_MTRRphysMask0 0x201 +#define X86_CR_MTRRphysMask1 0x203 +#define X86_CR_MTRRphysMask2 0x205 +#define X86_CR_MTRRphysMask3 0x207 +#define X86_CR_MTRRphysMask4 0x209 +#define X86_CR_MTRRphysMask5 0x20b +#define X86_CR_MTRRphysMask6 0x20d +#define X86_CR_MTRRphysMask7 0x20f +#define IA32_CR_PAT 0x277 +#define IA32_MTRR_DEF_TYPE 0x2ff +#define VMX_MSR_BASE 0x480 +#define VMX_MSR_BASE_PLUS_1 0x481 +#define VMX_MSR_BASE_PLUS_2 0x482 +#define VMX_MSR_BASE_PLUS_3 0x483 +#define VMX_MSR_BASE_PLUS_4 0x484 +#define VMX_MSR_BASE_PLUS_5 0x485 +#define VMX_MSR_BASE_PLUS_6 0x486 +#define VMX_MSR_BASE_PLUS_7 0x487 +#define VMX_MSR_BASE_PLUS_8 0x488 +#define VMX_MSR_BASE_PLUS_9 0x489 +#define TIME 0x4711 +#define PINFO 0x4712 +#define X86_CR_MTRRdefType 0x2ff +#define X86_CR_MTRRcap 0xfe +#define X86_CR_MTRRphysBase0 0x200 +#define X86_CR_MTRRphysBase1 0x202 +#define X86_CR_MTRRphysBase2 0x204 +#define X86_CR_MTRRphysBase3 0x206 +#define X86_CR_MTRRphysBase4 0x208 +#define X86_CR_MTRRphysBase5 0x20a +#define X86_CR_MTRRphysBase6 0x20c +#define X86_CR_MTRRphysBase7 0x20e +#define X86_CR_MTRRfix64K_00000 0x250 +#define X86_CR_MTRRfix16K_80000 0x258 +#define X86_CR_MTRRfix16K_A0000 0x259 +#define X86_CR_MTRRfix4K_C0000 0x268 +#define X86_CR_MTRRfix4K_C8000 0x269 +#define X86_CR_MTRRfix4K_D0000 0x26a +#define X86_CR_MTRRfix4K_D8000 0x26b +#define X86_CR_MTRRfix4K_E0000 0x26c +#define X86_CR_MTRRfix4K_E8000 0x26d +#define X86_CR_MTRRfix4K_F0000 0x26e +#define X86_CR_MTRRfix4K_F8000 0x26f +#define P5_MC_ADDR 0x0 +#define P5_MC_TYPE 0x1 +#define MSR_TR1 0x2 +#define MSR_TR2 0x4 +#define MSR_TR3 0x5 +#define MSR_TR4 0x6 +#define MSR_TR5 0x7 +#define MSR_TR6 0x8 +#define MSR_TR7 0x9 +#define MSR_TR9 0xb +#define MSR_TR10 0xc +#define MSR_TR11 0xd +#define MSR_TR12 0xe +#define IA32_APIC_BASE 0x1b +#define IA32_TIME_STAMP_COUNTER 0x10 +#define IA32_PerfCntr0 0x20 +#define IA32_PerfCntr1 0x21 +#define IA32_PerfCntr2 0x22 +#define IA32_PerfCntr3 0x23 +#define PerfFilteredCntr0 0x24 +#define PerfFilteredCntr1 0x25 +#define PerfFilteredCntr2 0x26 +#define PerfFilteredCntr3 0x27 +#define IA32_PerfEvtSel0 0x28 +#define IA32_PerfEvtSel1 0x29 +#define IA32_PerfEvtSel2 0x2a +#define IA32_PerfEvtSel3 0x2b +#define PerfFilterMask 0x2c +#define IA32_PERF_GLOBAL_STATUS 0x2d +#define IA32_PERF_GLOBAL_OVF_CONTROL 0x2e +#define IA32_PERF_GLOBAL_CTRL 0x2f +#define IA32_MCG_CTL 0x17b +#define IA32_MC0_CTRL 0x400 +#define IA32_MC0_STAT 0x401 +#define IA32_MC0_ADDR 0x402 +#define 
IA32_MC0_MISC 0x403 +#define IA32_MC1_CTRL 0x404 +#define IA32_MC1_STAT 0x405 +#define IA32_MC1_ADDR 0x406 +#define IA32_MC1_MISC 0x407 +#define STAR 0xc0000081 +#define LSTAR 0xc0000082 +#define SYSCALL_FLAG_MASK 0xc0000084 +#define X86_PAT 0x277 +#define SPU_BASE 0x1C + + +#endif /* Recursion block */ diff --git a/udev-mic.rules b/udev-mic.rules new file mode 100644 index 0000000..3930e21 --- /dev/null +++ b/udev-mic.rules @@ -0,0 +1,9 @@ +# do not edit this file, it will be overwritten on update +# initramfs:default + +# MIC SCIF +KERNEL=="scif", ACTION=="add", NAME="mic/%k",MODE="0666", RUN+="/bin/chmod og+x /dev/mic" +KERNEL=="ctrl", ACTION=="add", NAME="mic/%k", MODE="0666" + +# Bring up network interfaces manually on rhel7 after module reload +KERNEL=="mic*", SUBSYSTEM=="net", RUN+="/bin/sh -c '/bin/grep 7. /etc/redhat-release && /sbin/ifup %k'" diff --git a/vcons/Kbuild b/vcons/Kbuild new file mode 100644 index 0000000..ffaf350 --- /dev/null +++ b/vcons/Kbuild @@ -0,0 +1,3 @@ +michvc-objs := hvc_mic.o + +obj-m := michvc.o diff --git a/vcons/hvc_console.h b/vcons/hvc_console.h new file mode 100644 index 0000000..54381eb --- /dev/null +++ b/vcons/hvc_console.h @@ -0,0 +1,119 @@ +/* + * hvc_console.h + * Copyright (C) 2005 IBM Corporation + * + * Author(s): + * Ryan S. Arnold + * + * hvc_console header information: + * moved here from arch/powerpc/include/asm/hvconsole.h + * and drivers/char/hvc_console.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef HVC_CONSOLE_H +#define HVC_CONSOLE_H +#include +#include +#include + +/* + * This is the max number of console adapters that can/will be found as + * console devices on first stage console init. Any number beyond this range + * can't be used as a console device but is still a valid tty device. + */ +#define MAX_NR_HVC_CONSOLES 16 + +/* + * The Linux TTY code does not support dynamic addition of tty derived devices + * so we need to know how many tty devices we might need when space is allocated + * for the tty device. Since this driver supports hotplug of vty adapters we + * need to make sure we have enough allocated. + */ +#define HVC_ALLOC_TTY_ADAPTERS 8 + +struct hvc_struct { + spinlock_t lock; + int index; + struct tty_struct *tty; + int count; + int do_wakeup; + char *outbuf; + int outbuf_size; + int n_outbuf; + uint32_t vtermno; + const struct hv_ops *ops; + int irq_requested; + int data; + struct winsize ws; + struct work_struct tty_resize; + struct list_head next; + struct kref kref; /* ref count & hvc_struct lifetime */ +}; + +/* implemented by a low level driver */ +struct hv_ops { + int (*get_chars)(uint32_t vtermno, char *buf, int count); + int (*put_chars)(uint32_t vtermno, const char *buf, int count); + + /* Callbacks for notification. 
Called in open, close and hangup */ + int (*notifier_add)(struct hvc_struct *hp, int irq); + void (*notifier_del)(struct hvc_struct *hp, int irq); + void (*notifier_hangup)(struct hvc_struct *hp, int irq); +}; + +/* Register a vterm and a slot index for use as a console (console_init) */ +extern int hvc_instantiate(uint32_t vtermno, int index, + const struct hv_ops *ops); + +/* register a vterm for hvc tty operation (module_init or hotplug add) */ +extern struct hvc_struct * hvc_alloc(uint32_t vtermno, int data, + const struct hv_ops *ops, int outbuf_size); +/* remove a vterm from hvc tty operation (module_exit or hotplug remove) */ +extern int hvc_remove(struct hvc_struct *hp); + +/* data available */ +int hvc_poll(struct hvc_struct *hp); +void hvc_kick(void); + +/* Resize hvc tty terminal window */ +extern void __hvc_resize(struct hvc_struct *hp, struct winsize ws); + +static inline void hvc_resize(struct hvc_struct *hp, struct winsize ws) +{ + unsigned long flags; + + spin_lock_irqsave(&hp->lock, flags); + __hvc_resize(hp, ws); + spin_unlock_irqrestore(&hp->lock, flags); +} + +/* default notifier for irq based notification */ +extern int notifier_add_irq(struct hvc_struct *hp, int data); +extern void notifier_del_irq(struct hvc_struct *hp, int data); +extern void notifier_hangup_irq(struct hvc_struct *hp, int data); + + +#if defined(CONFIG_XMON) && defined(CONFIG_SMP) +#include +#else +static inline int cpus_are_in_xmon(void) +{ + return 0; +} +#endif + +#endif // HVC_CONSOLE_H diff --git a/vcons/hvc_mic.c b/vcons/hvc_mic.c new file mode 100644 index 0000000..21640c0 --- /dev/null +++ b/vcons/hvc_mic.c @@ -0,0 +1,341 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#include +#include +#include "hvc_console.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MIC_COOKIE 0xc0c0 +#define MIC_KNC 1 + +static long vcons_hdr_addr; +static struct micscif_rb mic_out_buf; +static struct micscif_rb mic_in_buf; + +struct vcons_info { + struct vcons_buf *hdr; + struct vcons_mic_header *mic_hdr; + char *vcons_op_buf; + char *vcons_ip_buf; +}; + +static struct vcons_info vcons_info; +static int dbg = 0; + +/* Receive data from the host (mic i/p buffer) */ +static int hvc_mic_get_chars(uint32_t vt, char *buf, int count) +{ + int ret, len, get_count; + + len = micscif_rb_count(&mic_in_buf, count); + get_count = min(len, count); + ret = micscif_rb_get_next(&mic_in_buf, buf, get_count); + if (ret == get_count) + micscif_rb_update_read_ptr(&mic_in_buf); + + return ret; +} + +/* Send data to the host (mic o/p buffer) */ +static int hvc_mic_put_chars(uint32_t vt, const char *buf, int count) +{ + int ret; + int put_count; + volatile int *host_status = + (volatile int *)&vcons_info.mic_hdr->host_status; + + put_count = min(micscif_rb_space(&mic_out_buf), count); + if (put_count) { + ret = micscif_rb_write(&mic_out_buf, (void *)buf, put_count); + BUG_ON(ret); + micscif_rb_commit(&mic_out_buf); + } else if (*host_status != MIC_VCONS_HOST_OPEN) + return count; + return put_count; +} + + +static irqreturn_t hvc_mic_handle_interrupt(int irq, void *dev_id) +{ + struct hvc_struct *hp = (struct hvc_struct *)dev_id; + if (hvc_poll(hp)) { + hvc_kick(); + return IRQ_HANDLED; + } + return IRQ_NONE; +} + +static int hvc_mic_notifier_add_irq(struct hvc_struct *hp, int irq) +{ + int ret = request_irq(get_sbox_irq(HVC_SBOX_INT_IDX), + hvc_mic_handle_interrupt, IRQF_DISABLED, + "hvc intr", hp); + if (ret) { + printk("Unable to register interrupt\n"); + return ret; + } + hp->irq_requested = 1; + return 0; +} + +static void hvc_mic_notifier_del_irq(struct hvc_struct *hp, int irq) +{ + if (hp->irq_requested) { + free_irq(get_sbox_irq(HVC_SBOX_INT_IDX), hp); + hp->irq_requested = 0; + } +} + +static void hvc_mic_notifier_hangup_irq(struct hvc_struct *hp, int irq) +{ + hvc_mic_notifier_del_irq(hp, irq); +} + +static const struct hv_ops hvc_mic_ops = { + .get_chars = hvc_mic_get_chars, + .put_chars = hvc_mic_put_chars, + .notifier_add = hvc_mic_notifier_add_irq, + .notifier_del = hvc_mic_notifier_del_irq, + .notifier_hangup = hvc_mic_notifier_hangup_irq, +}; + +static void dump_vcons_hdr(struct vcons_buf *hdr) +{ + printk(KERN_ERR "host_magic\t%x\n", readl(&hdr->host_magic)); + printk(KERN_ERR "mic_magic\t%x\n", readl(&hdr->mic_magic)); + printk(KERN_ERR "o_buf_dma_addr\t%x\n", readl(&hdr->o_buf_dma_addr)); + printk(KERN_ERR "o_wr\t%x\n", readl(&hdr->o_wr)); + printk(KERN_ERR "o_size\t%x\n", readl(&hdr->o_size)); + printk(KERN_ERR "i_hdr_addr\t%lx\n", readq(&hdr->i_hdr_addr)); + printk(KERN_ERR "i_buf_addr\t%lx\n", readq(&hdr->i_buf_addr)); + printk(KERN_ERR "i_rd\t%x\n", readl(&hdr->i_rd)); +} + +static int mic_cons_init(void) +{ + int rc; + + if ((rc = hvc_instantiate(MIC_COOKIE, 0, &hvc_mic_ops))) + printk(KERN_ERR "error instantiating hvc console\n"); + + return rc; +} + +static struct hvc_struct *hp; +static int __init hvc_mic_init(void) +{ + struct 
vcons_buf *hdr = NULL; + struct vcons_buf tmp_hdr; + int err = 0; + char *hvc_buf; + u8 card_type=0; + uint16_t host_rb_ver, mic_rb_ver; + +#if defined(CONFIG_MK1OM) + card_type = MIC_KNC; +#endif + hvc_buf = (char *)get_zeroed_page(GFP_KERNEL); + if (!hvc_buf) { + printk(KERN_ERR "unable to allocate vcons buffer\n"); + return -ENOMEM; + } + if (card_type == MIC_KNC) { + vcons_info.vcons_ip_buf = hvc_buf; + vcons_info.mic_hdr = (struct vcons_mic_header *)kzalloc(sizeof(struct vcons_mic_header), GFP_KERNEL); + if (!vcons_info.mic_hdr) { + free_page((unsigned long)hvc_buf); + printk(KERN_ERR "unable to allocate vcons header\n"); + return -ENOMEM; + } + } else { + vcons_info.vcons_ip_buf = hvc_buf + PAGE_SIZE/2; + vcons_info.mic_hdr = (struct vcons_mic_header *)hvc_buf; + } + + vcons_info.hdr = hdr = ioremap_nocache(vcons_hdr_addr, + sizeof(struct vcons_buf)); + if (!hdr) { + printk(KERN_ERR "unable to map vcons header\n"); + err = -ENOMEM; + goto error; + } + + if (dbg) + dump_vcons_hdr(hdr); + + if (readl(&hdr->host_magic) != MIC_HOST_VCONS_READY) { + printk(KERN_ERR "host not ready, giving up\n"); + err = -ENODEV; + goto error; + } + + host_rb_ver = readw(&hdr->host_rb_ver); + mic_rb_ver = micscif_rb_get_version(); + writew(mic_rb_ver, &hdr->mic_rb_ver); + if (host_rb_ver != mic_rb_ver) { + printk(KERN_ERR "Card and host ring buffer versions mismatch."); + printk(KERN_ERR "Card ver: %d, Host ver: %d \n", mic_rb_ver, + host_rb_ver); + writel(MIC_VCONS_RB_VER_ERR, &hdr->mic_magic); + err = -ENXIO; + goto error; + } + memcpy_fromio(&tmp_hdr, hdr, sizeof(struct vcons_buf)); + + if (!(vcons_info.vcons_op_buf = ioremap_nocache(tmp_hdr.o_buf_dma_addr, + tmp_hdr.o_size))) { + printk(KERN_ERR "unable to map vcons output buffer\n"); + err = -ENOMEM; + goto error; + } + + tmp_hdr.i_hdr_addr = virt_to_phys(vcons_info.mic_hdr); + tmp_hdr.i_buf_addr = virt_to_phys(vcons_info.vcons_ip_buf); + + if (card_type == MIC_KNC) + tmp_hdr.i_size = PAGE_SIZE; + else + tmp_hdr.i_size = PAGE_SIZE/2; + + micscif_rb_init(&mic_out_buf, (volatile uint32_t *)&vcons_info.mic_hdr->o_rd, + (volatile uint32_t *)&hdr->o_wr, + (volatile uint32_t *)vcons_info.vcons_op_buf, + tmp_hdr.o_size); + + micscif_rb_init(&mic_in_buf, + (volatile uint32_t *)&hdr->i_rd, + (volatile uint32_t *)&vcons_info.mic_hdr->i_wr, + (volatile uint32_t *)vcons_info.vcons_ip_buf, + tmp_hdr.i_size); + + mic_cons_init(); + hp = hvc_alloc(MIC_COOKIE, 2, &hvc_mic_ops, 128); + + if (IS_ERR(hp)) { + printk(KERN_ERR "unable to allocate hvc console\n"); + err = PTR_ERR(hp); + } else { + writeq(tmp_hdr.i_hdr_addr, &hdr->i_hdr_addr); + writeq(tmp_hdr.i_buf_addr, &hdr->i_buf_addr); + writel(tmp_hdr.i_size, &hdr->i_size); + writel(MIC_VCONS_READY, &hdr->mic_magic); + if (dbg) + dump_vcons_hdr(hdr); + + return 0; + } +error: + if (hdr) + iounmap(hdr); + if (vcons_info.vcons_op_buf) + iounmap(vcons_info.vcons_op_buf); +#if defined(CONFIG_MK1OM) + free_page((unsigned long)vcons_info.vcons_ip_buf); + kfree(vcons_info.mic_hdr); +#else + free_page((unsigned long)vcons_info.mic_hdr); +#endif + return err; +} + +static void __exit hvc_mic_exit(void) +{ + char buf[8]; + int ret, len; + + writel(0, &vcons_info.hdr->mic_magic); + + do { + len = micscif_rb_count(&mic_in_buf, sizeof(buf)); + ret = micscif_rb_get_next(&mic_in_buf, buf, + min(len, (int)sizeof(buf))); + } while (ret > 0); + + iounmap(vcons_info.hdr); + iounmap(vcons_info.vcons_op_buf); +#if defined(CONFIG_MK1OM) + free_page((unsigned long)vcons_info.vcons_ip_buf); + kfree(vcons_info.mic_hdr); +#else + 
free_page((unsigned long)vcons_info.mic_hdr); +#endif + if (hp) + hvc_remove(hp); +} + +MODULE_PARM_DESC(vcons_hdr_addr, "mic address of vcons hdr"); +module_param(vcons_hdr_addr, long, S_IRUGO); +module_param(dbg, int, S_IRUGO); +MODULE_LICENSE("GPL"); +module_init(hvc_mic_init); +module_exit(hvc_mic_exit); + diff --git a/virtio/Kbuild b/virtio/Kbuild new file mode 100644 index 0000000..a0033e5 --- /dev/null +++ b/virtio/Kbuild @@ -0,0 +1,2 @@ +obj-m += mic_virtblk.o + diff --git a/virtio/mic_virtblk.c b/virtio/mic_virtblk.c new file mode 100644 index 0000000..356b48f --- /dev/null +++ b/virtio/mic_virtblk.c @@ -0,0 +1,862 @@ +/* + virtio block device adapted for MIC. + copied from drivers/block/virtio_blk.c of Linux kernel + It is initially commited by + Rusty Russell 2007-10-21 18:03:38 + with SHA1 ID, e467cde238184d1b0923db2cd61ae1c5a6dc15aa + + drivers/block/virtio_blk.c of Linux kernel does not have copyright notice. + + * For adapting to MIC + * (C) Copyright 2012 Intel Corporation + * Author: Caz Yokoyama + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + */ +//#define DEBUG +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mic_common.h" +#include "mic/micveth_dma.h" +#include "mic/micscif_intr.h" +#include "mic/mic_virtio.h" + +#define SBOX_MMIO_LENGTH (64 * 1024) + +#define PART_BITS 4 + +#define VIRTQUEUE_LENGTH 128 +#define MIC_VRING_ALIGN PAGE_SIZE + +#define INTERRUPT_ID_FOR_VIRTBLK 3 + +extern int get_sbox_irq(int index); + +static int major, index = 0; +static long virtio_addr = 0; +static mic_data_t virtblk_mic_data; + +struct virtio_blk +{ + spinlock_t lock; + + struct virtio_device *vdev; + struct virtqueue *vq; + + /* The disk structure for the kernel. */ + struct gendisk *disk; + + /* Request tracking. */ + struct list_head reqs; + + mempool_t *pool; + + /* virtual address of blk_config */ + void __iomem *ioaddr; + + /* What host tells us, plus 2 for header & tailer. */ + unsigned int sg_elems; + + /* sbox va */ + u8 *sbox; + + /* Scatterlist: can be too big for stack. */ + struct scatterlist sg[/*sg_elems*/]; +}; + +struct virtblk_req +{ + struct list_head list; + struct request *req; + struct virtio_blk_outhdr out_hdr; + struct virtio_scsi_inhdr in_hdr; + u8 status; +}; + +#define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) + +/* The following vring_virtqueue and to_vvq() are copied from virtio_ring.c. Please name sure you have the same structure + as in virtio_ring.c. The reason why they are copied is that I don't want to change virtio_ring.c which is a symbolic link. +*/ +struct vring_virtqueue +{ + struct virtqueue vq; + + /* Actual memory layout for this queue */ + struct vring vring; + + /* Other side has made a mess, don't try any more. */ + bool broken; + + /* Host supports indirect buffers */ + bool indirect; + + /* Number of free buffers */ + unsigned int num_free; + /* Head of free buffer list. */ + unsigned int free_head; + /* Number we've added since last sync. 
*/ + unsigned int num_added; + + /* Last used index we've seen. */ + u16 last_used_idx; + + /* How to notify other side. FIXME: commonalize hcalls! */ + void (*notify)(struct virtqueue *vq); + +#ifdef DEBUG + /* They're supposed to lock for us. */ + unsigned int in_use; +#endif + + struct _mic_ctx_t *mic_ctx; + /* Tokens for callbacks. */ + void *data[]; +}; + +#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) + +static void blk_done(struct virtqueue *vq) +{ + struct virtio_blk *vblk = vq->vdev->priv; + struct virtblk_req *vbr; + unsigned int len; + unsigned long flags; + + spin_lock_irqsave(&vblk->lock, flags); + while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { + int error; + + switch (vbr->status) { + case VIRTIO_BLK_S_OK: + error = 0; + break; + case VIRTIO_BLK_S_UNSUPP: + error = -ENOTTY; + break; + default: + error = -EIO; + break; + } + + if (blk_pc_request(vbr->req)) { + vbr->req->resid_len = vbr->in_hdr.residual; + vbr->req->sense_len = vbr->in_hdr.sense_len; + vbr->req->errors = vbr->in_hdr.errors; + } + + __blk_end_request_all(vbr->req, error); + list_del(&vbr->list); + mempool_free(vbr, vblk->pool); + } + /* In case queue is stopped waiting for more buffers. */ + blk_start_queue(vblk->disk->queue); + spin_unlock_irqrestore(&vblk->lock, flags); +} + +static bool do_req(struct request_queue *q, struct virtio_blk *vblk, + struct request *req) +{ + unsigned long num, out = 0, in = 0; + struct virtblk_req *vbr; + + vbr = mempool_alloc(vblk->pool, GFP_ATOMIC); + if (!vbr) + /* When another request finishes we'll try again. */ + return false; + + vbr->req = req; + + if (req->cmd_flags & REQ_FLUSH) { + vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH; + vbr->out_hdr.sector = 0; + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); + } else { + switch (req->cmd_type) { + case REQ_TYPE_FS: + vbr->out_hdr.type = 0; + vbr->out_hdr.sector = blk_rq_pos(vbr->req); + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); + break; + case REQ_TYPE_BLOCK_PC: + vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD; + vbr->out_hdr.sector = 0; + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); + break; + case REQ_TYPE_SPECIAL: + vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID; + vbr->out_hdr.sector = 0; + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); + break; + default: + /* We don't put anything else in the queue. */ + BUG(); + } + } + + sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr)); + + /* + * If this is a packet command we need a couple of additional headers. + * Behind the normal outhdr we put a segment with the scsi command + * block, and before the normal inhdr we put the sense data and the + * inhdr with additional status information before the normal inhdr. 
+ */ + if (blk_pc_request(vbr->req)) + sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len); + + num = blk_rq_map_sg(q, vbr->req, vblk->sg + out); + + if (blk_pc_request(vbr->req)) { + sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96); + sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr, + sizeof(vbr->in_hdr)); + } + + sg_set_buf(&vblk->sg[num + out + in++], &vbr->status, + sizeof(vbr->status)); + + if (num) { + if (rq_data_dir(vbr->req) == WRITE) { + vbr->out_hdr.type |= VIRTIO_BLK_T_OUT; + out += num; + } else { + vbr->out_hdr.type |= VIRTIO_BLK_T_IN; + in += num; + } + } + + if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) { + mempool_free(vbr, vblk->pool); + return false; + } + + list_add_tail(&vbr->list, &vblk->reqs); + return true; +} + +static void do_virtblk_request(struct request_queue *q) +{ + struct virtio_blk *vblk = q->queuedata; + struct request *req; + unsigned int issued = 0; + + while ((req = blk_peek_request(q)) != NULL) { + BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); + + /* If this request fails, stop queue and wait for something to + finish to restart it. */ + if (!do_req(q, vblk, req)) { + blk_stop_queue(q); + break; + } + blk_start_request(req); + issued++; + } + + if (issued) + virtqueue_kick(vblk->vq); +} + +static int +set_capacity_from_host(struct virtio_blk *vblk) +{ + struct virtio_device *vdev = vblk->vdev; + u64 cap; + + /* Host must always specify the capacity. */ + vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), + &cap, sizeof(cap)); + if (cap == 0) { + printk(KERN_ERR "Have you set virtblk file?\n"); + return -ENXIO; + } + + /* If capacity is too big, truncate with warning. */ + if ((sector_t)cap != cap) { + dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n", + (unsigned long long)cap); + cap = (sector_t)-1; + } + set_capacity(vblk->disk, cap); + + return 0; +} + +static int +virtblk_open(struct block_device *bdev, fmode_t mode) +{ + struct gendisk *disk = bdev->bd_disk; + struct virtio_blk *vblk = disk->private_data; + + return set_capacity_from_host(vblk); +} + +static int virtblk_ioctl(struct block_device *bdev, fmode_t mode, + unsigned cmd, unsigned long data) +{ + struct gendisk *disk = bdev->bd_disk; + struct virtio_blk *vblk = disk->private_data; + + /* + * Only allow the generic SCSI ioctls if the host can support it. 
+ */ + if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI)) + return -ENOTTY; + + return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, + (void __user *)data); +} + +/* We provide getgeo only to please some old bootloader/partitioning tools */ +static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) +{ + struct virtio_blk *vblk = bd->bd_disk->private_data; + struct virtio_blk_geometry vgeo; + int err; + + /* see if the host passed in geometry config */ + err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY, + offsetof(struct virtio_blk_config, geometry), + &vgeo); + + if (!err) { + geo->heads = vgeo.heads; + geo->sectors = vgeo.sectors; + geo->cylinders = vgeo.cylinders; + } else { + /* some standard values, similar to sd */ + geo->heads = 1 << 6; + geo->sectors = 1 << 5; + geo->cylinders = get_capacity(bd->bd_disk) >> 11; + } + return 0; +} + +static const struct block_device_operations virtblk_fops = { + .open = virtblk_open, + .ioctl = virtblk_ioctl, + .owner = THIS_MODULE, + .getgeo = virtblk_getgeo, +}; + +static int index_to_minor(int index) +{ + return index << PART_BITS; +} + +static inline bool more_used(const struct vring_virtqueue *vq) +{ + return vq->last_used_idx != vq->vring.used->idx; +} + +static irqreturn_t +mic_virtblk_intr_handler(int irq, void *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + if (!more_used(vq)) { + pr_debug("virtqueue interrupt with no work for %p\n", vq); + goto _exit_; + } + + if (unlikely(vq->broken)) + goto _exit_; + + pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); + if (vq->vq.callback) + vq->vq.callback(&vq->vq); + + _exit_: + return IRQ_HANDLED; +} + +static int __devinit virtblk_probe(struct virtio_device *vdev) +{ + struct virtio_blk *vblk; + struct request_queue *q; + int err; + u32 v, blk_size, sg_elems, opt_io_size; + u16 min_io_size; + u8 physical_block_exp, alignment_offset; + struct board_info *bd_info = virtblk_mic_data.dd_bi[0]; + struct vb_shared *vb_shared; + + if (index_to_minor(index) >= 1 << MINORBITS) + return -ENOSPC; + + vb_shared = ((struct mic_virtblk *)bd_info->bi_virtio)->vb_shared; + vdev->features[0] = readl(&vb_shared->host_features); + + /* We need to know how many segments before we allocate. */ + err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX, + offsetof(struct virtio_blk_config, seg_max), + &sg_elems); + if (err) + sg_elems = 1; + + /* We need an extra sg elements at head and tail. */ + sg_elems += 2; + vdev->priv = vblk = kmalloc(sizeof(*vblk) + + sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL); + if (!vblk) { + err = -ENOMEM; + goto out; + } + + INIT_LIST_HEAD(&vblk->reqs); + spin_lock_init(&vblk->lock); + vblk->vdev = vdev; + vblk->sg_elems = sg_elems; + sg_init_table(vblk->sg, vblk->sg_elems); + + /* map sbox */ + vblk->sbox = ioremap_nocache(SBOX_BASE, SBOX_MMIO_LENGTH); + if (!vblk->sbox) { + printk(KERN_ERR "%s: NULL SBOX ptr\n", __func__); + err = -ENOMEM; + goto out_free_vblk; + } + + /* We expect one virtqueue, for output. 
*/ + vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests"); + if (IS_ERR(vblk->vq)) { + err = PTR_ERR(vblk->vq); + goto out_unmap_sbox; + } + + if ((err = request_irq(get_sbox_irq(VIRTIO_SBOX_INT_IDX), + mic_virtblk_intr_handler, IRQF_DISABLED, + "virtio intr", vblk->vq))) { + printk(KERN_ERR "%s: can't register interrupt: %d\n", __func__, err); + goto out_free_vq; + } + + vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req)); + if (!vblk->pool) { + err = -ENOMEM; + goto out_free_irq; + } + + /* FIXME: How many partitions? How long is a piece of string? */ + vblk->disk = alloc_disk(1 << PART_BITS); + if (!vblk->disk) { + err = -ENOMEM; + goto out_mempool; + } + + q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock); + if (!q) { + err = -ENOMEM; + goto out_put_disk; + } + + q->queuedata = vblk; + + if (index < 26) { + sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26); + } else if (index < (26 + 1) * 26) { + sprintf(vblk->disk->disk_name, "vd%c%c", + 'a' + index / 26 - 1, 'a' + index % 26); + } else { + const unsigned int m1 = (index / 26 - 1) / 26 - 1; + const unsigned int m2 = (index / 26 - 1) % 26; + const unsigned int m3 = index % 26; + sprintf(vblk->disk->disk_name, "vd%c%c%c", + 'a' + m1, 'a' + m2, 'a' + m3); + } + + vblk->disk->major = major; + vblk->disk->first_minor = index_to_minor(index); + vblk->disk->private_data = vblk; + vblk->disk->fops = &virtblk_fops; + vblk->disk->driverfs_dev = NULL; // There is no parent device. + index++; + + /* configure queue flush support */ + if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) + blk_queue_flush(q, REQ_FLUSH); + + /* If disk is read-only in the host, the guest should obey */ + if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) { + if (vdev->config->get_features(vdev) & (1U << VIRTIO_BLK_F_RO)) { + set_disk_ro(vblk->disk, 1); + } + } + + err = set_capacity_from_host(vblk); + if (err) + goto out_put_disk; + + /* We can handle whatever the host told us to handle. */ + blk_queue_max_segments(q, vblk->sg_elems-2); + + /* No need to bounce any requests */ + blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); + + /* No real sector limit. */ + blk_queue_max_hw_sectors(q, -1U); + + /* Host can optionally specify maximum segment size and number of + * segments. 
*/ + err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX, + offsetof(struct virtio_blk_config, size_max), + &v); + if (!err) + blk_queue_max_segment_size(q, v); + else + blk_queue_max_segment_size(q, -1U); + + /* Host can optionally specify the block size of the device */ + err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE, + offsetof(struct virtio_blk_config, blk_size), + &blk_size); + if (!err) + blk_queue_logical_block_size(q, blk_size); + else + blk_size = queue_logical_block_size(q); + + /* Use topology information if available */ + err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, + offsetof(struct virtio_blk_config, physical_block_exp), + &physical_block_exp); + if (!err && physical_block_exp) + blk_queue_physical_block_size(q, + blk_size * (1 << physical_block_exp)); + + err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, + offsetof(struct virtio_blk_config, alignment_offset), + &alignment_offset); + if (!err && alignment_offset) + blk_queue_alignment_offset(q, blk_size * alignment_offset); + + err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, + offsetof(struct virtio_blk_config, min_io_size), + &min_io_size); + if (!err && min_io_size) + blk_queue_io_min(q, blk_size * min_io_size); + + err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY, + offsetof(struct virtio_blk_config, opt_io_size), + &opt_io_size); + if (!err && opt_io_size) + blk_queue_io_opt(q, blk_size * opt_io_size); + + add_disk(vblk->disk); + return 0; + +out_put_disk: + put_disk(vblk->disk); +out_mempool: + mempool_destroy(vblk->pool); +out_free_irq: + free_irq(get_sbox_irq(VIRTIO_SBOX_INT_IDX), vblk->vq); +out_free_vq: + vdev->config->del_vqs(vdev); +out_unmap_sbox: + iounmap(vblk->sbox); +out_free_vblk: + kfree(vblk); +out: + return err; +} + +static void __devexit virtblk_remove(struct virtio_device *vdev) +{ + struct virtio_blk *vblk = vdev->priv; + + /* Nothing should be pending. */ + BUG_ON(!list_empty(&vblk->reqs)); + + free_irq(get_sbox_irq(VIRTIO_SBOX_INT_IDX), vblk->vq); + + /* Stop all the virtqueues. */ + vdev->config->reset(vdev); + + del_gendisk(vblk->disk); + blk_cleanup_queue(vblk->disk->queue); + put_disk(vblk->disk); + mempool_destroy(vblk->pool); + vdev->config->del_vqs(vdev); + iounmap(vblk->sbox); + kfree(vblk); +} + +/* config->get_features() implementation */ +static u32 virtblk_get_features(struct virtio_device *vdev) +{ + /* When someone needs more than 32 feature bits, we'll need to + * steal a bit to indicate that the rest are somewhere else. */ + struct board_info *bd_info = virtblk_mic_data.dd_bi[0]; + struct vb_shared *vb_shared; + + vb_shared = ((struct mic_virtblk *)bd_info->bi_virtio)->vb_shared; + return readl(&vb_shared->host_features); +} + +/* virtio config->finalize_features() implementation */ +static void virtblk_finalize_features(struct virtio_device *vdev) +{ + struct board_info *bd_info = virtblk_mic_data.dd_bi[0]; + struct vb_shared *vb_shared; + + /* Give virtio_ring a chance to accept features. */ + vring_transport_features(vdev); + + /* We only support 32 feature bits. 
*/ + BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1); + + vb_shared = ((struct mic_virtblk *)bd_info->bi_virtio)->vb_shared; + writel(vdev->features[0], &vb_shared->client_features); +} + +/* config->get() implementation */ +static void virtblk_get(struct virtio_device *vdev, unsigned offset, + void *buf, unsigned len) +{ + struct board_info *bd_info = virtblk_mic_data.dd_bi[0]; + struct vb_shared *vb_shared; + void *ioaddr; + u8 *ptr = buf; + int i; + + vb_shared = ((struct mic_virtblk *)bd_info->bi_virtio)->vb_shared; + ioaddr = (void *)&vb_shared->blk_config + offset; + for (i = 0; i < len; i++) + ptr[i] = readb(ioaddr + i); +} + +static void virtblk_reset(struct virtio_device *vdev) +{ +} + +/* the notify function used when creating a virt queue */ +static void virtblk_notify(struct virtqueue *vq) +{ + const int doorbell = 2; + struct virtio_blk *vblk = vq->vdev->priv; + uint32_t db_reg; + + /* Ring host doorbell interrupt */ + db_reg = readl(vblk->sbox + (SBOX_SDBIC0 + (4 * doorbell))) + | SBOX_SDBIC0_DBREQ_BIT; + writel(db_reg, vblk->sbox + (SBOX_SDBIC0 + (4 * doorbell))); +} + +/* the config->del_vqs() implementation */ +static void virtblk_del_vqs(struct virtio_device *vdev) +{ + struct virtio_blk *vblk = vdev->priv; + unsigned long size; + + size = PAGE_ALIGN(vring_size(VIRTQUEUE_LENGTH, MIC_VRING_ALIGN)); + free_pages_exact(vblk->vq->priv, size); + + vring_del_virtqueue(vblk->vq); + vblk->vq = NULL; +} + +/* the config->find_vqs() implementation */ +static int virtblk_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], + vq_callback_t *callbacks[], + const char *names[]) +{ + struct virtio_blk *vblk = vdev->priv; + struct virtqueue *vq; + int err; + unsigned long size; + void *queue; /* the virtual address of the ring queue */ + struct vring_virtqueue *vvq; + struct vring *vring; + struct board_info *bd_info = virtblk_mic_data.dd_bi[0]; + + BUG_ON(nvqs != 1); + BUG_ON(vblk == NULL); + + size = PAGE_ALIGN(vring_size(VIRTQUEUE_LENGTH, MIC_VRING_ALIGN)); + queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO); + if (queue == NULL) { + err = -ENOMEM; + goto out_info; + } + + /* create the vring */ + vq = vring_new_virtqueue(VIRTQUEUE_LENGTH, MIC_VRING_ALIGN, + vdev, queue, virtblk_notify, callbacks[0], names[0]); + if (vq == NULL) { + err = -ENOMEM; + goto out_activate_queue; + } + vq->priv = queue; + + vqs[0] = vblk->vq = vq; + + vvq = to_vvq(vq); + vring = &((struct mic_virtblk *)bd_info->bi_virtio)->vb_shared->vring; + writel(vvq->vring.num, &vring->num); + writeq(virt_to_phys(vvq->vring.desc), &vring->desc); + writeq(virt_to_phys(vvq->vring.avail), &vring->avail); + writeq(virt_to_phys(vvq->vring.used), &vring->used); + + return 0; + +out_activate_queue: + free_pages_exact(queue, size); +out_info: + return err; +} + +static struct virtio_config_ops virtio_blk_config_ops = { + .get = virtblk_get, + // .set = vp_set, + // .get_status = vp_get_status, + // .set_status = vp_set_status, + .reset = virtblk_reset, + .find_vqs = virtblk_find_vqs, + .del_vqs = virtblk_del_vqs, + .get_features = virtblk_get_features, + .finalize_features = virtblk_finalize_features, +}; + +static unsigned int features[] = { + VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, + VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI, + VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY +}; + +/* + * virtio_blk causes spurious section mismatch warning by + * simultaneously referring to a __devinit and a __devexit function. + * Use __refdata to avoid this warning. 
+ */ +static struct virtio_driver __refdata virtio_blk = { + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, +}; + +struct class block_class = { + .name = "block", +}; + +static struct device_type disk_type = { + .name = "disk", + /* + .groups = disk_attr_groups, + .release = disk_release, + .devnode = block_devnode, + */ +}; + +static int __init init(void) +{ + bd_info_t *bd_info; + struct virtio_device *vdev; + struct mic_virtblk *mic_virtblk; + int ret; + struct vb_shared *vb_shared; + +#ifdef CONFIG_ML1OM + printk(KERN_ERR "virtio block device is not available on KNF\n"); + ret = -ENODEV; + goto error_return; +#endif + major = register_blkdev(0, "virtblk"); + if (major < 0) { + ret = major; + goto error_return; + } + + bd_info = kmalloc(sizeof(bd_info_t), GFP_KERNEL); + if (bd_info == NULL) { + ret = -ENOMEM; + goto error_return; + } + memset(bd_info, 0, sizeof(*bd_info)); + virtblk_mic_data.dd_numdevs = 1; + index = 0; + virtblk_mic_data.dd_bi[0] = bd_info; + bd_info->bi_ctx.bi_id = 0; + + mic_virtblk = kmalloc(sizeof(*mic_virtblk), GFP_KERNEL); + if (mic_virtblk == NULL) { + ret = -ENOMEM; + goto free_bd_info; + } + memset(mic_virtblk, 0, sizeof(*mic_virtblk)); + bd_info->bi_virtio = (void *)mic_virtblk; + + if (virtio_addr == 0) { + printk(KERN_ERR "virtio address is not passed from host\n"); + return -ENODEV; + goto free_mic_virtblk; + } + vb_shared = ioremap_nocache(virtio_addr, sizeof(*vb_shared)); + if (vb_shared == NULL) { + ret = -ENODEV; + goto free_mic_virtblk; + } + vb_shared->update = true; + mic_virtblk->vb_shared = vb_shared; + + vdev = kmalloc(sizeof(*vdev), GFP_KERNEL); + if (vdev == NULL) { + ret = -ENOMEM; + goto free_mic_virtblk; + } + memset(vdev, 0, sizeof(*vdev)); + vdev->config = &virtio_blk_config_ops; + INIT_LIST_HEAD(&vdev->vqs); + vdev->dev.driver = &virtio_blk.driver; + vdev->dev.class = &block_class; + vdev->dev.type = &disk_type; + device_initialize(&vdev->dev); + mic_virtblk->vdev = (void *)vdev; + + return virtblk_probe(vdev); + + free_mic_virtblk: + kfree(bd_info->bi_virtio); + free_bd_info: + kfree(bd_info); + error_return: + return ret; +} + +static void __exit fini(void) +{ + bd_info_t *bd_info = virtblk_mic_data.dd_bi[0]; + struct mic_virtblk *mic_virtblk = (struct mic_virtblk *)bd_info->bi_virtio; + + unregister_blkdev(major, "virtblk"); + virtblk_remove(mic_virtblk->vdev); + iounmap(mic_virtblk->vb_shared); + kfree(mic_virtblk->vdev); + kfree(bd_info->bi_virtio); + kfree(bd_info); +} +module_init(init); +module_exit(fini); + +MODULE_DESCRIPTION("Virtio block driver"); +MODULE_LICENSE("GPL"); +MODULE_PARM_DESC(virtio_addr, "address of virtio related structure"); +module_param(virtio_addr, long, S_IRUGO); diff --git a/vnet/Kbuild b/vnet/Kbuild new file mode 100644 index 0000000..492d0ca --- /dev/null +++ b/vnet/Kbuild @@ -0,0 +1,3 @@ +obj-m += intel_micveth.o + +intel_micveth-objs := micveth.o micveth_param.o micveth_dma.o diff --git a/vnet/mic.h b/vnet/mic.h new file mode 100644 index 0000000..bfd5e81 --- /dev/null +++ b/vnet/mic.h @@ -0,0 +1,108 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#ifndef MICDLDR_H +#define MICDLDR_H + +#define MIC_DECONS_DISABLE 0 +#define MIC_DECONS_ENABLE 1 + +typedef struct mic_upload { + int up_brdnum; + int up_uossize; + char *up_uosbuf; + int up_dcons; + int up_uoslog; + int up_uosreserve; +} mic_upload_t; + +typedef struct mic_sys_config { + int sc_numCards; +} mic_sys_config_t; + +#define UOS_NOT_BOOTED 0 +#define UOS_BOOTING 1 +#define UOS_BOOT_FAILED 2 +#define UOS_BOOT_SUCCEED 3 +#define UOS_RUNNING 4 +#define UOS_WEDGED 5 +#define UOS_UNKNOWN 6 + +#define PCI_VENDOR_INTEL 0x8086 + +#define PCI_SPEED_GEN1 1 +#define PCI_SPEED_GEN2 2 + +#define GDDR_VENDOR_SAMSUNG 1 +#define GDDR_VENDOR_QIMONDA 2 +#define GDDR_VENDOR_HYNIX 6 + +#define GDDR_DENSITY_512MB 0 +#define GDDR_DENSITY_1GB 1 + +typedef struct mic_brd_config { + int bc_brdnum; + struct { + char step[4]; + int freqMhz; + int vid; + int uvolts; + } bc_core; + struct { + unsigned short vendor; + unsigned short device; + unsigned int class; + char capableSpeed; + char capableWidth; + char currentSpeed; + char currentWidth; + } bc_pcie; + struct { + char vendor; + char density; + char fifoDepth; + short freq; // MT/sec + int size; // Mbytes + } bc_gddr; + int bc_uOSstate; +} mic_brd_config_t; + +#define MIC_UPLOAD_UOS _IOWR('l', 1, struct mic_upload) +#define MIC_RESET_UOS _IOWR('l', 2, int) +#define MIC_SYS_CONFIG _IOWR('l', 3, struct mic_sys_config) +#define MIC_BRD_CONFIG _IOWR('l', 4, struct mic_brd_config) + +#endif // MICDLDR_H + diff --git a/vnet/micveth.c b/vnet/micveth.c new file mode 100644 index 0000000..5ad96a9 --- /dev/null +++ b/vnet/micveth.c @@ -0,0 +1,869 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include +#include +#include + +#include "mic/micveth.h" + +#define PWR_MGMT_NO_POLL_AFTER_LINKS_UP 1 + +/* #define HOST */ +#define SBOX_MMIO_LENGTH (64 * 1024) + +/* Host - Card link initialization rotocol + * Card comes up and writes MICVETH_LINK_UP_MAGIC to scratch 14 & 15 + * Host detects that the card side interface is up and writes the + * 1) address of the tx/rx descriptor ring buffer to scratch 14 & 15 + * 2) last 2 octets of the MAC address (allows the host to identify + * the board number based on its mac address) + */ + +/* Host - Card descriptor queue/ring buffer (from the perspective of the host) + * + * There is a transmit and a receive queue. Each queue entry has + * a physical address and a length. + * + * Packet transmission + * The host adds a queue entry with the physical address of the skb and its + * length and updates the write pointer. The receive side on the card sees the + * new entry, allocates a new skb, maps the host's skb, copies it to a locally + * allocated skb and updates the read pointer. The host side later frees up skbs + * starting from a cached read pointer upto the read pointer + * + * Packet reception + * The host "posts" skbs to the rx queue. The transmit routine on the card + * copies its local skb to the host skb, updates the write pointer and frees + * its local skb + */ + +/* Vnet interrupts are now functional (with vnet=dma module parameter). In the + main flow of the driver all polling in the interrupt mode has been + eliminated. However, polling is still happening in clientpoll() routine which + tracks if the link is up or down. This can also be replaced by an interrupt + driven mechanism which will be done in the future. Apart from this, only + limited testing has been done in the interrupt mode, especially with respect + to sharing the interrupt with scif. Therefore, for now the default mode of + operation is still left as poll in micstart. 
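+
+   Editor's recap of the card-side link bring-up described above (a sketch of
+   code found elsewhere in this file, not additional code):
+
+     writel(MICVETH_LINK_UP_MAGIC, vi_sbox + SBOX_SCRATCH14);
+     writel(MICVETH_LINK_UP_MAGIC, vi_sbox + SBOX_SCRATCH15);
+         ... host overwrites both scratch registers ...
+     scratch14 = readl(vi_sbox + SBOX_SCRATCH14);   - low 32 bits of ring address
+     scratch15 = readl(vi_sbox + SBOX_SCRATCH15);   - high bits plus 2 MAC octets
+     ring_ptr  = ioremap_nocache(ring_phys, sizeof(veth_ring_t));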
+*/ + +#define SBOX_SDBIC0_DBREQ_BIT 0x80000000 + + +#ifdef HOST +#else +struct skb_node { + struct list_head list; + struct sk_buff *skb; +}; + +/* List of skbs to be transmitted - global for now assumes KN* has a single interface */ +struct list_head skb_list; +LIST_HEAD(skb_list); +#endif + +static void _micveth_process_descriptors(micveth_info_t *veth_info); + +#ifdef HOST +#else +static int micveth_xmit_enqueue(struct sk_buff *skb, struct net_device *dev, micveth_info_t *veth_info); +static int micveth_xmit_dequeue(struct net_device *dev, micveth_info_t *veth_info); +static struct sk_buff *dequeue_skb(micveth_info_t *veth_info); +static void micvnet_tx_dequeue_handler(struct work_struct *work); + +int micveth_start(mic_ctx_t *mic_ctx); +void micveth_stop(mic_ctx_t *mic_ctx); +static int micveth_start_dev(struct net_device *dev); +static int micveth_stop_dev(struct net_device *dev); +#endif + +static void micveth_clientpoll(struct work_struct *work); +static void micveth_poll(struct work_struct *work); +static irqreturn_t micvnet_host_intr_handler(int irq, void *cookie); +static void micvnet_intr_bh_handler(struct work_struct *work); +static void micveth_send_intr(micveth_info_t *veth_info); +int get_sbox_irq(int index); + +#ifdef HOST +#else +static mic_ctx_t mic_ctx_g; +#endif + +micveth_t micveth; + +static int +micveth_set_address(struct net_device *dev, void *p) +{ + struct sockaddr *sa = p; + + if (!is_valid_ether_addr(sa->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(dev->dev_addr, sa->sa_data, ETH_ALEN); + return 0; +} + +static void +micveth_multicast_list(struct net_device *dev) +{ +} + +#ifdef HOST +#else +/* Enqueues an skb for transmission. This is necessary because micveth_xmit is called in + interrupt context and we cannot call ioremap_nocache from interrupt context. 
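+
+   Editor's outline of the resulting split (a recap of the code below, not
+   additional code):
+     micveth_xmit (atomic context): kmalloc(GFP_ATOMIC), add the skb to
+       skb_list under vi_txlock, queue_work(vi_wq, vi_txws);
+     micvnet_tx_dequeue_handler (workqueue): dequeue_skb(), then
+       ioremap_nocache(), memcpy() and iounmap() into the host-posted buffer.
+   Calls that may not run in interrupt context are therefore confined to the
+   workqueue side.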
*/ +static int +micveth_xmit_enqueue(struct sk_buff *skb, struct net_device *dev, micveth_info_t *veth_info) +{ + struct skb_node *new_node = kmalloc(sizeof(*new_node), GFP_ATOMIC); + + if (!new_node) + return ENOMEM; + new_node->skb = skb; + spin_lock(&veth_info->vi_txlock); + list_add_tail(&new_node->list, &skb_list); + spin_unlock(&veth_info->vi_txlock); + return 0; +} + +/* Dequeues a skb enqueued by micveth_xmit_enqueue */ +static struct sk_buff * +dequeue_skb(micveth_info_t *veth_info) +{ + struct sk_buff *skb = NULL; + struct skb_node *skb_node = NULL; + + spin_lock_bh(&veth_info->vi_txlock); + if (!list_empty(&skb_list)) + { + skb_node = list_entry(skb_list.next, struct skb_node , list); + list_del(&skb_node->list); + skb = skb_node->skb; + } + spin_unlock_bh(&veth_info->vi_txlock); + + if (skb_node) + kfree(skb_node); + return skb; +} + +/* Transmits skbs that have been enqueued by the by micveth_xmit_enqueue */ +static int +micveth_xmit_dequeue(struct net_device *dev, micveth_info_t *veth_info) +{ + veth_ring_t *ring; + ring_queue_t *tx_queue; + ring_desc_t *desc; + int next_tail; + void *dst; + struct sk_buff *skb; + + while ((skb = dequeue_skb(veth_info))) { + ring = veth_info->ring_ptr; + tx_queue = &ring->r_rx; + + next_tail = (tx_queue->rq_tail + 1) % tx_queue->rq_length; + if (next_tail == tx_queue->rq_head) { + printk(KERN_WARNING "dropping packet\n"); + /* queue_full situation - just drop the packet and let the stack retry */ + return 1; + } + + desc = &tx_queue->rq_descs[tx_queue->rq_tail]; + dst = ioremap_nocache(desc->rd_phys, skb->len); + if (!dst) { + tx_queue->rq_tail = (tx_queue->rq_tail + 1) % tx_queue->rq_length; + dev_kfree_skb(skb); + dev->stats.tx_dropped++; + continue; + } + desc->rd_length = skb->len; + desc->rd_valid = 1; + memcpy(dst, skb->data, skb->len); + /* + * Need a write memory barrier between copying the skb data to + * the buffer and updating the tail pointer. NOT an smp_wmb(), + * because this memory barrier needs to be done even if there is + * a single CPU in the system. + * + * No need for the serializing request (Si bug workaround in + * KNF), since the buffer exists in host memory. If the buffer + * lives in card memory, and this code is running on the host, we + * would need extra barriers and a "serializing request" on any write. 
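+	 *
+	 * Editor's illustration of the pairing (a sketch, not additional code):
+	 *
+	 *   card (producer, this loop)              host (consumer)
+	 *     memcpy(dst, skb->data, skb->len);       wait until rq_head != rq_tail;
+	 *     wmb();                                  read rq_descs[rq_head] and data;
+	 *     rq_tail = (rq_tail + 1) % rq_length;    advance rq_head;
+	 *
+	 * Without the barrier the host could observe the advanced tail before the
+	 * copied packet bytes are visible in the descriptor's buffer.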
+ */ + wmb(); + tx_queue->rq_tail = (tx_queue->rq_tail + 1) % tx_queue->rq_length; + iounmap(dst); + dev_kfree_skb(skb); + + if (mic_vnet_mode == VNET_MODE_INTR) { + micveth_send_intr(veth_info); + } + } + + return 0; +} + +static void +micvnet_tx_dequeue_handler(struct work_struct *work) +{ + micveth_info_t *veth_info = container_of(work, micveth_info_t, vi_txws); + struct net_device *dev_veth = veth_info->vi_netdev; + + micveth_xmit_dequeue(dev_veth, veth_info); +} +#endif + +#ifdef HOST +#else // card +/* Transmit callback */ +static int +micveth_xmit(struct sk_buff *skb, struct net_device *dev) +{ + micveth_info_t *veth_info; + + if (be16_to_cpu(skb->protocol) == ETH_P_IPV6) { + kfree_skb(skb); + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + veth_info = &micveth.lv_info[0]; + if (veth_info->vi_state == VETH_STATE_LINKUP) { + if (micveth_xmit_enqueue(skb, dev, veth_info)) { + dev_kfree_skb(skb); + dev->stats.tx_dropped++; + } + } else { + dev_kfree_skb(skb); + } + + /* Reuse the interrupt workqueue to also queue tx dequeue tasks */ + queue_work(veth_info->vi_wq, &veth_info->vi_txws); + + return NETDEV_TX_OK; +} +#endif + +static int +micveth_change_mtu(struct net_device *dev, int new_mtu) +{ + dev->mtu = new_mtu; + return 0; +} + + +/* Start callback */ +static int +micveth_start_dev(struct net_device *dev) +{ + micveth_info_t *veth_info = dev->ml_priv; + + micveth_start(veth_info->mic_ctx); + return 0; +} + +/* Stop callback */ +static int +micveth_stop_dev(struct net_device *dev) +{ + micveth_info_t *veth_info = dev->ml_priv; + + micveth_stop(veth_info->mic_ctx); + return 0; +} + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,28) +static const struct net_device_ops veth_netdev_ops = { + .ndo_open = micveth_start_dev, + .ndo_stop = micveth_stop_dev, + .ndo_start_xmit = micveth_xmit, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_multicast_list = micveth_multicast_list, + .ndo_set_mac_address = micveth_set_address, + .ndo_change_mtu = micveth_change_mtu, +}; +#endif + +static void +micveth_setup(struct net_device *dev) +{ +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,28) + dev->hard_start_xmit = micveth_xmit; + dev->set_multicast_list = micveth_multicast_list; + dev->set_mac_address = micveth_set_address; +#endif + ether_setup(dev); + + /* Initialize the device structure. */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,28) + dev->netdev_ops = &veth_netdev_ops; +#endif + dev->destructor = free_netdev; + + /* Fill in device structure with ethernet-generic values. 
*/ + dev->mtu = (MICVETH_MAX_PACKET_SIZE); + dev->tx_queue_len = 0; + dev->flags &= ~IFF_MULTICAST; + random_ether_addr(dev->dev_addr); +} + +static int +micveth_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + return 0; +} + +static struct rtnl_link_ops micveth_link_ops __read_mostly = { + .kind = "micveth", + .setup = micveth_setup, + .validate = micveth_validate, +}; + +static int +micveth_probe_int(micveth_info_t *veth_info, mic_ctx_t *mic_ctx) +{ + struct net_device *dev_veth; + int err = 0; + + veth_info->vi_sbox = ioremap_nocache(SBOX_BASE, SBOX_MMIO_LENGTH); + veth_info->vi_scratch14 = (uint32_t *)(veth_info->vi_sbox + SBOX_SCRATCH14); + veth_info->vi_scratch15 = (uint32_t *)(veth_info->vi_sbox + SBOX_SCRATCH14); + writel(0x55, veth_info->vi_sbox + SBOX_DCR); + + veth_info->mic_ctx = mic_ctx; + mic_ctx->bi_vethinfo = (void *)veth_info; + + spin_lock_init(&veth_info->vi_txlock); + spin_lock_init(&veth_info->vi_rxlock); + + if (mic_vnet_mode == VNET_MODE_POLL) + INIT_DELAYED_WORK(&veth_info->vi_poll, micveth_poll); + + snprintf(veth_info->vi_wqname, sizeof(veth_info->vi_wqname), + "VNET INTR %d", 0); + veth_info->vi_wq = create_singlethread_workqueue(veth_info->vi_wqname); + INIT_WORK(&veth_info->vi_txws, micvnet_tx_dequeue_handler); + + if (mic_vnet_mode == VNET_MODE_INTR) { + if ((err = request_irq(get_sbox_irq(VNET_SBOX_INT_IDX), + micvnet_host_intr_handler, IRQF_DISABLED, + "micveth intr", veth_info))) { + printk(KERN_ERR "%s: interrupt registration failed\n", __func__); + return err; + } + INIT_WORK(&veth_info->vi_bh, micvnet_intr_bh_handler); + } + + // Set the current sk_buff allocation size + veth_info->vi_skb_mtu = MICVETH_MAX_PACKET_SIZE + 32; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + if ((dev_veth = alloc_netdev(sizeof(micveth_info_t), "mic%d", micveth_setup)) == NULL) { +#else + if ((dev_veth = alloc_netdev(sizeof(micveth_info_t), "mic%d", NET_NAME_UNKNOWN, micveth_setup)) == NULL) { +#endif + return -ENOMEM; + } + + veth_info->vi_netdev = dev_veth; + dev_veth->ml_priv = veth_info; + dev_veth->rtnl_link_ops = &micveth_link_ops; + + if ((err = register_netdev(dev_veth)) < 0) { + printk("register netdev failed %d\n", err); + free_netdev(dev_veth); + return err; + } + + veth_info->vi_state = VETH_STATE_INITIALIZED; + + /* Inform host after completing initialization */ + printk("%s: writing magic to SC14 and SC15\n", __FUNCTION__); + writel(MICVETH_LINK_UP_MAGIC, veth_info->vi_sbox + SBOX_SCRATCH14); + writel(MICVETH_LINK_UP_MAGIC, veth_info->vi_sbox + SBOX_SCRATCH15); + + return 0; +} + +void +micveth_remove_int(mic_ctx_t *mic_ctx) +{ + micveth_stop(mic_ctx); +} + +static int __init +micveth_create_int(int num_bds, struct device *dev) +{ + int bd; + int err = 0; + + printk("micveth_init(%d)\n", num_bds); + + micveth.lv_num_interfaces = num_bds; + micveth.lv_num_clients = num_bds; + micveth.lv_active_clients = 0; + micveth.lv_num_links_remaining = num_bds; + + if ((err = rtnl_link_register(&micveth_link_ops))) { + printk(KERN_ERR "%s: rtnl_link_register failed!\n", __func__); + return err; + } + + // Allocate space for the control of each device in the system. + micveth.lv_info = kmalloc(sizeof(micveth_info_t) * num_bds, GFP_KERNEL); + if (!micveth.lv_info) { + printk(KERN_ERR "%s: micveth_info alloc failed!\n", __func__); + return -ENOMEM; + } + + // Initialize state mutex. 
Overloaded use for several fields. + mutex_init(&micveth.lv_state_mutex); + + // Setup of timer for probeing active mic clients. When the total active board + // count is zero the poll is not running. + micveth.lv_pollstate = CLIENT_POLL_STOPPED; + INIT_DELAYED_WORK(&micveth.lv_poll, micveth_clientpoll); + init_waitqueue_head(&micveth.lv_wq); + + // Init each of the existing boards. + for (bd = 0; bd < num_bds; bd++) { +#ifdef HOST + micveth_probe_int(&micveth.lv_info[bd], &mic_data.dd_bi[bd]->bi_ctx); +#else + micveth_probe_int(&micveth.lv_info[bd], &mic_ctx_g); +#endif + } + + return err; +} + +static void +micveth_exit_int(void) +{ + micveth_info_t *veth_info = &micveth.lv_info[0]; +#ifdef HOST +#endif + micveth_stop(veth_info->mic_ctx); + + destroy_workqueue(veth_info->vi_wq); + rtnl_link_unregister(&micveth_link_ops); + +#ifdef HOST +#else // card + iounmap((void *)veth_info->ring_ptr); + iounmap(veth_info->vi_sbox); +#endif + + kfree(micveth.lv_info); +} + +/* Card side - tell the host that the interface is up */ +static int +micveth_start_int(mic_ctx_t *mic_ctx) +{ + micveth_info_t *veth_info = &micveth.lv_info[mic_ctx->bi_id]; + + // Eventuall (very soon) most of the descriptor allocation for a board will be done here + if (veth_info->vi_state != VETH_STATE_INITIALIZED) + return 0; + + mutex_lock(&micveth.lv_state_mutex); + + if (micveth.lv_pollstate == CLIENT_POLL_STOPPED) { + schedule_delayed_work(&micveth.lv_poll, msecs_to_jiffies(MICVETH_CLIENT_TIMER_DELAY)); + micveth.lv_pollstate = CLIENT_POLL_RUNNING; + } + + micveth.lv_active_clients++; + mutex_unlock(&micveth.lv_state_mutex); + + veth_info->vi_state = VETH_STATE_LINKDOWN; + + return 0; +} + +/* Card side - tell the host that the interface is down */ +static void +micveth_stop_int(mic_ctx_t *mic_ctx) +{ + micveth_info_t *veth_info = (micveth_info_t *)(mic_ctx->bi_vethinfo); + + if (veth_info->vi_state == VETH_STATE_INITIALIZED) + return; + + mutex_lock(&micveth.lv_state_mutex); + micveth.lv_active_clients--; + veth_info->vi_state = VETH_STATE_INITIALIZED; + + if (micveth.lv_active_clients) { + mutex_unlock(&micveth.lv_state_mutex); + return; + } + + micveth.lv_num_links_remaining = micveth.lv_num_clients; + +#if PWR_MGMT_NO_POLL_AFTER_LINKS_UP + micveth.lv_pollstate = CLIENT_POLL_STOPPED; + mutex_unlock(&micveth.lv_state_mutex); +#else + micveth.lv_pollstate = CLIENT_POLL_STOPPING; + mutex_unlock(&micveth.lv_state_mutex); + wait_event(micveth.lv_wq, micveth.lv_pollstate == CLIENT_POLL_STOPPED); +#endif + +#ifdef HOST +#else // card + writel(MICVETH_LINK_DOWN_MAGIC, veth_info->vi_sbox + SBOX_SCRATCH14); + writel(MICVETH_LINK_DOWN_MAGIC, veth_info->vi_sbox + SBOX_SCRATCH15); +#endif +} + +#ifdef HOST +#else // card +/* Link detection */ +static void +micveth_clientpoll(struct work_struct *work) +{ + micveth_info_t *veth_info; + mic_ctx_t *mic_ctx; + uint32_t scratch14; + uint32_t scratch15; + struct net_device *dev_veth; + veth_info = &micveth.lv_info[0]; + dev_veth = veth_info->vi_netdev; + mic_ctx = veth_info->mic_ctx; + mutex_lock(&micveth.lv_state_mutex); + + if (micveth.lv_pollstate == CLIENT_POLL_STOPPING) { + micveth.lv_pollstate = CLIENT_POLL_STOPPED; + mutex_unlock(&micveth.lv_state_mutex); + wake_up(&micveth.lv_wq); + return; + } + + if (veth_info->vi_state == VETH_STATE_LINKUP) { + scratch14 = readl(veth_info->vi_sbox + SBOX_SCRATCH14); + scratch15 = readl(veth_info->vi_sbox + SBOX_SCRATCH15); + + if ((MICVETH_LINK_DOWN_MAGIC == scratch14) && + (MICVETH_LINK_DOWN_MAGIC == scratch15)) { + veth_info->vi_state = 
VETH_STATE_LINKDOWN; + } + } else { + scratch14 = readl(veth_info->vi_sbox + SBOX_SCRATCH14); + scratch15 = readl(veth_info->vi_sbox + SBOX_SCRATCH15); + + if ((MICVETH_LINK_UP_MAGIC != scratch14) && + (MICVETH_LINK_UP_MAGIC != scratch15)) { + printk("micveth_clientpoll(): SC14 and SC15 changed from MAGIC, I got the RB addresses!\n"); + writel(MICVETH_LINK_UP_MAGIC, veth_info->vi_sbox + SBOX_SCRATCH14); + writel(MICVETH_LINK_UP_MAGIC, veth_info->vi_sbox + SBOX_SCRATCH15); + dev_veth->dev_addr[4] = (scratch15 >> 24) & 0xff; + dev_veth->dev_addr[5] = (scratch15 >> 16) & 0xff; + veth_info->vi_ring.phys = ((uint64_t)(scratch15 & 0xffff) << 32) | scratch14; + veth_info->vi_ring.phys |= (1ULL << 39); + veth_info->vi_ring.length = sizeof(veth_ring_t); + veth_info->ring_ptr = ioremap_nocache(veth_info->vi_ring.phys, veth_info->vi_ring.length); + BUG_ON(veth_info->ring_ptr == NULL); + + printk("micveth_clientpoll(): VETH_STATE_LINKUP\n"); + veth_info->vi_state = VETH_STATE_LINKUP; + if (mic_vnet_mode == VNET_MODE_POLL) { + printk("micveth_clientpoll(): poll for work now !!\n"); + schedule_delayed_work(&veth_info->vi_poll, msecs_to_jiffies(MICVETH_POLL_TIMER_DELAY)); + } + + micveth.lv_num_links_remaining--; + } + } + mutex_unlock(&micveth.lv_state_mutex); + +#if PWR_MGMT_NO_POLL_AFTER_LINKS_UP + if (micveth.lv_num_links_remaining) +#endif + schedule_delayed_work(&micveth.lv_poll, msecs_to_jiffies(MICVETH_CLIENT_TIMER_DELAY)); +} +#endif +extern struct sk_buff *jsp_dbg1; + +#ifdef HOST +#else // card +static irqreturn_t +micvnet_host_intr_handler(int irq, void *cookie) +{ + micveth_info_t *veth_info = cookie; + queue_work(veth_info->vi_wq, &veth_info->vi_bh); + return IRQ_HANDLED; +} + +/* Ring host doorbell 3 interrupt */ +static void +micveth_send_intr(micveth_info_t *veth_info) +{ + uint32_t db_reg; + + // Ring host doorbell 3 interrupt + db_reg = readl(veth_info->vi_sbox + SBOX_SDBIC3) | SBOX_SDBIC0_DBREQ_BIT; + writel(db_reg, veth_info->vi_sbox + SBOX_SDBIC3); +} + +static void +_micveth_process_descriptors(micveth_info_t *veth_info) +{ + veth_ring_t *ring = veth_info->ring_ptr; + ring_queue_t *rx_queue = &ring->r_tx; + ring_desc_t desc; + struct sk_buff *skb; + void *pkt; + int receive_skb = 0; + int err; + + if (veth_info->vi_state != VETH_STATE_LINKUP) { + return; + } + + spin_lock(&veth_info->vi_rxlock); + + while (rx_queue->rq_head != rx_queue->rq_tail) { + desc = rx_queue->rq_descs[rx_queue->rq_head]; + + veth_info->vi_netdev->stats.rx_packets++; + veth_info->vi_netdev->stats.rx_bytes += desc.rd_length; + + pkt = ioremap_nocache(desc.rd_phys, desc.rd_length); + if (pkt == NULL) { + veth_info->vi_netdev->stats.rx_dropped++; + goto update_ring; + } + + /* handle jumbo frame */ + if (desc.rd_length > ETH_DATA_LEN) + skb = dev_alloc_skb(veth_info->vi_skb_mtu); + else + skb = dev_alloc_skb(ETH_DATA_LEN + 32); + if (skb == NULL) { + veth_info->vi_netdev->stats.rx_dropped++; + iounmap(pkt); + goto update_ring; + } + + memcpy(skb_put(skb,desc.rd_length), pkt, desc.rd_length); + iounmap(pkt); + skb->dev = veth_info->vi_netdev; + skb->protocol = eth_type_trans(skb, skb->dev); + skb->ip_summed = CHECKSUM_NONE; + local_bh_disable(); + err = netif_receive_skb(skb); + err = err; + local_bh_enable(); + /* + * Need a general memory barrier between copying the data from + * the buffer and updating the head pointer. It's the general + * mb() because we're ordering the read of the data with the write. 
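+		 *
+		 * Editor's illustration (a sketch, not additional code): the host
+		 * must not see the advanced rq_head until the packet bytes have been
+		 * read out of the shared buffer, i.e.
+		 *
+		 *   memcpy(skb_put(skb, desc.rd_length), pkt, desc.rd_length);
+		 *   mb();
+		 *   rq_head = (rq_head + 1) % rq_length;   - slot may now be reused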
+ * + * No need for the serializing request (Si bug workaround in + * KNF), since the buffer exists in host memory. If the buffer + * lives in card memory, and this code is running on the host, we + * would need extra barriers and a "serializing request" on any write. + */ + mb(); +update_ring: + rx_queue->rq_head = (rx_queue->rq_head + 1) % rx_queue->rq_length; + receive_skb++; + } + + /* Send intr to TX so that pending SKB's can be freed */ + if (receive_skb && mic_vnet_mode == VNET_MODE_INTR) { + micveth_send_intr(veth_info); + } + + spin_unlock(&veth_info->vi_rxlock); + + if (mic_vnet_mode == VNET_MODE_POLL) { + schedule_delayed_work(&veth_info->vi_poll, msecs_to_jiffies(MICVETH_POLL_TIMER_DELAY)); + } +} + +static void +micvnet_intr_bh_handler(struct work_struct *work) +{ + micveth_info_t *veth_info = container_of(work, micveth_info_t, vi_bh); + _micveth_process_descriptors(veth_info); +} + +static void +micveth_poll(struct work_struct *work) +{ + micveth_info_t *veth_info = container_of(work, micveth_info_t, vi_poll.work); + + _micveth_process_descriptors(veth_info); +} + +#endif + +#ifdef HOST +#else // card +static int __init +micveth_module_init_int(void) +{ + mic_ctx_t *mic_ctx = &mic_ctx_g; + int ret = 0; + + printk("micveth_probe()\n"); + memset(mic_ctx, 0, sizeof(*mic_ctx)); + mic_ctx->bi_id = 0; + + if ((ret = micveth_init(NULL))) + return ret; + if ((ret = micveth_init_legacy(1, NULL))) + return ret; + + return 0; +} + +static void __exit +micveth_module_exit_int(void) +{ + micveth_exit(); +} +#endif + +/* + VNET driver public API. These are simply wrappers which either invoke the old + interrupt/poll mode functions or the new DMA mode functions. These are temporary and + will be phased out with the old interrupt/poll mode so only the DMA mode will be around + eventually. 
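+
+   Editor's summary of the dispatch performed by these wrappers (a recap of
+   the code below, not additional code):
+
+     mic_vnet_mode == VNET_MODE_DMA      -> micvnet_*  (micveth_dma.c)
+     VNET_MODE_INTR and VNET_MODE_POLL   -> the legacy micveth_*_int paths
+
+   e.g. micveth_start() calls micvnet_start() in DMA mode and
+   micveth_start_int() otherwise.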
+ */ +int __init +micveth_init(struct device *dev) +{ + if (mic_vnet_mode == VNET_MODE_DMA) + return micvnet_init(dev); + /* Intr/poll modes use micveth_init_legacy */ + return 0; +} + +int __init +micveth_init_legacy(int num_bds, struct device *dev) +{ + if (mic_vnet_mode != VNET_MODE_DMA) + return micveth_create_int(num_bds, dev); + /* DMA mode uses micveth_create */ + return 0; +} + +void +micveth_exit(void) +{ + if (mic_vnet_mode == VNET_MODE_DMA) + micvnet_exit(); + else + micveth_exit_int(); +} + +int +micveth_probe(mic_ctx_t *mic_ctx) +{ + if (mic_vnet_mode == VNET_MODE_DMA) + return micvnet_probe(mic_ctx); + /* No support for micveth_probe in legacy intr/poll modes */ + return 0; +} + +void +micveth_remove(mic_ctx_t *mic_ctx) +{ + if (mic_vnet_mode == VNET_MODE_DMA) + micvnet_remove(mic_ctx); + /* No support for micveth_remove in legacy intr/poll modes */ +} + +int +micveth_start(mic_ctx_t *mic_ctx) +{ + if (mic_vnet_mode == VNET_MODE_DMA) + return micvnet_start(mic_ctx); + else + return micveth_start_int(mic_ctx); +} + +void +micveth_stop(mic_ctx_t *mic_ctx) +{ + if (mic_vnet_mode == VNET_MODE_DMA) + micvnet_stop(mic_ctx); + else + micveth_stop_int(mic_ctx); +} + +static int __init +micveth_module_init(void) +{ + printk("vnet: mode: %s, buffers: %d\n", + mic_vnet_modes[mic_vnet_mode], vnet_num_buffers); + + if (mic_vnet_mode == VNET_MODE_DMA) + return micvnet_module_init(); + else + return micveth_module_init_int(); +} + +static void __exit +micveth_module_exit(void) +{ + if (mic_vnet_mode == VNET_MODE_DMA) + micvnet_module_exit(); + else + micveth_module_exit_int(); +} + +#ifdef HOST +#else // card +module_init(micveth_module_init); +module_exit(micveth_module_exit); + +MODULE_LICENSE("GPL"); +#endif diff --git a/vnet/micveth_dma.c b/vnet/micveth_dma.c new file mode 100644 index 0000000..c62675b --- /dev/null +++ b/vnet/micveth_dma.c @@ -0,0 +1,1642 @@ + +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. 
Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. + */ + +#include +#include +#include +#include +#include +#include "mic_common.h" +#include "mic/micveth_dma.h" +#include "mic/mic_macaddr.h" + +/* TODO: Clean up shutdown, let DMA's drain */ + +#ifndef HOST +#define SBOX_SDBIC0_DBREQ_BIT 0x80000000 +#define SBOX_MMIO_LENGTH (64 * 1024) +#endif +#define STOP_WAIT_TIMEOUT (4 * HZ) + +#ifndef HOST +static mic_ctx_t mic_ctx_g; +#endif + +struct micvnet micvnet; + + +static void micvnet_send_intr(struct micvnet_info *vnet_info); +static int micvnet_init_msg_rings(struct micvnet_info *vnet_info); +static int micvnet_init_rx_skb_send_msg(struct micvnet_info *vnet_info); +static void micvnet_send_add_dma_buffer_messages(struct micvnet_info *vnet_info); +static void micvnet_stop_ws(struct work_struct *work); +static void micvnet_start_ws(struct work_struct *work); +int get_sbox_irq(int index); + +static __always_inline mic_ctx_t * +vnet_to_ctx(struct micvnet_info *vnet_info) +{ + return vnet_info->mic_ctx; +} + +static __always_inline void +micvnet_wake_queue(struct micvnet_info *vnet_info) +{ + if (atomic_read(&vnet_info->vi_state) == MICVNET_STATE_LINKUP) + netif_wake_queue(vnet_info->vi_netdev); +} + +static __always_inline void +micvnet_dec_cnt_tx_pending(struct micvnet_info *vnet_info) +{ + if (atomic_dec_and_test(&vnet_info->cnt_tx_pending) && + (atomic_read(&vnet_info->vi_state) == MICVNET_STATE_LINK_DOWN)) + wake_up_interruptible(&vnet_info->stop_waitq); +} + + +/*********************************************************** + Pre-allocated "list" of objects which are allocated and deallocated in FIFO + sequence. Allows reservation of memory at init time to prevent mem allocation + failures at run time. */ +static int +list_obj_list_init(int num_obj, size_t obj_size, struct obj_list *list) +{ + list->size = num_obj + 1; + list->obj_size = obj_size; + list->head = list->tail = 0; + + if (!(list->buf = kmalloc(list->size * list->obj_size, GFP_KERNEL))) { + printk(KERN_ERR "%s: list alloc failed\n", __func__); + return -ENOMEM; + } + return 0; +} + +static void +list_obj_list_deinit(struct obj_list *list) +{ + if (list->buf) { + kfree(list->buf); + list->buf = NULL; + } +} + +static void * +list_obj_alloc(struct obj_list *list) +{ + char *obj; + + /* Remove bug_on() here to handle VNET OOO messages. In OOO conditions + * requests to allocate more objects than list->size are possible. 
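+	 *
+	 * Editor's note on the invariant (an illustration, not additional code):
+	 * the obj_list is a ring of size num_obj + 1 with one slot always kept
+	 * empty, so e.g. with num_obj = 3 (size = 4):
+	 *
+	 *   empty: head == tail
+	 *   full : (head + 1) % size == tail   - at most 3 objects outstanding
+	 *
+	 * which is why the check below refuses the allocation rather than letting
+	 * head catch up with tail.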
*/ + if (((list->head + 1) % list->size) == list->tail) { + printk(KERN_ERR "%s: BUG: no free objects in obj list\n", __func__); + return NULL; + } + + obj = list->buf + list->head * list->obj_size; + wmb(); + list->head = (list->head + 1) % list->size; + + return obj; +} + +void +list_obj_free(struct obj_list *list) +{ + /* Remove bug_on() here to handle VNET OOO messages */ + if (list->tail == list->head) { + printk(KERN_ERR "%s: BUG: free too many list objects\n", __func__); + return; + } + + list->tail = (list->tail + 1) % list->size; +} + +/*********************************************************** + * Vnet message functions + */ +#ifdef HOST +static void +micvnet_msg_rb_init(struct micvnet_msg_rb *rb) +{ + rb->head = rb->tail = 0; + rb->size = MICVNET_MSG_RB_SIZE; + rb->prev_head = rb->prev_tail = rb->size - 1; +} + +static void +micvnet_reset_msg_rings(struct micvnet_info *vnet_info) +{ + micvnet_msg_rb_init(vnet_info->vi_qp.tx); + micvnet_msg_rb_init(vnet_info->vi_qp.rx); +} +#endif + +static void +micvnet_msg_rb_write_msg(struct micvnet_info *vnet_info, struct micvnet_msg *msg) +{ + struct micvnet_msg_rb *rb = vnet_info->vi_qp.tx; + + /* The condition below should never occur under normal conditions + because the VNET message ring buffer size is at least 1 greater than + the maximum total number of outstanding messages possible in the + system. However, all bets are off if VNET OOO messages are + seen. Therefore remove the previous bug_on() here and busy wait. */ + while (((rb->head + 1) % rb->size) == rb->tail) + cpu_relax(); + + if (!(rb->head == (rb->prev_head + 1) % rb->size)) + printk(KERN_ERR "BUG: head not equal to prev_head + 1:\n \ + head %d prev_head %d\n", rb->head, rb->prev_head); + + smp_mb(); +#ifdef HOST + rb->buf[rb->head] = *msg; +#else + memcpy_toio(&rb->buf[rb->head], msg, sizeof(*msg)); +#endif + smp_mb(); + serializing_request(&rb->buf[rb->head]); + + rb->prev_head = rb->head; + rb->head = (rb->head + 1) % rb->size; +#ifndef HOST + rb->head = rb->head; +#endif + smp_mb(); + serializing_request(&rb->head); +} + +static int +micvnet_msg_rb_read_msg(struct micvnet_info *vnet_info, struct micvnet_msg *msg) +{ + struct micvnet_msg_rb *rb = vnet_info->vi_qp.rx; + + if (rb->tail == rb->head) + return 1; + + if (!(rb->tail == (rb->prev_tail + 1) % rb->size)) + printk(KERN_ERR "BUG: tail not equal to prev_tail + 1:\n \ + tail %d prev_tail %d\n", rb->tail, rb->prev_tail); + + smp_mb(); +#ifdef HOST + *msg = rb->buf[rb->tail]; +#else + memcpy_fromio(msg, &rb->buf[rb->tail], sizeof(*msg)); +#endif + smp_mb(); + serializing_request(&rb->buf[rb->tail]); + + rb->prev_tail = rb->tail; + rb->tail = (rb->tail + 1) % rb->size; +#ifndef HOST + rb->tail = rb->tail; +#endif + smp_mb(); + serializing_request(&rb->tail); + + return 0; +} + +void +micvnet_msg_send_msg(struct micvnet_info *vnet_info, struct micvnet_msg *msg) +{ + micvnet_msg_rb_write_msg(vnet_info, msg); +#ifdef HOST + if (micpm_get_reference(vnet_to_ctx(vnet_info), true)) + return; +#endif + micvnet_send_intr(vnet_info); +#ifdef HOST + micpm_put_reference(vnet_to_ctx(vnet_info)); +#endif +} + +static void +micvnet_msg_send_add_dma_buffer_msg(struct micvnet_info *vnet_info, + struct rx_node *rnode) +{ + struct micvnet_msg msg; + struct micvnet_msg_add_dma_buffer + *body = &msg.body.micvnet_msg_add_dma_buffer; + + msg.msg_id = MICVNET_MSG_ADD_DMA_BUFFER; + body->buf_phys = rnode->phys; + body->buf_size = rnode->size; + micvnet_msg_send_msg(vnet_info, &msg); +} + +static void +micvnet_msg_recv_add_dma_buffer(struct 
micvnet_info *vnet_info, + struct micvnet_msg_add_dma_buffer *msg) +{ + struct dma_node *dnode; + + /* Remove bug_on() here to handle VNET OOO messages */ + if (!(dnode = list_obj_alloc(&vnet_info->dnode_list))) + return; + + dnode->phys = msg->buf_phys; + dnode->size = msg->buf_size; + + spin_lock(&vnet_info->vi_rxlock); + list_add_tail(&dnode->list, &vnet_info->vi_dma_buf); + spin_unlock(&vnet_info->vi_rxlock); + + atomic_inc(&vnet_info->cnt_dma_buf_avail); + micvnet_wake_queue(vnet_info); +} + +static void +micvnet_msg_send_dma_complete_msg(struct micvnet_info *vnet_info, + struct sched_node *snode) +{ + struct micvnet_msg msg; + struct micvnet_msg_dma_complete + *body = &msg.body.micvnet_msg_dma_complete; + + msg.msg_id = MICVNET_MSG_DMA_COMPLETE; + body->dst_phys = snode->dst_phys; + body->size = snode->skb->len; + body->dma_offset = snode->dma_offset; + micvnet_msg_send_msg(vnet_info, &msg); +} + +/* Handle an unexpected out-of-order message */ +static int +micvnet_msg_handle_ooo_msg(struct micvnet_info *vnet_info, + struct micvnet_msg_dma_complete *msg) +{ + struct micvnet_msg_rb *rb = vnet_info->vi_qp.rx; + struct rx_node *rnode; + struct list_head *pos, *tmpl; + bool found = false; + + rnode = list_entry((&vnet_info->vi_rx_skb)->next, struct rx_node, list); + + /* Normal operation */ + if (rnode->phys == msg->dst_phys + && msg->size <= (rnode->size - 3 * DMA_ALIGNMENT) + && msg->dma_offset < 2 * DMA_ALIGNMENT) + return 0; + + /* Flag that weird stuff's going on */ + printk(KERN_ERR "BUG: Unexpected vnet dma_complete message parameters:\n \ + rnode->phys %p, msg->dst_phys %p\n \ + rnode->size %lld, msg->size %lld, msg->dma_offset %lld\n \ + rx rb head %d tail %d size %d\n", + (char *) rnode->phys, (char *) msg->dst_phys, + rnode->size, msg->size, msg->dma_offset, + rb->head, rb->tail, rb->size); + + /* if message is received in order but with incorrect parameters + (size/dma_offset), drop it, but re-add the rnode at the back of the + rx_skb list, as well as at tx, similar to what is done below for ooo + case. */ + if (rnode->phys == msg->dst_phys) { + list_del(&rnode->list); + list_add_tail(&rnode->list, &vnet_info->vi_rx_skb); + micvnet_msg_send_add_dma_buffer_msg(vnet_info, rnode); + vnet_info->vi_netdev->stats.rx_dropped++; + return 1; + } + + /* Start of OOO message processing. First check if the message has + * really been received OOO. If it is completely unknown to us we just + * drop it and go on. */ + list_for_each(pos, &vnet_info->vi_rx_skb) { + rnode = list_entry(pos, struct rx_node, list); + if (rnode->phys == msg->dst_phys) { + found = true; + break; + } + } + + if (!found) { + vnet_info->vi_netdev->stats.rx_dropped++; + return 1; + } + + vnet_info->vi_netdev->stats.rx_errors++; + + /* Skip all the rnode's till we find the one we are looking for. Rather + * than free rnode skb's and reallocate them, and therby risk allocation + * failures, we simply delete the rnode's from their current position on + * the rnode list and re-add them at back of the list, as well as add + * them back at tx. 
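+	 *
+	 * Editor's worked example (an illustration, not additional code): if
+	 * vi_rx_skb currently holds A -> B -> C and the dma_complete message
+	 * names C, then A and B are moved to the back of the list and
+	 * re-announced to the peer via micvnet_msg_send_add_dma_buffer_msg(),
+	 * leaving C at the head so the normal completion path can treat it as
+	 * the in-order buffer.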
*/ + list_for_each_safe(pos, tmpl, &vnet_info->vi_rx_skb) { + rnode = list_entry(pos, struct rx_node, list); + if (rnode->phys == msg->dst_phys) + break; + + list_del(&rnode->list); + list_add_tail(&rnode->list, &vnet_info->vi_rx_skb); + micvnet_msg_send_add_dma_buffer_msg(vnet_info, rnode); + } + + return 0; +} + +static void +micvnet_msg_recv_dma_complete(struct micvnet_info *vnet_info, + struct micvnet_msg_dma_complete *msg) +{ + struct rx_node *rnode; + struct sk_buff *skb; + + vnet_info->vi_netdev->stats.rx_packets++; + + if (micvnet_msg_handle_ooo_msg(vnet_info, msg)) + return; + + rnode = list_entry((&vnet_info->vi_rx_skb)->next, struct rx_node, list); + /* Our OOO message handling guarantees that rnode->phys == msg->dst_phys */ + + vnet_info->vi_netdev->stats.rx_bytes += msg->size; + list_del(&rnode->list); + + spin_lock_bh(&vnet_info->vi_txlock); + if (atomic_read(&vnet_info->vi_state) != MICVNET_STATE_LINKUP) { + spin_unlock_bh(&vnet_info->vi_txlock); + goto skip_adding_new_buffers; + } + atomic_inc(&vnet_info->cnt_tx_pending); + spin_unlock_bh(&vnet_info->vi_txlock); + + /* OOM handling: check if a new SKB can be allocated. If not, we will re-add the + old SKB to TX and not give it to the network stack, i.e. drop it */ + if (micvnet_init_rx_skb_send_msg(vnet_info)) { + list_add_tail(&rnode->list, &vnet_info->vi_rx_skb); + micvnet_msg_send_add_dma_buffer_msg(vnet_info, rnode); + micvnet_dec_cnt_tx_pending(vnet_info); + vnet_info->vi_netdev->stats.rx_dropped++; + return; + } + micvnet_dec_cnt_tx_pending(vnet_info); + +skip_adding_new_buffers: + skb = rnode->skb; + skb_reserve(skb, msg->dma_offset); + skb_put(skb, msg->size); + skb->dev = vnet_info->vi_netdev; + skb->protocol = eth_type_trans(skb, skb->dev); + skb->ip_summed = CHECKSUM_NONE; + + local_bh_disable(); + netif_receive_skb(skb); + local_bh_enable(); + +#ifdef HOST + mic_ctx_unmap_single(vnet_to_ctx(vnet_info), rnode->phys, rnode->size); +#endif + kfree(rnode); +} + +static void +micvnet_msg_send_link_down_msg(struct work_struct *work) +{ + struct micvnet_info *vnet_info + = container_of(work, struct micvnet_info, vi_ws_link_down); + struct micvnet_msg msg; + msg.msg_id = MICVNET_MSG_LINK_DOWN; + micvnet_msg_send_msg(vnet_info, &msg); +} + +static void +micvnet_msg_recv_msg_link_down(struct micvnet_info *vnet_info) +{ + atomic_set(&vnet_info->vi_state, MICVNET_STATE_BEGIN_UNINIT); + + if (vnet_info->link_down_initiator) + wake_up_interruptible(&vnet_info->stop_waitq); + else + schedule_work(&vnet_info->vi_ws_stop); +} + +static void +micvnet_msg_send_link_up_msg(struct micvnet_info *vnet_info) +{ + struct micvnet_msg msg; + struct micvnet_msg_link_up + *body = &msg.body.micvnet_msg_link_up; + + msg.msg_id = MICVNET_MSG_LINK_UP; + body->vnet_driver_version = VNET_DRIVER_VERSION; + micvnet_msg_send_msg(vnet_info, &msg); +} + +static void +micvnet_msg_recv_msg_link_up(struct micvnet_info *vnet_info, + struct micvnet_msg_link_up *msg) +{ + if (msg->vnet_driver_version != VNET_DRIVER_VERSION) { + printk(KERN_ERR "%s: Error: vnet driver version mismatch: " + "expected %d actual %lld\n" + "Ensure that host and card modules are " + "from the same build.\n", + __func__, VNET_DRIVER_VERSION, + msg->vnet_driver_version); + return; + } +#ifdef HOST + schedule_work(&vnet_info->vi_ws_start); +#else + micvnet_send_add_dma_buffer_messages(vnet_info); +#endif +} + +static void +micvnet_msg_process_messages(struct micvnet_info *vnet_info) +{ + struct micvnet_msg msg; + +#ifdef HOST + micpm_get_reference(vnet_to_ctx(vnet_info), true); 
+#endif + while (!micvnet_msg_rb_read_msg(vnet_info, &msg)) { + switch(msg.msg_id) { + case MICVNET_MSG_ADD_DMA_BUFFER: + micvnet_msg_recv_add_dma_buffer + (vnet_info, + &msg.body.micvnet_msg_add_dma_buffer); + break; + + case MICVNET_MSG_DMA_COMPLETE: + micvnet_msg_recv_dma_complete + (vnet_info, + &msg.body.micvnet_msg_dma_complete); + break; + + case MICVNET_MSG_LINK_DOWN: + micvnet_msg_recv_msg_link_down(vnet_info); + break; + + case MICVNET_MSG_LINK_UP: + micvnet_msg_recv_msg_link_up(vnet_info, + &msg.body.micvnet_msg_link_up); + break; + + default: + printk(KERN_ERR "BUG: unknown vnet msg id: %lld\n", msg.msg_id); + break; + } + } +#ifdef HOST + micpm_put_reference(vnet_to_ctx(vnet_info)); +#endif +} + +/*********************************************************** + * Interrupts + */ +#ifdef HOST +static int +micvnet_host_doorbell_intr_handler(mic_ctx_t *mic_ctx, int doorbell) +{ + struct micvnet_info *vnet_info; + vnet_info = mic_ctx->bi_vethinfo; + + queue_work(vnet_info->vi_wq, &vnet_info->vi_ws_bh); + return 0; +} +#else +static irqreturn_t +micvnet_host_intr_handler(int irq, void *data) +{ + struct micvnet_info *vnet_info = data; + queue_work(vnet_info->vi_wq, &vnet_info->vi_ws_bh); + return IRQ_HANDLED; +} +#endif + +static void +micvnet_intr_bh_handler(struct work_struct *work) +{ + struct micvnet_info *vnet_info + = container_of(work, struct micvnet_info, vi_ws_bh); + + micvnet_msg_process_messages(vnet_info); +} + +#ifdef HOST +static void +micvnet_send_intr(struct micvnet_info *vnet_info) +{ + mic_ctx_t *mic_ctx = vnet_info->mic_ctx; + mic_send_vnet_intr(mic_ctx); +} +#else +/* Ring host doorbell 3 interrupt */ +static void +micvnet_send_intr(struct micvnet_info *vnet_info) +{ + uint32_t db_reg; + + /* Ring host doorbell 3 interrupt */ + db_reg = readl(vnet_info->vi_sbox + SBOX_SDBIC3) + | SBOX_SDBIC0_DBREQ_BIT; + writel(db_reg, vnet_info->vi_sbox + SBOX_SDBIC3); +} +#endif + +/*********************************************************** + * Net device ops and rtnl link ops + */ +/* + Do nothing in ndo_open and ndo_stop. There are two reasons for this: + 1. Since host and card side drivers are driver pairs, if ifconfig up or + ifconfig down occurs on one side this needs to be communicated to the other + side other side otherwise in the current implementation this can bring down + the system. Ignoring ifconfig up or down avoids this issue. + 2. For now, micvnet_init is called before the dma can be initialized. However, + as soon as micvnet_init has been called and netdev has been created, the OS + can invoke .ndo_open, which however requires the DMA to have been + initialized. But DMA can not be initialized until later (at present after + the card has booted). + Therefore we ourselves call micvnet_start and micvnet_stop at appropriate + times when we are ready for them. The only consequence is all packets till + micvnet_start has been invoked will be dropped in ndo_start_xmit. + */ + +/* Start callback */ +static int +micvnet_start_dev(struct net_device *dev) +{ + struct micvnet_info *vnet_info = dev->ml_priv; + + /* Stop the queue till the state becomes LINKUP. The queue will be started when + dma buffers are added in micvnet_msg_recv_add_dma_buffer(). Not doing this + results in packets getting dropped till state is LINKUP. 
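+
+	   Editor's sketch of that wake-up path (a recap, not additional code):
+	     peer sends MICVNET_MSG_ADD_DMA_BUFFER
+	       -> micvnet_msg_recv_add_dma_buffer(): cnt_dma_buf_avail++,
+	       -> micvnet_wake_queue(): netif_wake_queue() once vi_state is
+	          MICVNET_STATE_LINKUP.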
*/ + if (atomic_read(&vnet_info->vi_state) != MICVNET_STATE_LINKUP) + netif_stop_queue(vnet_info->vi_netdev); + + return 0; +} + +/* Stop callback */ +static int +micvnet_stop_dev(struct net_device *dev) +{ + return 0; +} + +static void +micvnet_dma_cb_bh(struct work_struct *work) +{ + struct micvnet_info + *vnet_info = container_of(work, struct micvnet_info, vi_ws_dmacb); + struct sched_node *snode; + + if (!atomic_read(&vnet_info->cnt_dma_complete)) + return; + + do { + spin_lock_bh(&vnet_info->vi_txlock); + snode = list_entry((&vnet_info->vi_sched_skb)->next, + struct sched_node, list); + list_del(&snode->list); + spin_unlock_bh(&vnet_info->vi_txlock); + + micvnet_msg_send_dma_complete_msg(vnet_info, snode); + + micvnet_dec_cnt_tx_pending(vnet_info); +#ifdef HOST + mic_ctx_unmap_single(vnet_to_ctx(vnet_info), + snode->dma_src_phys, snode->dma_size); + micpm_put_reference(vnet_to_ctx(vnet_info)); +#endif + kfree_skb(snode->skb); + kfree(snode); + + } while (!atomic_dec_and_test(&vnet_info->cnt_dma_complete)); +} + +static void +micvnet_dma_completion_callback(uint64_t data) +{ + struct micvnet_info *vnet_info = (struct micvnet_info *) data; + + atomic_inc(&vnet_info->cnt_dma_complete); + + queue_work(vnet_info->vi_wq, &vnet_info->vi_ws_dmacb); +} + +static int +micvnet_do_dma(struct micvnet_info *vnet_info, struct sched_node *snode) +{ + uint64_t dma_src, dma_dst; + int ret = 0; + + dma_src = snode->dma_src_phys; + dma_dst = ALIGN(snode->dst_phys, DMA_ALIGNMENT); + snode->dma_offset = (snode->skb->data - snode->skb_data_aligned) + + (dma_dst - snode->dst_phys); + if ((ret = request_dma_channel(vnet_info->dma_chan))) + goto err_exit; + + ret = do_dma(vnet_info->dma_chan, + DO_DMA_INTR, + dma_src, + dma_dst, + snode->dma_size, + &vnet_info->dma_cb); + + free_dma_channel(vnet_info->dma_chan); + +err_exit: + return ret; +} + +static int +micvnet_schedule_dma(struct micvnet_info *vnet_info) +{ + struct tx_node *tnode; + struct sched_node *snode; + struct dma_node *dnode; + struct sk_buff *skb; + int ret = 0; + /* tnode */ + spin_lock_bh(&vnet_info->vi_txlock); + BUG_ON(list_empty(&vnet_info->vi_tx_skb)); + tnode = list_entry((&vnet_info->vi_tx_skb)->next, + struct tx_node, list); + list_del(&tnode->list); + spin_unlock_bh(&vnet_info->vi_txlock); + skb = tnode->skb; + kfree(tnode); + +#ifdef HOST + if ((ret = micpm_get_reference(vnet_to_ctx(vnet_info), true))) + goto err_exit_no_dec_node_refcnt; +#endif + + /* dnode */ + spin_lock(&vnet_info->vi_rxlock); + BUG_ON(list_empty(&vnet_info->vi_dma_buf)); + dnode = list_entry((&vnet_info->vi_dma_buf)->next, + struct dma_node, list); + spin_unlock(&vnet_info->vi_rxlock); + if (dnode->size < skb->len + 3 * DMA_ALIGNMENT) { + ret = -ENOMEM; + goto err_exit; + } + + /* snode */ + if (!(snode = kmalloc(sizeof(*snode), GFP_KERNEL))) { + ret = -ENOMEM; + goto err_exit; + } + snode->skb = skb; + snode->dst_phys = dnode->phys; + snode->skb_data_aligned + = (unsigned char *) ((uint64_t) skb->data & ~(DMA_ALIGNMENT - 1)); + snode->dma_size + = ALIGN((skb->len + (skb->data - snode->skb_data_aligned)), + DMA_ALIGNMENT); +#ifdef HOST + snode->dma_src_phys = mic_ctx_map_single(vnet_to_ctx(vnet_info), + snode->skb_data_aligned, + snode->dma_size); + if (mic_map_error(snode->dma_src_phys)) { + kfree(snode); + ret = -ENOMEM; + goto err_exit; + } +#else + snode->dma_src_phys = virt_to_phys(snode->skb_data_aligned); +#endif + + if ((ret = micvnet_do_dma(vnet_info, snode))) { +#ifdef HOST + mic_ctx_unmap_single(vnet_to_ctx(vnet_info), + snode->dma_src_phys, 
snode->dma_size); +#endif + kfree(snode); + goto err_exit; + } + + /* Update snode/dnode lists only after all operations have successfully + completed and no further errors are possible */ + spin_lock_bh(&vnet_info->vi_txlock); + list_add_tail(&snode->list, &vnet_info->vi_sched_skb); + spin_unlock_bh(&vnet_info->vi_txlock); + + spin_lock(&vnet_info->vi_rxlock); + list_del(&dnode->list); + spin_unlock(&vnet_info->vi_rxlock); + list_obj_free(&vnet_info->dnode_list); + + vnet_info->vi_netdev->stats.tx_packets++; + vnet_info->vi_netdev->stats.tx_bytes += skb->len; + + return ret; + +err_exit: +#ifdef HOST + micpm_put_reference(vnet_to_ctx(vnet_info)); +err_exit_no_dec_node_refcnt: +#endif + micvnet_dec_cnt_tx_pending(vnet_info); + atomic_inc(&vnet_info->cnt_dma_buf_avail); + micvnet_wake_queue(vnet_info); + skb->dev->stats.tx_dropped++; + kfree_skb(skb); + return ret; +} + +static void +micvnet_schedule_dmas(struct work_struct *work) +{ + struct micvnet_info *vnet_info + = container_of(work, struct micvnet_info, vi_ws_tx); + volatile bool tx_skb_list_empty; + while (1) { + spin_lock_bh(&vnet_info->vi_txlock); + tx_skb_list_empty = list_empty(&vnet_info->vi_tx_skb); + spin_unlock_bh(&vnet_info->vi_txlock); + if (tx_skb_list_empty) + break; + + micvnet_schedule_dma(vnet_info); + } +} + +int +micvnet_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct micvnet_info *vnet_info = (struct micvnet_info*)dev->ml_priv; + struct tx_node *tnode; + if (!vnet_info || !atomic_read(&vnet_info->cnt_dma_buf_avail)){ + goto err_exit; + } + + if (!(tnode = kmalloc(sizeof(*tnode), GFP_ATOMIC))) + goto err_exit; + tnode->skb = skb; + + spin_lock(&vnet_info->vi_txlock); + if (atomic_read(&vnet_info->vi_state) != MICVNET_STATE_LINKUP) + goto err_exit_unlock; + list_add_tail(&tnode->list, &vnet_info->vi_tx_skb); + atomic_inc(&vnet_info->cnt_tx_pending); + spin_unlock(&vnet_info->vi_txlock); + + queue_work(vnet_info->vi_wq, &vnet_info->vi_ws_tx); + + if (atomic_dec_and_test(&vnet_info->cnt_dma_buf_avail)) + netif_stop_queue(vnet_info->vi_netdev); + + return NETDEV_TX_OK; + +err_exit_unlock: + kfree(tnode); + spin_unlock(&vnet_info->vi_txlock); +err_exit: + kfree_skb(skb); + dev->stats.tx_dropped++; + return NETDEV_TX_OK; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) +static void +micvnet_multicast_list(struct net_device *dev) +{ +} +#endif + +static int +micvnet_set_address(struct net_device *dev, void *p) +{ + struct sockaddr *sa = p; + + if (!is_valid_ether_addr(sa->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(dev->dev_addr, sa->sa_data, ETH_ALEN); + return 0; +} + +#define MIN_MTU 68 +#define MAX_MTU MICVNET_MAX_MTU + +static int +micvnet_change_mtu(struct net_device *dev, int new_mtu) +{ + if (new_mtu < MIN_MTU || new_mtu > MAX_MTU) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +union serial { + uint32_t regs[3]; + char string[13]; +}; + +void +mic_get_serial_from_dbox(struct micvnet_info *vni, char *serialnum) +{ + union serial serial; +#ifdef HOST + serial.regs[0] = DBOX_READ(vni->mic_ctx->mmio.va, DBOX_SWF1X0); + serial.regs[1] = DBOX_READ(vni->mic_ctx->mmio.va, DBOX_SWF1X1); + serial.regs[2] = DBOX_READ(vni->mic_ctx->mmio.va, DBOX_SWF1X2); +#else + serial.regs[0] = readl(vni->vi_dbox + DBOX_SWF1X0); + serial.regs[1] = readl(vni->vi_dbox + DBOX_SWF1X1); + serial.regs[2] = readl(vni->vi_dbox + DBOX_SWF1X2); +#endif + serial.string[12] = '\0'; + strcpy(serialnum, serial.string); +} + +int +micvnet_setmac_from_serial(struct net_device *dev) +{ + struct micvnet_info *vni = (struct 
micvnet_info *)dev->ml_priv; + char serialnum[17]; + int err; + + mic_get_serial_from_dbox(vni, serialnum); +#ifdef HOST + err = mic_get_mac_from_serial(serialnum, dev->dev_addr, 1); +#else + err = mic_get_mac_from_serial(serialnum, dev->dev_addr, 0); +#endif + return err; +} + +static const struct net_device_ops micvnet_netdev_ops = { + .ndo_open = micvnet_start_dev, + .ndo_stop = micvnet_stop_dev, + .ndo_start_xmit = micvnet_xmit, + .ndo_validate_addr = eth_validate_addr, +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) + .ndo_set_multicast_list = micvnet_multicast_list, +#endif + .ndo_set_mac_address = micvnet_set_address, + .ndo_change_mtu = micvnet_change_mtu, +}; + +static void +micvnet_setup(struct net_device *dev) +{ + ether_setup(dev); + + /* Initialize the device structure. */ + dev->netdev_ops = &micvnet_netdev_ops; + dev->destructor = free_netdev; + + /* Fill in device structure with ethernet-generic values. */ + dev->mtu = MICVNET_MAX_MTU; + dev->flags &= ~IFF_MULTICAST; +} + +static struct rtnl_link_ops micvnet_link_ops __read_mostly = { + .kind = "micvnet", + .setup = micvnet_setup, +}; + +/*********************************************************** + * Vnet init/deinit + */ +static int +micvnet_init_hw_regs(struct micvnet_info *vnet_info) +{ +#ifdef HOST + mic_ctx_t *mic_ctx = vnet_info->mic_ctx; + + vnet_info->vi_pdev = mic_ctx->bi_pdev; + vnet_info->vi_sbox = (uint8_t *)((unsigned long) mic_ctx->mmio.va + + HOST_SBOX_BASE_ADDRESS); + vnet_info->vi_scratch14 + = (uint32_t *)((unsigned long)mic_ctx->mmio.va + + HOST_SBOX_BASE_ADDRESS + SBOX_SCRATCH14); +#else + vnet_info->vi_sbox = ioremap_nocache(SBOX_BASE, SBOX_MMIO_LENGTH); + vnet_info->vi_dbox = ioremap_nocache(DBOX_BASE, SBOX_MMIO_LENGTH); + if (!vnet_info->vi_sbox) { + printk(KERN_ERR "%s: NULL SBOX ptr\n", __func__); + return -ENOMEM; + } + vnet_info->vi_scratch14 + = (uint32_t *)(vnet_info->vi_sbox + SBOX_SCRATCH14); +#endif + return 0; +} + +static void +micvnet_deinit_hw_regs(struct micvnet_info *vnet_info) +{ +#ifndef HOST + iounmap(vnet_info->vi_sbox); + iounmap(vnet_info->vi_dbox); +#endif +} + +static int +micvnet_init_interrupts(struct micvnet_info *vnet_info) +{ + mic_ctx_t *mic_ctx = vnet_info->mic_ctx; + int ret = 0; + + spin_lock_init(&vnet_info->vi_txlock); + spin_lock_init(&vnet_info->vi_rxlock); + + snprintf(vnet_info->vi_wqname, sizeof(vnet_info->vi_wqname), + "VNET WQ %d", mic_ctx->bi_id); + + if (!(vnet_info->vi_wq = + __mic_create_singlethread_workqueue(vnet_info->vi_wqname))) { + printk(KERN_ERR "%s: create_singlethread_workqueue\n", __func__); + return -ENOMEM; + } + init_waitqueue_head(&vnet_info->stop_waitq); + + INIT_WORK(&vnet_info->vi_ws_bh, micvnet_intr_bh_handler); + INIT_WORK(&vnet_info->vi_ws_tx, micvnet_schedule_dmas); + INIT_WORK(&vnet_info->vi_ws_dmacb, micvnet_dma_cb_bh); + INIT_WORK(&vnet_info->vi_ws_link_down, micvnet_msg_send_link_down_msg); + INIT_WORK(&vnet_info->vi_ws_stop, micvnet_stop_ws); + INIT_WORK(&vnet_info->vi_ws_start, micvnet_start_ws); +#ifdef HOST + if ((ret = mic_reg_irqhandler(mic_ctx, 3, "Host DoorBell 3", + micvnet_host_doorbell_intr_handler))) { +#else + if ((ret = request_irq(get_sbox_irq(VNET_SBOX_INT_IDX), + micvnet_host_intr_handler, IRQF_DISABLED, + "vnet intr", vnet_info))) { +#endif + printk(KERN_ERR "%s: interrupt registration failed\n", __func__); + goto err_exit_destroy_workqueue; + } + return 0; + +err_exit_destroy_workqueue: + destroy_workqueue(vnet_info->vi_wq); + return ret; +} + +static void +micvnet_deinit_interrupts(struct micvnet_info 
*vnet_info) +{ +#ifdef HOST + mic_unreg_irqhandler(vnet_info->mic_ctx, 3, "Host DoorBell 3"); +#else + free_irq(get_sbox_irq(VNET_SBOX_INT_IDX), vnet_info); +#endif + destroy_workqueue(vnet_info->vi_wq); +} + + +static int +micvnet_init_netdev(struct micvnet_info *vnet_info) +{ + struct net_device *dev_vnet; + int ret = 0; + + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)) + if ((dev_vnet = (struct net_device *)alloc_netdev(sizeof(struct micvnet_info), "mic%d", + NET_NAME_UNKNOWN, micvnet_setup)) == NULL) { +#else + if ((dev_vnet = (struct net_device *)alloc_netdev(sizeof(struct micvnet_info), "mic%d", + micvnet_setup)) == NULL) { +#endif + printk(KERN_ERR "%s: alloc_netdev failed\n", __func__); + return -ENOMEM; + } + + vnet_info->vi_netdev = dev_vnet; + dev_vnet->ml_priv = vnet_info; + + if (micvnet_setmac_from_serial(dev_vnet)) + random_ether_addr(dev_vnet->dev_addr); + + dev_vnet->rtnl_link_ops = &micvnet_link_ops; + + if ((ret = register_netdev(dev_vnet)) < 0) { + printk(KERN_ERR "%s: register_netdev failed %d\n", __func__, ret); + free_netdev(dev_vnet); + return ret; + } + + return 0; +} + +static int +micvnet_init_msg_rings(struct micvnet_info *vnet_info) +{ +#ifdef HOST + vnet_info->vi_qp.tx = &vnet_info->vi_rp.rb_tx; + vnet_info->vi_qp.rx = &vnet_info->vi_rp.rb_rx; + micvnet_reset_msg_rings(vnet_info); + + vnet_info->vi_rp_phys = mic_ctx_map_single(vnet_to_ctx(vnet_info), + &vnet_info->vi_rp, + sizeof(vnet_info->vi_rp)); + if (mic_map_error(vnet_info->vi_rp_phys)) { + printk(KERN_ERR "%s: mic_map_error failed\n", __func__); + return -ENOMEM; + } +#else + if (!(vnet_info->vi_rp_phys = vnet_addr)) { + printk(KERN_ERR "%s: null vnet_addr\n", __func__); + return -ENOMEM; + } + vnet_info->ring_ptr + = ioremap_nocache(vnet_info->vi_rp_phys, + sizeof(struct micvnet_msg_ring_pair)); + if (!vnet_info->ring_ptr) { + printk(KERN_ERR "%s: NULL ring ptr\n", __func__); + return -ENOMEM; + } + vnet_info->vi_qp.tx = &vnet_info->ring_ptr->rb_rx; + vnet_info->vi_qp.rx = &vnet_info->ring_ptr->rb_tx; +#endif + return 0; +} + +static void +micvnet_deinit_msg_rings(struct micvnet_info *vnet_info) +{ +#ifdef HOST + mic_ctx_unmap_single(vnet_to_ctx(vnet_info), + vnet_info->vi_rp_phys, sizeof(vnet_info->vi_rp)); +#else + iounmap(vnet_info->ring_ptr); +#endif +} + +static int +micvnet_init_lists(struct micvnet_info *vnet_info) +{ + int ret; + if ((ret = list_obj_list_init(VNET_MAX_SKBS, sizeof(struct dma_node), + &vnet_info->dnode_list))) + return ret; + + INIT_LIST_HEAD(&vnet_info->vi_rx_skb); + INIT_LIST_HEAD(&vnet_info->vi_dma_buf); + INIT_LIST_HEAD(&vnet_info->vi_tx_skb); + INIT_LIST_HEAD(&vnet_info->vi_sched_skb); + return 0; +} + +static void +micvnet_deinit_lists(struct micvnet_info *vnet_info) +{ + struct list_head *pos, *tmpq; + struct rx_node *rnode; + struct tx_node *tnode; + struct dma_node *dnode; + struct sched_node *snode; + + list_for_each_safe(pos, tmpq, &vnet_info->vi_rx_skb) { + rnode = list_entry(pos, struct rx_node, list); + list_del(&rnode->list); +#ifdef HOST + mic_ctx_unmap_single(vnet_to_ctx(vnet_info), + rnode->phys, rnode->size); +#endif + kfree_skb(rnode->skb); + kfree(rnode); + } + + list_for_each_safe(pos, tmpq, &vnet_info->vi_dma_buf) { + dnode = list_entry(pos, struct dma_node, list); + list_del(&dnode->list); + list_obj_free(&vnet_info->dnode_list); + } + + list_for_each_safe(pos, tmpq, &vnet_info->vi_tx_skb) { + tnode = list_entry(pos, struct tx_node, list); + list_del(&tnode->list); + kfree_skb(tnode->skb); + kfree(tnode); + } + + list_for_each_safe(pos, tmpq, 
&vnet_info->vi_sched_skb) { + snode = list_entry(pos, struct sched_node, list); + list_del(&snode->list); +#ifdef HOST + mic_ctx_unmap_single(vnet_to_ctx(vnet_info), snode->dma_src_phys, + snode->dma_size); + micpm_put_reference(vnet_to_ctx(vnet_info)); +#endif + kfree_skb(snode->skb); + kfree(snode); + } + + list_obj_list_deinit(&vnet_info->dnode_list); +} +static int +micvnet_init_dma(struct micvnet_info *vnet_info) +{ + mic_ctx_t *mic_ctx = vnet_info->mic_ctx; + int ret; + + /* Note: open_dma_device must use mic_ctx->dma_handle since that is + used in the isr */ +#ifdef HOST + if (micpm_get_reference(mic_ctx, true) != 0) { + printk(KERN_ERR "%s: micpm_get_reference failed\n", __func__); + return -ENODEV; + } + + if ((ret = open_dma_device(mic_ctx->bi_id + 1, + mic_ctx->mmio.va + HOST_SBOX_BASE_ADDRESS, + &mic_ctx->dma_handle))) { + printk(KERN_ERR "%s: open_dma_device failed\n", __func__); + micpm_put_reference(mic_ctx); + return ret; + } + micpm_put_reference(mic_ctx); +#else + if ((ret = open_dma_device(0, 0, &mic_ctx->dma_handle))) { + printk(KERN_ERR "%s: open_dma_device failed\n", __func__); + return ret; + } +#endif + + vnet_info->dma_handle = mic_ctx->dma_handle; + + if ((ret = allocate_dma_channel(vnet_info->dma_handle, + &vnet_info->dma_chan))) { + printk(KERN_ERR "%s: allocate_dma_channel failed\n", __func__); + goto err_exit_close_dma; + } + free_dma_channel(vnet_info->dma_chan); + vnet_info->dma_cb.dma_completion_func = micvnet_dma_completion_callback; + vnet_info->dma_cb.cb_cookie = (uint64_t) vnet_info; + atomic_set(&vnet_info->cnt_dma_complete, 0); + atomic_set(&vnet_info->cnt_dma_buf_avail, 0); + vnet_info->link_down_initiator = false; + atomic_set(&vnet_info->cnt_tx_pending, 0); + return 0; + +err_exit_close_dma: + close_dma_device(mic_ctx->bi_id + 1, &vnet_info->dma_handle); + return ret; +} + +static void +micvnet_deinit_dma(struct micvnet_info *vnet_info) +{ + mic_ctx_t *mic_ctx = vnet_info->mic_ctx; + + close_dma_device(mic_ctx->bi_id + 1, &vnet_info->dma_handle); +} +static int +micvnet_alloc_rx_node(struct micvnet_info *vnet_info, struct rx_node **node) +{ + struct rx_node *rnode; + + if (!(rnode = kmalloc(sizeof(*rnode), GFP_KERNEL))) + return -ENOMEM; + + rnode->size = vnet_info->vi_netdev->mtu + 3 * DMA_ALIGNMENT + ETH_HLEN; + + if (!(rnode->skb = dev_alloc_skb(rnode->size))) { + kfree(rnode); + return -ENOMEM; + } + +#ifdef HOST + rnode->phys = mic_ctx_map_single(vnet_to_ctx(vnet_info), + rnode->skb->data, rnode->size); + if (mic_map_error(rnode->phys)) { + kfree_skb(rnode->skb); + kfree(rnode); + return -ENOMEM; + } +#else + rnode->phys = virt_to_phys(rnode->skb->data); +#endif + + *node = rnode; + + return 0; +} + +static int +micvnet_init_rx_skb_send_msg(struct micvnet_info *vnet_info) +{ + struct rx_node *rnode; + int ret = 0; + + if (unlikely(ret = micvnet_alloc_rx_node(vnet_info, &rnode))) + return ret; + + list_add_tail(&rnode->list, &vnet_info->vi_rx_skb); + + micvnet_msg_send_add_dma_buffer_msg(vnet_info, rnode); + + return 0; +} + +static int +micvnet_init_rx_skbs(struct micvnet_info *vnet_info) +{ + struct rx_node *rnode; + int i, ret = 0; + + + if ( (vnet_num_buffers <= 0) || (vnet_num_buffers > VNET_MAX_SKBS) ) + vnet_num_buffers = VNET_MAX_SKBS; + + for (i = 0; i < vnet_num_buffers; i++) { + if (unlikely(ret = micvnet_alloc_rx_node(vnet_info, &rnode))) + return ret; + + list_add_tail(&rnode->list, &vnet_info->vi_rx_skb); + } + + return ret; +} + +static void +micvnet_send_add_dma_buffer_messages(struct micvnet_info *vnet_info) +{ + struct rx_node 
*rnode; + struct list_head *pos; + + list_for_each(pos, &vnet_info->vi_rx_skb) { + rnode = list_entry(pos, struct rx_node, list); + micvnet_msg_send_add_dma_buffer_msg(vnet_info, rnode); + } +} + +static void +micvnet_initiate_link_down(struct micvnet_info *vnet_info) +{ + int ret; + netif_tx_disable(vnet_info->vi_netdev); + spin_lock_bh(&vnet_info->vi_txlock); + atomic_set(&vnet_info->vi_state, MICVNET_STATE_LINK_DOWN); + spin_unlock_bh(&vnet_info->vi_txlock); + + /* This wait precludes this function to be called from the context of + * the vnet wq thread */ + ret = wait_event_interruptible_timeout( + vnet_info->stop_waitq, + (atomic_read(&vnet_info->cnt_tx_pending) == 0), + STOP_WAIT_TIMEOUT); + if (!ret) + printk(KERN_ERR "%s timeout waiting for Tx dma buffers to drain\n", __func__); + /* To avoid introducing a lock in micvnet_msg_send_msg() send the + * LINK_DOWN message from vnet wq thread context. LINK_DOWN will be the + * LAST message sent. */ + queue_work(vnet_info->vi_wq, &vnet_info->vi_ws_link_down); +} + +static void +micvnet_stop_deinit(struct micvnet_info *vnet_info) +{ + flush_workqueue(vnet_info->vi_wq); + atomic_set(&vnet_info->vi_state, MICVNET_STATE_UNINITIALIZED); + + micvnet_deinit_dma(vnet_info); + micvnet_deinit_lists(vnet_info); +#ifdef HOST + micvnet_reset_msg_rings(vnet_info); +#endif + atomic_dec(&micvnet.lv_active_clients); +} + +int +micvnet_probe(mic_ctx_t *mic_ctx) +{ + struct micvnet_info *vnet_info; + int ret = 0; + + mic_ctx->bi_vethinfo = NULL; + + if (!micvnet.created) + return 1; + + if (!(vnet_info = kzalloc(sizeof(struct micvnet_info), GFP_KERNEL))) { + printk(KERN_ERR "%s: vnet_info alloc failed\n", __func__); + return -ENOMEM; + } + + mic_ctx->bi_vethinfo = vnet_info; + vnet_info->mic_ctx = mic_ctx; + if ((ret = micvnet_init_hw_regs(vnet_info))) + goto err_exit_free_vnet_info; + if ((ret = micvnet_init_msg_rings(vnet_info))) + goto err_exit_deinit_hw_regs; + if ((ret = micvnet_init_interrupts(vnet_info))) + goto err_exit_deinit_msg_rings; + if ((ret = micvnet_init_netdev(vnet_info))) + goto err_exit_deinit_interrupts; + + atomic_set(&vnet_info->vi_state, MICVNET_STATE_UNINITIALIZED); + return 0; + +err_exit_deinit_interrupts: + micvnet_deinit_interrupts(vnet_info); +err_exit_deinit_msg_rings: + micvnet_deinit_msg_rings(vnet_info); +err_exit_deinit_hw_regs: + micvnet_deinit_hw_regs(vnet_info); +err_exit_free_vnet_info: + kfree(vnet_info); + + return ret; +} + +void +micvnet_remove(mic_ctx_t *mic_ctx) +{ + struct micvnet_info + *vnet_info = (struct micvnet_info *) mic_ctx->bi_vethinfo; + + if (!vnet_info) + return; + + micvnet_stop(mic_ctx); + + vnet_info->vi_netdev->ml_priv = NULL; + + micvnet_deinit_interrupts(vnet_info); + micvnet_deinit_msg_rings(vnet_info); + micvnet_deinit_hw_regs(vnet_info); + + mic_ctx->bi_vethinfo = NULL; + + kfree(vnet_info); +} + +int +micvnet_execute_start(struct micvnet_info *vnet_info) +{ + int ret = 0; + + if (!vnet_info) { + printk(KERN_ERR "%s: vnet_info is NULL\n", __func__); + return 1; + } + + if (atomic_cmpxchg(&vnet_info->vi_state, MICVNET_STATE_UNINITIALIZED, + MICVNET_STATE_TRANSITIONING) != MICVNET_STATE_UNINITIALIZED) { + printk(KERN_ERR "%s: wrong vnet state %d\n", __func__, + atomic_read(&vnet_info->vi_state)); + return 1; + } + + if ((ret = micvnet_init_lists(vnet_info))) + goto err_exit; + if ((ret = micvnet_init_dma(vnet_info))) + goto err_exit_deinit_lists; + if ((ret = micvnet_init_rx_skbs(vnet_info))) { + printk(KERN_ERR "%s: micvnet_init_rx_skbs failed\n", __func__); + goto err_exit_deinit_dma; + } + 
+ memset(&vnet_info->vi_netdev->stats, 0, sizeof(vnet_info->vi_netdev->stats)); + atomic_inc(&micvnet.lv_active_clients); + atomic_set(&vnet_info->vi_state, MICVNET_STATE_LINKUP); + + micvnet_msg_send_link_up_msg(vnet_info); +#ifdef HOST + micvnet_send_add_dma_buffer_messages(vnet_info); +#else + writel(MICVNET_CARD_UP_MAGIC, vnet_info->vi_scratch14); + /* Card adds DMA buffers to host after receiving MICVNET_MSG_LINK_UP */ +#endif + return 0; + +err_exit_deinit_dma: + micvnet_deinit_dma(vnet_info); +err_exit_deinit_lists: + /* RX SKB's are deallocated in micvnet_deinit_lists() */ + micvnet_deinit_lists(vnet_info); +err_exit: + atomic_set(&vnet_info->vi_state, MICVNET_STATE_UNINITIALIZED); + return ret; +} + +static void +micvnet_start_ws(struct work_struct *work) +{ + struct micvnet_info *vnet_info + = container_of(work, struct micvnet_info, vi_ws_start); + + micvnet_execute_start(vnet_info); +} + +int micvnet_start(mic_ctx_t *mic_ctx) +{ +#ifndef HOST + struct micvnet_info *vnet_info = (struct micvnet_info *) mic_ctx->bi_vethinfo; + micvnet_execute_start(vnet_info); +#endif + return 0; +} + +void +micvnet_execute_stop(struct micvnet_info *vnet_info) +{ + int ret; + if (!vnet_info) + return; + + switch(atomic_read(&vnet_info->vi_state)) { + case MICVNET_STATE_LINKUP: + case MICVNET_STATE_BEGIN_UNINIT: + break; + default: + return; + } + +#ifdef HOST + if ((micpm_get_reference(vnet_to_ctx(vnet_info), true)) != 0) + goto exit; +#endif + micvnet_initiate_link_down(vnet_info); + if (vnet_info->link_down_initiator && !(vnet_info->mic_ctx->state == MIC_SHUTDOWN && vnet_info->mic_ctx->sdbic1)){ + ret = wait_event_interruptible_timeout( + vnet_info->stop_waitq, + (atomic_read(&vnet_info->vi_state) == MICVNET_STATE_BEGIN_UNINIT), + STOP_WAIT_TIMEOUT); + if (!ret) + printk(KERN_ERR "%s: timeout waiting for link down message response\n", __func__); + } + +#ifdef HOST + micpm_put_reference(vnet_to_ctx(vnet_info)); +exit: +#endif + micvnet_stop_deinit(vnet_info); +} + +void +micvnet_stop(mic_ctx_t *mic_ctx) +{ + struct micvnet_info *vnet_info = (struct micvnet_info *) mic_ctx->bi_vethinfo; + + vnet_info->link_down_initiator = true; + micvnet_execute_stop(vnet_info); +} + +static void +micvnet_stop_ws(struct work_struct *work) +{ + struct micvnet_info *vnet_info + = container_of(work, struct micvnet_info, vi_ws_stop); + + vnet_info->link_down_initiator = false; + micvnet_execute_stop(vnet_info); +} + +#if !defined(WINDOWS) && defined(HOST) +static ssize_t +show_vnet(struct device *dev, struct device_attribute *attr, char *buf); +DEVICE_ATTR(vnet, S_IRUGO, show_vnet, NULL); + +static ssize_t +show_vnet(struct device *dev, struct device_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "Number of active vnet clients: %d\n", + atomic_read(&micvnet.lv_active_clients)); +} +#endif + +int +micvnet_init(struct device *dev) +{ + int ret = 0; + + micvnet.created = 0; + atomic_set(&micvnet.lv_active_clients, 0); + + if ((ret = rtnl_link_register(&micvnet_link_ops))) { + printk(KERN_ERR "%s: rtnl_link_register failed\n", __func__); + return ret; + } + +#ifdef HOST + if ((ret = device_create_file(dev, &dev_attr_vnet))) { + printk(KERN_ERR "%s: device_create_file failed\n", __func__); + rtnl_link_unregister(&micvnet_link_ops); + return ret; + } +#endif + micvnet.created = 1; + return 0; +} + +void +micvnet_exit(void) +{ + rtnl_link_unregister(&micvnet_link_ops); +} + +#ifndef HOST +static void __exit +_micvnet_module_exit(void) +{ + mic_ctx_t *mic_ctx = &mic_ctx_g; + + micvnet_stop(mic_ctx); + 
micvnet_remove(mic_ctx); + micvnet_exit(); +} + +static int +micvnet_reboot(struct notifier_block *notifier, unsigned long unused1, void *unused2) +{ + /* Calling _micvnet_module_exit() here will hang the uOS during shutdown in NFS + * root case */ + return NOTIFY_OK; +} + +static struct notifier_block micvnet_reboot_notifier = { + .notifier_call = micvnet_reboot, + .priority = 0, +}; + +void __exit +micvnet_module_exit(void) +{ + unregister_reboot_notifier(&micvnet_reboot_notifier); + _micvnet_module_exit(); +} + +int __init +micvnet_module_init(void) +{ + mic_ctx_t *mic_ctx = &mic_ctx_g; + int ret = 0; + + if ((ret = register_reboot_notifier(&micvnet_reboot_notifier))) { + printk(KERN_ERR "register_reboot_notifier failed: error %d\n", ret); + goto err_exit; + } + + memset(mic_ctx, 0, sizeof(*mic_ctx)); + mic_ctx->bi_id = 0; + + if ((ret = micvnet_init(NULL))) + goto err_exit_unregister_reboot_notifier; + if ((ret = micvnet_probe(mic_ctx))) + goto err_exit_micvnet_exit; + if ((ret = micvnet_start(mic_ctx))) + goto err_exit_micvnet_remove; + + return 0; + +err_exit_micvnet_remove: + micvnet_remove(mic_ctx); +err_exit_micvnet_exit: + micvnet_exit(); +err_exit_unregister_reboot_notifier: + unregister_reboot_notifier(&micvnet_reboot_notifier); +err_exit: + printk(KERN_ERR "%s failed: error %d\n", __func__, ret); + return ret; +} + +#ifdef STANDALONE_VNET_DMA +module_init(micvnet_module_init); +module_exit(micvnet_module_exit); +#endif + +MODULE_LICENSE("GPL"); +#endif diff --git a/vnet/micveth_param.c b/vnet/micveth_param.c new file mode 100644 index 0000000..449deed --- /dev/null +++ b/vnet/micveth_param.c @@ -0,0 +1,95 @@ +/* + * Copyright 2010-2017 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Disclaimer: The codes contained in these modules may be specific to + * the Intel Software Development Platform codenamed Knights Ferry, + * and the Intel product codenamed Knights Corner, and are not backward + * compatible with other Intel products. Additionally, Intel will NOT + * support the codes or instruction set in future products. + * + * Intel offers no warranty of any kind regarding the code. This code is + * licensed on an "AS IS" basis and Intel is not obligated to provide + * any support, assistance, installation, training, or other services + * of any kind. Intel is also not obligated to provide any updates, + * enhancements or extensions. Intel specifically disclaims any warranty + * of merchantability, non-infringement, fitness for any particular + * purpose, and any other warranty. + * + * Further, Intel disclaims all liability of any kind, including but + * not limited to liability for infringement of any proprietary rights, + * relating to the use of the code, even if Intel is notified of the + * possibility of such liability. Except as expressly stated in an Intel + * license agreement provided with this code and agreed upon with Intel, + * no license, express or implied, by estoppel or otherwise, to any + * intellectual property rights is granted herein. 
+ */ + +#include +#include +#include +#include + +#include "mic/micveth.h" + +#define __VNET_MODE(u, l) #l , +char *mic_vnet_modes[] = { VNET_MODES }; +#undef __VNET_MODE + +/* + *KAA: not sure when this API changed, could have been in 35. + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36) +#define GRRR const +#else +#define GRRR /* As nothing */ +#endif + +static int param_set_vnetmode(const char *val, GRRR struct kernel_param *kp) +{ + int i; + for (i = 0; i < sizeof(mic_vnet_modes) / sizeof(char *); i++) + if (!strcmp(val, mic_vnet_modes[i])) { + mic_vnet_mode = i; + return 0; + } + return -EINVAL; +} + +static int param_get_vnetmode(char *buffer, GRRR struct kernel_param *kp) +{ + return sprintf(buffer, "%s", mic_vnet_modes[mic_vnet_mode]); +} + +#define param_check_vnetmode(name, p) __param_check(name, p, int) + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36) +struct kernel_param_ops param_ops_vnetmode = { + .set = param_set_vnetmode, + .get = param_get_vnetmode, +}; +#endif /* Kernel > 2.6.36 */ + +int mic_vnet_mode = VNET_MODE_DMA; +module_param_named(vnet, mic_vnet_mode, vnetmode, 0400); +#define __VNET_MODE(u, l) " " #l +MODULE_PARM_DESC(vnet, "Vnet operating mode, one of:" VNET_MODES); +#undef __VNET_MODE + +int vnet_num_buffers = VNET_MAX_SKBS; +module_param(vnet_num_buffers, int, 0400); +MODULE_PARM_DESC(vnet_num_buffers, "Number of buffers used by the VNET driver"); + +ulong vnet_addr = 0; +module_param(vnet_addr, ulong, 0400); +MODULE_PARM_DESC(vnet_addr, "Vnet driver host ring address"); + + -- 2.20.1
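
[Editor's note, appended after the patch; not part of the original commit.] The tail of the diff (vnet/micveth_param.c) shows a custom-typed module parameter: the string given as vnet= is matched against the mic_vnet_modes table, stored as an integer index, and read back as text through a paired getter registered via struct kernel_param_ops. The sketch below illustrates the same technique against the post-2.6.36 kernel API only; every name in it (demo_mode, demo_modes, the "poll/intr/dma" mode list, the module itself) is hypothetical and not taken from the driver, and it assumes only module_param_cb(), struct kernel_param_ops, sysfs_streq() and ARRAY_SIZE() as provided by the kernel.

    /* Minimal sketch: a string-valued module parameter validated against a
     * fixed list and stored as an int index.  Hypothetical names throughout. */
    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/moduleparam.h>
    #include <linux/string.h>

    static const char *demo_modes[] = { "poll", "intr", "dma" };
    static int demo_mode = 2;                       /* default: "dma" */

    static int demo_mode_set(const char *val, const struct kernel_param *kp)
    {
            int i;

            /* Accept only a known mode name; sysfs_streq ignores a trailing '\n'. */
            for (i = 0; i < ARRAY_SIZE(demo_modes); i++) {
                    if (sysfs_streq(val, demo_modes[i])) {
                            *(int *)kp->arg = i;
                            return 0;
                    }
            }
            return -EINVAL;
    }

    static int demo_mode_get(char *buffer, const struct kernel_param *kp)
    {
            /* Report the symbolic name rather than the stored integer. */
            return sprintf(buffer, "%s", demo_modes[*(int *)kp->arg]);
    }

    static const struct kernel_param_ops demo_mode_ops = {
            .set = demo_mode_set,
            .get = demo_mode_get,
    };
    module_param_cb(mode, &demo_mode_ops, &demo_mode, 0400);
    MODULE_PARM_DESC(mode, "Operating mode, one of: poll intr dma");

    MODULE_LICENSE("GPL");

If such a module were built as demo.ko (a hypothetical name), it could be loaded with "insmod demo.ko mode=intr", and root could read the symbolic value back from /sys/module/demo/parameters/mode. The driver in the patch achieves the same effect for its vnet= parameter, with the extra GRRR/param_check_vnetmode plumbing needed to keep the pre-2.6.36 prototype variants compiling.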