***************************************************************************** * MAKING A NEW SYSTEM CALL * * * * * * Copyright (c) 2001 Daniel P. Bovet, Marco Cesati, and Cosimo Comella * * Permission is granted to copy, distribute and/or modify this document * * under the terms of the GNU Free Documentation License, Version 1.1, * * published by the Free Software Foundation; with no Invariant Sections, * * with no Front-Cover Texts, and with no Back-Cover Texts. A copy of the * * license is included in the file named LICENSE. * * * * (version 1.1) * ***************************************************************************** The objective is to illustrate how the performance of the system is affected when disabling the whole hardware cache system. After issuing the: Tcacheset 1 1 command (total cache disabling), the system runs roughly 10 times slower. This project consists of two parts: a) making a new system call denoted as cacheset() to allow User Mode programs to play with the CD and NW flags of the cr0 control register; b) writing a User Mode program called Tcacheset, which makes use of the new system call. The two binary parameters passed in the command line refer respectively to the new values of the CD and NW flags. The cacheset() system call can be issued by a process with the proper capability to set (flag=1) or to clear (flag=0) the CD and NW flags of the cr0 control register. These flags determine (in part) the behaviour of the processor hardware caches. See Intel P6 manuals for further details on CD and NW flags. 
***************************************************************************** STEP 0: set the proper EXTRAVERSION value in Makefile ***************************************************************************** replace: EXTRAVERSION = with: EXTRAVERSION = kh3 ***************************************************************************** STEP 1: modify the linux/arch/i386/config.in file ***************************************************************************** add right after: bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE the following line: bool ' Hardware cache disabling' CONFIG_HWCACHE_DISABLE ***************************************************************************** STEP 2: Add a new kernel hacking configuration option ***************************************************************************** a) in the Documentation/Configure.help file, add right after: Magic System Request Key support CONFIG_MAGIC_SYSRQ If you say Y here, you will have some control over the system even if the system crashes for example during kernel debugging (e.g., you will be able to flush the buffer cache to disk, reboot the system immediately or dump some status information). This is accomplished by pressing various keys while holding SysRq (Alt+PrintScreen). It also works on a serial console (on PC hardware at least), if you send a BREAK and then within 5 seconds a command keypress. The keys are documented in Documentation/sysrq.txt. Don't say Y unless you really know what this hack does. the following lines: Hardware cache disabling support CONFIG_HWCACHE_DISABLE If you say Y here, you will have some control over the CPU hardware cache. More precisely, a new system call denoted as cacheset() may be issued by programs with root privilege to enable or disable the CPU hardware cache. 
b) run menuconfig and select the CONFIG_HWCACHE_DISABLE option in the kernel hacking menu c) recompile the kernel and check whether the help for the new option is correctly displayed d) read the include/linux/autoconf.h file to see whether CONFIG_HWCACHE_DISABLE is properly defined **************************************************************************** STEP 3: create a new directory linux/new_syscalls which will contain the new object file of the service routine sys_cacheset.o ***************************************************************************** a) mkdir new_syscalls ***************************************************************************** STEP 4: modify the main linux/Makefile ***************************************************************************** a) replace CORE_FILES =kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o with: CORE_FILES =kernel/kernel.o mm/mm.o fs/fs.o ipc/ipc.o new_syscalls/new_syscalls.o b) replace the line SUBDIRS =kernel drivers mm fs net ipc lib with: SUBDIRS =kernel drivers mm fs net ipc lib new_syscalls ***************************************************************************** STEP 5: Reserve a number for the new system call ***************************************************************************** a) add the following lines in the include/asm-i386/unistd.h file: #ifdef CONFIG_HWCACHE_DISABLE #define __NR_cacheset 225 #endif ***************************************************************************** STEP 6: Insert a new entry in the system call table ***************************************************************************** a) add in arch/i386/kernel/entry.S right after: .long SYMBOL_NAME(sys_gettid) the following lines (#ifdef and #endif must start from column 1): /* implementing new system calls inside the kernel */ #ifdef CONFIG_HWCACHE_DISABLE .long SYMBOL_NAME(sys_cacheset) #endif ***************************************************************************** STEP 7: Create the Makefile for the linux/new_syscalls 
directory ***************************************************************************** # # Makefile for linux/new_syscalls # # Note: dependencies are done automatically by "make dep", which also removes # any old dependency. DON'T put your own dependencies here unless it's something # special (ie not a .c file). # # Note2: the CFLAGS definition is now in the main Makefile... # O_TARGET := new_syscalls.o obj-m := obj-y += sys_cacheset.o include $(TOPDIR)/Rules.make ***************************************************************************** STEP 8: Add the sys_cacheset service routine in the new_syscalls directory ***************************************************************************** Thanks to the clean interface, the cd and nw parameters of the system call are now parameters of the service routine: /* */ /* linux/new_syscalls/sys_cacheset.c */ /* */ /* This file contains the service routine of sys_cacheset() */ /* */ #include #include #include #include #include #include #include unsigned long cacheset_clear, cacheset_or; asmlinkage int sys_cacheset(int cd, int nw) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; if ((cd == 0) && (nw == 0)) cacheset_or = 0x00000000; else if ((cd == 1) && (nw == 0)) cacheset_or = 0x40000000; else if ((cd == 1) && (nw == 1)) cacheset_or = 0x60000000; else { printk("<1>invalid sys_cacheset parameters\n"); return -1; } cacheset_clear = 0x9fffffff; /* AND mask used to reset flags cd and nw */ asm("pushl %eax\n\t" "movl %cr0,%eax\n\t" "andl cacheset_clear,%eax\n\t" "orl cacheset_or,%eax\n\t" "movl %eax,%cr0\n\t" "popl %eax"); return 0; } ***************************************************************************** STEP 9: Write a testing program in /tmp/Tcacheset.c ***************************************************************************** Since cacheset() makes use of two parameters, the testing program relies on the _syscall2 macro to interface with the system call handler inside the kernel: /* The Tcacheset test program is 
stored in /tmp/Tcacheset.c */ #include #include #include #include #define __NR_cacheset 225 _syscall2(int, cacheset, int, cd, int, nw) int main(int argc, char **argv) { if (argc != 3) { printf("ERROR: cacheset() makes use of two parameters\n"); exit(-1); } else { printf("argc value: %d\n", argc); printf("cd value: %d\n", atoi(argv[1])); printf("nw value: %d\n",atoi(argv[2])); /* printf("invoking cacheset() system call\n"); */ printf("\nreturn value of sys_cacheset(): %d \n", cacheset(atoi(argv[1]), atoi(argv[2]))); exit(0); } } ***************************************************************************** STEP 10: Test the system call: compare the time required to list all the directories in /usr/src/linux with hw cache enabled and disabled ***************************************************************************** a) run: time ls -R /usr/src/linux and check the time b) run /tmp/Tcacheset 1 1 time ls -R /usr/src/linux and check the time c) remember to re-enable the hw cache: /tmp/Tcacheset 0 0