diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/Config.in linux-2.4.9-wt1/drivers/perfctr/Config.in --- linux-2.4.9-wt1-2l/drivers/perfctr/Config.in Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/Config.in Sun Aug 19 22:14:58 2001 @@ -0,0 +1,12 @@ +# $Id: Config.in,v 1.7 2001/08/19 20:14:58 mikpe Exp $ +# Performance-monitoring counters driver configuration +# + +tristate 'Performance-monitoring counters support' CONFIG_PERFCTR +if [ "$CONFIG_PERFCTR" != "n" ]; then + define_bool CONFIG_KPERFCTR y + bool ' Additional internal consistency checks' CONFIG_PERFCTR_DEBUG + bool ' Init-time hardware tests' CONFIG_PERFCTR_INIT_TESTS + bool ' Virtual performance counters support' CONFIG_PERFCTR_VIRTUAL $CONFIG_PERFCTR + bool ' Global performance counters support' CONFIG_PERFCTR_GLOBAL $CONFIG_PERFCTR +fi diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/Makefile linux-2.4.9-wt1/drivers/perfctr/Makefile --- linux-2.4.9-wt1-2l/drivers/perfctr/Makefile Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/Makefile Wed Dec 20 23:39:06 2000 @@ -0,0 +1,42 @@ +# $Id: Makefile,v 1.4 2000/12/20 22:39:06 mikpe Exp $ +# Makefile for the Performance-monitoring counters driver. +# + +O_TARGET := kperfctr.o +driver-objs-y := init.o +driver-objs-$(CONFIG_X86) += x86.o +tests-objs-$(CONFIG_X86) := x86_tests.o +kernel-objs-$(CONFIG_X86) := x86_setup.o +driver-objs-$(CONFIG_PERFCTR_INIT_TESTS) += $(tests-objs-y) +driver-objs-$(CONFIG_PERFCTR_VIRTUAL) += virtual.o +stub-objs-$(CONFIG_PERFCTR)-$(CONFIG_PERFCTR_VIRTUAL) := virtual_stub.o +driver-objs-$(CONFIG_PERFCTR_GLOBAL) += global.o +m-objs-$(CONFIG_PERFCTR) := perfctr.o +y-objs-$(CONFIG_PERFCTR) := $(driver-objs-y) +kernel-objs-y += $(stub-objs-m-y) + +export-objs := $(kernel-objs-y) +obj-y := $(kernel-objs-y) $(y-objs-y) +obj-m := $(m-objs-m) +list-multi := perfctr.o +perfctr-objs := $(driver-objs-y) + +ifeq ($(VERSION).$(PATCHLEVEL),2.2) +multi-y := $(filter $(list-multi), $(obj-y)) +multi-m := $(filter $(list-multi), $(obj-m)) +int-y := $(sort $(foreach m, $(multi-y), $($(basename $(m))-objs))) +int-m := $(sort $(foreach m, $(multi-m), $($(basename $(m))-objs))) +obj-m := $(filter-out $(obj-y), $(obj-m)) +int-m := $(filter-out $(int-y), $(int-m)) +O_OBJS := $(sort $(filter-out $(export-objs), $(obj-y))) +OX_OBJS := $(sort $(filter $(export-objs), $(obj-y))) +M_OBJS := $(sort $(filter-out $(export-objs), $(obj-m))) +MX_OBJS := $(sort $(filter $(export-objs), $(obj-m))) +MI_OBJS := $(sort $(filter-out $(export-objs), $(int-m))) +MIX_OBJS := $(sort $(filter $(export-objs), $(int-m))) +endif + +include $(TOPDIR)/Rules.make + +perfctr.o: $(perfctr-objs) + $(LD) -r -o $@ $(perfctr-objs) diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/RELEASE-NOTES linux-2.4.9-wt1/drivers/perfctr/RELEASE-NOTES --- linux-2.4.9-wt1-2l/drivers/perfctr/RELEASE-NOTES Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/RELEASE-NOTES Tue Aug 28 17:01:45 2001 @@ -0,0 +1,536 @@ +$Id: RELEASE-NOTES,v 1.42 2001/08/28 15:01:45 mikpe Exp $ + +RELEASE NOTES +============= + +Version 2.1.1, 2001-08-28 +- Preliminary recognition of Pentium 4 processors, including + checking the IA32_MISC_ENABLE MSR. +- Moved %cr4 access functions from to + x86_compat.h, to work around changes in 2.4.9-ac3. +- More %cr4 cleanups possible since the removal of dodgy_tsc() + in Version 2.1: moved {set,clear}_in_cr4_local() into x86.c, + and eliminated the set_in_cr4() compat macro. 
+- Fixed a bug in x86.c:finalise_backpatching(): the fake cstatus
+  mustn't include i-mode counters unless we have PCINT support.
+  Failure to check this caused fatal init-time oopses in some
+  configs (CONFIG_X86_UP_APIC set but no local APIC in the CPU).
+- Minor comment updates in x86.c due to AMD #22007 Revision J.
+- Removed '%' before 'cr4' in printouts from x86_tests.c, to
+  avoid the '%' being mutated by log-reading user-space code.
+
+Version 2.1, 2001-08-19
+- Fixed a call backpatching bug, caused by an incompatibility
+  between the 2.4 and 2.2 kernels' xchg() macros. The 2.2 version
+  lacks a "volatile" causing gcc to remove the entire statement
+  if xchg() is used for side-effect only. Reverted to a plain
+  assignment, which is safe since the 2.0.1 backpatching changes.
+- Fixed a bug where an attempt to use /proc//perfctr on an
+  unsupported processor would cause a (well-behaved) kernel oops,
+  due to calling a NULL function pointer in x86.c; vperfctr_open()
+  now returns -ENODEV if virtual.c hasn't been initialised.
+- Removed the WinChip configuration option, the dodgy_tsc() callback,
+  and the clr_cap_tsc() x86_compat macro. WinChip users should configure
+  for generic 586 or less and use the kernel's "notsc" boot parameter.
+  This cleans up the driver and the 2.4 kernel patches, at the expense
+  of more code in the 2.2 kernel patches to implement "notsc" support.
+- Minor cleanup: moved version number definition from init.c to
+  a separate file, version.h.
+
+Version 2.0.1, 2001-08-14
+- The unsynchronised backpatching in x86.c didn't work on SMP,
+  due to Pentium III erratum E49, and similar errata for other
+  P6 processors. (The change in 2.0-pre6 was insufficient.)
+  x86.c now finalises the backpatching at driver init time,
+  by "priming" the relevant code paths. To make this feasible,
+  the isuspend() and iresume() methods are now merged into
+  the other high-level methods; virtual.c became a bit cleaner.
+- Removed obsolete "WinChip pmc_map[] must be identity" check.
+
+Version 2.0, 2001-08-08
+- Resurrected partial support for interrupt-mode virtual perfctrs.
+  virtual.c permits a single i-mode perfctr, in addition to TSC
+  and a number of a-mode perfctrs. BUG: The i-mode PMC must be last,
+  which constrains CPUs like the P6 where we currently restrict
+  the pmc_map[] to be the identity mapping. (Not a problem for
+  K7 since it is symmetric, or P4 since it is expected to use a
+  non-identity pmc_map[].)
+  New perfctr_cpu_ireload() procedure to force reload of i-mode
+  PMCs from their start values before resuming. Currently, this
+  just invalidates the CPU cache, which forces the following
+  iresume() and resume() to do the right thing.
+  perfctr_cpu_update_control() now calls setup_imode_start_values()
+  to "prime" i-mode PMCs from the control.ireset[] array.
+- Bug fix in perfctr_cpu_update_control(): start by clearing cstatus.
+  Prevents a failed attempt to update the control from leaving the
+  object in a state with old cstatus != 0 but new control.
+
+Version 2.0-pre7, 2001-08-07
+- Cleaned up the driver's debugging code (virtual, x86).
+- Internal driver rearrangements. The low-level driver (x86) now handles
+  sampling/suspending/resuming counters. Merged counter state (sums and
+  start values) and CPU control data to a single "CPU state" object.
+  This simplifies the high-level drivers, and permits some optimisations
+  in the low-level driver by avoiding the need to buffer tsc/pmc samples
+  in memory before updating the accumulated sums (not yet implemented).
+- Removed the read_counters, write_control, disable_rdpmc, and enable_rdpmc + methods from , since they have been obsoleted by the + new suspend/resume/sample methods. +- Rearranged the 'cstatus' encoding slightly by putting 'nractrs' in + the low 7 bits; this was done because 'nractrs' is retrieved more + often than 'nrctrs'. +- Removed the obsolete 'status' field from vperfctr_state. Exported + 'cstatus' and its access methods to user-space. (Remove the + control.tsc_on/nractrs/nrictrs fields entirely?) +- Removed WinChip "fake TSC" support. The user-space library can now + sample with slightly less overhead on sane processors. +- WinChip and VIA C3 now use p5mmx_read_counters() instead of their + own versions. + +Version 2.0-pre6, 2001-07-27 +- New patches for kernels 2.4.6, 2.4.7, and 2.4.7-ac1. +- Sampling bug fix for SMP. Normally processes are suspended and + resumed many times per second, but on SMP machines it is possible + for a process to run for a long time without being suspended. + Since sampling is performed at the suspend and resume actions, + a performance counter may wrap around more than once between + sampling points. When this occurs, the accumulated counts will + be highly variable and much lower than expected. + A software timer is now used to ensure that sampling deadlines + aren't missed on SMP machines. (The timer is run by the same code + which runs the ITIMER_VIRTUAL interval timer.) +- Bug fix in the x86 "redirect call" backpatching routine. To be + SMP safe, a bus-locked write to the code must be used. +- Bug fix in the internal debugging code (CONFIG_PERFCTR_DEBUG). + The "shadow" data structure used to detect if a process' perfctr + pointer has been clobbered could cause lockups with SMP kernels. + Rewrote the code to be simpler and more robust. +- Minor performance tweak for the P5/P5MMX read counters procedures, + to work around the P5's cache which doesn't allocate a cache line + on a write miss. +- To avoid undetected data layout mismatches, the user-space library + now checks the data layout version field in a virtual perfctr when + it is being mmap:ed into the user's address space. +- A few minor cleanups. + +Version 2.0-pre5, 2001-06-11 +- Internally use a single 'cstatus' field instead of the three + tsc_on/nractrs/nrictrs fields. Should reduce overhead slightly. +- Reorder the fields in cpu_control so that 'cstatus' and other + frequently used fields get small offsets -- avoids some disp32 + addressing modes in timing-critical code. +- Fixed a bug in p6_iresume where it forgot to invalidate the + EVNTSEL cache, causing p6_write_control to fail to reload the + MSRs. (K7 had a similar bug.) Since i-mode support is disabled + at the moment, no-one was actually bitten by this. +- Fixed another iresume/write_control cache invalidation bug where a + switch to an "uninitialised" CPU would fail to initialise the MSRs. +- Added a CONFIG_PERFCTR_DEBUG option to enable internal consistency + checks. Currently, this checks that a task's vperfctr pointer + isn't clobbered behind our backs, that resume and suspend for + a vperfctr are performed on the same CPU, and that the EVNTSEL + cache is semi-consistent when reloading is optimised away. + ("semi" because it only checks that the cache agrees with the + user's control data, and not that the cache agrees with the MSRs.) +- Minor cleanups. + +Version 2.0-pre4, 2001-04-30 +- Cleanups in x86.c. #defines introduced for magic constants. + More sharing of procedures between different CPU drivers. 
+ Fixed a bug where k7_iresume() could cause k7_write_control() + to fail to reload the correct EVNTSELs. + The WinChip C6/2/3 driver now "fakes" an incrementing TSC. +- General cleanups: s/__inline__/inline/ following Linux kernel + coding standards, and renamed the low-level control objects to + cpu_control to distinguish them from {v,g}perfctr_control objects. +- O_CREAT is now interpreted when /proc/self/perfctr is opened: + if the vperfctr does not exist, then it is created; if the + vperfctr does exist, then EEXIST is returned (unfortunately + O_EXCL doesn't work, since it's intercepted by the VFS layer). + "perfex -i" uses this to avoid having to create a vperfctr when + only an INFO command is to be issued. + libperfctr.c:vperfctr_open() uses this to decide whether to + UNLINK the newly opened vperfctr in case of errors or not. +- Cleaned up virtual.c's 2.4/2.2 VFS interface code a little, + and eliminated the OWNER_THIS_MODULE compat macro. +- Added MOD_{INC,DEC}_USE_COUNTs to virtual.c's file_operations + open and release procedures for 2.2 kernels. This should + simulate 2.4's fops_get/put at >open() and >release(). + +Version 2.0-pre3, 2001-04-17 +- Interrupt-mode virtual perfctrs are temporarily disabled since + x86.c doesn't yet detect which PMC overflowed. The old API + could be made to work, but it was broken anyway. +- Integrated the new P4-ready data structures and APIs. + The driver compiles but the user-space stuff hasn't been + updated yet, so there may be some remaining bugs. + + I have not yet committed to all details of this API. Some + things, like accumulating counters in virtual.c and global.c, + are uglier now, and going from a single "status == nrctrs" + field to three separate fields (tsc_on, nrctrs, nrictrs) + cannot be good for performance. + + In the new API the control information is split in separate + arrays depending on their use, i.e. a struct-of-arrays layout + instead of an array-of-struct layout. The advantage of the + struct-of-arrays layout is that it should cause fewer cache + lines to be touched at the performance-critical operations. + The disadvantage is that the layout changes whenever the + number of array elements has to be increased -- as is the + case for the future Pentium 4 support (18 counters). + +Version 2.0-pre2, 2001-04-07 +- Removed automatic inheritance of per-process virtual perfctrs + across fork(). Unless wait4() is modified, it's difficult to + communicate the final values back to the parent: the now + abandoned code did this in a way which made it impossible + to distinguish one child's final counts from another's. + Inheritance can be implemented in user-space anyway, so the + loss is not great. The interface between the driver and the rest + of the kernel is now smaller and simpler than before. +- Simulating cpu_khz by a macro in very old kernels broke since + there's also a struct field with that name :-( Instead of + putting the ugly workaround back in, I decided to drop support + for kernels older than 2.2.16. +- Preliminary support for the VIA C3 processor -- the C3 is + apparently a faster version of the VIA Cyrix III. +- Added rdtsc cost deduction to the init tests code, and changed + it to output per-instruction costs as well. +- More cleanups, making 2.2 compatibility crud less visible. 
+ +Version 2.0-pre1, 2001-03-25 +- First round of API and coding changes/cleanups for version 2.0: + made perfctr_info.version a string, moved some perfctr_info inits + to x86.c and eliminated some redundant variables, removed dead VFS + code from virtual.c, removed obsolete K7 tests from x86_tests.c, + removed mmu_cr4_features wrappers from x86_compat.h, minor cleanup + in virtual_stub.c. +- Fixed an include file problem which made some C compilers (not gcc) + fail when compiling user-space applications using the driver. +- Added missing EXPORT_SYMBOL declarations needed by the UP-APIC PM + code when the driver is built as a module. +- Preliminary changes in x86.c to deal with UP-APIC power management + issues in 2.4-ac kernels. The PM callback is only a stub for now. + +Version 1.9, 2001-02-13 +- Fixed compilation problems for 2.2 and SMP kernels. +- Found updated documentation on "VIA Cyrix III". Apparently, there + are two distinct chips: the older Joshua (a Cyrix design) and the + newer Samuel (a Centaur design). Our current code supported Joshua, + but mistook Samuel for Joshua. Corrected the identification of Samuel + and added explicit support for it. Samuel's EVNTSEL1 is not well- + documented, so there are some new Samuel-specific tests in x86_tests.c. +- Added preliminary interrupt-mode support for AMD K7. +- Small tweaks to virtual.c's interrupt handling. + +Version 1.8, 2001-01-23 +- Added preliminary interrupt-mode support to virtual perfctrs. + Currently for P6 only, and the local APIC must have been enabled. + Tested on 2.4.0-ac10 with CONFIG_X86_UP_APIC=y. + When an i-mode vperfctr interrupts on overflow, the counters are + suspended and a user-specified signal is sent to the process. The + user's signal handler can read the trap pc from the mmap:ed vperfctr, + and should then issue an IRESUME ioctl to restart the counters. + The next version will support buffering and automatic restart. +- Some cleanups in the x86.c init and exit code. Removed the implicit + smp_call_function() calls from x86_compat.h. + +Version 1.7, 2001-01-01 +- Updated Makefile for 2.4.0-test13-pre3 Rules.make changes. +- Removed PERFCTR_ATTACH ioctl from /dev/perfctr, making the + vperfctrs only accessible via /proc/self/perfctr. Removed + the "attach" code from virtual.c, and temporarily commented + out the "vperfctr fs" code. Moved /dev/perfctr initialisation + and implementation from init.c to global.c. +- Eliminated CONFIG_VPERFCTR_PROC, making /proc/pid/perfctr + mandatory if CONFIG_PERFCTR_VIRTUAL is set. +- Some 2.2/2.4 compatibility cleanups. +- VIA Cyrix III detection bug fix. Contrary to VIA's documentation, + the Cyrix III vendor field is Centaur, not Cyrix. + +Version 1.6, 2000-11-21 +- Preliminary implementation of /proc/pid/perfctr. Seems to work, + but virtual.c and virtual_stub.c is again filled with + #if LINUX_VERSION_CODE crap which will need to be cleaned up. + The INFO ioctl is now implemented by vperfctrs too, to avoid the + need for opening /dev/perfctr. +- virtual.c now puts the perfctr pointer in filp->private_data + instead of inode->u.generic_ip. The main reason for this change + is that proc-fs places a dentry pointer in inode->u.generic_ip. +- sys_vperfctr_control() no longer resets the virtual TSC + if it already is active. The virtual TSC therefore runs + continuously from its first activation until the process + stops or unlinks its vperfctrs. +- Updates for 2.4.0-test11pre6. Use 2.4-style cpu_has_XXX + feature testing macros. 
Updated x86_compat.h to implement + missing cpu_has_mmx and cpu_has_msr, and compatibility + macros for 2.2. Changed vperfctr_fs_read_super() to use + new_inode(sb) instead of get_empty_inode() + some init code. +- Updates for 2.4.0-test9. Fixed x86_compat.h for cpu_khz change. + Since drivers/Makefile was converted to the new list style, + it became more difficult to handle CONFIG_PERFCTR=m. Changed + Config.in to set CONFIG_KPERFCTR=y when CONFIG_PERFCTR != n, + resulting in a much cleaner kernel patch for 2.4.0-test9. +- Removed d_alloc_root wrapper since 2.2 doesn't need it any more. +- When building for 2.2.18pre, use some of its 2.4 compatibility + features (module_init, module_exit and DECLARE_MUTEX). +- Updates for 2.4.0-test8: repaired kernel patch for new parameter + in do_fork, and fixed CLONE_PERFCTR conflict with CLONE_THREAD. + +Version 1.5, 2000-09-03 +- Dropped support for intermediate 2.3 and early 2.4.0-test kernels. + The code now supports kernels 2.2.xx and 2.4.0-test7 or later only. + Cleanups in compat.h and virtual.c. +- Rewrote the Makefile to use object file lists instead of conditionals. + This gets slightly hairy since kernel extensions are needed even + when the driver proper is built as a module. +- Removed the definition of CONFIG_PERFCTR_X86 from Config.in. + Use the 2.4 standard CONFIG_X86 instead. The 2.2.xx kernel + patches now define CONFIG_X86 in arch/i386/config.in. +- Cleaned up the vperfctr inheritance filter. Instead of setting + a disable flag (CLONE_KTHREAD) when kernel-internal threads are + created, I now set CLONE_PERFCTR in sys_fork and sys_vfork. +- /dev/perfctr no longer accepts the SAMPLE and UNLINK ioctls. + All operations pertaining to a process' virtual perfctrs must + be applied to the fd returned from the ATTACH ioctl. +- Removed the remote-control features from the virtual perfctrs. + Significant simplifications in virtual.c. Removed some now + unused stuff from compat.h and virtual_stub.c. + +Version 1.4, 2000-08-11 +- Fixed a memory leak bug in virtual.c. An extraneous dget() in + get_vperfctr_filp() prevented reclaiming the dentry and inode + allocated for a vperfctr file. +- Major changes to the VFS interface in virtual.c. Starting with + 2.4.0-test6, inode->i_sb == NULL no longer works. Added code to + register a "vperfctr" fs and define a superblock and a mount point. + Completely rewrote the dentry init code. Most of the new code is + adapted from fs/pipe.c, with simplifications and macros to continue + supporting 2.2.x kernels. `ls -l /proc/*/fd/' now prints recognizable + names for vperfctr files. +- Cleaned up virtual.c slightly. Removed "#if 1" tests around the + vperfctr inheritance code. Rewrote vperfctr_alloc and vperfctr_free + to use the virt_to_page and {Set,Clear}PageReserved macros; + also updated compat.h to provide these for older kernels. +- Updated for 2.4.0-test3: a dummy `open' file operation is no longer + required by drivers/char/misc.c. +- Updated for `owner' field in file_operations added in 2.4.0-test2. + Removed MOD_{INC,DEC}_USE_COUNT from init.c (except when compiling + for 2.2.x) and virtual.c. Added MOD_{INC,DEC}_USE_COUNT to the + reserve/release functions in x86.c -- needed because the driver + may be active even if no open file refers to it. Using can_unload + in the module struct instead is possible but not as tidy. 
+ +Version 1.3, 2000-06-29 +- Implemented inheritance for virtual perfctrs: fork() copies the + evntsel data to the child, exit() stops the child's counters but + does not detach the vperfctr object, and wait() adds the child's + counters to the parent's `children' counters. + Added a CLONE_KTHREAD flag to prevent inheritance to threads + created implicitly by request_module() and kernel_thread(). +- Fixed a half-broken printk() in x86_tests.c. +- Added checks to virtual.c to prevent the remote-control interface + from trying to activate dead vperfctrs. +- Updated vperfctr_attach() for changes in 2.3.99-pre7 and 2.4.0-test2. +- Fixed a problem introduced in 1.2 which caused linker errors if + CONFIG_PERFCTR=m and CONFIG_PERFCTR_INIT_TESTS=y. +- Export CPU kHz via a new field in PERFCTR_INFO ioctl, to enable + user-space to map accumulated TSC counts to actual time. + +Version 1.2, 2000-05-24 +- Added support for generic x86 processors with a time-stamp counter + but no performance-monitoring counters. By using the driver to + virtualise the TSC, accurate cycle-count measurements are now + possible on PMC-less processors like the AMD K6. +- Removed some of the special-casing of the x86 time-stamp counter. + It's now "just another counter", except that no evntsel is + needed to enable it. +- WinChip bug fix: the "fake TSC" code would increment an + uninitialised counter. +- Reorganised the x86 driver. Moved the optional init-time testing + code to a separate source file. +- Miscellaneous code cleanups and naming convention changes. + +Version 1.1, 2000-05-13 +- vperfctr_attach() now accepts pid 0 as an alias for the current + process. This reduces the number of getpid() calls needed in + the user-space library. (Suggested by Ulrich Drepper.) +- Added support for the VIA Cyrix III processor. +- Tuned the x86 driver interface. Replaced function pointers + with stubs which rewrite callers to invoke the correct callees. +- Added ARRAY_SIZE definition to compat.h for 2.2.x builds. +- Updated for 2.3.48 inode changes. +- Moved code closer to 2.3.x coding standards. Removed init_module + and cleanup_module, added __exit, module_init, and module_exit, + and extended "compat.h" accordingly. Cleaned up + and a little. + +Version 1.0, 2000-01-31 +- Prepared the driver to cope with non-x86 architectures: + - Moved generic parts of to . + - Merged driver's private "x86.h" into . + - Config.in now defines CONFIG_PERFCTR_${ARCH}, and Makefile uses + it to select appropriate arch-dependent object files +- The driver now reads the low 32 bits of the counters, + instead of 40 or 48 bits zero-extended to 64 bits. + Sums are still 64 bits. This was done to reduce the number + of cache lines needed for certain data structures, to + simplify and improve the performance of the sampling + procedures, and to change 64+(64-64) arithmetic to 64+(32-32) + for the benefit of gcc on x86. This change doesn't reduce + precision, as long as no event occurs more than 2^32 times + between two sampling points. +- PERFCTR_GLOBAL_READ now forces all CPUs to be sampled, if the + sampling timer isn't running. + +Version 0.11, 2000-01-30 +- Added a missing EXPORT_SYMBOL which prevented the driver + from being built as a module in SMP kernels. +- Support for the CPU sampling instructions (i.e. RDPMC and + RDTSC on x86) is now announced explicitly by PERFCTR_INFO. +- The x86 hardware driver now keeps CR4.PCE globally enabled. + There are two reasons for this. First, the cost of toggling + this flag at process suspend/resume is high. 
Second, changes in kernel 2.3.40 imply that any processor's %cr4 may be updated asynchronously from the global variable mmu_cr4_features.
+
+Version 0.10, 2000-01-23
+- Added support for global-mode perfctrs (global.c).
+- There is now a config option controlling whether to
+  perform init-time hardware tests or not.
+- Added a hardware reserve/release mechanism so that multiple
+  high-level services don't simultaneously use the hardware.
+- The driver is now officially device .
+- Tuned the 64-bit tsc/msr/pmc read operations in x86.c.
+- Support for virtual perfctrs can now be enabled or disabled
+  via CONFIG_PERFCTR_VIRTUAL.
+- Added support for the WinChip 3 processor.
+- Split the code into several files: x86.c (x86 drivers),
+  virtual.c (virtualised perfctrs), setup.c (boot-time actions),
+  init.c (driver top-level and init code).
+
+Version 0.9, 2000-01-02
+- The driver can now be built as a module.
+- Dropped sys_perfctr() system call and went back to using a
+  /dev/perfctr character device. Generic operations are now
+  ioctl commands on /dev/perfctr, and control operations on
+  virtual perfctrs are ioctl commands on their file descriptors.
+  Initially this change was done because new system calls in 2.3.x
+  made maintenance and binary compatibility with 2.2.x hard, but
+  the new API is actually cleaner than the previous system call.
+- Moved this code from arch/i386/kernel/ to drivers/perfctr/.
+
+Version 0.8, 1999-11-14
+- Made the process management callback functions inline to
+  reduce scheduling overhead for processes not using perfctrs.
+- Changed the 'status' field to contain the number of active
+  counters. Changed read_counters, write_control, and accumulate
+  to use this information to avoid unnecessary work.
+- Fixed a bug in k7_check_control() which caused it to
+  require all four counters to be enabled.
+- Fixed sys_perfctr() to return -ENODEV instead of -ENOSYS
+  if the processor doesn't support perfctrs.
+- Some code cleanups.
+- Evntsel MSRs are updated lazily, and counters are not written to.
+
+  The following table lists the costs (in cycles) of various
+  instructions which access the counter or evntsel registers.
+  The table was derived from data collected by init-time tests
+  run by previous versions of this driver.
+
+  Processor          P5  P5MMX  PII  PIII   K7
+  Clock freq. (MHz) 133    233  266   450  500
+
+  RDPMC             n/a     14   31    36   13
+  RDMSR (counter)    29     28   81    80   52
+  WRMSR (counter)    35     37   97   115   80
+  WRMSR (evntsel)    33     37   88   105  232
+
+  Several things are apparent from this table:
+
+  1. It's much cheaper to use RDPMC than RDMSR to read the counters.
+  2. It's much more expensive to reset a counter than to read it.
+  3. It's expensive to write to an evntsel register.
+
+  As of version 0.8, this driver uses the following strategies:
+  * The evntsel registers are updated lazily. A per_cpu_control[]
+    array caches the contents of each CPU's evntsel registers,
+    and only when a process requires a different setup are the
+    evntsel registers written to. In most cases, this eliminates the
+    need to reprogram the evntsel registers when switching processes.
+    The older drivers would write to the evntsel registers both at
+    process suspend and resume.
+  * The counter registers are read both at process resume and suspend,
+    and the difference is added to the process' accumulated counters.
+    The older drivers would reset the counters at resume, read them
+    at suspend, and add the values read to the accumulated counters.
+  * Only those registers enabled by the user's control information
+    are manipulated, instead of blindly manipulating all of them.
+
+Version 0.7 1999-10-25
+- The init-time checks in version 0.6 of this driver showed that
+  RDMSR is a lot slower than RDPMC for reading the PMCs. The driver
+  now uses RDPMC instead of RDMSR whenever possible.
+- Added an mmap() operation to perfctr files. This allows any client
+  to read the accumulated counter state without making a system call.
+  The old "sync to user-provided buffer" method has been removed,
+  as it entailed additional copy operations and only worked for the
+  "active" process. The PERFCTR_READ operation has been replaced
+  by a simpler PERFCTR_SAMPLE operation, for the benefit of pre-MMX
+  Intel P5 processors which cannot sample counters in user-mode.
+  This rewrite actually simplified the code.
+- The AMD K7 should now be supported correctly. The init-time checks
+  in version 0.6 of this driver revealed that each K7 counter has
+  its own ENable bit. (Thanks to Nathan Slingerland for running the
+  test and reporting the results to me.)
+- Plugged a potential memory leak in perfctr_attach_task().
+- No longer piggyback on prctl(); sys_perfctr() is a real system call.
+- Some code cleanups.
+
+Version 0.6 1999-09-08
+- Temporarily added some init-time code that checks the
+  costs of RDPMC/RDMSR/WRMSR operations applied to perfctr MSRs,
+  the semantics of the ENable bit on the Athlon, and gets
+  the boot-time value of the WinChip CESR register.
+  This code can be turned off by #defining INIT_DEBUG to 0.
+- Preliminary support for the AMD K7 Athlon processor.
+- The code will now build in both 2.3.x and 2.2.x kernels.
+
+Version 0.5 1999-08-29
+- The user-space buffer is updated whenever state.status changes,
+  even when a remote command triggers the change.
+- Reworked and simplified the high-level code. All accesses
+  now require an attached file in order to implement proper
+  accounting and synchronisation. The only exception is UNLINK:
+  a process may always UNLINK its own PMCs.
+- Fixed counting bug in sys_perfctr_read().
+- Improved support for the Intel Pentium III.
+- Another WinChip fix: fake TSC update at process resume.
+- The code should now be safe for 'gcc -fstrict-aliasing'.
+
+Version 0.4 1999-07-31
+- Implemented PERFCTR_ATTACH and PERFCTR_{READ,CONTROL,STOP,UNLINK}
+  on attached perfctrs. An attached perfctr is represented as a file.
+- Fixed an error in the WinChip-specific code.
+- Perfctrs now survive exec().
+
+Version 0.3 1999-07-22
+- Interface now via sys_prctl() instead of /dev/perfctr.
+- Added NYI stubs for accessing other processes' perfctrs.
+- Moved to dynamic allocation of a task's perfctr state.
+- Minor code cleanups.
+
+Version 0.2 1999-06-07
+- Added support for WinChip CPUs.
+- Restart counters from zero, not their previous values. This
+  corrected a problem for Intel P6 (WRMSR writes 32 bits to a PERFCTR
+  MSR and then sign-extends to 40 bits), and also simplified the code.
+- Added support for syncing the kernel's counter values to a user-
+  provided buffer each time a process is resumed. This feature, and
+  the fact that the driver enables RDPMC in processes using PMCs,
+  allows user-level computation of a process' accumulated counter
+  values without incurring the overhead of making a system call.
+
+Version 0.1 1999-05-30
+- First public release.
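
The lazy evntsel strategy described in the version 0.8 notes above amounts to a per-CPU cache in front of WRMSR: a write is issued only when the requested event-select value differs from what that CPU already has loaded. The following is a minimal sketch of that idea for the P6 case only; it is not code from this patch, and the names NR_EVNTSELS, P6_MSR_EVNTSEL0, evntsel_cache and write_evntsels_lazily are invented for illustration rather than taken from the driver.

/* Sketch only -- not part of this patch.  Lazily update the P6
 * event-select MSRs: keep a per-CPU copy of the values last written
 * and skip the expensive WRMSR when nothing has changed.
 */
#include <linux/threads.h>	/* NR_CPUS */
#include <linux/smp.h>		/* smp_processor_id() */
#include <asm/msr.h>		/* wrmsr() */

#define NR_EVNTSELS	2	/* P6 has two event-select MSRs */
#define P6_MSR_EVNTSEL0	0x186	/* EVNTSEL1 is 0x187 */

static unsigned int evntsel_cache[NR_CPUS][NR_EVNTSELS];

static void write_evntsels_lazily(const unsigned int *evntsel)
{
	unsigned int cpu = smp_processor_id();
	int i;

	for(i = 0; i < NR_EVNTSELS; ++i) {
		if( evntsel_cache[cpu][i] != evntsel[i] ) {
			evntsel_cache[cpu][i] = evntsel[i];
			wrmsr(P6_MSR_EVNTSEL0 + i, evntsel[i], 0);
		}
	}
}

With per-process control data that rarely changes, most context switches hit the cache and avoid the comparatively expensive writes to the event-select registers shown in the table above.
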
diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/TODO linux-2.4.9-wt1/drivers/perfctr/TODO --- linux-2.4.9-wt1-2l/drivers/perfctr/TODO Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/TODO Tue Aug 14 23:10:34 2001 @@ -0,0 +1,28 @@ +$Id: TODO,v 1.20 2001/08/14 21:10:34 mikpe Exp $ + +- Add buffering and automatic restart to i-mode vperfctrs. + +- Add interrupt support to global-mode perfctrs. + +- On 2.4 make /proc/self/perfctr a symlink to "perfctr:[XXXX]" + so that the open file descriptor can be identified as a perfctr + object. + +- Put 2.4 and 2.2 VFS code in separate files, at least for virtual. + +- Use the "rewrite function call sites" trick to get rid of the + process scheduling callback function pointers. + (Only relevant when the driver is a module.) + +- Global-mode perfctrs need some form of access control mechanism. + +- Implement perfctr multiplexing. + Use a timer to cycle through a array. + +- Add support for other architectures. + Start with the relatively straightforward UltraSPARC. + +- Add Documentation/perfctr/ ? + +- Pentium MMX and Pentium Pro have errata which affect SMM and CR4. + Will this hit us, or is it a problem for the BIOS to solve? diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/compat.h linux-2.4.9-wt1/drivers/perfctr/compat.h --- linux-2.4.9-wt1-2l/drivers/perfctr/compat.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/compat.h Fri Jul 27 00:40:18 2001 @@ -0,0 +1,54 @@ +/* $Id: compat.h,v 1.16 2001/07/26 22:40:18 mikpe Exp $ + * Performance-monitoring counters driver. + * Compatibility definitions for 2.2/2.4 kernels. + * + * Copyright (C) 1999-2001 Mikael Pettersson + */ +#include +#include + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) + +#define vma_pgoff(vma) ((vma)->vm_pgoff) +#define task_thread(tsk) (&(tsk)->thread) +#define proc_pid_inode_denotes_task(inode,tsk) \ + ((tsk) == (inode)->u.proc_i.task) + +#else /* 2.4 simulation for 2.2 */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,2,18) +#ifdef MODULE +#define module_init(x) int init_module(void) { return x(); } +#define module_exit(x) void cleanup_module(void) { x(); } +#else +#define module_init(x) /* explicit call is needed */ +#define module_exit(x) /* empty */ +#endif /* MODULE */ +#define DECLARE_MUTEX(name) struct semaphore name = MUTEX +#endif /* < 2.2.18 */ + +#define proc_pid_inode_denotes_task(inode,tsk) \ + ((tsk)->pid == ((inode)->i_ino >> 16)) + +#define virt_to_page(kaddr) (mem_map + MAP_NR(kaddr)) + +#define fops_get(fops) (fops) + +#define vma_pgoff(vma) ((vma)->vm_offset) /* NOT, but suffices for != 0 */ + +#define get_zeroed_page(mask) get_free_page((mask)) +#define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) +#define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +#ifdef MODULE +#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) +#define __exit /* empty */ +#else +#define __exit __attribute__((unused, __section__("text.init"))) +#endif + +#define task_thread(tsk) (&(tsk)->tss) + +#endif /* 2.4 simulation for 2.2 */ diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/global.c linux-2.4.9-wt1/drivers/perfctr/global.c --- linux-2.4.9-wt1-2l/drivers/perfctr/global.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/global.c Tue Aug 7 20:16:07 2001 @@ -0,0 +1,277 @@ +/* $Id: global.c,v 1.9 2001/08/07 18:16:07 mikpe Exp $ + * Global-mode performance-monitoring counters via /dev/perfctr. 
+ * + * Copyright (C) 2000-2001 Mikael Pettersson + * + * XXX: Doesn't do any authentication yet. Should we limit control + * to root, or base it on having write access to /dev/perfctr? + */ +#include +#define __NO_VERSION__ +#include +#include +#include +#include +#include +#include + +#include + +#include "compat.h" +#include "global.h" + +static const char this_service[] = __FILE__; +static int hardware_is_ours = 0; +static struct timer_list sampling_timer; + +static unsigned int nr_active_cpus = 0; + +struct gperfctr { + struct perfctr_cpu_state cpu_state; + spinlock_t lock; +} __attribute__((__aligned__(SMP_CACHE_BYTES))); + +static struct gperfctr per_cpu_gperfctr[NR_CPUS] __cacheline_aligned; + +static int reserve_hardware(void) +{ + const char *other; + + if( hardware_is_ours ) + return 0; + other = perfctr_cpu_reserve(this_service); + if( other ) { + printk(KERN_ERR __FILE__ ": " __FUNCTION__ + ": failed because hardware is taken by '%s'\n", + other); + return -EBUSY; + } + hardware_is_ours = 1; + MOD_INC_USE_COUNT; + return 0; +} + +static void release_hardware(void) +{ + nr_active_cpus = 0; + if( hardware_is_ours ) { + hardware_is_ours = 0; + del_timer(&sampling_timer); + sampling_timer.data = 0; + perfctr_cpu_release(this_service); + MOD_DEC_USE_COUNT; + } +} + +static void sample_this_cpu(void *unused) +{ + struct gperfctr *perfctr; + + perfctr = &per_cpu_gperfctr[smp_processor_id()]; + if( !perfctr_cstatus_enabled(perfctr->cpu_state.cstatus) ) + return; + spin_lock(&perfctr->lock); + perfctr_cpu_sample(&perfctr->cpu_state); + spin_unlock(&perfctr->lock); +} + +static void sample_all_cpus(void) +{ + smp_call_function(sample_this_cpu, NULL, 1, 1); + sample_this_cpu(NULL); +} + +static void sampling_timer_function(unsigned long interval) +{ + sample_all_cpus(); + sampling_timer.expires = jiffies + interval; + add_timer(&sampling_timer); +} + +static unsigned long usectojiffies(unsigned long usec) +{ + /* based on kernel/itimer.c:tvtojiffies() */ + usec += 1000000 / HZ - 1; + usec /= 1000000 / HZ; + return usec; +} + +static void start_sampling_timer(unsigned long interval_usec) +{ + if( interval_usec > 0 ) { + unsigned long interval = usectojiffies(interval_usec); + init_timer(&sampling_timer); + sampling_timer.function = sampling_timer_function; + sampling_timer.data = interval; + sampling_timer.expires = jiffies + interval; + add_timer(&sampling_timer); + } +} + +static void start_this_cpu(void *unused) +{ + struct gperfctr *perfctr; + + perfctr = &per_cpu_gperfctr[smp_processor_id()]; + if( perfctr_cstatus_enabled(perfctr->cpu_state.cstatus) ) + perfctr_cpu_resume(&perfctr->cpu_state); +} + +static void start_all_cpus(void) +{ + smp_call_function(start_this_cpu, NULL, 1, 1); + start_this_cpu(NULL); +} + +static int gperfctr_control(struct gperfctr_control *argp) +{ + unsigned long interval_usec; + unsigned int nrcpus, i; + int last_active, ret; + struct gperfctr *perfctr; + struct perfctr_cpu_control cpu_control; + static DECLARE_MUTEX(control_mutex); + + if( nr_active_cpus > 0 ) + return -EBUSY; /* you have to stop them first */ + if( get_user(interval_usec, &argp->interval_usec) ) + return -EFAULT; + if( get_user(nrcpus, &argp->nrcpus) ) + return -EFAULT; + if( nrcpus > smp_num_cpus ) + return -EINVAL; + down(&control_mutex); + last_active = -1; + for(i = 0; i < nrcpus; ++i) { + ret = -EFAULT; + if( copy_from_user(&cpu_control, + &argp->cpu_control[i], + sizeof cpu_control) ) + goto out_up; + /* we don't permit i-mode counters */ + ret = -EPERM; + if( cpu_control.nrictrs != 
0 ) + goto out_up; + perfctr = &per_cpu_gperfctr[cpu_logical_map(i)]; + spin_lock(&perfctr->lock); + perfctr->cpu_state.control = cpu_control; + memset(&perfctr->cpu_state.sum, 0, sizeof perfctr->cpu_state.sum); + ret = perfctr_cpu_update_control(&perfctr->cpu_state); + spin_unlock(&perfctr->lock); + if( ret < 0 ) + goto out_up; + if( perfctr_cstatus_enabled(perfctr->cpu_state.cstatus) ) + last_active = i; + } + for(; i < smp_num_cpus; ++i) { + perfctr = &per_cpu_gperfctr[cpu_logical_map(i)]; + memset(&perfctr->cpu_state, 0, sizeof perfctr->cpu_state); + } + nr_active_cpus = ret = last_active + 1; + if( ret > 0 ) { + if( reserve_hardware() < 0 ) { + nr_active_cpus = 0; + ret = -EBUSY; + } else { + start_all_cpus(); + start_sampling_timer(interval_usec); + } + } + out_up: + up(&control_mutex); + return ret; +} + +static int gperfctr_read(struct gperfctr_state *arg) +{ + unsigned nrcpus, i; + struct gperfctr *perfctr; + struct gperfctr_cpu_state state; + + if( get_user(nrcpus, &arg->nrcpus) ) + return -EFAULT; + if( nrcpus > smp_num_cpus ) + nrcpus = smp_num_cpus; + if( sampling_timer.data == 0 ) /* no timer; sample now */ + sample_all_cpus(); + for(i = 0; i < nrcpus; ++i) { + perfctr = &per_cpu_gperfctr[cpu_logical_map(i)]; + spin_lock(&perfctr->lock); + state.cpu_control = perfctr->cpu_state.control; + state.sum = perfctr->cpu_state.sum; + spin_unlock(&perfctr->lock); + if( copy_to_user(&arg->cpu_state[i], &state, sizeof state) ) + return -EFAULT; + } + return nr_active_cpus; +} + +static int gperfctr_stop(void) +{ + release_hardware(); + return 0; +} + +static int dev_perfctr_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + switch( cmd ) { + case PERFCTR_INFO: + return sys_perfctr_info((struct perfctr_info*)arg); + case GPERFCTR_CONTROL: + return gperfctr_control((struct gperfctr_control*)arg); + case GPERFCTR_READ: + return gperfctr_read((struct gperfctr_state*)arg); + case GPERFCTR_STOP: + return gperfctr_stop(); + } + return -EINVAL; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) +static struct file_operations dev_perfctr_file_ops = { + .owner = THIS_MODULE, + .ioctl = dev_perfctr_ioctl, +}; +#else +static int dev_perfctr_open(struct inode *inode, struct file *filp) +{ + MOD_INC_USE_COUNT; + return 0; +} +static int dev_perfctr_release(struct inode *inode, struct file *filp) +{ + MOD_DEC_USE_COUNT; + return 0; +} +static struct file_operations dev_perfctr_file_ops = { + .open = dev_perfctr_open, + .release = dev_perfctr_release, + .ioctl = dev_perfctr_ioctl, +}; +#endif + +static struct miscdevice dev_perfctr = { + .minor = 182, + .name = "perfctr", + .fops = &dev_perfctr_file_ops, +}; + +int __init gperfctr_init(void) +{ + int i, err; + + if( (err = misc_register(&dev_perfctr)) != 0 ) { + printk(KERN_ERR "/dev/perfctr: failed to register, errno %d\n", + -err); + return err; + } + for(i = 0; i < smp_num_cpus; ++i) + per_cpu_gperfctr[i].lock = SPIN_LOCK_UNLOCKED; + return 0; +} + +void gperfctr_exit(void) +{ + misc_deregister(&dev_perfctr); +} diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/global.h linux-2.4.9-wt1/drivers/perfctr/global.h --- linux-2.4.9-wt1-2l/drivers/perfctr/global.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/global.h Tue May 1 01:31:14 2001 @@ -0,0 +1,13 @@ +/* $Id: global.h,v 1.3 2001/04/30 23:31:14 mikpe Exp $ + * Global-mode performance-monitoring counters. 
+ * + * Copyright (C) 2000-2001 Mikael Pettersson + */ + +#ifdef CONFIG_PERFCTR_GLOBAL +extern int gperfctr_init(void); +extern void gperfctr_exit(void); +#else +static inline int gperfctr_init(void) { return 0; } +static inline void gperfctr_exit(void) { } +#endif diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/init.c linux-2.4.9-wt1/drivers/perfctr/init.c --- linux-2.4.9-wt1-2l/drivers/perfctr/init.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/init.c Sun Aug 19 21:43:29 2001 @@ -0,0 +1,61 @@ +/* $Id: init.c,v 1.35 2001/08/19 19:43:29 mikpe Exp $ + * Performance-monitoring counters driver. + * Top-level initialisation code. + * + * Copyright (C) 1999-2001 Mikael Pettersson + */ +#include +#include +#include +#include +#include +#include + +#include + +#include "compat.h" +#include "virtual.h" +#include "global.h" +#include "version.h" + +struct perfctr_info perfctr_info = { + .version = VERSION +#ifdef CONFIG_PERFCTR_DEBUG + " DEBUG" +#endif +}; + +int sys_perfctr_info(struct perfctr_info *argp) +{ + if( copy_to_user(argp, &perfctr_info, sizeof perfctr_info) ) + return -EFAULT; + return 0; +} + +int __init perfctr_init(void) +{ + int err; + if( (err = perfctr_cpu_init()) != 0 ) { + printk(KERN_INFO "perfctr: not supported by this processor\n"); + return err; + } + if( (err = vperfctr_init()) != 0 ) + return err; + if( (err = gperfctr_init()) != 0 ) + return err; + printk(KERN_INFO "perfctr: driver %s, cpu type %s at %lu kHz\n", + perfctr_info.version, + perfctr_cpu_name[perfctr_info.cpu_type], + perfctr_info.cpu_khz); + return 0; +} + +void __exit perfctr_exit(void) +{ + gperfctr_exit(); + vperfctr_exit(); + perfctr_cpu_exit(); +} + +module_init(perfctr_init) +module_exit(perfctr_exit) diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/version.h linux-2.4.9-wt1/drivers/perfctr/version.h --- linux-2.4.9-wt1-2l/drivers/perfctr/version.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/version.h Tue Aug 28 17:01:45 2001 @@ -0,0 +1 @@ +#define VERSION "2.1.1" diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/virtual.c linux-2.4.9-wt1/drivers/perfctr/virtual.c --- linux-2.4.9-wt1-2l/drivers/perfctr/virtual.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/virtual.c Sun Aug 19 21:50:36 2001 @@ -0,0 +1,635 @@ +/* $Id: virtual.c,v 1.31 2001/08/19 19:50:36 mikpe Exp $ + * Virtual per-process performance counters. + * + * Copyright (C) 1999-2001 Mikael Pettersson + */ +#include +#define __NO_VERSION__ +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "compat.h" +#include "virtual.h" + +/**************************************************************** + * * + * Data types and macros. 
* + * * + ****************************************************************/ + +struct vperfctr { +/* User-visible fields: (must be first for mmap()) */ + struct vperfctr_state state; +/* Kernel-private fields: */ + atomic_t count; +#ifdef CONFIG_SMP + unsigned int sampling_timer; +#endif +#ifdef CONFIG_PERFCTR_DEBUG + unsigned start_smp_id; + unsigned suspended; +#endif +#if PERFCTR_INTERRUPT_SUPPORT + unsigned int iresume_cstatus; +#if 0 + struct perfctr_ibuf_entry ientry; +#endif +#endif +}; +#define IS_RUNNING(perfctr) perfctr_cstatus_enabled((perfctr)->state.cpu_state.cstatus) +#define IS_IMODE(perfctr) perfctr_cstatus_has_ictrs((perfctr)->state.cpu_state.cstatus) + +#ifdef CONFIG_PERFCTR_DEBUG +#define debug_free(perfctr) \ +do { \ + int i; \ + for(i = 0; i < PAGE_SIZE/sizeof(int); ++i) \ + ((int*)(perfctr))[i] = 0xfedac0ed; \ +} while( 0 ) +#define debug_init(perfctr) do { (perfctr)->suspended = 1; } while( 0 ) +#define debug_suspend(perfctr) \ +do { \ + if( (perfctr)->suspended ) \ + printk(KERN_ERR __FUNCTION__ ": BUG! suspending non-running perfctr (pid %d, comm %s)\n", \ + current->pid, current->comm); \ + (perfctr)->suspended = 1; \ +} while( 0 ) +#define debug_resume(perfctr) \ +do { \ + if( !(perfctr)->suspended ) \ + printk(KERN_ERR __FUNCTION__ ": BUG! resuming non-suspended perfctr (pid %d, comm %s)\n", \ + current->pid, current->comm); \ + (perfctr)->suspended = 0; \ +} while( 0 ) +#define debug_check_smp_id(perfctr) \ +do { \ + if( (perfctr)->start_smp_id != smp_processor_id() ) { \ + printk(KERN_ERR __FUNCTION__ ": BUG! current cpu %u differs from start cpu %u (pid %d, comm %s)\n", \ + smp_processor_id(), (perfctr)->start_smp_id, \ + current->pid, current->comm); \ + return; \ + } \ +} while( 0 ) +#define debug_set_smp_id(perfctr) \ + do { (perfctr)->start_smp_id = smp_processor_id(); } while( 0 ) +#else /* CONFIG_PERFCTR_DEBUG */ +#define debug_free(perfctr) do{}while(0) +#define debug_init(perfctr) do{}while(0) +#define debug_suspend(perfctr) do{}while(0) +#define debug_resume(perfctr) do{}while(0) +#define debug_check_smp_id(perfctr) do{}while(0) +#define debug_set_smp_id(perfctr) do{}while(0) +#endif /* CONFIG_PERFCTR_DEBUG */ + +/**************************************************************** + * * + * Resource management. 
* + * * + ****************************************************************/ + +/* XXX: perhaps relax this to number of _live_ perfctrs */ +static spinlock_t nrctrs_lock = SPIN_LOCK_UNLOCKED; +int nrctrs = 0; +static const char this_service[] = __FILE__; +#if PERFCTR_INTERRUPT_SUPPORT +static void vperfctr_ihandler(unsigned long pc); +#endif + +static int inc_nrctrs(void) +{ + const char *other; + + other = NULL; + spin_lock(&nrctrs_lock); + if( ++nrctrs == 1 ) + other = perfctr_cpu_reserve(this_service); + spin_unlock(&nrctrs_lock); + if( other ) { + printk(KERN_ERR __FILE__ + ": cannot operate, perfctr hardware taken by '%s'\n", + other); + return -EBUSY; + } +#if PERFCTR_INTERRUPT_SUPPORT + perfctr_cpu_set_ihandler(vperfctr_ihandler); +#endif + return 0; +} + +static void dec_nrctrs(void) +{ + spin_lock(&nrctrs_lock); + if( --nrctrs == 0 ) + perfctr_cpu_release(this_service); + spin_unlock(&nrctrs_lock); +} + +static struct vperfctr *vperfctr_alloc(void) +{ + unsigned long page; + + if( inc_nrctrs() != 0 ) + return NULL; + page = get_zeroed_page(GFP_KERNEL); + if( !page ) { + dec_nrctrs(); + return NULL; + } + SetPageReserved(virt_to_page(page)); + return (struct vperfctr*) page; +} + +static void vperfctr_free(struct vperfctr *perfctr) +{ + debug_free(perfctr); + ClearPageReserved(virt_to_page(perfctr)); + free_page((unsigned long)perfctr); + dec_nrctrs(); +} + +static struct vperfctr *get_empty_vperfctr(void) +{ + struct vperfctr *perfctr = vperfctr_alloc(); + if( perfctr ) { + perfctr->state.magic = VPERFCTR_MAGIC; +#if 0 + perfctr->state.ibuf_offset = offsetof(struct vperfctr, ientry); + perfctr->state.ibuf_size = 1; +#endif + atomic_set(&perfctr->count, 1); + debug_init(perfctr); + } + return perfctr; +} + +static void put_vperfctr(struct vperfctr *perfctr) +{ + if( atomic_dec_and_test(&perfctr->count) ) + vperfctr_free(perfctr); +} + +/**************************************************************** + * * + * Basic counter operations. * + * * + ****************************************************************/ + +/* PRE: perfctr == TASK_VPERFCTR(current) && IS_RUNNING(perfctr) + * Suspend the counters. + */ +static inline void vperfctr_suspend(struct vperfctr *perfctr) +{ + debug_suspend(perfctr); + debug_check_smp_id(perfctr); + perfctr_cpu_suspend(&perfctr->state.cpu_state); +} + +static inline void vperfctr_reset_sampling_timer(struct vperfctr *perfctr) +{ +#ifdef CONFIG_SMP + /* XXX: base the value on perfctr_info.cpu_khz instead! */ + perfctr->sampling_timer = HZ/2; +#endif +} + +/* PRE: perfctr == TASK_VPERFCTR(current) && IS_RUNNING(perfctr) + * Restart the counters. + */ +static inline void vperfctr_resume(struct vperfctr *perfctr) +{ + debug_resume(perfctr); + perfctr_cpu_resume(&perfctr->state.cpu_state); + vperfctr_reset_sampling_timer(perfctr); + debug_set_smp_id(perfctr); +} + +/* Sample the counters but do not suspend them. */ +static void vperfctr_sample(struct vperfctr *perfctr) +{ + if( IS_RUNNING(perfctr) ) { + debug_check_smp_id(perfctr); + perfctr_cpu_sample(&perfctr->state.cpu_state); + vperfctr_reset_sampling_timer(perfctr); + } +} + +#if PERFCTR_INTERRUPT_SUPPORT +/* vperfctr interrupt handler (XXX: add buffering support) */ +static void vperfctr_ihandler(unsigned long pc) +{ + struct task_struct *tsk = current; + struct vperfctr *perfctr; + siginfo_t si; + + perfctr = _vperfctr_get_thread(task_thread(tsk)); + if( !perfctr ) { + printk(KERN_ERR __FUNCTION__ + ": BUG! 
pid %d has no vperfctr\n", + tsk->pid); + return; + } + if( !IS_IMODE(perfctr) ) { + printk(KERN_ERR __FUNCTION__ + ": BUG! pid %d's vperfctr has cstatus %#x\n", + tsk->pid, perfctr->state.cpu_state.cstatus); + return; + } + vperfctr_suspend(perfctr); + /* suspend a-mode and i-mode PMCs, leaving only TSC on */ + perfctr->iresume_cstatus = perfctr->state.cpu_state.cstatus; + if( perfctr_cstatus_has_tsc(perfctr->iresume_cstatus) ) { + perfctr->state.cpu_state.cstatus = perfctr_mk_cstatus(1, 0, 0); + vperfctr_resume(perfctr); + } else + perfctr->state.cpu_state.cstatus = 0; +#if 0 + perfctr->ientry.pc = pc; + perfctr->ientry.pmc = -1; +#endif + si.si_signo = perfctr->state.si_signo; + si.si_errno = 0; + si.si_code = perfctr->state.si_code; /* XXX: pass PMC# in si_code? */ + if( !send_sig_info(si.si_signo, &si, tsk) ) + send_sig(si.si_signo, tsk, 1); +} +#endif + +/**************************************************************** + * * + * Process management operations. * + * * + ****************************************************************/ + +/* Called from exit_thread() or sys_vperfctr_unlink(). + * Current has just detached its vperfctr. + * If the counters are running, stop them and sample their final values. + * Mark this perfctr as dead and decrement its use count. + */ +void __vperfctr_exit(struct vperfctr *perfctr) +{ + if( IS_RUNNING(perfctr) ) + vperfctr_suspend(perfctr); + perfctr->state.cpu_state.cstatus = 0; +#if PERFCTR_INTERRUPT_SUPPORT + perfctr->iresume_cstatus = 0; +#endif + put_vperfctr(perfctr); +} + +/* schedule() --> switch_to() --> .. --> __vperfctr_suspend(). + * If the counters are running, suspend them. + */ +void __vperfctr_suspend(struct vperfctr *perfctr) +{ + if( IS_RUNNING(perfctr) ) + vperfctr_suspend(perfctr); +} + +/* schedule() --> switch_to() --> .. --> __vperfctr_resume(). + * PRE: perfctr == TASK_VPERFCTR(current) + * If the counters are runnable, resume them. + */ +void __vperfctr_resume(struct vperfctr *perfctr) +{ + if( IS_RUNNING(perfctr) ) + vperfctr_resume(perfctr); +} + +/* Called from update_one_process() [triggered by timer interrupt]. + * PRE: perfctr == TASK_VPERFCTR(current). + * Sample the counters but do not suspend them. + * Needed on SMP to avoid precision loss due to multiple counter + * wraparounds between resume/suspend for CPU-bound processes. + */ +void __vperfctr_sample(struct vperfctr *perfctr) +{ +#ifdef CONFIG_SMP + if( --perfctr->sampling_timer == 0 ) + vperfctr_sample(perfctr); +#endif +} + +/**************************************************************** + * * + * Virtual perfctr "system calls". 
* + * * + ****************************************************************/ + +/* PRE: perfctr == TASK_VPERFCTR(current) */ +static int sys_vperfctr_stop(struct vperfctr *perfctr) +{ + if( IS_RUNNING(perfctr) ) { + vperfctr_suspend(perfctr); + perfctr->state.cpu_state.cstatus = 0; +#if PERFCTR_INTERRUPT_SUPPORT + perfctr->iresume_cstatus = 0; +#endif + } + return 0; +} + +static int +sys_vperfctr_control(struct vperfctr *perfctr, struct vperfctr_control *argp) +{ + struct vperfctr_control control; + int err; + unsigned int prev_cstatus; + unsigned int prev_start_tsc; + + if( copy_from_user(&control, argp, sizeof control) ) + return -EFAULT; +#if PERFCTR_INTERRUPT_SUPPORT + if( control.cpu_control.nrictrs > 1 ) + return -EINVAL; + perfctr->iresume_cstatus = 0; +#else + if( control.cpu_control.nrictrs > 0 ) + return -EINVAL; +#endif + prev_cstatus = perfctr->state.cpu_state.cstatus; + perfctr->state.cpu_state.control = control.cpu_control; + err = perfctr_cpu_update_control(&perfctr->state.cpu_state); + if( err < 0 ) + return err; + if( !perfctr_cstatus_enabled(perfctr->state.cpu_state.cstatus) ) + /* XXX: too late, cstatus == 0 now :-( */ + return sys_vperfctr_stop(perfctr); + + /* XXX: validate si_signo and si_code? */ + perfctr->state.si_signo = control.si_signo; + perfctr->state.si_code = control.si_code; + + /* + * Clear the perfctr sums and restart the perfctrs. + * + * If the counters were running before this control call, + * then don't clear the time-stamp counter's sum and don't + * overwrite its current start value. + */ + if( prev_cstatus == 0 ) + perfctr->state.cpu_state.sum.tsc = 0; + memset(&perfctr->state.cpu_state.sum.pmc, 0, + sizeof perfctr->state.cpu_state.sum.pmc); + prev_start_tsc = perfctr->state.cpu_state.start.tsc; + vperfctr_resume(perfctr); /* clobbers start.tsc :-( */ + if( prev_cstatus != 0 ) + perfctr->state.cpu_state.start.tsc = prev_start_tsc; + + return 0; +} + +static int sys_vperfctr_iresume(struct vperfctr *perfctr) +{ +#if PERFCTR_INTERRUPT_SUPPORT + unsigned int iresume_cstatus; + unsigned int prev_start_tsc; + unsigned int i; + + iresume_cstatus = perfctr->iresume_cstatus; + if( !perfctr_cstatus_has_ictrs(iresume_cstatus) ) + return -EPERM; + + if( IS_RUNNING(perfctr) ) + vperfctr_suspend(perfctr); + + perfctr->state.cpu_state.cstatus = iresume_cstatus; + perfctr->iresume_cstatus = 0; + + /* XXX: only works for a single i-mode PMC */ + i = perfctr_cstatus_nractrs(iresume_cstatus); + perfctr->state.cpu_state.start.pmc[i] = + perfctr->state.cpu_state.control.ireset[i]; + + prev_start_tsc = perfctr->state.cpu_state.start.tsc; + perfctr_cpu_ireload(&perfctr->state.cpu_state); + vperfctr_resume(perfctr); /* clobbers start.tsc :-( */ + perfctr->state.cpu_state.start.tsc = prev_start_tsc; + return 0; +#else + return -ENOSYS; +#endif +} + +/* PRE: perfctr == TASK_VPERFCTR(current) */ +static int sys_vperfctr_unlink(struct vperfctr *perfctr) +{ + _vperfctr_set_thread(task_thread(current), NULL); + __vperfctr_exit(perfctr); + return 0; +} + +/* PRE: perfctr == TASK_VPERFCTR(current) + * Sample the current process' counters and update state. + * This operation is used on processors like the pre-MMX Intel P5, + * which cannot sample the counter registers in user-mode. + */ +static int sys_vperfctr_sample(struct vperfctr *perfctr) +{ + vperfctr_sample(perfctr); + return 0; +} + +/**************************************************************** + * * + * Virtual perfctr file operations. 
* + * * + ****************************************************************/ + +static int vperfctr_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct vperfctr *perfctr; + + /* Only allow read-only mapping of first page. */ + if( (vma->vm_end - vma->vm_start) != PAGE_SIZE || + vma_pgoff(vma) != 0 || + (pgprot_val(vma->vm_page_prot) & _PAGE_RW) || + (vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) ) + return -EPERM; + perfctr = filp->private_data; + if( !perfctr ) + return -EPERM; + return remap_page_range(vma->vm_start, virt_to_phys(perfctr), + PAGE_SIZE, vma->vm_page_prot); +} + +static int vperfctr_release(struct inode *inode, struct file *filp) +{ + struct vperfctr *perfctr = filp->private_data; + filp->private_data = NULL; + if( perfctr ) + put_vperfctr(perfctr); + return 0; +} + +static int vperfctr_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct vperfctr *perfctr; + + switch( cmd ) { + case PERFCTR_INFO: + return sys_perfctr_info((struct perfctr_info*)arg); + } + perfctr = filp->private_data; + if( !perfctr || perfctr != _vperfctr_get_thread(task_thread(current)) ) + return -EPERM; + switch( cmd ) { + case VPERFCTR_CONTROL: + return sys_vperfctr_control(perfctr, (struct vperfctr_control*)arg); + case VPERFCTR_STOP: + return sys_vperfctr_stop(perfctr); + case VPERFCTR_UNLINK: + return sys_vperfctr_unlink(perfctr); + case VPERFCTR_SAMPLE: + return sys_vperfctr_sample(perfctr); + case VPERFCTR_IRESUME: + return sys_vperfctr_iresume(perfctr); + } + return -EINVAL; +} + +static int vperfctr_init_done; + +static int vperfctr_open(struct inode *inode, struct file *filp) +{ + struct task_struct *tsk; + struct vperfctr *perfctr; + + /* The link from /proc//perfctr exists even if the + hardware detection failed. Disallow open in this case. */ + if( !vperfctr_init_done ) + return -ENODEV; + + /* XXX: + * - permit read-only open of other process' vperfctr, using + * same permission check as in the old ATTACH interface + * - or add a spinlock to the thread_struct and allow a + * "remote open" even if the target proc isn't stopped? 
+ */ + tsk = current; + if( !proc_pid_inode_denotes_task(inode, tsk) ) + return -EPERM; + perfctr = _vperfctr_get_thread(task_thread(tsk)); + if( filp->f_flags & O_CREAT ) { + if( perfctr ) + return -EEXIST; + perfctr = get_empty_vperfctr(); + if( !perfctr ) + return -ENOMEM; + } + filp->private_data = perfctr; + if( perfctr ) + atomic_inc(&perfctr->count); + if( !_vperfctr_get_thread(task_thread(tsk)) ) + _vperfctr_set_thread(task_thread(tsk), perfctr); + return 0; +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) + +static struct file_operations vperfctr_file_ops = { + .owner = THIS_MODULE, + .mmap = vperfctr_mmap, + .release = vperfctr_release, + .ioctl = vperfctr_ioctl, + .open = vperfctr_open, +}; + +#if !defined(MODULE) +void perfctr_set_proc_pid_ops(struct inode *inode) +{ + inode->i_fop = &vperfctr_file_ops; +} +#endif + +#else /* 2.2 :-( */ + +#include + +#if defined(MODULE) +static int vperfctr_release_22(struct inode *inode, struct file *filp) +{ + vperfctr_release(inode, filp); + MOD_DEC_USE_COUNT; /* 2.4 kernel does this for us */ + return 0; +} +static int vperfctr_open_22(struct inode *inode, struct file *filp) +{ + int ret; + MOD_INC_USE_COUNT; /* 2.4 kernel does this for us */ + ret = vperfctr_open(inode, filp); + if( ret < 0 ) + MOD_DEC_USE_COUNT; + return ret; +} +#else /* !MODULE */ +#define vperfctr_release_22 vperfctr_release +#define vperfctr_open_22 vperfctr_open +#endif /* MODULE */ + +static struct file_operations vperfctr_file_ops = { + .mmap = vperfctr_mmap, + .release = vperfctr_release_22, + .ioctl = vperfctr_ioctl, + .open = vperfctr_open_22, +}; + +#if !defined(MODULE) +struct inode_operations perfctr_proc_pid_inode_operations = { + .default_file_ops = &vperfctr_file_ops, + .permission = proc_permission, +}; +#endif + +#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) */ + +/**************************************************************** + * * + * module_init/exit * + * * + ****************************************************************/ + +#ifdef MODULE +static struct vperfctr_stub off; + +static void vperfctr_stub_init(void) +{ + write_lock(&vperfctr_stub_lock); + off = vperfctr_stub; + vperfctr_stub.exit = __vperfctr_exit; + vperfctr_stub.suspend = __vperfctr_suspend; + vperfctr_stub.resume = __vperfctr_resume; + vperfctr_stub.sample = __vperfctr_sample; + vperfctr_stub.file_ops = &vperfctr_file_ops; + write_unlock(&vperfctr_stub_lock); +} + +static void vperfctr_stub_exit(void) +{ + write_lock(&vperfctr_stub_lock); + vperfctr_stub = off; + write_unlock(&vperfctr_stub_lock); +} +#else +static inline void vperfctr_stub_init(void) { } +static inline void vperfctr_stub_exit(void) { } +#endif /* MODULE */ + +int __init vperfctr_init(void) +{ + vperfctr_stub_init(); + vperfctr_init_done = 1; + return 0; +} + +void __exit vperfctr_exit(void) +{ + vperfctr_stub_exit(); +} diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/virtual.h linux-2.4.9-wt1/drivers/perfctr/virtual.h --- linux-2.4.9-wt1-2l/drivers/perfctr/virtual.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/virtual.h Tue May 1 01:31:14 2001 @@ -0,0 +1,13 @@ +/* $Id: virtual.h,v 1.5 2001/04/30 23:31:14 mikpe Exp $ + * Virtual per-process performance counters. 
+ * + * Copyright (C) 1999-2001 Mikael Pettersson + */ + +#ifdef CONFIG_PERFCTR_VIRTUAL +extern int vperfctr_init(void); +extern void vperfctr_exit(void); +#else +static inline int vperfctr_init(void) { return 0; } +static inline void vperfctr_exit(void) { } +#endif diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/virtual_stub.c linux-2.4.9-wt1/drivers/perfctr/virtual_stub.c --- linux-2.4.9-wt1-2l/drivers/perfctr/virtual_stub.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/virtual_stub.c Fri Jul 27 00:42:58 2001 @@ -0,0 +1,93 @@ +/* $Id: virtual_stub.c,v 1.10 2001/07/26 22:42:58 mikpe Exp $ + * Kernel stub used to support virtual perfctrs when the + * perfctr driver is built as a module. + * + * Copyright (C) 2000-2001 Mikael Pettersson + */ +#include +#include +#include +#include +#include +#include +#include "compat.h" + +static void bug(const char *func, void *callee) +{ + printk(KERN_ERR __FILE__ ": BUG! call to __vperfctr_%s " + "from %p, pid %u, '%s' when perfctr module is not loaded\n", + func, callee, current->pid, current->comm); + _vperfctr_set_thread(task_thread(current), NULL); +} + +static void bug_exit(struct vperfctr *perfctr) +{ + bug("exit", __builtin_return_address(0)); +} + +static void bug_suspend(struct vperfctr *perfctr) +{ + bug("suspend", __builtin_return_address(0)); +} + +static void bug_resume(struct vperfctr *perfctr) +{ + bug("resume", __builtin_return_address(0)); +} + +static void bug_sample(struct vperfctr *perfctr) +{ + bug("sample", __builtin_return_address(0)); +} + +static int vperfctr_stub_open(struct inode *inode, struct file *filp) +{ + struct file_operations *fops; + + if( !proc_pid_inode_denotes_task(inode, current) ) + return -EPERM; + read_lock(&vperfctr_stub_lock); + fops = fops_get(vperfctr_stub.file_ops); + read_unlock(&vperfctr_stub_lock); + if( !fops && request_module("perfctr") == 0 ) { + read_lock(&vperfctr_stub_lock); + fops = fops_get(vperfctr_stub.file_ops); + read_unlock(&vperfctr_stub_lock); + } + if( !fops ) + return -ENOSYS; + filp->f_op = fops; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) + inode->i_fop = fops; /* no fops_get since only filp->f_op counts */ +#endif + return fops->open(inode, filp); +} + +static struct file_operations vperfctr_stub_file_ops = { + .open = vperfctr_stub_open, +}; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) +void perfctr_set_proc_pid_ops(struct inode *inode) +{ + inode->i_fop = &vperfctr_stub_file_ops; +} +#else +#include +struct inode_operations perfctr_proc_pid_inode_operations = { + .default_file_ops = &vperfctr_stub_file_ops, + .permission = proc_permission, +}; +#endif + +struct vperfctr_stub vperfctr_stub = { + .exit = bug_exit, + .suspend = bug_suspend, + .resume = bug_resume, + .sample = bug_sample, + .file_ops = NULL, +}; +rwlock_t vperfctr_stub_lock = RW_LOCK_UNLOCKED; + +EXPORT_SYMBOL(vperfctr_stub); +EXPORT_SYMBOL(vperfctr_stub_lock); diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/x86.c linux-2.4.9-wt1/drivers/perfctr/x86.c --- linux-2.4.9-wt1-2l/drivers/perfctr/x86.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/x86.c Tue Aug 28 17:01:45 2001 @@ -0,0 +1,1191 @@ +/* $Id: x86.c,v 1.29 2001/08/28 15:01:45 mikpe Exp $ + * x86 performance-monitoring counters driver. + * + * Copyright (C) 1999-2001 Mikael Pettersson + */ +#include +#define __NO_VERSION__ +#include +#include +#include +#include + +#include + +#include "compat.h" +#include "x86_compat.h" +#include "x86_tests.h" + +/* Support for lazy evntsel and perfctr MSR updates. 
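+   In outline: each CPU caches the evntsel values (or, on P5-class CPUs,
+   the packed CESR) it last wrote, tagged with the id of the owning state;
+   the write_control and iresume routines below then skip the wrmsr for
+   any value that already matches the cached copy.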
*/ +struct per_cpu_cache { /* subset of perfctr_cpu_state */ + union { + unsigned int p5_cesr; + unsigned int id; /* cache owner id */ + } k1; + struct { + unsigned int evntsel[18]; + } control; +} __attribute__((__aligned__(SMP_CACHE_BYTES))); +static struct per_cpu_cache per_cpu_cache[NR_CPUS] __cacheline_aligned; + +/* Intel P5, Cyrix 6x86MX/MII/III, Centaur WinChip C6/2/3 */ +#define MSR_P5_CESR 0x11 +#define MSR_P5_CTR0 0x12 /* .. 0x13 */ +#define P5_CESR_CPL 0x00C0 +#define P5_CESR_RESERVED (~0x01FF) +#define MII_CESR_RESERVED (~0x05FF) +#define C6_CESR_RESERVED (~0x00FF) + +/* Intel P6, VIA Cyrix III / C3 */ +#define MSR_P6_PERFCTR0 0xC1 /* .. 0xC2 */ +#define MSR_P6_EVNTSEL0 0x186 /* .. 0x187 */ +#define P6_EVNTSEL_ENABLE 0x00400000 +#define P6_EVNTSEL_INT 0x00100000 +#define P6_EVNTSEL_CPL 0x00030000 +#define P6_EVNTSEL_RESERVED 0x00280000 +#define VC3_EVNTSEL1_RESERVED (~0x1FF) + +/* AMD K7 */ +#define MSR_K7_EVNTSEL0 0xC0010000 /* .. 0xC0010003 */ +#define MSR_K7_PERFCTR0 0xC0010004 /* .. 0xC0010007 */ + +#define rdmsrl(msr,low) \ + __asm__ __volatile__("rdmsr" : "=a"(low) : "c"(msr) : "edx") +#define rdpmcl(ctr,low) \ + __asm__ __volatile__("rdpmc" : "=a"(low) : "c"(ctr) : "edx") + +static inline void set_in_cr4_local(unsigned int mask) +{ + write_cr4(read_cr4() | mask); +} + +static inline void clear_in_cr4_local(unsigned int mask) +{ + write_cr4(read_cr4() & ~mask); +} + +static unsigned int new_id(void) +{ + static spinlock_t lock = SPIN_LOCK_UNLOCKED; + static unsigned int counter; + int id; + + spin_lock(&lock); + id = ++counter; + spin_unlock(&lock); + return id; +} + +/**************************************************************** + * * + * Driver procedures. * + * * + ****************************************************************/ + +/* + * Intel P5 family (Pentium, family code 5). + * - One TSC and two 40-bit PMCs. + * - A single 32-bit CESR (MSR 0x11) controls both PMCs. + * CESR has two halves, each controlling one PMC. + * To keep the API reasonably clean, the user puts 16 bits of + * control data in each counter's evntsel; the driver combines + * these to a single 32-bit CESR value. + * - Overflow interrupts are not available. + * - Pentium MMX added the RDPMC instruction. RDPMC has lower + * overhead than RDMSR and it can be used in user-mode code. + * - The MMX events are not symmetric: some events are only available + * for some PMC, and some event codes denote different events + * depending on which PMCs they control. + * - pmc_map[] is not required to be the identity function. 
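+ * - For illustration only: with nractrs == 2, pmc_map[] == {0,1} and
+ *   each evntsel[i] holding an event code plus the CPL field (3 << 6),
+ *   as in x86_tests.c, p5_like_check_control() below packs the halves
+ *   as (evntsel[1] << 16) | evntsel[0] and p5_write_control() writes
+ *   that single value to MSR_P5_CESR.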
+ */ + +/* shared with MII and C6 */ +static int p5_like_check_control(struct perfctr_cpu_state *state, + unsigned int reserved_bits, int is_c6) +{ + unsigned short cesr_half[2]; + unsigned int pmc, evntsel, i; + + if( state->control.nrictrs != 0 || state->control.nractrs > 2 ) + return -EINVAL; + cesr_half[0] = 0; + cesr_half[1] = 0; + for(i = 0; i < state->control.nractrs; ++i) { + pmc = state->control.pmc_map[i]; + if( pmc > 1 || cesr_half[pmc] != 0 ) + return -EINVAL; + evntsel = state->control.evntsel[i]; + /* protect reserved bits */ + if( (evntsel & reserved_bits) != 0 ) + return -EPERM; + /* the CPL field (if defined) must be non-zero */ + if( !is_c6 && !(evntsel & P5_CESR_CPL) ) + return -EINVAL; + cesr_half[pmc] = evntsel; + } + state->k1.p5_cesr = (cesr_half[1] << 16) | cesr_half[0]; + return 0; +} + +static int p5_check_control(struct perfctr_cpu_state *state) +{ + return p5_like_check_control(state, P5_CESR_RESERVED, 0); +} + +/* shared with MII but not C6 */ +static void p5_write_control(const struct perfctr_cpu_state *state) +{ + struct per_cpu_cache *cpu; + unsigned cesr; + + cesr = state->k1.p5_cesr; + if( !cesr ) /* no PMC is on (this test doesn't work on C6) */ + return; + cpu = &per_cpu_cache[smp_processor_id()]; + if( cpu->k1.p5_cesr != cesr ) { + cpu->k1.p5_cesr = cesr; + wrmsr(MSR_P5_CESR, cesr, 0); + } +} + +static void p5_read_counters(const struct perfctr_cpu_state *state, + struct perfctr_low_ctrs *ctrs) +{ + unsigned int cstatus, nrctrs, i; + + /* The P5 doesn't allocate a cache line on a write miss, so do + a dummy read to avoid a write miss here _and_ a read miss + later in our caller. */ + asm("" : : "r"(ctrs->tsc)); + + cstatus = state->cstatus; + if( perfctr_cstatus_has_tsc(cstatus) ) + rdtscl(ctrs->tsc); + nrctrs = perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nrctrs; ++i) { + unsigned int pmc = state->control.pmc_map[i]; + rdmsrl(MSR_P5_CTR0+pmc, ctrs->pmc[i]); + } +} + +/* shared with MII, C6, and VC3 */ +static void p5mmx_read_counters(const struct perfctr_cpu_state *state, + struct perfctr_low_ctrs *ctrs) +{ + unsigned int cstatus, nrctrs, i; + + /* The P5 doesn't allocate a cache line on a write miss, so do + a dummy read to avoid a write miss here _and_ a read miss + later in our caller. */ + asm("" : : "r"(ctrs->tsc)); + + cstatus = state->cstatus; + if( perfctr_cstatus_has_tsc(cstatus) ) + rdtscl(ctrs->tsc); + nrctrs = perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nrctrs; ++i) { + unsigned int pmc = state->control.pmc_map[i]; + rdpmcl(pmc, ctrs->pmc[i]); + } +} + +/* shared with MII and C6 */ +static void p5_clear_counters(void) +{ + wrmsr(MSR_P5_CESR, 0, 0); + wrmsr(MSR_P5_CTR0+0, 0, 0); + wrmsr(MSR_P5_CTR0+1, 0, 0); +} + +/* + * Cyrix 6x86/MII/III. + * - Same MSR assignments as P5 MMX. Has RDPMC and two 48-bit PMCs. + * - Event codes and CESR formatting as in the plain P5 subset. + * - Many but not all P5 MMX event codes are implemented. + * - Cyrix adds a few more event codes. The event code is widened + * to 7 bits, and Cyrix puts the high bit in CESR bit 10 + * (and CESR bit 26 for PMC1). + */ + +static int mii_check_control(struct perfctr_cpu_state *state) +{ + return p5_like_check_control(state, MII_CESR_RESERVED, 0); +} + +/* + * Centaur WinChip C6/2/3. + * - Same MSR assignments as P5 MMX. Has RDPMC and two 40-bit PMCs. + * - CESR is formatted with two halves, like P5. However, there + * are no defined control fields for e.g. CPL selection, and + * there is no defined method for stopping the counters. 
+ * - Only a few event codes are defined. + * - The 64-bit TSC is synthesised from the low 32 bits of the + * two PMCs, and CESR has to be set up appropriately. + * Reprogramming CESR causes RDTSC to yield invalid results. + * (The C6 may also hang in this case, due to C6 erratum I-13.) + * Therefore, using the PMCs on any of these processors requires + * that the TSC is not accessed at all: + * 1. The kernel must be configured for a TSC-less processor, i.e. + * generic 586 or less. + * 2. The "notsc" boot parameter must be passed to the kernel. + * 3. User-space libraries and code must also be configured and + * compiled for a generic 586 or less. + */ + +#if !defined(CONFIG_X86_TSC) +static int c6_check_control(struct perfctr_cpu_state *state) +{ + if( state->control.tsc_on ) + return -EINVAL; + return p5_like_check_control(state, C6_CESR_RESERVED, 1); +} + +static void c6_write_control(const struct perfctr_cpu_state *state) +{ + struct per_cpu_cache *cpu; + unsigned cesr; + + if( perfctr_cstatus_nractrs(state->cstatus) == 0 ) /* no PMC is on */ + return; + cpu = &per_cpu_cache[smp_processor_id()]; + cesr = state->k1.p5_cesr; + if( cpu->k1.p5_cesr != cesr ) { + cpu->k1.p5_cesr = cesr; + wrmsr(MSR_P5_CESR, cesr, 0); + } +} +#endif + +/* + * Intel P6 family (Pentium Pro, Pentium II, and Pentium III cores, + * and Xeon and Celeron versions of Pentium II and III cores). + * - One TSC and two 40-bit PMCs. + * - One 32-bit EVNTSEL MSR for each PMC. + * - EVNTSEL0 contains a global enable/disable bit. + * That bit is reserved in EVNTSEL1. + * - Each EVNTSEL contains a CPL field. + * - Overflow interrupts are possible, but this requires that the + * local APIC is available. Mobile P6 CPUs have no local APIC. + * Additional kernel patches are also required. + * - The PMCs cannot be initialised with arbitrary values, since + * wrmsr fills the high bits by sign-extending from bit 31. + * - Most events are symmetric, but a few are not. + * - pmc_map[] is required to be the identity function. PMC1 cannot + * be used if PMC0 is skipped (since EVNTSEL0 has the global + * enable bit), so the counters might as well be listed in the + * natural order. + */ + +/* shared with K7 */ +static int p6_like_check_control(struct perfctr_cpu_state *state, int is_k7) +{ + unsigned int evntsel, i, nractrs, nrctrs; + + nractrs = state->control.nractrs; + nrctrs = nractrs + state->control.nrictrs; + if( nrctrs < nractrs || nrctrs > (is_k7 ? 
4 : 2) ) + return -EINVAL; + + for(i = 0; i < nrctrs; ++i) { + /* pmc_map[] should be the identity function */ + if( state->control.pmc_map[i] != i ) + return -EINVAL; + evntsel = state->control.evntsel[i]; + /* protect reserved bits */ + if( evntsel & P6_EVNTSEL_RESERVED ) + return -EPERM; + /* check ENable bit */ + if( is_k7 ) { + /* ENable bit must be set in each evntsel */ + if( !(evntsel & P6_EVNTSEL_ENABLE) ) + return -EINVAL; + } else { + /* only evntsel[0] has the ENable bit */ + if( evntsel & P6_EVNTSEL_ENABLE ) { + if( i > 0 ) + return -EPERM; + } else { + if( i == 0 ) + return -EINVAL; + } + } + /* the CPL field must be non-zero */ + if( !(evntsel & P6_EVNTSEL_CPL) ) + return -EINVAL; + /* INT bit must be off for a-mode and on for i-mode counters */ + if( evntsel & P6_EVNTSEL_INT ) { + if( i < nractrs ) + return -EINVAL; + } else { + if( i >= nractrs ) + return -EINVAL; + } + } + state->k1.id = new_id(); + return 0; +} + +static int p6_check_control(struct perfctr_cpu_state *state) +{ + return p6_like_check_control(state, 0); +} + +#ifdef CONFIG_PERFCTR_DEBUG +static void debug_evntsel_cache(const struct perfctr_cpu_state *state, + const struct per_cpu_cache *cpu) +{ + unsigned int nrctrs, i; + + nrctrs = perfctr_cstatus_nrctrs(state->cstatus); + for(i = 0; i < nrctrs; ++i) { + unsigned int evntsel = state->control.evntsel[i]; + if( evntsel != cpu->control.evntsel[i] ) { + printk(KERN_ERR "perfctr/x86.c: (pid %d, comm %s) " + "evntsel[%u] is %#x, should be %#x\n", + current->pid, current->comm, + i, cpu->control.evntsel[i], evntsel); + return; + } + } +} +#else +static inline void debug_evntsel_cache(const struct perfctr_cpu_state *s, + const struct per_cpu_cache *c) +{ } +#endif + +static void p6_write_control(const struct perfctr_cpu_state *state) +{ + struct per_cpu_cache *cpu; + unsigned int nrctrs, i; + + cpu = &per_cpu_cache[smp_processor_id()]; + if( cpu->k1.id == state->k1.id ) { + debug_evntsel_cache(state, cpu); + return; + } + nrctrs = perfctr_cstatus_nrctrs(state->cstatus); + for(i = 0; i < nrctrs; ++i) { + unsigned int evntsel = state->control.evntsel[i]; + if( evntsel != cpu->control.evntsel[i] ) { + cpu->control.evntsel[i] = evntsel; + wrmsr(MSR_P6_EVNTSEL0+i, evntsel, 0); + } + } + cpu->k1.id = state->k1.id; +} + +/* shared with K7 */ +static void p6_read_counters(const struct perfctr_cpu_state *state, + struct perfctr_low_ctrs *ctrs) +{ + unsigned int cstatus, nrctrs, i; + + cstatus = state->cstatus; + if( perfctr_cstatus_has_tsc(cstatus) ) /* XXX: ignore this test? 
*/ + rdtscl(ctrs->tsc); + nrctrs = perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nrctrs; ++i) + rdpmcl(i, ctrs->pmc[i]); +} + +static void p6_clear_counters(void) +{ + int i; + + for(i = 0; i < 2; ++i) { + wrmsr(MSR_P6_EVNTSEL0+i, 0, 0); + wrmsr(MSR_P6_PERFCTR0+i, 0, 0); + } +} + +#if PERFCTR_INTERRUPT_SUPPORT +/* PRE: perfctr_cstatus_has_ictrs(state->cstatus) != 0 */ +static void p6_isuspend(struct perfctr_cpu_state *state) +{ + struct per_cpu_cache *cpu; + unsigned int cstatus, nrctrs, i; + + cpu = &per_cpu_cache[smp_processor_id()]; + cpu->control.evntsel[0] = 0; + wrmsr(MSR_P6_EVNTSEL0, 0, 0); + /* cpu->k1.id is still == state->k1.id */ + cstatus = state->cstatus; + nrctrs = perfctr_cstatus_nrctrs(cstatus); + for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) + rdpmcl(i, state->start.pmc[i]); +} + +/* PRE: perfctr_cstatus_has_ictrs(state->cstatus) != 0 */ +static void p6_iresume(const struct perfctr_cpu_state *state) +{ + struct per_cpu_cache *cpu; + unsigned int cstatus, nrctrs, i; + + cpu = &per_cpu_cache[smp_processor_id()]; + /* XXX: move k1.id test up here? */ + if( cpu->control.evntsel[0] ) { + cpu->control.evntsel[0] = 0; + wrmsr(MSR_P6_EVNTSEL0, 0, 0); + cpu->k1.id = 0; + } else if( cpu->k1.id == state->k1.id ) { + /* isuspend() cleared EVNTSEL0, so invalidate the cache + here to force write_control() to reload the EVNTSELs. + The k1.id cache still allows us to avoid reloading + the PERFCTRs. */ + cpu->k1.id = 0; + return; + } + cstatus = state->cstatus; + nrctrs = perfctr_cstatus_nrctrs(cstatus); + for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) + wrmsr(MSR_P6_PERFCTR0+i, state->start.pmc[i], 0); + /* cpu->k1.id remains != state->k1.id */ +} +#endif /* PERFCTR_INTERRUPT_SUPPORT */ + +/* + * AMD K7 family (Athlon, Duron). + * - Somewhat similar to the Intel P6 family. + * - Four 48-bit PMCs. + * - Four 32-bit EVNTSEL MSRs with similar layout as in P6. + * - Completely different MSR assignments :-( + * - Fewer countable events defined :-( + * - The events appear to be completely symmetric. + * - The EVNTSEL MSRs are symmetric since each has its own enable bit. + * - Publicly available documentation is incomplete. + * - pmc_map[] is required to be the identity function. 
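+ * - For illustration only: an accepted a-mode evntsel carries an event
+ *   code in its low bits, a non-zero CPL field (P6_EVNTSEL_CPL), the
+ *   per-counter ENable bit (P6_EVNTSEL_ENABLE) set and the INT bit
+ *   (P6_EVNTSEL_INT) clear; an i-mode evntsel must have INT set instead.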
+ */ + +static int k7_check_control(struct perfctr_cpu_state *state) +{ + return p6_like_check_control(state, 1); +} + +static void k7_write_control(const struct perfctr_cpu_state *state) +{ + struct per_cpu_cache *cpu; + unsigned int nrctrs, i; + + cpu = &per_cpu_cache[smp_processor_id()]; + if( cpu->k1.id == state->k1.id ) { + debug_evntsel_cache(state, cpu); + return; + } + nrctrs = perfctr_cstatus_nrctrs(state->cstatus); + for(i = 0; i < nrctrs; ++i) { + unsigned int evntsel = state->control.evntsel[i]; + if( evntsel != cpu->control.evntsel[i] ) { + cpu->control.evntsel[i] = evntsel; + wrmsr(MSR_K7_EVNTSEL0+i, evntsel, 0); + } + } + cpu->k1.id = state->k1.id; +} + +static void k7_clear_counters(void) +{ + int i; + + for(i = 0; i < 4; ++i) { + wrmsr(MSR_K7_EVNTSEL0+i, 0, 0); + wrmsr(MSR_K7_PERFCTR0+i, 0, 0); + } +} + +#if PERFCTR_INTERRUPT_SUPPORT +/* PRE: perfctr_cstatus_has_ictrs(control->cstatus) != 0 */ +static void k7_isuspend(struct perfctr_cpu_state *state) +{ + struct per_cpu_cache *cpu; + unsigned int cstatus, nrctrs, i; + + cpu = &per_cpu_cache[smp_processor_id()]; + cstatus = state->cstatus; + nrctrs = perfctr_cstatus_nrctrs(cstatus); + for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) { + cpu->control.evntsel[i] = 0; + wrmsr(MSR_K7_EVNTSEL0+i, 0, 0); + rdpmcl(i, state->start.pmc[i]); + } + /* cpu->k1.id is still == state->k1.id */ +} + +/* PRE: perfctr_cstatus_has_ictrs(state->cstatus) != 0 */ +static void k7_iresume(const struct perfctr_cpu_state *state) +{ + struct per_cpu_cache *cpu; + unsigned int cstatus, nrctrs, nractrs, i; + int id_valid; + + cpu = &per_cpu_cache[smp_processor_id()]; + id_valid = 1; + cstatus = state->cstatus; + nrctrs = perfctr_cstatus_nrctrs(cstatus); + nractrs = perfctr_cstatus_nractrs(cstatus); + /* XXX: move k1.id test up here? */ + for(i = nractrs; i < nrctrs; ++i) { + if( cpu->control.evntsel[i] ) { + cpu->control.evntsel[i] = 0; + wrmsr(MSR_K7_EVNTSEL0+i, 0, 0); + id_valid = 0; + } + } + if( !id_valid ) + cpu->k1.id = 0; + else if( cpu->k1.id == state->k1.id ) { + cpu->k1.id = 0; /* see comment in p6_iresume() */ + return; + } + for(i = nractrs; i < nrctrs; ++i) + wrmsr(MSR_K7_PERFCTR0+i, state->start.pmc[i], -1); + /* cpu->k1.id remains != state->k1.id */ +} +#endif /* PERFCTR_INTERRUPT_SUPPORT */ + +/* + * VIA Cyrix III and C3 family. + * - A Centaur design somewhat similar to the Intel P6. + * - PERFCTR0 is an alias for the TSC, and EVNTSEL0 is read-only. + * - PERFCTR1 is 32 bits wide. + * - EVNTSEL1 has no defined control fields, and there is no + * defined method for stopping the counter. + * - It is not known if the reserved fields in EVNTSEL1 should be + * preserved or written with zeroes. + * - Only a few event codes are defined. + * - No local APIC or interrupt-mode support. + * - pmc_map[] is NOT the identity function: pmc_map[0] must be 1, + * if nrctrs == 1. 
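+ * - For illustration only: the only non-empty control accepted by
+ *   vc3_check_control() below is nractrs == 1, nrictrs == 0 and
+ *   pmc_map[0] == 1, with no reserved bits set in evntsel[0];
+ *   tsc_on may be set in addition.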
+ */ +static int vc3_check_control(struct perfctr_cpu_state *state) +{ + if( state->control.nrictrs || state->control.nractrs > 1 ) + return -EINVAL; + if( state->control.nractrs == 1 ) { + if( state->control.pmc_map[0] != 1 ) + return -EINVAL; + if( state->control.evntsel[0] & VC3_EVNTSEL1_RESERVED ) + return -EPERM; + } + return 0; +} + +static void vc3_write_control(const struct perfctr_cpu_state *state) +{ + struct per_cpu_cache *cpu; + unsigned evntsel; + + if( state->control.nractrs == 0 ) /* PERFCTR1 is not on */ + return; + cpu = &per_cpu_cache[smp_processor_id()]; + evntsel = state->control.evntsel[0]; + if( cpu->control.evntsel[0] != evntsel ) { + /* XXX: read-modify-write to preserve reserved bits */ + unsigned int old, dummy; + cpu->control.evntsel[0] = evntsel; + rdmsr(MSR_P6_EVNTSEL0+1, old, dummy); + evntsel |= (old & VC3_EVNTSEL1_RESERVED); + wrmsr(MSR_P6_EVNTSEL0+1, evntsel, 0); + } +} + +/* + * Generic driver for any x86 with a working TSC. + */ + +static int generic_check_control(struct perfctr_cpu_state *state) +{ + if( state->control.nractrs || state->control.nrictrs ) + return -EINVAL; + return 0; +} + +static void generic_write_control(const struct perfctr_cpu_state *state) +{ +} + +static void generic_read_counters(const struct perfctr_cpu_state *state, + struct perfctr_low_ctrs *ctrs) +{ + rdtscl(ctrs->tsc); +} + +static void generic_clear_counters(void) +{ +} + +/* + * Driver methods, internal and exported. + * + * Frequently called functions (write_control, read_counters, + * isuspend and iresume) are back-patched to invoke the correct + * processor-specific methods directly, thereby saving the + * overheads of indirect function calls. + * + * Backpatchable call sites must have been "finalised" after + * initialisation. The reason for this is that unsynchronised code + * modification doesn't work in multiprocessor systems, due to + * Intel P6 errata. Consequently, all backpatchable call sites + * must be known and local to this file. 
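+ *
+ * For illustration only: each backpatchable call site is expected to be
+ * a 5-byte `call rel32' (opcode 0xE8 followed by a 32-bit displacement);
+ * redirect_call() below rewrites that displacement to (target - return
+ * address), after which the site calls the processor-specific routine
+ * directly, and warns if the site is not of that form.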
+ */ + +static int redirect_call_disable; + +static void redirect_call(void *ra, void *to) +{ + /* XXX: make this function __init later */ + if( redirect_call_disable ) + printk(KERN_ERR __FILE__ ":" __FUNCTION__ + ": unresolved call to %p at %p\n", + to, ra); + /* we can only redirect `call near relative' instructions */ + if( *((unsigned char*)ra - 5) != 0xE8 ) { + printk(KERN_WARNING __FILE__ ":" __FUNCTION__ + ": unable to redirect caller %p to %p\n", + ra, to); + return; + } + *(int*)((char*)ra - 4) = (char*)to - (char*)ra; +} + +static void (*write_control)(const struct perfctr_cpu_state*); +static void perfctr_cpu_write_control(const struct perfctr_cpu_state *state) +{ + redirect_call(__builtin_return_address(0), write_control); + return write_control(state); +} + +static void (*read_counters)(const struct perfctr_cpu_state*, + struct perfctr_low_ctrs*); +static void perfctr_cpu_read_counters(const struct perfctr_cpu_state *state, + struct perfctr_low_ctrs *ctrs) +{ + redirect_call(__builtin_return_address(0), read_counters); + return read_counters(state, ctrs); +} + +#if PERFCTR_INTERRUPT_SUPPORT +static void (*cpu_isuspend)(struct perfctr_cpu_state*); +static void perfctr_cpu_isuspend(struct perfctr_cpu_state *state) +{ + redirect_call(__builtin_return_address(0), cpu_isuspend); + return cpu_isuspend(state); +} + +static void (*cpu_iresume)(const struct perfctr_cpu_state*); +static void perfctr_cpu_iresume(const struct perfctr_cpu_state *state) +{ + redirect_call(__builtin_return_address(0), cpu_iresume); + return cpu_iresume(state); +} + +void perfctr_cpu_ireload(const struct perfctr_cpu_state *state) +{ + /* Call ireload() this just before iresume() to bypass + internal caching and force a reload of i-mode PMCs. */ + struct per_cpu_cache *cpu; + cpu = &per_cpu_cache[smp_processor_id()]; + cpu->k1.id = 0; +} +#endif /* PERFCTR_INTERRUPT_SUPPORT */ + +static inline void setup_imode_start_values(struct perfctr_cpu_state *state) +{ +#if PERFCTR_INTERRUPT_SUPPORT + unsigned int cstatus, nrctrs, i; + + cstatus = state->cstatus; + nrctrs = perfctr_cstatus_nrctrs(cstatus); + for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) + state->start.pmc[i] = state->control.ireset[i]; +#endif +} + +static int (*check_control)(struct perfctr_cpu_state*); +int perfctr_cpu_update_control(struct perfctr_cpu_state *state) +{ + int err; + +#if PERFCTR_INTERRUPT_SUPPORT + if( perfctr_cstatus_has_ictrs(state->cstatus) ) + perfctr_cpu_isuspend(state); +#endif + state->cstatus = 0; + if( perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT ) { + /* permit at most one i-mode counter, since we don't yet + identify the source counter on LVTPC interrupt */ + if( state->control.nrictrs > 1 ) + return -EINVAL; + } else { + /* disallow i-mode counters if we cannot catch the interrupts */ + if( state->control.nrictrs ) + return -EPERM; + } + err = check_control(state); + if( err < 0 ) + return err; + state->cstatus = perfctr_mk_cstatus(state->control.tsc_on, + state->control.nractrs, + state->control.nrictrs); + setup_imode_start_values(state); + return 0; +} + +void perfctr_cpu_suspend(struct perfctr_cpu_state *state) +{ + unsigned int i, cstatus, nractrs; + struct perfctr_low_ctrs now; + +#if PERFCTR_INTERRUPT_SUPPORT + if( perfctr_cstatus_has_ictrs(state->cstatus) ) + perfctr_cpu_isuspend(state); +#endif + perfctr_cpu_read_counters(state, &now); + cstatus = state->cstatus; + if( perfctr_cstatus_has_tsc(cstatus) ) + state->sum.tsc += now.tsc - state->start.tsc; + nractrs = perfctr_cstatus_nractrs(cstatus); + 
for(i = 0; i < nractrs; ++i) + state->sum.pmc[i] += now.pmc[i] - state->start.pmc[i]; + /* perfctr_cpu_disable_rdpmc(); */ /* not for x86 */ +} + +void perfctr_cpu_resume(struct perfctr_cpu_state *state) +{ +#if PERFCTR_INTERRUPT_SUPPORT + if( perfctr_cstatus_has_ictrs(state->cstatus) ) + perfctr_cpu_iresume(state); +#endif + /* perfctr_cpu_enable_rdpmc(); */ /* not for x86 or global-mode */ + perfctr_cpu_write_control(state); + perfctr_cpu_read_counters(state, &state->start); + /* XXX: if (SMP && start.tsc == now.tsc) ++now.tsc; */ +} + +void perfctr_cpu_sample(struct perfctr_cpu_state *state) +{ + unsigned int i, cstatus, nractrs; + struct perfctr_low_ctrs now; + + perfctr_cpu_read_counters(state, &now); + cstatus = state->cstatus; + if( perfctr_cstatus_has_tsc(cstatus) ) { + state->sum.tsc += now.tsc - state->start.tsc; + state->start.tsc = now.tsc; + } + nractrs = perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nractrs; ++i) { + state->sum.pmc[i] += now.pmc[i] - state->start.pmc[i]; + state->start.pmc[i] = now.pmc[i]; + } +} + +static void (*clear_counters)(void); +static void perfctr_cpu_clear_counters(void) +{ + return clear_counters(); +} + +/**************************************************************** + * * + * Processor detection and initialisation procedures. * + * * + ****************************************************************/ + +/* see comment above at redirect_call() */ +static void __init finalise_backpatching(void) +{ + struct per_cpu_cache *cpu; + struct perfctr_cpu_state state; + + cpu = &per_cpu_cache[smp_processor_id()]; + memset(cpu, 0, sizeof *cpu); + memset(&state, 0, sizeof state); + state.cstatus = + (perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT) + ? perfctr_mk_cstatus(0, 0, 1) + : 0; + perfctr_cpu_sample(&state); + perfctr_cpu_resume(&state); + perfctr_cpu_suspend(&state); + perfctr_cpu_update_control(&state); + + redirect_call_disable = 1; +} + +static int __init intel_init(void) +{ + if( !cpu_has_tsc ) + return -ENODEV; + switch( boot_cpu_data.x86 ) { + case 5: + if( cpu_has_mmx ) { + perfctr_info.cpu_type = PERFCTR_X86_INTEL_P5MMX; + read_counters = p5mmx_read_counters; + } else { + perfctr_info.cpu_type = PERFCTR_X86_INTEL_P5; + perfctr_info.cpu_features &= ~PERFCTR_FEATURE_RDPMC; + read_counters = p5_read_counters; + } + write_control = p5_write_control; + check_control = p5_check_control; + clear_counters = p5_clear_counters; + perfctr_p5_init_tests(); + return 0; + case 6: + if( boot_cpu_data.x86_model >= 7 ) /* PIII */ + perfctr_info.cpu_type = PERFCTR_X86_INTEL_PIII; + else if( boot_cpu_data.x86_model >= 3 ) /* PII or Celeron */ + perfctr_info.cpu_type = PERFCTR_X86_INTEL_PII; + else + perfctr_info.cpu_type = PERFCTR_X86_INTEL_P6; + read_counters = p6_read_counters; + write_control = p6_write_control; + check_control = p6_check_control; + clear_counters = p6_clear_counters; +#if PERFCTR_INTERRUPT_SUPPORT + if( cpu_has_apic ) { + perfctr_info.cpu_features |= PERFCTR_FEATURE_PCINT; + cpu_isuspend = p6_isuspend; + cpu_iresume = p6_iresume; + } +#endif + perfctr_p6_init_tests(); + return 0; + case 15: /* Pentium 4 */ + { + unsigned int x; + + printk("perfctr: Pentium 4 detected\n"); + rdmsrl(0x1A0, x); /* IA32_MISC_ENABLE */ + if( !(x & (1 << 7)) ) { + printk("perfctr: Performance Monitoring is unavailable\n"); + return -ENODEV; + } + perfctr_info.cpu_type = PERFCTR_X86_INTEL_P4; + return -ENODEV; /* return 0; later */ + } + } + return -ENODEV; +} + +static int __init amd_init(void) +{ + if( !cpu_has_tsc ) + return -ENODEV; + switch( 
boot_cpu_data.x86 ) { + case 6: /* K7. Model 1 does not have a local APIC. + AMD Document #22007 Revision J hints that APIC-less + K7s signal overflows as debug interrupts. */ + perfctr_info.cpu_type = PERFCTR_X86_AMD_K7; + read_counters = p6_read_counters; + write_control = k7_write_control; + check_control = k7_check_control; + clear_counters = k7_clear_counters; +#if PERFCTR_INTERRUPT_SUPPORT + if( cpu_has_apic ) { + perfctr_info.cpu_features |= PERFCTR_FEATURE_PCINT; + cpu_isuspend = k7_isuspend; + cpu_iresume = k7_iresume; + } +#endif + perfctr_k7_init_tests(); + return 0; + } + return -ENODEV; +} + +static int __init cyrix_init(void) +{ + if( !cpu_has_tsc ) + return -ENODEV; + switch( boot_cpu_data.x86 ) { + case 6: /* 6x86MX, MII, or III */ + perfctr_info.cpu_type = PERFCTR_X86_CYRIX_MII; + read_counters = p5mmx_read_counters; + write_control = p5_write_control; + check_control = mii_check_control; + clear_counters = p5_clear_counters; + perfctr_mii_init_tests(); + return 0; + } + return -ENODEV; +} + +static int __init centaur_init(void) +{ + switch( boot_cpu_data.x86 ) { +#if !defined(CONFIG_X86_TSC) + case 5: + switch( boot_cpu_data.x86_model ) { + case 4: /* WinChip C6 */ + perfctr_info.cpu_type = PERFCTR_X86_WINCHIP_C6; + break; + case 8: /* WinChip 2, 2A, or 2B */ + case 9: /* WinChip 3, a 2A with larger cache and lower voltage */ + perfctr_info.cpu_type = PERFCTR_X86_WINCHIP_2; + break; + default: + return -ENODEV; + } + /* + * TSC must be inaccessible for perfctrs to work. + */ + if( !(read_cr4() & X86_CR4_TSD) || cpu_has_tsc ) + return -ENODEV; + perfctr_info.cpu_features &= ~PERFCTR_FEATURE_RDTSC; + read_counters = p5mmx_read_counters; + write_control = c6_write_control; + check_control = c6_check_control; + clear_counters = p5_clear_counters; + perfctr_c6_init_tests(); + return 0; +#endif + case 6: /* VIA Cyrix III / C3 */ + if( !cpu_has_tsc ) + return -ENODEV; + switch( boot_cpu_data.x86_model ) { + case 6: /* VIA Cyrix III */ + case 7: /* VIA C3 */ + break; + default: + return -ENODEV; + } + perfctr_info.cpu_type = PERFCTR_X86_VIA_C3; + read_counters = p5mmx_read_counters; + write_control = vc3_write_control; + check_control = vc3_check_control; + clear_counters = generic_clear_counters; + perfctr_vc3_init_tests(); +#if !defined(CONFIG_PERFCTR_INIT_TESTS) + printk(KERN_WARNING "perfctr: VIA C3 / Cyrix III processor detected.\n" + KERN_WARNING "perfctr: To help improve support for this processor, please reconfigure with CONFIG_PERFCTR_INIT_TESTS=y\n"); +#endif + return 0; + } + return -ENODEV; +} + +static int __init generic_init(void) +{ + if( !cpu_has_tsc ) + return -ENODEV; + perfctr_info.cpu_features &= ~PERFCTR_FEATURE_RDPMC; + perfctr_info.cpu_type = PERFCTR_X86_GENERIC; + check_control = generic_check_control; + write_control = generic_write_control; + read_counters = generic_read_counters; + clear_counters = generic_clear_counters; + return 0; +} + +static char generic_name[] __initdata = "Generic x86 with TSC"; +static char p5_name[] __initdata = "Intel Pentium"; +static char p5mmx_name[] __initdata = "Intel Pentium MMX"; +static char p6_name[] __initdata = "Intel Pentium Pro"; +static char pii_name[] __initdata = "Intel Pentium II"; +static char piii_name[] __initdata = "Intel Pentium III"; +static char mii_name[] __initdata = "Cyrix 6x86MX/MII/III"; +static char wcc6_name[] __initdata = "WinChip C6"; +static char wc2_name[] __initdata = "WinChip 2/3"; +static char k7_name[] __initdata = "AMD K7"; +static char vc3_name[] __initdata = "VIA Cyrix III / C3"; + 
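+/* Indexed by the PERFCTR_X86_* cpu_type values from <linux/perfctr.h>;
+   the `[index] name' entries below use GCC's labelled-element
+   initialiser extension. */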
+char *perfctr_cpu_name[] __initdata = { + [PERFCTR_X86_GENERIC] generic_name, + [PERFCTR_X86_INTEL_P5] p5_name, + [PERFCTR_X86_INTEL_P5MMX] p5mmx_name, + [PERFCTR_X86_INTEL_P6] p6_name, + [PERFCTR_X86_INTEL_PII] pii_name, + [PERFCTR_X86_INTEL_PIII] piii_name, + [PERFCTR_X86_CYRIX_MII] mii_name, + [PERFCTR_X86_WINCHIP_C6] wcc6_name, + [PERFCTR_X86_WINCHIP_2] wc2_name, + [PERFCTR_X86_AMD_K7] k7_name, + [PERFCTR_X86_VIA_C3] vc3_name, +}; + +static void __init perfctr_cpu_init_one(void *ignore) +{ + perfctr_cpu_clear_counters(); +#if PERFCTR_INTERRUPT_SUPPORT + if( cpu_has_apic ) + apic_write(APIC_LVTPC, LOCAL_PERFCTR_VECTOR); +#endif + if( perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC ) + set_in_cr4_local(X86_CR4_PCE); +} + +static void __exit perfctr_cpu_exit_one(void *ignore) +{ + perfctr_cpu_clear_counters(); +#if PERFCTR_INTERRUPT_SUPPORT + if( cpu_has_apic ) + apic_write(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED); +#endif + if( perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC ) + clear_in_cr4_local(X86_CR4_PCE); +} + +#if defined(NMI_LOCAL_APIC) && defined(CONFIG_PM) + +static void __init unregister_nmi_pmdev(void) +{ + if( nmi_pmdev ) { + apic_pm_unregister(nmi_pmdev); + nmi_pmdev = 0; + } +} + +static int x86_pm_callback(struct pm_dev *dev, pm_request_t rqst, void *data) +{ + /* XXX: incomplete */ + return 0; +} + +static struct pm_dev *x86_pmdev; + +static void __init x86_pm_init(void) +{ + x86_pmdev = apic_pm_register(PM_SYS_DEV, 0, x86_pm_callback); +} + +static void __exit x86_pm_exit(void) +{ + if( x86_pmdev ) { + apic_pm_unregister(x86_pmdev); + x86_pmdev = NULL; + } +} + +#else + +static inline void unregister_nmi_pmdev(void) { } +static inline void x86_pm_init(void) { } +static inline void x86_pm_exit(void) { } + +#endif /* NMI_LOCAL_APIC && CONFIG_PM */ + +#if defined(NMI_LOCAL_APIC) + +static void __init disable_nmi_watchdog(void) +{ + if( nmi_perfctr_msr ) { + nmi_perfctr_msr = 0; + printk(KERN_NOTICE "perfctr: disabled nmi_watchdog\n"); + unregister_nmi_pmdev(); + } +} + +#else + +static inline void disable_nmi_watchdog(void) { } + +#endif + +int __init perfctr_cpu_init(void) +{ + int err = -ENODEV; + + /* RDPMC and RDTSC are on by default. They will be disabled + by the init procedures if necessary. */ + perfctr_info.cpu_features = PERFCTR_FEATURE_RDPMC | PERFCTR_FEATURE_RDTSC; + + if( cpu_has_msr ) { + switch( boot_cpu_data.x86_vendor ) { + case X86_VENDOR_INTEL: + err = intel_init(); + break; + case X86_VENDOR_AMD: + err = amd_init(); + break; + case X86_VENDOR_CYRIX: + err = cyrix_init(); + break; + case X86_VENDOR_CENTAUR: + err = centaur_init(); + } + } + if( err ) { + err = generic_init(); /* last resort */ + if( err ) + return err; + } + /* + * Put the hardware in a sane state: + * - finalise resolution of backpatchable call sites + * - clear perfctr MSRs + * - set up APIC_LVTPC + * - set CR4.PCE [on permanently due to __flush_tlb_global()] + * - install our default interrupt handler + */ + if( perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC ) + mmu_cr4_features |= X86_CR4_PCE; + finalise_backpatching(); + perfctr_cpu_init_one(NULL); + smp_call_function(perfctr_cpu_init_one, NULL, 1, 1); + perfctr_cpu_set_ihandler(NULL); + /* + * Fix up the connection to the local APIC: + * - disable and disconnect the NMI watchdog + * - register our PM callback + */ + disable_nmi_watchdog(); + x86_pm_init(); + /* + * per_cpu_cache[] is initialised to contain "impossible" + * evntsel values guaranteed to differ from anything accepted + * by perfctr_cpu_check_control(). 
This way, initialisation of + * a CPU's evntsel MSRs will happen automatically the first time + * perfctr_cpu_write_control() executes on it. + * All-bits-one works for all currently supported processors. + * The memset also sets the ids to -1, which is intentional. + */ + memset(per_cpu_cache, ~0, sizeof per_cpu_cache); + + perfctr_info.cpu_khz = cpu_khz; + perfctr_info.nrcpus = smp_num_cpus; + + return 0; +} + +void __exit perfctr_cpu_exit(void) +{ + if( perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC ) + mmu_cr4_features &= ~X86_CR4_PCE; + perfctr_cpu_exit_one(NULL); + smp_call_function(perfctr_cpu_exit_one, NULL, 1, 1); + perfctr_cpu_set_ihandler(NULL); + x86_pm_exit(); + /* XXX: restart nmi watchdog? */ +} + +/**************************************************************** + * * + * Hardware reservation. * + * * + ****************************************************************/ + +static const char *current_service = 0; + +const char *perfctr_cpu_reserve(const char *service) +{ + if( current_service ) + return current_service; + current_service = service; + MOD_INC_USE_COUNT; + return 0; +} + +void perfctr_cpu_release(const char *service) +{ + if( service != current_service ) { + printk(KERN_ERR __FUNCTION__ + ": attempt by %s to release while reserved by %s\n", + service, current_service); + } else { + perfctr_cpu_set_ihandler(NULL); + current_service = 0; + MOD_DEC_USE_COUNT; + } +} diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/x86_compat.h linux-2.4.9-wt1/drivers/perfctr/x86_compat.h --- linux-2.4.9-wt1-2l/drivers/perfctr/x86_compat.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/x86_compat.h Tue Aug 28 17:01:45 2001 @@ -0,0 +1,44 @@ +/* $Id: x86_compat.h,v 1.14 2001/08/28 15:01:45 mikpe Exp $ + * Performance-monitoring counters driver. + * x86-specific compatibility definitions for 2.2/2.4 kernels. + * + * Copyright (C) 2000-2001 Mikael Pettersson + */ +#include +#include + +/* 2.4.9-ac3 added {read,write}_cr4() macros in */ +#if !defined(write_cr4) +static inline void write_cr4(unsigned int x) +{ + __asm__ __volatile__("movl %0,%%cr4" : : "r"(x)); +} + +static inline unsigned int read_cr4(void) +{ + unsigned int x; + __asm__ __volatile__("movl %%cr4,%0" : "=r"(x)); + return x; +} +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) + +/* missing from */ +#define cpu_has_mmx (test_bit(X86_FEATURE_MMX, boot_cpu_data.x86_capability)) +#define cpu_has_msr (test_bit(X86_FEATURE_MSR, boot_cpu_data.x86_capability)) + +#else /* 2.4 simulation for 2.2 */ + +#define cpu_has_mmx (boot_cpu_data.x86_capability & X86_FEATURE_MMX) +#define cpu_has_msr (boot_cpu_data.x86_capability & X86_FEATURE_MSR) +#define cpu_has_tsc (boot_cpu_data.x86_capability & X86_FEATURE_TSC) + +#define X86_CR4_TSD 0x0004 +#define X86_CR4_PCE 0x0100 + +unsigned long mmu_cr4_features; /*fake*/ + +#endif /* 2.4 simulation for 2.2 */ + +extern unsigned long cpu_khz; diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/x86_setup.c linux-2.4.9-wt1/drivers/perfctr/x86_setup.c --- linux-2.4.9-wt1-2l/drivers/perfctr/x86_setup.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/x86_setup.c Tue Aug 28 17:01:45 2001 @@ -0,0 +1,133 @@ +/* $Id: x86_setup.c,v 1.18 2001/08/28 15:01:45 mikpe Exp $ + * Performance-monitoring counters driver. + * x86-specific kernel-resident code. 
+ * + * Copyright (C) 1999-2001 Mikael Pettersson + */ +#include +#include +#include +#include +#include +#include +#include +#include "x86_compat.h" +#include /* for DEBUG */ + +#if PERFCTR_INTERRUPT_SUPPORT +unsigned int apic_lvtpc_irqs[NR_CPUS]; + +static void perfctr_default_ihandler(unsigned long pc) +{ + ++apic_lvtpc_irqs[smp_processor_id()]; +} + +static perfctr_ihandler_t perfctr_ihandler = perfctr_default_ihandler; + +static void __attribute__((unused)) +do_perfctr_interrupt(struct pt_regs *regs) +{ + /* XXX: should be rewritten in assembly and inlined below */ + /* XXX: recursive interrupts? delay the ACK, mask LVTPC, or queue? */ + ack_APIC_irq(); + (*perfctr_ihandler)(regs->eip); + /* XXX: on P4 LVTPC must now be unmasked */ +} + +#define BUILD_PERFCTR_INTERRUPT(x,v) XBUILD_PERFCTR_INTERRUPT(x,v) +#define XBUILD_PERFCTR_INTERRUPT(x,v) \ +asmlinkage void x(void); \ +__asm__( \ + "\n.text\n\t" \ + __ALIGN_STR "\n\t" \ + ".type " SYMBOL_NAME_STR(x) ",@function\n" \ + ".globl " SYMBOL_NAME_STR(x) "\n" \ +SYMBOL_NAME_STR(x) ":\n\t" \ + "pushl $" #v "\n\t" \ + SAVE_ALL \ + "pushl %esp\n\t" \ + "call " SYMBOL_NAME_STR(do_ ## x) "\n\t" \ + "addl $4,%esp\n\t" \ + "jmp ret_from_intr\n\t" \ + ".size " SYMBOL_NAME_STR(x) ",.-" SYMBOL_NAME_STR(x) "\n" \ + ".previous\n"); + +BUILD_PERFCTR_INTERRUPT(perfctr_interrupt,LOCAL_PERFCTR_VECTOR) + +void perfctr_cpu_set_ihandler(perfctr_ihandler_t ihandler) +{ + perfctr_ihandler = ihandler ? ihandler : perfctr_default_ihandler; +} +#endif + +#if defined(CONFIG_PERFCTR_DEBUG) && defined(CONFIG_PERFCTR_VIRTUAL) +struct shadow_vperfctr { + unsigned int pad[512]; + void *magic2; +}; + +void _vperfctr_set_thread(struct thread_struct *thread, struct vperfctr *perfctr) +{ + thread->perfctr = perfctr; + if( perfctr ) { + struct shadow_vperfctr *shadow; + shadow = (struct shadow_vperfctr*)perfctr; + shadow->magic2 = &shadow->magic2; + } +} + +struct vperfctr *__vperfctr_get_thread(const struct thread_struct *thread, + const char *function) +{ + struct vperfctr *perfctr; + struct shadow_vperfctr *shadow; + + perfctr = thread->perfctr; + if( !perfctr ) + return NULL; + if( (long)perfctr & (4096-1) ) { + printk(KERN_ERR "%s: BUG! perfctr 0x%08lx is not page aligned (pid %d, comm %s)\n", + function, (long)perfctr, current->pid, current->comm); + return NULL; + } + if( ((struct vperfctr_state*)perfctr)->magic != VPERFCTR_MAGIC ) { + printk(KERN_ERR "%s: BUG! perfctr 0x%08lx has invalid magic 0x%08x\n (pid %d, comm %s)\n", + function, (long)perfctr, ((struct vperfctr_state*)perfctr)->magic, current->pid, current->comm); + return NULL; + } + shadow = (struct shadow_vperfctr*)perfctr; + if( shadow->magic2 != &shadow->magic2 ) { + printk(KERN_ERR "%s: BUG! 
perfctr 0x%08lx has invalid magic2 0x%08lx\n (pid %d, comm %s)\n", + function, (long)perfctr, (long)shadow->magic2, current->pid, current->comm); + return NULL; + } + return perfctr; +} + +#ifdef CONFIG_PERFCTR_MODULE +EXPORT_SYMBOL(_vperfctr_set_thread); +EXPORT_SYMBOL(__vperfctr_get_thread); +#endif + +#endif /* DEBUG && VIRTUAL */ + +#ifdef CONFIG_PERFCTR_MODULE +EXPORT_SYMBOL(mmu_cr4_features); +EXPORT_SYMBOL(cpu_khz); + +#ifdef NMI_LOCAL_APIC +EXPORT_SYMBOL(nmi_perfctr_msr); + +#ifdef CONFIG_PM +EXPORT_SYMBOL(apic_pm_register); +EXPORT_SYMBOL(apic_pm_unregister); +EXPORT_SYMBOL(nmi_pmdev); +#endif /* CONFIG_PM */ + +#endif /* NMI_LOCAL_APIC */ + +#if PERFCTR_INTERRUPT_SUPPORT +EXPORT_SYMBOL(perfctr_cpu_set_ihandler); +#endif /* PERFCTR_INTERRUPT_SUPPORT */ + +#endif /* MODULE */ diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/x86_tests.c linux-2.4.9-wt1/drivers/perfctr/x86_tests.c --- linux-2.4.9-wt1-2l/drivers/perfctr/x86_tests.c Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/x86_tests.c Sun Aug 26 12:16:32 2001 @@ -0,0 +1,212 @@ +/* $Id: x86_tests.c,v 1.10 2001/08/26 10:16:32 mikpe Exp $ + * Performance-monitoring counters driver. + * Optional x86-specific init-time tests. + * + * Copyright (C) 1999-2001 Mikael Pettersson + */ +#include +#define __NO_VERSION__ +#include +#include +#include +#include +#include +#include "compat.h" +#include "x86_compat.h" +#include "x86_tests.h" + +#define MSR_P5_CESR 0x11 +#define MSR_P5_CTR0 0x12 +#define MSR_P6_PERFCTR0 0xC1 +#define MSR_P6_EVNTSEL0 0x186 +#define MSR_K7_EVNTSEL0 0xC0010000 +#define MSR_K7_PERFCTR0 0xC0010004 + +#define NITER 64 +#define X2(S) S";"S +#define X8(S) X2(X2(X2(S))) + +static void __init do_rdpmc(unsigned unused1, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("rdpmc") : : "c"(1) : "eax", "edx"); +} + +static void __init do_rdmsr(unsigned msr, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("rdmsr") : : "c"(msr) : "eax", "edx"); +} + +static void __init do_wrmsr(unsigned msr, unsigned data) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("wrmsr") : : "c"(msr), "a"(data), "d"(0)); +} + +static void __init do_rdcr4(unsigned unused1, unsigned unused2) +{ + unsigned i; + unsigned dummy; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("movl %%cr4,%0") : "=r"(dummy)); +} + +static void __init do_wrcr4(unsigned cr4, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("movl %0,%%cr4") : : "r"(cr4)); +} + +static void __init do_rdtsc(unsigned unused1, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("rdtsc") : : : "eax", "edx"); +} + +static void __init do_empty_loop(unsigned unused1, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__("" : : "c"(0)); +} + +static unsigned __init run(void (*doit)(unsigned, unsigned), + unsigned arg1, unsigned arg2) +{ + unsigned start, dummy, stop; + rdtsc(start, dummy); + (*doit)(arg1, arg2); /* should take < 2^32 cycles to complete */ + rdtsc(stop, dummy); + return stop - start; +} + +static void __init init_tests_message(void) +{ + printk(KERN_INFO "Please email the following PERFCTR INIT lines " + "to mikpe@csd.uu.se\n" + KERN_INFO "To remove this message, rebuild the driver " + "with CONFIG_PERFCTR_INIT_TESTS=n\n"); + printk(KERN_INFO "PERFCTR INIT: vendor %u, family %u, model %u, stepping %u, clock %lu kHz\n", + boot_cpu_data.x86_vendor, + 
boot_cpu_data.x86, + boot_cpu_data.x86_model, + boot_cpu_data.x86_mask, + cpu_khz); +} + +static void __init +measure_overheads(unsigned msr_evntsel0, unsigned evntsel0, unsigned msr_perfctr0) +{ + int i; + unsigned int loop, ticks[8]; + const char *name[8]; + + name[0] = "rdtsc"; + ticks[0] = run(do_rdtsc, 0, 0); + name[1] = "rdpmc"; + ticks[1] = (perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC) + ? run(do_rdpmc,0,0) : 0; + name[2] = "rdmsr (counter)"; + ticks[2] = run(do_rdmsr, msr_perfctr0, 0); + name[3] = "rdmsr (evntsel)"; + ticks[3] = run(do_rdmsr, msr_evntsel0, 0); + name[4] = "wrmsr (counter)"; + ticks[4] = run(do_wrmsr, msr_perfctr0, 0); + name[5] = "wrmsr (evntsel)"; + ticks[5] = run(do_wrmsr, msr_evntsel0, evntsel0); + name[6] = "read cr4"; + ticks[6] = run(do_rdcr4, 0, 0); + name[7] = "write cr4"; + ticks[7] = run(do_wrcr4, read_cr4(), 0); + + loop = run(do_empty_loop, 0, 0); + + wrmsr(msr_evntsel0, 0, 0); + + init_tests_message(); + printk(KERN_INFO "PERFCTR INIT: NITER == %u\n", NITER); + printk(KERN_INFO "PERFCTR INIT: loop overhead is %u cycles\n", loop); + for(i = 0; i < ARRAY_SIZE(ticks); ++i) { + unsigned int x; + if( !ticks[i] ) + continue; + x = ((ticks[i] - loop) * 10) / NITER; + printk(KERN_INFO "PERFCTR INIT: %s cost is %u.%u cycles (%u total)\n", + name[i], x/10, x%10, ticks[i]); + } +} + +void __init perfctr_p5_init_tests(void) +{ + unsigned evnt = 0x16 | (3 << 6); + measure_overheads(MSR_P5_CESR, evnt, MSR_P5_CTR0); +} + +void __init perfctr_p6_init_tests(void) +{ + unsigned evnt = 0xC0 | (3 << 16) | (1 << 22); + measure_overheads(MSR_P6_EVNTSEL0, evnt, MSR_P6_PERFCTR0); +} + +void __init perfctr_k7_init_tests(void) +{ + unsigned evnt = 0xC0 | (3 << 16) | (1 << 22); + measure_overheads(MSR_K7_EVNTSEL0, evnt, MSR_K7_PERFCTR0); +} + +#if !defined(CONFIG_X86_TSC) +void __init perfctr_c6_init_tests(void) +{ + unsigned int cesr, dummy; + + rdmsr(MSR_P5_CESR, cesr, dummy); + init_tests_message(); + printk(KERN_INFO "PERFCTR INIT: boot CESR == %#08x\n", cesr); +} +#endif + +static unsigned __init vc3_test(unsigned int mask) +{ + unsigned evntsel, before, after, dummy; + + evntsel = 0xC0 | (mask & ~0x1FF); + wrmsr(MSR_P6_EVNTSEL0+1, evntsel, 0); + rdpmc(1, before, dummy); + do_empty_loop(0, 0); + rdpmc(1, after, dummy); + return after - before; +} + +void __init perfctr_vc3_init_tests(void) +{ + unsigned int evntsel0, evntsel1, dummy; + unsigned int test0, test1; + + rdmsr(MSR_P6_EVNTSEL0+0, evntsel0, dummy); + rdmsr(MSR_P6_EVNTSEL0+1, evntsel1, dummy); + init_tests_message(); + printk(KERN_INFO "PERFCTR INIT: VIA C3: boot EVNTSEL0 == %#08x\n", + evntsel0); + printk(KERN_INFO "PERFCTR INIT: VIA C3: boot EVNTSEL1 == %#08x\n", + evntsel1); + /* check if the reserved bits can be toggled */ + wrmsr(MSR_P6_EVNTSEL0+1, (~evntsel1 & ~0x1FF), 0); + rdmsr(MSR_P6_EVNTSEL0+1, evntsel0, dummy); + printk(KERN_INFO "PERFCTR INIT: VIA C3: EVNTSEL1 after toggle == %#08x\n", + evntsel0); + /* check if the reserved bits should be preserved */ + test0 = vc3_test(evntsel1); + test1 = vc3_test(0); + printk(KERN_INFO "PERFCTR INIT: VIA C3: test0 == %u\n", + test0); + printk(KERN_INFO "PERFCTR INIT: VIA C3: test1 == %u\n", + test1); + /* restore the reserved bits, just in case */ + wrmsr(MSR_P6_EVNTSEL0+1, evntsel1, 0); +} diff -urN linux-2.4.9-wt1-2l/drivers/perfctr/x86_tests.h linux-2.4.9-wt1/drivers/perfctr/x86_tests.h --- linux-2.4.9-wt1-2l/drivers/perfctr/x86_tests.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/drivers/perfctr/x86_tests.h Tue Apr 17 01:16:59 2001 @@ -0,0 +1,21 @@ +/* 
$Id: x86_tests.h,v 1.3 2001/04/16 23:16:59 mikpe Exp $ + * Performance-monitoring counters driver. + * Optional x86-specific init-time tests. + * + * Copyright (C) 1999-2001 Mikael Pettersson + */ + +#ifdef CONFIG_PERFCTR_INIT_TESTS +extern void perfctr_p5_init_tests(void); +extern void perfctr_p6_init_tests(void); +extern void perfctr_k7_init_tests(void); +extern void perfctr_c6_init_tests(void); +extern void perfctr_vc3_init_tests(void); +#else +#define perfctr_p5_init_tests() +#define perfctr_p6_init_tests() +#define perfctr_k7_init_tests() +#define perfctr_c6_init_tests() +#define perfctr_vc3_init_tests() +#endif +#define perfctr_mii_init_tests() perfctr_p5_init_tests() diff -urN linux-2.4.9-wt1-2l/include/asm-i386/perfctr.h linux-2.4.9-wt1/include/asm-i386/perfctr.h --- linux-2.4.9-wt1-2l/include/asm-i386/perfctr.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/include/asm-i386/perfctr.h Tue Aug 28 17:01:45 2001 @@ -0,0 +1,138 @@ +/* $Id: perfctr.h,v 1.15 2001/08/28 15:01:45 mikpe Exp $ + * x86 Performance-Monitoring Counters driver + * + * Copyright (C) 1999-2001 Mikael Pettersson + */ +#ifndef _ASM_I386_PERFCTR_H +#define _ASM_I386_PERFCTR_H + +struct perfctr_sum_ctrs { + unsigned long long tsc; + unsigned long long pmc[18]; +}; + +struct perfctr_low_ctrs { + unsigned int tsc; + unsigned int pmc[18]; +}; + +struct perfctr_cpu_control { + unsigned int tsc_on; + unsigned int nractrs; /* # of a-mode counters */ + unsigned int nrictrs; /* # of i-mode counters */ + unsigned int pmc_map[18]; + unsigned int evntsel[18]; /* one per counter, even on P5 */ + unsigned int evntsel_aux[18]; /* e.g. P4 ESCR contents */ + int ireset[18]; /* <= 0, for i-mode counters */ +}; + +struct perfctr_cpu_state { + unsigned int cstatus; + union { + unsigned int p5_cesr; + unsigned int id; /* cache owner id */ + } k1; + struct perfctr_sum_ctrs sum; + struct perfctr_low_ctrs start; + struct perfctr_cpu_control control; + union { + unsigned int p4_escr_map[18]; + } k2; +}; + +/* `struct perfctr_cpu_state' binary layout version number */ +#define PERFCTR_CPU_STATE_MAGIC 0x0200 /* 2.0 */ + +/* cstatus is a re-encoding of control.tsc_on/nractrs/nrictrs + which should have less overhead in most cases */ + +static inline +unsigned int perfctr_mk_cstatus(unsigned int tsc_on, unsigned int nractrs, + unsigned int nrictrs) +{ + return (tsc_on<<31) | (nrictrs<<16) | ((nractrs+nrictrs)<<8) | nractrs; +} + +static inline unsigned int perfctr_cstatus_enabled(unsigned int cstatus) +{ + return cstatus; +} + +static inline int perfctr_cstatus_has_tsc(unsigned int cstatus) +{ + return (int)cstatus < 0; /* test and jump on sign */ +} + +static inline unsigned int perfctr_cstatus_nractrs(unsigned int cstatus) +{ + return cstatus & 0x7F; /* and with imm8 */ +} + +static inline unsigned int perfctr_cstatus_nrctrs(unsigned int cstatus) +{ + return (cstatus >> 8) & 0x7F; +} + +static inline unsigned int perfctr_cstatus_has_ictrs(unsigned int cstatus) +{ + return cstatus & (0x7F << 16); +} + +#ifdef __KERNEL__ + +#if defined(CONFIG_PERFCTR) || defined(CONFIG_PERFCTR_MODULE) + +/* Driver init/exit. */ +extern int perfctr_cpu_init(void); +extern void perfctr_cpu_exit(void); + +/* CPU type name. */ +extern char *perfctr_cpu_name[]; + +/* Hardware reservation. */ +extern const char *perfctr_cpu_reserve(const char *service); +extern void perfctr_cpu_release(const char *service); + +/* Check that the new control data is valid. + Update the driver's private control data. + Returns a negative error code if the control data is invalid. 
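+   For illustration only, one control block that passes the P6/K7 checks:
+   tsc_on = 1, nractrs = 1, nrictrs = 0, pmc_map[0] = 0 and
+   evntsel[0] = 0xC0 | (3 << 16) | (1 << 22), i.e. an event code with
+   the CPL field and the ENable bit set and the INT bit clear (the same
+   a-mode selector that x86_tests.c uses).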
*/ +extern int perfctr_cpu_update_control(struct perfctr_cpu_state *state); + +/* Read a-mode counters. Subtract from start and accumulate into sums. */ +extern void perfctr_cpu_suspend(struct perfctr_cpu_state *state); + +/* Write control registers. Read a-mode counters into start. */ +extern void perfctr_cpu_resume(struct perfctr_cpu_state *state); + +/* Perform an efficient combined suspend/resume operation. */ +extern void perfctr_cpu_sample(struct perfctr_cpu_state *state); + +typedef void (*perfctr_ihandler_t)(unsigned long pc); + +#ifdef CONFIG_X86_LOCAL_APIC +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) +#include +#include +struct hw_interrupt_type; +#include +#ifdef LOCAL_PERFCTR_VECTOR +#define PERFCTR_INTERRUPT_SUPPORT 1 +#endif +#endif +#endif + +#if PERFCTR_INTERRUPT_SUPPORT +extern unsigned int apic_lvtpc_irqs[]; +extern void perfctr_interrupt(void); +extern void perfctr_cpu_set_ihandler(perfctr_ihandler_t); +extern void perfctr_cpu_ireload(const struct perfctr_cpu_state*); +#else +static inline void perfctr_cpu_set_ihandler(perfctr_ihandler_t x) { } +#endif + +#endif /* CONFIG_PERFCTR */ + +#endif /* __KERNEL__ */ + +#endif /* _ASM_I386_PERFCTR_H */ diff -urN linux-2.4.9-wt1-2l/include/linux/perfctr.h linux-2.4.9-wt1/include/linux/perfctr.h --- linux-2.4.9-wt1-2l/include/linux/perfctr.h Thu Jan 1 01:00:00 1970 +++ linux-2.4.9-wt1/include/linux/perfctr.h Tue Aug 28 17:01:45 2001 @@ -0,0 +1,220 @@ +/* $Id: perfctr.h,v 1.24 2001/08/28 15:01:45 mikpe Exp $ + * Performance-Monitoring Counters driver + * + * Copyright (C) 1999-2001 Mikael Pettersson + */ +#ifndef _LINUX_PERFCTR_H +#define _LINUX_PERFCTR_H + +#include + +struct perfctr_info { + char version[32]; + unsigned char nrcpus; + unsigned char cpu_type; + unsigned char cpu_features; + unsigned long cpu_khz; +}; + +/* cpu_type values */ +#define PERFCTR_X86_GENERIC 0 /* any x86 with rdtsc */ +#define PERFCTR_X86_INTEL_P5 1 /* no rdpmc */ +#define PERFCTR_X86_INTEL_P5MMX 2 +#define PERFCTR_X86_INTEL_P6 3 +#define PERFCTR_X86_INTEL_PII 4 +#define PERFCTR_X86_INTEL_PIII 5 +#define PERFCTR_X86_CYRIX_MII 6 +#define PERFCTR_X86_WINCHIP_C6 7 /* no rdtsc */ +#define PERFCTR_X86_WINCHIP_2 8 /* no rdtsc */ +#define PERFCTR_X86_AMD_K7 9 +#define PERFCTR_X86_VIA_C3 10 /* no pmc0 */ +#define PERFCTR_X86_INTEL_P4 11 + +/* cpu_features flag bits */ +#define PERFCTR_FEATURE_RDPMC 0x01 +#define PERFCTR_FEATURE_RDTSC 0x02 +#define PERFCTR_FEATURE_PCINT 0x04 + +struct perfctr_ibuf_entry { + unsigned long pc; + unsigned int pmc; +}; + +/* user's view of mmap:ed virtual perfctr */ +struct vperfctr_state { + unsigned int magic; + int si_signo; + int si_code; + unsigned int ibuf_offset; + unsigned int ibuf_size; + struct perfctr_cpu_state cpu_state; +}; + +/* `struct vperfctr_state' binary layout version number */ +#define VPERFCTR_STATE_MAGIC 0x0200 /* 2.0 */ +#define VPERFCTR_MAGIC ((VPERFCTR_STATE_MAGIC<<16)|PERFCTR_CPU_STATE_MAGIC) + +/* parameter in VPERFCTR_CONTROL command */ +struct vperfctr_control { + int si_signo; + int si_code; + struct perfctr_cpu_control cpu_control; +}; + +/* parameter in GPERFCTR_CONTROL command */ +struct gperfctr_control { + unsigned long interval_usec; + unsigned int nrcpus; + struct perfctr_cpu_control cpu_control[1]; /* actually 'nrcpus' */ +}; + +/* returned by GPERFCTR_READ command */ +struct gperfctr_cpu_state { + struct perfctr_cpu_control cpu_control; + struct perfctr_sum_ctrs sum; +}; +struct gperfctr_state { + unsigned nrcpus; + struct gperfctr_cpu_state cpu_state[1]; /* actually 'nrcpus' */ 
+};
+
+#include
+#define _PERFCTR_IOCTL  0xD0    /* 'P'+128, currently unassigned */
+
+#define PERFCTR_INFO       _IOR(_PERFCTR_IOCTL,0,struct perfctr_info)
+
+#define VPERFCTR_SAMPLE    _IO(_PERFCTR_IOCTL,1)
+#define VPERFCTR_UNLINK    _IO(_PERFCTR_IOCTL,2)
+#define VPERFCTR_CONTROL   _IOW(_PERFCTR_IOCTL,3,struct vperfctr_control)
+#define VPERFCTR_STOP      _IO(_PERFCTR_IOCTL,4)
+#define VPERFCTR_IRESUME   _IO(_PERFCTR_IOCTL,5)
+
+#define GPERFCTR_CONTROL   _IOW(_PERFCTR_IOCTL,16,struct gperfctr_control)
+#define GPERFCTR_READ      _IOR(_PERFCTR_IOCTL,17,struct gperfctr_state)
+#define GPERFCTR_STOP      _IO(_PERFCTR_IOCTL,18)
+
+#ifdef __KERNEL__
+
+extern struct perfctr_info perfctr_info;
+extern int sys_perfctr_info(struct perfctr_info*);
+
+/*
+ * Virtual per-process performance-monitoring counters.
+ */
+struct vperfctr;        /* opaque */
+
+#ifdef CONFIG_PERFCTR_VIRTUAL
+
+/* process management operations */
+extern struct vperfctr *__vperfctr_copy(struct vperfctr*);
+extern void __vperfctr_exit(struct vperfctr*);
+extern void __vperfctr_suspend(struct vperfctr*);
+extern void __vperfctr_resume(struct vperfctr*);
+extern void __vperfctr_sample(struct vperfctr*);
+
+#ifdef CONFIG_PERFCTR_MODULE
+extern struct vperfctr_stub {
+        void (*exit)(struct vperfctr*);
+        void (*suspend)(struct vperfctr*);
+        void (*resume)(struct vperfctr*);
+        void (*sample)(struct vperfctr*);
+        struct file_operations *file_ops;
+} vperfctr_stub;
+/* lock taken on module load/unload and ->file_ops access;
+   the process management operations don't take the lock
+   because the module is known to be loaded and in use */
+extern rwlock_t vperfctr_stub_lock;
+#define _vperfctr_exit(x)       vperfctr_stub.exit((x))
+#define _vperfctr_suspend(x)    vperfctr_stub.suspend((x))
+#define _vperfctr_resume(x)     vperfctr_stub.resume((x))
+#define _vperfctr_sample(x)     vperfctr_stub.sample((x))
+#else   /* !CONFIG_PERFCTR_MODULE */
+#define _vperfctr_exit(x)       __vperfctr_exit((x))
+#define _vperfctr_suspend(x)    __vperfctr_suspend((x))
+#define _vperfctr_resume(x)     __vperfctr_resume((x))
+#define _vperfctr_sample(x)     __vperfctr_sample((x))
+#endif  /* CONFIG_PERFCTR_MODULE */
+
+#ifdef CONFIG_PERFCTR_DEBUG
+
+extern void _vperfctr_set_thread(struct thread_struct*, struct vperfctr*);
+extern struct vperfctr *__vperfctr_get_thread(const struct thread_struct*,
+                                              const char*);
+#define _vperfctr_get_thread(thread)    __vperfctr_get_thread((thread),__FUNCTION__)
+
+#else   /* !CONFIG_PERFCTR_DEBUG */
+
+static inline void _vperfctr_set_thread(struct thread_struct *thread,
+                                        struct vperfctr *perfctr)
+{
+        thread->perfctr = perfctr;
+}
+
+static inline struct vperfctr *
+_vperfctr_get_thread(const struct thread_struct *thread)
+{
+        return thread->perfctr;
+}
+
+#endif  /* CONFIG_PERFCTR_DEBUG */
+
+static inline void perfctr_copy_thread(struct thread_struct *thread)
+{
+        _vperfctr_set_thread(thread, NULL);
+}
+
+static inline void perfctr_exit_thread(struct thread_struct *thread)
+{
+        struct vperfctr *perfctr;
+        perfctr = _vperfctr_get_thread(thread);
+        if( perfctr ) {
+                _vperfctr_set_thread(thread, NULL);
+                _vperfctr_exit(perfctr);
+        }
+}
+
+static inline void perfctr_suspend_thread(struct thread_struct *prev)
+{
+        struct vperfctr *perfctr;
+        perfctr = _vperfctr_get_thread(prev);
+        if( perfctr )
+                _vperfctr_suspend(perfctr);
+}
+
+/* PRE: next is current */
+static inline void perfctr_resume_thread(struct thread_struct *next)
+{
+        struct vperfctr *perfctr;
+        perfctr = _vperfctr_get_thread(next);
+        if( perfctr )
+                _vperfctr_resume(perfctr);
+}
+
+static inline void perfctr_sample_thread(struct thread_struct *thread)
+{
+#ifdef CONFIG_SMP
+        struct vperfctr *perfctr;
+        perfctr = _vperfctr_get_thread(thread);
+        if( perfctr )
+                _vperfctr_sample(perfctr);
+#endif
+}
+
+#define PERFCTR_PROC_PID_MODE   (0 | S_IRUSR)
+extern void perfctr_set_proc_pid_ops(struct inode *inode);
+/* for 2.2: */
+extern struct inode_operations perfctr_proc_pid_inode_operations;
+
+#else   /* !CONFIG_PERFCTR_VIRTUAL */
+
+static inline void perfctr_copy_thread(struct thread_struct *t) { }
+static inline void perfctr_exit_thread(struct thread_struct *t) { }
+static inline void perfctr_suspend_thread(struct thread_struct *t) { }
+static inline void perfctr_resume_thread(struct thread_struct *t) { }
+static inline void perfctr_sample_thread(struct thread_struct *t) { }
+
+#endif  /* CONFIG_PERFCTR_VIRTUAL */
+
+#endif  /* __KERNEL__ */
+
+#endif  /* _LINUX_PERFCTR_H */
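
The low-level API declared in asm-i386/perfctr.h above splits counter handling into an
accumulation step (perfctr_cpu_suspend: read a-mode counters, subtract start values, add
into sums) and a reprogram step (perfctr_cpu_resume: write control registers, latch new
start values), with perfctr_cpu_sample combining the two. The sketch below only restates
those documented semantics as code; the example_* wrappers and their call sites are
hypothetical and not part of the patch.

/* Sketch only: assumes the patched asm-i386/perfctr.h is in scope and that
 * 'state' is the per-context struct perfctr_cpu_state kept by a higher-level
 * driver.  The example_* names are invented for illustration.
 */
static void example_switch_away(struct perfctr_cpu_state *state)
{
        /* read a-mode counters, subtract start values, accumulate into sums */
        perfctr_cpu_suspend(state);
}

static void example_switch_to(struct perfctr_cpu_state *state)
{
        /* write control registers, read a-mode counters into start */
        perfctr_cpu_resume(state);
}

static void example_refresh_sums(struct perfctr_cpu_state *state)
{
        /* combined suspend+resume in one pass: sums are brought up to date
           while counting continues from fresh start values */
        perfctr_cpu_sample(state);
}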
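
On the user-space side, linux/perfctr.h defines the ioctl commands; the simplest of them,
PERFCTR_INFO, fills in a struct perfctr_info describing the driver and CPU. The fragment
below is an illustrative sketch, not part of the patch: the /proc/self/perfctr path is an
assumption drawn from the per-process proc interface referenced above, and it presumes
the patched header is visible on the include path.

/* Illustrative user-space query of the driver via PERFCTR_INFO.
 * The proc path and header location are assumptions (see note above).
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/perfctr.h>

int main(void)
{
        struct perfctr_info info;
        int fd = open("/proc/self/perfctr", O_RDONLY);

        if (fd < 0) {
                perror("open /proc/self/perfctr");
                return 1;
        }
        if (ioctl(fd, PERFCTR_INFO, &info) < 0) {
                perror("ioctl PERFCTR_INFO");
                close(fd);
                return 1;
        }
        printf("perfctr %s: %u CPU(s), cpu_type %u, features 0x%x, %lu kHz\n",
               info.version, info.nrcpus, info.cpu_type,
               info.cpu_features, info.cpu_khz);
        close(fd);
        return 0;
}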