Find the answer to your Linux question:
Results 1 to 4 of 4
I managed to get a performance monitoring counter to run and generate local APIC interrupts on overflow. How do I register an interrupt handler for those interrupts?...
Enjoy an ad free experience by logging in. Not a member yet? Register.
  1. #1
    Just Joined!
    Join Date
    Oct 2013
    Posts
    4

    How to register performance counter overflow interrupt handler?


    I managed to get a performance monitoring counter to run and generate local APIC interrupts on overflow. How do I register an interrupt handler for those interrupts?

  2. #2
    Just Joined!
    Join Date
    Oct 2013
    Posts
    4
    OK, to clarify the problem and describe my current approach:

    I made a very simple char driver which starts a performance monitoring counter when initialized, by setting the appropriate MSRs. In theory, the driver also registers a handler for the performance counter overflow, which just does a printk.

    By checking the counter I can see that it is initialized properly and is incrementing in value, and that it overflows (by going from negative to positive values).

    Here is the code for the driver:

    Code:
    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/fs.h>
    #include <linux/device.h>
    #include <linux/cdev.h>
    #include <linux/kdebug.h>
    #include <linux/err.h>
    #include <linux/smp.h>
    
    // Prefix passed as the first "%s" argument of every printk in this driver
    const char TAG[] = "Perf Driver :";
    
    // Device info
    // Device number filled in by alloc_chrdev_region() in perf_init()
    static dev_t mdev;
    // Character device bound to pugs_fops by cdev_init() in perf_init()
    static struct cdev c_dev;
    // Device class created by class_create() in perf_init()
    static struct class *cl;
    
    // Model-Specific Registers
    // MSR numbers passed as the "reg" argument of wrmsr64_safe_on_cpu() and
    // rdmsr_safe_on_cpu(). NOTE(review): values are presumably taken from the
    // Intel SDM -- confirm against the target CPU before reuse.
    static const u32 IA32_PERF_GLOBAL_CTRL        = 0x038F;
    static const u32 IA32_PERF_GLOBAL_OVF_CTRL	  = 0x0390;
    static const u32 IA32_PERF_FIXED_CTR_CTRL	  = 0x038D;
    // Fixed counters: only ever zeroed (clear_msrs), never programmed here
    static const u32 IA32_FIXED_CTR0		= 0x0309;
    static const u32 IA32_FIXED_CTR1		= 0x030A;
    static const u32 IA32_FIXED_CTR2		= 0x030B;
    // Programmable counters: only IA32_PMC0 is armed and read by this driver
    static const u32 IA32_PMC0              = 0x00C1;
    static const u32 IA32_PMC1              = 0x00C2;
    static const u32 IA32_PMC2              = 0x00C3;
    static const u32 IA32_PMC3              = 0x00C4;
    // Event-select registers: only IA32_PERFEVTSEL0 is programmed by this driver
    static const u32 IA32_PERFEVTSEL0       = 0x0186;
    static const u32 IA32_PERFEVTSEL1       = 0x0187;
    static const u32 IA32_PERFEVTSEL2       = 0x0188;
    static const u32 IA32_PERFEVTSEL3       = 0x0189;
    
    // Event code OR-ed into IA32_PERFEVTSEL0 by init_msr_counters()
    // (going by the name: the "instructions retired" event -- confirm)
    static const u64 INST_RETIRED           = 0x00C0;
    
    // Valid entries for IA32_PERF_GLOBAL_OVF_CTRL
    // CLR_OVF_PMC0 and CLR_OVF_BUFFER are written by init_msr_counters();
    // CLR_PEBS_CNDCHANGED is not referenced by the code shown here.
    static const u64 CLR_OVF_PMC0           = (u64) 0x1 << 0;
    static const u64 CLR_OVF_BUFFER         = (u64) 0x1 << 62;
    static const u64 CLR_PEBS_CNDCHANGED    = (u64) 0x1 << 63;
    
    // Valid entries for IA32_PERFEVTSELn
    // INT_ENABLE, COUNTER_ENABLE and USR_MODE are set by init_msr_counters();
    // OS_MODE is not referenced by the code shown here.
    static const u64 INT_ENABLE             = (u64) 0x1 << 20;
    static const u64 COUNTER_ENABLE         = (u64) 0x1 << 22;
    static const u64 USR_MODE               = (u64) 0x1 << 16;
    static const u64 OS_MODE                = (u64) 0x1 << 17;
    
    
    /*
     * Write the 64-bit value @val to MSR @reg on CPU @cpu.
     *
     * wrmsr_safe_on_cpu() takes the value as two 32-bit halves (low, then
     * high). The halves are taken straight from the u64: the previous version
     * cast through unsigned long first, which throws away bits 63:32 on a
     * 32-bit kernel and therefore always wrote a zero high half there.
     *
     * The status returned by wrmsr_safe_on_cpu() is not propagated because
     * this helper returns void.
     */
    static void wrmsr64_safe_on_cpu(int cpu, u32 reg, u64 val)
    {
        u32 lo = (u32)val;
        u32 hi = (u32)(val >> 32);

        wrmsr_safe_on_cpu(cpu, reg, lo, hi);
    }
    
    static void clear_msrs(void)
    {
        int cpu;
        for_each_online_cpu(cpu)
        {
            wrmsr64_safe_on_cpu(cpu, IA32_PERF_GLOBAL_CTRL,       0x0);
            wrmsr64_safe_on_cpu(cpu, IA32_PERF_GLOBAL_OVF_CTRL,   0x0);
            wrmsr64_safe_on_cpu(cpu, IA32_PERF_FIXED_CTR_CTRL,    0x0);
    
            wrmsr64_safe_on_cpu(cpu, IA32_FIXED_CTR0,             0x0);
            wrmsr64_safe_on_cpu(cpu, IA32_FIXED_CTR1,             0x0);
            wrmsr64_safe_on_cpu(cpu, IA32_FIXED_CTR2,             0x0);
    
            wrmsr64_safe_on_cpu(cpu, IA32_PMC0,                   0x0);
            wrmsr64_safe_on_cpu(cpu, IA32_PMC1,                   0x0);
            wrmsr64_safe_on_cpu(cpu, IA32_PMC2,                   0x0);
            wrmsr64_safe_on_cpu(cpu, IA32_PMC3,                   0x0);
    
            wrmsr64_safe_on_cpu(cpu, IA32_PERFEVTSEL0,            0x0);
            wrmsr64_safe_on_cpu(cpu, IA32_PERFEVTSEL1,            0x0);
            wrmsr64_safe_on_cpu(cpu, IA32_PERFEVTSEL2,            0x0);
            wrmsr64_safe_on_cpu(cpu, IA32_PERFEVTSEL3,            0x0);
        }
    }
    
    /*
     * Log the current value of IA32_PMC0 for every online CPU.
     *
     * rdmsr_safe_on_cpu() hands the value back as two 32-bit halves, which are
     * recombined into one u64 before printing. If the read fails the halves
     * are not valid, so an error is logged for that CPU instead of printing
     * garbage, and the loop moves on to the next CPU.
     */
    static void read_msrs(void)
    {
        int cpu;

        for_each_online_cpu(cpu) {
            u32 lo = 0, hi = 0;
            u64 val;
            int err;

            err = rdmsr_safe_on_cpu(cpu, IA32_PMC0, &lo, &hi);
            if (err) {
                printk(KERN_ERR "%s failed to read IA32_PMC0 on CPU %d (error %d)\n",
                        TAG, cpu, err);
                continue;
            }

            val = ((u64)hi << 32) | (u64)lo;
            printk(KERN_INFO "%s IA32_PMC0 value : %llx\n", TAG, val);
        }
    }
    
    static void init_msr_counters(void)
    {
        int cpu;
        for_each_online_cpu(cpu)
        {
            u64 counterVal = (u64)(-999);
    
            // Clear overflow by setting bit in GLOBAL_OVF_CTRL
            wrmsr64_safe_on_cpu(cpu, 
                    IA32_PERF_GLOBAL_OVF_CTRL, 
                    CLR_OVF_PMC0
                    | CLR_OVF_BUFFER);
    
            // Program programmable counter 0
            wrmsr64_safe_on_cpu(cpu, 
                    IA32_PERFEVTSEL0, 
                    (u64)INST_RETIRED
                    | INT_ENABLE 
                    | COUNTER_ENABLE 
                    | USR_MODE); 
    
            // Reset PMC0 counter
            wrmsr64_safe_on_cpu(cpu, IA32_PMC0, counterVal);
        }
    }
    
    static void begin_sampling(void)
    {
        int cpu;
        for_each_online_cpu(cpu)
        {
            wrmsr64_safe_on_cpu(cpu, IA32_PERF_GLOBAL_CTRL, 0x1);
        }
    }
    
    static void end_sampling(void)
    {
        int cpu;
        for_each_online_cpu(cpu)
        {
            wrmsr64_safe_on_cpu(cpu, IA32_PERF_GLOBAL_CTRL, 0x0);
        }
    }
    
    /*
     * Die-notifier callback that reports the performance counter overflow NMI.
     *
     * @self:   notifier block this callback was registered through (unused)
     * @cmd:    die event code passed down the notifier chain
     * @__args: event payload supplied by the chain (unused)
     *
     * The die chain carries many kinds of events besides NMIs. Anything that
     * is not DIE_NMI is passed on untouched with NOTIFY_DONE; previously every
     * event was answered with NOTIFY_STOP, which hid unrelated die events from
     * all later notifiers.
     *
     * Returns NOTIFY_DONE for non-NMI events and NOTIFY_STOP after logging an
     * NMI.
     *
     * NOTE(review): an NMI that was not raised by this counter is still
     * claimed here, and the overflow status is neither checked nor cleared --
     * consider testing the PMC0 overflow bit before returning NOTIFY_STOP.
     */
    static int __kprobes PMC_handler(struct notifier_block *self,
            unsigned long cmd, void *__args)
    {
        if (cmd != DIE_NMI)
            return NOTIFY_DONE;

        printk(KERN_INFO "%s Got the PMC interrupt!!\n", TAG);

        /* Write the LVT entry again after each interrupt; presumably the
         * local APIC masks it on delivery -- confirm against the SDM. */
        apic_write(APIC_LVTPC,APIC_DM_NMI);

        return NOTIFY_STOP;
    }
    
    /*
     * Notifier block handed to register_die_notifier() and
     * unregister_die_notifier(); it routes die-chain events to PMC_handler.
     * NOTE(review): the last post of this thread reports that the handler was
     * never invoked with priority 0 and that NMI_LOCAL_LOW_PRIOR was needed.
     */
    static __read_mostly struct notifier_block PMC_notifier = {
        .notifier_call          = PMC_handler,
        .next                   = NULL,
        .priority               = 0
    };
    
    static int init_PMC_handler(void)
    {
        apic_write(APIC_LVTPC,APIC_DM_NMI);
        register_die_notifier(&PMC_notifier);
    
        return 0;
    }
    
    static int perf_open(struct inode *i, struct file *f)
    {
        printk(KERN_INFO "%s open()\n", TAG);
        return 0;
    }
    
    static int perf_close(struct inode *i, struct file *f)
    {
        printk(KERN_INFO "%s close()\n", TAG);
        return 0;
    }
    
    /*
     * read() file operation. Nothing is copied to @buf; instead the current
     * IA32_PMC0 value of every online CPU is written to the kernel log via
     * read_msrs(). Always returns 0, so the caller sees end-of-file.
     */
    static ssize_t perf_read(struct file *f, char __user *buf,
            size_t len, loff_t *off)
    {
        pr_info("%s read()\n", TAG);

        read_msrs();

        return 0;
    }
    
    /*
     * write() file operation. The data in @buf is ignored; the call is only
     * logged.
     *
     * Returns @len, i.e. reports the whole buffer as consumed. Returning 0
     * for a non-empty write (as before) tells the caller that nothing was
     * written, and callers that retry short writes would then loop forever.
     */
    static ssize_t perf_write(struct file *f, const char __user *buf,
            size_t len, loff_t *off)
    {
        printk(KERN_INFO "%s write()\n", TAG);
        return len;
    }
    
    static void killall(void)
    {
        end_sampling();
        clear_msrs();
        unregister_die_notifier(&PMC_notifier);
    
        cdev_del(&c_dev);
        device_destroy(cl, mdev);
        class_destroy(cl);
        unregister_chrdev_region(mdev, 1);
    
        printk(KERN_INFO "%s device unregistered\n", TAG);
    }
    
    static struct file_operations pugs_fops =
    {
        .owner = THIS_MODULE,
        .open = perf_open,
        .release = perf_close,
        .read = perf_read,
        .write = perf_write
    };
    
    /*
     * Module constructor.
     *
     * Sets up, in order: the chrdev region, the device class, the device node,
     * the cdev, and the NMI notifier; then clears the MSRs, arms PMC0, logs
     * its initial value and starts counting.
     *
     * Returns 0 on success or a negative errno. On failure everything that was
     * set up so far is released again, in reverse order.
     *
     * class_create() and device_create() report failure through ERR_PTR(), not
     * NULL, and cdev_add() returns an arbitrary negative errno, so the results
     * are tested with IS_ERR() / "< 0"; the earlier "== NULL" and "== -1"
     * tests could never detect those failures.
     */
    static int __init perf_init(void) /* Constructor */
    {
        struct device *dev;
        int err;

        err = alloc_chrdev_region(&mdev, 0, 1, "perf");
        if (err < 0)
            return err;

        printk(KERN_INFO "%s device <%d,%d> registered\n", TAG, MAJOR(mdev), MINOR(mdev));

        cl = class_create(THIS_MODULE, "perf");
        if (IS_ERR(cl)) {
            printk(KERN_ERR "%s could not create class!\n", TAG);
            err = PTR_ERR(cl);
            goto err_unregister_region;
        }

        dev = device_create(cl, NULL, mdev, NULL, "perf");
        if (IS_ERR(dev)) {
            printk(KERN_ERR "%s could not create device!\n", TAG);
            err = PTR_ERR(dev);
            goto err_destroy_class;
        }

        cdev_init(&c_dev, &pugs_fops);

        err = cdev_add(&c_dev, mdev, 1);
        if (err < 0) {
            printk(KERN_ERR "%s could not add device!\n", TAG);
            goto err_destroy_device;
        }

        err = init_PMC_handler();
        if (err != 0) {
            printk(KERN_ERR "%s failed to register NMI notifier!\n", TAG);
            /* Everything exists at this point, so the full teardown applies. */
            killall();
            /* Module init must report failure as a negative value. */
            return err < 0 ? err : -EINVAL;
        }

        clear_msrs();
        init_msr_counters();
        read_msrs();
        begin_sampling();

        printk(KERN_INFO "%s initialization successful\n", TAG);

        return 0;

    err_destroy_device:
        device_destroy(cl, mdev);
    err_destroy_class:
        class_destroy(cl);
    err_unregister_region:
        unregister_chrdev_region(mdev, 1);
        return err;
    }
     
    /* Module destructor: delegates the whole teardown to killall(). */
    static void __exit perf_exit(void)
    {
        killall();
    }
     
    /* Register perf_init / perf_exit as the module's load and unload entry points. */
    module_init(perf_init);
    module_exit(perf_exit);
     
    /* Module metadata. */
    MODULE_LICENSE("GPL");
    MODULE_DESCRIPTION("perf counter driver");
    To register the interrupt handler, I'm using:

    apic_write(APIC_LVTPC,APIC_DM_NMI); // deliver the performance counter overflow interrupt as a non-maskable interrupt (NMI)
    register_die_notifier(&PMC_notifier); // adds PMC_notifier to the die notifier chain; presumably that chain is walked when an NMI is handled (to be confirmed)

    which I found by digging through the code for perf in Linux 2.6.

    And the output I'm getting:

    Code:
    [ 2303.059706] Perf Driver : device <251,0> registered
    [ 2303.060102] Perf Driver : IA32_PMC0 value : fffffffffc19
    [ 2303.060111] Perf Driver : IA32_PMC0 value : fffffffffc19
    [ 2303.060119] Perf Driver : IA32_PMC0 value : fffffffffc19
    [ 2303.060126] Perf Driver : IA32_PMC0 value : fffffffffc19
    [ 2303.060141] Perf Driver : initialization successful
    [ 2310.012613] Perf Driver : open()
    [ 2310.012645] Perf Driver : read()
    [ 2310.012655] Perf Driver : IA32_PMC0 value : a777273
    [ 2310.012658] Perf Driver : IA32_PMC0 value : 850062d
    [ 2310.012665] Perf Driver : IA32_PMC0 value : 2c1ddb7
    [ 2310.012676] Perf Driver : IA32_PMC0 value : 724f859
    [ 2310.012686] Perf Driver : close()
    You can see the initial value of the counter on each core is -999, or fffffffffc19. Later on, I read from the file and see the counter is positive for all cores.

    Am I correctly setting up the counter to generate LAPIC interrupts on overflow? If so, what am I missing to add a function which executes when the overflow is encountered?

  3. #3
    Just Joined!
    Join Date
    Oct 2013
    Posts
    4
    Still no luck. Is there any more information I can/should include? Please let me know, thanks.

  4. #4
    Just Joined!
    Join Date
    Oct 2013
    Posts
    4
    I found the problem!

    Counter overflow is a local interrupt (triggered by LAPIC) so I had to set the NMI notifier to handle local NMI interrupts. Problem solved by changing this:

    Code:
    /* Before: the initializer as originally posted, with priority 0. */
    static __read_mostly struct notifier_block PMC_notifier = {
        .notifier_call          = PMC_handler,
        .next                   = NULL,
        .priority               = 0
    };
    to this:

    Code:
    /*
     * After: only the priority changes, from 0 to NMI_LOCAL_LOW_PRIOR.
     * NOTE(review): NMI_LOCAL_LOW_PRIOR is not defined in the posted code;
     * presumably it comes from the kernel's x86 NMI header -- confirm.
     */
    static __read_mostly struct notifier_block PMC_notifier = {
        .notifier_call          = PMC_handler,
        .next                   = NULL,
        .priority               = NMI_LOCAL_LOW_PRIOR
    };
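    If you're on a later kernel (3.2 or newer), register an NMI handler directly instead of a die notifier: register_nmi_handler(NMI_LOCAL, myHandler, 0, "my_handler"), where myHandler has the signature int myHandler(unsigned int cmd, struct pt_regs *regs) and returns NMI_HANDLED or NMI_DONE.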

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •