Sophie

Sophie

distrib > Scientific%20Linux > 5x > x86_64 > by-pkgid > 27922b4260f65d317aabda37e42bbbff > files > 1856

kernel-2.6.18-238.el5.src.rpm

From: Jiri Pirko <jpirko@redhat.com>
Date: Fri, 15 Aug 2008 17:45:45 +0200
Subject: [misc] batch kprobe register/unregister
Message-id: 20080815174545.5f2cb459@psychotron.englab.brq.redhat.com
O-Subject: Re: [RHEL5.3 patch] BZ437579 batch kprobe unregister
Bugzilla: 437579
RH-Acked-by: Dave Anderson <anderson@redhat.com>
RH-Acked-by: Anton Arapov <aarapov@redhat.com>

BZ437579
https://bugzilla.redhat.com/show_bug.cgi?id=437579

Description
Added functions that can register/unregister array of
kprobes/jprobes/kretprobes.
Batch unregisters shorten the time for script shutdown from 1/HZ *
number-of-probes to a much smaller constant, by avoiding repeated sched_sync
operations in the middle.

Upstream status:
26b31c1908e02a316edfba08080373342e662c14
4a296e07c3a410c09b9155da4c2fa84a07964f38
9861668f747895608cea425f8457989d8dd2edf2
67dddaad5d8b8c5ee5b96a7e2f6cb0faad703865

Brew build:
http://brewweb.devel.redhat.com/brew/taskinfo?taskID=1358586

Test status:
Booted on x86_64 and tested by some systemtap scripts.
Also tested by kprobe smoke test from 2.6.26-rc5-mm3 (tests/test_kprobes.c).
It was slightly modified because current kretprobe implementation doesn't
have entry_handler.
"Kprobe smoke test passed successfully"

Jirka

diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index a6a2208..a3b07bb 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -36,6 +36,11 @@ registration function such as register_kprobe() specifies where
 the probe is to be inserted and what handler is to be called when
 the probe is hit.
 
+There are also register_/unregister_*probes() functions for batch
+registration/unregistration of a group of *probes. These functions
+can speed up unregistration process when you have to unregister
+a lot of probes at once.
+
 The next three subsections explain how the different types of
 probes work.  They explain certain things that you'll need to
 know in order to make the best use of Kprobes -- e.g., the
@@ -163,9 +168,11 @@ code mapping.
 4. API Reference
 
 The Kprobes API includes a "register" function and an "unregister"
-function for each type of probe.  Here are terse, mini-man-page
-specifications for these functions and the associated probe handlers
-that you'll write.  See the latter half of this document for examples.
+function for each type of probe. The API also includes "register_*probes"
+and "unregister_*probes" functions for (un)registering arrays of probes.
+Here are terse, mini-man-page specifications for these functions and
+the associated probe handlers that you'll write. See the latter half 
+of this document for examples.
 
 4.1 register_kprobe
 
@@ -295,6 +302,39 @@ void unregister_kretprobe(struct kretprobe *rp);
 Removes the specified probe.  The unregister function can be called
 at any time after the probe has been registered.
 
+4.5 register_*probes
+
+#include <linux/kprobes.h>
+int register_kprobes(struct kprobe **kps, int num);
+int register_kretprobes(struct kretprobe **rps, int num);
+int register_jprobes(struct jprobe **jps, int num);
+
+Registers each of the num probes in the specified array.  If any
+error occurs during registration, all probes in the array, up to
+the bad probe, are safely unregistered before the register_*probes
+function returns.
+- kps/rps/jps: an array of pointers to *probe data structures
+- num: the number of the array entries.
+
+NOTE:
+You have to allocate(or define) an array of pointers and set all
+of the array entries before using these functions.
+
+4.6 unregister_*probes
+
+#include <linux/kprobes.h>
+void unregister_kprobes(struct kprobe **kps, int num);
+void unregister_kretprobes(struct kretprobe **rps, int num);
+void unregister_jprobes(struct jprobe **jps, int num);
+
+Removes each of the num probes in the specified array at once.
+
+NOTE:
+If the functions find some incorrect probes (ex. unregistered
+probes) in the specified array, they clear the addr field of those
+incorrect probes. However, other probes in the array are
+unregistered correctly.
+
 5. Kprobes Features and Limitations
 
 Kprobes allows multiple probes at the same address.  Currently,
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index e6b5e1e..a8156a5 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -911,10 +911,15 @@ static void ia64_get_bsp_cfm(struct unw_frame_info *info, void *arg)
 	return;
 }
 
+unsigned long arch_deref_entry_point(void *entry)
+{
+	return ((struct fnptr *)entry)->ip;
+}
+
 int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
-	unsigned long addr = ((struct fnptr *)(jp->entry))->ip;
+	unsigned long addr = arch_deref_entry_point(jp->entry);
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	struct param_bsp_cfm pa;
 	int bytes;
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 9cb1f9d..4e94fcb 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -472,6 +472,13 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 	return ret;
 }
 
+#ifdef CONFIG_PPC64
+unsigned long arch_deref_entry_point(void *entry)
+{
+	return (unsigned long)(((func_descr_t *)entry)->entry);
+}
+#endif
+
 int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
@@ -480,8 +487,10 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
 
 	/* setup return addr to the jprobe handler routine */
-	regs->nip = (unsigned long)(((func_descr_t *)jp->entry)->entry);
+	regs->nip = arch_deref_entry_point(jp->entry);
+#ifdef CONFIG_PPC64
 	regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
+#endif
 
 	return 1;
 }
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 5fd9323..3830a0c 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -196,14 +196,21 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void)
 
 int register_kprobe(struct kprobe *p);
 void unregister_kprobe(struct kprobe *p);
+int register_kprobes(struct kprobe **kps, int num);
+void unregister_kprobes(struct kprobe **kps, int num);
 int setjmp_pre_handler(struct kprobe *, struct pt_regs *);
 int longjmp_break_handler(struct kprobe *, struct pt_regs *);
 int register_jprobe(struct jprobe *p);
 void unregister_jprobe(struct jprobe *p);
+int register_jprobes(struct jprobe **jps, int num);
+void unregister_jprobes(struct jprobe **jps, int num);
 void jprobe_return(void);
+unsigned long arch_deref_entry_point(void *);
 
 int register_kretprobe(struct kretprobe *rp);
 void unregister_kretprobe(struct kretprobe *rp);
+int register_kretprobes(struct kretprobe **rps, int num);
+void unregister_kretprobes(struct kretprobe **rps, int num);
 
 struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp);
 void add_rp_inst(struct kretprobe_instance *ri);
@@ -223,16 +230,30 @@ static inline int register_kprobe(struct kprobe *p)
 {
 	return -ENOSYS;
 }
+static inline int register_kprobes(struct kprobe **kps, int num)
+{
+	return -ENOSYS;
+}
 static inline void unregister_kprobe(struct kprobe *p)
 {
 }
+static inline void unregister_kprobes(struct kprobe **kps, int num)
+{
+}
 static inline int register_jprobe(struct jprobe *p)
 {
 	return -ENOSYS;
 }
+static inline int register_jprobes(struct jprobe **jps, int num)
+{
+	return -ENOSYS;
+}
 static inline void unregister_jprobe(struct jprobe *p)
 {
 }
+static inline void unregister_jprobes(struct jprobe **jps, int num)
+{
+}
 static inline void jprobe_return(void)
 {
 }
@@ -240,9 +261,16 @@ static inline int register_kretprobe(struct kretprobe *rp)
 {
 	return -ENOSYS;
 }
+static inline int register_kretprobes(struct kretprobe **rps, int num)
+{
+	return -ENOSYS;
+}
 static inline void unregister_kretprobe(struct kretprobe *rp)
 {
 }
+static inline void unregister_kretprobes(struct kretprobe **rps, int num)
+{
+}
 static inline void kprobe_flush_task(struct task_struct *tk)
 {
 }
diff --git a/include/linux/list.h b/include/linux/list.h
index 0d114ba..f96bd38 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -318,6 +318,15 @@ static inline int list_empty_careful(const struct list_head *head)
 	return (next == head) && (next == head->prev);
 }
 
+/**
+ * list_is_singular - tests whether a list has just one entry.
+ * @head: the list to test.
+ */
+static inline int list_is_singular(const struct list_head *head)
+{
+	return !list_empty(head) && (head->next == head->prev);
+}
+
 static inline void __list_splice(struct list_head *list,
 				 struct list_head *head)
 {
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 419dab6..d744725 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -289,17 +289,6 @@ struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp)
 }
 
 /* Called with kretprobe_lock held */
-static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe
-							      *rp)
-{
-	struct hlist_node *node;
-	struct kretprobe_instance *ri;
-	hlist_for_each_entry(ri, node, &rp->used_instances, uflist)
-		return ri;
-	return NULL;
-}
-
-/* Called with kretprobe_lock held */
 void __kprobes add_rp_inst(struct kretprobe_instance *ri)
 {
 	/*
@@ -370,6 +359,21 @@ static inline void free_rp_inst(struct kretprobe *rp)
 	}
 }
 
+static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
+{
+	unsigned long flags;
+	struct kretprobe_instance *ri;
+	struct hlist_node *pos, *next;
+	/* No race here */
+	spin_lock_irqsave(&kretprobe_lock, flags);
+	hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) {
+		ri->rp = NULL;
+		hlist_del(&ri->uflist);
+	}
+	spin_unlock_irqrestore(&kretprobe_lock, flags);
+	free_rp_inst(rp);
+}
+
 /*
  * Keep all fields in the kprobe consistent
  */
@@ -494,6 +498,7 @@ static int __kprobes __register_kprobe(struct kprobe *p,
 	}
 
 	p->nmissed = 0;
+	INIT_LIST_HEAD(&p->list);
 	mutex_lock(&kprobe_mutex);
 	old_p = get_kprobe(p->addr);
 	if (old_p) {
@@ -524,73 +529,63 @@ out:
 	return ret;
 }
 
-int __kprobes register_kprobe(struct kprobe *p)
-{
-	return __register_kprobe(p,
-		(unsigned long)__builtin_return_address(0));
-}
-
-void __kprobes unregister_kprobe(struct kprobe *p)
+/*
+ * Unregister a kprobe without a scheduler synchronization.
+ */
+static int __kprobes __unregister_kprobe_top(struct kprobe *p)
 {
-	struct module *mod;
 	struct kprobe *old_p, *list_p;
-	int cleanup_p;
 
-	mutex_lock(&kprobe_mutex);
 	old_p = get_kprobe(p->addr);
-	if (unlikely(!old_p)) {
-		mutex_unlock(&kprobe_mutex);
-		return;
-	}
+	if (unlikely(!old_p))
+		return -EINVAL;
+
 	if (p != old_p) {
 		list_for_each_entry_rcu(list_p, &old_p->list, list)
 			if (list_p == p)
 			/* kprobe p is a valid probe */
 				goto valid_p;
-		mutex_unlock(&kprobe_mutex);
-		return;
+		return -EINVAL;
 	}
 valid_p:
 	if ((old_p == p) || ((old_p->pre_handler == aggr_pre_handler) &&
-		(p->list.next == &old_p->list) &&
-		(p->list.prev == &old_p->list))) {
+		list_is_singular(&old_p->list))) {
 		/* Only probe on the hash list */
 		arch_disarm_kprobe(p);
 		hlist_del_rcu(&old_p->hlist);
-		cleanup_p = 1;
 	} else {
+		if (p->break_handler)
+			old_p->break_handler = NULL;
+		if (p->post_handler) {
+			list_for_each_entry_rcu(list_p, &old_p->list, list) {
+				if ((list_p != p) && (list_p->post_handler))
+					goto noclean;
+			}
+			old_p->post_handler = NULL;
+		}
+noclean:
 		list_del_rcu(&p->list);
-		cleanup_p = 0;
 	}
+	return 0;
+}
 
-	mutex_unlock(&kprobe_mutex);
+static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
+{
+	struct module *mod;
+	struct kprobe *old_p;
 
-	synchronize_sched();
 	if (p->mod_refcounted &&
 	    (mod = module_text_address((unsigned long)p->addr)))
 		module_put(mod);
 
-	if (cleanup_p) {
-		if (p != old_p) {
-			list_del_rcu(&p->list);
+	if (list_empty(&p->list) || list_is_singular(&p->list)) {
+		if (!list_empty(&p->list)) {
+			/* "p" is the last child of an aggr_kprobe */
+			old_p = list_entry(p->list.next, struct kprobe, list);
+			list_del(&p->list);
 			kfree(old_p);
 		}
 		arch_remove_kprobe(p);
-	} else {
-		mutex_lock(&kprobe_mutex);
-		if (p->break_handler)
-			old_p->break_handler = NULL;
-		if (p->post_handler){
-			list_for_each_entry_rcu(list_p, &old_p->list, list){
-				if (list_p->post_handler){
-					cleanup_p = 2;
-					break;
-				}
-			}
-			if (cleanup_p == 0)
-				old_p->post_handler = NULL;
-		}
-		mutex_unlock(&kprobe_mutex);
 	}
 
 	/* Call unregister_page_fault_notifier()
@@ -604,25 +599,136 @@ valid_p:
 	return;
 }
 
+static int __register_kprobes(struct kprobe **kps, int num,
+		unsigned long called_from)
+{
+	int i, ret = 0;
+
+	if (num <= 0)
+		return -EINVAL;
+	for (i = 0; i < num; i++) {
+		ret = __register_kprobe(kps[i], called_from);
+		if (ret < 0) {
+			if (i > 0)
+				unregister_kprobes(kps, i);
+			break;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Registration and unregistration functions for kprobe.
+ */
+int __kprobes register_kprobe(struct kprobe *p)
+{
+	return __register_kprobes(&p, 1,
+			(unsigned long)__builtin_return_address(0));
+}
+
+void __kprobes unregister_kprobe(struct kprobe *p)
+{
+	unregister_kprobes(&p, 1);
+}
+
+int __kprobes register_kprobes(struct kprobe **kps, int num)
+{
+	return __register_kprobes(kps, num,
+			(unsigned long)__builtin_return_address(0));
+}
+
+void __kprobes unregister_kprobes(struct kprobe **kps, int num)
+{
+	int i;
+
+	if (num <= 0)
+		return;
+	mutex_lock(&kprobe_mutex);
+	for (i = 0; i < num; i++)
+		if (__unregister_kprobe_top(kps[i]) < 0)
+			kps[i]->addr = NULL;
+	mutex_unlock(&kprobe_mutex);
+
+	synchronize_sched();
+	for (i = 0; i < num; i++)
+		if (kps[i]->addr)
+			__unregister_kprobe_bottom(kps[i]);
+}
+
 static struct notifier_block kprobe_exceptions_nb = {
 	.notifier_call = kprobe_exceptions_notify,
 	.priority = 0x7fffffff /* we need to be notified first */
 };
 
+unsigned long __attribute__((weak)) arch_deref_entry_point(void *entry)
+{
+	return (unsigned long)entry;
+}
 
-int __kprobes register_jprobe(struct jprobe *jp)
+static int __register_jprobes(struct jprobe **jps, int num,
+	unsigned long called_from)
 {
-	/* Todo: Verify probepoint is a function entry point */
-	jp->kp.pre_handler = setjmp_pre_handler;
-	jp->kp.break_handler = longjmp_break_handler;
+	struct jprobe *jp;
+	int ret = 0, i;
 
-	return __register_kprobe(&jp->kp,
+	if (num <= 0)
+		return -EINVAL;
+	for (i = 0; i < num; i++) {
+		unsigned long addr;
+		jp = jps[i];
+		addr = arch_deref_entry_point(jp->entry);
+
+		if (!kernel_text_address(addr))
+			ret = -EINVAL;
+		else {
+			/* Todo: Verify probepoint is a function entry point */
+			jp->kp.pre_handler = setjmp_pre_handler;
+			jp->kp.break_handler = longjmp_break_handler;
+			ret = __register_kprobe(&jp->kp, called_from);
+		}
+		if (ret < 0) {
+			if (i > 0)
+				unregister_jprobes(jps, i);
+			break;
+		}
+	}
+	return ret;
+}
+
+int __kprobes register_jprobe(struct jprobe *jp)
+{
+	return __register_jprobes(&jp, 1,
 		(unsigned long)__builtin_return_address(0));
 }
 
 void __kprobes unregister_jprobe(struct jprobe *jp)
 {
-	unregister_kprobe(&jp->kp);
+	unregister_jprobes(&jp, 1);
+}
+
+int __kprobes register_jprobes(struct jprobe **jps, int num)
+{
+	return __register_jprobes(jps, num,
+		(unsigned long)__builtin_return_address(0));
+}
+
+void __kprobes unregister_jprobes(struct jprobe **jps, int num)
+{
+	int i;
+
+	if (num <= 0)
+		return;
+	mutex_lock(&kprobe_mutex);
+	for (i = 0; i < num; i++)
+		if (__unregister_kprobe_top(&jps[i]->kp) < 0)
+			jps[i]->kp.addr = NULL;
+	mutex_unlock(&kprobe_mutex);
+
+	synchronize_sched();
+	for (i = 0; i < num; i++) {
+		if (jps[i]->kp.addr)
+			__unregister_kprobe_bottom(&jps[i]->kp);
+	}
 }
 
 #ifdef ARCH_SUPPORTS_KRETPROBES
@@ -644,7 +750,8 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
 	return 0;
 }
 
-int __kprobes register_kretprobe(struct kretprobe *rp)
+static int __kprobes __register_kretprobe(struct kretprobe *rp,
+					  unsigned long called_from)
 {
 	int ret = 0;
 	struct kretprobe_instance *inst;
@@ -689,12 +796,68 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
 
 	rp->nmissed = 0;
 	/* Establish function entry probe point */
-	if ((ret = __register_kprobe(&rp->kp,
-		(unsigned long)__builtin_return_address(0))) != 0)
+	ret = __register_kprobe(&rp->kp, called_from);
+	if (ret != 0)
 		free_rp_inst(rp);
 	return ret;
 }
 
+static int __register_kretprobes(struct kretprobe **rps, int num,
+		unsigned long called_from)
+{
+	int ret = 0, i;
+
+	if (num <= 0)
+		return -EINVAL;
+	for (i = 0; i < num; i++) {
+		ret = __register_kretprobe(rps[i], called_from);
+		if (ret < 0) {
+			if (i > 0)
+				unregister_kretprobes(rps, i);
+			break;
+		}
+	}
+	return ret;
+}
+
+int __kprobes register_kretprobe(struct kretprobe *rp)
+{
+	return __register_kretprobes(&rp, 1,
+			(unsigned long)__builtin_return_address(0));
+}
+
+void __kprobes unregister_kretprobe(struct kretprobe *rp)
+{
+	unregister_kretprobes(&rp, 1);
+}
+
+int __kprobes register_kretprobes(struct kretprobe **rps, int num)
+{
+	return __register_kretprobes(rps, num,
+			(unsigned long)__builtin_return_address(0));
+}
+
+void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
+{
+	int i;
+
+	if (num <= 0)
+		return;
+	mutex_lock(&kprobe_mutex);
+	for (i = 0; i < num; i++)
+		if (__unregister_kprobe_top(&rps[i]->kp) < 0)
+			rps[i]->kp.addr = NULL;
+	mutex_unlock(&kprobe_mutex);
+
+	synchronize_sched();
+	for (i = 0; i < num; i++) {
+		if (rps[i]->kp.addr) {
+			__unregister_kprobe_bottom(&rps[i]->kp);
+			cleanup_rp_inst(rps[i]);
+		}
+	}
+}
+
 #else /* ARCH_SUPPORTS_KRETPROBES */
 
 int __kprobes register_kretprobe(struct kretprobe *rp)
@@ -702,24 +865,26 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
 	return -ENOSYS;
 }
 
-#endif /* ARCH_SUPPORTS_KRETPROBES */
-
+int __kprobes register_kretprobes(struct kretprobe **rps, int num)
+{
+		return -ENOSYS;
+}
 void __kprobes unregister_kretprobe(struct kretprobe *rp)
 {
-	unsigned long flags;
-	struct kretprobe_instance *ri;
+}
 
-	unregister_kprobe(&rp->kp);
-	/* No race here */
-	spin_lock_irqsave(&kretprobe_lock, flags);
-	while ((ri = get_used_rp_inst(rp)) != NULL) {
-		ri->rp = NULL;
-		hlist_del(&ri->uflist);
-	}
-	spin_unlock_irqrestore(&kretprobe_lock, flags);
-	free_rp_inst(rp);
+void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
+{
+}
+
+static int __kprobes pre_handler_kretprobe(struct kprobe *p,
+							   struct pt_regs *regs)
+{
+		return 0;
 }
 
+#endif /* ARCH_SUPPORTS_KRETPROBES */
+
 static int __init init_kprobes(void)
 {
 	int i, err = 0;
@@ -755,9 +920,14 @@ __initcall(init_kprobes);
 
 EXPORT_SYMBOL_GPL(register_kprobe);
 EXPORT_SYMBOL_GPL(unregister_kprobe);
+EXPORT_SYMBOL_GPL(register_kprobes);
+EXPORT_SYMBOL_GPL(unregister_kprobes);
 EXPORT_SYMBOL_GPL(register_jprobe);
 EXPORT_SYMBOL_GPL(unregister_jprobe);
+EXPORT_SYMBOL_GPL(register_jprobes);
+EXPORT_SYMBOL_GPL(unregister_jprobes);
 EXPORT_SYMBOL_GPL(jprobe_return);
 EXPORT_SYMBOL_GPL(register_kretprobe);
 EXPORT_SYMBOL_GPL(unregister_kretprobe);
-
+EXPORT_SYMBOL_GPL(register_kretprobes);
+EXPORT_SYMBOL_GPL(unregister_kretprobes);