Blob Blame History Raw
diff --git a/Makefile.am b/Makefile.am
index 6c299d8..a00c8bf 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -24,7 +24,10 @@ libltrace_la_SOURCES = \
 	output.c \
 	proc.c \
 	read_config_file.c  \
-	summary.c
+	summary.c \
+	library.c \
+	filter.c \
+	glob.c
 
 libltrace_la_LIBADD = \
 	$(libelf_LIBS) \
@@ -56,7 +59,10 @@ noinst_HEADERS = \
 	ltrace.h \
 	options.h \
 	output.h \
-	read_config_file.h
+	read_config_file.h \
+	library.h \
+	filter.h \
+	glob.h
 
 dist_man1_MANS = \
 	ltrace.1
diff --git a/breakpoint.h b/breakpoint.h
index ce6f501..0398072 100644
--- a/breakpoint.h
+++ b/breakpoint.h
@@ -33,74 +33,106 @@
  * enable.  Realized tracepoints enable breakpoints, which are a
  * low-level realization of high-level tracepoint.
  *
- * Tracepoints are provided by the main binary as well as by any
- * opened libraries: every time an ELF file is mapped into the address
- * space, a new set of tracepoints is extracted, and filtered
- * according to user settings.  Those tracepoints that are left are
- * then realized, and the tracing starts.
- *
- * A scheme like this would take care of gradually introducing
- * breakpoints when the library is mapped, and therefore ready, and
- * would avoid certain hacks.  For example on PPC64, we don't actually
- * add breakpoints to PLT.  Instead, we read the PLT (which contains
- * addresses, not code), to figure out where to put the breakpoints.
- * In prelinked code, that address is non-zero, and points to an
- * address that's not yet mapped.  ptrace then fails when we try to
- * add the breakpoint.
- *
- * Ideally, return breakpoints would be just a special kind of
- * tracepoint that has attached some magic.  Or a feature of a
- * tracepoint.  Service breakpoints like the handling of dlopen would
- * be a low-level breakpoint, likely without tracepoint attached.
+ * Service breakpoints like the handling of dlopen would be a
+ * low-level breakpoint, likely without tracepoint attached.
  *
  * So that's for sometimes.
  */
 
-#include "arch.h"
+#include "sysdep.h"
+#include "library.h"
 
 struct Process;
 struct breakpoint;
 
 struct bp_callbacks {
-	void (*on_hit) (struct breakpoint *bp, struct Process *proc);
-	void (*on_destroy) (struct breakpoint *bp);
+	void (*on_hit)(struct breakpoint *bp, struct Process *proc);
+	void (*on_continue)(struct breakpoint *bp, struct Process *proc);
+	void (*on_retract)(struct breakpoint *bp, struct Process *proc);
 };
 
 struct breakpoint {
 	struct bp_callbacks *cbs;
+	struct library_symbol *libsym;
 	void *addr;
 	unsigned char orig_value[BREAKPOINT_LENGTH];
 	int enabled;
-	struct library_symbol *libsym;
-#ifdef __arm__
-	int thumb_mode;
-#endif
+	struct arch_breakpoint_data arch;
 };
 
 /* Call on-hit handler of BP, if any is set.  */
 void breakpoint_on_hit(struct breakpoint *bp, struct Process *proc);
 
-/* Call on-destroy handler of BP, if any is set.  */
-void breakpoint_on_destroy(struct breakpoint *bp);
+/* Call on-continue handler of BP.  If none is set, call
+ * continue_after_breakpoint.  */
+void breakpoint_on_continue(struct breakpoint *bp, struct Process *proc);
+
+/* Call on-retract handler of BP, if any is set.  This should be
+ * called before the breakpoints are destroyed.  The reason for a
+ * separate interface is that breakpoint_destroy has to be callable
+ * without PROC.  ON_DISABLE might be useful as well, but that would
+ * be called every time we disable the breakpoint, which is too often
+ * (a breakpoint has to be disabled every time that we need to execute
+ * the instruction underneath it).  */
+void breakpoint_on_retract(struct breakpoint *bp, struct Process *proc);
+
+/* Initialize a breakpoint structure.  That doesn't actually realize
+ * the breakpoint.  The breakpoint is initially disabled and has no
+ * callbacks.  orig_value has to be set separately.  LIBSYM may be
+ * NULL.  */
+int breakpoint_init(struct breakpoint *bp, struct Process *proc,
+		    target_address_t addr, struct library_symbol *libsym);
+
+/* Make a clone of breakpoint BP into the area of memory pointed to by
+ * RETP.  The original breakpoint was assigned to process OLD_PROC,
+ * the cloned breakpoint will be attached to process NEW_PROC.
+ * Returns 0 on success or a negative value on failure.  */
+int breakpoint_clone(struct breakpoint *retp, struct Process *new_proc,
+		     struct breakpoint *bp, struct Process *old_proc);
+
+/* Set callbacks.  If CBS is non-NULL, then BP->cbs shall be NULL.  */
+void breakpoint_set_callbacks(struct breakpoint *bp, struct bp_callbacks *cbs);
 
-/* This is actually three functions rolled in one:
- *  - breakpoint_init
- *  - proc_insert_breakpoint
- *  - breakpoint_enable
- * XXX I think it should be broken up somehow.  */
+/* Destroy a breakpoint structure.   */
+void breakpoint_destroy(struct breakpoint *bp);
+
+/* Increment BP's enable count and call enable_breakpoint when it
+ * reaches 1.  Returns 0 on success, a negative value on failure.  */
+int breakpoint_turn_on(struct breakpoint *bp, struct Process *proc);
+
+/* Call disable_breakpoint when turned off the same number of times
+ * that it was turned on.  Returns 0 on success and a negative value
+ * on failure.  */
+int breakpoint_turn_off(struct breakpoint *bp, struct Process *proc);
+
+/* Utility function that does what typically needs to be done when a
+ * breakpoint is to be inserted.  It checks whether there is another
+ * breakpoint in PROC->LEADER for given ADDR.  If not, it allocates
+ * memory for a new breakpoint on the heap, initializes it, and calls
+ * PROC_ADD_BREAKPOINT to add the newly-created breakpoint.  For newly
+ * added as well as preexisting breakpoints, it then calls
+ * BREAKPOINT_TURN_ON.  If anything fails, it cleans up and returns
+ * NULL.  Otherwise it returns the breakpoint for ADDR.  */
 struct breakpoint *insert_breakpoint(struct Process *proc, void *addr,
-				     struct library_symbol *libsym, int enable);
+				     struct library_symbol *libsym);
+
+/* Name of a symbol associated with BP.  May be NULL.  */
+const char *breakpoint_name(const struct breakpoint *bp);
 
-/* */
+/* A library that this breakpoint comes from.  May be NULL.  */
+struct library *breakpoint_library(const struct breakpoint *bp);
+
+/* Again, this seems to be several interfaces rolled into one:
+ *  - breakpoint_disable
+ *  - proc_remove_breakpoint
+ *  - breakpoint_destroy
+ * XXX */
 void delete_breakpoint(struct Process *proc, void *addr);
 
 /* XXX some of the following belongs to proc.h/proc.c.  */
 struct breakpoint *address2bpstruct(struct Process *proc, void *addr);
 void enable_all_breakpoints(struct Process *proc);
 void disable_all_breakpoints(struct Process *proc);
-int breakpoints_init(struct Process *proc, int enable);
-
-void reinitialize_breakpoints(struct Process *proc);
-
+int breakpoints_init(struct Process *proc);
 
 #endif /* BREAKPOINT_H */
diff --git a/breakpoints.c b/breakpoints.c
index 5713fe4..9536266 100644
--- a/breakpoints.c
+++ b/breakpoints.c
@@ -3,6 +3,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <assert.h>
+#include <errno.h>
 
 #ifdef __powerpc__
 #include <sys/ptrace.h>
@@ -10,21 +11,52 @@
 
 #include "breakpoint.h"
 #include "common.h"
+#include "proc.h"
+#include "library.h"
+
+#ifndef ARCH_HAVE_TRANSLATE_ADDRESS
+int
+arch_translate_address_dyn(struct Process *proc,
+		       target_address_t addr, target_address_t *ret)
+{
+	*ret = addr;
+	return 0;
+}
+
+struct ltelf;
+int
+arch_translate_address(struct ltelf *lte,
+		       target_address_t addr, target_address_t *ret)
+{
+	*ret = addr;
+	return 0;
+}
+#endif
 
 void
 breakpoint_on_hit(struct breakpoint *bp, struct Process *proc)
 {
 	assert(bp != NULL);
 	if (bp->cbs != NULL && bp->cbs->on_hit != NULL)
-		(bp->cbs->on_hit) (bp, proc);
+		(bp->cbs->on_hit)(bp, proc);
+}
+
+void
+breakpoint_on_continue(struct breakpoint *bp, struct Process *proc)
+{
+	assert(bp != NULL);
+	if (bp->cbs != NULL && bp->cbs->on_continue != NULL)
+		(bp->cbs->on_continue)(bp, proc);
+	else
+		continue_after_breakpoint(proc, bp);
 }
 
 void
-breakpoint_on_destroy(struct breakpoint *bp)
+breakpoint_on_retract(struct breakpoint *bp, struct Process *proc)
 {
 	assert(bp != NULL);
-	if (bp->cbs != NULL && bp->cbs->on_destroy != NULL)
-		(bp->cbs->on_destroy) (bp);
+	if (bp->cbs != NULL && bp->cbs->on_retract != NULL)
+		(bp->cbs->on_retract)(bp, proc);
 }
 
 /*****************************************************************************/
@@ -39,52 +71,171 @@ address2bpstruct(Process *proc, void *addr)
 	return dict_find_entry(proc->breakpoints, addr);
 }
 
-struct breakpoint *
-insert_breakpoint(Process *proc, void *addr,
-		  struct library_symbol *libsym, int enable)
+#ifndef ARCH_HAVE_BREAKPOINT_DATA
+int
+arch_breakpoint_init(struct Process *proc, struct breakpoint *sbp)
 {
-	struct breakpoint *sbp;
+	return 0;
+}
 
-	Process * leader = proc->leader;
+void
+arch_breakpoint_destroy(struct breakpoint *sbp)
+{
+}
+
+int
+arch_breakpoint_clone(struct breakpoint *retp, struct breakpoint *sbp)
+{
+	return 0;
+}
+#endif
+
+static void
+breakpoint_init_base(struct breakpoint *bp, struct Process *proc,
+		     target_address_t addr, struct library_symbol *libsym)
+{
+	bp->cbs = NULL;
+	bp->addr = addr;
+	memset(bp->orig_value, 0, sizeof(bp->orig_value));
+	bp->enabled = 0;
+	bp->libsym = libsym;
+}
+
+/* On second thought, I don't think we need PROC.  All the translation
+ * (arch_translate_address in particular) should be doable using
+ * static lookups of various sections in the ELF file.  We shouldn't
+ * need process for anything.  */
+int
+breakpoint_init(struct breakpoint *bp, struct Process *proc,
+		target_address_t addr, struct library_symbol *libsym)
+{
+	breakpoint_init_base(bp, proc, addr, libsym);
+	return arch_breakpoint_init(proc, bp);
+}
+
+void
+breakpoint_set_callbacks(struct breakpoint *bp, struct bp_callbacks *cbs)
+{
+	if (cbs != NULL)	/* clearing (CBS == NULL) is always allowed */
+		assert(bp->cbs == NULL);
+	bp->cbs = cbs;
+}
+
+void
+breakpoint_destroy(struct breakpoint *bp)
+{
+	if (bp == NULL)
+		return;
+	arch_breakpoint_destroy(bp);
+}
+
+struct find_symbol_data {
+	struct library_symbol *old_libsym;
+	struct library_symbol *found_libsym;
+};
+
+static enum callback_status
+find_sym_in_lib(struct Process *proc, struct library *lib, void *u)
+{
+	struct find_symbol_data *fs = u;
+	fs->found_libsym
+		= library_each_symbol(lib, NULL, library_symbol_equal_cb,
+				      fs->old_libsym);
+	return fs->found_libsym != NULL ? CBS_STOP : CBS_CONT;
+}
+
+int
+breakpoint_clone(struct breakpoint *retp, struct Process *new_proc,
+		 struct breakpoint *bp, struct Process *old_proc)
+{
+	/* Find NEW_PROC's counterpart of the symbol BP is linked to.  */
+	struct library_symbol *libsym = bp->libsym;
+	struct library *lib = NULL;
+	if (libsym != NULL) {
+		struct find_symbol_data f_data = {
+			.old_libsym = libsym,
+		};
+		lib = proc_each_library(new_proc, NULL,
+					find_sym_in_lib, &f_data);
+		assert(lib != NULL);
+		libsym = f_data.found_libsym;
+	}
+
+	/* LIB and LIBSYM now belong to NEW_PROC (its libraries are
+	 * assumed to be cloned before its breakpoints -- confirm
+	 * against the caller).  Now we can do the clone itself.  */
+	breakpoint_init_base(retp, new_proc, bp->addr, libsym);
+	memcpy(retp->orig_value, bp->orig_value, sizeof(bp->orig_value));
+	retp->enabled = bp->enabled;
+	if (arch_breakpoint_clone(retp, bp) < 0)
+		return -1;
+	breakpoint_set_callbacks(retp, bp->cbs);
+	return 0;
+}
+
+int
+breakpoint_turn_on(struct breakpoint *bp, struct Process *proc)
+{
+	bp->enabled++;
+	if (bp->enabled == 1) {
+		assert(proc->pid != 0);
+		enable_breakpoint(proc, bp);
+	}
+	return 0;
+}
+
+int
+breakpoint_turn_off(struct breakpoint *bp, struct Process *proc)
+{
+	bp->enabled--;
+	if (bp->enabled == 0)
+		disable_breakpoint(proc, bp);
+	assert(bp->enabled >= 0);
+	return 0;
+}
+
+struct breakpoint *
+insert_breakpoint(struct Process *proc, void *addr,
+		  struct library_symbol *libsym)
+{
+	Process *leader = proc->leader;
 
 	/* Only the group leader should be getting the breakpoints and
 	 * thus have ->breakpoint initialized.  */
 	assert(leader != NULL);
 	assert(leader->breakpoints != NULL);
 
-#ifdef __arm__
-	int thumb_mode = (int)addr & 1;
-	if (thumb_mode)
-		addr = (void *)((int)addr & ~1);
-#endif
-
-	debug(DEBUG_FUNCTION, "insert_breakpoint(pid=%d, addr=%p, symbol=%s)", proc->pid, addr, libsym ? libsym->name : "NULL");
-	debug(1, "symbol=%s, addr=%p", libsym?libsym->name:"(nil)", addr);
-
-	if (!addr)
-		return NULL;
+	debug(DEBUG_FUNCTION, "insert_breakpoint(pid=%d, addr=%p, symbol=%s)",
+	      proc->pid, addr, libsym ? libsym->name : "NULL");
 
-	if (libsym)
-		libsym->needs_init = 0;
+	assert(addr != 0);
 
-	sbp = dict_find_entry(leader->breakpoints, addr);
+	/* XXX what we need to do instead is have a list of
+	 * breakpoints that are enabled at this address.  The
+	 * following works if every breakpoint is the same and there's
+	 * no extra data, but that doesn't hold anymore.  For now it
+	 * will suffice, about the only realistic case where we need
+	 * to have more than one breakpoint per address is return from
+	 * a recursive library call.  */
+	struct breakpoint *sbp = dict_find_entry(leader->breakpoints, addr);
 	if (sbp == NULL) {
-		sbp = calloc(1, sizeof(*sbp));
-		if (sbp == NULL) {
-			return NULL;	/* TODO FIXME XXX: error_mem */
+		sbp = malloc(sizeof(*sbp));
+		if (sbp == NULL
+		    || breakpoint_init(sbp, proc, addr, libsym) < 0) {
+			free(sbp);
+			return NULL;
+		}
+		if (proc_add_breakpoint(leader, sbp) < 0) {
+		fail:
+			breakpoint_destroy(sbp);
+			free(sbp);
+			return NULL;
 		}
-		dict_enter(leader->breakpoints, addr, sbp);
-		sbp->addr = addr;
-		sbp->libsym = libsym;
 	}
-#ifdef __arm__
-	sbp->thumb_mode = thumb_mode | proc->thumb_mode;
-	proc->thumb_mode = 0;
-#endif
-	sbp->enabled++;
-	if (sbp->enabled == 1 && enable) {
-		assert(proc->pid != 0);
-		enable_breakpoint(proc, sbp);
+
+	if (breakpoint_turn_on(sbp, proc) < 0) {
+		proc_remove_breakpoint(leader, sbp);
+		goto fail;
 	}
 
 	return sbp;
@@ -93,23 +244,41 @@ insert_breakpoint(Process *proc, void *addr,
 void
 delete_breakpoint(Process *proc, void *addr)
 {
-	struct breakpoint *sbp;
-
 	debug(DEBUG_FUNCTION, "delete_breakpoint(pid=%d, addr=%p)", proc->pid, addr);
 
 	Process * leader = proc->leader;
 	assert(leader != NULL);
 
-	sbp = dict_find_entry(leader->breakpoints, addr);
-	assert(sbp);		/* FIXME: remove after debugging has been done. */
+	struct breakpoint *sbp = dict_find_entry(leader->breakpoints, addr);
+	assert(sbp != NULL);
 	/* This should only happen on out-of-memory conditions. */
 	if (sbp == NULL)
 		return;
 
-	sbp->enabled--;
-	if (sbp->enabled == 0)
-		disable_breakpoint(proc, sbp);
-	assert(sbp->enabled >= 0);
+	if (breakpoint_turn_off(sbp, proc) < 0) {
+		fprintf(stderr, "Couldn't turn off the breakpoint %s@%p\n",
+			breakpoint_name(sbp), sbp->addr);
+		return;
+	}
+	if (sbp->enabled == 0) {
+		proc_remove_breakpoint(leader, sbp);
+		breakpoint_destroy(sbp);
+		free(sbp);
+	}
+}
+
+const char *
+breakpoint_name(const struct breakpoint *bp)
+{
+	assert(bp != NULL);
+	return bp->libsym != NULL ? bp->libsym->name : NULL;
+}
+
+struct library *
+breakpoint_library(const struct breakpoint *bp)
+{
+	assert(bp != NULL);
+	return bp->libsym != NULL ? bp->libsym->lib : NULL;
 }
 
 static void
@@ -176,108 +345,97 @@ disable_all_breakpoints(Process *proc) {
 	dict_apply_to_all(proc->breakpoints, disable_bp_cb, proc);
 }
 
-static void
-free_bp_cb(void *addr, void *sbp, void *data) {
-	debug(DEBUG_FUNCTION, "free_bp_cb(sbp=%p)", sbp);
-	assert(sbp);
-	free(sbp);
-}
+/* XXX This is not currently properly supported.  On clone, this is
+ * just sliced.  Hopefully at the point that clone is done, this
+ * breakpoint is not necessary anymore.  If this use case ends up
+ * being important, we need to add a clone and destroy callbacks to
+ * breakpoints, and we should also probably drop arch_breakpoint_data
+ * so that we don't end up with two different customization mechanisms
+ * for one structure.  */
+struct entry_breakpoint {
+	struct breakpoint super;
+	target_address_t dyn_addr;
+};
 
 static void
-entry_callback_hit(struct breakpoint *bp, struct Process *proc)
+entry_breakpoint_on_hit(struct breakpoint *a, struct Process *proc)
 {
+	struct entry_breakpoint *bp = (void *)a;
 	if (proc == NULL || proc->leader == NULL)
 		return;
-	delete_breakpoint(proc, bp->addr); // xxx
-	reinitialize_breakpoints(proc->leader);
+	target_address_t dyn_addr = bp->dyn_addr;
+	delete_breakpoint(proc, bp->super.addr);
+	linkmap_init(proc, dyn_addr);
+	arch_dynlink_done(proc);
 }
 
 int
-breakpoints_init(Process *proc, int enable)
+entry_breakpoint_init(struct Process *proc,
+		      struct entry_breakpoint *bp, target_address_t addr,
+		      struct library *lib)
 {
-	debug(DEBUG_FUNCTION, "breakpoints_init(pid=%d)", proc->pid);
-	if (proc->breakpoints) {	/* let's remove that struct */
-		dict_apply_to_all(proc->breakpoints, free_bp_cb, NULL);
-		dict_clear(proc->breakpoints);
-		proc->breakpoints = NULL;
-	}
+	int err;
+	if ((err = breakpoint_init(&bp->super, proc, addr, NULL)) < 0)
+		return err;
 
-	/* Only the thread group leader should hold the breakpoints.
-	 * (N.B. PID may be set to 0 temporarily when called by
-	 * handle_exec).  */
-	assert(proc->leader == proc);
+	static struct bp_callbacks entry_callbacks = {
+		.on_hit = entry_breakpoint_on_hit,
+	};
+	bp->super.cbs = &entry_callbacks;
+	bp->dyn_addr = lib->dyn_addr;
+	return 0;
+}
 
-	proc->breakpoints = dict_init(dict_key2hash_int,
-				      dict_key_cmp_int);
+int
+breakpoints_init(Process *proc)
+{
+	debug(DEBUG_FUNCTION, "breakpoints_init(pid=%d)", proc->pid);
 
-	destroy_library_symbol_chain(proc->list_of_symbols);
-	proc->list_of_symbols = NULL;
+	/* XXX breakpoint dictionary should be initialized
+	 * outside.  Here we just put in breakpoints.  */
+	assert(proc->breakpoints != NULL);
 
-	GElf_Addr entry;
-	if (options.libcalls && proc->filename) {
-		proc->list_of_symbols = read_elf(proc, &entry);
-		if (proc->list_of_symbols == NULL) {
-		fail:
-			/* XXX leak breakpoints */
-			return -1;
-		}
+	/* Only the thread group leader should hold the breakpoints.  */
+	assert(proc->leader == proc);
 
-		if (opt_e) {
-			struct library_symbol **tmp1 = &proc->list_of_symbols;
-			while (*tmp1) {
-				struct opt_e_t *tmp2 = opt_e;
-				int keep = !opt_e_enable;
-
-				while (tmp2) {
-					if (!strcmp((*tmp1)->name,
-						    tmp2->name)) {
-						keep = opt_e_enable;
-					}
-					tmp2 = tmp2->next;
-				}
-				if (!keep) {
-					*tmp1 = (*tmp1)->next;
-				} else {
-					tmp1 = &((*tmp1)->next);
-				}
-			}
+	/* N.B. the following used to be conditional on this, and
+	 * maybe it still needs to be.  */
+	assert(proc->filename != NULL);
+
+	struct library *lib = ltelf_read_main_binary(proc, proc->filename);
+	struct entry_breakpoint *entry_bp = NULL;
+	int bp_state = 0;	/* how far setup got; selects cleanup below */
+	int result = -1;
+	switch (lib != NULL) {	/* 1 matches no label here: body is skipped */
+	fail:
+		switch (bp_state) {
+		case 2:	/* NOTE(review): LIB is only added after the last goto fail */
+			proc_remove_library(proc, lib);
+			proc_remove_breakpoint(proc, &entry_bp->super);
+		case 1:	/* also reached by fall-through from case 2 */
+			breakpoint_destroy(&entry_bp->super);
 		}
+		library_destroy(lib);
+		free(entry_bp);
+	case 0:	/* LIB == NULL, or the tail of the fail path */
+		return result;
 	}
 
-	struct breakpoint *entry_bp
-		= insert_breakpoint(proc, (void *)(uintptr_t)entry, NULL, 1);
-	if (entry_bp == NULL) {
-		fprintf(stderr, "fail!\n");
+	entry_bp = malloc(sizeof(*entry_bp));
+	if (entry_bp == NULL
+	    || (result = entry_breakpoint_init(proc, entry_bp,
+					       lib->entry, lib)) < 0)
 		goto fail;
-	}
+	++bp_state;
 
-	static struct bp_callbacks entry_callbacks = {
-		.on_hit = entry_callback_hit,
-	};
-	entry_bp->cbs = &entry_callbacks;
+	if ((result = proc_add_breakpoint(proc, &entry_bp->super)) < 0)
+		goto fail;
+	++bp_state;
+
+	if ((result = breakpoint_turn_on(&entry_bp->super, proc)) < 0)
+		goto fail;
+	proc_add_library(proc, lib);
 
 	proc->callstack_depth = 0;
 	return 0;
 }
-
-void
-reinitialize_breakpoints(Process *proc) {
-	struct library_symbol *sym;
-
-	debug(DEBUG_FUNCTION, "reinitialize_breakpoints(pid=%d)", proc->pid);
-
-	sym = proc->list_of_symbols;
-
-	while (sym) {
-		if (sym->needs_init) {
-			insert_breakpoint(proc, sym2addr(proc, sym), sym, 1);
-			if (sym->needs_init && !sym->is_weak) {
-				fprintf(stderr,
-					"could not re-initialize breakpoint for \"%s\" in file \"%s\"\n",
-					sym->name, proc->filename);
-				exit(1);
-			}
-		}
-		sym = sym->next;
-	}
-}
diff --git a/common.h b/common.h
index ed618b5..04e095c 100644
--- a/common.h
+++ b/common.h
@@ -2,7 +2,4 @@
 #include <config.h>
-#if defined(HAVE_LIBUNWIND)
-#include <libunwind.h>
-#endif /* defined(HAVE_LIBUNWIND) */
 
 #include <sys/types.h>
 #include <sys/time.h>
@@ -17,6 +14,7 @@
 #include "debug.h"
 #include "ltrace-elf.h"
 #include "read_config_file.h"
+#include "proc.h"
 
 #if defined HAVE_LIBIBERTY || defined HAVE_LIBSUPC__
 # define USE_DEMANGLE
@@ -116,116 +114,9 @@ struct Function {
 	Function * next;
 };
 
-enum toplt {
-	LS_TOPLT_NONE = 0,	/* PLT not used for this symbol. */
-	LS_TOPLT_EXEC,		/* PLT for this symbol is executable. */
-	LS_TOPLT_POINT		/* PLT for this symbol is a non-executable. */
-};
-
 extern Function * list_of_functions;
 extern char *PLTs_initialized_by_here;
 
-struct library_symbol {
-	char * name;
-	void * enter_addr;
-	char needs_init;
-	enum toplt plt_type;
-	char is_weak;
-	struct library_symbol * next;
-};
-
-struct callstack_element {
-	union {
-		int syscall;
-		struct library_symbol * libfunc;
-	} c_un;
-	int is_syscall;
-	void * return_addr;
-	struct timeval time_spent;
-	void * arch_ptr;
-};
-
-#define MAX_CALLDEPTH 64
-
-typedef enum Process_State Process_State;
-enum Process_State {
-	STATE_ATTACHED = 0,
-	STATE_BEING_CREATED,
-	STATE_IGNORED  /* ignore this process (it's a fork and no -f was used) */
-};
-
-typedef struct Event_Handler Event_Handler;
-struct Event_Handler {
-	/* Event handler that overrides the default one.  Should
-	 * return NULL if the event was handled, otherwise the
-	 * returned event is passed to the default handler.  */
-	Event * (* on_event)(Event_Handler * self, Event * event);
-
-	/* Called when the event handler removal is requested.  */
-	void (* destroy)(Event_Handler * self);
-};
-
-/* XXX We would rather have this all organized a little differently,
- * have Process for the whole group and Task for what's there for
- * per-thread stuff.  But for now this is the less invasive way of
- * structuring it.  */
-struct Process {
-	Process_State state;
-	Process * parent;         /* needed by STATE_BEING_CREATED */
-	char * filename;
-	pid_t pid;
-
-	/* Dictionary of breakpoints (which is a mapping
-	 * address->breakpoint).  This is NULL for non-leader
-	 * processes.  */
-	Dict * breakpoints;
-
-	int mask_32bit;           /* 1 if 64-bit ltrace is tracing 32-bit process */
-	unsigned int personality;
-	int tracesysgood;         /* signal indicating a PTRACE_SYSCALL trap */
-
-	int callstack_depth;
-	struct callstack_element callstack[MAX_CALLDEPTH];
-	struct library_symbol * list_of_symbols;
-
-	int libdl_hooked;
-	/* Arch-dependent: */
-	void * debug;	/* arch-dep process debug struct */
-	long debug_state; /* arch-dep debug state */
-	void * instruction_pointer;
-	void * stack_pointer;      /* To get return addr, args... */
-	void * return_addr;
-	void * arch_ptr;
-	short e_machine;
-	short need_to_reinitialize_breakpoints;
-#ifdef __arm__
-	int thumb_mode;           /* ARM execution mode: 0: ARM, 1: Thumb */
-#endif
-
-#if defined(HAVE_LIBUNWIND)
-	/* libunwind address space */
-	unw_addr_space_t unwind_as;
-	void *unwind_priv;
-#endif /* defined(HAVE_LIBUNWIND) */
-
-	/* Set in leader.  */
-	Event_Handler * event_handler;
-
-
-	/**
-	 * Process chaining.
-	 **/
-	Process * next;
-
-	/* LEADER points to the leader thread of the POSIX.1 process.
-	   If X->LEADER == X, then X is the leader thread and the
-	   Process structures chained by NEXT represent other threads,
-	   up until, but not including, the next leader thread.
-	   LEADER may be NULL after the leader has already exited.  In
-	   that case this process is waiting to be collected.  */
-	Process * leader;
-};
-
 struct opt_c_struct {
 	int count;
 	struct timeval tv;
@@ -248,23 +139,6 @@ enum process_status {
 	ps_other,	/* Necessary other states can be added as needed.  */
 };
 
-enum pcb_status {
-	pcb_stop, /* The iteration should stop.  */
-	pcb_cont, /* The iteration should continue.  */
-};
-
-/* Process list  */
-extern Process * pid2proc(pid_t pid);
-extern void add_process(Process * proc);
-extern void remove_process(Process * proc);
-extern void change_process_leader(Process * proc, Process * leader);
-extern Process *each_process(Process * start,
-			     enum pcb_status (* cb)(Process * proc, void * data),
-			     void * data);
-extern Process *each_task(Process * start,
-			  enum pcb_status (* cb)(Process * proc, void * data),
-			  void * data);
-
 /* Events  */
 enum ecb_status {
 	ecb_cont, /* The iteration should continue.  */
@@ -279,31 +153,15 @@ extern Event * each_qd_event(enum ecb_status (* cb)(Event * event, void * data),
 extern void enque_event(Event * event);
 extern void handle_event(Event * event);
 
-extern void install_event_handler(Process * proc, Event_Handler * handler);
-extern void destroy_event_handler(Process * proc);
-
 extern pid_t execute_program(const char * command, char ** argv);
 extern int display_arg(enum tof type, Process * proc, int arg_num, arg_type_info * info);
 extern void disable_all_breakpoints(Process * proc);
 
-extern Process * open_program(char * filename, pid_t pid, int init_breakpoints);
-extern void open_pid(pid_t pid);
 extern void show_summary(void);
 extern arg_type_info * lookup_prototype(enum arg_type at);
 
-extern int do_init_elf(struct ltelf *lte, const char *filename);
-extern void do_close_elf(struct ltelf *lte);
-extern int in_load_libraries(const char *name, struct ltelf *lte, size_t count, GElf_Sym *sym);
-extern struct library_symbol *library_symbols;
-extern void add_library_symbol(GElf_Addr addr, const char *name,
-		struct library_symbol **library_symbolspp,
-		enum toplt type_of_plt, int is_weak);
-
-extern struct library_symbol * clone_library_symbol(struct library_symbol * s);
-extern void destroy_library_symbol(struct library_symbol * s);
-extern void destroy_library_symbol_chain(struct library_symbol * chain);
-
 struct breakpoint;
+struct library_symbol;
 
 /* Arch-dependent stuff: */
 extern char * pid2name(pid_t pid);
@@ -311,8 +169,8 @@ extern pid_t process_leader(pid_t pid);
 extern int process_tasks(pid_t pid, pid_t **ret_tasks, size_t *ret_n);
 extern int process_stopped(pid_t pid);
 extern enum process_status process_status(pid_t pid);
-extern void trace_set_options(Process * proc, pid_t pid);
-extern void wait_for_proc(pid_t pid);
+extern void trace_set_options(struct Process *proc);
+extern int wait_for_proc(pid_t pid);
 extern void trace_me(void);
 extern int trace_pid(pid_t pid);
 extern void untrace_pid(pid_t pid);
@@ -322,13 +180,13 @@ extern void set_instruction_pointer(Process * proc, void * addr);
 extern void * get_stack_pointer(Process * proc);
 extern void * get_return_addr(Process * proc, void * stack_pointer);
 extern void set_return_addr(Process * proc, void * addr);
-extern void enable_breakpoint(Process * proc, struct breakpoint *sbp);
-extern void disable_breakpoint(Process * proc, struct breakpoint *sbp);
+extern void enable_breakpoint(struct Process *proc, struct breakpoint *sbp);
+extern void disable_breakpoint(struct Process *proc, struct breakpoint *sbp);
 extern int syscall_p(Process * proc, int status, int * sysnum);
 extern void continue_process(pid_t pid);
 extern void continue_after_signal(pid_t pid, int signum);
 extern void continue_after_syscall(Process *proc, int sysnum, int ret_p);
-extern void continue_after_breakpoint(Process * proc, struct breakpoint *sbp);
+extern void continue_after_breakpoint(struct Process *proc, struct breakpoint *sbp);
 extern void continue_after_vfork(Process * proc);
 extern long gimme_arg(enum tof type, Process * proc, int arg_num, arg_type_info * info);
 extern void save_register_args(enum tof type, Process * proc);
@@ -337,7 +195,7 @@ extern int umovelong (Process * proc, void * addr, long * result, arg_type_info
 extern size_t umovebytes (Process *proc, void * addr, void * laddr, size_t count);
 extern int ffcheck(void * maddr);
 extern void * sym2addr(Process *, struct library_symbol *);
-extern int linkmap_init(Process *, struct ltelf *);
+extern int linkmap_init(struct Process *proc, void *dyn_addr);
 extern void arch_check_dbg(Process *proc);
 extern int task_kill (pid_t pid, int sig);
 
@@ -357,9 +215,51 @@ void trace_fail_warning(pid_t pid);
  * If the call to OS_LTRACE_EXITING_SIGHANDLER didn't handle the
  * request, OS_LTRACE_EXITING is called when the next event is
  * generated.  Therefore it's called in "safe" context, without
- * re-entrancy concerns, but it's only called after an even is
+ * re-entrancy concerns, but it's only called after an event is
  * generated.  */
 int os_ltrace_exiting_sighandler(void);
 void os_ltrace_exiting(void);
 
-extern struct ltelf main_lte;
+int arch_elf_init(struct ltelf *lte, struct library *lib);
+void arch_elf_destroy(struct ltelf *lte);
+
+enum plt_status {
+	plt_fail,
+	plt_ok,
+	plt_default,
+};
+
+enum plt_status arch_elf_add_plt_entry(struct Process *p, struct ltelf *l,
+				       const char *n, GElf_Rela *r, size_t i,
+				       struct library_symbol **ret);
+
+int arch_breakpoint_init(struct Process *proc, struct breakpoint *sbp);
+void arch_breakpoint_destroy(struct breakpoint *sbp);
+int arch_breakpoint_clone(struct breakpoint *retp, struct breakpoint *sbp);
+
+void arch_library_init(struct library *lib);
+void arch_library_destroy(struct library *lib);
+void arch_library_clone(struct library *retp, struct library *lib);
+
+int arch_library_symbol_init(struct library_symbol *libsym);
+void arch_library_symbol_destroy(struct library_symbol *libsym);
+int arch_library_symbol_clone(struct library_symbol *retp,
+			      struct library_symbol *libsym);
+
+int arch_process_init(struct Process *proc);
+void arch_process_destroy(struct Process *proc);
+int arch_process_clone(struct Process *retp, struct Process *proc);
+int arch_process_exec(struct Process *proc);
+
+typedef void *target_address_t;
+/* This should extract entry point address and interpreter (dynamic
+ * linker) bias if possible.  Returns 0 if there were no errors, -1
+ * otherwise.  Sets *ENTRYP and *INTERP_BIASP to non-zero values if
+ * the corresponding value is known.  Unknown values are set to 0.  */
+int process_get_entry(struct Process *proc,
+		      target_address_t *entryp,
+		      target_address_t *interp_biasp);
+
+/* This is called after the dynamic linker is done with the
+ * process startup.  */
+void arch_dynlink_done(struct Process *proc);
diff --git a/debug.c b/debug.c
index 1be873b..5fb9feb 100644
--- a/debug.c
+++ b/debug.c
@@ -16,6 +16,7 @@ debug_(int level, const char *file, int line, const char *fmt, ...) {
 	va_end(args);
 
 	output_line(NULL, "DEBUG: %s:%d: %s", file, line, buf);
+	fflush(options.output);
 }
 
 /*
diff --git a/defs.h b/defs.h
index dea000b..1eadb47 100644
--- a/defs.h
+++ b/defs.h
@@ -14,5 +14,3 @@
 #ifndef DEFAULT_ARRAYLEN
 #define DEFAULT_ARRAYLEN  4	/* default maximum # array elements */
 #endif				/* (-A switch) */
-
-#define MAX_LIBRARIES 200
diff --git a/dict.c b/dict.c
index ba318cd..b32ef8e 100644
--- a/dict.c
+++ b/dict.c
@@ -24,13 +24,14 @@ struct dict_entry {
 
 struct dict {
 	struct dict_entry *buckets[DICTTABLESIZE];
-	unsigned int (*key2hash) (void *);
-	int (*key_cmp) (void *, void *);
+	unsigned int (*key2hash) (const void *);
+	int (*key_cmp) (const void *, const void *);
 };
 
 Dict *
-dict_init(unsigned int (*key2hash) (void *),
-		       int (*key_cmp) (void *, void *)) {
+dict_init(unsigned int (*key2hash) (const void *),
+	  int (*key_cmp) (const void *, const void *))
+{
 	Dict *d;
 	int i;
 
@@ -103,7 +104,31 @@ dict_enter(Dict *d, void *key, void *value) {
 }
 
 void *
-dict_find_entry(Dict *d, void *key) {
+dict_remove(Dict *d, void *key)
+{ /* Unlink the entry for KEY from D and return its value, or NULL.  */
+	assert(d != NULL);
+	debug(DEBUG_FUNCTION, "dict_remove(%p)", key);
+
+	unsigned int hash = d->key2hash(key);
+	unsigned int bucketpos = hash % DICTTABLESIZE;
+
+	struct dict_entry **entryp; /* the link that points at the entry under test */
+	for (entryp = &d->buckets[bucketpos]; (*entryp) != NULL;
+	     entryp = &(*entryp)->next) {
+		struct dict_entry *entry = *entryp;
+		if (hash != entry->hash) /* cheap reject before calling key_cmp */
+			continue;
+		if (d->key_cmp(key, entry->key) == 0) {
+			*entryp = entry->next; /* splice the entry out of its bucket chain */
+			return entry->value; /* NOTE(review): ENTRY itself is not freed; leaks if dict_enter allocates it -- confirm */
+		}
+	}
+	return NULL; /* not found; indistinguishable from a stored NULL value */
+}
+
+void *
+dict_find_entry(Dict *d, const void *key)
+{
 	unsigned int hash;
 	unsigned int bucketpos;
 	struct dict_entry *entry;
@@ -147,7 +172,8 @@ dict_apply_to_all(Dict *d,
 /*****************************************************************************/
 
 unsigned int
-dict_key2hash_string(void *key) {
+dict_key2hash_string(const void *key)
+{
 	const char *s = (const char *)key;
 	unsigned int total = 0, shift = 0;
 
@@ -163,19 +189,22 @@ dict_key2hash_string(void *key) {
 }
 
 int
-dict_key_cmp_string(void *key1, void *key2) {
+dict_key_cmp_string(const void *key1, const void *key2)
+{
 	assert(key1);
 	assert(key2);
 	return strcmp((const char *)key1, (const char *)key2);
 }
 
 unsigned int
-dict_key2hash_int(void *key) {
+dict_key2hash_int(const void *key)
+{
 	return (unsigned long)key;
 }
 
 int
-dict_key_cmp_int(void *key1, void *key2) {
+dict_key_cmp_int(const void *key1, const void *key2)
+{
 	return key1 - key2;
 }
 
diff --git a/dict.h b/dict.h
index 27dc7bf..f41011a 100644
--- a/dict.h
+++ b/dict.h
@@ -4,19 +4,22 @@
 
 typedef struct dict Dict;
 
-extern Dict *dict_init(unsigned int (*key2hash) (void *),
-			      int (*key_cmp) (void *, void *));
+extern Dict *dict_init(unsigned int (*key2hash) (const void *),
+		       int (*key_cmp) (const void *, const void *));
 extern void dict_clear(Dict *d);
 extern int dict_enter(Dict *d, void *key, void *value);
-extern void *dict_find_entry(Dict *d, void *key);
+extern void *dict_remove(Dict *d, void *key);
+extern void *dict_find_entry(Dict *d, const void *key);
 extern void dict_apply_to_all(Dict *d,
 			      void (*func) (void *key, void *value, void *data),
 			      void *data);
 
-extern unsigned int dict_key2hash_string(void *key);
-extern int dict_key_cmp_string(void *key1, void *key2);
-extern unsigned int dict_key2hash_int(void *key);
-extern int dict_key_cmp_int(void *key1, void *key2);
+extern unsigned int dict_key2hash_string(const void *key);
+extern int dict_key_cmp_string(const void *key1, const void *key2);
+
+extern unsigned int dict_key2hash_int(const void *key);
+extern int dict_key_cmp_int(const void *key1, const void *key2);
+
 extern Dict * dict_clone(Dict *old, void * (*key_clone)(void*), void * (*value_clone)(void*));
 extern Dict * dict_clone2(Dict * old,
 			  void * (* key_clone)(void * key, void * data),
diff --git a/display_args.c b/display_args.c
index c639c88..5df34ca 100644
--- a/display_args.c
+++ b/display_args.c
@@ -5,6 +5,7 @@
 #include <limits.h>
 
 #include "common.h"
+#include "proc.h"
 
 static int display_char(int what);
 static int display_string(enum tof type, Process *proc,
diff --git a/execute_program.c b/execute_program.c
index 859f32c..55df205 100644
--- a/execute_program.c
+++ b/execute_program.c
@@ -78,6 +78,7 @@ execute_program(const char * command, char **argv)
 
 	pid = fork();
 	if (pid < 0) {
+	fail:
 		perror("ltrace: fork");
 		exit(1);
 	} else if (!pid) {	/* child */
@@ -89,9 +90,9 @@ execute_program(const char * command, char **argv)
 		_exit(1);
 	}
 
-	wait_for_proc(pid);
+	if (wait_for_proc(pid) < 0)
+		goto fail;
 
 	debug(1, "PID=%d", pid);
-
 	return pid;
 }
diff --git a/filter.c b/filter.c
new file mode 100644
index 0000000..003010d
--- /dev/null
+++ b/filter.c
@@ -0,0 +1,188 @@
+/*
+ * This file is part of ltrace.
+ * Copyright (C) 2012 Petr Machata, Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "filter.h"
+#include "library.h"
+
+void
+filter_init(struct filter *filt)
+{
+	filt->next = NULL;	/* not chained to any other filter */
+	filt->rules = NULL;	/* empty rule list */
+}
+
+void
+filter_destroy(struct filter *filt)
+{
+	struct filter_rule *rule = filt->rules;
+	while (rule != NULL) {
+		struct filter_rule *following = rule->next; /* read before destroying */
+		filter_rule_destroy(rule);
+		rule = following;
+	}
+}
+
+void
+filter_rule_init(struct filter_rule *rule, enum filter_rule_type type,
+		 struct filter_lib_matcher *matcher,
+		 regex_t symbol_re)
+{
+	rule->next = NULL;		/* not linked into a filter yet */
+	rule->symbol_re = symbol_re;	/* struct copy of the compiled regex */
+	rule->lib_matcher = matcher;
+	rule->type = type;
+}
+
+void
+filter_rule_destroy(struct filter_rule *rule)
+{ /* Release what RULE holds; RULE's own storage is not freed here.  */
+	filter_lib_matcher_destroy(rule->lib_matcher); /* releases the matcher's regex, not the matcher struct */
+	regfree(&rule->symbol_re);
+}
+
+void
+filter_add_rule(struct filter *filt, struct filter_rule *rule)
+{
+	struct filter_rule **tailp = &filt->rules;
+	while (*tailp != NULL) /* walk to the terminating NULL link */
+		tailp = &(*tailp)->next;
+	*tailp = rule;
+}
+
+void
+filter_lib_matcher_name_init(struct filter_lib_matcher *matcher,
+			     enum filter_lib_matcher_type type,
+			     regex_t libname_re)
+{ /* Set up MATCHER to select libraries by name using LIBNAME_RE.  */
+	switch (type) {
+	case FLM_MAIN:
+		assert(type != type); /* always false: FLM_MAIN is not valid here */
+		abort(); /* still reached when NDEBUG compiles the assert out */
+
+	case FLM_SONAME:
+	case FLM_PATHNAME:
+		matcher->type = type;
+		matcher->libname_re = libname_re; /* struct copy of the compiled regex */
+	}
+}
+
+void
+filter_lib_matcher_main_init(struct filter_lib_matcher *matcher)
+{
+	matcher->type = FLM_MAIN; /* libname_re is left unset; this file never reads it for FLM_MAIN */
+}
+
+void
+filter_lib_matcher_destroy(struct filter_lib_matcher *matcher)
+{
+	/* Only the name-based matchers hold a compiled regex; the
+	 * FLM_MAIN matcher has nothing to release.  MATCHER's own
+	 * storage is not freed here.  */
+	enum filter_lib_matcher_type kind = matcher->type;
+	int holds_regex = kind == FLM_SONAME || kind == FLM_PATHNAME;
+
+	if (holds_regex)
+		regfree(&matcher->libname_re);
+}
+
+static int
+re_match_or_error(regex_t *re, const char *name, const char *what)
+{ /* 1 if NAME matches RE, else 0; a regexec error is reported and counts as 0.  */
+	int status = regexec(re, name, 0, NULL, 0);
+	if (status == 0)
+		return 1;
+	if (status == REG_NOMATCH)
+		return 0;
+
+	char buf[200];
+	regerror(status, re, buf, sizeof buf);
+	/* WHAT names the kind of string that failed, e.g. "symbol name".  */
+	fprintf(stderr, "Error when matching %s %s: %s\n", what, name, buf);
+	return 0;
+}
+
+static int
+matcher_matches_library(struct filter_lib_matcher *matcher, struct library *lib)
+{ /* Non-zero if LIB is selected by MATCHER.  */
+	switch (matcher->type) {
+	case FLM_SONAME:
+		return re_match_or_error(&matcher->libname_re, lib->soname, /* NOTE(review): assumes soname is non-NULL -- confirm */
+					 "library soname");
+	case FLM_PATHNAME:
+		return re_match_or_error(&matcher->libname_re, lib->pathname, /* NOTE(review): assumes pathname is non-NULL -- confirm */
+					 "library pathname");
+	case FLM_MAIN:
+		return lib->type == LT_LIBTYPE_MAIN;
+	}
+	assert(matcher->type != matcher->type); /* always false: unknown matcher type */
+	abort();
+}
+
+int
+filter_matches_library(struct filter *filt, struct library *lib)
+{ /* Non-zero if some FR_ADD rule of FILT has a matcher selecting LIB.  */
+	if (filt == NULL) /* no filter: nothing can match */
+		return 0;
+
+	struct filter_rule *it;
+	for (it = filt->rules; it != NULL; it = it->next) /* only FILT's own rules; filt->next is not followed */
+		switch (it->type) {
+		case FR_ADD:
+			if (matcher_matches_library(it->lib_matcher, lib))
+				return 1; /* otherwise fall through to the continue below */
+		case FR_SUBTRACT:
+			continue; /* subtract rules are ignored: they can never add a library */
+		};
+	return 0;
+}
+
+int
+filter_matches_symbol(struct filter *filt,
+		      const char *sym_name, struct library *lib)
+{ /* Non-zero if any filter in the chain starting at FILT selects SYM_NAME in LIB.  */
+	for (; filt != NULL; filt = filt->next) {
+		int matches = 0; /* each filter starts with the symbol not selected */
+		struct filter_rule *it;
+		for (it = filt->rules; it != NULL; it = it->next) {
+			switch (it->type) {
+			case FR_ADD:
+				if (matches) /* already selected: an add rule changes nothing */
+					continue;
+				break;
+			case FR_SUBTRACT:
+				if (!matches) /* not selected: nothing to subtract */
+					continue;
+			}
+
+			if (matcher_matches_library(it->lib_matcher, lib)
+			    && re_match_or_error(&it->symbol_re, sym_name,
+						 "symbol name"))
+				matches = !matches; /* the rule applies: flip the selection */
+		}
+		if (matches)
+			return 1;
+	}
+	return 0;
+}
diff --git a/filter.h b/filter.h
new file mode 100644
index 0000000..65c575a
--- /dev/null
+++ b/filter.h
@@ -0,0 +1,99 @@
+/*
+ * This file is part of ltrace.
+ * Copyright (C) 2012 Petr Machata, Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+/* This file contains declarations and types for working with symbol
+ * filters.  */
+
+#ifndef FILTER_H
+#define FILTER_H
+
+#include <sys/types.h>
+#include <regex.h>
+
+struct library;
+struct library_symbol;
+
+enum filter_lib_matcher_type { /* how a filter_lib_matcher selects libraries */
+	/* Match the library's soname against libname_re.  */
+	FLM_SONAME,
+	/* Match the library's path name against libname_re.  */
+	FLM_PATHNAME,
+	/* Match main binary.  */
+	FLM_MAIN,
+};
+
+struct filter_lib_matcher {
+	enum filter_lib_matcher_type type;
+	regex_t libname_re; /* set and used only for FLM_SONAME and FLM_PATHNAME */
+};
+
+enum filter_rule_type {
+	FR_ADD, /* a matching rule selects a symbol that is not yet selected */
+	FR_SUBTRACT, /* a matching rule deselects a symbol that is selected */
+};
+
+struct filter_rule {
+	struct filter_rule *next; /* next rule of the same filter, or NULL */
+	struct filter_lib_matcher *lib_matcher; /* which libraries the rule applies to */
+	regex_t symbol_re; /* Regex for matching symbol name.  */
+	enum filter_rule_type type;
+};
+
+struct filter {
+	struct filter *next; /* next filter in the chain; a symbol matches if any filter in the chain selects it */
+	struct filter_rule *rules; /* singly linked list, evaluated in order */
+};
+
+void filter_init(struct filter *filt); /* make FILT an empty, unchained filter */
+void filter_destroy(struct filter *filt); /* destroy every rule of FILT */
+
+/* Both SYMBOL_RE and MATCHER are owned and destroyed by RULE.  */
+void filter_rule_init(struct filter_rule *rule, enum filter_rule_type type,
+		      struct filter_lib_matcher *matcher,
+		      regex_t symbol_re);
+
+void filter_rule_destroy(struct filter_rule *rule); /* destroys the matcher and the symbol regex */
+
+/* RULE is added to FILT and owned and destroyed by it.  */
+void filter_add_rule(struct filter *filt, struct filter_rule *rule);
+
+/* Create a matcher that matches library name.  RE is owned and
+ * destroyed by MATCHER.  TYPE shall be FLM_SONAME or
+ * FLM_PATHNAME.  */
+void filter_lib_matcher_name_init(struct filter_lib_matcher *matcher,
+				  enum filter_lib_matcher_type type,
+				  regex_t re);
+
+/* Create a matcher that matches main binary.  */
+void filter_lib_matcher_main_init(struct filter_lib_matcher *matcher);
+
+void filter_lib_matcher_destroy(struct filter_lib_matcher *matcher); /* frees the regex of name matchers */
+
+/* Ask whether FILTER might match a symbol in LIB.  0 if no, non-0 if
+ * yes.  Note that positive answer doesn't mean that anything will
+ * actually be matched, just that potentially it could.  */
+int filter_matches_library(struct filter *filt, struct library *lib);
+
+/* Ask whether FILTER matches this symbol.  Returns 0 if it doesn't,
+ * or non-0 value if it does.  */
+int filter_matches_symbol(struct filter *filt, const char *sym_name,
+			  struct library *lib);
+
+#endif /* FILTER_H */
diff --git a/glob.c b/glob.c
new file mode 100644
index 0000000..6c5c9ef
--- /dev/null
+++ b/glob.c
@@ -0,0 +1,275 @@
+/*
+ * This file is part of ltrace.
+ * Copyright (C) 2007, 2008, 2012 Petr Machata, Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <sys/types.h>
+#include <regex.h>
+#include <string.h>
+#include <stdlib.h>
+
+static ssize_t
+match_character_class(const char *glob, size_t length, size_t from)
+{ /* GLOB[FROM] starts "[:"; return the index of the ']' closing ":]", or -1.  */
+	size_t i = from + 2; /* set up front so LENGTH == 0 never reads an indeterminate I */
+	if (length > 0) /* guards the unsigned LENGTH - 1 below */
+		for (; i < length - 1 && glob[++i] != ':'; )
+			;
+	if (i >= length || glob[++i] != ']')
+		return -1;
+	return i;
+}
+
+static ssize_t
+match_brack(const char *glob, size_t length, size_t from, int *exclmp)
+{ /* GLOB[FROM] is '['; return the index of the closing ']', or -1.  */
+	size_t i = from + 1;
+
+	if (i >= length)
+		return -1;
+
+	/* Complement operator.  */
+	*exclmp = 0;
+	if (glob[i] == '^' || glob[i] == '!') {
+		*exclmp = glob[i++] == '!'; /* 1 only for '!'; '^' leaves *EXCLMP at 0 */
+		if (i >= length)
+			return -1;
+	}
+
+	/* On first character, both [ and ] are legal.  But when [ is
+	 * followed with :, it's character class.  */
+	if (glob[i] == '[' && glob[i + 1] == ':') { /* glob[i + 1] is at worst the terminating NUL */
+		ssize_t j = match_character_class(glob, length, i);
+		if (j < 0)
+		fail:
+			return -1;
+		i = j;
+	}
+	++i; /* skip any character, including [ or ]  */
+
+	int escape = 0; /* NOTE(review): never set to 1 below, so the escape branch is unreachable */
+	for (; i < length; ++i) {
+		char c = glob[i];
+		if (escape) {
+			++i;
+			escape = 0;
+
+		} else if (c == '[' && glob[i + 1] == ':') {
+			ssize_t j = match_character_class(glob, length, i);
+			if (j < 0)
+				goto fail;
+			i = j; /* resume after the ":]" that closes the class */
+
+		} else if (c == ']') {
+			return i;
+		}
+	}
+	return -1; /* ran off the end without a closing ']' */
+}
+
+static int
+append(char **bufp, const char *str, size_t str_size,
+       size_t *sizep, size_t *allocp)
+{
+	/* A zero STR_SIZE means "STR is NUL-terminated, measure it".  */
+	size_t count = str_size != 0 ? str_size : strlen(str);
+	size_t used = *sizep;
+	size_t wanted = used + count;
+	if (wanted > *allocp) {
+		char *grown = realloc(*bufp, 2 * wanted); /* grow to twice the need */
+		if (grown == NULL)
+			return -1; /* *BUFP, *SIZEP and *ALLOCP are left untouched */
+		*bufp = grown;
+		*allocp = 2 * wanted;
+	}
+
+	memcpy(*bufp + used, str, count);
+	*sizep = wanted;
+	return 0;
+}
+
+static int
+glob_to_regex(const char *glob, char **retp)
+{ /* Translate GLOB into a malloc'd regex string in *RETP; 0 or a REG_* code.  */
+	size_t allocd = 0;
+	size_t size = 0;
+	char *buf = NULL; /* output under construction; freed on every error path */
+
+	size_t length = strlen(glob);
+	int escape = 0; /* previous character was an unescaped backslash */
+	size_t i;
+	for(i = 0; i < length; ++i) {
+		char c = glob[i];
+		if (escape) {
+			if (c == '\\') {
+				if (append(&buf, "\\\\", 0,
+					   &size, &allocd) < 0) {
+				fail:
+					free(buf);
+					return REG_ESPACE;
+				}
+			} else if (c == '*') {
+				if (append(&buf, "\\*", 0, &size, &allocd) < 0)
+					goto fail;
+			} else if (c == '?') {
+				if (append(&buf, "?", 0, &size, &allocd) < 0)
+					goto fail;
+			} else if (append(&buf, (char[]){ '\\', c }, 2,
+					  &size, &allocd) < 0)
+				goto fail;
+			escape = 0;
+		} else {
+			if (c == '\\')
+				escape = 1;
+			else if (c == '[') {
+				int exclm;
+				ssize_t j = match_brack(glob, length, i, &exclm);
+				if (j < 0) {
+					free(buf); /* don't leak what was translated so far */
+					return REG_EBRACK;
+				}
+				if (exclm /* "[!" is rewritten as "[^" */
+				    && append(&buf, "[^", 2,
+					      &size, &allocd) < 0)
+					goto fail;
+				if (append(&buf, glob + i + 2*exclm,
+					   j - i + 1 - 2*exclm,
+					   &size, &allocd) < 0)
+					goto fail;
+				i = j; /* continue after the closing ']' */
+			} else if (c == '*') {
+				if (append(&buf, ".*", 0, &size, &allocd) < 0)
+					goto fail;
+			} else if (c == '?') {
+				if (append(&buf, ".", 0, &size, &allocd) < 0)
+					goto fail;
+			} else if (c == '.') {
+				if (append(&buf, "\\.", 0, &size, &allocd) < 0)
+					goto fail;
+			} else if (append(&buf, &c, 1, &size, &allocd) < 0)
+				goto fail;
+		}
+	}
+
+	if (escape) { /* GLOB ended in a lone backslash */
+		free(buf);
+		return REG_EESCAPE;
+	}
+
+	{
+		char c = 0; /* terminating NUL; size 1 is explicit because 0 means strlen */
+		if (append(&buf, &c, 1, &size, &allocd) < 0)
+			goto fail;
+	}
+	*retp = buf; /* *RETP is written only on success; caller frees it */
+	return 0;
+}
+
+int
+globcomp(regex_t *preg, const char *glob, int cflags)
+{
+	char *pattern; /* assigned by glob_to_regex() only when it returns 0 */
+	int rc = glob_to_regex(glob, &pattern);
+	if (rc == 0) {
+		rc = regcomp(preg, pattern, cflags);
+		free(pattern);
+	}
+	return rc;
+}
+
+#ifdef TEST
+#include <assert.h>
+#include <stdio.h>
+
+static void
+translate(const char *glob, int exp_status, const char *expect)
+{ /* Check that GLOB translates with EXP_STATUS and, on success, to EXPECT.  */
+	char *pattern = NULL;
+	int status = glob_to_regex(glob, &pattern);
+	if (status != exp_status) {
+		fprintf(stderr, "translating %s, expected status %d, got %d\n",
+			glob, exp_status, status);
+		return; /* mismatches are only reported; PATTERN is not freed on this path */
+	}
+
+	if (status == 0) {
+		assert(pattern != NULL);
+		if (strcmp(pattern, expect) != 0)
+			fprintf(stderr, "translating %s, expected %s, got %s\n",
+				glob, expect, pattern);
+		free(pattern);
+	} else {
+		assert(pattern == NULL); /* glob_to_regex must not set *RETP on failure */
+	}
+}
+
+static void
+try_match(const char *glob, const char *str, int expect)
+{ /* Compile GLOB and assert that regexec on STR returns EXPECT.  */
+	regex_t preg;
+	int status = globcomp(&preg, glob, 0); /* cflags 0: regcomp's default syntax */
+	assert(status == 0);
+	status = regexec(&preg, str, 0, NULL, 0);
+	assert(status == expect); /* EXPECT is 0 or REG_NOMATCH */
+	regfree(&preg);
+}
+
+int
+main(void)
+{ /* Self-test, built only with -DTEST; translate() mismatches go to stderr, try_match() asserts.  */
+        translate("*", 0, ".*");
+        translate("?", 0, ".");
+        translate(".*", 0, "\\..*");
+        translate("*.*", 0, ".*\\..*");
+        translate("*a*", 0, ".*a.*");
+        translate("[abc]", 0, "[abc]");
+        translate("[^abc]", 0, "[^abc]");
+        translate("[!abc]", 0, "[^abc]");
+        translate("[]]", 0, "[]]");
+        translate("[[]", 0, "[[]");
+        translate("[^]]", 0, "[^]]");
+        translate("[^a-z]", 0, "[^a-z]");
+        translate("[abc\\]]", 0, "[abc\\]]");
+        translate("[abc\\]def]", 0, "[abc\\]def]");
+        translate("[[:space:]]", 0, "[[:space:]]");
+        translate("[^[:space:]]", 0, "[^[:space:]]");
+        translate("[![:space:]]", 0, "[^[:space:]]");
+        translate("[^a-z]*", 0, "[^a-z].*");
+        translate("[^a-z]bar*", 0, "[^a-z]bar.*");
+	translate("*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.*.", 0, /* long input: forces append() to grow the buffer repeatedly */
+		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\."
+		  ".*\\..*\\..*\\..*\\..*\\..*\\..*\\..*\\.");
+
+        translate("\\", REG_EESCAPE, NULL); /* malformed globs from here on */
+        translate("[^[:naotuh\\", REG_EBRACK, NULL);
+        translate("[^[:", REG_EBRACK, NULL);
+        translate("[^[", REG_EBRACK, NULL);
+        translate("[^", REG_EBRACK, NULL);
+        translate("[\\", REG_EBRACK, NULL);
+        translate("[", REG_EBRACK, NULL);
+
+	try_match("abc*def", "abc012def", 0);
+	try_match("abc*def", "ab012def", REG_NOMATCH);
+	try_match("[abc]*def", "a1def", 0);
+	try_match("[abc]*def", "b1def", 0);
+	try_match("[abc]*def", "d1def", REG_NOMATCH);
+
+	return 0;
+}
+
+#endif
diff --git a/glob.h b/glob.h
new file mode 100644
index 0000000..d60c0a2
--- /dev/null
+++ b/glob.h
@@ -0,0 +1,32 @@
+/*
+ * This file is part of ltrace.
+ * Copyright (C) 2007, 2008, 2012 Petr Machata, Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef GLOB_H
+#define GLOB_H
+
+#include <sys/types.h>
+#include <regex.h>
+
+/* Like regcomp(3), but GLOB is a glob(7)-style pattern.  Returns 0,
+ * or a REG_* error code.  On success, release PREG with regfree(3).
+ * CFLAGS is passed through to regcomp unchanged.  */
+int globcomp(regex_t *preg, const char *glob, int cflags);
+
+#endif /* GLOB_H */
diff --git a/handle_event.c b/handle_event.c
index ec4c9f3..73c118a 100644
--- a/handle_event.c
+++ b/handle_event.c
@@ -1,20 +1,18 @@
 #define _GNU_SOURCE
 #include "config.h"
 
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
 #include <stdio.h>
-#include <string.h>
 #include <stdlib.h>
-#include <signal.h>
-#include <assert.h>
+#include <string.h>
 #include <sys/time.h>
-#include <errno.h>
-
-#ifdef __powerpc__
-#include <sys/ptrace.h>
-#endif
 
-#include "common.h"
 #include "breakpoint.h"
+#include "common.h"
+#include "library.h"
+#include "proc.h"
 
 static void handle_signal(Event *event);
 static void handle_exit(Event *event);
@@ -42,7 +40,7 @@ call_handler(Process * proc, Event * event)
 {
 	assert(proc != NULL);
 
-	Event_Handler * handler = proc->event_handler;
+	struct event_handler *handler = proc->event_handler;
 	if (handler == NULL)
 		return event;
 
@@ -86,60 +84,72 @@ handle_event(Event *event)
 		debug(1, "event: none");
 		return;
 	case EVENT_SIGNAL:
-		debug(1, "event: signal (%s [%d])",
+		debug(1, "[%d] event: signal (%s [%d])",
+		      event->proc->pid,
 		      shortsignal(event->proc, event->e_un.signum),
 		      event->e_un.signum);
 		handle_signal(event);
 		return;
 	case EVENT_EXIT:
-		debug(1, "event: exit (%d)", event->e_un.ret_val);
+		debug(1, "[%d] event: exit (%d)",
+		      event->proc->pid,
+		      event->e_un.ret_val);
 		handle_exit(event);
 		return;
 	case EVENT_EXIT_SIGNAL:
-		debug(1, "event: exit signal (%s [%d])",
+		debug(1, "[%d] event: exit signal (%s [%d])",
+		      event->proc->pid,
 		      shortsignal(event->proc, event->e_un.signum),
 		      event->e_un.signum);
 		handle_exit_signal(event);
 		return;
 	case EVENT_SYSCALL:
-		debug(1, "event: syscall (%s [%d])",
+		debug(1, "[%d] event: syscall (%s [%d])",
+		      event->proc->pid,
 		      sysname(event->proc, event->e_un.sysnum),
 		      event->e_un.sysnum);
 		handle_syscall(event);
 		return;
 	case EVENT_SYSRET:
-		debug(1, "event: sysret (%s [%d])",
+		debug(1, "[%d] event: sysret (%s [%d])",
+		      event->proc->pid,
 		      sysname(event->proc, event->e_un.sysnum),
 		      event->e_un.sysnum);
 		handle_sysret(event);
 		return;
 	case EVENT_ARCH_SYSCALL:
-		debug(1, "event: arch_syscall (%s [%d])",
-				arch_sysname(event->proc, event->e_un.sysnum),
-				event->e_un.sysnum);
+		debug(1, "[%d] event: arch_syscall (%s [%d])",
+		      event->proc->pid,
+		      arch_sysname(event->proc, event->e_un.sysnum),
+		      event->e_un.sysnum);
 		handle_arch_syscall(event);
 		return;
 	case EVENT_ARCH_SYSRET:
-		debug(1, "event: arch_sysret (%s [%d])",
-				arch_sysname(event->proc, event->e_un.sysnum),
-				event->e_un.sysnum);
+		debug(1, "[%d] event: arch_sysret (%s [%d])",
+		      event->proc->pid,
+		      arch_sysname(event->proc, event->e_un.sysnum),
+		      event->e_un.sysnum);
 		handle_arch_sysret(event);
 		return;
 	case EVENT_CLONE:
 	case EVENT_VFORK:
-		debug(1, "event: clone (%u)", event->e_un.newpid);
+		debug(1, "[%d] event: clone (%u)",
+		      event->proc->pid, event->e_un.newpid);
 		handle_clone(event);
 		return;
 	case EVENT_EXEC:
-		debug(1, "event: exec()");
+		debug(1, "[%d] event: exec()",
+		      event->proc->pid);
 		handle_exec(event);
 		return;
 	case EVENT_BREAKPOINT:
-		debug(1, "event: breakpoint");
+		debug(1, "[%d] event: breakpoint %p",
+		      event->proc->pid, event->e_un.brk_addr);
 		handle_breakpoint(event);
 		return;
 	case EVENT_NEW:
-		debug(1, "event: new process");
+		debug(1, "[%d] event: new process",
+		      event->e_un.newpid);
 		handle_new(event);
 		return;
 	default:
@@ -148,37 +158,6 @@ handle_event(Event *event)
 	}
 }
 
-/* TODO */
-static void *
-address_clone(void * addr, void * data)
-{
-	debug(DEBUG_FUNCTION, "address_clone(%p)", addr);
-	return addr;
-}
-
-static void *
-breakpoint_clone(void *bp, void *data)
-{
-	Dict *map = data;
-	debug(DEBUG_FUNCTION, "breakpoint_clone(%p)", bp);
-	struct breakpoint *b = malloc(sizeof(*b));
-	if (!b) {
-		perror("malloc()");
-		exit(1);
-	}
-	memcpy(b, bp, sizeof(*b));
-	if (b->libsym != NULL) {
-		struct library_symbol *sym = dict_find_entry(map, b->libsym);
-		if (b->libsym == NULL) {
-			fprintf(stderr, "Can't find cloned symbol %s.\n",
-				b->libsym->name);
-			return NULL;
-		}
-		b->libsym = sym;
-	}
-	return b;
-}
-
 typedef struct Pending_New Pending_New;
 struct Pending_New {
 	pid_t pid;
@@ -240,83 +219,45 @@ pending_new_remove(pid_t pid) {
 	}
 }
 
-static int
-clone_breakpoints(Process * proc, Process * orig_proc)
-{
-	/* When copying breakpoints, we also have to copy the
-	 * referenced symbols, and link them properly.  */
-	Dict * map = dict_init(&dict_key2hash_int, &dict_key_cmp_int);
-	struct library_symbol * it = proc->list_of_symbols;
-	proc->list_of_symbols = NULL;
-	for (; it != NULL; it = it->next) {
-		struct library_symbol * libsym = clone_library_symbol(it);
-		if (libsym == NULL) {
-			int save_errno;
-		err:
-			save_errno = errno;
-			destroy_library_symbol_chain(proc->list_of_symbols);
-			dict_clear(map);
-			errno = save_errno;
-			return -1;
-		}
-		libsym->next = proc->list_of_symbols;
-		proc->list_of_symbols = libsym;
-		if (dict_enter(map, it, libsym) != 0)
-			goto err;
-	}
-
-	proc->breakpoints = dict_clone2(orig_proc->breakpoints,
-					address_clone, breakpoint_clone, map);
-	if (proc->breakpoints == NULL)
-		goto err;
-
-	dict_clear(map);
-	return 0;
-}
-
 static void
-handle_clone(Event * event) {
-	Process *p;
-
+handle_clone(Event *event)
+{
 	debug(DEBUG_FUNCTION, "handle_clone(pid=%d)", event->proc->pid);
 
-	p = malloc(sizeof(Process));
-	if (!p) {
+	struct Process *proc = malloc(sizeof(*proc));
+	if (proc == NULL) {
+	fail:
+		free(proc);
+		/* XXX proper error handling here, please.  */
 		perror("malloc()");
 		exit(1);
 	}
-	memcpy(p, event->proc, sizeof(Process));
-	p->pid = event->e_un.newpid;
-	p->parent = event->proc;
+
+	if (process_clone(proc, event->proc, event->e_un.newpid) < 0)
+		goto fail;
+	proc->parent = event->proc;
 
 	/* We save register values to the arch pointer, and these need
 	   to be per-thread.  */
-	p->arch_ptr = NULL;
-
-	if (pending_new(p->pid)) {
-		pending_new_remove(p->pid);
-		if (p->event_handler != NULL)
-			destroy_event_handler(p);
-		if (event->proc->state == STATE_ATTACHED && options.follow) {
-			p->state = STATE_ATTACHED;
-		} else {
-			p->state = STATE_IGNORED;
-		}
-		continue_process(p->pid);
-		add_process(p);
+	proc->arch_ptr = NULL;
+
+	if (pending_new(proc->pid)) {
+		pending_new_remove(proc->pid);
+		/* XXX this used to be destroy_event_handler call, but
+		 * I don't think we want to call that on a shared
+		 * state.  */
+		proc->event_handler = NULL;
+		if (event->proc->state == STATE_ATTACHED && options.follow)
+			proc->state = STATE_ATTACHED;
+		else
+			proc->state = STATE_IGNORED;
+		continue_process(proc->pid);
 	} else {
-		p->state = STATE_BEING_CREATED;
-		add_process(p);
+		proc->state = STATE_BEING_CREATED;
 	}
 
-	if (p->leader == p)
-		clone_breakpoints(p, event->proc->leader);
-	else
-		/* Thread groups share breakpoints.  */
-		p->breakpoints = NULL;
-
 	if (event->type == EVENT_VFORK)
-		continue_after_vfork(p);
+		continue_after_vfork(proc);
 	else
 		continue_process(event->proc->pid);
 }
@@ -445,14 +386,38 @@ handle_exit_signal(Event *event) {
 }
 
 static void
+output_syscall(struct Process *proc, const char *name, enum tof tof,
+	       void (*output)(enum tof, struct Process *,
+			      struct library_symbol *))
+{ /* Wrap NAME in a temporary library_symbol and pass it to OUTPUT with type TOF.  */
+	struct library_symbol syscall;
+	if (library_symbol_init(&syscall, 0, name, 0, LS_TOPLT_NONE) >= 0) {
+		(*output)(tof, proc, &syscall);
+		library_symbol_destroy(&syscall);
+	} /* if the init fails, nothing is printed for this event */
+}
+
+static void
+output_syscall_left(struct Process *proc, const char *name)
+{ /* Report a syscall through output_left() as LT_TOF_SYSCALL.  */
+	output_syscall(proc, name, LT_TOF_SYSCALL, &output_left);
+}
+
+static void
+output_syscall_right(struct Process *proc, const char *name)
+{ /* Report through output_right() as LT_TOF_SYSCALLR, as the code this replaces did.  */
+	output_syscall(proc, name, LT_TOF_SYSCALLR, &output_right);
+}
+
+static void
 handle_syscall(Event *event) {
 	debug(DEBUG_FUNCTION, "handle_syscall(pid=%d, sysnum=%d)", event->proc->pid, event->e_un.sysnum);
 	if (event->proc->state != STATE_IGNORED) {
 		callstack_push_syscall(event->proc, event->e_un.sysnum);
-		if (options.syscalls) {
-			output_left(LT_TOF_SYSCALL, event->proc,
-				    sysname(event->proc, event->e_un.sysnum));
-		}
+		if (options.syscalls)
+			output_syscall_left(event->proc,
+					    sysname(event->proc,
+						    event->e_un.sysnum));
 	}
 	continue_after_syscall(event->proc, event->e_un.sysnum, 0);
 }
@@ -461,20 +426,36 @@ static void
 handle_exec(Event * event) {
 	Process * proc = event->proc;
 
+	/* Save the PID so that we can use it after unsuccessful
+	 * process_exec.  */
+	pid_t pid = proc->pid;
+
 	debug(DEBUG_FUNCTION, "handle_exec(pid=%d)", proc->pid);
 	if (proc->state == STATE_IGNORED) {
-		untrace_pid(proc->pid);
+	untrace:
+		untrace_pid(pid);
 		remove_process(proc);
 		return;
 	}
 	output_line(proc, "--- Called exec() ---");
-	proc->mask_32bit = 0;
-	proc->personality = 0;
-	proc->arch_ptr = NULL;
-	free(proc->filename);
-	proc->filename = pid2name(proc->pid);
-	breakpoints_init(proc, 0);
-	proc->callstack_depth = 0;
+
+	if (process_exec(proc) < 0) {
+		fprintf(stderr,
+			"couldn't reinitialize process %d after exec\n", pid);
+		goto untrace;
+	}
+
+	continue_process(proc->pid);
+
+	/* After the exec, we expect to hit the first executable
+	 * instruction.
+	 *
+	 * XXX TODO It would be nice to have this removed, but then we
+	 * need to do that also for initial call to wait_for_proc in
+	 * execute_program.  In that case we could generate a
+	 * EVENT_FIRST event or something, or maybe this could somehow
+	 * be rolled into EVENT_NEW.  */
+	wait_for_proc(proc->pid);
 	continue_process(proc->pid);
 }
 
@@ -484,8 +465,9 @@ handle_arch_syscall(Event *event) {
 	if (event->proc->state != STATE_IGNORED) {
 		callstack_push_syscall(event->proc, 0xf0000 + event->e_un.sysnum);
 		if (options.syscalls) {
-			output_left(LT_TOF_SYSCALL, event->proc,
-					arch_sysname(event->proc, event->e_un.sysnum));
+			output_syscall_left(event->proc,
+					    arch_sysname(event->proc,
+							 event->e_un.sysnum));
 		}
 	}
 	continue_process(event->proc->pid);
@@ -522,10 +504,11 @@ handle_sysret(Event *event) {
 		if (opt_T || options.summary) {
 			calc_time_spent(event->proc);
 		}
-		if (options.syscalls) {
-			output_right(LT_TOF_SYSCALLR, event->proc,
-					sysname(event->proc, event->e_un.sysnum));
-		}
+		if (options.syscalls)
+			output_syscall_right(event->proc,
+					     sysname(event->proc,
+						     event->e_un.sysnum));
+
 		assert(event->proc->callstack_depth > 0);
 		unsigned d = event->proc->callstack_depth - 1;
 		assert(event->proc->callstack[d].is_syscall);
@@ -541,10 +524,10 @@ handle_arch_sysret(Event *event) {
 		if (opt_T || options.summary) {
 			calc_time_spent(event->proc);
 		}
-		if (options.syscalls) {
-			output_right(LT_TOF_SYSCALLR, event->proc,
-					arch_sysname(event->proc, event->e_un.sysnum));
-		}
+		if (options.syscalls)
+			output_syscall_right(event->proc,
+					     arch_sysname(event->proc,
+							  event->e_un.sysnum));
 		callstack_pop(event->proc);
 	}
 	continue_process(event->proc->pid);
@@ -556,7 +539,7 @@ output_right_tos(struct Process *proc)
 	size_t d = proc->callstack_depth;
 	struct callstack_element *elem = &proc->callstack[d - 1];
 	if (proc->state != STATE_IGNORED)
-		output_right(LT_TOF_FUNCTIONR, proc, elem->c_un.libfunc->name);
+		output_right(LT_TOF_FUNCTIONR, proc, elem->c_un.libfunc);
 }
 
 static void
@@ -564,43 +564,7 @@ handle_breakpoint(Event *event)
 
 	for (i = event->proc->callstack_depth - 1; i >= 0; i--) {
 		if (brk_addr == event->proc->callstack[i].return_addr) {
-			struct library_symbol *libsym =
-			    event->proc->callstack[i].c_un.libfunc;
-#ifdef __powerpc__
-			/*
-			 * PPC HACK! (XXX FIXME TODO)
-			 * The PLT gets modified during the first call,
-			 * so be sure to re-enable the breakpoint.
-			 */
-			unsigned long a;
-			void *addr = sym2addr(event->proc, libsym);
-
-			if (libsym->plt_type != LS_TOPLT_POINT) {
-				unsigned char break_insn[] = BREAKPOINT_VALUE;
-
-				sbp = address2bpstruct(leader, addr);
-				assert(sbp);
-				a = ptrace(PTRACE_PEEKTEXT, event->proc->pid,
-					   addr);
-
-				if (memcmp(&a, break_insn, BREAKPOINT_LENGTH)) {
-					sbp->enabled--;
-					insert_breakpoint(event->proc, addr,
-							  libsym, 1);
-				}
-			} else {
-				sbp = dict_find_entry(leader->breakpoints, addr);
-				/* On powerpc, the breakpoint address
-				   may end up being actual entry point
-				   of the library symbol, not the PLT
-				   address we computed.  In that case,
-				   sbp is NULL.  */
-				if (sbp == NULL || addr != sbp->addr) {
-					insert_breakpoint(event->proc, addr,
-							  libsym, 1);
-				}
-			}
-#elif defined(__mips__)
+#if defined(__mips__)
 			void *addr = NULL;
 			struct library_symbol *sym= event->proc->callstack[i].c_un.libfunc;
 			struct library_symbol *new_sym;
@@ -624,14 +571,14 @@ handle_breakpoint(Event *event)
 			sbp = dict_find_entry(leader->breakpoints, addr);
 			if (sbp) {
 				if (addr != sbp->addr) {
-					insert_breakpoint(event->proc, addr, sym, 1);
+					insert_breakpoint(event->proc, addr, sym);
 				}
 			} else {
 				new_sym=malloc(sizeof(*new_sym) + strlen(sym->name) + 1);
 				memcpy(new_sym,sym,sizeof(*new_sym) + strlen(sym->name) + 1);
 				new_sym->next = leader->list_of_symbols;
 				leader->list_of_symbols = new_sym;
-				insert_breakpoint(event->proc, addr, new_sym, 1);
+				insert_breakpoint(event->proc, addr, new_sym);
 			}
 #endif
 			for (j = event->proc->callstack_depth - 1; j > i; j--) {
@@ -644,6 +591,9 @@ handle_breakpoint(Event *event)
 			}
 			event->proc->return_addr = brk_addr;
 
+			struct library_symbol *libsym =
+			    event->proc->callstack[i].c_un.libfunc;
+
 			output_right_tos(event->proc);
 			callstack_pop(event->proc);
 
@@ -666,41 +616,44 @@ handle_breakpoint(Event *event)
 				callstack_pop(event->proc);
 			}
 
-			sbp = address2bpstruct(leader, brk_addr);
-			continue_after_breakpoint(event->proc, sbp);
+			/* Maybe the previous callstack_pop's got rid
+			 * of the breakpoint, but if we are in a
+			 * recursive call, it's still enabled.  In
+			 * that case we need to skip it properly.  */
+			if ((sbp = address2bpstruct(leader, brk_addr)) != NULL) {
+				continue_after_breakpoint(event->proc, sbp);
+			} else {
+				set_instruction_pointer(event->proc, brk_addr);
+				continue_process(event->proc->pid);
+			}
 			return;
 		}
 	}
 
-	if ((sbp = address2bpstruct(leader, brk_addr))) {
+	if ((sbp = address2bpstruct(leader, brk_addr)) != NULL)
 		breakpoint_on_hit(sbp, event->proc);
-
-		if (sbp->libsym == NULL) {
-			continue_after_breakpoint(event->proc, sbp);
-			return;
-		}
-
-		if (strcmp(sbp->libsym->name, "") == 0) {
-			debug(DEBUG_PROCESS, "Hit _dl_debug_state breakpoint!\n");
-			arch_check_dbg(leader);
-		}
-
-		if (event->proc->state != STATE_IGNORED) {
+	else if (event->proc->state != STATE_IGNORED)
+		output_line(event->proc,
+			    "unexpected breakpoint at %p", brk_addr);
+
+	/* breakpoint_on_hit may delete its own breakpoint, so we have
+	 * to look it up again.  */
+	if ((sbp = address2bpstruct(leader, brk_addr)) != NULL) {
+		if (event->proc->state != STATE_IGNORED
+		    && sbp->libsym != NULL) {
 			event->proc->stack_pointer = get_stack_pointer(event->proc);
 			event->proc->return_addr =
 				get_return_addr(event->proc, event->proc->stack_pointer);
 			callstack_push_symfunc(event->proc, sbp->libsym);
-			output_left(LT_TOF_FUNCTION, event->proc, sbp->libsym->name);
+			output_left(LT_TOF_FUNCTION, event->proc, sbp->libsym);
 		}
 
-		continue_after_breakpoint(event->proc, sbp);
+		breakpoint_on_continue(sbp, event->proc);
 		return;
+	} else {
+		set_instruction_pointer(event->proc, brk_addr);
 	}
 
-	if (event->proc->state != STATE_IGNORED && !options.no_plt) {
-		output_line(event->proc, "unexpected breakpoint at %p",
-			    brk_addr);
-	}
 	continue_process(event->proc->pid);
 }
 
@@ -745,9 +698,8 @@ callstack_push_symfunc(Process *proc, struct library_symbol *sym) {
 	elem->c_un.libfunc = sym;
 
 	elem->return_addr = proc->return_addr;
-	if (elem->return_addr) {
-		insert_breakpoint(proc, elem->return_addr, NULL, 1);
-	}
+	if (elem->return_addr)
+		insert_breakpoint(proc, elem->return_addr, NULL);
 
 	/* handle functions like atexit() on mips which have no return */
 	if (opt_T || options.summary) {
diff --git a/libltrace.c b/libltrace.c
index 777ad1b..92f2701 100644
--- a/libltrace.c
+++ b/libltrace.c
@@ -1,22 +1,23 @@
 #include "config.h"
 
+#include <sys/param.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <unistd.h>
 #include <string.h>
-#include <errno.h>
-#include <sys/param.h>
-#include <signal.h>
-#include <sys/wait.h>
+#include <unistd.h>
 
 #include "common.h"
+#include "proc.h"
 
 char *command = NULL;
 
 int exiting = 0;		/* =1 if a SIGINT or SIGTERM has been received */
 
-static enum pcb_status
-stop_non_p_processes (Process * proc, void * data)
+static enum callback_status
+stop_non_p_processes(Process *proc, void *data)
 {
 	int stop = 1;
 
@@ -38,7 +39,7 @@ stop_non_p_processes (Process * proc, void * data)
 		kill(proc->pid, SIGSTOP);
 	}
 
-	return pcb_cont;
+	return CBS_CONT;
 }
 
 static void
@@ -104,20 +105,22 @@ ltrace_init(int argc, char **argv) {
 		}
 		opt_F = opt_F->next;
 	}
-	if (opt_e) {
-		struct opt_e_t *tmp = opt_e;
-		while (tmp) {
-			debug(1, "Option -e: %s\n", tmp->name);
-			tmp = tmp->next;
-		}
-	}
 	if (command) {
 		/* Check that the binary ABI is supported before
 		 * calling execute_program.  */
 		struct ltelf lte = {};
 		open_elf(&lte, command);
 
-		open_program(command, execute_program(command, argv), 0);
+		pid_t pid = execute_program(command, argv);
+		struct Process *proc = open_program(command, pid);
+		if (proc == NULL) {
+			fprintf(stderr, "couldn't open program '%s': %s\n",
+				command, strerror(errno));
+			exit(EXIT_FAILURE);
+		}
+
+		trace_set_options(proc);
+		continue_process(pid);
 	}
 	opt_p_tmp = opt_p;
 	while (opt_p_tmp) {
diff --git a/library.c b/library.c
new file mode 100644
index 0000000..92fccea
--- /dev/null
+++ b/library.c
@@ -0,0 +1,354 @@
+/*
+ * This file is part of ltrace.
+ * Copyright (C) 2011,2012 Petr Machata, Red Hat Inc.
+ * Copyright (C) 2001,2009 Juan Cespedes
+ * Copyright (C) 2006 Ian Wienand
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "library.h"
+#include "proc.h" // for enum callback_status
+#include "debug.h"
+#include "common.h" // for arch_library_symbol_init, arch_library_init
+
+#ifndef ARCH_HAVE_LIBRARY_DATA
+void
+arch_library_init(struct library *lib)
+{
+}
+
+void
+arch_library_destroy(struct library *lib)
+{
+}
+
+void
+arch_library_clone(struct library *retp, struct library *lib)
+{
+}
+#endif
+
+#ifndef ARCH_HAVE_LIBRARY_SYMBOL_DATA
+int
+arch_library_symbol_init(struct library_symbol *libsym)
+{
+	return 0;
+}
+
+void
+arch_library_symbol_destroy(struct library_symbol *libsym)
+{
+}
+
+int
+arch_library_symbol_clone(struct library_symbol *retp,
+			  struct library_symbol *libsym)
+{
+	return 0;
+}
+#endif
+
+unsigned int
+target_address_hash(const void *key)
+{
+	/* XXX assumes KEY is the address itself, not a pointer to one.  */
+	union {
+		target_address_t addr;
+		unsigned int ints[sizeof(target_address_t)
+				  / sizeof(unsigned int)];
+	} u = { .addr = (target_address_t)key };
+	/* XOR together the hashes of the int-sized pieces of the address.  */
+	size_t i;
+	unsigned int h = 0;
+	for (i = 0; i < sizeof(u.ints) / sizeof(*u.ints); ++i)
+		h ^= dict_key2hash_int((void *)(uintptr_t)u.ints[i]);
+	return h;
+}
+
+int
+target_address_cmp(const void *key1, const void *key2)
+{
+	/* XXX keys are taken to be the addresses themselves.  Note the
+	 * sign: a smaller KEY1 yields 1, a larger one -1.  */
+	target_address_t lhs = (target_address_t)key1;
+	target_address_t rhs = (target_address_t)key2;
+	return lhs == rhs ? 0 : lhs < rhs ? 1 : -1;
+}
+
+/* Store in *RETP the string a clone should use for STR: a fresh copy
+ * if the original object owns STR (so that the two life-times are
+ * independent), else STR itself.  Returns 0, or -1 if strdup fails.  */
+static int
+strdup_if_owned(const char **retp, const char *str, int owned)
+{
+	/* Nothing to copy for a borrowed or absent string.  */
+	int copy = owned && str != NULL;
+
+	*retp = copy ? strdup(str) : str;
+	if (copy && *retp == NULL)
+		return -1;
+	return 0;
+}
+
+static void
+private_library_symbol_init(struct library_symbol *libsym,
+			    target_address_t addr,
+			    const char *name, int own_name,
+			    enum toplt type_of_plt)
+{
+	libsym->next = NULL;
+	libsym->lib = NULL;
+	libsym->plt_type = type_of_plt;
+	libsym->name = name;
+	libsym->own_name = own_name;
+	libsym->enter_addr = (void *)(uintptr_t)addr;
+}
+
+static void
+private_library_symbol_destroy(struct library_symbol *libsym)
+{
+	library_symbol_set_name(libsym, NULL, 0);
+}
+
+int
+library_symbol_init(struct library_symbol *libsym,
+		    target_address_t addr, const char *name, int own_name,
+		    enum toplt type_of_plt)
+{
+	private_library_symbol_init(libsym, addr, name, own_name, type_of_plt);
+
+	/* If arch init fails, we've already set libsym->name and
+	 * own_name.  But we return failure, and the client code isn't
+	 * supposed to call library_symbol_destroy in such a case.  */
+	return arch_library_symbol_init(libsym);
+}
+
+void
+library_symbol_destroy(struct library_symbol *libsym)
+{
+	if (libsym != NULL) {
+		private_library_symbol_destroy(libsym);
+		arch_library_symbol_destroy(libsym);
+	}
+}
+
+int
+library_symbol_clone(struct library_symbol *retp, struct library_symbol *libsym)
+{
+	const char *name;
+	if (strdup_if_owned(&name, libsym->name, libsym->own_name) < 0)
+		return -1;
+
+	private_library_symbol_init(retp, libsym->enter_addr,
+				    name, libsym->own_name, libsym->plt_type);
+
+	if (arch_library_symbol_clone(retp, libsym) < 0) {
+		private_library_symbol_destroy(retp);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* strcmp-style ordering of two symbols: negative, zero or positive.  */
+int
+library_symbol_cmp(struct library_symbol *a, struct library_symbol *b)
+{
+	/* Order by entry address first.  */
+	if (a->enter_addr != b->enter_addr)
+		return a->enter_addr < b->enter_addr ? -1 : 1;
+
+	/* Same address: order by name, a missing name sorting first.  */
+	const char *na = a->name;
+	const char *nb = b->name;
+	if (na == NULL || nb == NULL)
+		return (na != NULL) - (nb != NULL);
+
+	return strcmp(na, nb);
+}
+
+void
+library_symbol_set_name(struct library_symbol *libsym,
+			const char *name, int own_name)
+{
+	if (libsym->own_name)
+		free((char *)libsym->name);
+	libsym->name = name;
+	libsym->own_name = own_name;
+}
+
+enum callback_status
+library_symbol_equal_cb(struct library_symbol *libsym, void *u)
+{
+	struct library_symbol *standard = u;
+	return library_symbol_cmp(libsym, standard) == 0 ? CBS_STOP : CBS_CONT;
+}
+
+
+static void
+private_library_init(struct library *lib, enum library_type type)
+{
+	lib->next = NULL;
+	lib->soname = NULL;
+	lib->own_soname = 0;
+	lib->pathname = NULL;
+	lib->own_pathname = 0;
+	lib->symbols = NULL;
+	lib->type = type;
+}
+
+void
+library_init(struct library *lib, enum library_type type)
+{
+	private_library_init(lib, type);
+	arch_library_init(lib);
+}
+
+/* Make RETP a copy of LIB with its own symbols.  Returns 0, or -1.  */
+int
+library_clone(struct library *retp, struct library *lib)
+{
+	const char *soname = NULL;
+	const char *pathname;
+	if (strdup_if_owned(&soname, lib->soname, lib->own_soname) < 0
+	     || strdup_if_owned(&pathname,
+				lib->pathname, lib->own_pathname) < 0) {
+		if (lib->own_soname)
+			free((char *)soname);
+		return -1;
+	}
+	private_library_init(retp, lib->type);
+	library_set_soname(retp, soname, lib->own_soname);
+	library_set_pathname(retp, pathname, lib->own_pathname);
+	arch_library_clone(retp, lib);
+
+	struct library_symbol *it;
+	struct library_symbol **nsymp = &retp->symbols;
+	for (it = lib->symbols; it != NULL; it = it->next) {
+		*nsymp = malloc(sizeof(**nsymp));
+		if (*nsymp == NULL || library_symbol_clone(*nsymp, it) < 0) {
+			/* Unlink the uninitialized node before teardown.  */
+			free(*nsymp);
+			*nsymp = NULL;
+			library_destroy(retp);
+			return -1;
+		}
+		(*nsymp)->lib = retp;
+		nsymp = &(*nsymp)->next;
+	}
+	return 0;
+}
+
+void
+library_destroy(struct library *lib)
+{
+	if (lib == NULL)
+		return;
+	arch_library_destroy(lib);
+	library_set_soname(lib, NULL, 0);
+	library_set_pathname(lib, NULL, 0);
+
+	/* Fetch each successor before its predecessor is freed.  */
+	struct library_symbol *cur = lib->symbols;
+	while (cur != NULL) {
+		struct library_symbol *following = cur->next;
+		library_symbol_destroy(cur);
+		free(cur);
+		cur = following;
+	}
+}
+
+void
+library_set_soname(struct library *lib, const char *new_name, int own_name)
+{
+	if (lib->own_soname)
+		free((char *)lib->soname);
+	lib->soname = new_name;
+	lib->own_soname = own_name;
+}
+
+void
+library_set_pathname(struct library *lib, const char *new_name, int own_name)
+{
+	if (lib->own_pathname)
+		free((char *)lib->pathname);
+	lib->pathname = new_name;
+	lib->own_pathname = own_name;
+}
+
+struct library_symbol *
+library_each_symbol(struct library *lib, struct library_symbol *start_after,
+		    enum callback_status (*cb)(struct library_symbol *, void *),
+		    void *data)
+{
+	/* Resume right behind START_AFTER, or start from the head.  */
+	struct library_symbol *cur;
+	if (start_after != NULL)
+		cur = start_after->next;
+	else
+		cur = lib->symbols;
+
+	struct library_symbol *upcoming;
+	for (; cur != NULL; cur = upcoming) {
+		/* The link is read before CB gets to see CUR.  */
+		upcoming = cur->next;
+		enum callback_status status = (*cb)(cur, data);
+
+		/* XXX CBS_FAIL is not handled specially: like CBS_STOP,
+		 * it ends the walk at CUR.  */
+		if (status == CBS_FAIL || status == CBS_STOP)
+			return cur;
+	}
+	return NULL;
+}
+
+/* Prepend the symbol list starting at FIRST to LIB's symbols, pointing
+ * each symbol's lib at LIB on the way.  A NULL FIRST is a no-op.  */
+void
+library_add_symbol(struct library *lib, struct library_symbol *first)
+{
+	if (first == NULL)
+		return;
+	struct library_symbol *last;
+	for (last = first; ; last = last->next) {
+		last->lib = lib;
+		if (last->next == NULL)
+			break;
+	}
+	last->next = lib->symbols;
+	lib->symbols = first;
+}
+
+enum callback_status
+library_named_cb(struct Process *proc, struct library *lib, void *name)
+{
+	/* soname starts out as NULL.  NULL only matches NULL, and must
+	 * never reach strcmp.  */
+	if (name == NULL || lib->soname == NULL)
+		return name == lib->soname ? CBS_STOP : CBS_CONT;
+	return strcmp(lib->soname, (char *)name) == 0 ? CBS_STOP : CBS_CONT;
+}
+
+enum callback_status
+library_with_key_cb(struct Process *proc, struct library *lib, void *keyp)
+{
+	return lib->key == *(target_address_t *)keyp ? CBS_STOP : CBS_CONT;
+}
diff --git a/library.h b/library.h
new file mode 100644
index 0000000..c387b02
--- /dev/null
+++ b/library.h
@@ -0,0 +1,196 @@
+/*
+ * This file is part of ltrace.
+ * Copyright (C) 2012 Petr Machata, Red Hat Inc.
+ * Copyright (C) 2006 Paul Gilliam
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef _LIBRARY_H_
+#define _LIBRARY_H_
+
+#include <stdint.h>
+#include "sysdep.h"
+
+struct Process;
+struct library;
+
+enum toplt {
+	LS_TOPLT_NONE = 0,	/* PLT not used for this symbol. */
+	LS_TOPLT_EXEC,		/* PLT for this symbol is executable. */
+};
+
+/* We should in general be able to trace 64-bit processes with 32-bit
+ * ltrace.  (At least PPC has several PTRACE requests related to
+ * tracing 64-on-32, so presumably it should be possible.)  But ltrace
+ * is currently hopelessly infested with using void* for host address.
+ * So keep with it, for now.  */
+typedef void *target_address_t;
+
+/* Dict interface.  */
+unsigned int target_address_hash(const void *key);
+int target_address_cmp(const void *key1, const void *key2);
+
+struct library_symbol {
+	struct library_symbol *next;
+	struct library *lib;
+	const char *name;
+	target_address_t enter_addr;
+	enum toplt plt_type;
+	char own_name;
+	struct arch_library_symbol_data arch;
+};
+
+/* Init LIBSYM.  NAME will be freed when LIBSYM is destroyed if
+ * OWN_NAME.  ARCH has to be initialized by a separate call.  */
+int library_symbol_init(struct library_symbol *libsym,
+			target_address_t addr, const char *name, int own_name,
+			enum toplt type_of_plt);
+
+/* Copy library symbol SYM into the area pointed-to by RETP.  Return 0
+ * on success or a negative value on failure.  */
+int library_symbol_clone(struct library_symbol *retp,
+			 struct library_symbol *sym);
+
+/* Destroy library symbol.  This essentially just frees the name if
+ * it's owned, and releases the arch-specific data.  It doesn't free
+ * the memory associated with the SYM pointer itself.  A NULL SYM is
+ * ignored.  The function can't fail and returns nothing.  */
+void library_symbol_destroy(struct library_symbol *sym);
+
+/* Compare two library symbols.  Returns a negative value, 0, or a
+ * positive value, much like strcmp.  The function compares symbol
+ * addresses, and if those are equal, it compares symbol names.  If
+ * those are equal, too, the symbols are considered equal.  */
+int library_symbol_cmp(struct library_symbol *a, struct library_symbol *b);
+
+/* Set a name for library symbol.  This frees the old name, if
+ * that is owned.  */
+void library_symbol_set_name(struct library_symbol *libsym,
+			     const char *name, int own_name);
+
+/* A function that can be used as library_each_symbol callback.  Looks
+ * for a symbol SYM for which library_symbol_cmp(SYM, STANDARD)
+ * returns 0.  */
+enum callback_status library_symbol_equal_cb(struct library_symbol *libsym,
+					     void *standard);
+
+enum library_type {
+	LT_LIBTYPE_MAIN,
+	LT_LIBTYPE_DSO,
+};
+
+/* XXX we might consider sharing libraries across processes.  Things
+ * like libc will be opened by every single process, no point cloning
+ * these everywhere.  But for now, keep the ownership structure
+ * simple.  */
+struct library {
+	struct library *next;
+
+	/* Unique key. Two library objects are considered equal, if
+	 * they have the same key.  */
+	target_address_t key;
+
+	/* Address where the library is mapped.  Two library objects
+	 * are considered equal, if they have the same base.  */
+	target_address_t base;
+
+	/* Absolute address of the entry point.  Useful for main
+	 * binary, though I suppose the value might be useful for the
+	 * dynamic linker, too (in case we ever want to do early
+	 * process tracing).  */
+	target_address_t entry;
+
+	/* Address of PT_DYNAMIC segment.  */
+	target_address_t dyn_addr;
+
+	/* Symbols associated with the library.  */
+	struct library_symbol *symbols;
+
+	const char *soname;
+	const char *pathname;
+
+	enum library_type type;
+
+	char own_soname : 1;
+	char own_pathname : 1;
+
+	struct arch_library_data arch;
+};
+
+/* Init LIB.  */
+void library_init(struct library *lib, enum library_type type);
+
+/* Initialize RETP to a library identical to LIB.  Symbols are not
+ * shared, but copied over.  Returns 0 on success and a negative value
+ * in case of failure.  */
+int library_clone(struct library *retp, struct library *lib);
+
+/* Destroy library.  Doesn't free LIB itself.  Symbols are destroyed
+ * and freed.  */
+void library_destroy(struct library *lib);
+
+/* Set library soname.  Frees the old name if necessary.  */
+void library_set_soname(struct library *lib,
+			const char *new_name, int own_name);
+
+/* Set library pathname.  Frees the old name if necessary.  */
+void library_set_pathname(struct library *lib,
+			  const char *new_name, int own_name);
+
+/* Iterate through list of symbols of library LIB.  Restarts are
+ * supported via START_AFTER (see each_process for details of
+ * iteration interface).  */
+struct library_symbol *library_each_symbol
+	(struct library *lib, struct library_symbol *start_after,
+	 enum callback_status (*cb)(struct library_symbol *, void *),
+	 void *data);
+
+/* Add a new symbol SYM to LIB.  SYM is assumed owned, we need to
+ * overwrite SYM->next.  */
+void library_add_symbol(struct library *lib, struct library_symbol *sym);
+
+/* A function that can be used as proc_each_library callback.  Looks
+ * for a library with the name passed in DATA.  PROC is ignored.  */
+enum callback_status library_named_cb(struct Process *proc,
+				      struct library *lib, void *name);
+
+/* A function that can be used as proc_each_library callback.  Looks
+ * for a library with the given key.
+ *
+ * NOTE: The key is passed as a POINTER to target_address_t (that's
+ * because in general, target_address_t doesn't fit in void*).  */
+enum callback_status library_with_key_cb(struct Process *proc,
+					 struct library *lib, void *keyp);
+
+/* XXX this should really be in backend.h (as on pmachata/revamp
+ * branch), or, on this branch, in common.h.  But we need
+ * target_address_t (which should also be in backend.h, I reckon), so
+ * stuff it here for the time being.  */
+/* This function is implemented in the back end.  It is called for all
+ * raw addresses as read from symbol tables etc.  If necessary on
+ * given architecture, this function should translate the address
+ * according to .opd or other indirection mechanism.  Returns 0 on
+ * success and a negative value on failure.  */
+struct ltelf;
+int arch_translate_address(struct ltelf *lte,
+			   target_address_t addr, target_address_t *ret);
+/* This is the same function as arch_translate_address, except it's
+ * used at the point that we don't have ELF available anymore.  */
+int arch_translate_address_dyn(struct Process *proc,
+			       target_address_t addr, target_address_t *ret);
+
+#endif /* _LIBRARY_H_ */
diff --git a/ltrace-elf.c b/ltrace-elf.c
index f7fc239..a311c5f 100644
--- a/ltrace-elf.c
+++ b/ltrace-elf.c
@@ -1,68 +1,96 @@
 #include "config.h"
 
+#include <assert.h>
 #include <endian.h>
 #include <errno.h>
-#include <error.h>
 #include <fcntl.h>
 #include <gelf.h>
 #include <inttypes.h>
+#include <search.h>
 #include <stdint.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
-#include <assert.h>
 
 #include "common.h"
-
-void do_close_elf(struct ltelf *lte);
-void add_library_symbol(GElf_Addr addr, const char *name,
-		struct library_symbol **library_symbolspp,
-		enum toplt type_of_plt, int is_weak);
-int in_load_libraries(const char *name, struct ltelf *lte, size_t count, GElf_Sym *sym);
-static GElf_Addr opd2addr(struct ltelf *ltc, GElf_Addr addr);
-
-struct library_symbol *library_symbols = NULL;
-struct ltelf main_lte;
+#include "proc.h"
+#include "library.h"
+#include "filter.h"
 
 #ifdef PLT_REINITALISATION_BP
 extern char *PLTs_initialized_by_here;
 #endif
 
-#ifndef DT_PPC_GOT
-# define DT_PPC_GOT		(DT_LOPROC + 0)
-#endif
-
-#define PPC_PLT_STUB_SIZE 16
-
-static Elf_Data *loaddata(Elf_Scn *scn, GElf_Shdr *shdr)
+#ifndef ARCH_HAVE_LTELF_DATA
+int
+arch_elf_init(struct ltelf *lte, struct library *lib)
 {
-	Elf_Data *data = elf_getdata(scn, NULL);
-	if (data == NULL || elf_getdata(scn, data) != NULL
-	    || data->d_off || data->d_size != shdr->sh_size)
-		return NULL;
-	return data;
+	return 0;
 }
 
-static int inside(GElf_Addr addr, GElf_Shdr *shdr)
+void
+arch_elf_destroy(struct ltelf *lte)
 {
-	return addr >= shdr->sh_addr
-		&& addr < shdr->sh_addr + shdr->sh_size;
 }
+#endif
 
-static int maybe_pick_section(GElf_Addr addr,
-			      Elf_Scn *in_sec, GElf_Shdr *in_shdr,
-			      Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
+int
+default_elf_add_plt_entry(struct Process *proc, struct ltelf *lte,
+			  const char *a_name, GElf_Rela *rela, size_t ndx,
+			  struct library_symbol **ret)
 {
-	if (inside (addr, in_shdr)) {
-		*tgt_sec = in_sec;
-		*tgt_shdr = *in_shdr;
-		return 1;
+	char *name = strdup(a_name);
+	if (name == NULL) {
+	fail:
+		free(name);
+		return -1;
+	}
+
+	GElf_Addr addr = arch_plt_sym_val(lte, ndx, rela);
+
+	struct library_symbol *libsym = malloc(sizeof(*libsym));
+	if (libsym == NULL)
+		goto fail;
+
+	/* XXX The double cast should be removed when
+	 * target_address_t becomes integral type.  */
+	target_address_t taddr = (target_address_t)
+		(uintptr_t)(addr + lte->bias);
+
+	if (library_symbol_init(libsym, taddr, name, 1, LS_TOPLT_EXEC) < 0) {
+		free(libsym);
+		goto fail;
 	}
+
+	*ret = libsym;
 	return 0;
 }
 
-static int get_section_covering(struct ltelf *lte, GElf_Addr addr,
-				Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
+#ifndef ARCH_HAVE_ADD_PLT_ENTRY
+enum plt_status
+arch_elf_add_plt_entry(struct Process *proc, struct ltelf *lte,
+		       const char *a_name, GElf_Rela *rela, size_t ndx,
+		       struct library_symbol **ret)
+{
+	return plt_default;
+}
+#endif
+
+Elf_Data *
+elf_loaddata(Elf_Scn *scn, GElf_Shdr *shdr)
+{
+	/* Accept only a lone chunk at offset 0 spanning the section.  */
+	Elf_Data *chunk = elf_getdata(scn, NULL);
+	int ok = chunk != NULL && elf_getdata(scn, chunk) == NULL
+		&& chunk->d_off == 0 && chunk->d_size == shdr->sh_size;
+	return ok ? chunk : NULL;
+}
+
+static int
+elf_get_section_if(struct ltelf *lte, Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr,
+		   int (*predicate)(Elf_Scn *, GElf_Shdr *, void *data),
+		   void *data)
 {
 	int i;
 	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
@@ -72,69 +100,115 @@ static int get_section_covering(struct ltelf *lte, GElf_Addr addr,
 		scn = elf_getscn(lte->elf, i);
 		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
 			debug(1, "Couldn't read section or header.");
+			return -1;
+		}
+		if (predicate(scn, &shdr, data)) {
+			*tgt_sec = scn;
+			*tgt_shdr = shdr;
 			return 0;
 		}
-
-		if (maybe_pick_section(addr, scn, &shdr, tgt_sec, tgt_shdr))
-			return 1;
 	}
+	return -1;
 
-	return 0;
 }
 
-static GElf_Addr read32be(Elf_Data *data, size_t offset)
+static int
+inside_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
 {
-	if (data->d_size < offset + 4) {
-		debug(1, "Not enough data to read 32bit value at offset %zd.",
-		      offset);
-		return 0;
-	}
+	GElf_Addr addr = *(GElf_Addr *)data;
+	return addr >= shdr->sh_addr
+		&& addr < shdr->sh_addr + shdr->sh_size;
+}
+
+int
+elf_get_section_covering(struct ltelf *lte, GElf_Addr addr,
+			 Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
+{
+	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
+				  &inside_p, &addr);
+}
 
-	unsigned char const *buf = data->d_buf + offset;
-	return ((Elf32_Word)buf[0] << 24)
-		| ((Elf32_Word)buf[1] << 16)
-		| ((Elf32_Word)buf[2] << 8)
-		| ((Elf32_Word)buf[3]);
+static int
+type_p(Elf_Scn *scn, GElf_Shdr *shdr, void *data)
+{
+	GElf_Word type = *(GElf_Word *)data;
+	return shdr->sh_type == type;
 }
 
-static GElf_Addr get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot,
-			       Elf_Data *plt_data)
+int
+elf_get_section_type(struct ltelf *lte, GElf_Word type,
+		     Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
 {
-	Elf_Scn *ppcgot_sec = NULL;
-	GElf_Shdr ppcgot_shdr;
-	if (ppcgot != 0
-	    && !get_section_covering(lte, ppcgot, &ppcgot_sec, &ppcgot_shdr))
-		// xxx should be the log out
-		fprintf(stderr,
-			"DT_PPC_GOT=%#" PRIx64 ", but no such section found.\n",
-			ppcgot);
-
-	if (ppcgot_sec != NULL) {
-		Elf_Data *data = loaddata(ppcgot_sec, &ppcgot_shdr);
-		if (data == NULL
-		    || data->d_size < 8 )
-			debug(1, "Couldn't read GOT data.");
-		else {
-			// where PPCGOT begins in .got
-			size_t offset = ppcgot - ppcgot_shdr.sh_addr;
-			GElf_Addr glink_vma = read32be(data, offset + 4);
-			if (glink_vma != 0) {
-				debug(1, "PPC GOT glink_vma address: %#" PRIx64,
-				      glink_vma);
-				return glink_vma;
-			}
-		}
-	}
+	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
+				  &type_p, &type);
+}
 
-	if (plt_data != NULL) {
-		GElf_Addr glink_vma = read32be(plt_data, 0);
-		debug(1, ".plt glink_vma address: %#" PRIx64, glink_vma);
-		return glink_vma;
-	}
+struct section_named_data {
+	struct ltelf *lte;
+	const char *name;
+};
+
+/* Predicate: true iff the section's name can be read and is D->name.  */
+static int
+name_p(Elf_Scn *scn, GElf_Shdr *shdr, void *d)
+{
+	struct section_named_data *data = d;
+	const char *name = elf_strptr(data->lte->elf,
+			data->lte->ehdr.e_shstrndx, shdr->sh_name);
+	return name != NULL && strcmp(name, data->name) == 0;
+}
 
+int
+elf_get_section_named(struct ltelf *lte, const char *name,
+		     Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr)
+{
+	struct section_named_data data = {
+		.lte = lte,
+		.name = name,
+	};
+	return elf_get_section_if(lte, tgt_sec, tgt_shdr,
+				  &name_p, &data);
+}
+
+static int	/* 0 if SIZE bytes at OFFSET lie within DATA, else -1.  */
+need_data(Elf_Data *data, GElf_Xword offset, GElf_Xword size)
+{
+	assert(data != NULL);
+	if (data->d_size < size || offset > data->d_size - size) {
+		debug(1, "Not enough data to read %" PRIu64 "-byte value"
+		      " at offset %" PRIu64 ".", size, offset);
+		return -1;
+	}
 	return 0;
 }
 
+/* DEF_READER(NAME, SIZE) defines NAME(), which copies the SIZE-bit
+ * value at byte OFFSET of DATA into *RETP without byte-order
+ * conversion.  A data-less (NODATA) section reads as 0.  NAME()
+ * returns 0 on success, or -1 if the read would overrun DATA.  */
+#define DEF_READER(NAME, SIZE)						\
+	int								\
+	NAME(Elf_Data *data, GElf_Xword offset, uint##SIZE##_t *retp)	\
+	{								\
+		if (need_data(data, offset, SIZE / 8) < 0)		\
+			return -1;					\
+									\
+		if (data->d_buf == NULL) /* NODATA section */ {		\
+			*retp = 0;					\
+			return 0;					\
+		}							\
+									\
+		memcpy(retp, (const char *)data->d_buf + offset,	\
+		       sizeof(*retp));					\
+		return 0;						\
+	}
+
+DEF_READER(elf_read_u16, 16)
+DEF_READER(elf_read_u32, 32)
+DEF_READER(elf_read_u64, 64)
+
+#undef DEF_READER
+
 int
 open_elf(struct ltelf *lte, const char *filename)
 {
@@ -150,17 +224,22 @@ open_elf(struct ltelf *lte, const char *filename)
 	lte->elf = elf_begin(lte->fd, ELF_C_READ, NULL);
 #endif
 
-	if (lte->elf == NULL || elf_kind(lte->elf) != ELF_K_ELF)
-		error(EXIT_FAILURE, 0, "Can't open ELF file \"%s\"", filename);
+	if (lte->elf == NULL || elf_kind(lte->elf) != ELF_K_ELF) {
+		fprintf(stderr, "\"%s\" is not an ELF file\n", filename);
+		exit(EXIT_FAILURE);
+	}
 
-	if (gelf_getehdr(lte->elf, &lte->ehdr) == NULL)
-		error(EXIT_FAILURE, 0, "Can't read ELF header of \"%s\"",
-		      filename);
+	if (gelf_getehdr(lte->elf, &lte->ehdr) == NULL) {
+		fprintf(stderr, "can't read ELF header of \"%s\": %s\n",
+			filename, elf_errmsg(-1));
+		exit(EXIT_FAILURE);
+	}
 
-	if (lte->ehdr.e_type != ET_EXEC && lte->ehdr.e_type != ET_DYN)
-		error(EXIT_FAILURE, 0,
-		      "\"%s\" is not an ELF executable nor shared library",
-		      filename);
+	if (lte->ehdr.e_type != ET_EXEC && lte->ehdr.e_type != ET_DYN) {
+		fprintf(stderr, "\"%s\" is neither an ELF executable"
+			" nor a shared library\n", filename);
+		exit(EXIT_FAILURE);
+	}
 
 	if ((lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS
 	     || lte->ehdr.e_machine != LT_ELF_MACHINE)
@@ -172,18 +251,58 @@ open_elf(struct ltelf *lte, const char *filename)
 	    && (lte->ehdr.e_ident[EI_CLASS] != LT_ELFCLASS3
 		|| lte->ehdr.e_machine != LT_ELF_MACHINE3)
 #endif
-	    )
-		error(EXIT_FAILURE, 0,
-		      "\"%s\" is ELF from incompatible architecture", filename);
+		) {
+		fprintf(stderr,
+			"\"%s\" is ELF from incompatible architecture\n",
+			filename);
+		exit(EXIT_FAILURE);
+	}
 
 	return 0;
 }
 
-int
-do_init_elf(struct ltelf *lte, const char *filename) {
+/* Load symbol table SCN/SHDR (NAME is for messages) into *DATAP and
+ * *COUNTP, its string table into *STRSP.  Exits if the latter fails.  */
+static void
+read_symbol_table(struct ltelf *lte, const char *filename,
+		  Elf_Scn *scn, GElf_Shdr *shdr, const char *name,
+		  Elf_Data **datap, size_t *countp, const char **strsp)
+{
+	*datap = elf_getdata(scn, NULL);
+	*countp = shdr->sh_entsize != 0 ? shdr->sh_size / shdr->sh_entsize : 0;
+	if ((*datap == NULL || elf_getdata(scn, *datap) != NULL)
+	    && options.static_filter != NULL) {
+		fprintf(stderr, "Couldn't get data of section"
+			" %s from \"%s\": %s\n",
+			name, filename, elf_errmsg(-1));
+		exit(EXIT_FAILURE);
+	}
+	/* shdr2 is unset if gelf_getshdr fails; report SHDR's link.  */
+	scn = elf_getscn(lte->elf, shdr->sh_link);
+	GElf_Shdr shdr2;
+	if (scn == NULL || gelf_getshdr(scn, &shdr2) == NULL) {
+		fprintf(stderr, "Couldn't get header of section"
+			" #%d from \"%s\": %s\n",
+			shdr->sh_link, filename, elf_errmsg(-1));
+		exit(EXIT_FAILURE);
+	}
+	Elf_Data *data = elf_getdata(scn, NULL);
+	if (data == NULL || elf_getdata(scn, data) != NULL
+	    || shdr2.sh_size != data->d_size || data->d_off) {
+		fprintf(stderr, "Couldn't get data of section"
+			" #%d from \"%s\": %s\n",
+			shdr->sh_link, filename, elf_errmsg(-1));
+		exit(EXIT_FAILURE);
+	}
+	*strsp = data->d_buf;
+}
+
+static int
+do_init_elf(struct ltelf *lte, const char *filename, GElf_Addr bias)
+{
 	int i;
 	GElf_Addr relplt_addr = 0;
-	size_t relplt_size = 0;
+	GElf_Addr soname_offset = 0;
 
 	debug(DEBUG_FUNCTION, "do_init_elf(filename=%s)", filename);
 	debug(1, "Reading ELF from %s...", filename);
@@ -191,8 +310,25 @@ do_init_elf(struct ltelf *lte, const char *filename) {
 	if (open_elf(lte, filename) < 0)
 		return -1;
 
-	Elf_Data *plt_data = NULL;
-	GElf_Addr ppcgot = 0;
+	/* Find out the base address.  */
+	{
+		GElf_Phdr phdr;
+		for (i = 0; gelf_getphdr (lte->elf, i, &phdr) != NULL; ++i) {
+			if (phdr.p_type == PT_LOAD) {
+				lte->base_addr = phdr.p_vaddr + bias;
+				break;
+			}
+		}
+	}
+
+	if (lte->base_addr == 0) {
+		fprintf(stderr, "Couldn't determine base address of %s\n",
+			filename);
+		return -1;
+	}
+
+	lte->bias = bias;
+	lte->entry_addr = lte->ehdr.e_entry + lte->bias;
 
 	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
 		Elf_Scn *scn;
@@ -200,68 +336,29 @@ do_init_elf(struct ltelf *lte, const char *filename) {
 		const char *name;
 
 		scn = elf_getscn(lte->elf, i);
-		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL)
-			error(EXIT_FAILURE, 0,
-			      "Couldn't get section header from \"%s\"",
-			      filename);
+		if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
+			fprintf(stderr,	"Couldn't get section #%d from"
+				" \"%s\": %s\n", i, filename, elf_errmsg(-1));
+			exit(EXIT_FAILURE);
+		}
 
 		name = elf_strptr(lte->elf, lte->ehdr.e_shstrndx, shdr.sh_name);
-		if (name == NULL)
-			error(EXIT_FAILURE, 0,
-			      "Couldn't get section header from \"%s\"",
-			      filename);
+		if (name == NULL) {
+			fprintf(stderr,	"Couldn't get name of section #%d from"
+				" \"%s\": %s\n", i, filename, elf_errmsg(-1));
+			exit(EXIT_FAILURE);
+		}
 
 		if (shdr.sh_type == SHT_SYMTAB) {
-			Elf_Data *data;
-
-			lte->symtab = elf_getdata(scn, NULL);
-			lte->symtab_count = shdr.sh_size / shdr.sh_entsize;
-			if ((lte->symtab == NULL
-			     || elf_getdata(scn, lte->symtab) != NULL)
-			    && opt_x != NULL)
-				error(EXIT_FAILURE, 0,
-				      "Couldn't get .symtab data from \"%s\"",
-				      filename);
-
-			scn = elf_getscn(lte->elf, shdr.sh_link);
-			if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL)
-				error(EXIT_FAILURE, 0,
-				      "Couldn't get section header from \"%s\"",
-				      filename);
+			read_symbol_table(lte, filename,
+					  scn, &shdr, name, &lte->symtab,
+					  &lte->symtab_count, &lte->strtab);
 
-			data = elf_getdata(scn, NULL);
-			if (data == NULL || elf_getdata(scn, data) != NULL
-			    || shdr.sh_size != data->d_size || data->d_off)
-				error(EXIT_FAILURE, 0,
-				      "Couldn't get .strtab data from \"%s\"",
-				      filename);
-
-			lte->strtab = data->d_buf;
 		} else if (shdr.sh_type == SHT_DYNSYM) {
-			Elf_Data *data;
-
-			lte->dynsym = elf_getdata(scn, NULL);
-			lte->dynsym_count = shdr.sh_size / shdr.sh_entsize;
-			if (lte->dynsym == NULL
-			    || elf_getdata(scn, lte->dynsym) != NULL)
-				error(EXIT_FAILURE, 0,
-				      "Couldn't get .dynsym data from \"%s\"",
-				      filename);
+			read_symbol_table(lte, filename,
+					  scn, &shdr, name, &lte->dynsym,
+					  &lte->dynsym_count, &lte->dynstr);
 
-			scn = elf_getscn(lte->elf, shdr.sh_link);
-			if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL)
-				error(EXIT_FAILURE, 0,
-				      "Couldn't get section header from \"%s\"",
-				      filename);
-
-			data = elf_getdata(scn, NULL);
-			if (data == NULL || elf_getdata(scn, data) != NULL
-			    || shdr.sh_size != data->d_size || data->d_off)
-				error(EXIT_FAILURE, 0,
-				      "Couldn't get .dynstr data from \"%s\"",
-				      filename);
-
-			lte->dynstr = data->d_buf;
 		} else if (shdr.sh_type == SHT_DYNAMIC) {
 			Elf_Data *data;
 			size_t j;
@@ -270,135 +367,39 @@ do_init_elf(struct ltelf *lte, const char *filename) {
 			lte->dyn_sz = shdr.sh_size;
 
 			data = elf_getdata(scn, NULL);
-			if (data == NULL || elf_getdata(scn, data) != NULL)
-				error(EXIT_FAILURE, 0,
-				      "Couldn't get .dynamic data from \"%s\"",
-				      filename);
+			if (data == NULL || elf_getdata(scn, data) != NULL) {
+				fprintf(stderr, "Couldn't get .dynamic data"
+					" from \"%s\": %s\n",
+					filename, elf_errmsg(-1));
+				exit(EXIT_FAILURE);
+			}
 
 			for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
 				GElf_Dyn dyn;
 
-				if (gelf_getdyn(data, j, &dyn) == NULL)
-					error(EXIT_FAILURE, 0,
-					      "Couldn't get .dynamic data from \"%s\"",
-					      filename);
-#ifdef __mips__
-/**
-  MIPS ABI Supplement:
-
-  DT_PLTGOT This member holds the address of the .got section.
-
-  DT_MIPS_SYMTABNO This member holds the number of entries in the
-  .dynsym section.
-
-  DT_MIPS_LOCAL_GOTNO This member holds the number of local global
-  offset table entries.
-
-  DT_MIPS_GOTSYM This member holds the index of the first dyamic
-  symbol table entry that corresponds to an entry in the gobal offset
-  table.
-
- */
-				if(dyn.d_tag==DT_PLTGOT){
-					lte->pltgot_addr=dyn.d_un.d_ptr;
-				}
-				if(dyn.d_tag==DT_MIPS_LOCAL_GOTNO){
-					lte->mips_local_gotno=dyn.d_un.d_val;
+				if (gelf_getdyn(data, j, &dyn) == NULL) {
+					fprintf(stderr, "Couldn't get .dynamic"
+						" data from \"%s\": %s\n",
+						filename, elf_errmsg(-1));
+					exit(EXIT_FAILURE);
 				}
-				if(dyn.d_tag==DT_MIPS_GOTSYM){
-					lte->mips_gotsym=dyn.d_un.d_val;
-				}
-#endif // __mips__
 				if (dyn.d_tag == DT_JMPREL)
 					relplt_addr = dyn.d_un.d_ptr;
 				else if (dyn.d_tag == DT_PLTRELSZ)
-					relplt_size = dyn.d_un.d_val;
-				else if (dyn.d_tag == DT_PPC_GOT) {
-					ppcgot = dyn.d_un.d_val;
-					debug(1, "ppcgot %#" PRIx64, ppcgot);
-				}
-			}
-		} else if (shdr.sh_type == SHT_HASH) {
-			Elf_Data *data;
-			size_t j;
-
-			lte->hash_type = SHT_HASH;
-
-			data = elf_getdata(scn, NULL);
-			if (data == NULL || elf_getdata(scn, data) != NULL
-			    || data->d_off || data->d_size != shdr.sh_size)
-				error(EXIT_FAILURE, 0,
-				      "Couldn't get .hash data from \"%s\"",
-				      filename);
-
-			if (shdr.sh_entsize == 4) {
-				/* Standard conforming ELF.  */
-				if (data->d_type != ELF_T_WORD)
-					error(EXIT_FAILURE, 0,
-					      "Couldn't get .hash data from \"%s\"",
-					      filename);
-				lte->hash = (Elf32_Word *) data->d_buf;
-			} else if (shdr.sh_entsize == 8) {
-				/* Alpha or s390x.  */
-				Elf32_Word *dst, *src;
-				size_t hash_count = data->d_size / 8;
-
-				lte->hash = (Elf32_Word *)
-				    malloc(hash_count * sizeof(Elf32_Word));
-				if (lte->hash == NULL)
-					error(EXIT_FAILURE, 0,
-					      "Couldn't convert .hash section from \"%s\"",
-					      filename);
-				lte->lte_flags |= LTE_HASH_MALLOCED;
-				dst = lte->hash;
-				src = (Elf32_Word *) data->d_buf;
-				if ((data->d_type == ELF_T_WORD
-				     && __BYTE_ORDER == __BIG_ENDIAN)
-				    || (data->d_type == ELF_T_XWORD
-					&& lte->ehdr.e_ident[EI_DATA] ==
-					ELFDATA2MSB))
-					++src;
-				for (j = 0; j < hash_count; ++j, src += 2)
-					*dst++ = *src;
-			} else
-				error(EXIT_FAILURE, 0,
-				      "Unknown .hash sh_entsize in \"%s\"",
-				      filename);
-		} else if (shdr.sh_type == SHT_GNU_HASH
-			   && lte->hash == NULL) {
-			Elf_Data *data;
-
-			lte->hash_type = SHT_GNU_HASH;
-
-			if (shdr.sh_entsize != 0
-			    && shdr.sh_entsize != 4) {
-				error(EXIT_FAILURE, 0,
-				      ".gnu.hash sh_entsize in \"%s\" "
-					"should be 4, but is %#" PRIx64,
-					filename, shdr.sh_entsize);
+					lte->relplt_size = dyn.d_un.d_val;
+				else if (dyn.d_tag == DT_SONAME)
+					soname_offset = dyn.d_un.d_val;
 			}
-
-			data = loaddata(scn, &shdr);
-			if (data == NULL)
-				error(EXIT_FAILURE, 0,
-				      "Couldn't get .gnu.hash data from \"%s\"",
-				      filename);
-
-			lte->hash = (Elf32_Word *) data->d_buf;
 		} else if (shdr.sh_type == SHT_PROGBITS
 			   || shdr.sh_type == SHT_NOBITS) {
 			if (strcmp(name, ".plt") == 0) {
 				lte->plt_addr = shdr.sh_addr;
 				lte->plt_size = shdr.sh_size;
-				if (shdr.sh_flags & SHF_EXECINSTR) {
-					lte->lte_flags |= LTE_PLT_EXECUTABLE;
-				}
-				if (lte->ehdr.e_machine == EM_PPC) {
-					plt_data = loaddata(scn, &shdr);
-					if (plt_data == NULL)
-						fprintf(stderr,
-							"Can't load .plt data\n");
-				}
+				lte->plt_data = elf_loaddata(scn, &shdr);
+				if (lte->plt_data == NULL)
+					fprintf(stderr,
+						"Can't load .plt data\n");
+				lte->plt_flags = shdr.sh_flags;
 			}
 #ifdef ARCH_SUPPORTS_OPD
 			else if (strcmp(name, ".opd") == 0) {
@@ -410,449 +411,376 @@ do_init_elf(struct ltelf *lte, const char *filename) {
 		}
 	}
 
-	if (lte->dynsym == NULL || lte->dynstr == NULL)
-		error(EXIT_FAILURE, 0,
-		      "Couldn't find .dynsym or .dynstr in \"%s\"", filename);
+	if (lte->dynsym == NULL || lte->dynstr == NULL) {
+		fprintf(stderr, "Couldn't find .dynsym or .dynstr in \"%s\"\n",
+			filename);
+		exit(EXIT_FAILURE);
+	}
 
 	if (!relplt_addr || !lte->plt_addr) {
 		debug(1, "%s has no PLT relocations", filename);
 		lte->relplt = NULL;
 		lte->relplt_count = 0;
-	} else if (relplt_size == 0) {
+	} else if (lte->relplt_size == 0) {
 		debug(1, "%s has unknown PLT size", filename);
 		lte->relplt = NULL;
 		lte->relplt_count = 0;
 	} else {
-		if (lte->ehdr.e_machine == EM_PPC) {
-			GElf_Addr glink_vma
-				= get_glink_vma(lte, ppcgot, plt_data);
-
-			assert (relplt_size % 12 == 0);
-			size_t count = relplt_size / 12; // size of RELA entry
-			lte->plt_stub_vma = glink_vma
-				- (GElf_Addr)count * PPC_PLT_STUB_SIZE;
-			debug(1, "stub_vma is %#" PRIx64, lte->plt_stub_vma);
-		}
 
 		for (i = 1; i < lte->ehdr.e_shnum; ++i) {
 			Elf_Scn *scn;
 			GElf_Shdr shdr;
 
 			scn = elf_getscn(lte->elf, i);
-			if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL)
-				error(EXIT_FAILURE, 0,
-				      "Couldn't get section header from \"%s\"",
-				      filename);
+			if (scn == NULL || gelf_getshdr(scn, &shdr) == NULL) {
+				fprintf(stderr, "Couldn't get section header"
+					" from \"%s\": %s\n",
+					filename, elf_errmsg(-1));
+				exit(EXIT_FAILURE);
+			}
 			if (shdr.sh_addr == relplt_addr
-			    && shdr.sh_size == relplt_size) {
+			    && shdr.sh_size == lte->relplt_size) {
 				lte->relplt = elf_getdata(scn, NULL);
 				lte->relplt_count =
 				    shdr.sh_size / shdr.sh_entsize;
 				if (lte->relplt == NULL
-				    || elf_getdata(scn, lte->relplt) != NULL)
-					error(EXIT_FAILURE, 0,
-					      "Couldn't get .rel*.plt data from \"%s\"",
-					      filename);
+				    || elf_getdata(scn, lte->relplt) != NULL) {
+					fprintf(stderr, "Couldn't get .rel*.plt"
+						" data from \"%s\": %s\n",
+						filename, elf_errmsg(-1));
+					exit(EXIT_FAILURE);
+				}
 				break;
 			}
 		}
 
-		if (i == lte->ehdr.e_shnum)
-			error(EXIT_FAILURE, 0,
-			      "Couldn't find .rel*.plt section in \"%s\"",
-			      filename);
+		if (i == lte->ehdr.e_shnum) {
+			fprintf(stderr,
+				"Couldn't find .rel*.plt section in \"%s\"\n",
+				filename);
+			exit(EXIT_FAILURE);
+		}
 
 		debug(1, "%s %zd PLT relocations", filename, lte->relplt_count);
 	}
+
+	if (soname_offset != 0)
+		lte->soname = lte->dynstr + soname_offset;
+
 	return 0;
 }
 
+/* XXX temporarily non-static */
 void
 do_close_elf(struct ltelf *lte) {
 	debug(DEBUG_FUNCTION, "do_close_elf()");
-	if (lte->lte_flags & LTE_HASH_MALLOCED)
-		free((char *)lte->hash);
+	arch_elf_destroy(lte);
 	elf_end(lte->elf);
 	close(lte->fd);
 }
 
-static struct library_symbol *
-create_library_symbol(const char * name, GElf_Addr addr)
-{
-	size_t namel = strlen(name) + 1;
-	struct library_symbol * sym = calloc(sizeof(*sym) + namel, 1);
-	if (sym == NULL) {
-		perror("create_library_symbol");
-		return NULL;
-	}
-	sym->name = (char *)(sym + 1);
-	memcpy(sym->name, name, namel);
-	sym->enter_addr = (void *)(uintptr_t) addr;
-	return sym;
-}
-
-void
-add_library_symbol(GElf_Addr addr, const char *name,
-		   struct library_symbol **library_symbolspp,
-		   enum toplt type_of_plt, int is_weak)
+static int
+populate_plt(struct Process *proc, const char *filename,
+	     struct ltelf *lte, struct library *lib)
 {
-	struct library_symbol *s;
-
-	debug(DEBUG_FUNCTION, "add_library_symbol()");
-
-	s = create_library_symbol(name, addr);
-	if (s == NULL)
-		error(EXIT_FAILURE, errno, "add_library_symbol failed");
+	size_t i;
+	for (i = 0; i < lte->relplt_count; ++i) {
+		GElf_Rel rel;
+		GElf_Rela rela;
+		GElf_Sym sym;
+		void *ret;
 
-	s->needs_init = 1;
-	s->is_weak = is_weak;
-	s->plt_type = type_of_plt;
+		if (lte->relplt->d_type == ELF_T_REL) {
+			ret = gelf_getrel(lte->relplt, i, &rel);
+			rela.r_offset = rel.r_offset;
+			rela.r_info = rel.r_info;
+			rela.r_addend = 0;
+		} else {
+			ret = gelf_getrela(lte->relplt, i, &rela);
+		}
 
-	s->next = *library_symbolspp;
-	*library_symbolspp = s;
+		if (ret == NULL
+		    || ELF64_R_SYM(rela.r_info) >= lte->dynsym_count
+		    || gelf_getsym(lte->dynsym, ELF64_R_SYM(rela.r_info),
+				   &sym) == NULL) {
+			fprintf(stderr,
+				"Couldn't get relocation from \"%s\": %s\n",
+				filename, elf_errmsg(-1));
+			exit(EXIT_FAILURE);
+		}
 
-	debug(2, "addr: %p, symbol: \"%s\"", (void *)(uintptr_t) addr, name);
-}
+		char const *name = lte->dynstr + sym.st_name;
 
-struct library_symbol *
-clone_library_symbol(struct library_symbol * sym)
-{
-	struct library_symbol * copy
-		= create_library_symbol(sym->name,
-					(GElf_Addr)(uintptr_t)sym->enter_addr);
-	if (copy == NULL)
-		return NULL;
-
-	copy->needs_init = sym->needs_init;
-	copy->is_weak = sym->is_weak;
-	copy->plt_type = sym->plt_type;
+		if (!filter_matches_symbol(options.plt_filter, name, lib))
+			continue;
 
-	return copy;
+		struct library_symbol *libsym = NULL;
+		switch (arch_elf_add_plt_entry(proc, lte, name,
+					       &rela, i, &libsym)) {
+		case plt_default:
+			if (default_elf_add_plt_entry(proc, lte, name,
+						      &rela, i, &libsym) < 0)
+			/* fall-through */
+		case plt_fail:
+				return -1;
+			/* fall-through */
+		case plt_ok:
+			if (libsym != NULL)
+				library_add_symbol(lib, libsym);
+		}
+	}
+	return 0;
 }
 
-void
-destroy_library_symbol(struct library_symbol * sym)
-{
-	free(sym);
-}
+/* When -x rules result in a request to trace several aliases, we only
+ * want to add such a symbol once.  The only thing those symbols
+ * differ in is their name, e.g. in glibc you have __GI___libc_free,
+ * __cfree, __free, __libc_free, cfree and free all defined at the
+ * same address.  So instead we keep this unique symbol struct for
+ * each address, and replace the name in libsym with a shorter variant
+ * if we find one.  */
+struct unique_symbol {
+	target_address_t addr;
+	struct library_symbol *libsym;
+};
 
-void
-destroy_library_symbol_chain(struct library_symbol * sym)
+static int
+unique_symbol_cmp(const void *key, const void *val)
 {
-	while (sym != NULL) {
-		struct library_symbol * next = sym->next;
-		destroy_library_symbol(sym);
-		sym = next;
-	}
-}
-
-/* stolen from elfutils-0.123 */
-static unsigned long
-private_elf_gnu_hash(const char *name) {
-	unsigned long h = 5381;
-	const unsigned char *string = (const unsigned char *)name;
-	unsigned char c;
-	for (c = *string; c; c = *++string)
-		h = h * 33 + c;
-	return h & 0xffffffff;
+	const struct unique_symbol *sym_key = key;
+	const struct unique_symbol *sym_val = val;
+	return sym_key->addr != sym_val->addr;
 }
 
 static int
-symbol_matches(struct ltelf *lte, size_t lte_i, GElf_Sym *sym,
-	       size_t symidx, const char *name)
+populate_this_symtab(struct Process *proc, const char *filename,
+		     struct ltelf *lte, struct library *lib,
+		     Elf_Data *symtab, const char *strtab, size_t size)
 {
-	GElf_Sym tmp_sym;
-	GElf_Sym *tmp;
-
-	tmp = (sym) ? (sym) : (&tmp_sym);
-
-	if (gelf_getsym(lte[lte_i].dynsym, symidx, tmp) == NULL)
-		error(EXIT_FAILURE, 0, "Couldn't get symbol from .dynsym");
-	else {
-		tmp->st_value += lte[lte_i].base_addr;
-		debug(2, "symbol found: %s, %zd, %#" PRIx64,
-		      name, lte_i, tmp->st_value);
+	/* Using a sorted array would arguably be better, but this
+	 * should be good enough for the number of symbols that we
+	 * typically deal with.  */
+	size_t num_symbols = 0;
+	struct unique_symbol *symbols = malloc(sizeof(*symbols) * size);
+	if (symbols == NULL) {
+		fprintf(stderr, "couldn't insert symbols for -x: %s\n",
+			strerror(errno));
+		return -1;
 	}
-	return tmp->st_value != 0
-		&& tmp->st_shndx != SHN_UNDEF
-		&& strcmp(name, lte[lte_i].dynstr + tmp->st_name) == 0;
-}
 
-int
-in_load_libraries(const char *name, struct ltelf *lte, size_t count, GElf_Sym *sym) {
+	GElf_Word secflags[lte->ehdr.e_shnum];
 	size_t i;
-	unsigned long hash;
-	unsigned long gnu_hash;
-
-	if (!count)
-		return 1;
-
-#ifdef ELF_HASH_TAKES_SIGNED_CHAR
-	hash = elf_hash(name);
-#else
-	hash = elf_hash((const unsigned char *)name);
-#endif
-	gnu_hash = private_elf_gnu_hash(name);
-
-	for (i = 0; i < count; ++i) {
-		if (lte[i].hash == NULL)
+	for (i = 1; i < lte->ehdr.e_shnum; ++i) {
+		Elf_Scn *scn = elf_getscn(lte->elf, i);
+		if (scn == NULL)
 			continue;
-
-		if (lte[i].hash_type == SHT_GNU_HASH) {
-			Elf32_Word * hashbase = lte[i].hash;
-			Elf32_Word nbuckets = *hashbase++;
-			Elf32_Word symbias = *hashbase++;
-			Elf32_Word bitmask_nwords = *hashbase++;
-			Elf32_Word * buckets;
-			Elf32_Word * chain_zero;
-			Elf32_Word bucket;
-
-			// +1 for skipped `shift'
-			hashbase += lte[i].ehdr.e_ident[EI_CLASS] * bitmask_nwords + 1;
-			buckets = hashbase;
-			hashbase += nbuckets;
-			chain_zero = hashbase - symbias;
-			bucket = buckets[gnu_hash % nbuckets];
-
-			if (bucket != 0) {
-				const Elf32_Word *hasharr = &chain_zero[bucket];
-				do
-					if ((*hasharr & ~1u) == (gnu_hash & ~1u)) {
-						int symidx = hasharr - chain_zero;
-						if (symbol_matches(lte, i,
-								   sym, symidx,
-								   name))
-							return 1;
-					}
-				while ((*hasharr++ & 1u) == 0);
-			}
-		} else {
-			Elf32_Word nbuckets, symndx;
-			Elf32_Word *buckets, *chain;
-			nbuckets = lte[i].hash[0];
-			buckets = &lte[i].hash[2];
-			chain = &lte[i].hash[2 + nbuckets];
-
-			for (symndx = buckets[hash % nbuckets];
-			     symndx != STN_UNDEF; symndx = chain[symndx])
-				if (symbol_matches(lte, i, sym, symndx, name))
-					return 1;
-		}
+		GElf_Shdr shdr;
+		if (gelf_getshdr(scn, &shdr) == NULL)
+			continue;
+		secflags[i] = shdr.sh_flags;
 	}
-	return 0;
-}
-
-static GElf_Addr
-opd2addr(struct ltelf *lte, GElf_Addr addr) {
-#ifdef ARCH_SUPPORTS_OPD
-	unsigned long base, offset;
-
-	if (!lte->opd)
-		return addr;
-
-	base = (unsigned long)lte->opd->d_buf;
-	offset = (unsigned long)addr - (unsigned long)lte->opd_addr;
-	if (offset > lte->opd_size)
-		error(EXIT_FAILURE, 0, "static plt not in .opd");
-
-	return *(GElf_Addr*)(base + offset);
-#else //!ARCH_SUPPORTS_OPD
-	return addr;
-#endif
-}
 
-struct library_symbol *
-read_elf(Process *proc, GElf_Addr *entryp)
-{
-	struct ltelf lte[MAX_LIBRARIES + 1];
-	size_t i;
-	struct opt_x_t *xptr;
-	struct opt_x_t *opt_x_loc = opt_x;
-	struct library_symbol **lib_tail = NULL;
-	int exit_out = 0;
-	int count = 0;
+	size_t lib_len = strlen(lib->soname);
+	for (i = 0; i < size; ++i) {
+		GElf_Sym sym;
+		if (gelf_getsym(symtab, i, &sym) == NULL) {
+		fail:
+			fprintf(stderr,
+				"couldn't get symbol #%zd from %s: %s\n",
+				i, filename, elf_errmsg(-1));
+			continue;
+		}
 
-	debug(DEBUG_FUNCTION, "read_elf(file=%s)", proc->filename);
+		/* XXX support IFUNC as well.  */
+		if (GELF_ST_TYPE(sym.st_info) != STT_FUNC
+		    || sym.st_value == 0)
+			continue;
 
-	memset(lte, 0, sizeof(*lte));
-	library_symbols = NULL;
-	library_num = 0;
-	proc->libdl_hooked = 0;
+		const char *orig_name = strtab + sym.st_name;
+		const char *version = strchr(orig_name, '@');
+		size_t len = version != NULL ? (assert(version > orig_name),
+						(size_t)(version - orig_name))
+			: strlen(orig_name);
+		char name[len + 1];
+		memcpy(name, orig_name, len);
+		name[len] = 0;
 
-	if (do_init_elf(lte, proc->filename))
-		return NULL;
+		if (!filter_matches_symbol(options.static_filter, name, lib))
+			continue;
 
-	memcpy(&main_lte, lte, sizeof(struct ltelf));
+		target_address_t addr = (target_address_t)
+			(uintptr_t)(sym.st_value + lte->bias);
+		target_address_t naddr;
+
+		/* On arches that support OPD, the value of typical
+		 * function symbol will be a pointer to .opd, but some
+		 * will point directly to .text.  We don't want to
+		 * translate those.  */
+		if (secflags[sym.st_shndx] & SHF_EXECINSTR) {
+			naddr = addr;
+		} else if (arch_translate_address(lte, addr, &naddr) < 0) {
+			fprintf(stderr,
+				"couldn't translate address of %s@%s: %s\n",
+				name, lib->soname, strerror(errno));
+			continue;
+		}
 
-	if (opt_p && opt_p->pid > 0) {
-		linkmap_init(proc, lte);
-		proc->libdl_hooked = 1;
-	}
+		char *full_name;
+		if (lib->type != LT_LIBTYPE_MAIN) {
+			full_name = malloc(strlen(name) + 1 + lib_len + 1);
+			if (full_name == NULL)
+				goto fail;
+			sprintf(full_name, "%s@%s", name, lib->soname);
+		} else {
+			full_name = strdup(name);
+			if (full_name == NULL)
+				goto fail;
+		}
 
-	proc->e_machine = lte->ehdr.e_machine;
+		/* Look whether we already have a symbol for this
+		 * address.  If not, add this one.  */
+		struct unique_symbol key = { naddr, NULL };
+		struct unique_symbol *unique
+			= lsearch(&key, symbols, &num_symbols,
+				  sizeof(*symbols), &unique_symbol_cmp);
+
+		if (unique->libsym == NULL) {
+			struct library_symbol *libsym = malloc(sizeof(*libsym));
+			if (libsym == NULL
+			    || library_symbol_init(libsym, naddr, full_name,
+						   1, LS_TOPLT_NONE) < 0) {
+				--num_symbols;
+				goto fail;
+			}
+			unique->libsym = libsym;
+			unique->addr = naddr;
 
-	for (i = 0; i < library_num; ++i) {
-		if (do_init_elf(&lte[i + 1], library[i]))
-			error(EXIT_FAILURE, errno, "Can't open \"%s\"",
-			      library[i]);
-	}
+		} else if (strlen(full_name) < strlen(unique->libsym->name)) {
+			library_symbol_set_name(unique->libsym, full_name, 1);
 
-	if (!options.no_plt) {
-#ifdef __mips__
-		// MIPS doesn't use the PLT and the GOT entries get changed
-		// on startup.
-		for(i=lte->mips_gotsym; i<lte->dynsym_count;i++){
-			GElf_Sym sym;
-			const char *name;
-			GElf_Addr addr = arch_plt_sym_val(lte, i, 0);
-			if (gelf_getsym(lte->dynsym, i, &sym) == NULL){
-				error(EXIT_FAILURE, 0,
-						"Couldn't get relocation from \"%s\"",
-						proc->filename);
-			}
-			name=lte->dynstr+sym.st_name;
-			if(ELF64_ST_TYPE(sym.st_info) != STT_FUNC){
-				debug(2,"sym %s not a function",name);
-				continue;
-			}
-			add_library_symbol(addr, name, &library_symbols, 0,
-					ELF64_ST_BIND(sym.st_info) != 0);
-			if (!lib_tail)
-				lib_tail = &(library_symbols->next);
+		} else {
+			free(full_name);
 		}
-#else
-		for (i = 0; i < lte->relplt_count; ++i) {
-			GElf_Rel rel;
-			GElf_Rela rela;
-			GElf_Sym sym;
-			GElf_Addr addr;
-			void *ret;
-			const char *name;
-
-			if (lte->relplt->d_type == ELF_T_REL) {
-				ret = gelf_getrel(lte->relplt, i, &rel);
-				rela.r_offset = rel.r_offset;
-				rela.r_info = rel.r_info;
-				rela.r_addend = 0;
-			} else
-				ret = gelf_getrela(lte->relplt, i, &rela);
-
-			if (ret == NULL
-					|| ELF64_R_SYM(rela.r_info) >= lte->dynsym_count
-					|| gelf_getsym(lte->dynsym, ELF64_R_SYM(rela.r_info),
-						&sym) == NULL)
-				error(EXIT_FAILURE, 0,
-						"Couldn't get relocation from \"%s\"",
-						proc->filename);
-
-			name = lte->dynstr + sym.st_name;
-			count = library_num ? library_num+1 : 0;
-
-			if (in_load_libraries(name, lte, count, NULL)) {
-				enum toplt pltt;
-				if (sym.st_value == 0 && lte->plt_stub_vma != 0) {
-					pltt = LS_TOPLT_EXEC;
-					addr = lte->plt_stub_vma + PPC_PLT_STUB_SIZE * i;
-				}
-				else {
-					pltt = PLTS_ARE_EXECUTABLE(lte)
-						?  LS_TOPLT_EXEC : LS_TOPLT_POINT;
-					addr = arch_plt_sym_val(lte, i, &rela);
-				}
+	}
 
-				add_library_symbol(addr, name, &library_symbols, pltt,
-						ELF64_ST_BIND(sym.st_info) == STB_WEAK);
-				if (!lib_tail)
-					lib_tail = &(library_symbols->next);
-			}
-		}
-#endif // !__mips__
-	} else {
-		lib_tail = &library_symbols;
+	for (i = 0; i < num_symbols; ++i) {
+		assert(symbols[i].libsym != NULL);
+		library_add_symbol(lib, symbols[i].libsym);
 	}
 
-	for (i = 0; i < lte->symtab_count; ++i) {
-		GElf_Sym sym;
-		GElf_Addr addr;
-		const char *name;
+	free(symbols);
 
-		if (gelf_getsym(lte->symtab, i, &sym) == NULL)
-			error(EXIT_FAILURE, 0,
-			      "Couldn't get symbol from \"%s\"",
-			      proc->filename);
+	return 0;
+}
 
-		name = lte->strtab + sym.st_name;
-		addr = sym.st_value;
-		if (!addr)
-			continue;
+static int
+populate_symtab(struct Process *proc, const char *filename,
+		struct ltelf *lte, struct library *lib)
+{
+	if (lte->symtab != NULL && lte->strtab != NULL)
+		return populate_this_symtab(proc, filename, lte, lib,
+					    lte->symtab, lte->strtab,
+					    lte->symtab_count);
+	else
+		return populate_this_symtab(proc, filename, lte, lib,
+					    lte->dynsym, lte->dynstr,
+					    lte->dynsym_count);
+}
 
-		for (xptr = opt_x_loc; xptr; xptr = xptr->next)
-			if (xptr->name && strcmp(xptr->name, name) == 0) {
-				/* FIXME: Should be able to use &library_symbols as above.  But
-				   when you do, none of the real library symbols cause breaks. */
-				add_library_symbol(opd2addr(lte, addr),
-						   name, lib_tail, LS_TOPLT_NONE, 0);
-				xptr->found = 1;
-				break;
-			}
+int
+ltelf_read_library(struct library *lib, struct Process *proc,
+		   const char *filename, GElf_Addr bias)
+{
+	struct ltelf lte = {};
+	if (do_init_elf(&lte, filename, bias) < 0)
+		return -1;
+	if (arch_elf_init(&lte, lib) < 0) {
+		fprintf(stderr, "Backend initialization failed.\n");
+		return -1;
 	}
 
-	unsigned found_count = 0;
+	proc->e_machine = lte.ehdr.e_machine;
 
-	for (xptr = opt_x_loc; xptr; xptr = xptr->next) {
-		if (xptr->found)
-			continue;
+	int status = 0;
+	if (lib == NULL)
+		goto fail;
 
-		GElf_Sym sym;
-		GElf_Addr addr;
-		if (in_load_libraries(xptr->name, lte, library_num+1, &sym)) {
-			debug(2, "found symbol %s @ %#" PRIx64 ", adding it.",
-					xptr->name, sym.st_value);
-			addr = sym.st_value;
-			if (ELF32_ST_TYPE (sym.st_info) == STT_FUNC) {
-				add_library_symbol(addr, xptr->name, lib_tail, LS_TOPLT_NONE, 0);
-				xptr->found = 1;
-				found_count++;
-			}
-		}
-		if (found_count == opt_x_cnt){
-			debug(2, "done, found everything: %d\n", found_count);
-			break;
-		}
+	/* Note that we set soname and pathname as soon as they are
+	 * allocated, so in case of further errors, they get released
+	 * when LIB is released, which should happen in the caller
+	 * when we return an error.  */
+
+	if (lib->pathname == NULL) {
+		char *pathname = strdup(filename);
+		if (pathname == NULL)
+			goto fail;
+		library_set_pathname(lib, pathname, 1);
 	}
 
-	if (lte->ehdr.e_entry != 0) {
-		*entryp = opd2addr(lte, lte->ehdr.e_entry);
+	if (lte.soname != NULL) {
+		char *soname = strdup(lte.soname);
+		if (soname == NULL)
+			goto fail;
+		library_set_soname(lib, soname, 1);
 	} else {
+		const char *slash = rindex(lib->pathname, '/');
+		/* rindex() returns NULL if the path has no '/'.  */
+		const char *soname = slash ? slash + 1 : lib->pathname;
+		library_set_soname(lib, soname, 0);
 	}
 
-	for (xptr = opt_x_loc; xptr; xptr = xptr->next)
-		if ( ! xptr->found) {
-			char *badthing = "WARNING";
-#ifdef PLT_REINITALISATION_BP
-			if (strcmp(xptr->name, PLTs_initialized_by_here) == 0) {
-				if (lte->ehdr.e_entry) {
-					fprintf (stderr, "WARNING: Using e_ent"
-						 "ry from elf header (%p) for "
-						 "address of \"%s\"\n", (void*)
-						 (long) lte->ehdr.e_entry,
-						 PLTs_initialized_by_here);
-					continue;
-				}
-				badthing = "ERROR";
-				exit_out = 1;
-			}
-#endif
-			fprintf (stderr,
-				 "%s: Couldn't find symbol \"%s\" in file \"%s\" assuming it will be loaded by libdl!"
-				 "\n", badthing, xptr->name, proc->filename);
-		}
-	if (exit_out) {
-		exit (1);
-	}
+	/* XXX The double cast should be removed when
+	 * target_address_t becomes integral type.  */
+	target_address_t entry = (target_address_t)(uintptr_t)lte.entry_addr;
+	if (arch_translate_address(&lte, entry, &entry) < 0)
+		goto fail;
+
+	/* XXX The double cast should be removed when
+	 * target_address_t becomes integral type.  */
+	lib->base = (target_address_t)(uintptr_t)lte.base_addr;
+	lib->entry = entry;
+	/* XXX The double cast should be removed when
+	 * target_address_t becomes integral type.  */
+	lib->dyn_addr = (target_address_t)(uintptr_t)lte.dyn_addr;
+
+	if (filter_matches_library(options.plt_filter, lib)
+	    && populate_plt(proc, filename, &lte, lib) < 0)
+		goto fail;
+
+	if (filter_matches_library(options.static_filter, lib)
+	    && populate_symtab(proc, filename, &lte, lib) < 0)
+		goto fail;
+
+done:
+	do_close_elf(&lte);
+	return status;
+
+fail:
+	status = -1;
+	goto done;
+}
 
-	for (i = 0; i < library_num + 1; ++i)
-		do_close_elf(&lte[i]);
+struct library *
+ltelf_read_main_binary(struct Process *proc, const char *path)
+{
+	struct library *lib = malloc(sizeof(*lib));
+	if (lib == NULL)
+		return NULL;
+	library_init(lib, LT_LIBTYPE_MAIN);
+	library_set_pathname(lib, path, 0);
+
+	/* There is a race between running the process and reading its
+	 * binary for internal consumption.  So open the binary from
+	 * the /proc filesystem.  XXX Note that there is similar race
+	 * for libraries, but there we don't have a nice answer like
+	 * that.  Presumably we could read the DSOs from the process
+	 * memory image, but that's not currently done.  */
+	char *fname = pid2name(proc->pid);
+	if (ltelf_read_library(lib, proc, fname, 0) < 0) {
+		library_destroy(lib);
+		free(lib);
+		return NULL;
+	}
 
-	return library_symbols;
+	return lib;
 }
diff --git a/ltrace-elf.h b/ltrace-elf.h
index 4da8a0a..64d1cb8 100644
--- a/ltrace-elf.h
+++ b/ltrace-elf.h
@@ -3,7 +3,18 @@
 
 #include <gelf.h>
 #include <stdlib.h>
+#include "sysdep.h"
 
+struct Process;
+struct library;
+struct library_symbol;
+
+/* XXX Ok, the original idea was to separate the low-level ELF data
+ * from the abstract "struct library" object, but we use some of the
+ * following extensively in the back end.  Not all though.  So what we
+ * use should be moved to struct library, and the rest of this
+ * structure could maybe be safely hidden in .c.  How to integrate the
+ * arch-specific bits into struct library is unclear as of now.  */
 struct ltelf {
 	int fd;
 	Elf *elf;
@@ -12,46 +23,59 @@ struct ltelf {
 	size_t dynsym_count;
 	const char *dynstr;
 	GElf_Addr plt_addr;
+	GElf_Word plt_flags;
 	size_t plt_size;
 	Elf_Data *relplt;
+	Elf_Data *plt_data;
 	size_t relplt_count;
 	Elf_Data *symtab;
 	const char *strtab;
+	const char *soname;
 	size_t symtab_count;
 	Elf_Data *opd;
 	GElf_Addr *opd_addr;
 	size_t opd_size;
-	Elf32_Word *hash;
-	int hash_type;
-	int lte_flags;
 	GElf_Addr dyn_addr;
 	size_t dyn_sz;
+	size_t relplt_size;
+	GElf_Addr bias;
+	GElf_Addr entry_addr;
 	GElf_Addr base_addr;
-#ifdef __mips__
-	size_t pltgot_addr;
-	size_t mips_local_gotno;
-	size_t mips_gotsym;
-#endif // __mips__
-	GElf_Addr plt_stub_vma;
+	struct arch_ltelf_data arch;
 };
 
-#define ELF_MAX_SEGMENTS  50
-#define LTE_HASH_MALLOCED 1
-#define LTE_PLT_EXECUTABLE 2
+int open_elf(struct ltelf *lte, const char *filename);
 
-#define PLTS_ARE_EXECUTABLE(lte) ((lte->lte_flags & LTE_PLT_EXECUTABLE) != 0)
+/* XXX is it possible to put breakpoints in VDSO and VSYSCALL
+ * pseudo-libraries?  For now we assume that all libraries can be
+ * opened via a filesystem.  BIAS is ignored for ET_EXEC files.  */
+int ltelf_read_library(struct library *lib, struct Process *proc,
+		       const char *filename, GElf_Addr bias);
 
-extern size_t library_num;
-extern char *library[MAX_LIBRARIES];
+/* Create a library object representing the main binary.  Returns
+ * NULL on failure; the entry point is stored in the library.  */
+struct library *ltelf_read_main_binary(struct Process *proc, const char *path);
 
-extern int open_elf(struct ltelf *lte, const char *filename);
-extern struct library_symbol *read_elf(Process *proc, GElf_Addr *entryp);
+GElf_Addr arch_plt_sym_val(struct ltelf *, size_t, GElf_Rela *);
 
-extern GElf_Addr arch_plt_sym_val(struct ltelf *, size_t, GElf_Rela *);
+Elf_Data *elf_loaddata(Elf_Scn *scn, GElf_Shdr *shdr);
+int elf_get_section_covering(struct ltelf *lte, GElf_Addr addr,
+			     Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr);
+int elf_get_section_type(struct ltelf *lte, GElf_Word type,
+			 Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr);
+int elf_get_section_named(struct ltelf *lte, const char *name,
+			  Elf_Scn **tgt_sec, GElf_Shdr *tgt_shdr);
 
-#ifndef SHT_GNU_HASH
-#define SHT_GNU_HASH	0x6ffffff6	/* GNU-style hash table. */
-#endif
+/* Read, respectively, 2, 4, or 8 bytes from Elf data at given OFFSET,
+ * and store it in *RETP.  Returns 0 on success or a negative value if
+ * there's not enough data.  */
+int elf_read_u16(Elf_Data *data, GElf_Xword offset, uint16_t *retp);
+int elf_read_u32(Elf_Data *data, GElf_Xword offset, uint32_t *retp);
+int elf_read_u64(Elf_Data *data, GElf_Xword offset, uint64_t *retp);
+
+int default_elf_add_plt_entry(struct Process *proc, struct ltelf *lte,
+			      const char *a_name, GElf_Rela *rela, size_t ndx,
+			      struct library_symbol **ret);
 
 #if __WORDSIZE == 32
 #define PRI_ELF_ADDR		PRIx32
diff --git a/ltrace.1 b/ltrace.1
index 4d320e6..3a908be 100644
--- a/ltrace.1
+++ b/ltrace.1
@@ -1,3 +1,4 @@
+.\" Copyright (c) 2012 Petr Machata, Red Hat Inc.
 .\" Copyright (c) 1997-2005 Juan Cespedes <cespedes@debian.org>
 .\" This file is covered by the GNU GPL
 .TH ltrace 1
@@ -64,22 +65,12 @@ carries upon a traced process
 DEBUG_FUNCTION.  Shows every entry to internal functions
 .RE
 .TP
-.I \-e expr
-A qualifying expression which modifies which events to trace.
-The format of the expression is:
-.br
-[!]value1[,value2]...
-.br
-where the values are the functions to trace.  Using an exclamation
-mark negates the set of values.  For example
-.I \-e printf
-means to trace only the printf library call.  By contrast,
-.I \-e !printf
-means to trace every library call except printf.
-.IP
-Note that some shells use the exclamation point for history
-expansion; even inside quoted arguments.  If so, you must escape
-the exclamation point with a backslash.
+.I \-e filter
+A qualifying expression which modifies which library calls to trace.
+The format of the filter expression is described in the section
+\fBFILTER EXPRESSIONS\fR.  If more than one \-e option appears on the
+command line, the library calls that match any of them are traced.  If
+no \-e is given, \fB@MAIN\fR is assumed as a default.
 .TP
 .I \-f
 Trace child processes as they are created by
@@ -175,16 +166,72 @@ is set at
 which must be an external function.  By default, '_start' is used.
 NOTE: this flag is only available on the architectures that need it.
 .TP
-.I \-x extern
-Trace the external function
-.IR extern .
-This option will search the symbol table and lib-dl loaded libraries when
-attempting to match the given symbol name.
-This option may be repeated.
+.I \-x filter
+A qualifying expression which modifies which symbol table entry points
+to trace.  The format of the filter expression is described in the
+section \fBFILTER EXPRESSIONS\fR.  If more than one \-x option appears
+on the command line, the symbols that match any of them are traced.
+No entry points are traced if no \-x is given.
 .TP
 .I \-V, \-\-version
 Show the version number of ltrace and exit.
 
+.SH FILTER EXPRESSIONS
+
+A filter expression is a chain of glob- or regexp-based rules that are
+used to pick symbols for tracing from libraries that the process uses.
+Most of it is intuitive, so as an example, the following would trace
+calls to malloc and free, except those done by libc:
+
+-e malloc+free-@libc.so*
+
+This reads: trace malloc and free, but don't trace anything that comes
+from libc.  Semi-formally, the syntax of the above example looks
+approximately like this:
+
+{[+-][\fIsymbol pattern\fR][@\fIlibrary pattern\fR]}
+
+\fISymbol pattern\fR is used to match symbol names, \fIlibrary
+pattern\fR to match library SONAMEs.  Both are implicitly globs, but
+can be regular expressions as well (see below).  The glob syntax
+supports meta-characters \fB*\fR and \fB?\fR and character classes,
+similarly to what basic bash globs support.  \fB^\fR and \fB$\fR are
+recognized to mean, respectively, start and end of given name.
+
+Both \fIsymbol pattern\fR and \fIlibrary pattern\fR have to match the
+whole name.  If you want to match only a part of name, surround it
+with one or two *'s as appropriate.  The exception is if the pattern
+is not mentioned at all, in which case it's as if the corresponding
+pattern were \fB*\fR.  (So \fBmalloc\fR is really \fBmalloc@*\fR and
+\fB@libc.*\fR is really \fB*@libc.*\fR.)
+
+In libraries that don't have an explicit SONAME, basename is taken for
+SONAME.  That holds for main binary as well: \fB/bin/echo\fR has an
+implicit SONAME of \fBecho\fR.  In addition to that, special library
+pattern \fBMAIN\fR always matches symbols in the main binary and never
+a library with actual SONAME \fBMAIN\fR (use e.g. \fB^MAIN\fR or
+\fB[M]AIN\fR for that).
+
+If the symbol or library pattern is surrounded by slashes (/like
+this/), then it is considered a regular expression instead.  As a
+shorthand, instead of writing \fB/x/@/y/\fR, you can write
+\fB/x@y/\fR.
+
+If the library pattern starts with a slash, it is not a SONAME
+expression, but a path expression, and is matched against the library
+path name.
+
+The first rule may lack a sign, in which case \fB+\fR is assumed.  If,
+on the other hand, the first rule has a \fB-\fR sign, it is as if
+there was another rule \fB@*\fR in front of it.
+
+The above rules are used to construct the set of traced symbols.  Each
+candidate symbol is passed through the chain of above rules.
+Initially, the symbol is \fIunmarked\fR.  If the symbol matches a
+\fB+\fR rule, it becomes \fImarked\fR, if it matches a \fB-\fR rule,
+it becomes \fIunmarked\fR.  If, after applying all rules, the symbol
+is \fImarked\fR, it will be traced.
+
 .SH BUGS
 It has most of the bugs stated in
 .BR strace(1) .
@@ -212,6 +259,8 @@ Personal config file, overrides
 
 .SH AUTHOR
 Juan Cespedes <cespedes@debian.org>
+.br
+Petr Machata <pmachata@redhat.com>
 
 .SH "SEE ALSO"
 .BR strace(1) ,
diff --git a/ltrace.h b/ltrace.h
index 194704d..fe3d6ed 100644
--- a/ltrace.h
+++ b/ltrace.h
@@ -1,3 +1,6 @@
+#ifndef _LTRACE_H_
+#define _LTRACE_H_
+
 typedef enum Event_type Event_type;
 enum Event_type {
 	EVENT_NONE=0,
@@ -18,11 +21,10 @@ enum Event_type {
 	EVENT_MAX
 };
 
-typedef struct Process Process;
 typedef struct Event Event;
 struct Event {
 	struct Event * next;
-	Process * proc;
+	struct Process * proc;
 	Event_type type;
 	union {
 		int ret_val;     /* EVENT_EXIT */
@@ -38,3 +40,5 @@ typedef void (*callback_func) (Event *);
 extern void ltrace_init(int argc, char **argv);
 extern void ltrace_add_callback(callback_func f, Event_type type);
 extern void ltrace_main(void);
+
+#endif /* _LTRACE_H_ */
diff --git a/options.c b/options.c
index 74c28bd..d5edc1a 100644
--- a/options.c
+++ b/options.c
@@ -1,16 +1,19 @@
 #include "config.h"
 
-#include <string.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <limits.h>
 #include <sys/ioctl.h>
-
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
 #include <getopt.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
 
 #include "common.h"
+#include "filter.h"
+#include "glob.h"
 
 #ifndef SYSCONFDIR
 #define SYSCONFDIR "/etc"
@@ -23,7 +26,6 @@ struct options_t options = {
 	.align    = DEFAULT_ALIGN,    /* alignment column for results */
 	.user     = NULL,             /* username to run command as */
 	.syscalls = 0,                /* display syscalls */
-	.libcalls = 1,                /* display library calls */
 #ifdef USE_DEMANGLE
 	.demangle = 0,                /* Demangle low-level symbol names */
 #endif
@@ -36,8 +38,6 @@ struct options_t options = {
 	.follow = 0,                  /* trace child processes */
 };
 
-char *library[MAX_LIBRARIES];
-size_t library_num = 0;
 static char *progname;		/* Program name (`ltrace') */
 int opt_i = 0;			/* instruction pointer */
 int opt_r = 0;			/* print relative timestamp */
@@ -47,14 +47,6 @@ int opt_T = 0;			/* show the time spent inside each call */
 /* List of pids given to option -p: */
 struct opt_p_t *opt_p = NULL;	/* attach to process with a given pid */
 
-/* List of function names given to option -e: */
-struct opt_e_t *opt_e = NULL;
-int opt_e_enable = 1;
-
-/* List of global function names given to -x: */
-struct opt_x_t *opt_x = NULL;
-unsigned int opt_x_cnt = 0;
-
 /* List of filenames give to option -F: */
 struct opt_F_t *opt_F = NULL;	/* alternate configuration file(s) */
 
@@ -181,11 +173,221 @@ guess_cols(void) {
 	}
 }
 
+/* Compile one SYM@LIB rule of kind TYPE and append it to FILT.  A_SYM
+ * and A_LIB are globs, or regular expressions if SYM_RE_P resp.
+ * LIB_RE_P is non-zero.  EXPR is only quoted in diagnostics: a rule
+ * that can't be allocated or compiled is reported and dropped.  */
+static void
+add_filter_rule(struct filter *filt, const char *expr,
+		enum filter_rule_type type,
+		const char *a_sym, int sym_re_p,
+		const char *a_lib, int lib_re_p)
+{
+	struct filter_rule *rule = malloc(sizeof(*rule));
+	struct filter_lib_matcher *matcher = malloc(sizeof(*matcher));
+	if (rule == NULL || matcher == NULL) {
+		fprintf(stderr, "rule near '%s' will be ignored: %s\n",
+			expr, strerror(errno));
+	fail:
+		free(rule);
+		free(matcher);
+		return;
+	}
+
+	regex_t symbol_re;
+	int status;
+	{
+		/* Anchor the pattern with ^ and $ so that it has to match
+		 * the whole symbol name.  Users can add "*" if they wish.  */
+		char sym[strlen(a_sym) + 3];
+		sprintf(sym, "^%s$", a_sym);
+		status = (sym_re_p ? regcomp : globcomp)(&symbol_re, sym, 0);
+		if (status != 0) {
+			char buf[100];
+			regerror(status, &symbol_re, buf, sizeof buf);
+			fprintf(stderr, "rule near '%s' will be ignored: %s\n",
+				expr, buf);
+			goto fail;
+		}
+	}
+
+	if (strcmp(a_lib, "MAIN") == 0) {
+		filter_lib_matcher_main_init(matcher);
+	} else {
+		/* Look at A_LIB to tell a path from a SONAME pattern: LIB
+		 * itself always starts with the '^' anchor.  */
+		enum filter_lib_matcher_type lib_type
+			= a_lib[0] == '/' ? FLM_PATHNAME : FLM_SONAME;
+		char lib[strlen(a_lib) + 3];
+		sprintf(lib, "^%s$", a_lib);
+		regex_t lib_re;
+		status = (lib_re_p ? regcomp : globcomp)(&lib_re, lib, 0);
+		if (status != 0) {
+			char buf[100];
+			regerror(status, &lib_re, buf, sizeof buf);
+			fprintf(stderr, "rule near '%s' will be ignored: %s\n",
+				expr, buf);
+			regfree(&symbol_re);
+			goto fail;
+		}
+		filter_lib_matcher_name_init(matcher, lib_type, lib_re);
+	}
+
+	filter_rule_init(rule, type, matcher, symbol_re);
+	filter_add_rule(filt, rule);
+}
+
+/* Split EXPR (which is modified in place) into SYM@LIB rules and add
+ * them to FILT.  Rules that fail to compile are reported and skipped
+ * by add_filter_rule.  Always returns 0.  */
+static int
+parse_filter(struct filter *filt, char *expr)
+{
+	/* Filter is a chain of sym@lib rules separated by '+' or '-'.
+	 * If the filter expression starts with '-', the missing
+	 * initial rule is implicitly *@*.  */
+
+	enum filter_rule_type type = FR_ADD;
+
+	while (*expr != 0) {
+		size_t s = strcspn(expr, "@-+");
+		char *symname = expr;
+		char *libname;
+		char *next = expr + s + 1;
+		enum filter_rule_type this_type = type;
+
+		if (expr[s] == 0) {
+			libname = "*";
+			expr = next - 1;
+		} else if (expr[s] == '-' || expr[s] == '+') {
+			type = expr[s] == '-' ? FR_SUBTRACT : FR_ADD;
+			expr[s] = 0;
+			libname = "*";
+			expr = next;
+		} else {
+			assert(expr[s] == '@');
+			expr[s] = 0;
+			s = strcspn(next, "-+");
+			if (s == 0) {
+				libname = "*";
+				expr = next;
+			} else if (next[s] == 0) {
+				expr = next + s;
+				libname = next;
+			} else {
+				assert(next[s] == '-' || next[s] == '+');
+				type = next[s] == '-' ? FR_SUBTRACT : FR_ADD;
+				next[s] = 0;
+				expr = next + s + 1;
+				libname = next;
+			}
+		}
+
+		assert(*libname != 0);
+		char *symend = symname + strlen(symname) - 1;
+		char *libend = libname + strlen(libname) - 1;
+		int sym_is_re = 0;
+		int lib_is_re = 0;
+
+		/*
+		 * /xxx/@... and ...@/xxx/ means that xxx are regular
+		 * expressions.  They are globs otherwise.
+		 *
+		 * /xxx@yyy/ is the same as /xxx/@/yyy/
+		 *
+		 * @/xxx matches library path name
+		 * @.xxx should match relative path name (XXX unimplemented)
+		 */
+		if (symname[0] == '/') {
+			if (symname != symend && symend[0] == '/') {
+				++symname;
+				*symend-- = 0;
+				sym_is_re = 1;
+
+			} else {
+				sym_is_re = 1;
+				lib_is_re = 1;
+				++symname;
+				/* /XXX@YYY/ is the same as
+				 * /XXX/@/YYY/.  */
+				if (libend[0] != '/')
+					fprintf(stderr, "unmatched '/'"
+						" in symbol name\n");
+				else
+					*libend-- = 0;
+			}
+		}
+
+		/* If libname ends in '/', then we expect '/' in the
+		 * beginning too.  Otherwise the initial '/' is part
+		 * of absolute file name.  */
+		if (!lib_is_re && libend[0] == '/') {
+			lib_is_re = 1;
+			*libend-- = 0;
+			if (libname != libend && libname[0] == '/')
+				++libname;
+			else
+				fprintf(stderr, "unmatched '/'"
+					" in library name\n");
+		}
+
+		/* Empty pattern means match-all; as a regexp that is ".*".  */
+		if (*symname == 0) /* /@AA/ */
+			symname = sym_is_re ? ".*" : "*";
+		if (*libname == 0) /* /aa@/ */
+			libname = lib_is_re ? ".*" : "*";
+
+		add_filter_rule(filt, expr, this_type,
+				symname, sym_is_re,
+				libname, lib_is_re);
+	}
+
+	return 0;
+}
+
+static struct filter *
+recursive_parse_chain(char *expr)
+{
+	struct filter *filt = malloc(sizeof(*filt));
+	if (filt == NULL) {
+		fprintf(stderr, "(part of) filter will be ignored: '%s': %s\n",
+			expr, strerror(errno));
+		return NULL;
+	}
+
+	filter_init(filt);
+	if (parse_filter(filt, expr) < 0) {
+		fprintf(stderr, "Filter '%s' will be ignored.\n", expr);
+		free(filt);
+		filt = NULL;
+	}
+
+	return filt;
+}
+
+/* Parse EXPR and append the resulting filter to the chain at *RETP.  */
+static void
+parse_filter_chain(const char *expr, struct filter **retp)
+{
+	char *str = strdup(expr); /* parse_filter edits its input */
+	if (str == NULL) {
+		fprintf(stderr, "filter '%s' will be ignored: %s\n",
+			expr, strerror(errno));
+		return;
+	}
+	/* Support initial '!' for backward compatibility.  */
+	if (str[0] == '!')
+		str[0] = '-';
+	while (*retp != NULL) /* find the tail of the chain */
+		retp = &(*retp)->next;
+	*retp = recursive_parse_chain(str);
+	free(str); /* rules hold compiled patterns, none point into STR */
+}
+
 char **
-process_options(int argc, char **argv) {
+process_options(int argc, char **argv)
+{
 	progname = argv[0];
 	options.output = stderr;
-	options.no_plt = 0;
 	options.no_signals = 0;
 #if defined(HAVE_LIBUNWIND)
 	options.bt_depth = -1;
@@ -193,6 +395,8 @@ process_options(int argc, char **argv) {
 
 	guess_cols();
 
+	int libcalls = 1;
+
 	while (1) {
 		int c;
 		char *p;
@@ -209,14 +413,13 @@ process_options(int argc, char **argv) {
 			{"library", 1, 0, 'l'},
 			{"output", 1, 0, 'o'},
 			{"version", 0, 0, 'V'},
-			{"no-plt", 0, 0, 'g'},
 			{"no-signals", 0, 0, 'b'},
 #if defined(HAVE_LIBUNWIND)
 			{"where", 1, 0, 'w'},
 #endif /* defined(HAVE_LIBUNWIND) */
 			{0, 0, 0, 0}
 		};
-		c = getopt_long(argc, argv, "+cfhiLrStTVgb"
+		c = getopt_long(argc, argv, "+cfhiLrStTVb"
 # ifdef USE_DEMANGLE
 				"C"
 # endif
@@ -258,39 +461,11 @@ process_options(int argc, char **argv) {
 				err_usage();
 			}
 			break;
+
 		case 'e':
-			{
-				char *str_e = strdup(optarg);
-				if (!str_e) {
-					perror("ltrace: strdup");
-					exit(1);
-				}
-				if (str_e[0] == '!') {
-					opt_e_enable = 0;
-					str_e++;
-				}
-				while (*str_e) {
-					struct opt_e_t *tmp;
-					char *str2 = strchr(str_e, ',');
-					if (str2) {
-						*str2 = '\0';
-					}
-					tmp = malloc(sizeof(struct opt_e_t));
-					if (!tmp) {
-						perror("ltrace: malloc");
-						exit(1);
-					}
-					tmp->name = str_e;
-					tmp->next = opt_e;
-					opt_e = tmp;
-					if (str2) {
-						str_e = str2 + 1;
-					} else {
-						break;
-					}
-				}
-				break;
-			}
+			parse_filter_chain(optarg, &options.plt_filter);
+			break;
+
 		case 'f':
 			options.follow = 1;
 			break;
@@ -306,9 +481,6 @@ process_options(int argc, char **argv) {
 				opt_F = tmp;
 				break;
 			}
-		case 'g':
-			options.no_plt = 1;
-			break;
 		case 'h':
 			usage();
 			exit(0);
@@ -316,16 +488,11 @@ process_options(int argc, char **argv) {
 			opt_i++;
 			break;
 		case 'l':
-			if (library_num == MAX_LIBRARIES) {
-				fprintf(stderr,
-					"Too many libraries.  Maximum is %i.\n",
-					MAX_LIBRARIES);
-				exit(1);
-			}
-			library[library_num++] = optarg;
+			// XXX TODO
+			fprintf(stderr, "-l support not yet implemented\n");
 			break;
 		case 'L':
-			options.libcalls = 0;
+			libcalls = 0;
 			break;
 		case 'n':
 			options.indent = atoi(optarg);
@@ -334,7 +501,7 @@ process_options(int argc, char **argv) {
 			options.output = fopen(optarg, "w");
 			if (!options.output) {
 				fprintf(stderr,
-					"Can't open %s for output: %s\n",
+					"can't open %s for writing: %s\n",
 					optarg, strerror(errno));
 				exit(1);
 			}
@@ -393,31 +560,8 @@ process_options(int argc, char **argv) {
 			/* Fall Thru */
 
 		case 'x':
-			{
-				struct opt_x_t *p = opt_x;
-
-				/* First, check for duplicate. */
-				while (p && strcmp(p->name, optarg)) {
-					p = p->next;
-				}
-				if (p) {
-					break;
-				}
-
-				/* If not duplicate, add to list. */
-				p = malloc(sizeof(struct opt_x_t));
-				if (!p) {
-					perror("ltrace: malloc");
-					exit(1);
-				}
-				opt_x_cnt++;
-				p->name = optarg;
-				p->found = 0;
-				p->next = opt_x;
-				p->hash = ~(0UL);
-				opt_x = p;
-				break;
-			}
+			parse_filter_chain(optarg, &options.static_filter);
+			break;
 
 		default:
 			err_usage();
@@ -449,6 +593,14 @@ process_options(int argc, char **argv) {
 		opt_F = egg;
 	}
 
+	/* Set default filter.  Use @MAIN for now, as that's what
+	 * ltrace used to have in the past.  XXX Maybe we should make
+	 * this "*" instead.  */
+	if (options.plt_filter == NULL && libcalls) {
+		parse_filter_chain("@MAIN", &options.plt_filter);
+		options.hide_caller = 1;
+	}
+
 	if (!opt_p && argc < 1) {
 		fprintf(stderr, "%s: too few arguments\n", progname);
 		err_usage();
diff --git a/options.h b/options.h
index 9a00629..3ffee71 100644
--- a/options.h
+++ b/options.h
@@ -1,11 +1,12 @@
 #include <stdio.h>
 #include <sys/types.h>
 
+struct filter;
+
 struct options_t {
 	int align;      /* -a: default alignment column for results */
 	char * user;    /* -u: username to run command as */
 	int syscalls;   /* -S: display system calls */
-	int libcalls;   /* -L: display library calls */
 	int demangle;   /* -C: demangle low-level names into user-level names */
 	int indent;     /* -n: indent trace output according to program flow */
 	FILE *output;   /* output to a specific file */
@@ -14,11 +15,13 @@ struct options_t {
 	size_t arraylen;   /* default maximum # of array elements printed */
 	size_t strlen;     /* default maximum # of bytes printed in strings */
 	int follow;     /* trace child processes */
-	int no_plt;     /* set bps on PLT entries */
 	int no_signals; /* don't print signals */
 #if defined(HAVE_LIBUNWIND)
 	int bt_depth;	 /* how may levels of stack frames to show */
 #endif /* defined(HAVE_LIBUNWIND) */
+	struct filter *plt_filter;
+	struct filter *static_filter;
+	int hide_caller; /* Whether caller library should be hidden.  */
 };
 extern struct options_t options;
 
@@ -32,31 +35,13 @@ struct opt_p_t {
 	struct opt_p_t *next;
 };
 
-struct opt_e_t {
-	char *name;
-	struct opt_e_t *next;
-};
-
 struct opt_F_t {
 	char *filename;
 	struct opt_F_t *next;
 };
 
-struct opt_x_t {
-	char *name;
-	int found;
-	unsigned long hash;
-	struct opt_x_t *next;
-};
-
 extern struct opt_p_t *opt_p;	/* attach to process with a given pid */
 
-extern struct opt_e_t *opt_e;	/* list of function names to display */
-extern int opt_e_enable;	/* 0 if '!' is used, 1 otherwise */
-
 extern struct opt_F_t *opt_F;	/* alternate configuration file(s) */
 
-extern struct opt_x_t *opt_x;	/* list of functions to break at */
-extern unsigned int opt_x_cnt;
-
 extern char **process_options(int argc, char **argv);
diff --git a/output.c b/output.c
index 1e2e709..ac8c9d0 100644
--- a/output.c
+++ b/output.c
@@ -9,6 +9,8 @@
 #include <unistd.h>
 
 #include "common.h"
+#include "proc.h"
+#include "library.h"
 
 /* TODO FIXME XXX: include in common.h: */
 extern struct timeval current_time_spent;
@@ -153,7 +155,10 @@ tabto(int col) {
 }
 
 void
-output_left(enum tof type, Process *proc, char const *function_name) {
+output_left(enum tof type, struct Process *proc,
+	    struct library_symbol *libsym)
+{
+	const char *function_name = libsym->name;
 	Function *func;
 	static arg_type_info *arg_unknown = NULL;
 	if (arg_unknown == NULL)
@@ -169,10 +174,15 @@ output_left(enum tof type, Process *proc, char const *function_name) {
 	current_proc = proc;
 	current_depth = proc->callstack_depth;
 	begin_of_line(type, proc);
+	if (!options.hide_caller && libsym->lib != NULL
+	    && libsym->plt_type != LS_TOPLT_NONE)
+		current_column += fprintf(options.output, "%s->",
+					  libsym->lib->soname);
 #ifdef USE_DEMANGLE
 	current_column +=
-	    fprintf(options.output, "%s(",
-		    options.demangle ? my_demangle(function_name) : function_name);
+		fprintf(options.output, "%s(",
+			(options.demangle
+			 ? my_demangle(function_name) : function_name));
 #else
 	current_column += fprintf(options.output, "%s(", function_name);
 #endif
@@ -210,7 +220,9 @@ output_left(enum tof type, Process *proc, char const *function_name) {
 }
 
 void
-output_right(enum tof type, Process *proc, char *function_name) {
+output_right(enum tof type, struct Process *proc, struct library_symbol *libsym)
+{
+	const char *function_name = libsym->name;
 	Function *func = name2func(function_name);
 	static arg_type_info *arg_unknown = NULL;
 	if (arg_unknown == NULL)
diff --git a/output.h b/output.h
index fa840c7..714078f 100644
--- a/output.h
+++ b/output.h
@@ -1,3 +1,7 @@
-void output_line(Process *proc, char *fmt, ...);
-void output_left(enum tof type, Process *proc, char const *function_name);
-void output_right(enum tof type, Process *proc, char *function_name);
+struct Process;
+struct library_symbol;
+void output_line(struct Process *proc, char *fmt, ...);
+void output_left(enum tof type, struct Process *proc,
+		 struct library_symbol *libsym);
+void output_right(enum tof type, struct Process *proc,
+		  struct library_symbol *libsym);
diff --git a/proc.c b/proc.c
index 47086b4..51833fe 100644
--- a/proc.c
+++ b/proc.c
@@ -11,45 +11,296 @@
 #include <errno.h>
 #include <stdlib.h>
 #include <assert.h>
-#include <error.h>
 
 #include "common.h"
 #include "breakpoint.h"
+#include "proc.h"
 
-Process *
-open_program(char *filename, pid_t pid, int enable) {
-	Process *proc;
-	assert(pid != 0);
-	proc = calloc(sizeof(Process), 1);
-	if (!proc) {
-		perror("malloc");
-		exit(1);
+#ifndef ARCH_HAVE_PROCESS_DATA
+int
+arch_process_init(struct Process *proc)
+{
+	return 0;
+}
+
+void
+arch_process_destroy(struct Process *proc)
+{
+}
+
+int
+arch_process_clone(struct Process *retp, struct Process *proc)
+{
+	return 0;
+}
+
+int
+arch_process_exec(struct Process *proc)
+{
+	return 0;
+}
+#endif
+
+#ifndef ARCH_HAVE_DYNLINK_DONE
+void
+arch_dynlink_done(struct Process *proc)
+{
+}
+#endif
+
+static void add_process(struct Process *proc, int was_exec);
+
+static int
+process_bare_init(struct Process *proc, const char *filename,
+		  pid_t pid, int was_exec)
+{
+	if (!was_exec) {
+		memset(proc, 0, sizeof(*proc));
+
+		proc->filename = strdup(filename);
+		if (proc->filename == NULL) {
+		fail:
+			free(proc->filename);
+			if (proc->breakpoints != NULL)
+				dict_clear(proc->breakpoints);
+			return -1;
+		}
 	}
 
-	proc->filename = strdup(filename);
+	/* Add process so that we know who the leader is.  */
 	proc->pid = pid;
+	add_process(proc, was_exec);
+	if (proc->leader == NULL)
+		goto fail;
+
+	if (proc->leader == proc) {
+		proc->breakpoints = dict_init(target_address_hash,
+					      target_address_cmp);
+		if (proc->breakpoints == NULL)
+			goto fail;
+	} else {
+		proc->breakpoints = NULL;
+	}
+
 #if defined(HAVE_LIBUNWIND)
 	proc->unwind_priv = _UPT_create(pid);
 	proc->unwind_as = unw_create_addr_space(&_UPT_accessors, 0);
 #endif /* defined(HAVE_LIBUNWIND) */
 
-	add_process(proc);
-	if (proc->leader == NULL) {
+	return 0;
+}
+
+static void
+process_bare_destroy(struct Process *proc, int was_exec)
+{
+	dict_clear(proc->breakpoints);
+	if (!was_exec) {
+		free(proc->filename);
+		remove_process(proc);
+	}
+}
+
+static int
+process_init_main(struct Process *proc)
+{
+	target_address_t entry;
+	target_address_t interp_bias;
+	if (process_get_entry(proc, &entry, &interp_bias) < 0) {
+		fprintf(stderr, "Couldn't get entry points of process %d\n",
+			proc->pid);
+		return -1;
+	}
+
+	if (breakpoints_init(proc) < 0) {
+		fprintf(stderr, "failed to init breakpoints %d\n",
+			proc->pid);
+		return -1;
+	}
+
+	return 0;
+}
+
+int
+process_init(struct Process *proc, const char *filename, pid_t pid)
+{
+	if (process_bare_init(proc, filename, pid, 0) < 0) {
+	fail:
+		fprintf(stderr, "failed to initialize process %d: %s\n",
+			pid, strerror(errno));
+		return -1;
+	}
+
+	if (arch_process_init(proc) < 0) {
+		process_bare_destroy(proc, 0);
+		goto fail;
+	}
+
+	if (proc->leader != proc)
+		return 0;
+	if (process_init_main(proc) < 0) {
+		process_bare_destroy(proc, 0);
+		goto fail;
+	}
+	return 0;
+}
+
+static enum callback_status
+destroy_breakpoint_cb(struct Process *proc, struct breakpoint *bp, void *data)
+{
+	breakpoint_destroy(bp);
+	free(bp);
+	return CBS_CONT;
+}
+
+static void
+private_process_destroy(struct Process *proc, int keep_filename)
+{
+	if (!keep_filename)
+		free(proc->filename);
+
+	/* Libraries and symbols.  This is only relevant in
+	 * leader.  */
+	struct library *lib;
+	for (lib = proc->libraries; lib != NULL; ) {
+		struct library *next = lib->next;
+		library_destroy(lib);
+		free(lib);
+		lib = next;
+	}
+	proc->libraries = NULL;
+
+	/* Breakpoints.  */
+	if (proc->breakpoints != NULL) {
+		proc_each_breakpoint(proc, NULL, destroy_breakpoint_cb, NULL);
+		dict_clear(proc->breakpoints);
+		proc->breakpoints = NULL;
+	}
+}
+
+void
+process_destroy(struct Process *proc)
+{
+	private_process_destroy(proc, 0);
+	arch_process_destroy(proc);
+}
+
+int
+process_exec(struct Process *proc)
+{
+	/* Call exec first, before we destroy the main state.  */
+	if (arch_process_exec(proc) < 0)
+		return -1;
+
+	private_process_destroy(proc, 1);
+	if (process_bare_init(proc, NULL, proc->pid, 1) < 0)
+		return -1;
+	if (process_init_main(proc) < 0) {
+		process_bare_destroy(proc, 1);
+		return -1;
+	}
+	return 0;
+}
+
+struct Process *
+open_program(const char *filename, pid_t pid)
+{
+	assert(pid != 0);
+	struct Process *proc = malloc(sizeof(*proc));
+	if (proc == NULL || process_init(proc, filename, pid) < 0) {
 		free(proc);
 		return NULL;
 	}
+	return proc;
+}
 
-	if (proc->leader == proc) {
-		trace_set_options(proc, proc->pid);
-		if (breakpoints_init(proc, enable)) {
-			fprintf(stderr, "failed to init breakpoints %d\n",
-				proc->pid);
-			remove_process(proc);
-			return NULL;
+struct clone_single_bp_data {
+	struct Process *old_proc;
+	struct Process *new_proc;
+	int error;
+};
+
+/* dict_apply_to_all callback: clone breakpoint VALUE into the new
+ * process.  The first failure is latched in U->error.  */
+static void
+clone_single_bp(void *key, void *value, void *u)
+{
+	struct breakpoint *bp = value;
+	struct clone_single_bp_data *data = u;
+	if (data->error != 0)
+		return;
+
+	struct breakpoint *clone = malloc(sizeof(*clone));
+	if (clone != NULL && breakpoint_clone(clone, data->new_proc,
+					      bp, data->old_proc) >= 0) {
+		if (proc_add_breakpoint(data->new_proc->leader, clone) >= 0)
+			return;
+		breakpoint_destroy(clone);
+	}
+	free(clone);
+	data->error = -1;
+}
+
+int
+process_clone(struct Process *retp, struct Process *proc, pid_t pid)
+{
+	if (process_bare_init(retp, proc->filename, pid, 0) < 0) {
+	fail:
+		fprintf(stderr, "failed to clone process %d->%d : %s\n",
+			proc->pid, pid, strerror(errno));
+		return -1;
+	}
+
+	retp->tracesysgood = proc->tracesysgood;
+	retp->e_machine = proc->e_machine;
+
+	/* For non-leader processes, that's all we need to do.  */
+	if (retp->leader != retp)
+		return 0;
+
+	/* Clone symbols first so that we can clone and relink
+	 * breakpoints.  */
+	struct library *lib;
+	struct library **nlibp = &retp->libraries;
+	for (lib = proc->libraries; lib != NULL; lib = lib->next) {
+		*nlibp = malloc(sizeof(**nlibp));
+		if (*nlibp == NULL
+		    || library_clone(*nlibp, lib) < 0) {
+		fail2:
+			process_bare_destroy(retp, 0);
+
+			/* Error when cloning.  Unroll what was done.  */
+			for (lib = retp->libraries; lib != NULL; ) {
+				struct library *next = lib->next;
+				library_destroy(lib);
+				free(lib);
+				lib = next;
+			}
+			goto fail;
 		}
+
+		nlibp = &(*nlibp)->next;
 	}
 
-	return proc;
+	/* Now clone breakpoints.  Symbol relinking is done in
+	 * clone_single_bp.  */
+	struct clone_single_bp_data data = {
+		.old_proc = proc,
+		.new_proc = retp,
+		.error = 0,
+	};
+	dict_apply_to_all(proc->breakpoints, &clone_single_bp, &data);
+
+	/* And finally the call stack.  */
+	memcpy(retp->callstack, proc->callstack, sizeof(retp->callstack));
+	retp->callstack_depth = proc->callstack_depth;
+
+	if (data.error < 0)
+		goto fail2;
+
+	if (arch_process_clone(retp, proc) < 0)
+		goto fail2;
+
+	return 0;
 }
 
 static int
@@ -67,19 +318,19 @@ open_one_pid(pid_t pid)
 		return -1;
 	}
 
-	proc = open_program(filename, pid, 0);
+	proc = open_program(filename, pid);
 	if (proc == NULL)
 		return -1;
-	trace_set_options(proc, pid);
+	trace_set_options(proc);
 
 	return 0;
 }
 
-static enum pcb_status
+static enum callback_status
 start_one_pid(Process * proc, void * data)
 {
 	continue_process(proc->pid);
-	return pcb_cont;
+	return CBS_CONT;
 }
 
 void
@@ -134,20 +385,21 @@ open_pid(pid_t pid)
 		old_ntasks = ntasks;
 	}
 
-	/* Done.  Now initialize breakpoints and then continue
-	 * everyone.  */
-	Process * leader;
-	leader = pid2proc(pid)->leader;
-	enable_all_breakpoints(leader);
+	struct Process *leader = pid2proc(pid)->leader;
 
-	each_task(pid2proc(pid)->leader, start_one_pid, NULL);
+	/* XXX Is there a way to figure out whether _start has
+	 * actually already been hit?  */
+	arch_dynlink_done(leader);
+
+	/* Done.  Continue everyone.  */
+	each_task(leader, NULL, start_one_pid, NULL);
 }
 
-static enum pcb_status
+static enum callback_status
 find_proc(Process * proc, void * data)
 {
 	pid_t pid = (pid_t)(uintptr_t)data;
-	return proc->pid == pid ? pcb_stop : pcb_cont;
+	return proc->pid == pid ? CBS_STOP : CBS_CONT;
 }
 
 Process *
@@ -179,41 +431,60 @@ unlist_process(Process * proc)
 	}
 }
 
-Process *
-each_process(Process * proc,
-	     enum pcb_status (* cb)(Process * proc, void * data),
-	     void * data)
+struct Process *
+each_process(struct Process *start_after,
+	     enum callback_status(*cb)(struct Process *proc, void *data),
+	     void *data)
 {
-	Process * it = proc ?: list_of_processes;
-	for (; it != NULL; ) {
+	struct Process *it = start_after == NULL ? list_of_processes
+		: start_after->next;
+
+	while (it != NULL) {
 		/* Callback might call remove_process.  */
-		Process * next = it->next;
-		if ((*cb) (it, data) == pcb_stop)
+		struct Process *next = it->next;
+		switch ((*cb)(it, data)) {
+		case CBS_FAIL:
+			/* XXX handle me */
+		case CBS_STOP:
 			return it;
+		case CBS_CONT:
+			break;
+		}
 		it = next;
 	}
 	return NULL;
 }
 
 Process *
-each_task(Process * it, enum pcb_status (* cb)(Process * proc, void * data),
-	  void * data)
+each_task(struct Process *proc, struct Process *start_after,
+	  enum callback_status(*cb)(struct Process *proc, void *data),
+	  void *data)
 {
+	assert(proc != NULL);
+	struct Process *it = start_after == NULL ? proc->leader
+		: start_after->next;
+
 	if (it != NULL) {
-		Process * leader = it->leader;
-		for (; it != NULL && it->leader == leader; ) {
+		struct Process *leader = it->leader;
+		while (it != NULL && it->leader == leader) {
 			/* Callback might call remove_process.  */
-			Process * next = it->next;
-			if ((*cb) (it, data) == pcb_stop)
+			struct Process *next = it->next;
+			switch ((*cb)(it, data)) {
+			case CBS_FAIL:
+				/* XXX handle me */
+			case CBS_STOP:
 				return it;
+			case CBS_CONT:
+				break;
+			}
 			it = next;
 		}
 	}
 	return NULL;
 }
 
-void
-add_process(Process * proc)
+static void
+add_process(struct Process *proc, int was_exec)
 {
 	Process ** leaderp = &list_of_processes;
 	if (proc->pid) {
@@ -231,8 +502,11 @@ add_process(Process * proc)
 				leaderp = &leader->next;
 		}
 	}
-	proc->next = *leaderp;
-	*leaderp = proc;
+
+	if (!was_exec) {
+		proc->next = *leaderp;
+		*leaderp = proc;
+	}
 }
 
 void
@@ -252,13 +526,13 @@ change_process_leader(Process * proc, Process * leader)
 	*leaderp = proc;
 }
 
-static enum pcb_status
-clear_leader(Process * proc, void * data)
+static enum callback_status
+clear_leader(struct Process *proc, void *data)
 {
 	debug(DEBUG_FUNCTION, "detach_task %d from leader %d",
 	      proc->pid, proc->leader->pid);
 	proc->leader = NULL;
-	return pcb_cont;
+	return CBS_CONT;
 }
 
 static enum ecb_status
@@ -284,15 +558,16 @@ remove_process(Process *proc)
 	debug(DEBUG_FUNCTION, "remove_proc(pid=%d)", proc->pid);
 
 	if (proc->leader == proc)
-		each_task(proc, &clear_leader, NULL);
+		each_task(proc, NULL, &clear_leader, NULL);
 
 	unlist_process(proc);
 	delete_events_for(proc);
+	process_destroy(proc);
 	free(proc);
 }
 
 void
-install_event_handler(Process * proc, Event_Handler * handler)
+install_event_handler(Process *proc, struct event_handler *handler)
 {
 	debug(DEBUG_FUNCTION, "install_event_handler(pid=%d, %p)", proc->pid, handler);
 	assert(proc->event_handler == NULL);
@@ -302,7 +577,7 @@ install_event_handler(Process * proc, Event_Handler * handler)
 void
 destroy_event_handler(Process * proc)
 {
-	Event_Handler * handler = proc->event_handler;
+	struct event_handler *handler = proc->event_handler;
 	debug(DEBUG_FUNCTION, "destroy_event_handler(pid=%d, %p)", proc->pid, handler);
 	assert(handler != NULL);
 	if (handler->destroy != NULL)
@@ -310,3 +585,197 @@ destroy_event_handler(Process * proc)
 	free(handler);
 	proc->event_handler = NULL;
 }
+
+static enum callback_status
+breakpoint_for_symbol(struct library_symbol *libsym, void *data)
+{
+	struct Process *proc = data;
+	assert(proc->leader == proc);
+
+	/* If there is an artificial breakpoint on the same address,
+	 * its libsym will be NULL, and we can smuggle our libsym
+	 * there.  That artificial breakpoint is there presumably for
+	 * the callbacks, which we don't touch.  If there is a real
+	 * breakpoint, then this is a bug.  ltrace-elf.c should filter
+	 * symbols and ignore extra symbol aliases.
+	 *
+	 * The other direction is more complicated and currently not
+	 * supported.  If a breakpoint has custom callbacks, it might
+	 * be also custom-allocated, and we would really need to swap
+	 * the two: delete the one now in the dictionary, swap values
+	 * around, and put the new breakpoint back in.  */
+	struct breakpoint *bp = dict_find_entry(proc->breakpoints,
+						libsym->enter_addr);
+	if (bp != NULL) {
+		assert(bp->libsym == NULL);
+		bp->libsym = libsym;
+		return CBS_CONT;
+	}
+
+	bp = malloc(sizeof(*bp));
+	if (bp == NULL
+	    || breakpoint_init(bp, proc, libsym->enter_addr, libsym) < 0) {
+	fail:
+		free(bp);
+		return CBS_FAIL;
+	}
+	if (proc_add_breakpoint(proc, bp) < 0) {
+		breakpoint_destroy(bp);
+		goto fail;
+	}
+
+	if (breakpoint_turn_on(bp, proc) < 0) {
+		proc_remove_breakpoint(proc, bp);
+		breakpoint_destroy(bp);
+		goto fail;
+	}
+
+	return CBS_CONT;
+}
+
+void
+proc_add_library(struct Process *proc, struct library *lib)
+{
+	assert(lib->next == NULL);
+	lib->next = proc->libraries;
+	proc->libraries = lib;
+	debug(DEBUG_PROCESS, "added library %s@%p (%s) to %d",
+	      lib->soname, lib->base, lib->pathname, proc->pid);
+
+	struct library_symbol *libsym = NULL;
+	while ((libsym = library_each_symbol(lib, libsym, breakpoint_for_symbol,
+					     proc)) != NULL)
+		fprintf(stderr, "couldn't insert breakpoint for %s to %d: %s",
+			libsym->name, proc->pid, strerror(errno));
+}
+
+int
+proc_remove_library(struct Process *proc, struct library *lib)
+{
+	struct library **libp;
+	for (libp = &proc->libraries; *libp != NULL; libp = &(*libp)->next)
+		if (*libp == lib) {
+			*libp = lib->next;
+			return 0;
+		}
+	return -1;
+}
+
+struct library *
+proc_each_library(struct Process *proc, struct library *it,
+		  enum callback_status (*cb)(struct Process *proc,
+					     struct library *lib, void *data),
+		  void *data)
+{
+	if (it == NULL)
+		it = proc->libraries;
+
+	while (it != NULL) {
+		struct library *next = it->next;
+
+		switch (cb(proc, it, data)) {
+		case CBS_FAIL:
+			/* XXX handle me */
+		case CBS_STOP:
+			return it;
+		case CBS_CONT:
+			break;
+		}
+
+		it = next;
+	}
+
+	return NULL;
+}
+
+static void
+check_leader(struct Process *proc)
+{
+	/* Only the group leader should be getting the breakpoints and
+	 * thus have ->breakpoints initialized.  */
+	assert(proc->leader != NULL);
+	assert(proc->leader == proc);
+	assert(proc->breakpoints != NULL);
+}
+
+int
+proc_add_breakpoint(struct Process *proc, struct breakpoint *bp)
+{
+	debug(DEBUG_FUNCTION, "proc_add_breakpoint(pid=%d, %s@%p)",
+	      proc->pid, breakpoint_name(bp), bp->addr);
+	check_leader(proc);
+
+	/* XXX We might merge bp->libsym instead of the following
+	 * assert, but that's not necessary right now.  Read the
+	 * comment in breakpoint_for_symbol.  */
+	assert(dict_find_entry(proc->breakpoints, bp->addr) == NULL);
+
+	if (dict_enter(proc->breakpoints, bp->addr, bp) < 0) {
+		fprintf(stderr,
+			"couldn't enter breakpoint %s@%p to dictionary: %s\n",
+			breakpoint_name(bp), bp->addr, strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
+void
+proc_remove_breakpoint(struct Process *proc, struct breakpoint *bp)
+{
+	debug(DEBUG_FUNCTION, "proc_remove_breakpoint(pid=%d, %s@%p)",
+	      proc->pid, breakpoint_name(bp), bp->addr);
+	check_leader(proc);
+	struct breakpoint *removed = dict_remove(proc->breakpoints, bp->addr);
+	assert(removed == bp);
+}
+
+/* Dict doesn't support iteration restarts, so here's this contraption
+ * for now.  XXX add restarts to dict.  */
+struct each_breakpoint_data
+{
+	void *start;
+	void *end;
+	struct Process *proc;
+	enum callback_status (*cb)(struct Process *proc,
+				   struct breakpoint *bp,
+				   void *data);
+	void *cb_data;
+};
+
+static void
+each_breakpoint_cb(void *key, void *value, void *d)
+{
+	struct each_breakpoint_data *data = d;
+	if (data->end != NULL)
+		return;
+	if (data->start == key)
+		data->start = NULL;
+
+	if (data->start == NULL) {
+		switch (data->cb(data->proc, value, data->cb_data)) {
+		case CBS_FAIL:
+			/* XXX handle me */
+		case CBS_STOP:
+			data->end = key;
+		case CBS_CONT:
+			return;
+		}
+	}
+}
+
+void *
+proc_each_breakpoint(struct Process *proc, void *start,
+		     enum callback_status (*cb)(struct Process *proc,
+						struct breakpoint *bp,
+						void *data), void *data)
+{
+	struct each_breakpoint_data dd = {
+		.start = start,
+		.proc = proc,
+		.cb = cb,
+		.cb_data = data,
+	};
+	dict_apply_to_all(proc->breakpoints, &each_breakpoint_cb, &dd);
+	return dd.end;
+}
diff --git a/proc.h b/proc.h
new file mode 100644
index 0000000..443bd8e
--- /dev/null
+++ b/proc.h
@@ -0,0 +1,234 @@
+/*
+ * This file is part of ltrace.
+ * Copyright (C) 2010,2011,2012 Petr Machata, Red Hat Inc.
+ * Copyright (C) 2010 Joe Damato
+ * Copyright (C) 1998,2001,2008,2009 Juan Cespedes
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef _PROC_H_
+#define _PROC_H_
+
+#if defined(HAVE_LIBUNWIND)
+# include <libunwind.h>
+#endif /* defined(HAVE_LIBUNWIND) */
+
+#include "ltrace.h"
+#include "dict.h"
+#include "sysdep.h"
+
+struct library;
+struct breakpoint;
+
+/* XXX Move this somewhere where it makes sense.  When the mess in
+ * common.h is disentangled, that would actually be a good place for
+ * this.  */
+enum callback_status {
+	CBS_STOP, /* The iteration should stop.  */
+	CBS_CONT, /* The iteration should continue.  */
+	CBS_FAIL, /* There was an error.  The iteration should stop
+		   * and return error.  */
+};
+
+struct event_handler {
+	/* Event handler that overrides the default one.  Should
+	 * return NULL if the event was handled, otherwise the
+	 * returned event is passed to the default handler.  */
+	Event *(*on_event)(struct event_handler *self, Event *event);
+
+	/* Called when the event handler removal is requested.  */
+	void (*destroy)(struct event_handler *self);
+};
+
+enum process_state {
+	STATE_ATTACHED = 0,
+	STATE_BEING_CREATED,
+	STATE_IGNORED  /* ignore this process (it's a fork and no -f was used) */
+};
+
+struct callstack_element {
+	union {
+		int syscall;
+		struct library_symbol * libfunc;
+	} c_un;
+	int is_syscall;
+	void * return_addr;
+	struct timeval time_spent;
+	void * arch_ptr;
+};
+
+/* XXX We should get rid of this.  */
+#define MAX_CALLDEPTH 64
+
+/* XXX We would rather have this all organized a little differently,
+ * have Process for the whole group and Task for what's there for
+ * per-thread stuff.  But for now this is the less invasive way of
+ * structuring it.  */
+typedef struct Process Process;
+struct Process {
+	enum process_state state;
+	Process * parent;         /* needed by STATE_BEING_CREATED */
+	char * filename;
+	pid_t pid;
+
+	/* Dictionary of breakpoints (which is a mapping
+	 * address->breakpoint).  This is NULL for non-leader
+	 * processes.  XXX note that we store addresses (keys) by
+	 * value.  That assumes that target_address_t fits in host
+	 * pointer.  */
+	Dict * breakpoints;
+
+	int mask_32bit;           /* 1 if 64-bit ltrace is tracing 32-bit process */
+	unsigned int personality;
+	int tracesysgood;         /* signal indicating a PTRACE_SYSCALL trap */
+
+	int callstack_depth;
+	struct callstack_element callstack[MAX_CALLDEPTH];
+
+	/* Linked list of libraries in backwards order of mapping.
+	 * The last element is the executed binary itself.  */
+	struct library *libraries;
+
+	/* Arch-dependent: */
+	void *debug;	/* arch-dep process debug struct XXX move to
+			 * os_process_data after it's invented.  */
+	void * instruction_pointer;
+	void * stack_pointer;      /* To get return addr, args... */
+	void * return_addr;
+	void * arch_ptr;
+	short e_machine;
+#ifdef __arm__
+	int thumb_mode;           /* ARM execution mode: 0: ARM, 1: Thumb */
+#endif
+
+#if defined(HAVE_LIBUNWIND)
+	/* libunwind address space */
+	unw_addr_space_t unwind_as;
+	void *unwind_priv;
+#endif /* defined(HAVE_LIBUNWIND) */
+
+	/* Set in leader.  */
+	struct event_handler *event_handler;
+
+	/**
+	 * Process chaining.
+	 **/
+	Process * next;
+
+	/* LEADER points to the leader thread of the POSIX.1 process.
+	   If X->LEADER == X, then X is the leader thread and the
+	   Process structures chained by NEXT represent other threads,
+	   up until, but not including, the next leader thread.
+	   LEADER may be NULL after the leader has already exited.  In
+	   that case this process is waiting to be collected.  */
+	Process * leader;
+
+	struct arch_process_data arch;
+};
+
+/* Initialize a process given a path to binary FILENAME, with a PID,
+ * and add the process to an internal chain of traced processes.  */
+int process_init(struct Process *proc, const char *filename, pid_t pid);
+
+/* PROC underwent an exec.  This is a bit like process_destroy
+ * followed by process_init, except that some state is kept and the
+ * process doesn't lose its place in the list of processes.  */
+int process_exec(struct Process *proc);
+
+/* Release any memory allocated for PROC (but not PROC itself).  Does
+ * NOT remove PROC from internal chain.
+ *
+ * XXX clearly this init/destroy pair is different than others and
+ * should be fixed.  process_init should presumably be separate from
+ * process_add.  */
+void process_destroy(struct Process *proc);
+
+struct Process *open_program(const char *filename, pid_t pid);
+void open_pid(pid_t pid);
+Process * pid2proc(pid_t pid);
+
+/* Clone the contents of PROC into the memory referenced by RETP.
+ * Returns 0 on success or a negative value on failure.  */
+int process_clone(struct Process *retp, struct Process *proc, pid_t pid);
+
+/* Iterate through the processes that ltrace currently traces.  CB is
+ * called for each process.  Tasks are considered to be processes for
+ * the purpose of this iterator.
+ *
+ * Notes on this iteration interface: The iteration starts after the
+ * process designated by START_AFTER, or at the first process if
+ * START_AFTER is NULL.  DATA is passed verbatim to CB.  If CB returns
+ * CBS_STOP, the iteration stops and the current iterator is returned.
+ * That iterator can then be used to restart the iteration.  NULL is
+ * returned when iteration ends.
+ *
+ * There's no provision for returning error states.  Errors need to be
+ * signaled to the caller via DATA, together with any other data that
+ * the callback needs.  */
+Process *each_process(Process *start_after,
+		      enum callback_status (*cb)(struct Process *proc,
+						 void *data),
+		      void *data);
+
+/* Iterate through list of tasks of given process PROC.  Restarts are
+ * supported via START_AFTER (see each_process for details of
+ * iteration interface).  */
+Process *each_task(struct Process *proc, struct Process *start_after,
+		   enum callback_status (*cb)(struct Process *proc,
+					      void *data),
+		   void *data);
+
+void change_process_leader(Process *proc, Process *leader);
+
+/* Remove process from the list of traced processes, drop any events
+ * in the event queue, destroy it and free memory.  */
+void remove_process(struct Process *proc);
+
+void install_event_handler(Process *proc, struct event_handler *handler);
+void destroy_event_handler(Process *proc);
+
+/* Add a library LIB to the list of PROC's libraries.  */
+void proc_add_library(struct Process *proc, struct library *lib);
+
+/* Remove LIB from list of PROC's libraries.  Returns 0 if the library
+ * was found and unlinked, otherwise returns a negative value.  */
+int proc_remove_library(struct Process *proc, struct library *lib);
+
+/* Iterate through the libraries of PROC.  See each_process for
+ * detailed description of the iteration interface.  */
+struct library *proc_each_library(struct Process *proc, struct library *start,
+				  enum callback_status (*cb)(struct Process *p,
+							     struct library *l,
+							     void *data),
+				  void *data);
+
+/* Insert BP into PROC.  */
+int proc_add_breakpoint(struct Process *proc, struct breakpoint *bp);
+
+/* Remove BP from PROC.  This has no reason to fail in runtime.  If it
+ * does not find BP in PROC, it's hard error guarded by assertion.  */
+void proc_remove_breakpoint(struct Process *proc, struct breakpoint *bp);
+
+/* Iterate through the breakpoints of PROC.  See each_process for
+ * detailed description of the iteration interface.  */
+void *proc_each_breakpoint(struct Process *proc, void *start,
+			   enum callback_status (*cb)(struct Process *proc,
+						      struct breakpoint *bp,
+						      void *data),
+			   void *data);
+
+#endif /* _PROC_H_ */
diff --git a/sysdeps/linux-gnu/Makefile.am b/sysdeps/linux-gnu/Makefile.am
index bd52092..e6fd7ef 100644
--- a/sysdeps/linux-gnu/Makefile.am
+++ b/sysdeps/linux-gnu/Makefile.am
@@ -28,7 +28,8 @@ ___libos_la_LIBADD = \
 noinst_HEADERS = \
 	arch_syscallent.h \
 	signalent1.h \
-	syscallent1.h
+	syscallent1.h \
+	trace.h
 
 EXTRA_DIST = \
 	arch_mksyscallent \
diff --git a/sysdeps/linux-gnu/alpha/plt.c b/sysdeps/linux-gnu/alpha/plt.c
index 83337b2..8ef456e 100644
--- a/sysdeps/linux-gnu/alpha/plt.c
+++ b/sysdeps/linux-gnu/alpha/plt.c
@@ -1,4 +1,5 @@
 #include <gelf.h>
+#include "proc.h"
 #include "common.h"
 
 GElf_Addr
diff --git a/sysdeps/linux-gnu/alpha/regs.c b/sysdeps/linux-gnu/alpha/regs.c
index 9554e48..3c02a5d 100644
--- a/sysdeps/linux-gnu/alpha/regs.c
+++ b/sysdeps/linux-gnu/alpha/regs.c
@@ -4,6 +4,7 @@
 #include <sys/ptrace.h>
 #include <asm/ptrace.h>
 
+#include "proc.h"
 #include "common.h"
 
 #if (!defined(PTRACE_PEEKUSER) && defined(PTRACE_PEEKUSR))
diff --git a/sysdeps/linux-gnu/alpha/trace.c b/sysdeps/linux-gnu/alpha/trace.c
index e4d4063..18fe395 100644
--- a/sysdeps/linux-gnu/alpha/trace.c
+++ b/sysdeps/linux-gnu/alpha/trace.c
@@ -6,6 +6,7 @@
 #include <sys/ptrace.h>
 #include <asm/ptrace.h>
 
+#include "proc.h"
 #include "common.h"
 #include "debug.h"
 
diff --git a/sysdeps/linux-gnu/arm/arch.h b/sysdeps/linux-gnu/arm/arch.h
index 8f2dfb3..d50e439 100644
--- a/sysdeps/linux-gnu/arm/arch.h
+++ b/sysdeps/linux-gnu/arm/arch.h
@@ -9,3 +9,8 @@
 
 #define LT_ELFCLASS	ELFCLASS32
 #define LT_ELF_MACHINE	EM_ARM
+
+#define ARCH_HAVE_BREAKPOINT_DATA
+struct arch_breakpoint_data {
+	int thumb_mode;
+};
diff --git a/sysdeps/linux-gnu/arm/breakpoint.c b/sysdeps/linux-gnu/arm/breakpoint.c
index 493f973..324ff07 100644
--- a/sysdeps/linux-gnu/arm/breakpoint.c
+++ b/sysdeps/linux-gnu/arm/breakpoint.c
@@ -82,3 +82,29 @@ arch_disable_breakpoint(pid_t pid, const struct breakpoint *sbp)
 		ptrace(PTRACE_POKETEXT, pid, sbp->addr + i * sizeof(long), a);
 	}
 }
+
+int
+arch_breakpoint_init(struct Process *proc, struct breakpoint *sbp)
+{
+	/* XXX That uintptr_t cast is there temporarily until
+	 * target_address_t becomes integral type.  */
+	int thumb_mode = ((uintptr_t)sbp->addr) & 1;
+	if (thumb_mode)
+		sbp->addr = (void *)((uintptr_t)sbp->addr & ~1);
+	sbp->arch.thumb_mode = thumb_mode | proc->thumb_mode;
+	/* XXX This doesn't seem like it belongs here.  */
+	proc->thumb_mode = 0;
+	return 0;
+}
+
+void
+arch_breakpoint_destroy(struct breakpoint *sbp)
+{
+}
+
+int
+arch_breakpoint_clone(struct breakpoint *retp, struct breakpoint *sbp)
+{
+	retp->arch.thumb_mode = sbp->arch.thumb_mode;
+	return 0;
+}
diff --git a/sysdeps/linux-gnu/arm/plt.c b/sysdeps/linux-gnu/arm/plt.c
index 76f4f4c..fb98d7b 100644
--- a/sysdeps/linux-gnu/arm/plt.c
+++ b/sysdeps/linux-gnu/arm/plt.c
@@ -1,4 +1,5 @@
 #include <gelf.h>
+#include "proc.h"
 #include "common.h"
 
 static int
diff --git a/sysdeps/linux-gnu/arm/regs.c b/sysdeps/linux-gnu/arm/regs.c
index b8aed6e..22bc4bf 100644
--- a/sysdeps/linux-gnu/arm/regs.c
+++ b/sysdeps/linux-gnu/arm/regs.c
@@ -4,6 +4,7 @@
 #include <sys/ptrace.h>
 #include <asm/ptrace.h>
 
+#include "proc.h"
 #include "common.h"
 
 #if (!defined(PTRACE_PEEKUSER) && defined(PTRACE_PEEKUSR))
@@ -39,9 +40,15 @@ void *
 get_return_addr(Process *proc, void *stack_pointer) {
 	long addr = ptrace(PTRACE_PEEKUSER, proc->pid, off_lr, 0);
 
+	/* Remember & unset the thumb mode bit.  XXX This is really a
+	 * bit of a hack, as we assume that the following
+	 * insert_breakpoint call will be related to this address.
+	 * This interface should really be get_return_breakpoint, or
+	 * maybe install_return_breakpoint.  */
 	proc->thumb_mode = addr & 1;
 	if (proc->thumb_mode)
 		addr &= ~1;
+
 	return (void *)addr;
 }
 
diff --git a/sysdeps/linux-gnu/arm/trace.c b/sysdeps/linux-gnu/arm/trace.c
index 39b8264..f465b72 100644
--- a/sysdeps/linux-gnu/arm/trace.c
+++ b/sysdeps/linux-gnu/arm/trace.c
@@ -7,6 +7,7 @@
 #include <sys/ptrace.h>
 #include <asm/ptrace.h>
 
+#include "proc.h"
 #include "common.h"
 #include "output.h"
 #include "ptrace.h"
diff --git a/sysdeps/linux-gnu/breakpoint.c b/sysdeps/linux-gnu/breakpoint.c
index b98374b..e05e730 100644
--- a/sysdeps/linux-gnu/breakpoint.c
+++ b/sysdeps/linux-gnu/breakpoint.c
@@ -1,11 +1,15 @@
 #include "config.h"
 
 #include <sys/ptrace.h>
+#include <errno.h>
 #include <string.h>
+#include <stdio.h>
 
 #include "common.h"
-#include "arch.h"
+#include "sysdep.h"
 #include "breakpoint.h"
+#include "proc.h"
+#include "library.h"
 
 #ifdef ARCH_HAVE_ENABLE_BREAKPOINT
 extern void arch_enable_breakpoint(pid_t, struct breakpoint *);
@@ -16,15 +20,20 @@ arch_enable_breakpoint(pid_t pid, struct breakpoint *sbp)
 	static unsigned char break_insn[] = BREAKPOINT_VALUE;
 	unsigned int i, j;
 
-	if (sbp->libsym) {
-		debug(DEBUG_PROCESS, "enable_breakpoint: pid=%d, addr=%p, symbol=%s", pid, sbp->addr, sbp->libsym->name);
-	} else {
-		debug(DEBUG_PROCESS, "enable_breakpoint: pid=%d, addr=%p", pid, sbp->addr);
-	}
+	debug(DEBUG_PROCESS,
+	      "arch_enable_breakpoint: pid=%d, addr=%p, symbol=%s",
+	      pid, sbp->addr, breakpoint_name(sbp));
 
 	for (i = 0; i < 1 + ((BREAKPOINT_LENGTH - 1) / sizeof(long)); i++) {
 		long a = ptrace(PTRACE_PEEKTEXT, pid,
 				sbp->addr + i * sizeof(long), 0);
+		if (a == -1 && errno) {
+			fprintf(stderr, "enable_breakpoint"
+				" pid=%d, addr=%p, symbol=%s: %s\n",
+				pid, sbp->addr, breakpoint_name(sbp),
+				strerror(errno));
+			return;
+		}
 		for (j = 0;
 		     j < sizeof(long)
 		     && i * sizeof(long) + j < BREAKPOINT_LENGTH; j++) {
@@ -33,7 +42,15 @@ arch_enable_breakpoint(pid_t pid, struct breakpoint *sbp)
 			sbp->orig_value[i * sizeof(long) + j] = bytes[j];
 			bytes[j] = break_insn[i * sizeof(long) + j];
 		}
-		ptrace(PTRACE_POKETEXT, pid, sbp->addr + i * sizeof(long), a);
+		a = ptrace(PTRACE_POKETEXT, pid,
+			   sbp->addr + i * sizeof(long), a);
+		if (a == -1) {
+			fprintf(stderr, "enable_breakpoint"
+				" pid=%d, addr=%p, symbol=%s: %s\n",
+				pid, sbp->addr, breakpoint_name(sbp),
+				strerror(errno));
+			return;
+		}
 	}
 }
 #endif				/* ARCH_HAVE_ENABLE_BREAKPOINT */
@@ -41,11 +58,8 @@ arch_enable_breakpoint(pid_t pid, struct breakpoint *sbp)
 void
 enable_breakpoint(Process *proc, struct breakpoint *sbp)
 {
-	if (sbp->libsym) {
-		debug(DEBUG_PROCESS, "enable_breakpoint: pid=%d, addr=%p, symbol=%s", proc->pid, sbp->addr, sbp->libsym->name);
-	} else {
-		debug(DEBUG_PROCESS, "enable_breakpoint: pid=%d, addr=%p", proc->pid, sbp->addr);
-	}
+	debug(DEBUG_PROCESS, "enable_breakpoint: pid=%d, addr=%p, symbol=%s",
+	      proc->pid, sbp->addr, breakpoint_name(sbp));
 	arch_enable_breakpoint(proc->pid, sbp);
 }
 
@@ -57,16 +71,19 @@ arch_disable_breakpoint(pid_t pid, const struct breakpoint *sbp)
 {
 	unsigned int i, j;
 
-	if (sbp->libsym) {
-		debug(DEBUG_PROCESS, "disable_breakpoint: pid=%d, addr=%p, symbol=%s", pid, sbp->addr, sbp->libsym->name);
-	} else {
-		debug(DEBUG_PROCESS, "disable_breakpoint: pid=%d, addr=%p", pid, sbp->addr);
-	}
+	debug(DEBUG_PROCESS,
+	      "arch_disable_breakpoint: pid=%d, addr=%p, symbol=%s",
+	      pid, sbp->addr, breakpoint_name(sbp));
 
 	for (i = 0; i < 1 + ((BREAKPOINT_LENGTH - 1) / sizeof(long)); i++) {
-		long a =
-		    ptrace(PTRACE_PEEKTEXT, pid, sbp->addr + i * sizeof(long),
-			   0);
+		long a = ptrace(PTRACE_PEEKTEXT, pid,
+				sbp->addr + i * sizeof(long), 0);
+		if (a == -1 && errno) {
+			fprintf(stderr,
+				"disable_breakpoint pid=%d, addr=%p: %s\n",
+				pid, sbp->addr, strerror(errno));
+			return;
+		}
 		for (j = 0;
 		     j < sizeof(long)
 		     && i * sizeof(long) + j < BREAKPOINT_LENGTH; j++) {
@@ -74,7 +91,14 @@ arch_disable_breakpoint(pid_t pid, const struct breakpoint *sbp)
 
 			bytes[j] = sbp->orig_value[i * sizeof(long) + j];
 		}
-		ptrace(PTRACE_POKETEXT, pid, sbp->addr + i * sizeof(long), a);
+		a = ptrace(PTRACE_POKETEXT, pid,
+			   sbp->addr + i * sizeof(long), a);
+		if (a == -1 && errno) {
+			fprintf(stderr,
+				"disable_breakpoint pid=%d, addr=%p: %s\n",
+				pid, sbp->addr, strerror(errno));
+			return;
+		}
 	}
 }
 #endif				/* ARCH_HAVE_DISABLE_BREAKPOINT */
@@ -82,10 +106,7 @@ arch_disable_breakpoint(pid_t pid, const struct breakpoint *sbp)
 void
 disable_breakpoint(Process *proc, struct breakpoint *sbp)
 {
-	if (sbp->libsym) {
-		debug(DEBUG_PROCESS, "disable_breakpoint: pid=%d, addr=%p, symbol=%s", proc->pid, sbp->addr, sbp->libsym->name);
-	} else {
-		debug(DEBUG_PROCESS, "disable_breakpoint: pid=%d, addr=%p", proc->pid, sbp->addr);
-	}
+	debug(DEBUG_PROCESS, "disable_breakpoint: pid=%d, addr=%p, symbol=%s",
+	      proc->pid, sbp->addr, breakpoint_name(sbp));
 	arch_disable_breakpoint(proc->pid, sbp);
 }
diff --git a/sysdeps/linux-gnu/events.c b/sysdeps/linux-gnu/events.c
index 0167049..91d873e 100644
--- a/sysdeps/linux-gnu/events.c
+++ b/sysdeps/linux-gnu/events.c
@@ -13,6 +13,7 @@
 
 #include "common.h"
 #include "breakpoint.h"
+#include "proc.h"
 
 static Event event;
 
@@ -21,10 +22,10 @@ static Event event;
 static Event * delayed_events = NULL;
 static Event * end_delayed_events = NULL;
 
-static enum pcb_status
+static enum callback_status
 first (Process * proc, void * data)
 {
-	return pcb_stop;
+	return CBS_STOP;
 }
 
 void
@@ -174,14 +175,6 @@ next_event(void)
 	get_arch_dep(event.proc);
 	debug(3, "event from pid %u", pid);
 	Process *leader = event.proc->leader;
-	if (leader == event.proc) {
-		if (!event.proc->libdl_hooked) {
-			/* debug struct may not have been written yet.. */
-			if (linkmap_init(event.proc, &main_lte) == 0) {
-				event.proc->libdl_hooked = 1;
-			}
-		}
-	}
 
 	/* The process should be stopped after the waitpid call.  But
 	 * when the whole thread group is terminated, we see
diff --git a/sysdeps/linux-gnu/i386/plt.c b/sysdeps/linux-gnu/i386/plt.c
index b53ff44..daaf15a 100644
--- a/sysdeps/linux-gnu/i386/plt.c
+++ b/sysdeps/linux-gnu/i386/plt.c
@@ -1,12 +1,16 @@
 #include <gelf.h>
-#include "common.h"
+#include "proc.h"
+#include "library.h"
+#include "ltrace-elf.h"
 
 GElf_Addr
-arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela * rela) {
+arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela)
+{
 	return lte->plt_addr + (ndx + 1) * 16;
 }
 
 void *
-sym2addr(Process *proc, struct library_symbol *sym) {
+sym2addr(struct Process *proc, struct library_symbol *sym)
+{
 	return sym->enter_addr;
 }
diff --git a/sysdeps/linux-gnu/i386/regs.c b/sysdeps/linux-gnu/i386/regs.c
index 6777f17..a1584ac 100644
--- a/sysdeps/linux-gnu/i386/regs.c
+++ b/sysdeps/linux-gnu/i386/regs.c
@@ -4,7 +4,7 @@
 #include <sys/ptrace.h>
 #include <asm/ptrace.h>
 
-#include "common.h"
+#include "proc.h"
 
 #if (!defined(PTRACE_PEEKUSER) && defined(PTRACE_PEEKUSR))
 # define PTRACE_PEEKUSER PTRACE_PEEKUSR
diff --git a/sysdeps/linux-gnu/i386/trace.c b/sysdeps/linux-gnu/i386/trace.c
index 76f1105..f0c1e50 100644
--- a/sysdeps/linux-gnu/i386/trace.c
+++ b/sysdeps/linux-gnu/i386/trace.c
@@ -1,12 +1,14 @@
 #include "config.h"
 
-#include <stdlib.h>
+#include <sys/ptrace.h>
 #include <sys/types.h>
 #include <sys/wait.h>
-#include <signal.h>
-#include <sys/ptrace.h>
 #include <asm/ptrace.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdlib.h>
 
+#include "proc.h"
 #include "common.h"
 
 #if (!defined(PTRACE_PEEKUSER) && defined(PTRACE_PEEKUSR))
@@ -24,20 +26,32 @@ get_arch_dep(Process *proc) {
 /* Returns 1 if syscall, 2 if sysret, 0 otherwise.
  */
 int
-syscall_p(Process *proc, int status, int *sysnum) {
+syscall_p(struct Process *proc, int status, int *sysnum)
+{
 	if (WIFSTOPPED(status)
 	    && WSTOPSIG(status) == (SIGTRAP | proc->tracesysgood)) {
+		struct callstack_element *elem = NULL;
+		if (proc->callstack_depth > 0)
+			elem = proc->callstack + proc->callstack_depth - 1;
+
 		*sysnum = ptrace(PTRACE_PEEKUSER, proc->pid, 4 * ORIG_EAX, 0);
+		if (*sysnum == -1) {
+			if (errno)
+				return -1;
+			/* Otherwise, ORIG_EAX == -1 means that the
+			 * system call should not be restarted.  In
+			 * that case rely on what we have on
+			 * stack.  */
+			if (elem != NULL && elem->is_syscall)
+				*sysnum = elem->c_un.syscall;
+		}
 
-		if (proc->callstack_depth > 0 &&
-				proc->callstack[proc->callstack_depth - 1].is_syscall &&
-				proc->callstack[proc->callstack_depth - 1].c_un.syscall == *sysnum) {
+		if (elem != NULL && elem->is_syscall
+		    && elem->c_un.syscall == *sysnum)
 			return 2;
-		}
 
-		if (*sysnum >= 0) {
+		if (*sysnum >= 0)
 			return 1;
-		}
 	}
 	return 0;
 }
diff --git a/sysdeps/linux-gnu/ia64/plt.c b/sysdeps/linux-gnu/ia64/plt.c
index 7fd451b..323df65 100644
--- a/sysdeps/linux-gnu/ia64/plt.c
+++ b/sysdeps/linux-gnu/ia64/plt.c
@@ -1,4 +1,5 @@
 #include <gelf.h>
+#include "proc.h"
 #include "common.h"
 
 /* A bundle is 128 bits */
diff --git a/sysdeps/linux-gnu/ia64/regs.c b/sysdeps/linux-gnu/ia64/regs.c
index 3f5d951..64c0164 100644
--- a/sysdeps/linux-gnu/ia64/regs.c
+++ b/sysdeps/linux-gnu/ia64/regs.c
@@ -8,6 +8,7 @@
 #include <asm/rse.h>
 
 #include <stddef.h>
+#include "proc.h"
 #include "common.h"
 
 void *
diff --git a/sysdeps/linux-gnu/ia64/trace.c b/sysdeps/linux-gnu/ia64/trace.c
index 079ed55..385fac1 100644
--- a/sysdeps/linux-gnu/ia64/trace.c
+++ b/sysdeps/linux-gnu/ia64/trace.c
@@ -11,6 +11,7 @@
 #include <asm/rse.h>
 #include <errno.h>
 
+#include "proc.h"
 #include "common.h"
 
 /* What we think of as a bundle, ptrace thinks of it as two unsigned
diff --git a/sysdeps/linux-gnu/m68k/plt.c b/sysdeps/linux-gnu/m68k/plt.c
index 508d7fc..a1c2604 100644
--- a/sysdeps/linux-gnu/m68k/plt.c
+++ b/sysdeps/linux-gnu/m68k/plt.c
@@ -1,4 +1,5 @@
 #include <gelf.h>
+#include "proc.h"
 #include "common.h"
 
 GElf_Addr
diff --git a/sysdeps/linux-gnu/m68k/regs.c b/sysdeps/linux-gnu/m68k/regs.c
index 959a60e..1542b5a 100644
--- a/sysdeps/linux-gnu/m68k/regs.c
+++ b/sysdeps/linux-gnu/m68k/regs.c
@@ -4,6 +4,7 @@
 #include <sys/ptrace.h>
 #include <asm/ptrace.h>
 
+#include "proc.h"
 #include "common.h"
 
 #if (!defined(PTRACE_PEEKUSER) && defined(PTRACE_PEEKUSR))
diff --git a/sysdeps/linux-gnu/m68k/trace.c b/sysdeps/linux-gnu/m68k/trace.c
index 2f89fdf..c63702d 100644
--- a/sysdeps/linux-gnu/m68k/trace.c
+++ b/sysdeps/linux-gnu/m68k/trace.c
@@ -6,6 +6,7 @@
 #include <sys/ptrace.h>
 #include <asm/ptrace.h>
 
+#include "proc.h"
 #include "common.h"
 
 #if (!defined(PTRACE_PEEKUSER) && defined(PTRACE_PEEKUSR))
diff --git a/sysdeps/linux-gnu/mipsel/arch.h b/sysdeps/linux-gnu/mipsel/arch.h
index dd0ca35..f7e2316 100644
--- a/sysdeps/linux-gnu/mipsel/arch.h
+++ b/sysdeps/linux-gnu/mipsel/arch.h
@@ -1,3 +1,8 @@
+#ifndef LTRACE_MIPS_ARCH_H
+#define LTRACE_MIPS_ARCH_H
+
+#include <stddef.h>
+
 #define BREAKPOINT_VALUE { 0x0d, 0x00, 0x00, 0x00 }
 #define BREAKPOINT_LENGTH 4
 #define DECR_PC_AFTER_BREAK 0
@@ -7,3 +12,12 @@
 
 #define PLTs_INIT_BY_HERE "_start"
 #define E_ENTRY_NAME    "_start"
+
+#define ARCH_HAVE_LTELF_DATA
+struct arch_ltelf_data {
+	size_t pltgot_addr;
+	size_t mips_local_gotno;
+	size_t mips_gotsym;
+};
+
+#endif /* LTRACE_MIPS_ARCH_H */
diff --git a/sysdeps/linux-gnu/mipsel/plt.c b/sysdeps/linux-gnu/mipsel/plt.c
index 57dfb9a..3ffaddf 100644
--- a/sysdeps/linux-gnu/mipsel/plt.c
+++ b/sysdeps/linux-gnu/mipsel/plt.c
@@ -1,6 +1,8 @@
 #include "debug.h"
 #include <gelf.h>
 #include <sys/ptrace.h>
+#include <error.h>
+#include "proc.h"
 #include "common.h"
 
 /**
@@ -69,4 +71,62 @@ sym2addr(Process *proc, struct library_symbol *sym) {
     return (void *)ret;;
 }
 
+/**
+  MIPS ABI Supplement:
+
+  DT_PLTGOT This member holds the address of the .got section.
+
+  DT_MIPS_SYMTABNO This member holds the number of entries in the
+  .dynsym section.
+
+  DT_MIPS_LOCAL_GOTNO This member holds the number of local global
+  offset table entries.
+
+  DT_MIPS_GOTSYM This member holds the index of the first dynamic
+  symbol table entry that corresponds to an entry in the global offset
+  table.
+
+ */
+int
+arch_elf_init(struct ltelf *lte)
+{
+	Elf_Scn *scn;
+	GElf_Shdr shdr;
+	if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0
+	    || scn == NULL) {
+	fail:
+		error(0, 0, "Couldn't get SHT_DYNAMIC: %s",
+		      elf_errmsg(-1));
+		return -1;
+	}
+
+	Elf_Data *data = elf_loaddata(scn, &shdr);
+	if (data == NULL)
+		goto fail;
+
+	size_t j;
+	for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
+		GElf_Dyn dyn;
+		if (gelf_getdyn(data, j, &dyn) == NULL)
+			goto fail;
+
+		if(dyn.d_tag == DT_PLTGOT) {
+			lte->arch.pltgot_addr = dyn.d_un.d_ptr;
+		}
+		if(dyn.d_tag == DT_MIPS_LOCAL_GOTNO){
+			lte->arch.mips_local_gotno = dyn.d_un.d_val;
+		}
+		if(dyn.d_tag == DT_MIPS_GOTSYM){
+			lte->arch.mips_gotsym = dyn.d_un.d_val;
+		}
+	}
+
+	return 0;
+}
+
+void
+arch_elf_destroy(struct ltelf *lte)
+{
+}
+
 /**@}*/
diff --git a/sysdeps/linux-gnu/mipsel/regs.c b/sysdeps/linux-gnu/mipsel/regs.c
index badbb10..a8a9b10 100644
--- a/sysdeps/linux-gnu/mipsel/regs.c
+++ b/sysdeps/linux-gnu/mipsel/regs.c
@@ -5,6 +5,7 @@
 #include <sys/ptrace.h>
 #include <asm/ptrace.h>
 
+#include "proc.h"
 #include "common.h"
 #include "mipsel.h"
 
diff --git a/sysdeps/linux-gnu/mipsel/trace.c b/sysdeps/linux-gnu/mipsel/trace.c
index 6553967..4b999e4 100644
--- a/sysdeps/linux-gnu/mipsel/trace.c
+++ b/sysdeps/linux-gnu/mipsel/trace.c
@@ -6,6 +6,7 @@
 #include <sys/ptrace.h>
 #include <asm/ptrace.h>
 #include "debug.h"
+#include "proc.h"
 #include "common.h"
 #include "mipsel.h"
 #if (!defined(PTRACE_PEEKUSER) && defined(PTRACE_PEEKUSR))
diff --git a/sysdeps/linux-gnu/ppc/arch.h b/sysdeps/linux-gnu/ppc/arch.h
index 64c1821..6e258e7 100644
--- a/sysdeps/linux-gnu/ppc/arch.h
+++ b/sysdeps/linux-gnu/ppc/arch.h
@@ -1,3 +1,8 @@
+#ifndef LTRACE_PPC_ARCH_H
+#define LTRACE_PPC_ARCH_H
+
+#include <gelf.h>
+
 #define BREAKPOINT_VALUE { 0x7f, 0xe0, 0x00, 0x08 }
 #define BREAKPOINT_LENGTH 4
 #define DECR_PC_AFTER_BREAK 0
@@ -13,13 +18,68 @@
 
 #define PLT_REINITALISATION_BP    "_start"
 
-/* Start of arch-specific functions.  */
 #define ARCH_ENDIAN_BIG
 #define ARCH_HAVE_ATOMIC_SINGLESTEP
+#define ARCH_HAVE_ADD_PLT_ENTRY
+#define ARCH_HAVE_TRANSLATE_ADDRESS
+#define ARCH_HAVE_DYNLINK_DONE
 
-#define PPC_NOP { 0x60, 0x00, 0x00, 0x00 }
-#define PPC_NOP_LENGTH 4
+struct library_symbol;
 
-#if (PPC_NOP_LENGTH != BREAKPOINT_LENGTH)
-#error "Length of the breakpoint value not equal to the length of a nop instruction"
-#endif
+#define ARCH_HAVE_LTELF_DATA
+struct arch_ltelf_data {
+	GElf_Addr plt_stub_vma;
+	struct library_symbol *stubs;
+	Elf_Data *opd_data;
+	GElf_Addr opd_base;
+	GElf_Xword opd_size;
+	int secure_plt;
+};
+
+#define ARCH_HAVE_LIBRARY_DATA
+struct arch_library_data {
+	int bss_plt_prelinked;
+};
+
+enum ppc64_plt_type {
+	/* Either a non-PLT symbol, or PPC32 symbol.  */
+	PPC_DEFAULT = 0,
+
+	/* PPC64 STUB, never resolved.  */
+	PPC64_PLT_STUB,
+
+	/* Unresolved PLT symbol (.plt contains PLT address).  */
+	PPC_PLT_UNRESOLVED,
+
+	/* Resolved PLT symbol.  The corresponding .plt slot contained
+	 * target address, which was changed to the address of
+	 * corresponding PLT entry.  The original is now saved in
+	 * RESOLVED_VALUE.  */
+	PPC_PLT_RESOLVED,
+};
+
+#define ARCH_HAVE_LIBRARY_SYMBOL_DATA
+struct arch_library_symbol_data {
+	enum ppc64_plt_type type;
+	GElf_Addr resolved_value;
+
+	/* Address of corresponding slot in .plt.  */
+	GElf_Addr plt_slot_addr;
+};
+
+#define ARCH_HAVE_BREAKPOINT_DATA
+struct arch_breakpoint_data {
+	/* We need this just for arch_breakpoint_init.  */
+};
+
+#define ARCH_HAVE_PROCESS_DATA
+struct arch_process_data {
+	/* Breakpoint that hits when the dynamic linker is about to
+	 * update a .plt slot.  NULL before that address is known.  */
+	struct breakpoint *dl_plt_update_bp;
+
+	/* PLT update breakpoint looks here for the handler.  */
+	struct process_stopping_handler *handler;
+};
+
+#endif /* LTRACE_PPC_ARCH_H */
diff --git a/sysdeps/linux-gnu/ppc/plt.c b/sysdeps/linux-gnu/ppc/plt.c
index 668f63d..3b6a25f 100644
--- a/sysdeps/linux-gnu/ppc/plt.c
+++ b/sysdeps/linux-gnu/ppc/plt.c
@@ -1,55 +1,986 @@
 #include <gelf.h>
 #include <sys/ptrace.h>
+#include <errno.h>
+#include <error.h>
+#include <inttypes.h>
+#include <assert.h>
+#include <string.h>
+
+#include "proc.h"
 #include "common.h"
+#include "library.h"
+#include "breakpoint.h"
+#include "linux-gnu/trace.h"
+
+/* There are two PLT types on 32-bit PPC: old-style, BSS PLT, and
+ * new-style "secure" PLT.  We can tell one from the other by the
+ * flags on the .plt section.  If it's +X (executable), it's BSS PLT,
+ * otherwise it's secure.
+ *
+ * BSS PLT works the same way as most architectures: the .plt section
+ * contains trampolines and we put breakpoints to those.  If not
+ * prelinked, .plt contains zeroes, and dynamic linker fills in the
+ * initial set of trampolines, which means that we need to delay
+ * enabling breakpoints until after binary entry point is hit.
+ * Additionally, after first call, dynamic linker updates .plt with
+ * branch to resolved address.  That means that on first hit, we must
+ * do something similar to the PPC64 gambit described below.
+ *
+ * With secure PLT, the .plt section doesn't contain instructions but
+ * addresses.  The real PLT table is stored in .text.  Addresses of
+ * those PLT entries can be computed, and apart from the fact that
+ * they are in .text, they are ordinary PLT entries.
+ *
+ * 64-bit PPC is more involved.  Program linker creates for each
+ * library call a _stub_ symbol named xxxxxxxx.plt_call.<callee>
+ * (where xxxxxxxx is a hexadecimal number).  That stub does the call
+ * dispatch: it loads an address of a function to call from the
+ * section .plt, and branches.  PLT entries themselves are essentially
+ * a curried call to the resolver.  When the symbol is resolved, the
+ * resolver updates the value stored in .plt, and the next time
+ * around, the stub calls the library function directly.  So we make
+ * at most one trip (none if the binary is prelinked) through each PLT
+ * entry, and correspondingly that is useless as a breakpoint site.
+ *
+ * Note the three confusing terms: stubs (that play the role of PLT
+ * entries), PLT entries, .plt section.
+ *
+ * We first check symbol tables and see if we happen to have stub
+ * symbols available.  If yes we just put breakpoints to those, and
+ * treat them as usual breakpoints.  The only tricky part is realizing
+ * that there can be more than one breakpoint per symbol.
+ *
+ * The case that we don't have the stub symbols available is harder.
+ * The following scheme uses two kinds of PLT breakpoints: unresolved
+ * and resolved (to some address).  When the process starts (or when
+ * we attach), we distribute unresolved PLT breakpoints to the PLT
+ * entries (not stubs).  Then we look in .plt, and for each entry
+ * whose value is different than the corresponding PLT entry address,
+ * we assume it was already resolved, and convert the breakpoint to
+ * resolved.  We also rewrite the resolved value in .plt back to the
+ * PLT address.
+ *
+ * When a PLT entry hits a resolved breakpoint (which happens because
+ * we rewrite .plt with the original unresolved addresses), we move
+ * the instruction pointer to the corresponding address and continue
+ * the process as if nothing happened.
+ *
+ * When unresolved PLT entry is called for the first time, we need to
+ * catch the new value that the resolver will write to a .plt slot.
+ * We also need to prevent another thread from racing through and
+ * taking the branch without ltrace noticing.  So when unresolved PLT
+ * entry hits, we have to stop all threads.  We then single-step
+ * through the resolver, until the .plt slot changes.  When it does,
+ * we treat it the same way as above: convert the PLT breakpoint to
+ * resolved, and rewrite the .plt value back to PLT address.  We then
+ * start all threads again.
+ *
+ * As an optimization, we remember the address where the address was
+ * resolved, and put a breakpoint there.  The next time around (when
+ * the next PLT entry is to be resolved), instead of single-stepping
+ * through half the dynamic linker, we just let the thread run and hit
+ * this breakpoint.  When it hits, we know the PLT entry was resolved.
+ *
+ * XXX TODO If we have hardware watch point, we might put a read watch
+ * on .plt slot, and discover the offenders this way.  I don't know
+ * the details, but I assume at most a handful (like, one or two, if
+ * available at all) addresses may be watched at a time, and thus this
+ * would be used as an amendment of the above rather than full-on
+ * solution to PLT tracing on PPC.
+ */
+
+#define PPC_PLT_STUB_SIZE 16
+#define PPC64_PLT_STUB_SIZE 8 //xxx
+
+static inline int
+host_powerpc64()
+{
+#ifdef __powerpc64__
+	return 1;
+#else
+	return 0;
+#endif
+}
+
+int
+read_target_4(struct Process *proc, target_address_t addr, uint32_t *lp)
+{
+	unsigned long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
+	if (l == -1UL && errno)
+		return -1;
+#ifdef __powerpc64__
+	l >>= 32;
+#endif
+	*lp = l;
+	return 0;
+}
+
+static int
+read_target_8(struct Process *proc, target_address_t addr, uint64_t *lp)
+{
+	unsigned long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
+	if (l == -1UL && errno)
+		return -1;
+	if (host_powerpc64()) {
+		*lp = l;
+	} else {
+		unsigned long l2 = ptrace(PTRACE_PEEKTEXT, proc->pid,
+					  addr + 4, 0);
+		if (l2 == -1UL && errno)
+			return -1;
+		*lp = ((uint64_t)l << 32) | l2;
+	}
+	return 0;
+}
+
+int
+read_target_long(struct Process *proc, target_address_t addr, uint64_t *lp)
+{
+	if (proc->e_machine == EM_PPC) {
+		uint32_t w;
+		int ret = read_target_4(proc, addr, &w);
+		if (ret >= 0)
+			*lp = (uint64_t)w;
+		return ret;
+	} else {
+		return read_target_8(proc, addr, lp);
+	}
+}
+
+static enum callback_status
+reenable_breakpoint(struct Process *proc, struct breakpoint *bp, void *data)
+{
+	/* We don't need to re-enable non-PLT breakpoints and
+	 * breakpoints that are not PPC32 BSS unprelinked.  */
+	if (bp->libsym == NULL
+	    || bp->libsym->plt_type == LS_TOPLT_NONE
+	    || bp->libsym->lib->arch.bss_plt_prelinked != 0)
+		return CBS_CONT;
+
+	debug(DEBUG_PROCESS, "pid=%d reenable_breakpoint %s",
+	      proc->pid, breakpoint_name(bp));
+
+	assert(proc->e_machine == EM_PPC);
+	uint64_t l;
+	if (read_target_8(proc, bp->addr, &l) < 0) {
+		error(0, errno, "couldn't read PLT value for %s(%p)",
+		      breakpoint_name(bp), bp->addr);
+		return CBS_CONT;
+	}
+
+	/* XXX double cast  */
+	bp->libsym->arch.plt_slot_addr = (GElf_Addr)(uintptr_t)bp->addr;
+
+	/* If necessary, re-enable the breakpoint if it was
+	 * overwritten by the dynamic linker.  */
+	union {
+		uint32_t insn;
+		char buf[4];
+	} u = { .buf = BREAKPOINT_VALUE };
+	if (l >> 32 == u.insn)
+		debug(DEBUG_PROCESS, "pid=%d, breakpoint still present"
+		      " at %p, avoiding reenable", proc->pid, bp->addr);
+	else
+		enable_breakpoint(proc, bp);
+
+	bp->libsym->arch.resolved_value = l;
+
+	return CBS_CONT;
+}
+
+void
+arch_dynlink_done(struct Process *proc)
+{
+	/* On PPC32, .plt of objects that use BSS PLT are overwritten
+	 * by the dynamic linker (unless that object was prelinked).
+	 * We need to re-enable breakpoints in those objects.  */
+	proc_each_breakpoint(proc, NULL, reenable_breakpoint, NULL);
+}
 
 GElf_Addr
-arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela * rela) {
-	return rela->r_offset;
+arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela *rela)
+{
+	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
+		assert(lte->arch.plt_stub_vma != 0);
+		return lte->arch.plt_stub_vma + PPC_PLT_STUB_SIZE * ndx;
+
+	} else if (lte->ehdr.e_machine == EM_PPC) {
+		return rela->r_offset;
+
+	} else {
+		/* If we get here, we don't have stub symbols.  In
+		 * that case we put breakpoints to PLT entries the same
+		 * as the PPC32 secure PLT case does.  */
+		assert(lte->arch.plt_stub_vma != 0);
+		return lte->arch.plt_stub_vma + PPC64_PLT_STUB_SIZE * ndx;
+	}
+}
+
+/* This entry point is called when ltelf is not available
+ * anymore--during runtime.  At that point we don't have to concern
+ * ourselves with bias, as the values in OPD have been resolved
+ * already.  */
+int
+arch_translate_address_dyn(struct Process *proc,
+			   target_address_t addr, target_address_t *ret)
+{
+	if (proc->e_machine == EM_PPC64) {
+		uint64_t value;
+		if (read_target_8(proc, addr, &value) < 0) {
+			error(0, errno, "dynamic .opd translation of %p", addr);
+			return -1;
+		}
+		*ret = (target_address_t)value;
+		return 0;
+	}
+
+	*ret = addr;
+	return 0;
+}
+
+int
+arch_translate_address(struct ltelf *lte,
+		       target_address_t addr, target_address_t *ret)
+{
+	if (lte->ehdr.e_machine == EM_PPC64) {
+		GElf_Xword offset = (GElf_Addr)addr - lte->arch.opd_base;
+		uint64_t value;
+		if (elf_read_u64(lte->arch.opd_data, offset, &value) < 0) {
+			error(0, 0, "static .opd translation of %p: %s", addr,
+			      elf_errmsg(-1));
+			return -1;
+		}
+		*ret = (target_address_t)(value + lte->bias);
+		return 0;
+	}
+
+	*ret = addr;
+	return 0;
+}
+
+static int
+load_opd_data(struct ltelf *lte, struct library *lib)
+{
+	Elf_Scn *sec;
+	GElf_Shdr shdr;
+	if (elf_get_section_named(lte, ".opd", &sec, &shdr) < 0) {
+	fail:
+		fprintf(stderr, "couldn't find .opd data\n");
+		return -1;
+	}
+
+	lte->arch.opd_data = elf_rawdata(sec, NULL);
+	if (lte->arch.opd_data == NULL)
+		goto fail;
+
+	lte->arch.opd_base = shdr.sh_addr + lte->bias;
+	lte->arch.opd_size = shdr.sh_size;
+
+	return 0;
 }
 
 void *
-sym2addr(Process *proc, struct library_symbol *sym) {
-	void *addr = sym->enter_addr;
-	long pt_ret;
+sym2addr(struct Process *proc, struct library_symbol *sym)
+{
+	return sym->enter_addr;
+}
+
+static GElf_Addr
+get_glink_vma(struct ltelf *lte, GElf_Addr ppcgot, Elf_Data *plt_data)
+{
+	Elf_Scn *ppcgot_sec = NULL;
+	GElf_Shdr ppcgot_shdr;
+	if (ppcgot != 0
+	    && elf_get_section_covering(lte, ppcgot,
+					&ppcgot_sec, &ppcgot_shdr) < 0)
+		error(0, 0, "DT_PPC_GOT=%#"PRIx64", but no such section found",
+		      ppcgot);
+
+	if (ppcgot_sec != NULL) {
+		Elf_Data *data = elf_loaddata(ppcgot_sec, &ppcgot_shdr);
+		if (data == NULL || data->d_size < 8 ) {
+			error(0, 0, "couldn't read GOT data");
+		} else {
+			// where PPCGOT begins in .got
+			size_t offset = ppcgot - ppcgot_shdr.sh_addr;
+			assert(offset % 4 == 0);
+			uint32_t glink_vma;
+			if (elf_read_u32(data, offset + 4, &glink_vma) < 0) {
+				error(0, 0, "couldn't read glink VMA address"
+				      " at %zu@GOT", offset);
+				return 0;
+			}
+			if (glink_vma != 0) {
+				debug(1, "PPC GOT glink_vma address: %#" PRIx32,
+				      glink_vma);
+				return (GElf_Addr)glink_vma;
+			}
+		}
+	}
+	/* Otherwise fall back to the first 32-bit word of PLT_DATA.  */
+	if (plt_data != NULL) {
+		uint32_t glink_vma;
+		if (elf_read_u32(plt_data, 0, &glink_vma) < 0) {
+			error(0, 0, "couldn't read glink VMA address");
+			return 0;
+		}
+		debug(1, ".plt glink_vma address: %#" PRIx32, glink_vma);
+		return (GElf_Addr)glink_vma;
+	}
+
+	return 0;
+}
+
+static int
+load_dynamic_entry(struct ltelf *lte, int tag, GElf_Addr *valuep)
+{
+	Elf_Scn *scn;
+	GElf_Shdr shdr;
+	if (elf_get_section_type(lte, SHT_DYNAMIC, &scn, &shdr) < 0
+	    || scn == NULL || shdr.sh_entsize == 0) { /* no div-by-0 below */
+	fail:
+		error(0, 0, "Couldn't get SHT_DYNAMIC: %s",
+		      elf_errmsg(-1));
+		return -1;
+	}
 
-	debug(3, 0);
+	Elf_Data *data = elf_loaddata(scn, &shdr);
+	if (data == NULL)
+		goto fail;
 
-	if (sym->plt_type != LS_TOPLT_POINT) {
-		return addr;
+	size_t j;
+	for (j = 0; j < shdr.sh_size / shdr.sh_entsize; ++j) {
+		GElf_Dyn dyn;
+		if (gelf_getdyn(data, j, &dyn) == NULL)
+			goto fail;
+		/* Report the first entry carrying the requested tag.  */
+		if(dyn.d_tag == tag) {
+			*valuep = dyn.d_un.d_ptr;
+			return 0;
+		}
 	}
 
-	if (proc->pid == 0) {
+	return -1;
+}
+
+static int
+load_ppcgot(struct ltelf *lte, GElf_Addr *ppcgotp)
+{
+	return load_dynamic_entry(lte, DT_PPC_GOT, ppcgotp);
+}
+
+static int
+load_ppc64_glink(struct ltelf *lte, GElf_Addr *glinkp)
+{
+	return load_dynamic_entry(lte, DT_PPC64_GLINK, glinkp);
+}
+
+static int
+nonzero_data(Elf_Data *data)
+{
+	/* We are not supposed to get here if there's no PLT.  */
+	assert(data != NULL);
+
+	unsigned char *buf = data->d_buf;
+	if (buf == NULL)
 		return 0;
+
+	size_t i;
+	for (i = 0; i < data->d_size; ++i)
+		if (buf[i] != 0)
+			return 1;
+	return 0;
+}
+
+int
+arch_elf_init(struct ltelf *lte, struct library *lib)
+{
+	if (lte->ehdr.e_machine == EM_PPC64
+	    && load_opd_data(lte, lib) < 0)
+		return -1;
+
+	lte->arch.secure_plt = !(lte->plt_flags & SHF_EXECINSTR);
+
+	/* For PPC32 BSS, it is important whether the binary was
+	 * prelinked.  If .plt section is NODATA, or if it contains
+	 * zeroes, then this library is not prelinked, and we need to
+	 * delay breakpoints.  */
+	if (lte->ehdr.e_machine == EM_PPC && !lte->arch.secure_plt)
+		lib->arch.bss_plt_prelinked = nonzero_data(lte->plt_data);
+	else
+		/* For cases where it's irrelevant, initialize the
+		 * value to something conspicuous.  */
+		lib->arch.bss_plt_prelinked = -1;
+
+	if (lte->ehdr.e_machine == EM_PPC && lte->arch.secure_plt) {
+		GElf_Addr ppcgot;
+		if (load_ppcgot(lte, &ppcgot) < 0) {
+			error(0, 0, "couldn't find DT_PPC_GOT");
+			return -1;
+		}
+		GElf_Addr glink_vma = get_glink_vma(lte, ppcgot, lte->plt_data);
+
+		assert (lte->relplt_size % 12 == 0);
+		size_t count = lte->relplt_size / 12; // size of RELA entry
+		lte->arch.plt_stub_vma = glink_vma
+			- (GElf_Addr)count * PPC_PLT_STUB_SIZE;
+		debug(1, "stub_vma is %#" PRIx64, lte->arch.plt_stub_vma);
+
+	} else if (lte->ehdr.e_machine == EM_PPC64) {
+		GElf_Addr glink_vma;
+		if (load_ppc64_glink(lte, &glink_vma) < 0) {
+			error(0, 0, "couldn't find DT_PPC64_GLINK");
+			return -1;
+		}
+
+		/* The first glink stub starts at offset 32.  */
+		lte->arch.plt_stub_vma = glink_vma + 32;
 	}
 
-	if (options.debug >= 3) {
-		xinfdump(proc->pid, (void *)(((long)addr-32)&0xfffffff0),
-			 sizeof(void*)*8);
+	/* On PPC64, look for stub symbols in symbol table.  These are
+	 * called: xxxxxxxx.plt_call.callee_name@version+addend.  */
+	if (lte->ehdr.e_machine == EM_PPC64
+	    && lte->symtab != NULL && lte->strtab != NULL) {
+
+		/* N.B. We can't simply skip the symbols that we fail
+		 * to read or malloc.  There may be more than one stub
+		 * per symbol name, and if we failed in one but
+		 * succeeded in another, the PLT enabling code would
+		 * have no way to tell that something is missing.  We
+		 * could work around that, of course, but it doesn't
+		 * seem worth the trouble.  So if anything fails, we
+		 * just pretend that we don't have stub symbols at
+		 * all, as if the binary is stripped.  */
+
+		size_t i;
+		for (i = 0; i < lte->symtab_count; ++i) {
+			GElf_Sym sym;
+			if (gelf_getsym(lte->symtab, i, &sym) == NULL) {
+				struct library_symbol *sym, *next;
+			fail:
+				for (sym = lte->arch.stubs; sym != NULL; ) {
+					next = sym->next;
+					library_symbol_destroy(sym);
+					free(sym);
+					sym = next;
+				}
+				lte->arch.stubs = NULL;
+				break;
+			}
+
+			const char *name = lte->strtab + sym.st_name;
+
+#define STUBN ".plt_call."
+			if ((name = strstr(name, STUBN)) == NULL)
+				continue;
+			name += sizeof(STUBN) - 1;
+#undef STUBN
+
+			size_t len;
+			const char *ver = strchr(name, '@');
+			if (ver != NULL) {
+				len = ver - name;
+
+			} else {
+				/* If there is "+" at all, check that
+				 * the symbol name ends in "+0".  */
+				const char *add = strrchr(name, '+');
+				if (add != NULL) {
+					assert(strcmp(add, "+0") == 0);
+					len = add - name;
+				} else {
+					len = strlen(name);
+				}
+			}
+
+			char *sym_name = strndup(name, len);
+			struct library_symbol *libsym = malloc(sizeof(*libsym));
+			if (sym_name == NULL || libsym == NULL) {
+			fail2:
+				free(sym_name);
+				free(libsym);
+				goto fail;
+			}
+
+			/* XXX The double cast should be removed when
+			 * target_address_t becomes integral type.  */
+			target_address_t addr = (target_address_t)
+				(uintptr_t)sym.st_value + lte->bias;
+			if (library_symbol_init(libsym, addr, sym_name, 1,
+						LS_TOPLT_EXEC) < 0)
+				goto fail2;
+			libsym->arch.type = PPC64_PLT_STUB;
+			libsym->next = lte->arch.stubs;
+			lte->arch.stubs = libsym;
+		}
+	}
+
+	return 0;
+}
+
+static int
+read_plt_slot_value(struct Process *proc, GElf_Addr addr, GElf_Addr *valp)
+{
+	/* On PPC64, we read from .plt, which contains 8 byte
+	 * addresses.  On PPC32 we read from .plt, which contains 4
+	 * byte instructions, but the PLT is two instructions, and
+	 * either can change.  */
+	uint64_t l;
+	/* XXX double cast.  */
+	if (read_target_8(proc, (target_address_t)(uintptr_t)addr, &l) < 0) {
+		error(0, errno, "ptrace .plt slot value @%#" PRIx64, addr);
+		return -1;
 	}
 
-	// On a PowerPC-64 system, a plt is three 64-bit words: the first is the
-	// 64-bit address of the routine.  Before the PLT has been initialized,
-	// this will be 0x0. In fact, the symbol table won't have the plt's
-	// address even.  Ater the PLT has been initialized, but before it has
-	// been resolved, the first word will be the address of the function in
-	// the dynamic linker that will reslove the PLT.  After the PLT is
-	// resolved, this will will be the address of the routine whose symbol
-	// is in the symbol table.
+	*valp = (GElf_Addr)l;
+	return 0;
+}
+
+static int
+unresolve_plt_slot(struct Process *proc, GElf_Addr addr, GElf_Addr value)
+{
+	/* We only modify plt_entry[0], which holds the resolved
+	 * address of the routine.  We keep the TOC and environment
+	 * pointers intact.  Hence the only adjustment that we need to
+	 * do is to IP.  */
+	if (ptrace(PTRACE_POKETEXT, proc->pid, addr, value) < 0) {
+		error(0, errno, "unresolve .plt slot");
+		return -1;
+	}
+	return 0;
+}
+
+static void
+mark_as_resolved(struct library_symbol *libsym, GElf_Addr value)
+{
+	libsym->arch.type = PPC_PLT_RESOLVED;
+	libsym->arch.resolved_value = value;
+}
 
-	// On a PowerPC-32 system, there are two types of PLTs: secure (new) and
-	// non-secure (old).  For the secure case, the PLT is simply a pointer
-	// and we can treat it much as we do for the PowerPC-64 case.  For the
-	// non-secure case, the PLT is executable code and we can put the
-	// break-point right in the PLT.
+enum plt_status
+arch_elf_add_plt_entry(struct Process *proc, struct ltelf *lte,
+		       const char *a_name, GElf_Rela *rela, size_t ndx,
+		       struct library_symbol **ret)
+{
+	if (lte->ehdr.e_machine == EM_PPC)
+		return plt_default;
 
-	pt_ret = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
+	/* PPC64.  If we have stubs, we return a chain of breakpoint
+	 * sites, one for each stub that corresponds to this PLT
+	 * entry.  */
+	struct library_symbol *chain = NULL;
+	struct library_symbol **symp;
+	for (symp = &lte->arch.stubs; *symp != NULL; ) {
+		struct library_symbol *sym = *symp;
+		if (strcmp(sym->name, a_name) != 0) {
+			symp = &(*symp)->next;
+			continue;
+		}
+
+		/* Re-chain the symbol from stubs to CHAIN.  */
+		*symp = sym->next;
+		sym->next = chain;
+		chain = sym;
+	}
+
+	if (chain != NULL) {
+		*ret = chain;
+		return plt_ok;
+	}
+
+	/* We don't have stub symbols.  Find corresponding .plt slot,
+	 * and check whether it contains the corresponding PLT address
+	 * (or 0 if the dynamic linker hasn't run yet).  N.B. we don't
+	 * want to read this from ELF file, but from process image.  That
+	 * makes a difference if we are attaching to a running
+	 * process.  */
+	/* The slot named by the relocation must lie inside .plt.  */
+	GElf_Addr plt_entry_addr = arch_plt_sym_val(lte, ndx, rela);
+	GElf_Addr plt_slot_addr = rela->r_offset;
+	assert(plt_slot_addr >= lte->plt_addr
+	       && plt_slot_addr < lte->plt_addr + lte->plt_size);
+
+	GElf_Addr plt_slot_value;
+	if (read_plt_slot_value(proc, plt_slot_addr, &plt_slot_value) < 0)
+		return plt_fail;
+
+	char *name = strdup(a_name);
+	struct library_symbol *libsym = malloc(sizeof(*libsym));
+	if (name == NULL || libsym == NULL) {
+		error(0, errno, "allocation for .plt slot");
+	fail:
+		free(name);
+		free(libsym);
+		return plt_fail;
+	}
+
+	/* XXX The double cast should be removed when
+	 * target_address_t becomes integral type.  */
+	if (library_symbol_init(libsym,
+				(target_address_t)(uintptr_t)plt_entry_addr,
+				name, 1, LS_TOPLT_EXEC) < 0)
+		goto fail;
+	libsym->arch.plt_slot_addr = plt_slot_addr;
+
+	if (plt_slot_value == plt_entry_addr || plt_slot_value == 0) {
+		libsym->arch.type = PPC_PLT_UNRESOLVED;
+		libsym->arch.resolved_value = plt_entry_addr;
 
-	if (proc->mask_32bit) {
-		// Assume big-endian.
-		addr = (void *)((pt_ret >> 32) & 0xffffffff);
 	} else {
-		addr = (void *)pt_ret;
+		/* Unresolve the .plt slot.  If the binary was
+		 * prelinked, this makes the code invalid, because in
+		 * case of prelinked binary, the dynamic linker
+		 * doesn't update .plt[0] and .plt[1] with addresses
+		 * of the resolver.  But we don't care, we will never
+		 * need to enter the resolver.  That just means that
+		 * we have to un-un-resolve this back before we
+		 * detach.  */
+
+		if (unresolve_plt_slot(proc, plt_slot_addr, plt_entry_addr) < 0) {
+			library_symbol_destroy(libsym);
+			goto fail;
+		}
+		mark_as_resolved(libsym, plt_slot_value);
 	}
 
-	return addr;
+	*ret = libsym;
+	return plt_ok;
+}
+
+void
+arch_elf_destroy(struct ltelf *lte)
+{
+	struct library_symbol *sym;
+	for (sym = lte->arch.stubs; sym != NULL; ) {
+		struct library_symbol *next = sym->next;
+		library_symbol_destroy(sym);
+		free(sym);
+		sym = next;
+	}
+}
+
+static void
+dl_plt_update_bp_on_hit(struct breakpoint *bp, struct Process *proc)
+{
+	debug(DEBUG_PROCESS, "pid=%d dl_plt_update_bp_on_hit %s(%p)",
+	      proc->pid, breakpoint_name(bp), bp->addr);
+	struct process_stopping_handler *self = proc->arch.handler;
+	assert(self != NULL);
+
+	struct library_symbol *libsym = self->breakpoint_being_enabled->libsym;
+	GElf_Addr value;
+	if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0)
+		return;
+
+	/* On PPC64, we rewrite the slot value.  */
+	if (proc->e_machine == EM_PPC64)
+		unresolve_plt_slot(proc, libsym->arch.plt_slot_addr,
+				   libsym->arch.resolved_value);
+	/* We mark the breakpoint as resolved on both arches.  */
+	mark_as_resolved(libsym, value);
+
+	/* cb_on_all_stopped looks if HANDLER is set to NULL as a way
+	 * to check that this was run.  It's an error if it
+	 * wasn't.  */
+	proc->arch.handler = NULL;
+
+	breakpoint_turn_off(bp, proc);
+}
+
+static void
+cb_on_all_stopped(struct process_stopping_handler *self)
+{
+	/* Put that in for dl_plt_update_bp_on_hit to see.  */
+	assert(self->task_enabling_breakpoint->arch.handler == NULL);
+	self->task_enabling_breakpoint->arch.handler = self;
+
+	linux_ptrace_disable_and_continue(self);
+}
+
+static enum callback_status
+cb_keep_stepping_p(struct process_stopping_handler *self)
+{
+	struct Process *proc = self->task_enabling_breakpoint;
+	struct library_symbol *libsym = self->breakpoint_being_enabled->libsym;
+
+	GElf_Addr value;
+	if (read_plt_slot_value(proc, libsym->arch.plt_slot_addr, &value) < 0)
+		return CBS_FAIL;
+
+	/* In UNRESOLVED state, the RESOLVED_VALUE in fact contains
+	 * the PLT entry value.  */
+	if (value == libsym->arch.resolved_value)
+		return CBS_CONT;
+
+	debug(DEBUG_PROCESS, "pid=%d PLT got resolved to value %#"PRIx64,
+	      proc->pid, value);
+
+	/* The .plt slot got resolved!  We can migrate the breakpoint
+	 * to RESOLVED and stop single-stepping.  */
+	if (proc->e_machine == EM_PPC64
+	    && unresolve_plt_slot(proc, libsym->arch.plt_slot_addr,
+				  libsym->arch.resolved_value) < 0)
+		return CBS_FAIL;
+
+	/* Resolving on PPC64 consists of overwriting a doubleword in
+	 * .plt.  That doubleword is then read back by a stub, and
+	 * jumped on.  Hopefully we can assume that double word update
+	 * is done on a single place only, as it contains a final
+	 * address.  We still need to look around for any sync
+	 * instruction, but essentially it is safe to optimize away
+	 * the single stepping next time and install a post-update
+	 * breakpoint.
+	 *
+	 * The situation on PPC32 BSS is more complicated.  The
+	 * dynamic linker here updates potentially several
+	 * instructions (XXX currently we assume two) and the rules
+	 * are more complicated.  Sometimes it's enough to adjust just
+	 * one of the addresses--the logic for generating optimal
+	 * dispatch depends on relative addresses of the .plt entry
+	 * and the jump destination.  We can't assume that the same
+	 * instruction block does the update every time.  So on PPC32,
+	 * we turn the optimization off and just step through it each
+	 * time.  */
+	if (proc->e_machine == EM_PPC)
+		goto done;
+
+	/* Install breakpoint to the address where the change takes
+	 * place.  If we fail, then that just means that we'll have to
+	 * singlestep the next time around as well.  */
+	struct Process *leader = proc->leader;
+	if (leader == NULL || leader->arch.dl_plt_update_bp != NULL)
+		goto done;
+
+	/* We need to install to the next instruction.  ADDR points to
+	 * a store instruction, so moving the breakpoint one
+	 * instruction forward is safe.  */
+	target_address_t addr = get_instruction_pointer(proc) + 4;
+	leader->arch.dl_plt_update_bp = insert_breakpoint(proc, addr, NULL);
+	if (leader->arch.dl_plt_update_bp == NULL)
+		goto done;
+
+	static struct bp_callbacks dl_plt_update_cbs = {
+		.on_hit = dl_plt_update_bp_on_hit,
+	};
+	leader->arch.dl_plt_update_bp->cbs = &dl_plt_update_cbs;
+
+	/* Turn it off for now.  We will turn it on again when we hit
+	 * the PLT entry that needs this.  */
+	breakpoint_turn_off(leader->arch.dl_plt_update_bp, proc);
+
+done:
+	mark_as_resolved(libsym, value);
+
+	return CBS_STOP;
+}
+
+static void
+jump_to_entry_point(struct Process *proc, struct breakpoint *bp)
+{
+	/* XXX The double cast should be removed when
+	 * target_address_t becomes integral type.  */
+	target_address_t rv = (target_address_t)
+		(uintptr_t)bp->libsym->arch.resolved_value;
+	set_instruction_pointer(proc, rv);
+}
+
+static void
+ppc_plt_bp_continue(struct breakpoint *bp, struct Process *proc)
+{
+	switch (bp->libsym->arch.type) {
+		struct Process *leader;
+		void (*on_all_stopped)(struct process_stopping_handler *);
+		enum callback_status (*keep_stepping_p)
+			(struct process_stopping_handler *);
+
+	case PPC_DEFAULT:
+		assert(proc->e_machine == EM_PPC);
+		assert(bp->libsym != NULL);
+		assert(bp->libsym->lib->arch.bss_plt_prelinked == 0);
+		/* fall-through */
+
+	case PPC_PLT_UNRESOLVED:
+		on_all_stopped = NULL;
+		keep_stepping_p = NULL;
+		leader = proc->leader;
+
+		if (leader != NULL && leader->arch.dl_plt_update_bp != NULL
+		    && breakpoint_turn_on(leader->arch.dl_plt_update_bp,
+					  proc) >= 0)
+			on_all_stopped = cb_on_all_stopped;
+		else
+			keep_stepping_p = cb_keep_stepping_p;
+
+		if (process_install_stopping_handler
+		    (proc, bp, on_all_stopped, keep_stepping_p, NULL) < 0) {
+			error(0, 0, "ppc_plt_bp_continue: couldn't install"
+			      " event handler");
+			continue_after_breakpoint(proc, bp);
+		}
+		return;
+
+	case PPC_PLT_RESOLVED:
+		if (proc->e_machine == EM_PPC) {
+			continue_after_breakpoint(proc, bp);
+			return;
+		}
+
+		jump_to_entry_point(proc, bp);
+		continue_process(proc->pid);
+		return;
+
+	case PPC64_PLT_STUB:
+		/* These should never hit here.  */
+		break;
+	}
+
+	assert(bp->libsym->arch.type != bp->libsym->arch.type);
+	abort();
+}
+
+/* When a process is in a PLT stub, it may have already read the data
+ * in .plt that we changed.  If we detach now, it will jump to PLT
+ * entry and continue to the dynamic linker, where it will SIGSEGV,
+ * because zeroth .plt slot is not filled in prelinked binaries, and
+ * the dynamic linker needs that data.  Moreover, the process may
+ * actually have hit the breakpoint already.  This function tries to
+ * detect both cases and do any fix-ups necessary to mend this
+ * situation.  */
+static enum callback_status
+detach_task_cb(struct Process *task, void *data)
+{
+	struct breakpoint *bp = data;
+
+	if (get_instruction_pointer(task) == bp->addr) {
+		debug(DEBUG_PROCESS, "%d at %p, which is PLT slot",
+		      task->pid, bp->addr);
+		jump_to_entry_point(task, bp);
+		return CBS_CONT;
+	}
+
+	/* XXX There's still a window of several instructions where we
+	 * might catch the task inside a stub such that it has already
+	 * read destination address from .plt, but hasn't jumped yet,
+	 * thus avoiding the breakpoint.  */
+
+	return CBS_CONT;
+}
+
+static void
+ppc_plt_bp_retract(struct breakpoint *bp, struct Process *proc)
+{
+	/* On PPC64, we rewrite .plt with PLT entry addresses.  This
+	 * needs to be undone.  Unfortunately, the program may have
+	 * made decisions based on that value */
+	if (proc->e_machine == EM_PPC64
+	    && bp->libsym != NULL
+	    && bp->libsym->arch.type == PPC_PLT_RESOLVED) {
+		each_task(proc->leader, NULL, detach_task_cb, bp);
+		unresolve_plt_slot(proc, bp->libsym->arch.plt_slot_addr,
+				   bp->libsym->arch.resolved_value);
+	}
+}
+
+void
+arch_library_init(struct library *lib)
+{
+}
+
+void
+arch_library_destroy(struct library *lib)
+{
+}
+
+void
+arch_library_clone(struct library *retp, struct library *lib)
+{
+}
+
+int
+arch_library_symbol_init(struct library_symbol *libsym)
+{
+	/* We set type explicitly in the code above, where we have the
+	 * necessary context.  This is for calls from ltrace-elf.c and
+	 * such.  */
+	libsym->arch.type = PPC_DEFAULT;
+	return 0;
+}
+
+void
+arch_library_symbol_destroy(struct library_symbol *libsym)
+{
+}
+
+int
+arch_library_symbol_clone(struct library_symbol *retp,
+			  struct library_symbol *libsym)
+{
+	retp->arch = libsym->arch;
+	return 0;
+}
+
+/* For some symbol types, we need to set up custom callbacks.  XXX we
+ * don't need PROC here, we can store the data in BP if it is of
+ * interest to us.  */
+int
+arch_breakpoint_init(struct Process *proc, struct breakpoint *bp)
+{
+	/* Artificial and entry-point breakpoints are plain.  */
+	if (bp->libsym == NULL || bp->libsym->plt_type != LS_TOPLT_EXEC)
+		return 0;
+
+	/* On PPC, secure PLT and prelinked BSS PLT are plain.  */
+	if (proc->e_machine == EM_PPC
+	    && bp->libsym->lib->arch.bss_plt_prelinked != 0)
+		return 0;
+
+	/* On PPC64, stub PLT breakpoints are plain.  */
+	if (proc->e_machine == EM_PPC64
+	    && bp->libsym->arch.type == PPC64_PLT_STUB)
+		return 0;
+
+	static struct bp_callbacks cbs = {
+		.on_continue = ppc_plt_bp_continue,
+		.on_retract = ppc_plt_bp_retract,
+	};
+	breakpoint_set_callbacks(bp, &cbs);
+	return 0;
+}
+
+void
+arch_breakpoint_destroy(struct breakpoint *bp)
+{
+}
+
+int
+arch_breakpoint_clone(struct breakpoint *retp, struct breakpoint *sbp)
+{
+	retp->arch = sbp->arch;
+	return 0;
+}
+
+int
+arch_process_init(struct Process *proc)
+{
+	proc->arch.dl_plt_update_bp = NULL;
+	proc->arch.handler = NULL;
+	return 0;
+}
+
+void
+arch_process_destroy(struct Process *proc)
+{
+}
+
+int
+arch_process_clone(struct Process *retp, struct Process *proc)
+{
+	retp->arch = proc->arch;
+	return 0;
+}
+
+int
+arch_process_exec(struct Process *proc)
+{
+	return arch_process_init(proc);
 }
diff --git a/sysdeps/linux-gnu/ppc/regs.c b/sysdeps/linux-gnu/ppc/regs.c
index eca58ff..26e962f 100644
--- a/sysdeps/linux-gnu/ppc/regs.c
+++ b/sysdeps/linux-gnu/ppc/regs.c
@@ -3,7 +3,10 @@
 #include <sys/types.h>
 #include <sys/ptrace.h>
 #include <asm/ptrace.h>
+#include <errno.h>
+#include <error.h>
 
+#include "proc.h"
 #include "common.h"
 
 #if (!defined(PTRACE_PEEKUSER) && defined(PTRACE_PEEKUSR))
@@ -20,8 +23,10 @@ get_instruction_pointer(Process *proc) {
 }
 
 void
-set_instruction_pointer(Process *proc, void *addr) {
-	ptrace(PTRACE_POKEUSER, proc->pid, sizeof(long)*PT_NIP, addr);
+set_instruction_pointer(Process *proc, void *addr)
+{
+	if (ptrace(PTRACE_POKEUSER, proc->pid, sizeof(long)*PT_NIP, addr) != 0)
+		error(0, errno, "set_instruction_pointer");
 }
 
 void *
diff --git a/sysdeps/linux-gnu/ppc/syscallent.h b/sysdeps/linux-gnu/ppc/syscallent.h
index 5ce5739..7537b3d 100644
--- a/sysdeps/linux-gnu/ppc/syscallent.h
+++ b/sysdeps/linux-gnu/ppc/syscallent.h
@@ -1,4 +1,4 @@
-"0",				/* 0 */
+    "restart_syscall",	        /* 0 */
     "exit",			/* 1 */
     "fork",			/* 2 */
     "read",			/* 3 */
@@ -177,8 +177,8 @@
     "rt_sigtimedwait",		/* 176 */
     "rt_sigqueueinfo",		/* 177 */
     "rt_sigsuspend",		/* 178 */
-    "pread",			/* 179 */
-    "pwrite",			/* 180 */
+    "pread64",			/* 179 */
+    "pwrite64",			/* 180 */
     "chown",			/* 181 */
     "getcwd",			/* 182 */
     "capget",			/* 183 */
@@ -254,6 +254,12 @@
     "fstatfs64",		/* 253 */
     "fadvise64_64",		/* 254 */
     "rtas",			/* 255 */
+    "sys_debug_setcontext",	/* 256 */
+    "server",			/* 257 */
+    "migrate_pages",		/* 258 */
+    "mbind",			/* 259 */
+    "get_mempolicy",		/* 260 */
+    "set_mempolicy",		/* 261 */
     "mq_open",			/* 262 */
     "mq_unlink",		/* 263 */
     "mq_timedsend",		/* 264 */
@@ -270,3 +276,78 @@
     "inotify_init",		/* 275 */
     "inotify_add_watch",	/* 276 */
     "inotify_rm_watch",		/* 277 */
+    "spu_run",			/* 278 */
+    "spu_create",		/* 279 */
+    "pselect6",			/* 280 */
+    "ppoll",			/* 281 */
+    "unshare",			/* 282 */
+    "splice",			/* 283 */
+    "tee",			/* 284 */
+    "vmsplice",			/* 285 */
+    "openat",			/* 286 */
+    "mkdirat",			/* 287 */
+    "mknodat",			/* 288 */
+    "fchownat",			/* 289 */
+    "futimesat",		/* 290 */
+    "fstatat64",		/* 291 */
+    "unlinkat",			/* 292 */
+    "renameat",			/* 293 */
+    "linkat",			/* 294 */
+    "symlinkat",		/* 295 */
+    "readlinkat",		/* 296 */
+    "fchmodat",			/* 297 */
+    "faccessat",		/* 298 */
+    "get_robust_list",		/* 299 */
+    "set_robust_list",		/* 300 */
+    "move_pages",		/* 301 */
+    "getcpu",			/* 302 */
+    "epoll_pwait",		/* 303 */
+    "utimensat",		/* 304 */
+    "signalfd",			/* 305 */
+    "timerfd_create",		/* 306 */
+    "eventfd",			/* 307 */
+    "sync_file_range2",		/* 308 */
+    "fallocate",		/* 309 */
+    "subpage_prot",		/* 310 */
+    "timerfd_settime",		/* 311 */
+    "timerfd_gettime",		/* 312 */
+    "signalfd4",		/* 313 */
+    "eventfd2",			/* 314 */
+    "epoll_create1",		/* 315 */
+    "dup3",			/* 316 */
+    "pipe2",			/* 317 */
+    "inotify_init1",		/* 318 */
+    "perf_event_open",		/* 319 */
+    "preadv",			/* 320 */
+    "pwritev",			/* 321 */
+    "rt_tgsigqueueinfo",	/* 322 */
+    "fanotify_init",		/* 323 */
+    "fanotify_mark",		/* 324 */
+    "prlimit64",		/* 325 */
+    "socket",			/* 326 */
+    "bind",			/* 327 */
+    "connect",			/* 328 */
+    "listen",			/* 329 */
+    "accept",			/* 330 */
+    "getsockname",		/* 331 */
+    "getpeername",		/* 332 */
+    "socketpair",		/* 333 */
+    "send",			/* 334 */
+    "sendto",			/* 335 */
+    "recv",			/* 336 */
+    "recvfrom",			/* 337 */
+    "shutdown",			/* 338 */
+    "setsockopt",		/* 339 */
+    "getsockopt",		/* 340 */
+    "sendmsg",			/* 341 */
+    "recvmsg",			/* 342 */
+    "recvmmsg",			/* 343 */
+    "accept4",			/* 344 */
+    "name_to_handle_at",	/* 345 */
+    "open_by_handle_at",	/* 346 */
+    "clock_adjtime",		/* 347 */
+    "syncfs",			/* 348 */
+    "sendmmsg",			/* 349 */
+    "setns",			/* 350 */
+    "process_vm_readv",		/* 351 */
+    "process_vm_writev",	/* 352 */
diff --git a/sysdeps/linux-gnu/ppc/trace.c b/sysdeps/linux-gnu/ppc/trace.c
index 05993de..742785a 100644
--- a/sysdeps/linux-gnu/ppc/trace.c
+++ b/sysdeps/linux-gnu/ppc/trace.c
@@ -9,6 +9,7 @@
 #include <errno.h>
 #include <string.h>
 
+#include "proc.h"
 #include "common.h"
 #include "ptrace.h"
 #include "breakpoint.h"
@@ -209,30 +211,46 @@ arch_umovelong (Process *proc, void *addr, long *result, arg_type_info *info) {
 #define STWCX_INSTRUCTION 0x7c00012d
 #define STDCX_INSTRUCTION 0x7c0001ad
 #define BC_MASK 0xfc000000
-#define BC_INSTRUCTION 0x40000000
+#define BC_INSN 0x40000000
+#define BRANCH_MASK 0xfc000000
+
+/* In plt.h.  XXX make this official interface.  */
+int read_target_4(struct Process *proc, target_address_t addr, uint32_t *lp);
 
 int
 arch_atomic_singlestep(struct Process *proc, struct breakpoint *sbp,
 		       int (*add_cb)(void *addr, void *data),
 		       void *add_cb_data)
 {
-	void *addr = sbp->addr;
-	debug(1, "pid=%d addr=%p", proc->pid, addr);
+	target_address_t ip = get_instruction_pointer(proc);
+	struct breakpoint *other = address2bpstruct(proc->leader, ip);
+
+	debug(1, "arch_atomic_singlestep pid=%d addr=%p %s(%p)",
+	      proc->pid, ip, breakpoint_name(sbp), sbp->addr);
 
 	/* If the original instruction was lwarx/ldarx, we can't
 	 * single-step over it, instead we have to execute the whole
 	 * atomic block at once.  */
 	union {
 		uint32_t insn;
-		char buf[4];
+		char buf[BREAKPOINT_LENGTH];
 	} u;
-	memcpy(u.buf, sbp->orig_value, BREAKPOINT_LENGTH);
+	if (other != NULL) {
+		memcpy(u.buf, sbp->orig_value, BREAKPOINT_LENGTH);
+	} else if (read_target_4(proc, ip, &u.insn) < 0) {
+		fprintf(stderr, "couldn't read instruction at IP %p\n", ip);
+		/* Do the normal singlestep.  */
+		return 1;
+	}
 
 	if ((u.insn & LWARX_MASK) != LWARX_INSTRUCTION
 	    && (u.insn & LWARX_MASK) != LDARX_INSTRUCTION)
 		return 1;
 
+	debug(1, "singlestep over atomic block at %p", ip);
+
 	int insn_count;
+	target_address_t addr = ip;
 	for (insn_count = 0; ; ++insn_count) {
 		addr += 4;
 		unsigned long l = ptrace(PTRACE_PEEKTEXT, proc->pid, addr, 0);
@@ -245,27 +263,39 @@ arch_atomic_singlestep(struct Process *proc, Breakpoint *sbp,
 		insn = l;
 #endif
 
-		/* If we hit a branch instruction, give up.  The
-		 * computation could escape that way and we'd have to
-		 * treat that case specially.  */
-		if ((insn & BC_MASK) == BC_INSTRUCTION) {
-			debug(1, "pid=%d, found branch at %p, giving up",
-			      proc->pid, addr);
-			return -1;
+		/* If a conditional branch is found, put a breakpoint
+		 * in its destination address.  */
+		if ((insn & BRANCH_MASK) == BC_INSN) {
+			int immediate = ((insn & 0xfffc) ^ 0x8000) - 0x8000;
+			int absolute = insn & 2;
+
+			/* XXX drop the following casts.  */
+			target_address_t branch_addr;
+			if (absolute)
+				branch_addr = (void *)(uintptr_t)immediate;
+			else
+				branch_addr = addr + (uintptr_t)immediate;
+
+			debug(1, "pid=%d, branch in atomic block from %p to %p",
+			      proc->pid, addr, branch_addr);
+			if (add_cb(branch_addr, add_cb_data) < 0)
+				return -1;
 		}
 
+		/* Assume that the atomic sequence ends with a
+		 * stwcx/stdcx instruction.  */
 		if ((insn & STWCX_MASK) == STWCX_INSTRUCTION
 		    || (insn & STWCX_MASK) == STDCX_INSTRUCTION) {
-			debug(1, "pid=%d, found end of atomic block at %p",
-			      proc->pid, addr);
+			debug(1, "pid=%d, found end of atomic block %p at %p",
+			      proc->pid, ip, addr);
 			break;
 		}
 
 		/* Arbitrary cut-off.  If we didn't find the
 		 * terminating instruction by now, just give up.  */
 		if (insn_count > 16) {
-			debug(1, "pid=%d, couldn't find end of atomic block",
-			      proc->pid);
+			fprintf(stderr, "[%d] couldn't find end of atomic block"
+				" at %p\n", proc->pid, ip);
 			return -1;
 		}
 	}
diff --git a/sysdeps/linux-gnu/proc.c b/sysdeps/linux-gnu/proc.c
index 3350117..b5123fe 100644
--- a/sysdeps/linux-gnu/proc.c
+++ b/sysdeps/linux-gnu/proc.c
@@ -1,23 +1,24 @@
 #define _GNU_SOURCE /* For getline.  */
 #include "config.h"
 
-#include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
 #include <fcntl.h>
 #include <inttypes.h>
 #include <link.h>
+#include <signal.h>
 #include <stdio.h>
 #include <string.h>
-#include <signal.h>
 #include <unistd.h>
-#include <dirent.h>
-#include <ctype.h>
-#include <errno.h>
-#include <sys/syscall.h>
-#include <error.h>
 
 #include "common.h"
 #include "breakpoint.h"
+#include "proc.h"
+#include "library.h"
 
 /* /proc/pid doesn't exist just after the fork, and sometimes `ltrace'
  * couldn't open it to find the executable.  So it may be necessary to
@@ -77,27 +78,28 @@ find_line_starting(FILE * file, const char * prefix, size_t len)
 }
 
 static void
-each_line_starting(FILE * file, const char *prefix,
-		   enum pcb_status (*cb)(const char * line, const char * prefix,
-					 void * data),
-		   void * data)
+each_line_starting(FILE *file, const char *prefix,
+		   enum callback_status (*cb)(const char *line,
+					      const char *prefix,
+					      void *data),
+		   void *data)
 {
 	size_t len = strlen(prefix);
 	char * line;
 	while ((line = find_line_starting(file, prefix, len)) != NULL) {
-		enum pcb_status st = (*cb)(line, prefix, data);
+		enum callback_status st = (*cb)(line, prefix, data);
 		free (line);
-		if (st == pcb_stop)
+		if (st == CBS_STOP)
 			return;
 	}
 }
 
-static enum pcb_status
-process_leader_cb(const char * line, const char * prefix, void * data)
+static enum callback_status
+process_leader_cb(const char *line, const char *prefix, void *data)
 {
 	pid_t * pidp = data;
 	*pidp = atoi(line + strlen(prefix));
-	return pcb_stop;
+	return CBS_STOP;
 }
 
 pid_t
@@ -113,13 +115,13 @@ process_leader(pid_t pid)
 	return tgid;
 }
 
-static enum pcb_status
-process_stopped_cb(const char * line, const char * prefix, void * data)
+static enum callback_status
+process_stopped_cb(const char *line, const char *prefix, void *data)
 {
 	char c = line[strlen(prefix)];
 	// t:tracing stop, T:job control stop
 	*(int *)data = (c == 't' || c == 'T');
-	return pcb_stop;
+	return CBS_STOP;
 }
 
 int
@@ -135,15 +137,15 @@ process_stopped(pid_t pid)
 	return is_stopped;
 }
 
-static enum pcb_status
-process_status_cb(const char * line, const char * prefix, void * data)
+static enum callback_status
+process_status_cb(const char *line, const char *prefix, void *data)
 {
 	const char * status = line + strlen(prefix);
 	const char c = *status;
 
 #define RETURN(C) do {					\
 		*(enum process_status *)data = C;	\
-		return pcb_stop;			\
+		return CBS_STOP;			\
 	} while (0)
 
 	switch (c) {
@@ -179,7 +181,8 @@ process_status(pid_t pid)
 		each_line_starting(file, "State:\t", &process_status_cb, &ret);
 		fclose(file);
 		if (ret == ps_invalid)
-			error(0, errno, "process_status %d", pid);
+			fprintf(stderr, "process_status %d: %s\n", pid,
+				strerror(errno));
 	} else
 		/* If the file is not present, the process presumably
 		 * exited already.  */
@@ -242,51 +245,178 @@ process_tasks(pid_t pid, pid_t **ret_tasks, size_t *ret_n)
 	return 0;
 }
 
+/* On a native 64-bit system, we need to be careful when handling
+ * cross tracing.  This selects the appropriate pointer depending on
+ * host and target architectures.  XXX Really we should abstract this
+ * into the ABI object, as theorized about somewhere on the
+ * pmachata/revamp branch.  */
+static void *
+select_32_64(struct Process *proc, void *p32, void *p64)
+{
+	/* P32 if host long is 4 bytes or PROC->mask_32bit is set
+	 * (presumably a 32-bit tracee -- confirm), P64 otherwise.  */
+	int want_32 = sizeof(long) == 4 || proc->mask_32bit;
+	return want_32 ? p32 : p64;
+}
+
+static int
+fetch_dyn64(struct Process *proc, target_address_t *addr, Elf64_Dyn *ret)
+{
+	if (umovebytes(proc, *addr, ret, sizeof(*ret)) != sizeof(*ret))
+		return -1;
+	*addr += sizeof(*ret);
+	return 0;
+}
+
 static int
-find_dynamic_entry_addr(Process *proc, void *pvAddr, int d_tag, void **addr) {
-	int i = 0, done = 0;
-	ElfW(Dyn) entry;
+fetch_dyn32(struct Process *proc, target_address_t *addr, Elf64_Dyn *ret)
+{
+	Elf32_Dyn dyn;
+	if (umovebytes(proc, *addr, &dyn, sizeof(dyn)) != sizeof(dyn))
+		return -1;
+
+	*addr += sizeof(dyn);
+	ret->d_tag = dyn.d_tag;
+	ret->d_un.d_val = dyn.d_un.d_val;
+
+	return 0;
+}
 
+static int (*
+dyn_fetcher(struct Process *proc))(struct Process *,
+				   target_address_t *, Elf64_Dyn *)
+{
+	return select_32_64(proc, fetch_dyn32, fetch_dyn64);
+}
+
+static int
+find_dynamic_entry_addr(struct Process *proc, target_address_t src_addr,
+			int d_tag, target_address_t *ret)
+{
 	debug(DEBUG_FUNCTION, "find_dynamic_entry()");
 
-	if (addr ==	NULL || pvAddr == NULL || d_tag < 0 || d_tag > DT_NUM) {
+	if (ret == NULL || src_addr == 0 || d_tag < 0 || d_tag > DT_NUM)
 		return -1;
-	}
 
-	while ((!done) && (i < ELF_MAX_SEGMENTS) &&
-		(sizeof(entry) == umovebytes(proc, pvAddr, &entry, sizeof(entry))) &&
-		(entry.d_tag != DT_NULL)) {
+	int i = 0;
+	while (1) {
+		Elf64_Dyn entry;
+		if (dyn_fetcher(proc)(proc, &src_addr, &entry) < 0
+		    || entry.d_tag == DT_NULL
+		    || i++ > 100) { /* Arbitrary cut-off so that we
+				     * don't loop forever if the
+				     * binary is corrupted.  */
+			debug(2, "Couldn't find address for dtag!");
+			return -1;
+		}
+
 		if (entry.d_tag == d_tag) {
-			done = 1;
-			*addr = (void *)entry.d_un.d_val;
+			/* XXX The double cast should be removed when
+			 * target_address_t becomes integral type.  */
+			*ret = (target_address_t)(uintptr_t)entry.d_un.d_val;
+			debug(2, "found address: %p in dtag %d", *ret, d_tag);
+			return 0;
 		}
-		pvAddr += sizeof(entry);
-		i++;
 	}
+}
 
-	if (done) {
-		debug(2, "found address: 0x%p in dtag %d\n", *addr, d_tag);
-		return 0;
+/* Our own types for representing 32-bit and 64-bit link maps.  We
+ * can't rely on the definition in link.h, because that's only accurate
+ * for our host architecture, not for the target architecture (where
+ * the traced process runs).  */
+#define LT_LINK_MAP(BITS)			\
+	{					\
+		Elf##BITS##_Addr l_addr;	\
+		Elf##BITS##_Addr l_name;	\
+		Elf##BITS##_Addr l_ld;		\
+		Elf##BITS##_Addr l_next;	\
+		Elf##BITS##_Addr l_prev;	\
 	}
-	else {
-		debug(2, "Couldn't address for dtag!\n");
+struct lt_link_map_32 LT_LINK_MAP(32);
+struct lt_link_map_64 LT_LINK_MAP(64);
+
+static int
+fetch_lm64(struct Process *proc, target_address_t addr,
+	   struct lt_link_map_64 *ret)
+{
+	if (umovebytes(proc, addr, ret, sizeof(*ret)) != sizeof(*ret))
+		return -1;
+	return 0;
+}
+
+static int
+fetch_lm32(struct Process *proc, target_address_t addr,
+	   struct lt_link_map_64 *ret)
+{
+	struct lt_link_map_32 lm;
+	if (umovebytes(proc, addr, &lm, sizeof(lm)) != sizeof(lm))
 		return -1;
+
+	ret->l_addr = lm.l_addr;
+	ret->l_name = lm.l_name;
+	ret->l_ld = lm.l_ld;
+	ret->l_next = lm.l_next;
+	ret->l_prev = lm.l_prev;
+
+	return 0;
+}
+
+static int (*
+lm_fetcher(struct Process *proc))(struct Process *,
+				  target_address_t, struct lt_link_map_64 *)
+{
+	return select_32_64(proc, fetch_lm32, fetch_lm64);
+}
+
+/* The same as above holds for struct r_debug.  */
+#define LT_R_DEBUG(BITS)			\
+	{					\
+		int r_version;			\
+		Elf##BITS##_Addr r_map;		\
+		Elf##BITS##_Addr r_brk;		\
+		int r_state;			\
+		Elf##BITS##_Addr r_ldbase;	\
 	}
+
+struct lt_r_debug_32 LT_R_DEBUG(32);
+struct lt_r_debug_64 LT_R_DEBUG(64);
+
+static int
+fetch_rd64(struct Process *proc, target_address_t addr,
+	   struct lt_r_debug_64 *ret)
+{
+	if (umovebytes(proc, addr, ret, sizeof(*ret)) != sizeof(*ret))
+		return -1;
+	return 0;
 }
 
-struct cb_data {
-	const char *lib_name;
-	struct ltelf *lte;
-	ElfW(Addr) addr;
-	Process *proc;
-};
+static int
+fetch_rd32(struct Process *proc, target_address_t addr,
+	   struct lt_r_debug_64 *ret)
+{
+	struct lt_r_debug_32 rd;
+	if (umovebytes(proc, addr, &rd, sizeof(rd)) != sizeof(rd))
+		return -1;
 
-static void
-crawl_linkmap(Process *proc, struct r_debug *dbg, void (*callback)(void *), struct cb_data *data) {
-	struct link_map rlm;
-	char lib_name[BUFSIZ];
-	struct link_map *lm = NULL;
+	ret->r_version = rd.r_version;
+	ret->r_map = rd.r_map;
+	ret->r_brk = rd.r_brk;
+	ret->r_state = rd.r_state;
+	ret->r_ldbase = rd.r_ldbase;
+
+	return 0;
+}
 
+static int (*
+rdebug_fetcher(struct Process *proc))(struct Process *,
+				      target_address_t, struct lt_r_debug_64 *)
+{
+	return select_32_64(proc, fetch_rd32, fetch_rd64);
+}
+
+static void
+crawl_linkmap(struct Process *proc, struct lt_r_debug_64 *dbg)
+{
 	debug (DEBUG_FUNCTION, "crawl_linkmap()");
 
 	if (!dbg || !dbg->r_map) {
@@ -294,201 +424,234 @@ crawl_linkmap(Process *proc, struct r_debug *dbg, void (*callback)(void *), stru
 		return;
 	}
 
-	lm = dbg->r_map;
+	/* XXX The double cast should be removed when
+	 * target_address_t becomes integral type.  */
+	target_address_t addr = (target_address_t)(uintptr_t)dbg->r_map;
 
-	while (lm) {
-		if (umovebytes(proc, lm, &rlm, sizeof(rlm)) != sizeof(rlm)) {
-			debug(2, "Unable to read link map\n");
+	while (addr != 0) {
+		struct lt_link_map_64 rlm;
+		if (lm_fetcher(proc)(proc, addr, &rlm) < 0) {
+			debug(2, "Unable to read link map");
 			return;
 		}
 
-		lm = rlm.l_next;
-		if (rlm.l_name == NULL) {
-			debug(2, "Invalid library name referenced in dynamic linker map\n");
+		target_address_t key = addr;
+		/* XXX The double cast should be removed when
+		 * target_address_t becomes integral type.  */
+		addr = (target_address_t)(uintptr_t)rlm.l_next;
+		if (rlm.l_name == 0) {
+			debug(2, "Name of mapped library is NULL");
 			return;
 		}
 
-		umovebytes(proc, rlm.l_name, lib_name, sizeof(lib_name));
+		char lib_name[BUFSIZ];
+		/* XXX The double cast should be removed when
+		 * target_address_t becomes integral type.  */
+		umovebytes(proc, (target_address_t)(uintptr_t)rlm.l_name,
+			   lib_name, sizeof(lib_name));
 
-		if (lib_name[0] == '\0') {
-			debug(2, "Library name is an empty string");
+		if (*lib_name == '\0') {
+			/* VDSO.  No associated file, XXX but we might
+			 * load it from the address space of the
+			 * process.  */
 			continue;
 		}
 
-		if (callback) {
-			debug(2, "Dispatching callback for: %s, "
-					"Loaded at 0x%" PRI_ELF_ADDR "\n",
-					lib_name, rlm.l_addr);
-			data->addr = rlm.l_addr;
-			data->lib_name = lib_name;
-			callback(data);
+		/* Do we have that library already?  */
+		if (proc_each_library(proc, NULL, library_with_key_cb, &key))
+			continue;
+
+		struct library *lib = malloc(sizeof(*lib));
+		if (lib == NULL) {
+		fail:
+			if (lib != NULL)
+				library_destroy(lib);
+			fprintf(stderr, "Couldn't load ELF object %s: %s\n",
+				lib_name, strerror(errno));
+			continue;
 		}
+		library_init(lib, LT_LIBTYPE_DSO);
+
+		if (ltelf_read_library(lib, proc, lib_name, rlm.l_addr) < 0)
+			goto fail;
+
+		lib->key = key;
+		proc_add_library(proc, lib);
 	}
 	return;
 }
 
-static struct r_debug *
-load_debug_struct(Process *proc) {
-	struct r_debug *rdbg = NULL;
+/* A struct stored at proc->debug.  */
+struct debug_struct
+{
+	target_address_t debug_addr;
+	int state;
+};
 
+static int
+load_debug_struct(struct Process *proc, struct lt_r_debug_64 *ret)
+{
 	debug(DEBUG_FUNCTION, "load_debug_struct");
 
-	rdbg = malloc(sizeof(*rdbg));
-	if (!rdbg) {
-		return NULL;
-	}
+	struct debug_struct *debug = proc->debug;
 
-	if (umovebytes(proc, proc->debug, rdbg, sizeof(*rdbg)) != sizeof(*rdbg)) {
+	if (rdebug_fetcher(proc)(proc, debug->debug_addr, ret) < 0) {
 		debug(2, "This process does not have a debug structure!\n");
-		free(rdbg);
-		return NULL;
+		return -1;
 	}
 
-	return rdbg;
+	return 0;
 }
 
 static void
-linkmap_add_cb(void *data) { //const char *lib_name, ElfW(Addr) addr) {
-	size_t i = 0;
-	struct cb_data *lm_add = data;
-	struct ltelf lte;
-	struct opt_x_t *xptr;
-
-	debug(DEBUG_FUNCTION, "linkmap_add_cb");
-
-	/*
-		XXX
-		iterate through library[i]'s to see if this lib is in the list.
-		if not, add it
-	 */
-	for(;i < library_num;i++) {
-		if (strcmp(library[i], lm_add->lib_name) == 0) {
-			/* found it, so its not new */
-			return;
-		}
-	}
-
-	/* new library linked! */
-	debug(2, "New libdl loaded library found: %s\n", lm_add->lib_name);
-
-	if (library_num < MAX_LIBRARIES) {
-		library[library_num++] = strdup(lm_add->lib_name);
-		memset(&lte, 0, sizeof(struct ltelf));
-		lte.base_addr = lm_add->addr;
-		do_init_elf(&lte, library[library_num-1]);
-		/* add bps */
-		for (xptr = opt_x; xptr; xptr = xptr->next) {
-			if (xptr->found)
-				continue;
-
-			GElf_Sym sym;
-			GElf_Addr addr;
-
-			if (in_load_libraries(xptr->name, &lte, 1, &sym)) {
-				debug(2, "found symbol %s @ %#" PRIx64
-						", adding it.",
-						xptr->name, sym.st_value);
-				addr = sym.st_value;
-				add_library_symbol(addr, xptr->name, &library_symbols, LS_TOPLT_NONE, 0);
-				xptr->found = 1;
-				insert_breakpoint(lm_add->proc,
-						  sym2addr(lm_add->proc,
-							   library_symbols),
-						  library_symbols, 1);
-			}
-		}
-		do_close_elf(&lte);
-	}
-}
-
-void
-arch_check_dbg(Process *proc) {
-	struct r_debug *dbg = NULL;
-	struct cb_data data;
-
+rdebug_bp_on_hit(struct breakpoint *bp, struct Process *proc)
+{
 	debug(DEBUG_FUNCTION, "arch_check_dbg");
 
-	if (!(dbg = load_debug_struct(proc))) {
+	struct lt_r_debug_64 rdbg;
+	if (load_debug_struct(proc, &rdbg) < 0) {
 		debug(2, "Unable to load debug structure!");
 		return;
 	}
 
-	if (dbg->r_state == RT_CONSISTENT) {
+	struct debug_struct *debug = proc->debug;
+	if (rdbg.r_state == RT_CONSISTENT) {
 		debug(2, "Linkmap is now consistent");
-		if (proc->debug_state == RT_ADD) {
+		if (debug->state == RT_ADD) {
 			debug(2, "Adding DSO to linkmap");
-			data.proc = proc;
-			crawl_linkmap(proc, dbg, linkmap_add_cb, &data);
-		} else if (proc->debug_state == RT_DELETE) {
+			//data.proc = proc;
+			crawl_linkmap(proc, &rdbg);
+			//&data);
+		} else if (debug->state == RT_DELETE) {
 			debug(2, "Removing DSO from linkmap");
 		} else {
 			debug(2, "Unexpected debug state!");
 		}
 	}
 
-	proc->debug_state = dbg->r_state;
-
-	return;
+	debug->state = rdbg.r_state;
 }
 
-static void
-hook_libdl_cb(void *data) {
-	struct cb_data *hook_data = data;
-	const char *lib_name = NULL;
-	ElfW(Addr) addr;
-	struct ltelf *lte = NULL;
-
-	debug(DEBUG_FUNCTION, "add_library_cb");
+/* Set up r_debug tracking from DT_DEBUG at DYN_ADDR.  Returns 0 or -1.  */
+int
+linkmap_init(struct Process *proc, target_address_t dyn_addr)
+{
+	debug(DEBUG_FUNCTION, "linkmap_init()");
 
-	if (!data) {
-		debug(2, "No callback data");
-		return;
+	/* Zeroed so that ->state starts out as RT_CONSISTENT (0).  */
+	struct debug_struct *debug = calloc(1, sizeof(*debug));
+	if (debug == NULL) {
+		fprintf(stderr, "couldn't allocate debug struct: %s\n",
+			strerror(errno));
+	fail:
+		proc->debug = NULL;
+		free(debug);
+		return -1;
 	}
+	proc->debug = debug;
 
-	lib_name = hook_data->lib_name;
-	addr = hook_data->addr;
-	lte = hook_data->lte;
-
-	if (library_num < MAX_LIBRARIES) {
-		library[library_num++] = strdup(lib_name);
-		lte[library_num].base_addr = addr;
+	if (find_dynamic_entry_addr(proc, dyn_addr, DT_DEBUG,
+				    &debug->debug_addr) == -1) {
+		debug(2, "Couldn't find debug structure!");
+		goto fail;
 	}
-	else {
-		fprintf (stderr, "MAX LIBS REACHED\n");
-		exit(EXIT_FAILURE);
+
+	struct lt_r_debug_64 rdbg;
+	if (load_debug_struct(proc, &rdbg) < 0) {
+		debug(2, "No debug structure or no memory to allocate one!");
+		goto fail;
 	}
-}
 
-int
-linkmap_init(Process *proc, struct ltelf *lte) {
-	void *dbg_addr = NULL, *dyn_addr = GELF_ADDR_CAST(lte->dyn_addr);
-	struct r_debug *rdbg = NULL;
-	struct cb_data data;
+	/* XXX Drop the double cast once target_address_t is integral.  */
+	target_address_t addr = (target_address_t)(uintptr_t)rdbg.r_brk;
+	if (arch_translate_address_dyn(proc, addr, &addr) < 0)
+		goto fail;
 
-	debug(DEBUG_FUNCTION, "linkmap_init()");
+	struct breakpoint *rdebug_bp = insert_breakpoint(proc, addr, NULL);
+	static struct bp_callbacks rdebug_callbacks
+		= { .on_hit = rdebug_bp_on_hit };
+	if (rdebug_bp != NULL)	/* Otherwise trace on without the hook.  */
+		rdebug_bp->cbs = &rdebug_callbacks;
 
-	if (find_dynamic_entry_addr(proc, dyn_addr, DT_DEBUG, &dbg_addr) == -1) {
-		debug(2, "Couldn't find debug structure!");
-		return -1;
-	}
+	crawl_linkmap(proc, &rdbg);
 
-	proc->debug = dbg_addr;
+	return 0;
+}
 
-	if (!(rdbg = load_debug_struct(proc))) {
-		debug(2, "No debug structure or no memory to allocate one!");
+static int
+fetch_auxv64_entry(int fd, Elf64_auxv_t *ret)
+{
+	/* A short read, EOF included, means there is no whole entry.  */
+	ssize_t got = read(fd, ret, sizeof(*ret));
+	return got == sizeof(*ret) ? 0 : -1;
+}
+
+static int
+fetch_auxv32_entry(int fd, Elf64_auxv_t *ret)
+{
+	Elf32_auxv_t auxv;
+	if (read(fd, &auxv, sizeof(auxv)) != sizeof(auxv))
 		return -1;
+
+	ret->a_type = auxv.a_type;
+	ret->a_un.a_val = auxv.a_un.a_val;
+	return 0;
+}
+
+static int (*
+auxv_fetcher(struct Process *proc))(int, Elf64_auxv_t *)
+{
+	return select_32_64(proc, fetch_auxv32_entry, fetch_auxv64_entry);
+}
+
+/* Fetch AT_ENTRY and AT_BASE of PROC from /proc/PID/auxv.  Returns 0,
+ * or -1 (outputs untouched) if the file can't be opened or read.  */
+int
+process_get_entry(struct Process *proc,
+		  target_address_t *entryp,
+		  target_address_t *interp_biasp)
+{
+	PROC_PID_FILE(fn, "/proc/%d/auxv", proc->pid);
+	int fd = open(fn, O_RDONLY);
+	if (fd == -1) {
+	fail:
+		fprintf(stderr, "couldn't read %s: %s\n", fn, strerror(errno));
+		if (fd != -1)
+			close(fd);
+		return -1;
 	}
 
-	data.lte = lte;
+	target_address_t at_entry = 0;
+	target_address_t at_bias = 0;
+	while (1) {
+		Elf64_auxv_t entry;
+		if (auxv_fetcher(proc)(fd, &entry) < 0)
+			goto fail;
+
+		switch (entry.a_type) {
+		case AT_BASE:
+			/* XXX The double casts here should be removed
+			 * when target_address_t becomes integral type.  */
+			at_bias = (target_address_t)
+				(uintptr_t)entry.a_un.a_val;
+			continue;
 
-	add_library_symbol(rdbg->r_brk, "", &library_symbols, LS_TOPLT_NONE, 0);
-	insert_breakpoint(proc, sym2addr(proc, library_symbols),
-			  library_symbols, 1);
+		case AT_ENTRY:
+			at_entry = (target_address_t)
+				(uintptr_t)entry.a_un.a_val;
+		default:	/* AT_ENTRY falls through.  */
+			continue;
 
-	crawl_linkmap(proc, rdbg, hook_libdl_cb, &data);
+		case AT_NULL:
+			break;
+		}
+		break;
+	}
 
-	free(rdbg);
-	return 0;
+	*entryp = at_entry;
+	*interp_biasp = at_bias;
+	close(fd);
+	return 0;
 }
 
 int
diff --git a/sysdeps/linux-gnu/s390/plt.c b/sysdeps/linux-gnu/s390/plt.c
index 85a1dd1..754d270 100644
--- a/sysdeps/linux-gnu/s390/plt.c
+++ b/sysdeps/linux-gnu/s390/plt.c
@@ -1,4 +1,5 @@
 #include <gelf.h>
+#include "proc.h"
 #include "common.h"
 
 GElf_Addr
diff --git a/sysdeps/linux-gnu/s390/regs.c b/sysdeps/linux-gnu/s390/regs.c
index 169893e..a45dd9b 100644
--- a/sysdeps/linux-gnu/s390/regs.c
+++ b/sysdeps/linux-gnu/s390/regs.c
@@ -9,6 +9,7 @@
 #include <sys/ptrace.h>
 #include <asm/ptrace.h>
 
+#include "proc.h"
 #include "common.h"
 
 #if (!defined(PTRACE_PEEKUSER) && defined(PTRACE_PEEKUSR))
diff --git a/sysdeps/linux-gnu/s390/trace.c b/sysdeps/linux-gnu/s390/trace.c
index 63935de..8c08f1f 100644
--- a/sysdeps/linux-gnu/s390/trace.c
+++ b/sysdeps/linux-gnu/s390/trace.c
@@ -17,6 +17,7 @@
 #include <sys/ptrace.h>
 #include <asm/ptrace.h>
 
+#include "proc.h"
 #include "common.h"
 
 #if (!defined(PTRACE_PEEKUSER) && defined(PTRACE_PEEKUSR))
diff --git a/sysdeps/linux-gnu/sparc/plt.c b/sysdeps/linux-gnu/sparc/plt.c
index f9e6d80..658e549 100644
--- a/sysdeps/linux-gnu/sparc/plt.c
+++ b/sysdeps/linux-gnu/sparc/plt.c
@@ -1,4 +1,5 @@
 #include <gelf.h>
+#include "proc.h"
 #include "common.h"
 
 GElf_Addr
diff --git a/sysdeps/linux-gnu/sparc/regs.c b/sysdeps/linux-gnu/sparc/regs.c
index 49d2729..d7ee114 100644
--- a/sysdeps/linux-gnu/sparc/regs.c
+++ b/sysdeps/linux-gnu/sparc/regs.c
@@ -2,6 +2,7 @@
 
 #include <sys/types.h>
 #include "ptrace.h"
+#include "proc.h"
 #include "common.h"
 
 void *
diff --git a/sysdeps/linux-gnu/sparc/trace.c b/sysdeps/linux-gnu/sparc/trace.c
index 7f05b55..e05c4d3 100644
--- a/sysdeps/linux-gnu/sparc/trace.c
+++ b/sysdeps/linux-gnu/sparc/trace.c
@@ -6,6 +6,7 @@
 #include <signal.h>
 #include <string.h>
 #include "ptrace.h"
+#include "proc.h"
 #include "common.h"
 
 void
diff --git a/sysdeps/linux-gnu/trace.c b/sysdeps/linux-gnu/trace.c
index fe64a28..d5c5262 100644
--- a/sysdeps/linux-gnu/trace.c
+++ b/sysdeps/linux-gnu/trace.c
@@ -17,6 +17,8 @@
 #include "common.h"
 #include "config.h"
 #include "breakpoint.h"
+#include "proc.h"
+#include "linux-gnu/trace.h"
 
 #include "config.h"
 #ifdef HAVE_LIBSELINUX
@@ -107,26 +109,21 @@ trace_me(void)
 }
 
 /* There's a (hopefully) brief period of time after the child process
- * exec's when we can't trace it yet.  Here we wait for kernel to
+ * forks when we can't trace it yet.  Here we wait for kernel to
  * prepare the process.  */
-void
+int
 wait_for_proc(pid_t pid)
 {
-	size_t i;
-	for (i = 0; i < 100; ++i) {
-		/* We read from memory address 0, but that shouldn't
-		 * be a problem: the reading will just fail.  We are
-		 * looking for a particular reason of failure.  */
-		if (ptrace(PTRACE_PEEKTEXT, pid, 0, 0) != -1
-		    || errno != ESRCH)
-			return;
-
-		usleep(1000);
+	/* man ptrace: PTRACE_ATTACH attaches to the process specified
+	   in pid.  The child is sent a SIGSTOP, but will not
+	   necessarily have stopped by the completion of this call;
+	   use wait() to wait for the child to stop. */
+	if (waitpid(pid, NULL, __WALL) != pid) {
+		perror ("trace_pid: waitpid");
+		return -1;
 	}
 
-	fprintf(stderr, "\
-I consistently fail to read a word from the freshly launched process.\n\
-I'll now try to proceed with tracing, but this shouldn't be happening.\n");
+	return 0;
 }
 
 int
@@ -139,23 +136,16 @@ trace_pid(pid_t pid)
 	if (ptrace(PTRACE_ATTACH, pid, 1, 0) < 0)
 		return -1;
 
-	/* man ptrace: PTRACE_ATTACH attaches to the process specified
-	   in pid.  The child is sent a SIGSTOP, but will not
-	   necessarily have stopped by the completion of this call;
-	   use wait() to wait for the child to stop. */
-	if (waitpid (pid, NULL, __WALL) != pid) {
-		perror ("trace_pid: waitpid");
-		return -1;
-	}
-
-	return 0;
+	return wait_for_proc(pid);
 }
 
 void
-trace_set_options(Process *proc, pid_t pid) {
+trace_set_options(struct Process *proc)
+{
 	if (proc->tracesysgood & 0x80)
 		return;
 
+	pid_t pid = proc->pid;
 	debug(DEBUG_PROCESS, "trace_set_options: pid=%d", pid);
 
 	long options = PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACEFORK |
@@ -212,67 +202,6 @@ continue_process(pid_t pid)
 		      "putting off the continue, events in que.");
 }
 
-/**
- * This is used for bookkeeping related to PIDs that the event
- * handlers work with.
- */
-struct pid_task {
-	pid_t pid;	/* This may be 0 for tasks that exited
-			 * mid-handling.  */
-	int sigstopped : 1;
-	int got_event : 1;
-	int delivered : 1;
-	int vforked : 1;
-	int sysret : 1;
-} * pids;
-
-struct pid_set {
-	struct pid_task * tasks;
-	size_t count;
-	size_t alloc;
-};
-
-/**
- * Breakpoint re-enablement.  When we hit a breakpoint, we must
- * disable it, single-step, and re-enable it.  That single-step can be
- * done only by one task in a task group, while others are stopped,
- * otherwise the processes would race for who sees the breakpoint
- * disabled and who doesn't.  The following is to keep track of it
- * all.
- */
-struct process_stopping_handler
-{
-	Event_Handler super;
-
-	/* The task that is doing the re-enablement.  */
-	Process * task_enabling_breakpoint;
-
-	/* The pointer being re-enabled.  */
-	struct breakpoint *breakpoint_being_enabled;
-
-	/* Artificial atomic skip breakpoint, if any needed.  */
-	void *atomic_skip_bp_addr;
-
-	enum {
-		/* We are waiting for everyone to land in t/T.  */
-		psh_stopping = 0,
-
-		/* We are doing the PTRACE_SINGLESTEP.  */
-		psh_singlestep,
-
-		/* We are waiting for all the SIGSTOPs to arrive so
-		 * that we can sink them.  */
-		psh_sinking,
-
-		/* This is for tracking the ugly workaround.  */
-		psh_ugly_workaround,
-	} state;
-
-	int exiting;
-
-	struct pid_set pids;
-};
-
 static struct pid_task *
 get_task_info(struct pid_set * pids, pid_t pid)
 {
@@ -303,8 +232,8 @@ add_task_info(struct pid_set * pids, pid_t pid)
 	return task_info;
 }
 
-static enum pcb_status
-task_stopped(Process * task, void * data)
+static enum callback_status
+task_stopped(struct Process *task, void *data)
 {
 	enum process_status st = process_status(task->pid);
 	if (data != NULL)
@@ -318,48 +247,48 @@ task_stopped(Process * task, void * data)
 	case ps_invalid:
 	case ps_tracing_stop:
 	case ps_zombie:
-		return pcb_cont;
+		return CBS_CONT;
 	case ps_sleeping:
 	case ps_stop:
 	case ps_other:
-		return pcb_stop;
+		return CBS_STOP;
 	}
 
 	abort ();
 }
 
 /* Task is blocked if it's stopped, or if it's a vfork parent.  */
-static enum pcb_status
-task_blocked(Process * task, void * data)
+static enum callback_status
+task_blocked(struct Process *task, void *data)
 {
 	struct pid_set * pids = data;
 	struct pid_task * task_info = get_task_info(pids, task->pid);
 	if (task_info != NULL
 	    && task_info->vforked)
-		return pcb_cont;
+		return CBS_CONT;
 
 	return task_stopped(task, NULL);
 }
 
-static Event * process_vfork_on_event(Event_Handler * super, Event * event);
+static Event *process_vfork_on_event(struct event_handler *super, Event *event);
 
-static enum pcb_status
-task_vforked(Process * task, void * data)
+static enum callback_status
+task_vforked(struct Process *task, void *data)
 {
 	if (task->event_handler != NULL
 	    && task->event_handler->on_event == &process_vfork_on_event)
-		return pcb_stop;
-	return pcb_cont;
+		return CBS_STOP;
+	return CBS_CONT;
 }
 
 static int
-is_vfork_parent(Process * task)
+is_vfork_parent(struct Process *task)
 {
-	return each_task(task->leader, &task_vforked, NULL) != NULL;
+	return each_task(task->leader, NULL, &task_vforked, NULL) != NULL;
 }
 
-static enum pcb_status
-send_sigstop(Process * task, void * data)
+static enum callback_status
+send_sigstop(struct Process *task, void *data)
 {
 	Process * leader = task->leader;
 	struct pid_set * pids = data;
@@ -372,24 +301,24 @@ send_sigstop(Process * task, void * data)
 		perror("send_sigstop: add_task_info");
 		destroy_event_handler(leader);
 		/* Signal failure upwards.  */
-		return pcb_stop;
+		return CBS_STOP;
 	}
 
 	/* This task still has not been attached to.  It should be
 	   stopped by the kernel.  */
 	if (task->state == STATE_BEING_CREATED)
-		return pcb_cont;
+		return CBS_CONT;
 
 	/* Don't bother sending SIGSTOP if we are already stopped, or
 	 * if we sent the SIGSTOP already, which happens when we are
 	 * handling "onexit" and inherited the handler from breakpoint
 	 * re-enablement.  */
 	enum process_status st;
-	if (task_stopped(task, &st) == pcb_cont)
-		return pcb_cont;
+	if (task_stopped(task, &st) == CBS_CONT)
+		return CBS_CONT;
 	if (task_info->sigstopped) {
 		if (!task_info->delivered)
-			return pcb_cont;
+			return CBS_CONT;
 		task_info->delivered = 0;
 	}
 
@@ -400,7 +329,7 @@ send_sigstop(Process * task, void * data)
 	if (st == ps_sleeping
 	    && is_vfork_parent (task)) {
 		task_info->vforked = 1;
-		return pcb_cont;
+		return CBS_CONT;
 	}
 
 	if (task_kill(task->pid, SIGSTOP) >= 0) {
@@ -410,7 +339,7 @@ send_sigstop(Process * task, void * data)
 		fprintf(stderr,
 			"Warning: couldn't send SIGSTOP to %d\n", task->pid);
 
-	return pcb_cont;
+	return CBS_CONT;
 }
 
 /* On certain kernels, detaching right after a singlestep causes the
@@ -426,7 +355,7 @@ ugly_workaround(Process * proc)
 	if (sbp != NULL)
 		enable_breakpoint(proc, sbp);
 	else
-		insert_breakpoint(proc, ip, NULL, 1);
+		insert_breakpoint(proc, ip, NULL);
 	ptrace(PTRACE_CONT, proc->pid, 0, 0);
 }
 
@@ -444,9 +373,21 @@ process_stopping_done(struct process_stopping_handler * self, Process * leader)
 				continue_process(self->pids.tasks[i].pid);
 		continue_process(self->task_enabling_breakpoint->pid);
-		destroy_event_handler(leader);
-	} else {
+	}
+
+	if (self->exiting) {
+	ugly_workaround:
 		self->state = psh_ugly_workaround;
 		ugly_workaround(self->task_enabling_breakpoint);
+	} else {
+		switch ((self->ugly_workaround_p)(self)) {
+		case CBS_FAIL:
+			/* xxx handle me */
+		case CBS_STOP:
+			break;
+		case CBS_CONT:
+			goto ugly_workaround;
+		}
+		destroy_event_handler(leader); /* Last use of SELF.  */
 	}
 }
 
@@ -464,21 +405,28 @@ undo_breakpoint(Event * event, void * data)
 	return ecb_cont;
 }
 
-static enum pcb_status
-untrace_task(Process * task, void * data)
+static enum callback_status
+untrace_task(struct Process *task, void *data)
 {
 	if (task != data)
 		untrace_pid(task->pid);
-	return pcb_cont;
+	return CBS_CONT;
 }
 
-static enum pcb_status
-remove_task(Process * task, void * data)
+static enum callback_status
+remove_task(struct Process *task, void *data)
 {
 	/* Don't untrace leader just yet.  */
 	if (task != data)
 		remove_process(task);
-	return pcb_cont;
+	return CBS_CONT;
+}
+
+static enum callback_status
+retract_breakpoint_cb(struct Process *proc, struct breakpoint *bp, void *data)
+{
+	breakpoint_on_retract(bp, proc);
+	return CBS_CONT;
 }
 
 static void
@@ -486,6 +434,7 @@ detach_process(Process * leader)
 {
 	each_qd_event(&undo_breakpoint, leader);
 	disable_all_breakpoints(leader);
+	proc_each_breakpoint(leader, NULL, retract_breakpoint_cb, NULL);
 
 	/* Now untrace the process, if it was attached to by -p.  */
 	struct opt_p_t * it;
@@ -494,11 +443,11 @@ detach_process(Process * leader)
 		if (proc == NULL)
 			continue;
 		if (proc->leader == leader) {
-			each_task(leader, &untrace_task, NULL);
+			each_task(leader, NULL, &untrace_task, NULL);
 			break;
 		}
 	}
-	each_task(leader, &remove_task, leader);
+	each_task(leader, NULL, &remove_task, leader);
 	destroy_event_handler(leader);
 	remove_task(leader, NULL);
 }
@@ -632,19 +581,56 @@ arch_atomic_singlestep(struct Process *proc, Breakpoint *sbp,
 }
 #endif
 
+static Event *process_stopping_on_event(struct event_handler *super,
+					Event *event);
+
+static void
+remove_atomic_breakpoints(struct Process *proc)
+{
+	struct process_stopping_handler *self
+		= (void *)proc->leader->event_handler;
+	assert(self != NULL);
+	assert(self->super.on_event == process_stopping_on_event);
+
+	int ct = sizeof(self->atomic_skip_bp_addrs)
+		/ sizeof(*self->atomic_skip_bp_addrs);
+	int i;
+	for (i = 0; i < ct; ++i)
+		if (self->atomic_skip_bp_addrs[i] != 0) {
+			delete_breakpoint(proc, self->atomic_skip_bp_addrs[i]);
+			self->atomic_skip_bp_addrs[i] = 0;
+		}
+}
+
+static void
+atomic_singlestep_bp_on_hit(struct breakpoint *bp, struct Process *proc)
+{
+	remove_atomic_breakpoints(proc);
+}
+
 static int
 atomic_singlestep_add_bp(void *addr, void *data)
 {
 	struct process_stopping_handler *self = data;
 	struct Process *proc = self->task_enabling_breakpoint;
 
-	/* Only support single address as of now.  */
-	assert(self->atomic_skip_bp_addr == NULL);
-
-	self->atomic_skip_bp_addr = addr + 4;
-	insert_breakpoint(proc->leader, self->atomic_skip_bp_addr, NULL, 1);
+	int ct = sizeof(self->atomic_skip_bp_addrs)
+		/ sizeof(*self->atomic_skip_bp_addrs);
+	int i;
+	for (i = 0; i < ct; ++i)
+		if (self->atomic_skip_bp_addrs[i] == 0) {
+			self->atomic_skip_bp_addrs[i] = addr;
+			static struct bp_callbacks cbs = {
+				.on_hit = atomic_singlestep_bp_on_hit,
+			};
+			struct breakpoint *bp
+				= insert_breakpoint(proc, addr, NULL);
+			breakpoint_set_callbacks(bp, &cbs);
+			return 0;
+		}
 
-	return 0;
+	assert(!"Too many atomic singlestep breakpoints!");
+	abort();
 }
 
 static int
@@ -670,30 +656,68 @@ singlestep(struct process_stopping_handler *self) ***
 }
 
 static void
-post_singlestep(struct process_stopping_handler *self, Event **eventp)
+post_singlestep(struct process_stopping_handler *self,
+		struct Event **eventp)
 {
 	continue_for_sigstop_delivery(&self->pids);
 
-	if ((*eventp)->type == EVENT_BREAKPOINT)
+	if (*eventp != NULL && (*eventp)->type == EVENT_BREAKPOINT)
 		*eventp = NULL; // handled
 
-	if (self->atomic_skip_bp_addr != 0)
-		delete_breakpoint(self->task_enabling_breakpoint->leader,
-				  self->atomic_skip_bp_addr);
+	struct Process *proc = self->task_enabling_breakpoint;
 
+	remove_atomic_breakpoints(proc);
 	self->breakpoint_being_enabled = NULL;
 }
 
 static void
-singlestep_error(struct process_stopping_handler *self, Event **eventp)
+singlestep_error(struct process_stopping_handler *self)
 {
 	struct Process *teb = self->task_enabling_breakpoint;
 	struct breakpoint *sbp = self->breakpoint_being_enabled;
-	fprintf(stderr, "%d couldn't singlestep over %s (%p)\n",
+	fprintf(stderr, "%d couldn't continue when handling %s (%p) at %p\n",
 		teb->pid, sbp->libsym != NULL ? sbp->libsym->name : NULL,
-		sbp->addr);
+		sbp->addr, get_instruction_pointer(teb));
 	delete_breakpoint(teb->leader, sbp->addr);
-	post_singlestep(self, eventp);
+}
+
+static void
+pt_continue(struct process_stopping_handler *self)
+{
+	struct Process *teb = self->task_enabling_breakpoint;
+	debug(1, "PTRACE_CONT");
+	ptrace(PTRACE_CONT, teb->pid, 0, 0);
+}
+
+static void
+pt_singlestep(struct process_stopping_handler *self)
+{
+	if (singlestep(self) < 0)
+		singlestep_error(self);
+}
+
+static void
+disable_and(struct process_stopping_handler *self,
+	    void (*do_this)(struct process_stopping_handler *self))
+{
+	struct Process *teb = self->task_enabling_breakpoint;
+	debug(DEBUG_PROCESS, "all stopped, now singlestep/cont %d", teb->pid);
+	if (self->breakpoint_being_enabled->enabled)
+		disable_breakpoint(teb, self->breakpoint_being_enabled);
+	(do_this)(self);
+	self->state = psh_singlestep;
+}
+
+void
+linux_ptrace_disable_and_singlestep(struct process_stopping_handler *self)
+{
+	disable_and(self, &pt_singlestep);
+}
+
+void
+linux_ptrace_disable_and_continue(struct process_stopping_handler *self)
+{
+	disable_and(self, &pt_continue);
 }
 
 /* This event handler is installed when we are in the process of
@@ -703,16 +727,15 @@ singlestep_error(struct process_stopping_handler *self, Event **eventp)
  * happens, we let the re-enablement thread to PTRACE_SINGLESTEP,
  * re-enable, and continue everyone.  */
 static Event *
-process_stopping_on_event(Event_Handler * super, Event * event)
+process_stopping_on_event(struct event_handler *super, Event *event)
 {
 	struct process_stopping_handler * self = (void *)super;
 	Process * task = event->proc;
 	Process * leader = task->leader;
-	struct breakpoint *sbp = self->breakpoint_being_enabled;
 	Process * teb = self->task_enabling_breakpoint;
 
 	debug(DEBUG_PROCESS,
-	      "pid %d; event type %d; state %d",
+	      "process_stopping_on_event: pid %d; event type %d; state %d",
 	      task->pid, event->type, self->state);
 
 	struct pid_task * task_info = get_task_info(&self->pids, task->pid);
@@ -741,17 +764,10 @@ process_stopping_on_event(Event_Handler * super, Event * event)
 	switch (state) {
 	case psh_stopping:
 		/* If everyone is stopped, singlestep.  */
-		if (each_task(leader, &task_blocked, &self->pids) == NULL) {
-			debug(DEBUG_PROCESS, "all stopped, now SINGLESTEP %d",
-			      teb->pid);
-			if (sbp->enabled)
-				disable_breakpoint(teb, sbp);
-			if (singlestep(self) < 0) {
-				singlestep_error(self, &event);
-				goto psh_sinking;
-			}
-
-			self->state = state = psh_singlestep;
+		if (each_task(leader, NULL, &task_blocked,
+			      &self->pids) == NULL) {
+			(self->on_all_stopped)(self);
+			state = self->state;
 		}
 		break;
 
@@ -760,18 +776,47 @@ process_stopping_on_event(Event_Handler * super, Event * event)
 		 * have now stepped, and can re-enable the breakpoint.  */
 		if (event != NULL && task == teb) {
 
-			/* This is not the singlestep that we are waiting for.  */
+			/* If this was caused by a real breakpoint, as
+			 * opposed to a singlestep, assume that it's
+			 * an artificial breakpoint installed for some
+			 * reason for the re-enablement.  In that case
+			 * handle it.  */
+			if (event->type == EVENT_BREAKPOINT) {
+				target_address_t ip
+					= get_instruction_pointer(task);
+				struct breakpoint *other
+					= address2bpstruct(leader, ip);
+				if (other != NULL)
+					breakpoint_on_hit(other, task);
+			}
+
+			/* If we got SIGNAL instead of BREAKPOINT,
+			 * then this is not singlestep at all.  */
 			if (event->type == EVENT_SIGNAL) {
+			do_singlestep:
 				if (singlestep(self) < 0) {
-					singlestep_error(self, &event);
+					singlestep_error(self);
+					post_singlestep(self, &event);
 					goto psh_sinking;
 				}
 				break;
+			} else {
+				switch ((self->keep_stepping_p)(self)) {
+				case CBS_FAIL:
+					/* XXX handle me */
+				case CBS_STOP:
+					break;
+				case CBS_CONT:
+					/* Sink singlestep event.  */
+					if (event->type == EVENT_BREAKPOINT)
+						event = NULL;
+					goto do_singlestep;
+				}
 			}
 
-			/* Essentially we don't care what event caused
-			 * the thread to stop.  We can do the
-			 * re-enablement now.  */
+			/* Re-enable the breakpoint that we are
+			 * stepping over.  */
+			struct breakpoint *sbp = self->breakpoint_being_enabled;
 			if (sbp->enabled)
 				enable_breakpoint(teb, sbp);
 
@@ -812,52 +857,89 @@ process_stopping_on_event(Event_Handler * super, Event * event)
 }
 
 static void
-process_stopping_destroy(Event_Handler * super)
+process_stopping_destroy(struct event_handler *super)
 {
 	struct process_stopping_handler * self = (void *)super;
 	free(self->pids.tasks);
 }
 
+static enum callback_status
+no(struct process_stopping_handler *self)
+{
+	return CBS_STOP;
+}
+
+int
+process_install_stopping_handler(struct Process *proc, struct breakpoint *sbp,
+				 void (*as)(struct process_stopping_handler *),
+				 enum callback_status (*ks)
+					 (struct process_stopping_handler *),
+				 enum callback_status (*uw)
+					(struct process_stopping_handler *))
+{
+	debug(DEBUG_FUNCTION,
+	      "process_install_stopping_handler: pid=%d", proc->pid);
+
+	struct process_stopping_handler *handler = calloc(sizeof(*handler), 1);
+	if (handler == NULL)
+		return -1;
+
+	if (as == NULL)
+		as = &linux_ptrace_disable_and_singlestep;
+	if (ks == NULL)
+		ks = &no;
+	if (uw == NULL)
+		uw = &no;
+
+	handler->super.on_event = process_stopping_on_event;
+	handler->super.destroy = process_stopping_destroy;
+	handler->task_enabling_breakpoint = proc;
+	handler->breakpoint_being_enabled = sbp;
+	handler->on_all_stopped = as;
+	handler->keep_stepping_p = ks;
+	handler->ugly_workaround_p = uw;
+
+	install_event_handler(proc->leader, &handler->super);
+
+	if (each_task(proc->leader, NULL, &send_sigstop,
+		      &handler->pids) != NULL) {
+		destroy_event_handler(proc);
+		return -1;
+	}
+
+	/* And deliver the first fake event, in case all the
+	 * conditions are already fulfilled.  */
+	Event ev = {
+		.type = EVENT_NONE,
+		.proc = proc,
+	};
+	process_stopping_on_event(&handler->super, &ev);
+
+	return 0;
+}
+
 void
 continue_after_breakpoint(Process *proc, struct breakpoint *sbp)
 {
+	debug(DEBUG_PROCESS,
+	      "continue_after_breakpoint: pid=%d, addr=%p",
+	      proc->pid, sbp->addr);
+
 	set_instruction_pointer(proc, sbp->addr);
+
 	if (sbp->enabled == 0) {
 		continue_process(proc->pid);
 	} else {
-		debug(DEBUG_PROCESS,
-		      "continue_after_breakpoint: pid=%d, addr=%p",
-		      proc->pid, sbp->addr);
 #if defined __sparc__  || defined __ia64___ || defined __mips__
 		/* we don't want to singlestep here */
 		continue_process(proc->pid);
 #else
-		struct process_stopping_handler * handler
-			= calloc(sizeof(*handler), 1);
-		if (handler == NULL) {
-			perror("malloc breakpoint disable handler");
-		fatal:
+		if (process_install_stopping_handler
+		    (proc, sbp, NULL, NULL, NULL) < 0) {
+			perror("process_stopping_handler_create");
 			/* Carry on not bothering to re-enable.  */
 			continue_process(proc->pid);
-			return;
 		}
-
-		handler->super.on_event = process_stopping_on_event;
-		handler->super.destroy = process_stopping_destroy;
-		handler->task_enabling_breakpoint = proc;
-		handler->breakpoint_being_enabled = sbp;
-		install_event_handler(proc->leader, &handler->super);
-
-		if (each_task(proc->leader, &send_sigstop,
-			      &handler->pids) != NULL)
-			goto fatal;
-
-		/* And deliver the first fake event, in case all the
-		 * conditions are already fulfilled.  */
-		Event ev;
-		ev.type = EVENT_NONE;
-		ev.proc = proc;
-		process_stopping_on_event(&handler->super, &ev);
 #endif
 	}
 }
@@ -872,18 +954,20 @@ continue_after_breakpoint(Process *proc, struct breakpoint *sbp)
  */
 struct ltrace_exiting_handler
 {
-	Event_Handler super;
+	struct event_handler super;
 	struct pid_set pids;
 };
 
 static Event *
-ltrace_exiting_on_event(Event_Handler * super, Event * event)
+ltrace_exiting_on_event(struct event_handler *super, Event *event)
 {
 	struct ltrace_exiting_handler * self = (void *)super;
 	Process * task = event->proc;
 	Process * leader = task->leader;
 
-	debug(DEBUG_PROCESS, "pid %d; event type %d", task->pid, event->type);
+	debug(DEBUG_PROCESS,
+	      "ltrace_exiting_on_event: pid %d; event type %d",
+	      task->pid, event->type);
 
 	struct pid_task * task_info = get_task_info(&self->pids, task->pid);
 	handle_stopping_event(task_info, &event);
@@ -904,7 +988,7 @@ ltrace_exiting_on_event(Event_Handler * super, Event * event)
 }
 
 static void
-ltrace_exiting_destroy(Event_Handler * super)
+ltrace_exiting_destroy(struct event_handler *super)
 {
 	struct ltrace_exiting_handler * self = (void *)super;
 	free(self->pids.tasks);
@@ -947,7 +1031,7 @@ ltrace_exiting_install_handler(Process * proc)
 	handler->super.destroy = ltrace_exiting_destroy;
 	install_event_handler(proc->leader, &handler->super);
 
-	if (each_task(proc->leader, &send_sigstop,
+	if (each_task(proc->leader, NULL, &send_sigstop,
 		      &handler->pids) != NULL)
 		goto fatal;
 
@@ -975,13 +1059,17 @@ ltrace_exiting_install_handler(Process * proc)
 
 struct process_vfork_handler
 {
-	Event_Handler super;
+	struct event_handler super;
 	void * bp_addr;
 };
 
 static Event *
-process_vfork_on_event(Event_Handler * super, Event * event)
+process_vfork_on_event(struct event_handler *super, Event *event)
 {
+	debug(DEBUG_PROCESS,
+	      "process_vfork_on_event: pid %d; event type %d",
+	      event->proc->pid, event->type);
+
 	struct process_vfork_handler * self = (void *)super;
 	struct breakpoint *sbp;
 	assert(self != NULL);
@@ -989,7 +1077,7 @@ process_vfork_on_event(Event_Handler * super, Event * event)
 	switch (event->type) {
 	case EVENT_BREAKPOINT:
 		/* Remember the vfork return breakpoint.  */
-		if (self->bp_addr == NULL)
+		if (self->bp_addr == 0)
 			self->bp_addr = event->e_un.brk_addr;
 		break;
 
@@ -998,13 +1086,15 @@ process_vfork_on_event(Event_Handler * super, Event * event)
 	case EVENT_EXEC:
 		/* Smuggle back in the vfork return breakpoint, so
 		 * that our parent can trip over it once again.  */
-		if (self->bp_addr != NULL) {
+		if (self->bp_addr != 0) {
 			sbp = dict_find_entry(event->proc->leader->breakpoints,
 					      self->bp_addr);
 			if (sbp != NULL)
-				insert_breakpoint(event->proc->parent,
-						  self->bp_addr,
-						  sbp->libsym, 1);
+				assert(sbp->libsym == NULL);
+			/* We don't mind failing that, it's not a big
+			 * deal to not display one extra vfork return.  */
+			insert_breakpoint(event->proc->parent,
+					  self->bp_addr, NULL);
 		}
 
 		continue_process(event->proc->parent->pid);
diff --git a/sysdeps/linux-gnu/trace.h b/sysdeps/linux-gnu/trace.h
new file mode 100644
index 0000000..0f40709
--- /dev/null
+++ b/sysdeps/linux-gnu/trace.h
@@ -0,0 +1,119 @@
+/*
+ * This file is part of ltrace.
+ * Copyright (C) 2011,2012 Petr Machata, Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef _LTRACE_LINUX_TRACE_H_
+#define _LTRACE_LINUX_TRACE_H_
+
+/* This publishes some Linux-specific data structures used for process
+ * handling.  */
+
+/**
+ * This is used for bookkeeping related to PIDs that the event
+ * handlers work with.
+ */
+struct pid_task {
+	pid_t pid;	/* This may be 0 for tasks that exited
+			 * mid-handling.  */
+	int sigstopped : 1;
+	int got_event : 1;
+	int delivered : 1;
+	int vforked : 1;
+	int sysret : 1;
+};
+
+struct pid_set {
+	struct pid_task *tasks;
+	size_t count;
+	size_t alloc;
+};
+
+/**
+ * Breakpoint re-enablement.  When we hit a breakpoint, we must
+ * disable it, single-step, and re-enable it.  That single-step can be
+ * done only by one task in a task group, while others are stopped,
+ * otherwise the processes would race for who sees the breakpoint
+ * disabled and who doesn't.  The following is to keep track of it
+ * all.
+ */
+struct process_stopping_handler
+{
+	struct event_handler super;
+
+	/* The task that is doing the re-enablement.  */
+	struct Process *task_enabling_breakpoint;
+
+	/* The breakpoint being re-enabled.  */
+	struct breakpoint *breakpoint_being_enabled;
+
+	/* Addresses of artificial atomic skip breakpoints; 0 if unused.  */
+	void *atomic_skip_bp_addrs[2];
+
+	/* When all tasks are stopped, this callback gets called.  */
+	void (*on_all_stopped)(struct process_stopping_handler *);
+
+	/* When we get a singlestep event, this is called to decide
+	 * whether to keep stepping (CBS_CONT), or whether to stop,
+	 * re-enable the breakpoint, sink remaining signals, and
+	 * continue everyone (CBS_STOP).  */
+	enum callback_status (*keep_stepping_p)
+		(struct process_stopping_handler *);
+
+	/* Whether we need to use ugly workaround to get around
+	 * various problems with singlestepping.  */
+	enum callback_status (*ugly_workaround_p)
+		(struct process_stopping_handler *);
+
+	enum {
+		/* We are waiting for everyone to land in t/T.  */
+		psh_stopping = 0,
+
+		/* We are doing the PTRACE_SINGLESTEP.  */
+		psh_singlestep,
+
+		/* We are waiting for all the SIGSTOPs to arrive so
+		 * that we can sink them.  */
+		psh_sinking,
+
+		/* This is for tracking the ugly workaround.  */
+		psh_ugly_workaround,
+	} state;
+
+	int exiting;
+
+	struct pid_set pids;
+};
+
+/* Allocate a process stopping handler, initialize it and install it.
+ * Return 0 on success or a negative value on failure.  Pass NULL for
+ * each callback to use a default instead.  The default for
+ * ON_ALL_STOPPED is LINUX_PTRACE_DISABLE_AND_SINGLESTEP, the default
+ * for KEEP_STEPPING_P and UGLY_WORKAROUND_P is "no".  */
+int process_install_stopping_handler
+	(struct Process *proc, struct breakpoint *sbp,
+	 void (*on_all_stopped)(struct process_stopping_handler *),
+	 enum callback_status (*keep_stepping_p)
+		 (struct process_stopping_handler *),
+	 enum callback_status (*ugly_workaround_p)
+		(struct process_stopping_handler *));
+
+void linux_ptrace_disable_and_singlestep(struct process_stopping_handler *self);
+void linux_ptrace_disable_and_continue(struct process_stopping_handler *self);
+
+#endif /* _LTRACE_LINUX_TRACE_H_ */
diff --git a/sysdeps/linux-gnu/x86_64/plt.c b/sysdeps/linux-gnu/x86_64/plt.c
index b53ff44..bb1b2b1 100644
--- a/sysdeps/linux-gnu/x86_64/plt.c
+++ b/sysdeps/linux-gnu/x86_64/plt.c
@@ -1,5 +1,7 @@
 #include <gelf.h>
+#include "proc.h"
 #include "common.h"
+#include "library.h"
 
 GElf_Addr
 arch_plt_sym_val(struct ltelf *lte, size_t ndx, GElf_Rela * rela) {
diff --git a/sysdeps/linux-gnu/x86_64/regs.c b/sysdeps/linux-gnu/x86_64/regs.c
index ed1f118..0ff3281 100644
--- a/sysdeps/linux-gnu/x86_64/regs.c
+++ b/sysdeps/linux-gnu/x86_64/regs.c
@@ -4,7 +4,7 @@
 #include <sys/ptrace.h>
 #include <sys/reg.h>
 
-#include "common.h"
+#include "proc.h"
 
 #if (!defined(PTRACE_PEEKUSER) && defined(PTRACE_PEEKUSR))
 # define PTRACE_PEEKUSER PTRACE_PEEKUSR
diff --git a/sysdeps/linux-gnu/x86_64/trace.c b/sysdeps/linux-gnu/x86_64/trace.c
index d0299d9..0d3f693 100644
--- a/sysdeps/linux-gnu/x86_64/trace.c
+++ b/sysdeps/linux-gnu/x86_64/trace.c
@@ -12,6 +12,7 @@
 
 #include "common.h"
 #include "ptrace.h"
+#include "proc.h"
 
 #if (!defined(PTRACE_PEEKUSER) && defined(PTRACE_PEEKUSR))
 # define PTRACE_PEEKUSER PTRACE_PEEKUSR
@@ -42,23 +43,35 @@ get_arch_dep(Process *proc) {
 /* Returns 1 if syscall, 2 if sysret, 0 otherwise.
  */
 int
-syscall_p(Process *proc, int status, int *sysnum) {
+syscall_p(struct Process *proc, int status, int *sysnum)
+{
 	if (WIFSTOPPED(status)
 	    && WSTOPSIG(status) == (SIGTRAP | proc->tracesysgood)) {
+		struct callstack_element *elem = NULL;
+		if (proc->callstack_depth > 0)
+			elem = proc->callstack + proc->callstack_depth - 1;
+
 		long int ret = ptrace(PTRACE_PEEKUSER, proc->pid, 8 * ORIG_RAX, 0);
-		if (ret == -1 && errno)
-			return -1;
+		if (ret == -1) {
+			if (errno)
+				return -1;
+			/* Otherwise, ORIG_RAX == -1 means that the
+			 * system call should not be restarted.  In
+			 * that case rely on what we have on
+			 * stack.  */
+			if (elem != NULL && elem->is_syscall)
+				ret = elem->c_un.syscall;
+		}
 
 		*sysnum = ret;
-		if (proc->callstack_depth > 0 &&
-				proc->callstack[proc->callstack_depth - 1].is_syscall &&
-				proc->callstack[proc->callstack_depth - 1].c_un.syscall == *sysnum) {
+		debug(DEBUG_FUNCTION, "sysnum=%ld %p %d\n", ret,
+		      get_instruction_pointer(proc), errno);
+		if (elem != NULL && elem->is_syscall
+		    && elem->c_un.syscall == *sysnum)
 			return 2;
-		}
 
-		if (*sysnum >= 0) {
+		if (*sysnum >= 0)
 			return 1;
-		}
 	}
 	return 0;
 }
diff --git a/sysdeps/sysdep.h b/sysdeps/sysdep.h
index 1d19c6f..96b3857 100644
--- a/sysdeps/sysdep.h
+++ b/sysdeps/sysdep.h
@@ -1 +1,31 @@
+#ifndef LTRACE_SYSDEP_H
+#define LTRACE_SYSDEP_H
+
 #include <arch.h>
+
+#ifndef ARCH_HAVE_LTELF_DATA
+struct arch_ltelf_data {
+};
+#endif
+
+#ifndef ARCH_HAVE_BREAKPOINT_DATA
+struct arch_breakpoint_data {
+};
+#endif
+
+#ifndef ARCH_HAVE_LIBRARY_SYMBOL_DATA
+struct arch_library_symbol_data {
+};
+#endif
+
+#ifndef ARCH_HAVE_LIBRARY_DATA
+struct arch_library_data {
+};
+#endif
+
+#ifndef ARCH_HAVE_PROCESS_DATA
+struct arch_process_data {
+};
+#endif
+
+#endif /* LTRACE_SYSDEP_H */
diff --git a/testsuite/ltrace.main/filt.c b/testsuite/ltrace.main/filt.c
new file mode 100644
index 0000000..f31a30d
--- /dev/null
+++ b/testsuite/ltrace.main/filt.c
@@ -0,0 +1,8 @@
+void func1(void);
+
+int
+main(int argc, char *argv[])
+{
+  func1();
+  return 0;
+}
diff --git a/testsuite/ltrace.main/filt1.c b/testsuite/ltrace.main/filt1.c
new file mode 100644
index 0000000..a0eb906
--- /dev/null
+++ b/testsuite/ltrace.main/filt1.c
@@ -0,0 +1,7 @@
+void func2(void);
+
+void
+func1(void)
+{
+	func2();
+}
diff --git a/testsuite/ltrace.main/filt2.c b/testsuite/ltrace.main/filt2.c
new file mode 100644
index 0000000..24999a3
--- /dev/null
+++ b/testsuite/ltrace.main/filt2.c
@@ -0,0 +1,7 @@
+#include <stdio.h>
+
+void
+func2(void)
+{
+	puts("func2");
+}
diff --git a/testsuite/ltrace.main/filters.exp b/testsuite/ltrace.main/filters.exp
new file mode 100644
index 0000000..1a9a8f7
--- /dev/null
+++ b/testsuite/ltrace.main/filters.exp
@@ -0,0 +1,79 @@
+# Copyright (C) 2012 Petr Machata, Red Hat Inc.
+
+set testfile "filt"
+set srcfile0 $srcdir/$subdir/$testfile.c
+set binfile0 $objdir/$subdir/$testfile
+set base1 "filt1"
+set srcfile1 $srcdir/$subdir/$base1.c
+set binfile1 $objdir/$subdir/lib$base1.so
+set base2 "filt2"
+set srcfile2 $srcdir/$subdir/$base2.c
+set binfile2 $objdir/$subdir/lib$base2.so
+
+if [get_compiler_info $binfile0] {
+  return -1
+}
+
+verbose "compiling source file now....."
+if { [ltrace_compile_shlib $srcfile2 $binfile2 debug ] != "" 
+  || [ltrace_compile_shlib $srcfile1 $binfile1 debug ] != "" 
+  || [ltrace_compile $srcfile0 $binfile0 executable [list debug shlib=$binfile1 shlib=$binfile2] ] != ""} {
+  send_user "Testcase compile failed, so all tests in this file will automatically fail.\n"
+}
+
+ltrace_options "-e*"
+set exec_output [ltrace_runtest $objdir/$subdir $binfile0]
+
+# Check the output of this program.
+verbose "ltrace runtest output: $exec_output\n"
+if [regexp {ELF from incompatible architecture} $exec_output] {
+	fail "32-bit ltrace can not perform on 64-bit PUTs and rebuild ltrace in 64 bit mode!"
+	return 
+} elseif [ regexp {Couldn't get .hash data} $exec_output ] {
+	fail "Couldn't get .hash data!"
+	return
+}
+
+ltrace_verify_output ${binfile0}.ltrace "filt->func1" 1
+ltrace_verify_output ${binfile0}.ltrace "libfilt1.so->func2" 1
+ltrace_verify_output ${binfile0}.ltrace "libfilt2.so->puts" 1
+ltrace_verify_output ${binfile0}.ltrace "func2 resumed" 1
+ltrace_verify_output ${binfile0}.ltrace "func1 resumed" 1
+
+# I simply can't figure out how to pass an empty string to
+# ltrace_options without that getting interpreted as {}, so pass
+# something harmless instead.
+ltrace_options "-b"
+ltrace_runtest $objdir/$subdir $binfile0
+ltrace_verify_output ${binfile0}.ltrace "^func1(.*)" 1
+
+ltrace_options "-e@MAIN"
+ltrace_runtest $objdir/$subdir $binfile0
+ltrace_verify_output ${binfile0}.ltrace "filt->func1(.*)" 1
+
+ltrace_options "-e@libfilt1.so"
+ltrace_runtest $objdir/$subdir $binfile0
+ltrace_verify_output ${binfile0}.ltrace "libfilt1.so->func2(.*)" 1
+
+ltrace_options "-e@libfilt2.so"
+ltrace_runtest $objdir/$subdir $binfile0
+ltrace_verify_output ${binfile0}.ltrace "libfilt2.so->puts(.*)" 1
+
+ltrace_options "-e@libfilt*"
+ltrace_runtest $objdir/$subdir $binfile0
+ltrace_verify_output ${binfile0}.ltrace "libfilt1.so->func2(" 1
+ltrace_verify_output ${binfile0}.ltrace "libfilt2.so->puts(.*)" 1
+ltrace_verify_output ${binfile0}.ltrace "func2 resumed" 1
+
+ltrace_options "-efunc*"
+ltrace_runtest $objdir/$subdir $binfile0
+ltrace_verify_output ${binfile0}.ltrace "filt->func1(" 1
+ltrace_verify_output ${binfile0}.ltrace "libfilt1.so->func2(.*)" 1
+ltrace_verify_output ${binfile0}.ltrace "func1 resumed" 1
+
+# Check that we handle breakpoint on both PLT entry and entry point
+ltrace_options "-efunc1" "-xfunc1"
+ltrace_runtest $objdir/$subdir $binfile0
+ltrace_verify_output ${binfile0}.ltrace "filt->func1(" 1
+ltrace_verify_output ${binfile0}.ltrace "func1@libfilt1.so(.*)" 1
+ltrace_verify_output ${binfile0}.ltrace "func1 resumed" 1
diff --git a/testsuite/ltrace.main/parameters.c b/testsuite/ltrace.main/parameters.c
index 154de84..fb46dfe 100644
--- a/testsuite/ltrace.main/parameters.c
+++ b/testsuite/ltrace.main/parameters.c
@@ -36,10 +36,16 @@ typedef enum {
 void func_enum(color_t);
 void func_typedef(color_t);
 
-void func_work (char *x);
-void func_call (char *x, char* y, void (*cb) (char *));
+void func_work(char *x);
+void func_call(char *x, char *y, void (*cb)(char *));
 
-int 
+void
+call_func_work (char *x)
+{
+	func_work(x);
+}
+
+int
 main ()
 {
   int x = 17;
@@ -124,7 +130,7 @@ main ()
   {
     char x[10] = {};
     char y[10] = {};
-    func_call (x, y, func_work);
+    func_call(x, y, call_func_work);
   }
 
   return 0;
diff --git a/testsuite/ltrace.minor/libdl-simple.exp b/testsuite/ltrace.minor/libdl-simple.exp
index 9957001..91af5bd 100644
--- a/testsuite/ltrace.minor/libdl-simple.exp
+++ b/testsuite/ltrace.minor/libdl-simple.exp
@@ -28,25 +28,5 @@ if [regexp {ELF from incompatible architecture} $exec_output] {
 	return
 }
 
-# Verify the time for calling sleep.
-set fd [ open $objdir/$subdir/$binfile.ltrace r]
-set FOUND 0
-while { [gets $fd line] >= 0 } {
-	# match the line with sleep and extract the spent time in sleep and sleep argument.
-	if [ regexp {(test_libdl)\(} $line match tester ] then {
-		verbose "test_libdl = $tester"
-
-		if { $tester == "test_libdl" } then {
-			pass "Successfully traced libdl loaded function."
-		} else {
-			fail "Failed to trace libdl loaded function."
-		}
-	set FOUND 1
-	break
-        }
-}
-close $fd
-
-if {$FOUND != 1} then {
-	fail "Fail to trace libdl loaded function!"
-}
+set pattern "test_libdl@liblibdl-simple.so"
+ltrace_verify_output ${objdir}/${subdir}/${testfile}.ltrace $pattern 1
diff -urp ltrace-0.6.0-orig/Makefile.in ltrace-0.6.0/Makefile.in
--- ltrace-0.6.0-orig/Makefile.in	2012-05-01 00:24:18.824433420 +0200
+++ ltrace-0.6.0/Makefile.in	2012-05-01 00:25:01.285738780 +0200
@@ -65,7 +65,7 @@ libltrace_la_DEPENDENCIES = $(am__DEPEND
 am_libltrace_la_OBJECTS = breakpoints.lo debug.lo demangle.lo dict.lo \
 	display_args.lo ltrace-elf.lo execute_program.lo \
 	handle_event.lo libltrace.lo options.lo output.lo proc.lo \
-	read_config_file.lo summary.lo
+	read_config_file.lo summary.lo library.lo filter.lo glob.lo
 libltrace_la_OBJECTS = $(am_libltrace_la_OBJECTS)
 am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" \
 	"$(DESTDIR)$(docdir)" "$(DESTDIR)$(sysconfdir)"
@@ -311,7 +314,10 @@ libltrace_la_SOURCES = \
 	output.c \
 	proc.c \
 	read_config_file.c  \
-	summary.c
+	summary.c \
+	library.c \
+	filter.c \
+	glob.c
 
 libltrace_la_LIBADD = \
 	$(libelf_LIBS) \
@@ -338,7 +344,10 @@ noinst_HEADERS = \
 	ltrace.h \
 	options.h \
 	output.h \
-	read_config_file.h
+	read_config_file.h \
+	library.h \
+	filter.h \
+	glob.h
 
 dist_man1_MANS = \
 	ltrace.1
diff -urp ltrace-0.6.0-orig/sysdeps/linux-gnu/Makefile.in ltrace-0.6.0/sysdeps/linux-gnu/Makefile.in
--- ltrace-0.6.0-orig/sysdeps/linux-gnu/Makefile.in	2012-05-01 00:24:18.864434649 +0200
+++ ltrace-0.6.0/sysdeps/linux-gnu/Makefile.in	2012-05-01 00:24:59.835694202 +0200
@@ -260,7 +263,8 @@ ___libos_la_LIBADD = \
 noinst_HEADERS = \
 	arch_syscallent.h \
 	signalent1.h \
-	syscallent1.h
+	syscallent1.h \
+	trace.h
 
 EXTRA_DIST = \
 	arch_mksyscallent \
diff --git a/ltrace-elf.c b/ltrace-elf.c
index a311c5f..b1af070 100644
--- a/ltrace-elf.c
+++ b/ltrace-elf.c
@@ -175,8 +175,8 @@ need_data(Elf_Data *data, GElf_Xword offset, GElf_Xword size)
 {
 	assert(data != NULL);
 	if (data->d_size < size || offset > data->d_size - size) {
-		debug(1, "Not enough data to read %zd-byte value"
-		      " at offset %zd.", size, offset);
+		debug(1, "Not enough data to read %"PRIu64"-byte value"
+		      " at offset %"PRIu64".", size, offset);
 		return -1;
 	}
 	return 0;
diff --git a/sysdeps/linux-gnu/ppc/plt.c b/sysdeps/linux-gnu/ppc/plt.c
index 3b6a25f..9717738 100644
--- a/sysdeps/linux-gnu/ppc/plt.c
+++ b/sysdeps/linux-gnu/ppc/plt.c
@@ -230,7 +230,9 @@ arch_translate_address_dyn(struct Process *proc,
 			error(0, errno, "dynamic .opd translation of %p", addr);
 			return -1;
 		}
-		*ret = (target_address_t)value;
+		/* XXX The double cast should be removed when
+		 * target_address_t becomes an integral type.  */
+		*ret = (target_address_t)(uintptr_t)value;
 		return 0;
 	}
 
@@ -243,14 +245,17 @@ arch_translate_address(struct ltelf *lte,
 		       target_address_t addr, target_address_t *ret)
 {
 	if (lte->ehdr.e_machine == EM_PPC64) {
-		GElf_Xword offset = (GElf_Addr)addr - lte->arch.opd_base;
+		/* XXX The double cast should be removed when
+		 * target_address_t becomes an integral type.  */
+		GElf_Xword offset
+			= (GElf_Addr)(uintptr_t)addr - lte->arch.opd_base;
 		uint64_t value;
 		if (elf_read_u64(lte->arch.opd_data, offset, &value) < 0) {
 			error(0, 0, "static .opd translation of %p: %s", addr,
 			      elf_errmsg(-1));
 			return -1;
 		}
-		*ret = (target_address_t)(value + lte->bias);
+		*ret = (target_address_t)(uintptr_t)(value + lte->bias);
 		return 0;
 	}