485fd76
#!/usr/bin/python
485fd76
'''
485fd76
From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
485fd76
to be extended with Python code e.g. for library-specific data visualizations,
485fd76
such as for the C++ STL types.  Documentation on this API can be seen at:
485fd76
http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
485fd76
485fd76
485fd76
This python module deals with the case when the process being debugged (the
485fd76
"inferior process" in gdb parlance) is itself python, or more specifically,
485fd76
linked against libpython.  In this situation, almost every item of data is a
485fd76
(PyObject*), and having the debugger merely print their addresses is not very
485fd76
enlightening.
485fd76
485fd76
This module embeds knowledge about the implementation details of libpython so
485fd76
that we can emit useful visualizations e.g. a string, a list, a dict, a frame
485fd76
giving file/line information and the state of local variables
485fd76
485fd76
In particular, given a gdb.Value corresponding to a PyObject* in the inferior
485fd76
process, we can generate a "proxy value" within the gdb process.  For example,
485fd76
given a PyObject* in the inferior process that is in fact a PyListObject*
485fd76
holding three PyObject* that turn out to be PyStringObject* instances, we can
485fd76
generate a proxy value within the gdb process that is a list of strings:
485fd76
  ["foo", "bar", "baz"]
485fd76
485fd76
We try to defer gdb.lookup_type() invocations for python types until as late as
485fd76
possible: for a dynamically linked python binary, when the process starts in
485fd76
the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
485fd76
the type names are known to the debugger
485fd76
485fd76
The module also extends gdb with some python-specific commands.
485fd76
'''
485fd76
485fd76
import sys

import gdb
485fd76
485fd76
# Resolve the gdb.Type objects for a few basic C types once, at import time:
_type_char_ptr = gdb.lookup_type('char').pointer()  # char*
_type_void_ptr = gdb.lookup_type('void').pointer()  # void*
_type_size_t = gdb.lookup_type('size_t')

# sizeof(void*) as reported by gdb; used for pointer-alignment arithmetic
SIZEOF_VOID_P = _type_void_ptr.sizeof
485fd76
485fd76
485fd76
# Bit masks for the tp_flags field of a PyTypeObject, as tested by
# PyObjectPtr.subclass_from_type().
#
# Plain int literals are used rather than the "1L" long suffix: integer
# shifts are promoted to long automatically where needed, and the suffix is
# a syntax error on Python 3.
Py_TPFLAGS_HEAPTYPE = (1 << 9)

Py_TPFLAGS_INT_SUBCLASS      = (1 << 23)
Py_TPFLAGS_LONG_SUBCLASS     = (1 << 24)
Py_TPFLAGS_LIST_SUBCLASS     = (1 << 25)
Py_TPFLAGS_TUPLE_SUBCLASS    = (1 << 26)
Py_TPFLAGS_STRING_SUBCLASS   = (1 << 27)
Py_TPFLAGS_UNICODE_SUBCLASS  = (1 << 28)
Py_TPFLAGS_DICT_SUBCLASS     = (1 << 29)
Py_TPFLAGS_BASE_EXC_SUBCLASS = (1 << 30)
Py_TPFLAGS_TYPE_SUBCLASS     = (1 << 31)
485fd76
485fd76
485fd76
class NullPyObjectPtr(RuntimeError):
    """
    Raised by PyObjectPtr.field() when the wrapped pointer is NULL.

    Being a RuntimeError, it is also caught by the generic
    "except RuntimeError" fail-safe handlers in this module.
    """
485fd76
485fd76
485fd76
def safety_limit(val, limit=100):
    """
    Cap an integer value read from the process being debugged.

    Given an integer value from the inferior, limit it to some safety
    threshold so that arbitrary breakage within said process doesn't break
    the gdb process too much (e.g. sizes of iterations, sizes of lists).

    val   -- the untrusted value
    limit -- the largest value that will be returned (defaults to 100)

    Returns the smaller of val and limit; values below the limit (including
    negative ones) are passed through unchanged.
    """
    return min(val, limit)
485fd76
485fd76
485fd76
def safe_range(val):
    """
    As per xrange(val), but don't trust the value too much: the upper bound
    is first passed through safety_limit() in case the data was corrupted.
    """
    capped = safety_limit(val)
    return xrange(capped)
485fd76
485fd76
485fd76
class PyObjectPtr(object):
    """
    Class wrapping a gdb.Value that's either a (PyObject*) within the
    inferior process, or some subclass pointer e.g. (PyStringObject*)

    There will be a subclass for every refined PyObject type that we care
    about.

    Note that at every stage the underlying pointer could be NULL, point
    to corrupt data, etc; this is the debugger, after all.
    """
    # Name of the C struct type that the wrapped pointer refers to;
    # overridden by subclasses and resolved lazily by get_gdb_type()
    _typename = 'PyObject'

    def __init__(self, gdbval, cast_to=None):
        '''
        Wrap the pointer gdbval, first casting it to the gdb.Type cast_to if
        one is supplied.
        '''
        if cast_to:
            self._gdbval = gdbval.cast(cast_to)
        else:
            self._gdbval = gdbval

    def field(self, name):
        '''
        Get the gdb.Value for the given field within the PyObject, coping with
        some python 2 versus python 3 differences.

        Various libpython types are defined using the "PyObject_HEAD" and
        "PyObject_VAR_HEAD" macros.

        In Python 2, these are defined so that "ob_type" and (for a var
        object) "ob_size" are fields of the type in question.

        In Python 3, this is defined as an embedded PyVarObject type thus:
           PyVarObject ob_base;
        so that the "ob_size" field is located inside the "ob_base" field, and
        the "ob_type" is most easily accessed by casting back to a (PyObject*).

        Raises NullPyObjectPtr if the wrapped pointer is NULL.
        '''
        if self.is_null():
            raise NullPyObjectPtr(self)

        if name == 'ob_type':
            # Works for both layouts: every object starts with a PyObject
            pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
            return pyo_ptr.dereference()[name]

        if name == 'ob_size':
            try:
                # Python 2:
                return self._gdbval.dereference()[name]
            except RuntimeError:
                # Python 3:
                return self._gdbval.dereference()['ob_base'][name]

        # General case: look it up inside the object:
        return self._gdbval.dereference()[name]

    def type(self):
        '''Get a PyTypeObjectPtr wrapping this object's ob_type field'''
        return PyTypeObjectPtr(self.field('ob_type'))

    def is_null(self):
        '''Is the wrapped pointer NULL?'''
        return long(self._gdbval) == 0

    def safe_tp_name(self):
        '''
        Get the tp_name of this object's type as a string, or 'unknown' if
        it cannot be read.
        '''
        try:
            return self.type().field('tp_name').string()
        except RuntimeError:
            # Either a NULL pointer was hit on the way (NullPyObjectPtr is a
            # RuntimeError), or we can't even read the object at all
            return 'unknown'

    def proxyval(self):
        '''
        Scrape a value from the inferior process, and try to represent it
        within the gdb process, whilst (hopefully) avoiding crashes when
        the remote data is corrupt.

        Derived classes will override this.

        For example, a PyIntObject* with ob_ival 42 in the inferior process
        should result in an int(42) in this process.

        This base implementation returns a placeholder whose repr() gives the
        type name and remote address.
        '''

        class FakeRepr(object):
            """
            Class representing a non-descript PyObject* value in the inferior
            process for when we don't have a custom scraper, intended to have
            a sane repr().
            """

            def __init__(self, tp_name, address):
                self.tp_name = tp_name
                self.address = address

            def __repr__(self):
                # For the NULL pointer, we have no way of knowing a type, so
                # special-case it as per
                # http://bugs.python.org/issue8032#msg100882
                if self.address == 0:
                    return '0x0'
                return '<%s at remote 0x%x>' % (self.tp_name, self.address)

        return FakeRepr(self.safe_tp_name(),
                        long(self._gdbval))

    @classmethod
    def subclass_from_type(cls, t):
        '''
        Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
        (PyTypeObject*), determine the corresponding subclass of PyObjectPtr
        to use

        Ideally, we would look up the symbols for the global types, but that
        isn't working yet:
          (gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
          Traceback (most recent call last):
            File "<string>", line 1, in <module>
          NotImplementedError: Symbol type not yet supported in Python scripts.
          Error while executing Python code.

        For now, we use tp_flags, after doing some string comparisons on the
        tp_name for some special-cases that don't seem to be visible through
        flags

        Returns cls itself when nothing more specific can be determined.
        '''
        try:
            tp_name = t.field('tp_name').string()
            tp_flags = int(t.field('tp_flags'))
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            return cls

        name_map = {'bool': PyBoolObjectPtr,
                    'classobj': PyClassObjectPtr,
                    'instance': PyInstanceObjectPtr,
                    'NoneType': PyNoneStructPtr,
                    'frame': PyFrameObjectPtr,
                    }
        if tp_name in name_map:
            return name_map[tp_name]

        # Tested in this order; the first flag found set decides the result,
        # so the heap-type check takes precedence over the builtin-subclass
        # checks.
        flag_map = (
            (Py_TPFLAGS_HEAPTYPE, HeapTypeObjectPtr),
            (Py_TPFLAGS_INT_SUBCLASS, PyIntObjectPtr),
            (Py_TPFLAGS_LONG_SUBCLASS, PyLongObjectPtr),
            (Py_TPFLAGS_LIST_SUBCLASS, PyListObjectPtr),
            (Py_TPFLAGS_TUPLE_SUBCLASS, PyTupleObjectPtr),
            (Py_TPFLAGS_STRING_SUBCLASS, PyStringObjectPtr),
            (Py_TPFLAGS_UNICODE_SUBCLASS, PyUnicodeObjectPtr),
            (Py_TPFLAGS_DICT_SUBCLASS, PyDictObjectPtr),
        )
        for flag, subclass in flag_map:
            if tp_flags & flag:
                return subclass

        # Py_TPFLAGS_BASE_EXC_SUBCLASS and Py_TPFLAGS_TYPE_SUBCLASS do not
        # have a dedicated wrapper yet, so use the base class:
        return cls

    @classmethod
    def from_pyobject_ptr(cls, gdbval):
        '''
        Try to locate the appropriate derived class dynamically, and cast
        the pointer accordingly.

        If anything goes wrong (NULL pointer, unreadable memory, a struct
        type gdb does not know about), fall back to wrapping the uncast
        pointer in the class this method was called on.
        '''
        try:
            p = PyObjectPtr(gdbval)
            # Use a separate name: rebinding cls here would make the fallback
            # below return a refined subclass wrapping a pointer that was
            # never cast to the matching struct type.
            subclass = cls.subclass_from_type(p.type())
            return subclass(gdbval, cast_to=subclass.get_gdb_type())
        except RuntimeError:
            # Handle any kind of error e.g. NULL ptrs by simply using the base
            # class
            pass
        return cls(gdbval)

    @classmethod
    def get_gdb_type(cls):
        '''Get the gdb.Type for a pointer to this class's C struct'''
        return gdb.lookup_type(cls._typename).pointer()
485fd76
485fd76
485fd76
class InstanceProxy(object):
    """
    Stand-in within the gdb process for a class instance in the inferior:
    records the class name, the scraped attribute dictionary and the remote
    address, and renders them via repr().
    """

    def __init__(self, cl_name, attrdict, address):
        self.cl_name = cl_name
        self.attrdict = attrdict
        self.address = address

    def __repr__(self):
        # Without a real dict of attributes, show the name and address only
        if not isinstance(self.attrdict, dict):
            return '<%s at remote 0x%x>' % (self.cl_name,
                                            self.address)

        attr_strs = []
        for name, value in self.attrdict.iteritems():
            attr_strs.append("%s=%r" % (name, value))
        return '<%s(%s) at remote 0x%x>' % (self.cl_name,
                                            ', '.join(attr_strs),
                                            self.address)
485fd76
        
485fd76
485fd76
def _PyObject_VAR_SIZE(typeobj, nitems):
    """
    Compute tp_basicsize + nitems * tp_itemsize for the given type wrapper,
    rounded up to a multiple of SIZEOF_VOID_P (which is assumed to be a power
    of two), and cast the result to size_t.
    """
    align_mask = SIZEOF_VOID_P - 1
    unaligned = (typeobj.field('tp_basicsize') +
                 nitems * typeobj.field('tp_itemsize') +
                 align_mask)
    return (unaligned & ~align_mask).cast(_type_size_t)
485fd76
485fd76
class HeapTypeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyObject* whose type has
    Py_TPFLAGS_HEAPTYPE set in its tp_flags.
    """
    _typename = 'PyObject'

    def proxyval(self):
        '''
        Support for new-style classes.

        Currently we just locate the dictionary using a transliteration to
        python of _PyObject_GetDictPtr, ignoring descriptors

        Returns an InstanceProxy; if the dictionary can't be located or read,
        the proxy carries an empty attribute dict.
        '''
        attr_dict = {}

        try:
            typeobj = self.type()
            dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
            if dictoffset != 0:
                if dictoffset < 0:
                    # A negative offset is taken relative to the end of the
                    # variable-sized object, so work out the object's size:
                    type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
                    tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
                    if tsize < 0:
                        tsize = -tsize
                    size = _PyObject_VAR_SIZE(typeobj, tsize)
                    dictoffset += size
                    # These values come from the (possibly corrupt) inferior,
                    # so a violation must take the fail-safe path below
                    # rather than abort with an AssertionError:
                    if dictoffset <= 0 or dictoffset % SIZEOF_VOID_P != 0:
                        raise RuntimeError('implausible tp_dictoffset')

                dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset
                PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
                dictptr = dictptr.cast(PyObjectPtrPtr)
                attr_dict = PyObjectPtr.from_pyobject_ptr(dictptr.dereference()).proxyval()
        except RuntimeError:
            # Corrupt data somewhere; fail safe
            pass

        tp_name = self.safe_tp_name()

        # New-style class:
        return InstanceProxy(tp_name, attr_dict, long(self._gdbval))
485fd76
485fd76
485fd76
class PyBoolObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
    <bool> instances (Py_True/Py_False) within the process being debugged.
    """
    _typename = 'PyBoolObject'

    def proxyval(self):
        # Any non-zero ob_ival counts as True
        return bool(int_from_int(self.field('ob_ival')))
485fd76
485fd76
485fd76
class PyClassObjectPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that's a PyClassObject*, i.e. a <classobj>
    instance within the process being debugged.

    No custom proxyval(): the generic PyObjectPtr representation is used.
    """
    _typename = 'PyClassObject'
485fd76
485fd76
485fd76
class PyCodeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
    within the process being debugged.
    """
    _typename = 'PyCodeObject'

    def addr2line(self, addrq):
        '''
        Get the line number for a given bytecode offset

        Analogous to PyCode_Addr2Line; translated from pseudocode in
        Objects/lnotab_notes.txt

        addrq -- the bytecode offset to look up

        Returns the line number as an int.
        '''
        co_lnotab = PyObjectPtr.from_pyobject_ptr(self.field('co_lnotab')).proxyval()

        # Initialize lineno to co_firstlineno as per PyCode_Addr2Line
        # not 0, as lnotab_notes.txt has it:
        lineno = int_from_int(self.field('co_firstlineno'))

        # co_lnotab is walked as (address increment, line increment) pairs of
        # single characters:
        addr = 0
        for addr_incr, line_incr in zip(co_lnotab[::2], co_lnotab[1::2]):
            addr += ord(addr_incr)
            if addr > addrq:
                return lineno
            lineno += ord(line_incr)
        return lineno
485fd76
485fd76
class PyDictObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
    within the process being debugged.
    """
    _typename = 'PyDictObject'

    def proxyval(self):
        # Walk the slots of ma_table (at most safety_limit() of them),
        # keeping those entries whose value pointer is non-NULL.
        proxy = {}
        for slot in safe_range(self.field('ma_mask') + 1):
            entry = self.field('ma_table') + slot
            value_ptr = PyObjectPtr.from_pyobject_ptr(entry['me_value'])
            if value_ptr.is_null():
                continue
            key_ptr = PyObjectPtr.from_pyobject_ptr(entry['me_key'])
            proxy[key_ptr.proxyval()] = value_ptr.proxyval()
        return proxy
485fd76
485fd76
485fd76
class PyInstanceObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyInstanceObject* i.e. an instance
    of an old-style class within the process being debugged.
    """
    _typename = 'PyInstanceObject'

    def proxyval(self):
        wrap = PyObjectPtr.from_pyobject_ptr

        # Name of the class, read via the instance's in_class:
        class_ptr = wrap(self.field('in_class'))
        class_name = wrap(class_ptr.field('cl_name')).proxyval()

        # Dictionary of instance attributes:
        attrs = wrap(self.field('in_dict')).proxyval()

        # Old-style class:
        return InstanceProxy(class_name, attrs, long(self._gdbval))
485fd76
485fd76
485fd76
class PyIntObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyIntObject*; the proxy value is the
    object's ob_ival as a python int.
    """
    _typename = 'PyIntObject'

    def proxyval(self):
        return int_from_int(self.field('ob_ival'))
485fd76
485fd76
485fd76
class PyListObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyListObject* i.e. a list instance
    within the process being debugged.
    """
    _typename = 'PyListObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        return self.field('ob_item')[i]

    def proxyval(self):
        # Build a local list of proxies; the length is capped by safe_range()
        length = int_from_int(self.field('ob_size'))
        items = []
        for i in safe_range(length):
            items.append(PyObjectPtr.from_pyobject_ptr(self[i]).proxyval())
        return items
485fd76
485fd76
485fd76
class PyLongObjectPtr(PyObjectPtr):
485fd76
    _typename = 'PyLongObject'
485fd76
485fd76
    def proxyval(self):
485fd76
        '''
485fd76
        Python's Include/longobjrep.h has this declaration:
485fd76
           struct _longobject {
485fd76
               PyObject_VAR_HEAD
485fd76
               digit ob_digit[1];
485fd76
           };
485fd76
485fd76
        with this description:
485fd76
            The absolute value of a number is equal to
485fd76
                 SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
485fd76
            Negative numbers are represented with ob_size < 0;
485fd76
            zero is represented by ob_size == 0.
485fd76
485fd76
        where SHIFT can be either:
485fd76
            #define PyLong_SHIFT        30
485fd76
            #define PyLong_SHIFT        15
485fd76
        '''
485fd76
        ob_size = long(self.field('ob_size'))
485fd76
        if ob_size == 0:
485fd76
            return 0L
485fd76
485fd76
        ob_digit = self.field('ob_digit')
485fd76
485fd76
        if gdb.lookup_type('digit').sizeof == 2:
485fd76
            SHIFT = 15L
485fd76
        else:
485fd76
            # FIXME: I haven't yet tested this case
485fd76
            SHIFT = 30L
485fd76
485fd76
        digits = [long(ob_digit[i]) * 2**(SHIFT*i)
485fd76
                  for i in safe_range(abs(ob_size))]
485fd76
        result = sum(digits)
485fd76
        if ob_size < 0:
485fd76
            result = -result
485fd76
        return result
485fd76
485fd76
485fd76
class PyNoneStructPtr(PyObjectPtr):
    """
    Wrapper for a gdb.Value that's a PyObject* pointing to the
    singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type.

    Its proxy value is simply None.
    """
    _typename = 'PyObject'

    def proxyval(self):
        return None
485fd76
485fd76
485fd76
class PyFrameObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyFrameObject* i.e. a frame instance
    within the process being debugged.
    """
    _typename = 'PyFrameObject'

    def __str__(self):
        # Delegate to FrameInfo, which knows how to summarize a frame
        return str(FrameInfo(self))
485fd76
485fd76
485fd76
class PyStringObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyStringObject* i.e. a str instance
    within the process being debugged.
    """
    _typename = 'PyStringObject'

    def __str__(self):
        '''
        Read the string's characters out of the inferior, one per index up
        to ob_size (capped by safe_range()), and join them into a local str.
        '''
        field_ob_size = self.field('ob_size')
        field_ob_sval = self.field('ob_sval')
        # Read through a char* to the start of ob_sval rather than indexing
        # the field itself (presumably declared as a short array in the
        # struct -- TODO confirm), so reads past the declared bound are
        # ordinary pointer arithmetic.
        char_ptr = field_ob_sval.address.cast(_type_char_ptr)
        return ''.join([chr(char_ptr[i]) for i in safe_range(field_ob_size)])

    def proxyval(self):
        return str(self)
485fd76
485fd76
485fd76
class PyTupleObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyTupleObject* i.e. a tuple instance
    within the process being debugged.
    """
    _typename = 'PyTupleObject'

    def __getitem__(self, i):
        # Get the gdb.Value for the (PyObject*) with the given index:
        return self.field('ob_item')[i]

    def proxyval(self):
        # Build a local tuple of proxies; the length is capped by safe_range()
        length = int_from_int(self.field('ob_size'))
        items = []
        for i in safe_range(length):
            items.append(PyObjectPtr.from_pyobject_ptr(self[i]).proxyval())
        return tuple(items)
485fd76
485fd76
485fd76
class PyTypeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyTypeObject*; this is what
    PyObjectPtr.type() returns.
    """
    _typename = 'PyTypeObject'
485fd76
485fd76
485fd76
class PyUnicodeObjectPtr(PyObjectPtr):
    """
    Class wrapping a gdb.Value that's a PyUnicodeObject* i.e. a unicode
    instance within the process being debugged.
    """
    _typename = 'PyUnicodeObject'

    def proxyval(self):
        # From unicodeobject.h:
        #     Py_ssize_t length;  /* Length of raw Unicode data in buffer */
        #     Py_UNICODE *str;    /* Raw Unicode buffer */
        length = long(self.field('length'))
        buf = self.field('str')

        # First gather the Py_UNICODE values as ints; these are either
        # UCS-2 or UCS-4 code points:
        code_points = []
        for i in safe_range(length):
            code_points.append(int(buf[i]))

        # ...then turn them into characters and build a local unicode
        # instance:
        return u''.join(map(unichr, code_points))
485fd76
485fd76
485fd76
def int_from_int(gdbval):
    """Convert gdbval to a python int by parsing its str() form."""
    text = str(gdbval)
    return int(text)
485fd76
485fd76
485fd76
def stringify(val):
    """
    Render a proxy value as a string for display.

    TODO: repr() puts everything on one line; pprint.pformat can be nicer,
    but can lead to very long results; this function isolates the choice.
    For now repr() is always used.
    """
    return repr(val)
485fd76
485fd76
485fd76
class FrameInfo:
    '''
    Class representing all of the information we can scrape about a
    PyFrameObject*

    The constructor takes a PyObjectPtr wrapping the frame and eagerly reads
    the code object, its name and filename, the line bookkeeping fields and
    the non-NULL local variables.
    '''
    def __init__(self, fval):
        wrap = PyObjectPtr.from_pyobject_ptr

        self.fval = fval
        self.co = PyCodeObjectPtr.from_pyobject_ptr(fval.field('f_code'))
        self.co_name = wrap(self.co.field('co_name'))
        self.co_filename = wrap(self.co.field('co_filename'))
        self.f_lineno = int_from_int(fval.field('f_lineno'))
        self.f_lasti = int_from_int(fval.field('f_lasti'))
        self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
        self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))

        # (name, proxy value) pairs for each local whose slot is non-NULL;
        # at most safety_limit() slots are examined
        self.locals = []
        f_localsplus = self.fval.field('f_localsplus')
        for slot in safe_range(self.co_nlocals):
            value_ptr = wrap(f_localsplus[slot])
            if value_ptr.is_null():
                continue
            name_ptr = wrap(self.co_varnames[slot])
            proxy = value_ptr.proxyval()
            self.locals.append((str(name_ptr), proxy))

    def filename(self):
        '''Get the path of the current Python source file, as a string'''
        return self.co_filename.proxyval()

    def current_line_num(self):
        '''Get current line number as an integer (1-based)

        Translated from PyFrame_GetLineNumber and PyCode_Addr2Line

        See Objects/lnotab_notes.txt
        '''
        if long(self.fval.field('f_trace')) != 0:
            # we have a non-NULL f_trace, so use f_lineno:
            return self.f_lineno
        # Otherwise derive the line from the last bytecode offset:
        return self.co.addr2line(self.f_lasti)

    def current_line(self):
        '''Get the text of the current source line as a string, with a trailing
        newline character'''
        with open(self.filename(), 'r') as f:
            source_lines = f.readlines()
        # Convert from 1-based current_line_num to 0-based list offset:
        return source_lines[self.current_line_num() - 1]

    def __str__(self):
        frame_addr = long(self.fval._gdbval)
        lineno = self.current_line_num()
        local_strs = ['%s=%s' % (name, stringify(value))
                      for name, value in self.locals]
        return ('Frame 0x%x, for file %s, line %i, in %s (%s)'
                % (frame_addr,
                   self.co_filename,
                   lineno,
                   self.co_name,
                   ', '.join(local_strs)))
485fd76
485fd76
485fd76
class PyObjectPtrPrinter:
    "Prints a (PyObject*)"

    def __init__(self, gdbval):
        self.gdbval = gdbval

    def to_string(self):
        # Scrape a proxy for the pointed-to object and render that
        wrapped = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        return stringify(wrapped.proxyval())
485fd76
485fd76
485fd76
class PyFrameObjectPtrPrinter(PyObjectPtrPrinter):
    "Prints a (PyFrameObject*)"

    def to_string(self):
        # Summarize the frame via FrameInfo rather than a generic proxy
        frame_ptr = PyObjectPtr.from_pyobject_ptr(self.gdbval)
        return str(FrameInfo(frame_ptr))
485fd76
485fd76
485fd76
def pretty_printer_lookup(gdbval):
    """
    Pretty-printer lookup hook: return a printer for values that are
    pointers to PyObject or PyFrameObject, and None for anything else.
    """
    gdb_type = gdbval.type.unqualified()
    if gdb_type.code != gdb.TYPE_CODE_PTR:
        return None

    target_name = str(gdb_type.target().unqualified())
    if target_name == "PyObject":
        return PyObjectPtrPrinter(gdbval)
    if target_name == "PyFrameObject":
        return PyFrameObjectPtrPrinter(gdbval)
    return None
485fd76
485fd76
485fd76
"""
485fd76
During development, I've been manually invoking the code in this way:
485fd76
(gdb) python
485fd76
485fd76
import sys
485fd76
sys.path.append('/home/david/coding/python-gdb')
485fd76
import libpython
485fd76
end
485fd76
485fd76
then reloading it after each edit like this:
485fd76
(gdb) python reload(libpython)
485fd76
485fd76
The following code should ensure that the prettyprinter is registered
485fd76
if the code is autoloaded by gdb when visiting libpython.so, provided
485fd76
that this python file is installed to the same path as the library (or its
485fd76
.debug file) plus a "-gdb.py" suffix, e.g:
485fd76
  /usr/lib/libpython2.6.so.1.0-gdb.py
485fd76
  /usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
485fd76
"""
485fd76
def register(obj):
    """
    Append pretty_printer_lookup to obj.pretty_printers.

    obj -- the object to register with; if None, the gdb module itself is
           used.
    """
    # Identity test: None is a singleton, and "==" would dispatch to
    # whatever comparison obj happens to implement
    if obj is None:
        obj = gdb

    # Wire up the pretty-printer
    obj.pretty_printers.append(pretty_printer_lookup)

register(gdb.current_objfile())
485fd76
485fd76
def get_python_frame(gdb_frame):
    """
    Read the variable 'f' from the given gdb frame and wrap it as a python
    frame object pointer; returns None if that raises ValueError.
    """
    try:
        frame_var = gdb_frame.read_var('f')
        result = PyFrameObjectPtr.from_pyobject_ptr(frame_var)
    except ValueError:
        result = None
    return result
485fd76
485fd76
def get_selected_python_frame():
    '''Try to obtain a (gdbframe, PyFrameObjectPtr) pair for the
    currently-running python code, or (None, None)'''
    gdb_frame = gdb.selected_frame()
    while gdb_frame:
        func = gdb_frame.function()
        if func is not None and func.name == 'PyEval_EvalFrameEx':
            try:
                f = gdb_frame.read_var('f')
                return gdb_frame, PyFrameObjectPtr.from_pyobject_ptr(f)
            except ValueError:
                # Couldn't get at 'f' here; keep looking further out
                pass
        gdb_frame = gdb_frame.older()
    return None, None
485fd76
485fd76
class PyList(gdb.Command):
485fd76
    '''List the current Python source code, if any
485fd76
485fd76
    Use
485fd76
       py-list START
485fd76
    to list at a different line number within the python source.
485fd76
    
485fd76
    Use
485fd76
       py-list START, END
485fd76
    to list a specific range of lines within the python source.
485fd76
    '''
485fd76
485fd76
    def __init__(self):
485fd76
        gdb.Command.__init__ (self,
485fd76
                              "py-list",
485fd76
                              gdb.COMMAND_FILES,
485fd76
                              gdb.COMPLETE_NONE)
485fd76
485fd76
485fd76
    def invoke(self, args, from_tty):
485fd76
        import re
485fd76
485fd76
        start = None
485fd76
        end = None
485fd76
485fd76
        m = re.match(r'\s*(\d+)\s*', args)
485fd76
        if m:
485fd76
            start = int(m.group(0))
485fd76
            end = start + 10
485fd76
485fd76
        m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
485fd76
        if m:
485fd76
            start, end = map(int, m.groups())
485fd76
485fd76
        gdb_frame, py_frame = get_selected_python_frame()
485fd76
        if not py_frame:
485fd76
            print 'Unable to locate python frame'
485fd76
            return
485fd76
485fd76
        fi = FrameInfo(py_frame)
485fd76
        filename = fi.filename()
485fd76
        lineno = fi.current_line_num()
485fd76
485fd76
        if start is None:
485fd76
            start = lineno - 5
485fd76
            end = lineno + 5
485fd76
485fd76
        if start<1:
485fd76
            start = 1
485fd76
485fd76
        with open(filename, 'r') as f:
485fd76
            all_lines = f.readlines()
485fd76
            # start and end are 1-based, all_lines is 0-based;
485fd76
            # so [start-1:end] as a python slice gives us [start, end] as a
485fd76
            # closed interval
485fd76
            for i, line in enumerate(all_lines[start-1:end]):
485fd76
                sys.stdout.write('%4s    %s' % (i+start, line))
485fd76
            
485fd76
        
485fd76
# ...and register the command:
485fd76
PyList()
485fd76
485fd76
def move_in_stack(move_up):
485fd76
    '''Move up or down the stack (for the py-up/py-down command)'''
485fd76
    gdb_frame, py_frame = get_selected_python_frame()
485fd76
    while gdb_frame:
485fd76
        if move_up:
485fd76
            iter_frame = gdb_frame.older()
485fd76
        else:
485fd76
            iter_frame = gdb_frame.newer()
485fd76
485fd76
        if not iter_frame:
485fd76
            break
485fd76
485fd76
        if (iter_frame.function() and 
485fd76
            iter_frame.function().name == 'PyEval_EvalFrameEx'):
485fd76
            # Result:
485fd76
            iter_frame.select()
485fd76
            py_frame = get_python_frame(iter_frame)
485fd76
            fi = FrameInfo(py_frame)
485fd76
            print fi
485fd76
            sys.stdout.write(fi.current_line())
485fd76
            return
485fd76
485fd76
        gdb_frame = iter_frame
485fd76
485fd76
    if move_up:
485fd76
        print 'Unable to find an older python frame'
485fd76
    else:
485fd76
        print 'Unable to find a newer python frame'
485fd76
485fd76
class PyUp(gdb.Command):
    'Select and print the python stack frame that called this one (if any)'

    def __init__(self):
        # Registers the command under the name "py-up"
        gdb.Command.__init__(self, "py-up",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The arguments are ignored; just step towards older frames
        move_in_stack(move_up=True)

PyUp()
485fd76
485fd76
class PyDown(gdb.Command):
    'Select and print the python stack frame called by this one (if any)'

    def __init__(self):
        # Registers the command under the name "py-down"
        gdb.Command.__init__(self, "py-down",
                             gdb.COMMAND_STACK, gdb.COMPLETE_NONE)

    def invoke(self, args, from_tty):
        # The arguments are ignored; just step towards newer frames
        move_in_stack(move_up=False)

PyDown()
485fd76
485fd76
class PyBacktrace(gdb.Command):
485fd76
    'Display the current python frame and all the frames within its call stack (if any)'
485fd76
    def __init__(self):
485fd76
        gdb.Command.__init__ (self,
485fd76
                              "py-bt",
485fd76
                              gdb.COMMAND_STACK,
485fd76
                              gdb.COMPLETE_NONE)
485fd76
485fd76
485fd76
    def invoke(self, args, from_tty):
485fd76
        gdb_frame, py_frame = get_selected_python_frame()
485fd76
        while gdb_frame:
485fd76
            gdb_frame = gdb_frame.older()
485fd76
485fd76
            if not gdb_frame:
485fd76
                break
485fd76
485fd76
            if (gdb_frame.function() and 
485fd76
                gdb_frame.function().name == 'PyEval_EvalFrameEx'):
485fd76
                py_frame = get_python_frame(gdb_frame)
485fd76
                fi = FrameInfo(py_frame)
485fd76
                print '  ', fi
485fd76
                sys.stdout.write(fi.current_line())
485fd76
485fd76
PyBacktrace()