查看Objective C的C++实现引发的思考

很早之前看到一篇讲block原理的blog,里面介绍可以通过clang(clang -rewrite-objc)查看一个Objective-C文件的C++实现,当时看了看转换后的代码,发现特别长,大概瞄了几眼就关了,这几天研究runtime的时候才仔细研究了一下。

编译器对类的转化

// KKPerson: the sample class examined in this article. It declares two public
// instance variables, one property, one instance method and one class method.
@interface KKPerson : NSObject
{
    @public
    int  age;
    char *name;
}

// Declared property; the rewritten output shows a backing ivar named _k_name.
@property (nonatomic, copy  ) NSString *k_name;

- (void)foo;      // instance method
+ (void)manager;  // class method

@end

定义一个很简单的类,有实例变量、属性、实例方法、类方法,通过clang查看它转换成什么。

// Guarded typedefs for the class: the guard macro keeps them from being
// declared more than once.
#ifndef _REWRITER_typedef_KKPerson
#define _REWRITER_typedef_KKPerson
typedef struct objc_object KKPerson;   // KKPerson is an alias of struct objc_object
typedef struct {} _objc_exc_KKPerson;  // empty struct type named after the class
#endif

// Declarations of the per-ivar offset variables: one for each declared ivar
// and one for the property's backing ivar _k_name.
extern "C" unsigned long OBJC_IVAR_$_KKPerson$age;
extern "C" unsigned long OBJC_IVAR_$_KKPerson$name;
extern "C" unsigned long OBJC_IVAR_$_KKPerson$_k_name;
// Instance layout of KKPerson: the superclass ivars come first, followed by
// the declared ivars and then the backing ivar of the k_name property.
struct KKPerson_IMPL {
    struct NSObject_IMPL NSObject_IVARS;
    int age;
    char *name;
    NSString *_k_name;
};


// @property (nonatomic, copy ) NSString *k_name;

// - (void)foo;
// + (void)manager;

/* @end */

// @implementation KKPerson
// C function generated for -[KKPerson foo]; self and _cmd are explicit parameters.
static void _I_KKPerson_foo(KKPerson * self, SEL _cmd) {}
// C function generated for +[KKPerson manager]; self is typed as Class here.
static void _C_KKPerson_manager(Class self, SEL _cmd) {}

// Synthesized getter for k_name: reads the NSString * stored at
// (char *)self + OBJC_IVAR_$_KKPerson$_k_name.
static NSString * _I_KKPerson_k_name(KKPerson * self, SEL _cmd) { return (*(NSString **)((char *)self + OBJC_IVAR_$_KKPerson$_k_name)); }
// Declaration of the helper that the synthesized setter calls.
extern "C" __declspec(dllimport) void objc_setProperty (id, SEL, long, id, bool, bool);

// Synthesized setter for k_name: forwards to objc_setProperty with the offset
// of _k_name and the new value. The last two arguments (0, 1) fill the two
// bool parameters, consistent with the property's (nonatomic, copy) attributes.
static void _I_KKPerson_setK_name_(KKPerson * self, SEL _cmd, NSString *k_name) { objc_setProperty (self, _cmd, __OFFSETOFIVAR__(struct KKPerson, _k_name), (id)k_name, 0, 1); }

通过上面可以很容易看出来,KKPerson就是struct objc_object的别名,编译器又生成了struct KKPerson_IMPL,它存储着KKPerson的实例变量,这样就可以很容易得出它占多大空间了。

接着它把oc方法直接转换成c的方法,通过前缀_I_KKPerson_和_C_KKPerson_可以很方便地辨识出它是实例方法还是类方法,而且也能知道它是哪个类的方法。

属性

编译器生成了_I_KKPerson_k_name和_I_KKPerson_setK_name_两个方法,能看出来属性就是由实例变量、set方法、get方法构成。get方法很简单,就是通过偏移量来获取实例变量的值;set方法是通过调用objc_setProperty方法来实现的。

// Entry point for property stores. Decodes shouldCopy into two flags and
// forwards everything to reallySetProperty.
//   self / _cmd : receiver and selector of the setter
//   offset      : byte offset of the ivar inside self
//   newValue    : value to store
//   atomic      : passed through; selects the locked store path
//   shouldCopy  : 0 = no copy, MUTABLE_COPY = mutable copy, any other nonzero = copy
void objc_setProperty(id self, SEL _cmd, ptrdiff_t offset, id newValue, BOOL atomic, signed char shouldCopy)
{
    // Nonzero but not MUTABLE_COPY means an immutable copy.
    bool copy = (shouldCopy && shouldCopy != MUTABLE_COPY);
    bool mutableCopy = (shouldCopy == MUTABLE_COPY);
    reallySetProperty(self, _cmd, newValue, offset, atomic, copy, mutableCopy);
}

// Stores newValue into the ivar located `offset` bytes into self, taking
// ownership of the new value (copy, mutable copy, or retain) and releasing
// the value that was stored there before.
static inline void reallySetProperty(id self, SEL _cmd, id newValue, ptrdiff_t offset, bool atomic, bool copy, bool mutableCopy)
{
    // Offset 0 is treated specially: the object's class is set instead.
    if (offset == 0) {
        object_setClass(self, newValue);
        return;
    }
    id oldValue;
    // Address of the ivar being written.
    id *slot = (id*) ((char*)self + offset);
    if (copy) {
        newValue = [newValue copyWithZone:nil];
    } else if (mutableCopy) {
        newValue = [newValue mutableCopyWithZone:nil];
    } else {
        // Plain retain path: storing the value already held is a no-op.
        if (*slot == newValue) return;
        newValue = objc_retain(newValue);
    }
    if (!atomic) {
        oldValue = *slot;
        *slot = newValue;
    } else {
        // Atomic path: the swap is done while holding the lock that
        // PropertyLocks returns for this slot address.
        spinlock_t& slotlock = PropertyLocks[slot];
        slotlock.lock();
        oldValue = *slot;
        *slot = newValue;        
        slotlock.unlock();
    }
    // The previous value is released after the store (and outside the lock).
    objc_release(oldValue);
}

除去self和_cmd,通过它剩下的4个参数也能推断出它的大概实现:第一个参数是偏移量,用来定位旧值所在的地址;第二个参数是新值,是赋值用的;第三个参数是原子性,判断是否加锁;第四个参数是拷贝,判断是否拷贝。看上面的实现也都是围绕这4个参数来的,通过偏移量拿到旧值地址,再赋新值。

类的存储

类的方法、实例变量都有了,那是通过什么数据结构来存储这些东西呢?
先来看下类、方法、实例变量、分类他们的数据结构。

// Property descriptor: a name string and an attributes string.
struct _prop_t {
    const char *name;
    const char *attributes;
};

// Method descriptor: selector, type string and a pointer to the implementation.
struct _objc_method {
    struct objc_selector * _cmd;
    const char *method_type;
    void  *_imp;
};

// Protocol descriptor: name, inherited protocols, four method lists
// (instance/class, each with an optional variant), properties, plus size and
// flags fields.
struct _protocol_t {
    void * isa;  // NULL
    const char *protocol_name;
    const struct _protocol_list_t * protocol_list; // super protocols
    const struct method_list_t *instance_methods;
    const struct method_list_t *class_methods;
    const struct method_list_t *optionalInstanceMethods;
    const struct method_list_t *optionalClassMethods;
    const struct _prop_list_t * properties;
    const unsigned int size;  // sizeof(struct _protocol_t)
    const unsigned int flags;  // = 0
    const char ** extendedMethodTypes;
};

// Instance-variable descriptor.
struct _ivar_t {
    unsigned long int *offset;  // pointer to ivar offset location
    const char *name;
    const char *type;           // type string, e.g. "i", "*", "@\"NSString\"" in the KKPerson ivar list
    unsigned int alignment;     // NOTE(review): the KKPerson list uses 2 for int and 3 for pointers, presumably log2 of the alignment; confirm
    unsigned int  size;         // the KKPerson list uses 4 for int and 8 for pointers
};

// Per-class data record referenced from _class_t::ro: name, instance
// start/size, and the method, protocol, ivar and property lists.
struct _class_ro_t {
    unsigned int flags;
    unsigned int instanceStart;
    unsigned int instanceSize;
    unsigned int reserved;
    const unsigned char *ivarLayout;
    const char *name;
    const struct _method_list_t *baseMethods;
    const struct _objc_protocol_list *baseProtocols;
    const struct _ivar_list_t *ivars;
    const unsigned char *weakIvarLayout;
    const struct _prop_list_t *properties;
};

// Class object layout: links to the isa and superclass class objects, two
// opaque slots (cache, vtable), and a pointer to the class's _class_ro_t data.
struct _class_t {
    struct _class_t *isa;
    struct _class_t *superclass;
    void *cache;
    void *vtable;
    struct _class_ro_t *ro;
};

// Category descriptor: a name, the class it refers to, and the instance-method,
// class-method, protocol and property lists it carries.
struct _category_t {
    const char *name;
    struct _class_t *cls;
    const struct _method_list_t *instance_methods;
    const struct _method_list_t *class_methods;
    const struct _protocol_list_t *protocols;
    const struct _prop_list_t *properties;
};

这些数据结构跟runtime中的数据结构稍有不同,而且在应用启动的时候会转化的,这些数据结构是存储在section中的,具体哪些存储到哪个位置,可以查看符号表,里面很清楚。

声明完类的相关数据结构后,就可以定义一个具体的类,这样的话每一个类都有一个固定的地址,在整个运行过程中都不会变,因为在编译期就决定了,除非改代码再编译。

// Class object for KKPerson, placed in the __DATA,__objc_data section.
// The first four fields are statically 0; the trailing comments name the
// values they are meant to hold. Only the `ro` pointer is set here.
extern "C" __declspec(dllexport) struct _class_t OBJC_CLASS_$_KKPerson __attribute__ ((used, section ("__DATA,__objc_data"))) = {
    0, // &OBJC_METACLASS_$_KKPerson,
    0, // &OBJC_CLASS_$_NSObject,
    0, // (void *)&_objc_empty_cache,
    0, // unused, was (void *)&_objc_empty_vtable,
    &_OBJC_CLASS_RO_$_KKPerson,
};

// _class_ro_t record for the KKPerson class, placed in __DATA,__objc_const.
// Values are listed in _class_ro_t field order.
static struct _class_ro_t _OBJC_CLASS_RO_$_KKPerson __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    // flags, instanceStart (offset of the first declared ivar), instanceSize
    0, __OFFSETOFIVAR__(struct KKPerson, age), sizeof(struct KKPerson_IMPL),
    (unsigned int)0,  // reserved
    0,                // ivarLayout
    "KKPerson",       // name
    (const struct _method_list_t *)&_OBJC_$_INSTANCE_METHODS_KKPerson,  // baseMethods
    0,                // baseProtocols
    (const struct _ivar_list_t *)&_OBJC_$_INSTANCE_VARIABLES_KKPerson,  // ivars
    0,                // weakIvarLayout
    (const struct _prop_list_t *)&_OBJC_$_PROP_LIST_KKPerson,           // properties
};

可以看出类真正的数据都是存在_OBJC_CLASS_RO_$_KKPerson里面的,里面有类名、实例大小(instanceSize)、自身实例变量的起始偏移量(instanceStart,即排在父类实例变量之后的位置)、实例方法、实例变量、属性列表。

看到这有个疑问,那类方法存到哪了呢,答案是存在元类里面了。

// Metaclass object for KKPerson, placed in the __DATA,__objc_data section.
// As with the class object, the first four fields are statically 0 and the
// trailing comments name their intended values; only `ro` is set here.
extern "C" __declspec(dllexport) struct _class_t OBJC_METACLASS_$_KKPerson __attribute__ ((used, section ("__DATA,__objc_data"))) = {
    0, // &OBJC_METACLASS_$_NSObject,
    0, // &OBJC_METACLASS_$_NSObject,
    0, // (void *)&_objc_empty_cache,
    0, // unused, was (void *)&_objc_empty_vtable,
    &_OBJC_METACLASS_RO_$_KKPerson,
};

// _class_ro_t record for the KKPerson metaclass, placed in __DATA,__objc_const.
// Values are listed in _class_ro_t field order.
static struct _class_ro_t _OBJC_METACLASS_RO_$_KKPerson __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    // flags, instanceStart, instanceSize.
    // NOTE(review): flags = 1 presumably marks this record as a metaclass; confirm against the runtime headers.
    1, sizeof(struct _class_t), sizeof(struct _class_t),
    (unsigned int)0,  // reserved
    0,                // ivarLayout
    "KKPerson",       // name (same string as the class record)
    (const struct _method_list_t *)&_OBJC_$_CLASS_METHODS_KKPerson,  // baseMethods: the class methods
    0,                // baseProtocols
    0,                // ivars
    0,                // weakIvarLayout
    0,                // properties
};

可以看出_OBJC_METACLASS_RO_$_KKPerson里面存储着_OBJC_$_CLASS_METHODS_KKPerson地址,里面存储的都是类方法。

// Instance-variable list of KKPerson: entry size, entry count, then one
// _ivar_t per ivar {offset variable address, name, type string, alignment, size}.
static struct /*_ivar_list_t*/ {
    unsigned int entsize;  // sizeof(struct _ivar_t)
    unsigned int count;
    struct _ivar_t ivar_list[3];
} _OBJC_$_INSTANCE_VARIABLES_KKPerson __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    sizeof(_ivar_t),
    3,
    {{(unsigned long int *)&OBJC_IVAR_$_KKPerson$age, "age", "i", 2, 4},
     {(unsigned long int *)&OBJC_IVAR_$_KKPerson$name, "name", "*", 3, 8},
     {(unsigned long int *)&OBJC_IVAR_$_KKPerson$_k_name, "_k_name", "@\"NSString\"", 3, 8}}
};

// Instance-method list of KKPerson: foo plus the synthesized getter and setter
// of k_name. Each entry is {selector name, type string, implementation function}.
static struct /*_method_list_t*/ {
    unsigned int entsize;  // sizeof(struct _objc_method)
    unsigned int method_count;
    struct _objc_method method_list[3];
} _OBJC_$_INSTANCE_METHODS_KKPerson __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    sizeof(_objc_method),
    3,
    {{(struct objc_selector *)"foo", "v16@0:8", (void *)_I_KKPerson_foo},
    {(struct objc_selector *)"k_name", "@16@0:8", (void *)_I_KKPerson_k_name},
    {(struct objc_selector *)"setK_name:", "v24@0:8@16", (void *)_I_KKPerson_setK_name_}}
};

// Class-method list of KKPerson (referenced from the metaclass's _class_ro_t):
// a single entry for +manager.
static struct /*_method_list_t*/ {
    unsigned int entsize;  // sizeof(struct _objc_method)
    unsigned int method_count;
    struct _objc_method method_list[1];
} _OBJC_$_CLASS_METHODS_KKPerson __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    sizeof(_objc_method),
    1,
    {{(struct objc_selector *)"manager", "v16@0:8", (void *)_C_KKPerson_manager}}
};

这里只列出实例变量列表、实例方法列表、类方法列表,其他的都差不多。
每一个方法都有方法名(selector)、类型编码(描述返回值和参数类型的字符串)、实现地址(IMP),从这也可以看出来,调用一个实例方法,都是先从类的实例方法列表里面查找(类方法则是到元类的方法列表里查找),找到了就拿出相应的实现地址,然后调用。

细心的人能看到OBJC_CLASS_$_KKPerson结构体,前面几个值都为0,那是什么时候又重新赋值呢?

// Fills in the isa / superclass / cache fields that the static initializers of
// the KKPerson class and metaclass objects left as 0.
static void OBJC_CLASS_SETUP_$_KKPerson(void ) {
    // Metaclass: both isa and superclass point at NSObject's metaclass.
    OBJC_METACLASS_$_KKPerson.isa = &OBJC_METACLASS_$_NSObject;
    OBJC_METACLASS_$_KKPerson.superclass = &OBJC_METACLASS_$_NSObject;
    OBJC_METACLASS_$_KKPerson.cache = &_objc_empty_cache;
    // Class: isa points at its own metaclass, superclass at NSObject.
    OBJC_CLASS_$_KKPerson.isa = &OBJC_METACLASS_$_KKPerson;
    OBJC_CLASS_$_KKPerson.superclass = &OBJC_CLASS_$_NSObject;
    OBJC_CLASS_$_KKPerson.cache = &_objc_empty_cache;
}
// Table of class-setup function pointers, allocated in the ".objc_inithooks$B"
// section via MSVC-style #pragma section / __declspec(allocate).
#pragma section(".objc_inithooks$B", long, read, write)
__declspec(allocate(".objc_inithooks$B")) static void *OBJC_CLASS_SETUP[] = {
    (void *)&OBJC_CLASS_SETUP_$_KKPerson,
};

可以看到是通过OBJC_CLASS_SETUP_$_KKPerson这个方法来重新赋值的,而且还有一个void *的数组,里面存的是OBJC_CLASS_SETUP_$_KKPerson方法地址,这个数组被放在.objc_inithooks$B这个section里。能够推测出是在程序加载(运行时初始化)的某个阶段通过这个数组来调用里面的方法,然后再对类进行重新赋值,而不是在编译期,因为函数只有在程序运行起来之后才能被调用;至于具体是什么时候,目前还没研究出来。

// Repeated here for reference: KKPerson is an alias of struct objc_object,
// while the ivars live in the separate struct KKPerson_IMPL.
typedef struct objc_object KKPerson;
struct KKPerson_IMPL {
    struct NSObject_IMPL NSObject_IVARS;
    int age;
    char *name;
    NSString *_k_name;
};

// Byte offset of MEMBER inside TYPE, computed by taking the address of the
// member through a null TYPE pointer and casting it to long long.
#define __OFFSETOFIVAR__(TYPE, MEMBER) ((long long) &((TYPE *)0)->MEMBER)

// Definitions of the per-ivar offset variables, placed in __DATA,__objc_ivar
// and initialized with the macro above. Note that the type argument is written
// as `struct KKPerson`, not `struct KKPerson_IMPL`.
extern "C" unsigned long int OBJC_IVAR_$_KKPerson$age __attribute__ ((used, section ("__DATA,__objc_ivar"))) = __OFFSETOFIVAR__(struct KKPerson, age);
extern "C" unsigned long int OBJC_IVAR_$_KKPerson$name __attribute__ ((used, section ("__DATA,__objc_ivar"))) = __OFFSETOFIVAR__(struct KKPerson, name);
extern "C" unsigned long int OBJC_IVAR_$_KKPerson$_k_name __attribute__ ((used, section ("__DATA,__objc_ivar"))) = __OFFSETOFIVAR__(struct KKPerson, _k_name);

我这边还有一个疑惑,在定义实例变量结构体的时候,需要偏移量,通过上面的代码也能看出来,__OFFSETOFIVAR__这个宏定义就是求一个结构体某个变量的偏移量,但是__OFFSETOFIVAR__(struct KKPerson, age)里面的第一个参数竟然是struct KKPerson,不应该是struct KKPerson_IMPL吗?这点很不理解,我推测在之后的阶段又替换为struct KKPerson_IMPL了,不然没法解释,希望某个大佬能给我解答下。

/// RewriteIvarOffsetComputation - This routine synthesizes computation of
/// ivar offset.
// Appends the text "__OFFSETOFIVAR__(struct <Interface>[_IMPL], <member>)" to
// Result for the given ivar.
void RewriteModernObjC::RewriteIvarOffsetComputation(ObjCIvarDecl *ivar,
                                                         std::string &Result) {
  Result += "__OFFSETOFIVAR__(struct ";
  Result += ivar->getContainingInterface()->getNameAsString();
  // The "_IMPL" suffix is only appended when Microsoft extensions are enabled;
  // otherwise the bare interface name is emitted.
  if (LangOpts.MicrosoftExt)
    Result += "_IMPL";
  Result += ", ";
  // Bit-field ivars are handled by ObjCIvarBitfieldGroupDecl (not shown here),
  // which is given Result; presumably it appends the member name. Confirm.
  if (ivar->isBitField())
    ObjCIvarBitfieldGroupDecl(ivar, Result);
  else
    Result += ivar->getNameAsString();
  Result += ")";
}

这个疑问我通过分析clang的源码后,发现只有当LangOpts.MicrosoftExt为真的时候,生成的才是struct KKPerson_IMPL,否则就是struct KKPerson。至于LangOpts.MicrosoftExt,它表示是否开启了clang的Microsoft C++扩展。

这里只是把一个类编译后的结构稍微讲解了下,那这些结构在应用启动后,在内存里是什么样的,就需要看runtime的代码了,下一篇开始看runtime。

最后贴一张符号表,能够清晰地看到每个结构的地址。

# Sections:
# Address   Size        Segment Section
0x100000C80 0x00000203  __TEXT  __text
0x100000E84 0x0000002A  __TEXT  __stubs
0x100000EB0 0x00000056  __TEXT  __stub_helper
0x100000F06 0x0000000B  __TEXT  __objc_classname
0x100000F11 0x00000048  __TEXT  __objc_methname
0x100000F59 0x0000002B  __TEXT  __objc_methtype
0x100000F84 0x0000002A  __TEXT  __cstring
0x100000FB0 0x00000048  __TEXT  __unwind_info
0x100001000 0x00000010  __DATA  __nl_symbol_ptr
0x100001010 0x00000038  __DATA  __la_symbol_ptr
0x100001048 0x00000020  __DATA  __cfstring
0x100001068 0x00000008  __DATA  __objc_classlist
0x100001070 0x00000008  __DATA  __objc_imageinfo
0x100001078 0x00000198  __DATA  __objc_const
0x100001210 0x00000030  __DATA  __objc_selrefs
0x100001240 0x00000008  __DATA  __objc_classrefs
0x100001248 0x00000018  __DATA  __objc_ivar
0x100001260 0x00000050  __DATA  __objc_data
# Symbols:
# Address   Size        File  Name
0x100000C80 0x00000010  [  1] -[KKPerson foo]
0x100000C90 0x00000010  [  1] +[KKPerson manager]
0x100000CA0 0x00000030  [  1] -[KKPerson k_name]
0x100000CD0 0x00000040  [  1] -[KKPerson setK_name:]
0x100000D10 0x00000040  [  1] -[KKPerson .cxx_destruct]
0x100000D50 0x00000133  [  1] _main
0x100000E84 0x00000006  [  2] _objc_autoreleasePoolPop
0x100000E8A 0x00000006  [  2] _objc_autoreleasePoolPush
0x100000E90 0x00000006  [  2] _objc_getProperty
0x100000E96 0x00000006  [  2] _objc_msgSend
0x100000E9C 0x00000006  [  2] _objc_retainAutoreleasedReturnValue
0x100000EA2 0x00000006  [  2] _objc_setProperty_nonatomic_copy
0x100000EA8 0x00000006  [  2] _objc_storeStrong
0x100000EB0 0x00000010  [  0] helper helper
0x100000EC0 0x0000000A  [  2] _objc_autoreleasePoolPop
0x100000ECA 0x0000000A  [  2] _objc_autoreleasePoolPush
0x100000ED4 0x0000000A  [  2] _objc_getProperty
0x100000EDE 0x0000000A  [  2] _objc_msgSend
0x100000EE8 0x0000000A  [  2] _objc_retainAutoreleasedReturnValue
0x100000EF2 0x0000000A  [  2] _objc_setProperty_nonatomic_copy
0x100000EFC 0x0000000A  [  2] _objc_storeStrong
0x100000F06 0x00000009  [  1] literal string: KKPerson
0x100000F0F 0x00000002  [  1] literal string: !
0x100000F11 0x00000008  [  1] literal string: manager
0x100000F19 0x00000004  [  1] literal string: foo
0x100000F1D 0x0000000E  [  1] literal string: .cxx_destruct
0x100000F2B 0x00000007  [  1] literal string: k_name
0x100000F32 0x0000000B  [  1] literal string: setK_name:
0x100000F3D 0x00000004  [  1] literal string: age
0x100000F41 0x00000005  [  1] literal string: name
0x100000F46 0x00000008  [  1] literal string: _k_name
0x100000F4E 0x00000006  [  1] literal string: alloc
0x100000F54 0x00000005  [  1] literal string: init
0x100000F59 0x00000008  [  1] literal string: v16@0:8
0x100000F61 0x00000008  [  1] literal string: @16@0:8
0x100000F69 0x0000000B  [  1] literal string: v24@0:8@16
0x100000F74 0x00000002  [  1] literal string: I
0x100000F76 0x00000002  [  1] literal string: *
0x100000F78 0x0000000C  [  1] literal string: @"NSString"
0x100000F84 0x00000007  [  1] literal string: k_name
0x100000F8B 0x0000001A  [  1] literal string: T@"NSString",C,N,V_k_name
0x100000FA5 0x00000004  [  1] literal string: adf
0x100000FA9 0x00000005  [  1] literal string: xiao
0x100000FB0 0x00000048  [  0] compact unwind info
0x100001000 0x00000008  [  0] non-lazy-pointer-to-local: dyld_stub_binder
0x100001008 0x00000008  [  0] non-lazy-pointer
0x100001010 0x00000008  [  2] _objc_autoreleasePoolPop
0x100001018 0x00000008  [  2] _objc_autoreleasePoolPush
0x100001020 0x00000008  [  2] _objc_getProperty
0x100001028 0x00000008  [  2] _objc_msgSend
0x100001030 0x00000008  [  2] _objc_retainAutoreleasedReturnValue
0x100001038 0x00000008  [  2] _objc_setProperty_nonatomic_copy
0x100001040 0x00000008  [  2] _objc_storeStrong
0x100001048 0x00000020  [  1] CFString
0x100001068 0x00000008  [  1] anon
0x100001070 0x00000008  [  0] objc image info
0x100001078 0x00000020  [  1] l_OBJC_$_CLASS_METHODS_KKPerson
0x100001098 0x00000048  [  1] l_OBJC_METACLASS_RO_$_KKPerson
0x1000010E0 0x00000068  [  1] l_OBJC_$_INSTANCE_METHODS_KKPerson
0x100001148 0x00000068  [  1] l_OBJC_$_INSTANCE_VARIABLES_KKPerson
0x1000011B0 0x00000018  [  1] l_OBJC_$_PROP_LIST_KKPerson
0x1000011C8 0x00000048  [  1] l_OBJC_CLASS_RO_$_KKPerson
0x100001210 0x00000008  [  1] pointer-to-literal-cstring
0x100001218 0x00000008  [  1] pointer-to-literal-cstring
0x100001220 0x00000008  [  1] pointer-to-literal-cstring
0x100001228 0x00000008  [  1] pointer-to-literal-cstring
0x100001230 0x00000008  [  1] pointer-to-literal-cstring
0x100001238 0x00000008  [  1] pointer-to-literal-cstring
0x100001240 0x00000008  [  1] objc-class-ref
0x100001248 0x00000008  [  1] _OBJC_IVAR_$_KKPerson._k_name
0x100001250 0x00000008  [  1] _OBJC_IVAR_$_KKPerson.age
0x100001258 0x00000008  [  1] _OBJC_IVAR_$_KKPerson.name
0x100001260 0x00000028  [  1] _OBJC_METACLASS_$_KKPerson
0x100001288 0x00000028  [  1] _OBJC_CLASS_$_KKPerson

推荐阅读更多精彩内容