常量池解析（2）

本文轉載自查看原文 2020-07-31 10:30 486 04類及常量池解釋

在parse_constant_pool()方法中調用parse_constant_pool_entries()方法對常量池中的各個項進行解析，方法的實現如下：

void ClassFileParser::parse_constant_pool_entries(int length, TRAPS) {
  // Use a local copy of ClassFileStream. It helps the C++ compiler to optimize
  // this function (_current can be allocated in a register, with scalar
  // replacement of aggregates). The _current pointer is copied back to
  // stream() when this function returns. DON'T call another method within
  // this method that uses stream().
  ClassFileStream*  cfs0 = stream();
  ClassFileStream   cfs1 = *cfs0;
  ClassFileStream*  cfs  = &cfs1;

  Handle class_loader(THREAD, _loader_data->class_loader());

  // Used for batching symbol allocations.
  const char*   names[SymbolTable::symbol_alloc_batch_size];
  int           lengths[SymbolTable::symbol_alloc_batch_size];
  int           indices[SymbolTable::symbol_alloc_batch_size];
  unsigned int  hashValues[SymbolTable::symbol_alloc_batch_size];
  int           names_count = 0;

  // parsing  Index 0 is unused
  for (int index = 1; index < length; index++) {
    // Each of the following case guarantees one more byte in the stream
    // for the following tag or the access_flags following constant pool,
    // so we don't need bounds-check for reading tag.
    u1 tag = cfs->get_u1_fast();
    switch (tag) {
      case JVM_CONSTANT_Class :
        {
          cfs->guarantee_more(3, CHECK);  // name_index, tag/access_flags
          u2 name_index = cfs->get_u2_fast();
          _cp->klass_index_at_put(index, name_index);
        }
        break;
      case JVM_CONSTANT_Fieldref :
        {
          cfs->guarantee_more(5, CHECK);  // class_index, name_and_type_index, tag/access_flags
          u2 class_index = cfs->get_u2_fast();
          u2 name_and_type_index = cfs->get_u2_fast();
          _cp->field_at_put(index, class_index, name_and_type_index);
        }
        break;
      case JVM_CONSTANT_Methodref :
        {
          cfs->guarantee_more(5, CHECK);  // class_index, name_and_type_index, tag/access_flags
          u2 class_index = cfs->get_u2_fast();
          u2 name_and_type_index = cfs->get_u2_fast();
          _cp->method_at_put(index, class_index, name_and_type_index);
        }
        break;
      case JVM_CONSTANT_InterfaceMethodref :
        {
          cfs->guarantee_more(5, CHECK);  // class_index, name_and_type_index, tag/access_flags
          u2 class_index = cfs->get_u2_fast();
          u2 name_and_type_index = cfs->get_u2_fast();
          _cp->interface_method_at_put(index, class_index, name_and_type_index);
        }
        break;
      case JVM_CONSTANT_String :
        {
          cfs->guarantee_more(3, CHECK);  // string_index, tag/access_flags
          u2 string_index = cfs->get_u2_fast();
          _cp->string_index_at_put(index, string_index);
        }
        break;
      case JVM_CONSTANT_MethodHandle :
      case JVM_CONSTANT_MethodType :
        if (tag == JVM_CONSTANT_MethodHandle) {
          cfs->guarantee_more(4, CHECK);  // ref_kind, method_index, tag/access_flags
          u1 ref_kind = cfs->get_u1_fast();
          u2 method_index = cfs->get_u2_fast();
          _cp->method_handle_index_at_put(index, ref_kind, method_index);
        } else if (tag == JVM_CONSTANT_MethodType) {
          cfs->guarantee_more(3, CHECK);  // signature_index, tag/access_flags
          u2 signature_index = cfs->get_u2_fast();
          _cp->method_type_index_at_put(index, signature_index);
        } else {
          ShouldNotReachHere();
        }
        break;
      case JVM_CONSTANT_InvokeDynamic :
        {
          cfs->guarantee_more(5, CHECK);  // bsm_index, nt, tag/access_flags
          u2 bootstrap_specifier_index = cfs->get_u2_fast();
          u2 name_and_type_index = cfs->get_u2_fast();
          if (_max_bootstrap_specifier_index < (int) bootstrap_specifier_index)
            _max_bootstrap_specifier_index = (int) bootstrap_specifier_index;  // collect for later
          _cp->invoke_dynamic_at_put(index, bootstrap_specifier_index, name_and_type_index);
        }
        break;
      case JVM_CONSTANT_Integer :
        {
          cfs->guarantee_more(5, CHECK);  // bytes, tag/access_flags
          u4 bytes = cfs->get_u4_fast();
          _cp->int_at_put(index, (jint) bytes);
        }
        break;
      case JVM_CONSTANT_Float :
        {
          cfs->guarantee_more(5, CHECK);  // bytes, tag/access_flags
          u4 bytes = cfs->get_u4_fast();
          _cp->float_at_put(index, *(jfloat*)&bytes);
        }
        break;
      case JVM_CONSTANT_Long :
        {
          cfs->guarantee_more(9, CHECK);  // bytes, tag/access_flags
          u8 bytes = cfs->get_u8_fast();
          _cp->long_at_put(index, bytes);
        }
        index++;   // Skip entry following eigth-byte constant, see JVM book p. 98
        break;
      case JVM_CONSTANT_Double :
        {
          cfs->guarantee_more(9, CHECK);  // bytes, tag/access_flags
          u8 bytes = cfs->get_u8_fast();
          _cp->double_at_put(index, *(jdouble*)&bytes);
        }
        index++;   // Skip entry following eigth-byte constant, see JVM book p. 98
        break;
      case JVM_CONSTANT_NameAndType :
        {
          cfs->guarantee_more(5, CHECK);  // name_index, signature_index, tag/access_flags
          u2 name_index = cfs->get_u2_fast();
          u2 signature_index = cfs->get_u2_fast();
          _cp->name_and_type_at_put(index, name_index, signature_index);
        }
        break;
      case JVM_CONSTANT_Utf8 :
        {
          cfs->guarantee_more(2, CHECK);  // utf8_length
          u2  utf8_length = cfs->get_u2_fast();
          u1* utf8_buffer = cfs->get_u1_buffer();
          assert(utf8_buffer != NULL, "null utf8 buffer");
          // Got utf8 string, guarantee utf8_length+1 bytes, set stream position forward.
          cfs->guarantee_more(utf8_length+1, CHECK);  // utf8 string, tag/access_flags
          cfs->skip_u1_fast(utf8_length);

          if (EnableInvokeDynamic && has_cp_patch_at(index)) {
            Handle patch = clear_cp_patch_at(index);

            char* str = java_lang_String::as_utf8_string(patch());
            // (could use java_lang_String::as_symbol instead, but might as well batch them)
            utf8_buffer = (u1*) str;
            utf8_length = (int) strlen(str);
          }

          unsigned int hash;
          Symbol* result = SymbolTable::lookup_only((char*)utf8_buffer, utf8_length, hash);
          if (result == NULL) {
            names[names_count] = (char*)utf8_buffer;
            lengths[names_count] = utf8_length;
            indices[names_count] = index;
            hashValues[names_count++] = hash;
            if (names_count == SymbolTable::symbol_alloc_batch_size) {
              SymbolTable::new_symbols(_loader_data, _cp, names_count, names, lengths, indices, hashValues, CHECK);
              names_count = 0;
            }
          } else {
            _cp->symbol_at_put(index, result);
          }
        }
        break;
      default:
        classfile_parse_error("Unknown constant tag %u in class file %s", tag, CHECK);
        break;
    }
  }

  // Allocate the remaining symbols
  if (names_count > 0) {
    SymbolTable::new_symbols(_loader_data, _cp, names_count, names, lengths, indices, hashValues, CHECK);
  }

  cfs0->set_current(cfs1.current());
}

循環處理length個常量池項，不過第一個常量池項不需要處理，所以循環下標index的值初始化為1。

如果要了解各個常量池項的具體結構，代碼的邏輯理解起來其實並不難。所有項的第一個字節都是用來描述常量池元素類型，調用cfs->get_u1_fast()獲取元素類型后，就可以通過switch語句分情況進行處理。

1、JVM_CONSTANT_Class項的解析

JVM_CONSTANT_Class格式如下：

CONSTANT_Class_info {
    u1 tag;
    u2 name_index;
}

調用cfs->get_u2_fast()方法獲取name_index，然后調用_cp->klass_index_at_put()方法進行存儲。_cp的類型為ConstantPool*，ConstantPool類中的klass_index_at_put()方法的實現如下：

// For temporary use while constructing constant pool
void klass_index_at_put(int which, int name_index) {
    tag_at_put(which, JVM_CONSTANT_ClassIndex);
    *int_at_addr(which) = name_index;
}

void tag_at_put(int which, jbyte t) {
    tags()->at_put(which, t);
}

jint* int_at_addr(int which) const {
     assert(is_within_bounds(which), "index out of bounds");
     return (jint*) &base()[which];
}
intptr_t* base() const {
	  return (intptr_t*) (
			        (  (char*) this  ) + sizeof(ConstantPool)
			  );
}

常量池項的下標與數組的下標是相同的，也就是說，如果當前JVM_CONSTANT_Class存儲在常量池中的下標為1處，則也要存儲到tags數組中下標為1的地方。同時要將名稱索引name_index保存到ConstantPool中存儲數據區的對應位置上。

舉個例子如下：

#1 = Class         #5        // TestClass
...
#5 = Utf8          TestClass

假設JVM_CONSTANT_Class是常量池第一項，則解析完這一頂后的ConstantPool對象如下圖所示。

其中#0（表示常量池索引0）的值為0是因為在分配內存時會將其內存清零。

2、CONSTANT_Fieldref_info項的解析

格式如下：

CONSTANT_Fieldref_info {
	u1 tag;
	u2 class_index;
	u2 name_and_type_index;
}

調用field_at_put()存儲class_index與name_and_type_index，方法的實現如下：

void field_at_put(int which, int class_index, int name_and_type_index) {
    tag_at_put(which, JVM_CONSTANT_Fieldref);
    *int_at_addr(which) = ((jint) name_and_type_index<<16) | class_index;
}

name_and_type_index存儲在高16位，class_index存儲在低16位。　　

3、JVM_CONSTANT_Methodref項的解析

JVM_CONSTANT_Methodref項的格式如下：

CONSTANT_Methodref_info {
    u1 tag;
    u2 class_index;
    u2 name_and_type_index;
}

按照格式讀取Class文件，獲取到相關屬性值后調用ConstantPool的method_at_put()方法進行存儲，這個方法的實現如下：

void method_at_put(int which, int class_index, int name_and_type_index) {
    tag_at_put(which, JVM_CONSTANT_Methodref);
    *int_at_addr(which) = ((jint) name_and_type_index<<16) | class_index;
}

由於ConstantPool數據區一個槽是一個指針類型的寬度，所以至少有32個位，又由於class_index與name_and_type_index屬性的類型為u2，這時候就可以使用高16位存儲name_and_type_index，低16位存儲class_index即可。　　

4、JVM_CONSTANT_InterfaceMethodref項的解析

格式如下：

CONSTANT_InterfaceMethodref_info {
    u1 tag;
    u2 class_index;
    u2 name_and_type_index;
}

調用的interface_method_at_put()方法的實現如下：

void interface_method_at_put(int which, int class_index, int name_and_type_index) {
    tag_at_put(which, JVM_CONSTANT_InterfaceMethodref);
    *int_at_addr(which) = ((jint) name_and_type_index<<16) | class_index;  // Not so nice
}

5、JVM_CONSTANT_String項的解析　　

格式如下：

CONSTANT_String_info {
    u1 tag;
    u2 string_index;
}

調用的string_index_at_put()方法的實現如下：

void string_index_at_put(int which, int string_index) {
    tag_at_put(which, JVM_CONSTANT_StringIndex);
    *int_at_addr(which) = string_index;
}

6、JVM_CONSTANT_MethodHandle項的解析　　

格式如下：

CONSTANT_MethodHandle_info {
    u1 tag;
    u1 reference_kind;
    u2 reference_index;
}

調用的method_handle_index_at_put()方法的實現如下：

void method_handle_index_at_put(int which, int ref_kind, int ref_index) {
    tag_at_put(which, JVM_CONSTANT_MethodHandle);
    *int_at_addr(which) = ((jint) ref_index<<16) | ref_kind;
}

7、JVM_CONSTANT_MethodType項的解析　　

格式如下：

CONSTANT_MethodType_info {
    u1 tag;
    u2 descriptor_index;
}

調用的method_type_index_at_put()方法的實現如下：

void method_type_index_at_put(int which, int ref_index) {
    tag_at_put(which, JVM_CONSTANT_MethodType);
    *int_at_addr(which) = ref_index;
}

8、JVM_CONSTANT_InvokeDynamic項的解析　　

格式如下：

CONSTANT_InvokeDynamic_info {
    u1 tag;
    u2 bootstrap_method_attr_index;
    u2 name_and_type_index;
}

調用的invoke_dynamic_at_put()方法的實現如下：　　

void invoke_dynamic_at_put(int which, int bootstrap_specifier_index, int name_and_type_index) {
    tag_at_put(which, JVM_CONSTANT_InvokeDynamic);
    *int_at_addr(which) = ((jint) name_and_type_index<<16) | bootstrap_specifier_index;
}

9、JVM_CONSTANT_Integer、JVM_CONSTANT_Float項的解析　　

格式如下：

CONSTANT_Integer_info {
    u1 tag;
    u4 bytes;
}

CONSTANT_Float_info {
    u1 tag;
    u4 bytes;
}

調用的方法分別為int_at_put()和float_at_put()方法，實現如下：

void int_at_put(int which, jint i) {
    tag_at_put(which, JVM_CONSTANT_Integer);
    *int_at_addr(which) = i;
}

void float_at_put(int which, jfloat f) {
    tag_at_put(which, JVM_CONSTANT_Float);
    *float_at_addr(which) = f;
}

10、JVM_CONSTANT_Long、JVM_CONSTANT_Double項的解析　　

格式如下：

CONSTANT_Long_info {
    u1 tag;
    u4 high_bytes;
    u4 low_bytes;
}

CONSTANT_Double_info {
    u1 tag;
    u4 high_bytes;
    u4 low_bytes;
}

調用的long_at_put()和double_at_put()方法的實現如下：

void long_at_put(int which, jlong l) {
      tag_at_put(which, JVM_CONSTANT_Long);
      // *long_at_addr(which) = l;
      Bytes::put_native_u8((address)long_at_addr(which), *(  (u8*) &l  ));
}

void double_at_put(int which, jdouble d) {
    tag_at_put(which, JVM_CONSTANT_Double);
    // *double_at_addr(which) = d;
    // u8 temp = *(u8*) &d;
    Bytes::put_native_u8((address) double_at_addr(which), *((u8*) &d));
}

調用的Bytes::put_native_u8()方法的實現如下：

static inline void put_native_u8(address p, u8 x)   { *(u8*)p = x; }

11、JVM_CONSTANT_NameAndType項的解析　　

格式如下：

CONSTANT_NameAndType_info {
    u1 tag;
    u2 name_index;
    u2 descriptor_index;
}

調用的name_and_type_at_put()方法的實現如下：

void name_and_type_at_put(int which, int name_index, int signature_index) {
    tag_at_put(which, JVM_CONSTANT_NameAndType);
    *int_at_addr(which) = ((jint) signature_index<<16) | name_index;  // Not so nice
}

12、JVM_CONSTANT_Utf8項的解析　　　　

格式如下：

CONSTANT_Utf8_info {
    u1 tag;
    u2 length;
    u1 bytes[length];
}

在HotSpot虛擬機中，字符串通常都會表示為Symbol對象，這樣有利於使用符號表來存儲字符串，對於2個相同的字符串來說，完全可以使用同一個Symbol對象來表示。這樣就可以在ConstantPool數據區相應槽位上存儲指向Symbol的指針即可。

調用SymbolTable::lookup_only()方法從符號表中查找對應的Symbol對象，如果查找不到需要暫時將相關的信息存儲到臨時的names、lengths、indices與hashValues數組中，這樣就可以調用SymbolTable::new_symbols()進行批量添加Symbol對象來提高效率；如果找到對應的Symbol對象，則調用symbol_at_put()方法，實現如下：

void symbol_at_put(int which, Symbol* s) {
    assert(s->refcount() != 0, "should have nonzero refcount");
    tag_at_put(which, JVM_CONSTANT_Utf8);
    *symbol_at_addr(which) = s;
}

Symbol** symbol_at_addr(int which) const {
    assert(is_within_bounds(which), "index out of bounds");
    return (Symbol**) &base()[which];
}

將指向Symbol對象的指針存儲到指定的位置。　

如果Symbol對象表示的是類名稱，那么后面是類連接后，相應索引位置上的值會更新為指向InstanceKlass實例的指針，后面會詳細介紹。

相關文章的鏈接如下：

1、在Ubuntu 16.04上編譯OpenJDK8的源代碼

3、 HotSpot項目結構