0x0
最近在做一些JVM启动加速的工作,用Instrumentation做了个agent,会调用retransformClasses之类的接口修改字节码。这一部分的工作主要是省去了类查找的开销,但JVM对类的解析、验证和链接等开销还是存在的。
于是准备结合JEP 310 AppCDS来做,因为CDS可以省去JVM解析等开销(实际上,AppCDS不能完成这个场景,用了内部的EagerCDS,这里就不能展开,如果后期开源了倒是可以说说,这里用AppCDS不影响描述)。
结果就遇到AppCDS+JVMTI agent一起跑的时候JVM Crash🤔。可以确定JVM是支持AppCDS+JVMTI的。
两者结合的时候确实出过一些问题,也有相应的解决方案(patch),但这些patch解决的问题是JVMTI动态修改bootclasspath/classpath导致dumptime和runtime不一致,和这里的场景不一样,所以可以确定与这些patch无关。
0x1
crash后的hs_err部分如下:
#
# A fatal error has been detected by the Java Runtime Environment:
#
# SIGSEGV (0xb) at pc=0x00007ffff558cfbc, pid=7854, tid=7880
#
# JRE version: OpenJDK Runtime Environment (11.0.7) (slowdebug build 11.0.7-internal+0-adhoc.qingfengyy.ajdk)
# Java VM: OpenJDK 64-Bit Server VM (slowdebug 11.0.7-internal+0-adhoc.qingfengyy.ajdk, mixed mode, sharing, tiered, compressed oops, g1 gc, linux-amd64)
# Problematic frame:
# V [libjvm.so+0xb55fbc] PackageEntry::module() const+0xc
#
# No core dump will be written. Core dumps have been disabled. To enable core dumping, try "ulimit -c unlimited" before starting Java again
#
# If you would like to submit a bug report, please visit:
# http://bugreport.java.com/bugreport/crash.jsp
#
--------------- S U M M A R Y ------------
Command Line: --add-opens=java.base/jdk.internal.loader=ALL-UNNAMED --add-opens=java.base/jdk.internal.util.jar=ALL-UNNAMED -javaagent:/home/qingfeng.yy/jar_index/jarindexer/jarindexer.jar=use -XX:+UnlockExperimentalVMOptions -XX:+EagerAppCDS -XX:+EagerAppCDSLegacyVerisonSupport -Xshare:on -XX:SharedArchiveFile=my.jsa -Dcom.alibaba.cds.listPath=my.lst Buy2ByURLClassLoader
Host: e69e13043.et15sqa, Intel(R) Xeon(R) Platinum 8163 CPU @ 2.50GHz, 96 cores, 503G, Alibaba Group Enterprise Linux Server release 7.2 (Paladin)
Time: Wed Aug 19 11:16:24 2020 CST elapsed time: 589 seconds (0d 0h 9m 49s)
--------------- T H R E A D ---------------
Current thread (0x00007ffff001f800): JavaThread "main" [_thread_in_vm, id=7880, stack(0x00007ffff7ee6000,0x00007ffff7fe7000)]
Stack: [0x00007ffff7ee6000,0x00007ffff7fe7000], sp=0x00007ffff7fe2070, free space=1008k
Native frames: (J=compiled Java code, A=aot compiled Java code, j=interpreted, Vv=VM code, C=native code)
V [libjvm.so+0xb55fbc] PackageEntry::module() const+0xc
V [libjvm.so+0xfa005a] InstanceKlass::module() const+0x32
V [libjvm.so+0x125132b] KlassFactory::check_shared_class_file_load_hook(InstanceKlass*, Symbol*, Handle, Handle, Thread*)+0x2eb
V [libjvm.so+0x165f208] SystemDictionary::load_shared_class(InstanceKlass*, Handle, Handle, Thread*)+0x35a
V [libjvm.so+0x166abbf] SystemDictionaryShared::acquire_class_for_current_thread(InstanceKlass*, Handle, Handle, Thread*)+0xcf
V [libjvm.so+0x166a960] SystemDictionaryShared::define_class_from_cds(InstanceKlass*, Handle, Handle, Thread*)+0x40
V [libjvm.so+0x10f5489] JVM_DefineClassFromCDS+0x2b0
j java.lang.ClassLoader.defineClassFromCDS0(Ljava/lang/ClassLoader;Ljava/security/ProtectionDomain;J)Ljava/lang/Class;+0 java.base@11.0.7-internal
j java.lang.ClassLoader.defineClassFromCDS(Ljava/lang/String;JLjava/security/ProtectionDomain;)Ljava/lang/Class;+17 java.base@11.0.7-internal
j java.security.SecureClassLoader.defineClassFromCDS(Ljava/lang/String;JLjava/security/CodeSource;)Ljava/lang/Class;+9 java.base@11.0.7-internal
j java.net.URLClassLoader.defineClassInternal(Ljava/lang/String;Ljdk/internal/loader/Resource;ZLjava/lang/String;Ljava/lang/String;J)Ljava/lang/Class;+346 java.base@11.0.7-internal
j java.net.URLClassLoader.defineClassFromCDS(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;J)Ljava/lang/Class;+8 java.base@11.0.7-internal
j java.net.URLClassLoader$1.run()Ljava/lang/Class;+41 java.base@11.0.7-internal
j java.net.URLClassLoader$1.run()Ljava/lang/Object;+1 java.base@11.0.7-internal
v ~StubRoutines::call_stub
V [libjvm.so+0xfd1127] JavaCalls::call_helper(JavaValue*, methodHandle const&, JavaCallArguments*, Thread*)+0x689
V [libjvm.so+0x1498618] os::os_exception_wrapper(void (*)(JavaValue*, methodHandle const&, JavaCallArguments*, Thread*), JavaValue*, methodHandle const&, JavaCallArguments*, Thread*)+0x32
V [libjvm.so+0xfd0a9b] JavaCalls::call(JavaValue*, methodHandle const&, JavaCallArguments*, Thread*)+0x14b
V [libjvm.so+0x10eafbe] JVM_DoPrivileged+0x769
C [libjava.so+0xe8f8] Java_java_security_AccessController_doPrivileged__Ljava_security_PrivilegedExceptionAction_2Ljava_security_AccessControlContext_2+0x46
j java.security.AccessController.doPrivileged(Ljava/security/PrivilegedExceptionAction;Ljava/security/AccessControlContext;)Ljava/lang/Object;+0 java.base@11.0.7-internal
j java.net.URLClassLoader.findClassInternal(Ljava/lang/String;ZLjava/lang/String;J)Ljava/lang/Class;+17 java.base@11.0.7-internal
j java.net.URLClassLoader.findClassFromCDS(Ljava/lang/String;Ljava/lang/String;J)Ljava/lang/Class;+5 java.base@11.0.7-internal
j java.lang.ClassLoader.loadClassFromCDS(Ljava/lang/String;Ljava/lang/String;JI)Ljava/lang/Class;+147 java.base@11.0.7-internal
v ~StubRoutines::call_stub
V [libjvm.so+0xfd1127] JavaCalls::call_helper(JavaValue*, methodHandle const&, JavaCallArguments*, Thread*)+0x689
V [libjvm.so+0x1498618] os::os_exception_wrapper(void (*)(JavaValue*, methodHandle const&, JavaCallArguments*, Thread*), JavaValue*, methodHandle const&, JavaCallArguments*, Thread*)+0x32
V [libjvm.so+0xfd0a9b] JavaCalls::call(JavaValue*, methodHandle const&, JavaCallArguments*, Thread*)+0x14b
V [libjvm.so+0xfcf9d0] JavaCalls::call_virtual(JavaValue*, Klass*, Symbol*, Symbol*, JavaCallArguments*, Thread*)+0x1a2
V [libjvm.so+0x166a52c] SystemDictionaryShared::load_class_from_cds(Symbol const*, Handle, InstanceKlass*, int, Thread*)+0x204
V [libjvm.so+0x166a716] SystemDictionaryShared::lookup_shared(Symbol*, Handle, bool&, bool, Thread*)+0x164
V [libjvm.so+0x165ff63] SystemDictionary::load_instance_class(Symbol*, Handle, Thread*)+0x631
V [libjvm.so+0x165d202] SystemDictionary::resolve_instance_class_or_null(Symbol*, Handle, Handle, Thread*)+0x92a
V [libjvm.so+0x165b8a8] SystemDictionary::resolve_or_null(Symbol*, Handle, Handle, Thread*)+0x11e
V [libjvm.so+0x165b4ab] SystemDictionary::resolve_or_fail(Symbol*, Handle, Handle, bool, Thread*)+0x35
V [libjvm.so+0x11015d8] find_class_from_class_loader(JNIEnv_*, Symbol*, unsigned char, Handle, Handle, unsigned char, Thread*)+0x45
V [libjvm.so+0x10e704b] JVM_FindClassFromCaller+0x36f
C [libjava.so+0xf555] Java_java_lang_Class_forName0+0x22b
j java.lang.Class.forName0(Ljava/lang/String;ZLjava/lang/ClassLoader;Ljava/lang/Class;)Ljava/lang/Class;+0 java.base@11.0.7-internal
j java.lang.Class.forName(Ljava/lang/String;ZLjava/lang/ClassLoader;)Ljava/lang/Class;+43 java.base@11.0.7-internal
j Buy2ByURLClassLoader.lambda$main$1(Ljava/net/URLClassLoader;[ILjava/lang/String;)V+12
j Buy2ByURLClassLoader$$Lambda$25.accept(Ljava/lang/Object;)V+12
j java.util.stream.ForEachOps$ForEachOp$OfRef.accept(Ljava/lang/Object;)V+5 java.base@11.0.7-internal
J 1021 c1 java.util.stream.ReferencePipeline$2$1.accept(Ljava/lang/Object;)V java.base@11.0.7-internal (27 bytes) @ 0x00007fffd87f7864 [0x00007fffd87f7480+0x00000000000003e4]
j java.util.Iterator.forEachRemaining(Ljava/util/function/Consumer;)V+21 java.base@11.0.7-internal
j java.util.Spliterators$IteratorSpliterator.forEachRemaining(Ljava/util/function/Consumer;)V+52 java.base@11.0.7-internal
j java.util.stream.AbstractPipeline.copyInto(Ljava/util/stream/Sink;Ljava/util/Spliterator;)V+32 java.base@11.0.7-internal
j java.util.stream.AbstractPipeline.wrapAndCopyInto(Ljava/util/stream/Sink;Ljava/util/Spliterator;)Ljava/util/stream/Sink;+13 java.base@11.0.7-internal
j java.util.stream.ForEachOps$ForEachOp.evaluateSequential(Ljava/util/stream/PipelineHelper;Ljava/util/Spliterator;)Ljava/lang/Void;+3 java.base@11.0.7-internal
j java.util.stream.ForEachOps$ForEachOp$OfRef.evaluateSequential(Ljava/util/stream/PipelineHelper;Ljava/util/Spliterator;)Ljava/lang/Object;+3 java.base@11.0.7-internal
j java.util.stream.AbstractPipeline.evaluate(Ljava/util/stream/TerminalOp;)Ljava/lang/Object;+88 java.base@11.0.7-internal
j java.util.stream.ReferencePipeline.forEach(Ljava/util/function/Consumer;)V+6 java.base@11.0.7-internal
j Buy2ByURLClassLoader.main([Ljava/lang/String;)V+89
v ~StubRoutines::call_stub
V [libjvm.so+0xfd1127] JavaCalls::call_helper(JavaValue*, methodHandle const&, JavaCallArguments*, Thread*)+0x689
V [libjvm.so+0x1498618] os::os_exception_wrapper(void (*)(JavaValue*, methodHandle const&, JavaCallArguments*, Thread*), JavaValue*, methodHandle const&, JavaCallArguments*, Thread*)+0x32
V [libjvm.so+0xfd0a9b] JavaCalls::call(JavaValue*, methodHandle const&, JavaCallArguments*, Thread*)+0x14b
V [libjvm.so+0x107c15e] jni_invoke_static(JNIEnv_*, JavaValue*, _jobject*, JNICallType, _jmethodID*, JNI_ArgumentPusher*, Thread*)+0x1f0
V [libjvm.so+0x1093240] jni_CallStaticVoidMethod+0x36a
C [libjli.so+0x4f3e] JavaMain+0xcd7
从最上层几个frame的calls来看。最上面两个看不出什么,看第三个:
// called during initial loading of a shared class
InstanceKlass* KlassFactory::check_shared_class_file_load_hook(...) {
#if INCLUDE_CDS && INCLUDE_JVMTI
assert(ik != NULL, "sanity");
assert(ik->is_shared(), "expecting a shared class");
if (JvmtiExport::should_post_class_file_load_hook()) {
assert(THREAD->is_Java_thread(), "must be JavaThread");
// Post the CFLH
JvmtiCachedClassFileData* cached_class_file = NULL;
JvmtiCachedClassFileData* archived_class_data = ik->get_archived_class_data();
assert(archived_class_data != NULL, "shared class has no archived class data");
unsigned char* ptr =
VM_RedefineClasses::get_cached_class_file_bytes(archived_class_data);
unsigned char* end_ptr =
ptr + VM_RedefineClasses::get_cached_class_file_len(archived_class_data);
unsigned char* old_ptr = ptr;
JvmtiExport::post_class_file_load_hook(class_name,
class_loader,
protection_domain,
&ptr,
&end_ptr,
&cached_class_file);
if (old_ptr != ptr) {
// JVMTI agent has modified class file data.
// Set new class file stream using JVMTI agent modified class file data.
ClassLoaderData* loader_data =
ClassLoaderData::class_loader_data(class_loader());
int path_index = ik->shared_classpath_index();
const char* pathname;
if (path_index < 0) {
ModuleEntry* mod_entry = ik->module();
if (mod_entry != NULL && (mod_entry->location() != NULL)) {
ResourceMark rm;
pathname = (const char*)(mod_entry->location()->as_C_string());
} else {
pathname = "";
}
}
...
}
}
#endif
return NULL;
}
ik是org/apache/xerces/jaxp/JAXPConstants,比较莫名其妙。crash发生在ik->module(),而要走到这里需要old_ptr != ptr,这两个指针往前看是:
unsigned char* ptr =
VM_RedefineClasses::get_cached_class_file_bytes(archived_class_data);
unsigned char* end_ptr =
ptr + VM_RedefineClasses::get_cached_class_file_len(archived_class_data);
unsigned char* old_ptr = ptr;
JvmtiExport::post_class_file_load_hook(class_name,
class_loader,
protection_domain,
&ptr,
&end_ptr,
&cached_class_file);
if (old_ptr != ptr){...}
调用JvmtiExport::post_class_file_load_hook之前肯定是old_ptr == ptr,所以问题可能就出在JvmtiExport::post_class_file_load_hook上:它接收的是&ptr和&end_ptr,所以应该是它修改了ptr,导致old_ptr不等于ptr。
0x2
为了进一步确认,避免白费功夫,先看看是不是真的这个原因。[1]
看一下KlassFactory::check_shared_class_file_load_hook的caller,即SystemDictionary::load_shared_class:
InstanceKlass* SystemDictionary::load_shared_class(InstanceKlass* ik,
Handle class_loader,
Handle protection_domain, TRAPS) {
...
InstanceKlass* new_ik = KlassFactory::check_shared_class_file_load_hook(
ik, class_name, class_loader, protection_domain, CHECK_NULL);
if (new_ik != NULL) {
// The class is changed by CFLH. Return the new class. The shared class is
// not used.
return new_ik;
}
...
return ik;
}
CFLH表示Class File Load Hook。这段代码的意思是:如果agent修改了类的字节码,那就不使用CDS archive里的ik,而是使用修改后的类。但是agent的实现根本没有修改过org/apache/xerces/jaxp/JAXPConstants,所以把这段逻辑注释掉,再跑一下AppCDS+agent,没有任何问题。那可以确定问题出在JvmtiExport::post_class_file_load_hook,回到[1]处继续。
0x3
JvmtiExport::post_class_file_load_hook会经过层层调用,走到JvmtiClassFileLoadHookPoster::post_to_env,之前的&ptr和&end_ptr现在分别对应_data_ptr和_end_ptr,_curr_data初始化为*data_ptr:
class JvmtiClassFileLoadHookPoster : public StackObj {
...
public:
inline JvmtiClassFileLoadHookPoster(Symbol* h_name, Handle class_loader,
Handle h_protection_domain,
unsigned char **data_ptr, unsigned char **end_ptr,
JvmtiCachedClassFileData **cache_ptr) {
_h_name = h_name;
_class_loader = class_loader;
_h_protection_domain = h_protection_domain;
_data_ptr = data_ptr;
_end_ptr = end_ptr;
_thread = JavaThread::current();
_curr_len = *end_ptr - *data_ptr;
_curr_data = *data_ptr;
_curr_env = NULL;
_cached_class_file_ptr = cache_ptr;
...
}
...
void post_to_env(JvmtiEnv* env, bool caching_needed) {
...
unsigned char *new_data = NULL;
if (callback != NULL) {
(*callback)(env->jvmti_external(), jem.jni_env(),
jem.class_being_redefined(),
jem.jloader(), jem.class_name(),
jem.protection_domain(),
_curr_len, _curr_data,
&new_len, &new_data);
}
if (new_data != NULL) {
...
_curr_data = new_data;
_curr_len = new_len;
// Save the current agent env we need this to deallocate the
// memory allocated by this agent.
_curr_env = env;
}
}
...
};
这里最开始new_data为NULL,经过callback调用后,如果new_data不为NULL,则_curr_data被改成new_data,最终也就修改了*_data_ptr,即ptr。
所以问题就是,这个调用导致了new_data不为null。
这个调用会从libjvm.so转到libinstrument.so,调用transformClassFile:
void
transformClassFile( JPLISAgent * agent,
JNIEnv * jnienv,
jobject loaderObject,
const char* name,
jclass classBeingRedefined,
jobject protectionDomain,
jint class_data_len,
const unsigned char* class_data,
jint* new_class_data_len,
unsigned char** new_class_data,
jboolean is_retransformer) {
jboolean errorOutstanding = JNI_FALSE;
jstring classNameStringObject = NULL;
jarray classFileBufferObject = NULL;
jarray transformedBufferObject = NULL;
jsize transformedBufferSize = 0;
unsigned char * resultBuffer = NULL;
jboolean shouldRun = JNI_FALSE;
...
/* Finally, unmarshall the parameters (if someone touched the buffer, tell the JVM) */
if ( !errorOutstanding ) {
if ( transformedBufferObject != NULL ) {
transformedBufferSize = (*jnienv)->GetArrayLength( jnienv,
transformedBufferObject);
errorOutstanding = checkForAndClearThrowable(jnienv);
jplis_assert_msg(!errorOutstanding, "can't get array length");
if ( !errorOutstanding ) {
/* allocate the response buffer with the JVMTI allocate call.
* This is what the JVMTI spec says to do for Class File Load hook responses
*/
jvmtiError allocError = (*(jvmti(agent)))->Allocate(jvmti(agent),
transformedBufferSize,
&resultBuffer);
errorOutstanding = (allocError != JVMTI_ERROR_NONE);
jplis_assert_msg(!errorOutstanding, "can't allocate result buffer");
}
if ( !errorOutstanding ) {
(*jnienv)->GetByteArrayRegion( jnienv,
transformedBufferObject,
0,
transformedBufferSize,
(jbyte *) resultBuffer);
errorOutstanding = checkForAndClearThrowable(jnienv);
jplis_assert_msg(!errorOutstanding, "can't get byte array region");
/* in this case, we will not return the buffer to the JVMTI,
* so we need to deallocate it ourselves
*/
if ( errorOutstanding ) {
deallocate( jvmti(agent),
(void*)resultBuffer);
}
}
if ( !errorOutstanding ) {
*new_class_data_len = (transformedBufferSize);
*new_class_data = resultBuffer;
}
}
}
...
}
return;
}
这个调用是transform相关的,到这里已经猜到了:agent的transform不会返回null,就算类没有修改,也是返回原来的byte[]:
/**
 * ClassFileTransformer callback of the agent, shown in the form that triggered the crash.
 *
 * When URLClassLoader itself is being redefined/retransformed, its bytecode is regenerated
 * through ClassPool/CtClass (presumably Javassist; the imports are not shown) and returned.
 * In every other case, including when the rewrite throws, the incoming classfileBuffer is
 * returned unchanged instead of null.
 *
 * @param loader              defining loader of the class; not used by this implementation
 * @param className           internal name of the class; not used by this implementation
 * @param classBeingRedefined the class being redefined/retransformed, or null on initial load
 * @param protectionDomain    protection domain of the class; not used by this implementation
 * @param classfileBuffer     the current class file bytes handed in by the JVM
 * @return the regenerated URLClassLoader bytes, otherwise the same classfileBuffer reference
 */
@Override
public byte[] transform(ClassLoader loader, String className, Class<?> classBeingRedefined,
ProtectionDomain protectionDomain, byte[] classfileBuffer) {
// Only a redefine/retransform of URLClassLoader is rewritten. The explicit null check is
// redundant: the identity comparison can only succeed for a non-null class.
if (classBeingRedefined != null && classBeingRedefined == URLClassLoader.class) {
try {
ClassPool cp = new ClassPool();
cp.appendSystemPath();
CtClass ctClass = cp.get(URLClassLoader.class.getName());
// dumpCtor1..dumpCtor7 are defined elsewhere in the agent; presumably each one patches
// a single URLClassLoader constructor -- TODO confirm against their definitions.
dumpCtor1(cp, ctClass);
dumpCtor2(cp, ctClass);
dumpCtor3(cp, ctClass);
dumpCtor4(cp, ctClass);
dumpCtor5(cp, ctClass);
dumpCtor6(cp, ctClass);
dumpCtor7(cp, ctClass);
byte[] classData = ctClass.toBytecode();
ctClass.detach();
return classData;
} catch (Exception e) {
// Best effort: record the failure in ERROR_LOG_FILE, then fall through and hand back
// the original bytes.
try {
Files.writeString(Paths.get(ERROR_LOG_FILE), e.toString());
} catch (java.io.IOException e1) {
// NOTE(review): this prints the original exception e, not the I/O failure e1.
e.printStackTrace();
}
}
}
// NOTE(review): untouched classes get the input buffer back rather than null. According to
// the surrounding write-up, libinstrument then copies it into a freshly allocated buffer,
// the JVM sees non-NULL new class data, and CDS-shared classes take the "modified by agent"
// path that crashed. Returning null is the documented way to signal "no transformation".
return classfileBuffer;
}
这就导致new_data最开始是NULL,回调之后变成了libinstrument新分配的buffer(内容和原来一样),于是old_ptr != ptr,导致出了问题。修改Java的transform,在没有修改字节码的分支改为return null,问题就解决了。
0x4
再看看doc:
惭愧,官方有说过,如果transform的实现没有修改过类字节码,那么就应该返回null。
进一步,如果transform的实现修改了类,应该创建一个新的byte[],然后把classfileBuffer的数据复制进去,再修改新的byte[],而不是直接修改classfileBuffer的字节码,网上的教程和example很多都没有注意这个问题。
这就完了吗?对于手头的工作来说,到这里已经可以了。但是回到问题本身,实际上还有下文。因为即便返回了原来的classfileBuffer而不是按照推荐返回null,对应的逻辑也应该是使用classfileBuffer作为类的新字节码代替原来的CDS archive数据,而不是JVM Crash,所以JVM的实现其实是有问题的。