diff options
100 files changed, 4452 insertions, 885 deletions
@@ -1,6 +1,6 @@ ^build/ ^dist/ -^nbproject/private/ +/nbproject/private/ ^src/share/tools/hsdis/build/ ^src/share/tools/IdealGraphVisualizer/[a-zA-Z0-9]*/build/ ^src/share/tools/IdealGraphVisualizer/build/ @@ -50,3 +50,6 @@ a94714c550658fd6741793ef036cb9625dc2ab1a jdk7-b72 faf94d94786b621f8e13cbcc941ca69c6d967c3f jdk7-b73 f4b900403d6e4b0af51447bd13bbe23fe3a1dac7 jdk7-b74 d8dd291a362acb656026a9c0a9da48501505a1e7 jdk7-b75 +9174bb32e934965288121f75394874eeb1fcb649 jdk7-b76 +455105fc81d941482f8f8056afaa7aa0949c9300 jdk7-b77 +e703499b4b51e3af756ae77c3d5e8b3058a14e4e jdk7-b78 diff --git a/make/hotspot_version b/make/hotspot_version index 5da770b9f..aba3cd361 100644 --- a/make/hotspot_version +++ b/make/hotspot_version @@ -35,7 +35,7 @@ HOTSPOT_VM_COPYRIGHT=Copyright 2009 HS_MAJOR_VER=17 HS_MINOR_VER=0 -HS_BUILD_NUMBER=05 +HS_BUILD_NUMBER=06 JDK_MAJOR_VER=1 JDK_MINOR_VER=7 diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp index 714f2fb8f..c22a8c1d0 100644 --- a/src/cpu/x86/vm/assembler_x86.cpp +++ b/src/cpu/x86/vm/assembler_x86.cpp @@ -7666,7 +7666,7 @@ RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_ad #ifdef ASSERT Label L; - testl(tmp, tmp); + testptr(tmp, tmp); jccb(Assembler::notZero, L); hlt(); bind(L); diff --git a/src/cpu/x86/vm/interp_masm_x86_32.cpp b/src/cpu/x86/vm/interp_masm_x86_32.cpp index e06e423a4..a30092523 100644 --- a/src/cpu/x86/vm/interp_masm_x86_32.cpp +++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp @@ -196,6 +196,9 @@ void InterpreterMacroAssembler::get_cache_index_at_bcp(Register reg, int bcp_off } else { assert(EnableInvokeDynamic, "giant index used only for EnableInvokeDynamic"); movl(reg, Address(rsi, bcp_offset)); + // Check if the secondary index definition is still ~x, otherwise + // we have to change the following assembler code to calculate the + // plain index. assert(constantPoolCacheOopDesc::decode_secondary_index(~123) == 123, "else change next line"); notl(reg); // convert to plain index } diff --git a/src/cpu/x86/vm/interp_masm_x86_64.cpp b/src/cpu/x86/vm/interp_masm_x86_64.cpp index ad449c8bd..9418540dc 100644 --- a/src/cpu/x86/vm/interp_masm_x86_64.cpp +++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp @@ -185,12 +185,30 @@ void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp( } +void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, + int bcp_offset, + bool giant_index) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + if (!giant_index) { + load_unsigned_short(index, Address(r13, bcp_offset)); + } else { + assert(EnableInvokeDynamic, "giant index used only for EnableInvokeDynamic"); + movl(index, Address(r13, bcp_offset)); + // Check if the secondary index definition is still ~x, otherwise + // we have to change the following assembler code to calculate the + // plain index. + assert(constantPoolCacheOopDesc::decode_secondary_index(~123) == 123, "else change next line"); + notl(index); // convert to plain index + } +} + + void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Register index, - int bcp_offset) { - assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + int bcp_offset, + bool giant_index) { assert(cache != index, "must use different registers"); - load_unsigned_short(index, Address(r13, bcp_offset)); + get_cache_index_at_bcp(index, bcp_offset, giant_index); movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize)); assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); // convert from field index to ConstantPoolCacheEntry index @@ -200,10 +218,10 @@ void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, Register tmp, - int bcp_offset) { - assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + int bcp_offset, + bool giant_index) { assert(cache != tmp, "must use different register"); - load_unsigned_short(tmp, Address(r13, bcp_offset)); + get_cache_index_at_bcp(tmp, bcp_offset, giant_index); assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); // convert from field index to ConstantPoolCacheEntry index // and from word offset to byte offset @@ -1236,7 +1254,8 @@ void InterpreterMacroAssembler::profile_final_call(Register mdp) { void InterpreterMacroAssembler::profile_virtual_call(Register receiver, Register mdp, - Register reg2) { + Register reg2, + bool receiver_can_be_null) { if (ProfileInterpreter) { Label profile_continue; @@ -1246,8 +1265,15 @@ void InterpreterMacroAssembler::profile_virtual_call(Register receiver, // We are making a call. Increment the count. increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + Label skip_receiver_profile; + if (receiver_can_be_null) { + testptr(receiver, receiver); + jcc(Assembler::zero, skip_receiver_profile); + } + // Record the receiver type. record_klass_in_profile(receiver, mdp, reg2); + bind(skip_receiver_profile); // The method data pointer needs to be updated to reflect the new target. update_mdp_by_constant(mdp, diff --git a/src/cpu/x86/vm/interp_masm_x86_64.hpp b/src/cpu/x86/vm/interp_masm_x86_64.hpp index c35cb3a19..0cfc9bf7f 100644 --- a/src/cpu/x86/vm/interp_masm_x86_64.hpp +++ b/src/cpu/x86/vm/interp_masm_x86_64.hpp @@ -95,9 +95,10 @@ class InterpreterMacroAssembler: public MacroAssembler { void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); void get_cache_and_index_at_bcp(Register cache, Register index, - int bcp_offset); + int bcp_offset, bool giant_index = false); void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, - int bcp_offset); + int bcp_offset, bool giant_index = false); + void get_cache_index_at_bcp(Register index, int bcp_offset, bool giant_index = false); void pop_ptr(Register r = rax); @@ -236,7 +237,8 @@ class InterpreterMacroAssembler: public MacroAssembler { void profile_call(Register mdp); void profile_final_call(Register mdp); void profile_virtual_call(Register receiver, Register mdp, - Register scratch2); + Register scratch2, + bool receiver_can_be_null = false); void profile_ret(Register return_bci, Register mdp); void profile_null_seen(Register mdp); void profile_typecheck(Register mdp, Register klass, Register scratch); diff --git a/src/cpu/x86/vm/interpreter_x86_64.cpp b/src/cpu/x86/vm/interpreter_x86_64.cpp index c3cbf56ca..6c234a33a 100644 --- a/src/cpu/x86/vm/interpreter_x86_64.cpp +++ b/src/cpu/x86/vm/interpreter_x86_64.cpp @@ -277,12 +277,11 @@ address InterpreterGenerator::generate_abstract_entry(void) { address entry_point = __ pc(); // abstract method entry - // remove return address. Not really needed, since exception - // handling throws away expression stack - __ pop(rbx); - // adjust stack to what a normal return would do - __ mov(rsp, r13); + // pop return address, reset last_sp to NULL + __ empty_expression_stack(); + __ restore_bcp(); // rsi must be correct for exception handler (was destroyed) + __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) // throw exception __ call_VM(noreg, CAST_FROM_FN_PTR(address, @@ -300,7 +299,10 @@ address InterpreterGenerator::generate_method_handle_entry(void) { if (!EnableMethodHandles) { return generate_abstract_entry(); } - return generate_abstract_entry(); //6815692// + + address entry_point = MethodHandles::generate_method_handle_interpreter_entry(_masm); + + return entry_point; } diff --git a/src/cpu/x86/vm/methodHandles_x86.cpp b/src/cpu/x86/vm/methodHandles_x86.cpp index a682a81b8..58024f2a8 100644 --- a/src/cpu/x86/vm/methodHandles_x86.cpp +++ b/src/cpu/x86/vm/methodHandles_x86.cpp @@ -448,7 +448,7 @@ void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHan rbx_index, Address::times_ptr, base + vtableEntry::method_offset_in_bytes()); Register rbx_method = rbx_temp; - __ movl(rbx_method, vtable_entry_addr); + __ movptr(rbx_method, vtable_entry_addr); __ verify_oop(rbx_method); __ jmp(rbx_method_fie); diff --git a/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/src/cpu/x86/vm/stubGenerator_x86_64.cpp index 6b0731490..cc80a4df2 100644 --- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp +++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp @@ -2935,6 +2935,16 @@ class StubGenerator: public StubCodeGenerator { // arraycopy stubs used by compilers generate_arraycopy_stubs(); + + // generic method handle stubs + if (EnableMethodHandles && SystemDictionary::MethodHandle_klass() != NULL) { + for (MethodHandles::EntryKind ek = MethodHandles::_EK_FIRST; + ek < MethodHandles::_EK_LIMIT; + ek = MethodHandles::EntryKind(1 + (int)ek)) { + StubCodeMark mark(this, "MethodHandle", MethodHandles::entry_name(ek)); + MethodHandles::generate_method_handle_stub(_masm, ek); + } + } } public: diff --git a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp index cd9d08915..273d3901e 100644 --- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp +++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp @@ -100,21 +100,26 @@ address TemplateInterpreterGenerator::generate_ClassCastException_handler() { return entry; } -// Arguments are: required type in rarg1, failing object (or NULL) in rarg2 +// Arguments are: required type at TOS+8, failing object (or NULL) at TOS+4. address TemplateInterpreterGenerator::generate_WrongMethodType_handler() { address entry = __ pc(); __ pop(c_rarg2); // failing object is at TOS __ pop(c_rarg1); // required type is at TOS+8 - // expression stack must be empty before entering the VM if an - // exception happened + __ verify_oop(c_rarg1); + __ verify_oop(c_rarg2); + + // Various method handle types use interpreter registers as temps. + __ restore_bcp(); + __ restore_locals(); + + // Expression stack must be empty before entering the VM for an exception. __ empty_expression_stack(); __ call_VM(noreg, CAST_FROM_FN_PTR(address, - InterpreterRuntime:: - throw_WrongMethodTypeException), + InterpreterRuntime::throw_WrongMethodTypeException), // pass required type, failing object (or NULL) c_rarg1, c_rarg2); return entry; @@ -182,15 +187,29 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, __ restore_bcp(); __ restore_locals(); - __ get_cache_and_index_at_bcp(rbx, rcx, 1); + Label L_got_cache, L_giant_index; + if (EnableInvokeDynamic) { + __ cmpb(Address(r13, 0), Bytecodes::_invokedynamic); + __ jcc(Assembler::equal, L_giant_index); + } + __ get_cache_and_index_at_bcp(rbx, rcx, 1, false); + __ bind(L_got_cache); __ movl(rbx, Address(rbx, rcx, - Address::times_8, + Address::times_ptr, in_bytes(constantPoolCacheOopDesc::base_offset()) + 3 * wordSize)); __ andl(rbx, 0xFF); if (TaggedStackInterpreter) __ shll(rbx, 1); // 2 slots per parameter. __ lea(rsp, Address(rsp, rbx, Address::times_8)); __ dispatch_next(state, step); + + // out of the main line of code... + if (EnableInvokeDynamic) { + __ bind(L_giant_index); + __ get_cache_and_index_at_bcp(rbx, rcx, 1, true); + __ jmp(L_got_cache); + } + return entry; } diff --git a/src/cpu/x86/vm/templateTable_x86_32.cpp b/src/cpu/x86/vm/templateTable_x86_32.cpp index ff78f933d..8959b3410 100644 --- a/src/cpu/x86/vm/templateTable_x86_32.cpp +++ b/src/cpu/x86/vm/templateTable_x86_32.cpp @@ -3146,7 +3146,6 @@ void TemplateTable::invokedynamic(int byte_no) { __ profile_call(rsi); } - Label handle_unlinked_site; __ movptr(rcx, Address(rax, __ delayed_value(java_dyn_CallSite::target_offset_in_bytes, rcx))); __ null_check(rcx); __ prepare_to_jump_from_interpreted(); diff --git a/src/cpu/x86/vm/templateTable_x86_64.cpp b/src/cpu/x86/vm/templateTable_x86_64.cpp index 1180227b5..f461b10e4 100644 --- a/src/cpu/x86/vm/templateTable_x86_64.cpp +++ b/src/cpu/x86/vm/templateTable_x86_64.cpp @@ -203,18 +203,15 @@ void TemplateTable::patch_bytecode(Bytecodes::Code bytecode, Register bc, __ jcc(Assembler::notEqual, fast_patch); __ get_method(scratch); // Let breakpoint table handling rewrite to quicker bytecode - __ call_VM(noreg, - CAST_FROM_FN_PTR(address, - InterpreterRuntime::set_original_bytecode_at), - scratch, r13, bc); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), scratch, r13, bc); #ifndef ASSERT __ jmpb(patch_done); - __ bind(fast_patch); - } #else __ jmp(patch_done); +#endif __ bind(fast_patch); } +#ifdef ASSERT Label okay; __ load_unsigned_byte(scratch, at_bcp(0)); __ cmpl(scratch, (int) Bytecodes::java_code(bytecode)); @@ -2054,26 +2051,28 @@ void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits } } -void TemplateTable::resolve_cache_and_index(int byte_no, - Register Rcache, - Register index) { +void TemplateTable::resolve_cache_and_index(int byte_no, Register Rcache, Register index) { assert(byte_no == 1 || byte_no == 2, "byte_no out of range"); + bool is_invokedynamic = (bytecode() == Bytecodes::_invokedynamic); const Register temp = rbx; assert_different_registers(Rcache, index, temp); const int shift_count = (1 + byte_no) * BitsPerByte; Label resolved; - __ get_cache_and_index_at_bcp(Rcache, index, 1); - __ movl(temp, Address(Rcache, - index, Address::times_8, - constantPoolCacheOopDesc::base_offset() + - ConstantPoolCacheEntry::indices_offset())); - __ shrl(temp, shift_count); - // have we resolved this bytecode? - __ andl(temp, 0xFF); - __ cmpl(temp, (int) bytecode()); - __ jcc(Assembler::equal, resolved); + __ get_cache_and_index_at_bcp(Rcache, index, 1, is_invokedynamic); + if (is_invokedynamic) { + // we are resolved if the f1 field contains a non-null CallSite object + __ cmpptr(Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f1_offset()), (int32_t) NULL_WORD); + __ jcc(Assembler::notEqual, resolved); + } else { + __ movl(temp, Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset())); + __ shrl(temp, shift_count); + // have we resolved this bytecode? + __ andl(temp, 0xFF); + __ cmpl(temp, (int) bytecode()); + __ jcc(Assembler::equal, resolved); + } // resolve first time through address entry; @@ -2090,6 +2089,9 @@ void TemplateTable::resolve_cache_and_index(int byte_no, case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); break; + case Bytecodes::_invokedynamic: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); + break; default: ShouldNotReachHere(); break; @@ -2098,7 +2100,7 @@ void TemplateTable::resolve_cache_and_index(int byte_no, __ call_VM(noreg, entry, temp); // Update registers with resolved info - __ get_cache_and_index_at_bcp(Rcache, index, 1); + __ get_cache_and_index_at_bcp(Rcache, index, 1, is_invokedynamic); __ bind(resolved); } @@ -2832,15 +2834,14 @@ void TemplateTable::count_calls(Register method, Register temp) { ShouldNotReachHere(); } -void TemplateTable::prepare_invoke(Register method, - Register index, - int byte_no, - Bytecodes::Code code) { +void TemplateTable::prepare_invoke(Register method, Register index, int byte_no) { // determine flags + Bytecodes::Code code = bytecode(); const bool is_invokeinterface = code == Bytecodes::_invokeinterface; + const bool is_invokedynamic = code == Bytecodes::_invokedynamic; const bool is_invokevirtual = code == Bytecodes::_invokevirtual; const bool is_invokespecial = code == Bytecodes::_invokespecial; - const bool load_receiver = code != Bytecodes::_invokestatic; + const bool load_receiver = (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic); const bool receiver_null_check = is_invokespecial; const bool save_flags = is_invokeinterface || is_invokevirtual; // setup registers & access constant pool cache @@ -2858,9 +2859,13 @@ void TemplateTable::prepare_invoke(Register method, __ movl(recv, flags); __ andl(recv, 0xFF); if (TaggedStackInterpreter) __ shll(recv, 1); // index*2 - __ movptr(recv, Address(rsp, recv, Address::times_8, - -Interpreter::expr_offset_in_bytes(1))); - __ verify_oop(recv); + Address recv_addr(rsp, recv, Address::times_8, -Interpreter::expr_offset_in_bytes(1)); + if (is_invokedynamic) { + __ lea(recv, recv_addr); + } else { + __ movptr(recv, recv_addr); + __ verify_oop(recv); + } } // do null check if needed @@ -2878,10 +2883,14 @@ void TemplateTable::prepare_invoke(Register method, ConstantPoolCacheEntry::verify_tosBits(); // load return address { - ExternalAddress return_5((address)Interpreter::return_5_addrs_by_index_table()); - ExternalAddress return_3((address)Interpreter::return_3_addrs_by_index_table()); - __ lea(rscratch1, (is_invokeinterface ? return_5 : return_3)); - __ movptr(flags, Address(rscratch1, flags, Address::times_8)); + address table_addr; + if (is_invokeinterface || is_invokedynamic) + table_addr = (address)Interpreter::return_5_addrs_by_index_table(); + else + table_addr = (address)Interpreter::return_3_addrs_by_index_table(); + ExternalAddress table(table_addr); + __ lea(rscratch1, table); + __ movptr(flags, Address(rscratch1, flags, Address::times_ptr)); } // push return address @@ -2947,7 +2956,7 @@ void TemplateTable::invokevirtual_helper(Register index, void TemplateTable::invokevirtual(int byte_no) { transition(vtos, vtos); - prepare_invoke(rbx, noreg, byte_no, bytecode()); + prepare_invoke(rbx, noreg, byte_no); // rbx: index // rcx: receiver @@ -2959,7 +2968,7 @@ void TemplateTable::invokevirtual(int byte_no) { void TemplateTable::invokespecial(int byte_no) { transition(vtos, vtos); - prepare_invoke(rbx, noreg, byte_no, bytecode()); + prepare_invoke(rbx, noreg, byte_no); // do the call __ verify_oop(rbx); __ profile_call(rax); @@ -2969,7 +2978,7 @@ void TemplateTable::invokespecial(int byte_no) { void TemplateTable::invokestatic(int byte_no) { transition(vtos, vtos); - prepare_invoke(rbx, noreg, byte_no, bytecode()); + prepare_invoke(rbx, noreg, byte_no); // do the call __ verify_oop(rbx); __ profile_call(rax); @@ -2983,7 +2992,7 @@ void TemplateTable::fast_invokevfinal(int byte_no) { void TemplateTable::invokeinterface(int byte_no) { transition(vtos, vtos); - prepare_invoke(rax, rbx, byte_no, bytecode()); + prepare_invoke(rax, rbx, byte_no); // rax: Interface // rbx: index @@ -3072,7 +3081,24 @@ void TemplateTable::invokedynamic(int byte_no) { return; } - __ stop("invokedynamic NYI");//6815692// + prepare_invoke(rax, rbx, byte_no); + + // rax: CallSite object (f1) + // rbx: unused (f2) + // rcx: receiver address + // rdx: flags (unused) + + if (ProfileInterpreter) { + Label L; + // %%% should make a type profile for any invokedynamic that takes a ref argument + // profile this call + __ profile_call(r13); + } + + __ movptr(rcx, Address(rax, __ delayed_value(java_dyn_CallSite::target_offset_in_bytes, rcx))); + __ null_check(rcx); + __ prepare_to_jump_from_interpreted(); + __ jump_to_method_handle_entry(rcx, rdx); } diff --git a/src/cpu/x86/vm/templateTable_x86_64.hpp b/src/cpu/x86/vm/templateTable_x86_64.hpp index ec531a7bc..6a9fdf9ed 100644 --- a/src/cpu/x86/vm/templateTable_x86_64.hpp +++ b/src/cpu/x86/vm/templateTable_x86_64.hpp @@ -22,8 +22,7 @@ * */ - static void prepare_invoke(Register method, Register index, int byte_no, - Bytecodes::Code code); + static void prepare_invoke(Register method, Register index, int byte_no); static void invokevirtual_helper(Register index, Register recv, Register flags); static void volatile_barrier(Assembler::Membar_mask_bits order_constraint); diff --git a/src/share/vm/ci/ciEnv.cpp b/src/share/vm/ci/ciEnv.cpp index 0a6de348d..5d7df11f5 100644 --- a/src/share/vm/ci/ciEnv.cpp +++ b/src/share/vm/ci/ciEnv.cpp @@ -46,6 +46,9 @@ ciInstanceKlass* ciEnv::_Throwable; ciInstanceKlass* ciEnv::_Thread; ciInstanceKlass* ciEnv::_OutOfMemoryError; ciInstanceKlass* ciEnv::_String; +ciInstanceKlass* ciEnv::_StringBuffer; +ciInstanceKlass* ciEnv::_StringBuilder; +ciInstanceKlass* ciEnv::_Integer; ciSymbol* ciEnv::_unloaded_cisymbol = NULL; ciInstanceKlass* ciEnv::_unloaded_ciinstance_klass = NULL; @@ -110,6 +113,8 @@ ciEnv::ciEnv(CompileTask* task, int system_dictionary_modification_counter) { _ArrayIndexOutOfBoundsException_instance = NULL; _ArrayStoreException_instance = NULL; _ClassCastException_instance = NULL; + _the_null_string = NULL; + _the_min_jint_string = NULL; } ciEnv::ciEnv(Arena* arena) { @@ -163,6 +168,8 @@ ciEnv::ciEnv(Arena* arena) { _ArrayIndexOutOfBoundsException_instance = NULL; _ArrayStoreException_instance = NULL; _ClassCastException_instance = NULL; + _the_null_string = NULL; + _the_min_jint_string = NULL; } ciEnv::~ciEnv() { @@ -248,6 +255,22 @@ ciInstance* ciEnv::ClassCastException_instance() { return _ClassCastException_instance; } +ciInstance* ciEnv::the_null_string() { + if (_the_null_string == NULL) { + VM_ENTRY_MARK; + _the_null_string = get_object(Universe::the_null_string())->as_instance(); + } + return _the_null_string; +} + +ciInstance* ciEnv::the_min_jint_string() { + if (_the_min_jint_string == NULL) { + VM_ENTRY_MARK; + _the_min_jint_string = get_object(Universe::the_min_jint_string())->as_instance(); + } + return _the_min_jint_string; +} + // ------------------------------------------------------------------ // ciEnv::get_method_from_handle ciMethod* ciEnv::get_method_from_handle(jobject method) { diff --git a/src/share/vm/ci/ciEnv.hpp b/src/share/vm/ci/ciEnv.hpp index e855dbf9e..493600e02 100644 --- a/src/share/vm/ci/ciEnv.hpp +++ b/src/share/vm/ci/ciEnv.hpp @@ -82,6 +82,9 @@ private: static ciInstanceKlass* _Thread; static ciInstanceKlass* _OutOfMemoryError; static ciInstanceKlass* _String; + static ciInstanceKlass* _StringBuffer; + static ciInstanceKlass* _StringBuilder; + static ciInstanceKlass* _Integer; static ciSymbol* _unloaded_cisymbol; static ciInstanceKlass* _unloaded_ciinstance_klass; @@ -97,6 +100,9 @@ private: ciInstance* _ArrayStoreException_instance; ciInstance* _ClassCastException_instance; + ciInstance* _the_null_string; // The Java string "null" + ciInstance* _the_min_jint_string; // The Java string "-2147483648" + // Look up a klass by name from a particular class loader (the accessor's). // If require_local, result must be defined in that class loader, or NULL. // If !require_local, a result from remote class loader may be reported, @@ -310,6 +316,15 @@ public: ciInstanceKlass* String_klass() { return _String; } + ciInstanceKlass* StringBuilder_klass() { + return _StringBuilder; + } + ciInstanceKlass* StringBuffer_klass() { + return _StringBuffer; + } + ciInstanceKlass* Integer_klass() { + return _Integer; + } ciInstance* NullPointerException_instance() { assert(_NullPointerException_instance != NULL, "initialization problem"); return _NullPointerException_instance; @@ -324,6 +339,9 @@ public: ciInstance* ArrayStoreException_instance(); ciInstance* ClassCastException_instance(); + ciInstance* the_null_string(); + ciInstance* the_min_jint_string(); + static ciSymbol* unloaded_cisymbol() { return _unloaded_cisymbol; } diff --git a/src/share/vm/ci/ciInstanceKlass.cpp b/src/share/vm/ci/ciInstanceKlass.cpp index 1053727a9..ac14e71d5 100644 --- a/src/share/vm/ci/ciInstanceKlass.cpp +++ b/src/share/vm/ci/ciInstanceKlass.cpp @@ -341,6 +341,20 @@ ciField* ciInstanceKlass::get_field_by_offset(int field_offset, bool is_static) } // ------------------------------------------------------------------ +// ciInstanceKlass::get_field_by_name +ciField* ciInstanceKlass::get_field_by_name(ciSymbol* name, ciSymbol* signature, bool is_static) { + VM_ENTRY_MARK; + instanceKlass* k = get_instanceKlass(); + fieldDescriptor fd; + klassOop def = k->find_field(name->get_symbolOop(), signature->get_symbolOop(), is_static, &fd); + if (def == NULL) { + return NULL; + } + ciField* field = new (CURRENT_THREAD_ENV->arena()) ciField(&fd); + return field; +} + +// ------------------------------------------------------------------ // ciInstanceKlass::non_static_fields. class NonStaticFieldFiller: public FieldClosure { diff --git a/src/share/vm/ci/ciInstanceKlass.hpp b/src/share/vm/ci/ciInstanceKlass.hpp index a60020adc..007a2ab8d 100644 --- a/src/share/vm/ci/ciInstanceKlass.hpp +++ b/src/share/vm/ci/ciInstanceKlass.hpp @@ -148,6 +148,7 @@ public: ciInstanceKlass* get_canonical_holder(int offset); ciField* get_field_by_offset(int field_offset, bool is_static); + ciField* get_field_by_name(ciSymbol* name, ciSymbol* signature, bool is_static); GrowableArray<ciField*>* non_static_fields(); diff --git a/src/share/vm/ci/ciObjectFactory.cpp b/src/share/vm/ci/ciObjectFactory.cpp index 6fb4edc4d..f05abb21f 100644 --- a/src/share/vm/ci/ciObjectFactory.cpp +++ b/src/share/vm/ci/ciObjectFactory.cpp @@ -168,6 +168,15 @@ void ciObjectFactory::init_shared_objects() { ciEnv::_String = get(SystemDictionary::string_klass()) ->as_instance_klass(); + ciEnv::_StringBuffer = + get(SystemDictionary::stringBuffer_klass()) + ->as_instance_klass(); + ciEnv::_StringBuilder = + get(SystemDictionary::StringBuilder_klass()) + ->as_instance_klass(); + ciEnv::_Integer = + get(SystemDictionary::int_klass()) + ->as_instance_klass(); for (int len = -1; len != _ci_objects->length(); ) { len = _ci_objects->length(); diff --git a/src/share/vm/classfile/classFileParser.cpp b/src/share/vm/classfile/classFileParser.cpp index d0c6e67ed..18ee3fd90 100644 --- a/src/share/vm/classfile/classFileParser.cpp +++ b/src/share/vm/classfile/classFileParser.cpp @@ -2511,23 +2511,12 @@ void ClassFileParser::java_dyn_MethodHandle_fix_pre(constantPoolHandle cp, fac_ptr->nonstatic_byte_count -= 1; (*fields_ptr)->ushort_at_put(i + instanceKlass::signature_index_offset, word_sig_index); - if (wordSize == jintSize) { - fac_ptr->nonstatic_word_count += 1; - } else { - fac_ptr->nonstatic_double_count += 1; - } + fac_ptr->nonstatic_word_count += 1; - FieldAllocationType atype = (FieldAllocationType) (*fields_ptr)->ushort_at(i+4); + FieldAllocationType atype = (FieldAllocationType) (*fields_ptr)->ushort_at(i + instanceKlass::low_offset); assert(atype == NONSTATIC_BYTE, ""); FieldAllocationType new_atype = NONSTATIC_WORD; - if (wordSize > jintSize) { - if (Universe::field_type_should_be_aligned(T_LONG)) { - atype = NONSTATIC_ALIGNED_DOUBLE; - } else { - atype = NONSTATIC_DOUBLE; - } - } - (*fields_ptr)->ushort_at_put(i+4, new_atype); + (*fields_ptr)->ushort_at_put(i + instanceKlass::low_offset, new_atype); found_vmentry = true; break; @@ -3085,7 +3074,7 @@ instanceKlassHandle ClassFileParser::parseClassFile(symbolHandle name, int len = fields->length(); for (int i = 0; i < len; i += instanceKlass::next_offset) { int real_offset; - FieldAllocationType atype = (FieldAllocationType) fields->ushort_at(i+4); + FieldAllocationType atype = (FieldAllocationType) fields->ushort_at(i + instanceKlass::low_offset); switch (atype) { case STATIC_OOP: real_offset = next_static_oop_offset; @@ -3173,8 +3162,8 @@ instanceKlassHandle ClassFileParser::parseClassFile(symbolHandle name, default: ShouldNotReachHere(); } - fields->short_at_put(i+4, extract_low_short_from_int(real_offset) ); - fields->short_at_put(i+5, extract_high_short_from_int(real_offset) ); + fields->short_at_put(i + instanceKlass::low_offset, extract_low_short_from_int(real_offset)); + fields->short_at_put(i + instanceKlass::high_offset, extract_high_short_from_int(real_offset)); } // Size of instances diff --git a/src/share/vm/classfile/javaClasses.cpp b/src/share/vm/classfile/javaClasses.cpp index cb71bbdd7..1ca9c41c9 100644 --- a/src/share/vm/classfile/javaClasses.cpp +++ b/src/share/vm/classfile/javaClasses.cpp @@ -1124,8 +1124,7 @@ class BacktraceBuilder: public StackObj { if (_dirty && _methods != NULL) { BarrierSet* bs = Universe::heap()->barrier_set(); assert(bs->has_write_ref_array_opt(), "Barrier set must have ref array opt"); - bs->write_ref_array(MemRegion((HeapWord*)_methods->base(), - _methods->array_size())); + bs->write_ref_array((HeapWord*)_methods->base(), _methods->length()); _dirty = false; } } diff --git a/src/share/vm/classfile/systemDictionary.hpp b/src/share/vm/classfile/systemDictionary.hpp index 5ee4e1c7a..4d0bb8cfe 100644 --- a/src/share/vm/classfile/systemDictionary.hpp +++ b/src/share/vm/classfile/systemDictionary.hpp @@ -150,6 +150,7 @@ class SymbolPropertyTable; template(vector_klass, java_util_Vector, Pre) \ template(hashtable_klass, java_util_Hashtable, Pre) \ template(stringBuffer_klass, java_lang_StringBuffer, Pre) \ + template(StringBuilder_klass, java_lang_StringBuilder, Pre) \ \ /* It's NULL in non-1.4 JDKs. */ \ template(stackTraceElement_klass, java_lang_StackTraceElement, Opt) \ diff --git a/src/share/vm/classfile/vmSymbols.cpp b/src/share/vm/classfile/vmSymbols.cpp index c805af344..cc96bee95 100644 --- a/src/share/vm/classfile/vmSymbols.cpp +++ b/src/share/vm/classfile/vmSymbols.cpp @@ -303,6 +303,11 @@ inline bool match_F_R(jshort flags) { const int neg = JVM_ACC_STATIC | JVM_ACC_SYNCHRONIZED; return (flags & (req | neg)) == req; } +inline bool match_F_Y(jshort flags) { + const int req = JVM_ACC_SYNCHRONIZED; + const int neg = JVM_ACC_STATIC; + return (flags & (req | neg)) == req; +} inline bool match_F_RN(jshort flags) { const int req = JVM_ACC_NATIVE; const int neg = JVM_ACC_STATIC | JVM_ACC_SYNCHRONIZED; @@ -361,6 +366,7 @@ const char* vmIntrinsics::short_name_as_C_string(vmIntrinsics::ID id, char* buf, const char* sname = vmSymbols::name_for(signature_for(id)); const char* fname = ""; switch (flags_for(id)) { + case F_Y: fname = "synchronized "; break; case F_RN: fname = "native "; break; case F_SN: fname = "native static "; break; case F_S: fname = "static "; break; diff --git a/src/share/vm/classfile/vmSymbols.hpp b/src/share/vm/classfile/vmSymbols.hpp index fdc8786e5..7323bbb73 100644 --- a/src/share/vm/classfile/vmSymbols.hpp +++ b/src/share/vm/classfile/vmSymbols.hpp @@ -84,6 +84,7 @@ template(java_lang_reflect_Field, "java/lang/reflect/Field") \ template(java_lang_reflect_Array, "java/lang/reflect/Array") \ template(java_lang_StringBuffer, "java/lang/StringBuffer") \ + template(java_lang_StringBuilder, "java/lang/StringBuilder") \ template(java_lang_CharSequence, "java/lang/CharSequence") \ template(java_security_AccessControlContext, "java/security/AccessControlContext") \ template(java_security_ProtectionDomain, "java/security/ProtectionDomain") \ @@ -335,6 +336,7 @@ template(ptypes_name, "ptypes") \ template(form_name, "form") \ template(erasedType_name, "erasedType") \ + template(append_name, "append") \ \ /* non-intrinsic name/signature pairs: */ \ template(register_method_name, "register") \ @@ -416,6 +418,13 @@ template(string_signature, "Ljava/lang/String;") \ template(reference_signature, "Ljava/lang/ref/Reference;") \ template(concurrenthashmap_signature, "Ljava/util/concurrent/ConcurrentHashMap;") \ + template(String_StringBuilder_signature, "(Ljava/lang/String;)Ljava/lang/StringBuilder;") \ + template(int_StringBuilder_signature, "(I)Ljava/lang/StringBuilder;") \ + template(char_StringBuilder_signature, "(C)Ljava/lang/StringBuilder;") \ + template(String_StringBuffer_signature, "(Ljava/lang/String;)Ljava/lang/StringBuffer;") \ + template(int_StringBuffer_signature, "(I)Ljava/lang/StringBuffer;") \ + template(char_StringBuffer_signature, "(C)Ljava/lang/StringBuffer;") \ + template(int_String_signature, "(I)Ljava/lang/String;") \ /* signature symbols needed by intrinsics */ \ VM_INTRINSICS_DO(VM_INTRINSIC_IGNORE, VM_SYMBOL_IGNORE, VM_SYMBOL_IGNORE, template, VM_ALIAS_IGNORE) \ \ @@ -815,10 +824,34 @@ /*the compiler does have special inlining code for these; bytecode inline is just fine */ \ \ do_intrinsic(_fillInStackTrace, java_lang_Throwable, fillInStackTrace_name, void_throwable_signature, F_RNY) \ - \ - do_intrinsic(_Object_init, java_lang_Object, object_initializer_name, void_method_signature, F_R) \ - /* (symbol object_initializer_name defined above) */ \ - \ + \ + do_intrinsic(_StringBuilder_void, java_lang_StringBuilder, object_initializer_name, void_method_signature, F_R) \ + do_intrinsic(_StringBuilder_int, java_lang_StringBuilder, object_initializer_name, int_void_signature, F_R) \ + do_intrinsic(_StringBuilder_String, java_lang_StringBuilder, object_initializer_name, string_void_signature, F_R) \ + \ + do_intrinsic(_StringBuilder_append_char, java_lang_StringBuilder, append_name, char_StringBuilder_signature, F_R) \ + do_intrinsic(_StringBuilder_append_int, java_lang_StringBuilder, append_name, int_StringBuilder_signature, F_R) \ + do_intrinsic(_StringBuilder_append_String, java_lang_StringBuilder, append_name, String_StringBuilder_signature, F_R) \ + \ + do_intrinsic(_StringBuilder_toString, java_lang_StringBuilder, toString_name, void_string_signature, F_R) \ + \ + do_intrinsic(_StringBuffer_void, java_lang_StringBuffer, object_initializer_name, void_method_signature, F_R) \ + do_intrinsic(_StringBuffer_int, java_lang_StringBuffer, object_initializer_name, int_void_signature, F_R) \ + do_intrinsic(_StringBuffer_String, java_lang_StringBuffer, object_initializer_name, string_void_signature, F_R) \ + \ + do_intrinsic(_StringBuffer_append_char, java_lang_StringBuffer, append_name, char_StringBuffer_signature, F_Y) \ + do_intrinsic(_StringBuffer_append_int, java_lang_StringBuffer, append_name, int_StringBuffer_signature, F_Y) \ + do_intrinsic(_StringBuffer_append_String, java_lang_StringBuffer, append_name, String_StringBuffer_signature, F_Y) \ + \ + do_intrinsic(_StringBuffer_toString, java_lang_StringBuffer, toString_name, void_string_signature, F_Y) \ + \ + do_intrinsic(_Integer_toString, java_lang_Integer, toString_name, int_String_signature, F_S) \ + \ + do_intrinsic(_String_String, java_lang_String, object_initializer_name, string_void_signature, F_R) \ + \ + do_intrinsic(_Object_init, java_lang_Object, object_initializer_name, void_method_signature, F_R) \ + /* (symbol object_initializer_name defined above) */ \ + \ do_intrinsic(_invoke, java_lang_reflect_Method, invoke_name, object_array_object_object_signature, F_R) \ /* (symbols invoke_name and invoke_signature defined above) */ \ \ @@ -946,11 +979,12 @@ class vmIntrinsics: AllStatic { enum Flags { // AccessFlags syndromes relevant to intrinsics. F_none = 0, - F_R, // !static !synchronized (R="regular") - F_S, // static !synchronized - F_RN, // !static native !synchronized - F_SN, // static native !synchronized - F_RNY // !static native synchronized + F_R, // !static ?native !synchronized (R="regular") + F_S, // static ?native !synchronized + F_Y, // !static ?native synchronized + F_RN, // !static native !synchronized + F_SN, // static native !synchronized + F_RNY // !static native synchronized }; public: diff --git a/src/share/vm/code/nmethod.cpp b/src/share/vm/code/nmethod.cpp index 7f7ca1175..5544666e9 100644 --- a/src/share/vm/code/nmethod.cpp +++ b/src/share/vm/code/nmethod.cpp @@ -414,9 +414,8 @@ int nmethod::total_size() const { } const char* nmethod::compile_kind() const { - if (method() == NULL) return "unloaded"; - if (is_native_method()) return "c2n"; if (is_osr_method()) return "osr"; + if (method() != NULL && is_native_method()) return "c2n"; return NULL; } @@ -1127,6 +1126,9 @@ void nmethod::make_unloaded(BoolObjectClosure* is_alive, oop cause) { } flags.state = unloaded; + // Log the unloading. + log_state_change(); + // The methodOop is gone at this point assert(_method == NULL, "Tautology"); @@ -1137,8 +1139,6 @@ void nmethod::make_unloaded(BoolObjectClosure* is_alive, oop cause) { void nmethod::invalidate_osr_method() { assert(_entry_bci != InvocationEntryBci, "wrong kind of nmethod"); - if (_entry_bci != InvalidOSREntryBci) - inc_decompile_count(); // Remove from list of active nmethods if (method() != NULL) instanceKlass::cast(method()->method_holder())->remove_osr_nmethod(this); @@ -1146,59 +1146,63 @@ void nmethod::invalidate_osr_method() { _entry_bci = InvalidOSREntryBci; } -void nmethod::log_state_change(int state) const { +void nmethod::log_state_change() const { if (LogCompilation) { if (xtty != NULL) { ttyLocker ttyl; // keep the following output all in one block - xtty->begin_elem("make_not_entrant %sthread='" UINTX_FORMAT "'", - (state == zombie ? "zombie='1' " : ""), - os::current_thread_id()); + if (flags.state == unloaded) { + xtty->begin_elem("make_unloaded thread='" UINTX_FORMAT "'", + os::current_thread_id()); + } else { + xtty->begin_elem("make_not_entrant thread='" UINTX_FORMAT "'%s", + os::current_thread_id(), + (flags.state == zombie ? " zombie='1'" : "")); + } log_identity(xtty); xtty->stamp(); xtty->end_elem(); } } - if (PrintCompilation) { - print_on(tty, state == zombie ? "made zombie " : "made not entrant "); + if (PrintCompilation && flags.state != unloaded) { + print_on(tty, flags.state == zombie ? "made zombie " : "made not entrant "); tty->cr(); } } // Common functionality for both make_not_entrant and make_zombie -void nmethod::make_not_entrant_or_zombie(int state) { +bool nmethod::make_not_entrant_or_zombie(int state) { assert(state == zombie || state == not_entrant, "must be zombie or not_entrant"); - // Code for an on-stack-replacement nmethod is removed when a class gets unloaded. - // They never become zombie/non-entrant, so the nmethod sweeper will never remove - // them. Instead the entry_bci is set to InvalidOSREntryBci, so the osr nmethod - // will never be used anymore. That the nmethods only gets removed when class unloading - // happens, make life much simpler, since the nmethods are not just going to disappear - // out of the blue. - if (is_osr_method()) { - if (osr_entry_bci() != InvalidOSREntryBci) { - // only log this once - log_state_change(state); - } - invalidate_osr_method(); - return; - } - - // If the method is already zombie or set to the state we want, nothing to do - if (is_zombie() || (state == not_entrant && is_not_entrant())) { - return; + // If the method is already zombie there is nothing to do + if (is_zombie()) { + return false; } - log_state_change(state); - // Make sure the nmethod is not flushed in case of a safepoint in code below. nmethodLocker nml(this); { + // invalidate osr nmethod before acquiring the patching lock since + // they both acquire leaf locks and we don't want a deadlock. + // This logic is equivalent to the logic below for patching the + // verified entry point of regular methods. + if (is_osr_method()) { + // this effectively makes the osr nmethod not entrant + invalidate_osr_method(); + } + // Enter critical section. Does not block for safepoint. MutexLockerEx pl(Patching_lock, Mutex::_no_safepoint_check_flag); + + if (flags.state == state) { + // another thread already performed this transition so nothing + // to do, but return false to indicate this. + return false; + } + // The caller can be calling the method statically or through an inline // cache call. - if (!is_not_entrant()) { + if (!is_osr_method() && !is_not_entrant()) { NativeJump::patch_verified_entry(entry_point(), verified_entry_point(), SharedRuntime::get_handle_wrong_method_stub()); assert (NativeJump::instruction_size == nmethod::_zombie_instruction_size, ""); @@ -1217,6 +1221,10 @@ void nmethod::make_not_entrant_or_zombie(int state) { // Change state flags.state = state; + + // Log the transition once + log_state_change(); + } // leave critical region under Patching_lock if (state == not_entrant) { @@ -1240,7 +1248,6 @@ void nmethod::make_not_entrant_or_zombie(int state) { // It's a true state change, so mark the method as decompiled. inc_decompile_count(); - // zombie only - if a JVMTI agent has enabled the CompiledMethodUnload event // and it hasn't already been reported for this nmethod then report it now. // (the event may have been reported earilier if the GC marked it for unloading). @@ -1268,7 +1275,7 @@ void nmethod::make_not_entrant_or_zombie(int state) { // Check whether method got unloaded at a safepoint before this, // if so we can skip the flushing steps below - if (method() == NULL) return; + if (method() == NULL) return true; // Remove nmethod from method. // We need to check if both the _code and _from_compiled_code_entry_point @@ -1282,6 +1289,8 @@ void nmethod::make_not_entrant_or_zombie(int state) { HandleMark hm; method()->clear_code(); } + + return true; } diff --git a/src/share/vm/code/nmethod.hpp b/src/share/vm/code/nmethod.hpp index c7abdea89..48080b5b9 100644 --- a/src/share/vm/code/nmethod.hpp +++ b/src/share/vm/code/nmethod.hpp @@ -252,7 +252,9 @@ class nmethod : public CodeBlob { void* operator new(size_t size, int nmethod_size); const char* reloc_string_for(u_char* begin, u_char* end); - void make_not_entrant_or_zombie(int state); + // Returns true if this thread changed the state of the nmethod or + // false if another thread performed the transition. + bool make_not_entrant_or_zombie(int state); void inc_decompile_count(); // used to check that writes to nmFlags are done consistently. @@ -375,10 +377,12 @@ class nmethod : public CodeBlob { bool is_zombie() const { return flags.state == zombie; } bool is_unloaded() const { return flags.state == unloaded; } - // Make the nmethod non entrant. The nmethod will continue to be alive. - // It is used when an uncommon trap happens. - void make_not_entrant() { make_not_entrant_or_zombie(not_entrant); } - void make_zombie() { make_not_entrant_or_zombie(zombie); } + // Make the nmethod non entrant. The nmethod will continue to be + // alive. It is used when an uncommon trap happens. Returns true + // if this thread changed the state of the nmethod or false if + // another thread performed the transition. + bool make_not_entrant() { return make_not_entrant_or_zombie(not_entrant); } + bool make_zombie() { return make_not_entrant_or_zombie(zombie); } // used by jvmti to track if the unload event has been reported bool unload_reported() { return _unload_reported; } @@ -563,7 +567,7 @@ class nmethod : public CodeBlob { // Logging void log_identity(xmlStream* log) const; void log_new_nmethod() const; - void log_state_change(int state) const; + void log_state_change() const; // Prints a comment for one native instruction (reloc info, pc desc) void print_code_comment_on(outputStream* st, int column, address begin, address end); diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp index c3d30c348..20bbd7aba 100644 --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp @@ -709,7 +709,8 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen, // Support for parallelizing survivor space rescan if (CMSParallelRemarkEnabled && CMSParallelSurvivorRemarkEnabled) { - size_t max_plab_samples = MaxNewSize/((SurvivorRatio+2)*MinTLABSize); + size_t max_plab_samples = cp->max_gen0_size()/ + ((SurvivorRatio+2)*MinTLABSize); _survivor_plab_array = NEW_C_HEAP_ARRAY(ChunkArray, ParallelGCThreads); _survivor_chunk_array = NEW_C_HEAP_ARRAY(HeapWord*, 2*max_plab_samples); _cursor = NEW_C_HEAP_ARRAY(size_t, ParallelGCThreads); diff --git a/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp b/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp index 3000f010b..967605176 100644 --- a/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp +++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp @@ -351,9 +351,16 @@ void CollectionSetChooser::printSortedHeapRegions() { gclog_or_tty->print_cr("Printing %d Heap Regions sorted by amount of known garbage", _numMarkedRegions); + + DEBUG_ONLY(int marked_count = 0;) for (int i = 0; i < _markedRegions.length(); i++) { - printHeapRegion(_markedRegions.at(i)); + HeapRegion* r = _markedRegions.at(i); + if (r != NULL) { + printHeapRegion(r); + DEBUG_ONLY(marked_count++;) + } } + assert(marked_count == _numMarkedRegions, "must be"); gclog_or_tty->print_cr("Done sorted heap region print"); } diff --git a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp index 34939de57..11c288073 100644 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp @@ -42,28 +42,49 @@ ConcurrentG1Refine::ConcurrentG1Refine() : _n_periods(0), _threads(NULL), _n_threads(0) { - if (G1ConcRefine) { - _n_threads = (int)thread_num(); - if (_n_threads > 0) { - _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads); - int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids(); - ConcurrentG1RefineThread *next = NULL; - for (int i = _n_threads - 1; i >= 0; i--) { - ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i); - assert(t != NULL, "Conc refine should have been created"); - assert(t->cg1r() == this, "Conc refine thread should refer to this"); - _threads[i] = t; - next = t; - } - } + + // Ergomonically select initial concurrent refinement parameters + if (FLAG_IS_DEFAULT(G1ConcRefineGreenZone)) { + FLAG_SET_DEFAULT(G1ConcRefineGreenZone, MAX2<int>(ParallelGCThreads, 1)); + } + set_green_zone(G1ConcRefineGreenZone); + + if (FLAG_IS_DEFAULT(G1ConcRefineYellowZone)) { + FLAG_SET_DEFAULT(G1ConcRefineYellowZone, green_zone() * 3); + } + set_yellow_zone(MAX2<int>(G1ConcRefineYellowZone, green_zone())); + + if (FLAG_IS_DEFAULT(G1ConcRefineRedZone)) { + FLAG_SET_DEFAULT(G1ConcRefineRedZone, yellow_zone() * 2); + } + set_red_zone(MAX2<int>(G1ConcRefineRedZone, yellow_zone())); + _n_worker_threads = thread_num(); + // We need one extra thread to do the young gen rset size sampling. + _n_threads = _n_worker_threads + 1; + reset_threshold_step(); + + _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads); + int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids(); + ConcurrentG1RefineThread *next = NULL; + for (int i = _n_threads - 1; i >= 0; i--) { + ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i); + assert(t != NULL, "Conc refine should have been created"); + assert(t->cg1r() == this, "Conc refine thread should refer to this"); + _threads[i] = t; + next = t; } } -size_t ConcurrentG1Refine::thread_num() { - if (G1ConcRefine) { - return (G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads; +void ConcurrentG1Refine::reset_threshold_step() { + if (FLAG_IS_DEFAULT(G1ConcRefineThresholdStep)) { + _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1); + } else { + _thread_threshold_step = G1ConcRefineThresholdStep; } - return 0; +} + +int ConcurrentG1Refine::thread_num() { + return MAX2<int>((G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads, 1); } void ConcurrentG1Refine::init() { @@ -123,6 +144,15 @@ void ConcurrentG1Refine::stop() { } } +void ConcurrentG1Refine::reinitialize_threads() { + reset_threshold_step(); + if (_threads != NULL) { + for (int i = 0; i < _n_threads; i++) { + _threads[i]->initialize(); + } + } +} + ConcurrentG1Refine::~ConcurrentG1Refine() { if (G1ConcRSLogCacheSize > 0) { assert(_card_counts != NULL, "Logic"); @@ -384,4 +414,3 @@ void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const { st->cr(); } } - diff --git a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp index 5cef3058c..7bcbecfbe 100644 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp @@ -29,6 +29,31 @@ class G1RemSet; class ConcurrentG1Refine: public CHeapObj { ConcurrentG1RefineThread** _threads; int _n_threads; + int _n_worker_threads; + /* + * The value of the update buffer queue length falls into one of 3 zones: + * green, yellow, red. If the value is in [0, green) nothing is + * done, the buffers are left unprocessed to enable the caching effect of the + * dirtied cards. In the yellow zone [green, yellow) the concurrent refinement + * threads are gradually activated. In [yellow, red) all threads are + * running. If the length becomes red (max queue length) the mutators start + * processing the buffers. + * + * There are some interesting cases (with G1AdaptiveConcRefine turned off): + * 1) green = yellow = red = 0. In this case the mutator will process all + * buffers. Except for those that are created by the deferred updates + * machinery during a collection. + * 2) green = 0. Means no caching. Can be a good way to minimize the + * amount of time spent updating rsets during a collection. + */ + int _green_zone; + int _yellow_zone; + int _red_zone; + + int _thread_threshold_step; + + // Reset the threshold step value based of the current zone boundaries. + void reset_threshold_step(); // The cache for card refinement. bool _use_cache; @@ -147,6 +172,8 @@ class ConcurrentG1Refine: public CHeapObj { void init(); // Accomplish some initialization that has to wait. void stop(); + void reinitialize_threads(); + // Iterate over the conc refine threads void threads_do(ThreadClosure *tc); @@ -178,7 +205,20 @@ class ConcurrentG1Refine: public CHeapObj { void clear_and_record_card_counts(); - static size_t thread_num(); + static int thread_num(); void print_worker_threads_on(outputStream* st) const; + + void set_green_zone(int x) { _green_zone = x; } + void set_yellow_zone(int x) { _yellow_zone = x; } + void set_red_zone(int x) { _red_zone = x; } + + int green_zone() const { return _green_zone; } + int yellow_zone() const { return _yellow_zone; } + int red_zone() const { return _red_zone; } + + int total_thread_num() const { return _n_threads; } + int worker_thread_num() const { return _n_worker_threads; } + + int thread_threshold_step() const { return _thread_threshold_step; } }; diff --git a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp index aaf2544fe..b23c287a6 100644 --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp @@ -25,10 +25,6 @@ #include "incls/_precompiled.incl" #include "incls/_concurrentG1RefineThread.cpp.incl" -// ======= Concurrent Mark Thread ======== - -// The CM thread is created when the G1 garbage collector is used - ConcurrentG1RefineThread:: ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *next, int worker_id_offset, int worker_id) : @@ -37,19 +33,42 @@ ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *nex _worker_id(worker_id), _active(false), _next(next), + _monitor(NULL), _cg1r(cg1r), - _vtime_accum(0.0), - _interval_ms(5.0) + _vtime_accum(0.0) { + + // Each thread has its own monitor. The i-th thread is responsible for signalling + // to thread i+1 if the number of buffers in the queue exceeds a threashold for this + // thread. Monitors are also used to wake up the threads during termination. + // The 0th worker in notified by mutator threads and has a special monitor. + // The last worker is used for young gen rset size sampling. + if (worker_id > 0) { + _monitor = new Monitor(Mutex::nonleaf, "Refinement monitor", true); + } else { + _monitor = DirtyCardQ_CBL_mon; + } + initialize(); create_and_start(); } +void ConcurrentG1RefineThread::initialize() { + if (_worker_id < cg1r()->worker_thread_num()) { + // Current thread activation threshold + _threshold = MIN2<int>(cg1r()->thread_threshold_step() * (_worker_id + 1) + cg1r()->green_zone(), + cg1r()->yellow_zone()); + // A thread deactivates once the number of buffer reached a deactivation threshold + _deactivation_threshold = MAX2<int>(_threshold - cg1r()->thread_threshold_step(), cg1r()->green_zone()); + } else { + set_active(true); + } +} + void ConcurrentG1RefineThread::sample_young_list_rs_lengths() { G1CollectedHeap* g1h = G1CollectedHeap::heap(); G1CollectorPolicy* g1p = g1h->g1_policy(); if (g1p->adaptive_young_list_length()) { int regions_visited = 0; - g1h->young_list_rs_length_sampling_init(); while (g1h->young_list_rs_length_sampling_more()) { g1h->young_list_rs_length_sampling_next(); @@ -70,99 +89,121 @@ void ConcurrentG1RefineThread::sample_young_list_rs_lengths() { } } +void ConcurrentG1RefineThread::run_young_rs_sampling() { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + _vtime_start = os::elapsedVTime(); + while(!_should_terminate) { + _sts.join(); + sample_young_list_rs_lengths(); + _sts.leave(); + + if (os::supports_vtime()) { + _vtime_accum = (os::elapsedVTime() - _vtime_start); + } else { + _vtime_accum = 0.0; + } + + MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag); + if (_should_terminate) { + break; + } + _monitor->wait(Mutex::_no_safepoint_check_flag, G1ConcRefineServiceInterval); + } +} + +void ConcurrentG1RefineThread::wait_for_completed_buffers() { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag); + while (!_should_terminate && !is_active()) { + _monitor->wait(Mutex::_no_safepoint_check_flag); + } +} + +bool ConcurrentG1RefineThread::is_active() { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + return _worker_id > 0 ? _active : dcqs.process_completed_buffers(); +} + +void ConcurrentG1RefineThread::activate() { + MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag); + if (_worker_id > 0) { + if (G1TraceConcurrentRefinement) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + gclog_or_tty->print_cr("G1-Refine-activated worker %d, on threshold %d, current %d", + _worker_id, _threshold, (int)dcqs.completed_buffers_num()); + } + set_active(true); + } else { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + dcqs.set_process_completed(true); + } + _monitor->notify(); +} + +void ConcurrentG1RefineThread::deactivate() { + MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag); + if (_worker_id > 0) { + if (G1TraceConcurrentRefinement) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + gclog_or_tty->print_cr("G1-Refine-deactivated worker %d, off threshold %d, current %d", + _worker_id, _deactivation_threshold, (int)dcqs.completed_buffers_num()); + } + set_active(false); + } else { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + dcqs.set_process_completed(false); + } +} + void ConcurrentG1RefineThread::run() { initialize_in_thread(); - _vtime_start = os::elapsedVTime(); wait_for_universe_init(); + if (_worker_id >= cg1r()->worker_thread_num()) { + run_young_rs_sampling(); + terminate(); + } + + _vtime_start = os::elapsedVTime(); while (!_should_terminate) { DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); - // Wait for completed log buffers to exist. - { - MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); - while (((_worker_id == 0 && !dcqs.process_completed_buffers()) || - (_worker_id > 0 && !is_active())) && - !_should_terminate) { - DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag); - } - } + + // Wait for work + wait_for_completed_buffers(); if (_should_terminate) { - return; + break; } - // Now we take them off (this doesn't hold locks while it applies - // closures.) (If we did a full collection, then we'll do a full - // traversal. _sts.join(); - int n_logs = 0; - int lower_limit = 0; - double start_vtime_sec; // only used when G1SmoothConcRefine is on - int prev_buffer_num; // only used when G1SmoothConcRefine is on - // This thread activation threshold - int threshold = G1UpdateBufferQueueProcessingThreshold * _worker_id; - // Next thread activation threshold - int next_threshold = threshold + G1UpdateBufferQueueProcessingThreshold; - int deactivation_threshold = MAX2<int>(threshold - G1UpdateBufferQueueProcessingThreshold / 2, 0); - - if (G1SmoothConcRefine) { - lower_limit = 0; - start_vtime_sec = os::elapsedVTime(); - prev_buffer_num = (int) dcqs.completed_buffers_num(); - } else { - lower_limit = G1UpdateBufferQueueProcessingThreshold / 4; // For now. - } - while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, lower_limit)) { - double end_vtime_sec; - double elapsed_vtime_sec; - int elapsed_vtime_ms; - int curr_buffer_num = (int) dcqs.completed_buffers_num(); - - if (G1SmoothConcRefine) { - end_vtime_sec = os::elapsedVTime(); - elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; - elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0); - - if (curr_buffer_num > prev_buffer_num || - curr_buffer_num > next_threshold) { - decreaseInterval(elapsed_vtime_ms); - } else if (curr_buffer_num < prev_buffer_num) { - increaseInterval(elapsed_vtime_ms); - } + + do { + int curr_buffer_num = (int)dcqs.completed_buffers_num(); + // If the number of the buffers falls down into the yellow zone, + // that means that the transition period after the evacuation pause has ended. + if (dcqs.completed_queue_padding() > 0 && curr_buffer_num <= cg1r()->yellow_zone()) { + dcqs.set_completed_queue_padding(0); } - if (_worker_id == 0) { - sample_young_list_rs_lengths(); - } else if (curr_buffer_num < deactivation_threshold) { + + if (_worker_id > 0 && curr_buffer_num <= _deactivation_threshold) { // If the number of the buffer has fallen below our threshold // we should deactivate. The predecessor will reactivate this // thread should the number of the buffers cross the threshold again. - MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); deactivate(); - if (G1TraceConcurrentRefinement) { - gclog_or_tty->print_cr("G1-Refine-deactivated worker %d", _worker_id); - } break; } // Check if we need to activate the next thread. - if (curr_buffer_num > next_threshold && _next != NULL && !_next->is_active()) { - MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); + if (_next != NULL && !_next->is_active() && curr_buffer_num > _next->_threshold) { _next->activate(); - DirtyCardQ_CBL_mon->notify_all(); - if (G1TraceConcurrentRefinement) { - gclog_or_tty->print_cr("G1-Refine-activated worker %d", _next->_worker_id); - } } + } while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, cg1r()->green_zone())); - if (G1SmoothConcRefine) { - prev_buffer_num = curr_buffer_num; - _sts.leave(); - os::sleep(Thread::current(), (jlong) _interval_ms, false); - _sts.join(); - start_vtime_sec = os::elapsedVTime(); - } - n_logs++; + // We can exit the loop above while being active if there was a yield request. + if (is_active()) { + deactivate(); } + _sts.leave(); if (os::supports_vtime()) { @@ -172,7 +213,6 @@ void ConcurrentG1RefineThread::run() { } } assert(_should_terminate, "just checking"); - terminate(); } @@ -191,8 +231,8 @@ void ConcurrentG1RefineThread::stop() { } { - MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); - DirtyCardQ_CBL_mon->notify_all(); + MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag); + _monitor->notify(); } { diff --git a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp index 167fc176e..b2eb5d327 100644 --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp @@ -40,42 +40,36 @@ class ConcurrentG1RefineThread: public ConcurrentGCThread { // when the number of the rset update buffer crosses a certain threshold. A successor // would self-deactivate when the number of the buffers falls below the threshold. bool _active; - ConcurrentG1RefineThread * _next; - public: - virtual void run(); - - bool is_active() { return _active; } - void activate() { _active = true; } - void deactivate() { _active = false; } + ConcurrentG1RefineThread* _next; + Monitor* _monitor; + ConcurrentG1Refine* _cg1r; - private: - ConcurrentG1Refine* _cg1r; + int _thread_threshold_step; + // This thread activation threshold + int _threshold; + // This thread deactivation threshold + int _deactivation_threshold; - double _interval_ms; + void sample_young_list_rs_lengths(); + void run_young_rs_sampling(); + void wait_for_completed_buffers(); - void decreaseInterval(int processing_time_ms) { - double min_interval_ms = (double) processing_time_ms; - _interval_ms = 0.8 * _interval_ms; - if (_interval_ms < min_interval_ms) - _interval_ms = min_interval_ms; - } - void increaseInterval(int processing_time_ms) { - double max_interval_ms = 9.0 * (double) processing_time_ms; - _interval_ms = 1.1 * _interval_ms; - if (max_interval_ms > 0 && _interval_ms > max_interval_ms) - _interval_ms = max_interval_ms; - } - - void sleepBeforeNextCycle(); + void set_active(bool x) { _active = x; } + bool is_active(); + void activate(); + void deactivate(); // For use by G1CollectedHeap, which is a friend. static SuspendibleThreadSet* sts() { return &_sts; } - public: +public: + virtual void run(); // Constructor ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread* next, int worker_id_offset, int worker_id); + void initialize(); + // Printing void print() const; void print_on(outputStream* st) const; @@ -83,13 +77,10 @@ class ConcurrentG1RefineThread: public ConcurrentGCThread { // Total virtual time so far. double vtime_accum() { return _vtime_accum; } - ConcurrentG1Refine* cg1r() { return _cg1r; } - - void sample_young_list_rs_lengths(); + ConcurrentG1Refine* cg1r() { return _cg1r; } // Yield for GC - void yield(); - + void yield(); // shutdown void stop(); }; diff --git a/src/share/vm/gc_implementation/g1/concurrentMark.cpp b/src/share/vm/gc_implementation/g1/concurrentMark.cpp index af93ae7d2..c77af4141 100644 --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp @@ -760,7 +760,6 @@ void ConcurrentMark::checkpointRootsInitialPost() { rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); - satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold); satb_mq_set.set_active_all_threads(true); // update_g1_committed() will be called at the end of an evac pause diff --git a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp index 7372a4a78..81d8716b1 100644 --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp @@ -61,8 +61,8 @@ bool DirtyCardQueue::apply_closure_to_buffer(CardTableEntryClosure* cl, #pragma warning( disable:4355 ) // 'this' : used in base member initializer list #endif // _MSC_VER -DirtyCardQueueSet::DirtyCardQueueSet() : - PtrQueueSet(true /*notify_when_complete*/), +DirtyCardQueueSet::DirtyCardQueueSet(bool notify_when_complete) : + PtrQueueSet(notify_when_complete), _closure(NULL), _shared_dirty_card_queue(this, true /*perm*/), _free_ids(NULL), @@ -77,12 +77,12 @@ size_t DirtyCardQueueSet::num_par_ids() { } void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, + int process_completed_threshold, int max_completed_queue, Mutex* lock, PtrQueueSet* fl_owner) { - PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue, fl_owner); + PtrQueueSet::initialize(cbl_mon, fl_lock, process_completed_threshold, + max_completed_queue, fl_owner); set_buffer_size(G1UpdateBufferSize); - set_process_completed_threshold(G1UpdateBufferQueueProcessingThreshold); - _shared_dirty_card_queue.set_lock(lock); _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon); } @@ -154,9 +154,10 @@ bool DirtyCardQueueSet::mut_process_buffer(void** buf) { return b; } -DirtyCardQueueSet::CompletedBufferNode* -DirtyCardQueueSet::get_completed_buffer_lock(int stop_at) { - CompletedBufferNode* nd = NULL; + +BufferNode* +DirtyCardQueueSet::get_completed_buffer(int stop_at) { + BufferNode* nd = NULL; MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); if ((int)_n_completed_buffers <= stop_at) { @@ -166,53 +167,31 @@ DirtyCardQueueSet::get_completed_buffer_lock(int stop_at) { if (_completed_buffers_head != NULL) { nd = _completed_buffers_head; - _completed_buffers_head = nd->next; + _completed_buffers_head = nd->next(); if (_completed_buffers_head == NULL) _completed_buffers_tail = NULL; _n_completed_buffers--; + assert(_n_completed_buffers >= 0, "Invariant"); } debug_only(assert_completed_buffer_list_len_correct_locked()); return nd; } -// We only do this in contexts where there is no concurrent enqueueing. -DirtyCardQueueSet::CompletedBufferNode* -DirtyCardQueueSet::get_completed_buffer_CAS() { - CompletedBufferNode* nd = _completed_buffers_head; - - while (nd != NULL) { - CompletedBufferNode* next = nd->next; - CompletedBufferNode* result = - (CompletedBufferNode*)Atomic::cmpxchg_ptr(next, - &_completed_buffers_head, - nd); - if (result == nd) { - return result; - } else { - nd = _completed_buffers_head; - } - } - assert(_completed_buffers_head == NULL, "Loop post"); - _completed_buffers_tail = NULL; - return NULL; -} - bool DirtyCardQueueSet:: apply_closure_to_completed_buffer_helper(int worker_i, - CompletedBufferNode* nd) { + BufferNode* nd) { if (nd != NULL) { + void **buf = BufferNode::make_buffer_from_node(nd); + size_t index = nd->index(); bool b = - DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, - nd->index, _sz, + DirtyCardQueue::apply_closure_to_buffer(_closure, buf, + index, _sz, true, worker_i); - void** buf = nd->buf; - size_t index = nd->index; - delete nd; if (b) { deallocate_buffer(buf); return true; // In normal case, go on to next buffer. } else { - enqueue_complete_buffer(buf, index, true); + enqueue_complete_buffer(buf, index); return false; } } else { @@ -222,40 +201,36 @@ apply_closure_to_completed_buffer_helper(int worker_i, bool DirtyCardQueueSet::apply_closure_to_completed_buffer(int worker_i, int stop_at, - bool with_CAS) + bool during_pause) { - CompletedBufferNode* nd = NULL; - if (with_CAS) { - guarantee(stop_at == 0, "Precondition"); - nd = get_completed_buffer_CAS(); - } else { - nd = get_completed_buffer_lock(stop_at); - } + assert(!during_pause || stop_at == 0, "Should not leave any completed buffers during a pause"); + BufferNode* nd = get_completed_buffer(stop_at); bool res = apply_closure_to_completed_buffer_helper(worker_i, nd); if (res) Atomic::inc(&_processed_buffers_rs_thread); return res; } void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() { - CompletedBufferNode* nd = _completed_buffers_head; + BufferNode* nd = _completed_buffers_head; while (nd != NULL) { bool b = - DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, 0, _sz, - false); + DirtyCardQueue::apply_closure_to_buffer(_closure, + BufferNode::make_buffer_from_node(nd), + 0, _sz, false); guarantee(b, "Should not stop early."); - nd = nd->next; + nd = nd->next(); } } void DirtyCardQueueSet::abandon_logs() { assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); - CompletedBufferNode* buffers_to_delete = NULL; + BufferNode* buffers_to_delete = NULL; { MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); while (_completed_buffers_head != NULL) { - CompletedBufferNode* nd = _completed_buffers_head; - _completed_buffers_head = nd->next; - nd->next = buffers_to_delete; + BufferNode* nd = _completed_buffers_head; + _completed_buffers_head = nd->next(); + nd->set_next(buffers_to_delete); buffers_to_delete = nd; } _n_completed_buffers = 0; @@ -263,10 +238,9 @@ void DirtyCardQueueSet::abandon_logs() { debug_only(assert_completed_buffer_list_len_correct_locked()); } while (buffers_to_delete != NULL) { - CompletedBufferNode* nd = buffers_to_delete; - buffers_to_delete = nd->next; - deallocate_buffer(nd->buf); - delete nd; + BufferNode* nd = buffers_to_delete; + buffers_to_delete = nd->next(); + deallocate_buffer(BufferNode::make_buffer_from_node(nd)); } // Since abandon is done only at safepoints, we can safely manipulate // these queues. diff --git a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp index 7a6f3f27b..da2c83784 100644 --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp @@ -1,5 +1,5 @@ /* - * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 2001-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -84,11 +84,12 @@ class DirtyCardQueueSet: public PtrQueueSet { jint _processed_buffers_rs_thread; public: - DirtyCardQueueSet(); + DirtyCardQueueSet(bool notify_when_complete = true); void initialize(Monitor* cbl_mon, Mutex* fl_lock, - int max_completed_queue = 0, - Mutex* lock = NULL, PtrQueueSet* fl_owner = NULL); + int process_completed_threshold, + int max_completed_queue, + Mutex* lock, PtrQueueSet* fl_owner = NULL); // The number of parallel ids that can be claimed to allow collector or // mutator threads to do card-processing work. @@ -120,12 +121,13 @@ public: // is returned to the completed buffer set, and this call returns false. bool apply_closure_to_completed_buffer(int worker_i = 0, int stop_at = 0, - bool with_CAS = false); + bool during_pause = false); + bool apply_closure_to_completed_buffer_helper(int worker_i, - CompletedBufferNode* nd); + BufferNode* nd); + + BufferNode* get_completed_buffer(int stop_at); - CompletedBufferNode* get_completed_buffer_CAS(); - CompletedBufferNode* get_completed_buffer_lock(int stop_at); // Applies the current closure to all completed buffers, // non-consumptively. void apply_closure_to_all_completed_buffers(); diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index 550b028ea..c60f17680 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -928,6 +928,8 @@ void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs, TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); TraceTime t(full ? "Full GC (System.gc())" : "Full GC", PrintGC, true, gclog_or_tty); + TraceMemoryManagerStats tms(true /* fullGC */); + double start = os::elapsedTime(); g1_policy()->record_full_collection_start(); @@ -1001,6 +1003,8 @@ void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs, COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); + MemoryService::track_memory_usage(); + if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) { HandleMark hm; // Discard invalid handles created during verification gclog_or_tty->print(" VerifyAfterGC:"); @@ -1371,6 +1375,7 @@ void G1CollectedHeap::shrink(size_t shrink_bytes) { G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) : SharedHeap(policy_), _g1_policy(policy_), + _dirty_card_queue_set(false), _ref_processor(NULL), _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)), _bot_shared(NULL), @@ -1456,8 +1461,6 @@ jint G1CollectedHeap::initialize() { Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap"); Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap"); - // We allocate this in any case, but only do no work if the command line - // param is off. _cg1r = new ConcurrentG1Refine(); // Reserve the maximum. @@ -1590,18 +1593,20 @@ jint G1CollectedHeap::initialize() { JavaThread::satb_mark_queue_set().initialize(SATB_Q_CBL_mon, SATB_Q_FL_lock, - 0, + G1SATBProcessCompletedThreshold, Shared_SATB_Q_lock); JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, DirtyCardQ_FL_lock, - G1UpdateBufferQueueMaxLength, + concurrent_g1_refine()->yellow_zone(), + concurrent_g1_refine()->red_zone(), Shared_DirtyCardQ_lock); if (G1DeferredRSUpdate) { dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, DirtyCardQ_FL_lock, - 0, + -1, // never trigger processing + -1, // no limit on length Shared_DirtyCardQ_lock, &JavaThread::dirty_card_queue_set()); } @@ -1732,13 +1737,6 @@ size_t G1CollectedHeap::unsafe_max_alloc() { return car->free(); } -void G1CollectedHeap::collect(GCCause::Cause cause) { - // The caller doesn't have the Heap_lock - assert(!Heap_lock->owned_by_self(), "this thread should not own the Heap_lock"); - MutexLocker ml(Heap_lock); - collect_locked(cause); -} - void G1CollectedHeap::collect_as_vm_thread(GCCause::Cause cause) { assert(Thread::current()->is_VM_thread(), "Precondition#1"); assert(Heap_lock->is_locked(), "Precondition#2"); @@ -1755,17 +1753,31 @@ void G1CollectedHeap::collect_as_vm_thread(GCCause::Cause cause) { } } +void G1CollectedHeap::collect(GCCause::Cause cause) { + // The caller doesn't have the Heap_lock + assert(!Heap_lock->owned_by_self(), "this thread should not own the Heap_lock"); -void G1CollectedHeap::collect_locked(GCCause::Cause cause) { - // Don't want to do a GC until cleanup is completed. - wait_for_cleanup_complete(); - - // Read the GC count while holding the Heap_lock - int gc_count_before = SharedHeap::heap()->total_collections(); + int gc_count_before; { - MutexUnlocker mu(Heap_lock); // give up heap lock, execute gets it back - VM_G1CollectFull op(gc_count_before, cause); - VMThread::execute(&op); + MutexLocker ml(Heap_lock); + // Read the GC count while holding the Heap_lock + gc_count_before = SharedHeap::heap()->total_collections(); + + // Don't want to do a GC until cleanup is completed. + wait_for_cleanup_complete(); + } // We give up heap lock; VMThread::execute gets it back below + switch (cause) { + case GCCause::_scavenge_alot: { + // Do an incremental pause, which might sometimes be abandoned. + VM_G1IncCollectionPause op(gc_count_before, cause); + VMThread::execute(&op); + break; + } + default: { + // In all other cases, we currently do a full gc. + VM_G1CollectFull op(gc_count_before, cause); + VMThread::execute(&op); + } } } @@ -2119,7 +2131,7 @@ size_t G1CollectedHeap::large_typearray_limit() { } size_t G1CollectedHeap::max_capacity() const { - return _g1_committed.byte_size(); + return g1_reserved_obj_bytes(); } jlong G1CollectedHeap::millis_since_last_gc() { @@ -2638,6 +2650,8 @@ G1CollectedHeap::do_collection_pause_at_safepoint() { } { + ResourceMark rm; + char verbose_str[128]; sprintf(verbose_str, "GC pause "); if (g1_policy()->in_young_gc_mode()) { @@ -2649,8 +2663,6 @@ G1CollectedHeap::do_collection_pause_at_safepoint() { if (g1_policy()->should_initiate_conc_mark()) strcat(verbose_str, " (initial-mark)"); - GCCauseSetter x(this, GCCause::_g1_inc_collection_pause); - // if PrintGCDetails is on, we'll print long statistics information // in the collector policy code, so let's not print this as the output // is messy if we do. @@ -2658,7 +2670,8 @@ G1CollectedHeap::do_collection_pause_at_safepoint() { TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); TraceTime t(verbose_str, PrintGC && !PrintGCDetails, true, gclog_or_tty); - ResourceMark rm; + TraceMemoryManagerStats tms(false /* fullGC */); + assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint"); assert(Thread::current() == VMThread::vm_thread(), "should be in vm thread"); guarantee(!is_gc_active(), "collection is not reentrant"); @@ -2802,6 +2815,22 @@ G1CollectedHeap::do_collection_pause_at_safepoint() { _young_list->reset_auxilary_lists(); } } else { + if (_in_cset_fast_test != NULL) { + assert(_in_cset_fast_test_base != NULL, "Since _in_cset_fast_test isn't"); + FREE_C_HEAP_ARRAY(bool, _in_cset_fast_test_base); + // this is more for peace of mind; we're nulling them here and + // we're expecting them to be null at the beginning of the next GC + _in_cset_fast_test = NULL; + _in_cset_fast_test_base = NULL; + } + // This looks confusing, because the DPT should really be empty + // at this point -- since we have not done any collection work, + // there should not be any derived pointers in the table to update; + // however, there is some additional state in the DPT which is + // reset at the end of the (null) "gc" here via the following call. + // A better approach might be to split off that state resetting work + // into a separate method that asserts that the DPT is empty and call + // that here. That is deferred for now. COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); } @@ -2838,6 +2867,8 @@ G1CollectedHeap::do_collection_pause_at_safepoint() { assert(regions_accounted_for(), "Region leakage."); + MemoryService::track_memory_usage(); + if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) { HandleMark hm; // Discard invalid handles created during verification gclog_or_tty->print(" VerifyAfterGC:"); @@ -4209,10 +4240,11 @@ void G1CollectedHeap::evacuate_collection_set() { RedirtyLoggedCardTableEntryFastClosure redirty; dirty_card_queue_set().set_closure(&redirty); dirty_card_queue_set().apply_closure_to_all_completed_buffers(); - JavaThread::dirty_card_queue_set().merge_bufferlists(&dirty_card_queue_set()); + + DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set(); + dcq.merge_bufferlists(&dirty_card_queue_set()); assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed"); } - COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); } diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp index 5e0782826..3c071beed 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp @@ -692,7 +692,7 @@ public: // Reserved (g1 only; super method includes perm), capacity and the used // portion in bytes. - size_t g1_reserved_obj_bytes() { return _g1_reserved.byte_size(); } + size_t g1_reserved_obj_bytes() const { return _g1_reserved.byte_size(); } virtual size_t capacity() const; virtual size_t used() const; // This should be called when we're not holding the heap lock. The diff --git a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp index 20e8fba5d..487fb6d95 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp @@ -1516,8 +1516,30 @@ void G1CollectorPolicy::record_collection_pause_end(bool abandoned) { (end_time_sec - _recent_prev_end_times_for_all_gcs_sec->oldest()) * 1000.0; update_recent_gc_times(end_time_sec, elapsed_ms); _recent_avg_pause_time_ratio = _recent_gc_times_ms->sum()/interval_ms; - // using 1.01 to account for floating point inaccuracies - assert(recent_avg_pause_time_ratio() < 1.01, "All GC?"); + if (recent_avg_pause_time_ratio() < 0.0 || + (recent_avg_pause_time_ratio() - 1.0 > 0.0)) { +#ifndef PRODUCT + // Dump info to allow post-facto debugging + gclog_or_tty->print_cr("recent_avg_pause_time_ratio() out of bounds"); + gclog_or_tty->print_cr("-------------------------------------------"); + gclog_or_tty->print_cr("Recent GC Times (ms):"); + _recent_gc_times_ms->dump(); + gclog_or_tty->print_cr("(End Time=%3.3f) Recent GC End Times (s):", end_time_sec); + _recent_prev_end_times_for_all_gcs_sec->dump(); + gclog_or_tty->print_cr("GC = %3.3f, Interval = %3.3f, Ratio = %3.3f", + _recent_gc_times_ms->sum(), interval_ms, recent_avg_pause_time_ratio()); + // In debug mode, terminate the JVM if the user wants to debug at this point. + assert(!G1FailOnFPError, "Debugging data for CR 6898948 has been dumped above"); +#endif // !PRODUCT + // Clip ratio between 0.0 and 1.0, and continue. This will be fixed in + // CR 6902692 by redoing the manner in which the ratio is incrementally computed. + if (_recent_avg_pause_time_ratio < 0.0) { + _recent_avg_pause_time_ratio = 0.0; + } else { + assert(_recent_avg_pause_time_ratio - 1.0 > 0.0, "Ctl-point invariant"); + _recent_avg_pause_time_ratio = 1.0; + } + } } if (G1PolicyVerbose > 1) { @@ -1892,6 +1914,10 @@ void G1CollectorPolicy::record_collection_pause_end(bool abandoned) { calculate_young_list_min_length(); calculate_young_list_target_config(); + // Note that _mmu_tracker->max_gc_time() returns the time in seconds. + double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSUpdatePauseFractionPercent / 100.0; + adjust_concurrent_refinement(update_rs_time, update_rs_processed_buffers, update_rs_time_goal_ms); + // </NEW PREDICTION> _target_pause_time_ms = -1.0; @@ -1899,6 +1925,47 @@ void G1CollectorPolicy::record_collection_pause_end(bool abandoned) { // <NEW PREDICTION> +void G1CollectorPolicy::adjust_concurrent_refinement(double update_rs_time, + double update_rs_processed_buffers, + double goal_ms) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + ConcurrentG1Refine *cg1r = G1CollectedHeap::heap()->concurrent_g1_refine(); + + if (G1AdaptiveConcRefine) { + const int k_gy = 3, k_gr = 6; + const double inc_k = 1.1, dec_k = 0.9; + + int g = cg1r->green_zone(); + if (update_rs_time > goal_ms) { + g = (int)(g * dec_k); // Can become 0, that's OK. That would mean a mutator-only processing. + } else { + if (update_rs_time < goal_ms && update_rs_processed_buffers > g) { + g = (int)MAX2(g * inc_k, g + 1.0); + } + } + // Change the refinement threads params + cg1r->set_green_zone(g); + cg1r->set_yellow_zone(g * k_gy); + cg1r->set_red_zone(g * k_gr); + cg1r->reinitialize_threads(); + + int processing_threshold_delta = MAX2((int)(cg1r->green_zone() * sigma()), 1); + int processing_threshold = MIN2(cg1r->green_zone() + processing_threshold_delta, + cg1r->yellow_zone()); + // Change the barrier params + dcqs.set_process_completed_threshold(processing_threshold); + dcqs.set_max_completed_queue(cg1r->red_zone()); + } + + int curr_queue_size = dcqs.completed_buffers_num(); + if (curr_queue_size >= cg1r->yellow_zone()) { + dcqs.set_completed_queue_padding(curr_queue_size); + } else { + dcqs.set_completed_queue_padding(0); + } + dcqs.notify_if_necessary(); +} + double G1CollectorPolicy:: predict_young_collection_elapsed_time_ms(size_t adjustment) { @@ -2825,8 +2892,15 @@ choose_collection_set() { double non_young_start_time_sec; start_recording_regions(); - guarantee(_target_pause_time_ms > -1.0, + guarantee(_target_pause_time_ms > -1.0 + NOT_PRODUCT(|| Universe::heap()->gc_cause() == GCCause::_scavenge_alot), "_target_pause_time_ms should have been set!"); +#ifndef PRODUCT + if (_target_pause_time_ms <= -1.0) { + assert(ScavengeALot && Universe::heap()->gc_cause() == GCCause::_scavenge_alot, "Error"); + _target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0; + } +#endif assert(_collection_set == NULL, "Precondition"); double base_time_ms = predict_base_elapsed_time_ms(_pending_cards); @@ -2972,7 +3046,3 @@ record_collection_pause_end(bool abandoned) { G1CollectorPolicy::record_collection_pause_end(abandoned); assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end."); } - -// Local Variables: *** -// c-indentation-style: gnu *** -// End: *** diff --git a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp index 0dff91a0b..2b9eb83f0 100644 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp @@ -316,6 +316,10 @@ private: bool verify_young_ages(HeapRegion* head, SurvRateGroup *surv_rate_group); #endif // PRODUCT + void adjust_concurrent_refinement(double update_rs_time, + double update_rs_processed_buffers, + double goal_ms); + protected: double _pause_time_target_ms; double _recorded_young_cset_choice_time_ms; diff --git a/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp b/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp index b0d0df45d..492adaef6 100644 --- a/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp +++ b/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp @@ -86,12 +86,22 @@ void G1MMUTrackerQueue::add_pause(double start, double end, bool gc_thread) { // increase the array size (:-) // remove the oldest entry (this might allow more GC time for // the time slice than what's allowed) - // concolidate the two entries with the minimum gap between them - // (this mighte allow less GC time than what's allowed) - guarantee(0, "array full, currently we can't recover"); + // consolidate the two entries with the minimum gap between them + // (this might allow less GC time than what's allowed) + guarantee(NOT_PRODUCT(ScavengeALot ||) G1ForgetfulMMUTracker, + "array full, currently we can't recover unless +G1ForgetfulMMUTracker"); + // In the case where ScavengeALot is true, such overflow is not + // uncommon; in such cases, we can, without much loss of precision + // or performance (we are GC'ing most of the time anyway!), + // simply overwrite the oldest entry in the tracker: this + // is also the behaviour when G1ForgetfulMMUTracker is enabled. + _head_index = trim_index(_head_index + 1); + assert(_head_index == _tail_index, "Because we have a full circular buffer"); + _tail_index = trim_index(_tail_index + 1); + } else { + _head_index = trim_index(_head_index + 1); + ++_no_entries; } - _head_index = trim_index(_head_index + 1); - ++_no_entries; _array[_head_index] = G1MMUTrackerQueueElem(start, end); } diff --git a/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp b/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp index 0eff2c386..1030454a7 100644 --- a/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp +++ b/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp @@ -99,7 +99,10 @@ private: // The array is of fixed size and I don't think we'll need more than // two or three entries with the current behaviour of G1 pauses. // If the array is full, an easy fix is to look for the pauses with - // the shortest gap between them and concolidate them. + // the shortest gap between them and consolidate them. + // For now, we have taken the expedient alternative of forgetting + // the oldest entry in the event that +G1ForgetfulMMUTracker, thus + // potentially violating MMU specs for some time thereafter. G1MMUTrackerQueueElem _array[QueueLength]; int _head_index; diff --git a/src/share/vm/gc_implementation/g1/g1_globals.hpp b/src/share/vm/gc_implementation/g1/g1_globals.hpp index c941c8755..85f3841c1 100644 --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp @@ -85,7 +85,7 @@ diagnostic(bool, G1SummarizeZFStats, false, \ "Summarize zero-filling info") \ \ - develop(bool, G1TraceConcurrentRefinement, false, \ + diagnostic(bool, G1TraceConcurrentRefinement, false, \ "Trace G1 concurrent refinement") \ \ product(intx, G1MarkStackSize, 2 * 1024 * 1024, \ @@ -94,19 +94,6 @@ product(intx, G1MarkRegionStackSize, 1024 * 1024, \ "Size of the region stack for concurrent marking.") \ \ - develop(bool, G1ConcRefine, true, \ - "If true, run concurrent rem set refinement for G1") \ - \ - develop(intx, G1ConcRefineTargTraversals, 4, \ - "Number of concurrent refinement we try to achieve") \ - \ - develop(intx, G1ConcRefineInitialDelta, 4, \ - "Number of heap regions of alloc ahead of starting collection " \ - "pause to start concurrent refinement (initially)") \ - \ - develop(bool, G1SmoothConcRefine, true, \ - "Attempts to smooth out the overhead of concurrent refinement") \ - \ develop(bool, G1ConcZeroFill, true, \ "If true, run concurrent zero-filling thread") \ \ @@ -178,13 +165,38 @@ product(intx, G1UpdateBufferSize, 256, \ "Size of an update buffer") \ \ - product(intx, G1UpdateBufferQueueProcessingThreshold, 5, \ + product(intx, G1ConcRefineYellowZone, 0, \ "Number of enqueued update buffers that will " \ - "trigger concurrent processing") \ + "trigger concurrent processing. Will be selected ergonomically " \ + "by default.") \ \ - product(intx, G1UpdateBufferQueueMaxLength, 30, \ + product(intx, G1ConcRefineRedZone, 0, \ "Maximum number of enqueued update buffers before mutator " \ - "threads start processing new ones instead of enqueueing them") \ + "threads start processing new ones instead of enqueueing them. " \ + "Will be selected ergonomically by default. Zero will disable " \ + "concurrent processing.") \ + \ + product(intx, G1ConcRefineGreenZone, 0, \ + "The number of update buffers that are left in the queue by the " \ + "concurrent processing threads. Will be selected ergonomically " \ + "by default.") \ + \ + product(intx, G1ConcRefineServiceInterval, 300, \ + "The last concurrent refinement thread wakes up every " \ + "specified number of milliseconds to do miscellaneous work.") \ + \ + product(intx, G1ConcRefineThresholdStep, 0, \ + "Each time the rset update queue increases by this amount " \ + "activate the next refinement thread if available. " \ + "Will be selected ergonomically by default.") \ + \ + product(intx, G1RSUpdatePauseFractionPercent, 10, \ + "A target percentage of time that is allowed to be spend on " \ + "process RS update buffers during the collection pause.") \ + \ + product(bool, G1AdaptiveConcRefine, true, \ + "Select green, yellow and red zones adaptively to meet the " \ + "the pause requirements.") \ \ develop(intx, G1ConcRSLogCacheSize, 10, \ "Log base 2 of the length of conc RS hot-card cache.") \ @@ -242,6 +254,10 @@ product(bool, G1UseSurvivorSpaces, true, \ "When true, use survivor space.") \ \ + develop(bool, G1FailOnFPError, false, \ + "When set, G1 will fail when it encounters an FP 'error', " \ + "so as to allow debugging") \ + \ develop(bool, G1FixedTenuringThreshold, false, \ "When set, G1 will not adjust the tenuring threshold") \ \ @@ -252,6 +268,9 @@ "If non-0 is the size of the G1 survivor space, " \ "otherwise SurvivorRatio is used to determine the size") \ \ + product(bool, G1ForgetfulMMUTracker, false, \ + "If the MMU tracker's memory is full, forget the oldest entry") \ + \ product(uintx, G1HeapRegionSize, 0, \ "Size of the G1 regions.") \ \ diff --git a/src/share/vm/gc_implementation/g1/ptrQueue.cpp b/src/share/vm/gc_implementation/g1/ptrQueue.cpp index 060743dd3..50a1967bb 100644 --- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp @@ -64,8 +64,8 @@ void PtrQueue::enqueue_known_active(void* ptr) { while (_index == 0) { handle_zero_index(); } - assert(_index > 0, "postcondition"); + assert(_index > 0, "postcondition"); _index -= oopSize; _buf[byte_index_to_index((int)_index)] = ptr; assert(0 <= _index && _index <= _sz, "Invariant."); @@ -99,94 +99,110 @@ void** PtrQueueSet::allocate_buffer() { assert(_sz > 0, "Didn't set a buffer size."); MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); if (_fl_owner->_buf_free_list != NULL) { - void** res = _fl_owner->_buf_free_list; - _fl_owner->_buf_free_list = (void**)_fl_owner->_buf_free_list[0]; + void** res = BufferNode::make_buffer_from_node(_fl_owner->_buf_free_list); + _fl_owner->_buf_free_list = _fl_owner->_buf_free_list->next(); _fl_owner->_buf_free_list_sz--; - // Just override the next pointer with NULL, just in case we scan this part - // of the buffer. - res[0] = NULL; return res; } else { - return NEW_C_HEAP_ARRAY(void*, _sz); + // Allocate space for the BufferNode in front of the buffer. + char *b = NEW_C_HEAP_ARRAY(char, _sz + BufferNode::aligned_size()); + return BufferNode::make_buffer_from_block(b); } } void PtrQueueSet::deallocate_buffer(void** buf) { assert(_sz > 0, "Didn't set a buffer size."); MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); - buf[0] = (void*)_fl_owner->_buf_free_list; - _fl_owner->_buf_free_list = buf; + BufferNode *node = BufferNode::make_node_from_buffer(buf); + node->set_next(_fl_owner->_buf_free_list); + _fl_owner->_buf_free_list = node; _fl_owner->_buf_free_list_sz++; } void PtrQueueSet::reduce_free_list() { + assert(_fl_owner == this, "Free list reduction is allowed only for the owner"); // For now we'll adopt the strategy of deleting half. MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); size_t n = _buf_free_list_sz / 2; while (n > 0) { assert(_buf_free_list != NULL, "_buf_free_list_sz must be wrong."); - void** head = _buf_free_list; - _buf_free_list = (void**)_buf_free_list[0]; - FREE_C_HEAP_ARRAY(void*,head); + void* b = BufferNode::make_block_from_node(_buf_free_list); + _buf_free_list = _buf_free_list->next(); + FREE_C_HEAP_ARRAY(char, b); + _buf_free_list_sz --; n--; } } -void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index, bool ignore_max_completed) { - // I use explicit locking here because there's a bailout in the middle. - _cbl_mon->lock_without_safepoint_check(); - - Thread* thread = Thread::current(); - assert( ignore_max_completed || - thread->is_Java_thread() || - SafepointSynchronize::is_at_safepoint(), - "invariant" ); - ignore_max_completed = ignore_max_completed || !thread->is_Java_thread(); - - if (!ignore_max_completed && _max_completed_queue > 0 && - _n_completed_buffers >= (size_t) _max_completed_queue) { - _cbl_mon->unlock(); - bool b = mut_process_buffer(buf); - if (b) { - deallocate_buffer(buf); - return; +void PtrQueue::handle_zero_index() { + assert(0 == _index, "Precondition."); + // This thread records the full buffer and allocates a new one (while + // holding the lock if there is one). + if (_buf != NULL) { + if (_lock) { + locking_enqueue_completed_buffer(_buf); + } else { + if (qset()->process_or_enqueue_complete_buffer(_buf)) { + // Recycle the buffer. No allocation. + _sz = qset()->buffer_size(); + _index = _sz; + return; + } } + } + // Reallocate the buffer + _buf = qset()->allocate_buffer(); + _sz = qset()->buffer_size(); + _index = _sz; + assert(0 <= _index && _index <= _sz, "Invariant."); +} - // Otherwise, go ahead and enqueue the buffer. Must reaquire the lock. - _cbl_mon->lock_without_safepoint_check(); +bool PtrQueueSet::process_or_enqueue_complete_buffer(void** buf) { + if (Thread::current()->is_Java_thread()) { + // We don't lock. It is fine to be epsilon-precise here. + if (_max_completed_queue == 0 || _max_completed_queue > 0 && + _n_completed_buffers >= _max_completed_queue + _completed_queue_padding) { + bool b = mut_process_buffer(buf); + if (b) { + // True here means that the buffer hasn't been deallocated and the caller may reuse it. + return true; + } + } } + // The buffer will be enqueued. The caller will have to get a new one. + enqueue_complete_buffer(buf); + return false; +} - // Here we still hold the _cbl_mon. - CompletedBufferNode* cbn = new CompletedBufferNode; - cbn->buf = buf; - cbn->next = NULL; - cbn->index = index; +void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index) { + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + BufferNode* cbn = BufferNode::new_from_buffer(buf); + cbn->set_index(index); if (_completed_buffers_tail == NULL) { assert(_completed_buffers_head == NULL, "Well-formedness"); _completed_buffers_head = cbn; _completed_buffers_tail = cbn; } else { - _completed_buffers_tail->next = cbn; + _completed_buffers_tail->set_next(cbn); _completed_buffers_tail = cbn; } _n_completed_buffers++; - if (!_process_completed && + if (!_process_completed && _process_completed_threshold >= 0 && _n_completed_buffers >= _process_completed_threshold) { _process_completed = true; if (_notify_when_complete) - _cbl_mon->notify_all(); + _cbl_mon->notify(); } debug_only(assert_completed_buffer_list_len_correct_locked()); - _cbl_mon->unlock(); } int PtrQueueSet::completed_buffers_list_length() { int n = 0; - CompletedBufferNode* cbn = _completed_buffers_head; + BufferNode* cbn = _completed_buffers_head; while (cbn != NULL) { n++; - cbn = cbn->next; + cbn = cbn->next(); } return n; } @@ -197,7 +213,7 @@ void PtrQueueSet::assert_completed_buffer_list_len_correct() { } void PtrQueueSet::assert_completed_buffer_list_len_correct_locked() { - guarantee((size_t)completed_buffers_list_length() == _n_completed_buffers, + guarantee(completed_buffers_list_length() == _n_completed_buffers, "Completed buffer length is wrong."); } @@ -206,12 +222,8 @@ void PtrQueueSet::set_buffer_size(size_t sz) { _sz = sz * oopSize; } -void PtrQueueSet::set_process_completed_threshold(size_t sz) { - _process_completed_threshold = sz; -} - -// Merge lists of buffers. Notify waiting threads if the length of the list -// exceeds threshold. The source queue is emptied as a result. The queues +// Merge lists of buffers. Notify the processing threads. +// The source queue is emptied as a result. The queues // must share the monitor. void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) { assert(_cbl_mon == src->_cbl_mon, "Should share the same lock"); @@ -223,7 +235,7 @@ void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) { } else { assert(_completed_buffers_head != NULL, "Well formedness"); if (src->_completed_buffers_head != NULL) { - _completed_buffers_tail->next = src->_completed_buffers_head; + _completed_buffers_tail->set_next(src->_completed_buffers_head); _completed_buffers_tail = src->_completed_buffers_tail; } } @@ -236,31 +248,13 @@ void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) { assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL || _completed_buffers_head != NULL && _completed_buffers_tail != NULL, "Sanity"); +} - if (!_process_completed && - _n_completed_buffers >= _process_completed_threshold) { +void PtrQueueSet::notify_if_necessary() { + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); + if (_n_completed_buffers >= _process_completed_threshold || _max_completed_queue == 0) { _process_completed = true; if (_notify_when_complete) - _cbl_mon->notify_all(); - } -} - -// Merge free lists of the two queues. The free list of the source -// queue is emptied as a result. The queues must share the same -// mutex that guards free lists. -void PtrQueueSet::merge_freelists(PtrQueueSet* src) { - assert(_fl_lock == src->_fl_lock, "Should share the same lock"); - MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); - if (_buf_free_list != NULL) { - void **p = _buf_free_list; - while (*p != NULL) { - p = (void**)*p; - } - *p = src->_buf_free_list; - } else { - _buf_free_list = src->_buf_free_list; + _cbl_mon->notify(); } - _buf_free_list_sz += src->_buf_free_list_sz; - src->_buf_free_list = NULL; - src->_buf_free_list_sz = 0; } diff --git a/src/share/vm/gc_implementation/g1/ptrQueue.hpp b/src/share/vm/gc_implementation/g1/ptrQueue.hpp index 663dcba4f..ccf5b207c 100644 --- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp +++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp @@ -27,8 +27,10 @@ // the addresses of modified old-generation objects. This type supports // this operation. -class PtrQueueSet; +// The definition of placement operator new(size_t, void*) in the <new>. +#include <new> +class PtrQueueSet; class PtrQueue VALUE_OBJ_CLASS_SPEC { protected: @@ -77,7 +79,7 @@ public: else enqueue_known_active(ptr); } - inline void handle_zero_index(); + void handle_zero_index(); void locking_enqueue_completed_buffer(void** buf); void enqueue_known_active(void* ptr); @@ -126,34 +128,65 @@ public: }; +class BufferNode { + size_t _index; + BufferNode* _next; +public: + BufferNode() : _index(0), _next(NULL) { } + BufferNode* next() const { return _next; } + void set_next(BufferNode* n) { _next = n; } + size_t index() const { return _index; } + void set_index(size_t i) { _index = i; } + + // Align the size of the structure to the size of the pointer + static size_t aligned_size() { + static const size_t alignment = round_to(sizeof(BufferNode), sizeof(void*)); + return alignment; + } + + // BufferNode is allocated before the buffer. + // The chunk of memory that holds both of them is a block. + + // Produce a new BufferNode given a buffer. + static BufferNode* new_from_buffer(void** buf) { + return new (make_block_from_buffer(buf)) BufferNode; + } + + // The following are the required conversion routines: + static BufferNode* make_node_from_buffer(void** buf) { + return (BufferNode*)make_block_from_buffer(buf); + } + static void** make_buffer_from_node(BufferNode *node) { + return make_buffer_from_block(node); + } + static void* make_block_from_node(BufferNode *node) { + return (void*)node; + } + static void** make_buffer_from_block(void* p) { + return (void**)((char*)p + aligned_size()); + } + static void* make_block_from_buffer(void** p) { + return (void*)((char*)p - aligned_size()); + } +}; + // A PtrQueueSet represents resources common to a set of pointer queues. // In particular, the individual queues allocate buffers from this shared // set, and return completed buffers to the set. // All these variables are are protected by the TLOQ_CBL_mon. XXX ??? class PtrQueueSet VALUE_OBJ_CLASS_SPEC { - protected: - - class CompletedBufferNode: public CHeapObj { - public: - void** buf; - size_t index; - CompletedBufferNode* next; - CompletedBufferNode() : buf(NULL), - index(0), next(NULL){ } - }; - Monitor* _cbl_mon; // Protects the fields below. - CompletedBufferNode* _completed_buffers_head; - CompletedBufferNode* _completed_buffers_tail; - size_t _n_completed_buffers; - size_t _process_completed_threshold; + BufferNode* _completed_buffers_head; + BufferNode* _completed_buffers_tail; + int _n_completed_buffers; + int _process_completed_threshold; volatile bool _process_completed; // This (and the interpretation of the first element as a "next" // pointer) are protected by the TLOQ_FL_lock. Mutex* _fl_lock; - void** _buf_free_list; + BufferNode* _buf_free_list; size_t _buf_free_list_sz; // Queue set can share a freelist. The _fl_owner variable // specifies the owner. It is set to "this" by default. @@ -170,6 +203,7 @@ protected: // Maximum number of elements allowed on completed queue: after that, // enqueuer does the work itself. Zero indicates no maximum. int _max_completed_queue; + int _completed_queue_padding; int completed_buffers_list_length(); void assert_completed_buffer_list_len_correct_locked(); @@ -191,9 +225,12 @@ public: // Because of init-order concerns, we can't pass these as constructor // arguments. void initialize(Monitor* cbl_mon, Mutex* fl_lock, - int max_completed_queue = 0, + int process_completed_threshold, + int max_completed_queue, PtrQueueSet *fl_owner = NULL) { _max_completed_queue = max_completed_queue; + _process_completed_threshold = process_completed_threshold; + _completed_queue_padding = 0; assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?"); _cbl_mon = cbl_mon; _fl_lock = fl_lock; @@ -208,14 +245,17 @@ public: void deallocate_buffer(void** buf); // Declares that "buf" is a complete buffer. - void enqueue_complete_buffer(void** buf, size_t index = 0, - bool ignore_max_completed = false); + void enqueue_complete_buffer(void** buf, size_t index = 0); + + // To be invoked by the mutator. + bool process_or_enqueue_complete_buffer(void** buf); bool completed_buffers_exist_dirty() { return _n_completed_buffers > 0; } bool process_completed_buffers() { return _process_completed; } + void set_process_completed(bool x) { _process_completed = x; } bool active() { return _all_active; } @@ -226,15 +266,24 @@ public: // Get the buffer size. size_t buffer_size() { return _sz; } - // Set the number of completed buffers that triggers log processing. - void set_process_completed_threshold(size_t sz); + // Get/Set the number of completed buffers that triggers log processing. + void set_process_completed_threshold(int sz) { _process_completed_threshold = sz; } + int process_completed_threshold() const { return _process_completed_threshold; } // Must only be called at a safe point. Indicates that the buffer free // list size may be reduced, if that is deemed desirable. void reduce_free_list(); - size_t completed_buffers_num() { return _n_completed_buffers; } + int completed_buffers_num() { return _n_completed_buffers; } void merge_bufferlists(PtrQueueSet* src); - void merge_freelists(PtrQueueSet* src); + + void set_max_completed_queue(int m) { _max_completed_queue = m; } + int max_completed_queue() { return _max_completed_queue; } + + void set_completed_queue_padding(int padding) { _completed_queue_padding = padding; } + int completed_queue_padding() { return _completed_queue_padding; } + + // Notify the consumer if the number of buffers crossed the threshold + void notify_if_necessary(); }; diff --git a/src/share/vm/gc_implementation/g1/satbQueue.cpp b/src/share/vm/gc_implementation/g1/satbQueue.cpp index 3cf402ce7..8efdc3b9c 100644 --- a/src/share/vm/gc_implementation/g1/satbQueue.cpp +++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp @@ -67,9 +67,9 @@ SATBMarkQueueSet::SATBMarkQueueSet() : {} void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, - int max_completed_queue, + int process_completed_threshold, Mutex* lock) { - PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue); + PtrQueueSet::initialize(cbl_mon, fl_lock, process_completed_threshold, -1); _shared_satb_queue.set_lock(lock); if (ParallelGCThreads > 0) { _par_closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads); @@ -122,12 +122,12 @@ void SATBMarkQueueSet::par_iterate_closure_all_threads(int worker) { bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par, int worker) { - CompletedBufferNode* nd = NULL; + BufferNode* nd = NULL; { MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); if (_completed_buffers_head != NULL) { nd = _completed_buffers_head; - _completed_buffers_head = nd->next; + _completed_buffers_head = nd->next(); if (_completed_buffers_head == NULL) _completed_buffers_tail = NULL; _n_completed_buffers--; if (_n_completed_buffers == 0) _process_completed = false; @@ -135,9 +135,9 @@ bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par, } ObjectClosure* cl = (par ? _par_closures[worker] : _closure); if (nd != NULL) { - ObjPtrQueue::apply_closure_to_buffer(cl, nd->buf, 0, _sz); - deallocate_buffer(nd->buf); - delete nd; + void **buf = BufferNode::make_buffer_from_node(nd); + ObjPtrQueue::apply_closure_to_buffer(cl, buf, 0, _sz); + deallocate_buffer(buf); return true; } else { return false; @@ -145,13 +145,13 @@ bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par, } void SATBMarkQueueSet::abandon_partial_marking() { - CompletedBufferNode* buffers_to_delete = NULL; + BufferNode* buffers_to_delete = NULL; { MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); while (_completed_buffers_head != NULL) { - CompletedBufferNode* nd = _completed_buffers_head; - _completed_buffers_head = nd->next; - nd->next = buffers_to_delete; + BufferNode* nd = _completed_buffers_head; + _completed_buffers_head = nd->next(); + nd->set_next(buffers_to_delete); buffers_to_delete = nd; } _completed_buffers_tail = NULL; @@ -159,10 +159,9 @@ void SATBMarkQueueSet::abandon_partial_marking() { DEBUG_ONLY(assert_completed_buffer_list_len_correct_locked()); } while (buffers_to_delete != NULL) { - CompletedBufferNode* nd = buffers_to_delete; - buffers_to_delete = nd->next; - deallocate_buffer(nd->buf); - delete nd; + BufferNode* nd = buffers_to_delete; + buffers_to_delete = nd->next(); + deallocate_buffer(BufferNode::make_buffer_from_node(nd)); } assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); // So we can safely manipulate these queues. diff --git a/src/share/vm/gc_implementation/g1/satbQueue.hpp b/src/share/vm/gc_implementation/g1/satbQueue.hpp index ed1181dd7..76218a636 100644 --- a/src/share/vm/gc_implementation/g1/satbQueue.hpp +++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp @@ -60,8 +60,8 @@ public: SATBMarkQueueSet(); void initialize(Monitor* cbl_mon, Mutex* fl_lock, - int max_completed_queue = 0, - Mutex* lock = NULL); + int process_completed_threshold, + Mutex* lock); static void handle_zero_index_for_thread(JavaThread* t); diff --git a/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp index e59dbe483..2137efa4e 100644 --- a/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp +++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp @@ -42,7 +42,7 @@ void VM_G1CollectFull::doit() { void VM_G1IncCollectionPause::doit() { JvmtiGCForAllocationMarker jgcm; G1CollectedHeap* g1h = G1CollectedHeap::heap(); - GCCauseSetter x(g1h, GCCause::_g1_inc_collection_pause); + GCCauseSetter x(g1h, _gc_cause); g1h->do_collection_pause_at_safepoint(); } diff --git a/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp b/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp index 6cf0605ec..95dda3844 100644 --- a/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp +++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp @@ -68,8 +68,9 @@ class VM_G1CollectForAllocation: public VM_GC_Operation { class VM_G1IncCollectionPause: public VM_GC_Operation { public: - VM_G1IncCollectionPause(int gc_count_before) : - VM_GC_Operation(gc_count_before) {} + VM_G1IncCollectionPause(int gc_count_before, + GCCause::Cause gc_cause = GCCause::_g1_inc_collection_pause) : + VM_GC_Operation(gc_count_before) { _gc_cause = gc_cause; } virtual VMOp_Type type() const { return VMOp_G1IncCollectionPause; } virtual void doit(); virtual const char* name() const { diff --git a/src/share/vm/gc_implementation/includeDB_gc_g1 b/src/share/vm/gc_implementation/includeDB_gc_g1 index 63bfbce76..60531af90 100644 --- a/src/share/vm/gc_implementation/includeDB_gc_g1 +++ b/src/share/vm/gc_implementation/includeDB_gc_g1 @@ -109,7 +109,6 @@ dirtyCardQueue.cpp atomic.hpp dirtyCardQueue.cpp dirtyCardQueue.hpp dirtyCardQueue.cpp heapRegionRemSet.hpp dirtyCardQueue.cpp mutexLocker.hpp -dirtyCardQueue.cpp ptrQueue.inline.hpp dirtyCardQueue.cpp safepoint.hpp dirtyCardQueue.cpp thread.hpp dirtyCardQueue.cpp thread_<os_family>.inline.hpp @@ -222,6 +221,15 @@ g1MarkSweep.hpp oop.hpp g1MarkSweep.hpp timer.hpp g1MarkSweep.hpp universe.hpp +g1MemoryPool.cpp heapRegion.hpp +g1MemoryPool.cpp g1CollectedHeap.inline.hpp +g1MemoryPool.cpp g1CollectedHeap.hpp +g1MemoryPool.cpp g1CollectorPolicy.hpp +g1MemoryPool.cpp g1MemoryPool.hpp + +g1MemoryPool.hpp memoryUsage.hpp +g1MemoryPool.hpp memoryPool.hpp + g1OopClosures.inline.hpp concurrentMark.hpp g1OopClosures.inline.hpp g1OopClosures.hpp g1OopClosures.inline.hpp g1CollectedHeap.hpp @@ -303,12 +311,13 @@ heapRegionSeq.inline.hpp heapRegionSeq.hpp klass.hpp g1OopClosures.hpp +memoryService.cpp g1MemoryPool.hpp + ptrQueue.cpp allocation.hpp ptrQueue.cpp allocation.inline.hpp ptrQueue.cpp mutex.hpp ptrQueue.cpp mutexLocker.hpp ptrQueue.cpp ptrQueue.hpp -ptrQueue.cpp ptrQueue.inline.hpp ptrQueue.cpp thread_<os_family>.inline.hpp ptrQueue.hpp allocation.hpp @@ -318,7 +327,6 @@ ptrQueue.inline.hpp ptrQueue.hpp satbQueue.cpp allocation.inline.hpp satbQueue.cpp mutexLocker.hpp -satbQueue.cpp ptrQueue.inline.hpp satbQueue.cpp satbQueue.hpp satbQueue.cpp sharedHeap.hpp satbQueue.cpp thread.hpp diff --git a/src/share/vm/includeDB_compiler2 b/src/share/vm/includeDB_compiler2 index 6ba7bfaf8..abe70d1f7 100644 --- a/src/share/vm/includeDB_compiler2 +++ b/src/share/vm/includeDB_compiler2 @@ -149,6 +149,7 @@ c2compiler.cpp runtime.hpp c2compiler.hpp abstractCompiler.hpp callGenerator.cpp addnode.hpp +callGenerator.cpp bcEscapeAnalyzer.hpp callGenerator.cpp callGenerator.hpp callGenerator.cpp callnode.hpp callGenerator.cpp cfgnode.hpp @@ -321,6 +322,7 @@ compile.cpp phaseX.hpp compile.cpp rootnode.hpp compile.cpp runtime.hpp compile.cpp signature.hpp +compile.cpp stringopts.hpp compile.cpp stubRoutines.hpp compile.cpp systemDictionary.hpp compile.cpp timer.hpp @@ -476,12 +478,16 @@ graphKit.cpp rootnode.hpp graphKit.cpp runtime.hpp graphKit.cpp sharedRuntime.hpp +graphKit.hpp addnode.hpp graphKit.hpp callnode.hpp graphKit.hpp cfgnode.hpp graphKit.hpp ciEnv.hpp +graphKit.hpp divnode.hpp graphKit.hpp compile.hpp graphKit.hpp deoptimization.hpp graphKit.hpp phaseX.hpp +graphKit.hpp mulnode.hpp +graphKit.hpp subnode.hpp graphKit.hpp type.hpp idealKit.cpp addnode.hpp @@ -490,7 +496,10 @@ idealKit.cpp cfgnode.hpp idealKit.cpp idealKit.hpp idealKit.cpp runtime.hpp +idealKit.hpp addnode.hpp +idealKit.hpp cfgnode.hpp idealKit.hpp connode.hpp +idealKit.hpp divnode.hpp idealKit.hpp mulnode.hpp idealKit.hpp phaseX.hpp idealKit.hpp subnode.hpp @@ -641,6 +650,7 @@ macro.cpp addnode.hpp macro.cpp callnode.hpp macro.cpp cfgnode.hpp macro.cpp compile.hpp +macro.cpp compileLog.hpp macro.cpp connode.hpp macro.cpp locknode.hpp macro.cpp loopnode.hpp @@ -993,6 +1003,21 @@ split_if.cpp callnode.hpp split_if.cpp connode.hpp split_if.cpp loopnode.hpp +stringopts.hpp phaseX.hpp +stringopts.hpp node.hpp + +stringopts.cpp addnode.hpp +stringopts.cpp callnode.hpp +stringopts.cpp callGenerator.hpp +stringopts.cpp compileLog.hpp +stringopts.cpp divnode.hpp +stringopts.cpp idealKit.hpp +stringopts.cpp graphKit.hpp +stringopts.cpp rootnode.hpp +stringopts.cpp runtime.hpp +stringopts.cpp subnode.hpp +stringopts.cpp stringopts.hpp + stubGenerator_<arch_model>.cpp runtime.hpp stubRoutines.cpp runtime.hpp diff --git a/src/share/vm/includeDB_core b/src/share/vm/includeDB_core index 1a5f3ee30..6b0cf03f1 100644 --- a/src/share/vm/includeDB_core +++ b/src/share/vm/includeDB_core @@ -289,7 +289,7 @@ attachListener.hpp allocation.hpp attachListener.hpp debug.hpp attachListener.hpp ostream.hpp -barrierSet.cpp barrierSet.hpp +barrierSet.cpp barrierSet.inline.hpp barrierSet.cpp collectedHeap.hpp barrierSet.cpp universe.hpp @@ -570,6 +570,7 @@ ciEnv.hpp debugInfoRec.hpp ciEnv.hpp dependencies.hpp ciEnv.hpp exceptionHandlerTable.hpp ciEnv.hpp oopMap.hpp +ciEnv.hpp systemDictionary.hpp ciEnv.hpp thread.hpp ciExceptionHandler.cpp ciExceptionHandler.hpp diff --git a/src/share/vm/memory/barrierSet.cpp b/src/share/vm/memory/barrierSet.cpp index 07ff7a0fc..8cd80b7c4 100644 --- a/src/share/vm/memory/barrierSet.cpp +++ b/src/share/vm/memory/barrierSet.cpp @@ -41,11 +41,6 @@ void BarrierSet::static_write_ref_array_pre(HeapWord* start, size_t count) { // count is number of array elements being written void BarrierSet::static_write_ref_array_post(HeapWord* start, size_t count) { - assert(count <= (size_t)max_intx, "count too large"); - HeapWord* end = start + objArrayOopDesc::array_size((int)count); -#if 0 - warning("Post:\t" INTPTR_FORMAT "[" SIZE_FORMAT "] : [" INTPTR_FORMAT","INTPTR_FORMAT")\t", - start, count, start, end); -#endif - Universe::heap()->barrier_set()->write_ref_array_work(MemRegion(start, end)); + // simply delegate to instance method + Universe::heap()->barrier_set()->write_ref_array(start, count); } diff --git a/src/share/vm/memory/barrierSet.hpp b/src/share/vm/memory/barrierSet.hpp index 0fc6a0061..c624f2506 100644 --- a/src/share/vm/memory/barrierSet.hpp +++ b/src/share/vm/memory/barrierSet.hpp @@ -121,17 +121,20 @@ public: virtual void read_ref_array(MemRegion mr) = 0; virtual void read_prim_array(MemRegion mr) = 0; + // Below length is the # array elements being written virtual void write_ref_array_pre( oop* dst, int length) {} virtual void write_ref_array_pre(narrowOop* dst, int length) {} + // Below MemRegion mr is expected to be HeapWord-aligned inline void write_ref_array(MemRegion mr); + // Below count is the # array elements being written, starting + // at the address "start", which may not necessarily be HeapWord-aligned + inline void write_ref_array(HeapWord* start, size_t count); - // Static versions, suitable for calling from generated code. + // Static versions, suitable for calling from generated code; + // count is # array elements being written, starting with "start", + // which may not necessarily be HeapWord-aligned. static void static_write_ref_array_pre(HeapWord* start, size_t count); static void static_write_ref_array_post(HeapWord* start, size_t count); - // Narrow oop versions of the above; count is # of array elements being written, - // starting with "start", which is HeapWord-aligned. - static void static_write_ref_array_pre_narrow(HeapWord* start, size_t count); - static void static_write_ref_array_post_narrow(HeapWord* start, size_t count); protected: virtual void write_ref_array_work(MemRegion mr) = 0; diff --git a/src/share/vm/memory/barrierSet.inline.hpp b/src/share/vm/memory/barrierSet.inline.hpp index edcb551bd..ddc398348 100644 --- a/src/share/vm/memory/barrierSet.inline.hpp +++ b/src/share/vm/memory/barrierSet.inline.hpp @@ -43,6 +43,8 @@ void BarrierSet::write_ref_field(void* field, oop new_val) { } void BarrierSet::write_ref_array(MemRegion mr) { + assert((HeapWord*)align_size_down((uintptr_t)mr.start(), HeapWordSize) == mr.start() , "Unaligned start"); + assert((HeapWord*)align_size_up ((uintptr_t)mr.end(), HeapWordSize) == mr.end(), "Unaligned end" ); if (kind() == CardTableModRef) { ((CardTableModRefBS*)this)->inline_write_ref_array(mr); } else { @@ -50,6 +52,34 @@ void BarrierSet::write_ref_array(MemRegion mr) { } } +// count is number of array elements being written +void BarrierSet::write_ref_array(HeapWord* start, size_t count) { + assert(count <= (size_t)max_intx, "count too large"); + HeapWord* end = (HeapWord*)((char*)start + (count*heapOopSize)); + // In the case of compressed oops, start and end may potentially be misaligned; + // so we need to conservatively align the first downward (this is not + // strictly necessary for current uses, but a case of good hygiene and, + // if you will, aesthetics) and the second upward (this is essential for + // current uses) to a HeapWord boundary, so we mark all cards overlapping + // this write. In the event that this evolves in the future to calling a + // logging barrier of narrow oop granularity, like the pre-barrier for G1 + // (mentioned here merely by way of example), we will need to change this + // interface, much like the pre-barrier one above, so it is "exactly precise" + // (if i may be allowed the adverbial redundancy for emphasis) and does not + // include narrow oop slots not included in the original write interval. + HeapWord* aligned_start = (HeapWord*)align_size_down((uintptr_t)start, HeapWordSize); + HeapWord* aligned_end = (HeapWord*)align_size_up ((uintptr_t)end, HeapWordSize); + // If compressed oops were not being used, these should already be aligned + assert(UseCompressedOops || (aligned_start == start && aligned_end == end), + "Expected heap word alignment of start and end"); +#if 0 + warning("Post:\t" INTPTR_FORMAT "[" SIZE_FORMAT "] : [" INTPTR_FORMAT","INTPTR_FORMAT")\t", + start, count, aligned_start, aligned_end); +#endif + write_ref_array_work(MemRegion(aligned_start, aligned_end)); +} + + void BarrierSet::write_region(MemRegion mr) { if (kind() == CardTableModRef) { ((CardTableModRefBS*)this)->inline_write_region(mr); diff --git a/src/share/vm/memory/cardTableModRefBS.cpp b/src/share/vm/memory/cardTableModRefBS.cpp index c036a3555..2deb7da83 100644 --- a/src/share/vm/memory/cardTableModRefBS.cpp +++ b/src/share/vm/memory/cardTableModRefBS.cpp @@ -511,6 +511,8 @@ void CardTableModRefBS::mod_oop_in_space_iterate(Space* sp, } void CardTableModRefBS::dirty_MemRegion(MemRegion mr) { + assert((HeapWord*)align_size_down((uintptr_t)mr.start(), HeapWordSize) == mr.start(), "Unaligned start"); + assert((HeapWord*)align_size_up ((uintptr_t)mr.end(), HeapWordSize) == mr.end(), "Unaligned end" ); jbyte* cur = byte_for(mr.start()); jbyte* last = byte_after(mr.last()); while (cur < last) { @@ -520,6 +522,8 @@ void CardTableModRefBS::dirty_MemRegion(MemRegion mr) { } void CardTableModRefBS::invalidate(MemRegion mr, bool whole_heap) { + assert((HeapWord*)align_size_down((uintptr_t)mr.start(), HeapWordSize) == mr.start(), "Unaligned start"); + assert((HeapWord*)align_size_up ((uintptr_t)mr.end(), HeapWordSize) == mr.end(), "Unaligned end" ); for (int i = 0; i < _cur_covered_regions; i++) { MemRegion mri = mr.intersection(_covered[i]); if (!mri.is_empty()) dirty_MemRegion(mri); diff --git a/src/share/vm/memory/sharedHeap.hpp b/src/share/vm/memory/sharedHeap.hpp index 0bf863fb1..609438724 100644 --- a/src/share/vm/memory/sharedHeap.hpp +++ b/src/share/vm/memory/sharedHeap.hpp @@ -224,10 +224,6 @@ public: CodeBlobClosure* code_roots, OopClosure* non_root_closure); - - // Like CollectedHeap::collect, but assume that the caller holds the Heap_lock. - virtual void collect_locked(GCCause::Cause cause) = 0; - // The functions below are helper functions that a subclass of // "SharedHeap" can use in the implementation of its virtual // functions. diff --git a/src/share/vm/memory/universe.cpp b/src/share/vm/memory/universe.cpp index 5a80ac144..5a4bd323a 100644 --- a/src/share/vm/memory/universe.cpp +++ b/src/share/vm/memory/universe.cpp @@ -67,6 +67,8 @@ typeArrayOop Universe::_the_empty_int_array = NULL; objArrayOop Universe::_the_empty_system_obj_array = NULL; objArrayOop Universe::_the_empty_class_klass_array = NULL; objArrayOop Universe::_the_array_interfaces_array = NULL; +oop Universe::_the_null_string = NULL; +oop Universe::_the_min_jint_string = NULL; LatestMethodOopCache* Universe::_finalizer_register_cache = NULL; LatestMethodOopCache* Universe::_loader_addClass_cache = NULL; ActiveMethodOopsCache* Universe::_reflect_invoke_cache = NULL; @@ -187,6 +189,8 @@ void Universe::oops_do(OopClosure* f, bool do_all) { f->do_oop((oop*)&_the_empty_system_obj_array); f->do_oop((oop*)&_the_empty_class_klass_array); f->do_oop((oop*)&_the_array_interfaces_array); + f->do_oop((oop*)&_the_null_string); + f->do_oop((oop*)&_the_min_jint_string); _finalizer_register_cache->oops_do(f); _loader_addClass_cache->oops_do(f); _reflect_invoke_cache->oops_do(f); @@ -289,6 +293,9 @@ void Universe::genesis(TRAPS) { klassOop ok = SystemDictionary::object_klass(); + _the_null_string = StringTable::intern("null", CHECK); + _the_min_jint_string = StringTable::intern("-2147483648", CHECK); + if (UseSharedSpaces) { // Verify shared interfaces array. assert(_the_array_interfaces_array->obj_at(0) == diff --git a/src/share/vm/memory/universe.hpp b/src/share/vm/memory/universe.hpp index b22a1eba6..97044e161 100644 --- a/src/share/vm/memory/universe.hpp +++ b/src/share/vm/memory/universe.hpp @@ -169,6 +169,8 @@ class Universe: AllStatic { static objArrayOop _the_empty_system_obj_array; // Canonicalized system obj array static objArrayOop _the_empty_class_klass_array; // Canonicalized obj array of type java.lang.Class static objArrayOop _the_array_interfaces_array; // Canonicalized 2-array of cloneable & serializable klasses + static oop _the_null_string; // A cache of "null" as a Java string + static oop _the_min_jint_string; // A cache of "-2147483648" as a Java string static LatestMethodOopCache* _finalizer_register_cache; // static method for registering finalizable objects static LatestMethodOopCache* _loader_addClass_cache; // method for registering loaded classes in class loader vector static ActiveMethodOopsCache* _reflect_invoke_cache; // method for security checks @@ -310,6 +312,8 @@ class Universe: AllStatic { static objArrayOop the_empty_system_obj_array () { return _the_empty_system_obj_array; } static objArrayOop the_empty_class_klass_array () { return _the_empty_class_klass_array; } static objArrayOop the_array_interfaces_array() { return _the_array_interfaces_array; } + static oop the_null_string() { return _the_null_string; } + static oop the_min_jint_string() { return _the_min_jint_string; } static methodOop finalizer_register_method() { return _finalizer_register_cache->get_methodOop(); } static methodOop loader_addClass_method() { return _loader_addClass_cache->get_methodOop(); } static ActiveMethodOopsCache* reflect_invoke_cache() { return _reflect_invoke_cache; } diff --git a/src/share/vm/oops/objArrayKlass.cpp b/src/share/vm/oops/objArrayKlass.cpp index 212126490..68556a5a0 100644 --- a/src/share/vm/oops/objArrayKlass.cpp +++ b/src/share/vm/oops/objArrayKlass.cpp @@ -127,16 +127,14 @@ template <class T> void objArrayKlass::do_copy(arrayOop s, T* src, // pointer delta is scaled to number of elements (length field in // objArrayOop) which we assume is 32 bit. assert(pd == (size_t)(int)pd, "length field overflow"); - const size_t done_word_len = objArrayOopDesc::array_size((int)pd); - bs->write_ref_array(MemRegion((HeapWord*)dst, done_word_len)); + bs->write_ref_array((HeapWord*)dst, pd); THROW(vmSymbols::java_lang_ArrayStoreException()); return; } } } } - const size_t word_len = objArrayOopDesc::array_size(length); - bs->write_ref_array(MemRegion((HeapWord*)dst, word_len)); + bs->write_ref_array((HeapWord*)dst, length); } void objArrayKlass::copy_array(arrayOop s, int src_pos, arrayOop d, diff --git a/src/share/vm/oops/objArrayOop.hpp b/src/share/vm/oops/objArrayOop.hpp index 626f398a6..1c1764a87 100644 --- a/src/share/vm/oops/objArrayOop.hpp +++ b/src/share/vm/oops/objArrayOop.hpp @@ -37,6 +37,32 @@ class objArrayOopDesc : public arrayOopDesc { return &((T*)base())[index]; } +private: + // Give size of objArrayOop in HeapWords minus the header + static int array_size(int length) { + const int OopsPerHeapWord = HeapWordSize/heapOopSize; + assert(OopsPerHeapWord >= 1 && (HeapWordSize % heapOopSize == 0), + "Else the following (new) computation would be in error"); +#ifdef ASSERT + // The old code is left in for sanity-checking; it'll + // go away pretty soon. XXX + // Without UseCompressedOops, this is simply: + // oop->length() * HeapWordsPerOop; + // With narrowOops, HeapWordsPerOop is 1/2 or equal 0 as an integer. + // The oop elements are aligned up to wordSize + const int HeapWordsPerOop = heapOopSize/HeapWordSize; + int old_res; + if (HeapWordsPerOop > 0) { + old_res = length * HeapWordsPerOop; + } else { + old_res = align_size_up(length, OopsPerHeapWord)/OopsPerHeapWord; + } +#endif // ASSERT + int res = ((uint)length + OopsPerHeapWord - 1)/OopsPerHeapWord; + assert(res == old_res, "Inconsistency between old and new."); + return res; + } + public: // Returns the offset of the first element. static int base_offset_in_bytes() { @@ -67,27 +93,14 @@ class objArrayOopDesc : public arrayOopDesc { // Sizing static int header_size() { return arrayOopDesc::header_size(T_OBJECT); } int object_size() { return object_size(length()); } - int array_size() { return array_size(length()); } static int object_size(int length) { // This returns the object size in HeapWords. - return align_object_size(header_size() + array_size(length)); - } - - // Give size of objArrayOop in HeapWords minus the header - static int array_size(int length) { - // Without UseCompressedOops, this is simply: - // oop->length() * HeapWordsPerOop; - // With narrowOops, HeapWordsPerOop is 1/2 or equal 0 as an integer. - // The oop elements are aligned up to wordSize - const int HeapWordsPerOop = heapOopSize/HeapWordSize; - if (HeapWordsPerOop > 0) { - return length * HeapWordsPerOop; - } else { - const int OopsPerHeapWord = HeapWordSize/heapOopSize; - int word_len = align_size_up(length, OopsPerHeapWord)/OopsPerHeapWord; - return word_len; - } + uint asz = array_size(length); + uint osz = align_object_size(header_size() + asz); + assert(osz >= asz, "no overflow"); + assert((int)osz > 0, "no overflow"); + return (int)osz; } // special iterators for index ranges, returns size of object diff --git a/src/share/vm/opto/c2_globals.cpp b/src/share/vm/opto/c2_globals.cpp index 5715b24ba..40594bf94 100644 --- a/src/share/vm/opto/c2_globals.cpp +++ b/src/share/vm/opto/c2_globals.cpp @@ -25,4 +25,4 @@ # include "incls/_precompiled.incl" # include "incls/_c2_globals.cpp.incl" -C2_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_NOTPRODUCT_FLAG) +C2_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_EXPERIMENTAL_FLAG, MATERIALIZE_NOTPRODUCT_FLAG) diff --git a/src/share/vm/opto/c2_globals.hpp b/src/share/vm/opto/c2_globals.hpp index 091ad4a9b..b68b66342 100644 --- a/src/share/vm/opto/c2_globals.hpp +++ b/src/share/vm/opto/c2_globals.hpp @@ -26,7 +26,7 @@ // Defines all globals flags used by the server compiler. // -#define C2_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \ +#define C2_FLAGS(develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct) \ \ notproduct(intx, CompileZapFirst, 0, \ "If +ZapDeadCompiledLocals, " \ @@ -394,6 +394,12 @@ product(bool, UseOptoBiasInlining, true, \ "Generate biased locking code in C2 ideal graph") \ \ + product(bool, OptimizeStringConcat, false, \ + "Optimize the construction of Strings by StringBuilder") \ + \ + notproduct(bool, PrintOptimizeStringConcat, false, \ + "Print information about transformations performed on Strings") \ + \ product(intx, ValueSearchLimit, 1000, \ "Recursion limit in PhaseMacroExpand::value_from_mem_phi") \ \ @@ -413,4 +419,4 @@ product(bool, BlockLayoutRotateLoops, true, \ "Allow back branches to be fall throughs in the block layour") \ -C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG) +C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG) diff --git a/src/share/vm/opto/callGenerator.cpp b/src/share/vm/opto/callGenerator.cpp index 8ce7c0ce5..8ddc84715 100644 --- a/src/share/vm/opto/callGenerator.cpp +++ b/src/share/vm/opto/callGenerator.cpp @@ -98,12 +98,21 @@ JVMState* ParseGenerator::generate(JVMState* jvms) { //---------------------------DirectCallGenerator------------------------------ // Internal class which handles all out-of-line calls w/o receiver type checks. class DirectCallGenerator : public CallGenerator { -public: - DirectCallGenerator(ciMethod* method) - : CallGenerator(method) + private: + CallStaticJavaNode* _call_node; + // Force separate memory and I/O projections for the exceptional + // paths to facilitate late inlinig. + bool _separate_io_proj; + + public: + DirectCallGenerator(ciMethod* method, bool separate_io_proj) + : CallGenerator(method), + _separate_io_proj(separate_io_proj) { } virtual JVMState* generate(JVMState* jvms); + + CallStaticJavaNode* call_node() const { return _call_node; } }; JVMState* DirectCallGenerator::generate(JVMState* jvms) { @@ -129,9 +138,10 @@ JVMState* DirectCallGenerator::generate(JVMState* jvms) { call->set_optimized_virtual(true); } kit.set_arguments_for_java_call(call); - kit.set_edges_for_java_call(call); - Node* ret = kit.set_results_for_java_call(call); + kit.set_edges_for_java_call(call, false, _separate_io_proj); + Node* ret = kit.set_results_for_java_call(call, _separate_io_proj); kit.push_node(method()->return_type()->basic_type(), ret); + _call_node = call; // Save the call node in case we need it later return kit.transfer_exceptions_into_jvms(); } @@ -238,9 +248,9 @@ CallGenerator* CallGenerator::for_osr(ciMethod* m, int osr_bci) { return new ParseGenerator(m, expected_uses, true); } -CallGenerator* CallGenerator::for_direct_call(ciMethod* m) { +CallGenerator* CallGenerator::for_direct_call(ciMethod* m, bool separate_io_proj) { assert(!m->is_abstract(), "for_direct_call mismatch"); - return new DirectCallGenerator(m); + return new DirectCallGenerator(m, separate_io_proj); } CallGenerator* CallGenerator::for_virtual_call(ciMethod* m, int vtable_index) { @@ -248,6 +258,108 @@ CallGenerator* CallGenerator::for_virtual_call(ciMethod* m, int vtable_index) { return new VirtualCallGenerator(m, vtable_index); } +// Allow inlining decisions to be delayed +class LateInlineCallGenerator : public DirectCallGenerator { + CallGenerator* _inline_cg; + + public: + LateInlineCallGenerator(ciMethod* method, CallGenerator* inline_cg) : + DirectCallGenerator(method, true), _inline_cg(inline_cg) {} + + virtual bool is_late_inline() const { return true; } + + // Convert the CallStaticJava into an inline + virtual void do_late_inline(); + + JVMState* generate(JVMState* jvms) { + // Record that this call site should be revisited once the main + // parse is finished. + Compile::current()->add_late_inline(this); + + // Emit the CallStaticJava and request separate projections so + // that the late inlining logic can distinguish between fall + // through and exceptional uses of the memory and io projections + // as is done for allocations and macro expansion. + return DirectCallGenerator::generate(jvms); + } + +}; + + +void LateInlineCallGenerator::do_late_inline() { + // Can't inline it + if (call_node() == NULL || call_node()->outcnt() == 0 || + call_node()->in(0) == NULL || call_node()->in(0)->is_top()) + return; + + CallStaticJavaNode* call = call_node(); + + // Make a clone of the JVMState that appropriate to use for driving a parse + Compile* C = Compile::current(); + JVMState* jvms = call->jvms()->clone_shallow(C); + uint size = call->req(); + SafePointNode* map = new (C, size) SafePointNode(size, jvms); + for (uint i1 = 0; i1 < size; i1++) { + map->init_req(i1, call->in(i1)); + } + + // Make sure the state is a MergeMem for parsing. + if (!map->in(TypeFunc::Memory)->is_MergeMem()) { + map->set_req(TypeFunc::Memory, MergeMemNode::make(C, map->in(TypeFunc::Memory))); + } + + // Make enough space for the expression stack and transfer the incoming arguments + int nargs = method()->arg_size(); + jvms->set_map(map); + map->ensure_stack(jvms, jvms->method()->max_stack()); + if (nargs > 0) { + for (int i1 = 0; i1 < nargs; i1++) { + map->set_req(i1 + jvms->argoff(), call->in(TypeFunc::Parms + i1)); + } + } + + CompileLog* log = C->log(); + if (log != NULL) { + log->head("late_inline method='%d'", log->identify(method())); + JVMState* p = jvms; + while (p != NULL) { + log->elem("jvms bci='%d' method='%d'", p->bci(), log->identify(p->method())); + p = p->caller(); + } + log->tail("late_inline"); + } + + // Setup default node notes to be picked up by the inlining + Node_Notes* old_nn = C->default_node_notes(); + if (old_nn != NULL) { + Node_Notes* entry_nn = old_nn->clone(C); + entry_nn->set_jvms(jvms); + C->set_default_node_notes(entry_nn); + } + + // Now perform the inling using the synthesized JVMState + JVMState* new_jvms = _inline_cg->generate(jvms); + if (new_jvms == NULL) return; // no change + if (C->failing()) return; + + // Capture any exceptional control flow + GraphKit kit(new_jvms); + + // Find the result object + Node* result = C->top(); + int result_size = method()->return_type()->size(); + if (result_size != 0 && !kit.stopped()) { + result = (result_size == 1) ? kit.pop() : kit.pop_pair(); + } + + kit.replace_call(call, result); +} + + +CallGenerator* CallGenerator::for_late_inline(ciMethod* method, CallGenerator* inline_cg) { + return new LateInlineCallGenerator(method, inline_cg); +} + //---------------------------WarmCallGenerator-------------------------------- // Internal class which handles initial deferral of inlining decisions. @@ -315,70 +427,7 @@ JVMState* WarmCallGenerator::generate(JVMState* jvms) { } void WarmCallInfo::make_hot() { - Compile* C = Compile::current(); - // Replace the callnode with something better. - CallJavaNode* call = this->call()->as_CallJava(); - ciMethod* method = call->method(); - int nargs = method->arg_size(); - JVMState* jvms = call->jvms()->clone_shallow(C); - uint size = TypeFunc::Parms + MAX2(2, nargs); - SafePointNode* map = new (C, size) SafePointNode(size, jvms); - for (uint i1 = 0; i1 < (uint)(TypeFunc::Parms + nargs); i1++) { - map->init_req(i1, call->in(i1)); - } - jvms->set_map(map); - jvms->set_offsets(map->req()); - jvms->set_locoff(TypeFunc::Parms); - jvms->set_stkoff(TypeFunc::Parms); - GraphKit kit(jvms); - - JVMState* new_jvms = _hot_cg->generate(kit.jvms()); - if (new_jvms == NULL) return; // no change - if (C->failing()) return; - - kit.set_jvms(new_jvms); - Node* res = C->top(); - int res_size = method->return_type()->size(); - if (res_size != 0) { - kit.inc_sp(-res_size); - res = kit.argument(0); - } - GraphKit ekit(kit.combine_and_pop_all_exception_states()->jvms()); - - // Replace the call: - for (DUIterator i = call->outs(); call->has_out(i); i++) { - Node* n = call->out(i); - Node* nn = NULL; // replacement - if (n->is_Proj()) { - ProjNode* nproj = n->as_Proj(); - assert(nproj->_con < (uint)(TypeFunc::Parms + (res_size ? 1 : 0)), "sane proj"); - if (nproj->_con == TypeFunc::Parms) { - nn = res; - } else { - nn = kit.map()->in(nproj->_con); - } - if (nproj->_con == TypeFunc::I_O) { - for (DUIterator j = nproj->outs(); nproj->has_out(j); j++) { - Node* e = nproj->out(j); - if (e->Opcode() == Op_CreateEx) { - e->replace_by(ekit.argument(0)); - } else if (e->Opcode() == Op_Catch) { - for (DUIterator k = e->outs(); e->has_out(k); k++) { - CatchProjNode* p = e->out(j)->as_CatchProj(); - if (p->is_handler_proj()) { - p->replace_by(ekit.control()); - } else { - p->replace_by(kit.control()); - } - } - } - } - } - } - NOT_PRODUCT(if (!nn) n->dump(2)); - assert(nn != NULL, "don't know what to do with this user"); - n->replace_by(nn); - } + Unimplemented(); } void WarmCallInfo::make_cold() { diff --git a/src/share/vm/opto/callGenerator.hpp b/src/share/vm/opto/callGenerator.hpp index bbd47ca4a..70ec9d3ff 100644 --- a/src/share/vm/opto/callGenerator.hpp +++ b/src/share/vm/opto/callGenerator.hpp @@ -57,6 +57,13 @@ class CallGenerator : public ResourceObj { // is_trap: Does not return to the caller. (E.g., uncommon trap.) virtual bool is_trap() const { return false; } + // is_late_inline: supports conversion of call into an inline + virtual bool is_late_inline() const { return false; } + // Replace the call with an inline version of the code + virtual void do_late_inline() { ShouldNotReachHere(); } + + virtual CallStaticJavaNode* call_node() const { ShouldNotReachHere(); return NULL; } + // Note: It is possible for a CG to be both inline and virtual. // (The hashCode intrinsic does a vtable check and an inlined fast path.) @@ -92,9 +99,12 @@ class CallGenerator : public ResourceObj { static CallGenerator* for_osr(ciMethod* m, int osr_bci); // How to generate vanilla out-of-line call sites: - static CallGenerator* for_direct_call(ciMethod* m); // static, special + static CallGenerator* for_direct_call(ciMethod* m, bool separate_io_projs = false); // static, special static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index); // virtual, interface + // How to generate a replace a direct call with an inline version + static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg); + // How to make a call but defer the decision whether to inline or not. static CallGenerator* for_warm_call(WarmCallInfo* ci, CallGenerator* if_cold, diff --git a/src/share/vm/opto/callnode.cpp b/src/share/vm/opto/callnode.cpp index 5c69109b8..a59027131 100644 --- a/src/share/vm/opto/callnode.cpp +++ b/src/share/vm/opto/callnode.cpp @@ -693,6 +693,84 @@ Node *CallNode::result_cast() { } +void CallNode::extract_projections(CallProjections* projs, bool separate_io_proj) { + projs->fallthrough_proj = NULL; + projs->fallthrough_catchproj = NULL; + projs->fallthrough_ioproj = NULL; + projs->catchall_ioproj = NULL; + projs->catchall_catchproj = NULL; + projs->fallthrough_memproj = NULL; + projs->catchall_memproj = NULL; + projs->resproj = NULL; + projs->exobj = NULL; + + for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { + ProjNode *pn = fast_out(i)->as_Proj(); + if (pn->outcnt() == 0) continue; + switch (pn->_con) { + case TypeFunc::Control: + { + // For Control (fallthrough) and I_O (catch_all_index) we have CatchProj -> Catch -> Proj + projs->fallthrough_proj = pn; + DUIterator_Fast jmax, j = pn->fast_outs(jmax); + const Node *cn = pn->fast_out(j); + if (cn->is_Catch()) { + ProjNode *cpn = NULL; + for (DUIterator_Fast kmax, k = cn->fast_outs(kmax); k < kmax; k++) { + cpn = cn->fast_out(k)->as_Proj(); + assert(cpn->is_CatchProj(), "must be a CatchProjNode"); + if (cpn->_con == CatchProjNode::fall_through_index) + projs->fallthrough_catchproj = cpn; + else { + assert(cpn->_con == CatchProjNode::catch_all_index, "must be correct index."); + projs->catchall_catchproj = cpn; + } + } + } + break; + } + case TypeFunc::I_O: + if (pn->_is_io_use) + projs->catchall_ioproj = pn; + else + projs->fallthrough_ioproj = pn; + for (DUIterator j = pn->outs(); pn->has_out(j); j++) { + Node* e = pn->out(j); + if (e->Opcode() == Op_CreateEx && e->in(0)->is_CatchProj()) { + assert(projs->exobj == NULL, "only one"); + projs->exobj = e; + } + } + break; + case TypeFunc::Memory: + if (pn->_is_io_use) + projs->catchall_memproj = pn; + else + projs->fallthrough_memproj = pn; + break; + case TypeFunc::Parms: + projs->resproj = pn; + break; + default: + assert(false, "unexpected projection from allocation node."); + } + } + + // The resproj may not exist because the result couuld be ignored + // and the exception object may not exist if an exception handler + // swallows the exception but all the other must exist and be found. + assert(projs->fallthrough_proj != NULL, "must be found"); + assert(projs->fallthrough_catchproj != NULL, "must be found"); + assert(projs->fallthrough_memproj != NULL, "must be found"); + assert(projs->fallthrough_ioproj != NULL, "must be found"); + assert(projs->catchall_catchproj != NULL, "must be found"); + if (separate_io_proj) { + assert(projs->catchall_memproj != NULL, "must be found"); + assert(projs->catchall_ioproj != NULL, "must be found"); + } +} + + //============================================================================= uint CallJavaNode::size_of() const { return sizeof(*this); } uint CallJavaNode::cmp( const Node &n ) const { diff --git a/src/share/vm/opto/callnode.hpp b/src/share/vm/opto/callnode.hpp index ac886f3ba..a9649a7cc 100644 --- a/src/share/vm/opto/callnode.hpp +++ b/src/share/vm/opto/callnode.hpp @@ -470,6 +470,23 @@ public: #endif }; + +// Simple container for the outgoing projections of a call. Useful +// for serious surgery on calls. +class CallProjections : public StackObj { +public: + Node* fallthrough_proj; + Node* fallthrough_catchproj; + Node* fallthrough_memproj; + Node* fallthrough_ioproj; + Node* catchall_catchproj; + Node* catchall_memproj; + Node* catchall_ioproj; + Node* resproj; + Node* exobj; +}; + + //------------------------------CallNode--------------------------------------- // Call nodes now subsume the function of debug nodes at callsites, so they // contain the functionality of a full scope chain of debug nodes. @@ -521,6 +538,11 @@ public: // or returns NULL if there is no one. Node *result_cast(); + // Collect all the interesting edges from a call for use in + // replacing the call by something else. Used by macro expansion + // and the late inlining support. + void extract_projections(CallProjections* projs, bool separate_io_proj); + virtual uint match_edge(uint idx) const; #ifndef PRODUCT @@ -529,6 +551,7 @@ public: #endif }; + //------------------------------CallJavaNode----------------------------------- // Make a static or dynamic subroutine call node using Java calling // convention. (The "Java" calling convention is the compiler's calling diff --git a/src/share/vm/opto/compile.cpp b/src/share/vm/opto/compile.cpp index ecffd2f4e..f5d5387e9 100644 --- a/src/share/vm/opto/compile.cpp +++ b/src/share/vm/opto/compile.cpp @@ -224,6 +224,32 @@ bool Compile::valid_bundle_info(const Node *n) { } +void Compile::gvn_replace_by(Node* n, Node* nn) { + for (DUIterator_Last imin, i = n->last_outs(imin); i >= imin; ) { + Node* use = n->last_out(i); + bool is_in_table = initial_gvn()->hash_delete(use); + uint uses_found = 0; + for (uint j = 0; j < use->len(); j++) { + if (use->in(j) == n) { + if (j < use->req()) + use->set_req(j, nn); + else + use->set_prec(j, nn); + uses_found++; + } + } + if (is_in_table) { + // reinsert into table + initial_gvn()->hash_find_insert(use); + } + record_for_igvn(use); + i -= uses_found; // we deleted 1 or more copies of this edge + } +} + + + + // Identify all nodes that are reachable from below, useful. // Use breadth-first pass that records state in a Unique_Node_List, // recursive traversal is slower. @@ -554,6 +580,28 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr rethrow_exceptions(kit.transfer_exceptions_into_jvms()); } + if (!failing() && has_stringbuilder()) { + { + // remove useless nodes to make the usage analysis simpler + ResourceMark rm; + PhaseRemoveUseless pru(initial_gvn(), &for_igvn); + } + + { + ResourceMark rm; + print_method("Before StringOpts", 3); + PhaseStringOpts pso(initial_gvn(), &for_igvn); + print_method("After StringOpts", 3); + } + + // now inline anything that we skipped the first time around + while (_late_inlines.length() > 0) { + CallGenerator* cg = _late_inlines.pop(); + cg->do_late_inline(); + } + } + assert(_late_inlines.length() == 0, "should have been processed"); + print_method("Before RemoveUseless", 3); // Remove clutter produced by parsing. @@ -820,6 +868,7 @@ void Compile::Init(int aliaslevel) { _fixed_slots = 0; set_has_split_ifs(false); set_has_loops(has_method() && method()->has_loops()); // first approximation + set_has_stringbuilder(false); _deopt_happens = true; // start out assuming the worst _trap_can_recompile = false; // no traps emitted yet _major_progress = true; // start out assuming good things will happen @@ -1803,6 +1852,7 @@ void Compile::dump_asm(int *pcs, uint pc_limit) { !n->is_Phi() && // a few noisely useless nodes !n->is_Proj() && !n->is_MachTemp() && + !n->is_SafePointScalarObject() && !n->is_Catch() && // Would be nice to print exception table targets !n->is_MergeMem() && // Not very interesting !n->is_top() && // Debug info table constants @@ -2240,6 +2290,30 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { break; } + case Op_Proj: { + if (OptimizeStringConcat) { + ProjNode* p = n->as_Proj(); + if (p->_is_io_use) { + // Separate projections were used for the exception path which + // are normally removed by a late inline. If it wasn't inlined + // then they will hang around and should just be replaced with + // the original one. + Node* proj = NULL; + // Replace with just one + for (SimpleDUIterator i(p->in(0)); i.has_next(); i.next()) { + Node *use = i.get(); + if (use->is_Proj() && p != use && use->as_Proj()->_con == p->_con) { + proj = use; + break; + } + } + assert(p != NULL, "must be found"); + p->subsume_by(proj); + } + } + break; + } + case Op_Phi: if (n->as_Phi()->bottom_type()->isa_narrowoop()) { // The EncodeP optimization may create Phi with the same edges diff --git a/src/share/vm/opto/compile.hpp b/src/share/vm/opto/compile.hpp index 4bd1900fc..450ff269f 100644 --- a/src/share/vm/opto/compile.hpp +++ b/src/share/vm/opto/compile.hpp @@ -149,6 +149,7 @@ class Compile : public Phase { bool _has_loops; // True if the method _may_ have some loops bool _has_split_ifs; // True if the method _may_ have some split-if bool _has_unsafe_access; // True if the method _may_ produce faults in unsafe loads or stores. + bool _has_stringbuilder; // True StringBuffers or StringBuilders are allocated uint _trap_hist[trapHistLength]; // Cumulative traps bool _trap_can_recompile; // Have we emitted a recompiling trap? uint _decompile_count; // Cumulative decompilation counts. @@ -219,6 +220,9 @@ class Compile : public Phase { Unique_Node_List* _for_igvn; // Initial work-list for next round of Iterative GVN WarmCallInfo* _warm_calls; // Sorted work-list for heat-based inlining. + GrowableArray<CallGenerator*> _late_inlines; // List of CallGenerators to be revisited after + // main parsing has finished. + // Matching, CFG layout, allocation, code generation PhaseCFG* _cfg; // Results of CFG finding bool _select_24_bit_instr; // We selected an instruction with a 24-bit result @@ -298,6 +302,8 @@ class Compile : public Phase { void set_has_split_ifs(bool z) { _has_split_ifs = z; } bool has_unsafe_access() const { return _has_unsafe_access; } void set_has_unsafe_access(bool z) { _has_unsafe_access = z; } + bool has_stringbuilder() const { return _has_stringbuilder; } + void set_has_stringbuilder(bool z) { _has_stringbuilder = z; } void set_trap_count(uint r, uint c) { assert(r < trapHistLength, "oob"); _trap_hist[r] = c; } uint trap_count(uint r) const { assert(r < trapHistLength, "oob"); return _trap_hist[r]; } bool trap_can_recompile() const { return _trap_can_recompile; } @@ -475,6 +481,7 @@ class Compile : public Phase { // Decide how to build a call. // The profile factor is a discount to apply to this site's interp. profile. CallGenerator* call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float profile_factor); + bool should_delay_inlining(ciMethod* call_method, JVMState* jvms); // Report if there were too many traps at a current method and bci. // Report if a trap was recorded, and/or PerMethodTrapLimit was exceeded. @@ -495,6 +502,11 @@ class Compile : public Phase { void set_initial_gvn(PhaseGVN *gvn) { _initial_gvn = gvn; } void set_for_igvn(Unique_Node_List *for_igvn) { _for_igvn = for_igvn; } + // Replace n by nn using initial_gvn, calling hash_delete and + // record_for_igvn as needed. + void gvn_replace_by(Node* n, Node* nn); + + void identify_useful_nodes(Unique_Node_List &useful); void remove_useless_nodes (Unique_Node_List &useful); @@ -502,6 +514,9 @@ class Compile : public Phase { void set_warm_calls(WarmCallInfo* l) { _warm_calls = l; } WarmCallInfo* pop_warm_call(); + // Record this CallGenerator for inlining at the end of parsing. + void add_late_inline(CallGenerator* cg) { _late_inlines.push(cg); } + // Matching, CFG layout, allocation, code generation PhaseCFG* cfg() { return _cfg; } bool select_24_bit_instr() const { return _select_24_bit_instr; } diff --git a/src/share/vm/opto/doCall.cpp b/src/share/vm/opto/doCall.cpp index 5104e648a..fff6eedda 100644 --- a/src/share/vm/opto/doCall.cpp +++ b/src/share/vm/opto/doCall.cpp @@ -128,6 +128,12 @@ CallGenerator* Compile::call_generator(ciMethod* call_method, int vtable_index, if (allow_inline) { CallGenerator* cg = CallGenerator::for_inline(call_method, expected_uses); + if (require_inline && cg != NULL && should_delay_inlining(call_method, jvms)) { + // Delay the inlining of this method to give us the + // opportunity to perform some high level optimizations + // first. + return CallGenerator::for_late_inline(call_method, cg); + } if (cg == NULL) { // Fall through. } else if (require_inline || !InlineWarmCalls) { @@ -225,10 +231,63 @@ CallGenerator* Compile::call_generator(ciMethod* call_method, int vtable_index, } else { // Class Hierarchy Analysis or Type Profile reveals a unique target, // or it is a static or special call. - return CallGenerator::for_direct_call(call_method); + return CallGenerator::for_direct_call(call_method, should_delay_inlining(call_method, jvms)); } } +// Return true for methods that shouldn't be inlined early so that +// they are easier to analyze and optimize as intrinsics. +bool Compile::should_delay_inlining(ciMethod* call_method, JVMState* jvms) { + if (has_stringbuilder()) { + + if ((call_method->holder() == C->env()->StringBuilder_klass() || + call_method->holder() == C->env()->StringBuffer_klass()) && + (jvms->method()->holder() == C->env()->StringBuilder_klass() || + jvms->method()->holder() == C->env()->StringBuffer_klass())) { + // Delay SB calls only when called from non-SB code + return false; + } + + switch (call_method->intrinsic_id()) { + case vmIntrinsics::_StringBuilder_void: + case vmIntrinsics::_StringBuilder_int: + case vmIntrinsics::_StringBuilder_String: + case vmIntrinsics::_StringBuilder_append_char: + case vmIntrinsics::_StringBuilder_append_int: + case vmIntrinsics::_StringBuilder_append_String: + case vmIntrinsics::_StringBuilder_toString: + case vmIntrinsics::_StringBuffer_void: + case vmIntrinsics::_StringBuffer_int: + case vmIntrinsics::_StringBuffer_String: + case vmIntrinsics::_StringBuffer_append_char: + case vmIntrinsics::_StringBuffer_append_int: + case vmIntrinsics::_StringBuffer_append_String: + case vmIntrinsics::_StringBuffer_toString: + case vmIntrinsics::_Integer_toString: + return true; + + case vmIntrinsics::_String_String: + { + Node* receiver = jvms->map()->in(jvms->argoff() + 1); + if (receiver->is_Proj() && receiver->in(0)->is_CallStaticJava()) { + CallStaticJavaNode* csj = receiver->in(0)->as_CallStaticJava(); + ciMethod* m = csj->method(); + if (m != NULL && + (m->intrinsic_id() == vmIntrinsics::_StringBuffer_toString || + m->intrinsic_id() == vmIntrinsics::_StringBuilder_toString)) + // Delay String.<init>(new SB()) + return true; + } + return false; + } + + default: + return false; + } + } + return false; +} + // uncommon-trap call-sites where callee is unloaded, uninitialized or will not link bool Parse::can_not_compile_call_site(ciMethod *dest_method, ciInstanceKlass* klass) { diff --git a/src/share/vm/opto/escape.cpp b/src/share/vm/opto/escape.cpp index 478a96a75..b560151bf 100644 --- a/src/share/vm/opto/escape.cpp +++ b/src/share/vm/opto/escape.cpp @@ -543,6 +543,7 @@ bool ConnectionGraph::split_AddP(Node *addp, Node *base, PhaseGVN *igvn) { int alias_idx = _compile->get_alias_index(tinst); igvn->set_type(addp, tinst); // record the allocation in the node map + assert(ptnode_adr(addp->_idx)->_node != NULL, "should be registered"); set_map(addp->_idx, get_map(base->_idx)); // Set addp's Base and Address to 'base'. @@ -618,9 +619,14 @@ PhiNode *ConnectionGraph::create_split_phi(PhiNode *orig_phi, int alias_idx, Gro const TypePtr *atype = C->get_adr_type(alias_idx); result = PhiNode::make(orig_phi->in(0), NULL, Type::MEMORY, atype); C->copy_node_notes_to(result, orig_phi); - set_map_phi(orig_phi->_idx, result); igvn->set_type(result, result->bottom_type()); record_for_optimizer(result); + + debug_only(Node* pn = ptnode_adr(orig_phi->_idx)->_node;) + assert(pn == NULL || pn == orig_phi, "wrong node"); + set_map(orig_phi->_idx, result); + ptnode_adr(orig_phi->_idx)->_node = orig_phi; + new_created = true; return result; } @@ -711,6 +717,81 @@ static Node *step_through_mergemem(MergeMemNode *mmem, int alias_idx, const Type } // +// Move memory users to their memory slices. +// +void ConnectionGraph::move_inst_mem(Node* n, GrowableArray<PhiNode *> &orig_phis, PhaseGVN *igvn) { + Compile* C = _compile; + + const TypePtr* tp = igvn->type(n->in(MemNode::Address))->isa_ptr(); + assert(tp != NULL, "ptr type"); + int alias_idx = C->get_alias_index(tp); + int general_idx = C->get_general_index(alias_idx); + + // Move users first + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + Node* use = n->fast_out(i); + if (use->is_MergeMem()) { + MergeMemNode* mmem = use->as_MergeMem(); + assert(n == mmem->memory_at(alias_idx), "should be on instance memory slice"); + if (n != mmem->memory_at(general_idx) || alias_idx == general_idx) { + continue; // Nothing to do + } + // Replace previous general reference to mem node. + uint orig_uniq = C->unique(); + Node* m = find_inst_mem(n, general_idx, orig_phis, igvn); + assert(orig_uniq == C->unique(), "no new nodes"); + mmem->set_memory_at(general_idx, m); + --imax; + --i; + } else if (use->is_MemBar()) { + assert(!use->is_Initialize(), "initializing stores should not be moved"); + if (use->req() > MemBarNode::Precedent && + use->in(MemBarNode::Precedent) == n) { + // Don't move related membars. + record_for_optimizer(use); + continue; + } + tp = use->as_MemBar()->adr_type()->isa_ptr(); + if (tp != NULL && C->get_alias_index(tp) == alias_idx || + alias_idx == general_idx) { + continue; // Nothing to do + } + // Move to general memory slice. + uint orig_uniq = C->unique(); + Node* m = find_inst_mem(n, general_idx, orig_phis, igvn); + assert(orig_uniq == C->unique(), "no new nodes"); + igvn->hash_delete(use); + imax -= use->replace_edge(n, m); + igvn->hash_insert(use); + record_for_optimizer(use); + --i; +#ifdef ASSERT + } else if (use->is_Mem()) { + if (use->Opcode() == Op_StoreCM && use->in(MemNode::OopStore) == n) { + // Don't move related cardmark. + continue; + } + // Memory nodes should have new memory input. + tp = igvn->type(use->in(MemNode::Address))->isa_ptr(); + assert(tp != NULL, "ptr type"); + int idx = C->get_alias_index(tp); + assert(get_map(use->_idx) != NULL || idx == alias_idx, + "Following memory nodes should have new memory input or be on the same memory slice"); + } else if (use->is_Phi()) { + // Phi nodes should be split and moved already. + tp = use->as_Phi()->adr_type()->isa_ptr(); + assert(tp != NULL, "ptr type"); + int idx = C->get_alias_index(tp); + assert(idx == alias_idx, "Following Phi nodes should be on the same memory slice"); + } else { + use->dump(); + assert(false, "should not be here"); +#endif + } + } +} + +// // Search memory chain of "mem" to find a MemNode whose address // is the specified alias index. // @@ -775,10 +856,18 @@ Node* ConnectionGraph::find_inst_mem(Node *orig_mem, int alias_idx, GrowableArra C->get_alias_index(result->as_Phi()->adr_type()) != alias_idx) { Node *un = result->as_Phi()->unique_input(phase); if (un != NULL) { + orig_phis.append_if_missing(result->as_Phi()); result = un; } else { break; } + } else if (result->is_ClearArray()) { + if (!ClearArrayNode::step_through(&result, (uint)tinst->instance_id(), phase)) { + // Can not bypass initialization of the instance + // we are looking for. + break; + } + // Otherwise skip it (the call updated 'result' value). } else if (result->Opcode() == Op_SCMemProj) { assert(result->in(0)->is_LoadStore(), "sanity"); const Type *at = phase->type(result->in(0)->in(MemNode::Address)); @@ -808,7 +897,6 @@ Node* ConnectionGraph::find_inst_mem(Node *orig_mem, int alias_idx, GrowableArra return result; } - // // Convert the types of unescaped object to instance types where possible, // propagate the new type information through the graph, and update memory @@ -900,12 +988,13 @@ Node* ConnectionGraph::find_inst_mem(Node *orig_mem, int alias_idx, GrowableArra // void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) { GrowableArray<Node *> memnode_worklist; - GrowableArray<Node *> mergemem_worklist; GrowableArray<PhiNode *> orig_phis; + PhaseGVN *igvn = _compile->initial_gvn(); uint new_index_start = (uint) _compile->num_alias_types(); - VectorSet visited(Thread::current()->resource_area()); - VectorSet ptset(Thread::current()->resource_area()); + Arena* arena = Thread::current()->resource_area(); + VectorSet visited(arena); + VectorSet ptset(arena); // Phase 1: Process possible allocations from alloc_worklist. @@ -981,6 +1070,8 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) // - non-escaping // - eligible to be a unique type // - not determined to be ineligible by escape analysis + assert(ptnode_adr(alloc->_idx)->_node != NULL && + ptnode_adr(n->_idx)->_node != NULL, "should be registered"); set_map(alloc->_idx, n); set_map(n->_idx, alloc); const TypeOopPtr *t = igvn->type(n)->isa_oopptr(); @@ -1025,7 +1116,7 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) alloc_worklist.append_if_missing(addp2); } alloc_worklist.append_if_missing(use); - } else if (use->is_Initialize()) { + } else if (use->is_MemBar()) { memnode_worklist.append_if_missing(use); } } @@ -1035,10 +1126,12 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) PointsTo(ptset, get_addp_base(n), igvn); assert(ptset.Size() == 1, "AddP address is unique"); uint elem = ptset.getelem(); // Allocation node's index - if (elem == _phantom_object) + if (elem == _phantom_object) { + assert(false, "escaped allocation"); continue; // Assume the value was set outside this method. + } Node *base = get_map(elem); // CheckCastPP node - if (!split_AddP(n, base, igvn)) continue; // wrong type + if (!split_AddP(n, base, igvn)) continue; // wrong type from dead path tinst = igvn->type(base)->isa_oopptr(); } else if (n->is_Phi() || n->is_CheckCastPP() || @@ -1053,8 +1146,10 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) PointsTo(ptset, n, igvn); if (ptset.Size() == 1) { uint elem = ptset.getelem(); // Allocation node's index - if (elem == _phantom_object) + if (elem == _phantom_object) { + assert(false, "escaped allocation"); continue; // Assume the value was set outside this method. + } Node *val = get_map(elem); // CheckCastPP node TypeNode *tn = n->as_Type(); tinst = igvn->type(val)->isa_oopptr(); @@ -1069,8 +1164,7 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) tn_t = tn_type->isa_oopptr(); } - if (tn_t != NULL && - tinst->cast_to_instance_id(TypeOopPtr::InstanceBot)->higher_equal(tn_t)) { + if (tn_t != NULL && tinst->klass()->is_subtype_of(tn_t->klass())) { if (tn_type->isa_narrowoop()) { tn_type = tinst->make_narrowoop(); } else { @@ -1082,33 +1176,25 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) igvn->hash_insert(tn); record_for_optimizer(n); } else { - continue; // wrong type + assert(tn_type == TypePtr::NULL_PTR || + tn_t != NULL && !tinst->klass()->is_subtype_of(tn_t->klass()), + "unexpected type"); + continue; // Skip dead path with different type } } } else { + debug_only(n->dump();) + assert(false, "EA: unexpected node"); continue; } - // push users on appropriate worklist + // push allocation's users on appropriate worklist for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { Node *use = n->fast_out(i); if(use->is_Mem() && use->in(MemNode::Address) == n) { + // Load/store to instance's field memnode_worklist.append_if_missing(use); - } else if (use->is_Initialize()) { + } else if (use->is_MemBar()) { memnode_worklist.append_if_missing(use); - } else if (use->is_MergeMem()) { - mergemem_worklist.append_if_missing(use); - } else if (use->is_SafePoint() && tinst != NULL) { - // Look for MergeMem nodes for calls which reference unique allocation - // (through CheckCastPP nodes) even for debug info. - Node* m = use->in(TypeFunc::Memory); - uint iid = tinst->instance_id(); - while (m->is_Proj() && m->in(0)->is_SafePoint() && - m->in(0) != use && !m->in(0)->_idx != iid) { - m = m->in(0)->in(TypeFunc::Memory); - } - if (m->is_MergeMem()) { - mergemem_worklist.append_if_missing(m); - } } else if (use->is_AddP() && use->outcnt() > 0) { // No dead nodes Node* addp2 = find_second_addp(use, n); if (addp2 != NULL) { @@ -1121,6 +1207,29 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) use->is_DecodeN() || (use->is_ConstraintCast() && use->Opcode() == Op_CastPP)) { alloc_worklist.append_if_missing(use); +#ifdef ASSERT + } else if (use->is_Mem()) { + assert(use->in(MemNode::Address) != n, "EA: missing allocation reference path"); + } else if (use->is_MergeMem()) { + assert(_mergemem_worklist.contains(use->as_MergeMem()), "EA: missing MergeMem node in the worklist"); + } else if (use->is_SafePoint()) { + // Look for MergeMem nodes for calls which reference unique allocation + // (through CheckCastPP nodes) even for debug info. + Node* m = use->in(TypeFunc::Memory); + if (m->is_MergeMem()) { + assert(_mergemem_worklist.contains(m->as_MergeMem()), "EA: missing MergeMem node in the worklist"); + } + } else { + uint op = use->Opcode(); + if (!(op == Op_CmpP || op == Op_Conv2B || + op == Op_CastP2X || op == Op_StoreCM || + op == Op_FastLock || op == Op_AryEq || op == Op_StrComp || + op == Op_StrEquals || op == Op_StrIndexOf)) { + n->dump(); + use->dump(); + assert(false, "EA: missing allocation reference path"); + } +#endif } } @@ -1138,13 +1247,11 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) Node *n = memnode_worklist.pop(); if (visited.test_set(n->_idx)) continue; - if (n->is_Phi()) { - assert(n->as_Phi()->adr_type() != TypePtr::BOTTOM, "narrow memory slice required"); - // we don't need to do anything, but the users must be pushed if we haven't processed - // this Phi before - } else if (n->is_Initialize()) { - // we don't need to do anything, but the users of the memory projection must be pushed - n = n->as_Initialize()->proj_out(TypeFunc::Memory); + if (n->is_Phi() || n->is_ClearArray()) { + // we don't need to do anything, but the users must be pushed + } else if (n->is_MemBar()) { // Initialize, MemBar nodes + // we don't need to do anything, but the users must be pushed + n = n->as_MemBar()->proj_out(TypeFunc::Memory); if (n == NULL) continue; } else { @@ -1161,6 +1268,10 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) return; } if (mem != n->in(MemNode::Memory)) { + // We delay the memory edge update since we need old one in + // MergeMem code below when instances memory slices are separated. + debug_only(Node* pn = ptnode_adr(n->_idx)->_node;) + assert(pn == NULL || pn == n, "wrong node"); set_map(n->_idx, mem); ptnode_adr(n->_idx)->_node = n; } @@ -1181,36 +1292,55 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) // push user on appropriate worklist for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { Node *use = n->fast_out(i); - if (use->is_Phi()) { + if (use->is_Phi() || use->is_ClearArray()) { memnode_worklist.append_if_missing(use); } else if(use->is_Mem() && use->in(MemNode::Memory) == n) { + if (use->Opcode() == Op_StoreCM) // Ignore cardmark stores + continue; memnode_worklist.append_if_missing(use); - } else if (use->is_Initialize()) { + } else if (use->is_MemBar()) { memnode_worklist.append_if_missing(use); +#ifdef ASSERT + } else if(use->is_Mem()) { + assert(use->in(MemNode::Memory) != n, "EA: missing memory path"); } else if (use->is_MergeMem()) { - mergemem_worklist.append_if_missing(use); + assert(_mergemem_worklist.contains(use->as_MergeMem()), "EA: missing MergeMem node in the worklist"); + } else { + uint op = use->Opcode(); + if (!(op == Op_StoreCM || + (op == Op_CallLeaf && use->as_CallLeaf()->_name != NULL && + strcmp(use->as_CallLeaf()->_name, "g1_wb_pre") == 0) || + op == Op_AryEq || op == Op_StrComp || + op == Op_StrEquals || op == Op_StrIndexOf)) { + n->dump(); + use->dump(); + assert(false, "EA: missing memory path"); + } +#endif } } } // Phase 3: Process MergeMem nodes from mergemem_worklist. - // Walk each memory moving the first node encountered of each + // Walk each memory slice moving the first node encountered of each // instance type to the the input corresponding to its alias index. - while (mergemem_worklist.length() != 0) { - Node *n = mergemem_worklist.pop(); - assert(n->is_MergeMem(), "MergeMem node required."); - if (visited.test_set(n->_idx)) - continue; - MergeMemNode *nmm = n->as_MergeMem(); + uint length = _mergemem_worklist.length(); + for( uint next = 0; next < length; ++next ) { + MergeMemNode* nmm = _mergemem_worklist.at(next); + assert(!visited.test_set(nmm->_idx), "should not be visited before"); // Note: we don't want to use MergeMemStream here because we only want to - // scan inputs which exist at the start, not ones we add during processing. - uint nslices = nmm->req(); + // scan inputs which exist at the start, not ones we add during processing. + // Note 2: MergeMem may already contains instance memory slices added + // during find_inst_mem() call when memory nodes were processed above. igvn->hash_delete(nmm); + uint nslices = nmm->req(); for (uint i = Compile::AliasIdxRaw+1; i < nslices; i++) { Node* mem = nmm->in(i); Node* cur = NULL; if (mem == NULL || mem->is_top()) continue; + // First, update mergemem by moving memory nodes to corresponding slices + // if their type became more precise since this mergemem was created. while (mem->is_Mem()) { const Type *at = igvn->type(mem->in(MemNode::Address)); if (at != Type::TOP) { @@ -1229,7 +1359,7 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) } nmm->set_memory_at(i, (cur != NULL) ? cur : mem); // Find any instance of the current type if we haven't encountered - // a value of the instance along the chain. + // already a memory slice of the instance along the memory chain. for (uint ni = new_index_start; ni < new_index_end; ni++) { if((uint)_compile->get_general_index(ni) == i) { Node *m = (ni >= nmm->req()) ? nmm->empty_memory() : nmm->in(ni); @@ -1245,11 +1375,11 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) } // Find the rest of instances values for (uint ni = new_index_start; ni < new_index_end; ni++) { - const TypeOopPtr *tinst = igvn->C->get_adr_type(ni)->isa_oopptr(); + const TypeOopPtr *tinst = _compile->get_adr_type(ni)->isa_oopptr(); Node* result = step_through_mergemem(nmm, ni, tinst); if (result == nmm->base_memory()) { // Didn't find instance memory, search through general slice recursively. - result = nmm->memory_at(igvn->C->get_general_index(ni)); + result = nmm->memory_at(_compile->get_general_index(ni)); result = find_inst_mem(result, ni, orig_phis, igvn); if (_compile->failing()) { return; @@ -1259,41 +1389,6 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) } igvn->hash_insert(nmm); record_for_optimizer(nmm); - - // Propagate new memory slices to following MergeMem nodes. - for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { - Node *use = n->fast_out(i); - if (use->is_Call()) { - CallNode* in = use->as_Call(); - if (in->proj_out(TypeFunc::Memory) != NULL) { - Node* m = in->proj_out(TypeFunc::Memory); - for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) { - Node* mm = m->fast_out(j); - if (mm->is_MergeMem()) { - mergemem_worklist.append_if_missing(mm); - } - } - } - if (use->is_Allocate()) { - use = use->as_Allocate()->initialization(); - if (use == NULL) { - continue; - } - } - } - if (use->is_Initialize()) { - InitializeNode* in = use->as_Initialize(); - if (in->proj_out(TypeFunc::Memory) != NULL) { - Node* m = in->proj_out(TypeFunc::Memory); - for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) { - Node* mm = m->fast_out(j); - if (mm->is_MergeMem()) { - mergemem_worklist.append_if_missing(mm); - } - } - } - } - } } // Phase 4: Update the inputs of non-instance memory Phis and @@ -1322,19 +1417,48 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) } // Update the memory inputs of MemNodes with the value we computed - // in Phase 2. + // in Phase 2 and move stores memory users to corresponding memory slices. +#ifdef ASSERT + visited.Clear(); + Node_Stack old_mems(arena, _compile->unique() >> 2); +#endif for (uint i = 0; i < nodes_size(); i++) { Node *nmem = get_map(i); if (nmem != NULL) { Node *n = ptnode_adr(i)->_node; - if (n != NULL && n->is_Mem()) { + assert(n != NULL, "sanity"); + if (n->is_Mem()) { +#ifdef ASSERT + Node* old_mem = n->in(MemNode::Memory); + if (!visited.test_set(old_mem->_idx)) { + old_mems.push(old_mem, old_mem->outcnt()); + } +#endif + assert(n->in(MemNode::Memory) != nmem, "sanity"); + if (!n->is_Load()) { + // Move memory users of a store first. + move_inst_mem(n, orig_phis, igvn); + } + // Now update memory input igvn->hash_delete(n); n->set_req(MemNode::Memory, nmem); igvn->hash_insert(n); record_for_optimizer(n); + } else { + assert(n->is_Allocate() || n->is_CheckCastPP() || + n->is_AddP() || n->is_Phi(), "unknown node used for set_map()"); } } } +#ifdef ASSERT + // Verify that memory was split correctly + while (old_mems.is_nonempty()) { + Node* old_mem = old_mems.node(); + uint old_cnt = old_mems.index(); + old_mems.pop(); + assert(old_cnt = old_mem->outcnt(), "old mem could be lost"); + } +#endif } bool ConnectionGraph::has_candidates(Compile *C) { @@ -1381,8 +1505,20 @@ bool ConnectionGraph::compute_escape() { ptnode_adr(n->_idx)->node_type() == PointsToNode::JavaObject) { has_allocations = true; } - if(n->is_AddP()) - cg_worklist.append(n->_idx); + if(n->is_AddP()) { + // Collect address nodes which directly reference an allocation. + // Use them during stage 3 below to build initial connection graph + // field edges. Other field edges could be added after StoreP/LoadP + // nodes are processed during stage 4 below. + Node* base = get_addp_base(n); + if(base->is_Proj() && base->in(0)->is_Allocate()) { + cg_worklist.append(n->_idx); + } + } else if (n->is_MergeMem()) { + // Collect all MergeMem nodes to add memory slices for + // scalar replaceable objects in split_unique_types(). + _mergemem_worklist.append(n->as_MergeMem()); + } for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { Node* m = n->fast_out(i); // Get user worklist_init.push(m); @@ -1423,12 +1559,13 @@ bool ConnectionGraph::compute_escape() { } } - VectorSet ptset(Thread::current()->resource_area()); + Arena* arena = Thread::current()->resource_area(); + VectorSet ptset(arena); GrowableArray<uint> deferred_edges; - VectorSet visited(Thread::current()->resource_area()); + VectorSet visited(arena); - // 5. Remove deferred edges from the graph and collect - // information needed for type splitting. + // 5. Remove deferred edges from the graph and adjust + // escape state of nonescaping objects. cg_length = cg_worklist.length(); for( uint next = 0; next < cg_length; ++next ) { int ni = cg_worklist.at(next); @@ -1438,98 +1575,9 @@ bool ConnectionGraph::compute_escape() { remove_deferred(ni, &deferred_edges, &visited); Node *n = ptn->_node; if (n->is_AddP()) { - // Search for objects which are not scalar replaceable. - // Mark their escape state as ArgEscape to propagate the state - // to referenced objects. - // Note: currently there are no difference in compiler optimizations - // for ArgEscape objects and NoEscape objects which are not - // scalar replaceable. - - int offset = ptn->offset(); - Node *base = get_addp_base(n); - ptset.Clear(); - PointsTo(ptset, base, igvn); - int ptset_size = ptset.Size(); - - // Check if a field's initializing value is recorded and add - // a corresponding NULL field's value if it is not recorded. - // Connection Graph does not record a default initialization by NULL - // captured by Initialize node. - // - // Note: it will disable scalar replacement in some cases: - // - // Point p[] = new Point[1]; - // p[0] = new Point(); // Will be not scalar replaced - // - // but it will save us from incorrect optimizations in next cases: - // - // Point p[] = new Point[1]; - // if ( x ) p[0] = new Point(); // Will be not scalar replaced - // - // Without a control flow analysis we can't distinguish above cases. - // - if (offset != Type::OffsetBot && ptset_size == 1) { - uint elem = ptset.getelem(); // Allocation node's index - // It does not matter if it is not Allocation node since - // only non-escaping allocations are scalar replaced. - if (ptnode_adr(elem)->_node->is_Allocate() && - ptnode_adr(elem)->escape_state() == PointsToNode::NoEscape) { - AllocateNode* alloc = ptnode_adr(elem)->_node->as_Allocate(); - InitializeNode* ini = alloc->initialization(); - Node* value = NULL; - if (ini != NULL) { - BasicType ft = UseCompressedOops ? T_NARROWOOP : T_OBJECT; - Node* store = ini->find_captured_store(offset, type2aelembytes(ft), igvn); - if (store != NULL && store->is_Store()) - value = store->in(MemNode::ValueIn); - } - if (value == NULL || value != ptnode_adr(value->_idx)->_node) { - // A field's initializing value was not recorded. Add NULL. - uint null_idx = UseCompressedOops ? _noop_null : _oop_null; - add_pointsto_edge(ni, null_idx); - } - } - } - - // An object is not scalar replaceable if the field which may point - // to it has unknown offset (unknown element of an array of objects). - // - if (offset == Type::OffsetBot) { - uint e_cnt = ptn->edge_count(); - for (uint ei = 0; ei < e_cnt; ei++) { - uint npi = ptn->edge_target(ei); - set_escape_state(npi, PointsToNode::ArgEscape); - ptnode_adr(npi)->_scalar_replaceable = false; - } - } - - // Currently an object is not scalar replaceable if a LoadStore node - // access its field since the field value is unknown after it. - // - bool has_LoadStore = false; - for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { - Node *use = n->fast_out(i); - if (use->is_LoadStore()) { - has_LoadStore = true; - break; - } - } - // An object is not scalar replaceable if the address points - // to unknown field (unknown element for arrays, offset is OffsetBot). - // - // Or the address may point to more then one object. This may produce - // the false positive result (set scalar_replaceable to false) - // since the flow-insensitive escape analysis can't separate - // the case when stores overwrite the field's value from the case - // when stores happened on different control branches. - // - if (ptset_size > 1 || ptset_size != 0 && - (has_LoadStore || offset == Type::OffsetBot)) { - for( VectorSetI j(&ptset); j.test(); ++j ) { - set_escape_state(j.elem, PointsToNode::ArgEscape); - ptnode_adr(j.elem)->_scalar_replaceable = false; - } - } + // Search for objects which are not scalar replaceable + // and adjust their escape state. + verify_escape_state(ni, ptset, igvn); } } } @@ -1646,6 +1694,150 @@ bool ConnectionGraph::compute_escape() { return has_non_escaping_obj; } +// Search for objects which are not scalar replaceable. +void ConnectionGraph::verify_escape_state(int nidx, VectorSet& ptset, PhaseTransform* phase) { + PointsToNode* ptn = ptnode_adr(nidx); + Node* n = ptn->_node; + assert(n->is_AddP(), "Should be called for AddP nodes only"); + // Search for objects which are not scalar replaceable. + // Mark their escape state as ArgEscape to propagate the state + // to referenced objects. + // Note: currently there are no difference in compiler optimizations + // for ArgEscape objects and NoEscape objects which are not + // scalar replaceable. + + Compile* C = _compile; + + int offset = ptn->offset(); + Node* base = get_addp_base(n); + ptset.Clear(); + PointsTo(ptset, base, phase); + int ptset_size = ptset.Size(); + + // Check if a oop field's initializing value is recorded and add + // a corresponding NULL field's value if it is not recorded. + // Connection Graph does not record a default initialization by NULL + // captured by Initialize node. + // + // Note: it will disable scalar replacement in some cases: + // + // Point p[] = new Point[1]; + // p[0] = new Point(); // Will be not scalar replaced + // + // but it will save us from incorrect optimizations in next cases: + // + // Point p[] = new Point[1]; + // if ( x ) p[0] = new Point(); // Will be not scalar replaced + // + // Do a simple control flow analysis to distinguish above cases. + // + if (offset != Type::OffsetBot && ptset_size == 1) { + uint elem = ptset.getelem(); // Allocation node's index + // It does not matter if it is not Allocation node since + // only non-escaping allocations are scalar replaced. + if (ptnode_adr(elem)->_node->is_Allocate() && + ptnode_adr(elem)->escape_state() == PointsToNode::NoEscape) { + AllocateNode* alloc = ptnode_adr(elem)->_node->as_Allocate(); + InitializeNode* ini = alloc->initialization(); + + // Check only oop fields. + const Type* adr_type = n->as_AddP()->bottom_type(); + BasicType basic_field_type = T_INT; + if (adr_type->isa_instptr()) { + ciField* field = C->alias_type(adr_type->isa_instptr())->field(); + if (field != NULL) { + basic_field_type = field->layout_type(); + } else { + // Ignore non field load (for example, klass load) + } + } else if (adr_type->isa_aryptr()) { + const Type* elemtype = adr_type->isa_aryptr()->elem(); + basic_field_type = elemtype->array_element_basic_type(); + } else { + // Raw pointers are used for initializing stores so skip it. + assert(adr_type->isa_rawptr() && base->is_Proj() && + (base->in(0) == alloc),"unexpected pointer type"); + } + if (basic_field_type == T_OBJECT || + basic_field_type == T_NARROWOOP || + basic_field_type == T_ARRAY) { + Node* value = NULL; + if (ini != NULL) { + BasicType ft = UseCompressedOops ? T_NARROWOOP : T_OBJECT; + Node* store = ini->find_captured_store(offset, type2aelembytes(ft), phase); + if (store != NULL && store->is_Store()) { + value = store->in(MemNode::ValueIn); + } else if (ptn->edge_count() > 0) { // Are there oop stores? + // Check for a store which follows allocation without branches. + // For example, a volatile field store is not collected + // by Initialize node. TODO: it would be nice to use idom() here. + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + store = n->fast_out(i); + if (store->is_Store() && store->in(0) != NULL) { + Node* ctrl = store->in(0); + while(!(ctrl == ini || ctrl == alloc || ctrl == NULL || + ctrl == C->root() || ctrl == C->top() || ctrl->is_Region() || + ctrl->is_IfTrue() || ctrl->is_IfFalse())) { + ctrl = ctrl->in(0); + } + if (ctrl == ini || ctrl == alloc) { + value = store->in(MemNode::ValueIn); + break; + } + } + } + } + } + if (value == NULL || value != ptnode_adr(value->_idx)->_node) { + // A field's initializing value was not recorded. Add NULL. + uint null_idx = UseCompressedOops ? _noop_null : _oop_null; + add_pointsto_edge(nidx, null_idx); + } + } + } + } + + // An object is not scalar replaceable if the field which may point + // to it has unknown offset (unknown element of an array of objects). + // + if (offset == Type::OffsetBot) { + uint e_cnt = ptn->edge_count(); + for (uint ei = 0; ei < e_cnt; ei++) { + uint npi = ptn->edge_target(ei); + set_escape_state(npi, PointsToNode::ArgEscape); + ptnode_adr(npi)->_scalar_replaceable = false; + } + } + + // Currently an object is not scalar replaceable if a LoadStore node + // access its field since the field value is unknown after it. + // + bool has_LoadStore = false; + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + Node *use = n->fast_out(i); + if (use->is_LoadStore()) { + has_LoadStore = true; + break; + } + } + // An object is not scalar replaceable if the address points + // to unknown field (unknown element for arrays, offset is OffsetBot). + // + // Or the address may point to more then one object. This may produce + // the false positive result (set scalar_replaceable to false) + // since the flow-insensitive escape analysis can't separate + // the case when stores overwrite the field's value from the case + // when stores happened on different control branches. + // + if (ptset_size > 1 || ptset_size != 0 && + (has_LoadStore || offset == Type::OffsetBot)) { + for( VectorSetI j(&ptset); j.test(); ++j ) { + set_escape_state(j.elem, PointsToNode::ArgEscape); + ptnode_adr(j.elem)->_scalar_replaceable = false; + } + } +} + void ConnectionGraph::process_call_arguments(CallNode *call, PhaseTransform *phase) { switch (call->Opcode()) { @@ -1657,6 +1849,7 @@ void ConnectionGraph::process_call_arguments(CallNode *call, PhaseTransform *pha assert(false, "should be done already"); break; #endif + case Op_CallLeaf: case Op_CallLeafNoFP: { // Stub calls, objects do not escape but they are not scale replaceable. @@ -1667,9 +1860,23 @@ void ConnectionGraph::process_call_arguments(CallNode *call, PhaseTransform *pha const Type* at = d->field_at(i); Node *arg = call->in(i)->uncast(); const Type *aat = phase->type(arg); - if (!arg->is_top() && at->isa_ptr() && aat->isa_ptr()) { + if (!arg->is_top() && at->isa_ptr() && aat->isa_ptr() && + ptnode_adr(arg->_idx)->escape_state() < PointsToNode::ArgEscape) { + assert(aat == Type::TOP || aat == TypePtr::NULL_PTR || aat->isa_ptr() != NULL, "expecting an Ptr"); +#ifdef ASSERT + if (!(call->Opcode() == Op_CallLeafNoFP && + call->as_CallLeaf()->_name != NULL && + (strstr(call->as_CallLeaf()->_name, "arraycopy") != 0) || + call->as_CallLeaf()->_name != NULL && + (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 || + strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 )) + ) { + call->dump(); + assert(false, "EA: unexpected CallLeaf"); + } +#endif set_escape_state(arg->_idx, PointsToNode::ArgEscape); if (arg->is_AddP()) { // @@ -1706,9 +1913,10 @@ void ConnectionGraph::process_call_arguments(CallNode *call, PhaseTransform *pha for (uint i = TypeFunc::Parms; i < d->cnt(); i++) { const Type* at = d->field_at(i); int k = i - TypeFunc::Parms; + Node *arg = call->in(i)->uncast(); - if (at->isa_oopptr() != NULL) { - Node *arg = call->in(i)->uncast(); + if (at->isa_oopptr() != NULL && + ptnode_adr(arg->_idx)->escape_state() < PointsToNode::ArgEscape) { bool global_escapes = false; bool fields_escapes = false; @@ -1942,20 +2150,23 @@ void ConnectionGraph::record_for_escape_analysis(Node *n, PhaseTransform *phase) record_for_optimizer(n); _processed.set(n->_idx); } else { - // Have to process call's arguments first. + // Don't mark as processed since call's arguments have to be processed. PointsToNode::NodeType nt = PointsToNode::UnknownType; + PointsToNode::EscapeState es = PointsToNode::UnknownEscape; // Check if a call returns an object. const TypeTuple *r = n->as_Call()->tf()->range(); - if (n->is_CallStaticJava() && r->cnt() > TypeFunc::Parms && + if (r->cnt() > TypeFunc::Parms && + r->field_at(TypeFunc::Parms)->isa_ptr() && n->as_Call()->proj_out(TypeFunc::Parms) != NULL) { - // Note: use isa_ptr() instead of isa_oopptr() here because - // the _multianewarray functions return a TypeRawPtr. - if (r->field_at(TypeFunc::Parms)->isa_ptr() != NULL) { - nt = PointsToNode::JavaObject; + nt = PointsToNode::JavaObject; + if (!n->is_CallStaticJava()) { + // Since the called mathod is statically unknown assume + // the worst case that the returned value globally escapes. + es = PointsToNode::GlobalEscape; } } - add_node(n, nt, PointsToNode::UnknownEscape, false); + add_node(n, nt, es, false); } return; } @@ -2088,18 +2299,27 @@ void ConnectionGraph::record_for_escape_analysis(Node *n, PhaseTransform *phase) } case Op_Proj: { - // we are only interested in the result projection from a call + // we are only interested in the oop result projection from a call if (n->as_Proj()->_con == TypeFunc::Parms && n->in(0)->is_Call() ) { - add_node(n, PointsToNode::LocalVar, PointsToNode::UnknownEscape, false); - process_call_result(n->as_Proj(), phase); - if (!_processed.test(n->_idx)) { - // The call's result may need to be processed later if the call - // returns it's argument and the argument is not processed yet. - _delayed_worklist.push(n); + const TypeTuple *r = n->in(0)->as_Call()->tf()->range(); + assert(r->cnt() > TypeFunc::Parms, "sanity"); + if (r->field_at(TypeFunc::Parms)->isa_ptr() != NULL) { + add_node(n, PointsToNode::LocalVar, PointsToNode::UnknownEscape, false); + int ti = n->in(0)->_idx; + // The call may not be registered yet (since not all its inputs are registered) + // if this is the projection from backbranch edge of Phi. + if (ptnode_adr(ti)->node_type() != PointsToNode::UnknownType) { + process_call_result(n->as_Proj(), phase); + } + if (!_processed.test(n->_idx)) { + // The call's result may need to be processed later if the call + // returns it's argument and the argument is not processed yet. + _delayed_worklist.push(n); + } + break; } - } else { - _processed.set(n->_idx); } + _processed.set(n->_idx); break; } case Op_Return: @@ -2160,6 +2380,15 @@ void ConnectionGraph::record_for_escape_analysis(Node *n, PhaseTransform *phase) } break; } + case Op_AryEq: + case Op_StrComp: + case Op_StrEquals: + case Op_StrIndexOf: + { + // char[] arrays passed to string intrinsics are not scalar replaceable. + add_node(n, PointsToNode::UnknownType, PointsToNode::UnknownEscape, false); + break; + } case Op_ThreadLocal: { add_node(n, PointsToNode::JavaObject, PointsToNode::ArgEscape, true); @@ -2174,6 +2403,7 @@ void ConnectionGraph::record_for_escape_analysis(Node *n, PhaseTransform *phase) void ConnectionGraph::build_connection_graph(Node *n, PhaseTransform *phase) { uint n_idx = n->_idx; + assert(ptnode_adr(n_idx)->_node != NULL, "node should be registered"); // Don't set processed bit for AddP, LoadP, StoreP since // they may need more then one pass to process. @@ -2211,6 +2441,7 @@ void ConnectionGraph::build_connection_graph(Node *n, PhaseTransform *phase) { case Op_DecodeN: { int ti = n->in(1)->_idx; + assert(ptnode_adr(ti)->node_type() != PointsToNode::UnknownType, "all nodes should be registered"); if (ptnode_adr(ti)->node_type() == PointsToNode::JavaObject) { add_pointsto_edge(n_idx, ti); } else { @@ -2250,7 +2481,6 @@ void ConnectionGraph::build_connection_graph(Node *n, PhaseTransform *phase) { #endif Node* adr = n->in(MemNode::Address)->uncast(); - const Type *adr_type = phase->type(adr); Node* adr_base; if (adr->is_AddP()) { adr_base = get_addp_base(adr); @@ -2302,13 +2532,19 @@ void ConnectionGraph::build_connection_graph(Node *n, PhaseTransform *phase) { } case Op_Proj: { - // we are only interested in the result projection from a call + // we are only interested in the oop result projection from a call if (n->as_Proj()->_con == TypeFunc::Parms && n->in(0)->is_Call() ) { - process_call_result(n->as_Proj(), phase); - assert(_processed.test(n_idx), "all call results should be processed"); - } else { - assert(false, "Op_Proj"); + assert(ptnode_adr(n->in(0)->_idx)->node_type() != PointsToNode::UnknownType, + "all nodes should be registered"); + const TypeTuple *r = n->in(0)->as_Call()->tf()->range(); + assert(r->cnt() > TypeFunc::Parms, "sanity"); + if (r->field_at(TypeFunc::Parms)->isa_ptr() != NULL) { + process_call_result(n->as_Proj(), phase); + assert(_processed.test(n_idx), "all call results should be processed"); + break; + } } + assert(false, "Op_Proj"); break; } case Op_Return: @@ -2320,6 +2556,7 @@ void ConnectionGraph::build_connection_graph(Node *n, PhaseTransform *phase) { } #endif int ti = n->in(TypeFunc::Parms)->_idx; + assert(ptnode_adr(ti)->node_type() != PointsToNode::UnknownType, "node should be registered"); if (ptnode_adr(ti)->node_type() == PointsToNode::JavaObject) { add_pointsto_edge(n_idx, ti); } else { @@ -2354,14 +2591,38 @@ void ConnectionGraph::build_connection_graph(Node *n, PhaseTransform *phase) { } break; } + case Op_AryEq: + case Op_StrComp: + case Op_StrEquals: + case Op_StrIndexOf: + { + // char[] arrays passed to string intrinsic do not escape but + // they are not scalar replaceable. Adjust escape state for them. + // Start from in(2) edge since in(1) is memory edge. + for (uint i = 2; i < n->req(); i++) { + Node* adr = n->in(i)->uncast(); + const Type *at = phase->type(adr); + if (!adr->is_top() && at->isa_ptr()) { + assert(at == Type::TOP || at == TypePtr::NULL_PTR || + at->isa_ptr() != NULL, "expecting an Ptr"); + if (adr->is_AddP()) { + adr = get_addp_base(adr); + } + // Mark as ArgEscape everything "adr" could point to. + set_escape_state(adr->_idx, PointsToNode::ArgEscape); + } + } + _processed.set(n_idx); + break; + } case Op_ThreadLocal: { assert(false, "Op_ThreadLocal"); break; } default: - ; - // nothing to do + // This method should be called only for EA specific nodes. + ShouldNotReachHere(); } } diff --git a/src/share/vm/opto/escape.hpp b/src/share/vm/opto/escape.hpp index 1ce0cc9cf..576043beb 100644 --- a/src/share/vm/opto/escape.hpp +++ b/src/share/vm/opto/escape.hpp @@ -210,6 +210,8 @@ private: Unique_Node_List _delayed_worklist; // Nodes to be processed before // the call build_connection_graph(). + GrowableArray<MergeMemNode *> _mergemem_worklist; // List of all MergeMem nodes + VectorSet _processed; // Records which nodes have been // processed. @@ -289,7 +291,7 @@ private: bool split_AddP(Node *addp, Node *base, PhaseGVN *igvn); PhiNode *create_split_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *> &orig_phi_worklist, PhaseGVN *igvn, bool &new_created); PhiNode *split_memory_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *> &orig_phi_worklist, PhaseGVN *igvn); - Node *find_mem(Node *mem, int alias_idx, PhaseGVN *igvn); + void move_inst_mem(Node* n, GrowableArray<PhiNode *> &orig_phis, PhaseGVN *igvn); Node *find_inst_mem(Node *mem, int alias_idx,GrowableArray<PhiNode *> &orig_phi_worklist, PhaseGVN *igvn); // Propagate unique types created for unescaped allocated objects @@ -298,7 +300,6 @@ private: // manage entries in _node_map void set_map(int idx, Node *n) { _node_map.map(idx, n); } - void set_map_phi(int idx, PhiNode *p) { _node_map.map(idx, (Node *) p); } Node *get_map(int idx) { return _node_map[idx]; } PhiNode *get_map_phi(int idx) { Node *phi = _node_map[idx]; @@ -315,6 +316,9 @@ private: // Set the escape state of a node void set_escape_state(uint ni, PointsToNode::EscapeState es); + // Search for objects which are not scalar replaceable. + void verify_escape_state(int nidx, VectorSet& ptset, PhaseTransform* phase); + public: ConnectionGraph(Compile *C); diff --git a/src/share/vm/opto/graphKit.cpp b/src/share/vm/opto/graphKit.cpp index b63aae489..3a42be98b 100644 --- a/src/share/vm/opto/graphKit.cpp +++ b/src/share/vm/opto/graphKit.cpp @@ -1351,8 +1351,8 @@ void GraphKit::set_all_memory(Node* newmem) { } //------------------------------set_all_memory_call---------------------------- -void GraphKit::set_all_memory_call(Node* call) { - Node* newmem = _gvn.transform( new (C, 1) ProjNode(call, TypeFunc::Memory) ); +void GraphKit::set_all_memory_call(Node* call, bool separate_io_proj) { + Node* newmem = _gvn.transform( new (C, 1) ProjNode(call, TypeFunc::Memory, separate_io_proj) ); set_all_memory(newmem); } @@ -1573,7 +1573,7 @@ void GraphKit::set_arguments_for_java_call(CallJavaNode* call) { //---------------------------set_edges_for_java_call--------------------------- // Connect a newly created call into the current JVMS. // A return value node (if any) is returned from set_edges_for_java_call. -void GraphKit::set_edges_for_java_call(CallJavaNode* call, bool must_throw) { +void GraphKit::set_edges_for_java_call(CallJavaNode* call, bool must_throw, bool separate_io_proj) { // Add the predefined inputs: call->init_req( TypeFunc::Control, control() ); @@ -1595,13 +1595,13 @@ void GraphKit::set_edges_for_java_call(CallJavaNode* call, bool must_throw) { // Re-use the current map to produce the result. set_control(_gvn.transform(new (C, 1) ProjNode(call, TypeFunc::Control))); - set_i_o( _gvn.transform(new (C, 1) ProjNode(call, TypeFunc::I_O ))); - set_all_memory_call(xcall); + set_i_o( _gvn.transform(new (C, 1) ProjNode(call, TypeFunc::I_O , separate_io_proj))); + set_all_memory_call(xcall, separate_io_proj); //return xcall; // no need, caller already has it } -Node* GraphKit::set_results_for_java_call(CallJavaNode* call) { +Node* GraphKit::set_results_for_java_call(CallJavaNode* call, bool separate_io_proj) { if (stopped()) return top(); // maybe the call folded up? // Capture the return value, if any. @@ -1614,8 +1614,15 @@ Node* GraphKit::set_results_for_java_call(CallJavaNode* call) { // Note: Since any out-of-line call can produce an exception, // we always insert an I_O projection from the call into the result. - make_slow_call_ex(call, env()->Throwable_klass(), false); + make_slow_call_ex(call, env()->Throwable_klass(), separate_io_proj); + if (separate_io_proj) { + // The caller requested separate projections be used by the fall + // through and exceptional paths, so replace the projections for + // the fall through path. + set_i_o(_gvn.transform( new (C, 1) ProjNode(call, TypeFunc::I_O) )); + set_all_memory(_gvn.transform( new (C, 1) ProjNode(call, TypeFunc::Memory) )); + } return ret; } @@ -1678,6 +1685,64 @@ void GraphKit::set_predefined_output_for_runtime_call(Node* call, } } + +// Replace the call with the current state of the kit. +void GraphKit::replace_call(CallNode* call, Node* result) { + JVMState* ejvms = NULL; + if (has_exceptions()) { + ejvms = transfer_exceptions_into_jvms(); + } + + SafePointNode* final_state = stop(); + + // Find all the needed outputs of this call + CallProjections callprojs; + call->extract_projections(&callprojs, true); + + // Replace all the old call edges with the edges from the inlining result + C->gvn_replace_by(callprojs.fallthrough_catchproj, final_state->in(TypeFunc::Control)); + C->gvn_replace_by(callprojs.fallthrough_memproj, final_state->in(TypeFunc::Memory)); + C->gvn_replace_by(callprojs.fallthrough_ioproj, final_state->in(TypeFunc::I_O)); + + // Replace the result with the new result if it exists and is used + if (callprojs.resproj != NULL && result != NULL) { + C->gvn_replace_by(callprojs.resproj, result); + } + + if (ejvms == NULL) { + // No exception edges to simply kill off those paths + C->gvn_replace_by(callprojs.catchall_catchproj, C->top()); + C->gvn_replace_by(callprojs.catchall_memproj, C->top()); + C->gvn_replace_by(callprojs.catchall_ioproj, C->top()); + + // Replace the old exception object with top + if (callprojs.exobj != NULL) { + C->gvn_replace_by(callprojs.exobj, C->top()); + } + } else { + GraphKit ekit(ejvms); + + // Load my combined exception state into the kit, with all phis transformed: + SafePointNode* ex_map = ekit.combine_and_pop_all_exception_states(); + + Node* ex_oop = ekit.use_exception_state(ex_map); + + C->gvn_replace_by(callprojs.catchall_catchproj, ekit.control()); + C->gvn_replace_by(callprojs.catchall_memproj, ekit.reset_memory()); + C->gvn_replace_by(callprojs.catchall_ioproj, ekit.i_o()); + + // Replace the old exception object with the newly created one + if (callprojs.exobj != NULL) { + C->gvn_replace_by(callprojs.exobj, ex_oop); + } + } + + // Disconnect the call from the graph + call->disconnect_inputs(NULL); + C->gvn_replace_by(call, C->top()); +} + + //------------------------------increment_counter------------------------------ // for statistics: increment a VM counter by 1 @@ -3459,4 +3524,3 @@ void GraphKit::g1_write_barrier_post(Node* oop_store, sync_kit(ideal); } #undef __ - diff --git a/src/share/vm/opto/graphKit.hpp b/src/share/vm/opto/graphKit.hpp index b127789b5..8135aca2d 100644 --- a/src/share/vm/opto/graphKit.hpp +++ b/src/share/vm/opto/graphKit.hpp @@ -279,6 +279,34 @@ class GraphKit : public Phase { } Node* basic_plus_adr(Node* base, Node* ptr, Node* offset); + + // Some convenient shortcuts for common nodes + Node* IfTrue(IfNode* iff) { return _gvn.transform(new (C,1) IfTrueNode(iff)); } + Node* IfFalse(IfNode* iff) { return _gvn.transform(new (C,1) IfFalseNode(iff)); } + + Node* AddI(Node* l, Node* r) { return _gvn.transform(new (C,3) AddINode(l, r)); } + Node* SubI(Node* l, Node* r) { return _gvn.transform(new (C,3) SubINode(l, r)); } + Node* MulI(Node* l, Node* r) { return _gvn.transform(new (C,3) MulINode(l, r)); } + Node* DivI(Node* ctl, Node* l, Node* r) { return _gvn.transform(new (C,3) DivINode(ctl, l, r)); } + + Node* AndI(Node* l, Node* r) { return _gvn.transform(new (C,3) AndINode(l, r)); } + Node* OrI(Node* l, Node* r) { return _gvn.transform(new (C,3) OrINode(l, r)); } + Node* XorI(Node* l, Node* r) { return _gvn.transform(new (C,3) XorINode(l, r)); } + + Node* MaxI(Node* l, Node* r) { return _gvn.transform(new (C,3) MaxINode(l, r)); } + Node* MinI(Node* l, Node* r) { return _gvn.transform(new (C,3) MinINode(l, r)); } + + Node* LShiftI(Node* l, Node* r) { return _gvn.transform(new (C,3) LShiftINode(l, r)); } + Node* RShiftI(Node* l, Node* r) { return _gvn.transform(new (C,3) RShiftINode(l, r)); } + Node* URShiftI(Node* l, Node* r) { return _gvn.transform(new (C,3) URShiftINode(l, r)); } + + Node* CmpI(Node* l, Node* r) { return _gvn.transform(new (C,3) CmpINode(l, r)); } + Node* CmpL(Node* l, Node* r) { return _gvn.transform(new (C,3) CmpLNode(l, r)); } + Node* CmpP(Node* l, Node* r) { return _gvn.transform(new (C,3) CmpPNode(l, r)); } + Node* Bool(Node* cmp, BoolTest::mask relop) { return _gvn.transform(new (C,2) BoolNode(cmp, relop)); } + + Node* AddP(Node* b, Node* a, Node* o) { return _gvn.transform(new (C,4) AddPNode(b, a, o)); } + // Convert between int and long, and size_t. // (See macros ConvI2X, etc., in type.hpp for ConvI2X, etc.) Node* ConvI2L(Node* offset); @@ -400,7 +428,7 @@ class GraphKit : public Phase { void set_all_memory(Node* newmem); // Create a memory projection from the call, then set_all_memory. - void set_all_memory_call(Node* call); + void set_all_memory_call(Node* call, bool separate_io_proj = false); // Create a LoadNode, reading from the parser's memory state. // (Note: require_atomic_access is useful only with T_LONG.) @@ -543,12 +571,12 @@ class GraphKit : public Phase { // Transform the call, and update the basics: control, i_o, memory. // (The next step is usually to call set_results_for_java_call.) void set_edges_for_java_call(CallJavaNode* call, - bool must_throw = false); + bool must_throw = false, bool separate_io_proj = false); // Finish up a java call that was started by set_edges_for_java_call. // Call add_exception on any throw arising from the call. // Return the call result (transformed). - Node* set_results_for_java_call(CallJavaNode* call); + Node* set_results_for_java_call(CallJavaNode* call, bool separate_io_proj = false); // Similar to set_edges_for_java_call, but simplified for runtime calls. void set_predefined_output_for_runtime_call(Node* call) { @@ -559,6 +587,11 @@ class GraphKit : public Phase { const TypePtr* hook_mem); Node* set_predefined_input_for_runtime_call(SafePointNode* call); + // Replace the call with the current state of the kit. Requires + // that the call was generated with separate io_projs so that + // exceptional control flow can be handled properly. + void replace_call(CallNode* call, Node* result); + // helper functions for statistics void increment_counter(address counter_addr); // increment a debug counter void increment_counter(Node* counter_addr); // increment a debug counter diff --git a/src/share/vm/opto/lcm.cpp b/src/share/vm/opto/lcm.cpp index 31de55a54..0afe80c8f 100644 --- a/src/share/vm/opto/lcm.cpp +++ b/src/share/vm/opto/lcm.cpp @@ -616,8 +616,9 @@ bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, int *ready_cnt, Vect assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark"); } } - if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire && - n->req() > TypeFunc::Parms ) { + if( n->is_Mach() && n->req() > TypeFunc::Parms && + (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire || + n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) { // MemBarAcquire could be created without Precedent edge. // del_req() replaces the specified edge with the last input edge // and then removes the last edge. If the specified edge > number of diff --git a/src/share/vm/opto/loopnode.cpp b/src/share/vm/opto/loopnode.cpp index a1d872252..883fa75c7 100644 --- a/src/share/vm/opto/loopnode.cpp +++ b/src/share/vm/opto/loopnode.cpp @@ -1279,7 +1279,8 @@ void IdealLoopTree::counted_loop( PhaseIdealLoop *phase ) { // Visit all children, looking for Phis for (DUIterator i = cl->outs(); cl->has_out(i); i++) { Node *out = cl->out(i); - if (!out->is_Phi() || out == phi) continue; // Looking for other phis + // Look for other phis (secondary IVs). Skip dead ones + if (!out->is_Phi() || out == phi || !phase->has_node(out)) continue; PhiNode* phi2 = out->as_Phi(); Node *incr2 = phi2->in( LoopNode::LoopBackControl ); // Look for induction variables of the form: X += constant diff --git a/src/share/vm/opto/macro.cpp b/src/share/vm/opto/macro.cpp index e2421a7f3..2fdc335b9 100644 --- a/src/share/vm/opto/macro.cpp +++ b/src/share/vm/opto/macro.cpp @@ -316,6 +316,21 @@ static Node *scan_mem_chain(Node *mem, int alias_idx, int offset, Node *start_me assert(adr_idx == Compile::AliasIdxRaw, "address must match or be raw"); } mem = mem->in(MemNode::Memory); + } else if (mem->is_ClearArray()) { + if (!ClearArrayNode::step_through(&mem, alloc->_idx, phase)) { + // Can not bypass initialization of the instance + // we are looking. + debug_only(intptr_t offset;) + assert(alloc == AllocateNode::Ideal_allocation(mem->in(3), phase, offset), "sanity"); + InitializeNode* init = alloc->as_Allocate()->initialization(); + // We are looking for stored value, return Initialize node + // or memory edge from Allocate node. + if (init != NULL) + return init; + else + return alloc->in(TypeFunc::Memory); // It will produce zero value (see callers). + } + // Otherwise skip it (the call updated 'mem' value). } else if (mem->Opcode() == Op_SCMemProj) { assert(mem->in(0)->is_LoadStore(), "sanity"); const TypePtr* atype = mem->in(0)->in(MemNode::Address)->bottom_type()->is_ptr(); @@ -823,6 +838,18 @@ void PhaseMacroExpand::process_users_of_allocation(AllocateNode *alloc) { Node *n = use->last_out(k); uint oc2 = use->outcnt(); if (n->is_Store()) { +#ifdef ASSERT + // Verify that there is no dependent MemBarVolatile nodes, + // they should be removed during IGVN, see MemBarNode::Ideal(). + for (DUIterator_Fast pmax, p = n->fast_outs(pmax); + p < pmax; p++) { + Node* mb = n->fast_out(p); + assert(mb->is_Initialize() || !mb->is_MemBar() || + mb->req() <= MemBarNode::Precedent || + mb->in(MemBarNode::Precedent) != n, + "MemBarVolatile should be eliminated for non-escaping object"); + } +#endif _igvn.replace_node(n, n->in(MemNode::Memory)); } else { eliminate_card_mark(n); @@ -912,15 +939,29 @@ bool PhaseMacroExpand::eliminate_allocate_node(AllocateNode *alloc) { return false; } + CompileLog* log = C->log(); + if (log != NULL) { + Node* klass = alloc->in(AllocateNode::KlassNode); + const TypeKlassPtr* tklass = _igvn.type(klass)->is_klassptr(); + log->head("eliminate_allocation type='%d'", + log->identify(tklass->klass())); + JVMState* p = alloc->jvms(); + while (p != NULL) { + log->elem("jvms bci='%d' method='%d'", p->bci(), log->identify(p->method())); + p = p->caller(); + } + log->tail("eliminate_allocation"); + } + process_users_of_allocation(alloc); #ifndef PRODUCT -if (PrintEliminateAllocations) { - if (alloc->is_AllocateArray()) - tty->print_cr("++++ Eliminated: %d AllocateArray", alloc->_idx); - else - tty->print_cr("++++ Eliminated: %d Allocate", alloc->_idx); -} + if (PrintEliminateAllocations) { + if (alloc->is_AllocateArray()) + tty->print_cr("++++ Eliminated: %d AllocateArray", alloc->_idx); + else + tty->print_cr("++++ Eliminated: %d Allocate", alloc->_idx); + } #endif return true; @@ -1639,6 +1680,18 @@ bool PhaseMacroExpand::eliminate_locking_node(AbstractLockNode *alock) { } // if (!oldbox->is_eliminated()) } // if (alock->is_Lock() && !lock->is_coarsened()) + CompileLog* log = C->log(); + if (log != NULL) { + log->head("eliminate_lock lock='%d'", + alock->is_Lock()); + JVMState* p = alock->jvms(); + while (p != NULL) { + log->elem("jvms bci='%d' method='%d'", p->bci(), log->identify(p->method())); + p = p->caller(); + } + log->tail("eliminate_lock"); + } + #ifndef PRODUCT if (PrintEliminateLocks) { if (alock->is_Lock()) { diff --git a/src/share/vm/opto/memnode.cpp b/src/share/vm/opto/memnode.cpp index 02c15af72..4698add45 100644 --- a/src/share/vm/opto/memnode.cpp +++ b/src/share/vm/opto/memnode.cpp @@ -123,6 +123,13 @@ Node *MemNode::optimize_simple_memory_chain(Node *mchain, const TypePtr *t_adr, } else { assert(false, "unexpected projection"); } + } else if (result->is_ClearArray()) { + if (!ClearArrayNode::step_through(&result, instance_id, phase)) { + // Can not bypass initialization of the instance + // we are looking for. + break; + } + // Otherwise skip it (the call updated 'result' value). } else if (result->is_MergeMem()) { result = step_through_mergemem(phase, result->as_MergeMem(), t_adr, NULL, tty); } @@ -537,6 +544,15 @@ Node* MemNode::find_previous_store(PhaseTransform* phase) { } else if (mem->is_Proj() && mem->in(0)->is_MemBar()) { mem = mem->in(0)->in(TypeFunc::Memory); continue; // (a) advance through independent MemBar memory + } else if (mem->is_ClearArray()) { + if (ClearArrayNode::step_through(&mem, (uint)addr_t->instance_id(), phase)) { + // (the call updated 'mem' value) + continue; // (a) advance through independent allocation memory + } else { + // Can not bypass initialization of the instance + // we are looking for. + return mem; + } } else if (mem->is_MergeMem()) { int alias_idx = phase->C->get_alias_index(adr_type()); mem = mem->as_MergeMem()->memory_at(alias_idx); @@ -1503,6 +1519,8 @@ const Type *LoadNode::Value( PhaseTransform *phase ) const { } } } else if (tp->base() == Type::InstPtr) { + const TypeInstPtr* tinst = tp->is_instptr(); + ciKlass* klass = tinst->klass(); assert( off != Type::OffsetBot || // arrays can be cast to Objects tp->is_oopptr()->klass()->is_java_lang_Object() || @@ -1510,6 +1528,25 @@ const Type *LoadNode::Value( PhaseTransform *phase ) const { phase->C->has_unsafe_access(), "Field accesses must be precise" ); // For oop loads, we expect the _type to be precise + if (OptimizeStringConcat && klass == phase->C->env()->String_klass() && + adr->is_AddP() && off != Type::OffsetBot) { + // For constant Strings treat the fields as compile time constants. + Node* base = adr->in(AddPNode::Base); + if (base->Opcode() == Op_ConP) { + const TypeOopPtr* t = phase->type(base)->isa_oopptr(); + ciObject* string = t->const_oop(); + ciConstant constant = string->as_instance()->field_value_by_offset(off); + if (constant.basic_type() == T_INT) { + return TypeInt::make(constant.as_int()); + } else if (constant.basic_type() == T_ARRAY) { + if (adr->bottom_type()->is_ptr_to_narrowoop()) { + return TypeNarrowOop::make_from_constant(constant.as_object()); + } else { + return TypeOopPtr::make_from_constant(constant.as_object()); + } + } + } + } } else if (tp->base() == Type::KlassPtr) { assert( off != Type::OffsetBot || // arrays can be cast to Objects @@ -2433,6 +2470,31 @@ Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape){ return mem; } +//----------------------------step_through---------------------------------- +// Return allocation input memory edge if it is different instance +// or itself if it is the one we are looking for. +bool ClearArrayNode::step_through(Node** np, uint instance_id, PhaseTransform* phase) { + Node* n = *np; + assert(n->is_ClearArray(), "sanity"); + intptr_t offset; + AllocateNode* alloc = AllocateNode::Ideal_allocation(n->in(3), phase, offset); + // This method is called only before Allocate nodes are expanded during + // macro nodes expansion. Before that ClearArray nodes are only generated + // in LibraryCallKit::generate_arraycopy() which follows allocations. + assert(alloc != NULL, "should have allocation"); + if (alloc->_idx == instance_id) { + // Can not bypass initialization of the instance we are looking for. + return false; + } + // Otherwise skip it. + InitializeNode* init = alloc->initialization(); + if (init != NULL) + *np = init->in(TypeFunc::Memory); + else + *np = alloc->in(TypeFunc::Memory); + return true; +} + //----------------------------clear_memory------------------------------------- // Generate code to initialize object storage to zero. Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest, @@ -2606,7 +2668,30 @@ MemBarNode* MemBarNode::make(Compile* C, int opcode, int atp, Node* pn) { // Return a node which is more "ideal" than the current node. Strip out // control copies Node *MemBarNode::Ideal(PhaseGVN *phase, bool can_reshape) { - return remove_dead_region(phase, can_reshape) ? this : NULL; + if (remove_dead_region(phase, can_reshape)) return this; + + // Eliminate volatile MemBars for scalar replaced objects. + if (can_reshape && req() == (Precedent+1) && + (Opcode() == Op_MemBarAcquire || Opcode() == Op_MemBarVolatile)) { + // Volatile field loads and stores. + Node* my_mem = in(MemBarNode::Precedent); + if (my_mem != NULL && my_mem->is_Mem()) { + const TypeOopPtr* t_oop = my_mem->in(MemNode::Address)->bottom_type()->isa_oopptr(); + // Check for scalar replaced object reference. + if( t_oop != NULL && t_oop->is_known_instance_field() && + t_oop->offset() != Type::OffsetBot && + t_oop->offset() != Type::OffsetTop) { + // Replace MemBar projections by its inputs. + PhaseIterGVN* igvn = phase->is_IterGVN(); + igvn->replace_node(proj_out(TypeFunc::Memory), in(TypeFunc::Memory)); + igvn->replace_node(proj_out(TypeFunc::Control), in(TypeFunc::Control)); + // Must return either the original node (now dead) or a new node + // (Do not return a top here, since that would break the uniqueness of top.) + return new (phase->C, 1) ConINode(TypeInt::ZERO); + } + } + } + return NULL; } //------------------------------Value------------------------------------------ diff --git a/src/share/vm/opto/memnode.hpp b/src/share/vm/opto/memnode.hpp index a71df7d40..cd0e60d97 100644 --- a/src/share/vm/opto/memnode.hpp +++ b/src/share/vm/opto/memnode.hpp @@ -717,7 +717,10 @@ public: //------------------------------ClearArray------------------------------------- class ClearArrayNode: public Node { public: - ClearArrayNode( Node *ctrl, Node *arymem, Node *word_cnt, Node *base ) : Node(ctrl,arymem,word_cnt,base) {} + ClearArrayNode( Node *ctrl, Node *arymem, Node *word_cnt, Node *base ) + : Node(ctrl,arymem,word_cnt,base) { + init_class_id(Class_ClearArray); + } virtual int Opcode() const; virtual const Type *bottom_type() const { return Type::MEMORY; } // ClearArray modifies array elements, and so affects only the @@ -743,6 +746,9 @@ public: Node* start_offset, Node* end_offset, PhaseGVN* phase); + // Return allocation input memory edge if it is different instance + // or itself if it is the one we are looking for. + static bool step_through(Node** np, uint instance_id, PhaseTransform* phase); }; //------------------------------StrComp------------------------------------- diff --git a/src/share/vm/opto/node.hpp b/src/share/vm/opto/node.hpp index bad160705..92da96b40 100644 --- a/src/share/vm/opto/node.hpp +++ b/src/share/vm/opto/node.hpp @@ -47,6 +47,7 @@ class CallStaticJavaNode; class CatchNode; class CatchProjNode; class CheckCastPPNode; +class ClearArrayNode; class CmpNode; class CodeBuffer; class ConstraintCastNode; @@ -599,8 +600,9 @@ public: DEFINE_CLASS_ID(BoxLock, Node, 10) DEFINE_CLASS_ID(Add, Node, 11) DEFINE_CLASS_ID(Mul, Node, 12) + DEFINE_CLASS_ID(ClearArray, Node, 13) - _max_classes = ClassMask_Mul + _max_classes = ClassMask_ClearArray }; #undef DEFINE_CLASS_ID @@ -661,18 +663,25 @@ public: return (_flags & Flag_is_Call) != 0; } + CallNode* isa_Call() const { + return is_Call() ? as_Call() : NULL; + } + CallNode *as_Call() const { // Only for CallNode (not for MachCallNode) assert((_class_id & ClassMask_Call) == Class_Call, "invalid node class"); return (CallNode*)this; } - #define DEFINE_CLASS_QUERY(type) \ - bool is_##type() const { \ + #define DEFINE_CLASS_QUERY(type) \ + bool is_##type() const { \ return ((_class_id & ClassMask_##type) == Class_##type); \ - } \ - type##Node *as_##type() const { \ - assert(is_##type(), "invalid node class"); \ - return (type##Node*)this; \ + } \ + type##Node *as_##type() const { \ + assert(is_##type(), "invalid node class"); \ + return (type##Node*)this; \ + } \ + type##Node* isa_##type() const { \ + return (is_##type()) ? as_##type() : NULL; \ } DEFINE_CLASS_QUERY(AbstractLock) @@ -691,6 +700,7 @@ public: DEFINE_CLASS_QUERY(CatchProj) DEFINE_CLASS_QUERY(CheckCastPP) DEFINE_CLASS_QUERY(ConstraintCast) + DEFINE_CLASS_QUERY(ClearArray) DEFINE_CLASS_QUERY(CMove) DEFINE_CLASS_QUERY(Cmp) DEFINE_CLASS_QUERY(CountedLoop) @@ -1249,6 +1259,24 @@ Node* Node::last_out(DUIterator_Last& i) const { #undef I_VDUI_ONLY #undef VDUI_ONLY +// An Iterator that truly follows the iterator pattern. Doesn't +// support deletion but could be made to. +// +// for (SimpleDUIterator i(n); i.has_next(); i.next()) { +// Node* m = i.get(); +// +class SimpleDUIterator : public StackObj { + private: + Node* node; + DUIterator_Fast i; + DUIterator_Fast imax; + public: + SimpleDUIterator(Node* n): node(n), i(n->fast_outs(imax)) {} + bool has_next() { return i < imax; } + void next() { i++; } + Node* get() { return node->fast_out(i); } +}; + //----------------------------------------------------------------------------- // Map dense integer indices to Nodes. Uses classic doubling-array trick. @@ -1290,6 +1318,12 @@ class Node_List : public Node_Array { public: Node_List() : Node_Array(Thread::current()->resource_area()), _cnt(0) {} Node_List(Arena *a) : Node_Array(a), _cnt(0) {} + bool contains(Node* n) { + for (uint e = 0; e < size(); e++) { + if (at(e) == n) return true; + } + return false; + } void insert( uint i, Node *n ) { Node_Array::insert(i,n); _cnt++; } void remove( uint i ) { Node_Array::remove(i); _cnt--; } void push( Node *b ) { map(_cnt++,b); } diff --git a/src/share/vm/opto/parse3.cpp b/src/share/vm/opto/parse3.cpp index 7125cb5d6..83a3927a5 100644 --- a/src/share/vm/opto/parse3.cpp +++ b/src/share/vm/opto/parse3.cpp @@ -240,19 +240,19 @@ void Parse::do_put_xxx(const TypePtr* obj_type, Node* obj, ciField* field, bool // membar is dependent on the store, keeping any other membars generated // below from floating up past the store. int adr_idx = C->get_alias_index(adr_type); - insert_mem_bar_volatile(Op_MemBarVolatile, adr_idx); + insert_mem_bar_volatile(Op_MemBarVolatile, adr_idx, store); // Now place a membar for AliasIdxBot for the unknown yet-to-be-parsed // volatile alias indices. Skip this if the membar is redundant. if (adr_idx != Compile::AliasIdxBot) { - insert_mem_bar_volatile(Op_MemBarVolatile, Compile::AliasIdxBot); + insert_mem_bar_volatile(Op_MemBarVolatile, Compile::AliasIdxBot, store); } // Finally, place alias-index-specific membars for each volatile index // that isn't the adr_idx membar. Typically there's only 1 or 2. for( int i = Compile::AliasIdxRaw; i < C->num_alias_types(); i++ ) { if (i != adr_idx && C->alias_type(i)->is_volatile()) { - insert_mem_bar_volatile(Op_MemBarVolatile, i); + insert_mem_bar_volatile(Op_MemBarVolatile, i, store); } } } diff --git a/src/share/vm/opto/parseHelper.cpp b/src/share/vm/opto/parseHelper.cpp index 6b3f432ea..ab7883fd8 100644 --- a/src/share/vm/opto/parseHelper.cpp +++ b/src/share/vm/opto/parseHelper.cpp @@ -221,6 +221,14 @@ void Parse::do_new() { // Push resultant oop onto stack push(obj); + + // Keep track of whether opportunities exist for StringBuilder + // optimizations. + if (OptimizeStringConcat && + (klass == C->env()->StringBuilder_klass() || + klass == C->env()->StringBuffer_klass())) { + C->set_has_stringbuilder(true); + } } #ifndef PRODUCT diff --git a/src/share/vm/opto/phase.hpp b/src/share/vm/opto/phase.hpp index d0d54e1d9..9df5500fd 100644 --- a/src/share/vm/opto/phase.hpp +++ b/src/share/vm/opto/phase.hpp @@ -44,6 +44,7 @@ public: BlockLayout, // Linear ordering of blocks Register_Allocation, // Register allocation, duh LIVE, // Dragon-book LIVE range problem + StringOpts, // StringBuilder related optimizations Interference_Graph, // Building the IFG Coalesce, // Coalescing copies Ideal_Loop, // Find idealized trip-counted loops diff --git a/src/share/vm/opto/phaseX.hpp b/src/share/vm/opto/phaseX.hpp index b9391ba1d..33ff56f0e 100644 --- a/src/share/vm/opto/phaseX.hpp +++ b/src/share/vm/opto/phaseX.hpp @@ -345,7 +345,11 @@ public: Node *hash_find(const Node *n) { return _table.hash_find(n); } // Used after parsing to eliminate values that are no longer in program - void remove_useless_nodes(VectorSet &useful) { _table.remove_useless_nodes(useful); } + void remove_useless_nodes(VectorSet &useful) { + _table.remove_useless_nodes(useful); + // this may invalidate cached cons so reset the cache + init_con_caches(); + } virtual ConNode* uncached_makecon(const Type* t); // override from PhaseTransform diff --git a/src/share/vm/opto/stringopts.cpp b/src/share/vm/opto/stringopts.cpp new file mode 100644 index 000000000..192d31f82 --- /dev/null +++ b/src/share/vm/opto/stringopts.cpp @@ -0,0 +1,1395 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +#include "incls/_precompiled.incl" +#include "incls/_stringopts.cpp.incl" + +#define __ kit. + +class StringConcat : public ResourceObj { + private: + PhaseStringOpts* _stringopts; + Node* _string_alloc; + AllocateNode* _begin; // The allocation the begins the pattern + CallStaticJavaNode* _end; // The final call of the pattern. Will either be + // SB.toString or or String.<init>(SB.toString) + bool _multiple; // indicates this is a fusion of two or more + // separate StringBuilders + + Node* _arguments; // The list of arguments to be concatenated + GrowableArray<int> _mode; // into a String along with a mode flag + // indicating how to treat the value. + + Node_List _control; // List of control nodes that will be deleted + Node_List _uncommon_traps; // Uncommon traps that needs to be rewritten + // to restart at the initial JVMState. + public: + // Mode for converting arguments to Strings + enum { + StringMode, + IntMode, + CharMode + }; + + StringConcat(PhaseStringOpts* stringopts, CallStaticJavaNode* end): + _end(end), + _begin(NULL), + _multiple(false), + _string_alloc(NULL), + _stringopts(stringopts) { + _arguments = new (_stringopts->C, 1) Node(1); + _arguments->del_req(0); + } + + bool validate_control_flow(); + + void merge_add() { +#if 0 + // XXX This is place holder code for reusing an existing String + // allocation but the logic for checking the state safety is + // probably inadequate at the moment. + CallProjections endprojs; + sc->end()->extract_projections(&endprojs, false); + if (endprojs.resproj != NULL) { + for (SimpleDUIterator i(endprojs.resproj); i.has_next(); i.next()) { + CallStaticJavaNode *use = i.get()->isa_CallStaticJava(); + if (use != NULL && use->method() != NULL && + use->method()->holder() == C->env()->String_klass() && + use->method()->name() == ciSymbol::object_initializer_name() && + use->in(TypeFunc::Parms + 1) == endprojs.resproj) { + // Found useless new String(sb.toString()) so reuse the newly allocated String + // when creating the result instead of allocating a new one. + sc->set_string_alloc(use->in(TypeFunc::Parms)); + sc->set_end(use); + } + } + } +#endif + } + + StringConcat* merge(StringConcat* other, Node* arg); + + void set_allocation(AllocateNode* alloc) { + _begin = alloc; + } + + void append(Node* value, int mode) { + _arguments->add_req(value); + _mode.append(mode); + } + void push(Node* value, int mode) { + _arguments->ins_req(0, value); + _mode.insert_before(0, mode); + } + void push_string(Node* value) { + push(value, StringMode); + } + void push_int(Node* value) { + push(value, IntMode); + } + void push_char(Node* value) { + push(value, CharMode); + } + + Node* argument(int i) { + return _arguments->in(i); + } + void set_argument(int i, Node* value) { + _arguments->set_req(i, value); + } + int num_arguments() { + return _mode.length(); + } + int mode(int i) { + return _mode.at(i); + } + void add_control(Node* ctrl) { + assert(!_control.contains(ctrl), "only push once"); + _control.push(ctrl); + } + CallStaticJavaNode* end() { return _end; } + AllocateNode* begin() { return _begin; } + Node* string_alloc() { return _string_alloc; } + + void eliminate_unneeded_control(); + void eliminate_initialize(InitializeNode* init); + void eliminate_call(CallNode* call); + + void maybe_log_transform() { + CompileLog* log = _stringopts->C->log(); + if (log != NULL) { + log->head("replace_string_concat arguments='%d' string_alloc='%d' multiple='%d'", + num_arguments(), + _string_alloc != NULL, + _multiple); + JVMState* p = _begin->jvms(); + while (p != NULL) { + log->elem("jvms bci='%d' method='%d'", p->bci(), log->identify(p->method())); + p = p->caller(); + } + log->tail("replace_string_concat"); + } + } + + void convert_uncommon_traps(GraphKit& kit, const JVMState* jvms) { + for (uint u = 0; u < _uncommon_traps.size(); u++) { + Node* uct = _uncommon_traps.at(u); + + // Build a new call using the jvms state of the allocate + address call_addr = SharedRuntime::uncommon_trap_blob()->instructions_begin(); + const TypeFunc* call_type = OptoRuntime::uncommon_trap_Type(); + int size = call_type->domain()->cnt(); + const TypePtr* no_memory_effects = NULL; + Compile* C = _stringopts->C; + CallStaticJavaNode* call = new (C, size) CallStaticJavaNode(call_type, call_addr, "uncommon_trap", + jvms->bci(), no_memory_effects); + for (int e = 0; e < TypeFunc::Parms; e++) { + call->init_req(e, uct->in(e)); + } + // Set the trap request to record intrinsic failure if this trap + // is taken too many times. Ideally we would handle then traps by + // doing the original bookkeeping in the MDO so that if it caused + // the code to be thrown out we could still recompile and use the + // optimization. Failing the uncommon traps doesn't really mean + // that the optimization is a bad idea but there's no other way to + // do the MDO updates currently. + int trap_request = Deoptimization::make_trap_request(Deoptimization::Reason_intrinsic, + Deoptimization::Action_make_not_entrant); + call->init_req(TypeFunc::Parms, __ intcon(trap_request)); + kit.add_safepoint_edges(call); + + _stringopts->gvn()->transform(call); + C->gvn_replace_by(uct, call); + uct->disconnect_inputs(NULL); + } + } + + void cleanup() { + // disconnect the hook node + _arguments->disconnect_inputs(NULL); + } +}; + + +void StringConcat::eliminate_unneeded_control() { + eliminate_initialize(begin()->initialization()); + for (uint i = 0; i < _control.size(); i++) { + Node* n = _control.at(i); + if (n->is_Call()) { + if (n != _end) { + eliminate_call(n->as_Call()); + } + } else if (n->is_IfTrue()) { + Compile* C = _stringopts->C; + C->gvn_replace_by(n, n->in(0)->in(0)); + C->gvn_replace_by(n->in(0), C->top()); + } + } +} + + +StringConcat* StringConcat::merge(StringConcat* other, Node* arg) { + StringConcat* result = new StringConcat(_stringopts, _end); + for (uint x = 0; x < _control.size(); x++) { + Node* n = _control.at(x); + if (n->is_Call()) { + result->_control.push(n); + } + } + for (uint x = 0; x < other->_control.size(); x++) { + Node* n = other->_control.at(x); + if (n->is_Call()) { + result->_control.push(n); + } + } + assert(result->_control.contains(other->_end), "what?"); + assert(result->_control.contains(_begin), "what?"); + for (int x = 0; x < num_arguments(); x++) { + if (argument(x) == arg) { + // replace the toString result with the all the arguments that + // made up the other StringConcat + for (int y = 0; y < other->num_arguments(); y++) { + result->append(other->argument(y), other->mode(y)); + } + } else { + result->append(argument(x), mode(x)); + } + } + result->set_allocation(other->_begin); + result->_multiple = true; + return result; +} + + +void StringConcat::eliminate_call(CallNode* call) { + Compile* C = _stringopts->C; + CallProjections projs; + call->extract_projections(&projs, false); + if (projs.fallthrough_catchproj != NULL) { + C->gvn_replace_by(projs.fallthrough_catchproj, call->in(TypeFunc::Control)); + } + if (projs.fallthrough_memproj != NULL) { + C->gvn_replace_by(projs.fallthrough_memproj, call->in(TypeFunc::Memory)); + } + if (projs.catchall_memproj != NULL) { + C->gvn_replace_by(projs.catchall_memproj, C->top()); + } + if (projs.fallthrough_ioproj != NULL) { + C->gvn_replace_by(projs.fallthrough_ioproj, call->in(TypeFunc::I_O)); + } + if (projs.catchall_ioproj != NULL) { + C->gvn_replace_by(projs.catchall_ioproj, C->top()); + } + if (projs.catchall_catchproj != NULL) { + // EA can't cope with the partially collapsed graph this + // creates so put it on the worklist to be collapsed later. + for (SimpleDUIterator i(projs.catchall_catchproj); i.has_next(); i.next()) { + Node *use = i.get(); + int opc = use->Opcode(); + if (opc == Op_CreateEx || opc == Op_Region) { + _stringopts->record_dead_node(use); + } + } + C->gvn_replace_by(projs.catchall_catchproj, C->top()); + } + if (projs.resproj != NULL) { + C->gvn_replace_by(projs.resproj, C->top()); + } + C->gvn_replace_by(call, C->top()); +} + +void StringConcat::eliminate_initialize(InitializeNode* init) { + Compile* C = _stringopts->C; + + // Eliminate Initialize node. + assert(init->outcnt() <= 2, "only a control and memory projection expected"); + assert(init->req() <= InitializeNode::RawStores, "no pending inits"); + Node *ctrl_proj = init->proj_out(TypeFunc::Control); + if (ctrl_proj != NULL) { + C->gvn_replace_by(ctrl_proj, init->in(TypeFunc::Control)); + } + Node *mem_proj = init->proj_out(TypeFunc::Memory); + if (mem_proj != NULL) { + Node *mem = init->in(TypeFunc::Memory); + C->gvn_replace_by(mem_proj, mem); + } + C->gvn_replace_by(init, C->top()); + init->disconnect_inputs(NULL); +} + +Node_List PhaseStringOpts::collect_toString_calls() { + Node_List string_calls; + Node_List worklist; + + _visited.Clear(); + + // Prime the worklist + for (uint i = 1; i < C->root()->len(); i++) { + Node* n = C->root()->in(i); + if (n != NULL && !_visited.test_set(n->_idx)) { + worklist.push(n); + } + } + + while (worklist.size() > 0) { + Node* ctrl = worklist.pop(); + if (ctrl->is_CallStaticJava()) { + CallStaticJavaNode* csj = ctrl->as_CallStaticJava(); + ciMethod* m = csj->method(); + if (m != NULL && + (m->intrinsic_id() == vmIntrinsics::_StringBuffer_toString || + m->intrinsic_id() == vmIntrinsics::_StringBuilder_toString)) { + string_calls.push(csj); + } + } + if (ctrl->in(0) != NULL && !_visited.test_set(ctrl->in(0)->_idx)) { + worklist.push(ctrl->in(0)); + } + if (ctrl->is_Region()) { + for (uint i = 1; i < ctrl->len(); i++) { + if (ctrl->in(i) != NULL && !_visited.test_set(ctrl->in(i)->_idx)) { + worklist.push(ctrl->in(i)); + } + } + } + } + return string_calls; +} + + +StringConcat* PhaseStringOpts::build_candidate(CallStaticJavaNode* call) { + ciMethod* m = call->method(); + ciSymbol* string_sig; + ciSymbol* int_sig; + ciSymbol* char_sig; + if (m->holder() == C->env()->StringBuilder_klass()) { + string_sig = ciSymbol::String_StringBuilder_signature(); + int_sig = ciSymbol::int_StringBuilder_signature(); + char_sig = ciSymbol::char_StringBuilder_signature(); + } else if (m->holder() == C->env()->StringBuffer_klass()) { + string_sig = ciSymbol::String_StringBuffer_signature(); + int_sig = ciSymbol::int_StringBuffer_signature(); + char_sig = ciSymbol::char_StringBuffer_signature(); + } else { + return NULL; + } +#ifndef PRODUCT + if (PrintOptimizeStringConcat) { + tty->print("considering toString call in "); + call->jvms()->dump_spec(tty); tty->cr(); + } +#endif + + StringConcat* sc = new StringConcat(this, call); + + AllocateNode* alloc = NULL; + InitializeNode* init = NULL; + + // possible opportunity for StringBuilder fusion + CallStaticJavaNode* cnode = call; + while (cnode) { + Node* recv = cnode->in(TypeFunc::Parms)->uncast(); + if (recv->is_Proj()) { + recv = recv->in(0); + } + cnode = recv->isa_CallStaticJava(); + if (cnode == NULL) { + alloc = recv->isa_Allocate(); + if (alloc == NULL) { + break; + } + // Find the constructor call + Node* result = alloc->result_cast(); + if (result == NULL || !result->is_CheckCastPP()) { + // strange looking allocation +#ifndef PRODUCT + if (PrintOptimizeStringConcat) { + tty->print("giving up because allocation looks strange "); + alloc->jvms()->dump_spec(tty); tty->cr(); + } +#endif + break; + } + Node* constructor = NULL; + for (SimpleDUIterator i(result); i.has_next(); i.next()) { + CallStaticJavaNode *use = i.get()->isa_CallStaticJava(); + if (use != NULL && use->method() != NULL && + use->method()->name() == ciSymbol::object_initializer_name() && + use->method()->holder() == m->holder()) { + // Matched the constructor. + ciSymbol* sig = use->method()->signature()->as_symbol(); + if (sig == ciSymbol::void_method_signature() || + sig == ciSymbol::int_void_signature() || + sig == ciSymbol::string_void_signature()) { + if (sig == ciSymbol::string_void_signature()) { + // StringBuilder(String) so pick this up as the first argument + assert(use->in(TypeFunc::Parms + 1) != NULL, "what?"); + sc->push_string(use->in(TypeFunc::Parms + 1)); + } + // The int variant takes an initial size for the backing + // array so just treat it like the void version. + constructor = use; + } else { +#ifndef PRODUCT + if (PrintOptimizeStringConcat) { + tty->print("unexpected constructor signature: %s", sig->as_utf8()); + } +#endif + } + break; + } + } + if (constructor == NULL) { + // couldn't find constructor +#ifndef PRODUCT + if (PrintOptimizeStringConcat) { + tty->print("giving up because couldn't find constructor "); + alloc->jvms()->dump_spec(tty); + } +#endif + break; + } + + // Walked all the way back and found the constructor call so see + // if this call converted into a direct string concatenation. + sc->add_control(call); + sc->add_control(constructor); + sc->add_control(alloc); + sc->set_allocation(alloc); + if (sc->validate_control_flow()) { + return sc; + } else { + return NULL; + } + } else if (cnode->method() == NULL) { + break; + } else if (cnode->method()->holder() == m->holder() && + cnode->method()->name() == ciSymbol::append_name() && + (cnode->method()->signature()->as_symbol() == string_sig || + cnode->method()->signature()->as_symbol() == char_sig || + cnode->method()->signature()->as_symbol() == int_sig)) { + sc->add_control(cnode); + Node* arg = cnode->in(TypeFunc::Parms + 1); + if (cnode->method()->signature()->as_symbol() == int_sig) { + sc->push_int(arg); + } else if (cnode->method()->signature()->as_symbol() == char_sig) { + sc->push_char(arg); + } else { + if (arg->is_Proj() && arg->in(0)->is_CallStaticJava()) { + CallStaticJavaNode* csj = arg->in(0)->as_CallStaticJava(); + if (csj->method() != NULL && + csj->method()->holder() == C->env()->Integer_klass() && + csj->method()->name() == ciSymbol::toString_name()) { + sc->add_control(csj); + sc->push_int(csj->in(TypeFunc::Parms)); + continue; + } + } + sc->push_string(arg); + } + continue; + } else { + // some unhandled signature +#ifndef PRODUCT + if (PrintOptimizeStringConcat) { + tty->print("giving up because encountered unexpected signature "); + cnode->tf()->dump(); tty->cr(); + cnode->in(TypeFunc::Parms + 1)->dump(); + } +#endif + break; + } + } + return NULL; +} + + +PhaseStringOpts::PhaseStringOpts(PhaseGVN* gvn, Unique_Node_List*): + Phase(StringOpts), + _gvn(gvn), + _visited(Thread::current()->resource_area()) { + + assert(OptimizeStringConcat, "shouldn't be here"); + + size_table_field = C->env()->Integer_klass()->get_field_by_name(ciSymbol::make("sizeTable"), + ciSymbol::make("[I"), true); + if (size_table_field == NULL) { + // Something wrong so give up. + assert(false, "why can't we find Integer.sizeTable?"); + return; + } + + // Collect the types needed to talk about the various slices of memory + const TypeInstPtr* string_type = TypeInstPtr::make(TypePtr::NotNull, C->env()->String_klass(), + false, NULL, 0); + + const TypePtr* value_field_type = string_type->add_offset(java_lang_String::value_offset_in_bytes()); + const TypePtr* offset_field_type = string_type->add_offset(java_lang_String::offset_offset_in_bytes()); + const TypePtr* count_field_type = string_type->add_offset(java_lang_String::count_offset_in_bytes()); + + value_field_idx = C->get_alias_index(value_field_type); + count_field_idx = C->get_alias_index(count_field_type); + offset_field_idx = C->get_alias_index(offset_field_type); + char_adr_idx = C->get_alias_index(TypeAryPtr::CHARS); + + // For each locally allocated StringBuffer see if the usages can be + // collapsed into a single String construction. + + // Run through the list of allocation looking for SB.toString to see + // if it's possible to fuse the usage of the SB into a single String + // construction. + GrowableArray<StringConcat*> concats; + Node_List toStrings = collect_toString_calls(); + while (toStrings.size() > 0) { + StringConcat* sc = build_candidate(toStrings.pop()->as_CallStaticJava()); + if (sc != NULL) { + concats.push(sc); + } + } + + // try to coalesce separate concats + restart: + for (int c = 0; c < concats.length(); c++) { + StringConcat* sc = concats.at(c); + for (int i = 0; i < sc->num_arguments(); i++) { + Node* arg = sc->argument(i); + if (arg->is_Proj() && arg->in(0)->is_CallStaticJava()) { + CallStaticJavaNode* csj = arg->in(0)->as_CallStaticJava(); + if (csj->method() != NULL && + (csj->method()->holder() == C->env()->StringBuffer_klass() || + csj->method()->holder() == C->env()->StringBuilder_klass()) && + csj->method()->name() == ciSymbol::toString_name()) { + for (int o = 0; o < concats.length(); o++) { + if (c == o) continue; + StringConcat* other = concats.at(o); + if (other->end() == csj) { +#ifndef PRODUCT + if (PrintOptimizeStringConcat) { + tty->print_cr("considering stacked concats"); + } +#endif + + StringConcat* merged = sc->merge(other, arg); + if (merged->validate_control_flow()) { +#ifndef PRODUCT + if (PrintOptimizeStringConcat) { + tty->print_cr("stacking would succeed"); + } +#endif + if (c < o) { + concats.remove_at(o); + concats.at_put(c, merged); + } else { + concats.remove_at(c); + concats.at_put(o, merged); + } + goto restart; + } else { +#ifndef PRODUCT + if (PrintOptimizeStringConcat) { + tty->print_cr("stacking would fail"); + } +#endif + } + } + } + } + } + } + } + + + for (int c = 0; c < concats.length(); c++) { + StringConcat* sc = concats.at(c); + replace_string_concat(sc); + } + + remove_dead_nodes(); +} + +void PhaseStringOpts::record_dead_node(Node* dead) { + dead_worklist.push(dead); +} + +void PhaseStringOpts::remove_dead_nodes() { + // Delete any dead nodes to make things clean enough that escape + // analysis doesn't get unhappy. + while (dead_worklist.size() > 0) { + Node* use = dead_worklist.pop(); + int opc = use->Opcode(); + switch (opc) { + case Op_Region: { + uint i = 1; + for (i = 1; i < use->req(); i++) { + if (use->in(i) != C->top()) { + break; + } + } + if (i >= use->req()) { + for (SimpleDUIterator i(use); i.has_next(); i.next()) { + Node* m = i.get(); + if (m->is_Phi()) { + dead_worklist.push(m); + } + } + C->gvn_replace_by(use, C->top()); + } + break; + } + case Op_AddP: + case Op_CreateEx: { + // Recurisvely clean up references to CreateEx so EA doesn't + // get unhappy about the partially collapsed graph. + for (SimpleDUIterator i(use); i.has_next(); i.next()) { + Node* m = i.get(); + if (m->is_AddP()) { + dead_worklist.push(m); + } + } + C->gvn_replace_by(use, C->top()); + break; + } + case Op_Phi: + if (use->in(0) == C->top()) { + C->gvn_replace_by(use, C->top()); + } + break; + } + } +} + + +bool StringConcat::validate_control_flow() { + // We found all the calls and arguments now lets see if it's + // safe to transform the graph as we would expect. + + // Check to see if this resulted in too many uncommon traps previously + if (Compile::current()->too_many_traps(_begin->jvms()->method(), _begin->jvms()->bci(), + Deoptimization::Reason_intrinsic)) { + return false; + } + + // Walk backwards over the control flow from toString to the + // allocation and make sure all the control flow is ok. This + // means it's either going to be eliminated once the calls are + // removed or it can safely be transformed into an uncommon + // trap. + + int null_check_count = 0; + Unique_Node_List ctrl_path; + + assert(_control.contains(_begin), "missing"); + assert(_control.contains(_end), "missing"); + + // Collect the nodes that we know about and will eliminate into ctrl_path + for (uint i = 0; i < _control.size(); i++) { + // Push the call and it's control projection + Node* n = _control.at(i); + if (n->is_Allocate()) { + AllocateNode* an = n->as_Allocate(); + InitializeNode* init = an->initialization(); + ctrl_path.push(init); + ctrl_path.push(init->as_Multi()->proj_out(0)); + } + if (n->is_Call()) { + CallNode* cn = n->as_Call(); + ctrl_path.push(cn); + ctrl_path.push(cn->proj_out(0)); + ctrl_path.push(cn->proj_out(0)->unique_out()); + ctrl_path.push(cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0)); + } else { + ShouldNotReachHere(); + } + } + + // Skip backwards through the control checking for unexpected contro flow + Node* ptr = _end; + bool fail = false; + while (ptr != _begin) { + if (ptr->is_Call() && ctrl_path.member(ptr)) { + ptr = ptr->in(0); + } else if (ptr->is_CatchProj() && ctrl_path.member(ptr)) { + ptr = ptr->in(0)->in(0)->in(0); + assert(ctrl_path.member(ptr), "should be a known piece of control"); + } else if (ptr->is_IfTrue()) { + IfNode* iff = ptr->in(0)->as_If(); + BoolNode* b = iff->in(1)->isa_Bool(); + Node* cmp = b->in(1); + Node* v1 = cmp->in(1); + Node* v2 = cmp->in(2); + Node* otherproj = iff->proj_out(1 - ptr->as_Proj()->_con); + + // Null check of the return of append which can simply be eliminated + if (b->_test._test == BoolTest::ne && + v2->bottom_type() == TypePtr::NULL_PTR && + v1->is_Proj() && ctrl_path.member(v1->in(0))) { + // NULL check of the return value of the append + null_check_count++; + if (otherproj->outcnt() == 1) { + CallStaticJavaNode* call = otherproj->unique_out()->isa_CallStaticJava(); + if (call != NULL && call->_name != NULL && strcmp(call->_name, "uncommon_trap") == 0) { + ctrl_path.push(call); + } + } + _control.push(ptr); + ptr = ptr->in(0)->in(0); + continue; + } + + // A test which leads to an uncommon trap which should be safe. + // Later this trap will be converted into a trap that restarts + // at the beginning. + if (otherproj->outcnt() == 1) { + CallStaticJavaNode* call = otherproj->unique_out()->isa_CallStaticJava(); + if (call != NULL && call->_name != NULL && strcmp(call->_name, "uncommon_trap") == 0) { + // control flow leads to uct so should be ok + _uncommon_traps.push(call); + ctrl_path.push(call); + ptr = ptr->in(0)->in(0); + continue; + } + } + +#ifndef PRODUCT + // Some unexpected control flow we don't know how to handle. + if (PrintOptimizeStringConcat) { + tty->print_cr("failing with unknown test"); + b->dump(); + cmp->dump(); + v1->dump(); + v2->dump(); + tty->cr(); + } +#endif + break; + } else if (ptr->is_Proj() && ptr->in(0)->is_Initialize()) { + ptr = ptr->in(0)->in(0); + } else if (ptr->is_Region()) { + Node* copy = ptr->as_Region()->is_copy(); + if (copy != NULL) { + ptr = copy; + continue; + } + if (ptr->req() == 3 && + ptr->in(1) != NULL && ptr->in(1)->is_Proj() && + ptr->in(2) != NULL && ptr->in(2)->is_Proj() && + ptr->in(1)->in(0) == ptr->in(2)->in(0) && + ptr->in(1)->in(0) != NULL && ptr->in(1)->in(0)->is_If()) { + // Simple diamond. + // XXX should check for possibly merging stores. simple data merges are ok. + ptr = ptr->in(1)->in(0)->in(0); + continue; + } +#ifndef PRODUCT + if (PrintOptimizeStringConcat) { + tty->print_cr("fusion would fail for region"); + _begin->dump(); + ptr->dump(2); + } +#endif + fail = true; + break; + } else { + // other unknown control + if (!fail) { +#ifndef PRODUCT + if (PrintOptimizeStringConcat) { + tty->print_cr("fusion would fail for"); + _begin->dump(); + } +#endif + fail = true; + } +#ifndef PRODUCT + if (PrintOptimizeStringConcat) { + ptr->dump(); + } +#endif + ptr = ptr->in(0); + } + } +#ifndef PRODUCT + if (PrintOptimizeStringConcat && fail) { + tty->cr(); + } +#endif + if (fail) return !fail; + + // Validate that all these results produced are contained within + // this cluster of objects. First collect all the results produced + // by calls in the region. + _stringopts->_visited.Clear(); + Node_List worklist; + Node* final_result = _end->proj_out(TypeFunc::Parms); + for (uint i = 0; i < _control.size(); i++) { + CallNode* cnode = _control.at(i)->isa_Call(); + if (cnode != NULL) { + _stringopts->_visited.test_set(cnode->_idx); + } + Node* result = cnode != NULL ? cnode->proj_out(TypeFunc::Parms) : NULL; + if (result != NULL && result != final_result) { + worklist.push(result); + } + } + + Node* last_result = NULL; + while (worklist.size() > 0) { + Node* result = worklist.pop(); + if (_stringopts->_visited.test_set(result->_idx)) + continue; + for (SimpleDUIterator i(result); i.has_next(); i.next()) { + Node *use = i.get(); + if (ctrl_path.member(use)) { + // already checked this + continue; + } + int opc = use->Opcode(); + if (opc == Op_CmpP || opc == Op_Node) { + ctrl_path.push(use); + continue; + } + if (opc == Op_CastPP || opc == Op_CheckCastPP) { + for (SimpleDUIterator j(use); j.has_next(); j.next()) { + worklist.push(j.get()); + } + worklist.push(use->in(1)); + ctrl_path.push(use); + continue; + } +#ifndef PRODUCT + if (PrintOptimizeStringConcat) { + if (result != last_result) { + last_result = result; + tty->print_cr("extra uses for result:"); + last_result->dump(); + } + use->dump(); + } +#endif + fail = true; + break; + } + } + +#ifndef PRODUCT + if (PrintOptimizeStringConcat && !fail) { + ttyLocker ttyl; + tty->cr(); + tty->print("fusion would succeed (%d %d) for ", null_check_count, _uncommon_traps.size()); + _begin->jvms()->dump_spec(tty); tty->cr(); + for (int i = 0; i < num_arguments(); i++) { + argument(i)->dump(); + } + _control.dump(); + tty->cr(); + } +#endif + + return !fail; +} + +Node* PhaseStringOpts::fetch_static_field(GraphKit& kit, ciField* field) { + const TypeKlassPtr* klass_type = TypeKlassPtr::make(field->holder()); + Node* klass_node = __ makecon(klass_type); + BasicType bt = field->layout_type(); + ciType* field_klass = field->type(); + + const Type *type; + if( bt == T_OBJECT ) { + if (!field->type()->is_loaded()) { + type = TypeInstPtr::BOTTOM; + } else if (field->is_constant()) { + // This can happen if the constant oop is non-perm. + ciObject* con = field->constant_value().as_object(); + // Do not "join" in the previous type; it doesn't add value, + // and may yield a vacuous result if the field is of interface type. + type = TypeOopPtr::make_from_constant(con)->isa_oopptr(); + assert(type != NULL, "field singleton type must be consistent"); + } else { + type = TypeOopPtr::make_from_klass(field_klass->as_klass()); + } + } else { + type = Type::get_const_basic_type(bt); + } + + return kit.make_load(NULL, kit.basic_plus_adr(klass_node, field->offset_in_bytes()), + type, T_OBJECT, + C->get_alias_index(klass_type->add_offset(field->offset_in_bytes()))); +} + +Node* PhaseStringOpts::int_stringSize(GraphKit& kit, Node* arg) { + RegionNode *final_merge = new (C, 3) RegionNode(3); + kit.gvn().set_type(final_merge, Type::CONTROL); + Node* final_size = new (C, 3) PhiNode(final_merge, TypeInt::INT); + kit.gvn().set_type(final_size, TypeInt::INT); + + IfNode* iff = kit.create_and_map_if(kit.control(), + __ Bool(__ CmpI(arg, __ intcon(0x80000000)), BoolTest::ne), + PROB_FAIR, COUNT_UNKNOWN); + Node* is_min = __ IfFalse(iff); + final_merge->init_req(1, is_min); + final_size->init_req(1, __ intcon(11)); + + kit.set_control(__ IfTrue(iff)); + if (kit.stopped()) { + final_merge->init_req(2, C->top()); + final_size->init_req(2, C->top()); + } else { + + // int size = (i < 0) ? stringSize(-i) + 1 : stringSize(i); + RegionNode *r = new (C, 3) RegionNode(3); + kit.gvn().set_type(r, Type::CONTROL); + Node *phi = new (C, 3) PhiNode(r, TypeInt::INT); + kit.gvn().set_type(phi, TypeInt::INT); + Node *size = new (C, 3) PhiNode(r, TypeInt::INT); + kit.gvn().set_type(size, TypeInt::INT); + Node* chk = __ CmpI(arg, __ intcon(0)); + Node* p = __ Bool(chk, BoolTest::lt); + IfNode* iff = kit.create_and_map_if(kit.control(), p, PROB_FAIR, COUNT_UNKNOWN); + Node* lessthan = __ IfTrue(iff); + Node* greaterequal = __ IfFalse(iff); + r->init_req(1, lessthan); + phi->init_req(1, __ SubI(__ intcon(0), arg)); + size->init_req(1, __ intcon(1)); + r->init_req(2, greaterequal); + phi->init_req(2, arg); + size->init_req(2, __ intcon(0)); + kit.set_control(r); + C->record_for_igvn(r); + C->record_for_igvn(phi); + C->record_for_igvn(size); + + // for (int i=0; ; i++) + // if (x <= sizeTable[i]) + // return i+1; + RegionNode *loop = new (C, 3) RegionNode(3); + loop->init_req(1, kit.control()); + kit.gvn().set_type(loop, Type::CONTROL); + + Node *index = new (C, 3) PhiNode(loop, TypeInt::INT); + index->init_req(1, __ intcon(0)); + kit.gvn().set_type(index, TypeInt::INT); + kit.set_control(loop); + Node* sizeTable = fetch_static_field(kit, size_table_field); + + Node* value = kit.load_array_element(NULL, sizeTable, index, TypeAryPtr::INTS); + C->record_for_igvn(value); + Node* limit = __ CmpI(phi, value); + Node* limitb = __ Bool(limit, BoolTest::le); + IfNode* iff2 = kit.create_and_map_if(kit.control(), limitb, PROB_MIN, COUNT_UNKNOWN); + Node* lessEqual = __ IfTrue(iff2); + Node* greater = __ IfFalse(iff2); + + loop->init_req(2, greater); + index->init_req(2, __ AddI(index, __ intcon(1))); + + kit.set_control(lessEqual); + C->record_for_igvn(loop); + C->record_for_igvn(index); + + final_merge->init_req(2, kit.control()); + final_size->init_req(2, __ AddI(__ AddI(index, size), __ intcon(1))); + } + + kit.set_control(final_merge); + C->record_for_igvn(final_merge); + C->record_for_igvn(final_size); + + return final_size; +} + +void PhaseStringOpts::int_getChars(GraphKit& kit, Node* arg, Node* char_array, Node* start, Node* end) { + RegionNode *final_merge = new (C, 4) RegionNode(4); + kit.gvn().set_type(final_merge, Type::CONTROL); + Node *final_mem = PhiNode::make(final_merge, kit.memory(char_adr_idx), Type::MEMORY, TypeAryPtr::CHARS); + kit.gvn().set_type(final_mem, Type::MEMORY); + + // need to handle Integer.MIN_VALUE specially because negating doesn't make it positive + { + // i == MIN_VALUE + IfNode* iff = kit.create_and_map_if(kit.control(), + __ Bool(__ CmpI(arg, __ intcon(0x80000000)), BoolTest::ne), + PROB_FAIR, COUNT_UNKNOWN); + + Node* old_mem = kit.memory(char_adr_idx); + + kit.set_control(__ IfFalse(iff)); + if (kit.stopped()) { + // Statically not equal to MIN_VALUE so this path is dead + final_merge->init_req(3, kit.control()); + } else { + copy_string(kit, __ makecon(TypeInstPtr::make(C->env()->the_min_jint_string())), + char_array, start); + final_merge->init_req(3, kit.control()); + final_mem->init_req(3, kit.memory(char_adr_idx)); + } + + kit.set_control(__ IfTrue(iff)); + kit.set_memory(old_mem, char_adr_idx); + } + + + // Simplified version of Integer.getChars + + // int q, r; + // int charPos = index; + Node* charPos = end; + + // char sign = 0; + + Node* i = arg; + Node* sign = __ intcon(0); + + // if (i < 0) { + // sign = '-'; + // i = -i; + // } + { + IfNode* iff = kit.create_and_map_if(kit.control(), + __ Bool(__ CmpI(arg, __ intcon(0)), BoolTest::lt), + PROB_FAIR, COUNT_UNKNOWN); + + RegionNode *merge = new (C, 3) RegionNode(3); + kit.gvn().set_type(merge, Type::CONTROL); + i = new (C, 3) PhiNode(merge, TypeInt::INT); + kit.gvn().set_type(i, TypeInt::INT); + sign = new (C, 3) PhiNode(merge, TypeInt::INT); + kit.gvn().set_type(sign, TypeInt::INT); + + merge->init_req(1, __ IfTrue(iff)); + i->init_req(1, __ SubI(__ intcon(0), arg)); + sign->init_req(1, __ intcon('-')); + merge->init_req(2, __ IfFalse(iff)); + i->init_req(2, arg); + sign->init_req(2, __ intcon(0)); + + kit.set_control(merge); + + C->record_for_igvn(merge); + C->record_for_igvn(i); + C->record_for_igvn(sign); + } + + // for (;;) { + // q = i / 10; + // r = i - ((q << 3) + (q << 1)); // r = i-(q*10) ... + // buf [--charPos] = digits [r]; + // i = q; + // if (i == 0) break; + // } + + { + RegionNode *head = new (C, 3) RegionNode(3); + head->init_req(1, kit.control()); + kit.gvn().set_type(head, Type::CONTROL); + Node *i_phi = new (C, 3) PhiNode(head, TypeInt::INT); + i_phi->init_req(1, i); + kit.gvn().set_type(i_phi, TypeInt::INT); + charPos = PhiNode::make(head, charPos); + kit.gvn().set_type(charPos, TypeInt::INT); + Node *mem = PhiNode::make(head, kit.memory(char_adr_idx), Type::MEMORY, TypeAryPtr::CHARS); + kit.gvn().set_type(mem, Type::MEMORY); + kit.set_control(head); + kit.set_memory(mem, char_adr_idx); + + Node* q = __ DivI(kit.null(), i_phi, __ intcon(10)); + Node* r = __ SubI(i_phi, __ AddI(__ LShiftI(q, __ intcon(3)), + __ LShiftI(q, __ intcon(1)))); + Node* m1 = __ SubI(charPos, __ intcon(1)); + Node* ch = __ AddI(r, __ intcon('0')); + + Node* st = __ store_to_memory(kit.control(), kit.array_element_address(char_array, m1, T_CHAR), + ch, T_CHAR, char_adr_idx); + + + IfNode* iff = kit.create_and_map_if(head, __ Bool(__ CmpI(q, __ intcon(0)), BoolTest::ne), + PROB_FAIR, COUNT_UNKNOWN); + Node* ne = __ IfTrue(iff); + Node* eq = __ IfFalse(iff); + + head->init_req(2, ne); + mem->init_req(2, st); + i_phi->init_req(2, q); + charPos->init_req(2, m1); + + charPos = m1; + + kit.set_control(eq); + kit.set_memory(st, char_adr_idx); + + C->record_for_igvn(head); + C->record_for_igvn(mem); + C->record_for_igvn(i_phi); + C->record_for_igvn(charPos); + } + + { + // if (sign != 0) { + // buf [--charPos] = sign; + // } + IfNode* iff = kit.create_and_map_if(kit.control(), + __ Bool(__ CmpI(sign, __ intcon(0)), BoolTest::ne), + PROB_FAIR, COUNT_UNKNOWN); + + final_merge->init_req(2, __ IfFalse(iff)); + final_mem->init_req(2, kit.memory(char_adr_idx)); + + kit.set_control(__ IfTrue(iff)); + if (kit.stopped()) { + final_merge->init_req(1, C->top()); + final_mem->init_req(1, C->top()); + } else { + Node* m1 = __ SubI(charPos, __ intcon(1)); + Node* st = __ store_to_memory(kit.control(), kit.array_element_address(char_array, m1, T_CHAR), + sign, T_CHAR, char_adr_idx); + + final_merge->init_req(1, kit.control()); + final_mem->init_req(1, st); + } + + kit.set_control(final_merge); + kit.set_memory(final_mem, char_adr_idx); + + C->record_for_igvn(final_merge); + C->record_for_igvn(final_mem); + } +} + + +Node* PhaseStringOpts::copy_string(GraphKit& kit, Node* str, Node* char_array, Node* start) { + Node* string = str; + Node* offset = kit.make_load(NULL, + kit.basic_plus_adr(string, string, java_lang_String::offset_offset_in_bytes()), + TypeInt::INT, T_INT, offset_field_idx); + Node* count = kit.make_load(NULL, + kit.basic_plus_adr(string, string, java_lang_String::count_offset_in_bytes()), + TypeInt::INT, T_INT, count_field_idx); + const TypeAryPtr* value_type = TypeAryPtr::make(TypePtr::NotNull, + TypeAry::make(TypeInt::CHAR,TypeInt::POS), + ciTypeArrayKlass::make(T_CHAR), true, 0); + Node* value = kit.make_load(NULL, + kit.basic_plus_adr(string, string, java_lang_String::value_offset_in_bytes()), + value_type, T_OBJECT, value_field_idx); + + // copy the contents + if (offset->is_Con() && count->is_Con() && value->is_Con() && count->get_int() < unroll_string_copy_length) { + // For small constant strings just emit individual stores. + // A length of 6 seems like a good space/speed tradeof. + int c = count->get_int(); + int o = offset->get_int(); + const TypeOopPtr* t = kit.gvn().type(value)->isa_oopptr(); + ciTypeArray* value_array = t->const_oop()->as_type_array(); + for (int e = 0; e < c; e++) { + __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR), + __ intcon(value_array->char_at(o + e)), T_CHAR, char_adr_idx); + start = __ AddI(start, __ intcon(1)); + } + } else { + Node* src_ptr = kit.array_element_address(value, offset, T_CHAR); + Node* dst_ptr = kit.array_element_address(char_array, start, T_CHAR); + Node* c = count; + Node* extra = NULL; +#ifdef _LP64 + c = __ ConvI2L(c); + extra = C->top(); +#endif + Node* call = kit.make_runtime_call(GraphKit::RC_LEAF|GraphKit::RC_NO_FP, + OptoRuntime::fast_arraycopy_Type(), + CAST_FROM_FN_PTR(address, StubRoutines::jshort_disjoint_arraycopy()), + "jshort_disjoint_arraycopy", TypeAryPtr::CHARS, + src_ptr, dst_ptr, c, extra); + start = __ AddI(start, count); + } + return start; +} + + +void PhaseStringOpts::replace_string_concat(StringConcat* sc) { + // Log a little info about the transformation + sc->maybe_log_transform(); + + // pull the JVMState of the allocation into a SafePointNode to serve as + // as a shim for the insertion of the new code. + JVMState* jvms = sc->begin()->jvms()->clone_shallow(C); + uint size = sc->begin()->req(); + SafePointNode* map = new (C, size) SafePointNode(size, jvms); + + // copy the control and memory state from the final call into our + // new starting state. This allows any preceeding tests to feed + // into the new section of code. + for (uint i1 = 0; i1 < TypeFunc::Parms; i1++) { + map->init_req(i1, sc->end()->in(i1)); + } + // blow away old allocation arguments + for (uint i1 = TypeFunc::Parms; i1 < jvms->debug_start(); i1++) { + map->init_req(i1, C->top()); + } + // Copy the rest of the inputs for the JVMState + for (uint i1 = jvms->debug_start(); i1 < sc->begin()->req(); i1++) { + map->init_req(i1, sc->begin()->in(i1)); + } + // Make sure the memory state is a MergeMem for parsing. + if (!map->in(TypeFunc::Memory)->is_MergeMem()) { + map->set_req(TypeFunc::Memory, MergeMemNode::make(C, map->in(TypeFunc::Memory))); + } + + jvms->set_map(map); + map->ensure_stack(jvms, jvms->method()->max_stack()); + + + // disconnect all the old StringBuilder calls from the graph + sc->eliminate_unneeded_control(); + + // At this point all the old work has been completely removed from + // the graph and the saved JVMState exists at the point where the + // final toString call used to be. + GraphKit kit(jvms); + + // There may be uncommon traps which are still using the + // intermediate states and these need to be rewritten to point at + // the JVMState at the beginning of the transformation. + sc->convert_uncommon_traps(kit, jvms); + + // Now insert the logic to compute the size of the string followed + // by all the logic to construct array and resulting string. + + Node* null_string = __ makecon(TypeInstPtr::make(C->env()->the_null_string())); + + // Create a region for the overflow checks to merge into. + int args = MAX2(sc->num_arguments(), 1); + RegionNode* overflow = new (C, args) RegionNode(args); + kit.gvn().set_type(overflow, Type::CONTROL); + + // Create a hook node to hold onto the individual sizes since they + // are need for the copying phase. + Node* string_sizes = new (C, args) Node(args); + + Node* length = __ intcon(0); + for (int argi = 0; argi < sc->num_arguments(); argi++) { + Node* arg = sc->argument(argi); + switch (sc->mode(argi)) { + case StringConcat::IntMode: { + Node* string_size = int_stringSize(kit, arg); + + // accumulate total + length = __ AddI(length, string_size); + + // Cache this value for the use by int_toString + string_sizes->init_req(argi, string_size); + break; + } + case StringConcat::StringMode: { + const Type* type = kit.gvn().type(arg); + if (type == TypePtr::NULL_PTR) { + // replace the argument with the null checked version + arg = null_string; + sc->set_argument(argi, arg); + } else if (!type->higher_equal(TypeInstPtr::NOTNULL)) { + // s = s != null ? s : "null"; + // length = length + (s.count - s.offset); + RegionNode *r = new (C, 3) RegionNode(3); + kit.gvn().set_type(r, Type::CONTROL); + Node *phi = new (C, 3) PhiNode(r, type->join(TypeInstPtr::NOTNULL)); + kit.gvn().set_type(phi, phi->bottom_type()); + Node* p = __ Bool(__ CmpP(arg, kit.null()), BoolTest::ne); + IfNode* iff = kit.create_and_map_if(kit.control(), p, PROB_MIN, COUNT_UNKNOWN); + Node* notnull = __ IfTrue(iff); + Node* isnull = __ IfFalse(iff); + r->init_req(1, notnull); + phi->init_req(1, arg); + r->init_req(2, isnull); + phi->init_req(2, null_string); + kit.set_control(r); + C->record_for_igvn(r); + C->record_for_igvn(phi); + // replace the argument with the null checked version + arg = phi; + sc->set_argument(argi, arg); + } + // Node* offset = kit.make_load(NULL, kit.basic_plus_adr(arg, arg, offset_offset), + // TypeInt::INT, T_INT, offset_field_idx); + Node* count = kit.make_load(NULL, kit.basic_plus_adr(arg, arg, java_lang_String::count_offset_in_bytes()), + TypeInt::INT, T_INT, count_field_idx); + length = __ AddI(length, count); + string_sizes->init_req(argi, NULL); + break; + } + case StringConcat::CharMode: { + // one character only + length = __ AddI(length, __ intcon(1)); + break; + } + default: + ShouldNotReachHere(); + } + if (argi > 0) { + // Check that the sum hasn't overflowed + IfNode* iff = kit.create_and_map_if(kit.control(), + __ Bool(__ CmpI(length, __ intcon(0)), BoolTest::lt), + PROB_MIN, COUNT_UNKNOWN); + kit.set_control(__ IfFalse(iff)); + overflow->set_req(argi, __ IfTrue(iff)); + } + } + + { + // Hook + PreserveJVMState pjvms(&kit); + kit.set_control(overflow); + kit.uncommon_trap(Deoptimization::Reason_intrinsic, + Deoptimization::Action_make_not_entrant); + } + + // length now contains the number of characters needed for the + // char[] so create a new AllocateArray for the char[] + Node* char_array = NULL; + { + PreserveReexecuteState preexecs(&kit); + // The original jvms is for an allocation of either a String or + // StringBuffer so no stack adjustment is necessary for proper + // reexecution. If we deoptimize in the slow path the bytecode + // will be reexecuted and the char[] allocation will be thrown away. + kit.jvms()->set_should_reexecute(true); + char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))), + length, 1); + } + + // Mark the allocation so that zeroing is skipped since the code + // below will overwrite the entire array + AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn); + char_alloc->maybe_set_complete(_gvn); + + // Now copy the string representations into the final char[] + Node* start = __ intcon(0); + for (int argi = 0; argi < sc->num_arguments(); argi++) { + Node* arg = sc->argument(argi); + switch (sc->mode(argi)) { + case StringConcat::IntMode: { + Node* end = __ AddI(start, string_sizes->in(argi)); + // getChars words backwards so pass the ending point as well as the start + int_getChars(kit, arg, char_array, start, end); + start = end; + break; + } + case StringConcat::StringMode: { + start = copy_string(kit, arg, char_array, start); + break; + } + case StringConcat::CharMode: { + __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR), + arg, T_CHAR, char_adr_idx); + start = __ AddI(start, __ intcon(1)); + break; + } + default: + ShouldNotReachHere(); + } + } + + // If we're not reusing an existing String allocation then allocate one here. + Node* result = sc->string_alloc(); + if (result == NULL) { + PreserveReexecuteState preexecs(&kit); + // The original jvms is for an allocation of either a String or + // StringBuffer so no stack adjustment is necessary for proper + // reexecution. + kit.jvms()->set_should_reexecute(true); + result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass()))); + } + + // Intialize the string + kit.store_to_memory(kit.control(), kit.basic_plus_adr(result, java_lang_String::offset_offset_in_bytes()), + __ intcon(0), T_INT, offset_field_idx); + kit.store_to_memory(kit.control(), kit.basic_plus_adr(result, java_lang_String::count_offset_in_bytes()), + length, T_INT, count_field_idx); + kit.store_to_memory(kit.control(), kit.basic_plus_adr(result, java_lang_String::value_offset_in_bytes()), + char_array, T_OBJECT, value_field_idx); + + // hook up the outgoing control and result + kit.replace_call(sc->end(), result); + + // Unhook any hook nodes + string_sizes->disconnect_inputs(NULL); + sc->cleanup(); +} diff --git a/src/share/vm/opto/stringopts.hpp b/src/share/vm/opto/stringopts.hpp new file mode 100644 index 000000000..417e08a91 --- /dev/null +++ b/src/share/vm/opto/stringopts.hpp @@ -0,0 +1,83 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class StringConcat; + +class PhaseStringOpts : public Phase { + friend class StringConcat; + + private: + PhaseGVN* _gvn; + + // List of dead nodes to clean up aggressively at the end + Unique_Node_List dead_worklist; + + // Memory slices needed for code gen + int char_adr_idx; + int value_field_idx; + int count_field_idx; + int offset_field_idx; + + // Integer.sizeTable - used for int to String conversion + ciField* size_table_field; + + // A set for use by various stages + VectorSet _visited; + + // Collect a list of all SB.toString calls + Node_List collect_toString_calls(); + + // Examine the use of the SB alloc to see if it can be replace with + // a single string construction. + StringConcat* build_candidate(CallStaticJavaNode* call); + + // Replace all the SB calls in concat with an optimization String allocation + void replace_string_concat(StringConcat* concat); + + // Load the value of a static field, performing any constant folding. + Node* fetch_static_field(GraphKit& kit, ciField* field); + + // Compute the number of characters required to represent the int value + Node* int_stringSize(GraphKit& kit, Node* value); + + // Copy the characters representing value into char_array starting at start + void int_getChars(GraphKit& kit, Node* value, Node* char_array, Node* start, Node* end); + + // Copy of the contents of the String str into char_array starting at index start. + Node* copy_string(GraphKit& kit, Node* str, Node* char_array, Node* start); + + // Clean up any leftover nodes + void record_dead_node(Node* node); + void remove_dead_nodes(); + + PhaseGVN* gvn() { return _gvn; } + + enum { + // max length of constant string copy unrolling in copy_string + unroll_string_copy_length = 6 + }; + + public: + PhaseStringOpts(PhaseGVN* gvn, Unique_Node_List* worklist); +}; diff --git a/src/share/vm/opto/type.hpp b/src/share/vm/opto/type.hpp index 9b11f9ca5..03f81532c 100644 --- a/src/share/vm/opto/type.hpp +++ b/src/share/vm/opto/type.hpp @@ -847,9 +847,6 @@ public: // Constant pointer to array static const TypeAryPtr *make( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot); - // Convenience - static const TypeAryPtr *make(ciObject* o); - // Return a 'ptr' version of this type virtual const Type *cast_to_ptr_type(PTR ptr) const; diff --git a/src/share/vm/runtime/globals.cpp b/src/share/vm/runtime/globals.cpp index 71fec5524..03e068dea 100644 --- a/src/share/vm/runtime/globals.cpp +++ b/src/share/vm/runtime/globals.cpp @@ -46,7 +46,8 @@ bool Flag::is_unlocker() const { bool Flag::is_unlocked() const { if (strcmp(kind, "{diagnostic}") == 0) { return UnlockDiagnosticVMOptions; - } else if (strcmp(kind, "{experimental}") == 0) { + } else if (strcmp(kind, "{experimental}") == 0 || + strcmp(kind, "{C2 experimental}") == 0) { return UnlockExperimentalVMOptions; } else { return true; @@ -169,6 +170,7 @@ void Flag::print_as_flag(outputStream* st) { #define C2_PRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{C2 product}", DEFAULT }, #define C2_PD_PRODUCT_FLAG_STRUCT(type, name, doc) { #type, XSTR(name), &name, "{C2 pd product}", DEFAULT }, #define C2_DIAGNOSTIC_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{C2 diagnostic}", DEFAULT }, +#define C2_EXPERIMENTAL_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{C2 experimental}", DEFAULT }, #ifdef PRODUCT #define C2_DEVELOP_FLAG_STRUCT(type, name, value, doc) /* flag is constant */ #define C2_PD_DEVELOP_FLAG_STRUCT(type, name, doc) /* flag is constant */ @@ -190,7 +192,7 @@ static Flag flagTable[] = { C1_FLAGS(C1_DEVELOP_FLAG_STRUCT, C1_PD_DEVELOP_FLAG_STRUCT, C1_PRODUCT_FLAG_STRUCT, C1_PD_PRODUCT_FLAG_STRUCT, C1_NOTPRODUCT_FLAG_STRUCT) #endif #ifdef COMPILER2 - C2_FLAGS(C2_DEVELOP_FLAG_STRUCT, C2_PD_DEVELOP_FLAG_STRUCT, C2_PRODUCT_FLAG_STRUCT, C2_PD_PRODUCT_FLAG_STRUCT, C2_DIAGNOSTIC_FLAG_STRUCT, C2_NOTPRODUCT_FLAG_STRUCT) + C2_FLAGS(C2_DEVELOP_FLAG_STRUCT, C2_PD_DEVELOP_FLAG_STRUCT, C2_PRODUCT_FLAG_STRUCT, C2_PD_PRODUCT_FLAG_STRUCT, C2_DIAGNOSTIC_FLAG_STRUCT, C2_EXPERIMENTAL_FLAG_STRUCT, C2_NOTPRODUCT_FLAG_STRUCT) #endif {0, NULL, NULL} }; diff --git a/src/share/vm/runtime/globals_extension.hpp b/src/share/vm/runtime/globals_extension.hpp index 35844b7d2..f32cab1cc 100644 --- a/src/share/vm/runtime/globals_extension.hpp +++ b/src/share/vm/runtime/globals_extension.hpp @@ -64,6 +64,7 @@ #define C2_PRODUCT_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name), #define C2_PD_PRODUCT_FLAG_MEMBER(type, name, doc) FLAG_MEMBER(name), #define C2_DIAGNOSTIC_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name), +#define C2_EXPERIMENTAL_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name), #ifdef PRODUCT #define C2_DEVELOP_FLAG_MEMBER(type, name, value, doc) /* flag is constant */ #define C2_PD_DEVELOP_FLAG_MEMBER(type, name, doc) /* flag is constant */ @@ -84,7 +85,7 @@ typedef enum { C1_FLAGS(C1_DEVELOP_FLAG_MEMBER, C1_PD_DEVELOP_FLAG_MEMBER, C1_PRODUCT_FLAG_MEMBER, C1_PD_PRODUCT_FLAG_MEMBER, C1_NOTPRODUCT_FLAG_MEMBER) #endif #ifdef COMPILER2 - C2_FLAGS(C2_DEVELOP_FLAG_MEMBER, C2_PD_DEVELOP_FLAG_MEMBER, C2_PRODUCT_FLAG_MEMBER, C2_PD_PRODUCT_FLAG_MEMBER, C2_DIAGNOSTIC_FLAG_MEMBER, C2_NOTPRODUCT_FLAG_MEMBER) + C2_FLAGS(C2_DEVELOP_FLAG_MEMBER, C2_PD_DEVELOP_FLAG_MEMBER, C2_PRODUCT_FLAG_MEMBER, C2_PD_PRODUCT_FLAG_MEMBER, C2_DIAGNOSTIC_FLAG_MEMBER, C2_EXPERIMENTAL_FLAG_MEMBER, C2_NOTPRODUCT_FLAG_MEMBER) #endif NUM_CommandLineFlag } CommandLineFlag; @@ -130,6 +131,7 @@ typedef enum { #define C2_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), #define C2_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, doc) FLAG_MEMBER_WITH_TYPE(name,type), #define C2_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), +#define C2_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), #ifdef PRODUCT #define C2_DEVELOP_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) /* flag is constant */ #define C2_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE(type, name, doc) /* flag is constant */ @@ -181,6 +183,7 @@ typedef enum { C2_PRODUCT_FLAG_MEMBER_WITH_TYPE, C2_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, C2_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, + C2_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE, C2_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE) #endif NUM_CommandLineFlagWithType diff --git a/src/share/vm/services/g1MemoryPool.cpp b/src/share/vm/services/g1MemoryPool.cpp new file mode 100644 index 000000000..7b73f7c79 --- /dev/null +++ b/src/share/vm/services/g1MemoryPool.cpp @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +# include "incls/_precompiled.incl" +# include "incls/_g1MemoryPool.cpp.incl" + +G1MemoryPoolSuper::G1MemoryPoolSuper(G1CollectedHeap* g1h, + const char* name, + size_t init_size, + size_t max_size, + bool support_usage_threshold) : + _g1h(g1h), CollectedMemoryPool(name, + MemoryPool::Heap, + init_size, + max_size, + support_usage_threshold) { + assert(UseG1GC, "sanity"); +} + +// See the comment at the top of g1MemoryPool.hpp +size_t G1MemoryPoolSuper::eden_space_committed(G1CollectedHeap* g1h) { + return MAX2(eden_space_used(g1h), (size_t) HeapRegion::GrainBytes); +} + +// See the comment at the top of g1MemoryPool.hpp +size_t G1MemoryPoolSuper::eden_space_used(G1CollectedHeap* g1h) { + size_t young_list_length = g1h->young_list_length(); + size_t eden_used = young_list_length * HeapRegion::GrainBytes; + size_t survivor_used = survivor_space_used(g1h); + eden_used = subtract_up_to_zero(eden_used, survivor_used); + return eden_used; +} + +// See the comment at the top of g1MemoryPool.hpp +size_t G1MemoryPoolSuper::eden_space_max(G1CollectedHeap* g1h) { + // This should ensure that it returns a value no smaller than the + // region size. Currently, eden_space_committed() guarantees that. + return eden_space_committed(g1h); +} + +// See the comment at the top of g1MemoryPool.hpp +size_t G1MemoryPoolSuper::survivor_space_committed(G1CollectedHeap* g1h) { + return MAX2(survivor_space_used(g1h), (size_t) HeapRegion::GrainBytes); +} + +// See the comment at the top of g1MemoryPool.hpp +size_t G1MemoryPoolSuper::survivor_space_used(G1CollectedHeap* g1h) { + size_t survivor_num = g1h->g1_policy()->recorded_survivor_regions(); + size_t survivor_used = survivor_num * HeapRegion::GrainBytes; + return survivor_used; +} + +// See the comment at the top of g1MemoryPool.hpp +size_t G1MemoryPoolSuper::survivor_space_max(G1CollectedHeap* g1h) { + // This should ensure that it returns a value no smaller than the + // region size. Currently, survivor_space_committed() guarantees that. + return survivor_space_committed(g1h); +} + +// See the comment at the top of g1MemoryPool.hpp +size_t G1MemoryPoolSuper::old_space_committed(G1CollectedHeap* g1h) { + size_t committed = overall_committed(g1h); + size_t eden_committed = eden_space_committed(g1h); + size_t survivor_committed = survivor_space_committed(g1h); + committed = subtract_up_to_zero(committed, eden_committed); + committed = subtract_up_to_zero(committed, survivor_committed); + committed = MAX2(committed, (size_t) HeapRegion::GrainBytes); + return committed; +} + +// See the comment at the top of g1MemoryPool.hpp +size_t G1MemoryPoolSuper::old_space_used(G1CollectedHeap* g1h) { + size_t used = overall_used(g1h); + size_t eden_used = eden_space_used(g1h); + size_t survivor_used = survivor_space_used(g1h); + used = subtract_up_to_zero(used, eden_used); + used = subtract_up_to_zero(used, survivor_used); + return used; +} + +// See the comment at the top of g1MemoryPool.hpp +size_t G1MemoryPoolSuper::old_space_max(G1CollectedHeap* g1h) { + size_t max = overall_max(g1h); + size_t eden_max = eden_space_max(g1h); + size_t survivor_max = survivor_space_max(g1h); + max = subtract_up_to_zero(max, eden_max); + max = subtract_up_to_zero(max, survivor_max); + max = MAX2(max, (size_t) HeapRegion::GrainBytes); + return max; +} + +G1EdenPool::G1EdenPool(G1CollectedHeap* g1h) : + G1MemoryPoolSuper(g1h, + "G1 Eden", + eden_space_committed(g1h), /* init_size */ + eden_space_max(g1h), /* max_size */ + false /* support_usage_threshold */) { +} + +MemoryUsage G1EdenPool::get_memory_usage() { + size_t initial_sz = initial_size(); + size_t max_sz = max_size(); + size_t used = used_in_bytes(); + size_t committed = eden_space_committed(_g1h); + + return MemoryUsage(initial_sz, used, committed, max_sz); +} + +G1SurvivorPool::G1SurvivorPool(G1CollectedHeap* g1h) : + G1MemoryPoolSuper(g1h, + "G1 Survivor", + survivor_space_committed(g1h), /* init_size */ + survivor_space_max(g1h), /* max_size */ + false /* support_usage_threshold */) { +} + +MemoryUsage G1SurvivorPool::get_memory_usage() { + size_t initial_sz = initial_size(); + size_t max_sz = max_size(); + size_t used = used_in_bytes(); + size_t committed = survivor_space_committed(_g1h); + + return MemoryUsage(initial_sz, used, committed, max_sz); +} + +G1OldGenPool::G1OldGenPool(G1CollectedHeap* g1h) : + G1MemoryPoolSuper(g1h, + "G1 Old Gen", + old_space_committed(g1h), /* init_size */ + old_space_max(g1h), /* max_size */ + true /* support_usage_threshold */) { +} + +MemoryUsage G1OldGenPool::get_memory_usage() { + size_t initial_sz = initial_size(); + size_t max_sz = max_size(); + size_t used = used_in_bytes(); + size_t committed = old_space_committed(_g1h); + + return MemoryUsage(initial_sz, used, committed, max_sz); +} diff --git a/src/share/vm/services/g1MemoryPool.hpp b/src/share/vm/services/g1MemoryPool.hpp new file mode 100644 index 000000000..cfc61e5f1 --- /dev/null +++ b/src/share/vm/services/g1MemoryPool.hpp @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +class G1CollectedHeap; + +// This file contains the three classes that represent the memory +// pools of the G1 spaces: G1EdenPool, G1SurvivorPool, and +// G1OldGenPool. In G1, unlike our other GCs, we do not have a +// physical space for each of those spaces. Instead, we allocate +// regions for all three spaces out of a single pool of regions (that +// pool basically covers the entire heap). As a result, the eden, +// survivor, and old gen are considered logical spaces in G1, as each +// is a set of non-contiguous regions. This is also reflected in the +// way we map them to memory pools here. The easiest way to have done +// this would have been to map the entire G1 heap to a single memory +// pool. However, it's helpful to show how large the eden and survivor +// get, as this does affect the performance and behavior of G1. Which +// is why we introduce the three memory pools implemented here. +// +// The above approach inroduces a couple of challenging issues in the +// implementation of the three memory pools: +// +// 1) The used space calculation for a pool is not necessarily +// independent of the others. We can easily get from G1 the overall +// used space in the entire heap, the number of regions in the young +// generation (includes both eden and survivors), and the number of +// survivor regions. So, from that we calculate: +// +// survivor_used = survivor_num * region_size +// eden_used = young_region_num * region_size - survivor_used +// old_gen_used = overall_used - eden_used - survivor_used +// +// Note that survivor_used and eden_used are upper bounds. To get the +// actual value we would have to iterate over the regions and add up +// ->used(). But that'd be expensive. So, we'll accept some lack of +// accuracy for those two. But, we have to be careful when calculating +// old_gen_used, in case we subtract from overall_used more then the +// actual number and our result goes negative. +// +// 2) Calculating the used space is straightforward, as described +// above. However, how do we calculate the committed space, given that +// we allocate space for the eden, survivor, and old gen out of the +// same pool of regions? One way to do this is to use the used value +// as also the committed value for the eden and survivor spaces and +// then calculate the old gen committed space as follows: +// +// old_gen_committed = overall_committed - eden_committed - survivor_committed +// +// Maybe a better way to do that would be to calculate used for eden +// and survivor as a sum of ->used() over their regions and then +// calculate committed as region_num * region_size (i.e., what we use +// to calculate the used space now). This is something to consider +// in the future. +// +// 3) Another decision that is again not straightforward is what is +// the max size that each memory pool can grow to. Right now, we set +// that the committed size for the eden and the survivors and +// calculate the old gen max as follows (basically, it's a similar +// pattern to what we use for the committed space, as described +// above): +// +// old_gen_max = overall_max - eden_max - survivor_max +// +// 4) Now, there is a very subtle issue with all the above. The +// framework will call get_memory_usage() on the three pools +// asynchronously. As a result, each call might get a different value +// for, say, survivor_num which will yield inconsistent values for +// eden_used, survivor_used, and old_gen_used (as survivor_num is used +// in the calculation of all three). This would normally be +// ok. However, it's possible that this might cause the sum of +// eden_used, survivor_used, and old_gen_used to go over the max heap +// size and this seems to sometimes cause JConsole (and maybe other +// clients) to get confused. There's not a really an easy / clean +// solution to this problem, due to the asynchrounous nature of the +// framework. + + +// This class is shared by the three G1 memory pool classes +// (G1EdenPool, G1SurvivorPool, G1OldGenPool). Given that the way we +// calculate used / committed bytes for these three pools is related +// (see comment above), we put the calculations in this class so that +// we can easily share them among the subclasses. +class G1MemoryPoolSuper : public CollectedMemoryPool { +private: + // It returns x - y if x > y, 0 otherwise. + // As described in the comment above, some of the inputs to the + // calculations we have to do are obtained concurrently and hence + // may be inconsistent with each other. So, this provides a + // defensive way of performing the subtraction and avoids the value + // going negative (which would mean a very large result, given that + // the parameter are size_t). + static size_t subtract_up_to_zero(size_t x, size_t y) { + if (x > y) { + return x - y; + } else { + return 0; + } + } + +protected: + G1CollectedHeap* _g1h; + + // Would only be called from subclasses. + G1MemoryPoolSuper(G1CollectedHeap* g1h, + const char* name, + size_t init_size, + size_t max_size, + bool support_usage_threshold); + + // The reason why all the code is in static methods is so that it + // can be safely called from the constructors of the subclasses. + + static size_t overall_committed(G1CollectedHeap* g1h) { + return g1h->capacity(); + } + static size_t overall_used(G1CollectedHeap* g1h) { + return g1h->used_unlocked(); + } + static size_t overall_max(G1CollectedHeap* g1h) { + return g1h->g1_reserved_obj_bytes(); + } + + static size_t eden_space_committed(G1CollectedHeap* g1h); + static size_t eden_space_used(G1CollectedHeap* g1h); + static size_t eden_space_max(G1CollectedHeap* g1h); + + static size_t survivor_space_committed(G1CollectedHeap* g1h); + static size_t survivor_space_used(G1CollectedHeap* g1h); + static size_t survivor_space_max(G1CollectedHeap* g1h); + + static size_t old_space_committed(G1CollectedHeap* g1h); + static size_t old_space_used(G1CollectedHeap* g1h); + static size_t old_space_max(G1CollectedHeap* g1h); +}; + +// Memory pool that represents the G1 eden. +class G1EdenPool : public G1MemoryPoolSuper { +public: + G1EdenPool(G1CollectedHeap* g1h); + + size_t used_in_bytes() { + return eden_space_used(_g1h); + } + size_t max_size() const { + return eden_space_max(_g1h); + } + MemoryUsage get_memory_usage(); +}; + +// Memory pool that represents the G1 survivor. +class G1SurvivorPool : public G1MemoryPoolSuper { +public: + G1SurvivorPool(G1CollectedHeap* g1h); + + size_t used_in_bytes() { + return survivor_space_used(_g1h); + } + size_t max_size() const { + return survivor_space_max(_g1h); + } + MemoryUsage get_memory_usage(); +}; + +// Memory pool that represents the G1 old gen. +class G1OldGenPool : public G1MemoryPoolSuper { +public: + G1OldGenPool(G1CollectedHeap* g1h); + + size_t used_in_bytes() { + return old_space_used(_g1h); + } + size_t max_size() const { + return old_space_max(_g1h); + } + MemoryUsage get_memory_usage(); +}; diff --git a/src/share/vm/services/memoryManager.cpp b/src/share/vm/services/memoryManager.cpp index 7b7905c5a..f92ba14c1 100644 --- a/src/share/vm/services/memoryManager.cpp +++ b/src/share/vm/services/memoryManager.cpp @@ -72,6 +72,14 @@ GCMemoryManager* MemoryManager::get_psMarkSweep_memory_manager() { return (GCMemoryManager*) new PSMarkSweepMemoryManager(); } +GCMemoryManager* MemoryManager::get_g1YoungGen_memory_manager() { + return (GCMemoryManager*) new G1YoungGenMemoryManager(); +} + +GCMemoryManager* MemoryManager::get_g1OldGen_memory_manager() { + return (GCMemoryManager*) new G1OldGenMemoryManager(); +} + instanceOop MemoryManager::get_memory_manager_instance(TRAPS) { // Must do an acquire so as to force ordering of subsequent // loads from anything _memory_mgr_obj points to or implies. diff --git a/src/share/vm/services/memoryManager.hpp b/src/share/vm/services/memoryManager.hpp index 4efc955eb..955ca8139 100644 --- a/src/share/vm/services/memoryManager.hpp +++ b/src/share/vm/services/memoryManager.hpp @@ -54,7 +54,9 @@ public: ParNew, ConcurrentMarkSweep, PSScavenge, - PSMarkSweep + PSMarkSweep, + G1YoungGen, + G1OldGen }; MemoryManager(); @@ -85,6 +87,8 @@ public: static GCMemoryManager* get_cms_memory_manager(); static GCMemoryManager* get_psScavenge_memory_manager(); static GCMemoryManager* get_psMarkSweep_memory_manager(); + static GCMemoryManager* get_g1YoungGen_memory_manager(); + static GCMemoryManager* get_g1OldGen_memory_manager(); }; @@ -231,3 +235,21 @@ public: MemoryManager::Name kind() { return MemoryManager::PSMarkSweep; } const char* name() { return "PS MarkSweep"; } }; + +class G1YoungGenMemoryManager : public GCMemoryManager { +private: +public: + G1YoungGenMemoryManager() : GCMemoryManager() {} + + MemoryManager::Name kind() { return MemoryManager::G1YoungGen; } + const char* name() { return "G1 Young Generation"; } +}; + +class G1OldGenMemoryManager : public GCMemoryManager { +private: +public: + G1OldGenMemoryManager() : GCMemoryManager() {} + + MemoryManager::Name kind() { return MemoryManager::G1OldGen; } + const char* name() { return "G1 Old Generation"; } +}; diff --git a/src/share/vm/services/memoryService.cpp b/src/share/vm/services/memoryService.cpp index 1d243828c..9941a6f0f 100644 --- a/src/share/vm/services/memoryService.cpp +++ b/src/share/vm/services/memoryService.cpp @@ -60,8 +60,8 @@ void MemoryService::set_universe_heap(CollectedHeap* heap) { break; } case CollectedHeap::G1CollectedHeap : { - G1CollectedHeap::g1_unimplemented(); - return; + add_g1_heap_info(G1CollectedHeap::heap()); + break; } #endif // SERIALGC default: { @@ -164,6 +164,19 @@ void MemoryService::add_parallel_scavenge_heap_info(ParallelScavengeHeap* heap) add_psOld_memory_pool(heap->old_gen(), _major_gc_manager); add_psPerm_memory_pool(heap->perm_gen(), _major_gc_manager); } + +void MemoryService::add_g1_heap_info(G1CollectedHeap* g1h) { + assert(UseG1GC, "sanity"); + + _minor_gc_manager = MemoryManager::get_g1YoungGen_memory_manager(); + _major_gc_manager = MemoryManager::get_g1OldGen_memory_manager(); + _managers_list->append(_minor_gc_manager); + _managers_list->append(_major_gc_manager); + + add_g1YoungGen_memory_pool(g1h, _major_gc_manager, _minor_gc_manager); + add_g1OldGen_memory_pool(g1h, _major_gc_manager); + add_g1PermGen_memory_pool(g1h, _major_gc_manager); +} #endif // SERIALGC MemoryPool* MemoryService::add_gen(Generation* gen, @@ -384,6 +397,64 @@ void MemoryService::add_psPerm_memory_pool(PSPermGen* gen, MemoryManager* mgr) { mgr->add_pool(perm_gen); _pools_list->append(perm_gen); } + +void MemoryService::add_g1YoungGen_memory_pool(G1CollectedHeap* g1h, + MemoryManager* major_mgr, + MemoryManager* minor_mgr) { + assert(major_mgr != NULL && minor_mgr != NULL, "should have two managers"); + + G1EdenPool* eden = new G1EdenPool(g1h); + G1SurvivorPool* survivor = new G1SurvivorPool(g1h); + + major_mgr->add_pool(eden); + major_mgr->add_pool(survivor); + minor_mgr->add_pool(eden); + minor_mgr->add_pool(survivor); + _pools_list->append(eden); + _pools_list->append(survivor); +} + +void MemoryService::add_g1OldGen_memory_pool(G1CollectedHeap* g1h, + MemoryManager* mgr) { + assert(mgr != NULL, "should have one manager"); + + G1OldGenPool* old_gen = new G1OldGenPool(g1h); + mgr->add_pool(old_gen); + _pools_list->append(old_gen); +} + +void MemoryService::add_g1PermGen_memory_pool(G1CollectedHeap* g1h, + MemoryManager* mgr) { + assert(mgr != NULL, "should have one manager"); + + CompactingPermGenGen* perm_gen = (CompactingPermGenGen*) g1h->perm_gen(); + PermanentGenerationSpec* spec = perm_gen->spec(); + size_t max_size = spec->max_size() - spec->read_only_size() + - spec->read_write_size(); + MemoryPool* pool = add_space(perm_gen->unshared_space(), + "G1 Perm Gen", + false, /* is_heap */ + max_size, + true /* support_usage_threshold */); + mgr->add_pool(pool); + + // in case we support CDS in G1 + if (UseSharedSpaces) { + pool = add_space(perm_gen->ro_space(), + "G1 Perm Gen [shared-ro]", + false, /* is_heap */ + spec->read_only_size(), + true /* support_usage_threshold */); + mgr->add_pool(pool); + + pool = add_space(perm_gen->rw_space(), + "G1 Perm Gen [shared-rw]", + false, /* is_heap */ + spec->read_write_size(), + true /* support_usage_threshold */); + mgr->add_pool(pool); + } +} #endif // SERIALGC void MemoryService::add_code_heap_memory_pool(CodeHeap* heap) { diff --git a/src/share/vm/services/memoryService.hpp b/src/share/vm/services/memoryService.hpp index 52b76a729..44823ae42 100644 --- a/src/share/vm/services/memoryService.hpp +++ b/src/share/vm/services/memoryService.hpp @@ -40,6 +40,7 @@ class GenCollectedHeap; class ParallelScavengeHeap; class CompactingPermGenGen; class CMSPermGenGen; +class G1CollectedHeap; // VM Monitoring and Management Support @@ -88,6 +89,13 @@ private: static void add_psPerm_memory_pool(PSPermGen* perm, MemoryManager* mgr); + static void add_g1YoungGen_memory_pool(G1CollectedHeap* g1h, + MemoryManager* major_mgr, + MemoryManager* minor_mgr); + static void add_g1OldGen_memory_pool(G1CollectedHeap* g1h, + MemoryManager* mgr); + static void add_g1PermGen_memory_pool(G1CollectedHeap* g1h, + MemoryManager* mgr); static MemoryPool* add_space(ContiguousSpace* space, const char* name, @@ -111,6 +119,7 @@ private: static void add_gen_collected_heap_info(GenCollectedHeap* heap); static void add_parallel_scavenge_heap_info(ParallelScavengeHeap* heap); + static void add_g1_heap_info(G1CollectedHeap* g1h); public: static void set_universe_heap(CollectedHeap* heap); diff --git a/src/share/vm/utilities/growableArray.hpp b/src/share/vm/utilities/growableArray.hpp index 77ce93c13..c52364666 100644 --- a/src/share/vm/utilities/growableArray.hpp +++ b/src/share/vm/utilities/growableArray.hpp @@ -278,6 +278,17 @@ template<class E> class GrowableArray : public GenericGrowableArray { _len--; } + // inserts the given element before the element at index i + void insert_before(const int idx, const E& elem) { + check_nesting(); + if (_len == _max) grow(_len); + for (int j = _len - 1; j >= idx; j--) { + _data[j + 1] = _data[j]; + } + _len++; + _data[idx] = elem; + } + void appendAll(const GrowableArray<E>* l) { for (int i = 0; i < l->_len; i++) { raw_at_put_grow(_len, l->_data[i], 0); diff --git a/src/share/vm/utilities/numberSeq.cpp b/src/share/vm/utilities/numberSeq.cpp index 7cd06b28c..0c152cfde 100644 --- a/src/share/vm/utilities/numberSeq.cpp +++ b/src/share/vm/utilities/numberSeq.cpp @@ -241,3 +241,33 @@ double TruncatedSeq::predict_next() const { return b0 + b1 * num; } + + +// Printing/Debugging Support + +void AbsSeq::dump() { dump_on(gclog_or_tty); } + +void AbsSeq::dump_on(outputStream* s) { + s->print_cr("\t _num = %d, _sum = %7.3f, _sum_of_squares = %7.3f", + _num, _sum, _sum_of_squares); + s->print_cr("\t _davg = %7.3f, _dvariance = %7.3f, _alpha = %7.3f", + _davg, _dvariance, _alpha); +} + +void NumberSeq::dump_on(outputStream* s) { + AbsSeq::dump_on(s); + s->print_cr("\t\t _last = %7.3f, _maximum = %7.3f"); +} + +void TruncatedSeq::dump_on(outputStream* s) { + AbsSeq::dump_on(s); + s->print_cr("\t\t _length = %d, _next = %d", _length, _next); + for (int i = 0; i < _length; i++) { + if (i%5 == 0) { + s->cr(); + s->print("\t"); + } + s->print("\t[%d]=%7.3f", i, _sequence[i]); + } + s->print_cr(""); +} diff --git a/src/share/vm/utilities/numberSeq.hpp b/src/share/vm/utilities/numberSeq.hpp index 4366c8bf1..eb40b149b 100644 --- a/src/share/vm/utilities/numberSeq.hpp +++ b/src/share/vm/utilities/numberSeq.hpp @@ -74,6 +74,10 @@ public: double davg() const; // decaying average double dvariance() const; // decaying variance double dsd() const; // decaying "standard deviation" + + // Debugging/Printing + virtual void dump(); + virtual void dump_on(outputStream* s); }; class NumberSeq: public AbsSeq { @@ -91,6 +95,9 @@ public: virtual void add(double val); virtual double maximum() const { return _maximum; } virtual double last() const { return _last; } + + // Debugging/Printing + virtual void dump_on(outputStream* s); }; class TruncatedSeq: public AbsSeq { @@ -114,4 +121,7 @@ public: double oldest() const; // the oldest valid value in the sequence double predict_next() const; // prediction based on linear regression + + // Debugging/Printing + virtual void dump_on(outputStream* s); }; diff --git a/test/compiler/6895383/Test.java b/test/compiler/6895383/Test.java new file mode 100644 index 000000000..719ba3113 --- /dev/null +++ b/test/compiler/6895383/Test.java @@ -0,0 +1,51 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +/** + * @test + * @bug 6895383 + * @summary JCK test throws NPE for method compiled with Escape Analysis + * + * @run main/othervm -Xcomp Test + */ + +public class Test { + public static void main(String argv[]) { + Test test = new Test(); + test.testRemove1_IndexOutOfBounds(); + test.testAddAll1_IndexOutOfBoundsException(); + } + + public void testRemove1_IndexOutOfBounds() { + CopyOnWriteArrayList c = new CopyOnWriteArrayList(); + } + + public void testAddAll1_IndexOutOfBoundsException() { + try { + CopyOnWriteArrayList c = new CopyOnWriteArrayList(); + c.addAll(-1, new LinkedList()); // should throw IndexOutOfBoundsException + } catch (IndexOutOfBoundsException e) { + } + } +} diff --git a/test/compiler/6896727/Test.java b/test/compiler/6896727/Test.java new file mode 100644 index 000000000..a9aef1d2b --- /dev/null +++ b/test/compiler/6896727/Test.java @@ -0,0 +1,48 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + */ + +/* + * @test + * @bug 6896727 + * @summary nsk/logging/LoggingPermission/LoggingPermission/logperm002 fails with G1, EscapeAnalisys w/o COOPs + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -Xcomp -XX:+DoEscapeAnalysis -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC Test + */ + +public class Test { + + final static String testString = "abracadabra"; + public static void main(String args[]) { + String params[][] = { + {"control", testString} + }; + for (int i=0; i<params.length; i++) { + try { + System.out.println("Params :" + testString + " and " + params[i][0] + ", " + params[i][1]); + if (params[i][1] == null) { + System.exit(97); + } + } catch (Exception e) {} + } + } +} diff --git a/test/compiler/6901572/Test.java b/test/compiler/6901572/Test.java new file mode 100644 index 000000000..1139161b1 --- /dev/null +++ b/test/compiler/6901572/Test.java @@ -0,0 +1,55 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ + +/** + * @test + * @bug 6901572 + * @summary JVM 1.6.16 crash on loops: assert(has_node(i),"") + * + * @run main/othervm Test + */ + + +public class Test { + + public static void main(String[] args) { + for (int i = 0; i < 2; i++) + NestedLoop(); + } + + public static long NestedLoop() { + final int n = 50; + long startTime = System.currentTimeMillis(); + int x = 0; + for(int a = 0; a < n; a++) + for(int b = 0; b < n; b++) + for(int c = 0; c < n; c++) + for(int d = 0; d < n; d++) + for(int e = 0; e < n; e++) + for(int f = 0; f < n; f++) + x++; + long stopTime = System.currentTimeMillis(); + + return stopTime - startTime; + } +} |