100 files changed, 4452 insertions, 885 deletions
diff --git a/.hgignore b/.hgignore
index fec499bf6..9818ff1af 100644
--- a/.hgignore
+++ b/.hgignore
@@ -1,6 +1,6 @@
 ^build/
 ^dist/
-^nbproject/private/
+/nbproject/private/
 ^src/share/tools/hsdis/build/
 ^src/share/tools/IdealGraphVisualizer/[a-zA-Z0-9]*/build/
 ^src/share/tools/IdealGraphVisualizer/build/
diff --git a/.hgtags b/.hgtags
index b8ce52726..980e37e6b 100644
--- a/.hgtags
+++ b/.hgtags
@@ -50,3 +50,6 @@ a94714c550658fd6741793ef036cb9625dc2ab1a jdk7-b72
 faf94d94786b621f8e13cbcc941ca69c6d967c3f jdk7-b73
 f4b900403d6e4b0af51447bd13bbe23fe3a1dac7 jdk7-b74
 d8dd291a362acb656026a9c0a9da48501505a1e7 jdk7-b75
+9174bb32e934965288121f75394874eeb1fcb649 jdk7-b76
+455105fc81d941482f8f8056afaa7aa0949c9300 jdk7-b77
+e703499b4b51e3af756ae77c3d5e8b3058a14e4e jdk7-b78
diff --git a/make/hotspot_version b/make/hotspot_version
index 5da770b9f..aba3cd361 100644
--- a/make/hotspot_version
+++ b/make/hotspot_version
@@ -35,7 +35,7 @@ HOTSPOT_VM_COPYRIGHT=Copyright 2009
 
 HS_MAJOR_VER=17
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=05
+HS_BUILD_NUMBER=06
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=7
diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp
index 714f2fb8f..c22a8c1d0 100644
--- a/src/cpu/x86/vm/assembler_x86.cpp
+++ b/src/cpu/x86/vm/assembler_x86.cpp
@@ -7666,7 +7666,7 @@ RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_ad
 
 #ifdef ASSERT
   Label L;
-  testl(tmp, tmp);
+  testptr(tmp, tmp);
   jccb(Assembler::notZero, L);
   hlt();
   bind(L);
diff --git a/src/cpu/x86/vm/interp_masm_x86_32.cpp b/src/cpu/x86/vm/interp_masm_x86_32.cpp
index e06e423a4..a30092523 100644
--- a/src/cpu/x86/vm/interp_masm_x86_32.cpp
+++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp
@@ -196,6 +196,9 @@ void InterpreterMacroAssembler::get_cache_index_at_bcp(Register reg, int bcp_off
   } else {
     assert(EnableInvokeDynamic, "giant index used only for EnableInvokeDynamic");
     movl(reg, Address(rsi, bcp_offset));
+    // Check if the secondary index definition is still ~x, otherwise
+    // we have to change the following assembler code to calculate the
+    // plain index.
     assert(constantPoolCacheOopDesc::decode_secondary_index(~123) == 123, "else change next line");
     notl(reg);  // convert to plain index
   }
diff --git a/src/cpu/x86/vm/interp_masm_x86_64.cpp b/src/cpu/x86/vm/interp_masm_x86_64.cpp
index ad449c8bd..9418540dc 100644
--- a/src/cpu/x86/vm/interp_masm_x86_64.cpp
+++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp
@@ -185,12 +185,30 @@ void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(
 }
 
 
+void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index,
+                                                       int bcp_offset,
+                                                       bool giant_index) {
+  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+  if (!giant_index) {
+    load_unsigned_short(index, Address(r13, bcp_offset));
+  } else {
+    assert(EnableInvokeDynamic, "giant index used only for EnableInvokeDynamic");
+    movl(index, Address(r13, bcp_offset));
+    // Check if the secondary index definition is still ~x, otherwise
+    // we have to change the following assembler code to calculate the
+    // plain index.
+    assert(constantPoolCacheOopDesc::decode_secondary_index(~123) == 123, "else change next line");
+    notl(index);  // convert to plain index
+  }
+}
+
+
 void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache,
                                                            Register index,
-                                                           int bcp_offset) {
-  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+                                                           int bcp_offset,
+                                                           bool giant_index) {
   assert(cache != index, "must use different registers");
-  load_unsigned_short(index, Address(r13, bcp_offset));
+  get_cache_index_at_bcp(index, bcp_offset, giant_index);
   movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize));
   assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
   // convert from field index to ConstantPoolCacheEntry index
@@ -200,10 +218,10 @@ void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache,
 
 void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
                                                                Register tmp,
-                                                               int bcp_offset) {
-  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+                                                               int bcp_offset,
+                                                               bool giant_index) {
   assert(cache != tmp, "must use different register");
-  load_unsigned_short(tmp, Address(r13, bcp_offset));
+  get_cache_index_at_bcp(tmp, bcp_offset, giant_index);
   assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
   // convert from field index to ConstantPoolCacheEntry index
   // and from word offset to byte offset
@@ -1236,7 +1254,8 @@ void InterpreterMacroAssembler::profile_final_call(Register mdp) {
 
 void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
                                                      Register mdp,
-                                                     Register reg2) {
+                                                     Register reg2,
+                                                     bool receiver_can_be_null) {
   if (ProfileInterpreter) {
     Label profile_continue;
 
@@ -1246,8 +1265,15 @@ void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
     // We are making a call.  Increment the count.
     increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
 
+    Label skip_receiver_profile;
+    if (receiver_can_be_null) {
+      testptr(receiver, receiver);
+      jcc(Assembler::zero, skip_receiver_profile);
+    }
+
     // Record the receiver type.
     record_klass_in_profile(receiver, mdp, reg2);
+    bind(skip_receiver_profile);
 
     // The method data pointer needs to be updated to reflect the new target.
     update_mdp_by_constant(mdp,
diff --git a/src/cpu/x86/vm/interp_masm_x86_64.hpp b/src/cpu/x86/vm/interp_masm_x86_64.hpp
index c35cb3a19..0cfc9bf7f 100644
--- a/src/cpu/x86/vm/interp_masm_x86_64.hpp
+++ b/src/cpu/x86/vm/interp_masm_x86_64.hpp
@@ -95,9 +95,10 @@ class InterpreterMacroAssembler: public MacroAssembler {
 
   void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset);
   void get_cache_and_index_at_bcp(Register cache, Register index,
-                                  int bcp_offset);
+                                  int bcp_offset, bool giant_index = false);
   void get_cache_entry_pointer_at_bcp(Register cache, Register tmp,
-                                      int bcp_offset);
+                                      int bcp_offset, bool giant_index = false);
+  void get_cache_index_at_bcp(Register index, int bcp_offset, bool giant_index = false);
 
 
   void pop_ptr(Register r = rax);
@@ -236,7 +237,8 @@ class InterpreterMacroAssembler: public MacroAssembler {
   void profile_call(Register mdp);
   void profile_final_call(Register mdp);
   void profile_virtual_call(Register receiver, Register mdp,
-                            Register scratch2);
+                            Register scratch2,
+                            bool receiver_can_be_null = false);
   void profile_ret(Register return_bci, Register mdp);
   void profile_null_seen(Register mdp);
   void profile_typecheck(Register mdp, Register klass, Register scratch);
diff --git a/src/cpu/x86/vm/interpreter_x86_64.cpp b/src/cpu/x86/vm/interpreter_x86_64.cpp
index c3cbf56ca..6c234a33a 100644
--- a/src/cpu/x86/vm/interpreter_x86_64.cpp
+++ b/src/cpu/x86/vm/interpreter_x86_64.cpp
@@ -277,12 +277,11 @@ address InterpreterGenerator::generate_abstract_entry(void) {
   address entry_point = __ pc();
 
   // abstract method entry
-  // remove return address. Not really needed, since exception
-  // handling throws away expression stack
-  __ pop(rbx);
 
-  // adjust stack to what a normal return would do
-  __ mov(rsp, r13);
+  //  pop return address, reset last_sp to NULL
+  __ empty_expression_stack();
+  __ restore_bcp();      // rsi must be correct for exception handler   (was destroyed)
+  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
 
   // throw exception
   __ call_VM(noreg, CAST_FROM_FN_PTR(address,
@@ -300,7 +299,10 @@ address InterpreterGenerator::generate_method_handle_entry(void) {
   if (!EnableMethodHandles) {
     return generate_abstract_entry();
   }
-  return generate_abstract_entry(); //6815692//
+
+  address entry_point = MethodHandles::generate_method_handle_interpreter_entry(_masm);
+
+  return entry_point;
 }
 
 
diff --git a/src/cpu/x86/vm/methodHandles_x86.cpp b/src/cpu/x86/vm/methodHandles_x86.cpp
index a682a81b8..58024f2a8 100644
--- a/src/cpu/x86/vm/methodHandles_x86.cpp
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp
@@ -448,7 +448,7 @@ void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHan
                                 rbx_index, Address::times_ptr,
                                 base + vtableEntry::method_offset_in_bytes());
       Register rbx_method = rbx_temp;
-      __ movl(rbx_method, vtable_entry_addr);
+      __ movptr(rbx_method, vtable_entry_addr);
 
       __ verify_oop(rbx_method);
       __ jmp(rbx_method_fie);
diff --git a/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
index 6b0731490..cc80a4df2 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
@@ -2935,6 +2935,16 @@ class StubGenerator: public StubCodeGenerator {
 
     // arraycopy stubs used by compilers
     generate_arraycopy_stubs();
+
+    // generic method handle stubs
+    if (EnableMethodHandles && SystemDictionary::MethodHandle_klass() != NULL) {
+      for (MethodHandles::EntryKind ek = MethodHandles::_EK_FIRST;
+           ek < MethodHandles::_EK_LIMIT;
+           ek = MethodHandles::EntryKind(1 + (int)ek)) {
+        StubCodeMark mark(this, "MethodHandle", MethodHandles::entry_name(ek));
+        MethodHandles::generate_method_handle_stub(_masm, ek);
+      }
+    }
   }
 
  public:
diff --git a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
index cd9d08915..273d3901e 100644
--- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
+++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
@@ -100,21 +100,26 @@ address TemplateInterpreterGenerator::generate_ClassCastException_handler() {
   return entry;
 }
 
-// Arguments are: required type in rarg1, failing object (or NULL) in rarg2
+// Arguments are: required type at TOS+8, failing object (or NULL) at TOS+4.
 address TemplateInterpreterGenerator::generate_WrongMethodType_handler() {
   address entry = __ pc();
 
   __ pop(c_rarg2);              // failing object is at TOS
   __ pop(c_rarg1);              // required type is at TOS+8
 
-  // expression stack must be empty before entering the VM if an
-  // exception happened
+  __ verify_oop(c_rarg1);
+  __ verify_oop(c_rarg2);
+
+  // Various method handle types use interpreter registers as temps.
+  __ restore_bcp();
+  __ restore_locals();
+
+  // Expression stack must be empty before entering the VM for an exception.
   __ empty_expression_stack();
 
   __ call_VM(noreg,
              CAST_FROM_FN_PTR(address,
-                              InterpreterRuntime::
-                              throw_WrongMethodTypeException),
+                              InterpreterRuntime::throw_WrongMethodTypeException),
              // pass required type, failing object (or NULL)
              c_rarg1, c_rarg2);
   return entry;
@@ -182,15 +187,29 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state,
   __ restore_bcp();
   __ restore_locals();
 
-  __ get_cache_and_index_at_bcp(rbx, rcx, 1);
+  Label L_got_cache, L_giant_index;
+  if (EnableInvokeDynamic) {
+    __ cmpb(Address(r13, 0), Bytecodes::_invokedynamic);
+    __ jcc(Assembler::equal, L_giant_index);
+  }
+  __ get_cache_and_index_at_bcp(rbx, rcx, 1, false);
+  __ bind(L_got_cache);
   __ movl(rbx, Address(rbx, rcx,
-                       Address::times_8,
+                       Address::times_ptr,
                        in_bytes(constantPoolCacheOopDesc::base_offset()) +
                        3 * wordSize));
   __ andl(rbx, 0xFF);
   if (TaggedStackInterpreter) __ shll(rbx, 1); // 2 slots per parameter.
   __ lea(rsp, Address(rsp, rbx, Address::times_8));
   __ dispatch_next(state, step);
+
+  // out of the main line of code...
+  if (EnableInvokeDynamic) {
+    __ bind(L_giant_index);
+    __ get_cache_and_index_at_bcp(rbx, rcx, 1, true);
+    __ jmp(L_got_cache);
+  }
+
   return entry;
 }
 
diff --git a/src/cpu/x86/vm/templateTable_x86_32.cpp b/src/cpu/x86/vm/templateTable_x86_32.cpp
index ff78f933d..8959b3410 100644
--- a/src/cpu/x86/vm/templateTable_x86_32.cpp
+++ b/src/cpu/x86/vm/templateTable_x86_32.cpp
@@ -3146,7 +3146,6 @@ void TemplateTable::invokedynamic(int byte_no) {
     __ profile_call(rsi);
   }
 
-  Label handle_unlinked_site;
   __ movptr(rcx, Address(rax, __ delayed_value(java_dyn_CallSite::target_offset_in_bytes, rcx)));
   __ null_check(rcx);
   __ prepare_to_jump_from_interpreted();
diff --git a/src/cpu/x86/vm/templateTable_x86_64.cpp b/src/cpu/x86/vm/templateTable_x86_64.cpp
index 1180227b5..f461b10e4 100644
--- a/src/cpu/x86/vm/templateTable_x86_64.cpp
+++ b/src/cpu/x86/vm/templateTable_x86_64.cpp
@@ -203,18 +203,15 @@ void TemplateTable::patch_bytecode(Bytecodes::Code bytecode, Register bc,
     __ jcc(Assembler::notEqual, fast_patch);
     __ get_method(scratch);
     // Let breakpoint table handling rewrite to quicker bytecode
-    __ call_VM(noreg,
-               CAST_FROM_FN_PTR(address,
-                                InterpreterRuntime::set_original_bytecode_at),
-               scratch, r13, bc);
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), scratch, r13, bc);
 #ifndef ASSERT
     __ jmpb(patch_done);
-    __ bind(fast_patch);
-  }
 #else
     __ jmp(patch_done);
+#endif
     __ bind(fast_patch);
   }
+#ifdef ASSERT
   Label okay;
   __ load_unsigned_byte(scratch, at_bcp(0));
   __ cmpl(scratch, (int) Bytecodes::java_code(bytecode));
@@ -2054,26 +2051,28 @@ void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits
   }
 }
 
-void TemplateTable::resolve_cache_and_index(int byte_no,
-                                            Register Rcache,
-                                            Register index) {
+void TemplateTable::resolve_cache_and_index(int byte_no, Register Rcache, Register index) {
   assert(byte_no == 1 || byte_no == 2, "byte_no out of range");
+  bool is_invokedynamic = (bytecode() == Bytecodes::_invokedynamic);
 
   const Register temp = rbx;
   assert_different_registers(Rcache, index, temp);
 
   const int shift_count = (1 + byte_no) * BitsPerByte;
   Label resolved;
-  __ get_cache_and_index_at_bcp(Rcache, index, 1);
-  __ movl(temp, Address(Rcache,
-                        index, Address::times_8,
-                        constantPoolCacheOopDesc::base_offset() +
-                        ConstantPoolCacheEntry::indices_offset()));
-  __ shrl(temp, shift_count);
-  // have we resolved this bytecode?
-  __ andl(temp, 0xFF);
-  __ cmpl(temp, (int) bytecode());
-  __ jcc(Assembler::equal, resolved);
+  __ get_cache_and_index_at_bcp(Rcache, index, 1, is_invokedynamic);
+  if (is_invokedynamic) {
+    // we are resolved if the f1 field contains a non-null CallSite object
+    __ cmpptr(Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f1_offset()), (int32_t) NULL_WORD);
+    __ jcc(Assembler::notEqual, resolved);
+  } else {
+    __ movl(temp, Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()));
+    __ shrl(temp, shift_count);
+    // have we resolved this bytecode?
+    __ andl(temp, 0xFF);
+    __ cmpl(temp, (int) bytecode());
+    __ jcc(Assembler::equal, resolved);
+  }
 
   // resolve first time through
   address entry;
@@ -2090,6 +2089,9 @@ void TemplateTable::resolve_cache_and_index(int byte_no,
   case Bytecodes::_invokeinterface:
     entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
     break;
+  case Bytecodes::_invokedynamic:
+    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);
+    break;
   default:
     ShouldNotReachHere();
     break;
@@ -2098,7 +2100,7 @@ void TemplateTable::resolve_cache_and_index(int byte_no,
   __ call_VM(noreg, entry, temp);
 
   // Update registers with resolved info
-  __ get_cache_and_index_at_bcp(Rcache, index, 1);
+  __ get_cache_and_index_at_bcp(Rcache, index, 1, is_invokedynamic);
   __ bind(resolved);
 }
 
@@ -2832,15 +2834,14 @@ void TemplateTable::count_calls(Register method, Register temp) {
   ShouldNotReachHere();
 }
 
-void TemplateTable::prepare_invoke(Register method,
-                                   Register index,
-                                   int byte_no,
-                                   Bytecodes::Code code) {
+void TemplateTable::prepare_invoke(Register method, Register index, int byte_no) {
   // determine flags
+  Bytecodes::Code code = bytecode();
   const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
+  const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
   const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
   const bool is_invokespecial    = code == Bytecodes::_invokespecial;
-  const bool load_receiver       = code != Bytecodes::_invokestatic;
+  const bool load_receiver      = (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic);
   const bool receiver_null_check = is_invokespecial;
   const bool save_flags = is_invokeinterface || is_invokevirtual;
   // setup registers & access constant pool cache
@@ -2858,9 +2859,13 @@ void TemplateTable::prepare_invoke(Register method,
     __ movl(recv, flags);
     __ andl(recv, 0xFF);
     if (TaggedStackInterpreter) __ shll(recv, 1);  // index*2
-    __ movptr(recv, Address(rsp, recv, Address::times_8,
-                                 -Interpreter::expr_offset_in_bytes(1)));
-    __ verify_oop(recv);
+    Address recv_addr(rsp, recv, Address::times_8, -Interpreter::expr_offset_in_bytes(1));
+    if (is_invokedynamic) {
+      __ lea(recv, recv_addr);
+    } else {
+      __ movptr(recv, recv_addr);
+      __ verify_oop(recv);
+    }
   }
 
   // do null check if needed
@@ -2878,10 +2883,14 @@ void TemplateTable::prepare_invoke(Register method,
   ConstantPoolCacheEntry::verify_tosBits();
   // load return address
   {
-    ExternalAddress return_5((address)Interpreter::return_5_addrs_by_index_table());
-    ExternalAddress return_3((address)Interpreter::return_3_addrs_by_index_table());
-    __ lea(rscratch1, (is_invokeinterface ? return_5 : return_3));
-    __ movptr(flags, Address(rscratch1, flags, Address::times_8));
+    address table_addr;
+    if (is_invokeinterface || is_invokedynamic)
+      table_addr = (address)Interpreter::return_5_addrs_by_index_table();
+    else
+      table_addr = (address)Interpreter::return_3_addrs_by_index_table();
+    ExternalAddress table(table_addr);
+    __ lea(rscratch1, table);
+    __ movptr(flags, Address(rscratch1, flags, Address::times_ptr));
   }
 
   // push return address
@@ -2947,7 +2956,7 @@ void TemplateTable::invokevirtual_helper(Register index,
 
 void TemplateTable::invokevirtual(int byte_no) {
   transition(vtos, vtos);
-  prepare_invoke(rbx, noreg, byte_no, bytecode());
+  prepare_invoke(rbx, noreg, byte_no);
 
   // rbx: index
   // rcx: receiver
@@ -2959,7 +2968,7 @@ void TemplateTable::invokevirtual(int byte_no) {
 
 void TemplateTable::invokespecial(int byte_no) {
   transition(vtos, vtos);
-  prepare_invoke(rbx, noreg, byte_no, bytecode());
+  prepare_invoke(rbx, noreg, byte_no);
   // do the call
   __ verify_oop(rbx);
   __ profile_call(rax);
@@ -2969,7 +2978,7 @@ void TemplateTable::invokespecial(int byte_no) {
 
 void TemplateTable::invokestatic(int byte_no) {
   transition(vtos, vtos);
-  prepare_invoke(rbx, noreg, byte_no, bytecode());
+  prepare_invoke(rbx, noreg, byte_no);
   // do the call
   __ verify_oop(rbx);
   __ profile_call(rax);
@@ -2983,7 +2992,7 @@ void TemplateTable::fast_invokevfinal(int byte_no) {
 
 void TemplateTable::invokeinterface(int byte_no) {
   transition(vtos, vtos);
-  prepare_invoke(rax, rbx, byte_no, bytecode());
+  prepare_invoke(rax, rbx, byte_no);
 
   // rax: Interface
   // rbx: index
@@ -3072,7 +3081,24 @@ void TemplateTable::invokedynamic(int byte_no) {
     return;
   }
 
-  __ stop("invokedynamic NYI");//6815692//
+  prepare_invoke(rax, rbx, byte_no);
+
+  // rax: CallSite object (f1)
+  // rbx: unused (f2)
+  // rcx: receiver address
+  // rdx: flags (unused)
+
+  if (ProfileInterpreter) {
+    Label L;
+    // %%% should make a type profile for any invokedynamic that takes a ref argument
+    // profile this call
+    __ profile_call(r13);
+  }
+
+  __ movptr(rcx, Address(rax, __ delayed_value(java_dyn_CallSite::target_offset_in_bytes, rcx)));
+  __ null_check(rcx);
+  __ prepare_to_jump_from_interpreted();
+  __ jump_to_method_handle_entry(rcx, rdx);
 }
 
 
diff --git a/src/cpu/x86/vm/templateTable_x86_64.hpp b/src/cpu/x86/vm/templateTable_x86_64.hpp
index ec531a7bc..6a9fdf9ed 100644
--- a/src/cpu/x86/vm/templateTable_x86_64.hpp
+++ b/src/cpu/x86/vm/templateTable_x86_64.hpp
@@ -22,8 +22,7 @@
  *
  */
 
-  static void prepare_invoke(Register method, Register index, int byte_no,
-                             Bytecodes::Code code);
+  static void prepare_invoke(Register method, Register index, int byte_no);
   static void invokevirtual_helper(Register index, Register recv,
                                    Register flags);
   static void volatile_barrier(Assembler::Membar_mask_bits order_constraint);
diff --git a/src/share/vm/ci/ciEnv.cpp b/src/share/vm/ci/ciEnv.cpp
index 0a6de348d..5d7df11f5 100644
--- a/src/share/vm/ci/ciEnv.cpp
+++ b/src/share/vm/ci/ciEnv.cpp
@@ -46,6 +46,9 @@ ciInstanceKlass* ciEnv::_Throwable;
 ciInstanceKlass* ciEnv::_Thread;
 ciInstanceKlass* ciEnv::_OutOfMemoryError;
 ciInstanceKlass* ciEnv::_String;
+ciInstanceKlass* ciEnv::_StringBuffer;
+ciInstanceKlass* ciEnv::_StringBuilder;
+ciInstanceKlass* ciEnv::_Integer;
 
 ciSymbol*        ciEnv::_unloaded_cisymbol = NULL;
 ciInstanceKlass* ciEnv::_unloaded_ciinstance_klass = NULL;
@@ -110,6 +113,8 @@ ciEnv::ciEnv(CompileTask* task, int system_dictionary_modification_counter) {
   _ArrayIndexOutOfBoundsException_instance = NULL;
   _ArrayStoreException_instance = NULL;
   _ClassCastException_instance = NULL;
+  _the_null_string = NULL;
+  _the_min_jint_string = NULL;
 }
 
 ciEnv::ciEnv(Arena* arena) {
@@ -163,6 +168,8 @@ ciEnv::ciEnv(Arena* arena) {
   _ArrayIndexOutOfBoundsException_instance = NULL;
   _ArrayStoreException_instance = NULL;
   _ClassCastException_instance = NULL;
+  _the_null_string = NULL;
+  _the_min_jint_string = NULL;
 }
 
 ciEnv::~ciEnv() {
@@ -248,6 +255,22 @@ ciInstance* ciEnv::ClassCastException_instance() {
   return _ClassCastException_instance;
 }
 
+ciInstance* ciEnv::the_null_string() {
+  if (_the_null_string == NULL) {
+    VM_ENTRY_MARK;
+    _the_null_string = get_object(Universe::the_null_string())->as_instance();
+  }
+  return _the_null_string;
+}
+
+ciInstance* ciEnv::the_min_jint_string() {
+  if (_the_min_jint_string == NULL) {
+    VM_ENTRY_MARK;
+    _the_min_jint_string = get_object(Universe::the_min_jint_string())->as_instance();
+  }
+  return _the_min_jint_string;
+}
+
 // ------------------------------------------------------------------
 // ciEnv::get_method_from_handle
 ciMethod* ciEnv::get_method_from_handle(jobject method) {
diff --git a/src/share/vm/ci/ciEnv.hpp b/src/share/vm/ci/ciEnv.hpp
index e855dbf9e..493600e02 100644
--- a/src/share/vm/ci/ciEnv.hpp
+++ b/src/share/vm/ci/ciEnv.hpp
@@ -82,6 +82,9 @@ private:
   static ciInstanceKlass* _Thread;
   static ciInstanceKlass* _OutOfMemoryError;
   static ciInstanceKlass* _String;
+  static ciInstanceKlass* _StringBuffer;
+  static ciInstanceKlass* _StringBuilder;
+  static ciInstanceKlass* _Integer;
 
   static ciSymbol*        _unloaded_cisymbol;
   static ciInstanceKlass* _unloaded_ciinstance_klass;
@@ -97,6 +100,9 @@ private:
   ciInstance* _ArrayStoreException_instance;
   ciInstance* _ClassCastException_instance;
 
+  ciInstance* _the_null_string;      // The Java string "null"
+  ciInstance* _the_min_jint_string; // The Java string "-2147483648"
+
   // Look up a klass by name from a particular class loader (the accessor's).
   // If require_local, result must be defined in that class loader, or NULL.
   // If !require_local, a result from remote class loader may be reported,
@@ -310,6 +316,15 @@ public:
   ciInstanceKlass* String_klass() {
     return _String;
   }
+  ciInstanceKlass* StringBuilder_klass() {
+    return _StringBuilder;
+  }
+  ciInstanceKlass* StringBuffer_klass() {
+    return _StringBuffer;
+  }
+  ciInstanceKlass* Integer_klass() {
+    return _Integer;
+  }
   ciInstance* NullPointerException_instance() {
     assert(_NullPointerException_instance != NULL, "initialization problem");
     return _NullPointerException_instance;
@@ -324,6 +339,9 @@ public:
   ciInstance* ArrayStoreException_instance();
   ciInstance* ClassCastException_instance();
 
+  ciInstance* the_null_string();
+  ciInstance* the_min_jint_string();
+
   static ciSymbol* unloaded_cisymbol() {
     return _unloaded_cisymbol;
   }
diff --git a/src/share/vm/ci/ciInstanceKlass.cpp b/src/share/vm/ci/ciInstanceKlass.cpp
index 1053727a9..ac14e71d5 100644
--- a/src/share/vm/ci/ciInstanceKlass.cpp
+++ b/src/share/vm/ci/ciInstanceKlass.cpp
@@ -341,6 +341,20 @@ ciField* ciInstanceKlass::get_field_by_offset(int field_offset, bool is_static)
 }
 
 // ------------------------------------------------------------------
+// ciInstanceKlass::get_field_by_name
+ciField* ciInstanceKlass::get_field_by_name(ciSymbol* name, ciSymbol* signature, bool is_static) {
+  VM_ENTRY_MARK;
+  instanceKlass* k = get_instanceKlass();
+  fieldDescriptor fd;
+  klassOop def = k->find_field(name->get_symbolOop(), signature->get_symbolOop(), is_static, &fd);
+  if (def == NULL) {
+    return NULL;
+  }
+  ciField* field = new (CURRENT_THREAD_ENV->arena()) ciField(&fd);
+  return field;
+}
+
+// ------------------------------------------------------------------
 // ciInstanceKlass::non_static_fields.
 
 class NonStaticFieldFiller: public FieldClosure {
diff --git a/src/share/vm/ci/ciInstanceKlass.hpp b/src/share/vm/ci/ciInstanceKlass.hpp
index a60020adc..007a2ab8d 100644
--- a/src/share/vm/ci/ciInstanceKlass.hpp
+++ b/src/share/vm/ci/ciInstanceKlass.hpp
@@ -148,6 +148,7 @@ public:
 
   ciInstanceKlass* get_canonical_holder(int offset);
   ciField* get_field_by_offset(int field_offset, bool is_static);
+  ciField* get_field_by_name(ciSymbol* name, ciSymbol* signature, bool is_static);
 
   GrowableArray<ciField*>* non_static_fields();
 
diff --git a/src/share/vm/ci/ciObjectFactory.cpp b/src/share/vm/ci/ciObjectFactory.cpp
index 6fb4edc4d..f05abb21f 100644
--- a/src/share/vm/ci/ciObjectFactory.cpp
+++ b/src/share/vm/ci/ciObjectFactory.cpp
@@ -168,6 +168,15 @@ void ciObjectFactory::init_shared_objects() {
   ciEnv::_String =
     get(SystemDictionary::string_klass())
       ->as_instance_klass();
+  ciEnv::_StringBuffer =
+    get(SystemDictionary::stringBuffer_klass())
+      ->as_instance_klass();
+  ciEnv::_StringBuilder =
+    get(SystemDictionary::StringBuilder_klass())
+      ->as_instance_klass();
+  ciEnv::_Integer =
+    get(SystemDictionary::int_klass())
+      ->as_instance_klass();
 
   for (int len = -1; len != _ci_objects->length(); ) {
     len = _ci_objects->length();
diff --git a/src/share/vm/classfile/classFileParser.cpp b/src/share/vm/classfile/classFileParser.cpp
index d0c6e67ed..18ee3fd90 100644
--- a/src/share/vm/classfile/classFileParser.cpp
+++ b/src/share/vm/classfile/classFileParser.cpp
@@ -2511,23 +2511,12 @@ void ClassFileParser::java_dyn_MethodHandle_fix_pre(constantPoolHandle cp,
       fac_ptr->nonstatic_byte_count -= 1;
       (*fields_ptr)->ushort_at_put(i + instanceKlass::signature_index_offset,
                                    word_sig_index);
-      if (wordSize == jintSize) {
-        fac_ptr->nonstatic_word_count += 1;
-      } else {
-        fac_ptr->nonstatic_double_count += 1;
-      }
+      fac_ptr->nonstatic_word_count += 1;
 
-      FieldAllocationType atype = (FieldAllocationType) (*fields_ptr)->ushort_at(i+4);
+      FieldAllocationType atype = (FieldAllocationType) (*fields_ptr)->ushort_at(i + instanceKlass::low_offset);
       assert(atype == NONSTATIC_BYTE, "");
       FieldAllocationType new_atype = NONSTATIC_WORD;
-      if (wordSize > jintSize) {
-        if (Universe::field_type_should_be_aligned(T_LONG)) {
-          atype = NONSTATIC_ALIGNED_DOUBLE;
-        } else {
-          atype = NONSTATIC_DOUBLE;
-        }
-      }
-      (*fields_ptr)->ushort_at_put(i+4, new_atype);
+      (*fields_ptr)->ushort_at_put(i + instanceKlass::low_offset, new_atype);
 
       found_vmentry = true;
       break;
@@ -3085,7 +3074,7 @@ instanceKlassHandle ClassFileParser::parseClassFile(symbolHandle name,
     int len = fields->length();
     for (int i = 0; i < len; i += instanceKlass::next_offset) {
       int real_offset;
-      FieldAllocationType atype = (FieldAllocationType) fields->ushort_at(i+4);
+      FieldAllocationType atype = (FieldAllocationType) fields->ushort_at(i + instanceKlass::low_offset);
       switch (atype) {
         case STATIC_OOP:
           real_offset = next_static_oop_offset;
@@ -3173,8 +3162,8 @@ instanceKlassHandle ClassFileParser::parseClassFile(symbolHandle name,
         default:
           ShouldNotReachHere();
       }
-      fields->short_at_put(i+4, extract_low_short_from_int(real_offset) );
-      fields->short_at_put(i+5, extract_high_short_from_int(real_offset) );
+      fields->short_at_put(i + instanceKlass::low_offset,  extract_low_short_from_int(real_offset));
+      fields->short_at_put(i + instanceKlass::high_offset, extract_high_short_from_int(real_offset));
     }
 
     // Size of instances
diff --git a/src/share/vm/classfile/javaClasses.cpp b/src/share/vm/classfile/javaClasses.cpp
index cb71bbdd7..1ca9c41c9 100644
--- a/src/share/vm/classfile/javaClasses.cpp
+++ b/src/share/vm/classfile/javaClasses.cpp
@@ -1124,8 +1124,7 @@ class BacktraceBuilder: public StackObj {
     if (_dirty && _methods != NULL) {
       BarrierSet* bs = Universe::heap()->barrier_set();
       assert(bs->has_write_ref_array_opt(), "Barrier set must have ref array opt");
-      bs->write_ref_array(MemRegion((HeapWord*)_methods->base(),
-                                    _methods->array_size()));
+      bs->write_ref_array((HeapWord*)_methods->base(), _methods->length());
       _dirty = false;
     }
   }
diff --git a/src/share/vm/classfile/systemDictionary.hpp b/src/share/vm/classfile/systemDictionary.hpp
index 5ee4e1c7a..4d0bb8cfe 100644
--- a/src/share/vm/classfile/systemDictionary.hpp
+++ b/src/share/vm/classfile/systemDictionary.hpp
@@ -150,6 +150,7 @@ class SymbolPropertyTable;
   template(vector_klass,                 java_util_Vector,               Pre) \
   template(hashtable_klass,              java_util_Hashtable,            Pre) \
   template(stringBuffer_klass,           java_lang_StringBuffer,         Pre) \
+  template(StringBuilder_klass,          java_lang_StringBuilder,        Pre) \
                                                                               \
   /* It's NULL in non-1.4 JDKs. */                                            \
   template(stackTraceElement_klass,      java_lang_StackTraceElement,    Opt) \
diff --git a/src/share/vm/classfile/vmSymbols.cpp b/src/share/vm/classfile/vmSymbols.cpp
index c805af344..cc96bee95 100644
--- a/src/share/vm/classfile/vmSymbols.cpp
+++ b/src/share/vm/classfile/vmSymbols.cpp
@@ -303,6 +303,11 @@ inline bool match_F_R(jshort flags) {
   const int neg = JVM_ACC_STATIC | JVM_ACC_SYNCHRONIZED;
   return (flags & (req | neg)) == req;
 }
+inline bool match_F_Y(jshort flags) {
+  const int req = JVM_ACC_SYNCHRONIZED;
+  const int neg = JVM_ACC_STATIC;
+  return (flags & (req | neg)) == req;
+}
 inline bool match_F_RN(jshort flags) {
   const int req = JVM_ACC_NATIVE;
   const int neg = JVM_ACC_STATIC | JVM_ACC_SYNCHRONIZED;
@@ -361,6 +366,7 @@ const char* vmIntrinsics::short_name_as_C_string(vmIntrinsics::ID id, char* buf,
   const char* sname = vmSymbols::name_for(signature_for(id));
   const char* fname = "";
   switch (flags_for(id)) {
+  case F_Y:  fname = "synchronized ";  break;
   case F_RN: fname = "native ";        break;
   case F_SN: fname = "native static "; break;
   case F_S:  fname = "static ";        break;
diff --git a/src/share/vm/classfile/vmSymbols.hpp b/src/share/vm/classfile/vmSymbols.hpp
index fdc8786e5..7323bbb73 100644
--- a/src/share/vm/classfile/vmSymbols.hpp
+++ b/src/share/vm/classfile/vmSymbols.hpp
@@ -84,6 +84,7 @@
   template(java_lang_reflect_Field,                   "java/lang/reflect/Field")                  \
   template(java_lang_reflect_Array,                   "java/lang/reflect/Array")                  \
   template(java_lang_StringBuffer,                    "java/lang/StringBuffer")                   \
+  template(java_lang_StringBuilder,                   "java/lang/StringBuilder")                  \
   template(java_lang_CharSequence,                    "java/lang/CharSequence")                   \
   template(java_security_AccessControlContext,        "java/security/AccessControlContext")       \
   template(java_security_ProtectionDomain,            "java/security/ProtectionDomain")           \
@@ -335,6 +336,7 @@
   template(ptypes_name,                               "ptypes")                                   \
   template(form_name,                                 "form")                                     \
   template(erasedType_name,                           "erasedType")                               \
+  template(append_name,                               "append")                                   \
                                                                                                   \
   /* non-intrinsic name/signature pairs: */                                                       \
   template(register_method_name,                      "register")                                 \
@@ -416,6 +418,13 @@
   template(string_signature,                          "Ljava/lang/String;")                                       \
   template(reference_signature,                       "Ljava/lang/ref/Reference;")                                \
   template(concurrenthashmap_signature,               "Ljava/util/concurrent/ConcurrentHashMap;")                 \
+  template(String_StringBuilder_signature,            "(Ljava/lang/String;)Ljava/lang/StringBuilder;")            \
+  template(int_StringBuilder_signature,               "(I)Ljava/lang/StringBuilder;")                             \
+  template(char_StringBuilder_signature,              "(C)Ljava/lang/StringBuilder;")                             \
+  template(String_StringBuffer_signature,             "(Ljava/lang/String;)Ljava/lang/StringBuffer;")             \
+  template(int_StringBuffer_signature,                "(I)Ljava/lang/StringBuffer;")                              \
+  template(char_StringBuffer_signature,               "(C)Ljava/lang/StringBuffer;")                              \
+  template(int_String_signature,                      "(I)Ljava/lang/String;")                                    \
   /* signature symbols needed by intrinsics */                                                                    \
   VM_INTRINSICS_DO(VM_INTRINSIC_IGNORE, VM_SYMBOL_IGNORE, VM_SYMBOL_IGNORE, template, VM_ALIAS_IGNORE)            \
                                                                                                                   \
@@ -815,10 +824,34 @@
     /*the compiler does have special inlining code for these; bytecode inline is just fine */                           \
                                                                                                                         \
   do_intrinsic(_fillInStackTrace,         java_lang_Throwable, fillInStackTrace_name, void_throwable_signature,  F_RNY) \
-                                                                                                                        \
-  do_intrinsic(_Object_init,              java_lang_Object, object_initializer_name, void_method_signature,      F_R)   \
-  /*    (symbol object_initializer_name defined above) */                                                               \
-                                                                                                                        \
+                                                                                                                          \
+  do_intrinsic(_StringBuilder_void,   java_lang_StringBuilder, object_initializer_name, void_method_signature,     F_R)   \
+  do_intrinsic(_StringBuilder_int,    java_lang_StringBuilder, object_initializer_name, int_void_signature,        F_R)   \
+  do_intrinsic(_StringBuilder_String, java_lang_StringBuilder, object_initializer_name, string_void_signature,     F_R)   \
+                                                                                                                          \
+  do_intrinsic(_StringBuilder_append_char,   java_lang_StringBuilder, append_name, char_StringBuilder_signature,   F_R)   \
+  do_intrinsic(_StringBuilder_append_int,    java_lang_StringBuilder, append_name, int_StringBuilder_signature,    F_R)   \
+  do_intrinsic(_StringBuilder_append_String, java_lang_StringBuilder, append_name, String_StringBuilder_signature, F_R)   \
+                                                                                                                          \
+  do_intrinsic(_StringBuilder_toString, java_lang_StringBuilder, toString_name, void_string_signature,             F_R)   \
+                                                                                                                          \
+  do_intrinsic(_StringBuffer_void,   java_lang_StringBuffer, object_initializer_name, void_method_signature,       F_R)   \
+  do_intrinsic(_StringBuffer_int,    java_lang_StringBuffer, object_initializer_name, int_void_signature,          F_R)   \
+  do_intrinsic(_StringBuffer_String, java_lang_StringBuffer, object_initializer_name, string_void_signature,       F_R)   \
+                                                                                                                          \
+  do_intrinsic(_StringBuffer_append_char,   java_lang_StringBuffer, append_name, char_StringBuffer_signature,      F_Y)   \
+  do_intrinsic(_StringBuffer_append_int,    java_lang_StringBuffer, append_name, int_StringBuffer_signature,       F_Y)   \
+  do_intrinsic(_StringBuffer_append_String, java_lang_StringBuffer, append_name, String_StringBuffer_signature,    F_Y)   \
+                                                                                                                          \
+  do_intrinsic(_StringBuffer_toString,  java_lang_StringBuffer, toString_name, void_string_signature,              F_Y)   \
+                                                                                                                          \
+  do_intrinsic(_Integer_toString,      java_lang_Integer, toString_name, int_String_signature,                     F_S)   \
+                                                                                                                          \
+  do_intrinsic(_String_String, java_lang_String, object_initializer_name, string_void_signature,                   F_R)   \
+                                                                                                                          \
+  do_intrinsic(_Object_init,              java_lang_Object, object_initializer_name, void_method_signature,        F_R)   \
+  /*    (symbol object_initializer_name defined above) */                                                                 \
+                                                                                                                          \
   do_intrinsic(_invoke,                   java_lang_reflect_Method, invoke_name, object_array_object_object_signature, F_R) \
   /*   (symbols invoke_name and invoke_signature defined above) */                                                      \
                                                                                                                         \
@@ -946,11 +979,12 @@ class vmIntrinsics: AllStatic {
   enum Flags {
     // AccessFlags syndromes relevant to intrinsics.
     F_none = 0,
-    F_R,                        // !static        !synchronized (R="regular")
-    F_S,                        //  static        !synchronized
-    F_RN,                       // !static native !synchronized
-    F_SN,                       //  static native !synchronized
-    F_RNY                       // !static native  synchronized
+    F_R,                        // !static ?native !synchronized (R="regular")
+    F_S,                        //  static ?native !synchronized
+    F_Y,                        // !static ?native  synchronized
+    F_RN,                       // !static  native !synchronized
+    F_SN,                       //  static  native !synchronized
+    F_RNY                       // !static  native  synchronized
   };
 
 public:
diff --git a/src/share/vm/code/nmethod.cpp b/src/share/vm/code/nmethod.cpp
index 7f7ca1175..5544666e9 100644
--- a/src/share/vm/code/nmethod.cpp
+++ b/src/share/vm/code/nmethod.cpp
@@ -414,9 +414,8 @@ int nmethod::total_size() const {
 }
 
 const char* nmethod::compile_kind() const {
-  if (method() == NULL)    return "unloaded";
-  if (is_native_method())  return "c2n";
   if (is_osr_method())     return "osr";
+  if (method() != NULL && is_native_method())  return "c2n";
   return NULL;
 }
 
@@ -1127,6 +1126,9 @@ void nmethod::make_unloaded(BoolObjectClosure* is_alive, oop cause) {
   }
   flags.state = unloaded;
 
+  // Log the unloading.
+  log_state_change();
+
   // The methodOop is gone at this point
   assert(_method == NULL, "Tautology");
 
@@ -1137,8 +1139,6 @@ void nmethod::make_unloaded(BoolObjectClosure* is_alive, oop cause) {
 
 void nmethod::invalidate_osr_method() {
   assert(_entry_bci != InvocationEntryBci, "wrong kind of nmethod");
-  if (_entry_bci != InvalidOSREntryBci)
-    inc_decompile_count();
   // Remove from list of active nmethods
   if (method() != NULL)
     instanceKlass::cast(method()->method_holder())->remove_osr_nmethod(this);
@@ -1146,59 +1146,63 @@ void nmethod::invalidate_osr_method() {
   _entry_bci = InvalidOSREntryBci;
 }
 
-void nmethod::log_state_change(int state) const {
+void nmethod::log_state_change() const {
   if (LogCompilation) {
     if (xtty != NULL) {
       ttyLocker ttyl;  // keep the following output all in one block
-      xtty->begin_elem("make_not_entrant %sthread='" UINTX_FORMAT "'",
-                       (state == zombie ? "zombie='1' " : ""),
-                       os::current_thread_id());
+      if (flags.state == unloaded) {
+        xtty->begin_elem("make_unloaded thread='" UINTX_FORMAT "'",
+                         os::current_thread_id());
+      } else {
+        xtty->begin_elem("make_not_entrant thread='" UINTX_FORMAT "'%s",
+                         os::current_thread_id(),
+                         (flags.state == zombie ? " zombie='1'" : ""));
+      }
       log_identity(xtty);
       xtty->stamp();
       xtty->end_elem();
     }
   }
-  if (PrintCompilation) {
-    print_on(tty, state == zombie ? "made zombie " : "made not entrant ");
+  if (PrintCompilation && flags.state != unloaded) {
+    print_on(tty, flags.state == zombie ? "made zombie " : "made not entrant ");
     tty->cr();
   }
 }
 
 // Common functionality for both make_not_entrant and make_zombie
-void nmethod::make_not_entrant_or_zombie(int state) {
+bool nmethod::make_not_entrant_or_zombie(int state) {
   assert(state == zombie || state == not_entrant, "must be zombie or not_entrant");
 
-  // Code for an on-stack-replacement nmethod is removed when a class gets unloaded.
-  // They never become zombie/non-entrant, so the nmethod sweeper will never remove
-  // them. Instead the entry_bci is set to InvalidOSREntryBci, so the osr nmethod
-  // will never be used anymore. That the nmethods only gets removed when class unloading
-  // happens, make life much simpler, since the nmethods are not just going to disappear
-  // out of the blue.
-  if (is_osr_method()) {
-    if (osr_entry_bci() != InvalidOSREntryBci) {
-      // only log this once
-      log_state_change(state);
-    }
-    invalidate_osr_method();
-    return;
-  }
-
-  // If the method is already zombie or set to the state we want, nothing to do
-  if (is_zombie() || (state == not_entrant && is_not_entrant())) {
-    return;
+  // If the method is already zombie there is nothing to do
+  if (is_zombie()) {
+    return false;
   }
 
-  log_state_change(state);
-
   // Make sure the nmethod is not flushed in case of a safepoint in code below.
   nmethodLocker nml(this);
 
   {
+    // invalidate osr nmethod before acquiring the patching lock since
+    // they both acquire leaf locks and we don't want a deadlock.
+    // This logic is equivalent to the logic below for patching the
+    // verified entry point of regular methods.
+    if (is_osr_method()) {
+      // this effectively makes the osr nmethod not entrant
+      invalidate_osr_method();
+    }
+
     // Enter critical section.  Does not block for safepoint.
     MutexLockerEx pl(Patching_lock, Mutex::_no_safepoint_check_flag);
+
+    if (flags.state == state) {
+      // another thread already performed this transition so nothing
+      // to do, but return false to indicate this.
+      return false;
+    }
+
     // The caller can be calling the method statically or through an inline
     // cache call.
-    if (!is_not_entrant()) {
+    if (!is_osr_method() && !is_not_entrant()) {
       NativeJump::patch_verified_entry(entry_point(), verified_entry_point(),
                   SharedRuntime::get_handle_wrong_method_stub());
       assert (NativeJump::instruction_size == nmethod::_zombie_instruction_size, "");
@@ -1217,6 +1221,10 @@ void nmethod::make_not_entrant_or_zombie(int state) {
 
     // Change state
     flags.state = state;
+
+    // Log the transition once
+    log_state_change();
+
   } // leave critical region under Patching_lock
 
   if (state == not_entrant) {
@@ -1240,7 +1248,6 @@ void nmethod::make_not_entrant_or_zombie(int state) {
   // It's a true state change, so mark the method as decompiled.
   inc_decompile_count();
 
-
   // zombie only - if a JVMTI agent has enabled the CompiledMethodUnload event
   // and it hasn't already been reported for this nmethod then report it now.
   // (the event may have been reported earilier if the GC marked it for unloading).
@@ -1268,7 +1275,7 @@ void nmethod::make_not_entrant_or_zombie(int state) {
 
   // Check whether method got unloaded at a safepoint before this,
   // if so we can skip the flushing steps below
-  if (method() == NULL) return;
+  if (method() == NULL) return true;
 
   // Remove nmethod from method.
   // We need to check if both the _code and _from_compiled_code_entry_point
@@ -1282,6 +1289,8 @@ void nmethod::make_not_entrant_or_zombie(int state) {
     HandleMark hm;
     method()->clear_code();
   }
+
+  return true;
 }
 
 
diff --git a/src/share/vm/code/nmethod.hpp b/src/share/vm/code/nmethod.hpp
index c7abdea89..48080b5b9 100644
--- a/src/share/vm/code/nmethod.hpp
+++ b/src/share/vm/code/nmethod.hpp
@@ -252,7 +252,9 @@ class nmethod : public CodeBlob {
   void* operator new(size_t size, int nmethod_size);
 
   const char* reloc_string_for(u_char* begin, u_char* end);
-  void make_not_entrant_or_zombie(int state);
+  // Returns true if this thread changed the state of the nmethod or
+  // false if another thread performed the transition.
+  bool make_not_entrant_or_zombie(int state);
   void inc_decompile_count();
 
   // used to check that writes to nmFlags are done consistently.
@@ -375,10 +377,12 @@ class nmethod : public CodeBlob {
   bool  is_zombie() const                         { return flags.state == zombie; }
   bool  is_unloaded() const                       { return flags.state == unloaded;   }
 
-  // Make the nmethod non entrant. The nmethod will continue to be alive.
-  // It is used when an uncommon trap happens.
-  void  make_not_entrant()                        { make_not_entrant_or_zombie(not_entrant); }
-  void  make_zombie()                             { make_not_entrant_or_zombie(zombie); }
+  // Make the nmethod non entrant. The nmethod will continue to be
+  // alive.  It is used when an uncommon trap happens.  Returns true
+  // if this thread changed the state of the nmethod or false if
+  // another thread performed the transition.
+  bool  make_not_entrant()                        { return make_not_entrant_or_zombie(not_entrant); }
+  bool  make_zombie()                             { return make_not_entrant_or_zombie(zombie); }
 
   // used by jvmti to track if the unload event has been reported
   bool  unload_reported()                         { return _unload_reported; }
@@ -563,7 +567,7 @@ class nmethod : public CodeBlob {
   // Logging
   void log_identity(xmlStream* log) const;
   void log_new_nmethod() const;
-  void log_state_change(int state) const;
+  void log_state_change() const;
 
   // Prints a comment for one native instruction (reloc info, pc desc)
   void print_code_comment_on(outputStream* st, int column, address begin, address end);
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
index c3d30c348..20bbd7aba 100644
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
@@ -709,7 +709,8 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
 
   // Support for parallelizing survivor space rescan
   if (CMSParallelRemarkEnabled && CMSParallelSurvivorRemarkEnabled) {
-    size_t max_plab_samples = MaxNewSize/((SurvivorRatio+2)*MinTLABSize);
+    size_t max_plab_samples = cp->max_gen0_size()/
+                                ((SurvivorRatio+2)*MinTLABSize);
     _survivor_plab_array  = NEW_C_HEAP_ARRAY(ChunkArray, ParallelGCThreads);
     _survivor_chunk_array = NEW_C_HEAP_ARRAY(HeapWord*, 2*max_plab_samples);
     _cursor               = NEW_C_HEAP_ARRAY(size_t, ParallelGCThreads);
diff --git a/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp b/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp
index 3000f010b..967605176 100644
--- a/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp
+++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp
@@ -351,9 +351,16 @@ void
 CollectionSetChooser::printSortedHeapRegions() {
   gclog_or_tty->print_cr("Printing %d Heap Regions sorted by amount of known garbage",
                 _numMarkedRegions);
+
+  DEBUG_ONLY(int marked_count = 0;)
   for (int i = 0; i < _markedRegions.length(); i++) {
-    printHeapRegion(_markedRegions.at(i));
+    HeapRegion* r = _markedRegions.at(i);
+    if (r != NULL) {
+      printHeapRegion(r);
+      DEBUG_ONLY(marked_count++;)
+    }
   }
+  assert(marked_count == _numMarkedRegions, "must be");
   gclog_or_tty->print_cr("Done sorted heap region print");
 }
 
diff --git a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp
index 34939de57..11c288073 100644
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp
@@ -42,28 +42,49 @@ ConcurrentG1Refine::ConcurrentG1Refine() :
   _n_periods(0),
   _threads(NULL), _n_threads(0)
 {
-  if (G1ConcRefine) {
-    _n_threads = (int)thread_num();
-    if (_n_threads > 0) {
-      _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
-      int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
-      ConcurrentG1RefineThread *next = NULL;
-      for (int i = _n_threads - 1; i >= 0; i--) {
-        ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
-        assert(t != NULL, "Conc refine should have been created");
-        assert(t->cg1r() == this, "Conc refine thread should refer to this");
-        _threads[i] = t;
-        next = t;
-      }
-    }
+
+  // Ergomonically select initial concurrent refinement parameters
+  if (FLAG_IS_DEFAULT(G1ConcRefineGreenZone)) {
+    FLAG_SET_DEFAULT(G1ConcRefineGreenZone, MAX2<int>(ParallelGCThreads, 1));
+  }
+  set_green_zone(G1ConcRefineGreenZone);
+
+  if (FLAG_IS_DEFAULT(G1ConcRefineYellowZone)) {
+    FLAG_SET_DEFAULT(G1ConcRefineYellowZone, green_zone() * 3);
+  }
+  set_yellow_zone(MAX2<int>(G1ConcRefineYellowZone, green_zone()));
+
+  if (FLAG_IS_DEFAULT(G1ConcRefineRedZone)) {
+    FLAG_SET_DEFAULT(G1ConcRefineRedZone, yellow_zone() * 2);
+  }
+  set_red_zone(MAX2<int>(G1ConcRefineRedZone, yellow_zone()));
+  _n_worker_threads = thread_num();
+  // We need one extra thread to do the young gen rset size sampling.
+  _n_threads = _n_worker_threads + 1;
+  reset_threshold_step();
+
+  _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
+  int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
+  ConcurrentG1RefineThread *next = NULL;
+  for (int i = _n_threads - 1; i >= 0; i--) {
+    ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
+    assert(t != NULL, "Conc refine should have been created");
+    assert(t->cg1r() == this, "Conc refine thread should refer to this");
+    _threads[i] = t;
+    next = t;
   }
 }
 
-size_t ConcurrentG1Refine::thread_num() {
-  if (G1ConcRefine) {
-    return (G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads;
+void ConcurrentG1Refine::reset_threshold_step() {
+  if (FLAG_IS_DEFAULT(G1ConcRefineThresholdStep)) {
+    _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1);
+  } else {
+    _thread_threshold_step = G1ConcRefineThresholdStep;
   }
-  return 0;
+}
+
+int ConcurrentG1Refine::thread_num() {
+  return MAX2<int>((G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads, 1);
 }
 
 void ConcurrentG1Refine::init() {
@@ -123,6 +144,15 @@ void ConcurrentG1Refine::stop() {
   }
 }
 
+void ConcurrentG1Refine::reinitialize_threads() {
+  reset_threshold_step();
+  if (_threads != NULL) {
+    for (int i = 0; i < _n_threads; i++) {
+      _threads[i]->initialize();
+    }
+  }
+}
+
 ConcurrentG1Refine::~ConcurrentG1Refine() {
   if (G1ConcRSLogCacheSize > 0) {
     assert(_card_counts != NULL, "Logic");
@@ -384,4 +414,3 @@ void ConcurrentG1Refine::print_worker_threads_on(outputStream* st) const {
     st->cr();
   }
 }
-
diff --git a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp
index 5cef3058c..7bcbecfbe 100644
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp
@@ -29,6 +29,31 @@ class G1RemSet;
 class ConcurrentG1Refine: public CHeapObj {
   ConcurrentG1RefineThread** _threads;
   int _n_threads;
+  int _n_worker_threads;
+ /*
+  * The value of the update buffer queue length falls into one of 3 zones:
+  * green, yellow, red. If the value is in [0, green) nothing is
+  * done, the buffers are left unprocessed to enable the caching effect of the
+  * dirtied cards. In the yellow zone [green, yellow) the concurrent refinement
+  * threads are gradually activated. In [yellow, red) all threads are
+  * running. If the length becomes red (max queue length) the mutators start
+  * processing the buffers.
+  *
+  * There are some interesting cases (with G1AdaptiveConcRefine turned off):
+  * 1) green = yellow = red = 0. In this case the mutator will process all
+  *    buffers. Except for those that are created by the deferred updates
+  *    machinery during a collection.
+  * 2) green = 0. Means no caching. Can be a good way to minimize the
+  *    amount of time spent updating rsets during a collection.
+  */
+  int _green_zone;
+  int _yellow_zone;
+  int _red_zone;
+
+  int _thread_threshold_step;
+
+  // Reset the threshold step value based of the current zone boundaries.
+  void reset_threshold_step();
 
   // The cache for card refinement.
   bool   _use_cache;
@@ -147,6 +172,8 @@ class ConcurrentG1Refine: public CHeapObj {
   void init(); // Accomplish some initialization that has to wait.
   void stop();
 
+  void reinitialize_threads();
+
   // Iterate over the conc refine threads
   void threads_do(ThreadClosure *tc);
 
@@ -178,7 +205,20 @@ class ConcurrentG1Refine: public CHeapObj {
 
   void clear_and_record_card_counts();
 
-  static size_t thread_num();
+  static int thread_num();
 
   void print_worker_threads_on(outputStream* st) const;
+
+  void set_green_zone(int x)  { _green_zone = x;  }
+  void set_yellow_zone(int x) { _yellow_zone = x; }
+  void set_red_zone(int x)    { _red_zone = x;    }
+
+  int green_zone() const      { return _green_zone;  }
+  int yellow_zone() const     { return _yellow_zone; }
+  int red_zone() const        { return _red_zone;    }
+
+  int total_thread_num() const  { return _n_threads;        }
+  int worker_thread_num() const { return _n_worker_threads; }
+
+  int thread_threshold_step() const { return _thread_threshold_step; }
 };
diff --git a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp
index aaf2544fe..b23c287a6 100644
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp
+++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp
@@ -25,10 +25,6 @@
 #include "incls/_precompiled.incl"
 #include "incls/_concurrentG1RefineThread.cpp.incl"
 
-// ======= Concurrent Mark Thread ========
-
-// The CM thread is created when the G1 garbage collector is used
-
 ConcurrentG1RefineThread::
 ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *next,
                          int worker_id_offset, int worker_id) :
@@ -37,19 +33,42 @@ ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *nex
   _worker_id(worker_id),
   _active(false),
   _next(next),
+  _monitor(NULL),
   _cg1r(cg1r),
-  _vtime_accum(0.0),
-  _interval_ms(5.0)
+  _vtime_accum(0.0)
 {
+
+  // Each thread has its own monitor. The i-th thread is responsible for signalling
+  // to thread i+1 if the number of buffers in the queue exceeds a threashold for this
+  // thread. Monitors are also used to wake up the threads during termination.
+  // The 0th worker in notified by mutator threads and has a special monitor.
+  // The last worker is used for young gen rset size sampling.
+  if (worker_id > 0) {
+    _monitor = new Monitor(Mutex::nonleaf, "Refinement monitor", true);
+  } else {
+    _monitor = DirtyCardQ_CBL_mon;
+  }
+  initialize();
   create_and_start();
 }
 
+void ConcurrentG1RefineThread::initialize() {
+  if (_worker_id < cg1r()->worker_thread_num()) {
+    // Current thread activation threshold
+    _threshold = MIN2<int>(cg1r()->thread_threshold_step() * (_worker_id + 1) + cg1r()->green_zone(),
+                           cg1r()->yellow_zone());
+    // A thread deactivates once the number of buffer reached a deactivation threshold
+    _deactivation_threshold = MAX2<int>(_threshold - cg1r()->thread_threshold_step(), cg1r()->green_zone());
+  } else {
+    set_active(true);
+  }
+}
+
 void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   G1CollectorPolicy* g1p = g1h->g1_policy();
   if (g1p->adaptive_young_list_length()) {
     int regions_visited = 0;
-
     g1h->young_list_rs_length_sampling_init();
     while (g1h->young_list_rs_length_sampling_more()) {
       g1h->young_list_rs_length_sampling_next();
@@ -70,99 +89,121 @@ void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
   }
 }
 
+void ConcurrentG1RefineThread::run_young_rs_sampling() {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  _vtime_start = os::elapsedVTime();
+  while(!_should_terminate) {
+    _sts.join();
+    sample_young_list_rs_lengths();
+    _sts.leave();
+
+    if (os::supports_vtime()) {
+      _vtime_accum = (os::elapsedVTime() - _vtime_start);
+    } else {
+      _vtime_accum = 0.0;
+    }
+
+    MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
+    if (_should_terminate) {
+      break;
+    }
+    _monitor->wait(Mutex::_no_safepoint_check_flag, G1ConcRefineServiceInterval);
+  }
+}
+
+void ConcurrentG1RefineThread::wait_for_completed_buffers() {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
+  while (!_should_terminate && !is_active()) {
+    _monitor->wait(Mutex::_no_safepoint_check_flag);
+  }
+}
+
+bool ConcurrentG1RefineThread::is_active() {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  return _worker_id > 0 ? _active : dcqs.process_completed_buffers();
+}
+
+void ConcurrentG1RefineThread::activate() {
+  MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
+  if (_worker_id > 0) {
+    if (G1TraceConcurrentRefinement) {
+      DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+      gclog_or_tty->print_cr("G1-Refine-activated worker %d, on threshold %d, current %d",
+                             _worker_id, _threshold, (int)dcqs.completed_buffers_num());
+    }
+    set_active(true);
+  } else {
+    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+    dcqs.set_process_completed(true);
+  }
+  _monitor->notify();
+}
+
+void ConcurrentG1RefineThread::deactivate() {
+  MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
+  if (_worker_id > 0) {
+    if (G1TraceConcurrentRefinement) {
+      DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+      gclog_or_tty->print_cr("G1-Refine-deactivated worker %d, off threshold %d, current %d",
+                             _worker_id, _deactivation_threshold, (int)dcqs.completed_buffers_num());
+    }
+    set_active(false);
+  } else {
+    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+    dcqs.set_process_completed(false);
+  }
+}
+
 void ConcurrentG1RefineThread::run() {
   initialize_in_thread();
-  _vtime_start = os::elapsedVTime();
   wait_for_universe_init();
 
+  if (_worker_id >= cg1r()->worker_thread_num()) {
+    run_young_rs_sampling();
+    terminate();
+  }
+
+  _vtime_start = os::elapsedVTime();
   while (!_should_terminate) {
     DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
-    // Wait for completed log buffers to exist.
-    {
-      MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
-      while (((_worker_id == 0 && !dcqs.process_completed_buffers()) ||
-              (_worker_id > 0 && !is_active())) &&
-             !_should_terminate) {
-         DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
-      }
-    }
+
+    // Wait for work
+    wait_for_completed_buffers();
 
     if (_should_terminate) {
-      return;
+      break;
     }
 
-    // Now we take them off (this doesn't hold locks while it applies
-    // closures.)  (If we did a full collection, then we'll do a full
-    // traversal.
     _sts.join();
-    int n_logs = 0;
-    int lower_limit = 0;
-    double start_vtime_sec; // only used when G1SmoothConcRefine is on
-    int prev_buffer_num; // only used when G1SmoothConcRefine is on
-    // This thread activation threshold
-    int threshold = G1UpdateBufferQueueProcessingThreshold * _worker_id;
-    // Next thread activation threshold
-    int next_threshold = threshold + G1UpdateBufferQueueProcessingThreshold;
-    int deactivation_threshold = MAX2<int>(threshold - G1UpdateBufferQueueProcessingThreshold / 2, 0);
-
-    if (G1SmoothConcRefine) {
-      lower_limit = 0;
-      start_vtime_sec = os::elapsedVTime();
-      prev_buffer_num = (int) dcqs.completed_buffers_num();
-    } else {
-      lower_limit = G1UpdateBufferQueueProcessingThreshold / 4; // For now.
-    }
-    while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, lower_limit)) {
-      double end_vtime_sec;
-      double elapsed_vtime_sec;
-      int elapsed_vtime_ms;
-      int curr_buffer_num = (int) dcqs.completed_buffers_num();
-
-      if (G1SmoothConcRefine) {
-        end_vtime_sec = os::elapsedVTime();
-        elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
-        elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0);
-
-        if (curr_buffer_num > prev_buffer_num ||
-            curr_buffer_num > next_threshold) {
-          decreaseInterval(elapsed_vtime_ms);
-        } else if (curr_buffer_num < prev_buffer_num) {
-          increaseInterval(elapsed_vtime_ms);
-        }
+
+    do {
+      int curr_buffer_num = (int)dcqs.completed_buffers_num();
+      // If the number of the buffers falls down into the yellow zone,
+      // that means that the transition period after the evacuation pause has ended.
+      if (dcqs.completed_queue_padding() > 0 && curr_buffer_num <= cg1r()->yellow_zone()) {
+        dcqs.set_completed_queue_padding(0);
       }
-      if (_worker_id == 0) {
-        sample_young_list_rs_lengths();
-      } else if (curr_buffer_num < deactivation_threshold) {
+
+      if (_worker_id > 0 && curr_buffer_num <= _deactivation_threshold) {
         // If the number of the buffer has fallen below our threshold
         // we should deactivate. The predecessor will reactivate this
         // thread should the number of the buffers cross the threshold again.
-        MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
         deactivate();
-        if (G1TraceConcurrentRefinement) {
-          gclog_or_tty->print_cr("G1-Refine-deactivated worker %d", _worker_id);
-        }
         break;
       }
 
       // Check if we need to activate the next thread.
-      if (curr_buffer_num > next_threshold && _next != NULL && !_next->is_active()) {
-        MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
+      if (_next != NULL && !_next->is_active() && curr_buffer_num > _next->_threshold) {
         _next->activate();
-        DirtyCardQ_CBL_mon->notify_all();
-        if (G1TraceConcurrentRefinement) {
-          gclog_or_tty->print_cr("G1-Refine-activated worker %d", _next->_worker_id);
-        }
       }
+    } while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, cg1r()->green_zone()));
 
-      if (G1SmoothConcRefine) {
-        prev_buffer_num = curr_buffer_num;
-        _sts.leave();
-        os::sleep(Thread::current(), (jlong) _interval_ms, false);
-        _sts.join();
-        start_vtime_sec = os::elapsedVTime();
-      }
-      n_logs++;
+    // We can exit the loop above while being active if there was a yield request.
+    if (is_active()) {
+      deactivate();
     }
+
     _sts.leave();
 
     if (os::supports_vtime()) {
@@ -172,7 +213,6 @@ void ConcurrentG1RefineThread::run() {
     }
   }
   assert(_should_terminate, "just checking");
-
   terminate();
 }
 
@@ -191,8 +231,8 @@ void ConcurrentG1RefineThread::stop() {
   }
 
   {
-    MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
-    DirtyCardQ_CBL_mon->notify_all();
+    MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
+    _monitor->notify();
   }
 
   {
diff --git a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp
index 167fc176e..b2eb5d327 100644
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp
+++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp
@@ -40,42 +40,36 @@ class ConcurrentG1RefineThread: public ConcurrentGCThread {
   // when the number of the rset update buffer crosses a certain threshold. A successor
   // would self-deactivate when the number of the buffers falls below the threshold.
   bool _active;
-  ConcurrentG1RefineThread *       _next;
- public:
-  virtual void run();
-
-  bool is_active()  { return _active;  }
-  void activate()   { _active = true;  }
-  void deactivate() { _active = false; }
+  ConcurrentG1RefineThread* _next;
+  Monitor* _monitor;
+  ConcurrentG1Refine* _cg1r;
 
- private:
-  ConcurrentG1Refine*              _cg1r;
+  int _thread_threshold_step;
+  // This thread activation threshold
+  int _threshold;
+  // This thread deactivation threshold
+  int _deactivation_threshold;
 
-  double                           _interval_ms;
+  void sample_young_list_rs_lengths();
+  void run_young_rs_sampling();
+  void wait_for_completed_buffers();
 
-  void decreaseInterval(int processing_time_ms) {
-    double min_interval_ms = (double) processing_time_ms;
-    _interval_ms = 0.8 * _interval_ms;
-    if (_interval_ms < min_interval_ms)
-      _interval_ms = min_interval_ms;
-  }
-  void increaseInterval(int processing_time_ms) {
-    double max_interval_ms = 9.0 * (double) processing_time_ms;
-    _interval_ms = 1.1 * _interval_ms;
-    if (max_interval_ms > 0 && _interval_ms > max_interval_ms)
-      _interval_ms = max_interval_ms;
-  }
-
-  void sleepBeforeNextCycle();
+  void set_active(bool x) { _active = x; }
+  bool is_active();
+  void activate();
+  void deactivate();
 
   // For use by G1CollectedHeap, which is a friend.
   static SuspendibleThreadSet* sts() { return &_sts; }
 
- public:
+public:
+  virtual void run();
   // Constructor
   ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread* next,
                            int worker_id_offset, int worker_id);
 
+  void initialize();
+
   // Printing
   void print() const;
   void print_on(outputStream* st) const;
@@ -83,13 +77,10 @@ class ConcurrentG1RefineThread: public ConcurrentGCThread {
   // Total virtual time so far.
   double vtime_accum() { return _vtime_accum; }
 
-  ConcurrentG1Refine* cg1r()                     { return _cg1r;     }
-
-  void            sample_young_list_rs_lengths();
+  ConcurrentG1Refine* cg1r() { return _cg1r;     }
 
   // Yield for GC
-  void            yield();
-
+  void yield();
   // shutdown
   void stop();
 };
diff --git a/src/share/vm/gc_implementation/g1/concurrentMark.cpp b/src/share/vm/gc_implementation/g1/concurrentMark.cpp
index af93ae7d2..c77af4141 100644
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp
@@ -760,7 +760,6 @@ void ConcurrentMark::checkpointRootsInitialPost() {
   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-  satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
   satb_mq_set.set_active_all_threads(true);
 
   // update_g1_committed() will be called at the end of an evac pause
diff --git a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp
index 7372a4a78..81d8716b1 100644
--- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp
+++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp
@@ -61,8 +61,8 @@ bool DirtyCardQueue::apply_closure_to_buffer(CardTableEntryClosure* cl,
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
 
-DirtyCardQueueSet::DirtyCardQueueSet() :
-  PtrQueueSet(true /*notify_when_complete*/),
+DirtyCardQueueSet::DirtyCardQueueSet(bool notify_when_complete) :
+  PtrQueueSet(notify_when_complete),
   _closure(NULL),
   _shared_dirty_card_queue(this, true /*perm*/),
   _free_ids(NULL),
@@ -77,12 +77,12 @@ size_t DirtyCardQueueSet::num_par_ids() {
 }
 
 void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
+                                   int process_completed_threshold,
                                    int max_completed_queue,
                                    Mutex* lock, PtrQueueSet* fl_owner) {
-  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue, fl_owner);
+  PtrQueueSet::initialize(cbl_mon, fl_lock, process_completed_threshold,
+                          max_completed_queue, fl_owner);
   set_buffer_size(G1UpdateBufferSize);
-  set_process_completed_threshold(G1UpdateBufferQueueProcessingThreshold);
-
   _shared_dirty_card_queue.set_lock(lock);
   _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon);
 }
@@ -154,9 +154,10 @@ bool DirtyCardQueueSet::mut_process_buffer(void** buf) {
   return b;
 }
 
-DirtyCardQueueSet::CompletedBufferNode*
-DirtyCardQueueSet::get_completed_buffer_lock(int stop_at) {
-  CompletedBufferNode* nd = NULL;
+
+BufferNode*
+DirtyCardQueueSet::get_completed_buffer(int stop_at) {
+  BufferNode* nd = NULL;
   MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
 
   if ((int)_n_completed_buffers <= stop_at) {
@@ -166,53 +167,31 @@ DirtyCardQueueSet::get_completed_buffer_lock(int stop_at) {
 
   if (_completed_buffers_head != NULL) {
     nd = _completed_buffers_head;
-    _completed_buffers_head = nd->next;
+    _completed_buffers_head = nd->next();
     if (_completed_buffers_head == NULL)
       _completed_buffers_tail = NULL;
     _n_completed_buffers--;
+    assert(_n_completed_buffers >= 0, "Invariant");
   }
   debug_only(assert_completed_buffer_list_len_correct_locked());
   return nd;
 }
 
-// We only do this in contexts where there is no concurrent enqueueing.
-DirtyCardQueueSet::CompletedBufferNode*
-DirtyCardQueueSet::get_completed_buffer_CAS() {
-  CompletedBufferNode* nd = _completed_buffers_head;
-
-  while (nd != NULL) {
-    CompletedBufferNode* next = nd->next;
-    CompletedBufferNode* result =
-      (CompletedBufferNode*)Atomic::cmpxchg_ptr(next,
-                                                &_completed_buffers_head,
-                                                nd);
-    if (result == nd) {
-      return result;
-    } else {
-      nd = _completed_buffers_head;
-    }
-  }
-  assert(_completed_buffers_head == NULL, "Loop post");
-  _completed_buffers_tail = NULL;
-  return NULL;
-}
-
 bool DirtyCardQueueSet::
 apply_closure_to_completed_buffer_helper(int worker_i,
-                                         CompletedBufferNode* nd) {
+                                         BufferNode* nd) {
   if (nd != NULL) {
+    void **buf = BufferNode::make_buffer_from_node(nd);
+    size_t index = nd->index();
     bool b =
-      DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf,
-                                              nd->index, _sz,
+      DirtyCardQueue::apply_closure_to_buffer(_closure, buf,
+                                              index, _sz,
                                               true, worker_i);
-    void** buf = nd->buf;
-    size_t index = nd->index;
-    delete nd;
     if (b) {
       deallocate_buffer(buf);
       return true;  // In normal case, go on to next buffer.
     } else {
-      enqueue_complete_buffer(buf, index, true);
+      enqueue_complete_buffer(buf, index);
       return false;
     }
   } else {
@@ -222,40 +201,36 @@ apply_closure_to_completed_buffer_helper(int worker_i,
 
 bool DirtyCardQueueSet::apply_closure_to_completed_buffer(int worker_i,
                                                           int stop_at,
-                                                          bool with_CAS)
+                                                          bool during_pause)
 {
-  CompletedBufferNode* nd = NULL;
-  if (with_CAS) {
-    guarantee(stop_at == 0, "Precondition");
-    nd = get_completed_buffer_CAS();
-  } else {
-    nd = get_completed_buffer_lock(stop_at);
-  }
+  assert(!during_pause || stop_at == 0, "Should not leave any completed buffers during a pause");
+  BufferNode* nd = get_completed_buffer(stop_at);
   bool res = apply_closure_to_completed_buffer_helper(worker_i, nd);
   if (res) Atomic::inc(&_processed_buffers_rs_thread);
   return res;
 }
 
 void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() {
-  CompletedBufferNode* nd = _completed_buffers_head;
+  BufferNode* nd = _completed_buffers_head;
   while (nd != NULL) {
     bool b =
-      DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, 0, _sz,
-                                              false);
+      DirtyCardQueue::apply_closure_to_buffer(_closure,
+                                              BufferNode::make_buffer_from_node(nd),
+                                              0, _sz, false);
     guarantee(b, "Should not stop early.");
-    nd = nd->next;
+    nd = nd->next();
   }
 }
 
 void DirtyCardQueueSet::abandon_logs() {
   assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
-  CompletedBufferNode* buffers_to_delete = NULL;
+  BufferNode* buffers_to_delete = NULL;
   {
     MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
     while (_completed_buffers_head != NULL) {
-      CompletedBufferNode* nd = _completed_buffers_head;
-      _completed_buffers_head = nd->next;
-      nd->next = buffers_to_delete;
+      BufferNode* nd = _completed_buffers_head;
+      _completed_buffers_head = nd->next();
+      nd->set_next(buffers_to_delete);
       buffers_to_delete = nd;
     }
     _n_completed_buffers = 0;
@@ -263,10 +238,9 @@ void DirtyCardQueueSet::abandon_logs() {
     debug_only(assert_completed_buffer_list_len_correct_locked());
   }
   while (buffers_to_delete != NULL) {
-    CompletedBufferNode* nd = buffers_to_delete;
-    buffers_to_delete = nd->next;
-    deallocate_buffer(nd->buf);
-    delete nd;
+    BufferNode* nd = buffers_to_delete;
+    buffers_to_delete = nd->next();
+    deallocate_buffer(BufferNode::make_buffer_from_node(nd));
   }
   // Since abandon is done only at safepoints, we can safely manipulate
   // these queues.
diff --git a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp
index 7a6f3f27b..da2c83784 100644
--- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp
+++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright 2001-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -84,11 +84,12 @@ class DirtyCardQueueSet: public PtrQueueSet {
   jint _processed_buffers_rs_thread;
 
 public:
-  DirtyCardQueueSet();
+  DirtyCardQueueSet(bool notify_when_complete = true);
 
   void initialize(Monitor* cbl_mon, Mutex* fl_lock,
-                  int max_completed_queue = 0,
-                  Mutex* lock = NULL, PtrQueueSet* fl_owner = NULL);
+                  int process_completed_threshold,
+                  int max_completed_queue,
+                  Mutex* lock, PtrQueueSet* fl_owner = NULL);
 
   // The number of parallel ids that can be claimed to allow collector or
   // mutator threads to do card-processing work.
@@ -120,12 +121,13 @@ public:
   // is returned to the completed buffer set, and this call returns false.
   bool apply_closure_to_completed_buffer(int worker_i = 0,
                                          int stop_at = 0,
-                                         bool with_CAS = false);
+                                         bool during_pause = false);
+
   bool apply_closure_to_completed_buffer_helper(int worker_i,
-                                                CompletedBufferNode* nd);
+                                                BufferNode* nd);
+
+  BufferNode* get_completed_buffer(int stop_at);
 
-  CompletedBufferNode* get_completed_buffer_CAS();
-  CompletedBufferNode* get_completed_buffer_lock(int stop_at);
   // Applies the current closure to all completed buffers,
   // non-consumptively.
   void apply_closure_to_all_completed_buffers();
diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
index 550b028ea..c60f17680 100644
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
@@ -928,6 +928,8 @@ void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs,
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
     TraceTime t(full ? "Full GC (System.gc())" : "Full GC", PrintGC, true, gclog_or_tty);
 
+    TraceMemoryManagerStats tms(true /* fullGC */);
+
     double start = os::elapsedTime();
     g1_policy()->record_full_collection_start();
 
@@ -1001,6 +1003,8 @@ void G1CollectedHeap::do_collection(bool full, bool clear_all_soft_refs,
 
     COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
 
+    MemoryService::track_memory_usage();
+
     if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
       HandleMark hm;  // Discard invalid handles created during verification
       gclog_or_tty->print(" VerifyAfterGC:");
@@ -1371,6 +1375,7 @@ void G1CollectedHeap::shrink(size_t shrink_bytes) {
 G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) :
   SharedHeap(policy_),
   _g1_policy(policy_),
+  _dirty_card_queue_set(false),
   _ref_processor(NULL),
   _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)),
   _bot_shared(NULL),
@@ -1456,8 +1461,6 @@ jint G1CollectedHeap::initialize() {
   Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap");
   Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap");
 
-  // We allocate this in any case, but only do no work if the command line
-  // param is off.
   _cg1r = new ConcurrentG1Refine();
 
   // Reserve the maximum.
@@ -1590,18 +1593,20 @@ jint G1CollectedHeap::initialize() {
 
   JavaThread::satb_mark_queue_set().initialize(SATB_Q_CBL_mon,
                                                SATB_Q_FL_lock,
-                                               0,
+                                               G1SATBProcessCompletedThreshold,
                                                Shared_SATB_Q_lock);
 
   JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
                                                 DirtyCardQ_FL_lock,
-                                                G1UpdateBufferQueueMaxLength,
+                                                concurrent_g1_refine()->yellow_zone(),
+                                                concurrent_g1_refine()->red_zone(),
                                                 Shared_DirtyCardQ_lock);
 
   if (G1DeferredRSUpdate) {
     dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
                                       DirtyCardQ_FL_lock,
-                                      0,
+                                      -1, // never trigger processing
+                                      -1, // no limit on length
                                       Shared_DirtyCardQ_lock,
                                       &JavaThread::dirty_card_queue_set());
   }
@@ -1732,13 +1737,6 @@ size_t G1CollectedHeap::unsafe_max_alloc() {
   return car->free();
 }
 
-void G1CollectedHeap::collect(GCCause::Cause cause) {
-  // The caller doesn't have the Heap_lock
-  assert(!Heap_lock->owned_by_self(), "this thread should not own the Heap_lock");
-  MutexLocker ml(Heap_lock);
-  collect_locked(cause);
-}
-
 void G1CollectedHeap::collect_as_vm_thread(GCCause::Cause cause) {
   assert(Thread::current()->is_VM_thread(), "Precondition#1");
   assert(Heap_lock->is_locked(), "Precondition#2");
@@ -1755,17 +1753,31 @@ void G1CollectedHeap::collect_as_vm_thread(GCCause::Cause cause) {
   }
 }
 
+void G1CollectedHeap::collect(GCCause::Cause cause) {
+  // The caller doesn't have the Heap_lock
+  assert(!Heap_lock->owned_by_self(), "this thread should not own the Heap_lock");
 
-void G1CollectedHeap::collect_locked(GCCause::Cause cause) {
-  // Don't want to do a GC until cleanup is completed.
-  wait_for_cleanup_complete();
-
-  // Read the GC count while holding the Heap_lock
-  int gc_count_before = SharedHeap::heap()->total_collections();
+  int gc_count_before;
   {
-    MutexUnlocker mu(Heap_lock);  // give up heap lock, execute gets it back
-    VM_G1CollectFull op(gc_count_before, cause);
-    VMThread::execute(&op);
+    MutexLocker ml(Heap_lock);
+    // Read the GC count while holding the Heap_lock
+    gc_count_before = SharedHeap::heap()->total_collections();
+
+    // Don't want to do a GC until cleanup is completed.
+    wait_for_cleanup_complete();
+  } // We give up heap lock; VMThread::execute gets it back below
+  switch (cause) {
+    case GCCause::_scavenge_alot: {
+      // Do an incremental pause, which might sometimes be abandoned.
+      VM_G1IncCollectionPause op(gc_count_before, cause);
+      VMThread::execute(&op);
+      break;
+    }
+    default: {
+      // In all other cases, we currently do a full gc.
+      VM_G1CollectFull op(gc_count_before, cause);
+      VMThread::execute(&op);
+    }
   }
 }
 
@@ -2119,7 +2131,7 @@ size_t G1CollectedHeap::large_typearray_limit() {
 }
 
 size_t G1CollectedHeap::max_capacity() const {
-  return _g1_committed.byte_size();
+  return g1_reserved_obj_bytes();
 }
 
 jlong G1CollectedHeap::millis_since_last_gc() {
@@ -2638,6 +2650,8 @@ G1CollectedHeap::do_collection_pause_at_safepoint() {
   }
 
   {
+    ResourceMark rm;
+
     char verbose_str[128];
     sprintf(verbose_str, "GC pause ");
     if (g1_policy()->in_young_gc_mode()) {
@@ -2649,8 +2663,6 @@ G1CollectedHeap::do_collection_pause_at_safepoint() {
     if (g1_policy()->should_initiate_conc_mark())
       strcat(verbose_str, " (initial-mark)");
 
-    GCCauseSetter x(this, GCCause::_g1_inc_collection_pause);
-
     // if PrintGCDetails is on, we'll print long statistics information
     // in the collector policy code, so let's not print this as the output
     // is messy if we do.
@@ -2658,7 +2670,8 @@ G1CollectedHeap::do_collection_pause_at_safepoint() {
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
     TraceTime t(verbose_str, PrintGC && !PrintGCDetails, true, gclog_or_tty);
 
-    ResourceMark rm;
+    TraceMemoryManagerStats tms(false /* fullGC */);
+
     assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint");
     assert(Thread::current() == VMThread::vm_thread(), "should be in vm thread");
     guarantee(!is_gc_active(), "collection is not reentrant");
@@ -2802,6 +2815,22 @@ G1CollectedHeap::do_collection_pause_at_safepoint() {
           _young_list->reset_auxilary_lists();
         }
       } else {
+        if (_in_cset_fast_test != NULL) {
+          assert(_in_cset_fast_test_base != NULL, "Since _in_cset_fast_test isn't");
+          FREE_C_HEAP_ARRAY(bool, _in_cset_fast_test_base);
+          //  this is more for peace of mind; we're nulling them here and
+          // we're expecting them to be null at the beginning of the next GC
+          _in_cset_fast_test = NULL;
+          _in_cset_fast_test_base = NULL;
+        }
+        // This looks confusing, because the DPT should really be empty
+        // at this point -- since we have not done any collection work,
+        // there should not be any derived pointers in the table to update;
+        // however, there is some additional state in the DPT which is
+        // reset at the end of the (null) "gc" here via the following call.
+        // A better approach might be to split off that state resetting work
+        // into a separate method that asserts that the DPT is empty and call
+        // that here. That is deferred for now.
         COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
       }
 
@@ -2838,6 +2867,8 @@ G1CollectedHeap::do_collection_pause_at_safepoint() {
 
       assert(regions_accounted_for(), "Region leakage.");
 
+      MemoryService::track_memory_usage();
+
       if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) {
         HandleMark hm;  // Discard invalid handles created during verification
         gclog_or_tty->print(" VerifyAfterGC:");
@@ -4209,10 +4240,11 @@ void G1CollectedHeap::evacuate_collection_set() {
     RedirtyLoggedCardTableEntryFastClosure redirty;
     dirty_card_queue_set().set_closure(&redirty);
     dirty_card_queue_set().apply_closure_to_all_completed_buffers();
-    JavaThread::dirty_card_queue_set().merge_bufferlists(&dirty_card_queue_set());
+
+    DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set();
+    dcq.merge_bufferlists(&dirty_card_queue_set());
     assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed");
   }
-
   COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
 }
 
diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
index 5e0782826..3c071beed 100644
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
@@ -692,7 +692,7 @@ public:
 
   // Reserved (g1 only; super method includes perm), capacity and the used
   // portion in bytes.
-  size_t g1_reserved_obj_bytes() { return _g1_reserved.byte_size(); }
+  size_t g1_reserved_obj_bytes() const { return _g1_reserved.byte_size(); }
   virtual size_t capacity() const;
   virtual size_t used() const;
   // This should be called when we're not holding the heap lock. The
diff --git a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
index 20e8fba5d..487fb6d95 100644
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
@@ -1516,8 +1516,30 @@ void G1CollectorPolicy::record_collection_pause_end(bool abandoned) {
       (end_time_sec - _recent_prev_end_times_for_all_gcs_sec->oldest()) * 1000.0;
     update_recent_gc_times(end_time_sec, elapsed_ms);
     _recent_avg_pause_time_ratio = _recent_gc_times_ms->sum()/interval_ms;
-    // using 1.01 to account for floating point inaccuracies
-    assert(recent_avg_pause_time_ratio() < 1.01, "All GC?");
+    if (recent_avg_pause_time_ratio() < 0.0 ||
+        (recent_avg_pause_time_ratio() - 1.0 > 0.0)) {
+#ifndef PRODUCT
+      // Dump info to allow post-facto debugging
+      gclog_or_tty->print_cr("recent_avg_pause_time_ratio() out of bounds");
+      gclog_or_tty->print_cr("-------------------------------------------");
+      gclog_or_tty->print_cr("Recent GC Times (ms):");
+      _recent_gc_times_ms->dump();
+      gclog_or_tty->print_cr("(End Time=%3.3f) Recent GC End Times (s):", end_time_sec);
+      _recent_prev_end_times_for_all_gcs_sec->dump();
+      gclog_or_tty->print_cr("GC = %3.3f, Interval = %3.3f, Ratio = %3.3f",
+                             _recent_gc_times_ms->sum(), interval_ms, recent_avg_pause_time_ratio());
+      // In debug mode, terminate the JVM if the user wants to debug at this point.
+      assert(!G1FailOnFPError, "Debugging data for CR 6898948 has been dumped above");
+#endif  // !PRODUCT
+      // Clip ratio between 0.0 and 1.0, and continue. This will be fixed in
+      // CR 6902692 by redoing the manner in which the ratio is incrementally computed.
+      if (_recent_avg_pause_time_ratio < 0.0) {
+        _recent_avg_pause_time_ratio = 0.0;
+      } else {
+        assert(_recent_avg_pause_time_ratio - 1.0 > 0.0, "Ctl-point invariant");
+        _recent_avg_pause_time_ratio = 1.0;
+      }
+    }
   }
 
   if (G1PolicyVerbose > 1) {
@@ -1892,6 +1914,10 @@ void G1CollectorPolicy::record_collection_pause_end(bool abandoned) {
   calculate_young_list_min_length();
   calculate_young_list_target_config();
 
+  // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
+  double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSUpdatePauseFractionPercent / 100.0;
+  adjust_concurrent_refinement(update_rs_time, update_rs_processed_buffers, update_rs_time_goal_ms);
+
   // </NEW PREDICTION>
 
   _target_pause_time_ms = -1.0;
@@ -1899,6 +1925,47 @@ void G1CollectorPolicy::record_collection_pause_end(bool abandoned) {
 
 // <NEW PREDICTION>
 
+void G1CollectorPolicy::adjust_concurrent_refinement(double update_rs_time,
+                                                     double update_rs_processed_buffers,
+                                                     double goal_ms) {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+  ConcurrentG1Refine *cg1r = G1CollectedHeap::heap()->concurrent_g1_refine();
+
+  if (G1AdaptiveConcRefine) {
+    const int k_gy = 3, k_gr = 6;
+    const double inc_k = 1.1, dec_k = 0.9;
+
+    int g = cg1r->green_zone();
+    if (update_rs_time > goal_ms) {
+      g = (int)(g * dec_k);  // Can become 0, that's OK. That would mean a mutator-only processing.
+    } else {
+      if (update_rs_time < goal_ms && update_rs_processed_buffers > g) {
+        g = (int)MAX2(g * inc_k, g + 1.0);
+      }
+    }
+    // Change the refinement threads params
+    cg1r->set_green_zone(g);
+    cg1r->set_yellow_zone(g * k_gy);
+    cg1r->set_red_zone(g * k_gr);
+    cg1r->reinitialize_threads();
+
+    int processing_threshold_delta = MAX2((int)(cg1r->green_zone() * sigma()), 1);
+    int processing_threshold = MIN2(cg1r->green_zone() + processing_threshold_delta,
+                                    cg1r->yellow_zone());
+    // Change the barrier params
+    dcqs.set_process_completed_threshold(processing_threshold);
+    dcqs.set_max_completed_queue(cg1r->red_zone());
+  }
+
+  int curr_queue_size = dcqs.completed_buffers_num();
+  if (curr_queue_size >= cg1r->yellow_zone()) {
+    dcqs.set_completed_queue_padding(curr_queue_size);
+  } else {
+    dcqs.set_completed_queue_padding(0);
+  }
+  dcqs.notify_if_necessary();
+}
+
 double
 G1CollectorPolicy::
 predict_young_collection_elapsed_time_ms(size_t adjustment) {
@@ -2825,8 +2892,15 @@ choose_collection_set() {
   double non_young_start_time_sec;
   start_recording_regions();
 
-  guarantee(_target_pause_time_ms > -1.0,
+  guarantee(_target_pause_time_ms > -1.0
+            NOT_PRODUCT(|| Universe::heap()->gc_cause() == GCCause::_scavenge_alot),
             "_target_pause_time_ms should have been set!");
+#ifndef PRODUCT
+  if (_target_pause_time_ms <= -1.0) {
+    assert(ScavengeALot && Universe::heap()->gc_cause() == GCCause::_scavenge_alot, "Error");
+    _target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0;
+  }
+#endif
   assert(_collection_set == NULL, "Precondition");
 
   double base_time_ms = predict_base_elapsed_time_ms(_pending_cards);
@@ -2972,7 +3046,3 @@ record_collection_pause_end(bool abandoned) {
   G1CollectorPolicy::record_collection_pause_end(abandoned);
   assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end.");
 }
-
-// Local Variables: ***
-// c-indentation-style: gnu ***
-// End: ***
diff --git a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
index 0dff91a0b..2b9eb83f0 100644
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
@@ -316,6 +316,10 @@ private:
   bool verify_young_ages(HeapRegion* head, SurvRateGroup *surv_rate_group);
 #endif // PRODUCT
 
+  void adjust_concurrent_refinement(double update_rs_time,
+                                    double update_rs_processed_buffers,
+                                    double goal_ms);
+
 protected:
   double _pause_time_target_ms;
   double _recorded_young_cset_choice_time_ms;
diff --git a/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp b/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp
index b0d0df45d..492adaef6 100644
--- a/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp
+++ b/src/share/vm/gc_implementation/g1/g1MMUTracker.cpp
@@ -86,12 +86,22 @@ void G1MMUTrackerQueue::add_pause(double start, double end, bool gc_thread) {
     //   increase the array size (:-)
     //   remove the oldest entry (this might allow more GC time for
     //     the time slice than what's allowed)
-    //   concolidate the two entries with the minimum gap between them
-    //     (this mighte allow less GC time than what's allowed)
-    guarantee(0, "array full, currently we can't recover");
+    //   consolidate the two entries with the minimum gap between them
+    //     (this might allow less GC time than what's allowed)
+    guarantee(NOT_PRODUCT(ScavengeALot ||) G1ForgetfulMMUTracker,
+              "array full, currently we can't recover unless +G1ForgetfulMMUTracker");
+    // In the case where ScavengeALot is true, such overflow is not
+    // uncommon; in such cases, we can, without much loss of precision
+    // or performance (we are GC'ing most of the time anyway!),
+    // simply overwrite the oldest entry in the tracker: this
+    // is also the behaviour when G1ForgetfulMMUTracker is enabled.
+    _head_index = trim_index(_head_index + 1);
+    assert(_head_index == _tail_index, "Because we have a full circular buffer");
+    _tail_index = trim_index(_tail_index + 1);
+  } else {
+    _head_index = trim_index(_head_index + 1);
+    ++_no_entries;
   }
-  _head_index = trim_index(_head_index + 1);
-  ++_no_entries;
   _array[_head_index] = G1MMUTrackerQueueElem(start, end);
 }
 
diff --git a/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp b/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp
index 0eff2c386..1030454a7 100644
--- a/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp
+++ b/src/share/vm/gc_implementation/g1/g1MMUTracker.hpp
@@ -99,7 +99,10 @@ private:
   // The array is of fixed size and I don't think we'll need more than
   // two or three entries with the current behaviour of G1 pauses.
   // If the array is full, an easy fix is to look for the pauses with
-  // the shortest gap between them and concolidate them.
+  // the shortest gap between them and consolidate them.
+  // For now, we have taken the expedient alternative of forgetting
+  // the oldest entry in the event that +G1ForgetfulMMUTracker, thus
+  // potentially violating MMU specs for some time thereafter.
 
   G1MMUTrackerQueueElem _array[QueueLength];
   int                   _head_index;
diff --git a/src/share/vm/gc_implementation/g1/g1_globals.hpp b/src/share/vm/gc_implementation/g1/g1_globals.hpp
index c941c8755..85f3841c1 100644
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp
@@ -85,7 +85,7 @@
   diagnostic(bool, G1SummarizeZFStats, false,                               \
           "Summarize zero-filling info")                                    \
                                                                             \
-  develop(bool, G1TraceConcurrentRefinement, false,                         \
+  diagnostic(bool, G1TraceConcurrentRefinement, false,                      \
           "Trace G1 concurrent refinement")                                 \
                                                                             \
   product(intx, G1MarkStackSize, 2 * 1024 * 1024,                           \
@@ -94,19 +94,6 @@
   product(intx, G1MarkRegionStackSize, 1024 * 1024,                         \
           "Size of the region stack for concurrent marking.")               \
                                                                             \
-  develop(bool, G1ConcRefine, true,                                         \
-          "If true, run concurrent rem set refinement for G1")              \
-                                                                            \
-  develop(intx, G1ConcRefineTargTraversals, 4,                              \
-          "Number of concurrent refinement we try to achieve")              \
-                                                                            \
-  develop(intx, G1ConcRefineInitialDelta, 4,                                \
-          "Number of heap regions of alloc ahead of starting collection "   \
-          "pause to start concurrent refinement (initially)")               \
-                                                                            \
-  develop(bool, G1SmoothConcRefine, true,                                   \
-          "Attempts to smooth out the overhead of concurrent refinement")   \
-                                                                            \
   develop(bool, G1ConcZeroFill, true,                                       \
           "If true, run concurrent zero-filling thread")                    \
                                                                             \
@@ -178,13 +165,38 @@
   product(intx, G1UpdateBufferSize, 256,                                    \
           "Size of an update buffer")                                       \
                                                                             \
-  product(intx, G1UpdateBufferQueueProcessingThreshold, 5,                  \
+  product(intx, G1ConcRefineYellowZone, 0,                                  \
           "Number of enqueued update buffers that will "                    \
-          "trigger concurrent processing")                                  \
+          "trigger concurrent processing. Will be selected ergonomically "  \
+          "by default.")                                                    \
                                                                             \
-  product(intx, G1UpdateBufferQueueMaxLength, 30,                           \
+  product(intx, G1ConcRefineRedZone, 0,                                     \
           "Maximum number of enqueued update buffers before mutator "       \
-          "threads start processing new ones instead of enqueueing them")   \
+          "threads start processing new ones instead of enqueueing them. "  \
+          "Will be selected ergonomically by default. Zero will disable "   \
+          "concurrent processing.")                                         \
+                                                                            \
+  product(intx, G1ConcRefineGreenZone, 0,                                   \
+          "The number of update buffers that are left in the queue by the " \
+          "concurrent processing threads. Will be selected ergonomically "  \
+          "by default.")                                                    \
+                                                                            \
+  product(intx, G1ConcRefineServiceInterval, 300,                           \
+          "The last concurrent refinement thread wakes up every "           \
+          "specified number of milliseconds to do miscellaneous work.")     \
+                                                                            \
+  product(intx, G1ConcRefineThresholdStep, 0,                               \
+          "Each time the rset update queue increases by this amount "       \
+          "activate the next refinement thread if available. "              \
+          "Will be selected ergonomically by default.")                     \
+                                                                            \
+  product(intx, G1RSUpdatePauseFractionPercent, 10,                         \
+          "A target percentage of time that is allowed to be spend on "     \
+          "process RS update buffers during the collection pause.")         \
+                                                                            \
+  product(bool, G1AdaptiveConcRefine, true,                                 \
+          "Select green, yellow and red zones adaptively to meet the "      \
+          "the pause requirements.")                                        \
                                                                             \
   develop(intx, G1ConcRSLogCacheSize, 10,                                   \
           "Log base 2 of the length of conc RS hot-card cache.")            \
@@ -242,6 +254,10 @@
   product(bool, G1UseSurvivorSpaces, true,                                  \
           "When true, use survivor space.")                                 \
                                                                             \
+  develop(bool, G1FailOnFPError, false,                                     \
+          "When set, G1 will fail when it encounters an FP 'error', "       \
+          "so as to allow debugging")                                       \
+                                                                            \
   develop(bool, G1FixedTenuringThreshold, false,                            \
           "When set, G1 will not adjust the tenuring threshold")            \
                                                                             \
@@ -252,6 +268,9 @@
           "If non-0 is the size of the G1 survivor space, "                 \
           "otherwise SurvivorRatio is used to determine the size")          \
                                                                             \
+  product(bool, G1ForgetfulMMUTracker, false,                               \
+          "If the MMU tracker's memory is full, forget the oldest entry")   \
+                                                                            \
   product(uintx, G1HeapRegionSize, 0,                                       \
           "Size of the G1 regions.")                                        \
                                                                             \
diff --git a/src/share/vm/gc_implementation/g1/ptrQueue.cpp b/src/share/vm/gc_implementation/g1/ptrQueue.cpp
index 060743dd3..50a1967bb 100644
--- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp
+++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp
@@ -64,8 +64,8 @@ void PtrQueue::enqueue_known_active(void* ptr) {
   while (_index == 0) {
     handle_zero_index();
   }
-  assert(_index > 0, "postcondition");
 
+  assert(_index > 0, "postcondition");
   _index -= oopSize;
   _buf[byte_index_to_index((int)_index)] = ptr;
   assert(0 <= _index && _index <= _sz, "Invariant.");
@@ -99,94 +99,110 @@ void** PtrQueueSet::allocate_buffer() {
   assert(_sz > 0, "Didn't set a buffer size.");
   MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag);
   if (_fl_owner->_buf_free_list != NULL) {
-    void** res = _fl_owner->_buf_free_list;
-    _fl_owner->_buf_free_list = (void**)_fl_owner->_buf_free_list[0];
+    void** res = BufferNode::make_buffer_from_node(_fl_owner->_buf_free_list);
+    _fl_owner->_buf_free_list = _fl_owner->_buf_free_list->next();
     _fl_owner->_buf_free_list_sz--;
-    // Just override the next pointer with NULL, just in case we scan this part
-    // of the buffer.
-    res[0] = NULL;
     return res;
   } else {
-    return NEW_C_HEAP_ARRAY(void*, _sz);
+    // Allocate space for the BufferNode in front of the buffer.
+    char *b =  NEW_C_HEAP_ARRAY(char, _sz + BufferNode::aligned_size());
+    return BufferNode::make_buffer_from_block(b);
   }
 }
 
 void PtrQueueSet::deallocate_buffer(void** buf) {
   assert(_sz > 0, "Didn't set a buffer size.");
   MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag);
-  buf[0] = (void*)_fl_owner->_buf_free_list;
-  _fl_owner->_buf_free_list = buf;
+  BufferNode *node = BufferNode::make_node_from_buffer(buf);
+  node->set_next(_fl_owner->_buf_free_list);
+  _fl_owner->_buf_free_list = node;
   _fl_owner->_buf_free_list_sz++;
 }
 
 void PtrQueueSet::reduce_free_list() {
+  assert(_fl_owner == this, "Free list reduction is allowed only for the owner");
   // For now we'll adopt the strategy of deleting half.
   MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
   size_t n = _buf_free_list_sz / 2;
   while (n > 0) {
     assert(_buf_free_list != NULL, "_buf_free_list_sz must be wrong.");
-    void** head = _buf_free_list;
-    _buf_free_list = (void**)_buf_free_list[0];
-    FREE_C_HEAP_ARRAY(void*,head);
+    void* b = BufferNode::make_block_from_node(_buf_free_list);
+    _buf_free_list = _buf_free_list->next();
+    FREE_C_HEAP_ARRAY(char, b);
+    _buf_free_list_sz --;
     n--;
   }
 }
 
-void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index, bool ignore_max_completed) {
-  // I use explicit locking here because there's a bailout in the middle.
-  _cbl_mon->lock_without_safepoint_check();
-
-  Thread* thread = Thread::current();
-  assert( ignore_max_completed ||
-          thread->is_Java_thread() ||
-          SafepointSynchronize::is_at_safepoint(),
-          "invariant" );
-  ignore_max_completed = ignore_max_completed || !thread->is_Java_thread();
-
-  if (!ignore_max_completed && _max_completed_queue > 0 &&
-      _n_completed_buffers >= (size_t) _max_completed_queue) {
-    _cbl_mon->unlock();
-    bool b = mut_process_buffer(buf);
-    if (b) {
-      deallocate_buffer(buf);
-      return;
+void PtrQueue::handle_zero_index() {
+  assert(0 == _index, "Precondition.");
+  // This thread records the full buffer and allocates a new one (while
+  // holding the lock if there is one).
+  if (_buf != NULL) {
+    if (_lock) {
+      locking_enqueue_completed_buffer(_buf);
+    } else {
+      if (qset()->process_or_enqueue_complete_buffer(_buf)) {
+        // Recycle the buffer. No allocation.
+        _sz = qset()->buffer_size();
+        _index = _sz;
+        return;
+      }
     }
+  }
+  // Reallocate the buffer
+  _buf = qset()->allocate_buffer();
+  _sz = qset()->buffer_size();
+  _index = _sz;
+  assert(0 <= _index && _index <= _sz, "Invariant.");
+}
 
-    // Otherwise, go ahead and enqueue the buffer.  Must reaquire the lock.
-    _cbl_mon->lock_without_safepoint_check();
+bool PtrQueueSet::process_or_enqueue_complete_buffer(void** buf) {
+  if (Thread::current()->is_Java_thread()) {
+    // We don't lock. It is fine to be epsilon-precise here.
+    if (_max_completed_queue == 0 || _max_completed_queue > 0 &&
+        _n_completed_buffers >= _max_completed_queue + _completed_queue_padding) {
+      bool b = mut_process_buffer(buf);
+      if (b) {
+        // True here means that the buffer hasn't been deallocated and the caller may reuse it.
+        return true;
+      }
+    }
   }
+  // The buffer will be enqueued. The caller will have to get a new one.
+  enqueue_complete_buffer(buf);
+  return false;
+}
 
-  // Here we still hold the _cbl_mon.
-  CompletedBufferNode* cbn = new CompletedBufferNode;
-  cbn->buf = buf;
-  cbn->next = NULL;
-  cbn->index = index;
+void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index) {
+  MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+  BufferNode* cbn = BufferNode::new_from_buffer(buf);
+  cbn->set_index(index);
   if (_completed_buffers_tail == NULL) {
     assert(_completed_buffers_head == NULL, "Well-formedness");
     _completed_buffers_head = cbn;
     _completed_buffers_tail = cbn;
   } else {
-    _completed_buffers_tail->next = cbn;
+    _completed_buffers_tail->set_next(cbn);
     _completed_buffers_tail = cbn;
   }
   _n_completed_buffers++;
 
-  if (!_process_completed &&
+  if (!_process_completed && _process_completed_threshold >= 0 &&
       _n_completed_buffers >= _process_completed_threshold) {
     _process_completed = true;
     if (_notify_when_complete)
-      _cbl_mon->notify_all();
+      _cbl_mon->notify();
   }
   debug_only(assert_completed_buffer_list_len_correct_locked());
-  _cbl_mon->unlock();
 }
 
 int PtrQueueSet::completed_buffers_list_length() {
   int n = 0;
-  CompletedBufferNode* cbn = _completed_buffers_head;
+  BufferNode* cbn = _completed_buffers_head;
   while (cbn != NULL) {
     n++;
-    cbn = cbn->next;
+    cbn = cbn->next();
   }
   return n;
 }
@@ -197,7 +213,7 @@ void PtrQueueSet::assert_completed_buffer_list_len_correct() {
 }
 
 void PtrQueueSet::assert_completed_buffer_list_len_correct_locked() {
-  guarantee((size_t)completed_buffers_list_length() ==  _n_completed_buffers,
+  guarantee(completed_buffers_list_length() ==  _n_completed_buffers,
             "Completed buffer length is wrong.");
 }
 
@@ -206,12 +222,8 @@ void PtrQueueSet::set_buffer_size(size_t sz) {
   _sz = sz * oopSize;
 }
 
-void PtrQueueSet::set_process_completed_threshold(size_t sz) {
-  _process_completed_threshold = sz;
-}
-
-// Merge lists of buffers. Notify waiting threads if the length of the list
-// exceeds threshold. The source queue is emptied as a result. The queues
+// Merge lists of buffers. Notify the processing threads.
+// The source queue is emptied as a result. The queues
 // must share the monitor.
 void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) {
   assert(_cbl_mon == src->_cbl_mon, "Should share the same lock");
@@ -223,7 +235,7 @@ void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) {
   } else {
     assert(_completed_buffers_head != NULL, "Well formedness");
     if (src->_completed_buffers_head != NULL) {
-      _completed_buffers_tail->next = src->_completed_buffers_head;
+      _completed_buffers_tail->set_next(src->_completed_buffers_head);
       _completed_buffers_tail = src->_completed_buffers_tail;
     }
   }
@@ -236,31 +248,13 @@ void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) {
   assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL ||
          _completed_buffers_head != NULL && _completed_buffers_tail != NULL,
          "Sanity");
+}
 
-  if (!_process_completed &&
-      _n_completed_buffers >= _process_completed_threshold) {
+void PtrQueueSet::notify_if_necessary() {
+  MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+  if (_n_completed_buffers >= _process_completed_threshold || _max_completed_queue == 0) {
     _process_completed = true;
     if (_notify_when_complete)
-      _cbl_mon->notify_all();
-  }
-}
-
-// Merge free lists of the two queues. The free list of the source
-// queue is emptied as a result. The queues must share the same
-// mutex that guards free lists.
-void PtrQueueSet::merge_freelists(PtrQueueSet* src) {
-  assert(_fl_lock == src->_fl_lock, "Should share the same lock");
-  MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
-  if (_buf_free_list != NULL) {
-    void **p = _buf_free_list;
-    while (*p != NULL) {
-      p = (void**)*p;
-    }
-    *p = src->_buf_free_list;
-  } else {
-    _buf_free_list = src->_buf_free_list;
+      _cbl_mon->notify();
   }
-  _buf_free_list_sz += src->_buf_free_list_sz;
-  src->_buf_free_list = NULL;
-  src->_buf_free_list_sz = 0;
 }
diff --git a/src/share/vm/gc_implementation/g1/ptrQueue.hpp b/src/share/vm/gc_implementation/g1/ptrQueue.hpp
index 663dcba4f..ccf5b207c 100644
--- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp
+++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp
@@ -27,8 +27,10 @@
 // the addresses of modified old-generation objects.  This type supports
 // this operation.
 
-class PtrQueueSet;
+// The definition of placement operator new(size_t, void*) in the <new>.
+#include <new>
 
+class PtrQueueSet;
 class PtrQueue VALUE_OBJ_CLASS_SPEC {
 
 protected:
@@ -77,7 +79,7 @@ public:
     else enqueue_known_active(ptr);
   }
 
-  inline void handle_zero_index();
+  void handle_zero_index();
   void locking_enqueue_completed_buffer(void** buf);
 
   void enqueue_known_active(void* ptr);
@@ -126,34 +128,65 @@ public:
 
 };
 
+class BufferNode {
+  size_t _index;
+  BufferNode* _next;
+public:
+  BufferNode() : _index(0), _next(NULL) { }
+  BufferNode* next() const     { return _next;  }
+  void set_next(BufferNode* n) { _next = n;     }
+  size_t index() const         { return _index; }
+  void set_index(size_t i)     { _index = i;    }
+
+  // Align the size of the structure to the size of the pointer
+  static size_t aligned_size() {
+    static const size_t alignment = round_to(sizeof(BufferNode), sizeof(void*));
+    return alignment;
+  }
+
+  // BufferNode is allocated before the buffer.
+  // The chunk of memory that holds both of them is a block.
+
+  // Produce a new BufferNode given a buffer.
+  static BufferNode* new_from_buffer(void** buf) {
+    return new (make_block_from_buffer(buf)) BufferNode;
+  }
+
+  // The following are the required conversion routines:
+  static BufferNode* make_node_from_buffer(void** buf) {
+    return (BufferNode*)make_block_from_buffer(buf);
+  }
+  static void** make_buffer_from_node(BufferNode *node) {
+    return make_buffer_from_block(node);
+  }
+  static void* make_block_from_node(BufferNode *node) {
+    return (void*)node;
+  }
+  static void** make_buffer_from_block(void* p) {
+    return (void**)((char*)p + aligned_size());
+  }
+  static void* make_block_from_buffer(void** p) {
+    return (void*)((char*)p - aligned_size());
+  }
+};
+
 // A PtrQueueSet represents resources common to a set of pointer queues.
 // In particular, the individual queues allocate buffers from this shared
 // set, and return completed buffers to the set.
 // All these variables are are protected by the TLOQ_CBL_mon. XXX ???
 class PtrQueueSet VALUE_OBJ_CLASS_SPEC {
-
 protected:
-
-  class CompletedBufferNode: public CHeapObj {
-  public:
-    void** buf;
-    size_t index;
-    CompletedBufferNode* next;
-    CompletedBufferNode() : buf(NULL),
-      index(0), next(NULL){ }
-  };
-
   Monitor* _cbl_mon;  // Protects the fields below.
-  CompletedBufferNode* _completed_buffers_head;
-  CompletedBufferNode* _completed_buffers_tail;
-  size_t _n_completed_buffers;
-  size_t _process_completed_threshold;
+  BufferNode* _completed_buffers_head;
+  BufferNode* _completed_buffers_tail;
+  int _n_completed_buffers;
+  int _process_completed_threshold;
   volatile bool _process_completed;
 
   // This (and the interpretation of the first element as a "next"
   // pointer) are protected by the TLOQ_FL_lock.
   Mutex* _fl_lock;
-  void** _buf_free_list;
+  BufferNode* _buf_free_list;
   size_t _buf_free_list_sz;
   // Queue set can share a freelist. The _fl_owner variable
   // specifies the owner. It is set to "this" by default.
@@ -170,6 +203,7 @@ protected:
   // Maximum number of elements allowed on completed queue: after that,
   // enqueuer does the work itself.  Zero indicates no maximum.
   int _max_completed_queue;
+  int _completed_queue_padding;
 
   int completed_buffers_list_length();
   void assert_completed_buffer_list_len_correct_locked();
@@ -191,9 +225,12 @@ public:
   // Because of init-order concerns, we can't pass these as constructor
   // arguments.
   void initialize(Monitor* cbl_mon, Mutex* fl_lock,
-                  int max_completed_queue = 0,
+                  int process_completed_threshold,
+                  int max_completed_queue,
                   PtrQueueSet *fl_owner = NULL) {
     _max_completed_queue = max_completed_queue;
+    _process_completed_threshold = process_completed_threshold;
+    _completed_queue_padding = 0;
     assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?");
     _cbl_mon = cbl_mon;
     _fl_lock = fl_lock;
@@ -208,14 +245,17 @@ public:
   void deallocate_buffer(void** buf);
 
   // Declares that "buf" is a complete buffer.
-  void enqueue_complete_buffer(void** buf, size_t index = 0,
-                               bool ignore_max_completed = false);
+  void enqueue_complete_buffer(void** buf, size_t index = 0);
+
+  // To be invoked by the mutator.
+  bool process_or_enqueue_complete_buffer(void** buf);
 
   bool completed_buffers_exist_dirty() {
     return _n_completed_buffers > 0;
   }
 
   bool process_completed_buffers() { return _process_completed; }
+  void set_process_completed(bool x) { _process_completed = x; }
 
   bool active() { return _all_active; }
 
@@ -226,15 +266,24 @@ public:
   // Get the buffer size.
   size_t buffer_size() { return _sz; }
 
-  // Set the number of completed buffers that triggers log processing.
-  void set_process_completed_threshold(size_t sz);
+  // Get/Set the number of completed buffers that triggers log processing.
+  void set_process_completed_threshold(int sz) { _process_completed_threshold = sz; }
+  int process_completed_threshold() const { return _process_completed_threshold; }
 
   // Must only be called at a safe point.  Indicates that the buffer free
   // list size may be reduced, if that is deemed desirable.
   void reduce_free_list();
 
-  size_t completed_buffers_num() { return _n_completed_buffers; }
+  int completed_buffers_num() { return _n_completed_buffers; }
 
   void merge_bufferlists(PtrQueueSet* src);
-  void merge_freelists(PtrQueueSet* src);
+
+  void set_max_completed_queue(int m) { _max_completed_queue = m; }
+  int max_completed_queue() { return _max_completed_queue; }
+
+  void set_completed_queue_padding(int padding) { _completed_queue_padding = padding; }
+  int completed_queue_padding() { return _completed_queue_padding; }
+
+  // Notify the consumer if the number of buffers crossed the threshold
+  void notify_if_necessary();
 };
diff --git a/src/share/vm/gc_implementation/g1/satbQueue.cpp b/src/share/vm/gc_implementation/g1/satbQueue.cpp
index 3cf402ce7..8efdc3b9c 100644
--- a/src/share/vm/gc_implementation/g1/satbQueue.cpp
+++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp
@@ -67,9 +67,9 @@ SATBMarkQueueSet::SATBMarkQueueSet() :
 {}
 
 void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
-                                  int max_completed_queue,
+                                  int process_completed_threshold,
                                   Mutex* lock) {
-  PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue);
+  PtrQueueSet::initialize(cbl_mon, fl_lock, process_completed_threshold, -1);
   _shared_satb_queue.set_lock(lock);
   if (ParallelGCThreads > 0) {
     _par_closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads);
@@ -122,12 +122,12 @@ void SATBMarkQueueSet::par_iterate_closure_all_threads(int worker) {
 
 bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par,
                                                               int worker) {
-  CompletedBufferNode* nd = NULL;
+  BufferNode* nd = NULL;
   {
     MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
     if (_completed_buffers_head != NULL) {
       nd = _completed_buffers_head;
-      _completed_buffers_head = nd->next;
+      _completed_buffers_head = nd->next();
       if (_completed_buffers_head == NULL) _completed_buffers_tail = NULL;
       _n_completed_buffers--;
       if (_n_completed_buffers == 0) _process_completed = false;
@@ -135,9 +135,9 @@ bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par,
   }
   ObjectClosure* cl = (par ? _par_closures[worker] : _closure);
   if (nd != NULL) {
-    ObjPtrQueue::apply_closure_to_buffer(cl, nd->buf, 0, _sz);
-    deallocate_buffer(nd->buf);
-    delete nd;
+    void **buf = BufferNode::make_buffer_from_node(nd);
+    ObjPtrQueue::apply_closure_to_buffer(cl, buf, 0, _sz);
+    deallocate_buffer(buf);
     return true;
   } else {
     return false;
@@ -145,13 +145,13 @@ bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par,
 }
 
 void SATBMarkQueueSet::abandon_partial_marking() {
-  CompletedBufferNode* buffers_to_delete = NULL;
+  BufferNode* buffers_to_delete = NULL;
   {
     MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
     while (_completed_buffers_head != NULL) {
-      CompletedBufferNode* nd = _completed_buffers_head;
-      _completed_buffers_head = nd->next;
-      nd->next = buffers_to_delete;
+      BufferNode* nd = _completed_buffers_head;
+      _completed_buffers_head = nd->next();
+      nd->set_next(buffers_to_delete);
       buffers_to_delete = nd;
     }
     _completed_buffers_tail = NULL;
@@ -159,10 +159,9 @@ void SATBMarkQueueSet::abandon_partial_marking() {
     DEBUG_ONLY(assert_completed_buffer_list_len_correct_locked());
   }
   while (buffers_to_delete != NULL) {
-    CompletedBufferNode* nd = buffers_to_delete;
-    buffers_to_delete = nd->next;
-    deallocate_buffer(nd->buf);
-    delete nd;
+    BufferNode* nd = buffers_to_delete;
+    buffers_to_delete = nd->next();
+    deallocate_buffer(BufferNode::make_buffer_from_node(nd));
   }
   assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
   // So we can safely manipulate these queues.
diff --git a/src/share/vm/gc_implementation/g1/satbQueue.hpp b/src/share/vm/gc_implementation/g1/satbQueue.hpp
index ed1181dd7..76218a636 100644
--- a/src/share/vm/gc_implementation/g1/satbQueue.hpp
+++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp
@@ -60,8 +60,8 @@ public:
   SATBMarkQueueSet();
 
   void initialize(Monitor* cbl_mon, Mutex* fl_lock,
-                  int max_completed_queue = 0,
-                  Mutex* lock = NULL);
+                  int process_completed_threshold,
+                  Mutex* lock);
 
   static void handle_zero_index_for_thread(JavaThread* t);
 
diff --git a/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp
index e59dbe483..2137efa4e 100644
--- a/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp
+++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp
@@ -42,7 +42,7 @@ void VM_G1CollectFull::doit() {
 void VM_G1IncCollectionPause::doit() {
   JvmtiGCForAllocationMarker jgcm;
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  GCCauseSetter x(g1h, GCCause::_g1_inc_collection_pause);
+  GCCauseSetter x(g1h, _gc_cause);
   g1h->do_collection_pause_at_safepoint();
 }
 
diff --git a/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp b/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp
index 6cf0605ec..95dda3844 100644
--- a/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp
+++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp
@@ -68,8 +68,9 @@ class VM_G1CollectForAllocation: public VM_GC_Operation {
 
 class VM_G1IncCollectionPause: public VM_GC_Operation {
  public:
-  VM_G1IncCollectionPause(int gc_count_before) :
-    VM_GC_Operation(gc_count_before) {}
+  VM_G1IncCollectionPause(int gc_count_before,
+                          GCCause::Cause gc_cause = GCCause::_g1_inc_collection_pause) :
+    VM_GC_Operation(gc_count_before) { _gc_cause = gc_cause; }
   virtual VMOp_Type type() const { return VMOp_G1IncCollectionPause; }
   virtual void doit();
   virtual const char* name() const {
diff --git a/src/share/vm/gc_implementation/includeDB_gc_g1 b/src/share/vm/gc_implementation/includeDB_gc_g1
index 63bfbce76..60531af90 100644
--- a/src/share/vm/gc_implementation/includeDB_gc_g1
+++ b/src/share/vm/gc_implementation/includeDB_gc_g1
@@ -109,7 +109,6 @@ dirtyCardQueue.cpp                      atomic.hpp
 dirtyCardQueue.cpp                      dirtyCardQueue.hpp
 dirtyCardQueue.cpp			heapRegionRemSet.hpp
 dirtyCardQueue.cpp                      mutexLocker.hpp
-dirtyCardQueue.cpp                      ptrQueue.inline.hpp
 dirtyCardQueue.cpp                      safepoint.hpp
 dirtyCardQueue.cpp                      thread.hpp
 dirtyCardQueue.cpp                      thread_<os_family>.inline.hpp
@@ -222,6 +221,15 @@ g1MarkSweep.hpp                         oop.hpp
 g1MarkSweep.hpp                         timer.hpp
 g1MarkSweep.hpp                         universe.hpp
 
+g1MemoryPool.cpp                        heapRegion.hpp
+g1MemoryPool.cpp                        g1CollectedHeap.inline.hpp
+g1MemoryPool.cpp                        g1CollectedHeap.hpp
+g1MemoryPool.cpp                        g1CollectorPolicy.hpp
+g1MemoryPool.cpp                        g1MemoryPool.hpp
+
+g1MemoryPool.hpp                        memoryUsage.hpp
+g1MemoryPool.hpp                        memoryPool.hpp
+
 g1OopClosures.inline.hpp		concurrentMark.hpp
 g1OopClosures.inline.hpp		g1OopClosures.hpp
 g1OopClosures.inline.hpp		g1CollectedHeap.hpp
@@ -303,12 +311,13 @@ heapRegionSeq.inline.hpp                heapRegionSeq.hpp
 
 klass.hpp				g1OopClosures.hpp
 
+memoryService.cpp                       g1MemoryPool.hpp
+
 ptrQueue.cpp                            allocation.hpp
 ptrQueue.cpp                            allocation.inline.hpp
 ptrQueue.cpp                            mutex.hpp
 ptrQueue.cpp                            mutexLocker.hpp
 ptrQueue.cpp                            ptrQueue.hpp
-ptrQueue.cpp                            ptrQueue.inline.hpp
 ptrQueue.cpp                            thread_<os_family>.inline.hpp
 
 ptrQueue.hpp                            allocation.hpp
@@ -318,7 +327,6 @@ ptrQueue.inline.hpp                     ptrQueue.hpp
 
 satbQueue.cpp                           allocation.inline.hpp
 satbQueue.cpp                           mutexLocker.hpp
-satbQueue.cpp                           ptrQueue.inline.hpp
 satbQueue.cpp                           satbQueue.hpp
 satbQueue.cpp                           sharedHeap.hpp
 satbQueue.cpp                           thread.hpp
diff --git a/src/share/vm/includeDB_compiler2 b/src/share/vm/includeDB_compiler2
index 6ba7bfaf8..abe70d1f7 100644
--- a/src/share/vm/includeDB_compiler2
+++ b/src/share/vm/includeDB_compiler2
@@ -149,6 +149,7 @@ c2compiler.cpp                          runtime.hpp
 c2compiler.hpp                          abstractCompiler.hpp
 
 callGenerator.cpp                       addnode.hpp
+callGenerator.cpp                       bcEscapeAnalyzer.hpp
 callGenerator.cpp                       callGenerator.hpp
 callGenerator.cpp                       callnode.hpp
 callGenerator.cpp                       cfgnode.hpp
@@ -321,6 +322,7 @@ compile.cpp                             phaseX.hpp
 compile.cpp                             rootnode.hpp
 compile.cpp                             runtime.hpp
 compile.cpp                             signature.hpp
+compile.cpp                             stringopts.hpp
 compile.cpp                             stubRoutines.hpp
 compile.cpp                             systemDictionary.hpp
 compile.cpp                             timer.hpp
@@ -476,12 +478,16 @@ graphKit.cpp                            rootnode.hpp
 graphKit.cpp                            runtime.hpp
 graphKit.cpp                            sharedRuntime.hpp
 
+graphKit.hpp                            addnode.hpp
 graphKit.hpp                            callnode.hpp
 graphKit.hpp                            cfgnode.hpp
 graphKit.hpp                            ciEnv.hpp
+graphKit.hpp                            divnode.hpp
 graphKit.hpp                            compile.hpp
 graphKit.hpp                            deoptimization.hpp
 graphKit.hpp                            phaseX.hpp
+graphKit.hpp                            mulnode.hpp
+graphKit.hpp                            subnode.hpp
 graphKit.hpp                            type.hpp
 
 idealKit.cpp                            addnode.hpp
@@ -490,7 +496,10 @@ idealKit.cpp                            cfgnode.hpp
 idealKit.cpp                            idealKit.hpp
 idealKit.cpp				runtime.hpp
 
+idealKit.hpp                            addnode.hpp
+idealKit.hpp                            cfgnode.hpp
 idealKit.hpp                            connode.hpp
+idealKit.hpp                            divnode.hpp
 idealKit.hpp                            mulnode.hpp
 idealKit.hpp                            phaseX.hpp
 idealKit.hpp                            subnode.hpp
@@ -641,6 +650,7 @@ macro.cpp                               addnode.hpp
 macro.cpp                               callnode.hpp
 macro.cpp                               cfgnode.hpp
 macro.cpp                               compile.hpp
+macro.cpp                              compileLog.hpp
 macro.cpp                               connode.hpp
 macro.cpp                               locknode.hpp
 macro.cpp                               loopnode.hpp
@@ -993,6 +1003,21 @@ split_if.cpp                            callnode.hpp
 split_if.cpp                            connode.hpp
 split_if.cpp                            loopnode.hpp
 
+stringopts.hpp                          phaseX.hpp
+stringopts.hpp                          node.hpp
+
+stringopts.cpp                          addnode.hpp
+stringopts.cpp                          callnode.hpp
+stringopts.cpp                          callGenerator.hpp
+stringopts.cpp                          compileLog.hpp
+stringopts.cpp                          divnode.hpp
+stringopts.cpp                          idealKit.hpp
+stringopts.cpp                          graphKit.hpp
+stringopts.cpp                          rootnode.hpp
+stringopts.cpp                          runtime.hpp
+stringopts.cpp                          subnode.hpp
+stringopts.cpp                          stringopts.hpp
+
 stubGenerator_<arch_model>.cpp          runtime.hpp
 
 stubRoutines.cpp                        runtime.hpp
diff --git a/src/share/vm/includeDB_core b/src/share/vm/includeDB_core
index 1a5f3ee30..6b0cf03f1 100644
--- a/src/share/vm/includeDB_core
+++ b/src/share/vm/includeDB_core
@@ -289,7 +289,7 @@ attachListener.hpp                      allocation.hpp
 attachListener.hpp                      debug.hpp
 attachListener.hpp                      ostream.hpp
 
-barrierSet.cpp				barrierSet.hpp
+barrierSet.cpp				barrierSet.inline.hpp
 barrierSet.cpp			        collectedHeap.hpp
 barrierSet.cpp				universe.hpp
 
@@ -570,6 +570,7 @@ ciEnv.hpp                               debugInfoRec.hpp
 ciEnv.hpp                               dependencies.hpp
 ciEnv.hpp                               exceptionHandlerTable.hpp
 ciEnv.hpp                               oopMap.hpp
+ciEnv.hpp                               systemDictionary.hpp
 ciEnv.hpp                               thread.hpp
 
 ciExceptionHandler.cpp                  ciExceptionHandler.hpp
diff --git a/src/share/vm/memory/barrierSet.cpp b/src/share/vm/memory/barrierSet.cpp
index 07ff7a0fc..8cd80b7c4 100644
--- a/src/share/vm/memory/barrierSet.cpp
+++ b/src/share/vm/memory/barrierSet.cpp
@@ -41,11 +41,6 @@ void BarrierSet::static_write_ref_array_pre(HeapWord* start, size_t count) {
 
 // count is number of array elements being written
 void BarrierSet::static_write_ref_array_post(HeapWord* start, size_t count) {
-  assert(count <= (size_t)max_intx, "count too large");
-  HeapWord* end = start + objArrayOopDesc::array_size((int)count);
-#if 0
-  warning("Post:\t" INTPTR_FORMAT "[" SIZE_FORMAT "] : [" INTPTR_FORMAT","INTPTR_FORMAT")\t",
-                   start,            count,              start,          end);
-#endif
-  Universe::heap()->barrier_set()->write_ref_array_work(MemRegion(start, end));
+  // simply delegate to instance method
+  Universe::heap()->barrier_set()->write_ref_array(start, count);
 }
diff --git a/src/share/vm/memory/barrierSet.hpp b/src/share/vm/memory/barrierSet.hpp
index 0fc6a0061..c624f2506 100644
--- a/src/share/vm/memory/barrierSet.hpp
+++ b/src/share/vm/memory/barrierSet.hpp
@@ -121,17 +121,20 @@ public:
   virtual void read_ref_array(MemRegion mr) = 0;
   virtual void read_prim_array(MemRegion mr) = 0;
 
+  // Below length is the # array elements being written
   virtual void write_ref_array_pre(      oop* dst, int length) {}
   virtual void write_ref_array_pre(narrowOop* dst, int length) {}
+  // Below MemRegion mr is expected to be HeapWord-aligned
   inline void write_ref_array(MemRegion mr);
+  // Below count is the # array elements being written, starting
+  // at the address "start", which may not necessarily be HeapWord-aligned
+  inline void write_ref_array(HeapWord* start, size_t count);
 
-  // Static versions, suitable for calling from generated code.
+  // Static versions, suitable for calling from generated code;
+  // count is # array elements being written, starting with "start",
+  // which may not necessarily be HeapWord-aligned.
   static void static_write_ref_array_pre(HeapWord* start, size_t count);
   static void static_write_ref_array_post(HeapWord* start, size_t count);
-  // Narrow oop versions of the above; count is # of array elements being written,
-  // starting with "start", which is HeapWord-aligned.
-  static void static_write_ref_array_pre_narrow(HeapWord* start, size_t count);
-  static void static_write_ref_array_post_narrow(HeapWord* start, size_t count);
 
 protected:
   virtual void write_ref_array_work(MemRegion mr) = 0;
diff --git a/src/share/vm/memory/barrierSet.inline.hpp b/src/share/vm/memory/barrierSet.inline.hpp
index edcb551bd..ddc398348 100644
--- a/src/share/vm/memory/barrierSet.inline.hpp
+++ b/src/share/vm/memory/barrierSet.inline.hpp
@@ -43,6 +43,8 @@ void BarrierSet::write_ref_field(void* field, oop new_val) {
 }
 
 void BarrierSet::write_ref_array(MemRegion mr) {
+  assert((HeapWord*)align_size_down((uintptr_t)mr.start(), HeapWordSize) == mr.start() , "Unaligned start");
+  assert((HeapWord*)align_size_up  ((uintptr_t)mr.end(),   HeapWordSize) == mr.end(),    "Unaligned end"  );
   if (kind() == CardTableModRef) {
     ((CardTableModRefBS*)this)->inline_write_ref_array(mr);
   } else {
@@ -50,6 +52,34 @@ void BarrierSet::write_ref_array(MemRegion mr) {
   }
 }
 
+// count is number of array elements being written
+void BarrierSet::write_ref_array(HeapWord* start, size_t count) {
+  assert(count <= (size_t)max_intx, "count too large");
+  HeapWord* end = (HeapWord*)((char*)start + (count*heapOopSize));
+  // In the case of compressed oops, start and end may potentially be misaligned;
+  // so we need to conservatively align the first downward (this is not
+  // strictly necessary for current uses, but a case of good hygiene and,
+  // if you will, aesthetics) and the second upward (this is essential for
+  // current uses) to a HeapWord boundary, so we mark all cards overlapping
+  // this write. In the event that this evolves in the future to calling a
+  // logging barrier of narrow oop granularity, like the pre-barrier for G1
+  // (mentioned here merely by way of example), we will need to change this
+  // interface, much like the pre-barrier one above, so it is "exactly precise"
+  // (if i may be allowed the adverbial redundancy for emphasis) and does not
+  // include narrow oop slots not included in the original write interval.
+  HeapWord* aligned_start = (HeapWord*)align_size_down((uintptr_t)start, HeapWordSize);
+  HeapWord* aligned_end   = (HeapWord*)align_size_up  ((uintptr_t)end,   HeapWordSize);
+  // If compressed oops were not being used, these should already be aligned
+  assert(UseCompressedOops || (aligned_start == start && aligned_end == end),
+         "Expected heap word alignment of start and end");
+#if 0
+  warning("Post:\t" INTPTR_FORMAT "[" SIZE_FORMAT "] : [" INTPTR_FORMAT","INTPTR_FORMAT")\t",
+                   start,            count,              aligned_start,   aligned_end);
+#endif
+  write_ref_array_work(MemRegion(aligned_start, aligned_end));
+}
+
+
 void BarrierSet::write_region(MemRegion mr) {
   if (kind() == CardTableModRef) {
     ((CardTableModRefBS*)this)->inline_write_region(mr);
diff --git a/src/share/vm/memory/cardTableModRefBS.cpp b/src/share/vm/memory/cardTableModRefBS.cpp
index c036a3555..2deb7da83 100644
--- a/src/share/vm/memory/cardTableModRefBS.cpp
+++ b/src/share/vm/memory/cardTableModRefBS.cpp
@@ -511,6 +511,8 @@ void CardTableModRefBS::mod_oop_in_space_iterate(Space* sp,
 }
 
 void CardTableModRefBS::dirty_MemRegion(MemRegion mr) {
+  assert((HeapWord*)align_size_down((uintptr_t)mr.start(), HeapWordSize) == mr.start(), "Unaligned start");
+  assert((HeapWord*)align_size_up  ((uintptr_t)mr.end(),   HeapWordSize) == mr.end(),   "Unaligned end"  );
   jbyte* cur  = byte_for(mr.start());
   jbyte* last = byte_after(mr.last());
   while (cur < last) {
@@ -520,6 +522,8 @@ void CardTableModRefBS::dirty_MemRegion(MemRegion mr) {
 }
 
 void CardTableModRefBS::invalidate(MemRegion mr, bool whole_heap) {
+  assert((HeapWord*)align_size_down((uintptr_t)mr.start(), HeapWordSize) == mr.start(), "Unaligned start");
+  assert((HeapWord*)align_size_up  ((uintptr_t)mr.end(),   HeapWordSize) == mr.end(),   "Unaligned end"  );
   for (int i = 0; i < _cur_covered_regions; i++) {
     MemRegion mri = mr.intersection(_covered[i]);
     if (!mri.is_empty()) dirty_MemRegion(mri);
diff --git a/src/share/vm/memory/sharedHeap.hpp b/src/share/vm/memory/sharedHeap.hpp
index 0bf863fb1..609438724 100644
--- a/src/share/vm/memory/sharedHeap.hpp
+++ b/src/share/vm/memory/sharedHeap.hpp
@@ -224,10 +224,6 @@ public:
                           CodeBlobClosure* code_roots,
                           OopClosure* non_root_closure);
 
-
-  // Like CollectedHeap::collect, but assume that the caller holds the Heap_lock.
-  virtual void collect_locked(GCCause::Cause cause) = 0;
-
   // The functions below are helper functions that a subclass of
   // "SharedHeap" can use in the implementation of its virtual
   // functions.
diff --git a/src/share/vm/memory/universe.cpp b/src/share/vm/memory/universe.cpp
index 5a80ac144..5a4bd323a 100644
--- a/src/share/vm/memory/universe.cpp
+++ b/src/share/vm/memory/universe.cpp
@@ -67,6 +67,8 @@ typeArrayOop Universe::_the_empty_int_array           = NULL;
 objArrayOop Universe::_the_empty_system_obj_array     = NULL;
 objArrayOop Universe::_the_empty_class_klass_array    = NULL;
 objArrayOop Universe::_the_array_interfaces_array     = NULL;
+oop Universe::_the_null_string                        = NULL;
+oop Universe::_the_min_jint_string                   = NULL;
 LatestMethodOopCache* Universe::_finalizer_register_cache = NULL;
 LatestMethodOopCache* Universe::_loader_addClass_cache    = NULL;
 ActiveMethodOopsCache* Universe::_reflect_invoke_cache    = NULL;
@@ -187,6 +189,8 @@ void Universe::oops_do(OopClosure* f, bool do_all) {
   f->do_oop((oop*)&_the_empty_system_obj_array);
   f->do_oop((oop*)&_the_empty_class_klass_array);
   f->do_oop((oop*)&_the_array_interfaces_array);
+  f->do_oop((oop*)&_the_null_string);
+  f->do_oop((oop*)&_the_min_jint_string);
   _finalizer_register_cache->oops_do(f);
   _loader_addClass_cache->oops_do(f);
   _reflect_invoke_cache->oops_do(f);
@@ -289,6 +293,9 @@ void Universe::genesis(TRAPS) {
 
     klassOop ok = SystemDictionary::object_klass();
 
+    _the_null_string            = StringTable::intern("null", CHECK);
+    _the_min_jint_string       = StringTable::intern("-2147483648", CHECK);
+
     if (UseSharedSpaces) {
       // Verify shared interfaces array.
       assert(_the_array_interfaces_array->obj_at(0) ==
diff --git a/src/share/vm/memory/universe.hpp b/src/share/vm/memory/universe.hpp
index b22a1eba6..97044e161 100644
--- a/src/share/vm/memory/universe.hpp
+++ b/src/share/vm/memory/universe.hpp
@@ -169,6 +169,8 @@ class Universe: AllStatic {
   static objArrayOop  _the_empty_system_obj_array;    // Canonicalized system obj array
   static objArrayOop  _the_empty_class_klass_array;   // Canonicalized obj array of type java.lang.Class
   static objArrayOop  _the_array_interfaces_array;    // Canonicalized 2-array of cloneable & serializable klasses
+  static oop          _the_null_string;               // A cache of "null" as a Java string
+  static oop          _the_min_jint_string;          // A cache of "-2147483648" as a Java string
   static LatestMethodOopCache* _finalizer_register_cache; // static method for registering finalizable objects
   static LatestMethodOopCache* _loader_addClass_cache;    // method for registering loaded classes in class loader vector
   static ActiveMethodOopsCache* _reflect_invoke_cache;    // method for security checks
@@ -310,6 +312,8 @@ class Universe: AllStatic {
   static objArrayOop  the_empty_system_obj_array ()   { return _the_empty_system_obj_array;    }
   static objArrayOop  the_empty_class_klass_array ()  { return _the_empty_class_klass_array;   }
   static objArrayOop  the_array_interfaces_array()    { return _the_array_interfaces_array;    }
+  static oop          the_null_string()               { return _the_null_string;               }
+  static oop          the_min_jint_string()          { return _the_min_jint_string;          }
   static methodOop    finalizer_register_method()     { return _finalizer_register_cache->get_methodOop(); }
   static methodOop    loader_addClass_method()        { return _loader_addClass_cache->get_methodOop(); }
   static ActiveMethodOopsCache* reflect_invoke_cache() { return _reflect_invoke_cache; }
diff --git a/src/share/vm/oops/objArrayKlass.cpp b/src/share/vm/oops/objArrayKlass.cpp
index 212126490..68556a5a0 100644
--- a/src/share/vm/oops/objArrayKlass.cpp
+++ b/src/share/vm/oops/objArrayKlass.cpp
@@ -127,16 +127,14 @@ template <class T> void objArrayKlass::do_copy(arrayOop s, T* src,
           // pointer delta is scaled to number of elements (length field in
           // objArrayOop) which we assume is 32 bit.
           assert(pd == (size_t)(int)pd, "length field overflow");
-          const size_t done_word_len = objArrayOopDesc::array_size((int)pd);
-          bs->write_ref_array(MemRegion((HeapWord*)dst, done_word_len));
+          bs->write_ref_array((HeapWord*)dst, pd);
           THROW(vmSymbols::java_lang_ArrayStoreException());
           return;
         }
       }
     }
   }
-  const size_t word_len = objArrayOopDesc::array_size(length);
-  bs->write_ref_array(MemRegion((HeapWord*)dst, word_len));
+  bs->write_ref_array((HeapWord*)dst, length);
 }
 
 void objArrayKlass::copy_array(arrayOop s, int src_pos, arrayOop d,
diff --git a/src/share/vm/oops/objArrayOop.hpp b/src/share/vm/oops/objArrayOop.hpp
index 626f398a6..1c1764a87 100644
--- a/src/share/vm/oops/objArrayOop.hpp
+++ b/src/share/vm/oops/objArrayOop.hpp
@@ -37,6 +37,32 @@ class objArrayOopDesc : public arrayOopDesc {
     return &((T*)base())[index];
   }
 
+private:
+  // Give size of objArrayOop in HeapWords minus the header
+  static int array_size(int length) {
+    const int OopsPerHeapWord = HeapWordSize/heapOopSize;
+    assert(OopsPerHeapWord >= 1 && (HeapWordSize % heapOopSize == 0),
+           "Else the following (new) computation would be in error");
+#ifdef ASSERT
+    // The old code is left in for sanity-checking; it'll
+    // go away pretty soon. XXX
+    // Without UseCompressedOops, this is simply:
+    // oop->length() * HeapWordsPerOop;
+    // With narrowOops, HeapWordsPerOop is 1/2 or equal 0 as an integer.
+    // The oop elements are aligned up to wordSize
+    const int HeapWordsPerOop = heapOopSize/HeapWordSize;
+    int old_res;
+    if (HeapWordsPerOop > 0) {
+      old_res = length * HeapWordsPerOop;
+    } else {
+      old_res = align_size_up(length, OopsPerHeapWord)/OopsPerHeapWord;
+    }
+#endif  // ASSERT
+    int res = ((uint)length + OopsPerHeapWord - 1)/OopsPerHeapWord;
+    assert(res == old_res, "Inconsistency between old and new.");
+    return res;
+  }
+
  public:
   // Returns the offset of the first element.
   static int base_offset_in_bytes() {
@@ -67,27 +93,14 @@ class objArrayOopDesc : public arrayOopDesc {
   // Sizing
   static int header_size()    { return arrayOopDesc::header_size(T_OBJECT); }
   int object_size()           { return object_size(length()); }
-  int array_size()            { return array_size(length()); }
 
   static int object_size(int length) {
     // This returns the object size in HeapWords.
-    return align_object_size(header_size() + array_size(length));
-  }
-
-  // Give size of objArrayOop in HeapWords minus the header
-  static int array_size(int length) {
-    // Without UseCompressedOops, this is simply:
-    // oop->length() * HeapWordsPerOop;
-    // With narrowOops, HeapWordsPerOop is 1/2 or equal 0 as an integer.
-    // The oop elements are aligned up to wordSize
-    const int HeapWordsPerOop = heapOopSize/HeapWordSize;
-    if (HeapWordsPerOop > 0) {
-      return length * HeapWordsPerOop;
-    } else {
-      const int OopsPerHeapWord = HeapWordSize/heapOopSize;
-      int word_len = align_size_up(length, OopsPerHeapWord)/OopsPerHeapWord;
-      return word_len;
-    }
+    uint asz = array_size(length);
+    uint osz = align_object_size(header_size() + asz);
+    assert(osz >= asz,   "no overflow");
+    assert((int)osz > 0, "no overflow");
+    return (int)osz;
   }
 
   // special iterators for index ranges, returns size of object
diff --git a/src/share/vm/opto/c2_globals.cpp b/src/share/vm/opto/c2_globals.cpp
index 5715b24ba..40594bf94 100644
--- a/src/share/vm/opto/c2_globals.cpp
+++ b/src/share/vm/opto/c2_globals.cpp
@@ -25,4 +25,4 @@
 # include "incls/_precompiled.incl"
 # include "incls/_c2_globals.cpp.incl"
 
-C2_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_NOTPRODUCT_FLAG)
+C2_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_EXPERIMENTAL_FLAG, MATERIALIZE_NOTPRODUCT_FLAG)
diff --git a/src/share/vm/opto/c2_globals.hpp b/src/share/vm/opto/c2_globals.hpp
index 091ad4a9b..b68b66342 100644
--- a/src/share/vm/opto/c2_globals.hpp
+++ b/src/share/vm/opto/c2_globals.hpp
@@ -26,7 +26,7 @@
 // Defines all globals flags used by the server compiler.
 //
 
-#define C2_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \
+#define C2_FLAGS(develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct) \
                                                                             \
   notproduct(intx, CompileZapFirst, 0,                                      \
           "If +ZapDeadCompiledLocals, "                                     \
@@ -394,6 +394,12 @@
   product(bool, UseOptoBiasInlining, true,                                  \
           "Generate biased locking code in C2 ideal graph")                 \
                                                                             \
+  product(bool, OptimizeStringConcat, false,                                \
+          "Optimize the construction of Strings by StringBuilder")          \
+                                                                            \
+  notproduct(bool, PrintOptimizeStringConcat, false,                        \
+          "Print information about transformations performed on Strings")   \
+                                                                            \
   product(intx, ValueSearchLimit, 1000,                                     \
           "Recursion limit in PhaseMacroExpand::value_from_mem_phi")        \
                                                                             \
@@ -413,4 +419,4 @@
   product(bool, BlockLayoutRotateLoops, true,                               \
           "Allow back branches to be fall throughs in the block layour")    \
 
-C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG)
+C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG)
diff --git a/src/share/vm/opto/callGenerator.cpp b/src/share/vm/opto/callGenerator.cpp
index 8ce7c0ce5..8ddc84715 100644
--- a/src/share/vm/opto/callGenerator.cpp
+++ b/src/share/vm/opto/callGenerator.cpp
@@ -98,12 +98,21 @@ JVMState* ParseGenerator::generate(JVMState* jvms) {
 //---------------------------DirectCallGenerator------------------------------
 // Internal class which handles all out-of-line calls w/o receiver type checks.
 class DirectCallGenerator : public CallGenerator {
-public:
-  DirectCallGenerator(ciMethod* method)
-    : CallGenerator(method)
+ private:
+  CallStaticJavaNode* _call_node;
+  // Force separate memory and I/O projections for the exceptional
+  // paths to facilitate late inlinig.
+  bool                _separate_io_proj;
+
+ public:
+  DirectCallGenerator(ciMethod* method, bool separate_io_proj)
+    : CallGenerator(method),
+      _separate_io_proj(separate_io_proj)
   {
   }
   virtual JVMState* generate(JVMState* jvms);
+
+  CallStaticJavaNode* call_node() const { return _call_node; }
 };
 
 JVMState* DirectCallGenerator::generate(JVMState* jvms) {
@@ -129,9 +138,10 @@ JVMState* DirectCallGenerator::generate(JVMState* jvms) {
     call->set_optimized_virtual(true);
   }
   kit.set_arguments_for_java_call(call);
-  kit.set_edges_for_java_call(call);
-  Node* ret = kit.set_results_for_java_call(call);
+  kit.set_edges_for_java_call(call, false, _separate_io_proj);
+  Node* ret = kit.set_results_for_java_call(call, _separate_io_proj);
   kit.push_node(method()->return_type()->basic_type(), ret);
+  _call_node = call;  // Save the call node in case we need it later
   return kit.transfer_exceptions_into_jvms();
 }
 
@@ -238,9 +248,9 @@ CallGenerator* CallGenerator::for_osr(ciMethod* m, int osr_bci) {
   return new ParseGenerator(m, expected_uses, true);
 }
 
-CallGenerator* CallGenerator::for_direct_call(ciMethod* m) {
+CallGenerator* CallGenerator::for_direct_call(ciMethod* m, bool separate_io_proj) {
   assert(!m->is_abstract(), "for_direct_call mismatch");
-  return new DirectCallGenerator(m);
+  return new DirectCallGenerator(m, separate_io_proj);
 }
 
 CallGenerator* CallGenerator::for_virtual_call(ciMethod* m, int vtable_index) {
@@ -248,6 +258,108 @@ CallGenerator* CallGenerator::for_virtual_call(ciMethod* m, int vtable_index) {
   return new VirtualCallGenerator(m, vtable_index);
 }
 
+// Allow inlining decisions to be delayed
+class LateInlineCallGenerator : public DirectCallGenerator {
+  CallGenerator* _inline_cg;
+
+ public:
+  LateInlineCallGenerator(ciMethod* method, CallGenerator* inline_cg) :
+    DirectCallGenerator(method, true), _inline_cg(inline_cg) {}
+
+  virtual bool      is_late_inline() const { return true; }
+
+  // Convert the CallStaticJava into an inline
+  virtual void do_late_inline();
+
+  JVMState* generate(JVMState* jvms) {
+    // Record that this call site should be revisited once the main
+    // parse is finished.
+    Compile::current()->add_late_inline(this);
+
+    // Emit the CallStaticJava and request separate projections so
+    // that the late inlining logic can distinguish between fall
+    // through and exceptional uses of the memory and io projections
+    // as is done for allocations and macro expansion.
+    return DirectCallGenerator::generate(jvms);
+  }
+
+};
+
+
+void LateInlineCallGenerator::do_late_inline() {
+  // Can't inline it
+  if (call_node() == NULL || call_node()->outcnt() == 0 ||
+      call_node()->in(0) == NULL || call_node()->in(0)->is_top())
+    return;
+
+  CallStaticJavaNode* call = call_node();
+
+  // Make a clone of the JVMState that appropriate to use for driving a parse
+  Compile* C = Compile::current();
+  JVMState* jvms     = call->jvms()->clone_shallow(C);
+  uint size = call->req();
+  SafePointNode* map = new (C, size) SafePointNode(size, jvms);
+  for (uint i1 = 0; i1 < size; i1++) {
+    map->init_req(i1, call->in(i1));
+  }
+
+  // Make sure the state is a MergeMem for parsing.
+  if (!map->in(TypeFunc::Memory)->is_MergeMem()) {
+    map->set_req(TypeFunc::Memory, MergeMemNode::make(C, map->in(TypeFunc::Memory)));
+  }
+
+  // Make enough space for the expression stack and transfer the incoming arguments
+  int nargs    = method()->arg_size();
+  jvms->set_map(map);
+  map->ensure_stack(jvms, jvms->method()->max_stack());
+  if (nargs > 0) {
+    for (int i1 = 0; i1 < nargs; i1++) {
+      map->set_req(i1 + jvms->argoff(), call->in(TypeFunc::Parms + i1));
+    }
+  }
+
+  CompileLog* log = C->log();
+  if (log != NULL) {
+    log->head("late_inline method='%d'", log->identify(method()));
+    JVMState* p = jvms;
+    while (p != NULL) {
+      log->elem("jvms bci='%d' method='%d'", p->bci(), log->identify(p->method()));
+      p = p->caller();
+    }
+    log->tail("late_inline");
+  }
+
+  // Setup default node notes to be picked up by the inlining
+  Node_Notes* old_nn = C->default_node_notes();
+  if (old_nn != NULL) {
+    Node_Notes* entry_nn = old_nn->clone(C);
+    entry_nn->set_jvms(jvms);
+    C->set_default_node_notes(entry_nn);
+  }
+
+  // Now perform the inling using the synthesized JVMState
+  JVMState* new_jvms = _inline_cg->generate(jvms);
+  if (new_jvms == NULL)  return;  // no change
+  if (C->failing())      return;
+
+  // Capture any exceptional control flow
+  GraphKit kit(new_jvms);
+
+  // Find the result object
+  Node* result = C->top();
+  int   result_size = method()->return_type()->size();
+  if (result_size != 0 && !kit.stopped()) {
+    result = (result_size == 1) ? kit.pop() : kit.pop_pair();
+  }
+
+  kit.replace_call(call, result);
+}
+
+
+CallGenerator* CallGenerator::for_late_inline(ciMethod* method, CallGenerator* inline_cg) {
+  return new LateInlineCallGenerator(method, inline_cg);
+}
+
 
 //---------------------------WarmCallGenerator--------------------------------
 // Internal class which handles initial deferral of inlining decisions.
@@ -315,70 +427,7 @@ JVMState* WarmCallGenerator::generate(JVMState* jvms) {
 }
 
 void WarmCallInfo::make_hot() {
-  Compile* C = Compile::current();
-  // Replace the callnode with something better.
-  CallJavaNode* call = this->call()->as_CallJava();
-  ciMethod* method   = call->method();
-  int       nargs    = method->arg_size();
-  JVMState* jvms     = call->jvms()->clone_shallow(C);
-  uint size = TypeFunc::Parms + MAX2(2, nargs);
-  SafePointNode* map = new (C, size) SafePointNode(size, jvms);
-  for (uint i1 = 0; i1 < (uint)(TypeFunc::Parms + nargs); i1++) {
-    map->init_req(i1, call->in(i1));
-  }
-  jvms->set_map(map);
-  jvms->set_offsets(map->req());
-  jvms->set_locoff(TypeFunc::Parms);
-  jvms->set_stkoff(TypeFunc::Parms);
-  GraphKit kit(jvms);
-
-  JVMState* new_jvms = _hot_cg->generate(kit.jvms());
-  if (new_jvms == NULL)  return;  // no change
-  if (C->failing())      return;
-
-  kit.set_jvms(new_jvms);
-  Node* res = C->top();
-  int   res_size = method->return_type()->size();
-  if (res_size != 0) {
-    kit.inc_sp(-res_size);
-    res = kit.argument(0);
-  }
-  GraphKit ekit(kit.combine_and_pop_all_exception_states()->jvms());
-
-  // Replace the call:
-  for (DUIterator i = call->outs(); call->has_out(i); i++) {
-    Node* n = call->out(i);
-    Node* nn = NULL;  // replacement
-    if (n->is_Proj()) {
-      ProjNode* nproj = n->as_Proj();
-      assert(nproj->_con < (uint)(TypeFunc::Parms + (res_size ? 1 : 0)), "sane proj");
-      if (nproj->_con == TypeFunc::Parms) {
-        nn = res;
-      } else {
-        nn = kit.map()->in(nproj->_con);
-      }
-      if (nproj->_con == TypeFunc::I_O) {
-        for (DUIterator j = nproj->outs(); nproj->has_out(j); j++) {
-          Node* e = nproj->out(j);
-          if (e->Opcode() == Op_CreateEx) {
-            e->replace_by(ekit.argument(0));
-          } else if (e->Opcode() == Op_Catch) {
-            for (DUIterator k = e->outs(); e->has_out(k); k++) {
-              CatchProjNode* p = e->out(j)->as_CatchProj();
-              if (p->is_handler_proj()) {
-                p->replace_by(ekit.control());
-              } else {
-                p->replace_by(kit.control());
-              }
-            }
-          }
-        }
-      }
-    }
-    NOT_PRODUCT(if (!nn)  n->dump(2));
-    assert(nn != NULL, "don't know what to do with this user");
-    n->replace_by(nn);
-  }
+  Unimplemented();
 }
 
 void WarmCallInfo::make_cold() {
diff --git a/src/share/vm/opto/callGenerator.hpp b/src/share/vm/opto/callGenerator.hpp
index bbd47ca4a..70ec9d3ff 100644
--- a/src/share/vm/opto/callGenerator.hpp
+++ b/src/share/vm/opto/callGenerator.hpp
@@ -57,6 +57,13 @@ class CallGenerator : public ResourceObj {
   // is_trap: Does not return to the caller.  (E.g., uncommon trap.)
   virtual bool      is_trap() const             { return false; }
 
+  // is_late_inline: supports conversion of call into an inline
+  virtual bool      is_late_inline() const      { return false; }
+  // Replace the call with an inline version of the code
+  virtual void do_late_inline() { ShouldNotReachHere(); }
+
+  virtual CallStaticJavaNode* call_node() const { ShouldNotReachHere(); return NULL; }
+
   // Note:  It is possible for a CG to be both inline and virtual.
   // (The hashCode intrinsic does a vtable check and an inlined fast path.)
 
@@ -92,9 +99,12 @@ class CallGenerator : public ResourceObj {
   static CallGenerator* for_osr(ciMethod* m, int osr_bci);
 
   // How to generate vanilla out-of-line call sites:
-  static CallGenerator* for_direct_call(ciMethod* m);   // static, special
+  static CallGenerator* for_direct_call(ciMethod* m, bool separate_io_projs = false);   // static, special
   static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index);  // virtual, interface
 
+  // How to generate a replace a direct call with an inline version
+  static CallGenerator* for_late_inline(ciMethod* m, CallGenerator* inline_cg);
+
   // How to make a call but defer the decision whether to inline or not.
   static CallGenerator* for_warm_call(WarmCallInfo* ci,
                                       CallGenerator* if_cold,
diff --git a/src/share/vm/opto/callnode.cpp b/src/share/vm/opto/callnode.cpp
index 5c69109b8..a59027131 100644
--- a/src/share/vm/opto/callnode.cpp
+++ b/src/share/vm/opto/callnode.cpp
@@ -693,6 +693,84 @@ Node *CallNode::result_cast() {
 }
 
 
+void CallNode::extract_projections(CallProjections* projs, bool separate_io_proj) {
+  projs->fallthrough_proj      = NULL;
+  projs->fallthrough_catchproj = NULL;
+  projs->fallthrough_ioproj    = NULL;
+  projs->catchall_ioproj       = NULL;
+  projs->catchall_catchproj    = NULL;
+  projs->fallthrough_memproj   = NULL;
+  projs->catchall_memproj      = NULL;
+  projs->resproj               = NULL;
+  projs->exobj                 = NULL;
+
+  for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
+    ProjNode *pn = fast_out(i)->as_Proj();
+    if (pn->outcnt() == 0) continue;
+    switch (pn->_con) {
+    case TypeFunc::Control:
+      {
+        // For Control (fallthrough) and I_O (catch_all_index) we have CatchProj -> Catch -> Proj
+        projs->fallthrough_proj = pn;
+        DUIterator_Fast jmax, j = pn->fast_outs(jmax);
+        const Node *cn = pn->fast_out(j);
+        if (cn->is_Catch()) {
+          ProjNode *cpn = NULL;
+          for (DUIterator_Fast kmax, k = cn->fast_outs(kmax); k < kmax; k++) {
+            cpn = cn->fast_out(k)->as_Proj();
+            assert(cpn->is_CatchProj(), "must be a CatchProjNode");
+            if (cpn->_con == CatchProjNode::fall_through_index)
+              projs->fallthrough_catchproj = cpn;
+            else {
+              assert(cpn->_con == CatchProjNode::catch_all_index, "must be correct index.");
+              projs->catchall_catchproj = cpn;
+            }
+          }
+        }
+        break;
+      }
+    case TypeFunc::I_O:
+      if (pn->_is_io_use)
+        projs->catchall_ioproj = pn;
+      else
+        projs->fallthrough_ioproj = pn;
+      for (DUIterator j = pn->outs(); pn->has_out(j); j++) {
+        Node* e = pn->out(j);
+        if (e->Opcode() == Op_CreateEx && e->in(0)->is_CatchProj()) {
+          assert(projs->exobj == NULL, "only one");
+          projs->exobj = e;
+        }
+      }
+      break;
+    case TypeFunc::Memory:
+      if (pn->_is_io_use)
+        projs->catchall_memproj = pn;
+      else
+        projs->fallthrough_memproj = pn;
+      break;
+    case TypeFunc::Parms:
+      projs->resproj = pn;
+      break;
+    default:
+      assert(false, "unexpected projection from allocation node.");
+    }
+  }
+
+  // The resproj may not exist because the result couuld be ignored
+  // and the exception object may not exist if an exception handler
+  // swallows the exception but all the other must exist and be found.
+  assert(projs->fallthrough_proj      != NULL, "must be found");
+  assert(projs->fallthrough_catchproj != NULL, "must be found");
+  assert(projs->fallthrough_memproj   != NULL, "must be found");
+  assert(projs->fallthrough_ioproj    != NULL, "must be found");
+  assert(projs->catchall_catchproj    != NULL, "must be found");
+  if (separate_io_proj) {
+    assert(projs->catchall_memproj      != NULL, "must be found");
+    assert(projs->catchall_ioproj       != NULL, "must be found");
+  }
+}
+
+
 //=============================================================================
 uint CallJavaNode::size_of() const { return sizeof(*this); }
 uint CallJavaNode::cmp( const Node &n ) const {
diff --git a/src/share/vm/opto/callnode.hpp b/src/share/vm/opto/callnode.hpp
index ac886f3ba..a9649a7cc 100644
--- a/src/share/vm/opto/callnode.hpp
+++ b/src/share/vm/opto/callnode.hpp
@@ -470,6 +470,23 @@ public:
 #endif
 };
 
+
+// Simple container for the outgoing projections of a call.  Useful
+// for serious surgery on calls.
+class CallProjections : public StackObj {
+public:
+  Node* fallthrough_proj;
+  Node* fallthrough_catchproj;
+  Node* fallthrough_memproj;
+  Node* fallthrough_ioproj;
+  Node* catchall_catchproj;
+  Node* catchall_memproj;
+  Node* catchall_ioproj;
+  Node* resproj;
+  Node* exobj;
+};
+
+
 //------------------------------CallNode---------------------------------------
 // Call nodes now subsume the function of debug nodes at callsites, so they
 // contain the functionality of a full scope chain of debug nodes.
@@ -521,6 +538,11 @@ public:
   // or returns NULL if there is no one.
   Node *result_cast();
 
+  // Collect all the interesting edges from a call for use in
+  // replacing the call by something else.  Used by macro expansion
+  // and the late inlining support.
+  void extract_projections(CallProjections* projs, bool separate_io_proj);
+
   virtual uint match_edge(uint idx) const;
 
 #ifndef PRODUCT
@@ -529,6 +551,7 @@ public:
 #endif
 };
 
+
 //------------------------------CallJavaNode-----------------------------------
 // Make a static or dynamic subroutine call node using Java calling
 // convention.  (The "Java" calling convention is the compiler's calling
diff --git a/src/share/vm/opto/compile.cpp b/src/share/vm/opto/compile.cpp
index ecffd2f4e..f5d5387e9 100644
--- a/src/share/vm/opto/compile.cpp
+++ b/src/share/vm/opto/compile.cpp
@@ -224,6 +224,32 @@ bool Compile::valid_bundle_info(const Node *n) {
 }
 
 
+void Compile::gvn_replace_by(Node* n, Node* nn) {
+  for (DUIterator_Last imin, i = n->last_outs(imin); i >= imin; ) {
+    Node* use = n->last_out(i);
+    bool is_in_table = initial_gvn()->hash_delete(use);
+    uint uses_found = 0;
+    for (uint j = 0; j < use->len(); j++) {
+      if (use->in(j) == n) {
+        if (j < use->req())
+          use->set_req(j, nn);
+        else
+          use->set_prec(j, nn);
+        uses_found++;
+      }
+    }
+    if (is_in_table) {
+      // reinsert into table
+      initial_gvn()->hash_find_insert(use);
+    }
+    record_for_igvn(use);
+    i -= uses_found;    // we deleted 1 or more copies of this edge
+  }
+}
+
+
+
+
 // Identify all nodes that are reachable from below, useful.
 // Use breadth-first pass that records state in a Unique_Node_List,
 // recursive traversal is slower.
@@ -554,6 +580,28 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
       rethrow_exceptions(kit.transfer_exceptions_into_jvms());
     }
 
+    if (!failing() && has_stringbuilder()) {
+      {
+        // remove useless nodes to make the usage analysis simpler
+        ResourceMark rm;
+        PhaseRemoveUseless pru(initial_gvn(), &for_igvn);
+      }
+
+      {
+        ResourceMark rm;
+        print_method("Before StringOpts", 3);
+        PhaseStringOpts pso(initial_gvn(), &for_igvn);
+        print_method("After StringOpts", 3);
+      }
+
+      // now inline anything that we skipped the first time around
+      while (_late_inlines.length() > 0) {
+        CallGenerator* cg = _late_inlines.pop();
+        cg->do_late_inline();
+      }
+    }
+    assert(_late_inlines.length() == 0, "should have been processed");
+
     print_method("Before RemoveUseless", 3);
 
     // Remove clutter produced by parsing.
@@ -820,6 +868,7 @@ void Compile::Init(int aliaslevel) {
   _fixed_slots = 0;
   set_has_split_ifs(false);
   set_has_loops(has_method() && method()->has_loops()); // first approximation
+  set_has_stringbuilder(false);
   _deopt_happens = true;  // start out assuming the worst
   _trap_can_recompile = false;  // no traps emitted yet
   _major_progress = true; // start out assuming good things will happen
@@ -1803,6 +1852,7 @@ void Compile::dump_asm(int *pcs, uint pc_limit) {
           !n->is_Phi() &&       // a few noisely useless nodes
           !n->is_Proj() &&
           !n->is_MachTemp() &&
+          !n->is_SafePointScalarObject() &&
           !n->is_Catch() &&     // Would be nice to print exception table targets
           !n->is_MergeMem() &&  // Not very interesting
           !n->is_top() &&       // Debug info table constants
@@ -2240,6 +2290,30 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) {
     break;
   }
 
+  case Op_Proj: {
+    if (OptimizeStringConcat) {
+      ProjNode* p = n->as_Proj();
+      if (p->_is_io_use) {
+        // Separate projections were used for the exception path which
+        // are normally removed by a late inline.  If it wasn't inlined
+        // then they will hang around and should just be replaced with
+        // the original one.
+        Node* proj = NULL;
+        // Replace with just one
+        for (SimpleDUIterator i(p->in(0)); i.has_next(); i.next()) {
+          Node *use = i.get();
+          if (use->is_Proj() && p != use && use->as_Proj()->_con == p->_con) {
+            proj = use;
+            break;
+          }
+        }
+        assert(p != NULL, "must be found");
+        p->subsume_by(proj);
+      }
+    }
+    break;
+  }
+
   case Op_Phi:
     if (n->as_Phi()->bottom_type()->isa_narrowoop()) {
       // The EncodeP optimization may create Phi with the same edges
diff --git a/src/share/vm/opto/compile.hpp b/src/share/vm/opto/compile.hpp
index 4bd1900fc..450ff269f 100644
--- a/src/share/vm/opto/compile.hpp
+++ b/src/share/vm/opto/compile.hpp
@@ -149,6 +149,7 @@ class Compile : public Phase {
   bool                  _has_loops;             // True if the method _may_ have some loops
   bool                  _has_split_ifs;         // True if the method _may_ have some split-if
   bool                  _has_unsafe_access;     // True if the method _may_ produce faults in unsafe loads or stores.
+  bool                  _has_stringbuilder;     // True StringBuffers or StringBuilders are allocated
   uint                  _trap_hist[trapHistLength];  // Cumulative traps
   bool                  _trap_can_recompile;    // Have we emitted a recompiling trap?
   uint                  _decompile_count;       // Cumulative decompilation counts.
@@ -219,6 +220,9 @@ class Compile : public Phase {
   Unique_Node_List*     _for_igvn;              // Initial work-list for next round of Iterative GVN
   WarmCallInfo*         _warm_calls;            // Sorted work-list for heat-based inlining.
 
+  GrowableArray<CallGenerator*> _late_inlines;  // List of CallGenerators to be revisited after
+                                                // main parsing has finished.
+
   // Matching, CFG layout, allocation, code generation
   PhaseCFG*             _cfg;                   // Results of CFG finding
   bool                  _select_24_bit_instr;   // We selected an instruction with a 24-bit result
@@ -298,6 +302,8 @@ class Compile : public Phase {
   void          set_has_split_ifs(bool z)       { _has_split_ifs = z; }
   bool              has_unsafe_access() const   { return _has_unsafe_access; }
   void          set_has_unsafe_access(bool z)   { _has_unsafe_access = z; }
+  bool              has_stringbuilder() const   { return _has_stringbuilder; }
+  void          set_has_stringbuilder(bool z)   { _has_stringbuilder = z; }
   void          set_trap_count(uint r, uint c)  { assert(r < trapHistLength, "oob");        _trap_hist[r] = c; }
   uint              trap_count(uint r) const    { assert(r < trapHistLength, "oob"); return _trap_hist[r]; }
   bool              trap_can_recompile() const  { return _trap_can_recompile; }
@@ -475,6 +481,7 @@ class Compile : public Phase {
   // Decide how to build a call.
   // The profile factor is a discount to apply to this site's interp. profile.
   CallGenerator*    call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float profile_factor);
+  bool should_delay_inlining(ciMethod* call_method, JVMState* jvms);
 
   // Report if there were too many traps at a current method and bci.
   // Report if a trap was recorded, and/or PerMethodTrapLimit was exceeded.
@@ -495,6 +502,11 @@ class Compile : public Phase {
   void          set_initial_gvn(PhaseGVN *gvn)           { _initial_gvn = gvn; }
   void          set_for_igvn(Unique_Node_List *for_igvn) { _for_igvn = for_igvn; }
 
+  // Replace n by nn using initial_gvn, calling hash_delete and
+  // record_for_igvn as needed.
+  void gvn_replace_by(Node* n, Node* nn);
+
+
   void              identify_useful_nodes(Unique_Node_List &useful);
   void              remove_useless_nodes  (Unique_Node_List &useful);
 
@@ -502,6 +514,9 @@ class Compile : public Phase {
   void          set_warm_calls(WarmCallInfo* l) { _warm_calls = l; }
   WarmCallInfo* pop_warm_call();
 
+  // Record this CallGenerator for inlining at the end of parsing.
+  void              add_late_inline(CallGenerator* cg) { _late_inlines.push(cg); }
+
   // Matching, CFG layout, allocation, code generation
   PhaseCFG*         cfg()                       { return _cfg; }
   bool              select_24_bit_instr() const { return _select_24_bit_instr; }
diff --git a/src/share/vm/opto/doCall.cpp b/src/share/vm/opto/doCall.cpp
index 5104e648a..fff6eedda 100644
--- a/src/share/vm/opto/doCall.cpp
+++ b/src/share/vm/opto/doCall.cpp
@@ -128,6 +128,12 @@ CallGenerator* Compile::call_generator(ciMethod* call_method, int vtable_index,
 
       if (allow_inline) {
         CallGenerator* cg = CallGenerator::for_inline(call_method, expected_uses);
+        if (require_inline && cg != NULL && should_delay_inlining(call_method, jvms)) {
+          // Delay the inlining of this method to give us the
+          // opportunity to perform some high level optimizations
+          // first.
+          return CallGenerator::for_late_inline(call_method, cg);
+        }
         if (cg == NULL) {
           // Fall through.
         } else if (require_inline || !InlineWarmCalls) {
@@ -225,10 +231,63 @@ CallGenerator* Compile::call_generator(ciMethod* call_method, int vtable_index,
   } else {
     // Class Hierarchy Analysis or Type Profile reveals a unique target,
     // or it is a static or special call.
-    return CallGenerator::for_direct_call(call_method);
+    return CallGenerator::for_direct_call(call_method, should_delay_inlining(call_method, jvms));
   }
 }
 
+// Return true for methods that shouldn't be inlined early so that
+// they are easier to analyze and optimize as intrinsics.
+bool Compile::should_delay_inlining(ciMethod* call_method, JVMState* jvms) {
+  if (has_stringbuilder()) {
+
+    if ((call_method->holder() == C->env()->StringBuilder_klass() ||
+         call_method->holder() == C->env()->StringBuffer_klass()) &&
+        (jvms->method()->holder() == C->env()->StringBuilder_klass() ||
+         jvms->method()->holder() == C->env()->StringBuffer_klass())) {
+      // Delay SB calls only when called from non-SB code
+      return false;
+    }
+
+    switch (call_method->intrinsic_id()) {
+      case vmIntrinsics::_StringBuilder_void:
+      case vmIntrinsics::_StringBuilder_int:
+      case vmIntrinsics::_StringBuilder_String:
+      case vmIntrinsics::_StringBuilder_append_char:
+      case vmIntrinsics::_StringBuilder_append_int:
+      case vmIntrinsics::_StringBuilder_append_String:
+      case vmIntrinsics::_StringBuilder_toString:
+      case vmIntrinsics::_StringBuffer_void:
+      case vmIntrinsics::_StringBuffer_int:
+      case vmIntrinsics::_StringBuffer_String:
+      case vmIntrinsics::_StringBuffer_append_char:
+      case vmIntrinsics::_StringBuffer_append_int:
+      case vmIntrinsics::_StringBuffer_append_String:
+      case vmIntrinsics::_StringBuffer_toString:
+      case vmIntrinsics::_Integer_toString:
+        return true;
+
+      case vmIntrinsics::_String_String:
+        {
+          Node* receiver = jvms->map()->in(jvms->argoff() + 1);
+          if (receiver->is_Proj() && receiver->in(0)->is_CallStaticJava()) {
+            CallStaticJavaNode* csj = receiver->in(0)->as_CallStaticJava();
+            ciMethod* m = csj->method();
+            if (m != NULL &&
+                (m->intrinsic_id() == vmIntrinsics::_StringBuffer_toString ||
+                 m->intrinsic_id() == vmIntrinsics::_StringBuilder_toString))
+              // Delay String.<init>(new SB())
+              return true;
+          }
+          return false;
+        }
+
+      default:
+        return false;
+    }
+  }
+  return false;
+}
+
 
 // uncommon-trap call-sites where callee is unloaded, uninitialized or will not link
 bool Parse::can_not_compile_call_site(ciMethod *dest_method, ciInstanceKlass* klass) {
diff --git a/src/share/vm/opto/escape.cpp b/src/share/vm/opto/escape.cpp
index 478a96a75..b560151bf 100644
--- a/src/share/vm/opto/escape.cpp
+++ b/src/share/vm/opto/escape.cpp
@@ -543,6 +543,7 @@ bool ConnectionGraph::split_AddP(Node *addp, Node *base,  PhaseGVN  *igvn) {
   int alias_idx = _compile->get_alias_index(tinst);
   igvn->set_type(addp, tinst);
   // record the allocation in the node map
+  assert(ptnode_adr(addp->_idx)->_node != NULL, "should be registered");
   set_map(addp->_idx, get_map(base->_idx));
 
   // Set addp's Base and Address to 'base'.
@@ -618,9 +619,14 @@ PhiNode *ConnectionGraph::create_split_phi(PhiNode *orig_phi, int alias_idx, Gro
   const TypePtr *atype = C->get_adr_type(alias_idx);
   result = PhiNode::make(orig_phi->in(0), NULL, Type::MEMORY, atype);
   C->copy_node_notes_to(result, orig_phi);
-  set_map_phi(orig_phi->_idx, result);
   igvn->set_type(result, result->bottom_type());
   record_for_optimizer(result);
+
+  debug_only(Node* pn = ptnode_adr(orig_phi->_idx)->_node;)
+  assert(pn == NULL || pn == orig_phi, "wrong node");
+  set_map(orig_phi->_idx, result);
+  ptnode_adr(orig_phi->_idx)->_node = orig_phi;
+
   new_created = true;
   return result;
 }
@@ -711,6 +717,81 @@ static Node *step_through_mergemem(MergeMemNode *mmem, int alias_idx, const Type
 }
 
 //
+// Move memory users to their memory slices.
+//
+void ConnectionGraph::move_inst_mem(Node* n, GrowableArray<PhiNode *>  &orig_phis, PhaseGVN *igvn) {
+  Compile* C = _compile;
+
+  const TypePtr* tp = igvn->type(n->in(MemNode::Address))->isa_ptr();
+  assert(tp != NULL, "ptr type");
+  int alias_idx = C->get_alias_index(tp);
+  int general_idx = C->get_general_index(alias_idx);
+
+  // Move users first
+  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+    Node* use = n->fast_out(i);
+    if (use->is_MergeMem()) {
+      MergeMemNode* mmem = use->as_MergeMem();
+      assert(n == mmem->memory_at(alias_idx), "should be on instance memory slice");
+      if (n != mmem->memory_at(general_idx) || alias_idx == general_idx) {
+        continue; // Nothing to do
+      }
+      // Replace previous general reference to mem node.
+      uint orig_uniq = C->unique();
+      Node* m = find_inst_mem(n, general_idx, orig_phis, igvn);
+      assert(orig_uniq == C->unique(), "no new nodes");
+      mmem->set_memory_at(general_idx, m);
+      --imax;
+      --i;
+    } else if (use->is_MemBar()) {
+      assert(!use->is_Initialize(), "initializing stores should not be moved");
+      if (use->req() > MemBarNode::Precedent &&
+          use->in(MemBarNode::Precedent) == n) {
+        // Don't move related membars.
+        record_for_optimizer(use);
+        continue;
+      }
+      tp = use->as_MemBar()->adr_type()->isa_ptr();
+      if (tp != NULL && C->get_alias_index(tp) == alias_idx ||
+          alias_idx == general_idx) {
+        continue; // Nothing to do
+      }
+      // Move to general memory slice.
+      uint orig_uniq = C->unique();
+      Node* m = find_inst_mem(n, general_idx, orig_phis, igvn);
+      assert(orig_uniq == C->unique(), "no new nodes");
+      igvn->hash_delete(use);
+      imax -= use->replace_edge(n, m);
+      igvn->hash_insert(use);
+      record_for_optimizer(use);
+      --i;
+#ifdef ASSERT
+    } else if (use->is_Mem()) {
+      if (use->Opcode() == Op_StoreCM && use->in(MemNode::OopStore) == n) {
+        // Don't move related cardmark.
+        continue;
+      }
+      // Memory nodes should have new memory input.
+      tp = igvn->type(use->in(MemNode::Address))->isa_ptr();
+      assert(tp != NULL, "ptr type");
+      int idx = C->get_alias_index(tp);
+      assert(get_map(use->_idx) != NULL || idx == alias_idx,
+             "Following memory nodes should have new memory input or be on the same memory slice");
+    } else if (use->is_Phi()) {
+      // Phi nodes should be split and moved already.
+      tp = use->as_Phi()->adr_type()->isa_ptr();
+      assert(tp != NULL, "ptr type");
+      int idx = C->get_alias_index(tp);
+      assert(idx == alias_idx, "Following Phi nodes should be on the same memory slice");
+    } else {
+      use->dump();
+      assert(false, "should not be here");
+#endif
+    }
+  }
+}
+
+//
 // Search memory chain of "mem" to find a MemNode whose address
 // is the specified alias index.
 //
@@ -775,10 +856,18 @@ Node* ConnectionGraph::find_inst_mem(Node *orig_mem, int alias_idx, GrowableArra
                C->get_alias_index(result->as_Phi()->adr_type()) != alias_idx) {
       Node *un = result->as_Phi()->unique_input(phase);
       if (un != NULL) {
+        orig_phis.append_if_missing(result->as_Phi());
         result = un;
       } else {
         break;
       }
+    } else if (result->is_ClearArray()) {
+      if (!ClearArrayNode::step_through(&result, (uint)tinst->instance_id(), phase)) {
+        // Can not bypass initialization of the instance
+        // we are looking for.
+        break;
+      }
+      // Otherwise skip it (the call updated 'result' value).
     } else if (result->Opcode() == Op_SCMemProj) {
       assert(result->in(0)->is_LoadStore(), "sanity");
       const Type *at = phase->type(result->in(0)->in(MemNode::Address));
@@ -808,7 +897,6 @@ Node* ConnectionGraph::find_inst_mem(Node *orig_mem, int alias_idx, GrowableArra
   return result;
 }
 
-
 //
 //  Convert the types of unescaped object to instance types where possible,
 //  propagate the new type information through the graph, and update memory
@@ -900,12 +988,13 @@ Node* ConnectionGraph::find_inst_mem(Node *orig_mem, int alias_idx, GrowableArra
 //
 void ConnectionGraph::split_unique_types(GrowableArray<Node *>  &alloc_worklist) {
   GrowableArray<Node *>  memnode_worklist;
-  GrowableArray<Node *>  mergemem_worklist;
   GrowableArray<PhiNode *>  orig_phis;
+
   PhaseGVN  *igvn = _compile->initial_gvn();
   uint new_index_start = (uint) _compile->num_alias_types();
-  VectorSet visited(Thread::current()->resource_area());
-  VectorSet ptset(Thread::current()->resource_area());
+  Arena* arena = Thread::current()->resource_area();
+  VectorSet visited(arena);
+  VectorSet ptset(arena);
 
 
   //  Phase 1:  Process possible allocations from alloc_worklist.
@@ -981,6 +1070,8 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *>  &alloc_worklist)
       //   - non-escaping
       //   - eligible to be a unique type
       //   - not determined to be ineligible by escape analysis
+      assert(ptnode_adr(alloc->_idx)->_node != NULL &&
+             ptnode_adr(n->_idx)->_node != NULL, "should be registered");
       set_map(alloc->_idx, n);
       set_map(n->_idx, alloc);
       const TypeOopPtr *t = igvn->type(n)->isa_oopptr();
@@ -1025,7 +1116,7 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *>  &alloc_worklist)
               alloc_worklist.append_if_missing(addp2);
             }
             alloc_worklist.append_if_missing(use);
-          } else if (use->is_Initialize()) {
+          } else if (use->is_MemBar()) {
             memnode_worklist.append_if_missing(use);
           }
         }
@@ -1035,10 +1126,12 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *>  &alloc_worklist)
       PointsTo(ptset, get_addp_base(n), igvn);
       assert(ptset.Size() == 1, "AddP address is unique");
       uint elem = ptset.getelem(); // Allocation node's index
-      if (elem == _phantom_object)
+      if (elem == _phantom_object) {
+        assert(false, "escaped allocation");
         continue; // Assume the value was set outside this method.
+      }
       Node *base = get_map(elem);  // CheckCastPP node
-      if (!split_AddP(n, base, igvn)) continue; // wrong type
+      if (!split_AddP(n, base, igvn)) continue; // wrong type from dead path
       tinst = igvn->type(base)->isa_oopptr();
     } else if (n->is_Phi() ||
                n->is_CheckCastPP() ||
@@ -1053,8 +1146,10 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *>  &alloc_worklist)
       PointsTo(ptset, n, igvn);
       if (ptset.Size() == 1) {
         uint elem = ptset.getelem(); // Allocation node's index
-        if (elem == _phantom_object)
+        if (elem == _phantom_object) {
+          assert(false, "escaped allocation");
           continue; // Assume the value was set outside this method.
+        }
         Node *val = get_map(elem);   // CheckCastPP node
         TypeNode *tn = n->as_Type();
         tinst = igvn->type(val)->isa_oopptr();
@@ -1069,8 +1164,7 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *>  &alloc_worklist)
           tn_t = tn_type->isa_oopptr();
         }
 
-        if (tn_t != NULL &&
-            tinst->cast_to_instance_id(TypeOopPtr::InstanceBot)->higher_equal(tn_t)) {
+        if (tn_t != NULL && tinst->klass()->is_subtype_of(tn_t->klass())) {
           if (tn_type->isa_narrowoop()) {
             tn_type = tinst->make_narrowoop();
           } else {
@@ -1082,33 +1176,25 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *>  &alloc_worklist)
           igvn->hash_insert(tn);
           record_for_optimizer(n);
         } else {
-          continue; // wrong type
+          assert(tn_type == TypePtr::NULL_PTR ||
+                 tn_t != NULL && !tinst->klass()->is_subtype_of(tn_t->klass()),
+                 "unexpected type");
+          continue; // Skip dead path with different type
         }
       }
     } else {
+      debug_only(n->dump();)
+      assert(false, "EA: unexpected node");
       continue;
     }
-    // push users on appropriate worklist
+    // push allocation's users on appropriate worklist
     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
       Node *use = n->fast_out(i);
       if(use->is_Mem() && use->in(MemNode::Address) == n) {
+        // Load/store to instance's field
         memnode_worklist.append_if_missing(use);
-      } else if (use->is_Initialize()) {
+      } else if (use->is_MemBar()) {
         memnode_worklist.append_if_missing(use);
-      } else if (use->is_MergeMem()) {
-        mergemem_worklist.append_if_missing(use);
-      } else if (use->is_SafePoint() && tinst != NULL) {
-        // Look for MergeMem nodes for calls which reference unique allocation
-        // (through CheckCastPP nodes) even for debug info.
-        Node* m = use->in(TypeFunc::Memory);
-        uint iid = tinst->instance_id();
-        while (m->is_Proj() && m->in(0)->is_SafePoint() &&
-               m->in(0) != use && !m->in(0)->_idx != iid) {
-          m = m->in(0)->in(TypeFunc::Memory);
-        }
-        if (m->is_MergeMem()) {
-          mergemem_worklist.append_if_missing(m);
-        }
       } else if (use->is_AddP() && use->outcnt() > 0) { // No dead nodes
         Node* addp2 = find_second_addp(use, n);
         if (addp2 != NULL) {
@@ -1121,6 +1207,29 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *>  &alloc_worklist)
                  use->is_DecodeN() ||
                  (use->is_ConstraintCast() && use->Opcode() == Op_CastPP)) {
         alloc_worklist.append_if_missing(use);
+#ifdef ASSERT
+      } else if (use->is_Mem()) {
+        assert(use->in(MemNode::Address) != n, "EA: missing allocation reference path");
+      } else if (use->is_MergeMem()) {
+        assert(_mergemem_worklist.contains(use->as_MergeMem()), "EA: missing MergeMem node in the worklist");
+      } else if (use->is_SafePoint()) {
+        // Look for MergeMem nodes for calls which reference unique allocation
+        // (through CheckCastPP nodes) even for debug info.
+        Node* m = use->in(TypeFunc::Memory);
+        if (m->is_MergeMem()) {
+          assert(_mergemem_worklist.contains(m->as_MergeMem()), "EA: missing MergeMem node in the worklist");
+        }
+      } else {
+        uint op = use->Opcode();
+        if (!(op == Op_CmpP || op == Op_Conv2B ||
+              op == Op_CastP2X || op == Op_StoreCM ||
+              op == Op_FastLock || op == Op_AryEq || op == Op_StrComp ||
+              op == Op_StrEquals || op == Op_StrIndexOf)) {
+          n->dump();
+          use->dump();
+          assert(false, "EA: missing allocation reference path");
+        }
+#endif
       }
     }
 
@@ -1138,13 +1247,11 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *>  &alloc_worklist)
     Node *n = memnode_worklist.pop();
     if (visited.test_set(n->_idx))
       continue;
-    if (n->is_Phi()) {
-      assert(n->as_Phi()->adr_type() != TypePtr::BOTTOM, "narrow memory slice required");
-      // we don't need to do anything, but the users must be pushed if we haven't processed
-      // this Phi before
-    } else if (n->is_Initialize()) {
-      // we don't need to do anything, but the users of the memory projection must be pushed
-      n = n->as_Initialize()->proj_out(TypeFunc::Memory);
+    if (n->is_Phi() || n->is_ClearArray()) {
+      // we don't need to do anything, but the users must be pushed
+    } else if (n->is_MemBar()) { // Initialize, MemBar nodes
+      // we don't need to do anything, but the users must be pushed
+      n = n->as_MemBar()->proj_out(TypeFunc::Memory);
       if (n == NULL)
         continue;
     } else {
@@ -1161,6 +1268,10 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *>  &alloc_worklist)
         return;
       }
       if (mem != n->in(MemNode::Memory)) {
+        // We delay the memory edge update since we need old one in
+        // MergeMem code below when instances memory slices are separated.
+        debug_only(Node* pn = ptnode_adr(n->_idx)->_node;)
+        assert(pn == NULL || pn == n, "wrong node");
         set_map(n->_idx, mem);
         ptnode_adr(n->_idx)->_node = n;
       }
@@ -1181,36 +1292,55 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *>  &alloc_worklist)
     // push user on appropriate worklist
     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
       Node *use = n->fast_out(i);
-      if (use->is_Phi()) {
+      if (use->is_Phi() || use->is_ClearArray()) {
         memnode_worklist.append_if_missing(use);
       } else if(use->is_Mem() && use->in(MemNode::Memory) == n) {
+        if (use->Opcode() == Op_StoreCM) // Ignore cardmark stores
+          continue;
         memnode_worklist.append_if_missing(use);
-      } else if (use->is_Initialize()) {
+      } else if (use->is_MemBar()) {
         memnode_worklist.append_if_missing(use);
+#ifdef ASSERT
+      } else if(use->is_Mem()) {
+        assert(use->in(MemNode::Memory) != n, "EA: missing memory path");
       } else if (use->is_MergeMem()) {
-        mergemem_worklist.append_if_missing(use);
+        assert(_mergemem_worklist.contains(use->as_MergeMem()), "EA: missing MergeMem node in the worklist");
+      } else {
+        uint op = use->Opcode();
+        if (!(op == Op_StoreCM ||
+              (op == Op_CallLeaf && use->as_CallLeaf()->_name != NULL &&
+               strcmp(use->as_CallLeaf()->_name, "g1_wb_pre") == 0) ||
+              op == Op_AryEq || op == Op_StrComp ||
+              op == Op_StrEquals || op == Op_StrIndexOf)) {
+          n->dump();
+          use->dump();
+          assert(false, "EA: missing memory path");
+        }
+#endif
       }
     }
   }
 
   //  Phase 3:  Process MergeMem nodes from mergemem_worklist.
-  //            Walk each memory moving the first node encountered of each
+  //            Walk each memory slice moving the first node encountered of each
   //            instance type to the the input corresponding to its alias index.
-  while (mergemem_worklist.length() != 0) {
-    Node *n = mergemem_worklist.pop();
-    assert(n->is_MergeMem(), "MergeMem node required.");
-    if (visited.test_set(n->_idx))
-      continue;
-    MergeMemNode *nmm = n->as_MergeMem();
+  uint length = _mergemem_worklist.length();
+  for( uint next = 0; next < length; ++next ) {
+    MergeMemNode* nmm = _mergemem_worklist.at(next);
+    assert(!visited.test_set(nmm->_idx), "should not be visited before");
     // Note: we don't want to use MergeMemStream here because we only want to
-    //  scan inputs which exist at the start, not ones we add during processing.
-    uint nslices = nmm->req();
+    // scan inputs which exist at the start, not ones we add during processing.
+    // Note 2: MergeMem may already contains instance memory slices added
+    // during find_inst_mem() call when memory nodes were processed above.
     igvn->hash_delete(nmm);
+    uint nslices = nmm->req();
     for (uint i = Compile::AliasIdxRaw+1; i < nslices; i++) {
       Node* mem = nmm->in(i);
       Node* cur = NULL;
       if (mem == NULL || mem->is_top())
         continue;
+      // First, update mergemem by moving memory nodes to corresponding slices
+      // if their type became more precise since this mergemem was created.
       while (mem->is_Mem()) {
         const Type *at = igvn->type(mem->in(MemNode::Address));
         if (at != Type::TOP) {
@@ -1229,7 +1359,7 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *>  &alloc_worklist)
       }
       nmm->set_memory_at(i, (cur != NULL) ? cur : mem);
       // Find any instance of the current type if we haven't encountered
-      // a value of the instance along the chain.
+      // already a memory slice of the instance along the memory chain.
       for (uint ni = new_index_start; ni < new_index_end; ni++) {
         if((uint)_compile->get_general_index(ni) == i) {
           Node *m = (ni >= nmm->req()) ? nmm->empty_memory() : nmm->in(ni);
@@ -1245,11 +1375,11 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *>  &alloc_worklist)
     }
     // Find the rest of instances values
     for (uint ni = new_index_start; ni < new_index_end; ni++) {
-      const TypeOopPtr *tinst = igvn->C->get_adr_type(ni)->isa_oopptr();
+      const TypeOopPtr *tinst = _compile->get_adr_type(ni)->isa_oopptr();
       Node* result = step_through_mergemem(nmm, ni, tinst);
       if (result == nmm->base_memory()) {
         // Didn't find instance memory, search through general slice recursively.
-        result = nmm->memory_at(igvn->C->get_general_index(ni));
+        result = nmm->memory_at(_compile->get_general_index(ni));
         result = find_inst_mem(result, ni, orig_phis, igvn);
         if (_compile->failing()) {
           return;
@@ -1259,41 +1389,6 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *>  &alloc_worklist)
     }
     igvn->hash_insert(nmm);
     record_for_optimizer(nmm);
-
-    // Propagate new memory slices to following MergeMem nodes.
-    for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
-      Node *use = n->fast_out(i);
-      if (use->is_Call()) {
-        CallNode* in = use->as_Call();
-        if (in->proj_out(TypeFunc::Memory) != NULL) {
-          Node* m = in->proj_out(TypeFunc::Memory);
-          for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) {
-            Node* mm = m->fast_out(j);
-            if (mm->is_MergeMem()) {
-              mergemem_worklist.append_if_missing(mm);
-            }
-          }
-        }
-        if (use->is_Allocate()) {
-          use = use->as_Allocate()->initialization();
-          if (use == NULL) {
-            continue;
-          }
-        }
-      }
-      if (use->is_Initialize()) {
-        InitializeNode* in = use->as_Initialize();
-        if (in->proj_out(TypeFunc::Memory) != NULL) {
-          Node* m = in->proj_out(TypeFunc::Memory);
-          for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) {
-            Node* mm = m->fast_out(j);
-            if (mm->is_MergeMem()) {
-              mergemem_worklist.append_if_missing(mm);
-            }
-          }
-        }
-      }
-    }
   }
 
   //  Phase 4:  Update the inputs of non-instance memory Phis and
@@ -1322,19 +1417,48 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *>  &alloc_worklist)
   }
 
   // Update the memory inputs of MemNodes with the value we computed
-  // in Phase 2.
+  // in Phase 2 and move stores memory users to corresponding memory slices.
+#ifdef ASSERT
+  visited.Clear();
+  Node_Stack old_mems(arena, _compile->unique() >> 2);
+#endif
   for (uint i = 0; i < nodes_size(); i++) {
     Node *nmem = get_map(i);
     if (nmem != NULL) {
       Node *n = ptnode_adr(i)->_node;
-      if (n != NULL && n->is_Mem()) {
+      assert(n != NULL, "sanity");
+      if (n->is_Mem()) {
+#ifdef ASSERT
+        Node* old_mem = n->in(MemNode::Memory);
+        if (!visited.test_set(old_mem->_idx)) {
+          old_mems.push(old_mem, old_mem->outcnt());
+        }
+#endif
+        assert(n->in(MemNode::Memory) != nmem, "sanity");
+        if (!n->is_Load()) {
+          // Move memory users of a store first.
+          move_inst_mem(n, orig_phis, igvn);
+        }
+        // Now update memory input
         igvn->hash_delete(n);
         n->set_req(MemNode::Memory, nmem);
         igvn->hash_insert(n);
         record_for_optimizer(n);
+      } else {
+        assert(n->is_Allocate() || n->is_CheckCastPP() ||
+               n->is_AddP() || n->is_Phi(), "unknown node used for set_map()");
       }
     }
   }
+#ifdef ASSERT
+  // Verify that memory was split correctly
+  while (old_mems.is_nonempty()) {
+    Node* old_mem = old_mems.node();
+    uint  old_cnt = old_mems.index();
+    old_mems.pop();
+    assert(old_cnt = old_mem->outcnt(), "old mem could be lost");
+  }
+#endif
 }
 
 bool ConnectionGraph::has_candidates(Compile *C) {
@@ -1381,8 +1505,20 @@ bool ConnectionGraph::compute_escape() {
         ptnode_adr(n->_idx)->node_type() == PointsToNode::JavaObject) {
       has_allocations = true;
     }
-    if(n->is_AddP())
-      cg_worklist.append(n->_idx);
+    if(n->is_AddP()) {
+      // Collect address nodes which directly reference an allocation.
+      // Use them during stage 3 below to build initial connection graph
+      // field edges. Other field edges could be added after StoreP/LoadP
+      // nodes are processed during stage 4 below.
+      Node* base = get_addp_base(n);
+      if(base->is_Proj() && base->in(0)->is_Allocate()) {
+        cg_worklist.append(n->_idx);
+      }
+    } else if (n->is_MergeMem()) {
+      // Collect all MergeMem nodes to add memory slices for
+      // scalar replaceable objects in split_unique_types().
+      _mergemem_worklist.append(n->as_MergeMem());
+    }
     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
       Node* m = n->fast_out(i);   // Get user
       worklist_init.push(m);
@@ -1423,12 +1559,13 @@ bool ConnectionGraph::compute_escape() {
     }
   }
 
-  VectorSet ptset(Thread::current()->resource_area());
+  Arena* arena = Thread::current()->resource_area();
+  VectorSet ptset(arena);
   GrowableArray<uint>  deferred_edges;
-  VectorSet visited(Thread::current()->resource_area());
+  VectorSet visited(arena);
 
-  // 5. Remove deferred edges from the graph and collect
-  //    information needed for type splitting.
+  // 5. Remove deferred edges from the graph and adjust
+  //    escape state of nonescaping objects.
   cg_length = cg_worklist.length();
   for( uint next = 0; next < cg_length; ++next ) {
     int ni = cg_worklist.at(next);
@@ -1438,98 +1575,9 @@ bool ConnectionGraph::compute_escape() {
       remove_deferred(ni, &deferred_edges, &visited);
       Node *n = ptn->_node;
       if (n->is_AddP()) {
-        // Search for objects which are not scalar replaceable.
-        // Mark their escape state as ArgEscape to propagate the state
-        // to referenced objects.
-        // Note: currently there are no difference in compiler optimizations
-        // for ArgEscape objects and NoEscape objects which are not
-        // scalar replaceable.
-
-        int offset = ptn->offset();
-        Node *base = get_addp_base(n);
-        ptset.Clear();
-        PointsTo(ptset, base, igvn);
-        int ptset_size = ptset.Size();
-
-        // Check if a field's initializing value is recorded and add
-        // a corresponding NULL field's value if it is not recorded.
-        // Connection Graph does not record a default initialization by NULL
-        // captured by Initialize node.
-        //
-        // Note: it will disable scalar replacement in some cases:
-        //
-        //    Point p[] = new Point[1];
-        //    p[0] = new Point(); // Will be not scalar replaced
-        //
-        // but it will save us from incorrect optimizations in next cases:
-        //
-        //    Point p[] = new Point[1];
-        //    if ( x ) p[0] = new Point(); // Will be not scalar replaced
-        //
-        // Without a control flow analysis we can't distinguish above cases.
-        //
-        if (offset != Type::OffsetBot && ptset_size == 1) {
-          uint elem = ptset.getelem(); // Allocation node's index
-          // It does not matter if it is not Allocation node since
-          // only non-escaping allocations are scalar replaced.
-          if (ptnode_adr(elem)->_node->is_Allocate() &&
-              ptnode_adr(elem)->escape_state() == PointsToNode::NoEscape) {
-            AllocateNode* alloc = ptnode_adr(elem)->_node->as_Allocate();
-            InitializeNode* ini = alloc->initialization();
-            Node* value = NULL;
-            if (ini != NULL) {
-              BasicType ft = UseCompressedOops ? T_NARROWOOP : T_OBJECT;
-              Node* store = ini->find_captured_store(offset, type2aelembytes(ft), igvn);
-              if (store != NULL && store->is_Store())
-                value = store->in(MemNode::ValueIn);
-            }
-            if (value == NULL || value != ptnode_adr(value->_idx)->_node) {
-              // A field's initializing value was not recorded. Add NULL.
-              uint null_idx = UseCompressedOops ? _noop_null : _oop_null;
-              add_pointsto_edge(ni, null_idx);
-            }
-          }
-        }
-
-        // An object is not scalar replaceable if the field which may point
-        // to it has unknown offset (unknown element of an array of objects).
-        //
-        if (offset == Type::OffsetBot) {
-          uint e_cnt = ptn->edge_count();
-          for (uint ei = 0; ei < e_cnt; ei++) {
-            uint npi = ptn->edge_target(ei);
-            set_escape_state(npi, PointsToNode::ArgEscape);
-            ptnode_adr(npi)->_scalar_replaceable = false;
-          }
-        }
-
-        // Currently an object is not scalar replaceable if a LoadStore node
-        // access its field since the field value is unknown after it.
-        //
-        bool has_LoadStore = false;
-        for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
-          Node *use = n->fast_out(i);
-          if (use->is_LoadStore()) {
-            has_LoadStore = true;
-            break;
-          }
-        }
-        // An object is not scalar replaceable if the address points
-        // to unknown field (unknown element for arrays, offset is OffsetBot).
-        //
-        // Or the address may point to more then one object. This may produce
-        // the false positive result (set scalar_replaceable to false)
-        // since the flow-insensitive escape analysis can't separate
-        // the case when stores overwrite the field's value from the case
-        // when stores happened on different control branches.
-        //
-        if (ptset_size > 1 || ptset_size != 0 &&
-            (has_LoadStore || offset == Type::OffsetBot)) {
-          for( VectorSetI j(&ptset); j.test(); ++j ) {
-            set_escape_state(j.elem, PointsToNode::ArgEscape);
-            ptnode_adr(j.elem)->_scalar_replaceable = false;
-          }
-        }
+        // Search for objects which are not scalar replaceable
+        // and adjust their escape state.
+        verify_escape_state(ni, ptset, igvn);
       }
     }
   }
@@ -1646,6 +1694,150 @@ bool ConnectionGraph::compute_escape() {
   return has_non_escaping_obj;
 }
 
+// Search for objects which are not scalar replaceable.
+void ConnectionGraph::verify_escape_state(int nidx, VectorSet& ptset, PhaseTransform* phase) {
+  PointsToNode* ptn = ptnode_adr(nidx);
+  Node* n = ptn->_node;
+  assert(n->is_AddP(), "Should be called for AddP nodes only");
+  // Search for objects which are not scalar replaceable.
+  // Mark their escape state as ArgEscape to propagate the state
+  // to referenced objects.
+  // Note: currently there are no difference in compiler optimizations
+  // for ArgEscape objects and NoEscape objects which are not
+  // scalar replaceable.
+
+  Compile* C = _compile;
+
+  int offset = ptn->offset();
+  Node* base = get_addp_base(n);
+  ptset.Clear();
+  PointsTo(ptset, base, phase);
+  int ptset_size = ptset.Size();
+
+  // Check if a oop field's initializing value is recorded and add
+  // a corresponding NULL field's value if it is not recorded.
+  // Connection Graph does not record a default initialization by NULL
+  // captured by Initialize node.
+  //
+  // Note: it will disable scalar replacement in some cases:
+  //
+  //    Point p[] = new Point[1];
+  //    p[0] = new Point(); // Will be not scalar replaced
+  //
+  // but it will save us from incorrect optimizations in next cases:
+  //
+  //    Point p[] = new Point[1];
+  //    if ( x ) p[0] = new Point(); // Will be not scalar replaced
+  //
+  // Do a simple control flow analysis to distinguish above cases.
+  //
+  if (offset != Type::OffsetBot && ptset_size == 1) {
+    uint elem = ptset.getelem(); // Allocation node's index
+    // It does not matter if it is not Allocation node since
+    // only non-escaping allocations are scalar replaced.
+    if (ptnode_adr(elem)->_node->is_Allocate() &&
+        ptnode_adr(elem)->escape_state() == PointsToNode::NoEscape) {
+      AllocateNode* alloc = ptnode_adr(elem)->_node->as_Allocate();
+      InitializeNode* ini = alloc->initialization();
+
+      // Check only oop fields.
+      const Type* adr_type = n->as_AddP()->bottom_type();
+      BasicType basic_field_type = T_INT;
+      if (adr_type->isa_instptr()) {
+        ciField* field = C->alias_type(adr_type->isa_instptr())->field();
+        if (field != NULL) {
+          basic_field_type = field->layout_type();
+        } else {
+          // Ignore non field load (for example, klass load)
+        }
+      } else if (adr_type->isa_aryptr()) {
+        const Type* elemtype = adr_type->isa_aryptr()->elem();
+        basic_field_type = elemtype->array_element_basic_type();
+      } else {
+        // Raw pointers are used for initializing stores so skip it.
+        assert(adr_type->isa_rawptr() && base->is_Proj() &&
+               (base->in(0) == alloc),"unexpected pointer type");
+      }
+      if (basic_field_type == T_OBJECT ||
+          basic_field_type == T_NARROWOOP ||
+          basic_field_type == T_ARRAY) {
+        Node* value = NULL;
+        if (ini != NULL) {
+          BasicType ft = UseCompressedOops ? T_NARROWOOP : T_OBJECT;
+          Node* store = ini->find_captured_store(offset, type2aelembytes(ft), phase);
+          if (store != NULL && store->is_Store()) {
+            value = store->in(MemNode::ValueIn);
+          } else if (ptn->edge_count() > 0) { // Are there oop stores?
+            // Check for a store which follows allocation without branches.
+            // For example, a volatile field store is not collected
+            // by Initialize node. TODO: it would be nice to use idom() here.
+            for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+              store = n->fast_out(i);
+              if (store->is_Store() && store->in(0) != NULL) {
+                Node* ctrl = store->in(0);
+                while(!(ctrl == ini || ctrl == alloc || ctrl == NULL ||
+                        ctrl == C->root() || ctrl == C->top() || ctrl->is_Region() ||
+                        ctrl->is_IfTrue() || ctrl->is_IfFalse())) {
+                   ctrl = ctrl->in(0);
+                }
+                if (ctrl == ini || ctrl == alloc) {
+                  value = store->in(MemNode::ValueIn);
+                  break;
+                }
+              }
+            }
+          }
+        }
+        if (value == NULL || value != ptnode_adr(value->_idx)->_node) {
+          // A field's initializing value was not recorded. Add NULL.
+          uint null_idx = UseCompressedOops ? _noop_null : _oop_null;
+          add_pointsto_edge(nidx, null_idx);
+        }
+      }
+    }
+  }
+
+  // An object is not scalar replaceable if the field which may point
+  // to it has unknown offset (unknown element of an array of objects).
+  //
+  if (offset == Type::OffsetBot) {
+    uint e_cnt = ptn->edge_count();
+    for (uint ei = 0; ei < e_cnt; ei++) {
+      uint npi = ptn->edge_target(ei);
+      set_escape_state(npi, PointsToNode::ArgEscape);
+      ptnode_adr(npi)->_scalar_replaceable = false;
+    }
+  }
+
+  // Currently an object is not scalar replaceable if a LoadStore node
+  // access its field since the field value is unknown after it.
+  //
+  bool has_LoadStore = false;
+  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+    Node *use = n->fast_out(i);
+    if (use->is_LoadStore()) {
+      has_LoadStore = true;
+      break;
+    }
+  }
+  // An object is not scalar replaceable if the address points
+  // to unknown field (unknown element for arrays, offset is OffsetBot).
+  //
+  // Or the address may point to more then one object. This may produce
+  // the false positive result (set scalar_replaceable to false)
+  // since the flow-insensitive escape analysis can't separate
+  // the case when stores overwrite the field's value from the case
+  // when stores happened on different control branches.
+  //
+  if (ptset_size > 1 || ptset_size != 0 &&
+      (has_LoadStore || offset == Type::OffsetBot)) {
+    for( VectorSetI j(&ptset); j.test(); ++j ) {
+      set_escape_state(j.elem, PointsToNode::ArgEscape);
+      ptnode_adr(j.elem)->_scalar_replaceable = false;
+    }
+  }
+}
+
 void ConnectionGraph::process_call_arguments(CallNode *call, PhaseTransform *phase) {
 
     switch (call->Opcode()) {
@@ -1657,6 +1849,7 @@ void ConnectionGraph::process_call_arguments(CallNode *call, PhaseTransform *pha
       assert(false, "should be done already");
       break;
 #endif
+    case Op_CallLeaf:
     case Op_CallLeafNoFP:
     {
       // Stub calls, objects do not escape but they are not scale replaceable.
@@ -1667,9 +1860,23 @@ void ConnectionGraph::process_call_arguments(CallNode *call, PhaseTransform *pha
         const Type* at = d->field_at(i);
         Node *arg = call->in(i)->uncast();
         const Type *aat = phase->type(arg);
-        if (!arg->is_top() && at->isa_ptr() && aat->isa_ptr()) {
+        if (!arg->is_top() && at->isa_ptr() && aat->isa_ptr() &&
+            ptnode_adr(arg->_idx)->escape_state() < PointsToNode::ArgEscape) {
+
           assert(aat == Type::TOP || aat == TypePtr::NULL_PTR ||
                  aat->isa_ptr() != NULL, "expecting an Ptr");
+#ifdef ASSERT
+          if (!(call->Opcode() == Op_CallLeafNoFP &&
+                call->as_CallLeaf()->_name != NULL &&
+                (strstr(call->as_CallLeaf()->_name, "arraycopy")  != 0) ||
+                call->as_CallLeaf()->_name != NULL &&
+                (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre")  == 0 ||
+                 strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ))
+          ) {
+            call->dump();
+            assert(false, "EA: unexpected CallLeaf");
+          }
+#endif
           set_escape_state(arg->_idx, PointsToNode::ArgEscape);
           if (arg->is_AddP()) {
             //
@@ -1706,9 +1913,10 @@ void ConnectionGraph::process_call_arguments(CallNode *call, PhaseTransform *pha
         for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
           const Type* at = d->field_at(i);
           int k = i - TypeFunc::Parms;
+          Node *arg = call->in(i)->uncast();
 
-          if (at->isa_oopptr() != NULL) {
-            Node *arg = call->in(i)->uncast();
+          if (at->isa_oopptr() != NULL &&
+              ptnode_adr(arg->_idx)->escape_state() < PointsToNode::ArgEscape) {
 
             bool global_escapes = false;
             bool fields_escapes = false;
@@ -1942,20 +2150,23 @@ void ConnectionGraph::record_for_escape_analysis(Node *n, PhaseTransform *phase)
       record_for_optimizer(n);
       _processed.set(n->_idx);
     } else {
-      // Have to process call's arguments first.
+      // Don't mark as processed since call's arguments have to be processed.
       PointsToNode::NodeType nt = PointsToNode::UnknownType;
+      PointsToNode::EscapeState es = PointsToNode::UnknownEscape;
 
       // Check if a call returns an object.
       const TypeTuple *r = n->as_Call()->tf()->range();
-      if (n->is_CallStaticJava() && r->cnt() > TypeFunc::Parms &&
+      if (r->cnt() > TypeFunc::Parms &&
+          r->field_at(TypeFunc::Parms)->isa_ptr() &&
           n->as_Call()->proj_out(TypeFunc::Parms) != NULL) {
-        // Note:  use isa_ptr() instead of isa_oopptr() here because
-        //        the _multianewarray functions return a TypeRawPtr.
-        if (r->field_at(TypeFunc::Parms)->isa_ptr() != NULL) {
-          nt = PointsToNode::JavaObject;
+        nt = PointsToNode::JavaObject;
+        if (!n->is_CallStaticJava()) {
+          // Since the called mathod is statically unknown assume
+          // the worst case that the returned value globally escapes.
+          es = PointsToNode::GlobalEscape;
         }
       }
-      add_node(n, nt, PointsToNode::UnknownEscape, false);
+      add_node(n, nt, es, false);
     }
     return;
   }
@@ -2088,18 +2299,27 @@ void ConnectionGraph::record_for_escape_analysis(Node *n, PhaseTransform *phase)
     }
     case Op_Proj:
     {
-      // we are only interested in the result projection from a call
+      // we are only interested in the oop result projection from a call
       if (n->as_Proj()->_con == TypeFunc::Parms && n->in(0)->is_Call() ) {
-        add_node(n, PointsToNode::LocalVar, PointsToNode::UnknownEscape, false);
-        process_call_result(n->as_Proj(), phase);
-        if (!_processed.test(n->_idx)) {
-          // The call's result may need to be processed later if the call
-          // returns it's argument and the argument is not processed yet.
-          _delayed_worklist.push(n);
+        const TypeTuple *r = n->in(0)->as_Call()->tf()->range();
+        assert(r->cnt() > TypeFunc::Parms, "sanity");
+        if (r->field_at(TypeFunc::Parms)->isa_ptr() != NULL) {
+          add_node(n, PointsToNode::LocalVar, PointsToNode::UnknownEscape, false);
+          int ti = n->in(0)->_idx;
+          // The call may not be registered yet (since not all its inputs are registered)
+          // if this is the projection from backbranch edge of Phi.
+          if (ptnode_adr(ti)->node_type() != PointsToNode::UnknownType) {
+            process_call_result(n->as_Proj(), phase);
+          }
+          if (!_processed.test(n->_idx)) {
+            // The call's result may need to be processed later if the call
+            // returns it's argument and the argument is not processed yet.
+            _delayed_worklist.push(n);
+          }
+          break;
         }
-      } else {
-        _processed.set(n->_idx);
       }
+      _processed.set(n->_idx);
       break;
     }
     case Op_Return:
@@ -2160,6 +2380,15 @@ void ConnectionGraph::record_for_escape_analysis(Node *n, PhaseTransform *phase)
       }
       break;
     }
+    case Op_AryEq:
+    case Op_StrComp:
+    case Op_StrEquals:
+    case Op_StrIndexOf:
+    {
+      // char[] arrays passed to string intrinsics are not scalar replaceable.
+      add_node(n, PointsToNode::UnknownType, PointsToNode::UnknownEscape, false);
+      break;
+    }
     case Op_ThreadLocal:
     {
       add_node(n, PointsToNode::JavaObject, PointsToNode::ArgEscape, true);
@@ -2174,6 +2403,7 @@ void ConnectionGraph::record_for_escape_analysis(Node *n, PhaseTransform *phase)
 
 void ConnectionGraph::build_connection_graph(Node *n, PhaseTransform *phase) {
   uint n_idx = n->_idx;
+  assert(ptnode_adr(n_idx)->_node != NULL, "node should be registered");
 
   // Don't set processed bit for AddP, LoadP, StoreP since
   // they may need more then one pass to process.
@@ -2211,6 +2441,7 @@ void ConnectionGraph::build_connection_graph(Node *n, PhaseTransform *phase) {
     case Op_DecodeN:
     {
       int ti = n->in(1)->_idx;
+      assert(ptnode_adr(ti)->node_type() != PointsToNode::UnknownType, "all nodes should be registered");
       if (ptnode_adr(ti)->node_type() == PointsToNode::JavaObject) {
         add_pointsto_edge(n_idx, ti);
       } else {
@@ -2250,7 +2481,6 @@ void ConnectionGraph::build_connection_graph(Node *n, PhaseTransform *phase) {
 #endif
 
       Node* adr = n->in(MemNode::Address)->uncast();
-      const Type *adr_type = phase->type(adr);
       Node* adr_base;
       if (adr->is_AddP()) {
         adr_base = get_addp_base(adr);
@@ -2302,13 +2532,19 @@ void ConnectionGraph::build_connection_graph(Node *n, PhaseTransform *phase) {
     }
     case Op_Proj:
     {
-      // we are only interested in the result projection from a call
+      // we are only interested in the oop result projection from a call
       if (n->as_Proj()->_con == TypeFunc::Parms && n->in(0)->is_Call() ) {
-        process_call_result(n->as_Proj(), phase);
-        assert(_processed.test(n_idx), "all call results should be processed");
-      } else {
-        assert(false, "Op_Proj");
+        assert(ptnode_adr(n->in(0)->_idx)->node_type() != PointsToNode::UnknownType,
+               "all nodes should be registered");
+        const TypeTuple *r = n->in(0)->as_Call()->tf()->range();
+        assert(r->cnt() > TypeFunc::Parms, "sanity");
+        if (r->field_at(TypeFunc::Parms)->isa_ptr() != NULL) {
+          process_call_result(n->as_Proj(), phase);
+          assert(_processed.test(n_idx), "all call results should be processed");
+          break;
+        }
       }
+      assert(false, "Op_Proj");
       break;
     }
     case Op_Return:
@@ -2320,6 +2556,7 @@ void ConnectionGraph::build_connection_graph(Node *n, PhaseTransform *phase) {
       }
 #endif
       int ti = n->in(TypeFunc::Parms)->_idx;
+      assert(ptnode_adr(ti)->node_type() != PointsToNode::UnknownType, "node should be registered");
       if (ptnode_adr(ti)->node_type() == PointsToNode::JavaObject) {
         add_pointsto_edge(n_idx, ti);
       } else {
@@ -2354,14 +2591,38 @@ void ConnectionGraph::build_connection_graph(Node *n, PhaseTransform *phase) {
       }
       break;
     }
+    case Op_AryEq:
+    case Op_StrComp:
+    case Op_StrEquals:
+    case Op_StrIndexOf:
+    {
+      // char[] arrays passed to string intrinsic do not escape but
+      // they are not scalar replaceable. Adjust escape state for them.
+      // Start from in(2) edge since in(1) is memory edge.
+      for (uint i = 2; i < n->req(); i++) {
+        Node* adr = n->in(i)->uncast();
+        const Type *at = phase->type(adr);
+        if (!adr->is_top() && at->isa_ptr()) {
+          assert(at == Type::TOP || at == TypePtr::NULL_PTR ||
+                 at->isa_ptr() != NULL, "expecting an Ptr");
+          if (adr->is_AddP()) {
+            adr = get_addp_base(adr);
+          }
+          // Mark as ArgEscape everything "adr" could point to.
+          set_escape_state(adr->_idx, PointsToNode::ArgEscape);
+        }
+      }
+      _processed.set(n_idx);
+      break;
+    }
     case Op_ThreadLocal:
     {
       assert(false, "Op_ThreadLocal");
       break;
     }
     default:
-      ;
-      // nothing to do
+      // This method should be called only for EA specific nodes.
+      ShouldNotReachHere();
   }
 }
 
diff --git a/src/share/vm/opto/escape.hpp b/src/share/vm/opto/escape.hpp
index 1ce0cc9cf..576043beb 100644
--- a/src/share/vm/opto/escape.hpp
+++ b/src/share/vm/opto/escape.hpp
@@ -210,6 +210,8 @@ private:
   Unique_Node_List  _delayed_worklist; // Nodes to be processed before
                                        // the call build_connection_graph().
 
+  GrowableArray<MergeMemNode *>  _mergemem_worklist; // List of all MergeMem nodes
+
   VectorSet                _processed; // Records which nodes have been
                                        // processed.
 
@@ -289,7 +291,7 @@ private:
   bool split_AddP(Node *addp, Node *base,  PhaseGVN  *igvn);
   PhiNode *create_split_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *>  &orig_phi_worklist, PhaseGVN  *igvn, bool &new_created);
   PhiNode *split_memory_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *>  &orig_phi_worklist, PhaseGVN  *igvn);
-  Node *find_mem(Node *mem, int alias_idx, PhaseGVN  *igvn);
+  void  move_inst_mem(Node* n, GrowableArray<PhiNode *>  &orig_phis, PhaseGVN *igvn);
   Node *find_inst_mem(Node *mem, int alias_idx,GrowableArray<PhiNode *>  &orig_phi_worklist,  PhaseGVN  *igvn);
 
   // Propagate unique types created for unescaped allocated objects
@@ -298,7 +300,6 @@ private:
 
   // manage entries in _node_map
   void  set_map(int idx, Node *n)        { _node_map.map(idx, n); }
-  void  set_map_phi(int idx, PhiNode *p) { _node_map.map(idx, (Node *) p); }
   Node *get_map(int idx)                 { return _node_map[idx]; }
   PhiNode *get_map_phi(int idx) {
     Node *phi = _node_map[idx];
@@ -315,6 +316,9 @@ private:
   // Set the escape state of a node
   void set_escape_state(uint ni, PointsToNode::EscapeState es);
 
+  // Search for objects which are not scalar replaceable.
+  void verify_escape_state(int nidx, VectorSet& ptset, PhaseTransform* phase);
+
 public:
   ConnectionGraph(Compile *C);
 
diff --git a/src/share/vm/opto/graphKit.cpp b/src/share/vm/opto/graphKit.cpp
index b63aae489..3a42be98b 100644
--- a/src/share/vm/opto/graphKit.cpp
+++ b/src/share/vm/opto/graphKit.cpp
@@ -1351,8 +1351,8 @@ void GraphKit::set_all_memory(Node* newmem) {
 }
 
 //------------------------------set_all_memory_call----------------------------
-void GraphKit::set_all_memory_call(Node* call) {
-  Node* newmem = _gvn.transform( new (C, 1) ProjNode(call, TypeFunc::Memory) );
+void GraphKit::set_all_memory_call(Node* call, bool separate_io_proj) {
+  Node* newmem = _gvn.transform( new (C, 1) ProjNode(call, TypeFunc::Memory, separate_io_proj) );
   set_all_memory(newmem);
 }
 
@@ -1573,7 +1573,7 @@ void GraphKit::set_arguments_for_java_call(CallJavaNode* call) {
 //---------------------------set_edges_for_java_call---------------------------
 // Connect a newly created call into the current JVMS.
 // A return value node (if any) is returned from set_edges_for_java_call.
-void GraphKit::set_edges_for_java_call(CallJavaNode* call, bool must_throw) {
+void GraphKit::set_edges_for_java_call(CallJavaNode* call, bool must_throw, bool separate_io_proj) {
 
   // Add the predefined inputs:
   call->init_req( TypeFunc::Control, control() );
@@ -1595,13 +1595,13 @@ void GraphKit::set_edges_for_java_call(CallJavaNode* call, bool must_throw) {
   // Re-use the current map to produce the result.
 
   set_control(_gvn.transform(new (C, 1) ProjNode(call, TypeFunc::Control)));
-  set_i_o(    _gvn.transform(new (C, 1) ProjNode(call, TypeFunc::I_O    )));
-  set_all_memory_call(xcall);
+  set_i_o(    _gvn.transform(new (C, 1) ProjNode(call, TypeFunc::I_O    , separate_io_proj)));
+  set_all_memory_call(xcall, separate_io_proj);
 
   //return xcall;   // no need, caller already has it
 }
 
-Node* GraphKit::set_results_for_java_call(CallJavaNode* call) {
+Node* GraphKit::set_results_for_java_call(CallJavaNode* call, bool separate_io_proj) {
   if (stopped())  return top();  // maybe the call folded up?
 
   // Capture the return value, if any.
@@ -1614,8 +1614,15 @@ Node* GraphKit::set_results_for_java_call(CallJavaNode* call) {
   // Note:  Since any out-of-line call can produce an exception,
   // we always insert an I_O projection from the call into the result.
 
-  make_slow_call_ex(call, env()->Throwable_klass(), false);
+  make_slow_call_ex(call, env()->Throwable_klass(), separate_io_proj);
 
+  if (separate_io_proj) {
+    // The caller requested separate projections be used by the fall
+    // through and exceptional paths, so replace the projections for
+    // the fall through path.
+    set_i_o(_gvn.transform( new (C, 1) ProjNode(call, TypeFunc::I_O) ));
+    set_all_memory(_gvn.transform( new (C, 1) ProjNode(call, TypeFunc::Memory) ));
+  }
   return ret;
 }
 
@@ -1678,6 +1685,64 @@ void GraphKit::set_predefined_output_for_runtime_call(Node* call,
   }
 }
 
+
+// Replace the call with the current state of the kit.
+void GraphKit::replace_call(CallNode* call, Node* result) {
+  JVMState* ejvms = NULL;
+  if (has_exceptions()) {
+    ejvms = transfer_exceptions_into_jvms();
+  }
+
+  SafePointNode* final_state = stop();
+
+  // Find all the needed outputs of this call
+  CallProjections callprojs;
+  call->extract_projections(&callprojs, true);
+
+  // Replace all the old call edges with the edges from the inlining result
+  C->gvn_replace_by(callprojs.fallthrough_catchproj, final_state->in(TypeFunc::Control));
+  C->gvn_replace_by(callprojs.fallthrough_memproj,   final_state->in(TypeFunc::Memory));
+  C->gvn_replace_by(callprojs.fallthrough_ioproj,    final_state->in(TypeFunc::I_O));
+
+  // Replace the result with the new result if it exists and is used
+  if (callprojs.resproj != NULL && result != NULL) {
+    C->gvn_replace_by(callprojs.resproj, result);
+  }
+
+  if (ejvms == NULL) {
+    // No exception edges to simply kill off those paths
+    C->gvn_replace_by(callprojs.catchall_catchproj, C->top());
+    C->gvn_replace_by(callprojs.catchall_memproj,   C->top());
+    C->gvn_replace_by(callprojs.catchall_ioproj,    C->top());
+
+    // Replace the old exception object with top
+    if (callprojs.exobj != NULL) {
+      C->gvn_replace_by(callprojs.exobj, C->top());
+    }
+  } else {
+    GraphKit ekit(ejvms);
+
+    // Load my combined exception state into the kit, with all phis transformed:
+    SafePointNode* ex_map = ekit.combine_and_pop_all_exception_states();
+
+    Node* ex_oop = ekit.use_exception_state(ex_map);
+
+    C->gvn_replace_by(callprojs.catchall_catchproj, ekit.control());
+    C->gvn_replace_by(callprojs.catchall_memproj,   ekit.reset_memory());
+    C->gvn_replace_by(callprojs.catchall_ioproj,    ekit.i_o());
+
+    // Replace the old exception object with the newly created one
+    if (callprojs.exobj != NULL) {
+      C->gvn_replace_by(callprojs.exobj, ex_oop);
+    }
+  }
+
+  // Disconnect the call from the graph
+  call->disconnect_inputs(NULL);
+  C->gvn_replace_by(call, C->top());
+}
+
+
 //------------------------------increment_counter------------------------------
 // for statistics: increment a VM counter by 1
 
@@ -3459,4 +3524,3 @@ void GraphKit::g1_write_barrier_post(Node* oop_store,
   sync_kit(ideal);
 }
 #undef __
-
diff --git a/src/share/vm/opto/graphKit.hpp b/src/share/vm/opto/graphKit.hpp
index b127789b5..8135aca2d 100644
--- a/src/share/vm/opto/graphKit.hpp
+++ b/src/share/vm/opto/graphKit.hpp
@@ -279,6 +279,34 @@ class GraphKit : public Phase {
   }
   Node* basic_plus_adr(Node* base, Node* ptr, Node* offset);
 
+
+  // Some convenient shortcuts for common nodes
+  Node* IfTrue(IfNode* iff)                   { return _gvn.transform(new (C,1) IfTrueNode(iff));      }
+  Node* IfFalse(IfNode* iff)                  { return _gvn.transform(new (C,1) IfFalseNode(iff));     }
+
+  Node* AddI(Node* l, Node* r)                { return _gvn.transform(new (C,3) AddINode(l, r));       }
+  Node* SubI(Node* l, Node* r)                { return _gvn.transform(new (C,3) SubINode(l, r));       }
+  Node* MulI(Node* l, Node* r)                { return _gvn.transform(new (C,3) MulINode(l, r));       }
+  Node* DivI(Node* ctl, Node* l, Node* r)     { return _gvn.transform(new (C,3) DivINode(ctl, l, r));  }
+
+  Node* AndI(Node* l, Node* r)                { return _gvn.transform(new (C,3) AndINode(l, r));       }
+  Node* OrI(Node* l, Node* r)                 { return _gvn.transform(new (C,3) OrINode(l, r));        }
+  Node* XorI(Node* l, Node* r)                { return _gvn.transform(new (C,3) XorINode(l, r));       }
+
+  Node* MaxI(Node* l, Node* r)                { return _gvn.transform(new (C,3) MaxINode(l, r));       }
+  Node* MinI(Node* l, Node* r)                { return _gvn.transform(new (C,3) MinINode(l, r));       }
+
+  Node* LShiftI(Node* l, Node* r)             { return _gvn.transform(new (C,3) LShiftINode(l, r));    }
+  Node* RShiftI(Node* l, Node* r)             { return _gvn.transform(new (C,3) RShiftINode(l, r));    }
+  Node* URShiftI(Node* l, Node* r)            { return _gvn.transform(new (C,3) URShiftINode(l, r));   }
+
+  Node* CmpI(Node* l, Node* r)                { return _gvn.transform(new (C,3) CmpINode(l, r));       }
+  Node* CmpL(Node* l, Node* r)                { return _gvn.transform(new (C,3) CmpLNode(l, r));       }
+  Node* CmpP(Node* l, Node* r)                { return _gvn.transform(new (C,3) CmpPNode(l, r));       }
+  Node* Bool(Node* cmp, BoolTest::mask relop) { return _gvn.transform(new (C,2) BoolNode(cmp, relop)); }
+
+  Node* AddP(Node* b, Node* a, Node* o)       { return _gvn.transform(new (C,4) AddPNode(b, a, o));    }
+
   // Convert between int and long, and size_t.
   // (See macros ConvI2X, etc., in type.hpp for ConvI2X, etc.)
   Node* ConvI2L(Node* offset);
@@ -400,7 +428,7 @@ class GraphKit : public Phase {
   void set_all_memory(Node* newmem);
 
   // Create a memory projection from the call, then set_all_memory.
-  void set_all_memory_call(Node* call);
+  void set_all_memory_call(Node* call, bool separate_io_proj = false);
 
   // Create a LoadNode, reading from the parser's memory state.
   // (Note:  require_atomic_access is useful only with T_LONG.)
@@ -543,12 +571,12 @@ class GraphKit : public Phase {
   // Transform the call, and update the basics: control, i_o, memory.
   // (The next step is usually to call set_results_for_java_call.)
   void set_edges_for_java_call(CallJavaNode* call,
-                               bool must_throw = false);
+                               bool must_throw = false, bool separate_io_proj = false);
 
   // Finish up a java call that was started by set_edges_for_java_call.
   // Call add_exception on any throw arising from the call.
   // Return the call result (transformed).
-  Node* set_results_for_java_call(CallJavaNode* call);
+  Node* set_results_for_java_call(CallJavaNode* call, bool separate_io_proj = false);
 
   // Similar to set_edges_for_java_call, but simplified for runtime calls.
   void  set_predefined_output_for_runtime_call(Node* call) {
@@ -559,6 +587,11 @@ class GraphKit : public Phase {
                                                const TypePtr* hook_mem);
   Node* set_predefined_input_for_runtime_call(SafePointNode* call);
 
+  // Replace the call with the current state of the kit.  Requires
+  // that the call was generated with separate io_projs so that
+  // exceptional control flow can be handled properly.
+  void replace_call(CallNode* call, Node* result);
+
   // helper functions for statistics
   void increment_counter(address counter_addr);   // increment a debug counter
   void increment_counter(Node*   counter_addr);   // increment a debug counter
diff --git a/src/share/vm/opto/lcm.cpp b/src/share/vm/opto/lcm.cpp
index 31de55a54..0afe80c8f 100644
--- a/src/share/vm/opto/lcm.cpp
+++ b/src/share/vm/opto/lcm.cpp
@@ -616,8 +616,9 @@ bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, int *ready_cnt, Vect
           assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark");
         }
       }
-      if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire &&
-          n->req() > TypeFunc::Parms ) {
+      if( n->is_Mach() && n->req() > TypeFunc::Parms &&
+          (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ||
+           n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) {
         // MemBarAcquire could be created without Precedent edge.
         // del_req() replaces the specified edge with the last input edge
         // and then removes the last edge. If the specified edge > number of
diff --git a/src/share/vm/opto/loopnode.cpp b/src/share/vm/opto/loopnode.cpp
index a1d872252..883fa75c7 100644
--- a/src/share/vm/opto/loopnode.cpp
+++ b/src/share/vm/opto/loopnode.cpp
@@ -1279,7 +1279,8 @@ void IdealLoopTree::counted_loop( PhaseIdealLoop *phase ) {
     // Visit all children, looking for Phis
     for (DUIterator i = cl->outs(); cl->has_out(i); i++) {
       Node *out = cl->out(i);
-      if (!out->is_Phi() || out == phi)  continue; // Looking for other phis
+      // Look for other phis (secondary IVs). Skip dead ones
+      if (!out->is_Phi() || out == phi || !phase->has_node(out)) continue;
       PhiNode* phi2 = out->as_Phi();
       Node *incr2 = phi2->in( LoopNode::LoopBackControl );
       // Look for induction variables of the form:  X += constant
diff --git a/src/share/vm/opto/macro.cpp b/src/share/vm/opto/macro.cpp
index e2421a7f3..2fdc335b9 100644
--- a/src/share/vm/opto/macro.cpp
+++ b/src/share/vm/opto/macro.cpp
@@ -316,6 +316,21 @@ static Node *scan_mem_chain(Node *mem, int alias_idx, int offset, Node *start_me
         assert(adr_idx == Compile::AliasIdxRaw, "address must match or be raw");
       }
       mem = mem->in(MemNode::Memory);
+    } else if (mem->is_ClearArray()) {
+      if (!ClearArrayNode::step_through(&mem, alloc->_idx, phase)) {
+        // Can not bypass initialization of the instance
+        // we are looking.
+        debug_only(intptr_t offset;)
+        assert(alloc == AllocateNode::Ideal_allocation(mem->in(3), phase, offset), "sanity");
+        InitializeNode* init = alloc->as_Allocate()->initialization();
+        // We are looking for stored value, return Initialize node
+        // or memory edge from Allocate node.
+        if (init != NULL)
+          return init;
+        else
+          return alloc->in(TypeFunc::Memory); // It will produce zero value (see callers).
+      }
+      // Otherwise skip it (the call updated 'mem' value).
     } else if (mem->Opcode() == Op_SCMemProj) {
       assert(mem->in(0)->is_LoadStore(), "sanity");
       const TypePtr* atype = mem->in(0)->in(MemNode::Address)->bottom_type()->is_ptr();
@@ -823,6 +838,18 @@ void PhaseMacroExpand::process_users_of_allocation(AllocateNode *alloc) {
           Node *n = use->last_out(k);
           uint oc2 = use->outcnt();
           if (n->is_Store()) {
+#ifdef ASSERT
+            // Verify that there is no dependent MemBarVolatile nodes,
+            // they should be removed during IGVN, see MemBarNode::Ideal().
+            for (DUIterator_Fast pmax, p = n->fast_outs(pmax);
+                                       p < pmax; p++) {
+              Node* mb = n->fast_out(p);
+              assert(mb->is_Initialize() || !mb->is_MemBar() ||
+                     mb->req() <= MemBarNode::Precedent ||
+                     mb->in(MemBarNode::Precedent) != n,
+                     "MemBarVolatile should be eliminated for non-escaping object");
+            }
+#endif
             _igvn.replace_node(n, n->in(MemNode::Memory));
           } else {
             eliminate_card_mark(n);
@@ -912,15 +939,29 @@ bool PhaseMacroExpand::eliminate_allocate_node(AllocateNode *alloc) {
     return false;
   }
 
+  CompileLog* log = C->log();
+  if (log != NULL) {
+    Node* klass = alloc->in(AllocateNode::KlassNode);
+    const TypeKlassPtr* tklass = _igvn.type(klass)->is_klassptr();
+    log->head("eliminate_allocation type='%d'",
+              log->identify(tklass->klass()));
+    JVMState* p = alloc->jvms();
+    while (p != NULL) {
+      log->elem("jvms bci='%d' method='%d'", p->bci(), log->identify(p->method()));
+      p = p->caller();
+    }
+    log->tail("eliminate_allocation");
+  }
+
   process_users_of_allocation(alloc);
 
 #ifndef PRODUCT
-if (PrintEliminateAllocations) {
-  if (alloc->is_AllocateArray())
-    tty->print_cr("++++ Eliminated: %d AllocateArray", alloc->_idx);
-  else
-    tty->print_cr("++++ Eliminated: %d Allocate", alloc->_idx);
-}
+  if (PrintEliminateAllocations) {
+    if (alloc->is_AllocateArray())
+      tty->print_cr("++++ Eliminated: %d AllocateArray", alloc->_idx);
+    else
+      tty->print_cr("++++ Eliminated: %d Allocate", alloc->_idx);
+  }
 #endif
 
   return true;
@@ -1639,6 +1680,18 @@ bool PhaseMacroExpand::eliminate_locking_node(AbstractLockNode *alock) {
       } // if (!oldbox->is_eliminated())
   } // if (alock->is_Lock() && !lock->is_coarsened())
 
+  CompileLog* log = C->log();
+  if (log != NULL) {
+    log->head("eliminate_lock lock='%d'",
+              alock->is_Lock());
+    JVMState* p = alock->jvms();
+    while (p != NULL) {
+      log->elem("jvms bci='%d' method='%d'", p->bci(), log->identify(p->method()));
+      p = p->caller();
+    }
+    log->tail("eliminate_lock");
+  }
+
   #ifndef PRODUCT
   if (PrintEliminateLocks) {
     if (alock->is_Lock()) {
diff --git a/src/share/vm/opto/memnode.cpp b/src/share/vm/opto/memnode.cpp
index 02c15af72..4698add45 100644
--- a/src/share/vm/opto/memnode.cpp
+++ b/src/share/vm/opto/memnode.cpp
@@ -123,6 +123,13 @@ Node *MemNode::optimize_simple_memory_chain(Node *mchain, const TypePtr *t_adr,
       } else {
         assert(false, "unexpected projection");
       }
+    } else if (result->is_ClearArray()) {
+      if (!ClearArrayNode::step_through(&result, instance_id, phase)) {
+        // Can not bypass initialization of the instance
+        // we are looking for.
+        break;
+      }
+      // Otherwise skip it (the call updated 'result' value).
     } else if (result->is_MergeMem()) {
       result = step_through_mergemem(phase, result->as_MergeMem(), t_adr, NULL, tty);
     }
@@ -537,6 +544,15 @@ Node* MemNode::find_previous_store(PhaseTransform* phase) {
       } else if (mem->is_Proj() && mem->in(0)->is_MemBar()) {
         mem = mem->in(0)->in(TypeFunc::Memory);
         continue;           // (a) advance through independent MemBar memory
+      } else if (mem->is_ClearArray()) {
+        if (ClearArrayNode::step_through(&mem, (uint)addr_t->instance_id(), phase)) {
+          // (the call updated 'mem' value)
+          continue;         // (a) advance through independent allocation memory
+        } else {
+          // Can not bypass initialization of the instance
+          // we are looking for.
+          return mem;
+        }
       } else if (mem->is_MergeMem()) {
         int alias_idx = phase->C->get_alias_index(adr_type());
         mem = mem->as_MergeMem()->memory_at(alias_idx);
@@ -1503,6 +1519,8 @@ const Type *LoadNode::Value( PhaseTransform *phase ) const {
       }
     }
   } else if (tp->base() == Type::InstPtr) {
+    const TypeInstPtr* tinst = tp->is_instptr();
+    ciKlass* klass = tinst->klass();
     assert( off != Type::OffsetBot ||
             // arrays can be cast to Objects
             tp->is_oopptr()->klass()->is_java_lang_Object() ||
@@ -1510,6 +1528,25 @@ const Type *LoadNode::Value( PhaseTransform *phase ) const {
             phase->C->has_unsafe_access(),
             "Field accesses must be precise" );
     // For oop loads, we expect the _type to be precise
+    if (OptimizeStringConcat && klass == phase->C->env()->String_klass() &&
+        adr->is_AddP() && off != Type::OffsetBot) {
+      // For constant Strings treat the fields as compile time constants.
+      Node* base = adr->in(AddPNode::Base);
+      if (base->Opcode() == Op_ConP) {
+        const TypeOopPtr* t = phase->type(base)->isa_oopptr();
+        ciObject* string = t->const_oop();
+        ciConstant constant = string->as_instance()->field_value_by_offset(off);
+        if (constant.basic_type() == T_INT) {
+          return TypeInt::make(constant.as_int());
+        } else if (constant.basic_type() == T_ARRAY) {
+          if (adr->bottom_type()->is_ptr_to_narrowoop()) {
+            return TypeNarrowOop::make_from_constant(constant.as_object());
+          } else {
+            return TypeOopPtr::make_from_constant(constant.as_object());
+          }
+        }
+      }
+    }
   } else if (tp->base() == Type::KlassPtr) {
     assert( off != Type::OffsetBot ||
             // arrays can be cast to Objects
@@ -2433,6 +2470,31 @@ Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape){
   return mem;
 }
 
+//----------------------------step_through----------------------------------
+// Return allocation input memory edge if it is different instance
+// or itself if it is the one we are looking for.
+bool ClearArrayNode::step_through(Node** np, uint instance_id, PhaseTransform* phase) {
+  Node* n = *np;
+  assert(n->is_ClearArray(), "sanity");
+  intptr_t offset;
+  AllocateNode* alloc = AllocateNode::Ideal_allocation(n->in(3), phase, offset);
+  // This method is called only before Allocate nodes are expanded during
+  // macro nodes expansion. Before that ClearArray nodes are only generated
+  // in LibraryCallKit::generate_arraycopy() which follows allocations.
+  assert(alloc != NULL, "should have allocation");
+  if (alloc->_idx == instance_id) {
+    // Can not bypass initialization of the instance we are looking for.
+    return false;
+  }
+  // Otherwise skip it.
+  InitializeNode* init = alloc->initialization();
+  if (init != NULL)
+    *np = init->in(TypeFunc::Memory);
+  else
+    *np = alloc->in(TypeFunc::Memory);
+  return true;
+}
+
 //----------------------------clear_memory-------------------------------------
 // Generate code to initialize object storage to zero.
 Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
@@ -2606,7 +2668,30 @@ MemBarNode* MemBarNode::make(Compile* C, int opcode, int atp, Node* pn) {
 // Return a node which is more "ideal" than the current node.  Strip out
 // control copies
 Node *MemBarNode::Ideal(PhaseGVN *phase, bool can_reshape) {
-  return remove_dead_region(phase, can_reshape) ? this : NULL;
+  if (remove_dead_region(phase, can_reshape)) return this;
+
+  // Eliminate volatile MemBars for scalar replaced objects.
+  if (can_reshape && req() == (Precedent+1) &&
+      (Opcode() == Op_MemBarAcquire || Opcode() == Op_MemBarVolatile)) {
+    // Volatile field loads and stores.
+    Node* my_mem = in(MemBarNode::Precedent);
+    if (my_mem != NULL && my_mem->is_Mem()) {
+      const TypeOopPtr* t_oop = my_mem->in(MemNode::Address)->bottom_type()->isa_oopptr();
+      // Check for scalar replaced object reference.
+      if( t_oop != NULL && t_oop->is_known_instance_field() &&
+          t_oop->offset() != Type::OffsetBot &&
+          t_oop->offset() != Type::OffsetTop) {
+        // Replace MemBar projections by its inputs.
+        PhaseIterGVN* igvn = phase->is_IterGVN();
+        igvn->replace_node(proj_out(TypeFunc::Memory), in(TypeFunc::Memory));
+        igvn->replace_node(proj_out(TypeFunc::Control), in(TypeFunc::Control));
+        // Must return either the original node (now dead) or a new node
+        // (Do not return a top here, since that would break the uniqueness of top.)
+        return new (phase->C, 1) ConINode(TypeInt::ZERO);
+      }
+    }
+  }
+  return NULL;
 }
 
 //------------------------------Value------------------------------------------
diff --git a/src/share/vm/opto/memnode.hpp b/src/share/vm/opto/memnode.hpp
index a71df7d40..cd0e60d97 100644
--- a/src/share/vm/opto/memnode.hpp
+++ b/src/share/vm/opto/memnode.hpp
@@ -717,7 +717,10 @@ public:
 //------------------------------ClearArray-------------------------------------
 class ClearArrayNode: public Node {
 public:
-  ClearArrayNode( Node *ctrl, Node *arymem, Node *word_cnt, Node *base ) : Node(ctrl,arymem,word_cnt,base) {}
+  ClearArrayNode( Node *ctrl, Node *arymem, Node *word_cnt, Node *base )
+    : Node(ctrl,arymem,word_cnt,base) {
+    init_class_id(Class_ClearArray);
+  }
   virtual int         Opcode() const;
   virtual const Type *bottom_type() const { return Type::MEMORY; }
   // ClearArray modifies array elements, and so affects only the
@@ -743,6 +746,9 @@ public:
                             Node* start_offset,
                             Node* end_offset,
                             PhaseGVN* phase);
+  // Return allocation input memory edge if it is different instance
+  // or itself if it is the one we are looking for.
+  static bool step_through(Node** np, uint instance_id, PhaseTransform* phase);
 };
 
 //------------------------------StrComp-------------------------------------
diff --git a/src/share/vm/opto/node.hpp b/src/share/vm/opto/node.hpp
index bad160705..92da96b40 100644
--- a/src/share/vm/opto/node.hpp
+++ b/src/share/vm/opto/node.hpp
@@ -47,6 +47,7 @@ class CallStaticJavaNode;
 class CatchNode;
 class CatchProjNode;
 class CheckCastPPNode;
+class ClearArrayNode;
 class CmpNode;
 class CodeBuffer;
 class ConstraintCastNode;
@@ -599,8 +600,9 @@ public:
     DEFINE_CLASS_ID(BoxLock,  Node, 10)
     DEFINE_CLASS_ID(Add,      Node, 11)
     DEFINE_CLASS_ID(Mul,      Node, 12)
+    DEFINE_CLASS_ID(ClearArray, Node, 13)
 
-    _max_classes  = ClassMask_Mul
+    _max_classes  = ClassMask_ClearArray
   };
   #undef DEFINE_CLASS_ID
 
@@ -661,18 +663,25 @@ public:
     return (_flags & Flag_is_Call) != 0;
   }
 
+  CallNode* isa_Call() const {
+    return is_Call() ? as_Call() : NULL;
+  }
+
   CallNode *as_Call() const { // Only for CallNode (not for MachCallNode)
     assert((_class_id & ClassMask_Call) == Class_Call, "invalid node class");
     return (CallNode*)this;
   }
 
-  #define DEFINE_CLASS_QUERY(type) \
-  bool is_##type() const { \
+  #define DEFINE_CLASS_QUERY(type)                           \
+  bool is_##type() const {                                   \
     return ((_class_id & ClassMask_##type) == Class_##type); \
-  } \
-  type##Node *as_##type() const { \
-    assert(is_##type(), "invalid node class"); \
-    return (type##Node*)this; \
+  }                                                          \
+  type##Node *as_##type() const {                            \
+    assert(is_##type(), "invalid node class");               \
+    return (type##Node*)this;                                \
+  }                                                          \
+  type##Node* isa_##type() const {                           \
+    return (is_##type()) ? as_##type() : NULL;               \
   }
 
   DEFINE_CLASS_QUERY(AbstractLock)
@@ -691,6 +700,7 @@ public:
   DEFINE_CLASS_QUERY(CatchProj)
   DEFINE_CLASS_QUERY(CheckCastPP)
   DEFINE_CLASS_QUERY(ConstraintCast)
+  DEFINE_CLASS_QUERY(ClearArray)
   DEFINE_CLASS_QUERY(CMove)
   DEFINE_CLASS_QUERY(Cmp)
   DEFINE_CLASS_QUERY(CountedLoop)
@@ -1249,6 +1259,24 @@ Node* Node::last_out(DUIterator_Last& i) const {
 #undef I_VDUI_ONLY
 #undef VDUI_ONLY
 
+// An Iterator that truly follows the iterator pattern.  Doesn't
+// support deletion but could be made to.
+//
+//   for (SimpleDUIterator i(n); i.has_next(); i.next()) {
+//     Node* m = i.get();
+//
+class SimpleDUIterator : public StackObj {
+ private:
+  Node* node;
+  DUIterator_Fast i;
+  DUIterator_Fast imax;
+ public:
+  SimpleDUIterator(Node* n): node(n), i(n->fast_outs(imax)) {}
+  bool has_next() { return i < imax; }
+  void next() { i++; }
+  Node* get() { return node->fast_out(i); }
+};
+
 
 //-----------------------------------------------------------------------------
 // Map dense integer indices to Nodes.  Uses classic doubling-array trick.
@@ -1290,6 +1318,12 @@ class Node_List : public Node_Array {
 public:
   Node_List() : Node_Array(Thread::current()->resource_area()), _cnt(0) {}
   Node_List(Arena *a) : Node_Array(a), _cnt(0) {}
+  bool contains(Node* n) {
+    for (uint e = 0; e < size(); e++) {
+      if (at(e) == n) return true;
+    }
+    return false;
+  }
   void insert( uint i, Node *n ) { Node_Array::insert(i,n); _cnt++; }
   void remove( uint i ) { Node_Array::remove(i); _cnt--; }
   void push( Node *b ) { map(_cnt++,b); }
diff --git a/src/share/vm/opto/parse3.cpp b/src/share/vm/opto/parse3.cpp
index 7125cb5d6..83a3927a5 100644
--- a/src/share/vm/opto/parse3.cpp
+++ b/src/share/vm/opto/parse3.cpp
@@ -240,19 +240,19 @@ void Parse::do_put_xxx(const TypePtr* obj_type, Node* obj, ciField* field, bool
     // membar is dependent on the store, keeping any other membars generated
     // below from floating up past the store.
     int adr_idx = C->get_alias_index(adr_type);
-    insert_mem_bar_volatile(Op_MemBarVolatile, adr_idx);
+    insert_mem_bar_volatile(Op_MemBarVolatile, adr_idx, store);
 
     // Now place a membar for AliasIdxBot for the unknown yet-to-be-parsed
     // volatile alias indices. Skip this if the membar is redundant.
     if (adr_idx != Compile::AliasIdxBot) {
-      insert_mem_bar_volatile(Op_MemBarVolatile, Compile::AliasIdxBot);
+      insert_mem_bar_volatile(Op_MemBarVolatile, Compile::AliasIdxBot, store);
     }
 
     // Finally, place alias-index-specific membars for each volatile index
     // that isn't the adr_idx membar. Typically there's only 1 or 2.
     for( int i = Compile::AliasIdxRaw; i < C->num_alias_types(); i++ ) {
       if (i != adr_idx && C->alias_type(i)->is_volatile()) {
-        insert_mem_bar_volatile(Op_MemBarVolatile, i);
+        insert_mem_bar_volatile(Op_MemBarVolatile, i, store);
       }
     }
   }
diff --git a/src/share/vm/opto/parseHelper.cpp b/src/share/vm/opto/parseHelper.cpp
index 6b3f432ea..ab7883fd8 100644
--- a/src/share/vm/opto/parseHelper.cpp
+++ b/src/share/vm/opto/parseHelper.cpp
@@ -221,6 +221,14 @@ void Parse::do_new() {
 
   // Push resultant oop onto stack
   push(obj);
+
+  // Keep track of whether opportunities exist for StringBuilder
+  // optimizations.
+  if (OptimizeStringConcat &&
+      (klass == C->env()->StringBuilder_klass() ||
+       klass == C->env()->StringBuffer_klass())) {
+    C->set_has_stringbuilder(true);
+  }
 }
 
 #ifndef PRODUCT
diff --git a/src/share/vm/opto/phase.hpp b/src/share/vm/opto/phase.hpp
index d0d54e1d9..9df5500fd 100644
--- a/src/share/vm/opto/phase.hpp
+++ b/src/share/vm/opto/phase.hpp
@@ -44,6 +44,7 @@ public:
     BlockLayout,                // Linear ordering of blocks
     Register_Allocation,        // Register allocation, duh
     LIVE,                       // Dragon-book LIVE range problem
+    StringOpts,                 // StringBuilder related optimizations
     Interference_Graph,         // Building the IFG
     Coalesce,                   // Coalescing copies
     Ideal_Loop,                 // Find idealized trip-counted loops
diff --git a/src/share/vm/opto/phaseX.hpp b/src/share/vm/opto/phaseX.hpp
index b9391ba1d..33ff56f0e 100644
--- a/src/share/vm/opto/phaseX.hpp
+++ b/src/share/vm/opto/phaseX.hpp
@@ -345,7 +345,11 @@ public:
   Node  *hash_find(const Node *n) { return _table.hash_find(n); }
 
   // Used after parsing to eliminate values that are no longer in program
-  void   remove_useless_nodes(VectorSet &useful) { _table.remove_useless_nodes(useful); }
+  void   remove_useless_nodes(VectorSet &useful) {
+    _table.remove_useless_nodes(useful);
+    // this may invalidate cached cons so reset the cache
+    init_con_caches();
+  }
 
   virtual ConNode* uncached_makecon(const Type* t);  // override from PhaseTransform
 
diff --git a/src/share/vm/opto/stringopts.cpp b/src/share/vm/opto/stringopts.cpp
new file mode 100644
index 000000000..192d31f82
--- /dev/null
+++ b/src/share/vm/opto/stringopts.cpp
@@ -0,0 +1,1395 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_stringopts.cpp.incl"
+
+#define __ kit.
+
+class StringConcat : public ResourceObj {
+ private:
+  PhaseStringOpts*    _stringopts;
+  Node*               _string_alloc;
+  AllocateNode*       _begin;          // The allocation the begins the pattern
+  CallStaticJavaNode* _end;            // The final call of the pattern.  Will either be
+                                       // SB.toString or or String.<init>(SB.toString)
+  bool                _multiple;       // indicates this is a fusion of two or more
+                                       // separate StringBuilders
+
+  Node*               _arguments;      // The list of arguments to be concatenated
+  GrowableArray<int>  _mode;           // into a String along with a mode flag
+                                       // indicating how to treat the value.
+
+  Node_List           _control;        // List of control nodes that will be deleted
+  Node_List           _uncommon_traps; // Uncommon traps that needs to be rewritten
+                                       // to restart at the initial JVMState.
+ public:
+  // Mode for converting arguments to Strings
+  enum {
+    StringMode,
+    IntMode,
+    CharMode
+  };
+
+  StringConcat(PhaseStringOpts* stringopts, CallStaticJavaNode* end):
+    _end(end),
+    _begin(NULL),
+    _multiple(false),
+    _string_alloc(NULL),
+    _stringopts(stringopts) {
+    _arguments = new (_stringopts->C, 1) Node(1);
+    _arguments->del_req(0);
+  }
+
+  bool validate_control_flow();
+
+  void merge_add() {
+#if 0
+    // XXX This is place holder code for reusing an existing String
+    // allocation but the logic for checking the state safety is
+    // probably inadequate at the moment.
+    CallProjections endprojs;
+    sc->end()->extract_projections(&endprojs, false);
+    if (endprojs.resproj != NULL) {
+      for (SimpleDUIterator i(endprojs.resproj); i.has_next(); i.next()) {
+        CallStaticJavaNode *use = i.get()->isa_CallStaticJava();
+        if (use != NULL && use->method() != NULL &&
+            use->method()->holder() == C->env()->String_klass() &&
+            use->method()->name() == ciSymbol::object_initializer_name() &&
+            use->in(TypeFunc::Parms + 1) == endprojs.resproj) {
+          // Found useless new String(sb.toString()) so reuse the newly allocated String
+          // when creating the result instead of allocating a new one.
+          sc->set_string_alloc(use->in(TypeFunc::Parms));
+          sc->set_end(use);
+        }
+      }
+    }
+#endif
+  }
+
+  StringConcat* merge(StringConcat* other, Node* arg);
+
+  void set_allocation(AllocateNode* alloc) {
+    _begin = alloc;
+  }
+
+  void append(Node* value, int mode) {
+    _arguments->add_req(value);
+    _mode.append(mode);
+  }
+  void push(Node* value, int mode) {
+    _arguments->ins_req(0, value);
+    _mode.insert_before(0, mode);
+  }
+  void push_string(Node* value) {
+    push(value, StringMode);
+  }
+  void push_int(Node* value) {
+    push(value, IntMode);
+  }
+  void push_char(Node* value) {
+    push(value, CharMode);
+  }
+
+  Node* argument(int i) {
+    return _arguments->in(i);
+  }
+  void set_argument(int i, Node* value) {
+    _arguments->set_req(i, value);
+  }
+  int num_arguments() {
+    return _mode.length();
+  }
+  int mode(int i) {
+    return _mode.at(i);
+  }
+  void add_control(Node* ctrl) {
+    assert(!_control.contains(ctrl), "only push once");
+    _control.push(ctrl);
+  }
+  CallStaticJavaNode* end() { return _end; }
+  AllocateNode* begin() { return _begin; }
+  Node* string_alloc() { return _string_alloc; }
+
+  void eliminate_unneeded_control();
+  void eliminate_initialize(InitializeNode* init);
+  void eliminate_call(CallNode* call);
+
+  void maybe_log_transform() {
+    CompileLog* log = _stringopts->C->log();
+    if (log != NULL) {
+      log->head("replace_string_concat arguments='%d' string_alloc='%d' multiple='%d'",
+                num_arguments(),
+                _string_alloc != NULL,
+                _multiple);
+      JVMState* p = _begin->jvms();
+      while (p != NULL) {
+        log->elem("jvms bci='%d' method='%d'", p->bci(), log->identify(p->method()));
+        p = p->caller();
+      }
+      log->tail("replace_string_concat");
+    }
+  }
+
+  void convert_uncommon_traps(GraphKit& kit, const JVMState* jvms) {
+    for (uint u = 0; u < _uncommon_traps.size(); u++) {
+      Node* uct = _uncommon_traps.at(u);
+
+      // Build a new call using the jvms state of the allocate
+      address call_addr = SharedRuntime::uncommon_trap_blob()->instructions_begin();
+      const TypeFunc* call_type = OptoRuntime::uncommon_trap_Type();
+      int size = call_type->domain()->cnt();
+      const TypePtr* no_memory_effects = NULL;
+      Compile* C = _stringopts->C;
+      CallStaticJavaNode* call = new (C, size) CallStaticJavaNode(call_type, call_addr, "uncommon_trap",
+                                                                  jvms->bci(), no_memory_effects);
+      for (int e = 0; e < TypeFunc::Parms; e++) {
+        call->init_req(e, uct->in(e));
+      }
+      // Set the trap request to record intrinsic failure if this trap
+      // is taken too many times.  Ideally we would handle then traps by
+      // doing the original bookkeeping in the MDO so that if it caused
+      // the code to be thrown out we could still recompile and use the
+      // optimization.  Failing the uncommon traps doesn't really mean
+      // that the optimization is a bad idea but there's no other way to
+      // do the MDO updates currently.
+      int trap_request = Deoptimization::make_trap_request(Deoptimization::Reason_intrinsic,
+                                                           Deoptimization::Action_make_not_entrant);
+      call->init_req(TypeFunc::Parms, __ intcon(trap_request));
+      kit.add_safepoint_edges(call);
+
+      _stringopts->gvn()->transform(call);
+      C->gvn_replace_by(uct, call);
+      uct->disconnect_inputs(NULL);
+    }
+  }
+
+  void cleanup() {
+    // disconnect the hook node
+    _arguments->disconnect_inputs(NULL);
+  }
+};
+
+
+void StringConcat::eliminate_unneeded_control() {
+  eliminate_initialize(begin()->initialization());
+  for (uint i = 0; i < _control.size(); i++) {
+    Node* n = _control.at(i);
+    if (n->is_Call()) {
+      if (n != _end) {
+        eliminate_call(n->as_Call());
+      }
+    } else if (n->is_IfTrue()) {
+      Compile* C = _stringopts->C;
+      C->gvn_replace_by(n, n->in(0)->in(0));
+      C->gvn_replace_by(n->in(0), C->top());
+    }
+  }
+}
+
+
+StringConcat* StringConcat::merge(StringConcat* other, Node* arg) {
+  StringConcat* result = new StringConcat(_stringopts, _end);
+  for (uint x = 0; x < _control.size(); x++) {
+    Node* n = _control.at(x);
+    if (n->is_Call()) {
+      result->_control.push(n);
+    }
+  }
+  for (uint x = 0; x < other->_control.size(); x++) {
+    Node* n = other->_control.at(x);
+    if (n->is_Call()) {
+      result->_control.push(n);
+    }
+  }
+  assert(result->_control.contains(other->_end), "what?");
+  assert(result->_control.contains(_begin), "what?");
+  for (int x = 0; x < num_arguments(); x++) {
+    if (argument(x) == arg) {
+      // replace the toString result with the all the arguments that
+      // made up the other StringConcat
+      for (int y = 0; y < other->num_arguments(); y++) {
+        result->append(other->argument(y), other->mode(y));
+      }
+    } else {
+      result->append(argument(x), mode(x));
+    }
+  }
+  result->set_allocation(other->_begin);
+  result->_multiple = true;
+  return result;
+}
+
+
+void StringConcat::eliminate_call(CallNode* call) {
+  Compile* C = _stringopts->C;
+  CallProjections projs;
+  call->extract_projections(&projs, false);
+  if (projs.fallthrough_catchproj != NULL) {
+    C->gvn_replace_by(projs.fallthrough_catchproj, call->in(TypeFunc::Control));
+  }
+  if (projs.fallthrough_memproj != NULL) {
+    C->gvn_replace_by(projs.fallthrough_memproj, call->in(TypeFunc::Memory));
+  }
+  if (projs.catchall_memproj != NULL) {
+    C->gvn_replace_by(projs.catchall_memproj, C->top());
+  }
+  if (projs.fallthrough_ioproj != NULL) {
+    C->gvn_replace_by(projs.fallthrough_ioproj, call->in(TypeFunc::I_O));
+  }
+  if (projs.catchall_ioproj != NULL) {
+    C->gvn_replace_by(projs.catchall_ioproj, C->top());
+  }
+  if (projs.catchall_catchproj != NULL) {
+    // EA can't cope with the partially collapsed graph this
+    // creates so put it on the worklist to be collapsed later.
+    for (SimpleDUIterator i(projs.catchall_catchproj); i.has_next(); i.next()) {
+      Node *use = i.get();
+      int opc = use->Opcode();
+      if (opc == Op_CreateEx || opc == Op_Region) {
+        _stringopts->record_dead_node(use);
+      }
+    }
+    C->gvn_replace_by(projs.catchall_catchproj, C->top());
+  }
+  if (projs.resproj != NULL) {
+    C->gvn_replace_by(projs.resproj, C->top());
+  }
+  C->gvn_replace_by(call, C->top());
+}
+
+void StringConcat::eliminate_initialize(InitializeNode* init) {
+  Compile* C = _stringopts->C;
+
+  // Eliminate Initialize node.
+  assert(init->outcnt() <= 2, "only a control and memory projection expected");
+  assert(init->req() <= InitializeNode::RawStores, "no pending inits");
+  Node *ctrl_proj = init->proj_out(TypeFunc::Control);
+  if (ctrl_proj != NULL) {
+    C->gvn_replace_by(ctrl_proj, init->in(TypeFunc::Control));
+  }
+  Node *mem_proj = init->proj_out(TypeFunc::Memory);
+  if (mem_proj != NULL) {
+    Node *mem = init->in(TypeFunc::Memory);
+    C->gvn_replace_by(mem_proj, mem);
+  }
+  C->gvn_replace_by(init, C->top());
+  init->disconnect_inputs(NULL);
+}
+
+Node_List PhaseStringOpts::collect_toString_calls() {
+  Node_List string_calls;
+  Node_List worklist;
+
+  _visited.Clear();
+
+  // Prime the worklist
+  for (uint i = 1; i < C->root()->len(); i++) {
+    Node* n = C->root()->in(i);
+    if (n != NULL && !_visited.test_set(n->_idx)) {
+      worklist.push(n);
+    }
+  }
+
+  while (worklist.size() > 0) {
+    Node* ctrl = worklist.pop();
+    if (ctrl->is_CallStaticJava()) {
+      CallStaticJavaNode* csj = ctrl->as_CallStaticJava();
+      ciMethod* m = csj->method();
+      if (m != NULL &&
+          (m->intrinsic_id() == vmIntrinsics::_StringBuffer_toString ||
+           m->intrinsic_id() == vmIntrinsics::_StringBuilder_toString)) {
+        string_calls.push(csj);
+      }
+    }
+    if (ctrl->in(0) != NULL && !_visited.test_set(ctrl->in(0)->_idx)) {
+      worklist.push(ctrl->in(0));
+    }
+    if (ctrl->is_Region()) {
+      for (uint i = 1; i < ctrl->len(); i++) {
+        if (ctrl->in(i) != NULL && !_visited.test_set(ctrl->in(i)->_idx)) {
+          worklist.push(ctrl->in(i));
+        }
+      }
+    }
+  }
+  return string_calls;
+}
+
+
+StringConcat* PhaseStringOpts::build_candidate(CallStaticJavaNode* call) {
+  ciMethod* m = call->method();
+  ciSymbol* string_sig;
+  ciSymbol* int_sig;
+  ciSymbol* char_sig;
+  if (m->holder() == C->env()->StringBuilder_klass()) {
+    string_sig = ciSymbol::String_StringBuilder_signature();
+    int_sig = ciSymbol::int_StringBuilder_signature();
+    char_sig = ciSymbol::char_StringBuilder_signature();
+  } else if (m->holder() == C->env()->StringBuffer_klass()) {
+    string_sig = ciSymbol::String_StringBuffer_signature();
+    int_sig = ciSymbol::int_StringBuffer_signature();
+    char_sig = ciSymbol::char_StringBuffer_signature();
+  } else {
+    return NULL;
+  }
+#ifndef PRODUCT
+  if (PrintOptimizeStringConcat) {
+    tty->print("considering toString call in ");
+    call->jvms()->dump_spec(tty); tty->cr();
+  }
+#endif
+
+  StringConcat* sc = new StringConcat(this, call);
+
+  AllocateNode* alloc = NULL;
+  InitializeNode* init = NULL;
+
+  // possible opportunity for StringBuilder fusion
+  CallStaticJavaNode* cnode = call;
+  while (cnode) {
+    Node* recv = cnode->in(TypeFunc::Parms)->uncast();
+    if (recv->is_Proj()) {
+      recv = recv->in(0);
+    }
+    cnode = recv->isa_CallStaticJava();
+    if (cnode == NULL) {
+      alloc = recv->isa_Allocate();
+      if (alloc == NULL) {
+        break;
+      }
+      // Find the constructor call
+      Node* result = alloc->result_cast();
+      if (result == NULL || !result->is_CheckCastPP()) {
+        // strange looking allocation
+#ifndef PRODUCT
+        if (PrintOptimizeStringConcat) {
+          tty->print("giving up because allocation looks strange ");
+          alloc->jvms()->dump_spec(tty); tty->cr();
+        }
+#endif
+        break;
+      }
+      Node* constructor = NULL;
+      for (SimpleDUIterator i(result); i.has_next(); i.next()) {
+        CallStaticJavaNode *use = i.get()->isa_CallStaticJava();
+        if (use != NULL && use->method() != NULL &&
+            use->method()->name() == ciSymbol::object_initializer_name() &&
+            use->method()->holder() == m->holder()) {
+          // Matched the constructor.
+          ciSymbol* sig = use->method()->signature()->as_symbol();
+          if (sig == ciSymbol::void_method_signature() ||
+              sig == ciSymbol::int_void_signature() ||
+              sig == ciSymbol::string_void_signature()) {
+            if (sig == ciSymbol::string_void_signature()) {
+              // StringBuilder(String) so pick this up as the first argument
+              assert(use->in(TypeFunc::Parms + 1) != NULL, "what?");
+              sc->push_string(use->in(TypeFunc::Parms + 1));
+            }
+            // The int variant takes an initial size for the backing
+            // array so just treat it like the void version.
+            constructor = use;
+          } else {
+#ifndef PRODUCT
+            if (PrintOptimizeStringConcat) {
+              tty->print("unexpected constructor signature: %s", sig->as_utf8());
+            }
+#endif
+          }
+          break;
+        }
+      }
+      if (constructor == NULL) {
+        // couldn't find constructor
+#ifndef PRODUCT
+        if (PrintOptimizeStringConcat) {
+          tty->print("giving up because couldn't find constructor ");
+          alloc->jvms()->dump_spec(tty);
+        }
+#endif
+        break;
+      }
+
+      // Walked all the way back and found the constructor call so see
+      // if this call converted into a direct string concatenation.
+      sc->add_control(call);
+      sc->add_control(constructor);
+      sc->add_control(alloc);
+      sc->set_allocation(alloc);
+      if (sc->validate_control_flow()) {
+        return sc;
+      } else {
+        return NULL;
+      }
+    } else if (cnode->method() == NULL) {
+      break;
+    } else if (cnode->method()->holder() == m->holder() &&
+               cnode->method()->name() == ciSymbol::append_name() &&
+               (cnode->method()->signature()->as_symbol() == string_sig ||
+                cnode->method()->signature()->as_symbol() == char_sig ||
+                cnode->method()->signature()->as_symbol() == int_sig)) {
+      sc->add_control(cnode);
+      Node* arg = cnode->in(TypeFunc::Parms + 1);
+      if (cnode->method()->signature()->as_symbol() == int_sig) {
+        sc->push_int(arg);
+      } else if (cnode->method()->signature()->as_symbol() == char_sig) {
+        sc->push_char(arg);
+      } else {
+        if (arg->is_Proj() && arg->in(0)->is_CallStaticJava()) {
+          CallStaticJavaNode* csj = arg->in(0)->as_CallStaticJava();
+          if (csj->method() != NULL &&
+              csj->method()->holder() == C->env()->Integer_klass() &&
+              csj->method()->name() == ciSymbol::toString_name()) {
+            sc->add_control(csj);
+            sc->push_int(csj->in(TypeFunc::Parms));
+            continue;
+          }
+        }
+        sc->push_string(arg);
+      }
+      continue;
+    } else {
+      // some unhandled signature
+#ifndef PRODUCT
+      if (PrintOptimizeStringConcat) {
+        tty->print("giving up because encountered unexpected signature ");
+        cnode->tf()->dump(); tty->cr();
+        cnode->in(TypeFunc::Parms + 1)->dump();
+      }
+#endif
+      break;
+    }
+  }
+  return NULL;
+}
+
+
+PhaseStringOpts::PhaseStringOpts(PhaseGVN* gvn, Unique_Node_List*):
+  Phase(StringOpts),
+  _gvn(gvn),
+  _visited(Thread::current()->resource_area()) {
+
+  assert(OptimizeStringConcat, "shouldn't be here");
+
+  size_table_field = C->env()->Integer_klass()->get_field_by_name(ciSymbol::make("sizeTable"),
+                                                                  ciSymbol::make("[I"), true);
+  if (size_table_field == NULL) {
+    // Something wrong so give up.
+    assert(false, "why can't we find Integer.sizeTable?");
+    return;
+  }
+
+  // Collect the types needed to talk about the various slices of memory
+  const TypeInstPtr* string_type = TypeInstPtr::make(TypePtr::NotNull, C->env()->String_klass(),
+                                                     false, NULL, 0);
+
+  const TypePtr* value_field_type = string_type->add_offset(java_lang_String::value_offset_in_bytes());
+  const TypePtr* offset_field_type = string_type->add_offset(java_lang_String::offset_offset_in_bytes());
+  const TypePtr* count_field_type = string_type->add_offset(java_lang_String::count_offset_in_bytes());
+
+  value_field_idx = C->get_alias_index(value_field_type);
+  count_field_idx = C->get_alias_index(count_field_type);
+  offset_field_idx = C->get_alias_index(offset_field_type);
+  char_adr_idx = C->get_alias_index(TypeAryPtr::CHARS);
+
+  // For each locally allocated StringBuffer see if the usages can be
+  // collapsed into a single String construction.
+
+  // Run through the list of allocation looking for SB.toString to see
+  // if it's possible to fuse the usage of the SB into a single String
+  // construction.
+  GrowableArray<StringConcat*> concats;
+  Node_List toStrings = collect_toString_calls();
+  while (toStrings.size() > 0) {
+    StringConcat* sc = build_candidate(toStrings.pop()->as_CallStaticJava());
+    if (sc != NULL) {
+      concats.push(sc);
+    }
+  }
+
+  // try to coalesce separate concats
+ restart:
+  for (int c = 0; c < concats.length(); c++) {
+    StringConcat* sc = concats.at(c);
+    for (int i = 0; i < sc->num_arguments(); i++) {
+      Node* arg = sc->argument(i);
+      if (arg->is_Proj() && arg->in(0)->is_CallStaticJava()) {
+        CallStaticJavaNode* csj = arg->in(0)->as_CallStaticJava();
+        if (csj->method() != NULL &&
+            (csj->method()->holder() == C->env()->StringBuffer_klass() ||
+             csj->method()->holder() == C->env()->StringBuilder_klass()) &&
+            csj->method()->name() == ciSymbol::toString_name()) {
+          for (int o = 0; o < concats.length(); o++) {
+            if (c == o) continue;
+            StringConcat* other = concats.at(o);
+            if (other->end() == csj) {
+#ifndef PRODUCT
+              if (PrintOptimizeStringConcat) {
+                tty->print_cr("considering stacked concats");
+              }
+#endif
+
+              StringConcat* merged = sc->merge(other, arg);
+              if (merged->validate_control_flow()) {
+#ifndef PRODUCT
+                if (PrintOptimizeStringConcat) {
+                  tty->print_cr("stacking would succeed");
+                }
+#endif
+                if (c < o) {
+                  concats.remove_at(o);
+                  concats.at_put(c, merged);
+                } else {
+                  concats.remove_at(c);
+                  concats.at_put(o, merged);
+                }
+                goto restart;
+              } else {
+#ifndef PRODUCT
+                if (PrintOptimizeStringConcat) {
+                  tty->print_cr("stacking would fail");
+                }
+#endif
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+
+  for (int c = 0; c < concats.length(); c++) {
+    StringConcat* sc = concats.at(c);
+    replace_string_concat(sc);
+  }
+
+  remove_dead_nodes();
+}
+
+void PhaseStringOpts::record_dead_node(Node* dead) {
+  dead_worklist.push(dead);
+}
+
+void PhaseStringOpts::remove_dead_nodes() {
+  // Delete any dead nodes to make things clean enough that escape
+  // analysis doesn't get unhappy.
+  while (dead_worklist.size() > 0) {
+    Node* use = dead_worklist.pop();
+    int opc = use->Opcode();
+    switch (opc) {
+      case Op_Region: {
+        uint i = 1;
+        for (i = 1; i < use->req(); i++) {
+          if (use->in(i) != C->top()) {
+            break;
+          }
+        }
+        if (i >= use->req()) {
+          for (SimpleDUIterator i(use); i.has_next(); i.next()) {
+            Node* m = i.get();
+            if (m->is_Phi()) {
+              dead_worklist.push(m);
+            }
+          }
+          C->gvn_replace_by(use, C->top());
+        }
+        break;
+      }
+      case Op_AddP:
+      case Op_CreateEx: {
+        // Recurisvely clean up references to CreateEx so EA doesn't
+        // get unhappy about the partially collapsed graph.
+        for (SimpleDUIterator i(use); i.has_next(); i.next()) {
+          Node* m = i.get();
+          if (m->is_AddP()) {
+            dead_worklist.push(m);
+          }
+        }
+        C->gvn_replace_by(use, C->top());
+        break;
+      }
+      case Op_Phi:
+        if (use->in(0) == C->top()) {
+          C->gvn_replace_by(use, C->top());
+        }
+        break;
+    }
+  }
+}
+
+
+bool StringConcat::validate_control_flow() {
+  // We found all the calls and arguments now lets see if it's
+  // safe to transform the graph as we would expect.
+
+  // Check to see if this resulted in too many uncommon traps previously
+  if (Compile::current()->too_many_traps(_begin->jvms()->method(), _begin->jvms()->bci(),
+                        Deoptimization::Reason_intrinsic)) {
+    return false;
+  }
+
+  // Walk backwards over the control flow from toString to the
+  // allocation and make sure all the control flow is ok.  This
+  // means it's either going to be eliminated once the calls are
+  // removed or it can safely be transformed into an uncommon
+  // trap.
+
+  int null_check_count = 0;
+  Unique_Node_List ctrl_path;
+
+  assert(_control.contains(_begin), "missing");
+  assert(_control.contains(_end), "missing");
+
+  // Collect the nodes that we know about and will eliminate into ctrl_path
+  for (uint i = 0; i < _control.size(); i++) {
+    // Push the call and it's control projection
+    Node* n = _control.at(i);
+    if (n->is_Allocate()) {
+      AllocateNode* an = n->as_Allocate();
+      InitializeNode* init = an->initialization();
+      ctrl_path.push(init);
+      ctrl_path.push(init->as_Multi()->proj_out(0));
+    }
+    if (n->is_Call()) {
+      CallNode* cn = n->as_Call();
+      ctrl_path.push(cn);
+      ctrl_path.push(cn->proj_out(0));
+      ctrl_path.push(cn->proj_out(0)->unique_out());
+      ctrl_path.push(cn->proj_out(0)->unique_out()->as_Catch()->proj_out(0));
+    } else {
+      ShouldNotReachHere();
+    }
+  }
+
+  // Skip backwards through the control checking for unexpected contro flow
+  Node* ptr = _end;
+  bool fail = false;
+  while (ptr != _begin) {
+    if (ptr->is_Call() && ctrl_path.member(ptr)) {
+      ptr = ptr->in(0);
+    } else if (ptr->is_CatchProj() && ctrl_path.member(ptr)) {
+      ptr = ptr->in(0)->in(0)->in(0);
+      assert(ctrl_path.member(ptr), "should be a known piece of control");
+    } else if (ptr->is_IfTrue()) {
+      IfNode* iff = ptr->in(0)->as_If();
+      BoolNode* b = iff->in(1)->isa_Bool();
+      Node* cmp = b->in(1);
+      Node* v1 = cmp->in(1);
+      Node* v2 = cmp->in(2);
+      Node* otherproj = iff->proj_out(1 - ptr->as_Proj()->_con);
+
+      // Null check of the return of append which can simply be eliminated
+      if (b->_test._test == BoolTest::ne &&
+          v2->bottom_type() == TypePtr::NULL_PTR &&
+          v1->is_Proj() && ctrl_path.member(v1->in(0))) {
+        // NULL check of the return value of the append
+        null_check_count++;
+        if (otherproj->outcnt() == 1) {
+          CallStaticJavaNode* call = otherproj->unique_out()->isa_CallStaticJava();
+          if (call != NULL && call->_name != NULL && strcmp(call->_name, "uncommon_trap") == 0) {
+            ctrl_path.push(call);
+          }
+        }
+        _control.push(ptr);
+        ptr = ptr->in(0)->in(0);
+        continue;
+      }
+
+      // A test which leads to an uncommon trap which should be safe.
+      // Later this trap will be converted into a trap that restarts
+      // at the beginning.
+      if (otherproj->outcnt() == 1) {
+        CallStaticJavaNode* call = otherproj->unique_out()->isa_CallStaticJava();
+        if (call != NULL && call->_name != NULL && strcmp(call->_name, "uncommon_trap") == 0) {
+          // control flow leads to uct so should be ok
+          _uncommon_traps.push(call);
+          ctrl_path.push(call);
+          ptr = ptr->in(0)->in(0);
+          continue;
+        }
+      }
+
+#ifndef PRODUCT
+      // Some unexpected control flow we don't know how to handle.
+      if (PrintOptimizeStringConcat) {
+        tty->print_cr("failing with unknown test");
+        b->dump();
+        cmp->dump();
+        v1->dump();
+        v2->dump();
+        tty->cr();
+      }
+#endif
+      break;
+    } else if (ptr->is_Proj() && ptr->in(0)->is_Initialize()) {
+      ptr = ptr->in(0)->in(0);
+    } else if (ptr->is_Region()) {
+      Node* copy = ptr->as_Region()->is_copy();
+      if (copy != NULL) {
+        ptr = copy;
+        continue;
+      }
+      if (ptr->req() == 3 &&
+          ptr->in(1) != NULL && ptr->in(1)->is_Proj() &&
+          ptr->in(2) != NULL && ptr->in(2)->is_Proj() &&
+          ptr->in(1)->in(0) == ptr->in(2)->in(0) &&
+          ptr->in(1)->in(0) != NULL && ptr->in(1)->in(0)->is_If()) {
+        // Simple diamond.
+        // XXX should check for possibly merging stores.  simple data merges are ok.
+        ptr = ptr->in(1)->in(0)->in(0);
+        continue;
+      }
+#ifndef PRODUCT
+      if (PrintOptimizeStringConcat) {
+        tty->print_cr("fusion would fail for region");
+        _begin->dump();
+        ptr->dump(2);
+      }
+#endif
+      fail = true;
+      break;
+    } else {
+      // other unknown control
+      if (!fail) {
+#ifndef PRODUCT
+        if (PrintOptimizeStringConcat) {
+          tty->print_cr("fusion would fail for");
+          _begin->dump();
+        }
+#endif
+        fail = true;
+      }
+#ifndef PRODUCT
+      if (PrintOptimizeStringConcat) {
+        ptr->dump();
+      }
+#endif
+      ptr = ptr->in(0);
+    }
+  }
+#ifndef PRODUCT
+  if (PrintOptimizeStringConcat && fail) {
+    tty->cr();
+  }
+#endif
+  if (fail) return !fail;
+
+  // Validate that all these results produced are contained within
+  // this cluster of objects.  First collect all the results produced
+  // by calls in the region.
+  _stringopts->_visited.Clear();
+  Node_List worklist;
+  Node* final_result = _end->proj_out(TypeFunc::Parms);
+  for (uint i = 0; i < _control.size(); i++) {
+    CallNode* cnode = _control.at(i)->isa_Call();
+    if (cnode != NULL) {
+      _stringopts->_visited.test_set(cnode->_idx);
+    }
+    Node* result = cnode != NULL ? cnode->proj_out(TypeFunc::Parms) : NULL;
+    if (result != NULL && result != final_result) {
+      worklist.push(result);
+    }
+  }
+
+  Node* last_result = NULL;
+  while (worklist.size() > 0) {
+    Node* result = worklist.pop();
+    if (_stringopts->_visited.test_set(result->_idx))
+      continue;
+    for (SimpleDUIterator i(result); i.has_next(); i.next()) {
+      Node *use = i.get();
+      if (ctrl_path.member(use)) {
+        // already checked this
+        continue;
+      }
+      int opc = use->Opcode();
+      if (opc == Op_CmpP || opc == Op_Node) {
+        ctrl_path.push(use);
+        continue;
+      }
+      if (opc == Op_CastPP || opc == Op_CheckCastPP) {
+        for (SimpleDUIterator j(use); j.has_next(); j.next()) {
+          worklist.push(j.get());
+        }
+        worklist.push(use->in(1));
+        ctrl_path.push(use);
+        continue;
+      }
+#ifndef PRODUCT
+      if (PrintOptimizeStringConcat) {
+        if (result != last_result) {
+          last_result = result;
+          tty->print_cr("extra uses for result:");
+          last_result->dump();
+        }
+        use->dump();
+      }
+#endif
+      fail = true;
+      break;
+    }
+  }
+
+#ifndef PRODUCT
+  if (PrintOptimizeStringConcat && !fail) {
+    ttyLocker ttyl;
+    tty->cr();
+    tty->print("fusion would succeed (%d %d) for ", null_check_count, _uncommon_traps.size());
+    _begin->jvms()->dump_spec(tty); tty->cr();
+    for (int i = 0; i < num_arguments(); i++) {
+      argument(i)->dump();
+    }
+    _control.dump();
+    tty->cr();
+  }
+#endif
+
+  return !fail;
+}
+
+Node* PhaseStringOpts::fetch_static_field(GraphKit& kit, ciField* field) {
+  const TypeKlassPtr* klass_type = TypeKlassPtr::make(field->holder());
+  Node* klass_node = __ makecon(klass_type);
+  BasicType bt = field->layout_type();
+  ciType* field_klass = field->type();
+
+  const Type *type;
+  if( bt == T_OBJECT ) {
+    if (!field->type()->is_loaded()) {
+      type = TypeInstPtr::BOTTOM;
+    } else if (field->is_constant()) {
+      // This can happen if the constant oop is non-perm.
+      ciObject* con = field->constant_value().as_object();
+      // Do not "join" in the previous type; it doesn't add value,
+      // and may yield a vacuous result if the field is of interface type.
+      type = TypeOopPtr::make_from_constant(con)->isa_oopptr();
+      assert(type != NULL, "field singleton type must be consistent");
+    } else {
+      type = TypeOopPtr::make_from_klass(field_klass->as_klass());
+    }
+  } else {
+    type = Type::get_const_basic_type(bt);
+  }
+
+  return kit.make_load(NULL, kit.basic_plus_adr(klass_node, field->offset_in_bytes()),
+                       type, T_OBJECT,
+                       C->get_alias_index(klass_type->add_offset(field->offset_in_bytes())));
+}
+
+Node* PhaseStringOpts::int_stringSize(GraphKit& kit, Node* arg) {
+  RegionNode *final_merge = new (C, 3) RegionNode(3);
+  kit.gvn().set_type(final_merge, Type::CONTROL);
+  Node* final_size = new (C, 3) PhiNode(final_merge, TypeInt::INT);
+  kit.gvn().set_type(final_size, TypeInt::INT);
+
+  IfNode* iff = kit.create_and_map_if(kit.control(),
+                                      __ Bool(__ CmpI(arg, __ intcon(0x80000000)), BoolTest::ne),
+                                      PROB_FAIR, COUNT_UNKNOWN);
+  Node* is_min = __ IfFalse(iff);
+  final_merge->init_req(1, is_min);
+  final_size->init_req(1, __ intcon(11));
+
+  kit.set_control(__ IfTrue(iff));
+  if (kit.stopped()) {
+    final_merge->init_req(2, C->top());
+    final_size->init_req(2, C->top());
+  } else {
+
+    // int size = (i < 0) ? stringSize(-i) + 1 : stringSize(i);
+    RegionNode *r = new (C, 3) RegionNode(3);
+    kit.gvn().set_type(r, Type::CONTROL);
+    Node *phi = new (C, 3) PhiNode(r, TypeInt::INT);
+    kit.gvn().set_type(phi, TypeInt::INT);
+    Node *size = new (C, 3) PhiNode(r, TypeInt::INT);
+    kit.gvn().set_type(size, TypeInt::INT);
+    Node* chk = __ CmpI(arg, __ intcon(0));
+    Node* p = __ Bool(chk, BoolTest::lt);
+    IfNode* iff = kit.create_and_map_if(kit.control(), p, PROB_FAIR, COUNT_UNKNOWN);
+    Node* lessthan = __ IfTrue(iff);
+    Node* greaterequal = __ IfFalse(iff);
+    r->init_req(1, lessthan);
+    phi->init_req(1, __ SubI(__ intcon(0), arg));
+    size->init_req(1, __ intcon(1));
+    r->init_req(2, greaterequal);
+    phi->init_req(2, arg);
+    size->init_req(2, __ intcon(0));
+    kit.set_control(r);
+    C->record_for_igvn(r);
+    C->record_for_igvn(phi);
+    C->record_for_igvn(size);
+
+    // for (int i=0; ; i++)
+    //   if (x <= sizeTable[i])
+    //     return i+1;
+    RegionNode *loop = new (C, 3) RegionNode(3);
+    loop->init_req(1, kit.control());
+    kit.gvn().set_type(loop, Type::CONTROL);
+
+    Node *index = new (C, 3) PhiNode(loop, TypeInt::INT);
+    index->init_req(1, __ intcon(0));
+    kit.gvn().set_type(index, TypeInt::INT);
+    kit.set_control(loop);
+    Node* sizeTable = fetch_static_field(kit, size_table_field);
+
+    Node* value = kit.load_array_element(NULL, sizeTable, index, TypeAryPtr::INTS);
+    C->record_for_igvn(value);
+    Node* limit = __ CmpI(phi, value);
+    Node* limitb = __ Bool(limit, BoolTest::le);
+    IfNode* iff2 = kit.create_and_map_if(kit.control(), limitb, PROB_MIN, COUNT_UNKNOWN);
+    Node* lessEqual = __ IfTrue(iff2);
+    Node* greater = __ IfFalse(iff2);
+
+    loop->init_req(2, greater);
+    index->init_req(2, __ AddI(index, __ intcon(1)));
+
+    kit.set_control(lessEqual);
+    C->record_for_igvn(loop);
+    C->record_for_igvn(index);
+
+    final_merge->init_req(2, kit.control());
+    final_size->init_req(2, __ AddI(__ AddI(index, size), __ intcon(1)));
+  }
+
+  kit.set_control(final_merge);
+  C->record_for_igvn(final_merge);
+  C->record_for_igvn(final_size);
+
+  return final_size;
+}
+
+void PhaseStringOpts::int_getChars(GraphKit& kit, Node* arg, Node* char_array, Node* start, Node* end) {
+  RegionNode *final_merge = new (C, 4) RegionNode(4);
+  kit.gvn().set_type(final_merge, Type::CONTROL);
+  Node *final_mem = PhiNode::make(final_merge, kit.memory(char_adr_idx), Type::MEMORY, TypeAryPtr::CHARS);
+  kit.gvn().set_type(final_mem, Type::MEMORY);
+
+  // need to handle Integer.MIN_VALUE specially because negating doesn't make it positive
+  {
+    // i == MIN_VALUE
+    IfNode* iff = kit.create_and_map_if(kit.control(),
+                                        __ Bool(__ CmpI(arg, __ intcon(0x80000000)), BoolTest::ne),
+                                        PROB_FAIR, COUNT_UNKNOWN);
+
+    Node* old_mem = kit.memory(char_adr_idx);
+
+    kit.set_control(__ IfFalse(iff));
+    if (kit.stopped()) {
+      // Statically not equal to MIN_VALUE so this path is dead
+      final_merge->init_req(3, kit.control());
+    } else {
+      copy_string(kit, __ makecon(TypeInstPtr::make(C->env()->the_min_jint_string())),
+                  char_array, start);
+      final_merge->init_req(3, kit.control());
+      final_mem->init_req(3, kit.memory(char_adr_idx));
+    }
+
+    kit.set_control(__ IfTrue(iff));
+    kit.set_memory(old_mem, char_adr_idx);
+  }
+
+
+  // Simplified version of Integer.getChars
+
+  // int q, r;
+  // int charPos = index;
+  Node* charPos = end;
+
+  // char sign = 0;
+
+  Node* i = arg;
+  Node* sign = __ intcon(0);
+
+  // if (i < 0) {
+  //     sign = '-';
+  //     i = -i;
+  // }
+  {
+    IfNode* iff = kit.create_and_map_if(kit.control(),
+                                        __ Bool(__ CmpI(arg, __ intcon(0)), BoolTest::lt),
+                                        PROB_FAIR, COUNT_UNKNOWN);
+
+    RegionNode *merge = new (C, 3) RegionNode(3);
+    kit.gvn().set_type(merge, Type::CONTROL);
+    i = new (C, 3) PhiNode(merge, TypeInt::INT);
+    kit.gvn().set_type(i, TypeInt::INT);
+    sign = new (C, 3) PhiNode(merge, TypeInt::INT);
+    kit.gvn().set_type(sign, TypeInt::INT);
+
+    merge->init_req(1, __ IfTrue(iff));
+    i->init_req(1, __ SubI(__ intcon(0), arg));
+    sign->init_req(1, __ intcon('-'));
+    merge->init_req(2, __ IfFalse(iff));
+    i->init_req(2, arg);
+    sign->init_req(2, __ intcon(0));
+
+    kit.set_control(merge);
+
+    C->record_for_igvn(merge);
+    C->record_for_igvn(i);
+    C->record_for_igvn(sign);
+  }
+
+  // for (;;) {
+  //     q = i / 10;
+  //     r = i - ((q << 3) + (q << 1));  // r = i-(q*10) ...
+  //     buf [--charPos] = digits [r];
+  //     i = q;
+  //     if (i == 0) break;
+  // }
+
+  {
+    RegionNode *head = new (C, 3) RegionNode(3);
+    head->init_req(1, kit.control());
+    kit.gvn().set_type(head, Type::CONTROL);
+    Node *i_phi = new (C, 3) PhiNode(head, TypeInt::INT);
+    i_phi->init_req(1, i);
+    kit.gvn().set_type(i_phi, TypeInt::INT);
+    charPos = PhiNode::make(head, charPos);
+    kit.gvn().set_type(charPos, TypeInt::INT);
+    Node *mem = PhiNode::make(head, kit.memory(char_adr_idx), Type::MEMORY, TypeAryPtr::CHARS);
+    kit.gvn().set_type(mem, Type::MEMORY);
+    kit.set_control(head);
+    kit.set_memory(mem, char_adr_idx);
+
+    Node* q = __ DivI(kit.null(), i_phi, __ intcon(10));
+    Node* r = __ SubI(i_phi, __ AddI(__ LShiftI(q, __ intcon(3)),
+                                     __ LShiftI(q, __ intcon(1))));
+    Node* m1 = __ SubI(charPos, __ intcon(1));
+    Node* ch = __ AddI(r, __ intcon('0'));
+
+    Node* st = __ store_to_memory(kit.control(), kit.array_element_address(char_array, m1, T_CHAR),
+                                  ch, T_CHAR, char_adr_idx);
+
+
+    IfNode* iff = kit.create_and_map_if(head, __ Bool(__ CmpI(q, __ intcon(0)), BoolTest::ne),
+                                        PROB_FAIR, COUNT_UNKNOWN);
+    Node* ne = __ IfTrue(iff);
+    Node* eq = __ IfFalse(iff);
+
+    head->init_req(2, ne);
+    mem->init_req(2, st);
+    i_phi->init_req(2, q);
+    charPos->init_req(2, m1);
+
+    charPos = m1;
+
+    kit.set_control(eq);
+    kit.set_memory(st, char_adr_idx);
+
+    C->record_for_igvn(head);
+    C->record_for_igvn(mem);
+    C->record_for_igvn(i_phi);
+    C->record_for_igvn(charPos);
+  }
+
+  {
+    // if (sign != 0) {
+    //     buf [--charPos] = sign;
+    // }
+    IfNode* iff = kit.create_and_map_if(kit.control(),
+                                        __ Bool(__ CmpI(sign, __ intcon(0)), BoolTest::ne),
+                                        PROB_FAIR, COUNT_UNKNOWN);
+
+    final_merge->init_req(2, __ IfFalse(iff));
+    final_mem->init_req(2, kit.memory(char_adr_idx));
+
+    kit.set_control(__ IfTrue(iff));
+    if (kit.stopped()) {
+      final_merge->init_req(1, C->top());
+      final_mem->init_req(1, C->top());
+    } else {
+      Node* m1 = __ SubI(charPos, __ intcon(1));
+      Node* st = __ store_to_memory(kit.control(), kit.array_element_address(char_array, m1, T_CHAR),
+                                    sign, T_CHAR, char_adr_idx);
+
+      final_merge->init_req(1, kit.control());
+      final_mem->init_req(1, st);
+    }
+
+    kit.set_control(final_merge);
+    kit.set_memory(final_mem, char_adr_idx);
+
+    C->record_for_igvn(final_merge);
+    C->record_for_igvn(final_mem);
+  }
+}
+
+
+Node* PhaseStringOpts::copy_string(GraphKit& kit, Node* str, Node* char_array, Node* start) {
+  Node* string = str;
+  Node* offset = kit.make_load(NULL,
+                               kit.basic_plus_adr(string, string, java_lang_String::offset_offset_in_bytes()),
+                               TypeInt::INT, T_INT, offset_field_idx);
+  Node* count = kit.make_load(NULL,
+                              kit.basic_plus_adr(string, string, java_lang_String::count_offset_in_bytes()),
+                              TypeInt::INT, T_INT, count_field_idx);
+  const TypeAryPtr*  value_type = TypeAryPtr::make(TypePtr::NotNull,
+                                                   TypeAry::make(TypeInt::CHAR,TypeInt::POS),
+                                                   ciTypeArrayKlass::make(T_CHAR), true, 0);
+  Node* value = kit.make_load(NULL,
+                              kit.basic_plus_adr(string, string, java_lang_String::value_offset_in_bytes()),
+                              value_type, T_OBJECT, value_field_idx);
+
+  // copy the contents
+  if (offset->is_Con() && count->is_Con() && value->is_Con() && count->get_int() < unroll_string_copy_length) {
+    // For small constant strings just emit individual stores.
+    // A length of 6 seems like a good space/speed tradeof.
+    int c = count->get_int();
+    int o = offset->get_int();
+    const TypeOopPtr* t = kit.gvn().type(value)->isa_oopptr();
+    ciTypeArray* value_array = t->const_oop()->as_type_array();
+    for (int e = 0; e < c; e++) {
+      __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
+                         __ intcon(value_array->char_at(o + e)), T_CHAR, char_adr_idx);
+      start = __ AddI(start, __ intcon(1));
+    }
+  } else {
+    Node* src_ptr = kit.array_element_address(value, offset, T_CHAR);
+    Node* dst_ptr = kit.array_element_address(char_array, start, T_CHAR);
+    Node* c = count;
+    Node* extra = NULL;
+#ifdef _LP64
+    c = __ ConvI2L(c);
+    extra = C->top();
+#endif
+    Node* call = kit.make_runtime_call(GraphKit::RC_LEAF|GraphKit::RC_NO_FP,
+                                       OptoRuntime::fast_arraycopy_Type(),
+                                       CAST_FROM_FN_PTR(address, StubRoutines::jshort_disjoint_arraycopy()),
+                                       "jshort_disjoint_arraycopy", TypeAryPtr::CHARS,
+                                       src_ptr, dst_ptr, c, extra);
+    start = __ AddI(start, count);
+  }
+  return start;
+}
+
+
+void PhaseStringOpts::replace_string_concat(StringConcat* sc) {
+  // Log a little info about the transformation
+  sc->maybe_log_transform();
+
+  // pull the JVMState of the allocation into a SafePointNode to serve as
+  // as a shim for the insertion of the new code.
+  JVMState* jvms     = sc->begin()->jvms()->clone_shallow(C);
+  uint size = sc->begin()->req();
+  SafePointNode* map = new (C, size) SafePointNode(size, jvms);
+
+  // copy the control and memory state from the final call into our
+  // new starting state.  This allows any preceeding tests to feed
+  // into the new section of code.
+  for (uint i1 = 0; i1 < TypeFunc::Parms; i1++) {
+    map->init_req(i1, sc->end()->in(i1));
+  }
+  // blow away old allocation arguments
+  for (uint i1 = TypeFunc::Parms; i1 < jvms->debug_start(); i1++) {
+    map->init_req(i1, C->top());
+  }
+  // Copy the rest of the inputs for the JVMState
+  for (uint i1 = jvms->debug_start(); i1 < sc->begin()->req(); i1++) {
+    map->init_req(i1, sc->begin()->in(i1));
+  }
+  // Make sure the memory state is a MergeMem for parsing.
+  if (!map->in(TypeFunc::Memory)->is_MergeMem()) {
+    map->set_req(TypeFunc::Memory, MergeMemNode::make(C, map->in(TypeFunc::Memory)));
+  }
+
+  jvms->set_map(map);
+  map->ensure_stack(jvms, jvms->method()->max_stack());
+
+
+  // disconnect all the old StringBuilder calls from the graph
+  sc->eliminate_unneeded_control();
+
+  // At this point all the old work has been completely removed from
+  // the graph and the saved JVMState exists at the point where the
+  // final toString call used to be.
+  GraphKit kit(jvms);
+
+  // There may be uncommon traps which are still using the
+  // intermediate states and these need to be rewritten to point at
+  // the JVMState at the beginning of the transformation.
+  sc->convert_uncommon_traps(kit, jvms);
+
+  // Now insert the logic to compute the size of the string followed
+  // by all the logic to construct array and resulting string.
+
+  Node* null_string = __ makecon(TypeInstPtr::make(C->env()->the_null_string()));
+
+  // Create a region for the overflow checks to merge into.
+  int args = MAX2(sc->num_arguments(), 1);
+  RegionNode* overflow = new (C, args) RegionNode(args);
+  kit.gvn().set_type(overflow, Type::CONTROL);
+
+  // Create a hook node to hold onto the individual sizes since they
+  // are need for the copying phase.
+  Node* string_sizes = new (C, args) Node(args);
+
+  Node* length = __ intcon(0);
+  for (int argi = 0; argi < sc->num_arguments(); argi++) {
+    Node* arg = sc->argument(argi);
+    switch (sc->mode(argi)) {
+      case StringConcat::IntMode: {
+        Node* string_size = int_stringSize(kit, arg);
+
+        // accumulate total
+        length = __ AddI(length, string_size);
+
+        // Cache this value for the use by int_toString
+        string_sizes->init_req(argi, string_size);
+        break;
+      }
+      case StringConcat::StringMode: {
+        const Type* type = kit.gvn().type(arg);
+        if (type == TypePtr::NULL_PTR) {
+          // replace the argument with the null checked version
+          arg = null_string;
+          sc->set_argument(argi, arg);
+        } else if (!type->higher_equal(TypeInstPtr::NOTNULL)) {
+          // s = s != null ? s : "null";
+          // length = length + (s.count - s.offset);
+          RegionNode *r = new (C, 3) RegionNode(3);
+          kit.gvn().set_type(r, Type::CONTROL);
+          Node *phi = new (C, 3) PhiNode(r, type->join(TypeInstPtr::NOTNULL));
+          kit.gvn().set_type(phi, phi->bottom_type());
+          Node* p = __ Bool(__ CmpP(arg, kit.null()), BoolTest::ne);
+          IfNode* iff = kit.create_and_map_if(kit.control(), p, PROB_MIN, COUNT_UNKNOWN);
+          Node* notnull = __ IfTrue(iff);
+          Node* isnull =  __ IfFalse(iff);
+          r->init_req(1, notnull);
+          phi->init_req(1, arg);
+          r->init_req(2, isnull);
+          phi->init_req(2, null_string);
+          kit.set_control(r);
+          C->record_for_igvn(r);
+          C->record_for_igvn(phi);
+          // replace the argument with the null checked version
+          arg = phi;
+          sc->set_argument(argi, arg);
+        }
+        //         Node* offset = kit.make_load(NULL, kit.basic_plus_adr(arg, arg, offset_offset),
+        //                                      TypeInt::INT, T_INT, offset_field_idx);
+        Node* count = kit.make_load(NULL, kit.basic_plus_adr(arg, arg, java_lang_String::count_offset_in_bytes()),
+                                    TypeInt::INT, T_INT, count_field_idx);
+        length = __ AddI(length, count);
+        string_sizes->init_req(argi, NULL);
+        break;
+      }
+      case StringConcat::CharMode: {
+        // one character only
+        length = __ AddI(length, __ intcon(1));
+        break;
+      }
+      default:
+        ShouldNotReachHere();
+    }
+    if (argi > 0) {
+      // Check that the sum hasn't overflowed
+      IfNode* iff = kit.create_and_map_if(kit.control(),
+                                          __ Bool(__ CmpI(length, __ intcon(0)), BoolTest::lt),
+                                          PROB_MIN, COUNT_UNKNOWN);
+      kit.set_control(__ IfFalse(iff));
+      overflow->set_req(argi, __ IfTrue(iff));
+    }
+  }
+
+  {
+    // Hook
+    PreserveJVMState pjvms(&kit);
+    kit.set_control(overflow);
+    kit.uncommon_trap(Deoptimization::Reason_intrinsic,
+                      Deoptimization::Action_make_not_entrant);
+  }
+
+  // length now contains the number of characters needed for the
+  // char[] so create a new AllocateArray for the char[]
+  Node* char_array = NULL;
+  {
+    PreserveReexecuteState preexecs(&kit);
+    // The original jvms is for an allocation of either a String or
+    // StringBuffer so no stack adjustment is necessary for proper
+    // reexecution.  If we deoptimize in the slow path the bytecode
+    // will be reexecuted and the char[] allocation will be thrown away.
+    kit.jvms()->set_should_reexecute(true);
+    char_array = kit.new_array(__ makecon(TypeKlassPtr::make(ciTypeArrayKlass::make(T_CHAR))),
+                               length, 1);
+  }
+
+  // Mark the allocation so that zeroing is skipped since the code
+  // below will overwrite the entire array
+  AllocateArrayNode* char_alloc = AllocateArrayNode::Ideal_array_allocation(char_array, _gvn);
+  char_alloc->maybe_set_complete(_gvn);
+
+  // Now copy the string representations into the final char[]
+  Node* start = __ intcon(0);
+  for (int argi = 0; argi < sc->num_arguments(); argi++) {
+    Node* arg = sc->argument(argi);
+    switch (sc->mode(argi)) {
+      case StringConcat::IntMode: {
+        Node* end = __ AddI(start, string_sizes->in(argi));
+        // getChars words backwards so pass the ending point as well as the start
+        int_getChars(kit, arg, char_array, start, end);
+        start = end;
+        break;
+      }
+      case StringConcat::StringMode: {
+        start = copy_string(kit, arg, char_array, start);
+        break;
+      }
+      case StringConcat::CharMode: {
+        __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
+                           arg, T_CHAR, char_adr_idx);
+        start = __ AddI(start, __ intcon(1));
+        break;
+      }
+      default:
+        ShouldNotReachHere();
+    }
+  }
+
+  // If we're not reusing an existing String allocation then allocate one here.
+  Node* result = sc->string_alloc();
+  if (result == NULL) {
+    PreserveReexecuteState preexecs(&kit);
+    // The original jvms is for an allocation of either a String or
+    // StringBuffer so no stack adjustment is necessary for proper
+    // reexecution.
+    kit.jvms()->set_should_reexecute(true);
+    result = kit.new_instance(__ makecon(TypeKlassPtr::make(C->env()->String_klass())));
+  }
+
+  // Intialize the string
+  kit.store_to_memory(kit.control(), kit.basic_plus_adr(result, java_lang_String::offset_offset_in_bytes()),
+                      __ intcon(0), T_INT, offset_field_idx);
+  kit.store_to_memory(kit.control(), kit.basic_plus_adr(result, java_lang_String::count_offset_in_bytes()),
+                      length, T_INT, count_field_idx);
+  kit.store_to_memory(kit.control(), kit.basic_plus_adr(result, java_lang_String::value_offset_in_bytes()),
+                      char_array, T_OBJECT, value_field_idx);
+
+  // hook up the outgoing control and result
+  kit.replace_call(sc->end(), result);
+
+  // Unhook any hook nodes
+  string_sizes->disconnect_inputs(NULL);
+  sc->cleanup();
+}
diff --git a/src/share/vm/opto/stringopts.hpp b/src/share/vm/opto/stringopts.hpp
new file mode 100644
index 000000000..417e08a91
--- /dev/null
+++ b/src/share/vm/opto/stringopts.hpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class StringConcat;
+
+class PhaseStringOpts : public Phase {
+  friend class StringConcat;
+
+ private:
+  PhaseGVN* _gvn;
+
+  // List of dead nodes to clean up aggressively at the end
+  Unique_Node_List dead_worklist;
+
+  // Memory slices needed for code gen
+  int char_adr_idx;
+  int value_field_idx;
+  int count_field_idx;
+  int offset_field_idx;
+
+  // Integer.sizeTable - used for int to String conversion
+  ciField* size_table_field;
+
+  // A set for use by various stages
+  VectorSet _visited;
+
+  // Collect a list of all SB.toString calls
+  Node_List collect_toString_calls();
+
+  // Examine the use of the SB alloc to see if it can be replace with
+  // a single string construction.
+  StringConcat* build_candidate(CallStaticJavaNode* call);
+
+  // Replace all the SB calls in concat with an optimization String allocation
+  void replace_string_concat(StringConcat* concat);
+
+  // Load the value of a static field, performing any constant folding.
+  Node* fetch_static_field(GraphKit& kit, ciField* field);
+
+  // Compute the number of characters required to represent the int value
+  Node* int_stringSize(GraphKit& kit, Node* value);
+
+  // Copy the characters representing value into char_array starting at start
+  void int_getChars(GraphKit& kit, Node* value, Node* char_array, Node* start, Node* end);
+
+  // Copy of the contents of the String str into char_array starting at index start.
+  Node* copy_string(GraphKit& kit, Node* str, Node* char_array, Node* start);
+
+  // Clean up any leftover nodes
+  void record_dead_node(Node* node);
+  void remove_dead_nodes();
+
+  PhaseGVN* gvn() { return _gvn; }
+
+  enum {
+    // max length of constant string copy unrolling in copy_string
+    unroll_string_copy_length = 6
+  };
+
+ public:
+  PhaseStringOpts(PhaseGVN* gvn, Unique_Node_List* worklist);
+};
diff --git a/src/share/vm/opto/type.hpp b/src/share/vm/opto/type.hpp
index 9b11f9ca5..03f81532c 100644
--- a/src/share/vm/opto/type.hpp
+++ b/src/share/vm/opto/type.hpp
@@ -847,9 +847,6 @@ public:
   // Constant pointer to array
   static const TypeAryPtr *make( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = InstanceBot);
 
-  // Convenience
-  static const TypeAryPtr *make(ciObject* o);
-
   // Return a 'ptr' version of this type
   virtual const Type *cast_to_ptr_type(PTR ptr) const;
 
diff --git a/src/share/vm/runtime/globals.cpp b/src/share/vm/runtime/globals.cpp
index 71fec5524..03e068dea 100644
--- a/src/share/vm/runtime/globals.cpp
+++ b/src/share/vm/runtime/globals.cpp
@@ -46,7 +46,8 @@ bool Flag::is_unlocker() const {
 bool Flag::is_unlocked() const {
   if (strcmp(kind, "{diagnostic}") == 0) {
     return UnlockDiagnosticVMOptions;
-  } else if (strcmp(kind, "{experimental}") == 0) {
+  } else if (strcmp(kind, "{experimental}") == 0 ||
+             strcmp(kind, "{C2 experimental}") == 0) {
     return UnlockExperimentalVMOptions;
   } else {
     return true;
@@ -169,6 +170,7 @@ void Flag::print_as_flag(outputStream* st) {
 #define C2_PRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{C2 product}", DEFAULT },
 #define C2_PD_PRODUCT_FLAG_STRUCT(type, name, doc)     { #type, XSTR(name), &name, "{C2 pd product}", DEFAULT },
 #define C2_DIAGNOSTIC_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{C2 diagnostic}", DEFAULT },
+#define C2_EXPERIMENTAL_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, "{C2 experimental}", DEFAULT },
 #ifdef PRODUCT
   #define C2_DEVELOP_FLAG_STRUCT(type, name, value, doc) /* flag is constant */
   #define C2_PD_DEVELOP_FLAG_STRUCT(type, name, doc)     /* flag is constant */
@@ -190,7 +192,7 @@ static Flag flagTable[] = {
  C1_FLAGS(C1_DEVELOP_FLAG_STRUCT, C1_PD_DEVELOP_FLAG_STRUCT, C1_PRODUCT_FLAG_STRUCT, C1_PD_PRODUCT_FLAG_STRUCT, C1_NOTPRODUCT_FLAG_STRUCT)
 #endif
 #ifdef COMPILER2
- C2_FLAGS(C2_DEVELOP_FLAG_STRUCT, C2_PD_DEVELOP_FLAG_STRUCT, C2_PRODUCT_FLAG_STRUCT, C2_PD_PRODUCT_FLAG_STRUCT, C2_DIAGNOSTIC_FLAG_STRUCT, C2_NOTPRODUCT_FLAG_STRUCT)
+ C2_FLAGS(C2_DEVELOP_FLAG_STRUCT, C2_PD_DEVELOP_FLAG_STRUCT, C2_PRODUCT_FLAG_STRUCT, C2_PD_PRODUCT_FLAG_STRUCT, C2_DIAGNOSTIC_FLAG_STRUCT, C2_EXPERIMENTAL_FLAG_STRUCT, C2_NOTPRODUCT_FLAG_STRUCT)
 #endif
  {0, NULL, NULL}
 };
diff --git a/src/share/vm/runtime/globals_extension.hpp b/src/share/vm/runtime/globals_extension.hpp
index 35844b7d2..f32cab1cc 100644
--- a/src/share/vm/runtime/globals_extension.hpp
+++ b/src/share/vm/runtime/globals_extension.hpp
@@ -64,6 +64,7 @@
 #define C2_PRODUCT_FLAG_MEMBER(type, name, value, doc)         FLAG_MEMBER(name),
 #define C2_PD_PRODUCT_FLAG_MEMBER(type, name, doc)             FLAG_MEMBER(name),
 #define C2_DIAGNOSTIC_FLAG_MEMBER(type, name, value, doc)      FLAG_MEMBER(name),
+#define C2_EXPERIMENTAL_FLAG_MEMBER(type, name, value, doc)    FLAG_MEMBER(name),
 #ifdef PRODUCT
   #define C2_DEVELOP_FLAG_MEMBER(type, name, value, doc)       /* flag is constant */
   #define C2_PD_DEVELOP_FLAG_MEMBER(type, name, doc)           /* flag is constant */
@@ -84,7 +85,7 @@ typedef enum {
  C1_FLAGS(C1_DEVELOP_FLAG_MEMBER, C1_PD_DEVELOP_FLAG_MEMBER, C1_PRODUCT_FLAG_MEMBER, C1_PD_PRODUCT_FLAG_MEMBER, C1_NOTPRODUCT_FLAG_MEMBER)
 #endif
 #ifdef COMPILER2
- C2_FLAGS(C2_DEVELOP_FLAG_MEMBER, C2_PD_DEVELOP_FLAG_MEMBER, C2_PRODUCT_FLAG_MEMBER, C2_PD_PRODUCT_FLAG_MEMBER, C2_DIAGNOSTIC_FLAG_MEMBER, C2_NOTPRODUCT_FLAG_MEMBER)
+ C2_FLAGS(C2_DEVELOP_FLAG_MEMBER, C2_PD_DEVELOP_FLAG_MEMBER, C2_PRODUCT_FLAG_MEMBER, C2_PD_PRODUCT_FLAG_MEMBER, C2_DIAGNOSTIC_FLAG_MEMBER, C2_EXPERIMENTAL_FLAG_MEMBER, C2_NOTPRODUCT_FLAG_MEMBER)
 #endif
  NUM_CommandLineFlag
 } CommandLineFlag;
@@ -130,6 +131,7 @@ typedef enum {
 #define C2_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)         FLAG_MEMBER_WITH_TYPE(name,type),
 #define C2_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, doc)             FLAG_MEMBER_WITH_TYPE(name,type),
 #define C2_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)      FLAG_MEMBER_WITH_TYPE(name,type),
+#define C2_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)      FLAG_MEMBER_WITH_TYPE(name,type),
 #ifdef PRODUCT
   #define C2_DEVELOP_FLAG_MEMBER_WITH_TYPE(type, name, value, doc)       /* flag is constant */
   #define C2_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE(type, name, doc)           /* flag is constant */
@@ -181,6 +183,7 @@ typedef enum {
           C2_PRODUCT_FLAG_MEMBER_WITH_TYPE,
           C2_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE,
           C2_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE,
+          C2_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE,
           C2_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE)
 #endif
  NUM_CommandLineFlagWithType
diff --git a/src/share/vm/services/g1MemoryPool.cpp b/src/share/vm/services/g1MemoryPool.cpp
new file mode 100644
index 000000000..7b73f7c79
--- /dev/null
+++ b/src/share/vm/services/g1MemoryPool.cpp
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_g1MemoryPool.cpp.incl"
+
+G1MemoryPoolSuper::G1MemoryPoolSuper(G1CollectedHeap* g1h,
+                                     const char* name,
+                                     size_t init_size,
+                                     size_t max_size,
+                                     bool support_usage_threshold) :
+  _g1h(g1h), CollectedMemoryPool(name,
+                                 MemoryPool::Heap,
+                                 init_size,
+                                 max_size,
+                                 support_usage_threshold) {
+  assert(UseG1GC, "sanity");
+}
+
+// See the comment at the top of g1MemoryPool.hpp
+size_t G1MemoryPoolSuper::eden_space_committed(G1CollectedHeap* g1h) {
+  return MAX2(eden_space_used(g1h), (size_t) HeapRegion::GrainBytes);
+}
+
+// See the comment at the top of g1MemoryPool.hpp
+size_t G1MemoryPoolSuper::eden_space_used(G1CollectedHeap* g1h) {
+  size_t young_list_length = g1h->young_list_length();
+  size_t eden_used = young_list_length * HeapRegion::GrainBytes;
+  size_t survivor_used = survivor_space_used(g1h);
+  eden_used = subtract_up_to_zero(eden_used, survivor_used);
+  return eden_used;
+}
+
+// See the comment at the top of g1MemoryPool.hpp
+size_t G1MemoryPoolSuper::eden_space_max(G1CollectedHeap* g1h) {
+  // This should ensure that it returns a value no smaller than the
+  // region size. Currently, eden_space_committed() guarantees that.
+  return eden_space_committed(g1h);
+}
+
+// See the comment at the top of g1MemoryPool.hpp
+size_t G1MemoryPoolSuper::survivor_space_committed(G1CollectedHeap* g1h) {
+  return MAX2(survivor_space_used(g1h), (size_t) HeapRegion::GrainBytes);
+}
+
+// See the comment at the top of g1MemoryPool.hpp
+size_t G1MemoryPoolSuper::survivor_space_used(G1CollectedHeap* g1h) {
+  size_t survivor_num = g1h->g1_policy()->recorded_survivor_regions();
+  size_t survivor_used = survivor_num * HeapRegion::GrainBytes;
+  return survivor_used;
+}
+
+// See the comment at the top of g1MemoryPool.hpp
+size_t G1MemoryPoolSuper::survivor_space_max(G1CollectedHeap* g1h) {
+  // This should ensure that it returns a value no smaller than the
+  // region size. Currently, survivor_space_committed() guarantees that.
+  return survivor_space_committed(g1h);
+}
+
+// See the comment at the top of g1MemoryPool.hpp
+size_t G1MemoryPoolSuper::old_space_committed(G1CollectedHeap* g1h) {
+  size_t committed = overall_committed(g1h);
+  size_t eden_committed = eden_space_committed(g1h);
+  size_t survivor_committed = survivor_space_committed(g1h);
+  committed = subtract_up_to_zero(committed, eden_committed);
+  committed = subtract_up_to_zero(committed, survivor_committed);
+  committed = MAX2(committed, (size_t) HeapRegion::GrainBytes);
+  return committed;
+}
+
+// See the comment at the top of g1MemoryPool.hpp
+size_t G1MemoryPoolSuper::old_space_used(G1CollectedHeap* g1h) {
+  size_t used = overall_used(g1h);
+  size_t eden_used = eden_space_used(g1h);
+  size_t survivor_used = survivor_space_used(g1h);
+  used = subtract_up_to_zero(used, eden_used);
+  used = subtract_up_to_zero(used, survivor_used);
+  return used;
+}
+
+// See the comment at the top of g1MemoryPool.hpp
+size_t G1MemoryPoolSuper::old_space_max(G1CollectedHeap* g1h) {
+  size_t max = overall_max(g1h);
+  size_t eden_max = eden_space_max(g1h);
+  size_t survivor_max = survivor_space_max(g1h);
+  max = subtract_up_to_zero(max, eden_max);
+  max = subtract_up_to_zero(max, survivor_max);
+  max = MAX2(max, (size_t) HeapRegion::GrainBytes);
+  return max;
+}
+
+G1EdenPool::G1EdenPool(G1CollectedHeap* g1h) :
+  G1MemoryPoolSuper(g1h,
+                    "G1 Eden",
+                    eden_space_committed(g1h), /* init_size */
+                    eden_space_max(g1h), /* max_size */
+                    false /* support_usage_threshold */) {
+}
+
+MemoryUsage G1EdenPool::get_memory_usage() {
+  size_t initial_sz = initial_size();
+  size_t max_sz     = max_size();
+  size_t used       = used_in_bytes();
+  size_t committed  = eden_space_committed(_g1h);
+
+  return MemoryUsage(initial_sz, used, committed, max_sz);
+}
+
+G1SurvivorPool::G1SurvivorPool(G1CollectedHeap* g1h) :
+  G1MemoryPoolSuper(g1h,
+                    "G1 Survivor",
+                    survivor_space_committed(g1h), /* init_size */
+                    survivor_space_max(g1h), /* max_size */
+                    false /* support_usage_threshold */) {
+}
+
+MemoryUsage G1SurvivorPool::get_memory_usage() {
+  size_t initial_sz = initial_size();
+  size_t max_sz     = max_size();
+  size_t used       = used_in_bytes();
+  size_t committed  = survivor_space_committed(_g1h);
+
+  return MemoryUsage(initial_sz, used, committed, max_sz);
+}
+
+G1OldGenPool::G1OldGenPool(G1CollectedHeap* g1h) :
+  G1MemoryPoolSuper(g1h,
+                    "G1 Old Gen",
+                    old_space_committed(g1h), /* init_size */
+                    old_space_max(g1h), /* max_size */
+                    true /* support_usage_threshold */) {
+}
+
+MemoryUsage G1OldGenPool::get_memory_usage() {
+  size_t initial_sz = initial_size();
+  size_t max_sz     = max_size();
+  size_t used       = used_in_bytes();
+  size_t committed  = old_space_committed(_g1h);
+
+  return MemoryUsage(initial_sz, used, committed, max_sz);
+}
diff --git a/src/share/vm/services/g1MemoryPool.hpp b/src/share/vm/services/g1MemoryPool.hpp
new file mode 100644
index 000000000..cfc61e5f1
--- /dev/null
+++ b/src/share/vm/services/g1MemoryPool.hpp
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class G1CollectedHeap;
+
+// This file contains the three classes that represent the memory
+// pools of the G1 spaces: G1EdenPool, G1SurvivorPool, and
+// G1OldGenPool. In G1, unlike our other GCs, we do not have a
+// physical space for each of those spaces. Instead, we allocate
+// regions for all three spaces out of a single pool of regions (that
+// pool basically covers the entire heap). As a result, the eden,
+// survivor, and old gen are considered logical spaces in G1, as each
+// is a set of non-contiguous regions. This is also reflected in the
+// way we map them to memory pools here. The easiest way to have done
+// this would have been to map the entire G1 heap to a single memory
+// pool. However, it's helpful to show how large the eden and survivor
+// get, as this does affect the performance and behavior of G1. Which
+// is why we introduce the three memory pools implemented here.
+//
+// The above approach inroduces a couple of challenging issues in the
+// implementation of the three memory pools:
+//
+// 1) The used space calculation for a pool is not necessarily
+// independent of the others. We can easily get from G1 the overall
+// used space in the entire heap, the number of regions in the young
+// generation (includes both eden and survivors), and the number of
+// survivor regions. So, from that we calculate:
+//
+//  survivor_used = survivor_num * region_size
+//  eden_used     = young_region_num * region_size - survivor_used
+//  old_gen_used  = overall_used - eden_used - survivor_used
+//
+// Note that survivor_used and eden_used are upper bounds. To get the
+// actual value we would have to iterate over the regions and add up
+// ->used(). But that'd be expensive. So, we'll accept some lack of
+// accuracy for those two. But, we have to be careful when calculating
+// old_gen_used, in case we subtract from overall_used more then the
+// actual number and our result goes negative.
+//
+// 2) Calculating the used space is straightforward, as described
+// above. However, how do we calculate the committed space, given that
+// we allocate space for the eden, survivor, and old gen out of the
+// same pool of regions? One way to do this is to use the used value
+// as also the committed value for the eden and survivor spaces and
+// then calculate the old gen committed space as follows:
+//
+//  old_gen_committed = overall_committed - eden_committed - survivor_committed
+//
+// Maybe a better way to do that would be to calculate used for eden
+// and survivor as a sum of ->used() over their regions and then
+// calculate committed as region_num * region_size (i.e., what we use
+// to calculate the used space now). This is something to consider
+// in the future.
+//
+// 3) Another decision that is again not straightforward is what is
+// the max size that each memory pool can grow to. Right now, we set
+// that the committed size for the eden and the survivors and
+// calculate the old gen max as follows (basically, it's a similar
+// pattern to what we use for the committed space, as described
+// above):
+//
+//  old_gen_max = overall_max - eden_max - survivor_max
+//
+// 4) Now, there is a very subtle issue with all the above. The
+// framework will call get_memory_usage() on the three pools
+// asynchronously. As a result, each call might get a different value
+// for, say, survivor_num which will yield inconsistent values for
+// eden_used, survivor_used, and old_gen_used (as survivor_num is used
+// in the calculation of all three). This would normally be
+// ok. However, it's possible that this might cause the sum of
+// eden_used, survivor_used, and old_gen_used to go over the max heap
+// size and this seems to sometimes cause JConsole (and maybe other
+// clients) to get confused. There's not a really an easy / clean
+// solution to this problem, due to the asynchrounous nature of the
+// framework.
+
+
+// This class is shared by the three G1 memory pool classes
+// (G1EdenPool, G1SurvivorPool, G1OldGenPool). Given that the way we
+// calculate used / committed bytes for these three pools is related
+// (see comment above), we put the calculations in this class so that
+// we can easily share them among the subclasses.
+class G1MemoryPoolSuper : public CollectedMemoryPool {
+private:
+  // It returns x - y if x > y, 0 otherwise.
+  // As described in the comment above, some of the inputs to the
+  // calculations we have to do are obtained concurrently and hence
+  // may be inconsistent with each other. So, this provides a
+  // defensive way of performing the subtraction and avoids the value
+  // going negative (which would mean a very large result, given that
+  // the parameter are size_t).
+  static size_t subtract_up_to_zero(size_t x, size_t y) {
+    if (x > y) {
+      return x - y;
+    } else {
+      return 0;
+    }
+  }
+
+protected:
+  G1CollectedHeap* _g1h;
+
+  // Would only be called from subclasses.
+  G1MemoryPoolSuper(G1CollectedHeap* g1h,
+                    const char* name,
+                    size_t init_size,
+                    size_t max_size,
+                    bool support_usage_threshold);
+
+  // The reason why all the code is in static methods is so that it
+  // can be safely called from the constructors of the subclasses.
+
+  static size_t overall_committed(G1CollectedHeap* g1h) {
+    return g1h->capacity();
+  }
+  static size_t overall_used(G1CollectedHeap* g1h) {
+    return g1h->used_unlocked();
+  }
+  static size_t overall_max(G1CollectedHeap* g1h) {
+    return g1h->g1_reserved_obj_bytes();
+  }
+
+  static size_t eden_space_committed(G1CollectedHeap* g1h);
+  static size_t eden_space_used(G1CollectedHeap* g1h);
+  static size_t eden_space_max(G1CollectedHeap* g1h);
+
+  static size_t survivor_space_committed(G1CollectedHeap* g1h);
+  static size_t survivor_space_used(G1CollectedHeap* g1h);
+  static size_t survivor_space_max(G1CollectedHeap* g1h);
+
+  static size_t old_space_committed(G1CollectedHeap* g1h);
+  static size_t old_space_used(G1CollectedHeap* g1h);
+  static size_t old_space_max(G1CollectedHeap* g1h);
+};
+
+// Memory pool that represents the G1 eden.
+class G1EdenPool : public G1MemoryPoolSuper {
+public:
+  G1EdenPool(G1CollectedHeap* g1h);
+
+  size_t used_in_bytes() {
+    return eden_space_used(_g1h);
+  }
+  size_t max_size() const {
+    return eden_space_max(_g1h);
+  }
+  MemoryUsage get_memory_usage();
+};
+
+// Memory pool that represents the G1 survivor.
+class G1SurvivorPool : public G1MemoryPoolSuper {
+public:
+  G1SurvivorPool(G1CollectedHeap* g1h);
+
+  size_t used_in_bytes() {
+    return survivor_space_used(_g1h);
+  }
+  size_t max_size() const {
+    return survivor_space_max(_g1h);
+  }
+  MemoryUsage get_memory_usage();
+};
+
+// Memory pool that represents the G1 old gen.
+class G1OldGenPool : public G1MemoryPoolSuper {
+public:
+  G1OldGenPool(G1CollectedHeap* g1h);
+
+  size_t used_in_bytes() {
+    return old_space_used(_g1h);
+  }
+  size_t max_size() const {
+    return old_space_max(_g1h);
+  }
+  MemoryUsage get_memory_usage();
+};
diff --git a/src/share/vm/services/memoryManager.cpp b/src/share/vm/services/memoryManager.cpp
index 7b7905c5a..f92ba14c1 100644
--- a/src/share/vm/services/memoryManager.cpp
+++ b/src/share/vm/services/memoryManager.cpp
@@ -72,6 +72,14 @@ GCMemoryManager* MemoryManager::get_psMarkSweep_memory_manager() {
   return (GCMemoryManager*) new PSMarkSweepMemoryManager();
 }
 
+GCMemoryManager* MemoryManager::get_g1YoungGen_memory_manager() {
+  return (GCMemoryManager*) new G1YoungGenMemoryManager();
+}
+
+GCMemoryManager* MemoryManager::get_g1OldGen_memory_manager() {
+  return (GCMemoryManager*) new G1OldGenMemoryManager();
+}
+
 instanceOop MemoryManager::get_memory_manager_instance(TRAPS) {
   // Must do an acquire so as to force ordering of subsequent
   // loads from anything _memory_mgr_obj points to or implies.
diff --git a/src/share/vm/services/memoryManager.hpp b/src/share/vm/services/memoryManager.hpp
index 4efc955eb..955ca8139 100644
--- a/src/share/vm/services/memoryManager.hpp
+++ b/src/share/vm/services/memoryManager.hpp
@@ -54,7 +54,9 @@ public:
     ParNew,
     ConcurrentMarkSweep,
     PSScavenge,
-    PSMarkSweep
+    PSMarkSweep,
+    G1YoungGen,
+    G1OldGen
   };
 
   MemoryManager();
@@ -85,6 +87,8 @@ public:
   static GCMemoryManager* get_cms_memory_manager();
   static GCMemoryManager* get_psScavenge_memory_manager();
   static GCMemoryManager* get_psMarkSweep_memory_manager();
+  static GCMemoryManager* get_g1YoungGen_memory_manager();
+  static GCMemoryManager* get_g1OldGen_memory_manager();
 
 };
 
@@ -231,3 +235,21 @@ public:
   MemoryManager::Name kind() { return MemoryManager::PSMarkSweep; }
   const char* name()         { return "PS MarkSweep"; }
 };
+
+class G1YoungGenMemoryManager : public GCMemoryManager {
+private:
+public:
+  G1YoungGenMemoryManager() : GCMemoryManager() {}
+
+  MemoryManager::Name kind() { return MemoryManager::G1YoungGen; }
+  const char* name()         { return "G1 Young Generation"; }
+};
+
+class G1OldGenMemoryManager : public GCMemoryManager {
+private:
+public:
+  G1OldGenMemoryManager() : GCMemoryManager() {}
+
+  MemoryManager::Name kind() { return MemoryManager::G1OldGen; }
+  const char* name()         { return "G1 Old Generation"; }
+};
diff --git a/src/share/vm/services/memoryService.cpp b/src/share/vm/services/memoryService.cpp
index 1d243828c..9941a6f0f 100644
--- a/src/share/vm/services/memoryService.cpp
+++ b/src/share/vm/services/memoryService.cpp
@@ -60,8 +60,8 @@ void MemoryService::set_universe_heap(CollectedHeap* heap) {
       break;
     }
     case CollectedHeap::G1CollectedHeap : {
-      G1CollectedHeap::g1_unimplemented();
-      return;
+      add_g1_heap_info(G1CollectedHeap::heap());
+      break;
     }
 #endif // SERIALGC
     default: {
@@ -164,6 +164,19 @@ void MemoryService::add_parallel_scavenge_heap_info(ParallelScavengeHeap* heap)
   add_psOld_memory_pool(heap->old_gen(), _major_gc_manager);
   add_psPerm_memory_pool(heap->perm_gen(), _major_gc_manager);
 }
+
+void MemoryService::add_g1_heap_info(G1CollectedHeap* g1h) {
+  assert(UseG1GC, "sanity");
+
+  _minor_gc_manager = MemoryManager::get_g1YoungGen_memory_manager();
+  _major_gc_manager = MemoryManager::get_g1OldGen_memory_manager();
+  _managers_list->append(_minor_gc_manager);
+  _managers_list->append(_major_gc_manager);
+
+  add_g1YoungGen_memory_pool(g1h, _major_gc_manager, _minor_gc_manager);
+  add_g1OldGen_memory_pool(g1h, _major_gc_manager);
+  add_g1PermGen_memory_pool(g1h, _major_gc_manager);
+}
 #endif // SERIALGC
 
 MemoryPool* MemoryService::add_gen(Generation* gen,
@@ -384,6 +397,64 @@ void MemoryService::add_psPerm_memory_pool(PSPermGen* gen, MemoryManager* mgr) {
   mgr->add_pool(perm_gen);
   _pools_list->append(perm_gen);
 }
+
+void MemoryService::add_g1YoungGen_memory_pool(G1CollectedHeap* g1h,
+                                               MemoryManager* major_mgr,
+                                               MemoryManager* minor_mgr) {
+  assert(major_mgr != NULL && minor_mgr != NULL, "should have two managers");
+
+  G1EdenPool* eden = new G1EdenPool(g1h);
+  G1SurvivorPool* survivor = new G1SurvivorPool(g1h);
+
+  major_mgr->add_pool(eden);
+  major_mgr->add_pool(survivor);
+  minor_mgr->add_pool(eden);
+  minor_mgr->add_pool(survivor);
+  _pools_list->append(eden);
+  _pools_list->append(survivor);
+}
+
+void MemoryService::add_g1OldGen_memory_pool(G1CollectedHeap* g1h,
+                                             MemoryManager* mgr) {
+  assert(mgr != NULL, "should have one manager");
+
+  G1OldGenPool* old_gen = new G1OldGenPool(g1h);
+  mgr->add_pool(old_gen);
+  _pools_list->append(old_gen);
+}
+
+void MemoryService::add_g1PermGen_memory_pool(G1CollectedHeap* g1h,
+                                              MemoryManager* mgr) {
+  assert(mgr != NULL, "should have one manager");
+
+  CompactingPermGenGen* perm_gen = (CompactingPermGenGen*) g1h->perm_gen();
+  PermanentGenerationSpec* spec = perm_gen->spec();
+  size_t max_size = spec->max_size() - spec->read_only_size()
+                                     - spec->read_write_size();
+  MemoryPool* pool = add_space(perm_gen->unshared_space(),
+                               "G1 Perm Gen",
+                               false, /* is_heap */
+                               max_size,
+                               true   /* support_usage_threshold */);
+  mgr->add_pool(pool);
+
+  // in case we support CDS in G1
+  if (UseSharedSpaces) {
+    pool = add_space(perm_gen->ro_space(),
+                     "G1 Perm Gen [shared-ro]",
+                     false, /* is_heap */
+                     spec->read_only_size(),
+                     true   /* support_usage_threshold */);
+    mgr->add_pool(pool);
+
+    pool = add_space(perm_gen->rw_space(),
+                     "G1 Perm Gen [shared-rw]",
+                     false, /* is_heap */
+                     spec->read_write_size(),
+                     true   /* support_usage_threshold */);
+    mgr->add_pool(pool);
+  }
+}
 #endif // SERIALGC
 
 void MemoryService::add_code_heap_memory_pool(CodeHeap* heap) {
diff --git a/src/share/vm/services/memoryService.hpp b/src/share/vm/services/memoryService.hpp
index 52b76a729..44823ae42 100644
--- a/src/share/vm/services/memoryService.hpp
+++ b/src/share/vm/services/memoryService.hpp
@@ -40,6 +40,7 @@ class GenCollectedHeap;
 class ParallelScavengeHeap;
 class CompactingPermGenGen;
 class CMSPermGenGen;
+class G1CollectedHeap;
 
 // VM Monitoring and Management Support
 
@@ -88,6 +89,13 @@ private:
   static void add_psPerm_memory_pool(PSPermGen* perm,
                                      MemoryManager* mgr);
 
+  static void add_g1YoungGen_memory_pool(G1CollectedHeap* g1h,
+                                         MemoryManager* major_mgr,
+                                         MemoryManager* minor_mgr);
+  static void add_g1OldGen_memory_pool(G1CollectedHeap* g1h,
+                                       MemoryManager* mgr);
+  static void add_g1PermGen_memory_pool(G1CollectedHeap* g1h,
+                                        MemoryManager* mgr);
 
   static MemoryPool* add_space(ContiguousSpace* space,
                                const char* name,
@@ -111,6 +119,7 @@ private:
 
   static void add_gen_collected_heap_info(GenCollectedHeap* heap);
   static void add_parallel_scavenge_heap_info(ParallelScavengeHeap* heap);
+  static void add_g1_heap_info(G1CollectedHeap* g1h);
 
 public:
   static void set_universe_heap(CollectedHeap* heap);
diff --git a/src/share/vm/utilities/growableArray.hpp b/src/share/vm/utilities/growableArray.hpp
index 77ce93c13..c52364666 100644
--- a/src/share/vm/utilities/growableArray.hpp
+++ b/src/share/vm/utilities/growableArray.hpp
@@ -278,6 +278,17 @@ template<class E> class GrowableArray : public GenericGrowableArray {
     _len--;
   }
 
+  // inserts the given element before the element at index i
+  void insert_before(const int idx, const E& elem) {
+    check_nesting();
+    if (_len == _max) grow(_len);
+    for (int j = _len - 1; j >= idx; j--) {
+      _data[j + 1] = _data[j];
+    }
+    _len++;
+    _data[idx] = elem;
+  }
+
   void appendAll(const GrowableArray<E>* l) {
     for (int i = 0; i < l->_len; i++) {
       raw_at_put_grow(_len, l->_data[i], 0);
diff --git a/src/share/vm/utilities/numberSeq.cpp b/src/share/vm/utilities/numberSeq.cpp
index 7cd06b28c..0c152cfde 100644
--- a/src/share/vm/utilities/numberSeq.cpp
+++ b/src/share/vm/utilities/numberSeq.cpp
@@ -241,3 +241,33 @@ double TruncatedSeq::predict_next() const {
 
   return b0 + b1 * num;
 }
+
+
+// Printing/Debugging Support
+
+void AbsSeq::dump() { dump_on(gclog_or_tty); }
+
+void AbsSeq::dump_on(outputStream* s) {
+  s->print_cr("\t _num = %d, _sum = %7.3f, _sum_of_squares = %7.3f",
+                  _num,      _sum,         _sum_of_squares);
+  s->print_cr("\t _davg = %7.3f, _dvariance = %7.3f, _alpha = %7.3f",
+                  _davg,         _dvariance,         _alpha);
+}
+
+void NumberSeq::dump_on(outputStream* s) {
+  AbsSeq::dump_on(s);
+  s->print_cr("\t\t _last = %7.3f, _maximum = %7.3f");
+}
+
+void TruncatedSeq::dump_on(outputStream* s) {
+  AbsSeq::dump_on(s);
+  s->print_cr("\t\t _length = %d, _next = %d", _length, _next);
+  for (int i = 0; i < _length; i++) {
+    if (i%5 == 0) {
+      s->cr();
+      s->print("\t");
+    }
+    s->print("\t[%d]=%7.3f", i, _sequence[i]);
+  }
+  s->print_cr("");
+}
diff --git a/src/share/vm/utilities/numberSeq.hpp b/src/share/vm/utilities/numberSeq.hpp
index 4366c8bf1..eb40b149b 100644
--- a/src/share/vm/utilities/numberSeq.hpp
+++ b/src/share/vm/utilities/numberSeq.hpp
@@ -74,6 +74,10 @@ public:
   double davg() const; // decaying average
   double dvariance() const; // decaying variance
   double dsd() const; // decaying "standard deviation"
+
+  // Debugging/Printing
+  virtual void dump();
+  virtual void dump_on(outputStream* s);
 };
 
 class NumberSeq: public AbsSeq {
@@ -91,6 +95,9 @@ public:
   virtual void add(double val);
   virtual double maximum() const { return _maximum; }
   virtual double last() const { return _last; }
+
+  // Debugging/Printing
+  virtual void dump_on(outputStream* s);
 };
 
 class TruncatedSeq: public AbsSeq {
@@ -114,4 +121,7 @@ public:
 
   double oldest() const; // the oldest valid value in the sequence
   double predict_next() const; // prediction based on linear regression
+
+  // Debugging/Printing
+  virtual void dump_on(outputStream* s);
 };
diff --git a/test/compiler/6895383/Test.java b/test/compiler/6895383/Test.java
new file mode 100644
index 000000000..719ba3113
--- /dev/null
+++ b/test/compiler/6895383/Test.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6895383
+ * @summary JCK test throws NPE for method compiled with Escape Analysis
+ *
+ * @run main/othervm -Xcomp Test
+ */
+
+public class Test {
+    public static void main(String argv[]) {
+        Test test = new Test();
+        test.testRemove1_IndexOutOfBounds();
+        test.testAddAll1_IndexOutOfBoundsException();
+    }
+
+    public void testRemove1_IndexOutOfBounds() {
+        CopyOnWriteArrayList c = new CopyOnWriteArrayList();
+    }
+
+    public void testAddAll1_IndexOutOfBoundsException() {
+        try {
+            CopyOnWriteArrayList c = new CopyOnWriteArrayList();
+            c.addAll(-1, new LinkedList()); // should throw IndexOutOfBoundsException
+        } catch (IndexOutOfBoundsException e) {
+        }
+    }
+}
diff --git a/test/compiler/6896727/Test.java b/test/compiler/6896727/Test.java
new file mode 100644
index 000000000..a9aef1d2b
--- /dev/null
+++ b/test/compiler/6896727/Test.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/*
+ * @test
+ * @bug 6896727
+ * @summary nsk/logging/LoggingPermission/LoggingPermission/logperm002 fails with G1, EscapeAnalisys w/o COOPs
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -Xcomp -XX:+DoEscapeAnalysis -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC Test
+ */
+
+public class Test {
+
+    final static String testString = "abracadabra";
+    public static void main(String args[]) {
+        String params[][] = {
+            {"control", testString}
+        };
+        for (int i=0; i<params.length; i++) {
+            try {
+                System.out.println("Params :" + testString + " and " + params[i][0] + ", " + params[i][1]);
+                if (params[i][1] == null) {
+                    System.exit(97);
+                }
+            } catch (Exception e) {}
+        }
+    }
+}
diff --git a/test/compiler/6901572/Test.java b/test/compiler/6901572/Test.java
new file mode 100644
index 000000000..1139161b1
--- /dev/null
+++ b/test/compiler/6901572/Test.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+/**
+ * @test
+ * @bug 6901572
+ * @summary JVM 1.6.16 crash on loops: assert(has_node(i),"")
+ *
+ * @run main/othervm Test
+ */
+
+
+public class Test {
+
+    public static void main(String[] args) {
+        for (int i = 0; i < 2; i++)
+            NestedLoop();
+    }
+
+    public static long NestedLoop() {
+        final int n = 50;
+        long startTime = System.currentTimeMillis();
+        int x = 0;
+        for(int a = 0; a < n; a++)
+            for(int b = 0; b < n; b++)
+                for(int c = 0; c < n; c++)
+                    for(int d = 0; d < n; d++)
+                        for(int e = 0; e < n; e++)
+                            for(int f = 0; f < n; f++)
+                                x++;
+        long stopTime = System.currentTimeMillis();
+
+        return stopTime - startTime;
+    }
+}