Merge hs15-b01

author: trims <none@none> 2009-01-30 15:28:00 -0800
committer: trims <none@none> 2009-01-30 15:28:00 -0800
commit: d548fe68de5bb6ab73de7207d974066259394fb1 (patch)
tree: 9becd2db41d56e09aa1f1f36602229bc92846798
parent: 6f1aac16cca6ee43b5499cebfe54a070f0f01cbd (diff)
parent: 9f5a2de4ba2fb0411a5d2ebf596acbb0c51c8f18 (diff)
106 files changed, 1477 insertions, 544 deletions
diff --git a/agent/src/os/linux/ps_core.c b/agent/src/os/linux/ps_core.c
index 3562f2d31..a6156f0a8 100644
--- a/agent/src/os/linux/ps_core.c
+++ b/agent/src/os/linux/ps_core.c
@@ -238,8 +238,8 @@ struct FileMapHeader {
   // Ignore the rest of the FileMapHeader. We don't need those fields here.
 };
 
-static bool read_int(struct ps_prochandle* ph, uintptr_t addr, int* pvalue) {
-   int i;
+static bool read_jboolean(struct ps_prochandle* ph, uintptr_t addr, jboolean* pvalue) {
+   jboolean i;
    if (ps_pdread(ph, (psaddr_t) addr, &i, sizeof(i)) == PS_OK) {
       *pvalue = i;
       return true;
@@ -295,7 +295,7 @@ static bool init_classsharing_workaround(struct ps_prochandle* ph) {
          int fd = -1, m = 0;
          uintptr_t base = 0, useSharedSpacesAddr = 0;
          uintptr_t sharedArchivePathAddrAddr = 0, sharedArchivePathAddr = 0;
-         int useSharedSpaces = 0;
+         jboolean useSharedSpaces = 0;
          map_info* mi = 0;
 
          memset(classes_jsa, 0, sizeof(classes_jsa));
@@ -306,12 +306,15 @@ static bool init_classsharing_workaround(struct ps_prochandle* ph) {
             return false;
          }
 
-         if (read_int(ph, useSharedSpacesAddr, &useSharedSpaces) != true) {
+         // Hotspot vm types are not exported to build this library, so the
+         // equivalent type jboolean is used to read the value of
+         // UseSharedSpaces, which is the same as the hotspot type "bool".
+         if (read_jboolean(ph, useSharedSpacesAddr, &useSharedSpaces) != true) {
             print_debug("can't read the value of 'UseSharedSpaces' flag\n");
             return false;
          }
 
-         if (useSharedSpaces == 0) {
+         if ((int)useSharedSpaces == 0) {
             print_debug("UseSharedSpaces is false, assuming -Xshare:off!\n");
             return true;
          }
diff --git a/agent/src/os/solaris/proc/saproc.cpp b/agent/src/os/solaris/proc/saproc.cpp
index 231773c56..415e28af6 100644
--- a/agent/src/os/solaris/proc/saproc.cpp
+++ b/agent/src/os/solaris/proc/saproc.cpp
@@ -502,8 +502,8 @@ struct FileMapHeader {
 };
 
 static bool
-read_int(struct ps_prochandle* ph, psaddr_t addr, int* pvalue) {
-  int i;
+read_jboolean(struct ps_prochandle* ph, psaddr_t addr, jboolean* pvalue) {
+  jboolean i;
   if (ps_pread(ph, addr, &i, sizeof(i)) == PS_OK) {
     *pvalue = i;
     return true;
@@ -575,10 +575,13 @@ init_classsharing_workaround(void *cd, const prmap_t* pmap, const char* obj_name
   }
 
   // read the value of the flag "UseSharedSpaces"
-  int value = 0;
-  if (read_int(ph, useSharedSpacesAddr, &value) != true) {
+  // Hotspot types are not available to build this library, so the
+  // equivalent type "jboolean" is used to read the value of "UseSharedSpaces",
+  // which is the same as the hotspot type "bool".
+  jboolean value = 0;
+  if (read_jboolean(ph, useSharedSpacesAddr, &value) != true) {
     THROW_NEW_DEBUGGER_EXCEPTION_("can't read 'UseSharedSpaces' flag", 1);
-  } else if (value == 0) {
+  } else if ((int)value == 0) {
     print_debug("UseSharedSpaces is false, assuming -Xshare:off!\n");
     return 1;
   }
diff --git a/make/hotspot_version b/make/hotspot_version
index 4454f180b..ffda334c5 100644
--- a/make/hotspot_version
+++ b/make/hotspot_version
@@ -33,9 +33,9 @@
 # Don't put quotes (fail windows build).
 HOTSPOT_VM_COPYRIGHT=Copyright 2008
 
-HS_MAJOR_VER=14
+HS_MAJOR_VER=15
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=10
+HS_BUILD_NUMBER=01
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=7
diff --git a/src/cpu/sparc/vm/jni_sparc.h b/src/cpu/sparc/vm/jni_sparc.h
index 7d2845e01..33a494b60 100644
--- a/src/cpu/sparc/vm/jni_sparc.h
+++ b/src/cpu/sparc/vm/jni_sparc.h
@@ -28,5 +28,11 @@
 #define JNICALL
 
 typedef int jint;
-typedef long long jlong;
+
+#ifdef _LP64
+  typedef long jlong;
+#else
+  typedef long long jlong;
+#endif
+
 typedef signed char jbyte;
diff --git a/src/cpu/x86/vm/assembler_x86.cpp b/src/cpu/x86/vm/assembler_x86.cpp
index edf0d3dbc..d6433f0e1 100644
--- a/src/cpu/x86/vm/assembler_x86.cpp
+++ b/src/cpu/x86/vm/assembler_x86.cpp
@@ -5212,15 +5212,15 @@ void MacroAssembler::pushptr(AddressLiteral src) {
 void MacroAssembler::reset_last_Java_frame(bool clear_fp,
                                            bool clear_pc) {
   // we must set sp to zero to clear frame
-  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), (int32_t)NULL_WORD);
+  movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
   // must clear fp, so that compiled frames are not confused; it is
   // possible that we need it only for debugging
   if (clear_fp) {
-    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), (int32_t)NULL_WORD);
+    movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
   }
 
   if (clear_pc) {
-    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), (int32_t)NULL_WORD);
+    movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
   }
 }
 
@@ -5670,7 +5670,7 @@ void MacroAssembler::call_VM_base(Register oop_result,
   // get oop result if there is one and reset the value in the thread
   if (oop_result->is_valid()) {
     movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
-    movptr(Address(java_thread, JavaThread::vm_result_offset()), (int32_t)NULL_WORD);
+    movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD);
     verify_oop(oop_result, "broken oop in call_VM_base");
   }
 }
@@ -6426,13 +6426,13 @@ void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp,
     get_thread(java_thread);
   }
   // we must set sp to zero to clear frame
-  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), (int32_t)NULL_WORD);
+  movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD);
   if (clear_fp) {
-    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), (int32_t)NULL_WORD);
+    movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD);
   }
 
   if (clear_pc)
-    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), (int32_t)NULL_WORD);
+    movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD);
 
 }
 
@@ -6943,29 +6943,32 @@ void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
 
   Label slow_case, done;
 
-  // x ?<= pi/4
-  fld_d(ExternalAddress((address)&pi_4));
-  fld_s(1);                // Stack:  X  PI/4  X
-  fabs();                  // Stack: |X| PI/4  X
-  fcmp(tmp);
-  jcc(Assembler::above, slow_case);
+  ExternalAddress pi4_adr = (address)&pi_4;
+  if (reachable(pi4_adr)) {
+    // x ?<= pi/4
+    fld_d(pi4_adr);
+    fld_s(1);                // Stack:  X  PI/4  X
+    fabs();                  // Stack: |X| PI/4  X
+    fcmp(tmp);
+    jcc(Assembler::above, slow_case);
 
-  // fastest case: -pi/4 <= x <= pi/4
-  switch(trig) {
-  case 's':
-    fsin();
-    break;
-  case 'c':
-    fcos();
-    break;
-  case 't':
-    ftan();
-    break;
-  default:
-    assert(false, "bad intrinsic");
-    break;
+    // fastest case: -pi/4 <= x <= pi/4
+    switch(trig) {
+    case 's':
+      fsin();
+      break;
+    case 'c':
+      fcos();
+      break;
+    case 't':
+      ftan();
+      break;
+    default:
+      assert(false, "bad intrinsic");
+      break;
+    }
+    jmp(done);
   }
-  jmp(done);
 
   // slow case: runtime call
   bind(slow_case);
diff --git a/src/cpu/x86/vm/bytecodeInterpreter_x86.inline.hpp b/src/cpu/x86/vm/bytecodeInterpreter_x86.inline.hpp
index 0e41dbc4a..f66bfd4d3 100644
--- a/src/cpu/x86/vm/bytecodeInterpreter_x86.inline.hpp
+++ b/src/cpu/x86/vm/bytecodeInterpreter_x86.inline.hpp
@@ -213,7 +213,7 @@ inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) {
 
 inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) {
   /* it's possible we could catch this special case implicitly */
-  if (op1 == 0x80000000 && op2 == -1) return op1;
+  if ((juint)op1 == 0x80000000 && op2 == -1) return op1;
   else return op1 / op2;
 }
 
@@ -231,7 +231,7 @@ inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) {
 
 inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) {
   /* it's possible we could catch this special case implicitly */
-  if (op1 == 0x80000000 && op2 == -1) return 0;
+  if ((juint)op1 == 0x80000000 && op2 == -1) return 0;
   else return op1 % op2;
 }
 
diff --git a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
index 4300d196c..cdf508fab 100644
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
@@ -779,7 +779,7 @@ void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmi
     case T_OBJECT:  // fall through
     case T_ARRAY:
       if (c->as_jobject() == NULL) {
-        __ movptr(as_Address(addr), (int32_t)NULL_WORD);
+        __ movptr(as_Address(addr), NULL_WORD);
       } else {
         if (is_literal_address(addr)) {
           ShouldNotReachHere();
diff --git a/src/cpu/x86/vm/c1_Runtime1_x86.cpp b/src/cpu/x86/vm/c1_Runtime1_x86.cpp
index eed2ad68d..12aea3fde 100644
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp
@@ -78,10 +78,10 @@ int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address e
     movptr(rax, Address(thread, Thread::pending_exception_offset()));
     // make sure that the vm_results are cleared
     if (oop_result1->is_valid()) {
-      movptr(Address(thread, JavaThread::vm_result_offset()), (int32_t)NULL_WORD);
+      movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
     }
     if (oop_result2->is_valid()) {
-      movptr(Address(thread, JavaThread::vm_result_2_offset()), (int32_t)NULL_WORD);
+      movptr(Address(thread, JavaThread::vm_result_2_offset()), NULL_WORD);
     }
     if (frame_size() == no_frame_size) {
       leave();
@@ -96,12 +96,12 @@ int StubAssembler::call_RT(Register oop_result1, Register oop_result2, address e
   // get oop results if there are any and reset the values in the thread
   if (oop_result1->is_valid()) {
     movptr(oop_result1, Address(thread, JavaThread::vm_result_offset()));
-    movptr(Address(thread, JavaThread::vm_result_offset()), (int32_t)NULL_WORD);
+    movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
     verify_oop(oop_result1);
   }
   if (oop_result2->is_valid()) {
     movptr(oop_result2, Address(thread, JavaThread::vm_result_2_offset()));
-    movptr(Address(thread, JavaThread::vm_result_2_offset()), (int32_t)NULL_WORD);
+    movptr(Address(thread, JavaThread::vm_result_2_offset()), NULL_WORD);
     verify_oop(oop_result2);
   }
   return call_offset;
@@ -728,8 +728,8 @@ void Runtime1::generate_handle_exception(StubAssembler *sasm, OopMapSet* oop_map
 
   // clear exception fields in JavaThread because they are no longer needed
   // (fields must be cleared because they are processed by GC otherwise)
-  __ movptr(Address(thread, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
-  __ movptr(Address(thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
+  __ movptr(Address(thread, JavaThread::exception_oop_offset()), NULL_WORD);
+  __ movptr(Address(thread, JavaThread::exception_pc_offset()), NULL_WORD);
 
   // pop the stub frame off
   __ leave();
@@ -878,7 +878,7 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
 
     // load and clear pending exception
     __ movptr(rax, Address(thread, Thread::pending_exception_offset()));
-    __ movptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
+    __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
 
     // check that there is really a valid exception
     __ verify_not_null_oop(rax);
@@ -971,14 +971,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
         // load pending exception oop into rax,
         __ movptr(exception_oop, Address(thread, Thread::pending_exception_offset()));
         // clear pending exception
-        __ movptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
+        __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD);
 
         // load issuing PC (the return address for this stub) into rdx
         __ movptr(exception_pc, Address(rbp, 1*BytesPerWord));
 
         // make sure that the vm_results are cleared (may be unnecessary)
-        __ movptr(Address(thread, JavaThread::vm_result_offset()), (int32_t)NULL_WORD);
-        __ movptr(Address(thread, JavaThread::vm_result_2_offset()), (int32_t)NULL_WORD);
+        __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
+        __ movptr(Address(thread, JavaThread::vm_result_2_offset()), NULL_WORD);
 
         // verify that that there is really a valid exception in rax,
         __ verify_not_null_oop(exception_oop);
@@ -1393,7 +1393,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
         __ ret(0);
 
         __ bind(miss);
-        __ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), 0); // result
+        __ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result
         __ pop(rax);
         __ pop(rcx);
         __ pop(rsi);
diff --git a/src/cpu/x86/vm/cppInterpreter_x86.cpp b/src/cpu/x86/vm/cppInterpreter_x86.cpp
index ecaa3ec21..a3621ad88 100644
--- a/src/cpu/x86/vm/cppInterpreter_x86.cpp
+++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp
@@ -594,7 +594,7 @@ void InterpreterGenerator::generate_counter_overflow(Label* do_continue) {
   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), rax);
 
   // for c++ interpreter can rsi really be munged?
-  __ lea(state, Address(rbp, -sizeof(BytecodeInterpreter)));                               // restore state
+  __ lea(state, Address(rbp, -(int)sizeof(BytecodeInterpreter)));                               // restore state
   __ movptr(rbx, Address(state, byte_offset_of(BytecodeInterpreter, _method)));            // restore method
   __ movptr(rdi, Address(state, byte_offset_of(BytecodeInterpreter, _locals)));            // get locals pointer
 
@@ -658,7 +658,7 @@ void InterpreterGenerator::generate_stack_overflow_check(void) {
     const Address size_of_stack    (rbx, methodOopDesc::max_stack_offset());
     // Always give one monitor to allow us to start interp if sync method.
     // Any additional monitors need a check when moving the expression stack
-    const one_monitor = frame::interpreter_frame_monitor_size() * wordSize;
+    const int one_monitor = frame::interpreter_frame_monitor_size() * wordSize;
   __ load_unsigned_word(rax, size_of_stack);                            // get size of expression stack in words
   __ lea(rax, Address(noreg, rax, Interpreter::stackElementScale(), one_monitor));
   __ lea(rax, Address(rax, rdx, Interpreter::stackElementScale(), overhead_size));
@@ -1829,7 +1829,7 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) {
   Label unwind_and_forward;
 
   // restore state pointer.
-  __ lea(state, Address(rbp,  -sizeof(BytecodeInterpreter)));
+  __ lea(state, Address(rbp,  -(int)sizeof(BytecodeInterpreter)));
 
   __ movptr(rbx, STATE(_method));                       // get method
 #ifdef _LP64
@@ -1877,14 +1877,14 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) {
 
   // The FPU stack is clean if UseSSE >= 2 but must be cleaned in other cases
   if (UseSSE < 2) {
-    __ lea(state, Address(rbp,  -sizeof(BytecodeInterpreter)));
+    __ lea(state, Address(rbp,  -(int)sizeof(BytecodeInterpreter)));
     __ movptr(rbx, STATE(_result._to_call._callee));                   // get method just executed
     __ movl(rcx, Address(rbx, methodOopDesc::result_index_offset()));
     __ cmpl(rcx, AbstractInterpreter::BasicType_as_index(T_FLOAT));    // Result stub address array index
     __ jcc(Assembler::equal, do_float);
     __ cmpl(rcx, AbstractInterpreter::BasicType_as_index(T_DOUBLE));    // Result stub address array index
     __ jcc(Assembler::equal, do_double);
-#ifdef COMPILER2
+#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
     __ empty_FPU_stack();
 #endif // COMPILER2
     __ jmp(done_conv);
@@ -1928,7 +1928,7 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) {
 
   // Restore rsi/r13 as compiled code may not preserve it
 
-  __ lea(state, Address(rbp,  -sizeof(BytecodeInterpreter)));
+  __ lea(state, Address(rbp,  -(int)sizeof(BytecodeInterpreter)));
 
   // restore stack to what we had when we left (in case i2c extended it)
 
@@ -1942,7 +1942,7 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) {
 #else
   __ movptr(rcx, STATE(_thread));                       // get thread
   __ cmpptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
-#endif / __LP64
+#endif // _LP64
   __ jcc(Assembler::notZero, return_with_exception);
 
   // get method just executed
diff --git a/src/cpu/x86/vm/frame_x86.inline.hpp b/src/cpu/x86/vm/frame_x86.inline.hpp
index f06b40de3..82c37fd41 100644
--- a/src/cpu/x86/vm/frame_x86.inline.hpp
+++ b/src/cpu/x86/vm/frame_x86.inline.hpp
@@ -139,7 +139,7 @@ inline address* frame::native_param_addr(int idx) const { return (address*) addr
 #ifdef CC_INTERP
 
 inline interpreterState frame::get_interpreterState() const {
-  return ((interpreterState)addr_at( -sizeof(BytecodeInterpreter)/wordSize ));
+  return ((interpreterState)addr_at( -((int)sizeof(BytecodeInterpreter))/wordSize ));
 }
 
 inline intptr_t*    frame::sender_sp()        const {
diff --git a/src/cpu/x86/vm/interp_masm_x86_32.cpp b/src/cpu/x86/vm/interp_masm_x86_32.cpp
index 78f0f1238..c11c3bc54 100644
--- a/src/cpu/x86/vm/interp_masm_x86_32.cpp
+++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp
@@ -133,7 +133,7 @@ void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
                              + in_ByteSize(wordSize));
   switch (state) {
     case atos: movptr(rax, oop_addr);
-               movptr(oop_addr, (int32_t)NULL_WORD);
+               movptr(oop_addr, NULL_WORD);
                verify_oop(rax, state);                break;
     case ltos:
                movl(rdx, val_addr1);               // fall through
@@ -148,8 +148,8 @@ void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
   }
   // Clean up tos value in the thread object
   movl(tos_addr,  (int32_t) ilgl);
-  movptr(val_addr,  (int32_t)NULL_WORD);
-  NOT_LP64(movl(val_addr1, (int32_t)NULL_WORD));
+  movptr(val_addr,  NULL_WORD);
+  NOT_LP64(movptr(val_addr1, NULL_WORD));
 }
 
 
@@ -944,7 +944,7 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) {
     movptr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes ()));
 
     // Free entry
-    movptr(Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()), (int32_t)NULL_WORD);
+    movptr(Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()), NULL_WORD);
 
     if (UseBiasedLocking) {
       biased_locking_exit(obj_reg, header_reg, done);
diff --git a/src/cpu/x86/vm/interp_masm_x86_32.hpp b/src/cpu/x86/vm/interp_masm_x86_32.hpp
index 247f4ec3b..57cd7f325 100644
--- a/src/cpu/x86/vm/interp_masm_x86_32.hpp
+++ b/src/cpu/x86/vm/interp_masm_x86_32.hpp
@@ -120,7 +120,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
   void empty_expression_stack()                            {
        movptr(rsp, Address(rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize));
       // NULL last_sp until next java call
-      movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+      movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
   }
 
   // Tagged stack helpers for swap and dup
diff --git a/src/cpu/x86/vm/interp_masm_x86_64.cpp b/src/cpu/x86/vm/interp_masm_x86_64.cpp
index 9809649d3..ebcac0fdd 100644
--- a/src/cpu/x86/vm/interp_masm_x86_64.cpp
+++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp
@@ -30,7 +30,7 @@
 
 #ifdef CC_INTERP
 void InterpreterMacroAssembler::get_method(Register reg) {
-  movptr(reg, Address(rbp, -(sizeof(BytecodeInterpreter) + 2 * wordSize)));
+  movptr(reg, Address(rbp, -((int)sizeof(BytecodeInterpreter) + 2 * wordSize)));
   movptr(reg, Address(reg, byte_offset_of(BytecodeInterpreter, _method)));
 }
 #endif // CC_INTERP
diff --git a/src/cpu/x86/vm/interpreterRT_x86_32.cpp b/src/cpu/x86/vm/interpreterRT_x86_32.cpp
index 96a1c3073..60af2f457 100644
--- a/src/cpu/x86/vm/interpreterRT_x86_32.cpp
+++ b/src/cpu/x86/vm/interpreterRT_x86_32.cpp
@@ -54,7 +54,7 @@ void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_
   __ cmpptr(Address(from(), Interpreter::local_offset_in_bytes(from_offset)), (int32_t)NULL_WORD); // do not use temp() to avoid AGI
   Label L;
   __ jcc(Assembler::notZero, L);
-  __ movptr(temp(), ((int32_t)NULL_WORD));
+  __ movptr(temp(), NULL_WORD);
   __ bind(L);
   __ movptr(Address(to(), to_offset * wordSize), temp());
 }
@@ -110,7 +110,7 @@ class SlowSignatureHandler: public NativeSignatureIterator {
   virtual void pass_object() {
     // pass address of from
     intptr_t from_addr = (intptr_t)(_from + Interpreter::local_offset_in_bytes(0));
-    *_to++ = (*(intptr_t*)from_addr == 0) ? NULL : from_addr;
+    *_to++ = (*(intptr_t*)from_addr == 0) ? NULL_WORD : from_addr;
     debug_only(verify_tag(frame::TagReference));
     _from -= Interpreter::stackElementSize();
    }
diff --git a/src/cpu/x86/vm/jni_x86.h b/src/cpu/x86/vm/jni_x86.h
index 89f95e357..625562bb3 100644
--- a/src/cpu/x86/vm/jni_x86.h
+++ b/src/cpu/x86/vm/jni_x86.h
@@ -32,7 +32,13 @@
   #define JNICALL
 
   typedef int jint;
+
+#ifdef _LP64
+  typedef long jlong;
+#else
   typedef long long jlong;
+#endif
+
 #else
   #define JNIEXPORT __declspec(dllexport)
   #define JNIIMPORT __declspec(dllimport)
diff --git a/src/cpu/x86/vm/runtime_x86_32.cpp b/src/cpu/x86/vm/runtime_x86_32.cpp
index 72870e11e..6a92a2c05 100644
--- a/src/cpu/x86/vm/runtime_x86_32.cpp
+++ b/src/cpu/x86/vm/runtime_x86_32.cpp
@@ -129,11 +129,11 @@ void OptoRuntime::generate_exception_blob() {
   // Get the exception pc in case we are deoptimized
   __ movptr(rdx, Address(rcx, JavaThread::exception_pc_offset()));
 #ifdef ASSERT
-  __ movptr(Address(rcx, JavaThread::exception_handler_pc_offset()), (int32_t)NULL_WORD);
-  __ movptr(Address(rcx, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
+  __ movptr(Address(rcx, JavaThread::exception_handler_pc_offset()), NULL_WORD);
+  __ movptr(Address(rcx, JavaThread::exception_pc_offset()), NULL_WORD);
 #endif
   // Clear the exception oop so GC no longer processes it as a root.
-  __ movptr(Address(rcx, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
+  __ movptr(Address(rcx, JavaThread::exception_oop_offset()), NULL_WORD);
 
   __ pop(rcx);
 
diff --git a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
index 7c0aa6bf5..3cf22d7fa 100644
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp
@@ -39,6 +39,8 @@ RuntimeStub*       SharedRuntime::_resolve_opt_virtual_call_blob;
 RuntimeStub*       SharedRuntime::_resolve_virtual_call_blob;
 RuntimeStub*       SharedRuntime::_resolve_static_call_blob;
 
+const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
+
 class RegisterSaver {
   enum { FPU_regs_live = 8 /*for the FPU stack*/+8/*eight more for XMM registers*/ };
   // Capture info about frame layout
@@ -1299,7 +1301,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
 
   // Now compute actual number of stack words we need rounding to make
   // stack properly aligned.
-  stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word);
+  stack_slots = round_to(stack_slots, StackAlignmentInSlots);
 
   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
 
@@ -1793,7 +1795,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
   // reset handle block
   __ movptr(rcx, Address(thread, JavaThread::active_handles_offset()));
 
-  __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
+  __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
 
   // Any exception pending?
   __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
@@ -1865,7 +1867,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
     // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
 
     __ pushptr(Address(thread, in_bytes(Thread::pending_exception_offset())));
-    __ movptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
+    __ movptr(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
 
 
     // should be a peal
@@ -2431,7 +2433,7 @@ void SharedRuntime::generate_deopt_blob() {
   __ get_thread(rdi);
   __ movptr(rdx, Address(rdi, JavaThread::exception_pc_offset()));
   __ movptr(Address(rbp, wordSize), rdx);
-  __ movptr(Address(rdi, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
+  __ movptr(Address(rdi, JavaThread::exception_pc_offset()), NULL_WORD);
 
 #ifdef ASSERT
   // verify that there is really an exception oop in JavaThread
@@ -2489,8 +2491,8 @@ void SharedRuntime::generate_deopt_blob() {
   __ jcc(Assembler::notEqual, noException);
   __ movptr(rax, Address(rcx, JavaThread::exception_oop_offset()));
   __ movptr(rdx, Address(rcx, JavaThread::exception_pc_offset()));
-  __ movptr(Address(rcx, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
-  __ movptr(Address(rcx, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
+  __ movptr(Address(rcx, JavaThread::exception_oop_offset()), NULL_WORD);
+  __ movptr(Address(rcx, JavaThread::exception_pc_offset()), NULL_WORD);
 
   __ verify_oop(rax);
 
@@ -2582,7 +2584,7 @@ void SharedRuntime::generate_deopt_blob() {
           rbx); // Make it walkable
 #else /* CC_INTERP */
   // This value is corrected by layout_activation_impl
-  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD );
+  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
   __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), rbx); // Make it walkable
 #endif /* CC_INTERP */
   __ movptr(sp_temp, rsp);              // pass to next frame
@@ -2802,7 +2804,7 @@ void SharedRuntime::generate_uncommon_trap_blob() {
           rbx); // Make it walkable
 #else /* CC_INTERP */
   // This value is corrected by layout_activation_impl
-  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD );
+  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD );
   __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), rbx); // Make it walkable
 #endif /* CC_INTERP */
   __ movptr(sp_temp, rsp);              // pass to next frame
@@ -3020,7 +3022,7 @@ static RuntimeStub* generate_resolve_blob(address destination, const char* name)
   // exception pending => remove activation and forward to exception handler
 
   __ get_thread(thread);
-  __ movptr(Address(thread, JavaThread::vm_result_offset()), (int32_t)NULL_WORD);
+  __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
   __ movptr(rax, Address(thread, Thread::pending_exception_offset()));
   __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
 
diff --git a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
index 26e7e1489..7fc2b6685 100644
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
@@ -39,6 +39,8 @@ RuntimeStub*       SharedRuntime::_resolve_opt_virtual_call_blob;
 RuntimeStub*       SharedRuntime::_resolve_virtual_call_blob;
 RuntimeStub*       SharedRuntime::_resolve_static_call_blob;
 
+const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
+
 #define __ masm->
 
 class SimpleRuntimeFrame {
@@ -1286,7 +1288,7 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
 
   // Now compute actual number of stack words we need rounding to make
   // stack properly aligned.
-  stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
+  stack_slots = round_to(stack_slots, StackAlignmentInSlots);
 
   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
 
@@ -2954,10 +2956,16 @@ void SharedRuntime::generate_uncommon_trap_blob() {
   __ pushptr(Address(rcx, 0));     // Save return address
   __ enter();                      // Save old & set new rbp
   __ subptr(rsp, rbx);             // Prolog
+#ifdef CC_INTERP
+  __ movptr(Address(rbp,
+                  -(sizeof(BytecodeInterpreter)) + in_bytes(byte_offset_of(BytecodeInterpreter, _sender_sp))),
+            sender_sp); // Make it walkable
+#else // CC_INTERP
   __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize),
             sender_sp);            // Make it walkable
   // This value is corrected by layout_activation_impl
   __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD );
+#endif // CC_INTERP
   __ mov(sender_sp, rsp);          // Pass sender_sp to next frame
   __ addptr(rsi, wordSize);        // Bump array pointer (sizes)
   __ addptr(rcx, wordSize);        // Bump array pointer (pcs)
diff --git a/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
index 06435cb00..9b220e204 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp
@@ -407,7 +407,7 @@ class StubGenerator: public StubCodeGenerator {
     __ get_thread(rcx);
     __ pop(rdx);
     __ movptr(rax, Address(rcx, Thread::pending_exception_offset()));
-    __ movptr(Address(rcx, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
+    __ movptr(Address(rcx, Thread::pending_exception_offset()), NULL_WORD);
 
 #ifdef ASSERT
     // make sure exception is set
diff --git a/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
index 33f6e88ee..6c2fb5694 100644
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp
@@ -472,7 +472,7 @@ class StubGenerator: public StubCodeGenerator {
     // setup rax & rdx, remove return address & clear pending exception
     __ pop(rdx);
     __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
-    __ movptr(Address(r15_thread, Thread::pending_exception_offset()), (int)NULL_WORD);
+    __ movptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
 
 #ifdef ASSERT
     // make sure exception is set
@@ -954,9 +954,9 @@ class StubGenerator: public StubCodeGenerator {
     __ jcc(Assembler::zero, exit); // if obj is NULL it is OK
     // Check if the oop is in the right area of memory
     __ movptr(c_rarg2, rax);
-    __ movptr(c_rarg3, (int64_t) Universe::verify_oop_mask());
+    __ movptr(c_rarg3, (intptr_t) Universe::verify_oop_mask());
     __ andptr(c_rarg2, c_rarg3);
-    __ movptr(c_rarg3, (int64_t) Universe::verify_oop_bits());
+    __ movptr(c_rarg3, (intptr_t) Universe::verify_oop_bits());
     __ cmpptr(c_rarg2, c_rarg3);
     __ jcc(Assembler::notZero, error);
 
@@ -969,9 +969,9 @@ class StubGenerator: public StubCodeGenerator {
     __ jcc(Assembler::zero, error); // if klass is NULL it is broken
     // Check if the klass is in the right area of memory
     __ mov(c_rarg2, rax);
-    __ movptr(c_rarg3, (int64_t) Universe::verify_klass_mask());
+    __ movptr(c_rarg3, (intptr_t) Universe::verify_klass_mask());
     __ andptr(c_rarg2, c_rarg3);
-    __ movptr(c_rarg3, (int64_t) Universe::verify_klass_bits());
+    __ movptr(c_rarg3, (intptr_t) Universe::verify_klass_bits());
     __ cmpptr(c_rarg2, c_rarg3);
     __ jcc(Assembler::notZero, error);
 
@@ -980,9 +980,9 @@ class StubGenerator: public StubCodeGenerator {
     __ testptr(rax, rax);
     __ jcc(Assembler::zero, error); // if klass' klass is NULL it is broken
     // Check if the klass' klass is in the right area of memory
-    __ movptr(c_rarg3, (int64_t) Universe::verify_klass_mask());
+    __ movptr(c_rarg3, (intptr_t) Universe::verify_klass_mask());
     __ andptr(rax, c_rarg3);
-    __ movptr(c_rarg3, (int64_t) Universe::verify_klass_bits());
+    __ movptr(c_rarg3, (intptr_t) Universe::verify_klass_bits());
     __ cmpptr(rax, c_rarg3);
     __ jcc(Assembler::notZero, error);
 
diff --git a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
index ca186af69..ed40fb701 100644
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
@@ -110,7 +110,7 @@ address TemplateInterpreterGenerator::generate_exception_handler_common(const ch
     if (message != NULL) {
       __ lea(rbx, ExternalAddress((address)message));
     } else {
-      __ movptr(rbx, (int32_t)NULL_WORD);
+      __ movptr(rbx, NULL_WORD);
     }
     __ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), rax, rbx);
   }
@@ -123,7 +123,7 @@ address TemplateInterpreterGenerator::generate_exception_handler_common(const ch
 address TemplateInterpreterGenerator::generate_continuation_for(TosState state) {
   address entry = __ pc();
   // NULL last_sp until next java call
-  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
   __ dispatch_next(state);
   return entry;
 }
@@ -176,7 +176,7 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state,
   // Restore stack bottom in case i2c adjusted stack
   __ movptr(rsp, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize));
   // and NULL it as marker that rsp is now tos until next java call
-  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
 
   __ restore_bcp();
   __ restore_locals();
@@ -211,7 +211,7 @@ address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, i
 
   // The stack is not extended by deopt but we must NULL last_sp as this
   // entry is like a "return".
-  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
   __ restore_bcp();
   __ restore_locals();
   // handle exceptions
@@ -382,7 +382,7 @@ void InterpreterGenerator::generate_counter_overflow(Label* do_continue) {
   // indicating if the counter overflow occurs at a backwards branch (non-NULL bcp).
   // The call returns the address of the verified entry point for the method or NULL
   // if the compilation did not complete (either went background or bailed out).
-  __ movptr(rax, (int32_t)false);
+  __ movptr(rax, (intptr_t)false);
   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), rax);
 
   __ movptr(rbx, Address(rbp, method_offset));   // restore methodOop
@@ -1028,7 +1028,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) {
 
   // reset handle block
   __ movptr(t, Address(thread, JavaThread::active_handles_offset()));
-  __ movptr(Address(t, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
+  __ movptr(Address(t, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
 
   // If result was an oop then unbox and save it in the frame
   { Label L;
@@ -1488,7 +1488,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
 
   // Restore sp to interpreter_frame_last_sp even though we are going
   // to empty the expression stack for the exception processing.
-  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
   // rax,: exception
   // rdx: return address/pc that threw exception
   __ restore_bcp();                              // rsi points to call/send
@@ -1608,7 +1608,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
   __ reset_last_Java_frame(rcx, true, true);
   // Restore the last_sp and null it out
   __ movptr(rsp, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize));
-  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
+  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
 
   __ restore_bcp();
   __ restore_locals();
@@ -1636,7 +1636,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
   // restore exception
   __ get_thread(rcx);
   __ movptr(rax, Address(rcx, JavaThread::vm_result_offset()));
-  __ movptr(Address(rcx, JavaThread::vm_result_offset()), (int32_t)NULL_WORD);
+  __ movptr(Address(rcx, JavaThread::vm_result_offset()), NULL_WORD);
   __ verify_oop(rax);
 
   // Inbetween activations - previous activation type unknown yet
diff --git a/src/cpu/x86/vm/templateTable_x86_32.cpp b/src/cpu/x86/vm/templateTable_x86_32.cpp
index 5acd696b2..13242651c 100644
--- a/src/cpu/x86/vm/templateTable_x86_32.cpp
+++ b/src/cpu/x86/vm/templateTable_x86_32.cpp
@@ -137,7 +137,7 @@ static void do_oop_store(InterpreterMacroAssembler* _masm,
         // Do the actual store
         // noreg means NULL
         if (val == noreg) {
-          __ movl(Address(rdx, 0), NULL_WORD);
+          __ movptr(Address(rdx, 0), NULL_WORD);
           // No post barrier for NULL
         } else {
           __ movl(Address(rdx, 0), val);
@@ -152,7 +152,7 @@ static void do_oop_store(InterpreterMacroAssembler* _masm,
     case BarrierSet::CardTableExtension:
       {
         if (val == noreg) {
-          __ movl(obj, NULL_WORD);
+          __ movptr(obj, NULL_WORD);
         } else {
           __ movl(obj, val);
           // flatten object address if needed
@@ -168,7 +168,7 @@ static void do_oop_store(InterpreterMacroAssembler* _masm,
     case BarrierSet::ModRef:
     case BarrierSet::Other:
       if (val == noreg) {
-        __ movl(obj, NULL_WORD);
+        __ movptr(obj, NULL_WORD);
       } else {
         __ movl(obj, val);
       }
diff --git a/src/cpu/x86/vm/x86_32.ad b/src/cpu/x86/vm/x86_32.ad
index 5f6f8724a..6f69cbd49 100644
--- a/src/cpu/x86/vm/x86_32.ad
+++ b/src/cpu/x86/vm/x86_32.ad
@@ -3371,7 +3371,7 @@ encode %{
          masm.movptr(Address(boxReg, 0), 3) ;            // results in ST-before-CAS penalty
          masm.get_thread (scrReg) ; 
          masm.movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2] 
-         masm.movptr(tmpReg, 0);                         // consider: xor vs mov
+         masm.movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
          if (os::is_MP()) { masm.lock(); } 
          masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
       } else 
@@ -3387,7 +3387,7 @@ encode %{
 
          if ((EmitSync & 64) == 0) {
            // Optimistic form: consider XORL tmpReg,tmpReg
-           masm.movptr(tmpReg, 0 ) ; 
+           masm.movptr(tmpReg, NULL_WORD) ; 
          } else { 
            // Can suffer RTS->RTO upgrades on shared or cold $ lines
            // Test-And-CAS instead of CAS
@@ -3587,7 +3587,7 @@ encode %{
          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
          masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), 0) ; 
+         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
          masm.jmpb  (DONE_LABEL) ; 
       } else { 
          masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  
@@ -3596,7 +3596,7 @@ encode %{
          masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
          masm.jccb  (Assembler::notZero, CheckSucc) ; 
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), 0) ; 
+         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
          masm.jmpb  (DONE_LABEL) ; 
       }
 
@@ -3644,7 +3644,7 @@ encode %{
          // We currently use (3), although it's likely that switching to (2)
          // is correct for the future.
             
-         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), 0) ; 
+         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
          if (os::is_MP()) { 
             if (VM_Version::supports_sse2() && 1 == FenceInstruction) { 
               masm.mfence();
diff --git a/src/os/linux/vm/os_linux.cpp b/src/os/linux/vm/os_linux.cpp
index 3b8c0b4dc..a5506d874 100644
--- a/src/os/linux/vm/os_linux.cpp
+++ b/src/os/linux/vm/os_linux.cpp
@@ -279,7 +279,11 @@ void os::init_system_properties_values() {
  *        ...
  *        7: The default directories, normally /lib and /usr/lib.
  */
+#if defined(AMD64) || defined(_LP64) && (defined(SPARC) || defined(PPC) || defined(S390))
+#define DEFAULT_LIBPATH "/usr/lib64:/lib64:/lib:/usr/lib"
+#else
 #define DEFAULT_LIBPATH "/lib:/usr/lib"
+#endif
 
 #define EXTENSIONS_DIR  "/lib/ext"
 #define ENDORSED_DIR    "/lib/endorsed"
@@ -1160,7 +1164,10 @@ void os::Linux::capture_initial_stack(size_t max_size) {
 
         /*                                     1   1   1   1   1   1   1   1   1   1   2   2   2   2   2   2   2   2   2 */
         /*              3  4  5  6  7  8   9   0   1   2   3   4   5   6   7   8   9   0   1   2   3   4   5   6   7   8 */
-        i = sscanf(s, "%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu",
+        i = sscanf(s, "%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld "
+                   UINTX_FORMAT UINTX_FORMAT UINTX_FORMAT
+                   " %lu "
+                   UINTX_FORMAT UINTX_FORMAT UINTX_FORMAT,
              &state,          /* 3  %c  */
              &ppid,           /* 4  %d  */
              &pgrp,           /* 5  %d  */
@@ -1180,13 +1187,13 @@ void os::Linux::capture_initial_stack(size_t max_size) {
              &nice,           /* 19 %ld  */
              &junk,           /* 20 %ld  */
              &it_real,        /* 21 %ld  */
-             &start,          /* 22 %lu  */
-             &vsize,          /* 23 %lu  */
-             &rss,            /* 24 %ld  */
+             &start,          /* 22 UINTX_FORMAT  */
+             &vsize,          /* 23 UINTX_FORMAT  */
+             &rss,            /* 24 UINTX_FORMAT  */
              &rsslim,         /* 25 %lu  */
-             &scodes,         /* 26 %lu  */
-             &ecode,          /* 27 %lu  */
-             &stack_start);   /* 28 %lu  */
+             &scodes,         /* 26 UINTX_FORMAT  */
+             &ecode,          /* 27 UINTX_FORMAT  */
+             &stack_start);   /* 28 UINTX_FORMAT  */
       }
 
       if (i != 28 - 2) {
@@ -2024,7 +2031,8 @@ void os::jvm_path(char *buf, jint len) {
                 CAST_FROM_FN_PTR(address, os::jvm_path),
                 dli_fname, sizeof(dli_fname), NULL);
   assert(ret != 0, "cannot locate libjvm");
-  realpath(dli_fname, buf);
+  if (realpath(dli_fname, buf) == NULL)
+    return;
 
   if (strcmp(Arguments::sun_java_launcher(), "gamma") == 0) {
     // Support for the gamma launcher.  Typical value for buf is
@@ -2048,7 +2056,8 @@ void os::jvm_path(char *buf, jint len) {
         assert(strstr(p, "/libjvm") == p, "invalid library name");
         p = strstr(p, "_g") ? "_g" : "";
 
-        realpath(java_home_var, buf);
+        if (realpath(java_home_var, buf) == NULL)
+          return;
         sprintf(buf + strlen(buf), "/jre/lib/%s", cpu_arch);
         if (0 == access(buf, F_OK)) {
           // Use current module name "libjvm[_g].so" instead of
@@ -2059,7 +2068,8 @@ void os::jvm_path(char *buf, jint len) {
           sprintf(buf + strlen(buf), "/hotspot/libjvm%s.so", p);
         } else {
           // Go back to path of .so
-          realpath(dli_fname, buf);
+          if (realpath(dli_fname, buf) == NULL)
+            return;
         }
       }
     }
@@ -4184,11 +4194,11 @@ static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time) {
   // Skip blank chars
   do s++; while (isspace(*s));
 
-  count = sscanf(s,"%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
-                 &idummy, &idummy, &idummy, &idummy, &idummy, &idummy,
+  count = sscanf(s,"%*c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu",
+                 &idummy, &idummy, &idummy, &idummy, &idummy,
                  &ldummy, &ldummy, &ldummy, &ldummy, &ldummy,
                  &user_time, &sys_time);
-  if ( count != 13 ) return -1;
+  if ( count != 12 ) return -1;
   if (user_sys_cpu_time) {
     return ((jlong)sys_time + (jlong)user_time) * (1000000000 / clock_tics_per_sec);
   } else {
diff --git a/src/share/vm/classfile/classFileParser.cpp b/src/share/vm/classfile/classFileParser.cpp
index 703217f92..4f8ec88f8 100644
--- a/src/share/vm/classfile/classFileParser.cpp
+++ b/src/share/vm/classfile/classFileParser.cpp
@@ -232,7 +232,9 @@ constantPoolHandle ClassFileParser::parse_constant_pool(TRAPS) {
     length >= 1, "Illegal constant pool size %u in class file %s",
     length, CHECK_(nullHandle));
   constantPoolOop constant_pool =
-                      oopFactory::new_constantPool(length, CHECK_(nullHandle));
+                      oopFactory::new_constantPool(length,
+                                                   methodOopDesc::IsSafeConc,
+                                                   CHECK_(nullHandle));
   constantPoolHandle cp (THREAD, constant_pool);
 
   cp->set_partially_loaded();    // Enables heap verify to work on partial constantPoolOops
@@ -1675,7 +1677,8 @@ methodHandle ClassFileParser::parse_method(constantPoolHandle cp, bool is_interf
   // All sizing information for a methodOop is finally available, now create it
   methodOop m_oop  = oopFactory::new_method(
     code_length, access_flags, linenumber_table_length,
-    total_lvt_length, checked_exceptions_length, CHECK_(nullHandle));
+    total_lvt_length, checked_exceptions_length,
+    methodOopDesc::IsSafeConc, CHECK_(nullHandle));
   methodHandle m (THREAD, m_oop);
 
   ClassLoadingService::add_class_method_size(m_oop->size()*HeapWordSize);
diff --git a/src/share/vm/classfile/javaClasses.cpp b/src/share/vm/classfile/javaClasses.cpp
index ea68a3667..385e6a16e 100644
--- a/src/share/vm/classfile/javaClasses.cpp
+++ b/src/share/vm/classfile/javaClasses.cpp
@@ -441,6 +441,7 @@ oop java_lang_Class::primitive_mirror(BasicType t) {
 
 bool java_lang_Class::offsets_computed = false;
 int  java_lang_Class::classRedefinedCount_offset = -1;
+int  java_lang_Class::parallelCapable_offset = -1;
 
 void java_lang_Class::compute_offsets() {
   assert(!offsets_computed, "offsets should be initialized only once");
@@ -451,6 +452,23 @@ void java_lang_Class::compute_offsets() {
   // so don't go fatal.
   compute_optional_offset(classRedefinedCount_offset,
     k, vmSymbols::classRedefinedCount_name(), vmSymbols::int_signature());
+
+  // The field indicating parallelCapable (parallelLockMap) is only present starting in JDK 7.
+  klassOop k1 = SystemDictionary::classloader_klass();
+  compute_optional_offset(parallelCapable_offset,
+    k1, vmSymbols::parallelCapable_name(), vmSymbols::concurrenthashmap_signature());
+}
+
+// For class loader objects: a loader is parallelCapable when its
+// parallelCapable field (found via parallelCapable_offset) is non-null.
+// Written to by java.lang.ClassLoader; the VM only reads this field, never sets it.
+bool java_lang_Class::parallelCapable(oop class_loader) {
+  if (!JDK_Version::is_gte_jdk17x_version()
+     || parallelCapable_offset == -1) {
+     // Default for backward compatibility is false
+     return false;
+  }
+  return (class_loader->obj_field(parallelCapable_offset) != NULL);
 }
 
 int java_lang_Class::classRedefinedCount(oop the_class_mirror) {
@@ -866,7 +884,7 @@ char* java_lang_Throwable::print_stack_element_to_buffer(methodOop method, int b
     }
     nmethod* nm = method->code();
     if (WizardMode && nm != NULL) {
-      sprintf(buf + (int)strlen(buf), "(nmethod %#x)", nm);
+      sprintf(buf + (int)strlen(buf), "(nmethod " PTR_FORMAT ")", (intptr_t)nm);
     }
   }
 
diff --git a/src/share/vm/classfile/javaClasses.hpp b/src/share/vm/classfile/javaClasses.hpp
index 933d5b82d..612a00135 100644
--- a/src/share/vm/classfile/javaClasses.hpp
+++ b/src/share/vm/classfile/javaClasses.hpp
@@ -141,6 +141,7 @@ class java_lang_Class : AllStatic {
   static void compute_offsets();
   static bool offsets_computed;
   static int classRedefinedCount_offset;
+  static int parallelCapable_offset;
 
  public:
   // Instance creation
@@ -168,6 +169,8 @@ class java_lang_Class : AllStatic {
   // Support for classRedefinedCount field
   static int classRedefinedCount(oop the_class_mirror);
   static void set_classRedefinedCount(oop the_class_mirror, int value);
+  // Support for parallelCapable field (read from a class loader object, not a mirror)
+  static bool parallelCapable(oop class_loader);
   // Debugging
   friend class JavaClasses;
   friend class instanceKlass;   // verification code accesses offsets
diff --git a/src/share/vm/classfile/systemDictionary.cpp b/src/share/vm/classfile/systemDictionary.cpp
index 0908e1c87..a5d797f24 100644
--- a/src/share/vm/classfile/systemDictionary.cpp
+++ b/src/share/vm/classfile/systemDictionary.cpp
@@ -90,6 +90,14 @@ bool SystemDictionary::is_internal_format(symbolHandle class_name) {
 #endif
 
 // ----------------------------------------------------------------------------
+// Parallel class loading check
+
+bool SystemDictionary::is_parallelCapable(Handle class_loader) {
+  if (UnsyncloadClass || class_loader.is_null()) return true;  // flag set, or bootstrap (null) loader
+  if (AlwaysLockClassLoader) return false;  // flag overrides what the loader itself reports
+  return java_lang_Class::parallelCapable(class_loader());  // otherwise ask the loader object
+}
+// ----------------------------------------------------------------------------
 // Resolving of classes
 
 // Forwards to resolve_or_null
@@ -196,7 +204,8 @@ klassOop SystemDictionary::resolve_array_class_or_null(symbolHandle class_name,
 // super-class callers:
 //   ClassFileParser - for defineClass & jvmtiRedefineClasses
 //   load_shared_class - while loading a class from shared archive
-//   resolve_instance_class_or_fail:
+//   resolve_instance_class_or_null:
+//     via: handle_parallel_super_load
 //      when resolving a class that has an existing placeholder with
 //      a saved superclass [i.e. a defineClass is currently in progress]
 //      if another thread is trying to resolve the class, it must do
@@ -283,12 +292,9 @@ klassOop SystemDictionary::resolve_super_or_fail(symbolHandle child_name,
       if (probe && probe->check_seen_thread(THREAD, PlaceholderTable::LOAD_SUPER)) {
           throw_circularity_error = true;
       }
-
-      // add placeholder entry even if error - callers will remove on error
+    }
+    if (!throw_circularity_error) {
       PlaceholderEntry* newprobe = placeholders()->find_and_add(p_index, p_hash, child_name, class_loader, PlaceholderTable::LOAD_SUPER, class_name, THREAD);
-      if (throw_circularity_error) {
-         newprobe->remove_seen_thread(THREAD, PlaceholderTable::LOAD_SUPER);
-      }
     }
   }
   if (throw_circularity_error) {
@@ -325,7 +331,6 @@ klassOop SystemDictionary::resolve_super_or_fail(symbolHandle child_name,
   return superk_h();
 }
 
-
 void SystemDictionary::validate_protection_domain(instanceKlassHandle klass,
                                                   Handle class_loader,
                                                   Handle protection_domain,
@@ -421,7 +426,7 @@ void SystemDictionary::double_lock_wait(Handle lockObject, TRAPS) {
   bool calledholdinglock
       = ObjectSynchronizer::current_thread_holds_lock((JavaThread*)THREAD, lockObject);
   assert(calledholdinglock,"must hold lock for notify");
-  assert(!UnsyncloadClass, "unexpected double_lock_wait");
+  assert((!(lockObject() == _system_loader_lock_obj) && !is_parallelCapable(lockObject)), "unexpected double_lock_wait");
   ObjectSynchronizer::notifyall(lockObject, THREAD);
   intptr_t recursions =  ObjectSynchronizer::complete_exit(lockObject, THREAD);
   SystemDictionary_lock->wait();
@@ -439,7 +444,7 @@ void SystemDictionary::double_lock_wait(Handle lockObject, TRAPS) {
 // even in non-circularity situations.
 // Note: only one thread can define the class, but multiple can resolve
 // Note: must call resolve_super_or_fail even if null super -
-// to force placeholder entry creation for this class
+// to force placeholder entry creation for this class for circularity detection
 // Caller must check for pending exception
 // Returns non-null klassOop if other thread has completed load
 // and we are done,
@@ -477,9 +482,9 @@ instanceKlassHandle SystemDictionary::handle_parallel_super_load(
     SystemDictionary_lock->notify_all();
   }
 
-  // UnsyncloadClass does NOT wait for parallel superclass loads to complete
-  // Bootstrap classloader does wait for parallel superclass loads
- if (UnsyncloadClass) {
+  // parallelCapable class loaders do NOT wait for parallel superclass loads to complete
+  // Serial class loaders and bootstrap classloader do wait for superclass loads
+ if (!class_loader.is_null() && is_parallelCapable(class_loader)) {
     MutexLocker mu(SystemDictionary_lock, THREAD);
     // Check if classloading completed while we were loading superclass or waiting
     klassOop check = find_class(d_index, d_hash, name, class_loader);
@@ -566,10 +571,10 @@ klassOop SystemDictionary::resolve_instance_class_or_null(symbolHandle class_nam
   // This lock must be acquired here so the waiter will find
   // any successful result in the SystemDictionary and not attempt
   // the define
-  // Classloaders that support parallelism, e.g. bootstrap classloader,
+  // ParallelCapable Classloaders and the bootstrap classloader,
   // or all classloaders with UnsyncloadClass do not acquire lock here
   bool DoObjectLock = true;
-  if (UnsyncloadClass || (class_loader.is_null())) {
+  if (is_parallelCapable(class_loader)) {
     DoObjectLock = false;
   }
 
@@ -627,6 +632,9 @@ klassOop SystemDictionary::resolve_instance_class_or_null(symbolHandle class_nam
     // Five cases:
     // All cases need to prevent modifying bootclasssearchpath
     // in parallel with a classload of same classname
+    // RedefineClasses uses existence of the placeholder for the duration
+    // of the class load to prevent concurrent redefinition of not completely
+    // defined classes.
     // case 1. traditional classloaders that rely on the classloader object lock
     //   - no other need for LOAD_INSTANCE
     // case 2. traditional classloaders that break the classloader object lock
@@ -642,12 +650,13 @@ klassOop SystemDictionary::resolve_instance_class_or_null(symbolHandle class_nam
     //    This classloader supports parallelism at the classloader level,
     //    but only allows a single load of a class/classloader pair.
     //    No performance benefit and no deadlock issues.
-    // case 5. Future: parallel user level classloaders - without objectLocker
+    // case 5. parallelCapable user level classloaders - without objectLocker
+    //    Allow parallel classloading of a class/classloader pair
     symbolHandle nullsymbolHandle;
     bool throw_circularity_error = false;
     {
       MutexLocker mu(SystemDictionary_lock, THREAD);
-      if (!UnsyncloadClass) {
+      if (class_loader.is_null() || !is_parallelCapable(class_loader)) {
         PlaceholderEntry* oldprobe = placeholders()->get_entry(p_index, p_hash, name, class_loader);
         if (oldprobe) {
           // only need check_seen_thread once, not on each loop
@@ -681,25 +690,25 @@ klassOop SystemDictionary::resolve_instance_class_or_null(symbolHandle class_nam
         }
       }
       // All cases: add LOAD_INSTANCE
-      // case 3: UnsyncloadClass: allow competing threads to try
+      // case 3: UnsyncloadClass || case 5: parallelCapable: allow competing threads to try
       // LOAD_INSTANCE in parallel
       // add placeholder entry even if error - callers will remove on error
-      if (!class_has_been_loaded) {
+      if (!throw_circularity_error && !class_has_been_loaded) {
         PlaceholderEntry* newprobe = placeholders()->find_and_add(p_index, p_hash, name, class_loader, PlaceholderTable::LOAD_INSTANCE, nullsymbolHandle, THREAD);
-        if (throw_circularity_error) {
-          newprobe->remove_seen_thread(THREAD, PlaceholderTable::LOAD_INSTANCE);
-        }
         // For class loaders that do not acquire the classloader object lock,
         // if they did not catch another thread holding LOAD_INSTANCE,
         // need a check analogous to the acquire ObjectLocker/find_class
         // i.e. now that we hold the LOAD_INSTANCE token on loading this class/CL
         // one final check if the load has already completed
+        // class loaders holding the ObjectLock shouldn't find the class here
         klassOop check = find_class(d_index, d_hash, name, class_loader);
         if (check != NULL) {
         // Klass is already loaded, so just return it
           k = instanceKlassHandle(THREAD, check);
           class_has_been_loaded = true;
           newprobe->remove_seen_thread(THREAD, PlaceholderTable::LOAD_INSTANCE);
+          placeholders()->find_and_remove(p_index, p_hash, name, class_loader, THREAD);
+          SystemDictionary_lock->notify_all();
         }
       }
     }
@@ -714,18 +723,14 @@ klassOop SystemDictionary::resolve_instance_class_or_null(symbolHandle class_nam
       // Do actual loading
       k = load_instance_class(name, class_loader, THREAD);
 
-      // In custom class loaders, the usual findClass calls
-      // findLoadedClass, which directly searches  the SystemDictionary, then
-      // defineClass. If these are not atomic with respect to other threads,
-      // the findLoadedClass can fail, but the defineClass can get a
-      // LinkageError:: duplicate class definition.
+      // For UnsyncloadClass and AllowParallelDefineClass only:
       // If they got a linkageError, check if a parallel class load succeeded.
       // If it did, then for bytecode resolution the specification requires
       // that we return the same result we did for the other thread, i.e. the
       // successfully loaded instanceKlass
-      // Note: Class can not be unloaded as long as any classloader refs exist
       // Should not get here for classloaders that support parallelism
-      // with the new cleaner mechanism, e.g. bootstrap classloader
+      // with the new cleaner mechanism
+      // Bootstrap goes through here to allow for an extra guarantee check
       if (UnsyncloadClass || (class_loader.is_null())) {
         if (k.is_null() && HAS_PENDING_EXCEPTION
           && PENDING_EXCEPTION->is_a(SystemDictionary::linkageError_klass())) {
@@ -955,10 +960,10 @@ klassOop SystemDictionary::parse_stream(symbolHandle class_name,
   instanceKlassHandle k = ClassFileParser(st).parseClassFile(class_name,
                                                              class_loader,
                                                              protection_domain,
-                                                             cp_patches,
                                                              parsed_name,
                                                              THREAD);
 
+
   // We don't redefine the class, so we just need to clean up whether there
   // was an error or not (don't want to modify any system dictionary
   // data structures).
@@ -1013,11 +1018,17 @@ klassOop SystemDictionary::resolve_from_stream(symbolHandle class_name,
                                                ClassFileStream* st,
                                                TRAPS) {
 
-  // Make sure we are synchronized on the class loader before we initiate
-  // loading.
+  // Classloaders that support parallelism, e.g. bootstrap classloader,
+  // or all classloaders with UnsyncloadClass do not acquire lock here
+  bool DoObjectLock = true;
+  if (is_parallelCapable(class_loader)) {
+    DoObjectLock = false;
+  }
+
+  // Make sure we are synchronized on the class loader before we proceed
   Handle lockObject = compute_loader_lock_object(class_loader, THREAD);
   check_loader_lock_contention(lockObject, THREAD);
-  ObjectLocker ol(lockObject, THREAD);
+  ObjectLocker ol(lockObject, THREAD, DoObjectLock);
 
   symbolHandle parsed_name;
 
@@ -1069,7 +1080,13 @@ klassOop SystemDictionary::resolve_from_stream(symbolHandle class_name,
            "external class name format used internally");
 
     // Add class just loaded
-    define_instance_class(k, THREAD);
+    // If a class loader supports parallel classloading, handle parallel define requests
+    // find_or_define_instance_class may return a different instanceKlass
+    if (is_parallelCapable(class_loader)) {
+      k = find_or_define_instance_class(class_name, class_loader, k, THREAD);
+    } else {
+      define_instance_class(k, THREAD);
+    }
   }
 
   // If parsing the class file or define_instance_class failed, we
@@ -1299,7 +1316,7 @@ instanceKlassHandle SystemDictionary::load_instance_class(symbolHandle class_nam
     }
 #endif // KERNEL
 
-    // find_or_define_instance_class may return a different k
+    // find_or_define_instance_class may return a different instanceKlass
     if (!k.is_null()) {
       k = find_or_define_instance_class(class_name, class_loader, k, CHECK_(nh));
     }
@@ -1316,14 +1333,24 @@ instanceKlassHandle SystemDictionary::load_instance_class(symbolHandle class_nam
 
     KlassHandle spec_klass (THREAD, SystemDictionary::classloader_klass());
 
-    // UnsyncloadClass option means don't synchronize loadClass() calls.
-    // loadClassInternal() is synchronized and public loadClass(String) is not.
-    // This flag is for diagnostic purposes only. It is risky to call
+    // Call public unsynchronized loadClass(String) directly for all class loaders
+    // for parallelCapable class loaders. JDK >=7, loadClass(String, boolean) will
+    // acquire a class-name based lock rather than the class loader object lock.
+    // JDK < 7 already acquire the class loader lock in loadClass(String, boolean),
+    // so the call to loadClassInternal() was not required.
+    //
+    // UnsyncloadClass flag means both call loadClass(String) and do
+    // not acquire the class loader lock even for class loaders that are
+    // not parallelCapable. This was a risky transitional
+    // flag for diagnostic purposes only. It is risky to call
     // custom class loaders without synchronization.
     // WARNING If a custom class loader does NOT synchronizer findClass, or callers of
-    // findClass, this flag risks unexpected timing bugs in the field.
+    // findClass, the UnsyncloadClass flag risks unexpected timing bugs in the field.
     // Do NOT assume this will be supported in future releases.
-    if (!UnsyncloadClass && has_loadClassInternal()) {
+    //
+    // Added MustCallLoadClassInternal in case we discover in the field
+    // a customer that counts on this call
+    if (MustCallLoadClassInternal && has_loadClassInternal()) {
       JavaCalls::call_special(&result,
                               class_loader,
                               spec_klass,
@@ -1365,14 +1392,17 @@ void SystemDictionary::define_instance_class(instanceKlassHandle k, TRAPS) {
 
   Handle class_loader_h(THREAD, k->class_loader());
 
-  // for bootstrap classloader don't acquire lock
-  if (!class_loader_h.is_null()) {
+ // for bootstrap and other parallel classloaders don't acquire lock,
+ // use placeholder token
+ // If a parallelCapable class loader calls define_instance_class instead of
+ // find_or_define_instance_class to get here, we have a timing
+ // hole with systemDictionary updates and check_constraints
+ if (!class_loader_h.is_null() && !is_parallelCapable(class_loader_h)) {
     assert(ObjectSynchronizer::current_thread_holds_lock((JavaThread*)THREAD,
          compute_loader_lock_object(class_loader_h, THREAD)),
          "define called without lock");
   }
 
-
   // Check class-loading constraints. Throw exception if violation is detected.
   // Grabs and releases SystemDictionary_lock
   // The check_constraints/find_class call and update_dictionary sequence
@@ -1427,59 +1457,63 @@ void SystemDictionary::define_instance_class(instanceKlassHandle k, TRAPS) {
 
 // Support parallel classloading
 // Initial implementation for bootstrap classloader
-// For future:
 // For custom class loaders that support parallel classloading,
-// in case they do not synchronize around
-// FindLoadedClass/DefineClass calls, we check for parallel
+// With AllowParallelDefineClass flag==true, in case they do not synchronize around
+// FindLoadedClass/DefineClass calls, we check for parallel
 // loading for them, wait if a defineClass is in progress
 // and return the initial requestor's results
+// With AllowParallelDefineClass flag==false, call through to define_instance_class
+// which will throw LinkageError: duplicate class definition.
 // For better performance, the class loaders should synchronize
-// findClass(), i.e. FindLoadedClass/DefineClass or they
+// findClass(), i.e. FindLoadedClass/DefineClassIfAbsent or they
 // potentially waste time reading and parsing the bytestream.
 // Note: VM callers should ensure consistency of k/class_name,class_loader
 instanceKlassHandle SystemDictionary::find_or_define_instance_class(symbolHandle class_name, Handle class_loader, instanceKlassHandle k, TRAPS) {
 
   instanceKlassHandle nh = instanceKlassHandle(); // null Handle
+  symbolHandle name_h(THREAD, k->name()); // passed in class_name may be null
 
-  unsigned int d_hash = dictionary()->compute_hash(class_name, class_loader);
+  unsigned int d_hash = dictionary()->compute_hash(name_h, class_loader);
   int d_index = dictionary()->hash_to_index(d_hash);
 
 // Hold SD lock around find_class and placeholder creation for DEFINE_CLASS
-  unsigned int p_hash = placeholders()->compute_hash(class_name, class_loader);
+  unsigned int p_hash = placeholders()->compute_hash(name_h, class_loader);
   int p_index = placeholders()->hash_to_index(p_hash);
   PlaceholderEntry* probe;
 
   {
     MutexLocker mu(SystemDictionary_lock, THREAD);
     // First check if class already defined
-    klassOop check = find_class(d_index, d_hash, class_name, class_loader);
+    klassOop check = find_class(d_index, d_hash, name_h, class_loader);
     if (check != NULL) {
       return(instanceKlassHandle(THREAD, check));
     }
 
     // Acquire define token for this class/classloader
     symbolHandle nullsymbolHandle;
-    probe = placeholders()->find_and_add(p_index, p_hash, class_name, class_loader, PlaceholderTable::DEFINE_CLASS, nullsymbolHandle, THREAD);
-    // Check if another thread defining in parallel
-    if (probe->definer() == NULL) {
-      // Thread will define the class
-      probe->set_definer(THREAD);
-    } else {
-      // Wait for defining thread to finish and return results
-      while (probe->definer() != NULL) {
-        SystemDictionary_lock->wait();
-      }
-      if (probe->instanceKlass() != NULL) {
+    probe = placeholders()->find_and_add(p_index, p_hash, name_h, class_loader, PlaceholderTable::DEFINE_CLASS, nullsymbolHandle, THREAD);
+    // Wait if another thread defining in parallel
+    // All threads wait - even those that will throw duplicate class: otherwise
+    // caller is surprised by LinkageError: duplicate, but findLoadedClass fails
+    // if other thread has not finished updating dictionary
+    while (probe->definer() != NULL) {
+      SystemDictionary_lock->wait();
+    }
+    // Only special cases allow parallel defines and can use other thread's results
+    // Other cases fall through, and may run into duplicate defines
+    // caught by finding an entry in the SystemDictionary
+    if ((UnsyncloadClass || AllowParallelDefineClass) && (probe->instanceKlass() != NULL)) {
         probe->remove_seen_thread(THREAD, PlaceholderTable::DEFINE_CLASS);
-        return(instanceKlassHandle(THREAD, probe->instanceKlass()));
-      } else {
-        // If definer had an error, try again as any new thread would
-        probe->set_definer(THREAD);
+        placeholders()->find_and_remove(p_index, p_hash, name_h, class_loader, THREAD);
+        SystemDictionary_lock->notify_all();
 #ifdef ASSERT
-        klassOop check = find_class(d_index, d_hash, class_name, class_loader);
-        assert(check == NULL, "definer missed recording success");
+        klassOop check = find_class(d_index, d_hash, name_h, class_loader);
+        assert(check != NULL, "definer missed recording success");
 #endif
-      }
+        return(instanceKlassHandle(THREAD, probe->instanceKlass()));
+    } else {
+      // This thread will define the class (even if earlier thread tried and had an error)
+      probe->set_definer(THREAD);
     }
   }
 
@@ -1490,7 +1524,7 @@ instanceKlassHandle SystemDictionary::find_or_define_instance_class(symbolHandle
   // definer must notify any waiting threads
   {
     MutexLocker mu(SystemDictionary_lock, THREAD);
-    PlaceholderEntry* probe = placeholders()->get_entry(p_index, p_hash, class_name, class_loader);
+    PlaceholderEntry* probe = placeholders()->get_entry(p_index, p_hash, name_h, class_loader);
     assert(probe != NULL, "DEFINE_CLASS placeholder lost?");
     if (probe != NULL) {
       if (HAS_PENDING_EXCEPTION) {
@@ -1501,6 +1535,7 @@ instanceKlassHandle SystemDictionary::find_or_define_instance_class(symbolHandle
       }
       probe->set_definer(NULL);
       probe->remove_seen_thread(THREAD, PlaceholderTable::DEFINE_CLASS);
+      placeholders()->find_and_remove(p_index, p_hash, name_h, class_loader, THREAD);
       SystemDictionary_lock->notify_all();
     }
   }
@@ -1512,7 +1547,6 @@ instanceKlassHandle SystemDictionary::find_or_define_instance_class(symbolHandle
 
   return k;
 }
-
 Handle SystemDictionary::compute_loader_lock_object(Handle class_loader, TRAPS) {
   // If class_loader is NULL we synchronize on _system_loader_lock_obj
   if (class_loader.is_null()) {
@@ -1902,11 +1936,11 @@ void SystemDictionary::initialize_preloaded_classes(TRAPS) {
     warning("Cannot find sun/jkernel/DownloadManager");
   }
 #endif // KERNEL
+
   { // Compute whether we should use loadClass or loadClassInternal when loading classes.
     methodOop method = instanceKlass::cast(classloader_klass())->find_method(vmSymbols::loadClassInternal_name(), vmSymbols::string_class_signature());
     _has_loadClassInternal = (method != NULL);
   }
-
   { // Compute whether we should use checkPackageAccess or NOT
     methodOop method = instanceKlass::cast(classloader_klass())->find_method(vmSymbols::checkPackageAccess_name(), vmSymbols::class_protectiondomain_signature());
     _has_checkPackageAccess = (method != NULL);
diff --git a/src/share/vm/classfile/systemDictionary.hpp b/src/share/vm/classfile/systemDictionary.hpp
index beade180f..38abf2d3c 100644
--- a/src/share/vm/classfile/systemDictionary.hpp
+++ b/src/share/vm/classfile/systemDictionary.hpp
@@ -526,6 +526,7 @@ private:
   static instanceKlassHandle load_instance_class(symbolHandle class_name, Handle class_loader, TRAPS);
   static Handle compute_loader_lock_object(Handle class_loader, TRAPS);
   static void check_loader_lock_contention(Handle loader_lock, TRAPS);
+  static bool is_parallelCapable(Handle class_loader);
 
   static klassOop find_shared_class(symbolHandle class_name);
 
diff --git a/src/share/vm/classfile/vmSymbols.hpp b/src/share/vm/classfile/vmSymbols.hpp
index 6a5a796d0..8379af9c4 100644
--- a/src/share/vm/classfile/vmSymbols.hpp
+++ b/src/share/vm/classfile/vmSymbols.hpp
@@ -362,6 +362,7 @@
   template(class_signature,                           "Ljava/lang/Class;")                                        \
   template(string_signature,                          "Ljava/lang/String;")                                       \
   template(reference_signature,                       "Ljava/lang/ref/Reference;")                                \
+  template(concurrenthashmap_signature,               "Ljava/util/concurrent/ConcurrentHashMap;")                 \
   /* signature symbols needed by intrinsics */                                                                    \
   VM_INTRINSICS_DO(VM_INTRINSIC_IGNORE, VM_SYMBOL_IGNORE, VM_SYMBOL_IGNORE, template, VM_ALIAS_IGNORE)            \
                                                                                                                   \
@@ -374,6 +375,9 @@
   /* used by ClassFormatError when class name is not known yet */                                                 \
   template(unknown_class_name,                        "<Unknown>")                                                \
                                                                                                                   \
+  /* used to identify class loaders handling parallel class loading */                                            \
+  template(parallelCapable_name,                      "parallelLockMap")                                          \
+                                                                                                                  \
   /* JVM monitoring and management support */                                                                     \
   template(java_lang_StackTraceElement_array,          "[Ljava/lang/StackTraceElement;")                          \
   template(java_lang_management_ThreadState,           "java/lang/management/ThreadState")                        \
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
index 4b1fecd7c..6c6272a2f 100644
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
@@ -706,6 +706,30 @@ void CompactibleFreeListSpace::object_iterate(ObjectClosure* blk) {
   }
 }
 
+// Apply the given closure to each live object in the space
+//   The usage of CompactibleFreeListSpace
+// by the ConcurrentMarkSweepGeneration for concurrent GC's allows
+// objects in the space with references to objects that are no longer
+// valid.  For example, an object may reference another object
+// that has already been swept up (collected).  This method uses
+// obj_is_alive() to determine whether it is safe to apply the closure to
+// an object.  See obj_is_alive() for details on how liveness of an
+// object is decided.
+
+void CompactibleFreeListSpace::safe_object_iterate(ObjectClosure* blk) {
+  assert_lock_strong(freelistLock());
+  NOT_PRODUCT(verify_objects_initialized());
+  HeapWord *cur, *limit;
+  size_t curSize;
+  for (cur = bottom(), limit = end(); cur < limit;
+       cur += curSize) {
+    curSize = block_size(cur);
+    if (block_is_obj(cur) && obj_is_alive(cur)) {
+      blk->do_object(oop(cur));
+    }
+  }
+}
+
 void CompactibleFreeListSpace::object_iterate_mem(MemRegion mr,
                                                   UpwardsObjectClosure* cl) {
   assert_locked();
@@ -861,7 +885,9 @@ const {
     } else {
       // must read from what 'p' points to in each loop.
       klassOop k = ((volatile oopDesc*)p)->klass_or_null();
-      if (k != NULL && ((oopDesc*)p)->is_parsable()) {
+      if (k != NULL &&
+          ((oopDesc*)p)->is_parsable() &&
+          ((oopDesc*)p)->is_conc_safe()) {
         assert(k->is_oop(), "Should really be klass oop.");
         oop o = (oop)p;
         assert(o->is_oop(), "Should be an oop");
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp
index 5306a8f30..e0c48a1b7 100644
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp
@@ -481,6 +481,15 @@ class CompactibleFreeListSpace: public CompactibleSpace {
   void oop_iterate(OopClosure* cl);
 
   void object_iterate(ObjectClosure* blk);
+  // Apply the closure to each object in the space whose references
+  // point to objects in the heap.  The usage of CompactibleFreeListSpace
+  // by the ConcurrentMarkSweepGeneration for concurrent GC's allows
+  // objects in the space with references to objects that are no longer
+  // valid.  For example, an object may reference another object
+  // that has already been swept up (collected).  This method uses
+  // obj_is_alive() to determine whether it is safe to iterate over
+  // an object.
+  void safe_object_iterate(ObjectClosure* blk);
   void object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl);
 
   // Requires that "mr" be entirely within the space.
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
index e83441076..d716797ba 100644
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
@@ -3018,6 +3018,16 @@ ConcurrentMarkSweepGeneration::object_iterate(ObjectClosure* cl) {
 }
 
 void
+ConcurrentMarkSweepGeneration::safe_object_iterate(ObjectClosure* cl) {
+  if (freelistLock()->owned_by_self()) {
+    Generation::safe_object_iterate(cl);
+  } else {
+    MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
+    Generation::safe_object_iterate(cl);
+  }
+}
+
+void
 ConcurrentMarkSweepGeneration::pre_adjust_pointers() {
 }
 
@@ -6623,7 +6633,11 @@ size_t ScanMarkedObjectsAgainCarefullyClosure::do_object_careful_m(
   if (_bitMap->isMarked(addr)) {
     // it's marked; is it potentially uninitialized?
     if (p->klass_or_null() != NULL) {
-      if (CMSPermGenPrecleaningEnabled && !p->is_parsable()) {
+      // If is_conc_safe is false, the object may be undergoing
+      // change by the VM outside a safepoint.  Don't try to
+      // scan it, but rather leave it for the remark phase.
+      if (CMSPermGenPrecleaningEnabled &&
+          (!p->is_conc_safe() || !p->is_parsable())) {
         // Signal precleaning to redirty the card since
         // the klass pointer is already installed.
         assert(size == 0, "Initial value");
@@ -7001,7 +7015,6 @@ void MarkFromRootsClosure::scanOopsInOop(HeapWord* ptr) {
       _mut->clear_range(mr);
     }
   DEBUG_ONLY(})
-
   // Note: the finger doesn't advance while we drain
   // the stack below.
   PushOrMarkClosure pushOrMarkClosure(_collector,
@@ -8062,9 +8075,13 @@ size_t SweepClosure::doLiveChunk(FreeChunk* fc) {
     #ifdef DEBUG
       if (oop(addr)->klass_or_null() != NULL &&
           (   !_collector->should_unload_classes()
-           || oop(addr)->is_parsable())) {
+           || (oop(addr)->is_parsable()) &&
+               oop(addr)->is_conc_safe())) {
         // Ignore mark word because we are running concurrent with mutators
         assert(oop(addr)->is_oop(true), "live block should be an oop");
+        // is_conc_safe is checked before performing this assertion
+        // because an object that is not is_conc_safe may yet have
+        // the return from size() correct.
         assert(size ==
                CompactibleFreeListSpace::adjustObjectSize(oop(addr)->size()),
                "P-mark and computed size do not agree");
@@ -8077,6 +8094,13 @@ size_t SweepClosure::doLiveChunk(FreeChunk* fc) {
            (!_collector->should_unload_classes()
             || oop(addr)->is_parsable()),
            "Should be an initialized object");
+    // Note that there are objects used during class redefinition
+    // (e.g., merge_cp in VM_RedefineClasses::merge_cp_and_rewrite())
+    // which are discarded with their is_conc_safe state still
+    // false.  These objects may be floating garbage so may be
+    // seen here.  If they are floating garbage their size
+    // should be attainable from their klass.  Do not assert that
+    // is_conc_safe() is true for oop(addr).
     // Ignore mark word because we are running concurrent with mutators
     assert(oop(addr)->is_oop(true), "live block should be an oop");
     // Verify that the bit map has no bits marked between
@@ -8484,7 +8508,7 @@ bool CMSCollector::take_from_overflow_list(size_t num, CMSMarkStack* stack) {
   size_t i = num;
   oop  cur = _overflow_list;
   const markOop proto = markOopDesc::prototype();
-  NOT_PRODUCT(size_t n = 0;)
+  NOT_PRODUCT(ssize_t n = 0;)
   for (oop next; i > 0 && cur != NULL; cur = next, i--) {
     next = oop(cur->mark());
     cur->set_mark(proto);   // until proven otherwise
@@ -8501,45 +8525,131 @@ bool CMSCollector::take_from_overflow_list(size_t num, CMSMarkStack* stack) {
   return !stack->isEmpty();
 }
 
-// Multi-threaded; use CAS to break off a prefix
+#define BUSY  (oop(0x1aff1aff))
+// (MT-safe) Get a prefix of at most "num" from the list.
+// The overflow list is chained through the mark word of
+// each object in the list. We fetch the entire list,
+// break off a prefix of the right size and return the
+// remainder. If other threads try to take objects from
+// the overflow list at that time, they will wait for
+// some time to see if data becomes available. If (and
+// only if) another thread places one or more object(s)
+// on the global list before we have returned the suffix
+// to the global list, we will walk down our local list
+// to find its end and append the global list to
+// our suffix before returning it. This suffix walk can
+// prove to be expensive (quadratic in the amount of traffic)
+// when there are many objects in the overflow list and
+// there is much producer-consumer contention on the list.
+// *NOTE*: The overflow list manipulation code here and
+// in ParNewGeneration:: are very similar in shape,
+// except that in the ParNew case we use the old (from/eden)
+// copy of the object to thread the list via its klass word.
+// Because of the common code, if you make any changes in
+// the code below, please check the ParNew version to see if
+// similar changes might be needed.
+// CR 6797058 has been filed to consolidate the common code.
 bool CMSCollector::par_take_from_overflow_list(size_t num,
                                                OopTaskQueue* work_q) {
-  assert(work_q->size() == 0, "That's the current policy");
+  assert(work_q->size() == 0, "First empty local work queue");
   assert(num < work_q->max_elems(), "Can't bite more than we can chew");
   if (_overflow_list == NULL) {
     return false;
   }
   // Grab the entire list; we'll put back a suffix
-  oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list);
-  if (prefix == NULL) {  // someone grabbed it before we did ...
-    // ... we could spin for a short while, but for now we don't
-    return false;
+  oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
+  Thread* tid = Thread::current();
+  size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads;
+  size_t sleep_time_millis = MAX2((size_t)1, num/100);
+  // If the list is busy, we spin for a short while,
+  // sleeping between attempts to get the list.
+  for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
+    os::sleep(tid, sleep_time_millis, false);
+    if (_overflow_list == NULL) {
+      // Nothing left to take
+      return false;
+    } else if (_overflow_list != BUSY) {
+      // Try and grab the prefix
+      prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
+    }
+  }
+  // If the list was found to be empty, or we spun long
+  // enough, we give up and return empty-handed. If we leave
+  // the list in the BUSY state below, it must be the case that
+  // some other thread holds the overflow list and will set it
+  // to a non-BUSY state in the future.
+  if (prefix == NULL || prefix == BUSY) {
+     // Nothing to take or waited long enough
+     if (prefix == NULL) {
+       // Write back the NULL in case we overwrote it with BUSY above
+       // and it is still the same value.
+       (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
+     }
+     return false;
   }
+  assert(prefix != NULL && prefix != BUSY, "Error");
   size_t i = num;
   oop cur = prefix;
+  // Walk down the first "num" objects, unless we reach the end.
   for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--);
-  if (cur->mark() != NULL) {
+  if (cur->mark() == NULL) {
+    // We have "num" or fewer elements in the list, so there
+    // is nothing to return to the global list.
+    // Write back the NULL in lieu of the BUSY we wrote
+    // above, if it is still the same value.
+    if (_overflow_list == BUSY) {
+      (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
+    }
+  } else {
+    // Chop off the suffix and return it to the global list.
+    assert(cur->mark() != BUSY, "Error");
     oop suffix_head = cur->mark(); // suffix will be put back on global list
     cur->set_mark(NULL);           // break off suffix
-    // Find tail of suffix so we can prepend suffix to global list
-    for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
-    oop suffix_tail = cur;
-    assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
-           "Tautology");
+    // It's possible that the list is still in the empty(busy) state
+    // we left it in a short while ago; in that case we may be
+    // able to place back the suffix without incurring the cost
+    // of a walk down the list.
     oop observed_overflow_list = _overflow_list;
-    do {
-      cur = observed_overflow_list;
-      suffix_tail->set_mark(markOop(cur));
+    oop cur_overflow_list = observed_overflow_list;
+    bool attached = false;
+    while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
       observed_overflow_list =
-        (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur);
-    } while (cur != observed_overflow_list);
+        (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
+      if (cur_overflow_list == observed_overflow_list) {
+        attached = true;
+        break;
+      } else cur_overflow_list = observed_overflow_list;
+    }
+    if (!attached) {
+      // Too bad, someone else sneaked in (at least) an element; we'll need
+      // to do a splice. Find tail of suffix so we can prepend suffix to global
+      // list.
+      for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
+      oop suffix_tail = cur;
+      assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
+             "Tautology");
+      observed_overflow_list = _overflow_list;
+      do {
+        cur_overflow_list = observed_overflow_list;
+        if (cur_overflow_list != BUSY) {
+          // Do the splice ...
+          suffix_tail->set_mark(markOop(cur_overflow_list));
+        } else { // cur_overflow_list == BUSY
+          suffix_tail->set_mark(NULL);
+        }
+        // ... and try to place spliced list back on overflow_list ...
+        observed_overflow_list =
+          (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
+      } while (cur_overflow_list != observed_overflow_list);
+      // ... until we have succeeded in doing so.
+    }
   }
 
   // Push the prefix elements on work_q
   assert(prefix != NULL, "control point invariant");
   const markOop proto = markOopDesc::prototype();
   oop next;
-  NOT_PRODUCT(size_t n = 0;)
+  NOT_PRODUCT(ssize_t n = 0;)
   for (cur = prefix; cur != NULL; cur = next) {
     next = oop(cur->mark());
     cur->set_mark(proto);   // until proven otherwise
@@ -8573,11 +8683,16 @@ void CMSCollector::par_push_on_overflow_list(oop p) {
   oop cur_overflow_list;
   do {
     cur_overflow_list = observed_overflow_list;
-    p->set_mark(markOop(cur_overflow_list));
+    if (cur_overflow_list != BUSY) {
+      p->set_mark(markOop(cur_overflow_list));
+    } else {
+      p->set_mark(NULL);
+    }
     observed_overflow_list =
       (oop) Atomic::cmpxchg_ptr(p, &_overflow_list, cur_overflow_list);
   } while (cur_overflow_list != observed_overflow_list);
 }
+#undef BUSY
 
 // Single threaded
 // General Note on GrowableArray: pushes may silently fail
@@ -8586,7 +8701,7 @@ void CMSCollector::par_push_on_overflow_list(oop p) {
 // a lot of code in the JVM. The prudent thing for GrowableArray
 // to do (for now) is to exit with an error. However, that may
 // be too draconian in some cases because the caller may be
-// able to recover without much harm. For suych cases, we
+// able to recover without much harm. For such cases, we
 // should probably introduce a "soft_push" method which returns
 // an indication of success or failure with the assumption that
 // the caller may be able to recover from a failure; code in
@@ -8594,8 +8709,6 @@ void CMSCollector::par_push_on_overflow_list(oop p) {
 // failures where possible, thus, incrementally hardening the VM
 // in such low resource situations.
 void CMSCollector::preserve_mark_work(oop p, markOop m) {
-  int PreserveMarkStackSize = 128;
-
   if (_preserved_oop_stack == NULL) {
     assert(_preserved_mark_stack == NULL,
            "bijection with preserved_oop_stack");
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp
index 44ef14e8d..d36c6fc47 100644
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp
@@ -595,7 +595,7 @@ class CMSCollector: public CHeapObj {
   size_t        _ser_kac_preclean_ovflw;
   size_t        _ser_kac_ovflw;
   size_t        _par_kac_ovflw;
-  NOT_PRODUCT(size_t _num_par_pushes;)
+  NOT_PRODUCT(ssize_t _num_par_pushes;)
 
   // ("Weak") Reference processing support
   ReferenceProcessor*            _ref_processor;
@@ -1212,6 +1212,7 @@ class ConcurrentMarkSweepGeneration: public CardGeneration {
   // More iteration support
   virtual void oop_iterate(MemRegion mr, OopClosure* cl);
   virtual void oop_iterate(OopClosure* cl);
+  virtual void safe_object_iterate(ObjectClosure* cl);
   virtual void object_iterate(ObjectClosure* cl);
 
   // Need to declare the full complement of closures, whether we'll
diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
index 3aa19ccb8..ba3244ef7 100644
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
@@ -1285,7 +1285,9 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) :
   _unclean_regions_coming(false),
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
-  _surviving_young_words(NULL)
+  _surviving_young_words(NULL),
+  _in_cset_fast_test(NULL),
+  _in_cset_fast_test_base(NULL)
 {
   _g1h = this; // To catch bugs.
   if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
@@ -2485,6 +2487,19 @@ G1CollectedHeap::do_collection_pause_at_safepoint(HeapRegion* popular_region) {
     g1_policy()->record_collection_pause_start(start_time_sec,
                                                start_used_bytes);
 
+    guarantee(_in_cset_fast_test == NULL, "invariant");
+    guarantee(_in_cset_fast_test_base == NULL, "invariant");
+    _in_cset_fast_test_length = n_regions();
+    _in_cset_fast_test_base =
+                             NEW_C_HEAP_ARRAY(bool, _in_cset_fast_test_length);
+    memset(_in_cset_fast_test_base, false,
+                                     _in_cset_fast_test_length * sizeof(bool));
+    // We're biasing _in_cset_fast_test to avoid subtracting the
+    // beginning of the heap every time we want to index; basically
+    // it's the same with what we do with the card table.
+    _in_cset_fast_test = _in_cset_fast_test_base -
+              ((size_t) _g1_reserved.start() >> HeapRegion::LogOfHRGrainBytes);
+
 #if SCAN_ONLY_VERBOSE
     _young_list->print();
 #endif // SCAN_ONLY_VERBOSE
@@ -2553,6 +2568,12 @@ G1CollectedHeap::do_collection_pause_at_safepoint(HeapRegion* popular_region) {
       free_collection_set(g1_policy()->collection_set());
       g1_policy()->clear_collection_set();
 
+      FREE_C_HEAP_ARRAY(bool, _in_cset_fast_test_base);
+      // this is more for peace of mind; we're nulling them here and
+      // we're expecting them to be null at the beginning of the next GC
+      _in_cset_fast_test = NULL;
+      _in_cset_fast_test_base = NULL;
+
       if (popular_region != NULL) {
         // We have to wait until now, because we don't want the region to
         // be rescheduled for pop-evac during RS update.
@@ -3560,6 +3581,9 @@ public:
   size_t undo_waste()                            { return _undo_waste; }
 
   void push_on_queue(oop* ref) {
+    assert(ref != NULL, "invariant");
+    assert(has_partial_array_mask(ref) || _g1h->obj_in_cs(*ref), "invariant");
+
     if (!refs()->push(ref)) {
       overflowed_refs()->push(ref);
       IF_G1_DETAILED_STATS(note_overflow_push());
@@ -3572,6 +3596,10 @@ public:
     if (!refs()->pop_local(ref)) {
       ref = NULL;
     } else {
+      assert(ref != NULL, "invariant");
+      assert(has_partial_array_mask(ref) || _g1h->obj_in_cs(*ref),
+             "invariant");
+
       IF_G1_DETAILED_STATS(note_pop());
     }
   }
@@ -3601,8 +3629,7 @@ public:
 
       obj = alloc_buf->allocate(word_sz);
       assert(obj != NULL, "buffer was definitely big enough...");
-    }
-    else {
+    } else {
       obj = _g1h->par_allocate_during_gc(purpose, word_sz);
     }
     return obj;
@@ -3695,24 +3722,57 @@ public:
     }
   }
 
+private:
+  void deal_with_reference(oop* ref_to_scan) {
+    if (has_partial_array_mask(ref_to_scan)) {
+      _partial_scan_cl->do_oop_nv(ref_to_scan);
+    } else {
+      // Note: we can use "raw" versions of "region_containing" because
+      // "obj_to_scan" is definitely in the heap, and is not in a
+      // humongous region.
+      HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan);
+      _evac_cl->set_region(r);
+      _evac_cl->do_oop_nv(ref_to_scan);
+    }
+  }
+
+public:
   void trim_queue() {
+    // I've replicated the loop twice, first to drain the overflow
+    // queue, second to drain the task queue. This is better than
+    // having a single loop, which checks both conditions and, inside
+    // it, either pops the overflow queue or the task queue, as each
+    // loop is tighter. Also, the decision to drain the overflow queue
+    // first is not arbitrary, as the overflow queue is not visible
+    // to the other workers, whereas the task queue is. So, we want to
+    // drain the "invisible" entries first, while allowing the other
+    // workers to potentially steal the "visible" entries.
+
     while (refs_to_scan() > 0 || overflowed_refs_to_scan() > 0) {
-      oop *ref_to_scan = NULL;
-      if (overflowed_refs_to_scan() == 0) {
-        pop_from_queue(ref_to_scan);
-      } else {
+      while (overflowed_refs_to_scan() > 0) {
+        oop *ref_to_scan = NULL;
         pop_from_overflow_queue(ref_to_scan);
+        assert(ref_to_scan != NULL, "invariant");
+        // We shouldn't have pushed it on the queue if it was not
+        // pointing into the CSet.
+        assert(ref_to_scan != NULL, "sanity");
+        assert(has_partial_array_mask(ref_to_scan) ||
+                                      _g1h->obj_in_cs(*ref_to_scan), "sanity");
+
+        deal_with_reference(ref_to_scan);
       }
-      if (ref_to_scan != NULL) {
-        if ((intptr_t)ref_to_scan & G1_PARTIAL_ARRAY_MASK) {
-          _partial_scan_cl->do_oop_nv(ref_to_scan);
-        } else {
-          // Note: we can use "raw" versions of "region_containing" because
-          // "obj_to_scan" is definitely in the heap, and is not in a
-          // humongous region.
-          HeapRegion* r = _g1h->heap_region_containing_raw(ref_to_scan);
-          _evac_cl->set_region(r);
-          _evac_cl->do_oop_nv(ref_to_scan);
+
+      while (refs_to_scan() > 0) {
+        oop *ref_to_scan = NULL;
+        pop_from_queue(ref_to_scan);
+
+        if (ref_to_scan != NULL) {
+          // We shouldn't have pushed it on the queue if it was not
+          // pointing into the CSet.
+          assert(has_partial_array_mask(ref_to_scan) ||
+                                      _g1h->obj_in_cs(*ref_to_scan), "sanity");
+
+          deal_with_reference(ref_to_scan);
         }
       }
     }
@@ -3728,16 +3788,25 @@ G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState*
 // Should probably be made inline and moved in g1OopClosures.inline.hpp.
 void G1ParScanClosure::do_oop_nv(oop* p) {
   oop obj = *p;
+
   if (obj != NULL) {
-    if (_g1->obj_in_cs(obj)) {
-      if (obj->is_forwarded()) {
-        *p = obj->forwardee();
-      } else {
-        _par_scan_state->push_on_queue(p);
-        return;
-      }
+    if (_g1->in_cset_fast_test(obj)) {
+      // We're not going to even bother checking whether the object is
+      // already forwarded or not, as this usually causes an immediate
+      // stall. We'll try to prefetch the object (for write, given that
+      // we might need to install the forwarding reference) and we'll
+      // get back to it when we pop it from the queue
+      Prefetch::write(obj->mark_addr(), 0);
+      Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
+
+      // slightly paranoid test; I'm trying to catch potential
+      // problems before we go into push_on_queue to know where the
+      // problem is coming from
+      assert(obj == *p, "the value of *p should not have changed");
+      _par_scan_state->push_on_queue(p);
+    } else {
+      _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
     }
-    _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
   }
 }
 
@@ -3777,13 +3846,36 @@ oop G1ParCopyHelper::copy_to_survivor_space(oop old) {
     return _g1->handle_evacuation_failure_par(cl, old);
   }
 
+  // We're going to allocate linearly, so might as well prefetch ahead.
+  Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);
+
   oop forward_ptr = old->forward_to_atomic(obj);
   if (forward_ptr == NULL) {
     Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);
-    obj->set_mark(m);
     if (g1p->track_object_age(alloc_purpose)) {
-      obj->incr_age();
+      // We could simply do obj->incr_age(). However, this causes a
+      // performance issue. obj->incr_age() will first check whether
+      // the object has a displaced mark by checking its mark word;
+      // getting the mark word from the new location of the object
+      // stalls. So, given that we already have the mark word and we
+      // are about to install it anyway, it's better to increase the
+      // age on the mark word, when the object does not have a
+      // displaced mark word. We're not expecting many objects to have
+      // a displaced mark word, so that case is not optimized
+      // further (it could be...) and we simply call obj->incr_age().
+
+      if (m->has_displaced_mark_helper()) {
+        // in this case, we have to install the mark word first,
+        // otherwise obj looks to be forwarded (the old mark word,
+        // which contains the forward pointer, was copied)
+        obj->set_mark(m);
+        obj->incr_age();
+      } else {
+        m = m->incr_age();
+      }
     }
+    obj->set_mark(m);
+
     // preserve "next" mark bit
     if (_g1->mark_in_progress() && !_g1->is_obj_ill(old)) {
       if (!use_local_bitmaps ||
@@ -3805,9 +3897,11 @@ oop G1ParCopyHelper::copy_to_survivor_space(oop old) {
 
     if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
       arrayOop(old)->set_length(0);
-      _par_scan_state->push_on_queue((oop*) ((intptr_t)old | G1_PARTIAL_ARRAY_MASK));
+      _par_scan_state->push_on_queue(set_partial_array_mask(old));
     } else {
-      _scanner->set_region(_g1->heap_region_containing(obj));
+      // No point in using the slower heap_region_containing() method,
+      // given that we know obj is in the heap.
+      _scanner->set_region(_g1->heap_region_containing_raw(obj));
       obj->oop_iterate_backwards(_scanner);
     }
   } else {
@@ -3817,47 +3911,55 @@ oop G1ParCopyHelper::copy_to_survivor_space(oop old) {
   return obj;
 }
 
-template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
-void G1ParCopyClosure<do_gen_barrier, barrier, do_mark_forwardee>::do_oop_work(oop* p) {
+template<bool do_gen_barrier, G1Barrier barrier,
+         bool do_mark_forwardee, bool skip_cset_test>
+void G1ParCopyClosure<do_gen_barrier, barrier,
+                      do_mark_forwardee, skip_cset_test>::do_oop_work(oop* p) {
   oop obj = *p;
   assert(barrier != G1BarrierRS || obj != NULL,
          "Precondition: G1BarrierRS implies obj is nonNull");
 
-  if (obj != NULL) {
-    if (_g1->obj_in_cs(obj)) {
+  // The only time we skip the cset test is when we're scanning
+  // references popped from the queue. And we only push on the queue
+  // references that we know point into the cset, so no point in
+  // checking again. But we'll leave an assert here for peace of mind.
+  assert(!skip_cset_test || _g1->obj_in_cs(obj), "invariant");
+
+  // here the null check is implicit in the in_cset_fast_test() test
+  if (skip_cset_test || _g1->in_cset_fast_test(obj)) {
 #if G1_REM_SET_LOGGING
-      gclog_or_tty->print_cr("Loc "PTR_FORMAT" contains pointer "PTR_FORMAT" into CS.",
-                             p, (void*) obj);
+    gclog_or_tty->print_cr("Loc "PTR_FORMAT" contains pointer "PTR_FORMAT" "
+                           "into CS.", p, (void*) obj);
 #endif
-      if (obj->is_forwarded()) {
-        *p = obj->forwardee();
-      } else {
-        *p = copy_to_survivor_space(obj);
-      }
-      // When scanning the RS, we only care about objs in CS.
-      if (barrier == G1BarrierRS) {
-        _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
-      }
+    if (obj->is_forwarded()) {
+      *p = obj->forwardee();
+    } else {
+      *p = copy_to_survivor_space(obj);
     }
-    // When scanning moved objs, must look at all oops.
-    if (barrier == G1BarrierEvac) {
+    // When scanning the RS, we only care about objs in CS.
+    if (barrier == G1BarrierRS) {
       _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
     }
+  }
 
-    if (do_gen_barrier) {
-      par_do_barrier(p);
-    }
+  // When scanning moved objs, must look at all oops.
+  if (barrier == G1BarrierEvac && obj != NULL) {
+    _g1_rem->par_write_ref(_from, p, _par_scan_state->queue_num());
+  }
+
+  if (do_gen_barrier && obj != NULL) {
+    par_do_barrier(p);
   }
 }
 
-template void G1ParCopyClosure<false, G1BarrierEvac, false>::do_oop_work(oop* p);
+template void G1ParCopyClosure<false, G1BarrierEvac, false, true>::do_oop_work(oop* p);
 
-template <class T> void G1ParScanPartialArrayClosure::process_array_chunk(
+template<class T> void G1ParScanPartialArrayClosure::process_array_chunk(
   oop obj, int start, int end) {
   // process our set of indices (include header in first chunk)
   assert(start < end, "invariant");
   T* const base      = (T*)objArrayOop(obj)->base();
-  T* const start_addr = base + start;
+  T* const start_addr = (start == 0) ? (T*) obj : base + start;
   T* const end_addr   = base + end;
   MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr);
   _scanner.set_region(_g1->heap_region_containing(obj));
@@ -3866,7 +3968,8 @@ template <class T> void G1ParScanPartialArrayClosure::process_array_chunk(
 
 void G1ParScanPartialArrayClosure::do_oop_nv(oop* p) {
   assert(!UseCompressedOops, "Needs to be fixed to work with compressed oops");
-  oop old = oop((intptr_t)p & ~G1_PARTIAL_ARRAY_MASK);
+  assert(has_partial_array_mask(p), "invariant");
+  oop old = clear_partial_array_mask(p);
   assert(old->is_objArray(), "must be obj array");
   assert(old->is_forwarded(), "must be forwarded");
   assert(Universe::heap()->is_in_reserved(old), "must be in heap.");
@@ -3884,7 +3987,7 @@ void G1ParScanPartialArrayClosure::do_oop_nv(oop* p) {
     end = start + ParGCArrayScanChunk;
     arrayOop(old)->set_length(end);
     // Push remainder.
-    _par_scan_state->push_on_queue((oop*) ((intptr_t) old | G1_PARTIAL_ARRAY_MASK));
+    _par_scan_state->push_on_queue(set_partial_array_mask(old));
   } else {
     // Restore length so that the heap remains parsable in
     // case of evacuation failure.
@@ -3893,11 +3996,6 @@ void G1ParScanPartialArrayClosure::do_oop_nv(oop* p) {
 
   // process our set of indices (include header in first chunk)
   process_array_chunk<oop>(obj, start, end);
-  oop* start_addr = start == 0 ? (oop*)obj : obj->obj_at_addr<oop>(start);
-  oop* end_addr   = (oop*)(obj->base()) + end; // obj_at_addr(end) asserts end < length
-  MemRegion mr((HeapWord*)start_addr, (HeapWord*)end_addr);
-  _scanner.set_region(_g1->heap_region_containing(obj));
-  obj->oop_iterate(&_scanner, mr);
 }
 
 int G1ScanAndBalanceClosure::_nq = 0;
@@ -3931,6 +4029,13 @@ public:
                           pss->hash_seed(),
                           ref_to_scan)) {
         IF_G1_DETAILED_STATS(pss->note_steal());
+
+        // slightly paranoid tests; I'm trying to catch potential
+        // problems before we go into push_on_queue to know where the
+        // problem is coming from
+        assert(ref_to_scan != NULL, "invariant");
+        assert(has_partial_array_mask(ref_to_scan) ||
+                                   _g1h->obj_in_cs(*ref_to_scan), "invariant");
         pss->push_on_queue(ref_to_scan);
         continue;
       }
@@ -3976,10 +4081,10 @@ public:
     ResourceMark rm;
     HandleMark   hm;
 
-    G1ParScanThreadState pss(_g1h, i);
-    G1ParScanHeapEvacClosure     scan_evac_cl(_g1h, &pss);
-    G1ParScanHeapEvacClosure     evac_failure_cl(_g1h, &pss);
-    G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss);
+    G1ParScanThreadState            pss(_g1h, i);
+    G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss);
+    G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss);
+    G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss);
 
     pss.set_evac_closure(&scan_evac_cl);
     pss.set_evac_failure_closure(&evac_failure_cl);
diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
index e83b1a7de..738b4be04 100644
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp
@@ -247,6 +247,27 @@ private:
   NumberSeq _pop_obj_rc_at_copy;
   void print_popularity_summary_info() const;
 
+  // This is used for a quick test on whether a reference points into
+  // the collection set or not. Basically, we have an array, with one
+  // byte per region, and that byte denotes whether the corresponding
+  // region is in the collection set or not. The entry corresponding
+  // to the bottom of the heap, i.e., region 0, is pointed to by
+  // _in_cset_fast_test_base.  The _in_cset_fast_test field has been
+  // biased so that it actually points to address 0 of the address
+  // space, to make the test as fast as possible (we can simply shift
+  // the address to address into it, instead of having to subtract the
+  // bottom of the heap from the address before shifting it; basically
+  // it works in the same way the card table works).
+  bool* _in_cset_fast_test;
+
+  // The allocated array used for the fast test on whether a reference
+  // points into the collection set or not. This field is also used to
+  // free the array.
+  bool* _in_cset_fast_test_base;
+
+  // The length of the _in_cset_fast_test_base array.
+  size_t _in_cset_fast_test_length;
+
   volatile unsigned _gc_time_stamp;
 
   size_t* _surviving_young_words;
@@ -368,6 +389,38 @@ public:
   virtual void gc_prologue(bool full);
   virtual void gc_epilogue(bool full);
 
+  // We register a region with the fast "in collection set" test. We
+  // simply set to true the array slot corresponding to this region.
+  void register_region_with_in_cset_fast_test(HeapRegion* r) {
+    assert(_in_cset_fast_test_base != NULL, "sanity");
+    assert(r->in_collection_set(), "invariant");
+    int index = r->hrs_index();
+    assert(0 <= (size_t) index && (size_t) index < _in_cset_fast_test_length,
+           "invariant");
+    assert(!_in_cset_fast_test_base[index], "invariant");
+    _in_cset_fast_test_base[index] = true;
+  }
+
+  // This is a fast test on whether a reference points into the
+  // collection set or not. It does not assume that the reference
+  // points into the heap; if it doesn't, it will return false.
+  bool in_cset_fast_test(oop obj) {
+    assert(_in_cset_fast_test != NULL, "sanity");
+    if (_g1_committed.contains((HeapWord*) obj)) {
+      // no need to subtract the bottom of the heap from obj,
+      // _in_cset_fast_test is biased
+      size_t index = ((size_t) obj) >> HeapRegion::LogOfHRGrainBytes;
+      bool ret = _in_cset_fast_test[index];
+      // let's make sure the result is consistent with what the slower
+      // test returns
+      assert( ret || !obj_in_cs(obj), "sanity");
+      assert(!ret ||  obj_in_cs(obj), "sanity");
+      return ret;
+    } else {
+      return false;
+    }
+  }
+
 protected:
 
   // Shrink the garbage-first heap by at most the given size (in bytes!).
@@ -850,6 +903,7 @@ public:
 
   // Iterate over all objects, calling "cl.do_object" on each.
   virtual void object_iterate(ObjectClosure* cl);
+  virtual void safe_object_iterate(ObjectClosure* cl) { object_iterate(cl); }
 
   // Iterate over all objects allocated since the last collection, calling
   // "cl.do_object" on each.  The heap must have been initialized properly
diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp
index 8cafe3d98..4d88ee4cb 100644
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp
@@ -36,8 +36,11 @@ G1CollectedHeap::heap_region_containing(const void* addr) const {
 
 inline HeapRegion*
 G1CollectedHeap::heap_region_containing_raw(const void* addr) const {
-  HeapRegion* res = _hrs->addr_to_region(addr);
-  assert(res != NULL, "addr outside of heap?");
+  assert(_g1_reserved.contains(addr), "invariant");
+  size_t index = ((intptr_t) addr - (intptr_t) _g1_reserved.start())
+                                              >> HeapRegion::LogOfHRGrainBytes;
+  HeapRegion* res = _hrs->at(index);
+  assert(res == _hrs->addr_to_region(addr), "sanity");
   return res;
 }
 
diff --git a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
index a5d0165bb..e467e4d80 100644
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
@@ -2985,6 +2985,7 @@ add_to_collection_set(HeapRegion* hr) {
   _collection_set = hr;
   _collection_set_size++;
   _collection_set_bytes_used_before += hr->used();
+  _g1->register_region_with_in_cset_fast_test(hr);
 }
 
 void
diff --git a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp
index 58653196a..2791bf6ee 100644
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp
@@ -77,6 +77,18 @@ public:
 
 #define G1_PARTIAL_ARRAY_MASK 1
 
+inline bool has_partial_array_mask(oop* ref) {
+  return (intptr_t) ref & G1_PARTIAL_ARRAY_MASK;
+}
+
+inline oop* set_partial_array_mask(oop obj) {
+  return (oop*) ((intptr_t) obj | G1_PARTIAL_ARRAY_MASK);
+}
+
+inline oop clear_partial_array_mask(oop* ref) {
+  return oop((intptr_t) ref & ~G1_PARTIAL_ARRAY_MASK);
+}
+
 class G1ParScanPartialArrayClosure : public G1ParClosureSuper {
   G1ParScanClosure _scanner;
   template <class T> void process_array_chunk(oop obj, int start, int end);
@@ -101,7 +113,8 @@ public:
     G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { }
 };
 
-template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
+template<bool do_gen_barrier, G1Barrier barrier,
+         bool do_mark_forwardee, bool skip_cset_test>
 class G1ParCopyClosure : public G1ParCopyHelper {
   G1ParScanClosure _scanner;
   void do_oop_work(oop* p);
@@ -119,14 +132,22 @@ public:
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
 };
 
-typedef G1ParCopyClosure<false, G1BarrierNone, false> G1ParScanExtRootClosure;
-typedef G1ParCopyClosure<true, G1BarrierNone, false> G1ParScanPermClosure;
-typedef G1ParCopyClosure<false, G1BarrierNone, true> G1ParScanAndMarkExtRootClosure;
-typedef G1ParCopyClosure<true, G1BarrierNone, true> G1ParScanAndMarkPermClosure;
-typedef G1ParCopyClosure<false, G1BarrierRS, false> G1ParScanHeapRSClosure;
-typedef G1ParCopyClosure<false, G1BarrierRS, true> G1ParScanAndMarkHeapRSClosure;
-typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacClosure;
-
+typedef G1ParCopyClosure<false, G1BarrierNone, false, false> G1ParScanExtRootClosure;
+typedef G1ParCopyClosure<true,  G1BarrierNone, false, false> G1ParScanPermClosure;
+typedef G1ParCopyClosure<false, G1BarrierNone, true,  false> G1ParScanAndMarkExtRootClosure;
+typedef G1ParCopyClosure<true,  G1BarrierNone, true,  false> G1ParScanAndMarkPermClosure;
+typedef G1ParCopyClosure<false, G1BarrierRS,   false, false> G1ParScanHeapRSClosure;
+typedef G1ParCopyClosure<false, G1BarrierRS,   true,  false> G1ParScanAndMarkHeapRSClosure;
+// This is the only case when we set skip_cset_test. Basically, this
+// closure is (or at least should be) only called directly while we're draining
+// the overflow and task queues. In that case we know that the
+// reference in question points into the collection set, otherwise we
+// would not have pushed it on the queue.
+typedef G1ParCopyClosure<false, G1BarrierEvac, false, true> G1ParScanHeapEvacClosure;
+// We need a separate closure to handle references during evacuation
+// failure processing, as it cannot assume that the reference already
+// points to the collection set (like G1ParScanHeapEvacClosure does).
+typedef G1ParCopyClosure<false, G1BarrierEvac, false, false> G1ParScanHeapEvacFailureClosure;
 
 class FilterIntoCSClosure: public OopClosure {
   G1CollectedHeap* _g1;
diff --git a/src/share/vm/gc_implementation/g1/g1_globals.hpp b/src/share/vm/gc_implementation/g1/g1_globals.hpp
index 72a684812..e7e9d7363 100644
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp
@@ -28,7 +28,7 @@
 
 #define G1_FLAGS(develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct, manageable, product_rw) \
                                                                             \
-  product(intx, ParallelGCG1AllocBufferSize, 4*K,                           \
+  product(intx, ParallelGCG1AllocBufferSize, 8*K,                           \
           "Size of parallel G1 allocation buffers in to-space.")            \
                                                                             \
   product(intx, G1TimeSliceMS, 500,                                         \
diff --git a/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp
index 39f72a616..4cfb76464 100644
--- a/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp
+++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp
@@ -32,11 +32,13 @@ enum G1Barrier {
   G1BarrierNone, G1BarrierRS, G1BarrierEvac
 };
 
-template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
+template<bool do_gen_barrier, G1Barrier barrier,
+         bool do_mark_forwardee, bool skip_cset_test>
 class G1ParCopyClosure;
 class G1ParScanClosure;
 
-typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacClosure;
+typedef G1ParCopyClosure<false, G1BarrierEvac, false, true>
+                                                      G1ParScanHeapEvacClosure;
 
 class FilterIntoCSClosure;
 class FilterOutOfRegionClosure;
diff --git a/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep b/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep
index d751d2d1f..ac88dc175 100644
--- a/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep
+++ b/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep
@@ -28,6 +28,7 @@ binaryTreeDictionary.cpp                allocationStats.hpp
 binaryTreeDictionary.cpp                binaryTreeDictionary.hpp
 binaryTreeDictionary.cpp                globals.hpp
 binaryTreeDictionary.cpp                ostream.hpp
+binaryTreeDictionary.cpp                space.inline.hpp
 binaryTreeDictionary.cpp                spaceDecorator.hpp
 
 binaryTreeDictionary.hpp                freeBlockDictionary.hpp
diff --git a/src/share/vm/gc_implementation/includeDB_gc_g1 b/src/share/vm/gc_implementation/includeDB_gc_g1
index 536a3704f..375154d89 100644
--- a/src/share/vm/gc_implementation/includeDB_gc_g1
+++ b/src/share/vm/gc_implementation/includeDB_gc_g1
@@ -31,9 +31,10 @@ bufferingOopClosure.hpp			os.hpp
 cardTableRS.cpp				concurrentMark.hpp
 cardTableRS.cpp				g1SATBCardTableModRefBS.hpp
 
-collectionSetChooser.cpp		g1CollectedHeap.hpp
+collectionSetChooser.cpp		g1CollectedHeap.inline.hpp
 collectionSetChooser.cpp		g1CollectorPolicy.hpp
 collectionSetChooser.cpp		collectionSetChooser.hpp
+collectionSetChooser.cpp		space.inline.hpp
 
 collectionSetChooser.hpp		heapRegion.hpp
 collectionSetChooser.hpp                growableArray.hpp
@@ -42,14 +43,15 @@ concurrentG1Refine.cpp			atomic.hpp
 concurrentG1Refine.cpp			concurrentG1Refine.hpp
 concurrentG1Refine.cpp			concurrentG1RefineThread.hpp
 concurrentG1Refine.cpp			copy.hpp
-concurrentG1Refine.cpp			g1CollectedHeap.hpp
+concurrentG1Refine.cpp			g1CollectedHeap.inline.hpp
 concurrentG1Refine.cpp			g1RemSet.hpp
+concurrentG1Refine.cpp			space.inline.hpp
 
 concurrentG1Refine.hpp			globalDefinitions.hpp
 
 concurrentG1RefineThread.cpp		concurrentG1Refine.hpp
 concurrentG1RefineThread.cpp		concurrentG1RefineThread.hpp
-concurrentG1RefineThread.cpp		g1CollectedHeap.hpp
+concurrentG1RefineThread.cpp		g1CollectedHeap.inline.hpp
 concurrentG1RefineThread.cpp            g1CollectorPolicy.hpp
 concurrentG1RefineThread.cpp		handles.inline.hpp
 concurrentG1RefineThread.cpp		mutexLocker.hpp
@@ -166,7 +168,7 @@ g1CollectorPolicy.cpp			concurrentMark.hpp
 g1CollectorPolicy.cpp			concurrentMarkThread.inline.hpp
 g1CollectorPolicy.cpp			debug.hpp
 g1CollectorPolicy.cpp			java.hpp
-g1CollectorPolicy.cpp                   g1CollectedHeap.hpp
+g1CollectorPolicy.cpp                   g1CollectedHeap.inline.hpp
 g1CollectorPolicy.cpp                   g1CollectorPolicy.hpp
 g1CollectorPolicy.cpp                   heapRegionRemSet.hpp
 g1CollectorPolicy.cpp			mutexLocker.hpp
@@ -187,7 +189,7 @@ g1MarkSweep.cpp                         biasedLocking.hpp
 g1MarkSweep.cpp                         codeCache.hpp
 g1MarkSweep.cpp                         events.hpp
 g1MarkSweep.cpp                         fprofiler.hpp
-g1MarkSweep.hpp                         g1CollectedHeap.hpp
+g1MarkSweep.hpp                         g1CollectedHeap.inline.hpp
 g1MarkSweep.cpp                         g1MarkSweep.hpp
 g1MarkSweep.cpp                         gcLocker.hpp
 g1MarkSweep.cpp                         genCollectedHeap.hpp
@@ -264,7 +266,7 @@ heapRegion.cpp                          heapRegionSeq.inline.hpp
 heapRegion.cpp                          iterator.hpp
 heapRegion.cpp                          oop.inline.hpp
 
-heapRegion.hpp                          space.hpp
+heapRegion.hpp                          space.inline.hpp
 heapRegion.hpp                          spaceDecorator.hpp
 heapRegion.hpp                          g1BlockOffsetTable.inline.hpp
 heapRegion.hpp                          watermark.hpp
@@ -283,7 +285,7 @@ heapRegionRemSet.cpp                    globalDefinitions.hpp
 heapRegionRemSet.cpp                    space.inline.hpp
 
 heapRegionSeq.cpp                       allocation.hpp
-heapRegionSeq.cpp                       g1CollectedHeap.hpp
+heapRegionSeq.cpp                       g1CollectedHeap.inline.hpp
 heapRegionSeq.cpp                       heapRegionSeq.hpp
 
 heapRegionSeq.hpp                       growableArray.hpp
@@ -334,18 +336,18 @@ specialized_oop_closures.hpp		g1_specialized_oop_closures.hpp
 survRateGroup.hpp			numberSeq.hpp
 
 survRateGroup.cpp			allocation.hpp
-survRateGroup.cpp			g1CollectedHeap.hpp
+survRateGroup.cpp			g1CollectedHeap.inline.hpp
 survRateGroup.cpp			g1CollectorPolicy.hpp
 survRateGroup.cpp			heapRegion.hpp
 survRateGroup.cpp			survRateGroup.hpp
 
 thread.cpp				concurrentMarkThread.inline.hpp
 
-universe.cpp                            g1CollectedHeap.hpp
+universe.cpp                            g1CollectedHeap.inline.hpp
 universe.cpp                            g1CollectorPolicy.hpp
 
 vm_operations_g1.hpp			vmGCOperations.hpp
 
 vm_operations_g1.cpp			vm_operations_g1.hpp
-vm_operations_g1.cpp                    g1CollectedHeap.hpp
+vm_operations_g1.cpp                    g1CollectedHeap.inline.hpp
 vm_operations_g1.cpp                    isGCActiveMark.hpp
diff --git a/src/share/vm/gc_implementation/includeDB_gc_parNew b/src/share/vm/gc_implementation/includeDB_gc_parNew
index e5e5bc17b..0d8160ffa 100644
--- a/src/share/vm/gc_implementation/includeDB_gc_parNew
+++ b/src/share/vm/gc_implementation/includeDB_gc_parNew
@@ -29,6 +29,8 @@ asParNewGeneration.cpp                  asParNewGeneration.hpp
 asParNewGeneration.cpp                  cmsAdaptiveSizePolicy.hpp
 asParNewGeneration.cpp                  cmsGCAdaptivePolicyCounters.hpp
 asParNewGeneration.cpp                  defNewGeneration.inline.hpp
+asParNewGeneration.cpp                  markOop.inline.hpp
+asParNewGeneration.cpp                  markSweep.inline.hpp
 asParNewGeneration.cpp                  oop.pcgc.inline.hpp
 asParNewGeneration.cpp                  parNewGeneration.hpp
 asParNewGeneration.cpp                  referencePolicy.hpp
@@ -40,7 +42,7 @@ parCardTableModRefBS.cpp                cardTableRS.hpp
 parCardTableModRefBS.cpp                java.hpp
 parCardTableModRefBS.cpp                mutexLocker.hpp
 parCardTableModRefBS.cpp                sharedHeap.hpp
-parCardTableModRefBS.cpp                space.hpp
+parCardTableModRefBS.cpp                space.inline.hpp
 parCardTableModRefBS.cpp                universe.hpp
 parCardTableModRefBS.cpp                virtualspace.hpp
 
@@ -77,6 +79,7 @@ parNewGeneration.cpp                    resourceArea.hpp
 parNewGeneration.cpp                    sharedHeap.hpp
 parNewGeneration.cpp                    space.hpp
 parNewGeneration.cpp                    spaceDecorator.hpp
+parNewGeneration.cpp                    thread.hpp
 parNewGeneration.cpp                    workgroup.hpp
 
 parNewGeneration.hpp                    defNewGeneration.hpp
diff --git a/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge b/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge
index 13ba87c83..0e0ae1a1e 100644
--- a/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge
+++ b/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge
@@ -302,6 +302,8 @@ psOldGen.hpp                         	safepoint.hpp
 psOldGen.hpp                            spaceCounters.hpp
 
 psPermGen.cpp                           gcUtil.hpp
+psPermGen.cpp                           markOop.inline.hpp
+psPermGen.cpp                           markSweep.inline.hpp
 psPermGen.cpp                           parallelScavengeHeap.hpp
 psPermGen.cpp                           psMarkSweepDecorator.hpp
 psPermGen.cpp                           psParallelCompact.hpp
diff --git a/src/share/vm/gc_implementation/includeDB_gc_shared b/src/share/vm/gc_implementation/includeDB_gc_shared
index 7ea226589..99ce759a4 100644
--- a/src/share/vm/gc_implementation/includeDB_gc_shared
+++ b/src/share/vm/gc_implementation/includeDB_gc_shared
@@ -100,4 +100,4 @@ spaceCounters.hpp                       mutableSpace.hpp
 spaceCounters.hpp                       perfData.hpp
 spaceCounters.hpp                       generationCounters.hpp
 
-vmGCOperations.cpp                      g1CollectedHeap.hpp
+vmGCOperations.cpp                      g1CollectedHeap.inline.hpp
diff --git a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
index a2b7607bb..a5010925b 100644
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
@@ -404,6 +404,8 @@ void ParEvacuateFollowersClosure::do_void() {
     if (terminator()->offer_termination()) break;
     par_scan_state()->end_term_time();
   }
+  assert(par_gen()->_overflow_list == NULL && par_gen()->_num_par_pushes == 0,
+         "Broken overflow list?");
   // Finish the last termination pause.
   par_scan_state()->end_term_time();
 }
@@ -456,6 +458,8 @@ ParNewGeneration(ReservedSpace rs, size_t initial_byte_size, int level)
   _is_alive_closure(this),
   _plab_stats(YoungPLABSize, PLABWeight)
 {
+  NOT_PRODUCT(_overflow_counter = ParGCWorkQueueOverflowInterval;)
+  NOT_PRODUCT(_num_par_pushes = 0;)
   _task_queues = new ObjToScanQueueSet(ParallelGCThreads);
   guarantee(_task_queues != NULL, "task_queues allocation failure.");
 
@@ -993,12 +997,19 @@ oop ParNewGeneration::copy_to_survivor_space_avoiding_promotion_undo(
              "push forwarded object");
     }
     // Push it on one of the queues of to-be-scanned objects.
-    if (!par_scan_state->work_queue()->push(obj_to_push)) {
+    bool simulate_overflow = false;
+    NOT_PRODUCT(
+      if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) {
+        // simulate a stack overflow
+        simulate_overflow = true;
+      }
+    )
+    if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
       // Add stats for overflow pushes.
       if (Verbose && PrintGCDetails) {
         gclog_or_tty->print("queue overflow!\n");
       }
-      push_on_overflow_list(old);
+      push_on_overflow_list(old, par_scan_state);
       par_scan_state->note_overflow_push();
     }
     par_scan_state->note_push();
@@ -1110,9 +1121,16 @@ oop ParNewGeneration::copy_to_survivor_space_with_undo(
              "push forwarded object");
     }
     // Push it on one of the queues of to-be-scanned objects.
-    if (!par_scan_state->work_queue()->push(obj_to_push)) {
+    bool simulate_overflow = false;
+    NOT_PRODUCT(
+      if (ParGCWorkQueueOverflowALot && should_simulate_overflow()) {
+        // simulate a stack overflow
+        simulate_overflow = true;
+      }
+    )
+    if (simulate_overflow || !par_scan_state->work_queue()->push(obj_to_push)) {
       // Add stats for overflow pushes.
-      push_on_overflow_list(old);
+      push_on_overflow_list(old, par_scan_state);
       par_scan_state->note_overflow_push();
     }
     par_scan_state->note_push();
@@ -1135,89 +1153,190 @@ oop ParNewGeneration::copy_to_survivor_space_with_undo(
   return forward_ptr;
 }
 
-void ParNewGeneration::push_on_overflow_list(oop from_space_obj) {
-  oop cur_overflow_list = _overflow_list;
+#ifndef PRODUCT
+// It's OK to call this multi-threaded;  the worst thing
+// that can happen is that we'll get a bunch of closely
+// spaced simulated overflows, but that's OK, in fact
+// probably good as it would exercise the overflow code
+// under contention.
+bool ParNewGeneration::should_simulate_overflow() {
+  if (_overflow_counter-- <= 0) { // just being defensive
+    _overflow_counter = ParGCWorkQueueOverflowInterval;
+    return true;
+  } else {
+    return false;
+  }
+}
+#endif
+
+#define BUSY (oop(0x1aff1aff))
+void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) {
   // if the object has been forwarded to itself, then we cannot
   // use the klass pointer for the linked list.  Instead we have
   // to allocate an oopDesc in the C-Heap and use that for the linked list.
+  // XXX This is horribly inefficient when a promotion failure occurs
+  // and should be fixed. XXX FIX ME !!!
+#ifndef PRODUCT
+  Atomic::inc_ptr(&_num_par_pushes);
+  assert(_num_par_pushes > 0, "Tautology");
+#endif
   if (from_space_obj->forwardee() == from_space_obj) {
     oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1);
     listhead->forward_to(from_space_obj);
     from_space_obj = listhead;
   }
-  while (true) {
-    from_space_obj->set_klass_to_list_ptr(cur_overflow_list);
-    oop observed_overflow_list =
-      (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list);
-    if (observed_overflow_list == cur_overflow_list) break;
-    // Otherwise...
+  oop observed_overflow_list = _overflow_list;
+  oop cur_overflow_list;
+  do {
     cur_overflow_list = observed_overflow_list;
-  }
+    if (cur_overflow_list != BUSY) {
+      from_space_obj->set_klass_to_list_ptr(cur_overflow_list);
+    } else {
+      from_space_obj->set_klass_to_list_ptr(NULL);
+    }
+    observed_overflow_list =
+      (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list);
+  } while (cur_overflow_list != observed_overflow_list);
 }
 
+// *NOTE*: The overflow list manipulation code here and
+// in CMSCollector:: are very similar in shape,
+// except that in the CMS case we thread the objects
+// directly into the list via their mark word, and do
+// not need to deal with special cases below related
+// to chunking of object arrays and promotion failure
+// handling.
+// CR 6797058 has been filed to attempt consolidation of
+// the common code.
+// Because of the common code, if you make any changes in
+// the code below, please check the CMS version to see if
+// similar changes might be needed.
+// See CMSCollector::par_take_from_overflow_list() for
+// more extensive documentation comments.
 bool
 ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) {
   ObjToScanQueue* work_q = par_scan_state->work_queue();
+  assert(work_q->size() == 0, "Should first empty local work queue");
   // How many to take?
-  int objsFromOverflow = MIN2(work_q->max_elems()/4,
-                              (juint)ParGCDesiredObjsFromOverflowList);
+  size_t objsFromOverflow = MIN2((size_t)work_q->max_elems()/4,
+                                 (size_t)ParGCDesiredObjsFromOverflowList);
 
   if (_overflow_list == NULL) return false;
 
   // Otherwise, there was something there; try claiming the list.
-  oop prefix = (oop)Atomic::xchg_ptr(NULL, &_overflow_list);
-
-  if (prefix == NULL) {
-    return false;
-  }
+  oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
   // Trim off a prefix of at most objsFromOverflow items
-  int i = 1;
+  Thread* tid = Thread::current();
+  size_t spin_count = (size_t)ParallelGCThreads;
+  size_t sleep_time_millis = MAX2((size_t)1, objsFromOverflow/100);
+  for (size_t spin = 0; prefix == BUSY && spin < spin_count; spin++) {
+    // someone grabbed it before we did ...
+    // ... we spin for a short while...
+    os::sleep(tid, sleep_time_millis, false);
+    if (_overflow_list == NULL) {
+      // nothing left to take
+      return false;
+    } else if (_overflow_list != BUSY) {
+     // try and grab the prefix
+     prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
+    }
+  }
+  if (prefix == NULL || prefix == BUSY) {
+     // Nothing to take or waited long enough
+     if (prefix == NULL) {
+       // Write back the NULL in case we overwrote it with BUSY above
+       // and it is still the same value.
+       (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
+     }
+     return false;
+  }
+  assert(prefix != NULL && prefix != BUSY, "Error");
+  size_t i = 1;
   oop cur = prefix;
   while (i < objsFromOverflow && cur->klass_or_null() != NULL) {
     i++; cur = oop(cur->klass());
   }
 
   // Reattach remaining (suffix) to overflow list
-  if (cur->klass_or_null() != NULL) {
-    oop suffix = oop(cur->klass());
-    cur->set_klass_to_list_ptr(NULL);
-
-    // Find last item of suffix list
-    oop last = suffix;
-    while (last->klass_or_null() != NULL) {
-      last = oop(last->klass());
+  if (cur->klass_or_null() == NULL) {
+    // Write back the NULL in lieu of the BUSY we wrote
+    // above, if it is still the same value.
+    if (_overflow_list == BUSY) {
+      (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
     }
-    // Atomically prepend suffix to current overflow list
-    oop cur_overflow_list = _overflow_list;
-    while (true) {
-      last->set_klass_to_list_ptr(cur_overflow_list);
-      oop observed_overflow_list =
-        (oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
-      if (observed_overflow_list == cur_overflow_list) break;
-      // Otherwise...
-      cur_overflow_list = observed_overflow_list;
+  } else {
+    assert(cur->klass_or_null() != BUSY, "Error");
+    oop suffix = oop(cur->klass());       // suffix will be put back on global list
+    cur->set_klass_to_list_ptr(NULL);     // break off suffix
+    // It's possible that the list is still in the empty(busy) state
+    // we left it in a short while ago; in that case we may be
+    // able to place back the suffix.
+    oop observed_overflow_list = _overflow_list;
+    oop cur_overflow_list = observed_overflow_list;
+    bool attached = false;
+    while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
+      observed_overflow_list =
+        (oop) Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
+      if (cur_overflow_list == observed_overflow_list) {
+        attached = true;
+        break;
+      } else cur_overflow_list = observed_overflow_list;
+    }
+    if (!attached) {
+      // Too bad, someone else got in in between; we'll need to do a splice.
+      // Find the last item of suffix list
+      oop last = suffix;
+      while (last->klass_or_null() != NULL) {
+        last = oop(last->klass());
+      }
+      // Atomically prepend suffix to current overflow list
+      observed_overflow_list = _overflow_list;
+      do {
+        cur_overflow_list = observed_overflow_list;
+        if (cur_overflow_list != BUSY) {
+          // Do the splice ...
+          last->set_klass_to_list_ptr(cur_overflow_list);
+        } else { // cur_overflow_list == BUSY
+          last->set_klass_to_list_ptr(NULL);
+        }
+        observed_overflow_list =
+          (oop)Atomic::cmpxchg_ptr(suffix, &_overflow_list, cur_overflow_list);
+      } while (cur_overflow_list != observed_overflow_list);
     }
   }
 
   // Push objects on prefix list onto this thread's work queue
-  assert(cur != NULL, "program logic");
+  assert(prefix != NULL && prefix != BUSY, "program logic");
   cur = prefix;
-  int n = 0;
+  ssize_t n = 0;
   while (cur != NULL) {
     oop obj_to_push = cur->forwardee();
     oop next        = oop(cur->klass_or_null());
     cur->set_klass(obj_to_push->klass());
-    if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) {
-      obj_to_push = cur;
+    // This may be an array object that is self-forwarded. In that case, the list pointer
+    // space, cur, is not in the Java heap, but rather in the C-heap and should be freed.
+    if (!is_in_reserved(cur)) {
+      // This can become a scaling bottleneck when there is work queue overflow coincident
+      // with promotion failure.
+      oopDesc* f = cur;
+      FREE_C_HEAP_ARRAY(oopDesc, f);
+    } else if (par_scan_state->should_be_partially_scanned(obj_to_push, cur)) {
       assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned");
+      obj_to_push = cur;
     }
-    work_q->push(obj_to_push);
+    bool ok = work_q->push(obj_to_push);
+    assert(ok, "Should have succeeded");
     cur = next;
     n++;
   }
   par_scan_state->note_overflow_refill(n);
+#ifndef PRODUCT
+  assert(_num_par_pushes >= n, "Too many pops?");
+  Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
+#endif
   return true;
 }
+#undef BUSY
 
 void ParNewGeneration::ref_processor_init()
 {
diff --git a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp
index a9e2d199f..b8de0b1e7 100644
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp
@@ -278,6 +278,7 @@ class ParNewGeneration: public DefNewGeneration {
   friend class ParNewRefProcTask;
   friend class ParNewRefProcTaskExecutor;
   friend class ParScanThreadStateSet;
+  friend class ParEvacuateFollowersClosure;
 
  private:
   // XXX use a global constant instead of 64!
@@ -296,6 +297,7 @@ class ParNewGeneration: public DefNewGeneration {
   // klass-pointers (klass information already copied to the forwarded
   // image.)  Manipulated with CAS.
   oop _overflow_list;
+  NOT_PRODUCT(ssize_t _num_par_pushes;)
 
   // If true, older generation does not support promotion undo, so avoid.
   static bool _avoid_promotion_undo;
@@ -372,8 +374,12 @@ class ParNewGeneration: public DefNewGeneration {
   oop copy_to_survivor_space_with_undo(ParScanThreadState* par_scan_state,
                              oop obj, size_t obj_sz, markOop m);
 
+  // in support of testing overflow code
+  NOT_PRODUCT(int _overflow_counter;)
+  NOT_PRODUCT(bool should_simulate_overflow();)
+
   // Push the given (from-space) object on the global overflow list.
-  void push_on_overflow_list(oop from_space_obj);
+  void push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state);
 
   // If the global overflow list is non-empty, move some tasks from it
   // onto "work_q" (which must be empty).  No more than 1/4 of the
diff --git a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp
index 087cdf93c..c268b6a2f 100644
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp
@@ -200,6 +200,7 @@ class ParallelScavengeHeap : public CollectedHeap {
 
   void oop_iterate(OopClosure* cl);
   void object_iterate(ObjectClosure* cl);
+  void safe_object_iterate(ObjectClosure* cl) { object_iterate(cl); }
   void permanent_oop_iterate(OopClosure* cl);
   void permanent_object_iterate(ObjectClosure* cl);
 
diff --git a/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp b/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp
index 2335a20e1..2d31e5d72 100644
--- a/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp
+++ b/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp
@@ -116,7 +116,7 @@ void PSOldGen::initialize_work(const char* perf_data_name, int level) {
   // ObjectSpace stuff
   //
 
-  _object_space = new MutableSpace();
+  _object_space = new MutableSpace(virtual_space()->alignment());
 
   if (_object_space == NULL)
     vm_exit_during_initialization("Could not allocate an old gen space");
@@ -385,10 +385,10 @@ void PSOldGen::post_resize() {
   start_array()->set_covered_region(new_memregion);
   Universe::heap()->barrier_set()->resize_covered_region(new_memregion);
 
-  HeapWord* const virtual_space_high = (HeapWord*) virtual_space()->high();
-
   // ALWAYS do this last!!
-  object_space()->set_end(virtual_space_high);
+  object_space()->initialize(new_memregion,
+                             SpaceDecorator::DontClear,
+                             SpaceDecorator::DontMangle);
 
   assert(new_word_size == heap_word_size(object_space()->capacity_in_bytes()),
     "Sanity");
diff --git a/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.cpp b/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.cpp
index 523103f6c..4003e19c6 100644
--- a/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.cpp
+++ b/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.cpp
@@ -78,7 +78,7 @@ void PSVirtualSpace::release() {
   _special = false;
 }
 
-bool PSVirtualSpace::expand_by(size_t bytes, bool pre_touch) {
+bool PSVirtualSpace::expand_by(size_t bytes) {
   assert(is_aligned(bytes), "arg not aligned");
   DEBUG_ONLY(PSVirtualSpaceVerifier this_verifier(this));
 
@@ -92,15 +92,6 @@ bool PSVirtualSpace::expand_by(size_t bytes, bool pre_touch) {
     _committed_high_addr += bytes;
   }
 
-  if (pre_touch || AlwaysPreTouch) {
-    for (char* curr = base_addr;
-         curr < _committed_high_addr;
-         curr += os::vm_page_size()) {
-      char tmp = *curr;
-      *curr = 0;
-    }
-  }
-
   return result;
 }
 
@@ -255,7 +246,7 @@ PSVirtualSpaceHighToLow::PSVirtualSpaceHighToLow(ReservedSpace rs) {
   DEBUG_ONLY(verify());
 }
 
-bool PSVirtualSpaceHighToLow::expand_by(size_t bytes, bool pre_touch) {
+bool PSVirtualSpaceHighToLow::expand_by(size_t bytes) {
   assert(is_aligned(bytes), "arg not aligned");
   DEBUG_ONLY(PSVirtualSpaceVerifier this_verifier(this));
 
@@ -269,15 +260,6 @@ bool PSVirtualSpaceHighToLow::expand_by(size_t bytes, bool pre_touch) {
     _committed_low_addr -= bytes;
   }
 
-  if (pre_touch || AlwaysPreTouch) {
-    for (char* curr = base_addr;
-         curr < _committed_high_addr;
-         curr += os::vm_page_size()) {
-      char tmp = *curr;
-      *curr = 0;
-    }
-  }
-
   return result;
 }
 
diff --git a/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.hpp b/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.hpp
index 8e1e03679..c3c103599 100644
--- a/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.hpp
+++ b/src/share/vm/gc_implementation/parallelScavenge/psVirtualspace.hpp
@@ -80,7 +80,7 @@ class PSVirtualSpace : public CHeapObj {
   inline  void   set_reserved(char* low_addr, char* high_addr, bool special);
   inline  void   set_reserved(ReservedSpace rs);
   inline  void   set_committed(char* low_addr, char* high_addr);
-  virtual bool   expand_by(size_t bytes, bool pre_touch = false);
+  virtual bool   expand_by(size_t bytes);
   virtual bool   shrink_by(size_t bytes);
   virtual size_t expand_into(PSVirtualSpace* space, size_t bytes);
   void           release();
@@ -127,7 +127,7 @@ class PSVirtualSpaceHighToLow : public PSVirtualSpace {
   PSVirtualSpaceHighToLow(ReservedSpace rs, size_t alignment);
   PSVirtualSpaceHighToLow(ReservedSpace rs);
 
-  virtual bool   expand_by(size_t bytes, bool pre_touch = false);
+  virtual bool   expand_by(size_t bytes);
   virtual bool   shrink_by(size_t bytes);
   virtual size_t expand_into(PSVirtualSpace* space, size_t bytes);
 
diff --git a/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp b/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp
index bfbffcba0..1b12c6bf8 100644
--- a/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp
+++ b/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp
@@ -64,12 +64,12 @@ void PSYoungGen::initialize_work() {
   }
 
   if (UseNUMA) {
-    _eden_space = new MutableNUMASpace();
+    _eden_space = new MutableNUMASpace(virtual_space()->alignment());
   } else {
-    _eden_space = new MutableSpace();
+    _eden_space = new MutableSpace(virtual_space()->alignment());
   }
-  _from_space = new MutableSpace();
-  _to_space   = new MutableSpace();
+  _from_space = new MutableSpace(virtual_space()->alignment());
+  _to_space   = new MutableSpace(virtual_space()->alignment());
 
   if (_eden_space == NULL || _from_space == NULL || _to_space == NULL) {
     vm_exit_during_initialization("Could not allocate a young gen space");
diff --git a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp
index c639bbf3f..a3787c8cd 100644
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp
@@ -27,7 +27,7 @@
 # include "incls/_mutableNUMASpace.cpp.incl"
 
 
-MutableNUMASpace::MutableNUMASpace() {
+MutableNUMASpace::MutableNUMASpace(size_t alignment) : MutableSpace(alignment) {
   _lgrp_spaces = new (ResourceObj::C_HEAP) GrowableArray<LGRPSpace*>(0, true);
   _page_size = os::vm_page_size();
   _adaptation_cycles = 0;
@@ -221,7 +221,7 @@ bool MutableNUMASpace::update_layout(bool force) {
         }
       }
       if (!found) {
-        lgrp_spaces()->append(new LGRPSpace(lgrp_ids[i]));
+        lgrp_spaces()->append(new LGRPSpace(lgrp_ids[i], alignment()));
       }
     }
 
@@ -443,10 +443,10 @@ void MutableNUMASpace::select_tails(MemRegion new_region, MemRegion intersection
   // Is there bottom?
   if (new_region.start() < intersection.start()) { // Yes
     // Try to coalesce small pages into a large one.
-    if (UseLargePages && page_size() >= os::large_page_size()) {
-      HeapWord* p = (HeapWord*)round_to((intptr_t) intersection.start(), os::large_page_size());
+    if (UseLargePages && page_size() >= alignment()) {
+      HeapWord* p = (HeapWord*)round_to((intptr_t) intersection.start(), alignment());
       if (new_region.contains(p)
-          && pointer_delta(p, new_region.start(), sizeof(char)) >= os::large_page_size()) {
+          && pointer_delta(p, new_region.start(), sizeof(char)) >= alignment()) {
         if (intersection.contains(p)) {
           intersection = MemRegion(p, intersection.end());
         } else {
@@ -462,10 +462,10 @@ void MutableNUMASpace::select_tails(MemRegion new_region, MemRegion intersection
   // Is there top?
   if (intersection.end() < new_region.end()) { // Yes
     // Try to coalesce small pages into a large one.
-    if (UseLargePages && page_size() >= os::large_page_size()) {
-      HeapWord* p = (HeapWord*)round_down((intptr_t) intersection.end(), os::large_page_size());
+    if (UseLargePages && page_size() >= alignment()) {
+      HeapWord* p = (HeapWord*)round_down((intptr_t) intersection.end(), alignment());
       if (new_region.contains(p)
-          && pointer_delta(new_region.end(), p, sizeof(char)) >= os::large_page_size()) {
+          && pointer_delta(new_region.end(), p, sizeof(char)) >= alignment()) {
         if (intersection.contains(p)) {
           intersection = MemRegion(intersection.start(), p);
         } else {
@@ -504,12 +504,12 @@ void MutableNUMASpace::merge_regions(MemRegion new_region, MemRegion* intersecti
             // That's the only case we have to make an additional bias_region() call.
             HeapWord* start = invalid_region->start();
             HeapWord* end = invalid_region->end();
-            if (UseLargePages && page_size() >= os::large_page_size()) {
-              HeapWord *p = (HeapWord*)round_down((intptr_t) start, os::large_page_size());
+            if (UseLargePages && page_size() >= alignment()) {
+              HeapWord *p = (HeapWord*)round_down((intptr_t) start, alignment());
               if (new_region.contains(p)) {
                 start = p;
               }
-              p = (HeapWord*)round_to((intptr_t) end, os::large_page_size());
+              p = (HeapWord*)round_to((intptr_t) end, alignment());
               if (new_region.contains(end)) {
                 end = p;
               }
@@ -526,7 +526,8 @@ void MutableNUMASpace::merge_regions(MemRegion new_region, MemRegion* intersecti
 
 void MutableNUMASpace::initialize(MemRegion mr,
                                   bool clear_space,
-                                  bool mangle_space) {
+                                  bool mangle_space,
+                                  bool setup_pages) {
   assert(clear_space, "Reallocation will destory data!");
   assert(lgrp_spaces()->length() > 0, "There should be at least one space");
 
@@ -538,7 +539,7 @@ void MutableNUMASpace::initialize(MemRegion mr,
 
   // Compute chunk sizes
   size_t prev_page_size = page_size();
-  set_page_size(UseLargePages ? os::large_page_size() : os::vm_page_size());
+  set_page_size(UseLargePages ? alignment() : os::vm_page_size());
   HeapWord* rounded_bottom = (HeapWord*)round_to((intptr_t) bottom(), page_size());
   HeapWord* rounded_end = (HeapWord*)round_down((intptr_t) end(), page_size());
   size_t base_space_size_pages = pointer_delta(rounded_end, rounded_bottom, sizeof(char)) / page_size();
@@ -666,7 +667,7 @@ void MutableNUMASpace::initialize(MemRegion mr,
     }
 
     // Clear space (set top = bottom) but never mangle.
-    s->initialize(new_region, SpaceDecorator::Clear, SpaceDecorator::DontMangle);
+    s->initialize(new_region, SpaceDecorator::Clear, SpaceDecorator::DontMangle, MutableSpace::DontSetupPages);
 
     set_adaptation_cycles(samples_count());
   }
diff --git a/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp b/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp
index f7ca24625..128db555b 100644
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp
@@ -82,8 +82,8 @@ class MutableNUMASpace : public MutableSpace {
     char* last_page_scanned()            { return _last_page_scanned; }
     void set_last_page_scanned(char* p)  { _last_page_scanned = p;    }
    public:
-    LGRPSpace(int l) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
-      _space = new MutableSpace();
+    LGRPSpace(int l, size_t alignment) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
+      _space = new MutableSpace(alignment);
       _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight);
     }
     ~LGRPSpace() {
@@ -183,10 +183,10 @@ class MutableNUMASpace : public MutableSpace {
 
  public:
   GrowableArray<LGRPSpace*>* lgrp_spaces() const     { return _lgrp_spaces;       }
-  MutableNUMASpace();
+  MutableNUMASpace(size_t alignment);
   virtual ~MutableNUMASpace();
   // Space initialization.
-  virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
+  virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space, bool setup_pages = SetupPages);
   // Update space layout if necessary. Do all adaptive resizing job.
   virtual void update();
   // Update allocation rate averages.
diff --git a/src/share/vm/gc_implementation/shared/mutableSpace.cpp b/src/share/vm/gc_implementation/shared/mutableSpace.cpp
index 65c6d34cd..90e1d3ca2 100644
--- a/src/share/vm/gc_implementation/shared/mutableSpace.cpp
+++ b/src/share/vm/gc_implementation/shared/mutableSpace.cpp
@@ -25,7 +25,10 @@
 # include "incls/_precompiled.incl"
 # include "incls/_mutableSpace.cpp.incl"
 
-MutableSpace::MutableSpace(): ImmutableSpace(), _top(NULL) {
+MutableSpace::MutableSpace(size_t alignment): ImmutableSpace(), _top(NULL), _alignment(alignment) {
+  assert(MutableSpace::alignment() >= 0 &&
+         MutableSpace::alignment() % os::vm_page_size() == 0,
+         "Space should be aligned");
   _mangler = new MutableSpaceMangler(this);
 }
 
@@ -33,16 +36,88 @@ MutableSpace::~MutableSpace() {
   delete _mangler;
 }
 
+void MutableSpace::numa_setup_pages(MemRegion mr, bool clear_space) {
+  if (!mr.is_empty()) {
+    size_t page_size = UseLargePages ? alignment() : os::vm_page_size();
+    HeapWord *start = (HeapWord*)round_to((intptr_t) mr.start(), page_size);
+    HeapWord *end =  (HeapWord*)round_down((intptr_t) mr.end(), page_size);
+    if (end > start) {
+      size_t size = pointer_delta(end, start, sizeof(char));
+      if (clear_space) {
+        // Prefer page reallocation to migration.
+        os::free_memory((char*)start, size);
+      }
+      os::numa_make_global((char*)start, size);
+    }
+  }
+}
+
+void MutableSpace::pretouch_pages(MemRegion mr) {
+  for (volatile char *p = (char*)mr.start(); p < (char*)mr.end(); p += os::vm_page_size()) {
+    char t = *p; *p = t;
+  }
+}
+
 void MutableSpace::initialize(MemRegion mr,
                               bool clear_space,
-                              bool mangle_space) {
-  HeapWord* bottom = mr.start();
-  HeapWord* end    = mr.end();
+                              bool mangle_space,
+                              bool setup_pages) {
 
-  assert(Universe::on_page_boundary(bottom) && Universe::on_page_boundary(end),
+  assert(Universe::on_page_boundary(mr.start()) && Universe::on_page_boundary(mr.end()),
          "invalid space boundaries");
-  set_bottom(bottom);
-  set_end(end);
+
+  if (setup_pages && (UseNUMA || AlwaysPreTouch)) {
+    // The space may move left and right or expand/shrink.
+    // We'd like to enforce the desired page placement.
+    MemRegion head, tail;
+    if (last_setup_region().is_empty()) {
+      // If it's the first initialization don't limit the amount of work.
+      head = mr;
+      tail = MemRegion(mr.end(), mr.end());
+    } else {
+      // Is there an intersection with the address space?
+      MemRegion intersection = last_setup_region().intersection(mr);
+      if (intersection.is_empty()) {
+        intersection = MemRegion(mr.end(), mr.end());
+      }
+      // All the sizes below are in words.
+      size_t head_size = 0, tail_size = 0;
+      if (mr.start() <= intersection.start()) {
+        head_size = pointer_delta(intersection.start(), mr.start());
+      }
+      if(intersection.end() <= mr.end()) {
+        tail_size = pointer_delta(mr.end(), intersection.end());
+      }
+      // Limit the amount of page manipulation if necessary.
+      if (NUMASpaceResizeRate > 0 && !AlwaysPreTouch) {
+        const size_t change_size = head_size + tail_size;
+        const float setup_rate_words = NUMASpaceResizeRate >> LogBytesPerWord;
+        head_size = MIN2((size_t)(setup_rate_words * head_size / change_size),
+                         head_size);
+        tail_size = MIN2((size_t)(setup_rate_words * tail_size / change_size),
+                         tail_size);
+      }
+      head = MemRegion(intersection.start() - head_size, intersection.start());
+      tail = MemRegion(intersection.end(), intersection.end() + tail_size);
+    }
+    assert(mr.contains(head) && mr.contains(tail), "Sanity");
+
+    if (UseNUMA) {
+      numa_setup_pages(head, clear_space);
+      numa_setup_pages(tail, clear_space);
+    }
+
+    if (AlwaysPreTouch) {
+      pretouch_pages(head);
+      pretouch_pages(tail);
+    }
+
+    // Remember where we stopped so that we can continue later.
+    set_last_setup_region(MemRegion(head.start(), tail.end()));
+  }
+
+  set_bottom(mr.start());
+  set_end(mr.end());
 
   if (clear_space) {
     clear(mangle_space);
diff --git a/src/share/vm/gc_implementation/shared/mutableSpace.hpp b/src/share/vm/gc_implementation/shared/mutableSpace.hpp
index b55da4c84..98382d6c4 100644
--- a/src/share/vm/gc_implementation/shared/mutableSpace.hpp
+++ b/src/share/vm/gc_implementation/shared/mutableSpace.hpp
@@ -25,7 +25,10 @@
 // A MutableSpace is a subtype of ImmutableSpace that supports the
 // concept of allocation. This includes the concepts that a space may
 // be only partially full, and the querry methods that go with such
-// an assumption.
+// an assumption. MutableSpace is also responsible for minimizing the
+// page allocation time by having the memory pretouched (with
+// AlwaysPreTouch) and for optimizing page placement on NUMA systems
+// by making the underlying region interleaved (with UseNUMA).
 //
 // Invariant: (ImmutableSpace +) bottom() <= top() <= end()
 // top() is inclusive and end() is exclusive.
@@ -37,15 +40,23 @@ class MutableSpace: public ImmutableSpace {
 
   // Helper for mangling unused space in debug builds
   MutableSpaceMangler* _mangler;
-
+  // The last region whose pages had been set up to be interleaved.
+  MemRegion _last_setup_region;
+  size_t _alignment;
  protected:
   HeapWord* _top;
 
   MutableSpaceMangler* mangler() { return _mangler; }
 
+  void numa_setup_pages(MemRegion mr, bool clear_space);
+  void pretouch_pages(MemRegion mr);
+
+  void set_last_setup_region(MemRegion mr) { _last_setup_region = mr;   }
+  MemRegion last_setup_region() const      { return _last_setup_region; }
+
  public:
   virtual ~MutableSpace();
-  MutableSpace();
+  MutableSpace(size_t page_size);
 
   // Accessors
   HeapWord* top() const                    { return _top;    }
@@ -57,13 +68,20 @@ class MutableSpace: public ImmutableSpace {
   virtual void set_bottom(HeapWord* value) { _bottom = value; }
   virtual void set_end(HeapWord* value)    { _end = value; }
 
+  size_t alignment()                       { return _alignment; }
+
   // Returns a subregion containing all objects in this space.
   MemRegion used_region() { return MemRegion(bottom(), top()); }
 
+  static const bool SetupPages = true;
+  static const bool DontSetupPages = false;
+
   // Initialization
   virtual void initialize(MemRegion mr,
                           bool clear_space,
-                          bool mangle_space);
+                          bool mangle_space,
+                          bool setup_pages = SetupPages);
+
   virtual void clear(bool mangle_space);
   // Does the usual initialization but optionally resets top to bottom.
 #if 0  // MANGLE_SPACE
diff --git a/src/share/vm/gc_interface/collectedHeap.hpp b/src/share/vm/gc_interface/collectedHeap.hpp
index 13d30c70b..41d80a3a9 100644
--- a/src/share/vm/gc_interface/collectedHeap.hpp
+++ b/src/share/vm/gc_interface/collectedHeap.hpp
@@ -466,6 +466,10 @@ class CollectedHeap : public CHeapObj {
   // This includes objects in permanent memory.
   virtual void object_iterate(ObjectClosure* cl) = 0;
 
+  // Similar to object_iterate() except iterates only
+  // over live objects.
+  virtual void safe_object_iterate(ObjectClosure* cl) = 0;
+
   // Behaves the same as oop_iterate, except only traverses
   // interior pointers contained in permanent memory. If there
   // is no permanent memory, does nothing.
diff --git a/src/share/vm/includeDB_compiler2 b/src/share/vm/includeDB_compiler2
index ea3b88cc1..bf8458933 100644
--- a/src/share/vm/includeDB_compiler2
+++ b/src/share/vm/includeDB_compiler2
@@ -140,6 +140,7 @@ c2_globals_<os_family>.hpp              globalDefinitions.hpp
 c2_globals_<os_family>.hpp              macros.hpp
 
 c2_init_<arch>.cpp                      compile.hpp
+c2_init_<arch>.cpp                      node.hpp
 
 c2compiler.cpp                          ad_<arch_model>.hpp
 c2compiler.cpp                          c2compiler.hpp
@@ -839,6 +840,7 @@ parseHelper.cpp                         systemDictionary.hpp
 phase.cpp                               compile.hpp
 phase.cpp                               compileBroker.hpp
 phase.cpp                               nmethod.hpp
+phase.cpp                               node.hpp
 phase.cpp                               phase.hpp
 
 phase.hpp                               port.hpp
diff --git a/src/share/vm/includeDB_core b/src/share/vm/includeDB_core
index 7ca1fcabe..8d39f3277 100644
--- a/src/share/vm/includeDB_core
+++ b/src/share/vm/includeDB_core
@@ -1311,6 +1311,7 @@ cppInterpreter_<arch>.cpp               bytecodeHistogram.hpp
 cppInterpreter_<arch>.cpp               debug.hpp
 cppInterpreter_<arch>.cpp               deoptimization.hpp
 cppInterpreter_<arch>.cpp               frame.inline.hpp
+cppInterpreter_<arch>.cpp               interfaceSupport.hpp
 cppInterpreter_<arch>.cpp               interpreterRuntime.hpp
 cppInterpreter_<arch>.cpp               interpreter.hpp
 cppInterpreter_<arch>.cpp               interpreterGenerator.hpp
@@ -2014,7 +2015,7 @@ instanceKlass.cpp                       verifier.hpp
 instanceKlass.cpp                       vmSymbols.hpp
 
 instanceKlass.hpp                       accessFlags.hpp
-instanceKlass.hpp                       bitMap.hpp
+instanceKlass.hpp                       bitMap.inline.hpp
 instanceKlass.hpp                       constMethodOop.hpp
 instanceKlass.hpp                       constantPoolOop.hpp
 instanceKlass.hpp                       handles.hpp
@@ -3771,6 +3772,7 @@ spaceDecorator.hpp                      space.hpp
 
 spaceDecorator.cpp                      copy.hpp
 spaceDecorator.cpp                      spaceDecorator.hpp
+spaceDecorator.cpp                      space.inline.hpp
 
 specialized_oop_closures.cpp            ostream.hpp
 specialized_oop_closures.cpp            specialized_oop_closures.hpp
diff --git a/src/share/vm/includeDB_features b/src/share/vm/includeDB_features
index d783a04e5..e136dfe78 100644
--- a/src/share/vm/includeDB_features
+++ b/src/share/vm/includeDB_features
@@ -59,6 +59,8 @@ dump.cpp                                vm_operations.hpp
 
 dump_<arch_model>.cpp                   assembler_<arch>.inline.hpp
 dump_<arch_model>.cpp                   compactingPermGenGen.hpp
+dump_<arch_model>.cpp                   generation.inline.hpp
+dump_<arch_model>.cpp                   space.inline.hpp
 
 forte.cpp                               collectedHeap.inline.hpp
 forte.cpp                               debugInfoRec.hpp
diff --git a/src/share/vm/interpreter/bytecodeInterpreter.cpp b/src/share/vm/interpreter/bytecodeInterpreter.cpp
index b4204414a..351d29cc9 100644
--- a/src/share/vm/interpreter/bytecodeInterpreter.cpp
+++ b/src/share/vm/interpreter/bytecodeInterpreter.cpp
@@ -163,7 +163,7 @@
 #ifdef USELABELS
 // Have to do this dispatch this way in C++ because otherwise gcc complains about crossing an
 // initialization (which is is the initialization of the table pointer...)
-#define DISPATCH(opcode) goto *dispatch_table[opcode]
+#define DISPATCH(opcode) goto *(void*)dispatch_table[opcode]
 #define CONTINUE {                              \
         opcode = *pc;                           \
         DO_UPDATE_INSTRUCTION_COUNT(opcode);    \
@@ -341,7 +341,7 @@
  */
 #undef CHECK_NULL
 #define CHECK_NULL(obj_)                                                 \
-    if ((obj_) == 0) {                                                   \
+    if ((obj_) == NULL) {                                                \
         VM_JAVA_ERROR(vmSymbols::java_lang_NullPointerException(), "");  \
     }
 
@@ -1362,7 +1362,7 @@ run:
 
 #define NULL_COMPARISON_NOT_OP(name)                                         \
       CASE(_if##name): {                                                     \
-          int skip = (!(STACK_OBJECT(-1) == 0))                              \
+          int skip = (!(STACK_OBJECT(-1) == NULL))                           \
                       ? (int16_t)Bytes::get_Java_u2(pc + 1) : 3;             \
           address branch_pc = pc;                                            \
           UPDATE_PC_AND_TOS(skip, -1);                                       \
@@ -1372,7 +1372,7 @@ run:
 
 #define NULL_COMPARISON_OP(name)                                             \
       CASE(_if##name): {                                                     \
-          int skip = ((STACK_OBJECT(-1) == 0))                               \
+          int skip = ((STACK_OBJECT(-1) == NULL))                            \
                       ? (int16_t)Bytes::get_Java_u2(pc + 1) : 3;             \
           address branch_pc = pc;                                            \
           UPDATE_PC_AND_TOS(skip, -1);                                       \
diff --git a/src/share/vm/interpreter/bytecodeInterpreter.hpp b/src/share/vm/interpreter/bytecodeInterpreter.hpp
index 715acf671..a1504851a 100644
--- a/src/share/vm/interpreter/bytecodeInterpreter.hpp
+++ b/src/share/vm/interpreter/bytecodeInterpreter.hpp
@@ -66,7 +66,6 @@ friend class CppInterpreterGenerator;
 friend class InterpreterGenerator;
 friend class InterpreterMacroAssembler;
 friend class frame;
-friend class SharedRuntime;
 friend class VMStructs;
 
 public:
diff --git a/src/share/vm/libadt/port.hpp b/src/share/vm/libadt/port.hpp
index 70034d09b..68a84f1a4 100644
--- a/src/share/vm/libadt/port.hpp
+++ b/src/share/vm/libadt/port.hpp
@@ -34,17 +34,6 @@
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
-#undef bzero
-inline void bzero(void *b, int len) { memset(b,0,len); }
-#undef bcopy
-inline void bcopy(const void *s, void *d, size_t len) { memmove(d,s,len); }
-#undef bcmp
-inline int bcmp(const void *s,const void *t,int len) { return memcmp(s,t,len);}
-extern "C" unsigned long strtoul(const char *s, char **end, int base);
-
-// Definition for sys_errlist varies from Sun 4.1 & Solaris.
-// We use the new Solaris definition.
-#include <string.h>
 
 // Access to the C++ class virtual function pointer
 // Put the class in the macro
diff --git a/src/share/vm/memory/genCollectedHeap.cpp b/src/share/vm/memory/genCollectedHeap.cpp
index 832a3331e..41dfac1b5 100644
--- a/src/share/vm/memory/genCollectedHeap.cpp
+++ b/src/share/vm/memory/genCollectedHeap.cpp
@@ -910,6 +910,13 @@ void GenCollectedHeap::object_iterate(ObjectClosure* cl) {
   perm_gen()->object_iterate(cl);
 }
 
+void GenCollectedHeap::safe_object_iterate(ObjectClosure* cl) {
+  for (int i = 0; i < _n_gens; i++) {
+    _gens[i]->safe_object_iterate(cl);
+  }
+  perm_gen()->safe_object_iterate(cl);
+}
+
 void GenCollectedHeap::object_iterate_since_last_GC(ObjectClosure* cl) {
   for (int i = 0; i < _n_gens; i++) {
     _gens[i]->object_iterate_since_last_GC(cl);
diff --git a/src/share/vm/memory/genCollectedHeap.hpp b/src/share/vm/memory/genCollectedHeap.hpp
index f9a4d5087..3ab20cfd1 100644
--- a/src/share/vm/memory/genCollectedHeap.hpp
+++ b/src/share/vm/memory/genCollectedHeap.hpp
@@ -215,6 +215,7 @@ public:
   void oop_iterate(OopClosure* cl);
   void oop_iterate(MemRegion mr, OopClosure* cl);
   void object_iterate(ObjectClosure* cl);
+  void safe_object_iterate(ObjectClosure* cl);
   void object_iterate_since_last_GC(ObjectClosure* cl);
   Space* space_containing(const void* addr) const;
 
diff --git a/src/share/vm/memory/generation.cpp b/src/share/vm/memory/generation.cpp
index 5167db243..d7b1d9f87 100644
--- a/src/share/vm/memory/generation.cpp
+++ b/src/share/vm/memory/generation.cpp
@@ -319,6 +319,21 @@ void Generation::object_iterate(ObjectClosure* cl) {
   space_iterate(&blk);
 }
 
+class GenerationSafeObjIterateClosure : public SpaceClosure {
+ private:
+  ObjectClosure* _cl;
+ public:
+  virtual void do_space(Space* s) {
+    s->safe_object_iterate(_cl);
+  }
+  GenerationSafeObjIterateClosure(ObjectClosure* cl) : _cl(cl) {}
+};
+
+void Generation::safe_object_iterate(ObjectClosure* cl) {
+  GenerationSafeObjIterateClosure blk(cl);
+  space_iterate(&blk);
+}
+
 void Generation::prepare_for_compaction(CompactPoint* cp) {
   // Generic implementation, can be specialized
   CompactibleSpace* space = first_compaction_space();
diff --git a/src/share/vm/memory/generation.hpp b/src/share/vm/memory/generation.hpp
index 0f0e74e47..206949901 100644
--- a/src/share/vm/memory/generation.hpp
+++ b/src/share/vm/memory/generation.hpp
@@ -518,6 +518,11 @@ class Generation: public CHeapObj {
   // each.
   virtual void object_iterate(ObjectClosure* cl);
 
+  // Iterate over all safe objects in the generation, calling "cl.do_object" on
+  // each.  An object is safe if its references point to other objects in
+  // the heap.  This defaults to object_iterate() unless overridden.
+  virtual void safe_object_iterate(ObjectClosure* cl);
+
   // Iterate over all objects allocated in the generation since the last
   // collection, calling "cl.do_object" on each.  The generation must have
   // been initialized properly to support this function, or else this call
diff --git a/src/share/vm/memory/heapInspection.cpp b/src/share/vm/memory/heapInspection.cpp
index e3d6fbd7b..3bc17bea6 100644
--- a/src/share/vm/memory/heapInspection.cpp
+++ b/src/share/vm/memory/heapInspection.cpp
@@ -263,6 +263,9 @@ void HeapInspection::heap_inspection(outputStream* st) {
   if (!cit.allocation_failed()) {
     // Iterate over objects in the heap
     RecordInstanceClosure ric(&cit);
+    // If this operation encounters a bad object when using CMS,
+    // consider using safe_object_iterate() which avoids perm gen
+    // objects that may contain bad references.
     Universe::heap()->object_iterate(&ric);
 
     // Report if certain classes are not counted because of
@@ -317,5 +320,8 @@ void HeapInspection::find_instances_at_safepoint(klassOop k, GrowableArray<oop>*
 
   // Iterate over objects in the heap
   FindInstanceClosure fic(k, result);
+  // If this operation encounters a bad object when using CMS,
+  // consider using safe_object_iterate() which avoids perm gen
+  // objects that may contain bad references.
   Universe::heap()->object_iterate(&fic);
 }
diff --git a/src/share/vm/memory/oopFactory.cpp b/src/share/vm/memory/oopFactory.cpp
index e9ea08d3b..24c67634e 100644
--- a/src/share/vm/memory/oopFactory.cpp
+++ b/src/share/vm/memory/oopFactory.cpp
@@ -82,9 +82,11 @@ objArrayOop oopFactory::new_system_objArray(int length, TRAPS) {
 }
 
 
-constantPoolOop oopFactory::new_constantPool(int length, TRAPS) {
+constantPoolOop oopFactory::new_constantPool(int length,
+                                             bool is_conc_safe,
+                                             TRAPS) {
   constantPoolKlass* ck = constantPoolKlass::cast(Universe::constantPoolKlassObj());
-  return ck->allocate(length, CHECK_NULL);
+  return ck->allocate(length, is_conc_safe, CHECK_NULL);
 }
 
 
@@ -105,11 +107,13 @@ constMethodOop oopFactory::new_constMethod(int byte_code_size,
                                            int compressed_line_number_size,
                                            int localvariable_table_length,
                                            int checked_exceptions_length,
+                                           bool is_conc_safe,
                                            TRAPS) {
   klassOop cmkObj = Universe::constMethodKlassObj();
   constMethodKlass* cmk = constMethodKlass::cast(cmkObj);
   return cmk->allocate(byte_code_size, compressed_line_number_size,
                        localvariable_table_length, checked_exceptions_length,
+                       is_conc_safe,
                        CHECK_NULL);
 }
 
@@ -117,14 +121,17 @@ constMethodOop oopFactory::new_constMethod(int byte_code_size,
 methodOop oopFactory::new_method(int byte_code_size, AccessFlags access_flags,
                                  int compressed_line_number_size,
                                  int localvariable_table_length,
-                                 int checked_exceptions_length, TRAPS) {
+                                 int checked_exceptions_length,
+                                 bool is_conc_safe,
+                                 TRAPS) {
   methodKlass* mk = methodKlass::cast(Universe::methodKlassObj());
   assert(!access_flags.is_native() || byte_code_size == 0,
          "native methods should not contain byte codes");
   constMethodOop cm = new_constMethod(byte_code_size,
                                       compressed_line_number_size,
                                       localvariable_table_length,
-                                      checked_exceptions_length, CHECK_NULL);
+                                      checked_exceptions_length,
+                                      is_conc_safe, CHECK_NULL);
   constMethodHandle rw(THREAD, cm);
   return mk->allocate(rw, access_flags, CHECK_NULL);
 }
diff --git a/src/share/vm/memory/oopFactory.hpp b/src/share/vm/memory/oopFactory.hpp
index 855e0fcf4..e5a80073b 100644
--- a/src/share/vm/memory/oopFactory.hpp
+++ b/src/share/vm/memory/oopFactory.hpp
@@ -81,7 +81,9 @@ class oopFactory: AllStatic {
   static symbolHandle    new_symbol_handle(const char* name, TRAPS) { return new_symbol_handle(name, (int)strlen(name), CHECK_(symbolHandle())); }
 
   // Constant pools
-  static constantPoolOop      new_constantPool     (int length, TRAPS);
+  static constantPoolOop      new_constantPool     (int length,
+                                                    bool is_conc_safe,
+                                                    TRAPS);
   static constantPoolCacheOop new_constantPoolCache(int length, TRAPS);
 
   // Instance classes
@@ -93,9 +95,20 @@ private:
   static constMethodOop  new_constMethod(int byte_code_size,
                                          int compressed_line_number_size,
                                          int localvariable_table_length,
-                                         int checked_exceptions_length, TRAPS);
+                                         int checked_exceptions_length,
+                                         bool is_conc_safe,
+                                         TRAPS);
 public:
-  static methodOop       new_method(int byte_code_size, AccessFlags access_flags, int compressed_line_number_size, int localvariable_table_length, int checked_exceptions_length, TRAPS);
+  // Set is_conc_safe to false for methods which cannot safely be
+  // processed by concurrent GC even after the return of
+  // the method.
+  static methodOop       new_method(int byte_code_size,
+                                    AccessFlags access_flags,
+                                    int compressed_line_number_size,
+                                    int localvariable_table_length,
+                                    int checked_exceptions_length,
+                                    bool is_conc_safe,
+                                    TRAPS);
 
   // Method Data containers
   static methodDataOop   new_methodData(methodHandle method, TRAPS);
diff --git a/src/share/vm/memory/referenceProcessor.cpp b/src/share/vm/memory/referenceProcessor.cpp
index 073886f2f..ac6f4777e 100644
--- a/src/share/vm/memory/referenceProcessor.cpp
+++ b/src/share/vm/memory/referenceProcessor.cpp
@@ -721,12 +721,6 @@ ReferenceProcessor::process_phase3(DiscoveredList&    refs_list,
                              iter.obj(), iter.obj()->blueprint()->internal_name());
     }
     assert(iter.obj()->is_oop(UseConcMarkSweepGC), "Adding a bad reference");
-    // If discovery is concurrent, we may have objects with null referents,
-    // being those that were concurrently cleared after they were discovered
-    // (and not subsequently precleaned).
-    assert(   (discovery_is_atomic() && iter.referent()->is_oop())
-           || (!discovery_is_atomic() && iter.referent()->is_oop_or_null(UseConcMarkSweepGC)),
-           "Adding a bad referent");
     iter.next();
   }
   // Remember to keep sentinel pointer around
diff --git a/src/share/vm/memory/space.cpp b/src/share/vm/memory/space.cpp
index 652d585bb..00f970013 100644
--- a/src/share/vm/memory/space.cpp
+++ b/src/share/vm/memory/space.cpp
@@ -569,7 +569,15 @@ void Space::object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl) {
   if (prev > mr.start()) {
     region_start_addr = prev;
     blk_start_addr    = prev;
-    assert(blk_start_addr == block_start(region_start_addr), "invariant");
+    // The previous invocation may have pushed "prev" beyond the
+    // last allocated block yet there may still be blocks
+    // in this region due to a particular coalescing policy.
+    // Relax the assertion so that the case where the unallocated
+    // block is maintained and "prev" is beyond the unallocated
+    // block does not cause the assertion to fire.
+    assert((BlockOffsetArrayUseUnallocatedBlock &&
+            (!is_in(prev))) ||
+           (blk_start_addr == block_start(region_start_addr)), "invariant");
   } else {
     region_start_addr = mr.start();
     blk_start_addr    = block_start(region_start_addr);
@@ -705,6 +713,12 @@ void ContiguousSpace::object_iterate(ObjectClosure* blk) {
   object_iterate_from(bm, blk);
 }
 
+// For a contiguous space object_iterate() and safe_object_iterate()
+// are the same.
+void ContiguousSpace::safe_object_iterate(ObjectClosure* blk) {
+  object_iterate(blk);
+}
+
 void ContiguousSpace::object_iterate_from(WaterMark mark, ObjectClosure* blk) {
   assert(mark.space() == this, "Mark does not match space");
   HeapWord* p = mark.point();
diff --git a/src/share/vm/memory/space.hpp b/src/share/vm/memory/space.hpp
index a102c5f22..f055638e8 100644
--- a/src/share/vm/memory/space.hpp
+++ b/src/share/vm/memory/space.hpp
@@ -193,6 +193,9 @@ class Space: public CHeapObj {
   // each.  Objects allocated by applications of the closure are not
   // included in the iteration.
   virtual void object_iterate(ObjectClosure* blk) = 0;
+  // Similar to object_iterate() except only iterates over
+  // objects whose internal references point to objects in the space.
+  virtual void safe_object_iterate(ObjectClosure* blk) = 0;
 
   // Iterate over all objects that intersect with mr, calling "cl->do_object"
   // on each.  There is an exception to this: if this closure has already
@@ -843,6 +846,9 @@ class ContiguousSpace: public CompactibleSpace {
   void oop_iterate(OopClosure* cl);
   void oop_iterate(MemRegion mr, OopClosure* cl);
   void object_iterate(ObjectClosure* blk);
+  // For contiguous spaces this method will iterate safely over objects
+  // in the space (i.e., between bottom and top) when at a safepoint.
+  void safe_object_iterate(ObjectClosure* blk);
   void object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl);
   // iterates on objects up to the safe limit
   HeapWord* object_iterate_careful(ObjectClosureCareful* cl);
diff --git a/src/share/vm/oops/constMethodKlass.cpp b/src/share/vm/oops/constMethodKlass.cpp
index 802c4430a..f2fe1706a 100644
--- a/src/share/vm/oops/constMethodKlass.cpp
+++ b/src/share/vm/oops/constMethodKlass.cpp
@@ -49,10 +49,16 @@ bool constMethodKlass::oop_is_parsable(oop obj) const {
   return constMethodOop(obj)->object_is_parsable();
 }
 
+bool constMethodKlass::oop_is_conc_safe(oop obj) const {
+  assert(obj->is_constMethod(), "must be constMethod oop");
+  return constMethodOop(obj)->is_conc_safe();
+}
+
 constMethodOop constMethodKlass::allocate(int byte_code_size,
                                           int compressed_line_number_size,
                                           int localvariable_table_length,
                                           int checked_exceptions_length,
+                                          bool is_conc_safe,
                                           TRAPS) {
 
   int size = constMethodOopDesc::object_size(byte_code_size,
@@ -75,6 +81,7 @@ constMethodOop constMethodKlass::allocate(int byte_code_size,
                                 compressed_line_number_size,
                                 localvariable_table_length);
   assert(cm->size() == size, "wrong size for object");
+  cm->set_is_conc_safe(is_conc_safe);
   cm->set_partially_loaded();
   assert(cm->is_parsable(), "Is safely parsable by gc");
   return cm;
diff --git a/src/share/vm/oops/constMethodKlass.hpp b/src/share/vm/oops/constMethodKlass.hpp
index 87fc9c315..a3f7d9710 100644
--- a/src/share/vm/oops/constMethodKlass.hpp
+++ b/src/share/vm/oops/constMethodKlass.hpp
@@ -32,12 +32,16 @@ public:
   // Testing
   bool oop_is_constMethod() const { return true; }
   virtual bool oop_is_parsable(oop obj) const;
+  virtual bool oop_is_conc_safe(oop obj) const;
+
 
   // Allocation
   DEFINE_ALLOCATE_PERMANENT(constMethodKlass);
   constMethodOop allocate(int byte_code_size, int compressed_line_number_size,
                           int localvariable_table_length,
-                          int checked_exceptions_length, TRAPS);
+                          int checked_exceptions_length,
+                          bool is_conc_safe,
+                          TRAPS);
   static klassOop create_klass(TRAPS);
 
   // Sizing
diff --git a/src/share/vm/oops/constMethodOop.hpp b/src/share/vm/oops/constMethodOop.hpp
index e9ffa4b66..4669e6a85 100644
--- a/src/share/vm/oops/constMethodOop.hpp
+++ b/src/share/vm/oops/constMethodOop.hpp
@@ -104,6 +104,7 @@ private:
   // loads and stores.  This value may updated and read without a lock by
   // multiple threads, so is volatile.
   volatile uint64_t _fingerprint;
+  volatile bool     _is_conc_safe; // if true, safe for concurrent GC processing
 
 public:
   oop* oop_block_beg() const { return adr_method(); }
@@ -273,6 +274,8 @@ public:
   oop*  adr_method() const             { return (oop*)&_method;          }
   oop*  adr_stackmap_data() const      { return (oop*)&_stackmap_data;   }
   oop*  adr_exception_table() const    { return (oop*)&_exception_table; }
+  bool is_conc_safe() { return _is_conc_safe; }
+  void set_is_conc_safe(bool v) { _is_conc_safe = v; }
 
   // Unique id for the method
   static const u2 MAX_IDNUM;
diff --git a/src/share/vm/oops/constantPoolKlass.cpp b/src/share/vm/oops/constantPoolKlass.cpp
index 593b74668..2a17c00e1 100644
--- a/src/share/vm/oops/constantPoolKlass.cpp
+++ b/src/share/vm/oops/constantPoolKlass.cpp
@@ -25,7 +25,7 @@
 # include "incls/_precompiled.incl"
 # include "incls/_constantPoolKlass.cpp.incl"
 
-constantPoolOop constantPoolKlass::allocate(int length, TRAPS) {
+constantPoolOop constantPoolKlass::allocate(int length, bool is_conc_safe, TRAPS) {
   int size = constantPoolOopDesc::object_size(length);
   KlassHandle klass (THREAD, as_klassOop());
   constantPoolOop c =
@@ -38,6 +38,9 @@ constantPoolOop constantPoolKlass::allocate(int length, TRAPS) {
   c->set_flags(0);
   // only set to non-zero if constant pool is merged by RedefineClasses
   c->set_orig_length(0);
+  // if constant pool may change during RedefineClasses, it is created
+  // unsafe for GC concurrent processing.
+  c->set_is_conc_safe(is_conc_safe);
   // all fields are initialized; needed for GC
 
   // initialize tag array
@@ -207,6 +210,11 @@ int constantPoolKlass::oop_oop_iterate_m(oop obj, OopClosure* blk, MemRegion mr)
   return size;
 }
 
+bool constantPoolKlass::oop_is_conc_safe(oop obj) const {
+  assert(obj->is_constantPool(), "must be constantPool");
+  return constantPoolOop(obj)->is_conc_safe();
+}
+
 #ifndef SERIALGC
 int constantPoolKlass::oop_update_pointers(ParCompactionManager* cm, oop obj) {
   assert (obj->is_constantPool(), "obj must be constant pool");
diff --git a/src/share/vm/oops/constantPoolKlass.hpp b/src/share/vm/oops/constantPoolKlass.hpp
index 324efbec0..2f9efc285 100644
--- a/src/share/vm/oops/constantPoolKlass.hpp
+++ b/src/share/vm/oops/constantPoolKlass.hpp
@@ -34,7 +34,7 @@ class constantPoolKlass : public Klass {
 
   // Allocation
   DEFINE_ALLOCATE_PERMANENT(constantPoolKlass);
-  constantPoolOop allocate(int length, TRAPS);
+  constantPoolOop allocate(int length, bool is_conc_safe, TRAPS);
   static klassOop create_klass(TRAPS);
 
   // Casting from klassOop
@@ -48,6 +48,8 @@ class constantPoolKlass : public Klass {
   int object_size() const        { return align_object_size(header_size()); }
 
   // Garbage collection
+  // Returns true if the object is safe for GC concurrent processing.
+  virtual bool oop_is_conc_safe(oop obj) const;
   void oop_follow_contents(oop obj);
   int oop_adjust_pointers(oop obj);
 
diff --git a/src/share/vm/oops/constantPoolOop.cpp b/src/share/vm/oops/constantPoolOop.cpp
index 842f80679..e90bda568 100644
--- a/src/share/vm/oops/constantPoolOop.cpp
+++ b/src/share/vm/oops/constantPoolOop.cpp
@@ -962,7 +962,7 @@ static void print_cpool_bytes(jint cnt, u1 *bytes) {
       }
       case JVM_CONSTANT_Long: {
         u8 val = Bytes::get_Java_u8(bytes);
-        printf("long         %lldl", *(jlong *) &val);
+        printf("long         "INT64_FORMAT, *(jlong *) &val);
         ent_size = 8;
         idx++; // Long takes two cpool slots
         break;
diff --git a/src/share/vm/oops/constantPoolOop.hpp b/src/share/vm/oops/constantPoolOop.hpp
index 562705909..fd2264c1e 100644
--- a/src/share/vm/oops/constantPoolOop.hpp
+++ b/src/share/vm/oops/constantPoolOop.hpp
@@ -43,6 +43,8 @@ class constantPoolOopDesc : public oopDesc {
   klassOop             _pool_holder;   // the corresponding class
   int                  _flags;         // a few header bits to describe contents for GC
   int                  _length; // number of elements in the array
+  volatile bool        _is_conc_safe; // if true, safe for concurrent
+                                      // GC processing
   // only set to non-zero if constant pool is merged by RedefineClasses
   int                  _orig_length;
 
@@ -379,6 +381,9 @@ class constantPoolOopDesc : public oopDesc {
   static int object_size(int length)   { return align_object_size(header_size() + length); }
   int object_size()                    { return object_size(length()); }
 
+  bool is_conc_safe()                  { return _is_conc_safe; }
+  void set_is_conc_safe(bool v)        { _is_conc_safe = v; }
+
   friend class constantPoolKlass;
   friend class ClassFileParser;
   friend class SystemDictionary;
diff --git a/src/share/vm/oops/klass.hpp b/src/share/vm/oops/klass.hpp
index 46852c02e..881da970d 100644
--- a/src/share/vm/oops/klass.hpp
+++ b/src/share/vm/oops/klass.hpp
@@ -606,8 +606,19 @@ class Klass : public Klass_vtbl {
   #undef assert_same_query
 
   // Unless overridden, oop is parsable if it has a klass pointer.
+  // Parsability of an object is object specific.
   virtual bool oop_is_parsable(oop obj) const { return true; }
 
+  // Unless overridden, oop is safe for concurrent GC processing
+  // after its allocation is complete.  The exception to
+  // this is the case where objects are changed after allocation.
+  // Class redefinition is one of the known exceptions. During
+  // class redefinition, an allocated class can be changed in
+  // order to create a merged class (the combination of the
+  // old class definition that has to be preserved and the new class
+  // definition which is being created).
+  virtual bool oop_is_conc_safe(oop obj) const { return true; }
+
   // Access flags
   AccessFlags access_flags() const         { return _access_flags;  }
   void set_access_flags(AccessFlags flags) { _access_flags = flags; }
diff --git a/src/share/vm/oops/methodOop.cpp b/src/share/vm/oops/methodOop.cpp
index 8a5507cba..c239ccf1a 100644
--- a/src/share/vm/oops/methodOop.cpp
+++ b/src/share/vm/oops/methodOop.cpp
@@ -792,15 +792,34 @@ methodHandle methodOopDesc:: clone_with_new_data(methodHandle m, u_char* new_cod
   AccessFlags flags = m->access_flags();
   int checked_exceptions_len = m->checked_exceptions_length();
   int localvariable_len = m->localvariable_table_length();
-  methodOop newm_oop = oopFactory::new_method(new_code_length, flags, new_compressed_linenumber_size, localvariable_len, checked_exceptions_len, CHECK_(methodHandle()));
+  // Allocate newm_oop with the is_conc_safe parameter set
+  // to IsUnsafeConc to indicate that newm_oop is not yet
+  // safe for concurrent processing by a GC.
+  methodOop newm_oop = oopFactory::new_method(new_code_length,
+                                              flags,
+                                              new_compressed_linenumber_size,
+                                              localvariable_len,
+                                              checked_exceptions_len,
+                                              IsUnsafeConc,
+                                              CHECK_(methodHandle()));
   methodHandle newm (THREAD, newm_oop);
   int new_method_size = newm->method_size();
   // Create a shallow copy of methodOopDesc part, but be careful to preserve the new constMethodOop
   constMethodOop newcm = newm->constMethod();
   int new_const_method_size = newm->constMethod()->object_size();
+
   memcpy(newm(), m(), sizeof(methodOopDesc));
   // Create shallow copy of constMethodOopDesc, but be careful to preserve the methodOop
+  // is_conc_safe is set to false because that is the value of
+  // is_conc_safe initialized into newcm and the copy should
+  // not overwrite that value.  During the window during which it is
+  // tagged as unsafe, some extra work could be needed during precleaning
+  // or concurrent marking but those phases will be correct.  Setting and
+  // resetting is done in preference to a careful copying into newcm to
+  // avoid having to know the precise layout of a constMethodOop.
+  m->constMethod()->set_is_conc_safe(false);
   memcpy(newcm, m->constMethod(), sizeof(constMethodOopDesc));
+  m->constMethod()->set_is_conc_safe(true);
   // Reset correct method/const method, method size, and parameter info
   newcm->set_method(newm());
   newm->set_constMethod(newcm);
@@ -831,6 +850,10 @@ methodHandle methodOopDesc:: clone_with_new_data(methodHandle m, u_char* new_cod
            m->localvariable_table_start(),
            localvariable_len * sizeof(LocalVariableTableElement));
   }
+
+  // Only set is_conc_safe to true when changes to newcm are
+  // complete.
+  newcm->set_is_conc_safe(true);
   return newm;
 }
 
diff --git a/src/share/vm/oops/methodOop.hpp b/src/share/vm/oops/methodOop.hpp
index 965181c66..8b03a6838 100644
--- a/src/share/vm/oops/methodOop.hpp
+++ b/src/share/vm/oops/methodOop.hpp
@@ -129,6 +129,10 @@ class methodOopDesc : public oopDesc {
   volatile address           _from_interpreted_entry; // Cache of _code ? _adapter->i2c_entry() : _i2i_entry
 
  public:
+
+  static const bool IsUnsafeConc         = false;
+  static const bool IsSafeConc           = true;
+
   // accessors for instance variables
   constMethodOop constMethod() const             { return _constMethod; }
   void set_constMethod(constMethodOop xconst)    { oop_store_without_check((oop*)&_constMethod, (oop)xconst); }
diff --git a/src/share/vm/oops/oop.hpp b/src/share/vm/oops/oop.hpp
index 07c2ba5ac..ba13ca63c 100644
--- a/src/share/vm/oops/oop.hpp
+++ b/src/share/vm/oops/oop.hpp
@@ -108,6 +108,13 @@ class oopDesc {
   // installation of their klass pointer.
   bool is_parsable();
 
+  // Some perm gen objects that have been allocated and initialized
+  // can be changed by the VM when not at a safe point (class redefinition
+  // is an example).  Such objects should not be examined by the
+  // concurrent processing of a garbage collector if is_conc_safe()
+  // returns false.
+  bool is_conc_safe();
+
   // type test operations (inlined in oop.inline.h)
   bool is_instance()           const;
   bool is_instanceRef()        const;
diff --git a/src/share/vm/oops/oop.inline.hpp b/src/share/vm/oops/oop.inline.hpp
index 0c7a3967f..9161310de 100644
--- a/src/share/vm/oops/oop.inline.hpp
+++ b/src/share/vm/oops/oop.inline.hpp
@@ -435,6 +435,10 @@ inline bool oopDesc::is_parsable() {
   return blueprint()->oop_is_parsable(this);
 }
 
+inline bool oopDesc::is_conc_safe() {
+  return blueprint()->oop_is_conc_safe(this);
+}
+
 inline void update_barrier_set(void* p, oop v) {
   assert(oopDesc::bs() != NULL, "Uninitialized bs in oop!");
   oopDesc::bs()->write_ref_field(p, v);
diff --git a/src/share/vm/oops/oopsHierarchy.hpp b/src/share/vm/oops/oopsHierarchy.hpp
index 597dafe3d..06c64d0e4 100644
--- a/src/share/vm/oops/oopsHierarchy.hpp
+++ b/src/share/vm/oops/oopsHierarchy.hpp
@@ -126,8 +126,11 @@ public:
   operator jobject () const           { return (jobject)obj(); }
   // from javaClasses.cpp
   operator JavaThread* () const       { return (JavaThread*)obj(); }
+
+#ifndef _LP64
   // from jvm.cpp
   operator jlong* () const            { return (jlong*)obj(); }
+#endif
 
   // from parNewGeneration and other things that want to get to the end of
   // an oop for stuff (like constMethodKlass.cpp, objArrayKlass.cpp)
diff --git a/src/share/vm/opto/idealGraphPrinter.cpp b/src/share/vm/opto/idealGraphPrinter.cpp
index 6dfc5fc9c..ad45b566e 100644
--- a/src/share/vm/opto/idealGraphPrinter.cpp
+++ b/src/share/vm/opto/idealGraphPrinter.cpp
@@ -557,7 +557,7 @@ void IdealGraphPrinter::visit_node(Node *n, void *param) {
 
         // max. 2 chars allowed
         if (value >= -9 && value <= 99) {
-          sprintf(buffer, "%d", value);
+          sprintf(buffer, INT64_FORMAT, value);
           print_prop(short_name, buffer);
         } else {
           print_prop(short_name, "L");
diff --git a/src/share/vm/prims/jni.cpp b/src/share/vm/prims/jni.cpp
index b4a82b9e5..e3f715dd2 100644
--- a/src/share/vm/prims/jni.cpp
+++ b/src/share/vm/prims/jni.cpp
@@ -2691,8 +2691,13 @@ static bool initializeDirectBufferSupport(JNIEnv* env, JavaThread* thread) {
 
     directBufferSupportInitializeEnded = 1;
   } else {
-    ThreadInVMfromNative tivn(thread); // set state as yield_all can call os:sleep
     while (!directBufferSupportInitializeEnded && !directBufferSupportInitializeFailed) {
+      // Set state as yield_all can call os::sleep. On Solaris, yield_all calls
+      // os::sleep which requires the VM state transition. On other platforms, it
+      // is not necessary. The following call to change the VM state is purposely
+      // put inside the loop to avoid potential deadlock when multiple threads
+      // try to call this method. See 6791815 for more details.
+      ThreadInVMfromNative tivn(thread);
       os::yield_all();
     }
   }
diff --git a/src/share/vm/prims/jvm.cpp b/src/share/vm/prims/jvm.cpp
index 7f832341c..1835594ba 100644
--- a/src/share/vm/prims/jvm.cpp
+++ b/src/share/vm/prims/jvm.cpp
@@ -2475,7 +2475,8 @@ void jio_print(const char* s) {
   if (Arguments::vfprintf_hook() != NULL) {
     jio_fprintf(defaultStream::output_stream(), "%s", s);
   } else {
-    ::write(defaultStream::output_fd(), s, (int)strlen(s));
+    // Make an unused local variable to avoid warning from gcc 4.x compiler.
+    size_t count = ::write(defaultStream::output_fd(), s, (int)strlen(s));
   }
 }
 
diff --git a/src/share/vm/prims/jvmtiRedefineClasses.cpp b/src/share/vm/prims/jvmtiRedefineClasses.cpp
index bd8fe2b35..4cc6b577b 100644
--- a/src/share/vm/prims/jvmtiRedefineClasses.cpp
+++ b/src/share/vm/prims/jvmtiRedefineClasses.cpp
@@ -1230,8 +1230,14 @@ jvmtiError VM_RedefineClasses::merge_cp_and_rewrite(
 
   // Constant pools are not easily reused so we allocate a new one
   // each time.
+  // merge_cp is created unsafe for concurrent GC processing.  It
+  // should be marked safe before discarding it. Even though
+  // garbage, if it crosses a card boundary, it may be scanned
+  // in order to find the start of the first complete object on the card.
   constantPoolHandle merge_cp(THREAD,
-    oopFactory::new_constantPool(merge_cp_length, THREAD));
+    oopFactory::new_constantPool(merge_cp_length,
+                                 methodOopDesc::IsUnsafeConc,
+                                 THREAD));
   int orig_length = old_cp->orig_length();
   if (orig_length == 0) {
     // This old_cp is an actual original constant pool. We save
@@ -1274,6 +1280,7 @@ jvmtiError VM_RedefineClasses::merge_cp_and_rewrite(
       // rewriting so we can't use the old constant pool with the new
       // class.
 
+      merge_cp()->set_is_conc_safe(true);
       merge_cp = constantPoolHandle();  // toss the merged constant pool
     } else if (old_cp->length() < scratch_cp->length()) {
       // The old constant pool has fewer entries than the new constant
@@ -1283,6 +1290,7 @@ jvmtiError VM_RedefineClasses::merge_cp_and_rewrite(
       // rewriting so we can't use the new constant pool with the old
       // class.
 
+      merge_cp()->set_is_conc_safe(true);
       merge_cp = constantPoolHandle();  // toss the merged constant pool
     } else {
       // The old constant pool has more entries than the new constant
@@ -1296,6 +1304,7 @@ jvmtiError VM_RedefineClasses::merge_cp_and_rewrite(
       set_new_constant_pool(scratch_class, merge_cp, merge_cp_length, true,
         THREAD);
       // drop local ref to the merged constant pool
+      merge_cp()->set_is_conc_safe(true);
       merge_cp = constantPoolHandle();
     }
   } else {
@@ -1325,7 +1334,10 @@ jvmtiError VM_RedefineClasses::merge_cp_and_rewrite(
     // GCed.
     set_new_constant_pool(scratch_class, merge_cp, merge_cp_length, true,
       THREAD);
+    merge_cp()->set_is_conc_safe(true);
   }
+  assert(old_cp()->is_conc_safe(), "Just checking");
+  assert(scratch_cp()->is_conc_safe(), "Just checking");
 
   return JVMTI_ERROR_NONE;
 } // end merge_cp_and_rewrite()
@@ -2314,13 +2326,16 @@ void VM_RedefineClasses::set_new_constant_pool(
     // worst case merge situation. We want to associate the minimum
     // sized constant pool with the klass to save space.
     constantPoolHandle smaller_cp(THREAD,
-      oopFactory::new_constantPool(scratch_cp_length, THREAD));
+      oopFactory::new_constantPool(scratch_cp_length,
+                                   methodOopDesc::IsUnsafeConc,
+                                   THREAD));
     // preserve orig_length() value in the smaller copy
     int orig_length = scratch_cp->orig_length();
     assert(orig_length != 0, "sanity check");
     smaller_cp->set_orig_length(orig_length);
     scratch_cp->copy_cp_to(1, scratch_cp_length - 1, smaller_cp, 1, THREAD);
     scratch_cp = smaller_cp;
+    smaller_cp()->set_is_conc_safe(true);
   }
 
   // attach new constant pool to klass
@@ -2516,6 +2531,7 @@ void VM_RedefineClasses::set_new_constant_pool(
 
     rewrite_cp_refs_in_stack_map_table(method, THREAD);
   } // end for each method
+  assert(scratch_cp()->is_conc_safe(), "Just checking");
 } // end set_new_constant_pool()
 
 
diff --git a/src/share/vm/prims/jvmtiTagMap.cpp b/src/share/vm/prims/jvmtiTagMap.cpp
index 152789815..dcf83bb22 100644
--- a/src/share/vm/prims/jvmtiTagMap.cpp
+++ b/src/share/vm/prims/jvmtiTagMap.cpp
@@ -1320,6 +1320,9 @@ class VM_HeapIterateOperation: public VM_Operation {
     }
 
     // do the iteration
+    // If this operation encounters a bad object when using CMS,
+    // consider using safe_object_iterate() which avoids perm gen
+    // objects that may contain bad references.
     Universe::heap()->object_iterate(_blk);
 
     // when sharing is enabled we must iterate over the shared spaces
diff --git a/src/share/vm/runtime/arguments.cpp b/src/share/vm/runtime/arguments.cpp
index b9c943d10..b719636bb 100644
--- a/src/share/vm/runtime/arguments.cpp
+++ b/src/share/vm/runtime/arguments.cpp
@@ -229,6 +229,7 @@ public:
 
   inline void set_base(const char* base);
   inline void add_prefix(const char* prefix);
+  inline void add_suffix_to_prefix(const char* suffix);
   inline void add_suffix(const char* suffix);
   inline void reset_path(const char* base);
 
@@ -290,6 +291,10 @@ inline void SysClassPath::add_prefix(const char* prefix) {
   _items[_scp_prefix] = add_to_path(_items[_scp_prefix], prefix, true);
 }
 
+inline void SysClassPath::add_suffix_to_prefix(const char* suffix) {
+  _items[_scp_prefix] = add_to_path(_items[_scp_prefix], suffix, false);
+}
+
 inline void SysClassPath::add_suffix(const char* suffix) {
   _items[_scp_suffix] = add_to_path(_items[_scp_suffix], suffix, false);
 }
@@ -512,7 +517,6 @@ static bool set_bool_flag(char* name, bool value, FlagValueOrigin origin) {
   return CommandLineFlags::boolAtPut(name, &value, origin);
 }
 
-
 static bool set_fp_numeric_flag(char* name, char* value, FlagValueOrigin origin) {
   double v;
   if (sscanf(value, "%lf", &v) != 1) {
@@ -525,7 +529,6 @@ static bool set_fp_numeric_flag(char* name, char* value, FlagValueOrigin origin)
   return false;
 }
 
-
 static bool set_numeric_flag(char* name, char* value, FlagValueOrigin origin) {
   julong v;
   intx intx_v;
@@ -555,7 +558,6 @@ static bool set_numeric_flag(char* name, char* value, FlagValueOrigin origin) {
   return false;
 }
 
-
 static bool set_string_flag(char* name, const char* value, FlagValueOrigin origin) {
   if (!CommandLineFlags::ccstrAtPut(name, &value, origin))  return false;
   // Contract:  CommandLineFlags always returns a pointer that needs freeing.
@@ -591,7 +593,6 @@ static bool append_to_string_flag(char* name, const char* new_value, FlagValueOr
   return true;
 }
 
-
 bool Arguments::parse_argument(const char* arg, FlagValueOrigin origin) {
 
   // range of acceptable characters spelled out for portability reasons
@@ -652,7 +653,6 @@ bool Arguments::parse_argument(const char* arg, FlagValueOrigin origin) {
   return false;
 }
 
-
 void Arguments::add_string(char*** bldarray, int* count, const char* arg) {
   assert(bldarray != NULL, "illegal argument");
 
@@ -756,7 +756,6 @@ bool Arguments::process_argument(const char* arg,
   return true;
 }
 
-
 bool Arguments::process_settings_file(const char* file_name, bool should_exist, jboolean ignore_unrecognized) {
   FILE* stream = fopen(file_name, "rb");
   if (stream == NULL) {
@@ -932,7 +931,6 @@ void Arguments::set_mode_flags(Mode mode) {
   }
 }
 
-
 // Conflict: required to use shared spaces (-Xshare:on), but
 // incompatible command line options were chosen.
 
@@ -946,7 +944,6 @@ static void no_shared_spaces() {
   }
 }
 
-
 // If the user has chosen ParallelGCThreads > 0, we set UseParNewGC
 // if it's not explictly set or unset. If the user has chosen
 // UseParNewGC and not explicitly set ParallelGCThreads we
@@ -1361,7 +1358,7 @@ void Arguments::set_aggressive_opts_flags() {
 
     // Feed the cache size setting into the JDK
     char buffer[1024];
-    sprintf(buffer, "java.lang.Integer.IntegerCache.high=%d", AutoBoxCacheMax);
+    sprintf(buffer, "java.lang.Integer.IntegerCache.high=" INTX_FORMAT, AutoBoxCacheMax);
     add_property(buffer);
   }
   if (AggressiveOpts && FLAG_IS_DEFAULT(DoEscapeAnalysis)) {
@@ -1714,6 +1711,21 @@ jint Arguments::parse_vm_init_args(const JavaVMInitArgs* args) {
     return result;
   }
 
+  if (AggressiveOpts) {
+    // Insert alt-rt.jar between user-specified bootclasspath
+    // prefix and the default bootclasspath.  os::set_boot_path()
+    // uses meta_index_dir as the default bootclasspath directory.
+    const char* altclasses_jar = "alt-rt.jar";
+    size_t altclasses_path_len = strlen(get_meta_index_dir()) + 1 +
+                                 strlen(altclasses_jar);
+    char* altclasses_path = NEW_C_HEAP_ARRAY(char, altclasses_path_len);
+    strcpy(altclasses_path, get_meta_index_dir());
+    strcat(altclasses_path, altclasses_jar);
+    scp.add_suffix_to_prefix(altclasses_path);
+    scp_assembly_required = true;
+    FREE_C_HEAP_ARRAY(char, altclasses_path);
+  }
+
   // Parse _JAVA_OPTIONS environment variable (if present) (mimics classic VM)
   result = parse_java_options_environment_variable(&scp, &scp_assembly_required);
   if (result != JNI_OK) {
@@ -1729,7 +1741,6 @@ jint Arguments::parse_vm_init_args(const JavaVMInitArgs* args) {
   return JNI_OK;
 }
 
-
 jint Arguments::parse_each_vm_init_arg(const JavaVMInitArgs* args,
                                        SysClassPath* scp_p,
                                        bool* scp_assembly_required_p,
@@ -1795,7 +1806,7 @@ jint Arguments::parse_each_vm_init_arg(const JavaVMInitArgs* args,
       *scp_assembly_required_p = true;
     // -Xrun
     } else if (match_option(option, "-Xrun", &tail)) {
-      if(tail != NULL) {
+      if (tail != NULL) {
         const char* pos = strchr(tail, ':');
         size_t len = (pos == NULL) ? strlen(tail) : pos - tail;
         char* name = (char*)memcpy(NEW_C_HEAP_ARRAY(char, len + 1), tail, len);
@@ -2558,7 +2569,6 @@ jint Arguments::parse(const JavaVMInitArgs* args) {
     }
   }
 
-
   // Parse JavaVMInitArgs structure passed in, as well as JAVA_TOOL_OPTIONS and _JAVA_OPTIONS
   jint result = parse_vm_init_args(args);
   if (result != JNI_OK) {
diff --git a/src/share/vm/runtime/globals.hpp b/src/share/vm/runtime/globals.hpp
index 7d9e1a614..af35e0998 100644
--- a/src/share/vm/runtime/globals.hpp
+++ b/src/share/vm/runtime/globals.hpp
@@ -835,8 +835,21 @@ class CommandLineFlags {
           "Prints the system dictionary at exit")                           \
                                                                             \
   diagnostic(bool, UnsyncloadClass, false,                                  \
-          "Unstable: VM calls loadClass unsynchronized. Custom classloader "\
-          "must call VM synchronized for findClass & defineClass")          \
+          "Unstable: VM calls loadClass unsynchronized. Custom "            \
+          "class loader  must call VM synchronized for findClass "          \
+          "and defineClass.")                                               \
+                                                                            \
+  product(bool, AlwaysLockClassLoader, false,                               \
+          "Require the VM to acquire the class loader lock before calling " \
+          "loadClass() even for class loaders registering "                 \
+          "as parallel capable. Default false. ")                           \
+                                                                            \
+  product(bool, AllowParallelDefineClass, false,                            \
+          "Allow parallel defineClass requests for class loaders "          \
+          "registering as parallel capable. Default false")                 \
+                                                                            \
+  product(bool, MustCallLoadClassInternal, false,                           \
+          "Call loadClassInternal() rather than loadClass().Default false") \
                                                                             \
   product_pd(bool, DontYieldALot,                                           \
           "Throw away obvious excess yield calls (for SOLARIS only)")       \
@@ -1294,7 +1307,14 @@ class CommandLineFlags {
   product(intx, ParGCArrayScanChunk, 50,                                    \
           "Scan a subset and push remainder, if array is bigger than this") \
                                                                             \
-  product(intx, ParGCDesiredObjsFromOverflowList, 20,                       \
+  notproduct(bool, ParGCWorkQueueOverflowALot, false,                       \
+          "Whether we should simulate work queue overflow in ParNew")       \
+                                                                            \
+  notproduct(uintx, ParGCWorkQueueOverflowInterval, 1000,                   \
+          "An `interval' counter that determines how frequently"            \
+          " we simulate overflow; a smaller number increases frequency")    \
+                                                                            \
+  product(uintx, ParGCDesiredObjsFromOverflowList, 20,                      \
           "The desired number of objects to claim from the overflow list")  \
                                                                             \
   product(uintx, CMSParPromoteBlocksToClaim, 50,                            \
@@ -1416,8 +1436,8 @@ class CommandLineFlags {
           "Whether we should simulate frequent marking stack / work queue"  \
           " overflow")                                                      \
                                                                             \
-  notproduct(intx, CMSMarkStackOverflowInterval, 1000,                      \
-          "A per-thread `interval' counter that determines how frequently"  \
+  notproduct(uintx, CMSMarkStackOverflowInterval, 1000,                     \
+          "An `interval' counter that determines how frequently"            \
           " we simulate overflow; a smaller number increases frequency")    \
                                                                             \
   product(uintx, CMSMaxAbortablePrecleanLoops, 0,                           \
@@ -1635,7 +1655,7 @@ class CommandLineFlags {
   develop(uintx, WorkStealingYieldsBeforeSleep, 1000,                       \
           "Number of yields before a sleep is done during workstealing")    \
                                                                             \
-  product(uintx, PreserveMarkStackSize, 40,                                 \
+  product(uintx, PreserveMarkStackSize, 1024,                               \
            "Size for stack used in promotion failure handling")             \
                                                                             \
   product_pd(bool, UseTLAB, "Use thread-local object allocation")           \
diff --git a/src/share/vm/runtime/memprofiler.cpp b/src/share/vm/runtime/memprofiler.cpp
index 4d5ad533d..f8a259f9b 100644
--- a/src/share/vm/runtime/memprofiler.cpp
+++ b/src/share/vm/runtime/memprofiler.cpp
@@ -104,21 +104,22 @@ void MemProfiler::do_trace() {
   }
 
   // Print trace line in log
-  fprintf(_log_fp, "%6.1f,%5d,%5d,%6ld,%6ld,%6ld,%6ld,",
-      os::elapsedTime(),
-      Threads::number_of_threads(),
-      SystemDictionary::number_of_classes(),
-      Universe::heap()->used() / K,
-      Universe::heap()->capacity() / K,
-      Universe::heap()->permanent_used() / HWperKB,
-      Universe::heap()->permanent_capacity() / HWperKB);
-
-  fprintf(_log_fp, "%6ld,", CodeCache::capacity() / K);
-
-  fprintf(_log_fp, "%6ld,%6ld,%6ld\n",
-      handles_memory_usage / K,
-      resource_memory_usage / K,
-      OopMapCache::memory_usage() / K);
+  fprintf(_log_fp, "%6.1f,%5d,%5d," UINTX_FORMAT_W(6) "," UINTX_FORMAT_W(6) ","
+          UINTX_FORMAT_W(6) "," UINTX_FORMAT_W(6) ",",
+          os::elapsedTime(),
+          Threads::number_of_threads(),
+          SystemDictionary::number_of_classes(),
+          Universe::heap()->used() / K,
+          Universe::heap()->capacity() / K,
+          Universe::heap()->permanent_used() / HWperKB,
+          Universe::heap()->permanent_capacity() / HWperKB);
+
+  fprintf(_log_fp, UINTX_FORMAT_W(6) ",", CodeCache::capacity() / K);
+
+  fprintf(_log_fp, UINTX_FORMAT_W(6) "," UINTX_FORMAT_W(6) ",%6ld\n",
+          handles_memory_usage / K,
+          resource_memory_usage / K,
+          OopMapCache::memory_usage() / K);
   fflush(_log_fp);
 }
 
diff --git a/src/share/vm/runtime/safepoint.cpp b/src/share/vm/runtime/safepoint.cpp
index 2a3b838f6..c13af643a 100644
--- a/src/share/vm/runtime/safepoint.cpp
+++ b/src/share/vm/runtime/safepoint.cpp
@@ -730,7 +730,7 @@ void SafepointSynchronize::print_safepoint_timeout(SafepointTimeoutReason reason
   if (DieOnSafepointTimeout) {
     char msg[1024];
     VM_Operation *op = VMThread::vm_operation();
-    sprintf(msg, "Safepoint sync time longer than %d ms detected when executing %s.",
+    sprintf(msg, "Safepoint sync time longer than " INTX_FORMAT "ms detected when executing %s.",
             SafepointTimeoutDelay,
             op != NULL ? op->name() : "no vm operation");
     fatal(msg);
diff --git a/src/share/vm/runtime/sharedRuntime.cpp b/src/share/vm/runtime/sharedRuntime.cpp
index 76067dc8e..cf69631c0 100644
--- a/src/share/vm/runtime/sharedRuntime.cpp
+++ b/src/share/vm/runtime/sharedRuntime.cpp
@@ -192,64 +192,46 @@ JRT_END
 
 
 JRT_LEAF(jint, SharedRuntime::f2i(jfloat  x))
-  if (g_isnan(x)) {return 0;}
-  jlong lltmp = (jlong)x;
-  jint ltmp   = (jint)lltmp;
-  if (ltmp == lltmp) {
-    return ltmp;
-  } else {
-    if (x < 0) {
-      return min_jint;
-    } else {
-      return max_jint;
-    }
-  }
+  if (g_isnan(x))
+    return 0;
+  if (x >= (jfloat) max_jint)
+    return max_jint;
+  if (x <= (jfloat) min_jint)
+    return min_jint;
+  return (jint) x;
 JRT_END
 
 
 JRT_LEAF(jlong, SharedRuntime::f2l(jfloat  x))
-  if (g_isnan(x)) {return 0;}
-  jlong lltmp = (jlong)x;
-  if (lltmp != min_jlong) {
-    return lltmp;
-  } else {
-    if (x < 0) {
-      return min_jlong;
-    } else {
-      return max_jlong;
-    }
-  }
+  if (g_isnan(x))
+    return 0;
+  if (x >= (jfloat) max_jlong)
+    return max_jlong;
+  if (x <= (jfloat) min_jlong)
+    return min_jlong;
+  return (jlong) x;
 JRT_END
 
 
 JRT_LEAF(jint, SharedRuntime::d2i(jdouble x))
-  if (g_isnan(x)) {return 0;}
-  jlong lltmp = (jlong)x;
-  jint ltmp   = (jint)lltmp;
-  if (ltmp == lltmp) {
-    return ltmp;
-  } else {
-    if (x < 0) {
-      return min_jint;
-    } else {
-      return max_jint;
-    }
-  }
+  if (g_isnan(x))
+    return 0;
+  if (x >= (jdouble) max_jint)
+    return max_jint;
+  if (x <= (jdouble) min_jint)
+    return min_jint;
+  return (jint) x;
 JRT_END
 
 
 JRT_LEAF(jlong, SharedRuntime::d2l(jdouble x))
-  if (g_isnan(x)) {return 0;}
-  jlong lltmp = (jlong)x;
-  if (lltmp != min_jlong) {
-    return lltmp;
-  } else {
-    if (x < 0) {
-      return min_jlong;
-    } else {
-      return max_jlong;
-    }
-  }
+  if (g_isnan(x))
+    return 0;
+  if (x >= (jdouble) max_jlong)
+    return max_jlong;
+  if (x <= (jdouble) min_jlong)
+    return min_jlong;
+  return (jlong) x;
 JRT_END
 
 
diff --git a/src/share/vm/runtime/synchronizer.cpp b/src/share/vm/runtime/synchronizer.cpp
index aeb66980a..e0f3cfe04 100644
--- a/src/share/vm/runtime/synchronizer.cpp
+++ b/src/share/vm/runtime/synchronizer.cpp
@@ -424,7 +424,7 @@ void ObjectSynchronizer::Initialize () {
 // asserts is that error message -- often something about negative array
 // indices -- is opaque.
 
-#define CTASSERT(x) { int tag[1-(2*!(x))]; printf ("Tag @%X\n", tag); }
+#define CTASSERT(x) { int tag[1-(2*!(x))]; printf ("Tag @" INTPTR_FORMAT "\n", (intptr_t)tag); }
 
 void ObjectMonitor::ctAsserts() {
   CTASSERT(offset_of (ObjectMonitor, _header) == 0);
diff --git a/src/share/vm/services/heapDumper.cpp b/src/share/vm/services/heapDumper.cpp
index bf7aaf1a9..18bd9f477 100644
--- a/src/share/vm/services/heapDumper.cpp
+++ b/src/share/vm/services/heapDumper.cpp
@@ -1700,7 +1700,7 @@ void VM_HeapDumper::doit() {
   // The HPROF_GC_CLASS_DUMP and HPROF_GC_INSTANCE_DUMP are the vast bulk
   // of the heap dump.
   HeapObjectDumper obj_dumper(this, writer());
-  Universe::heap()->object_iterate(&obj_dumper);
+  Universe::heap()->safe_object_iterate(&obj_dumper);
 
   // HPROF_GC_ROOT_THREAD_OBJ + frames + jni locals
   do_threads();
diff --git a/src/share/vm/utilities/globalDefinitions.hpp b/src/share/vm/utilities/globalDefinitions.hpp
index f87bfad69..93b2a2b67 100644
--- a/src/share/vm/utilities/globalDefinitions.hpp
+++ b/src/share/vm/utilities/globalDefinitions.hpp
@@ -1087,15 +1087,24 @@ inline int build_int_from_shorts( jushort low, jushort high ) {
 // Format macros that allow the field width to be specified.  The width must be
 // a string literal (e.g., "8") or a macro that evaluates to one.
 #ifdef _LP64
+#define UINTX_FORMAT_W(width)   UINT64_FORMAT_W(width)
 #define SSIZE_FORMAT_W(width)   INT64_FORMAT_W(width)
 #define SIZE_FORMAT_W(width)    UINT64_FORMAT_W(width)
 #else
+#define UINTX_FORMAT_W(width)   UINT32_FORMAT_W(width)
 #define SSIZE_FORMAT_W(width)   INT32_FORMAT_W(width)
 #define SIZE_FORMAT_W(width)    UINT32_FORMAT_W(width)
 #endif // _LP64
 
 // Format pointers and size_t (or size_t-like integer types) which change size
-// between 32- and 64-bit.
+// between 32- and 64-bit. The pointer format theoretically should be "%p",
+// however, it has different output on different platforms. On Windows, the data
+// will be padded with zeros automatically. On Solaris, we can use "%016p" &
+// "%08p" on 64 bit & 32 bit platforms to make the data padded with extra zeros.
+// On Linux, "%016p" or "%08p" is not allowed, at least on the latest GCC
+// 4.3.2. So we have to use "%016x" or "%08x" to simulate the printing format.
+// GCC 4.3.2, however requires the data to be converted to "intptr_t" when
+// using "%x".
 #ifdef  _LP64
 #define PTR_FORMAT    PTR64_FORMAT
 #define UINTX_FORMAT  UINT64_FORMAT
diff --git a/src/share/vm/utilities/globalDefinitions_gcc.hpp b/src/share/vm/utilities/globalDefinitions_gcc.hpp
index 417978f08..48f2c7e88 100644
--- a/src/share/vm/utilities/globalDefinitions_gcc.hpp
+++ b/src/share/vm/utilities/globalDefinitions_gcc.hpp
@@ -116,7 +116,9 @@
   #ifdef _LP64
     #define NULL_WORD  0L
   #else
-    #define NULL_WORD  0
+    // Cast 0 to intptr_t rather than int32_t since they are not the same type
+    // on platforms such as Mac OS X.
+    #define NULL_WORD  ((intptr_t)0)
   #endif
 #else
   #define NULL_WORD  NULL
diff --git a/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp b/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp
index f25364b72..62c8b92e2 100644
--- a/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp
+++ b/src/share/vm/utilities/globalDefinitions_sparcWorks.hpp
@@ -115,7 +115,9 @@
   #ifdef _LP64
     #define NULL_WORD  0L
   #else
-    #define NULL_WORD  0
+    // Cast 0 to intptr_t rather than int32_t since they are not the same type
+    // on some platforms.
+    #define NULL_WORD  ((intptr_t)0)
   #endif
 #else
   #define NULL_WORD  NULL
diff --git a/src/share/vm/utilities/ostream.cpp b/src/share/vm/utilities/ostream.cpp
index 8770bed52..65d18802b 100644
--- a/src/share/vm/utilities/ostream.cpp
+++ b/src/share/vm/utilities/ostream.cpp
@@ -300,7 +300,10 @@ fileStream::fileStream(const char* file_name) {
 }
 
 void fileStream::write(const char* s, size_t len) {
-  if (_file != NULL)  fwrite(s, 1, len, _file);
+  if (_file != NULL)  {
+    // Make an unused local variable to avoid warning from gcc 4.x compiler.
+    size_t count = fwrite(s, 1, len, _file);
+  }
   update_position(s, len);
 }
 
@@ -328,7 +331,10 @@ fdStream::~fdStream() {
 }
 
 void fdStream::write(const char* s, size_t len) {
-  if (_fd != -1) ::write(_fd, s, (int)len);
+  if (_fd != -1) {
+    // Make an unused local variable to avoid warning from gcc 4.x compiler.
+    size_t count = ::write(_fd, s, (int)len);
+  }
   update_position(s, len);
 }
 
diff --git a/src/share/vm/utilities/vmError.cpp b/src/share/vm/utilities/vmError.cpp
index d2a0161a7..a4d0cb0ba 100644
--- a/src/share/vm/utilities/vmError.cpp
+++ b/src/share/vm/utilities/vmError.cpp
@@ -674,6 +674,11 @@ void VMError::report_and_die() {
     reset_signal_handlers();
 
   } else {
+    // If UseOSErrorReporting is set we call this for each level of the call stack
+    // while searching for the exception handler.  Only the first level needs
+    // to be reported.
+    if (UseOSErrorReporting && log_done) return;
+
     // This is not the first error, see if it happened in a different thread
     // or in the same thread during error reporting.
     if (first_error_tid != mytid) {
diff --git a/src/share/vm/utilities/vmError.hpp b/src/share/vm/utilities/vmError.hpp
index 414bc7f21..8e618d914 100644
--- a/src/share/vm/utilities/vmError.hpp
+++ b/src/share/vm/utilities/vmError.hpp
@@ -50,7 +50,7 @@ class VMError : public StackObj {
 
   // additional info for VM internal errors
   const char * _filename;
-  int          _lineno;
+  size_t       _lineno;
 
   // used by fatal error handler
   int          _current_step;
diff --git a/test/compiler/6778657/Test.java b/test/compiler/6778657/Test.java
new file mode 100644
index 000000000..4fdd33e93
--- /dev/null
+++ b/test/compiler/6778657/Test.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2008 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+/*
+ * @test
+ * @bug 6778657
+ * @summary Casts in SharedRuntime::f2i, f2l, d2i and d2l rely on undefined C++ behaviour
+ */
+
+public class Test {
+  public static void check_f2i(int expect) {
+    float check = expect;
+    check *= 2;
+    int actual = (int) check;
+    if (actual != expect)
+      throw new RuntimeException("expecting " + expect + ", got " + actual);
+  }
+
+  public static void check_f2l(long expect) {
+    float check = expect;
+    check *= 2;
+    long actual = (long) check;
+    if (actual != expect)
+      throw new RuntimeException("expecting " + expect + ", got " + actual);
+  }
+
+  public static void check_d2i(int expect) {
+    double check = expect;
+    check *= 2;
+    int actual = (int) check;
+    if (actual != expect)
+      throw new RuntimeException("expecting " + expect + ", got " + actual);
+  }
+
+  public static void check_d2l(long expect) {
+    double check = expect;
+    check *= 2;
+    long actual = (long) check;
+    if (actual != expect)
+      throw new RuntimeException("expecting " + expect + ", got " + actual);
+  }
+
+  public static void main(String[] args) {
+    check_f2i(Integer.MAX_VALUE);
+    check_f2i(Integer.MIN_VALUE);
+    check_f2l(Long.MAX_VALUE);
+    check_f2l(Long.MIN_VALUE);
+    check_d2i(Integer.MAX_VALUE);
+    check_d2i(Integer.MIN_VALUE);
+    check_d2l(Long.MAX_VALUE);
+    check_d2l(Long.MIN_VALUE);
+  }
+}
+
author	trims <none@none>	2009-01-30 15:28:00 -0800
committer	trims <none@none>	2009-01-30 15:28:00 -0800
commit	d548fe68de5bb6ab73de7207d974066259394fb1 (patch)
tree	9becd2db41d56e09aa1f1f36602229bc92846798
parent	6f1aac16cca6ee43b5499cebfe54a070f0f01cbd (diff)
parent	9f5a2de4ba2fb0411a5d2ebf596acbb0c51c8f18 (diff)